def test_run(self):
    """Check that the region finder reports exactly three regions.

    Runs a NormalizedCutRegionFinder (min_region_size=6,
    score_threshold=0.1) with a SimilarityWeight(sim_add=1.0) over a
    fixed sequence and checks the number of results returned by find().
    """
    seq = gelscore.Sequence(
        'RGGGNGRGGRGRGGPMGRGGYGGGGSGGGGRGGFPSGGGGGGGQQRAGDWKCPNPTCENMNFSWRNECNQCKAPKPDGPGGGPGGSHMGGN')
    weight = gelscore.SimilarityWeight(sim_add=1.0)
    finder = gelscore.NormalizedCutRegionFinder(
        weight, min_region_size=6, score_threshold=0.1)
    regions = finder.find(seq)
    # Debug aid: printRegions(regions)
    # assertEqual shows both values on failure, unlike assertTrue(a == b).
    self.assertEqual(len(regions), 3)
def test_run(self):
    """Check that a long sequence is split into at least seven regions.

    Runs a NormalizedCutRegionFinder (min_region_size=10,
    score_threshold=0.025) with a SimilarityWeight(sim_add=1.0) and
    checks the number of results returned by find().
    """
    seq = gelscore.Sequence(
        'MASNDYTQQATQSYGAYPTQPGQGYSQQSSQPYGQQSYSGYSQSTDTSGYGQSSYSSYGQSQNTGYGTQSTPQGYGSTGGYGSSQSSQSSYGQQSSYPGYGQQPAPSSTSGSYGSSSQSSSYGQPQSGSYSQQPSYGGQQQSYGQQQSYNPPQGYGQQNQYNSSSGGGGGGGGGGNYGQDQSSMSSGGGSGGGYGNQDQSGGGGSGGYGQQDRGGRGRGGSGGGGGGGGGGYNRSSGGYEPRGRGGGRGGRGGMGGSDRGGFNKFGGPRDQGSRHDSEQDNSDNNTIFVQGLGENVTIESVADYFKQIGIIKTNKKTGQPMINLYTDRETGKLKGEATVSFDDPPSAKAAIDWFDGKEFSGNPIKVSFATRRADFNRGGGNGRGGRGRGGPMGRGGYGGGGSGGGGRGGFPSGGGGGGGQQRAGDWKCPNPTCENMNFSWRNECNQCKAPKPDGPGGGPGGSHMGGNYGDDRRGGRGGYDRGGYRGRGGDRGGFRGGRGGGDRGGFGPGKMDSRGEHRQDRRERPY')
    # Shorter alternative input, handy when debugging:
    # seq = gelscore.Sequence('AAAAAAAAAAAAAYYGSGSGSGSGSAAAAAAAAA')
    weight = gelscore.SimilarityWeight(sim_add=1.0)
    finder = gelscore.NormalizedCutRegionFinder(
        weight, min_region_size=10, score_threshold=0.025)
    regions = finder.find(seq)
    # Debug aid: printRegions(regions)
    # assertGreaterEqual reports the actual count when the check fails.
    self.assertGreaterEqual(len(regions), 7)
def test_run(self):
    """Check that a two-part sequence yields two regions, below the cap.

    Runs a NormalizedCutRegionFinder (min_region_size=6,
    score_threshold=0.9, max_regions=4) with a
    SimilarityWeight(sim_add=1.0) over a run of 'A' followed by a run
    of 'G', then checks the number of results returned by find().
    """
    seq = gelscore.Sequence('AAAAAAAAAAAAGGGGGGGGGGG')
    weight = gelscore.SimilarityWeight(sim_add=1.0)
    max_regions = 4
    finder = gelscore.NormalizedCutRegionFinder(
        weight, min_region_size=6, score_threshold=0.9,
        max_regions=max_regions)
    regions = finder.find(seq)
    # Dedicated assert methods report the offending values on failure.
    self.assertLess(len(regions), max_regions)
    self.assertEqual(len(regions), 2)
def test_run(self):
    """Check that a G / mixed / G sequence is split into three regions.

    Runs a NormalizedCutRegionFinder (min_region_size=6,
    score_threshold=0.01, max_regions=16) with a
    SimilarityWeight(sim_add=1.0) and checks the number of results
    returned by find().
    """
    # DAD: this is not a passing test right now, doesn't split into 3
    seq = gelscore.Sequence(
        'GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGACDEFGHIKLMNPQRSTVWYGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG')
    weight = gelscore.SimilarityWeight(sim_add=1.0)
    max_regions = 16
    finder = gelscore.NormalizedCutRegionFinder(
        weight, min_region_size=6, score_threshold=0.01,
        max_regions=max_regions)
    regions = finder.find(seq)
    # Left enabled so the regions are shown while the test is failing.
    printRegions(regions)
    # assertEqual shows the actual region count on failure.
    self.assertEqual(len(regions), 3)
def test_run(self):
    """Check that a uniform sequence is reported as a single region.

    Runs a NormalizedCutRegionFinder (min_region_size=6,
    score_threshold=0.3, max_regions=16) with a
    SimilarityWeight(sim_add=1.0) over a run of 'G' and checks the
    number of results returned by find().
    """
    seq = gelscore.Sequence('GGGGGGGGGGGGGGGGGGGGGGGGGGGGGG')
    # Alternative weighting kept for reference:
    # weight = gelscore.SimilarityDistanceWeight(sim_add=1.0, sd=10.0)
    weight = gelscore.SimilarityWeight(sim_add=1.0)
    max_regions = 16
    finder = gelscore.NormalizedCutRegionFinder(
        weight, min_region_size=6, score_threshold=0.3,
        max_regions=max_regions)
    regions = finder.find(seq)
    # Debug aid:
    # for r in regions:
    #     print r.region.start, r.region.end, r.score, r.norm_score, r.region
    # assertEqual shows the actual region count on failure.
    self.assertEqual(len(regions), 1)
def test_run(self):
    """Check that three equal-length runs are split into three regions.

    Runs a NormalizedCutRegionFinder (min_region_size=5,
    score_threshold=0.8) with a SimilarityWeight(sim_add=1.0) over ten
    'G', ten 'A' and ten 'P' residues, then checks the number of results
    returned by find().
    """
    seq = gelscore.Sequence('GGGGGGGGGGAAAAAAAAAAPPPPPPPPPP')
    weight = gelscore.SimilarityWeight(sim_add=1.0)
    finder = gelscore.NormalizedCutRegionFinder(
        weight, min_region_size=5, score_threshold=0.8)
    regions = finder.find(seq)
    # Debug aid:
    # for r in regions:
    #     print r.score, r.norm_score, r.region
    # assertEqual shows the actual region count on failure.
    self.assertEqual(len(regions), 3)
# Specific ORF(s)
query_keys += options.query_orf
# Truthiness test: `x is []` compares identity with a brand-new list and is
# never true, so `not x is []` would run this branch unconditionally.
if options.query_gene:
    # Specific gene(s)
    query_keys += [gene_orf_dict[k] for k in options.query_gene]
if not query_keys:
    # Go through all proteins in database
    query_keys = all_keys

# Distance definition
dist = gelscore.SimilarityWeight(sim_add=1.0, max_dist=options.max_distance)
# Region finder
reg_finder = gelscore.NormalizedCutRegionFinder(
    dist,
    min_region_size=options.min_region_size,
    score_threshold=options.score_threshold,
    max_regions=options.max_regions)

# Remove gaps?
if options.degap:
    for k in query_keys:
        prot_dict[k] = prot_dict[k].replace("-", '')

if options.debugging:
    # Debug runs use at most the first 100 keys; slicing clamps to the
    # list length, so no explicit min()/len() is needed.
    query_keys = query_keys[:100]

region_dict = {}
n_processed = 0
for orf in query_keys:
    seq = gelscore.Sequence(prot_dict[orf])