def test_run(self):
    """Slide a query over a target sharing none of its letters; expect zero total score."""
    target = gelscore.Sequence('GYPMGGYPMGGYPMGGYPMGGYPMG')
    query = gelscore.Sequence('AAAAA')
    # Wrap the query in a comparator that the sliding window can score against.
    factory = slide.SequenceCompositionComparatorFactory()
    query_comparator = factory.make(query, 'sequence')
    window = slide.SequenceWindow(target, 5)
    slide_result = window.slide(query_comparator)
    # The target contains no 'A', so every window is expected to score 0.
    total_score = sum(slide_result.scores)
    self.assertAlmostEqual(total_score, 0.0)
def test_run(self):
    """Check SequenceComposition dot products and the norm of its vector."""
    seq_a = gelscore.Sequence('AAAA')
    seq_d = gelscore.Sequence('DDDD')
    plain_a = gelscore.SequenceComposition(seq_a)
    plain_d = gelscore.SequenceComposition(seq_d)
    weighted_a = gelscore.SequenceComposition(seq_a, weights=[1, 0, 0, 0])
    # Compositions of sequences with no letters in common: expected dot product 0.
    self.assertAlmostEqual(plain_a.dot(plain_d), 0.0)
    # Same sequence with and without explicit weights: expected dot product 1.
    self.assertAlmostEqual(plain_a.dot(weighted_a), 1.0)
    # The composition vector is expected to have unit Euclidean length.
    vector_length = np.linalg.norm(plain_a.vector)
    self.assertAlmostEqual(vector_length, 1)
def test_run(self):
    """straight comparison: an identical sequence must outscore a near match"""
    a = gelscore.Sequence('AAAAA')
    b = gelscore.Sequence('AAAAA')
    c = gelscore.Sequence('LAAAA')
    fac = slide.SequenceCompositionComparatorFactory()
    ac = fac.make(a, 'sequence')
    bc = fac.make(b, 'sequence')
    cc = fac.make(c, 'sequence')
    # b equals a, while c differs from a in its first letter.
    # assertGreater reports both scores when the expectation fails.
    self.assertGreater(ac.compare(bc), ac.compare(cc))
def test_run(self):
    """Comparators for a repeated motif and for the motif itself should compare as 1.0."""
    repeated = gelscore.Sequence('GYPMGGYPMGGYPMGGYPMGGYPMG')
    motif = gelscore.Sequence('GYPMG')
    factory = slide.SequenceCompositionComparatorFactory()
    repeated_cmp = factory.make(repeated, 'sequence')
    motif_cmp = factory.make(motif, 'sequence')
    # A comparator compared against itself.
    self.assertAlmostEqual(repeated_cmp.compare(repeated_cmp), 1.0)
    # 'GYPMG' repeated five times has the same letter proportions as 'GYPMG',
    # so the comparison is expected to be 1.0 in both directions.
    for lhs, rhs in ((repeated_cmp, motif_cmp), (motif_cmp, repeated_cmp)):
        self.assertAlmostEqual(lhs.compare(rhs), 1.0)
def test_run(self):
    """sliding: reported positions are 1-based and scores are non-negative"""
    a = gelscore.Sequence(
        'QLAQQIQARNQMRYQQATAAAAAAAAGMPGQFMPPMFYGVMPPRGVPFNGPNPQQMNPMGGMPKNGMPPQFRNGPVYGVPPQGGFPRNANDNNQFYQ'
    )
    b = gelscore.Sequence('MPQNGRA')
    fac = slide.SequenceCompositionComparatorFactory()
    scb = fac.make(b, 'sequence')
    # The window has the same length as the query sequence.
    sw = slide.SequenceWindow(a, len(b))
    ress = sw.slide(scb)
    for xi, res in enumerate(ress.results()):
        # The xi-th result (0-based) is expected to report position xi + 1.
        # Note this compares the letters found at the two indices, not the
        # indices themselves.
        self.assertEqual(a[res.position - 1], a[xi])
        self.assertGreaterEqual(res.score, 0.0)
def test_run(self):
    """difference() of two non-overlapping regions returns both regions unchanged"""
    gs = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
    gs1 = gelscore.ContiguousRegion(gs, 0, 5)
    gs2 = gelscore.ContiguousRegion(gs, 15, 20)
    left, right = gs1.difference(gs2)
    # assertEqual shows the mismatching strings if the expectation fails.
    self.assertEqual(str(left), str(gs1))
    self.assertEqual(str(right), str(gs2))
def test_run(self):
    """NormalizedCutRegionFinder is expected to find exactly three regions here"""
    gs = gelscore.Sequence('RGGGNGRGGRGRGGPMGRGGYGGGGSGGGGRGGFPSGGGGGGGQQRAGDWKCPNPTCENMNFSWRNECNQCKAPKPDGPGGGPGGSHMGGN')
    dist = gelscore.SimilarityWeight(sim_add=1.0)
    rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.1)
    res = rf.find(gs)
    # assertEqual reports the actual region count on failure.
    self.assertEqual(len(res), 3)
def test_run(self):
    """NormalizedCutRegionFinder is expected to find at least seven regions in a long sequence"""
    gs = gelscore.Sequence('MASNDYTQQATQSYGAYPTQPGQGYSQQSSQPYGQQSYSGYSQSTDTSGYGQSSYSSYGQSQNTGYGTQSTPQGYGSTGGYGSSQSSQSSYGQQSSYPGYGQQPAPSSTSGSYGSSSQSSSYGQPQSGSYSQQPSYGGQQQSYGQQQSYNPPQGYGQQNQYNSSSGGGGGGGGGGNYGQDQSSMSSGGGSGGGYGNQDQSGGGGSGGYGQQDRGGRGRGGSGGGGGGGGGGYNRSSGGYEPRGRGGGRGGRGGMGGSDRGGFNKFGGPRDQGSRHDSEQDNSDNNTIFVQGLGENVTIESVADYFKQIGIIKTNKKTGQPMINLYTDRETGKLKGEATVSFDDPPSAKAAIDWFDGKEFSGNPIKVSFATRRADFNRGGGNGRGGRGRGGPMGRGGYGGGGSGGGGRGGFPSGGGGGGGQQRAGDWKCPNPTCENMNFSWRNECNQCKAPKPDGPGGGPGGSHMGGNYGDDRRGGRGGYDRGGYRGRGGDRGGFRGGRGGGDRGGFGPGKMDSRGEHRQDRRERPY')
    dist = gelscore.SimilarityWeight(sim_add=1.0)
    rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=10, score_threshold=0.025)
    res = rf.find(gs)
    # Only a lower bound on the region count is checked; assertGreaterEqual
    # reports the actual count on failure.
    self.assertGreaterEqual(len(res), 7)
def test_run(self):
    """SimilarityWeight score of a single-letter sequence is expected to be 1.0"""
    gs = gelscore.Sequence('GGGGGGGGGGGGGGGGGGGGGGGGGGGGGG')
    sim = gelscore.SimilarityWeight(sim_add=1.0)
    # Build the weights for the sequence, then reduce them to a single score.
    W = sim.weights(gs)
    score = sim.score(W)
    self.assertAlmostEqual(score, 1.0)
def test_run(self):
    """A run of A followed by a run of G is expected to split into two regions"""
    gs = gelscore.Sequence('AAAAAAAAAAAAGGGGGGGGGGG')
    dist = gelscore.SimilarityWeight(sim_add=1.0)
    max_regions = 4
    # NOTE(review): `comp` is never used below. It is kept because removing an
    # opaque constructor call could change what this test exercises -- confirm
    # and delete if it is a leftover.
    comp = gelscore.SequenceCompositionSimilarity()
    rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.9, max_regions=max_regions)
    res = rf.find(gs)
    # The finder must stay under the configured cap and find exactly two regions.
    self.assertLess(len(res), max_regions)
    self.assertEqual(len(res), 2)
def test_run(self):
    """Two G runs flanking a mixed stretch are expected to split into three regions"""
    # DAD: this is not a passing test right now, doesn't split into 3
    gs = gelscore.Sequence('GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGACDEFGHIKLMNPQRSTVWYGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG')
    dist = gelscore.SimilarityWeight(sim_add=1.0)
    max_regions = 16
    rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.01, max_regions=max_regions)
    res = rf.find(gs)
    # Print the regions found, to help diagnose the known failure noted above.
    printRegions(res)
    # assertEqual reports the actual region count on failure.
    self.assertEqual(len(res), 3)
def test_run(self):
    """intersection() of overlapping regions (0, 10) and (5, 15) reads 'GHIKL'"""
    gs = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
    gs1 = gelscore.ContiguousRegion(gs, 0, 10)
    gs2 = gelscore.ContiguousRegion(gs, 5, 15)
    gs3 = gs1.intersection(gs2)
    # 'GHIKL' is gs at 0-based indices 5 through 9.
    self.assertEqual(str(gs3), 'GHIKL')
def test_run(self):
    """intersection() of regions (74, 106) and (88, 111) spans start=88, end=106"""
    gs = gelscore.Sequence('MSEAQETHVEQLPESVVDAPVEEQHQEPPQAPDAPQEPQVPQESAPQESAPQEPPAPQEQNDVPPPSNAPIYEGEESHSVQDYQEAHQHHQPPEPQPYYPPPPPGEHMHGRPPMHHRQEGELSNTRLFVRPFPLDVQESELNEIFGPFGPMKEVKILNGFAFVEFEEAESAAKAIEEVHGKSFANQPLEVVYSKLPAKRYRITMKNLPEGCSWQDLKDLARENSLETTFSSVNTRDFDGTGALEFPSEEILVEALERLNNIEFRGSVITVERDDNPPPIRRSNRGGFRGRGGFRGGFRGGFRGGFSRGGFGGPRGGFGGPRGGYGGYSRGGYGGYSRGGYGGSRGGYDSPRGGYDSPRGGYSRGGYGGPRNDYGPPRGSYGGSRGGYDGPRGDYGPPRDAYRTRDAPRERSPTR')
    gs1 = gelscore.ContiguousRegion(gs, 74, 106)
    gs2 = gelscore.ContiguousRegion(gs, 88, 111)
    gs3 = gs1.intersection(gs2)
    # Expected bounds: the larger of the two starts and the smaller of the two ends.
    self.assertEqual(gs3.start, 88)
    self.assertEqual(gs3.end, 106)
def test_run(self):
    """A single-letter sequence is expected to yield exactly one region"""
    gs = gelscore.Sequence('GGGGGGGGGGGGGGGGGGGGGGGGGGGGGG')
    dist = gelscore.SimilarityWeight(sim_add=1.0)
    max_regions = 16
    rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.3, max_regions=max_regions)
    res = rf.find(gs)
    # assertEqual reports the actual region count on failure.
    self.assertEqual(len(res), 1)
def test_run(self):
    """iterating: each window step must match the corresponding slice of the sequence"""
    a = gelscore.Sequence('ACDEFGHIKLAAAAAAAAAAAMNPQRSTVWY')
    winsize = 5
    sw = slide.SequenceWindow(a, winsize)
    # xi tracks the 0-based start expected for the current window.
    xi = 0
    while sw.isValid():
        seq = sw.currentSequence()
        # assertEqual shows both strings if the window and the slice disagree.
        self.assertEqual(str(seq), str(a[xi:xi + winsize]))
        sw.next()
        xi += 1
def test_run(self):
    """Three single-letter runs (G, A, P) are expected to yield three regions"""
    gs = gelscore.Sequence('GGGGGGGGGGAAAAAAAAAAPPPPPPPPPP')
    sim = gelscore.SimilarityWeight(sim_add=1.0)
    rf = gelscore.NormalizedCutRegionFinder(sim, min_region_size=5, score_threshold=0.8)
    # NOTE(review): D is not used by the assertion below. The call is kept
    # because sim.weights() is opaque from here -- confirm it is safe to remove.
    D = sim.weights(gs)
    res = rf.find(gs)
    # assertEqual reports the actual region count on failure.
    self.assertEqual(len(res), 3)
def test_run(self):
    """EntropyQuant.quant of a single-letter sequence is expected to be 0."""
    single_letter = gelscore.Sequence('AAAAAAAAAAAAAAAAA')
    quantifier = gelscore.EntropyQuant()
    # Second argument is 20 -- presumably the alphabet size; confirm against EntropyQuant.
    entropy = quantifier.quant(single_letter, 20)
    self.assertAlmostEqual(entropy, 0.0)
def test_run(self):
    """searching: search() is expected to land on the all-'A' window"""
    a = gelscore.Sequence('ACDEFGHIKLAAAAAAAAAAAMNPQRSTVWY')
    sw = slide.SequenceWindow(a, 5)
    res = sw.search()
    # assertEqual shows the window actually found if it is not 'AAAAA'.
    self.assertEqual(str(res.currentSequence()), 'AAAAA')
def test_run(self):
    """Smoke test: merge() of regions (0, 5) and (5, 10) completes without raising."""
    parent = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
    first_region = gelscore.ContiguousRegion(parent, 0, 5)
    second_region = gelscore.ContiguousRegion(parent, 5, 10)
    # NOTE(review): the merge result is never inspected, so this test only
    # fails if merge() raises. Add an assertion once the expected result of
    # merge() is confirmed.
    merged = first_region.merge(second_region)
def test_run(self):
    """intersection() of non-overlapping regions is expected to be None"""
    gs = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
    gs1 = gelscore.ContiguousRegion(gs, 0, 5)
    gs2 = gelscore.ContiguousRegion(gs, 15, 20)
    inter = gs1.intersection(gs2)
    # assertIsNone shows the unexpected value when an intersection is returned.
    self.assertIsNone(inter)
def test_run(self):
    """EntropyQuant.quant of a sequence using 20 distinct letters once each is expected to be 1."""
    all_distinct = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
    quantifier = gelscore.EntropyQuant()
    # Second argument is 20 -- presumably the alphabet size; confirm against EntropyQuant.
    entropy = quantifier.quant(all_distinct, 20)
    self.assertAlmostEqual(entropy, 1.0)
def test_run(self):
    """trimright(2) on a whole-sequence region leaves 'V' as the last element"""
    gs = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
    gs1 = gelscore.ContiguousRegion(gs)
    gs1.trimright(2)
    # The sequence ends in '...TVWY'; dropping two from the right leaves 'V' last.
    self.assertEqual(gs1[-1], 'V')
def test_run(self):
    """Iterating a Sequence yields exactly len(sequence) items"""
    gs = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
    # Count the items produced by iteration; their values are not needed.
    n = sum(1 for _ in gs)
    self.assertEqual(n, len(gs))
# Declare the output columns. Each header gets a name, a human-readable
# description and a one-letter code ('s', 'd' or 'f' -- presumably a format
# type; confirm against util.DelimitedOutput).
dout = util.DelimitedOutput()
dout.addHeader('orf', 'S. cerevisiae systematic name', 's')
dout.addHeader('n.above', 'Number of windows with score >= threshold', 'd')
dout.addHeader(
    'max.score',
    'Maximum score (1 - chi-squared histogram distance on normalized aa-composition histograms)',
    'f')
dout.addHeader(
    'max.position',
    '1-based sequence position of window (start of window) having the maximum score',
    'd')
dout.describeHeader(data_outs)
dout.writeHeader(data_outs)
# Scan every queried ORF with a sliding window of options.window_size.
for orf in query_keys:
    seq = gelscore.Sequence(prot_dict[orf])
    sw = slide.SequenceWindow(seq, options.window_size)
    # Slide window and collect results
    slideres = sw.slide(comparator)
    # Anything interesting?
    # n_above counts windows scoring at or above options.score_threshold.
    # max_score / max_pos track the best-scoring window seen so far.
    n_above = 0
    max_score = 0.0
    max_pos = None
    for res in slideres.results():
        # Strict '>' against an initial 0.0: max_pos stays None when no window
        # scores above zero, and the first of several tied maxima is kept.
        if res.score > max_score:
            max_score = res.score
            max_pos = res.position
        if res.score >= options.score_threshold:
            n_above += 1
    # Write out results
    # Find stretches of sequence that are above threshold in score
def test_run(self):
    """window larger than sequence"""
    a = gelscore.Sequence(
        'MLSLIFYLRFPSYIRG')  # Actual protein sequence YJR151W-A
    # The window size (20) exceeds the 16-letter sequence above.
    # NOTE(review): no assertion is visible after this point, so as shown the
    # test only checks that constructing the window does not raise -- confirm
    # whether further checks were intended.
    sw = slide.SequenceWindow(a, 20)