Esempio n. 1
0
 def test_run(self):
     """Sliding: a window scored against a target with no shared residues sums to zero."""
     subject = gelscore.Sequence('GYPMGGYPMGGYPMGGYPMGGYPMG')
     target = gelscore.Sequence('AAAAA')
     # Build the comparator for the target directly from a fresh factory.
     comparator = slide.SequenceCompositionComparatorFactory().make(target, 'sequence')
     window = slide.SequenceWindow(subject, 5)
     total_score = sum(window.slide(comparator).scores)
     self.assertAlmostEqual(total_score, 0.0)
Esempio n. 2
0
	def test_run(self):
		"""Check dot products and the vector norm of sequence compositions."""
		all_a = gelscore.Sequence('AAAA')
		all_d = gelscore.Sequence('DDDD')
		comp_a = gelscore.SequenceComposition(all_a)
		comp_d = gelscore.SequenceComposition(all_d)
		comp_a_weighted = gelscore.SequenceComposition(all_a, weights=[1,0,0,0])
		# Compositions of sequences with no residue in common: dot product 0.
		self.assertAlmostEqual(comp_a.dot(comp_d), 0.0)
		# Same sequence with explicit weights: dot product 1.
		self.assertAlmostEqual(comp_a.dot(comp_a_weighted), 1.0)
		# The composition vector itself has length 1.
		vector_length = np.linalg.norm(comp_a.vector)
		self.assertAlmostEqual(vector_length, 1)
Esempio n. 3
0
 def test_run(self):
     """Straight comparison: an identical sequence scores higher than a mismatched one."""
     a = gelscore.Sequence('AAAAA')
     b = gelscore.Sequence('AAAAA')
     c = gelscore.Sequence('LAAAA')
     fac = slide.SequenceCompositionComparatorFactory()
     ac = fac.make(a, 'sequence')
     bc = fac.make(b, 'sequence')
     cc = fac.make(c, 'sequence')
     # assertGreater reports both scores on failure, unlike assertTrue(x > y).
     self.assertGreater(ac.compare(bc), ac.compare(cc))
Esempio n. 4
0
 def test_run(self):
     """Composition comparison: a repeat and its repeating unit compare as 1.0."""
     repeat = gelscore.Sequence('GYPMGGYPMGGYPMGGYPMGGYPMG')
     unit = gelscore.Sequence('GYPMG')
     factory = slide.SequenceCompositionComparatorFactory()
     repeat_cmp = factory.make(repeat, 'sequence')
     unit_cmp = factory.make(unit, 'sequence')
     # Self-comparison first, then the cross-comparison in both directions.
     pairs = (
         (repeat_cmp, repeat_cmp),
         (repeat_cmp, unit_cmp),
         (unit_cmp, repeat_cmp),
     )
     for lhs, rhs in pairs:
         self.assertAlmostEqual(lhs.compare(rhs), 1.0)
Esempio n. 5
0
 def test_run(self):
     """Sliding: result positions line up with enumeration order and scores are non-negative."""
     a = gelscore.Sequence(
         'QLAQQIQARNQMRYQQATAAAAAAAAGMPGQFMPPMFYGVMPPRGVPFNGPNPQQMNPMGGMPKNGMPPQFRNGPVYGVPPQGGFPRNANDNNQFYQ'
     )
     b = gelscore.Sequence('MPQNGRA')
     fac = slide.SequenceCompositionComparatorFactory()
     scb = fac.make(b, 'sequence')
     sw = slide.SequenceWindow(a, len(b))
     ress = sw.slide(scb)
     for (xi, res) in enumerate(ress.results()):
         # The residue at (position - 1) must equal the residue at the
         # 0-based enumeration index of the result.
         self.assertEqual(a[res.position - 1], a[xi])
         self.assertGreaterEqual(res.score, 0.0)
Esempio n. 6
0
	def test_run(self):
		"""Difference of two regions with disjoint bounds yields two regions that print like the inputs."""
		gs = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
		gs1 = gelscore.ContiguousRegion(gs, 0, 5)
		gs2 = gelscore.ContiguousRegion(gs, 15, 20)
		left, right = gs1.difference(gs2)
		# assertEqual shows both strings when the comparison fails.
		self.assertEqual(str(left), str(gs1))
		self.assertEqual(str(right), str(gs2))
Esempio n. 7
0
	def test_run(self):
		"""Normalized-cut region finding on this sequence must return exactly three regions."""
		gs = gelscore.Sequence('RGGGNGRGGRGRGGPMGRGGYGGGGSGGGGRGGFPSGGGGGGGQQRAGDWKCPNPTCENMNFSWRNECNQCKAPKPDGPGGGPGGSHMGGN')
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.1)
		res = rf.find(gs)
		# For debugging, the regions can be dumped with printRegions(res).
		# assertEqual reports the actual region count on failure.
		self.assertEqual(len(res), 3)
Esempio n. 8
0
	def test_run(self):
		"""Normalized-cut region finding on a long sequence must return at least seven regions."""
		gs = gelscore.Sequence('MASNDYTQQATQSYGAYPTQPGQGYSQQSSQPYGQQSYSGYSQSTDTSGYGQSSYSSYGQSQNTGYGTQSTPQGYGSTGGYGSSQSSQSSYGQQSSYPGYGQQPAPSSTSGSYGSSSQSSSYGQPQSGSYSQQPSYGGQQQSYGQQQSYNPPQGYGQQNQYNSSSGGGGGGGGGGNYGQDQSSMSSGGGSGGGYGNQDQSGGGGSGGYGQQDRGGRGRGGSGGGGGGGGGGYNRSSGGYEPRGRGGGRGGRGGMGGSDRGGFNKFGGPRDQGSRHDSEQDNSDNNTIFVQGLGENVTIESVADYFKQIGIIKTNKKTGQPMINLYTDRETGKLKGEATVSFDDPPSAKAAIDWFDGKEFSGNPIKVSFATRRADFNRGGGNGRGGRGRGGPMGRGGYGGGGSGGGGRGGFPSGGGGGGGQQRAGDWKCPNPTCENMNFSWRNECNQCKAPKPDGPGGGPGGSHMGGNYGDDRRGGRGGYDRGGYRGRGGDRGGFRGGRGGGDRGGFGPGKMDSRGEHRQDRRERPY')
		# A shorter input previously tried here: 'AAAAAAAAAAAAAYYGSGSGSGSGSAAAAAAAAA'.
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=10, score_threshold=0.025)
		res = rf.find(gs)
		# For debugging, the regions can be dumped with printRegions(res).
		# assertGreaterEqual reports the actual region count on failure.
		self.assertGreaterEqual(len(res), 7)
Esempio n. 9
0
	def test_run(self):
		"""The similarity score of a single-residue sequence's weight matrix must be 1.0."""
		gs = gelscore.Sequence('GGGGGGGGGGGGGGGGGGGGGGGGGGGGGG')
		# An alternative weighting previously tried here:
		# gelscore.SimilarityDistanceWeight(sim_add=1.0, sd=10.0).
		sim = gelscore.SimilarityWeight(sim_add=1.0)
		W = sim.weights(gs)
		score = sim.score(W)
		self.assertAlmostEqual(score, 1.0)
Esempio n. 10
0
	def test_run(self):
		"""A two-block sequence must split into two regions, below the max_regions cap."""
		gs = gelscore.Sequence('AAAAAAAAAAAAGGGGGGGGGGG')
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		max_regions = 4
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.9, max_regions=max_regions)
		res = rf.find(gs)
		# Dedicated assertions report the actual count when they fail.
		self.assertLess(len(res), max_regions)
		self.assertEqual(len(res), 2)
Esempio n. 11
0
	def test_run(self):
		"""A G-run / mixed / G-run sequence is expected to split into three regions."""
		# NOTE(DAD): this is not a passing test right now, doesn't split into 3
		gs = gelscore.Sequence('GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGACDEFGHIKLMNPQRSTVWYGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG')
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		max_regions = 16
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.01, max_regions=max_regions)
		res = rf.find(gs)
		# Dump the regions found to help diagnose the known failure.
		printRegions(res)
		# assertEqual reports the actual region count on failure.
		self.assertEqual(len(res), 3)
Esempio n. 12
0
	def test_run(self):
		"""Intersection of regions (0, 10) and (5, 15) must print as 'GHIKL'."""
		gs = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
		gs1 = gelscore.ContiguousRegion(gs, 0, 10)
		gs2 = gelscore.ContiguousRegion(gs, 5, 15)
		gs3 = gs1.intersection(gs2)
		# assertEqual shows the actual string when the comparison fails.
		self.assertEqual(str(gs3), 'GHIKL')
Esempio n. 13
0
	def test_run(self):
		"""Intersection of regions (74, 106) and (88, 111) must span start 88 to end 106."""
		gs = gelscore.Sequence('MSEAQETHVEQLPESVVDAPVEEQHQEPPQAPDAPQEPQVPQESAPQESAPQEPPAPQEQNDVPPPSNAPIYEGEESHSVQDYQEAHQHHQPPEPQPYYPPPPPGEHMHGRPPMHHRQEGELSNTRLFVRPFPLDVQESELNEIFGPFGPMKEVKILNGFAFVEFEEAESAAKAIEEVHGKSFANQPLEVVYSKLPAKRYRITMKNLPEGCSWQDLKDLARENSLETTFSSVNTRDFDGTGALEFPSEEILVEALERLNNIEFRGSVITVERDDNPPPIRRSNRGGFRGRGGFRGGFRGGFRGGFSRGGFGGPRGGFGGPRGGYGGYSRGGYGGYSRGGYGGSRGGYDSPRGGYDSPRGGYSRGGYGGPRNDYGPPRGSYGGSRGGYDGPRGDYGPPRDAYRTRDAPRERSPTR')
		gs1 = gelscore.ContiguousRegion(gs, 74, 106)
		gs2 = gelscore.ContiguousRegion(gs, 88, 111)
		gs3 = gs1.intersection(gs2)
		# assertEqual reports the actual bound when the comparison fails.
		self.assertEqual(gs3.start, 88)
		self.assertEqual(gs3.end, 106)
Esempio n. 14
0
	def test_run(self):
		"""A single-residue sequence must come back as exactly one region."""
		gs = gelscore.Sequence('GGGGGGGGGGGGGGGGGGGGGGGGGGGGGG')
		# An alternative weighting previously tried here:
		# gelscore.SimilarityDistanceWeight(sim_add=1.0, sd=10.0).
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		max_regions = 16
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.3, max_regions=max_regions)
		res = rf.find(gs)
		# For debugging, each result exposes region.start, region.end, score,
		# norm_score and region.
		# assertEqual reports the actual region count on failure.
		self.assertEqual(len(res), 1)
Esempio n. 15
0
 def test_run(self):
     """Iterating: each window's sequence equals the matching slice of the source sequence."""
     a = gelscore.Sequence('ACDEFGHIKLAAAAAAAAAAAMNPQRSTVWY')
     winsize = 5
     sw = slide.SequenceWindow(a, winsize)
     xi = 0
     while sw.isValid():
         seq = sw.currentSequence()
         # assertEqual shows both strings on a mismatch, which
         # assertTrue(x == y) cannot.
         self.assertEqual(str(seq), str(a[xi:(xi + winsize)]))
         sw.next()
         xi += 1
Esempio n. 16
0
	def test_run(self):
		"""A sequence of three ten-residue blocks (G, A, P) must split into three regions."""
		gs = gelscore.Sequence('GGGGGGGGGGAAAAAAAAAAPPPPPPPPPP')
		sim = gelscore.SimilarityWeight(sim_add=1.0)
		rf = gelscore.NormalizedCutRegionFinder(sim, min_region_size=5, score_threshold=0.8)
		# The weight matrix is not asserted on; it is kept for debugging
		# (e.g. inspecting D.sum(), D[1,:].sum() or D[:,1].sum()).
		D = sim.weights(gs)
		res = rf.find(gs)
		# For debugging, each result exposes score, norm_score and region.
		# assertEqual reports the actual region count on failure.
		self.assertEqual(len(res), 3)
Esempio n. 17
0
	def test_run(self):
		"""EntropyQuant.quant on a sequence of a single residue type must be 0.0."""
		homopolymer = gelscore.Sequence('AAAAAAAAAAAAAAAAA')
		quantifier = gelscore.EntropyQuant()
		self.assertAlmostEqual(quantifier.quant(homopolymer, 20), 0.0)
Esempio n. 18
0
 def test_run(self):
     """Searching: the window returned by search() must hold 'AAAAA'."""
     a = gelscore.Sequence('ACDEFGHIKLAAAAAAAAAAAMNPQRSTVWY')
     sw = slide.SequenceWindow(a, 5)
     res = sw.search()
     # assertEqual shows the window actually found when the comparison fails.
     self.assertEqual(str(res.currentSequence()), 'AAAAA')
Esempio n. 19
0
	def test_run(self):
		"""Smoke test: merge regions (0, 5) and (5, 10); the result is not checked."""
		sequence = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
		first_region = gelscore.ContiguousRegion(sequence, 0, 5)
		second_region = gelscore.ContiguousRegion(sequence, 5, 10)
		# No assertion follows: the call only has to complete without raising.
		merged = first_region.merge(second_region)
Esempio n. 20
0
	def test_run(self):
		"""Intersection of regions (0, 5) and (15, 20) must be None."""
		gs = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
		gs1 = gelscore.ContiguousRegion(gs, 0, 5)
		gs2 = gelscore.ContiguousRegion(gs, 15, 20)
		inter = gs1.intersection(gs2)
		# assertIsNone shows the unexpected value when the check fails.
		self.assertIsNone(inter)
Esempio n. 21
0
	def test_run(self):
		"""EntropyQuant.quant on a sequence of twenty distinct residues must be 1.0."""
		all_residues = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
		quantifier = gelscore.EntropyQuant()
		self.assertAlmostEqual(quantifier.quant(all_residues, 20), 1.0)
Esempio n. 22
0
	def test_run(self):
		"""After trimright(2) on a whole-sequence region, the last residue must be 'V'."""
		gs = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
		gs1 = gelscore.ContiguousRegion(gs)
		gs1.trimright(2)
		# assertEqual shows the actual last residue when the comparison fails.
		self.assertEqual(gs1[-1], 'V')
Esempio n. 23
0
	def test_run(self):
		"""Iterating over a Sequence must yield exactly len(sequence) items."""
		gs = gelscore.Sequence('ACDEFGHIKLMNPQRSTVWY')
		# Count the items produced by iteration; the items themselves are not inspected.
		n = sum(1 for _ in gs)
		self.assertEqual(n, len(gs))
Esempio n. 24
0
    # Declare the output columns. Each addHeader call passes a column name, a
    # human-readable description and a one-letter code ('s', 'd', 'f') --
    # presumably a format/type specifier; confirm against util.DelimitedOutput.
    dout = util.DelimitedOutput()
    dout.addHeader('orf', 'S. cerevisiae systematic name', 's')
    dout.addHeader('n.above', 'Number of windows with score >= threshold', 'd')
    dout.addHeader(
        'max.score',
        'Maximum score (1 - chi-squared histogram distance on normalized aa-composition histograms)',
        'f')
    dout.addHeader(
        'max.position',
        '1-based sequence position of window (start of window) having the maximum score',
        'd')
    dout.describeHeader(data_outs)

    dout.writeHeader(data_outs)
    # Score every query ORF by sliding a fixed-size window along its sequence.
    for orf in query_keys:
        seq = gelscore.Sequence(prot_dict[orf])
        sw = slide.SequenceWindow(seq, options.window_size)
        # Slide window and collect results
        slideres = sw.slide(comparator)
        # Anything interesting?
        n_above = 0
        max_score = 0.0
        # NOTE(review): max_pos stays None when no window scores strictly
        # above 0.0, because the comparison below is a strict '>'.
        max_pos = None
        for res in slideres.results():
            # Track the best-scoring window and where it was reported.
            if res.score > max_score:
                max_score = res.score
                max_pos = res.position
            # Count windows that reach the configured score threshold.
            if res.score >= options.score_threshold:
                n_above += 1
        # Write out results
        # Find stretches of sequence that are above threshold in score
Esempio n. 25
0
 def test_run(self):
     """window larger than sequence"""
     a = gelscore.Sequence(
         'MLSLIFYLRFPSYIRG')  # Actual protein sequence YJR151W-A
     sw = slide.SequenceWindow(a, 20)