Exemple #1
0
	def test_run(self):
		"""Expect the normalized-cut region finder to return exactly three regions.

		Uses a SimilarityWeight with sim_add=1.0, a minimum region size of 6
		and a score threshold of 0.1.
		"""
		gs = gelscore.Sequence('RGGGNGRGGRGRGGPMGRGGYGGGGSGGGGRGGFPSGGGGGGGQQRAGDWKCPNPTCENMNFSWRNECNQCKAPKPDGPGGGPGGSHMGGN')
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.1)
		res = rf.find(gs)
		# assertEqual reports the actual region count on failure,
		# whereas assertTrue(a == b) only says "False is not true".
		self.assertEqual(len(res), 3)
Exemple #2
0
	def test_run(self):
		"""Expect at least seven regions to be found in a long protein sequence.

		Uses a SimilarityWeight with sim_add=1.0, a minimum region size of 10
		and a score threshold of 0.025.
		"""
		gs = gelscore.Sequence('MASNDYTQQATQSYGAYPTQPGQGYSQQSSQPYGQQSYSGYSQSTDTSGYGQSSYSSYGQSQNTGYGTQSTPQGYGSTGGYGSSQSSQSSYGQQSSYPGYGQQPAPSSTSGSYGSSSQSSSYGQPQSGSYSQQPSYGGQQQSYGQQQSYNPPQGYGQQNQYNSSSGGGGGGGGGGNYGQDQSSMSSGGGSGGGYGNQDQSGGGGSGGYGQQDRGGRGRGGSGGGGGGGGGGYNRSSGGYEPRGRGGGRGGRGGMGGSDRGGFNKFGGPRDQGSRHDSEQDNSDNNTIFVQGLGENVTIESVADYFKQIGIIKTNKKTGQPMINLYTDRETGKLKGEATVSFDDPPSAKAAIDWFDGKEFSGNPIKVSFATRRADFNRGGGNGRGGRGRGGPMGRGGYGGGGSGGGGRGGFPSGGGGGGGQQRAGDWKCPNPTCENMNFSWRNECNQCKAPKPDGPGGGPGGSHMGGNYGDDRRGGRGGYDRGGYRGRGGDRGGFRGGRGGGDRGGFGPGKMDSRGEHRQDRRERPY')
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=10, score_threshold=0.025)
		res = rf.find(gs)
		# assertGreaterEqual shows both operands when the check fails.
		self.assertGreaterEqual(len(res), 7)
Exemple #3
0
	def test_run(self):
		"""Expect a run of A followed by a run of G to split into two regions.

		The finder is capped at max_regions=4; the result must stay below that
		cap and contain exactly two regions.
		"""
		gs = gelscore.Sequence('AAAAAAAAAAAAGGGGGGGGGGG')
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		max_regions = 4
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.9, max_regions=max_regions)
		res = rf.find(gs)
		# Dedicated comparison asserts report the actual count on failure.
		self.assertLess(len(res), max_regions)
		self.assertEqual(len(res), 2)
Exemple #4
0
	def test_run(self):
		"""Expect a mixed-residue stretch flanked by two G runs to yield three regions.

		Uses min_region_size=6, score_threshold=0.01 and max_regions=16.
		"""
		# DAD: this is not a passing test right now, doesn't split into 3
		gs = gelscore.Sequence('GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGACDEFGHIKLMNPQRSTVWYGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG')
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		max_regions = 16
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.01, max_regions=max_regions)
		res = rf.find(gs)
		# Dump the regions found; kept while the test is known to fail.
		printRegions(res)
		# assertEqual reports how many regions were actually returned.
		self.assertEqual(len(res), 3)
Exemple #5
0
	def test_run(self):
		"""Expect a homogeneous all-G sequence to come back as a single region.

		Uses min_region_size=6, score_threshold=0.3 and max_regions=16.
		"""
		gs = gelscore.Sequence('GGGGGGGGGGGGGGGGGGGGGGGGGGGGGG')
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		max_regions = 16
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.3, max_regions=max_regions)
		res = rf.find(gs)
		# assertEqual reports the actual region count on failure.
		self.assertEqual(len(res), 1)
Exemple #6
0
	def test_run(self):
		"""Expect three equal-length homopolymer runs (G, A, P) to give three regions.

		Uses a SimilarityWeight with sim_add=1.0, a minimum region size of 5
		and a score threshold of 0.8.
		"""
		gs = gelscore.Sequence('GGGGGGGGGGAAAAAAAAAAPPPPPPPPPP')
		sim = gelscore.SimilarityWeight(sim_add=1.0)
		rf = gelscore.NormalizedCutRegionFinder(sim, min_region_size=5, score_threshold=0.8)
		# Call weights() on the sequence before running the finder, as the
		# original test did; the returned matrix is not inspected.
		sim.weights(gs)
		res = rf.find(gs)
		# assertEqual reports the actual region count on failure.
		self.assertEqual(len(res), 3)
Exemple #7
0
        # Specific ORF(s)
        query_keys += options.query_orf
    if not options.query_gene is []:
        # Specific gene(s)
        query_keys += [gene_orf_dict[k] for k in options.query_gene]
    if len(query_keys) == 0:
        # Go through all proteins in database
        query_keys = all_keys

    # Distance definition
    dist = gelscore.SimilarityWeight(sim_add=1.0,
                                     max_dist=options.max_distance)
    # Region finder
    reg_finder = gelscore.NormalizedCutRegionFinder(
        dist,
        min_region_size=options.min_region_size,
        score_threshold=options.score_threshold,
        max_regions=options.max_regions)

    # Remove gaps?
    if options.degap:
        for k in query_keys:
            prot_dict[k] = prot_dict[k].replace("-", '')

    if options.debugging:
        query_keys = query_keys[0:min(len(query_keys, 100))]

    region_dict = {}
    n_processed = 0
    for orf in query_keys:
        seq = gelscore.Sequence(prot_dict[orf])