Beispiel #1
0
	def test_run(self):
		"""The region finder should report exactly three regions for this sequence."""
		gs = gelscore.Sequence('RGGGNGRGGRGRGGPMGRGGYGGGGSGGGGRGGFPSGGGGGGGQQRAGDWKCPNPTCENMNFSWRNECNQCKAPKPDGPGGGPGGSHMGGN')
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.1)
		res = rf.find(gs)
		# assertEqual reports the actual region count when the check fails.
		self.assertEqual(len(res), 3)
Beispiel #2
0
	def test_run(self):
		"""A sequence made of a single repeated residue should score 1.0."""
		gs = gelscore.Sequence('GGGGGGGGGGGGGGGGGGGGGGGGGGGGGG')
		sim = gelscore.SimilarityWeight(sim_add=1.0)
		# Build the weights for the sequence, then score them.
		W = sim.weights(gs)
		score = sim.score(W)
		self.assertAlmostEqual(score, 1.0)
Beispiel #3
0
	def test_run(self):
		"""The region finder should report at least seven regions for this long sequence."""
		gs = gelscore.Sequence('MASNDYTQQATQSYGAYPTQPGQGYSQQSSQPYGQQSYSGYSQSTDTSGYGQSSYSSYGQSQNTGYGTQSTPQGYGSTGGYGSSQSSQSSYGQQSSYPGYGQQPAPSSTSGSYGSSSQSSSYGQPQSGSYSQQPSYGGQQQSYGQQQSYNPPQGYGQQNQYNSSSGGGGGGGGGGNYGQDQSSMSSGGGSGGGYGNQDQSGGGGSGGYGQQDRGGRGRGGSGGGGGGGGGGYNRSSGGYEPRGRGGGRGGRGGMGGSDRGGFNKFGGPRDQGSRHDSEQDNSDNNTIFVQGLGENVTIESVADYFKQIGIIKTNKKTGQPMINLYTDRETGKLKGEATVSFDDPPSAKAAIDWFDGKEFSGNPIKVSFATRRADFNRGGGNGRGGRGRGGPMGRGGYGGGGSGGGGRGGFPSGGGGGGGQQRAGDWKCPNPTCENMNFSWRNECNQCKAPKPDGPGGGPGGSHMGGNYGDDRRGGRGGYDRGGYRGRGGDRGGFRGGRGGGDRGGFGPGKMDSRGEHRQDRRERPY')
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=10, score_threshold=0.025)
		res = rf.find(gs)
		# assertGreaterEqual reports both operands when the check fails.
		self.assertGreaterEqual(len(res), 7)
Beispiel #4
0
	def test_run(self):
		"""A two-block sequence should yield two regions, fewer than max_regions."""
		gs = gelscore.Sequence('AAAAAAAAAAAAGGGGGGGGGGG')
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		max_regions = 4
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.9, max_regions=max_regions)
		res = rf.find(gs)
		# Dedicated assertions report the actual values when a check fails.
		self.assertLess(len(res), max_regions)
		self.assertEqual(len(res), 2)
Beispiel #5
0
	def test_run(self):
		"""A G-run / mixed-residue / G-run sequence is expected to yield three regions."""
		# DAD: known failure -- the finder does not currently split this
		# sequence into 3 regions.
		gs = gelscore.Sequence('GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGACDEFGHIKLMNPQRSTVWYGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG')
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		max_regions = 16
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.01, max_regions=max_regions)
		res = rf.find(gs)
		# Diagnostic output, kept while the test is known to fail.
		printRegions(res)
		# assertEqual reports the actual region count when the check fails.
		self.assertEqual(len(res), 3)
Beispiel #6
0
	def test_run(self):
		"""A sequence of one repeated residue should come back as a single region."""
		gs = gelscore.Sequence('GGGGGGGGGGGGGGGGGGGGGGGGGGGGGG')
		dist = gelscore.SimilarityWeight(sim_add=1.0)
		max_regions = 16
		rf = gelscore.NormalizedCutRegionFinder(dist, min_region_size=6, score_threshold=0.3, max_regions=max_regions)
		res = rf.find(gs)
		# assertEqual reports the actual region count when the check fails.
		self.assertEqual(len(res), 1)
Beispiel #7
0
	def test_run(self):
		"""Three equal-length single-residue blocks should be found as three regions."""
		gs = gelscore.Sequence('GGGGGGGGGGAAAAAAAAAAPPPPPPPPPP')
		sim = gelscore.SimilarityWeight(sim_add=1.0)
		rf = gelscore.NormalizedCutRegionFinder(sim, min_region_size=5, score_threshold=0.8)
		res = rf.find(gs)
		# assertEqual reports the actual region count when the check fails.
		self.assertEqual(len(res), 3)
Beispiel #8
0
    # Read input: index every record of the delimited file by its 'id' field.
    if not os.path.isfile(options.in_fname):
        raise IOError("# Error: file {} does not exist".format(
            options.in_fname))
    data = {}
    # open() works on both Python 2 and 3 (the file() builtin is Python 2
    # only); the with-block closes the handle even if reading raises.
    with open(options.in_fname, 'r') as inf:
        dlr = util.DelimitedLineReader(inf, header=True)
        headers = dlr.headers
        for flds in dlr.dictentries:
            data[flds['id']] = flds

    # Distance measure
    dist = gelscore.SimilarityWeight(sim_add=1.0, max_distance=200)

    # Arrange the data
    keys = sorted(data)

    # NOTE(review): `sim` is overwritten on every iteration and is not used
    # within this fragment -- confirm whether per-record results should be kept.
    for k in keys:
        dat = data[k]
        seq = dat['sequence']
        sim = selfSimilarity(seq, dist)

    # Write output
    n_written = 0
    # Tack sequence on the end, for better readability
    columns = headers[:]
    columns.remove('sequence')
    data_outs.write('\t'.join(columns) + "\tp.value\tp.value.adj\tsequence\n")
Beispiel #9
0
    # Invert gene_orf_dict so its values become the lookup keys.
    orf_gene_dict = {v: k for (k, v) in gene_orf_dict.items()}

    # Select which genes to process
    query_keys = []
    # Truthiness test: the previous `not x is []` compared identity against
    # a brand-new list and was therefore always True.
    if options.query_orf:
        # Specific ORF(s)
        query_keys += options.query_orf
    if options.query_gene:
        # Specific gene(s)
        query_keys += [gene_orf_dict[k] for k in options.query_gene]
    if not query_keys:
        # Go through all proteins in database
        query_keys = all_keys

    # Distance definition
    dist = gelscore.SimilarityWeight(sim_add=1.0,
                                     max_dist=options.max_distance)
    # Region finder
    reg_finder = gelscore.NormalizedCutRegionFinder(
        dist,
        min_region_size=options.min_region_size,
        score_threshold=options.score_threshold,
        max_regions=options.max_regions)

    # Remove gaps?
    if options.degap:
        for k in query_keys:
            prot_dict[k] = prot_dict[k].replace("-", '')

    if options.debugging:
        # Limit a debugging run to the first 100 keys. Slicing clamps to the
        # list length, so no explicit min() is needed (the previous
        # `len(query_keys, 100)` raised TypeError).
        query_keys = query_keys[:100]