Exemple #1
0
 def test_run(self):
     """Slide a window over a sequence that shares no residues with the motif.

     The motif is all alanine and the target contains no alanine, so the
     summed window scores are expected to be (almost) zero.
     """
     target = gelscore.Sequence('GYPMGGYPMGGYPMGGYPMGGYPMG')
     motif = gelscore.Sequence('AAAAA')
     comparator = slide.SequenceCompositionComparatorFactory().make(
         motif, 'sequence')
     window = slide.SequenceWindow(target, 5)
     outcome = window.slide(comparator)
     total_score = sum(outcome.scores)
     self.assertAlmostEqual(total_score, 0.0)
Exemple #2
0
 def test_run(self):
     """Step a window along a sequence and check each window's content.

     At every step the window's current sequence must equal the slice of
     the underlying sequence that starts at the same 0-based offset.
     """
     a = gelscore.Sequence('ACDEFGHIKLAAAAAAAAAAAMNPQRSTVWY')
     winsize = 5
     sw = slide.SequenceWindow(a, winsize)
     xi = 0  # 0-based offset of the current window within ``a``
     while sw.isValid():
         seq = sw.currentSequence()
         # assertEqual (rather than assertTrue(x == y)) reports both strings
         # on failure, which makes a mismatching window easy to diagnose.
         self.assertEqual(str(seq), str(a[xi:(xi + winsize)]))
         sw.next()
         xi += 1
Exemple #3
0
 def test_run(self):
     """Slide a motif-sized window over a long sequence and check each result.

     For every result, in iteration order, the reported position (used here
     as 1-based: ``position - 1`` indexes the sequence) must refer to the
     same residue as the result's 0-based index, and the score must be
     non-negative.
     """
     a = gelscore.Sequence(
         'QLAQQIQARNQMRYQQATAAAAAAAAGMPGQFMPPMFYGVMPPRGVPFNGPNPQQMNPMGGMPKNGMPPQFRNGPVYGVPPQGGFPRNANDNNQFYQ'
     )
     b = gelscore.Sequence('MPQNGRA')
     fac = slide.SequenceCompositionComparatorFactory()
     scb = fac.make(b, 'sequence')
     # The window is exactly as wide as the motif being compared.
     sw = slide.SequenceWindow(a, len(b))
     ress = sw.slide(scb)
     for (xi, res) in enumerate(ress.results()):
         # Dedicated assertions print the offending values on failure,
         # unlike assertTrue(<comparison>), which only reports "False".
         self.assertEqual(a[res.position - 1], a[xi])
         self.assertGreaterEqual(res.score, 0.0)
Exemple #4
0
    # Declare the four output columns. Each addHeader call passes a column
    # name, a human-readable description, and a short type code.
    # NOTE(review): the type codes ('s', 'd', 'f') look like format
    # specifiers -- confirm against dout's implementation.
    dout.addHeader('orf', 'S. cerevisiae systematic name', 's')
    dout.addHeader('n.above', 'Number of windows with score >= threshold', 'd')
    dout.addHeader(
        'max.score',
        'Maximum score (1 - chi-squared histogram distance on normalized aa-composition histograms)',
        'f')
    dout.addHeader(
        'max.position',
        '1-based sequence position of window (start of window) having the maximum score',
        'd')
    dout.describeHeader(data_outs)

    dout.writeHeader(data_outs)
    # Score every queried ORF with a sliding window and summarize the results.
    for orf in query_keys:
        seq = gelscore.Sequence(prot_dict[orf])
        sw = slide.SequenceWindow(seq, options.window_size)
        # Slide window and collect results
        slideres = sw.slide(comparator)
        # Anything interesting?
        n_above = 0
        max_score = 0.0
        # NOTE(review): max_pos stays None when no window scores strictly
        # above 0.0 (see the `>` test below). The format string that follows
        # ends in a {:d} field, which rejects None -- confirm what value is
        # passed for it.
        max_pos = None
        for res in slideres.results():
            # Track the best-scoring window; the strict `>` keeps the first
            # window among any ties.
            if res.score > max_score:
                max_score = res.score
                max_pos = res.position
            # Count windows that meet or exceed the user-supplied threshold.
            if res.score >= options.score_threshold:
                n_above += 1
        # Write out results
        # Find stretches of sequence that are above threshold in score
        line = "{:s}\t{:d}\t{:1.4f}\t{:d}\n".format(orf, n_above, max_score,
Exemple #5
0
 def test_run(self):
     """Check that search() reports the all-alanine window.

     The object returned by ``search()`` is expected to expose, via
     ``currentSequence()``, the 5-residue window 'AAAAA' taken from the
     alanine run in the middle of the sequence.
     """
     a = gelscore.Sequence('ACDEFGHIKLAAAAAAAAAAAMNPQRSTVWY')
     sw = slide.SequenceWindow(a, 5)
     res = sw.search()
     # assertEqual shows the window actually found when the test fails,
     # whereas assertTrue(x == y) would only report "False is not true".
     self.assertEqual(str(res.currentSequence()), 'AAAAA')
Exemple #6
0
 def test_run(self):
     """Build a window that is wider than the sequence it is given."""
     # Actual protein sequence YJR151W-A (16 residues, shorter than the
     # 20-residue window requested below).
     short_protein = gelscore.Sequence('MLSLIFYLRFPSYIRG')
     window = slide.SequenceWindow(short_protein, 20)
Exemple #7
0
		for k in query_keys:
			prot_dict[k] = prot_dict[k].replace("-",'')
	
	if options.debugging:
		# Debug runs only process the first 100 queries. Slicing clamps to the
		# sequence length, so no explicit min()/len() bound is required. The
		# previous code called len(query_keys, 100), which raises TypeError
		# because len() accepts exactly one argument.
		query_keys = query_keys[:100]

	# Set up motif to compare
	# NOTE(review): options.motif is passed to the factory as-is; confirm
	# that fac.make accepts this type for the 'sequence' mode.
	fac = slide.SequenceCompositionComparatorFactory()
	comparator = fac.make(options.motif, 'sequence')
	# The sliding window is exactly as wide as the motif.
	window_size = len(options.motif)

	# Write output: one tab-separated table per ORF, separated by blank lines.
	n_written = 0
	for orf in query_keys:
		seq = gelscore.Sequence(prot_dict[orf])
		sw = slide.SequenceWindow(seq, window_size)
		# Slide window and collect results
		ress = sw.slide(comparator)
		# Write out results (the column header is repeated for every ORF)
		data_outs.write("pos\taa\tscore\twindow\n")
		for res in ress.results():
			line = "{res.position}\t{res.aa}\t{res.score}\t{res.window}\n".format(res=res)
			data_outs.write(line)
		data_outs.write("\n") # empty line between orfs
		n_written += 1

	# Write out stopping time
	data_outs.write("# Run finished {}\n".format(util.timestamp()))

	# Shut down output
	if not options.out_fname is None: