def test_run(self):
    """sliding, no matching

    Slides a 5-residue window along a target that contains no alanine,
    scoring each window against an all-alanine query, and expects the
    scores to sum to (approximately) zero.
    """
    target = gelscore.Sequence('GYPMGGYPMGGYPMGGYPMGGYPMG')
    query = gelscore.Sequence('AAAAA')
    # Build a comparator for the query via the composition factory.
    comparator = slide.SequenceCompositionComparatorFactory().make(
        query, 'sequence')
    window = slide.SequenceWindow(target, 5)
    outcome = window.slide(comparator)
    total_score = sum(outcome.scores)
    self.assertAlmostEqual(total_score, 0.0)
def test_run(self):
    """iterating

    Steps a 5-residue window along the sequence one position at a time
    and checks that the window's current sequence always equals the
    corresponding slice of the underlying sequence.
    """
    a = gelscore.Sequence('ACDEFGHIKLAAAAAAAAAAAMNPQRSTVWY')
    winsize = 5
    sw = slide.SequenceWindow(a, winsize)
    xi = 0  # 0-based start of the slice the window should currently cover
    while sw.isValid():
        seq = sw.currentSequence()
        # assertEqual (rather than assertTrue on an == expression) reports
        # both strings when the comparison fails.
        self.assertEqual(str(seq), str(a[xi:(xi + winsize)]))
        sw.next()
        xi += 1
def test_run(self):
    """sliding, no matching

    Slides a window the length of the query along a longer sequence and
    checks, for every result, that the residue at ``res.position - 1``
    equals the residue at the result's enumeration index and that the
    score is non-negative.
    """
    a = gelscore.Sequence(
        'QLAQQIQARNQMRYQQATAAAAAAAAGMPGQFMPPMFYGVMPPRGVPFNGPNPQQMNPMGGMPKNGMPPQFRNGPVYGVPPQGGFPRNANDNNQFYQ'
    )
    b = gelscore.Sequence('MPQNGRA')
    fac = slide.SequenceCompositionComparatorFactory()
    scb = fac.make(b, 'sequence')
    sw = slide.SequenceWindow(a, len(b))
    ress = sw.slide(scb)
    for xi, res in enumerate(ress.results()):
        # Dedicated assertions print the offending values on failure,
        # unlike assertTrue on a pre-evaluated boolean.
        self.assertEqual(a[res.position - 1], a[xi])
        self.assertGreaterEqual(res.score, 0.0)
dout.addHeader('orf', 'S. cerevisiae systematic name', 's') dout.addHeader('n.above', 'Number of windows with score >= threshold', 'd') dout.addHeader( 'max.score', 'Maximum score (1 - chi-squared histogram distance on normalized aa-composition histograms)', 'f') dout.addHeader( 'max.position', '1-based sequence position of window (start of window) having the maximum score', 'd') dout.describeHeader(data_outs) dout.writeHeader(data_outs) for orf in query_keys: seq = gelscore.Sequence(prot_dict[orf]) sw = slide.SequenceWindow(seq, options.window_size) # Slide window and collect results slideres = sw.slide(comparator) # Anything interesting? n_above = 0 max_score = 0.0 max_pos = None for res in slideres.results(): if res.score > max_score: max_score = res.score max_pos = res.position if res.score >= options.score_threshold: n_above += 1 # Write out results # Find stretches of sequence that are above threshold in score line = "{:s}\t{:d}\t{:1.4f}\t{:d}\n".format(orf, n_above, max_score,
def test_run(self):
    """searching

    Calls ``search()`` on a 5-residue window and checks that the current
    sequence of the returned object is the all-alanine stretch 'AAAAA'.
    """
    a = gelscore.Sequence('ACDEFGHIKLAAAAAAAAAAAMNPQRSTVWY')
    sw = slide.SequenceWindow(a, 5)
    res = sw.search()
    # assertEqual shows the actual window text if the search lands
    # somewhere unexpected; assertTrue would only report "False is not true".
    self.assertEqual(str(res.currentSequence()), 'AAAAA')
def test_run(self):
    """window larger than sequence

    Builds a 20-residue window over a 16-residue sequence. No assertion
    follows in this test, so it passes as long as construction does not
    raise.
    """
    # Actual protein sequence YJR151W-A
    short_protein = gelscore.Sequence('MLSLIFYLRFPSYIRG')
    oversized_window = slide.SequenceWindow(short_protein, 20)
for k in query_keys: prot_dict[k] = prot_dict[k].replace("-",'') if options.debugging: query_keys = query_keys[0:min(len(query_keys,100))] # Set up motif to compare fac = slide.SequenceCompositionComparatorFactory() comparator = fac.make(options.motif, 'sequence') window_size = len(options.motif) # Write output n_written = 0 for orf in query_keys: seq = gelscore.Sequence(prot_dict[orf]) sw = slide.SequenceWindow(seq, window_size) # Slide window and collect results ress = sw.slide(comparator) # Write out results data_outs.write("pos\taa\tscore\twindow\n") for (xi, res) in enumerate(ress.results()): line = "{res.position}\t{res.aa}\t{res.score}\t{res.window}\n".format(res=res) data_outs.write(line) data_outs.write("\n") # empty line between orfs n_written += 1 # Write out stopping time data_outs.write("# Run finished {}\n".format(util.timestamp())) # Shut down output if not options.out_fname is None: