def reverse_complement_to_longest_reading_frame(seq_iter,
        gap_characters=['-'],
        table=1,
        allow_partial=True,
        require_start_after_stop=True,
        log_frequency=0):
    """
    Yield every sequence in whichever orientation has the longer reading frame.

    Each record produced by `remove_gaps(seq_iter, ...)` is reverse
    complemented, and `sequtils.get_longest_reading_frames` is called on both
    the record and its reverse complement.  The reverse complement is yielded
    (and a warning is logged) when the forward search returns nothing while
    the reverse search does, or when the first reverse-complement frame is
    strictly longer than the first forward frame.  In every other case,
    including ties and an empty reverse-complement search, the de-gapped
    record itself is yielded.

    Arguments:
        seq_iter: iterable of sequence records, passed to `remove_gaps`.
        gap_characters: forwarded to `remove_gaps`.
        table, allow_partial, require_start_after_stop: forwarded unchanged
            to `sequtils.get_longest_reading_frames`.
        log_frequency: if greater than zero, an info message is logged for
            every `log_frequency`-th sequence.
    """
    for i, s in enumerate(remove_gaps(seq_iter,
            gap_characters=gap_characters)):
        if (log_frequency > 0) and (((i + 1) % log_frequency) == 0):
            _LOG.info('{0}: Checking reverse complement of seq {1}...'.format(
                    datetime.datetime.now(),
                    (i + 1)))
        rc = sequtils.get_reverse_complement(s)
        p1 = sequtils.get_longest_reading_frames(
                seq_record=s,
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        p2 = sequtils.get_longest_reading_frames(
                seq_record=rc,
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        # Either search can come back empty, so the lengths must be computed
        # without indexing blindly; an empty result is reported as length 0.
        # Indexing `p1[0]`/`p2[0]` unconditionally here used to raise
        # IndexError before the empty-result branches below could run.
        fwd_len = len(p1[0].seq) if len(p1) > 0 else 0
        rev_len = len(p2[0].seq) if len(p2) > 0 else 0
        _LOG.debug('{0}: read length {1}, rev comp read length {2}'.format(
                s.id, fwd_len, rev_len))
        if len(p2) == 0:
            yield s
        elif (len(p1) == 0) or (rev_len > fwd_len):
            _LOG.warning('Reverse complementing sequence {0!r}'.format(rc.id))
            yield rc
        else:
            yield s
def reverse_complement_to_longest_reading_frame(seq_iter,
        gap_characters=['-'],
        table=1,
        allow_partial=True,
        require_start_after_stop=True,
        log_frequency=0):
    """
    Yield every sequence in whichever orientation has the longer reading frame.

    Each record produced by `remove_gaps(seq_iter, ...)` is reverse
    complemented, and `sequtils.get_longest_reading_frames` is called on both
    the record and its reverse complement.  The reverse complement is yielded
    (and a warning is logged) when the forward search returns nothing while
    the reverse search does, or when the first reverse-complement frame is
    strictly longer than the first forward frame.  In every other case,
    including ties and an empty reverse-complement search, the de-gapped
    record itself is yielded.

    Arguments:
        seq_iter: iterable of sequence records, passed to `remove_gaps`.
        gap_characters: forwarded to `remove_gaps`.
        table, allow_partial, require_start_after_stop: forwarded unchanged
            to `sequtils.get_longest_reading_frames`.
        log_frequency: if greater than zero, an info message is logged for
            every `log_frequency`-th sequence.
    """
    for i, s in enumerate(remove_gaps(seq_iter,
            gap_characters=gap_characters)):
        if (log_frequency > 0) and (((i + 1) % log_frequency) == 0):
            _LOG.info('{0}: Checking reverse complement of seq {1}...'.format(
                    datetime.datetime.now(),
                    (i + 1)))
        rc = sequtils.get_reverse_complement(s)
        p1 = sequtils.get_longest_reading_frames(
                seq_record=s,
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        p2 = sequtils.get_longest_reading_frames(
                seq_record=rc,
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        # Either search can come back empty, so the lengths must be computed
        # without indexing blindly; an empty result is reported as length 0.
        # Indexing `p1[0]`/`p2[0]` unconditionally here used to raise
        # IndexError before the empty-result branches below could run.
        fwd_len = len(p1[0].seq) if len(p1) > 0 else 0
        rev_len = len(p2[0].seq) if len(p2) > 0 else 0
        _LOG.debug('{0}: read length {1}, rev comp read length {2}'.format(
                s.id, fwd_len, rev_len))
        if len(p2) == 0:
            yield s
        elif (len(p1) == 0) or (rev_len > fwd_len):
            _LOG.warning('Reverse complementing sequence {0!r}'.format(rc.id))
            yield rc
        else:
            yield s
def summarize_longest_read_lengths(seq_iter,
        gap_characters=['-'],
        table=1,
        allow_partial=True,
        require_start_after_stop=True):
    """
    Tabulate longest-reading-frame lengths for both strands of each sequence.

    Every record produced by `seqmod.remove_gaps(seq_iter, ...)` is searched
    with `sequtils.get_longest_reading_frames`, first as-is and then as its
    reverse complement.

    Returns a sorted list of
    `(forward_length, reverse_complement_length, sequence_id)` tuples, where a
    length is the length of the first frame returned, or 0 when the search
    result is empty.
    """
    # Same search options are used for both orientations.
    search_options = dict(
            table=table,
            allow_partial=allow_partial,
            require_start_after_stop=require_start_after_stop)
    summary = []
    for record in seqmod.remove_gaps(seq_iter,
            gap_characters=gap_characters):
        forward = sequtils.get_longest_reading_frames(
                record, **search_options)
        forward_len = len(forward[0].seq) if forward else 0
        reverse = sequtils.get_longest_reading_frames(
                sequtils.get_reverse_complement(record), **search_options)
        reverse_len = len(reverse[0].seq) if reverse else 0
        summary.append((forward_len, reverse_len, record.id))
    summary.sort()
    return summary
def summarize_longest_read_lengths(seq_iter,
        gap_characters=['-'],
        table=1,
        allow_partial=True,
        require_start_after_stop=True):
    """
    Tabulate longest-reading-frame lengths for both strands of each sequence.

    Every record produced by `seqmod.remove_gaps(seq_iter, ...)` is searched
    with `sequtils.get_longest_reading_frames`, first as-is and then as its
    reverse complement.

    Returns a sorted list of
    `(forward_length, reverse_complement_length, sequence_id)` tuples, where a
    length is the length of the first frame returned, or 0 when the search
    result is empty.
    """
    # Same search options are used for both orientations.
    search_options = dict(
            table=table,
            allow_partial=allow_partial,
            require_start_after_stop=require_start_after_stop)
    summary = []
    for record in seqmod.remove_gaps(seq_iter,
            gap_characters=gap_characters):
        forward = sequtils.get_longest_reading_frames(
                record, **search_options)
        forward_len = len(forward[0].seq) if forward else 0
        reverse = sequtils.get_longest_reading_frames(
                sequtils.get_reverse_complement(record), **search_options)
        reverse_len = len(reverse[0].seq) if reverse else 0
        summary.append((forward_len, reverse_len, record.id))
    summary.sort()
    return summary
def test_require_start_after_stop(self):
    """
    Exercise `sequtils.get_longest_reading_frames` on one fixed sequence for
    all four combinations of `allow_partial` and `require_start_after_stop`.
    """
    dna = 'TAGATAGATAGAAATTGGCCATGACCAACTGAATA'

    def check(allow_partial, require_start_after_stop, expected):
        # A fresh input record is built for every scenario.
        seq = SeqRecord(Seq(dna, IUPAC.ambiguous_dna), id='1')
        exp = SeqRecord(Seq(expected, IUPAC.ambiguous_dna), id='1')
        frames = sequtils.get_longest_reading_frames(
                seq,
                table=1,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        self.assertEqual(len(frames), 1)
        frame = frames[0]
        # The result must be a distinct object, not the input record itself.
        self.assertFalse(frame is seq)
        self.assertSameMetadata(frame, seq)
        self.assertEqual(str(frame.seq), str(exp.seq))

    check(allow_partial=True, require_start_after_stop=True,
            expected='ATGACCAACTGA')
    check(allow_partial=False, require_start_after_stop=True,
            expected='ATGACCAACTGA')
    check(allow_partial=True, require_start_after_stop=False,
            expected='ATAGAAATTGGCCATGACCAACTGAATA')
    check(allow_partial=False, require_start_after_stop=False,
            expected='ATGACCAACTGA')
def test_require_start_after_stop(self):
    """
    Exercise `sequtils.get_longest_reading_frames` on one fixed sequence for
    all four combinations of `allow_partial` and `require_start_after_stop`.
    """
    dna = 'TAGATAGATAGAAATTGGCCATGACCAACTGAATA'

    def check(allow_partial, require_start_after_stop, expected):
        # A fresh input record is built for every scenario.
        seq = SeqRecord(Seq(dna, IUPAC.ambiguous_dna), id='1')
        exp = SeqRecord(Seq(expected, IUPAC.ambiguous_dna), id='1')
        frames = sequtils.get_longest_reading_frames(
                seq,
                table=1,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        self.assertEqual(len(frames), 1)
        frame = frames[0]
        # What is being checked is that a *new* object was returned, so test
        # identity directly.  `assertNotEqual` would apply `!=` to two
        # SeqRecord objects, which Biopython deliberately does not implement
        # (NOTE(review): newer releases raise NotImplementedError -- confirm
        # against the Biopython version in use).
        self.assertFalse(frame is seq)
        self.assertSameMetadata(frame, seq)
        self.assertEqual(str(frame.seq), str(exp.seq))

    check(allow_partial=True, require_start_after_stop=True,
            expected='ATGACCAACTGA')
    check(allow_partial=False, require_start_after_stop=True,
            expected='ATGACCAACTGA')
    check(allow_partial=True, require_start_after_stop=False,
            expected='ATAGAAATTGGCCATGACCAACTGAATA')
    check(allow_partial=False, require_start_after_stop=False,
            expected='ATGACCAACTGA')
def longest_reading_frames(seq_iter,
        gap_characters=['-'],
        table=1,
        allow_partial=True,
        require_start_after_stop=True):
    """
    Yield one record per input sequence holding its longest reading frame.

    Each record produced by `remove_gaps(seq_iter, ...)` is passed to
    `sequtils.get_longest_reading_frames`; the first frame returned is
    yielded.  When the search returns nothing, the result of
    `sequtils.copy_seq_metadata(record, '')` is yielded instead (presumably a
    metadata-preserving copy with an empty sequence -- verify against
    `sequtils`).
    """
    for record in remove_gaps(seq_iter, gap_characters=gap_characters):
        frames = sequtils.get_longest_reading_frames(
                record,
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        if not frames:
            # No frame found: still emit a placeholder for this record.
            yield sequtils.copy_seq_metadata(record, '')
            continue
        yield frames[0]
def longest_reading_frames(seq_iter,
        gap_characters=['-'],
        table=1,
        allow_partial=True,
        require_start_after_stop=True):
    """
    Yield one record per input sequence holding its longest reading frame.

    Each record produced by `remove_gaps(seq_iter, ...)` is passed to
    `sequtils.get_longest_reading_frames`; the first frame returned is
    yielded.  When the search returns nothing, the result of
    `sequtils.copy_seq_metadata(record, '')` is yielded instead (presumably a
    metadata-preserving copy with an empty sequence -- verify against
    `sequtils`).
    """
    for record in remove_gaps(seq_iter, gap_characters=gap_characters):
        frames = sequtils.get_longest_reading_frames(
                record,
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        if not frames:
            # No frame found: still emit a placeholder for this record.
            yield sequtils.copy_seq_metadata(record, '')
            continue
        yield frames[0]