Beispiel #1
0
def reverse_complement_to_longest_reading_frame(seq_iter,
                                                gap_characters=['-'],
                                                table=1,
                                                allow_partial=True,
                                                require_start_after_stop=True,
                                                log_frequency=0):
    """Yield each sequence in the orientation with the longer reading frame.

    Every sequence produced by ``remove_gaps(seq_iter, ...)`` is compared
    with its reverse complement.  The reverse complement is yielded (and a
    warning is logged) when it has a reading frame and either the original
    has none or the reverse complement's first reported frame is strictly
    longer; otherwise the original (gap-stripped) sequence is yielded.

    Args:
        seq_iter: iterable of sequence records; passed to ``remove_gaps``.
        gap_characters: gap symbols forwarded to ``remove_gaps``.  The
            default list is only forwarded, never mutated by this function.
        table: forwarded to ``sequtils.get_longest_reading_frames``.
        allow_partial: forwarded to ``sequtils.get_longest_reading_frames``.
        require_start_after_stop: forwarded to
            ``sequtils.get_longest_reading_frames``.
        log_frequency: when greater than zero, an info message is logged
            every ``log_frequency`` sequences.

    Yields:
        Either the gap-stripped sequence or its reverse complement.
    """
    frame_options = dict(
        table=table,
        allow_partial=allow_partial,
        require_start_after_stop=require_start_after_stop)
    for i, s in enumerate(remove_gaps(seq_iter,
                                      gap_characters=gap_characters)):
        if (log_frequency > 0) and (((i + 1) % log_frequency) == 0):
            _LOG.info('{0}: Checking reverse complement of seq {1}...'.format(
                datetime.datetime.now(), (i + 1)))
        rc = sequtils.get_reverse_complement(s)
        p1 = sequtils.get_longest_reading_frames(seq_record=s,
                                                 **frame_options)
        p2 = sequtils.get_longest_reading_frames(seq_record=rc,
                                                 **frame_options)
        # Either result may be empty, so guard the [0] lookups; previously
        # the debug message indexed unconditionally and raised IndexError
        # before the emptiness checks below could run.
        has_fwd = len(p1) > 0
        has_rc = len(p2) > 0
        fwd_len = len(p1[0].seq) if has_fwd else 0
        rc_len = len(p2[0].seq) if has_rc else 0
        _LOG.debug('{0}: read length {1}, rev comp read length {2}'.format(
            s.id, fwd_len, rc_len))
        # Same decision table as before: no reverse-complement frame keeps
        # the original; otherwise flip when the original has no frame or
        # the reverse complement's frame is strictly longer.
        if has_rc and ((not has_fwd) or rc_len > fwd_len):
            _LOG.warning('Reverse complementing sequence {0!r}'.format(rc.id))
            yield rc
        else:
            yield s
Beispiel #2
0
def reverse_complement_to_longest_reading_frame(seq_iter,
        gap_characters=['-'],
        table = 1,
        allow_partial = True,
        require_start_after_stop = True,
        log_frequency = 0):
    """
    Generator that orients sequences by their longest reading frame.

    For each gap-stripped sequence from `remove_gaps`, the reading frames
    of the sequence and of its reverse complement are looked up with
    `sequtils.get_longest_reading_frames`.  The reverse complement is
    yielded, with a warning logged, when it has at least one frame and the
    original has none or a strictly shorter first frame.  In every other
    case the original sequence is yielded.

    `table`, `allow_partial` and `require_start_after_stop` are forwarded
    unchanged to `sequtils.get_longest_reading_frames`; `gap_characters`
    is forwarded to `remove_gaps`.  If `log_frequency` is positive, a
    progress message is logged every `log_frequency` sequences.
    """
    for i, s in enumerate(remove_gaps(seq_iter, gap_characters=gap_characters)):
        if (log_frequency > 0) and (((i + 1) % log_frequency) == 0):
            _LOG.info('{0}: Checking reverse complement of seq {1}...'.format(
                    datetime.datetime.now(),
                    (i + 1)))
        rc = sequtils.get_reverse_complement(s)
        p1 = sequtils.get_longest_reading_frames(seq_record = s,
                table = table,
                allow_partial = allow_partial,
                require_start_after_stop = require_start_after_stop)
        p2 = sequtils.get_longest_reading_frames(seq_record = rc,
                table = table,
                allow_partial = allow_partial,
                require_start_after_stop = require_start_after_stop)
        # Report a length of 0 for an orientation without any reading frame.
        # Indexing p1[0] / p2[0] unconditionally here used to raise
        # IndexError, which made the empty-result branches unreachable.
        len1 = 0
        if len(p1) > 0:
            len1 = len(p1[0].seq)
        len2 = 0
        if len(p2) > 0:
            len2 = len(p2[0].seq)
        _LOG.debug('{0}: read length {1}, rev comp read length {2}'.format(
                s.id, len1, len2))
        if len(p2) == 0:
            # Nothing found on the reverse complement: keep the original.
            yield s
        elif len(p1) == 0:
            # Only the reverse complement has a frame.
            _LOG.warning('Reverse complementing sequence {0!r}'.format(rc.id))
            yield rc
        elif len2 > len1:
            _LOG.warning('Reverse complementing sequence {0!r}'.format(rc.id))
            yield rc
        else:
            # Ties favour the original orientation.
            yield s
Beispiel #3
0
def summarize_longest_read_lengths(seq_iter,
                                   gap_characters=['-'],
                                   table=1,
                                   allow_partial=True,
                                   require_start_after_stop=True):
    """Return sorted reading-frame lengths for each sequence.

    Each sequence from ``seqmod.remove_gaps(seq_iter, ...)`` contributes
    one tuple ``(forward_length, reverse_complement_length, seq.id)``,
    where each length is that of the first record returned by
    ``sequtils.get_longest_reading_frames`` for that orientation, or 0
    when nothing is returned.  The list of tuples is returned sorted.
    """
    frame_options = {
        'table': table,
        'allow_partial': allow_partial,
        'require_start_after_stop': require_start_after_stop,
    }

    def first_frame_length(record):
        # Length of the first reported frame, or 0 if there is none.
        frames = sequtils.get_longest_reading_frames(record, **frame_options)
        if not frames:
            return 0
        return len(frames[0].seq)

    summary = []
    for record in seqmod.remove_gaps(seq_iter,
                                     gap_characters=gap_characters):
        forward = first_frame_length(record)
        reverse = first_frame_length(sequtils.get_reverse_complement(record))
        summary.append((forward, reverse, record.id))
    summary.sort()
    return summary
Beispiel #4
0
def summarize_longest_read_lengths(seq_iter,
        gap_characters=['-'],
        table = 1,
        allow_partial = True,
        require_start_after_stop = True):
    """
    Build a sorted list of `(length, rev_comp_length, id)` tuples.

    For every gap-stripped sequence the first entry returned by
    `sequtils.get_longest_reading_frames` is measured, once for the
    sequence itself and once for its reverse complement.  An orientation
    for which nothing is returned is recorded with length 0.
    """
    def _length_of_first(frames):
        # An empty result means no reading frame was reported.
        return len(frames[0].seq) if frames else 0

    rows = []
    stripped = seqmod.remove_gaps(seq_iter, gap_characters = gap_characters)
    for record in stripped:
        forward_frames = sequtils.get_longest_reading_frames(
                record,
                table = table,
                allow_partial = allow_partial,
                require_start_after_stop = require_start_after_stop)
        forward_length = _length_of_first(forward_frames)
        reverse_frames = sequtils.get_longest_reading_frames(
                sequtils.get_reverse_complement(record),
                table = table,
                allow_partial = allow_partial,
                require_start_after_stop = require_start_after_stop)
        reverse_length = _length_of_first(reverse_frames)
        rows.append((forward_length, reverse_length, record.id))
    return sorted(rows)
Beispiel #5
0
    def test_require_start_after_stop(self):
        """Check the longest frame for each partial/start-after-stop combo.

        The same input is run through ``get_longest_reading_frames`` with
        all four combinations of ``allow_partial`` and
        ``require_start_after_stop``; each must return exactly one record
        that is a new object, carries the input's metadata and has the
        expected sequence.
        """
        dna = 'TAGATAGATAGAAATTGGCCATGACCAACTGAATA'
        # (allow_partial, require_start_after_stop, expected sequence)
        scenarios = (
            (True, True, 'ATGACCAACTGA'),
            (False, True, 'ATGACCAACTGA'),
            (True, False, 'ATAGAAATTGGCCATGACCAACTGAATA'),
            (False, False, 'ATGACCAACTGA'),
        )
        for partial, start_after_stop, expected in scenarios:
            # Fresh records per scenario, as in the unrolled form.
            seq = SeqRecord(Seq(dna, IUPAC.ambiguous_dna), id='1')
            exp = SeqRecord(Seq(expected, IUPAC.ambiguous_dna), id='1')
            frames = sequtils.get_longest_reading_frames(
                seq,
                table=1,
                allow_partial=partial,
                require_start_after_stop=start_after_stop)
            self.assertEqual(len(frames), 1)
            longest = frames[0]
            self.assertFalse(longest is seq)
            self.assertSameMetadata(longest, seq)
            self.assertEqual(str(longest.seq), str(exp.seq))
Beispiel #6
0
    def test_require_start_after_stop(self):
        """
        Exercise `get_longest_reading_frames` with every combination of
        `allow_partial` and `require_start_after_stop` on one input.

        Each call must return a single record that is a distinct object
        from the input, shares its metadata, and has the expected sequence.
        The distinctness check uses identity (`assertIsNot`) rather than
        `assertNotEqual`: SeqRecord rich comparison is deliberately
        unimplemented in newer Biopython releases, and identity is what
        the check is about.
        """
        # allow_partial=True, require_start_after_stop=True
        seq = SeqRecord(Seq('TAGATAGATAGAAATTGGCCATGACCAACTGAATA', IUPAC.ambiguous_dna), id='1')
        exp = SeqRecord(Seq('ATGACCAACTGA', IUPAC.ambiguous_dna), id='1')
        lrf = sequtils.get_longest_reading_frames(seq,
                table = 1,
                allow_partial = True,
                require_start_after_stop = True)
        self.assertEqual(len(lrf), 1)
        lrf = lrf[0]
        self.assertIsNot(lrf, seq)
        self.assertSameMetadata(lrf, seq)
        self.assertEqual(str(lrf.seq), str(exp.seq))

        # allow_partial=False, require_start_after_stop=True
        seq = SeqRecord(Seq('TAGATAGATAGAAATTGGCCATGACCAACTGAATA', IUPAC.ambiguous_dna), id='1')
        exp = SeqRecord(Seq('ATGACCAACTGA', IUPAC.ambiguous_dna), id='1')
        lrf = sequtils.get_longest_reading_frames(seq,
                table = 1,
                allow_partial = False,
                require_start_after_stop = True)
        self.assertEqual(len(lrf), 1)
        lrf = lrf[0]
        self.assertIsNot(lrf, seq)
        self.assertSameMetadata(lrf, seq)
        self.assertEqual(str(lrf.seq), str(exp.seq))

        # allow_partial=True, require_start_after_stop=False: the only
        # combination here that expects the longer sequence.
        seq = SeqRecord(Seq('TAGATAGATAGAAATTGGCCATGACCAACTGAATA', IUPAC.ambiguous_dna), id='1')
        exp = SeqRecord(Seq('ATAGAAATTGGCCATGACCAACTGAATA', IUPAC.ambiguous_dna), id='1')
        lrf = sequtils.get_longest_reading_frames(seq,
                table = 1,
                allow_partial = True,
                require_start_after_stop = False)
        self.assertEqual(len(lrf), 1)
        lrf = lrf[0]
        self.assertIsNot(lrf, seq)
        self.assertSameMetadata(lrf, seq)
        self.assertEqual(str(lrf.seq), str(exp.seq))

        # allow_partial=False, require_start_after_stop=False
        seq = SeqRecord(Seq('TAGATAGATAGAAATTGGCCATGACCAACTGAATA', IUPAC.ambiguous_dna), id='1')
        exp = SeqRecord(Seq('ATGACCAACTGA', IUPAC.ambiguous_dna), id='1')
        lrf = sequtils.get_longest_reading_frames(seq,
                table = 1,
                allow_partial = False,
                require_start_after_stop = False)
        self.assertEqual(len(lrf), 1)
        lrf = lrf[0]
        self.assertIsNot(lrf, seq)
        self.assertSameMetadata(lrf, seq)
        self.assertEqual(str(lrf.seq), str(exp.seq))
Beispiel #7
0
def longest_reading_frames(seq_iter,
        gap_characters=['-'],
        table = 1,
        allow_partial = True,
        require_start_after_stop = True):
    """
    Yield one record per input sequence, after gap removal.

    The yielded record is the first entry returned by
    `sequtils.get_longest_reading_frames` for the gap-stripped sequence.
    When nothing is returned, the result of
    `sequtils.copy_seq_metadata(sequence, '')` is yielded instead.
    """
    stripped = remove_gaps(seq_iter, gap_characters=gap_characters)
    for record in stripped:
        frames = sequtils.get_longest_reading_frames(record,
                table = table,
                allow_partial = allow_partial,
                require_start_after_stop = require_start_after_stop)
        if not frames:
            # No frame reported: fall back to the metadata-copy helper.
            yield sequtils.copy_seq_metadata(record, '')
            continue
        yield frames[0]
Beispiel #8
0
def longest_reading_frames(seq_iter,
                           gap_characters=['-'],
                           table=1,
                           allow_partial=True,
                           require_start_after_stop=True):
    """Yield the first reported reading frame of each gap-stripped sequence.

    Sequences come from ``remove_gaps(seq_iter, ...)``.  For a sequence
    where ``sequtils.get_longest_reading_frames`` returns nothing, the
    result of ``sequtils.copy_seq_metadata(sequence, '')`` is yielded in
    its place, so one item is produced per input sequence.
    """
    frame_options = dict(
        table=table,
        allow_partial=allow_partial,
        require_start_after_stop=require_start_after_stop)
    for record in remove_gaps(seq_iter, gap_characters=gap_characters):
        frames = sequtils.get_longest_reading_frames(record, **frame_options)
        # Only the selected branch is evaluated, so the metadata copy is
        # made solely when no frame was reported.
        yield frames[0] if frames else sequtils.copy_seq_metadata(record, '')