Ejemplo n.º 1
0
def get_distances(seq1,
                  seq2,
                  per_site=True,
                  aligned=False,
                  ignore_gaps=True,
                  alphabet=None,
                  aligner_tools=['mafft', 'muscle']):
    """Return the distance between two sequences in both orientations.

    The first value is ``distance(seq1, seq2, ...)`` with every option
    forwarded unchanged. The second value is the distance between the
    reverse complement of ``seq1`` and ``seq2``, or ``None`` when that
    comparison is skipped.

    The reverse-complement comparison is attempted only when ``alphabet``
    is falsy or ``alphabet.has_state('M')`` is false, and only if
    ``sequtils.get_reverse_complement(seq1)`` succeeds.
    NOTE(review): presumably an 'M' state marks a non-nucleotide alphabet
    for which a reverse complement is meaningless -- confirm.

    Returns:
        A ``(d, drc)`` tuple; ``drc`` is ``None`` when no reverse-complement
        distance was computed.
    """
    d = distance(seq1=seq1,
                 seq2=seq2,
                 per_site=per_site,
                 aligned=aligned,
                 ignore_gaps=ignore_gaps,
                 alphabet=alphabet,
                 aligner_tools=aligner_tools)
    rc = None
    drc = None
    if (not alphabet) or (not alphabet.has_state('M')):
        try:
            # No trailing comma here: a stray comma would wrap the result in
            # a 1-tuple, which is always truthy.
            rc = sequtils.get_reverse_complement(seq1)
        except Exception:
            # Best effort: a sequence that cannot be reverse complemented
            # simply gets no reverse-complement distance.
            rc = None
    if rc is not None:
        # The reverse-complement comparison is always run with aligned=False,
        # regardless of the caller's ``aligned`` argument.
        drc = distance(seq1=rc,
                       seq2=seq2,
                       per_site=per_site,
                       aligned=False,
                       ignore_gaps=ignore_gaps,
                       alphabet=alphabet,
                       aligner_tools=aligner_tools)
    return d, drc
Ejemplo n.º 2
0
def get_distances(seq1, seq2,
                  per_site=True,
                  aligned=False,
                  ignore_gaps=True,
                  alphabet=None,
                  aligner_tools=['mafft', 'muscle']):
    """Compute the forward and reverse-complement distances of two sequences.

    ``d`` is the result of ``distance`` on ``seq1`` and ``seq2`` with all
    keyword options passed through. ``drc`` is the result of ``distance`` on
    the reverse complement of ``seq1`` and ``seq2``; it stays ``None`` when
    ``alphabet`` is truthy and reports an 'M' state, or when
    ``sequtils.get_reverse_complement(seq1)`` raises.
    NOTE(review): the 'M' state check presumably detects protein
    alphabets -- verify against the alphabet class.

    Returns:
        The tuple ``(d, drc)``.
    """
    d = distance(
        seq1=seq1,
        seq2=seq2,
        per_site=per_site,
        aligned=aligned,
        ignore_gaps=ignore_gaps,
        alphabet=alphabet,
        aligner_tools=aligner_tools)
    drc = None
    rc = None
    can_reverse_complement = (not alphabet) or (not alphabet.has_state('M'))
    if can_reverse_complement:
        try:
            # Assign the record itself; a trailing comma here would build a
            # 1-tuple that is truthy no matter what was returned.
            rc = sequtils.get_reverse_complement(seq1)
        except Exception:
            # Deliberate best effort: failure just means no rev-comp distance.
            rc = None
    if rc is not None:
        # aligned is forced to False for the reverse-complement comparison.
        drc = distance(
            seq1=rc,
            seq2=seq2,
            per_site=per_site,
            aligned=False,
            ignore_gaps=ignore_gaps,
            alphabet=alphabet,
            aligner_tools=aligner_tools)
    return d, drc
Ejemplo n.º 3
0
def reverse_complement_to_longest_reading_frame(seq_iter,
                                                gap_characters=['-'],
                                                table=1,
                                                allow_partial=True,
                                                require_start_after_stop=True,
                                                log_frequency=0):
    """Yield each sequence in the orientation with the longer reading frame.

    Every sequence from ``seq_iter`` is first passed through ``remove_gaps``
    (with ``gap_characters``). For both the sequence and its reverse
    complement, ``sequtils.get_longest_reading_frames`` is called with
    ``table``, ``allow_partial`` and ``require_start_after_stop``. The
    reverse complement is yielded when the original has no reading frame but
    the reverse complement has one, or when the reverse complement's first
    reported frame is strictly longer; otherwise the (gap-free) original is
    yielded.

    If ``log_frequency`` is greater than zero, an info message is logged for
    every ``log_frequency``-th sequence.
    """
    for i, s in enumerate(remove_gaps(seq_iter,
                                      gap_characters=gap_characters)):
        if (log_frequency > 0) and (((i + 1) % log_frequency) == 0):
            _LOG.info('{0}: Checking reverse complement of seq {1}...'.format(
                datetime.datetime.now(), (i + 1)))
        rc = sequtils.get_reverse_complement(s)
        p1 = sequtils.get_longest_reading_frames(
            seq_record=s,
            table=table,
            allow_partial=allow_partial,
            require_start_after_stop=require_start_after_stop)
        p2 = sequtils.get_longest_reading_frames(
            seq_record=rc,
            table=table,
            allow_partial=allow_partial,
            require_start_after_stop=require_start_after_stop)
        # Either result may be empty, so compute the lengths defensively
        # before logging; indexing [0] unconditionally would raise IndexError
        # and make the empty-result branches below unreachable.
        len1 = len(p1[0].seq) if len(p1) > 0 else 0
        len2 = len(p2[0].seq) if len(p2) > 0 else 0
        _LOG.debug('{0}: read length {1}, rev comp read length {2}'.format(
            s.id, len1, len2))
        if len(p2) == 0:
            yield s
        elif len(p1) == 0:
            _LOG.warning('Reverse complementing sequence {0!r}'.format(rc.id))
            yield rc
        elif len2 > len1:
            _LOG.warning('Reverse complementing sequence {0!r}'.format(rc.id))
            yield rc
        else:
            yield s
Ejemplo n.º 4
0
def reverse_complement_to_first_seq(seq_iter,
                                    per_site=True,
                                    aligned=False,
                                    ignore_gaps=True,
                                    alphabet=None,
                                    aligner_tools=['mafft', 'muscle'],
                                    log_frequency=0):
    """Orient every sequence to match the first sequence of ``seq_iter``.

    The first sequence is yielded unchanged and becomes the reference. For
    each later sequence, ``seqstats.get_distances`` is called with the
    reference as ``seq1`` and the sequence as ``seq2`` (all other options are
    forwarded). When the reported reverse-complement distance is smaller than
    the plain distance, the reverse complement of the sequence is yielded;
    otherwise the sequence itself is yielded.

    If ``log_frequency`` is greater than zero, an info message is logged for
    every ``log_frequency``-th sequence after the first.
    """
    seq1 = None
    for i, seq2 in enumerate(seq_iter):
        if i == 0:
            # The first sequence is the reference; it is never flipped.
            seq1 = seq2
            yield seq2
            continue
        if (log_frequency > 0) and (((i + 1) % log_frequency) == 0):
            _LOG.info('{0}: Checking reverse complement of seq {1}...'.format(
                datetime.datetime.now(), (i + 1)))
        d, drc = seqstats.get_distances(seq1=seq1,
                                        seq2=seq2,
                                        per_site=per_site,
                                        aligned=aligned,
                                        ignore_gaps=ignore_gaps,
                                        alphabet=alphabet,
                                        aligner_tools=aligner_tools)
        _LOG.debug('{0}: distance {1}, rev comp distance {2}'.format(
            seq2.id, d, drc))
        # drc may be None when no reverse-complement distance is available.
        # Comparing None with a number raises TypeError on Python 3 (and is
        # spuriously True on Python 2), so guard the comparison.
        if (drc is not None) and (drc < d):
            _LOG.warning(
                'Reverse complementing sequence {0!r} (length {1})\n\t'
                'rev comp distance ({2}) < current distance '
                '({3})'.format(seq2.id, len(seq2.seq), drc, d))
            yield sequtils.get_reverse_complement(seq2)
        else:
            yield seq2
Ejemplo n.º 5
0
def reverse_complement_to_longest_reading_frame(seq_iter,
        gap_characters=['-'],
        table=1,
        allow_partial=True,
        require_start_after_stop=True,
        log_frequency=0):
    """Yield sequences oriented so that their longest reading frame is maximal.

    Sequences come from ``remove_gaps(seq_iter, gap_characters=...)``. For
    each one, ``sequtils.get_longest_reading_frames`` is evaluated on the
    sequence and on its reverse complement (forwarding ``table``,
    ``allow_partial`` and ``require_start_after_stop``). The reverse
    complement is yielded if only it has a reading frame, or if its first
    reported frame is strictly longer than the original's; in every other
    case the gap-free original is yielded.

    A positive ``log_frequency`` logs an info message every
    ``log_frequency``-th sequence.
    """
    frame_options = dict(
        table=table,
        allow_partial=allow_partial,
        require_start_after_stop=require_start_after_stop)
    for i, s in enumerate(remove_gaps(seq_iter, gap_characters=gap_characters)):
        if (log_frequency > 0) and (((i + 1) % log_frequency) == 0):
            _LOG.info('{0}: Checking reverse complement of seq {1}...'.format(
                datetime.datetime.now(),
                (i + 1)))
        rc = sequtils.get_reverse_complement(s)
        p1 = sequtils.get_longest_reading_frames(seq_record=s, **frame_options)
        p2 = sequtils.get_longest_reading_frames(seq_record=rc, **frame_options)
        # Guard against empty results before touching index 0; logging the
        # lengths unconditionally would raise IndexError and prevent the
        # empty-result branches below from ever running.
        forward_len = len(p1[0].seq) if len(p1) > 0 else 0
        revcomp_len = len(p2[0].seq) if len(p2) > 0 else 0
        _LOG.debug('{0}: read length {1}, rev comp read length {2}'.format(
            s.id, forward_len, revcomp_len))
        if len(p2) == 0:
            yield s
        elif len(p1) == 0:
            _LOG.warning('Reverse complementing sequence {0!r}'.format(rc.id))
            yield rc
        elif revcomp_len > forward_len:
            _LOG.warning('Reverse complementing sequence {0!r}'.format(rc.id))
            yield rc
        else:
            yield s
Ejemplo n.º 6
0
def reverse_complement_to_first_seq(seq_iter,
        per_site=True,
        aligned=False,
        ignore_gaps=True,
        alphabet=None,
        aligner_tools=['mafft', 'muscle'],
        log_frequency=0):
    """Yield sequences in the orientation closest to the first sequence.

    The first item of ``seq_iter`` is yielded as-is and kept as the
    reference. Each subsequent item is compared to the reference through
    ``seqstats.get_distances`` (options forwarded unchanged); its reverse
    complement is yielded when the reverse-complement distance is strictly
    smaller than the plain distance, otherwise the item itself is yielded.

    A positive ``log_frequency`` logs an info message every
    ``log_frequency``-th sequence after the first.
    """
    seq1 = None
    for i, seq2 in enumerate(seq_iter):
        if i == 0:
            # Reference sequence: passed through untouched.
            seq1 = seq2
            yield seq2
            continue
        if (log_frequency > 0) and (((i + 1) % log_frequency) == 0):
            _LOG.info('{0}: Checking reverse complement of seq {1}...'.format(
                datetime.datetime.now(),
                (i + 1)))
        d, drc = seqstats.get_distances(
            seq1=seq1,
            seq2=seq2,
            per_site=per_site,
            aligned=aligned,
            ignore_gaps=ignore_gaps,
            alphabet=alphabet,
            aligner_tools=aligner_tools)
        _LOG.debug('{0}: distance {1}, rev comp distance {2}'.format(
            seq2.id, d, drc))
        # A missing reverse-complement distance (None) must never win the
        # comparison; on Python 3 `None < number` raises TypeError and on
        # Python 2 it is always True.
        flip = (drc is not None) and (drc < d)
        if not flip:
            yield seq2
            continue
        _LOG.warning('Reverse complementing sequence {0!r} (length {1})\n\t'
                     'rev comp distance ({2}) < current distance '
                     '({3})'.format(seq2.id, len(seq2.seq), drc, d))
        yield sequtils.get_reverse_complement(seq2)
Ejemplo n.º 7
0
def summarize_longest_read_lengths(seq_iter,
                                   gap_characters=['-'],
                                   table=1,
                                   allow_partial=True,
                                   require_start_after_stop=True):
    """Tabulate longest reading-frame lengths in both orientations.

    Each sequence from ``seqmod.remove_gaps(seq_iter, gap_characters=...)``
    is passed to ``sequtils.get_longest_reading_frames`` as-is and again
    after reverse complementing, forwarding ``table``, ``allow_partial``
    and ``require_start_after_stop``. The length recorded for an orientation
    is that of the first returned frame's ``seq``, or 0 if the result is
    empty.

    Returns:
        A sorted list of ``(length, rev_comp_length, seq_id)`` tuples.
    """
    frame_kwargs = dict(
        table=table,
        allow_partial=allow_partial,
        require_start_after_stop=require_start_after_stop)
    summary = []
    for record in seqmod.remove_gaps(seq_iter, gap_characters=gap_characters):
        forward_frames = sequtils.get_longest_reading_frames(
            record, **frame_kwargs)
        forward_len = len(forward_frames[0].seq) if forward_frames else 0
        revcomp_frames = sequtils.get_longest_reading_frames(
            sequtils.get_reverse_complement(record), **frame_kwargs)
        revcomp_len = len(revcomp_frames[0].seq) if revcomp_frames else 0
        summary.append((forward_len, revcomp_len, record.id))
    summary.sort()
    return summary
Ejemplo n.º 8
0
def summarize_longest_read_lengths(seq_iter,
        gap_characters=['-'],
        table=1,
        allow_partial=True,
        require_start_after_stop=True):
    """Report, per sequence, the longest reading-frame length both ways.

    Sequences are taken from ``seqmod.remove_gaps(seq_iter, ...)``. For each
    one the length of the first frame returned by
    ``sequtils.get_longest_reading_frames`` is recorded for the sequence and
    for its reverse complement; an empty result counts as length 0.

    Returns:
        ``(length, rev_comp_length, seq_id)`` tuples in sorted order.
    """
    def _first_frame_length(record):
        # Length of the first reported frame, or 0 when none is reported.
        frames = sequtils.get_longest_reading_frames(
            record,
            table=table,
            allow_partial=allow_partial,
            require_start_after_stop=require_start_after_stop)
        if not frames:
            return 0
        return len(frames[0].seq)

    rows = []
    gap_free = seqmod.remove_gaps(seq_iter, gap_characters=gap_characters)
    for record in gap_free:
        forward = _first_frame_length(record)
        reverse = _first_frame_length(sequtils.get_reverse_complement(record))
        rows.append((forward, reverse, record.id))
    return sorted(rows)
Ejemplo n.º 9
0
def reverse_complement(seq_iter):
    """Lazily yield the reverse complement of every item in ``seq_iter``.

    Each item is passed to ``sequtils.get_reverse_complement`` only when the
    generator is advanced.
    """
    for record in seq_iter:
        flipped = sequtils.get_reverse_complement(record)
        yield flipped
Ejemplo n.º 10
0
def reverse_complement(seq_iter):
    """Generate reverse complements, one per input sequence, in input order.

    The conversion is delegated to ``sequtils.get_reverse_complement`` and
    performed on demand as the generator is consumed.
    """
    source = iter(seq_iter)
    for original in source:
        yield sequtils.get_reverse_complement(original)