Exemple #1
0
def group_pairs_by_name(seqs, all_pairs_same_n_seqs=False):
    '''It groups consecutive reads that share the same pair name.

    The name of a read is the first item returned by
    _parse_pair_direction_and_name. A read for which that call raises
    PairDirectionError ends up in a group of its own.

    seqs -- iterable of reads.
    all_pairs_same_n_seqs -- if True every group must have as many reads as
        the first group, otherwise an InterleaveError is raised.

    It yields lists of reads, one list per group, in the input order.
    '''
    paired_seqs = []
    prev_name = None
    n_seqs_per_pair = None
    for seq in seqs:
        try:
            name = _parse_pair_direction_and_name(seq)[0]
        except PairDirectionError:
            name = None
        # The current group is closed by a read without pair information or
        # by a change of name. An empty group is never closed nor
        # size-checked: otherwise a leading read without pair information
        # would record a bogus group size of 0.
        if paired_seqs and (name is None or name != prev_name):
            if all_pairs_same_n_seqs:
                if n_seqs_per_pair is None:
                    # the first group sets the size expected for the rest
                    n_seqs_per_pair = len(paired_seqs)
                elif n_seqs_per_pair != len(paired_seqs):
                    msg = 'Pair had different number of reads: '
                    # prev_name is None for a read without pair information
                    if prev_name is None:
                        msg += '<unnamed read>'
                    else:
                        msg += prev_name
                    raise InterleaveError(msg)
            yield paired_seqs
            paired_seqs = []
        paired_seqs.append(seq)
        prev_name = name
    if paired_seqs:
        # The last group is still pending once the input is exhausted
        if all_pairs_same_n_seqs and n_seqs_per_pair is not None:
            if n_seqs_per_pair != len(paired_seqs):
                msg = 'Pair had different number of reads: '
                if prev_name is None:
                    msg += '<unnamed read>'
                else:
                    msg += prev_name
                raise InterleaveError(msg)
        yield paired_seqs
Exemple #2
0
def _check_name_and_direction_match(seq1, seq2):
    '''It checks that two reads can be the two members of a pair.

    The name and direction of each read are obtained with
    _parse_pair_direction_and_name. An InterleaveError is raised if the
    names differ or if both reads have the same direction.
    '''
    name_a, direction_a = _parse_pair_direction_and_name(seq1)
    name_b, direction_b = _parse_pair_direction_and_name(seq2)

    if name_a != name_b:
        template = 'The reads from the two files do not match: {}, {}'
        raise InterleaveError(template.format(name_a, name_b))

    same_direction = direction_a == direction_b
    if same_direction:
        template = 'Two paired reads have the same direction: {}, {}'
        read_a = name_a + ' ' + direction_a
        read_b = name_b + ' ' + direction_b
        raise InterleaveError(template.format(read_a, read_b))
Exemple #3
0
def group_pairs(seqs,
                n_seqs_in_pair=None,
                check_all_same_n_seqs=True,
                check_name_matches=True):
    '''It yields the reads grouped in pairs (lists of reads).

    seqs -- iterable of reads.
    n_seqs_in_pair -- number of reads per pair. If None it is taken from the
        length of the first pair returned by _get_first_pair_by_name.
    check_all_same_n_seqs -- raise an InterleaveError if a pair has a number
        of reads different from n_seqs_in_pair.
    check_name_matches -- run _check_name_and_direction_match on every pair.

    Both checks are skipped when the pairs have just one read. The first
    pair, when it is obtained by name, is yielded without those checks.
    '''
    seqs = iter(seqs)
    if n_seqs_in_pair is None:
        first_pair, next_read = _get_first_pair_by_name(seqs)
        if first_pair is not None:
            yield first_pair
            n_seqs_in_pair = len(first_pair)
            # Presumably next_read was already consumed from seqs by the
            # helper, so it is put back in front of the remaining reads.
            # NOTE(review): if the helper can return None as next_read a
            # None item enters the stream here -- confirm.
            seqs = chain([next_read], seqs)

    if n_seqs_in_pair == 1:
        # No need to check anything, a pair cannot have less than one read
        # or more than one name
        check_all_same_n_seqs = False
        check_name_matches = False

    if n_seqs_in_pair:
        pairs = group_in_packets_fill_last(seqs, packet_size=n_seqs_in_pair)
        for pair in pairs:
            # None items are dropped (presumably the filler added to the last
            # packet). A real list is required because the pair is measured,
            # unpacked and yielded; filter() only returns a list on Python 2.
            pair = [seq for seq in pair if seq is not None]
            if check_all_same_n_seqs and n_seqs_in_pair != len(pair):
                msg = 'The last pair has fewer reads'
                raise InterleaveError(msg)
            if check_name_matches:
                _check_name_and_direction_match(*pair)
            yield pair
Exemple #4
0
def _check_name_and_direction_match(*seqs):
    'It fails if the names do not match or if the directions are equal'
    n_seqs = len(seqs)
    names = set()
    directions = set()
    for seq in seqs:
        name, direction = _parse_pair_direction_and_name(seq)
        names.add(name)
        directions.add(direction)

    if len(names) > 1:
        msg = 'The read names from a pair do not match: %s'
        msg %= ','.join(names)
        raise InterleaveError(msg)
    if len(directions) != n_seqs:
        msg = 'A pair has repeated directions: ' + first(names)
        raise InterleaveError(msg)
Exemple #5
0
def interleave_pairs(seqs1, seqs2, skip_checks=False):
    '''A generator that alternates the reads taken from two iterators.

    For every position the read from seqs1 is yielded first and then the
    read from seqs2.

    Unless skip_checks is True it raises an InterleaveError when one
    iterator runs out before the other, and every couple of reads is
    validated with _check_name_and_direction_match. With skip_checks the
    reads left in the longer iterator are yielded on their own.
    '''
    checks_enabled = not skip_checks
    for pair in izip_longest(seqs1, seqs2, fillvalue=None):
        if checks_enabled:
            # a None is the filler used once one of the inputs is exhausted
            if any(seq is None for seq in pair):
                raise InterleaveError(
                    'The files had a different number of sequences')
            _check_name_and_direction_match(*pair)
        for seq in pair:
            if seq is not None:
                yield seq
Exemple #6
0
def deinterleave_pairs(seqs, out_fhand1, out_fhand2, out_format):
    '''It splits a sequence iterator with alternating paired reads in two.

    The first read of every pair is written to out_fhand1 and the second one
    to out_fhand2, using write_seqs with out_format. Both file handles are
    flushed once the input has been consumed.

    Every pair is validated with _check_name_and_direction_match and an
    InterleaveError is raised if the input has an odd number of reads.
    '''
    # iter() and the next() built-in accept any iterable (not only objects
    # with a .next method) and work both on Python 2 and 3.
    seqs = iter(seqs)
    while True:
        seq1 = next(seqs, None)
        if seq1 is None:
            break  # we have consumed the input iterator completely
        seq2 = next(seqs, None)
        if seq2 is None:
            msg = 'The file had an odd number of sequences'
            raise InterleaveError(msg)
        _check_name_and_direction_match(seq1, seq2)
        write_seqs([seq1], out_fhand1, out_format)
        write_seqs([seq2], out_fhand2, out_format)
    out_fhand1.flush()
    out_fhand2.flush()