Ejemplo n.º 1
0
def group_pairs(seqs, n_seqs_in_pair=None, check_all_same_n_seqs=True,
                check_name_matches=True):
    """Yield the reads of an interleaved stream grouped pair by pair.

    This is a generator; nothing is consumed from seqs until it is iterated.

    Arguments:
        seqs: iterable of reads in which the reads of a pair are adjacent.
        n_seqs_in_pair: number of reads that make up a pair. When None it is
            set to the length of the first pair returned by
            _get_first_pair_by_name.
        check_all_same_n_seqs: when true, a group that ends up with a number
            of reads different from n_seqs_in_pair raises InterleaveError.
        check_name_matches: when true, the reads of every group are passed
            to _check_name_and_direction_match.

    Both checks are switched off when n_seqs_in_pair is 1.

    Yields:
        The auto-detected first pair exactly as the helper returned it (no
        check is run on it here), then one tuple of reads per later group
        with the None fill placeholders removed.

    Raises:
        InterleaveError: see check_all_same_n_seqs.
    """
    seqs = iter(seqs)
    if n_seqs_in_pair is None:
        first_pair, next_read = _get_first_pair_by_name(seqs)
        if first_pair is not None:
            yield first_pair
            n_seqs_in_pair = len(first_pair)
            # The helper read one item past the first pair; put it back so
            # the packets below start on a pair boundary.
            # NOTE(review): presumably next_read is None when the input ended
            # with the first pair -- confirm against the helper. A None must
            # not be pushed back: it would shift every later packet by one.
            if next_read is not None:
                seqs = chain([next_read], seqs)

    if n_seqs_in_pair == 1:
        # No need to check anything, a pair cannot have less than one read
        # or more than one name
        check_all_same_n_seqs = False
        check_name_matches = False

    if n_seqs_in_pair:
        pairs = group_in_packets_fill_last(seqs, packet_size=n_seqs_in_pair)
        for pair in pairs:
            # Drop the None placeholders used to pad a short packet. Build a
            # real tuple instead of calling filter(): on Python 3 filter()
            # returns an iterator and the len() below would fail.
            pair = tuple(seq for seq in pair if seq is not None)
            if check_all_same_n_seqs and n_seqs_in_pair != len(pair):
                msg = 'The last pair has fewer reads'
                raise InterleaveError(msg)
            if check_name_matches:
                _check_name_and_direction_match(*pair)
            yield pair
Ejemplo n.º 2
0
def group_pairs(seqs,
                n_seqs_in_pair=None,
                check_all_same_n_seqs=True,
                check_name_matches=True):
    """Yield the reads of an interleaved stream grouped pair by pair.

    This is a generator; nothing is consumed from seqs until it is iterated.

    Arguments:
        seqs: iterable of reads in which the reads of a pair are adjacent.
        n_seqs_in_pair: number of reads that make up a pair. When None it is
            set to the length of the first pair returned by
            _get_first_pair_by_name.
        check_all_same_n_seqs: when true, a group that ends up with a number
            of reads different from n_seqs_in_pair raises InterleaveError.
        check_name_matches: when true, the reads of every group are passed
            to _check_name_and_direction_match.

    Both checks are switched off when n_seqs_in_pair is 1.

    Yields:
        The auto-detected first pair exactly as the helper returned it (no
        check is run on it here), then one tuple of reads per later group
        with the None fill placeholders removed.

    Raises:
        InterleaveError: see check_all_same_n_seqs.
    """
    seqs = iter(seqs)
    if n_seqs_in_pair is None:
        first_pair, next_read = _get_first_pair_by_name(seqs)
        if first_pair is not None:
            yield first_pair
            n_seqs_in_pair = len(first_pair)
            # The helper read one item past the first pair; put it back so
            # the packets below start on a pair boundary.
            # NOTE(review): presumably next_read is None when the input ended
            # with the first pair -- confirm against the helper. A None must
            # not be pushed back: it would shift every later packet by one.
            if next_read is not None:
                seqs = chain([next_read], seqs)

    if n_seqs_in_pair == 1:
        # No need to check anything, a pair cannot have less than one read
        # or more than one name
        check_all_same_n_seqs = False
        check_name_matches = False

    if n_seqs_in_pair:
        pairs = group_in_packets_fill_last(seqs, packet_size=n_seqs_in_pair)
        for pair in pairs:
            # Drop the None placeholders used to pad a short packet. Build a
            # real tuple instead of calling filter(): on Python 3 filter()
            # returns an iterator and the len() below would fail.
            pair = tuple(seq for seq in pair if seq is not None)
            if check_all_same_n_seqs and n_seqs_in_pair != len(pair):
                msg = 'The last pair has fewer reads'
                raise InterleaveError(msg)
            if check_name_matches:
                _check_name_and_direction_match(*pair)
            yield pair
Ejemplo n.º 3
0
    def test_group_in_packets(self):
        '''It groups an iterator in packets of items

        Exercises an exact split, a short trailing packet, the padded
        trailing packet of group_in_packets_fill_last and an empty input.
        '''
        # Input length is a multiple of the packet size.
        assert list(group_in_packets(range(4), 2)) == [(0, 1), (2, 3)]

        # One item left over: the plain grouper emits a shorter last packet.
        expected_short_tail = [(0, 1), (2, 3), (4,)]
        assert list(group_in_packets(range(5), 2)) == expected_short_tail

        # Same input, but the fill_last variant pads the last packet.
        expected_padded_tail = [(0, 1), (2, 3), (4, None)]
        padded = list(group_in_packets_fill_last(range(5), 2))
        assert padded == expected_padded_tail

        # Nothing in, nothing out.
        assert list(group_in_packets([], 2)) == []
Ejemplo n.º 4
0
    def test_group_in_packets(self):
        '''It groups an iterator in packets of items

        Exercises an exact split, a short trailing packet, the padded
        trailing packet of group_in_packets_fill_last and an empty input.
        '''
        # Input length is a multiple of the packet size.
        assert list(group_in_packets(range(4), 2)) == [(0, 1), (2, 3)]

        # One item left over: the plain grouper emits a shorter last packet.
        expected_short_tail = [(0, 1), (2, 3), (4,)]
        assert list(group_in_packets(range(5), 2)) == expected_short_tail

        # Same input, but the fill_last variant pads the last packet.
        expected_padded_tail = [(0, 1), (2, 3), (4, None)]
        padded = list(group_in_packets_fill_last(range(5), 2))
        assert padded == expected_padded_tail

        # Nothing in, nothing out.
        assert list(group_in_packets([], 2)) == []
Ejemplo n.º 5
0
def _itemize_fastq(fhand):
    '''It yields one SeqItem per group of four non-empty lines of fhand.

    Lines rejected by _line_is_not_empty are skipped. The remaining lines
    are taken four at a time and each group is wrapped in a SeqItem whose
    name comes from _get_name_from_lines.
    NOTE(review): assumes every record spans exactly four lines (sequence
    and quality not wrapped) -- confirm with the callers.
    '''
    non_empty_lines = ifilter(_line_is_not_empty, fhand)
    # The fill_last grouper is used because it is faster than
    # group_in_packets.
    records = group_in_packets_fill_last(non_empty_lines, 4)
    return (SeqItem(_get_name_from_lines(record), record)
            for record in records)
Ejemplo n.º 6
0
def _itemize_fastq_singleline(fhand):
    '''It yields one SeqItem per group of four non-empty lines of fhand.

    Lines rejected by _line_is_not_empty are skipped. The remaining lines
    are taken four at a time and each group is wrapped in a SeqItem whose
    name comes from _get_name_from_lines.
    NOTE(review): assumes every record spans exactly four lines (sequence
    and quality each on a single line) -- confirm with the callers.
    '''
    non_empty_lines = ifilter(_line_is_not_empty, fhand)
    # The fill_last grouper is used because it is faster than
    # group_in_packets.
    records = group_in_packets_fill_last(non_empty_lines, 4)
    return (SeqItem(_get_name_from_lines(record), record)
            for record in records)