def test_key(self):
    """Check that sorted_items and unique honour a custom ``key`` function.

    The same three tuples are sorted and de-duplicated twice: first keyed
    on the first tuple element, then keyed on the second one.
    """
    items = [(1, 'a'), (1, 'b'), (2, 'a')]

    # Materialise the sorted result once. Checking it with list() and then
    # handing the same object to unique() would feed unique() an exhausted
    # input whenever sorted_items returns a one-shot iterator.
    _sorted_items = list(sorted_items(iter(items), key=lambda x: x[0]))
    assert _sorted_items == [(1, 'a'), (1, 'b'), (2, 'a')]
    unique_items = unique(_sorted_items, key=lambda x: x[0])
    assert list(unique_items) == [(1, 'a'), (2, 'a')]

    # Same check, keyed on the second element of every tuple.
    _sorted_items = list(sorted_items(iter(items), key=lambda x: x[1]))
    assert _sorted_items == [(1, 'a'), (2, 'a'), (1, 'b')]
    unique_items = unique(_sorted_items, key=lambda x: x[1])
    assert list(unique_items) == [(1, 'a'), (1, 'b')]
def test_key(self):
    """Check that sorted_items and unique honour a custom ``key`` function.

    The first half sorts with ``max_items_in_memory=1``; the second half
    uses the default. Both halves then de-duplicate with the same key that
    was used for sorting.
    """
    items = [(1, 'a'), (1, 'b'), (2, 'a')]

    # The sorted result is materialised once so that it can be both
    # asserted on and reused as the input of unique().
    _sorted_items = list(sorted_items(iter(items), key=lambda x: x[0],
                                      max_items_in_memory=1))
    assert _sorted_items == [(1, 'a'), (1, 'b'), (2, 'a')]
    unique_items = unique(_sorted_items, key=lambda x: x[0])
    assert list(unique_items) == [(1, 'a'), (2, 'a')]

    # Materialise here as well, matching the first half: passing an object
    # already consumed by list() to unique() would yield nothing whenever
    # sorted_items returns a one-shot iterator.
    _sorted_items = list(sorted_items(iter(items), key=lambda x: x[1]))
    assert _sorted_items == [(1, 'a'), (2, 'a'), (1, 'b')]
    unique_items = unique(_sorted_items, key=lambda x: x[1])
    assert list(unique_items) == [(1, 'a'), (1, 'b')]
def filter_duplicates(in_fhands, out_fhand, paired_reads, n_seqs_packet=None,
                      tempdir=None):
    """Write to out_fhand the pairs read from in_fhands, one per key.

    The pairs produced by _read_pairs are sorted with _get_pair_key as the
    sort key, reduced with unique() using the same key, and every surviving
    pair is written with write_seqs.

    Args:
        in_fhands: input file handles; must not be empty.
        out_fhand: handle passed to write_seqs for every kept pair.
        paired_reads: forwarded unchanged to _read_pairs.
        n_seqs_packet: forwarded to sorted_items as max_items_in_memory.
        tempdir: forwarded to sorted_items.

    Raises:
        ValueError: if in_fhands is empty (or otherwise falsy).
    """
    if not in_fhands:
        raise ValueError('At least one input fhand is required')

    read_pairs = _read_pairs(in_fhands, paired_reads)

    # unique() is fed the sorted stream, keyed identically to the sort.
    ordered_pairs = sorted_items(read_pairs, key=_get_pair_key,
                                 max_items_in_memory=n_seqs_packet,
                                 tempdir=tempdir)
    distinct_pairs = unique(ordered_pairs, key=_get_pair_key)

    for distinct_pair in distinct_pairs:
        write_seqs(distinct_pair, out_fhand)
def filter_duplicates(in_fhands, out_fhand, paired_reads, use_length=None,
                      n_seqs_packet=None, tempdir=None):
    """Write to out_fhand the pairs read from in_fhands, one per key.

    Pairs come from _read_pairs and are keyed by a _PairKeyGetter instance.
    When n_seqs_packet is None the pairs go straight to unique_unordered;
    otherwise they are sorted with sorted_items first and then reduced with
    unique. Every surviving pair is written with write_seqs.

    Args:
        in_fhands: input file handles; must not be empty.
        out_fhand: handle passed to write_seqs for every kept pair.
        paired_reads: forwarded unchanged to _read_pairs.
        use_length: forwarded to _PairKeyGetter.
        n_seqs_packet: None selects the unordered path; any other value is
            forwarded to sorted_items as max_items_in_memory.
        tempdir: forwarded to sorted_items (sorted path only).

    Raises:
        ValueError: if in_fhands is empty (or otherwise falsy).
    """
    if not in_fhands:
        raise ValueError('At least one input fhand is required')

    read_pairs = _read_pairs(in_fhands, paired_reads)
    pair_key = _PairKeyGetter(use_length=use_length)

    if n_seqs_packet is not None:
        # Sorted path: sort first, then de-duplicate with the same key.
        ordered_pairs = sorted_items(read_pairs, key=pair_key,
                                     max_items_in_memory=n_seqs_packet,
                                     tempdir=tempdir)
        distinct_pairs = unique(ordered_pairs, key=pair_key)
    else:
        distinct_pairs = unique_unordered(read_pairs, key=pair_key)

    for distinct_pair in distinct_pairs:
        write_seqs(distinct_pair, out_fhand)
def test_unique_items(self):
    """unique() without a key yields each repeated value only once."""
    with_repeats = [1, 1, 2, 2, 3, 3, 4]
    expected = [1, 2, 3, 4]
    assert list(unique(with_repeats)) == expected