Beispiel #1
0
    def test_sorted_items(self):
        """Check that sorted_items orders plain values, including no input.

        The same values are sorted with default arguments, with an explicit
        temporary directory and with a small ``max_items_in_memory``; the
        expected result is identical in every case.
        """
        items = [1, 2, 3, 4, 4, 3, 2, 1]
        expected = [1, 1, 2, 2, 3, 3, 4, 4]

        assert list(sorted_items(iter(items))) == expected

        # tempfile.tempdir is None until it is set or gettempdir() has been
        # called, so request the directory explicitly to pass a real path.
        in_tempdir = sorted_items(iter(items), tempdir=tempfile.gettempdir())
        assert list(in_tempdir) == expected

        in_packets = sorted_items(iter(items), max_items_in_memory=3)
        assert list(in_packets) == expected

        # Empty input: sort the empty iterator itself. Re-listing a result
        # that was already consumed above would pass without testing anything.
        assert not list(sorted_items(iter([])))
Beispiel #2
0
    def test_key(self):
        """Check sorted_items and unique when a key function is supplied.

        The items are sorted and de-duplicated first by their first element
        and then by their second element.
        """
        items = [(1, 'a'), (1, 'b'), (2, 'a')]

        # Materialise each sorted result once. It is compared and then handed
        # to unique(); a one-shot iterator would already be drained by the
        # comparison and unique() would see no items.
        _sorted_items = list(sorted_items(iter(items), key=lambda x: x[0]))
        assert _sorted_items == [(1, 'a'), (1, 'b'), (2, 'a')]
        unique_items = unique(_sorted_items, key=lambda x: x[0])
        assert list(unique_items) == [(1, 'a'), (2, 'a')]

        _sorted_items = list(sorted_items(iter(items), key=lambda x: x[1]))
        assert _sorted_items == [(1, 'a'), (2, 'a'), (1, 'b')]
        unique_items = unique(_sorted_items, key=lambda x: x[1])
        assert list(unique_items) == [(1, 'a'), (1, 'b')]
    def test_key(self):
        """Check sorted_items and unique when a key function is supplied.

        The first half sorts with ``max_items_in_memory=1``; the second half
        sorts by the second element with default settings.
        """
        items = [(1, 'a'), (1, 'b'), (2, 'a')]

        _sorted_items = list(sorted_items(iter(items), key=lambda x: x[0],
                                          max_items_in_memory=1))
        assert _sorted_items == [(1, 'a'), (1, 'b'), (2, 'a')]
        unique_items = unique(_sorted_items, key=lambda x: x[0])
        assert list(unique_items) == [(1, 'a'), (2, 'a')]

        # Materialise here as well, matching the first half: the result is
        # compared and then reused by unique(), which would get nothing from
        # an iterator that the comparison had already drained.
        _sorted_items = list(sorted_items(iter(items), key=lambda x: x[1]))
        assert _sorted_items == [(1, 'a'), (2, 'a'), (1, 'b')]
        unique_items = unique(_sorted_items, key=lambda x: x[1])
        assert list(unique_items) == [(1, 'a'), (1, 'b')]
    def test_sorted_items(self):
        """Check that sorted_items orders plain values, including no input.

        The same values are sorted with default arguments, with an explicit
        temporary directory and with a small ``max_items_in_memory``; the
        expected result is identical in every case.
        """
        items = [1, 2, 3, 4, 4, 3, 2, 1]
        expected = [1, 1, 2, 2, 3, 3, 4, 4]

        assert list(sorted_items(iter(items))) == expected

        # tempfile.tempdir is None until it is set or gettempdir() has been
        # called, so request the directory explicitly to pass a real path.
        in_tempdir = sorted_items(iter(items), tempdir=tempfile.gettempdir())
        assert list(in_tempdir) == expected

        in_packets = sorted_items(iter(items),
                                  max_items_in_memory=3)
        assert list(in_packets) == expected

        # Empty input: sort the empty iterator itself. Re-listing a result
        # that was already consumed above would pass without testing anything.
        assert not list(sorted_items(iter([])))
Beispiel #5
0
    def test_key(self):
        """Check sorted_items and unique when a key function is supplied.

        The first half sorts with ``max_items_in_memory=1``; the second half
        sorts by the second element with default settings.
        """
        items = [(1, 'a'), (1, 'b'), (2, 'a')]

        _sorted_items = list(sorted_items(iter(items), key=lambda x: x[0],
                                          max_items_in_memory=1))
        assert _sorted_items == [(1, 'a'), (1, 'b'), (2, 'a')]
        unique_items = unique(_sorted_items, key=lambda x: x[0])
        assert list(unique_items) == [(1, 'a'), (2, 'a')]

        # Materialise here as well, matching the first half: the result is
        # compared and then reused by unique(), which would get nothing from
        # an iterator that the comparison had already drained.
        _sorted_items = list(sorted_items(iter(items), key=lambda x: x[1]))
        assert _sorted_items == [(1, 'a'), (2, 'a'), (1, 'b')]
        unique_items = unique(_sorted_items, key=lambda x: x[1])
        assert list(unique_items) == [(1, 'a'), (1, 'b')]
Beispiel #6
0
def sort_fastx_files(in_fhands, key, index_fpath=None, directory=None,
                     max_items_in_memory=None, tempdir=None):
    """Return the reads found in ``in_fhands`` sorted by ``key``.

    ``key`` selects the ordering:

    * ``'seq'``: reads from ``read_seqs`` sorted with ``get_str_seq``.
    * ``'name'``: reads from ``read_seqs`` sorted with ``get_name``.
    * ``'coordinate'``: delegated to ``sort_by_position_in_ref``, which
      receives ``index_fpath``, ``directory`` and ``tempdir``
      (``max_items_in_memory`` is not forwarded on this path).

    Raises:
        ValueError: if ``key`` is none of the values above.
    """
    # The coordinate ordering works on the file handles, not on parsed reads.
    if key == 'coordinate':
        return sort_by_position_in_ref(in_fhands, index_fpath=index_fpath,
                                       directory=directory, tempdir=tempdir)

    if key == 'seq':
        sort_key = get_str_seq
    elif key == 'name':
        sort_key = get_name
    else:
        raise ValueError('Non-supported sorting key')

    return sorted_items(read_seqs(in_fhands), key=sort_key, tempdir=tempdir,
                        max_items_in_memory=max_items_in_memory)
Beispiel #7
0
def _get_paired_and_orphan(reads, ordered, max_reads_memory, temp_dir):
    """Group ``reads`` with ``group_pairs_by_name``, sorting first if needed.

    When ``ordered`` is true the reads are grouped as given. Otherwise they
    are first passed through ``sorted_items`` keyed on ``get_title``, with
    ``max_reads_memory`` and ``temp_dir`` forwarded positionally.
    """
    if ordered:
        return group_pairs_by_name(reads)

    def _title_of(read):
        return get_title(read)

    reads_by_title = sorted_items(reads, _title_of, max_reads_memory,
                                  temp_dir)
    return group_pairs_by_name(reads_by_title)
Beispiel #8
0
def _get_paired_and_orphan(reads, ordered, max_reads_memory, temp_dir):
    """Return ``group_pairs_by_name`` applied to reads in title order.

    Reads flagged as ``ordered`` are used unchanged. Any other input is run
    through ``sorted_items`` with a key that calls ``get_title``, passing
    ``max_reads_memory`` and ``temp_dir`` as the remaining positional
    arguments.
    """
    to_group = reads
    if not ordered:
        def _by_title(seq):
            return get_title(seq)

        to_group = sorted_items(reads, _by_title, max_reads_memory, temp_dir)
    return group_pairs_by_name(to_group)
Beispiel #9
0
def filter_duplicates(in_fhands, out_fhand, paired_reads,
                      n_seqs_packet=None, tempdir=None):
    """Write to ``out_fhand`` one pair per distinct ``_get_pair_key`` value.

    Pairs come from ``_read_pairs(in_fhands, paired_reads)``. They are sorted
    with ``sorted_items`` (``n_seqs_packet`` is passed as
    ``max_items_in_memory``, together with ``tempdir``) and then filtered
    with ``unique`` using the same key.

    Raises:
        ValueError: if ``in_fhands`` is empty or otherwise falsy.
    """
    if not in_fhands:
        raise ValueError('At least one input fhand is required')

    all_pairs = _read_pairs(in_fhands, paired_reads)
    ordered_pairs = sorted_items(all_pairs,
                                 key=_get_pair_key,
                                 tempdir=tempdir,
                                 max_items_in_memory=n_seqs_packet)
    distinct_pairs = unique(ordered_pairs, key=_get_pair_key)
    for distinct_pair in distinct_pairs:
        write_seqs(distinct_pair, out_fhand)
Beispiel #10
0
def filter_duplicates(in_fhands, out_fhand, paired_reads, use_length=None,
                      n_seqs_packet=None, tempdir=None):
    """Write the de-duplicated pairs read from ``in_fhands`` to ``out_fhand``.

    The duplicate key is a ``_PairKeyGetter`` built with ``use_length``.
    When ``n_seqs_packet`` is None the pairs go straight through
    ``unique_unordered``; otherwise they are sorted with ``sorted_items``
    (``n_seqs_packet`` as ``max_items_in_memory``, plus ``tempdir``) and
    filtered with ``unique``.

    Raises:
        ValueError: if ``in_fhands`` is empty or otherwise falsy.
    """
    if not in_fhands:
        raise ValueError('At least one input fhand is required')

    all_pairs = _read_pairs(in_fhands, paired_reads)
    key_getter = _PairKeyGetter(use_length=use_length)

    if n_seqs_packet is not None:
        ordered_pairs = sorted_items(all_pairs, key=key_getter,
                                     tempdir=tempdir,
                                     max_items_in_memory=n_seqs_packet)
        distinct_pairs = unique(ordered_pairs, key=key_getter)
    else:
        distinct_pairs = unique_unordered(all_pairs, key=key_getter)

    for distinct_pair in distinct_pairs:
        write_seqs(distinct_pair, out_fhand)
Beispiel #11
0
def filter_duplicates(in_fhands,
                      out_fhand,
                      paired_reads,
                      use_length=None,
                      n_seqs_packet=None,
                      tempdir=None):
    """Write the de-duplicated pairs read from ``in_fhands`` to ``out_fhand``.

    The duplicate key is a ``_PairKeyGetter`` built with ``use_length``.
    When ``n_seqs_packet`` is None the pairs go straight through
    ``unique_unordered``; otherwise they are sorted with ``sorted_items``
    (``n_seqs_packet`` as ``max_items_in_memory``, plus ``tempdir``) and
    filtered with ``unique``.

    Raises:
        ValueError: if ``in_fhands`` is empty or otherwise falsy.
    """
    if not in_fhands:
        raise ValueError('At least one input fhand is required')

    candidate_pairs = _read_pairs(in_fhands, paired_reads)
    pair_key = _PairKeyGetter(use_length=use_length)

    if n_seqs_packet is not None:
        # A packet size was given: sort first, then drop adjacent duplicates.
        pairs_in_order = sorted_items(candidate_pairs,
                                      key=pair_key,
                                      tempdir=tempdir,
                                      max_items_in_memory=n_seqs_packet)
        kept_pairs = unique(pairs_in_order, key=pair_key)
    else:
        kept_pairs = unique_unordered(candidate_pairs, key=pair_key)

    for kept_pair in kept_pairs:
        write_seqs(kept_pair, out_fhand)