Ejemplo n.º 1
0
def _read_sequences(filename):
    """Sample the values collected from the output of 'paleomix cat'.

    Starts the 'cat' command built by `factory` for `filename`, hands the
    stdout pipe of the process to `_collect_qualities`, and passes the
    collected values through `sampling.reservoir_sampling` with a sample
    size of 100000.

    Returns:
        The result of `sampling.reservoir_sampling`.

    Raises:
        NodeError: If the output was processed without error, but the
            command terminated with a non-zero return-code; the message
            includes whatever the command wrote to stderr.

    Any exception raised while starting the process or while processing its
    output is propagated unchanged, after the process has been killed and
    reaped.
    """
    cat_call = factory.new("cat")
    cat_call.add_multiple_values((filename,))
    cat_call = cat_call.finalized_call

    cat = None
    completed = False
    try:
        cat = procs.open_proc(cat_call,
                              bufsize=io.DEFAULT_BUFFER_SIZE,
                              stderr=procs.PIPE,
                              stdout=procs.PIPE)
        qualities = _collect_qualities(cat.stdout, filename)
        sample = sampling.reservoir_sampling(qualities, 100000)
        completed = True

        return sample
    finally:
        # Cleanup lives in 'finally' (rather than in an 'except' clause that
        # re-raises) so that the in-flight exception, if any, is propagated
        # as-is even when the OSError below is raised and handled.
        if cat and not completed:
            try:
                cat.kill()
            except OSError:
                # The process may already have terminated; this must not
                # mask the error that brought us here.
                pass
            cat.wait()
        elif cat:
            rc_cat = cat.wait()
            if rc_cat:
                message = "Error running 'paleomix cat':\n" \
                          "  Unicat return-code = %i\n\n%s" \
                          % (rc_cat, cat.stderr.read())
                raise NodeError(message)
Ejemplo n.º 2
0
def _read_sequences(file_type, filename, stats):
    """Sample the values collected from the output of 'paleomix cat'.

    The 'cat' command built by `factory` is run on `filename`, and its
    stdout pipe is handed to `_collect_qualities` together with `file_type`,
    `filename` and `stats`. The collected values are then passed through
    `sampling.reservoir_sampling` with a sample size of 100000, and the
    result of that call is returned.

    Raises:
        NodeError: If the command exits with a non-zero return-code and no
            StandardError was raised while processing its output.
    """
    builder = factory.new("cat")
    builder.add_multiple_values((filename, ))
    command = builder.finalized_call

    proc = None
    try:
        proc = procs.open_proc(command,
                               bufsize=io.DEFAULT_BUFFER_SIZE,
                               stderr=procs.PIPE,
                               stdout=procs.PIPE)
        collected = _collect_qualities(proc.stdout,
                                       file_type,
                                       filename,
                                       stats)

        return sampling.reservoir_sampling(collected, 100000)
    except StandardError as error:
        if proc:
            try:
                proc.kill()
            except OSError:
                # Failure to kill the process is deliberately ignored
                pass
            proc.wait()
            # Cleared so that the 'finally' clause skips the return-code check
            proc = None
        # NOTE(review): re-raised by name; under Python 2 a bare 'raise' here
        # could pick up the OSError handled above instead of 'error'.
        raise error
    finally:
        if proc:
            returncode = proc.wait()
            if returncode:
                template = "Error running 'paleomix cat':\n" \
                           "  Unicat return-code = %i\n\n%s"
                raise NodeError(template % (returncode, proc.stderr.read()))
Ejemplo n.º 3
0
    def __init__(self, handle, downsample, included_references):
        """Group a downsampled selection of records by reference name.

        The records produced by `_filter_records(handle)` are passed through
        `reservoir_sampling` with `downsample` as the sample size, grouped
        under the entry of `handle.references` selected by each record's
        `tid`, and finally sorted by `pos` within each group.

        Raises:
            ValueError: If `included_references` and `handle.references`
                differ in length.
        """
        self._records = collections.defaultdict(list)

        names = handle.references
        if len(names) != len(included_references):
            raise ValueError("Length of 'included_references' must match the "
                             "number of references in BAM file.")

        sampled = reservoir_sampling(_filter_records(handle), downsample)
        for read in sampled:
            self._records[names[read.tid]].append(read)

        self.references = names

        # Replace the defaultdict with a plain dict of position-sorted lists,
        # so that later lookups of unknown keys do not create new entries.
        self._records = {
            name: sorted(reads, key=lambda rec: rec.pos)
            for name, reads in self._records.items()
        }
Ejemplo n.º 4
0
    def __init__(self, handle, downsample, included_references):
        """Collect a downsampled set of records, keyed by reference name.

        Records come from `_filter_records(handle)` and are reduced with
        `reservoir_sampling` using `downsample` as the sample size. Each
        selected record is stored under `handle.references[record.tid]`, and
        every per-reference list ends up sorted by the records' `pos`.

        Raises:
            ValueError: If the number of entries in `included_references`
                differs from the number of entries in `handle.references`.
        """
        self._records = collections.defaultdict(list)

        references = handle.references
        if len(included_references) != len(references):
            raise ValueError("Length of 'included_references' must match the "
                             "number of references in BAM file.")

        grouped = self._records
        for selected in reservoir_sampling(_filter_records(handle),
                                           downsample):
            grouped[references[selected.tid]].append(selected)

        self.references = references

        # Store a plain dict (sharing the same lists) in place of the
        # defaultdict, then order each list by position in place.
        self._records = dict(grouped)
        for per_reference in self._records.values():
            per_reference.sort(key=lambda rec: rec.pos)
Ejemplo n.º 5
0
def test_reservoir_sampling__downsample_to_non_number_raises_type_error():
    """A string given as the sample size is expected to raise TypeError."""
    values = list(range(5))
    with pytest.raises(TypeError):
        sampling.reservoir_sampling(values, "Eh?")
Ejemplo n.º 6
0
def test_reservoir_sampling__downsample_to_float_raises_type_error():
    """A float given as the sample size is expected to raise TypeError."""
    values = list(range(5))
    with pytest.raises(TypeError):
        sampling.reservoir_sampling(values, 1.0)
Ejemplo n.º 7
0
def test_reservoir_sampling__downsample_to_negative_raises_value_error():
    """A negative sample size is expected to raise ValueError."""
    values = list(range(5))
    with pytest.raises(ValueError):
        sampling.reservoir_sampling(values, -1)
Ejemplo n.º 8
0
def test_reservoir_sampling__downsample_to_zero():
    """A sample size of zero is expected to yield an empty list."""
    values = list(range(5))
    sampled = sampling.reservoir_sampling(values, 0)
    assert sampled == []
Ejemplo n.º 9
0
def test_reservoir_sampling__upsample_equals_input():
    """A sample size above the input length is expected to return the input."""
    values = list(range(5))
    sampled = sampling.reservoir_sampling(values, 10)
    assert sampled == [0, 1, 2, 3, 4]
Ejemplo n.º 10
0
def test_reservoir_sampling__select_second_item():
    """With `randint` always returning 0, the second of two items is kept."""
    def always_zero(_min, _max):
        return 0

    rng = Mock(randint=always_zero)
    sampled = sampling.reservoir_sampling([1, 2], 1, rng)
    assert sampled == [2]
Ejemplo n.º 11
0
def test_reservoir_sampling__select_first_item():
    """With `randint` always returning 1, the first of two items is kept."""
    def always_one(_min, _max):
        return 1

    rng = flexmock(randint=always_one)
    sampled = sampling.reservoir_sampling([1, 2], 1, rng)
    assert_equal(sampled, [1])
Ejemplo n.º 12
0
def test_reservoir_sampling__downsample_to_zero():
    """A sample size of zero is expected to yield an empty list."""
    values = range(5)
    sampled = sampling.reservoir_sampling(values, 0)
    assert_equal(sampled, [])
Ejemplo n.º 13
0
def test_reservoir_sampling__upsample_equals_input():
    """A sample size above the input length is expected to return the input.

    The expected value is built with `list(...)` so that the comparison also
    holds on Python 3, where `range(5)` is not a list and would never compare
    equal to the list returned by `reservoir_sampling`.
    """
    result = sampling.reservoir_sampling(range(5), 10)
    assert_equal(result, list(range(5)))
Ejemplo n.º 14
0
def test_reservoir_sampling__select_first_item():
    """The first of two items is kept when `randint` always returns 1."""
    def fixed_randint(_min, _max):
        return 1

    fake_rng = flexmock(randint=fixed_randint)
    items = [1, 2]
    assert_equal(sampling.reservoir_sampling(items, 1, fake_rng), [1])
Ejemplo n.º 15
0
def test_reservoir_sampling__downsample_to_zero():
    """Requesting zero items is expected to produce an empty list."""
    expected = []
    observed = sampling.reservoir_sampling(range(5), 0)
    assert_equal(observed, expected)
Ejemplo n.º 16
0
def test_reservoir_sampling__upsample_equals_input():
    """Requesting more items than are available should return all of them.

    `range(5)` is wrapped in `list(...)` for the expected value; on Python 3
    a `range` object never compares equal to a list, so the unwrapped
    comparison would fail regardless of what `reservoir_sampling` returned.
    """
    result = sampling.reservoir_sampling(range(5), 10)
    assert_equal(result, list(range(5)))