def _read_sequences(filename):
    """Sample qualities from 'filename' by streaming it through 'cat'.

    A "cat" command is built via 'factory' for the given filename and run
    as a subprocess; its stdout is handed to '_collect_qualities' and the
    collected values are passed to 'sampling.reservoir_sampling' with a
    sample size of 100000, the result of which is returned.

    Raises NodeError if the subprocess exits with a non-zero return code;
    the message includes the return code and whatever the subprocess wrote
    to stderr. Any exception raised while reading is re-raised after the
    subprocess has been killed.
    """
    cat_call = factory.new("cat")
    cat_call.add_multiple_values((filename,))
    cat_call = cat_call.finalized_call

    cat = None
    try:
        cat = subprocess.Popen(cat_call,
                               bufsize=io.DEFAULT_BUFFER_SIZE,
                               stderr=subprocess.PIPE,
                               stdout=subprocess.PIPE)
        qualities = _collect_qualities(cat.stdout, filename)

        return sampling.reservoir_sampling(qualities, 100000)
    except BaseException:
        # Reading failed (or was interrupted): terminate the subprocess,
        # release its pipes, and clear 'cat' so that the 'finally' clause
        # below does not replace the original exception with a NodeError.
        if cat is not None:
            cat.kill()
            cat.wait()
            cat.stdout.close()
            cat.stderr.close()
            cat = None
        raise
    finally:
        if cat is not None:
            try:
                # Drain stderr *before* waiting: waiting on a process whose
                # piped stderr is never read can block forever once the OS
                # pipe buffer fills up.
                stderr = cat.stderr.read()
                rc_cat = cat.wait()
            finally:
                # Always release the pipe file descriptors.
                cat.stdout.close()
                cat.stderr.close()

            if rc_cat:
                message = "Error running 'paleomix cat':\n" \
                    " Unicat return-code = %i\n\n%s" \
                    % (rc_cat, stderr)
                raise NodeError(message)
def _read_sequences(filename):
    """Sample qualities from 'filename' by streaming it through 'cat'.

    A "cat" command is built via 'factory' for the given filename and run
    as a subprocess; its stdout is handed to '_collect_qualities' and the
    collected values are passed to 'sampling.reservoir_sampling' with a
    sample size of 100000, the result of which is returned.

    Raises NodeError if the subprocess exits with a non-zero return code;
    the message includes the return code and whatever the subprocess wrote
    to stderr. Any exception raised while reading is re-raised after the
    subprocess has been killed.
    """
    cat_call = factory.new("cat")
    cat_call.add_multiple_values((filename,))
    cat_call = cat_call.finalized_call

    cat = None
    try:
        cat = subprocess.Popen(cat_call,
                               bufsize=io.DEFAULT_BUFFER_SIZE,
                               stderr=subprocess.PIPE,
                               stdout=subprocess.PIPE)
        qualities = _collect_qualities(cat.stdout, filename)

        return sampling.reservoir_sampling(qualities, 100000)
    except BaseException:
        # Reading failed (or was interrupted): terminate the subprocess,
        # release its pipes, and clear 'cat' so that the 'finally' clause
        # below does not replace the original exception with a NodeError.
        if cat is not None:
            cat.kill()
            cat.wait()
            cat.stdout.close()
            cat.stderr.close()
            cat = None
        raise
    finally:
        if cat is not None:
            try:
                # Drain stderr *before* waiting: waiting on a process whose
                # piped stderr is never read can block forever once the OS
                # pipe buffer fills up.
                stderr = cat.stderr.read()
                rc_cat = cat.wait()
            finally:
                # Always release the pipe file descriptors.
                cat.stdout.close()
                cat.stderr.close()

            if rc_cat:
                message = "Error running 'paleomix cat':\n" \
                    " Unicat return-code = %i\n\n%s" \
                    % (rc_cat, stderr)
                raise NodeError(message)
def test_reservoir_sampling__downsample_to_zero():
    # Requesting a sample of size zero from a five-item input is expected
    # to produce an empty list.
    expected = []
    observed = sampling.reservoir_sampling(range(5), 0)
    assert_equal(observed, expected)
def test_reservoir_sampling__upsample_equals_input():
    # Requesting more items (10) than the input provides (5) is expected to
    # return every input item. The expectation is materialised with list()
    # so that the comparison also holds where range() does not return a
    # list (Python 3); on Python 2 the two spellings are equal.
    result = sampling.reservoir_sampling(range(5), 10)
    assert_equal(result, list(range(5)))
def test_reservoir_sampling__select_second_item():
    # The random source is stubbed so that randint always returns 0; with a
    # sample size of one, the test expects the second input value to be the
    # one that ends up in the sample.
    always_zero = flexmock(randint=lambda _min, _max: 0)
    observed = sampling.reservoir_sampling([1, 2], 1, always_zero)
    assert_equal(observed, [2])
def test_reservoir_sampling__select_first_item():
    # The random source is stubbed so that randint always returns 1; with a
    # sample size of one, the test expects the first input value to remain
    # in the sample.
    always_one = flexmock(randint=lambda _min, _max: 1)
    observed = sampling.reservoir_sampling([1, 2], 1, always_one)
    assert_equal(observed, [1])