def test_buffer(c, s, a, b):
    """Check that ``buffer(5)`` after a slow dask ``map`` absorbs early emits.

    The first five emits must complete in under 0.5s, the next five must
    push the elapsed time past ``0.5 / 3`` seconds, and all ten results must
    arrive within five seconds.
    """
    source = Stream(asynchronous=True)
    buffered = source.scatter().map(slowinc, delay=0.5).buffer(5)
    results = buffered.gather().sink_to_list()

    began = time.time()
    for value in range(5):
        yield source.emit(value)
    first_batch_done = time.time()
    assert first_batch_done - began < 0.5

    for value in range(5, 10):
        yield source.emit(value)
    second_batch_done = time.time()
    assert second_batch_done - began > (0.5 / 3)

    # Poll until every result has been gathered, bailing out after 5 seconds.
    while len(results) < 10:
        yield gen.sleep(0.01)
        assert time.time() - began < 5

    assert results == list(map(inc, range(10)))
    assert source.loop == c.loop
def test_zip_literals():
    """``sz.zip`` given a literal between two streams repeats it in each tuple."""
    left = Stream()
    right = Stream()
    zipped = sz.zip(left, 123, right)
    results = zipped.sink_to_list()

    expected = []
    for left_value, right_value in ((1, 2), (4, 5)):
        left.emit(left_value)
        right.emit(right_value)
        expected.append((left_value, 123, right_value))
        assert results == expected
def test_flatten():
    """``flatten`` emits the elements of each incoming list one by one."""
    source = Stream()
    flattened = source.flatten().sink_to_list()

    for batch in ([1, 2, 3], [4, 5], [6, 7, 8]):
        source.emit(batch)

    assert flattened == [1, 2, 3, 4, 5, 6, 7, 8]
def test_pluck():
    """``pluck(1)`` picks index 1 of each element and raises on short input."""
    source = Stream()
    picked = source.pluck(1).sink_to_list()

    source.emit([1, 2, 3])
    assert picked == [2]

    source.emit([4, 5, 6, 7, 8, 9])
    assert picked == [2, 5]

    # A one-element list has no index 1.
    with pytest.raises(IndexError):
        source.emit([1])
def test_combine_latest():
    """``combine_latest`` with and without an explicit ``emit_on`` of both inputs.

    Both variants are expected to produce the same sequence of tuples.
    """
    a = Stream()
    b = Stream()
    default_node = a.combine_latest(b)
    explicit_node = a.combine_latest(b, emit_on=[a, b])
    default_out = default_node.sink_to_list()
    explicit_out = explicit_node.sink_to_list()

    for stream, value in ((a, 1), (a, 2), (b, 'a'), (a, 3), (b, 'b')):
        stream.emit(value)

    assert default_out == [(2, 'a'), (3, 'a'), (3, 'b')]
    assert explicit_out == [(2, 'a'), (3, 'a'), (3, 'b')]
def execute(self):
    """Push every URL in ``self.urls`` through a streamz pipeline.

    Configures logging, obtains a client via ``self.get_client()``, then
    builds a pipeline that maps ``self.load_url_safe`` over each URL (with
    ``self.timeout_seconds`` as an extra positional argument), buffers the
    results, and hands them to ``self.log_info``.

    The client is always closed, even if building the pipeline or emitting
    a URL raises; the exception still propagates to the caller.
    """
    self.config_log()
    client = self.get_client()
    try:
        stream = Stream()
        # NOTE(review): true division yields a float buffer size; confirm
        # whether an integer (e.g. ``// 2``) was intended.
        (
            stream.map(self.load_url_safe, self.timeout_seconds)
            .buffer(self._get_cpu_count() / 2)
            .sink(self.log_info)
        )

        for url in self.urls:
            stream.emit(url)
    finally:
        # Release the client regardless of how the pipeline run ended.
        client.close()

# if __name__ == "__main__":
#     if len(sys.argv) > 1:
#         file = sys.argv[1]
#     else:
#         file = "/Users/mikeartz/dev/make-requests-fast/make_requests_fast/resources/test_urls.csv"
#     dsr = DaskStreamzRequestor(file)
#     dsr.execute()
def test_non_unique_emit(c, s, a, b):
    """Regression for https://github.com/python-streamz/streams/issues/397

    Emitting the same value several times must still run the mapped
    function once per emit.
    """
    source = Stream(asynchronous=True)
    mapped = source.scatter().map(lambda x: random.random())
    results = mapped.gather().sink_to_list()

    # Emit the same (non-unique) value three times.
    for _ in range(3):
        yield source.emit(0)

    assert len(results) == 3
    first, second, third = results
    # At least one of the three random draws must differ from the first.
    assert not (first == second and first == third)
def test_merge():
    """Zip two streams of ``StreamDoc`` objects and ``merge`` each pair.

    Args are expected to be concatenated; on a kwargs key clash the value
    from the second document is expected to win.
    """
    first_stream = Stream()
    second_stream = Stream()
    merged_stream = first_stream.zip(second_stream).map(merge)
    results = merged_stream.sink_to_list()

    first_doc = StreamDoc(args=[1, 2], kwargs={'a': 1, 'c': 3})
    second_doc = StreamDoc(args=[3, 4], kwargs={'b': 2, 'c': 4})

    first_stream.emit(first_doc)
    # Nothing is emitted until both sides of the zip have a value.
    assert len(results) == 0
    second_stream.emit(second_doc)

    merged_doc = results[0]
    merged_kwargs = merged_doc['kwargs']
    merged_args = merged_doc['args']

    for key, expected in (('a', 1), ('b', 2), ('c', 4)):
        assert merged_kwargs[key] == expected
    assert merged_args == [1, 2, 3, 4]
def test_pluck_list():
    """``pluck([0, 2])`` emits a tuple of the requested indices."""
    source = Stream()
    picked = source.pluck([0, 2]).sink_to_list()

    source.emit([1, 2, 3])
    assert picked == [(1, 3)]

    source.emit([4, 5, 6, 7, 8, 9])
    assert picked == [(1, 3), (4, 6)]

    # A one-element list has no index 2.
    with pytest.raises(IndexError):
        source.emit([1])
def test_delay():
    """``delay(0.02)`` holds emitted values back before passing them on."""
    source = Stream(asynchronous=True)
    delayed = source.delay(0.02).sink_to_list()

    for value in range(5):
        yield source.emit(value)

    # Nothing may have come through immediately after emitting.
    assert len(delayed) == 0

    yield gen.sleep(0.04)
    assert len(delayed) < 5

    yield gen.sleep(0.1)
    assert len(delayed) == 5
def test_connect_discombine_latest():
    """Disconnecting one input of ``combine_latest`` drops it from the output."""
    a, b, c = Stream(), Stream(), Stream()
    combined = a.combine_latest(b, c, emit_on=a)
    results = combined.sink_to_list()

    c.disconnect(combined)

    # ``a`` is emitted last because it is the only stream that triggers output.
    for stream in (b, c, a):
        stream.emit(1)

    assert results == [(1, 1)]
def test_unique_key():
    """``unique`` keyed on parity with ``history=1`` drops same-parity repeats."""
    source = Stream()
    deduplicated = source.unique(key=lambda n: n % 2, history=1).sink_to_list()

    for value in (1, 2, 4, 6, 3):
        source.emit(value)

    assert deduplicated == [1, 2, 3]
def test_partition_metadata():
    """``partition(2)`` forwards the metadata of the elements in each partition."""
    source = Stream()
    seen = metadata(source.partition(2)).sink_to_list()

    # The first element carries no metadata; the rest carry {'v': <value>}.
    source.emit(0)
    for value in (1, 2, 3):
        source.emit(value, metadata=[{'v': value}])

    first_partition = [{'v': 1}]  # emitted when 1 arrives; 0 has no metadata
    second_partition = [{'v': 2}, {'v': 3}]
    assert seen == [first_partition, second_partition]
def test_disconnect_zip():
    """Disconnecting one input of ``zip`` removes it from the zipped tuples."""
    a, b, c = Stream(), Stream(), Stream()
    zipped = a.zip(b, c)
    results = zipped.sink_to_list()

    b.disconnect(zipped)

    a.emit(1)
    b.emit(1)
    # ``b`` no longer feeds the zip, so nothing has been emitted yet.
    assert len(results) == 0

    c.emit(1)
    assert results == [(1, 1)]
def test_zip_metadata():
    """``zip`` combines the metadata of the elements it pairs up."""
    a = Stream()
    b = Stream()
    seen = metadata(a.zip(b)).sink_to_list()

    a.emit(1, metadata=[{'v': 1}])
    b.emit(2, metadata=[{'v': 2}])
    a.emit(3)
    b.emit(4, metadata=[{'v': 4}])

    first_pair = [{'v': 1}, {'v': 2}]  # emitted when 2 is introduced
    second_pair = [{'v': 4}]  # emitted when 4 is introduced; 3 has no metadata
    assert seen == [first_pair, second_pair]
def test_partition_then_scatter_async(c, s, a, b):
    """Ensure ``partition`` with a timeout ahead of ``scatter`` works when asynchronous."""
    began = time.monotonic()
    source = Stream(asynchronous=True)

    partitioned = source.partition(2, timeout=.1)
    incremented = partitioned.scatter().map(
        lambda batch: [item + 1 for item in batch])
    results = incremented.buffer(2).gather().flatten().sink_to_list()

    counter = RefCounter(loop=source.loop)
    for value in range(3):
        yield source.emit(value, metadata=[{'ref': counter}])

    # Wait until every reference is released, giving up one second after start.
    while True:
        if counter.count == 0 or not (time.monotonic() - began < 1.):
            break
        yield gen.sleep(1e-2)

    assert results == [1, 2, 3]
def test_sliding_window_metadata():
    """``sliding_window(2)`` forwards the metadata of the elements in each window."""
    source = Stream()
    seen = metadata(source.sliding_window(2)).sink_to_list()

    # The first element carries no metadata; the rest carry {'v': <value>}.
    source.emit(0)
    for value in (1, 2, 3):
        source.emit(value, metadata=[{'v': value}])

    expected = [
        [{'v': 1}],  # first window; 0 has no metadata
        [{'v': 1}, {'v': 2}],  # second window
        [{'v': 2}, {'v': 3}],  # third window
    ]
    assert seen == expected
def test_map_on_dict(c, s, a, b):
    """Mapping over dict elements through dask must look the same to the user.

    dask treats dicts differently, so this guards the streamz API against
    exposing that difference. Regression test against #336.
    """
    def add_to_dict(d):
        d["x"] = d["i"]
        return d

    source = Stream(asynchronous=True)
    mapped = source.scatter().map(add_to_dict)
    results = mapped.gather().sink_to_list()

    for index in range(5):
        yield source.emit({"i": index})

    assert len(results) == 5

    ordered = sorted(results, key=lambda entry: entry["x"])
    for expected, entry in enumerate(ordered):
        assert entry["x"] == expected
        assert entry["i"] == expected
def test_timed_window():
    """``timed_window(0.01)`` groups emits into lists on a fixed interval."""
    source = Stream(asynchronous=True)
    window = source.timed_window(0.01)
    assert window.loop is IOLoop.current()

    batches = window.sink_to_list()

    for value in range(10):
        yield source.emit(value)
        yield gen.sleep(0.004)

    # Give the window one more interval to flush the tail.
    yield gen.sleep(window.interval)

    assert batches
    assert sum(batches, []) == list(range(10))
    sizes = [len(batch) for batch in batches]
    assert all(size <= 3 for size in sizes)
    assert any(size >= 2 for size in sizes)

    yield gen.sleep(0.1)
    # With no new input, the most recent batch must be empty.
    assert not batches[-1]
def test_timed_window_metadata():
    """``timed_window`` forwards the metadata of the elements in each window."""
    source = Stream()
    seen = metadata(source.timed_window(0.01)).sink_to_list()

    source.emit(0)
    source.emit(1, metadata=[{'v': 1}])
    yield gen.sleep(0.1)

    for value in (2, 3):
        source.emit(value, metadata=[{'v': value}])
    yield gen.sleep(0.1)

    first_window = [{'v': 1}]  # 0 has no metadata
    second_window = [{'v': 2}, {'v': 3}]
    assert seen == [first_window, second_window]
def test_sliding_window_ref_counts():
    """``sliding_window(2)`` retains a reference only while an element is in the window."""
    source = Stream()
    # Bound to a name even though it is never read; presumably this keeps
    # the window node alive for the duration of the test.
    _ = source.sliding_window(2)

    previous = RefCounter()
    source.emit(-2)
    source.emit(-1, metadata=[{'ref': previous}])

    for value in range(10):
        current = RefCounter()
        assert previous.count == 1

        source.emit(value, metadata=[{'ref': current}])

        # The previous element is released once the new one arrives.
        assert previous.count == 0
        assert current.count == 1
        previous = current
def test_backpressure():
    """A slow consumer behind a bounded queue must slow down ``emit``."""
    queue = Queue(maxsize=2)

    source = Stream(asynchronous=True)
    source.map(inc).scan(add, start=0).sink(queue.put)

    @gen.coroutine
    def drain_slowly():
        # Take one item, then pause 0.1 seconds, forever.
        while True:
            yield queue.get()
            yield gen.sleep(0.1)

    IOLoop.current().add_callback(drain_slowly)

    began = time()
    for value in range(5):
        yield source.emit(value)
    elapsed = time() - began

    assert elapsed >= 0.2
def test_disconnect():
    """Values flow to a node only while it is connected to the source."""
    source = Stream()
    upstream = Stream()
    received = upstream.sink_to_list()

    # Not connected yet: nothing arrives.
    source.emit(1)
    assert received == []

    source.connect(upstream)
    for value in (2, 3):
        source.emit(value)
    assert received == [2, 3]

    # Disconnected again: later emits are not delivered.
    source.disconnect(upstream)
    source.emit(4)
    assert received == [2, 3]
def test_timed_window_backpressure():
    """A slow consumer behind ``timed_window`` must slow down ``emit``."""
    queue = Queue(maxsize=1)

    source = Stream(asynchronous=True)
    source.timed_window(0.01).sink(queue.put)

    @gen.coroutine
    def drain_slowly():
        # Take one item, then pause 0.1 seconds, forever.
        while True:
            yield queue.get()
            yield gen.sleep(0.1)

    IOLoop.current().add_callback(drain_slowly)

    began = time()
    for value in range(5):
        yield source.emit(value)
        yield gen.sleep(0.01)
    elapsed = time() - began

    assert elapsed > 0.2
def test_collect_metadata():
    """``collect`` forwards the metadata of the elements gathered before each flush."""
    source = Stream()
    collector = source.collect()
    seen = metadata(collector).sink_to_list()

    source.emit(0)
    for value in (1, 2):
        source.emit(value, metadata=[{'v': value}])
    collector.flush()

    for value in (3, 4):
        source.emit(value, metadata=[{'v': value}])
    collector.flush()

    first_flush = [{'v': 1}, {'v': 2}]  # covers 0-2, but 0 has no metadata
    second_flush = [{'v': 3}, {'v': 4}]  # the rest
    assert seen == [first_flush, second_flush]
def test_combine_latest_metadata():
    """``combine_latest`` combines the metadata of the values it currently holds."""
    a = Stream()
    b = Stream()
    seen = metadata(a.combine_latest(b)).sink_to_list()

    a.emit(1, metadata=[{'v': 1}])
    b.emit(2, metadata=[{'v': 2}])
    b.emit(3)
    b.emit(4, metadata=[{'v': 4}])

    expected = [
        [{'v': 1}, {'v': 2}],  # first emit, when 2 is introduced
        [{'v': 1}],  # 3 has no metadata but replaces the value on 'b'
        [{'v': 1}, {'v': 4}],  # 4 replaces the metadata-less value on 'b'
    ]
    assert seen == expected
def test_combine_latest_ref_counts():
    """``combine_latest`` holds a reference only for the latest value per stream."""
    a = Stream()
    b = Stream()
    # Bound to a name even though it is never read; presumably this keeps
    # the combine_latest node alive for the duration of the test.
    _ = a.combine_latest(b)

    first = RefCounter()
    a.emit(1, metadata=[{'ref': first}])
    assert first.count == 1

    # A newer value on the same stream kicks out the old one.
    second = RefCounter()
    a.emit(2, metadata=[{'ref': second}])
    assert first.count == 0
    assert second.count == 1

    # The value on stream a is still retained; the value on stream b is new.
    third = RefCounter()
    b.emit(3, metadata=[{'ref': third}])
    assert second.count == 1
    assert third.count == 1
def test_zip_ref_counts():
    """``zip`` holds references to buffered values and releases them once emitted."""
    a = Stream()
    b = Stream()
    # Bound to a name even though it is never read; presumably this keeps
    # the zip node alive for the duration of the test.
    _ = a.zip(b)

    # The first value on a is buffered.
    first = RefCounter()
    a.emit(1, metadata=[{'ref': first}])
    assert first.count == 1

    # The second value on a is buffered as well.
    second = RefCounter()
    a.emit(2, metadata=[{'ref': second}])
    assert first.count == 1
    assert second.count == 1

    # A value on b pairs with the first value; both leave the buffer.
    third = RefCounter()
    b.emit(3, metadata=[{'ref': third}])
    assert first.count == 0
    assert second.count == 1  # still in the buffer
    assert third.count == 0
def test_triple_zip_latest():
    """``zip_latest`` over three streams pairs each lossless value with the latest others."""
    from streamz.core import Stream

    s1 = Stream()
    s2 = Stream()
    s3 = Stream()
    combined = s1.zip_latest(s2, s3)
    results = combined.sink_to_list()

    emissions = (
        (s1, 1),
        (s2, 'I'),
        (s2, "II"),
        (s1, 2),
        (s2, "III"),
        (s3, 'a'),
        (s3, 'b'),
        (s1, 3),
    )
    for stream, value in emissions:
        stream.emit(value)

    assert results == [(1, 'III', 'a'), (2, 'III', 'a'), (3, 'III', 'b')]
def test_no_output():
    """``emit`` on a stream with no downstream nodes returns ``None``."""
    source = Stream()
    outcome = source.emit(1)
    assert outcome is None