def test_multiple_consumers(redis: StrictRedis, data):
    """Three consumers in one group must collectively see every message."""
    out = Stream().pluck(2)
    seen = set()
    out.pluck("i").sink(seen.add)
    stream, group = uuid(2)
    consumers = set()
    for _ in range(3):
        member = Stream.from_redis_consumer_group(
            stream,
            group,
            uuid(),
            count=1,
            timeout=0.1,
        )
        member.connect(out)
        member.start()
        consumers.add(member)
    for record in data:
        redis.xadd(stream, record)
    wait_for(lambda: len(seen) == 50, 1)
    assert seen == {record["i"] for record in data}
    for member in consumers:
        member.stop()
def test_heartbeats(redis: StrictRedis):
    """Five Hearts publishing on the group's pub/sub channel are all heard."""
    stream, group = uuid(2)
    redis.xgroup_create(stream, group, mkstream=True)
    interval = 0.1
    timeout = 0.5
    beaters = []
    for _ in range(5):
        beat = Heart(stream, group, uuid(), interval=interval, timeout=timeout)
        beaters.append(beat)
        beat.start()
    heard = set()
    sub = redis.pubsub()
    sub.subscribe(group)

    def all_heard():
        # each poll may yield at most one message; collect until 5 distinct
        msg = sub.get_message()
        if msg is not None:
            heard.add(msg["data"])
        return len(heard) == 5

    wait_for(all_heard, 5, period=0.01)
    for beat in beaters:
        beat.stop()
def run_and_fail():
    """Start a consumer, buffer 10 messages, then drop them un-ACKed.

    Relies on `stream`, `group`, `target` and `redis` from the enclosing
    scope; leaves 10 pending entries attributed to this consumer.
    """
    consumer = uuid()
    src = Stream.from_redis_consumer_group(
        stream,
        group,
        consumer,
        count=1,
        timeout=0.1,
    )
    buf = src.buffer(10)
    buf.rate_limit(0.1).pluck(1).sink_to_redis_list(target)
    src.start()
    wait_for(lambda: buf.queue.qsize() == 10, 3)
    buf.queue = Queue(10)  # lose data in the buffer, won't be ACKed
    src.stop()

    def pending_10():
        entries = convert_bytes(redis.xpending(stream, group))["consumers"]
        return any(e["name"] == consumer and e["pending"] == 10
                   for e in entries)

    wait_for(pending_10, 1, period=0.1)
def test_backpressure_connect_empty_stream():
    """A source connected to a rate-limited sink must not race ahead of it."""

    @Stream.register_api()
    class from_list(Stream):
        # Minimal push-source that emits items popped from a shared list.
        def __init__(self, source, **kwargs):
            self.source = source
            super().__init__(ensure_io_loop=True, **kwargs)

        def start(self):
            self.stopped = False
            self.loop.add_callback(self.run)

        @gen.coroutine
        def run(self):
            # back-pressure: each _emit is awaited before the next pop
            while not self.stopped and self.source:
                yield self._emit(self.source.pop(0))

    backlog = [0, 1, 2, 3, 4]
    src = Stream.from_list(backlog)
    sout = Stream()
    collected = sout.rate_limit(1).sink_to_list()
    src.connect(sout)
    src.start()
    wait_for(lambda: collected == [0], 0.01)
    # the rate limit held the source back, so the backlog is not drained
    assert backlog
def test_simple():
    """The catalog's `simple` source replays [1, 2, 3] once started."""
    catalog = intake.open_catalog(catfile)
    stream = catalog.simple.read()
    collected = stream.sink_to_list()
    assert collected == []
    stream.start()
    wait_for(lambda: collected == [1, 2, 3], timeout=1)
def test_from_kafka():
    """End-to-end: produce into Kafka, receive via Stream.from_kafka."""
    nonce = random.randint(0, 10000)
    conf = {'bootstrap.servers': 'localhost:9092',
            'group.id': 'streamz-test%i' % nonce}
    with kafka_service() as kafka:
        kafka, TOPIC = kafka
        stream = Stream.from_kafka([TOPIC], conf, asynchronous=True)
        received = stream.sink_to_list()
        stream.start()
        yield gen.sleep(0.1)  # for loop to run
        for i in range(10):
            yield gen.sleep(0.2)
            kafka.produce(TOPIC, b'value-%d' % i)
            kafka.flush()
        # it takes some time for messages to come back out of kafka
        wait_for(lambda: len(received) == 10, 10, period=0.1)
        assert received[-1] == b'value-9'

        kafka.produce(TOPIC, b'final message')
        kafka.flush()
        wait_for(lambda: received[-1] == b'final message', 10, period=0.1)

        stream._close_consumer()
        kafka.produce(TOPIC, b'lost message')
        kafka.flush()
        # absolute sleep here, since we expect output list *not* to change
        yield gen.sleep(1)
        assert received[-1] == b'final message'
        stream._close_consumer()
def launch_kafka():
    """Start a disposable spotify/kafka container and return its id."""
    stop_docker(let_fail=True)
    subprocess.call(shlex.split("docker pull spotify/kafka"))
    cmd = ("docker run -d -p 2181:2181 -p 9092:9092 --env "
           "ADVERTISED_HOST=127.0.0.1 --env ADVERTISED_PORT=9092 "
           "--name streamz-kafka spotify/kafka")
    print(cmd)
    container_id = subprocess.check_output(shlex.split(cmd)).decode()[:-1]

    def _teardown():
        # container_id may be empty if `docker run` produced no output
        if container_id:
            stop_docker(cid=container_id)

    atexit.register(_teardown)

    def _broker_running():
        try:
            logs = subprocess.check_output(
                ['docker', 'logs', container_id],
                stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError:
            return None  # container not inspectable yet; keep polling
        return b'kafka entered RUNNING state' in logs

    wait_for(_broker_running, 10, period=0.1)
    return container_id
def test_from_kafka_thread():
    """Like test_from_kafka, but with the threaded (non-async) consumer."""
    j = random.randint(0, 10000)
    ARGS = {'bootstrap.servers': 'localhost:9092',
            'group.id': 'streamz-test%i' % j}
    with kafka_service() as kafka:
        # BUG FIX: kafka_service yields a (producer, topic) tuple (see the
        # sibling kafka tests); without unpacking, TOPIC is undefined and
        # `kafka.produce` would be called on the tuple itself.
        kafka, TOPIC = kafka
        stream = Stream.from_kafka([TOPIC], ARGS)
        out = stream.sink_to_list()
        stream.start()
        for i in range(10):
            kafka.produce(TOPIC, b'value-%d' % i)
        kafka.flush()
        # it takes some time for messages to come back out of kafka
        startlen = len([o for o in out if o])
        wait_for(lambda: len([o for o in out if o]) == startlen + 10,
                 10, period=0.1)
        assert out[-1] == b'value-9'

        kafka.produce(TOPIC, b'final message')
        kafka.flush()
        wait_for(lambda: out[-1] == b'final message', 10, period=0.1)

        stream._close_consumer()
        kafka.produce(TOPIC, b'lost message')
        kafka.flush()
        # absolute sleep here, since we expect output list *not* to change
        sleep(1)
        assert out[-1] == b'final message'
def test_tcp():
    """Data sent over TCP (including from multiple sockets) reaches the stream."""
    port = 9876
    s = Source.from_tcp(port)
    out = s.sink_to_list()
    s.start()
    wait_for(lambda: s.server is not None, 2, period=0.02)
    # FIX: pre-bind both sockets so the `finally` block cannot raise a
    # NameError (masking the real failure) if a connect fails before
    # `sock2` is ever assigned.
    sock = sock2 = None
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect(("localhost", port))
        sock.send(b'data\n')
        sock.close()
        # reconnect on the same name: the server must accept repeat clients
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect(("localhost", port))
        sock.send(b'data\n')
        sock2 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock2.connect(("localhost", port))
        sock2.send(b'data2\n')
        wait_for(lambda: out == [b'data\n', b'data\n', b'data2\n'], 2,
                 period=0.01)
    finally:
        s.stop()
        if sock is not None:
            sock.close()
        if sock2 is not None:
            sock2.close()
def test_kafka_batch():
    """from_kafka_batched with keys honours max_batch_size and 'latest' offset."""
    nonce = random.randint(0, 10000)
    conf = {'bootstrap.servers': 'localhost:9092',
            'group.id': 'streamz-test%i' % nonce,
            'auto.offset.reset': 'latest'}
    with kafka_service() as kafka:
        kafka, TOPIC = kafka
        # These messages aren't read since Stream starts reading from latest offsets
        for i in range(10):
            kafka.produce(TOPIC, b'value-%d' % i, b'%d' % i)
        kafka.flush()
        stream = Stream.from_kafka_batched(TOPIC, conf, max_batch_size=4,
                                           keys=True)
        batches = stream.sink_to_list()
        stream.start()
        wait_for(lambda: stream.upstream.started, 10, 0.1)
        for i in range(10):
            kafka.produce(TOPIC, b'value-%d' % i, b'%d' % i)
        kafka.flush()
        # batches may still be empty, or begin with an empty batch
        wait_for(
            lambda: any(batches) and batches[-1][-1]['value'] == b'value-9',
            10, period=0.2)
        assert batches[-1][-1]['key'] == b'9'
        # 10 messages at max_batch_size=4 -> batch sizes 4, 4, 2
        assert len(batches[0]) == len(batches[1]) == 4 and len(batches) == 3
        stream.upstream.stopped = True
def test_from_iterable_backpressure():
    """rate_limit must hold the iterator back instead of draining it."""
    numbers = iter(range(5))
    src = Source.from_iterable(numbers)
    collected = src.rate_limit(0.1).sink_to_list()
    src.start()
    wait_for(lambda: collected == [0], 1, period=0.01)
    # 0 was delivered, 1 sits in the blocked _emit, so 2 is next
    assert next(numbers) == 2
def test_periodic():
    """from_periodic repeatedly evaluates its callable after start()."""
    src = Source.from_periodic(lambda: True)
    ticks = src.sink_to_list()
    assert src.stopped
    src.start()
    wait_for(lambda: ticks, 0.3, period=0.01)
    wait_for(lambda: len(ticks) > 1, 0.3, period=0.01)
    assert all(ticks)
def test_df():
    """The catalog's `df` entry yields a live streamz DataFrame."""
    dataframe = pytest.importorskip("streamz.dataframe")
    catalog = intake.open_catalog(catfile)
    sdf = catalog.df.read()
    assert isinstance(sdf, dataframe.DataFrame)
    sdf.start()
    wait_for(lambda: sdf.current_value is not None, timeout=1)
    snapshot = sdf.current_value
    # the frame keeps updating, so the current value eventually moves on
    wait_for(lambda: not sdf.current_value.equals(snapshot), timeout=1)
def test_from_redis_lists(redis: StrictRedis):
    """Items RPUSHed onto a redis list are emitted in order."""
    key = uuid()
    src = Stream.from_redis_lists(key, timeout=0.1)
    collected = src.pluck(1).map(int).sink_to_list()
    src.start()
    redis.rpush(key, *range(3))
    wait_for(lambda: collected == [0, 1, 2], 3)
    src.stop()
def test_dask():
    """to_dask() produces a DaskStream that gathers back the same data."""
    distr = pytest.importorskip("dask.distributed")
    import streamz.dask
    with distr.Client(processes=False):
        catalog = intake.open_catalog(catfile)
        dstream = catalog.simple.to_dask()
        assert isinstance(dstream, streamz.dask.DaskStream)
        gathered = dstream.gather().sink_to_list()
        dstream.start()
        wait_for(lambda: gathered == [1, 2, 3], timeout=1)
def test_claim(redis: StrictRedis, data):
    """Messages left pending by dead consumers get claimed and reprocessed."""
    stream, group, target = uuid(3)
    for record in data:
        redis.xadd(stream, record)

    def run_and_fail():
        # Start a consumer, let it buffer 10 messages, then drop the buffer
        # so those messages are never ACKed and stay pending.
        name = uuid()
        src = Stream.from_redis_consumer_group(
            stream,
            group,
            name,
            count=1,
            timeout=0.1,
        )
        buf = src.buffer(10)
        buf.rate_limit(0.1).pluck(1).sink_to_redis_list(target)
        src.start()
        wait_for(lambda: buf.queue.qsize() == 10, 3)
        buf.queue = Queue(10)  # lose data in the buffer, won't be ACKed
        src.stop()

        def pending_10():
            entries = convert_bytes(redis.xpending(stream, group))["consumers"]
            return any(e["name"] == name and e["pending"] == 10
                       for e in entries)

        wait_for(pending_10, 1, period=0.1)

    for _ in range(10):
        run_and_fail()

    # A fresh consumer with a short claim_timeout should take over every
    # pending message left behind by the failed consumers above.
    claimer = Stream.from_redis_consumer_group(
        stream,
        group,
        uuid(),
        heartbeat_interval=0.1,
        claim_timeout=1,
        count=10,
        timeout=0.1,
    )
    claimer.pluck(1).sink_to_redis_list(target)
    claimer.start()
    wait_for(
        lambda: redis.llen(target) == 500,
        15,
        lambda: print(redis.llen(target)),
        period=0.1,
    )
    claimer.stop()
def test_basic(redis: StrictRedis, data):
    """Entries added to a redis stream come out with their payloads intact."""
    stream = uuid()
    src = Stream.from_redis_streams(stream, timeout=0.1, default_start_id=0)
    collected = src.sink_to_list()
    src.start()
    for record in data:
        redis.xadd(stream, record)
    wait_for(lambda: len(collected) == 3, 2)
    # each emitted item carries the payload in position 2
    assert [item[2] for item in collected] == data
    src.stop()
def test_multiple(redis: StrictRedis):
    """A single source can watch several redis lists at once."""
    first, second = uuid(2)
    src = Stream.from_redis_lists([first, second], timeout=0.1)
    collected = src.pluck(1).map(int).sink_to_list()
    src.start()
    redis.rpush(first, *range(3))
    redis.rpush(second, *range(3))
    wait_for(lambda: len(collected) == 6, 2)
    src.stop()
def test_index(stream):
    """Arithmetic on a streaming frame's index is applied per emitted frame."""
    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    sdf = DataFrame(example=df, stream=stream)
    shifted = sdf.index + 5
    collected = shifted.stream.gather().sink_to_list()
    sdf.emit(df)
    sdf.emit(df)
    wait_for(lambda: len(collected) > 1, timeout=2, period=0.05)
    assert_eq(collected[0], df.index + 5)
    assert_eq(collected[1], df.index + 5)
def test_ws_roundtrip():
    """Bytes pushed to a websocket sink arrive at a websocket source."""
    pytest.importorskip("websockets")
    receiver = Stream.from_websocket("localhost", 8989, start=True)
    got = receiver.sink_to_list()
    payload = [b'0123'] * 4
    sender = Stream.from_iterable(payload)
    sender.to_websocket("ws://localhost:8989")
    sender.start()
    wait_for(lambda: payload == got, timeout=1)
    sender.stop()
    receiver.stop()
def test_from_iterable_stop():
    """After stop(), the source emits nothing further."""
    from _pytest.outcomes import Failed
    src = Source.from_iterable(range(5))
    collected = src.rate_limit(0.01).sink_to_list()
    src.start()
    wait_for(lambda: collected == [0], 1)
    src.stop()
    assert src.stopped
    # the condition can never come true now, so wait_for must time out
    with pytest.raises(Failed):
        wait_for(lambda: collected == [0, 1, 2], 0.1)
def test_mqtt_roundtrip():
    """Payloads published to an MQTT topic are received by an MQTT source."""
    pytest.importorskip("paho.mqtt.client")
    receiver = Stream.from_mqtt("mqtt.eclipseprojects.io", 1883,
                                "streamz/sensor/temperature")
    got = receiver.map(lambda msg: msg.payload).sink_to_list()
    receiver.start()
    payload = [b'0123'] * 4
    sender = Stream.from_iterable(payload)
    sender.to_mqtt("mqtt.eclipseprojects.io", 1883,
                   "streamz/sensor/temperature")
    sender.start()
    wait_for(lambda: payload == got, timeout=1)
    sender.stop()
    receiver.stop()
def test_increment_restart(pg):
    """Starting from initial_value=30 reads only rows above 30 (20 of 50)."""
    table = "inc_re"
    src = Stream.from_postgres_increment(table, pg, initial_value=30,
                                         polling_interval=1, limit=10)
    collected = src.sink_to_list()
    writer = Writer(src.strategy.loader.connection, table)
    writer.create_table()
    writer.insert(50)
    src.start()
    wait_for(lambda: len(collected) == 20, 1, period=0.1)
def test_to_kafka():
    """Messages emitted into to_kafka are delivered and echoed downstream."""
    conf = {'bootstrap.servers': 'localhost:9092'}
    with kafka_service() as kafka:
        _, TOPIC = kafka
        source = Stream()
        producer = source.to_kafka(TOPIC, conf)
        delivered = producer.sink_to_list()
        for i in range(10):
            yield source.emit(b'value-%d' % i)
        source.emit('final message')
        producer.flush()
        wait_for(lambda: len(delivered) == 11, 10, period=0.1)
        assert delivered[-1] == b'final message'
def test_source(clients, info):
    """Messages published to pub/sub arrive exactly once (acked, no replay)."""
    topic, subscription = info
    src = Stream.from_gcp_pubsub(subscription, timeout=1)
    received = src.map(lambda x: int(x.data)).sink_to_list()
    src.start()
    publisher, _ = clients
    for i in range(10):
        publisher.publish(topic, str(i).encode())
    wait_for(lambda: list(range(10)) == received, 1)
    # acked messages must not be redelivered, so this wait must time out
    with pytest.raises(Failed):
        wait_for(lambda: len(received) > 10, 3)  # test ack
    src.stop()
def test_ack(redis: StrictRedis, data):
    """Once processed, no messages remain unacknowledged for the consumer."""
    stream, group, consumer = uuid(3)
    src = Stream.from_redis_consumer_group(stream, group, consumer,
                                           timeout=0.1)
    collected = src.sink_to_list()
    for record in data:
        redis.xadd(stream, record)
    src.start()
    wait_for(lambda: len(collected) == 3, 3, lambda: print(collected))
    sleep(0.05)  # wait a bit for the last ack
    # re-reading the PEL from id 0 must show nothing outstanding
    for _, pending in redis.xreadgroup(group, consumer, {stream: 0}):
        assert pending == []
    src.stop()
def redis(name="test-streamz-redis"):
    """Fixture: run a throwaway redis container and yield a client for it."""
    cleanup(name=name)
    subprocess.check_call(
        shlex.split(f"docker run -d -p 6379:6379 --name {name} redis"))

    def ready():
        logs = subprocess.check_output(shlex.split(f"docker logs {name}"),
                                       stderr=subprocess.STDOUT)
        return b"Ready to accept connections" in logs

    wait_for(ready, 10, period=0.1)
    try:
        with StrictRedis() as client:
            yield client
    finally:
        cleanup(name=name, fail=True)
def test_multiple(redis: StrictRedis, data):
    """One source reading two streams tags each entry with its stream name."""
    stream1, stream2 = uuid(2)
    src = Stream.from_redis_streams({stream1: 0, stream2: 0}, timeout=0.1)
    first = src.pluck(0).filter(lambda x: x == stream1).sink_to_list()
    second = src.pluck(0).filter(lambda x: x == stream2).sink_to_list()
    src.start()
    for record in data:
        redis.xadd(stream1, record)
        redis.xadd(stream2, record)
    wait_for(lambda: len(first) == 3, 3)
    wait_for(lambda: len(second) == 3, 3)
    assert first == [stream1] * 3
    assert second == [stream2] * 3
    src.stop()
def test_kafka_batch_checkpointing_sync_nodes():
    """Checkpointing: a consumer group resumes where it left off.

    Streams 1 and 3 have different consumer groups, while Stream 2 has the
    same group as 1. Hence, Stream 2 does not re-read the data that had been
    finished processing by Stream 1, i.e. it picks up from where Stream 1
    had left off.
    """
    j1 = random.randint(0, 10000)
    group1_conf = {'bootstrap.servers': 'localhost:9092',
                   'group.id': 'streamz-test%i' % j1,
                   'enable.auto.commit': False,
                   'auto.offset.reset': 'earliest'}
    j2 = j1 + 1
    group2_conf = {'bootstrap.servers': 'localhost:9092',
                   'group.id': 'streamz-test%i' % j2,
                   'enable.auto.commit': False,
                   'auto.offset.reset': 'earliest'}
    with kafka_service() as kafka:
        kafka, TOPIC = kafka
        for i in range(10):
            kafka.produce(TOPIC, b'value-%d' % i)
        kafka.flush()

        stream1 = Stream.from_kafka_batched(TOPIC, group1_conf)
        out1 = stream1.map(split).filter(
            lambda x: x[-1] % 2 == 1).sink_to_list()
        stream1.start()
        wait_for(lambda: any(out1) and out1[-1][-1] == 9, 10, period=0.2)
        stream1.upstream.stopped = True

        # same group as stream1: offsets are committed, nothing to re-read
        stream2 = Stream.from_kafka_batched(TOPIC, group1_conf)
        out2 = stream2.map(split).filter(
            lambda x: x[-1] % 2 == 1).sink_to_list()
        stream2.start()
        time.sleep(5)
        assert len(out2) == 0
        stream2.upstream.stopped = True

        # fresh group: reads the topic from the beginning again
        stream3 = Stream.from_kafka_batched(TOPIC, group2_conf)
        out3 = stream3.map(split).filter(
            lambda x: x[-1] % 2 == 1).sink_to_list()
        stream3.start()
        wait_for(lambda: any(out3) and out3[-1][-1] == 9, 10, period=0.2)
        stream3.upstream.stopped = True
def test_kafka_batch():
    """Batched consumption delivers all produced messages."""
    j = random.randint(0, 10000)
    ARGS = {'bootstrap.servers': 'localhost:9092',
            'group.id': 'streamz-test%i' % j}
    with kafka_service() as kafka:
        # BUG FIX: kafka_service yields a (producer, topic) tuple (as the
        # sibling kafka tests unpack it); without this line TOPIC is
        # undefined and produce() would be called on the tuple.
        kafka, TOPIC = kafka
        stream = Stream.from_kafka_batched(TOPIC, ARGS)
        out = stream.sink_to_list()
        stream.start()
        for i in range(10):
            kafka.produce(TOPIC, b'value-%d' % i)
        kafka.flush()
        # out may still be empty or first item of out may be []
        wait_for(lambda: any(out) and out[-1][-1] == b'value-9', 10,
                 period=0.2)
        # BUG FIX: the polling loop lives on stream.upstream (the other
        # batched tests stop it the same way); setting stream.stopped
        # would leave the poller running.
        stream.upstream.stopped = True