Example #1
def test_multiple_consumers(redis: StrictRedis, data):
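    # each message arrives as a (stream, id, data) tuple (compare Example #17), so pluck(2) keeps the data dict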
    out = Stream().pluck(2)
    S = set()
    out.pluck("i").sink(S.add)

    stream, group = uuid(2)
    sources = set()
    for _ in range(3):
        con = uuid()
        source = Stream.from_redis_consumer_group(
            stream,
            group,
            con,
            count=1,
            timeout=0.1,
        )
        source.connect(out)
        source.start()
        sources.add(source)

    for x in data:
        redis.xadd(stream, x)

    wait_for(lambda: len(S) == 50, 1)
    assert S == set(x["i"] for x in data)

    for s in sources:
        s.stop()
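
All of the examples on this page poll with wait_for from streamz.utils_test. A minimal sketch of such a helper, matching the call shapes used below (positional predicate and timeout, optional fail_func, keyword period), assuming pytest is available:

import pytest
from time import sleep, time

def wait_for(predicate, timeout, fail_func=None, period=0.001):
    # poll until predicate() is truthy or `timeout` seconds have elapsed
    deadline = time() + timeout
    while not predicate():
        sleep(period)
        if time() > deadline:
            if fail_func is not None:
                fail_func()  # e.g. print diagnostic state before failing
            pytest.fail("condition not reached until %s seconds" % timeout)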
Example #2
def test_heartbeats(redis: StrictRedis):
    stream, group = uuid(2)

    redis.xgroup_create(stream, group, mkstream=True)

    interval = 0.1
    timeout = 0.5

    hearts = []
    for _ in range(5):
        heart = Heart(stream,
                      group,
                      uuid(),
                      interval=interval,
                      timeout=timeout)
        hearts.append(heart)
        heart.start()

    S = set()
    sub = redis.pubsub()
    sub.subscribe(group)

    def predicate():
        m = sub.get_message()
        if m is not None:
            S.add(m["data"])
        return len(S) == 5

    wait_for(predicate, 5, period=0.01)

    for h in hearts:
        h.stop()
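
Judging from this test, each Heart periodically publishes its consumer name on the group's pub/sub channel until stopped. A rough, hypothetical sketch of that loop using redis-py and a thread (SimpleHeart and its signature are illustrative, not the library's actual class):

import threading
from redis import StrictRedis

class SimpleHeart:
    # hypothetical stand-in: publish `name` on channel `group` every `interval` seconds
    def __init__(self, group, name, interval=0.1):
        self.redis = StrictRedis()
        self.group = group
        self.name = name
        self.interval = interval
        self._stop = threading.Event()

    def start(self):
        self._thread = threading.Thread(target=self._beat, daemon=True)
        self._thread.start()

    def _beat(self):
        # Event.wait returns True once stop() is called, ending the loop
        while not self._stop.wait(self.interval):
            self.redis.publish(self.group, self.name)

    def stop(self):
        self._stop.set()
        self._thread.join()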
Example #3
    def run_and_fail():
        name = uuid()
        source = Stream.from_redis_consumer_group(
            stream,
            group,
            name,
            count=1,
            timeout=0.1,
        )
        buffer = source.buffer(10)
        buffer.rate_limit(0.1).pluck(1).sink_to_redis_list(target)
        source.start()

        wait_for(lambda: buffer.queue.qsize() == 10, 3)
        buffer.queue = Queue(10)  # lose data in the buffer, won't be ACKed
        source.stop()

        def pending_10():
            cons = convert_bytes(redis.xpending(stream, group))["consumers"]
            for con in cons:
                if con["name"] == name and con["pending"] == 10:
                    return True
            return False

        wait_for(pending_10, 1, period=0.1)
Example #4
def test_backpressure_connect_empty_stream():
    @Stream.register_api()
    class from_list(Stream):
        def __init__(self, source, **kwargs):
            self.source = source
            super().__init__(ensure_io_loop=True, **kwargs)

        def start(self):
            self.stopped = False
            self.loop.add_callback(self.run)

        @gen.coroutine
        def run(self):
            while not self.stopped and len(self.source) > 0:
                yield self._emit(self.source.pop(0))

    source_list = [0, 1, 2, 3, 4]
    source = Stream.from_list(source_list)
    sout = Stream()
    L = sout.rate_limit(1).sink_to_list()
    source.connect(sout)
    source.start()

    wait_for(lambda: L == [0], 0.01)
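    # rate_limit(1) keeps _emit blocked, so from_list cannot have drained the remaining elements yet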
    assert len(source_list) > 0
Example #5
def test_simple():
    cat = intake.open_catalog(catfile)
    s = cat.simple.read()
    l = s.sink_to_list()
    assert not l
    s.start()
    wait_for(lambda: l == [1, 2, 3], timeout=1)
Example #6
def test_from_kafka():
    j = random.randint(0, 10000)
    ARGS = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'streamz-test%i' % j
    }
    with kafka_service() as kafka:
        kafka, TOPIC = kafka
        stream = Stream.from_kafka([TOPIC], ARGS, asynchronous=True)
        out = stream.sink_to_list()
        stream.start()
        yield gen.sleep(0.1)  # for loop to run
        for i in range(10):
            yield gen.sleep(0.2)
            kafka.produce(TOPIC, b'value-%d' % i)
        kafka.flush()
        # it takes some time for messages to come back out of kafka
        wait_for(lambda: len(out) == 10, 10, period=0.1)
        assert out[-1] == b'value-9'

        kafka.produce(TOPIC, b'final message')
        kafka.flush()
        wait_for(lambda: out[-1] == b'final message', 10, period=0.1)

        stream._close_consumer()
        kafka.produce(TOPIC, b'lost message')
        kafka.flush()
        # absolute sleep here, since we expect output list *not* to change
        yield gen.sleep(1)
        assert out[-1] == b'final message'
        stream._close_consumer()
Example #7
def launch_kafka():
    stop_docker(let_fail=True)
    subprocess.call(shlex.split("docker pull spotify/kafka"))
    cmd = ("docker run -d -p 2181:2181 -p 9092:9092 --env "
           "ADVERTISED_HOST=127.0.0.1 --env ADVERTISED_PORT=9092 "
           "--name streamz-kafka spotify/kafka")
    print(cmd)
    cid = subprocess.check_output(shlex.split(cmd)).decode()[:-1]

    def end():
        if cid:
            stop_docker(cid=cid)

    atexit.register(end)

    def predicate():
        try:
            out = subprocess.check_output(['docker', 'logs', cid],
                                          stderr=subprocess.STDOUT)
            return b'kafka entered RUNNING state' in out
        except subprocess.CalledProcessError:
            pass

    wait_for(predicate, 10, period=0.1)
    return cid
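
The stop_docker helper used above is not shown; a plausible sketch, assuming the docker CLI is on PATH and mirroring the let_fail= and cid= calls in launch_kafka:

import shlex
import subprocess

def stop_docker(name="streamz-kafka", cid=None, let_fail=False):
    # force-remove the container by id, or look it up by name first
    try:
        if cid is None:
            cmd = shlex.split('docker ps -q --filter "name=%s"' % name)
            cid = subprocess.check_output(cmd).strip().decode()
        if cid:
            subprocess.call(["docker", "rm", "-f", cid])
    except subprocess.CalledProcessError:
        if not let_fail:
            raise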
Example #8
def test_from_kafka_thread():
    j = random.randint(0, 10000)
    ARGS = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'streamz-test%i' % j
    }
    with kafka_service() as kafka:
        kafka, TOPIC = kafka
        stream = Stream.from_kafka([TOPIC], ARGS)
        out = stream.sink_to_list()
        stream.start()
        for i in range(10):
            kafka.produce(TOPIC, b'value-%d' % i)
        kafka.flush()
        # it takes some time for messages to come back out of kafka
        startlen = len([o for o in out if o])
        wait_for(lambda: len([o for o in out if o]) == startlen + 10,
                 10,
                 period=0.1)

        assert out[-1] == b'value-9'
        kafka.produce(TOPIC, b'final message')
        kafka.flush()
        wait_for(lambda: out[-1] == b'final message', 10, period=0.1)

        stream._close_consumer()
        kafka.produce(TOPIC, b'lost message')
        kafka.flush()
        # absolute sleep here, since we expect output list *not* to change
        sleep(1)
        assert out[-1] == b'final message'
Example #9
def test_tcp():
    port = 9876
    s = Source.from_tcp(port)
    out = s.sink_to_list()
    s.start()
    wait_for(lambda: s.server is not None, 2, period=0.02)

    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect(("localhost", port))
        sock.send(b'data\n')
        sock.close()

        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect(("localhost", port))
        sock.send(b'data\n')

        sock2 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock2.connect(("localhost", port))
        sock2.send(b'data2\n')
        wait_for(lambda: out == [b'data\n', b'data\n', b'data2\n'],
                 2,
                 period=0.01)
    finally:
        s.stop()
        sock.close()
        sock2.close()
Example #10
def test_kafka_batch():
    j = random.randint(0, 10000)
    ARGS = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'streamz-test%i' % j,
        'auto.offset.reset': 'latest'
    }
    with kafka_service() as kafka:
        kafka, TOPIC = kafka
        # These messages aren't read since Stream starts reading from latest offsets
        for i in range(10):
            kafka.produce(TOPIC, b'value-%d' % i, b'%d' % i)
        kafka.flush()
        stream = Stream.from_kafka_batched(TOPIC,
                                           ARGS,
                                           max_batch_size=4,
                                           keys=True)
        out = stream.sink_to_list()
        stream.start()
        wait_for(lambda: stream.upstream.started, 10, 0.1)
        for i in range(10):
            kafka.produce(TOPIC, b'value-%d' % i, b'%d' % i)
        kafka.flush()
        # out may still be empty or first item of out may be []
        wait_for(lambda: any(out) and out[-1][-1]['value'] == b'value-9',
                 10,
                 period=0.2)
        assert out[-1][-1]['key'] == b'9'
        # max_batch_size checks
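        # 10 messages with max_batch_size=4 arrive as three batches: 4, 4 and 2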
        assert len(out[0]) == len(out[1]) == 4 and len(out) == 3
        stream.upstream.stopped = True
Example #11
def test_from_iterable_backpressure():
    it = iter(range(5))
    source = Source.from_iterable(it)
    L = source.rate_limit(0.1).sink_to_list()
    source.start()

    wait_for(lambda: L == [0], 1, period=0.01)
    assert next(it) == 2  # element 1 was already pulled and sits in the blocked _emit, so the iterator resumes at 2
Example #12
def test_periodic():
    s = Source.from_periodic(lambda: True)
    l = s.sink_to_list()
    assert s.stopped
    s.start()
    wait_for(lambda: l, 0.3, period=0.01)
    wait_for(lambda: len(l) > 1, 0.3, period=0.01)
    assert all(l)
Example #13
def test_df():
    dataframe = pytest.importorskip("streamz.dataframe")
    cat = intake.open_catalog(catfile)
    s = cat.df.read()
    assert isinstance(s, dataframe.DataFrame)
    s.start()
    wait_for(lambda: s.current_value is not None, timeout=1)
    cv = s.current_value
    wait_for(lambda: not s.current_value.equals(cv), timeout=1)
Example #14
def test_from_redis_lists(redis: StrictRedis):
    name = uuid()
    source = Stream.from_redis_lists(name, timeout=0.1)
    L = source.pluck(1).map(int).sink_to_list()
    source.start()

    redis.rpush(name, *list(range(3)))

    wait_for(lambda: L == [0, 1, 2], 3)
    source.stop()
Example #15
def test_dask():
    distr = pytest.importorskip("dask.distributed")
    import streamz.dask
    with distr.Client(processes=False):
        cat = intake.open_catalog(catfile)
        s = cat.simple.to_dask()
        assert isinstance(s, streamz.dask.DaskStream)
        l = s.gather().sink_to_list()
        s.start()
        wait_for(lambda: l == [1, 2, 3], timeout=1)
Example #16
def test_claim(redis: StrictRedis, data):
    stream, group, target = uuid(3)

    for x in data:
        redis.xadd(stream, x)

    def run_and_fail():
        name = uuid()
        source = Stream.from_redis_consumer_group(
            stream,
            group,
            name,
            count=1,
            timeout=0.1,
        )
        buffer = source.buffer(10)
        buffer.rate_limit(0.1).pluck(1).sink_to_redis_list(target)
        source.start()

        wait_for(lambda: buffer.queue.qsize() == 10, 3)
        buffer.queue = Queue(10)  # lose data in the buffer, won't be ACKed
        source.stop()

        def pending_10():
            cons = convert_bytes(redis.xpending(stream, group))["consumers"]
            for con in cons:
                if con["name"] == name and con["pending"] == 10:
                    return True
            return False

        wait_for(pending_10, 1, period=0.1)

    for _ in range(10):
        run_and_fail()

    source = Stream.from_redis_consumer_group(
        stream,
        group,
        uuid(),
        heartbeat_interval=0.1,
        claim_timeout=1,
        count=10,
        timeout=0.1,
    )
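    # heartbeat_interval and claim_timeout let this consumer claim the messages left pending by the failed consumers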
    source.pluck(1).sink_to_redis_list(target)
    source.start()

    wait_for(
        lambda: redis.llen(target) == 500,
        15,
        lambda: print(redis.llen(target)),
        period=0.1,
    )

    source.stop()
Example #17
def test_basic(redis: StrictRedis, data):
    stream = uuid()
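    # default_start_id=0 presumably makes the source read the stream from its beginning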
    source = Stream.from_redis_streams(stream, timeout=0.1, default_start_id=0)
    L = source.sink_to_list()
    source.start()

    for x in data:
        redis.xadd(stream, x)

    wait_for(lambda: len(L) == 3, 2)
    assert [x[2] for x in L] == data
    source.stop()
Example #18
def test_multiple(redis: StrictRedis):
    l1, l2 = uuid(2)

    source = Stream.from_redis_lists([l1, l2], timeout=0.1)
    L = source.pluck(1).map(int).sink_to_list()
    source.start()

    redis.rpush(l1, *list(range(3)))
    redis.rpush(l2, *list(range(3)))

    wait_for(lambda: len(L) == 6, 2)
    source.stop()
Example #19
def test_index(stream):
    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    a = DataFrame(example=df, stream=stream)
    b = a.index + 5
    L = b.stream.gather().sink_to_list()

    a.emit(df)
    a.emit(df)

    wait_for(lambda: len(L) > 1, timeout=2, period=0.05)

    assert_eq(L[0], df.index + 5)
    assert_eq(L[1], df.index + 5)
Example #20
def test_ws_roundtrip():
    pytest.importorskip("websockets")
    s0 = Stream.from_websocket("localhost", 8989, start=True)
    l = s0.sink_to_list()

    data = [b'0123'] * 4
    s = Stream.from_iterable(data)
    s.to_websocket("ws://localhost:8989")
    s.start()

    wait_for(lambda: data == l, timeout=1)
    s.stop()
    s0.stop()
Example #21
def test_from_iterable_stop():
    from _pytest.outcomes import Failed

    source = Source.from_iterable(range(5))
    L = source.rate_limit(0.01).sink_to_list()
    source.start()

    wait_for(lambda: L == [0], 1)
    source.stop()

    assert source.stopped
    with pytest.raises(Failed):
        wait_for(lambda: L == [0, 1, 2], 0.1)
Example #22
def test_mqtt_roundtrip():
    pytest.importorskip("paho.mqtt.client")
    s0 = Stream.from_mqtt("mqtt.eclipseprojects.io", 1883,
                          "streamz/sensor/temperature")
    l = s0.map(lambda msg: msg.payload).sink_to_list()
    s0.start()

    data = [b'0123'] * 4
    s = Stream.from_iterable(data)
    s.to_mqtt("mqtt.eclipseprojects.io", 1883, "streamz/sensor/temperature")
    s.start()

    wait_for(lambda: data == l, timeout=1)
    s.stop()
    s0.stop()
Example #23
def test_increment_restart(pg):
    table = "inc_re"
    src = Stream.from_postgres_increment(table,
                                         pg,
                                         initial_value=30,
                                         polling_interval=1,
                                         limit=10)
    L = src.sink_to_list()

    w = Writer(src.strategy.loader.connection, table)
    w.create_table()
    w.insert(50)
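    # presumably only rows past initial_value=30 are emitted: 20 of the 50 inserted rows, read in pages of limit=10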
    src.start()

    wait_for(lambda: len(L) == 20, 1, period=0.1)
Example #24
def test_to_kafka():
    ARGS = {'bootstrap.servers': 'localhost:9092'}
    with kafka_service() as kafka:
        _, TOPIC = kafka
        source = Stream()
        kafka = source.to_kafka(TOPIC, ARGS)
        out = kafka.sink_to_list()

        for i in range(10):
            yield source.emit(b'value-%d' % i)

        source.emit('final message')
        kafka.flush()
        wait_for(lambda: len(out) == 11, 10, period=0.1)
        assert out[-1] == b'final message'
Example #25
def test_source(clients, info):
    topic, subscription = info
    source = Stream.from_gcp_pubsub(subscription, timeout=1)
    L = source.map(lambda x: int(x.data)).sink_to_list()
    source.start()

    pub, _ = clients
    for i in range(10):
        pub.publish(topic, str(i).encode())

    wait_for(lambda: list(range(10)) == L, 1)

    with pytest.raises(Failed):
        wait_for(lambda: len(L) > 10, 3)  # test ack

    source.stop()
Example #26
def test_ack(redis: StrictRedis, data):
    stream, group, con = uuid(3)
    source = Stream.from_redis_consumer_group(stream, group, con, timeout=0.1)
    L = source.sink_to_list()

    for x in data:
        redis.xadd(stream, x)

    source.start()

    wait_for(lambda: len(L) == 3, 3, lambda: print(L))
    sleep(0.05)  # wait a bit for the last ack
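    # XREADGROUP from ID 0 returns this consumer's pending (delivered but un-ACKed) entries, which should now be empty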
    for _, messages in redis.xreadgroup(group, con, {stream: 0}):
        assert messages == []

    source.stop()
Example #27
def redis(name="test-streamz-redis"):
    cleanup(name=name)
    run_cmd = shlex.split(f"docker run -d -p 6379:6379 --name {name} redis")
    subprocess.check_call(run_cmd)

    def predicate():
        cmd = shlex.split(f"docker logs {name}")
        logs = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        return b"Ready to accept connections" in logs

    wait_for(predicate, 10, period=0.1)
    try:
        with StrictRedis() as client:
            yield client
    finally:
        cleanup(name=name, fail=True)
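
The tests above declare a redis: StrictRedis parameter, so this generator is presumably registered as a pytest fixture. A minimal sketch of that wiring (the wrapper name is an assumption):

import pytest

@pytest.fixture(name="redis")
def redis_fixture():
    # delegate to the container-managing generator defined above
    yield from redis()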
Example #28
def test_multiple(redis: StrictRedis, data):
    stream1, stream2 = uuid(2)
    source = Stream.from_redis_streams({stream1: 0, stream2: 0}, timeout=0.1)
    L1 = source.pluck(0).filter(lambda x: x == stream1).sink_to_list()
    L2 = source.pluck(0).filter(lambda x: x == stream2).sink_to_list()
    source.start()

    for x in data:
        redis.xadd(stream1, x)
        redis.xadd(stream2, x)

    wait_for(lambda: len(L1) == 3, 3)
    wait_for(lambda: len(L2) == 3, 3)

    assert L1 == [stream1] * 3
    assert L2 == [stream2] * 3
    source.stop()
Example #29
def test_kafka_batch_checkpointing_sync_nodes():
    '''
    Streams 1 and 3 have different consumer groups, while Stream 2
    has the same group as 1. Hence, Stream 2 does not re-read the
    data that had been finished processing by Stream 1, i.e. it
    picks up from where Stream 1 had left off.
    '''
    j1 = random.randint(0, 10000)
    ARGS1 = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'streamz-test%i' % j1,
        'enable.auto.commit': False,
        'auto.offset.reset': 'earliest'
    }
    j2 = j1 + 1
    ARGS2 = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'streamz-test%i' % j2,
        'enable.auto.commit': False,
        'auto.offset.reset': 'earliest'
    }
    with kafka_service() as kafka:
        kafka, TOPIC = kafka
        for i in range(10):
            kafka.produce(TOPIC, b'value-%d' % i)
        kafka.flush()
        stream1 = Stream.from_kafka_batched(TOPIC, ARGS1)
        out1 = stream1.map(split).filter(
            lambda x: x[-1] % 2 == 1).sink_to_list()
        stream1.start()
        wait_for(lambda: any(out1) and out1[-1][-1] == 9, 10, period=0.2)
        stream1.upstream.stopped = True
        stream2 = Stream.from_kafka_batched(TOPIC, ARGS1)
        out2 = stream2.map(split).filter(
            lambda x: x[-1] % 2 == 1).sink_to_list()
        stream2.start()
        time.sleep(5)
        assert len(out2) == 0
        stream2.upstream.stopped = True
        stream3 = Stream.from_kafka_batched(TOPIC, ARGS2)
        out3 = stream3.map(split).filter(
            lambda x: x[-1] % 2 == 1).sink_to_list()
        stream3.start()
        wait_for(lambda: any(out3) and out3[-1][-1] == 9, 10, period=0.2)
        stream3.upstream.stopped = True
Example #30
def test_kafka_batch():
    j = random.randint(0, 10000)
    ARGS = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'streamz-test%i' % j
    }
    with kafka_service() as kafka:
        kafka, TOPIC = kafka
        stream = Stream.from_kafka_batched(TOPIC, ARGS)
        out = stream.sink_to_list()
        stream.start()
        for i in range(10):
            kafka.produce(TOPIC, b'value-%d' % i)
        kafka.flush()
        # out may still be empty or first item of out may be []
        wait_for(lambda: any(out) and out[-1][-1] == b'value-9',
                 10,
                 period=0.2)
        stream.stopped = True