Example #1
# shared imports for the following snippets; StreamBuffer and Record are assumed to be
# provided by the stream-join library under test
import random
import sys
import time


def test_one_one():
    ts = time.time()

    # create an instance of the StreamBuffer class
    stream_buffer = StreamBuffer(instant_emit=True, delta_time=sys.maxsize, left="r", buffer_results=True,
                                 verbose=True)

    # create lists to store the input streams
    events_r = list()
    events_s = list()

    # Fill the input streams with randomized Records
    N = 100
    random.seed(0)
    event_order = ["r", "s"] * int(N / 2)
    start_time = 1600000000
    for i in range(len(event_order)):
        if event_order[i] == "r":
            events_r.append(Record(timestamp=i + start_time, quantity=event_order[i], result=random.random()))
        elif event_order[i] == "s":
            events_s.append(Record(timestamp=i + start_time, quantity=event_order[i], result=random.random()))

    ingestion_order = ["r", "s"] * int(N / 2)  # alternate strictly between r and s
    n_r = n_s = 0
    for i in range(N):
        # decide, based on the ingestion order, from which stream the next record is forwarded
        if ingestion_order[i] == "r":
            # ingest the next record from stream r
            stream_buffer.ingest_left(events_r[n_r])  # instant emit
            n_r += 1
        elif ingestion_order[i] == "s":
            # ingest the next record from stream s
            stream_buffer.ingest_right(events_s[n_s])
            n_s += 1

    # print("\nRecords in buffer r:")
    # for rec in stream_buffer.buffer_left:
    #     print(rec)
    # print("Records in buffer s:")
    # for rec in stream_buffer.buffer_right:
    #     print(rec)
    # print("Merged records in buffer t:")
    events_t = stream_buffer.fetch_results()
    # for rec in events_t:
    #     print(rec)

    print(f"Join time-series with |r| = {n_r}, |s| = {n_s}.")
    print(f"joined {len(events_t)} tuples in {time.time() - ts} s.")
    assert len(events_t) == 99
Example #2
def test_randomized_many():
    # create an instance of the StreamBuffer class
    stream_buffer = StreamBuffer(instant_emit=True, delta_time=sys.maxsize, left="r", buffer_results=True,
                                 verbose=False)

    # Test Settings:
    # Create lists to store the input streams
    events_r = list()
    events_s = list()

    # Fill the input streams with randomized Records
    n_r = n_s = 10_000
    random.seed(0)
    start_time = 1600000000
    phenomenon_time = start_time
    for i in range(n_r):
        phenomenon_time += random.random()
        events_r.append(Record(timestamp=phenomenon_time, quantity="r", result=random.random()))
    phenomenon_time = start_time
    for i in range(n_s):
        phenomenon_time += random.random()
        events_s.append(Record(timestamp=phenomenon_time, quantity="s", result=random.random()))

    ingestion_order = ["r"] * n_r + ["s"] * n_s
    random.shuffle(ingestion_order)

    n_r = n_s = 0
    ts = time.time()
    for quantity in ingestion_order:
        # decide, based on the ingestion order, from which stream the next record is forwarded
        if quantity == "r":
            # ingest the next record from stream r
            stream_buffer.ingest_left(events_r[n_r])  # instant emit
            n_r += 1
        elif quantity == "s":
            # ingest the next record from stream s
            stream_buffer.ingest_right(events_s[n_s])
            n_s += 1

    events_t = stream_buffer.fetch_results()
    stop_time = time.time()

    print(f"Join time-series with |r| = {n_r}, |s| = {n_s}.")
    print(f"joined {len(events_t)} tuples in {time.time() - ts} s.")
    print(f"that are {int(len(events_t)/(time.time() - ts))} joins per second.")
    assert len(events_t) == 23041
    assert stop_time - ts < 2  # we got around 0.4 s
Example #3
def test_delayed_many():
    imbalance = 100  # additional latency of stream s

    # create an instance of the StreamBuffer class
    stream_buffer = StreamBuffer(instant_emit=True, delta_time=sys.maxsize, left="r", buffer_results=True,
                                 verbose=False)

    # Test Settings:
    # Create lists to store the input streams
    events_r = list()
    events_s = list()

    # Fill the input streams with randomized Records
    N = 10_000
    random.seed(0)
    event_order = (["r"] * 5 + ["s"] * 5) * int(N/10)
    start_time = 1600000000

    for i in range(len(event_order)):
        if event_order[i] == "r":
            events_r.append(Record(timestamp=i + start_time, quantity=event_order[i], result=random.random()))
        elif event_order[i] == "s":
            events_s.append(Record(timestamp=i + start_time, quantity=event_order[i], result=random.random()))

    ingestion_order = ["r"] * imbalance + (["r"] * 5 + ["s"] * 5) * int(N/10)
    n_r = 0
    n_s = 0
    ts = time.time()
    while n_r < len(events_r) and n_s < len(events_s):
        # decide, based on the ingestion order, from which stream the next record is forwarded
        if ingestion_order[n_r + n_s] == "r":
            # ingest the next record from stream r
            stream_buffer.ingest_left(events_r[n_r])  # instant emit
            n_r += 1
        elif ingestion_order[n_r + n_s] == "s":
            # ingest the next record from stream s
            stream_buffer.ingest_right(events_s[n_s])
            n_s += 1

    events_t = stream_buffer.fetch_results()

    print(f"Join time-series with |r| = {n_r}, |s| = {n_s}.")
    print(f"joined {len(events_t)} tuples in {time.time() - ts} s.")
    print(f"that are {int(len(events_t)/(time.time() - ts))} joins per second.")
    assert len(events_t) == 13702
    assert time.time() - ts < 1  # we got around 0.2 s
Example #4
def test_timeout_five_five():
    # create an instance of the StreamBuffer class
    stream_buffer = StreamBuffer(instant_emit=True, delta_time=3, left="r", buffer_results=True,
                                 verbose=True)

    # Test Settings:
    # Create lists to store the input streams
    events_r = list()
    events_s = list()

    # Fill the input streams with randomized Records
    N = 20
    random.seed(0)
    event_order = (["r"] * 5 + ["s"] * 5) * int(N / 10)
    start_time = 1600000000

    for i in range(len(event_order)):
        if event_order[i] == "r":
            events_r.append(Record(timestamp=i + start_time, quantity=event_order[i], result=random.random()))
        elif event_order[i] == "s":
            events_s.append(Record(timestamp=i + start_time, quantity=event_order[i], result=random.random()))

    ingestion_order = (["r"] * 5 + ["s"] * 5) * N
    n_r = n_s = 0
    ts = time.time()
    for i in range(N):
        # decide, based on the ingestion order, from which stream the next record is forwarded
        if ingestion_order[i] == "r":
            # ingest the next record from stream r
            stream_buffer.ingest_left(events_r[n_r])  # instant emit
            n_r += 1
        elif ingestion_order[i] == "s":
            # ingest the next record from stream s
            stream_buffer.ingest_right(events_s[n_s])
            n_s += 1

    events_t = stream_buffer.fetch_results()

    print(f"Join time-series with |r| = {n_r}, |s| = {n_s}.")
    print(f"joined {len(events_t)} tuples in {time.time() - ts} s.")
    assert len(events_t) == 13
Example #5
def test_unordered():
    # create an instance of the StreamBuffer class
    stream_buffer = StreamBuffer(instant_emit=True, delta_time=sys.maxsize, left="r", buffer_results=True,
                                 verbose=True)

    # Fill the input stream with randomized Records
    random.seed(0)
    start_time = 1600000000

    # Test Settings:
    # Create a list to store the input records
    events_r = list()
    for i in range(10):
        events_r.append(Record(timestamp=i + start_time, quantity="r", result=random.random()))

    ts = time.time()
    # first ingest all records into r, then ingest records into s
    for event in events_r:
        stream_buffer.ingest_left(event)  # instant emit

    print("Ingest Records into s.")
    stream_buffer.ingest_right(Record(timestamp=start_time - 0.5, quantity="s", result=random.random()))
    stream_buffer.ingest_right(Record(timestamp=start_time + 0.5, quantity="s", result=random.random()))
    stream_buffer.ingest_right(Record(timestamp=start_time + 5.5, quantity="s", result=random.random()))
    stream_buffer.ingest_right(Record(timestamp=start_time + 9.5, quantity="s", result=random.random()))

    events_t = stream_buffer.fetch_results()

    print(f"Join time-series with |r| = {len(events_r)}, |s| = {4}.")
    print(f"joined {len(events_t)} tuples in {time.time() - ts} s.")
    if time.time() - ts > 1e-3:
        print(f"that are {int(len(events_t)/(time.time() - ts))} joins per second.")
    assert len(events_t) == 20
    d = {'r.quantity': 'r', 'r.phenomenonTime': 1600000006, 'r.result': 0.7837985890347726,
         's.quantity': 's', 's.phenomenonTime': 1600000005.5, 's.result': 0.28183784439970383}
    assert d in events_t
Example #6
def join_fct(record_left, record_right):
    """
    Blueprint for the join function, takes two records and merges them using the defined routine.
    :param record_left: Record 
        Record that is joined as left join partner
    :param record_right: Record 
        Record that is joined as right join partner
    :return: Record
        the resulting record from the join of both partners
    """
    record = Record(quantity="t",
                    result=record_left.get_result() * record_right.get_result(),
                    timestamp=(record_left.get_time() + record_right.get_time()) / 2)
    # here, the resulting record can be produced to e.g. Apache Kafka or a pipeline
    return record
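
The comment above only hints at producing the joined record. Below is a hypothetical variant of the blueprint that does so; it is a sketch, not part of the original examples: kafka_producer and KAFKA_TOPIC_OUT are assumed names for an already configured confluent_kafka producer and an output topic, and the payload fields follow the JSON layout consumed in the next example.

import json

def join_fct_with_producer(record_left, record_right):
    # merge the two records exactly as join_fct above does
    record = Record(quantity="t",
                    result=record_left.get_result() * record_right.get_result(),
                    timestamp=(record_left.get_time() + record_right.get_time()) / 2)
    # serialize and produce the joined record; kafka_producer and KAFKA_TOPIC_OUT
    # are assumptions and must be provided by the surrounding application
    payload = {"quantity": record.get_quantity(),
               "phenomenonTime": record.get_time(),
               "result": record.get_result()}
    kafka_producer.produce(KAFKA_TOPIC_OUT, json.dumps(payload).encode("utf-8"))
    return record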
Example #7
def test_commit_transaction(round_nr=1):
    print(f"\n################################ commit, transaction {round_nr} ######################################\n")

    # start transaction if it is the first round
    if round_nr == 1:
        # Initialize producer transaction.
        kafka_producer.init_transactions()
        # Start producer transaction for round 1 only
        kafka_producer.begin_transaction()

    # no commit_fct is used; the join_fct defined above is used together with Kafka transactions
    lsb = StreamBuffer(instant_emit=True, left="actSpeed_C11", right="vaTorque_C11",
                       buffer_results=True, delta_time=1,
                       verbose=VERBOSE, join_function=join_fct)

    start_time = stop_time = last_transaction_time = time.time()
    n_none_polls = 0
    started = False
    while True:
        # msg = kafka_consumer.poll(0.1)
        msgs = kafka_consumer.consume(num_messages=MAX_BATCH_SIZE, timeout=0.1)  # is faster, returns a list

        # stop after 30 consecutive empty polls, as no further messages are expected
        if n_none_polls >= 30:
            print("  Break as there won't come any further messages.")
            break
        elif len(msgs) == 0:
            n_none_polls += 1
            continue
        else:
            # update to latest running-time
            stop_time = time.time()
            if not started:  # set starter flag if first message was consumed
                started = True
                print("Start the count clock")
                # update to latest not-started-time
                start_time = stop_time

        # iterate over each message that was consumed
        for msg in msgs:
            record_json = json.loads(msg.value().decode('utf-8'))
            if VERBOSE:
                if record_json.get("quantity").endswith("_C11"):
                    print(f"Received new record: {record_json}")

            # create a Record from the json
            record = Record(
                thing=record_json.get("thing"),
                quantity=record_json.get("quantity"),
                timestamp=record_json.get("phenomenonTime"),
                result=record_json.get("result"),
                topic=msg.topic(), partition=msg.partition(), offset=msg.offset())

            # ingest the record into the StreamBuffer instance, instant emit
            if msg.topic() == KAFKA_TOPIC_IN_0:  # "actSpeed_C11":
                lsb.ingest_left(record)  # with instant emit
            elif msg.topic() == KAFKA_TOPIC_IN_1:  # "vaTorque_C11":
                lsb.ingest_right(record)

        # commit the transaction every TRANSACTION_TIME
        if stop_time >= last_transaction_time + TRANSACTION_TIME:
            last_transaction_time = stop_time
            commit_transaction(stream_buffer=lsb, verbose=VERBOSE, commit_time=last_transaction_time)

        # break if MAX_JOIN_CNT or more joins have been performed
        if MAX_JOIN_CNT is not None and lsb.get_join_counter() >= MAX_JOIN_CNT:
            print("Reached the maximal join count, graceful stopping.")
            break

        # sleep to allow other processes to run
        time.sleep(0)

    try:
        # commit processed message offsets to the transaction
        kafka_producer.send_offsets_to_transaction(
            kafka_consumer.position(kafka_consumer.assignment()),
            kafka_consumer.consumer_group_metadata())
        # commit transaction
        kafka_producer.commit_transaction()
    except confluent_kafka.KafkaException as e:
        if confluent_kafka.KafkaError.str(e.args[0]) == "Operation not valid in state Ready":
            print("_STATE exception, should occur here.")
        else:
            print("Couldn't commit transaction.")
            raise e

    events_out = lsb.fetch_results()
    print(f"\nLengths: |{RES_QUANTITY}| = {lsb.get_join_counter()}, "
          f"|{QUANTITIES[0]}| = {lsb.get_left_counter()}, |{QUANTITIES[1]}| = {lsb.get_right_counter()}.")
    if start_time != stop_time:
        print(f"Joined time-series {stop_time - start_time:.6f} s long, "
              f"that are {lsb.get_join_counter() / (stop_time - start_time):.2f} joins per second.")
    if round_nr == 1:
        print(f" first record: \t{events_out[0]}")
        print(f" last record:  \t{events_out[-1]}")
        assert len(events_out) == 1595
        # assert cnt_left == 2681  # these values can differ between runs
        # assert cnt_right == 4705
        print(f"Result #0: {events_out[0]}")
        assert events_out[0].get_quantity() == "vaPower_C11"
        assert round(events_out[0].get_time() - 1554096460.415, 3) == 0
        assert round(events_out[0].get_result() - 86.71966370389097, 5) == 0
        assert round(events_out[-1].get_time() - 1554355545.929, 3) == 0
        assert round(events_out[-1].get_result() - 0.0, 5) == 0
    elif round_nr == 2:
        assert len(events_out) == 0
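
The commit_transaction helper called in the polling loop is not shown in this listing. The sketch below is only an assumption of what such a helper might do, not the original implementation: KAFKA_TOPIC_OUT and the payload layout are hypothetical, while kafka_producer and kafka_consumer are the globals already used above.

def commit_transaction(stream_buffer, verbose=False, commit_time=None):
    # hypothetical sketch: produce all joined records buffered so far
    for record in stream_buffer.fetch_results():
        payload = {"quantity": record.get_quantity(),
                   "phenomenonTime": record.get_time(),
                   "result": record.get_result()}
        kafka_producer.produce(KAFKA_TOPIC_OUT, json.dumps(payload).encode("utf-8"))
    # attach the consumer's current offsets to the running transaction and commit it
    kafka_producer.send_offsets_to_transaction(
        kafka_consumer.position(kafka_consumer.assignment()),
        kafka_consumer.consumer_group_metadata())
    kafka_producer.commit_transaction()
    # one possible design: immediately begin the next transaction so that
    # subsequently produced records remain transactional
    kafka_producer.begin_transaction()
    if verbose and commit_time is not None:
        print(f"Committed transaction at {commit_time}.")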