Beispiel #1
0
def test_count_small():
    """Check exact counts for a handful of additions to a LinearCounter.

    A fresh counter must report zero, and adding an element that was
    already added must not change the reported count.
    """
    counter = LinearCounter(100000)
    assert counter.count() == 0

    # Each step is (element to add, count expected right after adding it).
    steps = (
        ("test", 1),
        ("test", 1),   # duplicate: count stays the same
        ("test2", 2),
    )
    for element, expected in steps:
        counter.add(element)
        assert counter.count() == expected
Beispiel #2
0
def test_count():
    """Check LinearCounter counts on a tiny input and on the LOREM_TEXT words.

    The first part verifies exact counts for a few additions (including a
    duplicate). The second part feeds every whitespace-separated word of
    ``LOREM_TEXT["text"]`` into a new counter and compares the result with
    ``LOREM_TEXT["num_of_unique_words"]``.
    """
    counter = LinearCounter(100000)
    assert counter.count() == 0

    # Each step is (element to add, count expected right after adding it).
    for element, expected in (("test", 1), ("test", 1), ("test2", 2)):
        counter.add(element)
        assert counter.count() == expected

    # Drop the first counter before creating the one used for the text.
    del counter

    text_counter = LinearCounter(100000)
    tokens = LOREM_TEXT["text"].split()
    for token in tokens:
        text_counter.add(token)

    observed = text_counter.count()
    assert observed == LOREM_TEXT["num_of_unique_words"]
Beispiel #3
0
def test_count():
    """Check that the average relative counting error stays within 10%.

    Adds 100 distinct elements one at a time; after each addition the
    relative difference between the true number of added elements and the
    counter's reported count is recorded. The mean of those relative errors
    must lie in the range [0, 0.1].
    """
    counter = LinearCounter(100000)

    relative_errors = []
    for index in range(100):
        # The true cardinality after this addition is the 1-based position.
        true_count = index + 1
        counter.add("element_{}".format(index))

        deviation = abs(true_count - counter.count())
        relative_errors.append(deviation / float(true_count))

    avg_error = sum(relative_errors) / float(len(relative_errors))

    assert 0 <= avg_error <= 0.1
    # NOTE(review): this is a headless fragment - the enclosing `def` and the
    # initialisation of `consumer`, `total_bytes`, `msg_consumed_count`,
    # `msg_consumed_max` and `consumer_start` are not visible here.
    #
    # Purpose: iterate over consumed messages, count unique `uid` values per
    # one-minute window with a LinearCounter, and print the stats each time
    # the window changes.

    # 0 acts as the "no window seen yet" marker (see the `> 0` check below).
    previous_window = 0
    users_bitmap = LinearCounter(60000)

    for message in consumer:
        # presumably an already-decoded JSON dict (it is indexed by 'ts' and
        # 'uid' below) - TODO confirm against the consumer's deserializer
        json_msg = message.value
        total_bytes = total_bytes + utf8len(json_msg)
        ts = json_msg['ts']
        uid = json_msg['uid']
        # convert ts(seconds) to minutes - using it as the 'minute window'
        current_window = int(ts / 60)

        if previous_window != current_window:
            # current minute window changed
            # print for the previous window the unique users count
            # (skipped while previous_window is still the initial 0 marker)
            if previous_window > 0:
                print_minute_stats(previous_window, users_bitmap.count())

            # start a fresh counter for the new window
            previous_window = current_window
            users_bitmap = LinearCounter(60000)
        users_bitmap.add(uid)

        # stop parser after msg_consumed_max messages
        # (for integer counts, `count + 1 > max` is the same as `count >= max`)
        msg_consumed_count = msg_consumed_count + 1
        if msg_consumed_count + 1 > msg_consumed_max:
            break

    # flush the stats of the last (still open) window
    print_minute_stats(previous_window, users_bitmap.count())
    consumer_timing = time.time() - consumer_start
    consumer.close()
    # NOTE(review): the message count in this text is hard-coded and does not
    # follow msg_consumed_max - confirm they are meant to match.
    print(
        'Consumer timing (1000000 messages): {0:.2f}'.format(consumer_timing))