Esempio n. 1
0
def test_init():
    """Verify the reported byte size and that a zero length is rejected."""
    counter = LinearCounter(8000)
    # The test expects a counter created with length 8000 to report 1000 bytes.
    assert counter.sizeof() == 1000, "Unexpected size in bytes"

    with pytest.raises(ValueError) as excinfo:
        LinearCounter(0)
    expected_message = "Counter length can't be 0 or negative"
    assert str(excinfo.value) == expected_message
Esempio n. 2
0
def test_count():
    """Verify the mean relative error of ``count()`` stays within [0, 0.1].

    Adds 100 distinct elements and, after every addition, compares the
    number of elements added so far with the value reported by ``count()``.
    """
    counter = LinearCounter(100000)

    relative_errors = []
    # ``seen`` is the true number of distinct elements added so far.
    for seen, index in enumerate(range(100), start=1):
        counter.add("element_{}".format(index))
        deviation = abs(seen - counter.count())
        relative_errors.append(deviation / float(seen))

    avg_error = sum(relative_errors) / float(len(relative_errors))

    assert avg_error >= 0
    assert avg_error <= 0.1
Esempio n. 3
0
def test_len():
    """Verify the value of ``len()`` for two requested counter lengths."""
    # Pairs of (requested length, length the test expects ``len()`` to report).
    cases = ((8000, 8000), (8001, 8008))
    for requested, expected in cases:
        assert len(LinearCounter(requested)) == expected
Esempio n. 4
0
def test_count():
    """Verify exact counts for a few additions and for the LOREM_TEXT words."""
    counter = LinearCounter(100000)
    assert counter.count() == 0

    # "test" is added twice; the second addition must leave the count at 1.
    for element, expected in (("test", 1), ("test", 1), ("test2", 2)):
        counter.add(element)
        assert counter.count() == expected

    del counter

    # Start over with a fresh counter for the text fixture.
    text_counter = LinearCounter(100000)
    words = LOREM_TEXT["text"].split()
    for word in words:
        text_counter.add(word)

    assert text_counter.count() == LOREM_TEXT["num_of_unique_words"]
Esempio n. 5
0
def test_add():
    """Smoke-test ``add`` with a string, an integer and a dict.

    There are no assertions: the test passes as long as no call raises.
    """
    counter = LinearCounter(8000)

    samples = ["test", 1, {"hello": "world"}]
    for sample in samples:
        counter.add(sample)
Esempio n. 6
0
def test_repr():
    """Verify the ``repr()`` text of a counter created with length 8000."""
    counter = LinearCounter(8000)
    expected = "<LinearCounter (length: 8000)>"

    assert repr(counter) == expected
Esempio n. 7
0
def test_count_small():
    """Verify exact counts while adding a handful of elements."""
    counter = LinearCounter(100000)

    # Nothing has been added yet.
    assert counter.count() == 0

    # Each step adds one element and states the count expected afterwards;
    # the repeated "test" must not be counted twice.
    steps = [("test", 1), ("test", 1), ("test2", 2)]
    for element, expected_count in steps:
        counter.add(element)
        assert counter.count() == expected_count

if __name__ == "__main__":
    consumer = KafkaConsumer(
        'sunday',
        bootstrap_servers=['localhost:9092'],
        auto_offset_reset='earliest',
        value_deserializer=lambda x: json.loads(x.decode('utf-8')))

    total_bytes = 0
    consumer_start = time.time()
    msg_consumed_max = 1000000
    msg_consumed_count = 0
    # one minute window
    previous_window = 0
    users_bitmap = LinearCounter(60000)

    for message in consumer:
        json_msg = message.value
        total_bytes = total_bytes + utf8len(json_msg)
        ts = json_msg['ts']
        uid = json_msg['uid']
        # convert ts(seconds) to minutes - using it as the 'minute window'
        current_window = int(ts / 60)

        if previous_window != current_window:
            # current minute window changed
            # print for the previous window the unique users count
            if previous_window > 0:
                print_minute_stats(previous_window, users_bitmap.count())