def test_random_sketch(k, amount_keys, replays):
    """Compare the sketch's ranking with exact totals for random updates.

    Every one of ``amount_keys`` keys is updated ``replays`` times with a
    random increment.  The exact per-key totals are tracked in a plain dict
    and the ``k`` largest of them are compared, position by position, with
    what ``CountMinSketch.get_ranking()`` reports.

    If the comparison fails, the parameters, the recorded updates and both
    rankings are printed and pickled into ``failed-tests/`` before the
    original exception is re-raised.

    :param k: size of the ranking; third argument to ``CountMinSketch``.
    :param amount_keys: number of distinct keys updated in each replay.
    :param replays: number of times the full key set is updated.
    """
    tests = []
    results = defaultdict(int)

    cms = CountMinSketch(10 ** -7, 0.01, k)
    for _ in range(replays):
        for key_index in range(amount_keys):
            key = 'random-key-%s' % key_index
            n = randint(10, 1000000)
            # Record every update so a failing run can be replayed later.
            tests.append((key_index, n))
            results[key] += n
            cms.update(key, n)

    # Invert to total -> key so that nlargest orders by total.  Keys whose
    # totals happen to be equal collapse into a single entry here.
    # ``items()`` (not ``iteritems()``) keeps this working on Python 3.
    exp_results = {total: key for key, total in results.items()}
    exp = heapq.nlargest(k, exp_results.items())
    ranking = cms.get_ranking()
    try:
        assert len(exp) == len(ranking)
        for rank, (amnt, key) in enumerate(exp):
            (a_amnt, keys) = ranking[rank]
            assert key in keys
            assert amnt == a_amnt
    except Exception:
        from datetime import datetime
        import os
        import pickle
        import traceback

        traceback.print_exc()
        print('k=%s, amount_keys=%s, replays=%s' % (k, amount_keys,
                                                    replays))
        print("exp", exp)
        print("ranking", ranking)
        test = {'k': k, 'amount_keys': amount_keys, 'replays': replays,
                'tests': tests, 'exp': exp, 'ranking': ranking}
        # Make sure the dump directory exists; otherwise open() would raise
        # and hide the assertion failure that brought us here.
        if not os.path.isdir('failed-tests'):
            os.makedirs('failed-tests')
        fn = 'failed-tests/failed-test-%s.pickle' % datetime.now()
        with open(fn, 'wb') as f:
            pickle.dump(test, f)
        raise
# Example no. 2
# 0
 def __init__(self, delta, epsilon, k, redis_host='localhost',
              redis_port=6379, redis_prefix='countminsketch'):
     """Run the ``RedisHashing`` and ``CountMinSketch`` initialisers.

     :param delta: forwarded unchanged to ``CountMinSketch.__init__``.
     :param epsilon: forwarded unchanged to ``CountMinSketch.__init__``.
     :param k: forwarded unchanged to ``CountMinSketch.__init__``.
     :param redis_host: forwarded to ``RedisHashing.__init__``
         (default ``'localhost'``).
     :param redis_port: forwarded to ``RedisHashing.__init__``
         (default ``6379``).
     :param redis_prefix: forwarded to ``RedisHashing.__init__``
         (default ``'countminsketch'``).
     """
     # NOTE(review): both initialisers are invoked explicitly rather than
     # through super(); presumably these are the two base classes of the
     # enclosing class -- confirm against the class header.
     RedisHashing.__init__(self, redis_host, redis_port, redis_prefix)
     CountMinSketch.__init__(self, delta, epsilon, k)
def test_simple_count_min_sketch():
    """Walk a small sketch through five updates and check it after each.

    Each step applies one ``update``; where a count is given, ``get`` for
    the updated key is checked against it; then ``get_ranking()`` is
    compared with the expected ranking, a dict mapping position
    (0 = largest amount) to ``(amount, [keys])``.
    """
    sketch = CountMinSketch(10 ** -7, 0.01, 50)

    # (key, increment, expected get() result or None to skip that check,
    #  expected ranking entries listed from position 0 upwards)
    steps = [
        ('www.google.de', 10, 10,
         [(10, ['www.google.de'])]),
        ('www.bing.com', 12, 12,
         [(12, ['www.bing.com']),
          (10, ['www.google.de'])]),
        ('www.yahoo.com', 28, 28,
         [(28, ['www.yahoo.com']),
          (12, ['www.bing.com']),
          (10, ['www.google.de'])]),
        # google.de reaches 12 and shares a position with bing.com.
        ('www.google.de', 2, None,
         [(28, ['www.yahoo.com']),
          (12, ['www.bing.com', 'www.google.de'])]),
        # google.de reaches 14 and moves ahead of bing.com.
        ('www.google.de', 2, None,
         [(28, ['www.yahoo.com']),
          (14, ['www.google.de']),
          (12, ['www.bing.com'])]),
    ]

    for site, increment, expected_count, expected_entries in steps:
        sketch.update(site, increment)
        if expected_count is not None:
            assert sketch.get(site) == expected_count
        expected_ranking = dict(enumerate(expected_entries))
        assert sketch.get_ranking() == expected_ranking