def _test_hs_agg(octopus, agg): values = [random.randint(-100, 100) for n in range(1000)] h = random.choice(values) + random.randint(-10, 10) octopus.create_stream('stream0', x='int') cq = 'SELECT %s(%d) WITHIN GROUP (ORDER BY x::integer) FROM stream0' % (agg, h) octopus.create_cv('test_%s' % agg, cq) octopus.insert('stream0', ('x',), [(v,) for v in values]) result = octopus.execute('SELECT %s FROM test_%s' % (agg, agg)).first() rank, peers = _rank(h, values) dense_rank, _ = _dense_rank(h, values) return rank, dense_rank, peers, len(values), result[agg]
def test_join_with_aggs(octopus, clean_db): """ Verify that joins involving aggregates referencing columns from multiple tables work """ num_cols = 4 join_cols = [1] q = """ SELECT sum(s.col0::integer) AS s0, sum(a0.col0::integer) AS s1, sum(a1.col0::integer) AS s2 FROM a1 JOIN a0 ON a1.col1 = a0.col1 JOIN stream0 s ON s.col1::integer = a0.col1 """ a0_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)]) a1_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)]) octopus.create_table('a0', **a0_cols) octopus.create_table('a1', **a1_cols) a0 = _generate_rows(num_cols, 64) a1 = _generate_rows(num_cols, 64) s = _generate_rows(num_cols, 64) _insert(octopus, 'a0', a0, 0.1) _insert(octopus, 'a1', a1, 0.1) octopus.create_stream('stream0', **a0_cols) octopus.create_cv('test_agg_join', q) _insert(octopus, 'stream0', s) expected = _join(a1, _join(a0, s, join_cols), join_cols) result = octopus.execute('SELECT * FROM test_agg_join').first() # sum of col0 from stream s0_expected = sum([r[num_cols * 2] for r in expected]) # sum of col0 from a0 s1_expected = sum([r[num_cols * 1] for r in expected]) # sum of col0 from a1 s2_expected = sum([r[num_cols * 0] for r in expected]) assert s0_expected == result['s0'] assert s1_expected == result['s1'] assert s2_expected == result['s2']
def test_percentile_cont_agg(octopus, clean_db): range_top = 100000 q = [0.0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0] batches = [] min_seen = range_top max_seen = 0 for _ in xrange(10): b = [(random.randint(0, range_top), ) for _ in xrange(5000)] min_seen = min(min_seen, min(b)[0]) max_seen = max(max_seen, max(b)[0]) batches.append(b) octopus.create_stream('test_stream', x='int') query = '''SELECT percentile_cont(ARRAY[%s]) WITHIN GROUP (ORDER BY x::integer) FROM %s ''' % (', '.join(map(lambda f: str(f), q)), '%s') octopus.create_cv('test_cq_percentile_cont', query % 'test_stream') octopus.create_table('test_percentile_cont', x='integer') for b in batches: octopus.insert('test_stream', ('x', ), b) octopus.insert('test_percentile_cont', ('x', ), b) actual = octopus.execute(query % 'test_percentile_cont') result = octopus.execute('SELECT * FROM test_cq_percentile_cont') actual = actual.first()['percentile_cont'] result = result.first()['percentile_cont'] assert len(actual) == len(result) assert result == sorted(result) diff = [abs(actual[i] - result[i]) for i in xrange(len(actual))] # 0th and 100th percentile should be accurate. assert result[0] == min_seen assert result[-1] == max_seen # 1st and 99th percentile should be within 0.1%. assert diff[1] <= 0.001 * range_top assert diff[-2] <= 0.001 * range_top # All percentiles should be within 0.5%. assert all(x <= 0.005 * range_top for x in diff)
def test_concurrent_crash(octopus, clean_db): """ Test simple worker and combiner crashes. """ octopus.create_stream('stream0', x='int') q = 'SELECT COUNT(*) FROM stream0' octopus.create_cv('test_concurrent_crash', q) batch_size = 25000 desc = [0, 0, False] vals = [(1, )] * batch_size def insert(): while True: octopus.insert('stream0', ['x'], vals) desc[1] += batch_size if desc[2]: break def kill(): for _ in xrange(30): r = random.random() if r > 0.85: desc[0] += kill_combiner() if r < 0.15: desc[0] += kill_worker() time.sleep(0.1) desc[2] = True threads = [threading.Thread(target=insert), threading.Thread(target=kill)] map(lambda t: t.start(), threads) map(lambda t: t.join(), threads) num_killed = desc[0] num_inserted = desc[1] result = octopus.execute('SELECT count FROM test_concurrent_crash').first() assert num_killed > 0 assert result['count'] <= num_inserted assert result['count'] >= num_inserted - (num_killed * batch_size) # To ensure that all remaining events in ZMQ queues have been consumed time.sleep(2)
def test_cksketch_frequency(octopus, clean_db): octopus.create_stream('test_cmsketch_stream', k='int', x='int') q = """ SELECT k::integer, cmsketch_agg(x::int) AS c FROM test_cmsketch_stream GROUP BY k """ desc = ('k', 'x') octopus.create_cv('test_cmsketch_frequency', q) rows = [(n, None) for n in range(100)] octopus.insert('test_cmsketch_stream', desc, rows) result = list(octopus.execute( 'SELECT cmsketch_frequency(c, null) AS x FROM test_cmsketch_frequency ORDER BY k').fetchall()) assert len(result) == 100 for row in result: assert row[0] == 0
def test_static_streams(octopus, clean_db): """ Verify that static stream definitions are dumped and restored """ octopus.create_stream('static', x='int', y='float8') _dump(octopus, 'test_static.sql') octopus.drop_stream('static') _restore(octopus, 'test_static.sql') # Force the requirement of a static stream octopus.create_cv('static_cv', 'SELECT x, y FROM static') octopus.insert('static', ('x', 'y'), [(0, 1)]) result = octopus.execute('SELECT x, y FROM static_cv').first() assert result['x'] == 0 assert result['y'] == 1
def test_simple_insert(octopus, clean_db): """ Verify that we can insert some rows and count some stuff """ octopus.create_stream('stream0', key='int') octopus.create_cv( 'cv', 'SELECT key::integer, COUNT(*) FROM stream0 GROUP BY key') rows = [(n % 10, ) for n in range(1000)] octopus.insert('stream0', ('key', ), rows) result = list(octopus.execute('SELECT * FROM cv ORDER BY key')) assert len(result) == 10 for i, row in enumerate(result): assert row['key'] == i assert row['count'] == 100
def test_regression(octopus, clean_db): path = os.path.abspath(os.path.join(octopus.tmp_dir, 'test_copy.csv')) _generate_csv( path, [['2015-06-01 00:00:00', 'De', 'Adam_Babareka', '1', '37433']], desc=('day', 'project', 'title', 'count', 'size')) octopus.create_stream('copy_regression_stream', count='int', day='timestamp', project='text', title='text', size='int') octopus.create_cv('test_copy_regression', 'SELECT sum(count) FROM copy_regression_stream') octopus.execute( "COPY copy_regression_stream (day, project, title, count, size) FROM '%s' CSV HEADER" % path)
def test_concurrent_copy(octopus, clean_db): octopus.create_stream('stream0', x='int') octopus.create_cv('concurrent_copy0', 'SELECT x::int, count(*) FROM stream0 GROUP BY x') octopus.create_cv('concurrent_copy1', 'SELECT count(*) FROM stream0') tmp_file = os.path.join(tempfile.gettempdir(), 'tmp.copy') query = 'SELECT generate_series(1, 2000) AS x' octopus.execute("COPY (%s) TO '%s'" % (query, tmp_file)) num_threads = 4 stop = False inserted = [0] * num_threads def insert(i): conn = psycopg2.connect( 'dbname=octopus user=%s host=localhost port=%s' % (getpass.getuser(), octopus.port)) cur = conn.cursor() while not stop: cur.execute("COPY stream0 (x) FROM '%s'" % tmp_file) conn.commit() inserted[i] += 2000 conn.close() threads = [ threading.Thread(target=insert, args=(i, )) for i in range(num_threads) ] map(lambda t: t.start(), threads) time.sleep(60) stop = True map(lambda t: t.join(), threads) time.sleep(5) total = (octopus.execute( 'SELECT sum(count) FROM concurrent_copy0').first()['sum']) assert total == sum(inserted) total = ( octopus.execute('SELECT count FROM concurrent_copy1').first()['count']) assert total == sum(inserted)
def test_stream_buffer(octopus, clean_db): """ Run CQs which take different times to process an event in the stream buffer and each stream is read by a disjoint set of CQs. """ octopus.create_stream('stream1', x='int', string='text') octopus.create_stream('stream2', x='int', string='text') octopus.create_cv('test_sbuf_1', 'SELECT x::int FROM stream1') octopus.create_cv('test_sbuf_2', 'SELECT x::int, pg_sleep(0.002) FROM stream1') octopus.create_cv('test_sbuf_3', 'SELECT x::int FROM stream2') octopus.create_cv('test_sbuf_4', 'SELECT x::int, pg_sleep(0.002) FROM stream2') num_per_batch = 5000 num_batches = 6 # We're sending this 2k string with each event just to consume # more space in the stream buffer. That way we get to test the # wrapping of the buffer as well. values = [(i, 'a' * 2048) for i in xrange(num_per_batch)] def insert(stream): for _ in xrange(num_batches - 1): octopus.insert(stream, ('x', 'string'), values) threads = [ threading.Thread(target=insert, args=('stream1', )), threading.Thread(target=insert, args=('stream2', )) ] map(lambda t: t.start(), threads) map(lambda t: t.join(), threads) octopus.insert('stream1', ('x', 'string'), values) octopus.insert('stream2', ('x', 'string'), values) q = 'SELECT COUNT(*) FROM test_sbuf_%d' r1 = octopus.execute(q % 1).first()[0] r2 = octopus.execute(q % 2).first()[0] r3 = octopus.execute(q % 3).first()[0] r4 = octopus.execute(q % 4).first()[0] assert r1 == r2 == r3 == r4 == num_batches * num_per_batch
def test_transforms(octopus, clean_db): """ Verify that continuous transforms work properly on output streams """ octopus.create_stream('stream0', x='int') octopus.create_cv('sw', 'SELECT x::integer, COUNT(*) FROM stream0 GROUP BY x', sw='5 seconds') # Write a row to a stream each time a row goes out of window q = 'SELECT (old).x FROM sw_osrel WHERE old IS NOT NULL AND new IS NULL' octopus.create_stream('oow_stream', x='integer') octopus.create_ct('ct', q, "octopus_stream_insert('oow_stream')") octopus.create_cv('ct_recv', 'SELECT x FROM oow_stream') octopus.insert('stream0', ('x',), [(x % 100,) for x in range(10000)]) time.sleep(7) rows = list(octopus.execute('SELECT * FROM ct_recv')) assert len(rows) == 100
def test_schema_only(octopus, clean_db): """ Verify that it is possible to only dump continuous view schemas and not data """ octopus.create_stream('stream0', x='int') octopus.create_cv('test_schema', 'SELECT COUNT(*) FROM stream0') octopus.insert('stream0', ('x',), [(x,) for x in range(10)]) result = octopus.execute('SELECT count FROM test_schema').first() assert result['count'] == 10 _dump(octopus, 'test_schema.sql', schema_only=True) octopus.drop_all() _restore(octopus, 'test_schema.sql') # No data loaded result = list(octopus.execute('SELECT count FROM test_schema')) assert not result
def test_distinct(octopus, clean_db): """ Verify that streaming SELECT DISTINCT ON (...) works """ octopus.create_stream('stream0', x='int', y='int', z='int') octopus.create_table('table0', x='int', y='int', z='int') q = 'SELECT DISTINCT ON (x::int, y::int - z::int) x::int, y::int FROM stream0' octopus.create_cv('test_distinct', q) uniques = defaultdict(set) values = [] for _ in xrange(2000): x, y, z = random.randint(0, 20), random.randint(0, 20), random.randint( 0, 20) values.append((x, y, z)) uniques[(x, y - z)].add(y) octopus.insert('stream0', ['x', 'y', 'z'], values) octopus.insert('table0', ['x', 'y', 'z'], values) q = """ SELECT DISTINCT ON (x::int, y::int - z::int) x::int, y::int FROM table0 """ expected = list(octopus.execute(q)) expected = len(expected) assert expected < 2000 result = octopus.execute('SELECT COUNT(*) FROM test_distinct').first() assert expected == result['count'] # Check if the first row was selected for uniques result = octopus.execute('SELECT * FROM test_distinct') reverse_uniques = defaultdict(set) for (x, _), ys in uniques.iteritems(): for y in ys: reverse_uniques[y].add(x) for row in result: assert row['x'] in reverse_uniques[row['y']]
def test_deadlock_regress(octopus, clean_db): nitems = 2000000 tmp_file = os.path.join(tempfile.gettempdir(), 'tmp.json') query = 'SELECT generate_series(1, %d) AS n' % nitems octopus.execute("COPY (%s) TO '%s'" % (query, tmp_file)) octopus.create_stream('s1', n='int') octopus.create_stream('s2', n='int') octopus.create_ct('ct', 'SELECT n FROM s1 WHERE n IS NOT NULL', "octopus_stream_insert('s2')") octopus.create_cv('cv', 'SELECT count(*) FROM s2') for copy in [True, False]: for nworkers in [1, 4]: for sync in ['receive', 'commit']: octopus.stop() octopus.run({ 'continuous_query_num_workers': nworkers, 'stream_insert_level': 'sync_%s' % sync }) octopus.execute("SELECT truncate_continuous_view('cv')") octopus.execute('COMMIT') if copy: octopus.execute("COPY s1 (n) FROM '%s'" % tmp_file) else: octopus.execute('INSERT INTO s1 (n) %s' % query) count = dict(octopus.execute('SELECT count FROM cv').first() or {}) ntries = 5 while count.get('count') != nitems and ntries > 0: assert sync == 'receive' time.sleep(1) count = dict(octopus.execute('SELECT count FROM cv').first() or {}) ntries -= 1 assert count and count['count'] == nitems os.remove(tmp_file) octopus.stop() octopus.run()
def test_hll_count_distinct(octopus, clean_db): """ Verify that streaming COUNT(DISTINCT) works """ octopus.create_stream('stream0', x='int') q = 'SELECT COUNT(DISTINCT x::integer) FROM stream0' octopus.create_cv('test_count_distinct', q) desc = ('x', ) values = [(random.randint(1, 1024), ) for n in range(1000)] octopus.insert('stream0', desc, values) expected = len(set(values)) result = octopus.execute('SELECT count FROM test_count_distinct').first() # Error rate should be well below %2 delta = abs(expected - result['count']) assert delta / float(expected) <= 0.02
def test_object_aggs(octopus, clean_db): """ Verify that combines work properly on object aggs """ q = """ SELECT x::integer %% 10 AS k, json_agg(x), json_object_agg(x, y::float8), string_agg(s::text, \' :: \')FROM stream0 GROUP BY k; """ desc = ('x', 'y', 's') octopus.create_stream('stream0', x='int', y='float8', s='text') octopus.create_cv('test_object_aggs', q) octopus.create_table('test_object_aggs_t', x='integer', y='float8', s='text') rows = [] for n in range(10000): row = (random.randint(0, 1000), random.random(), str(n) * random.randint(1, 8)) rows.append(row) octopus.insert('stream0', desc, rows) octopus.insert('test_object_aggs_t', desc, rows) tq = """ SELECT json_agg(x), json_object_agg(x, y::float8), string_agg(s::text, \' :: \') FROM test_object_aggs_t """ table_result = list(octopus.execute(tq)) cq = """ SELECT combine(json_agg), combine(json_object_agg), combine(string_agg) FROM test_object_aggs """ cv_result = list(octopus.execute(cq)) assert len(table_result) == len(cv_result) for tr, cr in zip(table_result, cv_result): assert sorted(tr[0]) == sorted(cr[0]) assert sorted(tr[1]) == sorted(cr[1]) assert sorted(tr[2]) == sorted(cr[2])
def test_concurrent_inserts(octopus, clean_db): octopus.create_stream('stream0', x='int') octopus.create_cv('concurrent_inserts0', 'SELECT x::int, count(*) FROM stream0 GROUP BY x') octopus.create_cv('concurrent_inserts1', 'SELECT count(*) FROM stream0') num_threads = 4 stop = False inserted = [0] * num_threads def insert(i): conn = psycopg2.connect( 'dbname=octopus user=%s host=localhost port=%s' % (getpass.getuser(), octopus.port)) cur = conn.cursor() while not stop: cur.execute('INSERT INTO stream0 (x) ' 'SELECT x % 100 FROM generate_series(1, 2000) AS x') conn.commit() inserted[i] += 2000 conn.close() threads = [ threading.Thread(target=insert, args=(i, )) for i in range(num_threads) ] map(lambda t: t.start(), threads) time.sleep(60) stop = True map(lambda t: t.join(), threads) time.sleep(5) total = (octopus.execute( 'SELECT sum(count) FROM concurrent_inserts0').first()['sum']) assert total == sum(inserted) total = (octopus.execute('SELECT count FROM concurrent_inserts1').first() ['count']) assert total == sum(inserted)
def test_multiple(octopus, clean_db): """ Verify that multiple continuous views work together properly """ octopus.create_stream('stream0', n='numeric', s='text', unused='int') octopus.create_cv('cv0', 'SELECT n::numeric FROM stream0 WHERE n > 10.00001') octopus.create_cv('cv1', 'SELECT s::text FROM stream0 WHERE s LIKE \'%%this%%\'') rows = [(float(n + 10), 'this', 100) for n in range(1000)] for n in range(10): rows.append((float(n), 'not a match', -n)) octopus.insert('stream0', ('n', 's', 'unused'), rows) result = list(octopus.execute('SELECT * FROM cv0')) assert len(result) == 999 result = list(octopus.execute('SELECT * FROM cv1')) assert len(result) == 1000
def test_dump(octopus, clean_db): """ Verify that we can dump and restore CVs using INSERT statements """ octopus.create_stream('stream0', x='int') q = """ SELECT x::integer %% 100 AS g, avg(x) + 1 AS avg, count(*), count(distinct x) AS distincts FROM stream0 GROUP BY g """ octopus.create_cv('test_dump', q) rows = [(x,) for x in range(1000)] octopus.insert('stream0', ('x',), rows) def _verify(): result = octopus.execute('SELECT count(*) FROM test_dump').first() assert result['count'] == 100 result = octopus.execute('SELECT sum(avg) FROM test_dump').first() assert result['sum'] == 50050 result = octopus.execute('SELECT sum(distincts) FROM test_dump').first() assert result['sum'] == 1000 _verify() _dump(octopus, 'test_dump.sql') octopus.drop_all() _restore(octopus, 'test_dump.sql') _verify() # Now verify that we can successfully add more data to the restored CV rows = [(x,) for x in range(2000)] octopus.insert('stream0', ('x',), rows) result = octopus.execute('SELECT sum(count) FROM test_dump').first() assert result['sum'] == 3000 result = octopus.execute('SELECT sum(distincts) FROM test_dump').first() assert result['sum'] == 2000
def test_hypothetical_set_aggs(octopus, clean_db): """ Verify that combines work properly on HS aggs """ q = """ SELECT x::integer %% 10 AS k, rank(256) WITHIN GROUP (ORDER BY x), dense_rank(256) WITHIN GROUP (ORDER BY x) FROM stream0 GROUP BY k """ desc = ('x', 'y') octopus.create_stream('stream0', x='int', y='float8') octopus.create_cv('test_hs_aggs', q) octopus.create_table('test_hs_aggs_t', x='integer', y='float8') rows = [] for n in range(10000): row = (random.randint(0, 1000), random.random()) rows.append(row) octopus.insert('stream0', desc, rows) octopus.insert('test_hs_aggs_t', desc, rows) # Note that the CQ will use the HLL variant of dense_rank, # so use hll_dense_rank on the table too tq = """ SELECT rank(256) WITHIN GROUP (ORDER BY x), hll_dense_rank(256) WITHIN GROUP (ORDER BY x) FROM test_hs_aggs_t """ table_result = list(octopus.execute(tq)) cq = """ SELECT combine(rank), combine(dense_rank) FROM test_hs_aggs """ cv_result = list(octopus.execute(cq)) assert len(table_result) == len(cv_result) for tr, cr in zip(table_result, cv_result): assert tr == cr
def test_create_drop_continuous_view(octopus, clean_db): """ Basic sanity check """ octopus.create_stream('stream0', id='int') octopus.create_cv('cv0', 'SELECT id::integer FROM stream0') octopus.create_cv('cv1', 'SELECT id::integer FROM stream0') octopus.create_cv('cv2', 'SELECT id::integer FROM stream0') result = octopus.execute('SELECT * FROM octopus_views()') names = [r['name'] for r in result] assert sorted(names) == ['cv0', 'cv1', 'cv2'] octopus.drop_cv('cv0') octopus.drop_cv('cv1') octopus.drop_cv('cv2') result = octopus.execute('SELECT * FROM octopus_views()') names = [r['name'] for r in result] assert len(names) == 0
def test_combine(octopus, clean_db): """ Verify that partial tuples are combined with on-disk tuples """ octopus.create_stream('stream0', key='text', unused='int') octopus.create_cv('combine', 'SELECT key::text, COUNT(*) FROM stream0 GROUP BY key') rows = [] for n in range(100): for m in range(100): key = '%d%d' % (n % 10, m) rows.append((key, 0)) octopus.insert('stream0', ('key', 'unused'), rows) total = 0 result = octopus.execute('SELECT * FROM combine') for row in result: total += row['count'] assert total == 10000
def test_colums_subset(octopus, clean_db): """ Verify that copying data from a file into a stream works when the file's input columns are a subset of the stream0's columns """ octopus.create_stream('stream0', x='int', y='float8', z='numeric', m='int') q = 'SELECT sum(x::integer) AS s0, sum(y::float8) AS s1, avg(z::numeric), max(m::integer) FROM stream0' octopus.create_cv('test_copy_subset', q) octopus.create_table('test_copy_subset_t', x='integer', y='float8', z='numeric') path = os.path.abspath(os.path.join(octopus.tmp_dir, 'test_copy.csv')) rows = [] for n in range(10000): row = random.randint(1, 1024), random.randint(1, 1024), random.random() rows.append(row) _generate_csv(path, rows, desc=('x', 'y', 'z')) octopus.execute( 'COPY test_copy_subset_t (x, y, z) FROM \'%s\' HEADER CSV' % path) octopus.execute('COPY stream0 (x, y, z) FROM \'%s\' HEADER CSV' % path) expected = octopus.execute( 'SELECT sum(x::integer) AS s0, sum(y::float8) AS s1, avg(z::numeric) FROM test_copy_subset_t' ).first() result = list(octopus.execute('SELECT s0, s1, avg FROM test_copy_subset')) assert len(result) == 1 result = result[0] assert result[0] == expected[0] assert result[1] == expected[1] assert result[2] == expected[2]
def test_concurrent_sw_ticking(octopus, clean_db): """ Verify that several concurrent sliding-window queries each having different windows tick correctly at different intervals. """ octopus.create_stream('stream0', x='int') output_names = [] for n in range(10): name = 'sw%d' % n octopus.create_cv(name, 'SELECT x::integer, count(*) FROM stream0 GROUP BY x', sw='%d seconds' % (n + 10)) output_name = name + '_output' q = """ SELECT arrival_timestamp, CASE WHEN (old).x IS NULL THEN (new).x ELSE (old).x END AS x, old, new FROM %s_osrel """ % name octopus.create_cv(output_name, q) output_names.append(output_name) names = [r[0] for r in octopus.execute('SELECT name FROM octopus_views() ORDER BY name DESC')] assert len(names) == 2 * 10 octopus.insert('stream0', ('x',), [(x % 100,) for x in range(10000)]) time.sleep(25) for name in output_names: rows = list(octopus.execute('SELECT COUNT(DISTINCT x) FROM %s' % name)) assert rows[0][0] == 100 for x in range(100): # In window assert octopus.execute('SELECT * FROM %s WHERE old IS NULL AND new IS NOT NULL AND x = %d' % (name, x)) # Out of window assert octopus.execute('SELECT * FROM %s WHERE old IS NOT NULL AND new IS NULL AND x = %d' % (name, x)) # Drop these in reverse dependency order to prevent deadlocks for name in names: octopus.drop_cv(name)
def test_user_low_and_high_card(octopus, clean_db): """ Verify that Bloom filters's with low and high cardinalities are correcly unioned """ octopus.create_stream('test_bloom_stream', x='int', k='int') q = """ SELECT k::integer, bloom_agg(x::integer) FROM test_bloom_stream GROUP BY k """ desc = ('k', 'x') octopus.create_cv('test_bloom_agg', q) # Low cardinalities rows = [] for n in range(1000): rows.append((0, random.choice((-1, -2)))) rows.append((1, random.choice((-3, -4)))) # High cardinalities for n in range(10000): rows.append((2, n)) rows.append((3, n)) octopus.insert('test_bloom_stream', desc, rows) result = octopus.execute('SELECT bloom_cardinality(combine(bloom_agg)) ' 'FROM test_bloom_agg WHERE k in (0, 1)').first() assert result[0] == 4 result = octopus.execute('SELECT bloom_cardinality(combine(bloom_agg)) ' 'FROM test_bloom_agg WHERE k in (2, 3)').first() assert result[0] == 8879 result = octopus.execute('SELECT bloom_cardinality(combine(bloom_agg)) ' 'FROM test_bloom_agg').first() assert result[0] == 8881
def test_windowed_combine(octopus, clean_db): """ Verify that windowed queries with combines work """ q = """ SELECT x::integer, avg(y::integer) FROM stream0 GROUP BY x """ desc = ('x', 'y') octopus.create_stream('stream0', x='int', y='float8') octopus.create_cv('test_windowed_combine', q) octopus.create_table('test_windowed_combine_t', x='integer', y='integer') rows = [] for n in range(10000): row = (n, n) rows.append(row) octopus.insert('stream0', desc, rows) octopus.insert('test_windowed_combine_t', desc, rows) table = """ SELECT first_value(x) OVER w, avg(y) OVER w FROM test_windowed_combine_t WINDOW w AS (ORDER BY x ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING) ORDER BY first_value """ expected = list(octopus.execute(table)) combine = """ SELECT first_value(x) OVER w, avg(avg) OVER w FROM test_windowed_combine WINDOW w AS (ORDER BY x ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING) ORDER BY first_value """ actual = list(octopus.execute(combine)) for e, a in zip(expected, actual): assert e == a
def test_prepared_inserts(octopus, clean_db): """ Verify that we can PREPARE and EXECUTE an INSERT statement """ octopus.create_stream('stream0', x='int', y='float8') conn = psycopg2.connect('dbname=octopus user=%s host=localhost port=%s' % (getpass.getuser(), octopus.port)) db = conn.cursor() db.execute('CREATE CONTINUOUS VIEW test_prepared0 AS SELECT x::integer, COUNT(*), sum(y::integer) FROM stream0 GROUP BY x') db.execute('CREATE CONTINUOUS VIEW test_prepared1 AS SELECT x::integer, COUNT(*), sum(y::float8) FROM stream0 GROUP BY x') conn.commit() db.execute('PREPARE ins AS INSERT INTO stream0 (x, y) VALUES ($1, $2)') for n in range(10000): row = (n % 100, random.random()) db.execute('EXECUTE ins (%s, %s)' % row) time.sleep(0.1) conn.commit() result = list(octopus.execute('SELECT * FROM test_prepared0 ORDER BY x')) assert len(result) == 100 for n in range(100): assert result[n]['count'] == 100 result = list(octopus.execute('SELECT * FROM test_prepared1 ORDER BY x')) assert len(result) == 100 for n in range(100): assert result[n]['count'] == 100 conn.close()
def test_stats_aggs(octopus, clean_db): """ Verify that combines work on stats aggs """ q = """ SELECT x::integer %% 10 AS k, regr_sxx(x, y::float8), stddev(x) FROM stream0 GROUP BY k; """ desc = ('x', 'y') octopus.create_stream('stream0', x='int', y='float8') octopus.create_cv('test_stats_aggs', q) octopus.create_table('test_stats_aggs_t', x='integer', y='float8') rows = [] for n in range(10000): row = (random.randint(0, 1000), random.random()) rows.append(row) octopus.insert('stream0', desc, rows) octopus.insert('test_stats_aggs_t', desc, rows) tq = """ SELECT regr_sxx(x, y::float8), stddev(x) FROM test_stats_aggs_t """ table_result = list(octopus.execute(tq)) cq = """ SELECT combine(regr_sxx), combine(stddev) FROM test_stats_aggs """ cv_result = list(octopus.execute(cq)) assert len(table_result) == len(cv_result) for tr, cr in zip(table_result, cv_result): assert abs(tr[0] - cr[0]) < 0.00001 assert abs(tr[1] - cr[1]) < 0.00001
def test_hll_distinct(octopus, clean_db): """ Verify that combines work on HLL COUNT DISTINCT queries """ q = """ SELECT x::integer %% 10 AS k, COUNT(DISTINCT x) AS count FROM stream0 GROUP BY k """ desc = ('x', 'y') octopus.create_stream('stream0', x='int', y='float8') octopus.create_cv('test_hll_distinct', q) octopus.create_table('test_hll_distinct_t', x='integer', y='float8') rows = [] for n in range(10000): row = (random.randint(0, 1000), random.random()) rows.append(row) octopus.insert('stream0', desc, rows) octopus.insert('test_hll_distinct_t', desc, rows) # Note that the CQ will use the HLL variant of COUNT DISTINCT, # so use hll_count_distinct on the table too tq = """ SELECT hll_count_distinct(x) FROM test_hll_distinct_t """ table_result = list(octopus.execute(tq)) cq = """ SELECT combine(count) FROM test_hll_distinct """ cv_result = list(octopus.execute(cq)) assert len(table_result) == len(cv_result) for tr, cr in zip(table_result, cv_result): assert tr == cr
def test_null_groups(octopus, clean_db): """ Verify that null group columns are considered equal """ octopus.create_stream('s', x='int', y='int', z='int') q = """ SELECT x::integer, y::integer, z::integer, COUNT(*) FROM s GROUP BY x, y, z; """ desc = ('x', 'y', 'z') octopus.create_cv('test_null_groups', q) octopus.create_table('test_null_groups_t', x='integer', y='integer', z='integer') rows = [] for n in range(10000): vals = list(random.randint(0, 10) for n in range(3)) vals = map(lambda n: random.random() > 0.1 and n or None, vals) rows.append(tuple(vals)) octopus.insert('s', desc, rows) octopus.insert('test_null_groups_t', desc, rows) table_q = """ SELECT x, y, z, COUNT(*) FROM test_null_groups_t GROUP BY x, y, z ORDER BY x, y, z; """ expected = list(octopus.execute(table_q)) result = list( octopus.execute( 'SELECT x, y, z, count FROM test_null_groups ORDER BY x, y, z')) for r, e in zip(result, expected): assert r == e