def test_bloom_intersection(octopus, clean_db):
  """
  Verify that bloom_intersection works
  """
  octopus.create_stream('test_bloom_stream', x='int', k='int')

  q = """
  SELECT k::int, bloom_agg(x::integer) FROM test_bloom_stream GROUP BY k
  """
  desc = ('k', 'x')
  octopus.create_cv('test_bloom_intersection', q)

  rows = []
  for i in range(10000):
    rows.append((0, 2 * i))
    rows.append((1, i))

  octopus.insert('test_bloom_stream', desc, rows)

  cvq = """
  SELECT bloom_cardinality(bloom_intersection_agg(bloom_agg))
  FROM test_bloom_intersection
  """
  result = list(octopus.execute(cvq))

  assert len(result) == 1

  result = result[0]

  assert result[0] == 5530

def test_combine_in_view(octopus, clean_db):
  """
  Verify that combines in views on top of continuous views work
  """
  q = """
  SELECT x::integer, avg(y::integer) FROM stream0 GROUP BY x
  """
  desc = ('x', 'y')
  octopus.create_stream('stream0', x='int', y='float8')
  octopus.create_cv('test_combine_view', q)
  octopus.execute(
    'CREATE VIEW v AS SELECT combine(avg) FROM test_combine_view')

  rows = []
  for n in range(10000):
    rows.append((random.randint(1, 256), random.randint(1, 1024)))

  octopus.insert('stream0', desc, rows)

  view = list(octopus.execute('SELECT * FROM v'))

  assert len(view) == 1

  expected = sum(r[1] for r in rows) / float(len(rows))

  assert abs(float(view[0][0]) - expected) < 0.00001

  octopus.execute('DROP VIEW v')

def test_bloom_agg_hashing(octopus, clean_db):
  """
  Verify that bloom_agg correctly hashes different input types
  """
  octopus.create_stream('test_bloom_stream', x='int', y='text', z='float8')

  q = """
  SELECT bloom_agg(x::integer) AS i,
         bloom_agg(y::text) AS t,
         bloom_agg(z::float8) AS f
  FROM test_bloom_stream
  """
  desc = ('x', 'y', 'z')
  octopus.create_cv('test_bloom_hashing', q)

  rows = []
  for n in range(10000):
    rows.append((n, '%d' % n, float(n)))
    rows.append((n, '%05d' % n, float(n)))

  octopus.insert('test_bloom_stream', desc, rows)

  cvq = """
  SELECT bloom_cardinality(i),
         bloom_cardinality(t),
         bloom_cardinality(f)
  FROM test_bloom_hashing
  """
  result = list(octopus.execute(cvq))

  assert len(result) == 1

  result = result[0]

  assert result[0] == 8879
  assert result[1] == 15614
  assert result[2] == 8855

def test_join_multiple_tables(octopus, clean_db):
  """
  Verify that stream-table joins involving multiple tables work
  """
  num_cols = 8
  join_cols = [0]
  t0_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])
  t1_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])

  octopus.create_table('t0', **t0_cols)
  octopus.create_table('t1', **t1_cols)
  octopus.create_stream('stream0', **t0_cols)

  q = """
  SELECT s.col0::integer FROM t0
  JOIN t1 ON t0.col0 = t1.col0
  JOIN stream0 s ON t1.col0 = s.col0
  """

  t0 = _generate_rows(num_cols, 64)
  t1 = _generate_rows(num_cols, 64)
  s = _generate_rows(num_cols, 64)

  _insert(octopus, 't1', t1, 0.1)
  _insert(octopus, 't0', t0, 0.1)

  octopus.create_cv('test_join_multi', q)
  _insert(octopus, 'stream0', s)

  expected = _join(t0, _join(s, t1, join_cols), join_cols)
  result = octopus.execute('SELECT COUNT(*) FROM test_join_multi').first()

  assert result['count'] == len(expected)

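# NOTE: the join tests in this section compute their expected results with a
# few pure-Python helpers (_generate_row, _generate_rows, _insert, _join) that
# are defined elsewhere in the test suite. The sketch below shows one plausible
# implementation, inferred from how they are called here; the real helpers may
# differ in details such as the value range used for generated rows. It relies
# on the module-level random and time imports the tests above already use.

def _generate_row(num_cols):
  # A random tuple of num_cols small integers; a narrow range keeps join
  # matches likely.
  return tuple(random.randint(0, 32) for _ in range(num_cols))


def _generate_rows(num_cols, num_rows):
  return [_generate_row(num_cols) for _ in range(num_rows)]


def _insert(octopus, target, rows, sleep=0):
  # Inserts rows into a table or stream whose columns are named col0..colN,
  # optionally sleeping afterwards so the data is visible before the next step.
  cols = ['col%d' % c for c in range(len(rows[0]))]
  octopus.insert(target, cols, rows)
  if sleep:
    time.sleep(sleep)


def _join(left, right, cols):
  # Reference nested-loop join: concatenates every pair of rows that agree on
  # the given column indexes. The tests compare len(_join(...)) against the
  # continuous view's COUNT(*).
  out = []
  for l in left:
    for r in right:
      if all(l[c] == r[c] for c in cols):
        out.append(l + r)
  return out
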
def test_indexed(octopus, clean_db):
  """
  Verify that stream-table joins involving indexed tables work
  """
  octopus.create_stream('stream0', x='int', y='int')
  q = """
  SELECT stream0.x::integer, count(*) FROM stream0
  JOIN test_indexed_t t ON stream0.x = t.x GROUP BY stream0.x
  """
  octopus.create_table('test_indexed_t', x='integer', y='integer')
  octopus.execute('CREATE INDEX idx ON test_indexed_t(x)')

  t = _generate_rows(2, 1000)
  s = _generate_rows(2, 1000)

  octopus.insert('test_indexed_t', ('x', 'y'), t)
  time.sleep(0.1)

  octopus.create_cv('test_indexed', q)
  octopus.insert('stream0', ('x', 'y'), s)

  expected = _join(s, t, [0])
  result = octopus.execute('SELECT sum(count) FROM test_indexed').first()

  assert result['sum'] == len(expected)

def test_join_across_batches(octopus, clean_db):
  """
  Verify that stream-table joins are properly built when they span across
  multiple input batches
  """
  num_cols = 4
  join_cols = [0]
  t_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])
  octopus.create_table('batch', **t_cols)
  octopus.create_stream('stream0', **t_cols)

  q = """
  SELECT s.col0::integer FROM batch
  JOIN stream0 s ON batch.col0 = s.col0
  """

  t = _generate_rows(num_cols, 64)
  _insert(octopus, 'batch', t, 0.1)

  s = _generate_rows(num_cols, 64)

  octopus.create_cv('test_batched_join', q)
  _insert(octopus, 'stream0', s)

  expected = _join(t, s, join_cols)
  result = octopus.execute('SELECT COUNT(*) FROM test_batched_join').first()

  assert result['count'] == len(expected)

def test_incremental_join(octopus, clean_db):
  """
  Verify that join results increase appropriately as we incrementally
  add stream events to the input
  """
  num_cols = 4
  join_cols = [0, 1]
  t_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])
  octopus.create_table('inc', **t_cols)
  octopus.create_stream('stream0', **t_cols)

  q = """
  SELECT s.col0::integer FROM inc
  JOIN stream0 s ON inc.col0 = s.col0 AND inc.col1 = s.col1::integer
  """

  t = _generate_rows(num_cols, 64)
  _insert(octopus, 'inc', t, 0.1)

  octopus.create_cv('test_join', q)

  s = []
  for n in range(2):
    row = _generate_row(num_cols)
    _insert(octopus, 'stream0', [row])
    s.append(row)

  expected = _join(t, s, join_cols)
  result = octopus.execute('SELECT COUNT(*) FROM test_join').first()

  assert result['count'] == len(expected)

def test_user_low_and_high_card(octopus, clean_db):
  """
  Verify that HLLs with low and high cardinalities are correctly combined
  """
  octopus.create_stream('test_hll_stream', x='int', k='integer')
  q = """
  SELECT k::integer, hll_agg(x::integer) FROM test_hll_stream GROUP BY k
  """
  desc = ('k', 'x')
  octopus.create_cv('test_hll_agg', q)

  # Low cardinalities
  rows = []
  for n in range(1000):
    rows.append((0, random.choice((-1, -2))))
    rows.append((1, random.choice((-3, -4))))

  # High cardinalities
  for n in range(10000):
    rows.append((2, n))
    rows.append((3, n))

  octopus.insert('test_hll_stream', desc, rows)

  result = octopus.execute('SELECT hll_cardinality(combine(hll_agg)) '
                           'FROM test_hll_agg WHERE k in (0, 1)').first()
  assert result[0] == 4

  result = octopus.execute('SELECT hll_cardinality(combine(hll_agg)) '
                           'FROM test_hll_agg WHERE k in (2, 3)').first()
  assert result[0] == 9976

  result = octopus.execute('SELECT hll_cardinality(combine(hll_agg)) '
                           'FROM test_hll_agg').first()
  assert result[0] == 9983

def test_cmsketch_agg(octopus, clean_db):
  """
  Test cmsketch_agg and cmsketch_frequency, including combines
  """
  octopus.create_stream('test_cmsketch_stream', k='int', x='int')

  q = """
  SELECT k::integer, cmsketch_agg(x::int) AS c FROM test_cmsketch_stream
  GROUP BY k
  """
  desc = ('k', 'x')
  octopus.create_cv('test_cmsketch_agg', q)

  rows = []
  for n in range(1000):
    rows.append((0, n % 20))
    rows.append((1, n % 50))

  octopus.insert('test_cmsketch_stream', desc, rows)

  result = list(octopus.execute(
    'SELECT cmsketch_frequency(c, 10) AS x, cmsketch_frequency(c, 40) AS y, '
    'cmsketch_frequency(c, 60) FROM test_cmsketch_agg ORDER BY k').fetchall())
  assert len(result) == 2
  assert tuple(result[0]) == (50, 0, 0)
  assert tuple(result[1]) == (20, 20, 0)

  result = list(octopus.execute(
    'SELECT cmsketch_frequency(combine(c), 10) AS x, '
    'cmsketch_frequency(combine(c), 40) AS y, cmsketch_frequency(combine(c), 60) '
    'FROM test_cmsketch_agg').fetchall())
  assert len(result) == 1
  assert tuple(result[0]) == (70, 20, 0)

def test_single_continuous_view(octopus, clean_db):
  """
  Verify that specific continuous views can be dropped and restored
  """
  octopus.create_stream('stream0', x='int')
  octopus.create_cv('test_single0', 'SELECT COUNT(*) FROM stream0')
  octopus.create_cv('test_single1', 'SELECT COUNT(*) FROM stream0')
  octopus.insert('stream0', ('x',), [(x,) for x in range(10)])

  result = octopus.execute('SELECT count FROM test_single0').first()
  assert result['count'] == 10

  result = octopus.execute('SELECT count FROM test_single1').first()
  assert result['count'] == 10

  _dump(octopus, 'test_single.sql',
        tables=['test_single0', 'stream0', 'test_single0_mrel'])

  octopus.drop_all()
  _restore(octopus, 'test_single.sql')

  result = octopus.execute('SELECT count FROM test_single0').first()
  assert result['count'] == 10

  # We didn't dump this one
  result = list(octopus.execute(
    'SELECT * FROM pg_class WHERE relname LIKE \'%%test_single1%%\''))
  assert not result

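# _dump and _restore are also defined outside this section. A minimal sketch,
# assuming they simply shell out to pg_dump/psql against the running octopus
# instance; the 'octopus' database name and the exact flags are assumptions,
# and the real helpers may differ.

def _dump(octopus, path, tables=None):
  cmd = ['pg_dump', '-h', 'localhost', '-p', str(octopus.port), '-f', path]
  for t in tables or []:
    cmd.extend(['-t', t])  # restrict the dump to the listed relations
  cmd.append('octopus')
  subprocess.check_call(cmd)


def _restore(octopus, path):
  subprocess.check_call(
    ['psql', '-h', 'localhost', '-p', str(octopus.port), '-d', 'octopus', '-f', path])
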
def test_join_with_where(octopus, clean_db):
  """
  Verify that stream-table joins using a WHERE clause work properly
  """
  num_cols = 4
  q = """
  SELECT s.col0::integer FROM stream0 s, wt WHERE s.col0 = 1 AND wt.col0 = 1
  """
  wt_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])
  octopus.create_table('wt', **wt_cols)
  octopus.create_table('wt_s', **wt_cols)

  wt = _generate_rows(num_cols, 64)
  s = _generate_rows(num_cols, 64)

  _insert(octopus, 'wt', wt, 0.1)
  _insert(octopus, 'wt_s', s, 0.1)

  octopus.create_stream('stream0', **wt_cols)
  octopus.create_cv('test_join_where', q)
  _insert(octopus, 'stream0', s)

  expected = octopus.execute(
    'SELECT COUNT(*) FROM wt_s s, wt WHERE s.col0 = 1 AND wt.col0 = 1').first()
  result = octopus.execute('SELECT COUNT(*) FROM test_join_where').first()

  assert result['count'] == expected['count']

def test_prepared_extended(octopus, clean_db):
  """
  Verify that we can write to streams using the extended protocol. This
  test shells out to a binary because psycopg2 doesn't use the extended
  protocol.
  """
  octopus.create_stream('extended_stream', x='int', y='int', z='int')
  q = """
  SELECT COUNT(x::integer) AS x, COUNT(y::integer) AS y, COUNT(z::integer) AS z
  FROM extended_stream
  """
  octopus.create_cv('test_prepared_extended', q)

  # This will insert 1000 rows via a parameterized insert, and 1000 via an
  # unparameterized insert
  cmd = ['./extended', 'octopus', str(octopus.port), 'extended_stream', '1000']

  stdout, stderr = subprocess.Popen(cmd).communicate()

  assert stdout is None
  assert stderr is None

  rows = list(octopus.execute('SELECT x, y, z FROM test_prepared_extended'))
  assert len(rows) == 1

  result = rows[0]

  assert result['x'] == 2000
  assert result['y'] == 2000
  assert result['z'] == 2000

def test_filter_clause(octopus, clean_db):
  """
  Verify that FILTER clauses work on aggregates and sliding window aggregates
  """
  octopus.create_stream('test_filter_stream', x='int')
  q = """
  SELECT SUM(x::int) FILTER (WHERE mod(x, 2) = 0) AS sum2,
         SUM(x::int) FILTER (WHERE mod(x, 3) = 0) AS sum3
  FROM test_filter_stream
  """
  sw = """
  WHERE arrival_timestamp > clock_timestamp() - interval '30 second'
  """
  octopus.create_cv('test_filter', q)
  octopus.create_cv('test_filter_sw', '%s %s' % (q, sw))

  desc = ('x', )
  rows = []
  for n in range(1000):
    rows.append((n, ))

  octopus.insert('test_filter_stream', desc, rows)

  sum2 = sum(filter(lambda x: x % 2 == 0, map(lambda x: x[0], rows)))
  sum3 = sum(filter(lambda x: x % 3 == 0, map(lambda x: x[0], rows)))

  result1 = octopus.execute('SELECT * FROM test_filter').first()
  result2 = octopus.execute('SELECT * FROM test_filter_sw').first()

  assert result1['sum2'] == result2['sum2'] == sum2
  assert result1['sum3'] == result2['sum3'] == sum3

def test_fss_agg(octopus, clean_db):
  """
  Verify that fss_agg tracks the most frequent values for each group
  """
  octopus.create_stream('test_fss_stream', x='int', k='text')
  q = """
  SELECT k::text, fss_agg(x::int, 5) FROM test_fss_stream GROUP BY k
  """
  desc = ('k', 'x')
  octopus.create_cv('test_fss_agg', q)

  items = range(14)
  random.shuffle(items)
  a_items = items
  b_items = list(reversed(items))

  values = map(lambda i: ('a', i), get_geometric_dist(a_items))
  values.extend(map(lambda i: ('b', i), get_geometric_dist(b_items)))
  random.shuffle(values)

  octopus.insert('test_fss_stream', desc, values)

  result = list(octopus.execute(
    'SELECT k, fss_topk_values(fss_agg) FROM test_fss_agg ORDER BY k'))

  topk = map(int, result[0][1].rstrip('}').lstrip('{').split(','))
  assert sorted(topk) == sorted(a_items[-5:])

  topk = map(int, result[1][1].rstrip('}').lstrip('{').split(','))
  assert sorted(topk) == sorted(b_items[-5:])

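# get_geometric_dist is defined elsewhere in the suite. A sketch of what it is
# assumed to do, based on the assertions above: items later in the input list
# are repeated geometrically more often, so items[-5:] become the expected
# top-5 values for fss_topk_values.

def get_geometric_dist(items):
  out = []
  for i, x in enumerate(items):
    out.extend([x] * (2 ** i))  # the i-th item appears 2**i times
  return out
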
def test_restart_recovery(octopus, clean_db):
  """
  Verify that continuous view results survive a server restart
  """
  octopus.create_stream('stream0', x='int')
  q = 'SELECT COUNT(*) FROM stream0'
  octopus.create_cv('test_restart_recovery', q)

  octopus.insert('stream0', ['x'], [(1, ), (1, )])
  result = octopus.execute('SELECT * FROM test_restart_recovery').first()
  assert result['count'] == 2

  # Need to sleep here, otherwise on restart the materialization table is
  # empty. Not sure why.
  time.sleep(0.1)

  # Restart.
  octopus.stop()
  octopus.run()

  result = octopus.execute('SELECT * FROM test_restart_recovery').first()
  assert result['count'] == 2

  octopus.insert('stream0', ['x'], [(1, ), (1, )])
  result = octopus.execute('SELECT * FROM test_restart_recovery').first()
  assert result['count'] == 4

def test_simple_crash(octopus, clean_db):
  """
  Test simple worker and combiner crashes.
  """
  octopus.create_stream('stream0', x='int')
  q = 'SELECT COUNT(*) FROM stream0'
  octopus.create_cv('test_simple_crash', q)

  octopus.insert('stream0', ['x'], [(1, ), (1, )])
  result = octopus.execute('SELECT * FROM test_simple_crash').first()
  assert result['count'] == 2

  # This batch can potentially get lost.
  octopus.insert('stream0', ['x'], [(1, ), (1, )])

  assert kill_worker()

  octopus.insert('stream0', ['x'], [(1, ), (1, )])
  result = octopus.execute('SELECT * FROM test_simple_crash').first()
  assert result['count'] in [4, 6]

  # This batch can potentially get lost.
  octopus.insert('stream0', ['x'], [(1, ), (1, )])

  assert kill_combiner()

  octopus.insert('stream0', ['x'], [(1, ), (1, )])
  result = octopus.execute('SELECT * FROM test_simple_crash').first()
  assert result['count'] in [6, 8, 10]

  # To ensure that all remaining events in ZMQ queues have been consumed
  time.sleep(2)

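# kill_worker and kill_combiner are provided by the test harness and are not
# shown in this section. They are expected to SIGKILL one of octopus's
# background worker/combiner processes and return a truthy count of processes
# killed. A rough sketch, assuming those processes can be located by grepping
# process titles for 'worker'/'combiner' (an assumption about how octopus
# names its background processes):

import os
import signal

def _kill_background_proc(pattern):
  out = subprocess.Popen(['pgrep', '-f', pattern],
                         stdout=subprocess.PIPE).communicate()[0]
  pids = [int(p) for p in out.split()]
  if not pids:
    return 0
  os.kill(pids[0], signal.SIGKILL)
  return 1


def kill_worker():
  return _kill_background_proc('octopus.*worker')


def kill_combiner():
  return _kill_background_proc('octopus.*combiner')
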
def test_bloom_contains(octopus, clean_db):
  """
  Verify that bloom_contains works
  """
  octopus.create_stream('test_bloom_stream', x='int')

  q = """
  SELECT bloom_agg(x::integer) FROM test_bloom_stream
  """
  desc = ('x', )
  octopus.create_cv('test_bloom_contains', q)

  rows = []
  for i in range(10000):
    rows.append((2 * i, ))

  octopus.insert('test_bloom_stream', desc, rows)

  cvq = """
  SELECT
    bloom_contains(bloom_agg, 0),
    bloom_contains(bloom_agg, 5000),
    bloom_contains(bloom_agg, 1),
    bloom_contains(bloom_agg, 5001)
  FROM test_bloom_contains
  """
  result = list(octopus.execute(cvq))

  assert len(result) == 1

  result = result[0]

  assert result[0] == True
  assert result[1] == True
  assert result[2] == False
  assert result[3] == False

def assert_result_changes(func, args):
  """
  Verifies that the result of the given function changes with time
  """
  octopus.create_stream('stream0', x='int', y='text', z='int')
  name = 'assert_%s_decreases' % func
  octopus.create_cv(
    name,
    "SELECT %s(%s) FROM stream0 WHERE arrival_timestamp > clock_timestamp() - interval '2 seconds'"
    % (func, args))

  rows = [(n, str(n), n + 1) for n in range(1000)]
  octopus.insert('stream0', ('x', 'y', 'z'), rows)

  current = 1
  results = []

  while current:
    row = octopus.execute('SELECT * FROM %s' % name).first()
    current = row[func]
    if current is None:
      break
    results.append(current)

  # Verify that we actually read something
  assert results

  octopus.drop_cv(name)

def test_concurrent_vacuum_full(octopus, clean_db):
  """
  Verify that VACUUM FULL works while a continuous view is being written to
  """
  octopus.create_stream('test_vacuum_stream', x='int')
  octopus.create_cv(
    'test_vacuum_full',
    'SELECT x::int, COUNT(*) FROM test_vacuum_stream GROUP BY x')

  stop = False

  def insert():
    while not stop:
      values = [(random.randint(0, 1000000), ) for _ in xrange(1000)]
      octopus.insert('test_vacuum_stream', ('x', ), values)
      time.sleep(0.01)

  threads = [threading.Thread(target=insert) for _ in range(4)]
  map(lambda t: t.start(), threads)

  # Insert data for a little bit so we have enough work to do while
  # vacuuming.
  time.sleep(20)

  conn = psycopg2.connect('dbname=octopus user=%s host=localhost port=%s' %
                          (getpass.getuser(), octopus.port))
  conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
  cur = conn.cursor()
  cur.execute('VACUUM FULL test_vacuum_full')
  conn.close()

  # Now kill the insert threads.
  stop = True
  map(lambda t: t.join(), threads)

def test_simple_aggs(octopus, clean_db):
  """
  Verify that combines work properly on simple aggs
  """
  q = """
  SELECT x::integer %% 10 AS k,
         avg(x), sum(y::float8), count(*) FROM stream0 GROUP BY k;
  """
  desc = ('x', 'y')
  octopus.create_stream('stream0', x='int', y='float8')
  octopus.create_cv('test_simple_aggs', q)
  octopus.create_table('test_simple_aggs_t', x='integer', y='float8')

  rows = []
  for n in range(10000):
    row = (random.randint(0, 1000), random.random())
    rows.append(row)

  octopus.insert('stream0', desc, rows)
  octopus.insert('test_simple_aggs_t', desc, rows)

  table_result = list(octopus.execute(
    'SELECT avg(x), sum(y::float8), count(*) FROM test_simple_aggs_t'))
  cv_result = list(octopus.execute(
    'SELECT combine(avg), combine(sum), combine(count) FROM test_simple_aggs'))

  assert len(table_result) == len(cv_result)

  for tr, cr in zip(table_result, cv_result):
    assert abs(tr[0] - cr[0]) < 0.00001
    assert abs(tr[1] - cr[1]) < 0.00001
    assert abs(tr[2] - cr[2]) < 0.00001

def test_multiple_databases(octopus, clean_db):
  """
  Verify that continuous views in separate databases are isolated from
  each other
  """
  conn = psycopg2.connect('dbname=octopus user=%s host=localhost port=%s' %
                          (getpass.getuser(), octopus.port))
  conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
  cur = conn.cursor()
  cur.execute('CREATE DATABASE tmp_octopus')
  cur.close()

  q = 'SELECT x::int FROM dbstream'
  octopus.create_stream('dbstream', x='int')
  octopus.create_cv('test_multiple_databases', q)

  # Insert data in first database.
  octopus.insert('dbstream', ['x'], map(lambda x: (x, ), range(0, 10, 2)))
  result = octopus.execute('SELECT * FROM test_multiple_databases')
  assert sorted(row['x'] for row in result) == range(0, 10, 2)

  # Create same CV in the other database, make sure it's created and write
  # different data to it.
  tmp_conn = psycopg2.connect('dbname=tmp_octopus user=%s host=localhost port=%s' %
                              (getpass.getuser(), octopus.port))
  cur = tmp_conn.cursor()
  cur.execute('CREATE STREAM dbstream (x int)')
  cur.execute('CREATE CONTINUOUS VIEW test_multiple_databases AS %s' % q)
  tmp_conn.commit()

  cur.execute('INSERT INTO dbstream (x) VALUES %s' %
              ', '.join(map(lambda x: '(%d)' % x, range(1, 11, 2))))
  cur.execute('SELECT * FROM test_multiple_databases')
  tmp_conn.commit()
  assert sorted(row[0] for row in cur) == range(1, 11, 2)

  # Ensure that the data written to the other database isn't seen by the
  # first database.
  result = octopus.execute('SELECT * FROM test_multiple_databases')
  assert sorted(row['x'] for row in result) == range(0, 10, 2)

  # Insert new data to both databases.
  octopus.insert('dbstream', ['x'], map(lambda x: (x, ), range(10, 20, 2)))
  cur.execute('INSERT INTO dbstream (x) VALUES %s' %
              ', '.join(map(lambda x: '(%d)' % x, range(11, 21, 2))))

  # Ensure both databases still saw the data written out to them.
  result = octopus.execute('SELECT * FROM test_multiple_databases')
  assert sorted(row['x'] for row in result) == range(0, 20, 2)

  cur.execute('SELECT * FROM test_multiple_databases')
  tmp_conn.commit()
  assert sorted(row[0] for row in cur) == range(1, 21, 2)

  cur.close()
  tmp_conn.close()

  cur = conn.cursor()
  cur.execute('DROP DATABASE tmp_octopus')
  cur.close()
  conn.close()

def test_create_views(octopus, clean_db):
  """
  Verify that the maximum number of continuous queries is enforced
  """
  cvs = []
  octopus.create_stream('stream0', x='int')
  q = 'SELECT count(*) FROM stream0'

  for i in xrange(1, MAX_CQS):
    cvs.append('cv_%d' % i)
    octopus.create_cv(cvs[-1], q)

  try:
    octopus.create_cv('cv_fail', q)
    assert False
  except Exception, e:
    assert 'maximum number of continuous queries exceeded' in e.message

def test_uniqueness(octopus, clean_db):
  """
  Verify that each group appears exactly once in the continuous view output
  """
  octopus.create_stream('stream0', x='int')
  octopus.create_cv('uniqueness',
                    'SELECT x::int, count(*) FROM stream0 GROUP BY x')

  for i in range(10):
    rows = [((10000 * i) + j, ) for j in xrange(10000)]
    octopus.insert('stream0', ('x', ), rows)

  count = octopus.execute('SELECT count(*) FROM uniqueness').first()['count']
  distinct_count = octopus.execute(
    'SELECT count(DISTINCT x) FROM uniqueness').first()['count']

  assert count == distinct_count

def test_tdigest_agg(octopus, clean_db):
  """
  Test tdigest_agg, tdigest_cdf and tdigest_quantile, including combines
  """
  q = """
  SELECT k::integer, tdigest_agg(x::int) AS t FROM test_tdigest_stream
  GROUP BY k
  """
  desc = ('k', 'x')
  octopus.create_stream('test_tdigest_stream', k='int', x='int')
  octopus.create_cv('test_tdigest_agg', q)

  rows = []
  for _ in range(10):
    for n in range(1000):
      rows.append((0, n))
      rows.append((1, n + 500))

  octopus.insert('test_tdigest_stream', desc, rows)

  result = list(octopus.execute(
    'SELECT tdigest_quantile(t, 0.1) FROM test_tdigest_agg ORDER BY k').fetchall())
  assert len(result) == 2
  assert abs(int(result[0]['tdigest_quantile']) - 99) <= 1
  assert abs(int(result[1]['tdigest_quantile']) - 599) <= 1

  result = list(octopus.execute(
    'SELECT tdigest_quantile(combine(t), 0.1) FROM test_tdigest_agg').fetchall())
  assert len(result) == 1
  assert abs(int(result[0]['tdigest_quantile']) - 200) <= 4

  result = list(octopus.execute(
    'SELECT tdigest_cdf(t, 600) FROM test_tdigest_agg ORDER BY k').fetchall())
  assert len(result) == 2
  assert round(result[0]['tdigest_cdf'], 2) == 0.6
  assert round(result[1]['tdigest_cdf'], 2) == 0.1

  result = list(octopus.execute(
    'SELECT tdigest_cdf(combine(t), 600) FROM test_tdigest_agg').fetchall())
  assert len(result) == 1
  assert round(result[0]['tdigest_cdf'], 2) == 0.35

def test_multiple_insert(octopus, clean_db):
  """
  Verify that a continuous transform can insert into multiple output streams
  """
  octopus.create_stream('stream0', x='int')
  octopus.create_stream('stream1', x='int')
  octopus.create_stream('stream2', x='int')

  octopus.create_cv('cv0', 'SELECT count(*) FROM stream1')
  octopus.create_cv('cv1', 'SELECT count(*) FROM stream2')
  octopus.create_ct('ct1', 'SELECT x::int FROM stream0 WHERE mod(x, 2) = 0',
                    "octopus_stream_insert('stream1', 'stream2')")

  octopus.insert('stream0', ('x',), [(n,) for n in range(1000)])

  count = octopus.execute('SELECT count FROM cv0').first()['count']
  assert count == 500

  count = octopus.execute('SELECT count FROM cv1').first()['count']
  assert count == 500

def _test_hs_agg(octopus, agg):
  """
  Run a hypothetical-set aggregate over random values and return the expected
  rank, dense rank, peer count, total count and the aggregate's actual result
  """
  values = [random.randint(-100, 100) for n in range(1000)]
  h = random.choice(values) + random.randint(-10, 10)

  octopus.create_stream('stream0', x='int')
  cq = 'SELECT %s(%d) WITHIN GROUP (ORDER BY x::integer) FROM stream0' % (agg, h)
  octopus.create_cv('test_%s' % agg, cq)
  octopus.insert('stream0', ('x',), [(v,) for v in values])

  result = octopus.execute('SELECT %s FROM test_%s' % (agg, agg)).first()

  rank, peers = _rank(h, values)
  dense_rank, _ = _dense_rank(h, values)

  return rank, dense_rank, peers, len(values), result[agg]

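# _rank and _dense_rank are the pure-Python references used by _test_hs_agg
# above. A sketch, assuming standard hypothetical-set semantics (the rank h
# would have if it were added to the group); the exact return shape is
# inferred from the call sites above.

def _rank(h, values):
  # rank = 1 + number of values strictly less than h;
  # peers = number of existing values equal to h.
  less = len([v for v in values if v < h])
  peers = len([v for v in values if v == h])
  return less + 1, peers


def _dense_rank(h, values):
  # dense_rank counts distinct values strictly less than h.
  less = len(set(v for v in values if v < h))
  peers = len([v for v in values if v == h])
  return less + 1, peers
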
def test_join_with_aggs(octopus, clean_db):
  """
  Verify that joins involving aggregates referencing columns from
  multiple tables work
  """
  num_cols = 4
  join_cols = [1]
  q = """
  SELECT sum(s.col0::integer) AS s0,
         sum(a0.col0::integer) AS s1,
         sum(a1.col0::integer) AS s2
  FROM a1 JOIN a0 ON a1.col1 = a0.col1
  JOIN stream0 s ON s.col1::integer = a0.col1
  """
  a0_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])
  a1_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])

  octopus.create_table('a0', **a0_cols)
  octopus.create_table('a1', **a1_cols)

  a0 = _generate_rows(num_cols, 64)
  a1 = _generate_rows(num_cols, 64)
  s = _generate_rows(num_cols, 64)

  _insert(octopus, 'a0', a0, 0.1)
  _insert(octopus, 'a1', a1, 0.1)

  octopus.create_stream('stream0', **a0_cols)
  octopus.create_cv('test_agg_join', q)
  _insert(octopus, 'stream0', s)

  expected = _join(a1, _join(a0, s, join_cols), join_cols)
  result = octopus.execute('SELECT * FROM test_agg_join').first()

  # sum of col0 from stream
  s0_expected = sum([r[num_cols * 2] for r in expected])
  # sum of col0 from a0
  s1_expected = sum([r[num_cols * 1] for r in expected])
  # sum of col0 from a1
  s2_expected = sum([r[num_cols * 0] for r in expected])

  assert s0_expected == result['s0']
  assert s1_expected == result['s1']
  assert s2_expected == result['s2']

def test_percentile_cont_agg(octopus, clean_db):
  """
  Verify that percentile_cont over a stream closely matches the same query
  run over a table
  """
  range_top = 100000
  q = [0.0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0]

  batches = []
  min_seen = range_top
  max_seen = 0
  for _ in xrange(10):
    b = [(random.randint(0, range_top), ) for _ in xrange(5000)]
    min_seen = min(min_seen, min(b)[0])
    max_seen = max(max_seen, max(b)[0])
    batches.append(b)

  octopus.create_stream('test_stream', x='int')
  query = '''SELECT percentile_cont(ARRAY[%s])
  WITHIN GROUP (ORDER BY x::integer) FROM %s''' % (
    ', '.join(map(lambda f: str(f), q)), '%s')

  octopus.create_cv('test_cq_percentile_cont', query % 'test_stream')
  octopus.create_table('test_percentile_cont', x='integer')

  for b in batches:
    octopus.insert('test_stream', ('x', ), b)
    octopus.insert('test_percentile_cont', ('x', ), b)

  actual = octopus.execute(query % 'test_percentile_cont')
  result = octopus.execute('SELECT * FROM test_cq_percentile_cont')

  actual = actual.first()['percentile_cont']
  result = result.first()['percentile_cont']

  assert len(actual) == len(result)
  assert result == sorted(result)

  diff = [abs(actual[i] - result[i]) for i in xrange(len(actual))]

  # 0th and 100th percentile should be accurate.
  assert result[0] == min_seen
  assert result[-1] == max_seen
  # 1st and 99th percentile should be within 0.1%.
  assert diff[1] <= 0.001 * range_top
  assert diff[-2] <= 0.001 * range_top
  # All percentiles should be within 0.5%.
  assert all(x <= 0.005 * range_top for x in diff)

def test_concurrent_crash(octopus, clean_db):
  """
  Test worker and combiner crashes while inserts are running concurrently.
  """
  octopus.create_stream('stream0', x='int')
  q = 'SELECT COUNT(*) FROM stream0'
  octopus.create_cv('test_concurrent_crash', q)

  batch_size = 25000
  desc = [0, 0, False]
  vals = [(1, )] * batch_size

  def insert():
    while True:
      octopus.insert('stream0', ['x'], vals)
      desc[1] += batch_size
      if desc[2]:
        break

  def kill():
    for _ in xrange(30):
      r = random.random()
      if r > 0.85:
        desc[0] += kill_combiner()
      if r < 0.15:
        desc[0] += kill_worker()
      time.sleep(0.1)
    desc[2] = True

  threads = [threading.Thread(target=insert), threading.Thread(target=kill)]

  map(lambda t: t.start(), threads)
  map(lambda t: t.join(), threads)

  num_killed = desc[0]
  num_inserted = desc[1]

  result = octopus.execute('SELECT count FROM test_concurrent_crash').first()

  assert num_killed > 0
  assert result['count'] <= num_inserted
  assert result['count'] >= num_inserted - (num_killed * batch_size)

  # To ensure that all remaining events in ZMQ queues have been consumed
  time.sleep(2)

def test_simple_insert(octopus, clean_db):
  """
  Verify that we can insert some rows and count some stuff
  """
  octopus.create_stream('stream0', key='int')
  octopus.create_cv('cv',
                    'SELECT key::integer, COUNT(*) FROM stream0 GROUP BY key')

  rows = [(n % 10, ) for n in range(1000)]
  octopus.insert('stream0', ('key', ), rows)

  result = list(octopus.execute('SELECT * FROM cv ORDER BY key'))

  assert len(result) == 10

  for i, row in enumerate(result):
    assert row['key'] == i
    assert row['count'] == 100