def test_indexed(octopus, clean_db): """ Verify that stream-table joins involving indexed tables work """ octopus.create_stream('stream0', x='int', y='int') q = """ SELECT stream0.x::integer, count(*) FROM stream0 JOIN test_indexed_t t ON stream0.x = t.x GROUP BY stream0.x """ octopus.create_table('test_indexed_t', x='integer', y='integer') octopus.execute('CREATE INDEX idx ON test_indexed_t(x)') t = _generate_rows(2, 1000) s = _generate_rows(2, 1000) octopus.insert('test_indexed_t', ('x', 'y'), t) time.sleep(0.1) octopus.create_cv('test_indexed', q) octopus.insert('stream0', ('x', 'y'), s) expected = _join(s, t, [0]) result = octopus.execute('SELECT sum(count) FROM test_indexed').first() assert result['sum'] == len(expected)
def test_incremental_join(octopus, clean_db): """ Verify that join results increase appropriately as we incrementally add stream events to the input """ num_cols = 4 join_cols = [0, 1] t_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)]) octopus.create_table('inc', **t_cols) octopus.create_stream('stream0', **t_cols) q = """ SELECT s.col0::integer FROM inc JOIN stream0 s ON inc.col0 = s.col0 AND inc.col1 = s.col1::integer """ t = _generate_rows(num_cols, 64) _insert(octopus, 'inc', t, 0.1) octopus.create_cv('test_join', q) s = [] for n in range(2): row = _generate_row(num_cols) _insert(octopus, 'stream0', [row]) s.append(row) expected = _join(t, s, join_cols) result = octopus.execute('SELECT COUNT(*) FROM test_join').first() assert result['count'] == len(expected)
def test_join_with_where(octopus, clean_db): """ Verify that stream-table joins using a WHERE clause work properly """ num_cols = 4 q = """ SELECT s.col0::integer FROM stream0 s, wt WHERE s.col0 = 1 AND wt.col0 = 1 """ wt_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)]) octopus.create_table('wt', **wt_cols) octopus.create_table('wt_s', **wt_cols) wt = _generate_rows(num_cols, 64) s = _generate_rows(num_cols, 64) _insert(octopus, 'wt', wt, 0.1) _insert(octopus, 'wt_s', s, 0.1) octopus.create_stream('stream0', **wt_cols) octopus.create_cv('test_join_where', q) _insert(octopus, 'stream0', s) expected = octopus.execute('SELECT COUNT(*) FROM wt_s s, wt WHERE s.col0 = 1 AND wt.col0 = 1').first() result = octopus.execute('SELECT COUNT(*) FROM test_join_where').first() assert result['count'] == expected['count']
def test_simple_aggs(octopus, clean_db): """ Verify that combines work properly on simple aggs """ q = """ SELECT x::integer %% 10 AS k, avg(x), sum(y::float8), count(*) FROM stream0 GROUP BY k; """ desc = ('x', 'y') octopus.create_stream('stream0', x='int', y='float8') octopus.create_cv('test_simple_aggs', q) octopus.create_table('test_simple_aggs_t', x='integer', y='float8') rows = [] for n in range(10000): row = (random.randint(0, 1000), random.random()) rows.append(row) octopus.insert('stream0', desc, rows) octopus.insert('test_simple_aggs_t', desc, rows) table_result = list( octopus.execute( 'SELECT avg(x), sum(y::float8), count(*) FROM test_simple_aggs_t')) cv_result = list( octopus.execute( 'SELECT combine(avg), combine(sum), combine(count) FROM test_simple_aggs' )) assert len(table_result) == len(cv_result) for tr, cr in zip(table_result, cv_result): assert abs(tr[0] - cr[0]) < 0.00001 assert abs(tr[1] - cr[1]) < 0.00001 assert abs(tr[2] - cr[2]) < 0.00001
def test_join_multiple_tables(octopus, clean_db): """ Verify that stream-table joins involving multiple tables work """ num_cols = 8 join_cols = [0] t0_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)]) t1_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)]) octopus.create_table('t0', **t0_cols) octopus.create_table('t1', **t1_cols) octopus.create_stream('stream0', **t0_cols) q = """ SELECT s.col0::integer FROM t0 JOIN t1 ON t0.col0 = t1.col0 JOIN stream0 s ON t1.col0 = s.col0 """ t0 = _generate_rows(num_cols, 64) t1 = _generate_rows(num_cols, 64) s = _generate_rows(num_cols, 64) _insert(octopus, 't1', t1, 0.1) _insert(octopus, 't0', t0, 0.1) octopus.create_cv('test_join_multi', q) _insert(octopus, 'stream0', s) expected = _join(t0, _join(s, t1, join_cols), join_cols) result = octopus.execute('SELECT COUNT(*) FROM test_join_multi').first() assert result['count'] == len(expected)
def test_join_across_batches(octopus, clean_db): """ Verify that stream-table joins are properly built when they span across multiple input batches """ num_cols = 4 join_cols = [0] t_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)]) octopus.create_table('batch', **t_cols) octopus.create_stream('stream0', **t_cols) q = """ SELECT s.col0::integer FROM batch JOIN stream0 s ON batch.col0 = s.col0 """ t = _generate_rows(num_cols, 64) _insert(octopus, 'batch', t, 0.1) s = _generate_rows(num_cols, 64) octopus.create_cv('test_batched_join', q) _insert(octopus, 'stream0', s) expected = _join(t, s, join_cols) result = octopus.execute('SELECT COUNT(*) FROM test_batched_join').first() assert result['count'] == len(expected)
def test_join_ordering(octopus, clean_db): """ Verify that the correct plan is generated regardless of the ordering of streams and tables. """ num_cols = 8 join_cols = [0] ordering0_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)]) ordering1_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)]) octopus.create_table('ordering0', **ordering0_cols) octopus.create_table('ordering1', **ordering1_cols) ordering0 = _generate_rows(num_cols, 64) ordering1 = _generate_rows(num_cols, 64) _insert(octopus, 'ordering0', ordering0, 0.1) _insert(octopus, 'ordering1', ordering1, 0.1) octopus.create_stream('stream0', **ordering0_cols) # stream, table, table q0 = """ SELECT s.col0::integer, ordering0.col3, ordering1.col4 FROM stream0 s JOIN ordering0 ON s.col0 = ordering0.col0 JOIN ordering1 ON ordering0.col0 = ordering1.col0 """ octopus.create_cv('test_ordering0', q0) # table, stream, table q1 = """ SELECT s.col0::integer, ordering0.col3, ordering1.col4 FROM ordering0 JOIN stream0 s ON s.col0 = ordering0.col0 JOIN ordering1 ON ordering0.col0 = ordering1.col0 """ octopus.create_cv('test_ordering1', q1) # table, table, stream q2 = """ SELECT s.col0::integer, ordering0.col3, ordering1.col4 FROM ordering0 JOIN ordering1 ON ordering0.col0 = ordering1.col0 JOIN stream0 s ON s.col0 = ordering0.col0 """ octopus.create_cv('test_ordering2', q2) s = _generate_rows(num_cols, 64) _insert(octopus, 'stream0', s) expected = _join(ordering0, _join(ordering1, s, join_cols), join_cols) result0 = octopus.execute('SELECT COUNT(*) FROM test_ordering0').first() result1 = octopus.execute('SELECT COUNT(*) FROM test_ordering1').first() result2 = octopus.execute('SELECT COUNT(*) FROM test_ordering2').first() assert result0['count'] == len(expected) assert result1['count'] == len(expected) assert result2['count'] == len(expected)
def test_hll_type(octopus, clean_db): octopus.create_table('test_hll_type', x='int', y='hll') octopus.execute('INSERT INTO test_hll_type (x, y) VALUES ' '(1, hll_empty()), (2, hll_empty())') for i in xrange(1000): octopus.execute('UPDATE test_hll_type SET y = hll_add(y, %d / x)' % i) result = list( octopus.execute('SELECT hll_cardinality(y) ' 'FROM test_hll_type ORDER BY x')) assert result[0][0] == 995 assert result[1][0] == 497
def test_cmsketch_type(octopus, clean_db): octopus.create_table('test_cmsketch_type', x='int', y='cmsketch') octopus.execute('INSERT INTO test_cmsketch_type (x, y) VALUES ' '(1, cmsketch_empty()), (2, cmsketch_empty())') for i in xrange(1000): octopus.execute('UPDATE test_cmsketch_type ' 'SET y = cmsketch_add(y, {} %% x)'.format(i)) result = list(octopus.execute('SELECT cmsketch_frequency(y, 0), ' 'cmsketch_frequency(y, 1) ' 'FROM test_cmsketch_type ORDER BY x')) assert result[0] == (1000, 0) assert result[1] == (500, 500)
def test_tdigest_type(octopus, clean_db): octopus.create_table('test_tdigest_type', x='int', y='tdigest') octopus.execute('INSERT INTO test_tdigest_type (x, y) VALUES ' '(1, tdigest_empty()), (2, tdigest_empty())') for i in xrange(1000): octopus.execute('UPDATE test_tdigest_type ' 'SET y = tdigest_add(y, {} %% (x * 500))'.format(i)) result = list( octopus.execute('SELECT tdigest_cdf(y, 400), ' 'tdigest_quantile(y, 0.9)' 'FROM test_tdigest_type ORDER BY x')) assert map(lambda x: round(x, 1), result[0]) == [0.8, 449.5] assert map(lambda x: round(x, 1), result[1]) == [0.4, 899.5]
def test_join_with_aggs(octopus, clean_db): """ Verify that joins involving aggregates referencing columns from multiple tables work """ num_cols = 4 join_cols = [1] q = """ SELECT sum(s.col0::integer) AS s0, sum(a0.col0::integer) AS s1, sum(a1.col0::integer) AS s2 FROM a1 JOIN a0 ON a1.col1 = a0.col1 JOIN stream0 s ON s.col1::integer = a0.col1 """ a0_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)]) a1_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)]) octopus.create_table('a0', **a0_cols) octopus.create_table('a1', **a1_cols) a0 = _generate_rows(num_cols, 64) a1 = _generate_rows(num_cols, 64) s = _generate_rows(num_cols, 64) _insert(octopus, 'a0', a0, 0.1) _insert(octopus, 'a1', a1, 0.1) octopus.create_stream('stream0', **a0_cols) octopus.create_cv('test_agg_join', q) _insert(octopus, 'stream0', s) expected = _join(a1, _join(a0, s, join_cols), join_cols) result = octopus.execute('SELECT * FROM test_agg_join').first() # sum of col0 from stream s0_expected = sum([r[num_cols * 2] for r in expected]) # sum of col0 from a0 s1_expected = sum([r[num_cols * 1] for r in expected]) # sum of col0 from a1 s2_expected = sum([r[num_cols * 0] for r in expected]) assert s0_expected == result['s0'] assert s1_expected == result['s1'] assert s2_expected == result['s2']
def test_percentile_cont_agg(octopus, clean_db): range_top = 100000 q = [0.0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0] batches = [] min_seen = range_top max_seen = 0 for _ in xrange(10): b = [(random.randint(0, range_top), ) for _ in xrange(5000)] min_seen = min(min_seen, min(b)[0]) max_seen = max(max_seen, max(b)[0]) batches.append(b) octopus.create_stream('test_stream', x='int') query = '''SELECT percentile_cont(ARRAY[%s]) WITHIN GROUP (ORDER BY x::integer) FROM %s ''' % (', '.join(map(lambda f: str(f), q)), '%s') octopus.create_cv('test_cq_percentile_cont', query % 'test_stream') octopus.create_table('test_percentile_cont', x='integer') for b in batches: octopus.insert('test_stream', ('x', ), b) octopus.insert('test_percentile_cont', ('x', ), b) actual = octopus.execute(query % 'test_percentile_cont') result = octopus.execute('SELECT * FROM test_cq_percentile_cont') actual = actual.first()['percentile_cont'] result = result.first()['percentile_cont'] assert len(actual) == len(result) assert result == sorted(result) diff = [abs(actual[i] - result[i]) for i in xrange(len(actual))] # 0th and 100th percentile should be accurate. assert result[0] == min_seen assert result[-1] == max_seen # 1st and 99th percentile should be within 0.1%. assert diff[1] <= 0.001 * range_top assert diff[-2] <= 0.001 * range_top # All percentiles should be within 0.5%. assert all(x <= 0.005 * range_top for x in diff)
def test_cont_transforms(octopus, clean_db): octopus.execute('CREATE STREAM cv_stream (x int, y text)') octopus.execute('CREATE STREAM ct_stream (x int, y text)') octopus.create_cv('test_cv', 'SELECT count(*) FROM cv_stream') octopus.create_ct('test_ct1', 'SELECT x::int, y::text FROM ct_stream WHERE mod(x, 2) = 0', "octopus_stream_insert('cv_stream', 'cv_stream')") octopus.create_table('test_t', x='int', y='text') octopus.execute(''' CREATE OR REPLACE FUNCTION test_tg() RETURNS trigger AS $$ BEGIN INSERT INTO test_t (x, y) VALUES (NEW.x, NEW.y); RETURN NEW; END; $$ LANGUAGE plpgsql; ''') octopus.create_ct('test_ct2', 'SELECT x::int, y::text FROM ct_stream', 'test_tg()') octopus.insert('ct_stream', ('x', 'y'), [(1, 'hello'), (2, 'world')]) time.sleep(1) _dump(octopus, 'test_cont_transform.sql') octopus.drop_all() octopus.drop_table('test_t') octopus.execute('DROP FUNCTION test_tg()') _restore(octopus, 'test_cont_transform.sql') octopus.insert('ct_stream', ('x', 'y'), [(1, 'hello'), (2, 'world')]) time.sleep(1) assert octopus.execute('SELECT count FROM test_cv').first()['count'] == 4 ntups = 0 for row in octopus.execute('SELECT x, count(*) FROM test_t GROUP BY x'): assert row['count'] == 2 assert row['x'] in (1, 2) ntups += 1 assert ntups == 2
def test_distinct(octopus, clean_db): """ Verify that streaming SELECT DISTINCT ON (...) works """ octopus.create_stream('stream0', x='int', y='int', z='int') octopus.create_table('table0', x='int', y='int', z='int') q = 'SELECT DISTINCT ON (x::int, y::int - z::int) x::int, y::int FROM stream0' octopus.create_cv('test_distinct', q) uniques = defaultdict(set) values = [] for _ in xrange(2000): x, y, z = random.randint(0, 20), random.randint(0, 20), random.randint( 0, 20) values.append((x, y, z)) uniques[(x, y - z)].add(y) octopus.insert('stream0', ['x', 'y', 'z'], values) octopus.insert('table0', ['x', 'y', 'z'], values) q = """ SELECT DISTINCT ON (x::int, y::int - z::int) x::int, y::int FROM table0 """ expected = list(octopus.execute(q)) expected = len(expected) assert expected < 2000 result = octopus.execute('SELECT COUNT(*) FROM test_distinct').first() assert expected == result['count'] # Check if the first row was selected for uniques result = octopus.execute('SELECT * FROM test_distinct') reverse_uniques = defaultdict(set) for (x, _), ys in uniques.iteritems(): for y in ys: reverse_uniques[y].add(x) for row in result: assert row['x'] in reverse_uniques[row['y']]
def test_object_aggs(octopus, clean_db): """ Verify that combines work properly on object aggs """ q = """ SELECT x::integer %% 10 AS k, json_agg(x), json_object_agg(x, y::float8), string_agg(s::text, \' :: \')FROM stream0 GROUP BY k; """ desc = ('x', 'y', 's') octopus.create_stream('stream0', x='int', y='float8', s='text') octopus.create_cv('test_object_aggs', q) octopus.create_table('test_object_aggs_t', x='integer', y='float8', s='text') rows = [] for n in range(10000): row = (random.randint(0, 1000), random.random(), str(n) * random.randint(1, 8)) rows.append(row) octopus.insert('stream0', desc, rows) octopus.insert('test_object_aggs_t', desc, rows) tq = """ SELECT json_agg(x), json_object_agg(x, y::float8), string_agg(s::text, \' :: \') FROM test_object_aggs_t """ table_result = list(octopus.execute(tq)) cq = """ SELECT combine(json_agg), combine(json_object_agg), combine(string_agg) FROM test_object_aggs """ cv_result = list(octopus.execute(cq)) assert len(table_result) == len(cv_result) for tr, cr in zip(table_result, cv_result): assert sorted(tr[0]) == sorted(cr[0]) assert sorted(tr[1]) == sorted(cr[1]) assert sorted(tr[2]) == sorted(cr[2])
def test_hypothetical_set_aggs(octopus, clean_db): """ Verify that combines work properly on HS aggs """ q = """ SELECT x::integer %% 10 AS k, rank(256) WITHIN GROUP (ORDER BY x), dense_rank(256) WITHIN GROUP (ORDER BY x) FROM stream0 GROUP BY k """ desc = ('x', 'y') octopus.create_stream('stream0', x='int', y='float8') octopus.create_cv('test_hs_aggs', q) octopus.create_table('test_hs_aggs_t', x='integer', y='float8') rows = [] for n in range(10000): row = (random.randint(0, 1000), random.random()) rows.append(row) octopus.insert('stream0', desc, rows) octopus.insert('test_hs_aggs_t', desc, rows) # Note that the CQ will use the HLL variant of dense_rank, # so use hll_dense_rank on the table too tq = """ SELECT rank(256) WITHIN GROUP (ORDER BY x), hll_dense_rank(256) WITHIN GROUP (ORDER BY x) FROM test_hs_aggs_t """ table_result = list(octopus.execute(tq)) cq = """ SELECT combine(rank), combine(dense_rank) FROM test_hs_aggs """ cv_result = list(octopus.execute(cq)) assert len(table_result) == len(cv_result) for tr, cr in zip(table_result, cv_result): assert tr == cr
def test_colums_subset(octopus, clean_db): """ Verify that copying data from a file into a stream works when the file's input columns are a subset of the stream0's columns """ octopus.create_stream('stream0', x='int', y='float8', z='numeric', m='int') q = 'SELECT sum(x::integer) AS s0, sum(y::float8) AS s1, avg(z::numeric), max(m::integer) FROM stream0' octopus.create_cv('test_copy_subset', q) octopus.create_table('test_copy_subset_t', x='integer', y='float8', z='numeric') path = os.path.abspath(os.path.join(octopus.tmp_dir, 'test_copy.csv')) rows = [] for n in range(10000): row = random.randint(1, 1024), random.randint(1, 1024), random.random() rows.append(row) _generate_csv(path, rows, desc=('x', 'y', 'z')) octopus.execute( 'COPY test_copy_subset_t (x, y, z) FROM \'%s\' HEADER CSV' % path) octopus.execute('COPY stream0 (x, y, z) FROM \'%s\' HEADER CSV' % path) expected = octopus.execute( 'SELECT sum(x::integer) AS s0, sum(y::float8) AS s1, avg(z::numeric) FROM test_copy_subset_t' ).first() result = list(octopus.execute('SELECT s0, s1, avg FROM test_copy_subset')) assert len(result) == 1 result = result[0] assert result[0] == expected[0] assert result[1] == expected[1] assert result[2] == expected[2]
def test_windowed_combine(octopus, clean_db): """ Verify that windowed queries with combines work """ q = """ SELECT x::integer, avg(y::integer) FROM stream0 GROUP BY x """ desc = ('x', 'y') octopus.create_stream('stream0', x='int', y='float8') octopus.create_cv('test_windowed_combine', q) octopus.create_table('test_windowed_combine_t', x='integer', y='integer') rows = [] for n in range(10000): row = (n, n) rows.append(row) octopus.insert('stream0', desc, rows) octopus.insert('test_windowed_combine_t', desc, rows) table = """ SELECT first_value(x) OVER w, avg(y) OVER w FROM test_windowed_combine_t WINDOW w AS (ORDER BY x ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING) ORDER BY first_value """ expected = list(octopus.execute(table)) combine = """ SELECT first_value(x) OVER w, avg(avg) OVER w FROM test_windowed_combine WINDOW w AS (ORDER BY x ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING) ORDER BY first_value """ actual = list(octopus.execute(combine)) for e, a in zip(expected, actual): assert e == a
def test_null_groups(octopus, clean_db): """ Verify that null group columns are considered equal """ octopus.create_stream('s', x='int', y='int', z='int') q = """ SELECT x::integer, y::integer, z::integer, COUNT(*) FROM s GROUP BY x, y, z; """ desc = ('x', 'y', 'z') octopus.create_cv('test_null_groups', q) octopus.create_table('test_null_groups_t', x='integer', y='integer', z='integer') rows = [] for n in range(10000): vals = list(random.randint(0, 10) for n in range(3)) vals = map(lambda n: random.random() > 0.1 and n or None, vals) rows.append(tuple(vals)) octopus.insert('s', desc, rows) octopus.insert('test_null_groups_t', desc, rows) table_q = """ SELECT x, y, z, COUNT(*) FROM test_null_groups_t GROUP BY x, y, z ORDER BY x, y, z; """ expected = list(octopus.execute(table_q)) result = list( octopus.execute( 'SELECT x, y, z, count FROM test_null_groups ORDER BY x, y, z')) for r, e in zip(result, expected): assert r == e
def test_stats_aggs(octopus, clean_db): """ Verify that combines work on stats aggs """ q = """ SELECT x::integer %% 10 AS k, regr_sxx(x, y::float8), stddev(x) FROM stream0 GROUP BY k; """ desc = ('x', 'y') octopus.create_stream('stream0', x='int', y='float8') octopus.create_cv('test_stats_aggs', q) octopus.create_table('test_stats_aggs_t', x='integer', y='float8') rows = [] for n in range(10000): row = (random.randint(0, 1000), random.random()) rows.append(row) octopus.insert('stream0', desc, rows) octopus.insert('test_stats_aggs_t', desc, rows) tq = """ SELECT regr_sxx(x, y::float8), stddev(x) FROM test_stats_aggs_t """ table_result = list(octopus.execute(tq)) cq = """ SELECT combine(regr_sxx), combine(stddev) FROM test_stats_aggs """ cv_result = list(octopus.execute(cq)) assert len(table_result) == len(cv_result) for tr, cr in zip(table_result, cv_result): assert abs(tr[0] - cr[0]) < 0.00001 assert abs(tr[1] - cr[1]) < 0.00001
def test_hll_distinct(octopus, clean_db): """ Verify that combines work on HLL COUNT DISTINCT queries """ q = """ SELECT x::integer %% 10 AS k, COUNT(DISTINCT x) AS count FROM stream0 GROUP BY k """ desc = ('x', 'y') octopus.create_stream('stream0', x='int', y='float8') octopus.create_cv('test_hll_distinct', q) octopus.create_table('test_hll_distinct_t', x='integer', y='float8') rows = [] for n in range(10000): row = (random.randint(0, 1000), random.random()) rows.append(row) octopus.insert('stream0', desc, rows) octopus.insert('test_hll_distinct_t', desc, rows) # Note that the CQ will use the HLL variant of COUNT DISTINCT, # so use hll_count_distinct on the table too tq = """ SELECT hll_count_distinct(x) FROM test_hll_distinct_t """ table_result = list(octopus.execute(tq)) cq = """ SELECT combine(count) FROM test_hll_distinct """ cv_result = list(octopus.execute(cq)) assert len(table_result) == len(cv_result) for tr, cr in zip(table_result, cv_result): assert tr == cr
def test_binary_upgrade(octopus, clean_db): """ Verify that binary upgrades properly transfer all objects and data into the new installation """ # Create some regular tables with data, and create an index on half of them for n in range(16): name = 't_%d' % n octopus.create_table(name, x='integer', y='text', z='text') rows = [(x, name, name) for x in range(1000)] octopus.insert(name, ('x', 'y', 'z'), rows) if n >= 8: octopus.execute('CREATE INDEX idx_%s ON %s(y)' % (name, name)) # Create some streams for n in range(8): name = 's_%d' % n octopus.create_stream(name, x='integer', y='text') # Now create some CVs with data, some with indices for n in range(32): name = 'cv_%d' % n octopus.create_stream('stream_%d' % n, x='int', y='text', z='text') octopus.create_cv( name, 'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM stream_%d GROUP BY z' % n) rows = [(x, name, name) for x in range(1000)] octopus.insert('stream_%d' % n, ('x', 'y', 'z'), rows) if n >= 16: octopus.execute('CREATE INDEX idx_%s ON %s(z)' % (name, name)) # Now create some in another namespace octopus.execute('CREATE SCHEMA namespace') for n in range(8): name = 'namespace.cv_%d' % n octopus.create_stream('namespace.stream_%d' % n, x='int', y='text', z='text') octopus.create_cv( name, 'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM namespace.stream_%d GROUP BY z' % n) rows = [(x, name, name) for x in range(1000)] octopus.insert('namespace.stream_%d' % n, ('x', 'y', 'z'), rows) if n >= 4: octopus.execute('CREATE INDEX namespace_idx_%d ON %s(z)' % (n, name)) create_fn = """ CREATE OR REPLACE FUNCTION tg_fn() RETURNS trigger AS $$ BEGIN RETURN NEW; END; $$ LANGUAGE plpgsql; """ octopus.execute(create_fn) octopus.create_stream('stream0', z='text') # Create some transforms for n in range(8): name = 'ct_%d' % n octopus.create_ct(name, 'SELECT z::text FROM stream0', 'tg_fn()') time.sleep(10) old_bin_dir = new_bin_dir = octopus.bin_dir old_data_dir = octopus.data_dir new_data_dir = os.path.abspath('test_binary_upgrade_data_dir') octopus.stop() p = subprocess.Popen( [os.path.join(octopus.bin_dir, 'octopus-init'), '-D', new_data_dir]) stdout, stderr = p.communicate() result = subprocess.check_call([ os.path.join(octopus.bin_dir, 'octopus-upgrade'), '-b', old_bin_dir, '-B', new_bin_dir, '-d', old_data_dir, '-D', new_data_dir ]) assert result == 0 # The cleanup path expects this to be running, but we're done with it octopus.run() # octopus-upgrade returned successfully and has already done sanity checks # but let's manually verify that all objects were migrated to the new data directory upgraded = Octopus(data_dir=new_data_dir) upgraded.run() # Tables for n in range(16): name = 't_%d' % n q = 'SELECT x, y, z FROM %s ORDER BY x' % name rows = upgraded.execute(q) for i, row in enumerate(rows): x, y, z = row assert x == i assert y == name assert z == name # Streams for n in range(8): name = 's_%d' % n rows = list( upgraded.execute( "SELECT oid FROM pg_class WHERE relkind = '$' AND relname = '%s'" % name)) assert len(rows) == 1 # CVs for n in range(32): name = 'cv_%d' % n rows = list( upgraded.execute('SELECT z, distinct_count, count FROM %s' % name)) assert len(rows) == 1 assert rows[0][0] == name assert rows[0][1] == 1 assert rows[0][2] == 1000 # CVs in separate schema for n in range(8): name = 'namespace.cv_%d' % n rows = list( upgraded.execute('SELECT z, distinct_count, count FROM %s' % name)) assert len(rows) == 1 assert rows[0][0] == name assert rows[0][1] == 1 assert rows[0][2] == 1000 # Transforms for n in range(8): name = 'ct_%d' % n q = """ SELECT c.relname FROM pg_class c JOIN octopus_query pq ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s' """ % name rows = list(upgraded.execute(q)) assert len(rows) == 1 upgraded.stop() shutil.rmtree(new_data_dir)