import os
import shutil
import subprocess
import tempfile
import time

import pytest

# Assumption: the suite's base module provides the pipeline/clean_db fixtures
# and the PipelineDB server harness used throughout these tests.
from base import pipeline, clean_db, PipelineDB


def test_multiple_insert(pipeline, clean_db):
  pipeline.create_cv('cv0', 'SELECT count(*) FROM stream1')
  pipeline.create_cv('cv1', 'SELECT count(*) FROM stream2')
  pipeline.create_ct('ct1', 'SELECT x::int FROM stream WHERE mod(x, 2) = 0',
                     "pipeline_stream_insert('stream1', 'stream2')")

  pipeline.insert('stream', ('x',), [(n,) for n in range(1000)])

  # ct1 forwards each even x to both stream1 and stream2
  count = pipeline.execute('SELECT count FROM cv0').first()['count']
  assert count == 500
  count = pipeline.execute('SELECT count FROM cv1').first()['count']
  assert count == 500
def test_cont_transforms(pipeline, clean_db):
  pipeline.execute('CREATE FOREIGN TABLE cv_stream (x int, y text) SERVER pipelinedb')
  pipeline.execute('CREATE FOREIGN TABLE ct_stream (x int, y text) SERVER pipelinedb')
  pipeline.create_cv('test_cv', 'SELECT count(*) FROM cv_stream')
  pipeline.create_ct('test_ct1',
                     'SELECT x::int, y::text FROM ct_stream WHERE mod(x, 2) = 0',
                     "pipelinedb.insert_into_stream('cv_stream', 'cv_stream')")
  pipeline.create_table('test_t', x='int', y='text')
  pipeline.execute('''
  CREATE OR REPLACE FUNCTION test_tg()
  RETURNS trigger AS
  $$
  BEGIN
    INSERT INTO test_t (x, y) VALUES (NEW.x, NEW.y);
    RETURN NEW;
  END;
  $$
  LANGUAGE plpgsql;
  ''')
  pipeline.create_ct('test_ct2', 'SELECT x::int, y::text FROM ct_stream', 'test_tg')

  pipeline.insert('ct_stream', ('x', 'y'), [(1, 'hello'), (2, 'world')])
  time.sleep(1)
  # test_ct1 forwards the single even row twice, so test_cv counts 2
  assert pipeline.execute('SELECT count FROM test_cv')[0]['count'] == 2

  _dump(pipeline, 'test_cont_transform.sql')

  pipeline.drop_all()
  pipeline.drop_table('test_t')
  pipeline.execute('DROP FUNCTION test_tg()')

  _restore(pipeline, 'test_cont_transform.sql')

  pipeline.insert('ct_stream', ('x', 'y'), [(1, 'hello'), (2, 'world')])
  time.sleep(1)

  assert pipeline.execute('SELECT count FROM test_cv')[0]['count'] == 4

  ntups = 0
  for row in pipeline.execute('SELECT x, count(*) FROM test_t GROUP BY x'):
    assert row['count'] == 2
    assert row['x'] in (1, 2)
    ntups += 1
  assert ntups == 2
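# _dump/_restore are suite-level helpers that several tests here rely on but
# that aren't part of this excerpt. A minimal sketch, assuming they simply
# shell out to pg_dump/psql and that the harness exposes bin_dir and a port
# attribute (the real helpers may pass different flags):
def _dump(pipeline, path):
  # Write a plain-SQL dump of the test database to 'path'
  subprocess.check_call([
    os.path.join(pipeline.bin_dir, 'pg_dump'),
    '-p', str(pipeline.port), '-d', 'postgres', '-f', path])


def _restore(pipeline, path):
  # Replay the dump into the (now empty) test database, then clean up
  subprocess.check_call([
    os.path.join(pipeline.bin_dir, 'psql'),
    '-p', str(pipeline.port), '-d', 'postgres', '-f', path])
  os.remove(path)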
def test_nested_transforms(pipeline, clean_db):
  pipeline.create_stream('stream0', x='int')
  pipeline.create_stream('stream2', x='int')
  pipeline.create_stream('stream4', x='int')
  pipeline.create_cv('cv0', 'SELECT count(*) FROM stream4')
  pipeline.create_cv('cv1', 'SELECT count(*) FROM stream2')
  pipeline.create_ct('ct0', 'SELECT x::int FROM stream2 WHERE mod(x, 4) = 0',
                     "pipelinedb.insert_into_stream('stream4')")
  pipeline.create_ct('ct1', 'SELECT x::int FROM stream0 WHERE mod(x, 2) = 0',
                     "pipelinedb.insert_into_stream('stream2')")

  pipeline.insert('stream0', ('x',), [(n,) for n in range(1000)])

  count = pipeline.execute('SELECT count FROM cv0')[0]['count']
  assert count == 250
  count = pipeline.execute('SELECT count FROM cv1')[0]['count']
  assert count == 500
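# For reference, create_ct() in the 1.x API boils down to a transform view. A
# hedged sketch of the SQL assumed to be issued for ct1 above (option names
# per the PipelineDB docs, not verified against this harness):
#
#   CREATE VIEW ct1 WITH (
#     action=transform,
#     outputfunc=pipelinedb.insert_into_stream('stream2')
#   ) AS SELECT x::int FROM stream0 WHERE mod(x, 2) = 0;
#
# Of the 1000 rows written to stream0, ct1 forwards the 500 even values to
# stream2, and ct0 forwards the 250 multiples of 4 on to stream4.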
def test_transforms(pipeline, clean_db):
  """
  Verify that continuous transforms work properly on output streams
  """
  pipeline.create_stream('stream0', x='int')
  pipeline.create_cv('sw',
                     'SELECT x::integer, COUNT(*) FROM stream0 GROUP BY x',
                     sw='5 seconds')

  # Write a row to a stream each time a row goes out of window
  q = 'SELECT (old).x FROM sw_osrel WHERE old IS NOT NULL AND new IS NULL'
  pipeline.create_stream('oow_stream', x='integer')
  pipeline.create_ct('ct', q, "pipeline_stream_insert('oow_stream')")
  pipeline.create_cv('ct_recv', 'SELECT x FROM oow_stream')

  pipeline.insert('stream0', ('x',), [(x % 100,) for x in range(10000)])
  time.sleep(7)

  # All 100 groups have aged out of the 5-second window by now
  rows = list(pipeline.execute('SELECT * FROM ct_recv'))
  assert len(rows) == 100
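# For reference, a CV's output stream emits (old, new) record pairs, which is
# what the predicate above keys off. Under that assumption, the three
# interesting cases on 'sw_osrel' can be selected like so (illustrative only):
#
#   arrivals    = "SELECT (new).x FROM sw_osrel WHERE old IS NULL AND new IS NOT NULL"
#   updates     = "SELECT (old).x, (new).x FROM sw_osrel WHERE old IS NOT NULL AND new IS NOT NULL"
#   expirations = "SELECT (old).x FROM sw_osrel WHERE old IS NOT NULL AND new IS NULL"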
def test_deadlock_regress(pipeline, clean_db):
  nitems = 2000000
  tmp_file = os.path.join(tempfile.gettempdir(), 'tmp.json')
  query = 'SELECT generate_series(1, %d) AS n' % nitems
  pipeline.execute("COPY (%s) TO '%s'" % (query, tmp_file))

  pipeline.create_stream('s1', n='int')
  pipeline.create_stream('s2', n='int')
  pipeline.create_ct('ct', 'SELECT n FROM s1 WHERE n IS NOT NULL',
                     "pipelinedb.insert_into_stream('s2')")
  pipeline.create_cv('cv', 'SELECT count(*) FROM s2')

  for copy in [True, False]:
    for nworkers in [1, 4]:
      for sync in ['receive', 'commit']:
        pipeline.stop()
        pipeline.run({
          'pipelinedb.num_workers': nworkers,
          'pipelinedb.stream_insert_level': 'sync_%s' % sync
        })

        pipeline.execute("SELECT pipelinedb.truncate_continuous_view('cv')")
        pipeline.execute('COMMIT')

        if copy:
          pipeline.execute("COPY s1 (n) FROM '%s'" % tmp_file)
        else:
          pipeline.execute('INSERT INTO s1 (n) %s' % query)

        count = dict(pipeline.execute('SELECT count FROM cv')[0] or {})
        ntries = 5
        while count.get('count') != nitems and ntries > 0:
          # Only sync_receive may observe a count that is still catching up;
          # sync_commit must be fully applied by the time the INSERT returns
          assert sync == 'receive'
          time.sleep(1)
          count = dict(pipeline.execute('SELECT count FROM cv')[0] or {})
          ntries -= 1

        assert count and count['count'] == nitems

  os.remove(tmp_file)

  # Restart with default settings for subsequent tests
  pipeline.stop()
  pipeline.run()
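# The retry loop above is a pattern several of these tests repeat via fixed
# sleeps. A small polling helper (hypothetical; not part of the original
# suite) makes the intent clearer:
def wait_for(condition, timeout=10.0, interval=0.5):
  # Poll condition() until it returns truthy or the timeout elapses
  deadline = time.time() + timeout
  while time.time() < deadline:
    if condition():
      return True
    time.sleep(interval)
  return False

# Usage sketch:
#   assert wait_for(
#       lambda: pipeline.execute('SELECT count FROM cv')[0]['count'] == nitems)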
def test_renamed_objects(pipeline, clean_db):
  """
  Verify that we can dump and restore renamed CQs and streams
  """
  pipeline.create_stream('s', x='int')
  q = """
  SELECT x, count(*) FROM s GROUP BY x;
  """
  pipeline.create_cv('cv_0', q)
  q = """
  SELECT (new).x, combine((delta).count) AS count FROM output_of('cv_0') GROUP BY x
  """
  pipeline.create_cv('combine_cv_0', q)
  q = """
  SELECT (new).count + 41 AS v FROM output_of('combine_cv_0')
  """
  pipeline.create_ct('transform_combine_cv_0', q)
  q = """
  SELECT max(v), count(*) FROM output_of('transform_combine_cv_0')
  """
  pipeline.create_cv('max_transform_combine_cv_0', q)

  rows = [(x,) for x in range(1000)]
  pipeline.insert('s', ('x',), rows)

  result = pipeline.execute('SELECT combine(count) FROM cv_0')[0]
  assert result['combine'] == 1000

  pipeline.execute('ALTER VIEW cv_0 RENAME TO cv_0_renamed')
  pipeline.execute('ALTER VIEW combine_cv_0 RENAME TO combine_cv_0_renamed')
  pipeline.execute(
    'ALTER VIEW transform_combine_cv_0 RENAME TO transform_combine_cv_0_renamed')
  pipeline.execute(
    'ALTER VIEW max_transform_combine_cv_0 RENAME TO max_transform_combine_cv_0_renamed')
  pipeline.execute('ALTER FOREIGN TABLE s RENAME TO s_renamed')

  result = pipeline.execute('SELECT combine(count) FROM cv_0_renamed')[0]
  assert result['combine'] == 1000
  result = pipeline.execute('SELECT combine(count) FROM combine_cv_0_renamed')[0]
  assert result['combine'] == 1000
  result = pipeline.execute(
    'SELECT max, count FROM max_transform_combine_cv_0_renamed')[0]
  assert result['max'] == 42
  assert result['count'] == 1000

  _dump(pipeline, 'test_renamed_cqs.sql')

  pipeline.execute('DROP VIEW combine_cv_0_renamed CASCADE')
  pipeline.drop_all()

  _restore(pipeline, 'test_renamed_cqs.sql')

  result = pipeline.execute('SELECT combine(count) FROM cv_0_renamed')[0]
  assert result['combine'] == 1000
  result = pipeline.execute('SELECT combine(count) FROM combine_cv_0_renamed')[0]
  assert result['combine'] == 1000
  result = pipeline.execute(
    'SELECT max, count FROM max_transform_combine_cv_0_renamed')[0]
  assert result['max'] == 42
  assert result['count'] == 1000

  # Now write some more rows to verify everything updates properly
  rows = [(x,) for x in range(1000)]
  pipeline.insert('s_renamed', ('x',), rows)

  result = pipeline.execute('SELECT combine(count) FROM cv_0_renamed')[0]
  assert result['combine'] == 2000
  result = pipeline.execute('SELECT combine(count) FROM combine_cv_0_renamed')[0]
  assert result['combine'] == 2000
  result = pipeline.execute(
    'SELECT max, count FROM max_transform_combine_cv_0_renamed')[0]
  assert result['max'] == 43
  assert result['count'] == 2000

  pipeline.execute('DROP VIEW combine_cv_0_renamed CASCADE')
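# The combine(count) assertions above repeat a pattern; a hypothetical helper
# (not in the original suite) would read:
def assert_combined_count(pipeline, view, expected):
  # combine() merges the partial aggregates across all groups, so this yields
  # the total count regardless of how the CV is grouped
  row = pipeline.execute('SELECT combine(count) FROM %s' % view)[0]
  assert row['combine'] == expected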
def test_chained_cqs(pipeline, clean_db):
  """
  Verify that multiple CQs chained together are properly dumped/restored
  """
  pipeline.create_stream('s', x='int')
  q = """
  SELECT x, count(*) FROM s GROUP BY x
  """
  pipeline.create_cv('cv0', q)
  q = """
  SELECT (new).x % 2 AS m FROM output_of('cv0')
  """
  pipeline.create_stream('ct_s', m='int')
  pipeline.create_ct('ct0', q, "pipelinedb.insert_into_stream('ct_s')")
  q = """
  SELECT m, count(*) FROM ct_s GROUP BY m
  """
  pipeline.create_cv('cv1', q)
  q = """
  SELECT combine((delta).count) AS count FROM output_of('cv1')
  """
  pipeline.create_cv('cv2', q)
  q = """
  SELECT combine((delta).count) FROM output_of('cv2')
  """
  pipeline.create_cv('cv3', q)

  pipeline.insert('s', ('x',), [(x,) for x in range(1000)])
  time.sleep(1)

  row = pipeline.execute('SELECT combine(count) FROM cv0')[0]
  assert row['combine'] == 1000
  row = pipeline.execute('SELECT combine(count) FROM cv1')[0]
  assert row['combine'] == 1000
  row = pipeline.execute('SELECT count FROM cv2')[0]
  assert row['count'] == 1000
  row = pipeline.execute('SELECT combine FROM cv3')[0]
  assert row['combine'] == 1000

  _dump(pipeline, 'test_chained_cqs.sql')

  pipeline.execute('DROP FOREIGN TABLE s CASCADE')
  pipeline.execute('DROP FOREIGN TABLE ct_s CASCADE')

  _restore(pipeline, 'test_chained_cqs.sql')

  pipeline.insert('s', ('x',), [(x,) for x in range(1000)])
  time.sleep(1)

  row = pipeline.execute('SELECT combine(count) FROM cv0')[0]
  assert row['combine'] == 2000
  row = pipeline.execute('SELECT combine(count) FROM cv1')[0]
  assert row['combine'] == 2000
  row = pipeline.execute('SELECT count FROM cv2')[0]
  assert row['count'] == 2000
  row = pipeline.execute('SELECT combine FROM cv3')[0]
  assert row['combine'] == 2000

  pipeline.execute('DROP FOREIGN TABLE s CASCADE')
  pipeline.execute('DROP FOREIGN TABLE ct_s CASCADE')
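# Note on the chain above: each row in a CV's output stream carries (old, new,
# delta) records, where (delta).count is the increment applied by that update.
# combine()-ing the deltas downstream therefore reproduces the upstream total,
# which is why cv2 and cv3 settle at the same count as cv0.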
def test_binary_upgrade(pipeline, clean_db):
  """
  Verify that binary upgrades properly transfer all objects and data
  into the new installation
  """
  if pipeline.version_num == 110000:
    pytest.skip('skipping until PG11 supports dump/restore WITH OIDS')

  # Create some regular tables with data, and create an index on half of them
  for n in range(16):
    name = 't_%d' % n
    pipeline.create_table(name, x='integer', y='text', z='text')
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert(name, ('x', 'y', 'z'), rows)
    if n >= 8:
      pipeline.execute('CREATE INDEX idx_%s ON %s(y)' % (name, name))

  # Create some streams
  for n in range(8):
    name = 's_%d' % n
    pipeline.create_stream(name, x='integer', y='text')

  # Now create some CVs with data, some with indices
  for n in range(32):
    name = 'cv_%d' % n
    pipeline.create_stream('stream_%d' % n, x='int', y='text', z='text')
    pipeline.create_cv(
      name,
      'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM stream_%d GROUP BY z' % n)
    if n >= 16:
      pipeline.execute('CREATE INDEX idx_%s ON %s(z)' % (name, name))

  # Create some STJs
  for n in range(8):
    pipeline.create_cv(
      'stj_%d' % n,
      'SELECT t.x, count(*) FROM stream_%d s JOIN t_%d t ON s.x = t.x GROUP BY t.x' % (n, n))

  # Create some SW CVs
  for n in range(8):
    pipeline.create_cv('sw_%d' % n, 'SELECT count(*) FROM stream_%d' % n,
                       sw='%d days' % (n + 1), step_factor=n + 1)

  # Create some CVs/CTs/streams that we'll rename
  for n in range(4):
    pipeline.create_stream('to_rename_s_%d' % n, x='int')
    pipeline.create_cv('to_rename_cv_%d' % n,
                       'SELECT x, count(*) FROM to_rename_s_%d GROUP BY x' % n)
    pipeline.create_ct('to_rename_ct_%d' % n, 'SELECT x FROM to_rename_s_%d' % n)
    pipeline.create_cv('to_rename_ct_reader_%d' % n,
                       "SELECT count(*) FROM output_of('to_rename_ct_%d')" % n)
    rows = [(x,) for x in range(1000)]
    pipeline.insert('to_rename_s_%d' % n, ('x',), rows)

  # Now rename them
  for n in range(4):
    pipeline.execute('ALTER FOREIGN TABLE to_rename_s_%d RENAME TO renamed_s_%d' % (n, n))
    pipeline.execute('ALTER VIEW to_rename_cv_%d RENAME TO renamed_cv_%d' % (n, n))
    pipeline.execute('ALTER VIEW to_rename_ct_%d RENAME TO renamed_ct_%d' % (n, n))
    pipeline.execute('ALTER VIEW to_rename_ct_reader_%d RENAME TO renamed_ct_reader_%d' % (n, n))

    # And write some data using the new stream names
    rows = [(x,) for x in range(1000)]
    pipeline.insert('renamed_s_%d' % n, ('x',), rows)

  # Create a CV chain that combines output streams
  q = """
  SELECT (new).z, combine((delta).count) AS count,
    combine((delta).distinct_count) AS distinct_count
  FROM output_of('cv_0') GROUP BY (new).z
  """
  pipeline.create_cv('combine_cv_0', q)
  q = """
  SELECT combine((delta).count) AS count,
    combine((delta).distinct_count) AS distinct_count
  FROM output_of('combine_cv_0')
  """
  pipeline.create_cv('combine_cv_1', q)

  for n in range(32):
    name = 'cv_%d' % n
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert('stream_%d' % n, ('x', 'y', 'z'), rows)

  # Create a CV with a TTL to verify TTL info is restored properly
  pipeline.create_cv(
    'ttlcv',
    'SELECT second(arrival_timestamp), count(*) FROM stream_0 GROUP BY second',
    ttl='1 hour', ttl_column='second')

  # Now create some in another namespace
  pipeline.execute('CREATE SCHEMA namespace')
  for n in range(8):
    name = 'namespace.cv_%d' % n
    pipeline.create_stream('namespace.stream_%d' % n, x='int', y='text', z='text')
    pipeline.create_cv(
      name,
      'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM namespace.stream_%d GROUP BY z' % n)
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert('namespace.stream_%d' % n, ('x', 'y', 'z'), rows)
    if n >= 4:
      pipeline.execute('CREATE INDEX namespace_idx_%d ON %s(z)' % (n, name))

  create_fn = """
  CREATE OR REPLACE FUNCTION tg_fn()
  RETURNS trigger AS
  $$
  BEGIN
    RETURN NEW;
  END;
  $$
  LANGUAGE plpgsql;
  """
  pipeline.execute(create_fn)

  pipeline.create_stream('stream0', z='text')

  # Create some transforms with trigger functions
  for n in range(8):
    name = 'ct_%d' % n
    pipeline.create_ct(name, 'SELECT z::text FROM stream0', 'tg_fn')

  # Create some transforms without trigger functions
  for n in range(8):
    name = 'ct_no_trig_%d' % n
    pipeline.create_ct(name, 'SELECT z::text FROM stream0')

  time.sleep(10)

  old_bin_dir = new_bin_dir = pipeline.bin_dir
  old_data_dir = pipeline.data_dir
  new_data_dir0 = os.path.abspath('test_binary_upgrade_data_dir0')

  if os.path.exists(new_data_dir0):
    shutil.rmtree(new_data_dir0)

  pipeline.stop()

  p = subprocess.Popen([os.path.join(pipeline.bin_dir, 'initdb'), '-D', new_data_dir0])
  stdout, stderr = p.communicate()

  with open(os.path.join(new_data_dir0, 'postgresql.conf'), 'a') as f:
    f.write('shared_preload_libraries=pipelinedb\n')
    f.write('max_worker_processes=128\n')
    f.write('pipelinedb.stream_insert_level=sync_commit\n')

  result = subprocess.check_call([
    os.path.join(pipeline.bin_dir, 'pg_upgrade'),
    '-b', old_bin_dir, '-B', new_bin_dir,
    '-d', old_data_dir, '-D', new_data_dir0])
  assert result == 0

  # The cleanup path expects this to be running, but we're done with it
  pipeline.run()

  # pg_upgrade returned successfully and has already done sanity checks, but
  # let's manually verify that all objects were migrated to the new data directory
  upgraded = PipelineDB(data_dir=new_data_dir0)
  upgraded.run()

  # Tables
  for n in range(16):
    name = 't_%d' % n
    q = 'SELECT x, y, z FROM %s ORDER BY x' % name
    rows = upgraded.execute(q)
    for i, row in enumerate(rows):
      assert row['x'] == i
      assert row['y'] == name
      assert row['z'] == name

  # Streams
  for n in range(8):
    name = 's_%d' % n
    rows = list(upgraded.execute(
      "SELECT oid FROM pg_class WHERE relkind = 'f' AND relname = '%s'" % name))
    assert len(rows) == 1

  # CVs
  for n in range(32):
    name = 'cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 1000

  # CV with TTL
  row = list(upgraded.execute(
    "SELECT ttl, ttl_attno FROM pg_class c JOIN pipelinedb.cont_query pq ON c.oid = pq.relid WHERE c.relname = 'ttlcv'"))[0]
  assert row[0] == 3600
  assert row[1] == 1

  # CVs in separate schema
  for n in range(8):
    name = 'namespace.cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 1000

  # Transforms with trigger functions
  for n in range(8):
    name = 'ct_%d' % n
    q = """
    SELECT c.relname FROM pg_class c JOIN pipelinedb.cont_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
    rows = list(upgraded.execute(q))
    assert len(rows) == 1

  # Transforms without trigger functions
  for n in range(8):
    name = 'ct_no_trig_%d' % n
    q = """
    SELECT c.relname FROM pg_class c JOIN pipelinedb.cont_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
    rows = list(upgraded.execute(q))
    assert len(rows) == 1

  # Verify SW CVs
  for n in range(8):
    name = 'sw_%d' % n
    row = upgraded.execute(
      "SELECT ttl, step_factor FROM pipelinedb.cont_query cq JOIN pg_class c ON cq.relid = c.oid WHERE relname = '%s'" % name)[0]
    assert row['ttl'] == (n + 1) * 3600 * 24
    assert row['step_factor'] == n + 1
    row = upgraded.execute('SELECT count FROM %s' % name)[0]
    assert row['count'] == 1000

  # Verify renamed CVs/CTs/streams
  for n in range(4):
    row = upgraded.execute('SELECT combine(count) FROM renamed_cv_%d' % n)[0]
    assert row['combine'] == 2000
    row = upgraded.execute('SELECT combine(count) FROM renamed_ct_reader_%d' % n)[0]
    assert row['combine'] == 2000

  # Verify chained CVs
  row = upgraded.execute('SELECT z, count, distinct_count FROM combine_cv_0')[0]
  assert row['z'] == 'cv_0'
  assert row['count'] == 1000
  assert row['distinct_count'] == 1

  row = upgraded.execute('SELECT count, distinct_count FROM combine_cv_1')[0]
  assert row['count'] == 1000
  assert row['distinct_count'] == 1

  # Now insert some new data and verify CVs are still updating properly
  for n in range(32):
    name = 'cv_%d' % n
    rows = [(x, name, name) for x in range(1000)]
    upgraded.insert('stream_%d' % n, ('x', 'y', 'z'), rows)

  for n in range(32):
    name = 'cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 2000

  row = upgraded.execute('SELECT z, count, distinct_count FROM combine_cv_0')[0]
  assert row['z'] == 'cv_0'
  assert row['count'] == 2000
  assert row['distinct_count'] == 1

  row = upgraded.execute('SELECT count, distinct_count FROM combine_cv_1')[0]
  assert row['count'] == 2000
  assert row['distinct_count'] == 1

  # Verify STJs
  for n in range(8):
    cv = 'stj_%d' % n
    row = upgraded.execute('SELECT sum(count) FROM %s' % cv)[0]
    assert row['sum'] == 2000

  # Rename objects again before the second upgrade
  for n in range(4):
    upgraded.execute('ALTER FOREIGN TABLE renamed_s_%d RENAME TO renamed_again_s_%d' % (n, n))
    upgraded.execute('ALTER VIEW renamed_cv_%d RENAME TO renamed_again_cv_%d' % (n, n))
    upgraded.execute('ALTER VIEW renamed_ct_%d RENAME TO renamed_again_ct_%d' % (n, n))
    upgraded.execute('ALTER VIEW renamed_ct_reader_%d RENAME TO renamed_again_ct_reader_%d' % (n, n))

    # And write some data using the new stream names
    rows = [(x,) for x in range(1000)]
    upgraded.insert('renamed_again_s_%d' % n, ('x',), rows)

  upgraded.stop()

  new_data_dir1 = os.path.abspath('test_binary_upgrade_data_dir1')
  if os.path.exists(new_data_dir1):
    shutil.rmtree(new_data_dir1)

  p = subprocess.Popen([os.path.join(pipeline.bin_dir, 'initdb'), '-D', new_data_dir1])
  stdout, stderr = p.communicate()

  with open(os.path.join(new_data_dir1, 'postgresql.conf'), 'a') as f:
    f.write('shared_preload_libraries=pipelinedb\n')
    f.write('max_worker_processes=128\n')
    f.write('pipelinedb.stream_insert_level=sync_commit\n')

  # Now upgrade the upgraded DB to verify that restored DBs can be upgraded properly
  result = subprocess.check_call([
    os.path.join(pipeline.bin_dir, 'pg_upgrade'),
    '-b', old_bin_dir, '-B', new_bin_dir,
    '-d', new_data_dir0, '-D', new_data_dir1])
  assert result == 0

  # Again, manually verify that all objects were migrated to the new data directory
  upgraded = PipelineDB(data_dir=new_data_dir1)
  upgraded.run()

  # Tables
  for n in range(16):
    name = 't_%d' % n
    q = 'SELECT x, y, z FROM %s ORDER BY x' % name
    rows = upgraded.execute(q)
    for i, row in enumerate(rows):
      assert row['x'] == i
      assert row['y'] == name
      assert row['z'] == name

  # Streams
  for n in range(8):
    name = 's_%d' % n
    rows = list(upgraded.execute(
      "SELECT oid FROM pg_class WHERE relkind = 'f' AND relname = '%s'" % name))
    assert len(rows) == 1

  # CVs
  for n in range(32):
    name = 'cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 2000

  # CV with TTL
  row = list(upgraded.execute(
    "SELECT ttl, ttl_attno FROM pg_class c JOIN pipelinedb.cont_query pq ON c.oid = pq.relid WHERE c.relname = 'ttlcv'"))[0]
  assert row[0] == 3600
  assert row[1] == 1

  # CVs in separate schema
  for n in range(8):
    name = 'namespace.cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 1000

  # Transforms with trigger functions
  for n in range(8):
    name = 'ct_%d' % n
    q = """
    SELECT c.relname FROM pg_class c JOIN pipelinedb.cont_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
    rows = list(upgraded.execute(q))
    assert len(rows) == 1

  # Transforms without trigger functions
  for n in range(8):
    name = 'ct_no_trig_%d' % n
    q = """
    SELECT c.relname FROM pg_class c JOIN pipelinedb.cont_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
    rows = list(upgraded.execute(q))
    assert len(rows) == 1

  # Verify SW CVs
  for n in range(8):
    name = 'sw_%d' % n
    row = upgraded.execute(
      "SELECT ttl, step_factor FROM pipelinedb.cont_query cq JOIN pg_class c ON cq.relid = c.oid WHERE relname = '%s'" % name)[0]
    assert row['ttl'] == (n + 1) * 3600 * 24
    assert row['step_factor'] == n + 1
    row = upgraded.execute('SELECT count FROM %s' % name)[0]
    assert row['count'] == 2000

  # Verify renamed CVs/CTs/streams
  for n in range(4):
    row = upgraded.execute('SELECT combine(count) FROM renamed_again_cv_%d' % n)[0]
    assert row['combine'] == 3000
    row = upgraded.execute('SELECT combine(count) FROM renamed_again_ct_reader_%d' % n)[0]
    assert row['combine'] == 3000

  # Verify chained CVs
  row = upgraded.execute('SELECT z, count, distinct_count FROM combine_cv_0')[0]
  assert row['z'] == 'cv_0'
  assert row['count'] == 2000
  assert row['distinct_count'] == 1

  row = upgraded.execute('SELECT count, distinct_count FROM combine_cv_1')[0]
  assert row['count'] == 2000
  assert row['distinct_count'] == 1

  # Now insert some new data and verify CVs are still updating properly
  for n in range(32):
    name = 'cv_%d' % n
    rows = [(x, name, name) for x in range(1000)]
    upgraded.insert('stream_%d' % n, ('x', 'y', 'z'), rows)

  for n in range(32):
    name = 'cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 3000

  row = upgraded.execute('SELECT z, count, distinct_count FROM combine_cv_0')[0]
  assert row['z'] == 'cv_0'
  assert row['count'] == 3000
  assert row['distinct_count'] == 1

  row = upgraded.execute('SELECT count, distinct_count FROM combine_cv_1')[0]
  assert row['count'] == 3000
  assert row['distinct_count'] == 1

  # Verify STJs
  for n in range(8):
    cv = 'stj_%d' % n
    row = upgraded.execute('SELECT sum(count) FROM %s' % cv)[0]
    assert row['sum'] == 3000

  upgraded.stop()

  pipeline.execute('DROP VIEW combine_cv_0 CASCADE')

  shutil.rmtree(new_data_dir0)
  shutil.rmtree(new_data_dir1)
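# For context: 'pipeline' is the suite's pytest fixture and PipelineDB is its
# server harness. A minimal sketch of the interface the tests above rely on,
# assuming a psycopg2-backed implementation and a pg_ctl-managed server; names
# mirror the suite's base module but the details here are illustrative only.
import psycopg2
import psycopg2.extras


class MiniPipelineDB(object):
  def __init__(self, bin_dir, data_dir, port=5432):
    self.bin_dir, self.data_dir, self.port = bin_dir, data_dir, port

  def run(self, params=None):
    # Start the server, passing any GUC overrides through pg_ctl -o
    opts = ' '.join('-c %s=%s' % kv for kv in (params or {}).items())
    cmd = [os.path.join(self.bin_dir, 'pg_ctl'), '-D', self.data_dir, '-w']
    if opts:
      cmd += ['-o', opts]
    subprocess.check_call(cmd + ['start'])

  def stop(self):
    subprocess.check_call([os.path.join(self.bin_dir, 'pg_ctl'),
                           '-D', self.data_dir, '-w', 'stop'])

  def execute(self, sql):
    # Autocommitted statement returning dict-like rows, as the tests expect
    conn = psycopg2.connect(dbname='postgres', port=self.port)
    conn.autocommit = True
    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    cur.execute(sql)
    rows = cur.fetchall() if cur.description else []
    conn.close()
    return rows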