import os
import shutil
import subprocess
import tempfile
import time
from subprocess import check_output

import pytest

# The `pipeline` / `clean_db` fixtures and the PipelineDB helper class are
# provided by the test suite's shared harness.


def test_continuous_queries_enabled(pipeline, clean_db):
  # Check that continuous query processes are running
  try:
    procs = check_output('ps aux | grep "pipeline" | grep "combiner0"',
                         shell=True).split('\n')
  except:
    procs = []

  procs = filter(lambda s: s, procs)
  assert len(procs) > 1

  # Restart with continuous queries disabled.
  pipeline.stop()
  pipeline.run(params={'continuous_queries_enabled': 'off'})

  try:
    procs = check_output('ps aux | grep "pipeline" | grep "combiner0"',
                         shell=True).split('\n')
  except:
    procs = []

  procs = filter(lambda s: s, procs)
  assert len(procs) == 1  # grep cmd

  pipeline.stop()
  pipeline.run()
def test_restart_recovery(pipeline, clean_db):
  pipeline.create_stream('stream0', x='int')
  q = 'SELECT COUNT(*) FROM stream0'
  pipeline.create_cv('test_restart_recovery', q)

  pipeline.insert('stream0', ['x'], [(1,), (1,)])
  result = pipeline.execute('SELECT * FROM test_restart_recovery').first()
  assert result['count'] == 2

  # Need to sleep here, otherwise on restart the materialization table is
  # empty. Not sure why.
  time.sleep(0.1)

  # Restart.
  pipeline.stop()
  pipeline.run()

  result = pipeline.execute('SELECT * FROM test_restart_recovery').first()
  assert result['count'] == 2

  pipeline.insert('stream0', ['x'], [(1,), (1,)])
  result = pipeline.execute('SELECT * FROM test_restart_recovery').first()
  assert result['count'] == 4
def test_continuous_queries_enabled(pipeline, clean_db):
  # Check that continuous query processes are running
  try:
    procs = check_output('ps aux | grep "pipeline" | grep "combiner0"',
                         shell=True).split('\n')
  except:
    procs = []

  procs = filter(lambda s: s and 'grep' not in s, procs)
  assert len(procs) > 0

  # Restart with continuous queries disabled.
  pipeline.stop()
  pipeline.run(params={'continuous_queries_enabled': 'off'})

  try:
    procs = check_output('ps aux | grep "pipeline" | grep "combiner0"',
                         shell=True).split('\n')
  except:
    procs = []

  procs = filter(lambda s: s and 'grep' not in s, procs)
  assert len(procs) == 0

  pipeline.stop()
  pipeline.run()
def test_deadlock_regress(pipeline, clean_db):
  nitems = 2000000
  tmp_file = os.path.join(tempfile.gettempdir(), 'tmp.json')
  query = 'SELECT generate_series(1, %d) AS n' % nitems
  pipeline.execute("COPY (%s) TO '%s'" % (query, tmp_file))

  pipeline.create_stream('s1', n='int')
  pipeline.create_stream('s2', n='int')
  pipeline.create_ct('ct', 'SELECT n FROM s1 WHERE n IS NOT NULL',
                     "pipeline_stream_insert('s2')")
  pipeline.create_cv('cv', 'SELECT count(*) FROM s2')

  for copy in [True, False]:
    for nworkers in [1, 4]:
      for sync in ['off', 'on']:
        pipeline.stop()
        pipeline.run({
          'continuous_query_num_workers': nworkers,
          'synchronous_stream_insert': sync
        })

        pipeline.execute('TRUNCATE CONTINUOUS VIEW cv')
        pipeline.execute('COMMIT')

        if copy:
          pipeline.execute("COPY s1 (n) FROM '%s'" % tmp_file)
        else:
          pipeline.execute('INSERT INTO s1 (n) %s' % query)

        count = dict(pipeline.execute('SELECT count FROM cv').first() or {})
        ntries = 5
        while count.get('count') != nitems and ntries > 0:
          assert sync == 'off'
          time.sleep(1)
          count = dict(pipeline.execute('SELECT count FROM cv').first() or {})
          ntries -= 1

        assert count and count['count'] == nitems

  os.remove(tmp_file)

  pipeline.stop()
  pipeline.run()
def test_deadlock_regress(pipeline, clean_db):
  nitems = 2000000
  tmp_file = os.path.join(tempfile.gettempdir(), 'tmp.json')
  query = 'SELECT generate_series(1, %d) AS n' % nitems
  pipeline.execute("COPY (%s) TO '%s'" % (query, tmp_file))

  pipeline.create_stream('s1', n='int')
  pipeline.create_stream('s2', n='int')
  pipeline.create_ct('ct', 'SELECT n FROM s1 WHERE n IS NOT NULL',
                     "pipelinedb.insert_into_stream('s2')")
  pipeline.create_cv('cv', 'SELECT count(*) FROM s2')

  for copy in [True, False]:
    for nworkers in [1, 4]:
      for sync in ['receive', 'commit']:
        pipeline.stop()
        pipeline.run({
          'pipelinedb.num_workers': nworkers,
          'pipelinedb.stream_insert_level': 'sync_%s' % sync
        })

        pipeline.execute("SELECT pipelinedb.truncate_continuous_view('cv')")
        pipeline.execute('COMMIT')

        if copy:
          pipeline.execute("COPY s1 (n) FROM '%s'" % tmp_file)
        else:
          pipeline.execute('INSERT INTO s1 (n) %s' % query)

        count = dict(pipeline.execute('SELECT count FROM cv')[0] or {})
        ntries = 5
        while count.get('count') != nitems and ntries > 0:
          assert sync == 'receive'
          time.sleep(1)
          count = dict(pipeline.execute('SELECT count FROM cv')[0] or {})
          ntries -= 1

        assert count and count['count'] == nitems

  os.remove(tmp_file)

  pipeline.stop()
  pipeline.run()
def test_restart_recovery(pipeline, clean_db):
  q = "SELECT COUNT(*) FROM stream"
  pipeline.create_cv("test_restart_recovery", q)

  pipeline.insert("stream", ["x"], [(1,), (1,)])
  result = pipeline.execute("SELECT * FROM test_restart_recovery").first()
  assert result["count"] == 2

  # Need to sleep here, otherwise on restart the materialization table is
  # empty. Not sure why.
  time.sleep(0.1)

  # Restart.
  pipeline.stop()
  pipeline.run()

  result = pipeline.execute("SELECT * FROM test_restart_recovery").first()
  assert result["count"] == 2

  pipeline.insert("stream", ["x"], [(1,), (1,)])
  result = pipeline.execute("SELECT * FROM test_restart_recovery").first()
  assert result["count"] == 4
def test_binary_upgrade(pipeline, clean_db):
  """
  Verify that binary upgrades properly transfer all objects and data
  into the new installation
  """
  # Create some regular tables with data, and create an index on half of them
  for n in range(16):
    name = 't_%d' % n
    pipeline.create_table(name, x='integer', y='text', z='text')
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert(name, ('x', 'y', 'z'), rows)
    if n >= 8:
      pipeline.execute('CREATE INDEX idx_%s ON %s(y)' % (name, name))

  # Create some streams
  for n in range(8):
    name = 's_%d' % n
    pipeline.create_stream(name, x='integer', y='text')

  # Now create some CVs with data, some with indices
  for n in range(32):
    name = 'cv_%d' % n
    pipeline.create_stream('stream_%d' % n, x='int', y='text', z='text')
    pipeline.create_cv(name, 'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM stream_%d GROUP BY z' % n)
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert('stream_%d' % n, ('x', 'y', 'z'), rows)
    if n >= 16:
      pipeline.execute('CREATE INDEX idx_%s ON %s(z)' % (name, name))

  # Now create some in another namespace
  pipeline.execute('CREATE SCHEMA namespace')
  for n in range(8):
    name = 'namespace.cv_%d' % n
    pipeline.create_stream('namespace.stream_%d' % n, x='int', y='text', z='text')
    pipeline.create_cv(name, 'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM namespace.stream_%d GROUP BY z' % n)
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert('namespace.stream_%d' % n, ('x', 'y', 'z'), rows)
    if n >= 4:
      pipeline.execute('CREATE INDEX namespace_idx_%d ON %s(z)' % (n, name))

  create_fn = """
  CREATE OR REPLACE FUNCTION tg_fn()
  RETURNS trigger AS
  $$
  BEGIN
    RETURN NEW;
  END;
  $$
  LANGUAGE plpgsql;
  """
  pipeline.execute(create_fn)

  pipeline.create_stream('stream0', z='text')

  # Create some transforms
  for n in range(8):
    name = 'ct_%d' % n
    pipeline.create_ct(name, 'SELECT z::text FROM stream0', 'tg_fn()')

  time.sleep(10)

  old_bin_dir = new_bin_dir = pipeline.bin_dir
  old_data_dir = pipeline.data_dir
  new_data_dir = os.path.abspath('test_binary_upgrade_data_dir')

  pipeline.stop()

  p = subprocess.Popen(
    [os.path.join(pipeline.bin_dir, 'pipeline-init'), '-D', new_data_dir])
  stdout, stderr = p.communicate()

  result = subprocess.check_call([
    os.path.join(pipeline.bin_dir, 'pipeline-upgrade'),
    '-b', old_bin_dir, '-B', new_bin_dir,
    '-d', old_data_dir, '-D', new_data_dir])
  assert result == 0

  # The cleanup path expects this to be running, but we're done with it
  pipeline.run()

  # pipeline-upgrade returned successfully and has already done sanity checks
  # but let's manually verify that all objects were migrated to the new data directory
  upgraded = PipelineDB(data_dir=new_data_dir)
  upgraded.run()

  # Tables
  for n in range(16):
    name = 't_%d' % n
    q = 'SELECT x, y, z FROM %s ORDER BY x' % name
    rows = upgraded.execute(q)
    for i, row in enumerate(rows):
      x, y, z = row
      assert x == i
      assert y == name
      assert z == name

  # Streams
  for n in range(8):
    name = 's_%d' % n
    rows = list(upgraded.execute(
      "SELECT oid FROM pg_class WHERE relkind = '$' AND relname = '%s'" % name))
    assert len(rows) == 1

  # CVs
  for n in range(32):
    name = 'cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 1000

  # CVs in separate schema
  for n in range(8):
    name = 'namespace.cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 1000

  # Transforms
  for n in range(8):
    name = 'ct_%d' % n
    q = """
    SELECT c.relname FROM pg_class c JOIN pipeline_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
    rows = list(upgraded.execute(q))
    assert len(rows) == 1

  upgraded.stop()
  shutil.rmtree(new_data_dir)
def test_binary_upgrade(pipeline, clean_db):
  """
  Verify that binary upgrades properly transfer all objects and data
  into the new installation
  """
  # Create some regular tables with data, and create an index on half of them
  for n in range(16):
    name = 't_%d' % n
    pipeline.create_table(name, x='integer', y='text', z='text')
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert(name, ('x', 'y', 'z'), rows)
    if n >= 8:
      pipeline.execute('CREATE INDEX idx_%s ON %s(y)' % (name, name))

  # Create some streams
  for n in range(8):
    name = 's_%d' % n
    pipeline.create_stream(name, x='integer', y='text')

  # Now create some CVs with data, some with indices
  for n in range(32):
    name = 'cv_%d' % n
    pipeline.create_cv(name, 'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM stream_%d GROUP BY z' % n)
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert('stream_%d' % n, ('x', 'y', 'z'), rows)
    if n >= 16:
      pipeline.execute('CREATE INDEX idx_%s ON %s(z)' % (name, name))

  # Now create some in another namespace
  pipeline.execute('CREATE SCHEMA namespace')
  for n in range(8):
    name = 'namespace.cv_%d' % n
    pipeline.create_cv(name, 'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM namespace.stream_%d GROUP BY z' % n)
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert('namespace.stream_%d' % n, ('x', 'y', 'z'), rows)
    if n >= 4:
      pipeline.execute('CREATE INDEX namespace_idx_%d ON %s(z)' % (n, name))

  create_fn = """
  CREATE OR REPLACE FUNCTION tg_fn()
  RETURNS trigger AS
  $$
  BEGIN
    RETURN NEW;
  END;
  $$
  LANGUAGE plpgsql;
  """
  pipeline.execute(create_fn)

  # Create some transforms
  for n in range(8):
    name = 'ct_%d' % n
    pipeline.create_ct(name, 'SELECT z::text FROM stream', 'tg_fn()')

  time.sleep(10)

  old_bin_dir = new_bin_dir = pipeline.bin_dir
  old_data_dir = pipeline.data_dir
  new_data_dir = os.path.abspath('test_binary_upgrade_data_dir')

  pipeline.stop()

  p = subprocess.Popen(
    [os.path.join(pipeline.bin_dir, 'pipeline-init'), '-D', new_data_dir])
  stdout, stderr = p.communicate()

  result = subprocess.check_call([
    os.path.join(pipeline.bin_dir, 'pipeline-upgrade'),
    '-b', old_bin_dir, '-B', new_bin_dir,
    '-d', old_data_dir, '-D', new_data_dir])
  assert result == 0

  # The cleanup path expects this to be running, but we're done with it
  pipeline.run()

  # pipeline-upgrade returned successfully and has already done sanity checks
  # but let's manually verify that all objects were migrated to the new data directory
  upgraded = PipelineDB(data_dir=new_data_dir)
  upgraded.run()

  # Tables
  for n in range(16):
    name = 't_%d' % n
    q = 'SELECT x, y, z FROM %s ORDER BY x' % name
    rows = upgraded.execute(q)
    for i, row in enumerate(rows):
      x, y, z = row
      assert x == i
      assert y == name
      assert z == name

  # Streams
  for n in range(8):
    name = 's_%d' % n
    rows = list(upgraded.execute(
      "SELECT oid FROM pg_class WHERE relkind = '$' AND relname = '%s'" % name))
    assert len(rows) == 1

  # CVs
  for n in range(32):
    name = 'cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 1000

  # CVs in separate schema
  for n in range(8):
    name = 'namespace.cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 1000

  # Transforms
  for n in range(8):
    name = 'ct_%d' % n
    q = """
    SELECT c.relname FROM pg_class c JOIN pipeline_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
    rows = list(upgraded.execute(q))
    assert len(rows) == 1

  upgraded.stop()
  shutil.rmtree(new_data_dir)
def test_binary_upgrade(pipeline, clean_db):
  """
  Verify that binary upgrades properly transfer all objects and data
  into the new installation
  """
  if pipeline.version_num == 110000:
    pytest.skip('skipping until PG11 supports dump/restore WITH OIDS')

  # Create some regular tables with data, and create an index on half of them
  for n in range(16):
    name = 't_%d' % n
    pipeline.create_table(name, x='integer', y='text', z='text')
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert(name, ('x', 'y', 'z'), rows)
    if n >= 8:
      pipeline.execute('CREATE INDEX idx_%s ON %s(y)' % (name, name))

  # Create some streams
  for n in range(8):
    name = 's_%d' % n
    pipeline.create_stream(name, x='integer', y='text')

  # Now create some CVs with data, some with indices
  for n in range(32):
    name = 'cv_%d' % n
    pipeline.create_stream('stream_%d' % n, x='int', y='text', z='text')
    pipeline.create_cv(name, 'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM stream_%d GROUP BY z' % n)
    if n >= 16:
      pipeline.execute('CREATE INDEX idx_%s ON %s(z)' % (name, name))

  # Create some STJs
  for n in range(8):
    pipeline.create_cv('stj_%d' % n, 'SELECT t.x, count(*) FROM stream_%d s JOIN t_%d t ON s.x = t.x GROUP BY t.x' % (n, n))

  # Create some SW CVs
  for n in range(8):
    pipeline.create_cv('sw_%d' % n, 'SELECT count(*) FROM stream_%d' % n,
                       sw='%d days' % (n + 1), step_factor=n + 1)

  # Create some CVs/CTs/streams that we'll rename
  for n in range(4):
    pipeline.create_stream('to_rename_s_%d' % n, x='int')
    pipeline.create_cv('to_rename_cv_%d' % n, 'SELECT x, count(*) FROM to_rename_s_%d GROUP BY x' % n)
    pipeline.create_ct('to_rename_ct_%d' % n, 'SELECT x FROM to_rename_s_%d' % n)
    pipeline.create_cv('to_rename_ct_reader_%d' % n, "SELECT count(*) FROM output_of('to_rename_ct_%d')" % n)

    rows = [(x,) for x in range(1000)]
    pipeline.insert('to_rename_s_%d' % n, ('x',), rows)

  # Now rename them
  for n in range(4):
    pipeline.execute('ALTER FOREIGN TABLE to_rename_s_%d RENAME TO renamed_s_%d' % (n, n))
    pipeline.execute('ALTER VIEW to_rename_cv_%d RENAME TO renamed_cv_%d' % (n, n))
    pipeline.execute('ALTER VIEW to_rename_ct_%d RENAME TO renamed_ct_%d' % (n, n))
    pipeline.execute('ALTER VIEW to_rename_ct_reader_%d RENAME TO renamed_ct_reader_%d' % (n, n))

    # And write some data using the new stream names
    rows = [(x,) for x in range(1000)]
    pipeline.insert('renamed_s_%d' % n, ('x',), rows)

  # Create a CV chain that combines output streams
  q = """
  SELECT (new).z, combine((delta).count) AS count,
         combine((delta).distinct_count) AS distinct_count
  FROM output_of('cv_0') GROUP BY (new).z
  """
  pipeline.create_cv('combine_cv_0', q)

  q = """
  SELECT combine((delta).count) AS count,
         combine((delta).distinct_count) AS distinct_count
  FROM output_of('combine_cv_0')
  """
  pipeline.create_cv('combine_cv_1', q)

  for n in range(32):
    name = 'cv_%d' % n
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert('stream_%d' % n, ('x', 'y', 'z'), rows)

  # Create a CV with a TTL to verify TTL info is restored properly
  pipeline.create_cv('ttlcv', 'SELECT second(arrival_timestamp), count(*) FROM stream_0 GROUP BY second',
                     ttl='1 hour', ttl_column='second')

  # Now create some in another namespace
  pipeline.execute('CREATE SCHEMA namespace')
  for n in range(8):
    name = 'namespace.cv_%d' % n
    pipeline.create_stream('namespace.stream_%d' % n, x='int', y='text', z='text')
    pipeline.create_cv(name, 'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM namespace.stream_%d GROUP BY z' % n)
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert('namespace.stream_%d' % n, ('x', 'y', 'z'), rows)
    if n >= 4:
      pipeline.execute('CREATE INDEX namespace_idx_%d ON %s(z)' % (n, name))

  create_fn = """
  CREATE OR REPLACE FUNCTION tg_fn()
  RETURNS trigger AS
  $$
  BEGIN
    RETURN NEW;
  END;
  $$
  LANGUAGE plpgsql;
  """
  pipeline.execute(create_fn)

  pipeline.create_stream('stream0', z='text')

  # Create some transforms with trigger functions
  for n in range(8):
    name = 'ct_%d' % n
    pipeline.create_ct(name, 'SELECT z::text FROM stream0', 'tg_fn')

  # Create some transforms without trigger functions
  for n in range(8):
    name = 'ct_no_trig_%d' % n
    pipeline.create_ct(name, 'SELECT z::text FROM stream0')

  time.sleep(10)

  old_bin_dir = new_bin_dir = pipeline.bin_dir
  old_data_dir = pipeline.data_dir
  new_data_dir0 = os.path.abspath('test_binary_upgrade_data_dir0')

  if os.path.exists(new_data_dir0):
    shutil.rmtree(new_data_dir0)

  pipeline.stop()

  p = subprocess.Popen(
    [os.path.join(pipeline.bin_dir, 'initdb'), '-D', new_data_dir0])
  stdout, stderr = p.communicate()

  with open(os.path.join(new_data_dir0, 'postgresql.conf'), 'a') as f:
    f.write('shared_preload_libraries=pipelinedb\n')
    f.write('max_worker_processes=128\n')
    f.write('pipelinedb.stream_insert_level=sync_commit\n')

  result = subprocess.check_call([
    os.path.join(pipeline.bin_dir, 'pg_upgrade'),
    '-b', old_bin_dir, '-B', new_bin_dir,
    '-d', old_data_dir, '-D', new_data_dir0])
  assert result == 0

  # The cleanup path expects this to be running, but we're done with it
  pipeline.run()

  # pg_upgrade returned successfully and has already done sanity checks
  # but let's manually verify that all objects were migrated to the new data directory
  upgraded = PipelineDB(data_dir=new_data_dir0)
  upgraded.run()

  # Tables
  for n in range(16):
    name = 't_%d' % n
    q = 'SELECT x, y, z FROM %s ORDER BY x' % name
    rows = upgraded.execute(q)
    for i, row in enumerate(rows):
      assert row['x'] == i
      assert row['y'] == name
      assert row['z'] == name

  # Streams
  for n in range(8):
    name = 's_%d' % n
    rows = list(upgraded.execute(
      "SELECT oid FROM pg_class WHERE relkind = 'f' AND relname = '%s'" % name))
    assert len(rows) == 1

  # CVs
  for n in range(32):
    name = 'cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 1000

  # CV with TTL
  row = list(upgraded.execute(
    "SELECT ttl, ttl_attno FROM pg_class c JOIN pipelinedb.cont_query pq on c.oid = pq.relid WHERE c.relname = 'ttlcv'"))[0]
  assert row[0] == 3600
  assert row[1] == 1

  # CVs in separate schema
  for n in range(8):
    name = 'namespace.cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 1000

  # Transforms with trigger functions
  for n in range(8):
    name = 'ct_%d' % n
    q = """
    SELECT c.relname FROM pg_class c JOIN pipelinedb.cont_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
    rows = list(upgraded.execute(q))
    assert len(rows) == 1

  # Transforms without trigger functions
  for n in range(8):
    name = 'ct_no_trig_%d' % n
    q = """
    SELECT c.relname FROM pg_class c JOIN pipelinedb.cont_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
    rows = list(upgraded.execute(q))
    assert len(rows) == 1

  # Verify SW CVs
  for n in range(8):
    name = 'sw_%d' % n
    row = upgraded.execute(
      "SELECT ttl, step_factor FROM pipelinedb.cont_query cq JOIN pg_class c ON cq.relid = c.oid WHERE relname = '%s'" % name)[0]
    assert row['ttl'] == (n + 1) * 3600 * 24
    assert row['step_factor'] == n + 1

    row = upgraded.execute('SELECT count FROM %s' % name)[0]
    assert row['count'] == 1000

  # Verify renamed CVs/CTs/streams
  for n in range(4):
    row = upgraded.execute('SELECT combine(count) FROM renamed_cv_%d' % n)[0]
    assert row['combine'] == 2000
    row = upgraded.execute('SELECT combine(count) FROM renamed_ct_reader_%d' % n)[0]
    assert row['combine'] == 2000

  # Verify chained CVs
  row = upgraded.execute('SELECT z, count, distinct_count FROM combine_cv_0')[0]
  assert row['z'] == 'cv_0'
  assert row['count'] == 1000
  assert row['distinct_count'] == 1

  row = upgraded.execute('SELECT count, distinct_count FROM combine_cv_1')[0]
  assert row['count'] == 1000
  assert row['distinct_count'] == 1

  # Now insert some new data and verify CVs are still updating properly
  for n in range(32):
    name = 'cv_%d' % n
    rows = [(x, name, name) for x in range(1000)]
    upgraded.insert('stream_%d' % n, ('x', 'y', 'z'), rows)

  for n in range(32):
    name = 'cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 2000

  row = upgraded.execute('SELECT z, count, distinct_count FROM combine_cv_0')[0]
  assert row['z'] == 'cv_0'
  assert row['count'] == 2000
  assert row['distinct_count'] == 1

  row = upgraded.execute('SELECT count, distinct_count FROM combine_cv_1')[0]
  assert row['count'] == 2000
  assert row['distinct_count'] == 1

  # Verify STJs
  for n in range(8):
    cv = 'stj_%d' % n
    row = upgraded.execute('SELECT sum(count) FROM %s' % cv)[0]
    assert row['sum'] == 2000

  # Rename objects again before the second upgrade
  for n in range(4):
    upgraded.execute('ALTER FOREIGN TABLE renamed_s_%d RENAME TO renamed_again_s_%d' % (n, n))
    upgraded.execute('ALTER VIEW renamed_cv_%d RENAME TO renamed_again_cv_%d' % (n, n))
    upgraded.execute('ALTER VIEW renamed_ct_%d RENAME TO renamed_again_ct_%d' % (n, n))
    upgraded.execute('ALTER VIEW renamed_ct_reader_%d RENAME TO renamed_again_ct_reader_%d' % (n, n))

    # And write some data using the new stream names
    rows = [(x,) for x in range(1000)]
    upgraded.insert('renamed_again_s_%d' % n, ('x',), rows)

  upgraded.stop()

  new_data_dir1 = os.path.abspath('test_binary_upgrade_data_dir1')
  if os.path.exists(new_data_dir1):
    shutil.rmtree(new_data_dir1)

  p = subprocess.Popen(
    [os.path.join(pipeline.bin_dir, 'initdb'), '-D', new_data_dir1])
  stdout, stderr = p.communicate()

  with open(os.path.join(new_data_dir1, 'postgresql.conf'), 'a') as f:
    f.write('shared_preload_libraries=pipelinedb\n')
    f.write('max_worker_processes=128\n')
    f.write('pipelinedb.stream_insert_level=sync_commit\n')

  # Now upgrade the upgraded DB to verify that restored DBs can be updated properly
  result = subprocess.check_call([
    os.path.join(pipeline.bin_dir, 'pg_upgrade'),
    '-b', old_bin_dir, '-B', new_bin_dir,
    '-d', new_data_dir0, '-D', new_data_dir1])
  assert result == 0

  # but let's manually verify that all objects were migrated to the new data directory
  upgraded = PipelineDB(data_dir=new_data_dir1)
  upgraded.run()

  # Tables
  for n in range(16):
    name = 't_%d' % n
    q = 'SELECT x, y, z FROM %s ORDER BY x' % name
    rows = upgraded.execute(q)
    for i, row in enumerate(rows):
      assert row['x'] == i
      assert row['y'] == name
      assert row['z'] == name

  # Streams
  for n in range(8):
    name = 's_%d' % n
    rows = list(upgraded.execute(
      "SELECT oid FROM pg_class WHERE relkind = 'f' AND relname = '%s'" % name))
    assert len(rows) == 1

  # CVs
  for n in range(32):
    name = 'cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 2000

  # CV with TTL
  row = list(upgraded.execute(
    "SELECT ttl, ttl_attno FROM pg_class c JOIN pipelinedb.cont_query pq on c.oid = pq.relid WHERE c.relname = 'ttlcv'"))[0]
  assert row[0] == 3600
  assert row[1] == 1

  # CVs in separate schema
  for n in range(8):
    name = 'namespace.cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 1000

  # Transforms with trigger functions
  for n in range(8):
    name = 'ct_%d' % n
    q = """
    SELECT c.relname FROM pg_class c JOIN pipelinedb.cont_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
    rows = list(upgraded.execute(q))
    assert len(rows) == 1

  # Transforms without trigger functions
  for n in range(8):
    name = 'ct_no_trig_%d' % n
    q = """
    SELECT c.relname FROM pg_class c JOIN pipelinedb.cont_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
    rows = list(upgraded.execute(q))
    assert len(rows) == 1

  # Verify SW CVs
  for n in range(8):
    name = 'sw_%d' % n
    step_factor = n + 1
    row = upgraded.execute(
      "SELECT ttl, step_factor FROM pipelinedb.cont_query cq JOIN pg_class c ON cq.relid = c.oid WHERE relname = '%s'" % name)[0]
    assert row['ttl'] == (n + 1) * 3600 * 24
    assert row['step_factor'] == n + 1

    row = upgraded.execute('SELECT count FROM %s' % name)[0]
    assert row['count'] == 2000

  # Verify renamed CVs/CTs/streams
  for n in range(4):
    row = upgraded.execute('SELECT combine(count) FROM renamed_again_cv_%d' % n)[0]
    assert row['combine'] == 3000
    row = upgraded.execute('SELECT combine(count) FROM renamed_again_ct_reader_%d' % n)[0]
    assert row['combine'] == 3000

  # Verify chained CV
  row = upgraded.execute('SELECT z, count, distinct_count FROM combine_cv_0')[0]
  assert row['z'] == 'cv_0'
  assert row['count'] == 2000
  assert row['distinct_count'] == 1

  row = upgraded.execute('SELECT count, distinct_count FROM combine_cv_1')[0]
  assert row['count'] == 2000
  assert row['distinct_count'] == 1

  # Now insert some new data and verify CVs are still updating properly
  for n in range(32):
    name = 'cv_%d' % n
    rows = [(x, name, name) for x in range(1000)]
    upgraded.insert('stream_%d' % n, ('x', 'y', 'z'), rows)

  for n in range(32):
    name = 'cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1
    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 3000

  row = upgraded.execute('SELECT z, count, distinct_count FROM combine_cv_0')[0]
  assert row['z'] == 'cv_0'
  assert row['count'] == 3000
  assert row['distinct_count'] == 1

  row = upgraded.execute('SELECT count, distinct_count FROM combine_cv_1')[0]
  assert row['count'] == 3000
  assert row['distinct_count'] == 1

  # Verify STJs
  for n in range(8):
    cv = 'stj_%d' % n
    row = upgraded.execute('SELECT sum(count) FROM %s' % cv)[0]
    assert row['sum'] == 3000

  upgraded.stop()

  pipeline.execute('DROP VIEW combine_cv_0 CASCADE')

  shutil.rmtree(new_data_dir0)
  shutil.rmtree(new_data_dir1)