def assert_result_changes(func, args): """ Verifies that the result of the given function changes with time """ name = 'assert_%s_decreases' % func pipeline.create_cv( name, "SELECT %s(%s) FROM stream WHERE arrival_timestamp > clock_timestamp() - interval '2 seconds'" % (func, args)) rows = [(n, str(n), n + 1) for n in range(1000)] pipeline.insert('stream', ('x', 'y', 'z'), rows) current = 1 results = [] while current: row = pipeline.execute('SELECT * FROM %s' % name).first() current = row[func] if current is None: break results.append(current) # Verify that we actually read something assert results pipeline.drop_cv(name)
def _test_agg(pipeline, agg, check_fn=None):
  """
  Verifies that the given window aggregate computed by a continuous view
  over a stream matches the same aggregate computed over an identical static
  table. Assumes the caller has already created a stream named 'stream'
  with ts/g/x/y/z columns.
  """
  name = agg[:agg.find('(')]
  q = 'SELECT g::integer, %s OVER (PARTITION BY g ORDER BY ts::timestamp) FROM %s'
  cv_name = 'test_%s' % name
  table_name = 'test_%s_t' % name
  desc = ('ts', 'g', 'x', 'y', 'z')

  pipeline.create_cv(cv_name, q % (agg, 'stream'))
  pipeline.create_table(table_name, ts='timestamp', x='integer',
                        y='integer', z='integer', g='integer')

  rows = []
  for n in range(1000):
    ts = str(datetime.utcnow() + timedelta(seconds=n))
    row = (ts, n % 10, random.randint(1, 256), random.randint(1, 256),
           random.randint(1, 256))
    rows.append(row)

  # Write identical rows to the stream and to the static table
  pipeline.insert('stream', desc, rows)
  pipeline.insert(table_name, desc, rows)

  if check_fn:
    return check_fn(pipeline)

  expected = list(pipeline.execute(q % (agg, table_name) + ' ORDER BY g'))
  result = list(pipeline.execute('SELECT * FROM %s ORDER BY g' % cv_name))

  assert len(expected) == len(result)

  for e, r in zip(expected, result):
    assert e == r

  pipeline.drop_cv(cv_name)
  pipeline.drop_table(table_name)
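# Hypothetical caller (not part of the original suite): a concrete test would
# create the stream _test_agg expects and then exercise one window aggregate,
# along these lines.
def test_running_sum(pipeline, clean_db):
  pipeline.create_stream('stream', ts='timestamp', g='integer',
                         x='integer', y='integer', z='integer')
  _test_agg(pipeline, 'sum(x::integer)')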
def assert_result_changes(func, args):
  """
  Verifies that the result of the given function changes with time
  """
  pipeline.create_stream('stream0', x='int', y='text', z='int')
  name = 'assert_%s_decreases' % func
  pipeline.create_cv(
    name,
    "SELECT %s(%s) FROM stream0 WHERE arrival_timestamp > "
    "clock_timestamp() - interval '2 seconds'" % (func, args))

  # We also create a wide sliding window just to verify that user combines
  # work on SW CVs and have the same output as if they were being run on a
  # non-SW CV
  sw_name = name + '_sw_agg'
  pipeline.create_cv(
    sw_name,
    "SELECT x %% 10 AS g, %s(%s) FROM stream0 WHERE arrival_timestamp > "
    "clock_timestamp() - interval '2 days' GROUP BY g" % (func, args))

  verify_name = name + '_sw_agg_verify'
  pipeline.create_cv(
    verify_name,
    'SELECT x %% 10 AS g, %s(%s) FROM stream0 GROUP BY g' % (func, args))

  rows = [(n, str(n), n + 1) for n in range(1000)]
  pipeline.insert('stream0', ('x', 'y', 'z'), rows)

  current = 1
  results = []

  while current:
    row = pipeline.execute('SELECT * FROM %s' % name)[0]
    current = row[func]
    if current is None:
      break
    results.append(current)

  # Verify that we actually read something
  assert results

  # Verify user combines on SW CVs work and produce the expected output
  sw_row = pipeline.execute('SELECT combine(%s) FROM %s' % (func, sw_name))[0]
  expected_row = pipeline.execute(
    'SELECT combine(%s) FROM %s' % (func, verify_name))[0]

  if isinstance(sw_row['combine'], list):
    sw_row['combine'] = sorted(sw_row['combine'])
    expected_row['combine'] = sorted(expected_row['combine'])

  assert sw_row['combine'] == expected_row['combine']

  pipeline.drop_cv(name)
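# Hypothetical callers (not part of the original suite): sliding-window
# aggregate tests would be thin wrappers over assert_result_changes, e.g.:
def test_count(pipeline, clean_db):
  assert_result_changes('count', '*')


def test_sum(pipeline, clean_db):
  assert_result_changes('sum', 'x::integer')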
def test_concurrent_sw_ticking(pipeline, clean_db):
  """
  Verify that several concurrent sliding-window queries, each having a
  different window, tick correctly at different intervals.
  """
  pipeline.create_stream('stream0', x='int')

  output_names = []
  for n in range(10):
    name = 'sw%d' % n
    pipeline.create_cv(name, 'SELECT x::integer, count(*) FROM stream0 GROUP BY x',
                       sw='%d seconds' % (n + 10))
    output_name = name + '_output'
    q = """
    SELECT arrival_timestamp,
    CASE WHEN (old).x IS NULL THEN (new).x ELSE (old).x END AS x, old, new
    FROM %s_osrel
    """ % name
    pipeline.create_cv(output_name, q)
    output_names.append(output_name)

  names = [r[0] for r in pipeline.execute(
    'SELECT name FROM pipeline_views() ORDER BY name DESC')]
  assert len(names) == 2 * 10

  pipeline.insert('stream0', ('x',), [(x % 100,) for x in range(10000)])
  time.sleep(25)

  for name in output_names:
    rows = list(pipeline.execute('SELECT COUNT(DISTINCT x) FROM %s' % name))
    assert rows[0][0] == 100

    for x in range(100):
      # In window
      assert pipeline.execute(
        'SELECT * FROM %s WHERE old IS NULL AND new IS NOT NULL AND x = %d' % (name, x))
      # Out of window
      assert pipeline.execute(
        'SELECT * FROM %s WHERE old IS NOT NULL AND new IS NULL AND x = %d' % (name, x))

  # Drop these in reverse dependency order to prevent deadlocks
  for name in names:
    pipeline.drop_cv(name)
def test_create_drop_continuous_view(pipeline, clean_db):
  """
  Basic sanity check
  """
  # Assumed: explicitly create the stream the CVs read from, matching the
  # convention used by the other tests in this suite
  pipeline.create_stream('stream', id='int')

  pipeline.create_cv('cv0', 'SELECT id::integer FROM stream')
  pipeline.create_cv('cv1', 'SELECT id::integer FROM stream')
  pipeline.create_cv('cv2', 'SELECT id::integer FROM stream')

  result = pipeline.execute('SELECT * FROM pipeline_query')
  names = [r['name'] for r in result]

  assert sorted(names) == ['cv0', 'cv1', 'cv2']

  pipeline.drop_cv('cv0')
  pipeline.drop_cv('cv1')
  pipeline.drop_cv('cv2')

  result = pipeline.execute('SELECT * FROM pipeline_query')
  names = [r['name'] for r in result]

  assert len(names) == 0
def test_create_views(pipeline, clean_db):
  cvs = []
  pipeline.create_stream('stream0', x='int')
  q = 'SELECT count(*) FROM stream0'

  for i in range(1, MAX_CQS):
    cvs.append('cv_%d' % i)
    pipeline.create_cv(cvs[-1], q)

  # Creating one more CV than MAX_CQS allows must fail
  try:
    pipeline.create_cv('cv_fail', q)
    assert False
  except Exception as e:
    assert 'maximum number of continuous queries exceeded' in str(e)

  ids = [r['id'] for r in pipeline.execute('SELECT id FROM pipeline_views()')]
  assert len(set(ids)) == len(ids)
  assert set(ids) == set(range(1, MAX_CQS))

  # Drop a random number of CVs, then recreate as many and verify that the
  # freed id slots are reused
  num_remove = random.randint(128, 512)
  for _ in range(num_remove):
    pipeline.drop_cv(cvs.pop())

  for _ in range(num_remove):
    cvs.append('cv_%d' % (len(cvs) + 1))
    pipeline.create_cv(cvs[-1], q)
def test_output_tree(pipeline, clean_db):
  """
  Create a relatively complex tree of continuous views chained together by
  their output streams, and verify that all output correctly propagates to
  the leaves.
  """
  pipeline.create_stream('root', x='int')
  pipeline.create_cv('level0_0', 'SELECT x::integer, count(*) FROM root GROUP BY x')

  pipeline.create_cv('level1_0', 'SELECT (new).x, (new).count FROM level0_0_osrel')
  pipeline.create_cv('level1_1', 'SELECT (new).x, (new).count FROM level0_0_osrel')

  pipeline.create_cv('level2_0', 'SELECT (new).x, (new).count FROM level1_0_osrel')
  pipeline.create_cv('level2_1', 'SELECT (new).x, (new).count FROM level1_0_osrel')
  pipeline.create_cv('level2_2', 'SELECT (new).x, (new).count FROM level1_1_osrel')
  pipeline.create_cv('level2_3', 'SELECT (new).x, (new).count FROM level1_1_osrel')

  pipeline.create_cv('level3_0', 'SELECT (new).x, (new).count FROM level2_0_osrel')
  pipeline.create_cv('level3_1', 'SELECT (new).x, (new).count FROM level2_0_osrel')
  pipeline.create_cv('level3_2', 'SELECT (new).x, (new).count FROM level2_1_osrel')
  pipeline.create_cv('level3_3', 'SELECT (new).x, (new).count FROM level2_1_osrel')
  pipeline.create_cv('level3_4', 'SELECT (new).x, (new).count FROM level2_2_osrel')
  pipeline.create_cv('level3_5', 'SELECT (new).x, (new).count FROM level2_2_osrel')
  pipeline.create_cv('level3_6', 'SELECT (new).x, (new).count FROM level2_3_osrel')
  pipeline.create_cv('level3_7', 'SELECT (new).x, (new).count FROM level2_3_osrel')

  pipeline.insert('root', ('x',), [(x % 100,) for x in range(10000)])
  # Give the inserts time to propagate through the output-stream chain
  time.sleep(5)

  names = [r[0] for r in pipeline.execute(
    'SELECT name FROM pipeline_views() ORDER BY name DESC')]
  assert len(names) == 15

  # Verify all values propagated to each node in the tree
  for name in names:
    rows = pipeline.execute('SELECT x, max(count) FROM %s GROUP BY x' % name)
    for row in rows:
      x, count = row
      assert count == 100

  pipeline.insert('root', ('x',), [(x % 100,) for x in range(10000)])
  time.sleep(5)

  # Verify all values propagated to each node in the tree again
  for name in names:
    rows = pipeline.execute('SELECT x, max(count) FROM %s GROUP BY x' % name)
    for row in rows:
      x, count = row
      assert count == 200

  # Drop these in reverse dependency order to prevent deadlocks
  for name in names:
    pipeline.drop_cv(name)
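# Note on the *_osrel output streams used above: each continuous view emits
# delta tuples (old, new) to its output stream as its groups are updated.
# For example, when level0_0's count for x = 1 advances from 3 to 4, a reader
# of level0_0_osrel sees old = (1, 3) and new = (1, 4); old is NULL the first
# time a group appears, which is why downstream CVs can select (new).x and
# (new).count directly.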