Exemple #1
0
def test_filter_clause(pipeline, clean_db):
    """
    Check that aggregate FILTER clauses give the same sums in a plain
    continuous view and in a sliding-window continuous view
    """
    pipeline.create_stream("test_filter_stream", x="int")

    select_clause = """
    SELECT SUM(x::int) FILTER (WHERE mod(x, 2) = 0) AS sum2, SUM(x::int) FILTER (WHERE mod(x, 3) = 0) AS sum3 FROM test_filter_stream
    """
    window_clause = """
    WHERE arrival_timestamp > clock_timestamp() - interval '30 second'
    """
    pipeline.create_cv("test_filter", select_clause)
    pipeline.create_cv("test_filter_sw", "%s %s" % (select_clause, window_clause))

    # One single-column row per integer in [0, 1000)
    rows = [(value,) for value in range(1000)]
    pipeline.insert("test_filter_stream", ("x",), rows)

    # Reference sums computed locally from the rows that were inserted
    values = [row[0] for row in rows]
    expected_sum2 = sum(v for v in values if v % 2 == 0)
    expected_sum3 = sum(v for v in values if v % 3 == 0)

    plain = pipeline.execute("SELECT * FROM test_filter").first()
    windowed = pipeline.execute("SELECT * FROM test_filter_sw").first()

    assert plain["sum2"] == windowed["sum2"] == expected_sum2
    assert plain["sum3"] == windowed["sum3"] == expected_sum3
Exemple #2
0
def test_single_continuous_view(pipeline, clean_db):
    """
    Dump one continuous view, drop all views, restore the dump, and check
    that only the dumped view comes back
    """
    view_query = "SELECT COUNT(*) FROM stream"
    pipeline.create_cv("test_single0", view_query)
    pipeline.create_cv("test_single1", view_query)
    pipeline.insert("stream", ("x",), [(value,) for value in range(10)])

    # Both views must have counted every inserted row
    row = pipeline.execute("SELECT count FROM test_single0").first()
    assert row["count"] == 10
    row = pipeline.execute("SELECT count FROM test_single1").first()
    assert row["count"] == 10

    # Only test_single0 is written to the dump file
    _dump(pipeline, "test_single.sql", cv_name="test_single0")
    pipeline.drop_all_views()
    _restore(pipeline, "test_single.sql")

    row = pipeline.execute("SELECT count FROM test_single0").first()
    assert row["count"] == 10

    # test_single1 was not dumped, so no relation matching its name may exist
    leftovers = list(pipeline.execute("SELECT * FROM pg_class WHERE relname LIKE '%%test_single1%%'"))
    assert not leftovers
def test_cmsketch_agg(pipeline, clean_db):
    """
    Exercise cmsketch_agg through cmsketch_frequency, both per group and on
    the combined sketch of all groups
    """
    pipeline.create_stream('test_cmsketch_stream', k='int', x='int')

    view_query = """
    SELECT k::integer, cmsketch_agg(x::int) AS c FROM test_cmsketch_stream
    GROUP BY k
    """
    pipeline.create_cv('test_cmsketch_agg', view_query)

    # Group 0 cycles x through 0..19, group 1 cycles x through 0..49
    rows = []
    for i in range(1000):
        rows.extend([(0, i % 20), (1, i % 50)])

    pipeline.insert('test_cmsketch_stream', ('k', 'x'), rows)

    per_group_cursor = pipeline.execute(
      'SELECT cmsketch_frequency(c, 10) AS x, cmsketch_frequency(c, 40) AS y, '
      'cmsketch_frequency(c, 60) FROM test_cmsketch_agg ORDER BY k')
    per_group = list(per_group_cursor.fetchall())
    assert len(per_group) == 2
    assert tuple(per_group[0]) == (50, 0, 0)
    assert tuple(per_group[1]) == (20, 20, 0)

    combined_cursor = pipeline.execute(
      'SELECT cmsketch_frequency(combine(c), 10) AS x, '
      'cmsketch_frequency(combine(c), 40) AS y, cmsketch_frequency(combine(c), 60) '
      'FROM test_cmsketch_agg')
    combined = list(combined_cursor.fetchall())
    assert len(combined) == 1
    assert tuple(combined[0]) == (70, 20, 0)
def test_single_continuous_view(pipeline, clean_db):
  """
  Dump one continuous view together with its stream and matrel, drop
  everything, restore the dump, and check that only the dumped view returns
  """
  pipeline.create_stream('stream0', x='int')

  view_query = 'SELECT COUNT(*) FROM stream0'
  pipeline.create_cv('test_single0', view_query)
  pipeline.create_cv('test_single1', view_query)
  pipeline.insert('stream0', ('x',), [(value,) for value in range(10)])

  # Both views must have counted every inserted row
  row = pipeline.execute('SELECT count FROM test_single0').first()
  assert row['count'] == 10
  row = pipeline.execute('SELECT count FROM test_single1').first()
  assert row['count'] == 10

  # test_single1 is deliberately left out of the dumped tables
  dumped_tables = ['test_single0', 'stream0', 'test_single0_mrel']
  _dump(pipeline, 'test_single.sql', tables=dumped_tables)

  pipeline.drop_all()
  _restore(pipeline, 'test_single.sql')

  row = pipeline.execute('SELECT count FROM test_single0').first()
  assert row['count'] == 10

  # No relation matching the undumped view's name may exist after restore
  leftovers = list(pipeline.execute('SELECT * FROM pg_class WHERE relname LIKE \'%%test_single1%%\''))
  assert not leftovers
Exemple #5
0
def test_user_low_and_high_card(pipeline, clean_db):
    """
    Verify that HLLs with low and high cardinalities are correctly combined
    """
    pipeline.create_stream('test_hll_stream', x='int', k='integer')
    view_query = """
    SELECT k::integer, hll_agg(x::integer) FROM test_hll_stream GROUP BY k
    """
    pipeline.create_cv('test_hll_agg', view_query)

    rows = []

    # Groups 0 and 1 each draw from only two possible values
    for _ in range(1000):
        rows.extend([(0, random.choice((-1, -2))),
                     (1, random.choice((-3, -4)))])

    # Groups 2 and 3 both receive the same 10000 distinct values
    for value in range(10000):
        rows.extend([(2, value), (3, value)])

    pipeline.insert('test_hll_stream', ('k', 'x'), rows)

    low = pipeline.execute('SELECT hll_cardinality(combine(hll_agg)) '
                           'FROM test_hll_agg WHERE k in (0, 1)').first()
    assert low[0] == 4

    high = pipeline.execute('SELECT hll_cardinality(combine(hll_agg)) '
                            'FROM test_hll_agg WHERE k in (2, 3)').first()
    assert high[0] == 9976

    everything = pipeline.execute('SELECT hll_cardinality(combine(hll_agg)) '
                                  'FROM test_hll_agg').first()
    assert everything[0] == 9983
def test_stats_aggs(pipeline, clean_db):
    """
    Check that combining regr_sxx and stddev across a continuous view's
    groups matches the same aggregates computed over a regular table
    """
    view_query = """
    SELECT x::integer %% 10 AS k,
    regr_sxx(x, y::float8), stddev(x) FROM stream GROUP BY k;
    """
    pipeline.create_cv('test_stats_aggs', view_query)
    pipeline.create_table('test_stats_aggs_t', x='integer', y='float8')

    # The same random rows go to both the stream and the reference table
    columns = ('x', 'y')
    rows = [(random.randint(0, 1000), random.random()) for _ in range(10000)]
    pipeline.insert('stream', columns, rows)
    pipeline.insert('test_stats_aggs_t', columns, rows)

    table_query = """
    SELECT regr_sxx(x, y::float8), stddev(x) FROM test_stats_aggs_t
    """
    table_result = list(pipeline.execute(table_query))

    combine_query = """
    SELECT combine(regr_sxx), combine(stddev) FROM test_stats_aggs
    """
    cv_result = list(pipeline.execute(combine_query))

    assert len(table_result) == len(cv_result)

    # Both aggregate columns must agree within a small tolerance
    for table_row, cv_row in zip(table_result, cv_result):
        for col in range(2):
            assert abs(table_row[col] - cv_row[col]) < 0.00001
def test_simple_crash(pipeline, clean_db):
    """
    Test simple worker and combiner crashes.

    Each insert is one batch of two rows. A batch may be lost when a worker
    or combiner is killed, so the counts checked after a kill accept every
    value that is reachable when at most one batch is lost per kill.
    """
    q = 'SELECT COUNT(*) FROM stream'
    pipeline.create_cv('test_simple_crash', q)

    pipeline.insert('stream', ['x'], [(1, ), (1, )])

    result = pipeline.execute('SELECT * FROM test_simple_crash').first()
    assert result['count'] == 2

    # We can potentially lose one batch for a worker or combiner crash.
    # Each batch adds 2 to the count, so after two more batches around the
    # worker kill the count is 6 if nothing was lost and 4 otherwise.
    pipeline.insert('stream', ['x'], [(1, ), (1, )])

    assert kill_worker()

    pipeline.insert('stream', ['x'], [(1, ), (1, )])

    result = pipeline.execute('SELECT * FROM test_simple_crash').first()
    assert result['count'] in [4, 6]

    # Same reasoning for the combiner kill, starting from either 4 or 6.
    pipeline.insert('stream', ['x'], [(1, ), (1, )])

    assert kill_combiner()

    pipeline.insert('stream', ['x'], [(1, ), (1, )])

    result = pipeline.execute('SELECT * FROM test_simple_crash').first()
    assert result['count'] in [6, 8, 10]
def test_null_groups(pipeline, clean_db):
    """
    Verify that null group columns are considered equal

    The same rows, with some values replaced by None, are inserted into a
    stream feeding a grouped continuous view and into a regular table; the
    grouped counts from both must match row for row.
    """
    pipeline.create_stream('stream', x='int', y='int', z='int')
    q = """
    SELECT x::integer, y::integer, z::integer, COUNT(*) FROM stream
    GROUP BY x, y, z;
    """
    desc = ('x', 'y', 'z')
    pipeline.create_cv('test_null_groups', q)
    pipeline.create_table('test_null_groups_t', x='integer', y='integer', z='integer')

    rows = []
    for _ in range(10000):
        vals = [random.randint(0, 10) for _ in range(3)]
        # Replace a value by None when the random draw is <= 0.1. A
        # conditional expression is used because `cond and v or None`
        # would also turn every 0 into None.
        rows.append(tuple(v if random.random() > 0.1 else None for v in vals))

    pipeline.insert('stream', desc, rows)
    pipeline.insert('test_null_groups_t', desc, rows)

    table_q = """
    SELECT x, y, z, COUNT(*) FROM test_null_groups_t
    GROUP BY x, y, z ORDER BY x, y, z;
    """
    expected = list(pipeline.execute(table_q))
    result = list(pipeline.execute('SELECT x, y, z, count FROM test_null_groups ORDER BY x, y, z'))

    # zip() stops at the shorter input, so compare lengths first; otherwise a
    # missing (or empty) view result would pass unnoticed
    assert len(result) == len(expected)

    for r, e in zip(result, expected):
        assert r == e
def test_simple_aggs(pipeline, clean_db):
    """
    Check that combining avg, sum and count across a continuous view's
    groups matches the same aggregates computed over a regular table
    """
    view_query = """
    SELECT x::integer %% 10 AS k,
    avg(x), sum(y::float8), count(*) FROM stream0 GROUP BY k;
    """
    pipeline.create_stream('stream0', x='int', y='float8')
    pipeline.create_cv('test_simple_aggs', view_query)
    pipeline.create_table('test_simple_aggs_t', x='integer', y='float8')

    # The same random rows go to both the stream and the reference table
    columns = ('x', 'y')
    rows = [(random.randint(0, 1000), random.random()) for _ in range(10000)]
    pipeline.insert('stream0', columns, rows)
    pipeline.insert('test_simple_aggs_t', columns, rows)

    table_result = list(pipeline.execute('SELECT avg(x), sum(y::float8), count(*) FROM test_simple_aggs_t'))
    cv_result = list(pipeline.execute('SELECT combine(avg), combine(sum), combine(count) FROM test_simple_aggs'))

    assert len(table_result) == len(cv_result)

    # All three aggregate columns must agree within a small tolerance
    for table_row, cv_row in zip(table_result, cv_result):
        for col in range(3):
            assert abs(table_row[col] - cv_row[col]) < 0.00001
def test_single_continuous_view(pipeline, clean_db):
    """
    Dump one continuous view, drop all queries, restore the dump, and check
    that only the dumped view comes back
    """
    view_query = 'SELECT COUNT(*) FROM stream'
    pipeline.create_cv('test_single0', view_query)
    pipeline.create_cv('test_single1', view_query)
    pipeline.insert('stream', ('x', ), [(value, ) for value in range(10)])

    # Both views must have counted every inserted row
    row = pipeline.execute('SELECT count FROM test_single0').first()
    assert row['count'] == 10
    row = pipeline.execute('SELECT count FROM test_single1').first()
    assert row['count'] == 10

    # Only test_single0 is written to the dump file
    _dump(pipeline, 'test_single.sql', cv_name='test_single0')
    pipeline.drop_all_queries()
    _restore(pipeline, 'test_single.sql')

    row = pipeline.execute('SELECT count FROM test_single0').first()
    assert row['count'] == 10

    # test_single1 was not dumped, so no relation matching its name may exist
    leftover_query = 'SELECT * FROM pg_class WHERE relname LIKE \'%%test_single1%%\''
    leftovers = list(pipeline.execute(leftover_query))
    assert not leftovers
  def messages_partitioned():
    """
    Assert that each of cv0..cv3 reports 100 for both sum(count) and count(*)
    """
    templates = ('SELECT sum(count) FROM cv%d', 'SELECT count(*) FROM cv%d')
    for view_id in range(4):
      for template in templates:
        rows = pipeline.execute(template % view_id)
        assert rows and rows[0][0] == 100
def test_simple_crash(pipeline, clean_db):
  """
  Kill a worker and then a combiner between inserts and check that the
  view's count stays within the range allowed by possibly lost batches.
  """
  pipeline.create_stream('stream0', x='int')
  pipeline.create_cv('test_simple_crash', 'SELECT COUNT(*) FROM stream0')

  def insert_batch():
    # Every batch contributes two rows to the count
    pipeline.insert('stream0', ['x'], [(1,), (1,)])

  def current_count():
    return pipeline.execute('SELECT * FROM test_simple_crash').first()['count']

  insert_batch()
  assert current_count() == 2

  # The batch inserted right before the worker kill can potentially get lost
  insert_batch()
  assert kill_worker()
  insert_batch()
  assert current_count() in [4, 6]

  # The batch inserted right before the combiner kill can potentially get lost
  insert_batch()
  assert kill_combiner()
  insert_batch()
  assert current_count() in [6, 8, 10]

  # Give any events still sitting in ZMQ queues time to be consumed
  time.sleep(2)
def test_restart_recovery(pipeline, clean_db):
  """
  Check that a continuous view keeps its count across a stop/run cycle and
  keeps counting new rows afterwards
  """
  pipeline.create_stream('stream0', x='int')
  pipeline.create_cv('test_restart_recovery', 'SELECT COUNT(*) FROM stream0')

  def insert_pair():
    pipeline.insert('stream0', ['x'], [(1,), (1,)])

  def current_count():
    return pipeline.execute('SELECT * FROM test_restart_recovery').first()['count']

  insert_pair()
  assert current_count() == 2

  # Without this pause the materialization table is empty after the
  # restart; the reason is not known.
  time.sleep(0.1)

  # Restart the server
  pipeline.stop()
  pipeline.run()

  # The count from before the restart must still be there
  assert current_count() == 2

  insert_pair()
  assert current_count() == 4
def test_postmaster_worker_recovery(pipeline, clean_db):
  """
  Verify that the Postmaster only restarts crashed worker processes, and does not
  attempt to start them when the continuous query scheduler should.

  The worker and combiner counts reported by pipeline_proc_stats are recorded,
  a backend found through pg_stat_activity is SIGKILLed, the connection is
  re-established, and the counts are compared with the recorded ones.
  """
  result = pipeline.execute('SELECT COUNT(*) FROM pipeline_proc_stats WHERE type = \'worker\'').first()
  expected_workers = result['count']

  result = pipeline.execute('SELECT COUNT(*) FROM pipeline_proc_stats WHERE type = \'combiner\'').first()
  expected_combiners = result['count']

  q = 'SELECT COUNT(*) FROM stream'
  pipeline.create_cv('test_pm_recovery', q)
  pipeline.insert('stream', ['x'], [(1, ), (1, )])

  def backend():
    try:
      # Just keep a long-running backend connection open
      client = pipeline.engine.connect()
      client.execute('SELECT pg_sleep(10000)')
    except Exception:
      # Best effort: errors on this helper connection are ignored.
      # `Exception` (not a bare except) keeps KeyboardInterrupt/SystemExit
      # from being swallowed.
      pass

  t = threading.Thread(target=backend)
  # Daemon thread: if the test fails before a backend is killed, the
  # pg_sleep(10000) call must not keep the interpreter from exiting.
  t.daemon = True
  t.start()

  attempts = 0
  result = None

  # Poll pg_stat_activity (at most 10 times, one second apart) for a
  # session whose query text contains pg_sleep
  while not result and attempts < 10:
    result = pipeline.execute("""SELECT pid, query FROM pg_stat_activity WHERE lower(query) LIKE '%%pg_sleep%%'""").first()
    time.sleep(1)
    attempts += 1

  assert result

  backend_pid = result['pid']
  os.kill(backend_pid, signal.SIGKILL)

  attempts = 0
  pipeline.conn = None

  # Retry connecting for up to 15 attempts, one second apart
  while attempts < 15:
    try:
      pipeline.conn = pipeline.engine.connect()
      break
    except Exception:
      time.sleep(1)
    attempts += 1

  assert pipeline.conn

  # Now verify that we have the correct number of CQ worker procs
  result = pipeline.execute('SELECT COUNT(*) FROM pipeline_proc_stats WHERE type = \'worker\'').first()
  assert result['count'] == expected_workers

  result = pipeline.execute('SELECT COUNT(*) FROM pipeline_proc_stats WHERE type = \'combiner\'').first()
  assert result['count'] == expected_combiners
Exemple #15
0
def test_hll_distinct(pipeline, clean_db):
    """
    Check that combining a continuous view's COUNT(DISTINCT) column matches
    hll_count_distinct computed over a regular table holding the same rows
    """
    view_query = """
    SELECT x::integer %% 10 AS k, COUNT(DISTINCT x) AS count FROM stream GROUP BY k
    """
    pipeline.create_cv('test_hll_distinct', view_query)
    pipeline.create_table('test_hll_distinct_t', x='integer', y='float8')

    # The same random rows go to both the stream and the reference table
    columns = ('x', 'y')
    rows = [(random.randint(0, 1000), random.random()) for _ in range(10000)]
    pipeline.insert('stream', columns, rows)
    pipeline.insert('test_hll_distinct_t', columns, rows)

    # The CQ uses the HLL variant of COUNT DISTINCT, so the table side
    # uses hll_count_distinct to stay comparable
    table_query = """
    SELECT hll_count_distinct(x) FROM test_hll_distinct_t
    """
    table_result = list(pipeline.execute(table_query))

    combine_query = """
    SELECT combine(count) FROM test_hll_distinct
    """
    cv_result = list(pipeline.execute(combine_query))

    assert len(table_result) == len(cv_result)

    for table_row, cv_row in zip(table_result, cv_result):
        assert table_row == cv_row
def test_schema_inference(pipeline, clean_db):
    """
    Create continuous views that cast the same stream columns to different
    types and check that every row read back has a truthy count
    """
    views = (
        ("test_infer0", "SELECT x::int8, y::bigint, COUNT(*) FROM infer_stream GROUP BY x, y"),
        ("test_infer1", "SELECT x::int4, y::real, COUNT(*) FROM infer_stream GROUP BY x, y"),
        ("test_infer2", "SELECT x::int2, y::integer, COUNT(*) FROM infer_stream GROUP BY x, y"),
        ("test_infer3", "SELECT x::numeric, y::float8, COUNT(*) FROM infer_stream GROUP BY x, y"),
        ("test_infer4", "SELECT x::int8, y::bigint, COUNT(*) FROM infer_stream GROUP BY x, y"),
    )
    for view_name, view_query in views:
        pipeline.create_cv(view_name, view_query)

    rows = [
        (random.random() + 1, random.random() * random.randint(0, 128))
        for _ in range(10000)
    ]
    pipeline.insert("infer_stream", ("x", "y"), rows)

    # Only test_infer0..test_infer3 are read back; test_infer4 is just created
    read_queries = (
        "SELECT * FROM test_infer0 ORDER BY x",
        "SELECT * FROM test_infer1 ORDER BY x",
        "SELECT * FROM test_infer2 ORDER BY x",
        "SELECT * FROM test_infer3 ORDER BY x",
    )
    for read_query in read_queries:
        for row in pipeline.execute(read_query):
            assert row["count"]
Exemple #17
0
def test_combine_in_view(pipeline, clean_db):
    """
    Verify that combines in views on top of continuous views work

    A regular view `v` combines the per-group averages of a continuous
    view; its single row must equal the average of all inserted y values.
    """
    q = """
    SELECT x::integer, avg(y::integer) FROM stream GROUP BY x
    """
    desc = ('x', 'y')
    pipeline.create_cv('test_combine_view', q)
    pipeline.execute(
        'CREATE VIEW v AS SELECT combine(avg) FROM test_combine_view')

    # try/finally so that the view is dropped even when an assertion fails
    try:
        rows = [(random.randint(1, 256), random.randint(1, 1024))
                for _ in range(10000)]

        pipeline.insert('stream', desc, rows)

        view = list(pipeline.execute('SELECT * FROM v'))

        assert len(view) == 1

        # Overall average of y, computed locally from the inserted rows
        expected = sum(r[1] for r in rows) / float(len(rows))

        assert abs(float(view[0][0]) - expected) < 0.00001
    finally:
        pipeline.execute('DROP VIEW v')
def test_null_groups(pipeline, clean_db):
    """
    Verify that null group columns are considered equal

    The same rows, with some values replaced by None, are inserted into a
    stream feeding a grouped continuous view and into a regular table; the
    grouped counts from both must match row for row.
    """
    pipeline.create_stream("s", x="int", y="int", z="int")
    q = """
    SELECT x::integer, y::integer, z::integer, COUNT(*) FROM s
    GROUP BY x, y, z;
    """
    desc = ("x", "y", "z")
    pipeline.create_cv("test_null_groups", q)
    pipeline.create_table("test_null_groups_t", x="integer", y="integer", z="integer")

    rows = []
    for _ in range(10000):
        vals = [random.randint(0, 10) for _ in range(3)]
        # Replace a value by None when the random draw is <= 0.1. A
        # conditional expression is used because `cond and v or None`
        # would also turn every 0 into None.
        rows.append(tuple(v if random.random() > 0.1 else None for v in vals))

    pipeline.insert("s", desc, rows)
    pipeline.insert("test_null_groups_t", desc, rows)

    table_q = """
    SELECT x, y, z, COUNT(*) FROM test_null_groups_t
    GROUP BY x, y, z ORDER BY x, y, z;
    """
    expected = list(pipeline.execute(table_q))
    result = list(pipeline.execute("SELECT x, y, z, count FROM test_null_groups ORDER BY x, y, z"))

    # zip() stops at the shorter input, so compare lengths first; otherwise a
    # missing (or empty) view result would pass unnoticed
    assert len(result) == len(expected)

    for r, e in zip(result, expected):
        assert r == e
Exemple #19
0
def test_simple_aggs(pipeline, clean_db):
    """
    Check that combining avg, sum and count across a continuous view's
    groups matches the same aggregates computed over a regular table
    """
    view_query = """
    SELECT x::integer %% 10 AS k,
    avg(x), sum(y::float8), count(*) FROM stream GROUP BY k;
    """
    pipeline.create_cv('test_simple_aggs', view_query)
    pipeline.create_table('test_simple_aggs_t', x='integer', y='float8')

    # The same random rows go to both the stream and the reference table
    columns = ('x', 'y')
    rows = [(random.randint(0, 1000), random.random()) for _ in range(10000)]
    pipeline.insert('stream', columns, rows)
    pipeline.insert('test_simple_aggs_t', columns, rows)

    table_query = 'SELECT avg(x), sum(y::float8), count(*) FROM test_simple_aggs_t'
    table_result = list(pipeline.execute(table_query))

    combine_query = 'SELECT combine(avg), combine(sum), combine(count) FROM test_simple_aggs'
    cv_result = list(pipeline.execute(combine_query))

    assert len(table_result) == len(cv_result)

    # All three aggregate columns must agree within a small tolerance
    for table_row, cv_row in zip(table_result, cv_result):
        for col in range(3):
            assert abs(table_row[col] - cv_row[col]) < 0.00001
def test_hll_distinct(pipeline, clean_db):
    """
    Check that combining a continuous view's COUNT(DISTINCT) column matches
    hll_count_distinct computed over a regular table holding the same rows
    """
    view_query = """
    SELECT x::integer %% 10 AS k, COUNT(DISTINCT x) AS count FROM stream GROUP BY k
    """
    pipeline.create_cv('test_hll_distinct', view_query)
    pipeline.create_table('test_hll_distinct_t', x='integer', y='float8')

    # The same random rows go to both the stream and the reference table
    columns = ('x', 'y')
    rows = [(random.randint(0, 1000), random.random()) for _ in range(10000)]
    pipeline.insert('stream', columns, rows)
    pipeline.insert('test_hll_distinct_t', columns, rows)

    # The CQ uses the HLL variant of COUNT DISTINCT, so the table side
    # uses hll_count_distinct to stay comparable
    table_query = """
    SELECT hll_count_distinct(x) FROM test_hll_distinct_t
    """
    table_result = list(pipeline.execute(table_query))

    combine_query = """
    SELECT combine(count) FROM test_hll_distinct
    """
    cv_result = list(pipeline.execute(combine_query))

    assert len(table_result) == len(cv_result)

    for table_row, cv_row in zip(table_result, cv_result):
        assert table_row == cv_row
Exemple #21
0
def test_stats_aggs(pipeline, clean_db):
    """
    Check that combining regr_sxx and stddev across a continuous view's
    groups matches the same aggregates computed over a regular table
    """
    view_query = """
    SELECT x::integer %% 10 AS k,
    regr_sxx(x, y::float8), stddev(x) FROM stream GROUP BY k;
    """
    pipeline.create_cv('test_stats_aggs', view_query)
    pipeline.create_table('test_stats_aggs_t', x='integer', y='float8')

    # The same random rows go to both the stream and the reference table
    columns = ('x', 'y')
    rows = [(random.randint(0, 1000), random.random()) for _ in range(10000)]
    pipeline.insert('stream', columns, rows)
    pipeline.insert('test_stats_aggs_t', columns, rows)

    table_query = """
    SELECT regr_sxx(x, y::float8), stddev(x) FROM test_stats_aggs_t
    """
    table_result = list(pipeline.execute(table_query))

    combine_query = """
    SELECT combine(regr_sxx), combine(stddev) FROM test_stats_aggs
    """
    cv_result = list(pipeline.execute(combine_query))

    assert len(table_result) == len(cv_result)

    # Both aggregate columns must agree within a small tolerance
    for table_row, cv_row in zip(table_result, cv_result):
        for col in range(2):
            assert abs(table_row[col] - cv_row[col]) < 0.00001
def test_join_with_where(pipeline, clean_db):
    """
    Check that a stream-table join expressed through a WHERE clause yields
    as many rows as the equivalent table-table join
    """
    column_count = 4
    view_query = """
    SELECT s.col0::integer FROM stream s, wt WHERE s.col0 = 1 AND wt.col0 = 1
    """
    # col0..col3, all integer; shared by both tables and the stream
    columns = dict(('col%d' % i, 'integer') for i in range(column_count))

    pipeline.create_table('wt', **columns)
    pipeline.create_table('wt_s', **columns)

    table_rows = _generate_rows(column_count, 64)
    stream_rows = _generate_rows(column_count, 64)

    _insert(pipeline, 'wt', table_rows, 0.1)
    # wt_s holds the stream's rows so the join can be recomputed on tables
    _insert(pipeline, 'wt_s', stream_rows, 0.1)

    pipeline.create_stream('stream', **columns)
    pipeline.create_cv('test_join_where', view_query)
    _insert(pipeline, 'stream', stream_rows)

    reference = pipeline.execute('SELECT COUNT(*) FROM wt_s s, wt WHERE s.col0 = 1 AND wt.col0 = 1').first()
    actual = pipeline.execute('SELECT COUNT(*) FROM test_join_where').first()

    assert actual['count'] == reference['count']
Exemple #23
0
def test_cmsketch_agg(pipeline, clean_db):
    """
    Exercise cmsketch_agg through cmsketch_count, both per group and on the
    combined sketch of all groups
    """
    view_query = """
    SELECT k::integer, cmsketch_agg(x::int) AS c FROM test_cmsketch_stream
    GROUP BY k
    """
    pipeline.create_cv('test_cmsketch_agg', view_query)

    # Group 0 cycles x through 0..19, group 1 cycles x through 0..49
    rows = []
    for i in range(1000):
        rows.extend([(0, i % 20), (1, i % 50)])

    pipeline.insert('test_cmsketch_stream', ('k', 'x'), rows)

    per_group_cursor = pipeline.execute(
      'SELECT cmsketch_count(c, 10) AS x, cmsketch_count(c, 40) AS y, '
      'cmsketch_count(c, 60) FROM test_cmsketch_agg ORDER BY k')
    per_group = list(per_group_cursor.fetchall())
    assert len(per_group) == 2
    assert tuple(per_group[0]) == (50, 0, 0)
    assert tuple(per_group[1]) == (20, 20, 0)

    combined_cursor = pipeline.execute(
      'SELECT cmsketch_count(combine(c), 10) AS x, '
      'cmsketch_count(combine(c), 40) AS y, cmsketch_count(combine(c), 60) '
      'FROM test_cmsketch_agg')
    combined = list(combined_cursor.fetchall())
    assert len(combined) == 1
    assert tuple(combined[0]) == (70, 20, 0)
def test_indexed(pipeline, clean_db):
    """
    Check that a stream joined against an indexed table produces as many
    matches as a join of the same rows computed by the _join helper
    """
    pipeline.create_stream('stream', x='int', y='int')
    view_query = """
    SELECT stream.x::integer, count(*) FROM stream
    JOIN test_indexed_t t ON stream.x = t.x GROUP BY stream.x
    """
    pipeline.create_table('test_indexed_t', x='integer', y='integer')
    pipeline.execute('CREATE INDEX idx ON test_indexed_t(x)')

    columns = ('x', 'y')
    table_rows = _generate_rows(2, 1000)
    stream_rows = _generate_rows(2, 1000)

    # The table is populated before the view exists
    pipeline.insert('test_indexed_t', columns, table_rows)
    time.sleep(0.1)

    pipeline.create_cv('test_indexed', view_query)
    pipeline.insert('stream', columns, stream_rows)

    # Reference join on column 0 of both row sets
    joined = _join(stream_rows, table_rows, [0])
    total = pipeline.execute('SELECT sum(count) FROM test_indexed').first()

    assert total['sum'] == len(joined)
def test_combine_in_view(pipeline, clean_db):
    """
    Verify that combines in views on top of continuous views work

    A regular view `v` combines the per-group averages of a continuous
    view; its single row must equal the average of all inserted y values.
    """
    q = """
    SELECT x::integer, avg(y::integer) FROM stream0 GROUP BY x
    """
    desc = ('x', 'y')
    pipeline.create_stream('stream0', x='int', y='float8')
    pipeline.create_cv('test_combine_view', q)
    pipeline.execute('CREATE VIEW v AS SELECT combine(avg) FROM test_combine_view')

    # try/finally so that the view is dropped even when an assertion fails
    try:
        rows = [(random.randint(1, 256), random.randint(1, 1024))
                for _ in range(10000)]

        pipeline.insert('stream0', desc, rows)

        view = list(pipeline.execute('SELECT * FROM v'))

        assert len(view) == 1

        # Overall average of y, computed locally from the inserted rows
        expected = sum(r[1] for r in rows) / float(len(rows))

        assert abs(float(view[0][0]) - expected) < 0.00001
    finally:
        pipeline.execute('DROP VIEW v')
def test_prepared_inserts(pipeline, clean_db):
  """
  Verify that we can PREPARE and EXECUTE an INSERT statement

  10000 rows with x cycling through 0..99 are inserted through a prepared
  statement on a dedicated psycopg2 connection; both continuous views must
  then contain 100 groups with a count of 100 each.
  """
  conn = psycopg2.connect('dbname=pipeline user=%s host=localhost port=%s' % (getpass.getuser(), pipeline.port))

  # try/finally so that the connection is closed even when an assertion fails
  try:
    db = conn.cursor()
    db.execute('CREATE CONTINUOUS VIEW test_prepared0 AS SELECT x::integer, COUNT(*), sum(y::integer) FROM stream GROUP BY x')
    db.execute('CREATE CONTINUOUS VIEW test_prepared1 AS SELECT x::integer, COUNT(*), sum(y::float8) FROM stream GROUP BY x')
    conn.commit()

    db.execute('PREPARE ins AS INSERT INTO stream (x, y) VALUES ($1, $2)')

    # The values are numbers generated right here, so they are formatted
    # straight into the EXECUTE statement
    for n in range(10000):
      row = (n % 100, random.random())
      db.execute('EXECUTE ins (%s, %s)' % row)

    time.sleep(0.1)

    conn.commit()

    result = list(pipeline.execute('SELECT * FROM test_prepared0 ORDER BY x'))

    assert len(result) == 100

    for n in range(100):
      assert result[n]['count'] == 100

    result = list(pipeline.execute('SELECT * FROM test_prepared1 ORDER BY x'))

    assert len(result) == 100

    for n in range(100):
      assert result[n]['count'] == 100
  finally:
    conn.close()
Exemple #27
0
  def get_stat():
    """
    Build a Stat from the row count of test_vacuum, the row count of
    test_vacuum_mrel, and the summed relpages of test_vacuum_mrel, its
    TOAST relation (plus that relation's index) and its own indexes
    """
    view_rows = pipeline.execute(
      'SELECT COUNT(*) FROM test_vacuum').first()['count']
    mrel_rows = pipeline.execute(
      'SELECT COUNT(*) FROM test_vacuum_mrel').first()['count']

    # Pages of the materialization relation itself
    heap_query = """
    SELECT pg_relation_filepath(oid), relpages
    FROM pg_class WHERE relname = 'test_vacuum_mrel';
    """
    # Pages of its TOAST relation and of the index on that TOAST relation
    toast_query = """
    SELECT relname, relpages
    FROM pg_class,
         (SELECT reltoastrelid
          FROM pg_class
          WHERE relname = 'test_vacuum_mrel') AS ss
    WHERE oid = ss.reltoastrelid OR
          oid = (SELECT indexrelid
                 FROM pg_index
                 WHERE indrelid = ss.reltoastrelid)
    ORDER BY relname;
    """
    # Pages of every index defined on the materialization relation
    index_query = """
    SELECT c2.relname, c2.relpages
    FROM pg_class c, pg_class c2, pg_index i
    WHERE c.relname = 'test_vacuum_mrel' AND
          c.oid = i.indrelid AND
          c2.oid = i.indexrelid
    ORDER BY c2.relname;
    """

    total_pages = pipeline.execute(heap_query).first()['relpages']
    total_pages += sum(rel['relpages'] for rel in pipeline.execute(toast_query))
    total_pages += sum(rel['relpages'] for rel in pipeline.execute(index_query))

    return Stat(view_rows, mrel_rows, total_pages)
Exemple #28
0
def _test_agg(pipeline, agg, check_fn=None):
    """
    Run a windowed aggregate over a stream and over a table holding the
    same rows, and compare the results.

    `agg` is the aggregate call as SQL text (for example 'sum(x::integer)');
    the text before its first '(' names the continuous view and the table.
    When `check_fn` is given it is called with `pipeline` instead of the
    built-in comparison and its return value is returned.

    The continuous view and the table are dropped on every exit path,
    including a failed assertion and the `check_fn` path.
    """
    name = agg[:agg.find('(')]
    q = 'SELECT g::integer, %s OVER (PARTITION BY g ORDER BY ts::timestamp) FROM %s'
    cv_name = 'test_%s' % name
    table_name = 'test_%s_t' % name
    desc = ('ts', 'g', 'x', 'y', 'z')

    pipeline.create_cv(cv_name, q % (agg, 'stream'))
    pipeline.create_table(table_name, ts='timestamp', x='integer', y='integer', z='integer', g='integer')

    try:
        rows = []
        for n in range(1000):
            # Timestamps advance by one second per row
            ts = str(datetime.utcnow() + timedelta(seconds=n))
            row = ts, n % 10, random.randint(1, 256), random.randint(1, 256), random.randint(1, 256)
            rows.append(row)

        pipeline.insert('stream', desc, rows)
        pipeline.insert(table_name, desc, rows)

        if check_fn:
            return check_fn(pipeline)

        expected = list(pipeline.execute(q % (agg, table_name) + ' ORDER BY g'))
        result = list(pipeline.execute('SELECT * FROM %s ORDER BY g' % cv_name))

        assert len(expected) == len(result)

        for e, r in zip(expected, result):
            assert e == r
    finally:
        pipeline.drop_cv(cv_name)
        pipeline.drop_table(table_name)
  def messages_partitioned():
    """
    Assert that each of cv0..cv3 reports 100 for both sum(count) and count(*)
    """
    templates = ('SELECT sum(count) FROM cv%d', 'SELECT count(*) FROM cv%d')
    for view_id in range(4):
      for template in templates:
        rows = pipeline.execute(template % view_id)
        assert rows and rows[0][0] == 100
def test_simple_crash(pipeline, clean_db):
  """
  Test simple worker and combiner crashes.

  Each insert is one batch of two rows. A batch may be lost when a worker
  or combiner is killed, so the counts checked after a kill accept every
  value that is reachable when at most one batch is lost per kill.
  """
  q = 'SELECT COUNT(*) FROM stream'
  pipeline.create_cv('test_simple_crash', q)

  pipeline.insert('stream', ['x'], [(1, ), (1, )])

  result = pipeline.execute('SELECT * FROM test_simple_crash').first()
  assert result['count'] == 2

  # We can potentially lose one batch for a worker or combiner crash.
  # Each batch adds 2 to the count, so after two more batches around the
  # worker kill the count is 6 if nothing was lost and 4 otherwise.
  pipeline.insert('stream', ['x'], [(1, ), (1, )])

  assert kill_worker()

  pipeline.insert('stream', ['x'], [(1, ), (1, )])

  result = pipeline.execute('SELECT * FROM test_simple_crash').first()
  assert result['count'] in [4, 6]

  # Same reasoning for the combiner kill, starting from either 4 or 6.
  pipeline.insert('stream', ['x'], [(1, ), (1, )])

  assert kill_combiner()

  pipeline.insert('stream', ['x'], [(1, ), (1, )])

  result = pipeline.execute('SELECT * FROM test_simple_crash').first()
  assert result['count'] in [6, 8, 10]
Exemple #31
0
def test_user_low_and_high_card(pipeline, clean_db):
    """
    Verify that Bloom filters with low and high cardinalities are correctly
    unioned
    """
    view_query = """
    SELECT k::integer, bloom_agg(x::integer) FROM test_bloom_stream GROUP BY k
    """
    pipeline.create_cv('test_bloom_agg', view_query)

    rows = []

    # Groups 0 and 1 each draw from only two possible values
    for _ in range(1000):
        rows.extend([(0, random.choice((-1, -2))),
                     (1, random.choice((-3, -4)))])

    # Groups 2 and 3 both receive the same 10000 distinct values
    for value in range(10000):
        rows.extend([(2, value), (3, value)])

    pipeline.insert('test_bloom_stream', ('k', 'x'), rows)

    low = pipeline.execute('SELECT bloom_cardinality(combine(bloom_agg)) '
                           'FROM test_bloom_agg WHERE k in (0, 1)').first()
    assert low[0] == 4

    high = pipeline.execute('SELECT bloom_cardinality(combine(bloom_agg)) '
                            'FROM test_bloom_agg WHERE k in (2, 3)').first()
    assert high[0] == 8879

    everything = pipeline.execute('SELECT bloom_cardinality(combine(bloom_agg)) '
                                  'FROM test_bloom_agg').first()
    assert everything[0] == 8881
def test_regression(pipeline, clean_db):
  """
  Generate a one-row CSV file under pipeline.data_dir and COPY it into a
  stream read by a continuous view; the test passes when nothing raises
  """
  header = ('day', 'project', 'title', 'count', 'size')
  record = ['2015-06-01 00:00:00', 'De', 'Adam_Babareka', '1', '37433']

  csv_path = os.path.abspath(os.path.join(pipeline.data_dir, 'test_copy.csv'))
  _generate_csv(csv_path, [record], desc=header)

  pipeline.create_stream('copy_regression_stream', count='int', day='timestamp', project='text', title='text', size='int')
  pipeline.create_cv('test_copy_regression', 'SELECT sum(count) FROM copy_regression_stream')

  copy_statement = "COPY copy_regression_stream (day, project, title, count, size) FROM '%s' CSV HEADER" % csv_path
  pipeline.execute(copy_statement)
def test_online_drop_column(pipeline, clean_db):
    """
    Verify that dropping a column from a stream is rejected.

    The ALTER STREAM statement is expected to raise; the test fails if it
    completes without an error.
    """
    pipeline.create_stream("stream1", c0="integer")

    try:
        pipeline.execute("ALTER STREAM stream1 DROP c0")
    except Exception:
        # Expected path: the statement was refused
        pass
    else:
        # The failure is raised outside the try body so the handler above
        # cannot swallow it (an `assert False` inside the try would be caught
        # and the test could never fail).
        assert False, "ALTER STREAM stream1 DROP c0 unexpectedly succeeded"
Exemple #34
0
def test_online_drop_column(pipeline, clean_db):
  """
  Verify that dropping a column from a stream is rejected.

  The ALTER STREAM statement is expected to raise; the test fails if it
  completes without an error.
  """
  pipeline.create_stream('stream1', c0='integer')

  try:
    pipeline.execute('ALTER STREAM stream1 DROP c0')
  except Exception:
    # Expected path: the statement was refused
    pass
  else:
    # The failure is raised outside the try body so the handler above cannot
    # swallow it (an `assert False` inside the try would be caught and the
    # test could never fail).
    assert False, 'ALTER STREAM stream1 DROP c0 unexpectedly succeeded'
def test_regression(pipeline, clean_db):
  """
  Regression check: write a one-row CSV file under the pipeline's tmp_dir and
  COPY it (CSV HEADER mode) into a stream read by a continuous view. There
  are no assertions; the test passes as long as nothing raises.
  """
  columns = ('day', 'project', 'title', 'count', 'size')
  row = ['2015-06-01 00:00:00', 'De', 'Adam_Babareka', '1', '37433']

  target = os.path.abspath(os.path.join(pipeline.tmp_dir, 'test_copy.csv'))
  _generate_csv(target, [row], desc=columns)

  pipeline.create_stream(
    'copy_regression_stream',
    count='int',
    day='timestamp',
    project='text',
    title='text',
    size='int')
  pipeline.create_cv(
    'test_copy_regression',
    'SELECT sum(count) FROM copy_regression_stream')

  statement = "COPY copy_regression_stream (day, project, title, count, size) FROM '%s' CSV HEADER" % target
  pipeline.execute(statement)
Exemple #36
0
    def _verify():
        """Check the row count and the avg/distincts sums stored in test_dump."""
        checks = (
            ("SELECT count(*) FROM test_dump", "count", 100),
            ("SELECT sum(avg) FROM test_dump", "sum", 50050),
            ("SELECT sum(distincts) FROM test_dump", "sum", 1000),
        )
        for sql, column, expected in checks:
            row = pipeline.execute(sql).first()
            assert row[column] == expected
  def _verify():
    """Assert that test_dump holds 100 rows with the expected aggregate sums."""
    row_count = pipeline.execute('SELECT count(*) FROM test_dump').first()
    assert row_count['count'] == 100

    avg_total = pipeline.execute('SELECT sum(avg) FROM test_dump').first()
    assert avg_total['sum'] == 50050

    distinct_total = pipeline.execute('SELECT sum(distincts) FROM test_dump').first()
    assert distinct_total['sum'] == 1000
  def _verify():
    """Run the three sanity queries against test_dump and check each value."""
    def first_row(sql):
      return pipeline.execute(sql).first()

    assert first_row('SELECT count(*) FROM test_dump')['count'] == 100
    assert first_row('SELECT sum(avg) FROM test_dump')['sum'] == 50050
    assert first_row('SELECT sum(distincts) FROM test_dump')['sum'] == 1000
def test_join_ordering(pipeline, clean_db):
  """
  Create three continuous views that join one stream with two tables, each
  listing the relations in a different order, and check that all of them end
  up with as many rows as the reference join computed by _join.
  """
  num_cols = 8
  join_cols = [0]
  ordering0_cols = dict(('col%d' % n, 'integer') for n in range(num_cols))
  ordering1_cols = dict(('col%d' % n, 'integer') for n in range(num_cols))

  pipeline.create_table('ordering0', **ordering0_cols)
  pipeline.create_table('ordering1', **ordering1_cols)

  table0_rows = _generate_rows(num_cols, 64)
  table1_rows = _generate_rows(num_cols, 64)
  _insert(pipeline, 'ordering0', table0_rows, 0.1)
  _insert(pipeline, 'ordering1', table1_rows, 0.1)

  pipeline.create_stream('stream0', **ordering0_cols)

  # Ordering: stream, table, table
  stream_first = """
  SELECT s.col0::integer, ordering0.col3, ordering1.col4 FROM
  stream0 s JOIN ordering0 ON s.col0 = ordering0.col0
  JOIN ordering1 ON ordering0.col0 = ordering1.col0
  """
  pipeline.create_cv('test_ordering0', stream_first)

  # Ordering: table, stream, table
  stream_middle = """
  SELECT s.col0::integer, ordering0.col3, ordering1.col4 FROM
  ordering0 JOIN stream0 s ON s.col0 = ordering0.col0
  JOIN ordering1 ON ordering0.col0 = ordering1.col0
  """
  pipeline.create_cv('test_ordering1', stream_middle)

  # Ordering: table, table, stream
  stream_last = """
  SELECT s.col0::integer, ordering0.col3, ordering1.col4 FROM
  ordering0 JOIN ordering1 ON ordering0.col0 = ordering1.col0
  JOIN stream0 s ON s.col0 = ordering0.col0
  """
  pipeline.create_cv('test_ordering2', stream_last)

  stream_rows = _generate_rows(num_cols, 64)
  _insert(pipeline, 'stream0', stream_rows)

  expected = _join(table0_rows, _join(table1_rows, stream_rows, join_cols), join_cols)

  # Fetch all three counts before asserting on any of them
  count_rows = [
    pipeline.execute('SELECT COUNT(*) FROM test_ordering0')[0],
    pipeline.execute('SELECT COUNT(*) FROM test_ordering1')[0],
    pipeline.execute('SELECT COUNT(*) FROM test_ordering2')[0],
  ]

  for count_row in count_rows:
    assert count_row['count'] == len(expected)
def test_join_ordering(pipeline, clean_db):
    """
    Create three continuous views that join one stream with two tables, each
    listing the relations in a different order, and check that all of them
    end up with as many rows as the reference join computed by _join.
    """
    num_cols = 8
    join_cols = [0]
    ordering0_cols = dict(('col%d' % n, 'integer') for n in range(num_cols))
    ordering1_cols = dict(('col%d' % n, 'integer') for n in range(num_cols))

    pipeline.create_table('ordering0', **ordering0_cols)
    pipeline.create_table('ordering1', **ordering1_cols)

    table0_rows = _generate_rows(num_cols, 64)
    table1_rows = _generate_rows(num_cols, 64)
    _insert(pipeline, 'ordering0', table0_rows, 0.1)
    _insert(pipeline, 'ordering1', table1_rows, 0.1)

    pipeline.create_stream('stream', **ordering0_cols)

    # Ordering: stream, table, table
    stream_first = """
    SELECT s.col0::integer, ordering0.col3, ordering1.col4 FROM
    stream s JOIN ordering0 ON s.col0 = ordering0.col0
    JOIN ordering1 ON ordering0.col0 = ordering1.col0
    """
    pipeline.create_cv('test_ordering0', stream_first)

    # Ordering: table, stream, table
    stream_middle = """
    SELECT s.col0::integer, ordering0.col3, ordering1.col4 FROM
    ordering0 JOIN stream s ON s.col0 = ordering0.col0
    JOIN ordering1 ON ordering0.col0 = ordering1.col0
    """
    pipeline.create_cv('test_ordering1', stream_middle)

    # Ordering: table, table, stream
    stream_last = """
    SELECT s.col0::integer, ordering0.col3, ordering1.col4 FROM
    ordering0 JOIN ordering1 ON ordering0.col0 = ordering1.col0
    JOIN stream s ON s.col0 = ordering0.col0
    """
    pipeline.create_cv('test_ordering2', stream_last)

    stream_rows = _generate_rows(num_cols, 64)
    _insert(pipeline, 'stream', stream_rows)

    expected = _join(table0_rows, _join(table1_rows, stream_rows, join_cols), join_cols)

    # Fetch all three counts before asserting on any of them
    count_rows = [
        pipeline.execute('SELECT COUNT(*) FROM test_ordering0').first(),
        pipeline.execute('SELECT COUNT(*) FROM test_ordering1').first(),
        pipeline.execute('SELECT COUNT(*) FROM test_ordering2').first(),
    ]

    for count_row in count_rows:
        assert count_row['count'] == len(expected)
    def delimited_messages():
        """
        Check that comma_cv and tab_cv each hold 100 rows and that, ordered
        by x, the first three columns of row i are all equal to i.
        """
        queries = ('SELECT * from comma_cv ORDER BY x',
                   'SELECT * from tab_cv ORDER BY x')
        for sql in queries:
            fetched = pipeline.execute(sql)
            assert len(fetched) == 100
            for position, record in enumerate(fetched):
                leading = tuple([record[0], record[1], record[2]])
                assert leading == (position, position, position)
def test_regression(pipeline, clean_db):
  """
  Regression check: COPY the generated file into the stream without the
  CSV/HEADER options.

  The COPY is allowed to fail, but if it raises, the error text must contain
  the expected 'missing data for column "project"' DataError message.
  """
  path = os.path.abspath(os.path.join(pipeline.tmp_dir, 'test_copy.csv'))
  _generate_csv(path, [['2015-06-01 00:00:00','De','Adam_Babareka','1','37433']], desc=('day', 'project', 'title', 'count', 'size'))

  pipeline.create_cv('test_copy_regression', 'SELECT sum(count::integer) FROM copy_regression_stream')

  try:
    pipeline.execute("COPY copy_regression_stream (day, project, title, count, size) FROM '%s'" % path)
  except Exception as e:
    # `except ... as` parses on Python 2.6+ and 3.x, unlike `except X, e`.
    # Exceptions only carry `.message` on Python 2, so fall back to str(e).
    message = getattr(e, 'message', None) or str(e)
    assert '(DataError) missing data for column "project"' in message
Exemple #43
0
def test_bloom_type(pipeline, clean_db):
    """
    Exercise the bloom column type on a regular table: start from two empty
    filters, add `i / x` for i in [0, 1000) to each row's filter, then check
    the cardinality reported for each row.
    """
    pipeline.create_table("test_bloom_type", x="int", y="bloom")

    seed_sql = "INSERT INTO test_bloom_type (x, y) VALUES (1, bloom_empty()), (2, bloom_empty())"
    pipeline.execute(seed_sql)

    update_template = "UPDATE test_bloom_type SET y = bloom_add(y, %d / x)"
    for value in xrange(1000):
        pipeline.execute(update_template % value)

    cardinality_sql = "SELECT bloom_cardinality(y) FROM test_bloom_type ORDER BY x"
    cardinalities = list(pipeline.execute(cardinality_sql))

    # Row x=1 first, then row x=2
    assert cardinalities[0][0] == 986
    assert cardinalities[1][0] == 495
Exemple #44
0
def test_multiple_databases(pipeline, clean_db):
    """
    Verify that a continuous view with the same name and query can exist in
    two databases on the same server and that each one only sees the data
    inserted into its own database.

    A second database, tmp_pipeline, is created through a direct psycopg2
    connection, populated with odd values, and dropped at the end; the
    database reached through `pipeline` receives even values.
    """
    # Admin connection to the 'postgres' database, used only for
    # CREATE/DROP DATABASE; switched to autocommit before CREATE DATABASE runs.
    conn = psycopg2.connect('dbname=postgres user=%s host=localhost port=%s' %
                            (getpass.getuser(), pipeline.port))
    conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)

    cur = conn.cursor()
    cur.execute('CREATE DATABASE tmp_pipeline')
    cur.close()

    # The same query text is used for the view in both databases
    q = 'SELECT x::int FROM dbstream'
    pipeline.create_stream('dbstream', x='int')
    pipeline.create_cv('test_multiple_databases', q)

    # Insert data in first database.
    pipeline.insert('dbstream', ['x'], map(lambda x: (x, ), range(0, 10, 2)))
    result = pipeline.execute('SELECT * FROM test_multiple_databases')
    # NOTE(review): comparing against range(...) relies on range returning a
    # list (Python 2 behaviour) -- confirm before running on Python 3.
    assert sorted(row['x'] for row in result) == range(0, 10, 2)

    # Create same CV in the other database, make sure its created and write different data to it.
    tmp_conn = psycopg2.connect(
        'dbname=tmp_pipeline user=%s host=localhost port=%s' %
        (getpass.getuser(), pipeline.port))
    # `cur` is rebound here: from now on it belongs to the tmp_pipeline connection
    cur = tmp_conn.cursor()
    cur.execute('CREATE EXTENSION pipelinedb')
    cur.execute('CREATE FOREIGN TABLE dbstream (x int) SERVER pipelinedb')
    cur.execute('CREATE VIEW test_multiple_databases AS %s' % q)
    # Commit the DDL before inserting into the new stream
    tmp_conn.commit()
    # Odd values 1, 3, ..., 9 go to the second database
    cur.execute('INSERT INTO dbstream (x) VALUES %s' %
                ', '.join(map(lambda x: '(%d)' % x, range(1, 11, 2))))
    cur.execute('SELECT * FROM test_multiple_databases')
    tmp_conn.commit()
    assert sorted(row[0] for row in cur) == range(1, 11, 2)

    # Ensure that the data written to the other database isn't seen by the first database.
    result = pipeline.execute('SELECT * FROM test_multiple_databases')
    assert sorted(row['x'] for row in result) == range(0, 10, 2)

    # Insert new data to both databases.
    pipeline.insert('dbstream', ['x'], map(lambda x: (x, ), range(10, 20, 2)))
    cur.execute('INSERT INTO dbstream (x) VALUES %s' %
                ', '.join(map(lambda x: '(%d)' % x, range(11, 21, 2))))

    # Ensure both databases still saw the data written out to them.
    result = pipeline.execute('SELECT * FROM test_multiple_databases')
    assert sorted(row['x'] for row in result) == range(0, 20, 2)
    cur.execute('SELECT * FROM test_multiple_databases')
    tmp_conn.commit()
    assert sorted(row[0] for row in cur) == range(1, 21, 2)

    # Close everything attached to tmp_pipeline before dropping it through
    # the admin connection.
    # NOTE(review): this cleanup is skipped if any assertion above fails, which
    # would leave tmp_pipeline behind -- consider a try/finally.
    cur.close()
    tmp_conn.close()
    cur = conn.cursor()
    cur.execute('DROP DATABASE tmp_pipeline')
    cur.close()
    conn.close()
Exemple #45
0
def test_multiple_insert(pipeline, clean_db):
  """
  Create a continuous transform that passes even values of x to
  pipeline_stream_insert('stream1', 'stream2') and check that the counting
  views on stream1 and stream2 both reach 500 after inserting 0..999.
  """
  pipeline.create_cv('cv0', 'SELECT count(*) FROM stream1')
  pipeline.create_cv('cv1', 'SELECT count(*) FROM stream2')
  pipeline.create_ct(
    'ct1',
    'SELECT x::int FROM stream WHERE mod(x, 2) = 0',
    "pipeline_stream_insert('stream1', 'stream2')")

  values = []
  for n in range(1000):
    values.append((n, ))
  pipeline.insert('stream', ('x', ), values)

  cv0_row = pipeline.execute('SELECT count FROM cv0').first()
  assert cv0_row['count'] == 500

  cv1_row = pipeline.execute('SELECT count FROM cv1').first()
  assert cv1_row['count'] == 500
Exemple #46
0
def test_postmaster_worker_recovery(pipeline, clean_db):
  """
  Verify that the postmaster only restarts crashed worker processes, and does not
  attempt to start them when the continuous query scheduler should.

  The test records how many worker and combiner processes exist, SIGKILLs a
  long-running backend, waits until queries succeed again, and then checks
  that the worker and combiner counts are unchanged.
  """
  # Baseline process counts to compare against after the kill
  expected_workers = len(get_worker_pids())
  assert expected_workers > 0

  expected_combiners = len(get_combiner_pids())
  assert expected_combiners > 0

  def backend():
    # Runs in a separate thread; any error (including the connection being
    # killed below) is deliberately ignored.
    try:
      # Just keep a long-running backend connection open
      client = pipeline.engine.connect()
      client.execute('SELECT pg_sleep(10000)')
    except:
      pass

  t = threading.Thread(target=backend)
  t.start()

  attempts = 0
  result = None
  backend_pid = 0

  # Poll pg_stat_activity (at most 10 times, 1s apart) for the pg_sleep query.
  # NOTE(review): the [0] index is applied on every attempt; presumably it
  # raises if no row matches yet rather than retrying -- confirm.
  while not result and attempts < 10:
    result = pipeline.execute("""SELECT pid, query FROM pg_stat_activity WHERE lower(query) LIKE '%%pg_sleep%%'""")[0]
    time.sleep(1)
    attempts += 1

  assert result

  # Hard-kill the backend process found above
  backend_pid = result['pid']
  os.kill(backend_pid, signal.SIGKILL)

  attempts = 0
  # Drop the cached connection; presumably pipeline.execute() reconnects and
  # sets pipeline.conn again -- TODO confirm against the fixture.
  pipeline.conn = None

  # Retry a trivial query for up to 20 attempts, sleeping 1s after each failure
  while attempts < 20:
    try:
      pipeline.execute('SELECT 1')
      break
    except:
      time.sleep(1)
      pass
    attempts += 1

  assert pipeline.conn

  # Now verify that we have the correct number of CQ worker procs
  assert expected_workers == len(get_worker_pids())
  assert expected_combiners == len(get_combiner_pids())
Exemple #47
0
def test_hll_type(pipeline, clean_db):
    """
    Exercise the hyperloglog column type on a regular table: start from two
    empty HLLs, add `i / x` for i in [0, 1000) to each row, then check the
    cardinality reported for each row.
    """
    pipeline.create_table('test_hll_type', x='int', y='hyperloglog')

    seed_sql = ('INSERT INTO test_hll_type (x, y) VALUES '
                '(1, hll_empty()), (2, hll_empty())')
    pipeline.execute(seed_sql)

    update_template = 'UPDATE test_hll_type SET y = hll_add(y, %d / x)'
    for value in xrange(1000):
        pipeline.execute(update_template % value)

    cardinality_sql = 'SELECT hll_cardinality(y) FROM test_hll_type ORDER BY x'
    estimates = pipeline.execute(cardinality_sql)

    # Row x=1 first, then row x=2
    assert estimates[0][0] == 995
    assert estimates[1][0] == 497
    def unpack_cv():
        """
        Check the contents of unpack_cv: 600 rows overall, 100 distinct keys,
        and every array element equal to its key with the 'v' characters
        removed.
        """
        # The JSON array gets unpacked for this CV so there are 3 rows for every message
        total = pipeline.execute('SELECT count(*) FROM unpack_cv')
        assert total[0][0] == 600

        distinct_keys = pipeline.execute('SELECT count(distinct k) FROM unpack_cv')
        assert distinct_keys[0][0] == 100

        for record in pipeline.execute('SELECT * FROM unpack_cv ORDER BY k'):
            key = record[0]
            element = record[1]
            assert key.replace('v', '') == element
    def counts():
        """
        Check that group0 and group1 each hold 100 rows whose `count` columns
        sum to 100.
        """
        queries = (
            'SELECT sum(count) FROM group0',
            'SELECT sum(count) FROM group1',
            'SELECT count(*) FROM group0',
            'SELECT count(*) FROM group1',
        )
        for sql in queries:
            fetched = pipeline.execute(sql)
            assert fetched[0][0] == 100
Exemple #50
0
def test_bloom_type(pipeline, clean_db):
    """
    Exercise the bloom column type on a regular table: start from two empty
    filters, add `i / x` for i in [0, 1000) to each row's filter, then check
    the cardinality reported for each row.
    """
    pipeline.create_table('test_bloom_type', x='int', y='bloom')

    seed_sql = ('INSERT INTO test_bloom_type (x, y) VALUES '
                '(1, bloom_empty()), (2, bloom_empty())')
    pipeline.execute(seed_sql)

    update_template = 'UPDATE test_bloom_type SET y = bloom_add(y, %d / x)'
    for value in xrange(1000):
        pipeline.execute(update_template % value)

    cardinality_sql = 'SELECT bloom_cardinality(y) FROM test_bloom_type ORDER BY x'
    cardinalities = pipeline.execute(cardinality_sql)

    # Row x=1 first, then row x=2
    assert cardinalities[0][0] == 986
    assert cardinalities[1][0] == 495
Exemple #51
0
def test_combine_table_no_groups(pipeline, clean_db):
  """
  Copy the matrel of an ungrouped COUNT(*) view into a temporary relation,
  combine that relation back into the view with pipeline_combine_table, and
  check that the single count value goes from 1000 inserted rows to 2000.
  """
  pipeline.create_stream('s', x='int')
  pipeline.create_cv('no_groups', 'SELECT COUNT(*) FROM s')

  batch = []
  for i in xrange(1000):
    batch.append((i,))
  pipeline.insert('s', ('x',), batch)

  pipeline.execute('SELECT * INTO tmprel FROM no_groups_mrel')
  pipeline.execute("SELECT pipeline_combine_table('no_groups', 'tmprel')")

  fetched = list(pipeline.execute('SELECT count FROM no_groups'))

  # Exactly one row with exactly one column
  assert len(fetched) == 1
  only_row = fetched[0]
  assert len(only_row) == 1
  assert only_row[0] == 2000
Exemple #52
0
def test_uniqueness(pipeline, clean_db):
  """
  Insert 100000 distinct x values in ten batches into a view grouped by x and
  check that the view's row count equals its number of distinct x values.
  """
  pipeline.create_cv('uniqueness',
                     'SELECT x::int, count(*) FROM stream GROUP BY x')

  batch_size = 10000
  for batch_no in range(10):
    base = batch_size * batch_no
    batch = []
    for offset in xrange(batch_size):
      batch.append((base + offset, ))
    pipeline.insert('stream', ('x', ), batch)

  total_row = pipeline.execute('SELECT count(*) FROM uniqueness').first()
  total = total_row['count']

  distinct_row = pipeline.execute(
    'SELECT count(DISTINCT x) FROM uniqueness').first()
  distinct_total = distinct_row['count']

  assert total == distinct_total
Exemple #53
0
def test_tdigest_type(pipeline, clean_db):
  """
  Exercise the tdigest column type on a regular table: start from two empty
  digests, add `i % (x * 500)` for i in [0, 1000) to each row's digest, then
  check tdigest_cdf(y, 400) and tdigest_quantile(y, 0.9) for each row
  (rounded to one decimal place).
  """
  pipeline.create_table('test_tdigest_type', x='int', y='tdigest')
  pipeline.execute('INSERT INTO test_tdigest_type (x, y) VALUES '
                   '(1, tdigest_empty()), (2, tdigest_empty())')

  for i in xrange(1000):
    # `%%` is left untouched by str.format and reaches execute() as written
    pipeline.execute('UPDATE test_tdigest_type '
                     'SET y = tdigest_add(y, {} %% (x * 500))'.format(i))

  # The select-list fragment ends with a space so the concatenated SQL reads
  # `... 0.9) FROM ...` rather than `... 0.9)FROM ...`.
  result = list(pipeline.execute('SELECT tdigest_cdf(y, 400), '
                                 'tdigest_quantile(y, 0.9) '
                                 'FROM test_tdigest_type ORDER BY x'))

  # List comprehensions compare equal to the expected lists regardless of
  # whether map() returns a list or an iterator.
  assert [round(value, 1) for value in result[0]] == [0.8, 449.5]
  assert [round(value, 1) for value in result[1]] == [0.4, 899.5]
Exemple #54
0
def test_cmsketch_type(pipeline, clean_db):
  """
  Exercise the cmsketch column type on a regular table: start from two empty
  sketches, add `i % x` for i in [0, 1000) to each row's sketch, then check
  the counts reported for the values 0 and 1 in each row.
  """
  pipeline.create_table('test_cmsketch_type', x='int', y='cmsketch')

  seed_sql = ('INSERT INTO test_cmsketch_type (x, y) VALUES '
              '(1, cmsketch_empty()), (2, cmsketch_empty())')
  pipeline.execute(seed_sql)

  update_template = ('UPDATE test_cmsketch_type '
                     'SET y = cmsketch_add(y, {} %% x)')
  for value in xrange(1000):
    pipeline.execute(update_template.format(value))

  count_sql = ('SELECT cmsketch_count(y, 0), '
               'cmsketch_count(y, 1) '
               'FROM test_cmsketch_type ORDER BY x')
  frequencies = list(pipeline.execute(count_sql))

  # Row x=1 first, then row x=2
  assert frequencies[0] == (1000, 0)
  assert frequencies[1] == (500, 500)
Exemple #55
0
def test_drop_mrel_column(pipeline, clean_db):
    """
    Check that every column of the continuous view's matrel
    (mrel_drop_cv_mrel) is protected: each DROP COLUMN attempt must raise
    psycopg2.InternalError.
    """
    pipeline.create_stream('mrel_drop_s', x='integer')
    view_query = """
  SELECT x, sum(x), avg(x), count(*) FROM mrel_drop_s GROUP BY x
  """
    pipeline.create_cv('mrel_drop_cv', view_query)

    drop_template = 'ALTER TABLE mrel_drop_cv_mrel DROP COLUMN %s'
    protected_columns = ('x', 'sum', 'avg', 'count')
    for column in protected_columns:
        statement = drop_template % column
        with pytest.raises(psycopg2.InternalError):
            pipeline.execute(statement)
Exemple #56
0
def assert_result_changes(func, args):
    """
    Verifies that the result of the given function changes with time

    Three continuous views are created over stream0: a 2-second sliding
    window view that is polled until its value becomes falsy or NULL, a
    2-day sliding window view grouped by x % 10, and an equivalent view
    without a window. The combined results of the last two must match.

    func -- name of the aggregate, interpolated into the SQL and also used as
            the result column name when polling
    args -- argument list text interpolated inside func(...)

    NOTE(review): `pipeline` is not a parameter here; it must be available
    from the enclosing/module scope -- confirm against the full file.
    """
    pipeline.create_stream('stream0', x='int', y='text', z='int')
    name = 'assert_%s_decreases' % func
    # Short (2 second) sliding window: its value is what gets polled below
    pipeline.create_cv(
        name,
        "SELECT %s(%s) FROM stream0 WHERE arrival_timestamp > clock_timestamp() - interval '2 seconds'"
        % (func, args))

    # We also create a wide sliding window just to verify that user combines work on SW CVs and have the same output
    # as if they were being run on a non-SW CV
    sw_name = name + '_sw_agg'
    pipeline.create_cv(
        sw_name,
        "SELECT x %% 10 AS g, %s(%s) FROM stream0 WHERE arrival_timestamp > clock_timestamp() - interval '2 days' GROUP BY g"
        % (func, args))
    # Same grouping and aggregate without any window, used as the reference
    verify_name = name + '_sw_agg_verify'
    pipeline.create_cv(
        verify_name,
        "SELECT x %% 10 AS g, %s(%s) FROM stream0 GROUP BY g" % (func, args))

    rows = [(n, str(n), n + 1) for n in range(1000)]
    pipeline.insert('stream0', ('x', 'y', 'z'), rows)
    # Truthy seed so the polling loop runs at least once
    current = 1

    results = []
    # Re-query the short window back to back (no sleep) until the value is
    # NULL (break) or falsy (loop condition); every non-NULL reading is kept.
    while current:
        row = pipeline.execute('SELECT * FROM %s' % name)[0]
        current = row[func]
        if current is None:
            break
        results.append(current)

    # Verify that we actually read something
    assert results

    # Verify user combines on SW CVs work and produce the expected output
    sw_row = pipeline.execute('SELECT combine(%s) FROM %s' %
                              (func, sw_name))[0]
    expected_row = pipeline.execute('SELECT combine(%s) FROM %s' %
                                    (func, verify_name))[0]
    # List results are sorted on both sides so element order does not affect
    # the comparison
    if isinstance(sw_row['combine'], list):
        sw_row['combine'] = sorted(sw_row['combine'])
        expected_row['combine'] = sorted(expected_row['combine'])
    assert sw_row['combine'] == expected_row['combine']

    # Only the short-window view is dropped here
    pipeline.drop_cv(name)
Exemple #57
0
def test_concurrent_sw_ticking(pipeline, clean_db):
    """
    Verify that several concurrent sliding-window queries each
    having different windows tick correctly at different intervals.

    Ten sliding-window views (windows of 10 to 19 seconds) are created over
    stream0, each paired with a second view reading from its `<name>_osrel`
    relation. After inserting data and sleeping 25 seconds, every paired view
    must contain, for each of the 100 x values, at least one row with only
    `new` set and at least one row with only `old` set.
    """
    pipeline.create_stream('stream0', x='int')
    output_names = []
    for n in range(10):
        name = 'sw%d' % n
        # Window length differs per view: n + 10 seconds
        pipeline.create_cv(
            name,
            'SELECT x::integer, count(*) FROM stream0 GROUP BY x',
            sw='%d seconds' % (n + 10))
        output_name = name + '_output'

        # Reads (old, new) pairs from the view's osrel; x is taken from
        # `old` when present, otherwise from `new`
        q = """
    SELECT arrival_timestamp,
    CASE WHEN (old).x IS NULL THEN (new).x ELSE (old).x END AS x, old, new FROM %s_osrel
    """ % name
        pipeline.create_cv(output_name, q)
        output_names.append(output_name)

    # All view names in descending name order; reused for the drop loop below
    names = [
        r[0] for r in pipeline.execute(
            'SELECT name FROM pipeline_views() ORDER BY name DESC')
    ]
    # 10 sliding-window views plus their 10 paired output views
    assert len(names) == 2 * 10

    # 10000 rows spread over 100 distinct x values
    pipeline.insert('stream0', ('x', ), [(x % 100, ) for x in range(10000)])
    # 25s is longer than the widest window configured above (19 seconds)
    time.sleep(25)

    for name in output_names:
        rows = list(pipeline.execute('SELECT COUNT(DISTINCT x) FROM %s' %
                                     name))
        assert rows[0][0] == 100

        for x in range(100):
            # In window
            assert pipeline.execute(
                'SELECT * FROM %s WHERE old IS NULL AND new IS NOT NULL AND x = %d'
                % (name, x))
            # Out of window
            assert pipeline.execute(
                'SELECT * FROM %s WHERE old IS NOT NULL AND new IS NULL AND x = %d'
                % (name, x))

    # Drop these in reverse dependency order to prevent deadlocks
    for name in names:
        pipeline.drop_cv(name)
Exemple #58
0
def test_cmsketch_type(pipeline, clean_db):
    """
    Exercise the cmsketch column type on a regular table: start from two
    empty sketches, feed `i % x` for i in [0, 1000) to freq_add on each row,
    then check the frequencies reported for the values 0 and 1 in each row.
    """
    pipeline.create_table('test_cmsketch_type', x='int', y='cmsketch')

    seed_sql = ('INSERT INTO test_cmsketch_type (x, y) VALUES '
                '(1, cmsketch_empty()), (2, cmsketch_empty())')
    pipeline.execute(seed_sql)

    update_template = 'UPDATE test_cmsketch_type SET y = freq_add(y, %d %% x)'
    for value in xrange(1000):
        pipeline.execute(update_template % value)

    freq_sql = ('SELECT freq(y, 0), '
                'freq(y, 1) '
                'FROM test_cmsketch_type ORDER BY x')
    frequencies = list(pipeline.execute(freq_sql))

    # Row x=1 first, then row x=2
    first_pair = (frequencies[0][0], frequencies[0][1])
    assert first_pair == (1000, 0)
    second_pair = (frequencies[1][0], frequencies[1][1])
    assert second_pair == (500, 500)
Exemple #59
0
def test_tdigest_agg(pipeline, clean_db):
    """
    Exercise tdigest_agg in a grouped continuous view and check
    tdigest_quantile / tdigest_cdf both per group and on the combined digest.
    """
    view_query = """
    SELECT k::integer, tdigest_agg(x::int) AS t FROM test_tdigest_stream
    GROUP BY k
    """
    pipeline.create_cv('test_tdigest_agg', view_query)

    # One pass: group 0 gets 0..999, group 1 gets 500..1499 (interleaved)
    one_pass = []
    for n in range(1000):
        one_pass.append((0, n))
        one_pass.append((1, n + 500))
    # The same pass is repeated ten times
    events = one_pass * 10

    pipeline.insert('test_tdigest_stream', ('k', 'x'), events)

    def fetch(sql):
        return list(pipeline.execute(sql).fetchall())

    per_group_quantiles = fetch(
        'SELECT tdigest_quantile(t, 0.1) FROM test_tdigest_agg ORDER BY k')
    assert len(per_group_quantiles) == 2
    assert abs(int(per_group_quantiles[0]['tdigest_quantile']) - 99) <= 1
    assert abs(int(per_group_quantiles[1]['tdigest_quantile']) - 599) <= 1

    combined_quantile = fetch(
        'SELECT tdigest_quantile(combine(t), 0.1) FROM test_tdigest_agg')
    assert len(combined_quantile) == 1
    assert abs(int(combined_quantile[0]['tdigest_quantile']) - 200) <= 4

    per_group_cdfs = fetch(
        'SELECT tdigest_cdf(t, 600) FROM test_tdigest_agg ORDER BY k')
    assert len(per_group_cdfs) == 2
    assert round(per_group_cdfs[0]['tdigest_cdf'], 2) == 0.6
    assert round(per_group_cdfs[1]['tdigest_cdf'], 2) == 0.1

    combined_cdf = fetch(
        'SELECT tdigest_cdf(combine(t), 600) FROM test_tdigest_agg')
    assert len(combined_cdf) == 1
    assert round(combined_cdf[0]['tdigest_cdf'], 2) == 0.35
def test_drop_columns(pipeline, clean_db):
  """
  Check that the table column a stream-table join is joined on cannot be
  dropped, while a column of the same table that the join does not use can.
  """
  pipeline.create_table('drop_t', x='integer', y='integer')
  pipeline.create_stream('drop_s', x='integer')
  join_query = """
  SELECT s.x, count(*) FROM drop_s s JOIN drop_t t ON s.x = t.x GROUP BY s.x
  """
  pipeline.create_cv('stj_drop_cv', join_query)

  drop_joined = 'ALTER TABLE drop_t DROP COLUMN x'
  with pytest.raises(psycopg2.InternalError):
    pipeline.execute(drop_joined)

  # y is not referenced by the join, so dropping it must succeed
  drop_unused = 'ALTER TABLE drop_t DROP COLUMN y'
  pipeline.execute(drop_unused)