Esempio n. 1
0
def test_bloom_intersection(octopus, clean_db):
    """
    Verify that bloom_intersection_agg combines per-group Bloom filters
    into the (approximate) intersection of their element sets.
    """
    octopus.create_stream('test_bloom_stream', x='int', k='int')

    # One Bloom filter per group key k.
    q = """
  SELECT k::int, bloom_agg(x::integer) FROM test_bloom_stream GROUP BY k
  """

    desc = ('k', 'x')
    octopus.create_cv('test_bloom_intersection', q)

    rows = []
    for i in range(10000):
        rows.append((0, 2 * i))  # group 0: even numbers 0..19998
        rows.append((1, i))      # group 1: all numbers 0..9999

    octopus.insert('test_bloom_stream', desc, rows)

    # Intersect the two groups' filters and take the estimated cardinality.
    cvq = """
  SELECT bloom_cardinality(bloom_intersection_agg(bloom_agg))
  FROM test_bloom_intersection
  """

    result = list(octopus.execute(cvq))

    assert len(result) == 1

    result = result[0]

    # True intersection is the 5000 even values below 10000; 5530 is the
    # deterministic approximation produced for this exact insert sequence.
    assert result[0] == 5530
Esempio n. 2
0
def test_combine_in_view(octopus, clean_db):
    """
    Verify that combine() works inside a regular view defined on top of
    a continuous view (merging the per-group avg partials).
    """
    q = """
    SELECT x::integer, avg(y::integer) FROM stream0 GROUP BY x
    """
    desc = ('x', 'y')
    octopus.create_stream('stream0', x='int', y='float8')
    octopus.create_cv('test_combine_view', q)
    # Plain SQL view that collapses all groups back into one global average.
    octopus.execute(
        'CREATE VIEW v AS SELECT combine(avg) FROM test_combine_view')

    rows = []
    for n in range(10000):
        rows.append((random.randint(1, 256), random.randint(1, 1024)))

    octopus.insert('stream0', desc, rows)

    view = list(octopus.execute('SELECT * FROM v'))

    assert len(view) == 1

    # The combined average must match the exact average over all rows.
    expected = sum(r[1] for r in rows) / float(len(rows))

    assert abs(float(view[0][0]) - expected) < 0.00001

    octopus.execute('DROP VIEW v')
Esempio n. 3
0
def test_fss_agg(octopus, clean_db):
    """
    Verify that fss_agg (Filtered Space-Saving top-k) tracks the k most
    frequent values per group.

    NOTE(review): relies on Python 2 semantics — `range` returns a list
    (shuffled in place) and `map` returns a list with `.extend`.
    """
    octopus.create_stream('test_fss_stream', x='int', k='text')
    q = """
  SELECT k::text, fss_agg(x::int, 5) FROM test_fss_stream
  GROUP BY k
  """
    desc = ('k', 'x')
    octopus.create_cv('test_fss_agg', q)

    items = range(14)
    random.shuffle(items)
    a_items = items
    b_items = list(reversed(items))

    # get_geometric_dist presumably repeats later items geometrically more
    # often, so the last 5 items of each list are the true top-5 — confirm
    # against the helper's definition.
    values = map(lambda i: ('a', i), get_geometric_dist(a_items))
    values.extend(map(lambda i: ('b', i), get_geometric_dist(b_items)))
    random.shuffle(values)

    octopus.insert('test_fss_stream', desc, values)
    result = list(
        octopus.execute(
            'SELECT k, fss_topk_values(fss_agg) FROM test_fss_agg ORDER BY k'))
    # Result is a Postgres array literal like '{1,2,3}' — parse it manually.
    topk = map(int, result[0][1].rstrip('}').lstrip('{').split(','))
    assert sorted(topk) == sorted(a_items[-5:])
    topk = map(int, result[1][1].rstrip('}').lstrip('{').split(','))
    assert sorted(topk) == sorted(b_items[-5:])
Esempio n. 4
0
def test_restart_recovery(octopus, clean_db):
    """
    Verify that continuous view state survives a clean stop/start cycle
    and that ingestion keeps working after the restart.
    """
    octopus.create_stream('stream0', x='int')
    q = 'SELECT COUNT(*) FROM stream0'
    octopus.create_cv('test_restart_recovery', q)

    octopus.insert('stream0', ['x'], [(1, ), (1, )])

    result = octopus.execute('SELECT * FROM test_restart_recovery').first()
    assert result['count'] == 2

    # Need to sleep here, otherwise on restart the materialization table is
    # empty. Not sure why.
    time.sleep(0.1)

    # Restart.
    octopus.stop()
    octopus.run()

    # Pre-restart count must have been persisted.
    result = octopus.execute('SELECT * FROM test_restart_recovery').first()
    assert result['count'] == 2

    octopus.insert('stream0', ['x'], [(1, ), (1, )])

    # Post-restart inserts continue from the recovered count.
    result = octopus.execute('SELECT * FROM test_restart_recovery').first()
    assert result['count'] == 4
Esempio n. 5
0
def test_simple_crash(octopus, clean_db):
    """
    Test simple worker and combiner crashes.

    In-flight batches at crash time may be lost, so counts are asserted
    as a set of acceptable values rather than one exact number.
    """
    octopus.create_stream('stream0', x='int')
    q = 'SELECT COUNT(*) FROM stream0'
    octopus.create_cv('test_simple_crash', q)

    octopus.insert('stream0', ['x'], [(1, ), (1, )])

    result = octopus.execute('SELECT * FROM test_simple_crash').first()
    assert result['count'] == 2

    # This batch can potentially get lost.
    octopus.insert('stream0', ['x'], [(1, ), (1, )])

    # kill_worker/kill_combiner are module-level helpers (defined elsewhere
    # in this file) that SIGKILL the respective background process.
    assert kill_worker()

    octopus.insert('stream0', ['x'], [(1, ), (1, )])

    result = octopus.execute('SELECT * FROM test_simple_crash').first()
    assert result['count'] in [4, 6]

    # This batch can potentially get lost.
    octopus.insert('stream0', ['x'], [(1, ), (1, )])

    assert kill_combiner()

    octopus.insert('stream0', ['x'], [(1, ), (1, )])

    result = octopus.execute('SELECT * FROM test_simple_crash').first()
    assert result['count'] in [6, 8, 10]

    # To ensure that all remaining events in ZMQ queues have been consumed
    time.sleep(2)
Esempio n. 6
0
def test_simple_aggs(octopus, clean_db):
    """
    Verify that combines work properly on simple aggs by comparing a
    combined continuous view against the same aggregates over a table.
    """
    # %% escapes the modulo operator for the driver's %-style parameter
    # interpolation; the executed SQL contains a single '%'.
    q = """
    SELECT x::integer %% 10 AS k,
    avg(x), sum(y::float8), count(*) FROM stream0 GROUP BY k;
    """
    desc = ('x', 'y')
    octopus.create_stream('stream0', x='int', y='float8')
    octopus.create_cv('test_simple_aggs', q)
    octopus.create_table('test_simple_aggs_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    # Same data goes to both the stream and the reference table.
    octopus.insert('stream0', desc, rows)
    octopus.insert('test_simple_aggs_t', desc, rows)

    table_result = list(
        octopus.execute(
            'SELECT avg(x), sum(y::float8), count(*) FROM test_simple_aggs_t'))
    cv_result = list(
        octopus.execute(
            'SELECT combine(avg), combine(sum), combine(count) FROM test_simple_aggs'
        ))

    assert len(table_result) == len(cv_result)

    # Combined streaming aggregates must match the exact table aggregates
    # up to floating-point tolerance.
    for tr, cr in zip(table_result, cv_result):
        assert abs(tr[0] - cr[0]) < 0.00001
        assert abs(tr[1] - cr[1]) < 0.00001
        assert abs(tr[2] - cr[2]) < 0.00001
Esempio n. 7
0
def test_filter_clause(octopus, clean_db):
    """
    Verify that FILTER clauses work on aggregates and sliding window aggregates
    """
    octopus.create_stream('test_filter_stream', x='int')
    q = """
    SELECT SUM(x::int) FILTER (WHERE mod(x, 2) = 0) AS sum2, SUM(x::int) FILTER (WHERE mod(x, 3) = 0) AS sum3 FROM test_filter_stream
    """
    # Sliding-window variant of the same query (30-second window).
    sw = """
    WHERE arrival_timestamp > clock_timestamp() - interval '30 second'
    """
    octopus.create_cv('test_filter', q)
    octopus.create_cv('test_filter_sw', '%s %s' % (q, sw))

    desc = ('x', )
    rows = []
    for n in range(1000):
        rows.append((n, ))

    octopus.insert('test_filter_stream', desc, rows)

    # Reference values computed in Python over the same inputs.
    sum2 = sum(filter(lambda x: x % 2 == 0, map(lambda x: x[0], rows)))
    sum3 = sum(filter(lambda x: x % 3 == 0, map(lambda x: x[0], rows)))

    result1 = octopus.execute('SELECT * FROM test_filter').first()
    result2 = octopus.execute('SELECT * FROM test_filter_sw').first()

    # All rows are still inside the 30s window, so both CVs agree.
    assert result1['sum2'] == result2['sum2'] == sum2
    assert result1['sum3'] == result2['sum3'] == sum3
Esempio n. 8
0
def test_user_low_and_high_card(octopus, clean_db):
    """
    Verify that HLLs with low and high cardinalities are correctly combined
    (HyperLogLog uses different internal representations per regime).
    """
    octopus.create_stream('test_hll_stream', x='int', k='integer')
    q = """
    SELECT k::integer, hll_agg(x::integer) FROM test_hll_stream GROUP BY k
    """
    desc = ('k', 'x')
    octopus.create_cv('test_hll_agg', q)

    # Low cardinalities
    rows = []
    for n in range(1000):
        rows.append((0, random.choice((-1, -2))))
        rows.append((1, random.choice((-3, -4))))

    # High cardinalities
    for n in range(10000):
        rows.append((2, n))
        rows.append((3, n))

    octopus.insert('test_hll_stream', desc, rows)

    # Low-card groups: exactly 4 distinct values, HLL is exact here.
    result = octopus.execute('SELECT hll_cardinality(combine(hll_agg)) '
                             'FROM test_hll_agg WHERE k in (0, 1)').first()
    assert result[0] == 4

    # High-card groups: 10000 distinct values; 9976 is the deterministic
    # HLL estimate for this input.
    result = octopus.execute('SELECT hll_cardinality(combine(hll_agg)) '
                             'FROM test_hll_agg WHERE k in (2, 3)').first()
    assert result[0] == 9976

    # Mixed combine across low- and high-cardinality filters.
    result = octopus.execute('SELECT hll_cardinality(combine(hll_agg)) '
                             'FROM test_hll_agg').first()
    assert result[0] == 9983
def test_indexed(octopus, clean_db):
    """
    Verify that stream-table joins involving indexed tables work
    """
    octopus.create_stream('stream0', x='int', y='int')
    q = """
    SELECT stream0.x::integer, count(*) FROM stream0
    JOIN test_indexed_t t ON stream0.x = t.x GROUP BY stream0.x
    """
    octopus.create_table('test_indexed_t', x='integer', y='integer')
    octopus.execute('CREATE INDEX idx ON test_indexed_t(x)')

    # _generate_rows and _join are module-level helpers defined elsewhere
    # in this file; _join(s, t, [0]) computes the expected equi-join on x.
    t = _generate_rows(2, 1000)
    s = _generate_rows(2, 1000)

    octopus.insert('test_indexed_t', ('x', 'y'), t)
    # Give the table insert a moment to commit before creating the CV.
    time.sleep(0.1)

    octopus.create_cv('test_indexed', q)
    octopus.insert('stream0', ('x', 'y'), s)

    expected = _join(s, t, [0])
    result = octopus.execute('SELECT sum(count) FROM test_indexed').first()

    assert result['sum'] == len(expected)
Esempio n. 10
0
def test_cmsketch_agg(octopus, clean_db):
    """
    Test cmsketch_agg, cmsketch_merge_agg, cmsketch_cdf, cmsketch_quantile
    """
    octopus.create_stream('test_cmsketch_stream', k='int', x='int')

    q = """
    SELECT k::integer, cmsketch_agg(x::int) AS c FROM test_cmsketch_stream
    GROUP BY k
    """
    desc = ('k', 'x')
    octopus.create_cv('test_cmsketch_agg', q)

    rows = []
    for n in range(1000):
        rows.append((0, n % 20))  # group 0: values 0..19, 50 times each
        rows.append((1, n % 50))  # group 1: values 0..49, 20 times each
    
    octopus.insert('test_cmsketch_stream', desc, rows)

    # Per-group frequencies: value 10 occurs in both groups, 40 only in
    # group 1, 60 in neither.
    result = list(octopus.execute(
      'SELECT cmsketch_frequency(c, 10) AS x, cmsketch_frequency(c, 40) AS y, '
      'cmsketch_frequency(c, 60) FROM test_cmsketch_agg ORDER BY k').fetchall())
    assert len(result) == 2
    assert tuple(result[0]) == (50, 0, 0)
    assert tuple(result[1]) == (20, 20, 0)

    # Combined sketch: frequencies sum across groups.
    result = list(octopus.execute(
      'SELECT cmsketch_frequency(combine(c), 10) AS x, '
      'cmsketch_frequency(combine(c), 40) AS y, cmsketch_frequency(combine(c), 60) '
      'FROM test_cmsketch_agg').fetchall())
    assert len(result) == 1
    assert tuple(result[0]) == (70, 20, 0)
Esempio n. 11
0
def assert_result_changes(func, args):
    """
    Verifies that the result of the given sliding-window aggregate decays
    over time: creates a CV with a 2-second window, inserts rows, then
    polls until the aggregate's value drops to NULL.

    NOTE(review): `octopus` is read from module globals here rather than
    taken as a parameter like the tests do — verify a fixture binds it.
    """
    octopus.create_stream('stream0', x='int', y='text', z='int')
    name = 'assert_%s_decreases' % func
    octopus.create_cv(
        name,
        "SELECT %s(%s) FROM stream0 WHERE arrival_timestamp > clock_timestamp() - interval '2 seconds'"
        % (func, args))

    rows = [(n, str(n), n + 1) for n in range(1000)]
    octopus.insert('stream0', ('x', 'y', 'z'), rows)

    current = 1

    # Poll until all rows have fallen out of the 2-second window (the
    # aggregate then returns NULL).
    results = []
    while current:
        row = octopus.execute('SELECT * FROM %s' % name).first()
        current = row[func]
        if current is None:
            break
        results.append(current)

    # Verify that we actually read something
    assert results

    octopus.drop_cv(name)
Esempio n. 12
0
def test_single_continuous_view(octopus, clean_db):
  """
  Verify that specific continuous views can be dropped and restored
  (a dump restricted to one CV must not restore the others).
  """
  octopus.create_stream('stream0', x='int')
  octopus.create_cv('test_single0', 'SELECT COUNT(*) FROM stream0')
  octopus.create_cv('test_single1', 'SELECT COUNT(*) FROM stream0')
  octopus.insert('stream0', ('x',), [(x,) for x in range(10)])

  result = octopus.execute('SELECT count FROM test_single0').first()
  assert result['count'] == 10

  result = octopus.execute('SELECT count FROM test_single1').first()
  assert result['count'] == 10

  # Dump only test_single0 (plus its stream and materialization relation);
  # _dump/_restore are pg_dump/psql helpers defined elsewhere in this file.
  _dump(octopus, 'test_single.sql', tables=['test_single0', 'stream0', 'test_single0_mrel'])

  octopus.drop_all()
  _restore(octopus, 'test_single.sql')

  result = octopus.execute('SELECT count FROM test_single0').first()
  assert result['count'] == 10

  # We didn't dump this one
  result = list(octopus.execute('SELECT * FROM pg_class WHERE relname LIKE \'%%test_single1%%\''))
  assert not result
Esempio n. 13
0
def test_sliding_windows(octopus, clean_db):
  """
  Verify that sliding window queries are properly dumped and restored,
  and that the window semantics keep working after the restore.
  """
  octopus.create_stream('stream0', x='int')
  octopus.execute('CREATE CONTINUOUS VIEW sw_v WITH (sw = \'20 seconds\') AS SELECT count(*) FROM stream0')
  octopus.insert('stream0', ('x',), [(x,) for x in range(10)])

  result = octopus.execute('SELECT count FROM sw_v').first()
  assert result['count'] == 10

  _dump(octopus, 'test_sw.sql')

  octopus.drop_all()
  _restore(octopus, 'test_sw.sql')

  # Restored view still reports the in-window rows.
  result = octopus.execute('SELECT count FROM sw_v').first()
  assert result['count'] == 10

  # We should still drop back to 0 within 20 seconds
  result = octopus.execute('SELECT count FROM sw_v').first()
  while result['count'] > 0:
    time.sleep(1)
    result = octopus.execute('SELECT count FROM sw_v').first()

  # Final re-check once the polling loop has observed 0.
  result = octopus.execute('SELECT count FROM sw_v').first()
  assert result['count'] == 0
Esempio n. 14
0
def test_bloom_contains(octopus, clean_db):
    """
    Verify that bloom_contains reports membership correctly: inserted
    values are found, and the probed non-inserted values are not.
    """
    octopus.create_stream('test_bloom_stream', x='int')

    q = """
  SELECT bloom_agg(x::integer) FROM test_bloom_stream
  """

    # Fixed: ('x') is just the string 'x'; a one-element tuple requires a
    # trailing comma. (It previously worked only because iterating the
    # string 'x' also yields a single 'x'.)
    desc = ('x', )
    octopus.create_cv('test_bloom_contains', q)

    # Insert only even values 0..19998.
    rows = []
    for i in range(10000):
        rows.append((2 * i, ))

    octopus.insert('test_bloom_stream', desc, rows)

    cvq = """
  SELECT bloom_contains(bloom_agg, 0), bloom_contains(bloom_agg, 5000),
  bloom_contains(bloom_agg, 1), bloom_contains(bloom_agg, 5001)
  FROM test_bloom_contains
  """

    result = list(octopus.execute(cvq))

    assert len(result) == 1
    result = result[0]
    # Bloom filters have no false negatives, so inserted (even) values
    # must test True; the probed odd values are known non-false-positives.
    assert result[0] == True
    assert result[1] == True
    assert result[2] == False
    assert result[3] == False
Esempio n. 15
0
def test_bloom_agg_hashing(octopus, clean_db):
    """
    Verify that bloom_agg correctly hashes different input types
    (integer, text and float8 columns get independent filters).
    """
    octopus.create_stream('test_bloom_stream', x='int', y='text', z='float8')

    q = """
  SELECT bloom_agg(x::integer) AS i,
  bloom_agg(y::text) AS t,
  bloom_agg(z::float8) AS f FROM test_bloom_stream
  """
    desc = ('x', 'y', 'z')
    octopus.create_cv('test_bloom_hashing', q)

    rows = []
    for n in range(10000):
        # The two text renderings ('%d' vs zero-padded '%05d') are distinct
        # strings for n < 10000, so the text column has ~2x the distinct
        # values of the int/float columns.
        rows.append((n, '%d' % n, float(n)))
        rows.append((n, '%05d' % n, float(n)))

    octopus.insert('test_bloom_stream', desc, rows)

    cvq = """
  SELECT bloom_cardinality(i),
  bloom_cardinality(t), bloom_cardinality(f) FROM test_bloom_hashing
  """
    result = list(octopus.execute(cvq))

    assert len(result) == 1

    result = result[0]

    # Deterministic approximate cardinalities for this fixed input:
    # ~10000 distinct ints/floats, ~20000 distinct strings.
    assert result[0] == 8879
    assert result[1] == 15614
    assert result[2] == 8855
Esempio n. 16
0
def test_multiple_databases(octopus, clean_db):
    """
    Verify that identically-named streams and continuous views in two
    databases are fully isolated: writes to one are never visible in the
    other.

    NOTE(review): the `sorted(...) == range(...)` comparisons rely on
    Python 2, where range() returns a list.
    """
    conn = psycopg2.connect('dbname=octopus user=%s host=localhost port=%s' %
                            (getpass.getuser(), octopus.port))
    # CREATE DATABASE cannot run inside a transaction block.
    conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)

    cur = conn.cursor()
    cur.execute('CREATE DATABASE tmp_octopus')
    cur.close()

    q = 'SELECT x::int FROM dbstream'
    octopus.create_stream('dbstream', x='int')
    octopus.create_cv('test_multiple_databases', q)

    # Insert data in first database.
    octopus.insert('dbstream', ['x'], map(lambda x: (x, ), range(0, 10, 2)))
    result = octopus.execute('SELECT * FROM test_multiple_databases')
    assert sorted(row['x'] for row in result) == range(0, 10, 2)

    # Create same CV in the other database, make sure its created and write different data to it.
    tmp_conn = psycopg2.connect(
        'dbname=tmp_octopus user=%s host=localhost port=%s' %
        (getpass.getuser(), octopus.port))
    cur = tmp_conn.cursor()
    cur.execute('CREATE STREAM dbstream (x int)')
    cur.execute('CREATE CONTINUOUS VIEW test_multiple_databases AS %s' % q)
    tmp_conn.commit()
    # Second database gets odd values so the two datasets are disjoint.
    cur.execute('INSERT INTO dbstream (x) VALUES %s' %
                ', '.join(map(lambda x: '(%d)' % x, range(1, 11, 2))))
    cur.execute('SELECT * FROM test_multiple_databases')
    tmp_conn.commit()
    assert sorted(row[0] for row in cur) == range(1, 11, 2)

    # Ensure that the data written to the other database isn't seen by the first database.
    result = octopus.execute('SELECT * FROM test_multiple_databases')
    assert sorted(row['x'] for row in result) == range(0, 10, 2)

    # Insert new data to both databases.
    octopus.insert('dbstream', ['x'], map(lambda x: (x, ), range(10, 20, 2)))
    cur.execute('INSERT INTO dbstream (x) VALUES %s' %
                ', '.join(map(lambda x: '(%d)' % x, range(11, 21, 2))))

    # Ensure both databases still saw the data written out to them.
    result = octopus.execute('SELECT * FROM test_multiple_databases')
    assert sorted(row['x'] for row in result) == range(0, 20, 2)
    cur.execute('SELECT * FROM test_multiple_databases')
    tmp_conn.commit()
    assert sorted(row[0] for row in cur) == range(1, 21, 2)

    # Tear down the temporary database.
    cur.close()
    tmp_conn.close()
    cur = conn.cursor()
    cur.execute('DROP DATABASE tmp_octopus')
    cur.close()
    conn.close()
Esempio n. 17
0
def test_uniqueness(octopus, clean_db):
    """
    Verify that grouped aggregation yields exactly one output row per
    distinct group key (no duplicate groups across batches).
    """
    octopus.create_stream('stream0', x='int')
    octopus.create_cv('uniqueness',
                      'SELECT x::int, count(*) FROM stream0 GROUP BY x')

    # Ten disjoint batches of 10000 unique keys each.
    for batch in range(10):
        keys = [(batch * 10000 + offset, ) for offset in xrange(10000)]
        octopus.insert('stream0', ('x', ), keys)

    count = octopus.execute('SELECT count(*) FROM uniqueness').first()['count']
    distinct_count = octopus.execute(
        'SELECT count(DISTINCT x) FROM uniqueness').first()['count']

    assert count == distinct_count
Esempio n. 18
0
def test_output_tree(octopus, clean_db):
  """
  Create a relatively complex tree of continuous views
  and transforms chained together by their output streams,
  and verify that all output correctly propagates to the leaves.
  """
  octopus.create_stream('root', x='int')
  octopus.create_cv('level0_0', 'SELECT x::integer, count(*) FROM root GROUP BY x')

  # Each level reads the previous level's output stream (_osrel), whose
  # rows expose the (new) composite of the updated CV row.
  octopus.create_cv('level1_0', 'SELECT (new).x, (new).count FROM level0_0_osrel')
  octopus.create_cv('level1_1', 'SELECT (new).x, (new).count FROM level0_0_osrel')

  octopus.create_cv('level2_0', 'SELECT (new).x, (new).count FROM level1_0_osrel')
  octopus.create_cv('level2_1', 'SELECT (new).x, (new).count FROM level1_0_osrel')
  octopus.create_cv('level2_2', 'SELECT (new).x, (new).count FROM level1_1_osrel')
  octopus.create_cv('level2_3', 'SELECT (new).x, (new).count FROM level1_1_osrel')

  octopus.create_cv('level3_0', 'SELECT (new).x, (new).count FROM level2_0_osrel')
  octopus.create_cv('level3_1', 'SELECT (new).x, (new).count FROM level2_0_osrel')
  octopus.create_cv('level3_2', 'SELECT (new).x, (new).count FROM level2_1_osrel')
  octopus.create_cv('level3_3', 'SELECT (new).x, (new).count FROM level2_1_osrel')
  octopus.create_cv('level3_4', 'SELECT (new).x, (new).count FROM level2_2_osrel')
  octopus.create_cv('level3_5', 'SELECT (new).x, (new).count FROM level2_2_osrel')
  octopus.create_cv('level3_6', 'SELECT (new).x, (new).count FROM level2_3_osrel')
  octopus.create_cv('level3_7', 'SELECT (new).x, (new).count FROM level2_3_osrel')

  # 10000 rows over 100 distinct keys -> count of 100 per key at the root.
  octopus.insert('root', ('x',), [(x % 100,) for x in range(10000)])

  # 1 + 2 + 4 + 8 = 15 views in the binary tree.
  names = [r[0] for r in octopus.execute('SELECT name FROM octopus_views() ORDER BY name DESC')]
  assert len(names) == 15

  # Verify all values propagated to each node in the tree
  for name in names:
    rows = octopus.execute('SELECT x, max(count) FROM %s GROUP BY x' % name)
    for row in rows:
      x, count = row
      assert count == 100

  octopus.insert('root', ('x',), [(x % 100,) for x in range(10000)])

  # Verify all values propagated to each node in the tree again
  for name in names:
    rows = octopus.execute('SELECT x, max(count) FROM %s GROUP BY x' % name)
    for row in rows:
      x, count = row
      assert count == 200

  # Drop these in reverse dependency order to prevent deadlocks
  for name in names:
    octopus.drop_cv(name)
Esempio n. 19
0
def test_tdigest_agg(octopus, clean_db):
    """
    Test tdigest_agg, tdigest_merge_agg, tdigest_cdf, tdigest_quantile
    """
    q = """
    SELECT k::integer, tdigest_agg(x::int) AS t FROM test_tdigest_stream
    GROUP BY k
    """
    desc = ('k', 'x')
    octopus.create_stream('test_tdigest_stream', k='int', x='int')
    octopus.create_cv('test_tdigest_agg', q)

    rows = []
    for _ in range(10):
        for n in range(1000):
            rows.append((0, n))        # group 0: uniform 0..999
            rows.append((1, n + 500))  # group 1: uniform 500..1499

    octopus.insert('test_tdigest_stream', desc, rows)

    # 10th percentile per group: ~99 for 0..999, ~599 for 500..1499.
    result = list(
        octopus.execute(
            'SELECT tdigest_quantile(t, 0.1) FROM test_tdigest_agg ORDER BY k'
        ).fetchall())
    assert len(result) == 2
    assert abs(int(result[0]['tdigest_quantile']) - 99) <= 1
    assert abs(int(result[1]['tdigest_quantile']) - 599) <= 1

    # Combined digest over both groups (uniform-ish 0..1499 with overlap).
    result = list(
        octopus.execute(
            'SELECT tdigest_quantile(combine(t), 0.1) FROM test_tdigest_agg').
        fetchall())
    assert len(result) == 1
    assert abs(int(result[0]['tdigest_quantile']) - 200) <= 4

    # CDF at 600: 60% of group 0 is below 600, 10% of group 1.
    result = list(
        octopus.execute(
            'SELECT tdigest_cdf(t, 600) FROM test_tdigest_agg ORDER BY k').
        fetchall())
    assert len(result) == 2
    assert round(result[0]['tdigest_cdf'], 2) == 0.6
    assert round(result[1]['tdigest_cdf'], 2) == 0.1

    # Combined CDF: (0.6 + 0.1) / 2 = 0.35.
    result = list(
        octopus.execute(
            'SELECT tdigest_cdf(combine(t), 600) FROM test_tdigest_agg').
        fetchall())
    assert len(result) == 1
    assert round(result[0]['tdigest_cdf'], 2) == 0.35
Esempio n. 20
0
def test_multiple_insert(octopus, clean_db):
  """
  Verify that a continuous transform can fan its output out to several
  downstream streams at once via octopus_stream_insert.
  """
  for stream_name in ('stream0', 'stream1', 'stream2'):
    octopus.create_stream(stream_name, x='int')

  octopus.create_cv('cv0', 'SELECT count(*) FROM stream1')
  octopus.create_cv('cv1', 'SELECT count(*) FROM stream2')
  # Transform forwards even values from stream0 to both stream1 and stream2.
  octopus.create_ct('ct1', 'SELECT x::int FROM stream0 WHERE mod(x, 2) = 0', "octopus_stream_insert('stream1', 'stream2')")

  octopus.insert('stream0', ('x',), [(n,) for n in range(1000)])

  # Half of 0..999 are even, and each even row reaches both streams.
  count0 = octopus.execute('SELECT count FROM cv0').first()['count']
  count1 = octopus.execute('SELECT count FROM cv1').first()['count']
  assert count0 == 500
  assert count1 == 500
Esempio n. 21
0
def test_multiple_stmts(octopus, clean_db):
    """
    Verify that a CREATE CONTINUOUS VIEW works when sent as part of a
    multi-statement query string.
    """
    octopus.create_stream('stream0', unused='int')
    conn = psycopg2.connect('dbname=octopus user=%s host=localhost port=%s' %
                            (getpass.getuser(), octopus.port))
    db = conn.cursor()
    # Two statements in one execute() call; the trailing SELECT 1 must not
    # break CV creation.
    db.execute('CREATE CONTINUOUS VIEW test_multiple AS '
               'SELECT COUNT(*) FROM stream0; SELECT 1;')
    conn.commit()
    conn.close()

    octopus.insert('stream0', ('unused', ), [(1, )] * 100)

    result = list(octopus.execute('SELECT * FROM test_multiple'))
    assert len(result) == 1
    assert result[0]['count'] == 100
Esempio n. 22
0
def _test_hs_agg(octopus, agg):
    """
    Shared driver for hypothetical-set aggregates (rank, dense_rank, ...).

    Creates a CV computing `agg(h) WITHIN GROUP (ORDER BY x)` over random
    values, and returns the Python-computed reference statistics together
    with the aggregate's result so callers can assert the relationship.

    Returns:
        (rank, dense_rank, peers, total_rows, agg_result)
    """
    values = [random.randint(-100, 100) for n in range(1000)]
    # Hypothetical value near (but not necessarily in) the data set.
    h = random.choice(values) + random.randint(-10, 10)

    octopus.create_stream('stream0', x='int')
    cq = 'SELECT %s(%d) WITHIN GROUP (ORDER BY x::integer) FROM stream0' % (agg, h)
    octopus.create_cv('test_%s' % agg, cq)

    octopus.insert('stream0', ('x',), [(v,) for v in values])

    result = octopus.execute('SELECT %s FROM test_%s' % (agg, agg)).first()

    # _rank/_dense_rank are reference implementations defined elsewhere in
    # this file.
    rank, peers = _rank(h, values)
    dense_rank, _ = _dense_rank(h, values)

    return rank, dense_rank, peers, len(values), result[agg]
Esempio n. 23
0
def test_percentile_cont_agg(octopus, clean_db):
    """
    Verify that the streaming percentile_cont approximation stays close to
    the exact table-based percentile_cont across a range of quantiles.
    """
    range_top = 100000
    # Quantiles to evaluate, from the 0th to the 100th percentile.
    q = [0.0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0]

    batches = []
    min_seen = range_top
    max_seen = 0
    for _ in xrange(10):
        b = [(random.randint(0, range_top), ) for _ in xrange(5000)]
        min_seen = min(min_seen, min(b)[0])
        max_seen = max(max_seen, max(b)[0])
        batches.append(b)

    octopus.create_stream('test_stream', x='int')
    # Template with a trailing %s so the same query can target either the
    # stream or the reference table.
    query = '''SELECT
  percentile_cont(ARRAY[%s])
  WITHIN GROUP (ORDER BY x::integer) FROM %s
  ''' % (', '.join(map(lambda f: str(f), q)), '%s')

    octopus.create_cv('test_cq_percentile_cont', query % 'test_stream')
    octopus.create_table('test_percentile_cont', x='integer')

    for b in batches:
        octopus.insert('test_stream', ('x', ), b)
        octopus.insert('test_percentile_cont', ('x', ), b)

    actual = octopus.execute(query % 'test_percentile_cont')
    result = octopus.execute('SELECT * FROM test_cq_percentile_cont')

    actual = actual.first()['percentile_cont']
    result = result.first()['percentile_cont']

    assert len(actual) == len(result)
    # Percentiles must be monotonically non-decreasing.
    assert result == sorted(result)
    diff = [abs(actual[i] - result[i]) for i in xrange(len(actual))]

    # 0th and 100th percentile should be accurate.
    assert result[0] == min_seen
    assert result[-1] == max_seen

    # 1st and 99th percentile should be within 0.1%.
    assert diff[1] <= 0.001 * range_top
    assert diff[-2] <= 0.001 * range_top

    # All percentiles should be within 0.5%.
    assert all(x <= 0.005 * range_top for x in diff)
Esempio n. 24
0
def test_static_streams(octopus, clean_db):
  """
  Verify that static stream definitions are dumped and restored
  """
  octopus.create_stream('static', x='int', y='float8')

  _dump(octopus, 'test_static.sql')

  octopus.drop_stream('static')
  _restore(octopus, 'test_static.sql')

  # Force the requirement of a static stream
  octopus.create_cv('static_cv', 'SELECT x, y FROM static')
  octopus.insert('static', ('x', 'y'), [(0, 1)])

  # The restored stream must accept writes and feed the new CV.
  result = octopus.execute('SELECT x, y FROM static_cv').first()
  assert result['x'] == 0
  assert result['y'] == 1
Esempio n. 25
0
def test_cksketch_frequency(octopus, clean_db):
  """
  Verify that cmsketch_frequency returns 0 (rather than erroring) when
  probed with NULL, and that NULL stream values don't pollute the sketch.
  """
  octopus.create_stream('test_cmsketch_stream', k='int', x='int')

  q = """
  SELECT k::integer, cmsketch_agg(x::int) AS c FROM test_cmsketch_stream
  GROUP BY k
  """
  desc = ('k', 'x')
  octopus.create_cv('test_cmsketch_frequency', q)

  # 100 groups, each aggregating only NULL values.
  rows = [(n, None) for n in range(100)]
  octopus.insert('test_cmsketch_stream', desc, rows)

  result = list(octopus.execute(
    'SELECT cmsketch_frequency(c, null) AS x FROM test_cmsketch_frequency ORDER BY k').fetchall())
  assert len(result) == 100
  for row in result:
    assert row[0] == 0
Esempio n. 26
0
def test_simple_insert(octopus, clean_db):
    """
    Verify that inserted rows are grouped and counted correctly by a
    basic COUNT(*) ... GROUP BY continuous view.
    """
    octopus.create_stream('stream0', key='int')
    octopus.create_cv(
        'cv', 'SELECT key::integer, COUNT(*) FROM stream0 GROUP BY key')

    # 1000 rows spread evenly over keys 0..9 (100 rows per key).
    octopus.insert('stream0', ('key', ), [(n % 10, ) for n in range(1000)])

    result = list(octopus.execute('SELECT * FROM cv ORDER BY key'))

    assert len(result) == 10
    for expected_key, row in enumerate(result):
        assert row['key'] == expected_key
        assert row['count'] == 100
Esempio n. 27
0
def test_stream_buffer(octopus, clean_db):
    """
    Run CQs which take different times to process an event in the stream
    buffer, with each stream read by a disjoint set of CQs, and verify
    no events are dropped under concurrent writers.
    """
    octopus.create_stream('stream1', x='int', string='text')
    octopus.create_stream('stream2', x='int', string='text')

    # Per stream: one fast CV and one deliberately slow CV (pg_sleep) so
    # readers drain the buffer at different rates.
    octopus.create_cv('test_sbuf_1', 'SELECT x::int FROM stream1')
    octopus.create_cv('test_sbuf_2',
                      'SELECT x::int, pg_sleep(0.002) FROM stream1')
    octopus.create_cv('test_sbuf_3', 'SELECT x::int FROM stream2')
    octopus.create_cv('test_sbuf_4',
                      'SELECT x::int, pg_sleep(0.002) FROM stream2')

    num_per_batch = 5000
    num_batches = 6
    # We're sending this 2k string with each event just to consume
    # more space in the stream buffer. That way we get to test the
    # wrapping of the buffer as well.
    values = [(i, 'a' * 2048) for i in xrange(num_per_batch)]

    def insert(stream):
        # Each thread sends num_batches - 1 batches; the final batch per
        # stream is sent serially below.
        for _ in xrange(num_batches - 1):
            octopus.insert(stream, ('x', 'string'), values)

    threads = [
        threading.Thread(target=insert, args=('stream1', )),
        threading.Thread(target=insert, args=('stream2', ))
    ]

    map(lambda t: t.start(), threads)
    map(lambda t: t.join(), threads)

    octopus.insert('stream1', ('x', 'string'), values)
    octopus.insert('stream2', ('x', 'string'), values)

    # Every CV must have seen every event from its stream.
    q = 'SELECT COUNT(*) FROM test_sbuf_%d'
    r1 = octopus.execute(q % 1).first()[0]
    r2 = octopus.execute(q % 2).first()[0]
    r3 = octopus.execute(q % 3).first()[0]
    r4 = octopus.execute(q % 4).first()[0]

    assert r1 == r2 == r3 == r4 == num_batches * num_per_batch
Esempio n. 28
0
def test_schema_only(octopus, clean_db):
  """
  Verify that it is possible to only dump continuous view schemas and not data
  """
  octopus.create_stream('stream0', x='int')
  octopus.create_cv('test_schema', 'SELECT COUNT(*) FROM stream0')
  octopus.insert('stream0', ('x',), [(x,) for x in range(10)])

  result = octopus.execute('SELECT count FROM test_schema').first()
  assert result['count'] == 10

  # Schema-only dump: definitions without materialized rows.
  _dump(octopus, 'test_schema.sql', schema_only=True)

  octopus.drop_all()
  _restore(octopus, 'test_schema.sql')

  # No data loaded
  result = list(octopus.execute('SELECT count FROM test_schema'))
  assert not result
Esempio n. 29
0
def test_transforms(octopus, clean_db):
  """
  Verify that continuous transforms work properly on output streams
  (here: reacting to rows expiring out of a sliding window).
  """
  octopus.create_stream('stream0', x='int')
  octopus.create_cv('sw', 'SELECT x::integer, COUNT(*) FROM stream0 GROUP BY x',
                     sw='5 seconds')

  # Write a row to a stream each time a row goes out of window
  q = 'SELECT (old).x FROM sw_osrel WHERE old IS NOT NULL AND new IS NULL'
  octopus.create_stream('oow_stream', x='integer')
  octopus.create_ct('ct', q, "octopus_stream_insert('oow_stream')")
  octopus.create_cv('ct_recv', 'SELECT x FROM oow_stream')

  octopus.insert('stream0', ('x',), [(x % 100,) for x in range(10000)])
  # Wait past the 5s window so every group expires.
  time.sleep(7)

  # One out-of-window event per distinct group key.
  rows = list(octopus.execute('SELECT * FROM ct_recv'))
  assert len(rows) == 100
Esempio n. 30
0
def test_distinct(octopus, clean_db):
    """
    Verify that streaming SELECT DISTINCT ON (...) works, including
    DISTINCT ON expressions (y - z), by comparing against the same query
    over a table.

    NOTE(review): uses dict.iteritems — Python 2 only.
    """
    octopus.create_stream('stream0', x='int', y='int', z='int')
    octopus.create_table('table0', x='int', y='int', z='int')
    q = 'SELECT DISTINCT ON (x::int, y::int - z::int) x::int, y::int FROM stream0'
    octopus.create_cv('test_distinct', q)

    # Track which y values appear for each distinct (x, y - z) key.
    uniques = defaultdict(set)
    values = []
    for _ in xrange(2000):
        x, y, z = random.randint(0, 20), random.randint(0, 20), random.randint(
            0, 20)
        values.append((x, y, z))
        uniques[(x, y - z)].add(y)

    octopus.insert('stream0', ['x', 'y', 'z'], values)
    octopus.insert('table0', ['x', 'y', 'z'], values)

    q = """
  SELECT DISTINCT ON (x::int, y::int - z::int) x::int, y::int FROM table0
  """
    expected = list(octopus.execute(q))
    expected = len(expected)

    # With 21*41 possible keys and 2000 rows, duplicates are certain.
    assert expected < 2000

    result = octopus.execute('SELECT COUNT(*) FROM test_distinct').first()

    assert expected == result['count']

    # Check if the first row was selected for uniques
    result = octopus.execute('SELECT * FROM test_distinct')
    reverse_uniques = defaultdict(set)

    for (x, _), ys in uniques.iteritems():
        for y in ys:
            reverse_uniques[y].add(x)

    for row in result:
        assert row['x'] in reverse_uniques[row['y']]