def test_indexed(octopus, clean_db):
    """
    Verify that stream-table joins involving indexed tables work
    """
    octopus.create_stream('stream0', x='int', y='int')
    q = """
    SELECT stream0.x::integer, count(*) FROM stream0
    JOIN test_indexed_t t ON stream0.x = t.x GROUP BY stream0.x
    """
    octopus.create_table('test_indexed_t', x='integer', y='integer')
    octopus.execute('CREATE INDEX idx ON test_indexed_t(x)')

    t = _generate_rows(2, 1000)
    s = _generate_rows(2, 1000)

    octopus.insert('test_indexed_t', ('x', 'y'), t)
    time.sleep(0.1)

    octopus.create_cv('test_indexed', q)
    octopus.insert('stream0', ('x', 'y'), s)

    expected = _join(s, t, [0])
    result = octopus.execute('SELECT sum(count) FROM test_indexed').first()

    assert result['sum'] == len(expected)
def test_incremental_join(octopus, clean_db):
    """
    Verify that join results increase appropriately as we incrementally
    add stream events to the input
    """
    num_cols = 4
    join_cols = [0, 1]
    t_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])
    octopus.create_table('inc', **t_cols)
    octopus.create_stream('stream0', **t_cols)

    q = """
    SELECT s.col0::integer FROM inc JOIN stream0 s ON inc.col0 = s.col0
    AND inc.col1 = s.col1::integer
    """
    t = _generate_rows(num_cols, 64)
    _insert(octopus, 'inc', t, 0.1)

    octopus.create_cv('test_join', q)
    s = []
    for n in range(2):
        row = _generate_row(num_cols)
        _insert(octopus, 'stream0', [row])
        s.append(row)

    expected = _join(t, s, join_cols)
    result = octopus.execute('SELECT COUNT(*) FROM test_join').first()

    assert result['count'] == len(expected)
def test_join_with_where(octopus, clean_db):
    """
    Verify that stream-table joins using a WHERE clause work properly
    """
    num_cols = 4
    q = """
    SELECT s.col0::integer FROM stream0 s, wt WHERE s.col0 = 1 AND wt.col0 = 1
    """
    wt_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])

    octopus.create_table('wt', **wt_cols)
    octopus.create_table('wt_s', **wt_cols)

    wt = _generate_rows(num_cols, 64)
    s = _generate_rows(num_cols, 64)

    _insert(octopus, 'wt', wt, 0.1)
    _insert(octopus, 'wt_s', s, 0.1)

    octopus.create_stream('stream0', **wt_cols)
    octopus.create_cv('test_join_where', q)
    _insert(octopus, 'stream0', s)

    expected = octopus.execute('SELECT COUNT(*) FROM wt_s s, wt WHERE s.col0 = 1 AND wt.col0 = 1').first()
    result = octopus.execute('SELECT COUNT(*) FROM test_join_where').first()

    assert result['count'] == expected['count']
def test_simple_aggs(octopus, clean_db):
    """
    Verify that combines work properly on simple aggs
    """
    q = """
    SELECT x::integer %% 10 AS k,
    avg(x), sum(y::float8), count(*) FROM stream0 GROUP BY k;
    """
    desc = ('x', 'y')
    octopus.create_stream('stream0', x='int', y='float8')
    octopus.create_cv('test_simple_aggs', q)
    octopus.create_table('test_simple_aggs_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    octopus.insert('stream0', desc, rows)
    octopus.insert('test_simple_aggs_t', desc, rows)

    table_result = list(
        octopus.execute(
            'SELECT avg(x), sum(y::float8), count(*) FROM test_simple_aggs_t'))
    cv_result = list(
        octopus.execute(
            'SELECT combine(avg), combine(sum), combine(count) FROM test_simple_aggs'
        ))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert abs(tr[0] - cr[0]) < 0.00001
        assert abs(tr[1] - cr[1]) < 0.00001
        assert abs(tr[2] - cr[2]) < 0.00001
def test_join_multiple_tables(octopus, clean_db):
    """
    Verify that stream-table joins involving multiple tables work
    """
    num_cols = 8
    join_cols = [0]
    t0_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])
    t1_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])

    octopus.create_table('t0', **t0_cols)
    octopus.create_table('t1', **t1_cols)
    octopus.create_stream('stream0', **t0_cols)
    q = """
    SELECT s.col0::integer FROM t0 JOIN t1 ON t0.col0 = t1.col0
    JOIN stream0 s ON t1.col0 = s.col0
    """

    t0 = _generate_rows(num_cols, 64)
    t1 = _generate_rows(num_cols, 64)
    s = _generate_rows(num_cols, 64)

    _insert(octopus, 't1', t1, 0.1)
    _insert(octopus, 't0', t0, 0.1)

    octopus.create_cv('test_join_multi', q)
    _insert(octopus, 'stream0', s)

    expected = _join(t0, _join(s, t1, join_cols), join_cols)
    result = octopus.execute('SELECT COUNT(*) FROM test_join_multi').first()

    assert result['count'] == len(expected)
def test_join_across_batches(octopus, clean_db):
    """
    Verify that stream-table joins are properly built when they
    span across multiple input batches
    """
    num_cols = 4
    join_cols = [0]
    t_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])
    octopus.create_table('batch', **t_cols)
    octopus.create_stream('stream0', **t_cols)

    q = """
    SELECT s.col0::integer FROM batch JOIN stream0 s ON batch.col0 = s.col0
    """

    t = _generate_rows(num_cols, 64)
    _insert(octopus, 'batch', t, 0.1)

    s = _generate_rows(num_cols, 64)
    octopus.create_cv('test_batched_join', q)
    _insert(octopus, 'stream0', s)

    expected = _join(t, s, join_cols)
    result = octopus.execute('SELECT COUNT(*) FROM test_batched_join').first()

    assert result['count'] == len(expected)
def test_join_ordering(octopus, clean_db):
    """
    Verify that the correct plan is generated regardless of the ordering of
    streams and tables.
    """
    num_cols = 8
    join_cols = [0]
    ordering0_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])
    ordering1_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])

    octopus.create_table('ordering0', **ordering0_cols)
    octopus.create_table('ordering1', **ordering1_cols)

    ordering0 = _generate_rows(num_cols, 64)
    ordering1 = _generate_rows(num_cols, 64)
    _insert(octopus, 'ordering0', ordering0, 0.1)
    _insert(octopus, 'ordering1', ordering1, 0.1)

    octopus.create_stream('stream0', **ordering0_cols)

    # stream, table, table
    q0 = """
    SELECT s.col0::integer, ordering0.col3, ordering1.col4 FROM
    stream0 s JOIN ordering0 ON s.col0 = ordering0.col0
    JOIN ordering1 ON ordering0.col0 = ordering1.col0
    """
    octopus.create_cv('test_ordering0', q0)

    # table, stream, table
    q1 = """
    SELECT s.col0::integer, ordering0.col3, ordering1.col4 FROM
    ordering0 JOIN stream0 s ON s.col0 = ordering0.col0
    JOIN ordering1 ON ordering0.col0 = ordering1.col0
    """
    octopus.create_cv('test_ordering1', q1)

    # table, table, stream
    q2 = """
    SELECT s.col0::integer, ordering0.col3, ordering1.col4 FROM
    ordering0 JOIN ordering1 ON ordering0.col0 = ordering1.col0
    JOIN stream0 s ON s.col0 = ordering0.col0
    """
    octopus.create_cv('test_ordering2', q2)

    s = _generate_rows(num_cols, 64)
    _insert(octopus, 'stream0', s)

    expected = _join(ordering0, _join(ordering1, s, join_cols), join_cols)

    result0 = octopus.execute('SELECT COUNT(*) FROM test_ordering0').first()
    result1 = octopus.execute('SELECT COUNT(*) FROM test_ordering1').first()
    result2 = octopus.execute('SELECT COUNT(*) FROM test_ordering2').first()

    assert result0['count'] == len(expected)
    assert result1['count'] == len(expected)
    assert result2['count'] == len(expected)
Exemple #8
0
def test_hll_type(octopus, clean_db):
    octopus.create_table('test_hll_type', x='int', y='hll')
    octopus.execute('INSERT INTO test_hll_type (x, y) VALUES '
                    '(1, hll_empty()), (2, hll_empty())')

    for i in xrange(1000):
        octopus.execute('UPDATE test_hll_type SET y = hll_add(y, %d / x)' % i)

    result = list(
        octopus.execute('SELECT hll_cardinality(y) '
                        'FROM test_hll_type ORDER BY x'))
    assert result[0][0] == 995
    assert result[1][0] == 497
Exemple #9
0
def test_cmsketch_type(octopus, clean_db):
  octopus.create_table('test_cmsketch_type', x='int', y='cmsketch')
  octopus.execute('INSERT INTO test_cmsketch_type (x, y) VALUES '
                   '(1, cmsketch_empty()), (2, cmsketch_empty())')

  for i in xrange(1000):
    octopus.execute('UPDATE test_cmsketch_type '
                     'SET y = cmsketch_add(y, {} %% x)'.format(i))

  result = list(octopus.execute('SELECT cmsketch_frequency(y, 0), '
                                 'cmsketch_frequency(y, 1) '
                                 'FROM test_cmsketch_type ORDER BY x'))
  assert result[0] == (1000, 0)
  assert result[1] == (500, 500)
Exemple #10
0
def test_tdigest_type(octopus, clean_db):
    octopus.create_table('test_tdigest_type', x='int', y='tdigest')
    octopus.execute('INSERT INTO test_tdigest_type (x, y) VALUES '
                    '(1, tdigest_empty()), (2, tdigest_empty())')

    for i in xrange(1000):
        octopus.execute('UPDATE test_tdigest_type '
                        'SET y = tdigest_add(y, {} %% (x * 500))'.format(i))

    result = list(
        octopus.execute('SELECT tdigest_cdf(y, 400), '
                        'tdigest_quantile(y, 0.9)'
                        'FROM test_tdigest_type ORDER BY x'))
    assert map(lambda x: round(x, 1), result[0]) == [0.8, 449.5]
    assert map(lambda x: round(x, 1), result[1]) == [0.4, 899.5]
def test_join_with_aggs(octopus, clean_db):
    """
    Verify that joins involving aggregates referencing columns from
    multiple tables work
    """
    num_cols = 4
    join_cols = [1]
    q = """
    SELECT
    sum(s.col0::integer) AS s0,
    sum(a0.col0::integer) AS s1,
    sum(a1.col0::integer) AS s2
    FROM a1 JOIN a0 ON a1.col1 = a0.col1
    JOIN stream0 s ON s.col1::integer = a0.col1
    """
    a0_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])
    a1_cols = dict([('col%d' % n, 'integer') for n in range(num_cols)])

    octopus.create_table('a0', **a0_cols)
    octopus.create_table('a1', **a1_cols)

    a0 = _generate_rows(num_cols, 64)
    a1 = _generate_rows(num_cols, 64)
    s = _generate_rows(num_cols, 64)

    _insert(octopus, 'a0', a0, 0.1)
    _insert(octopus, 'a1', a1, 0.1)

    octopus.create_stream('stream0', **a0_cols)
    octopus.create_cv('test_agg_join', q)
    _insert(octopus, 'stream0', s)

    expected = _join(a1, _join(a0, s, join_cols), join_cols)
    result = octopus.execute('SELECT * FROM test_agg_join').first()

    # sum of col0 from stream
    s0_expected = sum([r[num_cols * 2] for r in expected])

    # sum of col0 from a0
    s1_expected = sum([r[num_cols * 1] for r in expected])

    # sum of col0 from a1
    s2_expected = sum([r[num_cols * 0] for r in expected])

    assert s0_expected == result['s0']
    assert s1_expected == result['s1']
    assert s2_expected == result['s2']
Exemple #12
0
def test_percentile_cont_agg(octopus, clean_db):
    range_top = 100000
    q = [0.0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1.0]

    batches = []
    min_seen = range_top
    max_seen = 0
    for _ in xrange(10):
        b = [(random.randint(0, range_top), ) for _ in xrange(5000)]
        min_seen = min(min_seen, min(b)[0])
        max_seen = max(max_seen, max(b)[0])
        batches.append(b)

    octopus.create_stream('test_stream', x='int')
    query = '''SELECT
  percentile_cont(ARRAY[%s])
  WITHIN GROUP (ORDER BY x::integer) FROM %s
  ''' % (', '.join(map(lambda f: str(f), q)), '%s')

    octopus.create_cv('test_cq_percentile_cont', query % 'test_stream')
    octopus.create_table('test_percentile_cont', x='integer')

    for b in batches:
        octopus.insert('test_stream', ('x', ), b)
        octopus.insert('test_percentile_cont', ('x', ), b)

    actual = octopus.execute(query % 'test_percentile_cont')
    result = octopus.execute('SELECT * FROM test_cq_percentile_cont')

    actual = actual.first()['percentile_cont']
    result = result.first()['percentile_cont']

    assert len(actual) == len(result)
    assert result == sorted(result)
    diff = [abs(actual[i] - result[i]) for i in xrange(len(actual))]

    # 0th and 100th percentile should be accurate.
    assert result[0] == min_seen
    assert result[-1] == max_seen

    # 1st and 99th percentile should be within 0.1%.
    assert diff[1] <= 0.001 * range_top
    assert diff[-2] <= 0.001 * range_top

    # All percentiles should be within 0.5%.
    assert all(x <= 0.005 * range_top for x in diff)
def test_cont_transforms(octopus, clean_db):
  octopus.execute('CREATE STREAM cv_stream (x int, y text)')
  octopus.execute('CREATE STREAM ct_stream (x int, y text)')
  octopus.create_cv('test_cv', 'SELECT count(*) FROM cv_stream')
  octopus.create_ct('test_ct1', 'SELECT x::int, y::text FROM ct_stream WHERE mod(x, 2) = 0',
                     "octopus_stream_insert('cv_stream', 'cv_stream')")
  octopus.create_table('test_t', x='int', y='text')
  octopus.execute('''
  CREATE OR REPLACE FUNCTION test_tg()
  RETURNS trigger AS
  $$
  BEGIN
   INSERT INTO test_t (x, y) VALUES (NEW.x, NEW.y);
   RETURN NEW;
  END;
  $$
  LANGUAGE plpgsql;
  ''')
  octopus.create_ct('test_ct2', 'SELECT x::int, y::text FROM ct_stream',
                     'test_tg()')

  octopus.insert('ct_stream', ('x', 'y'), [(1, 'hello'), (2, 'world')])
  time.sleep(1)

  _dump(octopus, 'test_cont_transform.sql')

  octopus.drop_all()
  octopus.drop_table('test_t')
  octopus.execute('DROP FUNCTION test_tg()')

  _restore(octopus, 'test_cont_transform.sql')

  octopus.insert('ct_stream', ('x', 'y'), [(1, 'hello'), (2, 'world')])
  time.sleep(1)

  assert octopus.execute('SELECT count FROM test_cv').first()['count'] == 4
  ntups = 0
  for row in octopus.execute('SELECT x, count(*) FROM test_t GROUP BY x'):
    assert row['count'] == 2
    assert row['x'] in (1, 2)
    ntups += 1
  assert ntups == 2
Exemple #14
0
def test_distinct(octopus, clean_db):
    """
  Verify that streaming SELECT DISTINCT ON (...) works
  """
    octopus.create_stream('stream0', x='int', y='int', z='int')
    octopus.create_table('table0', x='int', y='int', z='int')
    q = 'SELECT DISTINCT ON (x::int, y::int - z::int) x::int, y::int FROM stream0'
    octopus.create_cv('test_distinct', q)

    uniques = defaultdict(set)
    values = []
    for _ in xrange(2000):
        x, y, z = random.randint(0, 20), random.randint(0, 20), random.randint(
            0, 20)
        values.append((x, y, z))
        uniques[(x, y - z)].add(y)

    octopus.insert('stream0', ['x', 'y', 'z'], values)
    octopus.insert('table0', ['x', 'y', 'z'], values)

    q = """
  SELECT DISTINCT ON (x::int, y::int - z::int) x::int, y::int FROM table0
  """
    expected = list(octopus.execute(q))
    expected = len(expected)

    assert expected < 2000

    result = octopus.execute('SELECT COUNT(*) FROM test_distinct').first()

    assert expected == result['count']

    # Check if the first row was selected for uniques
    result = octopus.execute('SELECT * FROM test_distinct')
    reverse_uniques = defaultdict(set)

    for (x, _), ys in uniques.iteritems():
        for y in ys:
            reverse_uniques[y].add(x)

    for row in result:
        assert row['x'] in reverse_uniques[row['y']]
def test_object_aggs(octopus, clean_db):
    """
    Verify that combines work properly on object aggs
    """
    q = """
    SELECT x::integer %% 10 AS k,
    json_agg(x), json_object_agg(x, y::float8), string_agg(s::text, \' :: \')FROM stream0 GROUP BY k;
    """
    desc = ('x', 'y', 's')
    octopus.create_stream('stream0', x='int', y='float8', s='text')
    octopus.create_cv('test_object_aggs', q)
    octopus.create_table('test_object_aggs_t',
                         x='integer',
                         y='float8',
                         s='text')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random(),
               str(n) * random.randint(1, 8))
        rows.append(row)

    octopus.insert('stream0', desc, rows)
    octopus.insert('test_object_aggs_t', desc, rows)

    tq = """
    SELECT json_agg(x), json_object_agg(x, y::float8), string_agg(s::text, \' :: \') FROM test_object_aggs_t
    """
    table_result = list(octopus.execute(tq))

    cq = """
    SELECT combine(json_agg), combine(json_object_agg), combine(string_agg) FROM test_object_aggs
    """
    cv_result = list(octopus.execute(cq))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert sorted(tr[0]) == sorted(cr[0])
        assert sorted(tr[1]) == sorted(cr[1])
        assert sorted(tr[2]) == sorted(cr[2])
def test_hypothetical_set_aggs(octopus, clean_db):
    """
    Verify that combines work properly on HS aggs
    """
    q = """
    SELECT x::integer %% 10 AS k,
    rank(256) WITHIN GROUP (ORDER BY x),
    dense_rank(256) WITHIN GROUP (ORDER BY x)
    FROM stream0 GROUP BY k
    """
    desc = ('x', 'y')
    octopus.create_stream('stream0', x='int', y='float8')
    octopus.create_cv('test_hs_aggs', q)
    octopus.create_table('test_hs_aggs_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    octopus.insert('stream0', desc, rows)
    octopus.insert('test_hs_aggs_t', desc, rows)

    # Note that the CQ will use the HLL variant of dense_rank,
    # so use hll_dense_rank on the table too
    tq = """
    SELECT rank(256) WITHIN GROUP (ORDER BY x), hll_dense_rank(256) WITHIN GROUP (ORDER BY x)
    FROM test_hs_aggs_t
    """
    table_result = list(octopus.execute(tq))

    cq = """
    SELECT combine(rank), combine(dense_rank) FROM test_hs_aggs
    """
    cv_result = list(octopus.execute(cq))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert tr == cr
def test_colums_subset(octopus, clean_db):
    """
    Verify that copying data from a file into a stream works when the file's input
    columns are a subset of the stream0's columns
    """
    octopus.create_stream('stream0', x='int', y='float8', z='numeric', m='int')
    q = 'SELECT sum(x::integer) AS s0, sum(y::float8) AS s1, avg(z::numeric), max(m::integer) FROM stream0'
    octopus.create_cv('test_copy_subset', q)
    octopus.create_table('test_copy_subset_t',
                         x='integer',
                         y='float8',
                         z='numeric')

    path = os.path.abspath(os.path.join(octopus.tmp_dir, 'test_copy.csv'))

    rows = []
    for n in range(10000):
        row = random.randint(1, 1024), random.randint(1, 1024), random.random()
        rows.append(row)

    _generate_csv(path, rows, desc=('x', 'y', 'z'))

    octopus.execute(
        'COPY test_copy_subset_t (x, y, z) FROM \'%s\' HEADER CSV' % path)

    octopus.execute('COPY stream0 (x, y, z) FROM \'%s\' HEADER CSV' % path)

    expected = octopus.execute(
        'SELECT sum(x::integer) AS s0, sum(y::float8) AS s1, avg(z::numeric) FROM test_copy_subset_t'
    ).first()
    result = list(octopus.execute('SELECT s0, s1, avg FROM test_copy_subset'))

    assert len(result) == 1

    result = result[0]

    assert result[0] == expected[0]
    assert result[1] == expected[1]
    assert result[2] == expected[2]
def test_windowed_combine(octopus, clean_db):
    """
    Verify that windowed queries with combines work
    """
    q = """
    SELECT x::integer, avg(y::integer) FROM stream0 GROUP BY x
    """
    desc = ('x', 'y')
    octopus.create_stream('stream0', x='int', y='float8')
    octopus.create_cv('test_windowed_combine', q)
    octopus.create_table('test_windowed_combine_t', x='integer', y='integer')

    rows = []
    for n in range(10000):
        row = (n, n)
        rows.append(row)

    octopus.insert('stream0', desc, rows)
    octopus.insert('test_windowed_combine_t', desc, rows)

    table = """
    SELECT first_value(x) OVER w, avg(y) OVER w
    FROM test_windowed_combine_t
    WINDOW w AS (ORDER BY x ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING)
    ORDER BY first_value
    """
    expected = list(octopus.execute(table))
    combine = """
    SELECT first_value(x) OVER w, avg(avg) OVER w
    FROM test_windowed_combine
    WINDOW w AS (ORDER BY x ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING)
    ORDER BY first_value
    """
    actual = list(octopus.execute(combine))

    for e, a in zip(expected, actual):
        assert e == a
Exemple #19
0
def test_null_groups(octopus, clean_db):
    """
    Verify that null group columns are considered equal
    """
    octopus.create_stream('s', x='int', y='int', z='int')
    q = """
    SELECT x::integer, y::integer, z::integer, COUNT(*) FROM s
    GROUP BY x, y, z;
    """
    desc = ('x', 'y', 'z')
    octopus.create_cv('test_null_groups', q)
    octopus.create_table('test_null_groups_t',
                         x='integer',
                         y='integer',
                         z='integer')

    rows = []
    for n in range(10000):
        vals = list(random.randint(0, 10) for n in range(3))
        vals = map(lambda n: random.random() > 0.1 and n or None, vals)
        rows.append(tuple(vals))

    octopus.insert('s', desc, rows)
    octopus.insert('test_null_groups_t', desc, rows)

    table_q = """
    SELECT x, y, z, COUNT(*) FROM test_null_groups_t
    GROUP BY x, y, z ORDER BY x, y, z;
    """
    expected = list(octopus.execute(table_q))
    result = list(
        octopus.execute(
            'SELECT x, y, z, count FROM test_null_groups ORDER BY x, y, z'))

    for r, e in zip(result, expected):
        assert r == e
def test_stats_aggs(octopus, clean_db):
    """
    Verify that combines work on stats aggs
    """
    q = """
    SELECT x::integer %% 10 AS k,
    regr_sxx(x, y::float8), stddev(x) FROM stream0 GROUP BY k;
    """
    desc = ('x', 'y')
    octopus.create_stream('stream0', x='int', y='float8')
    octopus.create_cv('test_stats_aggs', q)
    octopus.create_table('test_stats_aggs_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    octopus.insert('stream0', desc, rows)
    octopus.insert('test_stats_aggs_t', desc, rows)

    tq = """
    SELECT regr_sxx(x, y::float8), stddev(x) FROM test_stats_aggs_t
    """
    table_result = list(octopus.execute(tq))

    cq = """
    SELECT combine(regr_sxx), combine(stddev) FROM test_stats_aggs
    """
    cv_result = list(octopus.execute(cq))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert abs(tr[0] - cr[0]) < 0.00001
        assert abs(tr[1] - cr[1]) < 0.00001
def test_hll_distinct(octopus, clean_db):
    """
    Verify that combines work on HLL COUNT DISTINCT queries
    """
    q = """
    SELECT x::integer %% 10 AS k, COUNT(DISTINCT x) AS count FROM stream0 GROUP BY k
    """
    desc = ('x', 'y')
    octopus.create_stream('stream0', x='int', y='float8')
    octopus.create_cv('test_hll_distinct', q)
    octopus.create_table('test_hll_distinct_t', x='integer', y='float8')

    rows = []
    for n in range(10000):
        row = (random.randint(0, 1000), random.random())
        rows.append(row)

    octopus.insert('stream0', desc, rows)
    octopus.insert('test_hll_distinct_t', desc, rows)

    # Note that the CQ will use the HLL variant of COUNT DISTINCT,
    # so use hll_count_distinct on the table too
    tq = """
    SELECT hll_count_distinct(x) FROM test_hll_distinct_t
    """
    table_result = list(octopus.execute(tq))

    cq = """
    SELECT combine(count) FROM test_hll_distinct
    """
    cv_result = list(octopus.execute(cq))

    assert len(table_result) == len(cv_result)

    for tr, cr in zip(table_result, cv_result):
        assert tr == cr
Exemple #22
0
def test_binary_upgrade(octopus, clean_db):
    """
  Verify that binary upgrades properly transfer all objects and data
  into the new installation
  """
    # Create some regular tables with data, and create an index on half of them
    for n in range(16):
        name = 't_%d' % n
        octopus.create_table(name, x='integer', y='text', z='text')
        rows = [(x, name, name) for x in range(1000)]
        octopus.insert(name, ('x', 'y', 'z'), rows)
        if n >= 8:
            octopus.execute('CREATE INDEX idx_%s ON %s(y)' % (name, name))

    # Create some streams
    for n in range(8):
        name = 's_%d' % n
        octopus.create_stream(name, x='integer', y='text')

    # Now create some CVs with data, some with indices
    for n in range(32):
        name = 'cv_%d' % n
        octopus.create_stream('stream_%d' % n, x='int', y='text', z='text')
        octopus.create_cv(
            name,
            'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM stream_%d GROUP BY z'
            % n)
        rows = [(x, name, name) for x in range(1000)]
        octopus.insert('stream_%d' % n, ('x', 'y', 'z'), rows)
        if n >= 16:
            octopus.execute('CREATE INDEX idx_%s ON %s(z)' % (name, name))

    # Now create some in another namespace
    octopus.execute('CREATE SCHEMA namespace')
    for n in range(8):
        name = 'namespace.cv_%d' % n
        octopus.create_stream('namespace.stream_%d' % n,
                              x='int',
                              y='text',
                              z='text')
        octopus.create_cv(
            name,
            'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM namespace.stream_%d GROUP BY z'
            % n)
        rows = [(x, name, name) for x in range(1000)]
        octopus.insert('namespace.stream_%d' % n, ('x', 'y', 'z'), rows)
        if n >= 4:
            octopus.execute('CREATE INDEX namespace_idx_%d ON %s(z)' %
                            (n, name))

    create_fn = """
  CREATE OR REPLACE FUNCTION tg_fn()
  RETURNS trigger AS
  $$
  BEGIN
   RETURN NEW;
  END;
  $$
  LANGUAGE plpgsql;
  """
    octopus.execute(create_fn)

    octopus.create_stream('stream0', z='text')

    # Create some transforms
    for n in range(8):
        name = 'ct_%d' % n
        octopus.create_ct(name, 'SELECT z::text FROM stream0', 'tg_fn()')

    time.sleep(10)

    old_bin_dir = new_bin_dir = octopus.bin_dir
    old_data_dir = octopus.data_dir
    new_data_dir = os.path.abspath('test_binary_upgrade_data_dir')

    octopus.stop()

    p = subprocess.Popen(
        [os.path.join(octopus.bin_dir, 'octopus-init'), '-D', new_data_dir])
    stdout, stderr = p.communicate()

    result = subprocess.check_call([
        os.path.join(octopus.bin_dir, 'octopus-upgrade'), '-b', old_bin_dir,
        '-B', new_bin_dir, '-d', old_data_dir, '-D', new_data_dir
    ])

    assert result == 0

    # The cleanup path expects this to be running, but we're done with it
    octopus.run()

    # octopus-upgrade returned successfully and has already done sanity checks
    # but let's manually verify that all objects were migrated to the new data directory
    upgraded = Octopus(data_dir=new_data_dir)
    upgraded.run()

    # Tables
    for n in range(16):
        name = 't_%d' % n
        q = 'SELECT x, y, z FROM %s ORDER BY x' % name
        rows = upgraded.execute(q)
        for i, row in enumerate(rows):
            x, y, z = row
            assert x == i
            assert y == name
            assert z == name

    # Streams
    for n in range(8):
        name = 's_%d' % n
        rows = list(
            upgraded.execute(
                "SELECT oid FROM pg_class WHERE relkind = '$' AND relname = '%s'"
                % name))
        assert len(rows) == 1

    # CVs
    for n in range(32):
        name = 'cv_%d' % n
        rows = list(
            upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
        assert len(rows) == 1

        assert rows[0][0] == name
        assert rows[0][1] == 1
        assert rows[0][2] == 1000

    # CVs in separate schema
    for n in range(8):
        name = 'namespace.cv_%d' % n
        rows = list(
            upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
        assert len(rows) == 1

        assert rows[0][0] == name
        assert rows[0][1] == 1
        assert rows[0][2] == 1000

    # Transforms
    for n in range(8):
        name = 'ct_%d' % n
        q = """
    SELECT c.relname FROM pg_class c JOIN octopus_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
        rows = list(upgraded.execute(q))
        assert len(rows) == 1

    upgraded.stop()
    shutil.rmtree(new_data_dir)