Example #1
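# These snippets are taken from a PipelineDB test suite and assume the usual
# module preamble, roughly the imports below (the pytest fixtures
# `pipeline`/`clean_db` and helpers such as _dump/_restore are provided by the
# harness and are not shown here):
import os
import shutil
import subprocess
import tempfile
import time

import pytest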
def test_multiple_insert(pipeline, clean_db):
  pipeline.create_cv('cv0', 'SELECT count(*) FROM stream1')
  pipeline.create_cv('cv1', 'SELECT count(*) FROM stream2')
  pipeline.create_ct('ct1', 'SELECT x::int FROM stream WHERE mod(x, 2) = 0', "pipeline_stream_insert('stream1', 'stream2')")

  pipeline.insert('stream', ('x', ), [(n, ) for n in range(1000)])

  count = pipeline.execute('SELECT count FROM cv0').first()['count']
  assert count == 500
  count = pipeline.execute('SELECT count FROM cv1').first()['count']
  assert count == 500
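# For context, with the classic standalone-PipelineDB syntax the helper calls
# above correspond roughly to SQL of this shape (a sketch, not necessarily what
# the harness emits verbatim):
#
#   CREATE CONTINUOUS VIEW cv0 AS SELECT count(*) FROM stream1;
#   CREATE CONTINUOUS VIEW cv1 AS SELECT count(*) FROM stream2;
#   CREATE CONTINUOUS TRANSFORM ct1 AS
#     SELECT x::int FROM stream WHERE mod(x, 2) = 0
#     THEN EXECUTE PROCEDURE pipeline_stream_insert('stream1', 'stream2');
#
# Every even x written to "stream" is forwarded to both stream1 and stream2,
# so each counting view ends up with 500 of the 1000 inserted rows.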
def test_nested_transforms(pipeline, clean_db):
  pipeline.create_cv('cv0', 'SELECT count(*) FROM stream4')
  pipeline.create_cv('cv1', 'SELECT count(*) FROM stream2')
  pipeline.create_ct('ct0', 'SELECT x::int FROM stream2 WHERE mod(x, 4) = 0', "pipeline_stream_insert('stream4')")
  pipeline.create_ct('ct1', 'SELECT x::int FROM stream WHERE mod(x, 2) = 0', "pipeline_stream_insert('stream2')")

  pipeline.insert('stream', ('x', ), [(n, ) for n in range(1000)])
  time.sleep(1)

  count = pipeline.execute('SELECT count FROM cv0').first()['count']
  assert count == 250
  count = pipeline.execute('SELECT count FROM cv1').first()['count']
  assert count == 500
def test_cont_transforms(pipeline, clean_db):
  pipeline.execute('CREATE FOREIGN TABLE cv_stream (x int, y text) SERVER pipelinedb')
  pipeline.execute('CREATE FOREIGN TABLE ct_stream (x int, y text) SERVER pipelinedb')
  pipeline.create_cv('test_cv', 'SELECT count(*) FROM cv_stream')
  pipeline.create_ct('test_ct1', 'SELECT x::int, y::text FROM ct_stream WHERE mod(x, 2) = 0',
                     "pipelinedb.insert_into_stream('cv_stream', 'cv_stream')")
  pipeline.create_table('test_t', x='int', y='text')
  pipeline.execute('''
  CREATE OR REPLACE FUNCTION test_tg()
  RETURNS trigger AS
  $$
  BEGIN
   INSERT INTO test_t (x, y) VALUES (NEW.x, NEW.y);
   RETURN NEW;
  END;
  $$
  LANGUAGE plpgsql;
  ''')
  pipeline.create_ct('test_ct2', 'SELECT x::int, y::text FROM ct_stream',
                     'test_tg')

  pipeline.insert('ct_stream', ('x', 'y'), [(1, 'hello'), (2, 'world')])
  time.sleep(1)

  assert pipeline.execute('SELECT count FROM test_cv')[0]['count'] == 2

  _dump(pipeline, 'test_cont_transform.sql')

  pipeline.drop_all()
  pipeline.drop_table('test_t')
  pipeline.execute('DROP FUNCTION test_tg()')

  _restore(pipeline, 'test_cont_transform.sql')
  
  pipeline.insert('ct_stream', ('x', 'y'), [(1, 'hello'), (2, 'world')])
  time.sleep(1)

  assert pipeline.execute('SELECT count FROM test_cv')[0]['count'] == 4
  ntups = 0
  
  for row in pipeline.execute('SELECT x, count(*) FROM test_t GROUP BY x'):
    assert row['count'] == 2
    assert row['x'] in (1, 2)
    ntups += 1
  assert ntups == 2
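# _dump and _restore are helpers defined elsewhere in this test module. A
# minimal sketch of what they could look like, assuming a plain pg_dump/psql
# round trip against the test database (the database name, flags and unused
# pipeline argument are assumptions, not the suite's real implementation):
import subprocess

def _dump(pipeline, path):
  # write schema and data of the test database out to a SQL script
  subprocess.check_call(['pg_dump', '-d', 'postgres', '-f', path])

def _restore(pipeline, path):
  # replay the SQL script into the now-empty database
  subprocess.check_call(['psql', '-d', 'postgres', '-f', path])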
def test_nested_transforms(pipeline, clean_db):
  pipeline.create_stream('stream0', x='int')
  pipeline.create_stream('stream2', x='int')
  pipeline.create_stream('stream4', x='int')

  pipeline.create_cv('cv0', 'SELECT count(*) FROM stream4')
  pipeline.create_cv('cv1', 'SELECT count(*) FROM stream2')
  pipeline.create_ct('ct0', 'SELECT x::int FROM stream2 WHERE mod(x, 4) = 0',
             "pipelinedb.insert_into_stream('stream4')")
  pipeline.create_ct('ct1', 'SELECT x::int FROM stream0 WHERE mod(x, 2) = 0',
             "pipelinedb.insert_into_stream('stream2')")

  pipeline.insert('stream0', ('x',), [(n,) for n in range(1000)])

  count = pipeline.execute('SELECT count FROM cv0')[0]['count']
  assert count == 250
  count = pipeline.execute('SELECT count FROM cv1')[0]['count']
  assert count == 500
def test_deadlock_regress(pipeline, clean_db):
    nitems = 2000000
    tmp_file = os.path.join(tempfile.gettempdir(), 'tmp.json')
    query = 'SELECT generate_series(1, %d) AS n' % nitems
    pipeline.execute("COPY (%s) TO '%s'" % (query, tmp_file))

    pipeline.create_stream('s1', n='int')
    pipeline.create_stream('s2', n='int')
    pipeline.create_ct('ct', 'SELECT n FROM s1 WHERE n IS NOT NULL',
                       "pipeline_stream_insert('s2')")
    pipeline.create_cv('cv', 'SELECT count(*) FROM s2')

    for copy in [True, False]:
        for nworkers in [1, 4]:
            for sync in ['off', 'on']:
                pipeline.stop()
                pipeline.run({
                    'continuous_query_num_workers': nworkers,
                    'synchronous_stream_insert': sync
                })

                pipeline.execute('TRUNCATE CONTINUOUS VIEW cv')
                pipeline.execute('COMMIT')

                if copy:
                    pipeline.execute("COPY s1 (n) FROM '%s'" % tmp_file)
                else:
                    pipeline.execute('INSERT INTO s1 (n) %s' % query)

                count = dict(
                    pipeline.execute('SELECT count FROM cv').first() or {})
                ntries = 5
                while count.get('count') != nitems and ntries > 0:
                    assert sync == 'off'
                    time.sleep(1)
                    count = dict(
                        pipeline.execute('SELECT count FROM cv').first() or {})
                    ntries -= 1
                assert count and count['count'] == nitems

    os.remove(tmp_file)

    pipeline.stop()
    pipeline.run()
def test_transforms(pipeline, clean_db):
  """
  Verify that continuous transforms work properly on output streams
  """
  pipeline.create_stream('stream0', x='int')
  pipeline.create_cv('sw', 'SELECT x::integer, COUNT(*) FROM stream0 GROUP BY x',
                     sw='5 seconds')

  # Write a row to a stream each time a row goes out of window
  q = 'SELECT (old).x FROM sw_osrel WHERE old IS NOT NULL AND new IS NULL'
  pipeline.create_stream('oow_stream', x='integer')
  pipeline.create_ct('ct', q, "pipeline_stream_insert('oow_stream')")
  pipeline.create_cv('ct_recv', 'SELECT x FROM oow_stream')

  pipeline.insert('stream0', ('x',), [(x % 100,) for x in range(10000)])
  time.sleep(7)

  rows = list(pipeline.execute('SELECT * FROM ct_recv'))
  assert len(rows) == 100
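# Background for the query above: a sliding-window continuous view exposes an
# output stream (here sw_osrel) whose tuples carry old and new composite
# values. A tuple with old IS NOT NULL AND new IS NULL marks a group falling
# out of the window, so once the 5-second window expires the transform fires
# once per distinct x (100 values), which is exactly what ct_recv counts.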
def test_cont_transforms(pipeline, clean_db):
  pipeline.execute('CREATE STREAM cv_stream (x int, y text)')
  pipeline.execute('CREATE STREAM ct_stream (x int, y text)')
  pipeline.create_cv('test_cv', 'SELECT count(*) FROM cv_stream')
  pipeline.create_ct('test_ct1', 'SELECT x::int, y::text FROM ct_stream WHERE mod(x, 2) = 0',
                     "pipeline_stream_insert('cv_stream', 'cv_stream')")
  pipeline.create_table('test_t', x='int', y='text')
  pipeline.execute('''
  CREATE OR REPLACE FUNCTION test_tg()
  RETURNS trigger AS
  $$
  BEGIN
   INSERT INTO test_t (x, y) VALUES (NEW.x, NEW.y);
   RETURN NEW;
  END;
  $$
  LANGUAGE plpgsql;
  ''')
  pipeline.create_ct('test_ct2', 'SELECT x::int, y::text FROM ct_stream',
                     'test_tg()')

  pipeline.insert('ct_stream', ('x', 'y'), [(1, 'hello'), (2, 'world')])
  time.sleep(1)

  _dump(pipeline, 'test_cont_transform.sql')

  pipeline.drop_all()
  pipeline.drop_table('test_t')
  pipeline.execute('DROP FUNCTION test_tg()')

  _restore(pipeline, 'test_cont_transform.sql')

  pipeline.insert('ct_stream', ('x', 'y'), [(1, 'hello'), (2, 'world')])
  time.sleep(1)

  assert pipeline.execute('SELECT count FROM test_cv').first()['count'] == 4
  ntups = 0
  for row in pipeline.execute('SELECT x, count(*) FROM test_t GROUP BY x'):
    assert row['count'] == 2
    assert row['x'] in (1, 2)
    ntups += 1
  assert ntups == 2
def test_deadlock_regress(pipeline, clean_db):
  nitems = 2000000
  tmp_file = os.path.join(tempfile.gettempdir(), 'tmp.json')
  query = 'SELECT generate_series(1, %d) AS n' % nitems
  pipeline.execute("COPY (%s) TO '%s'" % (query, tmp_file))

  pipeline.create_stream('s1', n='int')
  pipeline.create_stream('s2', n='int')
  pipeline.create_ct('ct', 'SELECT n FROM s1 WHERE n IS NOT NULL',
                     "pipeline_stream_insert('s2')")
  pipeline.create_cv('cv', 'SELECT count(*) FROM s2')

  for copy in [True, False]:
    for nworkers in [1, 4]:
      for sync in ['off', 'on']:
        pipeline.stop()
        pipeline.run({
          'continuous_query_num_workers': nworkers,
          'synchronous_stream_insert': sync
          })

        pipeline.execute('TRUNCATE CONTINUOUS VIEW cv')
        pipeline.execute('COMMIT')

        if copy:
          pipeline.execute("COPY s1 (n) FROM '%s'" % tmp_file)
        else:
          pipeline.execute('INSERT INTO s1 (n) %s' % query)

        count = dict(pipeline.execute('SELECT count FROM cv').first() or {})
        ntries = 5
        while count.get('count') != nitems and ntries > 0:
          assert sync == 'off'
          time.sleep(1)
          count = dict(pipeline.execute('SELECT count FROM cv').first() or {})
          ntries -= 1
        assert count and count['count'] == nitems

  os.remove(tmp_file)

  pipeline.stop()
  pipeline.run()
def test_deadlock_regress(pipeline, clean_db):
  nitems = 2000000
  tmp_file = os.path.join(tempfile.gettempdir(), 'tmp.json')
  query = 'SELECT generate_series(1, %d) AS n' % nitems
  pipeline.execute("COPY (%s) TO '%s'" % (query, tmp_file))

  pipeline.create_stream('s1', n='int')
  pipeline.create_stream('s2', n='int')
  pipeline.create_ct('ct', 'SELECT n FROM s1 WHERE n IS NOT NULL',
             "pipelinedb.insert_into_stream('s2')")
  pipeline.create_cv('cv', 'SELECT count(*) FROM s2')

  for copy in [True, False]:
    for nworkers in [1, 4]:
      for sync in ['receive', 'commit']:
        pipeline.stop()
        pipeline.run({
          'pipelinedb.num_workers': nworkers,
          'pipelinedb.stream_insert_level': 'sync_%s' % sync
          })

        pipeline.execute("SELECT pipelinedb.truncate_continuous_view('cv')")
        pipeline.execute('COMMIT')

        if copy:
          pipeline.execute("COPY s1 (n) FROM '%s'" % tmp_file)
        else:
          pipeline.execute('INSERT INTO s1 (n) %s' % query)

        count = dict(pipeline.execute('SELECT count FROM cv')[0] or {})
        ntries = 5
        while count.get('count') != nitems and ntries > 0:
          assert sync == 'receive'
          time.sleep(1)
          count = dict(pipeline.execute('SELECT count FROM cv')[0] or {})
          ntries -= 1
        assert count and count['count'] == nitems

  os.remove(tmp_file)

  pipeline.stop()
  pipeline.run()
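# The nested loops above run the same workload under each stream-insert level:
# with sync_commit the INSERT/COPY only returns once the continuous queries
# have applied the events, so the count must match immediately, whereas with
# sync_receive the workers have merely received the events and may still be
# applying them, which is why the polling loop (and the `assert sync ==
# 'receive'` guard) is only expected to execute in that mode.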
Example #11
def test_transforms(pipeline, clean_db):
    """
  Verify that continuous transforms work properly on output streams
  """
    pipeline.create_stream('stream0', x='int')
    pipeline.create_cv('sw',
                       'SELECT x::integer, COUNT(*) FROM stream0 GROUP BY x',
                       sw='5 seconds')

    # Write a row to a stream each time a row goes out of window
    q = 'SELECT (old).x FROM sw_osrel WHERE old IS NOT NULL AND new IS NULL'
    pipeline.create_stream('oow_stream', x='integer')
    pipeline.create_ct('ct', q, "pipeline_stream_insert('oow_stream')")
    pipeline.create_cv('ct_recv', 'SELECT x FROM oow_stream')

    pipeline.insert('stream0', ('x', ), [(x % 100, ) for x in range(10000)])
    time.sleep(7)

    rows = list(pipeline.execute('SELECT * FROM ct_recv'))
    assert len(rows) == 100
Example #12
def test_binary_upgrade(pipeline, clean_db):
    """
  Verify that binary upgrades properly transfer all objects and data
  into the new installation
  """
    # Create some regular tables with data, and create an index on half of them
    for n in range(16):
        name = 't_%d' % n
        pipeline.create_table(name, x='integer', y='text', z='text')
        rows = [(x, name, name) for x in range(1000)]
        pipeline.insert(name, ('x', 'y', 'z'), rows)
        if n >= 8:
            pipeline.execute('CREATE INDEX idx_%s ON %s(y)' % (name, name))

    # Create some streams
    for n in range(8):
        name = 's_%d' % n
        pipeline.create_stream(name, x='integer', y='text')

    # Now create some CVs with data, some with indices
    for n in range(32):
        name = 'cv_%d' % n
        pipeline.create_stream('stream_%d' % n, x='int', y='text', z='text')
        pipeline.create_cv(
            name,
            'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM stream_%d GROUP BY z'
            % n)
        rows = [(x, name, name) for x in range(1000)]
        pipeline.insert('stream_%d' % n, ('x', 'y', 'z'), rows)
        if n >= 16:
            pipeline.execute('CREATE INDEX idx_%s ON %s(z)' % (name, name))

    # Now create some in another namespace
    pipeline.execute('CREATE SCHEMA namespace')
    for n in range(8):
        name = 'namespace.cv_%d' % n
        pipeline.create_stream('namespace.stream_%d' % n,
                               x='int',
                               y='text',
                               z='text')
        pipeline.create_cv(
            name,
            'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM namespace.stream_%d GROUP BY z'
            % n)
        rows = [(x, name, name) for x in range(1000)]
        pipeline.insert('namespace.stream_%d' % n, ('x', 'y', 'z'), rows)
        if n >= 4:
            pipeline.execute('CREATE INDEX namespace_idx_%d ON %s(z)' %
                             (n, name))

    create_fn = """
  CREATE OR REPLACE FUNCTION tg_fn()
  RETURNS trigger AS
  $$
  BEGIN
   RETURN NEW;
  END;
  $$
  LANGUAGE plpgsql;
  """
    pipeline.execute(create_fn)

    pipeline.create_stream('stream0', z='text')

    # Create some transforms
    for n in range(8):
        name = 'ct_%d' % n
        pipeline.create_ct(name, 'SELECT z::text FROM stream0', 'tg_fn()')

    time.sleep(10)

    old_bin_dir = new_bin_dir = pipeline.bin_dir
    old_data_dir = pipeline.data_dir
    new_data_dir = os.path.abspath('test_binary_upgrade_data_dir')

    pipeline.stop()

    p = subprocess.Popen(
        [os.path.join(pipeline.bin_dir, 'pipeline-init'), '-D', new_data_dir])
    stdout, stderr = p.communicate()

    result = subprocess.check_call([
        os.path.join(pipeline.bin_dir, 'pipeline-upgrade'), '-b', old_bin_dir,
        '-B', new_bin_dir, '-d', old_data_dir, '-D', new_data_dir
    ])

    assert result == 0

    # The cleanup path expects this to be running, but we're done with it
    pipeline.run()

    # pipeline-upgrade returned successfully and has already done sanity checks
    # but let's manually verify that all objects were migrated to the new data directory
    upgraded = PipelineDB(data_dir=new_data_dir)
    upgraded.run()

    # Tables
    for n in range(16):
        name = 't_%d' % n
        q = 'SELECT x, y, z FROM %s ORDER BY x' % name
        rows = upgraded.execute(q)
        for i, row in enumerate(rows):
            x, y, z = row
            assert x == i
            assert y == name
            assert z == name

    # Streams
    for n in range(8):
        name = 's_%d' % n
        rows = list(
            upgraded.execute(
                "SELECT oid FROM pg_class WHERE relkind = '$' AND relname = '%s'"
                % name))
        assert len(rows) == 1

    # CVs
    for n in range(32):
        name = 'cv_%d' % n
        rows = list(
            upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
        assert len(rows) == 1

        assert rows[0][0] == name
        assert rows[0][1] == 1
        assert rows[0][2] == 1000

    # CVs in separate schema
    for n in range(8):
        name = 'namespace.cv_%d' % n
        rows = list(
            upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
        assert len(rows) == 1

        assert rows[0][0] == name
        assert rows[0][1] == 1
        assert rows[0][2] == 1000

    # Transforms
    for n in range(8):
        name = 'ct_%d' % n
        q = """
    SELECT c.relname FROM pg_class c JOIN pipeline_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
        rows = list(upgraded.execute(q))
        assert len(rows) == 1

    upgraded.stop()
    shutil.rmtree(new_data_dir)
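# The PipelineDB object used above is the suite's server harness. A minimal
# sketch of the start/stop part of such a harness, assuming a standard
# PostgreSQL bin directory and pg_ctl (the class and attribute names here are
# assumptions, not the suite's real implementation):
import os
import subprocess

class MiniServer(object):
  def __init__(self, bin_dir, data_dir):
    self.bin_dir = bin_dir
    self.data_dir = data_dir

  def run(self):
    # start postgres on the given data directory, logging to server.log
    subprocess.check_call([
        os.path.join(self.bin_dir, 'pg_ctl'), '-D', self.data_dir,
        '-l', os.path.join(self.data_dir, 'server.log'), 'start'])

  def stop(self):
    # fast shutdown so the data directory can be removed right afterwards
    subprocess.check_call([
        os.path.join(self.bin_dir, 'pg_ctl'), '-D', self.data_dir,
        '-m', 'fast', 'stop'])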
def test_renamed_objects(pipeline, clean_db):
  """
  Verify that we can dump and restore renamed CQs and streams
  """
  pipeline.create_stream('s', x='int')

  q = """
  SELECT x, count(*) FROM s GROUP BY x;
  """
  pipeline.create_cv('cv_0', q)

  q = """
  SELECT (new).x, combine((delta).count) AS count FROM output_of('cv_0') GROUP BY x
  """
  pipeline.create_cv('combine_cv_0', q)

  q = """
  SELECT (new).count + 41 AS v FROM output_of('combine_cv_0')
  """
  pipeline.create_ct('transform_combine_cv_0', q)

  q = """
  SELECT max(v), count(*) FROM output_of('transform_combine_cv_0')
  """
  pipeline.create_cv('max_transform_combine_cv_0', q)

  rows = [(x,) for x in range(1000)]
  pipeline.insert('s', ('x',), rows)

  result = pipeline.execute('SELECT combine(count) FROM cv_0')[0]
  assert result['combine'] == 1000

  pipeline.execute('ALTER VIEW cv_0 RENAME TO cv_0_renamed')
  pipeline.execute('ALTER VIEW combine_cv_0 RENAME TO combine_cv_0_renamed')
  pipeline.execute('ALTER VIEW transform_combine_cv_0 RENAME TO transform_combine_cv_0_renamed')
  pipeline.execute('ALTER VIEW max_transform_combine_cv_0 RENAME TO max_transform_combine_cv_0_renamed')
  pipeline.execute('ALTER FOREIGN TABLE s RENAME TO s_renamed')

  result = pipeline.execute('SELECT combine(count) FROM cv_0_renamed')[0]
  assert result['combine'] == 1000

  result = pipeline.execute('SELECT combine(count) FROM combine_cv_0_renamed')[0]
  assert result['combine'] == 1000

  result = pipeline.execute('SELECT max, count FROM max_transform_combine_cv_0_renamed')[0]
  assert result['max'] == 42
  assert result['count'] == 1000

  _dump(pipeline, 'test_renamed_cqs.sql')

  pipeline.execute('DROP VIEW combine_cv_0_renamed CASCADE')
  pipeline.drop_all()

  _restore(pipeline, 'test_renamed_cqs.sql')

  result = pipeline.execute('SELECT combine(count) FROM cv_0_renamed')[0]
  assert result['combine'] == 1000

  result = pipeline.execute('SELECT combine(count) FROM combine_cv_0_renamed')[0]
  assert result['combine'] == 1000

  result = pipeline.execute('SELECT max, count FROM max_transform_combine_cv_0_renamed')[0]
  assert result['max'] == 42
  assert result['count'] == 1000

  # Now write some more rows to verify everything updates properly
  rows = [(x,) for x in range(1000)]
  pipeline.insert('s_renamed', ('x',), rows)

  result = pipeline.execute('SELECT combine(count) FROM cv_0_renamed')[0]
  assert result['combine'] == 2000

  result = pipeline.execute('SELECT combine(count) FROM combine_cv_0_renamed')[0]
  assert result['combine'] == 2000

  result = pipeline.execute('SELECT max, count FROM max_transform_combine_cv_0_renamed')[0]
  assert result['max'] == 43
  assert result['count'] == 2000
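  # Why 42 and then 43: transform_combine_cv_0 emits (new).count + 41 on every
  # update of combine_cv_0. After the first 1000 rows each x has count 1, so
  # the maximum transformed value is 1 + 41 = 42; after the second batch each
  # count reaches 2 and the maximum becomes 2 + 41 = 43.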

  pipeline.execute('DROP VIEW combine_cv_0_renamed CASCADE')
  
def test_chained_cqs(pipeline, clean_db):
  """
  Verify that multiple CQs chained together are properly dumped/restored
  """
  pipeline.create_stream('s', x='int')
  q = """
  SELECT x, count(*) FROM s GROUP BY x
  """
  pipeline.create_cv('cv0', q)
  q = """
  SELECT (new).x % 2 AS m FROM output_of('cv0')
  """
  pipeline.create_stream('ct_s', m='int')
  pipeline.create_ct('ct0', q, "pipelinedb.insert_into_stream('ct_s')")
  q = """
  SELECT m, count(*) FROM ct_s GROUP BY m
  """
  pipeline.create_cv('cv1', q)
  q = """
  SELECT combine((delta).count) AS count FROM output_of('cv1')
  """
  pipeline.create_cv('cv2', q)
  q = """
  SELECT combine((delta).count) FROM output_of('cv2')
  """
  pipeline.create_cv('cv3', q)
  pipeline.insert('s', ('x',), [(x,) for x in range(1000)])
  time.sleep(1)

  row = pipeline.execute('SELECT combine(count) FROM cv0')[0]
  assert row['combine'] == 1000

  row = pipeline.execute('SELECT combine(count) FROM cv1')[0]
  assert row['combine'] == 1000

  row = pipeline.execute('SELECT count FROM cv2')[0]
  assert row['count'] == 1000

  row = pipeline.execute('SELECT combine FROM cv3')[0]
  assert row['combine'] == 1000

  _dump(pipeline, 'test_chained_cqs.sql')

  pipeline.execute('DROP FOREIGN TABLE s CASCADE')
  pipeline.execute('DROP FOREIGN TABLE ct_s CASCADE')

  _restore(pipeline, 'test_chained_cqs.sql')

  pipeline.insert('s', ('x',), [(x,) for x in range(1000)])
  time.sleep(1)

  row = pipeline.execute('SELECT combine(count) FROM cv0')[0]
  assert row['combine'] == 2000

  row = pipeline.execute('SELECT combine(count) FROM cv1')[0]
  assert row['combine'] == 2000

  row = pipeline.execute('SELECT count FROM cv2')[0]
  assert row['count'] == 2000

  row = pipeline.execute('SELECT combine FROM cv3')[0]
  assert row['combine'] == 2000

  pipeline.execute('DROP FOREIGN TABLE s CASCADE')
  pipeline.execute('DROP FOREIGN TABLE ct_s CASCADE')
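# The chain above leans on output streams: output_of('cv0') emits tuples with
# old, new and delta composite values, and combine((delta).count) folds the
# incremental deltas into the downstream aggregates, so cv2/cv3 track the same
# totals as cv0/cv1 without ever reading the raw stream s again.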
Example #15
def test_binary_upgrade(pipeline, clean_db):
  """
  Verify that binary upgrades properly transfer all objects and data
  into the new installation
  """
  # Create some regular tables with data, and create an index on half of them
  for n in range(16):
    name = 't_%d' % n
    pipeline.create_table(name, x='integer', y='text', z='text')
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert(name, ('x', 'y', 'z'), rows)
    if n >= 8:
      pipeline.execute('CREATE INDEX idx_%s ON %s(y)' % (name, name))

  # Create some streams
  for n in range(8):
    name = 's_%d' % n
    pipeline.create_stream(name, x='integer', y='text')

  # Now create some CVs with data, some with indices
  for n in range(32):
    name = 'cv_%d' % n
    pipeline.create_cv(name, 'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM stream_%d GROUP BY z' % n)
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert('stream_%d' % n, ('x', 'y', 'z'), rows)
    if n >= 16:
      pipeline.execute('CREATE INDEX idx_%s ON %s(z)' % (name, name))

  # Now create some in another namespace
  pipeline.execute('CREATE SCHEMA namespace')
  for n in range(8):
    name = 'namespace.cv_%d' % n
    pipeline.create_cv(name, 'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM namespace.stream_%d GROUP BY z' % n)
    rows = [(x, name, name) for x in range(1000)]
    pipeline.insert('namespace.stream_%d' % n, ('x', 'y', 'z'), rows)
    if n >= 4:
      pipeline.execute('CREATE INDEX namespace_idx_%d ON %s(z)' % (n, name))

  create_fn = """
  CREATE OR REPLACE FUNCTION tg_fn()
  RETURNS trigger AS
  $$
  BEGIN
   RETURN NEW;
  END;
  $$
  LANGUAGE plpgsql;
  """
  pipeline.execute(create_fn)

  # Create some transforms
  for n in range(8):
    name = 'ct_%d' % n
    pipeline.create_ct(name, 'SELECT z::text FROM stream', 'tg_fn()')

  time.sleep(10)

  old_bin_dir = new_bin_dir = pipeline.bin_dir
  old_data_dir = pipeline.data_dir
  new_data_dir = os.path.abspath('test_binary_upgrade_data_dir')

  pipeline.stop()

  p = subprocess.Popen([
    os.path.join(pipeline.bin_dir, 'pipeline-init'), '-D', new_data_dir])
  stdout, stderr = p.communicate()

  result = subprocess.check_call([
    os.path.join(pipeline.bin_dir, 'pipeline-upgrade'),
    '-b', old_bin_dir, '-B', new_bin_dir,
    '-d', old_data_dir, '-D', new_data_dir])

  assert result == 0

  # The cleanup path expects this to be running, but we're done with it
  pipeline.run()

  # pipeline-upgrade returned successfully and has already done sanity checks
  # but let's manually verify that all objects were migrated to the new data directory
  upgraded = PipelineDB(data_dir=new_data_dir)
  upgraded.run()

  # Tables
  for n in range(16):
    name = 't_%d' % n
    q = 'SELECT x, y, z FROM %s ORDER BY x' % name
    rows = upgraded.execute(q)
    for i, row in enumerate(rows):
      x, y, z = row
      assert x == i
      assert y == name
      assert z == name

  # Streams
  for n in range(8):
    name = 's_%d' % n
    rows = list(upgraded.execute("SELECT oid FROM pg_class WHERE relkind = '$' AND relname = '%s'" % name))
    assert len(rows) == 1

  # CVs
  for n in range(32):
    name = 'cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1

    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 1000

  # CVs in separate schema
  for n in range(8):
    name = 'namespace.cv_%d' % n
    rows = list(upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
    assert len(rows) == 1

    assert rows[0][0] == name
    assert rows[0][1] == 1
    assert rows[0][2] == 1000

  # Transforms
  for n in range(8):
    name = 'ct_%d' % n
    q = """
    SELECT c.relname FROM pg_class c JOIN pipeline_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
    rows = list(upgraded.execute(q))
    assert len(rows) == 1

  upgraded.stop()
  shutil.rmtree(new_data_dir)
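# Note: this variant targets the standalone PipelineDB fork, so it drives the
# fork's pipeline-init and pipeline-upgrade binaries. The extension-based
# variant below uses stock initdb/pg_upgrade instead and enables the extension
# by appending shared_preload_libraries=pipelinedb to postgresql.conf.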
def test_binary_upgrade(pipeline, clean_db):
    """
  Verify that binary upgrades properly transfer all objects and data
  into the new installation
  """
    if pipeline.version_num == 110000:
        pytest.skip('skipping until PG11 supports dump/restore WITH OIDS')

    # Create some regular tables with data, and create an index on half of them
    for n in range(16):
        name = 't_%d' % n
        pipeline.create_table(name, x='integer', y='text', z='text')
        rows = [(x, name, name) for x in range(1000)]
        pipeline.insert(name, ('x', 'y', 'z'), rows)
        if n >= 8:
            pipeline.execute('CREATE INDEX idx_%s ON %s(y)' % (name, name))

    # Create some streams
    for n in range(8):
        name = 's_%d' % n
        pipeline.create_stream(name, x='integer', y='text')

    # Now create some CVs with data, some with indices
    for n in range(32):
        name = 'cv_%d' % n
        pipeline.create_stream('stream_%d' % n, x='int', y='text', z='text')
        pipeline.create_cv(
            name,
            'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM stream_%d GROUP BY z'
            % n)
        if n >= 16:
            pipeline.execute('CREATE INDEX idx_%s ON %s(z)' % (name, name))

    # Create some STJs
    for n in range(8):
        pipeline.create_cv(
            'stj_%d' % n,
            'SELECT t.x, count(*) FROM stream_%d s JOIN t_%d t ON s.x = t.x GROUP BY t.x'
            % (n, n))

    # Create some SW CVs
    for n in range(8):
        pipeline.create_cv('sw_%d' % n,
                           'SELECT count(*) FROM stream_%d' % n,
                           sw='%d days' % (n + 1),
                           step_factor=n + 1)

    # Create some CVs/CTs/streams that we'll rename
    for n in range(4):
        pipeline.create_stream('to_rename_s_%d' % n, x='int')
        pipeline.create_cv(
            'to_rename_cv_%d' % n,
            'SELECT x, count(*) FROM to_rename_s_%d GROUP BY x' % n)
        pipeline.create_ct('to_rename_ct_%d' % n,
                           'SELECT x FROM to_rename_s_%d' % n)
        pipeline.create_cv(
            'to_rename_ct_reader_%d' % n,
            "SELECT count(*) FROM output_of('to_rename_ct_%d')" % n)

        rows = [(x, ) for x in range(1000)]
        pipeline.insert('to_rename_s_%d' % n, ('x', ), rows)

    # Now rename them
    for n in range(4):
        pipeline.execute(
            'ALTER FOREIGN TABLE to_rename_s_%d RENAME TO renamed_s_%d' %
            (n, n))
        pipeline.execute('ALTER VIEW to_rename_cv_%d RENAME TO renamed_cv_%d' %
                         (n, n))
        pipeline.execute('ALTER VIEW to_rename_ct_%d RENAME TO renamed_ct_%d' %
                         (n, n))
        pipeline.execute(
            'ALTER VIEW to_rename_ct_reader_%d RENAME TO renamed_ct_reader_%d'
            % (n, n))

        # And write some data using the new stream names
        rows = [(x, ) for x in range(1000)]
        pipeline.insert('renamed_s_%d' % n, ('x', ), rows)

    # Create a CV chain that combines output streams
    q = """
  SELECT (new).z, combine((delta).count) AS count, combine((delta).distinct_count) AS distinct_count FROM output_of('cv_0') GROUP BY (new).z
  """
    pipeline.create_cv('combine_cv_0', q)
    q = """
  SELECT combine((delta).count) AS count, combine((delta).distinct_count) AS distinct_count FROM output_of('combine_cv_0')
  """
    pipeline.create_cv('combine_cv_1', q)

    for n in range(32):
        name = 'cv_%d' % n
        rows = [(x, name, name) for x in range(1000)]
        pipeline.insert('stream_%d' % n, ('x', 'y', 'z'), rows)

    # Create a CV with a TTL to verify TTL info is restored properly
    pipeline.create_cv(
        'ttlcv',
        'SELECT second(arrival_timestamp), count(*) FROM stream_0 GROUP BY second',
        ttl='1 hour',
        ttl_column='second')

    # Now create some in another namespace
    pipeline.execute('CREATE SCHEMA namespace')
    for n in range(8):
        name = 'namespace.cv_%d' % n
        pipeline.create_stream('namespace.stream_%d' % n,
                               x='int',
                               y='text',
                               z='text')
        pipeline.create_cv(
            name,
            'SELECT z::text, COUNT(DISTINCT z) AS distinct_count, COUNT(*) FROM namespace.stream_%d GROUP BY z'
            % n)
        rows = [(x, name, name) for x in range(1000)]
        pipeline.insert('namespace.stream_%d' % n, ('x', 'y', 'z'), rows)
        if n >= 4:
            pipeline.execute('CREATE INDEX namespace_idx_%d ON %s(z)' %
                             (n, name))

    create_fn = """
  CREATE OR REPLACE FUNCTION tg_fn()
  RETURNS trigger AS
  $$
  BEGIN
   RETURN NEW;
  END;
  $$
  LANGUAGE plpgsql;
  """
    pipeline.execute(create_fn)

    pipeline.create_stream('stream0', z='text')

    # Create some transforms with trigger functions
    for n in range(8):
        name = 'ct_%d' % n
        pipeline.create_ct(name, 'SELECT z::text FROM stream0', 'tg_fn')

    # Create some transforms without trigger functions
    for n in range(8):
        name = 'ct_no_trig_%d' % n
        pipeline.create_ct(name, 'SELECT z::text FROM stream0')

    time.sleep(10)

    old_bin_dir = new_bin_dir = pipeline.bin_dir
    old_data_dir = pipeline.data_dir
    new_data_dir0 = os.path.abspath('test_binary_upgrade_data_dir0')

    if os.path.exists(new_data_dir0):
        shutil.rmtree(new_data_dir0)

    pipeline.stop()

    p = subprocess.Popen(
        [os.path.join(pipeline.bin_dir, 'initdb'), '-D', new_data_dir0])
    stdout, stderr = p.communicate()

    with open(os.path.join(new_data_dir0, 'postgresql.conf'), 'a') as f:
        f.write('shared_preload_libraries=pipelinedb\n')
        f.write('max_worker_processes=128\n')
        f.write('pipelinedb.stream_insert_level=sync_commit\n')

    result = subprocess.check_call([
        os.path.join(pipeline.bin_dir, 'pg_upgrade'), '-b', old_bin_dir, '-B',
        new_bin_dir, '-d', old_data_dir, '-D', new_data_dir0
    ])

    assert result == 0

    # The cleanup path expects this to be running, but we're done with it
    pipeline.run()

    # pg_upgrade returned successfully and has already done sanity checks
    # but let's manually verify that all objects were migrated to the new data directory
    upgraded = PipelineDB(data_dir=new_data_dir0)
    upgraded.run()

    # Tables
    for n in range(16):
        name = 't_%d' % n
        q = 'SELECT x, y, z FROM %s ORDER BY x' % name
        rows = upgraded.execute(q)
        for i, row in enumerate(rows):
            assert row['x'] == i
            assert row['y'] == name
            assert row['z'] == name

    # Streams
    for n in range(8):
        name = 's_%d' % n
        rows = list(
            upgraded.execute(
                "SELECT oid FROM pg_class WHERE relkind = 'f' AND relname = '%s'"
                % name))
        assert len(rows) == 1

    # CVs
    for n in range(32):
        name = 'cv_%d' % n
        rows = list(
            upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
        assert len(rows) == 1

        assert rows[0][0] == name
        assert rows[0][1] == 1
        assert rows[0][2] == 1000

    # CV with TTL
    row = list(
        upgraded.execute(
            "SELECT ttl, ttl_attno FROM pg_class c JOIN pipelinedb.cont_query pq on c.oid = pq.relid WHERE c.relname = 'ttlcv'"
        ))[0]
    assert row[0] == 3600
    assert row[1] == 1

    # CVs in separate schema
    for n in range(8):
        name = 'namespace.cv_%d' % n
        rows = list(
            upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
        assert len(rows) == 1

        assert rows[0][0] == name
        assert rows[0][1] == 1
        assert rows[0][2] == 1000

    # Transforms with trigger functions
    for n in range(8):
        name = 'ct_%d' % n
        q = """
    SELECT c.relname FROM pg_class c JOIN pipelinedb.cont_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
        rows = list(upgraded.execute(q))
        assert len(rows) == 1

    # Transforms without trigger functions
    for n in range(8):
        name = 'ct_no_trig_%d' % n
        q = """
    SELECT c.relname FROM pg_class c JOIN pipelinedb.cont_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
        rows = list(upgraded.execute(q))
        assert len(rows) == 1

    # Verify SW CVs
    for n in range(8):
        name = 'sw_%d' % n
        row = upgraded.execute(
            "SELECT ttl, step_factor FROM pipelinedb.cont_query cq JOIN pg_class c ON cq.relid = c.oid WHERE relname = '%s'"
            % name)[0]
        assert row['ttl'] == (n + 1) * 3600 * 24
        assert row['step_factor'] == n + 1

        row = upgraded.execute('SELECT count FROM %s' % name)[0]
        assert row['count'] == 1000

    # Verify renamed CVs/CTs/streams
    for n in range(4):
        row = upgraded.execute('SELECT combine(count) FROM renamed_cv_%d' %
                               n)[0]
        assert row['combine'] == 2000
        row = upgraded.execute(
            'SELECT combine(count) FROM renamed_ct_reader_%d' % n)[0]
        assert row['combine'] == 2000

    # Verify chained CVs
    row = upgraded.execute(
        'SELECT z, count, distinct_count FROM combine_cv_0')[0]
    assert row['z'] == 'cv_0'
    assert row['count'] == 1000
    assert row['distinct_count'] == 1

    row = upgraded.execute('SELECT count, distinct_count FROM combine_cv_1')[0]
    assert row['count'] == 1000
    assert row['distinct_count'] == 1

    # Now insert some new data and verify CVs are still updating properly
    for n in range(32):
        name = 'cv_%d' % n
        rows = [(x, name, name) for x in range(1000)]
        upgraded.insert('stream_%d' % n, ('x', 'y', 'z'), rows)

    for n in range(32):
        name = 'cv_%d' % n
        rows = list(
            upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
        assert len(rows) == 1

        assert rows[0][0] == name
        assert rows[0][1] == 1
        assert rows[0][2] == 2000

    row = upgraded.execute(
        'SELECT z, count, distinct_count FROM combine_cv_0')[0]
    assert row['z'] == 'cv_0'
    assert row['count'] == 2000
    assert row['distinct_count'] == 1

    row = upgraded.execute('SELECT count, distinct_count FROM combine_cv_1')[0]
    assert row['count'] == 2000
    assert row['distinct_count'] == 1

    # Verify STJs
    for n in range(8):
        cv = 'stj_%d' % n
        row = upgraded.execute('SELECT sum(count) FROM %s' % cv)[0]
        assert row['sum'] == 2000

    # Rename objects again before the second upgrade
    for n in range(4):
        upgraded.execute(
            'ALTER FOREIGN TABLE renamed_s_%d RENAME TO renamed_again_s_%d' %
            (n, n))
        upgraded.execute(
            'ALTER VIEW renamed_cv_%d RENAME TO renamed_again_cv_%d' % (n, n))
        upgraded.execute(
            'ALTER VIEW renamed_ct_%d RENAME TO renamed_again_ct_%d' % (n, n))
        upgraded.execute(
            'ALTER VIEW renamed_ct_reader_%d RENAME TO renamed_again_ct_reader_%d'
            % (n, n))

        # And write some data using the new stream names
        rows = [(x, ) for x in range(1000)]
        upgraded.insert('renamed_again_s_%d' % n, ('x', ), rows)

    upgraded.stop()

    new_data_dir1 = os.path.abspath('test_binary_upgrade_data_dir1')
    if os.path.exists(new_data_dir1):
        shutil.rmtree(new_data_dir1)

    p = subprocess.Popen(
        [os.path.join(pipeline.bin_dir, 'initdb'), '-D', new_data_dir1])
    stdout, stderr = p.communicate()

    with open(os.path.join(new_data_dir1, 'postgresql.conf'), 'a') as f:
        f.write('shared_preload_libraries=pipelinedb\n')
        f.write('max_worker_processes=128\n')
        f.write('pipelinedb.stream_insert_level=sync_commit\n')

    # Now upgrade the already-upgraded DB to verify that an upgraded install can itself be upgraded properly
    result = subprocess.check_call([
        os.path.join(pipeline.bin_dir, 'pg_upgrade'), '-b', old_bin_dir, '-B',
        new_bin_dir, '-d', new_data_dir0, '-D', new_data_dir1
    ])

    assert result == 0

    # pg_upgrade has already done its own sanity checks,
    # but let's manually verify that all objects were migrated to the new data directory
    upgraded = PipelineDB(data_dir=new_data_dir1)
    upgraded.run()

    # Tables
    for n in range(16):
        name = 't_%d' % n
        q = 'SELECT x, y, z FROM %s ORDER BY x' % name
        rows = upgraded.execute(q)
        for i, row in enumerate(rows):
            assert row['x'] == i
            assert row['y'] == name
            assert row['z'] == name

    # Streams
    for n in range(8):
        name = 's_%d' % n
        rows = list(
            upgraded.execute(
                "SELECT oid FROM pg_class WHERE relkind = 'f' AND relname = '%s'"
                % name))
        assert len(rows) == 1

    # CVs
    for n in range(32):
        name = 'cv_%d' % n
        rows = list(
            upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
        assert len(rows) == 1

        assert rows[0][0] == name
        assert rows[0][1] == 1
        assert rows[0][2] == 2000

    # CV with TTL
    row = list(
        upgraded.execute(
            "SELECT ttl, ttl_attno FROM pg_class c JOIN pipelinedb.cont_query pq on c.oid = pq.relid WHERE c.relname = 'ttlcv'"
        ))[0]
    assert row[0] == 3600
    assert row[1] == 1

    # CVs in separate schema
    for n in range(8):
        name = 'namespace.cv_%d' % n
        rows = list(
            upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
        assert len(rows) == 1

        assert rows[0][0] == name
        assert rows[0][1] == 1
        assert rows[0][2] == 1000

    # Transforms with trigger functions
    for n in range(8):
        name = 'ct_%d' % n
        q = """
    SELECT c.relname FROM pg_class c JOIN pipelinedb.cont_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
        rows = list(upgraded.execute(q))
        assert len(rows) == 1

    # Transforms without trigger functions
    for n in range(8):
        name = 'ct_no_trig_%d' % n
        q = """
    SELECT c.relname FROM pg_class c JOIN pipelinedb.cont_query pq
    ON c.oid = pq.relid WHERE pq.type = 't' AND c.relname = '%s'
    """ % name
        rows = list(upgraded.execute(q))
        assert len(rows) == 1

    # Verify SW CVs
    for n in range(8):
        name = 'sw_%d' % n
        row = upgraded.execute(
            "SELECT ttl, step_factor FROM pipelinedb.cont_query cq JOIN pg_class c ON cq.relid = c.oid WHERE relname = '%s'"
            % name)[0]
        assert row['ttl'] == (n + 1) * 3600 * 24
        assert row['step_factor'] == n + 1

        row = upgraded.execute('SELECT count FROM %s' % name)[0]
        assert row['count'] == 2000

    # Verify renamed CVs/CTs/streams
    for n in range(4):
        row = upgraded.execute(
            'SELECT combine(count) FROM renamed_again_cv_%d' % n)[0]
        assert row['combine'] == 3000
        row = upgraded.execute(
            'SELECT combine(count) FROM renamed_again_ct_reader_%d' % n)[0]
        assert row['combine'] == 3000

    # Verify chained CV
    row = upgraded.execute(
        'SELECT z, count, distinct_count FROM combine_cv_0')[0]
    assert row['z'] == 'cv_0'
    assert row['count'] == 2000
    assert row['distinct_count'] == 1

    row = upgraded.execute('SELECT count, distinct_count FROM combine_cv_1')[0]
    assert row['count'] == 2000
    assert row['distinct_count'] == 1

    # Now insert some new data and verify CVs are still updating properly
    for n in range(32):
        name = 'cv_%d' % n
        rows = [(x, name, name) for x in range(1000)]
        upgraded.insert('stream_%d' % n, ('x', 'y', 'z'), rows)

    for n in range(32):
        name = 'cv_%d' % n
        rows = list(
            upgraded.execute('SELECT z, distinct_count, count FROM %s' % name))
        assert len(rows) == 1

        assert rows[0][0] == name
        assert rows[0][1] == 1
        assert rows[0][2] == 3000

    row = upgraded.execute(
        'SELECT z, count, distinct_count FROM combine_cv_0')[0]
    assert row['z'] == 'cv_0'
    assert row['count'] == 3000
    assert row['distinct_count'] == 1

    row = upgraded.execute('SELECT count, distinct_count FROM combine_cv_1')[0]
    assert row['count'] == 3000
    assert row['distinct_count'] == 1

    # Verify STJs
    for n in range(8):
        cv = 'stj_%d' % n
        row = upgraded.execute('SELECT sum(count) FROM %s' % cv)[0]
        assert row['sum'] == 3000

    upgraded.stop()

    pipeline.execute('DROP VIEW combine_cv_0 CASCADE')
    shutil.rmtree(new_data_dir0)
    shutil.rmtree(new_data_dir1)