Example 1
def test_race_conditions(zenith_env_builder: ZenithEnvBuilder, stop_value):

    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_safekeepers_race_conditions')
    pg = env.postgres.create_start('test_safekeepers_race_conditions')

    # we rely upon autocommit after each statement
    # as waiting for acceptors happens there
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    cur.execute('CREATE TABLE t(key int primary key, value text)')

    proc = Process(target=xmas_garland, args=(env.safekeepers, stop_value))
    proc.start()

    for i in range(1000):
        cur.execute("INSERT INTO t values (%s, 'payload');", (i + 1, ))

    cur.execute('SELECT sum(key) FROM t')
    assert cur.fetchone() == (500500, )

    stop_value.value = 1
    proc.join()
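
The xmas_garland helper used above is not shown in this snippet. A minimal sketch of what such a chaos loop might look like (the body, timings and strategy here are assumptions, not the real implementation): keep stopping and starting random safekeepers until the shared stop flag is set.

import random
import time

def xmas_garland(safekeepers, stop_value):
    # "Blink" random safekeepers on and off while the test workload runs,
    # until the main test sets stop_value.value to a non-zero value.
    while stop_value.value == 0:
        victim = random.choice(safekeepers)
        victim.stop()
        time.sleep(0.5)
        victim.start()
        time.sleep(0.5)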
Example 2
def test_pageserver_catchup_while_compute_down(
        zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_pageserver_catchup_while_compute_down')
    # Make shared_buffers large to ensure we won't query pageserver while it is down.
    pg = env.postgres.create_start(
        'test_pageserver_catchup_while_compute_down',
        config_lines=['shared_buffers=512MB'])

    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    # Create table, and insert some rows.
    cur.execute('CREATE TABLE foo (t text)')
    cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 10000) g
    ''')

    cur.execute("SELECT count(*) FROM foo")
    assert cur.fetchone() == (10000, )

    # Stop the pageserver. This is a more or less graceful shutdown, although
    # the page server doesn't currently have a shutdown routine so there's no difference
    # between stopping and crashing.
    env.pageserver.stop()

    # insert some more rows
    # since pageserver is shut down, these will be only on safekeepers
    cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 10000) g
    ''')

    # stop safekeepers gracefully
    env.safekeepers[0].stop()
    env.safekeepers[1].stop()
    env.safekeepers[2].stop()

    # start everything again
    # safekeepers must synchronize and pageserver must catch up
    env.pageserver.start()
    env.safekeepers[0].start()
    env.safekeepers[1].start()
    env.safekeepers[2].start()

    # restart compute node
    pg.stop_and_destroy().create_start(
        'test_pageserver_catchup_while_compute_down')

    # Ensure that the basebackup went correctly and the pageserver returned all the data
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    cur.execute("SELECT count(*) FROM foo")
    assert cur.fetchone() == (20000, )
Example 3
def test_restarts(zenith_env_builder: ZenithEnvBuilder):
    fault_probability = 0.01
    n_inserts = 1000
    n_acceptors = 3

    zenith_env_builder.num_safekeepers = n_acceptors
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_safekeepers_restarts')
    pg = env.postgres.create_start('test_safekeepers_restarts')

    # we rely upon autocommit after each statement
    # as waiting for acceptors happens there
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    failed_node = None
    cur.execute('CREATE TABLE t(key int primary key, value text)')
    for i in range(n_inserts):
        cur.execute("INSERT INTO t values (%s, 'payload');", (i + 1, ))

        if random.random() <= fault_probability:
            if failed_node is None:
                failed_node = env.safekeepers[random.randrange(0, n_acceptors)]
                failed_node.stop()
            else:
                failed_node.start()
                failed_node = None
    cur.execute('SELECT sum(key) FROM t')
    assert cur.fetchone() == (500500, )
Example 4
def test_tenants_normal_work(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
    """Tests tenants with and without wal acceptors"""
    if with_safekeepers:
        zenith_env_builder.num_safekeepers = 3

    env = zenith_env_builder.init_start()
    tenant_1 = env.zenith_cli.create_tenant()
    tenant_2 = env.zenith_cli.create_tenant()

    env.zenith_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
                                   tenant_id=tenant_1)
    env.zenith_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
                                   tenant_id=tenant_2)

    pg_tenant1 = env.postgres.create_start(
        f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
        tenant_id=tenant_1,
    )
    pg_tenant2 = env.postgres.create_start(
        f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
        tenant_id=tenant_2,
    )

    for pg in [pg_tenant1, pg_tenant2]:
        with closing(pg.connect()) as conn:
            with conn.cursor() as cur:
                # we rely upon autocommit after each statement
                # as waiting for acceptors happens there
                cur.execute("CREATE TABLE t(key int primary key, value text)")
                cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
                cur.execute("SELECT sum(key) FROM t")
                assert cur.fetchone() == (5000050000, )
Example 5
def test_timeline_status(zenith_env_builder: ZenithEnvBuilder):

    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_timeline_status')
    pg = env.postgres.create_start('test_timeline_status')

    wa = env.safekeepers[0]
    wa_http_cli = wa.http_client()
    wa_http_cli.check_status()

    # learn zenith timeline from compute
    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
    timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]

    # fetch something sensible from status
    epoch = wa_http_cli.timeline_status(tenant_id, timeline_id).acceptor_epoch

    pg.safe_psql("create table t(i int)")

    # ensure epoch goes up after reboot
    pg.stop().start()
    pg.safe_psql("insert into t values(10)")

    epoch_after_reboot = wa_http_cli.timeline_status(
        tenant_id, timeline_id).acceptor_epoch
    assert epoch_after_reboot > epoch
Example 6
def test_backpressure_received_lsn_lag(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()
    # Create a branch for us
    env.zenith_cli.create_branch('test_backpressure')

    pg = env.postgres.create_start(
        'test_backpressure', config_lines=['max_replication_write_lag=30MB'])
    log.info("postgres is running on 'test_backpressure' branch")

    # setup check thread
    check_stop_event = threading.Event()
    check_thread = threading.Thread(target=check_backpressure,
                                    args=(pg, check_stop_event))
    check_thread.start()

    # Configure failpoint to slow down walreceiver ingest
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
            pscur.execute("failpoints walreceiver-after-ingest=sleep(20)")

    # FIXME
    # Wait for the check thread to start.
    #
    # If the load starts too soon, the check thread cannot authenticate:
    # it fails to connect to the database because of the lag, while it waits
    # for the replay LSN to arrive.
    time.sleep(2)

    with pg_cur(pg) as cur:
        # Create and initialize test table
        cur.execute("CREATE TABLE foo(x bigint)")

        inserts_to_do = 2000000
        rows_inserted = 0

        while check_thread.is_alive() and rows_inserted < inserts_to_do:
            try:
                cur.execute(
                    "INSERT INTO foo select from generate_series(1, 100000)")
                rows_inserted += 100000
            except Exception as e:
                if check_thread.is_alive():
                    log.info('stopping check thread')
                    check_stop_event.set()
                    check_thread.join()
                    assert False, f"Exception {e} while inserting rows, but WAL lag is within configured threshold. That means backpressure is not tuned properly"
                else:
                    assert False, f"Exception {e} while inserting rows and WAL lag overflowed configured threshold. That means backpressure doesn't work."

        log.info(f"inserted {rows_inserted} rows")

    if check_thread.is_alive():
        log.info('stopping check thread')
        check_stop_event.set()
        check_thread.join()
        log.info('check thread stopped')
    else:
        assert False, "WAL lag overflowed configured threshold. That means backpressure doesn't work."
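
Neither pg_cur nor check_backpressure is defined in this snippet. Below are minimal sketches of what they might look like, assuming pg.connect() returns a psycopg2 connection (as elsewhere in these tests) and that the zenith extension exposes a backpressure_lsns() function with a received_lsn column; both the function name and the slack factor are assumptions of this sketch.

import time
from contextlib import closing, contextmanager

@contextmanager
def pg_cur(pg):
    # Open a connection to the compute node and yield a cursor
    # (connections are autocommit, per the comments in the tests above).
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            yield cur

def check_backpressure(pg, stop_event, polling_interval=5):
    # Periodically verify that the received_lsn lag stays roughly within the
    # configured max_replication_write_lag; an AssertionError terminates the
    # thread, which the main test detects via check_thread.is_alive().
    with pg_cur(pg) as cur:
        while not stop_event.is_set():
            cur.execute('''
                SELECT pg_wal_lsn_diff(pg_current_wal_flush_lsn(), received_lsn)
                FROM backpressure_lsns()
            ''')
            lag = cur.fetchone()[0]
            # allow some slack over the 30MB limit (an assumption of this sketch)
            assert lag < 2 * 30 * 1024 * 1024, f"received_lsn lag {lag} is too far over the limit"
            time.sleep(polling_interval)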
Example 7
def test_cli_ipv4_listeners(zenith_env_builder: ZenithEnvBuilder):
    # Start with single sk
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    # Connect to sk port on v4 loopback
    res = requests.get(
        f'http://127.0.0.1:{env.safekeepers[0].port.http}/v1/status')
    assert res.ok
Example 8
def test_wal_deleted_after_broadcast(zenith_env_builder: ZenithEnvBuilder):
    # used to calculate the delta in collect_stats
    last_lsn = 0.0

    # returns LSN and pg_wal size, all in MB
    def collect_stats(pg: Postgres, cur, enable_logs=True):
        nonlocal last_lsn
        assert pg.pgdata_dir is not None

        log.info('collecting LSN and pg_wal size stats')
        cur.execute("select pg_current_wal_lsn()")
        current_lsn = lsn_from_hex(cur.fetchone()[0]) / 1024 / 1024
        pg_wal_size = get_dir_size(os.path.join(pg.pgdata_dir,
                                                'pg_wal')) / 1024 / 1024
        if enable_logs:
            log.info(
                f"LSN delta: {current_lsn - last_lsn} MB, current WAL size: {pg_wal_size} MB"
            )
        last_lsn = current_lsn
        return current_lsn, pg_wal_size

    # generates ~20MB of WAL, to create at least one new segment
    def generate_wal(cur):
        cur.execute(
            "INSERT INTO t SELECT generate_series(1,300000), 'payload'")

    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_wal_deleted_after_broadcast')
    # Adjust checkpoint config to prevent keeping old WAL segments
    pg = env.postgres.create_start('test_wal_deleted_after_broadcast',
                                   config_lines=[
                                       'min_wal_size=32MB',
                                       'max_wal_size=32MB',
                                       'log_checkpoints=on'
                                   ])

    pg_conn = pg.connect()
    cur = pg_conn.cursor()
    cur.execute('CREATE TABLE t(key int, value text)')

    collect_stats(pg, cur)

    # generate WAL to simulate normal workload
    for i in range(5):
        generate_wal(cur)
        collect_stats(pg, cur)

    log.info('executing checkpoint')
    cur.execute('CHECKPOINT')
    wal_size_after_checkpoint = collect_stats(pg, cur)[1]

    # there shouldn't be more than 2 WAL segments (but dir may have archive_status files)
    assert wal_size_after_checkpoint < 16 * 2.5
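
collect_stats above relies on two helpers that are not part of this snippet. Hypothetical sketches, assuming the conventional Postgres "X/Y" LSN notation and a plain recursive directory walk (the real fixture code may differ):

import os

def lsn_from_hex(lsn_hex: str) -> int:
    # Parse an LSN in the textual "X/Y" form into an integer byte position.
    hi, lo = lsn_hex.split('/')
    return (int(hi, 16) << 32) + int(lo, 16)

def get_dir_size(path: str) -> int:
    # Sum the sizes of all files under the directory, recursively.
    total = 0
    for root, _dirs, files in os.walk(path):
        for name in files:
            total += os.path.getsize(os.path.join(root, name))
    return total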
Example 9
def test_restarts_under_load(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_safekeepers_restarts_under_load')
    # Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
    pg = env.postgres.create_start(
        'test_safekeepers_restarts_under_load',
        config_lines=['max_replication_write_lag=1MB'])

    asyncio.run(run_restarts_under_load(env, pg, env.safekeepers))
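
run_restarts_under_load is an asyncio-based helper that is not included in this snippet. A much simplified, synchronous sketch of the idea (the table layout, timings and default parameters are assumptions): keep inserting through the remaining quorum while safekeepers are taken down and brought back one at a time.

import time

def restarts_under_load_sketch(env, pg, acceptors, period_time=4, iterations=10):
    conn = pg.connect()
    cur = conn.cursor()
    cur.execute('CREATE TABLE IF NOT EXISTS load(value bigint)')

    inserted = 0
    for it in range(iterations):
        victim = acceptors[it % len(acceptors)]
        victim.stop()
        started_at = time.time()
        # With 3 safekeepers, writes keep committing via the remaining two.
        while time.time() - started_at < period_time:
            inserted += 1
            cur.execute('INSERT INTO load VALUES (%s)', (inserted, ))
        victim.start()

    cur.execute('SELECT count(*) FROM load')
    assert cur.fetchone() == (inserted, )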
Example 10
def test_pageserver_restart(zenith_env_builder: ZenithEnvBuilder):
    # One safekeeper is enough for this test.
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_pageserver_restart')
    pg = env.postgres.create_start('test_pageserver_restart')

    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    # Create table, and insert some rows. Make it big enough that it doesn't fit in
    # shared_buffers, otherwise the SELECT after restart will just return answer
    # from shared_buffers without hitting the page server, which defeats the point
    # of this test.
    cur.execute('CREATE TABLE foo (t text)')
    cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 100000) g
    ''')

    # Verify that the table is larger than shared_buffers
    cur.execute('''
        select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_size
        from pg_settings where name = 'shared_buffers'
    ''')
    row = cur.fetchone()
    log.info(f"shared_buffers is {row[0]}, table size {row[1]}")
    assert int(row[0]) < int(row[1])

    # Stop and restart pageserver. This is a more or less graceful shutdown, although
    # the page server doesn't currently have a shutdown routine so there's no difference
    # between stopping and crashing.
    env.pageserver.stop()
    env.pageserver.start()

    # Stopping the pageserver breaks the connection from the postgres backend to
    # the page server, and causes the next query on the connection to fail. Start a new
    # postgres connection too, to avoid that error. (Ideally, the compute node would
    # handle that and retry internally, without propagating the error to the user, but
    # currently it doesn't...)
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    cur.execute("SELECT count(*) FROM foo")
    assert cur.fetchone() == (100000, )

    # Stop the page server again, and restart it
    env.pageserver.stop()
    env.pageserver.start()
Example 11
def test_wal_removal(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 2
    zenith_env_builder.broker = True
    # to advance remote_consistent_lsn
    zenith_env_builder.enable_local_fs_remote_storage()
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_safekeepers_wal_removal')
    pg = env.postgres.create_start('test_safekeepers_wal_removal')

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # we rely upon autocommit after each statement
            # as waiting for acceptors happens there
            cur.execute('CREATE TABLE t(key int primary key, value text)')
            cur.execute(
                "INSERT INTO t SELECT generate_series(1,100000), 'payload'")

    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
    timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]

    # force checkpoint to advance remote_consistent_lsn
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor() as pscur:
            pscur.execute(f"checkpoint {tenant_id} {timeline_id}")

    # We will wait for removal of the first segment. Make sure it exists on all safekeepers to start with.
    first_segments = [
        os.path.join(sk.data_dir(), tenant_id, timeline_id,
                     '000000010000000000000001') for sk in env.safekeepers
    ]
    assert all(os.path.exists(p) for p in first_segments)

    http_cli = env.safekeepers[0].http_client()
    # Pretend WAL is offloaded to s3.
    http_cli.record_safekeeper_info(tenant_id, timeline_id,
                                    {'s3_wal_lsn': 'FFFFFFFF/FEFFFFFF'})

    # wait till first segment is removed on all safekeepers
    started_at = time.time()
    while True:
        if all(not os.path.exists(p) for p in first_segments):
            break
        elapsed = time.time() - started_at
        if elapsed > 20:
            raise RuntimeError(
                f"timed out waiting {elapsed:.0f}s for the first segment to be removed"
            )
        time.sleep(0.5)
Example 12
def test_next_xid(zenith_env_builder: ZenithEnvBuilder):
    # One safekeeper is enough for this test.
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    pg = env.postgres.create_start('main')

    conn = pg.connect()
    cur = conn.cursor()
    cur.execute('CREATE TABLE t(x integer)')

    iterations = 32
    for i in range(1, iterations + 1):
        print(f'iteration {i} / {iterations}')

        # Restart the compute node, and kill and restart the pageserver.
        pg.stop()
        env.pageserver.stop(immediate=True)
        env.pageserver.start()
        pg.start()

        retry_sleep = 0.5
        max_retries = 200
        retries = 0
        while True:
            try:
                conn = pg.connect()
                cur = conn.cursor()
                cur.execute(f"INSERT INTO t values({i})")
                conn.close()

            except Exception as error:
                # It's normal that it takes some time for the pageserver to
                # restart, and for the connection to fail until it does. It
                # should eventually recover, so retry until it succeeds.
                print(f'failed: {error}')
                if retries < max_retries:
                    retries += 1
                    print(f'retry {retries} / {max_retries}')
                    time.sleep(retry_sleep)
                    continue
                else:
                    raise
            break

    conn = pg.connect()
    cur = conn.cursor()
    cur.execute("SELECT count(*) FROM t")
    assert cur.fetchone() == (iterations, )
Example 13
def test_cli_start_stop(zenith_env_builder: ZenithEnvBuilder):
    # Start with single sk
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    # Stop default ps/sk
    env.zenith_cli.pageserver_stop()
    env.zenith_cli.safekeeper_stop()

    # Default start
    res = env.zenith_cli.raw_cli(["start"])
    res.check_returncode()

    # Default stop
    res = env.zenith_cli.raw_cli(["stop"])
    res.check_returncode()
Example 14
def test_sync_safekeepers(zenith_env_builder: ZenithEnvBuilder, pg_bin: PgBin,
                          port_distributor: PortDistributor):

    # We don't really need the full environment for this test, just the
    # safekeepers would be enough.
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    timeline_id = uuid.uuid4()
    tenant_id = uuid.uuid4()

    # write config for proposer
    pgdata_dir = os.path.join(env.repo_dir, "proposer_pgdata")
    pg = ProposerPostgres(pgdata_dir, pg_bin, timeline_id, tenant_id,
                          '127.0.0.1', port_distributor.get_port())
    pg.create_dir_config(env.get_safekeeper_connstrs())

    # a valid LSN that is neither at a segment start nor in the zero segment
    epoch_start_lsn = 0x16B9188  # 0/16B9188
    begin_lsn = epoch_start_lsn

    # append and commit WAL
    lsn_after_append = []
    for i in range(3):
        res = env.safekeepers[i].append_logical_message(
            tenant_id,
            timeline_id,
            {
                "lm_prefix": "prefix",
                "lm_message": "message",
                "set_commit_lsn": True,
                "send_proposer_elected": True,
                "term": 2,
                "begin_lsn": begin_lsn,
                "epoch_start_lsn": epoch_start_lsn,
                "truncate_lsn": epoch_start_lsn,
            },
        )
        lsn_hex = lsn_to_hex(res["inserted_wal"]["end_lsn"])
        lsn_after_append.append(lsn_hex)
        log.info(f"safekeeper[{i}] lsn after append: {lsn_hex}")

    # run sync safekeepers
    lsn_after_sync = pg.sync_safekeepers()
    log.info(f"lsn after sync = {lsn_after_sync}")

    assert all(lsn_after_sync == lsn for lsn in lsn_after_append)
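
lsn_to_hex, the counterpart of lsn_from_hex sketched earlier, is assumed to format an integer LSN back into the "X/Y" notation used by Postgres and the safekeeper HTTP API; a one-line sketch:

def lsn_to_hex(lsn: int) -> str:
    # Format an integer LSN as the conventional "X/Y" hex pair.
    return f'{lsn >> 32:X}/{lsn & 0xFFFFFFFF:X}'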
Example 15
def test_unavailability(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 2
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_safekeepers_unavailability')
    pg = env.postgres.create_start('test_safekeepers_unavailability')

    # we rely upon autocommit after each statement
    # as waiting for acceptors happens there
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    # check basic work with table
    cur.execute('CREATE TABLE t(key int primary key, value text)')
    cur.execute("INSERT INTO t values (1, 'payload')")

    # shut down one of the two acceptors, i.e. lose the majority
    env.safekeepers[0].stop()

    proc = Process(target=delayed_safekeeper_start,
                   args=(env.safekeepers[0], ))
    proc.start()

    start = time.time()
    cur.execute("INSERT INTO t values (2, 'payload')")
    # ensure that the query above was hanging while acceptor was down
    assert (time.time() - start) >= start_delay_sec
    proc.join()

    # for the world's balance, do the same with the second acceptor
    env.safekeepers[1].stop()

    proc = Process(target=delayed_safekeeper_start,
                   args=(env.safekeepers[1], ))
    proc.start()

    start = time.time()
    cur.execute("INSERT INTO t values (3, 'payload')")
    # ensure that the query above was hanging while acceptor was down
    assert (time.time() - start) >= start_delay_sec
    proc.join()

    cur.execute("INSERT INTO t values (4, 'payload')")

    cur.execute('SELECT sum(key) FROM t')
    assert cur.fetchone() == (10, )
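
delayed_safekeeper_start and start_delay_sec come from the surrounding test module and are not shown here. A minimal sketch of the assumed behaviour: sleep for a fixed delay in a separate process, then bring the stopped safekeeper back, which guarantees the INSERTs above hang for at least start_delay_sec.

import time

start_delay_sec = 2

def delayed_safekeeper_start(wa):
    # Restore the quorum only after the delay has passed.
    time.sleep(start_delay_sec)
    wa.start()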
Example 16
def test_normal_work(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 3
    zenith_env_builder.broker = True
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_safekeepers_normal_work')
    pg = env.postgres.create_start('test_safekeepers_normal_work')

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # we rely upon autocommit after each statement
            # as waiting for acceptors happens there
            cur.execute('CREATE TABLE t(key int primary key, value text)')
            cur.execute(
                "INSERT INTO t SELECT generate_series(1,100000), 'payload'")
            cur.execute('SELECT sum(key) FROM t')
            assert cur.fetchone() == (5000050000, )
Example 17
def test_startup(zenith_env_builder: ZenithEnvBuilder,
                 zenbenchmark: ZenithBenchmarker):
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    # Start
    env.zenith_cli.create_branch('test_startup')
    with zenbenchmark.record_duration("startup_time"):
        pg = env.postgres.create_start('test_startup')
        pg.safe_psql("select 1;")

    # Restart
    pg.stop_and_destroy()
    with zenbenchmark.record_duration("restart_time"):
        pg.create_start('test_startup')
        pg.safe_psql("select 1;")

    # Fill up
    num_rows = 1000000  # 30 MB
    num_tables = 100
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            for i in range(num_tables):
                cur.execute(f'create table t_{i} (i integer);')
                cur.execute(
                    f'insert into t_{i} values (generate_series(1,{num_rows}));'
                )

    # Read
    with zenbenchmark.record_duration("read_time"):
        pg.safe_psql("select * from t_0;")

    # Read again
    with zenbenchmark.record_duration("second_read_time"):
        pg.safe_psql("select * from t_0;")

    # Restart
    pg.stop_and_destroy()
    with zenbenchmark.record_duration("restart_with_data"):
        pg.create_start('test_startup')
        pg.safe_psql("select 1;")

    # Read
    with zenbenchmark.record_duration("read_after_restart"):
        pg.safe_psql("select * from t_0;")
Example 18
def test_compute_auth_to_pageserver(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
    zenith_env_builder.pageserver_auth_enabled = True
    if with_safekeepers:
        zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    branch = f'test_compute_auth_to_pageserver{with_safekeepers}'
    env.zenith_cli.create_branch(branch)
    pg = env.postgres.create_start(branch)

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # we rely upon autocommit after each statement
            # as waiting for acceptors happens there
            cur.execute('CREATE TABLE t(key int primary key, value text)')
            cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
            cur.execute('SELECT sum(key) FROM t')
            assert cur.fetchone() == (5000050000, )
Example 19
def test_broker(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 3
    zenith_env_builder.broker = True
    zenith_env_builder.enable_local_fs_remote_storage()
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch("test_broker", "main")
    pg = env.postgres.create_start('test_broker')
    pg.safe_psql("CREATE TABLE t(key int primary key, value text)")

    # learn zenith timeline from compute
    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
    timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]

    # wait until remote_consistent_lsn gets advanced on all safekeepers
    clients = [sk.http_client() for sk in env.safekeepers]
    stat_before = [
        cli.timeline_status(tenant_id, timeline_id) for cli in clients
    ]
    log.info(f"statuses is {stat_before}")

    pg.safe_psql("INSERT INTO t SELECT generate_series(1,100), 'payload'")
    # force checkpoint to advance remote_consistent_lsn
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor() as pscur:
            pscur.execute(f"checkpoint {tenant_id} {timeline_id}")
    # and wait till remote_consistent_lsn propagates to all safekeepers
    started_at = time.time()
    while True:
        stat_after = [
            cli.timeline_status(tenant_id, timeline_id) for cli in clients
        ]
        if all(
                lsn_from_hex(s_after.remote_consistent_lsn) > lsn_from_hex(
                    s_before.remote_consistent_lsn)
                for s_after, s_before in zip(stat_after, stat_before)):
            break
        elapsed = time.time() - started_at
        if elapsed > 20:
            raise RuntimeError(
                f"timed out waiting {elapsed:.0f}s for remote_consistent_lsn propagation: status before {stat_before}, status current {stat_after}"
            )
        time.sleep(0.5)
Example 20
def test_restarts_frequent_checkpoints(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_restarts_frequent_checkpoints')
    # Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
    pg = env.postgres.create_start('test_restarts_frequent_checkpoints',
                                   config_lines=[
                                       'max_replication_write_lag=1MB',
                                       'min_wal_size=32MB',
                                       'max_wal_size=32MB',
                                       'log_checkpoints=on'
                                   ])

    # We try to simulate a large (flush_lsn - truncate_lsn) lag, to test that WAL segments
    # are not removed before they are broadcast to all safekeepers, with the help of the replication slot.
    asyncio.run(
        run_restarts_under_load(env,
                                pg,
                                env.safekeepers,
                                period_time=15,
                                iterations=5))
Example 21
def test_wal_restore(zenith_env_builder: ZenithEnvBuilder,
                     pg_bin: PgBin,
                     test_output_dir,
                     port_distributor: PortDistributor):
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()
    env.zenith_cli.create_branch("test_wal_restore")
    pg = env.postgres.create_start('test_wal_restore')
    pg.safe_psql("create table t as select generate_series(1,300000)")
    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
    env.zenith_cli.pageserver_stop()
    port = port_distributor.get_port()
    data_dir = os.path.join(test_output_dir, 'pgsql.restored')
    with VanillaPostgres(data_dir, PgBin(test_output_dir), port) as restored:
        pg_bin.run_capture([
            os.path.join(base_dir, 'libs/utils/scripts/restore_from_wal.sh'),
            os.path.join(pg_distrib_dir, 'bin'),
            os.path.join(test_output_dir, 'repo/safekeepers/sk1/{}/*'.format(tenant_id)),
            data_dir,
            str(port)
        ])
        restored.start()
        assert restored.safe_psql('select count(*) from t', user='******') == [(300000, )]
Example 22
def test_bulk_tenant_create(
    zenith_env_builder: ZenithEnvBuilder,
    use_safekeepers: str,
    tenants_count: int,
    zenbenchmark,
):
    """Measure tenant creation time (with and without wal acceptors)"""
    if use_safekeepers == 'with_wa':
        zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    time_slices = []

    for i in range(tenants_count):
        start = timeit.default_timer()

        tenant = env.zenith_cli.create_tenant()
        env.zenith_cli.create_timeline(
            f'test_bulk_tenant_create_{tenants_count}_{i}_{use_safekeepers}', tenant_id=tenant)

        # FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now?
        #if use_safekeepers == 'with_sa':
        #    wa_factory.start_n_new(3)

        pg_tenant = env.postgres.create_start(
            f'test_bulk_tenant_create_{tenants_count}_{i}_{use_safekeepers}', tenant_id=tenant)

        end = timeit.default_timer()
        time_slices.append(end - start)

        pg_tenant.stop()

    zenbenchmark.record('tenant_creation_time',
                        sum(time_slices) / len(time_slices),
                        's',
                        report=MetricReport.LOWER_IS_BETTER)
Example 23
def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):

    # Use a safekeeper in this test to avoid a subtle race condition.
    # Without a safekeeper, walreceiver reconnection can get stuck
    # because of an IO deadlock.
    #
    # See https://github.com/zenithdb/zenith/issues/1068
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    # Override defaults, 1M gc_horizon and 4M checkpoint_distance.
    # Extend compaction_period and gc_period to disable background compaction and gc.
    tenant = env.zenith_cli.create_tenant(
        conf={
            'gc_period': '10 m',
            'gc_horizon': '1048576',
            'checkpoint_distance': '4194304',
            'compaction_period': '10 m',
            'compaction_threshold': '2',
            'compaction_target_size': '4194304',
        })

    env.zenith_cli.create_timeline('main', tenant_id=tenant)
    pg_branch0 = env.postgres.create_start('main', tenant_id=tenant)
    branch0_cur = pg_branch0.connect().cursor()
    branch0_cur.execute("SHOW zenith.zenith_timeline")
    branch0_timeline = branch0_cur.fetchone()[0]
    log.info(f"b0 timeline {branch0_timeline}")

    # Create table, and insert 100k rows.
    branch0_cur.execute('SELECT pg_current_wal_insert_lsn()')
    branch0_lsn = branch0_cur.fetchone()[0]
    log.info(f"b0 at lsn {branch0_lsn}")

    branch0_cur.execute(
        'CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)')
    branch0_cur.execute('''
        INSERT INTO foo
            SELECT '00112233445566778899AABBCCDDEEFF' || ':branch0:' || g
            FROM generate_series(1, 100000) g
    ''')
    branch0_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_100 = branch0_cur.fetchone()[0]
    log.info(f'LSN after 100k rows: {lsn_100}')

    # Create branch1.
    env.zenith_cli.create_branch('branch1',
                                 'main',
                                 tenant_id=tenant,
                                 ancestor_start_lsn=lsn_100)
    pg_branch1 = env.postgres.create_start('branch1', tenant_id=tenant)
    log.info("postgres is running on 'branch1' branch")

    branch1_cur = pg_branch1.connect().cursor()
    branch1_cur.execute("SHOW zenith.zenith_timeline")
    branch1_timeline = branch1_cur.fetchone()[0]
    log.info(f"b1 timeline {branch1_timeline}")

    branch1_cur.execute('SELECT pg_current_wal_insert_lsn()')
    branch1_lsn = branch1_cur.fetchone()[0]
    log.info(f"b1 at lsn {branch1_lsn}")

    # Insert 100k rows.
    branch1_cur.execute('''
        INSERT INTO foo
            SELECT '00112233445566778899AABBCCDDEEFF' || ':branch1:' || g
            FROM generate_series(1, 100000) g
    ''')
    branch1_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_200 = branch1_cur.fetchone()[0]
    log.info(f'LSN after 200k rows: {lsn_200}')

    # Create branch2.
    env.zenith_cli.create_branch('branch2',
                                 'branch1',
                                 tenant_id=tenant,
                                 ancestor_start_lsn=lsn_200)
    pg_branch2 = env.postgres.create_start('branch2', tenant_id=tenant)
    log.info("postgres is running on 'branch2' branch")
    branch2_cur = pg_branch2.connect().cursor()

    branch2_cur.execute("SHOW zenith.zenith_timeline")
    branch2_timeline = branch2_cur.fetchone()[0]
    log.info(f"b2 timeline {branch2_timeline}")

    branch2_cur.execute('SELECT pg_current_wal_insert_lsn()')
    branch2_lsn = branch2_cur.fetchone()[0]
    log.info(f"b2 at lsn {branch2_lsn}")

    # Insert 100k rows.
    branch2_cur.execute('''
        INSERT INTO foo
            SELECT '00112233445566778899AABBCCDDEEFF' || ':branch2:' || g
            FROM generate_series(1, 100000) g
    ''')
    branch2_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_300 = branch2_cur.fetchone()[0]
    log.info(f'LSN after 300k rows: {lsn_300}')

    # Run compaction on branch1.
    psconn = env.pageserver.connect()
    log.info(f'compact {tenant.hex} {branch1_timeline} {lsn_200}')
    psconn.cursor().execute(
        f'''compact {tenant.hex} {branch1_timeline} {lsn_200}''')

    branch0_cur.execute('SELECT count(*) FROM foo')
    assert branch0_cur.fetchone() == (100000, )

    branch1_cur.execute('SELECT count(*) FROM foo')
    assert branch1_cur.fetchone() == (200000, )

    branch2_cur.execute('SELECT count(*) FROM foo')
    assert branch2_cur.fetchone() == (300000, )
Example 24
def test_remote_storage_backup_and_restore(
        zenith_env_builder: ZenithEnvBuilder, storage_type: str):
    zenith_env_builder.rust_log_override = 'debug'
    zenith_env_builder.num_safekeepers = 1
    if storage_type == 'local_fs':
        zenith_env_builder.enable_local_fs_remote_storage()
    elif storage_type == 'mock_s3':
        zenith_env_builder.enable_s3_mock_remote_storage(
            'test_remote_storage_backup_and_restore')
    else:
        raise RuntimeError(f'Unknown storage type: {storage_type}')

    data_id = 1
    data_secret = 'very secret secret'

    ##### First start, insert secret data and upload it to the remote storage
    env = zenith_env_builder.init_start()
    pg = env.postgres.create_start('main')

    client = env.pageserver.http_client()

    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
    timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]

    checkpoint_numbers = range(1, 3)

    for checkpoint_number in checkpoint_numbers:
        with closing(pg.connect()) as conn:
            with conn.cursor() as cur:
                cur.execute(f'''
                    CREATE TABLE t{checkpoint_number}(id int primary key, secret text);
                    INSERT INTO t{checkpoint_number} VALUES ({data_id}, '{data_secret}|{checkpoint_number}');
                ''')
                cur.execute("SELECT pg_current_wal_flush_lsn()")
                current_lsn = lsn_from_hex(cur.fetchone()[0])

        # wait until pageserver receives that data
        wait_for_last_record_lsn(client, UUID(tenant_id), UUID(timeline_id),
                                 current_lsn)

        # run checkpoint manually to be sure that data landed in remote storage
        with closing(env.pageserver.connect()) as psconn:
            with psconn.cursor() as pscur:
                pscur.execute(f"checkpoint {tenant_id} {timeline_id}")

        log.info(f'waiting for checkpoint {checkpoint_number} upload')
        # wait until pageserver successfully uploaded a checkpoint to remote storage
        wait_for_upload(client, UUID(tenant_id), UUID(timeline_id),
                        current_lsn)
        log.info(f'upload of checkpoint {checkpoint_number} is done')

    ##### Stop the first pageserver instance, erase all its data
    env.postgres.stop_all()
    env.pageserver.stop()

    dir_to_clear = Path(env.repo_dir) / 'tenants'
    shutil.rmtree(dir_to_clear)
    os.mkdir(dir_to_clear)

    ##### Second start, restore the data and ensure it's the same
    env.pageserver.start()

    client.timeline_attach(UUID(tenant_id), UUID(timeline_id))

    log.info("waiting for timeline redownload")
    wait_for(
        number_of_iterations=10,
        interval=1,
        func=lambda: assert_local(client, UUID(tenant_id), UUID(timeline_id)))

    pg = env.postgres.create_start('main')
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            for checkpoint_number in checkpoint_numbers:
                cur.execute(
                    f'SELECT secret FROM t{checkpoint_number} WHERE id = {data_id};'
                )
                assert cur.fetchone() == (
                    f'{data_secret}|{checkpoint_number}', )
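
The wait_for helper used above (and again in the tenant relocation test later in this collection) is not part of this snippet. A plausible sketch matching its number_of_iterations/interval/func call sites: retry func() with a sleep in between, return its first successful result, and re-raise the last error on timeout.

import time

def wait_for(number_of_iterations: int, interval: float, func):
    last_exception = None
    for _ in range(number_of_iterations):
        try:
            return func()
        except Exception as e:
            # Keep retrying until the budget of iterations is exhausted.
            last_exception = e
        time.sleep(interval)
    raise last_exception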
Example 25
def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):

    # Use a safekeeper in this test to avoid a subtle race condition.
    # Without a safekeeper, walreceiver reconnection can get stuck
    # because of an IO deadlock.
    #
    # See https://github.com/zenithdb/zenith/issues/1068
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    # Branch at the point where only 100 rows were inserted
    env.zenith_cli.create_branch('test_branch_behind')
    pgmain = env.postgres.create_start('test_branch_behind')
    log.info("postgres is running on 'test_branch_behind' branch")

    main_pg_conn = pgmain.connect()
    main_cur = main_pg_conn.cursor()

    main_cur.execute("SHOW zenith.zenith_timeline")
    timeline = main_cur.fetchone()[0]

    # Create table, and insert the first 100 rows
    main_cur.execute('CREATE TABLE foo (t text)')

    # keep some early LSN to later test branch creation at an out-of-date LSN
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    gced_lsn = main_cur.fetchone()[0]

    main_cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 100) g
    ''')
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_a = main_cur.fetchone()[0]
    log.info(f'LSN after 100 rows: {lsn_a}')

    # Insert some more rows. (This generates enough WAL to fill a few segments.)
    main_cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 200000) g
    ''')
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_b = main_cur.fetchone()[0]
    log.info(f'LSN after 200100 rows: {lsn_b}')

    # Branch at the point where only 100 rows were inserted
    env.zenith_cli.create_branch('test_branch_behind_hundred',
                                 'test_branch_behind',
                                 ancestor_start_lsn=lsn_a)

    # Insert many more rows. This generates enough WAL to fill a few segments.
    main_cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 200000) g
    ''')
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')

    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_c = main_cur.fetchone()[0]
    log.info(f'LSN after 400100 rows: {lsn_c}')

    # Branch at the point where only 200100 rows were inserted
    env.zenith_cli.create_branch('test_branch_behind_more',
                                 'test_branch_behind',
                                 ancestor_start_lsn=lsn_b)

    pg_hundred = env.postgres.create_start('test_branch_behind_hundred')
    pg_more = env.postgres.create_start('test_branch_behind_more')

    # On the 'hundred' branch, we should see only 100 rows
    hundred_pg_conn = pg_hundred.connect()
    hundred_cur = hundred_pg_conn.cursor()
    hundred_cur.execute('SELECT count(*) FROM foo')
    assert hundred_cur.fetchone() == (100, )

    # On the 'more' branch, we should see 200100 rows
    more_pg_conn = pg_more.connect()
    more_cur = more_pg_conn.cursor()
    more_cur.execute('SELECT count(*) FROM foo')
    assert more_cur.fetchone() == (200100, )

    # All the rows are visible on the main branch
    main_cur.execute('SELECT count(*) FROM foo')
    assert main_cur.fetchone() == (400100, )

    # Check bad LSNs for branching

    # branch at segment boundary
    env.zenith_cli.create_branch('test_branch_segment_boundary',
                                 'test_branch_behind',
                                 ancestor_start_lsn="0/3000000")
    pg = env.postgres.create_start('test_branch_segment_boundary')
    cur = pg.connect().cursor()
    cur.execute('SELECT 1')
    assert cur.fetchone() == (1, )

    # branch at pre-initdb lsn
    with pytest.raises(Exception, match="invalid branch start lsn"):
        env.zenith_cli.create_branch('test_branch_preinitdb',
                                     ancestor_start_lsn="0/42")

    # branch at pre-ancestor lsn
    with pytest.raises(Exception, match="less than timeline ancestor lsn"):
        env.zenith_cli.create_branch('test_branch_preinitdb',
                                     'test_branch_behind',
                                     ancestor_start_lsn="0/42")

    # check that we cannot create branch based on garbage collected data
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
            # call gc to advance latest_gc_cutoff_lsn
            pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
            row = pscur.fetchone()
            print_gc_result(row)

    with pytest.raises(Exception, match="invalid branch start lsn"):
        # this gced_lsn is pretty random, so if gc is disabled this wouldn't fail
        env.zenith_cli.create_branch('test_branch_create_fail',
                                     'test_branch_behind',
                                     ancestor_start_lsn=gced_lsn)

    # check that after gc everything is still there
    hundred_cur.execute('SELECT count(*) FROM foo')
    assert hundred_cur.fetchone() == (100, )

    more_cur.execute('SELECT count(*) FROM foo')
    assert more_cur.fetchone() == (200100, )

    main_cur.execute('SELECT count(*) FROM foo')
    assert main_cur.fetchone() == (400100, )
Example 26
def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
    def safekeepers_guc(env: ZenithEnv, sk_names: List[int]) -> str:
        return ','.join([
            f'localhost:{sk.port.pg}' for sk in env.safekeepers
            if sk.id in sk_names
        ])

    def execute_payload(pg: Postgres):
        with closing(pg.connect()) as conn:
            with conn.cursor() as cur:
                # we rely upon autocommit after each statement
                # as waiting for acceptors happens there
                cur.execute(
                    'CREATE TABLE IF NOT EXISTS t(key int, value text)')
                cur.execute("INSERT INTO t VALUES (0, 'something')")
                cur.execute('SELECT SUM(key) FROM t')
                sum_before = cur.fetchone()[0]

                cur.execute(
                    "INSERT INTO t SELECT generate_series(1,100000), 'payload'"
                )
                cur.execute('SELECT SUM(key) FROM t')
                sum_after = cur.fetchone()[0]
                assert sum_after == sum_before + 5000050000

    def show_statuses(safekeepers: List[Safekeeper], tenant_id: str,
                      timeline_id: str):
        for sk in safekeepers:
            http_cli = sk.http_client()
            try:
                status = http_cli.timeline_status(tenant_id, timeline_id)
                log.info(f"Safekeeper {sk.id} status: {status}")
            except Exception as e:
                log.info(f"Safekeeper {sk.id} status error: {e}")

    zenith_env_builder.num_safekeepers = 4
    env = zenith_env_builder.init_start()
    env.zenith_cli.create_branch('test_replace_safekeeper')

    log.info("Use only first 3 safekeepers")
    env.safekeepers[3].stop()
    active_safekeepers = [1, 2, 3]
    pg = env.postgres.create('test_replace_safekeeper')
    pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
    pg.start()

    # learn zenith timeline from compute
    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
    timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]

    execute_payload(pg)
    show_statuses(env.safekeepers, tenant_id, timeline_id)

    log.info("Restart all safekeepers to flush everything")
    env.safekeepers[0].stop(immediate=True)
    execute_payload(pg)
    env.safekeepers[0].start()
    env.safekeepers[1].stop(immediate=True)
    execute_payload(pg)
    env.safekeepers[1].start()
    env.safekeepers[2].stop(immediate=True)
    execute_payload(pg)
    env.safekeepers[2].start()

    env.safekeepers[0].stop(immediate=True)
    env.safekeepers[1].stop(immediate=True)
    env.safekeepers[2].stop(immediate=True)
    env.safekeepers[0].start()
    env.safekeepers[1].start()
    env.safekeepers[2].start()

    execute_payload(pg)
    show_statuses(env.safekeepers, tenant_id, timeline_id)

    log.info("Stop sk1 (simulate failure) and use only quorum of sk2 and sk3")
    env.safekeepers[0].stop(immediate=True)
    execute_payload(pg)
    show_statuses(env.safekeepers, tenant_id, timeline_id)

    log.info("Recreate postgres to replace failed sk1 with new sk4")
    pg.stop_and_destroy().create('test_replace_safekeeper')
    active_safekeepers = [2, 3, 4]
    env.safekeepers[3].start()
    pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
    pg.start()

    execute_payload(pg)
    show_statuses(env.safekeepers, tenant_id, timeline_id)

    log.info("Stop sk2 to require quorum of sk3 and sk4 for normal work")
    env.safekeepers[1].stop(immediate=True)
    execute_payload(pg)
    show_statuses(env.safekeepers, tenant_id, timeline_id)
Example 27
def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    n_timelines = 3

    branch_names = [
        "test_safekeepers_many_timelines_{}".format(tlin)
        for tlin in range(n_timelines)
    ]
    # The pageserver and safekeepers operate on timelines via their ids (which can be
    # represented in hex, e.g. 'ad50847381e248feaac9876cc71ae418'). That's not really
    # human readable, so Zenith CLI introduces branch names and keeps the branch <-> timeline
    # mapping in its internals; we need that mapping here to collect timeline-related
    # metrics from the other servers.
    branch_names_to_timeline_ids = {}

    # start postgres on each timeline
    pgs = []
    for branch_name in branch_names:
        new_timeline_id = env.zenith_cli.create_branch(branch_name)
        pgs.append(env.postgres.create_start(branch_name))
        branch_names_to_timeline_ids[branch_name] = new_timeline_id

    tenant_id = env.initial_tenant

    def collect_metrics(message: str) -> List[TimelineMetrics]:
        with env.pageserver.http_client() as pageserver_http:
            timeline_details = [
                pageserver_http.timeline_detail(
                    tenant_id=tenant_id,
                    timeline_id=branch_names_to_timeline_ids[branch_name])
                for branch_name in branch_names
            ]
        # All changes visible to pageserver (last_record_lsn) should be
        # confirmed by safekeepers first. As we cannot atomically get
        # state of both pageserver and safekeepers, we should start with
        # pageserver. Looking at outdated data from pageserver is ok.
        # Asking safekeepers first is not ok because new commits may arrive
        # to both safekeepers and pageserver after we've already obtained
        # safekeepers' state, it will look contradictory.
        sk_metrics = [sk.http_client().get_metrics() for sk in env.safekeepers]

        timeline_metrics = []
        for timeline_detail in timeline_details:
            timeline_id: str = timeline_detail["timeline_id"]

            local_timeline_detail = timeline_detail.get('local')
            if local_timeline_detail is None:
                log.debug(
                    f"Timeline {timeline_id} is not present locally, skipping")
                continue

            m = TimelineMetrics(
                timeline_id=timeline_id,
                last_record_lsn=lsn_from_hex(
                    local_timeline_detail['last_record_lsn']),
            )
            for sk_m in sk_metrics:
                m.flush_lsns.append(sk_m.flush_lsn_inexact[(tenant_id.hex,
                                                            timeline_id)])
                m.commit_lsns.append(sk_m.commit_lsn_inexact[(tenant_id.hex,
                                                              timeline_id)])

            for flush_lsn, commit_lsn in zip(m.flush_lsns, m.commit_lsns):
                # Invariant. May be < when transaction is in progress.
                assert commit_lsn <= flush_lsn, f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
            # We only call collect_metrics() after a transaction is confirmed by
            # the compute node, which only happens after a consensus of safekeepers
            # has confirmed the transaction. We assume majority consensus here.
            assert (
                2 * sum(m.last_record_lsn <= lsn for lsn in m.flush_lsns) >
                zenith_env_builder.num_safekeepers
            ), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
            assert (
                2 * sum(m.last_record_lsn <= lsn for lsn in m.commit_lsns) >
                zenith_env_builder.num_safekeepers
            ), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
            timeline_metrics.append(m)
        log.info(f"{message}: {timeline_metrics}")
        return timeline_metrics

    # TODO: https://github.com/zenithdb/zenith/issues/809
    # collect_metrics("before CREATE TABLE")

    # Do everything in different loops to have actions on different timelines
    # interleaved.
    # create schema
    for pg in pgs:
        pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
    init_m = collect_metrics("after CREATE TABLE")

    # Populate data for 2/3 timelines
    class MetricsChecker(threading.Thread):
        def __init__(self) -> None:
            super().__init__(daemon=True)
            self.should_stop = threading.Event()
            self.exception: Optional[BaseException] = None

        def run(self) -> None:
            try:
                while not self.should_stop.is_set():
                    collect_metrics("during INSERT INTO")
                    time.sleep(1)
            except:
                log.error(
                    "MetricsChecker's thread failed, the test will be failed on .stop() call",
                    exc_info=True)
                # We want to preserve traceback as well as the exception
                exc_type, exc_value, exc_tb = sys.exc_info()
                assert exc_type
                e = exc_type(exc_value)
                e.__traceback__ = exc_tb
                self.exception = e

        def stop(self) -> None:
            self.should_stop.set()
            self.join()
            if self.exception:
                raise self.exception

    metrics_checker = MetricsChecker()
    metrics_checker.start()

    for pg in pgs[:-1]:
        pg.safe_psql(
            "INSERT INTO t SELECT generate_series(1,100000), 'payload'")

    metrics_checker.stop()

    collect_metrics("after INSERT INTO")

    # Check data for 2/3 timelines
    for pg in pgs[:-1]:
        res = pg.safe_psql("SELECT sum(key) FROM t")
        assert res[0] == (5000050000, )

    final_m = collect_metrics("after SELECT")
    # Assume that LSNs (a) behave similarly in all timelines; and (b) INSERT INTO alters LSN significantly.
    # Also assume that safekeepers will not be significantly out of sync in this test.
    middle_lsn = (init_m[0].last_record_lsn + final_m[0].last_record_lsn) // 2
    assert max(init_m[0].flush_lsns) < middle_lsn < min(final_m[0].flush_lsns)
    assert max(init_m[0].commit_lsns) < middle_lsn < min(
        final_m[0].commit_lsns)
    assert max(init_m[1].flush_lsns) < middle_lsn < min(final_m[1].flush_lsns)
    assert max(init_m[1].commit_lsns) < middle_lsn < min(
        final_m[1].commit_lsns)
    assert max(init_m[2].flush_lsns) <= min(final_m[2].flush_lsns) < middle_lsn
    assert max(init_m[2].commit_lsns) <= min(
        final_m[2].commit_lsns) < middle_lsn
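
TimelineMetrics is a small container defined elsewhere in the test suite. Judging from how collect_metrics above uses it, a dataclass along these lines would fit (the field types are assumptions):

from dataclasses import dataclass, field
from typing import List

@dataclass
class TimelineMetrics:
    timeline_id: str
    last_record_lsn: int
    flush_lsns: List[int] = field(default_factory=list)
    commit_lsns: List[int] = field(default_factory=list)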
Example 28
def test_timeline_size_quota(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()
    new_timeline_id = env.zenith_cli.create_branch('test_timeline_size_quota')

    client = env.pageserver.http_client()
    res = assert_local(client, env.initial_tenant, new_timeline_id)
    assert res['local']["current_logical_size"] == res['local'][
        "current_logical_size_non_incremental"]

    pgmain = env.postgres.create_start(
        "test_timeline_size_quota",
        # Set small limit for the test
        config_lines=['zenith.max_cluster_size=30MB'])
    log.info("postgres is running on 'test_timeline_size_quota' branch")

    with closing(pgmain.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute(
                "CREATE EXTENSION zenith")  # TODO move it to zenith_fixtures?

            cur.execute("CREATE TABLE foo (t text)")

            wait_for_pageserver_catchup(pgmain)

            # Insert many rows. This query must fail because of the space limit
            try:
                cur.execute('''
                    INSERT INTO foo
                        SELECT 'long string to consume some space' || g
                        FROM generate_series(1, 100000) g
                ''')

                wait_for_pageserver_catchup(pgmain)

                cur.execute('''
                    INSERT INTO foo
                        SELECT 'long string to consume some space' || g
                        FROM generate_series(1, 500000) g
                ''')

                # If we get here, the timeline size limit failed
                log.error("Query unexpectedly succeeded")
                assert False

            except psycopg2.errors.DiskFull as err:
                log.info(f"Query expectedly failed with: {err}")

            # drop table to free space
            cur.execute('DROP TABLE foo')

            wait_for_pageserver_catchup(pgmain)

            # create it again and insert some rows. This query must succeed
            cur.execute("CREATE TABLE foo (t text)")
            cur.execute('''
                INSERT INTO foo
                    SELECT 'long string to consume some space' || g
                    FROM generate_series(1, 10000) g
            ''')

            wait_for_pageserver_catchup(pgmain)

            cur.execute("SELECT * from pg_size_pretty(pg_cluster_size())")
            pg_cluster_size = cur.fetchone()
            log.info(f"pg_cluster_size = {pg_cluster_size}")
Example 29
def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
                           port_distributor: PortDistributor, with_load: str):
    zenith_env_builder.num_safekeepers = 1
    zenith_env_builder.enable_local_fs_remote_storage()

    env = zenith_env_builder.init_start()

    # create folder for remote storage mock
    remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage'

    tenant = env.zenith_cli.create_tenant(
        UUID("74ee8b079a0e437eb0afea7d26a07209"))
    log.info("tenant to relocate %s", tenant)
    env.zenith_cli.create_root_branch('main', tenant_id=tenant)
    env.zenith_cli.create_branch('test_tenant_relocation', tenant_id=tenant)

    tenant_pg = env.postgres.create_start(branch_name='main',
                                          node_name='test_tenant_relocation',
                                          tenant_id=tenant)

    # insert some data
    with closing(tenant_pg.connect()) as conn:
        with conn.cursor() as cur:
            # save timeline for later gc call
            cur.execute("SHOW zenith.zenith_timeline")
            timeline = UUID(cur.fetchone()[0])
            log.info("timeline to relocate %s", timeline.hex)

            # we rely upon autocommit after each statement
            # as waiting for acceptors happens there
            cur.execute("CREATE TABLE t(key int primary key, value text)")
            cur.execute(
                "INSERT INTO t SELECT generate_series(1,1000), 'some payload'")
            cur.execute("SELECT sum(key) FROM t")
            assert cur.fetchone() == (500500, )
            cur.execute("SELECT pg_current_wal_flush_lsn()")

            current_lsn = lsn_from_hex(cur.fetchone()[0])

    pageserver_http = env.pageserver.http_client()

    # wait until pageserver receives that data
    wait_for_last_record_lsn(pageserver_http, tenant, timeline, current_lsn)
    timeline_detail = assert_local(pageserver_http, tenant, timeline)

    if with_load == 'with_load':
        # create load table
        with pg_cur(tenant_pg) as cur:
            cur.execute("CREATE TABLE load(value text)")

        load_stop_event = threading.Event()
        load_ok_event = threading.Event()
        load_thread = threading.Thread(target=load,
                                       args=(tenant_pg, load_stop_event,
                                             load_ok_event))
        load_thread.start()

    # run checkpoint manually to be sure that data landed in remote storage
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor() as pscur:
            pscur.execute(f"checkpoint {tenant.hex} {timeline.hex}")

    # wait until pageserver successfully uploaded a checkpoint to remote storage
    wait_for_upload(pageserver_http, tenant, timeline, current_lsn)

    log.info("inititalizing new pageserver")
    # bootstrap second pageserver
    new_pageserver_dir = env.repo_dir / 'new_pageserver'
    new_pageserver_dir.mkdir()

    new_pageserver_pg_port = port_distributor.get_port()
    new_pageserver_http_port = port_distributor.get_port()
    log.info("new pageserver ports pg %s http %s", new_pageserver_pg_port,
             new_pageserver_http_port)
    pageserver_bin = pathlib.Path(zenith_binpath) / 'pageserver'

    new_pageserver_http = ZenithPageserverHttpClient(
        port=new_pageserver_http_port, auth_token=None)

    with new_pageserver_helper(new_pageserver_dir, pageserver_bin,
                               remote_storage_mock_path,
                               new_pageserver_pg_port,
                               new_pageserver_http_port):

        # call to attach timeline to new pageserver
        new_pageserver_http.timeline_attach(tenant, timeline)
        # the new pageserver should be in sync (modulo WAL tail or vacuum activity)
        # with the old one, because there have been no new writes since the checkpoint
        new_timeline_detail = wait_for(
            number_of_iterations=5,
            interval=1,
            func=lambda: assert_local(new_pageserver_http, tenant, timeline))

        # when the load is active these checks can break because the LSNs are not
        # static, so let's check with some margin
        assert_abs_margin_ratio(
            lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
            lsn_from_hex(timeline_detail['local']['disk_consistent_lsn']),
            0.03)

        # callmemaybe to start replication from safekeeper to the new pageserver
        # when there is no load there is a clean checkpoint and no wal delta
        # needs to be streamed to the new pageserver
        # TODO (rodionov) use attach to start replication
        with pg_cur(PgProtocol(host='localhost',
                               port=new_pageserver_pg_port)) as cur:
            # "callmemaybe {} {} host={} port={} options='-c ztimelineid={} ztenantid={}'"
            safekeeper_connstring = f"host=localhost port={env.safekeepers[0].port.pg} options='-c ztimelineid={timeline} ztenantid={tenant} pageserver_connstr=postgresql://no_user:@localhost:{new_pageserver_pg_port}'"
            cur.execute("callmemaybe {} {} {}".format(tenant.hex, timeline.hex,
                                                      safekeeper_connstring))

        tenant_pg.stop()

        # rewrite the zenith cli config to use the new pageserver for the basebackup
        # when starting the new compute node
        cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
        cli_config_lines[
            -2] = f"listen_http_addr = 'localhost:{new_pageserver_http_port}'"
        cli_config_lines[
            -1] = f"listen_pg_addr = 'localhost:{new_pageserver_pg_port}'"
        (env.repo_dir / 'config').write_text('\n'.join(cli_config_lines))

        tenant_pg_config_file_path = pathlib.Path(tenant_pg.config_file_path())
        with tenant_pg_config_file_path.open('a') as tenant_pg_config_file:
            tenant_pg_config_file.write(
                f"\nzenith.page_server_connstring = 'postgresql://no_user:@localhost:{new_pageserver_pg_port}'"
            )

        tenant_pg.start()

        # detach the tenant from the old pageserver before we check that all the
        # data is there, to be sure the old pageserver is no longer involved;
        # if it still is, we will see errors
        pageserver_http.timeline_detach(tenant, timeline)

        with pg_cur(tenant_pg) as cur:
            # check that data is still there
            cur.execute("SELECT sum(key) FROM t")
            assert cur.fetchone() == (500500, )
            # check that we can write new data
            cur.execute(
                "INSERT INTO t SELECT generate_series(1001,2000), 'some payload'"
            )
            cur.execute("SELECT sum(key) FROM t")
            assert cur.fetchone() == (2001000, )

        if with_load == 'with_load':
            assert load_ok_event.wait(3)
            log.info('stopping load thread')
            load_stop_event.set()
            load_thread.join(timeout=10)
            log.info('load thread stopped')

        # restore the cli config to point back at the old pageserver so it can be
        # shut down cleanly via the zenith cli; the new pageserver will be shut
        # down by the context manager
        cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
        cli_config_lines[
            -2] = f"listen_http_addr = 'localhost:{env.pageserver.service_port.http}'"
        cli_config_lines[
            -1] = f"listen_pg_addr = 'localhost:{env.pageserver.service_port.pg}'"
        (env.repo_dir / 'config').write_text('\n'.join(cli_config_lines))
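
# A minimal sketch of the load() function driven by load_thread above: it keeps
# inserting into the load table until stop_event is set and signals via
# load_ok_event whether writes are currently succeeding, so the test can tell
# that the load survived the relocation. The details below are assumptions for
# illustration, not taken from this file.
def load(pg, stop_event, load_ok_event):
    inserted_ctr = 0
    failed = False
    while not stop_event.is_set():
        try:
            with pg_cur(pg) as cur:
                cur.execute("INSERT INTO load VALUES ('some payload')")
                inserted_ctr += 1
        except Exception:
            # writes are expected to fail briefly while the compute node restarts
            failed = True
            load_ok_event.clear()
        else:
            if failed:
                # after recovery, check that no acknowledged insert was lost
                with pg_cur(pg) as cur:
                    cur.execute("SELECT count(*) FROM load")
                    assert cur.fetchone() == (inserted_ctr, )
                failed = False
            load_ok_event.set()
    log.info("load thread exits")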
Example #30
0
def test_restart_compute(zenith_env_builder: ZenithEnvBuilder,
                         with_safekeepers: bool):
    zenith_env_builder.pageserver_auth_enabled = True
    if with_safekeepers:
        zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_restart_compute')
    pg = env.postgres.create_start('test_restart_compute')
    log.info("postgres is running on 'test_restart_compute' branch")

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute('CREATE TABLE t(key int primary key, value text)')
            cur.execute(
                "INSERT INTO t SELECT generate_series(1,100000), 'payload'")
            cur.execute('SELECT sum(key) FROM t')
            r = cur.fetchone()
            assert r == (5000050000, )
            log.info(f"res = {r}")

    # Remove data directory and restart
    pg.stop_and_destroy().create_start('test_restart_compute')

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # We can still see the rows
            cur.execute('SELECT sum(key) FROM t')
            r = cur.fetchone()
            assert r == (5000050000, )
            log.info(f"res = {r}")

            # Insert another row
            cur.execute("INSERT INTO t VALUES (100001, 'payload2')")
            cur.execute('SELECT count(*) FROM t')

            r = cur.fetchone()
            assert r == (100001, )
            log.info(f"res = {r}")

    # Again remove data directory and restart
    pg.stop_and_destroy().create_start('test_restart_compute')

    # The following select causes lots of FPIs and increases the probability of
    # safekeepers lagging behind after query completion
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # We can still see the rows
            cur.execute('SELECT count(*) FROM t')

            r = cur.fetchone()
            assert r == (100001, )
            log.info(f"res = {r}")

    # And again remove data directory and restart
    pg.stop_and_destroy().create_start('test_restart_compute')

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # We can still see the rows
            cur.execute('SELECT count(*) FROM t')

            r = cur.fetchone()
            assert r == (100001, )
            log.info(f"res = {r}")