def test_pageserver_catchup_while_compute_down(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_pageserver_catchup_while_compute_down')
    # Make shared_buffers large to ensure we won't query pageserver while it is down.
    pg = env.postgres.create_start('test_pageserver_catchup_while_compute_down',
                                   config_lines=['shared_buffers=512MB'])

    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    # Create table, and insert some rows.
    cur.execute('CREATE TABLE foo (t text)')
    cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 10000) g
    ''')

    cur.execute("SELECT count(*) FROM foo")
    assert cur.fetchone() == (10000, )

    # Stop and restart pageserver. This is a more or less graceful shutdown, although
    # the page server doesn't currently have a shutdown routine so there's no difference
    # between stopping and crashing.
    env.pageserver.stop()

    # Insert some more rows. Since the pageserver is shut down, these will exist
    # only on the safekeepers.
    cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 10000) g
    ''')

    # Stop safekeepers gracefully.
    env.safekeepers[0].stop()
    env.safekeepers[1].stop()
    env.safekeepers[2].stop()

    # Start everything again: the safekeepers must synchronize and the pageserver
    # must catch up.
    env.pageserver.start()
    env.safekeepers[0].start()
    env.safekeepers[1].start()
    env.safekeepers[2].start()

    # Restart the compute node.
    pg.stop_and_destroy().create_start('test_pageserver_catchup_while_compute_down')

    # Ensure that the basebackup went correctly and the pageserver returned all data.
    pg_conn = pg.connect()
    cur = pg_conn.cursor()
    cur.execute("SELECT count(*) FROM foo")
    assert cur.fetchone() == (20000, )
def test_timeline_status(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_timeline_status')
    pg = env.postgres.create_start('test_timeline_status')

    wa = env.safekeepers[0]
    wa_http_cli = wa.http_client()
    wa_http_cli.check_status()

    # learn zenith timeline from compute
    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
    timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]

    # fetch something sensible from status
    epoch = wa_http_cli.timeline_status(tenant_id, timeline_id).acceptor_epoch

    pg.safe_psql("create table t(i int)")

    # ensure epoch goes up after reboot
    pg.stop().start()
    pg.safe_psql("insert into t values(10)")

    epoch_after_reboot = wa_http_cli.timeline_status(tenant_id, timeline_id).acceptor_epoch
    assert epoch_after_reboot > epoch
def test_race_conditions(zenith_env_builder: ZenithEnvBuilder, stop_value):
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_safekeepers_race_conditions')
    pg = env.postgres.create_start('test_safekeepers_race_conditions')

    # we rely upon autocommit after each statement
    # as waiting for acceptors happens there
    pg_conn = pg.connect()
    cur = pg_conn.cursor()
    cur.execute('CREATE TABLE t(key int primary key, value text)')

    proc = Process(target=xmas_garland, args=(env.safekeepers, stop_value))
    proc.start()

    for i in range(1000):
        cur.execute("INSERT INTO t values (%s, 'payload');", (i + 1, ))

    cur.execute('SELECT sum(key) FROM t')
    assert cur.fetchone() == (500500, )

    stop_value.value = 1
    proc.join()
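# The test above relies on an `xmas_garland` helper and a shared `stop_value`
# (presumably a multiprocessing.Value fixture) defined elsewhere in this module.
# The function below is only an illustrative sketch of the idea -- randomly stop
# and start safekeepers in a loop until the flag is set -- not the real helper.
def xmas_garland_sketch(safekeepers, stop_value):
    """Illustrative only: blink safekeepers on and off until stop_value is set."""
    import random
    import time

    while stop_value.value == 0:
        victim = random.choice(safekeepers)
        victim.stop()
        time.sleep(0.5)
        victim.start()
        time.sleep(0.5)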
def test_restarts(zenith_env_builder: ZenithEnvBuilder):
    fault_probability = 0.01
    n_inserts = 1000
    n_acceptors = 3

    zenith_env_builder.num_safekeepers = n_acceptors
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_safekeepers_restarts')
    pg = env.postgres.create_start('test_safekeepers_restarts')

    # we rely upon autocommit after each statement
    # as waiting for acceptors happens there
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    failed_node = None
    cur.execute('CREATE TABLE t(key int primary key, value text)')
    for i in range(n_inserts):
        cur.execute("INSERT INTO t values (%s, 'payload');", (i + 1, ))

        if random.random() <= fault_probability:
            if failed_node is None:
                failed_node = env.safekeepers[random.randrange(0, n_acceptors)]
                failed_node.stop()
            else:
                failed_node.start()
                failed_node = None

    cur.execute('SELECT sum(key) FROM t')
    assert cur.fetchone() == (500500, )
def test_tenants_normal_work(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
    """Tests tenants with and without wal acceptors"""
    if with_safekeepers:
        zenith_env_builder.num_safekeepers = 3

    env = zenith_env_builder.init_start()
    tenant_1 = env.zenith_cli.create_tenant()
    tenant_2 = env.zenith_cli.create_tenant()

    env.zenith_cli.create_timeline(
        f'test_tenants_normal_work_with_safekeepers{with_safekeepers}', tenant_id=tenant_1)
    env.zenith_cli.create_timeline(
        f'test_tenants_normal_work_with_safekeepers{with_safekeepers}', tenant_id=tenant_2)

    pg_tenant1 = env.postgres.create_start(
        f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
        tenant_id=tenant_1,
    )
    pg_tenant2 = env.postgres.create_start(
        f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
        tenant_id=tenant_2,
    )

    for pg in [pg_tenant1, pg_tenant2]:
        with closing(pg.connect()) as conn:
            with conn.cursor() as cur:
                # we rely upon autocommit after each statement
                # as waiting for acceptors happens there
                cur.execute("CREATE TABLE t(key int primary key, value text)")
                cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
                cur.execute("SELECT sum(key) FROM t")
                assert cur.fetchone() == (5000050000, )
def test_backpressure_received_lsn_lag(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    # Create a branch for us
    env.zenith_cli.create_branch('test_backpressure')

    pg = env.postgres.create_start('test_backpressure',
                                   config_lines=['max_replication_write_lag=30MB'])
    log.info("postgres is running on 'test_backpressure' branch")

    # set up the check thread
    check_stop_event = threading.Event()
    check_thread = threading.Thread(target=check_backpressure, args=(pg, check_stop_event))
    check_thread.start()

    # Configure failpoint to slow down walreceiver ingest
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
            pscur.execute("failpoints walreceiver-after-ingest=sleep(20)")

    # FIXME: wait for the check thread to start.
    #
    # If the load starts too soon, the check thread cannot even connect to the
    # database, because the connection has to wait for the replay LSN to arrive
    # due to the lag.
    time.sleep(2)

    with pg_cur(pg) as cur:
        # Create and initialize test table
        cur.execute("CREATE TABLE foo(x bigint)")

        inserts_to_do = 2000000
        rows_inserted = 0

        while check_thread.is_alive() and rows_inserted < inserts_to_do:
            try:
                cur.execute("INSERT INTO foo select from generate_series(1, 100000)")
                rows_inserted += 100000
            except Exception as e:
                if check_thread.is_alive():
                    log.info('stopping check thread')
                    check_stop_event.set()
                    check_thread.join()
                    assert False, f"Exception {e} while inserting rows, but WAL lag is within configured threshold. That means backpressure is not tuned properly"
                else:
                    assert False, f"Exception {e} while inserting rows and WAL lag overflowed configured threshold. That means backpressure doesn't work."

        log.info(f"inserted {rows_inserted} rows")

    if check_thread.is_alive():
        log.info('stopping check thread')
        check_stop_event.set()
        check_thread.join()
        log.info('check thread stopped')
    else:
        assert False, "WAL lag overflowed configured threshold. That means backpressure doesn't work."
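# `check_backpressure` is defined elsewhere in this module; the test above only
# needs it to exit (so that check_thread.is_alive() turns False) once the WAL lag
# exceeds the configured threshold. A hedged sketch of that idea follows; the lag
# query and the `backpressure_lsns()` column names are assumptions, not the real
# helper's implementation.
def check_backpressure_sketch(pg, stop_event, polling_interval=1.0):
    """Illustrative only: poll the received-LSN lag; return once it exceeds the limit."""
    max_lag = 30 * 1024 * 1024  # mirrors max_replication_write_lag=30MB used above
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            while not stop_event.is_set():
                # hypothetical lag query against the compute's backpressure view
                cur.execute("SELECT pg_wal_lsn_diff(pg_current_wal_flush_lsn(), received_lsn) "
                            "FROM backpressure_lsns()")
                lag = cur.fetchone()[0]
                if lag > max_lag:
                    # thread exits; the main loop detects it is no longer alive
                    return
                time.sleep(polling_interval)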
def test_pageserver_http_api_client_auth_enabled(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.pageserver_auth_enabled = True
    env = zenith_env_builder.init_start()

    management_token = env.auth_keys.generate_management_token()

    client = env.pageserver.http_client(auth_token=management_token)
    check_client(client, env.initial_tenant)
def test_cli_ipv4_listeners(zenith_env_builder: ZenithEnvBuilder):
    # Start with single sk
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    # Connect to sk port on v4 loopback
    res = requests.get(f'http://127.0.0.1:{env.safekeepers[0].port.http}/v1/status')
    assert res.ok
def test_wal_deleted_after_broadcast(zenith_env_builder: ZenithEnvBuilder):
    # used to calculate delta in collect_stats
    last_lsn = 0.0

    # returns LSN and pg_wal size, both in MB
    def collect_stats(pg: Postgres, cur, enable_logs=True):
        nonlocal last_lsn
        assert pg.pgdata_dir is not None

        log.info('executing INSERT to generate WAL')
        cur.execute("select pg_current_wal_lsn()")
        current_lsn = lsn_from_hex(cur.fetchone()[0]) / 1024 / 1024
        pg_wal_size = get_dir_size(os.path.join(pg.pgdata_dir, 'pg_wal')) / 1024 / 1024
        if enable_logs:
            log.info(f"LSN delta: {current_lsn - last_lsn} MB, current WAL size: {pg_wal_size} MB")
        last_lsn = current_lsn
        return current_lsn, pg_wal_size

    # generates ~20MB of WAL, to create at least one new segment
    def generate_wal(cur):
        cur.execute("INSERT INTO t SELECT generate_series(1,300000), 'payload'")

    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_wal_deleted_after_broadcast')
    # Adjust checkpoint config to prevent keeping old WAL segments
    pg = env.postgres.create_start('test_wal_deleted_after_broadcast',
                                   config_lines=[
                                       'min_wal_size=32MB',
                                       'max_wal_size=32MB',
                                       'log_checkpoints=on'
                                   ])

    pg_conn = pg.connect()
    cur = pg_conn.cursor()
    cur.execute('CREATE TABLE t(key int, value text)')

    collect_stats(pg, cur)

    # generate WAL to simulate normal workload
    for i in range(5):
        generate_wal(cur)
        collect_stats(pg, cur)

    log.info('executing checkpoint')
    cur.execute('CHECKPOINT')
    wal_size_after_checkpoint = collect_stats(pg, cur)[1]

    # there shouldn't be more than 2 WAL segments (but dir may have archive_status files)
    assert wal_size_after_checkpoint < 16 * 2.5
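# `get_dir_size` comes from the shared test utilities. For reference, a
# directory-size helper along these lines would be enough for the calculation
# above (illustrative sketch, not necessarily the real implementation):
def get_dir_size_sketch(path: str) -> int:
    """Return the total size, in bytes, of all regular files under `path`."""
    total = 0
    for root, _dirs, files in os.walk(path):
        for name in files:
            total += os.path.getsize(os.path.join(root, name))
    return total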
def test_restarts_under_load(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_safekeepers_restarts_under_load')
    # Enable backpressure with 1MB maximal lag, because we don't want to block on
    # `wait_for_lsn()` for too long
    pg = env.postgres.create_start('test_safekeepers_restarts_under_load',
                                   config_lines=['max_replication_write_lag=1MB'])

    asyncio.run(run_restarts_under_load(env, pg, env.safekeepers))
def test_pageserver_restart(zenith_env_builder: ZenithEnvBuilder):
    # One safekeeper is enough for this test.
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_pageserver_restart')
    pg = env.postgres.create_start('test_pageserver_restart')

    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    # Create table, and insert some rows. Make it big enough that it doesn't fit in
    # shared_buffers, otherwise the SELECT after restart will just return answer
    # from shared_buffers without hitting the page server, which defeats the point
    # of this test.
    cur.execute('CREATE TABLE foo (t text)')
    cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 100000) g
    ''')

    # Verify that the table is larger than shared_buffers
    cur.execute('''
        select setting::int * pg_size_bytes(unit) as shared_buffers,
               pg_relation_size('foo') as tbl_size
        from pg_settings where name = 'shared_buffers'
    ''')
    row = cur.fetchone()
    log.info(f"shared_buffers is {row[0]}, table size {row[1]}")
    assert int(row[0]) < int(row[1])

    # Stop and restart pageserver. This is a more or less graceful shutdown, although
    # the page server doesn't currently have a shutdown routine so there's no difference
    # between stopping and crashing.
    env.pageserver.stop()
    env.pageserver.start()

    # Stopping the pageserver breaks the connection from the postgres backend to
    # the page server, and causes the next query on the connection to fail. Start a new
    # postgres connection too, to avoid that error. (Ideally, the compute node would
    # handle that and retry internally, without propagating the error to the user, but
    # currently it doesn't...)
    pg_conn = pg.connect()
    cur = pg_conn.cursor()
    cur.execute("SELECT count(*) FROM foo")
    assert cur.fetchone() == (100000, )

    # Stop the page server by force, and restart it
    env.pageserver.stop()
    env.pageserver.start()
def test_wal_removal(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 2
    zenith_env_builder.broker = True  # to advance remote_consistent_lsn
    zenith_env_builder.enable_local_fs_remote_storage()
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_safekeepers_wal_removal')
    pg = env.postgres.create_start('test_safekeepers_wal_removal')

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # we rely upon autocommit after each statement
            # as waiting for acceptors happens there
            cur.execute('CREATE TABLE t(key int primary key, value text)')
            cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")

    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
    timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]

    # force checkpoint to advance remote_consistent_lsn
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor() as pscur:
            pscur.execute(f"checkpoint {tenant_id} {timeline_id}")

    # We will wait for removal of the first segment. Make sure it exists for starters.
    first_segments = [
        os.path.join(sk.data_dir(), tenant_id, timeline_id, '000000010000000000000001')
        for sk in env.safekeepers
    ]
    assert all(os.path.exists(p) for p in first_segments)

    http_cli = env.safekeepers[0].http_client()
    # Pretend WAL is offloaded to s3.
    http_cli.record_safekeeper_info(tenant_id, timeline_id, {'s3_wal_lsn': 'FFFFFFFF/FEFFFFFF'})

    # wait till the first segment is removed on all safekeepers
    started_at = time.time()
    while True:
        if all(not os.path.exists(p) for p in first_segments):
            break
        elapsed = time.time() - started_at
        if elapsed > 20:
            raise RuntimeError(
                f"timed out waiting {elapsed:.0f}s for the first segment to get removed")
        time.sleep(0.5)
def test_next_xid(zenith_env_builder: ZenithEnvBuilder):
    # One safekeeper is enough for this test.
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    pg = env.postgres.create_start('main')

    conn = pg.connect()
    cur = conn.cursor()
    cur.execute('CREATE TABLE t(x integer)')

    iterations = 32
    for i in range(1, iterations + 1):
        print(f'iteration {i} / {iterations}')

        # Kill and restart the pageserver.
        pg.stop()
        env.pageserver.stop(immediate=True)
        env.pageserver.start()
        pg.start()

        retry_sleep = 0.5
        max_retries = 200
        retries = 0
        while True:
            try:
                conn = pg.connect()
                cur = conn.cursor()
                cur.execute(f"INSERT INTO t values({i})")
                conn.close()
            except Exception as error:
                # It's normal that it takes some time for the pageserver to
                # restart, and for the connection to fail until it does. It
                # should eventually recover, so retry until it succeeds.
                print(f'failed: {error}')
                if retries < max_retries:
                    retries += 1
                    print(f'retry {retries} / {max_retries}')
                    time.sleep(retry_sleep)
                    continue
                else:
                    raise
            break

    conn = pg.connect()
    cur = conn.cursor()
    cur.execute("SELECT count(*) FROM t")
    assert cur.fetchone() == (iterations, )
def test_cli_start_stop(zenith_env_builder: ZenithEnvBuilder):
    # Start with single sk
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    # Stop default ps/sk
    env.zenith_cli.pageserver_stop()
    env.zenith_cli.safekeeper_stop()

    # Default start
    res = env.zenith_cli.raw_cli(["start"])
    res.check_returncode()

    # Default stop
    res = env.zenith_cli.raw_cli(["stop"])
    res.check_returncode()
def test_sync_safekeepers(zenith_env_builder: ZenithEnvBuilder,
                          pg_bin: PgBin,
                          port_distributor: PortDistributor):
    # We don't really need the full environment for this test, just the
    # safekeepers would be enough.
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    timeline_id = uuid.uuid4()
    tenant_id = uuid.uuid4()

    # write config for proposer
    pgdata_dir = os.path.join(env.repo_dir, "proposer_pgdata")
    pg = ProposerPostgres(pgdata_dir, pg_bin, timeline_id, tenant_id,
                          '127.0.0.1', port_distributor.get_port())
    pg.create_dir_config(env.get_safekeeper_connstrs())

    # valid lsn, which is not at the segment start, nor in the zero segment
    epoch_start_lsn = 0x16B9188  # 0/16B9188
    begin_lsn = epoch_start_lsn

    # append and commit WAL
    lsn_after_append = []
    for i in range(3):
        res = env.safekeepers[i].append_logical_message(
            tenant_id,
            timeline_id,
            {
                "lm_prefix": "prefix",
                "lm_message": "message",
                "set_commit_lsn": True,
                "send_proposer_elected": True,
                "term": 2,
                "begin_lsn": begin_lsn,
                "epoch_start_lsn": epoch_start_lsn,
                "truncate_lsn": epoch_start_lsn,
            },
        )
        lsn_hex = lsn_to_hex(res["inserted_wal"]["end_lsn"])
        lsn_after_append.append(lsn_hex)
        log.info(f"safekeeper[{i}] lsn after append: {lsn_hex}")

    # run sync safekeepers
    lsn_after_sync = pg.sync_safekeepers()
    log.info(f"lsn after sync = {lsn_after_sync}")

    assert all(lsn_after_sync == lsn for lsn in lsn_after_append)
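# `lsn_to_hex` and `lsn_from_hex` are conversion helpers from the shared test
# fixtures. For reference, Postgres' "X/Y" LSN notation maps to a 64-bit integer
# roughly as sketched below (assumed behavior, shown only to make the LSN
# arithmetic in these tests easier to follow):
def lsn_from_hex_sketch(lsn_hex: str) -> int:
    """Parse an LSN in 'X/Y' format into a 64-bit integer."""
    hi, lo = lsn_hex.split('/')
    return (int(hi, 16) << 32) + int(lo, 16)


def lsn_to_hex_sketch(lsn: int) -> str:
    """Format a 64-bit integer LSN as 'X/Y'."""
    return f'{lsn >> 32:X}/{lsn & 0xFFFFFFFF:X}'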
def test_normal_work(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 3
    zenith_env_builder.broker = True
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_safekeepers_normal_work')
    pg = env.postgres.create_start('test_safekeepers_normal_work')

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # we rely upon autocommit after each statement
            # as waiting for acceptors happens there
            cur.execute('CREATE TABLE t(key int primary key, value text)')
            cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
            cur.execute('SELECT sum(key) FROM t')
            assert cur.fetchone() == (5000050000, )
def test_unavailability(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 2
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_safekeepers_unavailability')
    pg = env.postgres.create_start('test_safekeepers_unavailability')

    # we rely upon autocommit after each statement
    # as waiting for acceptors happens there
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    # check basic work with table
    cur.execute('CREATE TABLE t(key int primary key, value text)')
    cur.execute("INSERT INTO t values (1, 'payload')")

    # shut down one of the two acceptors, i.e. lose the majority
    env.safekeepers[0].stop()

    proc = Process(target=delayed_safekeeper_start, args=(env.safekeepers[0], ))
    proc.start()

    start = time.time()
    cur.execute("INSERT INTO t values (2, 'payload')")
    # ensure that the query above was hanging while the acceptor was down
    assert (time.time() - start) >= start_delay_sec
    proc.join()

    # for the world's balance, do the same with the second acceptor
    env.safekeepers[1].stop()

    proc = Process(target=delayed_safekeeper_start, args=(env.safekeepers[1], ))
    proc.start()

    start = time.time()
    cur.execute("INSERT INTO t values (3, 'payload')")
    # ensure that the query above was hanging while the acceptor was down
    assert (time.time() - start) >= start_delay_sec
    proc.join()

    cur.execute("INSERT INTO t values (4, 'payload')")
    cur.execute('SELECT sum(key) FROM t')
    assert cur.fetchone() == (10, )
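# `delayed_safekeeper_start` and `start_delay_sec` are module-level helpers used
# by the test above. A plausible sketch of the idea (illustrative only; the real
# helper just needs to bring the safekeeper back after a known delay):
start_delay_sec_sketch = 2


def delayed_safekeeper_start_sketch(sk):
    """Sleep for the agreed delay, then restart the given safekeeper."""
    time.sleep(start_delay_sec_sketch)
    sk.start()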
def test_startup(zenith_env_builder: ZenithEnvBuilder, zenbenchmark: ZenithBenchmarker):
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    # Start
    env.zenith_cli.create_branch('test_startup')
    with zenbenchmark.record_duration("startup_time"):
        pg = env.postgres.create_start('test_startup')
        pg.safe_psql("select 1;")

    # Restart
    pg.stop_and_destroy()
    with zenbenchmark.record_duration("restart_time"):
        pg.create_start('test_startup')
        pg.safe_psql("select 1;")

    # Fill up
    num_rows = 1000000  # 30 MB
    num_tables = 100
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            for i in range(num_tables):
                cur.execute(f'create table t_{i} (i integer);')
                cur.execute(f'insert into t_{i} values (generate_series(1,{num_rows}));')

    # Read
    with zenbenchmark.record_duration("read_time"):
        pg.safe_psql("select * from t_0;")

    # Read again
    with zenbenchmark.record_duration("second_read_time"):
        pg.safe_psql("select * from t_0;")

    # Restart
    pg.stop_and_destroy()
    with zenbenchmark.record_duration("restart_with_data"):
        pg.create_start('test_startup')
        pg.safe_psql("select 1;")

    # Read
    with zenbenchmark.record_duration("read_after_restart"):
        pg.safe_psql("select * from t_0;")
def test_compute_auth_to_pageserver(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
    zenith_env_builder.pageserver_auth_enabled = True
    if with_safekeepers:
        zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    branch = f'test_compute_auth_to_pageserver{with_safekeepers}'
    env.zenith_cli.create_branch(branch)
    pg = env.postgres.create_start(branch)

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # we rely upon autocommit after each statement
            # as waiting for acceptors happens there
            cur.execute('CREATE TABLE t(key int primary key, value text)')
            cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
            cur.execute('SELECT sum(key) FROM t')
            assert cur.fetchone() == (5000050000, )
def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.pageserver_auth_enabled = True
    env = zenith_env_builder.init_start()

    ps = env.pageserver

    tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant.hex)
    tenant_http_client = env.pageserver.http_client(tenant_token)

    invalid_tenant_token = env.auth_keys.generate_tenant_token(uuid4().hex)
    invalid_tenant_http_client = env.pageserver.http_client(invalid_tenant_token)

    management_token = env.auth_keys.generate_management_token()
    management_http_client = env.pageserver.http_client(management_token)

    # "set FOO" does not invoke an auth check; it only decodes the JWT and checks
    # it for validity. Check both tokens.
    ps.safe_psql("set FOO", password=tenant_token)
    ps.safe_psql("set FOO", password=management_token)

    new_timeline_id = env.zenith_cli.create_branch('test_pageserver_auth',
                                                   tenant_id=env.initial_tenant)

    # tenant can create branches
    tenant_http_client.timeline_create(tenant_id=env.initial_tenant,
                                       ancestor_timeline_id=new_timeline_id)
    # console can create branches for tenant
    management_http_client.timeline_create(tenant_id=env.initial_tenant,
                                           ancestor_timeline_id=new_timeline_id)

    # fail to create branch using token with different tenant_id
    with pytest.raises(ZenithPageserverApiException,
                       match='Forbidden: Tenant id mismatch. Permission denied'):
        invalid_tenant_http_client.timeline_create(tenant_id=env.initial_tenant,
                                                   ancestor_timeline_id=new_timeline_id)

    # create tenant using management token
    management_http_client.tenant_create()

    # fail to create tenant using tenant token
    with pytest.raises(
            ZenithPageserverApiException,
            match='Forbidden: Attempt to access management api with tenant scope. Permission denied'
    ):
        tenant_http_client.tenant_create()
def test_broker(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 3
    zenith_env_builder.broker = True
    zenith_env_builder.enable_local_fs_remote_storage()
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch("test_broker", "main")
    pg = env.postgres.create_start('test_broker')
    pg.safe_psql("CREATE TABLE t(key int primary key, value text)")

    # learn zenith timeline from compute
    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
    timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]

    # wait until remote_consistent_lsn gets advanced on all safekeepers
    clients = [sk.http_client() for sk in env.safekeepers]
    stat_before = [cli.timeline_status(tenant_id, timeline_id) for cli in clients]
    log.info(f"statuses are {stat_before}")

    pg.safe_psql("INSERT INTO t SELECT generate_series(1,100), 'payload'")

    # force checkpoint to advance remote_consistent_lsn
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor() as pscur:
            pscur.execute(f"checkpoint {tenant_id} {timeline_id}")

    # and wait till remote_consistent_lsn propagates to all safekeepers
    started_at = time.time()
    while True:
        stat_after = [cli.timeline_status(tenant_id, timeline_id) for cli in clients]
        if all(
                lsn_from_hex(s_after.remote_consistent_lsn) > lsn_from_hex(
                    s_before.remote_consistent_lsn)
                for s_after, s_before in zip(stat_after, stat_before)):
            break
        elapsed = time.time() - started_at
        if elapsed > 20:
            raise RuntimeError(
                f"timed out waiting {elapsed:.0f}s for remote_consistent_lsn propagation: "
                f"status before {stat_before}, status current {stat_after}")
        time.sleep(0.5)
def test_restarts_frequent_checkpoints(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_restarts_frequent_checkpoints')
    # Enable backpressure with 1MB maximal lag, because we don't want to block on
    # `wait_for_lsn()` for too long
    pg = env.postgres.create_start('test_restarts_frequent_checkpoints',
                                   config_lines=[
                                       'max_replication_write_lag=1MB',
                                       'min_wal_size=32MB',
                                       'max_wal_size=32MB',
                                       'log_checkpoints=on'
                                   ])

    # we try to simulate a large (flush_lsn - truncate_lsn) lag, to test that WAL segments
    # are not removed before being broadcast to all safekeepers, with the help of the
    # replication slot
    asyncio.run(run_restarts_under_load(env, pg, env.safekeepers, period_time=15, iterations=5))
def test_wal_restore(zenith_env_builder: ZenithEnvBuilder,
                     pg_bin: PgBin,
                     test_output_dir,
                     port_distributor: PortDistributor):
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()
    env.zenith_cli.create_branch("test_wal_restore")
    pg = env.postgres.create_start('test_wal_restore')
    pg.safe_psql("create table t as select generate_series(1,300000)")
    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
    env.zenith_cli.pageserver_stop()
    port = port_distributor.get_port()
    data_dir = os.path.join(test_output_dir, 'pgsql.restored')
    with VanillaPostgres(data_dir, PgBin(test_output_dir), port) as restored:
        pg_bin.run_capture([
            os.path.join(base_dir, 'libs/utils/scripts/restore_from_wal.sh'),
            os.path.join(pg_distrib_dir, 'bin'),
            os.path.join(test_output_dir, 'repo/safekeepers/sk1/{}/*'.format(tenant_id)),
            data_dir,
            str(port)
        ])
        restored.start()
        assert restored.safe_psql('select count(*) from t', user='******') == [(300000, )]
def test_bulk_tenant_create(
    zenith_env_builder: ZenithEnvBuilder,
    use_safekeepers: str,
    tenants_count: int,
    zenbenchmark,
):
    """Measure tenant creation time (with and without wal acceptors)"""
    if use_safekeepers == 'with_wa':
        zenith_env_builder.num_safekeepers = 3

    env = zenith_env_builder.init_start()

    time_slices = []

    for i in range(tenants_count):
        start = timeit.default_timer()

        tenant = env.zenith_cli.create_tenant()
        env.zenith_cli.create_timeline(
            f'test_bulk_tenant_create_{tenants_count}_{i}_{use_safekeepers}', tenant_id=tenant)

        # FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now?
        #if use_safekeepers == 'with_sa':
        #    wa_factory.start_n_new(3)

        pg_tenant = env.postgres.create_start(
            f'test_bulk_tenant_create_{tenants_count}_{i}_{use_safekeepers}', tenant_id=tenant)

        end = timeit.default_timer()
        time_slices.append(end - start)

        pg_tenant.stop()

    zenbenchmark.record('tenant_creation_time',
                        sum(time_slices) / len(time_slices),
                        's',
                        report=MetricReport.LOWER_IS_BETTER)
def test_tenant_config(zenith_env_builder: ZenithEnvBuilder):
    """Test per-tenant configuration"""
    env = zenith_env_builder.init_start()

    tenant = env.zenith_cli.create_tenant(
        conf={
            'checkpoint_distance': '10000',
            'compaction_target_size': '1048576',
            'compaction_period': '60sec',
            'compaction_threshold': '20',
            'gc_horizon': '1024',
            'gc_period': '100sec',
            'pitr_interval': '3600sec',
        })

    env.zenith_cli.create_timeline('test_tenant_conf', tenant_id=tenant)
    pg = env.postgres.create_start(
        "test_tenant_conf",
        "main",
        tenant,
    )

    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor() as pscur:
            pscur.execute(f"show {tenant.hex}")
            assert pscur.fetchone() == (10000, 1048576, 60, 20, 1024, 100, 3600)

    # update the config and ensure that it has changed
    env.zenith_cli.config_tenant(tenant_id=tenant,
                                 conf={
                                     'checkpoint_distance': '100000',
                                     'compaction_target_size': '1048576',
                                     'compaction_period': '30sec',
                                     'compaction_threshold': '15',
                                     'gc_horizon': '256',
                                     'gc_period': '10sec',
                                     'pitr_interval': '360sec',
                                 })

    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor() as pscur:
            pscur.execute(f"show {tenant.hex}")
            assert pscur.fetchone() == (100000, 1048576, 30, 15, 256, 10, 360)
def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
    # Use a safekeeper in this test to avoid a subtle race condition.
    # Without a safekeeper, walreceiver reconnection can get stuck
    # because of an IO deadlock.
    #
    # See https://github.com/zenithdb/zenith/issues/1068
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_branch_behind')
    pgmain = env.postgres.create_start('test_branch_behind')
    log.info("postgres is running on 'test_branch_behind' branch")

    main_pg_conn = pgmain.connect()
    main_cur = main_pg_conn.cursor()

    main_cur.execute("SHOW zenith.zenith_timeline")
    timeline = main_cur.fetchone()[0]

    # Create table, and insert the first 100 rows
    main_cur.execute('CREATE TABLE foo (t text)')

    # keep some early lsn to test branch creation on an out-of-date lsn
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    gced_lsn = main_cur.fetchone()[0]

    main_cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 100) g
    ''')
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_a = main_cur.fetchone()[0]
    log.info(f'LSN after 100 rows: {lsn_a}')

    # Insert some more rows. (This generates enough WAL to fill a few segments.)
    main_cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 200000) g
    ''')
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_b = main_cur.fetchone()[0]
    log.info(f'LSN after 200100 rows: {lsn_b}')

    # Branch at the point where only 100 rows were inserted
    env.zenith_cli.create_branch('test_branch_behind_hundred',
                                 'test_branch_behind',
                                 ancestor_start_lsn=lsn_a)

    # Insert many more rows. This generates enough WAL to fill a few segments.
    main_cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 200000) g
    ''')
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_c = main_cur.fetchone()[0]
    log.info(f'LSN after 400100 rows: {lsn_c}')

    # Branch at the point where only 200100 rows were inserted
    env.zenith_cli.create_branch('test_branch_behind_more',
                                 'test_branch_behind',
                                 ancestor_start_lsn=lsn_b)

    pg_hundred = env.postgres.create_start('test_branch_behind_hundred')
    pg_more = env.postgres.create_start('test_branch_behind_more')

    # On the 'hundred' branch, we should see only 100 rows
    hundred_pg_conn = pg_hundred.connect()
    hundred_cur = hundred_pg_conn.cursor()
    hundred_cur.execute('SELECT count(*) FROM foo')
    assert hundred_cur.fetchone() == (100, )

    # On the 'more' branch, we should see 200100 rows
    more_pg_conn = pg_more.connect()
    more_cur = more_pg_conn.cursor()
    more_cur.execute('SELECT count(*) FROM foo')
    assert more_cur.fetchone() == (200100, )

    # All the rows are visible on the main branch
    main_cur.execute('SELECT count(*) FROM foo')
    assert main_cur.fetchone() == (400100, )

    # Check bad LSNs for branching

    # branch at segment boundary
    env.zenith_cli.create_branch('test_branch_segment_boundary',
                                 'test_branch_behind',
                                 ancestor_start_lsn="0/3000000")
    pg = env.postgres.create_start('test_branch_segment_boundary')
    cur = pg.connect().cursor()
    cur.execute('SELECT 1')
    assert cur.fetchone() == (1, )

    # branch at pre-initdb lsn
    with pytest.raises(Exception, match="invalid branch start lsn"):
        env.zenith_cli.create_branch('test_branch_preinitdb', ancestor_start_lsn="0/42")

    # branch at pre-ancestor lsn
    with pytest.raises(Exception, match="less than timeline ancestor lsn"):
        env.zenith_cli.create_branch('test_branch_preinitdb',
                                     'test_branch_behind',
                                     ancestor_start_lsn="0/42")

    # check that we cannot create a branch based on garbage-collected data
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
            # call gc to advance latest_gc_cutoff_lsn
            pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
            row = pscur.fetchone()
            print_gc_result(row)

    with pytest.raises(Exception, match="invalid branch start lsn"):
        # this gced_lsn is pretty random, so if gc is disabled this wouldn't fail
        env.zenith_cli.create_branch('test_branch_create_fail',
                                     'test_branch_behind',
                                     ancestor_start_lsn=gced_lsn)

    # check that after gc everything is still there
    hundred_cur.execute('SELECT count(*) FROM foo')
    assert hundred_cur.fetchone() == (100, )

    more_cur.execute('SELECT count(*) FROM foo')
    assert more_cur.fetchone() == (200100, )

    main_cur.execute('SELECT count(*) FROM foo')
    assert main_cur.fetchone() == (400100, )
def test_restart_compute(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
    zenith_env_builder.pageserver_auth_enabled = True
    if with_safekeepers:
        zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_restart_compute')
    pg = env.postgres.create_start('test_restart_compute')
    log.info("postgres is running on 'test_restart_compute' branch")

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute('CREATE TABLE t(key int primary key, value text)')
            cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
            cur.execute('SELECT sum(key) FROM t')
            r = cur.fetchone()
            assert r == (5000050000, )
            log.info(f"res = {r}")

    # Remove data directory and restart
    pg.stop_and_destroy().create_start('test_restart_compute')

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # We can still see the row
            cur.execute('SELECT sum(key) FROM t')
            r = cur.fetchone()
            assert r == (5000050000, )
            log.info(f"res = {r}")

            # Insert another row
            cur.execute("INSERT INTO t VALUES (100001, 'payload2')")
            cur.execute('SELECT count(*) FROM t')
            r = cur.fetchone()
            assert r == (100001, )
            log.info(f"res = {r}")

    # Again remove data directory and restart
    pg.stop_and_destroy().create_start('test_restart_compute')

    # That SELECT causes lots of FPIs and increases the probability of safekeepers
    # lagging behind after query completion
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # We can still see the rows
            cur.execute('SELECT count(*) FROM t')
            r = cur.fetchone()
            assert r == (100001, )
            log.info(f"res = {r}")

    # And again remove data directory and restart
    pg.stop_and_destroy().create_start('test_restart_compute')

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # We can still see the rows
            cur.execute('SELECT count(*) FROM t')
            r = cur.fetchone()
            assert r == (100001, )
            log.info(f"res = {r}")
def test_timeline_size_quota(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()
    new_timeline_id = env.zenith_cli.create_branch('test_timeline_size_quota')

    client = env.pageserver.http_client()
    res = assert_local(client, env.initial_tenant, new_timeline_id)
    assert res['local']["current_logical_size"] == res['local'][
        "current_logical_size_non_incremental"]

    pgmain = env.postgres.create_start(
        "test_timeline_size_quota",
        # Set a small limit for the test
        config_lines=['zenith.max_cluster_size=30MB'])
    log.info("postgres is running on 'test_timeline_size_quota' branch")

    with closing(pgmain.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute("CREATE EXTENSION zenith")  # TODO move it to zenith_fixtures?

            cur.execute("CREATE TABLE foo (t text)")

            wait_for_pageserver_catchup(pgmain)

            # Insert many rows. This query must fail because of the space limit
            try:
                cur.execute('''
                    INSERT INTO foo
                        SELECT 'long string to consume some space' || g
                        FROM generate_series(1, 100000) g
                ''')

                wait_for_pageserver_catchup(pgmain)

                cur.execute('''
                    INSERT INTO foo
                        SELECT 'long string to consume some space' || g
                        FROM generate_series(1, 500000) g
                ''')

                # If we get here, the timeline size limit failed
                log.error("Query unexpectedly succeeded")
                assert False
            except psycopg2.errors.DiskFull as err:
                log.info(f"Query failed as expected with: {err}")

            # drop the table to free space
            cur.execute('DROP TABLE foo')

            wait_for_pageserver_catchup(pgmain)

            # create it again and insert some rows. This query must succeed
            cur.execute("CREATE TABLE foo (t text)")
            cur.execute('''
                INSERT INTO foo
                    SELECT 'long string to consume some space' || g
                    FROM generate_series(1, 10000) g
            ''')

            wait_for_pageserver_catchup(pgmain)

            cur.execute("SELECT * from pg_size_pretty(pg_cluster_size())")
            pg_cluster_size = cur.fetchone()
            log.info(f"pg_cluster_size = {pg_cluster_size}")
def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
                           port_distributor: PortDistributor,
                           with_load: str):
    zenith_env_builder.num_safekeepers = 1
    zenith_env_builder.enable_local_fs_remote_storage()
    env = zenith_env_builder.init_start()

    # create folder for remote storage mock
    remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage'

    tenant = env.zenith_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
    log.info("tenant to relocate %s", tenant)

    env.zenith_cli.create_root_branch('main', tenant_id=tenant)
    env.zenith_cli.create_branch('test_tenant_relocation', tenant_id=tenant)

    tenant_pg = env.postgres.create_start(branch_name='main',
                                          node_name='test_tenant_relocation',
                                          tenant_id=tenant)

    # insert some data
    with closing(tenant_pg.connect()) as conn:
        with conn.cursor() as cur:
            # save timeline for later gc call
            cur.execute("SHOW zenith.zenith_timeline")
            timeline = UUID(cur.fetchone()[0])
            log.info("timeline to relocate %s", timeline.hex)

            # we rely upon autocommit after each statement
            # as waiting for acceptors happens there
            cur.execute("CREATE TABLE t(key int primary key, value text)")
            cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'some payload'")
            cur.execute("SELECT sum(key) FROM t")
            assert cur.fetchone() == (500500, )

            cur.execute("SELECT pg_current_wal_flush_lsn()")
            current_lsn = lsn_from_hex(cur.fetchone()[0])

    pageserver_http = env.pageserver.http_client()

    # wait until pageserver receives that data
    wait_for_last_record_lsn(pageserver_http, tenant, timeline, current_lsn)
    timeline_detail = assert_local(pageserver_http, tenant, timeline)

    if with_load == 'with_load':
        # create load table
        with pg_cur(tenant_pg) as cur:
            cur.execute("CREATE TABLE load(value text)")

        load_stop_event = threading.Event()
        load_ok_event = threading.Event()
        load_thread = threading.Thread(target=load,
                                       args=(tenant_pg, load_stop_event, load_ok_event))
        load_thread.start()

    # run checkpoint manually to be sure that data landed in remote storage
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor() as pscur:
            pscur.execute(f"checkpoint {tenant.hex} {timeline.hex}")

    # wait until pageserver successfully uploaded a checkpoint to remote storage
    wait_for_upload(pageserver_http, tenant, timeline, current_lsn)

    log.info("initializing new pageserver")
    # bootstrap second pageserver
    new_pageserver_dir = env.repo_dir / 'new_pageserver'
    new_pageserver_dir.mkdir()

    new_pageserver_pg_port = port_distributor.get_port()
    new_pageserver_http_port = port_distributor.get_port()
    log.info("new pageserver ports pg %s http %s", new_pageserver_pg_port, new_pageserver_http_port)
    pageserver_bin = pathlib.Path(zenith_binpath) / 'pageserver'

    new_pageserver_http = ZenithPageserverHttpClient(port=new_pageserver_http_port,
                                                     auth_token=None)

    with new_pageserver_helper(new_pageserver_dir,
                               pageserver_bin,
                               remote_storage_mock_path,
                               new_pageserver_pg_port,
                               new_pageserver_http_port):

        # call to attach timeline to new pageserver
        new_pageserver_http.timeline_attach(tenant, timeline)

        # new pageserver should be in sync (modulo wal tail or vacuum activity) with the old
        # one because there were no new writes since the checkpoint
        new_timeline_detail = wait_for(
            number_of_iterations=5,
            interval=1,
            func=lambda: assert_local(new_pageserver_http, tenant, timeline))

        # when load is active these checks can break because lsns are not static,
        # so let's check with some margin
        assert_abs_margin_ratio(
            lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
            lsn_from_hex(timeline_detail['local']['disk_consistent_lsn']),
            0.03)

        # callmemaybe to start replication from safekeeper to the new pageserver
        # when there is no load there is a clean checkpoint and no wal delta
        # needs to be streamed to the new pageserver
        # TODO (rodionov) use attach to start replication
        with pg_cur(PgProtocol(host='localhost', port=new_pageserver_pg_port)) as cur:
            # "callmemaybe {} {} host={} port={} options='-c ztimelineid={} ztenantid={}'"
            safekeeper_connstring = f"host=localhost port={env.safekeepers[0].port.pg} options='-c ztimelineid={timeline} ztenantid={tenant} pageserver_connstr=postgresql://no_user:@localhost:{new_pageserver_pg_port}'"
            cur.execute("callmemaybe {} {} {}".format(tenant.hex, timeline.hex,
                                                      safekeeper_connstring))

        tenant_pg.stop()

        # rewrite zenith cli config to use the new pageserver for basebackup
        # to start the new compute
        cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
        cli_config_lines[-2] = f"listen_http_addr = 'localhost:{new_pageserver_http_port}'"
        cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{new_pageserver_pg_port}'"
        (env.repo_dir / 'config').write_text('\n'.join(cli_config_lines))

        tenant_pg_config_file_path = pathlib.Path(tenant_pg.config_file_path())
        tenant_pg_config_file_path.open('a').write(
            f"\nzenith.page_server_connstring = 'postgresql://no_user:@localhost:{new_pageserver_pg_port}'"
        )

        tenant_pg.start()

        # detach the tenant from the old pageserver before we check that all the
        # data is there, to be sure that the old pageserver is no longer involved,
        # and if it is, we will see the errors
        pageserver_http.timeline_detach(tenant, timeline)

        with pg_cur(tenant_pg) as cur:
            # check that data is still there
            cur.execute("SELECT sum(key) FROM t")
            assert cur.fetchone() == (500500, )
            # check that we can write new data
            cur.execute("INSERT INTO t SELECT generate_series(1001,2000), 'some payload'")
            cur.execute("SELECT sum(key) FROM t")
            assert cur.fetchone() == (2001000, )

        if with_load == 'with_load':
            assert load_ok_event.wait(3)
            log.info('stopping load thread')
            load_stop_event.set()
            load_thread.join(timeout=10)
            log.info('load thread stopped')

        # bring the old pageserver back for a clean shutdown via zenith cli;
        # the new pageserver will be shut down by the context manager
        cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
        cli_config_lines[-2] = f"listen_http_addr = 'localhost:{env.pageserver.service_port.http}'"
        cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{env.pageserver.service_port.pg}'"
        (env.repo_dir / 'config').write_text('\n'.join(cli_config_lines))
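# The 'with_load' variant of the test above hands the compute to a background
# `load` thread defined elsewhere in this module. A hedged sketch of what such a
# thread could do: keep writing to the load table until asked to stop, and signal
# via load_ok_event once enough writes have gone through. The real helper likely
# also handles reconnects while the tenant is being relocated.
def load_sketch(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Event):
    """Illustrative only: continuously insert into the `load` table until stopped."""
    inserted = 0
    with pg_cur(pg) as cur:
        while not stop_event.is_set():
            cur.execute("INSERT INTO load VALUES ('some payload')")
            inserted += 1
            if inserted == 100 and not load_ok_event.is_set():
                # enough writes went through; let the main thread know the load is healthy
                load_ok_event.set()
    log.info('load thread exiting, %s rows inserted', inserted)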
def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
    # Use a safekeeper in this test to avoid a subtle race condition.
    # Without a safekeeper, walreceiver reconnection can get stuck
    # because of an IO deadlock.
    #
    # See https://github.com/zenithdb/zenith/issues/1068
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    # Override defaults: 1M gc_horizon and 4M checkpoint_distance.
    # Extend compaction_period and gc_period to disable background compaction and gc.
    tenant = env.zenith_cli.create_tenant(
        conf={
            'gc_period': '10 m',
            'gc_horizon': '1048576',
            'checkpoint_distance': '4194304',
            'compaction_period': '10 m',
            'compaction_threshold': '2',
            'compaction_target_size': '4194304',
        })

    env.zenith_cli.create_timeline('main', tenant_id=tenant)
    pg_branch0 = env.postgres.create_start('main', tenant_id=tenant)

    branch0_cur = pg_branch0.connect().cursor()
    branch0_cur.execute("SHOW zenith.zenith_timeline")
    branch0_timeline = branch0_cur.fetchone()[0]
    log.info(f"b0 timeline {branch0_timeline}")

    # Create table, and insert 100k rows.
    branch0_cur.execute('SELECT pg_current_wal_insert_lsn()')
    branch0_lsn = branch0_cur.fetchone()[0]
    log.info(f"b0 at lsn {branch0_lsn}")

    branch0_cur.execute('CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)')
    branch0_cur.execute('''
        INSERT INTO foo
            SELECT '00112233445566778899AABBCCDDEEFF' || ':branch0:' || g
            FROM generate_series(1, 100000) g
    ''')
    branch0_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_100 = branch0_cur.fetchone()[0]
    log.info(f'LSN after 100k rows: {lsn_100}')

    # Create branch1.
    env.zenith_cli.create_branch('branch1', 'main', tenant_id=tenant, ancestor_start_lsn=lsn_100)
    pg_branch1 = env.postgres.create_start('branch1', tenant_id=tenant)
    log.info("postgres is running on 'branch1' branch")

    branch1_cur = pg_branch1.connect().cursor()
    branch1_cur.execute("SHOW zenith.zenith_timeline")
    branch1_timeline = branch1_cur.fetchone()[0]
    log.info(f"b1 timeline {branch1_timeline}")

    branch1_cur.execute('SELECT pg_current_wal_insert_lsn()')
    branch1_lsn = branch1_cur.fetchone()[0]
    log.info(f"b1 at lsn {branch1_lsn}")

    # Insert 100k rows.
    branch1_cur.execute('''
        INSERT INTO foo
            SELECT '00112233445566778899AABBCCDDEEFF' || ':branch1:' || g
            FROM generate_series(1, 100000) g
    ''')
    branch1_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_200 = branch1_cur.fetchone()[0]
    log.info(f'LSN after 200k rows: {lsn_200}')

    # Create branch2.
    env.zenith_cli.create_branch('branch2', 'branch1', tenant_id=tenant, ancestor_start_lsn=lsn_200)
    pg_branch2 = env.postgres.create_start('branch2', tenant_id=tenant)
    log.info("postgres is running on 'branch2' branch")

    branch2_cur = pg_branch2.connect().cursor()
    branch2_cur.execute("SHOW zenith.zenith_timeline")
    branch2_timeline = branch2_cur.fetchone()[0]
    log.info(f"b2 timeline {branch2_timeline}")

    branch2_cur.execute('SELECT pg_current_wal_insert_lsn()')
    branch2_lsn = branch2_cur.fetchone()[0]
    log.info(f"b2 at lsn {branch2_lsn}")

    # Insert 100k rows.
    branch2_cur.execute('''
        INSERT INTO foo
            SELECT '00112233445566778899AABBCCDDEEFF' || ':branch2:' || g
            FROM generate_series(1, 100000) g
    ''')
    branch2_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_300 = branch2_cur.fetchone()[0]
    log.info(f'LSN after 300k rows: {lsn_300}')

    # Run compaction on branch1.
    psconn = env.pageserver.connect()
    log.info(f'compact {tenant.hex} {branch1_timeline} {lsn_200}')
    psconn.cursor().execute(f'''compact {tenant.hex} {branch1_timeline} {lsn_200}''')

    branch0_cur.execute('SELECT count(*) FROM foo')
    assert branch0_cur.fetchone() == (100000, )

    branch1_cur.execute('SELECT count(*) FROM foo')
    assert branch1_cur.fetchone() == (200000, )

    branch2_cur.execute('SELECT count(*) FROM foo')
    assert branch2_cur.fetchone() == (300000, )