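# Shared imports and helpers assumed by the tests below. The module paths are
# a best guess from the call sites: wal, get_pg_wal_directory and
# write_json_file are expected to come from pghoard itself, while
# requests_head_call_return (the canned HEAD responder the integrity test
# relies on) is assumed to be defined elsewhere in the suite and is not
# reproduced here.
import hashlib
import logging
import os
import time
from unittest.mock import Mock, patch

import pytest

from pghoard import wal
from pghoard.archive_sync import ArchiveSync
from pghoard.common import get_pg_wal_directory, write_json_file


class HTTPResult:
    # Minimal stand-in for the response object the mocked requests calls
    # return: a status code plus optional headers, which is all the code
    # under test reads.
    def __init__(self, status_code, headers=None):
        self.status_code = status_code
        self.headers = headers or {}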
@patch("requests.head")
@patch("requests.put")
def test_check_wal_archive_integrity(requests_put_mock, requests_head_mock, tmpdir):
    from pghoard.archive_sync import ArchiveSync, SyncError

    # Instantiate a fake PG data directory
    pg_data_directory = os.path.join(str(tmpdir), "PG_DATA_DIRECTORY")
    os.makedirs(pg_data_directory)
    with open(os.path.join(pg_data_directory, "PG_VERSION"), "w") as fp:
        fp.write("9.6")

    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(config_file, {
        "http_port": 8080,
        "backup_sites": {
            "foo": {
                "pg_data_directory": pg_data_directory,
            },
        },
    })
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.return_value = HTTPResult(201)  # so the backup request succeeds
    requests_head_mock.side_effect = requests_head_call_return

    # Check integrity within the same timeline
    arsy.get_current_wal_file = Mock(return_value="00000005000000000000008F")
    arsy.get_first_required_wal_segment = Mock(return_value=("00000005000000000000008C", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 3
    assert requests_put_mock.call_count == 0

    # Check integrity when the timeline has changed
    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000090000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000080000000000000005", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 4

    # A gap within the timeline is a SyncError unless a new basebackup is requested
    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000030000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000030000000000000005", 90300))
    with pytest.raises(SyncError):
        arsy.check_wal_archive_integrity(new_backup_on_failure=False)
    assert requests_put_mock.call_count == 0
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000070000000000000002")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000060000000000000001", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000020000000A000000FD", 90200))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000020000000A000000FD", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1
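# The version-dependent scenarios above hinge on WAL segment naming: 24 hex
# digits split into timeline, "log" number and segment within the log. Before
# PostgreSQL 9.3 (server version 90300) the 0xFF segment of each log was never
# written, so on 9.2 (90200) ...0A000000FD followed by ...0B00000000 is
# contiguous and passes, while on 9.3+ the missing ...0A000000FF counts as a
# gap and forces a new basebackup. A small illustration (split_wal_name is
# hypothetical, not part of pghoard):
def split_wal_name(name):
    # timeline, log number, segment within log
    return int(name[:8], 16), int(name[8:16], 16), int(name[16:24], 16)


def test_split_wal_name_example():
    assert split_wal_name("000000020000000B00000000") == (0x2, 0xB, 0x0)
    assert split_wal_name("000000020000000A000000FD") == (0x2, 0xA, 0xFD)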
@patch("requests.head")
@patch("requests.put")
def test_check_and_upload_missing_local_files(requests_put_mock, requests_head_mock, tmpdir):
    from pghoard.archive_sync import ArchiveSync

    data_dir = str(tmpdir)
    wal_dir = os.path.join(data_dir, "pg_xlog")
    os.makedirs(wal_dir)
    with open(os.path.join(data_dir, "PG_VERSION"), "w") as fp:
        fp.write("9.6")

    # Write a bunch of local WAL files
    file_hashes = {}
    for index in range(32):
        fn = "{:024X}".format(index + 1)
        data = os.urandom(32)
        sha1_hasher = hashlib.sha1()
        sha1_hasher.update(data)
        file_hashes[index + 1] = sha1_hasher.hexdigest()
        with open(os.path.join(wal_dir, fn), "wb") as f:
            f.write(data)

    head_call_indexes = []
    put_call_indexes = []
    missing_hash_indexes = {0xf, 0x10}

    def requests_head(*args, **kwargs):  # pylint: disable=unused-argument
        wal_index = int(os.path.split(args[0])[1], 16)
        head_call_indexes.append(wal_index)
        if wal_index > 0x14:
            return HTTPResult(404)
        sha1 = file_hashes[wal_index]
        # For some files return an invalid hash
        if wal_index in {0x1, 0xb, 0xd, 0xf, 0x11, 0x13}:
            sha1 += "invalid"
        # For some files don't return the sha1 header at all, to test that the
        # code copes with a missing header correctly
        if wal_index in missing_hash_indexes:
            headers = {}
        else:
            headers = {"metadata-hash": sha1, "metadata-hash-algorithm": "sha1"}
        return HTTPResult(200, headers=headers)

    def requests_put(*args, **kwargs):  # pylint: disable=unused-argument
        wal_index = int(os.path.split(args[0])[1], 16)
        put_call_indexes.append(wal_index)
        return HTTPResult(201)

    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(config_file, {
        "http_port": 8080,
        "backup_sites": {
            "foo": {
                "pg_data_directory": data_dir,
            },
        },
    })
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.side_effect = requests_put
    requests_head_mock.side_effect = requests_head
    arsy.get_current_wal_file = Mock(return_value="00000000000000000000001A")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000000000000000000001", 90300))
    arsy.check_and_upload_missing_local_files(15)
    assert head_call_indexes == list(reversed([index + 1 for index in range(0x19)]))
    # Files above 0x1a are in the future; 0x1a is current. 0x14 and under are
    # already uploaded, but 0x13, 0x11, 0xf, 0xd, 0xb and 0x1 have an invalid
    # hash. Of those, 0x1 doesn't get re-uploaded because the maximum number of
    # hashes to check is exceeded before reaching it, and 0xf doesn't get
    # re-uploaded because the remote hash for it isn't available so the hash
    # cannot be validated, but 0x10 does get re-uploaded because it is the
    # first file missing a hash.
    assert put_call_indexes == [0xb, 0xd, 0x10, 0x11, 0x13, 0x15, 0x16, 0x17, 0x18, 0x19]

    missing_hash_indexes.update(set(range(0x20)))
    head_call_indexes.clear()
    put_call_indexes.clear()
    arsy.check_and_upload_missing_local_files(15)
    # The first file that already existed (0x14) should've been re-uploaded due
    # to the missing sha1
    assert put_call_indexes == [0x14, 0x15, 0x16, 0x17, 0x18, 0x19]
def test_archive_sync(self, db, pghoard):
    log = logging.getLogger("test_archive_sync")
    store = pghoard.transfer_agents[0].get_object_storage(pghoard.test_site)

    def list_archive(folder):
        if folder == "timeline":
            matcher = wal.TIMELINE_RE.match
        else:
            matcher = wal.WAL_RE.match

        path_to_list = "{}/{}".format(pghoard.test_site, folder)
        files_found, files_total = 0, 0
        for obj in store.list_path(path_to_list):
            fname = os.path.basename(obj["name"])
            files_total += 1
            if matcher(fname):
                files_found += 1
                yield fname

        log.info("Listed %r, %r out of %r matched %r pattern", path_to_list, files_found, files_total, folder)

    # create a basebackup to start with
    self._run_and_wait_basebackup(pghoard, db, "pipe")

    # force a couple of WAL segment switches
    start_wal, _ = self._switch_wal(db, 4)
    # we should have at least 4 WAL files now (there may be more in
    # case other tests created them -- we share a single PostgreSQL
    # cluster between all tests)
    pg_wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site])
    pg_wals = {f for f in os.listdir(pg_wal_dir) if wal.WAL_RE.match(f) and f > start_wal}
    assert len(pg_wals) >= 4

    # create a couple of "recycled" WAL files that we must ignore
    last_wal = sorted(pg_wals)[-1]
    dummy_data = b"x" * (16 * 2 ** 20)

    def write_dummy_wal(inc):
        filename = "{:024X}".format(int(last_wal, 16) + inc)
        print("Writing dummy WAL file", filename)
        with open(os.path.join(pg_wal_dir, filename), "wb") as fp:
            fp.write(dummy_data)
        return filename

    recycled1 = write_dummy_wal(1)
    recycled2 = write_dummy_wal(2)

    # check what we have archived: at least the four WALs we switched
    # above must NOT be in the archive at the moment
    archived_wals = set(list_archive("xlog"))
    assert len(pg_wals - archived_wals) >= 4
    # now perform an archive sync
    arsy = ArchiveSync()
    arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])
    # and now the archive should include all our WALs
    archived_wals = set(list_archive("xlog"))
    # the recycled files must not appear in archived files
    assert recycled1 not in archived_wals
    assert recycled2 not in archived_wals
    # the regular WALs must be archived
    assert archived_wals.issuperset(pg_wals)

    # if we delete a WAL file that's not the latest archival it should
    # get synced to the archive as we don't have a basebackup newer than it
    current_wal = arsy.get_current_wal_file()
    old_wals = sorted(name for name in pg_wals if name < current_wal)
    store.delete_key(os.path.join(pghoard.test_site, "xlog", old_wals[-2]))
    arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])
    archived_wals = set(list_archive("xlog"))
    assert archived_wals.issuperset(pg_wals)
    # delete the topmost WAL file, this should cause a resync too
    store.delete_key(os.path.join(pghoard.test_site, "xlog", old_wals[-1]))
    arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])
    archived_wals = set(list_archive("xlog"))
    assert archived_wals.issuperset(pg_wals)

    # let's do a little dance to turn our DB into a standby and then
    # promote it, forcing a timeline switch
    db.kill(force=False)
    with open(os.path.join(db.pgdata, "recovery.conf"), "w") as fp:
        fp.write(
            "standby_mode = 'on'\n"
            "recovery_target_timeline = 'latest'\n"
            "restore_command = 'false'\n"
        )
    # start PG and promote it
    db.run_pg()
    db.run_cmd("pg_ctl", "-D", db.pgdata, "promote")
    time.sleep(5)  # TODO: instead of sleeping, poll the db until ready

    # we should have a timeline file in pg_xlog/pg_wal now
    pg_wal_timelines = {f for f in os.listdir(pg_wal_dir) if wal.TIMELINE_RE.match(f)}
    assert len(pg_wal_timelines) > 0
    # but there should be nothing archived as archive_command wasn't set up
    archived_timelines = set(list_archive("timeline"))
    assert len(archived_timelines) == 0
    # let's hit archive sync
    arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])
    # now we should have an archived timeline
    archived_timelines = set(list_archive("timeline"))
    assert archived_timelines.issuperset(pg_wal_timelines)
    assert "00000002.history" in archived_timelines

    # let's take a new basebackup
    self._run_and_wait_basebackup(pghoard, db, "basic")
    # nuke archives and resync them
    for name in list_archive(folder="timeline"):
        store.delete_key(os.path.join(pghoard.test_site, "timeline", name))
    for name in list_archive(folder="xlog"):
        store.delete_key(os.path.join(pghoard.test_site, "xlog", name))
    self._switch_wal(db, 1)
    arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])

    archived_wals = set(list_archive("xlog"))
    # assume the same timeline file as before and one to three WAL files
    assert len(archived_wals) >= 1
    assert len(archived_wals) <= 3
    archived_timelines = set(list_archive("timeline"))
    assert list(archived_timelines) == ["00000002.history"]
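# For reference, a minimal sketch of what get_pg_wal_directory is assumed to
# resolve for a site configuration (the real helper ships with pghoard and may
# differ): PostgreSQL 10 renamed pg_xlog to pg_wal, so the WAL directory
# depends on the major version recorded in the data directory.
def get_pg_wal_directory_sketch(site_config):
    data_dir = site_config["pg_data_directory"]
    with open(os.path.join(data_dir, "PG_VERSION"), "r") as fp:
        major_version = fp.read().strip()
    wal_subdir = "pg_wal" if float(major_version) >= 10 else "pg_xlog"
    return os.path.join(data_dir, wal_subdir)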