def test_check_wal_archive_integrity(requests_put_mock, requests_head_mock):
    """Exercise WAL archive integrity checking within a single timeline and
    across timeline switches, verifying both the result code and how many
    HEAD requests were issued against the archive."""
    from pghoard.archive_sync import ArchiveSync

    arsy = ArchiveSync()
    arsy.set_config({"http_port": 8080, "backup_sites": {"foo": {}}}, site="foo")
    requests_put_mock.return_value = HTTPResult(201)  # So the backup requests succeeds
    requests_head_mock.side_effect = requests_head_call_return

    def mock_wal_window(current_wal, first_required):
        # Point the sync object at a fake WAL range without touching a real PG
        arsy.get_current_wal_file = Mock(return_value=current_wal)
        arsy.get_first_required_wal_segment = Mock(return_value=first_required)

    # Check integrity within same timeline
    mock_wal_window("00000005000000000000008F", "00000005000000000000008C")
    assert arsy.check_wal_archive_integrity() == 0
    assert requests_head_mock.call_count == 3

    # Check integrity when timeline has changed
    mock_wal_window("000000090000000000000008", "000000080000000000000005")
    assert arsy.check_wal_archive_integrity() == 0
    assert requests_head_mock.call_count == 7  # cumulative: 3 from above + 4 here

    # A gap in the archive within one timeline is reported as a failure (-1)
    mock_wal_window("000000030000000000000008", "000000030000000000000005")
    assert arsy.check_wal_archive_integrity() == -1

    # Timeline switch with all required segments present is fine again
    mock_wal_window("000000070000000000000002", "000000060000000000000001")
    assert arsy.check_wal_archive_integrity() == 0
def test_check_wal_archive_integrity(requests_put_mock, requests_head_mock, tmpdir):
    """Exercise WAL archive integrity checking: same-timeline ranges, timeline
    switches, pre-9.3 segment numbering (90200) and the new_backup_on_failure
    fallback that triggers a backup request when integrity cannot be proven."""
    from pghoard.archive_sync import ArchiveSync, SyncError

    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(config_file, {"http_port": 8080, "backup_sites": {"foo": {}}})
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.return_value = HTTPResult(201)  # So the backup requests succeeds
    requests_head_mock.side_effect = requests_head_call_return

    def mock_wal_window(current_wal, first_required, pg_version):
        # Start each scenario with fresh mock counters and a fake WAL range,
        # so per-scenario call-count assertions are independent of each other.
        requests_head_mock.call_count = 0
        requests_put_mock.call_count = 0
        arsy.get_current_wal_file = Mock(return_value=current_wal)
        arsy.get_first_required_wal_segment = Mock(return_value=(first_required, pg_version))

    # Check integrity within same timeline
    mock_wal_window("00000005000000000000008F", "00000005000000000000008C", 90300)
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 3
    assert requests_put_mock.call_count == 0

    # Check integrity when timeline has changed
    mock_wal_window("000000090000000000000008", "000000080000000000000005", 90300)
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 4

    # A gap in the archive raises SyncError unless a fresh backup may be requested
    mock_wal_window("000000030000000000000008", "000000030000000000000005", 90300)
    with pytest.raises(SyncError):
        arsy.check_wal_archive_integrity(new_backup_on_failure=False)
    assert requests_put_mock.call_count == 0
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1  # the fallback issued one backup request

    # Timeline switch with all required segments present needs no backup
    mock_wal_window("000000070000000000000002", "000000060000000000000001", 90300)
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    # Pre-9.3 versions skip the 000000FF segment, so this range is contiguous
    mock_wal_window("000000020000000B00000000", "000000020000000A000000FD", 90200)
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    # On 9.3+ the same range has a hole, so a new backup is requested
    mock_wal_window("000000020000000B00000000", "000000020000000A000000FD", 90300)
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1
def test_check_wal_archive_integrity(requests_put_mock, requests_head_mock, tmpdir):
    """Exercise WAL archive integrity checking against a fake PG data directory:
    same-timeline ranges, timeline switches, pre-9.3 segment numbering (90200)
    and the new_backup_on_failure fallback that requests a fresh basebackup.
    """
    from pghoard.archive_sync import ArchiveSync, SyncError

    # Instantiate a fake PG data directory
    pg_data_directory = os.path.join(str(tmpdir), "PG_DATA_DIRECTORY")
    os.makedirs(pg_data_directory)
    # Fix: use a context manager so PG_VERSION is flushed and closed
    # deterministically instead of relying on refcount GC to close the
    # handle left behind by open(...).write(...).
    with open(os.path.join(pg_data_directory, "PG_VERSION"), "w") as version_fp:
        version_fp.write("9.6")

    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(config_file, {
        "http_port": 8080,
        "backup_sites": {
            "foo": {
                "pg_data_directory": pg_data_directory
            }
        }
    })
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.return_value = HTTPResult(201)  # So the backup requests succeeds
    requests_head_mock.side_effect = requests_head_call_return

    # Check integrity within same timeline
    arsy.get_current_wal_file = Mock(return_value="00000005000000000000008F")
    arsy.get_first_required_wal_segment = Mock(return_value=("00000005000000000000008C", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 3
    assert requests_put_mock.call_count == 0

    # Check integrity when timeline has changed
    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000090000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000080000000000000005", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 4

    # A gap in the archive raises SyncError unless a new backup may be requested
    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000030000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000030000000000000005", 90300))
    with pytest.raises(SyncError):
        arsy.check_wal_archive_integrity(new_backup_on_failure=False)
    assert requests_put_mock.call_count == 0
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1  # the fallback issued one backup request

    # Timeline switch with all required segments present needs no backup
    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000070000000000000002")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000060000000000000001", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    # Pre-9.3 versions skip the 000000FF segment, so this range is contiguous
    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000020000000A000000FD", 90200))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    # On 9.3+ the same range has a hole, so a new backup is requested
    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000020000000A000000FD", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1
def test_check_and_upload_missing_local_files(requests_put_mock, requests_head_mock, tmpdir):
    """Verify that check_and_upload_missing_local_files re-uploads local WAL
    segments whose remote copy is missing or has a mismatching/missing sha1,
    honoring the limit on how many remote hashes are validated per run."""
    from pghoard.archive_sync import ArchiveSync

    data_dir = str(tmpdir)
    wal_dir = os.path.join(data_dir, "pg_xlog")
    os.makedirs(wal_dir)
    # Fix: use a context manager so PG_VERSION is flushed and closed
    # deterministically instead of relying on refcount GC to close the
    # handle left behind by open(...).write(...).
    with open(os.path.join(data_dir, "PG_VERSION"), "w") as version_fp:
        version_fp.write("9.6")

    # Write a bunch of local files with random contents, remembering each sha1
    file_hashes = {}
    for index in range(32):
        fn = "{:024X}".format(index + 1)
        data = os.urandom(32)
        sha1_hasher = hashlib.sha1()
        sha1_hasher.update(data)
        file_hashes[index + 1] = sha1_hasher.hexdigest()
        with open(os.path.join(wal_dir, fn), "wb") as f:
            f.write(data)

    head_call_indexes = []
    put_call_indexes = []
    missing_hash_indexes = {0xf, 0x10}

    def requests_head(*args, **kwargs):  # pylint: disable=unused-argument
        # Simulate the archive: segments above 0x14 don't exist remotely
        wal_index = int(os.path.split(args[0])[1], 16)
        head_call_indexes.append(wal_index)
        if wal_index > 0x14:
            return HTTPResult(404)
        sha1 = file_hashes[wal_index]
        # For some files return invalid hash
        if wal_index in {0x1, 0xb, 0xd, 0xf, 0x11, 0x13}:
            sha1 += "invalid"
        # For some files don't return sha1 header to test the code copes with missing header correctly
        if wal_index in missing_hash_indexes:
            headers = {}
        else:
            headers = {
                "metadata-hash": sha1,
                "metadata-hash-algorithm": "sha1"
            }
        return HTTPResult(200, headers=headers)

    def requests_put(*args, **kwargs):  # pylint: disable=unused-argument
        # Record which segments get (re-)uploaded
        wal_index = int(os.path.split(args[0])[1], 16)
        put_call_indexes.append(wal_index)
        return HTTPResult(201)

    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(config_file, {
        "http_port": 8080,
        "backup_sites": {
            "foo": {
                "pg_data_directory": data_dir
            }
        }
    })
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.side_effect = requests_put
    requests_head_mock.side_effect = requests_head
    arsy.get_current_wal_file = Mock(return_value="00000000000000000000001A")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000000000000000000001", 90300))

    arsy.check_and_upload_missing_local_files(15)
    # Segments are probed newest-first, from just below current (0x1A) down to 0x1
    assert head_call_indexes == list(reversed([index + 1 for index in range(0x19)]))
    # Files above 0x1a in future, 0x1a is current. 0x14 and under are already uploaded but 0x13, 0x11, 0xf,
    # 0xd, 0xb and 0x1 have invalid hash. Of those 0x1 doesn't get re-uploaded because we set max hashes to
    # check to a value that is exceeded before reaching that and 0xf doesn't get reuploaded because remote
    # hash for that isn't available so hash cannot be validated but 0x10 does get reuploaded because it is
    # the first file missing a hash.
    assert put_call_indexes == [0xb, 0xd, 0x10, 0x11, 0x13, 0x15, 0x16, 0x17, 0x18, 0x19]

    missing_hash_indexes.update(set(range(0x20)))
    head_call_indexes.clear()
    put_call_indexes.clear()
    arsy.check_and_upload_missing_local_files(15)
    # The first file that already existed (0x14) should've been re-uploaded due to missing sha1
    assert put_call_indexes == [0x14, 0x15, 0x16, 0x17, 0x18, 0x19]