def test_recovery_targets(self, tmpdir):
    config_file = tmpdir.join("conf.json").strpath
    write_json_file(config_file, {"backup_sites": {"test": {}}})
    r = Restore()
    r._get_object_storage = Mock()  # pylint: disable=protected-access
    with pytest.raises(RestoreError) as excinfo:
        r.run(args=[
            "get-basebackup",
            "--config", config_file,
            "--target-dir", tmpdir.strpath,
            "--site=test",
            "--recovery-target-action=promote",
            "--recovery-target-name=foobar",
            "--recovery-target-xid=42",
        ])
    assert "at most one" in str(excinfo.value)
    with pytest.raises(RestoreError) as excinfo:
        r.run(args=[
            "get-basebackup",
            "--config", config_file,
            "--target-dir", tmpdir.strpath,
            "--site=test",
            "--recovery-target-action=promote",
            "--recovery-target-time=foobar",
        ])
    assert "recovery_target_time 'foobar'" in str(excinfo.value)

def setup_method(self, method):
    super().setup_method(method)
    self.config = self.config_template()
    self.config["backup_sites"][self.test_site].update({
        "basebackup_count": 1,
        "basebackup_interval_hours": 1,
        "nodes": [{"host": "127.0.0.4"}],
    })
    config_path = os.path.join(self.temp_dir, "pghoard.json")
    write_json_file(config_path, self.config)
    os.makedirs(self.config["alert_file_dir"], exist_ok=True)
    backup_site_path = os.path.join(self.config["backup_location"], self.test_site)
    self.compressed_xlog_path = os.path.join(backup_site_path, "xlog")
    os.makedirs(self.compressed_xlog_path)
    self.basebackup_path = os.path.join(backup_site_path, "basebackup")
    os.makedirs(self.basebackup_path)
    self.pghoard = PGHoard(config_path)
    self.real_check_pg_server_version = self.pghoard.check_pg_server_version
    self.pghoard.check_pg_server_version = Mock(return_value=90404)
    self.real_check_pg_versions_ok = self.pghoard.check_pg_versions_ok
    self.pghoard.check_pg_versions_ok = Mock(return_value=True)

def test_recovery_targets(self, tmpdir):
    config_file = tmpdir.join("conf.json").strpath
    # Instantiate a fake PG data directory
    pg_data_directory = os.path.join(str(self.temp_dir), "PG_DATA_DIRECTORY")
    os.makedirs(pg_data_directory)
    with open(os.path.join(pg_data_directory, "PG_VERSION"), "w") as fp:
        fp.write("9.6")
    write_json_file(config_file, {"backup_sites": {"test": {"pg_data_directory": pg_data_directory}}})
    r = Restore()
    r._get_object_storage = Mock()  # pylint: disable=protected-access
    with pytest.raises(RestoreError) as excinfo:
        r.run(args=[
            "get-basebackup",
            "--config", config_file,
            "--target-dir", tmpdir.strpath,
            "--site=test",
            "--recovery-target-action=promote",
            "--recovery-target-name=foobar",
            "--recovery-target-xid=42",
        ])
    assert "at most one" in str(excinfo.value)
    with pytest.raises(RestoreError) as excinfo:
        r.run(args=[
            "get-basebackup",
            "--config", config_file,
            "--target-dir", tmpdir.strpath,
            "--site=test",
            "--recovery-target-action=promote",
            "--recovery-target-time=foobar",
        ])
    assert "recovery_target_time 'foobar'" in str(excinfo.value)

def write_backup_state_to_json_file(self):
    """Periodically write a JSON state file to disk"""
    start_time = time.time()
    state_file_path = self.config["json_state_file_path"]
    self.state["walreceivers"] = {
        key: {
            "latest_activity": value.latest_activity,
            "running": value.running,
            "last_flushed_lsn": value.last_flushed_lsn,
        }
        for key, value in self.walreceivers.items()
    }
    self.state["pg_receivexlogs"] = {
        key: {"latest_activity": value.latest_activity, "running": value.running}
        for key, value in self.receivexlogs.items()
    }
    self.state["pg_basebackups"] = {
        key: {"latest_activity": value.latest_activity, "running": value.running}
        for key, value in self.basebackups.items()
    }
    self.state["compressors"] = [compressor.state for compressor in self.compressors]
    self.state["transfer_agents"] = [ta.state for ta in self.transfer_agents]
    self.state["queues"] = {
        "compression_queue": self.compression_queue.qsize(),
        "transfer_queue": self.transfer_queue.qsize(),
    }
    self.log.debug("Writing JSON state file to %r", state_file_path)
    write_json_file(state_file_path, self.state)
    self.log.debug("Wrote JSON state file to disk, took %.4fs", time.time() - start_time)

def setup_method(self, method):
    super().setup_method(method)
    self.config = self.config_template({
        "backup_sites": {
            self.test_site: {
                "basebackup_count": 1,
                "basebackup_interval_hours": 1,
                "nodes": [{"host": "127.0.0.4"}],
            },
        },
    })
    config_path = os.path.join(self.temp_dir, "pghoard.json")
    write_json_file(config_path, self.config)
    self.pghoard = PGHoard(config_path)
    # This is the "final storage location" when using "local" storage type
    self.local_storage_dir = os.path.join(
        self.config["backup_sites"][self.test_site]["object_storage"]["directory"],
        self.test_site,
    )
    self.real_check_pg_server_version = self.pghoard.check_pg_server_version
    self.pghoard.check_pg_server_version = Mock(return_value=90404)
    self.real_check_pg_versions_ok = self.pghoard.check_pg_versions_ok
    self.pghoard.check_pg_versions_ok = Mock(return_value=True)

def test_json_serialization(self, tmpdir):
    ob = {
        "foo": ["bar", "baz", 42],
        "t": datetime.datetime(2015, 9, 1, 4, 0, 0),
        "f": 0.42,
    }
    res = json.dumps(ob, default=default_json_serialization, separators=(",", ":"), sort_keys=True)
    assert res == '{"f":0.42,"foo":["bar","baz",42],"t":"2015-09-01T04:00:00Z"}'
    assert isinstance(json_encode(ob), str)
    assert isinstance(json_encode(ob, binary=True), bytes)
    assert "\n" not in json_encode(ob)
    assert "\n" in json_encode(ob, compact=False)
    output_file = tmpdir.join("test.json").strpath
    write_json_file(output_file, ob)
    with open(output_file, "r") as fp:
        ob2 = json.load(fp)
    ob_ = dict(ob, t=ob["t"].isoformat() + "Z")
    assert ob2 == ob_
    write_json_file(output_file, ob, compact=True)
    with open(output_file, "r") as fp:
        output_data = fp.read()
    assert "\n" not in output_data
    ob2_ = json.loads(output_data)
    assert ob2 == ob2_

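# The helpers exercised by test_json_serialization above are sketched below.
# This is a hedged reconstruction based on the asserted behavior, not
# necessarily pghoard's verbatim implementation: datetimes are rendered as
# ISO 8601 with a trailing "Z", compact output contains no whitespace, and
# write_json_file() writes atomically via a temp file plus os.rename() so a
# crashed writer never leaves a truncated state file behind.
import datetime
import json
import os
import tempfile


def default_json_serialization(obj):
    if isinstance(obj, datetime.datetime):
        # Matches the expected output '..."t":"2015-09-01T04:00:00Z"}' above
        return obj.isoformat() + "Z"
    raise TypeError("not JSON serializable: {!r}".format(obj))


def json_encode(obj, compact=True, binary=False):
    res = json.dumps(
        obj,
        sort_keys=not compact,
        indent=None if compact else 4,
        separators=(",", ":") if compact else None,
        default=default_json_serialization,
    )
    return res.encode("utf-8") if binary else res


def write_json_file(filename, obj, *, compact=False):
    json_data = json_encode(obj, compact=compact)
    # Create the temp file next to the destination so os.rename() stays on
    # one filesystem and remains atomic
    fd, tmp_path = tempfile.mkstemp(
        dir=os.path.dirname(filename) or ".", prefix=os.path.basename(filename), suffix=".tmp")
    try:
        with os.fdopen(fd, "w") as fp:
            fp.write(json_data)
            if not compact:
                fp.write("\n")
        os.rename(tmp_path, filename)
    except Exception:
        os.unlink(tmp_path)
        raise
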
def test_check_wal_archive_integrity(requests_put_mock, requests_head_mock, tmpdir):
    from pghoard.archive_sync import ArchiveSync, SyncError

    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(config_file, {"http_port": 8080, "backup_sites": {"foo": {}}})
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.return_value = HTTPResult(201)  # so the backup request succeeds
    requests_head_mock.side_effect = requests_head_call_return

    # Check integrity within the same timeline
    arsy.get_current_wal_file = Mock(return_value="00000005000000000000008F")
    arsy.get_first_required_wal_segment = Mock(return_value=("00000005000000000000008C", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 3
    assert requests_put_mock.call_count == 0

    # Check integrity when the timeline has changed
    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000090000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000080000000000000005", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 4

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000030000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000030000000000000005", 90300))
    with pytest.raises(SyncError):
        arsy.check_wal_archive_integrity(new_backup_on_failure=False)
    assert requests_put_mock.call_count == 0
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000070000000000000002")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000060000000000000001", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000020000000A000000FD", 90200))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000020000000A000000FD", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1

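# For reference when reading the mocked WAL names above: a WAL segment name is
# 24 hex digits made up of three 8-digit fields -- timeline, log number and
# segment number. A minimal parser (an illustrative sketch, not part of
# pghoard's API):
def parse_wal_name(wal_name):
    """Split e.g. "00000005000000000000008F" into (timeline, log, seg)."""
    assert len(wal_name) == 24
    return int(wal_name[:8], 16), int(wal_name[8:16], 16), int(wal_name[16:24], 16)


# parse_wal_name("00000005000000000000008F") == (5, 0, 0x8F): the current file
# 0x8F is still being written, so walking back to the first required segment
# 0x8C means HEAD requests for 0x8E, 0x8D and 0x8C -- the three calls asserted
# in the first scenario above.
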
def write_backup_state_to_json_file(self):
    """Periodically write a JSON state file to disk"""
    start_time = time.time()
    state_file_path = self.config["json_state_file_path"]
    self.state["walreceivers"] = {
        key: {
            "latest_activity": value.latest_activity,
            "running": value.running,
            "last_flushed_lsn": value.last_flushed_lsn,
        }
        for key, value in self.walreceivers.items()
    }
    self.state["pg_receivexlogs"] = {
        key: {"latest_activity": value.latest_activity, "running": value.running}
        for key, value in self.receivexlogs.items()
    }
    self.state["pg_basebackups"] = {
        key: {"latest_activity": value.latest_activity, "running": value.running}
        for key, value in self.basebackups.items()
    }
    self.state["compressors"] = [compressor.state for compressor in self.compressors]
    # All transfer agents share the same state, no point in writing it multiple times
    self.state["transfer_agent_state"] = self.transfer_agent_state
    self.state["queues"] = {
        "compression_queue": self.compression_queue.qsize(),
        "transfer_queue": self.transfer_queue.qsize(),
    }
    self.state["served_files"] = self.webserver.get_most_recently_served_files() if self.webserver else {}
    self.log.debug("Writing JSON state file to %r", state_file_path)
    write_json_file(state_file_path, self.state)
    self.log.debug("Wrote JSON state file to disk, took %.4fs", time.time() - start_time)

def handle_event(self, event, filetype):  # pylint: disable=redefined-variable-type
    rsa_public_key = None
    site = event.get("site")
    if not site:
        site = self.find_site_for_file(event["full_path"])
    encryption_key_id = self.config["backup_sites"][site]["encryption_key_id"]
    if encryption_key_id:
        rsa_public_key = self.config["backup_sites"][site]["encryption_keys"][encryption_key_id]["public"]

    compressed_blob = None
    if event.get("compress_to_memory"):
        output_obj = BytesIO()
        compressed_filepath = None
    else:
        compressed_filepath = self.get_compressed_file_path(site, filetype, event["full_path"])
        output_obj = NamedTemporaryFile(
            dir=os.path.dirname(compressed_filepath),
            prefix=os.path.basename(compressed_filepath),
            suffix=".tmp-compress",
        )
    input_obj = event.get("input_data")
    if not input_obj:
        input_obj = open(event["full_path"], "rb")
    with output_obj, input_obj:
        hash_algorithm = self.config["hash_algorithm"]
        hasher = None
        if filetype == "xlog":
            wal.verify_wal(wal_name=os.path.basename(event["full_path"]), fileobj=input_obj)
            hasher = hashlib.new(hash_algorithm)
        original_file_size, compressed_file_size = rohmufile.write_file(
            data_callback=hasher.update if hasher else None,
            input_obj=input_obj,
            output_obj=output_obj,
            compression_algorithm=self.config["compression"]["algorithm"],
            compression_level=self.config["compression"]["level"],
            rsa_public_key=rsa_public_key,
            log_func=self.log.info,
        )
        if compressed_filepath:
            os.link(output_obj.name, compressed_filepath)
        else:
            compressed_blob = output_obj.getvalue()
    if event.get("delete_file_after_compression", True):
        os.unlink(event["full_path"])

    metadata = event.get("metadata", {})
    metadata.update({
        "pg-version": self.config["backup_sites"][site].get("pg_version"),
        "compression-algorithm": self.config["compression"]["algorithm"],
        "compression-level": self.config["compression"]["level"],
        "original-file-size": original_file_size,
        "host": socket.gethostname(),
    })
    if hasher:
        metadata["hash"] = hasher.hexdigest()
        metadata["hash-algorithm"] = hash_algorithm
    if encryption_key_id:
        metadata.update({"encryption-key-id": encryption_key_id})
    if compressed_filepath:
        metadata_path = compressed_filepath + ".metadata"
        write_json_file(metadata_path, metadata)

    self.set_state_defaults_for_site(site)
    self.state[site][filetype]["original_data"] += original_file_size
    self.state[site][filetype]["compressed_data"] += compressed_file_size
    self.state[site][filetype]["count"] += 1
    if original_file_size:
        size_ratio = compressed_file_size / original_file_size
        self.metrics.gauge(
            "pghoard.compressed_size_ratio", size_ratio,
            tags={
                "algorithm": self.config["compression"]["algorithm"],
                "site": site,
                "type": filetype,
            })
    transfer_object = {
        "callback_queue": event.get("callback_queue"),
        "file_size": compressed_file_size,
        "filetype": filetype,
        "metadata": metadata,
        "opaque": event.get("opaque"),
        "site": site,
        "type": "UPLOAD",
    }
    if compressed_filepath:
        transfer_object["local_path"] = compressed_filepath
    else:
        transfer_object["blob"] = compressed_blob
        transfer_object["local_path"] = event["full_path"]
    self.transfer_queue.put(transfer_object)
    return True

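# An illustrative event for the handle_event() implementation above. The
# values are hypothetical (only keys that handle_event() actually reads are
# shown, and the path is made up for the example):
example_event = {
    "full_path": "/var/lib/pgsql/data/pg_xlog/000000010000000000000001",
    "site": "default",           # omit to let find_site_for_file() resolve it
    "compress_to_memory": True,  # compress into a BytesIO blob instead of an on-disk file
    "delete_file_after_compression": False,
    "metadata": {},
}
# handle_event(example_event, filetype="xlog") verifies the WAL header, hashes
# and compresses (optionally encrypting) the data, then enqueues an UPLOAD
# transfer_object carrying either the in-memory blob or the compressed file path.
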
def test_check_wal_archive_integrity(requests_put_mock, requests_head_mock, tmpdir):
    from pghoard.archive_sync import ArchiveSync, SyncError

    # Instantiate a fake PG data directory
    pg_data_directory = os.path.join(str(tmpdir), "PG_DATA_DIRECTORY")
    os.makedirs(pg_data_directory)
    with open(os.path.join(pg_data_directory, "PG_VERSION"), "w") as fp:
        fp.write("9.6")

    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(config_file, {
        "http_port": 8080,
        "backup_sites": {"foo": {"pg_data_directory": pg_data_directory}},
    })
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.return_value = HTTPResult(201)  # so the backup request succeeds
    requests_head_mock.side_effect = requests_head_call_return

    # Check integrity within the same timeline
    arsy.get_current_wal_file = Mock(return_value="00000005000000000000008F")
    arsy.get_first_required_wal_segment = Mock(return_value=("00000005000000000000008C", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 3
    assert requests_put_mock.call_count == 0

    # Check integrity when the timeline has changed
    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000090000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000080000000000000005", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 4

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000030000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000030000000000000005", 90300))
    with pytest.raises(SyncError):
        arsy.check_wal_archive_integrity(new_backup_on_failure=False)
    assert requests_put_mock.call_count == 0
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000070000000000000002")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000060000000000000001", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000020000000A000000FD", 90200))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000020000000A000000FD", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1

def test_check_and_upload_missing_local_files(requests_put_mock, requests_head_mock, tmpdir):
    from pghoard.archive_sync import ArchiveSync

    data_dir = str(tmpdir)
    wal_dir = os.path.join(data_dir, "pg_xlog")
    os.makedirs(wal_dir)
    with open(os.path.join(data_dir, "PG_VERSION"), "w") as fp:
        fp.write("9.6")

    # Write a bunch of local files
    file_hashes = {}
    for index in range(32):
        fn = "{:024X}".format(index + 1)
        data = os.urandom(32)
        sha1_hasher = hashlib.sha1()
        sha1_hasher.update(data)
        file_hashes[index + 1] = sha1_hasher.hexdigest()
        with open(os.path.join(wal_dir, fn), "wb") as f:
            f.write(data)

    head_call_indexes = []
    put_call_indexes = []
    missing_hash_indexes = {0xf, 0x10}

    def requests_head(*args, **kwargs):  # pylint: disable=unused-argument
        wal_index = int(os.path.split(args[0])[1], 16)
        head_call_indexes.append(wal_index)
        if wal_index > 0x14:
            return HTTPResult(404)
        sha1 = file_hashes[wal_index]
        # Return an invalid hash for some files
        if wal_index in {0x1, 0xb, 0xd, 0xf, 0x11, 0x13}:
            sha1 += "invalid"
        # Omit the sha1 header for some files to check that a missing header is handled correctly
        if wal_index in missing_hash_indexes:
            headers = {}
        else:
            headers = {"metadata-hash": sha1, "metadata-hash-algorithm": "sha1"}
        return HTTPResult(200, headers=headers)

    def requests_put(*args, **kwargs):  # pylint: disable=unused-argument
        wal_index = int(os.path.split(args[0])[1], 16)
        put_call_indexes.append(wal_index)
        return HTTPResult(201)

    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(config_file, {
        "http_port": 8080,
        "backup_sites": {"foo": {"pg_data_directory": data_dir}},
    })
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.side_effect = requests_put
    requests_head_mock.side_effect = requests_head
    arsy.get_current_wal_file = Mock(return_value="00000000000000000000001A")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000000000000000000001", 90300))
    arsy.check_and_upload_missing_local_files(15)
    assert head_call_indexes == list(reversed([index + 1 for index in range(0x19)]))
    # Files above 0x1a are in the future and 0x1a is current. 0x14 and below are already
    # uploaded, but 0x13, 0x11, 0xf, 0xd, 0xb and 0x1 have invalid hashes. Of those, 0x1
    # is not re-uploaded because the maximum number of hashes to check (15 here) is
    # exceeded before reaching it, and 0xf is not re-uploaded because its remote hash is
    # unavailable and so cannot be validated. 0x10 does get re-uploaded because it is the
    # first file missing a hash.
    assert put_call_indexes == [0xb, 0xd, 0x10, 0x11, 0x13, 0x15, 0x16, 0x17, 0x18, 0x19]

    missing_hash_indexes.update(set(range(0x20)))
    head_call_indexes.clear()
    put_call_indexes.clear()
    arsy.check_and_upload_missing_local_files(15)
    # The first file that already existed (0x14) should've been re-uploaded due to missing sha1
    assert put_call_indexes == [0x14, 0x15, 0x16, 0x17, 0x18, 0x19]

def handle_event(self, event, filetype):
    rsa_public_key = None
    site = event.get("site", self.find_site_for_file(event["full_path"]))
    encryption_key_id = self.config["backup_sites"][site]["encryption_key_id"]
    if encryption_key_id:
        rsa_public_key = self.config["backup_sites"][site]["encryption_keys"][encryption_key_id]["public"]
    if event.get("compress_to_memory", False):
        original_file_size, compressed_blob = self.compress_filepath_to_memory(
            filepath=event["full_path"],
            compression_algorithm=self.config["compression"]["algorithm"],
            rsa_public_key=rsa_public_key)
        compressed_file_size = len(compressed_blob)
        compressed_filepath = None
    else:
        compressed_blob = None
        compressed_filepath = self.get_compressed_file_path(site, filetype, event["full_path"])
        original_file_size, compressed_file_size = self.compress_filepath(
            filepath=event["full_path"],
            compressed_filepath=compressed_filepath,
            compression_algorithm=self.config["compression"]["algorithm"],
            rsa_public_key=rsa_public_key)
    if event.get("delete_file_after_compression", True):
        os.unlink(event["full_path"])
    metadata = event.get("metadata", {})
    metadata.update({
        "pg-version": self.config["backup_sites"][site].get("pg_version", 90500),
        "compression-algorithm": self.config["compression"]["algorithm"],
        "original-file-size": original_file_size,
    })
    if encryption_key_id:
        metadata.update({"encryption-key-id": encryption_key_id})
    if compressed_filepath:
        metadata_path = compressed_filepath + ".metadata"
        write_json_file(metadata_path, metadata)
    self.set_state_defaults_for_site(site)
    self.state[site][filetype]["original_data"] += original_file_size
    self.state[site][filetype]["compressed_data"] += compressed_file_size
    self.state[site][filetype]["count"] += 1
    transfer_object = {
        "callback_queue": event.get("callback_queue"),
        "file_size": compressed_file_size,
        "filetype": filetype,
        "metadata": metadata,
        "opaque": event.get("opaque"),
        "site": site,
        "type": "UPLOAD",
    }
    if event.get("compress_to_memory", False):
        transfer_object["blob"] = compressed_blob
        transfer_object["local_path"] = event["full_path"]
    else:
        transfer_object["local_path"] = compressed_filepath
    self.transfer_queue.put(transfer_object)
    return True
