def _upload_key_from_file(self, key, f) -> StorageUploadResult:
    """Compress (and optionally encrypt) file object *f* via rohmu and store it under *key*.

    Returns a StorageUploadResult carrying the plaintext size and the stored
    (compressed/encrypted) size.
    """
    compression = self.config.compression
    encryption_key_id = self.config.encryption_key_id
    metadata = RohmuMetadata()
    rsa_public_key = None
    if encryption_key_id:
        metadata.encryption_key_id = encryption_key_id
        rsa_public_key = self._public_key_lookup(encryption_key_id)
    if compression.algorithm:
        metadata.compression_algorithm = compression.algorithm
    rohmu_metadata = metadata.dict(exclude_defaults=True, by_alias=True)
    # Determine plaintext size by seeking to EOF, then rewind for reading.
    plain_size = f.seek(0, 2)
    f.seek(0)
    with tempfile.TemporaryFile(dir=self.config.temporary_directory) as spool:
        rohmufile.write_file(
            input_obj=f,
            output_obj=spool,
            compression_algorithm=compression.algorithm,
            compression_level=compression.level,
            rsa_public_key=rsa_public_key,
            log_func=logger.debug,
        )
        # compression_threads=compression.threads, # I wish
        # currently not supported by write_file API
        compressed_size = spool.tell()
        spool.seek(0)
        self.storage.store_file_object(key, spool, metadata=rohmu_metadata)
    return StorageUploadResult(size=plain_size, stored_size=compressed_size)
def test_write_file(self):
    """write_file must report sizes matching the input file and the produced output."""
    ifile = TestXlog(self.incoming_path, "00000001000000000000000C", "random")
    with open(ifile.path, "rb") as input_obj, io.BytesIO() as output_obj:
        orig_len, compr_len = rohmufile.write_file(
            input_obj=input_obj,
            output_obj=output_obj,
            compression_algorithm=self.algorithm,
            log_func=self.log.info,
        )
        # Reported compressed length must agree with the stream position and buffer size.
        assert compr_len == output_obj.tell()
        assert compr_len == len(output_obj.getvalue())
        # Reported original length must be the source file's size.
        assert ifile.size == orig_len
def basebackup_compression_pipe(self, proc, basebackup_path):
    """Stream pg_basebackup's stdout through rohmu compression/encryption into *basebackup_path*.

    Returns (original_input_size, compressed_file_size, metadata).
    """
    encryption_key_id = self.site_config["encryption_key_id"]
    public_pem = None
    if encryption_key_id:
        public_pem = self.site_config["encryption_keys"][encryption_key_id]["public"]
    algorithm = self.config["compression"]["algorithm"]
    level = self.config["compression"]["level"]
    self.log.debug("Compressing basebackup directly to file: %r", basebackup_path)
    set_stream_nonblocking(proc.stderr)
    metadata = {
        "compression-algorithm": algorithm,
        "encryption-key-id": encryption_key_id,
        "host": socket.gethostname(),
    }
    with NamedTemporaryFile(prefix=basebackup_path, suffix=".tmp-compress") as output_obj:

        def extract_header_func(input_data):
            # backup_label should always be first in the tar ball
            if input_data[0:12].startswith(b"backup_label"):
                # skip the 512 byte tar header to get to the actual backup label content
                start_wal_segment, start_time = self.parse_backup_label(input_data[512:1024])
                metadata.update({"start-wal-segment": start_wal_segment, "start-time": start_time})

        def progress_callback():
            # Drain stderr so the pipe never blocks; any output counts as activity.
            stderr_data = proc.stderr.read()
            if stderr_data:
                self.latest_activity = datetime.datetime.utcnow()
                self.log.debug("pg_basebackup stderr: %r", stderr_data)

        original_input_size, compressed_file_size = rohmufile.write_file(
            input_obj=proc.stdout,
            output_obj=output_obj,
            compression_algorithm=algorithm,
            compression_level=level,
            rsa_public_key=public_pem,
            progress_callback=progress_callback,
            log_func=self.log.info,
            header_func=extract_header_func,
        )
        # Hard-link the temp file into place; the temp name is removed on context exit.
        os.link(output_obj.name, basebackup_path)
    if original_input_size:
        self.metrics.gauge(
            "pghoard.compressed_size_ratio",
            compressed_file_size / original_input_size,
            tags={
                "algorithm": algorithm,
                "site": self.site,
                "type": "basebackup",
            },
        )
    return original_input_size, compressed_file_size, metadata
def basebackup_compression_pipe(self, proc, basebackup_path):
    """Compress (and optionally encrypt) pg_basebackup output straight to *basebackup_path*.

    Returns (original_input_size, compressed_file_size, metadata).
    """
    site_config = self.config["backup_sites"][self.site]
    encryption_key_id = site_config["encryption_key_id"]
    rsa_public_key = None
    if encryption_key_id:
        rsa_public_key = site_config["encryption_keys"][encryption_key_id]["public"]
    compression_algorithm = self.config["compression"]["algorithm"]
    compression_level = self.config["compression"]["level"]
    self.log.debug("Compressing basebackup directly to file: %r", basebackup_path)
    set_stream_nonblocking(proc.stderr)
    metadata = {
        "compression-algorithm": compression_algorithm,
        "encryption-key-id": encryption_key_id,
        "host": socket.gethostname(),
    }
    with NamedTemporaryFile(prefix=basebackup_path, suffix=".tmp-compress") as output_obj:

        def extract_header_func(input_data):
            # backup_label should always be first in the tar ball
            if input_data[0:12].startswith(b"backup_label"):
                # skip the 512 byte tar header to get to the actual backup label content
                start_wal_segment, start_time = self.parse_backup_label(input_data[512:1024])
                metadata.update({"start-wal-segment": start_wal_segment, "start-time": start_time})

        def progress_callback():
            # Keep stderr drained; treat any stderr chatter as a liveness signal.
            stderr_data = proc.stderr.read()
            if stderr_data:
                self.latest_activity = datetime.datetime.utcnow()
                self.log.debug("pg_basebackup stderr: %r", stderr_data)

        original_input_size, compressed_file_size = rohmufile.write_file(
            input_obj=proc.stdout,
            output_obj=output_obj,
            compression_algorithm=compression_algorithm,
            compression_level=compression_level,
            rsa_public_key=rsa_public_key,
            progress_callback=progress_callback,
            log_func=self.log.info,
            header_func=extract_header_func,
        )
        # Link the finished temp file to its final name before the context deletes it.
        os.link(output_obj.name, basebackup_path)
    if original_input_size:
        size_ratio = compressed_file_size / original_input_size
        self.metrics.gauge(
            "pghoard.compressed_size_ratio",
            size_ratio,
            tags={
                "algorithm": compression_algorithm,
                "site": self.site,
                "type": "basebackup",
            },
        )
    return original_input_size, compressed_file_size, metadata
def test_write_file(self):
    """The sizes returned by write_file must match what was read and written."""
    ifile = WALTester(self.incoming_path, "00000001000000000000000D", "random")
    with open(ifile.path, "rb") as input_obj, io.BytesIO() as output_obj:
        orig_len, compr_len = rohmufile.write_file(
            input_obj=input_obj,
            output_obj=output_obj,
            compression_algorithm=self.algorithm,
            log_func=self.log.info,
        )
        # Compressed length agrees with both the write position and the buffer contents.
        assert compr_len == output_obj.tell()
        assert compr_len == len(output_obj.getvalue())
        # Original length agrees with the source WAL segment size.
        assert ifile.size == orig_len
def test_decompression_decrypt_event(self):
    """Round-trip: compress+encrypt a WAL segment, feed a DECOMPRESSION event, verify output."""
    ifile = WALTester(self.incoming_path, "00000001000000000000000E", "random")
    output_obj = io.BytesIO()
    with open(ifile.path, "rb") as input_obj:
        rohmufile.write_file(
            input_obj=input_obj,
            output_obj=output_obj,
            compression_algorithm=self.config["compression"]["algorithm"],
            compression_level=self.config["compression"]["level"],
            rsa_public_key=CONSTANT_TEST_RSA_PUBLIC_KEY,
            log_func=self.log.info,
        )
    callback_queue = Queue()
    local_filepath = os.path.join(self.temp_dir, "00000001000000000000000E")
    event = {
        "blob": output_obj.getvalue(),
        "callback_queue": callback_queue,
        "filetype": "xlog",
        "local_path": local_filepath,
        "metadata": {
            "compression-algorithm": self.algorithm,
            "compression-level": 0,
            "encryption-key-id": "testkey",
            "host": socket.gethostname(),
            "original-file-size": ifile.size,
            "pg-version": 90500,
        },
        "site": self.test_site,
        "type": "DECOMPRESSION",
    }
    self.compression_queue.put(event)
    # Wait for the worker to acknowledge the event before inspecting the result.
    callback_queue.get(timeout=5.0)
    assert os.path.exists(local_filepath) is True
    with open(local_filepath, "rb") as fp:
        fdata = fp.read()
    # Quick prefix check first for a readable failure, then the full comparison.
    assert fdata[:100] == ifile.contents[:100]
    assert fdata == ifile.contents
def test_decompression_decrypt_event(self):
    """Round-trip: compress+encrypt a WAL segment, submit a DECOMPRESSION event and
    verify the decompressed/decrypted file matches the original contents.
    """
    ifile = TestXlog(self.incoming_path, "00000001000000000000000E", "random")
    output_obj = io.BytesIO()
    with open(ifile.path, "rb") as input_obj:
        rohmufile.write_file(
            input_obj=input_obj,
            output_obj=output_obj,
            compression_algorithm=self.config["compression"]["algorithm"],
            compression_level=self.config["compression"]["level"],
            rsa_public_key=CONSTANT_TEST_RSA_PUBLIC_KEY,
            log_func=self.log.info,
        )
    callback_queue = Queue()
    local_filepath = os.path.join(self.temp_dir, "00000001000000000000000E")
    self.compression_queue.put(
        {
            "blob": output_obj.getvalue(),
            "callback_queue": callback_queue,
            "filetype": "xlog",
            "local_path": local_filepath,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "compression-level": 0,
                "encryption-key-id": "testkey",
                "original-file-size": ifile.size,
                "pg-version": 90500,
            },
            "site": self.test_site,
            "type": "DECOMPRESSION",
        }
    )
    # Allow up to 5s for the worker thread to decrypt+decompress; 1s was flaky on
    # slow CI machines and inconsistent with the other decompression test.
    callback_queue.get(timeout=5.0)
    assert os.path.exists(local_filepath) is True
    with open(local_filepath, "rb") as fp:
        fdata = fp.read()
    assert fdata[:100] == ifile.contents[:100]
    assert fdata == ifile.contents
def basebackup_compression_pipe(self, proc, basebackup_path):
    """Pipe pg_basebackup output through rohmu compression/encryption into a file.

    Returns (original_input_size, compressed_file_size, metadata).
    """
    site_conf = self.config["backup_sites"][self.site]
    encryption_key_id = site_conf["encryption_key_id"]
    rsa_public_key = site_conf["encryption_keys"][encryption_key_id]["public"] if encryption_key_id else None
    compression_algorithm = self.config["compression"]["algorithm"]
    compression_level = self.config["compression"]["level"]
    self.log.debug("Compressing basebackup directly to file: %r", basebackup_path)
    set_stream_nonblocking(proc.stderr)
    with NamedTemporaryFile(prefix=basebackup_path, suffix=".tmp-compress") as output_obj:

        def progress_callback():
            # Drain stderr to avoid blocking the child; log anything it emitted.
            stderr_data = proc.stderr.read()
            if stderr_data:
                self.latest_activity = datetime.datetime.utcnow()
                self.log.debug("pg_basebackup stderr: %r", stderr_data)

        original_input_size, compressed_file_size = rohmufile.write_file(
            input_obj=proc.stdout,
            output_obj=output_obj,
            compression_algorithm=compression_algorithm,
            compression_level=compression_level,
            rsa_public_key=rsa_public_key,
            progress_callback=progress_callback,
            log_func=self.log.info,
        )
        # Hard-link to the final path before the temp file goes away.
        os.link(output_obj.name, basebackup_path)
    if original_input_size:
        self.stats.gauge(
            "pghoard.compressed_size_ratio",
            compressed_file_size / original_input_size,
            tags={
                "algorithm": compression_algorithm,
                "site": self.site,
                "type": "basebackup",
            },
        )
    metadata = {
        "compression-algorithm": compression_algorithm,
        "encryption-key-id": encryption_key_id,
    }
    return original_input_size, compressed_file_size, metadata
def basebackup_compression_pipe(self, proc, basebackup_path):
    """Compress pg_basebackup's stdout directly into *basebackup_path*.

    Returns (original_input_size, compressed_file_size, metadata).
    """
    rsa_public_key = None
    encryption_key_id = self.config["backup_sites"][self.site]["encryption_key_id"]
    if encryption_key_id:
        rsa_public_key = self.config["backup_sites"][self.site]["encryption_keys"][encryption_key_id]["public"]
    algo = self.config["compression"]["algorithm"]
    level = self.config["compression"]["level"]
    self.log.debug("Compressing basebackup directly to file: %r", basebackup_path)
    set_stream_nonblocking(proc.stderr)
    with NamedTemporaryFile(prefix=basebackup_path, suffix=".tmp-compress") as output_obj:

        def progress_callback():
            # Non-blocking stderr drain; any output refreshes the activity timestamp.
            stderr_data = proc.stderr.read()
            if stderr_data:
                self.latest_activity = datetime.datetime.utcnow()
                self.log.debug("pg_basebackup stderr: %r", stderr_data)

        original_input_size, compressed_file_size = rohmufile.write_file(
            input_obj=proc.stdout,
            output_obj=output_obj,
            compression_algorithm=algo,
            compression_level=level,
            rsa_public_key=rsa_public_key,
            progress_callback=progress_callback,
            log_func=self.log.info,
        )
        os.link(output_obj.name, basebackup_path)
    if original_input_size:
        ratio = compressed_file_size / original_input_size
        self.stats.gauge(
            "pghoard.compressed_size_ratio",
            ratio,
            tags={
                "algorithm": algo,
                "site": self.site,
                "type": "basebackup",
            },
        )
    metadata = {
        "compression-algorithm": algo,
        "encryption-key-id": encryption_key_id,
    }
    return original_input_size, compressed_file_size, metadata
def handle_event(self, event, filetype):  # pylint: disable=redefined-variable-type
    """Compress (and optionally encrypt) one file or in-memory blob described by *event*,
    then enqueue an UPLOAD transfer for it.

    The event may carry either a path (``full_path``) or raw data (``input_data``);
    output goes either to memory (``compress_to_memory``) or to a file next to the
    final compressed path. Returns True on completion.
    """
    rsa_public_key = None
    site = event.get("site")
    if not site:
        site = self.find_site_for_file(event["full_path"])
    encryption_key_id = self.config["backup_sites"][site]["encryption_key_id"]
    if encryption_key_id:
        rsa_public_key = self.config["backup_sites"][site]["encryption_keys"][encryption_key_id]["public"]
    compressed_blob = None
    if event.get("compress_to_memory"):
        output_obj = BytesIO()
        compressed_filepath = None
    else:
        compressed_filepath = self.get_compressed_file_path(site, filetype, event["full_path"])
        # Temp file in the same directory so the final os.link below stays on one filesystem.
        output_obj = NamedTemporaryFile(
            dir=os.path.dirname(compressed_filepath),
            prefix=os.path.basename(compressed_filepath),
            suffix=".tmp-compress"
        )
    input_obj = event.get("input_data")
    if not input_obj:
        input_obj = open(event["full_path"], "rb")
    with output_obj, input_obj:
        hash_algorithm = self.config["hash_algorithm"]
        hasher = None
        if filetype == "xlog":
            # Validate the WAL segment before spending time compressing it.
            wal.verify_wal(wal_name=os.path.basename(event["full_path"]), fileobj=input_obj)
            hasher = hashlib.new(hash_algorithm)
        original_file_size, compressed_file_size = rohmufile.write_file(
            data_callback=hasher.update if hasher else None,
            input_obj=input_obj,
            output_obj=output_obj,
            compression_algorithm=self.config["compression"]["algorithm"],
            compression_level=self.config["compression"]["level"],
            rsa_public_key=rsa_public_key,
            log_func=self.log.info,
        )
        if compressed_filepath:
            # Link to the final name while the temp file still exists.
            os.link(output_obj.name, compressed_filepath)
        else:
            compressed_blob = output_obj.getvalue()
    if event.get("delete_file_after_compression", True):
        os.unlink(event["full_path"])
    metadata = event.get("metadata", {})
    metadata.update({
        "pg-version": self.config["backup_sites"][site].get("pg_version"),
        "compression-algorithm": self.config["compression"]["algorithm"],
        "compression-level": self.config["compression"]["level"],
        "original-file-size": original_file_size,
        "host": socket.gethostname(),
    })
    if hasher:
        # Hash covers the plaintext (pre-compression) bytes via data_callback.
        metadata["hash"] = hasher.hexdigest()
        metadata["hash-algorithm"] = hash_algorithm
    if encryption_key_id:
        metadata.update({"encryption-key-id": encryption_key_id})
    if compressed_filepath:
        metadata_path = compressed_filepath + ".metadata"
        write_json_file(metadata_path, metadata)
    # Per-site/per-filetype running totals for the status endpoint.
    self.set_state_defaults_for_site(site)
    self.state[site][filetype]["original_data"] += original_file_size
    self.state[site][filetype]["compressed_data"] += compressed_file_size
    self.state[site][filetype]["count"] += 1
    if original_file_size:
        size_ratio = compressed_file_size / original_file_size
        self.metrics.gauge(
            "pghoard.compressed_size_ratio", size_ratio, tags={
                "algorithm": self.config["compression"]["algorithm"],
                "site": site,
                "type": filetype,
            }
        )
    transfer_object = {
        "callback_queue": event.get("callback_queue"),
        "file_size": compressed_file_size,
        "filetype": filetype,
        "metadata": metadata,
        "opaque": event.get("opaque"),
        "site": site,
        "type": "UPLOAD",
    }
    if compressed_filepath:
        transfer_object["local_path"] = compressed_filepath
    else:
        # Memory path: ship the blob itself; local_path is kept for bookkeeping only.
        transfer_object["blob"] = compressed_blob
        transfer_object["local_path"] = event["full_path"]
    self.transfer_queue.put(transfer_object)
    return True
def handle_event(self, event, filetype):  # pylint: disable=redefined-variable-type
    """Compress (and optionally encrypt) the file or blob referenced by *event* and
    queue the result for upload.

    Input comes from ``event["input_data"]`` if present, otherwise from
    ``event["full_path"]``; output goes to memory when ``compress_to_memory`` is
    set, otherwise to a temp file that is hard-linked to the final compressed
    path. Returns True on completion.
    """
    rsa_public_key = None
    site = event.get("site")
    if not site:
        site = self.find_site_for_file(event["full_path"])
    encryption_key_id = self.config["backup_sites"][site]["encryption_key_id"]
    if encryption_key_id:
        rsa_public_key = self.config["backup_sites"][site]["encryption_keys"][encryption_key_id]["public"]
    compressed_blob = None
    if event.get("compress_to_memory"):
        output_obj = BytesIO()
        compressed_filepath = None
    else:
        compressed_filepath = self.get_compressed_file_path(site, filetype, event["full_path"])
        # Same directory as the target so os.link below cannot cross filesystems.
        output_obj = NamedTemporaryFile(
            dir=os.path.dirname(compressed_filepath),
            prefix=os.path.basename(compressed_filepath),
            suffix=".tmp-compress"
        )
    input_obj = event.get("input_data")
    if not input_obj:
        input_obj = open(event["full_path"], "rb")
    with output_obj, input_obj:
        hash_algorithm = self.config["hash_algorithm"]
        hasher = None
        if filetype == "xlog":
            # Sanity-check the WAL segment before compressing/uploading it.
            wal.verify_wal(wal_name=os.path.basename(event["full_path"]), fileobj=input_obj)
            hasher = hashlib.new(hash_algorithm)
        original_file_size, compressed_file_size = rohmufile.write_file(
            data_callback=hasher.update if hasher else None,
            input_obj=input_obj,
            output_obj=output_obj,
            compression_algorithm=self.config["compression"]["algorithm"],
            compression_level=self.config["compression"]["level"],
            rsa_public_key=rsa_public_key,
            log_func=self.log.info,
        )
        if compressed_filepath:
            os.link(output_obj.name, compressed_filepath)
        else:
            compressed_blob = output_obj.getvalue()
    if event.get("delete_file_after_compression", True):
        os.unlink(event["full_path"])
    metadata = event.get("metadata", {})
    metadata.update({
        "pg-version": self.config["backup_sites"][site].get("pg_version"),
        "compression-algorithm": self.config["compression"]["algorithm"],
        "compression-level": self.config["compression"]["level"],
        "original-file-size": original_file_size,
        "host": socket.gethostname(),
    })
    if hasher:
        # The hash was fed plaintext bytes through data_callback above.
        metadata["hash"] = hasher.hexdigest()
        metadata["hash-algorithm"] = hash_algorithm
    if encryption_key_id:
        metadata.update({"encryption-key-id": encryption_key_id})
    if compressed_filepath:
        metadata_path = compressed_filepath + ".metadata"
        write_json_file(metadata_path, metadata)
    # Maintain per-site/per-filetype compression statistics.
    self.set_state_defaults_for_site(site)
    self.state[site][filetype]["original_data"] += original_file_size
    self.state[site][filetype]["compressed_data"] += compressed_file_size
    self.state[site][filetype]["count"] += 1
    if original_file_size:
        size_ratio = compressed_file_size / original_file_size
        self.metrics.gauge(
            "pghoard.compressed_size_ratio",
            size_ratio,
            tags={
                "algorithm": self.config["compression"]["algorithm"],
                "site": site,
                "type": filetype,
            })
    transfer_object = {
        "callback_queue": event.get("callback_queue"),
        "file_size": compressed_file_size,
        "filetype": filetype,
        "metadata": metadata,
        "opaque": event.get("opaque"),
        "site": site,
        "type": "UPLOAD",
    }
    if compressed_filepath:
        transfer_object["local_path"] = compressed_filepath
    else:
        # In-memory result: attach the blob; local_path recorded for reference.
        transfer_object["blob"] = compressed_blob
        transfer_object["local_path"] = event["full_path"]
    self.transfer_queue.put(transfer_object)
    return True
def _delta_upload_hexdigest(self, *, temp_dir, chunk_path, file_obj, callback_queue, relative_path):
    """Compress one delta-backup chunk, deduplicate it by content hash and queue its upload.

    Hashes the plaintext with blake2s while compressing; if a chunk with the same
    digest has already been submitted in this backup run, the upload is skipped.
    Returns (input_size, result_size, result_digest, skip_upload).
    """
    skip_upload = False
    start_time = time.monotonic()
    # Plaintext size: seek to end, then rewind for the compression read.
    input_size = file_obj.seek(0, os.SEEK_END)
    file_obj.seek(0)
    result_hash = hashlib.blake2s()

    def update_hash(data):
        result_hash.update(data)

    with NamedTemporaryFile(dir=temp_dir, prefix=os.path.basename(chunk_path), suffix=".tmp") as raw_output_obj:
        rohmufile.write_file(
            input_obj=file_obj,
            output_obj=raw_output_obj,
            compression_algorithm=self.compression_data.algorithm,
            compression_level=self.compression_data.level,
            rsa_public_key=self.encryption_data.rsa_public_key,
            log_func=self.log.info,
            data_callback=update_hash
        )
        result_size = raw_output_obj.tell()
        raw_output_obj.seek(0)
        result_digest = result_hash.hexdigest()
        # Dedup check must be atomic across concurrent uploader threads.
        with self.submitted_hashes_lock:
            if result_digest in self.submitted_hashes:
                # file with the same hash was already submitted
                self.log.debug(
                    "Skip uploading file %r, file with the same was hash already submitted for uploading",
                    relative_path
                )
                skip_upload = True
                return input_size, result_size, result_digest, skip_upload
            else:
                self.submitted_hashes.add(result_digest)
        # Hard-link the temp file to its final chunk path before it is deleted on exit.
        os.link(raw_output_obj.name, chunk_path)
    rohmufile.log_compression_result(
        encrypted=bool(self.encryption_data.encryption_key_id),
        elapsed=time.monotonic() - start_time,
        original_size=input_size,
        result_size=result_size,
        source_name="$PGDATA delta basebackup file",
        log_func=self.log.info,
    )
    if input_size:
        size_ratio = result_size / input_size
        self.metrics.gauge(
            "pghoard.compressed_size_ratio",
            size_ratio,
            tags={
                "algorithm": self.compression_data.algorithm,
                "site": self.site,
                "type": "basebackup_delta",
            }
        )
    metadata = {
        "compression-algorithm": self.compression_data.algorithm,
        "encryption-key-id": self.encryption_data.encryption_key_id,
        "format": BaseBackupFormat.delta_v1,
        "original-file-size": input_size,
        "host": socket.gethostname(),
    }
    self.transfer_queue.put({
        "callback_queue": callback_queue,
        "file_size": result_size,
        "filetype": "basebackup_delta",
        "local_path": chunk_path,
        "metadata": metadata,
        "site": self.site,
        "type": "UPLOAD",
        "delta": {
            "hexdigest": result_digest,
        },
    })
    return input_size, result_size, result_digest, skip_upload