Example #1
 def _upload_key_from_file(self, key, f) -> StorageUploadResult:
     encryption_key_id = self.config.encryption_key_id
     compression = self.config.compression
     metadata = RohmuMetadata()
     rsa_public_key = None
     if encryption_key_id:
         metadata.encryption_key_id = encryption_key_id
         rsa_public_key = self._public_key_lookup(encryption_key_id)
     if compression.algorithm:
         metadata.compression_algorithm = compression.algorithm
     rohmu_metadata = metadata.dict(exclude_defaults=True, by_alias=True)
     plain_size = f.seek(0, 2)
     f.seek(0)
     with tempfile.TemporaryFile(
             dir=self.config.temporary_directory) as temp_file:
         rohmufile.write_file(input_obj=f,
                              output_obj=temp_file,
                              compression_algorithm=compression.algorithm,
                              compression_level=compression.level,
                              rsa_public_key=rsa_public_key,
                              log_func=logger.debug)
         # compression_threads=compression.threads, # I wish
         # currently not supported by write_file API
         compressed_size = temp_file.tell()
         temp_file.seek(0)
         self.storage.store_file_object(key,
                                        temp_file,
                                        metadata=rohmu_metadata)
     return StorageUploadResult(size=plain_size,
                                stored_size=compressed_size)
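
The call above reduces to a small, self-contained pattern. Below is a minimal sketch that compresses a byte string in memory using only keyword arguments seen in the examples on this page; the import path (rohmu.rohmufile) and the "snappy" algorithm name are assumptions about the installed rohmu package, so adjust both to your environment.

import io
import logging

from rohmu import rohmufile  # assumption: rohmu is importable under this name

logger = logging.getLogger("rohmufile-demo")


def compress_bytes(data: bytes, algorithm: str = "snappy") -> bytes:
    """Compress a byte string in memory and return the compressed blob."""
    with io.BytesIO(data) as input_obj, io.BytesIO() as output_obj:
        original_size, compressed_size = rohmufile.write_file(
            input_obj=input_obj,
            output_obj=output_obj,
            compression_algorithm=algorithm,
            log_func=logger.debug,
        )
        # write_file returns (original size, stored size), as the tests below rely on
        assert original_size == len(data)
        assert output_obj.tell() == compressed_size
        return output_obj.getvalue()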
Example #2
 def test_write_file(self):
     ifile = TestXlog(self.incoming_path, "00000001000000000000000C", "random")
     with open(ifile.path, "rb") as input_obj, io.BytesIO() as output_obj:
         orig_len, compr_len = rohmufile.write_file(
             input_obj=input_obj, output_obj=output_obj, compression_algorithm=self.algorithm, log_func=self.log.info
         )
         assert output_obj.tell() == compr_len
         assert len(output_obj.getvalue()) == compr_len
         assert orig_len == ifile.size
Example #3
    def basebackup_compression_pipe(self, proc, basebackup_path):
        rsa_public_key = None
        encryption_key_id = self.site_config["encryption_key_id"]
        if encryption_key_id:
            rsa_public_key = self.site_config["encryption_keys"][encryption_key_id]["public"]
        compression_algorithm = self.config["compression"]["algorithm"]
        compression_level = self.config["compression"]["level"]
        self.log.debug("Compressing basebackup directly to file: %r", basebackup_path)
        set_stream_nonblocking(proc.stderr)

        metadata = {
            "compression-algorithm": compression_algorithm,
            "encryption-key-id": encryption_key_id,
            "host": socket.gethostname(),
        }

        with NamedTemporaryFile(prefix=basebackup_path, suffix=".tmp-compress") as output_obj:

            def extract_header_func(input_data):
                # backup_label should always be first in the tar ball
                if input_data[0:12].startswith(b"backup_label"):
                    # skip the 512 byte tar header to get to the actual backup label content
                    start_wal_segment, start_time = self.parse_backup_label(input_data[512:1024])

                    metadata.update({"start-wal-segment": start_wal_segment, "start-time": start_time})

            def progress_callback():
                stderr_data = proc.stderr.read()
                if stderr_data:
                    self.latest_activity = datetime.datetime.utcnow()
                    self.log.debug("pg_basebackup stderr: %r", stderr_data)

            original_input_size, compressed_file_size = rohmufile.write_file(
                input_obj=proc.stdout,
                output_obj=output_obj,
                compression_algorithm=compression_algorithm,
                compression_level=compression_level,
                rsa_public_key=rsa_public_key,
                progress_callback=progress_callback,
                log_func=self.log.info,
                header_func=extract_header_func
            )
            os.link(output_obj.name, basebackup_path)

        if original_input_size:
            size_ratio = compressed_file_size / original_input_size
            self.metrics.gauge(
                "pghoard.compressed_size_ratio",
                size_ratio,
                tags={
                    "algorithm": compression_algorithm,
                    "site": self.site,
                    "type": "basebackup",
                }
            )

        return original_input_size, compressed_file_size, metadata
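
Both basebackup variants pass rsa_public_key straight from site configuration; in these codebases that value is a PEM-encoded public key string. For local experiments, a throwaway key pair can be generated with the cryptography package. Treating rsa_public_key as a PEM string is an assumption based on how these configs are populated; verify it against your rohmu version.

from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import rsa

# Hypothetical throwaway key for testing; never reuse it for real backups.
private_key = rsa.generate_private_key(public_exponent=65537, key_size=3072)
rsa_public_key = private_key.public_key().public_bytes(
    encoding=serialization.Encoding.PEM,
    format=serialization.PublicFormat.SubjectPublicKeyInfo,
).decode("ascii")
# rsa_public_key can now be passed to rohmufile.write_file(...). Keep the
# private key around if you want to decrypt the result later.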
Example #4
    def basebackup_compression_pipe(self, proc, basebackup_path):
        rsa_public_key = None
        encryption_key_id = self.config["backup_sites"][self.site]["encryption_key_id"]
        if encryption_key_id:
            rsa_public_key = self.config["backup_sites"][self.site]["encryption_keys"][encryption_key_id]["public"]
        compression_algorithm = self.config["compression"]["algorithm"]
        compression_level = self.config["compression"]["level"]
        self.log.debug("Compressing basebackup directly to file: %r", basebackup_path)
        set_stream_nonblocking(proc.stderr)

        metadata = {
            "compression-algorithm": compression_algorithm,
            "encryption-key-id": encryption_key_id,
            "host": socket.gethostname(),
        }

        with NamedTemporaryFile(prefix=basebackup_path, suffix=".tmp-compress") as output_obj:
            def extract_header_func(input_data):
                # backup_label should always be first in the tar ball
                if input_data[0:12].startswith(b"backup_label"):
                    # skip the 512 byte tar header to get to the actual backup label content
                    start_wal_segment, start_time = self.parse_backup_label(input_data[512:1024])

                    metadata.update({"start-wal-segment": start_wal_segment,
                                     "start-time": start_time})

            def progress_callback():
                stderr_data = proc.stderr.read()
                if stderr_data:
                    self.latest_activity = datetime.datetime.utcnow()
                    self.log.debug("pg_basebackup stderr: %r", stderr_data)

            original_input_size, compressed_file_size = rohmufile.write_file(
                input_obj=proc.stdout,
                output_obj=output_obj,
                compression_algorithm=compression_algorithm,
                compression_level=compression_level,
                rsa_public_key=rsa_public_key,
                progress_callback=progress_callback,
                log_func=self.log.info,
                header_func=extract_header_func
            )
            os.link(output_obj.name, basebackup_path)

        if original_input_size:
            size_ratio = compressed_file_size / original_input_size
            self.metrics.gauge(
                "pghoard.compressed_size_ratio", size_ratio,
                tags={
                    "algorithm": compression_algorithm,
                    "site": self.site,
                    "type": "basebackup",
                })

        return original_input_size, compressed_file_size, metadata
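
The header_func hook in these two examples peeks at the raw, pre-compression input to pull backup_label metadata out of the leading tar header. A self-contained sketch of the same hook follows; how often rohmu invokes header_func and with what chunk sizes is an assumption about its internals, so the callback below is written defensively.

import io
import logging

from rohmu import rohmufile  # assumption: import path

logger = logging.getLogger("rohmufile-demo")


def compress_tar_with_peek(tar_bytes: bytes, algorithm: str = "snappy"):
    seen = {}

    def header_func(input_data):
        # Assumption: called with leading chunk(s) of the raw input, so the
        # 512-byte tar header of the first member is visible here.
        if "first_member" not in seen and len(input_data) >= 100:
            seen["first_member"] = input_data[:100].rstrip(b"\x00").decode(errors="replace")

    with io.BytesIO(tar_bytes) as input_obj, io.BytesIO() as output_obj:
        rohmufile.write_file(
            input_obj=input_obj,
            output_obj=output_obj,
            compression_algorithm=algorithm,
            header_func=header_func,
            log_func=logger.debug,
        )
        return seen, output_obj.getvalue()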
Example #5
 def test_write_file(self):
     ifile = WALTester(self.incoming_path, "00000001000000000000000D", "random")
     with open(ifile.path, "rb") as input_obj, io.BytesIO() as output_obj:
         orig_len, compr_len = rohmufile.write_file(
             input_obj=input_obj,
             output_obj=output_obj,
             compression_algorithm=self.algorithm,
             log_func=self.log.info,
         )
         assert output_obj.tell() == compr_len
         assert len(output_obj.getvalue()) == compr_len
         assert orig_len == ifile.size
Example #6
 def test_decompression_decrypt_event(self):
     ifile = WALTester(self.incoming_path, "00000001000000000000000E",
                       "random")
     output_obj = io.BytesIO()
     with open(ifile.path, "rb") as input_obj:
         rohmufile.write_file(
             input_obj=input_obj,
             output_obj=output_obj,
             compression_algorithm=self.config["compression"]["algorithm"],
             compression_level=self.config["compression"]["level"],
             rsa_public_key=CONSTANT_TEST_RSA_PUBLIC_KEY,
             log_func=self.log.info,
         )
     callback_queue = Queue()
     local_filepath = os.path.join(self.temp_dir,
                                   "00000001000000000000000E")
     self.compression_queue.put({
         "blob": output_obj.getvalue(),
         "callback_queue": callback_queue,
         "filetype": "xlog",
         "local_path": local_filepath,
         "metadata": {
             "compression-algorithm": self.algorithm,
             "compression-level": 0,
             "encryption-key-id": "testkey",
             "host": socket.gethostname(),
             "original-file-size": ifile.size,
             "pg-version": 90500,
         },
         "site": self.test_site,
         "type": "DECOMPRESSION",
     })
     callback_queue.get(timeout=5.0)
     assert os.path.exists(local_filepath) is True
     with open(local_filepath, "rb") as fp:
         fdata = fp.read()
     assert fdata[:100] == ifile.contents[:100]
     assert fdata == ifile.contents
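
This test pushes the compressed blob through the decompression queue; rohmufile also has a direct read-side counterpart. pghoard's restore path calls rohmufile.read_file with the stored metadata and a key_lookup callable mapping an encryption key id to the private key. The sketch below assumes that signature; double-check it against the rohmu version you have installed.

import io
import logging

from rohmu import rohmufile  # assumption: import path

logger = logging.getLogger("rohmufile-demo")


def decompress_blob(blob: bytes, metadata: dict, private_key_pem: str) -> bytes:
    """Hedged sketch: reverse write_file using the stored metadata."""
    with io.BytesIO(blob) as input_obj, io.BytesIO() as output_obj:
        rohmufile.read_file(  # assumption: mirrors write_file, as in pghoard restore
            input_obj=input_obj,
            output_obj=output_obj,
            metadata=metadata,
            key_lookup=lambda key_id: private_key_pem,  # demo ignores key_id
            log_func=logger.debug,
        )
        return output_obj.getvalue()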
Example #7
 def test_decompression_decrypt_event(self):
     ifile = TestXlog(self.incoming_path, "00000001000000000000000E", "random")
     output_obj = io.BytesIO()
     with open(ifile.path, "rb") as input_obj:
         rohmufile.write_file(
             input_obj=input_obj,
             output_obj=output_obj,
             compression_algorithm=self.config["compression"]["algorithm"],
             compression_level=self.config["compression"]["level"],
             rsa_public_key=CONSTANT_TEST_RSA_PUBLIC_KEY,
             log_func=self.log.info,
         )
     callback_queue = Queue()
     local_filepath = os.path.join(self.temp_dir, "00000001000000000000000E")
     self.compression_queue.put(
         {
             "blob": output_obj.getvalue(),
             "callback_queue": callback_queue,
             "filetype": "xlog",
             "local_path": local_filepath,
             "metadata": {
                 "compression-algorithm": self.algorithm,
                 "compression-level": 0,
                 "encryption-key-id": "testkey",
                 "original-file-size": ifile.size,
                 "pg-version": 90500,
             },
             "site": self.test_site,
             "type": "DECOMPRESSION",
         }
     )
     callback_queue.get(timeout=1.0)
     assert os.path.exists(local_filepath) is True
     with open(local_filepath, "rb") as fp:
         fdata = fp.read()
     assert fdata[:100] == ifile.contents[:100]
     assert fdata == ifile.contents
Example #8
    def basebackup_compression_pipe(self, proc, basebackup_path):
        rsa_public_key = None
        encryption_key_id = self.config["backup_sites"][
            self.site]["encryption_key_id"]
        if encryption_key_id:
            rsa_public_key = self.config["backup_sites"][
                self.site]["encryption_keys"][encryption_key_id]["public"]
        compression_algorithm = self.config["compression"]["algorithm"]
        compression_level = self.config["compression"]["level"]
        self.log.debug("Compressing basebackup directly to file: %r",
                       basebackup_path)
        set_stream_nonblocking(proc.stderr)

        with NamedTemporaryFile(prefix=basebackup_path,
                                suffix=".tmp-compress") as output_obj:

            def progress_callback():
                stderr_data = proc.stderr.read()
                if stderr_data:
                    self.latest_activity = datetime.datetime.utcnow()
                    self.log.debug("pg_basebackup stderr: %r", stderr_data)

            original_input_size, compressed_file_size = rohmufile.write_file(
                input_obj=proc.stdout,
                output_obj=output_obj,
                compression_algorithm=compression_algorithm,
                compression_level=compression_level,
                rsa_public_key=rsa_public_key,
                progress_callback=progress_callback,
                log_func=self.log.info,
            )
            os.link(output_obj.name, basebackup_path)

        if original_input_size:
            size_ratio = compressed_file_size / original_input_size
            self.stats.gauge("pghoard.compressed_size_ratio",
                             size_ratio,
                             tags={
                                 "algorithm": compression_algorithm,
                                 "site": self.site,
                                 "type": "basebackup",
                             })

        metadata = {
            "compression-algorithm": compression_algorithm,
            "encryption-key-id": encryption_key_id,
        }
        return original_input_size, compressed_file_size, metadata
Example #9
    def basebackup_compression_pipe(self, proc, basebackup_path):
        rsa_public_key = None
        encryption_key_id = self.config["backup_sites"][self.site]["encryption_key_id"]
        if encryption_key_id:
            rsa_public_key = self.config["backup_sites"][self.site]["encryption_keys"][encryption_key_id]["public"]
        compression_algorithm = self.config["compression"]["algorithm"]
        compression_level = self.config["compression"]["level"]
        self.log.debug("Compressing basebackup directly to file: %r", basebackup_path)
        set_stream_nonblocking(proc.stderr)

        with NamedTemporaryFile(prefix=basebackup_path, suffix=".tmp-compress") as output_obj:
            def progress_callback():
                stderr_data = proc.stderr.read()
                if stderr_data:
                    self.latest_activity = datetime.datetime.utcnow()
                    self.log.debug("pg_basebackup stderr: %r", stderr_data)

            original_input_size, compressed_file_size = rohmufile.write_file(
                input_obj=proc.stdout,
                output_obj=output_obj,
                compression_algorithm=compression_algorithm,
                compression_level=compression_level,
                rsa_public_key=rsa_public_key,
                progress_callback=progress_callback,
                log_func=self.log.info,
            )
            os.link(output_obj.name, basebackup_path)

        if original_input_size:
            size_ratio = compressed_file_size / original_input_size
            self.stats.gauge(
                "pghoard.compressed_size_ratio", size_ratio,
                tags={
                    "algorithm": compression_algorithm,
                    "site": self.site,
                    "type": "basebackup",
                })

        metadata = {
            "compression-algorithm": compression_algorithm,
            "encryption-key-id": encryption_key_id,
        }
        return original_input_size, compressed_file_size, metadata
Example #10
    def handle_event(self, event, filetype):
        # pylint: disable=redefined-variable-type
        rsa_public_key = None
        site = event.get("site")
        if not site:
            site = self.find_site_for_file(event["full_path"])

        encryption_key_id = self.config["backup_sites"][site][
            "encryption_key_id"]
        if encryption_key_id:
            rsa_public_key = self.config["backup_sites"][site][
                "encryption_keys"][encryption_key_id]["public"]

        compressed_blob = None
        if event.get("compress_to_memory"):
            output_obj = BytesIO()
            compressed_filepath = None
        else:
            compressed_filepath = self.get_compressed_file_path(
                site, filetype, event["full_path"])
            output_obj = NamedTemporaryFile(
                dir=os.path.dirname(compressed_filepath),
                prefix=os.path.basename(compressed_filepath),
                suffix=".tmp-compress")

        input_obj = event.get("input_data")
        if not input_obj:
            input_obj = open(event["full_path"], "rb")
        with output_obj, input_obj:
            hash_algorithm = self.config["hash_algorithm"]
            hasher = None
            if filetype == "xlog":
                wal.verify_wal(wal_name=os.path.basename(event["full_path"]),
                               fileobj=input_obj)
                hasher = hashlib.new(hash_algorithm)

            original_file_size, compressed_file_size = rohmufile.write_file(
                data_callback=hasher.update if hasher else None,
                input_obj=input_obj,
                output_obj=output_obj,
                compression_algorithm=self.config["compression"]["algorithm"],
                compression_level=self.config["compression"]["level"],
                rsa_public_key=rsa_public_key,
                log_func=self.log.info,
            )

            if compressed_filepath:
                os.link(output_obj.name, compressed_filepath)
            else:
                compressed_blob = output_obj.getvalue()

        if event.get("delete_file_after_compression", True):
            os.unlink(event["full_path"])

        metadata = event.get("metadata", {})
        metadata.update({
            "pg-version": self.config["backup_sites"][site].get("pg_version"),
            "compression-algorithm": self.config["compression"]["algorithm"],
            "compression-level": self.config["compression"]["level"],
            "original-file-size": original_file_size,
            "host": socket.gethostname(),
        })
        if hasher:
            metadata["hash"] = hasher.hexdigest()
            metadata["hash-algorithm"] = hash_algorithm
        if encryption_key_id:
            metadata.update({"encryption-key-id": encryption_key_id})
        if compressed_filepath:
            metadata_path = compressed_filepath + ".metadata"
            write_json_file(metadata_path, metadata)

        self.set_state_defaults_for_site(site)
        self.state[site][filetype]["original_data"] += original_file_size
        self.state[site][filetype]["compressed_data"] += compressed_file_size
        self.state[site][filetype]["count"] += 1
        if original_file_size:
            size_ratio = compressed_file_size / original_file_size
            self.metrics.gauge(
                "pghoard.compressed_size_ratio", size_ratio,
                tags={
                    "algorithm": self.config["compression"]["algorithm"],
                    "site": site,
                    "type": filetype,
                })
        transfer_object = {
            "callback_queue": event.get("callback_queue"),
            "file_size": compressed_file_size,
            "filetype": filetype,
            "metadata": metadata,
            "opaque": event.get("opaque"),
            "site": site,
            "type": "UPLOAD",
        }
        if compressed_filepath:
            transfer_object["local_path"] = compressed_filepath
        else:
            transfer_object["blob"] = compressed_blob
            transfer_object["local_path"] = event["full_path"]

        self.transfer_queue.put(transfer_object)
        return True
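
The data_callback/hasher pairing above shows how handle_event digests the uncompressed file in the same pass as compression: write_file hands each chunk of the original input to data_callback. A minimal sketch follows (the function name is illustrative, not part of rohmu):

import hashlib
import io
import logging

from rohmu import rohmufile  # assumption: import path

logger = logging.getLogger("rohmufile-demo")


def hash_and_compress(data: bytes, algorithm: str = "snappy"):
    hasher = hashlib.sha256()
    with io.BytesIO(data) as input_obj, io.BytesIO() as output_obj:
        rohmufile.write_file(
            data_callback=hasher.update,  # receives the uncompressed stream
            input_obj=input_obj,
            output_obj=output_obj,
            compression_algorithm=algorithm,
            log_func=logger.debug,
        )
        return hasher.hexdigest(), output_obj.getvalue()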
Example #11
    def handle_event(self, event, filetype):
        # pylint: disable=redefined-variable-type
        rsa_public_key = None
        site = event.get("site")
        if not site:
            site = self.find_site_for_file(event["full_path"])

        encryption_key_id = self.config["backup_sites"][site]["encryption_key_id"]
        if encryption_key_id:
            rsa_public_key = self.config["backup_sites"][site]["encryption_keys"][encryption_key_id]["public"]

        compressed_blob = None
        if event.get("compress_to_memory"):
            output_obj = BytesIO()
            compressed_filepath = None
        else:
            compressed_filepath = self.get_compressed_file_path(site, filetype, event["full_path"])
            output_obj = NamedTemporaryFile(dir=os.path.dirname(compressed_filepath),
                                            prefix=os.path.basename(compressed_filepath), suffix=".tmp-compress")

        input_obj = event.get("input_data")
        if not input_obj:
            input_obj = open(event["full_path"], "rb")
        with output_obj, input_obj:
            hash_algorithm = self.config["hash_algorithm"]
            hasher = None
            if filetype == "xlog":
                wal.verify_wal(wal_name=os.path.basename(event["full_path"]), fileobj=input_obj)
                hasher = hashlib.new(hash_algorithm)

            original_file_size, compressed_file_size = rohmufile.write_file(
                data_callback=hasher.update if hasher else None,
                input_obj=input_obj,
                output_obj=output_obj,
                compression_algorithm=self.config["compression"]["algorithm"],
                compression_level=self.config["compression"]["level"],
                rsa_public_key=rsa_public_key,
                log_func=self.log.info,
            )

            if compressed_filepath:
                os.link(output_obj.name, compressed_filepath)
            else:
                compressed_blob = output_obj.getvalue()

        if event.get("delete_file_after_compression", True):
            os.unlink(event["full_path"])

        metadata = event.get("metadata", {})
        metadata.update({
            "pg-version": self.config["backup_sites"][site].get("pg_version"),
            "compression-algorithm": self.config["compression"]["algorithm"],
            "compression-level": self.config["compression"]["level"],
            "original-file-size": original_file_size,
            "host": socket.gethostname(),
        })
        if hasher:
            metadata["hash"] = hasher.hexdigest()
            metadata["hash-algorithm"] = hash_algorithm
        if encryption_key_id:
            metadata.update({"encryption-key-id": encryption_key_id})
        if compressed_filepath:
            metadata_path = compressed_filepath + ".metadata"
            write_json_file(metadata_path, metadata)

        self.set_state_defaults_for_site(site)
        self.state[site][filetype]["original_data"] += original_file_size
        self.state[site][filetype]["compressed_data"] += compressed_file_size
        self.state[site][filetype]["count"] += 1
        if original_file_size:
            size_ratio = compressed_file_size / original_file_size
            self.metrics.gauge(
                "pghoard.compressed_size_ratio", size_ratio,
                tags={
                    "algorithm": self.config["compression"]["algorithm"],
                    "site": site,
                    "type": filetype,
                })
        transfer_object = {
            "callback_queue": event.get("callback_queue"),
            "file_size": compressed_file_size,
            "filetype": filetype,
            "metadata": metadata,
            "opaque": event.get("opaque"),
            "site": site,
            "type": "UPLOAD",
        }
        if compressed_filepath:
            transfer_object["local_path"] = compressed_filepath
        else:
            transfer_object["blob"] = compressed_blob
            transfer_object["local_path"] = event["full_path"]

        self.transfer_queue.put(transfer_object)
        return True
Example #12
    def _delta_upload_hexdigest(self, *, temp_dir, chunk_path, file_obj,
                                callback_queue, relative_path):
        skip_upload = False
        start_time = time.monotonic()

        input_size = file_obj.seek(0, os.SEEK_END)
        file_obj.seek(0)

        result_hash = hashlib.blake2s()

        def update_hash(data):
            result_hash.update(data)

        with NamedTemporaryFile(dir=temp_dir,
                                prefix=os.path.basename(chunk_path),
                                suffix=".tmp") as raw_output_obj:
            rohmufile.write_file(
                input_obj=file_obj,
                output_obj=raw_output_obj,
                compression_algorithm=self.compression_data.algorithm,
                compression_level=self.compression_data.level,
                rsa_public_key=self.encryption_data.rsa_public_key,
                log_func=self.log.info,
                data_callback=update_hash)
            result_size = raw_output_obj.tell()
            raw_output_obj.seek(0)

            result_digest = result_hash.hexdigest()

            with self.submitted_hashes_lock:
                if result_digest in self.submitted_hashes:
                    # file with the same hash was already submitted
                    self.log.debug(
                        "Skip uploading file %r, file with the same was hash already submitted for uploading",
                        relative_path)
                    skip_upload = True
                    return input_size, result_size, result_digest, skip_upload
                else:
                    self.submitted_hashes.add(result_digest)

            os.link(raw_output_obj.name, chunk_path)

        rohmufile.log_compression_result(
            encrypted=bool(self.encryption_data.encryption_key_id),
            elapsed=time.monotonic() - start_time,
            original_size=input_size,
            result_size=result_size,
            source_name="$PGDATA delta basebackup file",
            log_func=self.log.info,
        )

        if input_size:
            size_ratio = result_size / input_size
            self.metrics.gauge(
                "pghoard.compressed_size_ratio", size_ratio,
                tags={
                    "algorithm": self.compression_data.algorithm,
                    "site": self.site,
                    "type": "basebackup_delta",
                })

        metadata = {
            "compression-algorithm": self.compression_data.algorithm,
            "encryption-key-id": self.encryption_data.encryption_key_id,
            "format": BaseBackupFormat.delta_v1,
            "original-file-size": input_size,
            "host": socket.gethostname(),
        }

        self.transfer_queue.put({
            "callback_queue": callback_queue,
            "file_size": result_size,
            "filetype": "basebackup_delta",
            "local_path": chunk_path,
            "metadata": metadata,
            "site": self.site,
            "type": "UPLOAD",
            "delta": {
                "hexdigest": result_digest,
            },
        })

        return input_size, result_size, result_digest, skip_upload