Example #1
class Compression(PGHoardTestCase):
    algorithm = None

    def compress(self, data):
        raise NotImplementedError

    def decompress(self, data):
        raise NotImplementedError

    def setup_method(self, method):
        super().setup_method(method)
        self.config = {
            "backup_sites": {
                self.test_site: {
                    "object_storage": {
                        "storage_type": "s3",
                    },
                    "encryption_keys": {
                        "testkey": {
                            "public": CONSTANT_TEST_RSA_PUBLIC_KEY,
                            "private": CONSTANT_TEST_RSA_PRIVATE_KEY
                        },
                    },
                },
            },
            "backup_location": os.path.join(self.temp_dir, "backups"),
            "compression": {
                "algorithm": self.algorithm,
            }
        }
        self.compression_queue = Queue()
        self.transfer_queue = Queue()
        self.incoming_path = os.path.join(self.temp_dir, self.test_site, "xlog")
        os.makedirs(self.incoming_path)
        self.handled_path = os.path.join(self.config["backup_location"], self.test_site, "xlog")
        os.makedirs(self.handled_path)
        self.random_file_path = os.path.join(self.incoming_path, "00000001000000000000000C")
        self.random_file_path_partial = os.path.join(self.incoming_path, "00000001000000000000000C.partial")

        # Create a completely random file, bigger than the block size; random data doesn't compress, so the compressed output is longer than the input
        self.random_file_contents = os.urandom(IO_BLOCK_SIZE * 2)
        with open(self.random_file_path, "wb") as out:
            out.write(self.random_file_contents)
            self.random_file_size = out.tell()

        # Create an easily compressible test file, too (with random prefix and suffix)
        self.zero_file_path = os.path.join(self.incoming_path, "00000001000000000000000D")
        self.zero_file_path_partial = os.path.join(self.incoming_path, "00000001000000000000000D.partial")

        # Ensure the plaintext file is bigger than the block size and mostly zeros, so the compressed output is smaller than the input
        zeros = (IO_BLOCK_SIZE * 2 - 32) * b"\x00"
        self.zero_file_contents = os.urandom(16) + zeros + os.urandom(16)
        with open(self.zero_file_path, "wb") as out:
            out.write(self.zero_file_contents)
            self.zero_file_size = out.tell()

        self.compressor = CompressorThread(config=self.config,
                                           compression_queue=self.compression_queue,
                                           transfer_queue=self.transfer_queue)
        self.compressor.start()

    def teardown_method(self, method):
        self.compressor.running = False
        self.compression_queue.put({"type": "QUIT"})
        self.compressor.join()
        super().teardown_method(method)

    def test_get_event_type(self):
        filetype = self.compressor.get_event_filetype({
            "full_path": "00000001000000000000000C",
            "src_path": "00000001000000000000000C.partial",
            "type": "MOVE",
        })
        assert filetype == "xlog"
        # TODO: check the timeline history file naming format
        filetype = self.compressor.get_event_filetype({
            "full_path": "1.history",
            "src_path": "1.history.partial",
            "type": "MOVE",
        })
        assert filetype == "timeline"
        filetype = self.compressor.get_event_filetype({"type": "CLOSE_WRITE", "full_path": "base.tar"})
        assert filetype == "basebackup"

    def test_compress_to_file(self):
        self.compression_queue.put({
            "type": "MOVE",
            "src_path": self.random_file_path_partial,
            "full_path": self.random_file_path,
        })
        transfer_event = self.transfer_queue.get(timeout=1.0)
        expected = {
            "filetype": "xlog",
            "local_path": self.random_file_path.replace(self.incoming_path, self.handled_path),
            "metadata": {
                "compression-algorithm": self.algorithm,
                "original-file-size": self.random_file_size,
            },
            "site": self.test_site,
        }
        for key, value in expected.items():
            assert transfer_event[key] == value

    def test_compress_to_memory(self):
        event = {
            "compress_to_memory": True,
            "delete_file_after_compression": False,
            "full_path": self.random_file_path,
            "src_path": self.random_file_path_partial,
            "type": "MOVE",
        }
        self.compressor.handle_event(event, filetype="xlog")
        expected = {
            "callback_queue": None,
            "filetype": "xlog",
            "local_path": self.random_file_path,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "original-file-size": self.random_file_size,
            },
            "site": self.test_site,
        }
        transfer_event = self.transfer_queue.get(timeout=1.0)
        for key, value in expected.items():
            assert transfer_event[key] == value

        assert self.decompress(transfer_event["blob"]) == self.random_file_contents

    def test_compress_encrypt_to_memory(self):
        self.compressor.config["backup_sites"][self.test_site]["encryption_key_id"] = "testkey"
        event = {
            "compress_to_memory": True,
            "delete_file_after_compression": False,
            "full_path": self.random_file_path,
            "src_path": self.random_file_path_partial,
            "type": "MOVE",
        }
        self.compressor.handle_event(event, filetype="xlog")
        expected = {
            "callback_queue": None,
            "filetype": "xlog",
            "local_path": self.random_file_path,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "encryption-key-id": "testkey",
                "original-file-size": self.random_file_size,
            },
            "site": self.test_site,
        }
        transfer_event = self.transfer_queue.get(timeout=1.0)
        for key, value in expected.items():
            assert transfer_event[key] == value

    def test_archive_command_compression(self):
        callback_queue = Queue()
        event = {
            "callback_queue": callback_queue,
            "compress_to_memory": True,
            "delete_file_after_compression": False,
            "full_path": self.zero_file_path,
            "src_path": self.zero_file_path_partial,
            "type": "CLOSE_WRITE",
        }
        self.compression_queue.put(event)
        transfer_event = self.transfer_queue.get(timeout=1.0)
        expected = {
            "callback_queue": callback_queue,
            "filetype": "xlog",
            "local_path": self.zero_file_path,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "original-file-size": self.zero_file_size,
            },
            "site": self.test_site,
        }
        for key, value in expected.items():
            assert transfer_event[key] == value

        assert self.decompress(transfer_event["blob"]) == self.zero_file_contents

    def test_decompression_event(self):
        callback_queue = Queue()
        local_filepath = os.path.join(self.temp_dir, "00000001000000000000000D")
        self.compression_queue.put({
            "blob": self.compress(self.random_file_contents),
            "callback_queue": callback_queue,
            "filetype": "xlog",
            "local_path": local_filepath,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "original-file-size": self.random_file_size,
            },
            "site": self.test_site,
            "type": "DECOMPRESSION",
        })
        callback_queue.get(timeout=1.0)
        assert os.path.exists(local_filepath) is True
        with open(local_filepath, "rb") as fp:
            assert fp.read() == self.random_file_contents

    def test_decompression_decrypt_event(self):
        _, blob = self.compressor.compress_filepath_to_memory(
            self.random_file_path,
            compression_algorithm=self.compressor.compression_algorithm(),
            rsa_public_key=CONSTANT_TEST_RSA_PUBLIC_KEY)
        callback_queue = Queue()
        local_filepath = os.path.join(self.temp_dir, "00000001000000000000000E")
        self.compression_queue.put({
            "blob": blob,
            "callback_queue": callback_queue,
            "filetype": "xlog",
            "local_path": local_filepath,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "encryption-key-id": "testkey",
                "original-file-size": self.random_file_size,
            },
            "site": self.test_site,
            "type": "DECOMPRESSION",
        })
        callback_queue.get(timeout=1.0)
        assert os.path.exists(local_filepath) is True
        with open(local_filepath, "rb") as fp:
            assert fp.read() == self.random_file_contents
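
The class above is an abstract base: algorithm, compress and decompress are filled in by one subclass per supported codec. A minimal sketch of such a concrete subclass, assuming the stdlib lzma module and that the "lzma" algorithm name matches what the compressor expects:

import lzma


class TestLzmaCompression(Compression):
    algorithm = "lzma"

    def compress(self, data):
        # one-shot compression of the whole payload
        return lzma.compress(data)

    def decompress(self, data):
        return lzma.decompress(data)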
Example #2
class CompressionCase(PGHoardTestCase):
    algorithm = None

    def compress(self, data):
        raise NotImplementedError

    def decompress(self, data):
        raise NotImplementedError

    def setup_method(self, method):
        super().setup_method(method)
        self.log = logging.getLogger(str(method))
        self.config = self.config_template()
        self.config["backup_sites"][self.test_site] = {
            "encryption_key_id": None,
            "encryption_keys": {
                "testkey": {"public": CONSTANT_TEST_RSA_PUBLIC_KEY, "private": CONSTANT_TEST_RSA_PRIVATE_KEY}
            },
            "object_storage": {"storage_type": "s3"},
            "pg_version": 90500,
        }
        self.config["compression"]["algorithm"] = self.algorithm
        self.compression_queue = Queue()
        self.transfer_queue = Queue()
        self.incoming_path = os.path.join(self.temp_dir, self.test_site, "xlog")
        os.makedirs(self.incoming_path)
        self.handled_path = os.path.join(self.config["backup_location"], self.test_site, "xlog")
        os.makedirs(self.handled_path)

        self.compressor = CompressorThread(
            config_dict=self.config,
            compression_queue=self.compression_queue,
            transfer_queue=self.transfer_queue,
            stats=statsd.StatsClient(host=None),
        )
        self.compressor.start()

    def teardown_method(self, method):
        self.compressor.running = False
        self.compression_queue.put({"type": "QUIT"})
        self.compressor.join()
        super().teardown_method(method)

    def test_get_event_type(self):
        # Rename from .partial to final should be recognized
        event = {
            "full_path": "/out/00000001000000000000000C",
            "src_path": "/tmp/00000001000000000000000C.partial",
            "type": "MOVE",
        }
        assert self.compressor.get_event_filetype(event) == "xlog"
        # Rename from non-partial suffix is not recognized
        event["src_path"] += "xyz"
        assert self.compressor.get_event_filetype(event) is None
        # "CLOSE_WRITE" doesn't consider src_path
        del event["src_path"]
        event["type"] = "CLOSE_WRITE"
        assert self.compressor.get_event_filetype(event) == "xlog"
        # other event types are ignored
        event["type"] = "NAKKI"
        assert self.compressor.get_event_filetype(event) is None

        # Timeline history files are handled the same way (do they actually ever have .partial?)
        event = {"full_path": "/xlog/0000000A.history", "src_path": "/tmp/0000000A.history.partial", "type": "MOVE"}
        assert self.compressor.get_event_filetype(event) == "timeline"
        event["src_path"] += "xyz"
        assert self.compressor.get_event_filetype(event) is None
        del event["src_path"]
        event["type"] = "CLOSE_WRITE"
        assert self.compressor.get_event_filetype(event) == "timeline"

        event = {"full_path": "/data/base.tar", "type": "CLOSE_WRITE"}
        assert self.compressor.get_event_filetype(event) == "basebackup"

    def test_write_file(self):
        ifile = TestXlog(self.incoming_path, "00000001000000000000000C", "random")
        with open(ifile.path, "rb") as input_obj, io.BytesIO() as output_obj:
            orig_len, compr_len = rohmufile.write_file(
                input_obj=input_obj, output_obj=output_obj, compression_algorithm=self.algorithm, log_func=self.log.info
            )
            assert output_obj.tell() == compr_len
            assert len(output_obj.getvalue()) == compr_len
            assert orig_len == ifile.size

    def test_compress_to_file_xlog(self):
        ifile = TestXlog(self.incoming_path, "00000001000000000000000C", "random")
        self._test_compress_to_file("xlog", ifile.size, ifile.path, ifile.path_partial)

    def test_compress_to_file_history(self):
        file_path = os.path.join(self.incoming_path, "0000000F.history")
        contents = "\n".join("# FOOBAR {}".format(n) for n in range(10)) + "\n"
        contents = contents.encode("ascii")
        with open(file_path, "wb") as out:
            out.write(contents)
            file_size = out.tell()

        self._test_compress_to_file("timeline", file_size, file_path, file_path + ".partial")

    def _test_compress_to_file(self, filetype, file_size, file_path, file_path_partial):
        self.compression_queue.put({"type": "MOVE", "src_path": file_path_partial, "full_path": file_path})
        transfer_event = self.transfer_queue.get(timeout=1.0)
        expected = {
            "filetype": filetype,
            "local_path": file_path.replace(self.incoming_path, self.handled_path),
            "metadata": {
                "compression-algorithm": self.algorithm,
                "compression-level": 0,
                "original-file-size": file_size,
                "pg-version": 90500,
            },
            "site": self.test_site,
        }
        for key, value in expected.items():
            assert transfer_event[key] == value

    def test_compress_to_memory(self):
        ifile = TestXlog(self.incoming_path, "00000001000000000000000C", "random")
        self.compression_queue.put(
            {
                "compress_to_memory": True,
                "delete_file_after_compression": False,
                "full_path": ifile.path,
                "src_path": ifile.path_partial,
                "type": "MOVE",
            }
        )
        expected = {
            "callback_queue": None,
            "filetype": "xlog",
            "local_path": ifile.path,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "compression-level": 0,
                "original-file-size": ifile.size,
                "pg-version": 90500,
            },
            "site": self.test_site,
        }
        transfer_event = self.transfer_queue.get(timeout=1.0)
        for key, value in expected.items():
            assert transfer_event[key] == value

        result = self.decompress(transfer_event["blob"])
        assert result[:100] == ifile.contents[:100]
        assert result == ifile.contents

    def test_compress_encrypt_to_memory(self):
        ifile = TestXlog(self.incoming_path, "00000001000000000000000C", "random")
        self.compressor.config["backup_sites"][self.test_site]["encryption_key_id"] = "testkey"
        event = {
            "compress_to_memory": True,
            "delete_file_after_compression": False,
            "full_path": ifile.path,
            "src_path": ifile.path_partial,
            "type": "MOVE",
        }
        self.compressor.handle_event(event, filetype="xlog")
        expected = {
            "callback_queue": None,
            "filetype": "xlog",
            "local_path": ifile.path,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "compression-level": 0,
                "encryption-key-id": "testkey",
                "original-file-size": ifile.size,
                "pg-version": 90500,
            },
            "site": self.test_site,
        }
        transfer_event = self.transfer_queue.get(timeout=1.0)
        for key, value in expected.items():
            assert transfer_event[key] == value

    def test_archive_command_compression(self):
        zero = TestXlog(self.incoming_path, "00000001000000000000000D", "zero")
        callback_queue = Queue()
        event = {
            "callback_queue": callback_queue,
            "compress_to_memory": True,
            "delete_file_after_compression": False,
            "full_path": zero.path,
            "src_path": zero.path_partial,
            "type": "CLOSE_WRITE",
        }
        self.compression_queue.put(event)
        transfer_event = self.transfer_queue.get(timeout=1.0)
        expected = {
            "callback_queue": callback_queue,
            "filetype": "xlog",
            "local_path": zero.path,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "compression-level": 0,
                "original-file-size": zero.size,
                "pg-version": 90500,
            },
            "site": self.test_site,
        }
        for key, value in expected.items():
            assert transfer_event[key] == value

        assert self.decompress(transfer_event["blob"]) == zero.contents

    def test_decompression_event(self):
        ifile = TestXlog(self.incoming_path, "00000001000000000000000A", "random")
        callback_queue = Queue()
        local_filepath = os.path.join(self.temp_dir, "00000001000000000000000A")
        self.compression_queue.put(
            {
                "blob": self.compress(ifile.contents),
                "callback_queue": callback_queue,
                "filetype": "xlog",
                "local_path": local_filepath,
                "metadata": {
                    "compression-algorithm": self.algorithm,
                    "compression-level": 0,
                    "original-file-size": ifile.size,
                    "pg-version": 90500,
                },
                "site": self.test_site,
                "type": "DECOMPRESSION",
            }
        )
        callback_queue.get(timeout=1.0)
        assert os.path.exists(local_filepath) is True
        with open(local_filepath, "rb") as fp:
            fdata = fp.read()
        assert fdata[:100] == ifile.contents[:100]
        assert fdata == ifile.contents

    def test_decompression_decrypt_event(self):
        ifile = TestXlog(self.incoming_path, "00000001000000000000000E", "random")
        output_obj = io.BytesIO()
        with open(ifile.path, "rb") as input_obj:
            rohmufile.write_file(
                input_obj=input_obj,
                output_obj=output_obj,
                compression_algorithm=self.config["compression"]["algorithm"],
                compression_level=self.config["compression"]["level"],
                rsa_public_key=CONSTANT_TEST_RSA_PUBLIC_KEY,
                log_func=self.log.info,
            )
        callback_queue = Queue()
        local_filepath = os.path.join(self.temp_dir, "00000001000000000000000E")
        self.compression_queue.put(
            {
                "blob": output_obj.getvalue(),
                "callback_queue": callback_queue,
                "filetype": "xlog",
                "local_path": local_filepath,
                "metadata": {
                    "compression-algorithm": self.algorithm,
                    "compression-level": 0,
                    "encryption-key-id": "testkey",
                    "original-file-size": ifile.size,
                    "pg-version": 90500,
                },
                "site": self.test_site,
                "type": "DECOMPRESSION",
            }
        )
        callback_queue.get(timeout=1.0)
        assert os.path.exists(local_filepath) is True
        with open(local_filepath, "rb") as fp:
            fdata = fp.read()
        assert fdata[:100] == ifile.contents[:100]
        assert fdata == ifile.contents

    def test_compress_decompress_fileobj(self, tmpdir):
        plaintext = TestXlog(self.incoming_path, "00000001000000000000000E", "random").contents
        output_file = tmpdir.join("data.out").strpath
        with open(output_file, "w+b") as plain_fp:
            cmp_fp = compressor.CompressionFile(plain_fp, self.algorithm)

            assert cmp_fp.fileno() == plain_fp.fileno()
            assert cmp_fp.readable() is False
            with pytest.raises(io.UnsupportedOperation):
                cmp_fp.read(1)
            assert cmp_fp.seekable() is False
            with pytest.raises(io.UnsupportedOperation):
                cmp_fp.seek(1, os.SEEK_CUR)
            assert cmp_fp.writable() is True

            cmp_fp.write(plaintext)
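            # an empty write and a double close are expected to be harmless no-ops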
            cmp_fp.write(b"")
            assert cmp_fp.tell() == len(plaintext)
            cmp_fp.close()
            cmp_fp.close()

            plain_fp.seek(0)

            dec_fp = compressor.DecompressionFile(plain_fp, self.algorithm)
            assert dec_fp.fileno() == plain_fp.fileno()
            assert dec_fp.readable() is True
            assert dec_fp.writable() is False
            with pytest.raises(io.UnsupportedOperation):
                dec_fp.write(b"x")
            dec_fp.flush()

            # TODO: snappy returns random amounts of output per read call
            chunks = []
            while not chunks or chunks[-1] != b"":
                chunks.append(dec_fp.read())
            result = b"".join(chunks)

            assert plaintext == result
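
TestXlog, used throughout this example, is a helper defined outside the snippet. A hypothetical reconstruction, based only on how it is used above (the name, signature, and exact behavior are assumptions):

class TestXlog:
    # Hypothetical helper: writes a test WAL segment and records the
    # matching .partial path. "random" contents do not compress;
    # "zero" contents (mostly NUL bytes) compress well.
    def __init__(self, dirpath, name, kind):
        self.path = os.path.join(dirpath, name)
        self.path_partial = self.path + ".partial"
        if kind == "random":
            self.contents = os.urandom(IO_BLOCK_SIZE * 2)
        else:  # "zero": random prefix/suffix around easily compressible zeros
            self.contents = os.urandom(16) + (IO_BLOCK_SIZE * 2 - 32) * b"\x00" + os.urandom(16)
        with open(self.path, "wb") as out:
            out.write(self.contents)
        self.size = len(self.contents)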
Example #3
class CompressionCase(PGHoardTestCase):
    algorithm = None

    def compress(self, data):
        raise NotImplementedError

    def decompress(self, data):
        raise NotImplementedError

    def setup_method(self, method):
        super().setup_method(method)
        self.log = logging.getLogger(str(method))
        self.config = self.config_template()
        self.config["backup_sites"][self.test_site] = {
            "encryption_key_id": None,
            "encryption_keys": {
                "testkey": {
                    "public": CONSTANT_TEST_RSA_PUBLIC_KEY,
                    "private": CONSTANT_TEST_RSA_PRIVATE_KEY
                },
            },
            "object_storage": {
                "storage_type": "s3",
            },
            "pg_version": 90500,
        }
        self.config["compression"]["algorithm"] = self.algorithm
        self.compression_queue = Queue()
        self.transfer_queue = Queue()
        self.incoming_path = os.path.join(self.temp_dir, self.test_site,
                                          "xlog")
        os.makedirs(self.incoming_path)
        self.handled_path = os.path.join(self.config["backup_location"],
                                         self.test_site, "xlog")
        os.makedirs(self.handled_path)

        self.compressor = CompressorThread(
            config_dict=self.config,
            compression_queue=self.compression_queue,
            transfer_queue=self.transfer_queue,
            stats=statsd.StatsClient(host=None),
        )
        self.compressor.start()

    def teardown_method(self, method):
        self.compressor.running = False
        self.compression_queue.put({"type": "QUIT"})
        self.compressor.join()
        super().teardown_method(method)

    def test_get_event_type(self):
        # Rename from .partial to final should be recognized
        event = {
            "full_path": "/out/00000001000000000000000C",
            "src_path": "/tmp/00000001000000000000000C.partial",
            "type": "MOVE",
        }
        assert self.compressor.get_event_filetype(event) == "xlog"
        # Rename from non-partial suffix is not recognized
        event["src_path"] += "xyz"
        assert self.compressor.get_event_filetype(event) is None
        # "CLOSE_WRITE" doesn't consider src_path
        del event["src_path"]
        event["type"] = "CLOSE_WRITE"
        assert self.compressor.get_event_filetype(event) == "xlog"
        # other event types are ignored
        event["type"] = "NAKKI"
        assert self.compressor.get_event_filetype(event) is None

        # Timeline history files are handled the same way (do they actually ever have .partial?)
        event = {
            "full_path": "/xlog/0000000A.history",
            "src_path": "/tmp/0000000A.history.partial",
            "type": "MOVE",
        }
        assert self.compressor.get_event_filetype(event) == "timeline"
        event["src_path"] += "xyz"
        assert self.compressor.get_event_filetype(event) is None
        del event["src_path"]
        event["type"] = "CLOSE_WRITE"
        assert self.compressor.get_event_filetype(event) == "timeline"

        event = {
            "full_path": "/data/base.tar",
            "type": "CLOSE_WRITE",
        }
        assert self.compressor.get_event_filetype(event) == "basebackup"

    def test_write_file(self):
        ifile = TestXlog(self.incoming_path, "00000001000000000000000C",
                         "random")
        with open(ifile.path, "rb") as input_obj, io.BytesIO() as output_obj:
            orig_len, compr_len = rohmufile.write_file(
                input_obj=input_obj,
                output_obj=output_obj,
                compression_algorithm=self.algorithm,
                log_func=self.log.info,
            )
            assert output_obj.tell() == compr_len
            assert len(output_obj.getvalue()) == compr_len
            assert orig_len == ifile.size

    def test_compress_to_file_xlog(self):
        ifile = TestXlog(self.incoming_path, "00000001000000000000000C",
                         "random")
        self._test_compress_to_file("xlog", ifile.size, ifile.path,
                                    ifile.path_partial)

    def test_compress_to_file_history(self):
        file_path = os.path.join(self.incoming_path, "0000000F.history")
        contents = "\n".join("# FOOBAR {}".format(n) for n in range(10)) + "\n"
        contents = contents.encode("ascii")
        with open(file_path, "wb") as out:
            out.write(contents)
            file_size = out.tell()

        self._test_compress_to_file("timeline", file_size, file_path,
                                    file_path + ".partial")

    def _test_compress_to_file(self, filetype, file_size, file_path,
                               file_path_partial):
        self.compression_queue.put({
            "type": "MOVE",
            "src_path": file_path_partial,
            "full_path": file_path,
        })
        transfer_event = self.transfer_queue.get(timeout=1.0)
        expected = {
            "filetype": filetype,
            "local_path": file_path.replace(self.incoming_path,
                                            self.handled_path),
            "metadata": {
                "compression-algorithm": self.algorithm,
                "compression-level": 0,
                "original-file-size": file_size,
                "pg-version": 90500,
            },
            "site": self.test_site,
        }
        for key, value in expected.items():
            assert transfer_event[key] == value

    def test_compress_to_memory(self):
        ifile = TestXlog(self.incoming_path, "00000001000000000000000C",
                         "random")
        self.compression_queue.put({
            "compress_to_memory": True,
            "delete_file_after_compression": False,
            "full_path": ifile.path,
            "src_path": ifile.path_partial,
            "type": "MOVE",
        })
        expected = {
            "callback_queue": None,
            "filetype": "xlog",
            "local_path": ifile.path,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "compression-level": 0,
                "original-file-size": ifile.size,
                "pg-version": 90500,
            },
            "site": self.test_site,
        }
        transfer_event = self.transfer_queue.get(timeout=1.0)
        for key, value in expected.items():
            assert transfer_event[key] == value

        result = self.decompress(transfer_event["blob"])
        assert result[:100] == ifile.contents[:100]
        assert result == ifile.contents

    def test_compress_encrypt_to_memory(self):
        ifile = TestXlog(self.incoming_path, "00000001000000000000000C",
                         "random")
        self.compressor.config["backup_sites"][
            self.test_site]["encryption_key_id"] = "testkey"
        event = {
            "compress_to_memory": True,
            "delete_file_after_compression": False,
            "full_path": ifile.path,
            "src_path": ifile.path_partial,
            "type": "MOVE",
        }
        self.compressor.handle_event(event, filetype="xlog")
        expected = {
            "callback_queue": None,
            "filetype": "xlog",
            "local_path": ifile.path,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "compression-level": 0,
                "encryption-key-id": "testkey",
                "original-file-size": ifile.size,
                "pg-version": 90500,
            },
            "site": self.test_site,
        }
        transfer_event = self.transfer_queue.get(timeout=1.0)
        for key, value in expected.items():
            assert transfer_event[key] == value

    def test_archive_command_compression(self):
        zero = TestXlog(self.incoming_path, "00000001000000000000000D", "zero")
        callback_queue = Queue()
        event = {
            "callback_queue": callback_queue,
            "compress_to_memory": True,
            "delete_file_after_compression": False,
            "full_path": zero.path,
            "src_path": zero.path_partial,
            "type": "CLOSE_WRITE",
        }
        self.compression_queue.put(event)
        transfer_event = self.transfer_queue.get(timeout=1.0)
        expected = {
            "callback_queue": callback_queue,
            "filetype": "xlog",
            "local_path": zero.path,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "compression-level": 0,
                "original-file-size": zero.size,
                "pg-version": 90500,
            },
            "site": self.test_site,
        }
        for key, value in expected.items():
            assert transfer_event[key] == value

        assert self.decompress(transfer_event["blob"]) == zero.contents

    def test_decompression_event(self):
        ifile = TestXlog(self.incoming_path, "00000001000000000000000A",
                         "random")
        callback_queue = Queue()
        local_filepath = os.path.join(self.temp_dir,
                                      "00000001000000000000000A")
        self.compression_queue.put({
            "blob": self.compress(ifile.contents),
            "callback_queue": callback_queue,
            "filetype": "xlog",
            "local_path": local_filepath,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "compression-level": 0,
                "original-file-size": ifile.size,
                "pg-version": 90500,
            },
            "site": self.test_site,
            "type": "DECOMPRESSION",
        })
        callback_queue.get(timeout=1.0)
        assert os.path.exists(local_filepath) is True
        with open(local_filepath, "rb") as fp:
            fdata = fp.read()
        assert fdata[:100] == ifile.contents[:100]
        assert fdata == ifile.contents

    def test_decompression_decrypt_event(self):
        ifile = TestXlog(self.incoming_path, "00000001000000000000000E",
                         "random")
        output_obj = io.BytesIO()
        with open(ifile.path, "rb") as input_obj:
            rohmufile.write_file(
                input_obj=input_obj,
                output_obj=output_obj,
                compression_algorithm=self.config["compression"]["algorithm"],
                compression_level=self.config["compression"]["level"],
                rsa_public_key=CONSTANT_TEST_RSA_PUBLIC_KEY,
                log_func=self.log.info,
            )
        callback_queue = Queue()
        local_filepath = os.path.join(self.temp_dir,
                                      "00000001000000000000000E")
        self.compression_queue.put({
            "blob": output_obj.getvalue(),
            "callback_queue": callback_queue,
            "filetype": "xlog",
            "local_path": local_filepath,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "compression-level": 0,
                "encryption-key-id": "testkey",
                "original-file-size": ifile.size,
                "pg-version": 90500,
            },
            "site": self.test_site,
            "type": "DECOMPRESSION",
        })
        callback_queue.get(timeout=1.0)
        assert os.path.exists(local_filepath) is True
        with open(local_filepath, "rb") as fp:
            fdata = fp.read()
        assert fdata[:100] == ifile.contents[:100]
        assert fdata == ifile.contents

    def test_compress_decompress_fileobj(self, tmpdir):
        plaintext = TestXlog(self.incoming_path, "00000001000000000000000E",
                             "random").contents
        output_file = tmpdir.join("data.out").strpath
        with open(output_file, "w+b") as plain_fp:
            cmp_fp = compressor.CompressionFile(plain_fp, self.algorithm)

            assert cmp_fp.fileno() == plain_fp.fileno()
            assert cmp_fp.readable() is False
            with pytest.raises(io.UnsupportedOperation):
                cmp_fp.read(1)
            assert cmp_fp.seekable() is False
            with pytest.raises(io.UnsupportedOperation):
                cmp_fp.seek(1, os.SEEK_CUR)
            assert cmp_fp.writable() is True

            cmp_fp.write(plaintext)
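            # an empty write and a double close are expected to be harmless no-ops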
            cmp_fp.write(b"")
            assert cmp_fp.tell() == len(plaintext)
            cmp_fp.close()
            cmp_fp.close()

            plain_fp.seek(0)

            dec_fp = compressor.DecompressionFile(plain_fp, self.algorithm)
            assert dec_fp.fileno() == plain_fp.fileno()
            assert dec_fp.readable() is True
            assert dec_fp.writable() is False
            with pytest.raises(io.UnsupportedOperation):
                dec_fp.write(b"x")
            dec_fp.flush()

            # TODO: snappy returns random amounts of output per read call
            chunks = []
            while not chunks or chunks[-1] != b"":
                chunks.append(dec_fp.read())
            result = b"".join(chunks)

            assert plaintext == result
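
The read loop at the end of test_compress_decompress_fileobj is there because DecompressionFile.read() may return arbitrarily sized chunks (see the snappy TODO) and signals EOF with an empty bytes object. The same pattern as a standalone helper, for illustration:

def read_until_eof(fp):
    # Drain a decompressing file object: read() can return short chunks,
    # so keep reading until the empty-bytes EOF sentinel.
    chunks = []
    while True:
        chunk = fp.read()
        if chunk == b"":
            return b"".join(chunks)
        chunks.append(chunk)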
Example #4
class Compression(PGHoardTestCase):
    algorithm = None

    def compress(self, data):
        raise NotImplementedError

    def decompress(self, data):
        raise NotImplementedError

    def setup_method(self, method):
        super().setup_method(method)
        self.config = self.config_template()
        self.config["backup_sites"][self.test_site] = {
            "encryption_key_id": None,
            "encryption_keys": {
                "testkey": {
                    "public": CONSTANT_TEST_RSA_PUBLIC_KEY,
                    "private": CONSTANT_TEST_RSA_PRIVATE_KEY
                },
            },
            "object_storage": {
                "storage_type": "s3",
            },
            "pg_version": 90500,
        }
        self.config["compression"]["algorithm"] = self.algorithm
        self.compression_queue = Queue()
        self.transfer_queue = Queue()
        self.incoming_path = os.path.join(self.temp_dir, self.test_site,
                                          "xlog")
        os.makedirs(self.incoming_path)
        self.handled_path = os.path.join(self.config["backup_location"],
                                         self.test_site, "xlog")
        os.makedirs(self.handled_path)
        self.random_file_path = os.path.join(self.incoming_path,
                                             "00000001000000000000000C")
        self.random_file_path_partial = os.path.join(
            self.incoming_path, "00000001000000000000000C.partial")

        # Create a completely random file, bigger than the block size; random data doesn't compress, so the compressed output is longer than the input
        self.random_file_contents = os.urandom(IO_BLOCK_SIZE * 2)
        with open(self.random_file_path, "wb") as out:
            out.write(self.random_file_contents)
            self.random_file_size = out.tell()

        # Create an easily compressible test file, too (with random prefix and suffix)
        self.zero_file_path = os.path.join(self.incoming_path,
                                           "00000001000000000000000D")
        self.zero_file_path_partial = os.path.join(
            self.incoming_path, "00000001000000000000000D.partial")

        # Ensure the plaintext file is bigger than the block size and mostly zeros, so the compressed output is smaller than the input
        zeros = (IO_BLOCK_SIZE * 2 - 32) * b"\x00"
        self.zero_file_contents = os.urandom(16) + zeros + os.urandom(16)
        with open(self.zero_file_path, "wb") as out:
            out.write(self.zero_file_contents)
            self.zero_file_size = out.tell()

        self.compressor = CompressorThread(
            config=self.config,
            compression_queue=self.compression_queue,
            transfer_queue=self.transfer_queue)
        self.compressor.start()

    def teardown_method(self, method):
        self.compressor.running = False
        self.compression_queue.put({"type": "QUIT"})
        self.compressor.join()
        super().teardown_method(method)

    def test_get_event_type(self):
        filetype = self.compressor.get_event_filetype({
            "full_path": "00000001000000000000000C",
            "src_path": "00000001000000000000000C.partial",
            "type": "MOVE",
        })
        assert filetype == "xlog"
        # TODO: check the timeline history file naming format
        filetype = self.compressor.get_event_filetype({
            "full_path": "1.history",
            "src_path": "1.history.partial",
            "type": "MOVE",
        })
        assert filetype == "timeline"
        filetype = self.compressor.get_event_filetype({
            "type": "CLOSE_WRITE",
            "full_path": "base.tar"
        })
        assert filetype == "basebackup"

    def test_compress_to_file(self):
        self.compression_queue.put({
            "type": "MOVE",
            "src_path": self.random_file_path_partial,
            "full_path": self.random_file_path,
        })
        transfer_event = self.transfer_queue.get(timeout=1.0)
        expected = {
            "filetype": "xlog",
            "local_path": self.random_file_path.replace(self.incoming_path,
                                                        self.handled_path),
            "metadata": {
                "compression-algorithm": self.algorithm,
                "original-file-size": self.random_file_size,
                "pg-version": 90500,
            },
            "site": self.test_site,
        }
        for key, value in expected.items():
            assert transfer_event[key] == value

    def test_compress_to_memory(self):
        event = {
            "compress_to_memory": True,
            "delete_file_after_compression": False,
            "full_path": self.random_file_path,
            "src_path": self.random_file_path_partial,
            "type": "MOVE",
        }
        self.compressor.handle_event(event, filetype="xlog")
        expected = {
            "callback_queue": None,
            "filetype": "xlog",
            "local_path": self.random_file_path,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "original-file-size": self.random_file_size,
                "pg-version": 90500,
            },
            "site": self.test_site,
        }
        transfer_event = self.transfer_queue.get(timeout=1.0)
        for key, value in expected.items():
            assert transfer_event[key] == value

        assert self.decompress(
            transfer_event["blob"]) == self.random_file_contents

    def test_compress_encrypt_to_memory(self):
        self.compressor.config["backup_sites"][
            self.test_site]["encryption_key_id"] = "testkey"
        event = {
            "compress_to_memory": True,
            "delete_file_after_compression": False,
            "full_path": self.random_file_path,
            "src_path": self.random_file_path_partial,
            "type": "MOVE",
        }
        self.compressor.handle_event(event, filetype="xlog")
        expected = {
            "callback_queue": None,
            "filetype": "xlog",
            "local_path": self.random_file_path,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "encryption-key-id": "testkey",
                "original-file-size": self.random_file_size,
                "pg-version": 90500,
            },
            "site": self.test_site,
        }
        transfer_event = self.transfer_queue.get(timeout=1.0)
        for key, value in expected.items():
            assert transfer_event[key] == value

    def test_archive_command_compression(self):
        callback_queue = Queue()
        event = {
            "callback_queue": callback_queue,
            "compress_to_memory": True,
            "delete_file_after_compression": False,
            "full_path": self.zero_file_path,
            "src_path": self.zero_file_path_partial,
            "type": "CLOSE_WRITE",
        }
        self.compression_queue.put(event)
        transfer_event = self.transfer_queue.get(timeout=1.0)
        expected = {
            "callback_queue": callback_queue,
            "filetype": "xlog",
            "local_path": self.zero_file_path,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "original-file-size": self.zero_file_size,
                "pg-version": 90500,
            },
            "site": self.test_site,
        }
        for key, value in expected.items():
            assert transfer_event[key] == value

        assert self.decompress(
            transfer_event["blob"]) == self.zero_file_contents

    def test_decompression_event(self):
        callback_queue = Queue()
        local_filepath = os.path.join(self.temp_dir,
                                      "00000001000000000000000D")
        self.compression_queue.put({
            "blob": self.compress(self.random_file_contents),
            "callback_queue": callback_queue,
            "filetype": "xlog",
            "local_path": local_filepath,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "original-file-size": self.random_file_size,
                "pg-version": 90500,
            },
            "site": self.test_site,
            "type": "DECOMPRESSION",
        })
        callback_queue.get(timeout=1.0)
        assert os.path.exists(local_filepath) is True
        with open(local_filepath, "rb") as fp:
            assert fp.read() == self.random_file_contents

    def test_decompression_decrypt_event(self):
        _, blob = self.compressor.compress_filepath_to_memory(
            self.random_file_path,
            compression_algorithm=self.config["compression"]["algorithm"],
            rsa_public_key=CONSTANT_TEST_RSA_PUBLIC_KEY)
        callback_queue = Queue()
        local_filepath = os.path.join(self.temp_dir,
                                      "00000001000000000000000E")
        self.compression_queue.put({
            "blob": blob,
            "callback_queue": callback_queue,
            "filetype": "xlog",
            "local_path": local_filepath,
            "metadata": {
                "compression-algorithm": self.algorithm,
                "encryption-key-id": "testkey",
                "original-file-size": self.random_file_size,
                "pg-version": 90500,
            },
            "site": self.test_site,
            "type": "DECOMPRESSION",
        })
        callback_queue.get(timeout=1.0)
        assert os.path.exists(local_filepath) is True
        with open(local_filepath, "rb") as fp:
            assert fp.read() == self.random_file_contents
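
As with example #1, this base class only becomes a runnable test once a codec-specific subclass fills in the blanks. A snappy variant sketch, assuming the third-party python-snappy package (the skipif guard keeps the suite green when it is not installed):

try:
    import snappy
except ImportError:
    snappy = None


@pytest.mark.skipif(snappy is None, reason="python-snappy is not installed")
class TestSnappyCompression(Compression):
    algorithm = "snappy"

    def compress(self, data):
        return snappy.compress(data)

    def decompress(self, data):
        return snappy.decompress(data)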