Example #1
 def test_recovery_targets(self, tmpdir):
     config_file = tmpdir.join("conf.json").strpath
     write_json_file(config_file, {"backup_sites": {"test": {}}})
     r = Restore()
     r._get_object_storage = Mock()  # pylint: disable=protected-access
     with pytest.raises(RestoreError) as excinfo:
         r.run(args=[
             "get-basebackup",
             "--config", config_file,
             "--target-dir", tmpdir.strpath,
             "--site=test",
             "--recovery-target-action=promote",
             "--recovery-target-name=foobar",
             "--recovery-target-xid=42",
         ])
     assert "at most one" in str(excinfo.value)
     with pytest.raises(RestoreError) as excinfo:
         r.run(args=[
             "get-basebackup",
             "--config", config_file,
             "--target-dir", tmpdir.strpath,
             "--site=test",
             "--recovery-target-action=promote",
             "--recovery-target-time=foobar",
         ])
     assert "recovery_target_time 'foobar'" in str(excinfo.value)
Example #2
    def setup_method(self, method):
        super().setup_method(method)
        self.config = self.config_template()
        self.config["backup_sites"][self.test_site].update({
            "basebackup_count":
            1,
            "basebackup_interval_hours":
            1,
            "nodes": [{
                "host": "127.0.0.4"
            }],
        })
        config_path = os.path.join(self.temp_dir, "pghoard.json")
        write_json_file(config_path, self.config)
        os.makedirs(self.config["alert_file_dir"], exist_ok=True)

        backup_site_path = os.path.join(self.config["backup_location"],
                                        self.test_site)
        self.compressed_xlog_path = os.path.join(backup_site_path, "xlog")
        os.makedirs(self.compressed_xlog_path)
        self.basebackup_path = os.path.join(backup_site_path, "basebackup")
        os.makedirs(self.basebackup_path)
        self.pghoard = PGHoard(config_path)
        self.real_check_pg_server_version = self.pghoard.check_pg_server_version
        self.pghoard.check_pg_server_version = Mock(return_value=90404)
        self.real_check_pg_versions_ok = self.pghoard.check_pg_versions_ok
        self.pghoard.check_pg_versions_ok = Mock(return_value=True)
Example #3
    def test_recovery_targets(self, tmpdir):
        config_file = tmpdir.join("conf.json").strpath

        # Instantiate a fake PG data directory
        pg_data_directory = os.path.join(str(self.temp_dir), "PG_DATA_DIRECTORY")
        os.makedirs(pg_data_directory)
        with open(os.path.join(pg_data_directory, "PG_VERSION"), "w") as version_file:
            version_file.write("9.6")

        write_json_file(config_file, {"backup_sites": {"test": {"pg_data_directory": pg_data_directory}}})

        r = Restore()
        r._get_object_storage = Mock()  # pylint: disable=protected-access
        with pytest.raises(RestoreError) as excinfo:
            r.run(args=[
                "get-basebackup",
                "--config", config_file,
                "--target-dir", tmpdir.strpath,
                "--site=test",
                "--recovery-target-action=promote",
                "--recovery-target-name=foobar",
                "--recovery-target-xid=42",
            ])
        assert "at most one" in str(excinfo.value)
        with pytest.raises(RestoreError) as excinfo:
            r.run(args=[
                "get-basebackup",
                "--config", config_file,
                "--target-dir", tmpdir.strpath,
                "--site=test",
                "--recovery-target-action=promote",
                "--recovery-target-time=foobar",
            ])
        assert "recovery_target_time 'foobar'" in str(excinfo.value)
Example #4
 def write_backup_state_to_json_file(self):
     """Periodically write a JSON state file to disk"""
     start_time = time.time()
     state_file_path = self.config["json_state_file_path"]
     self.state["walreceivers"] = {
         key: {"latest_activity": value.latest_activity, "running": value.running,
               "last_flushed_lsn": value.last_flushed_lsn}
         for key, value in self.walreceivers.items()
     }
     self.state["pg_receivexlogs"] = {
         key: {"latest_activity": value.latest_activity, "running": value.running}
         for key, value in self.receivexlogs.items()
     }
     self.state["pg_basebackups"] = {
         key: {"latest_activity": value.latest_activity, "running": value.running}
         for key, value in self.basebackups.items()
     }
     self.state["compressors"] = [compressor.state for compressor in self.compressors]
     self.state["transfer_agents"] = [ta.state for ta in self.transfer_agents]
     self.state["queues"] = {
         "compression_queue": self.compression_queue.qsize(),
         "transfer_queue": self.transfer_queue.qsize(),
     }
     self.log.debug("Writing JSON state file to %r", state_file_path)
     write_json_file(state_file_path, self.state)
     self.log.debug("Wrote JSON state file to disk, took %.4fs", time.time() - start_time)
Example #5
    def setup_method(self, method):
        super().setup_method(method)
        self.config = self.config_template({
            "backup_sites": {
                self.test_site: {
                    "basebackup_count": 1,
                    "basebackup_interval_hours": 1,
                    "nodes": [
                        {
                            "host": "127.0.0.4",
                        },
                    ],
                },
            },
        })
        config_path = os.path.join(self.temp_dir, "pghoard.json")
        write_json_file(config_path, self.config)

        self.pghoard = PGHoard(config_path)
        # This is the "final storage location" when using "local" storage type
        self.local_storage_dir = os.path.join(self.config["backup_sites"][self.test_site]["object_storage"]["directory"],
                                              self.test_site)

        self.real_check_pg_server_version = self.pghoard.check_pg_server_version
        self.pghoard.check_pg_server_version = Mock(return_value=90404)
        self.real_check_pg_versions_ok = self.pghoard.check_pg_versions_ok
        self.pghoard.check_pg_versions_ok = Mock(return_value=True)
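# The local_storage_dir lookup above presumes config_template() gives the test site a
# "local" object_storage section. A hedged sketch of the assumed shape (the directory
# path is illustrative); with "local" storage, uploads end up under <directory>/<site>/:
example_object_storage = {
    "storage_type": "local",
    "directory": "/tmp/pghoard-test/backups",
}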
Example #7
    def test_json_serialization(self, tmpdir):
        ob = {
            "foo": [
                "bar",
                "baz",
                42,
            ],
            "t": datetime.datetime(2015, 9, 1, 4, 0, 0),
            "f": 0.42,
        }
        res = json.dumps(ob, default=default_json_serialization, separators=(",", ":"), sort_keys=True)
        assert res == '{"f":0.42,"foo":["bar","baz",42],"t":"2015-09-01T04:00:00Z"}'

        assert isinstance(json_encode(ob), str)
        assert isinstance(json_encode(ob, binary=True), bytes)
        assert "\n" not in json_encode(ob)
        assert "\n" in json_encode(ob, compact=False)

        output_file = tmpdir.join("test.json").strpath
        write_json_file(output_file, ob)
        with open(output_file, "r") as fp:
            ob2 = json.load(fp)
        ob_ = dict(ob, t=ob["t"].isoformat() + "Z")
        assert ob2 == ob_

        write_json_file(output_file, ob, compact=True)
        with open(output_file, "r") as fp:
            output_data = fp.read()
        assert "\n" not in output_data
        ob2_ = json.loads(output_data)

        assert ob2 == ob2_
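# A minimal sketch of the datetime handling that the asserted output above implies for
# default_json_serialization: naive datetimes are rendered as ISO 8601 with a "Z" suffix.
# example_default_serialization is an illustrative stand-in, not pghoard's implementation.
import datetime
import json

def example_default_serialization(obj):
    if isinstance(obj, datetime.datetime):
        return obj.isoformat() + "Z"
    raise TypeError("cannot serialize {!r}".format(obj))

print(json.dumps({"t": datetime.datetime(2015, 9, 1, 4, 0, 0)},
                 default=example_default_serialization, separators=(",", ":"), sort_keys=True))
# -> {"t":"2015-09-01T04:00:00Z"}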
Example #9
    def setup_method(self, method):
        super().setup_method(method)
        self.config = self.config_template({
            "backup_sites": {
                self.test_site: {
                    "basebackup_count": 1,
                    "basebackup_interval_hours": 1,
                    "nodes": [
                        {
                            "host": "127.0.0.4",
                        },
                    ],
                },
            },
        })
        config_path = os.path.join(self.temp_dir, "pghoard.json")
        write_json_file(config_path, self.config)

        self.pghoard = PGHoard(config_path)
        # This is the "final storage location" when using "local" storage type
        self.local_storage_dir = os.path.join(
            self.config["backup_sites"][self.test_site]["object_storage"]["directory"],
            self.test_site,
        )

        self.real_check_pg_server_version = self.pghoard.check_pg_server_version
        self.pghoard.check_pg_server_version = Mock(return_value=90404)
        self.real_check_pg_versions_ok = self.pghoard.check_pg_versions_ok
        self.pghoard.check_pg_versions_ok = Mock(return_value=True)
Example #10
def test_check_wal_archive_integrity(requests_put_mock, requests_head_mock, tmpdir):
    from pghoard.archive_sync import ArchiveSync, SyncError
    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(config_file, {"http_port": 8080, "backup_sites": {"foo": {}}})
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.return_value = HTTPResult(201)  # So the backup request succeeds
    requests_head_mock.side_effect = requests_head_call_return

    # Check integrity within same timeline
    arsy.get_current_wal_file = Mock(return_value="00000005000000000000008F")
    arsy.get_first_required_wal_segment = Mock(return_value=("00000005000000000000008C", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 3
    assert requests_put_mock.call_count == 0

    # Check integrity when timeline has changed
    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000090000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000080000000000000005", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 4

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000030000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000030000000000000005", 90300))
    with pytest.raises(SyncError):
        arsy.check_wal_archive_integrity(new_backup_on_failure=False)
    assert requests_put_mock.call_count == 0
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000070000000000000002")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000060000000000000001", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000020000000A000000FD", 90200))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000020000000A000000FD", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1
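# The integrity check above walks backwards from the current WAL segment towards the
# first required one, following timeline switches where needed. PostgreSQL WAL segment
# names are 24 hex digits: 8 for the timeline ID, 8 for the log number and 8 for the
# segment number. A small stand-alone helper illustrating the names used in the mocks:
def parse_wal_name(wal_name):
    return int(wal_name[0:8], 16), int(wal_name[8:16], 16), int(wal_name[16:24], 16)

assert parse_wal_name("00000005000000000000008F") == (5, 0, 0x8F)    # timeline 5
assert parse_wal_name("000000020000000A000000FD") == (2, 0xA, 0xFD)  # timeline 2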
Example #11
 def write_backup_state_to_json_file(self):
     """Periodically write a JSON state file to disk"""
     start_time = time.time()
     state_file_path = self.config["json_state_file_path"]
     self.state["walreceivers"] = {
         key: {
             "latest_activity": value.latest_activity,
             "running": value.running,
             "last_flushed_lsn": value.last_flushed_lsn
         }
         for key, value in self.walreceivers.items()
     }
     self.state["pg_receivexlogs"] = {
         key: {
             "latest_activity": value.latest_activity,
             "running": value.running
         }
         for key, value in self.receivexlogs.items()
     }
     self.state["pg_basebackups"] = {
         key: {
             "latest_activity": value.latest_activity,
             "running": value.running
         }
         for key, value in self.basebackups.items()
     }
     self.state["compressors"] = [
         compressor.state for compressor in self.compressors
     ]
     # All transfer agents share the same state, no point in writing it multiple times
     self.state["transfer_agent_state"] = self.transfer_agent_state
     self.state["queues"] = {
         "compression_queue": self.compression_queue.qsize(),
         "transfer_queue": self.transfer_queue.qsize(),
     }
     self.state["served_files"] = (
         self.webserver.get_most_recently_served_files() if self.webserver else {}
     )
     self.log.debug("Writing JSON state file to %r", state_file_path)
     write_json_file(state_file_path, self.state)
     self.log.debug("Wrote JSON state file to disk, took %.4fs",
                    time.time() - start_time)
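# The state file written above is plain JSON, so it can be read back directly, e.g. for
# monitoring queue depths. A hedged sketch; the path comes from json_state_file_path in
# the pghoard config and is illustrative here, while the key names match the code above:
import json

def read_queue_depths(state_file_path):
    with open(state_file_path, "r") as fp:
        state = json.load(fp)
    return state.get("queues", {})

# read_queue_depths("/var/lib/pghoard/pghoard_state.json")
# -> {"compression_queue": 0, "transfer_queue": 0}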
Example #12
    def setup_method(self, method):
        super().setup_method(method)
        self.config = self.config_template()
        self.config["backup_sites"][self.test_site].update({
            "basebackup_count": 1,
            "basebackup_interval_hours": 1,
            "nodes": [{"host": "127.0.0.4"}],
        })
        config_path = os.path.join(self.temp_dir, "pghoard.json")
        write_json_file(config_path, self.config)
        os.makedirs(self.config["alert_file_dir"], exist_ok=True)

        backup_site_path = os.path.join(self.config["backup_location"], self.test_site)
        self.compressed_xlog_path = os.path.join(backup_site_path, "xlog")
        os.makedirs(self.compressed_xlog_path)
        self.basebackup_path = os.path.join(backup_site_path, "basebackup")
        os.makedirs(self.basebackup_path)
        self.pghoard = PGHoard(config_path)
        self.real_check_pg_server_version = self.pghoard.check_pg_server_version
        self.pghoard.check_pg_server_version = Mock(return_value=90404)
        self.real_check_pg_versions_ok = self.pghoard.check_pg_versions_ok
        self.pghoard.check_pg_versions_ok = Mock(return_value=True)
Example #13
    def handle_event(self, event, filetype):
        # pylint: disable=redefined-variable-type
        rsa_public_key = None
        site = event.get("site")
        if not site:
            site = self.find_site_for_file(event["full_path"])

        encryption_key_id = self.config["backup_sites"][site][
            "encryption_key_id"]
        if encryption_key_id:
            rsa_public_key = self.config["backup_sites"][site][
                "encryption_keys"][encryption_key_id]["public"]

        compressed_blob = None
        if event.get("compress_to_memory"):
            output_obj = BytesIO()
            compressed_filepath = None
        else:
            compressed_filepath = self.get_compressed_file_path(
                site, filetype, event["full_path"])
            output_obj = NamedTemporaryFile(
                dir=os.path.dirname(compressed_filepath),
                prefix=os.path.basename(compressed_filepath),
                suffix=".tmp-compress")

        input_obj = event.get("input_data")
        if not input_obj:
            input_obj = open(event["full_path"], "rb")
        with output_obj, input_obj:
            hash_algorithm = self.config["hash_algorithm"]
            hasher = None
            if filetype == "xlog":
                wal.verify_wal(wal_name=os.path.basename(event["full_path"]),
                               fileobj=input_obj)
                hasher = hashlib.new(hash_algorithm)

            original_file_size, compressed_file_size = rohmufile.write_file(
                data_callback=hasher.update if hasher else None,
                input_obj=input_obj,
                output_obj=output_obj,
                compression_algorithm=self.config["compression"]["algorithm"],
                compression_level=self.config["compression"]["level"],
                rsa_public_key=rsa_public_key,
                log_func=self.log.info,
            )

            if compressed_filepath:
                os.link(output_obj.name, compressed_filepath)
            else:
                compressed_blob = output_obj.getvalue()

        if event.get("delete_file_after_compression", True):
            os.unlink(event["full_path"])

        metadata = event.get("metadata", {})
        metadata.update({
            "pg-version":
            self.config["backup_sites"][site].get("pg_version"),
            "compression-algorithm":
            self.config["compression"]["algorithm"],
            "compression-level":
            self.config["compression"]["level"],
            "original-file-size":
            original_file_size,
            "host":
            socket.gethostname(),
        })
        if hasher:
            metadata["hash"] = hasher.hexdigest()
            metadata["hash-algorithm"] = hash_algorithm
        if encryption_key_id:
            metadata.update({"encryption-key-id": encryption_key_id})
        if compressed_filepath:
            metadata_path = compressed_filepath + ".metadata"
            write_json_file(metadata_path, metadata)

        self.set_state_defaults_for_site(site)
        self.state[site][filetype]["original_data"] += original_file_size
        self.state[site][filetype]["compressed_data"] += compressed_file_size
        self.state[site][filetype]["count"] += 1
        if original_file_size:
            size_ratio = compressed_file_size / original_file_size
            self.metrics.gauge(
                "pghoard.compressed_size_ratio", size_ratio,
                tags={
                    "algorithm": self.config["compression"]["algorithm"],
                    "site": site,
                    "type": filetype,
                })
        transfer_object = {
            "callback_queue": event.get("callback_queue"),
            "file_size": compressed_file_size,
            "filetype": filetype,
            "metadata": metadata,
            "opaque": event.get("opaque"),
            "site": site,
            "type": "UPLOAD",
        }
        if compressed_filepath:
            transfer_object["local_path"] = compressed_filepath
        else:
            transfer_object["blob"] = compressed_blob
            transfer_object["local_path"] = event["full_path"]

        self.transfer_queue.put(transfer_object)
        return True
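# handle_event above drives everything from keys on the incoming event dict; the
# filetype (e.g. "xlog") arrives as a separate argument. A hedged sketch of a minimal
# in-memory compression event, with an illustrative path and site name:
example_event = {
    "full_path": "/var/lib/pgsql/data/pg_xlog/000000010000000000000001",
    "site": "default",
    "compress_to_memory": True,              # keep the compressed result as an in-memory blob
    "delete_file_after_compression": False,  # leave the source WAL file in place
    "metadata": {},
}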
Example #14
def test_check_wal_archive_integrity(requests_put_mock, requests_head_mock,
                                     tmpdir):
    from pghoard.archive_sync import ArchiveSync, SyncError

    # Instantiate a fake PG data directory
    pg_data_directory = os.path.join(str(tmpdir), "PG_DATA_DIRECTORY")
    os.makedirs(pg_data_directory)
    with open(os.path.join(pg_data_directory, "PG_VERSION"), "w") as version_file:
        version_file.write("9.6")

    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(config_file, {
        "http_port": 8080,
        "backup_sites": {"foo": {"pg_data_directory": pg_data_directory}},
    })
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.return_value = HTTPResult(201)  # So the backup request succeeds
    requests_head_mock.side_effect = requests_head_call_return

    # Check integrity within same timeline
    arsy.get_current_wal_file = Mock(return_value="00000005000000000000008F")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("00000005000000000000008C", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 3
    assert requests_put_mock.call_count == 0

    # Check integrity when timeline has changed
    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000090000000000000008")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("000000080000000000000005", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 4

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000030000000000000008")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("000000030000000000000005", 90300))
    with pytest.raises(SyncError):
        arsy.check_wal_archive_integrity(new_backup_on_failure=False)
    assert requests_put_mock.call_count == 0
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000070000000000000002")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("000000060000000000000001", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("000000020000000A000000FD", 90200))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("000000020000000A000000FD", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1
Example #15
def test_check_and_upload_missing_local_files(requests_put_mock,
                                              requests_head_mock, tmpdir):
    from pghoard.archive_sync import ArchiveSync

    data_dir = str(tmpdir)
    wal_dir = os.path.join(data_dir, "pg_xlog")
    os.makedirs(wal_dir)
    with open(os.path.join(data_dir, "PG_VERSION"), "w") as version_file:
        version_file.write("9.6")

    # Write a bunch of local files
    file_hashes = {}
    for index in range(32):
        fn = "{:024X}".format(index + 1)
        data = os.urandom(32)
        sha1_hasher = hashlib.sha1()
        sha1_hasher.update(data)
        file_hashes[index + 1] = sha1_hasher.hexdigest()
        with open(os.path.join(wal_dir, fn), "wb") as f:
            f.write(data)

    head_call_indexes = []
    put_call_indexes = []
    missing_hash_indexes = {0xf, 0x10}

    def requests_head(*args, **kwargs):  # pylint: disable=unused-argument
        wal_index = int(os.path.split(args[0])[1], 16)
        head_call_indexes.append(wal_index)
        if wal_index > 0x14:
            return HTTPResult(404)
        sha1 = file_hashes[wal_index]
        # For some files return invalid hash
        if wal_index in {0x1, 0xb, 0xd, 0xf, 0x11, 0x13}:
            sha1 += "invalid"
        # For some files, omit the sha1 header to check that the code copes with a missing header correctly
        if wal_index in missing_hash_indexes:
            headers = {}
        else:
            headers = {
                "metadata-hash": sha1,
                "metadata-hash-algorithm": "sha1"
            }
        return HTTPResult(200, headers=headers)

    def requests_put(*args, **kwargs):  # pylint: disable=unused-argument
        wal_index = int(os.path.split(args[0])[1], 16)
        put_call_indexes.append(wal_index)
        return HTTPResult(201)

    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(config_file, {
        "http_port": 8080,
        "backup_sites": {"foo": {"pg_data_directory": data_dir}},
    })
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.side_effect = requests_put
    requests_head_mock.side_effect = requests_head
    arsy.get_current_wal_file = Mock(return_value="00000000000000000000001A")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("000000000000000000000001", 90300))

    arsy.check_and_upload_missing_local_files(15)

    assert head_call_indexes == list(
        reversed([index + 1 for index in range(0x19)]))
    # Files above 0x1a are in the future and 0x1a is the current one. 0x14 and below are already
    # uploaded, but 0x13, 0x11, 0xf, 0xd, 0xb and 0x1 have an invalid hash. Of those, 0x1 doesn't get
    # re-uploaded because the maximum number of hashes to check (15) is exceeded before reaching it,
    # and 0xf doesn't get re-uploaded because no remote hash is available for it, so its hash cannot
    # be validated; 0x10 does get re-uploaded because it is the first file missing a hash.
    assert put_call_indexes == [
        0xb, 0xd, 0x10, 0x11, 0x13, 0x15, 0x16, 0x17, 0x18, 0x19
    ]

    missing_hash_indexes.update(set(range(0x20)))
    head_call_indexes.clear()
    put_call_indexes.clear()
    arsy.check_and_upload_missing_local_files(15)
    # The first file that already existed (0x14) should've been re-uploaded due to missing sha1
    assert put_call_indexes == [0x14, 0x15, 0x16, 0x17, 0x18, 0x19]
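# The hex indexes in the comments above refer to the fake WAL names generated with
# "{:024X}".format(index + 1), so 0x14 is the file "000000000000000000000014" and 0x19
# is the newest segment the sync walks over. A quick stand-alone check of that mapping:
assert "{:024X}".format(0x14) == "000000000000000000000014"
assert int("000000000000000000000019", 16) == 0x19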
Example #16
    def handle_event(self, event, filetype):
        rsa_public_key = None
        site = event.get("site", self.find_site_for_file(event["full_path"]))
        encryption_key_id = self.config["backup_sites"][site][
            "encryption_key_id"]
        if encryption_key_id:
            rsa_public_key = self.config["backup_sites"][site][
                "encryption_keys"][encryption_key_id]["public"]

        if event.get("compress_to_memory", False):
            original_file_size, compressed_blob = self.compress_filepath_to_memory(
                filepath=event["full_path"],
                compression_algorithm=self.config["compression"]["algorithm"],
                rsa_public_key=rsa_public_key)
            compressed_file_size = len(compressed_blob)
            compressed_filepath = None
        else:
            compressed_blob = None
            compressed_filepath = self.get_compressed_file_path(
                site, filetype, event["full_path"])
            original_file_size, compressed_file_size = self.compress_filepath(
                filepath=event["full_path"],
                compressed_filepath=compressed_filepath,
                compression_algorithm=self.config["compression"]["algorithm"],
                rsa_public_key=rsa_public_key)

        if event.get("delete_file_after_compression", True):
            os.unlink(event["full_path"])

        metadata = event.get("metadata", {})
        metadata.update({
            "pg-version":
            self.config["backup_sites"][site].get("pg_version", 90500),
            "compression-algorithm":
            self.config["compression"]["algorithm"],
            "original-file-size":
            original_file_size,
        })
        if encryption_key_id:
            metadata.update({"encryption-key-id": encryption_key_id})
        if compressed_filepath:
            metadata_path = compressed_filepath + ".metadata"
            write_json_file(metadata_path, metadata)

        self.set_state_defaults_for_site(site)
        self.state[site][filetype]["original_data"] += original_file_size
        self.state[site][filetype]["compressed_data"] += compressed_file_size
        self.state[site][filetype]["count"] += 1
        transfer_object = {
            "callback_queue": event.get("callback_queue"),
            "file_size": compressed_file_size,
            "filetype": filetype,
            "metadata": metadata,
            "opaque": event.get("opaque"),
            "site": site,
            "type": "UPLOAD",
        }
        if event.get("compress_to_memory", False):
            transfer_object["blob"] = compressed_blob
            transfer_object["local_path"] = event["full_path"]
        else:
            transfer_object["local_path"] = compressed_filepath
        self.transfer_queue.put(transfer_object)
        return True
Example #17
    def handle_event(self, event, filetype):
        # pylint: disable=redefined-variable-type
        rsa_public_key = None
        site = event.get("site")
        if not site:
            site = self.find_site_for_file(event["full_path"])

        encryption_key_id = self.config["backup_sites"][site]["encryption_key_id"]
        if encryption_key_id:
            rsa_public_key = self.config["backup_sites"][site]["encryption_keys"][encryption_key_id]["public"]

        compressed_blob = None
        if event.get("compress_to_memory"):
            output_obj = BytesIO()
            compressed_filepath = None
        else:
            compressed_filepath = self.get_compressed_file_path(site, filetype, event["full_path"])
            output_obj = NamedTemporaryFile(dir=os.path.dirname(compressed_filepath),
                                            prefix=os.path.basename(compressed_filepath), suffix=".tmp-compress")

        input_obj = event.get("input_data")
        if not input_obj:
            input_obj = open(event["full_path"], "rb")
        with output_obj, input_obj:
            hash_algorithm = self.config["hash_algorithm"]
            hasher = None
            if filetype == "xlog":
                wal.verify_wal(wal_name=os.path.basename(event["full_path"]), fileobj=input_obj)
                hasher = hashlib.new(hash_algorithm)

            original_file_size, compressed_file_size = rohmufile.write_file(
                data_callback=hasher.update if hasher else None,
                input_obj=input_obj,
                output_obj=output_obj,
                compression_algorithm=self.config["compression"]["algorithm"],
                compression_level=self.config["compression"]["level"],
                rsa_public_key=rsa_public_key,
                log_func=self.log.info,
            )

            if compressed_filepath:
                os.link(output_obj.name, compressed_filepath)
            else:
                compressed_blob = output_obj.getvalue()

        if event.get("delete_file_after_compression", True):
            os.unlink(event["full_path"])

        metadata = event.get("metadata", {})
        metadata.update({
            "pg-version": self.config["backup_sites"][site].get("pg_version"),
            "compression-algorithm": self.config["compression"]["algorithm"],
            "compression-level": self.config["compression"]["level"],
            "original-file-size": original_file_size,
            "host": socket.gethostname(),
        })
        if hasher:
            metadata["hash"] = hasher.hexdigest()
            metadata["hash-algorithm"] = hash_algorithm
        if encryption_key_id:
            metadata.update({"encryption-key-id": encryption_key_id})
        if compressed_filepath:
            metadata_path = compressed_filepath + ".metadata"
            write_json_file(metadata_path, metadata)

        self.set_state_defaults_for_site(site)
        self.state[site][filetype]["original_data"] += original_file_size
        self.state[site][filetype]["compressed_data"] += compressed_file_size
        self.state[site][filetype]["count"] += 1
        if original_file_size:
            size_ratio = compressed_file_size / original_file_size
            self.metrics.gauge(
                "pghoard.compressed_size_ratio", size_ratio,
                tags={
                    "algorithm": self.config["compression"]["algorithm"],
                    "site": site,
                    "type": filetype,
                })
        transfer_object = {
            "callback_queue": event.get("callback_queue"),
            "file_size": compressed_file_size,
            "filetype": filetype,
            "metadata": metadata,
            "opaque": event.get("opaque"),
            "site": site,
            "type": "UPLOAD",
        }
        if compressed_filepath:
            transfer_object["local_path"] = compressed_filepath
        else:
            transfer_object["blob"] = compressed_blob
            transfer_object["local_path"] = event["full_path"]

        self.transfer_queue.put(transfer_object)
        return True