Example #1
    def test_archive_sync(self, db, pghoard):
        log = logging.getLogger("test_archive_sync")
        store = pghoard.transfer_agents[0].get_object_storage(pghoard.test_site)

        def list_archive(folder):
            if folder == "timeline":
                matcher = wal.TIMELINE_RE.match
            else:
                matcher = wal.XLOG_RE.match

            path_to_list = "{}/{}".format(pghoard.test_site, folder)
            files_found, files_total = 0, 0
            for obj in store.list_path(path_to_list):
                fname = os.path.basename(obj["name"])
                files_total += 1
                if matcher(fname):
                    files_found += 1
                    yield fname

            log.info("Listed %r, %r out of %r matched %r pattern", path_to_list, files_found, files_total, folder)

        # create a basebackup to start with
        self._run_and_wait_basebackup(pghoard, db, "pipe")

        # force a couple of wal segment switches
        start_xlog, _ = self._switch_xlog(db, 4)
        # we should have at least 4 xlog files now (there may be more in
        # case other tests created them -- we share a single PostgreSQL
        # cluster between all tests)
        pg_xlog_dir = pghoard.config["backup_sites"][pghoard.test_site]["pg_xlog_directory"]
        pg_xlogs = {f for f in os.listdir(pg_xlog_dir) if wal.XLOG_RE.match(f) and f > start_xlog}
        assert len(pg_xlogs) >= 4

        # create a couple of "recycled" xlog files that we must ignore
        last_xlog = sorted(pg_xlogs)[-1]
        dummy_data = b"x" * (16 * 2 ** 20)

        def write_dummy_xlog(inc):
            filename = "{:024X}".format((int(last_xlog, 16) + inc))
            print("writing dummy xlog file", filename)
            open(os.path.join(pg_xlog_dir, filename), "wb").write(dummy_data)
            return filename

        recycled1 = write_dummy_xlog(1)
        recycled2 = write_dummy_xlog(2)

        # check what we have archived; at least the four xlogs created above
        # should NOT be there yet
        archived_xlogs = set(list_archive("xlog"))
        assert len(pg_xlogs - archived_xlogs) >= 4
        # now perform an archive sync
        arsy = ArchiveSync()
        arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])
        # and now archive should include all our xlogs
        archived_xlogs = set(list_archive("xlog"))

        # the recycled files must not appear in archived files
        assert recycled1 not in archived_xlogs
        assert recycled2 not in archived_xlogs

        # the regular wals must be archived
        assert archived_xlogs.issuperset(pg_xlogs)

        # if we delete a wal file that's not the latest archived one, it should
        # get synced to the archive as we don't have a basebackup newer than
        # it
        current_wal = arsy.get_current_wal_file()
        old_xlogs = sorted(wal for wal in pg_xlogs if wal < current_wal)
        store.delete_key(os.path.join(pghoard.test_site, "xlog", old_xlogs[-2]))
        arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])
        archived_xlogs = set(list_archive("xlog"))
        assert archived_xlogs.issuperset(pg_xlogs)
        # delete the topmost wal file, this should cause resync too
        store.delete_key(os.path.join(pghoard.test_site, "xlog", old_xlogs[-1]))
        arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])
        archived_xlogs = set(list_archive("xlog"))
        assert archived_xlogs.issuperset(pg_xlogs)
        # let's do a little dance to turn our DB into a standby and then
        # promote it, forcing a timeline switch
        db.kill(force=False)
        with open(os.path.join(db.pgdata, "recovery.conf"), "w") as fp:
            fp.write(
                "standby_mode = 'on'\n"
                "recovery_target_timeline = 'latest'\n"
                "restore_command = 'false'\n"
            )
        # start PG and promote it
        db.run_pg()
        db.run_cmd("pg_ctl", "-D", db.pgdata, "promote")
        time.sleep(5)  # TODO: instead of sleeping, poll the db until ready
        # we should have at least one timeline file in pg_xlog now
        pg_xlog_timelines = {f for f in os.listdir(pg_xlog_dir) if wal.TIMELINE_RE.match(f)}
        assert len(pg_xlog_timelines) > 0
        # but there should be nothing archived as archive_command wasn't set up
        archived_timelines = set(list_archive("timeline"))
        assert len(archived_timelines) == 0
        # let's hit archive sync
        arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])
        # now we should have an archived timeline
        archived_timelines = set(list_archive("timeline"))
        assert archived_timelines.issuperset(pg_xlog_timelines)
        assert "00000002.history" in archived_timelines

        # let's take a new basebackup
        self._run_and_wait_basebackup(pghoard, db, "basic")
        # nuke archives and resync them
        for name in list_archive(folder="timeline"):
            store.delete_key(os.path.join(pghoard.test_site, "timeline", name))
        for name in list_archive(folder="xlog"):
            store.delete_key(os.path.join(pghoard.test_site, "xlog", name))
        self._switch_xlog(db, 1)

        arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])

        archived_xlogs = set(list_archive("xlog"))
        # assume the same timeline file as before and one to three wal files
        assert len(archived_xlogs) >= 1
        assert len(archived_xlogs) <= 3
        archived_timelines = set(list_archive("timeline"))
        assert list(archived_timelines) == ["00000002.history"]
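
The dummy "recycled" names in the example above are produced by plain 24-digit hexadecimal arithmetic on the newest real segment name. A standalone illustration of that arithmetic (using a made-up segment name) follows.

# Illustration only: WAL segment names are 24 hex digits (timeline, log and
# segment numbers), so incrementing the integer value of the newest segment
# name yields names resembling the recycled/future segments PostgreSQL keeps
# around, which the archiver must ignore.
last_wal = "000000010000000000000004"   # made-up example name
next_wal = "{:024X}".format(int(last_wal, 16) + 1)
assert next_wal == "000000010000000000000005"
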
Example #2
def test_check_wal_archive_integrity(requests_put_mock, requests_head_mock,
                                     tmpdir):
    from pghoard.archive_sync import ArchiveSync, SyncError

    # Instantiate a fake PG data directory
    pg_data_directory = os.path.join(str(tmpdir), "PG_DATA_DIRECTORY")
    os.makedirs(pg_data_directory)
    open(os.path.join(pg_data_directory, "PG_VERSION"), "w").write("9.6")

    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(
        config_file, {
            "http_port": 8080,
            "backup_sites": {
                "foo": {
                    "pg_data_directory": pg_data_directory
                }
            }
        })
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.return_value = HTTPResult(
        201)  # So the backup request succeeds
    requests_head_mock.side_effect = requests_head_call_return

    # Check integrity within same timeline
    arsy.get_current_wal_file = Mock(return_value="00000005000000000000008F")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("00000005000000000000008C", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 3
    assert requests_put_mock.call_count == 0

    # Check integrity when timeline has changed
    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000090000000000000008")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("000000080000000000000005", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 4

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000030000000000000008")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("000000030000000000000005", 90300))
    with pytest.raises(SyncError):
        arsy.check_wal_archive_integrity(new_backup_on_failure=False)
    assert requests_put_mock.call_count == 0
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000070000000000000002")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("000000060000000000000001", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("000000020000000A000000FD", 90200))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("000000020000000A000000FD", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1
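
The write_json_file helper used to create the configuration file is not shown in these examples. A minimal stand-in, under the assumption that it simply serialises the given dict as JSON to the given path, might look like this; the real helper may differ.

import json

def write_json_file(path, data):
    # Hypothetical stand-in for the test helper used above.
    with open(path, "w") as fp:
        json.dump(data, fp)
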
Example #3
def test_check_and_upload_missing_local_files(requests_put_mock,
                                              requests_head_mock, tmpdir):
    from pghoard.archive_sync import ArchiveSync

    data_dir = str(tmpdir)
    wal_dir = os.path.join(data_dir, "pg_xlog")
    os.makedirs(wal_dir)
    open(os.path.join(data_dir, "PG_VERSION"), "w").write("9.6")

    # Write a bunch of local files
    file_hashes = {}
    for index in range(32):
        fn = "{:024X}".format(index + 1)
        data = os.urandom(32)
        sha1_hasher = hashlib.sha1()
        sha1_hasher.update(data)
        file_hashes[index + 1] = sha1_hasher.hexdigest()
        with open(os.path.join(wal_dir, fn), "wb") as f:
            f.write(data)

    head_call_indexes = []
    put_call_indexes = []
    missing_hash_indexes = {0xf, 0x10}

    def requests_head(*args, **kwargs):  # pylint: disable=unused-argument
        wal_index = int(os.path.split(args[0])[1], 16)
        head_call_indexes.append(wal_index)
        if wal_index > 0x14:
            return HTTPResult(404)
        sha1 = file_hashes[wal_index]
        # For some files return invalid hash
        if wal_index in {0x1, 0xb, 0xd, 0xf, 0x11, 0x13}:
            sha1 += "invalid"
        # For some files, don't return the sha1 header, to test that the code copes with a missing header correctly
        if wal_index in missing_hash_indexes:
            headers = {}
        else:
            headers = {
                "metadata-hash": sha1,
                "metadata-hash-algorithm": "sha1"
            }
        return HTTPResult(200, headers=headers)

    def requests_put(*args, **kwargs):  # pylint: disable=unused-argument
        wal_index = int(os.path.split(args[0])[1], 16)
        put_call_indexes.append(wal_index)
        return HTTPResult(201)

    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(
        config_file, {
            "http_port": 8080,
            "backup_sites": {
                "foo": {
                    "pg_data_directory": data_dir
                }
            }
        })
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.side_effect = requests_put
    requests_head_mock.side_effect = requests_head
    arsy.get_current_wal_file = Mock(return_value="00000000000000000000001A")
    arsy.get_first_required_wal_segment = Mock(
        return_value=("000000000000000000000001", 90300))

    arsy.check_and_upload_missing_local_files(15)

    assert head_call_indexes == list(
        reversed([index + 1 for index in range(0x19)]))
    # Files above 0x1a are in the future and 0x1a is the current file. 0x14 and under are already uploaded,
    # but 0x13, 0x11, 0xf, 0xd, 0xb and 0x1 have invalid hashes. Of those, 0x1 doesn't get re-uploaded
    # because we set the maximum number of hashes to check to a value that is exceeded before reaching it,
    # and 0xf doesn't get re-uploaded because the remote hash for it isn't available so the hash cannot be
    # validated, but 0x10 does get re-uploaded because it is the first file missing a hash.
    assert put_call_indexes == [
        0xb, 0xd, 0x10, 0x11, 0x13, 0x15, 0x16, 0x17, 0x18, 0x19
    ]

    missing_hash_indexes.update(set(range(0x20)))
    head_call_indexes.clear()
    put_call_indexes.clear()
    arsy.check_and_upload_missing_local_files(15)
    # The first file that already existed (0x14) should've been re-uploaded due to missing sha1
    assert put_call_indexes == [0x14, 0x15, 0x16, 0x17, 0x18, 0x19]
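
The hash check exercised above comes down to comparing the sha1 of the local segment's bytes against the metadata-hash header reported by the archive. The snippet below is only an illustration of that comparison, not pghoard's actual implementation.

import hashlib
import os

data = os.urandom(32)                          # stands in for a WAL segment's contents
local_sha1 = hashlib.sha1(data).hexdigest()
headers = {"metadata-hash": local_sha1, "metadata-hash-algorithm": "sha1"}

# The archived copy is only trusted when the remote hash uses a known algorithm
# and matches the local hash; how mismatching or missing hashes are handled is
# what the put_call_indexes assertions above exercise.
trusted = (
    headers.get("metadata-hash-algorithm") == "sha1"
    and headers.get("metadata-hash") == local_sha1
)
assert trusted
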
Example #4
    def test_archive_sync(self, db, pghoard):
        log = logging.getLogger("test_archive_sync")
        store = pghoard.transfer_agents[0].get_object_storage(pghoard.test_site)

        def list_archive(folder):
            if folder == "timeline":
                matcher = wal.TIMELINE_RE.match
            else:
                matcher = wal.WAL_RE.match

            path_to_list = "{}/{}".format(pghoard.test_site, folder)
            files_found, files_total = 0, 0
            for obj in store.list_path(path_to_list):
                fname = os.path.basename(obj["name"])
                files_total += 1
                if matcher(fname):
                    files_found += 1
                    yield fname

            log.info("Listed %r, %r out of %r matched %r pattern", path_to_list, files_found, files_total, folder)

        # create a basebackup to start with
        self._run_and_wait_basebackup(pghoard, db, "pipe")

        # force a couple of wal segment switches
        start_wal, _ = self._switch_wal(db, 4)
        # we should have at least 4 WAL files now (there may be more in
        # case other tests created them -- we share a single PostgreSQL
        # cluster between all tests)
        pg_wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site])
        pg_wals = {f for f in os.listdir(pg_wal_dir) if wal.WAL_RE.match(f) and f > start_wal}
        assert len(pg_wals) >= 4

        # create a couple of "recycled" xlog files that we must ignore
        last_wal = sorted(pg_wals)[-1]
        dummy_data = b"x" * (16 * 2 ** 20)

        def write_dummy_wal(inc):
            filename = "{:024X}".format((int(last_wal, 16) + inc))
            print("Writing dummy WAL file", filename)
            open(os.path.join(pg_wal_dir, filename), "wb").write(dummy_data)
            return filename

        recycled1 = write_dummy_wal(1)
        recycled2 = write_dummy_wal(2)

        # check what we have archived; at least the four WALs created above
        # should NOT be there yet
        archived_wals = set(list_archive("xlog"))
        assert len(pg_wals - archived_wals) >= 4
        # now perform an archive sync
        arsy = ArchiveSync()
        arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])
        # and now archive should include all our WALs
        archived_wals = set(list_archive("xlog"))

        # the recycled files must not appear in archived files
        assert recycled1 not in archived_wals
        assert recycled2 not in archived_wals

        # the regular wals must be archived
        assert archived_wals.issuperset(pg_wals)

        # if we delete a wal file that's not the latest archived one, it should
        # get synced to the archive as we don't have a basebackup newer than
        # it
        current_wal = arsy.get_current_wal_file()
        old_wals = sorted(wal for wal in pg_wals if wal < current_wal)
        store.delete_key(os.path.join(pghoard.test_site, "xlog", old_wals[-2]))
        arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])
        archived_wals = set(list_archive("xlog"))
        assert archived_wals.issuperset(pg_wals)
        # delete the topmost wal file, this should cause resync too
        store.delete_key(os.path.join(pghoard.test_site, "xlog", old_wals[-1]))
        arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])
        archived_wals = set(list_archive("xlog"))
        assert archived_wals.issuperset(pg_wals)
        # let's do a little dance to turn our DB into a standby and then
        # promote it, forcing a timeline switch
        db.kill(force=False)
        with open(os.path.join(db.pgdata, "recovery.conf"), "w") as fp:
            fp.write(
                "standby_mode = 'on'\n"
                "recovery_target_timeline = 'latest'\n"
                "restore_command = 'false'\n"
            )
        # start PG and promote it
        db.run_pg()
        db.run_cmd("pg_ctl", "-D", db.pgdata, "promote")
        time.sleep(5)  # TODO: instead of sleeping, poll the db until ready
        # we should have at least one timeline file in pg_xlog/pg_wal now
        pg_wal_timelines = {f for f in os.listdir(pg_wal_dir) if wal.TIMELINE_RE.match(f)}
        assert len(pg_wal_timelines) > 0
        # but there should be nothing archived as archive_command wasn't set up
        archived_timelines = set(list_archive("timeline"))
        assert len(archived_timelines) == 0
        # let's hit archive sync
        arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])
        # now we should have an archived timeline
        archived_timelines = set(list_archive("timeline"))
        assert archived_timelines.issuperset(pg_wal_timelines)
        assert "00000002.history" in archived_timelines

        # let's take a new basebackup
        self._run_and_wait_basebackup(pghoard, db, "basic")
        # nuke archives and resync them
        for name in list_archive(folder="timeline"):
            store.delete_key(os.path.join(pghoard.test_site, "timeline", name))
        for name in list_archive(folder="xlog"):
            store.delete_key(os.path.join(pghoard.test_site, "xlog", name))
        self._switch_wal(db, 1)

        arsy.run(["--site", pghoard.test_site, "--config", pghoard.config_path])

        archived_wals = set(list_archive("xlog"))
        # assume the same timeline file as before and one to three wal files
        assert len(archived_wals) >= 1
        assert len(archived_wals) <= 3
        archived_timelines = set(list_archive("timeline"))
        assert list(archived_timelines) == ["00000002.history"]
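
The _switch_wal helper used in this test belongs to the pghoard test suite and is not shown here. A rough sketch of forcing WAL switches, assuming a psycopg2 connection to a PostgreSQL 10+ server (where pg_switch_wal() and pg_walfile_name() are available), might look like the following; the real helper may differ.

def switch_wal(conn, count):
    # Hypothetical sketch, not the actual pghoard test helper.
    with conn.cursor() as cursor:
        cursor.execute("SELECT pg_walfile_name(pg_current_wal_lsn())")
        start_wal = cursor.fetchone()[0]
        for _ in range(count):
            cursor.execute("SELECT txid_current()")  # generate some WAL so the segment isn't empty
            cursor.execute("SELECT pg_switch_wal()")
        cursor.execute("SELECT pg_walfile_name(pg_current_wal_lsn())")
        end_wal = cursor.fetchone()[0]
    return start_wal, end_wal
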
Example #5
def test_check_wal_archive_integrity(requests_put_mock, requests_head_mock):
    from pghoard.archive_sync import ArchiveSync
    arsy = ArchiveSync()
    arsy.set_config({"http_port": 8080, "backup_sites": {"foo": {}}}, site="foo")
    requests_put_mock.return_value = HTTPResult(201)  # So the backup request succeeds
    requests_head_mock.side_effect = requests_head_call_return

    # Check integrity within same timeline
    arsy.get_current_wal_file = Mock(return_value="00000005000000000000008F")
    arsy.get_first_required_wal_segment = Mock(return_value="00000005000000000000008C")
    assert arsy.check_wal_archive_integrity() == 0
    assert requests_head_mock.call_count == 3

    # Check integrity when timeline has changed
    arsy.get_current_wal_file = Mock(return_value="000000090000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value="000000080000000000000005")
    assert arsy.check_wal_archive_integrity() == 0
    assert requests_head_mock.call_count == 7

    arsy.get_current_wal_file = Mock(return_value="000000030000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value="000000030000000000000005")
    assert arsy.check_wal_archive_integrity() == -1

    arsy.get_current_wal_file = Mock(return_value="000000070000000000000002")
    arsy.get_first_required_wal_segment = Mock(return_value="000000060000000000000001")
    assert arsy.check_wal_archive_integrity() == 0
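
Throughout these tests, instance methods of ArchiveSync are replaced with unittest.mock.Mock objects so that no live PostgreSQL connection is needed. The pattern in isolation, using a trivial stand-in class:

from unittest.mock import Mock

class Dummy:
    def get_current_wal_file(self):
        raise RuntimeError("would need a live database")

dummy = Dummy()
# Overriding the bound method on the instance keeps the test self-contained.
dummy.get_current_wal_file = Mock(return_value="00000005000000000000008F")
assert dummy.get_current_wal_file() == "00000005000000000000008F"
assert dummy.get_current_wal_file.call_count == 1
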
Example #6
def test_check_wal_archive_integrity(requests_put_mock, requests_head_mock, tmpdir):
    from pghoard.archive_sync import ArchiveSync, SyncError
    config_file = tmpdir.join("arsy.conf").strpath
    write_json_file(config_file, {"http_port": 8080, "backup_sites": {"foo": {}}})
    arsy = ArchiveSync()
    arsy.set_config(config_file, site="foo")
    requests_put_mock.return_value = HTTPResult(201)  # So the backup request succeeds
    requests_head_mock.side_effect = requests_head_call_return

    # Check integrity within same timeline
    arsy.get_current_wal_file = Mock(return_value="00000005000000000000008F")
    arsy.get_first_required_wal_segment = Mock(return_value=("00000005000000000000008C", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 3
    assert requests_put_mock.call_count == 0

    # Check integrity when timeline has changed
    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000090000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000080000000000000005", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_head_mock.call_count == 4

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000030000000000000008")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000030000000000000005", 90300))
    with pytest.raises(SyncError):
        arsy.check_wal_archive_integrity(new_backup_on_failure=False)
    assert requests_put_mock.call_count == 0
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000070000000000000002")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000060000000000000001", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000020000000A000000FD", 90200))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=False) == 0
    assert requests_put_mock.call_count == 0

    requests_head_mock.call_count = 0
    requests_put_mock.call_count = 0
    arsy.get_current_wal_file = Mock(return_value="000000020000000B00000000")
    arsy.get_first_required_wal_segment = Mock(return_value=("000000020000000A000000FD", 90300))
    assert arsy.check_wal_archive_integrity(new_backup_on_failure=True) == 0
    assert requests_put_mock.call_count == 1
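
The manual call_count resets used above have a library equivalent in unittest.mock's reset_mock(), which also clears the recorded calls while keeping return_value and side_effect in place. A small illustration:

from unittest.mock import Mock

head_mock = Mock(return_value=None)
head_mock()
head_mock()
assert head_mock.call_count == 2

head_mock.reset_mock()   # clears call_count and the recorded call list
assert head_mock.call_count == 0
assert head_mock.call_args_list == []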