Example #1
    def download_one_backup(self, *, transfer, basebackup_data_file,
                            progress_callback, site):
        dl_dir = os.path.join(
            self.config["backup_location"],
            self.config["backup_sites"][site]["prefix"],
            "basebackup_incoming",
        )
        compat.makedirs(dl_dir, exist_ok=True)
        tmp = tempfile.NamedTemporaryFile(dir=dl_dir,
                                          prefix="basebackup.",
                                          suffix=".pghoard")
        try:
            metadata = transfer.get_contents_to_fileobj(
                key=basebackup_data_file,
                fileobj_to_store_to=tmp,
                progress_callback=progress_callback)
            progress_callback(1, 1)
            self.log.info("Downloaded %r", basebackup_data_file)
            tmp.seek(0)
        except:  # pylint: disable=bare-except
            self.log.exception("Problem downloading a backup file: %r",
                               basebackup_data_file)
            tmp.close()
            raise
        return tmp, metadata
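On success the method hands back the still-open NamedTemporaryFile (seeked back to offset 0) together with the metadata dict returned by the transfer object; on failure it closes the file itself and re-raises. A minimal sketch of a hypothetical caller, where fetch_and_restore and restore_stream are placeholders and not part of the code above:

    def fetch_and_restore(self, transfer, bb_file, progress_cb, site):
        # Hypothetical caller: only download_one_backup comes from the example above.
        tmp, metadata = self.download_one_backup(
            transfer=transfer,
            basebackup_data_file=bb_file,
            progress_callback=progress_cb,
            site=site,
        )
        try:
            restore_stream(tmp, metadata)  # stand-in for the code that consumes the stream
        finally:
            tmp.close()  # the NamedTemporaryFile is removed from basebackup_incoming on close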
Example #2
    def setup_method(self, method):
        super().setup_method(method)
        self.config = self.config_template()
        self.config["backup_sites"][self.test_site].update({
            "basebackup_count":
            1,
            "basebackup_interval_hours":
            1,
            "nodes": [{
                "host": "127.0.0.4"
            }],
        })
        config_path = os.path.join(self.temp_dir, "pghoard.json")
        write_json_file(config_path, self.config)
        compat.makedirs(self.config["alert_file_dir"], exist_ok=True)

        self.pghoard = PGHoard(config_path)
        # This is the "final storage location" when using "local" storage type
        self.local_storage_dir = os.path.join(
            self.config["backup_sites"][self.test_site]["object_storage"]["directory"], self.test_site)

        self.real_check_pg_server_version = self.pghoard.check_pg_server_version
        self.pghoard.check_pg_server_version = Mock(return_value=90404)
        self.real_check_pg_versions_ok = self.pghoard.check_pg_versions_ok
        self.pghoard.check_pg_versions_ok = Mock(return_value=True)
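The mocked value 90404 is the integer server-version encoding used by PostgreSQL (and exposed by psycopg2) for releases before version 10: major * 10000 + minor * 100 + patch, so 90404 corresponds to 9.4.4. A small illustration, not part of the test code:

    def decode_pre10_server_version(v):
        # 90404 -> (9, 4, 4); PostgreSQL 10+ switched to major * 10000 + minor
        return v // 10000, (v % 10000) // 100, v % 100

    assert decode_pre10_server_version(90404) == (9, 4, 4)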
Example #3
    def _extract_pghoard_bb_v1(self, fileobj, pgdata, tablespaces):
        directories = []
        tar_meta = None
        # | in mode to use tarfile's internal stream buffer manager, currently required because our SnappyFile
        # interface doesn't do proper buffering for reads
        with tarfile.open(fileobj=fileobj, mode="r|",
                          bufsize=IO_BLOCK_SIZE) as tar:
            for tarinfo in tar:
                if tarinfo.name == ".pghoard_tar_metadata.json":
                    tar_meta_bytes = tar.extractfile(tarinfo).read()
                    tar_meta = json.loads(tar_meta_bytes.decode("utf-8"))
                    continue

                if tarinfo.name == "pgdata" or tarinfo.name == "tablespaces":
                    continue  # ignore these directory entries
                if tarinfo.name.startswith("pgdata/"):
                    target_name = os.path.join(pgdata, tarinfo.name[7:])
                elif tarinfo.name.startswith("tablespaces/"):
                    tscomponents = tarinfo.name.split("/", 2)
                    tsname = tscomponents[1]
                    tspath = tablespaces[tsname]["path"]

                    if len(tscomponents) == 2 and tarinfo.isdir():
                        # Create tablespace entry
                        assert tar_meta["tablespaces"][tsname]["oid"] == tablespaces[tsname]["oid"]
                        linkname = os.path.join(pgdata, "pg_tblspc", str(tablespaces[tsname]["oid"]))
                        os.symlink(tspath, linkname)
                        directories.append([tspath, tarinfo])
                        continue

                    target_name = os.path.join(tspath, tscomponents[2])
                else:
                    raise Exception("Unrecognized path {!r} in tar".format(
                        tarinfo.name))

                if tarinfo.isdir():
                    directories.append([target_name, tarinfo])
                    compat.makedirs(target_name, exist_ok=True)
                elif tarinfo.isreg():
                    target_dir = os.path.dirname(target_name)
                    if not os.path.exists(target_dir):
                        compat.makedirs(target_dir, exist_ok=True)
                    tar.makefile(tarinfo, target_name)
                    tar.chmod(tarinfo, target_name)
                    tar.utime(tarinfo, target_name)
                elif tarinfo.issym():
                    os.symlink(tarinfo.linkname, target_name)
                else:
                    raise Exception(
                        "Unrecognized file type for file {!r} in tar".format(
                            tarinfo.name))

        for target_name, tarinfo in directories:
            tar.chmod(tarinfo, target_name)
            tar.utime(tarinfo, target_name)
Example #4
    def _extract_pghoard_bb_v1(self, fileobj, pgdata, tablespaces):
        directories = []
        tar_meta = None
        # | in mode to use tarfile's internal stream buffer manager, currently required because our SnappyFile
        # interface doesn't do proper buffering for reads
        with tarfile.open(fileobj=fileobj, mode="r|", bufsize=IO_BLOCK_SIZE) as tar:
            for tarinfo in tar:
                if tarinfo.name == ".pghoard_tar_metadata.json":
                    tar_meta_bytes = tar.extractfile(tarinfo).read()
                    tar_meta = json.loads(tar_meta_bytes.decode("utf-8"))
                    continue

                if tarinfo.name == "pgdata" or tarinfo.name == "tablespaces":
                    continue  # ignore these directory entries
                if tarinfo.name.startswith("pgdata/"):
                    target_name = os.path.join(pgdata, tarinfo.name[7:])
                elif tarinfo.name.startswith("tablespaces/"):
                    tscomponents = tarinfo.name.split("/", 2)
                    tsname = tscomponents[1]
                    tspath = tablespaces[tsname]["path"]

                    if len(tscomponents) == 2 and tarinfo.isdir():
                        # Create tablespace entry
                        assert tar_meta["tablespaces"][tsname]["oid"] == tablespaces[tsname]["oid"]
                        linkname = os.path.join(pgdata, "pg_tblspc", str(tablespaces[tsname]["oid"]))
                        os.symlink(tspath, linkname)
                        directories.append([tspath, tarinfo])
                        continue

                    target_name = os.path.join(tspath, tscomponents[2])
                else:
                    raise Exception("Unrecognized path {!r} in tar".format(tarinfo.name))

                if tarinfo.isdir():
                    directories.append([target_name, tarinfo])
                    compat.makedirs(target_name, exist_ok=True)
                elif tarinfo.isreg():
                    target_dir = os.path.dirname(target_name)
                    if not os.path.exists(target_dir):
                        compat.makedirs(target_dir, exist_ok=True)
                    tar.makefile(tarinfo, target_name)
                    tar.chmod(tarinfo, target_name)
                    tar.utime(tarinfo, target_name)
                elif tarinfo.issym():
                    os.symlink(tarinfo.linkname, target_name)
                else:
                    raise Exception("Unrecognized file type for file {!r} in tar".format(tarinfo.name))

        for target_name, tarinfo in directories:
            tar.chmod(tarinfo, target_name)
            tar.utime(tarinfo, target_name)
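The extractor needs only a readable, streamable file object plus the tablespace mapping built from the backup metadata; the pg_tblspc symlinks, directory modes and mtimes are all reconstructed from the tar entries themselves. A hypothetical invocation, where decompressed stands in for an already decompressed basebackup stream (for example the SnappyFile wrapper mentioned in the comment above) and the tablespace map mirrors the "oid"/"path" fields asserted against the tar metadata:

    # Values below are illustrative placeholders, not taken from the examples above.
    tablespaces = {"tstest": {"oid": 16385, "path": "/mnt/tablespaces/tstest"}}
    self._extract_pghoard_bb_v1(decompressed, pgdata="/var/lib/pgsql/data",
                                tablespaces=tablespaces)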
Example #5
    def config_template(self, override=None):
        # NOTE: we set pg_receivexlog_path and pg_basebackup_path per site and globally mostly to verify that
        # it works, the config keys are deprecated and will be removed in a future release at which point we'll
        # switch to using pg_bin_directory config.
        bindir, ver = find_pg_binary("")

        if hasattr(psycopg2.extras, "PhysicalReplicationConnection"):
            active_backup_mode = "walreceiver"
        else:
            active_backup_mode = "pg_receivexlog"

        # Instantiate a fake PG data directory
        pg_data_directory = os.path.join(str(self.temp_dir),
                                         "PG_DATA_DIRECTORY")
        os.makedirs(pg_data_directory)
        open(os.path.join(pg_data_directory, "PG_VERSION"), "w").write(ver)

        config = {
            "alert_file_dir": os.path.join(str(self.temp_dir), "alerts"),
            "backup_location": os.path.join(str(self.temp_dir), "backupspool"),
            "backup_sites": {
                self.test_site: {
                    "active_backup_mode": active_backup_mode,
                    "object_storage": {
                        "storage_type": "local",
                        "directory": os.path.join(self.temp_dir, "backups"),
                    },
                    "pg_data_directory": pg_data_directory,
                    "pg_receivexlog_path":
                    os.path.join(bindir, "pg_receivexlog"),
                },
            },
            "json_state_file_path": os.path.join(self.temp_dir, "state.json"),
            "pg_basebackup_path": os.path.join(bindir, "pg_basebackup"),
        }
        if ver == "10":
            config["backup_sites"][
                self.test_site]["pg_receivexlog_path"] = os.path.join(
                    bindir, "pg_receivewal")
        if override:
            all_site_overrides = override.pop("backup_sites", None)
            for site_name, site_override in (all_site_overrides or {}).items():
                if site_name in config["backup_sites"]:
                    config["backup_sites"][site_name].update(site_override)
                else:
                    config["backup_sites"][site_name] = site_override
            config.update(override)

        compat.makedirs(config["alert_file_dir"], exist_ok=True)
        return set_and_check_config_defaults(config)
Example #6
File: base.py Project: ohmu/pghoard
    def config_template(self, override=None):
        # NOTE: we set pg_receivexlog_path and pg_basebackup_path per site and globally mostly to verify that
        # it works, the config keys are deprecated and will be removed in a future release at which point we'll
        # switch to using pg_bin_directory config.
        bindir, ver = find_pg_binary("")

        if hasattr(psycopg2.extras, "PhysicalReplicationConnection"):
            active_backup_mode = "walreceiver"
        else:
            active_backup_mode = "pg_receivexlog"

        # Instantiate a fake PG data directory
        pg_data_directory = os.path.join(str(self.temp_dir), "PG_DATA_DIRECTORY")
        os.makedirs(pg_data_directory)
        open(os.path.join(pg_data_directory, "PG_VERSION"), "w").write(ver)

        config = {
            "alert_file_dir": os.path.join(str(self.temp_dir), "alerts"),
            "backup_location": os.path.join(str(self.temp_dir), "backupspool"),
            "backup_sites": {
                self.test_site: {
                    "active_backup_mode": active_backup_mode,
                    "object_storage": {
                        "storage_type": "local",
                        "directory": os.path.join(self.temp_dir, "backups"),
                    },
                    "pg_data_directory": pg_data_directory,
                    "pg_receivexlog_path": os.path.join(bindir, "pg_receivexlog"),
                },
            },
            "json_state_file_path": os.path.join(self.temp_dir, "state.json"),
            "pg_basebackup_path": os.path.join(bindir, "pg_basebackup"),
        }
        if ver in ("10", "11"):
            config["backup_sites"][self.test_site]["pg_receivexlog_path"] = os.path.join(bindir, "pg_receivewal")
        if override:
            all_site_overrides = override.pop("backup_sites", None)
            for site_name, site_override in (all_site_overrides or {}).items():
                if site_name in config["backup_sites"]:
                    config["backup_sites"][site_name].update(site_override)
                else:
                    config["backup_sites"][site_name] = site_override
            config.update(override)

        compat.makedirs(config["alert_file_dir"], exist_ok=True)
        return set_and_check_config_defaults(config)
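The override argument is merged in two steps: per-site dictionaries under backup_sites are merged into (or added alongside) the generated test site, and any remaining top-level keys then replace the defaults. A short usage sketch with hypothetical values:

    # Hypothetical override: adjust one setting of the generated test site.
    config = self.config_template(override={
        "backup_sites": {
            self.test_site: {"basebackup_interval_hours": 24},
        },
    })
    assert config["backup_sites"][self.test_site]["basebackup_interval_hours"] == 24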
Example #7
    def setup_method(self, method):
        super().setup_method(method)
        self.config = self.config_template()
        self.config["backup_sites"][self.test_site].update({
            "basebackup_count": 1,
            "basebackup_interval_hours": 1,
            "nodes": [{"host": "127.0.0.4"}],
        })
        config_path = os.path.join(self.temp_dir, "pghoard.json")
        write_json_file(config_path, self.config)
        compat.makedirs(self.config["alert_file_dir"], exist_ok=True)

        self.pghoard = PGHoard(config_path)
        # This is the "final storage location" when using "local" storage type
        self.local_storage_dir = os.path.join(self.config["backup_sites"][self.test_site]["object_storage"]["directory"],
                                              self.test_site)

        self.real_check_pg_server_version = self.pghoard.check_pg_server_version
        self.pghoard.check_pg_server_version = Mock(return_value=90404)
        self.real_check_pg_versions_ok = self.pghoard.check_pg_versions_ok
        self.pghoard.check_pg_versions_ok = Mock(return_value=True)
Example #8
def _test_storage(st, driver, tmpdir, storage_config):
    scratch = tmpdir.join("scratch")
    compat.makedirs(str(scratch), exist_ok=True)

    # File not found cases
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_metadata_for_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.delete_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_file("NONEXISTENT", str(scratch.join("a")))
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_fileobj("NONEXISTENT", BytesIO())
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_string("NONEXISTENT")
    assert st.list_path("") == []
    assert st.list_path("NONEXISTENT") == []
    st.store_file_from_memory("NONEXISTENT-a/x1", b"dummy", None)
    dummy_file = str(scratch.join("a"))
    with open(dummy_file, "wb") as fp:
        fp.write(b"dummy")
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, None)
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, {"x": 1})

    st.delete_key("NONEXISTENT-b/x1")
    st.delete_key("NONEXISTENT-a/x1")

    # Other basic cases
    from_disk_file = str(scratch.join("a"))
    input_data = b"from disk"
    if driver in ["local", "sftp"]:
        input_data = input_data * 150000
    with open(from_disk_file, "wb") as fp:
        fp.write(input_data)
    st.store_file_from_disk("test1/x1", from_disk_file, None)
    out = BytesIO()

    reported_positions = []

    def progress_callback(pos, total):
        reported_positions.append((pos, total))

    assert st.get_contents_to_fileobj(
        "test1/x1", out, progress_callback=progress_callback) == {}
    assert out.getvalue() == input_data
    if driver in ["local", "sftp"]:
        input_size = len(input_data)
        assert reported_positions[-1] == (input_size, input_size)

    if driver == "local":
        assert reported_positions == [(1024 * 1024, input_size),
                                      (input_size, input_size)]

    if driver == "s3":
        response = st.s3_client.head_object(
            Bucket=st.bucket_name,
            Key=st.format_key_for_backend("test1/x1"),
        )
        assert bool(response.get("ServerSideEncryption")) == bool(
            storage_config.get("encrypted"))

    st.store_file_from_memory("test1/x1", b"dummy", {"k": "v"})
    out = BytesIO()
    assert st.get_contents_to_fileobj("test1/x1", out) == {"k": "v"}
    assert out.getvalue() == b"dummy"

    # sftp does not support remote copy
    if driver != "sftp":
        # Copy file
        st.copy_file(source_key="test1/x1", destination_key="test_copy/copy1")
        assert st.get_contents_to_string("test_copy/copy1") == (b"dummy", {
            "k": "v"
        })
        st.copy_file(source_key="test1/x1",
                     destination_key="test_copy/copy2",
                     metadata={"new": "meta"})
        assert st.get_contents_to_string("test_copy/copy2") == (b"dummy", {
            "new": "meta"
        })

    st.store_file_from_memory("test1/x1", b"l", {"fancymetadata": "value"})
    assert st.get_contents_to_string("test1/x1") == (b"l", {
        "fancymetadata": "value"
    })

    st.store_file_from_memory("test1/x1", b"1", None)
    assert st.get_contents_to_string("test1/x1") == (b"1", {})

    st.store_file_from_memory("test1/td", b"to disk", {"to-disk": "42"})
    to_disk_file = str(scratch.join("b"))
    assert st.get_contents_to_file("test1/td", to_disk_file) == {
        "to-disk": "42"
    }

    created_keys = {"test1/x1", "test1/td"}

    if driver == "s3":
        response = st.s3_client.head_object(
            Bucket=st.bucket_name,
            Key=st.format_key_for_backend("test1/x1"),
        )
        assert bool(response.get("ServerSideEncryption")) == bool(
            storage_config.get("encrypted"))

    assert st.list_path("") == [
    ]  # nothing at top level (directories not listed)
    if driver == "local":
        # create a dot-file (hidden), this must be ignored
        target_file = os.path.join(st.prefix, "test1/.null")
        with open(target_file, "w"):
            pass

    tlist = st.list_path("test1")
    assert len(tlist) == 2
    for fe in tlist:
        assert isinstance(fe["last_modified"], datetime.datetime)
        assert fe["last_modified"].tzinfo is not None
        if fe["name"] == "test1/x1":
            assert fe["size"] == 1
            assert fe["metadata"] == {}
        elif fe["name"] == "test1/td":
            assert fe["size"] == len(b"to disk")
            assert fe["metadata"] == {"to-disk": "42"}
        else:
            assert 0, "unexpected name in directory"

    assert set(st.iter_prefixes("test1")) == set()

    for key in ["test1/sub1/sub1.1", "test1/sub2/sub2.1/sub2.1.1", "test1/sub3"]:
        st.store_file_from_memory(key, b"1", None)
        created_keys.add(key)

    if driver == "local":
        # sub3 is a file. Actual object storage systems support this, but a file system does not
        with pytest.raises(NotADirectoryError):
            st.store_file_from_memory("test1/sub3/sub3.1/sub3.1.1", b"1", None)
    elif driver == "sftp":
        # sub3 is a file. Actual object storage systems support this, but a file system does not
        with pytest.raises(errors.StorageError):
            st.store_file_from_memory("test1/sub3/sub3.1/sub3.1.1", b"1", None)
    else:
        st.store_file_from_memory("test1/sub3/sub3.1/sub3.1.1", b"1", None)
        created_keys.add("test1/sub3/sub3.1/sub3.1.1")

    if driver in ["local", "sftp"]:
        assert set(st.iter_prefixes("test1")) == {"test1/sub1", "test1/sub2"}
    else:
        assert set(st.iter_prefixes("test1")) == {
            "test1/sub1", "test1/sub2", "test1/sub3"
        }
    assert {item["name"]
            for item in st.list_path("test1")
            } == {"test1/x1", "test1/td", "test1/sub3"}
    assert set(st.iter_prefixes("test1/sub1")) == set()
    assert {item["name"]
            for item in st.list_path("test1/sub1")} == {"test1/sub1/sub1.1"}
    assert {item["name"] for item in st.list_path("test1/sub2")} == set()
    assert {item["name"] for item in st.list_path("test1/sub3")} == set()
    assert set(st.iter_prefixes("test1/sub2")) == {"test1/sub2/sub2.1"}
    if driver in ["local", "sftp"]:
        assert set(st.iter_prefixes("test1/sub3")) == set()  # sub3 is a file
    else:
        assert set(st.iter_prefixes("test1/sub3")) == {"test1/sub3/sub3.1"}
    assert set(st.iter_prefixes("test1/sub3/3.1")) == set()

    expected_deep_iter_test1_names = {
        "test1/x1",
        "test1/td",
        "test1/sub1/sub1.1",
        "test1/sub2/sub2.1/sub2.1.1",
        "test1/sub3",
    }
    if driver not in ["local", "sftp"]:
        expected_deep_iter_test1_names.add("test1/sub3/sub3.1/sub3.1.1")

    assert {item["name"]
            for item in st.list_path("test1", deep=True)
            } == expected_deep_iter_test1_names

    def _object_names(iterable):
        names = set()
        for item in iterable:
            assert item.type == KEY_TYPE_OBJECT
            names.add(item.value["name"])
        return names

    deep_names_with_key = _object_names(
        st.iter_key("test1/sub3", deep=True, include_key=True))
    deep_names_without_key = _object_names(
        st.iter_key("test1/sub3", deep=True, include_key=False))

    if driver in ["local", "sftp"]:
        assert deep_names_with_key == {"test1/sub3"}
        assert deep_names_without_key == set()
    else:
        assert deep_names_with_key == {
            "test1/sub3", "test1/sub3/sub3.1/sub3.1.1"
        }
        assert deep_names_without_key == {"test1/sub3/sub3.1/sub3.1.1"}

    if driver == "google":
        # test extra props for cacheControl in google
        st.store_file_from_memory("test1/x1",
                                  b"no cache test",
                                  metadata={"test": "value"},
                                  extra_props={"cacheControl": "no-cache"})

    if driver == "local":
        # test LocalFileIsRemoteFileError for local storage
        target_file = os.path.join(st.prefix, "test1/x1")
        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.store_file_from_disk("test1/x1", target_file, {"local": True})
        assert st.get_contents_to_string("test1/x1") == (b"1", {
            "local": "True"
        })

        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.get_contents_to_file("test1/x1", target_file)

        # Missing metadata is an error situation that should fail
        os.unlink(target_file + ".metadata")
        with pytest.raises(errors.FileNotFoundFromStorageError):
            st.get_metadata_for_key("test1/x1")

    for key in created_keys:
        st.delete_key(key)
    assert st.list_path("test1") == []  # empty again

    for name in [
            "test2/foo", "test2/suba/foo", "test2/subb/bar",
            "test2/subb/subsub/zob"
    ]:
        st.store_file_from_memory(name, b"somedata")
    names = sorted(item["name"] for item in st.list_path("test2", deep=True))
    assert names == [
        "test2/foo", "test2/suba/foo", "test2/subb/bar",
        "test2/subb/subsub/zob"
    ]

    st.delete_tree("test2")
    assert st.list_path("test2", deep=True) == []

    test_hash = hashlib.sha256()
    test_file = str(scratch.join("30m"))
    test_size_send = 0
    with open(test_file, "wb") as fp:
        chunk = b"30m file" * 10000
        while test_size_send < 30 * 1024 * 1024:
            test_hash.update(chunk)
            fp.write(chunk)
            test_size_send += len(chunk)
    test_hash_send = test_hash.hexdigest()

    st.store_file_from_disk("test1/30m",
                            test_file,
                            multipart=True,
                            metadata={
                                "thirtymeg": "data",
                                "size": test_size_send,
                                "key": "value-with-a-hyphen"
                            })

    os.unlink(test_file)

    expected_meta = {
        "thirtymeg": "data",
        "size": str(test_size_send),
        "key": "value-with-a-hyphen"
    }
    meta = st.get_metadata_for_key("test1/30m")
    assert meta == expected_meta

    progress_reports = []

    def dl_progress(current_pos, expected_max):
        progress_reports.append((current_pos, expected_max))

    with open(test_file, "wb") as fp:
        assert st.get_contents_to_fileobj(
            "test1/30m", fp, progress_callback=dl_progress) == expected_meta

    assert len(progress_reports) > 0
    assert progress_reports[-1][0] == progress_reports[-1][1]

    test_hash = hashlib.sha256()
    test_size_rec = 0
    with open(test_file, "rb") as fp:
        while True:
            chunk = fp.read(1024 * 1024)
            if not chunk:
                break
            test_hash.update(chunk)
            test_size_rec += len(chunk)
    test_hash_rec = test_hash.hexdigest()
    assert test_hash_rec == test_hash_send
    assert test_size_rec == test_size_send

    tlist = st.list_path("test1")
    assert len(tlist) == 1
    assert tlist[0]["name"] == "test1/30m"
    assert tlist[0]["size"] == test_size_rec

    if driver == "swift":
        segments = test_size_send // st.segment_size
        segment_list = st.list_path("test1_segments/30m")
        assert len(segment_list) >= segments

        if segments >= 2:
            # reupload a file with the same name but with less chunks
            os.truncate(test_file, st.segment_size + 1)
            test_size_send = os.path.getsize(test_file)
            st.store_file_from_disk("test1/30m",
                                    test_file,
                                    multipart=True,
                                    metadata={
                                        "30m": "less data",
                                        "size": test_size_send
                                    })

            segment_list = st.list_path("test1_segments/30m")
            assert len(segment_list) == 2
            assert len(st.list_path("test1")) == 1

    st.delete_key("test1/30m")
    assert st.list_path("test1") == []

    if driver == "swift":
        assert st.list_path("test1_segments/30m") == []

    progress_reports = []

    def upload_progress(progress):
        progress_reports.append(progress)

    for seekable in (False, True):
        for size in (300, 3 * 1024 * 1024, 11 * 1024 * 1024):
            progress_reports = []
            rds = RandomDataSource(size)
            if seekable:
                fd = BytesIO(rds.data)
            else:
                fd = rds
            key = "test1/{}b".format(size)
            st.store_file_object(key, fd, upload_progress_fn=upload_progress)
            # Progress may be reported after each chunk and chunk size depends on available memory
            # on current machine so there's no straightforward way of checking reasonable progress
            # updates were made. Just ensure they're ordered correctly if something was provided
            assert sorted(progress_reports) == progress_reports
            bio = BytesIO()
            st.get_contents_to_fileobj(key, bio)
            buffer = bio.getbuffer()
            assert len(buffer) == size
            assert buffer == rds.data
            st.delete_key(key)
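RandomDataSource is not shown in this listing; the loop above only relies on it exposing the raw bytes as .data and acting as a non-seekable readable object when passed straight to store_file_object. A minimal stand-in consistent with that usage (an assumption, not the project's actual helper):

    import os

    class RandomDataSource:
        # Minimal stand-in: a non-seekable file-like object over `size` random bytes.
        def __init__(self, size):
            self.data = os.urandom(size)
            self._pos = 0

        def read(self, n=-1):
            if n is None or n < 0:
                n = len(self.data) - self._pos
            chunk = self.data[self._pos:self._pos + n]
            self._pos += len(chunk)
            return chunk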
Example #9
    def test_basebackups_tablespaces(self, capsys, db, pghoard, tmpdir):
        # Create a test tablespace for this instance, but make sure we drop it at the end of the test as the
        # database we use is shared by all test cases, and tablespaces are a global concept so the test
        # tablespace could interfere with other tests
        tspath = tmpdir.join("extra-ts").strpath
        os.makedirs(tspath)
        conn_str = pgutil.create_connection_string(db.user)
        conn = psycopg2.connect(conn_str)
        conn.autocommit = True
        cursor = conn.cursor()
        cursor.execute("CREATE TABLESPACE tstest LOCATION %s", [tspath])
        r_db, r_conn = None, None
        try:
            cursor.execute(
                "CREATE TABLE tstest (id BIGSERIAL PRIMARY KEY, value BIGINT) TABLESPACE tstest"
            )
            cursor.execute(
                "INSERT INTO tstest (value) SELECT * FROM generate_series(1, 1000)"
            )
            cursor.execute("CHECKPOINT")
            cursor.execute(
                "SELECT oid, pg_tablespace_location(oid) FROM pg_tablespace WHERE spcname = 'tstest'"
            )
            res = cursor.fetchone()
            assert res[1] == tspath

            # Start receivexlog since we want the WALs to be able to restore later on
            wal_directory = os.path.join(pghoard.config["backup_location"],
                                         pghoard.test_site, "xlog_incoming")
            makedirs(wal_directory, exist_ok=True)
            pghoard.receivexlog_listener(pghoard.test_site, db.user,
                                         wal_directory)
            if conn.server_version >= 100000:
                cursor.execute("SELECT txid_current(), pg_switch_wal()")
            else:
                cursor.execute("SELECT txid_current(), pg_switch_xlog()")

            self._test_create_basebackup(capsys, db, pghoard, "local-tar")

            if conn.server_version >= 100000:
                cursor.execute("SELECT txid_current(), pg_switch_wal()")
                cursor.execute("SELECT txid_current(), pg_switch_wal()")
            else:
                cursor.execute("SELECT txid_current(), pg_switch_xlog()")
                cursor.execute("SELECT txid_current(), pg_switch_xlog()")

            backup_out = tmpdir.join("test-restore").strpath
            backup_ts_out = tmpdir.join("test-restore-tstest").strpath

            # Tablespaces are extracted to their previous absolute paths by default, but the path must be empty
            # and it isn't as it's still used by the running PG
            with pytest.raises(RestoreError) as excinfo:
                Restore().run([
                    "get-basebackup",
                    "--config",
                    pghoard.config_path,
                    "--site",
                    pghoard.test_site,
                    "--target-dir",
                    backup_out,
                ])
            assert "Tablespace 'tstest' target directory" in str(excinfo.value)
            assert "not empty" in str(excinfo.value)
            # We can't restore tablespaces to non-existent directories either
            with pytest.raises(RestoreError) as excinfo:
                Restore().run([
                    "get-basebackup",
                    "--config",
                    pghoard.config_path,
                    "--site",
                    pghoard.test_site,
                    "--target-dir",
                    backup_out,
                    "--tablespace-dir",
                    "tstest={}".format(backup_ts_out),
                ])
            assert "Tablespace 'tstest' target directory" in str(excinfo.value)
            assert "does not exist" in str(excinfo.value)
            os.makedirs(backup_ts_out)
            # We can't restore if the directory isn't writable
            os.chmod(backup_ts_out, 0o500)
            with pytest.raises(RestoreError) as excinfo:
                Restore().run([
                    "get-basebackup",
                    "--config",
                    pghoard.config_path,
                    "--site",
                    pghoard.test_site,
                    "--target-dir",
                    backup_out,
                    "--tablespace-dir",
                    "tstest={}".format(backup_ts_out),
                ])
            assert "Tablespace 'tstest' target directory" in str(excinfo.value)
            assert "empty, but not writable" in str(excinfo.value)
            os.chmod(backup_ts_out, 0o700)
            # We can't proceed if we request mappings for non-existent tablespaces
            backup_other_out = tmpdir.join("test-restore-other").strpath
            os.makedirs(backup_other_out)
            with pytest.raises(RestoreError) as excinfo:
                Restore().run([
                    "get-basebackup",
                    "--config",
                    pghoard.config_path,
                    "--site",
                    pghoard.test_site,
                    "--target-dir",
                    backup_out,
                    "--tablespace-dir",
                    "tstest={}".format(backup_ts_out),
                    "--tablespace-dir",
                    "other={}".format(backup_other_out),
                ])
            assert "Tablespace mapping for ['other'] was requested, but" in str(
                excinfo.value)

            # Now, finally, everything should be valid and we can proceed with restore
            Restore().run([
                "get-basebackup",
                "--config",
                pghoard.config_path,
                "--site",
                pghoard.test_site,
                "--restore-to-master",
                "--target-dir",
                backup_out,
                "--tablespace-dir",
                "tstest={}".format(backup_ts_out),
            ])

            # Adjust the generated recovery.conf to point pghoard_postgres_command to our instance
            new_py_restore_cmd = "PYTHONPATH={} python3 -m pghoard.postgres_command --mode restore".format(
                os.path.dirname(os.path.dirname(__file__)))
            new_go_restore_cmd = "{}/pghoard_postgres_command_go --mode restore".format(
                os.path.dirname(os.path.dirname(__file__)))
            with open(os.path.join(backup_out, "recovery.conf"), "r+") as fp:
                rconf = fp.read()
                rconf = rconf.replace(
                    "pghoard_postgres_command_go --mode restore",
                    new_go_restore_cmd)
                rconf = rconf.replace(
                    "pghoard_postgres_command --mode restore",
                    new_py_restore_cmd)
                fp.seek(0)
                fp.write(rconf)

            r_db = PGTester(backup_out)
            r_db.user = dict(db.user, host=backup_out)
            r_db.run_pg()
            r_conn_str = pgutil.create_connection_string(r_db.user)

            # Wait for PG to start up
            start_time = time.monotonic()
            while True:
                try:
                    r_conn = psycopg2.connect(r_conn_str)
                    break
                except psycopg2.OperationalError as ex:
                    if "starting up" in str(ex):
                        assert time.monotonic() - start_time <= 10
                        time.sleep(1)
                    else:
                        raise

            r_cursor = r_conn.cursor()
            # Make sure the tablespace is defined and points to the right (new) path
            r_cursor.execute(
                "SELECT oid, pg_tablespace_location(oid) FROM pg_tablespace WHERE spcname = 'tstest'"
            )
            r_res = r_cursor.fetchone()
            assert r_res[1] == backup_ts_out

            # We should be able to read from the table in the tablespace and the values should match what we stored before
            r_cursor.execute("SELECT id FROM tstest")
            r_res = r_cursor.fetchall()
            cursor.execute("SELECT id FROM tstest")
            orig_res = cursor.fetchall()
            assert r_res == orig_res

        finally:
            if r_conn:
                r_conn.close()
            if r_db:
                r_db.kill(force=True)
            cursor.execute("DROP TABLE IF EXISTS tstest")
            cursor.execute("DROP TABLESPACE tstest")
            conn.close()
Example #10
    def test_basebackups_tablespaces(self, capsys, db, pghoard, tmpdir):
        # Create a test tablespace for this instance, but make sure we drop it at the end of the test as the
        # database we use is shared by all test cases, and tablespaces are a global concept so the test
        # tablespace could interfere with other tests
        tspath = tmpdir.join("extra-ts").strpath
        os.makedirs(tspath)
        conn_str = pgutil.create_connection_string(db.user)
        conn = psycopg2.connect(conn_str)
        conn.autocommit = True
        cursor = conn.cursor()
        cursor.execute("CREATE TABLESPACE tstest LOCATION %s", [tspath])
        r_db, r_conn = None, None
        try:
            cursor.execute("CREATE TABLE tstest (id BIGSERIAL PRIMARY KEY) TABLESPACE tstest")
            cursor.execute("INSERT INTO tstest (id) VALUES (default)")
            cursor.execute("SELECT oid, pg_tablespace_location(oid) FROM pg_tablespace WHERE spcname = 'tstest'")
            res = cursor.fetchone()
            assert res[1] == tspath

            # Start receivexlog since we want the WALs to be able to restore later on
            xlog_directory = os.path.join(pghoard.config["backup_location"], pghoard.test_site, "xlog_incoming")
            makedirs(xlog_directory, exist_ok=True)
            pghoard.receivexlog_listener(pghoard.test_site, db.user, xlog_directory)
            cursor.execute("SELECT txid_current(), pg_switch_xlog()")
            self._test_create_basebackup(capsys, db, pghoard, "local-tar")
            cursor.execute("SELECT txid_current(), pg_switch_xlog()")
            cursor.execute("SELECT txid_current(), pg_switch_xlog()")

            backup_out = tmpdir.join("test-restore").strpath
            backup_ts_out = tmpdir.join("test-restore-tstest").strpath

            # Tablespaces are extracted to their previous absolute paths by default, but the path must be empty
            # and it isn't as it's still used by the running PG
            with pytest.raises(RestoreError) as excinfo:
                Restore().run([
                    "get-basebackup",
                    "--config", pghoard.config_path,
                    "--site", pghoard.test_site,
                    "--target-dir", backup_out,
                ])
            assert "Tablespace 'tstest' target directory" in str(excinfo.value)
            assert "not empty" in str(excinfo.value)
            # We can't restore tablespaces to non-existent directories either
            with pytest.raises(RestoreError) as excinfo:
                Restore().run([
                    "get-basebackup",
                    "--config", pghoard.config_path,
                    "--site", pghoard.test_site,
                    "--target-dir", backup_out,
                    "--tablespace-dir", "tstest={}".format(backup_ts_out),
                ])
            assert "Tablespace 'tstest' target directory" in str(excinfo.value)
            assert "does not exist" in str(excinfo.value)
            os.makedirs(backup_ts_out)
            # We can't restore if the directory isn't writable
            os.chmod(backup_ts_out, 0o500)
            with pytest.raises(RestoreError) as excinfo:
                Restore().run([
                    "get-basebackup",
                    "--config", pghoard.config_path,
                    "--site", pghoard.test_site,
                    "--target-dir", backup_out,
                    "--tablespace-dir", "tstest={}".format(backup_ts_out),
                ])
            assert "Tablespace 'tstest' target directory" in str(excinfo.value)
            assert "empty, but not writable" in str(excinfo.value)
            os.chmod(backup_ts_out, 0o700)
            # We can't proceed if we request mappings for non-existent tablespaces
            backup_other_out = tmpdir.join("test-restore-other").strpath
            os.makedirs(backup_other_out)
            with pytest.raises(RestoreError) as excinfo:
                Restore().run([
                    "get-basebackup",
                    "--config", pghoard.config_path,
                    "--site", pghoard.test_site,
                    "--target-dir", backup_out,
                    "--tablespace-dir", "tstest={}".format(backup_ts_out),
                    "--tablespace-dir", "other={}".format(backup_other_out),
                ])
            assert "Tablespace mapping for ['other'] was requested, but" in str(excinfo.value)

            # Now, finally, everything should be valid and we can proceed with restore
            Restore().run([
                "get-basebackup",
                "--config", pghoard.config_path,
                "--site", pghoard.test_site,
                "--restore-to-master",
                "--target-dir", backup_out,
                "--tablespace-dir", "tstest={}".format(backup_ts_out),
            ])

            # Adjust the generated recovery.conf to point pghoard_postgres_command to our instance
            new_cmd = "PYTHONPATH={} python3 -m pghoard.postgres_command".format(os.path.dirname(os.path.dirname(__file__)))
            with open(os.path.join(backup_out, "recovery.conf"), "r+") as fp:
                rconf = fp.read()
                rconf = rconf.replace("pghoard_postgres_command", new_cmd)
                fp.seek(0)
                fp.write(rconf)

            r_db = TestPG(backup_out)
            r_db.user = dict(db.user, host=backup_out)
            r_db.run_pg()
            r_conn_str = pgutil.create_connection_string(r_db.user)

            # Wait for PG to start up
            start_time = time.monotonic()
            while True:
                try:
                    r_conn = psycopg2.connect(r_conn_str)
                    break
                except psycopg2.OperationalError as ex:
                    if "starting up" in str(ex):
                        assert time.monotonic() - start_time <= 10
                        time.sleep(1)
                    else:
                        raise

            r_cursor = r_conn.cursor()
            # Make sure the tablespace is defined and points to the right (new) path
            r_cursor.execute("SELECT oid, pg_tablespace_location(oid) FROM pg_tablespace WHERE spcname = 'tstest'")
            r_res = r_cursor.fetchone()
            assert r_res[1] == backup_ts_out

            # We should be able to read from the table in the tablespace and the values should match what we stored before
            r_cursor.execute("SELECT id FROM tstest")
            r_res = r_cursor.fetchall()
            cursor.execute("SELECT id FROM tstest")
            orig_res = cursor.fetchall()
            assert r_res == orig_res

        finally:
            if r_conn:
                r_conn.close()
            if r_db:
                r_db.kill(force=True)
            cursor.execute("DROP TABLE IF EXISTS tstest")
            cursor.execute("DROP TABLESPACE tstest")
            conn.close()
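The two versions of this test differ mainly in how they force WAL segment switches: pg_switch_xlog() was renamed to pg_switch_wal() in PostgreSQL 10 (server_version >= 100000), so the newer variant in the preceding example branches on the server version while this older one always calls pg_switch_xlog(). A small helper capturing that gate, illustrative only:

    def switch_wal(cursor, server_version):
        # The WAL switch function was renamed in PostgreSQL 10.
        if server_version >= 100000:
            cursor.execute("SELECT txid_current(), pg_switch_wal()")
        else:
            cursor.execute("SELECT txid_current(), pg_switch_xlog()")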
Example #11
def _test_storage(st, driver, tmpdir, storage_config):
    scratch = tmpdir.join("scratch")
    compat.makedirs(str(scratch), exist_ok=True)

    # File not found cases
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_metadata_for_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.delete_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_file("NONEXISTENT", str(scratch.join("a")))
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_fileobj("NONEXISTENT", BytesIO())
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_string("NONEXISTENT")
    assert st.list_path("") == []
    assert st.list_path("NONEXISTENT") == []
    st.store_file_from_memory("NONEXISTENT-a/x1", b"dummy", None)
    dummy_file = str(scratch.join("a"))
    with open(dummy_file, "wb") as fp:
        fp.write(b"dummy")
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, None)
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, {"x": 1})

    st.delete_key("NONEXISTENT-b/x1")
    st.delete_key("NONEXISTENT-a/x1")

    # Other basic cases
    from_disk_file = str(scratch.join("a"))
    with open(from_disk_file, "wb") as fp:
        fp.write(b"from disk")
    st.store_file_from_disk("test1/x1", from_disk_file, None)
    out = BytesIO()
    assert st.get_contents_to_fileobj("test1/x1", out) == {}
    assert out.getvalue() == b"from disk"

    if driver == "s3":
        response = st.s3_client.head_object(
            Bucket=st.bucket_name,
            Key=st.format_key_for_backend("test1/x1"),
        )
        assert bool(response.get("ServerSideEncryption")) == bool(
            storage_config.get('encrypted'))

    st.store_file_from_memory("test1/x1", b"dummy", {"k": "v"})
    out = BytesIO()
    assert st.get_contents_to_fileobj("test1/x1", out) == {"k": "v"}
    assert out.getvalue() == b"dummy"

    st.store_file_from_memory("test1/x1", b"l", {"fancymetadata": "value"})
    assert st.get_contents_to_string("test1/x1") == (b"l", {
        "fancymetadata": "value"
    })

    st.store_file_from_memory("test1/x1", b"1", None)
    assert st.get_contents_to_string("test1/x1") == (b"1", {})

    st.store_file_from_memory("test1/td", b"to disk", {"to-disk": "42"})
    to_disk_file = str(scratch.join("b"))
    assert st.get_contents_to_file("test1/td", to_disk_file) == {
        "to-disk": "42"
    }

    if driver == "s3":
        response = st.s3_client.head_object(
            Bucket=st.bucket_name,
            Key=st.format_key_for_backend("test1/x1"),
        )
        assert bool(response.get("ServerSideEncryption")) == bool(
            storage_config.get('encrypted'))

    assert st.list_path("") == [
    ]  # nothing at top level (directories not listed)
    if driver == "local":
        # create a dot-file (hidden), this must be ignored
        target_file = os.path.join(st.prefix, "test1/.null")
        with open(target_file, "w"):
            pass

    tlist = st.list_path("test1")
    assert len(tlist) == 2
    for fe in tlist:
        assert isinstance(fe["last_modified"], datetime.datetime)
        assert fe["last_modified"].tzinfo is not None
        if fe["name"] == "test1/x1":
            assert fe["size"] == 1
            assert fe["metadata"] == {}
        elif fe["name"] == "test1/td":
            assert fe["size"] == len(b"to disk")
            assert fe["metadata"] == {"to-disk": "42"}
        else:
            assert 0, "unexpected name in directory"

    if driver == "google":
        # test extra props for cacheControl in google
        st.store_file_from_memory("test1/x1",
                                  b"no cache test",
                                  metadata={"test": "value"},
                                  extra_props={"cacheControl": "no-cache"})

    if driver == "local":
        # test LocalFileIsRemoteFileError for local storage
        target_file = os.path.join(st.prefix, "test1/x1")
        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.store_file_from_disk("test1/x1", target_file, {"local": True})
        assert st.get_contents_to_string("test1/x1") == (b"1", {
            "local": "True"
        })

        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.get_contents_to_file("test1/x1", target_file)

        # unlink metadata file, this shouldn't break anything
        os.unlink(target_file + ".metadata")
        assert st.get_metadata_for_key("test1/x1") == {}

    st.delete_key("test1/x1")
    st.delete_key("test1/td")
    assert st.list_path("test1") == []  # empty again

    test_hash = hashlib.sha256()
    test_file = str(scratch.join("30m"))
    test_size_send = 0
    with open(test_file, "wb") as fp:
        chunk = b"30m file" * 10000
        while test_size_send < 30 * 1024 * 1024:
            test_hash.update(chunk)
            fp.write(chunk)
            test_size_send += len(chunk)
    test_hash_send = test_hash.hexdigest()

    st.store_file_from_disk("test1/30m",
                            test_file,
                            multipart=True,
                            metadata={
                                "thirtymeg": "data",
                                "size": test_size_send,
                                "key": "value-with-a-hyphen"
                            })

    os.unlink(test_file)

    expected_meta = {
        "thirtymeg": "data",
        "size": str(test_size_send),
        "key": "value-with-a-hyphen"
    }
    meta = st.get_metadata_for_key("test1/30m")
    assert meta == expected_meta

    progress_reports = []

    def dl_progress(current_pos, expected_max):
        progress_reports.append((current_pos, expected_max))

    with open(test_file, "wb") as fp:
        assert st.get_contents_to_fileobj(
            "test1/30m", fp, progress_callback=dl_progress) == expected_meta

    assert len(progress_reports) > 0
    assert progress_reports[-1][0] == progress_reports[-1][1]

    test_hash = hashlib.sha256()
    test_size_rec = 0
    with open(test_file, "rb") as fp:
        while True:
            chunk = fp.read(1024 * 1024)
            if not chunk:
                break
            test_hash.update(chunk)
            test_size_rec += len(chunk)
    test_hash_rec = test_hash.hexdigest()
    assert test_hash_rec == test_hash_send
    assert test_size_rec == test_size_send

    tlist = st.list_path("test1")
    assert len(tlist) == 1
    assert tlist[0]["name"] == "test1/30m"
    assert tlist[0]["size"] == test_size_rec

    if driver == "swift":
        segments = test_size_send // st.segment_size
        segment_list = st.list_path("test1_segments/30m")
        assert len(segment_list) >= segments

        if segments >= 2:
            # reupload a file with the same name but with less chunks
            os.truncate(test_file, st.segment_size + 1)
            test_size_send = os.path.getsize(test_file)
            st.store_file_from_disk("test1/30m",
                                    test_file,
                                    multipart=True,
                                    metadata={
                                        "30m": "less data",
                                        "size": test_size_send
                                    })

            segment_list = st.list_path("test1_segments/30m")
            assert len(segment_list) == 2
            assert len(st.list_path("test1")) == 1

    st.delete_key("test1/30m")
    assert st.list_path("test1") == []

    if driver == "swift":
        assert st.list_path("test1_segments/30m") == []
Example #12
def _test_storage(st, driver, tmpdir):
    scratch = tmpdir.join("scratch")
    compat.makedirs(str(scratch), exist_ok=True)

    # File not found cases
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_metadata_for_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.delete_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_file("NONEXISTENT", str(scratch.join("a")))
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_fileobj("NONEXISTENT", BytesIO())
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_string("NONEXISTENT")
    assert st.list_path("") == []
    assert st.list_path("NONEXISTENT") == []
    st.store_file_from_memory("NONEXISTENT-a/x1", b"dummy", None)
    dummy_file = str(scratch.join("a"))
    with open(dummy_file, "wb") as fp:
        fp.write(b"dummy")
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, None)
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, {"x": 1})

    st.delete_key("NONEXISTENT-b/x1")
    st.delete_key("NONEXISTENT-a/x1")

    # Other basic cases
    from_disk_file = str(scratch.join("a"))
    with open(from_disk_file, "wb") as fp:
        fp.write(b"from disk")
    st.store_file_from_disk("test1/x1", from_disk_file, None)
    out = BytesIO()
    assert st.get_contents_to_fileobj("test1/x1", out) == {}
    assert out.getvalue() == b"from disk"

    st.store_file_from_memory("test1/x1", b"dummy", {"k": "v"})
    out = BytesIO()
    assert st.get_contents_to_fileobj("test1/x1", out) == {"k": "v"}
    assert out.getvalue() == b"dummy"

    st.store_file_from_memory("test1/x1", b"l", {"fancymetadata": "value"})
    assert st.get_contents_to_string("test1/x1") == (b"l", {"fancymetadata": "value"})

    st.store_file_from_memory("test1/x1", b"1", None)
    assert st.get_contents_to_string("test1/x1") == (b"1", {})

    st.store_file_from_memory("test1/td", b"to disk", {"to-disk": "42"})
    to_disk_file = str(scratch.join("b"))
    assert st.get_contents_to_file("test1/td", to_disk_file) == {"to-disk": "42"}

    assert st.list_path("") == []  # nothing at top level (directories not listed)
    if driver == "local":
        # create a dot-file (hidden), this must be ignored
        target_file = os.path.join(st.prefix, "test1/.null")
        with open(target_file, "w"):
            pass

    tlist = st.list_path("test1")
    assert len(tlist) == 2
    for fe in tlist:
        assert isinstance(fe["last_modified"], datetime.datetime)
        assert fe["last_modified"].tzinfo is not None
        if fe["name"] == "test1/x1":
            assert fe["size"] == 1
            assert fe["metadata"] == {}
        elif fe["name"] == "test1/td":
            assert fe["size"] == len(b"to disk")
            assert fe["metadata"] == {"to-disk": "42"}
        else:
            assert 0, "unexpected name in directory"

    if driver == "google":
        # test extra props for cacheControl in google
        st.store_file_from_memory("test1/x1", b"no cache test",
                                  metadata={"test": "value"},
                                  extra_props={"cacheControl": "no-cache"})

    if driver == "local":
        # test LocalFileIsRemoteFileError for local storage
        target_file = os.path.join(st.prefix, "test1/x1")
        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.store_file_from_disk("test1/x1", target_file, {"local": True})
        assert st.get_contents_to_string("test1/x1") == (b"1", {"local": "True"})

        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.get_contents_to_file("test1/x1", target_file)

        # unlink metadata file, this shouldn't break anything
        os.unlink(target_file + ".metadata")
        assert st.get_metadata_for_key("test1/x1") == {}

    st.delete_key("test1/x1")
    st.delete_key("test1/td")
    assert st.list_path("test1") == []  # empty again

    test_hash = hashlib.sha256()
    test_file = str(scratch.join("30m"))
    test_size_send = 0
    with open(test_file, "wb") as fp:
        chunk = b"30m file" * 10000
        while test_size_send < 30 * 1024 * 1024:
            test_hash.update(chunk)
            fp.write(chunk)
            test_size_send += len(chunk)
    test_hash_send = test_hash.hexdigest()

    if driver == "s3":
        # inject a failure in multipart uploads
        def failing_new_key(key_name):  # pylint: disable=unused-argument
            # fail after the second call, restore functionality after the third
            fail_calls[0] += 1
            if fail_calls[0] > 3:
                st.bucket.new_key = orig_new_key
            if fail_calls[0] > 2:
                raise Exception("multipart upload failure!")

        fail_calls = [0]
        orig_new_key = st.bucket.new_key
        st.bucket.new_key = failing_new_key

        st.store_file_from_disk("test1/30m", test_file, multipart=True,
                                metadata={"thirtymeg": "data", "size": test_size_send, "key": "value-with-a-hyphen"})

        assert fail_calls[0] > 3
    else:
        st.store_file_from_disk("test1/30m", test_file, multipart=True,
                                metadata={"thirtymeg": "data", "size": test_size_send, "key": "value-with-a-hyphen"})

    os.unlink(test_file)

    expected_meta = {"thirtymeg": "data", "size": str(test_size_send), "key": "value-with-a-hyphen"}
    meta = st.get_metadata_for_key("test1/30m")
    assert meta == expected_meta

    progress_reports = []

    def dl_progress(current_pos, expected_max):
        progress_reports.append((current_pos, expected_max))

    with open(test_file, "wb") as fp:
        assert st.get_contents_to_fileobj("test1/30m", fp, progress_callback=dl_progress) == expected_meta

    assert len(progress_reports) > 0
    assert progress_reports[-1][0] == progress_reports[-1][1]

    test_hash = hashlib.sha256()
    test_size_rec = 0
    with open(test_file, "rb") as fp:
        while True:
            chunk = fp.read(1024 * 1024)
            if not chunk:
                break
            test_hash.update(chunk)
            test_size_rec += len(chunk)
    test_hash_rec = test_hash.hexdigest()
    assert test_hash_rec == test_hash_send
    assert test_size_rec == test_size_send

    tlist = st.list_path("test1")
    assert len(tlist) == 1
    assert tlist[0]["name"] == "test1/30m"
    assert tlist[0]["size"] == test_size_rec

    if driver == "swift":
        segments = test_size_send // st.segment_size
        segment_list = st.list_path("test1_segments/30m")
        assert len(segment_list) >= segments

        if segments >= 2:
            # reupload a file with the same name but with fewer segments
            os.truncate(test_file, st.segment_size + 1)
            test_size_send = os.path.getsize(test_file)
            st.store_file_from_disk("test1/30m", test_file, multipart=True,
                                    metadata={"30m": "less data", "size": test_size_send})

            segment_list = st.list_path("test1_segments/30m")
            assert len(segment_list) == 2
            assert len(st.list_path("test1")) == 1

    st.delete_key("test1/30m")
    assert st.list_path("test1") == []

    if driver == "swift":
        assert st.list_path("test1_segments/30m") == []
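
The checks above run against a transfer object st that the surrounding test fixtures construct per driver; that setup is not part of this listing. As a rough sketch only, a "local" driver transfer could be built and exercised along these lines (the import path and configuration keys are assumptions and may differ between pghoard/rohmu versions):

# Minimal sketch, not taken from this listing: build a "local" storage transfer
# and run a tiny store/fetch/delete round trip with the same API used above.
# The import path and config keys below are assumptions.
from pghoard.rohmu import get_transfer

local_config = {"storage_type": "local", "directory": "/tmp/pghoard-storage-smoke"}
st = get_transfer(local_config)
st.store_file_from_memory("smoke/key", b"hello", {"k": "v"})
assert st.get_contents_to_string("smoke/key") == (b"hello", {"k": "v"})
st.delete_key("smoke/key")
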
Example No. 13
0
def _test_storage(st, driver, tmpdir, storage_config):
    scratch = tmpdir.join("scratch")
    compat.makedirs(str(scratch), exist_ok=True)

    # File not found cases
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_metadata_for_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.delete_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_file("NONEXISTENT", str(scratch.join("a")))
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_fileobj("NONEXISTENT", BytesIO())
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_string("NONEXISTENT")
    assert st.list_path("") == []
    assert st.list_path("NONEXISTENT") == []
    st.store_file_from_memory("NONEXISTENT-a/x1", b"dummy", None)
    dummy_file = str(scratch.join("a"))
    with open(dummy_file, "wb") as fp:
        fp.write(b"dummy")
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, None)
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, {"x": 1})

    st.delete_key("NONEXISTENT-b/x1")
    st.delete_key("NONEXISTENT-a/x1")

    # Other basic cases
    from_disk_file = str(scratch.join("a"))
    input_data = b"from disk"
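    # on the local driver, grow the payload past 1 MiB so an intermediate download progress report is made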
    if driver == "local":
        input_data = input_data * 150000
    with open(from_disk_file, "wb") as fp:
        fp.write(input_data)
    st.store_file_from_disk("test1/x1", from_disk_file, None)
    out = BytesIO()

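    # download progress is reported as (bytes_so_far, total_bytes); the local driver
    # reports once per 1 MiB block and once more at completion (asserted below)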
    reported_positions = []

    def progress_callback(pos, total):
        reported_positions.append((pos, total))

    assert st.get_contents_to_fileobj("test1/x1", out, progress_callback=progress_callback) == {}
    assert out.getvalue() == input_data
    if driver == "local":
        input_size = len(input_data)
        assert reported_positions == [(1024 * 1024, input_size), (input_size, input_size)]

    if driver == "s3":
        response = st.s3_client.head_object(
            Bucket=st.bucket_name,
            Key=st.format_key_for_backend("test1/x1"),
        )
        assert bool(response.get("ServerSideEncryption")) == bool(storage_config.get("encrypted"))

    st.store_file_from_memory("test1/x1", b"dummy", {"k": "v"})
    out = BytesIO()
    assert st.get_contents_to_fileobj("test1/x1", out) == {"k": "v"}
    assert out.getvalue() == b"dummy"

    # Copy file
    st.copy_file(source_key="test1/x1", destination_key="test_copy/copy1")
    assert st.get_contents_to_string("test_copy/copy1") == (b"dummy", {"k": "v"})
    st.copy_file(source_key="test1/x1", destination_key="test_copy/copy2", metadata={"new": "meta"})
    assert st.get_contents_to_string("test_copy/copy2") == (b"dummy", {"new": "meta"})

    st.store_file_from_memory("test1/x1", b"l", {"fancymetadata": "value"})
    assert st.get_contents_to_string("test1/x1") == (b"l", {"fancymetadata": "value"})

    st.store_file_from_memory("test1/x1", b"1", None)
    assert st.get_contents_to_string("test1/x1") == (b"1", {})

    st.store_file_from_memory("test1/td", b"to disk", {"to-disk": "42"})
    to_disk_file = str(scratch.join("b"))
    assert st.get_contents_to_file("test1/td", to_disk_file) == {"to-disk": "42"}

    created_keys = {"test1/x1", "test1/td"}

    if driver == "s3":
        response = st.s3_client.head_object(
            Bucket=st.bucket_name,
            Key=st.format_key_for_backend("test1/x1"),
        )
        assert bool(response.get("ServerSideEncryption")) == bool(storage_config.get("encrypted"))

    assert st.list_path("") == []  # nothing at top level (directories not listed)
    if driver == "local":
        # create a dot-file (hidden); it must be ignored by listing
        target_file = os.path.join(st.prefix, "test1/.null")
        with open(target_file, "w"):
            pass

    tlist = st.list_path("test1")
    assert len(tlist) == 2
    for fe in tlist:
        assert isinstance(fe["last_modified"], datetime.datetime)
        assert fe["last_modified"].tzinfo is not None
        if fe["name"] == "test1/x1":
            assert fe["size"] == 1
            assert fe["metadata"] == {}
        elif fe["name"] == "test1/td":
            assert fe["size"] == len(b"to disk")
            assert fe["metadata"] == {"to-disk": "42"}
        else:
            assert 0, "unexpected name in directory"

    assert set(st.iter_prefixes("test1")) == set()

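    # create nested keys to exercise prefix listing; "test1/sub3" is stored as an object
    # and, on real object stores, also becomes a prefix further below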
    for key in ["test1/sub1/sub1.1", "test1/sub2/sub2.1/sub2.1.1", "test1/sub3"]:
        st.store_file_from_memory(key, b"1", None)
        created_keys.add(key)

    if driver == "local":
        # sub3 is a file. Actual object storage systems support this, but a file system does not
        with pytest.raises(NotADirectoryError):
            st.store_file_from_memory("test1/sub3/sub3.1/sub3.1.1", b"1", None)
    else:
        st.store_file_from_memory("test1/sub3/sub3.1/sub3.1.1", b"1", None)
        created_keys.add("test1/sub3/sub3.1/sub3.1.1")

    if driver == "local":
        assert set(st.iter_prefixes("test1")) == {"test1/sub1", "test1/sub2"}
    else:
        assert set(st.iter_prefixes("test1")) == {"test1/sub1", "test1/sub2", "test1/sub3"}
    assert {item["name"] for item in st.list_path("test1")} == {"test1/x1", "test1/td", "test1/sub3"}
    assert set(st.iter_prefixes("test1/sub1")) == set()
    assert {item["name"] for item in st.list_path("test1/sub1")} == {"test1/sub1/sub1.1"}
    assert {item["name"] for item in st.list_path("test1/sub2")} == set()
    assert {item["name"] for item in st.list_path("test1/sub3")} == set()
    assert set(st.iter_prefixes("test1/sub2")) == {"test1/sub2/sub2.1"}
    if driver == "local":
        assert set(st.iter_prefixes("test1/sub3")) == set()  # sub3 is a file
    else:
        assert set(st.iter_prefixes("test1/sub3")) == {"test1/sub3/sub3.1"}
    assert set(st.iter_prefixes("test1/sub3/3.1")) == set()

    expected_deep_iter_test1_names = {
        "test1/x1",
        "test1/td",
        "test1/sub1/sub1.1",
        "test1/sub2/sub2.1/sub2.1.1",
        "test1/sub3",
    }
    if driver != "local":
        expected_deep_iter_test1_names.add("test1/sub3/sub3.1/sub3.1.1")

    assert {item["name"] for item in st.list_path("test1", deep=True)} == expected_deep_iter_test1_names

    def _object_names(iterable):
        names = set()
        for item in iterable:
            assert item.type == KEY_TYPE_OBJECT
            names.add(item.value["name"])
        return names

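    # include_key=True makes iter_key also yield the object stored at the prefix itself,
    # not only the keys beneath it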
    deep_names_with_key = _object_names(st.iter_key("test1/sub3", deep=True, include_key=True))
    deep_names_without_key = _object_names(st.iter_key("test1/sub3", deep=True, include_key=False))

    if driver == "local":
        assert deep_names_with_key == {"test1/sub3"}
        assert deep_names_without_key == set()
    else:
        assert deep_names_with_key == {"test1/sub3", "test1/sub3/sub3.1/sub3.1.1"}
        assert deep_names_without_key == {"test1/sub3/sub3.1/sub3.1.1"}

    if driver == "google":
        # test extra props for cacheControl in google
        st.store_file_from_memory("test1/x1", b"no cache test",
                                  metadata={"test": "value"},
                                  extra_props={"cacheControl": "no-cache"})

    if driver == "local":
        # test LocalFileIsRemoteFileError for local storage
        target_file = os.path.join(st.prefix, "test1/x1")
        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.store_file_from_disk("test1/x1", target_file, {"local": True})
        assert st.get_contents_to_string("test1/x1") == (b"1", {"local": "True"})

        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.get_contents_to_file("test1/x1", target_file)

        # Missing metadata is an error; fetching metadata for the key should now fail
        os.unlink(target_file + ".metadata")
        with pytest.raises(errors.FileNotFoundFromStorageError):
            st.get_metadata_for_key("test1/x1")

    for key in created_keys:
        st.delete_key(key)
    assert st.list_path("test1") == []  # empty again

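    # delete_tree should remove every key under the given prefix; build a small tree and wipe it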
    for name in ["test2/foo", "test2/suba/foo", "test2/subb/bar", "test2/subb/subsub/zob"]:
        st.store_file_from_memory(name, b"somedata")
    names = sorted(item["name"] for item in st.list_path("test2", deep=True))
    assert names == ["test2/foo", "test2/suba/foo", "test2/subb/bar", "test2/subb/subsub/zob"]

    st.delete_tree("test2")
    assert st.list_path("test2", deep=True) == []

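    # build a ~30 MiB file from repeated 80 kB chunks and remember its SHA-256
    # so the multipart upload can be verified after download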
    test_hash = hashlib.sha256()
    test_file = str(scratch.join("30m"))
    test_size_send = 0
    with open(test_file, "wb") as fp:
        chunk = b"30m file" * 10000
        while test_size_send < 30 * 1024 * 1024:
            test_hash.update(chunk)
            fp.write(chunk)
            test_size_send += len(chunk)
    test_hash_send = test_hash.hexdigest()

    st.store_file_from_disk("test1/30m", test_file, multipart=True,
                            metadata={"thirtymeg": "data", "size": test_size_send, "key": "value-with-a-hyphen"})

    os.unlink(test_file)

    expected_meta = {"thirtymeg": "data", "size": str(test_size_send), "key": "value-with-a-hyphen"}
    meta = st.get_metadata_for_key("test1/30m")
    assert meta == expected_meta

    progress_reports = []

    def dl_progress(current_pos, expected_max):
        progress_reports.append((current_pos, expected_max))

    with open(test_file, "wb") as fp:
        assert st.get_contents_to_fileobj("test1/30m", fp, progress_callback=dl_progress) == expected_meta

    assert len(progress_reports) > 0
    assert progress_reports[-1][0] == progress_reports[-1][1]

    test_hash = hashlib.sha256()
    test_size_rec = 0
    with open(test_file, "rb") as fp:
        while True:
            chunk = fp.read(1024 * 1024)
            if not chunk:
                break
            test_hash.update(chunk)
            test_size_rec += len(chunk)
    test_hash_rec = test_hash.hexdigest()
    assert test_hash_rec == test_hash_send
    assert test_size_rec == test_size_send

    tlist = st.list_path("test1")
    assert len(tlist) == 1
    assert tlist[0]["name"] == "test1/30m"
    assert tlist[0]["size"] == test_size_rec

    if driver == "swift":
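        # swift stores large uploads as separate segment objects under the "<prefix>_segments" path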
        segments = test_size_send // st.segment_size
        segment_list = st.list_path("test1_segments/30m")
        assert len(segment_list) >= segments

        if segments >= 2:
            # reupload a file with the same name but with fewer segments
            os.truncate(test_file, st.segment_size + 1)
            test_size_send = os.path.getsize(test_file)
            st.store_file_from_disk("test1/30m", test_file, multipart=True,
                                    metadata={"30m": "less data", "size": test_size_send})

            segment_list = st.list_path("test1_segments/30m")
            assert len(segment_list) == 2
            assert len(st.list_path("test1")) == 1

    st.delete_key("test1/30m")
    assert st.list_path("test1") == []

    if driver == "swift":
        assert st.list_path("test1_segments/30m") == []

    progress_reports = []

    def upload_progress(progress):
        progress_reports.append(progress)

    for size in (300, 3 * 1024 * 1024, 11 * 1024 * 1024):
        progress_reports = []
        rds = RandomDataSource(size)
        key = "test1/{}b".format(size)
        st.store_file_object(key, rds, upload_progress_fn=upload_progress)
        # Progress may be reported after each chunk, and the chunk size depends on the memory
        # available on the current machine, so there is no straightforward way to check that
        # reasonable progress updates were made. Just ensure that whatever was reported is
        # ordered correctly.
        assert sorted(progress_reports) == progress_reports
        bio = BytesIO()
        st.get_contents_to_fileobj(key, bio)
        buffer = bio.getbuffer()
        assert len(buffer) == size
        assert buffer == rds.data
        st.delete_key(key)
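
RandomDataSource is a test helper that is not shown in this listing; the checks above only rely on it behaving like a readable file object that also exposes its full payload as .data. A minimal stand-in under that assumption could look like this (a sketch, not the project's actual helper):

# Hypothetical stand-in for the RandomDataSource helper used above: a file-like
# object over `size` random bytes that also keeps the payload as .data so the
# downloaded contents can be compared against it.
import io
import os


class RandomDataSource(io.BytesIO):
    def __init__(self, size):
        self.data = os.urandom(size)
        super().__init__(self.data)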