def download_one_backup(self, *, transfer, basebackup_data_file, progress_callback, site):
    dl_dir = os.path.join(
        self.config["backup_location"],
        self.config["backup_sites"][site]["prefix"],
        "basebackup_incoming",
    )
    compat.makedirs(dl_dir, exist_ok=True)
    tmp = tempfile.NamedTemporaryFile(dir=dl_dir, prefix="basebackup.", suffix=".pghoard")
    try:
        metadata = transfer.get_contents_to_fileobj(
            key=basebackup_data_file,
            fileobj_to_store_to=tmp,
            progress_callback=progress_callback)
        progress_callback(1, 1)
        self.log.info("Downloaded %r", basebackup_data_file)
        tmp.seek(0)
    except:  # pylint: disable=bare-except
        self.log.exception("Problem downloading a backup file: %r", basebackup_data_file)
        tmp.close()
        raise
    return tmp, metadata
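
# A minimal sketch (added for illustration, not from the original source) of the
# progress_callback contract download_one_backup relies on: the transfer layer
# calls it with (bytes_so_far, bytes_total) while downloading, and the method
# itself calls it with (1, 1) once the download has finished.
def print_progress(current_pos, expected_max):
    # Guard against a zero denominator; the final (1, 1) call reports 100%
    pct = 100.0 * current_pos / expected_max if expected_max else 100.0
    print("download progress: {:.1f}%".format(pct))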
def setup_method(self, method):
    super().setup_method(method)
    self.config = self.config_template()
    self.config["backup_sites"][self.test_site].update({
        "basebackup_count": 1,
        "basebackup_interval_hours": 1,
        "nodes": [{"host": "127.0.0.4"}],
    })
    config_path = os.path.join(self.temp_dir, "pghoard.json")
    write_json_file(config_path, self.config)
    compat.makedirs(self.config["alert_file_dir"], exist_ok=True)
    self.pghoard = PGHoard(config_path)
    # This is the "final storage location" when using "local" storage type
    self.local_storage_dir = os.path.join(
        self.config["backup_sites"][self.test_site]["object_storage"]["directory"],
        self.test_site)
    self.real_check_pg_server_version = self.pghoard.check_pg_server_version
    self.pghoard.check_pg_server_version = Mock(return_value=90404)
    self.real_check_pg_versions_ok = self.pghoard.check_pg_versions_ok
    self.pghoard.check_pg_versions_ok = Mock(return_value=True)
def _extract_pghoard_bb_v1(self, fileobj, pgdata, tablespaces):
    directories = []
    tar_meta = None
    # | in mode to use tarfile's internal stream buffer manager, currently required because our SnappyFile
    # interface doesn't do proper buffering for reads
    with tarfile.open(fileobj=fileobj, mode="r|", bufsize=IO_BLOCK_SIZE) as tar:
        for tarinfo in tar:
            if tarinfo.name == ".pghoard_tar_metadata.json":
                tar_meta_bytes = tar.extractfile(tarinfo).read()
                tar_meta = json.loads(tar_meta_bytes.decode("utf-8"))
                continue
            if tarinfo.name == "pgdata" or tarinfo.name == "tablespaces":
                continue  # ignore these directory entries
            if tarinfo.name.startswith("pgdata/"):
                target_name = os.path.join(pgdata, tarinfo.name[7:])
            elif tarinfo.name.startswith("tablespaces/"):
                tscomponents = tarinfo.name.split("/", 2)
                tsname = tscomponents[1]
                tspath = tablespaces[tsname]["path"]
                if len(tscomponents) == 2 and tarinfo.isdir():
                    # Create tablespace entry
                    assert tar_meta["tablespaces"][tsname]["oid"] == tablespaces[tsname]["oid"]
                    linkname = os.path.join(pgdata, "pg_tblspc", str(tablespaces[tsname]["oid"]))
                    os.symlink(tspath, linkname)
                    directories.append([tspath, tarinfo])
                    continue
                target_name = os.path.join(tspath, tscomponents[2])
            else:
                raise Exception("Unrecognized path {!r} in tar".format(tarinfo.name))
            if tarinfo.isdir():
                directories.append([target_name, tarinfo])
                compat.makedirs(target_name, exist_ok=True)
            elif tarinfo.isreg():
                target_dir = os.path.dirname(target_name)
                if not os.path.exists(target_dir):
                    compat.makedirs(target_dir, exist_ok=True)
                tar.makefile(tarinfo, target_name)
                tar.chmod(tarinfo, target_name)
                tar.utime(tarinfo, target_name)
            elif tarinfo.issym():
                os.symlink(tarinfo.linkname, target_name)
            else:
                raise Exception("Unrecognized file type for file {!r} in tar".format(tarinfo.name))
        for target_name, tarinfo in directories:
            tar.chmod(tarinfo, target_name)
            tar.utime(tarinfo, target_name)
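
# Self-contained sketch (added for illustration) of the streaming tarfile
# pattern used by _extract_pghoard_bb_v1 above: mode="r|" reads the archive
# strictly sequentially through tarfile's own stream buffering, which is why
# it works on non-seekable inputs such as a decompressing wrapper.
import io
import tarfile

buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w") as tar:
    info = tarfile.TarInfo("pgdata/PG_VERSION")
    payload = b"9.6\n"
    info.size = len(payload)
    tar.addfile(info, io.BytesIO(payload))
buf.seek(0)
with tarfile.open(fileobj=buf, mode="r|") as tar:  # sequential reads only
    for tarinfo in tar:
        print(tarinfo.name, tarinfo.size)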
def config_template(self, override=None):
    # NOTE: we set pg_receivexlog_path and pg_basebackup_path per site and globally mostly to verify that
    # it works, the config keys are deprecated and will be removed in a future release at which point we'll
    # switch to using pg_bin_directory config.
    bindir, ver = find_pg_binary("")
    if hasattr(psycopg2.extras, "PhysicalReplicationConnection"):
        active_backup_mode = "walreceiver"
    else:
        active_backup_mode = "pg_receivexlog"
    # Instantiate a fake PG data directory
    pg_data_directory = os.path.join(str(self.temp_dir), "PG_DATA_DIRECTORY")
    os.makedirs(pg_data_directory)
    open(os.path.join(pg_data_directory, "PG_VERSION"), "w").write(ver)
    config = {
        "alert_file_dir": os.path.join(str(self.temp_dir), "alerts"),
        "backup_location": os.path.join(str(self.temp_dir), "backupspool"),
        "backup_sites": {
            self.test_site: {
                "active_backup_mode": active_backup_mode,
                "object_storage": {
                    "storage_type": "local",
                    "directory": os.path.join(self.temp_dir, "backups"),
                },
                "pg_data_directory": pg_data_directory,
                "pg_receivexlog_path": os.path.join(bindir, "pg_receivexlog"),
            },
        },
        "json_state_file_path": os.path.join(self.temp_dir, "state.json"),
        "pg_basebackup_path": os.path.join(bindir, "pg_basebackup"),
    }
    if ver == "10":
        config["backup_sites"][self.test_site]["pg_receivexlog_path"] = os.path.join(bindir, "pg_receivewal")
    if override:
        all_site_overrides = override.pop("backup_sites", None)
        for site_name, site_override in (all_site_overrides or {}).items():
            if site_name in config["backup_sites"]:
                config["backup_sites"][site_name].update(site_override)
            else:
                config["backup_sites"][site_name] = site_override
        config.update(override)
    compat.makedirs(config["alert_file_dir"], exist_ok=True)
    return set_and_check_config_defaults(config)
def config_template(self, override=None):
    # NOTE: we set pg_receivexlog_path and pg_basebackup_path per site and globally mostly to verify that
    # it works, the config keys are deprecated and will be removed in a future release at which point we'll
    # switch to using pg_bin_directory config.
    bindir, ver = find_pg_binary("")
    if hasattr(psycopg2.extras, "PhysicalReplicationConnection"):
        active_backup_mode = "walreceiver"
    else:
        active_backup_mode = "pg_receivexlog"
    # Instantiate a fake PG data directory
    pg_data_directory = os.path.join(str(self.temp_dir), "PG_DATA_DIRECTORY")
    os.makedirs(pg_data_directory)
    open(os.path.join(pg_data_directory, "PG_VERSION"), "w").write(ver)
    config = {
        "alert_file_dir": os.path.join(str(self.temp_dir), "alerts"),
        "backup_location": os.path.join(str(self.temp_dir), "backupspool"),
        "backup_sites": {
            self.test_site: {
                "active_backup_mode": active_backup_mode,
                "object_storage": {
                    "storage_type": "local",
                    "directory": os.path.join(self.temp_dir, "backups"),
                },
                "pg_data_directory": pg_data_directory,
                "pg_receivexlog_path": os.path.join(bindir, "pg_receivexlog"),
            },
        },
        "json_state_file_path": os.path.join(self.temp_dir, "state.json"),
        "pg_basebackup_path": os.path.join(bindir, "pg_basebackup"),
    }
    if ver in ("10", "11"):
        config["backup_sites"][self.test_site]["pg_receivexlog_path"] = os.path.join(bindir, "pg_receivewal")
    if override:
        all_site_overrides = override.pop("backup_sites", None)
        for site_name, site_override in (all_site_overrides or {}).items():
            if site_name in config["backup_sites"]:
                config["backup_sites"][site_name].update(site_override)
            else:
                config["backup_sites"][site_name] = site_override
        config.update(override)
    compat.makedirs(config["alert_file_dir"], exist_ok=True)
    return set_and_check_config_defaults(config)
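
# Hedged, self-contained sketch (illustrative names, not part of the original
# source) of the override-merge behavior both config_template variants above
# implement: per-site overrides are merged into existing sites, unknown sites
# are added wholesale, and remaining top-level keys replace the defaults.
def merge_overrides(config, override):
    all_site_overrides = override.pop("backup_sites", None)
    for site_name, site_override in (all_site_overrides or {}).items():
        if site_name in config["backup_sites"]:
            config["backup_sites"][site_name].update(site_override)
        else:
            config["backup_sites"][site_name] = site_override
    config.update(override)
    return config

# Example: bump basebackup_count for an existing site without touching its other keys
# merge_overrides(config, {"backup_sites": {"default": {"basebackup_count": 3}}})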
def _test_storage(st, driver, tmpdir, storage_config):
    scratch = tmpdir.join("scratch")
    compat.makedirs(str(scratch), exist_ok=True)
    # File not found cases
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_metadata_for_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.delete_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_file("NONEXISTENT", str(scratch.join("a")))
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_fileobj("NONEXISTENT", BytesIO())
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_string("NONEXISTENT")
    assert st.list_path("") == []
    assert st.list_path("NONEXISTENT") == []
    st.store_file_from_memory("NONEXISTENT-a/x1", b"dummy", None)
    dummy_file = str(scratch.join("a"))
    with open(dummy_file, "wb") as fp:
        fp.write(b"dummy")
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, None)
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, {"x": 1})
    st.delete_key("NONEXISTENT-b/x1")
    st.delete_key("NONEXISTENT-a/x1")
    # Other basic cases
    from_disk_file = str(scratch.join("a"))
    input_data = b"from disk"
    if driver in ["local", "sftp"]:
        input_data = input_data * 150000
    with open(from_disk_file, "wb") as fp:
        fp.write(input_data)
    st.store_file_from_disk("test1/x1", from_disk_file, None)
    out = BytesIO()
    reported_positions = []

    def progress_callback(pos, total):
        reported_positions.append((pos, total))

    assert st.get_contents_to_fileobj("test1/x1", out, progress_callback=progress_callback) == {}
    assert out.getvalue() == input_data
    if driver in ["local", "sftp"]:
        input_size = len(input_data)
        assert reported_positions[-1] == (input_size, input_size)
        if driver == "local":
            assert reported_positions == [(1024 * 1024, input_size), (input_size, input_size)]
    if driver == "s3":
        response = st.s3_client.head_object(
            Bucket=st.bucket_name,
            Key=st.format_key_for_backend("test1/x1"),
        )
        assert bool(response.get("ServerSideEncryption")) == bool(storage_config.get("encrypted"))
    st.store_file_from_memory("test1/x1", b"dummy", {"k": "v"})
    out = BytesIO()
    assert st.get_contents_to_fileobj("test1/x1", out) == {"k": "v"}
    assert out.getvalue() == b"dummy"
    # sftp does not support remote copy
    if driver != "sftp":
        # Copy file
        st.copy_file(source_key="test1/x1", destination_key="test_copy/copy1")
        assert st.get_contents_to_string("test_copy/copy1") == (b"dummy", {"k": "v"})
        st.copy_file(source_key="test1/x1", destination_key="test_copy/copy2", metadata={"new": "meta"})
        assert st.get_contents_to_string("test_copy/copy2") == (b"dummy", {"new": "meta"})
    st.store_file_from_memory("test1/x1", b"l", {"fancymetadata": "value"})
    assert st.get_contents_to_string("test1/x1") == (b"l", {"fancymetadata": "value"})
    st.store_file_from_memory("test1/x1", b"1", None)
    assert st.get_contents_to_string("test1/x1") == (b"1", {})
    st.store_file_from_memory("test1/td", b"to disk", {"to-disk": "42"})
    to_disk_file = str(scratch.join("b"))
    assert st.get_contents_to_file("test1/td", to_disk_file) == {"to-disk": "42"}
    created_keys = {"test1/x1", "test1/td"}
    if driver == "s3":
        response = st.s3_client.head_object(
            Bucket=st.bucket_name,
            Key=st.format_key_for_backend("test1/x1"),
        )
        assert bool(response.get("ServerSideEncryption")) == bool(storage_config.get("encrypted"))
    assert st.list_path("") == []  # nothing at top level (directories not listed)
    if driver == "local":
        # create a dot-file (hidden), this must be ignored
        target_file = os.path.join(st.prefix, "test1/.null")
        with open(target_file, "w"):
            pass
    tlist = st.list_path("test1")
    assert len(tlist) == 2
    for fe in tlist:
        assert isinstance(fe["last_modified"], datetime.datetime)
        assert fe["last_modified"].tzinfo is not None
        if fe["name"] == "test1/x1":
            assert fe["size"] == 1
            assert fe["metadata"] == {}
        elif fe["name"] == "test1/td":
            assert fe["size"] == len(b"to disk")
            assert fe["metadata"] == {"to-disk": "42"}
        else:
            assert 0, "unexpected name in directory"
    assert set(st.iter_prefixes("test1")) == set()
    for key in ["test1/sub1/sub1.1", "test1/sub2/sub2.1/sub2.1.1", "test1/sub3"]:
        st.store_file_from_memory(key, b"1", None)
        created_keys.add(key)
    if driver == "local":
        # sub3 is a file. Actual object storage systems support this, but a file system does not
        with pytest.raises(NotADirectoryError):
            st.store_file_from_memory("test1/sub3/sub3.1/sub3.1.1", b"1", None)
    elif driver == "sftp":
        # sub3 is a file. Actual object storage systems support this, but a file system does not
        with pytest.raises(errors.StorageError):
            st.store_file_from_memory("test1/sub3/sub3.1/sub3.1.1", b"1", None)
    else:
        st.store_file_from_memory("test1/sub3/sub3.1/sub3.1.1", b"1", None)
        created_keys.add("test1/sub3/sub3.1/sub3.1.1")
    if driver in ["local", "sftp"]:
        assert set(st.iter_prefixes("test1")) == {"test1/sub1", "test1/sub2"}
    else:
        assert set(st.iter_prefixes("test1")) == {"test1/sub1", "test1/sub2", "test1/sub3"}
    assert {item["name"] for item in st.list_path("test1")} == {"test1/x1", "test1/td", "test1/sub3"}
    assert set(st.iter_prefixes("test1/sub1")) == set()
    assert {item["name"] for item in st.list_path("test1/sub1")} == {"test1/sub1/sub1.1"}
    assert {item["name"] for item in st.list_path("test1/sub2")} == set()
    assert {item["name"] for item in st.list_path("test1/sub3")} == set()
    assert set(st.iter_prefixes("test1/sub2")) == {"test1/sub2/sub2.1"}
    if driver in ["local", "sftp"]:
        assert set(st.iter_prefixes("test1/sub3")) == set()  # sub3 is a file
    else:
        assert set(st.iter_prefixes("test1/sub3")) == {"test1/sub3/sub3.1"}
    assert set(st.iter_prefixes("test1/sub3/3.1")) == set()
    expected_deep_iter_test1_names = {
        "test1/x1",
        "test1/td",
        "test1/sub1/sub1.1",
        "test1/sub2/sub2.1/sub2.1.1",
        "test1/sub3",
    }
    if driver not in ["local", "sftp"]:
        expected_deep_iter_test1_names.add("test1/sub3/sub3.1/sub3.1.1")
    assert {item["name"] for item in st.list_path("test1", deep=True)} == expected_deep_iter_test1_names

    def _object_names(iterable):
        names = set()
        for item in iterable:
            assert item.type == KEY_TYPE_OBJECT
            names.add(item.value["name"])
        return names

    deep_names_with_key = _object_names(st.iter_key("test1/sub3", deep=True, include_key=True))
    deep_names_without_key = _object_names(st.iter_key("test1/sub3", deep=True, include_key=False))
    if driver in ["local", "sftp"]:
        assert deep_names_with_key == {"test1/sub3"}
        assert deep_names_without_key == set()
    else:
        assert deep_names_with_key == {"test1/sub3", "test1/sub3/sub3.1/sub3.1.1"}
        assert deep_names_without_key == {"test1/sub3/sub3.1/sub3.1.1"}
    if driver == "google":
        # test extra props for cacheControl in google
        st.store_file_from_memory("test1/x1", b"no cache test",
                                  metadata={"test": "value"},
                                  extra_props={"cacheControl": "no-cache"})
    if driver == "local":
        # test LocalFileIsRemoteFileError for local storage
        target_file = os.path.join(st.prefix, "test1/x1")
        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.store_file_from_disk("test1/x1", target_file, {"local": True})
        assert st.get_contents_to_string("test1/x1") == (b"1", {"local": "True"})
        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.get_contents_to_file("test1/x1", target_file)
        # Missing metadata is an error situation that should fail
        os.unlink(target_file + ".metadata")
        with pytest.raises(errors.FileNotFoundFromStorageError):
            st.get_metadata_for_key("test1/x1")
    for key in created_keys:
        st.delete_key(key)
    assert st.list_path("test1") == []  # empty again
    for name in ["test2/foo", "test2/suba/foo", "test2/subb/bar", "test2/subb/subsub/zob"]:
        st.store_file_from_memory(name, b"somedata")
    names = sorted(item["name"] for item in st.list_path("test2", deep=True))
    assert names == ["test2/foo", "test2/suba/foo", "test2/subb/bar", "test2/subb/subsub/zob"]
    st.delete_tree("test2")
    assert st.list_path("test2", deep=True) == []
    test_hash = hashlib.sha256()
    test_file = str(scratch.join("30m"))
    test_size_send = 0
    with open(test_file, "wb") as fp:
        chunk = b"30m file" * 10000
        while test_size_send < 30 * 1024 * 1024:
            test_hash.update(chunk)
            fp.write(chunk)
            test_size_send += len(chunk)
    test_hash_send = test_hash.hexdigest()
    st.store_file_from_disk("test1/30m", test_file, multipart=True,
                            metadata={"thirtymeg": "data", "size": test_size_send, "key": "value-with-a-hyphen"})
    os.unlink(test_file)
    expected_meta = {"thirtymeg": "data", "size": str(test_size_send), "key": "value-with-a-hyphen"}
    meta = st.get_metadata_for_key("test1/30m")
    assert meta == expected_meta
    progress_reports = []

    def dl_progress(current_pos, expected_max):
        progress_reports.append((current_pos, expected_max))

    with open(test_file, "wb") as fp:
        assert st.get_contents_to_fileobj("test1/30m", fp, progress_callback=dl_progress) == expected_meta
    assert len(progress_reports) > 0
    assert progress_reports[-1][0] == progress_reports[-1][1]
    test_hash = hashlib.sha256()
    test_size_rec = 0
    with open(test_file, "rb") as fp:
        while True:
            chunk = fp.read(1024 * 1024)
            if not chunk:
                break
            test_hash.update(chunk)
            test_size_rec += len(chunk)
    test_hash_rec = test_hash.hexdigest()
    assert test_hash_rec == test_hash_send
    assert test_size_rec == test_size_send
    tlist = st.list_path("test1")
    assert len(tlist) == 1
    assert tlist[0]["name"] == "test1/30m"
    assert tlist[0]["size"] == test_size_rec
    if driver == "swift":
        segments = test_size_send // st.segment_size
        segment_list = st.list_path("test1_segments/30m")
        assert len(segment_list) >= segments
        if segments >= 2:
            # re-upload a file with the same name but with fewer chunks
            os.truncate(test_file, st.segment_size + 1)
            test_size_send = os.path.getsize(test_file)
            st.store_file_from_disk("test1/30m", test_file, multipart=True,
                                    metadata={"30m": "less data", "size": test_size_send})
            segment_list = st.list_path("test1_segments/30m")
            assert len(segment_list) == 2
            assert len(st.list_path("test1")) == 1
    st.delete_key("test1/30m")
    assert st.list_path("test1") == []
    if driver == "swift":
        assert st.list_path("test1_segments/30m") == []
    progress_reports = []

    def upload_progress(progress):
        progress_reports.append(progress)

    for seekable in (False, True):
        for size in (300, 3 * 1024 * 1024, 11 * 1024 * 1024):
            progress_reports = []
            rds = RandomDataSource(size)
            if seekable:
                fd = BytesIO(rds.data)
            else:
                fd = rds
            key = "test1/{}b".format(size)
            st.store_file_object(key, fd, upload_progress_fn=upload_progress)
            # Progress may be reported after each chunk and chunk size depends on available memory
            # on current machine so there's no straightforward way of checking reasonable progress
            # updates were made. Just ensure they're ordered correctly if something was provided
            assert sorted(progress_reports) == progress_reports
            bio = BytesIO()
            st.get_contents_to_fileobj(key, bio)
            buffer = bio.getbuffer()
            assert len(buffer) == size
            assert buffer == rds.data
            st.delete_key(key)
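
# RandomDataSource is used by the tests above but not defined in this section.
# Below is a hedged, minimal sketch of what such a helper could look like (an
# assumption, not the project's actual implementation): a non-seekable
# file-like object that also exposes its full payload via .data so the test
# can compare the round-tripped bytes.
import os


class RandomDataSource:
    def __init__(self, size):
        self.data = os.urandom(size)  # deterministic data would work equally well
        self._pos = 0

    def read(self, n=-1):
        # Sequential reads only; there is deliberately no seek(), so uploads
        # must stream the object instead of rewinding it
        if n is None or n < 0:
            n = len(self.data) - self._pos
        chunk = self.data[self._pos:self._pos + n]
        self._pos += len(chunk)
        return chunk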
def test_basebackups_tablespaces(self, capsys, db, pghoard, tmpdir):
    # Create a test tablespace for this instance, but make sure we drop it at the end of the test as the
    # database we use is shared by all test cases, and tablespaces are a global concept so the test
    # tablespace could interfere with other tests
    tspath = tmpdir.join("extra-ts").strpath
    os.makedirs(tspath)
    conn_str = pgutil.create_connection_string(db.user)
    conn = psycopg2.connect(conn_str)
    conn.autocommit = True
    cursor = conn.cursor()
    cursor.execute("CREATE TABLESPACE tstest LOCATION %s", [tspath])
    r_db, r_conn = None, None
    try:
        cursor.execute("CREATE TABLE tstest (id BIGSERIAL PRIMARY KEY, value BIGINT) TABLESPACE tstest")
        cursor.execute("INSERT INTO tstest (value) SELECT * FROM generate_series(1, 1000)")
        cursor.execute("CHECKPOINT")
        cursor.execute("SELECT oid, pg_tablespace_location(oid) FROM pg_tablespace WHERE spcname = 'tstest'")
        res = cursor.fetchone()
        assert res[1] == tspath
        # Start receivexlog since we want the WALs to be able to restore later on
        wal_directory = os.path.join(pghoard.config["backup_location"], pghoard.test_site, "xlog_incoming")
        makedirs(wal_directory, exist_ok=True)
        pghoard.receivexlog_listener(pghoard.test_site, db.user, wal_directory)
        if conn.server_version >= 100000:
            cursor.execute("SELECT txid_current(), pg_switch_wal()")
        else:
            cursor.execute("SELECT txid_current(), pg_switch_xlog()")
        self._test_create_basebackup(capsys, db, pghoard, "local-tar")
        if conn.server_version >= 100000:
            cursor.execute("SELECT txid_current(), pg_switch_wal()")
            cursor.execute("SELECT txid_current(), pg_switch_wal()")
        else:
            cursor.execute("SELECT txid_current(), pg_switch_xlog()")
            cursor.execute("SELECT txid_current(), pg_switch_xlog()")
        backup_out = tmpdir.join("test-restore").strpath
        backup_ts_out = tmpdir.join("test-restore-tstest").strpath
        # Tablespaces are extracted to their previous absolute paths by default, but the path must be empty
        # and it isn't as it's still used by the running PG
        with pytest.raises(RestoreError) as excinfo:
            Restore().run([
                "get-basebackup",
                "--config", pghoard.config_path,
                "--site", pghoard.test_site,
                "--target-dir", backup_out,
            ])
        assert "Tablespace 'tstest' target directory" in str(excinfo.value)
        assert "not empty" in str(excinfo.value)
        # We can't restore tablespaces to non-existent directories either
        with pytest.raises(RestoreError) as excinfo:
            Restore().run([
                "get-basebackup",
                "--config", pghoard.config_path,
                "--site", pghoard.test_site,
                "--target-dir", backup_out,
                "--tablespace-dir", "tstest={}".format(backup_ts_out),
            ])
        assert "Tablespace 'tstest' target directory" in str(excinfo.value)
        assert "does not exist" in str(excinfo.value)
        os.makedirs(backup_ts_out)
        # We can't restore if the directory isn't writable
        os.chmod(backup_ts_out, 0o500)
        with pytest.raises(RestoreError) as excinfo:
            Restore().run([
                "get-basebackup",
                "--config", pghoard.config_path,
                "--site", pghoard.test_site,
                "--target-dir", backup_out,
                "--tablespace-dir", "tstest={}".format(backup_ts_out),
            ])
        assert "Tablespace 'tstest' target directory" in str(excinfo.value)
        assert "empty, but not writable" in str(excinfo.value)
        os.chmod(backup_ts_out, 0o700)
        # We can't proceed if we request mappings for non-existent tablespaces
        backup_other_out = tmpdir.join("test-restore-other").strpath
        os.makedirs(backup_other_out)
        with pytest.raises(RestoreError) as excinfo:
            Restore().run([
                "get-basebackup",
                "--config", pghoard.config_path,
                "--site", pghoard.test_site,
                "--target-dir", backup_out,
                "--tablespace-dir", "tstest={}".format(backup_ts_out),
                "--tablespace-dir", "other={}".format(backup_other_out),
            ])
        assert "Tablespace mapping for ['other'] was requested, but" in str(excinfo.value)
        # Now, finally, everything should be valid and we can proceed with restore
        Restore().run([
            "get-basebackup",
            "--config", pghoard.config_path,
            "--site", pghoard.test_site,
            "--restore-to-master",
            "--target-dir", backup_out,
            "--tablespace-dir", "tstest={}".format(backup_ts_out),
        ])
        # Adjust the generated recovery.conf to point pghoard_postgres_command to our instance
        new_py_restore_cmd = "PYTHONPATH={} python3 -m pghoard.postgres_command --mode restore".format(
            os.path.dirname(os.path.dirname(__file__)))
        new_go_restore_cmd = "{}/pghoard_postgres_command_go --mode restore".format(
            os.path.dirname(os.path.dirname(__file__)))
        with open(os.path.join(backup_out, "recovery.conf"), "r+") as fp:
            rconf = fp.read()
            rconf = rconf.replace("pghoard_postgres_command_go --mode restore", new_go_restore_cmd)
            rconf = rconf.replace("pghoard_postgres_command --mode restore", new_py_restore_cmd)
            fp.seek(0)
            fp.write(rconf)
        r_db = PGTester(backup_out)
        r_db.user = dict(db.user, host=backup_out)
        r_db.run_pg()
        r_conn_str = pgutil.create_connection_string(r_db.user)
        # Wait for PG to start up
        start_time = time.monotonic()
        while True:
            try:
                r_conn = psycopg2.connect(r_conn_str)
                break
            except psycopg2.OperationalError as ex:
                if "starting up" in str(ex):
                    assert time.monotonic() - start_time <= 10
                    time.sleep(1)
                else:
                    raise
        r_cursor = r_conn.cursor()
        # Make sure the tablespace is defined and points to the right (new) path
        r_cursor.execute("SELECT oid, pg_tablespace_location(oid) FROM pg_tablespace WHERE spcname = 'tstest'")
        r_res = r_cursor.fetchone()
        assert r_res[1] == backup_ts_out
        # We should be able to read from the table in the tablespace and the values should match what we stored before
        r_cursor.execute("SELECT id FROM tstest")
        r_res = r_cursor.fetchall()
        cursor.execute("SELECT id FROM tstest")
        orig_res = cursor.fetchall()
        assert r_res == orig_res
    finally:
        if r_conn:
            r_conn.close()
        if r_db:
            r_db.kill(force=True)
        cursor.execute("DROP TABLE IF EXISTS tstest")
        cursor.execute("DROP TABLESPACE tstest")
        conn.close()
def test_basebackups_tablespaces(self, capsys, db, pghoard, tmpdir):
    # Create a test tablespace for this instance, but make sure we drop it at the end of the test as the
    # database we use is shared by all test cases, and tablespaces are a global concept so the test
    # tablespace could interfere with other tests
    tspath = tmpdir.join("extra-ts").strpath
    os.makedirs(tspath)
    conn_str = pgutil.create_connection_string(db.user)
    conn = psycopg2.connect(conn_str)
    conn.autocommit = True
    cursor = conn.cursor()
    cursor.execute("CREATE TABLESPACE tstest LOCATION %s", [tspath])
    r_db, r_conn = None, None
    try:
        cursor.execute("CREATE TABLE tstest (id BIGSERIAL PRIMARY KEY) TABLESPACE tstest")
        cursor.execute("INSERT INTO tstest (id) VALUES (default)")
        cursor.execute("SELECT oid, pg_tablespace_location(oid) FROM pg_tablespace WHERE spcname = 'tstest'")
        res = cursor.fetchone()
        assert res[1] == tspath
        # Start receivexlog since we want the WALs to be able to restore later on
        xlog_directory = os.path.join(pghoard.config["backup_location"], pghoard.test_site, "xlog_incoming")
        makedirs(xlog_directory, exist_ok=True)
        pghoard.receivexlog_listener(pghoard.test_site, db.user, xlog_directory)
        cursor.execute("SELECT txid_current(), pg_switch_xlog()")
        self._test_create_basebackup(capsys, db, pghoard, "local-tar")
        cursor.execute("SELECT txid_current(), pg_switch_xlog()")
        cursor.execute("SELECT txid_current(), pg_switch_xlog()")
        backup_out = tmpdir.join("test-restore").strpath
        backup_ts_out = tmpdir.join("test-restore-tstest").strpath
        # Tablespaces are extracted to their previous absolute paths by default, but the path must be empty
        # and it isn't as it's still used by the running PG
        with pytest.raises(RestoreError) as excinfo:
            Restore().run([
                "get-basebackup",
                "--config", pghoard.config_path,
                "--site", pghoard.test_site,
                "--target-dir", backup_out,
            ])
        assert "Tablespace 'tstest' target directory" in str(excinfo.value)
        assert "not empty" in str(excinfo.value)
        # We can't restore tablespaces to non-existent directories either
        with pytest.raises(RestoreError) as excinfo:
            Restore().run([
                "get-basebackup",
                "--config", pghoard.config_path,
                "--site", pghoard.test_site,
                "--target-dir", backup_out,
                "--tablespace-dir", "tstest={}".format(backup_ts_out),
            ])
        assert "Tablespace 'tstest' target directory" in str(excinfo.value)
        assert "does not exist" in str(excinfo.value)
        os.makedirs(backup_ts_out)
        # We can't restore if the directory isn't writable
        os.chmod(backup_ts_out, 0o500)
        with pytest.raises(RestoreError) as excinfo:
            Restore().run([
                "get-basebackup",
                "--config", pghoard.config_path,
                "--site", pghoard.test_site,
                "--target-dir", backup_out,
                "--tablespace-dir", "tstest={}".format(backup_ts_out),
            ])
        assert "Tablespace 'tstest' target directory" in str(excinfo.value)
        assert "empty, but not writable" in str(excinfo.value)
        os.chmod(backup_ts_out, 0o700)
        # We can't proceed if we request mappings for non-existent tablespaces
        backup_other_out = tmpdir.join("test-restore-other").strpath
        os.makedirs(backup_other_out)
        with pytest.raises(RestoreError) as excinfo:
            Restore().run([
                "get-basebackup",
                "--config", pghoard.config_path,
                "--site", pghoard.test_site,
                "--target-dir", backup_out,
                "--tablespace-dir", "tstest={}".format(backup_ts_out),
                "--tablespace-dir", "other={}".format(backup_other_out),
            ])
        assert "Tablespace mapping for ['other'] was requested, but" in str(excinfo.value)
        # Now, finally, everything should be valid and we can proceed with restore
        Restore().run([
            "get-basebackup",
            "--config", pghoard.config_path,
            "--site", pghoard.test_site,
            "--restore-to-master",
            "--target-dir", backup_out,
            "--tablespace-dir", "tstest={}".format(backup_ts_out),
        ])
        # Adjust the generated recovery.conf to point pghoard_postgres_command to our instance
        new_cmd = "PYTHONPATH={} python3 -m pghoard.postgres_command".format(
            os.path.dirname(os.path.dirname(__file__)))
        with open(os.path.join(backup_out, "recovery.conf"), "r+") as fp:
            rconf = fp.read()
            rconf = rconf.replace("pghoard_postgres_command", new_cmd)
            fp.seek(0)
            fp.write(rconf)
        r_db = TestPG(backup_out)
        r_db.user = dict(db.user, host=backup_out)
        r_db.run_pg()
        r_conn_str = pgutil.create_connection_string(r_db.user)
        # Wait for PG to start up
        start_time = time.monotonic()
        while True:
            try:
                r_conn = psycopg2.connect(r_conn_str)
                break
            except psycopg2.OperationalError as ex:
                if "starting up" in str(ex):
                    assert time.monotonic() - start_time <= 10
                    time.sleep(1)
                else:
                    raise
        r_cursor = r_conn.cursor()
        # Make sure the tablespace is defined and points to the right (new) path
        r_cursor.execute("SELECT oid, pg_tablespace_location(oid) FROM pg_tablespace WHERE spcname = 'tstest'")
        r_res = r_cursor.fetchone()
        assert r_res[1] == backup_ts_out
        # We should be able to read from the table in the tablespace and the values should match what we stored before
        r_cursor.execute("SELECT id FROM tstest")
        r_res = r_cursor.fetchall()
        cursor.execute("SELECT id FROM tstest")
        orig_res = cursor.fetchall()
        assert r_res == orig_res
    finally:
        if r_conn:
            r_conn.close()
        if r_db:
            r_db.kill(force=True)
        cursor.execute("DROP TABLE IF EXISTS tstest")
        cursor.execute("DROP TABLESPACE tstest")
        conn.close()
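
# Both test variants above busy-wait for the restored PostgreSQL to accept
# connections. A self-contained helper sketch of that loop (illustrative,
# extracted from the inline code rather than taken from the project):
import time

import psycopg2


def wait_for_pg(conn_str, timeout=10.0):
    start_time = time.monotonic()
    while True:
        try:
            return psycopg2.connect(conn_str)
        except psycopg2.OperationalError as ex:
            # "starting up" means the server process exists but isn't ready
            # yet; anything else is a real error and is re-raised immediately
            if "starting up" not in str(ex):
                raise
            if time.monotonic() - start_time > timeout:
                raise
            time.sleep(1)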
def _test_storage(st, driver, tmpdir, storage_config):
    scratch = tmpdir.join("scratch")
    compat.makedirs(str(scratch), exist_ok=True)
    # File not found cases
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_metadata_for_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.delete_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_file("NONEXISTENT", str(scratch.join("a")))
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_fileobj("NONEXISTENT", BytesIO())
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_string("NONEXISTENT")
    assert st.list_path("") == []
    assert st.list_path("NONEXISTENT") == []
    st.store_file_from_memory("NONEXISTENT-a/x1", b"dummy", None)
    dummy_file = str(scratch.join("a"))
    with open(dummy_file, "wb") as fp:
        fp.write(b"dummy")
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, None)
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, {"x": 1})
    st.delete_key("NONEXISTENT-b/x1")
    st.delete_key("NONEXISTENT-a/x1")
    # Other basic cases
    from_disk_file = str(scratch.join("a"))
    with open(from_disk_file, "wb") as fp:
        fp.write(b"from disk")
    st.store_file_from_disk("test1/x1", from_disk_file, None)
    out = BytesIO()
    assert st.get_contents_to_fileobj("test1/x1", out) == {}
    assert out.getvalue() == b"from disk"
    if driver == "s3":
        response = st.s3_client.head_object(
            Bucket=st.bucket_name,
            Key=st.format_key_for_backend("test1/x1"),
        )
        assert bool(response.get("ServerSideEncryption")) == bool(storage_config.get("encrypted"))
    st.store_file_from_memory("test1/x1", b"dummy", {"k": "v"})
    out = BytesIO()
    assert st.get_contents_to_fileobj("test1/x1", out) == {"k": "v"}
    assert out.getvalue() == b"dummy"
    st.store_file_from_memory("test1/x1", b"l", {"fancymetadata": "value"})
    assert st.get_contents_to_string("test1/x1") == (b"l", {"fancymetadata": "value"})
    st.store_file_from_memory("test1/x1", b"1", None)
    assert st.get_contents_to_string("test1/x1") == (b"1", {})
    st.store_file_from_memory("test1/td", b"to disk", {"to-disk": "42"})
    to_disk_file = str(scratch.join("b"))
    assert st.get_contents_to_file("test1/td", to_disk_file) == {"to-disk": "42"}
    if driver == "s3":
        response = st.s3_client.head_object(
            Bucket=st.bucket_name,
            Key=st.format_key_for_backend("test1/x1"),
        )
        assert bool(response.get("ServerSideEncryption")) == bool(storage_config.get("encrypted"))
    assert st.list_path("") == []  # nothing at top level (directories not listed)
    if driver == "local":
        # create a dot-file (hidden), this must be ignored
        target_file = os.path.join(st.prefix, "test1/.null")
        with open(target_file, "w"):
            pass
    tlist = st.list_path("test1")
    assert len(tlist) == 2
    for fe in tlist:
        assert isinstance(fe["last_modified"], datetime.datetime)
        assert fe["last_modified"].tzinfo is not None
        if fe["name"] == "test1/x1":
            assert fe["size"] == 1
            assert fe["metadata"] == {}
        elif fe["name"] == "test1/td":
            assert fe["size"] == len(b"to disk")
            assert fe["metadata"] == {"to-disk": "42"}
        else:
            assert 0, "unexpected name in directory"
    if driver == "google":
        # test extra props for cacheControl in google
        st.store_file_from_memory("test1/x1", b"no cache test",
                                  metadata={"test": "value"},
                                  extra_props={"cacheControl": "no-cache"})
    if driver == "local":
        # test LocalFileIsRemoteFileError for local storage
        target_file = os.path.join(st.prefix, "test1/x1")
        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.store_file_from_disk("test1/x1", target_file, {"local": True})
        assert st.get_contents_to_string("test1/x1") == (b"1", {"local": "True"})
        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.get_contents_to_file("test1/x1", target_file)
        # unlink metadata file, this shouldn't break anything
        os.unlink(target_file + ".metadata")
        assert st.get_metadata_for_key("test1/x1") == {}
    st.delete_key("test1/x1")
    st.delete_key("test1/td")
    assert st.list_path("test1") == []  # empty again
    test_hash = hashlib.sha256()
    test_file = str(scratch.join("30m"))
    test_size_send = 0
    with open(test_file, "wb") as fp:
        chunk = b"30m file" * 10000
        while test_size_send < 30 * 1024 * 1024:
            test_hash.update(chunk)
            fp.write(chunk)
            test_size_send += len(chunk)
    test_hash_send = test_hash.hexdigest()
    st.store_file_from_disk("test1/30m", test_file, multipart=True,
                            metadata={"thirtymeg": "data", "size": test_size_send, "key": "value-with-a-hyphen"})
    os.unlink(test_file)
    expected_meta = {"thirtymeg": "data", "size": str(test_size_send), "key": "value-with-a-hyphen"}
    meta = st.get_metadata_for_key("test1/30m")
    assert meta == expected_meta
    progress_reports = []

    def dl_progress(current_pos, expected_max):
        progress_reports.append((current_pos, expected_max))

    with open(test_file, "wb") as fp:
        assert st.get_contents_to_fileobj("test1/30m", fp, progress_callback=dl_progress) == expected_meta
    assert len(progress_reports) > 0
    assert progress_reports[-1][0] == progress_reports[-1][1]
    test_hash = hashlib.sha256()
    test_size_rec = 0
    with open(test_file, "rb") as fp:
        while True:
            chunk = fp.read(1024 * 1024)
            if not chunk:
                break
            test_hash.update(chunk)
            test_size_rec += len(chunk)
    test_hash_rec = test_hash.hexdigest()
    assert test_hash_rec == test_hash_send
    assert test_size_rec == test_size_send
    tlist = st.list_path("test1")
    assert len(tlist) == 1
    assert tlist[0]["name"] == "test1/30m"
    assert tlist[0]["size"] == test_size_rec
    if driver == "swift":
        segments = test_size_send // st.segment_size
        segment_list = st.list_path("test1_segments/30m")
        assert len(segment_list) >= segments
        if segments >= 2:
            # re-upload a file with the same name but with fewer chunks
            os.truncate(test_file, st.segment_size + 1)
            test_size_send = os.path.getsize(test_file)
            st.store_file_from_disk("test1/30m", test_file, multipart=True,
                                    metadata={"30m": "less data", "size": test_size_send})
            segment_list = st.list_path("test1_segments/30m")
            assert len(segment_list) == 2
            assert len(st.list_path("test1")) == 1
    st.delete_key("test1/30m")
    assert st.list_path("test1") == []
    if driver == "swift":
        assert st.list_path("test1_segments/30m") == []
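
# The local-driver assertions above manipulate a "<key>.metadata" sidecar file.
# The following is a hedged sketch of that convention (an assumption about the
# local storage backend, not its verified implementation): metadata lives as
# JSON next to the object file, and this older test version treats a missing
# sidecar as empty metadata.
import json
import os


def read_local_metadata(object_path):
    meta_path = object_path + ".metadata"
    if not os.path.exists(meta_path):
        return {}  # missing sidecar treated as "no metadata" here
    with open(meta_path, "r") as fp:
        return json.load(fp)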
def _test_storage(st, driver, tmpdir):
    scratch = tmpdir.join("scratch")
    compat.makedirs(str(scratch), exist_ok=True)
    # File not found cases
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_metadata_for_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.delete_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_file("NONEXISTENT", str(scratch.join("a")))
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_fileobj("NONEXISTENT", BytesIO())
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_string("NONEXISTENT")
    assert st.list_path("") == []
    assert st.list_path("NONEXISTENT") == []
    st.store_file_from_memory("NONEXISTENT-a/x1", b"dummy", None)
    dummy_file = str(scratch.join("a"))
    with open(dummy_file, "wb") as fp:
        fp.write(b"dummy")
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, None)
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, {"x": 1})
    st.delete_key("NONEXISTENT-b/x1")
    st.delete_key("NONEXISTENT-a/x1")
    # Other basic cases
    from_disk_file = str(scratch.join("a"))
    with open(from_disk_file, "wb") as fp:
        fp.write(b"from disk")
    st.store_file_from_disk("test1/x1", from_disk_file, None)
    out = BytesIO()
    assert st.get_contents_to_fileobj("test1/x1", out) == {}
    assert out.getvalue() == b"from disk"
    st.store_file_from_memory("test1/x1", b"dummy", {"k": "v"})
    out = BytesIO()
    assert st.get_contents_to_fileobj("test1/x1", out) == {"k": "v"}
    assert out.getvalue() == b"dummy"
    st.store_file_from_memory("test1/x1", b"l", {"fancymetadata": "value"})
    assert st.get_contents_to_string("test1/x1") == (b"l", {"fancymetadata": "value"})
    st.store_file_from_memory("test1/x1", b"1", None)
    assert st.get_contents_to_string("test1/x1") == (b"1", {})
    st.store_file_from_memory("test1/td", b"to disk", {"to-disk": "42"})
    to_disk_file = str(scratch.join("b"))
    assert st.get_contents_to_file("test1/td", to_disk_file) == {"to-disk": "42"}
    assert st.list_path("") == []  # nothing at top level (directories not listed)
    if driver == "local":
        # create a dot-file (hidden), this must be ignored
        target_file = os.path.join(st.prefix, "test1/.null")
        with open(target_file, "w"):
            pass
    tlist = st.list_path("test1")
    assert len(tlist) == 2
    for fe in tlist:
        assert isinstance(fe["last_modified"], datetime.datetime)
        assert fe["last_modified"].tzinfo is not None
        if fe["name"] == "test1/x1":
            assert fe["size"] == 1
            assert fe["metadata"] == {}
        elif fe["name"] == "test1/td":
            assert fe["size"] == len(b"to disk")
            assert fe["metadata"] == {"to-disk": "42"}
        else:
            assert 0, "unexpected name in directory"
    if driver == "google":
        # test extra props for cacheControl in google
        st.store_file_from_memory("test1/x1", b"no cache test",
                                  metadata={"test": "value"},
                                  extra_props={"cacheControl": "no-cache"})
    if driver == "local":
        # test LocalFileIsRemoteFileError for local storage
        target_file = os.path.join(st.prefix, "test1/x1")
        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.store_file_from_disk("test1/x1", target_file, {"local": True})
        assert st.get_contents_to_string("test1/x1") == (b"1", {"local": "True"})
        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.get_contents_to_file("test1/x1", target_file)
        # unlink metadata file, this shouldn't break anything
        os.unlink(target_file + ".metadata")
        assert st.get_metadata_for_key("test1/x1") == {}
    st.delete_key("test1/x1")
    st.delete_key("test1/td")
    assert st.list_path("test1") == []  # empty again
    test_hash = hashlib.sha256()
    test_file = str(scratch.join("30m"))
    test_size_send = 0
    with open(test_file, "wb") as fp:
        chunk = b"30m file" * 10000
        while test_size_send < 30 * 1024 * 1024:
            test_hash.update(chunk)
            fp.write(chunk)
            test_size_send += len(chunk)
    test_hash_send = test_hash.hexdigest()
    if driver == "s3":
        # inject a failure in multipart uploads
        def failing_new_key(key_name):  # pylint: disable=unused-argument
            # fail after the second call, restore functionality after the third
            fail_calls[0] += 1
            if fail_calls[0] > 3:
                st.bucket.new_key = orig_new_key
            if fail_calls[0] > 2:
                raise Exception("multipart upload failure!")

        fail_calls = [0]
        orig_new_key = st.bucket.new_key
        st.bucket.new_key = failing_new_key
        st.store_file_from_disk("test1/30m", test_file, multipart=True,
                                metadata={"thirtymeg": "data", "size": test_size_send, "key": "value-with-a-hyphen"})
        assert fail_calls[0] > 3
    else:
        st.store_file_from_disk("test1/30m", test_file, multipart=True,
                                metadata={"thirtymeg": "data", "size": test_size_send, "key": "value-with-a-hyphen"})
    os.unlink(test_file)
    expected_meta = {"thirtymeg": "data", "size": str(test_size_send), "key": "value-with-a-hyphen"}
    meta = st.get_metadata_for_key("test1/30m")
    assert meta == expected_meta
    progress_reports = []

    def dl_progress(current_pos, expected_max):
        progress_reports.append((current_pos, expected_max))

    with open(test_file, "wb") as fp:
        assert st.get_contents_to_fileobj("test1/30m", fp, progress_callback=dl_progress) == expected_meta
    assert len(progress_reports) > 0
    assert progress_reports[-1][0] == progress_reports[-1][1]
    test_hash = hashlib.sha256()
    test_size_rec = 0
    with open(test_file, "rb") as fp:
        while True:
            chunk = fp.read(1024 * 1024)
            if not chunk:
                break
            test_hash.update(chunk)
            test_size_rec += len(chunk)
    test_hash_rec = test_hash.hexdigest()
    assert test_hash_rec == test_hash_send
    assert test_size_rec == test_size_send
    tlist = st.list_path("test1")
    assert len(tlist) == 1
    assert tlist[0]["name"] == "test1/30m"
    assert tlist[0]["size"] == test_size_rec
    if driver == "swift":
        segments = test_size_send // st.segment_size
        segment_list = st.list_path("test1_segments/30m")
        assert len(segment_list) >= segments
        if segments >= 2:
            # re-upload a file with the same name but with fewer chunks
            os.truncate(test_file, st.segment_size + 1)
            test_size_send = os.path.getsize(test_file)
            st.store_file_from_disk("test1/30m", test_file, multipart=True,
                                    metadata={"30m": "less data", "size": test_size_send})
            segment_list = st.list_path("test1_segments/30m")
            assert len(segment_list) == 2
            assert len(st.list_path("test1")) == 1
    st.delete_key("test1/30m")
    assert st.list_path("test1") == []
    if driver == "swift":
        assert st.list_path("test1_segments/30m") == []
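
# A self-contained sketch (added for illustration) generalizing the
# failure-injection trick in the s3 branch above: wrap a callable so it raises
# a fixed number of times before delegating, to exercise retry paths without a
# genuinely flaky backend. FlakyCallable is a hypothetical name.
class FlakyCallable:
    def __init__(self, func, failures=1):
        self.func = func
        self.remaining = failures

    def __call__(self, *args, **kwargs):
        if self.remaining > 0:
            self.remaining -= 1
            raise Exception("injected failure!")
        return self.func(*args, **kwargs)


# Example: make the first two upload calls fail, then recover
# bucket.new_key = FlakyCallable(bucket.new_key, failures=2)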
def _test_storage(st, driver, tmpdir, storage_config):
    scratch = tmpdir.join("scratch")
    compat.makedirs(str(scratch), exist_ok=True)
    # File not found cases
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_metadata_for_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.delete_key("NONEXISTENT")
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_file("NONEXISTENT", str(scratch.join("a")))
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_fileobj("NONEXISTENT", BytesIO())
    with pytest.raises(errors.FileNotFoundFromStorageError):
        st.get_contents_to_string("NONEXISTENT")
    assert st.list_path("") == []
    assert st.list_path("NONEXISTENT") == []
    st.store_file_from_memory("NONEXISTENT-a/x1", b"dummy", None)
    dummy_file = str(scratch.join("a"))
    with open(dummy_file, "wb") as fp:
        fp.write(b"dummy")
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, None)
    st.store_file_from_disk("NONEXISTENT-b/x1", dummy_file, {"x": 1})
    st.delete_key("NONEXISTENT-b/x1")
    st.delete_key("NONEXISTENT-a/x1")
    # Other basic cases
    from_disk_file = str(scratch.join("a"))
    input_data = b"from disk"
    if driver == "local":
        input_data = input_data * 150000
    with open(from_disk_file, "wb") as fp:
        fp.write(input_data)
    st.store_file_from_disk("test1/x1", from_disk_file, None)
    out = BytesIO()
    reported_positions = []

    def progress_callback(pos, total):
        reported_positions.append((pos, total))

    assert st.get_contents_to_fileobj("test1/x1", out, progress_callback=progress_callback) == {}
    assert out.getvalue() == input_data
    if driver == "local":
        input_size = len(input_data)
        assert reported_positions == [(1024 * 1024, input_size), (input_size, input_size)]
    if driver == "s3":
        response = st.s3_client.head_object(
            Bucket=st.bucket_name,
            Key=st.format_key_for_backend("test1/x1"),
        )
        assert bool(response.get("ServerSideEncryption")) == bool(storage_config.get("encrypted"))
    st.store_file_from_memory("test1/x1", b"dummy", {"k": "v"})
    out = BytesIO()
    assert st.get_contents_to_fileobj("test1/x1", out) == {"k": "v"}
    assert out.getvalue() == b"dummy"
    # Copy file
    st.copy_file(source_key="test1/x1", destination_key="test_copy/copy1")
    assert st.get_contents_to_string("test_copy/copy1") == (b"dummy", {"k": "v"})
    st.copy_file(source_key="test1/x1", destination_key="test_copy/copy2", metadata={"new": "meta"})
    assert st.get_contents_to_string("test_copy/copy2") == (b"dummy", {"new": "meta"})
    st.store_file_from_memory("test1/x1", b"l", {"fancymetadata": "value"})
    assert st.get_contents_to_string("test1/x1") == (b"l", {"fancymetadata": "value"})
    st.store_file_from_memory("test1/x1", b"1", None)
    assert st.get_contents_to_string("test1/x1") == (b"1", {})
    st.store_file_from_memory("test1/td", b"to disk", {"to-disk": "42"})
    to_disk_file = str(scratch.join("b"))
    assert st.get_contents_to_file("test1/td", to_disk_file) == {"to-disk": "42"}
    created_keys = {"test1/x1", "test1/td"}
    if driver == "s3":
        response = st.s3_client.head_object(
            Bucket=st.bucket_name,
            Key=st.format_key_for_backend("test1/x1"),
        )
        assert bool(response.get("ServerSideEncryption")) == bool(storage_config.get("encrypted"))
    assert st.list_path("") == []  # nothing at top level (directories not listed)
    if driver == "local":
        # create a dot-file (hidden), this must be ignored
        target_file = os.path.join(st.prefix, "test1/.null")
        with open(target_file, "w"):
            pass
    tlist = st.list_path("test1")
    assert len(tlist) == 2
    for fe in tlist:
        assert isinstance(fe["last_modified"], datetime.datetime)
        assert fe["last_modified"].tzinfo is not None
        if fe["name"] == "test1/x1":
            assert fe["size"] == 1
            assert fe["metadata"] == {}
        elif fe["name"] == "test1/td":
            assert fe["size"] == len(b"to disk")
            assert fe["metadata"] == {"to-disk": "42"}
        else:
            assert 0, "unexpected name in directory"
    assert set(st.iter_prefixes("test1")) == set()
    for key in ["test1/sub1/sub1.1", "test1/sub2/sub2.1/sub2.1.1", "test1/sub3"]:
        st.store_file_from_memory(key, b"1", None)
        created_keys.add(key)
    if driver == "local":
        # sub3 is a file. Actual object storage systems support this, but a file system does not
        with pytest.raises(NotADirectoryError):
            st.store_file_from_memory("test1/sub3/sub3.1/sub3.1.1", b"1", None)
    else:
        st.store_file_from_memory("test1/sub3/sub3.1/sub3.1.1", b"1", None)
        created_keys.add("test1/sub3/sub3.1/sub3.1.1")
    if driver == "local":
        assert set(st.iter_prefixes("test1")) == {"test1/sub1", "test1/sub2"}
    else:
        assert set(st.iter_prefixes("test1")) == {"test1/sub1", "test1/sub2", "test1/sub3"}
    assert {item["name"] for item in st.list_path("test1")} == {"test1/x1", "test1/td", "test1/sub3"}
    assert set(st.iter_prefixes("test1/sub1")) == set()
    assert {item["name"] for item in st.list_path("test1/sub1")} == {"test1/sub1/sub1.1"}
    assert {item["name"] for item in st.list_path("test1/sub2")} == set()
    assert {item["name"] for item in st.list_path("test1/sub3")} == set()
    assert set(st.iter_prefixes("test1/sub2")) == {"test1/sub2/sub2.1"}
    if driver == "local":
        assert set(st.iter_prefixes("test1/sub3")) == set()  # sub3 is a file
    else:
        assert set(st.iter_prefixes("test1/sub3")) == {"test1/sub3/sub3.1"}
    assert set(st.iter_prefixes("test1/sub3/3.1")) == set()
    expected_deep_iter_test1_names = {
        "test1/x1",
        "test1/td",
        "test1/sub1/sub1.1",
        "test1/sub2/sub2.1/sub2.1.1",
        "test1/sub3",
    }
    if driver != "local":
        expected_deep_iter_test1_names.add("test1/sub3/sub3.1/sub3.1.1")
    assert {item["name"] for item in st.list_path("test1", deep=True)} == expected_deep_iter_test1_names

    def _object_names(iterable):
        names = set()
        for item in iterable:
            assert item.type == KEY_TYPE_OBJECT
            names.add(item.value["name"])
        return names

    deep_names_with_key = _object_names(st.iter_key("test1/sub3", deep=True, include_key=True))
    deep_names_without_key = _object_names(st.iter_key("test1/sub3", deep=True, include_key=False))
    if driver == "local":
        assert deep_names_with_key == {"test1/sub3"}
        assert deep_names_without_key == set()
    else:
        assert deep_names_with_key == {"test1/sub3", "test1/sub3/sub3.1/sub3.1.1"}
        assert deep_names_without_key == {"test1/sub3/sub3.1/sub3.1.1"}
    if driver == "google":
        # test extra props for cacheControl in google
        st.store_file_from_memory("test1/x1", b"no cache test",
                                  metadata={"test": "value"},
                                  extra_props={"cacheControl": "no-cache"})
    if driver == "local":
        # test LocalFileIsRemoteFileError for local storage
        target_file = os.path.join(st.prefix, "test1/x1")
        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.store_file_from_disk("test1/x1", target_file, {"local": True})
        assert st.get_contents_to_string("test1/x1") == (b"1", {"local": "True"})
        with pytest.raises(errors.LocalFileIsRemoteFileError):
            st.get_contents_to_file("test1/x1", target_file)
        # Missing metadata is an error situation that should fail
        os.unlink(target_file + ".metadata")
        with pytest.raises(errors.FileNotFoundFromStorageError):
            st.get_metadata_for_key("test1/x1")
    for key in created_keys:
        st.delete_key(key)
    assert st.list_path("test1") == []  # empty again
    for name in ["test2/foo", "test2/suba/foo", "test2/subb/bar", "test2/subb/subsub/zob"]:
        st.store_file_from_memory(name, b"somedata")
    names = sorted(item["name"] for item in st.list_path("test2", deep=True))
    assert names == ["test2/foo", "test2/suba/foo", "test2/subb/bar", "test2/subb/subsub/zob"]
    st.delete_tree("test2")
    assert st.list_path("test2", deep=True) == []
    test_hash = hashlib.sha256()
    test_file = str(scratch.join("30m"))
    test_size_send = 0
    with open(test_file, "wb") as fp:
        chunk = b"30m file" * 10000
        while test_size_send < 30 * 1024 * 1024:
            test_hash.update(chunk)
            fp.write(chunk)
            test_size_send += len(chunk)
    test_hash_send = test_hash.hexdigest()
    st.store_file_from_disk("test1/30m", test_file, multipart=True,
                            metadata={"thirtymeg": "data", "size": test_size_send, "key": "value-with-a-hyphen"})
    os.unlink(test_file)
    expected_meta = {"thirtymeg": "data", "size": str(test_size_send), "key": "value-with-a-hyphen"}
    meta = st.get_metadata_for_key("test1/30m")
    assert meta == expected_meta
    progress_reports = []

    def dl_progress(current_pos, expected_max):
        progress_reports.append((current_pos, expected_max))

    with open(test_file, "wb") as fp:
        assert st.get_contents_to_fileobj("test1/30m", fp, progress_callback=dl_progress) == expected_meta
    assert len(progress_reports) > 0
    assert progress_reports[-1][0] == progress_reports[-1][1]
    test_hash = hashlib.sha256()
    test_size_rec = 0
    with open(test_file, "rb") as fp:
        while True:
            chunk = fp.read(1024 * 1024)
            if not chunk:
                break
            test_hash.update(chunk)
            test_size_rec += len(chunk)
    test_hash_rec = test_hash.hexdigest()
    assert test_hash_rec == test_hash_send
    assert test_size_rec == test_size_send
    tlist = st.list_path("test1")
    assert len(tlist) == 1
    assert tlist[0]["name"] == "test1/30m"
    assert tlist[0]["size"] == test_size_rec
    if driver == "swift":
        segments = test_size_send // st.segment_size
        segment_list = st.list_path("test1_segments/30m")
        assert len(segment_list) >= segments
        if segments >= 2:
            # re-upload a file with the same name but with fewer chunks
            os.truncate(test_file, st.segment_size + 1)
            test_size_send = os.path.getsize(test_file)
            st.store_file_from_disk("test1/30m", test_file, multipart=True,
                                    metadata={"30m": "less data", "size": test_size_send})
            segment_list = st.list_path("test1_segments/30m")
            assert len(segment_list) == 2
            assert len(st.list_path("test1")) == 1
    st.delete_key("test1/30m")
    assert st.list_path("test1") == []
    if driver == "swift":
        assert st.list_path("test1_segments/30m") == []
    progress_reports = []

    def upload_progress(progress):
        progress_reports.append(progress)

    for size in (300, 3 * 1024 * 1024, 11 * 1024 * 1024):
        progress_reports = []
        rds = RandomDataSource(size)
        key = "test1/{}b".format(size)
        st.store_file_object(key, rds, upload_progress_fn=upload_progress)
        # Progress may be reported after each chunk and chunk size depends on available memory
        # on current machine so there's no straightforward way of checking reasonable progress
        # updates were made. Just ensure they're ordered correctly if something was provided
        assert sorted(progress_reports) == progress_reports
        bio = BytesIO()
        st.get_contents_to_fileobj(key, bio)
        buffer = bio.getbuffer()
        assert len(buffer) == size
        assert buffer == rds.data
        st.delete_key(key)
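
# Hedged arithmetic sketch (illustrative only) of the Swift segment counts the
# tests above assert; segment_size is assumed to be 5 MiB here purely for the
# example, the real value comes from st.segment_size.
segment_size = 5 * 1024 * 1024
upload_size = 30 * 1024 * 1024  # the ~30 MiB test file
full_segments = upload_size // segment_size  # the listing must show at least this many
reupload_size = segment_size + 1  # truncated re-upload from the test
segments_after = -(-reupload_size // segment_size)  # ceiling division
assert segments_after == 2  # one full segment plus one byte of overflow
print(full_segments, segments_after)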