def with_adapter(
    adapter: str, bucket: str, other_bucket: str
) -> Generator[str, None, None]:
    """Configure the storage backend named by ``adapter`` and yield the scheme.

    For ``"gcs"`` the real GCS client is used (with credentials from the
    environment when available); for ``"fs"`` a local temp folder emulates the
    two buckets. Any other value raises ``ValueError``.

    Yields the URL scheme (``"gs"``) for tests to build paths with, then tears
    the adapter back down.
    """
    tmp_dir = None
    scheme = "gs"
    if adapter == "gcs":
        # Use GCS
        use_fs(False)
        credentials = gcs_credentials_from_env()
        if credentials is not None:
            set_client_params("gs", credentials=credentials)
    elif adapter == "fs":
        # Use local file-system in a temp folder
        tmp_dir = tempfile.mkdtemp()
        use_fs(tmp_dir)
        bucket_one = Pathy.from_bucket(bucket)
        if not bucket_one.exists():
            bucket_one.mkdir()
        bucket_two = Pathy.from_bucket(other_bucket)
        if not bucket_two.exists():
            bucket_two.mkdir()
    else:
        raise ValueError("invalid adapter, nothing is configured")
    try:
        # execute the test
        yield scheme
    finally:
        # Fix: run cleanup even when the test body raises, so the temp folder
        # and the global fs / fs-cache flags never leak into later tests.
        if adapter == "fs" and tmp_dir is not None:
            # Cleanup fs temp folder
            shutil.rmtree(tmp_dir)
        use_fs(False)
        use_fs_cache(False)
def with_adapter(adapter: str, bucket: str, other_bucket: str):
    """Configure the storage backend named by ``adapter`` for a test run.

    ``"gcs"`` uses GCS with system credentials; ``"fs"`` emulates both buckets
    in a local temp folder. Any other value raises ``ValueError``. Yields once
    so the test body runs, then tears the adapter back down.
    """
    tmp_dir = None
    if adapter == "gcs":
        # Use GCS (with system credentials)
        use_fs(False)
    elif adapter == "fs":
        # Use local file-system in a temp folder
        tmp_dir = tempfile.mkdtemp()
        use_fs(tmp_dir)
        bucket_one = Pathy.from_bucket(bucket)
        if not bucket_one.exists():
            bucket_one.mkdir()
        bucket_two = Pathy.from_bucket(other_bucket)
        if not bucket_two.exists():
            bucket_two.mkdir()
    else:
        raise ValueError("invalid adapter, nothing is configured")
    try:
        # execute the test
        yield
    finally:
        # Fix: run cleanup even when the test body raises, so the temp folder
        # and the global fs / fs-cache flags never leak into later tests.
        if adapter == "fs" and tmp_dir is not None:
            # Cleanup fs temp folder
            shutil.rmtree(tmp_dir)
        use_fs(False)
        use_fs_cache(False)
def test_cli_ls_local_files(with_adapter: str, bucket: str) -> None:
    """`ls` works both on a local temp folder and on a bucket path."""
    # Local temp folder: three small files, listed via the API
    local_root = Pathy.fluid(tempfile.mkdtemp()) / "ls"
    local_root.mkdir(parents=True, exist_ok=True)
    for idx in range(3):
        (local_root / f"file_{idx}").write_text("NICE")
    listed = list(local_root.ls())
    assert len(listed) == 3
    for idx, blob_stat in enumerate(listed):
        assert blob_stat.name == f"file_{idx}"
        assert blob_stat.size == 4
        assert blob_stat.last_modified is not None

    # Bucket path: two blobs plus one nested blob, listed via the CLI
    root = Pathy.from_bucket(bucket) / "cli_ls"
    targets = [
        str(root / "file.txt"),
        str(root / "other.txt"),
        str(root / "folder/file.txt"),
    ]
    for target in targets:
        Pathy(target).write_text("---")
    one, two, _ = targets
    # Plain listing and long listing must both show the two blobs and the folder
    for extra in ([], ["-l"]):
        result = runner.invoke(app, ["ls", *extra, str(root)])
        assert result.exit_code == 0
        assert one in result.output
        assert two in result.output
        assert str(root / "folder") in result.output
def test_cli_ls_invalid_source(with_adapter: str, bucket: str) -> None:
    """`ls` on a nonexistent blob exits 1 and reports a not-found error."""
    missing = str(
        Pathy.from_bucket(bucket) / ENV_ID / "cli_ls_invalid" / "folder/file.txt"
    )
    result = runner.invoke(app, ["ls", missing])
    assert result.exit_code == 1
    assert "No such file or directory" in result.output
def test_cli_mv_folder_across_buckets(with_adapter, bucket: str, other_bucket: str):
    """`mv` relocates a nested folder from one bucket into another."""
    src = Pathy.from_bucket(bucket) / "cli_mv_folder_across_buckets"
    dst = Pathy.from_bucket(other_bucket) / "cli_mv_folder_across_buckets"
    pairs = [(a, b) for a in range(2) for b in range(2)]
    for a, b in pairs:
        (src / f"{a}" / f"{b}").write_text("---")
    assert runner.invoke(app, ["mv", str(src), str(dst)]).exit_code == 0
    assert not Pathy(src).exists()
    assert Pathy(dst).is_dir()
    # Every source file is gone and every destination file exists
    for a, b in pairs:
        assert not (src / f"{a}" / f"{b}").is_file()
        assert (dst / f"{a}" / f"{b}").is_file()
def test_api_ignore_extension(with_adapter, bucket: str):
    """The smart_open library does automatic decompression based on the
    filename. We disable that to avoid errors, e.g. if you have a .tar.gz
    file that isn't gzipped."""
    blob = Pathy.from_bucket(bucket) / "ignore_ext/one.tar.gz"
    fixture = Path(__file__).parent / "fixtures" / "tar_but_not_gzipped.tar.gz"
    blob.write_bytes(fixture.read_bytes())
    # Reading back must not attempt gzip decompression despite the extension
    round_trip = blob.read_bytes()
    assert round_trip is not None
def test_cli_cp_folder(with_adapter, bucket: str):
    """`cp` recursively copies a folder, leaving the source intact."""
    base = Pathy.from_bucket(bucket)
    src = base / "cli_cp_folder"
    dst = base / "cli_cp_folder_other"
    pairs = [(a, b) for a in range(2) for b in range(2)]
    for a, b in pairs:
        (src / f"{a}" / f"{b}").write_text("---")
    assert runner.invoke(app, ["cp", str(src), str(dst)]).exit_code == 0
    # Source survives the copy; destination mirrors its contents
    assert Pathy(src).exists()
    assert Pathy(dst).is_dir()
    for a, b in pairs:
        assert (dst / f"{a}" / f"{b}").is_file()
def test_cli_ls(with_adapter, bucket: str):
    """`ls` lists the blobs and folder prefixes directly under a path."""
    root = Pathy.from_bucket(bucket) / "cli_ls"
    paths = [
        str(root / name) for name in ("file.txt", "other.txt", "folder/file.txt")
    ]
    for path in paths:
        Pathy(path).write_text("---")
    result = runner.invoke(app, ["ls", str(root)])
    assert result.exit_code == 0
    # Direct children are listed by full path; the nested blob shows as a folder
    assert paths[0] in result.output
    assert paths[1] in result.output
    assert str(root / "folder") in result.output
def test_cli_rm_folder(with_adapter, bucket: str):
    """`rm` refuses a folder without -r and deletes it recursively with -r."""
    target = Pathy.from_bucket(bucket) / "cli_rm_folder"
    children = [target / f"{a}" / f"{b}" for a in range(2) for b in range(2)]
    for child in children:
        child.write_text("---")
    # Returns exit code 1 without recursive flag when given a folder
    assert runner.invoke(app, ["rm", str(target)]).exit_code == 1
    assert runner.invoke(app, ["rm", "-r", str(target)]).exit_code == 0
    assert not Pathy(target).exists()
    # Ensure source files are gone
    for child in children:
        assert not child.is_file()
def test_cli_rm_verbose(with_adapter, bucket: str):
    """`rm -v` names each removed blob; `-rv` includes nested blobs."""
    base = Pathy.from_bucket(bucket) / "cli_rm_folder"
    file_path = str(base / "file.txt")
    nested_path = str(base / "folder/other")
    Pathy(file_path).write_text("---")
    Pathy(nested_path).write_text("---")
    # Non-recursive verbose remove reports only the targeted blob
    outcome = runner.invoke(app, ["rm", "-v", file_path])
    assert outcome.exit_code == 0
    assert file_path in outcome.output
    assert nested_path not in outcome.output
    # Recursive verbose remove reports every blob under the folder
    Pathy(file_path).write_text("---")
    outcome = runner.invoke(app, ["rm", "-rv", str(base)])
    assert outcome.exit_code == 0
    assert file_path in outcome.output
    assert nested_path in outcome.output
def test_api_path_to_local(with_adapter: str, bucket: str) -> None:
    """Pathy.to_local caches blobs and folders locally; clear_fs_cache
    removes the cached files."""
    root: Pathy = Pathy.from_bucket(bucket) / "to_local"
    foo_blob: Pathy = root / "foo"
    foo_blob.write_text("---")
    assert isinstance(foo_blob, Pathy)
    use_fs_cache()

    # Cache a blob; a second call must resolve to the identical cached file
    cached: Path = Pathy.to_local(foo_blob)
    assert isinstance(cached, Path)
    assert cached.exists() and cached.is_file(), "local file should exist"
    repeat: Path = Pathy.to_local(foo_blob)
    assert repeat == cached, "must be the same path"
    assert repeat.stat() == cached.stat(), "must have the same stat"

    # Cache a folder hierarchy with blobs
    complex_folder = root / "complex"
    for i in range(3):
        for j in range(2):
            (complex_folder / f"folder_{i}" / f"file_{j}.txt").write_text("---")
    cached_folder: Path = Pathy.to_local(complex_folder)
    assert isinstance(cached_folder, Path)
    assert cached_folder.exists() and cached_folder.is_dir()

    # Verify all the files exist in the file-system cache folder
    for i in range(3):
        for j in range(2):
            local_blob = cached_folder / f"folder_{i}" / f"file_{j}.txt"
            assert local_blob.exists()
            assert local_blob.read_text() == "---"

    clear_fs_cache()
    assert not cached.exists(), "cache clear should delete file"
def test_api_iterdir_pipstore(with_adapter: str, bucket: str) -> None:
    """iterdir on a prefix yields only the immediate child folder name."""
    blob = Pathy.from_bucket(bucket) / "iterdir_pipstore/prodigy/prodigy.whl"
    blob.write_bytes(b"---")
    parent = Pathy.from_bucket(bucket) / "iterdir_pipstore"
    names = [entry.name for entry in sorted(parent.iterdir())]
    assert names == ["prodigy"]