Beispiel #1
0
def test_api_fluid(with_adapter: str, bucket: str) -> None:
    """Check which class ``Pathy.fluid`` returns for bucket and local inputs."""
    # A gs:// URI is expected to come back as a Pathy instance.
    bucket_path: FluidPath = Pathy.fluid(f"gs://{bucket}/fake-key")
    assert isinstance(bucket_path, Pathy)
    # A relative and an absolute local path are both expected to be BasePath.
    for local_name in ("foo/bar.txt", "/dev/null"):
        local_path: FluidPath = Pathy.fluid(local_name)
        assert isinstance(local_path, BasePath)
Beispiel #2
0
def test_cli_ls_local_files(with_adapter: str, bucket: str) -> None:
    """Check ``ls()`` on a local temp folder, then the ``ls`` CLI command on a bucket.

    The first half writes three 4-byte files into a temporary folder and checks
    the name, size and last-modified value reported for each one. The second
    half writes three blobs under ``cli_ls`` in the bucket and checks that both
    ``ls`` and ``ls -l`` exit with code 0 and mention the two top-level files
    and the ``folder`` prefix in their output.
    """
    # The context manager removes the temp folder even when an assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Pathy.fluid(tmp_dir) / "ls"
        root.mkdir(parents=True, exist_ok=True)
        for i in range(3):
            (root / f"file_{i}").write_text("NICE")
        # Sort by name so the positional checks below do not depend on the
        # order in which the listing happens to return entries.
        files = sorted(root.ls(), key=lambda blob_stat: blob_stat.name)
        assert len(files) == 3
        for i, blob_stat in enumerate(files):
            assert blob_stat.name == f"file_{i}"
            assert blob_stat.size == 4
            assert blob_stat.last_modified is not None

    root = Pathy.from_bucket(bucket) / "cli_ls"
    one = str(root / "file.txt")
    two = str(root / "other.txt")
    three = str(root / "folder/file.txt")
    Pathy(one).write_text("---")
    Pathy(two).write_text("---")
    Pathy(three).write_text("---")

    # The plain and the long (-l) listing must both report the same entries.
    for cli_args in (["ls", str(root)], ["ls", "-l", str(root)]):
        result = runner.invoke(app, cli_args)
        assert result.exit_code == 0
        assert one in result.output
        assert two in result.output
        assert str(root / "folder") in result.output
def test_cli_cp_file(with_adapter, bucket: str):
    """Copy one blob with the ``cp`` command and check both ends afterwards."""
    src_uri = f"gs://{bucket}/cli_cp_file/file.txt"
    dst_uri = f"gs://{bucket}/cli_cp_file/other.txt"
    Pathy(src_uri).write_text("---")
    outcome = runner.invoke(app, ["cp", src_uri, dst_uri])
    assert outcome.exit_code == 0
    # After a copy the source must still exist and the destination is a file.
    assert Pathy(src_uri).exists()
    assert Pathy(dst_uri).is_file()
Beispiel #4
0
def test_api_replace_files_in_bucket(with_adapter: str, bucket: str) -> None:
    """Replace a single blob with a new key inside the same bucket."""
    old_uri = f"gs://{bucket}/replace/file.txt"
    new_uri = f"gs://{bucket}/replace/other.txt"
    Pathy(old_uri).write_text("---")
    Pathy(old_uri).replace(new_uri)
    # The old key must be gone and the new key must be a file.
    assert not Pathy(old_uri).exists()
    assert Pathy(new_uri).is_file()
Beispiel #5
0
def with_adapter(adapter: str, bucket: str, other_bucket: str):
    """Configure the requested adapter, yield once, then reset the configuration.

    ``"gcs"`` calls ``use_fs(False)``. ``"fs"`` points ``use_fs`` at a fresh
    temp folder and creates both buckets there if they do not exist. Any other
    value raises ``ValueError``. After the yield, the temp folder (if one was
    made) is removed and ``use_fs`` and ``use_fs_cache`` are called with
    ``False``.
    """
    if adapter not in ("gcs", "fs"):
        raise ValueError("invalid adapter, nothing is configured")

    fs_root = None
    if adapter == "fs":
        # Use local file-system in a temp folder
        fs_root = tempfile.mkdtemp()
        use_fs(fs_root)
        for bucket_name in (bucket, other_bucket):
            bucket_path = Pathy.from_bucket(bucket_name)
            if not bucket_path.exists():
                bucket_path.mkdir()
    else:
        # Use GCS (with system credentials)
        use_fs(False)

    # execute the test
    yield

    if fs_root is not None:
        # Cleanup fs temp folder
        shutil.rmtree(fs_root)
    use_fs(False)
    use_fs_cache(False)
Beispiel #6
0
    def _load_object_detection_api(self, model_spec: ObjectDetectionAPI_ModelSpec):
        """Build an Object Detection API model and restore its checkpoint.

        The pipeline config at ``model_spec.config_path`` and every file
        matching ``<model_spec.checkpoint_path>*`` are copied through fsspec
        into a temporary directory. The model is built from the copied config
        with ``is_training=False`` and the checkpoint is restored from the
        copied files.

        Sets ``self.model`` and ``self.input_dtype`` (``np.float32``).

        Args:
            model_spec: provides ``config_path`` and ``checkpoint_path``.
        """
        import tensorflow as tf
        from object_detection.utils import config_util
        from object_detection.builders import model_builder

        # Context manager: the temporary copies are removed even if building
        # or restoring the model raises.
        with tempfile.TemporaryDirectory() as temp_dir_name:
            temp_dir_path = Path(temp_dir_name)

            # Copy the pipeline config next to the checkpoint files.
            model_config_path = temp_dir_path / Pathy(model_spec.config_path).name
            with open(model_config_path, 'wb') as out, \
                    fsspec.open(model_spec.config_path, 'rb') as src:
                out.write(src.read())

            # Copy every file whose path starts with the checkpoint path.
            src_checkpoint_path = Pathy(model_spec.checkpoint_path)
            checkpoint_path = temp_dir_path / src_checkpoint_path.name
            for src_file in fsspec.open_files(f"{src_checkpoint_path}*", 'rb'):
                out_file = temp_dir_path / Pathy(src_file.path).name
                with open(out_file, 'wb') as out, src_file as src:
                    out.write(src.read())

            configs = config_util.get_configs_from_pipeline_file(
                pipeline_config_path=str(model_config_path)
            )
            model_config = configs['model']
            self.model = model_builder.build(
                model_config=model_config, is_training=False
            )
            ckpt = tf.compat.v2.train.Checkpoint(model=self.model)
            ckpt.restore(str(checkpoint_path)).expect_partial()
            self.input_dtype = np.float32

            # Run model through a dummy image so that variables are created.
            # This stays inside the ``with`` block so it still happens before
            # the temporary files are removed, as in the original ordering.
            zeros = np.zeros([640, 640, 3])
            self._raw_predict_single_image_default(zeros)
Beispiel #7
0
def with_adapter(
    adapter: str, bucket: str, other_bucket: str
) -> Generator[str, None, None]:
    """Configure the requested adapter, yield the ``"gs"`` scheme, then reset.

    ``"gcs"`` calls ``use_fs(False)`` and, when ``gcs_credentials_from_env()``
    returns something other than ``None``, passes it to ``set_client_params``.
    ``"fs"`` points ``use_fs`` at a fresh temp folder and creates both buckets
    there if they do not exist. Any other value raises ``ValueError``. After
    the yield, the temp folder (if one was made) is removed and ``use_fs`` and
    ``use_fs_cache`` are called with ``False``.
    """
    if adapter not in ("gcs", "fs"):
        raise ValueError("invalid adapter, nothing is configured")

    yielded_scheme = "gs"
    fs_root = None
    if adapter == "gcs":
        # Use GCS
        use_fs(False)
        env_credentials = gcs_credentials_from_env()
        if env_credentials is not None:
            set_client_params("gs", credentials=env_credentials)
    else:
        # Use local file-system in a temp folder
        fs_root = tempfile.mkdtemp()
        use_fs(fs_root)
        for bucket_name in (bucket, other_bucket):
            bucket_path = Pathy.from_bucket(bucket_name)
            if not bucket_path.exists():
                bucket_path.mkdir()

    # execute the test
    yield yielded_scheme

    if fs_root is not None:
        # Cleanup fs temp folder
        shutil.rmtree(fs_root)
    use_fs(False)
    use_fs_cache(False)
Beispiel #8
0
def test_api_rename_files_in_bucket(with_adapter, bucket: str):
    """Rename a single blob to a new key inside the same bucket."""
    old_uri = f"gs://{bucket}/rename/file.txt"
    new_uri = f"gs://{bucket}/rename/other.txt"
    Pathy(old_uri).write_text("---")
    Pathy(old_uri).rename(new_uri)
    # The old key must be gone and the new key must be a file.
    assert not Pathy(old_uri).exists()
    assert Pathy(new_uri).is_file()
Beispiel #9
0
def test_gcs_scandir_list_buckets(with_adapter: str, bucket: str,
                                  other_bucket: str) -> None:
    """Scanning an empty ``Pathy()`` with ScanDirGCS yields exactly the two buckets."""
    from pathy.gcs import ScanDirGCS

    # Any gs:// path will do here; it is only used to obtain a client.
    any_blob = Pathy("gs://foo/bar")
    gcs_client = any_blob._accessor.client(any_blob)  # type:ignore
    entries = ScanDirGCS(client=gcs_client, path=Pathy())
    found_names = sorted([entry.name for entry in entries])
    expected_names = sorted([bucket, other_bucket])
    assert found_names == expected_names
def test_cli_mv_file_across_buckets(with_adapter, bucket: str, other_bucket: str):
    """Move one blob into a different bucket with the ``mv`` command."""
    src_uri = f"gs://{bucket}/cli_mv_file_across_buckets/file.txt"
    dst_uri = f"gs://{other_bucket}/cli_mv_file_across_buckets/other.txt"
    Pathy(src_uri).write_text("---")
    assert Pathy(src_uri).exists()
    outcome = runner.invoke(app, ["mv", src_uri, dst_uri])
    assert outcome.exit_code == 0
    # After a move the source is gone and the destination is a file.
    assert not Pathy(src_uri).exists()
    assert Pathy(dst_uri).is_file()
Beispiel #11
0
def test_api_readwrite_lines(with_adapter: str, bucket: str) -> None:
    """Write one line with ``writelines`` and read it back two different ways."""
    target = Pathy(f"gs://{bucket}/write_text/file.txt")
    with target.open("w") as writer:
        writer.writelines(["---"])

    with target.open("r") as reader:
        all_lines = reader.readlines()
    assert all_lines == ["---"]

    with target.open("rt") as reader:
        first_line = reader.readline()
    assert first_line == "---"
Beispiel #12
0
def test_api_is_file(with_adapter: str, bucket: str) -> None:
    """Only the written blob reports ``is_file()``; none of its parents do."""
    blob = Pathy(f"gs://{bucket}/is_file/subfolder/another/my.file")
    blob.write_text("---")
    # The full file is a file
    assert blob.is_file() is True
    # Each parent node in the path is only a directory
    parents_reported_as_files = [
        ancestor for ancestor in blob.parents if ancestor.is_file() is not False
    ]
    assert parents_reported_as_files == []
Beispiel #13
0
def test_api_replace_files_across_buckets(with_adapter: str, bucket: str,
                                          other_bucket: str) -> None:
    """Replace a single blob with a key that lives in a different bucket."""
    old_uri = f"gs://{bucket}/replace/file.txt"
    new_uri = f"gs://{other_bucket}/replace/other.txt"
    Pathy(old_uri).write_text("---")
    Pathy(old_uri).replace(new_uri)
    # The old key must be gone and the key in the other bucket must be a file.
    assert not Pathy(old_uri).exists()
    assert Pathy(new_uri).is_file()
Beispiel #14
0
def test_file_get_blob_owner_key_error_protection(with_adapter: str) -> None:
    """A blob fetched through the bucket client exists and has ``owner`` set to None."""
    bucket_path = Pathy("gs://my_bucket")
    bucket_path.mkdir()
    blob_path = bucket_path / "blob.txt"
    blob_path.write_text("hello world!")

    fs_client: BucketClientFS = get_client("gs")
    fs_bucket: BucketFS = fs_client.get_bucket(bucket_path)
    found: Optional[BlobFS] = fs_bucket.get_blob("blob.txt")
    assert found is not None
    assert found.owner is None
Beispiel #15
0
def test_s3_scandir_list_buckets(with_adapter: str, bucket: str,
                                 other_bucket: str) -> None:
    """Scanning an empty ``Pathy()`` with ScanDirS3 includes both test buckets."""
    from pathy.s3 import ScanDirS3

    # Any s3:// path will do here; it is only used to obtain a client.
    any_blob = Pathy("s3://foo/bar")
    s3_client = any_blob._accessor.client(any_blob)  # type:ignore
    entries = ScanDirS3(client=s3_client, path=Pathy())
    found_names = [entry.name for entry in entries]
    for expected_name in (bucket, other_bucket):
        assert expected_name in found_names
Beispiel #16
0
def test_api_ls_blobs_with_stat(with_adapter: str, bucket: str) -> None:
    """Check the name, size and last-modified value that ``ls()`` reports per blob.

    Three 4-byte blobs are written under ``ls`` in the bucket; the listing must
    contain exactly those three entries.
    """
    root = Pathy(f"gs://{bucket}/ls")
    for i in range(3):
        (root / f"file_{i}").write_text("NICE")
    # Sort by name so the positional checks below do not depend on the order
    # in which the listing happens to return entries.
    files = sorted(root.ls(), key=lambda blob_stat: blob_stat.name)
    assert len(files) == 3
    for i, blob_stat in enumerate(files):
        assert blob_stat.name == f"file_{i}"
        assert blob_stat.size == 4
        assert blob_stat.last_modified is not None
Beispiel #17
0
def test_file_bucket_client_fs_make_uri(with_adapter: str) -> None:
    """``make_uri`` maps a gs:// path under the client root and rejects an empty path."""
    fs_client: BucketClientFS = get_client("gs")
    uri = fs_client.make_uri(Pathy("gs://foo/bar"))
    assert uri == f"file://{fs_client.root}/foo/bar"

    # Invalid root
    empty_path = Pathy("")
    with pytest.raises(ValueError):
        fs_client.make_uri(empty_path)
Beispiel #18
0
def test_api_rmdir(with_adapter: str, bucket: str) -> None:
    """``rmdir`` on a folder removes the folder and the blobs written beneath it.

    One blob is written directly in ``rmdir/`` and one in ``rmdir/folder/``;
    after ``rmdir()`` neither may be a file and the folder may not exist.
    """
    Pathy(f"gs://{bucket}/rmdir/one.txt").write_text("---")
    Pathy(f"gs://{bucket}/rmdir/folder/two.txt").write_text("---")
    path = Pathy(f"gs://{bucket}/rmdir/")
    path.rmdir()
    assert not Pathy(f"gs://{bucket}/rmdir/one.txt").is_file()
    # Check the nested blob that was actually written above. The previous
    # assertion looked at "other/two.txt", a key that never existed, so it
    # passed regardless of what rmdir did.
    assert not Pathy(f"gs://{bucket}/rmdir/folder/two.txt").is_file()
    assert not path.exists()
def test_cli_ls(with_adapter, bucket: str):
    """The ``ls`` command lists top-level blobs and the nested folder prefix."""
    listing_root = Pathy.from_bucket(bucket) / "cli_ls"
    first_blob = str(listing_root / "file.txt")
    second_blob = str(listing_root / "other.txt")
    nested_blob = str(listing_root / "folder/file.txt")
    for blob_uri in (first_blob, second_blob, nested_blob):
        Pathy(blob_uri).write_text("---")

    outcome = runner.invoke(app, ["ls", str(listing_root)])
    assert outcome.exit_code == 0
    assert first_blob in outcome.output
    assert second_blob in outcome.output
    # The nested blob is checked through its containing folder.
    assert str(listing_root / "folder") in outcome.output
def test_cli_cp_folder(with_adapter, bucket: str):
    """Copy a two-level folder with the ``cp`` command and check every copied blob."""
    bucket_root = Pathy.from_bucket(bucket)
    source = bucket_root / "cli_cp_folder"
    destination = bucket_root / "cli_cp_folder_other"
    # Relative keys "0/0", "0/1", "1/0", "1/1" in the same order as nested loops.
    keys = [(f"{i}", f"{j}") for i in range(2) for j in range(2)]
    for outer, inner in keys:
        (source / outer / inner).write_text("---")

    outcome = runner.invoke(app, ["cp", str(source), str(destination)])
    assert outcome.exit_code == 0
    assert Pathy(source).exists()
    assert Pathy(destination).is_dir()
    for outer, inner in keys:
        assert (destination / outer / inner).is_file()
Beispiel #21
0
def test_file_bucket_client_fs_create_bucket(with_adapter: str) -> None:
    """``create_bucket`` rejects an empty path, creates once, and fails on a repeat."""
    fs_client: BucketClientFS = get_client("gs")

    # Invalid root
    empty_path = Pathy("")
    with pytest.raises(ValueError):
        fs_client.create_bucket(empty_path)

    # Can create a bucket with a valid path
    bucket_path = Pathy("gs://bucket_name")
    created = fs_client.create_bucket(bucket_path)
    assert created is not None

    # Bucket already exists error
    with pytest.raises(FileExistsError):
        fs_client.create_bucket(bucket_path)
Beispiel #22
0
def pathy_fixture():
    """Yield a ``gs://test-bucket`` root while ``use_fs`` points at a temp folder.

    After the yield, ``use_fs(False)`` is called and the temp folder is removed.
    """
    import shutil
    import tempfile
    from pathy import Pathy, use_fs

    scratch_dir = tempfile.mkdtemp(prefix="thinc-pathy")
    use_fs(scratch_dir)

    bucket_root = Pathy("gs://test-bucket")
    bucket_root.mkdir(exist_ok=True)

    yield bucket_root

    # Teardown: reset the adapter first, then delete the backing folder.
    use_fs(False)
    shutil.rmtree(scratch_dir)
Beispiel #23
0
    def _load_tensorflow_KeypointsRegressor_model_spec(
            self, model_spec: TensorFlow_KeypointsRegressorModelSpec):
        """Load the model described by ``model_spec`` and store it on ``self``.

        For ``"tf.keras"``, ``"tf.saved_model"``, ``"tflite"`` and
        ``"tflite_one_image_per_batch"`` the model at ``model_spec.model_path``
        is first copied to a temporary directory (when the path is a directory)
        or a temporary file (otherwise), loaded from there, and the temporary
        copy is then released. For ``"tf.keras.Model"`` the value of
        ``model_spec.model_path`` is used as the model object itself.

        Sets ``self.model`` and ``self.input_dtype``; additionally
        ``self.loaded_model`` for ``"tf.saved_model"`` and ``self.input_index``
        and ``self.output_index`` for the tflite types.

        Raises:
            ValueError: if ``model_spec.saved_model_type`` is none of the above.
        """
        import tensorflow as tf
        if model_spec.saved_model_type in [
                "tf.keras", "tf.saved_model", "tflite",
                "tflite_one_image_per_batch"
        ]:
            model_openfile = fsspec.open(model_spec.model_path, 'rb')
            temp_file = None
            if model_openfile.fs.isdir(model_openfile.path):
                temp_folder = copy_files_from_directory_to_temp_directory(
                    directory=model_spec.model_path)
                temp_files_cleanup = temp_folder.cleanup
                model_path = Pathy(temp_folder.name)
            else:
                temp_file = tempfile.NamedTemporaryFile()
                temp_files_cleanup = temp_file.close
                model_path = Pathy(temp_file.name)

            # try/finally: release the temporary copy even if loading fails.
            try:
                if temp_file is not None:
                    with model_openfile as src:
                        temp_file.write(src.read())
                    # The loaders below reopen the file by name, so buffered
                    # bytes must be pushed to disk before they read it.
                    temp_file.flush()

                # Equality, not ``in``: ``in "tf.keras"`` was a substring test.
                if model_spec.saved_model_type == "tf.keras":
                    self.model = tf.keras.models.load_model(str(model_path),
                                                            compile=False)
                    self.input_dtype = np.float32
                elif model_spec.saved_model_type == "tf.saved_model":
                    self.loaded_model = tf.saved_model.load(
                        str(model_path))  # to protect from gc
                    self.model = self.loaded_model.signatures["serving_default"]
                    self.input_dtype = np.float32
                elif model_spec.saved_model_type in [
                        'tflite', 'tflite_one_image_per_batch'
                ]:
                    self.model = tf.lite.Interpreter(str(model_path))
                    # self.model.allocate_tensors()
                    input_details = self.model.get_input_details()[0]
                    self.input_index = input_details['index']
                    self.input_dtype = input_details['dtype']
                    self.output_index = self.model.get_output_details()[0]['index']
            finally:
                temp_files_cleanup()

        elif model_spec.saved_model_type == "tf.keras.Model":
            self.model = model_spec.model_path
            self.input_dtype = np.float32
        else:
            raise ValueError(
                "Tensorflow_KeypointsRegressorModel got unknown saved_model_type "
                f"in TensorFlow_KeypointsRegressorModelSpec: {model_spec.saved_model_type}"
            )
def test_cli_rm_folder(with_adapter, bucket: str):
    """``rm`` on a folder fails without ``-r`` and removes everything with it."""
    folder = Pathy.from_bucket(bucket) / "cli_rm_folder"
    # Relative keys "0/0", "0/1", "1/0", "1/1" in the same order as nested loops.
    keys = [(f"{i}", f"{j}") for i in range(2) for j in range(2)]
    for outer, inner in keys:
        (folder / outer / inner).write_text("---")

    # Returns exit code 1 without recursive flag when given a folder
    plain = runner.invoke(app, ["rm", str(folder)])
    assert plain.exit_code == 1
    recursive = runner.invoke(app, ["rm", "-r", str(folder)])
    assert recursive.exit_code == 0
    assert not Pathy(folder).exists()
    # Ensure source files are gone
    for outer, inner in keys:
        assert not (folder / outer / inner).is_file()
Beispiel #25
0
def test_scandir_custom_class(bucket: str) -> None:
    """Iterating a ``MockScanDir`` over the bucket root yields exactly one entry."""
    use_fs(True)
    try:
        client = BucketClientFS()
        root = Pathy(f"gs://{bucket}/")
        scandir = MockScanDir(client=client, path=root)
        blobs = list(scandir)
        assert len(blobs) == 1
    finally:
        # Undo the adapter switch made above so it does not carry over into
        # whatever runs next, pass or fail.
        use_fs(False)
Beispiel #26
0
def test_cli_cp_file_name_from_source(with_adapter: str, bucket: str) -> None:
    """``cp`` to a destination ending in ``/`` keeps the source file name.

    A local ``./file.txt`` is created, copied to a bucket folder, and the
    destination is expected to contain ``file.txt``.
    """
    source = pathlib.Path("./file.txt")
    source.touch()
    try:
        destination = f"gs://{bucket}/{ENV_ID}/cli_cp_file/"
        assert runner.invoke(app, ["cp", str(source), destination]).exit_code == 0
        assert Pathy(f"{destination}file.txt").is_file()
    finally:
        # Remove the local file even when an assertion above fails, so the
        # working directory is not left with a stray file.txt.
        source.unlink()
def test_cli_rm_verbose(with_adapter, bucket: str):
    """With ``-v`` the ``rm`` command prints the blobs it removes."""
    folder = Pathy.from_bucket(bucket) / "cli_rm_folder"
    top_blob = str(folder / "file.txt")
    nested_blob = str(folder / "folder/other")
    Pathy(top_blob).write_text("---")
    Pathy(nested_blob).write_text("---")

    # Removing one file mentions that file only.
    single = runner.invoke(app, ["rm", "-v", top_blob])
    assert single.exit_code == 0
    assert top_blob in single.output
    assert nested_blob not in single.output

    # Recreate the removed file, then remove the whole folder recursively.
    Pathy(top_blob).write_text("---")
    recursive = runner.invoke(app, ["rm", "-rv", str(folder)])
    assert recursive.exit_code == 0
    assert top_blob in recursive.output
    assert nested_blob in recursive.output
Beispiel #28
0
def test_s3_bucket_client_list_blobs(with_adapter: str, bucket: str) -> None:
    """Test corner-case in S3 client that isn't easily reachable from Pathy.

    Listing blobs for the bucket name used here is expected to yield nothing.
    """
    from pathy.s3 import BucketClientS3

    s3_client: BucketClientS3 = get_client("s3")
    unknown_bucket = Pathy("s3://invalid_h3gE_ds5daEf_Sdf15487t2n4")
    listed = list(s3_client.list_blobs(unknown_bucket))
    assert len(listed) == 0
Beispiel #29
0
def test_s3_scandir_invalid_bucket_name(with_adapter: str) -> None:
    """ScanDirS3 over the bucket name used here is expected to yield no entries."""
    from pathy.s3 import ScanDirS3

    # ``with_adapter`` supplies the URI scheme for the path.
    unknown_path = Pathy(f"{with_adapter}://invalid_h3gE_ds5daEf_Sdf15487t2n4/bar")
    s3_client = unknown_path._accessor.client(unknown_path)  # type:ignore
    entries = list(ScanDirS3(client=s3_client, path=unknown_path))
    assert len(entries) == 0
Beispiel #30
0
def test_file_scandir_list_buckets(
    with_adapter: str, bucket: str, other_bucket: str
) -> None:
    """Scanning an empty ``Pathy()`` with ScanDirFS yields exactly the two buckets."""
    top_level = Pathy()
    fs_client = top_level._accessor.client(top_level)  # type:ignore
    entries = ScanDirFS(client=fs_client, path=top_level)
    found_names = sorted([entry.name for entry in entries])
    expected_names = sorted([bucket, other_bucket])
    assert found_names == expected_names