Esempio n. 1
0
def test_joining_absolute_paths():
    """Check that prepend and append agree when joining two paths.

    One absolute path is converted to its relative form and then joined with
    a second absolute path in both directions; the two results must be equal.
    """
    source = AbsolutePath("/an/absolute/path.ext")
    prefix = AbsolutePath("/prefix/absolute/path")
    suffix = source.as_relative_path()
    via_prepend, via_append = suffix.prepend(prefix), prefix.append(suffix)
    assert via_prepend == via_append
Esempio n. 2
0
class GCStorageAdapter(IOAdapter):
    """IOAdapter backed by a storage bucket addressed with ``gs://`` paths.

    Remote objects are staged through a local temporary directory: reads
    download the blob to that directory and open the local copy, writes go to
    a local file that is uploaded once the write context exits successfully.

    NOTE(review): ``storage`` is presumably ``google.cloud.storage`` -- confirm
    against the file's imports. The temporary directory created in
    ``__init__`` is never removed by this class.
    """

    # Lazily created helpers; ``None`` until first accessed via the properties.
    _file_list: Optional[FileList]
    _bucket: Optional[Any]
    _client: Optional[Any]

    def __init__(self,
                 project: str = "brain-deepviz",
                 bucket: str = "lucid-flow") -> None:
        """Store the project/bucket names and create a local staging directory.

        No remote connection is made here; the client, bucket handle and file
        listing are created on first use.
        """
        self.project_name = project
        self.bucket_name = bucket
        self.tempdir = AbsolutePath(mkdtemp())
        self._file_list = None
        self._bucket = None
        self._client = None

    @property
    def bucket(self) -> Any:
        """Return the bucket handle, creating the client and handle on first use."""
        # Compare against None explicitly so a falsy-but-valid handle is not
        # recreated on every access.
        if self._bucket is None:
            self._client = storage.Client(project=self.project_name)
            self._bucket = self._client.bucket(self.bucket_name)
        return self._bucket

    @property
    def file_list(self) -> FileList:
        """Return the cached FileList, building and populating it on first use."""
        # ``is None`` rather than truthiness: a populated-but-empty listing
        # must not trigger a fresh listing on every access.
        if self._file_list is None:
            self._file_list = FileList(project=self.project_name,
                                       bucket=self.bucket_name)
            self._file_list._get_all_gcs_files()
        return self._file_list

    def normpath(self, path: str) -> AbsolutePath:
        """Strip an optional ``gs://`` scheme and bucket name from ``path``.

        The bucket name is only removed when it is a complete leading path
        component (followed by ``/`` or the end of the string), so a path in a
        different bucket whose name merely starts with this bucket's name is
        left intact.

        Returns:
            An ``AbsolutePath`` built from the remaining string.
        """
        scheme = "gs://"
        if path.startswith(scheme):
            path = path[len(scheme):]
        if path.startswith(self.bucket_name):
            remainder = path[len(self.bucket_name):]
            # Only strip on a component boundary; e.g. with bucket "data",
            # "data-backup/x" must not be truncated to "-backup/x".
            if not remainder or remainder.startswith("/"):
                path = remainder
        return AbsolutePath(path)

    @contextmanager
    def _reading(self, path: AbsolutePath, mode: str = "r+b") -> IO:
        """Download ``path`` and yield the local copy opened with ``mode``.

        The local file is closed when the context exits, including when the
        body of the ``with`` statement raises.
        """
        local_path = self._download(path)
        reading_file = localfs_open(local_path, mode=mode)
        try:
            yield reading_file
        finally:
            reading_file.close()

    @contextmanager
    def _writing(self, path: AbsolutePath, mode: str = "w+b") -> IO:
        """Yield a local file for writing and upload it to ``path`` afterwards.

        The local file is always closed on exit. The upload only happens when
        the body of the ``with`` statement completes without raising, so a
        failed write does not publish a partial file.
        """
        blob = storage.blob.Blob(path.as_relative_path(), self.bucket)
        local_path = self.tempdir.append(path.as_relative_path())
        makedirs(dirname(local_path), exist_ok=True)
        writing_file = localfs_open(local_path, mode=mode)
        try:
            yield writing_file
        finally:
            writing_file.close()
        blob.upload_from_filename(local_path)

    def _makedirs(self, path: str) -> None:
        """Do nothing.

        NOTE(review): presumably a no-op because the remote store has no real
        directories -- confirm against the IOAdapter contract.
        """
        pass

    def _glob(self, glob_path: AbsolutePath) -> List[AbsolutePath]:
        """Return file-list matches for ``glob_path`` with and without a trailing slash.

        Results for the path as given come first, followed by results for the
        variant with the trailing slash toggled. The two result lists are
        concatenated without de-duplication.
        """
        # GCS returns folders iff a trailing slash is specified, so we try both:
        if glob_path.endswith("/"):
            other_path = AbsolutePath(glob_path[:-1])
        else:
            other_path = AbsolutePath(glob_path + "/")
        matched_paths: List[AbsolutePath] = []
        for glob_string in (glob_path, other_path):
            matched_paths += self.file_list.glob(glob_string)
        return matched_paths

    def _exist(self, paths: List[AbsolutePath]) -> List[bool]:
        """Return, for each path in order, whether the file list reports it as existing."""
        return [self.file_list.exists(path) for path in paths]

    def _download(self, path: AbsolutePath) -> AbsolutePath:
        """Download the blob at ``path`` into the staging directory.

        Parent directories of the local target are created as needed.

        Returns:
            The local path the blob was downloaded to.
        """
        local_path = self.tempdir.append(path.as_relative_path())
        makedirs(dirname(local_path), exist_ok=True)
        blob = storage.blob.Blob(path.as_relative_path(), self.bucket)
        blob.download_to_filename(local_path)
        return local_path

    def _upload(self, local_path: str, remote_path: RelativePath) -> None:
        """Upload the local file at ``local_path`` to the blob named ``remote_path``.

        ``remote_path`` must be relative, i.e. must not start with ``/``.
        """
        assert not remote_path.startswith("/"), \
            "remote_path must be relative (no leading '/')"
        blob = storage.blob.Blob(remote_path, self.bucket)
        blob.upload_from_filename(local_path)