import datetime
import logging
from typing import Any, Dict, List

from google.api_core.exceptions import BadRequest, Forbidden
from google.cloud import storage


def get_buckets(project_ids: List[str],
                gcs_client: storage.Client) -> List[Dict[str, Any]]:
    """Retrieves list of metadata for all buckets in a GCP org.

    Args:
        project_ids: List of strings holding project IDs
        gcs_client: storage.Client object

    Returns:
        List of dictionaries, one per bucket, holding bucket-level metadata.
    """
    output_list = []
    for project_id in project_ids:
        try:
            bucket_list = list(gcs_client.list_buckets(project=project_id))
            for bucket in bucket_list:
                output_list.append({
                    "bucket_name": bucket.name,
                    "project_id": project_id,
                    "last_read_timestamp": "",
                    "days_since_last_read": -1,
                    "read_count_30_days": -1,
                    "read_count_90_days": -1,
                    "export_day": datetime.datetime.utcnow().strftime("%Y-%m-%d"),
                    "recommended_OLM": ""
                })
        except Forbidden as err:
            # The caller lacks storage permissions on this project; skip it.
            logging.error(f"Access denied listing buckets in {project_id}. {err}")
        except BadRequest as err:
            logging.error(f"Could not list buckets in {project_id}. {err}")
    return output_list
Example #2
class GoogleClient(CloudClient):
    """
    Implementation of a Google Client using the Google API

    """
    def __init__(self, auth_dict, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.cred_dict_string = base64.b64decode(
            auth_dict.get("CREDENTIALS_JSON_BASE64")).decode("utf-8")
        cred_dict = json.loads(self.cred_dict_string)
        credentials = service_account.Credentials.from_service_account_info(
            cred_dict)

        with open(constants.GOOGLE_CREDS_JSON_PATH, "w") as cred_dump:
            cred_dump.write(self.cred_dict_string)

        self.secret = self.create_gcp_secret()

        # DefaultCredentialsError propagates unchanged if the service-account
        # credentials are unusable.
        self.client = GCPStorageClient(project=cred_dict["project_id"],
                                       credentials=credentials)

    def internal_create_uls(self, name, region=None):
        """
        Creates the Underlying Storage using the Google API

        Args:
           name (str): The Underlying Storage name to be created
           region (str): The region to create the Underlying Storage

        """
        if region is None:
            self.client.create_bucket(name)
        else:
            self.client.create_bucket(name, location=region)

    def internal_delete_uls(self, name):
        """
        Deletes the Underlying Storage using the Google API

        Args:
           name (str): The Underlying Storage name to be deleted

        """
        # Todo: Replace with a TimeoutSampler
        for _ in range(10):
            try:
                bucket = GCPBucket(client=self.client, name=name)
                bucket.delete_blobs(bucket.list_blobs())
                bucket.delete()
                break
            except GoogleExceptions.NotFound:
                logger.warning(
                    "Failed to delete some of the bucket blobs. Retrying...")
                sleep(10)

    def get_all_uls_names(self):
        """
        Returns a set containing all the bucket names that the client has access to

        """
        return {bucket.id for bucket in self.client.list_buckets()}

    def verify_uls_exists(self, uls_name):
        """
        Verifies whether an Underlying Storage with the given uls_name exists

        Args:
           uls_name (str): The Underlying Storage name to be verified

        Returns:
             bool: True if Underlying Storage exists, False otherwise

        """
        try:
            self.client.get_bucket(uls_name)
            return True
        except GoogleExceptions.NotFound:
            return False

    def create_gcp_secret(self):
        """
        Create a Kubernetes secret to allow NooBaa to create Google-based backingstores

        """
        bs_secret_data = templating.load_yaml(
            constants.MCG_BACKINGSTORE_SECRET_YAML)
        bs_secret_data["metadata"]["name"] = create_unique_resource_name(
            "cldmgr-gcp", "secret")
        bs_secret_data["metadata"]["namespace"] = config.ENV_DATA[
            "cluster_namespace"]
        bs_secret_data["data"][
            "GoogleServiceAccountPrivateKeyJson"] = base64.urlsafe_b64encode(
                self.cred_dict_string.encode("UTF-8")).decode("ascii")

        return create_resource(**bs_secret_data)
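
A hedged usage sketch for this client, assuming the CloudClient base class needs no extra constructor arguments and that a service-account key file is available locally; the file path, bucket name, and region below are placeholders:

import base64

# Hypothetical: wrap a local service-account key the way GoogleClient
# expects it (base64-encoded JSON under the CREDENTIALS_JSON_BASE64 key).
with open("service-account.json", "rb") as key_file:
    encoded = base64.b64encode(key_file.read()).decode("utf-8")

google_client = GoogleClient({"CREDENTIALS_JSON_BASE64": encoded})

google_client.internal_create_uls("cldmgr-gcp-demo-bucket", region="US-EAST1")
assert google_client.verify_uls_exists("cldmgr-gcp-demo-bucket")
google_client.internal_delete_uls("cldmgr-gcp-demo-bucket")
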
Example #3
class BucketClientGCS(BucketClient):
    client: Optional[GCSNativeClient]

    def __init__(self, client: Optional[GCSNativeClient] = None):
        try:
            # Honor an explicitly provided client; otherwise build one.
            # GCSNativeClient may be a non-callable placeholder when the
            # optional google-cloud-storage dependency is missing.
            self.client = client or (GCSNativeClient() if GCSNativeClient else None)
        except DefaultCredentialsError:
            self.client = None

    def make_uri(self, path: PurePathy) -> str:
        return str(path)

    def create_bucket(self, path: PurePathy) -> Bucket:
        assert self.client is not None, _MISSING_DEPS
        return self.client.create_bucket(path.root)

    def delete_bucket(self, path: PurePathy) -> None:
        assert self.client is not None, _MISSING_DEPS
        bucket = self.client.get_bucket(path.root)
        bucket.delete()

    def exists(self, path: PurePathy) -> bool:
        # Because we want all the parents of a valid blob (e.g. "directory" in
        # "directory/foo.file") to return True, we enumerate the blobs with a prefix
        # and compare the object names to see if they match a substring of the path
        key_name = str(path.key)
        try:
            for obj in self.list_blobs(path):
                if obj.name == key_name:
                    return True
                if obj.name.startswith(key_name + path._flavour.sep):
                    return True
        except gcs_errors.ClientError:
            return False
        return False

    def lookup_bucket(self, path: PurePathy) -> Optional[BucketGCS]:
        assert self.client is not None, _MISSING_DEPS
        try:
            native_bucket = self.client.bucket(path.root)
            if native_bucket is not None:
                return BucketGCS(str(path.root), bucket=native_bucket)
        except gcs_errors.ClientError as err:
            print(err)

        return None

    def get_bucket(self, path: PurePathy) -> BucketGCS:
        assert self.client is not None, _MISSING_DEPS
        try:
            native_bucket = self.client.bucket(path.root)
            if native_bucket is not None:
                return BucketGCS(str(path.root), bucket=native_bucket)
            raise FileNotFoundError(f"Bucket {path.root} does not exist!")
        except gcs_errors.ClientError as e:
            raise ClientError(message=e.message, code=e.code)

    def list_buckets(
        self, **kwargs: Dict[str, Any]
    ) -> Generator[GCSNativeBucket, None, None]:
        assert self.client is not None, _MISSING_DEPS
        return self.client.list_buckets(**kwargs)  # type:ignore

    def scandir(  # type:ignore[override]
        self,
        path: Optional[PurePathy] = None,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None,
    ) -> Generator[BucketEntryGCS, None, None]:  # type:ignore[override]
        assert self.client is not None, _MISSING_DEPS
        continuation_token = None
        if path is None or not path.root:
            gcs_bucket: GCSNativeBucket
            for gcs_bucket in self.list_buckets():
                yield BucketEntryGCS(gcs_bucket.name, is_dir=True, raw=None)
            return
        sep = path._flavour.sep
        bucket = self.lookup_bucket(path)
        if bucket is None:
            return
        # Page through list_blobs results manually, carrying the page token
        # forward until the service reports no further pages.
        while True:
            if continuation_token:
                response = self.client.list_blobs(
                    bucket.name,
                    prefix=prefix,
                    delimiter=sep,
                    page_token=continuation_token,
                )
            else:
                response = self.client.list_blobs(bucket.name,
                                                  prefix=prefix,
                                                  delimiter=sep)
            for page in response.pages:
                for folder in list(page.prefixes):
                    full_name = folder[:-1] if folder.endswith(sep) else folder
                    name = full_name.split(sep)[-1]
                    if name:
                        yield BucketEntryGCS(name, is_dir=True, raw=None)
                for item in page:
                    name = item.name.split(sep)[-1]
                    if name:
                        yield BucketEntryGCS(
                            name=name,
                            is_dir=False,
                            size=item.size,
                            last_modified=item.updated.timestamp(),
                            raw=item,
                        )
            if response.next_page_token is None:
                break
            continuation_token = response.next_page_token

    def list_blobs(
        self,
        path: PurePathy,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None,
        include_dirs: bool = False,
    ) -> Generator[BlobGCS, None, None]:
        assert self.client is not None, _MISSING_DEPS
        continuation_token = None
        bucket = self.lookup_bucket(path)
        if bucket is None:
            return
        while True:
            if continuation_token:
                response = self.client.list_blobs(
                    path.root,
                    prefix=prefix,
                    delimiter=delimiter,
                    page_token=continuation_token,
                )
            else:
                response = self.client.list_blobs(path.root,
                                                  prefix=prefix,
                                                  delimiter=delimiter)
            for page in response.pages:
                for item in page:
                    yield BlobGCS(
                        bucket=bucket,
                        owner=item.owner,
                        name=item.name,
                        raw=item,
                        size=item.size,
                        updated=item.updated.timestamp(),
                    )
            if response.next_page_token is None:
                break
            continuation_token = response.next_page_token
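
A minimal sketch of driving this client, assuming the pathy package's concrete Pathy type (the counterpart of PurePathy) and that BucketEntryGCS exposes the name and is_dir fields it is constructed with; the bucket name is a placeholder:

from pathy import Pathy

client = BucketClientGCS()

# exists() matches the key itself or any blob nested under it, so parent
# "directories" of a real blob also report True.
print(client.exists(Pathy("gs://my-example-bucket/reports/2021/summary.csv")))

# scandir() yields directory-like entries for shared prefixes and file
# entries for blobs directly under the path.
for entry in client.scandir(Pathy("gs://my-example-bucket/reports/")):
    print("dir" if entry.is_dir else "file", entry.name)
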
Example #4
class BucketClientGCS(BucketClient):
    client: Optional[GCSNativeClient]

    @property
    def client_params(self) -> Any:
        return dict(client=self.client)

    def __init__(self, **kwargs: Any) -> None:
        self.recreate(**kwargs)

    def recreate(self, **kwargs: Any) -> None:
        creds = kwargs.get("credentials")
        if creds is not None:
            kwargs["project"] = creds.project_id
        try:
            self.client = GCSNativeClient(**kwargs)
        except TypeError:
            # TypeError is raised if the imports for GCSNativeClient fail and are
            #  assigned to Any, which is not callable.
            self.client = None

    def make_uri(self, path: PurePathy) -> str:
        return str(path)

    def create_bucket(self, path: PurePathy) -> Bucket:
        assert self.client is not None, _MISSING_DEPS
        return self.client.create_bucket(path.root)

    def delete_bucket(self, path: PurePathy) -> None:
        assert self.client is not None, _MISSING_DEPS
        bucket = self.client.get_bucket(path.root)
        bucket.delete()

    def exists(self, path: PurePathy) -> bool:
        # Because we want all the parents of a valid blob (e.g. "directory" in
        # "directory/foo.file") to return True, we enumerate the blobs with a prefix
        # and compare the object names to see if they match a substring of the path
        key_name = str(path.key)
        try:
            for obj in self.list_blobs(path):
                if obj.name == key_name:
                    return True
                if obj.name.startswith(key_name + path._flavour.sep):
                    return True
        except gcs_errors.ClientError:
            return False
        return False

    def lookup_bucket(self, path: PurePathy) -> Optional[BucketGCS]:
        assert self.client is not None, _MISSING_DEPS
        try:
            native_bucket = self.client.bucket(path.root)
            if native_bucket is not None:
                return BucketGCS(str(path.root), bucket=native_bucket)
        except gcs_errors.ClientError as err:
            print(err)

        return None

    def get_bucket(self, path: PurePathy) -> BucketGCS:
        assert self.client is not None, _MISSING_DEPS
        try:
            native_bucket = self.client.bucket(path.root)
            if native_bucket is not None:
                return BucketGCS(str(path.root), bucket=native_bucket)
            raise FileNotFoundError(f"Bucket {path.root} does not exist!")
        except gcs_errors.ClientError as e:
            raise ClientError(message=e.message, code=e.code)

    def list_buckets(
        self, **kwargs: Dict[str, Any]
    ) -> Generator[GCSNativeBucket, None, None]:
        assert self.client is not None, _MISSING_DEPS
        return self.client.list_buckets(**kwargs)  # type:ignore

    def scandir(  # type:ignore[override]
        self,
        path: Optional[PurePathy] = None,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None,
    ) -> PathyScanDir:
        return _GCSScanDir(client=self, path=path, prefix=prefix, delimiter=delimiter)

    def list_blobs(
        self,
        path: PurePathy,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None,
        include_dirs: bool = False,
    ) -> Generator[BlobGCS, None, None]:
        assert self.client is not None, _MISSING_DEPS
        continuation_token = None
        bucket = self.lookup_bucket(path)
        if bucket is None:
            return
        while True:
            if continuation_token:
                response = self.client.list_blobs(
                    path.root,
                    prefix=prefix,
                    delimiter=delimiter,
                    page_token=continuation_token,
                )
            else:
                response = self.client.list_blobs(
                    path.root, prefix=prefix, delimiter=delimiter
                )
            for page in response.pages:
                for item in page:
                    yield BlobGCS(
                        bucket=bucket,
                        owner=item.owner,
                        name=item.name,
                        raw=item,
                        size=item.size,
                        updated=item.updated.timestamp(),
                    )
            if response.next_page_token is None:
                break
            continuation_token = response.next_page_token
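
Compared with the previous variant, scandir here hands the iteration off to a PathyScanDir helper (_GCSScanDir) instead of an inline generator; presumably this lets the paging and entry-building logic be shared across cloud backends rather than re-implemented inside each client.
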
Example #5
class BucketClientGCS(BucketClient):
    client: GCSNativeClient

    @property
    def client_params(self) -> Any:
        return dict(client=self.client)

    def __init__(self, **kwargs: Any) -> None:
        self.recreate(**kwargs)

    def recreate(self, **kwargs: Any) -> None:
        creds = kwargs.get("credentials")
        if creds is not None:
            kwargs["project"] = creds.project_id
        self.client = GCSNativeClient(**kwargs)

    def make_uri(self, path: PurePathy) -> str:
        return str(path)

    def create_bucket(  # type:ignore[override]
            self, path: PurePathy) -> GCSNativeBucket:
        return self.client.create_bucket(path.root)  # type:ignore

    def delete_bucket(self, path: PurePathy) -> None:
        bucket = self.client.get_bucket(path.root)  # type:ignore
        bucket.delete()  # type:ignore

    def exists(self, path: PurePathy) -> bool:
        # Because we want all the parents of a valid blob (e.g. "directory" in
        # "directory/foo.file") to return True, we enumerate the blobs with a prefix
        # and compare the object names to see if they match a substring of the path
        key_name = str(path.key)
        for obj in self.list_blobs(path):
            # Exact match: the path points at this blob itself.
            if obj.name == key_name:
                return True
            # Prefix match: the path is a "directory" above this blob.
            if obj.name.startswith(key_name + path._flavour.sep):  # type:ignore
                return True
        return False

    def lookup_bucket(self, path: PurePathy) -> Optional[BucketGCS]:
        try:
            return self.get_bucket(path)
        except FileNotFoundError:
            return None

    def get_bucket(self, path: PurePathy) -> BucketGCS:
        native_bucket: Any = self.client.bucket(path.root)  # type:ignore
        try:
            if native_bucket.exists():
                return BucketGCS(str(path.root), bucket=native_bucket)
        except BadRequest:
            pass
        raise FileNotFoundError(f"Bucket {path.root} does not exist!")

    def list_buckets(  # type:ignore[override]
        self, **kwargs: Dict[str, Any]
    ) -> Generator[GCSNativeBucket, None, None]:
        return self.client.list_buckets(**kwargs)  # type:ignore

    def scandir(  # type:ignore[override]
        self,
        path: Optional[PurePathy] = None,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None,
    ) -> PathyScanDir:
        return ScanDirGCS(client=self,
                          path=path,
                          prefix=prefix,
                          delimiter=delimiter)

    def list_blobs(
        self,
        path: PurePathy,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None,
    ) -> Generator[BlobGCS, None, None]:
        bucket = self.lookup_bucket(path)
        if bucket is None:
            return
        response: Any = self.client.list_blobs(  # type:ignore
            path.root, prefix=prefix, delimiter=delimiter)
        for page in response.pages:  # type:ignore
            for item in page:
                yield BlobGCS(
                    bucket=bucket,
                    owner=item.owner,
                    name=item.name,
                    raw=item,
                    size=item.size,
                    updated=item.updated.timestamp(),
                )
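
A closing note on paging: the manual next_page_token loops in the earlier variants appear redundant, since the iterator returned by google-cloud-storage fetches successive pages lazily, which is what this last variant relies on. A minimal sketch, with a hypothetical bucket name:

from google.cloud import storage

client = storage.Client()

# Iterating .pages (or the iterator itself) issues follow-up list requests
# transparently; no explicit page-token bookkeeping is needed.
for page in client.list_blobs("my-example-bucket", prefix="reports/").pages:
    for blob in page:
        print(blob.name, blob.size)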