def get_buckets(project_ids: List[str], gcs_client: storage.Client) -> List[Dict[str, str]]:
    """Retrieves list of metadata for all buckets in a GCP org.

    Projects that raise Forbidden/BadRequest (or any other error) are
    logged and skipped; the metadata collected so far is always returned.

    Args:
        project_ids: List of strings holding project IDs
        gcs_client: storage.Client object

    Returns:
        List of dictionaries mapping bucket-level metadata.
    """
    output_list = []
    # Hoisted out of the loop: one consistent stamp per export run, and
    # timezone-aware now() replaces the deprecated datetime.utcnow().
    export_day = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
    for project_id in project_ids:
        try:
            for bucket in gcs_client.list_buckets(project=project_id):
                output_list.append({
                    "bucket_name": bucket.name,
                    "project_id": project_id,
                    "last_read_timestamp": "",
                    "days_since_last_read": -1,
                    "read_count_30_days": -1,
                    "read_count_90_days": -1,
                    "export_day": export_day,
                    "recommended_OLM": ""
                })
        except Forbidden as err:
            # BUG FIX: the old handler logged `bucket.name`, which is unbound
            # when list_buckets() itself raises. Log the project instead.
            logging.error(f"Access denied on project {project_id}. {err}")
        except BadRequest as err:
            logging.error(f"Could not list buckets in project {project_id}.")
            logging.error(err)
        except Exception as err:
            # Best-effort export: keep going over remaining projects.
            logging.error(f"""Could not access buckets in {project_id}. {err}""")
    # BUG FIX: the old outer `except Exception` could fall through without a
    # return statement (implicit None). Always return the collected list.
    return output_list
class GoogleClient(CloudClient):
    """
    Implementation of a Google Client using the Google API
    """

    def __init__(self, auth_dict, *args, **kwargs):
        """Build a GCS client from base64-encoded service-account JSON.

        Args:
            auth_dict (dict): expected to contain "CREDENTIALS_JSON_BASE64",
                the base64-encoded service-account credentials JSON.

        Side effects: dumps the decoded credentials to
        constants.GOOGLE_CREDS_JSON_PATH and creates a Kubernetes secret.
        """
        super().__init__(*args, **kwargs)
        # Keep the decoded JSON string; it is reused for both the on-disk
        # dump and the NooBaa secret payload in create_gcp_secret().
        self.cred_dict_string = base64.b64decode(
            auth_dict.get("CREDENTIALS_JSON_BASE64")).decode("utf-8")
        cred_dict = json.loads(self.cred_dict_string)
        credentials = service_account.Credentials.from_service_account_info(
            cred_dict)
        with open(constants.GOOGLE_CREDS_JSON_PATH, "w") as cred_dump:
            cred_dump.write(self.cred_dict_string)
        self.secret = self.create_gcp_secret()
        try:
            self.client = GCPStorageClient(project=cred_dict["project_id"],
                                           credentials=credentials)
        except DefaultCredentialsError:
            # Re-raised unchanged; kept explicit to mark the known failure
            # mode when the provided credentials are unusable.
            raise

    def internal_create_uls(self, name, region=None):
        """
        Creates the Underlying Storage using the Google API

        Args:
            name (str): The Underlying Storage name to be created
            region (str): The region to create the Underlying Storage;
                when None the provider's default location is used
        """
        if region is None:
            self.client.create_bucket(name)
        else:
            self.client.create_bucket(name, location=region)

    def internal_delete_uls(self, name):
        """
        Deletes the Underlying Storage using the Google API

        Args:
            name (str): The Underlying Storage name to be deleted
        """
        # Todo: Replace with a TimeoutSampler
        for _ in range(10):
            try:
                bucket = GCPBucket(client=self.client, name=name)
                # The bucket must be emptied before delete() can succeed.
                bucket.delete_blobs(bucket.list_blobs())
                bucket.delete()
                break
            except GoogleExceptions.NotFound:
                # NOTE(review): NotFound is treated as transient and retried
                # up to 10 times with a 10s pause; after the final attempt
                # the failure is silently dropped — confirm intended.
                logger.warning(
                    "Failed to delete some of the bucket blobs. Retrying...")
                sleep(10)

    def get_all_uls_names(self):
        """
        Returns a set containing all the bucket names that the client has access to
        """
        return {bucket.id for bucket in self.client.list_buckets()}

    def verify_uls_exists(self, uls_name):
        """
        Verifies whether a Underlying Storage with the given uls_name exists

        Args:
            uls_name (str): The Underlying Storage name to be verified

        Returns:
            bool: True if Underlying Storage exists, False otherwise
        """
        try:
            self.client.get_bucket(uls_name)
            return True
        except GoogleExceptions.NotFound:
            return False

    def create_gcp_secret(self):
        """
        Create a Kubernetes secret to allow NooBaa to create Google-based backingstores
        """
        bs_secret_data = templating.load_yaml(
            constants.MCG_BACKINGSTORE_SECRET_YAML)
        bs_secret_data["metadata"]["name"] = create_unique_resource_name(
            "cldmgr-gcp", "secret")
        bs_secret_data["metadata"]["namespace"] = config.ENV_DATA[
            "cluster_namespace"]
        # NooBaa expects the service-account JSON re-encoded as URL-safe
        # base64 inside the secret payload.
        bs_secret_data["data"][
            "GoogleServiceAccountPrivateKeyJson"] = base64.urlsafe_b64encode(
                self.cred_dict_string.encode("UTF-8")).decode("ascii")
        return create_resource(**bs_secret_data)
class BucketClientGCS(BucketClient):
    """BucketClient implementation backed by the native GCS client.

    ``client`` is None when the google-cloud-storage dependency is
    unavailable or no default credentials exist; every method that needs
    the client asserts with ``_MISSING_DEPS``.
    """

    client: Optional[GCSNativeClient]

    def __init__(self, client: Optional[GCSNativeClient] = None):
        """Store an injected native client or build a default one.

        BUG FIX: the ``client`` argument used to be accepted but silently
        ignored; an explicitly injected client is now honored.
        """
        if client is not None:
            self.client = client
            return
        try:
            # GCSNativeClient may be a falsy placeholder when the optional
            # dependency failed to import; guard before instantiating.
            self.client = GCSNativeClient() if GCSNativeClient else None
        except Exception:
            # BUG FIX: previously caught BaseException, which also swallowed
            # KeyboardInterrupt/SystemExit. Any construction failure (e.g.
            # DefaultCredentialsError) now degrades to a client-less instance.
            self.client = None

    def make_uri(self, path: PurePathy) -> str:
        """Return the string URI for *path*."""
        return str(path)

    def create_bucket(self, path: PurePathy) -> Bucket:
        """Create and return the bucket named by ``path.root``."""
        assert self.client is not None, _MISSING_DEPS
        return self.client.create_bucket(path.root)

    def delete_bucket(self, path: PurePathy) -> None:
        """Delete the bucket named by ``path.root``."""
        assert self.client is not None, _MISSING_DEPS
        bucket = self.client.get_bucket(path.root)
        bucket.delete()

    def exists(self, path: PurePathy) -> bool:
        """Return True if *path* names a blob or a "directory" prefix.

        Because we want all the parents of a valid blob (e.g. "directory" in
        "directory/foo.file") to return True, we enumerate the blobs with a
        prefix and compare the object names to see if they match a substring
        of the path key.
        """
        key_name = str(path.key)
        try:
            for obj in self.list_blobs(path):
                if obj.name == key_name:
                    return True
                if obj.name.startswith(key_name + path._flavour.sep):
                    return True
        except gcs_errors.ClientError:
            return False
        return False

    def lookup_bucket(self, path: PurePathy) -> Optional[BucketGCS]:
        """Return the wrapped bucket for *path*, or None on client errors."""
        assert self.client is not None, _MISSING_DEPS
        try:
            native_bucket = self.client.bucket(path.root)
            if native_bucket is not None:
                return BucketGCS(str(path.root), bucket=native_bucket)
        except gcs_errors.ClientError as err:
            # NOTE(review): debug print kept to preserve observable output;
            # consider switching to logging.
            print(err)
        return None

    def get_bucket(self, path: PurePathy) -> BucketGCS:
        """Return the wrapped bucket for *path*.

        Raises:
            FileNotFoundError: if the native client returns no bucket.
            ClientError: wrapping any underlying GCS client error.
        """
        assert self.client is not None, _MISSING_DEPS
        try:
            native_bucket = self.client.bucket(path.root)
            if native_bucket is not None:
                return BucketGCS(str(path.root), bucket=native_bucket)
            raise FileNotFoundError(f"Bucket {path.root} does not exist!")
        except gcs_errors.ClientError as e:
            raise ClientError(message=e.message, code=e.code)

    def list_buckets(
            self, **kwargs: Dict[str, Any]) -> Generator[GCSNativeBucket, None, None]:
        """Yield every native bucket visible to the client."""
        assert self.client is not None, _MISSING_DEPS
        return self.client.list_buckets(**kwargs)  # type:ignore

    def scandir(  # type:ignore[override]
        self,
        path: Optional[PurePathy] = None,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None,
    ) -> Generator[BucketEntryGCS, None, None]:  # type:ignore[override]
        """Yield directory-style entries under *path* (or all buckets)."""
        assert self.client is not None, _MISSING_DEPS
        continuation_token = None
        if path is None or not path.root:
            # No bucket component: each accessible bucket is a top-level dir.
            gcs_bucket: GCSNativeBucket
            for gcs_bucket in self.list_buckets():
                yield BucketEntryGCS(gcs_bucket.name, is_dir=True, raw=None)
            return
        sep = path._flavour.sep
        bucket = self.lookup_bucket(path)
        if bucket is None:
            return
        while True:
            if continuation_token:
                response = self.client.list_blobs(
                    bucket.name,
                    prefix=prefix,
                    delimiter=sep,
                    page_token=continuation_token,
                )
            else:
                response = self.client.list_blobs(bucket.name,
                                                  prefix=prefix,
                                                  delimiter=sep)
            for page in response.pages:
                # Prefixes are pseudo-folders; strip the trailing separator.
                for folder in list(page.prefixes):
                    full_name = folder[:-1] if folder.endswith(sep) else folder
                    name = full_name.split(sep)[-1]
                    if name:
                        yield BucketEntryGCS(name, is_dir=True, raw=None)
                for item in page:
                    name = item.name.split(sep)[-1]
                    if name:
                        yield BucketEntryGCS(
                            name=name,
                            is_dir=False,
                            size=item.size,
                            last_modified=item.updated.timestamp(),
                            raw=item,
                        )
            if response.next_page_token is None:
                break
            continuation_token = response.next_page_token

    def list_blobs(
        self,
        path: PurePathy,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None,
        include_dirs: bool = False,
    ) -> Generator[BlobGCS, None, None]:
        """Yield BlobGCS wrappers for blobs under *path*, following pages."""
        assert self.client is not None, _MISSING_DEPS
        continuation_token = None
        bucket = self.lookup_bucket(path)
        if bucket is None:
            return
        while True:
            if continuation_token:
                response = self.client.list_blobs(
                    path.root,
                    prefix=prefix,
                    delimiter=delimiter,
                    page_token=continuation_token,
                )
            else:
                response = self.client.list_blobs(path.root,
                                                  prefix=prefix,
                                                  delimiter=delimiter)
            for page in response.pages:
                for item in page:
                    yield BlobGCS(
                        bucket=bucket,
                        owner=item.owner,
                        name=item.name,
                        raw=item,
                        size=item.size,
                        updated=item.updated.timestamp(),
                    )
            if response.next_page_token is None:
                break
            continuation_token = response.next_page_token
class BucketClientGCS(BucketClient):
    """BucketClient implementation backed by the native GCS client.

    ``client`` is None when the native client could not be constructed
    (see ``recreate``); methods that need it assert with ``_MISSING_DEPS``.
    """

    client: Optional[GCSNativeClient]

    @property
    def client_params(self) -> Any:
        """Kwargs sufficient to recreate an equivalent bucket client."""
        return dict(client=self.client)

    def __init__(self, **kwargs: Any) -> None:
        self.recreate(**kwargs)

    def recreate(self, **kwargs: Any) -> None:
        """(Re)build the native client from *kwargs*.

        When explicit ``credentials`` are supplied, their project id is
        forwarded as the client's ``project``.
        """
        # Idiom fix: dict.get() replaces the double-lookup
        # `kwargs["credentials"] if "credentials" in kwargs else None`.
        creds = kwargs.get("credentials")
        if creds is not None:
            kwargs["project"] = creds.project_id
        try:
            self.client = GCSNativeClient(**kwargs)
        except TypeError:
            # TypeError is raised if the imports for GCSNativeClient fail and are
            # assigned to Any, which is not callable.
            self.client = None

    def make_uri(self, path: PurePathy) -> str:
        """Return the string URI for *path*."""
        return str(path)

    def create_bucket(self, path: PurePathy) -> Bucket:
        """Create and return the bucket named by ``path.root``."""
        assert self.client is not None, _MISSING_DEPS
        return self.client.create_bucket(path.root)

    def delete_bucket(self, path: PurePathy) -> None:
        """Delete the bucket named by ``path.root``."""
        assert self.client is not None, _MISSING_DEPS
        bucket = self.client.get_bucket(path.root)
        bucket.delete()

    def exists(self, path: PurePathy) -> bool:
        """Return True if *path* names a blob or a "directory" prefix.

        Because we want all the parents of a valid blob (e.g. "directory" in
        "directory/foo.file") to return True, we enumerate the blobs with a
        prefix and compare the object names to see if they match a substring
        of the path key.
        """
        key_name = str(path.key)
        try:
            for obj in self.list_blobs(path):
                if obj.name == key_name:
                    return True
                if obj.name.startswith(key_name + path._flavour.sep):
                    return True
        except gcs_errors.ClientError:
            return False
        return False

    def lookup_bucket(self, path: PurePathy) -> Optional[BucketGCS]:
        """Return the wrapped bucket for *path*, or None on client errors."""
        assert self.client is not None, _MISSING_DEPS
        try:
            native_bucket = self.client.bucket(path.root)
            if native_bucket is not None:
                return BucketGCS(str(path.root), bucket=native_bucket)
        except gcs_errors.ClientError as err:
            # NOTE(review): debug print kept to preserve observable output;
            # consider switching to logging.
            print(err)
        return None

    def get_bucket(self, path: PurePathy) -> BucketGCS:
        """Return the wrapped bucket for *path*.

        Raises:
            FileNotFoundError: if the native client returns no bucket.
            ClientError: wrapping any underlying GCS client error.
        """
        assert self.client is not None, _MISSING_DEPS
        try:
            native_bucket = self.client.bucket(path.root)
            if native_bucket is not None:
                return BucketGCS(str(path.root), bucket=native_bucket)
            raise FileNotFoundError(f"Bucket {path.root} does not exist!")
        except gcs_errors.ClientError as e:
            raise ClientError(message=e.message, code=e.code)

    def list_buckets(
        self, **kwargs: Dict[str, Any]
    ) -> Generator[GCSNativeBucket, None, None]:
        """Yield every native bucket visible to the client."""
        assert self.client is not None, _MISSING_DEPS
        return self.client.list_buckets(**kwargs)  # type:ignore

    def scandir(  # type:ignore[override]
        self,
        path: Optional[PurePathy] = None,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None,
    ) -> PathyScanDir:
        """Return a scan-dir helper over *path* (lazy directory listing)."""
        return _GCSScanDir(client=self, path=path, prefix=prefix, delimiter=delimiter)

    def list_blobs(
        self,
        path: PurePathy,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None,
        include_dirs: bool = False,
    ) -> Generator[BlobGCS, None, None]:
        """Yield BlobGCS wrappers for blobs under *path*, following pages."""
        assert self.client is not None, _MISSING_DEPS
        continuation_token = None
        bucket = self.lookup_bucket(path)
        if bucket is None:
            return
        while True:
            if continuation_token:
                response = self.client.list_blobs(
                    path.root,
                    prefix=prefix,
                    delimiter=delimiter,
                    page_token=continuation_token,
                )
            else:
                response = self.client.list_blobs(
                    path.root, prefix=prefix, delimiter=delimiter
                )
            for page in response.pages:
                for item in page:
                    yield BlobGCS(
                        bucket=bucket,
                        owner=item.owner,
                        name=item.name,
                        raw=item,
                        size=item.size,
                        updated=item.updated.timestamp(),
                    )
            if response.next_page_token is None:
                break
            continuation_token = response.next_page_token
class BucketClientGCS(BucketClient):
    """BucketClient implementation backed by the native GCS client.

    Unlike earlier variants, ``client`` is required to construct; failures
    in ``GCSNativeClient(**kwargs)`` propagate to the caller.
    """

    client: GCSNativeClient

    @property
    def client_params(self) -> Any:
        """Kwargs sufficient to recreate an equivalent bucket client."""
        return dict(client=self.client)

    def __init__(self, **kwargs: Any) -> None:
        self.recreate(**kwargs)

    def recreate(self, **kwargs: Any) -> None:
        """(Re)build the native client from *kwargs*.

        When explicit ``credentials`` are supplied, their project id is
        forwarded as the client's ``project``.
        """
        # Idiom fix: dict.get() replaces the double-lookup
        # `kwargs["credentials"] if "credentials" in kwargs else None`.
        creds = kwargs.get("credentials")
        if creds is not None:
            kwargs["project"] = creds.project_id
        self.client = GCSNativeClient(**kwargs)

    def make_uri(self, path: PurePathy) -> str:
        """Return the string URI for *path*."""
        return str(path)

    def create_bucket(  # type:ignore[override]
            self, path: PurePathy) -> GCSNativeBucket:
        """Create and return the native bucket named by ``path.root``."""
        return self.client.create_bucket(path.root)  # type:ignore

    def delete_bucket(self, path: PurePathy) -> None:
        """Delete the bucket named by ``path.root``."""
        bucket = self.client.get_bucket(path.root)  # type:ignore
        bucket.delete()  # type:ignore

    def exists(self, path: PurePathy) -> bool:
        """Return True if *path* is a "directory" prefix of some blob.

        Because we want all the parents of a valid blob (e.g. "directory" in
        "directory/foo.file") to return True, we enumerate the blobs with a
        prefix and compare the object names to see if they match a substring
        of the path key.

        NOTE(review): only the prefix (``key_name + sep``) is matched — a
        blob named exactly ``key_name`` returns False here; confirm intended.
        """
        key_name = str(path.key)
        for obj in self.list_blobs(path):
            if obj.name.startswith(key_name + path._flavour.sep):  # type:ignore
                return True
        return False

    def lookup_bucket(self, path: PurePathy) -> Optional[BucketGCS]:
        """Like ``get_bucket`` but returns None instead of raising."""
        try:
            return self.get_bucket(path)
        except FileNotFoundError:
            return None

    def get_bucket(self, path: PurePathy) -> BucketGCS:
        """Return the wrapped bucket for *path*.

        Raises:
            FileNotFoundError: if the bucket does not exist or the name is
                malformed (BadRequest is treated as "not found").
        """
        native_bucket: Any = self.client.bucket(path.root)  # type:ignore
        try:
            if native_bucket.exists():
                return BucketGCS(str(path.root), bucket=native_bucket)
        except BadRequest:
            # An invalid bucket name is reported as missing, not as an error.
            pass
        raise FileNotFoundError(f"Bucket {path.root} does not exist!")

    def list_buckets(  # type:ignore[override]
            self, **kwargs: Dict[str, Any]) -> Generator[GCSNativeBucket, None, None]:
        """Yield every native bucket visible to the client."""
        return self.client.list_buckets(**kwargs)  # type:ignore

    def scandir(  # type:ignore[override]
        self,
        path: Optional[PurePathy] = None,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None,
    ) -> PathyScanDir:
        """Return a scan-dir helper over *path* (lazy directory listing)."""
        return ScanDirGCS(client=self, path=path, prefix=prefix, delimiter=delimiter)

    def list_blobs(
        self,
        path: PurePathy,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None,
    ) -> Generator[BlobGCS, None, None]:
        """Yield BlobGCS wrappers for blobs under *path*.

        Pagination is handled by iterating ``response.pages``; no explicit
        page-token loop is needed here.
        """
        bucket = self.lookup_bucket(path)
        if bucket is None:
            return
        response: Any = self.client.list_blobs(  # type:ignore
            path.root, prefix=prefix, delimiter=delimiter)
        for page in response.pages:  # type:ignore
            for item in page:
                yield BlobGCS(
                    bucket=bucket,
                    owner=item.owner,
                    name=item.name,
                    raw=item,
                    size=item.size,
                    updated=item.updated.timestamp(),
                )