class GoogleCloudStorage(FileStoreBase):
    """File store backed by Google Cloud Storage.

    Thin wrapper around ``google.cloud.storage.Client``: every operation
    logs the failing call with its arguments and re-raises, so callers
    decide how to handle errors.
    """

    def __init__(self, *args, **kwargs):
        # kwargs are forwarded verbatim to the GCS Client (project,
        # credentials, ...).  Positional args are accepted for interface
        # compatibility but unused.
        self.client = Client(**kwargs)

    def list_files(self, bucket_name, prefix, **kwargs):
        """Return the names of all blobs under *prefix* in *bucket_name*.

        Extra keyword arguments are passed through to
        ``Bucket.list_blobs`` (e.g. ``max_results``, ``delimiter``).
        Raises whatever the client raises after logging it.
        """
        try:
            list_files_request = {'prefix': prefix, **kwargs}
            bucket = self.client.get_bucket(bucket_name)
            blobs = bucket.list_blobs(**list_files_request)
            return [blob.name for blob in blobs]
        except Exception:
            # Lazy %-style args: the message is only rendered if emitted.
            logging.exception(
                'Exception in [GoogleCloudStorage.list_files] with '
                'bucket_name %s and prefix %s and kwargs %s',
                bucket_name, prefix, kwargs)
            # Bare raise re-raises the active exception with its
            # original traceback.
            raise

    def delete_object(self, bucket_name, key, **kwargs):
        """Delete blob *key* from *bucket_name* (extra kwargs ignored)."""
        try:
            bucket = self.client.get_bucket(bucket_name)
            bucket.delete_blob(key)
        except Exception:
            logging.exception(
                'Exception in [GoogleCloudStorage.delete_object] with '
                'bucket_name %s and key %s', bucket_name, key)
            raise

    def put_object(self, bucket_name, key, obj, **kwargs):
        """Upload *obj* (str/bytes) to *bucket_name* under *key*."""
        try:
            bucket = self.client.get_bucket(bucket_name)
            blob = Blob(key, bucket)
            blob.upload_from_string(obj)
        except Exception:
            logging.exception(
                'Exception in [GoogleCloudStorage.put_object] with '
                'bucket_name %s and key %s', bucket_name, key)
            raise

    def delete_bucket(self, bucket_name):
        """Delete the bucket *bucket_name* (must already be empty)."""
        try:
            bucket = self.client.get_bucket(bucket_name)
            bucket.delete()
        except Exception:
            logging.exception(
                'Exception in [GoogleCloudStorage.delete_bucket] with '
                'bucket_name %s', bucket_name)
            raise

    def create_bucket(self, bucket_name, **kwargs):
        """Create bucket *bucket_name*; kwargs forwarded to the client."""
        try:
            create_bucket_request = {'bucket_name': bucket_name, **kwargs}
            self.client.create_bucket(**create_bucket_request)
        except Exception:
            logging.exception(
                'Exception in [GoogleCloudStorage.create_bucket] with '
                'bucket_name %s and kwargs %s', bucket_name, kwargs)
            raise
def test_create_w_extra_properties(self):
    """All optional bucket properties must round-trip into the POST body."""
    from google.cloud.storage.client import Client
    from google.cloud.storage.bucket import Bucket

    bucket_name = "bucket-name"
    project = "PROJECT"
    cors = [
        {
            "maxAgeSeconds": 60,
            "methods": ["*"],
            "origin": ["https://example.com/frontend"],
            "responseHeader": ["X-Custom-Header"],
        }
    ]
    lifecycle_rules = [{"action": {"type": "Delete"}, "condition": {"age": 365}}]
    location = "eu"
    labels = {"color": "red", "flavor": "cherry"}
    storage_class = "NEARLINE"
    expected_payload = {
        "name": bucket_name,
        "cors": cors,
        "lifecycle": {"rule": lifecycle_rules},
        "location": location,
        "storageClass": storage_class,
        "versioning": {"enabled": True},
        "billing": {"requesterPays": True},
        "labels": labels,
    }

    connection = _make_connection(expected_payload)
    client = Client(project=project)
    client._base_connection = connection

    bucket = Bucket(client=client, name=bucket_name)
    bucket.cors = cors
    bucket.lifecycle_rules = lifecycle_rules
    bucket.storage_class = storage_class
    bucket.versioning_enabled = True
    bucket.requester_pays = True
    bucket.labels = labels

    client.create_bucket(bucket, location=location)

    connection.api_request.assert_called_once_with(
        method="POST",
        path="/b",
        query_params={"project": project},
        data=expected_payload,
        _target_object=bucket,
        timeout=self._get_default_timeout(),
    )
class GoogleCloudStorage(FileStoreBase):
    """File store backed by Google Cloud Storage.

    Minimal wrapper around ``google.cloud.storage.Client`` that logs
    and re-raises failures.
    """

    def __init__(self, *args, **kwargs):
        # kwargs are forwarded verbatim to the GCS Client; positional
        # args are accepted for interface compatibility but unused.
        self.client = Client(**kwargs)

    def create_bucket(self, bucket_name, **kwargs):
        """Create bucket *bucket_name*; extra kwargs go to the client.

        Raises whatever the client raises after logging the call.
        """
        try:
            create_bucket_request = {'bucket_name': bucket_name, **kwargs}
            self.client.create_bucket(**create_bucket_request)
        except Exception:
            # Lazy %-args render the message only if it is emitted;
            # bare raise preserves the original traceback.
            logging.exception(
                'Exception in [GoogleCloudStorage.create_bucket] with '
                'bucket_name %s and kwargs %s', bucket_name, kwargs)
            raise
def test_create_hit(self):
    """create_bucket() POSTs /b and targets the returned bucket."""
    from google.cloud.storage.client import Client

    project = "PROJECT"
    bucket_name = "bucket-name"
    resource = {"name": bucket_name}

    connection = _make_connection(resource)
    client = Client(project=project)
    client._base_connection = connection

    bucket = client.create_bucket(bucket_name)

    connection.api_request.assert_called_once_with(
        method="POST",
        path="/b",
        query_params={"project": project},
        data=resource,
        _target_object=bucket,
        timeout=self._get_default_timeout(),
    )
def test_create_w_predefined_acl_valid(self):
    """A valid predefined_acl is forwarded as the predefinedAcl query param."""
    from google.cloud.storage.client import Client

    project = "PROJECT"
    bucket_name = "bucket-name"
    resource = {"name": bucket_name}

    client = Client(project=project)
    connection = _make_connection(resource)
    client._base_connection = connection

    bucket = client.create_bucket(bucket_name, predefined_acl="publicRead")

    connection.api_request.assert_called_once_with(
        method="POST",
        path="/b",
        query_params={
            "project": project,
            "predefinedAcl": "publicRead",
        },
        data=resource,
        _target_object=bucket,
    )
def test_create_w_explicit_location(self):
    """An explicit location ends up in the POST body and on the bucket."""
    from google.cloud.storage.client import Client

    project = "PROJECT"
    bucket_name = "bucket-name"
    location = "us-central1"
    resource = {"location": location, "name": bucket_name}

    connection = _make_connection(
        resource, "{'location': 'us-central1', 'name': 'bucket-name'}")
    client = Client(project=project)
    client._base_connection = connection

    bucket = client.create_bucket(bucket_name, location=location)

    connection.api_request.assert_called_once_with(
        method="POST",
        path="/b",
        data=resource,
        _target_object=bucket,
        query_params={"project": "PROJECT"},
    )
    self.assertEqual(bucket.location, location)
class GoogleStorageArchive(VirtualArchive):
    """Content-addressed file archive backed by a Google Cloud Storage
    bucket.  Objects are stored under ``<hash-prefix>/data`` keys."""

    # Lifetime (seconds) for signed URLs and the bucket CORS max-age.
    TIMEOUT = 84600

    def __init__(self, bucket=None):
        super(GoogleStorageArchive, self).__init__(bucket)
        self.client = Client()
        log.info("Archive: gs://%s", bucket)
        existing = self.client.lookup_bucket(bucket)
        # Create the bucket on first use.
        self.bucket = existing if existing is not None \
            else self.client.create_bucket(bucket)
        cors_rule = {
            "origin": ['*'],
            "method": ['GET'],
            "responseHeader": [
                'Accept-Ranges',
                'Content-Encoding',
                'Content-Length',
                'Content-Range'
            ],
            "maxAgeSeconds": self.TIMEOUT
        }
        self.bucket.cors = [cors_rule]
        self.bucket.update()

    def _locate_blob(self, content_hash):
        """Return the first blob stored under *content_hash*, or None."""
        if content_hash is None:
            return
        prefix = self._get_prefix(content_hash)
        if prefix is None:
            return
        candidates = self.bucket.list_blobs(max_results=1, prefix=prefix)
        return next(iter(candidates), None)

    def archive_file(self, file_path, content_hash=None):
        """Store the file at *file_path* in the bucket, keyed by its
        content hash (computed if not supplied).  Returns the hash."""
        if content_hash is None:
            content_hash = checksum(file_path)
        if self._locate_blob(content_hash) is None:
            # Not yet archived: upload under the canonical 'data' key.
            key = os.path.join(self._get_prefix(content_hash), 'data')
            Blob(key, self.bucket).upload_from_filename(file_path)
        return content_hash

    def load_file(self, content_hash, file_name=None, temp_path=None):
        """Download the blob for *content_hash* to a local path for
        further processing; returns the path, or None if missing."""
        blob = self._locate_blob(content_hash)
        if blob is None:
            return
        local_path = self._local_path(content_hash, file_name, temp_path)
        blob.download_to_filename(local_path)
        return local_path

    def generate_url(self, content_hash, file_name=None, mime_type=None):
        """Create a signed, time-limited download URL for *content_hash*;
        returns None when the blob does not exist."""
        blob = self._locate_blob(content_hash)
        if blob is None:
            return
        disposition = ('inline; filename=%s' % file_name) if file_name else None
        expire = datetime.utcnow() + timedelta(seconds=self.TIMEOUT)
        return blob.generate_signed_url(
            expire,
            response_type=mime_type,
            response_disposition=disposition)
class GoogleStorageArchive(VirtualArchive):
    """Content-addressed file archive backed by a Google Cloud Storage
    bucket, with retry/backoff around uploads and downloads."""

    # Lifetime (seconds) for signed URLs and the bucket CORS max-age.
    TIMEOUT = 84600

    def __init__(self, bucket=None):
        super(GoogleStorageArchive, self).__init__(bucket)
        self.client = Client()
        log.info("Archive: gs://%s", bucket)
        found = self.client.lookup_bucket(bucket)
        # Create the bucket on first use, then (re-)apply CORS policy.
        self.bucket = found if found is not None \
            else self.client.create_bucket(bucket)
        self.upgrade()

    def upgrade(self):
        """(Re-)apply a permissive CORS policy to the bucket."""
        cors_rule = {
            "origin": ['*'],
            "method": ['GET', 'HEAD', 'OPTIONS'],
            "responseHeader": ['*'],
            "maxAgeSeconds": self.TIMEOUT
        }
        self.bucket.cors = [cors_rule]
        self.bucket.update()

    def _locate_blob(self, content_hash):
        """Return a blob stored under *content_hash*, or None."""
        if content_hash is None:
            return
        prefix = self._get_prefix(content_hash)
        if prefix is None:
            return
        # Fast path: the canonical '<prefix>/data' object.
        candidate = Blob(os.path.join(prefix, 'data'), self.bucket)
        if candidate.exists():
            return candidate
        # Fallback: any single object under the prefix.
        listing = self.bucket.list_blobs(max_results=1, prefix=prefix)
        return next(iter(listing), None)

    def archive_file(self, file_path, content_hash=None, mime_type=None):
        """Upload *file_path* under its content hash, retrying transient
        failures with backoff.  Returns the hash on success."""
        file_path = ensure_path(file_path)
        if content_hash is None:
            content_hash = checksum(file_path)
        if content_hash is None:
            return
        file_path = ensure_posix_path(file_path)
        for attempt in service_retries():
            try:
                key = os.path.join(self._get_prefix(content_hash), 'data')
                blob = Blob(key, self.bucket)
                blob.upload_from_filename(file_path, content_type=mime_type)
                return content_hash
            except FAILURES:
                log.exception("Store error in GS")
                backoff(failures=attempt)

    def load_file(self, content_hash, file_name=None, temp_path=None):
        """Download the blob for *content_hash* to a local path, retrying
        transient failures; returns the path, or None."""
        for attempt in service_retries():
            try:
                blob = self._locate_blob(content_hash)
                if blob is not None:
                    local_path = self._local_path(content_hash, file_name,
                                                  temp_path)
                    blob.download_to_filename(local_path)
                    return local_path
            except FAILURES:
                log.exception("Load error in GS")
                backoff(failures=attempt)
        # Returns None for "persistent error" as well as "file not found" :/
        log.debug("[%s] not found, or the backend is down.", content_hash)

    def generate_url(self, content_hash, file_name=None, mime_type=None):
        """Create a signed, time-limited download URL for *content_hash*;
        returns None when the blob does not exist."""
        blob = self._locate_blob(content_hash)
        if blob is None:
            return
        disposition = None
        if file_name is not None:
            disposition = 'inline; filename=%s' % file_name
        expire = datetime.utcnow() + timedelta(seconds=self.TIMEOUT)
        return blob.generate_signed_url(
            expire,
            response_type=mime_type,
            response_disposition=disposition)