def delete_gcp_file(url):
    print('###### start removing %s' % url)
    client = storage.Client.from_service_account_json(settings.GCP_KEY_PATH)
    bucket = client.get_bucket(settings.BUCKET_NAME)
    blob = Blob(gcp_path(url), bucket)
    blob.delete()
    print('###### finished removing %s' % url)
def txfr_blob(filename: str,
              bq: BigQueryer = PostBigQueryer(),
              cs: CloudStorager = CloudStorager()):
    """
    Uploads the blob to BigQuery. This would probably be better as a shell script.

    :param cs:
    :param bq:
    :param filename:
    :return:
    """
    tm = current_time_ms()  # pain in the ass to get nanotime in python apparently
    objname = 'api-update-blob-{}'.format(tm)
    blob = Blob(objname, cs.get_cloud_storage_bucket())
    logger.info("Uploading file (this will take a long time)... ")
    blob.upload_from_filename(filename)

    # change this to change table
    table = bq.get_bigquery_table()
    uri = 'gs://' + cs.bucket + "/" + objname
    logger.info("Loading file to BQ...")
    # insert into tmp table
    # tmptable = bq.client.dataset('forums').table(objname)
    job = LoadTableFromStorageJob('api-job-{}'.format(tm), table, [uri],
                                  client=bq.client)
    job.write_disposition = 'WRITE_APPEND'
    job.source_format = 'AVRO'
    job.begin()
    wait_for_job(job)

    logger.info("Cleaning up...")
    blob.delete(cs.client)
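# Note: the snippet above targets the pre-0.28 google-cloud-bigquery API
# (LoadTableFromStorageJob, job.begin(), wait_for_job), which no longer exists
# in current releases. Below is a rough sketch of the same load-then-cleanup
# step against the current client library; the bucket, object, and table names
# are placeholders, not values from the original code.
from google.cloud import bigquery, storage


def load_avro_then_cleanup(bucket_name: str, objname: str, table_id: str) -> None:
    bq_client = bigquery.Client()
    gcs_client = storage.Client()

    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.AVRO,
        write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
    )
    # Start the load job from the staged object and block until it finishes.
    load_job = bq_client.load_table_from_uri(
        f"gs://{bucket_name}/{objname}", table_id, job_config=job_config)
    load_job.result()

    # Remove the staging object once the load has succeeded.
    gcs_client.bucket(bucket_name).blob(objname).delete()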
def handle_bq_lock(gcs_client: storage.Client, lock_blob: storage.Blob,
                   next_job_id: Optional[str]):
    """Reclaim the lock blob for the new job id (in-place) or delete the lock
    blob if next_job_id is None."""
    try:
        if next_job_id:
            if lock_blob.exists(client=gcs_client):
                lock_blob.upload_from_string(
                    next_job_id,
                    if_generation_match=lock_blob.generation,
                    client=gcs_client)
            else:
                # This happens when submitting the first job in the backlog
                lock_blob.upload_from_string(next_job_id,
                                             if_generation_match=0,
                                             client=gcs_client)
        else:
            print("releasing lock at: "
                  f"gs://{lock_blob.bucket.name}/{lock_blob.name}")
            lock_blob.delete(
                if_generation_match=lock_blob.generation,
                client=gcs_client,
            )
    except google.api_core.exceptions.PreconditionFailed as err:
        raise exceptions.BacklogException(
            f"The lock at gs://{lock_blob.bucket.name}/{lock_blob.name} "
            f"was changed by another process.") from err
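# handle_bq_lock above relies on GCS generation preconditions for optimistic
# locking: if_generation_match=0 only succeeds when the object does not yet
# exist, and matching the current generation guards updates and deletes
# against a concurrent writer. A minimal standalone sketch of that pattern
# (bucket and object names are assumptions, not from the original code):
from google.api_core import exceptions as api_exceptions
from google.cloud import storage


def try_acquire_lock(gcs_client: storage.Client, bucket_name: str,
                     lock_name: str, job_id: str) -> bool:
    """Returns True if this caller created the lock object."""
    lock = gcs_client.bucket(bucket_name).blob(lock_name)
    try:
        # Fails with PreconditionFailed (HTTP 412) if the lock already exists.
        lock.upload_from_string(job_id, if_generation_match=0)
        return True
    except api_exceptions.PreconditionFailed:
        return False


def release_lock(gcs_client: storage.Client, bucket_name: str,
                 lock_name: str) -> None:
    lock = gcs_client.bucket(bucket_name).blob(lock_name)
    lock.reload()  # populate lock.generation from the current object metadata
    # Delete only if nobody has rewritten the lock since we read it.
    lock.delete(if_generation_match=lock.generation)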
def test_blob():
    """Provides a pre-existing blob in the test bucket."""
    bucket = storage.Client().bucket(BUCKET)
    blob_name = "test_blob_{}".format(uuid.uuid4().hex)
    blob = Blob(
        blob_name,
        bucket,
        encryption_key=TEST_ENCRYPTION_KEY_DECODED,
    )
    content = "Hello, is it me you're looking for?"
    blob.upload_from_string(content)

    yield blob.name, content

    # To delete an encrypted blob, you have to provide the same key
    # used for the blob. When you provide a wrong key, you'll get
    # NotFound.
    try:
        # Clean up for the case that the rotation didn't occur.
        blob.delete()
    except NotFound as e:
        # For the case that the rotation succeeded.
        print("Ignoring 404, detail: {}".format(e))
        blob = Blob(blob_name, bucket,
                    encryption_key=TEST_ENCRYPTION_KEY_2_DECODED)
        blob.delete()
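# The fixture above supports a key-rotation test: cleanup first tries to
# delete with the original customer-supplied key and falls back to the rotated
# key if the blob was re-encrypted. A minimal sketch of rotating a
# customer-supplied encryption key with Blob.rewrite (function and parameter
# names here are assumptions, not taken from the original test suite):
from google.cloud import storage


def rotate_csek(bucket_name: str, blob_name: str,
                current_key: bytes, new_key: bytes) -> None:
    client = storage.Client()
    bucket = client.bucket(bucket_name)
    source = bucket.blob(blob_name, encryption_key=current_key)
    destination = bucket.blob(blob_name, encryption_key=new_key)

    # rewrite() may require multiple calls for large objects; loop on the token.
    token, _, _ = destination.rewrite(source)
    while token is not None:
        token, _, _ = destination.rewrite(source, token=token)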
def delete_file(module, client, name):
    try:
        bucket = client.get_bucket(module.params['bucket'])
        blob = Blob(name, bucket)
        blob.delete()
        return {}
    except google.cloud.exceptions.NotFound as e:
        module.fail_json(msg=str(e))
def drop_db(flask_app):
    """Remove the bucket and object that we used for testing."""
    storage_client = storage.Client()
    bucket_name = flask_app.config.get('DATA_BUCKET_NAME')
    bucket = storage_client.get_bucket(bucket_name)
    blob = Blob(flask_app.config.get('DATA_FILE_NAME'), bucket)
    if blob.exists():
        blob.delete()
    bucket.delete()
def remove_blob_quietly(
    gcs_client: storage.Client,
    blob: storage.Blob,
):
    """Removes a blob and eats the error if it doesn't exist."""
    try:
        blob.delete(client=gcs_client)
    except google.api_core.exceptions.NotFound:
        print(f"Attempted to delete {blob.name=} "
              "but the file wasn't found.")
def delete_documents(documents, uid):
    """
    Deletes the given user's specified documents

    Args:
        documents - list of document_names
        uid - firestore user's document id
    """
    for document in documents:
        blob = Blob(get_cloud_folder(uid) + document,
                    bucket,
                    encryption_key=encryption_key)
        blob.delete()
def get_container(self):
    try:
        container = self.storage_client.get_bucket(self.CONTAINER)
        # Test if the container is accessible
        blob = Blob('AccessTestByServiceFabrikPythonLibrary', container)
        blob.upload_from_string(
            'Sample Message for AccessTestByServiceFabrikPythonLibrary',
            content_type='text/plain')
        blob.delete()
        return container
    except Exception as error:
        self.logger.error(
            '[GCP] [STORAGE] ERROR: Unable to find or access container {}.\n{}'.format(
                self.CONTAINER, error))
        return None
def handle_bq_lock(gcs_client: storage.Client,
                   lock_blob: storage.Blob,
                   next_job_id: Optional[str],
                   table: bigquery.TableReference,
                   retry_attempt_cnt: Optional[int] = None):
    """Reclaim the lock blob for the new job id (in-place) or delete the lock
    blob if next_job_id is None."""
    try:
        if next_job_id:
            lock_blob_contents = json.dumps(
                dict(job_id=next_job_id,
                     table=table.to_api_repr(),
                     retry_attempt_cnt=retry_attempt_cnt))
            logging.log_with_table(
                table,
                f"Writing the following content to lock_blob {lock_blob.name}:"
                f" {dict(job_id=next_job_id, table=table.to_api_repr(), retry_attempt_cnt=retry_attempt_cnt)}"
            )
            if lock_blob.exists(client=gcs_client):
                lock_blob.upload_from_string(
                    lock_blob_contents,
                    if_generation_match=lock_blob.generation,
                    client=gcs_client)
            else:
                # This happens when submitting the first job in the backlog
                lock_blob.upload_from_string(
                    lock_blob_contents,
                    if_generation_match=0,  # noqa: E126
                    client=gcs_client)
        else:
            logging.log_with_table(
                table, "releasing lock at: "
                f"gs://{lock_blob.bucket.name}/{lock_blob.name}")
            lock_blob.delete(
                if_generation_match=lock_blob.generation,
                client=gcs_client,
            )
    except (google.api_core.exceptions.PreconditionFailed,
            google.api_core.exceptions.NotFound) as err:
        if isinstance(err, google.api_core.exceptions.PreconditionFailed):
            raise exceptions.BacklogException(
                f"The lock at gs://{lock_blob.bucket.name}/{lock_blob.name} "
                f"was changed by another process.") from err
        logging.log_with_table(
            table,
            "Tried deleting a lock blob that was either already deleted "
            "or never existed.")
def delete(self, name):
    blob = Blob(name, self.bucket)
    blob.delete()
def handle_backlog(
    gcs_client: storage.Client,
    bq_client: bigquery.Client,
    bkt: storage.Bucket,
    lock_blob: storage.Blob,
    backfill_blob: storage.Blob,
):
    """Submit the next item in the _backlog if it is non-empty, or clean up
    the _BACKFILL and _bqlock files.

    Args:
        gcs_client: storage.Client
        bq_client: bigquery.Client
        bkt: storage.Bucket
        lock_blob: storage.Blob _bqlock blob
        backfill_blob: storage.Blob _BACKFILL blob
    Returns:
        bool: should this backlog subscriber exit
    """
    table_prefix = utils.get_table_prefix(gcs_client, backfill_blob)
    check_backlog_time = time.monotonic()
    next_backlog_file = utils.get_next_backlog_item(gcs_client, bkt,
                                                    table_prefix)
    if next_backlog_file:
        next_success_file: storage.Blob = bkt.blob(
            next_backlog_file.name.replace("/_backlog/", "/"))
        if not next_success_file.exists(client=gcs_client):
            raise exceptions.BacklogException(
                "backlog contains "
                f"gs://{next_backlog_file.bucket}/{next_backlog_file.name} "
                "but the corresponding success file does not exist at: "
                f"gs://{next_success_file.bucket}/{next_success_file.name}")
        print("applying next batch for: "
              f"gs://{next_success_file.bucket}/{next_success_file.name}")
        next_job_id = utils.create_job_id(next_success_file.name)
        utils.apply(gcs_client, bq_client, next_success_file, lock_blob,
                    next_job_id)
        return False  # BQ job running

    print("no more files found in the backlog; deleting backfill blob")
    backfill_blob.delete(if_generation_match=backfill_blob.generation,
                         client=gcs_client)
    if (check_backlog_time + constants.ENSURE_SUBSCRIBER_SECONDS <
            time.monotonic()):
        print("checking if the backlog is still empty for "
              f"gs://{bkt.name}/{table_prefix}/_backlog/. "
              f"There was more than {constants.ENSURE_SUBSCRIBER_SECONDS} "
              "seconds between listing items on the backlog and "
              f"deleting the {constants.BACKFILL_FILENAME}. "
              "This should not happen often but is meant to alleviate a "
              "race condition in the event that the delete operation "
              "was delayed or had to be retried for a long time.")
        next_backlog_file = utils.get_next_backlog_item(
            gcs_client, bkt, table_prefix)
        if next_backlog_file:
            # The backfill file was deleted but the backlog is
            # not empty. Re-trigger the backfill subscriber loop by
            # dropping a new backfill file.
            start_backfill_subscriber_if_not_running(gcs_client, bkt,
                                                     table_prefix)
            return True  # we are re-triggering a new backlog subscriber

    table = None
    # Get table from lock blob
    lock_contents_str = utils.read_gcs_file_if_exists(
        gcs_client, f"gs://{bkt.name}/{lock_blob.name}")
    if lock_contents_str:
        lock_contents: Dict = json.loads(lock_contents_str)
        if lock_contents:
            print(
                json.dumps(
                    dict(message="View lock contents in jsonPayload for "
                         f"gs://{bkt.name}/{lock_blob.name}",
                         lock_contents=lock_contents)))
            table = bigquery.TableReference.from_api_repr(
                lock_contents.get('table'))
    utils.handle_bq_lock(gcs_client, lock_blob, None, table)
    print(f"backlog is empty for gs://{bkt.name}/{table_prefix}. "
          "backlog subscriber exiting.")
    return True  # the backlog is empty