def delete_gcp_file(url):
    print('###### start removing %s' % url)
    client = storage.Client.from_service_account_json(settings.GCP_KEY_PATH)
    bucket = client.get_bucket(settings.BUCKET_NAME)
    blob = Blob(gcp_path(url), bucket)
    blob.delete()
    print('###### removing success %s' % url)
def txfr_blob(filename: str,
              bq: BigQueryer = PostBigQueryer(),
              cs: CloudStorager = CloudStorager()):
    """
    Uploads the file to Cloud Storage and loads it into BigQuery. This would
    probably be better as a shell script.
    :param filename: path of the local file to upload
    :param bq: BigQueryer wrapping the BigQuery client and target table
    :param cs: CloudStorager wrapping the Cloud Storage client and bucket
    :return:
    """
    tm = current_time_ms()  # nanosecond timestamps are awkward to get in Python
    objname = 'api-update-blob-{}'.format(tm)
    blob = Blob(objname, cs.get_cloud_storage_bucket())
    logger.info("Uploading file (this will take a long time)... ")
    blob.upload_from_filename(filename)
    # change this to change table
    table = bq.get_bigquery_table()
    uri = 'gs://' + cs.bucket + "/" + objname
    logger.info("Loading file to BQ...")
    # insert into tmp table
    # tmptable = bq.client.dataset('forums').table(objname)
    job = LoadTableFromStorageJob('api-job-{}'.format(tm),
                                  table, [uri],
                                  client=bq.client)
    job.write_disposition = 'WRITE_APPEND'
    job.source_format = 'AVRO'
    job.begin()
    wait_for_job(job)
    logger.info("Cleaning up...")
    blob.delete(cs.client)
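The txfr_blob example above appears to target the legacy LoadTableFromStorageJob interface from older google-cloud-bigquery releases. As a rough sketch (not the author's code), the same append-from-GCS load can be written against the current client API; the table id below is a placeholder.

from google.cloud import bigquery

def load_avro_from_gcs(uri: str, table_id: str) -> None:
    # Sketch: append an Avro object in GCS to a BigQuery table using the
    # modern load_table_from_uri API. table_id is a placeholder such as
    # "project.dataset.table".
    client = bigquery.Client()
    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.AVRO,
        write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
    )
    load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
    load_job.result()  # blocks until the load completes; raises on failure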
Example #3
def handle_bq_lock(gcs_client: storage.Client, lock_blob: storage.Blob,
                   next_job_id: Optional[str]):
    """Reclaim the lock blob for the new job id (in-place) or delete the lock
    blob if next_job_id is None."""
    try:
        if next_job_id:
            if lock_blob.exists(client=gcs_client):
                lock_blob.upload_from_string(
                    next_job_id,
                    if_generation_match=lock_blob.generation,
                    client=gcs_client)
            else:  # This happens when submitting the first job in the backlog
                lock_blob.upload_from_string(next_job_id,
                                             if_generation_match=0,
                                             client=gcs_client)
        else:
            print("releasing lock at: "
                  f"gs://{lock_blob.bucket.name}/{lock_blob.name}")
            lock_blob.delete(
                if_generation_match=lock_blob.generation,
                client=gcs_client,
            )
    except google.api_core.exceptions.PreconditionFailed as err:
        raise exceptions.BacklogException(
            f"The lock at gs://{lock_blob.bucket.name}/{lock_blob.name} "
            f"was changed by another process.") from err
Example #4
def test_blob():
    """Provides a pre-existing blob in the test bucket."""
    bucket = storage.Client().bucket(BUCKET)
    blob_name = "test_blob_{}".format(uuid.uuid4().hex)
    blob = Blob(
        blob_name,
        bucket,
        encryption_key=TEST_ENCRYPTION_KEY_DECODED,
    )
    content = "Hello, is it me you're looking for?"
    blob.upload_from_string(content)

    yield blob.name, content

    # To delete an encrypted blob, you have to provide the same key
    # used for the blob. When you provide a wrong key, you'll get
    # NotFound.
    try:
        # Clean up for the case that the rotation didn't occur.
        blob.delete()
    except NotFound as e:
        # For the case that the rotation succeeded.
        print("Ignoring 404, detail: {}".format(e))
        blob = Blob(blob_name,
                    bucket,
                    encryption_key=TEST_ENCRYPTION_KEY_2_DECODED)
        blob.delete()
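Since the fixture's teardown anticipates a key rotation having happened, here is a hedged sketch of what that rotation could look like with Blob.rewrite, reusing the key constants from the fixture; the bucket and blob name are whatever the test created.

from google.cloud.storage import Blob

def rotate_key(bucket, blob_name):
    # Sketch: re-encrypt an object under a new customer-supplied key.
    source = Blob(blob_name, bucket, encryption_key=TEST_ENCRYPTION_KEY_DECODED)
    destination = Blob(blob_name, bucket,
                       encryption_key=TEST_ENCRYPTION_KEY_2_DECODED)
    token = None
    while True:  # large objects may need more than one rewrite call
        token, bytes_rewritten, total_bytes = destination.rewrite(source, token=token)
        if token is None:
            break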
def delete_file(module, client, name):
    try:
        bucket = client.get_bucket(module.params['bucket'])
        blob = Blob(name, bucket)
        blob.delete()
        return {}
    except google.cloud.exceptions.NotFound as e:
        module.fail_json(msg=str(e))
Example #6
def drop_db(flask_app):
    """
        Remove the bucket and object that we used for testing
    """
    storage_client = storage.Client()
    bucket_name = flask_app.config.get('DATA_BUCKET_NAME')
    bucket = storage_client.get_bucket(bucket_name)
    blob = Blob(flask_app.config.get('DATA_FILE_NAME'), bucket)
    if blob.exists():
        blob.delete()
    bucket.delete()
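A hedged alternative for the same teardown: Bucket.delete(force=True) first deletes the bucket's remaining objects (the library caps this at a few hundred) and then the bucket itself, so the explicit existence check can be dropped.

from google.cloud import storage

def drop_db_force(flask_app):
    # Sketch: tear down the test bucket and its contents in one call.
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(flask_app.config.get('DATA_BUCKET_NAME'))
    bucket.delete(force=True)  # deletes contained blobs first, then the bucket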
Example #7
def remove_blob_quietly(
    gcs_client: storage.Client,
    blob: storage.Blob,
):
    """
    Removes a blob and eats the error if it doesn't exist.
    """
    try:
        blob.delete(client=gcs_client)
    except google.api_core.exceptions.NotFound:
        print(f"Attempted to delete {blob.name=} "
              f"but the file wasn't found.")
Example #8
def delete_documents(documents, uid):
    """ 
        Deletes the given user's specified documents 

        Args:
            documents - list of document_names
            uid - firestore user's document id
    """
    for document in documents:
        blob = Blob(get_cloud_folder(uid) + document,
                    bucket,
                    encryption_key=encryption_key)
        blob.delete()
def get_container(self):
    try:
        container = self.storage_client.get_bucket(self.CONTAINER)
        # Test if the container is accessible
        blob = Blob('AccessTestByServiceFabrikPythonLibrary', container)
        blob.upload_from_string(
            'Sample Message for AccessTestByServiceFabrikPythonLibrary',
            content_type='text/plain')
        blob.delete()
        return container
    except Exception as error:
        self.logger.error('[GCP] [STORAGE] ERROR: Unable to find or access container {}.\n{}'.format(
            self.CONTAINER, error))
        return None
Example #10
def handle_bq_lock(gcs_client: storage.Client,
                   lock_blob: storage.Blob,
                   next_job_id: Optional[str],
                   table: bigquery.TableReference,
                   retry_attempt_cnt: Optional[int] = None):
    """Reclaim the lock blob for the new job id (in-place) or delete the lock
    blob if next_job_id is None."""
    try:
        if next_job_id:
            lock_blob_contents = json.dumps(
                dict(job_id=next_job_id,
                     table=table.to_api_repr(),
                     retry_attempt_cnt=retry_attempt_cnt))
            logging.log_with_table(
                table,
                f"Writing the following content to lock_blob {lock_blob.name}:"
                f" {dict(job_id=next_job_id, table=table.to_api_repr(), retry_attempt_cnt=retry_attempt_cnt)}"
            )
            if lock_blob.exists(client=gcs_client):
                lock_blob.upload_from_string(
                    lock_blob_contents,
                    if_generation_match=lock_blob.generation,
                    client=gcs_client)
            else:  # This happens when submitting the first job in the backlog
                lock_blob.upload_from_string(
                    lock_blob_contents,
                    if_generation_match=0,  # noqa: E126
                    client=gcs_client)
        else:
            logging.log_with_table(
                table, "releasing lock at: "
                f"gs://{lock_blob.bucket.name}/{lock_blob.name}")
            lock_blob.delete(
                if_generation_match=lock_blob.generation,
                client=gcs_client,
            )
    except (google.api_core.exceptions.PreconditionFailed,
            google.api_core.exceptions.NotFound) as err:
        if isinstance(err, google.api_core.exceptions.PreconditionFailed):
            raise exceptions.BacklogException(
                f"The lock at gs://{lock_blob.bucket.name}/{lock_blob.name} "
                f"was changed by another process.") from err
        logging.log_with_table(
            table,
            "Tried deleting a lock blob that was either already deleted "
            "or never existed.")
Example #11
def delete(self, name):
    blob = Blob(name, self.bucket)
    blob.delete()
Example #12
def handle_backlog(
    gcs_client: storage.Client,
    bq_client: bigquery.Client,
    bkt: storage.Bucket,
    lock_blob: storage.Blob,
    backfill_blob: storage.Blob,
):
    """submit the next item in the _backlog if it is non-empty or clean up the
    _BACKFILL and _bqlock files.
    Args:
        gcs_client: storage.Client
        bq_client: bigquery.Client
        bkt: storage.Bucket
        lock_blob: storage.Blob _bqlock blob
        backfill_blob: storage.blob _BACKFILL blob
    Returns:
        bool: should this backlog subscriber exit
    """
    table_prefix = utils.get_table_prefix(gcs_client, backfill_blob)
    check_backlog_time = time.monotonic()
    next_backlog_file = utils.get_next_backlog_item(gcs_client, bkt,
                                                    table_prefix)
    if next_backlog_file:
        next_success_file: storage.Blob = bkt.blob(
            next_backlog_file.name.replace("/_backlog/", "/"))
        if not next_success_file.exists(client=gcs_client):
            raise exceptions.BacklogException(
                "backlog contains "
                f"gs://{next_backlog_file.bucket}/{next_backlog_file.name} "
                "but the corresponding success file does not exist at: "
                f"gs://{next_success_file.bucket}/{next_success_file.name}")
        print("applying next batch for:"
              f"gs://{next_success_file.bucket}/{next_success_file.name}")
        next_job_id = utils.create_job_id(next_success_file.name)
        utils.apply(gcs_client, bq_client, next_success_file, lock_blob,
                    next_job_id)
        return False  # BQ job running
    print("no more files found in the backlog deleteing backfill blob")
    backfill_blob.delete(if_generation_match=backfill_blob.generation,
                         client=gcs_client)
    if (check_backlog_time + constants.ENSURE_SUBSCRIBER_SECONDS <
            time.monotonic()):
        print("checking if the backlog is still empty for "
              f"gs://${bkt.name}/{table_prefix}/_backlog/"
              f"There was more than {constants.ENSURE_SUBSCRIBER_SECONDS}"
              " seconds between listing items on the backlog and "
              f"deleting the {constants.BACKFILL_FILENAME}. "
              "This should not happen often but is meant to alleviate a "
              "race condition in the event that something caused the "
              "delete operation was delayed or had to be retried for a "
              "long time.")
        next_backlog_file = utils.get_next_backlog_item(
            gcs_client, bkt, table_prefix)
        if next_backlog_file:
            # The backfill file was deleted but the backlog is
            # not empty. Re-trigger the backfill subscriber loop by
            # dropping a new backfill file.
            start_backfill_subscriber_if_not_running(gcs_client, bkt,
                                                     table_prefix)
            return True  # we are re-triggering a new backlog subscriber
    table = None
    # Get table from lock blob
    lock_contents_str = utils.read_gcs_file_if_exists(
        gcs_client, f"gs://{bkt.name}/{lock_blob.name}")
    if lock_contents_str:
        lock_contents: Dict = json.loads(lock_contents_str)
        if lock_contents:
            print(
                json.dumps(
                    dict(message=f"View lock contents in jsonPayload for"
                         f" gs://{bkt.name}/{lock_blob.name}",
                         lock_contents=lock_contents)))
            table = bigquery.TableReference.from_api_repr(
                lock_contents.get('table'))
    utils.handle_bq_lock(gcs_client, lock_blob, None, table)
    print(f"backlog is empty for gs://{bkt.name}/{table_prefix}. "
          "backlog subscriber exiting.")
    return True  # the backlog is empty
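A hypothetical driver loop for handle_backlog, using its boolean return value as the exit signal. This is a simplification: the real subscriber would wait for the running BigQuery job rather than sleeping a fixed interval, and the 10-second delay is a placeholder, not part of the original code.

import time

def run_backlog_subscriber(gcs_client, bq_client, bkt, lock_blob, backfill_blob):
    # Sketch: keep processing the backlog until handle_backlog reports it is done.
    while True:
        if handle_backlog(gcs_client, bq_client, bkt, lock_blob, backfill_blob):
            return  # backlog empty, or a new subscriber was re-triggered
        time.sleep(10)  # placeholder polling interval while the BQ job runs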