Code Example #1
File: routes.py  Project: fckuligowski/abagdemo-hs
def get_data():
    """
       Retrieve the data file from GCP Storage, and return
       the file as a dictionary.
       Create the file, with dummy data, if it doesn't exist.
    """
    # Introduce a delay here.
    do_delay()
    # Start of the actual function
    rtn = None
    storage_client = storage.Client()
    bucket_name = current_app.config.get('DATA_BUCKET_NAME')
    print('bucket_name=%s' % bucket_name)
    try:
        bucket = storage_client.get_bucket(bucket_name)
    except Exception as e:
        bucket = storage_client.create_bucket(bucket_name)
    # Test if the data file is found in the bucket, and
    # create it if it doesn't exist.
    blob = Blob(current_app.config.get('DATA_FILE_NAME'), bucket)
    if not blob.exists():
        # Open the initial data file
        init_fname = current_app.config.get('INIT_DATA_FILE')
        with open(init_fname) as infile:
            init_data = json.load(infile)
        # Copy it to the storage bucket
        blob.upload_from_string(json.dumps(init_data, indent=4))
    data_str = blob.download_as_string()
    rtn = json.loads(data_str)
    print('GOT BAG DATA:')
    print(json.dumps(rtn, indent=4))
    return rtn, blob
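The try/except around get_bucket implements a get-or-create pattern, but it swallows every exception. A minimal sketch of the same idea, assuming only a missing bucket should trigger creation (the helper name get_or_create_bucket is not part of the original code; lookup_bucket returns None instead of raising when the bucket is absent):

from google.cloud import storage

def get_or_create_bucket(client: storage.Client, bucket_name: str) -> storage.Bucket:
    # lookup_bucket returns None when the bucket does not exist, so no broad
    # except clause is needed; any other API error still propagates.
    bucket = client.lookup_bucket(bucket_name)
    if bucket is None:
        bucket = client.create_bucket(bucket_name)
    return bucket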
Code Example #2
def wait_on_gcs_blob(gcs_client: storage.Client,
                     wait_blob: storage.Blob,
                     polling_timeout: int,
                     polling_interval: int = 1) -> bool:
    """"
    Wait for a GCS Object to exists.

    Args:
        gcs_client: storage.Client
        wait_blob: storage.Bllob the GCS to wait on.
        polling_timeout: int number of seconds to poll this job ID
        polling_interval: frequency to query the job state during polling
    Returns:
        bool: if the job ID has finished successfully. True if DONE without
        errors, False if RUNNING or PENDING
    Raises:
        exceptions.BigQueryJobFailure if the job failed.
        google.api_core.exceptions.NotFound if the job id cannot be found.
    """
    start_poll = time.monotonic()
    while time.monotonic() - start_poll < (polling_timeout - polling_interval):
        if wait_blob.exists(client=gcs_client):
            return True
        print(
            f"waiting on GCS file gs://{wait_blob.bucket.name}/{wait_blob.name}"
        )
        time.sleep(polling_interval)
    return False
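A small usage sketch of the polling helper above; the bucket and object names are hypothetical:

from google.cloud import storage

client = storage.Client()
# Hypothetical names, for illustration only
blob = storage.Blob("results/output.json", client.bucket("my-example-bucket"))
if wait_on_gcs_blob(client, blob, polling_timeout=60, polling_interval=5):
    print("object appeared within the timeout")
else:
    print("timed out waiting for the object")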
Code Example #3
def handle_bq_lock(gcs_client: storage.Client, lock_blob: storage.Blob,
                   next_job_id: Optional[str]):
    """Reclaim the lock blob for the new job id (in-place) or delete the lock
    blob if next_job_id is None."""
    try:
        if next_job_id:
            if lock_blob.exists(client=gcs_client):
                lock_blob.upload_from_string(
                    next_job_id,
                    if_generation_match=lock_blob.generation,
                    client=gcs_client)
            else:  # This happens when submitting the first job in the backlog
                lock_blob.upload_from_string(next_job_id,
                                             if_generation_match=0,
                                             client=gcs_client)
        else:
            print("releasing lock at: "
                  f"gs://{lock_blob.bucket.name}/{lock_blob.name}")
            lock_blob.delete(
                if_generation_match=lock_blob.generation,
                client=gcs_client,
            )
    except google.api_core.exceptions.PreconditionFailed as err:
        raise exceptions.BacklogException(
            f"The lock at gs://{lock_blob.bucket.name}/{lock_blob.name} "
            f"was changed by another process.") from err
Code Example #4
def blob_exists(bucket_name, blob_name):
    """
    Check whether the blob/key exists.
    """
    tries = 0

    while tries < NUM_TRIES:
        try:
            if bucket_exists(bucket_name):
                client = storage.Client()
                bucket = client.bucket(bucket_name)
                blob = Blob(blob_name, bucket)
                return blob.exists()
            else:
                return False
        except BadRequest:
            return False
        except Exception:
            time.sleep(300)
            tries += 1

    logger.error(
        "Can not check the status of the blob {} after multiple attempts".format(
            blob_name
        )
    )
    return False
Code Example #5
File: conftest.py  Project: fckuligowski/abagdemo-hs
def drop_db(flask_app):
    """
        Remove the bucket and object that we used for testing
    """
    storage_client = storage.Client()
    bucket_name = flask_app.config.get('DATA_BUCKET_NAME')
    bucket = storage_client.get_bucket(bucket_name)
    blob = Blob(flask_app.config.get('DATA_FILE_NAME'), bucket)
    if blob.exists():
        blob.delete()
    bucket.delete()
Code Example #6
 def _save(self, name, content):
     name = os.path.basename(name)
     new_name = name
     count = 0
     while True:
         blob = Blob(new_name, self.bucket, chunk_size=1024 * 256)
         if not blob.exists():
             break
         count += 1
         new_name = name + '.%d' % count
     blob.upload_from_file(content)
     blob.make_public()
     return new_name
Code Example #7
def create_df_with_yesterday(bucket, interval_date, raw_csv):
    dfm = pd.read_csv(StringIO(raw_csv))
    df_today = fix_dst_issue(dfm)

    yesterday = interval_date - timedelta(days=1)
    blob_name_yesterday = f"{LEMS_STORAGE_PATH_PREFIX}/{yesterday.year}/lems_data_{yesterday.strftime('%Y%m%d')}.csv"
    blob_yesterday = Blob(blob_name_yesterday, bucket)
    csv_yesterday = None
    if blob_yesterday.exists():
        csv_yesterday = blob_yesterday.download_as_string().decode('utf-8')
        df_yesterday = fix_dst_issue(pd.read_csv(StringIO(csv_yesterday)))
        return df_today.append(df_yesterday)

    return df_today
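Note that DataFrame.append was deprecated and removed in pandas 2.0; a hedged, drop-in replacement using pd.concat that keeps today's rows first would be:

import pandas as pd

# Equivalent to df_today.append(df_yesterday) on pandas 2.x
combined = pd.concat([df_today, df_yesterday])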
Code Example #8
File: gs.py  Project: simonwoerpel/servicelayer
    def _locate_blob(self, content_hash):
        """Check if a file with the given hash exists on S3."""
        if content_hash is None:
            return
        prefix = self._get_prefix(content_hash)
        if prefix is None:
            return

        # First, check the standard file name:
        blob = Blob(os.path.join(prefix, 'data'), self.bucket)
        if blob.exists():
            return blob

        # Second, fall back to the first object stored under this prefix:
        for blob in self.bucket.list_blobs(max_results=1, prefix=prefix):
            return blob
Code Example #9
def handle_bq_lock(gcs_client: storage.Client,
                   lock_blob: storage.Blob,
                   next_job_id: Optional[str],
                   table: bigquery.TableReference,
                   retry_attempt_cnt: Optional[int] = None):
    """Reclaim the lock blob for the new job id (in-place) or delete the lock
    blob if next_job_id is None."""
    try:
        if next_job_id:
            lock_blob_contents = json.dumps(
                dict(job_id=next_job_id,
                     table=table.to_api_repr(),
                     retry_attempt_cnt=retry_attempt_cnt))
            logging.log_with_table(
                table,
                f"Writing the following content to lock_blob {lock_blob.name}:"
                f" {dict(job_id=next_job_id, table=table.to_api_repr(), retry_attempt_cnt=retry_attempt_cnt)}"
            )
            if lock_blob.exists(client=gcs_client):
                lock_blob.upload_from_string(
                    lock_blob_contents,
                    if_generation_match=lock_blob.generation,
                    client=gcs_client)
            else:  # This happens when submitting the first job in the backlog
                lock_blob.upload_from_string(
                    lock_blob_contents,
                    if_generation_match=0,  # noqa: E126
                    client=gcs_client)
        else:
            logging.log_with_table(
                table, "releasing lock at: "
                f"gs://{lock_blob.bucket.name}/{lock_blob.name}")
            lock_blob.delete(
                if_generation_match=lock_blob.generation,
                client=gcs_client,
            )
    except (google.api_core.exceptions.PreconditionFailed,
            google.api_core.exceptions.NotFound) as err:
        if isinstance(err, google.api_core.exceptions.PreconditionFailed):
            raise exceptions.BacklogException(
                f"The lock at gs://{lock_blob.bucket.name}/{lock_blob.name} "
                f"was changed by another process.") from err
        logging.log_with_table(
            table,
            "Tried deleting a lock blob that was either already deleted "
            "or never existed.")
Code Example #10
def up_to_date(input_blob: storage.Blob, output_blob: storage.Blob):
    """
    Checks if the blob is up-to-date.
    :param input_blob:
    :param output_blob:
    :return: True if the output blob is up-to-date. If the output blob
    doesn't exist or is older than the input blob, returns False.
    """
    if not output_blob.exists():
        return False

    input_blob.reload()
    output_blob.reload()
    assert input_blob.updated is not None, 'input blob should exist'
    if input_blob.updated > output_blob.updated:
        return False

    return True
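A usage sketch of the freshness check above, with hypothetical bucket and object names; the regeneration step is left as a placeholder:

from google.cloud import storage

client = storage.Client()
bucket = client.bucket("my-example-bucket")  # hypothetical name
src = bucket.blob("raw/input.csv")
dst = bucket.blob("processed/output.csv")
if not up_to_date(src, dst):
    # regenerate dst from src here
    ...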
Code Example #11
 def exists(self, name):
     blob = Blob(name, self.bucket)
     return blob.exists()
Code Example #12
 def _create_folder_file():
     folder_key = path.join(root, '{0}_$folder$'.format(folder_name))
     blob = Blob(folder_key, gcp_bucket)
     if not blob.exists():
         blob.upload_from_string(data='')
Code Example #13
File: gs.py  Project: alephdata/servicelayer
 def _locate_key(self, key):
     if key is None:
         return
     blob = Blob(key, self.bucket)
     if blob.exists():
         return blob
Code Example #14
 def _cache_blob(self, local_path:str, gc_blob:storage.Blob):
     if not gc_blob.exists(): return
     with open(local_path, 'wb') as file:
         gc_blob.download_to_file(file)