Example no. 1
def read_chunk(report: dict,
               chunk: int = 4096,
               credentials=None,
               start: int = 0) -> str:
    client = storage.Client(credentials=(
        credentials.get_credentials() if credentials else None))

    path_segments = report['current_path'].split('/')
    report_bucket = path_segments[-2]
    report_blob_name = path_segments[-1].split('?')[0]

    source_bucket = Bucket(client, report_bucket)
    blob = source_bucket.blob(report_blob_name)

    # The end offset is inclusive, so this reads exactly `chunk` bytes from `start`.
    data = blob.download_as_string(start=start,
                                   end=start + chunk - 1,
                                   raw_download=True).decode('utf-8')
    return data
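A minimal usage sketch for read_chunk, assuming default application credentials; the report record and URL below are hypothetical, but the path has to end in <bucket>/<object> (optionally with a query string) because of how the URL is split above.

# Hypothetical report record; read_chunk only looks at 'current_path'.
report = {
    'current_path': 'https://storage.googleapis.com/my-reports-bucket/report.csv?generation=1'
}

first_chunk = read_chunk(report)                           # bytes 0 .. 4095
second_chunk = read_chunk(report, chunk=4096, start=4096)  # bytes 4096 .. 8191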
Example no. 2
def __getBlobUpdated(bucket: Bucket, blobName):
    """
    Takes a bucket object and a blob name and returns the blob's last-updated timestamp
    :param bucket: Bucket
    :param blobName: String
    :return: datetime of the blob's last update
    """
    return bucket.get_blob(blobName).updated
Example no. 3
def upload_from_file(bkt: storage.Bucket,
                     blob: str,
                     file: str,
                     delete_file: bool = True) -> str:
    b = bkt.blob(blob_name=blob)
    b.upload_from_filename(file)
    if delete_file:
        os.remove(file)
    return b.public_url
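A short usage sketch for upload_from_file, assuming default application credentials; the bucket name and local file are placeholders.

from google.cloud import storage

client = storage.Client()
bkt = client.bucket("my-upload-bucket")   # placeholder bucket name

# Uploads ./report.csv as gs://my-upload-bucket/reports/report.csv,
# removes the local copy, and returns the object's public URL.
public_url = upload_from_file(bkt, "reports/report.csv", "report.csv")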
Example no. 4
def download_metadata_from_gcs(bucket: storage.Bucket,
                               local_sample_path: ComparisonPath) -> None:
    (local_sample_path / "operations").mkdir_p()

    prefix = str(local_sample_path)
    blobs = bucket.list_blobs(prefix=prefix)
    for blob in blobs:
        if not blob.name.endswith('/digest.json'):
            logging.info(f'Downloading blob: {blob.name}')
            blob.download_to_filename(blob.name)
Example no. 5
def _UploadDirectory(local_dir: str, gcs_bucket: storage.Bucket, gcs_dir: str):
    """Upload the contents of a local directory to a GCS Bucket."""
    for file_name in os.listdir(local_dir):
        path = os.path.join(local_dir, file_name)
        if not os.path.isfile(path):
            logging.info("Skipping %s as it's not a file.", path)
            continue
        logging.info("Uploading: %s", path)
        gcs_blob = gcs_bucket.blob(f"{gcs_dir}/{file_name}")
        gcs_blob.upload_from_filename(path)
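A usage sketch for _UploadDirectory with a placeholder bucket and paths, again assuming default credentials.

from google.cloud import storage

client = storage.Client()
bucket = client.bucket("my-artifacts-bucket")   # placeholder bucket name

# Copies every regular file in ./build/out to gs://my-artifacts-bucket/builds/1234/,
# skipping subdirectories as the isfile() check above dictates.
_UploadDirectory("build/out", bucket, "builds/1234")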
Example no. 6
def revoke_expiring_gcs_access(
    bucket: storage.Bucket, role: str, user_email: str, prefix: Optional[str] = None
):
    """Revoke a bucket IAM policy change made by calling `grant_expiring_gcs_access`."""
    # see https://cloud.google.com/storage/docs/access-control/using-iam-permissions#code-samples_3
    policy = bucket.get_iam_policy(requested_policy_version=3)
    policy.version = 3

    # find and remove all matching policy bindings for this user if any exist
    for i in range(GOOGLE_MAX_DOWNLOAD_PERMISSIONS):
        removed_binding = _find_and_pop_binding(policy, prefix, role, user_email)
        if removed_binding is None:
            if i == 0:
                warnings.warn(
                    f"Tried to revoke a non-existent download IAM permission for {user_email}/{prefix}"
                )
            break

    bucket.set_iam_policy(policy)
Example no. 7
def read_weather_for_state_for_date(bucket: Bucket, bucket_raw_base_path: str,
                                    selected_state: str, date: datetime.date):
    yyyymmdd: str = date.strftime("%Y%m%d")
    blob = bucket.blob(
        f"{bucket_raw_base_path.format(date=yyyymmdd)}/{selected_state}.json.gz"
    )
    try:
        return json.loads(gunzip_bytes(blob.download_as_string()))
    except NotFound:
        return None
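A hedged usage sketch for read_weather_for_state_for_date; the bucket name and base-path template are assumptions, but the template must contain a {date} placeholder because the function formats it with the yyyymmdd string.

import datetime
from google.cloud import storage

bucket = storage.Client().bucket("weather-raw")   # placeholder bucket name
base_path = "raw/{date}"                          # placeholder prefix template

# Returns the parsed JSON document, or None when the object is missing.
data = read_weather_for_state_for_date(bucket, base_path, "CA",
                                       datetime.date(2021, 3, 1))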
Example no. 8
def storage(request):
    # create a random test bucket name
    bucket_name = "test_bucket_" + get_random_string(6, string.ascii_lowercase)

    storage = DjangoGCloudStorage(
        project=request.config.getoption("--gcs-project-name"),
        bucket=bucket_name,
        credentials_file_path=request.config.getoption(
            "--gcs-credentials-file"))

    # Make sure the bucket exists
    bucket = Bucket(storage.client, bucket_name)
    bucket.create(location=request.config.getoption("--gcs-bucket-location"))

    yield storage

    storage.bucket.delete_blobs(storage.bucket.list_blobs())

    storage.bucket.delete(force=True)
Example no. 9
def process_sqs_message(sqs_client, sqs_queue_url: str,
                        gcs_output_bucket: storage.Bucket,
                        gcs_output_prefix: str) -> None:
    """
    Process SQS Message

    :param sqs_client: AWS SQS client
    :param sqs_queue_url: AWS SQS Queue URL
    :param gcs_output_bucket: GCP GCS bucket
    :param gcs_output_prefix: GCP GCS object prefix
    """

    # Receive one message
    response = sqs_client.receive_message(QueueUrl=sqs_queue_url,
                                          MaxNumberOfMessages=1,
                                          VisibilityTimeout=0,
                                          WaitTimeSeconds=0)

    # Process message
    if 'Messages' in response and len(response['Messages']) > 0:
        message = response['Messages'][0]
        receipt_handle = message['ReceiptHandle']
        message_body = json.loads(message['Body'])
        message_id = message['MessageId']
        text = message_body['text']
        timestamp = message_body['timestamp']
        parsed_text = process_text(text)
        logger.info('Message received successfully!')

        # Send result to GCS
        result = {
            'id': message_id,
            'timestamp': timestamp,
            'text': text,
            'parsed_text': parsed_text
        }
        # json.dumps has no "encoding" argument in Python 3.
        result_string = json.dumps(result,
                                   ensure_ascii=False,
                                   indent=2)
        object_key = (
            f'{gcs_output_prefix.rstrip("/")}'
            f'/result_{message_id}_{datetime.fromisoformat(timestamp).strftime("%Y%m%dT%H%M%S")}.json'
        )
        blob = gcs_output_bucket.blob(object_key)
        blob.upload_from_string(result_string)
        logger.info(
            f'Sent result to `{object_key}` in `{gcs_output_bucket.name}`')

        # Delete message
        sqs_client.delete_message(QueueUrl=sqs_queue_url,
                                  ReceiptHandle=receipt_handle)
        logger.info('Message deleted successfully!')
    else:
        logger.info('No messages in queue')
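A wiring sketch for process_sqs_message, assuming boto3 and google-cloud-storage default credentials; the region, queue URL, bucket, and prefix are all placeholders, and each SQS message body is expected to be JSON with text and timestamp fields.

import boto3
from google.cloud import storage

sqs = boto3.client("sqs", region_name="us-east-1")             # placeholder region
results_bucket = storage.Client().bucket("my-results-bucket")  # placeholder bucket

# Pulls at most one message and, when one is available, writes the processed
# result as a JSON object under gs://my-results-bucket/results/.
process_sqs_message(sqs,
                    "https://sqs.us-east-1.amazonaws.com/123456789012/my-queue",
                    results_bucket, "results/")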
Example no. 10
def start_backfill_subscriber_if_not_running(
        gcs_client: Optional[storage.Client], bkt: storage.Bucket,
        table_prefix: str) -> Optional[storage.Blob]:
    """Start the backfill subscriber if it is not already running for this
    table prefix.

    Creates a backfill file for the table prefix if one does not exist.
    """
    if not gcs_client:
        gcs_client = storage.Client(client_info=constants.CLIENT_INFO)
    start_backfill = True
    # Do not start subscriber until START_BACKFILL_FILENAME has been dropped
    # at the table prefix.
    if constants.START_BACKFILL_FILENAME:
        start_backfill_blob = bkt.blob(
            f"{table_prefix}/{constants.START_BACKFILL_FILENAME}")
        start_backfill = start_backfill_blob.exists(client=gcs_client)
        if not start_backfill:
            print("not triggering backfill because "
                  f"gs://{start_backfill_blob.bucket.name}/"
                  f"{start_backfill_blob.name} was not found.")

    if start_backfill:
        # Create a _BACKFILL file for this table if not exists
        backfill_blob = bkt.blob(
            f"{table_prefix}/{constants.BACKFILL_FILENAME}")
        try:
            backfill_blob.upload_from_string("",
                                             if_generation_match=0,
                                             client=gcs_client)
            print("triggered backfill with "
                  f"gs://{backfill_blob.bucket.name}/{backfill_blob.name} "
                  f"created at {backfill_blob.time_created}.")
            return backfill_blob
        except google.api_core.exceptions.PreconditionFailed:
            backfill_blob.reload(client=gcs_client)
            print("backfill already in progress due to: "
                  f"gs://{backfill_blob.bucket.name}/{backfill_blob.name} "
                  f"created at {backfill_blob.time_created}. exiting.")
            return backfill_blob
    else:
        return None
Example no. 11
def source_bucket(gcs_bucket: storage.Bucket, sts_service_account: str):
    """
    Yields and auto-cleans up a GCS bucket preconfigured with the necessary
    STS service account read permissions.
    """

    # Setup policy for STS
    member: str = f"serviceAccount:{sts_service_account}"
    objectViewer = "roles/storage.objectViewer"
    bucketReader = "roles/storage.legacyBucketReader"

    # Prepare policy
    policy = gcs_bucket.get_iam_policy(requested_policy_version=3)
    policy.bindings.append({"role": objectViewer, "members": {member}})
    policy.bindings.append({"role": bucketReader, "members": {member}})

    # Set policy
    gcs_bucket.set_iam_policy(policy)

    yield gcs_bucket
Example no. 12
def __delete_blob(bucket: Bucket, blob_name: str):
    """Deletes a blob from the bucket."""
    # bucket_name = "your-bucket-name"
    # blob_name = "your-object-name"

    blob = bucket.blob(blob_name)
    try:
        blob.delete()
        print("Blob {} deleted from bucket: {}.".format(blob_name, bucket.name))
    except NotFound:
        print("File {} doesn't exist in bucket: {}.".format(blob_name, bucket.name))
Example no. 13
def storage(request):
    # create a random test bucket name
    bucket_name = "test_bucket_" + get_random_string(6, string.ascii_lowercase)

    storage = DjangoGCloudStorage(
        project=request.config.getoption("--gcs-project-name"),
        bucket=bucket_name,
        credentials_file_path=request.config.getoption("--gcs-credentials-file")
    )

    # Make sure the bucket exists
    bucket = Bucket(storage.client, bucket_name)
    bucket.location = request.config.getoption("--gcs-bucket-location")
    bucket.create()

    yield storage

    storage.bucket.delete_blobs(storage.bucket.list_blobs())

    storage.bucket.delete(force=True)
Example no. 14
def upload_blob(source: str, destination: str):
    """Uploads a file to the bucket."""
    # source = "local/path/to/file"
    # destination = "gs://your-bucket-name/storage-object-name"

    storage_client = storage.Client()
    blob = Blob.from_string(destination, storage_client)

    blob.upload_from_filename(source)

    print(f"File {source} uploaded to {destination}.")
Example no. 15
def grant_expiring_gcs_access(
    bucket: storage.Bucket, role: str, user_email: str, prefix: Optional[str] = None
):
    """
    Grant `user_email` the provided `role` on a `bucket`, expiring after `INACTIVE_USER_DAYS` 
    days have elapsed. By default, permissions apply to the whole bucket. Optionally, provide 
    an object URL `prefix` to restrict this permission grant to only a portion of the objects 
    in the given bucket.
    """
    # see https://cloud.google.com/storage/docs/access-control/using-iam-permissions#code-samples_3
    policy = bucket.get_iam_policy(requested_policy_version=3)
    policy.version = 3

    # remove the existing binding if one exists so that we can recreate it with
    # an updated TTL.
    _find_and_pop_binding(policy, prefix, role, user_email)
    binding = _build_binding_with_expiry(bucket.name, prefix, role, user_email)

    # (re)insert the binding into the policy
    policy.bindings.append(binding)
    bucket.set_iam_policy(policy)
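A usage sketch pairing grant_expiring_gcs_access with revoke_expiring_gcs_access from Example no. 6; the bucket, e-mail address, and prefix are placeholders, and the role is a standard GCS IAM role.

from google.cloud import storage

bucket = storage.Client().bucket("trial-data")   # placeholder bucket name

# Grant prefix-scoped read access, then revoke it later with the same arguments.
grant_expiring_gcs_access(bucket, "roles/storage.objectViewer",
                          "user@example.com", prefix="study-1/")
revoke_expiring_gcs_access(bucket, "roles/storage.objectViewer",
                           "user@example.com", prefix="study-1/")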
Example no. 16
def timeSort(bucket: Bucket,
             prefix: str,
             num: Optional[int] = None) -> List[Image]:
    blobs = bucket.list_blobs(prefix=prefix)
    imgs = [
        Image(el.public_url) for el in blobs if el.public_url.endswith(".png")
    ]
    simgs = sorted(imgs, key=lambda x: (x.date, x.seq), reverse=True)

    if num:
        return simgs[:num]
    return simgs
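A usage sketch for timeSort; the bucket and prefix are placeholders, and Image is assumed to be the project's own class exposing date and seq attributes parsed from the PNG's public URL.

from google.cloud import storage

bucket = storage.Client().bucket("camera-frames")   # placeholder bucket name

# The twelve newest .png objects under the prefix, ordered by (date, seq) descending.
latest = timeSort(bucket, "frames/front-door/", num=12)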
Example no. 17
def create(ctx, *args, **kwargs):
    admin_check(ctx.obj["user_id"])
    bucket = Bucket(ctx.obj["client"], name=ctx.obj["name"])
    bucket.location = kwargs["location"].upper()
    bucket.storage_class = kwargs["class"].upper()
    bucket.create()
    return f"Bucket `{bucket.name}` created."
Example no. 18
def blob_path(bucket_object: storage.Bucket) -> str:
    """ Path of a file placed in the GCS Bucket for tests """
    filename = "sample_file.json"
    local_path = path.join(path.dirname(__file__), f"fixtures/{filename}")
    # remote_path = f"gs://{TEST_BUCKET}/{filename}"

    blob: storage.Blob = bucket_object.blob(filename)
    blob.upload_from_filename(local_path)
    assert blob.exists()
    # print("Created blob?", blob.exists())
    yield filename
    # print("Removing blob...")
    blob.delete()
Example no. 19
def _prepare_sync_plan(
    source_bucket: storage.Bucket,
    destination_bucket: storage.Bucket,
    source_object: Optional[str],
    destination_object: Optional[str],
    recursive: bool,
) -> Tuple[Set[storage.Blob], Set[storage.Blob], Set[storage.Blob]]:
    # Calculate the number of characters to strip from each name, because the
    # prefix only carries information about the parent's path
    source_object_prefix_len = len(source_object) if source_object else 0
    destination_object_prefix_len = len(destination_object) if destination_object else 0
    delimiter = "/" if not recursive else None
    # Fetch blobs list
    source_blobs = list(source_bucket.list_blobs(prefix=source_object, delimiter=delimiter))
    destination_blobs = list(
        destination_bucket.list_blobs(prefix=destination_object, delimiter=delimiter)
    )
    # Create indexes that allow you to identify blobs based on their name
    source_names_index = {a.name[source_object_prefix_len:]: a for a in source_blobs}
    destination_names_index = {a.name[destination_object_prefix_len:]: a for a in destination_blobs}
    # Create sets with names without parent object name
    source_names = set(source_names_index.keys())
    destination_names = set(destination_names_index.keys())
    # Determine objects to copy and delete
    to_copy = source_names - destination_names
    to_delete = destination_names - source_names
    to_copy_blobs = {source_names_index[a] for a in to_copy}  # type: Set[storage.Blob]
    to_delete_blobs = {destination_names_index[a] for a in to_delete}  # type: Set[storage.Blob]
    # Find names that are in both buckets
    names_to_check = source_names.intersection(destination_names)
    to_rewrite_blobs = set()  # type: Set[storage.Blob]
    # Compare objects based on crc32c checksums
    for current_name in names_to_check:
        source_blob = source_names_index[current_name]
        destination_blob = destination_names_index[current_name]
        # if the objects differ, schedule the source blob for rewrite
        if source_blob.crc32c != destination_blob.crc32c:
            to_rewrite_blobs.add(source_blob)
    return to_copy_blobs, to_delete_blobs, to_rewrite_blobs
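A sketch of how the three sets returned by _prepare_sync_plan might be consumed, using Bucket.copy_blob and Blob.delete; the bucket names are placeholders, and copying under blob.name only works here because the same prefix is used on both sides.

from google.cloud import storage

client = storage.Client()
src = client.bucket("source-bucket")        # placeholder
dst = client.bucket("destination-bucket")   # placeholder

to_copy, to_delete, to_rewrite = _prepare_sync_plan(src, dst, "data/", "data/",
                                                    recursive=True)

for blob in to_copy | to_rewrite:
    # Copy new and changed objects under the same object name.
    src.copy_blob(blob, dst, new_name=blob.name)
for blob in to_delete:
    # Remove destination objects that no longer exist in the source.
    blob.delete()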
Example no. 20
def save_arrays(arrays: Dict[str, np.ndarray], filename: str,
                bucket: storage.Bucket):
    """
    Saves the given arrays to cloud storage as a single compressed .npz object
    :param arrays: dict mapping IDs --> arrays to be saved
    :param filename: name for the new blob
    :param bucket: bucket to save into
    :return:
    """
    out_stream = io.BytesIO()
    np.savez_compressed(out_stream, **arrays)
    out_stream.seek(0)
    out_blob = bucket.blob(filename)
    out_blob.upload_from_file(out_stream)
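A usage sketch for save_arrays with a placeholder bucket; the dict keys become the array names inside the compressed .npz archive.

import numpy as np
from google.cloud import storage

bucket = storage.Client().bucket("model-artifacts")   # placeholder bucket name

arrays = {"sample_001": np.zeros((64, 64)), "sample_002": np.ones((64, 64))}
# Writes both arrays into a single compressed .npz object.
save_arrays(arrays, "batches/batch_000.npz", bucket)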
Example no. 21
def download_from_bucket(
    bucket: storage.Bucket,
    bucket_file_path: str,
    local_file_path: Optional[Union[Path, str]] = None,
    force: bool = False,
) -> None:
    """
    Download the file from the bucket to the local machine.

    If local_file_path is specified the file is downloaded to that path,
    otherwise the bucket file path structure is preserved under the cache directory.

    Args:
        bucket: bucket from which to download the file
        bucket_file_path: file path in the bucket
        local_file_path: path to which we save locally
        force: whether to force the download or not

    Raises:
        FileNotFoundError: if the file does not exist.
    """
    gs_blob = bucket.blob(bucket_file_path)

    if not gs_blob.exists():
        raise FileNotFoundError(
            f"The file {bucket_file_path} does not exist in Google Bucket '{bucket.name}'"
        )

    if local_file_path is None:
        local_file_path = CACHE_DIRECTORY / bucket_file_path
    else:
        local_file_path = _convert_file_path(local_file_path).resolve()

    should_download = force or not local_file_path.exists()

    if should_download:
        local_file_path.parent.mkdir(exist_ok=True, parents=True)
        url = gs_blob.public_url
        filename = url.split("/")[-1]
        # gs_blob.download_to_filename(local_file_path) no progress bar

        with TqdmUpTo(unit="B",
                      unit_scale=True,
                      unit_divisor=1024,
                      miniters=1,
                      desc=filename) as t:
            urlretrieve(url, filename=local_file_path, reporthook=t.update_to)

        log.info(f"File {bucket_file_path} downloaded from Google Bucket "
                 f"'{bucket.name}' at {local_file_path}")
Example no. 22
def sync_gcs_to_box(bucket: Bucket, box: BoxClient,
                    cache: dict) -> List[Future]:
    # construct an executor for copy tasks
    executor = ThreadPoolExecutor(max_workers=cpu_count())
    futures = []

    for blob in bucket.list_blobs():
        if cache.get(blob.name, False):
            # Found the blob in Box
            LOG.debug("Blob {} already in Box.".format(blob.name))

        else:
            # Did not find the Blob in box
            if blob.metadata and blob.metadata.get(BOX_MTIME_KEY):
                LOG.info(
                    "Found blob {} in bucket that was synced, but no longer exists in Box. Deleting."
                    .format(blob.name))
                blob.delete()

            else:
                if blob.name[-1] == '/':
                    LOG.info(
                        "Found new folder {} not in Box. Creating.".format(
                            blob.name))
                    path = blob.name.split("/")[:-1]
                    # do this serially, as there should be few.
                    # Ideally, box_mkdir_p never misses cache when making files as the folder will sort first
                    box_mkdir_p(box, path, cache)
                else:
                    # Found a file that doesn't seem to be in Box.
                    blob_name = blob.name
                    LOG.info("Found new blob {} not in Box. Uploading.".format(
                        blob_name))
                    # split name by slashes; last item is file, the previous are folders
                    tokens = blob.name.split("/")
                    path, filename = tokens[:-1], tokens[-1]
                    target_folder = box_mkdir_p(box, path, cache)
                    # prepare the copy
                    temp_file = BytesIO()
                    reader = blob.download_to_file
                    writer = lambda temp: target_folder.upload_stream(
                        temp, filename)
                    transfer_callback = lambda bf: patch_blob_metadata(
                        bucket, blob_name, bf)
                    # submit the copy work
                    future = executor.submit(concurrent_upload, reader, writer,
                                             temp_file, transfer_callback)
                    futures.append(future)

    return futures
Example no. 23
def copy_id_set(production_bucket: Bucket, build_bucket: Bucket,
                storage_base_path: str, build_bucket_base_path: str):
    """ Copies the id_set.json artifact from the build bucket to the production bucket.

    Args:
        production_bucket (google.cloud.storage.bucket.Bucket): gcs bucket where id_set is copied to.
        build_bucket (google.cloud.storage.bucket.Bucket): gcs bucket where id_set is copied from.
        storage_base_path (str): the path to upload the id_set.json to.
        build_bucket_base_path (str): the path in the build bucket of the id_set.json.
    """

    build_id_set_path = os.path.join(os.path.dirname(build_bucket_base_path),
                                     'id_set.json')
    build_id_set_blob = build_bucket.blob(build_id_set_path)

    if not build_id_set_blob.exists():
        logging.error(
            f"id_set.json file does not exist in build bucket at path: {build_id_set_path}"
        )
        sys.exit(1)

    prod_id_set_path = os.path.join(os.path.dirname(storage_base_path),
                                    'id_set.json')
    try:
        copied_blob = build_bucket.copy_blob(
            blob=build_id_set_blob,
            destination_bucket=production_bucket,
            new_name=prod_id_set_path)
        if not copied_blob.exists():
            logging.error(
                f"Failed to upload id_set.json to {prod_id_set_path}")
            sys.exit(1)
        else:
            logging.success("Finished uploading id_set.json to storage.")
    except Exception as e:
        logging.exception(f"Failed copying ID Set. Additional Info: {str(e)}")
        sys.exit(1)
Example no. 24
def download_and_extract_index(build_bucket: Bucket, extract_destination_path: str, build_bucket_base_path: str):
    """Downloads and extracts the build index zip from cloud storage.

    Args:
        build_bucket (google.cloud.storage.bucket.Bucket): google storage bucket where build index.zip is stored.
        extract_destination_path (str): the full path of the extract folder.
        build_bucket_base_path (str): the path in the build bucket of the index.
    Returns:
        str: extracted build index folder full path.
        Blob: google cloud storage object that represents the build index.zip blob.
        str: downloaded build index generation.

    """
    build_index_storage_path = os.path.join(build_bucket_base_path, f"{GCPConfig.INDEX_NAME}.zip")
    download_build_index_path = os.path.join(extract_destination_path, f"{GCPConfig.INDEX_NAME}.zip")

    build_index_blob = build_bucket.blob(build_index_storage_path)
    build_index_folder_path = os.path.join(extract_destination_path, GCPConfig.INDEX_NAME)

    if not os.path.exists(extract_destination_path):
        os.mkdir(extract_destination_path)

    if not build_index_blob.exists():
        logging.error(f"No build index was found in path: {build_index_storage_path}")
        sys.exit(1)

    build_index_blob.reload()
    build_index_generation = build_index_blob.generation
    build_index_blob.download_to_filename(download_build_index_path, if_generation_match=build_index_generation)

    if os.path.exists(download_build_index_path):
        with ZipFile(download_build_index_path, 'r') as index_zip:
            index_zip.extractall(extract_destination_path)

        if not os.path.exists(build_index_folder_path):
            logging.error(f"Failed creating build {GCPConfig.INDEX_NAME} folder with extracted data.")
            sys.exit(1)

        os.remove(download_build_index_path)
        logging.success(f"Finished downloading and extracting build {GCPConfig.INDEX_NAME} file to "
                        f"{extract_destination_path}")

        return build_index_folder_path, build_index_blob, build_index_generation
    else:
        logging.error(f"Failed to download build {GCPConfig.INDEX_NAME}.zip file from cloud storage.")
        sys.exit(1)
Example no. 25
def upload(bucket: storage.Bucket, thumb: Thumbnail) -> bool:
    blob = bucket.blob(str(thumb.path))
    blob.upload_from_string(thumb.content, thumb.mimetype)
    logger.info('Uploaded {}.', thumb.path)
    # TODO: Copy ACL from original image
    try:
        blob.make_public()
    except ServiceUnavailable as e:
        logger.error('Failed to make {} public.\nError: {}', blob.path, e)
    meta = {'Generator': f'Thunagen v{__version__}'}
    blob.metadata = meta
    try:
        blob.patch()
        logger.debug('Made {} public and set metadata {}', thumb.path, meta)
    except NotFound:
        logger.error('{} was deleted by someone.', blob.path)
    return True
Example no. 26
def get_files(client: storage.Client, bucket: storage.Bucket) -> List[dict]:
    """Retrieves all files in a given GCS bucket

    Args:
        client: Object representing Python GCS client
        bucket: google.cloud.storage.Bucket holding bucket name

    Returns:
       List of dicts [{name: String holding the file name,
                       type: String holding the content type, e.g. 'audio/flac'
                      }]
    """
    bucket = client.get_bucket(bucket)
    return [{
        'name': blob.name,
        'type': blob.content_type
    } for blob in list(bucket.list_blobs())]
Example no. 27
def _UploadBuildResults(gcs_bucket: storage.Bucket,
                        gcs_build_results_dir: str):
    """Uploads all build results to Google Cloud Storage."""
    logging.info("Will upload build results to gs://%s/%s.",
                 os.environ[_GCS_BUCKET], gcs_build_results_dir)

    for build_result in os.listdir(flags.FLAGS.build_results_dir):
        path = os.path.join(flags.FLAGS.build_results_dir, build_result)
        if not os.path.isfile(path):
            logging.info("Skipping %s as it's not a file.", path)
            continue
        logging.info("Uploading: %s", path)
        gcs_blob = gcs_bucket.blob("{}/{}".format(gcs_build_results_dir,
                                                  build_result))
        gcs_blob.upload_from_filename(path)

    logging.info("GCS upload done.")
Example no. 28
def download_job_manifest(bucket: Bucket, job_id: str) -> JobManifest:
    """
    Download the JobManifest associated with job_id in given bucket.

    Parameters
    ----------
    bucket : google.cloud.storage.Bucket
        The GCS bucket where job data is stored.
    job_id : str
        The ID of the job.

    Returns
    -------
    JobManifest
    """
    path = f"thor_jobs/v1/job-{job_id}/manifest.json"
    as_str = bucket.blob(path).download_as_string()
    return JobManifest.from_str(as_str)
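A usage sketch for download_job_manifest; the bucket name and job id are placeholders, and JobManifest.from_str is assumed to be provided by the surrounding project.

from google.cloud import storage

bucket = storage.Client().bucket("thor-jobs")        # placeholder bucket name
manifest = download_job_manifest(bucket, "2f9c1a")   # placeholder job id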
Example no. 29
    def testGetPathContentsSubDir(self):
        requests = ['dummy_bucket1/subdir/', 'dummy_bucket1/subdir']

        dummy_bucket1 = Bucket(client=Mock(), name='dummy_bucket1')

        gcs_buckets = [
            dummy_bucket1,
        ]
        gcs_blobs = [
            Blob(name='subdir/dummy_file', bucket=dummy_bucket1),
            Blob(name='subdir/dummy_dir/', bucket=dummy_bucket1),
        ]

        storage_client = Mock()
        storage_client.list_buckets = MagicMock(return_value=gcs_buckets)
        storage_client.list_blobs = MagicMock(return_value=gcs_blobs)

        wanted = {
            'type': 'directory',
            'content': [
                {
                    'name': 'dummy_file',
                    'path': 'dummy_bucket1/subdir/dummy_file',
                    'type': 'file',
                    'last_modified': '',
                },
                {
                    'name': 'dummy_dir/',
                    'path': 'dummy_bucket1/subdir/dummy_dir/',
                    'type': 'directory',
                    'last_modified': '',
                },
            ]
        }

        for req in requests:
            got = handlers.getPathContents(req, storage_client)
            self.assertEqual(wanted['content'], got['content'])

        with self.assertRaises(handlers.FileNotFound):
            req = 'dummy_bucket1/sub'
            handlers.getPathContents(req, storage_client)
Example no. 30
    def testGetPathContentsDirEmpty(self):
        requests = ['dummy_bucket1/', 'dummy_bucket1']

        dummy_bucket1 = Bucket(client=Mock(), name='dummy_bucket1')

        gcs_buckets = [
            dummy_bucket1,
        ]
        gcs_blobs = []

        storage_client = Mock()
        storage_client.list_buckets = MagicMock(return_value=gcs_buckets)
        storage_client.list_blobs = MagicMock(return_value=gcs_blobs)

        wanted = {'type': 'directory', 'content': []}

        for req in requests:
            got = handlers.getPathContents(req, storage_client)
            self.assertDictEqual(wanted, got)
Example no. 31
def to_public_png(npy_blob: storage.Blob, public_bucket: storage.Bucket):
    """
    Converts a .npy blob into a png file and uploads it to the public
    bucket.

    :param npy_blob:
    :param public_bucket:
    :return:
    """
    npy_filepath = f'/tmp/{npy_blob.name.split("/")[-1]}'
    npy_blob.download_to_filename(npy_filepath)
    arr = np.load(npy_filepath)

    png_filepath = npy_filepath.replace('.npy', '.png')
    plt.imsave(png_filepath, arr)

    png_blob_name = npy_blob.name.replace('.npy', '.png')
    png_blob = public_bucket.blob(png_blob_name)
    png_blob.upload_from_filename(png_filepath)
    os.remove(npy_filepath)
    os.remove(png_filepath)
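A usage sketch for to_public_png; the bucket names and prefix are placeholders, and matplotlib plus numpy must be importable as in the function above.

from google.cloud import storage

client = storage.Client()
private_bucket = client.bucket("raw-arrays")       # placeholder bucket name
public_bucket = client.bucket("public-previews")   # placeholder bucket name

# Render every .npy object under the prefix as a .png in the public bucket.
for blob in private_bucket.list_blobs(prefix="heatmaps/"):
    if blob.name.endswith(".npy"):
        to_public_png(blob, public_bucket)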