Example #1
    def upload_image(self, location: str, label: str, image_bytes,
                     metadata: dict) -> typing.Optional[str]:
        # Attach custom metadata before uploading so it is stored with
        # the object, then make the object public and return its URL.
        blob = Blob(f"{location}/{label}", self.bucket)
        blob.metadata = metadata
        blob.upload_from_file(image_bytes, content_type="image/png")
        blob.make_public()
        return blob.public_url
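The method above assumes a surrounding class that owns self.bucket. A minimal sketch of that setup and a call; the class name, bucket name, and file name are invented for illustration:

import typing
from google.cloud import storage
from google.cloud.storage.blob import Blob

class ImageStore:
    """Hypothetical owner of the upload_image method above."""
    def __init__(self, bucket_name: str):
        self.bucket = storage.Client().bucket(bucket_name)
    # upload_image(...) from Example #1 would be defined here.

store = ImageStore("example-bucket")
with open("tabby.png", "rb") as fh:
    url = store.upload_image("cats", "tabby", fh, {"owner": "demo"})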
Example #2
def upload_blob_string(bucket_name, data_string, destination_blob_name,
                       metadata, content_type):
    logging.debug(
        'upload_blob_string(...) \n\tbucket_name=%s\n\tlen data_string=%s\n\t'
        'destination_blob_name=%s\n\tmetadata=%s\n\tcontent_type=%s',
        bucket_name, len(data_string), destination_blob_name, metadata,
        content_type)
    client = storage.Client()
    bucket = client.get_bucket(bucket_name)
    blob = Blob(destination_blob_name, bucket)
    # A freshly constructed Blob has metadata of None, so assign directly;
    # merge if metadata was already populated (e.g. on a reloaded blob).
    if blob.metadata is None:
        blob.metadata = metadata
    else:
        blob.metadata.update(metadata)
    return blob.upload_from_string(data=data_string, content_type=content_type)
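A minimal call sketch for the function above; the bucket name, object name, and payload here are hypothetical:

upload_blob_string(
    bucket_name="example-bucket",
    data_string='{"status": "ok"}',
    destination_blob_name="reports/latest.json",
    metadata={"source": "nightly-job"},
    content_type="application/json",
)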
Example #3
    def write_blob_sha256(self, blob: gcs.Blob, force: bool = False) -> None:
        """
        Calculates a blob's SHA256 and writes the value to the blob's custom
        metadata 'sha256' field.
        """
        current_value = None if blob.metadata is None else blob.metadata.get(
            'sha256')
        log.info('Current SHA256 value: %s', current_value)
        if current_value is None or force:
            file_sha256 = self.calculate_blob_sha256(blob)
            if current_value == file_sha256:
                log.info('Calculated SHA256 matches current value, no change.')
            else:
                log.info('Saving SHA256 value: %s', file_sha256)
                blob.metadata = {'sha256': file_sha256}
                # patch() sends only the modified metadata fields to GCS.
                blob.patch()
        else:
            log.info('SHA256 already set; skipping (pass force=True to '
                     'recalculate).')
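Example #3 relies on a calculate_blob_sha256 helper that is not shown. A minimal sketch of such a helper, assuming it streams the object and hashes it locally (an illustration, not the original implementation):

import hashlib

def calculate_blob_sha256(blob) -> str:
    # Assumed implementation (a method on the same class in Example #3):
    # stream the object in 1 MiB chunks and hash it locally.
    digest = hashlib.sha256()
    with blob.open('rb') as fh:
        for chunk in iter(lambda: fh.read(1024 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest()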
Example #4
    def _set_format_metadata(formatted: OptimizedMetricRepresentation,
                             blob: storage.Blob,
                             should_compress: bool = False) -> None:
        """Sets metadata on the Cloud Storage blob that can be used to retrieve data points from the optimized
        representation.

        This includes the ordered dimension manifest, the ordered list of value keys, and the total
        number of data points to effectively "unflatten" the flattened matrix. Also sets the 'Content-Encoding: gzip'
        header if the content is going to be compressed.
        """
        total_data_points = len(
            formatted.value_matrix[0]) if formatted.value_matrix else 0
        metadata = {
            'dimension_manifest': json.dumps(formatted.dimension_manifest),
            'value_keys': json.dumps(formatted.value_keys),
            'total_data_points': total_data_points,
        }
        blob.metadata = metadata

        if should_compress:
            blob.content_encoding = 'gzip'
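On the read side, these fields are what let a consumer reconstruct the matrix. A short sketch of decoding them, not taken from the original source; note that GCS returns custom metadata values as strings, hence the int() conversion:

import json

blob.reload()  # refresh metadata from GCS
dimension_manifest = json.loads(blob.metadata['dimension_manifest'])
value_keys = json.loads(blob.metadata['value_keys'])
total_data_points = int(blob.metadata['total_data_points'])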
Example #5
def sync_box_to_gcs(box: BoxClient, bucket: Bucket,
                    cache: dict) -> List[Future]:
    """Sync Box account files to a GCS bucket.

    For versioned Box files, the latest version will always be synced back to the GCS bucket. 
    Non-current versions will not be deliberately preserved, though syncing to a versioned 
    bucket will have this effect.
    
    Arguments:
        box {BoxClient} -- [description]
        bucket {Bucket} -- [description]
        cache {dict} -- A dictionary that will opportunistically be filled with Box item paths/objects.
    
    Returns:
        List[Future] -- [description]
    """
    # construct an executor for copy tasks
    executor = ThreadPoolExecutor(max_workers=cpu_count())
    futures = []
    # sync box files back to GCS
    for path, item in box_walk(box.root_folder()):
        LOG.debug("Box directory walk found: {}".format(path))
        # cache the Box item in module scope
        cache[path] = item

        # get the blob to overwrite, or make a new one
        blob_exists = True
        blob = bucket.get_blob(path)
        if not blob:
            blob_exists = False
            blob = Blob(path, bucket)

        # branch depending on whether file or folder
        if item.type == "folder":
            if not blob_exists:
                blob.metadata = {
                    # Not an important value.
                    BOX_MTIME_KEY: datetime.now().isoformat()
                }
                # create directory marker, used by UI and FUSE
                LOG.info("Creating directory marker in GCS: {}".format(
                    blob.name))
                blob.upload_from_string(b'')

        elif item.type == "file":
            box_file = box.file(item.id)
            box_mtime = box_file.get().modified_at
            if should_copy_box_to_gcs(box_file, box_mtime, blob, blob_exists):
                LOG.info(
                    "Box file {} is not found in GCS or updated since last sync. Copying to {}."
                    .format(item.name, blob.name))
                blob.metadata = {
                    BOX_MTIME_KEY: box_mtime
                }  # This change will "follow" the upload into GCS
                temp_file = BytesIO()
                reader = box_file.download_to
                writer = blob.upload_from_file

                future = executor.submit(concurrent_upload, reader, writer,
                                         temp_file)
                futures.append(future)

        else:
            LOG.info("Ignoring item of type {}".format(item.type))

    return futures
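The concurrent_upload helper is referenced but not shown. Given that it is called with (reader, writer, temp_file), where reader is box_file.download_to and writer is blob.upload_from_file, a plausible sketch (an assumption, not the original helper):

def concurrent_upload(reader, writer, temp_file):
    # Download the Box file into the in-memory buffer, rewind,
    # then upload the same bytes to the GCS blob.
    reader(temp_file)    # box_file.download_to(temp_file)
    temp_file.seek(0)
    writer(temp_file)    # blob.upload_from_file(temp_file)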
Example #6
    def mark_complete(self, blob: storage.Blob):
        blob.metadata = {"export_completed": True}
        # Retry the metadata patch on transient 503s for up to 60 seconds.
        patch_retryer = retry.Retry(
            predicate=retry.if_exception_type(ServiceUnavailable), deadline=60)

        patch_retryer(blob.patch)()
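A consumer can later check the flag; a minimal, hypothetical sketch. GCS round-trips custom metadata values as strings, so the check is truthiness-based rather than a comparison with True:

def is_export_complete(blob: storage.Blob) -> bool:
    # Hypothetical check; reload() fetches the current metadata.
    blob.reload()
    return bool(blob.metadata and blob.metadata.get("export_completed"))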