def get_content(bucket: storage.Bucket, file_path: str) -> str:
    """Return the contents of the blob at *file_path* as a UTF-8 string.

    :param bucket: GCS bucket to read from.
    :param file_path: Object name of the blob inside the bucket.
    :return: The blob's payload decoded as UTF-8.
    :raises FileNotFoundError: If no blob exists at *file_path*.
    """
    blob = bucket.get_blob(file_path)
    if blob is None:
        # get_blob returns None for a missing object; fail loudly instead
        # of letting the caller hit an AttributeError on None.
        raise FileNotFoundError(
            "Blob not found in bucket: {}".format(file_path))
    return blob.download_as_string().decode("utf-8")
def __getBlobUpdated(bucket: Bucket, blobName) -> datetime:
    """Return the last-updated timestamp of the named blob.

    :param bucket: Bucket to look the blob up in.
    :param blobName: String name (path) of the blob.
    :return: The blob's ``updated`` timestamp.
    :raises FileNotFoundError: If no blob exists under *blobName*.
    """
    blob = bucket.get_blob(blobName)
    if blob is None:
        # get_blob returns None for a missing object; raise a clear error
        # rather than an AttributeError on None.
        raise FileNotFoundError(
            "Blob not found in bucket: {}".format(blobName))
    return blob.updated
def is_thumbnail_missing_or_obsolete(thumb_path: PurePosixPath, orig_updated_time: datetime, bucket: Bucket) -> bool:
    """Decide whether a thumbnail must be (re)generated.

    Returns True when no thumbnail blob exists at *thumb_path*, or when
    the original image was updated after the stored thumbnail was.
    """
    existing = bucket.get_blob(str(thumb_path))
    if existing:
        # Fall back to creation time when no updated timestamp is present.
        thumb_time = existing.updated or existing.time_created
        return orig_updated_time > thumb_time
    return True
def get_blob(bucket: storage.Bucket, url_tokens: dict) -> "storage.Blob | None":
    """Fetch the blob addressed by ``url_tokens["path"]`` from *bucket*.

    :param bucket: Bucket to read from.
    :param url_tokens: Mapping that holds the object path under the
        ``"path"`` key (previously mis-annotated as ``str``).
    :return: The blob, or None when the lookup fails (error is logged).
    """
    try:
        return bucket.get_blob(url_tokens["path"])
    except Exception as e:
        # Best-effort lookup: log and return None explicitly so callers
        # can decide how to handle an inaccessible object.
        LOG.error("Error accessing object: {}\n\t{}".format(
            url_tokens["path"], e))
        return None
def sync_box_to_gcs(box: BoxClient, bucket: Bucket, cache: dict) -> List[Future]: """Sync Box account files to a GCS bucket. For versioned Box files, the latest version will always be synced back to the GCS bucket. Non-current versions will not be deliberately preserved, though syncing to a versioned bucket will have this effect. Arguments: box {BoxClient} -- [description] bucket {Bucket} -- [description] cache {dict} -- A dictionary that will opportunistically be filled with Box item paths/objects. Returns: List[Future] -- [description] """ # constuct an executor for copy tasks executor = ThreadPoolExecutor(max_workers=cpu_count()) futures = [] # sync box files back to GCS for path, item in box_walk(box.root_folder()): LOG.debug("Box directory walk found: {}".format(path)) # cache the Box item in module scope cache[path] = item # get the blob to overwrite, or make a new one blob_exists = True blob = bucket.get_blob(path) if not blob: blob_exists = False blob = Blob(path, bucket) # branch depending on whether file or folder if item.type == "folder": if not blob_exists: blob.metadata = { # Not an important value. BOX_MTIME_KEY: datetime.now().isoformat() } # create directory marker, used by UI and FUSE LOG.info("Creating directory marker in GCS: {}".format( blob.name)) blob.upload_from_string(b'') elif item.type == "file": box_file = box.file(item.id) box_mtime = box_file.get().modified_at if should_copy_box_to_gcs(box_file, box_mtime, blob, blob_exists): LOG.info( "Box file {} is not found in GCS or updated since last sync. Copying to {}." .format(item.name, blob.name)) blob.metadata = { BOX_MTIME_KEY: box_mtime } # This change will "follow" the upload into GCS temp_file = BytesIO() reader = box_file.download_to writer = blob.upload_from_file future = executor.submit(concurrent_upload, reader, writer, temp_file) futures.append(future) else: LOG.info("Ignoring item of type {}".format(item.type)) return futures