def download_to_file(client, to_delete):
    # [START download_to_file]
    from google.cloud.storage import Blob

    client = storage.Client(project="my-project")
    bucket = client.get_bucket("my-bucket")
    encryption_key = "c7f32af42e45e85b9848a6a14dd2a8f6"
    blob = Blob("secure-data", bucket, encryption_key=encryption_key)
    blob.upload_from_string("my secret message.")
    with open("/tmp/my-secure-file", "wb") as file_obj:
        blob.download_to_file(file_obj)
    # [END download_to_file]

    to_delete.append(blob)
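The `encryption_key` above is a customer-supplied encryption key (CSEK): the object is encrypted with it on upload, and the same 32-byte key must be supplied again to read the object back, as the download in this snippet does.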
class GoogleCloudFile(File):
    def __init__(self, name, mode, storage):
        self.name = name
        self.mime_type = mimetypes.guess_type(name)[0]
        self._mode = mode
        self._storage = storage
        self.blob = storage.bucket.get_blob(name)
        if not self.blob and 'w' in mode:
            self.blob = Blob(self.name, storage.bucket,
                             chunk_size=storage.blob_chunk_size)
        self._file = None
        self._is_dirty = False

    @property
    def size(self):
        return self.blob.size

    def _get_file(self):
        if self._file is None:
            self._file = SpooledTemporaryFile(
                max_size=self._storage.max_memory_size,
                suffix=".GSStorageFile",
                dir=setting("FILE_UPLOAD_TEMP_DIR"))
            if 'r' in self._mode:
                self._is_dirty = False
                self.blob.download_to_file(self._file)
                self._file.seek(0)
        return self._file

    def _set_file(self, value):
        self._file = value

    file = property(_get_file, _set_file)

    def read(self, num_bytes=None):
        if 'r' not in self._mode:
            raise AttributeError("File was not opened in read mode.")
        if num_bytes is None:
            num_bytes = -1
        return super().read(num_bytes)

    def write(self, content):
        if 'w' not in self._mode:
            raise AttributeError("File was not opened in write mode.")
        self._is_dirty = True
        return super().write(force_bytes(content))

    def close(self):
        if self._file is not None:
            if self._is_dirty:
                self.blob.upload_from_file(
                    self.file, rewind=True,
                    content_type=self.mime_type,
                    predefined_acl=self._storage.default_acl)
            self._file.close()
            self._file = None
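A minimal usage sketch for the wrapper above, assuming a hypothetical `gcs_storage` backend object that exposes `bucket`, `blob_chunk_size`, `max_memory_size`, and `default_acl` as the class expects (all names here are illustrative, not part of the snippet):

# Hypothetical usage; gcs_storage is an assumed backend instance.
f = GoogleCloudFile("reports/latest.csv", "r", gcs_storage)
data = f.read()   # first access lazily downloads into a SpooledTemporaryFile
f.close()

g = GoogleCloudFile("reports/new.csv", "w", gcs_storage)
g.write("a,b,c\n")  # marks the buffer dirty
g.close()           # close() uploads the buffered content back to GCS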
def _download_arr(input_blob: storage.Blob):
    logging.info(f'downloading numpy file: {input_blob.name}')
    input_stream = io.BytesIO()
    input_blob.download_to_file(input_stream)
    input_stream.seek(0)
    arr = np.load(input_stream)
    input_stream.close()
    return arr
def download_file(module, client, name, dest):
    try:
        bucket = client.get_bucket(module.params['bucket'])
        blob = Blob(name, bucket)
        with open(dest, "wb") as file_obj:
            blob.download_to_file(file_obj)
        return blob_to_dict(blob)
    except google.cloud.exceptions.NotFound as e:
        module.fail_json(msg=str(e))
def copyToLocal(self, uri_source, uri_target, bucket_name_source=None, **kwargs):
    gcp_bucket_source = self._gcp_bucket(bucket_name_source)
    blob = Blob(uri_source, gcp_bucket_source)
    with open(uri_target, 'wb') as file_pointer:
        blob.download_to_file(file_pointer)
def download_array(blob: storage.Blob) -> np.ndarray: """Downloads data blobs as numpy arrays :param blob: the GCS blob you want to download as an array :return: """ in_stream = io.BytesIO() blob.download_to_file(in_stream) in_stream.seek(0) # Read from the start of the file-like object return np.load(in_stream)
def download_array(blob: storage.Blob) -> np.ndarray: """Downloads a blob to a numpy array :param blob: GCS blob to download as a numpy array :return: numpy array """ in_stream = io.BytesIO() blob.download_to_file(in_stream) in_stream.seek(0) # Read from the start of the file-like object return np.load(in_stream)
def DownloadSVS(BlobName, outdir='.', project='jax-nihcc-res-00-0011'):
    # bucketname = 'isb-cgc-open'
    bucketname = BlobName[5:].split('/')[0]
    client = storage.Client(project=project)
    bucket = client.get_bucket(bucketname)
    # Slice off the 'gs://<bucket>/' prefix to get the object name.
    # (str.lstrip strips a set of characters rather than a prefix, so it
    # could also eat leading characters of the object name.)
    object_name = BlobName[len('gs://' + bucketname + '/'):]
    blob = Blob(object_name, bucket)
    with open(os.path.join(mkdir_if_not_exist(outdir),
                           BlobName.split('/')[-1]), 'wb') as file_obj:
        blob.download_to_file(file_obj)
def download_array(blob: storage.Blob) -> np.ndarray: """ Downloads data from blob form to numpy array :param blob: data to be downloaded :return: numpy array """ in_stream = io.BytesIO() blob.download_to_file(in_stream) in_stream.seek(0) # Read from the start of the file-like object return np.load(in_stream)
def main():
    #
    # initial greeting...
    #
    print("Hello Google Cloud Storage!")

    #
    # create a client
    #
    print("creating client...")
    client = storage.Client()

    index = 0
    print("indexing over bucket list...")
    for bucket in client.list_buckets():
        print(bucket)
        print("index = " + str(index))
        if index == 0:
            defaultBucket = bucket
        index += 1
    print("")
    print("chosen bucket is: " + str(defaultBucket))

    blob = Blob("raw_image.jpg", defaultBucket)
    quit = False
    imageFilePath = "/home/shawn/Desktop/raw_image_download.jpg"
    while not quit:
        blobCount = 0
        for blobItem in defaultBucket.list_blobs():
            blobCount += 1
        if blobCount == 0:
            print("empty...")
        else:
            print("downloading...")
            with open(imageFilePath, "wb") as imageFile:
                blob.download_to_file(imageFile)
            with Image(filename=imageFilePath) as img:
                print(img.size)
                print("blurring...")
                img.gaussian_blur(9, 1)
                imageFilePath = "/home/shawn/Desktop/blurred_image.jpg"
                print("saving...")
                img.save(filename=imageFilePath)
            with Image(filename=imageFilePath) as img:
                blob = Blob("blurred_image.jpg", defaultBucket)
                print("uploading...")
                with open("/home/shawn/Desktop/blurred_image.jpg", "rb") as imageFile:
                    blob.upload_from_file(imageFile)
                display(img)
        time.sleep(1.0)

    #
    # final greeting...
    #
    print("Goodbye Google Cloud Storage!")
def download_to_file(client, to_delete):
    # [START download_to_file]
    from google.cloud.storage import Blob

    client = storage.Client(project='my-project')
    bucket = client.get_bucket('my-bucket')
    encryption_key = 'c7f32af42e45e85b9848a6a14dd2a8f6'
    blob = Blob('secure-data', bucket, encryption_key=encryption_key)
    with open('/tmp/my-secure-file', 'wb') as file_obj:
        blob.download_to_file(file_obj)
    # [END download_to_file]

    to_delete.append(blob)
def download_blob_to_file(source_blob: Blob, out_dir: str, filename: str) -> Path:
    logger.info(f"Downloading ({filename}) from remote storage.")
    out_dir_path = Path(out_dir)
    filename_path = Path(filename)
    download_path = out_dir_path.joinpath(filename_path)
    download_path.parent.mkdir(parents=True, exist_ok=True)
    logger.debug(f"File download path: {download_path}")
    source_blob.chunk_size = 128 * 2**20  # 128 MB chunks
    with open(download_path, mode="wb") as f:
        source_blob.download_to_file(f)
    return download_path
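Setting `chunk_size` on a blob switches the client to chunked (ranged) downloads; the library requires the value to be a multiple of 256 KiB, which the 128 MB figure above satisfies. A hedged call sketch, with all paths and names illustrative:

# Hypothetical usage; 'bucket' is assumed to exist.
blob = bucket.blob("videos/large-file.bin")
path = download_blob_to_file(blob, "/tmp/downloads", "large-file.bin")
print(path)  # /tmp/downloads/large-file.bin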
def fetch_object_from_gcs(self, bucket_name, fileobject):
    if self.check_bucket(bucket_name):
        bucket = self.gcs.get_bucket(bucket_name)
        blob = Blob(fileobject, bucket)
        outfile = fileobject + ".out"
        try:
            with open(outfile, 'wb') as output_file:
                blob.download_to_file(output_file)
            return True
        # A missing object raises google.cloud.exceptions.NotFound,
        # not IOError, so catch both.
        except (IOError, google.cloud.exceptions.NotFound):
            print('Error: Cannot find the object {}'.format(fileobject))
            return False
def on_get(self, req, resp, security_context='', file_store_id=''):
    """ Handles GET requests - returns a file_store object from DB """
    self.logger.debug('on_get running')
    try:
        # Gather inputs
        user_token = req.context['user']['user']
        # self.logger.debug("token" + json.dumps(req.context['user']))

        # Check to see if file_store is permitted to select in this context
        self.web_util.check_grant(security_context, user_token, 'FS_GET')

        # Initialize Database components
        file_store_helper = FileStoreDBHelper(self.pers)
        if file_store_id != '':
            self.logger.debug('searching for %s', file_store_id)
            file_store = file_store_helper.sel(security_context, file_store_id)
            self.logger.debug('found %s', dumps(file_store))
            fs_filename = file_store_id + self.get_extension_from_filename(
                file_store['fileName'])
            fs_fullpath = self.cfg.FILE_STORE_PATH + fs_filename
            storage_client = storage.Client(project=self.cfg.GCP_PROJECT_NAME)
            bucket = storage_client.get_bucket(self.cfg.GCP_CS_BUCKET_NAME)
            blob = Blob(file_store['cloudId'], bucket,
                        encryption_key=self.cfg.GCP_CS_ENCRTYPION_KEY)
            local_file_name = '/tmp/' + file_store['cloudId'].replace("/", "_")
            with open(local_file_name, "wb") as file_obj:
                blob.download_to_file(file_obj)
            resp.content_type = file_store['contentType']
            resp.stream = open(local_file_name, 'rb')
            resp.stream_len = file_store['fileSize']
    except falcon.HTTPError:
        raise
    except:
        self.logger.error("on_get failed: %s", traceback.print_exc())
        raise falcon.HTTPError(
            falcon.HTTP_400,  # Bad Request
            'Something went wrong at the server.',
            'Someone is already queued to research the issue.')
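Note: this handler appears to target an older Falcon release; `resp.stream_len` was deprecated in Falcon 2.0 in favor of `resp.content_length`.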
def get(self, task: ArchiverResponse) -> Payload: """ Retrieve archived payload from gcs """ meta = PayloadMeta( extra_data={ 'bucket': task.results['archive_bucket'], 'path': task.results['path'], 'project_id': task.results['project_id'], }) client = Client(project=task.results['project_id']) bucket = client.get_bucket(task.results['archive_bucket']) blob = Blob(task.results['path'], bucket) content = BytesIO() blob.download_to_file(content) content.seek(0) return Payload(content.read(), meta)
def calculate_blob_sha256(self, blob: gcs.Blob) -> str:
    """
    Return the SHA256 for the given blob.

    To calculate the value the file is downloaded to a temporary file
    that is deleted after the hash is calculated.
    """
    file = tempfile.NamedTemporaryFile(mode='w+b', delete=False)
    file_name = file.name
    try:
        blob.download_to_file(file)
    finally:
        file.close()
    with open(file_name, 'rb') as file:
        file_md5 = hashlib.md5()
        file_sha256 = hashlib.sha256()
        while chunk := file.read(8192):
            file_md5.update(chunk)
            file_sha256.update(chunk)
    # Delete the temporary file and return the hex digest, as the
    # docstring describes.
    os.unlink(file_name)
    return file_sha256.hexdigest()
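When the goal is only integrity checking, the download can often be avoided: GCS exposes a base64-encoded MD5 (and CRC32C) on the blob's metadata. A sketch of that alternative, assuming the metadata has been fetched (e.g. via `blob.reload()`); the helper name is illustrative:

import base64
import hashlib


def md5_matches(blob, local_path: str) -> bool:
    # blob.md5_hash holds the base64-encoded MD5 of the object's content.
    with open(local_path, 'rb') as f:
        digest = hashlib.md5(f.read()).digest()
    return base64.b64encode(digest).decode() == blob.md5_hash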
def get(self, name=None):
    """ Get a bytes stream of an object

    Parameters
    ----------
    name
        if None, prefix will be used as a key name, otherwise name will
        be added to prefix after '/' separator

    Returns
    -------
    bytes stream with the key's data, or an empty stream if errors occurred
    """
    key = self.base_address if name is None else os.path.join(
        self.base_address, name)
    key = key.strip('/')
    # Create the stream up front so an empty stream can be returned
    # even if constructing the blob or downloading fails below.
    filestream = io.BytesIO()
    try:
        blob = Blob(key, self.bucket)
        blob.download_to_file(filestream)
        filestream.seek(0)
        log.info("Getting {} bytes from \tbucket={}\tkey={}".format(
            blob.size, self.bucket_name, key))
        self.metadata = blob.metadata
    except Exception as e:
        log.error("{}\tbucket={}\tkey={}".format(e, self.bucket_name, key))
        if log.level == logging.DEBUG:
            raise e
    if self.do_read:
        asbytes = filestream.read()
        if not self.binary:
            return asbytes.decode()
        else:
            return asbytes
    return filestream
def get(self, task: ArchiverResponse) -> Payload: """ Retrieve archived payload from gcs """ meta = PayloadMeta( extra_data={ 'bucketId': task.results['bucketId'], 'objectId': task.results['objectId'], 'projectId': task.results['projectId'], } ) count = 0 client = Client(project=task.results['projectId']) while count < self.max_retries: try: bucket = client.get_bucket(task.results['bucketId']) blob = Blob(task.results['objectId'], bucket) content = BytesIO() blob.download_to_file(content) break except ( InvalidResponse, GoogleAPICallError, InternalServerError, SSLError, ) as e: if count >= self.max_retries: raise StoqPluginException( f'Failed to download {task.results["bucketId"]}/{task.results["objectId"]} from GCS: {str(e)}' ) count += 1 sleep(randrange(0, 4)) content.seek(0) data = content.read() if self.use_encryption: data = self._decrypt(data) return Payload(data, meta)
def get_file_link(post_filename):
    # Blob.download_to_file() requires a writable file object, so
    # download into an in-memory buffer and return it
    # (get_storage() is assumed to return the target bucket).
    blob = Blob(post_filename, get_storage())
    file = io.BytesIO()
    blob.download_to_file(file)
    file.seek(0)
    return file
def _cache_blob(self, local_path: str, gc_blob: storage.Blob):
    if not gc_blob.exists():
        return
    with open(local_path, 'wb') as file:
        gc_blob.download_to_file(file)
def download_array(blob: storage.Blob) -> np.ndarray:
    in_stream = io.BytesIO()
    blob.download_to_file(in_stream)
    in_stream.seek(0)  # Read from the start of the file-like object
    return np.load(in_stream)
def manipulate():
    # initialize execution timer
    start_time = time.time()
    response_obj = None
    if am_i_ok():
        # MySQL initialization
        cnx = mysql.connector.connect(**config)
        cursor = cnx.cursor()

        # instance metadata
        worker = 'someone'
        try:
            req = requests.get(
                "http://metadata/computeMetadata/v1/instance/name",
                headers={'Metadata-Flavor': 'Google'})
            if req.status_code < 299:
                worker = req.text
            else:
                print("Worker name unrecognized")
                syslog.syslog("Worker name unrecognized")
        except Exception as exc:
            print("Worker name unrecognized. %s" % exc)
            syslog.syslog("Worker name unrecognized. %s" % exc)
            worker = 'unrecognized'

        try:
            # seek my job
            job_etag = id_generator()
            mysql_set_job = ("UPDATE jobs SET status=2, worker=%s, etag=%s, "
                             "id = @selected_job := id "
                             "WHERE status in (0, 3) LIMIT 1")
            cursor.execute(mysql_set_job, (worker, job_etag))
            cnx.commit()
            mysql_get_job = ("SELECT id, image_bucket, image_path, status, worker, etag "
                             "FROM jobs WHERE id = @selected_job")
            cursor.execute(mysql_get_job)
            job_id = None
            job_bucket = None
            job_image = None
            for (jid, image_bucket, image_path, status, worker, etag) in cursor:
                print("DEBUG: jid=%s, image_bucket=%s, image_path=%s, status=%s, worker=%s, etag=%s"
                      % (jid, image_bucket, image_path, status, worker, etag))
                if etag == job_etag:
                    # I have a job!
                    job_id = jid
                    job_bucket = image_bucket
                    job_image = image_path
                    print('Ready to execute the job %s: %s/%s'
                          % (job_id, job_bucket, job_image))
                    syslog.syslog('INTECH worker - Ready to execute the job %s: %s/%s'
                                  % (job_id, job_bucket, job_image))
                else:
                    # I don't have a job :(
                    print('All jobs are completed')
                    syslog.syslog('INTECH worker - All jobs are completed')
                    break
            if job_id is None:
                print('There aren\'t jobs. Closing the communication :( bye')
                syslog.syslog('INTECH worker - There aren\'t jobs. '
                              'Closing the communication :( bye')
                cursor.close()
                cnx.close()
                response_obj = app.response_class(
                    response=json.dumps(
                        {'status': 'I\'m jobless... I have no tasks'}),
                    status=404,
                    mimetype='application/json')
                return response_obj

            # initialize GCS client and source image
            client = storage.Client()
            source_bucket = client.get_bucket(job_bucket)
            blob = Blob(job_image, source_bucket)
            dl_file_name = "dl_%s.jpg" % id_generator()
            with open(dl_file_name, 'wb') as file_obj:
                blob.download_to_file(file_obj)

            # process source image
            im1 = Image.open(dl_file_name)
            im2 = im1.filter(ImageFilter.EDGE_ENHANCE_MORE)
            enhancer = ImageEnhance.Sharpness(im2)
            im3 = enhancer.enhance(2.0)
            out = ImageMath.eval("convert(a, 'L')", a=im3)

            # save new image locally
            res_file_name = "%s_%s.png" % (id_generator(),
                                           job_image.rsplit('/', 1)[-1])
            out.save(res_file_name)

            # upload image to my GCS bucket
            dest_bucket = client.get_bucket(destination_bucket)
            blob = Blob("results/%s" % res_file_name, dest_bucket)
            with open(res_file_name, 'rb') as my_file:
                blob.upload_from_file(my_file)

            # cleaning up
            os.remove(dl_file_name)
            os.remove(res_file_name)

            # a polite response
            response_body = {
                'seconds': (time.time() - start_time),
                'job': {
                    'id': job_id,
                    'image_bucket': job_bucket,
                    'image_path': job_image,
                    'etag': job_etag,
                    'worker': worker
                }
            }
            response_obj = app.response_class(
                response=json.dumps(response_body),
                status=200,
                mimetype='application/json')
        except Exception as exc:
            print("Runtime error: %s" % exc)
            syslog.syslog("INTECH worker - Runtime error: %s" % exc)
            # an ugly response
            response_obj = app.response_class(
                response=json.dumps({
                    'seconds': (time.time() - start_time),
                    'status': 'runtime error'
                }),
                status=500,
                mimetype='application/json')
            # write a file for the health check
            with open("alarm.txt", 'w') as file_obj:
                file_obj.write("this is an error!")
            try:
                # closing connections
                cursor.close()
                cnx.close()
            except:
                pass
    else:
        # oops, something is wrong
        response_obj = app.response_class(
            response=json.dumps({
                'seconds': (time.time() - start_time),
                'status': 'the instance is corrupted!'
            }),
            status=500,
            mimetype='application/json')
    return response_obj