def upload_new_files_to_bucket(glob_filename, basename='', client_obj=None,
                               bucket_prefix=None, bucket_suffix=None):
    client_obj = maybe_populate_client_obj(client_obj)
    if client_obj is None:
        return
    bucket_name = lookup_bucket(client_obj, prefix=bucket_prefix,
                                suffix=bucket_suffix)
    if bucket_name is None:
        return
    bucket = client_obj.get_bucket(bucket_name)
    blobs = bucket.list_blobs()
    blob_names = [b.name for b in blobs]
    for filepath in glob(path.join(basename, glob_filename)):
        filename = path.split(filepath)[-1]
        if filename not in blob_names:
            blob = storage.Blob(filename, bucket)
            with open(filepath, 'rb') as fp:
                blob.upload_from_file(fp)
    return client_obj

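# Hedged usage sketch (not part of the original module): upload every local
# '*.csv' under 'exports/' that the bucket does not already contain. The
# glob pattern, directory, and bucket suffix are hypothetical; bucket
# selection is assumed to be handled by lookup_bucket() as in the function
# above.
upload_new_files_to_bucket('*.csv', basename='exports',
                           bucket_suffix='-staging')
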
def test_no_retry_on_keyboardinterrupt(b, collect):
    """Ensure that KeyboardInterrupts are forwarded."""
    blob_name = 'test-blob-name'
    k = storage.Blob(blob_name, b)

    # If vanilla KeyboardInterrupt is used, then sending SIGINT to the
    # test can cause it to pass improperly, so use a subtype instead.
    class MarkedKeyboardInterrupt(KeyboardInterrupt):
        pass

    collect.inject(MarkedKeyboardInterrupt('SIGINT, probably'))
    d = gs_deleter.Deleter()

    with pytest.raises(MarkedKeyboardInterrupt):
        d.delete(k)

        # Exactly when coroutines are scheduled is non-deterministic,
        # so spin while yielding to provoke the
        # MarkedKeyboardInterrupt being processed within the
        # pytest.raises context manager.
        while True:
            gevent.sleep(0.1)

    # Only one blob should have been aborted, since the purpose is to
    # *not* retry when processing KeyboardInterrupt.
    assert collect.aborted_blobs == [blob_name]

    # Turn off fault injection and flush/synchronize with close().
    collect.inject(None)
    d.close()

    # Since there is no retrying, no blobs should be deleted.
    assert not collect.deleted_blobs

def test_direct_write_and_read_into_file(self):
    blob = storage.Blob(bucket=self.bucket, name='MyBuffer')
    file_contents = b'Hello World'
    blob.upload_from_string(file_contents)
    self.case_blobs_to_delete.append(blob)

    same_blob = storage.Blob(bucket=self.bucket, name='MyBuffer')
    same_blob.reload()  # Initialize properties.
    temp_filename = tempfile.mktemp()
    with open(temp_filename, 'wb') as file_obj:
        same_blob.download_to_file(file_obj)

    with open(temp_filename, 'rb') as file_obj:
        stored_contents = file_obj.read()

    self.assertEqual(file_contents, stored_contents)

def _upload_single(fpath):
    """
    Uploads a single file to GCS.

    Returns a tuple containing (upload_success, fpath).
    """
    success = True
    try:
        client = _get_client('storage')
    except CouldNotObtainCredentialsError as e:
        logging.error('Could not obtain GCS credentials: {0}'.format(e))
        return False, fpath

    bucket = client.bucket(config.GCS_BUCKET)
    blob = storage.Blob(os.path.basename(fpath), bucket)
    try:
        blob.upload_from_filename(fpath)
        msg = 'Successfully uploaded {0} to GCS'
        logging.info(msg.format(fpath))
    except (GCloudError, GCloudStreamingError) as e:
        msg = '{0} failed to upload to GCS: {1}'
        logging.error(msg.format(fpath, e))
        success = False
    return success, fpath

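# Hedged usage sketch (not part of the original module): the path below is
# hypothetical, and config.GCS_BUCKET is assumed to name a bucket the
# obtained credentials can write to. The helper always returns the path it
# was given so callers can requeue failures.
ok, uploaded_path = _upload_single('/tmp/example.jpg')
if not ok:
    logging.warning('Upload of %s failed; consider retrying.', uploaded_path)
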
def update_bucket_status(timestamps, basename='', client_obj=None,
                         bucket_prefix=None, bucket_suffix=None,
                         folder_prefixes=0):
    client_obj = maybe_populate_client_obj(client_obj)
    if client_obj is None:
        return
    bucket_name = lookup_bucket(client_obj, prefix=bucket_prefix,
                                suffix=bucket_suffix)
    if bucket_name is None:
        return
    bucket = client_obj.get_bucket(bucket_name)

    # Map each blob to its local file path: the first ``folder_prefixes``
    # underscore-separated parts of the blob name are treated as directories
    # under ``basename``, and the remainder is re-joined as the filename.
    blobs = bucket.list_blobs()
    blobs = [b for b in blobs]
    blob_filenames = [
        path.join(*([basename] + b.name.split('_')[:folder_prefixes] +
                    ['_'.join(b.name.split('_')[folder_prefixes:])]))
        for b in blobs
    ]
    blobs = dict(zip(blob_filenames, blobs))

    for key, timestamp in timestamps.items():
        key_parts = key.split('_')
        n_key_parts = len(key_parts)
        subpath = path.join(
            path.join(*key_parts[:folder_prefixes])
            if folder_prefixes != 0 else '',
            '_'.join(key_parts[folder_prefixes:])) + '*'
        for f in glob(
                path.join(basename, subpath,
                          *(['*'] * (folder_prefixes - n_key_parts)))):
            # Read the stored timestamp (if any) from the blob's metadata.
            meta = blobs[f].metadata if f in blobs else None
            meta = (
                json.loads(meta['timestamp'],
                           object_hook=cse_json_decoding_hook)
                if 'timestamp' in meta else None) if meta is not None else None
            # Re-upload the file if the bucket copy is missing or stale.
            if (meta < timestamp) if meta is not None else True:
                # metasplit() is not defined in this snippet; it is used here
                # to recover the blob name from the local path.
                blob = storage.Blob(
                    '_'.join(metasplit(f, folder_prefixes)[1:]), bucket)
                with open(f, 'rb') as fp:
                    blob.upload_from_file(fp)
                blob.metadata = dict(
                    timestamp=json.dumps(timestamp, cls=CSEJSONEncoder))
                blob.patch()

    # Delete blobs whose local counterparts no longer exist.
    for f, blob in blobs.items():
        if not path.exists(f):
            try:
                blob.delete()
            except NotFound:
                pass
    return client_obj

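# A minimal sketch (not part of the original module) of the blob-name to
# local-path mapping that update_bucket_status relies on, extracted from the
# list comprehension above; the example names and folder_prefixes value are
# hypothetical. ``path`` is assumed to be ``os.path``, as in the snippet.
from os import path


def _blob_name_to_path(blob_name, basename='', folder_prefixes=0):
    # The first ``folder_prefixes`` underscore-separated pieces become
    # directories; the rest is re-joined into the filename.
    parts = blob_name.split('_')
    return path.join(*([basename] + parts[:folder_prefixes] +
                       ['_'.join(parts[folder_prefixes:])]))


# e.g. _blob_name_to_path('siteA_2020_readings.csv', 'data', folder_prefixes=2)
# yields 'data/siteA/2020/readings.csv'.
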
def test_create_signed_read_url(self):
    blob = storage.Blob(bucket=self.bucket, name='LogoToSign.jpg')
    expiration = int(time.time() + 5)
    signed_url = blob.generate_signed_url(expiration, method='GET')

    response, content = HTTP.request(signed_url, method='GET')
    self.assertEqual(response.status, 200)
    self.assertEqual(content, self.LOCAL_FILE)

def _uri_to_blob(creds, uri, conn=None):
    assert uri.startswith('gs://')
    url_tup = urlparse(uri)
    bucket_name = url_tup.netloc
    if conn is None:
        conn = calling_format.connect(creds)
    b = storage.Bucket(conn, name=bucket_name)
    return storage.Blob(url_tup.path, b)

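# Hedged usage sketch (not part of the original module): the URI below is
# hypothetical and ``creds`` is assumed to be whatever credential object
# calling_format.connect() expects. As written above, the netloc becomes the
# bucket name and the URL path (leading slash included) becomes the blob name.
blob = _uri_to_blob(creds, 'gs://my-backup-bucket/basebackups/base_0001')
# blob.bucket.name == 'my-backup-bucket'
# blob.name == '/basebackups/base_0001'
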
def setUp(self):
    super(TestStorageSignURLs, self).setUp()

    logo_path = self.FILES['logo']['path']
    with open(logo_path, 'rb') as file_obj:
        self.LOCAL_FILE = file_obj.read()

    blob = storage.Blob(bucket=self.bucket, name='LogoToSign.jpg')
    blob.upload_from_string(self.LOCAL_FILE)
    self.case_blobs_to_delete.append(blob)

def test_processes_one_deletion(b, collect):
    # Mock up a blob and bucket
    blob_name = 'test-blob-name'
    k = storage.Blob(blob_name, b)

    d = gs_deleter.Deleter()
    d.delete(k)
    d.close()

    assert collect.deleted_blobs == [blob_name]

def test_copy_existing_file(self):
    filename = self.FILES['logo']['path']
    blob = storage.Blob('CloudLogo', bucket=self.bucket)
    blob.upload_from_filename(filename)
    self.case_blobs_to_delete.append(blob)

    new_blob = self.bucket.copy_blob(blob, self.bucket, 'CloudLogoCopy')
    self.case_blobs_to_delete.append(new_blob)

    base_contents = blob.download_as_string()
    copied_contents = new_blob.download_as_string()
    self.assertEqual(base_contents, copied_contents)

def test_small_file_write_from_filename(self):
    blob = storage.Blob(bucket=self.bucket, name='SmallFile')
    self.assertEqual(blob._properties, {})

    file_data = self.FILES['simple']
    blob.upload_from_filename(file_data['path'])
    self.case_blobs_to_delete.append(blob)

    md5_hash = blob.md5_hash
    if not isinstance(md5_hash, six.binary_type):
        md5_hash = md5_hash.encode('utf-8')
    self.assertEqual(md5_hash, file_data['hash'])

def test_create_signed_delete_url(self):
    blob = storage.Blob(bucket=self.bucket, name='LogoToSign.jpg')
    expiration = int(time.time() + 283473274)
    signed_delete_url = blob.generate_signed_url(expiration,
                                                 method='DELETE')

    response, content = HTTP.request(signed_delete_url, method='DELETE')
    self.assertEqual(response.status, 204)
    self.assertEqual(content, b'')

    # Check that the blob has actually been deleted.
    self.assertFalse(blob.name in self.bucket)

def test_write_metadata(self):
    filename = self.FILES['logo']['path']
    blob_name = os.path.basename(filename)

    blob = storage.Blob(blob_name, bucket=self.bucket)
    blob.upload_from_filename(filename)
    self.case_blobs_to_delete.append(blob)

    # NOTE: This should not be necessary. We should be able to pass
    #       it in to upload_file and also to upload_from_string.
    blob.content_type = 'image/png'
    self.assertEqual(blob.content_type, 'image/png')

def setUpClass(cls):
    super(TestStoragePseudoHierarchy, cls).setUpClass()
    # Make sure bucket empty before beginning.
    for blob in cls.bucket.list_blobs():
        blob.delete()

    simple_path = cls.FILES['simple']['path']
    blob = storage.Blob(cls.FILENAMES[0], bucket=cls.bucket)
    blob.upload_from_filename(simple_path)
    cls.suite_blobs_to_delete = [blob]
    for filename in cls.FILENAMES[1:]:
        new_blob = cls.bucket.copy_blob(blob, cls.bucket, filename)
        cls.suite_blobs_to_delete.append(new_blob)

def test_large_file_write_from_stream(self):
    blob = storage.Blob(bucket=self.bucket, name='LargeFile')
    self.assertEqual(blob._properties, {})

    file_data = self.FILES['big']
    with open(file_data['path'], 'rb') as file_obj:
        blob.upload_from_file(file_obj)
        self.case_blobs_to_delete.append(blob)

    md5_hash = blob.md5_hash
    if not isinstance(md5_hash, six.binary_type):
        md5_hash = md5_hash.encode('utf-8')
    self.assertEqual(md5_hash, file_data['hash'])

def upload_blob():
    """Uploads a file to the bucket."""
    storage_client = storage.Client.from_service_account_json(
        os.path.join(os.getcwd(), 'crypto-trading-c8a8078ea295.json'))
    bucket = storage_client.get_bucket('stock-twits')
    blob_name = (str(datetime.datetime.now().year) +
                 str(datetime.datetime.now().month) +
                 str(datetime.datetime.now().day) + '_twits.csv')
    blob = bucket.blob(blob_name)

    blob.upload_from_filename(os.path.join(os.getcwd(), 'twits/output.csv'))

    # Delete the local copy only after confirming the blob exists in the
    # bucket.
    if storage.Blob(bucket=bucket, name=blob_name).exists(storage_client):
        os.remove(os.path.join(os.getcwd(), 'twits/output.csv'))

    print('File {} uploaded to {}.'.format(blob_name, 'stock-twits bucket'))

def setUpClass(cls):
    super(TestStorageListFiles, cls).setUpClass()
    # Make sure bucket empty before beginning.
    for blob in cls.bucket.list_blobs():
        blob.delete()

    logo_path = cls.FILES['logo']['path']
    blob = storage.Blob(cls.FILENAMES[0], bucket=cls.bucket)
    blob.upload_from_filename(logo_path)
    cls.suite_blobs_to_delete = [blob]

    # Copy main blob onto remaining in FILENAMES.
    for filename in cls.FILENAMES[1:]:
        new_blob = cls.bucket.copy_blob(blob, cls.bucket, filename)
        cls.suite_blobs_to_delete.append(new_blob)

def test_processes_many_deletions(b, collect):
    # Generate a target list of blobs in a stable order
    target = sorted(['test-blob-' + str(x) for x in range(20001)])

    # Construct blobs from the generated names and delete them all.
    blobs = [storage.Blob(blob_name, b) for blob_name in target]

    d = gs_deleter.Deleter()
    for k in blobs:
        d.delete(k)
    d.close()

    # Sort the deleted blob names to obtain another stable order and
    # then ensure that everything was passed for deletion
    # successfully.
    assert sorted(collect.deleted_blobs) == target

def _upload(self, task):
    """Upload the file to Google Storage.

    Parameters:
    :param task: The task which defines which folder (task.directory) should
        be uploaded to which storage (task.id)
    """
    # We cannot use self.gcs here because this runs in a thread; using
    # self.gcs raises an exception -> Wrong SSL Version.
    gcs = storage.Client(self.project.project_id)
    # task.id is the uuid generated name for the bucket
    bucket = gcs.get_bucket(task.id)
    # Close the tar file
    task.tar_file.close()
    blob = storage.Blob(os.path.basename(task.file), bucket)
    with open(task.file, 'rb') as file:
        blob.upload_from_file(file)

def upload(self, fnames):
    uploaded_files = []
    bucket = self.storage.get_bucket(config.GCS_PROCESSED_PHOTOS_BUCKET)
    batch = self.datastore.batch()

    for fname in fnames:
        name, ext = os.path.splitext(fname)
        fpath = '{0}/{1}{2}'.format(constants.IMAGE_PROCESSOR_DATA_DIR,
                                    name, ext)
        objname = '{0}{1}'.format(name, ext)
        blob = storage.Blob(objname, bucket)
        try:
            # Open in binary mode and close the handle once the upload
            # finishes.
            with open(fpath, 'rb') as image_file:
                blob.upload_from_file(image_file)
            uploaded_files.append(fname)
            msg = 'Successfully uploaded {0} to Cloud Storage'
            logging.info(msg.format(fname))
        except Exception as e:
            msg = 'Failed to upload {0} to Cloud Storage: {1}'
            logging.error(msg.format(fname, e))
        else:
            # Update original photo entity
            photo_key = self.datastore.key(ds.DATASTORE_PHOTO, fname)
            photo_entity = self.datastore.get(photo_key)
            photo_entity.update({'processed': True})
            batch.put(photo_entity)

            # Create datastore entry for oriented image
            name, ext = os.path.splitext(fname)
            resized_fname = '{0}{1}'.format(name, ext)
            oriented_key = self.datastore.key(ds.DATASTORE_ORIENTED_IMAGE,
                                              resized_fname)
            oriented_entity = datastore.Entity(oriented_key)
            oriented_entity['original_photo'] = photo_key
            oriented_entity['image_type'] = unicode(ds.TOTALITY_IMAGE_TYPE)
            oriented_entity[ds.TOTALITY_ORDERING_PROPERTY] = random.random()
            batch.put(oriented_entity)

def upload(self, fnames):
    """
    Uploads a list of Movie entities to the datastore and uploads the
    corresponding movie files to Cloud Storage.
    """
    # Name movies based on time created
    movie_dir = datetime.now().strftime("%Y-%m-%d %H:%M")
    movie_name = 'movie-{0}.mp4'.format(movie_dir)

    # Upload movie to Cloud Storage
    bucket = self.storage.get_bucket(config.GCS_MOVIE_BUCKET)
    blob = storage.Blob('{0}/{1}'.format(movie_dir, movie_name), bucket)
    # Binary mode so upload_from_file reads raw mp4 bytes.
    with open(constants.MOVIE_FPATH, 'rb') as f:
        try:
            blob.upload_from_file(f)
            msg = 'Successfully uploaded {0} to Cloud Storage'
            logging.info(msg.format(constants.MOVIE_FPATH))
        except Exception as e:
            msg = 'Failed to upload {0} to Cloud Storage: {1}'
            logging.error(msg.format(constants.MOVIE_FPATH, e))
            return False

def maybe_upload_file_to_bucket(filename, basename='', client_obj=None,
                                bucket_prefix=None, bucket_suffix=None):
    client_obj = maybe_populate_client_obj(client_obj)
    if client_obj is None:
        return
    bucket_name = lookup_bucket(client_obj, prefix=bucket_prefix,
                                suffix=bucket_suffix)
    if bucket_name is None:
        return
    bucket = client_obj.get_bucket(bucket_name)
    if bucket.get_blob(filename) is None:
        blob = storage.Blob(filename, bucket)
        with open(path.join(basename, filename), 'rb') as fp:
            blob.upload_from_file(fp)
    return client_obj

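# Hedged usage sketch (not part of the original module): the filename,
# directory, and bucket prefix below are hypothetical, and lookup_bucket() is
# assumed to select a bucket matching the given prefix. The helper skips the
# upload when the blob already exists and returns None when no client or
# bucket could be resolved.
client = maybe_upload_file_to_bucket('results.csv', basename='output',
                                     bucket_prefix='analysis-')
if client is None:
    logging.warning('No client or matching bucket; nothing was uploaded.')
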
def test_retry_on_normal_error(b, collect):
    """Ensure retries are processed for most errors."""
    blob_name = 'test-blob-name'
    k = storage.Blob(blob_name, b)

    collect.inject(Exception('Normal error'))
    d = gs_deleter.Deleter()
    d.delete(k)

    # Since delete_blob will fail over and over again, aborted_blobs
    # should grow quickly.
    while len(collect.aborted_blobs) < 2:
        gevent.sleep(0.1)

    # Since delete_blob has been failing repeatedly, no blobs should
    # be successfully deleted.
    assert not collect.deleted_blobs

    # Turn off fault injection and flush/synchronize with close().
    collect.inject(None)
    d.close()

    # The one enqueued job should have been processed.
    assert collect.deleted_blobs == [blob_name]

connection = storage.get_connection()

# OK, now let's look at all of the buckets...
print(list(demo.list_buckets(connection)))  # This might take a second...

# Now let's create a new bucket...
bucket_name = ("bucket-%s" % time.time()).replace(".", "")  # Get rid of dots.
print(bucket_name)
bucket = demo.create_bucket(bucket_name, connection)
print(bucket)

# Let's look at all of the buckets again...
print(list(demo.list_buckets(connection)))

# How about we create a new blob inside this bucket.
blob = storage.Blob("my-new-file.txt", bucket=bucket)

# Now let's put some data in there.
blob.upload_from_string("this is some data!")

# ... and we can read that data back again.
print(blob.download_as_string())

# Now let's delete that blob.
print(blob.delete())

# And now that we're done, let's delete that bucket...
print(bucket.delete())

# Alright! That's all!
# Here's an interactive prompt for you now...

class Pipeline():

    def __init__(self, datastore_client, storage_client):
        self.prev_fnames = []
        self.datastore = datastore_client
        self.storage = storage_client

    def scan(self):
        """
        Scans datastore for all <kind> entities. A list of all entity
        names is returned.
        """
        # Query datastore for all full disk OrientedImage entities, sorted
        # by the adjusted timestamp of the image (the ordering of the image
        # in the megamovie).
        query = self.datastore.query(
            kind=ds.DATASTORE_ORIENTED_IMAGE,
            order=[ds.TOTALITY_ORDERING_PROPERTY],
            filters=[("image_type", "=", ds.TOTALITY_IMAGE_TYPE)])

        # Fetch keys only, no need for other entity properties
        query.keys_only()

        # Retrieve all datastore entities. Query currently has no limit &
        # fetches all full disk totality images
        try:
            query = query.fetch()
        except Exception:
            msg = 'Failed to get {0} from Cloud Datastore.'
            logging.exception(msg.format(query))
            return None

        fnames = list(entity.key.name for entity in list(query))
        if self.prev_fnames == fnames:
            return []
        self.prev_fnames = fnames

        # Return list of filenames
        return fnames

    def assemble(self, fnames):
        """
        Stitches together movies from an ordered list of filenames.
        Downloads new files from GCS then feeds files to ffmpeg. Returns
        list of files successfully stitched into movie & calls stats func
        """
        # Get files from GCS
        pool = Pool(min(len(fnames), constants.MOVIE_DAEMON_MAX_PROCESSES))
        results = pool.map(get_file_from_gcs, fnames)
        pool.terminate()

        # Start ffmpeg subprocess
        ffmpeg_cmd = [
            "ffmpeg", "-y",      # Overwrite existing movie file
            "-f", "image2pipe",
            "-framerate", constants.MOVIE_FRAMERATE,
            "-vcodec", "mjpeg",
            "-i", "-",           # Input pipe from stdin
            "-vf", "scale=1024:-1",
            "-loglevel", "panic",
            "-vcodec", "libx264",
            constants.MOVIE_FPATH
        ]
        ffmpeg_ps = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)

        fnames = list(compress(fnames, results))
        files_read = self._pipe_to_ffmpeg(ffmpeg_ps, fnames)

        if files_read > constants.MOVIE_MIN_FRAMES:
            ffmpeg_ps.stdin.close()
            ffmpeg_ps.wait()
        else:
            ffmpeg_ps.kill()
        return fnames

    def upload(self, fnames):
        """
        Uploads a list of Movie entities to the datastore and uploads the
        corresponding movie files to Cloud Storage.
        """
        # Name movies based on time created
        movie_dir = datetime.now().strftime("%Y-%m-%d %H:%M")
        movie_name = 'movie-{0}.mp4'.format(movie_dir)

        # Upload movie to Cloud Storage
        bucket = self.storage.get_bucket(config.GCS_MOVIE_BUCKET)
        blob = storage.Blob('{0}/{1}'.format(movie_dir, movie_name), bucket)
        # Binary mode so upload_from_file reads raw mp4 bytes.
        with open(constants.MOVIE_FPATH, 'rb') as f:
            try:
                blob.upload_from_file(f)
                msg = 'Successfully uploaded {0} to Cloud Storage'
                logging.info(msg.format(constants.MOVIE_FPATH))
            except Exception as e:
                msg = 'Failed to upload {0} to Cloud Storage: {1}'
                logging.error(msg.format(constants.MOVIE_FPATH, e))
                return False

        if os.path.exists(constants.C_MAP_FPATH):
            map_name = 'map-{0}.png'.format(movie_dir)
            blob = storage.Blob('{0}/{1}'.format(movie_dir, map_name), bucket)
            with open(constants.C_MAP_FPATH, 'rb') as c_map:
                try:
                    blob.upload_from_file(c_map)
                    msg = 'Successfully uploaded {0} to Cloud Storage'
                    logging.info(msg.format(map_name))
                except Exception as e:
                    msg = 'Failed to upload {0} to Cloud Storage: {1}'
                    logging.error(msg.format(constants.C_MAP_FPATH, e))