def get_bucket(self, bucket_name):
    """Get a bucket by name.

    If the bucket isn't found, this will raise a
    :class:`google.cloud.exceptions.NotFound`.

    For example::

      >>> try:
      >>>   bucket = client.get_bucket('my-bucket')
      >>> except google.cloud.exceptions.NotFound:
      >>>   print('Sorry, that bucket does not exist!')

    This implements "storage.buckets.get".

    :type bucket_name: str
    :param bucket_name: The name of the bucket to get.

    :rtype: :class:`google.cloud.storage.bucket.Bucket`
    :returns: The bucket matching the name provided.
    :raises: :class:`google.cloud.exceptions.NotFound`
    """
    bucket = Bucket(self, name=bucket_name)
    bucket.reload(client=self)
    return bucket
def create_bucket(self, bucket_or_name, requester_pays=None, project=None):
    """API call: create a new bucket via a POST request.

    See
    https://cloud.google.com/storage/docs/json_api/v1/buckets/insert

    Args:
        bucket_or_name (Union[ \
            :class:`~google.cloud.storage.bucket.Bucket`, \
             str, \
        ]):
            The bucket resource to pass or name to create.
        requester_pays (bool):
            Optional. Whether requester pays for API requests for this
            bucket and its blobs.
        project (str):
            Optional. The project under which the bucket is to be created.
            If not passed, uses the project set on the client.

    Returns:
        google.cloud.storage.bucket.Bucket
            The newly created bucket.

    Raises:
        google.cloud.exceptions.Conflict
            If the bucket already exists.

    Examples:
        Create a bucket using a string.

        .. literalinclude:: snippets.py
            :start-after: [START create_bucket]
            :end-before: [END create_bucket]

        Create a bucket using a resource.

        >>> from google.cloud import storage
        >>> client = storage.Client()

        >>> # Set properties on a plain resource object.
        >>> bucket = storage.Bucket("my-bucket-name")
        >>> bucket.location = "europe-west6"
        >>> bucket.storage_class = "COLDLINE"

        >>> # Pass that resource object to the client.
        >>> bucket = client.create_bucket(bucket)  # API request.
    """
    bucket = None
    if isinstance(bucket_or_name, Bucket):
        bucket = bucket_or_name
    else:
        bucket = Bucket(self, name=bucket_or_name)

    if requester_pays is not None:
        bucket.requester_pays = requester_pays

    bucket.create(client=self, project=project)
    return bucket
def _make_one(self, client=None, name=None, properties=None):
    from google.cloud.storage.bucket import Bucket

    if client is None:
        connection = _Connection()
        client = _Client(connection)
    bucket = Bucket(client, name=name)
    bucket._properties = properties or {}
    return bucket
def get_items_from_response(self, response):
    """Factory method which yields :class:`.Bucket` items from a response.

    :type response: dict
    :param response: The JSON API response for a page of buckets.
    """
    for item in response.get('items', []):
        name = item.get('name')
        bucket = Bucket(self.client, name)
        bucket._set_properties(item)
        yield bucket
def get_bucket(self, bucket_or_name):
    """API call: retrieve a bucket via a GET request.

    See
    https://cloud.google.com/storage/docs/json_api/v1/buckets/get

    Args:
        bucket_or_name (Union[ \
            :class:`~google.cloud.storage.bucket.Bucket`, \
             str, \
        ]):
            The bucket resource to pass or name to get.

    Returns:
        google.cloud.storage.bucket.Bucket
            The bucket matching the name provided.

    Raises:
        google.cloud.exceptions.NotFound
            If the bucket is not found.

    Examples:
        Retrieve a bucket using a string.

        .. literalinclude:: snippets.py
            :start-after: [START get_bucket]
            :end-before: [END get_bucket]

        Get a bucket using a resource.

        >>> from google.cloud import storage
        >>> client = storage.Client()

        >>> # Get a bucket by name.
        >>> bucket = client.get_bucket("my-bucket-name")

        >>> # Time passes. Another program may have modified the bucket
        ... # in the meantime, so you want to get the latest state.
        >>> bucket = client.get_bucket(bucket)  # API request.
    """
    bucket = None
    if isinstance(bucket_or_name, Bucket):
        bucket = bucket_or_name
    else:
        bucket = Bucket(self, name=bucket_or_name)

    bucket.reload(client=self)
    return bucket
def _item_to_bucket(iterator, item):
    """Convert a JSON bucket to the native object.

    :type iterator: :class:`~google.cloud.iterator.Iterator`
    :param iterator: The iterator that has retrieved the item.

    :type item: dict
    :param item: An item to be converted to a bucket.

    :rtype: :class:`.Bucket`
    :returns: The next bucket in the page.
    """
    name = item.get('name')
    bucket = Bucket(iterator.client, name)
    bucket._set_properties(item)
    return bucket
def test_create_bucket_with_object_success(self):
    from google.cloud.storage.bucket import Bucket

    project = "PROJECT"
    credentials = _make_credentials()
    client = self._make_one(project=project, credentials=credentials)

    bucket_name = "bucket-name"
    bucket_obj = Bucket(client, bucket_name)
    bucket_obj.storage_class = "COLDLINE"
    bucket_obj.requester_pays = True

    URI = "/".join(
        [
            client._connection.API_BASE_URL,
            "storage",
            client._connection.API_VERSION,
            "b?project=%s" % (project,),
        ]
    )
    json_expected = {
        "name": bucket_name,
        "billing": {"requesterPays": True},
        "storageClass": "COLDLINE",
    }
    data = json_expected
    http = _make_requests_session([_make_json_response(data)])
    client._http_internal = http

    bucket = client.create_bucket(bucket_obj)

    self.assertIsInstance(bucket, Bucket)
    self.assertEqual(bucket.name, bucket_name)
    self.assertTrue(bucket.requester_pays)
    http.request.assert_called_once_with(
        method="POST", url=URI, data=mock.ANY, headers=mock.ANY
    )
    json_sent = http.request.call_args_list[0][1]["data"]
    self.assertEqual(json_expected, json.loads(json_sent))
def create_bucket(self, bucket_name, requester_pays=None, project=None):
    """Create a new bucket.

    For example:

    .. literalinclude:: snippets.py
        :start-after: [START create_bucket]
        :end-before: [END create_bucket]

    This implements "storage.buckets.insert".

    If the bucket already exists, will raise
    :class:`google.cloud.exceptions.Conflict`.

    To set additional properties when creating a bucket, such as the
    bucket location, use :meth:`~.Bucket.create`.

    :type bucket_name: str
    :param bucket_name: The bucket name to create.

    :type requester_pays: bool
    :param requester_pays:
        (Optional) Whether requester pays for API requests for this
        bucket and its blobs.

    :type project: str
    :param project: (Optional) the project under which the bucket is to
                    be created.  If not passed, uses the project set on
                    the client.

    :rtype: :class:`google.cloud.storage.bucket.Bucket`
    :returns: The newly created bucket.
    """
    bucket = Bucket(self, name=bucket_name)
    if requester_pays is not None:
        bucket.requester_pays = requester_pays
    bucket.create(client=self, project=project)
    return bucket
def create_bucket(self, bucket_name):
    """Create a new bucket.

    For example::

      >>> bucket = client.create_bucket('my-bucket')
      >>> print(bucket)
      <Bucket: my-bucket>

    This implements "storage.buckets.insert".

    If the bucket already exists, will raise
    :class:`google.cloud.exceptions.Conflict`.

    :type bucket_name: str
    :param bucket_name: The bucket name to create.

    :rtype: :class:`google.cloud.storage.bucket.Bucket`
    :returns: The newly created bucket.
    """
    bucket = Bucket(self, name=bucket_name)
    bucket.create(client=self)
    return bucket
def create_bucket(self, bucket_name):
    """Create a new bucket.

    For example:

    .. literalinclude:: storage_snippets.py
        :start-after: [START create_bucket]
        :end-before: [END create_bucket]

    This implements "storage.buckets.insert".

    If the bucket already exists, will raise
    :class:`google.cloud.exceptions.Conflict`.

    :type bucket_name: str
    :param bucket_name: The bucket name to create.

    :rtype: :class:`google.cloud.storage.bucket.Bucket`
    :returns: The newly created bucket.
    """
    bucket = Bucket(self, name=bucket_name)
    bucket.create(client=self)
    return bucket
def get_bucket(self, bucket_name):
    """Get a bucket by name.

    If the bucket isn't found, this will raise a
    :class:`google.cloud.exceptions.NotFound`.

    For example:

    .. literalinclude:: snippets.py
        :start-after: [START get_bucket]
        :end-before: [END get_bucket]

    This implements "storage.buckets.get".

    :type bucket_name: str
    :param bucket_name: The name of the bucket to get.

    :rtype: :class:`google.cloud.storage.bucket.Bucket`
    :returns: The bucket matching the name provided.
    :raises: :class:`google.cloud.exceptions.NotFound`
    """
    bucket = Bucket(self, name=bucket_name)
    bucket.reload(client=self)
    return bucket
def get_bucket(self, bucket_name):
    """Get a bucket by name.

    If the bucket isn't found, this will raise a
    :class:`google.cloud.exceptions.NotFound`.

    For example:

    .. literalinclude:: storage_snippets.py
        :start-after: [START get_bucket]
        :end-before: [END get_bucket]

    This implements "storage.buckets.get".

    :type bucket_name: str
    :param bucket_name: The name of the bucket to get.

    :rtype: :class:`google.cloud.storage.bucket.Bucket`
    :returns: The bucket matching the name provided.
    :raises: :class:`google.cloud.exceptions.NotFound`
    """
    bucket = Bucket(self, name=bucket_name)
    bucket.reload(client=self)
    return bucket
def upload_job_inputs(bucket: Bucket, job_id: str, config: Configuration, observations: pd.DataFrame):
    """Upload all the inputs required to execute a task. These inputs are
    uploaded into the given bucket.

    This function uploads the inputs that are common to all tasks in a job:
    the configuration and the observations. The related method
    upload_task_inputs uploads the inputs that are specific to a single task,
    namely the orbits.

    Parameters
    ----------
    bucket : Bucket
        The bucket hosting the job.
    job_id : str
        The ID of the job.
    config : Configuration
        A THOR configuration which the task executors should use.
    observations : pd.DataFrame
        The preprocessed observations which should be used by task executors.
    """
    # Upload configuration file
    cfg_bytes = config.toYamlString()
    cfg_path = _job_input_path(job_id, "config.yml")
    logger.info("uploading job input %s", cfg_path)
    bucket.blob(cfg_path).upload_from_string(cfg_bytes)

    # Upload observations
    observations_buf = io.BytesIO()
    observations.to_csv(observations_buf, index=False)
    observations_bytes = observations_buf.getvalue()

    observations_path = _job_input_path(job_id, "observations.csv")
    logger.info("uploading job input %s", observations_path)
    bucket.blob(observations_path).upload_from_string(observations_bytes)
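# Hypothetical driver sketch for upload_job_inputs above. The bucket name and
# job ID are placeholders, and the zero-argument Configuration() constructor is
# an assumption; in the real code base the configuration and observations come
# from earlier pipeline steps.
import pandas as pd
from google.cloud import storage

client = storage.Client()
job_bucket = client.bucket("thor-jobs-example")  # illustrative bucket name
job_config = Configuration()                     # assumed default-constructible
job_observations = pd.DataFrame({"obs_id": [1, 2], "mjd": [59000.1, 59000.2]})

upload_job_inputs(job_bucket, "job-0001", job_config, job_observations)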
def test_list_blobs_w_all_arguments_and_user_project(self):
    from google.cloud.storage.bucket import Bucket

    BUCKET_NAME = "name"
    USER_PROJECT = "user-project-123"
    MAX_RESULTS = 10
    PAGE_TOKEN = "ABCD"
    PREFIX = "subfolder"
    DELIMITER = "/"
    VERSIONS = True
    PROJECTION = "full"
    FIELDS = "items/contentLanguage,nextPageToken"
    EXPECTED = {
        "maxResults": 10,
        "pageToken": PAGE_TOKEN,
        "prefix": PREFIX,
        "delimiter": DELIMITER,
        "versions": VERSIONS,
        "projection": PROJECTION,
        "fields": FIELDS,
        "userProject": USER_PROJECT,
    }

    credentials = _make_credentials()
    client = self._make_one(project=USER_PROJECT, credentials=credentials)
    connection = _make_connection({"items": []})

    with mock.patch(
        'google.cloud.storage.client.Client._connection',
        new_callable=mock.PropertyMock,
    ) as client_mock:
        client_mock.return_value = connection

        bucket = Bucket(client, BUCKET_NAME, user_project=USER_PROJECT)
        iterator = client.list_blobs(
            bucket_or_name=bucket,
            max_results=MAX_RESULTS,
            page_token=PAGE_TOKEN,
            prefix=PREFIX,
            delimiter=DELIMITER,
            versions=VERSIONS,
            projection=PROJECTION,
            fields=FIELDS,
        )
        blobs = list(iterator)

        self.assertEqual(blobs, [])
        connection.api_request.assert_called_once_with(
            method="GET",
            path="/b/%s/o" % BUCKET_NAME,
            query_params=EXPECTED,
        )
def bucket(self, bucket_name, user_project=None):
    """Factory constructor for bucket object.

    .. note::
      This will not make an HTTP request; it simply instantiates
      a bucket object owned by this client.

    :type bucket_name: str
    :param bucket_name: The name of the bucket to be instantiated.

    :type user_project: str
    :param user_project: (Optional) the project ID to be billed for API
                         requests made via the bucket.

    :rtype: :class:`google.cloud.storage.bucket.Bucket`
    :returns: The bucket object created.
    """
    return Bucket(client=self, name=bucket_name, user_project=user_project)
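# A minimal usage sketch contrasting the factory above with get_bucket().
# It assumes default application credentials and an existing bucket named
# "example-bucket"; both are illustrative only.
from google.cloud import storage

client = storage.Client()

# No HTTP request is made here; the Bucket is only a local handle.
bucket = client.bucket("example-bucket")

# An API request happens only when the handle is used, e.g. to load metadata.
bucket.reload()
print(bucket.storage_class)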
def _bucket_arg_to_bucket(self, bucket_or_name):
    """Helper to return given bucket or create new by name.

    Args:
        bucket_or_name (Union[ \
            :class:`~google.cloud.storage.bucket.Bucket`, \
             str, \
        ]):
            The bucket resource to pass or name to create.

    Returns:
        google.cloud.storage.bucket.Bucket
            The newly created bucket or the given one.
    """
    if isinstance(bucket_or_name, Bucket):
        bucket = bucket_or_name
    else:
        bucket = Bucket(self, name=bucket_or_name)

    return bucket
def get_task_status(bucket: Bucket, job_id: str, task_id: str) -> TaskStatus:
    """Get the status of a task.

    Parameters
    ----------
    bucket : Bucket
        The Google Cloud Storage bucket that hosts the given job and task.
    job_id : str
        The ID of the job.
    task_id : str
        The ID of the task.

    Returns
    -------
    TaskStatus
        The status of the task.
    """
    blob_path = _task_status_path(job_id, task_id)
    status_str = bucket.blob(blob_path).download_as_string()
    return TaskStatus.from_bytes(status_str)
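# Hypothetical caller sketch for get_task_status above. The bucket name, job
# ID and task ID are placeholders; get_task_status, TaskStatus and the blob
# layout are assumed to come from the surrounding module.
from google.cloud import storage

client = storage.Client()
job_bucket = client.bucket("thor-jobs-example")  # illustrative bucket name

status = get_task_status(job_bucket, "job-0001", "task-0001")
print("task status:", status)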
def test_list_blobs(self):
    from google.cloud.storage.bucket import Bucket

    BUCKET_NAME = "bucket-name"

    credentials = _make_credentials()
    client = self._make_one(project="PROJECT", credentials=credentials)
    connection = _make_connection({"items": []})

    with mock.patch(
        'google.cloud.storage.client.Client._connection',
        new_callable=mock.PropertyMock,
    ) as client_mock:
        client_mock.return_value = connection

        bucket_obj = Bucket(client, BUCKET_NAME)
        iterator = client.list_blobs(bucket_obj)
        blobs = list(iterator)

        self.assertEqual(blobs, [])
        connection.api_request.assert_called_once_with(
            method="GET",
            path="/b/%s/o" % BUCKET_NAME,
            query_params={"projection": "noAcl"},
        )
def _copy_local_directory_to_gcs(_local_path: str, _bucket: Bucket, _gcs_path: str, _n_to_remove: int = 0):
    """Recursively upload a local directory to a path inside a GCS bucket.

    Files whose remote path contains 'part' have the current day and hour
    appended to the base file name before the extension.
    """
    for local_file in glob.glob(_local_path + '/**'):
        # Recurse into subdirectories.
        if not os.path.isfile(local_file):
            _copy_local_directory_to_gcs(local_file, _bucket, _gcs_path, _n_to_remove)
            continue

        # Drop the first _n_to_remove characters of the local path to build
        # the object name under _gcs_path.
        remote_path_tmp = os.path.join(_gcs_path, local_file[_n_to_remove:])
        if 'part' in remote_path_tmp:
            now = datetime.now()
            day, hour = now.day, now.hour
            folder = '/'.join(remote_path_tmp.split('/')[:-1]) + '/'
            file_name = remote_path_tmp.split('/')[-1]
            remote_path = (folder + file_name.split('.')[0] + '-{}{}.'.format(day, hour)
                           + '.'.join(file_name.split('.')[1:]))
        else:
            remote_path = remote_path_tmp

        blob = _bucket.blob(remote_path)
        blob.upload_from_filename(local_file)
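# Illustrative call of the helper above, assuming default credentials; the
# bucket name and local path are placeholders. _n_to_remove strips the leading
# part of the local path so only the relative file name is kept in the
# destination object name.
from google.cloud import storage

client = storage.Client()
output_bucket = client.bucket("my-output-bucket")  # illustrative name
local_dir = "/tmp/spark-output"

_copy_local_directory_to_gcs(local_dir, output_bucket, "exports/daily",
                             _n_to_remove=len(local_dir) + 1)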
def internal_delete_uls(self, name):
    """
    Deletes the Underlying Storage using the Google API

    Args:
        name (str): The Underlying Storage name to be deleted

    """
    # Todo: Replace with a TimeoutSampler
    for _ in range(10):
        try:
            bucket = GCPBucket(client=self.client, name=name)
            bucket.delete_blobs(bucket.list_blobs())
            bucket.delete()
            break
        except GoogleExceptions.NotFound:
            logger.warning("Failed to delete some of the bucket blobs. Retrying...")
            sleep(10)
def sign(duration: str, key_file: click.File, resource: str) -> None:
    """
    Generate a signed URL that embeds authentication data so the URL can be
    used by someone who does not have a Google account.

    This tool exists to overcome a shortcoming of gsutil signurl that limits
    expiration to 7 days only.

    KEY_FILE should be a path to a JSON file containing a service account
    private key. See gsutil signurl --help for details.

    RESOURCE is a GCS location in the form <bucket>/<path> (add neither
    "gs://" nor "http://...").

    Example: gcs-signurl /tmp/creds.json /foo-bucket/bar-file.txt
    """
    bucket_name, _, path = resource.lstrip("/").partition("/")
    creds = service_account.Credentials.from_service_account_file(key_file.name)
    till = datetime.now() + _DurationToTimeDelta(duration)

    # Ignoring a potential warning about end user credentials.
    # We don't actually do any operations on the client, but unfortunately the
    # only public API in the google-cloud-storage package requires building
    # client -> bucket -> blob.
    message = "Your application has authenticated using end user credentials from Google Cloud SDK"
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message=message)
        client = Client()

    bucket = Bucket(client, bucket_name)
    blob = Blob(path, bucket)
    # Not passing the version argument, to stay compatible with
    # google-cloud-storage<=1.14.0. Those versions default to version 2 and
    # hopefully will not change it anytime soon.
    signed_url = blob.generate_signed_url(expiration=till, credentials=creds)
    click.echo(signed_url)
def internal_delete_uls(self, name):
    """
    Deletes the Underlying Storage using the Google API

    Args:
        name (str): The Underlying Storage name to be deleted

    """
    # Todo: Replace with a TimeoutSampler
    for _ in range(10):
        try:
            bucket = GCPBucket(client=self.client, name=name)
            bucket.delete_blobs(bucket.list_blobs())
            bucket.delete()
            break
        except ClientError:  # TODO: Find relevant exception
            logger.info(
                f"Deletion of Underlying Storage {name} failed. Retrying..."
            )
            sleep(3)
class StorageUtil:
    """
    Auxiliary class for Google Cloud Storage

    Example:
        buckets = ''
        for bucket in StorageUtil.buckets():
            buckets += f'{bucket}\n'

        project_def = ProjectDefinition('project_id', 'location')
        strge = StorageUtil('bucket-name', 'all_buckets.txt',
                            project=project_def).new_content(buckets)
        print(f'File content [{strge.file_name}]: {strge.get_content()}')
    """
    client = storage.Client()

    def __init__(self, bucket_name, file_name=None, project=None,
                 project_id=None, location=None):
        self.project = project if project else ProjectReference(project_id, location)
        self._bucket_name = bucket_name
        self._bucket = None
        self._blob = None
        self._file_name = file_name
        if file_name:
            self.set_blob(file_name)

    @property
    def bucket(self):
        if not self._bucket:
            try:
                self._bucket = self.client.get_bucket(self._bucket_name)
            except NotFound:
                self._bucket = Bucket(client=self.client, name=self._bucket_name)
                self._bucket.create(client=self.client, location=self.project.location)
                logging.info('Bucket {} not found and was created.'.format(self._bucket.name))
        return self._bucket

    @property
    def blob(self):
        if not self._blob:
            raise NoBlobSetException()
        return self._blob

    @property
    def file_name(self):
        if not self._blob:
            raise NoBlobSetException()
        return self._file_name

    def set_blob(self, file_name):
        self._file_name = file_name
        self._blob = self.bucket.get_blob(file_name)
        if not self._blob:
            self._blob = self._bucket.blob(file_name)
            logging.info(f'File not found and was created: {file_name}')
        return self

    def delete_blob(self):
        try:
            self.bucket.delete_blob(self.file_name)
            logging.info(f'File deleted: {self.file_name}')
        except NotFound:
            logging.info(f'File not found: {self.file_name}')
        self._file_name = None
        self._blob = None
        return self

    def new_content(self, new_content: str):
        content_len = 80
        content = (f'{new_content[:content_len]} [...]'
                   if len(new_content) > content_len else new_content)
        logging.info(f'Uploading new content to file "{self._file_name}": {repr(content)}')
        self.blob.upload_from_string(new_content)
        return self

    def get_content(self):
        return self.blob.download_as_string()

    @classmethod
    def buckets(cls):
        return [b for b in cls.client.list_buckets()]
def upload_data_to_bucket(bucket: Bucket):
    """Upload data to a GCS bucket"""
    blob = bucket.blob(object_name)
    blob.upload_from_filename(product_resource_file)
    print("Data from {} has been uploaded to {}".format(
        product_resource_file, bucket.name))
def _get_client_bucket(name: str) -> Tuple[Client, Bucket]:
    client = Client()
    bucket = Bucket(client, name)
    return (client, bucket)
def bucket(self):
    if self._bucket is None:
        self._bucket = Bucket(self.client, name=self.bucket_name)
    return self._bucket
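# A self-contained sketch of the lazy-initialization pattern used above; the
# class and attribute names here are illustrative, not taken from the original
# code. No API call is made when the handle is created.
from google.cloud.storage import Bucket, Client


class BucketHolder:
    def __init__(self, client: Client, bucket_name: str):
        self.client = client
        self.bucket_name = bucket_name
        self._bucket = None

    @property
    def bucket(self) -> Bucket:
        # Build the Bucket handle once, on first access, then cache it.
        if self._bucket is None:
            self._bucket = Bucket(self.client, name=self.bucket_name)
        return self._bucket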
def main(exp, dataset, **params):
    params = dict(default_params, exp=exp, dataset=dataset, **params)
    print(params)

    storage_client = None
    bucket = None

    path = os.path.join(params['datasets_path'], params['dataset'])

    if params['bucket']:
        storage_client = storage.Client()
        bucket = Bucket(storage_client, params['bucket'])
        preload_dataset(path, storage_client, bucket)

    results_path = setup_results_dir(params)
    tee_stdout(os.path.join(results_path, 'log'))

    (quantize, dequantize) = quantizer(params['q_method'])
    model = SampleRNN(frame_sizes=params['frame_sizes'],
                      n_rnn=params['n_rnn'],
                      dim=params['dim'],
                      learn_h0=params['learn_h0'],
                      q_levels=params['q_levels'],
                      weight_norm=params['weight_norm'])
    predictor = Predictor(model, dequantize)
    if params['cuda'] is not False:
        print(params['cuda'])
        model = model.cuda()
        predictor = predictor.cuda()

    optimizer = gradient_clipping(
        torch.optim.Adam(predictor.parameters(), lr=params['learning_rate']))

    data_loader = make_data_loader(path, model.lookback, quantize, params)
    test_split = 1 - params['test_frac']
    val_split = test_split - params['val_frac']

    trainer = Trainer(predictor,
                      sequence_nll_loss_bits,
                      optimizer,
                      data_loader(0, val_split, eval=False),
                      cuda=params['cuda'])

    checkpoints_path = os.path.join(results_path, 'checkpoints')
    checkpoint_data = load_last_checkpoint(checkpoints_path, storage_client, bucket)
    if checkpoint_data is not None:
        (state_dict, epoch, iteration) = checkpoint_data
        trainer.epochs = epoch
        trainer.iterations = iteration
        predictor.load_state_dict(state_dict)

    trainer.register_plugin(
        TrainingLossMonitor(smoothing=params['loss_smoothing']))
    trainer.register_plugin(
        ValidationPlugin(data_loader(val_split, test_split, eval=True),
                         data_loader(test_split, 1, eval=True)))
    trainer.register_plugin(SchedulerPlugin(params['lr_scheduler_step']))

    def upload(file_path):
        if bucket is None:
            return

        name = file_path.replace(os.path.abspath(os.curdir) + '/', '')
        blob = Blob(name, bucket)
        try:
            blob.upload_from_filename(file_path, timeout=300)
        except Exception as e:
            print(str(e))

    trainer.register_plugin(AbsoluteTimeMonitor())

    samples_path = os.path.join(results_path, 'samples')
    trainer.register_plugin(
        SaverPlugin(checkpoints_path, params['keep_old_checkpoints'], upload))
    trainer.register_plugin(
        GeneratorPlugin(samples_path, params['n_samples'],
                        params['sample_length'], params['sample_rate'],
                        params['q_levels'], dequantize,
                        params['sampling_temperature'],
                        upload=upload))
    trainer.register_plugin(
        Logger(['training_loss', 'validation_loss', 'test_loss', 'time']))
    trainer.register_plugin(
        StatsPlugin(
            results_path,
            iteration_fields=[
                'training_loss',
                # ('training_loss', 'running_avg'),
                'time'
            ],
            epoch_fields=[
                'training_loss', ('training_loss', 'running_avg'),
                'validation_loss', 'test_loss', 'time'
            ],
            plots={
                'loss': {
                    'x': 'iteration',
                    'ys': [
                        'training_loss',
                        # ('training_loss', 'running_avg'),
                        'validation_loss', 'test_loss'
                    ],
                    'log_y': True
                }
            }))

    init_comet(params, trainer, samples_path, params['n_samples'],
               params['sample_rate'])

    trainer.run(params['epoch_limit'])
def main(checkpoint, **args):
    task_id = setup_logging(
        'gen', logging.NOTSET if args.get('debug', False) else logging.INFO)

    params = dict(
        {
            'n_rnn': 3,
            'dim': 1024,
            'learn_h0': False,
            'q_levels': 256,
            'weight_norm': True,
            'frame_sizes': [16, 16, 4],
            'sample_rate': 16000,
            'n_samples': 1,
            'sample_length': 16000 * 60 * 4,
            'sampling_temperature': 1,
            'q_method': QMethod.LINEAR,
        },
        exp=checkpoint,
        **args)
    logging.info(str(params))
    logging.info('booting')

    # dataset = storage_client.list_blobs(bucket, prefix=path)
    # for blob in dataset:
    #     blob.download_to_filename(blob.name)

    bucket = None
    if args['bucket']:
        logging.debug('setup google storage bucket {}'.format(args['bucket']))
        storage_client = storage.Client()
        bucket = Bucket(storage_client, args['bucket'])
        preload_checkpoint(checkpoint, storage_client, bucket)

    results_path = os.path.abspath(
        os.path.join(checkpoint, os.pardir, os.pardir, task_id))
    ensure_dir_exists(results_path)

    checkpoint = os.path.abspath(checkpoint)

    tmp_pretrained_state = torch.load(
        checkpoint,
        map_location=lambda storage, loc: storage.cuda(0)
        if args['cuda'] else storage)

    # Load all tensors onto GPU 1
    # torch.load('tensors.pt', map_location=lambda storage, loc: storage.cuda(1))

    pretrained_state = OrderedDict()

    for k, v in tmp_pretrained_state.items():
        # Delete "model." from key names since loading the checkpoint
        # automatically attaches it
        layer_name = k.replace("model.", "")
        pretrained_state[layer_name] = v
        # print("k: {}, layer_name: {}, v: {}".format(k, layer_name, np.shape(v)))

    # Create model with same parameters as used in training
    model = SampleRNN(frame_sizes=params['frame_sizes'],
                      n_rnn=params['n_rnn'],
                      dim=params['dim'],
                      learn_h0=params['learn_h0'],
                      q_levels=params['q_levels'],
                      weight_norm=params['weight_norm'])
    if params['cuda']:
        model = model.cuda()

    # Load pretrained model
    model.load_state_dict(pretrained_state)

    def upload(file_path):
        if bucket is None:
            return

        # remove prefix /app
        name = file_path.replace(os.path.abspath(os.curdir) + '/', '')
        blob = Blob(name, bucket)
        logging.info('uploading {}'.format(name))
        blob.upload_from_filename(file_path)

    (_, dequantize) = quantizer(params['q_method'])
    gen = Gen(Runner(model), params['cuda'])
    gen.register_plugin(
        GeneratorPlugin(results_path, params['n_samples'],
                        params['sample_length'], params['sample_rate'],
                        params['q_levels'], dequantize,
                        params['sampling_temperature'], upload))

    gen.run()
def delete_object_from_bucket(bucket: Bucket):
    """Delete all objects from the given GCS bucket"""
    blobs = bucket.list_blobs()
    for blob in blobs:
        blob.delete()
    print("all objects are deleted from GCS bucket {}".format(bucket.name))
def _download(bucket: Bucket, data_path: str):
    blob = bucket.blob(data_path)
    blob.download_to_filename(data_path)