def DeleteTempBucket(self, bucket_name):
  bucket_ref = storage_util.BucketReference.FromArgument(
      bucket_name, require_prefix=False)
  for obj_message in storage_api.StorageClient().ListBucket(bucket_ref):
    obj_ref = storage_util.ObjectReference.FromMessage(obj_message)
    storage_api.StorageClient().DeleteObject(obj_ref)
  storage_api.StorageClient().DeleteBucket(bucket_ref)
def CreateGcsBucket(self):
  """Creates a GCS bucket with a generated unique name and tracks it."""
  name = self.UniqueName('gcs-bucket')
  storage_api.StorageClient().CreateBucketIfNotExists(
      name, self.Project())
  self.gcs_bucket_names.append(name)
  return name
def _GetDaisyBucket(self, args):
  storage_client = storage_api.StorageClient()
  bucket_location = storage_client.GetBucketLocationForFile(
      args.destination_uri)
  bucket_name = daisy_utils.GetDaisyBucketName(bucket_location)
  storage_client.CreateBucketIfNotExists(bucket_name)
  return bucket_name
def _Read(path):
  """Read a file/object (local or on Cloud Storage).

  >>> with _Read('gs://builder/object.txt') as f:
  ...   assert f.read() == 'foo'
  >>> with _Read('file:///path/to/object.txt') as f:
  ...   assert f.read() == 'bar'

  Args:
    path: str, the path to the file/object to read. Must begin with 'file://'
      or 'gs://'.

  Returns:
    a file-like context manager.

  Raises:
    IOError: if the file is local and open()ing it raises this error.
    OSError: if the file is local and open()ing it raises this error.
    calliope_exceptions.BadFileException: if the remote file read failed.
    InvalidRuntimeBuilderPath: if the path is invalid (doesn't begin with an
      appropriate prefix).
  """
  if path.startswith('file://'):
    return open(path[len('file://'):])
  elif path.startswith('gs://'):
    storage_client = storage_api.StorageClient()
    object_ = storage_util.ObjectReference.FromUrl(path)
    return contextlib.closing(storage_client.ReadObject(object_))
  else:
    raise InvalidRuntimeBuilderPath(path)
def SetUp(self):
  self.storage_client = storage_api.StorageClient()
  self.files_to_upload = []
  self.object_path = next(
      e2e_utils.GetResourceNameGenerator(prefix='object'))
  self.bucket_name = next(
      e2e_utils.GetResourceNameGenerator(prefix=BUCKET_PREFIX))
def ReadCSVFileFromGCS(bucket, csv_file):
  """Read a CSV file from a bucket.

  Args:
    bucket: String. The name of the bucket to read from.
    csv_file: String. The name of the CSV file located in the GCS bucket.

  Returns:
    A 2D list of data.
    Example:
      table_data[0] = ['1', 'Some name', 'Some value']
      table_data[1] = ['2', 'Some other name', 'Some value']
  """
  client = storage_api.StorageClient()
  table_object_reference = storage_util.ObjectReference.FromUrl(
      '{bucket}/{table}'.format(bucket=bucket, table=csv_file))
  data = client.ReadObject(table_object_reference)
  table_data = []
  # Different implementation due to differences in strings
  # between Py2 and Py3.
  if six.PY3:
    data = io.TextIOWrapper(data, encoding='utf-8')
  reader = csv.reader(data)
  for row in reader:
    table_data.append(row)
  return table_data
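# Hedged usage sketch for ReadCSVFileFromGCS above: the bucket URL and object
# path are illustrative placeholders, not values taken from the source.
def _PrintCsvPreview():
  """Reads an example CSV object and prints its first few rows."""
  rows = ReadCSVFileFromGCS('gs://my-example-bucket', 'exports/table.csv')
  for row in rows[:5]:
    print(row)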
def SetUp(self):
  self.apitools_client = api_mock.Client(
      core_apis.GetClientClass('storage', 'v1'))
  self.apitools_client.Mock()
  self.addCleanup(self.apitools_client.Unmock)
  self.storage_client = storage_api.StorageClient(self.apitools_client)
def Execute(self, callback=None):
  storage_client = storage_api.StorageClient()
  retry.Retryer(max_retrials=3).RetryOnException(
      storage_client.CopyFileToGCS,
      args=(self.source_local_path, self.dest_obj_ref))
  if callback:
    callback()
def _EnsureSubdirExists(bucket_ref, subdir):
  """Checks that a directory marker object exists in the bucket or creates one.

  The directory marker object is needed for subdir listing to not crash if
  the directory is empty.

  Args:
    bucket_ref: googlecloudsdk.api_lib.storage.storage_util.BucketReference,
      a reference to the environment's bucket.
    subdir: str, the subdirectory to check or recreate. Should not contain
      slashes.
  """
  subdir_name = '{}/'.format(subdir)
  subdir_ref = storage_util.ObjectReference.FromBucketRef(
      bucket_ref, subdir_name)

  storage_client = storage_api.StorageClient()
  try:
    storage_client.GetObject(subdir_ref)
  except apitools_exceptions.HttpNotFoundError:
    # Insert an empty object into the bucket named subdir_name, which will
    # serve as an empty directory marker.
    insert_req = storage_client.messages.StorageObjectsInsertRequest(
        bucket=bucket_ref.bucket, name=subdir_name)
    upload = transfer.Upload.FromStream(
        io.BytesIO(), 'application/octet-stream')
    try:
      storage_client.client.objects.Insert(insert_req, upload=upload)
    except apitools_exceptions.HttpError:
      raise command_util.Error(
          'Error re-creating empty {}/ directory. List calls may '
          'fail, but importing will restore the directory.'.format(subdir))
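# Hedged usage sketch for _EnsureSubdirExists above: the subdirectory names
# are illustrative assumptions about an environment bucket's layout.
def _EnsureEnvironmentSubdirsExist(bucket_ref):
  """Recreates the empty directory markers that listings rely on."""
  for subdir in ('dags', 'plugins'):
    _EnsureSubdirExists(bucket_ref, subdir)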
def Run(self, args):
  project_ref = args.CONCEPTS.project.Parse()
  service_account = 'service-' + str(
      project_util.GetProjectNumber(project_ref.projectsId)
  ) + '@gcp-sa-dataplex.iam.gserviceaccount.com'
  if args.IsSpecified('storage_bucket_resource'):
    return storage_api.StorageClient().AddIamPolicyBinding(
        storage_util.BucketReference(args.storage_bucket_resource),
        'serviceAccount:' + service_account, 'roles/dataplex.serviceAgent')
  if args.IsSpecified('bigquery_dataset_resource'):
    get_dataset_request = apis.GetMessagesModule(
        'bigquery', 'v2').BigqueryDatasetsGetRequest(
            datasetId=args.bigquery_dataset_resource,
            projectId=args.secondary_project)
    dataset = apis.GetClientInstance(
        'bigquery', 'v2').datasets.Get(request=get_dataset_request)
    lake.AddServiceAccountToDatasetPolicy(
        apis.GetMessagesModule('bigquery',
                               'v2').Dataset.AccessValueListEntry,
        dataset, service_account, 'roles/dataplex.serviceAgent')
    return apis.GetClientInstance('bigquery', 'v2').datasets.Patch(
        apis.GetMessagesModule('bigquery', 'v2').BigqueryDatasetsPatchRequest(
            datasetId=args.bigquery_dataset_resource,
            projectId=args.secondary_project,
            dataset=dataset))
  if args.IsSpecified('project_resource'):
    return projects_api.AddIamPolicyBinding(
        project_util.ParseProject(args.project_resource),
        'serviceAccount:' + service_account, 'roles/dataplex.serviceAgent')
def GetAndCreateDaisyBucket(bucket_name=None,
                            storage_client=None,
                            bucket_location=None):
  """Determine the name of the GCS bucket to use and create if necessary.

  Args:
    bucket_name: str, bucket name to use, otherwise the bucket will be named
      based on the project id.
    storage_client: The storage_api client object.
    bucket_location: str, bucket location.

  Returns:
    A string containing the name of the GCS bucket to use.
  """
  project = properties.VALUES.core.project.GetOrFail()
  safe_project = project.replace(':', '-')
  safe_project = safe_project.replace('.', '-')
  if not bucket_name:
    bucket_name = '{0}-daisy-bkt'.format(safe_project)
    if bucket_location:
      bucket_name = '{0}-{1}'.format(bucket_name, bucket_location).lower()

  safe_bucket_name = bucket_name.replace('google', 'elgoog')

  if not storage_client:
    storage_client = storage_api.StorageClient()

  # TODO(b/117668144): Make Daisy scratch bucket ACLs same as
  # source/destination bucket.
  storage_client.CreateBucketIfNotExists(
      safe_bucket_name, location=bucket_location)

  return safe_bucket_name
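# Hedged usage sketch for GetAndCreateDaisyBucket above: the bucket location
# is an illustrative assumption; with no bucket_name the name is derived from
# the current project.
def _PrepareDaisyScratchBucket():
  """Returns the name of a (possibly newly created) Daisy scratch bucket."""
  storage_client = storage_api.StorageClient()
  return GetAndCreateDaisyBucket(storage_client=storage_client,
                                 bucket_location='us-central1')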
def _UploadFile(file_upload_task):
  """Upload a single file to Google Cloud Storage.

  Args:
    file_upload_task: FileUploadTask describing the file to upload.

  Returns:
    None if the file was uploaded successfully or a stringified Exception if
    one was raised.
  """
  storage_client = storage_api.StorageClient()
  bucket_ref = storage_util.BucketReference.FromBucketUrl(
      file_upload_task.bucket_url)
  retryer = retry.Retryer(max_retrials=3)

  path = file_upload_task.path
  sha1_hash = file_upload_task.sha1_hash
  log.debug('Uploading [{f}] to [{gcs}]'.format(f=path, gcs=sha1_hash))
  try:
    retryer.RetryOnException(
        storage_client.CopyFileToGCS,
        args=(bucket_ref, path, sha1_hash))
  except Exception as err:  # pylint: disable=broad-except
    # pass all errors through as strings (not all exceptions can be
    # serialized)
    return str(err)
  return None
def UploadFiles(upload_pairs, bucket_ref, gs_prefix=None):
  """Uploads files at the local path to a specifically prefixed location.

  The prefix is 'cloudmldist/<current timestamp>'.

  Args:
    upload_pairs: [(str, str)]. Pairs of absolute paths to local files to
      upload and corresponding path in Cloud Storage (that goes after the
      prefix). For example, ('/path/foo', 'bar') will upload '/path/foo' to
      '<prefix>/bar' in Cloud Storage.
    bucket_ref: storage_util.BucketReference. Files will be uploaded to this
      bucket.
    gs_prefix: str. Prefix to the GCS Path where files will be uploaded.

  Returns:
    [str]. A list of fully qualified gcs paths for the uploaded files, in the
    same order they were provided.
  """
  checksum = file_utils.Checksum(algorithm=hashlib.sha256)
  for local_path, _ in upload_pairs:
    checksum.AddFileContents(local_path)

  if gs_prefix is not None:
    gs_prefix = '/'.join([gs_prefix, checksum.HexDigest()])
  else:
    gs_prefix = checksum.HexDigest()

  storage_client = storage_api.StorageClient()
  dests = []
  for local_path, uploaded_path in upload_pairs:
    obj_ref = storage_util.ObjectReference.FromBucketRef(
        bucket_ref, '/'.join([gs_prefix, uploaded_path]))
    obj = storage_client.CopyFileToGCS(local_path, obj_ref)
    dests.append('/'.join(['gs:/', obj.bucket, obj.name]))
  return dests
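# Hedged usage sketch for UploadFiles above: the local paths, bucket URL, and
# prefix are illustrative placeholders.
def _StageTrainingPackage():
  """Uploads a pair of local files under a checksum-suffixed prefix."""
  bucket_ref = storage_util.BucketReference.FromArgument(
      'gs://my-staging-bucket')
  upload_pairs = [('/tmp/trainer/task.py', 'trainer/task.py'),
                  ('/tmp/trainer/setup.py', 'setup.py')]
  return UploadFiles(upload_pairs, bucket_ref, gs_prefix='cloudmldist')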
def UploadSource(source_dir, bucket, obj, gen_files=None, skip_files=None):
  """Upload a gzipped tarball of the source directory to GCS.

  Note: To provide parity with docker's behavior, we must respect
  .dockerignore.

  Args:
    source_dir: the directory to be archived.
    bucket: the GCS bucket where the tarball will be stored.
    obj: the GCS object where the tarball will be stored, in the above bucket.
    gen_files: dict of filename to (str) contents of generated config and
      source context files.
    skip_files: optional, a parsed regex for paths and files to skip, from
      the service yaml.

  Raises:
    UploadFailedError: when the source fails to upload to GCS.
  """
  gen_files = gen_files or {}
  dockerignore_contents = _GetDockerignoreExclusions(source_dir, gen_files)
  included_paths = _GetIncludedPaths(source_dir, dockerignore_contents,
                                     skip_files)

  # We can't use tempfile.NamedTemporaryFile here because ... Windows.
  # See https://bugs.python.org/issue14243. There are small cleanup races
  # during process termination that will leave artifacts on the filesystem.
  # eg, CTRL-C on windows leaves both the directory and the file. Unavoidable.
  # On Posix, `kill -9` has similar behavior, but CTRL-C allows cleanup.
  with files.TemporaryDirectory() as temp_dir:
    f = open(os.path.join(temp_dir, 'src.tgz'), 'w+b')
    with gzip.GzipFile(mode='wb', fileobj=f) as gz:
      _CreateTar(source_dir, gen_files, included_paths, gz)
    f.close()
    storage_client = storage_api.StorageClient()
    storage_client.CopyFileToGCS(bucket, f.name, obj)
def _ReadDiagnosticsFileUpload(self, object_ref):
  """Try to read an object for a given GCS path.

  Args:
    object_ref: An object reference to the file to read.
  """
  storage_api.StorageClient().ReadObject(object_ref)
def _GetObjectOrSubdirObjects(object_ref, object_is_subdir=False, client=None):
  """Gets object_ref or the objects under object_ref if it's a subdir."""
  client = client or storage_api.StorageClient()
  objects = []
  # Check if object_ref refers to an actual object. If it does not exist, we
  # assume the user is specifying a subdirectory.
  target_is_subdir = False
  if not object_is_subdir:
    try:
      client.GetObject(object_ref)
      objects.append(object_ref)
    except apitools_exceptions.HttpNotFoundError:
      target_is_subdir = True

  if target_is_subdir or object_is_subdir:
    target_path = posixpath.join(object_ref.name, '')
    subdir_objects = client.ListBucket(object_ref.bucket_ref, target_path)
    for obj in subdir_objects:
      if object_is_subdir and obj.name == object_ref.name:
        # In this case, object_ref is to be treated as a subdir, so if
        # object_ref happens to also be an object, ignore it.
        continue
      objects.append(
          storage_util.ObjectReference.FromName(object_ref.bucket, obj.name))

  return objects
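# Hedged usage sketch for _GetObjectOrSubdirObjects above: the gs:// URL is an
# illustrative placeholder for a prefix that may name either an object or a
# "directory" of objects.
def _ListDagObjects():
  """Expands a dags prefix into the individual objects beneath it."""
  object_ref = storage_util.ObjectReference.FromUrl('gs://my-bucket/dags')
  return _GetObjectOrSubdirObjects(object_ref, object_is_subdir=True)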
def Run(self, args):
  log.warn('Importing image, this may take up to 1 hour.')

  storage_client = storage_api.StorageClient()
  daisy_bucket = daisy_utils.GetAndCreateDaisyBucket(
      storage_client=storage_client)

  # Copy image from source-uri to daisy scratch bucket.
  image_file = os.path.basename(args.source_uri)
  dest_name = '{0}-{1}'.format(uuid.uuid4(), image_file)
  dest_path = 'gs://{0}/tmpimage/{1}'.format(daisy_bucket, dest_name)
  src_object = resources.REGISTRY.Parse(args.source_uri,
                                        collection='storage.objects')
  dest_object = resources.REGISTRY.Parse(dest_path,
                                         collection='storage.objects')
  log.status.write('\nCopying [{0}] to [{1}]\n'.format(
      args.source_uri, dest_path))
  storage_client.Rewrite(src_object, dest_object)

  variables = """source_disk_file={0},disk_size=50g,image_name={1}""".format(
      dest_path, args.image_name)

  tags = ['gce-daisy-image-import']
  return daisy_utils.RunDaisyBuild(args, _WORKFLOW, variables,
                                   daisy_bucket=daisy_bucket, tags=tags)
def testAddIamPolicyBindingsDoesntCallSetForSameBindings(self):
  policy = self.storage_v1_messages.Policy(
      kind='storage#policy',
      resourceId='projects/_/buckets/{}'.format(self._BUCKET_NAME),
      version=1,
      etag=b'CAE3',
      bindings=[
          self.storage_v1_messages.Policy.BindingsValueListEntry(
              role='roles/storage.legacyBucketOwner',
              members=[
                  'projectEditor:{}'.format(self._PROJECT_ID),
                  'projectOwner:{}'.format(self._PROJECT_ID),
              ]),
          self.storage_v1_messages.Policy.BindingsValueListEntry(
              role='roles/storage.objectAdmin',
              members=[
                  'user:[email protected]', 'user:[email protected]'
              ]),
      ])
  self.mocked_storage_v1.buckets.GetIamPolicy.Expect(
      request=self.storage_v1_messages.StorageBucketsGetIamPolicyRequest(
          bucket=self._BUCKET_NAME, optionsRequestedPolicyVersion=3),
      response=policy)

  client = storage_api.StorageClient()
  actual_policy = client.AddIamPolicyBindings(
      self.bucket_reference,
      [('user:[email protected]', 'roles/storage.objectAdmin'),
       ('user:[email protected]', 'roles/storage.objectAdmin')])
  self.assertEqual(actual_policy, policy)
def Execute(self, callback=None):
  """Complete one ObjectDeleteTask (safe to run in parallel)."""
  storage_client = storage_api.StorageClient()
  retry.Retryer(max_retrials=3).RetryOnException(
      storage_client.DeleteObject, args=(self.obj_ref,))
  if callback:
    callback()
def Run(self, args):
  compute_holder = base_classes.ComputeApiHolder(self.ReleaseTrack())

  # Fail early if the requested image name is invalid or already exists.
  _CheckImageName(args.image_name)
  _CheckForExistingImage(args.image_name, compute_holder)

  storage_client = storage_api.StorageClient()
  import_stager = _CreateImportStager(storage_client, args)
  daisy_vars, workflow = import_stager.Stage()

  self._ProcessAdditionalArgs(args, daisy_vars)

  # TODO(b/79591894): Once we've cleaned up the Argo output, replace this
  # warning message with a ProgressTracker spinner.
  log.warning('Importing image. This may take up to 2 hours.')
  tags = ['gce-daisy-image-import']
  return daisy_utils.RunDaisyBuild(
      args,
      workflow,
      ','.join(daisy_vars),
      tags=tags,
      daisy_bucket=import_stager.GetDaisyBucket(),
      user_zone=properties.VALUES.compute.zone.Get(),
      output_filter=_OUTPUT_FILTER,
      service_account_roles=self._GetServiceAccountRoles())
def ValidateBucketForCertificateAuthority(bucket_name):
  """Validates that a user-specified bucket can be used with a Private CA.

  Args:
    bucket_name: The name of the GCS bucket to validate.

  Returns:
    A BucketReference wrapping the given bucket name.

  Raises:
    InvalidArgumentException: when the given bucket can't be used with a CA.
  """
  messages = storage_util.GetMessages()
  client = storage_api.StorageClient(messages=messages)

  try:
    bucket = client.GetBucket(
        bucket_name,
        messages.StorageBucketsGetRequest.ProjectionValueValuesEnum.full)

    if not _BucketAllowsPublicObjectReads(bucket):
      # Show a warning but don't fail, since this could be intentional.
      log.warning(
          'The specified bucket does not publicly expose new objects by '
          'default, so some clients may not be able to access the CA '
          'certificate or CRLs. For more details, see '
          'https://cloud.google.com/storage/docs/access-control/making-data-public')

    return storage_util.BucketReference(bucket_name)
  except storage_api.BucketNotFoundError:
    raise exceptions.InvalidArgumentException(
        'gcs-bucket', 'The given bucket does not exist.')
def UploadSource(upload_dir, source_files, object_ref, gen_files=None):
  """Upload a gzipped tarball of the source directory to GCS.

  Note: To provide parity with docker's behavior, we must respect
  .dockerignore.

  Args:
    upload_dir: the directory to be archived.
    source_files: [str], relative paths to upload.
    object_ref: storage_util.ObjectReference, the Cloud Storage location to
      upload the source tarball to.
    gen_files: dict of filename to (str) contents of generated config and
      source context files.
  """
  gen_files = gen_files or {}
  dockerignore_contents = _GetDockerignoreExclusions(upload_dir, gen_files)
  included_paths = _GetIncludedPaths(upload_dir, source_files,
                                     dockerignore_contents)

  # We can't use tempfile.NamedTemporaryFile here because ... Windows.
  # See https://bugs.python.org/issue14243. There are small cleanup races
  # during process termination that will leave artifacts on the filesystem.
  # eg, CTRL-C on windows leaves both the directory and the file. Unavoidable.
  # On Posix, `kill -9` has similar behavior, but CTRL-C allows cleanup.
  with files.TemporaryDirectory() as temp_dir:
    f = files.BinaryFileWriter(os.path.join(temp_dir, 'src.tgz'))
    with gzip.GzipFile(mode='wb', fileobj=f) as gz:
      _CreateTar(upload_dir, gen_files, included_paths, gz)
    f.close()
    storage_client = storage_api.StorageClient()
    storage_client.CopyFileToGCS(object_ref.bucket_ref, f.name,
                                 object_ref.name)
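# Hedged usage sketch for the UploadSource variant above: the source
# directory, file list, and target object URL are illustrative placeholders.
def _UploadAppSource():
  """Tars and uploads two source files to a staging object."""
  object_ref = storage_util.ObjectReference.FromUrl(
      'gs://my-staging-bucket/source/src.tgz')
  UploadSource('/path/to/app', ['main.py', 'app.yaml'], object_ref)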
@contextlib.contextmanager
def _Read(uri):
  """Read a file/object (local file:// or gs:// Cloud Storage path).

  >>> with _Read('gs://builder/object.txt') as f:
  ...   assert f.read() == 'foo'
  >>> with _Read('file:///path/to/object.txt') as f:
  ...   assert f.read() == 'bar'

  Args:
    uri: str, the path to the file/object to read. Must begin with 'file://'
      or 'gs://'.

  Yields:
    a file-like context manager.

  Raises:
    FileReadError: If opening or reading the file failed.
    InvalidRuntimeBuilderURI: If the uri is invalid (doesn't begin with an
      appropriate prefix).
  """
  try:
    if uri.startswith('file://'):
      with contextlib.closing(urllib2.urlopen(uri)) as req:
        yield req
    elif uri.startswith('gs://'):
      storage_client = storage_api.StorageClient()
      object_ = storage_util.ObjectReference.FromUrl(uri)
      with contextlib.closing(storage_client.ReadObject(object_)) as f:
        yield f
    else:
      raise InvalidRuntimeBuilderURI(uri)
  except (urllib2.HTTPError, urllib2.URLError,
          calliope_exceptions.BadFileException) as e:
    log.debug('', exc_info=True)
    raise FileReadError(str(e))
def Run(self, args):
  storage_client = storage_api.StorageClient()
  daisy_bucket = daisy_utils.GetAndCreateDaisyBucket(
      storage_client=storage_client)
  image_uuid = uuid.uuid4()

  daisy_vars = ['image_name={}'.format(args.image_name)]
  if args.source_image:
    # If we're starting from an image, then we've already imported it.
    workflow = _IMPORT_FROM_IMAGE_WORKFLOW
    daisy_vars.append(
        'translate_workflow={}'.format(_GetTranslateWorkflow(args)))
    ref = resources.REGISTRY.Parse(
        args.source_image,
        collection='compute.images',
        params={'project': properties.VALUES.core.project.GetOrFail})
    # source_name should be of the form 'global/images/image-name'.
    source_name = ref.RelativeName()[len(ref.Parent().RelativeName() + '/'):]
    daisy_vars.append('source_image={}'.format(source_name))
  else:
    # If the file is an OVA file, print a warning.
    if args.source_file.endswith('.ova'):
      log.warning('The specified input file may contain more than one '
                  'virtual disk. Only the first vmdk disk will be '
                  'imported. ')
    elif (args.source_file.endswith('.tar.gz') or
          args.source_file.endswith('.tgz')):
      raise exceptions.BadFileException(
          '"gcloud compute images import" does not support compressed '
          'archives. Please extract your image and try again.\n If you got '
          'this file by exporting an image from Compute Engine (e.g. by '
          'using "gcloud compute images export") then you can instead use '
          '"gcloud compute images create" to create your image from your '
          '.tar.gz file.')

    # Get the image into the scratch bucket, wherever it is now.
    if _IsLocalFile(args.source_file):
      gcs_uri = _UploadToGcs(args.async, args.source_file,
                             daisy_bucket, image_uuid)
    else:
      source_file = _MakeGcsUri(args.source_file)
      gcs_uri = _CopyToScratchBucket(source_file, image_uuid,
                                     storage_client, daisy_bucket)

    # Import and (maybe) translate from the scratch bucket.
    daisy_vars.append('source_disk_file={}'.format(gcs_uri))
    if args.data_disk:
      workflow = _IMPORT_WORKFLOW
    else:
      workflow = _IMPORT_AND_TRANSLATE_WORKFLOW
      daisy_vars.append(
          'translate_workflow={}'.format(_GetTranslateWorkflow(args)))

  # TODO(b/79591894): Once we've cleaned up the Argo output, replace this
  # warning message with a ProgressTracker spinner.
  log.warning('Importing image. This may take up to 2 hours.')
  return daisy_utils.RunDaisyBuild(args, workflow, ','.join(daisy_vars),
                                   daisy_bucket=daisy_bucket,
                                   user_zone=args.zone)
def _DeleteObject(value):
  """Complete one ObjectDeleteTask (safe to run in parallel)."""
  object_delete_task, callback = value
  storage_client = storage_api.StorageClient()
  retry.Retryer(max_retrials=3).RetryOnException(
      storage_client.DeleteObject, args=(object_delete_task.obj_ref,))
  if callback:
    callback()
def testListBucket(self):
  self.ExpectList([('a', 'content'), ('b', 'content'), ('c', 'content2')])

  storage_client = storage_api.StorageClient()
  names = set(o.name for o in storage_client.ListBucket(self.bucket))
  self.assertEqual(
      names, set([self._SHA1_SUMS['content'], self._SHA1_SUMS['content2']]))
def testAlreadyExists(self):
  """Bucket already exists."""
  self.mocked_storage_v1.buckets.Get.Expect(
      self.storage_v1_messages.StorageBucketsGetRequest(
          bucket=self._BUCKET_NAME),
      response=self.storage_v1_messages.Bucket(id=self._BUCKET_NAME))

  client = storage_api.StorageClient()
  client.CreateBucketIfNotExists(self._BUCKET_NAME, self._PROJECT_ID)
def _UploadFileToGcsStorageApi(bucket_ref, source, dest_object):
  """Upload local source files to GCS staging bucket. Returns upload success."""
  client = storage_api.StorageClient()
  try:
    client.CopyFileToGCS(bucket_ref, source, dest_object.name)
    return True
  except calliope_exceptions.BadFileException:
    return False
def _DeleteObject(object_delete_task):
  """Complete one ObjectDeleteTask (safe to run in parallel)."""
  storage_client = storage_api.StorageClient()
  bucket_ref = storage_util.BucketReference.FromBucketUrl(
      object_delete_task.bucket_url)
  retry.Retryer(max_retrials=3).RetryOnException(
      storage_client.DeleteObject,
      args=(bucket_ref, object_delete_task.remote_path))
def _UploadFile(value):
  """Complete one FileUploadTask (safe to run in parallel)."""
  file_upload_task, callback = value
  storage_client = storage_api.StorageClient()
  retry.Retryer(max_retrials=3).RetryOnException(
      storage_client.CopyFileToGCS,
      args=(file_upload_task.local_path, file_upload_task.dest_obj_ref))
  if callback:
    callback()