def testCopy(self):
  self.copy_mock = self.storage_client_mock.Copy
  remote1 = storage_util.ObjectReference(self._TEST_BUCKET, 'remote/obj1')
  remote2 = storage_util.ObjectReference(self._TEST_BUCKET, 'remote/obj2')
  task = storage_parallel.FileRemoteCopyTask(remote1, remote2)
  storage_parallel.ExecuteTasks([task])
  self.copy_mock.assert_called_once_with(remote1, remote2)
def GetStagingLocation(job_id=None, staging_bucket=None, job_dir=None):
  """Get the appropriate staging location for the job given the arguments."""
  staging_location = None
  if staging_bucket:
    staging_location = storage_util.ObjectReference(staging_bucket, job_id)
  elif job_dir:
    staging_location = storage_util.ObjectReference(
        job_dir.bucket_ref,
        '/'.join((job_dir.name.rstrip('/'), 'packages')))
  return staging_location
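# A rough usage sketch (hypothetical bucket and job values, not from this
# change): an explicit staging bucket takes precedence and the job ID becomes
# the object path; with only a job dir, a 'packages' subpath is appended.
#
#   bucket_ref = storage_util.BucketReference.FromArgument('gs://my-staging')
#   GetStagingLocation(job_id='job_1', staging_bucket=bucket_ref)
#   # -> ObjectReference for gs://my-staging/job_1
#   job_dir = storage_util.ObjectReference(bucket_ref, 'training/run1/')
#   GetStagingLocation(job_id='job_1', job_dir=job_dir)
#   # -> ObjectReference for gs://my-staging/training/run1/packages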
def testUpload(self):
  storage_helpers.Upload(
      ['foo', '/tmp/bar', 'baz.txt'], 'gs://foo/bar/',
      storage_client=self.storage_api_client)
  self.copy_file_mock.assert_has_calls([
      mock.call('foo', storage_util.ObjectReference(
          self.storage_bucket, 'bar/foo')),
      mock.call('/tmp/bar', storage_util.ObjectReference(
          self.storage_bucket, 'bar/bar')),
      mock.call('baz.txt', storage_util.ObjectReference(
          self.storage_bucket, 'bar/baz.txt'))])
def testImportSourceIsGcsObject(self):
  env_ref = parsers.ParseEnvironment(self.TEST_ENVIRONMENT_NAME)
  self.ExpectEnvironmentGet(
      self.TEST_PROJECT,
      self.TEST_LOCATION,
      self.TEST_ENVIRONMENT_ID,
      response=self.MakeEnvironmentWithBucket())
  other_bucket_object = gcs_util.ObjectReference(
      self.other_bucket, 'dags/source/a.txt')
  self.ExpectObjectGet(other_bucket_object)
  self.ExpectCopy(
      other_bucket_object,
      gcs_util.ObjectReference(self.test_gcs_bucket, 'dags/subdir/a.txt'))

  storage_util.Import(env_ref, other_bucket_object.ToUrl(), 'dags/subdir/')
def testDownload(self):
  self.download_mock = self.storage_client_mock.CopyFileFromGCS
  local = '/some/file'
  remote = storage_util.ObjectReference(self._TEST_BUCKET, 'remote/obj')
  task = storage_parallel.FileDownloadTask(remote, local)
  storage_parallel.ExecuteTasks([task])
  self.download_mock.assert_called_once_with(remote, local)
def testUpload(self):
  self.upload_mock = self.storage_client_mock.CopyFileToGCS
  local = '/some/file'
  remote = storage_util.ObjectReference(self._TEST_BUCKET, 'remote/obj')
  task = storage_parallel.FileUploadTask(local, remote)
  storage_parallel.ExecuteTasks([task])
  self.upload_mock.assert_called_once_with(local, remote)
def _MakeTestTasks(self, count):
  tasks = []
  for n in range(count):
    tasks.append(storage_parallel.ObjectDeleteTask(
        storage_util.ObjectReference(
            self._TEST_BUCKET, 'remote{0}'.format(n))))
  return tasks
def testDataDeleteRestoresSubdir(self, use_gsutil, exec_mock):
  """Tests that the data dir is restored if it's missing after deletion."""
  self.ExpectEnvironmentGet(
      self.TEST_PROJECT,
      self.TEST_LOCATION,
      self.TEST_ENVIRONMENT_ID,
      response=self.MakeEnvironmentWithBucket())
  subdir_ref = storage_util.ObjectReference(self.test_gcs_bucket, 'data/')
  self.ExpectObjectGet(subdir_ref,
                       exception=http_error.MakeHttpError(code=404))
  self.ExpectObjectInsert(subdir_ref)

  if use_gsutil:
    self._SetUpGsutil()
    fake_exec = kubectl_util.FakeExec()
    exec_mock.side_effect = fake_exec
    fake_exec.AddCallback(
        0,
        self.MakeGsutilExecCallback(
            ['-m', 'rm', '-r',
             '{}/data/*'.format(self.test_gcs_bucket_path)]))
  else:
    self._SetUpStorageApi()

  self.RunEnvironments('storage', 'data', 'delete',
                       '--project', self.TEST_PROJECT,
                       '--location', self.TEST_LOCATION,
                       '--environment', self.TEST_ENVIRONMENT_ID)

  if use_gsutil:
    fake_exec.Verify()
  else:
    self.delete_mock.assert_called_once_with(
        storage_util.BucketReference(self.test_gcs_bucket), '*', 'data')
def testPluginsDeleteTargetSpecified(self, use_gsutil, exec_mock):
  """Tests successful deletion of a specific plugins file."""
  self.ExpectEnvironmentGet(
      self.TEST_PROJECT,
      self.TEST_LOCATION,
      self.TEST_ENVIRONMENT_ID,
      response=self.MakeEnvironmentWithBucket())
  subdir_ref = storage_util.ObjectReference(self.test_gcs_bucket, 'plugins/')
  self.ExpectObjectGet(subdir_ref)
  target = 'subdir/file.txt'

  if use_gsutil:
    self._SetUpGsutil()
    fake_exec = kubectl_util.FakeExec()
    exec_mock.side_effect = fake_exec
    fake_exec.AddCallback(
        0,
        self.MakeGsutilExecCallback(
            ['-m', 'rm', '-r',
             '{}/plugins/{}'.format(self.test_gcs_bucket_path, target)]))
  else:
    self._SetUpStorageApi()

  self.RunEnvironments('storage', 'plugins', 'delete',
                       '--project', self.TEST_PROJECT,
                       '--location', self.TEST_LOCATION,
                       '--environment', self.TEST_ENVIRONMENT_ID,
                       target)

  if use_gsutil:
    fake_exec.Verify()
  else:
    self.delete_mock.assert_called_once_with(
        storage_util.BucketReference(self.test_gcs_bucket), target, 'plugins')
def testDeleteSuccessful(self, exec_mock):
  """Tests successful Delete call."""
  target = 'c/d'
  env_ref = parsers.ParseEnvironment(self.TEST_ENVIRONMENT_NAME)
  self.ExpectEnvironmentGet(
      self.TEST_PROJECT,
      self.TEST_LOCATION,
      self.TEST_ENVIRONMENT_ID,
      response=self.MakeEnvironmentWithBucket())

  fake_exec = kubectl_util.FakeExec()
  exec_mock.side_effect = fake_exec
  fake_exec.AddCallback(
      0,
      self.MakeGsutilExecCallback(
          ['-m', 'rm', '-r',
           '{}/subdir/{}'.format(self.test_gcs_bucket_path, target)]))

  self.ExpectObjectGet(
      gcs_util.ObjectReference(self.test_gcs_bucket, 'subdir/'))

  storage_util.Delete(env_ref, target, 'subdir', release_track=self.track)
  fake_exec.Verify()
def Delete(env_ref, target, gcs_subdir):
  """Deletes objects in a folder of an environment's bucket.

  gsutil deletes directory marker objects even when told to delete just the
  directory's contents, so we need to check that the marker exists and create
  it if it doesn't. A better alternative would be to use the storage API to
  list objects by prefix and implement deletion ourselves.

  Args:
    env_ref: googlecloudsdk.core.resources.Resource, Resource representing
        the Environment in whose corresponding bucket to delete objects.
    target: str, the path within the gcs_subdir directory in the bucket
        to delete.
    gcs_subdir: str, subdir of the Cloud Storage bucket in which to delete.
        Should not contain slashes, for example "dags".
  """
  gcs_bucket = _GetStorageBucket(env_ref)
  target_ref = storage_util.ObjectReference(
      gcs_bucket, posixpath.join(gcs_subdir, target))
  try:
    retval = storage_util.RunGsutilCommand(
        'rm',
        command_args=(['-r', target_ref.ToUrl()]),
        run_concurrent=True,
        out_func=log.out.write,
        err_func=log.err.write)
  except (execution_utils.PermissionError,
          execution_utils.InvalidCommandError) as e:
    raise command_util.GsutilError(six.text_type(e))
  if retval:
    raise command_util.GsutilError('gsutil returned non-zero status code.')
  _EnsureSubdirExists(gcs_bucket, gcs_subdir)
def _GetObjectOrSubdirObjects(object_ref, object_is_subdir=False, client=None):
  """Gets object_ref, or the objects under object_ref if it's a subdir."""
  client = client or storage_api.StorageClient()
  objects = []
  # Check if object_ref refers to an actual object. If it does not exist, we
  # assume the user is specifying a subdirectory.
  target_is_subdir = False
  if not object_is_subdir:
    try:
      client.GetObject(object_ref)
      objects.append(object_ref)
    except apitools_exceptions.HttpNotFoundError:
      target_is_subdir = True

  if target_is_subdir or object_is_subdir:
    target_path = posixpath.join(object_ref.name, '')
    subdir_objects = client.ListBucket(object_ref.bucket_ref, target_path)
    for obj in subdir_objects:
      if object_is_subdir and obj.name == object_ref.name:
        # In this case, object_ref is to be treated as a subdir, so if
        # object_ref happens to also be an object, ignore it.
        continue
      objects.append(
          storage_util.ObjectReference(object_ref.bucket_ref, obj.name))
  return objects
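# Behavior sketch (hypothetical names): for gs://bucket/dags/source, the helper
# first tries GetObject('dags/source'); if that returns 404, it falls back to
# listing the 'dags/source/' prefix and returns one ObjectReference per listed
# object, which is the fan-out the import/export/delete tests above exercise.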
def testUploadFile_OneFile(self):
  tasks = self._MakeTestTasks(1)
  storage_parallel.UploadFiles(tasks)
  self.copy_file_mock.assert_called_once_with(
      'local0', storage_util.ObjectReference(self._TEST_BUCKET, 'remote0'))
  self.get_pool_mock.assert_called_once_with(16)
def _EnsureSubdirExists(bucket_ref, subdir):
  """Checks that a directory marker object exists in the bucket or creates one.

  The directory marker object is needed for subdir listing to not crash if
  the directory is empty.

  Args:
    bucket_ref: googlecloudsdk.api_lib.storage.storage_util.BucketReference,
        a reference to the environment's bucket
    subdir: str, the subdirectory to check or recreate. Should not contain
        slashes.
  """
  subdir_name = '{}/'.format(subdir)
  subdir_ref = storage_util.ObjectReference(bucket_ref, subdir_name)

  storage_client = storage_api.StorageClient()
  try:
    storage_client.GetObject(subdir_ref)
  except apitools_exceptions.HttpNotFoundError:
    # Insert an empty object into the bucket named subdir_name, which will
    # serve as an empty directory marker.
    insert_req = storage_client.messages.StorageObjectsInsertRequest(
        bucket=bucket_ref.bucket,
        name=subdir_name)
    upload = transfer.Upload.FromStream(
        io.BytesIO(), 'application/octet-stream')
    try:
      storage_client.client.objects.Insert(insert_req, upload=upload)
    except apitools_exceptions.HttpError:
      raise command_util.Error(
          'Error re-creating empty {}/ directory. List calls may '
          'fail, but importing will restore the directory.'.format(subdir))
def testDagsDeleteTargetNotSpecified(self, use_gsutil, exec_mock):
  """Tests successful deletion of the entire DAGs directory."""
  self.ExpectEnvironmentGet(
      self.TEST_PROJECT,
      self.TEST_LOCATION,
      self.TEST_ENVIRONMENT_ID,
      response=self.MakeEnvironmentWithBucket())
  subdir_ref = storage_util.ObjectReference(self.test_gcs_bucket, 'dags/')
  self.ExpectObjectGet(subdir_ref)

  if use_gsutil:
    self._SetUpGsutil()
    fake_exec = kubectl_util.FakeExec()
    exec_mock.side_effect = fake_exec
    fake_exec.AddCallback(
        0,
        self.MakeGsutilExecCallback(
            ['-m', 'rm', '-r',
             '{}/dags/*'.format(self.test_gcs_bucket_path)]))
  else:
    self._SetUpStorageApi()

  self.RunEnvironments('storage', 'dags', 'delete',
                       '--project', self.TEST_PROJECT,
                       '--location', self.TEST_LOCATION,
                       '--environment', self.TEST_ENVIRONMENT_ID)

  if use_gsutil:
    fake_exec.Verify()
  else:
    self.delete_mock.assert_called_once_with(
        storage_util.BucketReference(self.test_gcs_bucket), '*', 'dags')
def testImportSourceIsLocalDir(self):
  env_ref = parsers.ParseEnvironment(self.TEST_ENVIRONMENT_NAME)
  self.ExpectEnvironmentGet(
      self.TEST_PROJECT,
      self.TEST_LOCATION,
      self.TEST_ENVIRONMENT_ID,
      response=self.MakeEnvironmentWithBucket())
  self.ExpectObjectInsert(
      gcs_util.ObjectReference(
          self.test_gcs_bucket, 'dags/subdir/source/a.txt'),
      file_size=0)
  self.ExpectObjectInsert(
      gcs_util.ObjectReference(
          self.test_gcs_bucket, 'dags/subdir/source/b/c.txt'),
      file_size=0)

  storage_util.Import(
      env_ref, os.path.join(self.temp_path, self.source_dir), 'dags/subdir/')
def testExportSourceIsDirDestIsGcs(self):
  env_ref = parsers.ParseEnvironment(self.TEST_ENVIRONMENT_NAME)
  self.ExpectEnvironmentGet(
      self.TEST_PROJECT,
      self.TEST_LOCATION,
      self.TEST_ENVIRONMENT_ID,
      response=self.MakeEnvironmentWithBucket())
  self._ExpectFailedGet(self.test_gcs_bucket, 'dags/source')
  self.ExpectObjectList(
      self.test_gcs_bucket, 'dags/source/', responses=[self.list_response])
  self.ExpectCopy(
      self.object_refs[0],
      gcs_util.ObjectReference(self.other_bucket, 'dest/source/a.txt'))
  self.ExpectCopy(
      self.object_refs[1],
      gcs_util.ObjectReference(self.other_bucket, 'dest/source/b/c.txt'))

  storage_util.Export(
      env_ref, 'dags/source', self.other_bucket_path + '/dest')
def _RunTestWithGivenParallelism(self, num_threads):
  tasks = self._MakeTestTasks(self._DEFAULT_NUM_TASKS)
  storage_parallel.DeleteObjects(tasks, num_threads=num_threads)
  for n in range(self._DEFAULT_NUM_TASKS):
    self.delete_object_mock.assert_any_call(
        storage_util.ObjectReference(
            self._TEST_BUCKET, 'remote{0}'.format(n)))
  self.assertEqual(self.delete_object_mock.call_count,
                   self._DEFAULT_NUM_TASKS)
  self.get_pool_mock.assert_called_once_with(num_threads)
def _UploadFileToGcs(source, function_ref, stage_bucket):
  """Upload local source files to GCS staging bucket."""
  zip_file = _GenerateRemoteZipFileName(function_ref.RelativeName())
  bucket_ref = storage_util.BucketReference.FromArgument(stage_bucket)
  gcs_url = storage_util.ObjectReference(bucket_ref, zip_file).ToUrl()
  upload_result = storage_util.RunGsutilCommand('cp', [source, gcs_url])
  if upload_result != 0:
    raise exceptions.FunctionsError(
        'Failed to upload the function source code to the bucket {0}'.format(
            stage_bucket))
  return gcs_url
def testImportSourceIsGcsDirectoryWithAsterisk(self):
  env_ref = parsers.ParseEnvironment(self.TEST_ENVIRONMENT_NAME)
  self.ExpectEnvironmentGet(
      self.TEST_PROJECT,
      self.TEST_LOCATION,
      self.TEST_ENVIRONMENT_ID,
      response=self.MakeEnvironmentWithBucket())
  self.ExpectObjectList(
      self.other_bucket, 'dags/source/', responses=[self.list_response])
  self.ExpectCopy(
      gcs_util.ObjectReference(self.other_bucket, 'dags/source/a.txt'),
      gcs_util.ObjectReference(self.test_gcs_bucket, 'dags/subdir/a.txt'))
  self.ExpectCopy(
      gcs_util.ObjectReference(self.other_bucket, 'dags/source/b/c.txt'),
      gcs_util.ObjectReference(self.test_gcs_bucket, 'dags/subdir/b/c.txt'))

  storage_util.Import(
      env_ref, self.other_bucket_path + '/dags/source/*', 'dags/subdir/')
def _RunTestWithSuccessAfterNumTries(self, num_tries):
  self.delete_object_mock.side_effect = MakeRepeatMock(num_tries)
  tasks = self._MakeTestTasks(self._DEFAULT_NUM_TASKS)
  storage_parallel.DeleteObjects(tasks)

  calls = []
  self.assertEqual(self.delete_object_mock.call_count,
                   self._DEFAULT_NUM_TASKS * num_tries)
  for n in range(self._DEFAULT_NUM_TASKS):
    for _ in range(num_tries):
      calls.append(mock.call(
          storage_util.ObjectReference(
              self._TEST_BUCKET, 'remote{0}'.format(n))))
  self.delete_object_mock.assert_has_calls(calls, any_order=True)
  self.get_pool_mock.assert_called_once_with(16)
def _ImportStorageApi(gcs_bucket, source, destination):
  """Imports files and directories into a bucket."""
  client = storage_api.StorageClient()

  old_source = source
  source = source.rstrip('*')
  # A source ending with an asterisk means the user is indicating that the
  # source is a directory, so we shouldn't bother checking whether source is
  # an object. This is important because certain subdirs are always created
  # as objects (e.g. dags/); without this check, import/export would just try
  # to copy that empty object.
  object_is_subdir = old_source != source

  if not object_is_subdir:
    # If source is not indicated to be a subdir, then strip the ending slash
    # so the specified directory is present in the destination.
    source = source.rstrip(posixpath.sep)

  source_is_local = not source.startswith('gs://')
  if source_is_local and not os.path.exists(source):
    raise command_util.Error('Source for import does not exist.')

  # Don't include the specified directory as we want that present in the
  # destination bucket.
  source_dirname = _JoinPaths(os.path.dirname(source), '',
                              gsutil_path=not source_is_local)
  if source_is_local:
    if os.path.isdir(source):
      file_chooser = gcloudignore.GetFileChooserForDir(source)
      for rel_path in file_chooser.GetIncludedFiles(source):
        file_path = _JoinPaths(source, rel_path)
        if os.path.isdir(file_path):
          continue
        dest_path = _GetDestPath(source_dirname, file_path, destination, False)
        client.CopyFileToGCS(gcs_bucket, file_path, dest_path)
    else:
      # Just upload the file.
      dest_path = _GetDestPath(source_dirname, source, destination, False)
      client.CopyFileToGCS(gcs_bucket, source, dest_path)
  else:
    source_ref = storage_util.ObjectReference.FromUrl(source)
    to_import = _GetObjectOrSubdirObjects(
        source_ref, object_is_subdir=object_is_subdir, client=client)
    for obj in to_import:
      dest_object = storage_util.ObjectReference(
          gcs_bucket,
          # Use obj.ToUrl() to ensure that the dirname is properly stripped.
          _GetDestPath(source_dirname, obj.ToUrl(), destination, False))
      client.Copy(obj, dest_object)
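# Illustration of the local-directory branch (paths are hypothetical; the
# mapping mirrors what testImportSourceIsLocalDir expects): the final
# component of the source directory is preserved under the destination prefix.
#
#   _ImportStorageApi(bucket_ref, '/tmp/source', 'dags/subdir/')
#   # /tmp/source/a.txt   -> <bucket>/dags/subdir/source/a.txt
#   # /tmp/source/b/c.txt -> <bucket>/dags/subdir/source/b/c.txt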
def _ImportGsutil(gcs_bucket, source, destination):
  """Imports files and directories into a bucket."""
  destination_ref = storage_util.ObjectReference(gcs_bucket, destination)
  try:
    retval = storage_util.RunGsutilCommand(
        'cp',
        command_args=(['-r', source, destination_ref.ToUrl()]),
        run_concurrent=True,
        out_func=log.out.write,
        err_func=log.err.write)
  except (execution_utils.PermissionError,
          execution_utils.InvalidCommandError) as e:
    raise command_util.GsutilError(six.text_type(e))
  if retval:
    raise command_util.GsutilError('gsutil returned non-zero status code.')
def Export(env_ref, sources, destination, release_track=base.ReleaseTrack.GA):
  """Exports files and directories from an environment's Cloud Storage bucket.

  Args:
    env_ref: googlecloudsdk.core.resources.Resource, Resource representing
        the Environment from whose bucket to export.
    sources: [str], a list of bucket-relative paths from which to export
        files. Directory sources are exported recursively; the directory
        itself will be present in the destination. Can also include wildcards.
    destination: str, existing local directory or path to a Cloud Storage
        bucket or directory object to which to export. Must have a single
        trailing slash but no leading slash. For example, 'dir/foo/bar/'.
    release_track: base.ReleaseTrack, the release track of command. Will
        dictate which Composer client library will be used.

  Returns:
    None

  Raises:
    command_util.Error: if the storage bucket could not be retrieved or a
        non-Cloud Storage destination that is not a local directory was
        provided.
    command_util.GsutilError: the gsutil command failed.
  """
  gcs_bucket = _GetStorageBucket(env_ref, release_track=release_track)
  source_refs = [
      storage_util.ObjectReference(gcs_bucket, source)
      for source in sources
  ]
  if destination.startswith('gs://'):
    destination = posixpath.join(destination.strip(posixpath.sep), '')
  elif not os.path.isdir(destination):
    raise command_util.Error('Destination for export must be a directory.')

  try:
    retval = storage_util.RunGsutilCommand(
        'cp',
        command_args=(['-r'] + [s.ToUrl() for s in source_refs] +
                      [destination]),
        run_concurrent=True,
        out_func=log.out.write,
        err_func=log.err.write)
  except (execution_utils.PermissionError,
          execution_utils.InvalidCommandError) as e:
    raise command_util.GsutilError(six.text_type(e))
  if retval:
    raise command_util.GsutilError('gsutil returned non-zero status code.')
def _DeleteGsutil(gcs_bucket, target, gcs_subdir):
  """Deletes objects in a folder of an environment's bucket with gsutil."""
  target_ref = storage_util.ObjectReference(
      gcs_bucket, _JoinPaths(gcs_subdir, target, gsutil_path=True))
  try:
    retval = storage_util.RunGsutilCommand(
        'rm',
        command_args=(['-r', target_ref.ToUrl()]),
        run_concurrent=True,
        out_func=log.out.write,
        err_func=log.err.write)
  except (execution_utils.PermissionError,
          execution_utils.InvalidCommandError) as e:
    raise command_util.GsutilError(six.text_type(e))
  if retval:
    raise command_util.GsutilError('gsutil returned non-zero status code.')
def testImportSourceIsDirWithGcloudIgnore(self):
  self.Touch(
      self.temp_path,
      os.path.join(self.source_dir, '.gcloudignore'),
      contents='.gcloudignore\nc.txt\n')
  env_ref = parsers.ParseEnvironment(self.TEST_ENVIRONMENT_NAME)
  self.ExpectEnvironmentGet(
      self.TEST_PROJECT,
      self.TEST_LOCATION,
      self.TEST_ENVIRONMENT_ID,
      response=self.MakeEnvironmentWithBucket())
  self.ExpectObjectInsert(
      gcs_util.ObjectReference(
          self.test_gcs_bucket, 'dags/subdir/source/a.txt'),
      file_size=0)

  storage_util.Import(
      env_ref, os.path.join(self.temp_path, self.source_dir), 'dags/subdir/')
def _DeleteStorageApi(gcs_bucket, target, gcs_subdir):
  """Deletes objects in a folder of an environment's bucket with storage API."""
  client = storage_api.StorageClient()
  # Explicitly only support target = '*' and no other globbing notation.
  # This is because the flag help text explicitly says to give a subdir.
  # Star also has a special meaning and tells the delete function to not try
  # and get the object. This is necessary because subdirs in the GCS buckets
  # are created as objects to ensure they exist.
  delete_all = target == '*'
  # Listing in a bucket uses a prefix match and doesn't support * notation.
  target = '' if delete_all else target
  target_ref = storage_util.ObjectReference(
      gcs_bucket, _JoinPaths(gcs_subdir, target, gsutil_path=True))

  to_delete = _GetObjectOrSubdirObjects(
      target_ref, object_is_subdir=delete_all, client=client)
  for obj_ref in to_delete:
    client.DeleteObject(gcs_bucket, obj_ref.name)
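# Illustration (hypothetical bucket ref; mirrors the '*' vs. explicit-target
# cases in the dags/data/plugins delete tests above): a '*' target clears the
# whole subdir by listing its prefix, while a concrete target deletes one
# object, or its prefix-matched children if the target turns out to be a
# subdir.
#
#   _DeleteStorageApi(bucket_ref, '*', 'dags')                  # delete all DAGs
#   _DeleteStorageApi(bucket_ref, 'subdir/file.txt', 'plugins') # one plugin file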
def SetUp(self):
  properties.VALUES.storage.use_gsutil.Set(False)
  self.SetTrack(calliope_base.ReleaseTrack.GA)
  self.source_dir = 'source'
  self.export_dir = os.path.join(self.temp_path, 'dest')
  self.other_bucket_path = 'gs://other-bucket'
  self.other_bucket = 'other-bucket'
  os.makedirs(self.export_dir)
  self.file = self.Touch(
      self.temp_path, os.path.join(self.source_dir, 'a.txt'), makedirs=True)
  self.file_in_dir = self.Touch(
      self.temp_path,
      os.path.join(self.source_dir, 'b', 'c.txt'),
      makedirs=True)
  self.objects = [
      self.storage_messages.Object(name=name)
      for name in ('dags/source/a.txt', 'dags/source/b/c.txt')
  ]
  self.list_response = self.storage_messages.Objects(items=self.objects)
  self.object_refs = [
      gcs_util.ObjectReference(self.test_gcs_bucket, obj.name)
      for obj in self.objects
  ]
def _ExportStorageApi(gcs_bucket, source, destination):
  """Exports files and directories from an environment's GCS bucket."""
  old_source = source
  source = source.rstrip('*')
  # A source ending with an asterisk means the user is indicating that the
  # source is a directory, so we shouldn't bother checking whether source is
  # an object. This is important because certain subdirs are always created
  # as objects (e.g. dags/); without this check, import/export would just try
  # to copy that empty object.
  object_is_subdir = old_source != source

  client = storage_api.StorageClient()
  source_ref = storage_util.ObjectReference(gcs_bucket, source)

  dest_is_local = True
  if destination.startswith('gs://'):
    destination = _JoinPaths(
        destination.strip(posixpath.sep), '', gsutil_path=True)
    dest_is_local = False
  elif not os.path.isdir(destination):
    raise command_util.Error('Destination for export must be a directory.')

  source_dirname = _JoinPaths(os.path.dirname(source), '', gsutil_path=True)
  to_export = _GetObjectOrSubdirObjects(
      source_ref, object_is_subdir=object_is_subdir, client=client)
  if dest_is_local:
    for obj in to_export:
      dest_path = _GetDestPath(source_dirname, obj.name, destination, True)
      files.MakeDir(os.path.dirname(dest_path))
      # Command description for export commands says overwriting is default
      # behavior.
      client.CopyFileFromGCS(obj.bucket_ref, obj.name, dest_path,
                             overwrite=True)
  else:
    for obj in to_export:
      dest_object = storage_util.ObjectReference.FromUrl(
          _GetDestPath(source_dirname, obj.name, destination, False))
      client.Copy(obj, dest_object)
def _UploadFileToGcs(source, function_ref, stage_bucket):
  """Upload local source files to GCS staging bucket."""
  zip_file = _GenerateRemoteZipFileName(function_ref.RelativeName())
  bucket_ref = storage_util.BucketReference.FromArgument(stage_bucket)
  dest_object = storage_util.ObjectReference(bucket_ref, zip_file)
  # TODO(b/109938541): Remove gsutil implementation after the new
  # implementation seems stable.
  use_gsutil = properties.VALUES.storage.use_gsutil.GetBool()
  if use_gsutil:
    upload_success = _UploadFileToGcsGsutil(source, dest_object)
  else:
    upload_success = _UploadFileToGcsStorageApi(
        bucket_ref, source, dest_object)
  if not upload_success:
    raise exceptions.FunctionsError(
        'Failed to upload the function source code to the bucket {0}'.format(
            stage_bucket))
  return dest_object.ToUrl()