def execute(self, context):
    """Copy the configured source object to the configured destination.

    Logs the four copy coordinates, builds a ``GoogleCloudStorageHook`` from
    this operator's connection settings and calls its ``copy`` method.

    :param context: execution context passed in by the caller; not used here.
    """
    # The fallback to the source values is applied to the log line only; the
    # hook itself receives the destination attributes exactly as configured.
    shown_bucket = self.destination_bucket or self.source_bucket
    shown_object = self.destination_object or self.source_object
    self.log.info(
        'Executing copy: %s, %s, %s, %s',
        self.source_bucket,
        self.source_object,
        shown_bucket,
        shown_object,
    )

    gcs_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
        delegate_to=self.delegate_to,
    )
    gcs_hook.copy(
        self.source_bucket,
        self.source_object,
        self.destination_bucket,
        self.destination_object,
    )
def execute(self, context):
    """Copy (and optionally move) one object, or every object matching a wildcard.

    When ``self.source_object`` contains ``*``, the text before the first
    ``*`` is used as the listing prefix and the text after it as the listing
    delimiter. Every listed object is copied to
    ``<destination_object>/<listed object>``, or to the same object name when
    ``destination_object`` is ``None``. Without a wildcard a single copy is
    performed. In both modes the source is deleted after its copy when
    ``self.move_object`` is truthy.

    :param context: execution context passed in by the caller; not used here.
    """
    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
        delegate_to=self.delegate_to,
    )

    if '*' in self.source_object:
        wildcard_position = self.source_object.index('*')
        objects = hook.list(
            self.source_bucket,
            prefix=self.source_object[:wildcard_position],
            delimiter=self.source_object[wildcard_position + 1:],
        )

        for source_object in objects:
            # An unset destination_object used to be formatted into the
            # literal path "None/<object>"; keep the object name instead.
            if self.destination_object is None:
                destination_object = source_object
            else:
                destination_object = "{}/{}".format(
                    self.destination_object, source_object)

            # Same bucket fallback as the single-object log line below, so an
            # unset destination bucket is not reported as "gs://None/...".
            self.log.info(
                'Executing copy of gs://%s/%s to gs://%s/%s',
                self.source_bucket,
                source_object,
                self.destination_bucket or self.source_bucket,
                destination_object,
            )
            hook.copy(self.source_bucket, source_object,
                      self.destination_bucket, destination_object)
            if self.move_object:
                hook.delete(self.source_bucket, source_object)
    else:
        self.log.info(
            'Executing copy: %s, %s, %s, %s',
            self.source_bucket,
            self.source_object,
            self.destination_bucket or self.source_bucket,
            self.destination_object or self.source_object,
        )
        hook.copy(self.source_bucket, self.source_object,
                  self.destination_bucket, self.destination_object)
        if self.move_object:
            hook.delete(self.source_bucket, self.source_object)
def execute(self, context):
    """Copy (and optionally move) a single object or a wildcard selection.

    A ``*`` in ``self.source_object`` switches to wildcard mode: the part
    before the first ``*`` becomes the listing prefix, the part after it the
    listing delimiter, and each listed object is copied under
    ``self.destination_object`` (or kept at its own name when that attribute
    is ``None``). Otherwise exactly one object is copied. When
    ``self.move_object`` is truthy each source is deleted after it is copied.

    :param context: execution context passed in by the caller; not used here.
    """
    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
        delegate_to=self.delegate_to)

    if '*' not in self.source_object:
        self.log.info('Executing copy: %s, %s, %s, %s',
                      self.source_bucket, self.source_object,
                      self.destination_bucket or self.source_bucket,
                      self.destination_object or self.source_object)
        hook.copy(self.source_bucket, self.source_object,
                  self.destination_bucket, self.destination_object)
        if self.move_object:
            hook.delete(self.source_bucket, self.source_object)
        return

    wildcard_position = self.source_object.index('*')
    objects = hook.list(
        self.source_bucket,
        prefix=self.source_object[:wildcard_position],
        delimiter=self.source_object[wildcard_position + 1:])

    # Used for the log line only; the hook still receives the raw attribute.
    logged_bucket = self.destination_bucket or self.source_bucket

    for source_object in objects:
        # Formatting an unset destination_object produced "None/<object>";
        # fall back to the listed object name in that case.
        if self.destination_object is None:
            destination_object = source_object
        else:
            destination_object = "{}/{}".format(
                self.destination_object, source_object)

        self.log.info('Executing copy of gs://%s/%s to gs://%s/%s',
                      self.source_bucket, source_object,
                      logged_bucket, destination_object)
        hook.copy(self.source_bucket, source_object,
                  self.destination_bucket, destination_object)
        if self.move_object:
            hook.delete(self.source_bucket, source_object)
def execute(self, context):
    """Copy every object listed under a source prefix into a destination prefix.

    Lists ``self.source_bucket`` with ``self.source_object`` as prefix and
    ``self.source_files_delimiter`` as delimiter, stores the result on
    ``self.files_to_copy``, and copies each listed object to
    ``destination_directory + <object name>``.

    :param context: execution context passed in by the caller; not used here.
    """
    # The first log line treats destination_directory as optional, but the
    # concatenation below raised TypeError when it was None. An unset
    # directory now means "no extra prefix".
    destination_prefix = self.destination_directory or ''

    self.log.info('Executing copy - Source_Bucket: %s, Source_directory: %s, '
                  'Destination_bucket: %s, Destination_directory: %s',
                  self.source_bucket, self.source_object,
                  self.destination_bucket or self.source_bucket,
                  self.destination_directory or self.source_object)

    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
        delegate_to=self.delegate_to)

    self.log.info('Getting list of the files to copy. Source Bucket: %s; Source Object: %s',
                  self.source_bucket, self.source_object)

    # Create a list of objects to copy from Source bucket. The prefix keyword
    # carries the name of the object(s) to copy.
    self.files_to_copy = hook.list(bucket=self.source_bucket,
                                   prefix=self.source_object,
                                   delimiter=self.source_files_delimiter)

    # Log the names of all objects to be copied
    self.log.info('Files to copy: %s', self.files_to_copy)

    # Truthiness covers both None and an empty listing, so "nothing matched"
    # is reported either way (the previous `is not None` check skipped the
    # message for an empty list).
    if not self.files_to_copy:
        self.log.info('No Files to copy.')
        return

    for file_to_copy in self.files_to_copy:
        destination_object = destination_prefix + file_to_copy
        self.log.info('Source_Bucket: %s, Source_Object: %s, '
                      'Destination_bucket: %s, Destination_Directory: %s',
                      self.source_bucket, file_to_copy,
                      self.destination_bucket or self.source_bucket,
                      destination_object)
        hook.copy(self.source_bucket, file_to_copy,
                  self.destination_bucket, destination_object)
def do_copy_model_to_final(**kwargs):
    """Copy the files of the latest exported model under the final-model prefix.

    Lists every object below ``<PREFIX_JOBS_EXPORT>/export/estimate`` in
    ``COMPOSER_BUCKET_NAME``, takes the lexicographically greatest object
    name, extracts its first all-digit path segment (e.g. ``/1234567890/``)
    and copies every non-directory object containing that segment to
    ``<PREFIX_FINAL_MODEL>/<path after the segment>``.

    :param kwargs: accepted for caller compatibility; not used here.
    :raises ValueError: if the listing returns no objects.
    :raises IndexError: if the greatest object name has no all-digit segment.
    """
    gcs = GoogleCloudStorageHook()

    # The listing is flat: "sub-buckets" are only prefixes of the leaves, and
    # directory placeholders are returned alongside real files.
    export_prefix = '{}/export/estimate'.format(PREFIX_JOBS_EXPORT)
    all_jobs_files = gcs.list(bucket=COMPOSER_BUCKET_NAME, prefix=export_prefix)

    if not all_jobs_files:
        # max() on an empty listing raised an opaque ValueError; keep the
        # exception type but say what is actually missing.
        raise ValueError('No exported model files found under gs://{}/{}'.format(
            COMPOSER_BUCKET_NAME, export_prefix))

    # ex: jobs/clv-composer/export/estimate/1234567890/variables/variables.index
    # yields /1234567890/
    newest_path = max(all_jobs_files)
    segment_match = re.search(r'/\d+/', newest_path)
    if segment_match is None:
        # Same exception type as the former `re.findall(...)[0]` lookup.
        raise IndexError(
            'No numeric model directory found in {!r}'.format(newest_path))
    latest_model_bucket = segment_match.group(0)

    for source_object in all_jobs_files:
        # Only files of the latest model; names ending in '/' are directory
        # placeholders, not files.
        if latest_model_bucket not in source_object or source_object.endswith('/'):
            continue

        # The model used for training is saved into a 'final' sub bucket of
        # the export bucket.
        relative_path = source_object.split(latest_model_bucket)[1]
        dest_object = '{}/{}'.format(PREFIX_FINAL_MODEL, relative_path)
        logging.info("Copying %s to %s ...", dest_object, COMPOSER_BUCKET_NAME)
        gcs.copy(
            source_bucket=COMPOSER_BUCKET_NAME,
            source_object=source_object,
            destination_object=dest_object
        )
def move_objects(source_bucket=None, destination_bucket=None, prefix=None, **kwargs):
    """Copy each object listed by the ``list_files`` task, then delete the source.

    The object names are pulled from XCom through ``kwargs["ti"]``. When
    ``prefix`` is truthy each destination name becomes ``<prefix>/<name>``;
    otherwise the name is kept unchanged.

    :param source_bucket: bucket the objects are copied from and deleted in.
    :param destination_bucket: bucket passed to the hook as the copy target.
    :param prefix: optional path prepended to every destination object name.
    :param kwargs: must contain ``ti``, which is used for the XCom pull.
    """
    gcs_hook = GoogleCloudStorageHook()
    names_to_move = kwargs["ti"].xcom_pull(task_ids="list_files")

    for name in names_to_move:
        target_name = "{}/{}".format(prefix, name) if prefix else name
        gcs_hook.copy(source_bucket, name, destination_bucket, target_name)
        # The source is removed only after the copy call has returned.
        gcs_hook.delete(source_bucket, name)
def move_objects(source_bucket=None, destination_bucket=None, prefix=None, **kwargs):
    """Move the objects listed by the ``list_files`` task to another location.

    Object names come from an XCom pull on ``kwargs['ti']``. Each one is
    copied to ``destination_bucket`` (under ``<prefix>/`` when ``prefix`` is
    truthy) and then deleted from ``source_bucket``.

    :param source_bucket: bucket the objects are copied from and deleted in.
    :param destination_bucket: bucket passed to the hook as the copy target.
    :param prefix: optional path prepended to every destination object name.
    :param kwargs: must contain ``ti``, which is used for the XCom pull.
    """
    gcs_hook = GoogleCloudStorageHook()
    pulled_names = kwargs['ti'].xcom_pull(task_ids='list_files')

    for object_name in pulled_names:
        if prefix:
            target_name = f'{prefix}/{object_name}'
        else:
            target_name = object_name
        gcs_hook.copy(source_bucket, object_name, destination_bucket, target_name)
        # Delete follows the copy so the source only disappears afterwards.
        gcs_hook.delete(source_bucket, object_name)
def execute(self, context):
    """Copy (and optionally move) one object or a wildcard-selected set.

    If ``self.wildcard`` occurs in ``self.source_object`` the name is split at
    its first occurrence into a listing prefix and delimiter. Every listed
    object is copied, with the first occurrence of the prefix replaced by
    ``self.destination_object`` unless that attribute is ``None``. Without
    a wildcard a single copy is issued. When ``self.move_object`` is truthy
    each source is deleted after its copy.

    :param context: execution context passed in by the caller; not used here.
    """
    gcs_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
        delegate_to=self.delegate_to)
    log_template = 'Executing copy of gs://{0}/{1} to gs://{2}/{3}'

    # Single-object case first, so the wildcard loop needs no extra nesting.
    if self.wildcard not in self.source_object:
        self.log.info(
            log_template.format(
                self.source_bucket,
                self.source_object,
                self.destination_bucket or self.source_bucket,
                self.destination_object or self.source_object))
        gcs_hook.copy(self.source_bucket, self.source_object,
                      self.destination_bucket, self.destination_object)
        if self.move_object:
            gcs_hook.delete(self.source_bucket, self.source_object)
        return

    prefix, delimiter = self.source_object.split(self.wildcard, 1)
    matches = gcs_hook.list(self.source_bucket, prefix=prefix, delimiter=delimiter)

    for matched_object in matches:
        if self.destination_object is not None:
            # Only the first occurrence of the prefix is rewritten.
            target_object = matched_object.replace(
                prefix, self.destination_object, 1)
        else:
            target_object = matched_object

        self.log.info(
            log_template.format(self.source_bucket, matched_object,
                                self.destination_bucket, target_object))
        gcs_hook.copy(self.source_bucket, matched_object,
                      self.destination_bucket, target_object)
        if self.move_object:
            gcs_hook.delete(self.source_bucket, matched_object)