def execute(self, context):
    """Copy the configured source object through the Cloud Storage hook.

    The destination bucket and object are handed to the hook exactly as
    configured; the fallback to the source values is applied only to the
    values written to the log line.

    :param context: task context; not used by this function.
    """
    logged_bucket = self.destination_bucket or self.source_bucket
    logged_object = self.destination_object or self.source_object
    self.log.info('Executing copy: %s, %s, %s, %s',
                  self.source_bucket, self.source_object,
                  logged_bucket, logged_object)

    gcs_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
        delegate_to=self.delegate_to,
    )
    gcs_hook.copy(self.source_bucket, self.source_object,
                  self.destination_bucket, self.destination_object)
Esempio n. 2
0
    def execute(self, context):
        """Copy (and optionally move) one object or a wildcard set of objects.

        When ``source_object`` contains ``*``, the text before the first ``*``
        is used as the listing prefix and the text after it as the listing
        delimiter. Every listed object is copied to
        ``<destination_object>/<object name>``; if ``destination_object`` is
        not set the object keeps its own name instead of being placed under a
        literal ``None/`` prefix.

        Without a wildcard a single object is copied. In both cases the source
        object is deleted after the copy when ``move_object`` is truthy.

        :param context: task context; not used by this method.
        """
        hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
            delegate_to=self.delegate_to
        )

        if '*' in self.source_object:
            # Only the first '*' is significant: everything before it is the
            # prefix, everything after it is the delimiter.
            prefix, delimiter = self.source_object.split('*', 1)
            objects = hook.list(self.source_bucket,
                                prefix=prefix,
                                delimiter=delimiter)
            for source_object in objects:
                if self.destination_object is None:
                    # No destination configured: keep the original object name.
                    destination_object = source_object
                else:
                    destination_object = "{}/{}".format(self.destination_object,
                                                        source_object)
                self.log.info('Executing copy of gs://%s/%s to gs://%s/%s',
                              self.source_bucket, source_object,
                              self.destination_bucket or self.source_bucket,
                              destination_object)
                hook.copy(self.source_bucket, source_object,
                          self.destination_bucket, destination_object)
                if self.move_object:
                    hook.delete(self.source_bucket, source_object)

        else:
            self.log.info('Executing copy: %s, %s, %s, %s', self.source_bucket,
                          self.source_object,
                          self.destination_bucket or self.source_bucket,
                          self.destination_object or self.source_object)
            hook.copy(self.source_bucket, self.source_object,
                      self.destination_bucket, self.destination_object)

            if self.move_object:
                hook.delete(self.source_bucket, self.source_object)
Esempio n. 3
0
    def execute(self, context):
        """Copy, and optionally move, a single object or every wildcard match.

        A ``*`` in ``source_object`` splits it into a listing prefix (text
        before the first ``*``) and a listing delimiter (text after it). Each
        object returned by the listing is copied to
        ``<destination_object>/<object name>``, or to its own name when
        ``destination_object`` is not set, so that no literal ``None/``
        prefix is produced.

        If there is no wildcard, exactly one object is copied. When
        ``move_object`` is truthy the source is deleted after each copy.

        :param context: task context; not used by this method.
        """
        hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
            delegate_to=self.delegate_to)

        if '*' in self.source_object:
            wildcard_position = self.source_object.index('*')
            objects = hook.list(
                self.source_bucket,
                prefix=self.source_object[:wildcard_position],
                delimiter=self.source_object[wildcard_position + 1:])
            # The bucket shown in the log mirrors the fallback used by the
            # non-wildcard branch below.
            logged_bucket = self.destination_bucket or self.source_bucket
            for source_object in objects:
                if self.destination_object is None:
                    # Nothing configured: the object keeps its name.
                    destination_object = source_object
                else:
                    destination_object = "{}/{}".format(
                        self.destination_object, source_object)
                self.log.info(
                    'Executing copy of gs://%s/%s to gs://%s/%s',
                    self.source_bucket, source_object,
                    logged_bucket, destination_object)
                hook.copy(
                    self.source_bucket, source_object,
                    self.destination_bucket, destination_object)
                if self.move_object:
                    hook.delete(self.source_bucket, source_object)

        else:
            self.log.info('Executing copy: %s, %s, %s, %s', self.source_bucket,
                          self.source_object,
                          self.destination_bucket or self.source_bucket,
                          self.destination_object or self.source_object)
            hook.copy(self.source_bucket, self.source_object,
                      self.destination_bucket, self.destination_object)

            if self.move_object:
                hook.delete(self.source_bucket, self.source_object)
Esempio n. 4
0
    def execute(self, context):
        """Copy every object listed under ``source_object`` to the destination.

        The objects are listed in ``source_bucket`` using ``source_object`` as
        prefix and ``source_files_delimiter`` as delimiter; the listing is
        stored on ``self.files_to_copy``. Each listed object is copied to
        ``destination_directory + <object name>``. An unset
        ``destination_directory`` is treated as an empty prefix rather than
        raising ``TypeError`` on the concatenation.

        :param context: task context; not used by this method.
        """
        self.log.info('Executing copy - Source_Bucket: %s, Source_directory: %s, '
                      'Destination_bucket: %s, Destination_directory: %s',
                      self.source_bucket, self.source_object,
                      self.destination_bucket or self.source_bucket,
                      self.destination_directory or self.source_object)

        hook = GoogleCloudStorageHook(google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
                                      delegate_to=self.delegate_to)

        self.log.info('Getting list of the files to copy. Source Bucket: %s; Source Object: %s',
                      self.source_bucket, self.source_object)

        # The prefix keyword carries the name of the object(s) to copy.
        self.files_to_copy = hook.list(bucket=self.source_bucket, prefix=self.source_object,
                                       delimiter=self.source_files_delimiter)

        # Log the names of all objects to be copied.
        self.log.info('Files to copy: %s', self.files_to_copy)

        # Covers both a missing listing (None) and an empty one.
        if not self.files_to_copy:
            self.log.info('No Files to copy.')
            return

        destination_directory = self.destination_directory or ''
        for file_to_copy in self.files_to_copy:
            destination_object = destination_directory + file_to_copy
            self.log.info('Source_Bucket: %s, Source_Object: %s, '
                          'Destination_bucket: %s, Destination_Directory: %s',
                          self.source_bucket, file_to_copy,
                          self.destination_bucket or self.source_bucket,
                          destination_object)
            hook.copy(self.source_bucket, file_to_copy,
                      self.destination_bucket, destination_object)
def do_copy_model_to_final(**kwargs):
    """Copy the files of the latest exported model into the final-model prefix.

    Lists every object under ``<PREFIX_JOBS_EXPORT>/export/estimate`` in
    ``COMPOSER_BUCKET_NAME``, picks the numeric folder of the
    lexicographically greatest object name, and copies each file in that
    folder to ``<PREFIX_FINAL_MODEL>/<path below the numeric folder>`` in the
    same bucket.

    :param kwargs: accepted for the caller's convenience; not used here.
    :raises ValueError: if the listing returns no objects.
    :raises IndexError: if the greatest object name has no ``/<digits>/`` part.
    """
    gcs = GoogleCloudStorageHook()

    # Returns all the objects within the bucket. Sub-folders are only
    # prefixes of the leaves; the listing does not tell files from folders.
    export_prefix = '{}/export/estimate'.format(PREFIX_JOBS_EXPORT)
    all_jobs_files = gcs.list(bucket=COMPOSER_BUCKET_NAME, prefix=export_prefix)

    if not all_jobs_files:
        # Fail with an explicit message instead of the opaque error that
        # max() raises on an empty sequence.
        raise ValueError(
            'No exported model files found under gs://{}/{}'.format(
                COMPOSER_BUCKET_NAME, export_prefix))

    # The max() string belongs to the latest model folder, e.g.
    # jobs/clv-composer/export/estimate/1234567890/variables/variables.index
    # yields '/1234567890/'.
    latest_model_bucket = re.findall(r'/\d+/', max(all_jobs_files))[0]

    # Keep only real files (names not ending in '/') of the latest folder.
    latest_files = [f for f in all_jobs_files
                    if latest_model_bucket in f and not f.endswith('/')]

    for source_object in latest_files:
        # Split once so that a repeated '/<digits>/' further down the path
        # does not truncate the relative name.
        relative_name = source_object.split(latest_model_bucket, 1)[1]
        dest_object = '{}/{}'.format(PREFIX_FINAL_MODEL, relative_name)

        logging.info("Copying %s to %s ...", dest_object, COMPOSER_BUCKET_NAME)

        gcs.copy(
            source_bucket=COMPOSER_BUCKET_NAME,
            source_object=source_object,
            destination_object=dest_object
        )
Esempio n. 6
0
def move_objects(source_bucket=None, destination_bucket=None, prefix=None, **kwargs):
    """Move the objects listed by the ``list_files`` task to another bucket.

    The object names are pulled from XCom (task id ``list_files``) through
    ``kwargs["ti"]``. Each object is copied to ``destination_bucket`` and
    then deleted from ``source_bucket``. When ``prefix`` is truthy the
    destination name becomes ``<prefix>/<object name>``.

    :param source_bucket: bucket the objects are copied from and deleted in.
    :param destination_bucket: bucket the objects are copied to.
    :param prefix: optional prefix prepended to every destination name.
    :param kwargs: must contain ``ti``, an object exposing ``xcom_pull``.
    """
    storage_objects = kwargs["ti"].xcom_pull(task_ids="list_files")
    if not storage_objects:
        # Nothing was pushed (None) or the listing was empty: nothing to move,
        # so do not even create a hook.
        return

    hook = GoogleCloudStorageHook()
    for storage_object in storage_objects:
        destination_object = storage_object
        if prefix:
            destination_object = "{}/{}".format(prefix, storage_object)
        hook.copy(source_bucket, storage_object, destination_bucket, destination_object)
        hook.delete(source_bucket, storage_object)
Esempio n. 7
0
def move_objects(source_bucket=None,
                 destination_bucket=None,
                 prefix=None,
                 **kwargs):
    """Move the objects reported by the ``list_files`` task between buckets.

    Object names come from XCom (task id ``list_files``), read through
    ``kwargs['ti']``. Every object is copied into ``destination_bucket`` and
    afterwards deleted from ``source_bucket``; a truthy ``prefix`` turns the
    destination name into ``<prefix>/<object name>``.

    :param source_bucket: bucket the objects are copied from and deleted in.
    :param destination_bucket: bucket the objects are copied to.
    :param prefix: optional prefix prepended to every destination name.
    :param kwargs: must contain ``ti``, an object exposing ``xcom_pull``.
    """
    storage_objects = kwargs['ti'].xcom_pull(task_ids='list_files')

    # xcom_pull may yield None or an empty list; either way there is nothing
    # to move and no hook is needed.
    if not storage_objects:
        return

    hook = GoogleCloudStorageHook()

    for storage_object in storage_objects:
        destination_object = storage_object
        if prefix:
            destination_object = f'{prefix}/{storage_object}'
        hook.copy(source_bucket, storage_object, destination_bucket,
                  destination_object)
        hook.delete(source_bucket, storage_object)
Esempio n. 8
0
    def execute(self, context):
        """Copy (and optionally move) one object or all wildcard matches.

        Without the wildcard in ``source_object`` a single object is copied.
        Otherwise ``source_object`` is split at the first wildcard into a
        listing prefix and a listing delimiter; each listed object is copied
        under its own name when ``destination_object`` is ``None``, or with
        the first occurrence of the prefix replaced by ``destination_object``.
        Sources are deleted after their copy when ``move_object`` is truthy.

        :param context: task context; not used by this method.
        """
        gcs_hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
            delegate_to=self.delegate_to)
        template = 'Executing copy of gs://{0}/{1} to gs://{2}/{3}'

        # Simple case first: no wildcard means exactly one object.
        if self.wildcard not in self.source_object:
            logged_bucket = self.destination_bucket or self.source_bucket
            logged_object = self.destination_object or self.source_object
            self.log.info(template.format(self.source_bucket,
                                          self.source_object,
                                          logged_bucket, logged_object))
            gcs_hook.copy(self.source_bucket, self.source_object,
                          self.destination_bucket, self.destination_object)
            if self.move_object:
                gcs_hook.delete(self.source_bucket, self.source_object)
            return

        # Text before the first wildcard is the prefix, the rest the delimiter.
        prefix, _, delimiter = self.source_object.partition(self.wildcard)
        matches = gcs_hook.list(self.source_bucket,
                                prefix=prefix,
                                delimiter=delimiter)

        for name in matches:
            if self.destination_object is not None:
                target = name.replace(prefix, self.destination_object, 1)
            else:
                target = name
            self.log.info(template.format(self.source_bucket, name,
                                          self.destination_bucket, target))

            gcs_hook.copy(self.source_bucket, name,
                          self.destination_bucket, target)
            if self.move_object:
                gcs_hook.delete(self.source_bucket, name)