Example #1
    def test_ingest_article_zip_starter_(self, fake_boto_conn, fake_logger):
        fake_boto_conn.return_value = FakeBotoConnection()
        self.stater_ingest_article_zip.start(
            settings=settings_mock,
            run=run_example,
            info=S3NotificationInfo.from_dict(
                test_data.ingest_article_zip_data))
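# FakeBotoConnection is only referenced in the test above; a minimal stand-in
# consistent with how it is patched in might look like the sketch below. This is
# purely illustrative: the real test double's interface is an assumption, not
# taken from the source.
class FakeBotoConnection(object):
    def __init__(self):
        self.start_calls = []

    def start_workflow_execution(self, *args, **kwargs):
        # Record what the starter asked for so a test can assert on it later.
        self.start_calls.append((args, kwargs))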
    def do_activity(self, data=None):
        """
        Do the work
        """

        self.expanded_bucket_name = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket

        info = S3NotificationInfo.from_dict(data)
        session = Session(self.settings)
        version = session.get_value(self.get_workflowId(), 'version')
        article_id = session.get_value(self.get_workflowId(), 'article_id')
        article_version_id = article_id + '.' + version
        run = session.get_value(self.get_workflowId(), 'run')

        self.emit_monitor_event(self.settings, article_id, version, run, "Apply Version Number", "start",
                                "Starting applying version number to files for " + article_id)

        try:

            if self.logger:
                self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
                
            # Do not rename files if a version number is in the file_name
            m = re.search(ur'-v([0-9]*?)[\.|-]', info.file_name)
            
            if m is not None:
                # Nothing to do
                pass
            
            elif m is None and version is not None:
                expanded_folder_name = session.get_value(self.get_workflowId(), 'expanded_folder')
                bucket_folder_name = expanded_folder_name.replace(os.sep, '/')
                self.rename_article_s3_objects(bucket_folder_name, version)
                
            self.emit_monitor_event(self.settings, article_id, version, run, "Apply Version Number", "end",
                        "Finished applying version number to article " + article_id +
                        " for version " + version + " run " + str(run))


        except Exception as e:
            self.logger.exception("Exception when applying version number to article")
            self.emit_monitor_event(self.settings, article_id, version, run, "Convert JATS", "error",
                                    "Error in applying version number to files for " + article_id +
                                    " message:" + e.message)

        return True
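# The rename decision above hinges on the -v([0-9]*?)[\.|-] pattern; a quick
# standalone check of how it behaves (the sample file names below are
# illustrative examples, not taken from the source):
import re

for name in ('elife-00353-vor-v1.zip', 'elife-00353-vor.zip'):
    m = re.search(r'-v([0-9]*?)[\.|-]', name)
    print('%s -> %s' % (name, m.group(1) if m else None))
# elife-00353-vor-v1.zip -> 1     (version already in the name, files left alone)
# elife-00353-vor.zip -> None     (no version in the name, objects get renamed)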
    def do_activity(self, data=None):

        try:

            info = S3NotificationInfo.from_dict(data)
            filename = info.file_name[info.file_name.rfind('/')+1:]
            session = Session(self.settings)
            session.store_value(data['run'], 'filename_last_element', filename)

            article_structure = ArticleInfo(filename)

            if article_structure.article_id is None:
                self.logger.error("Name '%s' did not match expected pattern for article id" % filename)
                raise RuntimeError("article_structure.article_id is None. File pattern problem.")

            version = self.get_version(self.settings, article_structure, data['version_lookup_function'])
            session.store_value(data['run'], 'version', version)
            article_id = article_structure.article_id

            self.emit_monitor_event(self.settings, article_id, version, data['run'],
                                    self.pretty_name, "start",
                                    " ".join(("Version Lookup for article", article_id, "version:", version)))

            self.set_monitor_property(self.settings, article_id, "article-id", article_id, "text")
            self.set_monitor_property(self.settings, article_id, "publication-status", "publication in progress",
                                      "text",
                                      version=version)

            self.emit_monitor_event(self.settings, article_structure.article_id, version, data['run'],
                                    self.pretty_name, "end",
                                    " ".join(("Finished Version Lookup for article", article_structure.article_id,
                                              "version:", version)))
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception("Exception when trying to Lookup Version. Error: " + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
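# ArticleInfo does the file name parsing for the lookup above; a minimal usage
# sketch (the file name is an illustrative example, and the exact parsed values
# depend on ArticleInfo's implementation, which is not shown here):
article_structure = ArticleInfo('elife-00353-vor-v1.zip')
print(article_structure.article_id)  # article id parsed from the name, or None if the pattern did not match
print(article_structure.status)      # expected to be 'vor' or 'poa' when the name matches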
def process_data_publishperfectarticle(workflow_name, workflow_data):
    data = {'info': S3NotificationInfo.from_dict(workflow_data),
            'run': str(uuid.uuid4())}
    return data
def process_data_ingestarticlezip(workflow_name, workflow_data):
    data = {'info': S3NotificationInfo.from_dict(workflow_data),
            'run': str(uuid.uuid4())}
    return data
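# The two builders above share the (workflow_name, workflow_data) signature, which
# suggests they are selected by workflow name. A hypothetical dict-based dispatch
# is sketched below; the table and the calling code are assumptions for
# illustration only, not the project's actual routing mechanism.
PROCESS_DATA_BY_WORKFLOW = {
    'PublishPerfectArticle': process_data_publishperfectarticle,
    'IngestArticleZip': process_data_ingestarticlezip,
}

def build_workflow_data(workflow_name, workflow_data):
    builder = PROCESS_DATA_BY_WORKFLOW.get(workflow_name)
    # Fall back to the raw notification data when no builder is registered.
    return builder(workflow_name, workflow_data) if builder else workflow_data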
    def do_activity(self, data=None):
        """
        Do the work
        """

        run = data['run']

        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        info = S3NotificationInfo.from_dict(data)

        storage_context = StorageContext(self.settings)

        session = Session(self.settings)

        filename_last_element = session.get_value(run, 'filename_last_element')
        # zip name contains version information for previously archived zip files
        article_structure = ArticleInfo(filename_last_element)
        article_id = article_structure.article_id
        session.store_value(run, 'article_id', article_id)
        session.store_value(run, 'file_name', info.file_name)

        if self.logger:
            self.logger.info("Expanding file %s" % info.file_name)

        version = session.get_value(run, 'version')

        status = article_structure.status
        if status is None or (status != 'vor' and status != 'poa'):
            self.logger.error("Name '%s' did not match expected pattern for status" %
                              filename_last_element)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE  # status could not be determined, exit workflow.

        article_version_id = article_id + '.' + version
        session.store_value(run, 'article_version_id', article_version_id)
        session.store_value(run, 'run', run)
        session.store_value(run, 'status', status)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                                "Starting expansion of article " + article_id)


        try:
            # download zip to temp folder
            tmp = self.get_tmp_dir()
            local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
            storage_resource_origin = self.settings.storage_provider + "://" + info.bucket_name + "/" + info.file_name
            storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
            local_zip_file.close()

            # extract zip contents
            folder_name = path.join(article_version_id, run)
            content_folder = path.join(tmp, folder_name)
            makedirs(content_folder)
            with ZipFile(path.join(tmp, filename_last_element)) as zf:
                zf.extractall(content_folder)

            upload_filenames = []
            for f in listdir(content_folder):
                if isfile(join(content_folder, f)) and f[0] != '.' and not f[0] == '_':
                    upload_filenames.append(f)

            bucket_folder_name = article_version_id + '/' + run
            for filename in upload_filenames:
                source_path = path.join(content_folder, filename)
                dest_path = bucket_folder_name + '/' + filename
                storage_resource_dest = self.settings.storage_provider + "://" + self.settings.publishing_buckets_prefix + \
                                        self.settings.expanded_bucket + "/" + dest_path
                storage_context.set_resource_from_filename(storage_resource_dest, source_path)

            self.clean_tmp_dir()

            session.store_value(run, 'expanded_folder', bucket_folder_name)
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "end", "Finished expansion of article " + article_id +
                                    " for version " + version + " run " + str(run) +
                                    " into " + bucket_folder_name)
        except Exception as e:
            self.logger.exception("Exception when expanding article")
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "error", "Error expanding article " + article_id +
                                    " message:" + e.message)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return True
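# A quick standalone check of the shape of the destination URI built above, using
# illustrative values only (none of these literals are taken from the source):
storage_provider = 's3'
publishing_buckets_prefix = 'prefix-'
expanded_bucket = 'expanded'
bucket_folder_name = '00353.1' + '/' + 'e1a2b3c4'   # article_version_id + '/' + run (example values)
dest_path = bucket_folder_name + '/' + 'elife-00353.xml'
storage_resource_dest = storage_provider + "://" + publishing_buckets_prefix + \
                        expanded_bucket + "/" + dest_path
print(storage_resource_dest)  # s3://prefix-expanded/00353.1/e1a2b3c4/elife-00353.xml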
    def test_ingest_article_zip_starter_(self, fake_boto_conn, fake_logger):
        fake_boto_conn.return_value = FakeBotoConnection()
        self.stater_ingest_article_zip.start(settings=settings_mock, run=run_example,
                                             info=S3NotificationInfo.from_dict(test_data.ingest_article_zip_data))
    def do_activity(self, data=None):
        """
        Do the work
        """
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        info = S3NotificationInfo.from_dict(data)

        # set up required connections
        conn = S3Connection(self.settings.aws_access_key_id, self.settings.aws_secret_access_key)
        source_bucket = conn.get_bucket(info.bucket_name)
        dest_bucket = conn.get_bucket(self.settings.publishing_buckets_prefix + self.settings.expanded_bucket)
        session = Session(self.settings)

        article_id_match = re.match(ur'elife-(.*?)-', info.file_name)
        article_id = article_id_match.group(1)
        session.store_value(self.get_workflowId(), 'article_id', article_id)

        if self.logger:
            self.logger.info("Expanding file %s" % info.file_name)

        # extract any doi, version and updated date information from the filename
        version = None
        # zip name contains version information for previously archived zip files
        m = re.search(ur'-v([0-9]*?)[\.|-]', info.file_name)
        if m is not None:
            version = m.group(1)
        if version is None:
            version = self.get_next_version(article_id)
        if version == '-1':
            return False  # version could not be determined, exit workflow. Can't emit event as no version.

        status = None
        sm = re.search(ur'.*?-.*?-(.*?)-', info.file_name)
        if sm is not None:
            status = sm.group(1)
        if status is None:
            return False  # status could not be determined, exit workflow.
        run = str(uuid.uuid4())
        # store version for other activities in this workflow execution
        session.store_value(self.get_workflowId(), 'version', version)

        # TODO : extract and store updated date if supplied

        article_version_id = article_id + '.' + version
        session.store_value(self.get_workflowId(), 'article_version_id', article_version_id)
        session.store_value(self.get_workflowId(), 'run', run)
        session.store_value(self.get_workflowId(), 'status', status)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                                "Starting expansion of article " + article_id)
        self.set_monitor_property(self.settings, article_id, "article_id", article_id, "text")
        try:

            # download zip to temp folder
            tmp = self.get_tmp_dir()
            key = Key(source_bucket)
            key.key = info.file_name
            local_zip_file = self.open_file_from_tmp_dir(info.file_name, mode='wb')
            key.get_contents_to_file(local_zip_file)
            local_zip_file.close()

            bucket_folder_name = article_version_id + '/' + run
            folder_name = path.join(article_version_id, run)

            # extract zip contents
            content_folder = path.join(tmp, folder_name)
            makedirs(content_folder)
            with ZipFile(path.join(tmp, info.file_name)) as zf:
                zf.extractall(content_folder)

            # TODO : rename files (versions!)

            # TODO : edit xml and rename references

            upload_filenames = []
            for f in listdir(content_folder):
                if isfile(join(content_folder, f)) and f[0] != '.' and not f[0] == '_':
                    upload_filenames.append(f)

            for filename in upload_filenames:
                source_path = path.join(content_folder, filename)
                dest_path = bucket_folder_name + '/' + filename
                k = Key(dest_bucket)
                k.key = dest_path
                k.set_contents_from_filename(source_path)

            session.store_value(self.get_workflowId(), 'expanded_folder', bucket_folder_name)
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "end",
                                    "Finished expansion of article " + article_id +
                                    " for version " + version + " run " + str(run) + " into " + bucket_folder_name)
        except Exception as e:
            self.logger.exception("Exception when expanding article")
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "error",
                                    "Error expanding article " + article_id + " message:" + e.message)
            return False

        return True
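# The two filename regexes above can be exercised in isolation (the sample name is
# an illustrative example only):
import re

name = 'elife-00353-vor-v1.zip'
article_id_match = re.match(r'elife-(.*?)-', name)
status_match = re.search(r'.*?-.*?-(.*?)-', name)
print(article_id_match.group(1))  # 00353
print(status_match.group(1))      # vor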
def process_data_ingestarticlezip(workflow_name, workflow_data):
    data = {
        'info': S3NotificationInfo.from_dict(workflow_data),
        'run': str(uuid.uuid4())
    }
    return data
def process_data_publishperfectarticle(workflow_name, workflow_data):
    data = {
        'info': S3NotificationInfo.from_dict(workflow_data),
        'run': str(uuid.uuid4())
    }
    return data
    def do_activity(self, data=None):
        """
        Do the work
        """

        run = data['run']

        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        info = S3NotificationInfo.from_dict(data)

        storage_context = StorageContext(self.settings)

        session = Session(self.settings)

        filename_last_element = session.get_value(run, 'filename_last_element')
        # zip name contains version information for previously archived zip files
        article_structure = ArticleInfo(filename_last_element)
        article_id = article_structure.article_id
        session.store_value(run, 'article_id', article_id)
        session.store_value(run, 'file_name', info.file_name)

        if self.logger:
            self.logger.info("Expanding file %s" % info.file_name)

        version = session.get_value(run, 'version')

        status = article_structure.status
        if status is None or (status != 'vor' and status != 'poa'):
            self.logger.error("Name '%s' did not match expected pattern for status" %
                              filename_last_element)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE  # status could not be determined, exit workflow.

        article_version_id = article_id + '.' + version
        session.store_value(run, 'article_version_id', article_version_id)
        session.store_value(run, 'run', run)
        session.store_value(run, 'status', status)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                                "Starting expansion of article " + article_id)


        try:
            # download zip to temp folder
            tmp = self.get_tmp_dir()
            local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
            storage_resource_origin = self.settings.storage_provider + "://" + info.bucket_name + "/" + info.file_name
            storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
            local_zip_file.close()

            # extract zip contents
            folder_name = path.join(article_version_id, run)
            content_folder = path.join(tmp, folder_name)
            makedirs(content_folder)
            with ZipFile(path.join(tmp, filename_last_element)) as zf:
                zf.extractall(content_folder)

            upload_filenames = []
            for f in listdir(content_folder):
                if isfile(join(content_folder, f)) and f[0] != '.' and not f[0] == '_':
                    upload_filenames.append(f)
            self.check_filenames(upload_filenames)

            bucket_folder_name = article_version_id + '/' + run
            for filename in upload_filenames:
                source_path = path.join(content_folder, filename)
                dest_path = bucket_folder_name + '/' + filename
                storage_resource_dest = self.settings.storage_provider + "://" + self.settings.publishing_buckets_prefix + \
                                        self.settings.expanded_bucket + "/" + dest_path
                storage_context.set_resource_from_filename(storage_resource_dest, source_path)

            self.clean_tmp_dir()

            session.store_value(run, 'expanded_folder', bucket_folder_name)
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "end", "Finished expansion of article " + article_id +
                                    " for version " + version + " run " + str(run) +
                                    " into " + bucket_folder_name)
        except Exception as e:
            self.logger.exception("Exception when expanding article")
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "error", "Error expanding article " + article_id +
                                    " message:" + e.message)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return True