Example #1
    def do_activity(self, data=None):

        try:
            session = Session(self.settings)
            version = session.get_value(data['run'], 'version')
            filename = session.get_value(data['run'], 'filename_last_element')

            article_structure = ArticleInfo(filename)

            version_date, error = self.get_version(
                self.settings, article_structure,
                article_structure.article_id, version)

            if error is not None:
                self.logger.error(error)
                self.emit_monitor_event(self.settings, article_structure.article_id, version, data['run'],
                                        self.pretty_name, "error",
                                        " ".join(("Error Looking up version article", article_structure.article_id,
                                                 "message:", error)))
                return activity.activity.ACTIVITY_PERMANENT_FAILURE

            self.emit_monitor_event(self.settings, article_structure.article_id, version, data['run'],
                                    self.pretty_name, "end",
                                    " ".join(("Finished Version Lookup for article", article_structure.article_id,
                                    "version:", version)))

            session.store_value(data['run'], 'update_date', version_date)

            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception("Exception when trying to Lookup next version")
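            # Note: article_structure and version may be unbound here if the
            # exception was raised before they were assigned above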
            self.emit_monitor_event(self.settings, article_structure.article_id, version, data['run'], self.pretty_name,
                                    "error", " ".join(("Error looking up version for article",
                                                      article_structure.article_id, "message:", str(e))))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
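
These activities all follow the same contract: read workflow state from the Session keyed by the run identifier, report progress through emit_monitor_event, and return one of the activity result constants. A minimal sketch of that shared skeleton, using only names that appear in the examples (the emit_monitor_event signature is inferred from the calls here, not from its definition):

    def do_activity(self, data=None):
        run = data['run']
        session = Session(self.settings)
        article_id = session.get_value(run, 'article_id')
        version = session.get_value(run, 'version')
        self.emit_monitor_event(self.settings, article_id, version, run,
                                self.pretty_name, "start",
                                "Starting work for " + article_id)
        try:
            # activity-specific work goes here
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "end",
                                    "Finished work for " + article_id)
            return activity.activity.ACTIVITY_SUCCESS
        except Exception as e:
            self.logger.exception("Exception in " + self.pretty_name)
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "error",
                                    "Error in " + self.pretty_name + " for article " +
                                    article_id + " message: " + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE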
Example #2
    def do_activity(self, data=None):
        if self.logger:
            self.logger.info('data: %s' %
                             json.dumps(data, sort_keys=True, indent=4))

        try:
            if 'standalone' in data and data['standalone']:
                article_id = data['article_id']
                poa = data['standalone_is_poa']
                (start_msg, end_msg, result) = self.get_events(article_id,
                                                               poa,
                                                               version=None,
                                                               run=None)
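                # index 6 of the event tuple is presumably the human-readable
                # message (the seventh positional emit_monitor_event argument)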
                self.logger.info(end_msg[6])
                return result

            run = data['run']
            session = Session(self.settings)
            article_id = session.get_value(run, 'article_id')
            version = session.get_value(run, 'version')
            file_name = session.get_value(run, 'file_name')
            poa = False
            if "poa" in file_name:
                poa = True
            (start_msg, end_msg,
             success) = self.get_events(article_id, poa, version, run)
            self.emit_monitor_event(*start_msg)
            self.emit_monitor_event(*end_msg)
            return success
        except Exception as e:
            self.logger.exception(
                "Error starting Copy Glencoe Still Images activity")
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Example #4
    def do_activity(self, data=None):
        session = Session(self.settings)
        version = session.get_value(self.get_workflowId(), 'version')
        article_id = session.get_value(self.get_workflowId(), 'article_id')
        run = session.get_value(self.get_workflowId(), 'run')

        self.emit_monitor_event(self.settings, article_id, version, run, "Set Publication Status", "start",
                                "Starting setting of publish status for " + article_id)

        try:
            conn = S3Connection(self.settings.aws_access_key_id, self.settings.aws_secret_access_key)
            eif_filename = session.get_value(self.get_workflowId(), 'eif_filename')
            data = self.get_eif(conn, eif_filename)
            publication_status = self.get_publication_status(data)
            data['publish'] = publication_status
            self.update_bucket(conn, data, eif_filename)

            self.emit_monitor_event(self.settings, article_id, version, run, "Set Publication Status", "end",
                                    "Ending setting of publish status for " + article_id)

        except Exception as e:
            self.logger.exception("Exception when setting publication status for " + article_id)
            self.emit_monitor_event(self.settings, article_id, version, run, "Set Publication Status", "error",
                                    "Error submitting EIF For article" + article_id + " message:" + e.message)
            return False
        return True
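
Example #4 delegates the S3 round trip to get_eif and update_bucket, which are not shown. A plausible sketch of get_eif, assuming the boto idioms used in Example #26 and that the EIF document is stored as JSON in the EIF bucket (both assumptions, not the confirmed implementation):

    def get_eif(self, conn, eif_filename):
        # Assumed: EIF documents live in the publishing EIF bucket as JSON
        eif_bucket_name = self.settings.publishing_buckets_prefix + self.settings.eif_bucket
        bucket = conn.get_bucket(eif_bucket_name)
        key = bucket.get_key(eif_filename)
        return json.loads(key.get_contents_as_string())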
Example #5
    def do_activity(self, data=None):

        try:
            if self.logger:
                self.logger.info('data: %s' %
                                 json.dumps(data, sort_keys=True, indent=4))

            run = data['run']
            session = Session(self.settings)
            article_id = session.get_value(run, 'article_id')
            version = session.get_value(run, 'version')

        except Exception as e:
            self.logger.exception(str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        try:
            storage_context = StorageContext(self.settings)
            bucket = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
            images_resource = "".join((self.settings.storage_provider, "://",
                                       bucket, "/", article_id))

            files_in_bucket = storage_context.list_resources(images_resource)
            original_figures = article_structure.get_figures_for_iiif(
                files_in_bucket)

            iiif_path_for_article = self.settings.iiif_resolver.replace(
                '{article_id}', article_id)

            results = self.retrieve_endpoints_check(original_figures,
                                                    iiif_path_for_article)

            bad_images = [result for result in results if result[0] is False]

            if bad_images:
                # print endpoints that did not work
                self.emit_monitor_event(
                    self.settings, article_id, version, run, self.pretty_name,
                    "error",
                    "Some images are not available through the IIIF endpoint: "
                    + str(bad_images))

                return activity.activity.ACTIVITY_PERMANENT_FAILURE

            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "end", "Finished Verification. All endpoints work. Article: " +
                article_id)
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception(str(e))
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "error",
                "An error occurred when checking IIIF endpoint. Article " +
                article_id + '; message: ' + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
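
The StorageContext calls in these examples all take resource strings of the form "<storage_provider>://<bucket>/<path>", assembled by hand each time. A small hypothetical helper (not part of the original code) makes the convention explicit:

    def storage_resource(settings, bucket_name, *path_parts):
        # Build the "provider://bucket/path" strings that StorageContext
        # methods such as list_resources and copy_resource expect
        return "".join((settings.storage_provider, "://", bucket_name, "/",
                        "/".join(path_parts)))

For instance, the images_resource above could be written as storage_resource(self.settings, bucket, article_id).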
Example #6
    def do_activity(self, data):

        session = Session(self.settings)
        eif_location = session.get_value(data["run"], "eif_location")

        self.emit_monitor_event(
            self.settings,
            data["article_id"],
            data["version"],
            data["run"],
            self.pretty_name,
            "start",
            "Starting to set EIF to publish",
        )

        try:

            if not isinstance(eif_location, basestring):
                self.logger.error(self.pretty_name + " error. eif_location must be string")
                raise Exception("eif_location not available")

            storage_context = StorageContext(self.settings)

            eif_origin = "".join(
                (
                    self.settings.storage_provider,
                    "://",
                    self.settings.publishing_buckets_prefix + self.settings.eif_bucket,
                    "/",
                    eif_location,
                )
            )
        except Exception as e:

            self.emit_monitor_event(
                self.settings, data["article_id"], data["version"], data["run"], self.pretty_name, "error", e.message
            )
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        success, error = self.set_eif_to_publish(storage_context, eif_origin)

        if success:
            self.emit_monitor_event(
                self.settings,
                data["article_id"],
                data["version"],
                data["run"],
                self.pretty_name,
                "end",
                "Finished to set EIF to publish",
            )
            return activity.activity.ACTIVITY_SUCCESS

        self.logger.error(error)
        self.emit_monitor_event(
            self.settings, data["article_id"], data["version"], data["run"], self.pretty_name, "error", error
        )
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
Example #7
    def do_activity(self, data=None):
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "start",
                                "Starting submission convert images to jpg for article " + article_id)

        try:
            expanded_folder_name = session.get_value(run, 'expanded_folder')
            expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                      self.settings.expanded_bucket)
            storage_provider = self.settings.storage_provider + "://"
            orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name

            storage_context = StorageContext(self.settings)
            files_in_bucket = storage_context.list_resources(orig_resource)

            figures = filter(article_structure.article_figure, files_in_bucket)

            # 'download' is not an IIIF asset but is currently kept for compatibility;
            # it may become obsolete in future
            formats = {"Original": {
                            "sources": "tif",
                            "format": "jpg",
                            "download": "yes"
                        }}

            for file_name in figures:
                figure_resource = orig_resource + "/" + file_name
                file_path = self.get_tmp_dir() + os.sep + file_name
                file_pointer = storage_context.get_resource_to_file_pointer(figure_resource, file_path)

                cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
                cdn_resource_path = storage_provider + cdn_bucket_name + "/" + article_id + "/"

                publish_locations = [cdn_resource_path]

                image_conversion.generate_images(self.settings, formats, file_pointer, article_structure.ArticleInfo(file_name),
                                                 publish_locations, self.logger)

            self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "end",
                                    "Finished converting images for " + article_id + ": " +
                                    str(len(figures)) + " images processed ")
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception("An error occurred during " + self.pretty_name)
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "error",
                                    "Error converting images to JPG for article" + article_id +
                                    " message:" + e.message)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Example #8
    def do_activity(self, data=None):

        """
        Do the work
        """
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

        self.expanded_bucket_name = (self.settings.publishing_buckets_prefix
                                     + self.settings.expanded_bucket)
        self.crossref_bucket_name = (self.settings.publishing_buckets_prefix
                                     + self.settings.poa_packaging_bucket)

        session = Session(self.settings)

        run = data['run']
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')
        expanded_folder_name = session.get_value(run, 'expanded_folder')

        conn = S3Connection(self.settings.aws_access_key_id,
                            self.settings.aws_secret_access_key)
        bucket = conn.get_bucket(self.expanded_bucket_name)

        self.emit_monitor_event(self.settings, article_id, version, run,
                                "Schedule Crossref", "start",
                                "Starting scheduling of crossref deposit for " + article_id)

        try:
            (xml_key, xml_filename) = ConvertJATS.get_article_xml_key(bucket, expanded_folder_name)

            # Rename the XML file to match what is used already
            new_key_name = self.new_crossref_xml_name(
                prefix=self.crossref_outbox_folder,
                journal='elife',
                article_id=str(article_id).zfill(5))

            self.copy_article_xml_to_crossref_outbox(
                new_key_name=new_key_name,
                source_bucket_name=self.expanded_bucket_name,
                old_key_name=xml_key.name)

            self.emit_monitor_event(self.settings, article_id, version, run, "Schedule Crossref",
                                    "end", "Finished scheduling of crossref deposit " + article_id +
                                    " for version " + version + " run " + str(run))
        except Exception as e:
            self.logger.exception("Exception when scheduling crossref")
            self.emit_monitor_event(self.settings, article_id, version, run, "Schedule Crossref",
                                    "error", "Error scheduling crossref " + article_id +
                                    " message:" + e.message)
            return False

        return True
Example #9
    def do_activity(self, data=None):
        """
        Do the work
        """
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

        run = data['run']
        session = Session(self.settings)
        article_id = session.get_value(run, 'article_id')
        version = session.get_value(run, 'version')

        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "start",
                                "Starting Glencoe video check for " + article_id)
        try:
            expanded_folder = session.get_value(run, 'expanded_folder')
            if expanded_folder is None:
                raise RuntimeError("No session value for expanded folder")

            expanded_bucket = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket
            self.logger.info("expanded_bucket: " + expanded_bucket)

            xml_filename = lax_provider.get_xml_file_name(self.settings, expanded_folder, expanded_bucket, version)
            if xml_filename is None:
                raise RuntimeError("No xml_filename found.")

            xml_origin = "".join((self.settings.storage_provider, "://", expanded_bucket, "/", expanded_folder + '/' +
                                  xml_filename))

            storage_context = StorageContext(self.settings)
            xml_content = storage_context.get_resource_as_string(xml_origin)

            if glencoe_check.has_videos(xml_content):
                glencoe_check.validate_sources(glencoe_check.metadata(glencoe_check.check_msid(article_id), self.settings))
                self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "end",
                                        "Finished Verification. Glencoe is available. Article: " + article_id)
                return True

            self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "end",
                                    "Finished Verification. No Glencoe media tags found in xml. "
                                    "Article: " + article_id)
            return True
        except AssertionError as err:
            self.logger.info(err)
            self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "error",
                                    "Glencoe video is not available for article " + article_id + '; message: ' + str(err))
            time.sleep(60)
            return activity.activity.ACTIVITY_TEMPORARY_FAILURE
        except Exception as e:
            self.logger.exception(str(e))
            self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "error",
                                    "An error occurred when checking for Glencoe video. Article " +
                                    article_id + '; message: ' + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Example #10
    def do_activity(self, data=None):

        try:

            self.expanded_bucket_name = (
                self.settings.publishing_buckets_prefix +
                self.settings.expanded_bucket)

            run = data['run']
            session = Session(self.settings)
            version = session.get_value(run, 'version')
            article_id = session.get_value(run, 'article_id')

            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "start",
                "Starting applying version number to files for " + article_id)
        except Exception as e:
            self.logger.exception(str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        try:

            if self.logger:
                self.logger.info('data: %s' %
                                 json.dumps(data, sort_keys=True, indent=4))

            if version is None:
                self.emit_monitor_event(
                    self.settings, article_id, version, run, self.pretty_name,
                    "error", "Error in applying version number to files for " +
                    article_id + " message: No version available")
                return activity.activity.ACTIVITY_PERMANENT_FAILURE

            expanded_folder_name = session.get_value(run, 'expanded_folder')
            bucket_folder_name = expanded_folder_name.replace(os.sep, '/')
            self.rename_article_s3_objects(bucket_folder_name, version)

            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "end", "Finished applying version number to article " +
                article_id + " for version " + version + " run " + str(run))

        except Exception as e:
            self.logger.exception(str(e))
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "error", "Error in applying version number to files for " +
                article_id + " message:" + str(e.message))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return activity.activity.ACTIVITY_SUCCESS
Example #11
    def do_activity(self, data=None):
        """
        Do the work
        """
        if self.logger:
            self.logger.info('data: %s' %
                             json.dumps(data, sort_keys=True, indent=4))

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(
            self.settings, article_id, version, run, "Resize Images", "start",
            "Starting submission resize of images for article " + article_id)

        try:
            expanded_folder_name = session.get_value(run, 'expanded_folder')
            cdn_path = article_id

            if self.logger:
                self.logger.info("Converting images for folder %s" %
                                 expanded_folder_name)

            # get information on files in the expanded article bucket for notified zip file
            bucket_folder_name = expanded_folder_name
            bucket, file_infos = self.get_file_infos(bucket_folder_name)

            image_count = 0
            for file_info in file_infos:
                image_count += 1
                key = bucket.get_key(file_info.key)
                # see : http://stackoverflow.com/questions/9954521/s3-boto-list-keys-sometimes-returns-directory-key
                if not key.name.endswith("/"):
                    # process each key in the folder
                    self.process_key(key, cdn_path)
            self.emit_monitor_event(
                self.settings, article_id, version, run, "Resize Images",
                "end", "Finished converting images for " + article_id + ": " +
                str(image_count) + " images processed ")

            self.clean_tmp_dir()

        except Exception as e:
            self.logger.exception("Exception when resizing images")
            self.emit_monitor_event(
                self.settings, article_id, version, run, "Resize Images",
                "error", "Error resizing images for article" + article_id +
                " message:" + e.message)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
        return activity.activity.ACTIVITY_SUCCESS
Example #12
    def do_activity(self, data=None):

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(self.settings, article_id, version, run,
                                self.pretty_name, "start",
                                "Depositing Ingest assets for " + article_id)

        try:

            expanded_folder_name = session.get_value(run, 'expanded_folder')
            expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                      self.settings.expanded_bucket)

            cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket

            storage_context = StorageContext(self.settings)
            storage_provider = self.settings.storage_provider + "://"

            orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name
            files_in_bucket = storage_context.list_resources(orig_resource)

            pre_ingest_assets = article_structure.pre_ingest_assets(
                files_in_bucket)

            for file_name in pre_ingest_assets:

                orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name + "/" + file_name
                dest_resource = storage_provider + cdn_bucket_name + "/" + article_id + "/" + file_name
                storage_context.copy_resource(orig_resource, dest_resource)

                if self.logger:
                    self.logger.info("Uploaded file %s to %s" %
                                     (file_name, cdn_bucket_name))

            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "end", "Deposited Ingest assets for article " + article_id)
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception("Exception when Depositing Ingest assets")
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "error", "Error depositing Ingest assets for article " +
                article_id + " message:" + e.message)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Example #14
    def do_activity(self, data=None):
        """
        Do the work
        """
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(self.settings, article_id, version, run, "Resize Images", "start",
                                "Starting submission resize of images for article " + article_id)

        try:
            expanded_folder_name = session.get_value(run, 'expanded_folder')
            cdn_path = article_id

            if self.logger:
                self.logger.info("Converting images for folder %s" % expanded_folder_name)

            # get information on files in the expanded article bucket for notified zip file
            bucket_folder_name = expanded_folder_name
            bucket, file_infos = self.get_file_infos(bucket_folder_name)

            image_count = 0
            for file_info in file_infos:
                image_count += 1
                key = bucket.get_key(file_info.key)
                # see : http://stackoverflow.com/questions/9954521/s3-boto-list-keys-sometimes-returns-directory-key
                if not key.name.endswith("/"):
                    # process each key in the folder
                    self.process_key(key, cdn_path)
            self.emit_monitor_event(self.settings, article_id, version, run, "Resize Images", "end",
                                    "Finished converting images for " + article_id + ": " +
                                    str(image_count) + " images processed ")

            self.clean_tmp_dir()

        except Exception as e:
            self.logger.exception("Exception when resizing images")
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    "Resize Images", "error",
                                    "Error resizing images for article" + article_id +
                                    " message:" + e.message)
            return False
        return True
Example #15
    def do_activity(self, data=None):
        """
        Do the work
        """

        session = Session(self.settings)
        version = session.get_value(self.get_workflowId(), 'version')
        article_id = session.get_value(self.get_workflowId(), 'article_id')
        run = session.get_value(self.get_workflowId(), 'run')

        self.emit_monitor_event(self.settings, article_id, version, run, "Deposit assets", "start",
                                "Depositing assets for " + article_id)

        try:
            conn = S3Connection(self.settings.aws_access_key_id, self.settings.aws_secret_access_key)

            expanded_folder_name = session.get_value(self.get_workflowId(), 'expanded_folder')
            expanded_folder_bucket = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket

            expanded_bucket = conn.get_bucket(expanded_folder_bucket)
            cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket

            no_download_extensions = [x.strip() for x in self.settings.no_download_extensions.split(',')]

            keys = self.get_keys(expanded_bucket, expanded_folder_name)
            for key in keys:
                (file_key, file_name) = key
                file_key.copy(cdn_bucket_name, article_id + "/" + file_name)
                if self.logger:
                    self.logger.info("Uploaded key %s to %s" % (file_name, cdn_bucket_name))
                file_name_no_extension, extension = file_name.rsplit('.', 1)
                if extension not in no_download_extensions:
                    download_metadata = file_key.metadata
                    download_metadata['Content-Disposition'] = str(
                        "attachment; filename=" + file_name)
                    file_key.copy(cdn_bucket_name, article_id + "/" + file_name_no_extension + "-download." + extension,
                                  metadata=download_metadata)
            self.emit_monitor_event(self.settings, article_id, version, run, "Deposit assets", "end",
                                    "Deposited assets for article " + article_id)

        except Exception as e:
            self.logger.exception("Exception when Depositing assets")
            self.emit_monitor_event(self.settings, article_id, version, run, "Deposit assets", "error",
                                    "Error depositing assets for article " + article_id +
                                    " message:" + e.message)
            return False

        return True
Example #16
    def do_activity(self, data=None):
        """
        Do the work
        """

        self.expanded_bucket_name = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket

        info = S3NotificationInfo.from_dict(data)
        session = Session(self.settings)
        version = session.get_value(self.get_workflowId(), 'version')
        article_id = session.get_value(self.get_workflowId(), 'article_id')
        article_version_id = article_id + '.' + version
        run = session.get_value(self.get_workflowId(), 'run')

        self.emit_monitor_event(self.settings, article_id, version, run, "Apply Version Number", "start",
                                "Starting applying version number to files for " + article_id)

        try:

            if self.logger:
                self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
                
            # Do not rename files if a version number is already in the file_name
            m = re.search(ur'-v([0-9]*?)[.-]', info.file_name)

            if m is None and version is not None:
                expanded_folder_name = session.get_value(self.get_workflowId(), 'expanded_folder')
                bucket_folder_name = expanded_folder_name.replace(os.sep, '/')
                self.rename_article_s3_objects(bucket_folder_name, version)

            self.emit_monitor_event(self.settings, article_id, version, run, "Apply Version Number", "end",
                        "Finished applying version number to article " + article_id +
                        " for version " + version + " run " + str(run))

        except Exception as e:
            self.logger.exception("Exception when applying version number to article")
            self.emit_monitor_event(self.settings, article_id, version, run, "Apply Version Number", "error",
                                    "Error in applying version number to files for " + article_id +
                                    " message:" + e.message)

        return True
Example #17
    def get_workflow_data(self, data):
        if "publication_data" in data:
            publication_data = json.loads(base64.decodestring(data['publication_data']))
            workflow_data = publication_data['workflow_data']
            return workflow_data

        # Added because during silent corrections we cannot get the data from the
        # previous workflow: PreparePostEIF sets the data and, in silent corrections,
        # it is part of the same workflow. Currently we cannot mutate the data and
        # pass it through activities, only workflows; improving this needs research
        # on AWS SWF. Outside silent corrections this also works, as it just
        # overrides the setting with the same data.
        session = Session(self.settings)
        eif_location = session.get_value(data['run'], 'eif_location')
        if eif_location is not None:
            data['eif_location'] = eif_location

        return data
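
get_workflow_data expects publication_data to be a base64-encoded JSON document with a workflow_data key. A sketch of the presumed producer side, mirroring the Python 2 base64/json calls used above (how the field is actually written elsewhere is an assumption):

    # Hypothetical producer of the publication_data field consumed above
    workflow_data = {'article_id': article_id, 'version': version}
    data['publication_data'] = base64.encodestring(
        json.dumps({'workflow_data': workflow_data}))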
Example #19
    def do_activity(self, data=None):

        if self.logger:
            self.logger.info('data: %s' %
                             json.dumps(data, sort_keys=True, indent=4))

        run = data["run"]
        session = Session(self.settings)
        data['version'] = session.get_value(run, 'version')
        data['article_id'] = session.get_value(run, 'article_id')
        data['status'] = session.get_value(run, 'status')
        data['expanded_folder'] = session.get_value(run, 'expanded_folder')
        data['update_date'] = session.get_value(run, 'update_date')

        queue_connection_settings = {
            "sqs_region": self.settings.sqs_region,
            "aws_access_key_id": self.settings.aws_access_key_id,
            "aws_secret_access_key": self.settings.aws_secret_access_key
        }

        (message, queue, start_event, end_event, end_event_details,
         exception) = self.get_message_queue(
             data, self.settings.consider_Lax_elife_2_0)

        self.emit_monitor_event(*start_event)
        if end_event == "error":
            self.logger.exception(
                "Exception when Preparing Ingest for Lax. Details: %s",
                exception)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        self.write_message(queue_connection_settings, queue, message)

        self.emit_monitor_event(*end_event_details)
        return activity.activity.ACTIVITY_SUCCESS
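
write_message is not shown in Example #19. A minimal sketch of what it might do, based on the boto.sqs usage that appears inline in Example #26 (the real implementation may differ):

    def write_message(self, queue_connection_settings, queue_name, message_data):
        # Connect to SQS and post the prepared message, as Example #26 does inline
        sqs_conn = boto.sqs.connect_to_region(
            queue_connection_settings["sqs_region"],
            aws_access_key_id=queue_connection_settings["aws_access_key_id"],
            aws_secret_access_key=queue_connection_settings["aws_secret_access_key"])
        queue = sqs_conn.get_queue(queue_name)
        message = Message()
        message.set_body(json.dumps(message_data))
        queue.write(message)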
Example #23
    def do_activity(self, data=None):

        try:

            info = S3NotificationInfo.from_dict(data)
            filename = info.file_name[info.file_name.rfind('/')+1:]
            session = Session(self.settings)
            session.store_value(data['run'], 'filename_last_element', filename)

            article_structure = ArticleInfo(filename)

            if article_structure.article_id is None:
                self.logger.error("Name '%s' did not match expected pattern for article id" % filename)
                raise RuntimeError("article_structure.article_id is None. File pattern problem.")

            version = self.get_version(self.settings, article_structure, data['version_lookup_function'])
            session.store_value(data['run'], 'version', version)
            article_id = article_structure.article_id

            self.emit_monitor_event(self.settings, article_id, version, data['run'],
                                    self.pretty_name, "start",
                                    " ".join(("Version Lookup for article", article_id, "version:", version)))

            self.set_monitor_property(self.settings, article_id, "article-id", article_id, "text")
            self.set_monitor_property(self.settings, article_id, "publication-status", "publication in progress",
                                      "text",
                                      version=version)

            self.emit_monitor_event(self.settings, article_structure.article_id, version, data['run'],
                                    self.pretty_name, "end",
                                    " ".join(("Finished Version Lookup for article", article_structure.article_id,
                                              "version:", version)))
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception("Exception when trying to Lookup Version. Error: " + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Example #24
    def do_activity(self, data=None):
        """
        Do the work
        """

        session = Session(self.settings)
        version = session.get_value(self.get_workflowId(), 'version')
        article_id = session.get_value(self.get_workflowId(), 'article_id')
        run = session.get_value(self.get_workflowId(), 'run')

        self.emit_monitor_event(self.settings, article_id, version, run, "Deposit assets", "start",
                                "Depositing assets for " + article_id)

        try:
            conn = S3Connection(self.settings.aws_access_key_id, self.settings.aws_secret_access_key)

            expanded_folder_name = session.get_value(self.get_workflowId(), 'expanded_folder')
            expanded_folder_bucket = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket

            expanded_bucket = conn.get_bucket(expanded_folder_bucket)
            cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket

            keys = self.get_keys(expanded_bucket, expanded_folder_name)
            for key in keys:
                (file_key, file_name) = key
                file_key.copy(cdn_bucket_name, article_id + "/" + file_name)
                if self.logger:
                    self.logger.info("Uploaded key %s to %s" % (file_name, cdn_bucket_name))

        except Exception as e:
            self.logger.exception("Exception when Depositing assets")
            self.emit_monitor_event(self.settings, article_id, version, run, "Deposit assets", "error",
                                    "Error depositing assets for article " + article_id +
                                    " message:" + e.message)
            return False

        return True
Example #26
    def do_activity(self, data=None):
        """
        Do the work
        """
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

        session = Session(self.settings)
        version = session.get_value(self.get_workflowId(), 'version')
        article_id = session.get_value(self.get_workflowId(), 'article_id')
        run = session.get_value(self.get_workflowId(), 'run')

        self.emit_monitor_event(self.settings, article_id, version, run, "Post EIF", "start",
                                "Starting submission of article EIF " + article_id)

        try:
            eif_filename = session.get_value(self.get_workflowId(), 'eif_filename')
            eif_bucket = self.settings.publishing_buckets_prefix + self.settings.eif_bucket

            if self.logger:
                self.logger.info("Posting file %s" % eif_filename)

            conn = S3Connection(self.settings.aws_access_key_id, self.settings.aws_secret_access_key)
            bucket = conn.get_bucket(eif_bucket)
            key = Key(bucket)
            key.key = eif_filename
            json_output = key.get_contents_as_string()
            destination = self.settings.drupal_EIF_endpoint

            headers = {'content-type': 'application/json'}
            r = requests.post(destination, data=json_output, headers=headers)
            self.logger.info("POST response was %s" % str(r.status_code))
            self.emit_monitor_event(self.settings, article_id, version, run, "Post EIF", "start",
                                    "Finish submission of article " + article_id +
                                    " for version " + str(version) + " run " + str(run) +
                                    " the response status was " + str(r.status_code))
            if r.status_code == 200:
                # TODO: article path will at some point be available in the response
                article_path = session.get_value(self.get_workflowId(), 'article_path')
                self.set_monitor_property(self.settings, article_id, 'path', article_path, 'text')
                published = r.json().get('publish')
                if published == "1":
                    self.set_monitor_property(self.settings, article_id, 'publication_status', 'published', "text")

                    # initiate post-publication workflow

                    # assemble data
                    expanded_folder = session.get_value(self.get_workflowId(), 'expanded_folder')
                    status = session.get_value(self.get_workflowId(), 'status')

                    update_date = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
                    # TODO: need to replace line above with this once its in the session
                    # update_date = session.get_value(self.get_workflowId(), 'update_date')

                    sqs_conn = boto.sqs.connect_to_region(self.settings.sqs_region,
                                                          aws_access_key_id=self.settings.aws_access_key_id,
                                                          aws_secret_access_key=self.settings.aws_secret_access_key)
                    follow_on_data = {
                        'article_id': article_id,
                        'version': version,
                        'expanded_folder': expanded_folder,
                        'update_date': update_date,
                        'run': run,
                        'status': status,
                        'eif_location': eif_filename,
                    }

                    message = {
                        'workflow_name': 'PostPerfectPublication',
                        'workflow_data': follow_on_data
                    }
                    out_queue = sqs_conn.get_queue(self.settings.workflow_starter_queue)
                    m = Message()
                    m.set_body(json.dumps(message))
                    out_queue.write(m)
                else:
                    self.set_monitor_property(self.settings, article_id, 'publication_status', 'ready', "text")
            else:
                self.emit_monitor_event(self.settings, article_id, version, run, "Post EIF", "error",
                                        "Website ingest returned an error code: " + str(r.status_code))
                return False
            self.emit_monitor_event(self.settings, article_id, version, run, "Post EIF", "end",
                                    "Finished submitting EIF for article " + article_id +
                                    " status was " + str(r.status_code))
        except Exception as e:
            self.logger.exception("Exception when submitting article EIF")
            self.emit_monitor_event(self.settings, article_id, version, run, "Post EIF", "error",
                                    "Error submitting EIF For article" + article_id + " message:" + str(e.message))
            return False
        return True
Example #27
    def do_activity(self, data=None):

        """
        Do the work
        """

        run = data['run']

        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        info = S3NotificationInfo.from_dict(data)

        storage_context = StorageContext(self.settings)

        session = Session(self.settings)

        filename_last_element = session.get_value(run, 'filename_last_element')
        # zip name contains version information for previously archived zip files
        article_structure = ArticleInfo(filename_last_element)
        article_id = article_structure.article_id
        session.store_value(run, 'article_id', article_id)
        session.store_value(run, 'file_name', info.file_name)

        if self.logger:
            self.logger.info("Expanding file %s" % info.file_name)

        version = session.get_value(run, 'version')

        status = article_structure.status
        if status is None or (status != 'vor' and status != 'poa'):
            self.logger.error("Name '%s' did not match expected pattern for status" %
                              filename_last_element)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE  # status could not be determined, exit workflow.

        article_version_id = article_id + '.' + version
        session.store_value(run, 'article_version_id', article_version_id)
        session.store_value(run, 'run', run)
        session.store_value(run, 'status', status)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                                "Starting expansion of article " + article_id)

        try:
            # download zip to temp folder
            tmp = self.get_tmp_dir()
            local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
            storage_resource_origin = self.settings.storage_provider + "://" + info.bucket_name + "/" + info.file_name
            storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
            local_zip_file.close()

            # extract zip contents
            folder_name = path.join(article_version_id, run)
            content_folder = path.join(tmp, folder_name)
            makedirs(content_folder)
            with ZipFile(path.join(tmp, filename_last_element)) as zf:
                zf.extractall(content_folder)

            upload_filenames = []
            for f in listdir(content_folder):
                if isfile(join(content_folder, f)) and f[0] not in ('.', '_'):
                    upload_filenames.append(f)

            bucket_folder_name = article_version_id + '/' + run
            for filename in upload_filenames:
                source_path = path.join(content_folder, filename)
                dest_path = bucket_folder_name + '/' + filename
                storage_resource_dest = self.settings.storage_provider + "://" + self.settings.publishing_buckets_prefix + \
                                        self.settings.expanded_bucket + "/" + dest_path
                storage_context.set_resource_from_filename(storage_resource_dest, source_path)

            self.clean_tmp_dir()

            session.store_value(run, 'expanded_folder', bucket_folder_name)
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "end", "Finished expansion of article " + article_id +
                                    " for version " + version + " run " + str(run) +
                                    " into " + bucket_folder_name)
        except Exception as e:
            self.logger.exception("Exception when expanding article")
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "error", "Error expanding article " + article_id +
                                    " message:" + e.message)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return True
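
The status check above relies on ArticleInfo to parse the incoming zip name. That class is not shown in these excerpts; the sketch below is a hypothetical stand-in (the class name and regexes are assumptions, not the real implementation) illustrating how an id, status and version can be derived from an eLife-style file name.

import re

class ArticleInfoSketch(object):
    """Hypothetical stand-in for ArticleInfo: derives the article id,
    status and version from a zip name such as
    'elife-00353-vor-v1-20121213000000.zip'."""
    def __init__(self, file_name):
        id_match = re.match(r'elife-(\d+)-', file_name)
        self.article_id = id_match.group(1) if id_match else None
        status_match = re.search(r'-(vor|poa)[-.]', file_name)
        self.status = status_match.group(1) if status_match else None
        version_match = re.search(r'-v(\d+)[.-]', file_name)
        self.version = version_match.group(1) if version_match else None

info = ArticleInfoSketch('elife-00353-vor-v1-20121213000000.zip')
assert (info.article_id, info.status, info.version) == ('00353', 'vor', '1')
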
Example #28
    def do_activity(self, data=None):
        """
        Do the work
        """

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "start",
                                "Depositing assets for " + article_id)

        try:

            expanded_folder_name = session.get_value(run, 'expanded_folder')
            expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                      self.settings.expanded_bucket)

            storage_context = StorageContext(self.settings)
            storage_provider = self.settings.storage_provider + "://"

            orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name
            files_in_bucket = storage_context.list_resources(orig_resource)

            # filter figures that have already been copied (see DepositIngestAssets activity)
            pre_ingest_assets = article_structure.pre_ingest_assets(files_in_bucket)

            other_assets = [asset for asset in files_in_bucket if asset not in pre_ingest_assets]

            # assets bucket
            cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket

            no_download_extensions = self.get_no_download_extensions(self.settings.no_download_extensions)

            for file_name in other_assets:
                orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name + "/"
                dest_resource = storage_provider + cdn_bucket_name + "/" + article_id + "/"

                storage_context.copy_resource(orig_resource + file_name, dest_resource + file_name)

                if self.logger:
                    self.logger.info("Uploaded file %s to %s" % (file_name, cdn_bucket_name))

                file_name_no_extension, extension = file_name.rsplit('.', 1)
                if extension not in no_download_extensions:
                    content_type = self.content_type_from_file_name(file_name)
                    dict_metadata = {'Content-Disposition':
                                     str("Content-Disposition: attachment; filename=" + file_name + ";"),
                                     'Content-Type': content_type}
                    file_download = file_name_no_extension + "-download." + extension

                    # file is copied with additional metadata
                    storage_context.copy_resource(orig_resource + file_name,
                                                  dest_resource + file_download,
                                                  additional_dict_metadata=dict_metadata)

            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "end",
                                    "Deposited assets for article " + article_id)

        except Exception as e:
            self.logger.exception("Exception when Depositing assets")
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "error",
                                    "Error depositing assets for article " + article_id +
                                    " message:" + e.message)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return activity.activity.ACTIVITY_SUCCESS
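
get_no_download_extensions and content_type_from_file_name are referenced above but not shown. A minimal sketch of what such helpers might look like, assuming the setting is a comma-separated string (both implementations are guesses, not the activity's actual code):

import mimetypes

def get_no_download_extensions(extensions_setting):
    """Hypothetical helper: parse a settings string such as
    'gif,jpg,png' into a list of extensions."""
    return [ext.strip() for ext in extensions_setting.split(',') if ext.strip()]

def content_type_from_file_name(file_name):
    """Hypothetical helper: guess a Content-Type, defaulting to
    binary when the extension is unknown."""
    content_type, _ = mimetypes.guess_type(file_name)
    return content_type or 'binary/octet-stream'

assert get_no_download_extensions('gif, jpg,png') == ['gif', 'jpg', 'png']
assert content_type_from_file_name('elife-00353-fig1-v1.jpg') == 'image/jpeg'
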
    def do_activity(self, data=None):
        """
        Do the work
        """
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

        session = Session(self.settings)
        version = session.get_value(self.get_workflowId(), 'version')
        article_id = session.get_value(self.get_workflowId(), 'article_id')
        run = session.get_value(self.get_workflowId(), 'run')

        self.emit_monitor_event(self.settings, article_id, version, run, "Post EIF", "start",
                                "Starting submission of article EIF " + article_id)

        try:
            eif_filename = session.get_value(self.get_workflowId(), 'eif_filename')
            eif_bucket = self.settings.publishing_buckets_prefix + self.settings.eif_bucket

            if self.logger:
                self.logger.info("Posting file %s" % eif_filename)

            conn = S3Connection(self.settings.aws_access_key_id, self.settings.aws_secret_access_key)
            bucket = conn.get_bucket(eif_bucket)
            key = Key(bucket)
            key.key = eif_filename
            json_output = key.get_contents_as_string()
            destination = self.settings.drupal_EIF_endpoint

            headers = {'content-type': 'application/json'}
            
            auth = None
            if self.settings.drupal_update_user:
                auth = requests.auth.HTTPBasicAuth(self.settings.drupal_update_user,
                                                   self.settings.drupal_update_pass)

            r = requests.post(destination, data=json_output, headers=headers, auth=auth)
            self.logger.info("POST response was %s" % str(r.status_code))
            self.emit_monitor_event(self.settings, article_id, version, run, "Post EIF", "start",
                                    "Finish submission of article " + article_id +
                                    " for version " + str(version) + " run " + str(run) + " the response status "
                                                                                          "was " + str(r.status_code))
            # TODO: this is temp
            if r.status_code == 200:
                # TODO : article path will at some point be available in the response
                article_path = session.get_value(self.get_workflowId(), 'article_path')
                self.set_monitor_property(self.settings, article_id, 'path', article_path, 'text', version=version)

                published = r.json().get('publish')

                # assemble data to start post-publication workflow
                expanded_folder = session.get_value(self.get_workflowId(), 'expanded_folder')
                status = session.get_value(self.get_workflowId(), 'status')

                try:
                    update_date = session.get_value(self.get_workflowId(), 'update_date')
                except Exception:
                    # Default
                    update_date = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')

                follow_on_data = {
                    'article_id': article_id,
                    'version': version,
                    'expanded_folder': expanded_folder,
                    'update_date': update_date,
                    'run': run,
                    'status': status,
                    'eif_location': eif_filename,
                }

                message = {
                    'workflow_name': 'PostPerfectPublication',
                    'workflow_data': follow_on_data
                }

                if published is True:
                    self.set_monitor_property(self.settings, article_id, 'publication-status', 'published', "text", version=version)

                    # initiate post-publication workflow now

                    sqs_conn = boto.sqs.connect_to_region(self.settings.sqs_region,
                                                          aws_access_key_id=self.settings.aws_access_key_id,
                                                          aws_secret_access_key=self.settings.aws_secret_access_key)

                    out_queue = sqs_conn.get_queue(self.settings.workflow_starter_queue)
                    m = Message()
                    m.set_body(json.dumps(message))
                    out_queue.write(m)
                else:
                    encoded_message = base64.encodestring(json.dumps(message))
                    # store message in dashboard for later
                    self.set_monitor_property(self.settings, article_id, "_publication-data", encoded_message, "text", version=version)
                    self.set_monitor_property(self.settings, article_id, "publication-status", "ready to publish", "text", version=version)
            else:
                self.emit_monitor_event(self.settings, article_id, version, run, "Post EIF", "error",
                                        "Website ingest returned an error code: " + str(r.status_code))
                self.logger.error("Body:" + r.text)
                return False
            self.emit_monitor_event(self.settings, article_id, version, run, "Post EIF", "end",
                                    "Finished submitting EIF for article  " + article_id +
                                    " status was " + str(r.status_code))

        except Exception as e:
            self.logger.exception("Exception when submitting article EIF")
            self.emit_monitor_event(self.settings, article_id, version, run, "Post EIF", "error",
                                    "Error submitting EIF For article" + article_id + " message:" + str(e.message))
            return False
        return True
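
When the article is not yet published, the example above base64-encodes the workflow-starter message and stashes it as the _publication-data dashboard property. A later publish action would presumably reverse that; a round-trip sketch (the payload values are invented for illustration):

import base64
import json

# the message assembled in the example above, abridged
message = {
    'workflow_name': 'PostPerfectPublication',
    'workflow_data': {'article_id': '00353', 'version': '1'},
}

# what set_monitor_property stores under '_publication-data'
encoded_message = base64.encodestring(json.dumps(message))

# hypothetical publish step: decode and hand back to the workflow starter
decoded = json.loads(base64.decodestring(encoded_message))
assert decoded['workflow_name'] == 'PostPerfectPublication'
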
    def do_activity(self, data=None):
        """
        Do the work
        """

        session = Session(self.settings)
        version = session.get_value(self.get_workflowId(), 'version')
        article_id = session.get_value(self.get_workflowId(), 'article_id')
        article_version_id = article_id + '.' + version
        run = session.get_value(self.get_workflowId(), 'run')

        self.emit_monitor_event(self.settings, article_id, version, run, "Convert JATS", "start",
                                "Starting conversion of article xml to EIF for " + article_id)

        try:

            if self.logger:
                self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
            expanded_folder_name = session.get_value(self.get_workflowId(), 'expanded_folder')
            expanded_folder_bucket = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket
            if self.logger:
                self.logger.info("expanded folder %s" % expanded_folder_name)

            conn = S3Connection(self.settings.aws_access_key_id, self.settings.aws_secret_access_key)
            bucket = conn.get_bucket(expanded_folder_bucket)

            bucket_folder_name = expanded_folder_name
            (xml_key, xml_filename) = self.get_article_xml_key(bucket, bucket_folder_name)
            if xml_key is None:
                self.logger.error("Article XML path not found")
                return False

            if self.logger:
                self.logger.info("Converting file %s" % xml_filename)

            xml = xml_key.get_contents_as_string()
            if self.logger:
                self.logger.info("Downloaded contents of file %s" % xml_filename)

            json_output = jats_scraper.scrape(xml, article_version=version)

            # Add update date if it is in the session
            try:
                update_date = session.get_value(self.get_workflowId(), 'update_date')
            except Exception:
                update_date = None
            if update_date:
                json_output = self.add_update_date_to_json(json_output, update_date, xml_filename)

            if self.logger:
                self.logger.info("Scraped file %s" % xml_filename)

            output_folder = article_version_id + '/' + run
            output_name = xml_filename.replace('.xml', '.json')
            output_bucket = self.settings.publishing_buckets_prefix + self.settings.eif_bucket
            output_path = output_folder + '/' + output_name
            destination = conn.get_bucket(output_bucket)
            destination_key = Key(destination)
            output_key = output_path
            destination_key.key = output_key
            destination_key.set_contents_from_string(json_output)

            if self.logger:
                self.logger.info("Uploaded key %s to %s" % (output_path, output_bucket))

            self.set_dashboard_properties(json_output, article_id, version)

            session.store_value(self.get_workflowId(), "eif_filename", output_key)
            eif_object = json.loads(json_output)
            session.store_value(self.get_workflowId(), 'article_path', eif_object.get('path'))
            self.emit_monitor_event(self.settings, article_id, version, run, "Convert JATS", "end",
                                    "XML converted to EIF for article " + article_id + " to " + output_key)

        except Exception as e:
            self.logger.exception("Exception when converting article XML to EIF")
            self.emit_monitor_event(self.settings, article_id, version, run, "Convert JATS", "error",
                                    "Error in conversion of article xml to EIF for " + article_id +
                                    " message:" + e.message)
            return False

        return True
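
add_update_date_to_json is called above but not shown; one plausible implementation, assuming EIF is a flat JSON object with an 'update' field (a sketch, not the real helper):

import json

def add_update_date_to_json(json_output, update_date, xml_filename):
    """Hypothetical sketch: set the update date in the EIF JSON,
    returning the output unchanged if it cannot be parsed (the real
    helper may use xml_filename for logging)."""
    try:
        eif = json.loads(json_output)
        eif['update'] = update_date
        return json.dumps(eif)
    except (ValueError, TypeError):
        return json_output

updated = add_update_date_to_json('{"title": "x"}', '2015-12-13T00:00:00Z', 'elife-00353-v1.xml')
assert json.loads(updated)['update'] == '2015-12-13T00:00:00Z'
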
    def do_activity(self, data=None):

        """
        Do the work
        """

        run = data['run']

        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        info = S3NotificationInfo.from_dict(data)

        storage_context = StorageContext(self.settings)

        session = Session(self.settings)

        filename_last_element = session.get_value(run, 'filename_last_element')
        # zip name contains version information for previously archived zip files
        article_structure = ArticleInfo(filename_last_element)
        article_id = article_structure.article_id
        session.store_value(run, 'article_id', article_id)
        session.store_value(run, 'file_name', info.file_name)

        if self.logger:
            self.logger.info("Expanding file %s" % info.file_name)

        version = session.get_value(run, 'version')

        status = article_structure.status
        if status not in ('vor', 'poa'):
            self.logger.error("Name '%s' did not match expected pattern for status" %
                              filename_last_element)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE  # status could not be determined, exit workflow.

        article_version_id = article_id + '.' + version
        session.store_value(run, 'article_version_id', article_version_id)
        session.store_value(run, 'run', run)
        session.store_value(run, 'status', status)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                                "Starting expansion of article " + article_id)


        try:
            # download zip to temp folder
            tmp = self.get_tmp_dir()
            local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
            storage_resource_origin = self.settings.storage_provider + "://" + info.bucket_name + "/" + info.file_name
            storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
            local_zip_file.close()

            # extract zip contents
            folder_name = path.join(article_version_id, run)
            content_folder = path.join(tmp, folder_name)
            makedirs(content_folder)
            with ZipFile(path.join(tmp, filename_last_element)) as zf:
                zf.extractall(content_folder)

            upload_filenames = []
            for f in listdir(content_folder):
                if isfile(join(content_folder, f)) and f[0] not in ('.', '_'):
                    upload_filenames.append(f)
            self.check_filenames(upload_filenames)

            bucket_folder_name = article_version_id + '/' + run
            for filename in upload_filenames:
                source_path = path.join(content_folder, filename)
                dest_path = bucket_folder_name + '/' + filename
                storage_resource_dest = self.settings.storage_provider + "://" + self.settings.publishing_buckets_prefix + \
                                        self.settings.expanded_bucket + "/" + dest_path
                storage_context.set_resource_from_filename(storage_resource_dest, source_path)

            self.clean_tmp_dir()

            session.store_value(run, 'expanded_folder', bucket_folder_name)
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "end", "Finished expansion of article " + article_id +
                                    " for version " + version + " run " + str(run) +
                                    " into " + bucket_folder_name)
        except Exception as e:
            self.logger.exception("Exception when expanding article")
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "error", "Error expanding article " + article_id +
                                    " message:" + e.message)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return True
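
This variant adds a check_filenames step before uploading. The helper is not shown; a plausible sketch is a collision check on names once version tokens are stripped (the regex and error type here are assumptions):

import re

def check_filenames(upload_filenames):
    """Hypothetical sketch: flag names that would collide once a
    '-v1' style version token is stripped."""
    seen = {}
    for name in upload_filenames:
        key = re.sub(r'-v[0-9]+[.]', '.', name)
        if key in seen:
            raise RuntimeError("%s collides with %s" % (name, seen[key]))
        seen[key] = name

check_filenames(['elife-00353-fig1-v1.tif', 'elife-00353-v1.xml'])  # no collision
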
Example #32
    def do_activity(self, data=None):
        """
        Do the work
        """

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')
        article_version_id = article_id + '.' + version

        self.emit_monitor_event(
            self.settings, article_id, version, run, "Convert JATS", "start",
            "Starting conversion of article xml to EIF for " + article_id)

        try:

            if self.logger:
                self.logger.info('data: %s' %
                                 json.dumps(data, sort_keys=True, indent=4))
            expanded_folder_name = session.get_value(run, 'expanded_folder')
            expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                      self.settings.expanded_bucket)

            conn = S3Connection(self.settings.aws_access_key_id,
                                self.settings.aws_secret_access_key)
            bucket = conn.get_bucket(expanded_folder_bucket)

            bucket_folder_name = expanded_folder_name
            (xml_key,
             xml_filename) = self.get_article_xml_key(bucket,
                                                      bucket_folder_name)
            if xml_key is None:
                self.logger.error("Article XML path not found")
                return False

            if self.logger:
                self.logger.info("Converting file %s" % xml_filename)

            xml = xml_key.get_contents_as_string()
            if self.logger:
                self.logger.info("Downloaded contents of file %s" %
                                 xml_filename)

            json_output = jats_scraper.scrape(xml, article_version=version)

            # Add update date if it is in the session
            update_date = session.get_value(run, 'update_date')
            if update_date:
                json_output = self.add_update_date_to_json(
                    json_output, update_date, xml_filename)

            if self.logger:
                self.logger.info("Scraped file %s" % xml_filename)

            output_folder = article_version_id + '/' + run
            output_name = xml_filename.replace('.xml', '.json')
            output_bucket = self.settings.publishing_buckets_prefix + self.settings.eif_bucket
            output_path = output_folder + '/' + output_name
            destination = conn.get_bucket(output_bucket)
            destination_key = Key(destination)
            output_key = output_path
            destination_key.key = output_key
            destination_key.set_contents_from_string(json_output)

            if self.logger:
                self.logger.info("Uploaded key %s to %s" %
                                 (output_path, output_bucket))

            self.set_dashboard_properties(json_output, article_id, version)

            session.store_value(run, "eif_location", output_key)
            eif_object = json.loads(json_output)
            session.store_value(run, 'article_path', eif_object.get('path'))
            self.emit_monitor_event(
                self.settings, article_id, version, run, "Convert JATS", "end",
                "XML converted to EIF for article " + article_id + " to " +
                output_key)

        except Exception as e:
            self.logger.exception(
                "Exception when converting article XML to EIF")
            self.emit_monitor_event(
                self.settings, article_id, version, run, "Convert JATS",
                "error", "Error in conversion of article xml to EIF for " +
                article_id + " message:" + e.message)
            return False

        return True
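
These activities touch shared state only through Session.get_value and Session.store_value keyed by run, which makes them straightforward to exercise with an in-memory stand-in (a test sketch, not the real Session class):

class FakeSession(object):
    """In-memory stand-in for Session: maps (run, key) to a value."""
    def __init__(self, settings=None):
        self.values = {}

    def store_value(self, run, key, value):
        self.values[(run, key)] = value

    def get_value(self, run, key):
        return self.values.get((run, key))

session = FakeSession()
session.store_value('run-1', 'version', '1')
assert session.get_value('run-1', 'version') == '1'
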
    def do_activity(self, data=None):
        """
        Do the work
        """
        if self.logger:
            self.logger.info('data: %s' %
                             json.dumps(data, sort_keys=True, indent=4))

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "start",
            "Starting preparation of article for EIF " + article_id)

        try:
            eif_location = session.get_value(run, 'eif_location')
            eif_bucket = self.settings.publishing_buckets_prefix + self.settings.eif_bucket

            article_path = session.get_value(run, 'article_path')
            self.set_monitor_property(self.settings,
                                      article_id,
                                      'path',
                                      article_path,
                                      'text',
                                      version=version)

            expanded_folder = session.get_value(run, 'expanded_folder')
            status = session.get_value(run, 'status')

            update_date = session.get_value(run, 'update_date')

            carry_over_data = {
                'eif_location': eif_location,
                'eif_bucket': eif_bucket,
                'passthrough': {
                    'article_id': article_id,
                    'version': version,
                    'run': run,
                    'article_path': article_path,
                    'expanded_folder': expanded_folder,
                    'status': status,
                    'update_date': update_date,
                }
            }

            message = carry_over_data

            sqs_conn = boto.sqs.connect_to_region(
                self.settings.sqs_region,
                aws_access_key_id=self.settings.aws_access_key_id,
                aws_secret_access_key=self.settings.aws_secret_access_key)

            out_queue = sqs_conn.get_queue(self.settings.website_ingest_queue)
            m = Message()
            m.set_body(json.dumps(message))
            out_queue.write(m)

        except Exception as e:
            self.logger.exception("Exception when Preparing for PostEIF")
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "error", "Error submitting EIF For article" + article_id +
                " message:" + str(e.message))
            return False

        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "end",
            "Finished preparation of article for EIF " + article_id)
        return True
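
The message placed on the website ingest queue carries the EIF location plus a passthrough block that the ingester is expected to return untouched for the PostEIF step. A consumer-side sketch (all values invented for illustration):

import json

# shape of the carry_over_data message built above
raw_body = json.dumps({
    'eif_location': '00353.1/run-id/elife-00353-v1.json',
    'eif_bucket': 'prefix-eif-bucket',
    'passthrough': {'article_id': '00353', 'version': '1', 'run': 'run-id'},
})

# hypothetical ingester: read the EIF location, keep passthrough intact
message = json.loads(raw_body)
eif_location = message['eif_location']
passthrough = message['passthrough']
assert passthrough['article_id'] == '00353'
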
    def do_activity(self, data=None):
        """
        Do the work
        """

        run = data["run"]
        session = Session(self.settings)
        version = session.get_value(run, "version")
        article_id = session.get_value(run, "article_id")

        self.emit_monitor_event(
            self.settings, article_id, version, run, "Deposit assets", "start", "Depositing assets for " + article_id
        )

        try:
            conn = S3Connection(self.settings.aws_access_key_id, self.settings.aws_secret_access_key)

            expanded_folder_name = session.get_value(run, "expanded_folder")
            expanded_folder_bucket = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket

            expanded_bucket = conn.get_bucket(expanded_folder_bucket)
            cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket

            no_download_extensions = self.get_no_download_extensions(self.settings.no_download_extensions)

            storage_context = StorageContext(self.settings)
            storage_provider = self.settings.storage_provider + "://"
            published_bucket_path = (
                self.settings.publishing_buckets_prefix + self.settings.published_bucket + "/articles"
            )

            keys = self.get_keys(expanded_bucket, expanded_folder_name)
            for key in keys:
                (file_key, file_name) = key
                # file_key.copy(cdn_bucket_name, article_id + "/" + file_name)

                orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name + "/" + file_name
                dest_resource = storage_provider + cdn_bucket_name + "/" + article_id + "/" + file_name
                additional_dest_resource = storage_provider + published_bucket_path + "/" + article_id + "/" + file_name
                storage_context.copy_resource(orig_resource, dest_resource)
                storage_context.copy_resource(orig_resource, additional_dest_resource)

                if self.logger:
                    self.logger.info("Uploaded key %s to %s" % (file_name, cdn_bucket_name))
                file_name_no_extension, extension = file_name.rsplit(".", 1)
                if extension not in no_download_extensions:

                    content_type = self.content_type_from_file_name(file_name)
                    dict_metadata = {
                        "Content-Disposition": str("Content-Disposition: attachment; filename=" + file_name + ";"),
                        "Content-Type": content_type,
                    }
                    file_download = file_name_no_extension + "-download." + extension

                    orig_resource_download = dest_resource
                    dest_resource_download = storage_provider + cdn_bucket_name + "/" + article_id + "/" + file_download
                    additional_dest_resource_download = (
                        storage_provider + published_bucket_path + "/" + article_id + "/" + file_download
                    )

                    # file is copied with additional metadata
                    storage_context.copy_resource(
                        orig_resource_download, dest_resource_download, additional_dict_metadata=dict_metadata
                    )
                    # additional metadata is already set in the origin resource so it will be copied across by default
                    storage_context.copy_resource(dest_resource_download, additional_dest_resource_download)

            self.emit_monitor_event(
                self.settings,
                article_id,
                version,
                run,
                "Deposit assets",
                "end",
                "Deposited assets for article " + article_id,
            )

        except Exception as e:
            self.logger.exception("Exception when Depositing assets")
            self.emit_monitor_event(
                self.settings,
                article_id,
                version,
                run,
                "Deposit assets",
                "error",
                "Error depositing assets for article " + article_id + " message:" + e.message,
            )
            return False

        return True
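
The '-download' naming used above inserts a suffix before the final extension so the CDN can serve an attachment variant alongside the inline one. As a standalone helper:

def download_file_name(file_name):
    """Build the '-download' variant name used above."""
    name, extension = file_name.rsplit('.', 1)
    return name + '-download.' + extension

assert download_file_name('elife-00353-fig1-v1.jpg') == 'elife-00353-fig1-v1-download.jpg'
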
    def do_activity(self, data=None):

        """
        Do the work
        """
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        info = S3NotificationInfo.from_dict(data)

        # set up required connections
        conn = S3Connection(self.settings.aws_access_key_id, self.settings.aws_secret_access_key)
        source_bucket = conn.get_bucket(info.bucket_name)
        dest_bucket = conn.get_bucket(self.settings.publishing_buckets_prefix + self.settings.expanded_bucket)
        session = Session(self.settings)

        article_id_match = re.match(ur'elife-(.*?)-', info.file_name)
        article_id = article_id_match.group(1)
        session.store_value(self.get_workflowId(), 'article_id', article_id)

        if self.logger:
            self.logger.info("Expanding file %s" % info.file_name)

        # extract any doi, version and updated date information from the filename
        version = None
        # zip name contains version information for previously archived zip files
        m = re.search(ur'-v([0-9]*?)[\.|-]', info.file_name)
        if m is not None:
            version = m.group(1)
        if version is None:
            version = self.get_next_version(article_id)
        if version == '-1':
            return False  # version could not be determined, exit workflow. Can't emit event as no version.

        status = None
        sm = re.search(ur'.*?-.*?-(.*?)-', info.file_name)
        if sm is not None:
            status = sm.group(1)
        if status is None:
            return False  # status could not be determined, exit workflow. Can't emit event as no version.
        run = str(uuid.uuid4())
        # store version for other activities in this workflow execution
        session.store_value(self.get_workflowId(), 'version', version)

        # TODO : extract and store updated date if supplied

        article_version_id = article_id + '.' + version
        session.store_value(self.get_workflowId(), 'article_version_id', article_version_id)
        session.store_value(self.get_workflowId(), 'run', run)
        session.store_value(self.get_workflowId(), 'status', status)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                                "Starting expansion of article " + article_id)
        self.set_monitor_property(self.settings, article_id, "article_id", article_id, "text")
        try:

            # download zip to temp folder
            tmp = self.get_tmp_dir()
            key = Key(source_bucket)
            key.key = info.file_name
            local_zip_file = self.open_file_from_tmp_dir(info.file_name, mode='wb')
            key.get_contents_to_file(local_zip_file)
            local_zip_file.close()

            bucket_folder_name = article_version_id + '/' + run
            folder_name = path.join(article_version_id, run)

            # extract zip contents
            content_folder = path.join(tmp, folder_name)
            makedirs(content_folder)
            with ZipFile(path.join(tmp, info.file_name)) as zf:
                zf.extractall(content_folder)

            # TODO : rename files (versions!)

            # TODO : edit xml and rename references

            upload_filenames = []
            for f in listdir(content_folder):
                if isfile(join(content_folder, f)) and f[0] not in ('.', '_'):
                    upload_filenames.append(f)

            for filename in upload_filenames:
                source_path = path.join(content_folder, filename)
                dest_path = bucket_folder_name + '/' + filename
                k = Key(dest_bucket)
                k.key = dest_path
                k.set_contents_from_filename(source_path)

            session.store_value(self.get_workflowId(), 'expanded_folder', bucket_folder_name)
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "end",
                                    "Finished expansion of article " + article_id +
                                    " for version " + version + " run " + str(run) + " into " + bucket_folder_name)
        except Exception as e:
            self.logger.exception("Exception when expanding article")
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "error",
                                    "Error expanding article " + article_id + " message:" + e.message)
            return False

        return True
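
The version lookup above only matches zips that already carry a version token; everything else falls through to get_next_version. The regex behaviour in isolation:

import re

def version_from_zip_name(file_name):
    """Previously archived zips carry a '-v2' style token; new
    uploads do not, and return None here."""
    m = re.search(r'-v([0-9]*?)[.|-]', file_name)
    return m.group(1) if m is not None else None

assert version_from_zip_name('elife-00353-vor-v2-20121213000000.zip') == '2'
assert version_from_zip_name('elife-00353-vor-r1.zip') is None
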
    def do_activity(self, data=None):
        """
        Do the work
        """

        session = Session(self.settings)
        version = session.get_value(self.get_workflowId(), "version")
        article_id = session.get_value(self.get_workflowId(), "article_id")
        article_version_id = article_id + "." + version
        run = session.get_value(self.get_workflowId(), "run")

        self.emit_monitor_event(
            self.settings,
            article_id,
            version,
            run,
            "Convert JATS",
            "start",
            "Starting conversion of article xml to EIF for " + article_id,
        )

        try:

            if self.logger:
                self.logger.info("data: %s" % json.dumps(data, sort_keys=True, indent=4))
            expanded_folder_name = session.get_value(self.get_workflowId(), "expanded_folder")
            expanded_folder_bucket = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket
            if self.logger:
                self.logger.info("expanded folder %s" % expanded_folder_name)

            conn = S3Connection(self.settings.aws_access_key_id, self.settings.aws_secret_access_key)
            bucket = conn.get_bucket(expanded_folder_bucket)

            bucket_folder_name = expanded_folder_name
            (xml_key, xml_filename) = self.get_article_xml_key(bucket, bucket_folder_name)
            if xml_key is None:
                self.logger.error("Article XML path not found")
                return False

            if self.logger:
                self.logger.info("Converting file %s" % xml_filename)

            xml = xml_key.get_contents_as_string()
            if self.logger:
                self.logger.info("Downloaded contents of file %s" % xml_filename)

            json_output = jats_scraper.scrape(xml)

            if self.logger:
                self.logger.info("Scraped file %s" % xml_filename)

            output_folder = article_version_id + "/" + run
            output_name = xml_filename.replace(".xml", ".json")
            output_bucket = self.settings.publishing_buckets_prefix + self.settings.eif_bucket
            output_path = output_folder + "/" + output_name
            destination = conn.get_bucket(output_bucket)
            destination_key = Key(destination)
            output_key = output_path
            destination_key.key = output_key
            destination_key.set_contents_from_string(json_output)

            if self.logger:
                self.logger.info("Uploaded key %s to %s" % (output_path, output_bucket))

            session.store_value(self.get_workflowId(), "eif_filename", output_key)
            eif_object = json.loads(json_output)
            session.store_value(self.get_workflowId(), "article_path", eif_object.get("path"))
            self.emit_monitor_event(
                self.settings,
                article_id,
                version,
                run,
                "Post EIF",
                "success",
                "XML converted to EIF for article " + article_id + " to " + output_key,
            )

        except Exception as e:
            self.logger.exception("Exception when converting article XML to EIF")
            self.emit_monitor_event(
                self.settings,
                article_id,
                version,
                run,
                "Convert JATS",
                "error",
                "Error in conversion of article xml to EIF for " + article_id + " message:" + e.message,
            )
            return False

        return True

    def do_activity(self, data=None):
        """
        Do the work
        """
        if self.logger:
            self.logger.info("data: %s" % json.dumps(data, sort_keys=True, indent=4))

        run = data["run"]
        session = Session(self.settings)
        version = session.get_value(run, "version")
        article_id = session.get_value(run, "article_id")

        self.emit_monitor_event(
            self.settings,
            article_id,
            version,
            run,
            self.pretty_name,
            "start",
            "Starting preparation of article for EIF " + article_id,
        )

        try:
            eif_location = session.get_value(run, "eif_location")
            eif_bucket = self.settings.publishing_buckets_prefix + self.settings.eif_bucket

            article_path = session.get_value(run, "article_path")
            self.set_monitor_property(self.settings, article_id, "path", article_path, "text", version=version)

            expanded_folder = session.get_value(run, "expanded_folder")
            status = session.get_value(run, "status")

            update_date = session.get_value(run, "update_date")

            carry_over_data = {
                "eif_location": eif_location,
                "eif_bucket": eif_bucket,
                "passthrough": {
                    "article_id": article_id,
                    "version": version,
                    "run": run,
                    "article_path": article_path,
                    "expanded_folder": expanded_folder,
                    "status": status,
                    "update_date": update_date,
                },
            }

            message = carry_over_data

            sqs_conn = boto.sqs.connect_to_region(
                self.settings.sqs_region,
                aws_access_key_id=self.settings.aws_access_key_id,
                aws_secret_access_key=self.settings.aws_secret_access_key,
            )

            out_queue = sqs_conn.get_queue(self.settings.website_ingest_queue)
            m = Message()
            m.set_body(json.dumps(message))
            out_queue.write(m)

        except Exception as e:
            self.logger.exception("Exception when Preparing for PostEIF")
            self.emit_monitor_event(
                self.settings,
                article_id,
                version,
                run,
                self.pretty_name,
                "error",
                "Error submitting EIF For article" + article_id + " message:" + str(e.message),
            )
            return False

        self.emit_monitor_event(
            self.settings,
            article_id,
            version,
            run,
            self.pretty_name,
            "end",
            "Finished preparation of article for EIF " + article_id,
        )
        return True