Example #1
0
    def do_activity(self, data=None):
        """Run the Copy Glencoe Still Images activity.

        ``data`` either describes a standalone invocation (``standalone``
        key set) or a workflow run whose details are read from the session.
        Returns the ``get_events`` result (standalone) or an activity result
        constant.
        """
        if self.logger:
            self.logger.info('data: %s' %
                             json.dumps(data, sort_keys=True, indent=4))

        try:
            # dict.get collapses the "key present and truthy" check
            if data.get('standalone'):
                article_id = data['article_id']
                poa = data['standalone_is_poa']
                (start_msg, end_msg, result) = self.get_events(article_id,
                                                               poa,
                                                               version=None,
                                                               run=None)
                # index 6 of the end message tuple appears to be the
                # human-readable message text -- TODO confirm against the
                # emit_monitor_event argument order
                self.logger.info(end_msg[6])
                return result

            run = data['run']
            session = Session(self.settings)
            article_id = session.get_value(run, 'article_id')
            version = session.get_value(run, 'version')
            file_name = session.get_value(run, 'file_name')
            # PoA articles are identified by their file name
            poa = "poa" in file_name
            (start_msg, end_msg,
             success) = self.get_events(article_id, poa, version, run)
            self.emit_monitor_event(*start_msg)
            self.emit_monitor_event(*end_msg)
            return success
        except Exception:
            # the bound exception variable was unused; logger.exception
            # already records the traceback
            self.logger.exception(
                "Error starting Copy Glencoe Still Images activity")
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Example #2
0
    def do_activity(self, data=None):
        """Prepare the ingest message for Lax and queue it for delivery."""
        if self.logger:
            self.logger.info('data: %s' %
                             json.dumps(data, sort_keys=True, indent=4))

        run = data["run"]
        session = Session(self.settings)
        # pull the values this step needs from the session into data
        for session_key in ('version', 'article_id', 'status',
                            'expanded_folder', 'update_date'):
            data[session_key] = session.get_value(run, session_key)

        queue_connection_settings = {
            "sqs_region": self.settings.sqs_region,
            "aws_access_key_id": self.settings.aws_access_key_id,
            "aws_secret_access_key": self.settings.aws_secret_access_key
        }

        (message, queue, start_event, end_event, end_event_details,
         exception) = self.get_message_queue(
             data, self.settings.consider_Lax_elife_2_0)

        self.emit_monitor_event(*start_event)

        # an "error" end event means the message could not be prepared
        if end_event == "error":
            self.logger.exception(
                "Exception when Preparing Ingest for Lax. Details: %s",
                exception)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        self.write_message(queue_connection_settings, queue, message)
        self.emit_monitor_event(*end_event_details)
        return activity.activity.ACTIVITY_SUCCESS
    def do_activity(self, data=None):
        """Verify that every article figure is served by the IIIF endpoint.

        Reads run details from the session, lists the figures in the CDN
        bucket, probes each through the IIIF resolver, and emits monitor
        events for the outcome.  Returns an activity result constant.
        """
        try:
            if self.logger:
                self.logger.info('data: %s' %
                                 json.dumps(data, sort_keys=True, indent=4))

            run = data['run']
            session = Session(self.settings)
            article_id = session.get_value(run, 'article_id')
            version = session.get_value(run, 'version')

        except Exception as e:
            self.logger.exception(str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        try:
            storage_context = StorageContext(self.settings)
            bucket = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
            images_resource = "".join((self.settings.storage_provider, "://",
                                       bucket, "/", article_id))

            files_in_bucket = storage_context.list_resources(images_resource)
            original_figures = article_structure.get_figures_for_iiif(
                files_in_bucket)

            iiif_path_for_article = self.settings.iiif_resolver.replace(
                '{article_id}', article_id)

            results = self.retrieve_endpoints_check(original_figures,
                                                    iiif_path_for_article)

            # each result starts with a success flag; keep the failures
            # (comprehension replaces filter(lambda x: x[0] == False, ...))
            bad_images = [result for result in results if not result[0]]

            if bad_images:
                # report endpoints that did not work
                self.emit_monitor_event(
                    self.settings, article_id, version, run, self.pretty_name,
                    "error",
                    "Some images are not available through the IIIF endpoint: "
                    + str(bad_images))

                return activity.activity.ACTIVITY_PERMANENT_FAILURE

            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "end", "Finished Verification. All endpoints work. Article: " +
                article_id)
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception(str(e))
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "error",
                "An error occurred when checking IIIF endpoint. Article " +
                article_id + '; message: ' + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Example #4
0
    def do_activity(self, data=None):
        """Convert the article's figure images (TIF sources) to JPG on the CDN.

        Downloads each figure from the expanded-article bucket, generates the
        configured JPG derivatives and publishes them to the CDN bucket.
        Returns an activity result constant.
        """
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "start",
                                "Starting submission convert images to jpg for article " + article_id)

        try:
            expanded_folder_name = session.get_value(run, 'expanded_folder')
            expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                      self.settings.expanded_bucket)
            storage_provider = self.settings.storage_provider + "://"
            orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name

            storage_context = StorageContext(self.settings)
            files_in_bucket = storage_context.list_resources(orig_resource)

            # materialize the filter result: it is iterated below AND its
            # length is reported in the end event -- a bare filter object
            # has no len() on Python 3
            figures = list(filter(article_structure.article_figure, files_in_bucket))

            # download is not a IIIF asset but is currently kept for compatibility
            # download may become obsolete in future
            formats = {"Original": {
                            "sources": "tif",
                            "format": "jpg",
                            "download": "yes"
                        }}

            for file_name in figures:
                figure_resource = orig_resource + "/" + file_name
                file_path = self.get_tmp_dir() + os.sep + file_name
                file_pointer = storage_context.get_resource_to_file_pointer(figure_resource, file_path)

                cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
                cdn_resource_path = storage_provider + cdn_bucket_name + "/" + article_id + "/"

                publish_locations = [cdn_resource_path]

                image_conversion.generate_images(self.settings, formats, file_pointer, article_structure.ArticleInfo(file_name),
                                                 publish_locations, self.logger)

            self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "end",
                                    "Finished converting images for " + article_id + ": " +
                                    str(len(figures)) + " images processed ")
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception("An error occurred during " + self.pretty_name)
            # str(e) replaces e.message (removed in Python 3); also add the
            # missing space after "article"
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "error",
                                    "Error converting images to JPG for article " + article_id +
                                    " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Example #5
0
    def do_activity(self, data=None):
        """Schedule a Crossref deposit for the article.

        Finds the article XML in the expanded bucket and copies it, renamed,
        into the Crossref outbox folder.  Returns True on success, False on
        error (this activity reports via booleans rather than ACTIVITY_*
        constants).
        """
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

        self.expanded_bucket_name = (self.settings.publishing_buckets_prefix
                                     + self.settings.expanded_bucket)
        self.crossref_bucket_name = (self.settings.publishing_buckets_prefix
                                     + self.settings.poa_packaging_bucket)

        session = Session(self.settings)

        run = data['run']
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')
        expanded_folder_name = session.get_value(run, 'expanded_folder')

        conn = S3Connection(self.settings.aws_access_key_id,
                            self.settings.aws_secret_access_key)
        bucket = conn.get_bucket(self.expanded_bucket_name)

        self.emit_monitor_event(self.settings, article_id, version, run,
                                "Schedule Crossref", "start",
                                "Starting scheduling of crossref deposit for " + article_id)

        try:
            (xml_key, xml_filename) = ConvertJATS.get_article_xml_key(bucket, expanded_folder_name)

            # Rename the XML file to match what is used already
            new_key_name = self.new_crossref_xml_name(
                prefix=self.crossref_outbox_folder,
                journal='elife',
                article_id=str(article_id).zfill(5))

            self.copy_article_xml_to_crossref_outbox(
                new_key_name=new_key_name,
                source_bucket_name=self.expanded_bucket_name,
                old_key_name=xml_key.name)

            self.emit_monitor_event(self.settings, article_id, version, run, "Schedule Crossref",
                                    "end", "Finished scheduling of crossref deposit " + article_id +
                                    " for version " + version + " run " + str(run))
        except Exception as e:
            self.logger.exception("Exception when scheduling crossref")
            # str(e) replaces e.message, which was removed in Python 3
            self.emit_monitor_event(self.settings, article_id, version, run, "Schedule Crossref",
                                    "error", "Error scheduling crossref " + article_id +
                                    " message:" + str(e))
            return False

        return True
Example #6
0
    def do_activity(self, data=None):
        """
        Do the work: verify Glencoe videos referenced by the article XML.
        """
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

        session = Session(self.settings)
        run = data['run']
        article_id = session.get_value(run, 'article_id')
        version = session.get_value(run, 'version')

        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "start",
                                "Starting Glencoe video check for " + article_id)
        try:
            expanded_folder = session.get_value(run, 'expanded_folder')
            if expanded_folder is None:
                raise RuntimeError("No session value for expanded folder")

            expanded_bucket = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket
            self.logger.info("expanded_bucket: " + expanded_bucket)

            xml_filename = lax_provider.get_xml_file_name(
                self.settings, expanded_folder, expanded_bucket, version)
            if xml_filename is None:
                raise RuntimeError("No xml_filename found.")

            xml_origin = "".join((self.settings.storage_provider, "://", expanded_bucket, "/",
                                  expanded_folder + '/' + xml_filename))

            xml_content = StorageContext(self.settings).get_resource_as_string(xml_origin)

            # no media tags: nothing further to verify
            if not glencoe_check.has_videos(xml_content):
                self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "end",
                                        "Finished Verification. No Glencoe media tags found in xml. "
                                        "Article: " + article_id)
                return True

            # videos present: confirm the Glencoe metadata sources validate
            glencoe_metadata = glencoe_check.metadata(
                glencoe_check.check_msid(article_id), self.settings)
            glencoe_check.validate_sources(glencoe_metadata)
            self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "end",
                                    "Finished Verification. Glencoe is available. Article: " + article_id)
            return True
        except AssertionError as err:
            # validation signals a not-yet-available video; back off and retry
            self.logger.info(err)
            self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "error",
                                    "Glencoe video is not available for article " + article_id + '; message: ' + str(err))
            time.sleep(60)
            return activity.activity.ACTIVITY_TEMPORARY_FAILURE
        except Exception as e:
            self.logger.exception(str(e))
            self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "error",
                                    "An error occurred when checking for Glencoe video. Article " +
                                    article_id + '; message: ' + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
    def do_activity(self, data=None):
        """Apply the version number to the article's S3 object names.

        Reads run details from the session, emits a start event, then renames
        the objects in the expanded folder to carry the version number.
        Returns an activity result constant.
        """
        try:

            self.expanded_bucket_name = (
                self.settings.publishing_buckets_prefix +
                self.settings.expanded_bucket)

            run = data['run']
            session = Session(self.settings)
            version = session.get_value(run, 'version')
            article_id = session.get_value(run, 'article_id')

            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "start",
                "Starting applying version number to files for " + article_id)
        except Exception as e:
            self.logger.exception(str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        try:

            if self.logger:
                self.logger.info('data: %s' %
                                 json.dumps(data, sort_keys=True, indent=4))

            if version is None:
                self.emit_monitor_event(
                    self.settings, article_id, version, run, self.pretty_name,
                    "error", "Error in applying version number to files for " +
                    article_id + " message: No version available")
                return activity.activity.ACTIVITY_PERMANENT_FAILURE

            expanded_folder_name = session.get_value(run, 'expanded_folder')
            # bucket folder names always use '/' separators
            bucket_folder_name = expanded_folder_name.replace(os.sep, '/')
            self.rename_article_s3_objects(bucket_folder_name, version)

            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "end", "Finished applying version number to article " +
                article_id + " for version " + version + " run " + str(run))

        except Exception as e:
            self.logger.exception(str(e))
            # str(e) replaces str(e.message); Exception.message was removed
            # in Python 3
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "error", "Error in applying version number to files for " +
                article_id + " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return activity.activity.ACTIVITY_SUCCESS
    def do_activity(self, data=None):
        """
        Do the work: resize the article's images and publish them to the
        CDN path.

        Reads run details from the session, processes each non-directory key
        in the expanded folder, and emits monitor events.  Returns an
        activity result constant.
        """
        if self.logger:
            self.logger.info('data: %s' %
                             json.dumps(data, sort_keys=True, indent=4))

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(
            self.settings, article_id, version, run, "Resize Images", "start",
            "Starting submission resize of images for article " + article_id)

        try:
            expanded_folder_name = session.get_value(run, 'expanded_folder')
            cdn_path = article_id

            if self.logger:
                self.logger.info("Converting images for folder %s" %
                                 expanded_folder_name)

            # get information on files in the expanded article bucket for notified zip file
            bucket_folder_name = expanded_folder_name
            bucket, file_infos = self.get_file_infos(bucket_folder_name)

            image_count = 0
            for file_info in file_infos:
                key = bucket.get_key(file_info.key)
                # see : http://stackoverflow.com/questions/9954521/s3-boto-list-keys-sometimes-returns-directory-key
                if not key.name.endswith("/"):
                    # process each key in the folder; count only keys that
                    # are actually processed (directory keys previously
                    # inflated the reported count)
                    self.process_key(key, cdn_path)
                    image_count += 1
            self.emit_monitor_event(
                self.settings, article_id, version, run, "Resize Images",
                "end", "Finished converting images for " + article_id + ": " +
                str(image_count) + " images processed ")

            self.clean_tmp_dir()

        except Exception as e:
            self.logger.exception("Exception when resizing images")
            # str(e) replaces e.message (removed in Python 3); also add the
            # missing space after "article"
            self.emit_monitor_event(
                self.settings, article_id, version, run, "Resize Images",
                "error", "Error resizing images for article " + article_id +
                " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
        return activity.activity.ACTIVITY_SUCCESS
    def do_activity(self, data=None):
        """Copy pre-ingest assets from the expanded folder to the CDN bucket.

        Emits monitor events for start/end/error and returns an activity
        result constant.
        """
        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(self.settings, article_id, version, run,
                                self.pretty_name, "start",
                                "Depositing Ingest assets for " + article_id)

        try:

            expanded_folder_name = session.get_value(run, 'expanded_folder')
            expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                      self.settings.expanded_bucket)

            cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket

            storage_context = StorageContext(self.settings)
            storage_provider = self.settings.storage_provider + "://"

            orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name
            files_in_bucket = storage_context.list_resources(orig_resource)

            pre_ingest_assets = article_structure.pre_ingest_assets(
                files_in_bucket)

            for file_name in pre_ingest_assets:

                orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name + "/" + file_name
                dest_resource = storage_provider + cdn_bucket_name + "/" + article_id + "/" + file_name
                storage_context.copy_resource(orig_resource, dest_resource)

                if self.logger:
                    self.logger.info("Uploaded file %s to %s" %
                                     (file_name, cdn_bucket_name))

            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "end", "Deposited Ingest assets for article " + article_id)
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception("Exception when Depositing Ingest assets")
            # str(e) replaces e.message, which was removed in Python 3
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "error", "Error depositing Ingest assets for article " +
                article_id + " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Example #10
0
    def get_workflow_data(self, data):
        """Return the workflow data for this activity run.

        If ``data`` carries base64-encoded ``publication_data``, decode it
        and return its ``workflow_data`` member.  Otherwise (the silent-
        corrections path) merge the session's ``eif_location`` into ``data``
        and return ``data`` itself.
        """
        if "publication_data" in data:
            # b64decode replaces base64.decodestring, which was deprecated
            # and removed in Python 3.9; by default it likewise skips
            # non-alphabet characters such as newlines
            publication_data = json.loads(
                base64.b64decode(data['publication_data']))
            workflow_data = publication_data['workflow_data']
            return workflow_data

        # added this block because when doing silent corrections we will not have the opportunity to get the data from
        # the previous workflow (PreparePostEIF sets the data and when in silent corrections it is part of the same
        # workflow) currently we cannot mutate the data and pass it through activities, only workflows
        # it is an improvement to be made. Needs research on AWS SWF.
        # it will also work when not in Silent corrections, it will just override the setting with the same data
        session = Session(self.settings)
        eif_location = session.get_value(data['run'], 'eif_location')
        if eif_location is not None:
            data['eif_location'] = eif_location

        return data
Example #11
0
    def do_activity(self, data):
        """Mark the article's EIF as published.

        Validates the session's ``eif_location``, builds the EIF origin
        resource, and delegates to ``set_eif_to_publish``.  Returns an
        activity result constant.
        """
        session = Session(self.settings)
        eif_location = session.get_value(data['run'], 'eif_location')

        self.emit_monitor_event(self.settings, data['article_id'],
                                data['version'], data['run'], self.pretty_name,
                                "start", "Starting to set EIF to publish")

        try:

            # str replaces the Python 2-only basestring (NameError on Py3)
            if not isinstance(eif_location, str):
                self.logger.error(self.pretty_name +
                                  " error. eif_location must be string")
                raise Exception("eif_location not available")

            storage_context = StorageContext(self.settings)

            eif_origin = "".join((self.settings.storage_provider, "://",
                                  self.settings.publishing_buckets_prefix +
                                  self.settings.eif_bucket, "/", eif_location))
        except Exception as e:

            # str(e) replaces e.message, which was removed in Python 3
            self.emit_monitor_event(self.settings, data['article_id'],
                                    data['version'], data['run'],
                                    self.pretty_name, "error", str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        success, error = self.set_eif_to_publish(storage_context, eif_origin)

        if success:
            self.emit_monitor_event(self.settings, data['article_id'],
                                    data['version'], data['run'],
                                    self.pretty_name, "end",
                                    "Finished to set EIF to publish")
            return activity.activity.ACTIVITY_SUCCESS

        self.logger.error(error)
        self.emit_monitor_event(self.settings, data['article_id'],
                                data['version'], data['run'], self.pretty_name,
                                "error", error)
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
Example #12
0
    def do_activity(self, data=None):
        """Look up the article's version date and store it in the session.

        Returns an activity result constant.
        """
        # pre-bind so the except handler below cannot raise NameError when
        # the failure happens before these names are assigned
        version = None
        article_structure = None
        try:
            session = Session(self.settings)
            version = session.get_value(data['run'], 'version')
            filename = session.get_value(data['run'], 'filename_last_element')

            article_structure = ArticleInfo(filename)

            version_date, error = self.get_version(
                self.settings, article_structure, article_structure.article_id,
                version)

            if error is not None:
                self.logger.error(error)
                self.emit_monitor_event(
                    self.settings, article_structure.article_id, version,
                    data['run'], self.pretty_name, "error", " ".join(
                        ("Error Looking up version article",
                         article_structure.article_id, "message:", error)))
                return activity.activity.ACTIVITY_PERMANENT_FAILURE

            self.emit_monitor_event(
                self.settings, article_structure.article_id, version,
                data['run'], self.pretty_name, "end", " ".join(
                    ("Finished Version Lookup for article",
                     article_structure.article_id, "version:", version)))

            session.store_value(data['run'], 'update_date', version_date)

            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception(
                "Exception when trying to Lookup next version")
            # fall back to a placeholder when the failure happened before the
            # article structure could be built
            article_id = (article_structure.article_id
                          if article_structure else "unknown")
            self.emit_monitor_event(
                self.settings, article_id, version,
                data['run'], self.pretty_name, "error", " ".join(
                    ("Error looking up version for article",
                     article_id, "message:", str(e))))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
    def do_activity(self, data=None):
        """
        Do the work: collect session values for the run, build the
        carry-over message and queue it for website ingest.

        Returns True on success, False on error (this activity reports via
        booleans rather than ACTIVITY_* constants).
        """
        if self.logger:
            self.logger.info('data: %s' %
                             json.dumps(data, sort_keys=True, indent=4))

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "start",
            "Starting preparation of article for EIF " + article_id)

        try:
            eif_location = session.get_value(run, 'eif_location')
            eif_bucket = self.settings.publishing_buckets_prefix + self.settings.eif_bucket

            article_path = session.get_value(run, 'article_path')
            self.set_monitor_property(self.settings,
                                      article_id,
                                      'path',
                                      article_path,
                                      'text',
                                      version=version)

            expanded_folder = session.get_value(run, 'expanded_folder')
            status = session.get_value(run, 'status')

            update_date = session.get_value(run, 'update_date')

            # message body passed through to the website ingest queue
            # (the redundant carry_over_data alias was removed)
            message = {
                'eif_location': eif_location,
                'eif_bucket': eif_bucket,
                'passthrough': {
                    'article_id': article_id,
                    'version': version,
                    'run': run,
                    'article_path': article_path,
                    'expanded_folder': expanded_folder,
                    'status': status,
                    'update_date': update_date,
                }
            }

            sqs_conn = boto.sqs.connect_to_region(
                self.settings.sqs_region,
                aws_access_key_id=self.settings.aws_access_key_id,
                aws_secret_access_key=self.settings.aws_secret_access_key)

            out_queue = sqs_conn.get_queue(self.settings.website_ingest_queue)
            m = Message()
            m.set_body(json.dumps(message))
            out_queue.write(m)

        except Exception as e:
            self.logger.exception("Exception when Preparing for PostEIF")
            # str(e) replaces str(e.message); Exception.message was removed
            # in Python 3
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "error", "Error submitting EIF For article" + article_id +
                " message:" + str(e))
            return False

        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "end",
            "Finished preparation of article for EIF " + article_id)
        return True
Example #14
0
    def do_activity(self, data=None):
        """
        Do the work: convert the article XML to EIF JSON.

        Downloads the article XML from the expanded bucket, scrapes it into
        EIF JSON, uploads the result to the EIF bucket, records the output
        location in the session, and emits monitor events.  Returns True on
        success, False on error.
        """

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')
        article_version_id = article_id + '.' + version

        self.emit_monitor_event(
            self.settings, article_id, version, run, "Convert JATS", "start",
            "Starting conversion of article xml to EIF for " + article_id)

        try:

            if self.logger:
                self.logger.info('data: %s' %
                                 json.dumps(data, sort_keys=True, indent=4))
            expanded_folder_name = session.get_value(run, 'expanded_folder')
            expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                      self.settings.expanded_bucket)

            conn = S3Connection(self.settings.aws_access_key_id,
                                self.settings.aws_secret_access_key)
            bucket = conn.get_bucket(expanded_folder_bucket)

            bucket_folder_name = expanded_folder_name
            (xml_key,
             xml_filename) = self.get_article_xml_key(bucket,
                                                      bucket_folder_name)
            if xml_key is None:
                self.logger.error("Article XML path not found")
                return False

            if self.logger:
                self.logger.info("Converting file %s" % xml_filename)

            xml = xml_key.get_contents_as_string()
            if self.logger:
                self.logger.info("Downloaded contents of file %s" %
                                 xml_filename)

            json_output = jats_scraper.scrape(xml, article_version=version)

            # Add update date if it is in the session
            update_date = session.get_value(run, 'update_date')
            if update_date:
                json_output = self.add_update_date_to_json(
                    json_output, update_date, xml_filename)

            if self.logger:
                self.logger.info("Scraped file %s" % xml_filename)

            # upload the EIF JSON under <article_id>.<version>/<run>/
            output_folder = article_version_id + '/' + run
            output_name = xml_filename.replace('.xml', '.json')
            output_bucket = self.settings.publishing_buckets_prefix + self.settings.eif_bucket
            output_path = output_folder + '/' + output_name
            destination = conn.get_bucket(output_bucket)
            destination_key = Key(destination)
            output_key = output_path
            destination_key.key = output_key
            destination_key.set_contents_from_string(json_output)

            if self.logger:
                self.logger.info("Uploaded key %s to %s" %
                                 (output_path, output_bucket))

            self.set_dashboard_properties(json_output, article_id, version)

            # record where the EIF lives so later activities can find it
            session.store_value(run, "eif_location", output_key)
            eif_object = json.loads(json_output)
            session.store_value(run, 'article_path', eif_object.get('path'))
            self.emit_monitor_event(
                self.settings, article_id, version, run, "Convert JATS", "end",
                "XML converted to EIF for article " + article_id + " to " +
                output_key)

        except Exception as e:
            self.logger.exception(
                "Exception when converting article XML to EIF")
            # str(e) replaces e.message, which was removed in Python 3
            self.emit_monitor_event(
                self.settings, article_id, version, run, "Convert JATS",
                "error", "Error in conversion of article xml to EIF for " +
                article_id + " message:" + str(e))
            return False

        return True
    def do_activity(self, data=None):

        """
        Expand an article zip into the publishing "expanded" bucket.

        Downloads the submitted zip from the S3 bucket named in the
        notification, extracts it to a temp folder, uploads each regular
        (non-hidden) file to
        <publishing_buckets_prefix><expanded_bucket>/<article_id>.<version>/<run>/
        and records progress in the workflow session.

        :param data: workflow data dict; must contain the session 'run' key
            and the S3 notification fields consumed by
            S3NotificationInfo.from_dict
        :returns: True on success, or
            activity.activity.ACTIVITY_PERMANENT_FAILURE when the status
            cannot be determined or expansion fails
        """

        run = data['run']

        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        info = S3NotificationInfo.from_dict(data)

        storage_context = StorageContext(self.settings)

        session = Session(self.settings)

        filename_last_element = session.get_value(run, 'filename_last_element')
        # zip name contains version information for previously archived zip files
        article_structure = ArticleInfo(filename_last_element)
        article_id = article_structure.article_id
        session.store_value(run, 'article_id', article_id)
        session.store_value(run, 'file_name', info.file_name)

        if self.logger:
            self.logger.info("Expanding file %s" % info.file_name)

        version = session.get_value(run, 'version')

        status = article_structure.status
        # membership test already excludes None; no separate None check needed
        if status not in ('vor', 'poa'):
            self.logger.error("Name '%s' did not match expected pattern for status" %
                              filename_last_element)
            # status could not be determined, exit workflow.
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        article_version_id = article_id + '.' + version
        session.store_value(run, 'article_version_id', article_version_id)
        session.store_value(run, 'run', run)
        session.store_value(run, 'status', status)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                                "Starting expansion of article " + article_id)


        try:
            # download zip to temp folder
            tmp = self.get_tmp_dir()
            local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
            storage_resource_origin = self.settings.storage_provider + "://" + info.bucket_name + "/" + info.file_name
            storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
            local_zip_file.close()

            # extract zip contents
            folder_name = path.join(article_version_id, run)
            content_folder = path.join(tmp, folder_name)
            makedirs(content_folder)
            with ZipFile(path.join(tmp, filename_last_element)) as zf:
                zf.extractall(content_folder)

            # collect regular files, skipping hidden/system entries
            # (names beginning with '.' or '_', e.g. __MACOSX artifacts)
            upload_filenames = [f for f in listdir(content_folder)
                                if isfile(join(content_folder, f))
                                and not f.startswith(('.', '_'))]
            self.check_filenames(upload_filenames)

            bucket_folder_name = article_version_id + '/' + run
            for filename in upload_filenames:
                source_path = path.join(content_folder, filename)
                dest_path = bucket_folder_name + '/' + filename
                storage_resource_dest = self.settings.storage_provider + "://" + self.settings.publishing_buckets_prefix + \
                                        self.settings.expanded_bucket + "/" + dest_path
                storage_context.set_resource_from_filename(storage_resource_dest, source_path)

            self.clean_tmp_dir()

            session.store_value(run, 'expanded_folder', bucket_folder_name)
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "end", "Finished expansion of article " + article_id +
                                    " for version " + version + " run " + str(run) +
                                    " into " + bucket_folder_name)
        except Exception as e:
            self.logger.exception("Exception when expanding article")
            # str(e) instead of e.message: e.message does not exist on all
            # exception types and is removed entirely in Python 3
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "error", "Error expanding article " + article_id +
                                    " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return True