Code Example #1
    def __init__(self,
                 settings,
                 logger,
                 conn=None,
                 token=None,
                 activity_task=None):
        activity.activity.__init__(self, settings, logger, conn, token,
                                   activity_task)

        self.name = "SendQueuedEmail"
        self.version = "1"
        self.default_task_heartbeat_timeout = 30
        self.default_task_schedule_to_close_timeout = 60 * 5
        self.default_task_schedule_to_start_timeout = 30
        self.default_task_start_to_close_timeout = 60 * 5
        self.description = "Send email in the email queue."

        # Data provider
        self.db = dblib.SimpleDB(settings)

        # Default limit of emails per activity
        self.limit = 100

        # Default rate limit
        self.rate_limit_per_sec = 10

        # S3 bucket where email body content is stored
        self.email_body_bucket = settings.bot_bucket
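
A minimal sketch of how an activity class like this is constructed, based only on the signature above; the class name is assumed from self.name, and the settings and logger objects are assumed to come from the project's settingsLib and log modules, as in the cron starter examples below:

    # Hypothetical driver code, not part of the project source
    activity_object = SendQueuedEmail(settings, logger)
    # The activity's SimpleDB data provider can then be connected,
    # as Code Example #18 does with self.db.connect()
    db_conn = activity_object.db.connect()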
Code Example #2
File: cron_NewS3Suppl.py    Project: imclab/elife-bot
 def start(self, ENV = "dev"):
   # Specify run environment settings
   settings = settingsLib.get_settings(ENV)
   
   ping_marker_id = "cron_NewS3Suppl"
   
   # Log
   logFile = "starter.log"
   logger = log.logger(logFile, settings.setLevel, ping_marker_id)
   
   # Data provider
   db = dblib.SimpleDB(settings)
   db.connect()
   
   # SWF meta data provider
   swfmeta = swfmetalib.SWFMeta(settings)
   swfmeta.connect()
   
   # Get the last time this cron was run
   last_startTimestamp = swfmeta.get_last_completed_workflow_execution_startTimestamp(workflow_id = ping_marker_id)

   # Default, if cron never run before
   if(last_startTimestamp is None):
     last_startTimestamp = 0
 
   # Start a ping workflow as a marker
   self.start_ping_marker(ping_marker_id, ENV)
 
   # Check for S3 Suppl files that were updated since the last run
   date_format = "%Y-%m-%dT%H:%M:%S.000Z"
   
   # Quick hack - subtract 30 minutes so the top of the hour is not missed,
   #   covering the time between S3Monitor running and this cron starter
   last_startTimestamp_minus_30 = last_startTimestamp - (60*30)
   if(last_startTimestamp_minus_30 < 0):
     last_startTimestamp_minus_30 = 0
   time_tuple = time.gmtime(last_startTimestamp_minus_30)
   
   last_startDate = time.strftime(date_format, time_tuple)
   
   logger.info('last run %s' % (last_startDate))
   
   S3_item_list = db.elife_get_article_S3_file_items(file_data_type = "suppl", latest = True, last_updated_since = last_startDate)
   
   logger.info('Suppl files updated since %s: %s' % (last_startDate, str(len(S3_item_list))))
 
   if(len(S3_item_list) <= 0):
     # No new Suppl files
     pass
   else:
     # Found new Suppl files

     # Start a PublishSuppl starter
     try:
       starter_name = "starter_PublishSuppl"
       self.import_starter_module(starter_name, logger)
       s = self.get_starter_module(starter_name, logger)
       s.start(ENV = ENV, last_updated_since = last_startDate)
     except Exception:
       logger.info('Error: %s starting %s' % (ping_marker_id, starter_name))
       logger.exception('')
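
The timestamp arithmetic in this starter is self-contained and can be run in isolation; the epoch value below is made up for illustration:

    import time

    date_format = "%Y-%m-%dT%H:%M:%S.000Z"
    last_startTimestamp = 1400000000  # hypothetical epoch seconds of the last run

    # Subtract 30 minutes so items updated between S3Monitor running and
    # this starter are not missed, clamping at zero for a first run
    window_start = max(last_startTimestamp - (60 * 30), 0)
    last_startDate = time.strftime(date_format, time.gmtime(window_start))
    print(last_startDate)  # 2014-05-13T16:23:20.000Z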
Code Example #3
    def __init__(self, settings=None, tmp_dir=None):
        self.settings = settings
        self.tmp_dir = tmp_dir

        # Default tmp_dir if not specified
        self.tmp_dir_default = "article_provider"

        # SimpleDB connection for looking up S3 keys
        self.db = None
        if self.settings is not None:
            # Data provider
            self.db = dblib.SimpleDB(settings)

        # S3 connection
        self.s3_conn = None

        # Default S3 bucket name
        self.bucket_name = None
        if self.settings is not None:
            self.bucket_name = self.settings.bucket

        # Some defaults
        self.related_insight_article = None
        self.was_ever_poa = None
        self.is_poa = None

        # Store the list of DOI id that was ever PoA
        self.was_poa_doi_ids = None
        self.doi_ids = None
        self.article_bucket_published_dates = None

        # URL prefix used when checking whether an article has been published
        self.lookup_url_prefix = "http://elifesciences.org/lookup/doi/10.7554/eLife."
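
A sketch of instantiating this provider; the class name is assumed to be articlelib.article, matching the calls in Code Examples #5 and #19:

    # Hypothetical usage
    provider = articlelib.article(settings, tmp_dir="article_provider")
    # With settings supplied, provider.db is a dblib.SimpleDB instance
    # and provider.bucket_name is taken from settings.bucket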
Code Example #4
	def get_docs_from_SimpleDB(self, ENV = "dev", last_updated_since = None, doi_id = None):
		"""
		Get the array of docs from the SimpleDB provider
		"""
		docs = []
		
		# Specify run environment settings
		settings = settingsLib.get_settings(ENV)
		
		db = dblib.SimpleDB(settings)
		db.connect()
		
		if(last_updated_since is not None):
			xml_item_list = db.elife_get_article_S3_file_items(file_data_type = "xml", latest = True, last_updated_since = last_updated_since)
		elif(doi_id is not None):
			xml_item_list = db.elife_get_article_S3_file_items(file_data_type = "xml", latest = True, doi_id = doi_id)
		else:
			# Get all
			xml_item_list = db.elife_get_article_S3_file_items(file_data_type = "xml", latest = True)
			
		for x in xml_item_list:
			tmp = {}
			elife_id = str(x['name']).split("/")[0]
			document = 'https://s3.amazonaws.com/' + x['item_name']
			tmp['elife_id'] = elife_id
			tmp['document'] = document
			docs.append(tmp)
		
		return docs
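
A hedged usage sketch; provider stands for whatever object defines this method, and the date string follows the format used by the cron starters above:

    # Hypothetical call
    docs = provider.get_docs_from_SimpleDB(
        ENV="dev", last_updated_since="2014-05-13T16:23:20.000Z")
    for doc in docs:
        print(doc['elife_id'], doc['document'])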
Code Example #5
    def __init__(self,
                 settings,
                 logger,
                 conn=None,
                 token=None,
                 activity_task=None):
        activity.activity.__init__(self, settings, logger, conn, token,
                                   activity_task)

        self.name = "DepositCrossref"
        self.version = "1"
        self.default_task_heartbeat_timeout = 30
        self.default_task_schedule_to_close_timeout = 60 * 30
        self.default_task_schedule_to_start_timeout = 30
        self.default_task_start_to_close_timeout = 60 * 15
        self.description = (
            "Download article XML from crossref outbox, " +
            "generate crossref XML, and deposit with crossref.")

        # Directory where POA library is stored
        self.poa_lib_dir_name = "elife-poa-xml-generation"

        # Where we specify the library to be imported
        self.elife_poa_lib = None

        # Import the libraries we will need
        self.import_imports()

        # Create output directories
        self.create_activity_directories()
        self.date_stamp = self.set_datestamp()

        # Data provider where email body is saved
        self.db = dblib.SimpleDB(settings)

        # Instantiate a new article object to provide some helper functions
        self.article = articlelib.article(self.settings, self.get_tmp_dir())

        # Bucket for outgoing files
        self.publish_bucket = settings.poa_packaging_bucket
        self.outbox_folder = "crossref/outbox/"
        self.published_folder = "crossref/published/"

        # Track the success of some steps
        self.activity_status = None
        self.generate_status = None
        self.approve_status = None
        self.outbox_status = None
        self.publish_status = None

        # HTTP requests status
        self.http_request_status_code = []
        self.http_request_status_text = []

        self.outbox_s3_key_names = None

        # Track XML files selected for crossref XML
        self.article_published_file_names = []
        self.article_not_published_file_names = []
Code Example #6
File: cron_NewS3POA.py    Project: muddasani/elife-bot
    def start(self, settings):

        ping_marker_id = "cron_NewS3POA"

        # Log
        logFile = "starter.log"
        logger = log.logger(logFile, settings.setLevel, ping_marker_id)

        # Data provider
        db = dblib.SimpleDB(settings)
        db.connect()

        # SWF meta data provider
        swfmeta = swfmetalib.SWFMeta(settings)
        swfmeta.connect()

        last_startTimestamp = swfmeta.get_last_completed_workflow_execution_startTimestamp(
            workflow_id=ping_marker_id)

        # Start a ping workflow as a marker
        self.start_ping_marker(ping_marker_id, settings)

        # Check for S3 XML files that were updated since the last run
        date_format = "%Y-%m-%dT%H:%M:%S.000Z"

        # Quick hack - subtract 15 minutes,
        #   the time between S3Monitor running and this cron starter
        last_startTimestamp_minus_15 = last_startTimestamp - (60 * 15)
        time_tuple = time.gmtime(last_startTimestamp_minus_15)

        last_startDate = time.strftime(date_format, time_tuple)

        logger.info('last run %s' % (last_startDate))

        xml_item_list = db.elife_get_POA_delivery_S3_file_items(
            last_updated_since=last_startDate)

        logger.info('POA files updated since %s: %s' %
                    (last_startDate, str(len(xml_item_list))))

        if len(xml_item_list) <= 0:
            # No new XML
            pass
        else:
            # Found new XML files

            # Start a PackagePOA starter
            try:
                starter_name = "starter_PackagePOA"
                self.import_starter_module(starter_name, logger)
                s = self.get_starter_module(starter_name, logger)
                s.start(settings=settings, last_updated_since=last_startDate)
            except Exception:
                logger.info('Error: %s starting %s' %
                            (ping_marker_id, starter_name))
                logger.exception('')
Code Example #7
def import_simpledb_provider_module(step):
    imported = None
    try:
        import provider.simpleDB as dblib
        world.db = dblib.SimpleDB(world.settings)
        imported = True
    except Exception:
        imported = False
    assert imported is True, \
        "SimpleDB module could not be imported"
Code Example #8
File: cron_FiveMinute.py    Project: imclab/elife-bot
    def start(self, ENV="dev"):
        # Specify run environment settings
        settings = settingsLib.get_settings(ENV)

        ping_marker_id = "cron_FiveMinute"

        # Log
        logFile = "starter.log"
        logger = log.logger(logFile, settings.setLevel, ping_marker_id)

        # Data provider
        db = dblib.SimpleDB(settings)
        db.connect()

        # SWF meta data provider
        swfmeta = swfmetalib.SWFMeta(settings)
        swfmeta.connect()

        last_startTimestamp = swfmeta.get_last_completed_workflow_execution_startTimestamp(
            workflow_id=ping_marker_id)

        # Start a ping workflow as a marker
        self.start_ping_marker(ping_marker_id, ENV)

        # Check for S3 XML files that were updated since the last run
        date_format = "%Y-%m-%dT%H:%M:%S.000Z"

        # Date conversion
        time_tuple = time.gmtime(last_startTimestamp)
        last_startDate = time.strftime(date_format, time_tuple)

        logger.info('last run %s %s' % (ping_marker_id, last_startDate))

        # A conditional start for SendQueuedEmail
        #  Only start a workflow if there are emails in the queue ready to send
        item_list = db.elife_get_email_queue_items(
            query_type="count", date_scheduled_before=last_startDate)

        try:
            if (int(item_list[0]["Count"]) > 0):
                # At least one email in the queue, start a workflow
                try:
                    starter_name = "starter_SendQueuedEmail"
                    self.import_starter_module(starter_name, logger)
                    s = self.get_starter_module(starter_name, logger)
                    s.start(ENV=ENV)
                except Exception:
                    logger.info('Error: %s starting %s' %
                                (ping_marker_id, starter_name))
                    logger.exception('')
        except Exception:
            # Some error
            logger.info('Exception encountered starting %s: %s' %
                        (ping_marker_id, last_startDate))
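
The conditional-start check reduces to the following sketch, using only the SimpleDB method shown above; db and last_startDate are assumed to be set up as in the example:

    # Hypothetical, reduced version of the check
    item_list = db.elife_get_email_queue_items(
        query_type="count", date_scheduled_before=last_startDate)
    if int(item_list[0]["Count"]) > 0:
        pass  # at least one email is queued: start a SendQueuedEmail workflow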
Code Example #9
    def __init__(self,
                 settings,
                 logger,
                 conn=None,
                 token=None,
                 activity_task=None):
        activity.activity.__init__(self, settings, logger, conn, token,
                                   activity_task)

        self.name = "PackagePOA"
        self.version = "1"
        self.default_task_heartbeat_timeout = 30
        self.default_task_schedule_to_close_timeout = 60 * 30
        self.default_task_schedule_to_start_timeout = 30
        self.default_task_start_to_close_timeout = 60 * 15
        self.description = "Process POA zip file input, repackage, and save to S3."

        # Directory where POA library is stored
        self.poa_lib_dir_name = "elife-poa-xml-generation"

        # Where we specify the library to be imported
        self.elife_poa_lib = None

        # Import the libraries we will need
        self.import_imports()

        # Create output directories
        self.create_activity_directories()

        # Create an EJP provider to access S3 bucket holding CSV files
        self.ejp = ejplib.EJP(settings, self.get_tmp_dir())

        # Data provider where email body is saved
        self.db = dblib.SimpleDB(settings)

        # Bucket for outgoing files
        self.publish_bucket = settings.poa_packaging_bucket
        self.outbox_folder = "outbox/"

        # Some values to set later
        self.document = None
        self.poa_zip_filename = None
        self.doi = None

        # Capture errors from generating XML
        self.error_count = None
        self.error_messages = None

        # Track the success of some steps
        self.activity_status = None
        self.approve_status = None
        self.process_status = None
        self.generate_xml_status = None
        self.pdf_decap_status = None
Code Example #10
    def __init__(self,
                 settings,
                 logger,
                 conn=None,
                 token=None,
                 activity_task=None):
        activity.activity.__init__(self, settings, logger, conn, token,
                                   activity_task)

        self.name = "PublishFinalPOA"
        self.version = "1"
        self.default_task_heartbeat_timeout = 60 * 30
        self.default_task_schedule_to_close_timeout = 60 * 30
        self.default_task_schedule_to_start_timeout = 30
        self.default_task_start_to_close_timeout = 60 * 15
        self.description = (
            "Download POA files from a bucket, zip each article separately, " +
            "and upload to final bucket.")

        # Local directory settings
        self.TMP_DIR = self.get_tmp_dir() + os.sep + "tmp_dir"
        self.INPUT_DIR = self.get_tmp_dir() + os.sep + "input_dir"
        self.OUTPUT_DIR = self.get_tmp_dir() + os.sep + "output_dir"
        self.JUNK_DIR = self.get_tmp_dir() + os.sep + "junk_dir"
        self.DONE_DIR = self.get_tmp_dir() + os.sep + "done_dir"

        # Bucket for outgoing files
        self.input_bucket = settings.poa_packaging_bucket
        self.outbox_folder = "outbox/"
        self.published_folder_prefix = "published/"
        self.published_folder_name = None

        self.publish_bucket = settings.publishing_buckets_prefix + settings.production_bucket

        # Track the success of some steps
        self.activity_status = None
        self.approve_status = None
        self.publish_status = None

        # More file status tracking for reporting in email
        self.outbox_s3_key_names = []
        self.malformed_ds_file_names = []
        self.empty_ds_file_names = []
        self.unmatched_ds_file_names = []

        # Data provider where email body is saved
        self.db = dblib.SimpleDB(settings)
Code Example #11
    def __init__(self,
                 settings,
                 logger,
                 conn=None,
                 token=None,
                 activity_task=None):
        activity.activity.__init__(self, settings, logger, conn, token,
                                   activity_task)

        self.name = "LensCDNInvalidation"
        self.version = "1"
        self.default_task_heartbeat_timeout = 30
        self.default_task_schedule_to_close_timeout = 60 * 5
        self.default_task_schedule_to_start_timeout = 30
        self.default_task_start_to_close_timeout = 60 * 5
        self.description = "Create an invalidation request for the eLife Lens documents in the Cloudfront CDN."

        # Data provider
        self.db = dblib.SimpleDB(settings)
Code Example #12
    def __init__(self,
                 settings,
                 logger,
                 conn=None,
                 token=None,
                 activity_task=None):
        activity.activity.__init__(self, settings, logger, conn, token,
                                   activity_task)

        self.name = "S3Monitor"
        self.version = "1.1"
        self.default_task_heartbeat_timeout = 30
        self.default_task_schedule_to_close_timeout = 60 * 15
        self.default_task_schedule_to_start_timeout = 30
        self.default_task_start_to_close_timeout = 60 * 20
        self.description = "S3Monitor activity: poll S3 bucket and save object metadata into SimpleDB."

        # Data provider
        self.db = dblib.SimpleDB(settings)
Code Example #13
    def __init__(self, settings, logger, conn=None, token=None, activity_task=None):
        activity.activity.__init__(self, settings, logger, conn, token, activity_task)

        self.name = "PMCDeposit"
        self.version = "1"
        self.default_task_heartbeat_timeout = 30
        self.default_task_schedule_to_close_timeout = 60 * 30
        self.default_task_schedule_to_start_timeout = 30
        self.default_task_start_to_close_timeout = 60 * 15
        self.description = ("Download single zip file an article, repackage it, " +
                            "send to PMC and notify them.")

        # Local directory settings
        self.TMP_DIR = self.get_tmp_dir() + os.sep + "tmp_dir"
        self.INPUT_DIR = self.get_tmp_dir() + os.sep + "input_dir"
        self.JUNK_DIR = self.get_tmp_dir() + os.sep + "junk_dir"
        self.ZIP_DIR = self.get_tmp_dir() + os.sep + "zip_dir"
        self.EPS_DIR = self.get_tmp_dir() + os.sep + "eps_dir"
        self.TIF_DIR = self.get_tmp_dir() + os.sep + "tif_dir"
        self.OUTPUT_DIR = self.get_tmp_dir() + os.sep + "output_dir"

        # Data provider where email body is saved
        self.db = dblib.SimpleDB(settings)

        # Bucket settings
        self.input_bucket = None
        self.input_bucket_default = (settings.publishing_buckets_prefix +
                                     settings.archive_bucket)

        self.publish_bucket = settings.poa_packaging_bucket
        self.published_folder = "pmc/published"
        self.published_zip_folder = "pmc/zip"

        # journal
        self.journal = 'elife'

        # Outgoing FTP settings are set later
        self.FTP_URI = None
        self.FTP_USERNAME = None
        self.FTP_PASSWORD = None
        self.FTP_CWD = None
        self.FTP_SUBDIR = []
Code Example #14
    def __init__(self,
                 settings,
                 logger,
                 conn=None,
                 token=None,
                 activity_task=None):
        activity.activity.__init__(self, settings, logger, conn, token,
                                   activity_task)

        self.name = "LensXMLFilesList"
        self.version = "1"
        self.default_task_heartbeat_timeout = 30
        self.default_task_schedule_to_close_timeout = 60 * 5
        self.default_task_schedule_to_start_timeout = 30
        self.default_task_start_to_close_timeout = 60 * 5
        self.description = "Create the eLife Lens xml list file for cache warming, and then save those to the S3 CDN bucket."

        # Data provider
        self.db = dblib.SimpleDB(settings)

        # Create the filesystem provider
        self.fs = fslib.Filesystem(self.get_tmp_dir())
Code Example #15
    def __init__(self,
                 settings,
                 logger,
                 conn=None,
                 token=None,
                 activity_task=None):
        activity.activity.__init__(self, settings, logger, conn, token,
                                   activity_task)

        self.name = "AdminEmailHistory"
        self.version = "1"
        self.default_task_heartbeat_timeout = 30
        self.default_task_schedule_to_close_timeout = 60 * 5
        self.default_task_schedule_to_start_timeout = 30
        self.default_task_start_to_close_timeout = 60 * 5
        self.description = "Email administrators a workflow history status message."

        # Data provider
        self.db = dblib.SimpleDB(settings)

        # Default time period, in seconds
        self.time_period = 60 * 60 * 4
Code Example #16
    def get_docs_from_SimpleDB(self, settings, last_updated_since=None):
        """
        Get the array of docs from the SimpleDB provider
        """
        docs = []

        db = dblib.SimpleDB(settings)
        db.connect()

        if last_updated_since is not None:
            xml_item_list = db.elife_get_POA_delivery_S3_file_items(
                last_updated_since=last_updated_since)
        else:
            # Get all - not implemented for now to avoid mistakes running too many workflows,
            #   so return no docs rather than raising a NameError below
            xml_item_list = []

        for x in xml_item_list:
            tmp = {}
            name = x['name']
            tmp['document'] = name
            docs.append(tmp)

        return docs
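
A usage sketch for this method; provider is a stand-in for the defining object, and note the returned docs carry only a document key, unlike Code Example #4:

    # Hypothetical call
    docs = provider.get_docs_from_SimpleDB(
        settings, last_updated_since="2014-05-13T16:23:20.000Z")
    for doc in docs:
        print(doc['document'])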
Code Example #17
    def __init__(self,
                 settings,
                 logger,
                 conn=None,
                 token=None,
                 activity_task=None):
        activity.activity.__init__(self, settings, logger, conn, token,
                                   activity_task)

        self.name = "PublicationEmail"
        self.version = "1"
        self.default_task_heartbeat_timeout = 30
        self.default_task_schedule_to_close_timeout = 60 * 5
        self.default_task_schedule_to_start_timeout = 30
        self.default_task_start_to_close_timeout = 60 * 5
        self.description = "Queue emails to notify of a new article publication."

        # Data provider
        self.db = dblib.SimpleDB(settings)

        # Templates provider
        self.templates = templatelib.Templates(settings, self.get_tmp_dir())

        # EJP data provider
        self.ejp = ejplib.EJP(settings, self.get_tmp_dir())

        # Bucket for outgoing files
        self.publish_bucket = settings.poa_packaging_bucket
        self.outbox_folder = "publication_email/outbox/"
        self.published_folder = "publication_email/published/"

        # Track the success of some steps
        self.activity_status = None

        # Track XML files selected for publication
        self.article_xml_filenames = []
        self.xml_file_to_doi_map = {}
        self.articles = []
        self.related_articles = []
        self.articles_approved = []
        self.articles_approved_prepared = []
        self.insight_articles_to_remove_from_outbox = []
        self.articles_do_not_remove_from_outbox = []

        # Default is do not send duplicate emails
        self.allow_duplicates = False

        # Article types for which not to send emails
        self.article_types_do_not_send = ['editorial', 'correction']

        # Email types, for sending previews of each template
        self.email_types = [
            'author_publication_email_POA',
            'author_publication_email_VOR_after_POA',
            'author_publication_email_VOR_no_POA',
            'author_publication_email_Insight_to_VOR',
            'author_publication_email_Feature',
        ]

        self.date_stamp = self.set_datestamp()

        self.admin_email_content = ''
Code Example #18
    def do_activity(self, data=None):
        """
        Activity, do the work
        """
        if self.logger:
            self.logger.info('data: %s' %
                             json.dumps(data, sort_keys=True, indent=4))

        # Data passed to this activity
        elife_id = data["data"]["elife_id"]
        workflow = data["data"]["workflow"]

        # Create output directories
        self.create_activity_directories()

        # Data provider
        self.db = dblib.SimpleDB(self.settings)
        # Connect to DB
        self.db_conn = self.db.connect()

        # Download the S3 objects
        self.download_files_from_s3(elife_id, workflow)

        # Set FTP settings
        self.set_ftp_settings(elife_id, workflow)

        # FTP to endpoint
        try:
            # Every workflow deposits the same set of zip files
            file_type = "/*.zip"
            zipfiles = glob.glob(self.get_tmp_dir() + os.sep +
                                 self.FTP_TO_SOMEWHERE_DIR + file_type)

            if workflow == 'HEFCE':
                #self.ftp_to_endpoint(zipfiles, self.FTP_SUBDIR, passive=True)
                # SFTP now
                sub_dir = "{:05d}".format(int(elife_id))
                self.sftp_to_endpoint(zipfiles, sub_dir)

            if workflow in ('Cengage', 'Scopus', 'WoS', 'GoOA'):
                self.ftp_to_endpoint(zipfiles, passive=True)
        except Exception:
            # Something went wrong, fail
            if self.logger:
                self.logger.exception(
                    'exception in FTPArticle, data: %s' %
                    json.dumps(data, sort_keys=True, indent=4))
            result = False
            self.clean_tmp_dir()
            return result

        # Return the activity result, True or False
        result = True
        self.clean_tmp_dir()
        return result
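
The shape of the data payload this activity expects can be read off the key accesses at the top of the method; a hypothetical invocation (the elife_id value is made up):

    # Hypothetical payload and call
    data = {"data": {"elife_id": "00778", "workflow": "HEFCE"}}
    result = activity_object.do_activity(data)  # True on success, False on FTP failure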
Code Example #19
    def __init__(self,
                 settings,
                 logger,
                 conn=None,
                 token=None,
                 activity_task=None):
        activity.activity.__init__(self, settings, logger, conn, token,
                                   activity_task)

        self.name = "PubRouterDeposit"
        self.version = "1"
        self.default_task_heartbeat_timeout = 30
        self.default_task_schedule_to_close_timeout = 60 * 30
        self.default_task_schedule_to_start_timeout = 30
        self.default_task_start_to_close_timeout = 60 * 15
        self.description = ("Download article XML from pub_router outbox, \
                            approve each for publication, and deposit files via FTP to pub router."
                            )

        # Create output directories
        self.date_stamp = self.set_datestamp()

        # Data provider where email body is saved
        self.db = dblib.SimpleDB(settings)

        # Instantiate a new article object to provide some helper functions
        self.article = articlelib.article(self.settings, self.get_tmp_dir())

        # Bucket for outgoing files
        self.publish_bucket = settings.poa_packaging_bucket
        self.outbox_folder = None
        self.published_folder = None

        # Bucket settings for source files of FTPArticle workflows
        self.pmc_zip_bucket = settings.poa_packaging_bucket
        self.pmc_zip_folder = "pmc/zip/"

        # Bucket settings for source files of PMCDeposit workflows
        self.archive_bucket = self.settings.publishing_buckets_prefix + self.settings.archive_bucket

        # Track the success of some steps
        self.activity_status = None
        self.ftp_status = None
        self.outbox_status = None
        self.publish_status = None

        self.outbox_s3_key_names = None

        # Type of FTPArticle workflow to start, will be specified in data
        self.workflow = None

        # Track XML files selected
        self.article_xml_filenames = []
        self.xml_file_to_doi_map = {}
        self.articles = []

        #self.article_published_file_names = []
        #self.article_not_published_file_names = []

        self.admin_email_content = ''

        # journal
        self.journal = 'elife'