def start(self, ENV = "dev"): # Specify run environment settings settings = settingsLib.get_settings(ENV) ping_marker_id = "cron_NewS3Suppl" # Log logFile = "starter.log" logger = log.logger(logFile, settings.setLevel, ping_marker_id) # Data provider db = dblib.SimpleDB(settings) db.connect() # SWF meta data provider swfmeta = swfmetalib.SWFMeta(settings) swfmeta.connect() # Default, if cron never run before last_startTimestamp = 0 # Get the last time this cron was run last_startTimestamp = swfmeta.get_last_completed_workflow_execution_startTimestamp(workflow_id = ping_marker_id) # Start a ping workflow as a marker self.start_ping_marker(ping_marker_id, ENV) # Check for S3 Suppl files that were updated since the last run date_format = "%Y-%m-%dT%H:%M:%S.000Z" # Quick hack - subtract 30 minutes to not ignore the top of the hour # the time between S3Monitor running and this cron starter last_startTimestamp_minus_30 = last_startTimestamp - (60*30) if(last_startTimestamp_minus_30 < 0): last_startTimestamp_minus_30 = 0 time_tuple = time.gmtime(last_startTimestamp_minus_30) last_startDate = time.strftime(date_format, time_tuple) logger.info('last run %s' % (last_startDate)) S3_item_list = db.elife_get_article_S3_file_items(file_data_type = "suppl", latest = True, last_updated_since = last_startDate) logger.info('Suppl files updated since %s: %s' % (last_startDate, str(len(S3_item_list)))) if(len(S3_item_list) <= 0): # No new SVG pass else: # Found new SVG files # Start a PublishSVG starter try: starter_name = "starter_PublishSuppl" self.import_starter_module(starter_name, logger) s = self.get_starter_module(starter_name, logger) s.start(ENV = ENV, last_updated_since = last_startDate) except: logger.info('Error: %s starting %s' % (ping_marker_id, starter_name)) logger.exception('')
def start(self, settings):
    ping_marker_id = "cron_NewS3POA"

    # Log
    logFile = "starter.log"
    logger = log.logger(logFile, settings.setLevel, ping_marker_id)

    # Data provider
    db = dblib.SimpleDB(settings)
    db.connect()

    # SWF meta data provider
    swfmeta = swfmetalib.SWFMeta(settings)
    swfmeta.connect()

    last_startTimestamp = swfmeta.get_last_completed_workflow_execution_startTimestamp(
        workflow_id=ping_marker_id)

    # Start a ping workflow as a marker
    self.start_ping_marker(ping_marker_id, settings)

    # Check for S3 XML files that were updated since the last run
    date_format = "%Y-%m-%dT%H:%M:%S.000Z"

    # Quick hack - subtract 15 minutes,
    # the time between S3Monitor running and this cron starter
    last_startTimestamp_minus_15 = last_startTimestamp - (60 * 15)
    time_tuple = time.gmtime(last_startTimestamp_minus_15)

    last_startDate = time.strftime(date_format, time_tuple)

    logger.info('last run %s' % (last_startDate))

    xml_item_list = db.elife_get_POA_delivery_S3_file_items(
        last_updated_since=last_startDate)

    logger.info('POA files updated since %s: %s' % (last_startDate, str(len(xml_item_list))))

    if len(xml_item_list) <= 0:
        # No new XML
        pass
    else:
        # Found new XML files
        # Start a PackagePOA starter
        try:
            starter_name = "starter_PackagePOA"
            self.import_starter_module(starter_name, logger)
            s = self.get_starter_module(starter_name, logger)
            s.start(settings=settings, last_updated_since=last_startDate)
        except:
            logger.info('Error: %s starting %s' % (ping_marker_id, starter_name))
            logger.exception('')
def start(self, ENV="dev"): # Specify run environment settings settings = settingsLib.get_settings(ENV) ping_marker_id = "cron_FiveMinute" # Log logFile = "starter.log" logger = log.logger(logFile, settings.setLevel, ping_marker_id) # Data provider db = dblib.SimpleDB(settings) db.connect() # SWF meta data provider swfmeta = swfmetalib.SWFMeta(settings) swfmeta.connect() last_startTimestamp = swfmeta.get_last_completed_workflow_execution_startTimestamp( workflow_id=ping_marker_id) # Start a ping workflow as a marker self.start_ping_marker(ping_marker_id, ENV) # Check for S3 XML files that were updated since the last run date_format = "%Y-%m-%dT%H:%M:%S.000Z" # Date conversion time_tuple = time.gmtime(last_startTimestamp) last_startDate = time.strftime(date_format, time_tuple) logger.info('last run %s %s' % (ping_marker_id, last_startDate)) # A conditional start for SendQueuedEmail # Only start a workflow if there are emails in the queue ready to send item_list = db.elife_get_email_queue_items( query_type="count", date_scheduled_before=last_startDate) try: if (int(item_list[0]["Count"]) > 0): # More than one email in the queue, start a workflow try: starter_name = "starter_SendQueuedEmail" self.import_starter_module(starter_name, logger) s = self.get_starter_module(starter_name, logger) s.start(ENV=ENV) except: logger.info('Error: %s starting %s' % (ping_marker_id, starter_name)) logger.exception('') except: # Some error logger.info('Exception encountered starting %s: %s' % (ping_marker_id, last_startDate))
def workflow_conditional_start(ENV, starter_name, start_seconds, data=None,
                               workflow_id=None, workflow_name=None, workflow_version=None):
    """
    Given workflow criteria, check the workflow completion history for the last time run.
    If it last ran more than start_seconds ago, start a new workflow.
    """

    diff_seconds = None
    last_startTimestamp = None

    settings = settingsLib.get_settings(ENV)

    swfmeta = swfmetalib.SWFMeta(settings)
    swfmeta.connect()

    last_startTimestamp = swfmeta.get_last_completed_workflow_execution_startTimestamp(
        workflow_id=workflow_id,
        workflow_name=workflow_name,
        workflow_version=workflow_version)

    current_timestamp = calendar.timegm(time.gmtime())

    if last_startTimestamp is not None:
        diff_seconds = current_timestamp - start_seconds - last_startTimestamp
        print diff_seconds

    if diff_seconds >= 0 or last_startTimestamp is None:
        # Start a new workflow
        # Load the starter module
        module_name = "starter." + starter_name
        importlib.import_module(module_name)
        full_path = "starter." + starter_name + "." + starter_name + "()"
        s = eval(full_path)

        # Customised start functions
        if starter_name == "starter_S3Monitor":
            s.start(ENV=ENV, workflow="S3Monitor")

        elif starter_name == "starter_AdminEmail":
            s.start(ENV=ENV, workflow="AdminEmail")

        elif (starter_name == "cron_NewS3XML"
              or starter_name == "cron_NewS3PDF"
              or starter_name == "cron_NewS3SVG"
              or starter_name == "cron_FiveMinute"
              or starter_name == "cron_NewS3Suppl"
              or starter_name == "cron_NewS3JPG"):
            s.start(ENV=ENV)
def import_swfmeta_provider_module(step):
    imported = None

    # Check for world.settings; if not specified, set it to None
    try:
        if not world.settings:
            world.settings = None
    except AttributeError:
        world.settings = None

    try:
        import provider.swfmeta as swfmetalib
        world.swfmeta = swfmetalib.SWFMeta(world.settings)
        imported = True
    except:
        imported = False

    assert imported is True, \
        "SWFMeta module was not imported"
def do_activity(self, data=None):
    """
    WorkflowConflictCheck activity, do the work
    """
    if self.logger:
        self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

    is_open = None

    workflow_id = None
    workflow_name = None
    workflow_version = None

    try:
        workflow_id = data["data"]["workflow_id"]
    except KeyError:
        pass
    try:
        workflow_name = data["data"]["workflow_name"]
    except KeyError:
        pass
    try:
        workflow_version = data["data"]["workflow_version"]
    except KeyError:
        pass

    swfmeta = swfmetalib.SWFMeta(self.settings)
    swfmeta.connect()
    is_open = swfmeta.is_workflow_open(
        workflow_id=workflow_id,
        workflow_name=workflow_name,
        workflow_version=workflow_version)

    # Return logic: if is_open is False, then return True as being no conflict
    # But, if is_open is True, do not return a value, causing this activity to time out
    if is_open is False:
        return True
    else:
        return None
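# A minimal sketch (not taken from the source) of the input shape do_activity()
# above expects: the workflow criteria sit under a top-level "data" key, and any
# of the three keys may be absent, in which case the KeyError handlers leave that
# criterion as None. The values shown here are purely illustrative.
example_activity_data = {
    "data": {
        "workflow_id": "PublishArticle_00003",
        "workflow_name": "PublishArticle",
        "workflow_version": "1"
    }
}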
def get_workflow_count_by_closestatus(self, time_period, current_timestamp):
    """
    Given the time_period in seconds, and the current_timestamp,
    use the SWFMeta provider to count closed workflows
    """
    close_status_list = [
        "COMPLETED",
        "FAILED",
        "CANCELED",
        "TERMINATED",
        "CONTINUED_AS_NEW",
        "TIMED_OUT"
    ]

    swfmeta = swfmetalib.SWFMeta(self.settings)
    swfmeta.connect()

    start_latest_date_timestamp = current_timestamp
    start_oldest_date_timestamp = start_latest_date_timestamp - time_period

    workflow_count = {}

    for close_status in close_status_list:
        count = swfmeta.get_closed_workflow_execution_count(
            domain=self.settings.domain,
            start_oldest_date=start_oldest_date_timestamp,
            start_latest_date=start_latest_date_timestamp,
            close_status=close_status)

        run_count = None
        try:
            run_count = count["count"]
        except:
            run_count = None

        if run_count:
            workflow_count[close_status] = run_count
        else:
            workflow_count[close_status] = 0

    return workflow_count
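# Hypothetical usage sketch, not part of the source: tally how many workflows
# closed in the last 24 hours using get_workflow_count_by_closestatus() above.
# The helper name, its activity_object parameter and the 24-hour window are
# assumptions for illustration only.
import calendar
import time

def report_closed_workflow_total(activity_object):
    time_period = 60 * 60 * 24  # look back one day
    current_timestamp = calendar.timegm(time.gmtime())
    # workflow_count maps each close status, e.g. "COMPLETED", to a count
    workflow_count = activity_object.get_workflow_count_by_closestatus(
        time_period, current_timestamp)
    return sum(workflow_count.values())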
def workflow_conditional_start(settings, starter_name, start_seconds, data=None,
                               workflow_id=None, workflow_name=None, workflow_version=None):
    """
    Given workflow criteria, check the workflow completion history for the last time run.
    If it last ran more than start_seconds ago, start a new workflow.
    """

    diff_seconds = None
    last_startTimestamp = None

    swfmeta = swfmetalib.SWFMeta(settings)
    swfmeta.connect()

    last_startTimestamp = swfmeta.get_last_completed_workflow_execution_startTimestamp(
        workflow_id=workflow_id,
        workflow_name=workflow_name,
        workflow_version=workflow_version)

    current_timestamp = calendar.timegm(time.gmtime())

    if last_startTimestamp is not None:
        diff_seconds = current_timestamp - start_seconds - last_startTimestamp
        print diff_seconds

    if diff_seconds >= 0 or last_startTimestamp is None:
        # Start a new workflow
        # Load the starter module
        module_name = "starter." + starter_name
        importlib.import_module(module_name)
        full_path = "starter." + starter_name + "." + starter_name + "()"
        s = eval(full_path)

        # Customised start functions
        if starter_name == "starter_S3Monitor":
            if workflow_id == "S3Monitor":
                s.start(settings=settings, workflow="S3Monitor")
            if workflow_id == "S3Monitor_POA":
                s.start(settings=settings, workflow="S3Monitor_POA")

        elif starter_name == "starter_AdminEmail":
            s.start(settings=settings, workflow="AdminEmail")

        elif starter_name == "starter_PubmedArticleDeposit":
            # Special for pubmed, only start a workflow if the outbox is not empty
            bucket_name = settings.poa_packaging_bucket
            outbox_folder = "pubmed/outbox/"

            # Connect to S3 and bucket
            s3_conn = S3Connection(settings.aws_access_key_id, settings.aws_secret_access_key)
            bucket = s3_conn.lookup(bucket_name)

            s3_key_names = get_s3_key_names_from_bucket(
                bucket=bucket,
                prefix=outbox_folder)
            if len(s3_key_names) > 0:
                s.start(settings=settings)

        elif starter_name == "starter_PubRouterDeposit":
            # PubRouterDeposit has different variants specified by the workflow variable
            workflow = workflow_id.split("_")[-1]
            s.start(settings=settings, workflow=workflow)

        elif (starter_name == "cron_FiveMinute"
              or starter_name == "starter_PublishPOA"
              or starter_name == "cron_NewS3POA"
              or starter_name == "starter_PublicationEmail"
              or starter_name == "starter_DepositCrossref"):
            s.start(settings=settings)
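# Hypothetical cron entry point, shown only as a sketch of how
# workflow_conditional_start() above might be driven. The settingsLib call, the
# "dev" environment name and the start_seconds intervals are assumptions for
# illustration, not values taken from the source.
if __name__ == "__main__":
    settings = settingsLib.get_settings("dev")

    # Only start the POA polling cron if it last completed more than
    # 30 minutes ago (illustrative interval)
    workflow_conditional_start(
        settings=settings,
        starter_name="cron_NewS3POA",
        start_seconds=60 * 30,
        workflow_id="cron_NewS3POA")

    # Only start the five minute cron if it last completed more than
    # 5 minutes ago (illustrative interval)
    workflow_conditional_start(
        settings=settings,
        starter_name="cron_FiveMinute",
        start_seconds=60 * 5,
        workflow_id="cron_FiveMinute")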