Example #1
    def start(self, ENV="dev"):
        # Specify run environment settings
        settings = settingsLib.get_settings(ENV)

        ping_marker_id = "cron_NewS3Suppl"

        # Log
        logFile = "starter.log"
        logger = log.logger(logFile, settings.setLevel, ping_marker_id)

        # Data provider
        db = dblib.SimpleDB(settings)
        db.connect()

        # SWF meta data provider
        swfmeta = swfmetalib.SWFMeta(settings)
        swfmeta.connect()

        # Get the last time this cron was run; default to 0 if it never ran before
        last_startTimestamp = swfmeta.get_last_completed_workflow_execution_startTimestamp(
            workflow_id=ping_marker_id)
        if last_startTimestamp is None:
            last_startTimestamp = 0

        # Start a ping workflow as a marker
        self.start_ping_marker(ping_marker_id, ENV)

        # Check for S3 Suppl files that were updated since the last run
        date_format = "%Y-%m-%dT%H:%M:%S.000Z"

        # Quick hack - subtract 30 minutes to not ignore the top of the hour,
        #   the time between S3Monitor running and this cron starter
        last_startTimestamp_minus_30 = last_startTimestamp - (60 * 30)
        if last_startTimestamp_minus_30 < 0:
            last_startTimestamp_minus_30 = 0
        time_tuple = time.gmtime(last_startTimestamp_minus_30)

        last_startDate = time.strftime(date_format, time_tuple)

        logger.info('last run %s' % (last_startDate))

        S3_item_list = db.elife_get_article_S3_file_items(
            file_data_type="suppl", latest=True, last_updated_since=last_startDate)

        logger.info('Suppl files updated since %s: %s' %
                    (last_startDate, str(len(S3_item_list))))

        if len(S3_item_list) <= 0:
            # No new Suppl files
            pass
        else:
            # Found new Suppl files

            # Start a PublishSuppl starter
            try:
                starter_name = "starter_PublishSuppl"
                self.import_starter_module(starter_name, logger)
                s = self.get_starter_module(starter_name, logger)
                s.start(ENV=ENV, last_updated_since=last_startDate)
            except:
                logger.info('Error: %s starting %s' %
                            (ping_marker_id, starter_name))
                logger.exception('')
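
For reference, a minimal standalone sketch of the timestamp-to-date conversion the starter above uses to build its last_updated_since value; the epoch value is an arbitrary illustration, not taken from the source.

import time

date_format = "%Y-%m-%dT%H:%M:%S.000Z"
last_startTimestamp = 1500000000  # hypothetical epoch seconds of the last completed run
# Subtract the same 30-minute overlap used above, clamped at zero
last_startTimestamp_minus_30 = max(last_startTimestamp - (60 * 30), 0)
time_tuple = time.gmtime(last_startTimestamp_minus_30)
last_startDate = time.strftime(date_format, time_tuple)
print(last_startDate)  # 2017-07-14T02:10:00.000Z
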
Example #2
    def start(self, settings):

        ping_marker_id = "cron_NewS3POA"

        # Log
        logFile = "starter.log"
        logger = log.logger(logFile, settings.setLevel, ping_marker_id)

        # Data provider
        db = dblib.SimpleDB(settings)
        db.connect()

        # SWF meta data provider
        swfmeta = swfmetalib.SWFMeta(settings)
        swfmeta.connect()

        last_startTimestamp = swfmeta.get_last_completed_workflow_execution_startTimestamp(
            workflow_id=ping_marker_id)

        # Default to 0 if this cron has never completed before
        if last_startTimestamp is None:
            last_startTimestamp = 0

        # Start a ping workflow as a marker
        self.start_ping_marker(ping_marker_id, settings)

        # Check for S3 XML files that were updated since the last run
        date_format = "%Y-%m-%dT%H:%M:%S.000Z"

        # Quick hack - subtract 15 minutes,
        #   the time between S3Monitor running and this cron starter
        last_startTimestamp_minus_15 = last_startTimestamp - (60 * 15)
        time_tuple = time.gmtime(last_startTimestamp_minus_15)

        last_startDate = time.strftime(date_format, time_tuple)

        logger.info('last run %s' % (last_startDate))

        xml_item_list = db.elife_get_POA_delivery_S3_file_items(
            last_updated_since=last_startDate)

        logger.info('POA files updated since %s: %s' %
                    (last_startDate, str(len(xml_item_list))))

        if len(xml_item_list) <= 0:
            # No new XML
            pass
        else:
            # Found new XML files

            # Start a PackagePOA starter
            try:
                starter_name = "starter_PackagePOA"
                self.import_starter_module(starter_name, logger)
                s = self.get_starter_module(starter_name, logger)
                s.start(settings=settings, last_updated_since=last_startDate)
            except:
                logger.info('Error: %s starting %s' %
                            (ping_marker_id, starter_name))
                logger.exception('')
Example #3
    def start(self, ENV="dev"):
        # Specify run environment settings
        settings = settingsLib.get_settings(ENV)

        ping_marker_id = "cron_FiveMinute"

        # Log
        logFile = "starter.log"
        logger = log.logger(logFile, settings.setLevel, ping_marker_id)

        # Data provider
        db = dblib.SimpleDB(settings)
        db.connect()

        # SWF meta data provider
        swfmeta = swfmetalib.SWFMeta(settings)
        swfmeta.connect()

        last_startTimestamp = swfmeta.get_last_completed_workflow_execution_startTimestamp(
            workflow_id=ping_marker_id)

        # Start a ping workflow as a marker
        self.start_ping_marker(ping_marker_id, ENV)

        # Check for S3 XML files that were updated since the last run
        date_format = "%Y-%m-%dT%H:%M:%S.000Z"

        # Date conversion
        time_tuple = time.gmtime(last_startTimestamp)
        last_startDate = time.strftime(date_format, time_tuple)

        logger.info('last run %s %s' % (ping_marker_id, last_startDate))

        # A conditional start for SendQueuedEmail
        #  Only start a workflow if there are emails in the queue ready to send
        item_list = db.elife_get_email_queue_items(
            query_type="count", date_scheduled_before=last_startDate)

        try:
            if int(item_list[0]["Count"]) > 0:
                # At least one email in the queue, start a workflow
                try:
                    starter_name = "starter_SendQueuedEmail"
                    self.import_starter_module(starter_name, logger)
                    s = self.get_starter_module(starter_name, logger)
                    s.start(ENV=ENV)
                except:
                    logger.info('Error: %s starting %s' %
                                (ping_marker_id, starter_name))
                    logger.exception('')
        except:
            # Some error
            logger.info('Exception encountered starting %s: %s' %
                        (ping_marker_id, last_startDate))
Example #4
def workflow_conditional_start(ENV,
                               starter_name,
                               start_seconds,
                               data=None,
                               workflow_id=None,
                               workflow_name=None,
                               workflow_version=None):
    """
  Given workflow criteria, check the workflow completion history for the last time run
  If it last run more than start_seconds ago, start a new workflow
  """

    diff_seconds = None
    last_startTimestamp = None

    settings = settingsLib.get_settings(ENV)

    swfmeta = swfmetalib.SWFMeta(settings)
    swfmeta.connect()

    last_startTimestamp = swfmeta.get_last_completed_workflow_execution_startTimestamp(
        workflow_id=workflow_id,
        workflow_name=workflow_name,
        workflow_version=workflow_version)

    current_timestamp = calendar.timegm(time.gmtime())

    if last_startTimestamp is not None:
        diff_seconds = current_timestamp - start_seconds - last_startTimestamp
        print(diff_seconds)

    if last_startTimestamp is None or diff_seconds >= 0:
        # Start a new workflow
        # Load the starter module
        module_name = "starter." + starter_name
        importlib.import_module(module_name)
        full_path = "starter." + starter_name + "." + starter_name + "()"
        s = eval(full_path)

        # Customised start functions
        if (starter_name == "starter_S3Monitor"):
            s.start(ENV=ENV, workflow="S3Monitor")
        elif (starter_name == "starter_AdminEmail"):
            s.start(ENV=ENV, workflow="AdminEmail")
        elif (starter_name == "cron_NewS3XML"
              or starter_name == "cron_NewS3PDF"
              or starter_name == "cron_NewS3SVG"
              or starter_name == "cron_FiveMinute"
              or starter_name == "cron_NewS3Suppl"
              or starter_name == "cron_NewS3JPG"):
            s.start(ENV=ENV)
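
A minimal usage sketch for this ENV-based variant; the cron module name is an assumption for illustration, while the starter name, interval, and workflow_id mirror the cron_FiveMinute example above.

from cron import workflow_conditional_start  # hypothetical module holding the function above

# Start cron_FiveMinute unless it last completed less than five minutes ago
workflow_conditional_start(
    ENV="dev",
    starter_name="cron_FiveMinute",
    start_seconds=60 * 5,
    workflow_id="cron_FiveMinute")
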
Example #5
def import_swfmeta_provider_module(step):
    imported = None
    # Check for world.settings, if not specified set to None
    try:
        if not world.settings:
            world.settings = None
    except AttributeError:
        world.settings = None

    try:
        import provider.swfmeta as swfmetalib
        world.swfmeta = swfmetalib.SWFMeta(world.settings)
        imported = True
    except:
        imported = False
    assert imported is True, \
        "Failed to import the SWFMeta provider module"
Example #6
    def do_activity(self, data=None):
        """
    WorkflowConflictCheck activity, do the work
    """
        if (self.logger):
            self.logger.info('data: %s' %
                             json.dumps(data, sort_keys=True, indent=4))

        is_open = None

        workflow_id = None
        workflow_name = None
        workflow_version = None

        try:
            workflow_id = data["data"]["workflow_id"]
        except KeyError:
            pass
        try:
            workflow_name = data["data"]["workflow_name"]
        except KeyError:
            pass
        try:
            workflow_version = data["data"]["workflow_version"]
        except KeyError:
            pass

        swfmeta = swfmetalib.SWFMeta(self.settings)
        swfmeta.connect()
        is_open = swfmeta.is_workflow_open(workflow_id=workflow_id,
                                           workflow_name=workflow_name,
                                           workflow_version=workflow_version)

        # Return logic: if is_open is False, then return True as being no conflict
        #  But, if is_open is True, do not return a value, causing this activity to timeout
        if is_open is False:
            return True
        else:
            return None
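
For context, a sketch of the data payload shape this activity reads; the keys mirror the lookups above and the concrete values are illustrative only.

data = {
    "data": {
        "workflow_id": "PublishArticle_00353",  # illustrative values only
        "workflow_name": "PublishArticle",
        "workflow_version": "1"
    }
}
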
Example #7
    def get_workflow_count_by_closestatus(self, time_period,
                                          current_timestamp):
        """
    Given the time_period in seconds, and the current_timestamp
    use the SWFMeta provider to count closed workflows
    """

        close_status_list = [
            "COMPLETED", "FAILED", "CANCELED", "TERMINATED",
            "CONTINUED_AS_NEW", "TIMED_OUT"
        ]

        swfmeta = swfmetalib.SWFMeta(self.settings)
        swfmeta.connect()

        start_latest_date_timestamp = current_timestamp
        start_oldest_date_timestamp = start_latest_date_timestamp - time_period

        workflow_count = {}

        for close_status in close_status_list:
            count = swfmeta.get_closed_workflow_execution_count(
                domain=self.settings.domain,
                start_oldest_date=start_oldest_date_timestamp,
                start_latest_date=start_latest_date_timestamp,
                close_status=close_status)
            run_count = None
            try:
                run_count = count["count"]
            except:
                run_count = None

            if (run_count):
                workflow_count[close_status] = run_count
            else:
                workflow_count[close_status] = 0

        return workflow_count
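
A minimal call sketch for the counting helper above, assuming activity_object is an instance of the enclosing activity class with valid settings; the 24-hour window is an arbitrary choice for illustration.

import calendar
import time

current_timestamp = calendar.timegm(time.gmtime())
workflow_counts = activity_object.get_workflow_count_by_closestatus(
    time_period=60 * 60 * 24,  # look back over the last 24 hours
    current_timestamp=current_timestamp)
# e.g. {"COMPLETED": 12, "FAILED": 0, "CANCELED": 0, ...}
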
Example #8
def workflow_conditional_start(settings,
                               starter_name,
                               start_seconds,
                               data=None,
                               workflow_id=None,
                               workflow_name=None,
                               workflow_version=None):
    """
    Given workflow criteria, check the workflow completion history for the last time run
    If it last run more than start_seconds ago, start a new workflow
    """

    diff_seconds = None
    last_startTimestamp = None

    swfmeta = swfmetalib.SWFMeta(settings)
    swfmeta.connect()

    last_startTimestamp = swfmeta.get_last_completed_workflow_execution_startTimestamp(
        workflow_id=workflow_id,
        workflow_name=workflow_name,
        workflow_version=workflow_version)

    current_timestamp = calendar.timegm(time.gmtime())

    if last_startTimestamp is not None:
        diff_seconds = current_timestamp - start_seconds - last_startTimestamp
        print(diff_seconds)

    if last_startTimestamp is None or diff_seconds >= 0:
        # Start a new workflow
        # Load the starter module
        module_name = "starter." + starter_name
        importlib.import_module(module_name)
        full_path = "starter." + starter_name + "." + starter_name + "()"
        s = eval(full_path)

        # Customised start functions
        if starter_name == "starter_S3Monitor":

            if workflow_id == "S3Monitor":
                s.start(settings=settings, workflow="S3Monitor")
            if workflow_id == "S3Monitor_POA":
                s.start(settings=settings, workflow="S3Monitor_POA")

        elif starter_name == "starter_AdminEmail":
            s.start(settings=settings, workflow="AdminEmail")

        elif starter_name == "starter_PubmedArticleDeposit":
            # Special for pubmed, only start a workflow if the outbox is not empty
            bucket_name = settings.poa_packaging_bucket
            outbox_folder = "pubmed/outbox/"

            # Connect to S3 and bucket
            s3_conn = S3Connection(settings.aws_access_key_id,
                                   settings.aws_secret_access_key)
            bucket = s3_conn.lookup(bucket_name)

            s3_key_names = get_s3_key_names_from_bucket(bucket=bucket,
                                                        prefix=outbox_folder)
            if len(s3_key_names) > 0:
                s.start(settings=settings)

        elif starter_name == "starter_PubRouterDeposit":
            # PubRouterDeposit has different variants specified by the workflow variable
            workflow = workflow_id.split("_")[-1]
            s.start(settings=settings, workflow=workflow)

        elif (starter_name == "cron_FiveMinute"
              or starter_name == "starter_PublishPOA"
              or starter_name == "cron_NewS3POA"
              or starter_name == "starter_PublicationEmail"
              or starter_name == "starter_DepositCrossref"):
            s.start(settings=settings)
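
As a side note, the eval(full_path) construction used in Examples #4 and #8 can be written without eval; a minimal equivalent sketch using importlib and getattr, assuming each starter module defines a class of the same name (as the examples above already rely on).

import importlib

module_name = "starter." + starter_name
module = importlib.import_module(module_name)
# The starter module is expected to define a class named after itself,
# e.g. starter.starter_PublishPOA.starter_PublishPOA
s = getattr(module, starter_name)()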