Beispiel #1
0
 def _set_ongoing_job(self, class_):
     """Flag the job behind ``class_`` as ongoing in the job state database.

     When state already exists for the job and its ``ongoing`` value is
     set, the age of that value decides what happens: if it is younger
     than ``config.crontabber.max_ongoing_age_hours`` an
     ``OngoingJobError`` is raised, otherwise the stale value is logged
     and overwritten.  When no state exists, a fresh record is created
     with ``ongoing`` set to the current time.

     :param class_: job class; must have ``app_name`` and may have
         ``depends_on`` (a single string or an iterable of names).
     :raises OngoingJobError: if the job has been ongoing for less than
         the configured maximum age.
     """
     app_name = class_.app_name
     info = self.job_state_database.get(app_name)
     if info:
         # Was it already ongoing?
         if info.get('ongoing'):
             # Use total_seconds(), not .seconds: the latter only holds the
             # sub-day remainder of the timedelta, so a flag that is 25
             # hours old would look 1 hour old and would never expire.
             age_hours = (
                 (utc_now() - info['ongoing']).total_seconds() / 3600.0
             )
             # Unless it's been ongoing for ages, raise OngoingJobError
             if age_hours < self.config.crontabber.max_ongoing_age_hours:
                 raise OngoingJobError(info['ongoing'])
             else:
                 self.config.logger.debug(
                     '{} has been ongoing for {:2} hours. '
                     'Ignore it and running the app anyway.'.format(
                         app_name,
                         age_hours,
                     ))
         info['ongoing'] = utc_now()
     else:
         # No state yet: normalise depends_on to a list and start a
         # blank record that is already marked as ongoing.
         depends_on = getattr(class_, 'depends_on', [])
         if isinstance(depends_on, basestring):
             depends_on = [depends_on]
         elif not isinstance(depends_on, list):
             depends_on = list(depends_on)
         info = {
             'next_run': None,
             'first_run': None,
             'last_run': None,
             'last_success': None,
             'last_error': {},
             'error_count': 0,
             'depends_on': depends_on,
             'ongoing': utc_now(),
         }
     self.job_state_database[app_name] = info
Beispiel #2
0
 def time_to_run(self, class_, time_):
     """Tell whether the job behind ``class_`` is due to run.

     Without any stored state for the job it is due right away, unless
     ``time_`` (an ``'HH:MM'`` string) is given; in that case it is due
     only once the hour and minute reported by ``utc_now()`` have reached
     that time.  With stored state it is due when the stored ``next_run``
     is earlier than ``utc_now()``.
     """
     app_name = class_.app_name
     try:
         info = self.job_state_database[app_name]
     except KeyError:
         if not time_:
             # no past information and no time constraint, run now
             return True
         hour, minute = map(int, time_.split(':'))
         now = utc_now()
         # due as soon as the wall clock has reached hour:minute
         return (now.hour, now.minute) >= (hour, minute)
     # NOTE(review): a stored next_run of None would make this comparison
     # raise rather than return -- confirm it is always populated here.
     return bool(info['next_run'] < utc_now())
Beispiel #3
0
 def time_to_run(self, class_, time_):
     """Return True when the job behind ``class_`` should run now.

     A job with no stored state runs immediately when ``time_`` is empty;
     when ``time_`` is an ``'HH:MM'`` string it runs only if the current
     hour/minute (from ``utc_now()``) is at or past that time.  A job
     with stored state runs when its ``next_run`` lies before
     ``utc_now()``.
     """
     app_name = class_.app_name
     try:
         state = self.job_state_database[app_name]
     except KeyError:
         if not time_:
             # nothing known about earlier runs and no time restriction
             return True
         wanted_hour, wanted_minute = [
             int(part) for part in time_.split(':')
         ]
         current = utc_now()
         if current.hour != wanted_hour:
             # a different hour settles it without looking at the minutes
             return current.hour > wanted_hour
         return current.minute >= wanted_minute
     else:
         # NOTE(review): this raises if next_run is stored as None --
         # confirm that cannot happen for this state backend.
         return True if state['next_run'] < utc_now() else False
Beispiel #4
0
 def list_jobs(self, stream=None):
     """Write a human-readable status report of every configured job.

     For each ``(class_name, job_class)`` pair in
     ``config.crontabber.jobs.class_list`` this prints the class path,
     app name and frequency.  If the job has an entry in the job state
     database it also prints whether it is ongoing, its last run, last
     success, next run and the last recorded error (if any).

     :param stream: file-like object to print to; ``sys.stdout`` is used
         when it is not given.
     """
     if not stream:
         stream = sys.stdout
     _fmt = '%Y-%m-%d %H:%M:%S'
     _now = utc_now()
     PAD = 15
     for class_name, job_class in self.config.crontabber.jobs.class_list:
         class_config = self.config.crontabber['class-%s' % class_name]
         freq = class_config.frequency
         if class_config.time:
             freq += ' @ %s' % class_config.time
         class_name = job_class.__module__ + '.' + job_class.__name__
         print >>stream, '=== JOB ' + '=' * 72
         print >>stream, 'Class:'.ljust(PAD), class_name
         print >>stream, 'App name:'.ljust(PAD), job_class.app_name
         print >>stream, 'Frequency:'.ljust(PAD), freq
         try:
             info = self.job_state_database[job_class.app_name]
         except KeyError:
             print >>stream, '*NO PREVIOUS RUN INFO*'
             continue
         if info.get('ongoing'):
             print >>stream, 'Ongoing now!'.ljust(PAD),
             # The start time lies in the past, so it goes first and
             # _now second, exactly like the other "... ago" lines below.
             print >>stream, 'Started', '%s ago' % timesince(
                 info.get('ongoing'), _now
             )
         print >>stream, 'Last run:'.ljust(PAD),
         if info['last_run']:
             print >>stream, info['last_run'].strftime(_fmt).ljust(20),
             print >>stream, '(%s ago)' % timesince(info['last_run'], _now)
         else:
             print >>stream, 'none'
         print >>stream, 'Last success:'.ljust(PAD),
         if info.get('last_success'):
             print >>stream, info['last_success'].strftime(_fmt).ljust(20),
             print >>stream, ('(%s ago)' %
                              timesince(info['last_success'], _now))
         else:
             print >>stream, 'no previous successful run'
         print >>stream, 'Next run:'.ljust(PAD),
         if info['next_run']:
             print >>stream, info['next_run'].strftime(_fmt).ljust(20),
             if _now > info['next_run']:
                 # overdue: next_run is the earlier of the two datetimes
                 print >>stream, ('(was %s ago)' %
                                  timesince(info['next_run'], _now))
             else:
                 # upcoming: _now is the earlier of the two datetimes
                 print >>stream, '(in %s)' % timesince(
                     _now,
                     info['next_run']
                 )
         else:
             print >>stream, 'none'
         if info.get('last_error'):
             print >>stream, 'Error!!'.ljust(PAD),
             print >>stream, '(%s times)' % info['error_count']
             print >>stream, 'Traceback (most recent call last):'
             print >>stream, info['last_error']['traceback'],
             print >>stream, '%s:' % info['last_error']['type'],
             print >>stream, info['last_error']['value']
         print >>stream, ''
Beispiel #5
0
 def list_jobs(self, stream=None):
     """Print a status summary for each job listed in the configuration.

     Iterates ``config.crontabber.jobs.class_list`` and, per job, prints
     the fully qualified class name, the app name and the frequency
     (with the configured time appended, if any).  When the job state
     database has a record for the app, the ongoing marker, last run,
     last success, next run and last error are printed as well;
     otherwise a "no previous run" notice is printed.

     :param stream: file-like object receiving the output; defaults to
         ``sys.stdout`` when omitted.
     """
     if not stream:
         stream = sys.stdout
     _fmt = '%Y-%m-%d %H:%M:%S'
     _now = utc_now()
     PAD = 15
     for class_name, job_class in self.config.crontabber.jobs.class_list:
         class_config = self.config.crontabber['class-%s' % class_name]
         freq = class_config.frequency
         if class_config.time:
             freq += ' @ %s' % class_config.time
         class_name = job_class.__module__ + '.' + job_class.__name__
         print >> stream, '=== JOB ' + '=' * 72
         print >> stream, 'Class:'.ljust(PAD), class_name
         print >> stream, 'App name:'.ljust(PAD), job_class.app_name
         print >> stream, 'Frequency:'.ljust(PAD), freq
         try:
             info = self.job_state_database[job_class.app_name]
         except KeyError:
             print >> stream, '*NO PREVIOUS RUN INFO*'
             continue
         if info.get('ongoing'):
             print >> stream, 'Ongoing now!'.ljust(PAD),
             # Earlier datetime first, _now second -- the same order every
             # other "... ago" line in this method uses.
             print >> stream, 'Started', '%s ago' % timesince(
                 info.get('ongoing'), _now)
         print >> stream, 'Last run:'.ljust(PAD),
         if info['last_run']:
             print >> stream, info['last_run'].strftime(_fmt).ljust(20),
             print >> stream, '(%s ago)' % timesince(info['last_run'], _now)
         else:
             print >> stream, 'none'
         print >> stream, 'Last success:'.ljust(PAD),
         if info.get('last_success'):
             print >> stream, info['last_success'].strftime(_fmt).ljust(20),
             print >> stream, ('(%s ago)' %
                               timesince(info['last_success'], _now))
         else:
             print >> stream, 'no previous successful run'
         print >> stream, 'Next run:'.ljust(PAD),
         if info['next_run']:
             print >> stream, info['next_run'].strftime(_fmt).ljust(20),
             if _now > info['next_run']:
                 # the scheduled time has already passed
                 print >> stream, ('(was %s ago)' %
                                   timesince(info['next_run'], _now))
             else:
                 # the scheduled time is still ahead
                 print >> stream, '(in %s)' % timesince(
                     _now, info['next_run'])
         else:
             print >> stream, 'none'
         if info.get('last_error'):
             print >> stream, 'Error!!'.ljust(PAD),
             print >> stream, '(%s times)' % info['error_count']
             print >> stream, 'Traceback (most recent call last):'
             print >> stream, info['last_error']['traceback'],
             print >> stream, '%s:' % info['last_error']['type'],
             print >> stream, info['last_error']['value']
         print >> stream, ''
Beispiel #6
0
    def time_to_run(self, class_, time_):
        """Decide whether the job behind ``class_`` should be started.

        * No stored state and no ``time_``: run now.
        * No stored state but an ``'HH:MM'`` ``time_``: run once the
          hour and minute from ``utc_now()`` have reached it.
        * Stored state with a ``next_run``: run when it is in the past.
        * Stored state without a ``next_run``: run only if ``ongoing``
          is set (see the comment in the body).
        """
        app_name = class_.app_name
        try:
            info = self.job_state_database[app_name]
        except KeyError:
            if not time_:
                # no past information, run now
                return True
            hour, minute = [int(piece) for piece in time_.split(':')]
            now = utc_now()
            # only run if the requested hour:minute is not in the future
            return (now.hour, now.minute) >= (hour, minute)

        next_run = info['next_run']
        if next_run:
            return bool(next_run < utc_now())

        # The job has never completed a run.  If it nevertheless carries
        # an ongoing value, two independent threads tried to start it and
        # this is the later one (by a tiny margin), seeing the value the
        # first one already set.  Let it through in that case: it will
        # start and then break on the RowLevelLockError raised by the
        # state's __setitem__ method.
        return bool(info['ongoing'])
Beispiel #7
0
 def check_dependencies(self, class_):
     """Check that every job ``class_`` depends on is in a usable state.

     Returns a ``(ok, reason)`` tuple.  ``ok`` is True (and ``reason``
     None) when ``class_`` has no ``depends_on`` attribute or when all
     dependencies pass.  Otherwise ``ok`` is False and ``reason`` says
     which dependency failed: it has no stored state, its last run left
     an error, or its ``next_run`` is earlier than ``utc_now()``.
     """
     _missing = object()
     depends_on = getattr(class_, 'depends_on', _missing)
     if depends_on is _missing:
         # nothing declared, nothing to check
         return True, None
     # a single name is accepted as shorthand for a one-element list
     names = (
         [depends_on] if isinstance(depends_on, basestring) else depends_on
     )
     for name in names:
         try:
             state = self.job_state_database[name]
         except KeyError:
             # the dependency has never left any state behind
             return False, "%r hasn't been run yet" % name
         if state.get('last_error'):
             return False, "%r errored last time it ran" % name
         if state['next_run'] < utc_now():
             # the dependency is itself overdue
             return False, "%r hasn't recently run" % name
     # every dependency checked out
     return True, None
Beispiel #8
0
 def check_dependencies(self, class_):
     """Report whether the jobs that ``class_`` depends on allow a run.

     The result is a 2-tuple ``(ok, message)``.  A class without a
     ``depends_on`` attribute always yields ``(True, None)``.  For each
     declared dependency the job state database is consulted; the first
     dependency that has no record, has a ``last_error``, or has a
     ``next_run`` before ``utc_now()`` yields ``(False, message)``.
     """
     try:
         dependencies = class_.depends_on
     except AttributeError:
         # that's perfectly fine, the job simply has no dependencies
         return (True, None)
     if isinstance(dependencies, basestring):
         dependencies = [dependencies]
     for dependency in dependencies:
         problem = None
         try:
             job_info = self.job_state_database[dependency]
         except KeyError:
             # the job this one depends on has no stored state at all
             problem = "%r hasn't been run yet"
         else:
             if job_info.get('last_error'):
                 # the dependency failed the last time it ran
                 problem = "%r errored last time it ran"
             elif job_info['next_run'] < utc_now():
                 # the dependency is overdue for its own next run
                 problem = "%r hasn't recently run"
         if problem:
             return (False, problem % dependency)
     # nothing stands in the way of running this class
     return (True, None)
Beispiel #9
0
    def _run_one(self, job_class, config, force=False):
        """Run a single job class once and record the outcome.

        Unless ``force`` is true, the job is skipped (with a debug log
        line) when ``time_to_run`` says it is not due or when
        ``check_dependencies`` reports a problem.

        Otherwise ``_run_job`` is iterated; each value it yields is
        treated as a successful run and passed to ``_remember_success``
        together with the elapsed time.  ``OngoingJobError`` and
        ``RowLevelLockError`` are re-raised untouched and suppress the
        final ``_log_run`` call.  Any other exception is optionally
        reported to Sentry, logged, passed to ``_remember_failure`` and
        then swallowed; ``_log_run`` is still called for it.

        :param job_class: the job class to run; must have ``app_name``.
        :param config: per-job configuration providing ``frequency`` and
            ``time``.
        :param force: when true, skip the due-time and dependency checks.
        """
        _debug = self.config.logger.debug
        seconds = convert_frequency(config.frequency)
        time_ = config.time
        if not force:
            if not self.time_to_run(job_class, time_):
                _debug("skipping %r because it's not time to run", job_class)
                return
            ok, dependency_error = self.check_dependencies(job_class)
            if not ok:
                _debug("skipping %r dependencies aren't met [%s]", job_class,
                       dependency_error)
                return

        _debug('about to run %r', job_class)
        app_name = job_class.app_name
        info = self.job_state_database.get(app_name)

        last_success = None
        now = utc_now()
        # Cleared only for the "could not start" errors below so that the
        # finally clause does not call _log_run for them.
        log_run = True
        try:
            t0 = time.time()
            for last_success in self._run_job(job_class, config, info):
                t1 = time.time()
                _debug('successfully ran %r on %s', job_class, last_success)
                self._remember_success(job_class, last_success, t1 - t0)
                # _run_job() returns a generator, so we don't know how
                # many times this will loop. Anyway, we need to reset the
                # 't0' for the next loop if there is one.
                t0 = time.time()
            # Reached only when every iteration succeeded; the finally
            # clause passes these on to _log_run.
            exc_type = exc_value = exc_tb = None
        except (OngoingJobError, RowLevelLockError):
            # It's not an actual runtime error. It just basically means
            # you can't start crontabber right now.
            log_run = False
            raise
        except:
            # Deliberately catches everything: the failure is recorded
            # below and not re-raised.
            t1 = time.time()
            exc_type, exc_value, exc_tb = sys.exc_info()

            # when debugging tests that mock logging, uncomment this otherwise
            # the exc_info=True doesn't compute and record what the exception
            # was
            #raise  # noqa

            if self.config.sentry and self.config.sentry.dsn:
                assert raven, "raven not installed"
                try:
                    client = raven.Client(dsn=self.config.sentry.dsn)
                    identifier = client.get_ident(client.captureException())
                    self.config.logger.info(
                        'Error captured in Sentry. Reference: %s' % identifier)
                except Exception:
                    # Blank exceptions like this is evil but a failure to send
                    # the exception to Sentry is much less important than for
                    # crontabber to carry on. This is especially true
                    # considering that raven depends on network I/O.
                    _debug('Failed to capture and send error to Sentry',
                           exc_info=True)

            _debug('error when running %r on %s',
                   job_class,
                   last_success,
                   exc_info=True)
            self._remember_failure(job_class, t1 - t0, exc_type, exc_value,
                                   exc_tb)

        finally:
            # exc_type/exc_value/exc_tb are unbound when log_run is False,
            # so this guard must stay in front of the call.
            if log_run:
                self._log_run(job_class, seconds, time_, last_success, now,
                              exc_type, exc_value, exc_tb)
Beispiel #10
0
    def _run_one(self, job_class, config, force=False):
        """Execute one job class and record how it went.

        When ``force`` is false the job is first checked with
        ``time_to_run`` and ``check_dependencies``; failing either check
        logs a debug message and returns without running.

        The job is run by iterating ``_run_job``.  Every yielded value is
        recorded through ``_remember_success`` with the time that
        iteration took.  Any exception is caught, optionally sent to
        Sentry, logged, recorded through ``_remember_failure`` and not
        re-raised.  ``_log_run`` is called in every case.

        :param job_class: job class to run; must have ``app_name``.
        :param config: per-job configuration with ``frequency`` and
            ``time``.
        :param force: run even if the job is not due or its dependencies
            are not met.
        """
        log_debug = self.config.logger.debug
        seconds = convert_frequency(config.frequency)
        time_ = config.time
        if not force:
            if not self.time_to_run(job_class, time_):
                log_debug(
                    "skipping %r because it's not time to run", job_class)
                return
            dependencies_ok, reason = self.check_dependencies(job_class)
            if not dependencies_ok:
                log_debug("skipping %r dependencies aren't met [%s]",
                          job_class, reason)
                return

        log_debug('about to run %r', job_class)
        info = self.job_state_database.get(job_class.app_name)

        last_success = None
        now = utc_now()
        try:
            started = time.time()
            for last_success in self._run_job(job_class, config, info):
                finished = time.time()
                log_debug(
                    'successfully ran %r on %s', job_class, last_success)
                self._remember_success(
                    job_class, last_success, finished - started)
                # _run_job() is a generator of unknown length, so restart
                # the clock in case another iteration follows.
                started = time.time()
        except:
            # Catch-all on purpose: whatever went wrong is recorded here
            # instead of being propagated.
            finished = time.time()
            exc_type, exc_value, exc_tb = sys.exc_info()

            # Tip: when debugging tests that mock logging, temporarily
            # re-raise here; otherwise exc_info=True does not compute and
            # record what the exception was.

            sentry_config = self.config.sentry
            if sentry_config and sentry_config.dsn:
                assert raven, "raven not installed"
                try:
                    client = raven.Client(dsn=self.config.sentry.dsn)
                    identifier = client.get_ident(client.captureException())
                    self.config.logger.info(
                        'Error captured in Sentry. Reference: %s' % identifier
                    )
                except Exception:
                    # Swallowing everything is ugly, but failing to reach
                    # Sentry (raven needs network I/O) matters far less
                    # than letting crontabber carry on.
                    log_debug('Failed to capture and send error to Sentry',
                              exc_info=True)

            log_debug('error when running %r on %s',
                      job_class, last_success, exc_info=True)
            self._remember_failure(
                job_class, finished - started, exc_type, exc_value, exc_tb)
        else:
            # every iteration succeeded, so there is no error to log
            exc_type = exc_value = exc_tb = None
        finally:
            self._log_run(job_class, seconds, time_, last_success, now,
                          exc_type, exc_value, exc_tb)
Beispiel #11
0
    def main(self, function=None, once=True):
        """Run the job once, or once per missed interval (backfill).

        This is a generator: it yields the datetime associated with each
        invocation of ``function``.

        :param function: callable to invoke; defaults to
            ``self._run_proxy``.  It is called without arguments when
            ``once`` is true and with a datetime otherwise.
        :param once: when true, run a single time and stop; when false,
            use ``self.job_information`` to work out which intervals
            since the last success still need to be run.
        """
        run = function or self._run_proxy
        now = utc_now()

        # One of four situations applies.

        # 1) No backfilling requested: a single run, right now.
        if once:
            run()
            yield now
            return

        # 2) Backfilling requested, but nothing is known about earlier
        #    runs: run for the current time only.
        if not self.job_information:
            run(now)
            yield now
            return

        # Backfilling: start from the last success, falling back to the
        # first run when the record has no 'last_success' key.
        fallback = self.job_information.get('first_run')
        last_success = self.job_information.get('last_success', fallback)

        # 3) No usable starting point, either because the job never
        #    succeeded or because the record predates the 'first_run'
        #    key (legacy): run for the current time only.
        if not last_success:
            self.config.logger.warning(
                'No previous last_success information available'
            )
            run(now)
            yield now
            return

        # 4) Real backfill.
        # last_success derives from first_run plus whole intervals, so
        # its time of day normally stays put.  "now", however, can be a
        # little EARLIER in the day than on the previous day, because
        # the jobs that run before this one take a variable amount of
        # time (18:02 one day, 18:01 the next).  That small difference
        # would stop the backfill from running again; see the
        # test_backfilling_with_configured_time_slow_job unit test.
        when = last_success
        if self.config.time:
            # Pin the time of day to the configured one so it always
            # matches the intention.
            hour, minute = [int(x) for x in self.config.time.split(':')]
            when = when.replace(
                hour=hour, minute=minute, second=0, microsecond=0)
        seconds = convert_frequency(self.config.frequency)
        interval = datetime.timedelta(seconds=seconds)
        # Step forward one interval at a time from the last success and
        # run once for every step that still lies before 'now'.
        upcoming = when + interval
        while upcoming < now:
            run(upcoming)
            yield upcoming
            upcoming += interval
Beispiel #12
0
    def main(self, function=None, once=True):
        """Generator that runs the job and yields each run's datetime.

        With ``once`` true, ``function`` is called a single time without
        arguments.  With ``once`` false, ``self.job_information`` decides
        whether to run for "now" only or to call ``function`` with each
        interval that was missed since the last success.

        :param function: callable to run; ``self._run_proxy`` is used
            when none is given.
        :param once: true for a single run, false to allow backfilling.
        """
        if not function:
            function = self._run_proxy
        now = utc_now()

        if once:
            # Case 1: plain single run, no backfill.
            function()
            yield now
            return

        if not self.job_information:
            # Case 2: backfill was requested, but there is no record of
            # earlier runs, so just run for the current time.
            function(now)
            yield now
            return

        # Work out when the job last succeeded; 'first_run' is the
        # default when the record has no 'last_success' key.
        last_success = self.job_information.get(
            'last_success',
            self.job_information.get('first_run'),
        )

        if not last_success:
            # Case 3: never succeeded, or a legacy record from before
            # the 'first_run' key existed.  Run for the current time.
            self.config.logger.warning(
                'No previous last_success information available')
            function(now)
            yield now
            return

        # Case 4: backfill from the last success.
        #
        # last_success started out as first_run and has only ever had
        # whole intervals added, so its hour should not drift.  "now"
        # can still be slightly earlier in the day than it was on the
        # previous day, since the jobs running before this one do not
        # take a constant time (18:02 yesterday, 18:01 today).  Left
        # alone, that gap would prevent the backfill from running
        # again.  The test_backfilling_with_configured_time_slow_job
        # unit test covers this.
        when = last_success
        if self.config.time:
            # Reset hour/minute to the configured time of day.
            h, m = map(int, self.config.time.split(':'))
            when = when.replace(hour=h, minute=m, second=0, microsecond=0)
        interval = datetime.timedelta(
            seconds=convert_frequency(self.config.frequency))
        # Walk forward interval by interval and run the app once per
        # interval that ends before 'now'.
        while True:
            candidate = when + interval
            if not (candidate < now):
                break
            when = candidate
            function(when)
            yield when
            yield when