Ejemplo n.º 1
0
def test_get_run_times_for_backfill_job(db):
    """Check the run times ``get_run_times`` yields for a backfill job spec.

    Time is frozen at noon of the current day so that every expected value
    can be written as an exact offset from ``now``.
    """
    now = timezone.now().replace(hour=12, minute=0, second=0, microsecond=0)
    one_hour = datetime.timedelta(hours=1)

    job_spec = {'cmd': 'fakejob', 'backfill': True, 'frequency': '1h'}

    with freezegun.freeze_time(now, tz_offset=0):
        # Backfill job that has never succeeded: only "now" is expected.
        never_run = list(get_run_times(job_spec, last_success=None))
        assert never_run == [now]

        # Hourly frequency, no "time" key, last success one hour ago:
        # only "now" is expected.
        one_behind = list(
            get_run_times(job_spec, last_success=now - one_hour)
        )
        assert one_behind == [now]

        # Hourly frequency, no "time" key, last success three hours ago:
        # the two missed hours are expected, followed by "now".
        three_behind = list(
            get_run_times(job_spec, last_success=now - 3 * one_hour)
        )
        assert three_behind == [now - 2 * one_hour, now - one_hour, now]

        # Daily frequency with an explicit time of day, last success a day
        # ago: today's 10:30 slot is the single expected run time.
        job_spec.update(frequency='1d', time='10:30')
        daily = list(
            get_run_times(job_spec, last_success=now - 24 * one_hour)
        )
        todays_slot = datetime.datetime(
            now.year, now.month, now.day, 10, 30, 0, 0, tzinfo=now.tzinfo
        )
        assert daily == [todays_slot]
Ejemplo n.º 2
0
def test_get_run_times(db):
    """Assert a non-backfill job spec yields only the frozen current time."""
    current = timezone.now()
    spec = {"cmd": "fakejob", "backfill": False}

    with freezegun.freeze_time(current, tz_offset=0):
        # backfill is off, so the single expected run time is "now"
        run_times = list(get_run_times(spec, last_success=current))
        assert run_times == [current]
Ejemplo n.º 3
0
def test_get_run_times(db):
    """Assert a non-backfill job spec yields only the frozen current time."""
    frozen_now = timezone.now()
    spec = dict(cmd='fakejob', backfill=False)

    with freezegun.freeze_time(frozen_now, tz_offset=0):
        # backfill is off, so the single expected run time is "now"
        yielded = list(get_run_times(spec, last_success=frozen_now))
        assert yielded == [frozen_now]
Ejemplo n.º 4
0
    def _run_one(self, job_spec, force=False, cmd_args=None):
        """Run a single job.

        With ``force`` set, the job is run once right away and the result of
        ``self._run_job`` is returned, skipping the scheduling check, the job
        lock and the success/failure bookkeeping done below.

        Otherwise the job is run only when ``time_to_run`` allows it, once
        for every run time yielded by ``get_run_times``, while holding the
        job lock. ``OngoingJobError`` is re-raised; any other exception is
        logged and recorded as a failure instead of being propagated.

        :arg job_spec: job spec dict
        :arg force: forces the job to run even if it's not time to run
        :arg cmd_args: list of "--key=val" positional args as you would pass
            them on a command line

        """
        cmd_args = cmd_args or []
        cmd = job_spec['cmd']

        # Make sure we have a job record before trying to run anything
        job = Job.objects.get_or_create(app_name=cmd)[0]

        if force:
            # If we're forcing the job, just run it without the bookkeeping.
            return self._run_job(job_spec, *cmd_args)

        # Figure out whether this job should be run now
        seconds = convert_frequency(
            job_spec.get('frequency', DEFAULT_FREQUENCY))
        if not time_to_run(job_spec, job):
            logger.info("skipping %s: not time to run", cmd)
            return

        logger.info('about to run %s', cmd)

        now = timezone.now()
        log_run = True
        exc_type = exc_value = exc_tb = None
        start_time = None
        run_time = None

        with self.lock_job(job_spec['cmd']):
            try:
                cmd_kwargs = {}
                last_success = job.last_success

                # Backfill jobs can have multiple run-times, so we iterate
                # through all possible ones until either we get them all done
                # or it dies
                for run_time in get_run_times(job_spec, job.last_success):
                    if job_spec.get('backfill', False):
                        # If "backfill" is in the spec, then we want to pass in
                        # run_time as an argument
                        cmd_kwargs['run_time'] = format_datetime(run_time)

                    if job_spec.get('last_success', False):
                        # If "last_success" is in the spec, we want to pass in
                        # the last_success as an argument
                        cmd_kwargs['last_success'] = format_datetime(
                            last_success)

                    logger.info('running: %s %s %s', cmd, cmd_args, cmd_kwargs)

                    start_time = time.time()
                    self._run_job(job_spec, *cmd_args, **cmd_kwargs)
                    end_time = time.time()

                    logger.info('successfully ran %s on %s', cmd, run_time)
                    # The next iteration reports this run as the last success
                    last_success = run_time
                    self._remember_success(cmd, last_success,
                                           end_time - start_time)

            except OngoingJobError:
                # Re-raise untouched and suppress the run log in "finally"
                log_run = False
                raise

            except Exception:
                end_time = time.time()
                exc_type, exc_value, exc_tb = sys.exc_info()
                single_line_tb = (''.join(
                    traceback.format_exception(*sys.exc_info())).replace(
                        '\n', '\\n'))

                # start_time is still None when the failure happened before
                # the first job was started (e.g. while producing the first
                # run time). Subtracting from None would raise TypeError here
                # and mask the real error, so report a zero duration instead.
                if start_time is None:
                    duration = 0.0
                else:
                    duration = end_time - start_time

                logger.error('error when running %s (%s): %s', cmd, run_time,
                             single_line_tb)
                self._remember_failure(cmd, duration, exc_type,
                                       exc_value, exc_tb)

            finally:
                # Runs for both success and handled failure; the exc_* values
                # stay None when every run succeeded.
                if log_run:
                    self._log_run(cmd, seconds, job_spec.get('time'), run_time,
                                  now, exc_type, exc_value, exc_tb)
Ejemplo n.º 5
0
    def _run_one(self, job_spec, force=False, cmd_args=None):
        """Run a single job.

        With ``force`` set, the job is run once right away and the result of
        ``self._run_job`` is returned, skipping the scheduling check, the job
        lock and the success/failure bookkeeping done below.

        Otherwise the job is run only when ``time_to_run`` allows it, once
        for every run time yielded by ``get_run_times``, while holding the
        job lock. Each successful run is remembered and logged immediately.
        ``OngoingJobError`` is re-raised; any other exception is logged and
        recorded as a failure instead of being propagated.

        :arg job_spec: job spec dict
        :arg force: forces the job to run even if it's not time to run
        :arg cmd_args: list of "--key=val" positional args as you would pass
            them on a command line

        """
        cmd_args = cmd_args or []
        cmd = job_spec["cmd"]

        # Make sure we have a job record before trying to run anything
        job = Job.objects.get_or_create(app_name=cmd)[0]

        if force:
            # If we're forcing the job, just run it without the bookkeeping.
            return self._run_job(job_spec, *cmd_args)

        # Figure out whether this job should be run now
        seconds = convert_frequency(
            job_spec.get("frequency", DEFAULT_FREQUENCY))
        if not time_to_run(job_spec, job):
            logger.info("skipping %s: not time to run", cmd)
            return

        logger.info("about to run %s", cmd)

        now = timezone.now()
        start_time = None
        run_time = None

        with self.lock_job(job_spec["cmd"]):
            try:
                cmd_kwargs = {}
                last_success = job.last_success

                # Backfill jobs can have multiple run-times, so we iterate
                # through all possible ones until either we get them all done
                # or it dies
                for run_time in get_run_times(job_spec, job.last_success):
                    if job_spec.get("backfill", False):
                        # If "backfill" is in the spec, then we want to pass in
                        # run_time as an argument
                        cmd_kwargs["run_time"] = format_datetime(run_time)

                    if job_spec.get("last_success", False):
                        # If "last_success" is in the spec, we want to pass in
                        # the last_success as an argument
                        cmd_kwargs["last_success"] = format_datetime(
                            last_success)

                    logger.info("running: %s %s %s", cmd, cmd_args, cmd_kwargs)

                    start_time = time.time()
                    self._run_job(job_spec, *cmd_args, **cmd_kwargs)
                    end_time = time.time()

                    logger.info("successfully ran %s on %s", cmd, run_time)
                    # The next iteration reports this run as the last success
                    last_success = run_time

                    self._remember_success(cmd, last_success,
                                           end_time - start_time)

                    # Log each backfill task as a successful completion so that if
                    # one of them fails, we start at the failure date rather than
                    # all the way back at the beginning.
                    self._log_run(
                        cmd,
                        seconds,
                        job_spec.get("time"),
                        run_time,
                        now,
                    )

            except OngoingJobError:
                # Catch and raise this so it doesn't get handled by the Exception
                # handling
                raise

            except Exception:
                end_time = time.time()
                exc_type, exc_value, exc_tb = sys.exc_info()
                single_line_tb = "".join(
                    traceback.format_exception(*sys.exc_info())).replace(
                        "\n", "\\n")

                # start_time is still None when the failure happened before
                # the first job was started (e.g. while producing the first
                # run time). Subtracting from None would raise TypeError here,
                # mask the real error and skip the failure log below, so
                # report a zero duration instead.
                if start_time is None:
                    duration = 0.0
                else:
                    duration = end_time - start_time

                logger.error("error when running %s (%s): %s", cmd, run_time,
                             single_line_tb)
                self._remember_failure(cmd, duration, exc_type,
                                       exc_value, exc_tb)
                self._log_run(
                    cmd,
                    seconds,
                    job_spec.get("time"),
                    run_time,
                    now,
                    exc_type,
                    exc_value,
                    exc_tb,
                )
Ejemplo n.º 6
0
    def _run_one(self, job_spec, force=False, cmd_args=None):
        """Run a single job.

        With ``force`` set, the job is run once right away and the result of
        ``self._run_job`` is returned, skipping the scheduling check, the job
        lock and the success/failure bookkeeping done below.

        Otherwise the job is run only when ``time_to_run`` allows it, once
        for every run time yielded by ``get_run_times``, while holding the
        job lock. ``OngoingJobError`` is re-raised; any other exception is
        passed to ``capture_error``, logged and recorded as a failure instead
        of being propagated.

        :arg job_spec: job spec dict
        :arg force: forces the job to run even if it's not time to run
        :arg cmd_args: list of "--key=val" positional args as you would pass
            them on a command line

        """
        cmd_args = cmd_args or []
        cmd = job_spec['cmd']

        # Make sure we have a job record before trying to run anything
        job = Job.objects.get_or_create(app_name=cmd)[0]

        if force:
            # If we're forcing the job, just run it without the bookkeeping.
            return self._run_job(job_spec, *cmd_args)

        # Figure out whether this job should be run now
        seconds = convert_frequency(job_spec.get('frequency', DEFAULT_FREQUENCY))
        if not time_to_run(job_spec, job):
            logger.info("skipping %s: not time to run", cmd)
            return

        logger.info('about to run %s', cmd)

        now = timezone.now()
        log_run = True
        exc_type = exc_value = exc_tb = None
        start_time = None
        run_time = None

        with self.lock_job(job_spec['cmd']):
            try:
                cmd_kwargs = {}
                last_success = job.last_success

                # Backfill jobs can have multiple run-times, so we iterate
                # through all possible ones until either we get them all done
                # or it dies
                for run_time in get_run_times(job_spec, job.last_success):
                    if job_spec.get('backfill', False):
                        # If "backfill" is in the spec, then we want to pass in
                        # run_time as an argument
                        cmd_kwargs['run_time'] = format_datetime(run_time)

                    if job_spec.get('last_success', False):
                        # If "last_success" is in the spec, we want to pass in
                        # the last_success as an argument
                        cmd_kwargs['last_success'] = format_datetime(last_success)

                    logger.info('running: %s %s %s', cmd, cmd_args, cmd_kwargs)

                    start_time = time.time()
                    self._run_job(job_spec, *cmd_args, **cmd_kwargs)
                    end_time = time.time()

                    logger.info('successfully ran %s on %s', cmd, run_time)
                    # The next iteration reports this run as the last success
                    last_success = run_time
                    self._remember_success(cmd, last_success, end_time - start_time)

            except OngoingJobError:
                # Re-raise untouched and suppress the run log in "finally"
                log_run = False
                raise

            except Exception:
                end_time = time.time()
                exc_type, exc_value, exc_tb = sys.exc_info()
                single_line_tb = (
                    ''.join(traceback.format_exception(*sys.exc_info()))
                    .replace('\n', '\\n')
                )

                # start_time is still None when the failure happened before
                # the first job was started (e.g. while producing the first
                # run time). Subtracting from None would raise TypeError here
                # and mask the real error, so report a zero duration instead.
                if start_time is None:
                    duration = 0.0
                else:
                    duration = end_time - start_time

                # Send error to sentry, log it, and remember the failure
                capture_error(settings.SENTRY_DSN)
                logger.error('error when running %s (%s): %s', cmd, run_time, single_line_tb)
                self._remember_failure(
                    cmd,
                    duration,
                    exc_type,
                    exc_value,
                    exc_tb
                )

            finally:
                # Runs for both success and handled failure; the exc_* values
                # stay None when every run succeeded.
                if log_run:
                    self._log_run(
                        cmd,
                        seconds,
                        job_spec.get('time'),
                        run_time,
                        now,
                        exc_type, exc_value, exc_tb
                    )