Exemple #1
0
 def _email_error(self, task, formatted_traceback, subject, headline):
     """
     Send an error email for ``task`` built from two ``str.format`` templates.

     ``subject`` and ``headline`` are both filled in with ``task`` and
     ``host`` (read from ``self.host``). The command line of the current
     process is passed to ``format_task_error`` along with the traceback,
     and ``task.owner_email`` is forwarded to ``send_error_email``.
     """
     template_fields = dict(task=task, host=self.host)
     email_subject = subject.format(**template_fields)
     email_headline = headline.format(**template_fields)
     invocation = subprocess.list2cmdline(sys.argv)
     email_body = notifications.format_task_error(
         email_headline,
         task,
         invocation,
         formatted_traceback,
     )
     notifications.send_error_email(email_subject, email_body, task.owner_email)
Exemple #2
0
    def set_status(self, task, new_status, config=None):
        """
        Move ``task`` to ``new_status``, applying the disable rules.

        - A ``FAILED`` transition requires ``config`` (checked with ``assert``).
        - A ``RUNNING`` task is never moved to ``DISABLED``; the call is a no-op.
        - A ``DISABLED`` task is re-enabled when the new status is ``DONE``.
          For any other new status the call is ignored if
          ``task.scheduler_disable_time`` is set.
        - A ``FAILED`` transition on a task that ``can_disable()`` records the
          failure; on excessive failures the task becomes ``DISABLED`` instead
          and an email is sent.
        - Finally the task is moved from its old bucket in
          ``self._status_tasks`` to the bucket for the resulting status.
        """
        if new_status == FAILED:
            # config supplies the values reported in the disable email below
            assert config is not None

        if new_status == DISABLED and task.status == RUNNING:
            return

        if task.status == DISABLED:
            if new_status == DONE:
                self.re_enable(task)

            # don't allow workers to override a scheduler disable
            elif task.scheduler_disable_time is not None:
                return

        if new_status == FAILED and task.can_disable():
            task.add_failure()
            if task.has_excessive_failures():
                # Record when the scheduler disabled the task and report
                # DISABLED instead of FAILED.
                task.scheduler_disable_time = time.time()
                new_status = DISABLED
                notifications.send_error_email(
                    'Luigi Scheduler: DISABLED {task} due to excessive failures'.format(task=task.id),
                    '{task} failed {failures} times in the last {window} seconds, so it is being '
                    'disabled for {persist} seconds'.format(
                        failures=config.disable_failures,
                        task=task.id,
                        window=config.disable_window,
                        persist=config.disable_persist,
                    ))
        elif new_status == DISABLED:
            # A DISABLED status requested directly (not via the failure path
            # above) clears the scheduler disable timestamp.
            task.scheduler_disable_time = None

        # Re-file the task under its new status.
        self._status_tasks[task.status].pop(task.id)
        self._status_tasks[new_status][task.id] = task
        task.status = new_status
Exemple #3
0
    def _email_complete_error(self, task, formatted_traceback):
        """
        Send an error email saying ``task`` failed scheduling.

        The headline states that neither the task nor its dependencies will
        be scheduled because of an error in ``complete()``. The subject names
        the task and ``self.host``; ``task.owner_email`` is forwarded to
        ``send_error_email``.
        """
        subject_template = "Luigi: {task} failed scheduling. Host: {host}"
        email_subject = subject_template.format(task=task, host=self.host)
        email_body = notifications.format_task_error(
            "Will not schedule task or any dependencies due to error in complete() method",
            task,
            formatted_traceback,
        )
        notifications.send_error_email(email_subject, email_body, task.owner_email)
Exemple #4
0
 def _email_error(self, task, formatted_traceback, subject, headline):
     """
     Format and send an error email about ``task``.

     ``subject`` and ``headline`` are ``str.format`` templates receiving
     ``task`` and ``host`` (``self.host``). The message body is produced by
     ``format_task_error`` from the headline, the task, the current process
     command line and the traceback; ``task.owner_email`` is forwarded to
     ``send_error_email``.
     """
     fields = {'task': task, 'host': self.host}
     rendered_subject = subject.format(**fields)
     rendered_headline = headline.format(**fields)
     command_line = subprocess.list2cmdline(sys.argv)
     body = notifications.format_task_error(
         rendered_headline, task, command_line, formatted_traceback,
     )
     notifications.send_error_email(rendered_subject, body, task.owner_email)
Exemple #5
0
 def on_success(self):
     """
     Announce the freshly built AI export file by email.

     This piggybacks on `send_error_email`, although the message is a plain
     notification rather than an error. It reports the output path and the
     file size in bytes.
     """
     body_template = "A new AI export file has been built, at {path} ({size} bytes)"
     export_path = self.output().path
     export_size = os.stat(self.output().path).st_size
     message = body_template.format(path=export_path, size=export_size)
     subject = 'AI export ready at %s' % self.output().path
     send_error_email(subject, message)
Exemple #6
0
    def run(self):
        """
        Run ``self.task`` and report the outcome on ``self.result_queue``.

        A tuple ``(task_id, status, expl, missing, new_deps)`` is always put
        on the queue from the ``finally`` clause. ``KeyboardInterrupt`` is
        re-raised; any other exception sets the status to ``FAILED``,
        triggers ``Event.FAILURE`` and sends an error email.
        """
        logger.info('[pid %s] Worker %s running   %s', os.getpid(), self.worker_id, self.task.task_id)

        if self.random_seed:
            # Need to have different random seeds if running in separate processes
            random.seed((os.getpid(), time.time()))

        # Defaults reported if an exception occurs before they are updated.
        status = FAILED
        expl = ''
        missing = []
        new_deps = []
        try:
            # Verify that all the tasks are fulfilled!
            missing = [dep.task_id for dep in self.task.deps() if not dep.complete()]
            if missing:
                deps = 'dependency' if len(missing) == 1 else 'dependencies'
                raise RuntimeError('Unfulfilled %s at run time: %s' % (deps, ', '.join(missing)))
            self.task.trigger_event(Event.START, self.task)
            t0 = time.time()
            status = None

            if self.task.run == NotImplemented:
                # External task
                # TODO(erikbern): We should check for task completeness after non-external tasks too!
                # This will resolve #814 and make things a lot more consistent
                status = DONE if self.task.complete() else FAILED
            else:
                # A non-empty result means the task asked for more
                # dependencies, so it is reported as SUSPENDED.
                new_deps = self._run_get_new_deps()
                status = DONE if not new_deps else SUSPENDED

            if status == SUSPENDED:
                logger.info(
                    '[pid %s] Worker %s new requirements      %s',
                    os.getpid(), self.worker_id, self.task.task_id)

            elif status == DONE:
                self.task.trigger_event(
                    Event.PROCESSING_TIME, self.task, time.time() - t0)
                # On success, expl carries the JSON-encoded on_success() result.
                expl = json.dumps(self.task.on_success())
                logger.info('[pid %s] Worker %s done      %s', os.getpid(),
                            self.worker_id, self.task.task_id)
                self.task.trigger_event(Event.SUCCESS, self.task)

        except KeyboardInterrupt:
            raise
        except BaseException as ex:
            status = FAILED
            logger.exception("[pid %s] Worker %s failed    %s", os.getpid(), self.worker_id, self.task)
            self.task.trigger_event(Event.FAILURE, self.task, ex)
            subject = "Luigi: %s FAILED" % self.task

            # The raw on_failure() result goes to the queue (JSON-encoded);
            # the wrapped form is used only for the email.
            raw_error_message = self.task.on_failure(ex)
            notification_error_message = notifications.wrap_traceback(raw_error_message)
            expl = json.dumps(raw_error_message)
            formatted_error_message = notifications.format_task_error(subject, self.task,
                                                                      formatted_exception=notification_error_message)
            notifications.send_error_email(subject, formatted_error_message, self.task.owner_email)
        finally:
            self.result_queue.put(
                (self.task.task_id, status, expl, missing, new_deps))
Exemple #7
0
    def run(self):
        """
        Run ``self.task`` and report the outcome on ``self.result_queue``.

        A tuple ``(task_id, status, error_message, missing, new_deps)`` is
        always put on the queue from the ``finally`` clause.
        ``KeyboardInterrupt`` is re-raised; any other exception sets the
        status to ``FAILED``, triggers ``Event.FAILURE`` and sends an error
        email.
        """
        logger.info('[pid %s] Worker %s running   %s', os.getpid(),
                    self.worker_id, self.task.task_id)

        if self.random_seed:
            # Need to have different random seeds if running in separate processes
            random.seed((os.getpid(), time.time()))

        # Defaults reported if an exception occurs before they are updated.
        status = FAILED
        error_message = ''
        missing = []
        new_deps = []
        try:
            # Verify that all the tasks are fulfilled!
            missing = [
                dep.task_id for dep in self.task.deps() if not dep.complete()
            ]
            if missing:
                deps = 'dependency' if len(missing) == 1 else 'dependencies'
                raise RuntimeError('Unfulfilled %s at run time: %s' %
                                   (deps, ', '.join(missing)))
            self.task.trigger_event(Event.START, self.task)
            t0 = time.time()
            status = None

            if self.task.run == NotImplemented:
                # External task
                # TODO(erikbern): We should check for task completeness after non-external tasks too!
                # This will resolve #814 and make things a lot more consistent
                status = DONE if self.task.complete() else FAILED
            else:
                # A non-empty result means the task asked for more
                # dependencies, so it is reported as SUSPENDED.
                new_deps = self._run_get_new_deps()
                status = DONE if not new_deps else SUSPENDED

            if status == SUSPENDED:
                logger.info('[pid %s] Worker %s new requirements      %s',
                            os.getpid(), self.worker_id, self.task.task_id)

            elif status == DONE:
                self.task.trigger_event(Event.PROCESSING_TIME, self.task,
                                        time.time() - t0)
                # Despite its name, on success error_message carries the
                # JSON-encoded on_success() result.
                error_message = json.dumps(self.task.on_success())
                logger.info('[pid %s] Worker %s done      %s', os.getpid(),
                            self.worker_id, self.task.task_id)
                self.task.trigger_event(Event.SUCCESS, self.task)

        except KeyboardInterrupt:
            raise
        except BaseException as ex:
            status = FAILED
            logger.exception("[pid %s] Worker %s failed    %s", os.getpid(),
                             self.worker_id, self.task)
            # The wrapped on_failure() result is both emailed and queued.
            error_message = notifications.wrap_traceback(
                self.task.on_failure(ex))
            self.task.trigger_event(Event.FAILURE, self.task, ex)
            subject = "Luigi: %s FAILED" % self.task
            notifications.send_error_email(subject, error_message)
        finally:
            self.result_queue.put(
                (self.task.task_id, status, error_message, missing, new_deps))
Exemple #8
0
    def _get_work(self):
        """
        Ask the scheduler for the next task to run.

        Returns ``(task_id, running_tasks, n_pending_tasks, n_unique_pending)``.
        If the task is not already in ``self._scheduled_tasks`` it is loaded
        with ``load_task``. When loading raises ``TaskClassException`` an
        error email is sent, the task is reported to the scheduler as
        ``FAILED``, ``self.run_succeeded`` is cleared and the returned
        ``task_id`` is ``None``.
        """
        logger.debug("Asking scheduler for work...")
        response = self._scheduler.get_work(
            worker=self._id, host=self.host, assistant=self._assistant)
        n_pending_tasks = response['n_pending_tasks']
        task_id = response['task_id']
        running_tasks = response['running_tasks']
        n_unique_pending = response['n_unique_pending']

        # Nothing to load: either no task was handed out or it is already known.
        if task_id is None or task_id in self._scheduled_tasks:
            return task_id, running_tasks, n_pending_tasks, n_unique_pending

        logger.info('Did not schedule %s, will load it dynamically', task_id)
        try:
            # TODO: we should obtain the module name from the server!
            self._scheduled_tasks[task_id] = load_task(
                module=response.get('task_module'),
                task_name=response['task_family'],
                params_str=response['task_params'],
            )
        except TaskClassException as ex:
            failure_text = 'Cannot find task for %s' % task_id
            logger.exception(failure_text)
            email_subject = 'Luigi: %s' % failure_text
            email_body = notifications.wrap_traceback(ex)
            notifications.send_error_email(email_subject, email_body)
            self._scheduler.add_task(
                worker=self._id,
                task_id=task_id,
                status=FAILED,
                runnable=False,
                assistant=self._assistant,
            )
            task_id = None
            self.run_succeeded = False

        return task_id, running_tasks, n_pending_tasks, n_unique_pending
Exemple #9
0
 def _email_unexpected_error(self, task, formatted_traceback):
     """
     Send an error email about a framework error while scheduling ``task``.

     The traceback is passed through ``notifications.wrap_traceback`` before
     being embedded in the message. The subject names the task and
     ``self.host``.
     """
     wrapped_traceback = notifications.wrap_traceback(formatted_traceback)
     subject_template = "Luigi: Framework error while scheduling {task}. Host: {host}"
     body_template = "Luigi framework error:\n{traceback}"
     notifications.send_error_email(
         subject_template.format(task=task, host=self.host),
         body_template.format(traceback=wrapped_traceback),
     )
Exemple #10
0
    def _get_work(self):
        """
        Fetch the next unit of work from the scheduler.

        Returns ``(task_id, running_tasks, n_pending_tasks, n_unique_pending)``.
        A task missing from ``self._scheduled_tasks`` is loaded with
        ``load_task``. If that raises ``TaskClassException``, an error email
        is sent, the task is added to the scheduler as ``FAILED``,
        ``self.run_succeeded`` is set to ``False`` and ``task_id`` is returned
        as ``None``.
        """
        logger.debug("Asking scheduler for work...")
        reply = self._scheduler.get_work(worker=self._id, host=self.host, assistant=self._assistant)
        n_pending_tasks = reply['n_pending_tasks']
        task_id = reply['task_id']
        running_tasks = reply['running_tasks']
        n_unique_pending = reply['n_unique_pending']

        needs_loading = task_id is not None and task_id not in self._scheduled_tasks
        if not needs_loading:
            return task_id, running_tasks, n_pending_tasks, n_unique_pending

        logger.info('Did not schedule %s, will load it dynamically', task_id)
        try:
            # TODO: we should obtain the module name from the server!
            self._scheduled_tasks[task_id] = load_task(
                module=reply.get('task_module'),
                task_name=reply['task_family'],
                params_str=reply['task_params'])
        except TaskClassException as ex:
            problem = 'Cannot find task for %s' % task_id
            logger.exception(problem)
            notifications.send_error_email('Luigi: %s' % problem, notifications.wrap_traceback(ex))
            self._scheduler.add_task(
                worker=self._id, task_id=task_id, status=FAILED, runnable=False,
                assistant=self._assistant)
            task_id = None
            self.run_succeeded = False

        return task_id, running_tasks, n_pending_tasks, n_unique_pending
    def run(self):
        """
        Email a summary of the logged incidents, then write a completion marker.

        Reads the input CSV into a DataFrame, counts ``log_string`` entries
        per ``(task_name, log_level)`` pair, and sends the rendered report
        through ``send_error_email``. Finally "Done." is written to the
        output target.
        """
        with self.input().open() as csv_file:
            logs = pd.read_csv(csv_file)

        # One row per task name, one column per log level, holding entry counts.
        level_counts = logs.groupby(['task_name', 'log_level'])['log_string'].count()
        log_summary = level_counts.to_frame().unstack(level='log_level')
        # Flatten the two-level column index down to its last element.
        log_summary.columns = log_summary.columns.to_flat_index().map(
            lambda title: title[-1])
        renamed_columns = {'ERROR': 'error_count', 'WARNING': 'warning_count'}
        log_summary = log_summary.rename(renamed_columns, axis=1).reset_index()
        for count_column in ('error_count', 'warning_count'):
            # A level that never occurred has no column yet; create it empty
            # so it becomes 0 below.
            if count_column not in log_summary:
                log_summary[count_column] = None
            log_summary[count_column] = log_summary[count_column].fillna(0).astype(int)

        render = utils.load_django_renderer()
        report_context = dict(host=socket.gethostname(),
                              logs=logs,
                              log_summary=log_summary)
        report_body = render('data/strings/log_report_email.html',
                             context=report_context)
        send_error_email(subject=f"Weekly log report ({len(logs)} incidents)",
                         message=report_body)

        with self.output().open('w') as marker:
            marker.write("Done.")
Exemple #12
0
 def _send_error_notification(self, raw_error_message):
     """
     Send a "FAILED" error email for ``self.task``.

     ``raw_error_message`` is wrapped with ``notifications.wrap_traceback``
     and formatted with ``notifications.format_task_error``;
     ``self.task.owner_email`` is forwarded to ``send_error_email``.
     """
     email_subject = "Luigi: %s FAILED" % self.task
     wrapped_traceback = notifications.wrap_traceback(raw_error_message)
     email_body = notifications.format_task_error(
         email_subject,
         self.task,
         formatted_exception=wrapped_traceback,
     )
     notifications.send_error_email(email_subject, email_body, self.task.owner_email)
Exemple #13
0
    def _email_complete_error(self, task, formatted_traceback):
        """
        Email a notice that ``task`` failed scheduling.

        The headline says the task and its dependencies will not be scheduled
        due to an error in ``complete()``. The subject includes the task and
        ``self.host``; ``task.owner_email`` is forwarded to
        ``send_error_email``.
        """
        email_subject = "Luigi: {task} failed scheduling. Host: {host}".format(
            task=task, host=self.host)
        email_headline = "Will not schedule task or any dependencies due to error in complete() method"
        email_body = notifications.format_task_error(
            email_headline, task, formatted_traceback)
        notifications.send_error_email(email_subject, email_body, task.owner_email)
Exemple #14
0
    def _get_work(self):
        """
        Ask the scheduler for work unless work requests have been stopped.

        Returns ``(task_id, running_tasks, n_pending_tasks, n_unique_pending)``;
        when ``self._stop_requesting_work`` is set the result is
        ``(None, 0, 0, 0)`` and the scheduler is not contacted. Each scheduler
        response is recorded in ``self._get_work_response_history``. A task
        that cannot be loaded (``TaskClassException``) is emailed about, added
        as ``FAILED``, and reported to the caller as ``task_id = None``.
        """
        if self._stop_requesting_work:
            return None, 0, 0, 0

        logger.debug("Asking scheduler for work...")
        response = self._scheduler.get_work(
            worker=self._id, host=self.host, assistant=self._assistant
        )
        n_pending_tasks = response["n_pending_tasks"]
        task_id = response["task_id"]
        running_tasks = response["running_tasks"]
        n_unique_pending = response["n_unique_pending"]

        self._get_work_response_history.append(
            {"task_id": task_id, "running_tasks": running_tasks}
        )

        # Nothing to load: no task was handed out, or it is already known.
        if task_id is None or task_id in self._scheduled_tasks:
            return task_id, running_tasks, n_pending_tasks, n_unique_pending

        logger.info("Did not schedule %s, will load it dynamically", task_id)
        try:
            # TODO: we should obtain the module name from the server!
            self._scheduled_tasks[task_id] = load_task(
                module=response.get("task_module"),
                task_name=response["task_family"],
                params_str=response["task_params"],
            )
        except TaskClassException as ex:
            failure_text = "Cannot find task for %s" % task_id
            logger.exception(failure_text)
            email_subject = "Luigi: %s" % failure_text
            email_body = notifications.wrap_traceback(ex)
            notifications.send_error_email(email_subject, email_body)
            self._add_task(
                worker=self._id,
                task_id=task_id,
                status=FAILED,
                runnable=False,
                assistant=self._assistant,
            )
            task_id = None
            self.run_succeeded = False

        return task_id, running_tasks, n_pending_tasks, n_unique_pending
Exemple #15
0
    def _email_unexpected_error(self, task, formatted_traceback):
        """
        Send an error email about a framework error while scheduling ``task``.

        The body is produced by ``notifications.format_task_error`` with the
        headline "Luigi framework error"; ``task.owner_email`` is forwarded
        to ``send_error_email``.
        """
        subject_template = "Luigi: Framework error while scheduling {task}. Host: {host}"
        email_subject = subject_template.format(task=task, host=self.host)
        email_body = notifications.format_task_error(
            "Luigi framework error", task, formatted_traceback)
        notifications.send_error_email(email_subject, email_body, task.owner_email)
Exemple #16
0
    def _email_dependency_error(self, task, formatted_traceback):
        """
        Send an error email saying ``task`` failed scheduling.

        The headline states that neither the task nor its dependencies will
        be scheduled because of an error in ``deps()``. The subject includes
        the task and ``self.host``; ``task.owner_email`` is forwarded to
        ``send_error_email``.
        """
        subject_template = "Luigi: {task} failed scheduling. Host: {host}"
        email_subject = subject_template.format(task=task, host=self.host)
        email_body = notifications.format_task_error(
            "Will not schedule task or any dependencies due to error in deps() method",
            task,
            formatted_traceback,
        )
        notifications.send_error_email(email_subject, email_body, task.owner_email)
Exemple #17
0
 def _send_error_notification(self, raw_error_message):
     """
     Email a failure notification for ``self.task``.

     The raw message is wrapped via ``notifications.wrap_traceback``, then
     formatted by ``notifications.format_task_error`` under the subject
     line; ``self.task.owner_email`` is forwarded to ``send_error_email``.
     """
     failure_subject = "Luigi: %s FAILED" % self.task
     wrapped = notifications.wrap_traceback(raw_error_message)
     body = notifications.format_task_error(
         failure_subject, self.task, formatted_exception=wrapped)
     notifications.send_error_email(failure_subject, body, self.task.owner_email)
Exemple #18
0
 def _email_complete_error(self, task, formatted_traceback):
     """
     Send an error email saying ``task`` failed scheduling.

     The traceback is wrapped with ``notifications.wrap_traceback`` and
     embedded in a message explaining that the task and its dependencies
     will not be scheduled because ``complete()`` raised.
     """
     wrapped_traceback = notifications.wrap_traceback(formatted_traceback)
     subject_template = "Luigi: {task} failed scheduling. Host: {host}"
     body_template = (
         "Will not schedule {task} or any dependencies due to error in complete() method:\n{traceback}"
     )
     notifications.send_error_email(
         subject_template.format(task=task, host=self.host),
         body_template.format(task=task, traceback=wrapped_traceback),
     )
Exemple #19
0
 def _email_complete_error(self, task, formatted_traceback):
     """
     Email a notice that ``task`` failed scheduling.

     The wrapped traceback (``notifications.wrap_traceback``) is appended to
     a message saying the task and its dependencies will not be scheduled
     due to an error in ``complete()``.
     """
     traceback_text = notifications.wrap_traceback(formatted_traceback)
     email_subject = "Luigi: {task} failed scheduling. Host: {host}".format(
         task=task, host=self.host)
     email_body = (
         "Will not schedule {task} or any dependencies due to error in complete() method:\n{traceback}"
     ).format(task=task, traceback=traceback_text)
     notifications.send_error_email(email_subject, email_body)
Exemple #20
0
    def _get_work(self):
        """
        Ask the scheduler for the next task to run.

        Returns ``(task_id, running_tasks, n_pending_tasks, n_unique_pending,
        worker_state)``. When ``self._stop_requesting_work`` is set, the
        scheduler is not contacted and
        ``(None, 0, 0, 0, WORKER_STATE_DISABLED)`` is returned.

        Each scheduler response is recorded in
        ``self._get_work_response_history``. A task missing from
        ``self._scheduled_tasks`` is loaded with ``load_task``; if that raises
        ``TaskClassException`` an error email is sent, the task is added as
        ``FAILED``, ``self.run_succeeded`` is cleared and ``task_id`` becomes
        ``None``.

        When the response carries ``batch_task_ids``, the known tasks among
        them are stored as a list in ``self._batch_running_tasks[task_id]``.
        """
        if self._stop_requesting_work:
            return None, 0, 0, 0, WORKER_STATE_DISABLED
        logger.debug("Asking scheduler for work...")
        r = self._scheduler.get_work(
            worker=self._id,
            host=self.host,
            assistant=self._assistant,
            current_tasks=list(self._running_tasks.keys()),
        )
        n_pending_tasks = r['n_pending_tasks']
        running_tasks = r['running_tasks']
        n_unique_pending = r['n_unique_pending']
        # TODO: For a tiny amount of time (a month?) we'll keep forwards compatibility
        # That is you can user a newer client than server (Sep 2016)
        worker_state = r.get('worker_state',
                             WORKER_STATE_ACTIVE)  # state according to server!
        task_id = self._get_work_task_id(r)

        self._get_work_response_history.append({
            'task_id': task_id,
            'running_tasks': running_tasks,
        })

        if task_id is not None and task_id not in self._scheduled_tasks:
            logger.info('Did not schedule %s, will load it dynamically',
                        task_id)

            try:
                # TODO: we should obtain the module name from the server!
                self._scheduled_tasks[task_id] = \
                    load_task(module=r.get('task_module'),
                              task_name=r['task_family'],
                              params_str=r['task_params'])
            except TaskClassException as ex:
                msg = 'Cannot find task for %s' % task_id
                logger.exception(msg)
                subject = 'Luigi: %s' % msg
                error_message = notifications.wrap_traceback(ex)
                notifications.send_error_email(subject, error_message)
                self._add_task(worker=self._id,
                               task_id=task_id,
                               status=FAILED,
                               runnable=False,
                               assistant=self._assistant)
                task_id = None
                self.run_succeeded = False

        if task_id is not None and 'batch_task_ids' in r:
            # Store a real list. On Python 3 ``filter`` returns a one-shot
            # iterator that is empty after its first traversal and is always
            # truthy, which is a trap for state kept on the instance.
            looked_up = [
                self._scheduled_tasks.get(batch_id)
                for batch_id in r['batch_task_ids']
            ]
            batch_tasks = [
                batch_task for batch_task in looked_up if batch_task
            ]
            self._batch_running_tasks[task_id] = batch_tasks

        return task_id, running_tasks, n_pending_tasks, n_unique_pending, worker_state
Exemple #21
0
def celebrate_success(task_param):
    """Send a success email for a task that opted in via `email_on_success`.

       Will be called directly after a successful execution
       of `run` on any Task subclass (i.e. all luigi Tasks).
       Nothing is sent unless `task_param.email_on_success` exists and is truthy.
    """
    task_id_str = task.Task.__repr__(task_param)
    if not getattr(task_param, 'email_on_success', False):
        return

    # Send to same email list that error emails go out on
    email_subject = "SUCCESS: " + task_id_str
    email_body = task_id_str + " completed at " + str(datetime.now())
    notifications.send_error_email(email_subject, email_body)
Exemple #22
0
 def _handle_task_load_error(self, exception, task_ids):
     """
     Report tasks sent by the scheduler that could not be found.

     Logs the problem, adds every id in ``task_ids`` as ``FAILED`` and
     non-runnable with the wrapped traceback as explanation, then sends one
     error email covering all of them.
     """
     joined_ids = ','.join(task_ids)
     description = 'Cannot find task(s) sent by scheduler: {}'.format(joined_ids)
     logger.exception(description)
     email_subject = 'Luigi: {}'.format(description)
     wrapped_traceback = notifications.wrap_traceback(exception)
     for failed_id in task_ids:
         self._add_task(
             worker=self._id,
             task_id=failed_id,
             status=FAILED,
             runnable=False,
             expl=wrapped_traceback,
         )
     notifications.send_error_email(email_subject, wrapped_traceback)
    def run(self):
        """
        Run ``self.task`` and report the outcome on ``self.result_queue``.

        A tuple ``(task_id, status, error_message, missing, new_deps)`` is
        always put on the queue from the ``finally`` clause.
        ``KeyboardInterrupt`` is re-raised; any other exception sets the
        status to ``FAILED``, triggers ``Event.FAILURE`` and sends an error
        email.
        """
        logger.info('[pid %s] Worker %s running   %s', os.getpid(), self.worker_id, self.task.task_id)

        if self.random_seed:
            # Need to have different random seeds if running in separate processes
            random.seed((os.getpid(), time.time()))

        # Defaults reported if an exception occurs before they are updated.
        status = FAILED
        error_message = ''
        missing = []
        new_deps = []
        try:
            # Verify that all the tasks are fulfilled!
            missing = [dep.task_id for dep in self.task.deps() if not dep.complete()]
            if missing:
                deps = 'dependency' if len(missing) == 1 else 'dependencies'
                raise RuntimeError('Unfulfilled %s at run time: %s' % (deps, ', '.join(missing)))
            self.task.trigger_event(Event.START, self.task)
            t0 = time.time()
            status = None

            new_deps = self._run_get_new_deps()

            # None (as opposed to an empty collection) is what marks the task
            # as DONE here; any other value is treated as new requirements.
            if new_deps is None:
                status = DONE
                self.task.trigger_event(
                    Event.PROCESSING_TIME, self.task, time.time() - t0)
                # Despite its name, on success error_message carries the
                # JSON-encoded on_success() result.
                error_message = json.dumps(self.task.on_success())
                logger.info('[pid %s] Worker %s done      %s', os.getpid(),
                            self.worker_id, self.task.task_id)
                self.task.trigger_event(Event.SUCCESS, self.task)

            else:
                status = SUSPENDED
                logger.info(
                    '[pid %s] Worker %s new requirements      %s',
                    os.getpid(), self.worker_id, self.task.task_id)

        except KeyboardInterrupt:
            raise
        except BaseException as ex:
            status = FAILED
            logger.exception("[pid %s] Worker %s failed    %s", os.getpid(), self.worker_id, self.task)
            # The wrapped on_failure() result is both emailed and queued.
            error_message = notifications.wrap_traceback(self.task.on_failure(ex))
            self.task.trigger_event(Event.FAILURE, self.task, ex)
            subject = "Luigi: %s FAILED" % self.task
            notifications.send_error_email(subject, error_message)
        finally:
            self.result_queue.put(
                (self.task.task_id, status, error_message, missing, new_deps))
Exemple #24
0
 def _handle_task_load_error(self, exception, task_ids):
     """
     Handle scheduler-sent tasks that could not be found.

     The failure is logged, each id in ``task_ids`` is added as ``FAILED``
     and non-runnable (with the wrapped traceback as ``expl``), and a single
     error email is sent afterwards.
     """
     summary = 'Cannot find task(s) sent by scheduler: {}'.format(','.join(task_ids))
     logger.exception(summary)
     mail_subject = 'Luigi: {}'.format(summary)
     mail_body = notifications.wrap_traceback(exception)
     for missing_id in task_ids:
         self._add_task(worker=self._id, task_id=missing_id, status=FAILED,
                        runnable=False, expl=mail_body)
     notifications.send_error_email(mail_subject, mail_body)
Exemple #25
0
    def _get_work(self):
        """
        Ask the scheduler for the next task to run.

        Returns ``(task_id, running_tasks, n_pending_tasks, n_unique_pending)``.
        When ``self._stop_requesting_work`` is set, the scheduler is not
        contacted and ``(None, 0, 0, 0)`` is returned.

        Each scheduler response is recorded in
        ``self._get_work_response_history``. A task missing from
        ``self._scheduled_tasks`` is loaded with ``load_task``; if that raises
        ``TaskClassException`` an error email is sent, the task is added as
        ``FAILED``, ``self.run_succeeded`` is cleared and ``task_id`` becomes
        ``None``.

        When the response carries ``batch_task_ids``, the known tasks among
        them are stored as a list in ``self._batch_running_tasks[task_id]``.
        """
        if self._stop_requesting_work:
            return None, 0, 0, 0
        logger.debug("Asking scheduler for work...")
        r = self._scheduler.get_work(
            worker=self._id,
            host=self.host,
            assistant=self._assistant,
            current_tasks=list(self._running_tasks.keys()),
        )
        n_pending_tasks = r['n_pending_tasks']
        running_tasks = r['running_tasks']
        n_unique_pending = r['n_unique_pending']
        task_id = self._get_work_task_id(r)

        self._get_work_response_history.append({
            'task_id': task_id,
            'running_tasks': running_tasks,
        })

        if task_id is not None and task_id not in self._scheduled_tasks:
            logger.info('Did not schedule %s, will load it dynamically', task_id)

            try:
                # TODO: we should obtain the module name from the server!
                self._scheduled_tasks[task_id] = \
                    load_task(module=r.get('task_module'),
                              task_name=r['task_family'],
                              params_str=r['task_params'])
            except TaskClassException as ex:
                msg = 'Cannot find task for %s' % task_id
                logger.exception(msg)
                subject = 'Luigi: %s' % msg
                error_message = notifications.wrap_traceback(ex)
                notifications.send_error_email(subject, error_message)
                self._add_task(worker=self._id, task_id=task_id, status=FAILED, runnable=False,
                               assistant=self._assistant)
                task_id = None
                self.run_succeeded = False

        if task_id is not None and 'batch_task_ids' in r:
            # Store a real list. On Python 3 ``filter`` returns a one-shot
            # iterator that is empty after its first traversal and is always
            # truthy, which is a trap for state kept on the instance.
            looked_up = [
                self._scheduled_tasks.get(batch_id) for batch_id in r['batch_task_ids']]
            batch_tasks = [batch_task for batch_task in looked_up if batch_task]
            self._batch_running_tasks[task_id] = batch_tasks

        return task_id, running_tasks, n_pending_tasks, n_unique_pending
Exemple #26
0
    def run(self):
        """
        Check ``self.task.complete()`` and report the outcome on ``self.result_queue``.

        ``self.task.run`` is not called here; the status is derived from
        ``complete()`` alone. A tuple
        ``(task_id, status, error_message, [], [])`` is always put on the
        queue from the ``finally`` clause. ``KeyboardInterrupt`` is re-raised;
        any other exception sets the status to ``FAILED``, triggers
        ``Event.FAILURE`` and sends an error email.
        """
        logger.info('[pid %s] Worker %s running   %s', os.getpid(),
                    self.worker_id, self.task.task_id)

        if self.random_seed:
            # Need to have different random seeds if running in separate processes
            random.seed((os.getpid(), time.time()))

        # Defaults reported if an exception occurs before they are updated.
        status = FAILED
        error_message = ''
        try:
            self.task.trigger_event(Event.START, self.task)
            t0 = time.time()
            status = None
            try:
                status = DONE if self.task.complete() else FAILED
                logger.debug('[pid %s] Task %s has status %s', os.getpid(),
                             self.task, status)
            finally:
                # Processing time is reported even if complete() raised.
                self.task.trigger_event(Event.PROCESSING_TIME, self.task,
                                        time.time() - t0)

            # NOTE(review): on_success() and Event.SUCCESS are reached even
            # when complete() returned a false value (status FAILED) — confirm
            # this is intended.
            error_message = json.dumps(self.task.on_success())
            logger.info('[pid %s] Worker %s done      %s', os.getpid(),
                        self.worker_id, self.task.task_id)
            self.task.trigger_event(Event.SUCCESS, self.task)

        except KeyboardInterrupt:
            raise
        except BaseException as ex:
            status = FAILED
            logger.exception('[pid %s] Worker %s failed    %s', os.getpid(),
                             self.worker_id, self.task)
            # The wrapped on_failure() result is both emailed and queued.
            error_message = notifications.wrap_traceback(
                self.task.on_failure(ex))
            self.task.trigger_event(Event.FAILURE, self.task, ex)
            subject = "Luigi: %s FAILED" % self.task
            notifications.send_error_email(subject, error_message)
        finally:
            logger.debug('Putting result into queue: %s %s %s',
                         self.task.task_id, status, error_message)
            self.result_queue.put(
                (self.task.task_id, status, error_message, [], []))
Exemple #27
0
    def set_status(self, task, new_status, config=None):
        """
        Move ``task`` to ``new_status``, applying the disable rules.

        - A ``FAILED`` transition requires ``config`` (checked with ``assert``).
        - ``SUSPENDED`` is rewritten to ``PENDING``.
        - A ``RUNNING`` task is never moved to ``DISABLED``; the call is a no-op.
        - A ``DISABLED`` task is re-enabled when the new status is ``DONE``.
          For any other new status the call is ignored if
          ``task.scheduler_disable_time`` is set.
        - A ``FAILED`` transition on a task that ``can_disable()`` records the
          failure; on excessive failures the task becomes ``DISABLED`` instead
          and an email is sent.
        - Finally the task is moved from its old bucket in
          ``self._status_tasks`` to the bucket for the resulting status.
        """
        if new_status == FAILED:
            # config supplies the values reported in the disable email below
            assert config is not None

        # not sure why we have SUSPENDED, as it can never be set
        if new_status == SUSPENDED:
            new_status = PENDING

        if new_status == DISABLED and task.status == RUNNING:
            return

        if task.status == DISABLED:
            if new_status == DONE:
                self.re_enable(task)

            # don't allow workers to override a scheduler disable
            elif task.scheduler_disable_time is not None:
                return

        if new_status == FAILED and task.can_disable():
            task.add_failure()
            if task.has_excessive_failures():
                # Record when the scheduler disabled the task and report
                # DISABLED instead of FAILED.
                task.scheduler_disable_time = time.time()
                new_status = DISABLED
                notifications.send_error_email(
                    'Luigi Scheduler: DISABLED {task} due to excessive failures'
                    .format(task=task.id),
                    '{task} failed {failures} times in the last {window} seconds, so it is being '
                    'disabled for {persist} seconds'.format(
                        failures=config.disable_failures,
                        task=task.id,
                        window=config.disable_window,
                        persist=config.disable_persist,
                    ))
        elif new_status == DISABLED:
            # A DISABLED status requested directly (not via the failure path
            # above) clears the scheduler disable timestamp.
            task.scheduler_disable_time = None

        # Re-file the task under its new status.
        self._status_tasks[task.status].pop(task.id)
        self._status_tasks[new_status][task.id] = task
        task.status = new_status
    def run(self):
        """
        Check ``self.task.complete()`` and report the outcome on ``self.result_queue``.

        ``self.task.run`` is not called here; the status is derived from
        ``complete()`` alone. A tuple
        ``(task_id, status, error_message, [], [])`` is always put on the
        queue from the ``finally`` clause. ``KeyboardInterrupt`` is re-raised;
        any other exception sets the status to ``FAILED``, triggers
        ``Event.FAILURE`` and sends an error email.
        """
        logger.info('[pid %s] Worker %s running   %s', os.getpid(), self.worker_id, self.task.task_id)

        if self.random_seed:
            # Need to have different random seeds if running in separate processes
            random.seed((os.getpid(), time.time()))

        # Defaults reported if an exception occurs before they are updated.
        status = FAILED
        error_message = ''
        try:
            self.task.trigger_event(Event.START, self.task)
            t0 = time.time()
            status = None
            try:
                status = DONE if self.task.complete() else FAILED
                logger.debug('[pid %s] Task %s has status %s', os.getpid(), self.task, status)
            finally:
                # Processing time is reported even if complete() raised.
                self.task.trigger_event(
                    Event.PROCESSING_TIME, self.task, time.time() - t0)

            # NOTE(review): on_success() and Event.SUCCESS are reached even
            # when complete() returned a false value (status FAILED) — confirm
            # this is intended.
            error_message = json.dumps(self.task.on_success())
            logger.info('[pid %s] Worker %s done      %s', os.getpid(),
                        self.worker_id, self.task.task_id)
            self.task.trigger_event(Event.SUCCESS, self.task)

        except KeyboardInterrupt:
            raise
        except BaseException as ex:
            status = FAILED
            logger.exception('[pid %s] Worker %s failed    %s', os.getpid(), self.worker_id, self.task)
            # The wrapped on_failure() result is both emailed and queued.
            error_message = notifications.wrap_traceback(self.task.on_failure(ex))
            self.task.trigger_event(Event.FAILURE, self.task, ex)
            subject = "Luigi: %s FAILED" % self.task
            notifications.send_error_email(subject, error_message)
        finally:
            logger.debug('Putting result into queue: %s %s %s', self.task.task_id, status, error_message)
            self.result_queue.put(
                (self.task.task_id, status, error_message, [], []))
Exemple #29
0
    def run(self):
        """Run the task and report its outcome on the result queue.

        Steps, in order:

        1. Verify every task from ``self.task.deps()`` is complete; otherwise a
           ``RuntimeError`` is raised (and handled below as a failure).
        2. Fire ``Event.START`` and call ``self.task.run()``.
        3. If ``run()`` returned a generator, each yielded value is treated as
           a batch of dynamic requirements.  When all of them are complete a
           ``RUNNING`` result is queued and the generator is resumed with
           ``getpaths(requires)``; otherwise the status becomes ``SUSPENDED``
           and the method returns, leaving ``new_deps`` to be queued by the
           outer ``finally`` clause.
        4. On normal completion call ``on_success()``, fire ``Event.SUCCESS``
           and set the status to ``DONE``.

        Any exception other than ``KeyboardInterrupt`` marks the task
        ``FAILED``, fires ``Event.FAILURE`` and sends an error email.  A
        ``(task_id, status, error_message, missing, new_deps)`` tuple is always
        put on ``self.result_queue`` before the method exits.

        Raises:
            KeyboardInterrupt: re-raised untouched; the ``finally`` clause still
                queues the result tuple first.
        """
        logger.info('[pid %s] Worker %s running   %s', os.getpid(),
                    self.worker_id, self.task.task_id)

        if self.random_seed:
            # Need to have different random seeds if running in separate processes.
            # random.seed() rejects tuple seeds with TypeError on Python 3.11+,
            # so the pid and the clock are folded into a single float instead.
            random.seed(os.getpid() * time.time())

        status = FAILED
        error_message = ''
        missing = []
        new_deps = []
        try:
            # Verify that all the tasks are fulfilled!
            missing = [
                dep.task_id for dep in self.task.deps() if not dep.complete()
            ]
            if missing:
                deps = 'dependency' if len(missing) == 1 else 'dependencies'
                raise RuntimeError('Unfulfilled %s at run time: %s' %
                                   (deps, ', '.join(missing)))
            self.task.trigger_event(Event.START, self.task)
            t0 = time.time()
            status = None
            try:
                task_gen = self.task.run()
                if isinstance(task_gen, types.GeneratorType):  # new deps
                    next_send = None
                    while True:
                        try:
                            # The first step (and any step with nothing to
                            # send) advances with next(); later steps feed the
                            # previous requirements' paths back in.
                            if next_send is None:
                                requires = six.next(task_gen)
                            else:
                                requires = task_gen.send(next_send)
                        except StopIteration:
                            break

                        new_req = flatten(requires)
                        status = (RUNNING if all(
                            t.complete() for t in new_req) else SUSPENDED)
                        new_deps = [(t.task_module, t.task_family,
                                     t.to_str_params()) for t in new_req]
                        if status == RUNNING:
                            # Requirements already satisfied: report progress
                            # and keep driving the generator.
                            self.result_queue.put((self.task.task_id, status,
                                                   '', missing, new_deps))
                            next_send = getpaths(requires)
                            new_deps = []
                        else:
                            logger.info(
                                '[pid %s] Worker %s new requirements      %s',
                                os.getpid(), self.worker_id, self.task.task_id)
                            # The outer finally clause queues the SUSPENDED
                            # result together with new_deps.
                            return
            finally:
                # A suspended task has not finished processing yet, so no
                # PROCESSING_TIME event is fired for it.
                if status != SUSPENDED:
                    self.task.trigger_event(Event.PROCESSING_TIME, self.task,
                                            time.time() - t0)
            error_message = json.dumps(self.task.on_success())
            logger.info('[pid %s] Worker %s done      %s', os.getpid(),
                        self.worker_id, self.task.task_id)
            self.task.trigger_event(Event.SUCCESS, self.task)
            status = DONE

        except KeyboardInterrupt:
            raise
        except BaseException as ex:
            status = FAILED
            logger.exception("[pid %s] Worker %s failed    %s", os.getpid(),
                             self.worker_id, self.task)
            error_message = notifications.wrap_traceback(
                self.task.on_failure(ex))
            self.task.trigger_event(Event.FAILURE, self.task, ex)
            subject = "Luigi: %s FAILED" % self.task
            notifications.send_error_email(subject, error_message)
        finally:
            self.result_queue.put(
                (self.task.task_id, status, error_message, missing, new_deps))
Exemple #30
0
 def testNotificationsNoSNS(self):
     # Sending an error email here must not reach SNSConnection.publish
     # (the test name implies SNS is not configured; that setup happens
     # outside this method).
     publish_patch = patch.object(SNSConnection, 'publish')
     with publish_patch as publish_spy:
         notifications.send_error_email("error", "something bad happened!")

     assert publish_spy.call_count == 0
Exemple #31
0
    def testNotificationsSNS(self):
        # Sending an error email must publish exactly once, with the topic
        # first, then the message body, then the subject.
        subject = "error"
        body = "something bad happened!"

        publish_patch = patch.object(SNSConnection, 'publish')
        with publish_patch as publish_spy:
            notifications.send_error_email(subject, body)

        publish_spy.assert_called_once_with("test-topic", body, subject)
Exemple #32
0
 def _email_error(self, task, formatted_traceback, subject, headline):
     """Send an error email about ``task``.

     ``subject`` is a template whose ``{task}`` and ``{host}`` placeholders
     are filled in here; ``headline`` is passed unchanged to
     ``notifications.format_task_error`` together with the task and the
     traceback.  ``task.owner_email`` is forwarded as the third argument of
     ``notifications.send_error_email``.
     """
     subject_line = subject.format(task=task, host=self.host)
     body = notifications.format_task_error(
         headline, task, formatted_traceback)
     owner_email = task.owner_email
     notifications.send_error_email(subject_line, body, owner_email)
Exemple #33
0
    def _email_dependency_error(self, task, formatted_traceback):
        """Email a report that scheduling ``task`` failed in its ``deps()`` method.

        The subject names the task and ``self.host``; the body comes from
        ``notifications.format_task_error``.  ``task.owner_email`` is
        forwarded to ``notifications.send_error_email``.
        """
        subject_template = "Luigi: {task} failed scheduling. Host: {host}"
        subject_line = subject_template.format(task=task, host=self.host)

        body = notifications.format_task_error(
            "Will not schedule task or any dependencies due to error in deps() method",
            task,
            formatted_traceback,
        )
        notifications.send_error_email(subject_line, body, task.owner_email)
Exemple #34
0
 def _email_unexpected_error(self, task, formatted_traceback):
     """Email a framework-error report for ``task``.

     The traceback is first passed through ``notifications.wrap_traceback``
     and then embedded in the message body; the subject names the task and
     ``self.host``.  No owner address is passed to ``send_error_email``.
     """
     wrapped_traceback = notifications.wrap_traceback(formatted_traceback)

     subject_template = "Luigi: Framework error while scheduling {task}. Host: {host}"
     subject_line = subject_template.format(task=task, host=self.host)

     body_template = "Luigi framework error:\n{traceback}"
     body = body_template.format(traceback=wrapped_traceback)

     notifications.send_error_email(subject_line, body)
Exemple #35
0
 def _email_error(self, task, formatted_traceback, subject, headline):
     """Format and send an error email for ``task``.

     The ``{task}`` and ``{host}`` placeholders of ``subject`` are filled in
     from ``task`` and ``self.host``.  The body is produced by
     ``notifications.format_task_error`` and handed to
     ``notifications.send_error_email`` along with ``task.owner_email``.
     """
     placeholders = {'task': task, 'host': self.host}
     subject_line = subject.format(**placeholders)
     body = notifications.format_task_error(
         headline,
         task,
         formatted_traceback,
     )
     notifications.send_error_email(subject_line, body, task.owner_email)
Exemple #36
0
    def _email_unexpected_error(self, task, formatted_traceback):
        """Email a framework-error report for ``task``.

        The subject names the task and ``self.host``; the body is built by
        ``notifications.format_task_error`` under the headline
        "Luigi framework error".  ``task.owner_email`` is forwarded to
        ``notifications.send_error_email``.
        """
        subject_template = "Luigi: Framework error while scheduling {task}. Host: {host}"
        subject_line = subject_template.format(task=task, host=self.host)

        body = notifications.format_task_error(
            "Luigi framework error", task, formatted_traceback)
        notifications.send_error_email(subject_line, body, task.owner_email)