예제 #1
0
    def _get_work(self):
        """Request the next task for this worker from the scheduler.

        A task id that is missing from ``self._scheduled_tasks`` is loaded
        on the fly with ``load_task``. If that raises
        ``TaskClassException`` the error is logged and emailed, the task is
        reported to the scheduler as FAILED and not runnable,
        ``self.run_succeeded`` is set to False and the returned task id is
        None.

        :return: ``(task_id, running_tasks, n_pending_tasks,
                 n_unique_pending)`` as read from the scheduler response.
        """
        logger.debug("Asking scheduler for work...")
        response = self._scheduler.get_work(
            worker=self._id, host=self.host, assistant=self._assistant)
        pending_count = response['n_pending_tasks']
        task_id = response['task_id']
        running = response['running_tasks']
        unique_pending_count = response['n_unique_pending']

        already_known = task_id is None or task_id in self._scheduled_tasks
        if not already_known:
            logger.info('Did not schedule %s, will load it dynamically',
                        task_id)
            try:
                # TODO: we should obtain the module name from the server!
                loaded_task = load_task(module=response.get('task_module'),
                                        task_name=response['task_family'],
                                        params_str=response['task_params'])
                self._scheduled_tasks[task_id] = loaded_task
            except TaskClassException as ex:
                msg = 'Cannot find task for %s' % task_id
                logger.exception(msg)
                notifications.send_error_email(
                    'Luigi: %s' % msg, notifications.wrap_traceback(ex))
                # Tell the scheduler this task cannot be run by this worker.
                self._scheduler.add_task(
                    worker=self._id, task_id=task_id, status=FAILED,
                    runnable=False, assistant=self._assistant)
                task_id = None
                self.run_succeeded = False

        return task_id, running, pending_count, unique_pending_count
예제 #2
0
    def run(self):
        """Run ``self.task`` and report the outcome on ``self.result_queue``.

        The method first checks that every dependency returned by
        ``self.task.deps()`` is complete and raises ``RuntimeError``
        internally if not (which is then handled like any other failure).
        A task whose ``run`` attribute is ``NotImplemented`` is treated as
        external: its status is derived from ``complete()`` only. Otherwise
        ``self._run_get_new_deps()`` is called; a non-empty result suspends
        the task.

        ``KeyboardInterrupt`` is re-raised. Any other exception is logged,
        passed to ``task.on_failure``, emailed, and results in status FAILED.

        In every case a tuple
        ``(task_id, status, error_message, missing, new_deps)`` is put on
        ``self.result_queue``.
        """
        logger.info('[pid %s] Worker %s running   %s', os.getpid(),
                    self.worker_id, self.task.task_id)

        if self.random_seed:
            # Need to have different random seeds if running in separate processes.
            # Seed with a string: tuple seeds are deprecated since Python 3.9
            # and rejected with TypeError from Python 3.11 on.
            random.seed('%d:%r' % (os.getpid(), time.time()))

        status = FAILED
        error_message = ''
        missing = []
        new_deps = []
        try:
            # Verify that all the tasks are fulfilled!
            missing = [
                dep.task_id for dep in self.task.deps() if not dep.complete()
            ]
            if missing:
                deps = 'dependency' if len(missing) == 1 else 'dependencies'
                raise RuntimeError('Unfulfilled %s at run time: %s' %
                                   (deps, ', '.join(missing)))
            self.task.trigger_event(Event.START, self.task)
            t0 = time.time()
            status = None

            if self.task.run is NotImplemented:
                # External task
                # TODO(erikbern): We should check for task completeness after non-external tasks too!
                # This will resolve #814 and make things a lot more consistent
                status = DONE if self.task.complete() else FAILED
            else:
                new_deps = self._run_get_new_deps()
                status = DONE if not new_deps else SUSPENDED

            if status == SUSPENDED:
                logger.info('[pid %s] Worker %s new requirements      %s',
                            os.getpid(), self.worker_id, self.task.task_id)

            elif status == DONE:
                self.task.trigger_event(Event.PROCESSING_TIME, self.task,
                                        time.time() - t0)
                # Despite the name, on success this carries the JSON-encoded
                # return value of on_success().
                error_message = json.dumps(self.task.on_success())
                logger.info('[pid %s] Worker %s done      %s', os.getpid(),
                            self.worker_id, self.task.task_id)
                self.task.trigger_event(Event.SUCCESS, self.task)

        except KeyboardInterrupt:
            raise
        except BaseException as ex:
            status = FAILED
            logger.exception("[pid %s] Worker %s failed    %s", os.getpid(),
                             self.worker_id, self.task)
            error_message = notifications.wrap_traceback(
                self.task.on_failure(ex))
            self.task.trigger_event(Event.FAILURE, self.task, ex)
            subject = "Luigi: %s FAILED" % self.task
            notifications.send_error_email(subject, error_message)
        finally:
            # Always report back, whatever happened above.
            self.result_queue.put(
                (self.task.task_id, status, error_message, missing, new_deps))
예제 #3
0
파일: worker.py 프로젝트: ThQ/luigi
    def _get_work(self):
        """Fetch the next unit of work from the scheduler.

        Returns ``(None, 0, 0, 0)`` straight away when
        ``self._stop_requesting_work`` is set. Otherwise the scheduler
        response is recorded in ``self._get_work_response_history`` and, if
        the returned task is not yet in ``self._scheduled_tasks``, it is
        loaded with ``load_task``. A ``TaskClassException`` during loading
        is logged, emailed and reported through ``self._add_task`` as
        FAILED; the task id is then dropped and ``self.run_succeeded`` is
        set to False.

        :return: ``(task_id, running_tasks, n_pending_tasks,
                 n_unique_pending)``.
        """
        if self._stop_requesting_work:
            return None, 0, 0, 0

        logger.debug("Asking scheduler for work...")
        reply = self._scheduler.get_work(
            worker=self._id,
            host=self.host,
            assistant=self._assistant,
        )
        n_pending = reply["n_pending_tasks"]
        task_id = reply["task_id"]
        running = reply["running_tasks"]
        n_unique = reply["n_unique_pending"]

        self._get_work_response_history.append(
            {"task_id": task_id, "running_tasks": running})

        must_load = task_id is not None and task_id not in self._scheduled_tasks
        if must_load:
            logger.info("Did not schedule %s, will load it dynamically", task_id)
            try:
                # TODO: we should obtain the module name from the server!
                loaded = load_task(
                    module=reply.get("task_module"),
                    task_name=reply["task_family"],
                    params_str=reply["task_params"],
                )
                self._scheduled_tasks[task_id] = loaded
            except TaskClassException as ex:
                msg = "Cannot find task for %s" % task_id
                logger.exception(msg)
                notifications.send_error_email(
                    "Luigi: %s" % msg, notifications.wrap_traceback(ex))
                self._add_task(
                    worker=self._id,
                    task_id=task_id,
                    status=FAILED,
                    runnable=False,
                    assistant=self._assistant,
                )
                task_id = None
                self.run_succeeded = False

        return task_id, running, n_pending, n_unique
예제 #4
0
파일: worker.py 프로젝트: nirmeshk/luigi
    def run(self):
        """Run ``self.task`` and report the outcome on ``self.result_queue``.

        Dependencies from ``self.task.deps()`` are checked first; any
        incomplete one causes a ``RuntimeError`` that is handled as a task
        failure. A task whose ``run`` attribute is ``NotImplemented`` is
        treated as external and judged by ``complete()`` alone; otherwise
        ``self._run_get_new_deps()`` runs it, and a non-empty result
        suspends the task.

        ``KeyboardInterrupt`` is re-raised. Any other exception is logged,
        passed to ``task.on_failure`` and emailed to the default recipients
        plus ``self.task.owner_email``; the status becomes FAILED.

        In every case ``(task_id, status, expl, missing, new_deps)`` is put
        on ``self.result_queue``; ``expl`` is the JSON-encoded result of
        ``on_success()`` or ``on_failure()`` (or '' if neither ran).
        """
        logger.info('[pid %s] Worker %s running   %s', os.getpid(), self.worker_id, self.task.task_id)

        if self.random_seed:
            # Need to have different random seeds if running in separate processes.
            # Seed with a string: tuple seeds are deprecated since Python 3.9
            # and rejected with TypeError from Python 3.11 on.
            random.seed('%d:%r' % (os.getpid(), time.time()))

        status = FAILED
        expl = ''
        missing = []
        new_deps = []
        try:
            # Verify that all the tasks are fulfilled!
            missing = [dep.task_id for dep in self.task.deps() if not dep.complete()]
            if missing:
                deps = 'dependency' if len(missing) == 1 else 'dependencies'
                raise RuntimeError('Unfulfilled %s at run time: %s' % (deps, ', '.join(missing)))
            self.task.trigger_event(Event.START, self.task)
            t0 = time.time()
            status = None

            if self.task.run is NotImplemented:
                # External task
                # TODO(erikbern): We should check for task completeness after non-external tasks too!
                # This will resolve #814 and make things a lot more consistent
                status = DONE if self.task.complete() else FAILED
            else:
                new_deps = self._run_get_new_deps()
                status = DONE if not new_deps else SUSPENDED

            if status == SUSPENDED:
                logger.info(
                    '[pid %s] Worker %s new requirements      %s',
                    os.getpid(), self.worker_id, self.task.task_id)

            elif status == DONE:
                self.task.trigger_event(
                    Event.PROCESSING_TIME, self.task, time.time() - t0)
                expl = json.dumps(self.task.on_success())
                logger.info('[pid %s] Worker %s done      %s', os.getpid(),
                            self.worker_id, self.task.task_id)
                self.task.trigger_event(Event.SUCCESS, self.task)

        except KeyboardInterrupt:
            raise
        except BaseException as ex:
            status = FAILED
            logger.exception("[pid %s] Worker %s failed    %s", os.getpid(), self.worker_id, self.task)
            self.task.trigger_event(Event.FAILURE, self.task, ex)
            subject = "Luigi: %s FAILED" % self.task

            # The raw on_failure() result goes to the scheduler (as JSON);
            # the wrapped/formatted variant is only used for the email.
            raw_error_message = self.task.on_failure(ex)
            notification_error_message = notifications.wrap_traceback(raw_error_message)
            expl = json.dumps(raw_error_message)
            formatted_error_message = notifications.format_task_error(subject, self.task,
                                                                      formatted_exception=notification_error_message)
            notifications.send_error_email(subject, formatted_error_message, self.task.owner_email)
        finally:
            # Always report back, whatever happened above.
            self.result_queue.put(
                (self.task.task_id, status, expl, missing, new_deps))
예제 #5
0
파일: worker.py 프로젝트: ryantuck/luigi
 def _send_error_notification(self, raw_error_message):
     """Email a failure notification for ``self.task``.

     :param raw_error_message: error text; it is passed through
         ``notifications.wrap_traceback`` and then
         ``notifications.format_task_error`` before being sent.

     The mail is sent via ``notifications.send_error_email`` with
     ``self.task.owner_email`` as the extra recipient argument.
     """
     subject = "Luigi: %s FAILED" % self.task
     wrapped = notifications.wrap_traceback(raw_error_message)
     mail_body = notifications.format_task_error(
         subject,
         self.task,
         formatted_exception=wrapped,
     )
     notifications.send_error_email(
         subject,
         mail_body,
         self.task.owner_email,
     )
예제 #6
0
파일: worker.py 프로젝트: ainkov/luigi
    def _get_work(self):
        """Poll the scheduler for a task to run.

        When the scheduler returns a task id this worker has not scheduled
        itself (absent from ``self._scheduled_tasks``), the task is
        instantiated with ``load_task``. Should that raise
        ``TaskClassException``, the problem is logged, emailed and the task
        is registered with the scheduler as FAILED / not runnable; the
        method then returns None as task id and clears
        ``self.run_succeeded``.

        :return: ``(task_id, running_tasks, n_pending_tasks,
                 n_unique_pending)``.
        """
        logger.debug("Asking scheduler for work...")
        work = self._scheduler.get_work(
            worker=self._id,
            host=self.host,
            assistant=self._assistant,
        )
        pending_total = work['n_pending_tasks']
        task_id = work['task_id']
        currently_running = work['running_tasks']
        pending_unique = work['n_unique_pending']

        if task_id is not None:
            if task_id not in self._scheduled_tasks:
                logger.info('Did not schedule %s, will load it dynamically', task_id)
                try:
                    # TODO: we should obtain the module name from the server!
                    instance = load_task(
                        module=work.get('task_module'),
                        task_name=work['task_family'],
                        params_str=work['task_params'],
                    )
                    self._scheduled_tasks[task_id] = instance
                except TaskClassException as ex:
                    msg = 'Cannot find task for %s' % task_id
                    logger.exception(msg)
                    mail_subject = 'Luigi: %s' % msg
                    mail_body = notifications.wrap_traceback(ex)
                    notifications.send_error_email(mail_subject, mail_body)
                    self._scheduler.add_task(
                        worker=self._id,
                        task_id=task_id,
                        status=FAILED,
                        runnable=False,
                        assistant=self._assistant,
                    )
                    task_id = None
                    self.run_succeeded = False

        return task_id, currently_running, pending_total, pending_unique
예제 #7
0
 def _email_unexpected_error(self, task, formatted_traceback):
     """Send an error email about a framework error while scheduling ``task``.

     :param task: the task being scheduled; interpolated into the subject.
     :param formatted_traceback: traceback text; wrapped with
         ``notifications.wrap_traceback`` and embedded in the message body.
     """
     wrapped = notifications.wrap_traceback(formatted_traceback)
     subject_template = "Luigi: Framework error while scheduling {task}. Host: {host}"
     body_template = "Luigi framework error:\n{traceback}"
     notifications.send_error_email(
         subject_template.format(task=task, host=self.host),
         body_template.format(traceback=wrapped),
     )
예제 #8
0
 def _email_complete_error(self, task, formatted_traceback):
     """Send an error email saying ``task`` failed scheduling.

     The message body states that the task and its dependencies will not be
     scheduled because of an error in ``complete()``.

     :param task: the task; interpolated into subject and body.
     :param formatted_traceback: traceback text; wrapped with
         ``notifications.wrap_traceback`` and appended to the body.
     """
     wrapped = notifications.wrap_traceback(formatted_traceback)
     subject_template = "Luigi: {task} failed scheduling. Host: {host}"
     body_template = "Will not schedule {task} or any dependencies due to error in complete() method:\n{traceback}"
     notifications.send_error_email(
         subject_template.format(task=task, host=self.host),
         body_template.format(task=task, traceback=wrapped),
     )
예제 #9
0
 def _send_error_notification(self, raw_error_message):
     """Build and send the failure email for ``self.task``.

     :param raw_error_message: error text to report. It is wrapped by
         ``notifications.wrap_traceback`` and formatted by
         ``notifications.format_task_error`` before sending.

     ``self.task.owner_email`` is handed to
     ``notifications.send_error_email`` as its third argument.
     """
     subject = "Luigi: %s FAILED" % self.task
     traceback_text = notifications.wrap_traceback(raw_error_message)
     email_text = notifications.format_task_error(
         subject, self.task, formatted_exception=traceback_text)
     notifications.send_error_email(
         subject, email_text, self.task.owner_email)
예제 #10
0
파일: worker.py 프로젝트: cpapazian/luigi
 def _email_complete_error(self, task, formatted_traceback):
     """Email a notice that ``task`` could not be scheduled.

     The body explains that neither the task nor its dependencies will be
     scheduled due to an error in ``complete()``.

     :param task: the task; used in both subject and body.
     :param formatted_traceback: traceback text; passed through
         ``notifications.wrap_traceback`` and placed at the end of the body.
     """
     traceback_text = notifications.wrap_traceback(formatted_traceback)
     placeholders = dict(task=task, host=self.host)
     subject = "Luigi: {task} failed scheduling. Host: {host}".format(**placeholders)
     message = (
         "Will not schedule {task} or any dependencies due to error in complete() method:\n{traceback}"
     ).format(task=task, traceback=traceback_text)
     notifications.send_error_email(subject, message)
예제 #11
0
파일: worker.py 프로젝트: yonashub/luigi
    def _get_work(self):
        """Ask the scheduler for the next task and the worker's state.

        Returns immediately with ``WORKER_STATE_DISABLED`` when
        ``self._stop_requesting_work`` is set. Otherwise the scheduler is
        queried (passing the ids of ``self._running_tasks``), the response
        is appended to ``self._get_work_response_history`` and, if the task
        is not in ``self._scheduled_tasks``, it is loaded with
        ``load_task``. A ``TaskClassException`` while loading is logged,
        emailed and reported via ``self._add_task`` as FAILED; the task id
        is then dropped and ``self.run_succeeded`` set to False.

        If the response contains ``batch_task_ids``, the already-scheduled
        tasks with those ids are stored in
        ``self._batch_running_tasks[task_id]``.

        :return: ``(task_id, running_tasks, n_pending_tasks,
                 n_unique_pending, worker_state)``.
        """
        if self._stop_requesting_work:
            return None, 0, 0, 0, WORKER_STATE_DISABLED
        logger.debug("Asking scheduler for work...")
        r = self._scheduler.get_work(
            worker=self._id,
            host=self.host,
            assistant=self._assistant,
            current_tasks=list(self._running_tasks.keys()),
        )
        n_pending_tasks = r['n_pending_tasks']
        running_tasks = r['running_tasks']
        n_unique_pending = r['n_unique_pending']
        # TODO: For a tiny amount of time (a month?) we'll keep forwards compatibility
        # That is you can use a newer client than server (Sep 2016)
        worker_state = r.get('worker_state',
                             WORKER_STATE_ACTIVE)  # state according to server!
        task_id = self._get_work_task_id(r)

        self._get_work_response_history.append({
            'task_id': task_id,
            'running_tasks': running_tasks,
        })

        if task_id is not None and task_id not in self._scheduled_tasks:
            logger.info('Did not schedule %s, will load it dynamically',
                        task_id)

            try:
                # TODO: we should obtain the module name from the server!
                self._scheduled_tasks[task_id] = \
                    load_task(module=r.get('task_module'),
                              task_name=r['task_family'],
                              params_str=r['task_params'])
            except TaskClassException as ex:
                msg = 'Cannot find task for %s' % task_id
                logger.exception(msg)
                subject = 'Luigi: %s' % msg
                error_message = notifications.wrap_traceback(ex)
                notifications.send_error_email(subject, error_message)
                self._add_task(worker=self._id,
                               task_id=task_id,
                               status=FAILED,
                               runnable=False,
                               assistant=self._assistant)
                task_id = None
                self.run_succeeded = False

        if task_id is not None and 'batch_task_ids' in r:
            # Store a real list: on Python 3 ``filter`` returns a one-shot
            # iterator, which would be empty after its first traversal.
            batch_tasks = list(filter(None, [
                self._scheduled_tasks.get(batch_id)
                for batch_id in r['batch_task_ids']
            ]))
            self._batch_running_tasks[task_id] = batch_tasks

        return task_id, running_tasks, n_pending_tasks, n_unique_pending, worker_state
예제 #12
0
    def run(self):
        """Run ``self.task`` and report the outcome on ``self.result_queue``.

        Dependencies from ``self.task.deps()`` are checked first; any
        incomplete one causes a ``RuntimeError`` that is handled as a task
        failure. The task is then run via ``self._run_get_new_deps()``:
        a ``None`` result means the task is DONE, anything else means it is
        SUSPENDED pending new requirements.

        ``KeyboardInterrupt`` is re-raised. Any other exception is logged,
        passed to ``task.on_failure``, emailed, and yields status FAILED.

        In every case ``(task_id, status, error_message, missing,
        new_deps)`` is put on ``self.result_queue``.
        """
        logger.info('[pid %s] Worker %s running   %s', os.getpid(), self.worker_id, self.task.task_id)

        if self.random_seed:
            # Need to have different random seeds if running in separate processes.
            # Seed with a string: tuple seeds are deprecated since Python 3.9
            # and rejected with TypeError from Python 3.11 on.
            random.seed('%d:%r' % (os.getpid(), time.time()))

        status = FAILED
        error_message = ''
        missing = []
        new_deps = []
        try:
            # Verify that all the tasks are fulfilled!
            missing = [dep.task_id for dep in self.task.deps() if not dep.complete()]
            if missing:
                deps = 'dependency' if len(missing) == 1 else 'dependencies'
                raise RuntimeError('Unfulfilled %s at run time: %s' % (deps, ', '.join(missing)))
            self.task.trigger_event(Event.START, self.task)
            t0 = time.time()
            status = None

            new_deps = self._run_get_new_deps()

            if new_deps is None:
                status = DONE
                self.task.trigger_event(
                    Event.PROCESSING_TIME, self.task, time.time() - t0)
                # Despite the name, on success this carries the JSON-encoded
                # return value of on_success().
                error_message = json.dumps(self.task.on_success())
                logger.info('[pid %s] Worker %s done      %s', os.getpid(),
                            self.worker_id, self.task.task_id)
                self.task.trigger_event(Event.SUCCESS, self.task)

            else:
                status = SUSPENDED
                logger.info(
                    '[pid %s] Worker %s new requirements      %s',
                    os.getpid(), self.worker_id, self.task.task_id)

        except KeyboardInterrupt:
            raise
        except BaseException as ex:
            status = FAILED
            logger.exception("[pid %s] Worker %s failed    %s", os.getpid(), self.worker_id, self.task)
            error_message = notifications.wrap_traceback(self.task.on_failure(ex))
            self.task.trigger_event(Event.FAILURE, self.task, ex)
            subject = "Luigi: %s FAILED" % self.task
            notifications.send_error_email(subject, error_message)
        finally:
            # Always report back, whatever happened above.
            self.result_queue.put(
                (self.task.task_id, status, error_message, missing, new_deps))
예제 #13
0
 def _handle_task_load_error(self, exception, task_ids):
     """Report tasks from the scheduler that could not be loaded.

     Logs the problem, registers every id in ``task_ids`` as FAILED and
     not runnable through ``self._add_task`` (with the wrapped traceback
     as explanation), then sends a single error email.

     :param exception: the error raised while loading; wrapped with
         ``notifications.wrap_traceback``.
     :param task_ids: ids of the affected tasks; joined with ',' for the
         log/subject line and iterated again for reporting.
     """
     joined_ids = ','.join(task_ids)
     msg = 'Cannot find task(s) sent by scheduler: {}'.format(joined_ids)
     logger.exception(msg)
     subject = 'Luigi: {}'.format(msg)
     error_message = notifications.wrap_traceback(exception)
     for failed_id in task_ids:
         self._add_task(worker=self._id, task_id=failed_id, status=FAILED,
                        runnable=False, expl=error_message)
     notifications.send_error_email(subject, error_message)
예제 #14
0
파일: worker.py 프로젝트: yonromai/luigi
 def _handle_task_load_error(self, exception, task_ids):
     """Mark unloadable scheduler tasks as failed and notify by email.

     The failure is logged, each id in ``task_ids`` is passed to
     ``self._add_task`` with status FAILED, ``runnable=False`` and the
     wrapped traceback as ``expl``, and one error email is sent afterwards.

     :param exception: the load error; passed to
         ``notifications.wrap_traceback``.
     :param task_ids: ids of the tasks that could not be found.
     """
     msg = 'Cannot find task(s) sent by scheduler: {}'.format(
         ','.join(task_ids))
     logger.exception(msg)
     subject = 'Luigi: {}'.format(msg)
     wrapped_error = notifications.wrap_traceback(exception)
     for unloadable_id in task_ids:
         self._add_task(
             worker=self._id,
             task_id=unloadable_id,
             status=FAILED,
             runnable=False,
             expl=wrapped_error,
         )
     notifications.send_error_email(subject, wrapped_error)
예제 #15
0
파일: worker.py 프로젝트: ehdr/luigi
    def _get_work(self):
        """Ask the scheduler for the next task to run.

        Returns ``(None, 0, 0, 0)`` immediately when
        ``self._stop_requesting_work`` is set. Otherwise the scheduler is
        queried (passing the ids of ``self._running_tasks``), the response
        is appended to ``self._get_work_response_history`` and, if the task
        is not in ``self._scheduled_tasks``, it is loaded with
        ``load_task``. A ``TaskClassException`` while loading is logged,
        emailed and reported via ``self._add_task`` as FAILED; the task id
        is then dropped and ``self.run_succeeded`` set to False.

        If the response contains ``batch_task_ids``, the already-scheduled
        tasks with those ids are stored in
        ``self._batch_running_tasks[task_id]``.

        :return: ``(task_id, running_tasks, n_pending_tasks,
                 n_unique_pending)``.
        """
        if self._stop_requesting_work:
            return None, 0, 0, 0
        logger.debug("Asking scheduler for work...")
        r = self._scheduler.get_work(
            worker=self._id,
            host=self.host,
            assistant=self._assistant,
            current_tasks=list(self._running_tasks.keys()),
        )
        n_pending_tasks = r['n_pending_tasks']
        running_tasks = r['running_tasks']
        n_unique_pending = r['n_unique_pending']
        task_id = self._get_work_task_id(r)

        self._get_work_response_history.append({
            'task_id': task_id,
            'running_tasks': running_tasks,
        })

        if task_id is not None and task_id not in self._scheduled_tasks:
            logger.info('Did not schedule %s, will load it dynamically', task_id)

            try:
                # TODO: we should obtain the module name from the server!
                self._scheduled_tasks[task_id] = \
                    load_task(module=r.get('task_module'),
                              task_name=r['task_family'],
                              params_str=r['task_params'])
            except TaskClassException as ex:
                msg = 'Cannot find task for %s' % task_id
                logger.exception(msg)
                subject = 'Luigi: %s' % msg
                error_message = notifications.wrap_traceback(ex)
                notifications.send_error_email(subject, error_message)
                self._add_task(worker=self._id, task_id=task_id, status=FAILED, runnable=False,
                               assistant=self._assistant)
                task_id = None
                self.run_succeeded = False

        if task_id is not None and 'batch_task_ids' in r:
            # Store a real list: on Python 3 ``filter`` returns a one-shot
            # iterator, which would be empty after its first traversal.
            batch_tasks = list(filter(None, [
                self._scheduled_tasks.get(batch_id) for batch_id in r['batch_task_ids']]))
            self._batch_running_tasks[task_id] = batch_tasks

        return task_id, running_tasks, n_pending_tasks, n_unique_pending
예제 #16
0
    def run(self):
        """Check ``self.task`` for completeness and report the result.

        This variant never calls the task's ``run``; the status is DONE if
        ``self.task.complete()`` is true and FAILED otherwise. The
        PROCESSING_TIME event is triggered even when ``complete()`` raises.

        ``KeyboardInterrupt`` is re-raised. Any other exception is logged,
        passed to ``task.on_failure``, emailed, and yields status FAILED.

        In every case ``(task_id, status, error_message, [], [])`` is put on
        ``self.result_queue``.
        """
        logger.info('[pid %s] Worker %s running   %s', os.getpid(),
                    self.worker_id, self.task.task_id)

        if self.random_seed:
            # Need to have different random seeds if running in separate processes.
            # Seed with a string: tuple seeds are deprecated since Python 3.9
            # and rejected with TypeError from Python 3.11 on.
            random.seed('%d:%r' % (os.getpid(), time.time()))

        status = FAILED
        error_message = ''
        try:
            self.task.trigger_event(Event.START, self.task)
            t0 = time.time()
            status = None
            try:
                status = DONE if self.task.complete() else FAILED
                logger.debug('[pid %s] Task %s has status %s', os.getpid(),
                             self.task, status)
            finally:
                self.task.trigger_event(Event.PROCESSING_TIME, self.task,
                                        time.time() - t0)

            # NOTE(review): on_success() and the SUCCESS event run even when
            # complete() returned False and status is FAILED - confirm this
            # is intended.
            error_message = json.dumps(self.task.on_success())
            logger.info('[pid %s] Worker %s done      %s', os.getpid(),
                        self.worker_id, self.task.task_id)
            self.task.trigger_event(Event.SUCCESS, self.task)

        except KeyboardInterrupt:
            raise
        except BaseException as ex:
            status = FAILED
            logger.exception('[pid %s] Worker %s failed    %s', os.getpid(),
                             self.worker_id, self.task)
            error_message = notifications.wrap_traceback(
                self.task.on_failure(ex))
            self.task.trigger_event(Event.FAILURE, self.task, ex)
            subject = "Luigi: %s FAILED" % self.task
            notifications.send_error_email(subject, error_message)
        finally:
            # Always report back, whatever happened above.
            logger.debug('Putting result into queue: %s %s %s',
                         self.task.task_id, status, error_message)
            self.result_queue.put(
                (self.task.task_id, status, error_message, [], []))
예제 #17
0
    def run(self):
        """Check ``self.task`` for completeness and report the result.

        This variant never calls the task's ``run``; the status is DONE if
        ``self.task.complete()`` is true and FAILED otherwise. The
        PROCESSING_TIME event is triggered even when ``complete()`` raises.

        ``KeyboardInterrupt`` is re-raised. Any other exception is logged,
        passed to ``task.on_failure``, emailed, and yields status FAILED.

        In every case ``(task_id, status, error_message, [], [])`` is put on
        ``self.result_queue``.
        """
        logger.info('[pid %s] Worker %s running   %s', os.getpid(), self.worker_id, self.task.task_id)

        if self.random_seed:
            # Need to have different random seeds if running in separate processes.
            # Seed with a string: tuple seeds are deprecated since Python 3.9
            # and rejected with TypeError from Python 3.11 on.
            random.seed('%d:%r' % (os.getpid(), time.time()))

        status = FAILED
        error_message = ''
        try:
            self.task.trigger_event(Event.START, self.task)
            t0 = time.time()
            status = None
            try:
                status = DONE if self.task.complete() else FAILED
                logger.debug('[pid %s] Task %s has status %s', os.getpid(), self.task, status)
            finally:
                self.task.trigger_event(
                    Event.PROCESSING_TIME, self.task, time.time() - t0)

            # NOTE(review): on_success() and the SUCCESS event run even when
            # complete() returned False and status is FAILED - confirm this
            # is intended.
            error_message = json.dumps(self.task.on_success())
            logger.info('[pid %s] Worker %s done      %s', os.getpid(),
                        self.worker_id, self.task.task_id)
            self.task.trigger_event(Event.SUCCESS, self.task)

        except KeyboardInterrupt:
            raise
        except BaseException as ex:
            status = FAILED
            logger.exception('[pid %s] Worker %s failed    %s', os.getpid(), self.worker_id, self.task)
            error_message = notifications.wrap_traceback(self.task.on_failure(ex))
            self.task.trigger_event(Event.FAILURE, self.task, ex)
            subject = "Luigi: %s FAILED" % self.task
            notifications.send_error_email(subject, error_message)
        finally:
            # Always report back, whatever happened above.
            logger.debug('Putting result into queue: %s %s %s', self.task.task_id, status, error_message)
            self.result_queue.put(
                (self.task.task_id, status, error_message, [], []))
예제 #18
0
    def run(self):
        """Run ``self.task`` and report the outcome on ``self.result_queue``.

        Dependencies from ``self.task.deps()`` are checked first; any
        incomplete one causes a ``RuntimeError`` that is handled as a task
        failure. ``self.task.run()`` is then called. If it returns a
        generator, each yielded value is treated as new requirements:

        * if all of them are complete, an intermediate RUNNING result is
          put on the queue and the generator is resumed with
          ``getpaths(requires)``;
        * otherwise the method returns with status SUSPENDED and the new
          dependencies as ``(task_module, task_family, str_params)`` tuples.

        ``KeyboardInterrupt`` is re-raised. Any other exception is logged,
        passed to ``task.on_failure``, emailed, and yields status FAILED.

        In every case ``(task_id, status, error_message, missing,
        new_deps)`` is put on ``self.result_queue`` on exit.
        """
        logger.info('[pid %s] Worker %s running   %s', os.getpid(),
                    self.worker_id, self.task.task_id)

        if self.random_seed:
            # Need to have different random seeds if running in separate processes.
            # Seed with a string: tuple seeds are deprecated since Python 3.9
            # and rejected with TypeError from Python 3.11 on.
            random.seed('%d:%r' % (os.getpid(), time.time()))

        status = FAILED
        error_message = ''
        missing = []
        new_deps = []
        try:
            # Verify that all the tasks are fulfilled!
            missing = [
                dep.task_id for dep in self.task.deps() if not dep.complete()
            ]
            if missing:
                deps = 'dependency' if len(missing) == 1 else 'dependencies'
                raise RuntimeError('Unfulfilled %s at run time: %s' %
                                   (deps, ', '.join(missing)))
            self.task.trigger_event(Event.START, self.task)
            t0 = time.time()
            status = None
            try:
                task_gen = self.task.run()
                if isinstance(task_gen, types.GeneratorType):  # new deps
                    next_send = None
                    while True:
                        try:
                            if next_send is None:
                                requires = six.next(task_gen)
                            else:
                                requires = task_gen.send(next_send)
                        except StopIteration:
                            break

                        new_req = flatten(requires)
                        status = (RUNNING if all(
                            t.complete() for t in new_req) else SUSPENDED)
                        new_deps = [(t.task_module, t.task_family,
                                     t.to_str_params()) for t in new_req]
                        if status == RUNNING:
                            # Requirements already satisfied: report progress
                            # and feed their paths back into the generator.
                            self.result_queue.put((self.task.task_id, status,
                                                   '', missing, new_deps))
                            next_send = getpaths(requires)
                            new_deps = []
                        else:
                            logger.info(
                                '[pid %s] Worker %s new requirements      %s',
                                os.getpid(), self.worker_id, self.task.task_id)
                            # The outer ``finally`` still reports SUSPENDED.
                            return
            finally:
                if status != SUSPENDED:
                    self.task.trigger_event(Event.PROCESSING_TIME, self.task,
                                            time.time() - t0)
            error_message = json.dumps(self.task.on_success())
            logger.info('[pid %s] Worker %s done      %s', os.getpid(),
                        self.worker_id, self.task.task_id)
            self.task.trigger_event(Event.SUCCESS, self.task)
            status = DONE

        except KeyboardInterrupt:
            raise
        except BaseException as ex:
            status = FAILED
            logger.exception("[pid %s] Worker %s failed    %s", os.getpid(),
                             self.worker_id, self.task)
            error_message = notifications.wrap_traceback(
                self.task.on_failure(ex))
            self.task.trigger_event(Event.FAILURE, self.task, ex)
            subject = "Luigi: %s FAILED" % self.task
            notifications.send_error_email(subject, error_message)
        finally:
            # Always report back, whatever happened above.
            self.result_queue.put(
                (self.task.task_id, status, error_message, missing, new_deps))
예제 #19
0
파일: worker.py 프로젝트: ainkov/luigi
 def _email_unexpected_error(self, task, formatted_traceback):
     """Email a report of a framework error hit while scheduling ``task``.

     :param task: the task being scheduled; shown in the subject line.
     :param formatted_traceback: traceback text; passed through
         ``notifications.wrap_traceback`` and used as the message body.
     """
     traceback_text = notifications.wrap_traceback(formatted_traceback)
     subject = "Luigi: Framework error while scheduling {task}. Host: {host}".format(
         task=task,
         host=self.host,
     )
     body = "Luigi framework error:\n{traceback}".format(
         traceback=traceback_text,
     )
     notifications.send_error_email(subject, body)