Esempio n. 1
0
    def _build(self, catch_exceptions):
        """
        Executor-facing entry point: run the task and update its status.

        Rendering is not performed here, it is expected to have happened
        already through a dag.render() call.

        Parameters
        ----------
        catch_exceptions : bool
            If True, exceptions are logged and re-raised as a chained
            exception: [original exception] then [exception with context
            info]; ``self._run_on_failure()`` is called when
            ``self.on_failure`` is not None. If False, exceptions propagate
            untouched, which allows dropping into a debugging session at the
            failing line.

        Returns
        -------
        tuple
            The value returned by ``self._run()`` and
            ``self.product.metadata.to_dict()``.

        Raises
        ------
        DAGBuildEarlyStop
            (catch_exceptions=True) when running the task or the post-run
            actions raised DAGBuildEarlyStop.
        TaskBuildError
            (catch_exceptions=True) when running the task or the post-run
            actions raised any other exception, or when the on_failure hook
            itself raised.
        """
        if not catch_exceptions:
            # debugging mode: nothing is caught, logged or wrapped
            res = self._run()
            self._post_run_actions()
            return res, self.product.metadata.to_dict()

        def _as_build_error(error, fallback_msg):
            # Choose the exception to re-raise: DAGBuildEarlyStop keeps its
            # type so the graceful stop is preserved, anything else becomes
            # a TaskBuildError carrying fallback_msg.
            # FIXME: wrapping makes the traceback longer, consider removing
            # it. The only information the wrapper adds is the task name,
            # which can still be provided if the executor has the option to
            # capture exceptions turned on.
            if isinstance(error, DAGBuildEarlyStop):
                return DAGBuildEarlyStop(
                    'Stopping task {} gracefully'.format(self.name))
            return TaskBuildError(fallback_msg)

        failure = None

        try:
            # TODO: this calls download, if that happens, should hooks be
            # executed when downloading? If so, the "ran?" column from the
            # task report could become something like
            # ran/downloaded/skipped and be used to determine whether hooks
            # should run
            res = self._run()
        except Exception as e:
            build_msg = 'Error building task "{}"'.format(self.name)
            self._logger.exception(build_msg)
            self.exec_status = TaskStatus.Errored

            # no on_failure hook: nothing left to run, raise right away
            if self.on_failure is None:
                raise _as_build_error(e, build_msg) from e

            # keep a reference, "e" is unbound once the except block ends
            failure = e

        if failure is not None:
            # error building the task: run the hook, then report the
            # original failure
            try:
                self._run_on_failure()
            except Exception as e:
                hook_msg = ('Exception when running on_failure '
                            'for task "{}": {}'.format(self.name, e))
                self._logger.exception(hook_msg)
                raise TaskBuildError(hook_msg) from e

            raise _as_build_error(
                failure,
                'Error building task "{}"'.format(self.name)) from failure

        try:
            self._post_run_actions()
        except Exception as e:
            self.exec_status = TaskStatus.Errored
            finish_msg = ('Exception when running on_finish '
                          'for task "{}": {}'.format(self.name, e))
            self._logger.exception(finish_msg)
            raise _as_build_error(e, finish_msg) from e

        # successful task execution, on_finish hook execution,
        # metadata saving and upload
        self.exec_status = TaskStatus.Executed
        return res, self.product.metadata.to_dict()
Esempio n. 2
0
    def __call__(self, dag, show_progress):
        """
        Build the scheduled tasks in ``dag`` one after another.

        Tasks whose ``exec_status`` is ``TaskStatus.Skipped`` are left out
        up front; tasks found with ``TaskStatus.Aborted`` when their turn
        comes are passed over.

        Parameters
        ----------
        dag
            Object that yields task names when iterated and returns the task
            when indexed by name.
        show_progress : bool
            If True, iterate through a tqdm progress bar whose description
            shows the task being built.

        Returns
        -------
        list
            ``task_reports``, the list handed to every build function as
            ``reports_all`` (presumably filled in by them, verify against
            the build functions).

        Raises
        ------
        DAGBuildEarlyStop
            If exceptions are being caught and at least one collected item
            wraps a DAGBuildEarlyStop instance.
        DAGBuildError
            If exceptions are being caught, some were collected and none of
            them is a DAGBuildEarlyStop.
        """
        super().__call__(dag)

        exceptions_all = BuildExceptionsCollector()
        warnings_all = BuildWarningsCollector()
        task_reports = []

        task_kwargs = {'catch_exceptions': self._catch_exceptions}

        # the build function is the same for every task, choose it once
        if self._build_in_subprocess:
            build_fn = build_in_subprocess
        else:
            build_fn = build_in_current_process

        scheduled = [
            dag[t] for t in dag if dag[t].exec_status != TaskStatus.Skipped
        ]

        if show_progress:
            scheduled = tqdm(scheduled, total=len(scheduled))

        for t in scheduled:
            # status is checked again here, it may differ from the value
            # seen when the scheduled list was created
            if t.exec_status == TaskStatus.Aborted:
                continue

            if show_progress:
                scheduled.set_description('Building task "{}"'.format(t.name))

            fn = LazyFunction(
                build_fn, {
                    'task': t,
                    'build_kwargs': task_kwargs,
                    'reports_all': task_reports
                }, t)

            if self._catch_warnings:
                fn = LazyFunction(fn=catch_warnings,
                                  kwargs={
                                      'fn': fn,
                                      'warnings_all': warnings_all
                                  },
                                  task=t)
            else:
                # NOTE: this isn't doing anything
                fn = LazyFunction(fn=pass_exceptions,
                                  kwargs={'fn': fn},
                                  task=t)

            if self._catch_exceptions:
                fn = LazyFunction(fn=catch_exceptions,
                                  kwargs={
                                      'fn': fn,
                                      'exceptions_all': exceptions_all
                                  },
                                  task=t)

            fn()

        if warnings_all and self._catch_warnings:
            # NOTE: maybe raise one by one to keep the warning type
            warnings.warn(str(warnings_all))

        if exceptions_all and self._catch_exceptions:
            # a single DAGBuildEarlyStop among the collected exceptions is
            # enough to report the whole build as an early stop
            early_stop = any(
                isinstance(m.obj, DAGBuildEarlyStop) for m in exceptions_all)

            if early_stop:
                raise DAGBuildEarlyStop('Early stopping DAG execution, '
                                        'at least one of the tasks that '
                                        'failed raised a DAGBuildEarlyStop '
                                        'exception:\n{}'.format(
                                            str(exceptions_all)))

            raise DAGBuildError(str(exceptions_all))

        return task_reports
Esempio n. 3
0
def early_stop():
    """
    Unconditionally raise ``DAGBuildEarlyStop``.

    Takes no arguments; presumably meant to be used as a hook that ends the
    DAG build gracefully (confirm against where it is registered).

    Raises
    ------
    DAGBuildEarlyStop
        Always.
    """
    graceful_exit = DAGBuildEarlyStop('Ending gracefully')
    raise graceful_exit
Esempio n. 4
0
def early_stop_root(product):
    """
    Unconditionally raise ``DAGBuildEarlyStop``.

    Parameters
    ----------
    product
        Accepted but never used by this function.

    Raises
    ------
    DAGBuildEarlyStop
        Always.
    """
    graceful_exit = DAGBuildEarlyStop('Ending gracefully')
    raise graceful_exit
Esempio n. 5
0
def dump_on_finish(product):
    """
    Stop the DAG build gracefully when the dumped CSV has no data rows.

    Parameters
    ----------
    product
        Object whose ``str()`` is the path of the CSV file to inspect.

    Raises
    ------
    DAGBuildEarlyStop
        If the CSV file contains no data rows.
    """
    # one row is enough to know whether anything was dumped, this avoids
    # parsing (and holding in memory) the whole file just to count rows
    df = pd.read_csv(str(product), nrows=1)

    # if we dumped data but got no new observations, stop execution gracefully
    if len(df.index) == 0:
        raise DAGBuildEarlyStop('No new observations')
Esempio n. 6
0
    def _build(self, catch_exceptions):
        """
        Executor-facing entry point: run the task and update its status.

        Rendering is not performed here, it is expected to have happened
        already through a dag.render() call.

        Parameters
        ----------
        catch_exceptions : bool
            If True, exceptions are logged and re-raised as a chained
            exception: [original exception] then [exception with context
            info]; ``self._run_on_failure()`` is called when
            ``self.on_failure`` is not None. If False, exceptions propagate
            untouched, which allows dropping into a debugging session at the
            failing line.

        Returns
        -------
        tuple
            The value returned by ``self._run()`` and
            ``self.product.metadata.to_dict()``.

        Raises
        ------
        DAGBuildEarlyStop
            (catch_exceptions=True) when running the task or finishing its
            execution raised DAGBuildEarlyStop.
        TaskBuildError
            (catch_exceptions=True) when running the task or finishing its
            execution raised any other exception, or when the on_failure
            hook itself raised.
        """
        if not catch_exceptions:
            # debugging mode: nothing is caught, logged or wrapped
            res = self._run()
            self._finish_task_execution()
            return res, self.product.metadata.to_dict()

        def _as_build_error(error, fallback_msg):
            # Choose the exception to re-raise: DAGBuildEarlyStop keeps its
            # type so the graceful stop is preserved, anything else becomes
            # a TaskBuildError carrying fallback_msg.
            # FIXME: wrapping makes the traceback longer, consider removing
            # it. The only information the wrapper adds is the task name,
            # which can still be provided if the executor has the option to
            # capture exceptions turned on.
            if isinstance(error, DAGBuildEarlyStop):
                return DAGBuildEarlyStop(
                    'Stopping task {} gracefully'.format(self.name))
            return TaskBuildError(fallback_msg)

        failure = None

        try:
            res = self._run()
        except Exception as e:
            build_msg = 'Error building task "{}"'.format(self.name)
            self._logger.exception(build_msg)
            self.exec_status = TaskStatus.Errored

            # no on_failure hook: nothing left to run, raise right away
            if self.on_failure is None:
                raise _as_build_error(e, build_msg) from e

            # keep a reference, "e" is unbound once the except block ends
            failure = e

        if failure is not None:
            # error building the task: run the hook, then report the
            # original failure
            try:
                self._run_on_failure()
            except Exception as e:
                hook_msg = ('Exception when running on_failure '
                            'for task "{}": {}'.format(self.name, e))
                self._logger.exception(hook_msg)
                raise TaskBuildError(hook_msg) from e

            raise _as_build_error(
                failure,
                'Error building task "{}"'.format(self.name)) from failure

        try:
            # FIXME: move metadata saving and product checking out of this
            # call, the error message below is misleading: this not only
            # runs the hook, it also saves metadata and checks that the
            # product exists
            self._finish_task_execution()
        except Exception as e:
            self.exec_status = TaskStatus.Errored
            finish_msg = ('Exception when running on_finish '
                          'for task "{}": {}'.format(self.name, e))
            self._logger.exception(finish_msg)
            raise _as_build_error(e, finish_msg) from e

        self.exec_status = TaskStatus.Executed
        return res, self.product.metadata.to_dict()