Example #1
0
def namespace(namespace=None, scope=""):
    """
    Set the namespace for tasks declared after this call.

    Commonly invoked as ``namespace(scope=__name__)``.

    The ``scope`` keyword restricts the call to task classes whose
    ``__module__`` matches [*]_; the default empty string applies to all
    classes. Repeated calls with the same scope simply replace one
    another.

    A :py:class:`Task` may instead declare its namespace explicitly via
    the ``task_namespace`` property:

    .. code-block:: python

        class Task2(dbnd.Task):
            task_namespace = 'namespace2'

    That explicit setting takes priority over anything registered
    through ``namespace()`` and is inherited through normal python
    inheritance.

    There's no equivalent way to set the ``task_family``.

    .. [*] When there are multiple levels of matching module scopes like
           ``a.b`` vs ``a.b.c``, the more specific one (``a.b.c``) wins.
    .. seealso:: The new and better scaling :py:func:`auto_namespace`
    """
    effective_namespace = namespace or ""
    registry = get_task_registry()
    registry.register_namespace(scope=scope, namespace=effective_namespace)
Example #2
0
def namespace(namespace=None, scope=""):
    """
    Set the namespace of every task declared after this call.

    Typical usage passes the keyword argument ``scope=__name__``.

    Only task classes whose ``__module__`` matches ``scope`` are
    affected; the default ``""`` matches all classes, and multiple calls
    with an identical scope simply replace each other.

    A ``Task`` can also override its namespace via the
    ``task_namespace`` property::

        class Task2(dbnd.Task):
            task_namespace = 'namespace2'

    Such an explicit setting takes priority over whatever is set through
    ``namespace()`` and is inherited through normal python inheritance.

    There's no equivalent way to set the ``task_family``.
    """
    registry = get_task_registry()
    registry.register_namespace(scope=scope, namespace=namespace or "")
Example #3
0
    def test_no_error_on_same_from(self):
        # a registered task whose "_from" points at itself must still build
        @task
        def task_with_from():
            return

        self_referencing_conf = {"task_with_from": {"_from": "task_with_from"}}
        with config(self_referencing_conf):
            get_task_registry().build_dbnd_task("task_with_from")
Example #4
0
 def test_error_on_same_from(self):
     # an unknown task whose "_from" points at itself must raise
     conf = {"unknown_task_with_from": {"_from": "unknown_task_with_from"}}
     with pytest.raises(Exception):
         with config(conf):
             get_task_registry().build_dbnd_task("unknown_task_with_from")
Example #5
0
    def parse_from_str(self, input):
        """
        Parse a task_family string into a task instance.

        Resolves the name through the :class:`~dbnd._core.register.Register`
        and instantiates the resolved task class with no arguments.

        NOTE(review): ``input`` shadows the builtin of the same name;
        kept as-is to preserve the public signature.
        """

        task_cls = get_task_registry().get_task_cls(input)
        return task_cls()
Example #6
0
    def class_or_func_decorator(class_or_func):
        """
        Wrap a user class or function as a dbnd task.

        Builds a TaskDecorator, registers a lazy task-class factory in the
        registry, and returns either a metaclass-augmented class or a
        decorated function, depending on what was decorated.
        """
        # this code will run during compile time, when we apply dbnd decorator (for example: @task)
        task_decorator = TaskDecorator(class_or_func,
                                       decorator_kwargs=decorator_kwargs)
        tp = task_decorator.task_passport

        # we need to manually register the task here, since in regular flow
        # this happens in TaskMetaclass, but it's not invoked here due to lazy
        # evaluation task_cls
        r = get_task_registry()
        r.register_task_cls_factory(
            task_cls_factory=task_decorator.get_task_cls,
            full_task_family=tp.full_task_family,
            task_family=tp.task_family,
        )
        if task_decorator.is_class:
            # we will change metaclass for UserClass so we will process all UserClass calls
            #
            # @task
            # class UserClass():
            #     pass
            # so the the moment user call UserClass(), -> _DecoratedUserClassMeta.__call__ will be called
            dbnd_decorated_class = six.add_metaclass(
                _UserClassWithTaskDecoratorMetaclass)(class_or_func)
            # cross-link the decorator and the augmented class so later calls
            # can reach the decorator metadata from either side
            dbnd_decorated_class.task_decorator = task_decorator
            task_decorator.class_or_func = dbnd_decorated_class
            return dbnd_decorated_class
        else:
            # @task
            # def user_func():
            #     pass
            # we will return our wrapper, that will be called during a runtime,
            # when user calls his own code.
            return build_dbnd_decorated_func(task_decorator)
Example #7
0
    def _build(
        cls, cls_name, module_name, task_namespace, conf__task_family,
    ):
        """
        Assemble a TaskPassport for a task class.

        Resolves the effective namespace (falling back to the registry's
        per-module registration when none was given) and derives the
        task family plus the config-section name from it.
        """
        full_task_family = "{}.{}".format(module_name, cls_name)
        full_task_family_short = "{}.{}".format(
            _short_name(module_name), cls_name
        )

        if not is_defined(task_namespace):
            # namespace that was active when the class got declared
            registered = get_task_registry().get_namespace(module_name)
            task_namespace = (
                module_name
                if registered == _SAME_AS_PYTHON_MODULE
                else registered
            )

        if conf__task_family:
            # explicit family coming from configuration wins
            task_family = task_config_section = conf__task_family
        elif task_namespace:
            task_family = "%s.%s" % (task_namespace, cls_name)
            task_config_section = task_family
        else:
            # no namespace: short family, but config section stays fully qualified
            task_family = cls_name
            task_config_section = full_task_family

        return TaskPassport(
            full_task_family=full_task_family,
            full_task_family_short=full_task_family_short,
            task_family=task_family,
            task_config_section=task_config_section,
        )
Example #8
0
    def __new__(mcs, classname, bases, classdict):
        """
        Custom class creation for namespacing.

        Also register all subclasses.

        When the set or inherited namespace evaluates to ``None``, set the task namespace to
        whatever the currently declared namespace is.
        """
        cls = super(TaskMetaclass, mcs).__new__(
            mcs, classname, bases, classdict
        )  # type: typing.Type[_BaseTask]

        # we are starting from "not clean" classdict -> it's deserialization
        if classdict.get("task_definition") is not None:
            return cls

        # build the task definition once and attach it to the new class
        td = cls.task_definition = TaskDefinition(cls, classdict)

        # now we will assign all params
        # tracking mode only gets the class-level params; orchestration gets all
        set_params = td.class_params if cls.is_tracking_mode else td.all_task_params
        for k, v in six.iteritems(set_params):
            setattr(cls, k, v)

        # every time we see new implementation, we want it to have an priority over old implementation
        # we need to switch to dict() and store history else where
        r = get_task_registry()
        r.register_task(cls)

        return cls
Example #9
0
    def __new__(mcs, classname, bases, classdict):
        """
        Custom class creation for namespacing.

        Also register all subclasses.

        When the set or inherited namespace evaluates to ``None``, set the task namespace to
        whatever the currently declared namespace is.
        """
        cls = super(TaskMetaclass,
                    mcs).__new__(mcs, classname, bases,
                                 classdict)  # type: typing.Type[_BaseTask]

        # we are starting from "not clean" classdict ->
        # A. it's deserialization
        # B. it was calculated before
        if classdict.get("task_definition") is not None:
            return cls

        cls.task_definition = TaskDefinition.from_task_cls(task_class=cls,
                                                           classdict=classdict)

        # now we will assign all calculated parameters
        # so instead of ParameterFactory, we will have ParameterDefinition
        for k, v in six.iteritems(cls.task_definition.task_param_defs):
            setattr(cls, k, v)

        # every time we see new implementation, we want it to have an priority over old implementation
        # we need to switch to dict() and store history else where
        r = get_task_registry()
        r.register_task(cls)

        return cls
Example #10
0
def _get_task_cls(luigi_task):
    # type: (luigi.Task) -> Type[_LuigiTask]
    """
    Return the dbnd-luigi wrapper class for *luigi_task*.

    Reuses a wrapper already known to the registry; otherwise builds a
    brand-new wrapper class for this task family.
    """
    family = luigi_task.get_task_family()

    try:
        return get_task_registry().get_task_cls(str(family))
    except TaskClassNotFoundException:
        wrapper_cls = _build_new_task_cls(luigi_task)
        logger.info("Creating new class %s", family)
        return wrapper_cls
Example #11
0
    def test_auto_complete_renew(self):
        @task
        def my_task_autocomplete(a):
            # type: (int)->str
            """
            my task help
            """
            return "ok"

        registry = get_task_registry()
        classes = registry.list_dbnd_task_classes()
        logging.info("task_classes: %s", list(registry.list_dbnd_task_classes()))
        completer.refresh(classes)
        # the completer should resolve the prefix to the freshly declared task
        task_completer = completer.task()
        suggestions = task_completer(None, None, "my_tas")
        assert suggestions == [("my_task_autocomplete", "my task help")]
Example #12
0
    def _build_root_task(self, run):
        # type: (DatabandRun) -> Task
        """Build (or reuse) the root task for this run."""
        # submit-only process: delegate to the dedicated submit-task builder
        if self.is_submitter and not self.is_driver:
            return self._build_submit_task(run)

        # the run was created around an already-built task -- adopt it
        if run.root_task:
            self.task_meta.add_child(run.root_task.task_id)
            return run.root_task

        logger.info("Building main task '%s'", run.root_task_name)
        main_task = get_task_registry().build_dbnd_task(run.root_task_name)
        logger.info(
            "Task %s has been created (%s children)",
            main_task.task_id,
            len(main_task.ctrl.task_dag.subdag_tasks()),
        )
        return main_task
Example #13
0
def _list_tasks(ctx, module, search, is_config):
    """Print every registered task (or config) class matching *search*."""
    from dbnd import Config
    from dbnd._core.context.databand_context import new_dbnd_context
    from dbnd._core.parameter.parameter_definition import _ParameterKind

    formatter = ctx.make_formatter()

    load_user_modules(config, modules=module)

    with new_dbnd_context():
        task_classes = get_task_registry().list_dbnd_task_classes()

    for task_cls in task_classes:
        definition = task_cls.task_definition
        full_task_family = definition.full_task_family
        task_family = definition.task_family

        matches = task_family.startswith(search) or full_task_family.startswith(
            search
        )
        if not matches:
            continue
        # a class is listed only on the side (task vs config) it belongs to
        if issubclass(task_cls, Config) != is_config:
            continue

        param_rows = []
        for param_name, param_obj in definition.task_param_defs.items():
            if param_obj.system or param_obj.kind == _ParameterKind.task_output:
                continue
            if not is_config and param_name in COMMON_PARAMS:
                continue
            param_rows.append((param_name, _help(param_obj.description)))

        if param_rows:
            section_title = "{task_family} ({full_task_family})".format(
                full_task_family=full_task_family, task_family=task_family
            )
            with formatter.section(section_title):
                formatter.write_dl(param_rows)

    click.echo(formatter.getvalue().rstrip("\n"))
Example #14
0
def build_task(root_task, **kwargs):
    """Build the named task inside a fresh dbnd context configured with *kwargs*."""
    from dbnd import new_dbnd_context

    task_conf = {root_task: kwargs}
    with new_dbnd_context(conf=task_conf):
        return get_task_registry().build_dbnd_task(task_name=root_task)
Example #15
0
def run(
    ctx,
    is_help,
    task,
    module,
    _sets,
    _sets_config,
    _sets_root,
    _overrides,
    verbose,
    describe,
    env,
    parallel,
    conf_file,
    task_version,
    project_name,
    name,
    description,
    run_driver,
    alternative_task_name,
    scheduled_job_name,
    scheduled_date,
    interactive,
    submit_driver,
    submit_tasks,
    disable_web_tracker,
):
    """
    Run a task or a DAG

    To see tasks use `dbnd show-tasks` (tab completion is available).

    Flow: fold the CLI switches into a config store, apply it to the
    global config, bootstrap dbnd, then build and run the requested task
    inside a fresh DatabandContext.
    """

    from dbnd._core.context.databand_context import new_dbnd_context, DatabandContext
    from dbnd._core.utils.structures import combine_mappings
    from dbnd import config

    task_name = task
    # --verbose, --describe, --env, --parallel, --conf-file and --project-name
    # we filter out false flags since otherwise they will always override the config with their falseness
    main_switches = dict(
        databand=filter_dict_remove_false_values(
            dict(
                verbose=verbose > 0,
                describe=describe,
                env=env,
                conf_file=conf_file,
                project_name=project_name,
            )
        ),
        run=filter_dict_remove_false_values(
            dict(
                name=name,
                parallel=parallel,
                description=description,
                is_archived=describe,
            )
        ),
    )

    # tri-state flags: None means "not given on the CLI", so only set when present
    if submit_driver is not None:
        main_switches["run"]["submit_driver"] = bool(submit_driver)
    if submit_tasks is not None:
        main_switches["run"]["submit_tasks"] = bool(submit_tasks)
    if disable_web_tracker:
        main_switches.setdefault("core", {})["tracker_api"] = "disabled"

    if task_version is not None:
        main_switches["task"] = {"task_version": task_version}

    cmd_line_config = parse_and_build_config_store(
        source="cli", config_values=main_switches
    )

    _sets = list(_sets)
    _sets_config = list(_sets_config)
    _sets_root = list(_sets_root)

    root_task_config = {}
    for _set in _sets_root:
        root_task_config = combine_mappings(left=root_task_config, right=_set)

    # remove all "first level" config values, assume that they are for the main task
    # add them to _sets_root
    for _set in _sets:
        for k, v in list(_set.items()):
            # so json-like values won't be included
            if "." not in k and isinstance(v, six.string_types):
                root_task_config[k] = v
                del _set[k]

    # --set, --set-config
    # later updates win over earlier ones, so the precedence here is
    # --set < --set-config < --set-override < --interactive < -v -v
    if _sets:
        cmd_line_config.update(_parse_cli(_sets, source="--set"))
    if _sets_config:
        cmd_line_config.update(_parse_cli(_sets_config, source="--set-config"))
    if _overrides:
        cmd_line_config.update(
            _parse_cli(_overrides, source="--set-override", override=True)
        )
    if interactive:
        cmd_line_config.update(
            _parse_cli([{"run.interactive": True}], source="--interactive")
        )
    if verbose > 1:
        cmd_line_config.update(
            _parse_cli([{"task_build.verbose": True}], source="-v -v")
        )

    if cmd_line_config:
        config.set_values(cmd_line_config, source="cmdline")
    if verbose:
        logger.info("CLI config: \n%s", pformat_config_store_as_table(cmd_line_config))

    # double checking on bootstrap, as we can run from all kind of locations
    # usually we should be bootstraped already as we run from cli.
    dbnd_bootstrap()
    if not config.getboolean("log", "disabled"):
        configure_basic_logging(None)

    scheduled_run_info = None
    if scheduled_job_name:
        scheduled_run_info = ScheduledRunInfo(
            scheduled_job_name=scheduled_job_name, scheduled_date=scheduled_date
        )

    with new_dbnd_context(
        name="run", module=module
    ) as context:  # type: DatabandContext
        task_registry = get_task_registry()

        tasks = task_registry.list_dbnd_task_classes()
        completer.refresh(tasks)

        # modules are loaded, we can load the task
        task_cls = None
        if task_name:
            task_cls = task_registry.get_task_cls(task_name)
            if alternative_task_name:
                # run the task under a different name by cloning its class
                task_cls = build_dynamic_task(
                    original_cls=task_cls, new_cls_name=alternative_task_name
                )
                task_name = alternative_task_name

        # --set-root
        # now we can get it config, as it's not main task, we can load config after the configuration is loaded
        if task_cls is not None:
            if root_task_config:
                # adding root task to configuration
                config.set_values(
                    {task_cls.task_definition.task_config_section: root_task_config},
                    source="--set-root",
                )

        if is_help or not task_name:
            print_help(ctx, task_cls)
            return

        return context.dbnd_run_task(
            task_or_task_name=task_name,
            run_uid=run_driver,
            scheduled_run_info=scheduled_run_info,
        )
Example #16
0
    def run_driver(self):
        """
        Execute the driver flow for this run.

        Builds the root task if needed, validates the task graph is a DAG,
        initializes task runs, submits them to the task executor (optionally
        wrapped with a heartbeat sender), copies the result task_band if
        configured, and reports success.
        """
        logger.info("Running driver... Driver PID: %s", os.getpid())

        run = self.run  # type: DatabandRun
        settings = run.context.settings
        run_executor = run.run_executor
        remote_engine = run_executor.remote_engine

        settings.git.validate_git_policy()
        # let prepare for remote execution
        remote_engine.prepare_for_run(run)

        if self.root_task_name_to_build:

            if self.force_task_name:
                # user asked to run the task under a different name
                kwargs = {"task_name": self.force_task_name}

                logger.info(
                    "Building main task '%s' with name %s",
                    self.root_task_name_to_build,
                    self.force_task_name,
                )
            else:
                logger.info("Building main task '%s'",
                            self.root_task_name_to_build)
                kwargs = {}
            root_task = get_task_registry().build_dbnd_task(
                self.root_task_name_to_build, task_kwargs=kwargs)
            logger.info(
                "Task %s has been created (%s children)",
                root_task.task_id,
                len(root_task.ctrl.task_dag.subdag_tasks()),
            )
            run.root_task = root_task

        # assert that graph is DAG
        run.root_task.task_dag.topological_sort()

        # now we init all task runs for all tasks in the pipeline
        task_runs = self._init_task_runs_for_execution(
            task_engine=remote_engine)
        root_task_run = run.root_task_run
        run.root_task.ctrl.banner(
            "Main task '%s' has been created!" % root_task_run.task_af_id,
            color="cyan",
            task_run=root_task_run,
        )

        if self.run_config.dry:
            # dry run: describe the DAG and stop before any execution
            run.root_task.ctrl.describe_dag.describe_dag()
            logger.warning(
                "Execution has been stopped due to run.dry=True flag!")
            return run

        print_tasks_tree(root_task_run.task, task_runs)
        if self._is_save_run_pickle(task_runs, remote_engine):
            run_executor.save_run_pickle()

        task_runs_to_run = [tr for tr in task_runs if not tr.is_skipped]

        # THIS IS THE POINT WHEN WE SUBMIT ALL TASKS TO EXECUTION
        # we should make sure that we create executor without driver task
        task_executor = get_task_executor(
            run,
            task_executor_type=run_executor.task_executor_type,
            host_engine=run_executor.host_engine,
            target_engine=remote_engine,
            task_runs=task_runs_to_run,
        )

        # NOTE(review): "hearbeat" is misspelled ("heartbeat") -- local only,
        # kept as-is since this edit changes documentation only
        hearbeat = None
        if self.send_heartbeat:
            # this will wrap the executor with "heartbeat" process
            hearbeat = start_heartbeat_sender(self)

        with nested(hearbeat):
            task_executor.do_run()

        # We need place the pipeline's task_band in the place we required to by outside configuration
        if settings.run.run_result_json_path:
            new_path = settings.run.run_result_json_path
            try:
                self.result_location.copy(new_path)
            except Exception as e:
                # best-effort copy: log and continue, the run itself succeeded
                logger.exception(
                    "Couldn't copy the task_band from {old_path} to {new_path}. Failed with this error: {error}"
                    .format(old_path=self.result_location.path,
                            new_path=new_path,
                            error=e))

            else:
                logger.info(
                    "Copied the pipeline's task_band to {new_path}".format(
                        new_path=new_path))

        # if we are in the driver, we want to print banner after executor__task banner
        run.set_run_state(RunState.SUCCESS)

        root_task = self.run.root_task_run.task
        msg = "Your run has been successfully executed!"
        if self.run.duration:
            msg = "Your run has been successfully executed in %s" % self.run.duration
        run_msg = "\n%s\n%s\n" % (
            root_task.ctrl.banner(
                "Main task '%s' is ready!" % root_task.task_name,
                color="green",
                task_run=self.run.root_task_run,
            ),
            run.describe.run_banner(msg, color="green", show_tasks_info=True),
        )
        logger.info(run_msg)

        return run
 def test_can_not_find(self):
     # an unregistered task name must raise, not return a fallback class
     with pytest.raises(TaskClassNotFoundException):
         get_task_registry().get_task_cls("t_config")
Example #18
0
def cmd_run(
    ctx,
    is_help,
    task,
    module,
    _sets,
    _sets_config,
    _sets_root,
    _overrides,
    _extend,
    verbose,
    print_task_band,
    describe,
    env,
    parallel,
    conf_file,
    task_version,
    project,
    name,
    description,
    run_driver,
    override_run_uid,
    alternative_task_name,
    job_name,
    scheduled_job_name,
    scheduled_date,
    interactive,
    submit_driver,
    submit_tasks,
    disable_web_tracker,
    open_web_tab,
    docker_build_tag,
):
    """
    Run a task or a DAG

    To see all available tasks use `dbnd show-tasks` (tab completion is available).
    `dbnd show-configs` will print all available configs.

    Flow: fold CLI switches into one config store, apply it to the global
    config, bootstrap dbnd, then either describe or run the requested task
    inside a new DatabandContext.
    """

    from dbnd import config
    from dbnd._core.context.databand_context import DatabandContext, new_dbnd_context
    from dbnd._core.utils.structures import combine_mappings

    task_registry = get_task_registry()

    # we need to do it before we are looking for the task cls
    load_user_modules(dbnd_config=config, modules=module)

    task_name = task
    # --verbose, --describe, --env, --parallel, --conf-file and --project
    # we filter out false flags since otherwise they will always override the config with their falseness
    main_switches = dict(
        databand=dict(
            verbose=verbose > 0,
            print_task_band=print_task_band,
            describe=describe,
            env=env,
            conf_file=conf_file,
            project=project,
        ),
        run=dict(
            name=name,
            parallel=parallel,
            interactive=interactive,
            description=description,
            is_archived=describe,
            open_web_tracker_in_browser=open_web_tab,
            submit_driver=_nullable_flag(submit_driver),
            submit_tasks=_nullable_flag(submit_tasks),
        ),
        kubernetes=dict(docker_build_tag=docker_build_tag),
        task=dict(task_version=task_version),
        task_build=dict(verbose=True if verbose > 1 else None),
        core=dict(tracker_api="disabled" if disable_web_tracker else None),
    )

    # drop the switches that were not actually provided on the CLI
    main_switches = cleanup_empty_switches(main_switches)

    _sets = list(_sets)
    _sets_config = list(_sets_config)
    _sets_root = list(_sets_root)

    root_task_config = {}
    for _set in _sets_root:
        root_task_config = combine_mappings(left=root_task_config, right=_set)

    # remove all "first level" config values, assume that they are for the main task
    # add them to _sets_root
    for _set in _sets:
        for k, v in list(_set.items()):
            # so json-like values won't be included
            if "." not in k and isinstance(v, six.string_types):
                root_task_config[k] = v
                del _set[k]

    cmd_line_config = parse_and_build_config_store(source="cli",
                                                   config_values=main_switches)
    # --set, --set-config
    # later updates win, so precedence is --set < --set-config < --extend-config < --set-override
    if _sets:
        cmd_line_config.update(_parse_cli(_sets, source="--set"))
    if _sets_config:
        cmd_line_config.update(_parse_cli(_sets_config, source="--set-config"))
    if _extend:
        cmd_line_config.update(
            _parse_cli(_extend, source="--extend-config", extend=True))
    if _overrides:
        cmd_line_config.update(
            _parse_cli(
                _overrides,
                source="--set-override",
                priority=ConfigValuePriority.OVERRIDE,
            ))

    # --set-root
    if root_task_config:
        task_cls = task_registry.get_task_cls(task_name)
        task_section = task_cls.task_definition.task_config_section
        # adding root task to configuration
        cmd_line_config.update(
            parse_and_build_config_store(
                config_values={task_section: root_task_config},
                source="--set-root"))

    # UPDATE CURRENT CONFIG with CLI values
    if cmd_line_config:
        if verbose:
            logger.info("CLI config: \n%s",
                        pformat_config_store_as_table(cmd_line_config))
        config.set_values(cmd_line_config, source="cmdline")

    # double checking on bootstrap, as we can run from all kind of locations
    # usually we should be bootstraped already as we run from cli.
    dbnd_bootstrap()

    # initialize basic logging (until we get to the context logging
    if not config.getboolean("log", "disabled"):
        configure_basic_logging(None)

    scheduled_run_info = None
    if scheduled_job_name:
        scheduled_run_info = ScheduledRunInfo(
            scheduled_job_name=scheduled_job_name,
            scheduled_date=scheduled_date)

    # update completer
    if config.getboolean("databand", "completer"):
        tasks = task_registry.list_dbnd_task_classes()
        completer.refresh(tasks)

    # bootstrap and modules are loaded, we can load the task
    task_cls = None
    if task_name:
        task_cls = task_registry.get_task_cls(task_name)

    if not task_name:
        print_help(ctx, None)
        return

    if is_help:
        print_help(ctx, task_cls)
        return

    with tracking_mode_context(tracking=False), new_dbnd_context(
            name="run") as context:  # type: DatabandContext
        if context.settings.system.describe:
            # we want to print describe without triggering real run
            logger.info("Building main task '%s'", task_name)
            root_task = get_task_registry().build_dbnd_task(task_name)
            root_task.ctrl.describe_dag.describe_dag()
            # currently there is bug with the click version we have when using python 2
            # so we don't use the click.echo function
            # https://github.com/pallets/click/issues/564
            print("Task %s has been described!" % task_name)
            return root_task
        return context.dbnd_run_task(
            task_or_task_name=task_name,
            force_task_name=alternative_task_name,
            job_name=job_name or alternative_task_name or task_name,
            run_uid=run_driver or override_run_uid,
            existing_run=run_driver is not None,
            scheduled_run_info=scheduled_run_info,
            project=project,
        )
Example #19
0
 def test_ambigious(self):
     # when two registered classes share the short name, the registry returns
     # its AMBIGUOUS_CLASS marker instead of arbitrarily picking one
     actual = get_task_registry()._get_task_cls("RAmbiguousClass")
     assert actual == DbndTaskRegistry.AMBIGUOUS_CLASS
Example #20
0
    def decorated(class_or_func):
        """
        Turn a user class or function into a dbnd task wrapper.

        Builds the decorator spec, wraps classes with a metaclass or
        functions with a runtime wrapper, attaches lazy task_cls /
        task_definition proxies, and registers a lazy task-class factory
        in the registry.
        """
        try:
            func_spec = build_task_decorator_spec(
                class_or_func=class_or_func,
                decorator_kwargs=decorator_kwargs,
                default_result=task_default_result,
            )
        except Exception as ex:
            # surface the failing user code in the log before re-raising
            logger.error(
                "Failed to create task %s: %s\n%s\n",
                class_or_func.__name__,
                str(ex),
                user_side_code(context=5),
                exc_info=show_exc_info(ex),
            )
            raise

        fp = TaskClsBuilder(func_spec, task_type, task_defaults)

        if func_spec.is_class:
            wrapper = six.add_metaclass(_DecoratedUserClassMeta)(class_or_func)
            fp._callable_item = wrapper

        else:

            @functools.wraps(class_or_func)
            def wrapper(*args, **kwargs):
                # tracking mode: run user code and report via callback
                if in_tracking_mode():
                    with fp.tracking_context(args,
                                             kwargs) as track_result_callback:
                        return track_result_callback(fp.func(*args, **kwargs))

                # orchestration mode: route through the task-call handler
                return _call_handler(
                    fp.get_task_cls(),
                    call_user_code=fp.func,
                    call_args=args,
                    call_kwargs=kwargs,
                )

            wrapper.dbnd_run = fp.dbnd_run

        wrapper.__is_dbnd_task__ = True
        wrapper.func = class_or_func

        # we're using CallableLazyObjectProxy to have lazy evaluation for creating task_cls
        # this is only orchestration scenarios
        task_cls = CallableLazyObjectProxy(fp.get_task_cls)
        wrapper.task_cls = task_cls
        wrapper.task = task_cls
        wrapper.t = task_cls

        # we need lazy task_definition here, for example for dbnd_task_as_bash_operator
        wrapper.task_definition = CallableLazyObjectProxy(
            fp.get_task_definition)

        # we need to manually register the task here, since in regular flow
        # this happens in TaskMetaclass, but it's not invoked here due to lazy
        # evaluation using CallableLazyObjectProxy
        tp = TaskPassport.from_func_spec(func_spec, decorator_kwargs)

        # TODO: we can use CallableLazyObjectProxy object (task_cls) instead of task_cls_factory
        r = get_task_registry()
        r.register_task_cls_factory(
            task_cls_factory=fp.get_task_cls,
            full_task_family=tp.full_task_family,
            task_family=tp.task_family,
        )

        return wrapper
Example #21
0
 def test_full_name_not_ambigious(self):
     # the fully-qualified family name resolves uniquely even when the
     # short name "RAmbiguousClass" is ambiguous
     actual = get_task_registry().get_task_cls(
         "test_dbnd.task_build.test_task_registry.RAmbiguousClass")
     assert actual == RAmbiguousClass
Example #22
0
    def run_driver(self):
        """
        Execute the driver flow for this run.

        Builds the root task if needed, validates the task graph is a DAG,
        initializes task runs, submits them to the task executor (optionally
        wrapped with a heartbeat sender), and reports success.
        """
        logger.info("Running driver... Driver PID: %s", os.getpid())

        run = self.run  # type: DatabandRun
        settings = run.context.settings
        run_executor = run.run_executor
        remote_engine = run_executor.remote_engine

        settings.git.validate_git_policy()
        # let prepare for remote execution
        remote_engine.prepare_for_run(run)

        if self.root_task_name_to_build:
            logger.info("Building main task '%s'",
                        self.root_task_name_to_build)
            root_task = get_task_registry().build_dbnd_task(
                self.root_task_name_to_build)
            logger.info(
                "Task %s has been created (%s children)",
                root_task.task_id,
                len(root_task.ctrl.task_dag.subdag_tasks()),
            )
            run.root_task = root_task

        # assert that graph is DAG
        run.root_task.task_dag.topological_sort()

        # now we init all task runs for all tasks in the pipeline
        task_runs = self._init_task_runs_for_execution(
            task_engine=remote_engine)
        root_task_run = run.root_task_run
        run.root_task.ctrl.banner(
            "Main task '%s' has been created!" % root_task_run.task_af_id,
            color="cyan",
            task_run=root_task_run,
        )

        if self.run_config.dry:
            # dry run: describe the DAG and stop before any execution
            run.root_task.ctrl.describe_dag.describe_dag()
            logger.warning(
                "Execution has been stopped due to run.dry=True flag!")
            return run

        print_tasks_tree(root_task_run.task, task_runs)
        if self._is_save_run_pickle(task_runs, remote_engine):
            run_executor.save_run_pickle()

        task_runs_to_run = [tr for tr in task_runs if not tr.is_skipped]

        # THIS IS THE POINT WHEN WE SUBMIT ALL TASKS TO EXECUTION
        # we should make sure that we create executor without driver task
        task_executor = get_task_executor(
            run,
            task_executor_type=run_executor.task_executor_type,
            host_engine=run_executor.host_engine,
            target_engine=remote_engine,
            task_runs=task_runs_to_run,
        )

        # NOTE(review): "hearbeat" is misspelled ("heartbeat") -- local only,
        # kept as-is since this edit changes documentation only
        hearbeat = None
        if self.send_heartbeat:
            # this will wrap the executor with "heartbeat" process
            hearbeat = start_heartbeat_sender(self)

        with nested(hearbeat):
            task_executor.do_run()

        # if we are in the driver, we want to print banner after executor__task banner
        run.set_run_state(RunState.SUCCESS)
        logger.info(run.describe.run_banner_for_finished())
        return run