コード例 #1
0
def create_dynamic_task(func_call):
    # type: (FuncCall) -> Task
    """Instantiate the task described by ``func_call`` from inside a running task.

    The call arguments are normalised with ``args_to_kwargs`` when the task
    class carries a decorator spec. Every keyword value for which the current
    databand run reports an origin target is then wrapped in an
    ``InlineTarget`` that points back at that origin.

    :param func_call: object exposing ``task_cls``, ``call_args`` and
        ``call_kwargs``; the kwargs are copied before being modified.
    :return: the new task instance. Unless the task class is a
        ``PipelineTask`` subclass, the instance is also set as an upstream
        of the currently running task (if it is not one already).
    """
    task_cls = func_call.task_cls
    call_args = func_call.call_args
    # work on a copy so the kwargs held by func_call are never mutated
    call_kwargs = func_call.call_kwargs.copy()

    from dbnd import pipeline, PipelineTask
    from dbnd._core.decorator.dbnd_decorator import _default_output

    parent_task = current_task_run().task
    dbnd_run = get_databand_run()

    decorator_spec = task_cls._conf__decorator_spec
    if decorator_spec is not None:
        call_args, call_kwargs = args_to_kwargs(
            decorator_spec.args, call_args, call_kwargs
        )

    # Swap each kwarg that has a known origin for an InlineTarget that keeps
    # both the in-memory object and the target it originally came from
    # (e.g. a DataFrame and the target it was read from).
    origins_by_param = dbnd_run.target_origin.get_for_map(call_kwargs)
    for param_name, origin in origins_by_param.items():
        root_target = origin.origin_target
        if hasattr(root_target, "path"):
            target_path = root_target.path
        else:
            target_path = None
        call_kwargs[param_name] = InlineTarget(
            root_target=root_target,
            obj=call_kwargs[param_name],
            value_type=origin.value_type,
            source=origin.origin_target.source,
            path=target_path,
        )

    # explicit values supplied by the caller win over these defaults
    call_kwargs.setdefault("task_is_dynamic", True)
    in_memory_outputs = parent_task.settings.dynamic_task.in_memory_outputs
    call_kwargs.setdefault("task_in_memory_outputs", in_memory_outputs)

    # TODO: decide whether a nested pipeline should instead be run as a
    # regular task.
    is_pipeline = issubclass(task_cls, PipelineTask)
    if is_pipeline:
        # rebuild the pipeline class so that its default result is
        # `_default_output` (an object) instead of a target
        inline_pipeline = pipeline(
            decorator_spec.item, _task_default_result=_default_output
        )
        task_cls = inline_pipeline.task_cls

    # instantiate the inline task / inline pipeline
    new_task = task_cls(*call_args, **call_kwargs)

    # Regular tasks are linked to the parent as upstream, which is needed for
    # correct tracking; the task can be an upstream because it was executed
    # already. Pipelines are returned without this link.
    if not is_pipeline and not parent_task.task_dag.has_upstream(new_task):
        parent_task.set_upstream(new_task)
    return new_task
コード例 #2
0
ファイル: task_decorator.py プロジェクト: databand-ai/dbnd
    def _run_task_from_another_task_execution(
        self, parent_task, call_args, call_kwargs
    ):
        # type: (TaskDecorator, Task, *Any, **Any) -> TaskRun
        """Build and run this decorator's task while ``parent_task`` is executing.

        The call arguments are normalised with ``args_to_kwargs``; keyword
        values for which the current databand run reports an origin target
        are wrapped in ``InlineTarget`` objects before the task is created.

        :param parent_task: the task whose execution triggered this call; it
            supplies the in-memory-outputs setting and, for non-pipeline
            tasks, receives the new task as an upstream.
        :param call_args: positional arguments of the original call.
        :param call_kwargs: keyword arguments of the original call.
        :return: for a non-pipeline task whose call state has a saved result,
            that result; otherwise the value loaded from ``task.result`` when
            the task definition has a single result output; otherwise the
            task object itself.
        """
        # task is running from another task
        task_cls = self.get_task_cls()
        from dbnd import PipelineTask, pipeline
        from dbnd._core.task_build.dbnd_decorator import _default_output

        dbnd_run = get_databand_run()

        callable_spec = self.get_callable_spec()
        call_args, call_kwargs = args_to_kwargs(
            callable_spec.args, call_args, call_kwargs
        )

        # Swap each kwarg that has a known origin for an InlineTarget that
        # keeps both the in-memory object and the target it originally came
        # from (e.g. a DataFrame and the target it was read from).
        origins_by_param = dbnd_run.target_origin.get_for_map(call_kwargs)
        for param_name, origin in origins_by_param.items():
            root_target = origin.origin_target
            if hasattr(root_target, "path"):
                target_path = root_target.path
            else:
                target_path = None
            in_memory_value = call_kwargs[param_name]
            call_kwargs[param_name] = InlineTarget(
                root_target=root_target,
                obj=in_memory_value,
                value_type=origin.value_type,
                source=origin.origin_target.source,
                path=target_path,
            )

        # explicit values supplied by the caller win over these defaults
        call_kwargs.setdefault("task_is_dynamic", True)
        run_settings = parent_task.settings.run
        call_kwargs.setdefault(
            "task_in_memory_outputs",
            run_settings.task_run_at_execution_time_in_memory_outputs,
        )

        if issubclass(task_cls, PipelineTask):
            # rebuild the pipeline class so that its default result is
            # `_default_output` (an object) instead of a target
            inline_pipeline = pipeline(
                self.class_or_func, _task_default_result=_default_output
            )
            task_cls = inline_pipeline.task_cls

            # instantiate the inline pipeline and execute it as a new
            # databand run
            task = task_cls(*call_args, **call_kwargs)
            run = dbnd_run.context.dbnd_run_task(task)
            task_run = run.get_task_run(task.task_id)
        else:
            # instantiate inline task (dbnd object)
            task = task_cls(*call_args, **call_kwargs)

            # Link the task to the parent as upstream - needed for correct
            # tracking; it can be an upstream because it was executed already.
            already_upstream = parent_task.task_dag.has_upstream(task)
            if not already_upstream:
                parent_task.set_upstream(task)

            from dbnd._core.task_build.task_cls__call_state import TaskCallState

            # ask the task to keep its call result so that it can be handed
            # straight back to the caller
            task._dbnd_call_state = TaskCallState(should_store_result=True)
            try:
                engine = current_task_run().task_engine
                task_run = dbnd_run.run_executor.run_task_at_execution_time(
                    task, task_engine=engine
                )

                # this will work only for _DecoratedTask
                if task._dbnd_call_state.result_saved:
                    return task._dbnd_call_state.result

            finally:
                # always drop the call state (and the result it holds) to
                # avoid memory leaks
                task._dbnd_call_state = None

        # No single result output to load: hand back the task object itself.
        if not task.task_definition.single_result_output:
            return task

        # if we are inside run, we want to have real values, not deferred!
        return task.__class__.result.load_from_target(task.result)