def create_dynamic_task(func_call):
    # type: (FuncCall) -> Task
    """Instantiate the task behind a call made while another task is running.

    Steps, in order:

    1. When the task class carries a decorator spec, the call arguments are
       normalised through ``args_to_kwargs`` using the spec's ``args``.
    2. Every keyword argument for which the current run reports an origin
       (``dbnd_run.target_origin.get_for_map``) is replaced by an
       ``InlineTarget`` wrapping the original object together with that
       origin's target, value type, source and (when available) path.
       For example, for a DataFrame this links the value to the target it
       was read from.
    3. ``task_is_dynamic`` and ``task_in_memory_outputs`` are filled in only
       if the caller did not pass them explicitly.
    4. A ``PipelineTask`` subclass is rebuilt through ``pipeline(...)`` with
       ``_default_output`` as its default result and is returned right after
       instantiation. Any other task is additionally registered as an
       upstream of the currently running task, unless it already is one.

    :param func_call: call description providing ``task_cls``, ``call_args``
        and ``call_kwargs``.
    :return: the newly instantiated task.
    """
    task_cls = func_call.task_cls
    call_args = func_call.call_args
    # Shallow copy: entries replaced or added below must not show up in
    # func_call.call_kwargs.
    call_kwargs = func_call.call_kwargs.copy()

    from dbnd import PipelineTask, pipeline
    from dbnd._core.decorator.dbnd_decorator import _default_output

    parent_task = current_task_run().task
    dbnd_run = get_databand_run()

    decorator_spec = task_cls._conf__decorator_spec
    if decorator_spec is not None:
        call_args, call_kwargs = args_to_kwargs(
            decorator_spec.args, call_args, call_kwargs
        )

    # Map kwargs to the "original" target of each object: only the names
    # present in the returned map are wrapped.
    origins_by_name = dbnd_run.target_origin.get_for_map(call_kwargs)
    for kwarg_name, origin in origins_by_name.items():
        origin_target = origin.origin_target
        # Not every origin target exposes a path.
        if hasattr(origin_target, "path"):
            target_path = origin_target.path
        else:
            target_path = None
        call_kwargs[kwarg_name] = InlineTarget(
            root_target=origin_target,
            obj=call_kwargs[kwarg_name],
            value_type=origin.value_type,
            source=origin_target.source,
            path=target_path,
        )

    call_kwargs.setdefault("task_is_dynamic", True)
    call_kwargs.setdefault(
        "task_in_memory_outputs",
        parent_task.settings.dynamic_task.in_memory_outputs,
    )

    # TODO: do we want to support running an inline pipeline as a regular
    # task instead?
    is_pipeline = issubclass(task_cls, PipelineTask)
    if is_pipeline:
        # Override _task_default_result so the pipeline result is an object
        # instead of a target.
        # NOTE(review): decorator_spec is dereferenced here without the None
        # check used above - confirm a pipeline class always has a spec.
        task_cls = pipeline(
            decorator_spec.item, _task_default_result=_default_output
        ).task_cls

    # Instantiate the inline task / inline pipeline.
    new_task = task_cls(*call_args, **call_kwargs)

    # Update upstream/downstream relations - needed for correct tracking.
    # The task can be recorded as an upstream since it was executed already.
    # Inline pipelines are returned without touching the parent's DAG.
    if not is_pipeline and not parent_task.task_dag.has_upstream(new_task):
        parent_task.set_upstream(new_task)
    return new_task
def _run_task_from_another_task_execution(
    self, parent_task, call_args, call_kwargs
):
    # type: (TaskDecorator, Task, *Any, **Any) -> TaskRun
    """Build and immediately run this decorated task from inside another task.

    NOTE(review): the type comment above looks out of date - the visible
    return paths yield the stored call result, the value loaded from the
    task's ``result`` target, or the task object itself; none returns the
    task run.

    The call arguments are normalised through ``args_to_kwargs`` and every
    keyword value with a known origin in the current run is wrapped in an
    ``InlineTarget``. ``task_is_dynamic`` and ``task_in_memory_outputs`` are
    set only when the caller did not provide them.

    * ``PipelineTask`` subclass: the class is rebuilt through
      ``pipeline(...)`` with ``_default_output`` as default result, and the
      instance is run via ``dbnd_run.context.dbnd_run_task``.
    * any other task: the instance is registered as an upstream of
      ``parent_task`` (unless it already is one) and executed through
      ``run_executor.run_task_at_execution_time``. If the call state
      reports a saved result, that result is returned directly.

    :param parent_task: the task from whose execution this call is made.
    :param call_args: positional arguments of the call.
    :param call_kwargs: keyword arguments of the call.
    :return: the saved call result, else the value loaded from the task's
        single result output, else the task itself.
    """
    # task is running from another task
    task_cls = self.get_task_cls()
    from dbnd import PipelineTask, pipeline
    from dbnd._core.task_build.dbnd_decorator import _default_output

    dbnd_run = get_databand_run()

    call_args, call_kwargs = args_to_kwargs(
        self.get_callable_spec().args, call_args, call_kwargs
    )

    # Map kwargs to the "original" target of each object, e.g. for a
    # DataFrame the target that was used to read it. Only the names present
    # in the returned map are wrapped.
    origins_by_name = dbnd_run.target_origin.get_for_map(call_kwargs)
    for kwarg_name, origin in origins_by_name.items():
        origin_target = origin.origin_target
        # Not every origin target exposes a path.
        if hasattr(origin_target, "path"):
            target_path = origin_target.path
        else:
            target_path = None
        wrapped_value = call_kwargs[kwarg_name]
        call_kwargs[kwarg_name] = InlineTarget(
            root_target=origin_target,
            obj=wrapped_value,
            value_type=origin.value_type,
            source=origin_target.source,
            path=target_path,
        )

    call_kwargs.setdefault("task_is_dynamic", True)
    call_kwargs.setdefault(
        "task_in_memory_outputs",
        parent_task.settings.run.task_run_at_execution_time_in_memory_outputs,
    )

    if issubclass(task_cls, PipelineTask):
        # Override _task_default_result so the pipeline result is an object
        # instead of a target.
        pipeline_decorator = pipeline(
            self.class_or_func, _task_default_result=_default_output
        )
        task_cls = pipeline_decorator.task_cls

        # Instantiate the inline pipeline and run it as a new databand run.
        task = task_cls(*call_args, **call_kwargs)
        run = dbnd_run.context.dbnd_run_task(task)
        # The task run is looked up but not read again in this function.
        task_run = run.get_task_run(task.task_id)
    else:
        # Instantiate the inline task (dbnd object).
        task = task_cls(*call_args, **call_kwargs)

        # Update upstream/downstream relations - needed for correct tracking.
        # The task can be recorded as an upstream since it was executed
        # already.
        already_upstream = parent_task.task_dag.has_upstream(task)
        if not already_upstream:
            parent_task.set_upstream(task)

        from dbnd._core.task_build.task_cls__call_state import TaskCallState

        task._dbnd_call_state = TaskCallState(should_store_result=True)
        try:
            task_run = dbnd_run.run_executor.run_task_at_execution_time(
                task, task_engine=current_task_run().task_engine
            )

            # This will work only for _DecoratedTask.
            call_state = task._dbnd_call_state
            if call_state.result_saved:
                return call_state.result
        finally:
            # Always drop the call state (also on the early return above and
            # on errors) to avoid memory leaks.
            task._dbnd_call_state = None

    # Inside a run we want real values, not deferred ones.
    if not task.task_definition.single_result_output:
        # No single result output to load: fall back to the task itself.
        return task
    result_parameter = task.__class__.result
    return result_parameter.load_from_target(task.result)