Example #1
    def sync_task_run_attempts_retries(self, ti_status):
        databand_run = get_databand_run()
        for dag_run in ti_status.active_runs:
            for ti in dag_run.get_task_instances():
                task_run = databand_run.get_task_run_by_af_id(
                    ti.task_id
                )  # type: TaskRun
                # looking for retry tasks

                af_task_try_number = get_af_task_try_number(ti)
                if task_run and task_run.attempt_number != af_task_try_number:
                    self.log.info(
                        "Found a new attempt for task %60s (%s -> %s) in Airflow. Submitting to Databand.",
                        ti.task_id,
                        task_run.attempt_number,
                        af_task_try_number,
                    )
                    # update in memory object with new attempt number
                    task_run.update_task_run_attempt(af_task_try_number)
                    # sync the tracker with the new task_run_attempt
                    databand_run.tracker.tracking_store.add_task_runs(
                        run=databand_run, task_runs=[task_run]
                    )
                    report_airflow_task_instance(
                        ti.dag_id, ti.execution_date, [task_run]
                    )
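All of the examples on this page hinge on the same lookup: fetch the active run with get_databand_run() and resolve a TaskRun from an Airflow task id. A minimal sketch of that pattern, assuming a tracked run is active in the process (the helper name is illustrative):

from dbnd._core.current import get_databand_run

def find_task_run_for_airflow_task(af_task_id):
    # type: (str) -> TaskRun
    run = get_databand_run()  # the run tracked in the current process
    # get_task_run_by_af_id may return None for untracked tasks,
    # which is why the example above guards with `if task_run and ...`
    return run.get_task_run_by_af_id(af_task_id)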
Example #2
def create_dynamic_task(func_call):
    # type: (FuncCall) -> Task
    task_cls, call_args, call_kwargs = (
        func_call.task_cls,
        func_call.call_args,
        func_call.call_kwargs.copy(),
    )
    from dbnd import pipeline, PipelineTask
    from dbnd._core.decorator.dbnd_decorator import _default_output

    parent_task = current_task_run().task
    dbnd_run = get_databand_run()

    if task_cls._conf__decorator_spec is not None:
        # orig_call_args, orig_call_kwargs = call_args, call_kwargs
        call_args, call_kwargs = args_to_kwargs(
            task_cls._conf__decorator_spec.args, call_args, call_kwargs)

    # Map all kwargs to the "original" target of those objects
    # for example: for a DataFrame we'll try to find the target that was used to read it
    # get all possible value targets
    call_kwargs_as_targets = dbnd_run.target_origin.get_for_map(call_kwargs)
    for p_name, value_origin in call_kwargs_as_targets.items():
        root_target = value_origin.origin_target
        path = root_target.path if hasattr(root_target, "path") else None
        call_kwargs[p_name] = InlineTarget(
            root_target=root_target,
            obj=call_kwargs[p_name],
            value_type=value_origin.value_type,
            source=value_origin.origin_target.source,
            path=path,
        )

    call_kwargs.setdefault("task_is_dynamic", True)
    call_kwargs.setdefault("task_in_memory_outputs",
                           parent_task.settings.dynamic_task.in_memory_outputs)

    # in case of a pipeline - we'd like to run it as a regular task
    # if False and issubclass(task_cls, PipelineTask):
    #     # TODO: do we want to support this behavior
    #     task_cls = task(task_cls._conf__decorator_spec.item).task_cls

    if issubclass(task_cls, PipelineTask):
        # if it's a pipeline - create a new databand run;
        # override _task_default_result to be an object instead of a target
        task_cls = pipeline(task_cls._conf__decorator_spec.item,
                            _task_default_result=_default_output).task_cls

        # instantiate inline pipeline
        t = task_cls(*call_args, **call_kwargs)
        return t
    else:
        # instantiate inline task
        t = task_cls(*call_args, **call_kwargs)

        # update upstream/downstream relations - needed for correct tracking
        # the task may already be set as upstream, since it was executed already
        if not parent_task.task_dag.has_upstream(t):
            parent_task.set_upstream(t)
        return t
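The args_to_kwargs call above normalizes positional arguments into keyword arguments using the decorated function's parameter names. A toy reimplementation of that behavior, inferred from how it is used here (not dbnd's internal implementation):

def args_to_kwargs_sketch(arg_names, call_args, call_kwargs):
    # type: (list, tuple, dict) -> tuple
    # pair each positional argument with its parameter name
    kwargs = dict(call_kwargs)
    kwargs.update(zip(arg_names, call_args))
    # anything beyond the named parameters stays positional
    remaining_args = call_args[len(arg_names):]
    return remaining_args, kwargs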
Example #3
def dbnd_execute_airflow_operator(airflow_operator, context):
    """
    Airflow Operator execute function
    """
    dbnd_task_id = getattr(airflow_operator, "dbnd_task_id", None)
    if not dbnd_task_id:
        return airflow_operator.execute(context)

    # operator is wrapped/created by databand
    if isinstance(airflow_operator, DbndOperator):
        return airflow_operator.execute(context)

    from dbnd._core.current import get_databand_run

    # this is a native Airflow operator
    # we want to call it with the Databand wrapper:
    # we are at an airflow operator that is part of a databand dag
    dbnd_task_run = get_databand_run().get_task_run_by_id(dbnd_task_id)
    if isinstance(dbnd_task_run.task, AirflowOperatorAsDbndTask):
        # we need to update it with the latest operator, as Airflow has "templated" and copied the operator object
        dbnd_task_run.task.airflow_op = airflow_operator
        return dbnd_task_run.runner.execute(context)
    else:
        logging.info(
            "Found airflow operator with dbnd_task_id that can not be run by dbnd: %s",
            airflow_operator,
        )
        return airflow_operator.execute(context)
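In summary, the function routes execution as follows (a comment-form recap of the branches above):

# no dbnd_task_id attribute          -> plain airflow_operator.execute(context)
# operator is a DbndOperator         -> its own execute(context), already databand-aware
# native operator with dbnd_task_id  -> resolve the TaskRun by id and run via task_run.runner
# id set, but the dbnd task is not an AirflowOperatorAsDbndTask
#                                     -> log a message and fall back to execute(context)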
Example #4
    def _task_submit(self):
        spark_ctrl = self._get_spark_ctrl()
        if self._use_spark_context_inplace():
            logger.info("Reusing existing spark session in inline task "
                        "due to spark_local.enable_spark_context_inplace")
            return self._task_run()
        dr = get_databand_run()
        if not dr.driver_dump.exists():
            raise DatabandConfigError(
                "Please configure your cloud to always_save_pipeline=True, we need to pickle pipeline first"
            )
        driver_dump = self.current_task_run.run.driver_task.driver_dump
        self._application_args = [
            "execute",
            "--dbnd-run",
            spark_ctrl.sync(driver_dump),
            "task",
            "--task-id",
            self.task_id,
        ]

        if spark_ctrl.config.disable_tracking_api:
            self._application_args[1:1] = ["--disable-tracking-api"]

        return spark_ctrl.run_pyspark(
            pyspark_script=databand_lib_path("_core", "cli", "main.py"))
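For reference, the argv assembled above and handed to the PySpark entry point looks like this (paths and ids illustrative):

# default:
# ["execute", "--dbnd-run", "s3://bucket/run/driver.pickle", "task", "--task-id", "word_count__9f8e7d"]
# with disable_tracking_api set, the flag is spliced in right after "execute":
# ["execute", "--disable-tracking-api", "--dbnd-run", "s3://bucket/run/driver.pickle", "task", "--task-id", "word_count__9f8e7d"]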
Example #5
def read_metrics(metrics_task_id):
    # type: (str) -> Dict
    source_task_attempt_folder = (
        get_databand_run().get_task_run(metrics_task_id).attempt_folder
    )
    metrics = read_task_metrics(source_task_attempt_folder)
    return metrics
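A hedged usage sketch, assuming this runs inside a tracked run and that "prepare_data" is a valid task id (both illustrative):

metrics = read_metrics("prepare_data")
for name, value in metrics.items():
    print(name, value)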
Example #6
    def _update_databand_task_run_states(self, run):
        """
        Sync states between DBND and Airflow
        we need to sync state into Tracker,
        if we use "remote" executors (parallel/k8s) we need to copy state into
        current process (scheduler)
        """

        # UPSTREAM_FAILED is the only state we propagate into the Databand tracker;
        # all other state changes are managed by databand itself via its own state machine
        databand_run = get_databand_run()

        task_runs = []

        # sync all states

        # These tasks need special treatment because Airflow doesn't manage sub-pipelines
        #   for this, we need to process failures in child tasks first
        #   and decide if the parent sub-pipeline has failed
        upstream_failed_tasks: typing.List[TaskInstance] = []

        for ti in run.get_task_instances():
            task_run = databand_run.get_task_run_by_af_id(ti.task_id)  # type: TaskRun
            if not task_run:
                continue

            # UPSTREAM_FAILED tasks are not going to "run", so no code will update their state
            if (
                ti.state == State.UPSTREAM_FAILED
                and task_run.task_run_state != TaskRunState.UPSTREAM_FAILED
            ):
                upstream_failed_tasks.append(ti)

            # update only in memory state
            if (
                ti.state == State.SUCCESS
                and task_run.task_run_state != TaskRunState.SUCCESS
            ):
                task_run.set_task_run_state(TaskRunState.SUCCESS, track=False)
            if (
                ti.state == State.FAILED
                and task_run.task_run_state != TaskRunState.FAILED
            ):
                task_run.set_task_run_state(TaskRunState.FAILED, track=False)

        # process them at the last step, when we have knowledge about the child tasks
        for ti in upstream_failed_tasks:
            task_run: TaskRun = databand_run.get_task_run_by_af_id(ti.task_id)

            state = databand_run.get_upstream_failed_task_run_state(task_run)
            logger.info("Setting %s to %s", task_run.task.task_id, state)
            task_run.set_task_run_state(state, track=False)
            task_runs.append(task_run)

        # optimization to write all updates in batch
        if task_runs:
            databand_run.tracker.set_task_run_states(task_runs)
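The state propagation above reduces to the following mapping:

# Airflow ti.state       -> Databand TaskRunState
# State.SUCCESS          -> TaskRunState.SUCCESS  (in-memory only, track=False)
# State.FAILED           -> TaskRunState.FAILED   (in-memory only, track=False)
# State.UPSTREAM_FAILED  -> get_upstream_failed_task_run_state(task_run), decided only
#                           after all children were processed, then written in one
#                           batch via tracker.set_task_run_states(task_runs)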
Example #7
    def _collect_errors(self, ti_status, session=None):
        err = ""
        if ti_status.failed:
            dr = get_databand_run()
            upstream_failed = []
            failed = []
            for fail_info in ti_status.failed:
                airflow_task_id = fail_info[1]
                task_run = dr.get_task_run(airflow_task_id)
                task_name = task_run.task.task_name
                if task_run.task_run_state == State.UPSTREAM_FAILED:
                    # we don't want to show upstream failed in the list
                    upstream_failed.append(task_name)
                else:
                    failed.append(task_name)
            if upstream_failed:
                err += (
                    "Task that didn't run because "
                    "of failed dependency:\n\t{}\n".format("\n\t".join(upstream_failed))
                )
            if failed:
                err += "Failed tasks are:\n\t{}".format("\n\t".join(failed))
        if ti_status.deadlocked:
            err += (
                "---------------------------------------------------\n"
                "DagRunJob is deadlocked."
            )
            deadlocked_depends_on_past = any(
                t.are_dependencies_met(
                    dep_context=DepContext(ignore_depends_on_past=False),
                    session=session,
                    verbose=self.verbose,
                )
                != t.are_dependencies_met(
                    dep_context=DepContext(ignore_depends_on_past=True),
                    session=session,
                    verbose=self.verbose,
                )
                for t in ti_status.deadlocked
            )
            if deadlocked_depends_on_past:
                err += (
                    "Some of the deadlocked tasks were unable to run because "
                    'of "depends_on_past" relationships. Try running the '
                    "backfill with the option "
                    '"ignore_first_depends_on_past=True" or passing "-I" at '
                    "the command line."
                )
            err += " These tasks have succeeded:\n{}\n".format(ti_status.succeeded)
            err += " These tasks are running:\n{}\n".format(ti_status.running)
            err += " These tasks have failed:\n{}\n".format(ti_status.failed)
            err += " These tasks are skipped:\n{}\n".format(ti_status.skipped)
            err += " These tasks are deadlocked:\n{}\n".format(ti_status.deadlocked)

        return err
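An illustrative rendering of the error string this builds for a failed (non-deadlocked) run, with hypothetical task names:

Tasks that didn't run because of a failed dependency:
	publish_report
Failed tasks are:
	train_model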
Example #8
def create_and_run_dynamic_task_safe(func_call, parent_task_run):
    # type: (FuncCall, TaskRun) -> Any
    try:
        task = create_dynamic_task(func_call)  # type: Task
    except MissingParameterError:
        # We can't handle MissingParameterError, function invocation will always fail
        raise
    except Exception:
        _handle_dynamic_error("task-create", func_call)
        return func_call.invoke()

    try:
        from dbnd._core.decorator.func_task_call import TaskCallState, CALL_FAILURE_OBJ

        task._dbnd_call_state = TaskCallState(should_store_result=True)
        # this is the real run of the decorated function
        from dbnd import PipelineTask

        dbnd_run = get_databand_run()

        if isinstance(task, PipelineTask):
            # if it's pipeline - create new databand run
            run = dbnd_run.context.dbnd_run_task(task)
            task_run = run.get_task_run(task.task_id)
        else:
            task_run = dbnd_run.run_executor.run_dynamic_task(
                task, task_engine=current_task_run().task_engine
            )
            if task._dbnd_call_state.result_saved:
                return task._dbnd_call_state.result

        t = task_run.task
        # if we are inside run, we want to have real values, not deferred!
        if t.task_definition.single_result_output:
            return t.__class__.result.load_from_target(t.result)
            # we have func without result, just fallback to None

        return t
    except Exception:
        if task and task._dbnd_call_state:
            if task._dbnd_call_state.finished:
                # if the function was invoked and finished - then we failed in dbnd post-exec
                # just return invoke_result to user
                _handle_dynamic_error("task-post-execute", func_call)
                return task._dbnd_call_state.result
            if task._dbnd_call_state.started:
                # if started but not finished -> it was a user code exception -> re-raise
                raise

        # not started - the exception was ours (pre-exec), so run the user code directly
        _handle_dynamic_error("task-pre-execute", func_call)
        return func_call.invoke()
    finally:
        # clean _dbnd_call_state to avoid memory leaks
        task._dbnd_call_state = None
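The failure-handling contract of the wrapper above, summarized:

# create_dynamic_task raises        -> log "task-create", fall back to func_call.invoke()
# failure before user code started  -> log "task-pre-execute", fall back to func_call.invoke()
# user code itself raised           -> re-raise to the caller unchanged
# failure after user code finished  -> log "task-post-execute", return the stored result
# MissingParameterError             -> always re-raised; invocation could never succeed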
Example #9
def run_dynamic_task_safe(task, func_call):
    # type: (Task, FuncCall) -> Any
    try:
        from dbnd._core.decorator.func_task_call import TaskCallState, CALL_FAILURE_OBJ

        task._dbnd_call_state = TaskCallState(should_store_result=True)
        # this is the real run of the decorated function
        from dbnd import PipelineTask

        dbnd_run = get_databand_run()

        if isinstance(task, PipelineTask):
            # if it's pipeline - create new databand run
            run = dbnd_run.context.dbnd_run_task(task)
            task_run = run.get_task_run(task.task_id)
        else:
            task_run = dbnd_run.run_dynamic_task(
                task, task_engine=current_task_run().task_engine
            )
            if task._dbnd_call_state.result_saved:
                return task._dbnd_call_state.result

        t = task_run.task
        # if we are inside run, we want to have real values, not deferred!
        if t.task_definition.single_result_output:
            return t.__class__.result.load_from_target(t.result)
            # we have func without result, just fallback to None

        return t
    except Exception:
        if task and task._dbnd_call_state:
            if task._dbnd_call_state.finished:
                # if the function was invoked and finished - then we failed in dbnd post-exec
                # just return invoke_result to user
                logger.warning("Error during dbnd post-exec, ignoring", exc_info=True)
                return task._dbnd_call_state.result
            if task._dbnd_call_state.started:
                # if started but not finished -> it was a user code exception -> re-raise
                raise

        # not started - the exception was ours (pre-exec), so run the user code directly
        logger.warning("Error during dbnd task-pre-execute, ignoring", exc_info=True)
        return func_call.invoke()
    finally:
        # clean _dbnd_call_state to avoid memory leaks
        task._dbnd_call_state = None
Example #10
    def _update_databand_task_run_states(self, run):
        # we are going to update UPSTREAM_FAILED only -
        # it is the only state we want to propagate into Databand;
        # all other state changes are managed by databand itself via its own state machine
        dr = get_databand_run()

        task_runs = []
        for ti in run.get_task_instances():
            if ti.state != State.UPSTREAM_FAILED:
                continue
            task_run = dr.get_task_run_by_af_id(ti.task_id)  # type: TaskRun
            if not task_run:
                continue
            if task_run.task_run_state != TaskRunState.UPSTREAM_FAILED:
                task_run.set_task_run_state(TaskRunState.UPSTREAM_FAILED, track=False)
                task_runs.append(task_run)
        if task_runs:
            dr.tracker.set_task_run_states(task_runs)
Example #11
    def _update_databand_task_run_states(self, run):
        """
        Sync states between DBND and Airflow
        we need to sync state into Tracker,
        if we use "remote" executors (parallel/k8s) we need to copy state into
        current process (scheduler)
        """

        # UPSTREAM_FAILED is the only state we propagate into the Databand tracker;
        # all other state changes are managed by databand itself via its own state machine
        dr = get_databand_run()

        task_runs = []

        # sync all states
        for ti in run.get_task_instances():
            task_run = dr.get_task_run_by_af_id(ti.task_id)  # type: TaskRun
            if not task_run:
                continue

            # UPSTREAM_FAILED tasks are not going to "run", so no code will update their state
            if (
                ti.state == State.UPSTREAM_FAILED
                and task_run.task_run_state != TaskRunState.UPSTREAM_FAILED
            ):
                task_run.set_task_run_state(TaskRunState.UPSTREAM_FAILED, track=False)
                task_runs.append(task_run)
            # update only in memory state
            if (
                ti.state == State.SUCCESS
                and task_run.task_run_state != TaskRunState.SUCCESS
            ):
                task_run.set_task_run_state(TaskRunState.SUCCESS, track=False)
            if (
                ti.state == State.FAILED
                and task_run.task_run_state != TaskRunState.FAILED
            ):
                task_run.set_task_run_state(TaskRunState.FAILED, track=False)

        # optimization to write all updates in batch
        if task_runs:
            dr.tracker.set_task_run_states(task_runs)
Example #12
    def task_run(self):
        # type: () -> TaskRun
        run = get_databand_run()
        return run.get_task_run(self.task.task_id)
Example #13
    def current_task_run(self):
        # type: () -> TaskRun
        return get_databand_run().get_task_run(self.task_id)
Example #14
    def tracking_context(self, call_args, call_kwargs):
        user_code_called = False  # whether we got to executing the user code
        user_code_finished = False  # whether we finished executing the user code
        func_call = None
        try:
            # 1. check that we don't have too many calls
            if self._call_count_limit_exceeded():
                yield _do_nothing_decorator
                return

            # 2. Start or reuse an existing "main tracking task" that is the root for tracked tasks
            if not try_get_current_task():
                """
                try to get existing task, and if not exists - try to get/create inplace_task_run
                """
                from dbnd._core.tracking.script_tracking_manager import (
                    try_get_inplace_tracking_task_run, )

                inplace_tacking_task = try_get_inplace_tracking_task_run()
                if not inplace_tacking_task:
                    # we didn't manage to start inplace tracking task run, we will not be able to track
                    yield _do_nothing_decorator
                    return

            tracking_task_definition = self.get_tracking_task_definition()
            callable_spec = tracking_task_definition.task_decorator.get_callable_spec()

            func_call = TrackedFuncCallWithResult(
                callable=self.callable,
                call_args=tuple(call_args),  # prevent original call_args modification
                call_kwargs=dict(call_kwargs),  # prevent original kwargs modification
            )
            # replace any positional argument with a kwarg where possible
            args, kwargs = args_to_kwargs(
                callable_spec.args, func_call.call_args, func_call.call_kwargs
            )

            # instantiate inline task
            task = TrackingTask.for_func(tracking_task_definition, args, kwargs)

            # update upstream/downstream relations - needed for correct tracking
            # the task may already be set as upstream, since it was executed already
            parent_task = current_task_run().task
            if not parent_task.task_dag.has_upstream(task):
                parent_task.set_upstream(task)

            # check whether any of the inputs are outputs of a previous task;
            # if so, add that task as upstream
            dbnd_run = get_databand_run()
            call_kwargs_as_targets = dbnd_run.target_origin.get_for_map(kwargs)
            for value_origin in call_kwargs_as_targets.values():
                up_task = value_origin.origin_target.task
                task.set_upstream(up_task)

            # creating task_run as a task we found mid-run
            task_run = dbnd_run.create_task_run_at_execution_time(
                task, task_engine=current_task_run().task_engine
            )

            should_capture_log = (
                TrackingConfig.from_databand_context().capture_tracking_log
            )
            with task_run.runner.task_run_execution_context(
                handle_sigterm=True, capture_log=should_capture_log
            ):
                task_run.set_task_run_state(state=TaskRunState.RUNNING)

                _log_inputs(task_run)

                # if we reached this line, then all tracking initialization is
                # finished successfully, and we're going to execute user code
                user_code_called = True

                try:
                    # tracking_context is context manager - user code will run on yield
                    yield func_call.set_result

                    # if we reached this line, this means that user code finished
                    # successfully without any exceptions
                    user_code_finished = True
                # We catch BaseException since we want to catch KeyboardInterrupts as well
                except BaseException as ex:
                    task_run.finished_time = utcnow()

                    error = TaskRunError.build_from_ex(ex, task_run)
                    task_run.set_task_run_state(TaskRunState.FAILED, error=error)
                    raise

                else:
                    task_run.finished_time = utcnow()

                    # func_call.result should contain result, log it
                    _log_result(task_run, func_call.result)

                    task_run.set_task_run_state(TaskRunState.SUCCESS)
        except BaseException:
            if user_code_called and not user_code_finished:
                # if we started calling the user code and didn't get to the
                # user_code_finished line - it was a user code exception - just re-raise it
                raise
            # otherwise we either didn't reach the user code or already passed it -
            # then it's some dbnd tracking error - just log it
            if func_call:
                _handle_tracking_error("tracking-init", func_call)
            else:
                log_exception_to_server()
        # if we didn't reach the user_code_called=True line - there was an error during
        # dbnd tracking initialization, so nothing was done - the user function wasn't called yet
        if not user_code_called:
            # tracking_context is context manager - user code will run on yield
            yield _do_nothing_decorator
            return
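tracking_context is written as a generator context manager: it yields exactly once, handing back either func_call.set_result or a no-op decorator. A hedged sketch of the driving side, assuming the method is wrapped with contextlib.contextmanager (this call site is an assumption, not dbnd's actual wrapper):

with self.tracking_context(call_args, call_kwargs) as track_result:
    # track_result is func_call.set_result on the happy path,
    # or _do_nothing_decorator when tracking is disabled or failed
    result = track_result(self.callable(*call_args, **call_kwargs))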
Example #15
    def _run_task_from_another_task_execution(
        self, parent_task, call_args, call_kwargs
    ):
        # type: (Task, tuple, dict) -> Any
        # task is running from another task
        task_cls = self.get_task_cls()
        from dbnd import PipelineTask, pipeline
        from dbnd._core.task_build.dbnd_decorator import _default_output

        dbnd_run = get_databand_run()

        # orig_call_args, orig_call_kwargs = call_args, call_kwargs
        call_args, call_kwargs = args_to_kwargs(
            self.get_callable_spec().args, call_args, call_kwargs
        )

        # Map all kwargs to the "original" target of those objects
        # for example: for a DataFrame we'll try to find the target that was used to read it
        # get all possible value targets
        call_kwargs_as_targets = dbnd_run.target_origin.get_for_map(call_kwargs)
        for p_name, value_origin in call_kwargs_as_targets.items():
            root_target = value_origin.origin_target
            path = root_target.path if hasattr(root_target, "path") else None
            original_object = call_kwargs[p_name]
            call_kwargs[p_name] = InlineTarget(
                root_target=root_target,
                obj=original_object,
                value_type=value_origin.value_type,
                source=value_origin.origin_target.source,
                path=path,
            )

        call_kwargs.setdefault("task_is_dynamic", True)
        call_kwargs.setdefault(
            "task_in_memory_outputs",
            parent_task.settings.run.task_run_at_execution_time_in_memory_outputs,
        )

        if issubclass(task_cls, PipelineTask):
            # if it's a pipeline - create a new databand run;
            # override _task_default_result to be an object instead of a target
            task_cls = pipeline(
                self.class_or_func, _task_default_result=_default_output
            ).task_cls

            # instantiate inline pipeline
            task = task_cls(*call_args, **call_kwargs)
            # if it's a pipeline - create a new databand run
            run = dbnd_run.context.dbnd_run_task(task)
            task_run = run.get_task_run(task.task_id)
        else:
            # instantiate inline task (dbnd object)
            task = task_cls(*call_args, **call_kwargs)

            # update upstream/downstream relations - needed for correct tracking
            # the task may already be set as upstream, since it was executed already
            if not parent_task.task_dag.has_upstream(task):
                parent_task.set_upstream(task)

            from dbnd._core.task_build.task_cls__call_state import TaskCallState

            task._dbnd_call_state = TaskCallState(should_store_result=True)
            try:
                task_run = dbnd_run.run_executor.run_task_at_execution_time(
                    task, task_engine=current_task_run().task_engine
                )

                # this will work only for _DecoratedTask
                if task._dbnd_call_state.result_saved:
                    return task._dbnd_call_state.result

            finally:
                # clean _dbnd_call_state to avoid memory leaks
                task._dbnd_call_state = None

        # if we are inside run, we want to have real values, not deferred!
        if task.task_definition.single_result_output:
            return task.__class__.result.load_from_target(task.result)
            # we have func without result, just fallback to None
        return task
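The unwrapping at the end deserves a note: for a task with a single result output, the caller gets the real value back rather than a deferred target.

# task.__class__.result          -> the result parameter definition on the task class
# .load_from_target(task.result) -> reads the materialized target back into memory,
#                                   so the caller receives the value, not a path/target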
Example #16
    def tracking_context(self, call_args, call_kwargs):
        user_code_called = False  # whether we got to executing the user code
        user_code_finished = False  # whether we finished executing the user code
        func_call = None
        try:
            func_call = FuncCallWithResult(
                task_cls=self.get_tracking_task_cls(),
                call_user_code=self.func,
                call_args=tuple(call_args),  # prevent original call_args modification
                call_kwargs=dict(call_kwargs),  # prevent original kwargs modification
            )

            # 1. check that we don't have too many calls
            # 2. start or reuse an existing "inplace_task" that is the root for tracked tasks
            if not self._call_count_limit_exceeded() and _get_or_create_inplace_task():
                cls = func_call.task_cls

                # replace any positional argument with a kwarg where possible
                args, kwargs = args_to_kwargs(
                    cls._conf__decorator_spec.args,
                    func_call.call_args,
                    func_call.call_kwargs,
                )

                # instantiate inline task
                task = cls._create_task(args, kwargs)

                # update upstream/downstream relations - needed for correct tracking
                # the task may already be set as upstream, since it was executed already
                parent_task = current_task_run().task
                if not parent_task.task_dag.has_upstream(task):
                    parent_task.set_upstream(task)

                # check whether any of the inputs are outputs of a previous task;
                # if so, add that task as upstream
                dbnd_run = get_databand_run()
                call_kwargs_as_targets = dbnd_run.target_origin.get_for_map(kwargs)
                for value_origin in call_kwargs_as_targets.values():
                    up_task = value_origin.origin_target.task
                    task.set_upstream(up_task)

                # creating task_run as a task we found mid-run
                task_run = dbnd_run.create_dynamic_task_run(
                    task, task_engine=current_task_run().task_engine
                )

                with task_run.runner.task_run_execution_context(handle_sigterm=True):
                    task_run.set_task_run_state(state=TaskRunState.RUNNING)

                    _log_inputs(task_run)

                    # if we reached this line, then all tracking initialization is
                    # finished successfully, and we're going to execute user code
                    user_code_called = True

                    try:
                        # tracking_context is context manager - user code will run on yield
                        yield func_call.set_result

                        # if we reached this line, this means that user code finished
                        # successfully without any exceptions
                        user_code_finished = True
                    except Exception as ex:
                        task_run.finished_time = utcnow()

                        error = TaskRunError.build_from_ex(ex, task_run)
                        task_run.set_task_run_state(TaskRunState.FAILED, error=error)
                        raise
                    else:
                        task_run.finished_time = utcnow()

                        # func_call.result should contain result, log it
                        _log_result(task_run, func_call.result)

                        task_run.set_task_run_state(TaskRunState.SUCCESS)
        except Exception:
            if user_code_called and not user_code_finished:
                # if we started calling the user code and didn't get to the
                # user_code_finished line - it was a user code exception - just re-raise it
                raise
            # otherwise we either didn't reach the user code or already passed it -
            # then it's some dbnd tracking error - just log it
            if func_call:
                _handle_dynamic_error("tracking-init", func_call)
        # if we didn't reach the user_code_called=True line - there was an error during
        # dbnd tracking initialization, so nothing was done - the user function wasn't called yet
        if not user_code_called:
            # tracking_context is context manager - user code will run on yield
            yield _passthrough_decorator
Example #17
def _create_dynamic_task_run(func_call):
    task = create_dynamic_task(func_call)
    dbnd_run = get_databand_run()
    task_run = dbnd_run.create_dynamic_task_run(
        task, task_engine=current_task_run().task_engine
    )
    return task_run
Example #18
def _dbnd_operator_to_taskrun(operator):
    # type: (DbndOperator) -> TaskRun
    from dbnd._core.current import get_databand_run

    return get_databand_run().get_task_run_by_id(operator.dbnd_task_id)