Exemple #1
0
    def run(self):
        """Log two sample metrics, register one external resource URL, then
        delegate to the parent class ``run``."""
        sample_metrics = (("metric_int", 1), ("metric_str", "str"))
        for metric_name, metric_value in sample_metrics:
            self.log_metric(metric_name, metric_value)

        external_urls = {"someurl": "http://localhost"}
        current_task_run().set_external_resource_urls(external_urls)

        super(TTaskWithMetrics, self).run()
Exemple #2
0
    def kill_run(self, message=None):
        """Request that this databand run is killed.

        :param message: optional error text; ``DEFAULT_TASK_CANCELED_ERR_MSG``
            is used when it is empty.
        :raises DatabandFailFastError: when the kill request itself fails.
        :raises DatabandError: when the current task run belongs to this run,
            after the kill request has been sent.
        """
        _is_killed.set()

        # The kill request moves every running task run to shutdown, which ends
        # up as cancelled. The task run that asked for the kill must instead be
        # recorded as FAILED with an error attached, so that its message can be
        # shown in the UI as the error message of the whole run.
        initiator = current_task_run()
        initiated_from_this_run = initiator.run == self.run
        if initiated_from_this_run:
            error_text = message or DEFAULT_TASK_CANCELED_ERR_MSG
            help_text = (
                "task with task_run_uid:%s initiated kill_run"
                % initiator.task_run_uid
            )
            kill_error = TaskRunError.build_from_message(
                task_run=initiator,
                msg=error_text,
                help_msg=help_text,
                ex_class=DbndCanceledRunError,
            )
            initiator.set_task_run_state(
                TaskRunState.FAILED, track=True, error=kill_error
            )

        run_uid = str(self.run.run_uid)
        try:
            kill_run(run_uid, ctx=self.run.context)
        except Exception as e:
            raise DatabandFailFastError(
                "Could not send request to kill databand run!", e
            )

        if initiated_from_this_run:
            raise DatabandError(error_text)
Exemple #3
0
    def run(self):
        """Execute the current databand run as a driver, or hand it over as a submitter.

        Returns the run object when acting as a driver and execution finished.
        Returns None in describe-only mode and when acting as a submitter.
        """
        driver_task_run = current_task_run()
        run = driver_task_run.run  # type: DatabandRun
        if self.is_submitter:
            run.set_run_state(RunState.RUNNING)

        run.context.settings.git.validate_git_policy()

        # the remote engine is prepared before the task runs are built
        run.remote_engine.prepare_for_run(run)

        task_runs = self.build_root_task_runs(run)

        heartbeat_sender = None

        # describe is only performed on the driver side for now
        if self.is_driver:
            if run.context.settings.system.describe:
                run.describe_dag.describe_dag()
                described_banner = run.describe.run_banner(
                    "Described!", color="blue"
                )
                logger.info(described_banner)
                return

            main_task_run = run.root_task_run
            created_msg = (
                "Main task '%s' has been created!" % main_task_run.task_af_id
            )
            run.root_task.ctrl.banner(
                created_msg, color="cyan", task_run=main_task_run
            )

            print_tasks_tree(main_task_run.task, task_runs)

            if self.is_save_run(run, task_runs):
                run.save_run()

            if self.sends_heartbeat:
                heartbeat_sender = start_heartbeat_sender(driver_task_run)

        # skipped task runs are not handed to the executor
        runnable_task_runs = []
        for candidate in task_runs:
            if candidate.is_skipped:
                continue
            runnable_task_runs.append(candidate)

        # the executor is created without the driver task itself
        task_executor = get_task_executor(
            run,
            task_executor_type=self.task_executor_type,
            host_engine=self.host_engine,
            target_engine=run.root_task_run.task_engine,
            task_runs=runnable_task_runs,
        )

        with nested(heartbeat_sender):
            task_executor.do_run()

        if not self.is_driver:
            logger.info(run.describe.run_banner_for_submitted())
            return None

        # the executor finished without raising: mark the run as successful
        run.set_run_state(RunState.SUCCESS)
        logger.info(run.describe.run_banner_for_finished())
        return run
Exemple #4
0
def create_dynamic_task(func_call):
    # type: (FuncCall) -> Task
    """Instantiate the task described by ``func_call`` from inside a running task.

    Keyword arguments whose values have a known origin target are wrapped in
    ``InlineTarget``. A non-pipeline task is additionally registered as an
    upstream of the currently running task.

    :param func_call: call description providing ``task_cls``, ``call_args``
        and ``call_kwargs``.
    :return: the newly created task instance.
    """
    task_cls = func_call.task_cls
    call_args = func_call.call_args
    call_kwargs = func_call.call_kwargs.copy()

    from dbnd import pipeline, PipelineTask
    from dbnd._core.decorator.dbnd_decorator import _default_output

    parent_task = current_task_run().task
    dbnd_run = get_databand_run()

    decorator_spec = task_cls._conf__decorator_spec
    if decorator_spec is not None:
        call_args, call_kwargs = args_to_kwargs(
            decorator_spec.args, call_args, call_kwargs
        )

    # Replace every kwarg that has a known origin with an InlineTarget pointing
    # at that origin (e.g. a DataFrame is mapped back to the target it was
    # read from).
    origins_by_param = dbnd_run.target_origin.get_for_map(call_kwargs)
    for param_name, origin in origins_by_param.items():
        origin_target = origin.origin_target
        call_kwargs[param_name] = InlineTarget(
            root_target=origin_target,
            obj=call_kwargs[param_name],
            value_type=origin.value_type,
            source=origin.origin_target.source,
            path=getattr(origin_target, "path", None),
        )

    call_kwargs.setdefault("task_is_dynamic", True)
    call_kwargs.setdefault(
        "task_in_memory_outputs",
        parent_task.settings.dynamic_task.in_memory_outputs,
    )

    is_pipeline = issubclass(task_cls, PipelineTask)
    if is_pipeline:
        # a pipeline is rebuilt with _task_default_result overridden so that
        # its result is an object instead of a target
        task_cls = pipeline(
            decorator_spec.item, _task_default_result=_default_output
        ).task_cls

    # instantiate the inline task / pipeline
    new_task = task_cls(*call_args, **call_kwargs)

    # Regular tasks are linked as upstream of the parent - needed for correct
    # tracking; the link may already exist if the task was executed before.
    if not is_pipeline and not parent_task.task_dag.has_upstream(new_task):
        parent_task.set_upstream(new_task)
    return new_task
Exemple #5
0
    def _get_job_status_banner(self, description):
        """Render a text banner describing a SageMaker training job.

        :param description: dict describing the training job. It must contain
            ``ResourceConfig`` with ``InstanceType`` and ``InstanceCount``.
            ``TrainingJobName``, ``TrainingJobStatus``, ``SecondaryStatus`` and
            ``TrainingJobArn`` are optional and are rendered as ``None`` when
            absent.
        :return: the banner text returned by ``TextBanner.getvalue()``.
        """
        t = self.task
        job_name = description.get("TrainingJobName", None)
        b = TextBanner(
            "Training Job %s is running at SageMaker:" % job_name,
            color="yellow",
        )

        b.column("TASK", t.task_id)
        # %-formatting instead of "+" concatenation: a missing status key
        # yields None, and None + str would raise TypeError.
        b.column(
            "JOB STATUS",
            "%s -> %s"
            % (
                description.get("TrainingJobStatus", None),
                description.get("SecondaryStatus", None),
            ),
        )
        resource_config = description["ResourceConfig"]
        b.column(
            "JOB RESOURCES",
            "%s x %s"
            % (resource_config["InstanceType"], resource_config["InstanceCount"]),
        )

        tracker_url = current_task_run().task_tracker_url
        if tracker_url:
            b.column("DATABAND LOG", tracker_url)
        b.column("JOB WEB UI", self._job_url(job_name))
        b.column("CLOUDWATCH URL", self._logs_url())

        b.new_line()

        b.column("JOB ARN", description.get("TrainingJobArn", None))
        b.new_section()

        return b.getvalue()
Exemple #6
0
 def run(self):
     """Dispatch to the run executor of the current task run.

     :return: the result of ``run_driver()`` or ``run_submitter()``,
         depending on the executor type.
     :raises DatabandSystemError: when the executor type is neither
         ``SystemTaskName.driver`` nor ``SystemTaskName.driver_submit``.
     """
     run_executor = current_task_run().run.run_executor
     run_executor_type = run_executor.run_executor_type
     if run_executor_type == SystemTaskName.driver:
         return run_executor.run_driver()
     if run_executor_type == SystemTaskName.driver_submit:
         return run_executor.run_submitter()
     # Interpolate the type into the message: passing it as a second positional
     # argument would leave the "%s" placeholder unformatted.
     raise DatabandSystemError("Unsupported run executor type: %s" %
                               run_executor_type)
Exemple #7
0
def create_and_run_dynamic_task_safe(func_call, parent_task_run):
    # type: (FuncCall,TaskRun ) -> Any
    """Create and run a dynamic task for ``func_call``, falling back to a plain call.

    Failures that happen before or after the user code are reported through
    ``_handle_dynamic_error`` and do not propagate: the function is then either
    invoked directly (``func_call.invoke()``) or its already stored result is
    returned. An exception raised while user code was running (started but not
    finished) is re-raised.

    :param func_call: the call description; passed to ``create_dynamic_task``
        and used for the ``invoke()`` fallback.
    :param parent_task_run: not used by this function body.
    :return: the stored call result, the loaded single result output, or the
        task object itself when there is no single result output.
    :raises MissingParameterError: re-raised from task creation.
    """
    try:
        task = create_dynamic_task(func_call)  # type: Task
    except MissingParameterError:
        # We can't handle MissingParameterError, function invocation will always fail
        raise
    except Exception:
        # task creation failed for another reason: report and run the user code directly
        _handle_dynamic_error("task-create", func_call)
        return func_call.invoke()

    try:
        # NOTE: CALL_FAILURE_OBJ is imported here but not referenced in this function
        from dbnd._core.decorator.func_task_call import TaskCallState, CALL_FAILURE_OBJ

        # the call state is inspected below (result_saved / finished / started)
        task._dbnd_call_state = TaskCallState(should_store_result=True)
        # this is the real run of the decorated function
        from dbnd import PipelineTask

        dbnd_run = get_databand_run()

        if isinstance(task, PipelineTask):
            # if it's pipeline - create new databand run
            run = dbnd_run.context.dbnd_run_task(task)
            task_run = run.get_task_run(task.task_id)
        else:
            task_run = dbnd_run.run_executor.run_dynamic_task(
                task, task_engine=current_task_run().task_engine)
            # prefer the result stored on the call state when there is one
            if task._dbnd_call_state.result_saved:
                return task._dbnd_call_state.result

        t = task_run.task
        # if we are inside run, we want to have real values, not deferred!
        if t.task_definition.single_result_output:
            return t.__class__.result.load_from_target(t.result)
            # (unreachable line position) no single result output is handled below

        # no single result output: hand back the task object itself
        return t
    except Exception:
        if task and task._dbnd_call_state:
            if task._dbnd_call_state.finished:
                # if function was invoked and finished - than we failed in dbnd post-exec
                # just return invoke_result to user
                _handle_dynamic_error("task-post-execute", func_call)
                return task._dbnd_call_state.result
            if task._dbnd_call_state.started:
                # if started but not finished -> it was user code exception -> re-raise
                raise

        # not started - our exception on pre-exec, run user code
        _handle_dynamic_error("task-pre-execute", func_call)
        return func_call.invoke()
    finally:
        # we'd better clean _invoke_result to avoid memory leaks
        task._dbnd_call_state = None
Exemple #8
0
    def run(self):
        """Run all task runs of the current databand run through a task executor.

        Returns the run object when acting as a driver and execution finished.
        Returns None in describe-only mode and when not acting as a driver.
        """
        driver_task_run = current_task_run()
        run = driver_task_run.run  # type: DatabandRun
        task_runs = self.build_root_task_runs(run)

        heartbeat_sender = None

        # describe is only performed on the driver side for now
        if self.is_driver:
            if run.context.settings.system.describe:
                run.describe_dag.describe_dag()
                described_banner = run.describe.run_banner(
                    "Described!", color="blue"
                )
                logger.info(described_banner)
                return

            main_task_run = run.root_task_run
            created_msg = (
                "Main task '%s' has been created!" % main_task_run.task_af_id
            )
            run.root_task.ctrl.banner(
                created_msg, color="cyan", task_run=main_task_run
            )
            from dbnd._core.task_ctrl.task_dag_describe import DescribeDagCtrl

            # task id -> whether that task run is reused
            reused_by_task_id = dict(
                (task_run.task.task_id, task_run.is_reused)
                for task_run in task_runs
            )
            dag_view = DescribeDagCtrl(
                main_task_run.task,
                DescribeFormat.short,
                complete_status=reused_by_task_id,
            )
            dag_view.tree_view(describe_format=DescribeFormat.short)

            if self.is_save_run(run, task_runs):
                run.save_run()

            if self.sends_heartbeat:
                heartbeat_sender = start_heartbeat_sender(driver_task_run)

        # the executor is created without the driver task itself
        task_executor = get_task_executor(
            run,
            task_executor_type=self.task_executor_type,
            host_engine=self.host_engine,
            target_engine=run.root_task_run.task_engine,
            task_runs=task_runs,
        )

        with nested(heartbeat_sender):
            task_executor.do_run()

        if not self.is_driver:
            logger.info(run.describe.run_banner_for_submitted())
            return None

        # the executor finished without raising: mark the run as successful
        run.set_run_state(RunState.SUCCESS)
        logger.info(run.describe.run_banner_for_finished())
        return run
Exemple #9
0
def _create_dynamic_task(func_call):
    # type: (FuncCall) -> Task
    """Build the inline task for ``func_call`` and link it to the running task.

    :param func_call: call description providing ``task_cls``, ``call_args``
        and ``call_kwargs``.
    :return: the created task.
    """
    task_cls = func_call.task_cls
    spec_args = task_cls._conf__decorator_spec.args
    call_args, call_kwargs = args_to_kwargs(
        spec_args, func_call.call_args, func_call.call_kwargs
    )

    new_task = task_cls._create_task(call_args, call_kwargs)

    # Register the new task as upstream of the running task (needed for
    # correct tracking) unless that relation already exists.
    parent_task = current_task_run().task
    if parent_task.task_dag.has_upstream(new_task):
        return new_task
    parent_task.set_upstream(new_task)
    return new_task
Exemple #10
0
def operation_int(
    input_a, input_b=0, pause=0.0, log_metrics=True, external_resources=0
):
    # type: (int, int, float, bool, int) -> int
    """Add two numbers, optionally logging metrics, registering URLs and sleeping.

    :param input_a: first operand.
    :param input_b: second operand.
    :param pause: when truthy, passed to ``sleep`` before returning.
    :param log_metrics: when truthy, both operands are logged as metrics.
    :param external_resources: number of external resource URLs to register
        on the current task run.
    :return: ``input_a + input_b``.
    """
    if log_metrics:
        for metric_key, metric_value in (("input_a", input_a), ("input_b", input_b)):
            log_metric(metric_key, metric_value)

    task_run = current_task_run()
    for resource_index in range(external_resources):
        resource_key = "url_%s_%d" % (task_run.task.task_id, resource_index)
        task_run.set_external_resource_urls({resource_key: "http://localhost"})

    if pause:
        sleep(pause)

    total = input_a + input_b
    return total
Exemple #11
0
    def run(self):
        """Dispatch to the run executor and log a failure summary on any error.

        :return: the result of ``run_driver()`` or ``run_submitter()``.
        :raises DatabandSystemError: for an unsupported executor type.
        Any exception (``BaseException`` included) is re-raised after the
        banners of failed and cancelled task runs have been logged.
        """
        executor_task_run = current_task_run()
        run_executor = executor_task_run.run.run_executor
        run_executor_type = run_executor.run_executor_type
        try:
            if run_executor_type == SystemTaskName.driver:
                return run_executor.run_driver()
            if run_executor_type == SystemTaskName.driver_submit:
                return run_executor.run_submitter()
            raise DatabandSystemError(
                "Unsupported run executor type: %s" % run_executor_type
            )
        except BaseException as ex:
            # the summary is printed for every kind of exception
            logger.warning("Run failure: %s" % ex)
            separator = console_utils.error_separator()
            logger.warning(
                "\n\n\n\n{sep}\n\n   -= Your run has failed, please review errors below =-\n\n{sep}\n".format(
                    sep=separator
                )
            )

            failed_banners, canceled_banners = [], []
            for task_run in executor_task_run.run.get_task_runs():
                state = task_run.task_run_state
                if state == TaskRunState.FAILED:
                    failed_banners.append(
                        task_run.task.ctrl.banner(
                            msg="Task has failed!", color="red", task_run=task_run
                        )
                    )
                elif state == TaskRunState.CANCELLED:
                    canceled_banners.append(
                        task_run.task.ctrl.banner(
                            msg="Task has been canceled!",
                            color="yellow",
                            task_run=task_run,
                        )
                    )

            # cancelled tasks are reported first, failed tasks second
            summaries = (
                ("\nNumber of canceled tasks={count}:\n{banner}\n", canceled_banners),
                ("\nNumber of failed tasks={count}:\n{banner}\n", failed_banners),
            )
            for template, banners in summaries:
                if banners:
                    logger.warning(
                        template.format(
                            banner="\n".join(banners), count=len(banners)
                        )
                    )
            raise
Exemple #12
0
def run_dynamic_task_safe(task, func_call):
    # type: (Task, FuncCall) -> Union[Any]
    """Run ``task`` dynamically, falling back to a plain call on dbnd errors.

    :param task: the already created task to execute.
    :param func_call: call description used for the ``invoke()`` fallback.
    :return: the stored call result, the loaded single result output, or the
        task object itself when there is no single result output.
    An exception raised while user code was running (started, not finished)
    is re-raised; other errors are logged as warnings.
    """
    try:
        from dbnd._core.decorator.func_task_call import TaskCallState, CALL_FAILURE_OBJ

        task._dbnd_call_state = TaskCallState(should_store_result=True)
        # the decorated function really runs from here on
        from dbnd import PipelineTask

        databand_run = get_databand_run()

        if not isinstance(task, PipelineTask):
            executed_run = databand_run.run_dynamic_task(
                task, task_engine=current_task_run().task_engine
            )
            if task._dbnd_call_state.result_saved:
                return task._dbnd_call_state.result
        else:
            # a pipeline gets a databand run of its own
            pipeline_run = databand_run.context.dbnd_run_task(task)
            executed_run = pipeline_run.get_task_run(task.task_id)

        executed_task = executed_run.task
        if not executed_task.task_definition.single_result_output:
            # no single result output: hand back the task object
            return executed_task
        # inside a run we want the real value, not a deferred one
        return executed_task.__class__.result.load_from_target(executed_task.result)
    except Exception:
        call_state = task._dbnd_call_state if task else None
        if call_state:
            if call_state.finished:
                # user code completed, so the failure is in dbnd post-exec:
                # give the user the stored result
                logger.warning("Error during dbnd post-exec, ignoring", exc_info=True)
                return call_state.result
            if call_state.started:
                # started but not finished: the exception comes from user code
                raise

        # user code never started: dbnd failed in pre-exec, run the user code
        logger.warning("Error during dbnd task-pre-execute, ignoring", exc_info=True)
        return func_call.invoke()
    finally:
        # drop the call state so that the stored result is not kept alive
        task._dbnd_call_state = None
Exemple #13
0
    def _get_batch_progresss_banner(self, batch_response):
        """
        Render a status banner for a Spark batch submitted through Livy.

        Example of a ``batch_response`` payload:
        {
          'id': 6,
          'state': 'success',
          'appId': 'application_1534487568579_0008',
          'appInfo': {
            'driverLogUrl': None,
            'sparkUiUrl': 'http://ip-172-31-70-109.ec2.internal:20888/proxy/application_1534487568579_0008/'
          },
          'log': [
            '\nYARN Diagnostics: '
          ]
        }

        :param batch_response: dict describing the batch; ``state`` and ``id``
            are optional, ``appInfo`` with ``driverLogUrl`` is read whenever
            ``appId`` is present.
        :return: the rendered banner text
        """
        task = self.task
        title = "Spark Task %s is running at Livy:" % task.task_id
        banner = TextBanner(title, color="yellow")

        banner.column("TASK", task.task_id)
        banner.column("JOB STATE", batch_response.get("state", None))

        tracker_url = current_task_run().task_tracker_url
        if tracker_url:
            banner.column("DATABAND LOG", tracker_url)

        banner.new_line()

        banner.column("LIVY ID", batch_response.get("id", None))

        # application details are only shown once an application id is known
        if "appId" in batch_response:
            banner.column("APP ID", batch_response["appId"])

            app_info = batch_response["appInfo"]
            banner.column("DRIVER LOG", app_info["driverLogUrl"])
            if "sparkUiUrl" in app_info:
                highlighted_url = colored(
                    app_info["sparkUiUrl"], on_color="on_blue", attrs=["bold"]
                )
                banner.column("SPARK", highlighted_url)
        banner.new_section()

        return banner.getvalue()
Exemple #14
0
    def _get_step_banner(self, step):
        """
        Render a status banner for a Spark step running on EMR.

        :param step: step description; the state is read from
            ``step["Step"]["Status"]["State"]`` and the id from
            ``step["Step"]["Id"]``.
        :return: the rendered banner text
        """
        task = self.task
        title = "Spark Task %s is running at Emr:" % task.task_id
        banner = TextBanner(title, color="yellow")

        banner.column("TASK", task.task_id)
        step_info = step["Step"]
        banner.column("EMR STEP STATE", step_info["Status"]["State"])

        tracker_url = current_task_run().task_tracker_url
        if tracker_url:
            banner.column("DATABAND LOG", tracker_url)

        banner.new_line()
        banner.column("EMR STEP ID", step["Step"]["Id"])
        banner.new_section()
        return banner.getvalue()
Exemple #15
0
    def _run_task_from_another_task_execution(
        self, parent_task, call_args, call_kwargs
    ):
        # type: (TaskDecorator, Task, *Any, **Any) -> TaskRun
        """Create and execute this decorator's task from inside another running task.

        :param parent_task: the task that is currently running; its settings
            supply the default for ``task_in_memory_outputs`` and it receives
            the new task as upstream (non-pipeline case).
        :param call_args: positional arguments of the user call.
        :param call_kwargs: keyword arguments of the user call.
        :return: the result stored on the call state, or the loaded single
            result output, or the task object when there is no single result
            output.

        NOTE(review): the type comment above declares ``TaskRun`` as the return
        type, but no return statement in this body returns a task run - confirm
        and update the annotation.
        """
        # task is running from another task
        task_cls = self.get_task_cls()
        from dbnd import PipelineTask, pipeline
        from dbnd._core.task_build.dbnd_decorator import _default_output

        dbnd_run = get_databand_run()

        # orig_call_args, orig_call_kwargs = call_args, call_kwargs
        call_args, call_kwargs = args_to_kwargs(
            self.get_callable_spec().args, call_args, call_kwargs
        )

        # Map all kwargs to the "original" target of that objects
        # for example: for DataFrame we'll try to find a relevant target that were used to read it
        # get all possible value's targets
        call_kwargs_as_targets = dbnd_run.target_origin.get_for_map(call_kwargs)
        for p_name, value_origin in call_kwargs_as_targets.items():
            root_target = value_origin.origin_target
            # not every origin target has a path attribute
            path = root_target.path if hasattr(root_target, "path") else None
            original_object = call_kwargs[p_name]
            # wrap the in-memory object together with its origin
            call_kwargs[p_name] = InlineTarget(
                root_target=root_target,
                obj=original_object,
                value_type=value_origin.value_type,
                source=value_origin.origin_target.source,
                path=path,
            )

        # setdefault: values passed explicitly by the caller win
        call_kwargs.setdefault("task_is_dynamic", True)
        call_kwargs.setdefault(
            "task_in_memory_outputs",
            parent_task.settings.run.task_run_at_execution_time_in_memory_outputs,
        )

        if issubclass(task_cls, PipelineTask):
            # if it's pipeline - create new databand run
            # create override _task_default_result to be object instead of target
            task_cls = pipeline(
                self.class_or_func, _task_default_result=_default_output
            ).task_cls

            # instantiate inline pipeline
            task = task_cls(*call_args, **call_kwargs)
            # if it's pipeline - create new databand run
            run = dbnd_run.context.dbnd_run_task(task)
            # NOTE: task_run is assigned here but not read again in this branch
            task_run = run.get_task_run(task.task_id)
        else:
            # instantiate inline task (dbnd object)
            task = task_cls(*call_args, **call_kwargs)

            # update upstream/downstream relations - needed for correct tracking
            # we can have the task as upstream , as it was executed already
            if not parent_task.task_dag.has_upstream(task):
                parent_task.set_upstream(task)

            from dbnd._core.task_build.task_cls__call_state import TaskCallState

            # the call state is checked for a stored result right after execution
            task._dbnd_call_state = TaskCallState(should_store_result=True)
            try:
                task_run = dbnd_run.run_executor.run_task_at_execution_time(
                    task, task_engine=current_task_run().task_engine
                )

                # this will work only for _DecoratedTask
                if task._dbnd_call_state.result_saved:
                    return task._dbnd_call_state.result

            finally:
                # we'd better clean _invoke_result to avoid memory leaks
                task._dbnd_call_state = None

        # if we are inside run, we want to have real values, not deferred!
        if task.task_definition.single_result_output:
            return task.__class__.result.load_from_target(task.result)
        # no single result output: return the task object itself
        return task
Exemple #16
0
    def _call_handler(cls, call_user_code, call_args, call_kwargs):
        """
        -= Use "Step into My Code" to get back from Databand code! =-

        Handle a call/creation of a decorated object ( my_func(), MyDecoratedTask() ).

        Depending on the context, the call is executed directly, turned into
        a task of an Airflow DAG, tracked as an Airflow operator run, used to
        build a task (BUILD phase) or executed as a dynamic task (RUN phase).

        :param call_user_code: callable that runs the original user code.
        :param call_args: positional arguments of the call.
        :param call_kwargs: keyword arguments of the call; the
            ``__force_invoke`` key is removed from this dict if present.
        :return: the result of the user code, a task result/target, or a task
            object, depending on the branch taken.
        """
        # pop (not get): the flag must not reach the user code as a kwarg
        force_invoke = call_kwargs.pop("__force_invoke", False)
        if force_invoke or not is_databand_enabled():
            # 1. Databand is not enabled
            # 2. we have this call coming from Task.run / Task.band direct invocation
            return call_user_code(*call_args, **call_kwargs)

        if is_in_airflow_dag_build_context():
            return build_task_at_airflow_dag_context(task_cls=cls,
                                                     call_args=call_args,
                                                     call_kwargs=call_kwargs)

        if not has_current_task():
            ######
            # DBND HANDLING OF CALL
            airflow_task_context = try_get_airflow_context()
            if airflow_task_context:
                return track_airflow_dag_run_operator_run(
                    task_cls=cls,
                    call_args=call_args,
                    call_kwargs=call_kwargs,
                    airflow_task_context=airflow_task_context,
                )
            else:
                # direct call to the function
                return call_user_code(*call_args, **call_kwargs)

        # now we can make some decisions what we do with the call
        # it's not coming from _invoke_func
        # but from   user code ...   some_func()  or SomeTask()
        current = current_task()
        phase = current_phase()
        if phase is TaskContextPhase.BUILD:
            # we are in the @pipeline context, we are building execution plan
            t = cls(*call_args, **call_kwargs)

            # we are in inline debug mode -> we are going to execute the task
            # we are in the band
            # and want to return result of the object
            if t.task_definition.single_result_output:
                return t.result

            # we have multiple outputs ( result, another output.. ) -> just return task object
            return t

        if phase is TaskContextPhase.RUN:
            # we are in the run function!
            if (current.settings.dynamic_task.enabled
                    and current.task_supports_dynamic_tasks):
                # NOTE(review): the comment below mentions an isinstance() check,
                # but no isinstance() call is visible in this method - confirm
                # whether the comment is stale.
                # isinstance() check required to prevent infinite recursion when @task is on
                # class and not on func (example: see test_task_decorated_class.py)
                # and the current task supports inline calls
                # that's extra mechanism in addition to __force_invoke
                # on pickle/unpickle isinstance fails to run.
                task_run = run_dynamic_task(
                    parent_task_run=current_task_run(),
                    task_cls=cls,
                    call_args=call_args,
                    call_kwargs=call_kwargs,
                )
                t = task_run.task
                # if we are inside run, we want to have real values, not deferred!
                if t.task_definition.single_result_output:
                    return t.__class__.result.load_from_target(t.result)
                # no single result output: return the task object itself
                return t

        # we can not call it in"databand" way, fallback to normal execution
        return call_user_code(*call_args, **call_kwargs)
Exemple #17
0
def spawn_sub_pipelines(pipe_num=20):
    """Run ``large_pipe_int`` ``pipe_num`` times, one sub-pipeline per iteration.

    Each run gets the task version "<current task version>.<index>".
    """
    for pipe_index in range(pipe_num):
        logger.info("Running pipeline %s", pipe_index)
        parent_version = current_task_run().task.task_version
        sub_version = "%s.%s" % (parent_version, pipe_index)
        large_pipe_int.dbnd_run(task_version=sub_version)
Exemple #18
0
    def tracking_context(self, call_args, call_kwargs):
        """Generator that tracks one invocation of the wrapped callable.

        The generator yields exactly once; the user code runs at that yield.
        It yields ``func_call.set_result`` when tracking was set up, and
        ``_do_nothing_decorator`` when the call is not tracked (call limit
        exceeded, no tracking task run available, or tracking setup failed).

        Exceptions raised by user code at the yield are always re-raised.
        Exceptions raised by the tracking code itself (before or after the user
        code) are reported and swallowed.

        :param call_args: positional arguments of the tracked call (copied).
        :param call_kwargs: keyword arguments of the tracked call (copied).
        """
        user_code_called = False  # whether we got to executing of user code
        user_code_finished = False  # whether we passed executing of user code
        func_call = None
        try:
            # 1. check that we don't have too many calls
            if self._call_count_limit_exceeded():
                # User code runs (untracked) at this yield. Mark it as called so
                # that an exception raised there is re-raised by the handler
                # below instead of being logged as a tracking error and followed
                # by a second yield.
                user_code_called = True
                yield _do_nothing_decorator
                return

            # 2. Start or reuse existing "main tracking task" that is root for tracked tasks
            if not try_get_current_task():
                # try to get existing task, and if not exists - try to
                # get/create inplace_task_run
                from dbnd._core.tracking.script_tracking_manager import (
                    try_get_inplace_tracking_task_run, )

                inplace_tacking_task = try_get_inplace_tracking_task_run()
                if not inplace_tacking_task:
                    # we didn't manage to start inplace tracking task run, we will not be able to track
                    # (same reasoning as above: user code runs at this yield)
                    user_code_called = True
                    yield _do_nothing_decorator
                    return

            tracking_task_definition = self.get_tracking_task_definition()
            callable_spec = tracking_task_definition.task_decorator.get_callable_spec(
            )

            func_call = TrackedFuncCallWithResult(
                callable=self.callable,
                call_args=tuple(
                    call_args),  # prevent original call_args modification
                call_kwargs=dict(
                    call_kwargs),  # prevent original kwargs modification
            )
            # replace any position argument with kwarg if it possible
            args, kwargs = args_to_kwargs(callable_spec.args,
                                          func_call.call_args,
                                          func_call.call_kwargs)

            # instantiate inline task
            task = TrackingTask.for_func(tracking_task_definition, args,
                                         kwargs)

            # update upstream/downstream relations - needed for correct tracking
            # we can have the task as upstream , as it was executed already
            parent_task = current_task_run().task
            if not parent_task.task_dag.has_upstream(task):
                parent_task.set_upstream(task)

            # checking if any of the inputs are the outputs of previous task.
            # we can add that task as upstream.
            dbnd_run = get_databand_run()
            call_kwargs_as_targets = dbnd_run.target_origin.get_for_map(kwargs)
            for value_origin in call_kwargs_as_targets.values():
                up_task = value_origin.origin_target.task
                task.set_upstream(up_task)

            # creating task_run as a task we found mid-run
            task_run = dbnd_run.create_task_run_at_execution_time(
                task, task_engine=current_task_run().task_engine)

            should_capture_log = (
                TrackingConfig.from_databand_context().capture_tracking_log)
            with task_run.runner.task_run_execution_context(
                    handle_sigterm=True, capture_log=should_capture_log):
                task_run.set_task_run_state(state=TaskRunState.RUNNING)

                _log_inputs(task_run)

                # if we reached this line, then all tracking initialization is
                # finished successfully, and we're going to execute user code
                user_code_called = True

                try:
                    # tracking_context is context manager - user code will run on yield
                    yield func_call.set_result

                    # if we reached this line, this means that user code finished
                    # successfully without any exceptions
                    user_code_finished = True
                # We catch BaseException since we want to catch KeyboardInterrupts as well
                except BaseException as ex:
                    task_run.finished_time = utcnow()

                    error = TaskRunError.build_from_ex(ex, task_run)
                    task_run.set_task_run_state(TaskRunState.FAILED,
                                                error=error)
                    raise

                else:
                    task_run.finished_time = utcnow()

                    # func_call.result should contain result, log it
                    _log_result(task_run, func_call.result)

                    task_run.set_task_run_state(TaskRunState.SUCCESS)
        except BaseException:
            if user_code_called and not user_code_finished:
                # if we started to call the user code and not got to user_code_finished
                # line - it means there was user code exception - so just re-raise it
                raise
            # else it's either we didn't reached calling user code, or already passed it
            # then it's some dbnd tracking error - just log it
            if func_call:
                _handle_tracking_error("tracking-init", func_call)
            else:
                log_exception_to_server()
        # if we didn't reached user_code_called=True line - there was an error during
        # dbnd tracking initialization, so nothing is done - user function wasn't called yet
        if not user_code_called:
            # tracking_context is context manager - user code will run on yield
            yield _do_nothing_decorator
            return
Exemple #19
0
    def tracking_context(self, call_args, call_kwargs):
        """Set up dbnd tracking around one invocation of ``self.func``.

        This is a generator that yields exactly once. The existing comments
        describe it as a context manager whose ``yield`` is the point where
        user code runs; presumably it is wrapped with
        ``contextlib.contextmanager`` outside this view - TODO confirm.

        The yielded value is:

        * ``func_call.set_result`` when a dynamic task run was created and
          moved to ``RUNNING``;
        * ``_passthrough_decorator`` when tracking was skipped (call count
          limit exceeded, no in-place task available) or tracking
          initialization raised before user code was reached.

        Error handling:

        * An ``Exception`` raised at the ``yield`` (i.e. by user code) marks
          the task run ``FAILED`` and is re-raised unchanged.
        * An ``Exception`` raised before user code started, or after it
          finished (for example while logging the result), is treated as a
          tracking error: it is passed to ``_handle_dynamic_error`` and not
          propagated.

        :param call_args: positional arguments of the tracked call; copied
            into a tuple before use.
        :param call_kwargs: keyword arguments of the tracked call; copied
            into a dict before use.
        """
        user_code_called = False  # whether we got to executing of user code
        user_code_finished = False  # whether we passed executing of user code
        # stays None if FuncCallWithResult construction itself fails
        func_call = None
        try:
            func_call = FuncCallWithResult(
                task_cls=self.get_tracking_task_cls(),
                call_user_code=self.func,
                call_args=tuple(
                    call_args),  # prevent original call_args modification
                call_kwargs=dict(
                    call_kwargs),  # prevent original kwargs modification
            )

            # 1. check that we don't have too many calls
            # 2. Start or reuse existing "inplace_task" that is root for tracked tasks
            # If either check fails we fall through to the passthrough yield below.
            if not self._call_count_limit_exceeded(
            ) and _get_or_create_inplace_task():
                cls = func_call.task_cls

                # replace any positional argument with a kwarg where possible
                args, kwargs = args_to_kwargs(
                    cls._conf__decorator_spec.args,
                    func_call.call_args,
                    func_call.call_kwargs,
                )

                # instantiate inline task
                task = cls._create_task(args, kwargs)

                # update upstream/downstream relations - needed for correct tracking
                # we can have the task as upstream, as it was executed already
                parent_task = current_task_run().task
                if not parent_task.task_dag.has_upstream(task):
                    parent_task.set_upstream(task)

                # check whether any of the inputs are outputs of a previous task;
                # if so, register that task as upstream of the new one.
                dbnd_run = get_databand_run()
                call_kwargs_as_targets = dbnd_run.target_origin.get_for_map(
                    kwargs)
                for value_origin in call_kwargs_as_targets.values():
                    up_task = value_origin.origin_target.task
                    task.set_upstream(up_task)

                # create the task_run for a task discovered mid-run, reusing the
                # task engine of the currently running task run
                task_run = dbnd_run.create_dynamic_task_run(
                    task, task_engine=current_task_run().task_engine)

                with task_run.runner.task_run_execution_context(
                        handle_sigterm=True):
                    task_run.set_task_run_state(state=TaskRunState.RUNNING)

                    _log_inputs(task_run)

                    # if we reached this line, then all tracking initialization
                    # finished successfully, and we're going to execute user code
                    user_code_called = True

                    try:
                        # tracking_context is context manager - user code will run on yield
                        yield func_call.set_result

                        # if we reached this line, user code finished
                        # successfully without any exceptions
                        user_code_finished = True
                    # NOTE(review): only Exception is caught here, so a
                    # KeyboardInterrupt/SystemExit raised by user code skips the
                    # FAILED state update - confirm this is intended.
                    except Exception as ex:
                        task_run.finished_time = utcnow()

                        error = TaskRunError.build_from_ex(ex, task_run)
                        task_run.set_task_run_state(TaskRunState.FAILED,
                                                    error=error)
                        raise
                    else:
                        task_run.finished_time = utcnow()

                        # func_call.result should contain result, log it
                        # (an error raised in this branch is not caught by the
                        # inner except; it reaches the outer handler with
                        # user_code_finished=True and is treated as a tracking error)
                        _log_result(task_run, func_call.result)

                        task_run.set_task_run_state(TaskRunState.SUCCESS)
        except Exception:
            if user_code_called and not user_code_finished:
                # user code was started but never reached the
                # user_code_finished line - this is a user code exception,
                # so just re-raise it
                raise
            # otherwise we either never reached user code or already passed it,
            # so this is a dbnd tracking error - just report it
            # NOTE(review): when func_call is None (its construction failed)
            # the exception is dropped without any report - confirm intended.
            if func_call:
                _handle_dynamic_error("tracking-init", func_call)
        # if we never reached the user_code_called=True line, tracking was either
        # skipped or failed during initialization - the user function has not
        # been called yet, so yield a passthrough to let it run untracked
        if not user_code_called:
            # tracking_context is context manager - user code will run on yield
            yield _passthrough_decorator
Exemple #20
0
def external_links_task():
    """Register one test external-resource URL on the current task run.

    Looks up the active task run via ``current_task_run()`` and passes it a
    single-entry mapping (``"test_url"`` -> databand.ai). Returns ``None``.
    """
    current_task_run().set_external_resource_urls(
        {"test_url": "https://databand.ai/"}
    )
Exemple #21
0
def _create_dynamic_task_run(func_call):
    """Build a dynamic task for ``func_call`` and create its task run.

    The task comes from ``create_dynamic_task(func_call)``. The task run is
    created on the run returned by ``get_databand_run()``, using the task
    engine of the currently executing task run.

    :param func_call: call descriptor forwarded to ``create_dynamic_task``.
    :return: whatever ``create_dynamic_task_run`` returns for the new task.
    """
    dynamic_task = create_dynamic_task(func_call)
    active_run = get_databand_run()
    return active_run.create_dynamic_task_run(
        dynamic_task,
        task_engine=current_task_run().task_engine,
    )