Example #1
    def from_env(cls, current_run):
        # type: (DatabandRun) -> RootRunInfo
        parent_run = try_get_databand_run()
        if parent_run:
            # take from parent
            root_run_info = parent_run.root_run_info
            # update parent run info if required
            root_task_run = try_get_current_task_run()
            if root_task_run:
                root_task_run_uid = root_task_run.task_run_uid
                root_run_info = attr.evolve(
                    root_run_info, root_task_run_uid=root_task_run_uid)

            return root_run_info

        # take from env
        root_run_uid = os.environ.get(DBND_ROOT_RUN_UID)
        root_run_url = os.environ.get(DBND_ROOT_RUN_TRACKER_URL)
        root_task_run_uid = os.environ.get(DBND_PARENT_TASK_RUN_UID)

        if not root_run_uid:
            # current run is the main run
            root_run_uid = current_run.run_uid
            root_run_url = current_run.tracker.run_url

        return cls(
            root_run_uid=root_run_uid,
            root_run_url=root_run_url,
            root_task_run_uid=root_task_run_uid,
        )
Example #2
    def _update_node_name(self, pod_name, pod_data):
        if self.processed_pods.get(pod_name):
            self.log.debug(
                "Pod %s has already been logged to metrics - skipping", pod_name)
            return
        node_name = pod_data.spec.node_name
        if not node_name:
            # some events are missing the node name; it is populated eventually
            return
        try:
            task_id = pod_data.metadata.labels.get("task_id")
            if not task_id:
                return

            dr = try_get_databand_run()
            if not dr:
                return
            task_run = dr.get_task_run(task_id)
            if not task_run:
                return

            self.metrics_logger.log_pod_information(task_run.task, pod_name,
                                                    node_name)
        except Exception as ex:
            logger.info("Failed to gather node name for %s: %s", pod_name, ex)
        finally:
            self.processed_pods[pod_name] = True
Example #3
def stop():
    msg("stopping!")
    task = try_get_current_task()
    msg("Current tasks looks like: %s" % (task))

    run = try_get_databand_run()
    if run:
        run.kill()
    return
Example #4
File: dbnd_execute.py Project: lbtanh/dbnd
def dbnd_operator__kill(dbnd_operator):
    from dbnd._core.current import try_get_databand_run

    run = try_get_databand_run()
    if not run:
        return

    task_run = run.get_task_run_by_id(dbnd_operator.dbnd_task_id)
    return task_run.task.on_kill()
Example #5
    def _log_parameter_value(self, runtime_value, value, task):
        if try_get_databand_run() and task.current_task_run:
            task.current_task_run.tracker.log_parameter_data(
                parameter=self,
                target=value,
                value=runtime_value,
                operation_type=DbndTargetOperationType.read,
                operation_status=DbndTargetOperationStatus.OK,
            )
Example #6
def context_to_airflow_vars(context, in_env_var_format=False):
    # original_context_to_airflow_vars is created during function override in patch_models()
    params = airflow.utils.operator_helpers.original_context_to_airflow_vars(
        context=context, in_env_var_format=in_env_var_format
    )
    if in_env_var_format:
        dbnd_run = try_get_databand_run()
        if dbnd_run:
            params.update(dbnd_run.get_context_spawn_env())
    return params
Example #7
def get_task_run_from_pod_data(pod_data):
    labels = pod_data.metadata.labels
    if "task_id" not in labels:
        return None
    task_id = labels["task_id"]

    dr = try_get_databand_run()
    if not dr:
        return None

    return dr.get_task_run_by_af_id(task_id)
Example #8
File: caching.py Project: kalebinn/dbnd
    def _resolve_cache_file_name(file_path):
        run = try_get_databand_run()
        if not run:
            raise Exception(
                "No databand run found when creating cache file")
        dbnd_local_root = run.get_current_dbnd_local_root()
        cache_dir = get_or_create_folder_in_dir("cache", dbnd_local_root.path)

        file_name = os.path.basename(file_path) + DbndLocalFileMetadataRegistry.ext
        return os.path.join(cache_dir, file_name)
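
A hypothetical usage sketch (valid only inside an active databand run, and assuming the method lives on DbndLocalFileMetadataRegistry, as the .ext reference suggests):

# "/data/input.csv" -> <dbnd_local_root>/cache/input.csv<ext>
cache_path = DbndLocalFileMetadataRegistry._resolve_cache_file_name("/data/input.csv")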
Example #9
def fake_task_inside_dag():
    log_metric("Testing", "Metric")
    run = try_get_databand_run()
    assert run is not None, "Task should run in databand run, check airflow tracking!"
    root_task = run.root_task

    # Validate regular subdag properties
    assert run.job_name == "%s.%s.fake_task_inside_dag" % (PARENT_DAG,
                                                           CHILD_DAG)
    assert root_task.task_name == "fake_task_inside_dag__execute"

    return "Regular test"
Example #10
def context_to_airflow_vars(context, in_env_var_format=False):
    # original_context_to_airflow_vars is created during function override in patch_models()
    params = airflow.utils.operator_helpers._original_context_to_airflow_vars(
        context=context, in_env_var_format=in_env_var_format)
    if in_env_var_format:
        dbnd_run = try_get_databand_run()
        if dbnd_run:
            params.update(dbnd_run.get_context_spawn_env())

    try_number = str(context['task_instance'].try_number)
    params.update({"AIRFLOW_CTX_TRY_NUMBER": try_number})
    return params
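
An illustrative call under Airflow; the AIRFLOW_CTX_* keys are Airflow's own names, while the DBND_* entries come from the spawn env merged above (values are placeholders):

env_vars = context_to_airflow_vars(context, in_env_var_format=True)
# e.g. {"AIRFLOW_CTX_DAG_ID": "my_dag", "AIRFLOW_CTX_TASK_ID": "my_task",
#       "DBND_ROOT_RUN_UID": "...", "AIRFLOW_CTX_TRY_NUMBER": "1", ...}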
Example #11
def fake_task_inside_dag():
    log_metric("Testing", "Metric")
    run = try_get_databand_run()
    assert run is not None, "Task should run in databand run, check airflow tracking!"
    root_task = run.root_task

    # Validate regular subdag properties
    assert run.job_name == "%s.%s" % (PARENT_DAG, CHILD_DAG)
    # this test became problematic because the airflow_inplace task is named after the script that ran it
    assert root_task.task_name

    return "Regular test"
Example #12
File: dbnd_execute.py Project: lbtanh/dbnd
def dbnd_operator__execute(dbnd_operator, context):
    from dbnd._core.current import try_get_databand_run
    from dbnd._core.run.databand_run import DatabandRun
    from targets import target

    run = try_get_databand_run()
    if not run:
        # we are not inside a dbnd run; probably we are running from native airflow,
        # so let's try to load it:
        try:

            executor_config = dbnd_operator.executor_config
            logger.info("context: %s", context)

            logger.info("task.executor_config: %s",
                        dbnd_operator.executor_config)
            logger.info("ti.executor_config: %s",
                        context["ti"].executor_config)
            driver_dump = executor_config["DatabandExecutor"].get(
                "dbnd_driver_dump")
            print(
                "Running dbnd task %s from %s" %
                (dbnd_operator.dbnd_task_id, driver_dump),
                file=sys.__stderr__,
            )

            if executor_config["DatabandExecutor"].get(
                    "remove_airflow_std_redirect", False):
                sys.stdout = sys.__stdout__
                sys.stderr = sys.__stderr__

            dbnd_bootstrap()
            dbnd_airflow_bootstrap()
            run = DatabandRun.load_run(dump_file=target(driver_dump),
                                       disable_tracking_api=False)
        except Exception as e:
            print(
                "Failed to load dbnd task in native airflow execution! Exception: %s"
                % (e, ),
                file=sys.__stderr__,
            )
            dump_trace()
            raise

        with run.run_context() as dr:
            task_run = run.get_task_run_by_id(dbnd_operator.dbnd_task_id)
            ret_value = task_run.runner.execute(airflow_context=context)
    else:
        task_run = run.get_task_run_by_id(dbnd_operator.dbnd_task_id)
        ret_value = task_run.runner.execute(airflow_context=context)

    return ret_value
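
The fallback branch implicitly documents what the operator's executor_config must contain when launched by native Airflow. A hypothetical example matching the lookups above (the dump path is illustrative):

executor_config = {
    "DatabandExecutor": {
        "dbnd_driver_dump": "s3://my-bucket/dbnd/run/driver.pickle",  # illustrative path
        "remove_airflow_std_redirect": False,
    }
}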
Example #13
    def start(self, root_task_name, job_name=None):
        if self._run or self._active or try_get_databand_run():
            return

        airflow_context = try_get_airflow_context()
        set_tracking_config_overide(use_dbnd_log=True, airflow_context=airflow_context)

        # 1. create proper DatabandContext so we can create other objects
        dc = self._enter_cm(new_dbnd_context())  # type: DatabandContext

        if airflow_context:
            root_task_or_task_name = AirflowOperatorRuntimeTask.build_from_airflow_context(
                airflow_context
            )
            source = UpdateSource.airflow_tracking
            job_name = "{}.{}".format(airflow_context.dag_id, airflow_context.task_id)
        else:
            root_task_or_task_name = _build_inline_root_task(root_task_name)
            source = UpdateSource.dbnd

        # create the databand run; this will create the run with driver and root tasks
        self._run = self._enter_cm(
            new_databand_run(
                context=dc,
                task_or_task_name=root_task_or_task_name,
                job_name=job_name,
                existing_run=False,
                source=source,
                af_context=airflow_context,
            )
        )  # type: DatabandRun

        if not self._atexit_registered:
            _set_process_exit_handler(self.stop)
            self._atexit_registered = True

        sys.excepthook = self.stop_on_exception
        self._active = True

        # now we send data to DB
        self._run._init_without_run()

        self._start_taskrun(self._run.driver_task_run)
        self._start_taskrun(self._run.root_task_run)
        self._task_run = self._run.root_task_run
        return self._task_run
Example #14
    def start(self, root_task_name=None, airflow_context=None):
        if self._run or self._active or try_get_databand_run():
            return

        # we should probably use only the airflow context passed via parameter;
        # also, there are mocks that cover only get_dbnd_project_config().airflow_context
        airflow_context = airflow_context or get_dbnd_project_config().airflow_context()
        set_tracking_config_overide(use_dbnd_log=True, airflow_context=airflow_context)

        dc = self._enter_cm(
            new_dbnd_context(name="inplace_tracking")
        )  # type: DatabandContext

        if airflow_context:
            root_task, job_name, source = build_run_time_airflow_task(airflow_context)
        else:
            root_task = _build_inline_root_task(root_task_name)
            job_name = root_task.task_name
            source = UpdateSource.dbnd

        self._run = run = self._enter_cm(
            new_databand_run(
                context=dc,
                job_name=job_name,
                existing_run=False,
                source=source,
                af_context=airflow_context,
            )
        )  # type: DatabandRun
        self._run.root_task = root_task

        if not self._atexit_registered:
            _set_process_exit_handler(self.stop)
            self._atexit_registered = True

        sys.excepthook = self.stop_on_exception
        self._active = True

        # now we send data to DB
        root_task_run = run._build_and_add_task_run(root_task)
        root_task_run.is_root = True

        # No need to track the state because we track in init_run
        run.root_task_run.set_task_run_state(TaskRunState.RUNNING, track=False)
        run.tracker.init_run()

        self._enter_cm(run.root_task_run.runner.task_run_execution_context())
        self._task_run = run.root_task_run

        return self._task_run
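
A minimal usage sketch, assuming tracking_manager is an instance of the class that owns this start() (its stop() counterpart appears in Example #27):

task_run = tracking_manager.start(root_task_name="my_script")
try:
    pass  # user code runs inside the tracked root task context
finally:
    tracking_manager.stop()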
Example #15
def get_local_tempfile(*path):
    run = try_get_databand_run()
    if run:
        tempdir = run.get_current_dbnd_local_root().partition("tmp").path
    else:
        tempdir = tempfile.gettempdir()

    path = os.path.join(tempdir, "databand-tmp-%09d" % random.randrange(0, 1e10), *path)
    base_dir = os.path.dirname(path)
    try:
        if not os.path.exists(base_dir):
            os.makedirs(base_dir)
    except Exception as ex:
        logger.info("Failed to create temp dir %s: %s", base_dir, ex)
    return path
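
Usage sketch: path parts are joined under a fresh random directory, rooted in the run's local root when a run is active and in the system temp dir otherwise.

tmp_csv = get_local_tempfile("extract", "rows.csv")
# e.g. /tmp/databand-tmp-004821693/extract/rows.csv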
Example #16
    def run_next(self, next_job):
        """

        The run_next command will check the task_queue for any un-run jobs.
        It will then create a unique job-id, launch that job in the cluster,
        and store relevant info in the current_jobs map so we can track the job's
        status
        """
        key, command, kube_executor_config = next_job
        dag_id, task_id, execution_date, try_number = key
        self.log.debug(
            "Kube POD to submit: image=%s with %s",
            self.kube_config.kube_image,
            str(next_job),
        )

        dr = try_get_databand_run()
        task_run = dr.get_task_run_by_af_id(task_id)
        pod_command = [str(c) for c in command]
        task_engine = task_run.task_engine  # type: KubernetesEngineConfig
        pod = task_engine.build_pod(
            task_run=task_run,
            cmds=pod_command,
            labels={
                "airflow-worker": self.worker_uuid,
                "dag_id": make_safe_label_value(dag_id),
                "task_id": make_safe_label_value(task_run.task_af_id),
                "execution_date": self._datetime_to_label_safe_datestring(
                    execution_date
                ),
                "try_number": str(try_number),
            },
            try_number=try_number,
            include_system_secrets=True,
        )

        pod_ctrl = self.kube_dbnd.get_pod_ctrl_for_pod(pod)
        self.submitted_pods[pod.name] = SubmittedPodState(
            pod_name=pod.name,
            task_run=task_run,
            scheduler_key=key,
            submitted_at=utcnow(),
        )

        pod_ctrl.run_pod(pod=pod, task_run=task_run, detach_run=True)
        self.metrics_logger.log_pod_submitted(task_run.task, pod_name=pod.name)
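
For reference, a hypothetical next_job tuple matching the unpacking at the top of run_next (the engine config object is stubbed out here):

from datetime import datetime

key = ("my_dag", "my_task", datetime(2021, 1, 1), 1)  # (dag_id, task_id, execution_date, try_number)
command = ["airflow", "run", "my_dag", "my_task", "2021-01-01T00:00:00"]
next_job = (key, command, None)  # kube_executor_config stubbed as None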
Example #17
    def run_next_kube_job(self, key, command):
        dag_id, task_id, execution_date, try_number = key
        self.log.debug(
            "Kube POD to submit: image=%s with %s [%s]",
            self.kube_config.kube_image,
            str(key),
            str(command),
        )

        databand_run = try_get_databand_run()
        task_run = databand_run.get_task_run_by_af_id(task_id)

        pod_command = [str(c) for c in command]
        task_engine = task_run.task_engine  # type: KubernetesEngineConfig
        pod: "k8s.V1Pod" = task_engine.build_pod(
            task_run=task_run,
            cmds=pod_command,
            labels={
                "airflow-worker": self._version_independent_worker_id(),
                "dag_id": make_safe_label_value(dag_id),
                "task_id": make_safe_label_value(task_run.task_af_id),
                "execution_date": self._datetime_to_label_safe_datestring(execution_date),
                "try_number": str(try_number),
            },
            try_number=try_number,
            include_system_secrets=True,
        )
        pod_ctrl = self.kube_dbnd.get_pod_ctrl(pod.metadata.name,
                                               pod.metadata.namespace,
                                               config=task_engine)
        self.submitted_pods[pod.metadata.name] = SubmittedPodState(
            pod_name=pod.metadata.name,
            task_run=task_run,
            scheduler_key=key,
            submitted_at=utcnow(),
        )

        pod_ctrl.run_pod(pod=pod, task_run=task_run, detach_run=True)
        self.metrics_logger.log_pod_submitted(task_run.task,
                                              pod_name=pod.metadata.name)
Example #18
def build_file_logger(name, fmt=None):
    """
    Create a logger which write only to a file.
    the file will be located under the run dict.
    """
    file_logger = logging.getLogger("{}_{}".format(__name__, name))
    file_logger.propagate = False

    run = try_get_databand_run()
    if run:
        log_file = run.run_local_root.partition("{}.logs".format(name))
        logger.info("Api-clients {name} logs writing into {path}".format(
            name=name, path=log_file))
        handler = create_file_handler(str(log_file), fmt=fmt)
        file_logger.addHandler(handler)
        file_logger.setLevel(logging.INFO)

    return file_logger
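
Usage sketch: outside a databand run no handler is attached and propagation is off, so records are effectively dropped.

api_log = build_file_logger("api-clients")
api_log.info("request sent")  # lands in <run_local_root>/api-clients.logs when a run is active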
Example #19
File: base_task.py Project: turbaszek/dbnd
    def __getattribute__(self, name):
        def _get(n):
            return super(_BaseTask, self).__getattribute__(n)

        value = _get(name)
        try:
            _task_auto_read = _get("_task_auto_read")
        except Exception:
            return value

        # already cached
        if _task_auto_read is None or name in _task_auto_read:
            return value

        parameter = _get("_params").get_param(name)

        # not a parameter, or there is nothing to "dereference"
        # TODO: rebase: value is None
        if not parameter:
            return value

        runtime_value = parameter.calc_runtime_value(value, task=self)

        if parameter.is_output():
            # if it's an output, we should not "cache" it;
            # otherwise we would try to save it on autosave (as if it had changed)
            return runtime_value
        elif isinstance(value, Target):
            if try_get_databand_run():
                task_run = self.current_task_run
                if task_run:
                    task_run.tracker.log_target(
                        parameter=parameter,
                        target=value,
                        value=runtime_value,
                        operation_type=DbndTargetOperationType.read,
                        operation_status=DbndTargetOperationStatus.OK,
                    )

        # for the cache, so next time we don't need to calculate it
        setattr(self, name, runtime_value)
        _task_auto_read.add(name)
        return runtime_value
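
The tail of the method is a generic compute-once pattern: resolve the runtime value, store it back with setattr, and record the name so later lookups short-circuit at the "already cached" check. A standalone sketch of the same idea, with hypothetical names:

def expensive_dereference(value):
    # stand-in for parameter.calc_runtime_value(value, task=self)
    return value

class LazyDeref(object):
    def __init__(self):
        object.__setattr__(self, "_resolved", set())

    def __getattribute__(self, name):
        value = object.__getattribute__(self, name)
        if name.startswith("_"):
            return value
        resolved = object.__getattribute__(self, "_resolved")
        if name in resolved:
            return value  # already cached on the instance
        runtime_value = expensive_dereference(value)
        object.__setattr__(self, name, runtime_value)  # cache for next access
        resolved.add(name)
        return runtime_value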
Example #20
    def start(self, root_task_name, job_name=None):
        if self._run:
            return
        if self._started or self._disabled:  # started or failed
            return

        try:
            if try_get_databand_run():
                return

            self._started = True

            # 1. create proper DatabandContext so we can create other objects
            set_tracking_config_overide(use_dbnd_log=True)
            # create databand context
            dc = self._enter_cm(new_dbnd_context())  # type: DatabandContext

            root_task = _build_inline_root_task(root_task_name)

            # create databand run
            self._run = self._enter_cm(
                new_databand_run(
                    context=dc,
                    task_or_task_name=root_task,
                    existing_run=False,
                    job_name=job_name,
                ))  # type: DatabandRun

            self._run._init_without_run()

            if not self._atexit_registered:
                atexit.register(self.stop)
            sys.excepthook = self.stop_on_exception

            self._start_taskrun(self._run.driver_task_run)
            self._start_taskrun(self._run.root_task_run)
            self._task_run = self._run.root_task_run
            return self._task_run
        except Exception:
            _handle_inline_error("inline-start")
            self._disabled = True
            return
        finally:
            self._started = True
Example #21
def print_driver_events():
    try:
        dbnd_run = try_get_databand_run()
        engine_config = dbnd_run.run_executor.remote_engine
        kube_client = engine_config.get_kube_client()
        from socket import gethostname

        driver_pod_name = gethostname()
        logger.info("Driver pod name is %s" % (driver_pod_name, ))
        field_selector = "involvedObject.name=%s" % driver_pod_name
        logger.info("Field selector is %s" % (field_selector, ))
        driver_events = kube_client.list_namespaced_event(
            namespace=engine_config.namespace, field_selector=field_selector)
        logger.info("Found %s driver events" % (len(driver_events.items), ))
        for event in driver_events.items:
            message = create_log_message_from_event(event)
            logger.info(message)
    except Exception as e:
        logger.info("Could not retrieve driver events! Exception: %s", e)
Example #22
    def start(self, root_task_name=None, airflow_context=None):
        if self._run or self._active or try_get_databand_run():
            return

        airflow_context = airflow_context or try_get_airflow_context()
        set_tracking_config_overide(use_dbnd_log=True,
                                    airflow_context=airflow_context)

        dc = self._enter_cm(new_dbnd_context())  # type: DatabandContext

        if airflow_context:
            root_task, job_name, source = build_run_time_airflow_task(
                airflow_context)
        else:
            root_task = _build_inline_root_task(root_task_name)
            job_name = None
            source = UpdateSource.dbnd

        self._run = self._enter_cm(
            new_databand_run(
                context=dc,
                task_or_task_name=root_task,
                job_name=job_name,
                existing_run=False,
                source=source,
                af_context=airflow_context,
                send_heartbeat=False,
            ))  # type: DatabandRun

        if not self._atexit_registered:
            _set_process_exit_handler(self.stop)
            self._atexit_registered = True

        sys.excepthook = self.stop_on_exception
        self._active = True

        # now we send data to DB
        self._run._init_without_run()
        self._start_taskrun(self._run.driver_task_run)
        self._start_taskrun(self._run.root_task_run)
        self._task_run = self._run.root_task_run

        return self._task_run
Example #23
File: dbnd_execute.py Project: lbtanh/dbnd
def dbnd_operator__get_task_retry_delay(dbnd_operator):
    """
    This method overrides the task retry delay found in airflow.
    We must override the actual task retry delay from airflow to ensure that we can control the retry delay
    per task, for example when we send pods to retry, we may want a different delay rather than another engine
    """
    from dbnd._core.current import try_get_databand_run

    run = try_get_databand_run()
    if not run:
        return

    task_run = run.get_task_run_by_id(dbnd_operator.dbnd_task_id)

    if task_run.task_engine.task_definition.task_family == "kubernetes":
        # If we are running in K8s - use pod retry delay instead of task retry delay
        return task_run.task_engine.pod_retry_delay
    else:
        return task_run.task.task_retry_delay
Example #24
def fake_task_inside_dag():
    log_metric("Testing", "Metric")
    run = try_get_databand_run()
    assert run is not None, "Task should run in databand run, check airflow tracking!"
    root_task = run.root_task

    # Validate regular subdag properties
    assert run.job_name == PARENT_DAG
    assert root_task.task_name == "DAG__runtime"

    # Validate relationships
    ## sub dag
    child_task = list(root_task.task_dag.upstream)[0]
    assert "fake_task_inside_dag" in child_task.task_name
    assert child_task.dag_id == FULL_DAG_NAME
    ## function task
    func_task = list(child_task.task_dag.upstream)[0]
    assert fake_task_inside_dag.__name__ in func_task.task_name

    return "Regular test"
Example #25
    def start(self):
        self.log.info("Starting Kubernetes executor... PID: %s", os.getpid())

        dbnd_run = try_get_databand_run()
        if dbnd_run:
            if AIRFLOW_VERSION_2:
                self.worker_uuid = str(dbnd_run.run_uid)
            else:
                self.worker_uuid = (
                    KubeWorkerIdentifier.get_or_create_current_kube_worker_uuid()
                )
        else:
            self.worker_uuid = str(uuid.uuid4())

        self.log.debug("Start with worker_uuid: %s", self.worker_uuid)

        # always need to reset resource version since we don't know
        # when we last started, note for behavior below
        # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs
        # /CoreV1Api.md#list_namespaced_pod
        # KubeResourceVersion.reset_resource_version()
        self.task_queue = self._manager.Queue()
        self.result_queue = self._manager.Queue()

        self.kube_client = self.kube_dbnd.kube_client
        self.kube_scheduler = DbndKubernetesScheduler(
            self.kube_config,
            self.task_queue,
            self.result_queue,
            self.kube_client,
            self.worker_uuid,
            kube_dbnd=self.kube_dbnd,
        )

        if self.kube_dbnd.engine_config.debug:
            self.log.setLevel(logging.DEBUG)
            self.kube_scheduler.log.setLevel(logging.DEBUG)

        if AIRFLOW_VERSION_1:
            self._inject_secrets()
        self.clear_not_launched_queued_tasks()
        self._flush_result_queue()
Example #26
File: config.py Project: kalebinn/dbnd
def get_local_tempfile(*path):
    run = try_get_databand_run()
    if run:
        dbnd_local_root = run.get_current_dbnd_local_root()
        if dbnd_local_root.exists():
            # on a remote engine, the temp dir defined at the driver can be unavailable;
            # simple workaround: use the tmp folder on the current machine
            tempdir = dbnd_local_root.partition("tmp").path
        else:
            # fallback to simple temp dir
            tempdir = tempfile.gettempdir()
    else:
        tempdir = tempfile.gettempdir()

    path = os.path.join(tempdir, "databand-tmp-%09d" % random.randrange(0, 1e10), *path)
    base_dir = os.path.dirname(path)
    try:
        if not os.path.exists(base_dir):
            os.makedirs(base_dir)
    except Exception as ex:
        logger.info("Failed to create temp dir %s: %s", base_dir, ex)
    return path
Example #27
    def stop(self, at_exit=True, update_run_state=True):
        if update_run_state:
            databand_run = try_get_databand_run()
            if databand_run:
                root_tr = databand_run.task.current_task_run
                root_tr.finished_time = utcnow()

                for tr in databand_run.task_runs:
                    if tr.task_run_state == TaskRunState.FAILED:
                        root_tr.set_task_run_state(
                            TaskRunState.UPSTREAM_FAILED)
                        databand_run.set_run_state(RunState.FAILED)
                        break
                else:
                    root_tr.set_task_run_state(TaskRunState.SUCCESS)
                    databand_run.set_run_state(RunState.SUCCESS)
                logger.info(databand_run.describe.run_banner_for_finished())

        self._close_all_context_managers()
        if at_exit and is_airflow_enabled():
            from airflow.settings import dispose_orm

            dispose_orm()
Example #28
    def _store_value_origin_target(self, value, target):
        dbnd_run = try_get_databand_run()
        if not dbnd_run:
            return

        dbnd_run.target_origin.add(target, value, self.value_type)
Example #29
    def start(self,
              root_task_name=None,
              project_name=None,
              airflow_context=None):
        if self._run or self._active or try_get_databand_run():
            return

        # we should probably use only the airflow context passed via parameter;
        # also, there are mocks that cover only get_dbnd_project_config().airflow_context
        airflow_context = (
            airflow_context or get_dbnd_project_config().airflow_context()
        )
        if airflow_context:
            _set_dbnd_config_from_airflow_connections()

        _set_tracking_config_overide(airflow_context=airflow_context)
        dc = self._enter_cm(
            new_dbnd_context(name="inplace_tracking"))  # type: DatabandContext

        if not root_task_name:
            # extract the name of the script we are running (in Airflow scenario it will be just "airflow")
            root_task_name = sys.argv[0].split(os.path.sep)[-1]

        if airflow_context:
            root_task, job_name, source, run_uid = build_run_time_airflow_task(
                airflow_context, root_task_name)
            try_number = airflow_context.try_number
        else:
            root_task = _build_inline_root_task(root_task_name)
            job_name = root_task_name
            source = UpdateSource.generic_tracking
            run_uid = None
            try_number = 1

        tracking_source = (
            None  # TODO_CORE build tracking_source -> typeof TrackingSourceSchema
        )
        self._run = run = self._enter_cm(
            new_databand_run(
                context=dc,
                job_name=job_name,
                run_uid=run_uid,
                existing_run=run_uid is not None,
                source=source,
                af_context=airflow_context,
                tracking_source=tracking_source,
                project_name=project_name,
            ))  # type: DatabandRun

        self._run.root_task = root_task

        self.update_run_from_airflow_context(airflow_context)

        if not self._atexit_registered:
            _set_process_exit_handler(self.stop)
            self._atexit_registered = True

        sys.excepthook = self.stop_on_exception
        self._active = True

        # now we send data to DB
        root_task_run = run._build_and_add_task_run(
            root_task, task_af_id=root_task.task_name, try_number=try_number)

        root_task_run.is_root = True

        run.tracker.init_run()
        run.root_task_run.set_task_run_state(TaskRunState.RUNNING)

        should_capture_log = (
            TrackingConfig.from_databand_context().capture_tracking_log
        )
        self._enter_cm(
            run.root_task_run.runner.task_run_execution_context(
                capture_log=should_capture_log, handle_sigterm=False))
        self._task_run = run.root_task_run

        return self._task_run
Example #30
    def _create_pod_id(dag_id, task_id):
        # derive a DNS-1123-safe pod id from the current run's task run
        task_run = try_get_databand_run().get_task_run(task_id)
        return task_run.job_id__dns1123