Ejemplo n.º 1
0
def register_dbnd_plugins():
    """Register dbnd plugins through the plugin manager, on the first call only.

    Nothing happens when the project config has ``is_no_plugins`` set or when
    a previous call already flipped the module-level registration flag.
    Entry-point loading itself is skipped when the project config sets
    ``disable_pluggy_entrypoint_loading``.
    """
    global _dbnd_plugins_registered

    if get_dbnd_project_config().is_no_plugins:
        return
    if _dbnd_plugins_registered:
        return

    # the flag is raised before loading, so later calls return immediately
    _dbnd_plugins_registered = True

    fix_sys_path_str()
    if get_dbnd_project_config().disable_pluggy_entrypoint_loading:
        return

    pm.load_setuptools_entrypoints("dbnd")
    pm.check_pending()
Ejemplo n.º 2
0
def _default_configuration_paths():
    """Yield the paths of the default configuration files, in lookup order.

    Order: the library config (always yielded), ``/etc/databand.cfg``, the
    system/project/custom/environment candidates, the per-user
    ``~/.dbnd/databand.cfg`` and, in unit-test mode only, the test config.
    Apart from the library config, a path is yielded only if it exists on disk.
    """
    # the "library config" is always part of the result
    yield databand_config_path("databand-core.cfg")

    etc_config = os.path.expanduser("/etc/databand.cfg")
    if os.path.isfile(etc_config):
        yield etc_config

    # from here on we look for project-level configs
    project = get_dbnd_project_config()
    candidates = [
        databand_system_path("databand-system.cfg"),
        project.dbnd_project_path("conf", "databand.cfg"),  # deprecated
        project.dbnd_project_path("databand.cfg"),  # deprecated
        get_dbnd_custom_config(),
        project.dbnd_project_path("project.cfg"),
    ]
    environ_config = get_dbnd_environ_config_file()
    if environ_config:
        candidates.append(environ_config)

    # the per-user config is checked right after all the candidates above
    candidates.append("~/.dbnd/databand.cfg")

    # expansion is lazy, so every candidate is expanded just before its check
    for expanded in (expand_env_var(candidate) for candidate in candidates):
        if os.path.isfile(expanded):
            yield expanded

    if is_unit_test_mode():
        unit_tests_config = databand_system_path("databand-test.cfg")
        if os.path.exists(unit_tests_config):
            yield unit_tests_config
Ejemplo n.º 3
0
    def build_init_args(self):
        # type: () -> InitRunArgs
        """Build the ``InitRunArgs`` object describing ``self.run``.

        Optional parts (new run info, scheduled run info, the link from the
        root task run to the driver task run) are filled in only when the
        matching condition on the run holds.
        """
        current_run = self.run
        runs_info = self.build_task_runs_info(current_run.task_runs)
        args = InitRunArgs(
            run_uid=current_run.run_uid,
            root_run_uid=current_run.root_run_info.root_run_uid,
            task_runs_info=runs_info,
            driver_task_uid=current_run.driver_task_run.task_run_uid,
            task_run_env=current_run.context.task_run_env,
            source=current_run.source,
            af_context=current_run.af_context,
        )

        # Even an "existing" run may really be a fresh submission (e.g. when
        # running from Airflow the root job has no info), so on resubmit we
        # still capture the "real" info of the run.
        needs_run_info = (
            not current_run.existing_run
            or get_dbnd_project_config().resubmit_run
        )
        if needs_run_info:
            args.new_run_info = self._run_to_run_info()

        if current_run.scheduled_run_info:
            args.scheduled_run_info = current_run.scheduled_run_info

        root_task_run_uid = current_run.root_run_info.root_task_run_uid
        if root_task_run_uid:
            # register (root task run -> driver task run) in both relation maps
            link = (root_task_run_uid, args.driver_task_uid)
            runs_info.parent_child_map.add(link)
            runs_info.upstreams_map.add(link)

        return args
Ejemplo n.º 4
0
def tracking_start_base(job_name, project_name=None, airflow_context=None):
    """
    Start the handler that tracks the currently running script.

    A new handler is not started if a script manager already exists.
    Returns the root task run of the active script manager, or None when
    dbnd is disabled, start-up failed, or no manager is active.
    """
    global _dbnd_script_manager

    project_config = get_dbnd_project_config()
    if project_config.disabled:
        # dbnd is disabled -> nothing is tracked
        return None

    if not _dbnd_script_manager:
        # mark the context as tracking to prevent conflicts with dbnd orchestration
        project_config._dbnd_tracking = True

        manager = _DbndScriptTrackingManager()
        try:
            # job_name serves as both the job name and the root task name of the run
            manager.start(job_name, project_name, airflow_context)
            if manager._active:
                _dbnd_script_manager = manager
        except Exception:
            _handle_tracking_error("dbnd-tracking-start")

            # disable the project so no new handler is started in this execution
            project_config.disabled = True
            return None

    active_manager = _dbnd_script_manager
    if not active_manager or not active_manager._active:
        return None

    # the root task run of the tracking; it represents the script context
    return active_manager._task_run
Ejemplo n.º 5
0
 def __init__(self, task_cls):
     """Wrap ``task_cls`` so that this object looks like its original function."""
     self.task_cls = task_cls  # type: Type[Task]

     # copying the function metadata makes the wrapper look like the origin function
     origin_func = task_cls._conf__decorator_spec.item
     self.task_func = origin_func
     functools.update_wrapper(self, origin_func)

     # call bookkeeping; the limit comes from the project config
     self._call_count = 0
     self._call_as_func = False
     self._max_call_count = get_dbnd_project_config().max_calls_per_run
Ejemplo n.º 6
0
def dbnd_run_start(name=None):
    """Start the in-place run manager if needed and return its root task run.

    Returns None when dbnd is disabled, when start-up raised (dbnd is then
    disabled for the rest of the execution), or when no manager is active.
    """
    global _dbnd_inline_manager

    if get_dbnd_project_config().disabled:
        return None

    if not _dbnd_inline_manager:
        manager = _DbndInplaceRunManager()
        try:
            manager.start(root_task_name=name)
            # only a manager that really became active is kept
            if manager._active:
                _dbnd_inline_manager = manager
        except Exception:
            _handle_inline_run_error("inline-start")
            get_dbnd_project_config().disabled = True
            return None

    current_manager = _dbnd_inline_manager
    if not current_manager or not current_manager._active:
        return None
    return current_manager._task_run
Ejemplo n.º 7
0
def is_verbose():
    """Return True if the databand context or the project config asks for verbosity.

    A truthy ``system_settings.verbose`` on the current context wins; in every
    other case the answer comes from the project config.
    """
    ctx = try_get_databand_context()
    ctx_settings = getattr(ctx, "system_settings", None) if ctx else None
    if ctx_settings and ctx_settings.verbose:
        # only a truthy value short-circuits; otherwise the project config decides
        return True

    return get_dbnd_project_config().is_verbose()
Ejemplo n.º 8
0
    def __init__(self, task_decorator):
        # type: (CallableTrackingManager, TaskDecorator) -> None
        """Store the decorator and reset the per-run call bookkeeping."""
        self.task_decorator = task_decorator

        # no tracking task definition yet
        self._tracking_task_definition = None

        # call bookkeeping; the limit comes from the project config
        self._call_count = 0
        self._call_as_func = False
        self._max_call_count = get_dbnd_project_config().max_calls_per_run
Ejemplo n.º 9
0
def dbnd_run_start(name=None, airflow_context=None):
    """Start the in-place run manager if needed and return its root task run.

    Returns None when dbnd is disabled, when start-up raised (the error is
    logged and dbnd is disabled for the rest of the execution), or when no
    manager is active.
    """
    global _dbnd_inline_manager

    if get_dbnd_project_config().disabled:
        return None

    if not _dbnd_inline_manager:
        manager = _DbndInplaceRunManager()
        try:
            manager.start(name, airflow_context)

            # only a manager that really became active is kept
            if manager._active:
                _dbnd_inline_manager = manager
        except Exception as err:
            logger.error(err, exc_info=True)
            _handle_inline_run_error("inline-start")
            get_dbnd_project_config().disabled = True
            return None

    current_manager = _dbnd_inline_manager
    if not current_manager or not current_manager._active:
        return None
    return current_manager._task_run
Ejemplo n.º 10
0
def _is_airflow_enabled():
    """Tell whether the dbnd Airflow integration can be used.

    False when the project config has ``is_no_modules`` set; True when the
    ``dbnd-airflow`` plugin is registered or the ``dbnd_airflow`` package is
    importable; False otherwise.
    """
    if get_dbnd_project_config().is_no_modules:
        return False

    if not pm.has_plugin("dbnd-airflow"):
        # TODO: make decision based on plugin only
        try:
            import dbnd_airflow  # noqa: F401
        except ImportError:
            return False

    return True
Ejemplo n.º 11
0
    def start(self, root_task_name=None, airflow_context=None):
        """Start an in-place databand run and return its root task run.

        Returns None without doing anything when this manager already holds a
        run, is already active, or ``try_get_databand_run()`` returns a truthy
        value.

        :param root_task_name: name passed to ``_build_inline_root_task``;
            used only when there is no Airflow context.
        :param airflow_context: Airflow context to use; when falsy, the value
            of ``get_dbnd_project_config().airflow_context()`` is used instead.
        :return: the root task run of the new run (also kept in ``self._task_run``).
        """
        if self._run or self._active or try_get_databand_run():
            return

        # we probably should use only airflow context via parameter.
        # also, there are mocks that cover only get_dbnd_project_config().airflow_context
        airflow_context = airflow_context or get_dbnd_project_config().airflow_context()
        set_tracking_config_overide(use_dbnd_log=True, airflow_context=airflow_context)

        # NOTE(review): _enter_cm presumably enters the context manager and keeps
        # it open for the lifetime of this manager - confirm against its definition
        dc = self._enter_cm(
            new_dbnd_context(name="inplace_tracking")
        )  # type: DatabandContext

        # the root task, job name and source depend on whether we run under Airflow
        if airflow_context:
            root_task, job_name, source = build_run_time_airflow_task(airflow_context)
        else:
            root_task = _build_inline_root_task(root_task_name)
            job_name = root_task.task_name
            source = UpdateSource.dbnd

        self._run = run = self._enter_cm(
            new_databand_run(
                context=dc,
                job_name=job_name,
                existing_run=False,
                source=source,
                af_context=airflow_context,
            )
        )  # type: DatabandRun
        self._run.root_task = root_task

        # hand self.stop to the process-exit handler only once per manager
        if not self._atexit_registered:
            _set_process_exit_handler(self.stop)
            self._atexit_registered = True

        # route uncaught exceptions through this manager
        sys.excepthook = self.stop_on_exception
        self._active = True

        # now we send data to DB
        root_task_run = run._build_and_add_task_run(root_task)
        root_task_run.is_root = True

        # No need to track the state because we track in init_run
        run.root_task_run.set_task_run_state(TaskRunState.RUNNING, track=False)
        run.tracker.init_run()

        self._enter_cm(run.root_task_run.runner.task_run_execution_context())
        self._task_run = run.root_task_run

        return self._task_run
Ejemplo n.º 12
0
    def __init__(self, func_spec, task_type, task_defaults):
        # type: (TaskClsBuilder, _TaskDecoratorSpec, Type[_DecoratedTask], Any) -> None
        """Keep the decorator inputs and make this object look like the wrapped function."""
        self.func_spec, self.task_type, self.task_defaults = (
            func_spec,
            task_type,
            task_defaults,
        )

        # neither task class has been set yet
        self._normal_task_cls = self._tracking_task_cls = None

        # copying the function metadata makes the wrapper look like the origin function
        functools.update_wrapper(self, self.func)

        # call bookkeeping; the limit comes from the project config
        self._call_count = 0
        self._call_as_func = False
        self._max_call_count = get_dbnd_project_config().max_calls_per_run

        self._callable_item = None
Ejemplo n.º 13
0
def dbnd_run_start(name=None, airflow_context=None):
    """Start the script tracking manager if needed and return its root task run.

    Returns None when dbnd is disabled, when start-up raised (the error is
    logged and dbnd is disabled for the rest of the execution), or when no
    manager is active.
    """
    global _dbnd_script_manager

    project_config = get_dbnd_project_config()
    if project_config.disabled:
        return None

    if not _dbnd_script_manager:
        # flag the project config as being in tracking mode
        project_config._dbnd_tracking = True

        manager = _DbndScriptTrackingManager()
        try:
            manager.start(name, airflow_context)

            # only a manager that really became active is kept
            if manager._active:
                _dbnd_script_manager = manager
        except Exception as err:
            logger.error(err, exc_info=True)
            _handle_tracking_error("dbnd-tracking-start")
            project_config.disabled = True
            return None

    current_manager = _dbnd_script_manager
    if not current_manager or not current_manager._active:
        return None
    return current_manager._task_run
Ejemplo n.º 14
0
def _is_verbose():
    """Return the verbosity reported by the dbnd project config."""
    return get_dbnd_project_config().is_verbose()
Ejemplo n.º 15
0
def try_get_inplace_tracking_task_run():
    # type: ()->Optional[TaskRun]
    """Return the result of ``dbnd_tracking_start()`` in tracking mode, else None."""
    if not get_dbnd_project_config().is_tracking_mode():
        return None
    return dbnd_tracking_start()
Ejemplo n.º 16
0
    def start(self,
              root_task_name=None,
              project_name=None,
              airflow_context=None):
        """Start script tracking: create the run, its root task run, and return it.

        Returns None without doing anything when this manager already holds a
        run, is already active, or ``try_get_databand_run()`` returns a truthy
        value.

        :param root_task_name: name of the root task; when falsy, the last path
            component of ``sys.argv[0]`` is used.
        :param project_name: forwarded to ``new_databand_run``.
        :param airflow_context: Airflow context to use; when falsy, the value
            of ``get_dbnd_project_config().airflow_context()`` is used instead.
        :return: the root task run of the new run (also kept in ``self._task_run``).
        """
        if self._run or self._active or try_get_databand_run():
            return

        # we probably should use only airflow context via parameter.
        # also, there are mocks that cover only get_dbnd_project_config().airflow_context
        airflow_context = airflow_context or get_dbnd_project_config(
        ).airflow_context()
        if airflow_context:
            _set_dbnd_config_from_airflow_connections()

        _set_tracking_config_overide(airflow_context=airflow_context)
        # NOTE(review): _enter_cm presumably enters the context manager and keeps
        # it open for the lifetime of this manager - confirm against its definition
        dc = self._enter_cm(
            new_dbnd_context(name="inplace_tracking"))  # type: DatabandContext

        if not root_task_name:
            # extract the name of the script we are running (in Airflow scenario it will be just "airflow")
            root_task_name = sys.argv[0].split(os.path.sep)[-1]

        # under Airflow the run uid and try number come from the Airflow context;
        # otherwise there is no run uid and the try number is 1
        if airflow_context:
            root_task, job_name, source, run_uid = build_run_time_airflow_task(
                airflow_context, root_task_name)
            try_number = airflow_context.try_number
        else:
            root_task = _build_inline_root_task(root_task_name)
            job_name = root_task_name
            source = UpdateSource.generic_tracking
            run_uid = None
            try_number = 1

        tracking_source = (
            None  # TODO_CORE build tracking_source -> typeof TrackingSourceSchema
        )
        # the run counts as "existing" only when a run_uid was provided above
        self._run = run = self._enter_cm(
            new_databand_run(
                context=dc,
                job_name=job_name,
                run_uid=run_uid,
                existing_run=run_uid is not None,
                source=source,
                af_context=airflow_context,
                tracking_source=tracking_source,
                project_name=project_name,
            ))  # type: DatabandRun

        self._run.root_task = root_task

        self.update_run_from_airflow_context(airflow_context)

        # hand self.stop to the process-exit handler only once per manager
        if not self._atexit_registered:
            _set_process_exit_handler(self.stop)
            self._atexit_registered = True

        # route uncaught exceptions through this manager
        sys.excepthook = self.stop_on_exception
        self._active = True

        # now we send data to DB
        root_task_run = run._build_and_add_task_run(
            root_task, task_af_id=root_task.task_name, try_number=try_number)

        root_task_run.is_root = True

        # the run is initialized in the tracker first, then the root task run
        # is moved to RUNNING
        run.tracker.init_run()
        run.root_task_run.set_task_run_state(TaskRunState.RUNNING)

        # log capturing follows the capture_tracking_log setting of TrackingConfig
        should_capture_log = TrackingConfig.from_databand_context(
        ).capture_tracking_log
        self._enter_cm(
            run.root_task_run.runner.task_run_execution_context(
                capture_log=should_capture_log, handle_sigterm=False))
        self._task_run = run.root_task_run

        return self._task_run
Ejemplo n.º 17
0
def _call_handler(task_cls, call_user_code, call_args, call_kwargs):
    """
    -= Use "Step into My Code" to get back from Databand code! =-

    Handle the call/creation of a decorated object ( my_func(), MyDecoratedTask() ).

    Depending on the context, the call is either executed as plain user code,
    turned into a task inside an Airflow DAG, built as a task inside a
    pipeline (BUILD phase), or executed as a dynamic task (RUN phase).
    """
    skip_databand = call_kwargs.pop("__force_invoke", False)
    project_config = get_dbnd_project_config()

    # Plain execution of the user code when either:
    # 1. Databand is not enabled
    # 2. the call comes from Task.run / Task.band direct invocation
    if skip_databand or project_config.disabled:
        return call_user_code(*call_args, **call_kwargs)

    func_call = FuncCall(
        task_cls=task_cls,
        call_args=call_args,
        call_kwargs=call_kwargs,
        call_user_code=call_user_code,
    )

    # Airflow DAG building mode
    if is_in_airflow_dag_build_context():
        return build_task_at_airflow_dag_context(
            task_cls=task_cls, call_args=call_args, call_kwargs=call_kwargs
        )

    parent_task = try_get_current_task()
    if not parent_task:
        from dbnd._core.tracking.script_tracking_manager import (
            try_get_inplace_tracking_task_run,
        )

        tracking_task_run = try_get_inplace_tracking_task_run()
        if tracking_task_run:
            parent_task = tracking_task_run.task

    if not parent_task:
        # direct call to the function
        return func_call.invoke()

    ######
    # parent_task is set and we are not in tracking/airflow/luigi:
    # DBND Orchestration mode.
    # We can be in the context of .run() or .band(), called from user code
    # via some_func() or SomeTask(); this call path does not come from
    # _invoke_func.
    phase = current_phase()
    if phase is TaskContextPhase.BUILD:
        # inside the @pipeline context: we are building the execution plan
        built_task = task_cls(*call_args, **call_kwargs)

        # a single-result task hands back its result object; a task with
        # multiple outputs ( result, another output.. ) is returned as is
        return (
            built_task.result
            if built_task.task_definition.single_result_output
            else built_task
        )

    # Inside the run function, a dynamic task is created when dynamic tasks
    # are enabled and the current task supports inline calls.
    # This is an extra mechanism in addition to __force_invoke that prevents
    # infinite recursion when @task is on a class and not on a func
    # (example: see test_task_decorated_class.py).
    run_as_dynamic_task = (
        phase is TaskContextPhase.RUN
        and parent_task.settings.dynamic_task.enabled
        and parent_task.task_supports_dynamic_tasks
    )
    if run_as_dynamic_task:
        return create_and_run_dynamic_task_safe(
            func_call=func_call, parent_task_run=parent_task
        )

    # we can not call it in the "databand" way, fall back to normal execution
    return func_call.invoke()
Ejemplo n.º 18
0
def _is_tracking_mode():
    """Return whether the dbnd project config reports tracking mode."""
    project_config = get_dbnd_project_config()
    return project_config.is_tracking_mode()