Esempio n. 1
0
    def __init__(self, config, task_cls, task_args, task_kwargs):
        # type:(DbndConfig, Type[_BaseTask], Any, Any)->None
        """
        Capture the constructor call and the state used to build task meta.

        config      -- configuration object, wrapped into a MultiSectionConfig
        task_cls    -- class being instantiated; provides ``task_definition``
        task_args   -- positional arguments of the call (stored as a new list)
        task_kwargs -- keyword arguments of the call (stored as a copy)
        """
        definition = task_cls.task_definition  # type: TaskDefinition
        self.task_cls = task_cls
        self.task_definition = definition

        # snapshot of the user inputs, detached from the caller's containers
        self.task_kwargs__ctor = task_kwargs.copy()
        self.task_args__ctor = list(task_args)

        self.parent_task = try_get_current_task()

        # this constructor may run inside another Databand Task: start the call
        # source with the frame reported for this call, then append the chain
        # already recorded on the parent task
        self.dbnd_context = get_databand_context()
        caller_frame = self.dbnd_context.user_code_detector.find_user_side_frame(2)
        self.task_call_source = [caller_frame]
        if self.parent_task:
            parent_chain = self.parent_task.task_meta.task_call_source
            self.task_call_source.extend(parent_chain)

        # the task name starts out equal to the family of the definition
        family = definition.task_family
        self.task_family = family
        self.task_name = family

        self.multi_sec_conf = MultiSectionConfig(config, [])

        self._task_params = definition._task_params.clone()

        self.ctor_kwargs = {}

        # label used when describing errors of this build, plus their collector
        self._exc_desc = family
        self.task_errors = []
Esempio n. 2
0
 def _get_current_task_run(self):
     """
     Return ``current_task_run`` of the task reported by try_get_current_task().

     Raises NotImplementedError when there is no current task.
     """
     current = try_get_current_task()
     if current is not None:
         return current.current_task_run

     # TODO: fake task
     raise NotImplementedError(
         "DatabandStore usage outside of DBND task is not implemented yet."
     )
def stop():
    """
    Report the current task and kill the active Databand run, if any.

    Two messages are emitted through ``msg``: a fixed "stopping!" notice and
    the value returned by try_get_current_task(). When try_get_databand_run()
    returns a truthy run, its ``kill()`` method is called. Returns None.
    """
    msg("stopping!")
    task = try_get_current_task()
    # format with a one-element tuple: a bare ``% (task)`` is just ``% task``
    # and would treat a tuple-valued task as the list of format arguments
    msg("Current tasks looks like: %s" % (task,))

    run = try_get_databand_run()
    if run:
        run.kill()
Esempio n. 4
0
    def __init__(self, config, task_cls, task_definition, task_args,
                 task_kwargs):
        # type:(DbndConfig, Type[_TaskWithParams],TaskDefinition, Any, Any)->None
        """
        Prepare the factory state for building one task object.

        config          -- current config object (kept as ``self.config``)
        task_cls        -- class being instantiated
        task_definition -- definition object stored on the factory
        task_args       -- positional arguments of the call (copied to a list)
        task_kwargs     -- keyword arguments of the call; the keys
                           "task_name", "override" and "task_config_sections"
                           are popped from this very dict

        NOTE(review): ``self.task_passport`` is read below but not assigned in
        this method - it has to be provided by the class; confirm there.
        """
        self.task_cls = task_cls
        self.task_definition = task_definition

        # untouched snapshot of the user inputs, taken before any key is popped
        self.task_kwargs__ctor = task_kwargs.copy()
        self.task_args__ctor = list(task_args)

        self.task_env_config = None  # type: Optional[EnvConfig]

        self.parent_task = try_get_current_task()

        # textual form of the call: "<call repr>@<task class>"
        call_repr = _get_call_repr(
            self.task_passport.task_family,
            self.task_args__ctor,
            self.task_kwargs__ctor,
        )
        self._ctor_as_str = "%s@%s" % (call_repr, str(self.task_cls))

        # pull all "system" keywords out of kwargs; "task_family" is only read
        # (it stays in kwargs) and acts as the default task name (old behavior)
        default_name = task_kwargs.get("task_family",
                                       self.task_passport.task_family)
        requested_name = task_kwargs.pop("task_name", default_name)
        override = task_kwargs.pop("override", None)
        extra_sections = task_kwargs.pop("task_config_sections", None)

        self.task_config_override = override or {}
        self.task_kwargs = task_kwargs

        # characters matched by TASK_ID_INVALID_CHAR_REGEX become "_"
        self.task_name = TASK_ID_INVALID_CHAR_REGEX.sub("_", requested_name)

        factory_config = TaskFactoryConfig.from_dbnd_config(config)
        self.task_factory_config = factory_config
        self.verbose_build = factory_config.verbose

        # current config, NOTE: it's Singleton
        self.config = config

        self.task_errors = []
        self.build_warnings = []

        # will be used for ConfigValue; the empty list is set first, presumably
        # so the attribute already exists while the sections are being computed
        self.config_sections = []
        self.config_sections = self._get_task_config_sections(
            config=config,
            task_config_sections_extra=extra_sections)
Esempio n. 5
0
    def _create_task(cls, args, kwargs):
        """
        Build (or fetch from the instance cache) the task for ``cls``.

        args / kwargs are the positional and keyword arguments of the user
        call. Returns the task object: a cached instance when one exists for
        the same object key, otherwise a freshly built and initialized one.
        """
        task_definition = cls.task_definition
        # the context has to be initialized before any logic runs in config() scope;
        # class defaults go into the config only where no value exists yet
        defaults_store = task_definition.task_defaults_config_store
        defaults_source = task_definition.task_passport.format_source_name(
            "defaults")
        with config(
                config_values=defaults_store,
                source=defaults_source,
                merge_settings=ConfigMergeSettings.on_non_exists_only,
        ) as task_config:

            is_tracking = TaskEssence.TRACKING.is_included(cls)

            # task meta comes first; tracking tasks use their own meta factory
            if is_tracking:
                meta_factory_cls = TrackedTaskMetaFactory
            else:
                meta_factory_cls = TaskMetaFactory
            meta_factory = meta_factory_cls(config=task_config,
                                            task_cls=cls,
                                            task_args=args,
                                            task_kwargs=kwargs)
            task_meta = meta_factory.create_dbnd_task_meta()

            # A task already instantiated with the same parameters is reused,
            # which keeps the number of object instances down.
            instance_cache = get_databand_context().task_instance_cache
            task = instance_cache.get_task_obj_by_id(task_meta.obj_key.id)
            must_build = (not task or is_tracking
                          or hasattr(task, "_dbnd_no_cache"))
            if must_build:
                task = cls._build_task_obj(task_meta)
                instance_cache.register_task_obj_instance(task)

                # from here on nested constructors see this task as their parent
                with task_context(task, TaskContextPhase.BUILD):
                    task._initialize()
                    task._validate()
                    task.task_meta.config_layer = config.config_layer

                instance_cache.register_task_instance(task)

            # attach the task to the enclosing task, except for CONFIG essence
            parent_task = try_get_current_task()
            is_child = (parent_task and hasattr(task, "task_id")
                        and (task.task_essence != TaskEssence.CONFIG))
            if is_child:
                parent_task.descendants.add_child(task.task_id)

            return task
Esempio n. 6
0
    def __call__(cls, *args, **kwargs):
        """
        Custom class instantiation: the object is built by TaskFactory.

        During Airflow DAG creation a task class call is redirected to
        build_task_at_airflow_dag_context (unless that redirect was disabled
        through the "_dbnd_disable_airflow_inplace" keyword or the class is
        marked with ``_dbnd_decorated_task``). Otherwise the object is built
        inside a fresh config layer and registered as a child of the current
        task, if there is one.
        """

        # use-case of TaskClass() call from airflow context during DAG creation
        inplace_disabled = kwargs.pop("_dbnd_disable_airflow_inplace", False)
        redirect_to_airflow = (
            is_in_airflow_dag_build_context()
            and TaskEssence.is_task_cls(cls)
            and not inplace_disabled
            and not getattr(cls, "_dbnd_decorated_task", False)
        )
        if redirect_to_airflow:
            # the flag is passed along so the nested call does not redirect again
            airflow_kwargs = dict(kwargs, _dbnd_disable_airflow_inplace=True)
            return build_task_at_airflow_dag_context(
                task_cls=cls, call_args=args, call_kwargs=airflow_kwargs)

        task_definition = cls.task_definition
        # the context has to be initialized before any logic runs in config() scope

        # a new config layer: once we leave it, config is back to its previous value
        ctor_source = task_definition.task_passport.format_source_name("ctor")
        with config(config_values={}, source=ctor_source) as task_config:
            task_factory = TaskFactory(
                config=task_config,
                task_cls=cls,
                task_definition=task_definition,
                task_args=args,
                task_kwargs=kwargs,
            )
            task_object = task_factory.build_task_object(cls)

        # attach the new object to the enclosing task, except for CONFIG essence
        parent_task = try_get_current_task()
        is_child = (parent_task and hasattr(task_object, "task_id")
                    and (task_object.task_essence != TaskEssence.CONFIG))
        if is_child:
            parent_task.descendants.add_child(task_object.task_id)

        return task_object
Esempio n. 7
0
    def __init__(self, config, task_cls, task_args, task_kwargs):
        # type:(DbndConfig, Type[_BaseTask], Any, Any)->None
        """
        Capture the constructor call and the state used to build task meta.

        config      -- configuration object, wrapped into a MultiSectionConfig
        task_cls    -- class being instantiated; provides ``task_definition``
        task_args   -- positional arguments of the call (stored as a new list)
        task_kwargs -- keyword arguments of the call (stored as a copy)
        """
        definition = task_cls.task_definition  # type: TaskDefinition
        self.task_cls = task_cls
        self.task_definition = definition

        # snapshot of the user inputs, detached from the caller's containers
        self.task_kwargs__ctor = task_kwargs.copy()
        self.task_args__ctor = list(task_args)

        self.parent_task = try_get_current_task()

        # the task name starts out equal to the family of the definition
        family = definition.task_family
        self.task_family = family
        self.task_name = family

        self.multi_sec_conf = MultiSectionConfig(config, [])

        # private copy of the parameters declared on the definition
        self._task_params = definition.task_params.copy()

        self.ctor_kwargs = {}

        # label used when describing errors of this build, plus their collector
        self._exc_desc = family
        self.task_errors = []
Esempio n. 8
0
def create_dbnd_task(config, new_task_factory, task_cls, task_args, task_kwargs):
    # type:(DbndConfig, Any, Type[_BaseTask], Any, Any)->_BaseTask
    """
    Build (or fetch from the instance cache) a task of ``task_cls``.

    config           -- config object handed to the meta factory; its
                        ``config_layer`` is stored on the new task's meta
    new_task_factory -- callable that receives the task meta and returns a
                        new task object
    task_cls         -- task class; ``is_tracking_mode`` selects the meta factory
    task_args        -- positional arguments of the user call
    task_kwargs      -- keyword arguments of the user call

    Returns the task object: a cached instance when one with the same object
    key exists (and neither tracking mode nor ``_dbnd_no_cache`` forces a
    rebuild), otherwise a newly created, initialized and validated one.
    """
    tracking_mode = task_cls.is_tracking_mode

    task_meta_factory = TrackedTaskMetaFactory if tracking_mode else TaskMetaFactory
    factory = task_meta_factory(
        config=config, task_cls=task_cls, task_args=task_args, task_kwargs=task_kwargs,
    )

    task_meta = factory.create_dbnd_task_meta()

    # If a Task has already been instantiated with the same parameters,
    # the previous instance is returned to reduce number of object instances.
    tic = get_databand_context().task_instance_cache
    task = tic.get_task_obj_by_id(task_meta.obj_key.id)
    if not task or tracking_mode or hasattr(task, "_dbnd_no_cache"):
        task = new_task_factory(task_meta)
        tic.register_task_obj_instance(task)

        # now the task is created - all nested constructors will see it as parent
        with task_context(task, TaskContextPhase.BUILD):
            task._initialize()
            task._validate()
            task.task_meta.config_layer = config.config_layer

        tic.register_task_instance(task)

    # only parameter-container tasks that carry a task_id become children
    parent_task = try_get_current_task()
    if (
        parent_task
        and hasattr(task, "task_id")
        and isinstance(task, _TaskParamContainer)
    ):
        parent_task.task_meta.add_child(task.task_id)

    return task
Esempio n. 9
0
    def _call_handler(cls, call_user_code, call_args, call_kwargs):
        """
        -= Use "Step into My Code" to get back from Databand code! =-

        Entry point for a call/creation of a decorated object
        ( my_func(), MyDecoratedTask() ): decides whether the call runs the
        user code directly or goes through Databand.

        call_user_code -- callable holding the original user code
        call_args      -- positional arguments of the call
        call_kwargs    -- keyword arguments; "__force_invoke" is popped from it
        """
        forced = call_kwargs.pop("__force_invoke", False)
        if forced or not is_databand_enabled():
            # either Databand is not enabled, or this call comes from a
            # Task.run / Task.band direct invocation
            return call_user_code(*call_args, **call_kwargs)

        func_call = FuncCall(
            task_cls=cls,
            call_args=call_args,
            call_kwargs=call_kwargs,
            call_user_code=call_user_code,
        )

        # Airflow DAG building mode
        if is_in_airflow_dag_build_context():
            return build_task_at_airflow_dag_context(
                task_cls=cls, call_args=call_args, call_kwargs=call_kwargs)

        # running inside an Airflow task: track the call instead
        airflow_task_context = try_get_airflow_context()
        if airflow_task_context:
            return track_airflow_dag_run_operator_run(
                func_call=func_call, airflow_task_context=airflow_task_context)

        active_task = try_get_current_task()
        if not active_task and is_inplace_run():
            from dbnd._core.inplace_run.inplace_run_manager import dbnd_run_start

            # no task yet, but inplace run is on: try to start a run
            started_run = dbnd_run_start()
            if started_run:
                active_task = started_run.task

        if not active_task:
            # direct call to the function
            return func_call.invoke()

        ######
        # DBND HANDLING OF CALL
        # the call does not come from _invoke_func but from user code
        # ( some_func() or SomeTask() ), so decide here what to do with it
        phase = current_phase()
        if phase is TaskContextPhase.BUILD:
            # @pipeline context: the execution plan is being built
            built_task = cls(*call_args, **call_kwargs)

            # a single-result task hands back its result; with multiple outputs
            # ( result, another output.. ) the task object itself is returned
            if built_task.task_definition.single_result_output:
                return built_task.result
            return built_task

        # inside a run function the call becomes a dynamic task, provided that
        # dynamic tasks are enabled and the current task supports them
        # (an extra mechanism in addition to __force_invoke)
        if (phase is TaskContextPhase.RUN
                and active_task.settings.dynamic_task.enabled
                and active_task.task_supports_dynamic_tasks):
            return create_and_run_dynamic_task_safe(func_call=func_call)

        # the call can not be handled the "databand" way, fall back to normal execution
        return func_call.invoke()
Esempio n. 10
0
    def __init__(self, dbnd_context, config, new_task_factory, task_cls,
                 task_args, task_kwargs):
        # type:(DatabandContext, DbndConfig, Any, Type[_BaseTask], Any, Any)->None
        """
        Collect everything needed to build one task: the user inputs, the
        task name/family and the ordered list of config sections.

        dbnd_context     -- context object; its ``user_code_detector`` is used
                            to record the call source
        config           -- config object, stored and used to read the factory
                            settings and the "_from" sections
        new_task_factory -- stored as is on the instance
        task_cls         -- class being instantiated; provides ``task_definition``
        task_args        -- positional arguments of the call (copied to a list)
        task_kwargs      -- keyword arguments of the call; the keys
                            "task_family", "task_name", "task_config_sections"
                            and "override" are popped from this very dict
        """
        self.task_cls = task_cls
        self.task_definition = task_cls.task_definition  # type: TaskDefinition
        self.new_task_factory = new_task_factory

        # keep copy of user inputs (taken before any key is popped below)
        self.task_kwargs__ctor = task_kwargs.copy()
        self.task_args__ctor = list(task_args)

        self.parent_task = try_get_current_task()

        self.config = config
        self.task_factory_config = TaskFactoryConfig.from_dbnd_config(config)
        self.verbose_build = self.task_factory_config.verbose

        # find out whether this constructor runs within another Databand Task:
        # the call source starts with the frame reported for this call and is
        # followed by the call source already recorded on the parent task
        self.dbnd_context = dbnd_context
        self.task_call_source = [
            self.dbnd_context.user_code_detector.find_user_side_frame(2)
        ]
        if self.task_call_source and self.parent_task:
            self.task_call_source.extend(
                self.parent_task.task_meta.task_call_source)

        self.task_family = task_kwargs.pop("task_family", None)
        # extra params from constructor
        self.task_name = task_kwargs.pop("task_name", None)
        kwargs_task_config_sections = task_kwargs.pop("task_config_sections",
                                                      None)

        self.task_config_override = task_kwargs.pop("override", None) or {}
        # what is left after the pops above
        self.task_kwargs = task_kwargs

        # no (or empty) family from the user -> family of the definition
        if not self.task_family:
            self.task_family = self.task_definition.task_family
        # characters matched by TASK_ID_INVALID_CHAR_REGEX become "_"
        if self.task_name:
            self.task_name = TASK_ID_INVALID_CHAR_REGEX.sub(
                "_", self.task_name)

        # user gives an explicit name, or it is the config section of the
        # definition; computed before task_name falls back to the family
        self.task_main_config_section = (
            self.task_name or self.task_definition.task_config_section)

        if self.task_name is None:
            self.task_name = self.task_family

        # there is priority of task name over task family, as name is more specific
        sections = [self.task_name]
        # _from at config files
        sections.extend(self._get_task_from_sections(config, self.task_name))

        sections.extend(
            [self.task_family, self.task_definition.full_task_family])

        # sections passed through the "task_config_sections" keyword
        if kwargs_task_config_sections:
            sections.extend(kwargs_task_config_sections)

        # adding "default sections"  - LOWEST PRIORITY
        if issubclass(self.task_definition.task_class, _TaskParamContainer):
            sections += [CONF_TASK_SECTION]

        # imported here rather than at module level
        # NOTE(review): presumably to avoid a circular import - confirm
        from dbnd._core.task.config import Config

        if issubclass(self.task_definition.task_class, Config):
            sections += [CONF_CONFIG_SECTION]

        # drop falsy entries; unique_everseen presumably removes duplicates
        # while keeping the first occurrence - confirm against its definition
        sections = list(unique_everseen(filter(None, sections)))

        self.task_config_sections = sections

        self.task_params = list(self.task_definition.task_params.values()
                                )  # type: List[ParameterDefinition]
        self.ctor_kwargs = None
        # utilities section
        self.build_warnings = []
        # "<family>(<name>=<value repr>, ...)" built from the original kwargs;
        # each repr goes through safe_string with 300 (presumably a length limit)
        self._exc_desc = "%s(%s)" % (
            self.task_family,
            ", ".join(("%s=%s" % (p, safe_string(repr(k), 300))
                       for p, k in iteritems(self.task_kwargs__ctor))),
        )
        self.task_errors = []