Exemple #1
0
    def parse_value(self, value, load_value=None, target_config=None):
        """
        Turn a single raw input value into its parsed representation.

        The value is first passed through ``self._interpolate_from_str``.
        Strings are then either parsed in place (when loading is requested
        and ``self.support_from_str`` is set) or treated as path(s) and
        converted to targets. Non-string values are handled by type.

        :param value: the raw value to parse.
        :param load_value: whether the value should be loaded right away;
            ``None`` means "use ``self.load_on_build``".
        :param target_config: optional config forwarded to ``to_targets`` as
            ``from_string_kwargs["config"]`` for path strings.
        :return: ``None``, a parsed and normalized value, target(s), or an
            ``InMemoryTarget`` wrapping the value.
        """

        from dbnd._core.utils.task_utils import to_targets
        from targets.inmemory_target import InMemoryTarget
        from targets.values.target_values import _TargetValueType

        should_load = self.load_on_build if load_value is None else load_value

        value = self._interpolate_from_str(value)
        if value is None:
            return None

        if isinstance(value, six.string_types):
            # string mode: either a serialized value or a path
            if should_load and self.support_from_str:
                # the type knows how to read itself from a string
                return self.normalize(self.parse_from_str(value))

            # anything else is treated as a path and becomes target(s)
            from_string_kwargs = {}
            if target_config:
                from_string_kwargs["config"] = target_config

            decoded = json_utils.loads(value)
            return to_targets(decoded, from_string_kwargs=from_string_kwargs)

        from dbnd._core.task import Task
        from targets import Target

        if isinstance(value, Task):
            return to_targets(value)

        if isinstance(value, Target):
            return value

        # plain in-memory data: when loading is not requested (and this is
        # not a target value type) keep it deferred behind an InMemoryTarget
        if not (should_load or isinstance(self, _TargetValueType)):
            return InMemoryTarget(value, value_type=self)

        return self.normalize(value)
Exemple #2
0
    def initialize_required(self):
        """
        Collect everything this task depends on and return it as targets.

        The result is built from a ``{"user": {...}, "system": {...}}``
        mapping:

        * every input parameter whose value contains targets (as found by
          ``traverse`` with ``_find_target``) is stored under the section
          returned by ``_section(p)``;
        * for a ``PipelineTask``, its user outputs are stored under
          ``system["band"]``;
        * child pipelines go to ``system["pipelines"]`` keyed by ``task_id``;
        * the result of ``self.task._requires()`` goes to
          ``system["required"]``.

        Empty system entries are not added.

        :return: ``to_targets`` applied to the collected mapping.
        :raises: the ``pipeline_task_has_unassigned_outputs`` friendly error
            when a pipeline user output is not defined.
        """
        required = {"user": {}, "system": {}}

        # only input parameters are considered here; primitive values that
        # contain no targets are dropped by the traverse filters
        for param, raw_value in self.params.get_param_values(input_only=True):
            if raw_value is None:
                continue
            found_targets = traverse(raw_value,
                                     convert_f=_find_target,
                                     filter_none=True,
                                     filter_empty=True)
            if found_targets:
                required[_section(param)][param.name] = found_targets

        system_section = required["system"]

        from dbnd import PipelineTask

        if isinstance(self.task, PipelineTask):
            band_outputs = {}
            for param, out_value in self.params.get_param_values(
                    output_only=True, user_only=True):
                if param.name == "task_band" or isinstance(
                        param, FuncResultParameter):
                    continue

                if is_not_defined(out_value):
                    raise friendly_error.task_build.pipeline_task_has_unassigned_outputs(
                        task=self.task, param=param)
                band_outputs[param.name] = out_value

            if band_outputs:
                system_section["band"] = band_outputs

        # every child pipeline becomes an upstream of this task
        child_pipelines = {
            child.task_id: child
            for child in self._get_all_child_pipelines()
        }
        if child_pipelines:
            system_section["pipelines"] = child_pipelines

        # dependencies declared through the user's _requires() hook
        extra_required = self.task._requires()
        if extra_required:
            system_section["required"] = extra_required

        return to_targets(required)
Exemple #3
0
    def initialize_required(self):
        """
        Collect everything this task depends on and return it as targets.

        The result is built from a ``{"user": {...}, "system": {...}}``
        mapping:

        * every ``ParameterFilters.INPUTS`` parameter whose value contains
          targets (as found by ``traverse`` with ``_find_target``) is stored
          under the section returned by ``_section(p)``;
        * for a ``PipelineTask``, its ``ParameterFilters.USER_OUTPUTS``
          values are stored under ``system["band"]``;
        * child pipelines go to ``system["pipelines"]`` keyed by ``task_id``;
        * the result of ``self.task._requires()`` goes to
          ``system["required"]``.

        Empty system entries are not added.

        :return: ``to_targets`` applied to the collected mapping.
        """
        required = {"user": {}, "system": {}}

        # only input parameters are considered here; primitive values that
        # contain no targets are dropped by the traverse filters
        for param, raw_value in self.params.get_params_with_value(
                ParameterFilters.INPUTS):
            if raw_value is None:
                continue
            found_targets = traverse(raw_value,
                                     convert_f=_find_target,
                                     filter_none=True,
                                     filter_empty=True)
            if found_targets:
                required[_section(param)][param.name] = found_targets

        system_section = required["system"]

        from dbnd import PipelineTask

        if isinstance(self.task, PipelineTask):
            # a pipeline's own outputs are treated as its requirements:
            # it can run only once all of them are ready
            band_outputs = {}
            for param, out_value in self.params.get_params_with_value(
                    ParameterFilters.USER_OUTPUTS):
                if param.name == "task_band" or isinstance(
                        param, FuncResultParameter):
                    continue
                band_outputs[param.name] = out_value

            if band_outputs:
                system_section["band"] = band_outputs

        # every child pipeline becomes an upstream of this task
        child_pipelines = {
            child.task_id: child
            for child in self._get_all_child_pipelines()
        }
        if child_pipelines:
            system_section["pipelines"] = child_pipelines

        # dependencies declared through the user's _requires() hook
        extra_required = self.task._requires()
        if extra_required:
            system_section["required"] = extra_required

        return to_targets(required)
    def normalize(self, x):  # type: (T) -> T
        """
        Normalize an already parsed parameter value.

        The input may come from parsing, from the parameter default, or from
        arguments given when the task is instantiated.

        For target value types the value is converted with ``to_targets``
        using ``self.target_config``; for every other type the work is
        delegated to ``self.value_type.normalize``. What "normalize" means is
        up to the value type, e.g. validating or clamping the value (an
        implementation returning ``x // 2 * 2`` would round odd integers
        down to an even one).
        """
        if not isinstance(self.value_type, _TargetValueType):
            return self.value_type.normalize(x)

        # handled here rather than in the value type because the
        # parameter-level target_config is required
        from dbnd._core.utils.task_utils import to_targets

        target_kwargs = dict(config=self.target_config)
        return to_targets(x, from_string_kwargs=target_kwargs)
Exemple #5
0
    def initialize_outputs(self):
        """
        Build the outputs of the task and store them in ``self.task_outputs``.

        Every ``ParameterFilters.OUTPUTS`` parameter without a defined value
        gets one from ``p.build_output`` (also assigned back onto the task).
        All outputs except ``FuncResultParameter`` ones are collected into a
        ``{"user": {...}, "system": {...}}`` mapping, converted with
        ``to_targets`` and tagged with a ``TargetSource`` for this task id.

        Override only when writing a "base" class; regular tasks should
        declare outputs instead.
        """
        task = self.task
        collected = {"user": {}, "system": {}}

        for param, out_value in self.params.get_params_with_value(
                ParameterFilters.OUTPUTS):
            if is_not_defined(out_value):
                out_value = param.build_output(task=task)
                setattr(task, param.name, out_value)

            if isinstance(param, FuncResultParameter):
                continue

            collected[_section(param)][param.name] = traverse_and_set_target(
                out_value, param._target_source(task))

        custom_outputs = task._output()
        # NOTE(review): custom outputs replace the declared ones only when
        # declared user outputs exist; with no declared user outputs the
        # result of _output() is not used -- confirm this is intended.
        if custom_outputs and collected["user"]:
            warnings.warn(
                "Task %s has custom outputs in _output() function, all other outputs will be removed: %s"
                % (task, collected["user"]),
                stacklevel=2,
            )
            collected["user"] = custom_outputs

        # outputs may still contain Task objects; convert everything to
        # targets and mark this task as their source
        as_targets = to_targets(collected)
        source = TargetSource(task_id=self.task_id)
        self.task_outputs = traverse_and_set_target(as_targets,
                                                    target_source=source)
Exemple #6
0
    def normalize_to_target(self, value):
        """Return ``value`` converted by ``to_targets``."""
        # imported at call time, as in the rest of this code base
        from dbnd._core.utils.task_utils import to_targets

        as_targets = to_targets(value)
        return as_targets
Exemple #7
0
 def normalize(self, x):
     """Convert ``x`` with ``to_targets`` using ``self.target_config``."""
     # kept on the parameter (not the value type) because target_config
     # is only available here
     target_kwargs = {"config": self.target_config}
     return to_targets(x, from_string_kwargs=target_kwargs)
Exemple #8
0
    def parse_value(self, value, load_value=None, target_config=None):
        """
        Turn a single raw input value into its parsed representation.

        The value is first passed through ``self._interpolate_from_str``.
        Strings are either parsed in place (when loading is requested and
        ``self.support_from_str`` is set) or treated as path(s): a glob path
        becomes a single ``target``, anything else is parsed as a list of
        paths and returned as one target or a ``MultiTarget``. Non-string
        values are handled by type.

        :param value: the raw value to parse.
        :param load_value: whether the value should be loaded right away;
            ``None`` means "use ``self.load_on_build``".
        :param target_config: optional config applied to targets created
            from path strings.
        :return: ``None``, a parsed and normalized value, target(s), or an
            ``InMemoryTarget`` wrapping the value.
        """

        from dbnd._core.utils.task_utils import to_targets
        from targets.inmemory_target import InMemoryTarget
        from targets.values.target_values import _TargetValueType
        from targets import Target

        should_load = self.load_on_build if load_value is None else load_value

        value = self._interpolate_from_str(value)
        if value is None:
            return None

        if isinstance(value, six.string_types):
            # string mode: either a serialized value or a path
            if should_load and self.support_from_str:
                # simple type -> parse it straight from the string
                return self.normalize(self.parse_from_str(value))

            # otherwise the data is a "complex object" that is assumed not
            # to be loadable from a string, so the string is a path
            target_kwargs = {}
            if target_config:
                target_kwargs["config"] = target_config

            if _is_glob_path(value):
                from targets import target

                return target(value, config=target_config)

            # The string may describe one target or several (which are then
            # loaded as a single object). Supported forms:
            #     1. /some/path
            #     2. /some/path,....
            #     3. ["/some_path",..]
            # It is parsed as a list; a one element list is returned as that
            # element, anything else is wrapped in a MultiTarget.
            from targets.values.structure import ListValueType

            parsed_paths = ListValueType().parse_from_str(value)
            # apply the config (if any) to every created target
            path_targets = to_targets(parsed_paths,
                                      from_string_kwargs=target_kwargs)

            if len(path_targets) != 1:
                from targets.multi_target import MultiTarget

                return MultiTarget(path_targets)
            return path_targets[0]

        from dbnd._core.task import Task

        if isinstance(value, Task):
            return to_targets(value)

        if isinstance(value, Target):
            return value

        # plain in-memory data: when loading is not requested (and this is
        # not a target value type) keep it deferred behind an InMemoryTarget
        if not (should_load or isinstance(self, _TargetValueType)):
            return InMemoryTarget(value, value_type=self)

        return self.normalize(value)
Exemple #9
0
    def initialize_band(self):
        """
        Call ``self.task.band()`` and post-process the task's output parameters.

        Steps, all inside one ``try`` block:

        1. Snapshot the current values of the ``ParameterFilters.OUTPUTS``
           parameters (skipping ``task_band`` and ``FuncResultParameter``).
        2. Run ``band()`` inside the collected band contexts and keep the
           result on both ``self.task._task_band_result`` and
           ``self.task_band_result``.
        3. For a ``PipelineTask``, verify that every non-system output in the
           snapshot has a defined value.
        4. Convert every output whose value object was replaced with
           ``to_targets`` and store it via ``update_param_value``.

        :raises: any exception from the steps above is logged as a banner;
            it is re-raised unchanged when ``self.task.task_decorator`` is
            truthy, otherwise it is wrapped by
            ``friendly_error.task_build.failed_to_call_band``.
        """
        try:
            # context managers that must be active while band() runs
            band_context = []
            if is_airflow_enabled():
                from dbnd_airflow.dbnd_task_executor.airflow_operators_catcher import (
                    get_databand_op_catcher_dag, )

                band_context.append(get_databand_op_catcher_dag())

            # remember (param_value, value) pairs so that changes made by
            # band() can be detected by identity afterwards
            original_param_values = []
            for param_value in self.task.task_params.get_param_values(
                    ParameterFilters.OUTPUTS):
                if param_value.name == "task_band" or isinstance(
                        param_value.parameter, FuncResultParameter):
                    continue
                original_param_values.append((param_value, param_value.value))

            with nested(*band_context):
                band = self.task.band()
                # this one would be normalized
                self.task._task_band_result = band
            self.task_band_result = band  # real value

            from dbnd import PipelineTask

            if isinstance(self.task, PipelineTask):
                # after .band has finished, all user outputs of the .band should be defined
                for param_value, _ in original_param_values:
                    # we want to validate only user facing parameters
                    # they should have assigned values by this moment,
                    # pipeline task can not have None outputs, after band call
                    if param_value.parameter.system:
                        continue
                    if is_not_defined(param_value.value):
                        raise friendly_error.task_build.pipeline_task_has_unassigned_outputs(
                            task=self.task, param=param_value.parameter)

            # now let's normalize if user has changed outputs
            for param_value, original_value in original_param_values:
                # same object as before band() -> nothing was reassigned
                if param_value.value is original_value:
                    continue

                try:
                    from dbnd._core.utils.task_utils import to_targets

                    normalized_value = to_targets(param_value.value)
                    param_value.update_param_value(normalized_value)
                except Exception as ex:
                    raise friendly_error.task_build.failed_to_assign_param_value_at_band(
                        ex, param_value.parameter, param_value.value,
                        self.task)

        except Exception as ex:
            # this handler also sees the friendly errors raised above
            logger.warning(
                self.visualiser.banner(
                    msg="Failed to run %s" % _band_call_str(self.task),
                    color="red",
                    exc_info=sys.exc_info(),
                ))

            if self.task.task_decorator:
                # just re-raise, we already have an error from the "run" function
                raise

            raise friendly_error.task_build.failed_to_call_band(ex, self.task)
Exemple #10
0
    def auto_load_save_params(self):
        """
        Generator that brackets user code with parameter auto read/save.

        Presumably wrapped as a context manager by a decorator that is not
        visible here -- TODO confirm.

        Before yielding, the current parameter values are snapshotted and,
        when ``self.auto_read`` is set, auto-read bookkeeping is enabled on
        the task. After the yielded section finishes, parameters whose value
        object was replaced are either

        * saved through ``self.auto_save_param`` (outputs only) and then
          restored to the snapshot, when ``self.save_on_change`` is set, or
        * normalized in place, when ``self.normalize_on_change`` is set.

        If ``self.save_on_change`` is set and the save step was never
        reached, every parameter is restored to its snapshot value on exit.

        :return: yields the list of ``(parameter, value)`` pairs taken before
            the user code runs.
        """
        task = self.task
        snapshot = task._params.get_param_values()

        # nested usage is not supported (kept simple on purpose);
        # it is only reported, not prevented

        if task._task_auto_read is not None:
            logger.warning(
                "You are running in {task} within already existing TaskAutoParamsReadWrite context".format(
                    task=task
                )
            )

        if self.auto_read:
            task._task_auto_read_original = {p.name: v for p, v in snapshot}
            task._task_auto_read = set()

        needs_restore = self.save_on_change
        try:
            yield snapshot
            # switch "auto read" off: from here on values are read as they
            # are, without auto resolving
            task._task_auto_read = None
            task._task_auto_read_original = None

            latest = {
                p.name: value for p, value in task._params.get_param_values()
            }
            if not (self.save_on_change or self.normalize_on_change):
                return

            # keep only parameters whose value object was replaced
            changed = []
            for p, before in snapshot:
                after = latest[p.name]
                if after is before:
                    continue
                changed.append((p, before, after))

            if self.save_on_change:
                try:
                    for p, before, after in changed:
                        # TODO: implement Atomic commit
                        if p.is_output():
                            self.auto_save_param(p, before, after)
                finally:
                    # put the original objects back, even if saving failed
                    for p, before, _ in changed:
                        setattr(task, p.name, before)
                    needs_restore = False

            elif self.normalize_on_change:
                for p, _, after in changed:
                    try:
                        # probably running inside band: the new value is
                        # only normalized, not saved
                        if p.is_output():
                            from dbnd._core.utils.task_utils import to_targets

                            normalized = to_targets(after)
                        else:
                            normalized = p.normalize(after)
                        if normalized is not after:
                            setattr(task, p.name, normalized)
                    except Exception as ex:
                        raise friendly_error.task_build.failed_to_assign_param_value_at_band(
                            ex, p, after, task
                        )
        finally:
            task._task_auto_read = None
            if needs_restore:
                for p, before in snapshot:
                    setattr(task, p.name, before)
Exemple #11
0
    def auto_load_save_params(self):
        """
        Generator that brackets user code with parameter auto read/save.

        Presumably wrapped as a context manager by a decorator that is not
        visible here -- TODO confirm.

        Before yielding, the parameter values are snapshotted; when
        ``self.auto_read`` is set the task first loads its runtime values and
        the snapshot is taken again. After the yielded section finishes,
        parameters whose value object was replaced are either

        * saved through ``self.auto_save_param`` (outputs only) and then
          restored to the snapshot, when ``self.save_on_change`` is set, or
        * normalized in place, when ``self.normalize_on_change`` is set.

        If ``self.save_on_change`` is set and the save step was never
        reached, every parameter is restored to its snapshot value on exit.

        :return: yields the list of ``(parameter, value)`` pairs taken before
            the user code runs.
        """
        task = self.task
        snapshot = task._params.get_param_values()

        if self.auto_read:
            task.load_task_runtime_values()
            # values may have been replaced by the load: snapshot again
            snapshot = task._params.get_param_values()
            task._task_auto_read_original = {p.name: v for p, v in snapshot}

        needs_restore = self.save_on_change
        try:
            yield snapshot
            # switch "auto read" off: from here on values are read as they
            # are, without auto resolving
            task._task_auto_read_original = None

            latest = {
                p.name: value
                for p, value in task._params.get_param_values()
            }
            if not (self.save_on_change or self.normalize_on_change):
                return

            # keep only parameters whose value object was replaced
            changed = []
            for p, before in snapshot:
                after = latest[p.name]
                if after is before:
                    continue
                changed.append((p, before, after))

            if self.save_on_change:
                try:
                    for p, before, after in changed:
                        # TODO: implement Atomic commit
                        if p.is_output():
                            self.auto_save_param(p, before, after)
                finally:
                    # put the original objects back, even if saving failed
                    for p, before, _ in changed:
                        setattr(task, p.name, before)
                    needs_restore = False

            elif self.normalize_on_change:
                for p, _, after in changed:
                    try:
                        # probably running inside band: the new value is
                        # only normalized, not saved
                        if p.is_output():
                            from dbnd._core.utils.task_utils import to_targets

                            normalized = to_targets(after)
                        else:
                            normalized = p.normalize(after)
                        if normalized is not after:
                            setattr(task, p.name, normalized)
                    except Exception as ex:
                        raise friendly_error.task_build.failed_to_assign_param_value_at_band(
                            ex, p, after, task)
        finally:
            if needs_restore:
                for p, before in snapshot:
                    setattr(task, p.name, before)
Exemple #12
0
def data_combine(inputs, sort=False):
    """
    Combine ``inputs`` into a single ``MultiTarget``.

    ``inputs`` are converted with ``to_targets`` and flattened; when ``sort``
    is true the flattened targets are ordered by their ``path`` attribute.

    :param inputs: value(s) accepted by ``to_targets``.
    :param sort: whether to sort the targets by ``path`` before wrapping.
    :return: a ``MultiTarget`` over the resulting targets.
    """
    flat_targets = flatten(to_targets(inputs))
    if not sort:
        return MultiTarget(flat_targets)
    return MultiTarget(sorted(flat_targets, key=lambda t: t.path))