Exemple #1
0
    def task_to_targets(self, task, targets):
        # type: (Task, Dict[str, TargetInfo]) -> List[TargetInfo]
        """
        Build the list of TargetInfo objects for the outputs and inputs of ``task``.

        :param task: task whose ``ctrl.relations`` outputs and inputs are walked
        :param targets: all known targets for current run, keyed by path; a
            TargetInfo is added here the first time a path is seen, so every
            path maps to a single TargetInfo
        :return: TargetInfo objects in visiting order (outputs, then inputs)
        """
        run = self.run
        collected = []

        def _register(target, name):
            # type: (Target, str) -> None
            path = str(target)
            info = targets.get(path)
            if info:
                # already known for this run - reuse the same TargetInfo
                collected.append(info)
                return

            # First time we see this path. Until proven otherwise the target
            # is not owned by any task; if the target points to a task that has
            # a task run in the current run, that task run becomes the owner
            # (for a pipeline `task`, the owner can be a different task).
            owner_uid = None
            if target.task:
                owner_run = run.get_task_run(target.task.task_id)
                if owner_run:
                    owner_uid = owner_run.task_run_uid

            info = TargetInfo(
                path=path,
                created_date=utcnow(),
                task_run_uid=owner_uid,
                parameter_name=name,
            )
            targets[path] = info
            logger.debug(
                "New Target: %s -> %s ->  %s", target.task, owner_uid, path
            )
            collected.append(info)

        relations = task.ctrl.relations
        sections = chain(
            relations.task_outputs.values(), relations.task_inputs.values()
        )
        for section in sections:
            for name, value in section.items():
                traverse(value, convert_f=partial(_register, name=name))

        return collected
    def calc_runtime_value(self, value, task):
        """
        Convert a parameter value into the form handed to the task at runtime.

        :param value: parameter value (may be a Target or a structure of values)
        :param task: task the value belongs to; used for logging and error reports
        :return: ``None`` for ``None``; target-typed values converted through
            ``value_type.target_to_value``; outputs returned as is; a Target
            input loaded via ``load_from_target``; structure elements loaded
            through the sub value type; anything else unchanged
        """
        if value is None:
            return value

        if isinstance(self.value_type, _TargetValueType):
            # "target" types are converted into the "user friendly" form for
            # inputs and outputs alike (param = output[Path] receives a Path)
            return traverse(value, self.value_type.target_to_value)

        if self.is_output():
            # outputs are not loaded on read
            return value

        if isinstance(value, Target):
            try:
                loaded = self.load_from_target(value)
                if self.is_input():
                    self._log_parameter_value(loaded, value, task)
                return loaded
            except Exception as ex:
                raise friendly_error.failed_to_read_target_as_task_input(
                    ex=ex, task=task, parameter=self, target=value
                )

        has_typed_items = (
            isinstance(self.value_type, _StructureValueType)
            and self.value_type.sub_value_type
        )
        if not has_typed_items:
            return value

        def _load_item(item):
            loaded_item = self.value_type.sub_value_type.load_runtime(item)
            if self.is_input() and isinstance(item, Target):
                # Optimisation opportunity: log all targets in a single call
                self._log_parameter_value(loaded_item, item, task)
            return loaded_item

        try:
            return traverse(value, convert_f=_load_item)
        except Exception as ex:
            raise friendly_error.failed_to_read_task_input(
                ex=ex, task=task, parameter=self, target=value
            )
Exemple #3
0
    def initialize_relations(self):
        """
        Run the relation-building steps for the task in a fixed order:
        band, required inputs, task id (signature) and finally outputs.

        The steps depend on state set by the previous ones, so the order of
        the calls below must be preserved.
        """
        # STEP 0 - run band function
        self.initialize_band()

        # STEP 1 - calculate all inputs and _required
        # NOTE(review): the return value is not used here; self.task_inputs is
        # read below, so it is presumably assigned inside initialize_required
        self.initialize_required()

        # STEP 2 ( now we have all inputs, we can calculate real signature)
        # support for two phase build
        # will be called from MetaClass

        params = self.params.get_params_serialized(significant_only=True,
                                                   input_only=True)
        # string form of all task inputs (None/empty entries filtered out)
        # is appended to the params used to build the task id
        task_inputs_as_str = traverse(self.task_inputs,
                                      convert_f=str,
                                      filter_none=True,
                                      filter_empty=True)
        params.append(("_task_inputs", task_inputs_as_str))

        # we do it again, now we have all inputs calculated
        self.task_meta.initialize_task_id(params)

        # for airflow operator task handling:
        # when the "airflow_task_id" param exists, the task id is overridden
        # with the task's airflow_task_id
        airflow_task_id_p = self.params.get_param("airflow_task_id")
        if airflow_task_id_p:
            self.task_meta.task_id = self.task.airflow_task_id

        # STEP 3  - now let update outputs
        self.initialize_outputs()

        # output id is calculated from the outputs selected for signing
        self.task_meta.initialize_task_output_id(self._get_outputs_to_sign())
Exemple #4
0
    def to_str(self, x):
        """
        Serialize ``x`` as canonical JSON with its elements in a stable order.

        Elements are first converted with ``sub_value_type.to_str`` when a sub
        value type is set, then sorted by their canonical JSON form.
        """
        items = x
        if self.sub_value_type:
            items = traverse(items, self.sub_value_type.to_str)

        # sort before serializing so the result does not depend on iteration order
        ordered = sorted(items, key=json_utils.dumps_canonical)
        return json_utils.dumps_canonical(ordered)
Exemple #5
0
    def initialize_relations(self):
        """
        Run the relation-building steps for the task in a fixed order:
        band, required inputs, task id (signature) and finally outputs.

        A failure while calculating the inputs is logged and re-raised, unless
        the task is dynamic, in which case the build continues with no inputs.
        """
        # STEP 0 - run band function
        self.initialize_band()

        # STEP 1 - calculate all inputs and _required
        try:
            self.task_inputs = self.initialize_required()
        except Exception:
            message = "Failed to calculate relationships for %s" % self.task_id
            logger.warning(message, exc_info=True)
            self.task_inputs = {}
            if not self.task.task_is_dynamic:
                raise

        # STEP 2 ( now we have all inputs, we can calculate real signature)
        # support for two phase build
        # will be called from MetaClass
        params = self.params.get_params_serialized(
            significant_only=True, input_only=True
        )

        if "user" in self.task_inputs:
            # TODO : why do we need to convert all "user side" inputs?
            # what if the input is insignificant?
            system_section = self.task_inputs.get("system")

            # only "user" inputs and the system "band" section are signed
            inputs_to_sign = {"user": self.task_inputs.get("user")}
            if system_section and "band" in system_section:
                inputs_to_sign["system"] = {"band": system_section["band"]}

            inputs_as_str = traverse(
                inputs_to_sign,
                convert_f=str,
                filter_none=True,
                filter_empty=True,
            )
            if inputs_as_str is None:
                inputs_as_str = ""

            params.append(("_task_inputs", inputs_as_str))

        # we do it again, now we have all inputs calculated
        self.task_meta.initialize_task_id(params)

        # for airflow operator task handling:
        if self.params.get_param("airflow_task_id"):
            self.task_meta.task_id = self.task.airflow_task_id

        # STEP 3  - now let update outputs
        self.initialize_outputs()

        outputs_to_sign = self._get_outputs_to_sign()
        self.task_meta.initialize_task_output_id(outputs_to_sign)
Exemple #6
0
    def f_io(self, structure):
        """
        Render ``structure`` as indented JSON, shortening very long strings.

        Strings longer than 600 characters are cut to their first 400
        characters followed by the number of comma-separated entries.
        """

        def _shorten(text):
            if not text or len(text) <= 600:
                return text
            return "%s... (%s files)" % (text[:400], len(text.split(",")))

        as_str = traverse_to_str(structure)
        shortened = traverse(as_str, _shorten)
        return json_utils.dumps(shortened, indent=2)
Exemple #7
0
    def f_io(self, structure):
        """
        Render ``structure`` as indented JSON, shortening very long strings.

        Strings longer than 600 characters are cut to their first 400
        characters followed by the number of comma-separated entries. When the
        converted structure is a dict, the dump is additionally rewritten with
        ``self._hjson_optimizer``, keeping only its first group for every match.

        :param structure: value or nested structure to render
        :return: the rendered string
        """

        def _shorten(text):
            if not text or len(text) <= 600:
                return text
            return "%s... (%s files)" % (text[:400], len(text.split(",")))

        structure_str = traverse(traverse_to_str(structure), _shorten)
        dumped = json_utils.dumps(structure_str, indent=2)
        if isinstance(structure_str, dict):
            # raw string: "\g" is not a valid escape in a normal string
            # literal and triggers a warning on modern Python versions
            dumped = self._hjson_optimizer.sub(r"\g<1>", dumped)
        return dumped
Exemple #8
0
    def initialize_required(self):
        """
        Collect everything the task depends on, split into "user" and "system"
        sections, and return it converted with ``to_targets``.

        :raises: the ``pipeline_task_has_unassigned_outputs`` friendly error
            when a PipelineTask has a user output whose value is not defined
        """
        # regular requirements -- just all inputs
        inputs = {"user": {}, "system": {}}
        system_inputs = inputs["system"]

        # we take all parameters that are inputs (not outputs)
        # however Primitive parameters are inputs only if they are Target (deferred)
        for param, raw_value in self.params.get_param_values(input_only=True):
            if raw_value is None:
                continue
            found = traverse(
                raw_value,
                convert_f=_find_target,
                filter_none=True,
                filter_empty=True,
            )
            if found:
                inputs[_section(param)][param.name] = found

        from dbnd import PipelineTask

        if isinstance(self.task, PipelineTask):
            band_outputs = {}
            user_outputs = self.params.get_param_values(
                output_only=True, user_only=True
            )
            for param, out_value in user_outputs:
                if param.name == "task_band" or isinstance(
                    param, FuncResultParameter
                ):
                    continue

                if is_not_defined(out_value):
                    raise friendly_error.task_build.pipeline_task_has_unassigned_outputs(
                        task=self.task, param=param
                    )
                band_outputs[param.name] = out_value

            # empty sections are never added to "system"
            if band_outputs:
                system_inputs["band"] = band_outputs

        # find all child pipelines and make them upstreams to the task
        child_pipelines = {
            pipeline.task_id: pipeline
            for pipeline in self._get_all_child_pipelines()
        }
        if child_pipelines:
            system_inputs["pipelines"] = child_pipelines

        # now may be user still use function _requires - so let add that to dependencies
        extra_required = self.task._requires()
        if extra_required:
            system_inputs["required"] = extra_required

        return to_targets(inputs)
Exemple #9
0
    def initialize_required(self):
        """
        Collect everything the task depends on, split into "user" and "system"
        sections, and return it converted with ``to_targets``.
        """
        # regular requirements -- just all inputs
        inputs = {"user": {}, "system": {}}
        system_inputs = inputs["system"]

        # we take all parameters that are inputs (not outputs)
        # however Primitive parameters are inputs only if they are Target (deferred)
        input_params = self.params.get_params_with_value(ParameterFilters.INPUTS)
        for param, raw_value in input_params:
            if raw_value is None:
                continue
            found = traverse(
                raw_value,
                convert_f=_find_target,
                filter_none=True,
                filter_empty=True,
            )
            if found:
                inputs[_section(param)][param.name] = found

        from dbnd import PipelineTask

        if isinstance(self.task, PipelineTask):
            # band outputs are going to be required as inputs!
            # @pipeline can run only when all of its "outputs" are ready
            band_outputs = {}
            user_outputs = self.params.get_params_with_value(
                ParameterFilters.USER_OUTPUTS
            )
            for param, out_value in user_outputs:
                if param.name == "task_band" or isinstance(
                    param, FuncResultParameter
                ):
                    continue
                band_outputs[param.name] = out_value

            # empty sections are never added to "system"
            if band_outputs:
                system_inputs["band"] = band_outputs

        # find all child pipelines and make them upstreams to the task
        child_pipelines = {
            pipeline.task_id: pipeline
            for pipeline in self._get_all_child_pipelines()
        }
        if child_pipelines:
            system_inputs["pipelines"] = child_pipelines

        # now may be user still use function _requires - so let add that to dependencies
        extra_required = self.task._requires()
        if extra_required:
            system_inputs["required"] = extra_required

        return to_targets(inputs)
Exemple #10
0
    def calc_runtime_value(self, value, task):
        """
        Convert a parameter value into the form handed to the task at runtime.

        :param value: parameter value (may be a Target or a structure of values)
        :param task: task the value belongs to; used for error reports
        :return: ``None`` for ``None``; target-typed values converted through
            ``value_type.target_to_value``; outputs returned as is; a Target
            loaded via ``load_from_target``; structure elements loaded through
            the sub value type; anything else unchanged
        """
        if value is None:
            return value

        if isinstance(self.value_type, _TargetValueType):
            # "target" types are converted into the "user friendly" form
            # for inputs and outputs alike
            return traverse(value, self.value_type.target_to_value)

        if self.is_output():
            # outputs are not loaded on read
            return value

        if isinstance(value, Target):
            try:
                return self.load_from_target(value)
            except Exception as ex:
                raise friendly_error.failed_to_read_target_as_task_input(
                    ex=ex, task=task, parameter=self, target=value
                )

        has_typed_items = (
            isinstance(self.value_type, _StructureValueType)
            and self.value_type.sub_value_type
        )
        if not has_typed_items:
            return value

        try:
            item_loader = self.value_type.sub_value_type.load_runtime
            return traverse(value, convert_f=item_loader)
        except Exception as ex:
            raise friendly_error.failed_to_read_task_input(
                ex=ex, task=task, parameter=self, target=value
            )
Exemple #11
0
    def parse_value(self, value, load_value=None, target_config=None):
        """
        Parse the structure first, then every element of it.

        Strings are delegated to the parent ``parse_value``; any other value
        has each element parsed by ``sub_value_type.parse_value`` when a sub
        value type is set. ``None`` is returned unchanged.
        """
        if value is None:
            return value

        if isinstance(value, six.string_types):
            parent = super(_StructureValueType, self)
            return parent.parse_value(
                value=value, load_value=load_value, target_config=target_config
            )

        if not self.sub_value_type:
            return value

        return traverse(struct=value, convert_f=self.sub_value_type.parse_value)
Exemple #12
0
    def parse_value(self, value, load_value=None, target_config=None, sub_value=False):
        """
        Parse the structure first, then every element of it.

        A string is delegated to the parent ``parse_value`` unless
        ``sub_value`` is set. Otherwise each element is parsed by
        ``sub_value_type.parse_value`` (called with ``sub_value=True``) when a
        sub value type is set. ``None`` is returned unchanged.
        """
        if value is None:
            return value

        parse_as_string = not sub_value and isinstance(value, six.string_types)
        if parse_as_string:
            parent = super(_StructureValueType, self)
            return parent.parse_value(
                value=value, load_value=load_value, target_config=target_config
            )

        if not self.sub_value_type:
            return value

        element_parser = partial(self.sub_value_type.parse_value, sub_value=True)
        return traverse(struct=value, convert_f=element_parser)
Exemple #13
0
    def parse_from_str(self, x):
        """
        Parse a value of this type from its string representation.

        Empty input (falsy, or blank after stripping) yields
        ``self._generate_empty_default()``. A string starting with one of
        ``_PARSABLE_PARAM_PREFIX`` is decoded with ``json_utils.loads``; any
        other string goes through ``self._parse_from_str_simple`` and the
        result must satisfy ``self.is_type_of``. When a sub value type is set,
        every element is then passed through its ``parse_value``.

        :param x: string to parse
        :return: the parsed value
        :raises DatabandConfigError: if ``x`` is a non-empty non-string, or the
            simple parser produced a value of the wrong type
        """
        if not x:
            return self._generate_empty_default()

        # this is string and we need to parse it
        if not isinstance(x, six.string_types):
            # '%s' with a 1-tuple: '%x' is the hex conversion and raised
            # TypeError for every non-integer instead of the intended error
            raise DatabandConfigError(
                "Can't parse '%s' into parameter. Value should be string" % (x,)
            )

        x = x.strip()
        if not x:
            return self._generate_empty_default()

        if x[0] in _PARSABLE_PARAM_PREFIX:
            value = json_utils.loads(x)
        else:
            value = self._parse_from_str_simple(x)

            # only the simple parser result is type-checked
            if not self.is_type_of(value):
                raise DatabandConfigError(
                    "Can't parse '%s' into %s" % (value, self.type)
                )

        if self.sub_value_type:
            value = traverse(value, self.sub_value_type.parse_value)

        return value
Exemple #14
0
 def test_flattern_file_target(self):
     """A target nested two dict levels deep is kept at the same position."""
     file_target = target("/tmp")
     nested = {"a": {"b": file_target}}

     found = traverse(nested, convert_f=_find_target, filter_none=True)

     assert found
     inner = found.get("a")
     assert inner.get("b") == file_target
Exemple #15
0
 def to_str(self, x):
     """
     Serialize ``x`` with ``json_utils.dumps_safe``, converting every element
     with ``sub_value_type.to_str`` first when a sub value type is set.
     """
     element_type = self.sub_value_type
     converted = traverse(x, element_type.to_str) if element_type else x
     return json_utils.dumps_safe(converted)
Exemple #16
0
 def to_str_lines(self, x):
     """
     Convert every element of ``x`` with ``sub_value_type.to_str``; ``x`` is
     returned unchanged when no sub value type is set.
     """
     element_type = self.sub_value_type
     if not element_type:
         return x
     return traverse(x, element_type.to_str)
Exemple #17
0
def traverse_and_set_target(target, target_source):
    """
    Apply ``__set_target`` with the given ``target_source`` to every element
    of ``target`` and return the result of the traversal.
    """

    def _assign_source(item):
        return __set_target(target=item, target_source=target_source)

    return traverse(target, convert_f=_assign_source)
Exemple #18
0
 def parse_from_str_lines(self, lines):
     """
     Parse every element of ``lines`` with ``sub_value_type.parse_from_str``;
     ``lines`` is returned unchanged when no sub value type is set.
     """
     element_type = self.sub_value_type
     if not element_type:
         return lines
     return traverse(lines, element_type.parse_from_str)
Exemple #19
0
    def initialize_relations(self):
        """
        Run the relation-building steps for the task in a fixed order:
        band, required inputs, task signature / task id and finally outputs
        with their signature.

        A failure while calculating the inputs is logged and re-raised, unless
        the task is dynamic, in which case the build continues with no inputs.
        The steps depend on state set by the previous ones, so the order of
        the statements below must be preserved.
        """
        # STEP 0 - run band function
        self.initialize_band()

        # STEP 1 - calculate all inputs and _required
        try:
            self.task_inputs = self.initialize_required()
        except Exception:
            logger.warning("Failed to calculate relationships for %s" %
                           self.task_id,
                           exc_info=True)
            # fall back to "no inputs"; only dynamic tasks continue from here
            self.task_inputs = {}
            if not self.task.task_is_dynamic:
                raise

        # STEP 2 ( now we have all inputs, we can calculate real signature)
        # support for two phase build
        # will be called from MetaClass

        params = self.params.get_params_serialized(
            ParameterFilters.SIGNIFICANT_INPUTS)

        if "user" in self.task_inputs:
            # TODO : why do we need to convert all "user side" inputs?
            # what if the input is insignificant?
            # only the "user" inputs and the system "band" section take part
            # in the signature; other system sections are left out
            system_input = self.task_inputs.get("system")
            if system_input and "band" in system_input:
                band_input = system_input["band"]
                task_inputs_user_only = {
                    "user": self.task_inputs.get("user"),
                    "system": {
                        "band": band_input
                    },
                }
            else:
                task_inputs_user_only = {"user": self.task_inputs.get("user")}
            task_inputs_as_str = traverse(
                task_inputs_user_only,
                convert_f=str,
                filter_none=True,
                filter_empty=True,
            )

            # traverse may return None here; an empty string is signed instead
            if task_inputs_as_str is None:
                task_inputs_as_str = ""

            params.append(("_task_inputs", task_inputs_as_str))

        # IMPORTANT PART: we initialize task_id here again
        # after all values are calculated (all task_inputs are assigned)
        # we do it again, now we have all inputs calculated
        task = self.task
        task.task_signature_obj = build_signature(
            name=task.task_name,
            params=params,
            extra=task.task_definition.task_signature_extra,
        )
        # task id is "<task name>__<signature>"
        task.task_id = "{}__{}".format(task.task_name,
                                       task.task_signature_obj.signature)

        # for airflow operator task handling:
        # when the "airflow_task_id" param exists, the task id is overridden
        # with the task's airflow_task_id
        airflow_task_id_p = self.params.get_param("airflow_task_id")
        if airflow_task_id_p:
            self.task.task_id = self.task.airflow_task_id

        # STEP 3  - now let update outputs
        self.initialize_outputs()

        # outputs get their own signature only when there is something to
        # sign; otherwise the task signature object is reused
        outputs_sig = self._get_outputs_to_sign()
        if outputs_sig:
            sig = build_signature_from_values("task_outputs", outputs_sig)
            task.task_outputs_signature_obj = sig
        else:
            task.task_outputs_signature_obj = task.task_signature_obj
Exemple #20
0
def targets_to_str(obj_or_struct):
    """
    Convert a single object or a nested structure with ``target_to_str``,
    applied to every element, filtering out ``None`` results.
    """
    as_strings = traverse(
        obj_or_struct,
        convert_f=target_to_str,
        filter_none=True,
    )
    return as_strings
Exemple #21
0
def to_targets(obj_or_struct, from_string_kwargs=None):
    """
    Convert a single object or a nested structure with ``_to_target``,
    applied to every element, filtering out ``None`` results.

    :param obj_or_struct: object or nested structure to convert
    :param from_string_kwargs: passed as is to every ``_to_target`` call
    """
    converter = partial(_to_target, from_string_kwargs=from_string_kwargs)
    return traverse(obj_or_struct, convert_f=converter, filter_none=True)
Exemple #22
0
def to_tasks(obj_or_struct):
    """
    Convert a single object or a nested structure with ``_to_task``,
    applied to every element, filtering out ``None`` results.
    """
    as_tasks = traverse(
        obj_or_struct,
        convert_f=_to_task,
        filter_none=True,
    )
    return as_tasks
Exemple #23
0
 def normalize(self, value):
     """
     Normalize every element of ``value`` with ``sub_value_type.normalize``;
     ``value`` is returned unchanged when no sub value type is set.
     """
     element_type = self.sub_value_type
     if not element_type:
         return value
     return traverse(value, element_type.normalize)
Exemple #24
0
 def load_runtime(self, value, **kwargs):
     """
     Load every element of ``value`` with ``sub_value_type.load_runtime``;
     ``value`` is returned unchanged when no sub value type is set.

     Extra keyword arguments are accepted but not used.
     """
     element_type = self.sub_value_type
     if not element_type:
         return value
     return traverse(value, element_type.load_runtime)