Example #1
0
 def test_set(self):
     """Signatures match when a set value lists its targets in another order."""
     first = build_signature(
         "t",
         [("a", {1: {target("a"), target("b")}})],
     )
     expected = first.signature

     second = build_signature(
         "t",
         [("a", {1: {target("b"), target("a")}})],
     )
     assert expected == second.signature
Example #2
0
    def _calculate_task_meta_key(self):
        """Build a signature from the task name, its params and its overrides.

        The params are the (name, signature) pairs of every entry in
        ``self.task_params`` whose parameter is not an output and is
        significant, followed by a ``"task_override"`` entry describing
        ``self.task_config_override``.

        Returns:
            The result of ``build_signature`` for the full task name and the
            collected params.
        """
        params = []
        for p_value in self.task_params:
            # skip outputs first, then anything that is not significant
            if p_value.parameter.is_output():
                continue
            if not p_value.parameter.significant:
                continue
            params.append(
                (p_value.name, p_value.parameter.signature(p_value.value)))

        override_signature = {}
        for p_obj, p_val in six.iteritems(self.task_config_override):
            if not isinstance(p_obj, ParameterDefinition):
                # very problematic approach till we fix the override structure
                key = str(p_obj)
                value = str(p_val)
            else:
                key = "%s.%s" % (p_obj.task_cls.get_task_family(), p_obj.name)
                if isinstance(p_val, six.string_types):
                    # string overrides are used as-is
                    value = p_val
                else:
                    value = p_obj.signature(p_val)
            override_signature[key] = value

        params.append(("task_override", override_signature))

        # task schema id is unique per Class definition,
        # so a new implementation will not have a problem with rerunning it
        name_parts = (
            self.task_name,
            self.task_definition.full_task_family,
            str(id(self.task_definition)),
        )
        full_task_name = "%s@%s(object=%s)" % name_parts

        return build_signature(name=full_task_name, params=params)
Example #3
0
    def initialize_task_id(self, params=None):
        """Compute the task signature and store its parts on ``self``.

        Two ``task_build`` config flags decide which extra values take part
        in the signature: the full task family and a hash of the task source
        code.

        Args:
            params: passed through to ``build_signature`` as ``params``.

        Sets ``self.task_id``, ``self.task_signature`` and
        ``self.task_signature_source`` from the built signature.
        """
        signed_name = self.task_name

        signature_extra = {}
        sign_with_family = config.getboolean(
            "task_build", "sign_with_full_qualified_name")
        if sign_with_family:
            signature_extra["full_task_family"] = (
                self.task_definition.full_task_family)

        sign_with_code = config.getboolean("task_build", "sign_with_task_code")
        if sign_with_code:
            source_code = self.task_definition.task_source_code
            signature_extra["task_code_hash"] = user_friendly_signature(
                source_code)

        built = build_signature(
            name=signed_name,
            params=params,
            extra=signature_extra,
        )
        task_id, task_signature = built.id, built.signature
        self.task_id = task_id
        self.task_signature = task_signature
        self.task_signature_source = built.signature_source
Example #4
0
    def initialize_relations(self):
        """Resolve task inputs, then recompute the task signature, id and outputs.

        Steps, in order:
          0. run the band function (``initialize_band``);
          1. compute ``self.task_inputs`` via ``initialize_required``;
          2. rebuild ``task.task_signature_obj`` and ``task.task_id`` from the
             serialized significant input params plus, when present, the
             stringified "user" inputs (and the "band" system input);
          3. initialize the outputs and set ``task.task_outputs_signature_obj``.

        Raises:
            Exception: whatever ``initialize_required`` raised. It is logged,
                ``self.task_inputs`` is reset to ``{}``, and the exception is
                re-raised unless ``self.task.task_is_dynamic`` is truthy.
        """
        # STEP 0 - run band function
        self.initialize_band()

        # STEP 1 - calculate all inputs and _required
        try:
            self.task_inputs = self.initialize_required()
        except Exception:
            # Pass task_id as a logging argument instead of pre-formatting
            # with "%": formatting inside the handler could itself raise
            # (e.g. for a tuple value) and hide the original error.
            logger.warning(
                "Failed to calculate relationships for %s",
                self.task_id,
                exc_info=True,
            )
            self.task_inputs = {}
            if not self.task.task_is_dynamic:
                raise

        # STEP 2 ( now we have all inputs, we can calculate real signature)
        # support for two phase build
        # will be called from MetaClass

        params = self.params.get_params_serialized(
            ParameterFilters.SIGNIFICANT_INPUTS)

        if "user" in self.task_inputs:
            # TODO : why do we need to convert all "user side" inputs?
            # what if the input is insignificant?
            system_input = self.task_inputs.get("system")
            task_inputs_user_only = {"user": self.task_inputs.get("user")}
            if system_input and "band" in system_input:
                # of the system inputs, only "band" takes part in the signature
                task_inputs_user_only["system"] = {
                    "band": system_input["band"]
                }

            task_inputs_as_str = traverse(
                task_inputs_user_only,
                convert_f=str,
                filter_none=True,
                filter_empty=True,
            )

            # traverse may return None (presumably when everything was
            # filtered out - confirm); sign an empty string in that case
            if task_inputs_as_str is None:
                task_inputs_as_str = ""

            params.append(("_task_inputs", task_inputs_as_str))

        # IMPORTANT PART: we initialize task_id here again
        # after all values are calculated (all task_inputs are assigned)
        task = self.task
        task.task_signature_obj = build_signature(
            name=task.task_name,
            params=params,
            extra=task.task_definition.task_signature_extra,
        )
        task.task_id = "{}__{}".format(task.task_name,
                                       task.task_signature_obj.signature)

        # for airflow operator task handling:
        # when the "airflow_task_id" param exists, it replaces the task_id
        airflow_task_id_p = self.params.get_param("airflow_task_id")
        if airflow_task_id_p:
            task.task_id = task.airflow_task_id

        # STEP 3  - now let update outputs
        self.initialize_outputs()

        outputs_sig = self._get_outputs_to_sign()
        if outputs_sig:
            task.task_outputs_signature_obj = build_signature_from_values(
                "task_outputs", outputs_sig)
        else:
            # nothing to sign on the output side: reuse the task signature
            task.task_outputs_signature_obj = task.task_signature_obj
Example #5
0
    def build_task_object(self, task_metaclass):
        """Build a task object, or return a cached one with the same signature.

        The method resolves the task parameters, computes an "object cache"
        signature from them, and returns the cached task object if one is
        registered under that signature (and it has no ``_dbnd_no_cache``
        attribute). Otherwise it creates a new task through
        ``task_metaclass._build_task_obj``, registers it in the task instance
        cache, then initializes and validates it inside a BUILD task context.

        Args:
            task_metaclass: object whose ``_build_task_obj`` is called to
                create the new task.

        Returns:
            The cached task object, or the newly built task.

        Raises:
            Exception: anything raised while resolving the task params is
                re-raised after the config is logged with ``force_log=True``.
        """
        databand_context = get_databand_context()

        # convert args to kwargs, validate values
        self.task_kwargs = self._build_and_validate_task_ctor_kwargs(
            self.task_args__ctor, self.task_kwargs)

        self._log_build_step("Resolving task params with %s" %
                             self.config_sections)
        try:
            task_param_values = self._build_task_param_values()
            task_params = Parameters(source=self._ctor_as_str,
                                     param_values=task_param_values)

        except Exception:
            # log the config before re-raising, to help debug the failure
            self._log_config(force_log=True)
            raise

        # the task is disabled when there is a parent task whose
        # ctrl.should_run() is falsy
        task_enabled = True
        if self.parent_task and not self.parent_task.ctrl.should_run():
            task_enabled = False

        # load from task_band if exists
        task_band_param = task_params.get_param_value(TASK_BAND_PARAMETER_NAME)
        if task_band_param and task_band_param.value:
            task_band = task_band_param.value

            # we are going to load all task parameters from task_band
            task_params = self.load_task_params_from_task_band(
                task_band, task_params)

        params = task_params.get_params_signatures(
            ParameterFilters.SIGNIFICANT_INPUTS)

        # we add override to Object Cache signature
        override_signature = self._get_override_params_signature()
        # task schema id is unique per Class definition,
        # so a new implementation will not have a problem with rerunning it
        full_task_name = "%s@%s(object=%s)" % (
            self.task_name,
            self.task_definition.full_task_family,
            str(id(self.task_definition)),
        )

        # now we don't know the real signature - so we calculate signature based on all known params
        cache_object_signature = build_signature(
            name=full_task_name,
            params=params,
            extra={"task_override": override_signature},
        )
        self._log_build_step("Task task_signature %s" %
                             str(cache_object_signature.signature))

        # If a Task has already been instantiated with the same parameters,
        # the previous instance is returned to reduce number of object instances.
        tic = databand_context.task_instance_cache
        cached_task_object = tic.get_cached_task_obj(cache_object_signature)
        if cached_task_object and not hasattr(cached_task_object,
                                              "_dbnd_no_cache"):
            return cached_task_object

        # we want to have task id immediately, so we can initialize outputs/use by user
        # we should switch to SIGNIFICANT_INPUT here
        task_signature_obj = build_signature(
            name=self.task_name,
            params=params,
            extra=self.task_definition.task_signature_extra,
        )

        task_children_scope_params = self._calculate_task_children_scope_params(
            task_params=task_params)

        task = task_metaclass._build_task_obj(
            task_definition=self.task_definition,
            task_name=self.task_name,
            task_params=task_params,
            task_signature_obj=task_signature_obj,
            task_config_override=self.task_config_override,
            task_config_layer=self.config.config_layer,
            task_enabled=task_enabled,
            task_sections=self.config_sections,
            task_children_scope_params=task_children_scope_params,
        )
        # first registration: keyed by the object cache signature built above
        tic.register_task_obj_cache_instance(
            task, task_obj_cache_signature=cache_object_signature)

        # NOTE(review): the argument 2 is presumably a stack-frame offset, so
        # this call may be sensitive to being moved into a helper - confirm.
        task.task_call_source = [
            databand_context.user_code_detector.find_user_side_frame(2)
        ]
        # NOTE(review): task_call_source was just assigned a one-element list,
        # so (assuming a plain attribute) it is always truthy here and the
        # condition effectively depends on self.parent_task only.
        if task.task_call_source and self.parent_task:
            task.task_call_source.extend(self.parent_task.task_call_source)

        # now the task is created - all nested constructors will see it as parent
        with task_context(task, TaskContextPhase.BUILD):
            task._initialize()
            task._validate()

            # it might be that config has been changed even more
            task.task_config_layer = self.config.config_layer

        # only now we know "task_id", so we can register in the publicly facing cache
        tic.register_task_instance(task)

        return task
Example #6
0
 def test_dict(self):
     """Signatures match when a dict value lists its items in another order."""
     expected = build_signature("t", [("a", {1: 2, 2: 3})]).signature
     reordered = build_signature("t", [("a", {2: 3, 1: 2})])
     assert expected == reordered.signature
Example #7
0
 def test_simple(self):
     """build_signature returns a truthy result for one string param."""
     result = build_signature("t", [("a", "b")])
     assert result