def _named_tuple_field_types(candidate):
    """Return ordered ``(field_name, field_type)`` pairs of a typed NamedTuple, or ``None``.

    ``_field_types`` is used when the class still exposes it; otherwise the
    pairs are rebuilt from ``_fields`` and ``__annotations__``.
    """
    if hasattr(candidate, "_field_types"):
        # legacy attribute, same lookup the code always did
        return list(six.iteritems(candidate._field_types))

    # `typing.NamedTuple` dropped `_field_types` in Python 3.9; the same
    # information is available through `__annotations__`, ordered by `_fields`
    if not (isinstance(candidate, type) and issubclass(candidate, tuple)):
        return None
    field_names = getattr(candidate, "_fields", None)
    hints = getattr(candidate, "__annotations__", None)
    if not field_names or not hints:
        return None
    if any(field_name not in hints for field_name in field_names):
        # untyped (collections.namedtuple style) class: leave it to regular parsing
        return None
    return [(field_name, hints[field_name]) for field_name in field_names]


def guess_func_return_type(f_spec):
    """Guess the value type(s) of a function result.

    The ``return`` entry of ``f_spec.annotations`` is checked first, then the
    ``return`` entry of ``f_spec.doc_annotations``.

    :param f_spec: function spec exposing ``annotations`` and
        ``doc_annotations`` mappings
    :return: ``None`` when the function is declared to return ``None``;
        a list of ``(field_name, value_type)`` pairs for multi-part results
        (unnamed parts are called ``result_1``, ``result_2``, ...);
        a single value type for a plain result;
        ``NOTHING`` when no usable return declaration is found
    """
    return_key = "return"
    registry = get_types_registry()

    def _numbered(parts):
        # unnamed parts get positional names, starting from 1
        return [("result_%s" % pos, part) for pos, part in enumerate(parts, start=1)]

    annotation = f_spec.annotations.get(return_key, NOTHING)
    if is_defined(annotation):
        if annotation is None:
            return None

        if isinstance(annotation, tuple):
            # for -> (int, DataFrame)
            annotation = _numbered(annotation)
        else:
            named_fields = _named_tuple_field_types(annotation)
            if named_fields is not None:
                # for -> SomeNamedTuple
                annotation = named_fields
            elif is_Tuple(annotation):
                # for -> Tuple[int, DataFrame]
                annotation = _numbered(get_Tuple_params(annotation))

        if not isinstance(annotation, list):
            # fallback to regular parsing
            return registry.get_value_type_of_type(annotation, inline_value_type=True)

        return [
            (
                field_name,
                registry.get_value_type_of_type(part, inline_value_type=True),
            )
            for field_name, part in annotation
        ]

    doc_annotation = f_spec.doc_annotations.get(return_key, NOTHING)
    if not is_defined(doc_annotation):
        return NOTHING
    if doc_annotation == "None":
        return None
    if isinstance(doc_annotation, six.string_types):
        # single documented type
        return registry.get_value_type_of_type_str(doc_annotation) or NOTHING

    # anything else is treated as a sequence of type strings, one per result part
    return [
        ("result_%s" % pos, registry.get_value_type_of_type_str(part))
        for pos, part in enumerate(doc_annotation, start=1)
    ]
def _build_parameter(self, context="inline"):
    """Finalize the wrapped parameter definition and return it.

    Resolves the value type, selects the validator, fills in settings that
    depend on the value type (empty default, ``load_on_build``, value meta
    configuration), chooses a description and assigns the next value of the
    global ``ParameterDefinition._total_counter`` as the parameter id.

    :param context: location string forwarded to ``_build_value_type``
    :return: the ``parameter`` attribute of the object returned by ``self.modify``
    """
    s = self.parameter  # type: ParameterDefinition
    update_kwargs = {}

    value_type = self._build_value_type(context)

    # explicit choices replace whatever validator was configured
    validator = s.validator
    if s.choices:
        validator = ChoiceValidator(s.choices)

    if is_not_defined(s.default) and s.empty_default:
        update_kwargs["default"] = value_type._generate_empty_default()

    if not is_defined(s.load_on_build):
        update_kwargs["load_on_build"] = value_type.load_on_build

    # create value meta
    if s.value_meta_conf is None:
        update_kwargs["value_meta_conf"] = ValueMetaConf(
            log_preview=s.log_preview,
            log_preview_size=s.log_preview_size,
            log_schema=s.log_schema,
            log_size=s.log_size,
            log_stats=s.log_stats,
            log_histograms=s.log_histograms,
        )

    description = s.description
    if not is_defined(description) or not description:
        # No usable description was supplied (undefined, None or empty):
        # fall back to the kind-specific default text.
        if s.is_output() and s.default_output_description:
            description = s.default_output_description
        elif not s.load_on_build and s.default_input_description:
            description = s.default_input_description
        else:
            description = s.default_description

        if s.validator:
            description = _add_description(description, validator.description)
    # `description` is passed to modify() explicitly below, so it must not
    # also be placed into update_kwargs (a duplicated keyword raises TypeError).

    # We need to keep track of this to get the order right (see Task class)
    ParameterDefinition._total_counter += 1

    if s.kind == _ParameterKind.task_output:
        update_kwargs["significant"] = False

    updated = self.modify(
        value_type=value_type,
        value_type_defined=value_type,
        validator=validator,
        description=description,
        parameter_id=ParameterDefinition._total_counter,
        **update_kwargs
    )
    return updated.parameter
def _build_value_type(self, context):
    """Resolve the value type of the wrapped parameter definition.

    An explicitly declared ``value_type`` is resolved through
    ``get_value_type_of_type``; without one, the type is taken from the
    default value. When ``sub_type`` is set, a structural value type is
    re-created with the resolved sub value type.

    :param context: location string passed to the raised friendly errors
    :return: the resolved value type
    :raises: the ``friendly_error.task_parameters`` errors when no value type
        can be determined, the declared one is unknown, or ``sub_type`` is
        combined with a non-structural value type
    """
    definition = self.parameter
    declared_type = definition.value_type
    default_value = definition.default

    if declared_type is not None:
        # make sure that what was declared can be turned into a ValueType
        resolved = get_value_type_of_type(declared_type)
        if resolved is None:
            raise friendly_error.task_parameters.unknown_value_type_in_parameter(
                declared_type
            )
        value_type = resolved  # type: ValueType
    elif is_defined(default_value):
        value_type = get_value_type_of_obj(default_value)
        if value_type is None:
            raise friendly_error.task_parameters.no_value_type_from_default(
                default_value, context=context
            )
    elif definition.load_on_build is False:
        # we are in data mode
        # NOTE(review): only the definition is updated here; the value
        # returned from this method stays None in this branch - confirm
        # that this is intended.
        definition.value_type = TargetValueType
        value_type = None
    else:
        raise friendly_error.task_parameters.no_value_type_defined_in_parameter(
            context=context
        )

    if definition.sub_type:
        sub_value_type = get_value_type_of_type(definition.sub_type)
        if not isinstance(value_type, _StructureValueType):
            raise friendly_error.task_parameters.sub_type_with_non_structural_value(
                context=context, value_type=value_type, sub_type=definition.sub_type
            )
        value_type = value_type.__class__(sub_value_type=sub_value_type)

    return value_type
def _build(
    cls, cls_name, module_name, task_namespace, conf__task_family,
):
    """Create a ``TaskPassport`` from the naming information of a task.

    :param cls_name: name of the task class
    :param module_name: module the task is defined in
    :param task_namespace: explicit namespace; when not defined, the
        namespace registered for ``module_name`` is used
    :param conf__task_family: explicit task family; wins over any namespace
    :return: ``TaskPassport`` with full, short and user-facing family names
        and the configuration section name
    """
    qualified_name = "%s.%s" % (module_name, cls_name)
    short_qualified_name = "%s.%s" % (_short_name(module_name), cls_name)

    if not is_defined(task_namespace):
        registered = get_task_registry().get_namespace(module_name)
        # the registry may ask to reuse the python module name as namespace
        task_namespace = (
            module_name if registered == _SAME_AS_PYTHON_MODULE else registered
        )

    if conf__task_family:
        family = section = conf__task_family
    elif task_namespace:
        family = section = "{}.{}".format(task_namespace, cls_name)
    else:
        # no namespace at all: short family, fully qualified config section
        family, section = cls_name, qualified_name

    return TaskPassport(
        full_task_family=qualified_name,
        full_task_family_short=short_qualified_name,
        task_family=family,
        task_config_section=section,
    )
def get_project_git():
    """Return the git commit of the project, computing it on first use.

    The value is stored in the module-level ``_project_git_version`` and
    reused while ``is_defined`` reports it as defined.
    """
    global _project_git_version
    if not is_defined(_project_git_version):
        _project_git_version = get_git_commit(project_path(), verbose=is_verbose())
    return _project_git_version
def __init__(self, task_class, classdict):
    """Build the definition of ``task_class``.

    :param task_class: the task class being described
    :param classdict: class dictionary handed to
        ``_calculate_task_class_values``
    """
    super(TaskDefinition, self).__init__()

    self.task_definition_uid = get_uuid()
    self.hidden = False

    self.task_class = task_class  # type: Type[Task]
    passport = TaskPassport.from_task_cls(task_class)
    self.task_passport = passport

    # TODO: maybe use properties or other way to delegate those...
    self.full_task_family = passport.full_task_family
    self.full_task_family_short = passport.full_task_family_short
    self.task_family = passport.task_family
    self.task_config_section = passport.task_config_section

    # name -> parameter definition
    self.task_params = dict()  # type: Dict[str, ParameterDefinition]
    # values of the "defaults" attribute
    self.defaults = dict()  # type: Dict[ParameterDefinition, Any]

    self.task_params, self.defaults = self._calculate_task_class_values(classdict)

    # if we have output params in function arguments, like f(some_p=parameter.output)
    # the new function can not return the result of return
    self.single_result_output = self._is_result_single_output(self.task_params)

    # defaults declared on the parameters themselves
    param_defaults = dict(
        (p.name, p.default)
        for p in self.task_params.values()
        if is_defined(p.default)
    )
    store = parse_and_build_config_store(
        source=passport.format_source_name("defaults"),
        config_values={self.task_config_section: param_defaults},
        set_if_not_exists_only=True,
    )
    self.task_defaults_config_store = store
    store.update(
        parse_and_build_config_store(
            source=passport.format_source_name("defaults_section"),
            config_values=self.defaults,
        )
    )

    # source code information is collected only when the class asks for it
    track_source = self.task_class._conf__track_source_code
    self.task_source_code = (
        _get_task_source_code(self.task_class) if track_source else None
    )
    self.task_module_code = (
        _get_task_module_source_code(self.task_class) if track_source else ""
    )
    self.task_source_file = (
        _get_source_file(self.task_class) if track_source else None
    )
def __get_relevant_params(self):
    """Return user input ``(parameter, value)`` pairs whose value differs from the default.

    Parameters without a defined default are always included. The result is
    sorted by parameter name.
    """

    def _same_as_default(param, current):
        # plain comparison first; if it raises, compare the signatures instead
        try:
            return current == param.default
        except Exception:
            return param.signature(current) == param.signature(param.default)

    relevant = [
        (param, current)
        for param, current in self.params.get_param_values(
            input_only=True, user_only=True
        )
        if not (is_defined(param.default) and _same_as_default(param, current))
    ]
    relevant.sort(key=lambda pair: pair[0].name)
    return relevant
def _build_func_spec_params(self, decorator_kwargs_params):
    """Build parameters out of the arguments of the decorated callable.

    :param decorator_kwargs_params: parameters already produced from the
        decorator keyword arguments; those names are skipped here
    :return: dict of argument name -> built parameter, or a plain default
        value for arguments that already have a definition in ``base_params``
    """
    spec = self.callable_spec
    params = {}

    for arg_name in spec.args:
        # excluded or processed already
        if arg_name in self.exclude or arg_name in decorator_kwargs_params:
            continue

        context = self._get_param_context(arg_name)
        default = spec.defaults.get(arg_name, NOTHING)

        if isinstance(default, ParameterFactory):
            # in-place definition: user_param=parameter[str]
            params[arg_name] = build_parameter(default, context=context)
            continue

        if arg_name in self.base_params:
            # the definition comes from the "base class"; only a new default
            # value (if any) has to be recorded
            if is_defined(default):
                params[arg_name] = default
            continue

        try:
            # regular value: guess the type, falling back to "object"
            value_type = guess_func_arg_value_type(spec, arg_name, default)
            if value_type is None:
                value_type = DefaultObjectValueType()
            factory = get_parameter_for_value_type(value_type).default(default)
            params[arg_name] = build_parameter(factory, context=context)
        except Exception:
            logger.exception("Failed to analyze function arg %s", context)
            raise

    varargs_name = spec.varargs
    if varargs_name:
        # a list parameter named after the `*args` argument of the function
        params[varargs_name] = build_parameter(
            parameter[list], context=self._get_param_context("*args")
        )

    varkw_name = spec.varkw
    if varkw_name:
        # a dict parameter named after the `**kwargs` argument of the function
        params[varkw_name] = build_parameter(
            parameter[typing.Dict[str, typing.Any]],
            context=self._get_param_context("**kwargs"),
        )

    return params
def __get_relevant_params(self):
    """Collect user input parameters whose value is not simply the default.

    :return: list of ``(parameter, value)`` pairs sorted by parameter name;
        parameters without a defined default are always part of it
    """
    selected = []
    pairs = self.params.get_params_with_value(ParameterFilters.USER_INPUTS)
    for param, current in pairs:
        if not is_defined(param.default):
            selected.append((param, current))
            continue

        try:
            unchanged = current == param.default
        except Exception:
            # values that can not be compared directly are compared by signature
            unchanged = param.signature(current) == param.signature(param.default)

        if not unchanged:
            selected.append((param, current))

    selected.sort(key=lambda pair: pair[0].name)
    return selected
def _build_user_task_family(self):
    """Return the user-facing task family, or ``None`` when there is no namespace.

    ``_conf__task_family`` of the task class wins. Otherwise the family is
    ``<namespace>.<class name>``, where the namespace is the class
    ``task_namespace`` if defined, else the namespace captured at class
    creation time (which may stand for the python module name).
    """
    task_cls = self.task_class

    explicit_family = task_cls._conf__task_family
    if explicit_family:
        return explicit_family

    namespace = task_cls.task_namespace
    if not is_defined(namespace):
        namespace = self.namespace_at_class_time
        if namespace == _SAME_AS_PYTHON_MODULE:
            namespace = task_cls.__module__

    if not namespace:
        return None
    return "{}.{}".format(namespace, task_cls.__name__)
def guess_func_arg_value_type(f_spec, name, default_value):
    # type: (_TaskDecoratorSpec, str, Any) -> ValueType
    """Guess the value type of a single function argument.

    Sources are tried in order: the argument annotation, the doc annotation,
    and finally the python type of the default value.

    :param f_spec: function spec exposing ``annotations`` and ``doc_annotations``
    :param name: argument name
    :param default_value: default of the argument, possibly undefined
    :return: the guessed value type, or ``None`` when nothing matched
    """
    registry = get_types_registry()

    annotation = f_spec.annotations.get(name)
    if annotation is not None:
        return registry.get_value_type_of_type(annotation)

    from_doc = registry.get_value_type_of_type_str(f_spec.doc_annotations.get(name))
    if from_doc:
        return from_doc

    if not is_defined(default_value):
        return None
    return registry.get_value_type_of_type(type(default_value))
def _build_decorator_kwargs_params(self):
    """Build parameters from the keyword arguments given to the task decorator.

    Names already in ``self.exclude`` are skipped; names whose decorator
    value is ``None`` are added to ``self.exclude``.

    :return: dict of name -> parameter (or plain override value for names
        known from ``base_params``)
    :raises DatabandBuildError: when a name is defined as a parameter both
        in the function defaults and in the decorator
    """
    spec = self.callable_spec
    params = {}

    for name, param in six.iteritems(self.task_decorator.decorator_kwargs):
        if name in self.exclude:
            # we'll take care of result param later
            continue
        if param is None:
            self.exclude.add(name)
            continue

        context = self._get_param_context(name)

        if not (name in spec.args or name in self.base_params):
            # not part of the real function signature:
            # @task(some_unknown_parameter=parameter)
            logger.info(
                "{} is not part of parameters, creating hidden parameter".format(
                    context
                )
            )

        if name in spec.defaults:
            function_default = spec.defaults[name]
            if isinstance(function_default, ParameterFactory):
                raise DatabandBuildError(
                    "{}: {} has conlficted definition in function and in decorator itself".format(
                        context, name
                    )
                )
            if is_defined(param.parameter.default):
                logger.warning(
                    "Default value conflict between function and @task decorator"
                )
            # the default declared in the function signature is applied
            param = param.default(function_default)

        if isinstance(param, ParameterFactory) or name not in self.base_params:
            # we are going to build a new parameter
            param = build_parameter(param, context=context)

        params[name] = param

    return params
def _pop_kwarg(kwargs, key, default=NOTHING, error_msg=None):
    """Remove ``key`` from ``kwargs`` and return its value.

    :param kwargs: mapping to pop from (modified in place when the key exists)
    :param key: name to pop
    :param default: value returned when the key is missing, if defined
    :param error_msg: message of the raised error; a generic one is used
        when empty
    :raises TypeError: when the key is missing and no default is defined
    """
    if key not in kwargs:
        if not is_defined(default):
            raise TypeError(error_msg or "%s must be specified" % key)
        return default
    return kwargs.pop(key)
def __init__(self, task_class, classdict, namespace_at_class_time):
    """Build the definition of ``task_class``.

    :param task_class: the task class being described
    :param classdict: class dictionary handed to
        ``_calculate_task_class_values``
    :param namespace_at_class_time: namespace captured when the class was
        created; used by ``_build_user_task_family``
    """
    super(TaskDefinition, self).__init__()

    self.task_definition_uid = get_uuid()
    self.hidden = False

    self.task_class = task_class  # type: Type[Task]
    self.namespace_at_class_time = namespace_at_class_time

    # decorated callables carry their own name in the decorator spec
    decorator_spec = self.task_class._conf__decorator_spec
    cls_name = decorator_spec.name if decorator_spec else self.task_class.__name__

    module_name = task_class.__module__
    self.full_task_family = "%s.%s" % (module_name, cls_name)
    self.full_task_family_short = "%s.%s" % (_short_name(module_name), cls_name)

    self.task_family = self._build_user_task_family()
    if self.task_family:
        self.task_config_section = self.task_family
    else:
        # no user-facing family: short name, fully qualified config section
        self.task_family = cls_name
        self.task_config_section = self.full_task_family

    # name -> parameter definition
    self.task_params = dict()  # type: Dict[str, ParameterDefinition]
    # values of the "defaults" attribute
    self.defaults = dict()  # type: Dict[ParameterDefinition, Any]

    self.task_params, self.defaults = self._calculate_task_class_values(classdict)

    # if we have output params in function arguments, like f(some_p=parameter.output)
    # the new function can not return the result of return
    self.single_result_output = self._is_result_single_output(self.task_params)

    # defaults declared on the parameters themselves
    param_defaults = dict(
        (p.name, p.default)
        for p in self.task_params.values()
        if is_defined(p.default)
    )
    store = parse_and_build_config_store(
        source="%s[defaults]" % self.full_task_family_short,
        config_values={self.task_config_section: param_defaults},
        set_if_not_exists_only=True,
    )
    self.task_defaults_config_store = store
    store.update(
        parse_and_build_config_store(
            source="%s[defaults_section]" % self.full_task_family_short,
            config_values=self.defaults,
        )
    )

    # source code information is collected only when the class asks for it
    track_source = self.task_class._conf__track_source_code
    self.task_source_code = (
        _get_task_source_code(self.task_class) if track_source else None
    )
    self.task_module_code = (
        _get_task_module_source_code(self.task_class) if track_source else ""
    )
    self.task_source_file = (
        _get_source_file(self.task_class) if track_source else None
    )
def __init__(
    self,
    task_passport,  # type: TaskPassport
    classdict=None,  # type: Optional[Dict[str, Any]]
    base_task_definitions=None,  # type: Optional[List[TaskDefinition]]
    defaults=None,  # type: Optional[Dict[ParameterDefinition, Any]]
    task_decorator=None,  # type: Optional[TaskDecorator]
    source_code=None,  # type: Optional[TaskSourceCode]
    external_parameters=None,  # type: Optional[Parameters]
    task_definition_uid=None,  # type: Optional[UUID]
):
    """Build a task definition around ``task_passport``.

    :param task_passport: naming information of the task
    :param classdict: class dictionary handed to ``_calculate_task_class_values``
    :param base_task_definitions: definitions of the base tasks; an empty
        list is used when not given
    :param defaults: values handed to ``_calculate_task_defaults``
    :param task_decorator: stored as ``self.task_decorator``
    :param source_code: stored as ``self.source_code``; read when the
        ``task_build.sign_with_task_code`` option is enabled
    :param external_parameters: handed to ``_calculate_task_class_values``
    :param task_definition_uid: uid to use; a new one is generated when empty
    """
    super(TaskDefinition, self).__init__()

    self.hidden = False
    self.task_passport = task_passport
    self.source_code = source_code
    self.task_decorator = task_decorator
    self.base_task_definitions = (
        base_task_definitions or []
    )  # type: List[TaskDefinition]

    # TODO: maybe use properties or other way to delegate those...
    passport = self.task_passport
    self.full_task_family = passport.full_task_family
    self.full_task_family_short = passport.full_task_family_short
    self.task_family = passport.task_family
    self.task_config_section = passport.task_config_section

    # name -> parameter definition
    self.task_param_defs = dict()  # type: Dict[str, ParameterDefinition]
    # values of the "defaults" attribute
    self.defaults = dict()  # type: Dict[ParameterDefinition, Any]

    self.task_param_defs = self._calculate_task_class_values(
        classdict, external_parameters
    )

    # if we have output params in function arguments, like f(some_p=parameter.output)
    # the new function can not return the result of return
    self.single_result_output = self._is_result_single_output(self.task_param_defs)

    # defaults declared on the parameters themselves
    self.param_defaults = dict(
        (p.name, p.default)
        for p in self.task_param_defs.values()
        if is_defined(p.default)
    )

    # TODO: consider joining with task_config
    # TODO: calculate defaults value as _ConfigStore and merge using standard mechanism
    self.defaults = self._calculate_task_defaults(defaults)
    self.task_defaults_config_store = parse_and_build_config_store(
        source=passport.format_source_name("task.defaults"),
        config_values=self.defaults,
        priority=ConfigValuePriority.FALLBACK,
    )

    # optional extra components of the task signature, driven by configuration
    signature_extra = self.task_signature_extra = {}
    if config.getboolean("task_build", "sign_with_full_qualified_name"):
        signature_extra["full_task_family"] = self.full_task_family
    if config.getboolean("task_build", "sign_with_task_code"):
        # NOTE(review): `source_code` defaults to None and is dereferenced
        # here without a check - confirm callers always provide it when
        # this option is enabled.
        signature_extra["task_code_hash"] = user_friendly_signature(
            self.source_code.task_source_code
        )

    self.task_definition_uid = task_definition_uid or get_uuid()
def _spec_params(self):
    """Build the regular parameters of a decorated callable.

    Decorator keyword arguments are processed first, then the remaining
    arguments of the function signature. The "result" parameter and
    ``self`` are not handled here.

    :return: dict of name -> built parameter, or a plain value for names
        that already have a definition in ``base_params``
    :raises DatabandBuildError: when a name is defined as a parameter both
        in the function defaults and in the decorator
    """
    spec = self.decorator_spec
    params = {}
    exclude = {RESULT_PARAM, "self"}

    # pass 1: everything given as keyword arguments to the decorator
    for name, param in six.iteritems(self.decorator_kwargs):
        if name in exclude:
            # we'll take care of result param later
            continue
        if param is None:
            exclude.add(name)
            continue

        context = "%s.%s" % (spec.name, name)

        if not (name in spec.args or name in self.base_params):
            # not part of the real function signature:
            # @task(some_unknown_parameter=parameter)
            logger.info(
                "{} is not part of parameters, creating hidden parameter".format(
                    context
                )
            )

        if name in spec.defaults:
            if isinstance(spec.defaults[name], ParameterFactory):
                raise DatabandBuildError(
                    "{}: {} has conlficted definition in function and in decorator itself".format(
                        context, name
                    )
                )
            if is_defined(param.parameter.default):
                logger.warning(
                    "Default value conflict between function and @task decorator"
                )
            # the default declared in the function signature is applied
            param = param.default(spec.defaults.get(name))

        if isinstance(param, ParameterFactory) or name not in self.base_params:
            # we are going to build a new parameter
            params[name] = build_parameter(param, context=context)
        else:
            # just override value
            params[name] = param

    # pass 2: arguments of the function that were not handled above
    for name in spec.args:
        if name in exclude or name in params:
            # excluded or processed already
            continue

        context = "%s.%s" % (spec.name, name)
        default = spec.defaults.get(name, NOTHING)

        if isinstance(default, ParameterFactory):
            # in-place definition: user_param=parameter[str]
            params[name] = build_parameter(default, context=context)
            continue

        if name in self.base_params:
            # the definition comes from the "base class"; only a new default
            # value (if any) has to be recorded
            if is_defined(default):
                params[name] = default
            continue

        try:
            # regular value: guess the type, falling back to "object"
            value_type = guess_func_arg_value_type(spec, name, default)
            if value_type is None:
                value_type = DefaultObjectValueType()
            factory = get_parameter_for_value_type(value_type).default(default)
            params[name] = build_parameter(factory, context=context)
        except Exception:
            logger.exception("Failed to analyze function arg %s", context)
            raise

    return params