def add_value(self):
    """Render this parameter's value and append it to ``self.row``.

    Input/output parameters are rendered with ``TextBanner.f_io``; all
    other kinds use the parameter definition's ``to_str``.  The text is
    passed through ``safe_string`` with the console preview size taken
    from ``DescribeConfig``.  When the value is a ``Target`` carrying
    target meta, a short preview is appended.  Multi-line values are
    moved below the row and indented to the width of the cells already
    collected.
    """
    preview_limit = DescribeConfig.current().console_value_preview_size

    if self._param_kind() in ["input", "output"]:
        rendered = TextBanner.f_io(self.value)
    else:
        rendered = self.definition.to_str(self.value)
    rendered = safe_string(rendered, preview_limit)

    # attach a preview of the target content when one is available
    if isinstance(self.value, Target) and self.value.target_meta:
        preview = safe_string(self.value.target_meta.value_preview, preview_limit)
        # only short previews are worth showing inline
        if len(preview) < 100:
            rendered = rendered + " :='%s'" % preview

    if rendered and "\n" in rendered:
        # heuristic: indent every line by the width of the tab-joined row
        indent = " " * len("\t".join(str(cell) for cell in self.row))
        indented_lines = [
            "\n%s%s" % (indent, line) for line in rendered.split("\n")
        ]
        rendered = "-->\n" + "".join(indented_lines)

    self.row.append(rendered)
def _get_call_repr(call_name, call_args, call_kwargs):
    """Return a ``name(arg, ..., key=value, ...)`` style description of a call.

    Every argument is rendered as ``repr`` passed through ``safe_string``
    with a limit of 300.  Empty or missing args/kwargs contribute nothing.
    """
    args_part = ""
    if call_args:
        args_part = ", ".join(safe_string(repr(arg), 300) for arg in call_args)

    kwargs_part = ""
    separator = ""
    if call_kwargs:
        # a separator is needed only when positional arguments were rendered
        if args_part:
            separator = ", "
        kwargs_part = ", ".join(
            "%s=%s" % (key, safe_string(repr(val), 300))
            for key, val in iteritems(call_kwargs)
        )

    params = args_part + separator + kwargs_part
    return "{call_name}({params})".format(call_name=call_name, params=params)
def calc_init_value(self, value):
    """Calculate the initial parameter value from a raw input value.

    * ``None`` is returned unchanged.
    * a ``Path`` is wrapped into a target built from its string form.
    * a ``Target`` is returned as is (deferred result, loaded later).
    * anything else is handed to ``self.value_type.parse_value``; string
      values are first passed through ``expand_env_var`` when
      ``self.env_interpolation`` is set.
    """
    if value is None:
        # TODO: may be we still can "normalize" the value
        return None

    if isinstance(value, Path):
        return target(str(value), config=self.target_config)

    if isinstance(value, Target):
        # deferred result -> it is loaded later
        return value

    # the value is processed regardless of whether it requires parsing
    needs_expansion = self.env_interpolation and isinstance(
        value, six.string_types
    )
    if needs_expansion:
        try:
            value = expand_env_var(value)
        except Exception as ex:
            # best effort: keep the unexpanded value and just warn
            logger.warning(
                "failed to expand variable '%s' : %s", safe_string(value), str(ex)
            )

    # an output with a value is a Target or a str to be converted to a
    # target, so it is never loaded here
    load_value = self.load_on_build and not self.is_output()
    return self.value_type.parse_value(
        value, load_value=load_value, target_config=self.target_config
    )
def pformat_config_store_as_table(config_store, sections=None):
    # type: (_ConfigStore, Optional[Iterable[str]]) -> str
    """Format config store values as a text table.

    :param config_store: mapping of section name to a mapping of key to
        config value objects (``value``, ``source`` and ``priority`` are read).
    :param sections: optional section names to show; they are normalised
        with ``_lower_config_name``.  When empty or omitted, every section
        of the store is shown.
    :return: the table produced by ``safe_tabulate``, or ``""`` when there
        is nothing to show.
    """
    if sections:
        section_names = [_lower_config_name(name) for name in sections]
    else:
        section_names = config_store.keys()

    rows = []
    for section_name in section_names:
        values = config_store.get(section_name)
        if not values:
            # unknown or empty section
            continue
        rows.extend(
            (
                section_name,
                key,
                safe_string(config_value.value, 300),
                config_value.source,
                config_value.priority,
            )
            for key, config_value in six.iteritems(values)
        )

    if not rows:
        return ""
    return safe_tabulate(
        tabular_data=rows,
        headers=["Section", "Key", "Value", "Source", "Priority"],
    )
def build_parameter_value(parameter, cf_value):
    # type: (ParameterDefinition, ConfigValue) -> Tuple[ParameterDefinition, Any]
    """Calculate and validate the value of ``parameter`` from ``cf_value``.

    :return: ``(parameter, value)``; the parameter may have been replaced
        by ``_update_parameter_from_runtime_value_type``.
    :raises: the exception built by ``parameter.parameter_exception`` when
        calculating or validating the value fails.
    """
    try:
        parameter = _update_parameter_from_runtime_value_type(parameter, cf_value)
    except Exception:
        # failed value discovery must not fail user code:
        # log the real stack and continue with the original parameter
        logger.exception("Failed to discover runtime for %s", parameter)

    try:
        init_value = parameter.calc_init_value(cf_value.value)
    except Exception as ex:
        raise parameter.parameter_exception("calculate value", ex=ex)

    # Break the strong reference between tasks, otherwise one task would
    # keep a pointer to another task.  If the calculated value is a task
    # that's ok, but the risk is minimised by patching cf_value.
    # NOTE(review): this stringifies the whole config value object rather
    # than cf_value.value - confirm that is intended.
    if isinstance(cf_value.value, _TaskParamContainer):
        cf_value.value = str(cf_value)

    try:
        if not (init_value is None or isinstance(init_value, Target)):
            parameter.validate(init_value)
    except Exception as ex:
        raise parameter.parameter_exception(
            "validate value='%s'" % safe_string(init_value), ex=ex
        )

    return parameter, init_value
def get_param_value_origin(self, param_name):
    # type: (str) -> str
    """Return where the param was created, e.g. ctor/CLI argument/env var/config.

    The source is read from the param meta and passed through
    ``safe_string`` with ``_MAX_VALUE_SIZE``; an empty source is used when
    there is no meta for ``param_name``.
    """
    from dbnd._core.task_ctrl.task_visualiser import _MAX_VALUE_SIZE

    param_meta = self.get_param_meta(param_name)
    if param_meta:
        source = param_meta.source
    else:
        source = ""
    return safe_string(source, _MAX_VALUE_SIZE)
def add_value(self):
    """Render this parameter's value and append it to ``self.row``.

    ``None`` is shown as ``"@None"``.  Input/output parameters are
    rendered with ``TextBanner.f_io`` unless their value type is the
    DataFrame value type; everything else uses the definition's
    ``to_str``.  A short target preview is appended when available, and
    multi-line values are moved below the row and indented.
    """
    import dbnd  # noqa: 401 import dbnd before DataFrameValueType to avoid cyclic imports

    from dbnd._core.settings import DescribeConfig
    from targets.values import DataFrameValueType

    preview_limit = (
        DescribeConfig.from_databand_context().console_value_preview_size
    )

    if self.value is None:
        rendered = "@None"
    else:
        # Always use self.definition.to_str for a pandas DataFrame value,
        # otherwise the value blows up the log and is not readable.
        render_as_io = (
            self._param_kind() in ["input", "output"]
            and not self.definition.value_type_str == DataFrameValueType.type_str
        )
        if render_as_io:
            rendered = TextBanner.f_io(self.value)
        else:
            rendered = self.definition.to_str(self.value)
        rendered = safe_string(rendered, preview_limit)

    # attach a preview of the target content when one is available
    if isinstance(self.value, Target) and self.value.target_meta:
        preview = safe_string(self.value.target_meta.value_preview, preview_limit)
        # only short previews are worth showing inline
        if len(preview) < 100:
            rendered = rendered + " :='%s'" % preview

    if rendered and "\n" in rendered:
        # heuristic: indent every line by the width of the tab-joined row
        indent = " " * len("\t".join(str(cell) for cell in self.row))
        indented_lines = [
            "\n%s%s" % (indent, line) for line in rendered.split("\n")
        ]
        rendered = "-->\n" + "".join(indented_lines)

    self.row.append(rendered)
def __init__(self, config, task_cls, task_args, task_kwargs):
    # type:(DbndConfig, Type[_BaseTask], Any, Any)->None
    """Collect everything needed to build the task meta for ``task_cls``.

    The special keys ``task_family``, ``task_name`` and ``override`` are
    popped from ``task_kwargs`` (the caller's dict is mutated); what is
    left is kept as ``self.task_kwargs``.
    """
    super(TaskMetaFactory, self).__init__(
        config=config,
        task_cls=task_cls,
        task_args=task_args,
        task_kwargs=task_kwargs,
    )

    factory_config = TaskFactoryConfig.from_dbnd_config(config)
    self.task_factory_config = factory_config
    self.verbose_build = factory_config.verbose

    # self.task_family is read here as the fallback, so it is expected to
    # be already set by the parent constructor
    self.task_family = task_kwargs.pop("task_family", self.task_family)

    # extra params from constructor
    explicit_name = task_kwargs.pop("task_name", None)
    self.task_config_override = task_kwargs.pop("override", None) or {}
    self.task_kwargs = task_kwargs

    if explicit_name:
        explicit_name = TASK_ID_INVALID_CHAR_REGEX.sub("_", explicit_name)

    # the user gives an explicit name, otherwise the section of the definition
    self.task_main_config_section = (
        explicit_name or self.task_definition.task_config_section
    )
    self.task_name = self.task_family if explicit_name is None else explicit_name

    self.multi_sec_conf = self._get_task_multi_section_config(config, task_kwargs)

    self.ctor_kwargs = None

    # utilities section
    self.build_warnings = []
    ctor_kwargs_repr = ", ".join(
        "%s=%s" % (name, safe_string(repr(val), 300))
        for name, val in iteritems(self.task_kwargs__ctor)
    )
    self._exc_desc = "%s(%s)@%s" % (
        self.task_family,
        ctor_kwargs_repr,
        str(self.task_cls),
    )
    self.task_errors = []
def build_task_run_info(self):
    """Build a ``TaskRunEnvInfo`` describing the current run environment.

    The machine description comes from the ``ENV_DBND__ENV_MACHINE``
    environment variable, extended with ``image=...`` when
    ``ENV_DBND__ENV_IMAGE`` is set.  The user falls back to
    ``dbnd_getuser()`` when ``self.user`` is empty.
    """
    run_env_uid = get_uuid()

    import dbnd

    logging.debug("Created new task run env with uid '%s'", run_env_uid)

    machine = environ.get(ENV_DBND__ENV_MACHINE, "")
    image = environ.get(ENV_DBND__ENV_IMAGE, None)
    if image:
        machine = machine + " image=%s" % image

    return TaskRunEnvInfo(
        uid=run_env_uid,
        databand_version=dbnd.__version__,
        user_code_version=self.source_version,
        user_code_committed=True,
        cmd_line=subprocess.list2cmdline(sys.argv),
        user=self.user or dbnd_getuser(),
        machine=machine,
        project_root=project_path(),
        user_data=safe_string(self.user_data, max_value_len=500),
        heartbeat=utcnow(),
    )
def build_parameter_value(parameter, cf_value):
    # type: (ParameterDefinition, ConfigValue) -> ParameterValue
    """Build a ``ParameterValue`` for ``parameter`` from the config value.

    Steps:

    1. For non-output parameters with a value, try to discover the value
       type at runtime.  When one is found a warning is recorded, and if
       the declared type is ``DefaultObjectValueType`` the parameter is
       evolved to use the discovered type.  Discovery failures are logged
       and never fail user code.
    2. Calculate the initial value with ``parameter.calc_init_value``.
    3. Validate the calculated value unless it is ``None`` or a ``Target``.

    :param parameter: definition of the parameter being built.
    :param cf_value: config value providing ``value``, ``source``,
        ``require_parse`` and ``warnings``.
    :return: the resulting ``ParameterValue``; its warnings are the ones
        collected here followed by ``cf_value.warnings``.
    :raises: the exception built by ``parameter.parameter_exception`` when
        calculating or validating the value fails.
    """
    from dbnd._core.parameter.parameter_value import ParameterValue as _ParameterValue

    warnings = []
    value = cf_value.value
    try:
        if value is not None and not parameter.is_output():
            updated_value_type = _update_parameter_from_runtime_value_type(
                parameter, value
            )
            if updated_value_type:
                # the message is only built when it is going to be used
                message = (
                    "{parameter}: type of the value at runtime '{runtime}'"
                    " doesn't match user defined type '{compile}'".format(
                        parameter=parameter,
                        runtime=updated_value_type,
                        compile=parameter.value_type,
                    )
                )
                if isinstance(parameter.value_type, DefaultObjectValueType):
                    # we are going to update
                    parameter = attr.evolve(
                        parameter,
                        value_type=updated_value_type,
                        load_on_build=updated_value_type.load_on_build,
                    )
                    message = "%s: updating parameter with the runtime info" % (
                        message
                    )
                # warn anyway
                warnings.append(message)
    except Exception:
        # we don't want to fail user code on failed value discovery
        # we only print message from "friendly exception" and show real stack
        logger.exception("Failed to discover runtime for %s", parameter)

    try:
        p_val = parameter.calc_init_value(value)
    except Exception as ex:
        raise parameter.parameter_exception(
            "calculate value from '%s'" % safe_string(value, 100), ex=ex
        )

    # we need to break strong reference between tasks
    # otherwise we will have pointer from task to another task
    # if p_val is task, that's ok, but let minimize the risk by patching cf_value
    # NOTE(review): this stringifies the whole config value object rather
    # than its value - confirm that is intended.
    if isinstance(value, _TaskParamContainer):
        cf_value.value = str(cf_value)

    try:
        if p_val is not None and not isinstance(p_val, Target):
            parameter.validate(p_val)
    except Exception as ex:
        raise parameter.parameter_exception(
            "validate value='%s'" % safe_string(p_val), ex=ex
        )

    p_value = _ParameterValue(
        parameter=parameter,
        source=cf_value.source,
        # read after the patch above, so it may already be the string form
        source_value=cf_value.value,
        value=p_val,
        parsed=cf_value.require_parse,
        warnings=warnings + cf_value.warnings,
    )
    return p_value
def __init__(self, dbnd_context, config, new_task_factory, task_cls, task_args, task_kwargs):
    # type:(DatabandContext, DbndConfig, Any, Type[_BaseTask], Any, Any)->None
    """Collect the inputs needed to build a task of ``task_cls``.

    The special keys ``task_family``, ``task_name``,
    ``task_config_sections`` and ``override`` are popped from
    ``task_kwargs`` (the caller's dict is mutated); an untouched copy of
    the original kwargs is kept in ``self.task_kwargs__ctor``.  The
    ordered list of config sections for the task is stored in
    ``self.task_config_sections``.
    """
    self.task_cls = task_cls
    self.task_definition = task_cls.task_definition  # type: TaskDefinition
    self.new_task_factory = new_task_factory
    # keep copy of user inputs
    self.task_kwargs__ctor = task_kwargs.copy()
    self.task_args__ctor = list(task_args)
    self.parent_task = try_get_current_task()
    self.config = config
    self.task_factory_config = TaskFactoryConfig.from_dbnd_config(config)
    self.verbose_build = self.task_factory_config.verbose
    # let find if we are running this constructor withing another Databand Task
    self.dbnd_context = dbnd_context
    # NOTE(review): the literal 2 presumably skips the factory's own
    # frames - confirm against find_user_side_frame
    self.task_call_source = [
        self.dbnd_context.user_code_detector.find_user_side_frame(2)
    ]
    # task_call_source was just built as a one-element list, so in practice
    # only parent_task decides this branch
    if self.task_call_source and self.parent_task:
        self.task_call_source.extend(
            self.parent_task.task_meta.task_call_source)
    self.task_family = task_kwargs.pop("task_family", None)
    # extra params from constructor
    self.task_name = task_kwargs.pop("task_name", None)
    kwargs_task_config_sections = task_kwargs.pop("task_config_sections", None)
    self.task_config_override = task_kwargs.pop("override", None) or {}
    # whatever is left after the pops above
    self.task_kwargs = task_kwargs
    if not self.task_family:
        self.task_family = self.task_definition.task_family
    if self.task_name:
        # replace every match of TASK_ID_INVALID_CHAR_REGEX with "_"
        self.task_name = TASK_ID_INVALID_CHAR_REGEX.sub(
            "_", self.task_name)
    # user gives explicit name, or it full_task_family
    self.task_main_config_section = (
        self.task_name or self.task_definition.task_config_section)
    if self.task_name is None:
        self.task_name = self.task_family
    # there is priority of task name over task family, as name is more specific
    sections = [self.task_name]
    # _from at config files
    sections.extend(self._get_task_from_sections(config, self.task_name))
    sections.extend(
        [self.task_family, self.task_definition.full_task_family])
    if kwargs_task_config_sections:
        sections.extend(kwargs_task_config_sections)
    # adding "default sections" - LOWEST PRIORITY
    if issubclass(self.task_definition.task_class, _TaskParamContainer):
        sections += [CONF_TASK_SECTION]
    from dbnd._core.task.config import Config
    if issubclass(self.task_definition.task_class, Config):
        sections += [CONF_CONFIG_SECTION]
    # drop empty entries and duplicates
    sections = list(unique_everseen(filter(None, sections)))
    self.task_config_sections = sections
    self.task_params = list(self.task_definition.task_params.values()
                            )  # type: List[ParameterDefinition]
    self.ctor_kwargs = None
    # utilities section
    self.build_warnings = []
    # short description of the constructor call: family(key=repr, ...)
    self._exc_desc = "%s(%s)" % (
        self.task_family,
        ", ".join(("%s=%s" % (p, safe_string(repr(k), 300))
                   for p, k in iteritems(self.task_kwargs__ctor))),
    )
    self.task_errors = []
def _safe_params(params):
    """Return ``(name, value)`` pairs with every value passed through
    ``safe_string`` using the ``_MAX_PARAM_VALUE_AT_DB`` limit.

    :param params: iterable of ``(name, value)`` pairs.
    """
    safe_pairs = []
    for name, value in params:
        safe_pairs.append((name, safe_string(value, _MAX_PARAM_VALUE_AT_DB)))
    return safe_pairs
def safe_value(value):
    """Return ``str(value)`` passed through ``safe_string`` with a limit
    of 500; ``None`` is returned unchanged."""
    return None if value is None else safe_string(str(value), 500)
def _add_params_info(self):
    """Write the parameters part of the task banner.

    Adds an optional RESULT column (when the task result is a
    ``ResultProxyTarget``), the build warnings collected from the
    parameter value metas, and a PARAMS table with one row per parameter.
    With verbosity of ``FormatterVerbosity.HIGH`` or above, all parameters
    are listed and the table gets the extra "Default" and "Class" columns.
    """
    b = self.banner
    exclude = set(_TASK_FIELDS)
    # special treatment for result
    t_result = getattr(self.task, "result", None)
    from dbnd._core.decorator.schemed_result import ResultProxyTarget
    if isinstance(t_result, ResultProxyTarget):
        b.column("RESULT", t_result)
        # already shown in its own column, keep it out of the table
        exclude.add("result")
    params_data = []
    params_warnings = []
    all_info = self.verbosity >= FormatterVerbosity.HIGH
    relevant_params = []
    for p in self.params.get_params():
        if not all_info:
            # we don't want to show all this switches
            if p.name in exclude:
                continue
            if p.system:
                continue
        relevant_params.append(p)
    for p in relevant_params:
        value = self.params.get_value(p.name)
        value_meta = self.params.get_value_meta(p.name)
        target_config = p.target_config
        # a data target that carries its own config overrides the
        # config declared on the parameter
        if isinstance(value, DataTarget) and hasattr(value, "config"):
            target_config = value.config
        # kind decides both the label and how the value is rendered
        if p.is_output():
            p_kind = "output"
            value_str = b.f_io(value)
        elif not p.load_on_build:
            p_kind = "input"
            value_str = b.f_io(value)
        else:
            p_kind = "param"
            value_str = p.to_str(value)
        value_str = safe_string(value_str, _MAX_VALUE_SIZE)
        value_source = ""
        if value_meta:
            value_source = value_meta.source
            if value_meta.warnings:
                params_warnings.extend(value_meta.warnings)
        type_handler = p.value_type_str
        param_data = [p.name, p_kind, type_handler, target_config, value_source]
        # add source task class of parameter
        if all_info:
            section = p.parameter_origin.get_task_family()
            # leave the cell empty when the parameter is defined by this task
            if section == self.task.get_task_family():
                section = ""
            default = str(p.default)  # TODO
            param_data += [default, section]
        # add preview
        if isinstance(value, Target) and value.target_meta:
            preview_value = safe_string(
                value.target_meta.value_preview, _MAX_VALUE_SIZE
            )
            # we should add minimal preview
            if len(preview_value) < 100:
                value_str += " :='%s'" % preview_value
        if value_str and "\n" in value_str:
            # some simple heuristics around value: move a multi-line value
            # below the row and indent every line by the width of the
            # tab-joined cells collected so far
            extra_padding = " " * len("\t".join(map(str, param_data)))
            value_str = "".join(
                "\n%s%s" % (extra_padding, l) for l in value_str.split("\n")
            )
            value_str = "-->\n" + value_str
        # the value is always the last cell of the row
        param_data.append(value_str)
        params_data.append(param_data)
    # disabled code, kept as found:
    # config_params = [
    #     (p.name, value)
    #     for p, value in c._params.get_param_values()
    #     if value is not None
    # ]
    #
    # b.column(c.task_meta.task_family.upper(), b.f_params(config_params))
    # b.new_line()
    # 'Name' is missing, header is aligned to the last column
    params_header = ["Name", "Kind", "Type", "Format", "Source"]
    if all_info:
        # matches the two extra cells added per row above
        params_header.extend(["Default", "Class"])
    params_header.append("-= Value =-")
    if params_warnings:
        b.column("BUILD WARNINGS:", "")
        b.write("".join("\t%s\n" % pw for pw in params_warnings))
    if params_data:
        p = safe_tabulate(tabular_data=params_data, headers=params_header)
        b.column("PARAMS:", "")
        b.write(p)
    # b.new_line()
    # b.column("OUTPUTS", b.f_io(task_outputs_user))
    b.new_line()