@classmethod
def _parse_additional_data(cls, data):
    """
    This function is to be called before converting a metadata model to RDF.
    It extracts RDF information from the additional_data dict and inserts it into the
    related JSON-LD document.
    :param data: Additional data dict, possibly holding "@rdf" and "@json-ld" entries.
    :return: Tuple of the extracted rdf dict and a list of json-ld entries.
    """
    rdf = {}
    if data is not None and "@rdf" in data:
        rdf = data["@rdf"]

    json_ld = []
    if data is not None and "@json-ld" in data:
        for entry in data["@json-ld"]:
            if isinstance(entry, str):
                # String entries are interpreted as json paths into the additional data
                val = data_path(data, *entry.split("."))
                if val is not None:
                    if isinstance(val, dict):
                        json_ld.append(val)
                    elif isinstance(val, list):
                        json_ld.extend(val)
                    else:
                        logger.warning(f"Value found at {entry} is not a valid json-ld entry.")
                else:
                    logger.warning(f"Mapping file doesn't define a json-ld in additional data at: {entry}.")
            elif isinstance(entry, dict):
                json_ld.append(entry)
            else:
                logger.warning(f"Mapping file provided an invalid json-ld entry: {entry}. "
                               f"An entry has to be either a json-ld dict directly or a json path "
                               f"into the additional data.")
    return rdf, json_ld
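# Usage sketch (illustrative only; the keys and values below are assumptions, not
# taken from a real mapping file). "@json-ld" entries may be dicts taken verbatim
# or dot-separated paths resolved against the additional data:
#
#   data = {
#       "@rdf": {"@id": "https://example.org/metric#accuracy"},
#       "@json-ld": [
#           "metric.@schema",                      # path into the dict below
#           {"@context": {"rdfs": "http://www.w3.org/2000/01/rdf-schema#"}}
#       ],
#       "metric": {"@schema": {"rdfs:label": "accuracy"}}
#   }
#   rdf, json_ld = cls._parse_additional_data(data)
#   # rdf     == {"@id": "https://example.org/metric#accuracy"}
#   # json_ld == [{"rdfs:label": "accuracy"},
#   #             {"@context": {"rdfs": "http://www.w3.org/2000/01/rdf-schema#"}}]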
def __post__(self, ctx, *args, _pypads_env: InjectionLoggerEnv,
             _pypads_artifact_fallback: Optional[FileFormats] = None, _logger_call,
             _logger_output, _pypads_result, **kwargs):
    """
    :param ctx: A reference to the context on which the original function was called
    :param args: Args given to the original function
    :param _pypads_artifact_fallback: Format to write the result to an artifact with if the
        metric cannot be logged as a double value into mlflow
    :param _pypads_result: The return value of the tracked call, logged as the metric value
    :param kwargs: Kwargs given to the original function
    :return:
    """
    result = _pypads_result

    # Get data from the mapping or provided additional data
    # Find / extract name
    name = data_path(_pypads_env.data, "metric", "@schema", "rdfs:label",
                     default=".".join([_logger_output.producer.original_call.call_id.context.container.__name__,
                                       _logger_output.producer.original_call.call_id.wrappee.__name__]))

    # Find / extract description
    description = data_path(_pypads_env.data, "metric", "@schema", "rdfs:comment",
                            default=getattr(ctx, "__doc__", "No description found."))

    # Find / extract step
    step = data_path(_pypads_env.data, "metric", "@schema", "step",
                     default=_logger_call.original_call.call_id.call_number)

    # Find / extract documentation
    documentation = data_path(_pypads_env.data, "metric", "@schema", "padre:documentation",
                              default=ctx.__doc__)

    # Build tracked object
    metric_to = MetricTO(name=name, description=description, step=step,
                         documentation=documentation, additional_data=_pypads_env.data,
                         parent=_logger_output)

    # Store the value itself
    if isinstance(result, float):
        metric_to.as_artifact = False
        metric_to.metric = metric_to.store_metric(key=name, value=result,
                                                  description="The metric returned by {}".format(self.name),
                                                  step=step, additional_data=_pypads_env.data)
    else:
        # The value is not a valid double
        logger.warning("Mlflow metrics have to be doubles. The return value of type '"
                       + str(type(result)) + "' of '" + self.name
                       + "' can be logged as an artifact instead. "
                         "Activate this by passing a _pypads_artifact_fallback format.")
        if _pypads_artifact_fallback:
            logger.warning("Logging metric as artifact.")
            metric_to.as_artifact = True
            metric_to.metric = metric_to.store_mem_artifact(self.name, result,
                                                            write_format=_pypads_artifact_fallback,
                                                            description="The metric returned by {}".format(self.name))
        else:
            return

    # Persist tracking object to output
    _logger_output.metric = metric_to.store()
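# Mapping-data sketch (the keys mirror the data_path queries above; the concrete
# values are assumptions for illustration). With additional data shaped like this,
# name, description and step resolve from the schema instead of falling back to
# the call information:
#
#   _pypads_env.data = {
#       "metric": {
#           "@schema": {
#               "rdfs:label": "sklearn.accuracy",
#               "rdfs:comment": "Accuracy classification score.",
#               "step": 0
#           }
#       }
#   }
#   # name == "sklearn.accuracy", description == "Accuracy classification score.",
#   # step == 0; a missing key falls back to the documented default instead.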
def __post__(self, ctx, *args, _pypads_env: InjectionLoggerEnv, _logger_call,
             _logger_output: Union['ParametersILFOutput', LoggerOutput], **kwargs):
    """
    Function logging the parameters of the current pipeline object function call.
    """
    mapping_data = _pypads_env.data

    # Get the estimator name
    estimator = data_str(mapping_data, "estimator", "@schema", "rdfs:label",
                         default=ctx.__class__.__name__)

    hyper_params = FunctionParametersTO(estimator=estimator,
                                        description=f"The parameters of estimator {estimator} with {ctx}.",
                                        parent=_logger_output)

    # List of parameters to extract. Either provided by a mapping file, by the get_params
    # function or by _kwargs.
    relevant_parameters = []
    if data_path(_pypads_env.data, "estimator", "parameters",
                 warning="No parameters are defined in the mapping file for " + str(ctx.__class__)
                         + ". Trying to log parameters without schema definition programmatically."):
        for parameter_type, parameters in data_path(mapping_data, "estimator", "parameters",
                                                    default={}).items():
            for parameter in parameters:
                parameter = data_path(parameter, "@schema")
                key = data_path(parameter, "padre:path")
                name = data_path(parameter, "rdfs:label")

                param_dict = {
                    "name": name,
                    "description": data_path(parameter, "rdfs:comment"),
                    "parameter_type": data_path(parameter, "padre:value_type")
                }

                if hasattr(ctx, key):
                    value = getattr(ctx, key)
                else:
                    _kwargs = kwargs.get("_kwargs", {})
                    if key in _kwargs:
                        value = _kwargs[key]
                    else:
                        logger.warning(f"Couldn't extract the value of the schema-defined parameter {parameter}.")
                        continue
                param_dict["value"] = value
                add_data(mapping_data, "is_a", value=data_path(parameter, "@id"))
                relevant_parameters.append(param_dict)
    else:
        get_params = getattr(ctx, "get_params", None)
        if callable(get_params):
            # Extracting via get_params (valid for sklearn estimators)
            relevant_parameters = [{"name": k, "value": v} for k, v in ctx.get_params().items()]
        else:
            # Trying to get at least the named arguments
            relevant_parameters = [{"name": k, "value": v} for k, v in kwargs["_kwargs"].items()]

    for i, param in enumerate(relevant_parameters):
        name = data_path(param, "name", default="UnknownParameter" + str(i))
        description = data_path(param, "description")
        value = data_path(param, "value")
        parameter_type = data_path(param, "parameter_type", default=str(type(value)))

        try:
            from pypads.app.pypads import get_current_pads
            call_number = get_current_pads().call_tracker.call_number(_pypads_env.call.call_id)
            hyper_params.persist_parameter(".".join([estimator, str(call_number), name]), str(value),
                                           param_type=parameter_type, description=description,
                                           additional_data=mapping_data)
        except Exception as e:
            logger.error(f"Couldn't log parameter {estimator + '.' + name} with value {value}: {e}")

    _logger_output.hyper_parameter_to = hyper_params.store()
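# Mapping-file sketch (illustrative; the keys follow the paths queried in the loop
# above, the concrete parameter values are assumptions). A schema-defined parameter
# that would be picked up by the extraction branch:
#
#   _pypads_env.data = {
#       "estimator": {
#           "parameters": {
#               "model_parameters": [
#                   {"@schema": {
#                       "@id": "padre:C",
#                       "padre:path": "C",
#                       "rdfs:label": "C",
#                       "rdfs:comment": "Inverse of regularization strength.",
#                       "padre:value_type": "float"
#                   }}
#               ]
#           }
#       }
#   }
#   # The loop reads ctx.C (or _kwargs["C"] as a fallback) and persists it under
#   # the key "<estimator>.<call_number>.C".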
def __post__(self, ctx, *args, _pypads_env: InjectionLoggerEnv,
             _logger_call: InjectionLoggerCallModel, _logger_output, _pypads_result, **kwargs):
    """
    This function is used to extract estimator information from the code and the related
    mapping file. It is run after the hooked function is executed. Pypads injects a set of
    default parameters.

    :param ctx: A reference to the context on which the original function was called
    :param args: Args given to the original function
    :param _pypads_env: A logging environment object storing information about the used mappings,
        the original_call etc.
    :param _logger_call: An information object storing additional information about the logger call itself
    :param _logger_output: A prepared result object of the class defined in output_schema_class(cls)
    :param _pypads_result: The return value of the __pre__ function
    :param kwargs: Kwargs given to the original function
    :return:
    """
    # Get data from the mapping file
    mapping_data = _pypads_env.data
    estimator_data = data_str(mapping_data, "estimator", "@schema", default={})

    # Create repository object
    ero = EstimatorRepositoryObject(
        name=data_str(estimator_data, "rdfs:label", default=ctx.__class__.__name__,
                      warning=f"No name given for {ctx.__class__}. Extracting name from class."),
        description=data_str(estimator_data, "rdfs:description", default="Some unknown estimator."),
        documentation=data_str(estimator_data, "padre:documentation", default=ctx.__class__.__doc__,
                               warning=f"No documentation defined in the mapping file for {ctx.__class__}. "
                                       f"Taking code documentation instead."),
        parameter_schema=data_path(estimator_data, "padre:parameters", default="unknown",
                                   warning=f"No parameters are defined in the mapping file for {ctx.__class__}. "
                                           f"Logging estimator without parameters."),
        location=_logger_call.original_call.call_id.context.reference,
        additional_data=estimator_data)

    # Compile identifying hash
    hash_id = persistent_hash(ero.json())

    # Add to repo if needed
    if not _pypads_env.pypads.estimator_repository.has_object(uid=hash_id):
        repo_obj = _pypads_env.pypads.estimator_repository.get_object(uid=hash_id)
        repo_obj.log_json(ero)

    # Create referencing object
    eto = EstimatorTO(repository_reference=hash_id,
                      repository_type=_pypads_env.pypads.estimator_repository.name,
                      parent=_logger_output, additional_data=mapping_data)

    # Store object
    _logger_output.estimator = eto.store()
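# Deduplication sketch (illustrative): the repository entry is keyed by a hash of the
# serialized repository object, so an identical estimator description is written only
# once across runs. persistent_hash and ero.json() are the calls used above; the
# commented outcomes are assumptions about the repository semantics:
#
#   hash_id = persistent_hash(ero.json())   # stable across processes, unlike built-in hash()
#   # First run:  has_object(uid=hash_id) -> False, the entry is created and logged.
#   # Later runs: has_object(uid=hash_id) -> True, only the referencing EstimatorTO
#   #             pointing at the existing entry is stored with the output.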
def __post__(self, ctx, *args, _pypads_env: InjectionLoggerEnv, _logger_call,
             _logger_output: Union['ParametersILF.ParametersILFOutput', LoggerOutput],
             _args, _kwargs, **kwargs):
    """
    Function logging the parameters of the current pipeline object function call.
    """
    mapping_data = _pypads_env.data

    # Get the module name
    module = data_str(mapping_data, "module", "@schema", "rdfs:label",
                      default=ctx.__class__.__name__)

    hyper_params = FunctionParametersTO(estimator=module,
                                        description=f"The parameters of model {module} with {ctx}.",
                                        parent=_logger_output)

    # List of parameters to extract. Either provided by a mapping file, by framework-specific
    # extraction or by _kwargs.
    relevant_parameters = []
    if data_path(_pypads_env.data, "module", "parameters",
                 warning="No parameters are defined in the mapping file for " + str(ctx.__class__)
                         + ". Trying to log parameters without schema definition programmatically."):
        for parameter_type, parameters in data_path(mapping_data, "module", "parameters",
                                                    default={}).items():
            for parameter in parameters:
                parameter = data_path(parameter, "@schema")
                key = data_path(parameter, "padre:path")
                name = data_path(parameter, "rdfs:label")

                param_dict = {
                    "name": name,
                    "description": data_path(parameter, "rdfs:comment"),
                    "parameter_type": data_path(parameter, "padre:value_type")
                }

                if hasattr(ctx, key):
                    value = getattr(ctx, key)
                elif key in _kwargs:
                    value = _kwargs[key]
                else:
                    logger.warning(f"Couldn't extract the value of the schema-defined parameter {parameter}.")
                    continue
                param_dict["value"] = value
                add_data(mapping_data, "is_a", value=data_path(parameter, "@id"))
                relevant_parameters.append(param_dict)
    else:
        import torch
        if isinstance(ctx, torch.optim.Optimizer):
            defaults = getattr(ctx, "defaults", None)
            if defaults is not None:
                # Extracting hyperparameters via the defaults dict (valid for torch optimizers)
                relevant_parameters = [{"name": "{}.{}".format(ctx.__class__.__name__, k), "value": v}
                                       for k, v in defaults.items()]
            else:
                logger.warning('Hyperparameter extraction of optimizer {} failed'.format(str(ctx)))
        elif isinstance(ctx, torch.utils.data.DataLoader):
            # Get all named arguments along with their default values when not explicitly given
            import inspect
            signature = inspect.signature(_pypads_env.callback)
            defaults = {k: v.default for k, v in signature.parameters.items()
                        if v.default is not inspect.Parameter.empty}
            relevant_parameters = [{"name": "{}.{}".format(ctx.__class__.__name__, k), "value": v}
                                   for k, v in {**defaults, **_kwargs}.items()]
        elif isinstance(ctx, torch.nn.Module):
            params = _get_relevant_parameters(ctx)
            relevant_parameters = [{"name": k, "value": v} for k, v in params.items()]
        else:
            logger.warning('Hyperparameter extraction of {} failed'.format(str(ctx)))

    for i, param in enumerate(relevant_parameters):
        name = data_path(param, "name", default="UnknownParameter" + str(i))
        description = data_path(param, "description")
        value = data_path(param, "value")
        parameter_type = data_path(param, "parameter_type", default=str(type(value)))

        hyper_params.persist_parameter(name, str(value), param_type=parameter_type,
                                       description=description, additional_data=mapping_data)

    _logger_output.hyper_parameter_to = hyper_params.store()
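# Extraction sketch for the torch optimizer fallback path (the optimizer instance and
# values are example data; the defaults dict itself is standard torch.optim.Optimizer
# behavior):
#
#   import torch
#   opt = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
#   opt.defaults  # -> {"lr": 0.01, "momentum": 0.9, "dampening": 0, ...}
#   # The branch above turns this into entries like
#   # {"name": "SGD.lr", "value": 0.01}, {"name": "SGD.momentum", "value": 0.9}, ...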