Esempio n. 1
0
    def _parse_additional_data(cls, data):
        """
        This function is to be called before converting a metadata model to rdf.
        It extracts rdf information from the additional_data dict and inserts it into the related json-ld document.
        :param data:
        :return:
        """
        rdf = {}
        if data is not None and "@rdf" in data:
            rdf = data["@rdf"]

        json_ld = {}
        if data is not None and "@json-ld" in data:
            array = data["@json-ld"]
            json_ld = []
            for entry in array:
                if isinstance(entry, str):
                    val = data_path(data, *entry.split("."))
                    if val is not None:
                        if isinstance(val, dict):
                            json_ld.append(val)
                        elif isinstance(val, list):
                            json_ld.extend(val)
                        else:
                            logger.warning(
                                f"Found value at {entry} was not valid.")
                    else:
                        logger.warning(
                            f"Mapping file doesn't define a json-ld in additional data at: {entry}."
                        )
                elif isinstance(entry, dict):
                    json_ld.append(entry)
                else:
                    logger.warning(
                        f"Mapping file provided an invalid json-ld: {entry}. "
                        f"Json-ld has to be either an json-ld directly or an json path in additional data."
                    )
        return rdf, json_ld
Esempio n. 2
0
    def __post__(self,
                 ctx,
                 *args,
                 _pypads_env: InjectionLoggerEnv,
                 _pypads_artifact_fallback: Optional[FileFormats] = None,
                 _logger_call,
                 _logger_output,
                 _pypads_result,
                 **kwargs):
        """
        Track the value returned by the hooked call as a metric.

        Name, description, step and documentation are read from the "metric" -> "@schema" section of the
        mapping data and fall back to values derived from the call itself. Float results are stored as a
        metric; any other result is only stored (as an artifact) if _pypads_artifact_fallback is given,
        otherwise nothing is persisted.

        :param ctx: Context of the hooked call; its __doc__ serves as fallback description / documentation
        :param args: Positional arguments (unused here)
        :param _pypads_env: Logging environment; its data attribute holds the mapping data
        :param _pypads_artifact_fallback: Write to artifact if metric can not be logged as an double value into mlflow
        :param _logger_call: Logger call information; provides the call number used as default step
        :param _logger_output: Output object serving as parent of the tracked metric and receiving the result
        :param _pypads_result: The value to be logged
        :param kwargs: Further keyword arguments (unused here)
        :return: None
        """
        result = _pypads_result

        # Get data from mapping or provided additional data
        # Find / extract name
        name = data_path(
            _pypads_env.data,
            "metric",
            "@schema",
            "rdfs:label",
            default=".".join([
                _logger_output.producer.original_call.call_id.context.
                container.__name__,
                _logger_output.producer.original_call.call_id.wrappee.__name__
            ]))

        # Find / extract description
        description = data_path(_pypads_env.data,
                                "metric",
                                "@schema",
                                "rdfs:comment",
                                default=getattr(ctx, "__doc__",
                                                "No description found."))

        # Find / extract step
        step = data_path(
            _pypads_env.data,
            "metric",
            "@schema",
            "step",
            default=_logger_call.original_call.call_id.call_number)

        # Find / extract documentation
        documentation = data_path(_pypads_env.data,
                                  "metric",
                                  "@schema",
                                  "padre:documentation",
                                  default=ctx.__doc__)

        # Build tracked object. The resolved step is used here as well, so the tracking object and the
        # stored metric value agree on the step even when the mapping data overrides it.
        metric_to = MetricTO(
            name=name,
            description=description,
            step=step,
            documentation=documentation,
            additional_data=_pypads_env.data,
            parent=_logger_output)

        # Store the value itself
        if isinstance(result, float):
            metric_to.as_artifact = False
            metric_to.metric = metric_to.store_metric(
                key=name,
                value=result,
                description="The metric returned by {}".format(self.name),
                step=step,
                additional_data=_pypads_env.data)
        else:

            # If value is not a valid double
            logger.warning(
                "Mlflow metrics have to be doubles. Could log the return value of type '"
                + str(type(result)) + "' of '" + self.name +
                "' as artifact instead. Activate with _pypads_artifact_fallback=True"
            )
            if not _pypads_artifact_fallback:
                # Nothing can be stored, so the tracking object is not persisted either.
                return

            logger.warning("Logging metric as artifact.")
            metric_to.as_artifact = True
            metric_to.metric = metric_to.store_mem_artifact(
                self.name,
                result,
                write_format=_pypads_artifact_fallback,
                description="The metric returned by {}".format(self.name))

        # Persist tracking object to output
        _logger_output.metric = metric_to.store()
Esempio n. 3
0
    def __post__(self, ctx, *args, _pypads_env: InjectionLoggerEnv,
                 _logger_call, _logger_output: Union['ParametersILFOutput',
                                                     LoggerOutput], **kwargs):
        """
        Function logging the parameters of the current pipeline object function call.

        Parameters are taken from the "estimator" -> "parameters" section of the mapping data if it is
        present. Otherwise they are read from ctx.get_params() if ctx offers such a callable, or from the
        keyword arguments of the hooked call as a last resort.

        :param ctx: Object the hooked function was called on
        :param args: Positional arguments (unused here)
        :param _pypads_env: Logging environment; its data attribute holds the mapping data
        :param _logger_call: Unused here
        :param _logger_output: Output object receiving the stored parameters as hyper_parameter_to
        :param kwargs: Further keyword arguments; the "_kwargs" entry is expected to hold the keyword
         arguments of the hooked call
        :return: None
        """

        mapping_data = _pypads_env.data

        # Get the estimator name
        estimator = data_str(mapping_data,
                             "estimator",
                             "@schema",
                             "rdfs:label",
                             default=ctx.__class__.__name__)

        hyper_params = FunctionParametersTO(
            estimator=estimator,
            description=f"The parameters of estimator {estimator} with {ctx}.",
            parent=_logger_output)

        # kwargs is a plain dict, so the call arguments have to be read by key and not by attribute.
        call_kwargs = kwargs.get("_kwargs") or {}

        # List of parameters to extract. Either provided by a mapping file or by get_params function or by _kwargs
        relevant_parameters = []

        schema_parameters = data_path(
            mapping_data,
            "estimator",
            "parameters",
            warning="No parameters are defined on the mapping file for " +
            str(ctx.__class__) +
            ". Trying to log parameters without schema definition programmatically."
        )
        if schema_parameters:
            for parameters in schema_parameters.values():
                for parameter in parameters:
                    parameter = data_path(parameter, "@schema")
                    key = data_path(parameter, "padre:path")
                    name = data_path(parameter, "rdfs:label")

                    param_dict = {
                        "name": name,
                        "description": data_path(parameter, "rdfs:comment"),
                        "parameter_type": data_path(parameter,
                                                    "padre:value_type")
                    }

                    # hasattr() raises a TypeError for non-string names (e.g. a missing padre:path),
                    # such entries are reported and skipped like any other unresolvable parameter.
                    has_key = isinstance(key, str)
                    if has_key and hasattr(ctx, key):
                        value = getattr(ctx, key)
                    elif has_key and key in call_kwargs:
                        value = call_kwargs[key]
                    else:
                        logger.warning(
                            f"Couldn't extract value of in schema defined parameter {parameter}."
                        )
                        continue
                    param_dict["value"] = value
                    add_data(mapping_data,
                             "is_a",
                             value=data_path(parameter, "@id"))
                    relevant_parameters.append(param_dict)

        else:
            get_params = getattr(ctx, "get_params", None)
            if callable(get_params):
                # Extracting via get_params (valid for sklearn)
                available = get_params()
            else:
                # Trying to get at least the named arguments
                available = call_kwargs
            relevant_parameters = [{
                "name": k,
                "value": v
            } for k, v in available.items()]

        for i, param in enumerate(relevant_parameters):
            name = data_path(param,
                             "name",
                             default="UnknownParameter" + str(i))
            description = data_path(param, "description")
            value = data_path(param, "value")
            parameter_type = data_path(param,
                                       "parameter_type",
                                       default=str(type(value)))

            # Best effort: a parameter which can't be persisted is reported but doesn't abort the logger.
            try:
                from pypads.app.pypads import get_current_pads
                call_number = get_current_pads().call_tracker.call_number(
                    _pypads_env.call.call_id)
                hyper_params.persist_parameter(".".join(
                    [estimator, str(call_number), name]),
                                               str(value),
                                               param_type=parameter_type,
                                               description=description,
                                               additional_data=mapping_data)
            except Exception as e:
                # Formatted instead of concatenated, so a non-string name can't raise inside the handler.
                logger.error(
                    f"Couldn't log parameter {estimator}.{name} with value {value}: {e}"
                )

        _logger_output.hyper_parameter_to = hyper_params.store()
Esempio n. 4
0
    def __post__(self, ctx, *args, _pypads_env: InjectionLoggerEnv,
                 _logger_call: InjectionLoggerCallModel, _logger_output,
                 _pypads_result, **kwargs):
        """
        Extract estimator information from the code and the related mapping file.

        A repository object describing the estimator is built and logged to the estimator repository
        under its hash unless the repository already has an object for that hash. Afterwards a tracking
        object referencing the repository entry is stored on the logger output.

        :param ctx: A reference to the context on which the original function was called
        :param args: Args given to the original function
        :param _pypads_env: A logging environment object storing information about the used mappings, original_call etc.
        :param _logger_call: An information object storing additional information about the logger call itself
        :param _logger_output: The output object on which the estimator tracking object is stored
        :param _pypads_result: Result injected by pypads (not used by this function)
        :param kwargs: Kwargs given to the original function
        :return: None
        """
        ctx_class = ctx.__class__

        # Get data from mapping file
        mapping_data = _pypads_env.data
        # NOTE(review): the schema is fetched via data_str although it is used as a nested structure
        # below - confirm that data_str doesn't stringify the value.
        estimator_data = data_str(mapping_data,
                                  "estimator",
                                  "@schema",
                                  default={})

        # Collect the single fields of the repository object
        name = data_str(estimator_data,
                        "rdfs:label",
                        default=ctx_class.__name__,
                        warning=f"No name given for {ctx_class}. "
                        f"Extracting name from class.")
        description = data_str(estimator_data,
                               "rdfs:description",
                               default="Some unknown estimator.")
        documentation = data_str(
            estimator_data,
            "padre:documentation",
            default=ctx_class.__doc__,
            warning=
            f"No documentation defined on the mapping file for {ctx_class}. "
            f"Taking code documentation instead.")
        parameter_schema = data_path(
            estimator_data,
            "padre:parameters",
            default="unkown",
            warning=
            f"No parameters are defined on the mapping file for {ctx_class}. "
            f"Logging estimator without parameters.")
        location = _logger_call.original_call.call_id.context.reference

        # Create repository object
        ero = EstimatorRepositoryObject(name=name,
                                        description=description,
                                        documentation=documentation,
                                        parameter_schema=parameter_schema,
                                        location=location,
                                        additional_data=estimator_data)

        # Compile identifying hash
        hash_id = persistent_hash(ero.json())

        # Add to repo if needed
        repository = _pypads_env.pypads.estimator_repository
        already_known = repository.has_object(uid=hash_id)
        if not already_known:
            repository.get_object(uid=hash_id).log_json(ero)

        # Create referencing object and store it
        eto = EstimatorTO(repository_reference=hash_id,
                          repository_type=repository.name,
                          parent=_logger_output,
                          additional_data=mapping_data)
        _logger_output.estimator = eto.store()
Esempio n. 5
0
    def __post__(self, ctx, *args, _pypads_env: InjectionLoggerEnv,
                 _logger_call,
                 _logger_output: Union['ParametersILF.ParametersILFOutput',
                                       LoggerOutput], _args, _kwargs,
                 **kwargs):
        """
        Function logging the parameters of the current pipeline object function call.

        Parameters are taken from the "module" -> "parameters" section of the mapping data if it is
        present. Otherwise they are extracted depending on the type of ctx:
        torch optimizers via their defaults dict, data loaders via the signature defaults of the hooked
        callback merged with the given keyword arguments and modules via _get_relevant_parameters.

        :param ctx: Object the hooked function was called on
        :param args: Positional arguments (unused here)
        :param _pypads_env: Logging environment; provides the mapping data and the hooked callback
        :param _logger_call: Unused here
        :param _logger_output: Output object receiving the stored parameters as hyper_parameter_to
        :param _args: Unused here
        :param _kwargs: Mapping of the keyword arguments of the hooked call
        :param kwargs: Further keyword arguments (unused here)
        :return: None
        """

        mapping_data = _pypads_env.data

        # Get the estimator name
        module = data_str(mapping_data,
                          "module",
                          "@schema",
                          "rdfs:label",
                          default=ctx.__class__.__name__)

        hyper_params = FunctionParametersTO(
            estimator=module,
            description=f"The parameters of model {module} with {ctx}.",
            parent=_logger_output)

        # List of parameters to extract. Either provided by a mapping file or by get_params function or by _kwargs
        relevant_parameters = []

        schema_parameters = data_path(
            mapping_data,
            "module",
            "parameters",
            warning="No parameters are defined on the mapping file for " +
            str(ctx.__class__) +
            ". Trying to log parameters without schema definition programmatically."
        )
        if schema_parameters:
            for parameters in schema_parameters.values():
                for parameter in parameters:
                    parameter = data_path(parameter, "@schema")
                    key = data_path(parameter, "padre:path")
                    name = data_path(parameter, "rdfs:label")

                    param_dict = {
                        "name": name,
                        "description": data_path(parameter, "rdfs:comment"),
                        "parameter_type": data_path(parameter,
                                                    "padre:value_type")
                    }

                    # hasattr() raises a TypeError for non-string names (e.g. a missing padre:path),
                    # such entries are reported and skipped like any other unresolvable parameter.
                    has_key = isinstance(key, str)
                    if has_key and hasattr(ctx, key):
                        value = getattr(ctx, key)
                    elif has_key and key in _kwargs:
                        # _kwargs is a mapping, so the value is looked up by key and not by attribute.
                        value = _kwargs[key]
                    else:
                        logger.warning(
                            f"Couldn't extract value of in schema defined parameter {parameter}."
                        )
                        continue
                    param_dict["value"] = value
                    add_data(mapping_data,
                             "is_a",
                             value=data_path(parameter, "@id"))
                    relevant_parameters.append(param_dict)

        else:
            import torch
            if isinstance(ctx, torch.optim.Optimizer):
                # A missing attribute is handled like an empty one instead of raising an AttributeError.
                defaults = getattr(ctx, "defaults", None)
                if defaults is not None:

                    # Extracting hyperparameters via defaults dict (valid for torch optimizers)
                    relevant_parameters = [{
                        "name":
                        "{}.{}".format(ctx.__class__.__name__, k),
                        "value":
                        v
                    } for k, v in defaults.items()]
                else:
                    logger.warning(
                        'Hyper Parameters extraction of optimizer {} failed'.
                        format(str(ctx)))
            elif isinstance(ctx, torch.utils.data.DataLoader):
                # Get all the named arguments along with default values if not given
                import inspect
                signature = inspect.signature(_pypads_env.callback)
                defaults = {
                    k: v.default
                    for k, v in signature.parameters.items()
                    if v.default is not inspect.Parameter.empty
                }
                # Explicitly passed arguments take precedence over the signature defaults.
                relevant_parameters = [{
                    "name":
                    "{}.{}".format(ctx.__class__.__name__, k),
                    "value":
                    v
                } for k, v in {
                    **defaults,
                    **_kwargs
                }.items()]
            elif isinstance(ctx, torch.nn.Module):
                params = _get_relevant_parameters(ctx)
                relevant_parameters = [{
                    "name": k,
                    "value": v
                } for k, v in params.items()]
            else:
                logger.warning(
                    'Hyper Parameters extraction of {} failed'.format(
                        str(ctx)))
        for i, param in enumerate(relevant_parameters):
            name = data_path(param,
                             "name",
                             default="UnknownParameter" + str(i))
            description = data_path(param, "description")
            value = data_path(param, "value")
            parameter_type = data_path(param,
                                       "parameter_type",
                                       default=str(type(value)))

            hyper_params.persist_parameter(name,
                                           str(value),
                                           param_type=parameter_type,
                                           description=description,
                                           additional_data=mapping_data)

        _logger_output.hyper_parameter_to = hyper_params.store()