def test_avoids_inferring_signature_if_not_needed(logger):
    # Pass an infer_model_signature stub that modifies the value of x.
    # If infer_model_signature is never invoked, x should remain unchanged.

    x = {"data": 0}

    def modifies(_):
        x["data"] = 1

    resolve_input_example_and_signature(get_input_example, modifies, True,
                                        False, logger)

    assert x["data"] == 0
    logger.warning.assert_not_called()


def test_happy_path_works(logger):
    input_example, signature = resolve_input_example_and_signature(
        get_input_example, infer_model_signature, True, True, logger)

    assert input_example == "data"
    assert signature == "signature"
    logger.warning.assert_not_called()


def test_if_model_signature_inference_fails(logger):
    error_msg = "stack overflow"

    def throws(_):
        raise Exception(error_msg)

    input_example, signature = resolve_input_example_and_signature(
        get_input_example, throws, True, True, logger)

    assert input_example == "data"
    assert signature is None
    logger.warning.assert_called_with("Failed to infer model signature: " +
                                      error_msg)


def test_if_getting_input_example_fails(logger):
    error_msg = "NoneType has no whatever"

    def throws():
        raise Exception(error_msg)

    input_example, signature = resolve_input_example_and_signature(
        throws, infer_model_signature, True, True, logger)

    assert input_example is None
    assert signature is None
    calls = [
        call("Failed to gather input example: " + error_msg),
        call("Failed to infer model signature: " +
             "could not sample data to infer model signature: " + error_msg),
    ]
    logger.warning.assert_has_calls(calls)
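
For reference, here is a minimal sketch of the helper these tests exercise, reconstructed only from the expectations above. The `get_input_example` and `infer_model_signature` fixtures (returning "data" and "signature") are defined outside this excerpt, and the real MLflow implementation may differ in structure and message wording.

def resolve_input_example_and_signature_sketch(
        get_input_example, infer_model_signature,
        log_input_example, log_model_signature, logger):
    # Hypothetical reconstruction of the helper under test, not the real MLflow code.
    input_example = None
    input_example_failure = None
    if log_input_example:
        try:
            input_example = get_input_example()
        except Exception as e:
            input_example_failure = str(e)
            logger.warning("Failed to gather input example: " + str(e))

    signature = None
    if log_model_signature:
        try:
            if input_example is None:
                raise Exception(
                    "could not sample data to infer model signature: "
                    + (input_example_failure or "no input example available"))
            signature = infer_model_signature(input_example)
        except Exception as e:
            logger.warning("Failed to infer model signature: " + str(e))

    return input_example, signature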
Example #5
    def _log_posttraining_metadata(estimator, *args, **kwargs):
        """
        Records metadata for a scikit-learn estimator after training has completed.
        This is intended to be invoked within a patched scikit-learn training routine
        (e.g., `fit()`, `fit_transform()`, ...) and assumes the existence of an active
        MLflow run that can be referenced via the fluent Tracking API.

        :param estimator: The scikit-learn estimator for which to log metadata.
        :param args: The arguments passed to the scikit-learn training routine (e.g.,
                     `fit()`, `fit_transform()`, ...).
        :param kwargs: The keyword arguments passed to the scikit-learn training routine.
        """
        if hasattr(estimator, "score"):
            try:
                score_args = _get_args_for_score(estimator.score,
                                                 estimator.fit, args, kwargs)
                training_score = estimator.score(*score_args)
            except Exception as e:
                msg = (
                    estimator.score.__qualname__ +
                    " failed. The 'training_score' metric will not be recorded. Scoring error: "
                    + str(e))
                _logger.warning(msg)
            else:
                try_mlflow_log(mlflow.log_metric, "training_score",
                               training_score)

        # log common metrics and artifacts for estimators (classifier, regressor)
        _log_specialized_estimator_content(estimator,
                                           mlflow.active_run().info.run_id,
                                           args, kwargs)

        def get_input_example():
            # Fetch an input example using the first several rows of the array-like
            # training data supplied to the training routine (e.g., `fit()`)
            fit_arg_names = _get_arg_names(estimator.fit)
            X_var_name, y_var_name = fit_arg_names[:2]
            input_example = _get_Xy(args, kwargs, X_var_name,
                                    y_var_name)[0][:INPUT_EXAMPLE_SAMPLE_ROWS]
            return input_example

        def infer_model_signature(input_example):
            if not hasattr(estimator, "predict"):
                raise Exception(
                    "the trained model does not specify a `predict` function, "
                    + "which is required in order to infer the signature")

            return infer_signature(input_example,
                                   estimator.predict(input_example))

        if log_models:
            # Will only resolve `input_example` and `signature` if `log_models` is `True`.
            input_example, signature = resolve_input_example_and_signature(
                get_input_example,
                infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )

            try_mlflow_log(
                log_model,
                estimator,
                artifact_path="model",
                signature=signature,
                input_example=input_example,
            )

        if _is_parameter_search_estimator(estimator):
            if hasattr(estimator, "best_estimator_") and log_models:
                try_mlflow_log(
                    log_model,
                    estimator.best_estimator_,
                    artifact_path="best_estimator",
                    signature=signature,
                    input_example=input_example,
                )

            if hasattr(estimator, "best_score_"):
                try_mlflow_log(mlflow.log_metric, "best_cv_score",
                               estimator.best_score_)

            if hasattr(estimator, "best_params_"):
                best_params = {
                    "best_{}".format(param_name): param_value
                    for param_name, param_value in estimator.best_params_.items()
                }
                try_mlflow_log(mlflow.log_params, best_params)

            if hasattr(estimator, "cv_results_"):
                try:
                    # Fetch environment-specific tags (e.g., user and source) to ensure that lineage
                    # information is consistent with the parent run
                    child_tags = context_registry.resolve_tags()
                    child_tags.update({MLFLOW_AUTOLOGGING: FLAVOR_NAME})
                    _create_child_runs_for_parameter_search(
                        cv_estimator=estimator,
                        parent_run=mlflow.active_run(),
                        child_tags=child_tags,
                    )
                except Exception as e:
                    msg = (
                        "Encountered exception during creation of child runs for parameter search."
                        " Child runs may be missing. Exception: {}".format(
                            str(e)))
                    _logger.warning(msg)

                try:
                    cv_results_df = pd.DataFrame.from_dict(
                        estimator.cv_results_)
                    _log_parameter_search_results_as_artifact(
                        cv_results_df,
                        mlflow.active_run().info.run_id)
                except Exception as e:
                    msg = (
                        "Failed to log parameter search results as an artifact."
                        " Exception: {}".format(str(e)))
                    _logger.warning(msg)
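
The flags referenced above (`log_models`, `log_input_examples`, `log_model_signatures`) and `_logger` are closure variables supplied by the enclosing autologging setup. A minimal way to exercise this code path, assuming the standard `mlflow.sklearn.autolog` parameters available since MLflow 1.12, looks like this:

import numpy as np
import mlflow
from sklearn.linear_model import LinearRegression

mlflow.sklearn.autolog(log_input_examples=True, log_model_signatures=True, log_models=True)

with mlflow.start_run():
    X = np.random.rand(20, 3)
    y = X @ np.array([1.0, 2.0, 3.0])
    # The patched fit() invokes _log_posttraining_metadata after training completes.
    LinearRegression().fit(X, y)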
Example #6
    def train(original, *args, **kwargs):
        def record_eval_results(eval_results, metrics_logger):
            """
            Create a callback function that records evaluation results.
            """
            @exception_safe_function
            def callback(env):
                res = {}
                for data_name, eval_name, value, _ in env.evaluation_result_list:
                    key = data_name + "-" + eval_name
                    res[key] = value
                metrics_logger.record_metrics(res, env.iteration)
                eval_results.append(res)

            return callback

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt

            indices = np.argsort(importance)
            features = np.array(features)[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align="center", height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(
                    tmpdir, "feature_importance_{}.png".format(importance_type))
                fig.savefig(filepath)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        params = args[0] if len(args) > 0 else kwargs["params"]
        try_mlflow_log(mlflow.log_params, params)

        unlogged_params = [
            "params",
            "train_set",
            "valid_sets",
            "valid_names",
            "fobj",
            "feval",
            "init_model",
            "evals_result",
            "learning_rates",
            "callbacks",
        ]

        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        all_arg_names = inspect.getfullargspec(original)[0]
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            callback = record_eval_results(eval_results, metrics_logger)
            if num_pos_args >= callbacks_index + 1:
                tmp_list = list(args)
                tmp_list[callbacks_index] += [callback]
                args = tuple(tmp_list)
            elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
                kwargs["callbacks"] += [callback]
            else:
                kwargs["callbacks"] = [callback]

            # training model
            model = original(*args, **kwargs)

            # If early_stopping_rounds is present, log metrics at the best iteration
            # as extra metrics, using the max step + 1.
            early_stopping_index = all_arg_names.index("early_stopping_rounds")
            early_stopping = (num_pos_args >= early_stopping_index + 1
                              or "early_stopping_rounds" in kwargs)
            if early_stopping:
                extra_step = len(eval_results)

                metrics_logger.record_metrics(
                    {"stopped_iteration": extra_step})
                # best_iteration is set even if training does not stop early.
                metrics_logger.record_metrics(
                    {"best_iteration": model.best_iteration})
                # iteration starts from 1 in LightGBM.
                results = eval_results[model.best_iteration - 1]
                metrics_logger.record_metrics(results, step=extra_step)

        # logging feature importance as artifacts.
        for imp_type in ["split", "gain"]:
            features = model.feature_name()
            importance = model.feature_importance(importance_type=imp_type)
            try:
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:
                _logger.exception(
                    "Failed to log feature importance plot. LightGBM autologging "
                    "will ignore the failure and continue. Exception: ")

            imp = dict(zip(features, importance.tolist()))
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(
                    tmpdir, "feature_importance_{}.json".format(imp_type))
                with open(filepath, "w") as f:
                    json.dump(imp, f, indent=2)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                shutil.rmtree(tmpdir)

        # train_set must exist as the original train function already ran successfully
        train_set = args[1] if len(args) > 1 else kwargs.get("train_set")

        # it is possible that the dataset was constructed before the patched
        #   constructor was applied, so we cannot assume the input_example_info exists
        input_example_info = getattr(train_set, "input_example_info", None)

        def get_input_example():
            if input_example_info is None:
                raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
            if input_example_info.error_msg is not None:
                raise Exception(input_example_info.error_msg)
            return input_example_info.input_example

        def infer_model_signature(input_example):
            model_output = model.predict(input_example)
            model_signature = infer_signature(input_example, model_output)
            return model_signature

        # Only log the trained model if the autolog() param log_models is set to True.
        if log_models:
            # Will only resolve `input_example` and `signature` if `log_models` is `True`.
            input_example, signature = resolve_input_example_and_signature(
                get_input_example,
                infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )

            try_mlflow_log(
                log_model,
                model,
                artifact_path="model",
                signature=signature,
                input_example=input_example,
            )

        return model
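
A minimal way to reach this LightGBM patch, under the same assumptions about the `mlflow.lightgbm.autolog` flags:

import numpy as np
import lightgbm as lgb
import mlflow

mlflow.lightgbm.autolog(log_input_examples=True, log_model_signatures=True)

X = np.random.rand(100, 4)
y = np.random.randint(0, 2, 100)
# The patched Dataset constructor is what attaches `input_example_info` to the dataset.
train_set = lgb.Dataset(X, label=y)

with mlflow.start_run():
    lgb.train({"objective": "binary"}, train_set, num_boost_round=10)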
Example #7
    def train(original, *args, **kwargs):
        def record_eval_results(eval_results, metrics_logger):
            """
            Create a callback function that records evaluation results.
            """

            if LooseVersion(xgboost.__version__) >= LooseVersion("1.3.0"):
                # In xgboost >= 1.3.0, user-defined callbacks should inherit
                # `xgboost.callback.TrainingCallback`:
                # https://xgboost.readthedocs.io/en/latest/python/callbacks.html#defining-your-own-callback  # noqa

                class Callback(
                        xgboost.callback.TrainingCallback,
                        metaclass=ExceptionSafeAbstractClass,
                ):
                    def after_iteration(self, model, epoch, evals_log):
                        """
                        Run after each iteration. Return True when training should stop.
                        """
                        # `evals_log` is a nested dict (type: Dict[str, Dict[str, List[float]]])
                        # that looks like this:
                        # {
                        #   "train": {
                        #     "auc": [0.5, 0.6, 0.7, ...],
                        #     ...
                        #   },
                        #   ...
                        # }
                        evaluation_result_dict = {}
                        for data_name, metric_dict in evals_log.items():
                            for metric_name, metric_values in metric_dict.items():
                                key = "{}-{}".format(data_name, metric_name)
                                # The last element in `metric_values` corresponds to
                                # the metric value on the current iteration.
                                evaluation_result_dict[key] = metric_values[-1]

                        metrics_logger.record_metrics(evaluation_result_dict,
                                                      epoch)
                        eval_results.append(evaluation_result_dict)

                        # Return `False` to indicate training should not stop
                        return False

                return Callback()

            else:

                @exception_safe_function
                def callback(env):
                    metrics_logger.record_metrics(
                        dict(env.evaluation_result_list), env.iteration)
                    eval_results.append(dict(env.evaluation_result_list))

                return callback

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt

            features = np.array(features)
            importance = np.array(importance)
            indices = np.argsort(importance)
            features = features[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align="center", height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(
                    tmpdir, "feature_importance_{}.png".format(importance_type))
                fig.savefig(filepath)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        params = args[0] if len(args) > 0 else kwargs["params"]
        try_mlflow_log(mlflow.log_params, params)

        unlogged_params = [
            "params",
            "dtrain",
            "evals",
            "obj",
            "feval",
            "evals_result",
            "xgb_model",
            "callbacks",
            "learning_rates",
        ]
        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        all_arg_names = inspect.getfullargspec(original)[0]
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")

        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            callback = record_eval_results(eval_results, metrics_logger)
            if num_pos_args >= callbacks_index + 1:
                tmp_list = list(args)
                tmp_list[callbacks_index] += [callback]
                args = tuple(tmp_list)
            elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
                kwargs["callbacks"] += [callback]
            else:
                kwargs["callbacks"] = [callback]

            # training model
            model = original(*args, **kwargs)

            # If early_stopping_rounds is present, log metrics at the best iteration
            # as extra metrics, using the max step + 1.
            early_stopping_index = all_arg_names.index("early_stopping_rounds")
            early_stopping = (num_pos_args >= early_stopping_index + 1
                              or "early_stopping_rounds" in kwargs)
            if early_stopping:
                extra_step = len(eval_results)
                metrics_logger.record_metrics(
                    {"stopped_iteration": extra_step - 1})
                metrics_logger.record_metrics(
                    {"best_iteration": model.best_iteration})
                metrics_logger.record_metrics(
                    eval_results[model.best_iteration], extra_step)

        # logging feature importance as artifacts.
        for imp_type in importance_types:
            imp = None
            try:
                imp = model.get_score(importance_type=imp_type)
                features, importance = zip(*imp.items())
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:
                _logger.exception(
                    "Failed to log feature importance plot. XGBoost autologging "
                    "will ignore the failure and continue. Exception: ")

            if imp is not None:
                tmpdir = tempfile.mkdtemp()
                try:
                    filepath = os.path.join(
                        tmpdir, "feature_importance_{}.json".format(imp_type))
                    with open(filepath, "w") as f:
                        json.dump(imp, f)
                    try_mlflow_log(mlflow.log_artifact, filepath)
                finally:
                    shutil.rmtree(tmpdir)

        # dtrain must exist as the original train function already ran successfully
        dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

        # it is possible that the dataset was constructed before the patched
        #   constructor was applied, so we cannot assume the input_example_info exists
        input_example_info = getattr(dtrain, "input_example_info", None)

        def get_input_example():
            if input_example_info is None:
                raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
            if input_example_info.error_msg is not None:
                raise Exception(input_example_info.error_msg)
            return input_example_info.input_example

        def infer_model_signature(input_example):
            model_output = model.predict(xgboost.DMatrix(input_example))
            model_signature = infer_signature(input_example, model_output)
            return model_signature

        # Only log the model if the autolog() param log_models is set to True.
        if log_models:
            # Will only resolve `input_example` and `signature` if `log_models` is `True`.
            input_example, signature = resolve_input_example_and_signature(
                get_input_example,
                infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )

            try_mlflow_log(
                log_model,
                model,
                artifact_path="model",
                signature=signature,
                input_example=input_example,
            )

        return model
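
And the corresponding minimal trigger for the XGBoost patch, again assuming the standard `mlflow.xgboost.autolog` flags:

import numpy as np
import mlflow
import xgboost

mlflow.xgboost.autolog(log_input_examples=True, log_model_signatures=True)

X = np.random.rand(100, 4)
y = np.random.randint(0, 2, 100)
# The patched DMatrix constructor attaches `input_example_info` to the dataset.
dtrain = xgboost.DMatrix(X, label=y)

with mlflow.start_run():
    xgboost.train({"objective": "binary:logistic"}, dtrain, num_boost_round=10)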