Example #1
def _get_args_for_metrics(fit_func, fit_args, fit_kwargs):
    """
    Get arguments to pass to metric computations in the following steps.

    1. Extract X and y from fit_args and fit_kwargs.
    2. If the sample_weight argument exists in fit_func,
       extract it from fit_args or fit_kwargs and return (X, y, sample_weight);
       otherwise return (X, y, None).

    :param fit_func: A fit function object.
    :param fit_args: Positional arguments given to fit_func.
    :param fit_kwargs: Keyword arguments given to fit_func.

    :returns: A tuple (X, y, sample_weight), where `y` and `sample_weight` may be
              `None` if the specified `fit_args` and `fit_kwargs` do not specify labels or
              a sample weighting.
    """

    def _get_Xy(args, kwargs, X_var_name, y_var_name):
        # corresponds to: model.fit(X, y)
        if len(args) >= 2:
            return args[:2]

        # corresponds to: model.fit(X, <y_var_name>=y)
        if len(args) == 1:
            return args[0], kwargs.get(y_var_name)

        # corresponds to: model.fit(<X_var_name>=X, <y_var_name>=y)
        return kwargs[X_var_name], kwargs.get(y_var_name)

    def _get_sample_weight(arg_names, args, kwargs):
        sample_weight_index = arg_names.index(_SAMPLE_WEIGHT)

        # corresponds to: model.fit(X, y, ..., sample_weight)
        if len(args) > sample_weight_index:
            return args[sample_weight_index]

        # corresponds to: model.fit(X, y, ..., sample_weight=sample_weight)
        if _SAMPLE_WEIGHT in kwargs:
            return kwargs[_SAMPLE_WEIGHT]

        return None

    fit_arg_names = _get_arg_names(fit_func)

    # In most cases, X_var_name and y_var_name become "X" and "y", respectively.
    # However, certain sklearn models use different variable names for X and y.
    # E.g., see: https://scikit-learn.org/stable/modules/generated/sklearn.multioutput.MultiOutputClassifier.html#sklearn.multioutput.MultiOutputClassifier.fit
    X_var_name, y_var_name = fit_arg_names[:2]
    Xy = _get_Xy(fit_args, fit_kwargs, X_var_name, y_var_name)
    sample_weight = (
        _get_sample_weight(fit_arg_names, fit_args, fit_kwargs)
        if (_SAMPLE_WEIGHT in fit_arg_names)
        else None
    )

    return (*Xy, sample_weight)
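
For orientation, here is a minimal usage sketch of the helper above. It assumes `_SAMPLE_WEIGHT` is the string "sample_weight" and that `_get_arg_names` returns the fit method's parameter names (both assumptions about the surrounding module, not shown in this snippet):

# Hypothetical usage sketch, not part of the original module.
import numpy as np
from sklearn.linear_model import LinearRegression

model = LinearRegression()
X = np.array([[1.0], [2.0], [3.0]])
y = np.array([1.0, 2.0, 3.0])
w = np.array([1.0, 0.5, 2.0])

# X and y passed positionally, sample_weight passed as a keyword argument.
X_out, y_out, w_out = _get_args_for_metrics(model.fit, (X, y), {"sample_weight": w})
assert w_out is w

# Only X supplied: y and sample_weight come back as None.
X_out, y_out, w_out = _get_args_for_metrics(model.fit, (X,), {})
assert y_out is None and w_out is None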
Example #2
def _log_estimator_content(autologging_client,
                           estimator,
                           run_id,
                           prefix,
                           X,
                           y_true=None,
                           sample_weight=None):
    """
    Logs content for the given estimator, which includes metrics and artifacts that might be
    tailored to the estimator's type (e.g., regression vs classification). Training labels
    are required for metric computation; metrics will be omitted if labels are not available.

    :param autologging_client: An instance of `MlflowAutologgingQueueingClient` used for
                               efficiently logging run data to MLflow Tracking.
    :param estimator: The estimator used to compute metrics and artifacts.
    :param run_id: The run under which the content is logged.
    :param prefix: A prefix used to name the logged content. Typically it's 'training_' for
                   training-time content and user-controlled for evaluation-time content.
    :param X: The data samples.
    :param y_true: Labels.
    :param sample_weight: Per-sample weights used in the computation of metrics and artifacts.
    :return: A dict of the computed metrics.
    """
    metrics = _log_specialized_estimator_content(
        autologging_client=autologging_client,
        fitted_estimator=estimator,
        run_id=run_id,
        prefix=prefix,
        X=X,
        y_true=y_true,
        sample_weight=sample_weight,
    )

    if hasattr(estimator, "score") and y_true is not None:
        try:
            # Use the sample weight only if it is present in the score args
            score_arg_names = _get_arg_names(estimator.score)
            score_args = (
                (X, y_true, sample_weight)
                if _SAMPLE_WEIGHT in score_arg_names
                else (X, y_true)
            )
            score = estimator.score(*score_args)
        except Exception as e:
            msg = (
                estimator.score.__qualname__
                + " failed. The 'training_score' metric will not be recorded. Scoring error: "
                + str(e)
            )
            _logger.warning(msg)
        else:
            score_key = prefix + "score"
            autologging_client.log_metrics(run_id=run_id,
                                           metrics={score_key: score})
            metrics[score_key] = score

    return metrics
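
The conditional around `estimator.score` above only forwards `sample_weight` when the scorer's signature accepts it. A standalone sketch of that pattern, using `inspect` directly in place of the module's `_get_arg_names` helper (an assumption about what that helper does):

# Illustrative sketch, not part of the original module.
import inspect

def score_with_optional_weight(estimator, X, y_true, sample_weight=None):
    # Forward sample_weight only when the estimator's score() signature accepts it,
    # mirroring the guard used above.
    score_arg_names = list(inspect.signature(estimator.score).parameters)
    if "sample_weight" in score_arg_names:
        return estimator.score(X, y_true, sample_weight)
    return estimator.score(X, y_true)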
Example #3
    def train(original, *args, **kwargs):
        def record_eval_results(eval_results, metrics_logger):
            """
            Create a callback function that records evaluation results.
            """

            @exception_safe_function
            def callback(env):
                res = {}
                for data_name, eval_name, value, _ in env.evaluation_result_list:
                    key = data_name + "-" + eval_name
                    res[key] = value
                metrics_logger.record_metrics(res, env.iteration)
                eval_results.append(res)

            return callback

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt

            indices = np.argsort(importance)
            features = np.array(features)[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align="center", height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, "feature_importance_{}.png".format(importance_type))
                fig.savefig(filepath)
                mlflow.log_artifact(filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        autologging_client = MlflowAutologgingQueueingClient()

        # logging booster params separately so that they are recorded as individual key/value
        # pairs, making them easier to compare across runs.
        booster_params = args[0] if len(args) > 0 else kwargs["params"]
        autologging_client.log_params(run_id=mlflow.active_run().info.run_id, params=booster_params)

        unlogged_params = [
            "params",
            "train_set",
            "valid_sets",
            "valid_names",
            "fobj",
            "feval",
            "init_model",
            "evals_result",
            "learning_rates",
            "callbacks",
        ]

        params_to_log_for_fn = get_mlflow_run_params_for_fn_args(
            original, args, kwargs, unlogged_params
        )
        autologging_client.log_params(
            run_id=mlflow.active_run().info.run_id, params=params_to_log_for_fn
        )

        param_logging_operations = autologging_client.flush(synchronous=False)

        all_arg_names = _get_arg_names(original)
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            callback = record_eval_results(eval_results, metrics_logger)
            if num_pos_args >= callbacks_index + 1:
                tmp_list = list(args)
                tmp_list[callbacks_index] += [callback]
                args = tuple(tmp_list)
            elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
                kwargs["callbacks"] += [callback]
            else:
                kwargs["callbacks"] = [callback]

            # training model
            model = original(*args, **kwargs)

            # If early_stopping_rounds is present, log the metrics of the best iteration again
            # as extra metrics, using max step + 1 as the step.
            early_stopping_index = all_arg_names.index("early_stopping_rounds")
            early_stopping = (
                num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
            )
            if early_stopping:
                extra_step = len(eval_results)
                autologging_client.log_metrics(
                    run_id=mlflow.active_run().info.run_id,
                    metrics={
                        "stopped_iteration": extra_step,
                        # best_iteration is set even if training does not stop early.
                        "best_iteration": model.best_iteration,
                    },
                )
                # iteration starts from 1 in LightGBM.
                last_iter_results = eval_results[model.best_iteration - 1]
                autologging_client.log_metrics(
                    run_id=mlflow.active_run().info.run_id,
                    metrics=last_iter_results,
                    step=extra_step,
                )
                early_stopping_logging_operations = autologging_client.flush(synchronous=False)

        # logging feature importance as artifacts.
        for imp_type in ["split", "gain"]:
            features = model.feature_name()
            importance = model.feature_importance(importance_type=imp_type)
            try:
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:
                _logger.exception(
                    "Failed to log feature importance plot. LightGBM autologging "
                    "will ignore the failure and continue. Exception: "
                )

            imp = {ft: imp_val for ft, imp_val in zip(features, importance.tolist())}
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, "feature_importance_{}.json".format(imp_type))
                with open(filepath, "w") as f:
                    json.dump(imp, f, indent=2)
                mlflow.log_artifact(filepath)
            finally:
                shutil.rmtree(tmpdir)

        # train_set must exist as the original train function already ran successfully
        train_set = args[1] if len(args) > 1 else kwargs.get("train_set")

        # it is possible that the dataset was constructed before the patched
        #   constructor was applied, so we cannot assume the input_example_info exists
        input_example_info = getattr(train_set, "input_example_info", None)

        def get_input_example():
            if input_example_info is None:
                raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
            if input_example_info.error_msg is not None:
                raise Exception(input_example_info.error_msg)
            return input_example_info.input_example

        def infer_model_signature(input_example):
            model_output = model.predict(input_example)
            model_signature = infer_signature(input_example, model_output)
            return model_signature

        # Log the trained model only if the autolog() `log_models` flag is set to True.
        if log_models:
            # Will only resolve `input_example` and `signature` if `log_models` is `True`.
            input_example, signature = resolve_input_example_and_signature(
                get_input_example,
                infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )

            log_model(
                model, artifact_path="model", signature=signature, input_example=input_example,
            )

        param_logging_operations.await_completion()
        if early_stopping:
            early_stopping_logging_operations.await_completion()

        return model
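
The callback-injection block in the middle of `train` has to handle three cases: `callbacks` passed positionally, passed as a keyword, or not passed at all. A standalone sketch of that argument surgery (the helper name is illustrative, not part of the original code):

def _inject_callback(arg_names, args, kwargs, callback, param="callbacks"):
    # Hypothetical helper: append `callback` to the `param` list no matter how
    # (or whether) the caller supplied it, returning updated (args, kwargs).
    idx = arg_names.index(param)
    if len(args) >= idx + 1:
        new_args = list(args)
        new_args[idx] = list(new_args[idx] or []) + [callback]
        return tuple(new_args), kwargs
    if kwargs.get(param) is not None:
        return args, {**kwargs, param: list(kwargs[param]) + [callback]}
    return args, {**kwargs, param: [callback]}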
Example #4
    def train(_log_models, original, *args, **kwargs):
        def record_eval_results(eval_results, metrics_logger):
            """
            Create a callback function that records evaluation results.
            """
            # TODO: Remove `replace("SNAPSHOT", "dev")` once the following issue is addressed:
            #       https://github.com/dmlc/xgboost/issues/6984
            from mlflow.xgboost._autolog import IS_TRAINING_CALLBACK_SUPPORTED

            if IS_TRAINING_CALLBACK_SUPPORTED:
                from mlflow.xgboost._autolog import AutologCallback

                # In xgboost >= 1.3.0, user-defined callbacks should inherit
                # `xgboost.callback.TrainingCallback`:
                # https://xgboost.readthedocs.io/en/latest/python/callbacks.html#defining-your-own-callback
                return AutologCallback(metrics_logger, eval_results)
            else:
                from mlflow.xgboost._autolog import autolog_callback

                return picklable_exception_safe_function(
                    functools.partial(
                        autolog_callback, metrics_logger=metrics_logger, eval_results=eval_results
                    )
                )

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt
            from cycler import cycler

            features = np.array(features)

            # Structure the supplied `importance` values as a `num_features`-by-`num_classes` matrix
            importances_per_class_by_feature = np.array(importance)
            if importances_per_class_by_feature.ndim <= 1:
                # In this case, the supplied `importance` values are not given per class. Rather,
                # one importance value is given per feature. For consistency with the assumed
                # `num_features`-by-`num_classes` matrix structure, we coerce the importance
                # values to a `num_features`-by-1 matrix
                indices = np.argsort(importance)
                # Sort features and importance values by magnitude during transformation to a
                # `num_features`-by-`num_classes` matrix
                features = features[indices]
                importances_per_class_by_feature = np.array(
                    [[imp] for imp in importances_per_class_by_feature[indices]]
                )
                # In this case, do not include class labels on the feature importance plot because
                # only one importance value has been provided per feature, rather than one
                # importance value per class for each feature
                label_classes_on_plot = False
            else:
                importance_value_magnitudes = np.abs(importances_per_class_by_feature).sum(axis=1)
                indices = np.argsort(importance_value_magnitudes)
                features = features[indices]
                importances_per_class_by_feature = importances_per_class_by_feature[indices]
                label_classes_on_plot = True

            num_classes = importances_per_class_by_feature.shape[1]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            h = h + 0.1 * num_classes if num_classes > 1 else h
            fig, ax = plt.subplots(figsize=(w, h))
            # When importance values are provided for each class per feature, we want to ensure
            # that the same color is used for all bars in the bar chart that have the same class
            colors_to_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"][:num_classes]
            color_cycler = cycler(color=colors_to_cycle)
            ax.set_prop_cycle(color_cycler)

            # The following logic operates on one feature at a time, adding a bar to the bar chart
            # for each class that reflects the importance of the feature to predictions of that
            # class
            feature_ylocs = np.arange(num_features)
            # Define offsets on the y-axis that are used to evenly space the bars for each class
            # around the y-axis position of each feature
            offsets_per_yloc = np.linspace(-0.5, 0.5, num_classes) / 2 if num_classes > 1 else [0]
            for feature_idx, (feature_yloc, importances_per_class) in enumerate(
                zip(feature_ylocs, importances_per_class_by_feature)
            ):
                for class_idx, (offset, class_importance) in enumerate(
                    zip(offsets_per_yloc, importances_per_class)
                ):
                    (bar, ) = ax.barh(
                        feature_yloc + offset,
                        class_importance,
                        align="center",
                        # Set the bar height such that importance value bars for a particular
                        # feature are spaced properly relative to each other (no overlap or gaps)
                        # and relative to importance value bars for other features
                        height=(0.5 / max(num_classes - 1, 1)),
                    )
                    if label_classes_on_plot and feature_idx == 0:
                        # Only set a label the first time a bar for a particular class is plotted to
                        # avoid duplicate legend entries. If we were to set a label for every bar,
                        # the legend would contain `num_features` labels for each class.
                        bar.set_label("Class {}".format(class_idx))

            ax.set_yticks(feature_ylocs)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            if label_classes_on_plot:
                ax.legend()
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(
                    tmpdir, "feature_importance_{}.png".format(importance_type)
                )
                fig.savefig(filepath)
                mlflow.log_artifact(filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        autologging_client = MlflowAutologgingQueueingClient()
        # logging booster params separately so that they are recorded as individual key/value
        # pairs, making them easier to compare across runs.
        booster_params = args[0] if len(args) > 0 else kwargs["params"]
        autologging_client.log_params(run_id=mlflow.active_run().info.run_id,
                                      params=booster_params)

        unlogged_params = [
            "params",
            "dtrain",
            "evals",
            "obj",
            "feval",
            "evals_result",
            "xgb_model",
            "callbacks",
            "learning_rates",
        ]
        params_to_log_for_fn = get_mlflow_run_params_for_fn_args(
            original, args, kwargs, unlogged_params)
        autologging_client.log_params(run_id=mlflow.active_run().info.run_id,
                                      params=params_to_log_for_fn)

        param_logging_operations = autologging_client.flush(synchronous=False)

        all_arg_names = _get_arg_names(original)
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")

        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            callback = record_eval_results(eval_results, metrics_logger)
            if num_pos_args >= callbacks_index + 1:
                tmp_list = list(args)
                tmp_list[callbacks_index] += [callback]
                args = tuple(tmp_list)
            elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
                kwargs["callbacks"] += [callback]
            else:
                kwargs["callbacks"] = [callback]

            # training model
            model = original(*args, **kwargs)

            # If early_stopping_rounds is present, log the metrics of the best iteration again
            # as extra metrics, using max step + 1 as the step.
            early_stopping_index = all_arg_names.index("early_stopping_rounds")
            early_stopping = (
                num_pos_args >= early_stopping_index + 1 or kwargs.get("early_stopping_rounds")
            )
            if early_stopping:
                extra_step = len(eval_results)
                autologging_client.log_metrics(
                    run_id=mlflow.active_run().info.run_id,
                    metrics={
                        "stopped_iteration": extra_step - 1,
                        "best_iteration": model.best_iteration,
                    },
                )
                autologging_client.log_metrics(
                    run_id=mlflow.active_run().info.run_id,
                    metrics=eval_results[model.best_iteration],
                    step=extra_step,
                )
                early_stopping_logging_operations = autologging_client.flush(synchronous=False)

        # logging feature importance as artifacts.
        for imp_type in importance_types:
            imp = None
            try:
                imp = model.get_score(importance_type=imp_type)
                features, importance = zip(*imp.items())
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:
                _logger.exception(
                    "Failed to log feature importance plot. XGBoost autologging "
                    "will ignore the failure and continue. Exception: ")

            if imp is not None:
                tmpdir = tempfile.mkdtemp()
                try:
                    filepath = os.path.join(
                        tmpdir, "feature_importance_{}.json".format(imp_type))
                    with open(filepath, "w") as f:
                        json.dump(imp, f)
                    mlflow.log_artifact(filepath)
                finally:
                    shutil.rmtree(tmpdir)

        # dtrain must exist as the original train function already ran successfully
        dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

        # it is possible that the dataset was constructed before the patched
        #   constructor was applied, so we cannot assume the input_example_info exists
        input_example_info = getattr(dtrain, "input_example_info", None)

        def get_input_example():
            if input_example_info is None:
                raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
            if input_example_info.error_msg is not None:
                raise Exception(input_example_info.error_msg)
            return input_example_info.input_example

        def infer_model_signature(input_example):
            model_output = model.predict(xgboost.DMatrix(input_example))
            model_signature = infer_signature(input_example, model_output)
            return model_signature

        # Only log the model if the autolog() param log_models is set to True.
        if _log_models:
            # Will only resolve `input_example` and `signature` if `log_models` is `True`.
            input_example, signature = resolve_input_example_and_signature(
                get_input_example,
                infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )

            registered_model_name = get_autologging_config(
                FLAVOR_NAME, "registered_model_name", None
            )
            log_model(
                model,
                artifact_path="model",
                signature=signature,
                input_example=input_example,
                registered_model_name=registered_model_name,
            )

        param_logging_operations.await_completion()
        if early_stopping:
            early_stopping_logging_operations.await_completion()

        return model
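
To make the per-class handling in `log_feature_importance_plot` concrete, here is a detached sketch of the shape normalization it performs before plotting (purely illustrative; the function name is not from the original module):

import numpy as np

def _normalize_importance(features, importance):
    # Coerce importance values into a (num_features, num_classes) matrix sorted by
    # total magnitude, mirroring the branching in the plot helper above.
    features = np.array(features)
    imp = np.array(importance)
    if imp.ndim <= 1:
        # One value per feature: coerce to a num_features-by-1 matrix, no class labels.
        order = np.argsort(imp)
        return features[order], imp[order].reshape(-1, 1), False
    order = np.argsort(np.abs(imp).sum(axis=1))
    return features[order], imp[order], True

feats, matrix, labeled = _normalize_importance(["f0", "f1"], [0.7, 0.3])
assert matrix.shape == (2, 1) and not labeled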
def test_get_arg_names(func, expected_args):
    assert _get_arg_names(func) == expected_args
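
The test above is evidently parametrized, but the decorator is not included in the snippet. A hedged sketch of what such a parametrization could look like (the sample function and expected list are illustrative and assume `_get_arg_names` returns every named parameter in the signature):

import pytest

def _sample_fn(a, b=None, *, c=0):
    pass

@pytest.mark.parametrize(
    "func, expected_args",
    [(_sample_fn, ["a", "b", "c"])],  # assumed behavior of _get_arg_names
)
def test_get_arg_names(func, expected_args):
    assert _get_arg_names(func) == expected_args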