Example #1
def test_client_logs_expected_run_data():
    client = MlflowAutologgingQueueingClient()

    params_to_log = {
        "param_key_{}".format(i): "param_val_{}".format(i)
        for i in range((2 * MAX_PARAMS_TAGS_PER_BATCH) + 1)
    }
    tags_to_log = {
        "tag_key_{}".format(i): "tag_val_{}".format(i)
        for i in range((2 * MAX_PARAMS_TAGS_PER_BATCH) + 1)
    }
    metrics_to_log = {
        "metric_key_{}".format(i): i
        for i in range((4 * MAX_METRICS_PER_BATCH) + 1)
    }

    with mlflow.start_run() as run:
        client.log_params(run_id=run.info.run_id, params=params_to_log)
        client.set_tags(run_id=run.info.run_id, tags=tags_to_log)
        client.log_metrics(run_id=run.info.run_id, metrics=metrics_to_log)
        client.flush()

    run_params, run_metrics, run_tags = get_run_data(run.info.run_id)
    assert run_params == params_to_log
    assert run_metrics == metrics_to_log
    assert run_tags == tags_to_log
Example #2
def test_client_correctly_operates_as_context_manager_for_synchronous_flush():
    params_to_log = {"a": "b"}
    metrics_to_log = {"c": 1}
    tags_to_log = {"d": "e"}

    with mlflow.start_run(), MlflowAutologgingQueueingClient() as client:
        run_id_1 = mlflow.active_run().info.run_id
        client.log_params(run_id_1, params_to_log)
        client.log_metrics(run_id_1, metrics_to_log)
        client.set_tags(run_id_1, tags_to_log)

    run_params_1, run_metrics_1, run_tags_1 = get_run_data(run_id_1)
    assert run_params_1 == params_to_log
    assert run_metrics_1 == metrics_to_log
    assert run_tags_1 == tags_to_log

    exc_to_raise = Exception("test exception")
    with pytest.raises(Exception) as raised_exc_info:
        with mlflow.start_run(), MlflowAutologgingQueueingClient() as client:
            run_id_2 = mlflow.active_run().info.run_id
            client.log_params(run_id_2, params_to_log)
            client.log_metrics(run_id_2, metrics_to_log)
            client.set_tags(run_id_2, tags_to_log)
            raise exc_to_raise

    assert raised_exc_info.value == exc_to_raise
    # Verify that no run content was logged because the context exited with an exception
    run_params_2, run_metrics_2, run_tags_2 = get_run_data(run_id_2)
    assert not run_params_2
    assert not run_metrics_2
    assert not run_tags_2
Example #3
def test_client_asynchronous_flush_operates_correctly():
    original_log_batch = MlflowClient().log_batch

    def mock_log_batch(run_id, metrics, params, tags):  # pylint: disable=unused-argument
        # Sleep to simulate a long-running logging operation
        time.sleep(3)
        return original_log_batch(run_id, metrics, params, tags)

    with mock.patch(
            "mlflow.utils.autologging_utils.client.MlflowClient.log_batch"
    ) as log_batch_mock:
        log_batch_mock.side_effect = mock_log_batch

        with mlflow.start_run() as run:
            client = MlflowAutologgingQueueingClient()
            client.log_params(run_id=run.info.run_id, params={"a": "b"})
            run_operations = client.flush(synchronous=False)

            # Parameter data should not be available because the asynchronous logging
            # operation is still inflight
            logged_params_1 = get_run_data(run.info.run_id)[0]
            assert not logged_params_1

            run_operations.await_completion()

            # Parameter data should now be available after waiting for completion of the
            # asynchronous logging operation
            logged_params_2 = get_run_data(run.info.run_id)[0]
            assert logged_params_2 == {"a": "b"}
Example #4
def test_logging_failures_are_handled_as_expected():
    experiment_name = "test_run_creation_termination"
    MlflowClient().create_experiment(experiment_name)
    experiment_id = MlflowClient().get_experiment_by_name(
        experiment_name).experiment_id

    with mock.patch(
            "mlflow.utils.autologging_utils.client.MlflowClient.log_batch"
    ) as log_batch_mock:
        log_batch_mock.side_effect = Exception("Batch logging failed!")

        client = MlflowAutologgingQueueingClient()
        pending_run_id = client.create_run(experiment_id=experiment_id)
        client.log_metrics(run_id=pending_run_id, metrics={"a": 1})
        client.set_terminated(run_id=pending_run_id, status="KILLED")

        with pytest.raises(MlflowException) as exc:
            client.flush()

        runs = mlflow.search_runs(experiment_ids=[experiment_id],
                                  output_format="list")
        assert len(runs) == 1
        run = runs[0]
        # Verify that metrics are absent due to the failure of batch logging
        assert not run.data.metrics
        # Verify that the run termination operation was still performed successfully
        assert run.info.status == "KILLED"

        assert "Failed to perform one or more operations on the run with ID {run_id}".format(
            run_id=run.info.run_id) in str(exc.value)
        assert "Batch logging failed!" in str(exc.value)
Example #5
def fit_mlflow(original, self, *args, **kwargs):
    """
    Autologging function that performs model training by executing the training method
    referred to by `func_name` on the instance of `clazz` referred to by `self` & records
    MLflow parameters, metrics, tags, and artifacts to a corresponding MLflow Run.
    """
    autologging_client = MlflowAutologgingQueueingClient()
    _log_pretraining_metadata(autologging_client, self, *args, **kwargs)
    params_logging_future = autologging_client.flush(synchronous=False)
    fit_output = original(self, *args, **kwargs)
    _log_posttraining_metadata(autologging_client, self, *args, **kwargs)
    autologging_client.flush(synchronous=True)
    params_logging_future.await_completion()
    return fit_output
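The pattern above queues pre-training metadata, flushes it asynchronously so training is not blocked on the logging round trip, then flushes post-training metadata synchronously and awaits the earlier operation. Below is a minimal standalone sketch of that lifecycle, assuming the import path shown in the mock.patch targets elsewhere on this page; train_model() is a hypothetical placeholder for the framework's training call.

import mlflow
from mlflow.utils.autologging_utils.client import MlflowAutologgingQueueingClient


def train_model():
    # Hypothetical stand-in for the framework's training routine
    return "trained-model"


with mlflow.start_run() as run:
    client = MlflowAutologgingQueueingClient()

    # Queue pre-training parameters and flush them asynchronously so that
    # training does not wait on the logging round trip
    client.log_params(run_id=run.info.run_id, params={"learning_rate": 0.1})
    params_logging_future = client.flush(synchronous=False)

    model = train_model()

    # Queue post-training metrics, flush synchronously, and then wait for the
    # earlier asynchronous flush to complete
    client.log_metrics(run_id=run.info.run_id, metrics={"train_loss": 0.05})
    client.flush(synchronous=True)
    params_logging_future.await_completion()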
Example #6
def test_client_logs_metric_steps_correctly():
    client = MlflowAutologgingQueueingClient()

    with mlflow.start_run() as run:
        for step in range(3):
            client.log_metrics(run_id=run.info.run_id, metrics={"a": 1}, step=step)
        client.flush()

    metric_history = MlflowClient().get_metric_history(run_id=run.info.run_id, key="a")
    assert len(metric_history) == 3
    assert [metric.step for metric in metric_history] == list(range(3))
Example #7
def test_client_truncates_metric_keys():
    client = MlflowAutologgingQueueingClient()
    metrics_to_log = {
        "a" * (MAX_ENTITY_KEY_LENGTH + 5): 1,
        "b" * (MAX_ENTITY_KEY_LENGTH + 50): 2,
    }

    with mlflow.start_run() as run:
        client.log_metrics(run_id=run.info.run_id, metrics=metrics_to_log)
        client.flush()

    run_metrics = get_run_data(run.info.run_id)[1]
    assert run_metrics == _truncate_dict(metrics_to_log, max_key_length=MAX_ENTITY_KEY_LENGTH)
Example #8
def test_client_run_creation_and_termination_are_successful():
    experiment_name = "test_run_creation_termination"
    MlflowClient().create_experiment(experiment_name)
    experiment_id = MlflowClient().get_experiment_by_name(experiment_name).experiment_id

    client = MlflowAutologgingQueueingClient()
    pending_run_id = client.create_run(experiment_id=experiment_id, start_time=5, tags={"a": "b"})
    client.set_terminated(run_id=pending_run_id, status="FINISHED", end_time=6)
    client.flush()

    runs = mlflow.search_runs(experiment_ids=[experiment_id], output_format="list")
    assert len(runs) == 1
    run = runs[0]
    assert run.info.start_time == 5
    assert run.info.end_time == 6
    assert run.info.status == "FINISHED"
    assert {"a": "b"}.items() <= run.data.tags.items()
Example #9
def test_client_truncates_param_keys_and_values():
    client = MlflowAutologgingQueueingClient()
    params_to_log = {
        "a" * (MAX_ENTITY_KEY_LENGTH + 5): "b" * (MAX_PARAM_VAL_LENGTH + 5),
        "a" * (MAX_ENTITY_KEY_LENGTH + 50): "b" * (MAX_PARAM_VAL_LENGTH + 50),
    }

    with mlflow.start_run() as run:
        client.log_params(run_id=run.info.run_id, params=params_to_log)
        client.flush()

    run_params = get_run_data(run.info.run_id)[0]
    assert run_params == _truncate_dict(
        params_to_log, max_key_length=MAX_ENTITY_KEY_LENGTH, max_value_length=MAX_PARAM_VAL_LENGTH,
    )
Example #10
def test_client_truncates_tag_keys_and_values():
    client = MlflowAutologgingQueueingClient()
    tags_to_log = {
        "a" * (MAX_ENTITY_KEY_LENGTH + 5): "b" * (MAX_PARAM_VAL_LENGTH + 5),
        "c" * (MAX_ENTITY_KEY_LENGTH + 50): "d" * (MAX_PARAM_VAL_LENGTH + 50),
    }

    with mlflow.start_run() as run:
        client.set_tags(run_id=run.info.run_id, tags=tags_to_log)
        client.flush()

    run_tags = get_run_data(run.info.run_id)[2]
    assert run_tags == _truncate_dict(
        tags_to_log, max_key_length=MAX_ENTITY_KEY_LENGTH, max_value_length=MAX_TAG_VAL_LENGTH,
    )
Example #11
def test_client_synchronous_flush_operates_correctly():
    original_log_batch = MlflowClient().log_batch

    def mock_log_batch(run_id, metrics, params, tags):  # pylint: disable=unused-argument
        # Sleep to simulate a long-running logging operation
        time.sleep(3)
        return original_log_batch(run_id, metrics, params, tags)

    with mock.patch(
            "mlflow.utils.autologging_utils.client.MlflowClient.log_batch"
    ) as log_batch_mock:
        log_batch_mock.side_effect = mock_log_batch

        with mlflow.start_run() as run:
            client = MlflowAutologgingQueueingClient()
            client.log_params(run_id=run.info.run_id, params={"a": "b"})
            client.flush(synchronous=True)

            # Parameter data should be available after the synchronous flush call returns
            logged_params = get_run_data(run.info.run_id)[0]
            assert logged_params == {"a": "b"}
Example #12
def patched_fit(original, self, *args, **kwargs):
    run_id = mlflow.active_run().info.run_id
    tracking_uri = mlflow.get_tracking_uri()
    client = MlflowAutologgingQueueingClient(tracking_uri)
    metrics_logger = BatchMetricsLogger(run_id, tracking_uri)

    log_models = get_autologging_config(mlflow.paddle.FLAVOR_NAME,
                                        "log_models", True)
    log_every_n_epoch = get_autologging_config(mlflow.paddle.FLAVOR_NAME,
                                               "log_every_n_epoch", 1)

    early_stop_callback = None
    mlflow_callback = __MLflowPaddleCallback(client, metrics_logger, run_id,
                                             log_models, log_every_n_epoch)
    if "callbacks" in kwargs:
        callbacks = kwargs["callbacks"]
        for callback in callbacks:
            if isinstance(callback, paddle.callbacks.EarlyStopping):
                early_stop_callback = callback
                _log_early_stop_params(early_stop_callback, client, run_id)
                break
        kwargs["callbacks"].append(mlflow_callback)
    else:
        kwargs["callbacks"] = [mlflow_callback]
    client.flush(synchronous=False)

    result = original(self, *args, **kwargs)

    if early_stop_callback is not None:
        _log_early_stop_metrics(early_stop_callback, client, run_id)

    mlflow.log_text(str(self.summary()), "model_summary.txt")

    if log_models:
        mlflow.paddle.log_model(pd_model=self, artifact_path="model")

    client.flush(synchronous=True)

    return result
Example #13
def test_flush_clears_pending_operations():
    with mock.patch("mlflow.utils.autologging_utils.client.MlflowClient",
                    autospec=True) as mlflow_client_mock:
        client = MlflowAutologgingQueueingClient()

        pending_run_id = client.create_run(experiment_id=5)
        client.log_params(run_id=pending_run_id, params={"a": "b"})
        client.log_metrics(run_id=pending_run_id, metrics={"c": 1})
        client.set_terminated(run_id=pending_run_id, status="FINISHED")
        client.flush()

        logging_call_count_1 = len(mlflow_client_mock.method_calls)
        # Verify that at least 3 calls have been made to MLflow logging APIs as a result
        # of the flush (i.e. log_batch, create_run, and set_terminated)
        assert logging_call_count_1 >= 3

        client.flush()

        logging_call_count_2 = len(mlflow_client_mock.method_calls)
        # Verify that performing a second flush did not result in any additional logging API calls,
        # since no new run content was added prior to the flush
        assert logging_call_count_2 == logging_call_count_1
Example #14
def eval_and_log_metrics(model, X, y_true, *, prefix, sample_weight=None):
    """
    Computes and logs metrics (and artifacts) for the given model and labeled dataset.
    The metrics/artifacts mirror what is auto-logged when training a model
    (see mlflow.sklearn.autolog).

    :param model: The model to be evaluated.
    :param X: The features for the evaluation dataset.
    :param y_true: The labels for the evaluation dataset.
    :param prefix: Prefix used to name metrics and artifacts.
    :param sample_weight: Per-sample weights to apply in the computation of metrics/artifacts.
    :return: The dict of logged metrics. Artifacts can be retrieved by inspecting the run.

    ** Example **

    .. code-block:: python

        import numpy as np
        from sklearn.linear_model import LinearRegression
        import mlflow

        # enable autologging
        mlflow.sklearn.autolog()

        # prepare training data
        X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
        y = np.dot(X, np.array([1, 2])) + 3

        # prepare evaluation data
        X_eval = np.array([[3, 3], [3, 4]])
        y_eval = np.dot(X_eval, np.array([1, 2])) + 3

        # train a model
        model = LinearRegression()
        with mlflow.start_run() as run:
            model.fit(X, y)
            metrics = mlflow.sklearn.eval_and_log_metrics(model, X_eval, y_eval, prefix="val_")


    Each metric's and artifact's name is prefixed with `prefix`, e.g., in the previous example the
    metrics and artifacts are named 'val_XXXXX'. Note that training-time metrics are auto-logged
    as 'training_XXXXX'. Metrics and artifacts are logged under the currently active run if one
    exists, otherwise a new run is started and left active.

    Raises an error if:
      - prefix is empty
      - model is not an sklearn estimator or does not support the 'predict' method
    """
    from mlflow.sklearn.utils import _log_estimator_content
    from sklearn.base import BaseEstimator

    if prefix is None or prefix == "":
        raise ValueError("Must specify a non-empty prefix")

    if not isinstance(model, BaseEstimator):
        raise ValueError(
            "The provided model was not a sklearn estimator. Please ensure the passed-in model is "
            "a sklearn estimator subclassing sklearn.base.BaseEstimator")

    if not hasattr(model, "predict"):
        raise ValueError(
            "Model does not support predictions. Please pass a model object defining a predict() "
            "method")

    active_run = mlflow.active_run()
    run = active_run if active_run is not None else mlflow.start_run()

    with MlflowAutologgingQueueingClient() as autologging_client:
        metrics = _log_estimator_content(
            autologging_client=autologging_client,
            estimator=model,
            run_id=run.info.run_id,
            prefix=prefix,
            X=X,
            y_true=y_true,
            sample_weight=sample_weight,
        )

    return metrics
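A hedged usage sketch of the behavior documented above (prefixed metric keys, ValueError on an empty prefix), reusing the toy regression data from the docstring example; it is illustrative rather than an exact transcript of the metric names MLflow returns.

import numpy as np
import pytest
from sklearn.linear_model import LinearRegression

import mlflow

X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
y = np.dot(X, np.array([1, 2])) + 3
model = LinearRegression().fit(X, y)

with mlflow.start_run():
    # Every returned metric key carries the supplied prefix
    metrics = mlflow.sklearn.eval_and_log_metrics(model, X, y, prefix="val_")
    assert all(key.startswith("val_") for key in metrics)

    # An empty prefix is rejected with a ValueError
    with pytest.raises(ValueError):
        mlflow.sklearn.eval_and_log_metrics(model, X, y, prefix="")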
Example #15
    def train(original, *args, **kwargs):
        def record_eval_results(eval_results, metrics_logger):
            """
            Create a callback function that records evaluation results.
            """
            # TODO: Remove `replace("SNAPSHOT", "dev")` once the following issue is addressed:
            #       https://github.com/dmlc/xgboost/issues/6984
            if Version(xgboost.__version__.replace("SNAPSHOT",
                                                   "dev")) >= Version("1.3.0"):
                # In xgboost >= 1.3.0, user-defined callbacks should inherit
                # `xgboost.callback.TrainingCallback`:
                # https://xgboost.readthedocs.io/en/latest/python/callbacks.html#defining-your-own-callback  # noqa

                class Callback(
                        xgboost.callback.TrainingCallback,
                        metaclass=ExceptionSafeAbstractClass,
                ):
                    def after_iteration(self, model, epoch, evals_log):
                        """
                        Run after each iteration. Return True when training should stop.
                        """
                        # `evals_log` is a nested dict (type: Dict[str, Dict[str, List[float]]])
                        # that looks like this:
                        # {
                        #   "train": {
                        #     "auc": [0.5, 0.6, 0.7, ...],
                        #     ...
                        #   },
                        #   ...
                        # }
                        evaluation_result_dict = {}
                        for data_name, metric_dict in evals_log.items():
                            for metric_name, metric_values_on_each_iter in metric_dict.items(
                            ):
                                key = "{}-{}".format(data_name, metric_name)
                                # The last element in `metric_values_on_each_iter` corresponds to
                                # the metric on the current iteration
                                evaluation_result_dict[
                                    key] = metric_values_on_each_iter[-1]

                        metrics_logger.record_metrics(evaluation_result_dict,
                                                      epoch)
                        eval_results.append(evaluation_result_dict)

                        # Return `False` to indicate training should not stop
                        return False

                return Callback()

            else:

                @exception_safe_function
                def callback(env):
                    metrics_logger.record_metrics(
                        dict(env.evaluation_result_list), env.iteration)
                    eval_results.append(dict(env.evaluation_result_list))

                return callback

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt

            features = np.array(features)
            importance = np.array(importance)
            indices = np.argsort(importance)
            features = features[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align="center", height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                # pylint: disable=undefined-loop-variable
                filepath = os.path.join(
                    tmpdir, "feature_importance_{}.png".format(imp_type))
                fig.savefig(filepath)
                mlflow.log_artifact(filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        autologging_client = MlflowAutologgingQueueingClient()
        # logging booster params separately to extract key/value pairs and make it easier to
        # compare them across runs.
        booster_params = args[0] if len(args) > 0 else kwargs["params"]
        autologging_client.log_params(run_id=mlflow.active_run().info.run_id,
                                      params=booster_params)

        unlogged_params = [
            "params",
            "dtrain",
            "evals",
            "obj",
            "feval",
            "evals_result",
            "xgb_model",
            "callbacks",
            "learning_rates",
        ]
        params_to_log_for_fn = get_mlflow_run_params_for_fn_args(
            original, args, kwargs, unlogged_params)
        autologging_client.log_params(run_id=mlflow.active_run().info.run_id,
                                      params=params_to_log_for_fn)

        param_logging_operations = autologging_client.flush(synchronous=False)

        all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")

        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            callback = record_eval_results(eval_results, metrics_logger)
            if num_pos_args >= callbacks_index + 1:
                tmp_list = list(args)
                tmp_list[callbacks_index] += [callback]
                args = tuple(tmp_list)
            elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
                kwargs["callbacks"] += [callback]
            else:
                kwargs["callbacks"] = [callback]

            # training model
            model = original(*args, **kwargs)

            # If early_stopping_rounds is present, logging metrics at the best iteration
            # as extra metrics with the max step + 1.
            early_stopping_index = all_arg_names.index("early_stopping_rounds")
            early_stopping = (num_pos_args >= early_stopping_index + 1
                              or "early_stopping_rounds" in kwargs)
            if early_stopping:
                extra_step = len(eval_results)
                autologging_client.log_metrics(
                    run_id=mlflow.active_run().info.run_id,
                    metrics={
                        "stopped_iteration": extra_step - 1,
                        "best_iteration": model.best_iteration,
                    },
                )
                autologging_client.log_metrics(
                    run_id=mlflow.active_run().info.run_id,
                    metrics=eval_results[model.best_iteration],
                    step=extra_step,
                )
                early_stopping_logging_operations = autologging_client.flush(
                    synchronous=False)

        # logging feature importance as artifacts.
        for imp_type in importance_types:
            imp = None
            try:
                imp = model.get_score(importance_type=imp_type)
                features, importance = zip(*imp.items())
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:
                _logger.exception(
                    "Failed to log feature importance plot. XGBoost autologging "
                    "will ignore the failure and continue. Exception: ")

            if imp is not None:
                tmpdir = tempfile.mkdtemp()
                try:
                    filepath = os.path.join(
                        tmpdir, "feature_importance_{}.json".format(imp_type))
                    with open(filepath, "w") as f:
                        json.dump(imp, f)
                    mlflow.log_artifact(filepath)
                finally:
                    shutil.rmtree(tmpdir)

        # dtrain must exist as the original train function already ran successfully
        dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

        # it is possible that the dataset was constructed before the patched
        #   constructor was applied, so we cannot assume the input_example_info exists
        input_example_info = getattr(dtrain, "input_example_info", None)

        def get_input_example():
            if input_example_info is None:
                raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
            if input_example_info.error_msg is not None:
                raise Exception(input_example_info.error_msg)
            return input_example_info.input_example

        def infer_model_signature(input_example):
            model_output = model.predict(xgboost.DMatrix(input_example))
            model_signature = infer_signature(input_example, model_output)
            return model_signature

        # Only log the model if the autolog() param log_models is set to True.
        if log_models:
            # Will only resolve `input_example` and `signature` if `log_models` is `True`.
            input_example, signature = resolve_input_example_and_signature(
                get_input_example,
                infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )

            log_model(
                model,
                artifact_path="model",
                signature=signature,
                input_example=input_example,
            )

        param_logging_operations.await_completion()
        if early_stopping:
            early_stopping_logging_operations.await_completion()

        return model
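For context, a hedged end-to-end sketch of how the patched train function above is exercised, assuming xgboost and scikit-learn are installed and that mlflow.xgboost.autolog() routes xgboost.train through this patch.

import mlflow
import xgboost
from sklearn.datasets import load_breast_cancer

# Enabling autologging patches xgboost.train with the function above
mlflow.xgboost.autolog()

X, y = load_breast_cancer(return_X_y=True)
dtrain = xgboost.DMatrix(X, label=y)

with mlflow.start_run():
    # Booster params, call arguments, per-iteration metrics, feature importance
    # artifacts, and (if enabled) the trained model are logged by the patch
    booster = xgboost.train(
        params={"objective": "binary:logistic", "eval_metric": "auc"},
        dtrain=dtrain,
        num_boost_round=10,
        evals=[(dtrain, "train")],
    )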
Example #16
def patched_fit(original, self, *args, **kwargs):
    """
    A patched implementation of `pytorch_lightning.Trainer.fit` which enables logging the
    following parameters, metrics and artifacts:

    - Training epochs
    - Optimizer parameters
    - `EarlyStoppingCallback`_ parameters
    - Metrics stored in `trainer.callback_metrics`
    - Model checkpoints
    - Trained model

    .. _EarlyStoppingCallback:
        https://pytorch-lightning.readthedocs.io/en/latest/early_stopping.html
    """
    run_id = mlflow.active_run().info.run_id
    tracking_uri = mlflow.get_tracking_uri()
    client = MlflowAutologgingQueueingClient(tracking_uri)
    metrics_logger = BatchMetricsLogger(run_id, tracking_uri)

    log_models = get_autologging_config(mlflow.pytorch.FLAVOR_NAME,
                                        "log_models", True)
    log_every_n_epoch = get_autologging_config(mlflow.pytorch.FLAVOR_NAME,
                                               "log_every_n_epoch", 1)
    log_every_n_step = get_autologging_config(mlflow.pytorch.FLAVOR_NAME,
                                              "log_every_n_step", None)

    early_stop_callback = None
    for callback in self.callbacks:
        if isinstance(callback, pl.callbacks.early_stopping.EarlyStopping):
            early_stop_callback = callback
            _log_early_stop_params(early_stop_callback, client, run_id)

    if not any(
            isinstance(callbacks, __MLflowPLCallback)
            for callbacks in self.callbacks):
        self.callbacks += [
            __MLflowPLCallback(client, metrics_logger, run_id, log_models,
                               log_every_n_epoch, log_every_n_step)
        ]

    client.flush(synchronous=False)

    result = original(self, *args, **kwargs)

    if early_stop_callback is not None:
        _log_early_stop_metrics(early_stop_callback, client, run_id)

    if Version(pl.__version__) < Version("1.4.0"):
        summary = str(ModelSummary(self.model, mode="full"))
    else:
        summary = str(ModelSummary(self.model, max_depth=-1))

    tempdir = tempfile.mkdtemp()
    try:
        summary_file = os.path.join(tempdir, "model_summary.txt")
        with open(summary_file, "w") as f:
            f.write(summary)

        mlflow.log_artifact(local_path=summary_file)
    finally:
        shutil.rmtree(tempdir)

    if log_models:
        registered_model_name = get_autologging_config(
            mlflow.pytorch.FLAVOR_NAME, "registered_model_name", None)
        mlflow.pytorch.log_model(
            pytorch_model=self.model,
            artifact_path="model",
            registered_model_name=registered_model_name,
        )

        if early_stop_callback is not None and self.checkpoint_callback.best_model_path:
            mlflow.log_artifact(
                local_path=self.checkpoint_callback.best_model_path,
                artifact_path="restored_model_checkpoint",
            )

    client.flush(synchronous=True)

    return result
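A hedged end-to-end sketch of how the patch above is triggered, assuming pytorch_lightning is installed; TinyModel is a toy LightningModule used purely for illustration.

import mlflow
import pytorch_lightning as pl
import torch
from torch.utils.data import DataLoader, TensorDataset


class TinyModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(4, 1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = torch.nn.functional.mse_loss(self.layer(x), y)
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.01)


# Enabling autologging patches pytorch_lightning.Trainer.fit with patched_fit above
mlflow.pytorch.autolog()

loader = DataLoader(TensorDataset(torch.randn(32, 4), torch.randn(32, 1)), batch_size=8)

with mlflow.start_run():
    # Epochs, optimizer parameters, callback metrics, the model summary artifact,
    # and (if enabled) the trained model are logged to this run
    pl.Trainer(max_epochs=2).fit(TinyModel(), loader)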
Example #17
    def train(original, *args, **kwargs):
        def record_eval_results(eval_results, metrics_logger):
            """
            Create a callback function that records evaluation results.
            """

            @exception_safe_function
            def callback(env):
                res = {}
                for data_name, eval_name, value, _ in env.evaluation_result_list:
                    key = data_name + "-" + eval_name
                    res[key] = value
                metrics_logger.record_metrics(res, env.iteration)
                eval_results.append(res)

            return callback

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt

            indices = np.argsort(importance)
            features = np.array(features)[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align="center", height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                # pylint: disable=undefined-loop-variable
                filepath = os.path.join(tmpdir, "feature_importance_{}.png".format(imp_type))
                fig.savefig(filepath)
                mlflow.log_artifact(filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        autologging_client = MlflowAutologgingQueueingClient()

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        booster_params = args[0] if len(args) > 0 else kwargs["params"]
        autologging_client.log_params(run_id=mlflow.active_run().info.run_id, params=booster_params)

        unlogged_params = [
            "params",
            "train_set",
            "valid_sets",
            "valid_names",
            "fobj",
            "feval",
            "init_model",
            "evals_result",
            "learning_rates",
            "callbacks",
        ]

        params_to_log_for_fn = get_mlflow_run_params_for_fn_args(
            original, args, kwargs, unlogged_params
        )
        autologging_client.log_params(
            run_id=mlflow.active_run().info.run_id, params=params_to_log_for_fn
        )

        param_logging_operations = autologging_client.flush(synchronous=False)

        all_arg_names = _get_arg_names(original)
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            callback = record_eval_results(eval_results, metrics_logger)
            if num_pos_args >= callbacks_index + 1:
                tmp_list = list(args)
                tmp_list[callbacks_index] += [callback]
                args = tuple(tmp_list)
            elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
                kwargs["callbacks"] += [callback]
            else:
                kwargs["callbacks"] = [callback]

            # training model
            model = original(*args, **kwargs)

            # If early_stopping_rounds is present, logging metrics at the best iteration
            # as extra metrics with the max step + 1.
            early_stopping_index = all_arg_names.index("early_stopping_rounds")
            early_stopping = (
                num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
            )
            if early_stopping:
                extra_step = len(eval_results)
                autologging_client.log_metrics(
                    run_id=mlflow.active_run().info.run_id,
                    metrics={
                        "stopped_iteration": extra_step,
                        # best_iteration is set even if training does not stop early.
                        "best_iteration": model.best_iteration,
                    },
                )
                # iteration starts from 1 in LightGBM.
                last_iter_results = eval_results[model.best_iteration - 1]
                autologging_client.log_metrics(
                    run_id=mlflow.active_run().info.run_id,
                    metrics=last_iter_results,
                    step=extra_step,
                )
                early_stopping_logging_operations = autologging_client.flush(synchronous=False)

        # logging feature importance as artifacts.
        for imp_type in ["split", "gain"]:
            features = model.feature_name()
            importance = model.feature_importance(importance_type=imp_type)
            try:
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:
                _logger.exception(
                    "Failed to log feature importance plot. LightGBM autologging "
                    "will ignore the failure and continue. Exception: "
                )

            imp = {ft: imp for ft, imp in zip(features, importance.tolist())}
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, "feature_importance_{}.json".format(imp_type))
                with open(filepath, "w") as f:
                    json.dump(imp, f, indent=2)
                mlflow.log_artifact(filepath)
            finally:
                shutil.rmtree(tmpdir)

        # train_set must exist as the original train function already ran successfully
        train_set = args[1] if len(args) > 1 else kwargs.get("train_set")

        # it is possible that the dataset was constructed before the patched
        #   constructor was applied, so we cannot assume the input_example_info exists
        input_example_info = getattr(train_set, "input_example_info", None)

        def get_input_example():
            if input_example_info is None:
                raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
            if input_example_info.error_msg is not None:
                raise Exception(input_example_info.error_msg)
            return input_example_info.input_example

        def infer_model_signature(input_example):
            model_output = model.predict(input_example)
            model_signature = infer_signature(input_example, model_output)
            return model_signature

        # Whether to automatically log the trained model based on boolean flag.
        if log_models:
            # Will only resolve `input_example` and `signature` if `log_models` is `True`.
            input_example, signature = resolve_input_example_and_signature(
                get_input_example,
                infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )

            log_model(
                model, artifact_path="model", signature=signature, input_example=input_example,
            )

        param_logging_operations.await_completion()
        if early_stopping:
            early_stopping_logging_operations.await_completion()

        return model
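A hedged sketch of exercising the patched LightGBM train function above, assuming lightgbm and scikit-learn are installed and that mlflow.lightgbm.autolog() routes lightgbm.train through this patch.

import lightgbm as lgb
import mlflow
from sklearn.datasets import load_breast_cancer

# Enabling autologging patches lightgbm.train with the function above
mlflow.lightgbm.autolog()

X, y = load_breast_cancer(return_X_y=True)
train_set = lgb.Dataset(X, label=y)

with mlflow.start_run():
    # Booster params, call arguments, per-iteration metrics, split/gain feature
    # importance artifacts, and (if enabled) the trained model are logged
    booster = lgb.train(
        params={"objective": "binary", "metric": "auc"},
        train_set=train_set,
        num_boost_round=10,
        valid_sets=[train_set],
        valid_names=["train"],
    )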
Example #18
    def train(original, *args, **kwargs):
        def record_eval_results(eval_results, metrics_logger):
            """
            Create a callback function that records evaluation results.
            """
            # TODO: Remove `replace("SNAPSHOT", "dev")` once the following issue is addressed:
            #       https://github.com/dmlc/xgboost/issues/6984
            if Version(xgboost.__version__.replace("SNAPSHOT", "dev")) >= Version("1.3.0"):
                # In xgboost >= 1.3.0, user-defined callbacks should inherit
                # `xgboost.callback.TrainingCallback`:
                # https://xgboost.readthedocs.io/en/latest/python/callbacks.html#defining-your-own-callback  # noqa

                class Callback(
                    xgboost.callback.TrainingCallback, metaclass=ExceptionSafeAbstractClass,
                ):
                    def after_iteration(self, model, epoch, evals_log):
                        """
                        Run after each iteration. Return True when training should stop.
                        """
                        # `evals_log` is a nested dict (type: Dict[str, Dict[str, List[float]]])
                        # that looks like this:
                        # {
                        #   "train": {
                        #     "auc": [0.5, 0.6, 0.7, ...],
                        #     ...
                        #   },
                        #   ...
                        # }
                        evaluation_result_dict = {}
                        for data_name, metric_dict in evals_log.items():
                            for metric_name, metric_values_on_each_iter in metric_dict.items():
                                key = "{}-{}".format(data_name, metric_name)
                                # The last element in `metric_values_on_each_iter` corresponds to
                                # the metric on the current iteration
                                evaluation_result_dict[key] = metric_values_on_each_iter[-1]

                        metrics_logger.record_metrics(evaluation_result_dict, epoch)
                        eval_results.append(evaluation_result_dict)

                        # Return `False` to indicate training should not stop
                        return False

                return Callback()

            else:

                @exception_safe_function
                def callback(env):
                    metrics_logger.record_metrics(dict(env.evaluation_result_list), env.iteration)
                    eval_results.append(dict(env.evaluation_result_list))

                return callback

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt
            from cycler import cycler

            features = np.array(features)

            # Structure the supplied `importance` values as a `num_features`-by-`num_classes` matrix
            importances_per_class_by_feature = np.array(importance)
            if importances_per_class_by_feature.ndim <= 1:
                # In this case, the supplied `importance` values are not given per class. Rather,
                # one importance value is given per feature. For consistency with the assumed
                # `num_features`-by-`num_classes` matrix structure, we coerce the importance
                # values to a `num_features`-by-1 matrix
                indices = np.argsort(importance)
                # Sort features and importance values by magnitude during transformation to a
                # `num_features`-by-`num_classes` matrix
                features = features[indices]
                importances_per_class_by_feature = np.array(
                    [[importance] for importance in importances_per_class_by_feature[indices]]
                )
                # In this case, do not include class labels on the feature importance plot because
                # only one importance value has been provided per feature, rather than
                # one importance value for each class per feature
                label_classes_on_plot = False
            else:
                importance_value_magnitudes = np.abs(importances_per_class_by_feature).sum(axis=1)
                indices = np.argsort(importance_value_magnitudes)
                features = features[indices]
                importances_per_class_by_feature = importances_per_class_by_feature[indices]
                label_classes_on_plot = True

            num_classes = importances_per_class_by_feature.shape[1]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            h = h + 0.1 * num_classes if num_classes > 1 else h
            fig, ax = plt.subplots(figsize=(w, h))
            # When importance values are provided for each class per feature, we want to ensure
            # that the same color is used for all bars in the bar chart that have the same class
            colors_to_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"][:num_classes]
            color_cycler = cycler(color=colors_to_cycle)
            ax.set_prop_cycle(color_cycler)

            # The following logic operates on one feature at a time, adding a bar to the bar chart
            # for each class that reflects the importance of the feature to predictions of that
            # class
            feature_ylocs = np.arange(num_features)
            # Define offsets on the y-axis that are used to evenly space the bars for each class
            # around the y-axis position of each feature
            offsets_per_yloc = np.linspace(-0.5, 0.5, num_classes) / 2 if num_classes > 1 else [0]
            for feature_idx, (feature_yloc, importances_per_class) in enumerate(
                zip(feature_ylocs, importances_per_class_by_feature)
            ):
                for class_idx, (offset, class_importance) in enumerate(
                    zip(offsets_per_yloc, importances_per_class)
                ):
                    (bar,) = ax.barh(
                        feature_yloc + offset,
                        class_importance,
                        align="center",
                        # Set the bar height such that importance value bars for a particular
                        # feature are spaced properly relative to each other (no overlap or gaps)
                        # and relative to importance value bars for other features
                        height=(0.5 / max(num_classes - 1, 1)),
                    )
                    if label_classes_on_plot and feature_idx == 0:
                        # Only set a label the first time a bar for a particular class is plotted to
                        # avoid duplicate legend entries. If we were to set a label for every bar,
                        # the legend would contain `num_features` labels for each class.
                        bar.set_label("Class {}".format(class_idx))

            ax.set_yticks(feature_ylocs)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            if label_classes_on_plot:
                ax.legend()
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                # pylint: disable=undefined-loop-variable
                filepath = os.path.join(tmpdir, "feature_importance_{}.png".format(imp_type))
                fig.savefig(filepath)
                mlflow.log_artifact(filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        autologging_client = MlflowAutologgingQueueingClient()
        # logging booster params separately to extract key/value pairs and make it easier to
        # compare them across runs.
        booster_params = args[0] if len(args) > 0 else kwargs["params"]
        autologging_client.log_params(run_id=mlflow.active_run().info.run_id, params=booster_params)

        unlogged_params = [
            "params",
            "dtrain",
            "evals",
            "obj",
            "feval",
            "evals_result",
            "xgb_model",
            "callbacks",
            "learning_rates",
        ]
        params_to_log_for_fn = get_mlflow_run_params_for_fn_args(
            original, args, kwargs, unlogged_params
        )
        autologging_client.log_params(
            run_id=mlflow.active_run().info.run_id, params=params_to_log_for_fn
        )

        param_logging_operations = autologging_client.flush(synchronous=False)

        all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")

        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            callback = record_eval_results(eval_results, metrics_logger)
            if num_pos_args >= callbacks_index + 1:
                tmp_list = list(args)
                tmp_list[callbacks_index] += [callback]
                args = tuple(tmp_list)
            elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
                kwargs["callbacks"] += [callback]
            else:
                kwargs["callbacks"] = [callback]

            # training model
            model = original(*args, **kwargs)

            # If early_stopping_rounds is present, logging metrics at the best iteration
            # as extra metrics with the max step + 1.
            early_stopping_index = all_arg_names.index("early_stopping_rounds")
            early_stopping = (
                num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
            )
            if early_stopping:
                extra_step = len(eval_results)
                autologging_client.log_metrics(
                    run_id=mlflow.active_run().info.run_id,
                    metrics={
                        "stopped_iteration": extra_step - 1,
                        "best_iteration": model.best_iteration,
                    },
                )
                autologging_client.log_metrics(
                    run_id=mlflow.active_run().info.run_id,
                    metrics=eval_results[model.best_iteration],
                    step=extra_step,
                )
                early_stopping_logging_operations = autologging_client.flush(synchronous=False)

        # logging feature importance as artifacts.
        for imp_type in importance_types:
            imp = None
            try:
                imp = model.get_score(importance_type=imp_type)
                features, importance = zip(*imp.items())
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:
                _logger.exception(
                    "Failed to log feature importance plot. XGBoost autologging "
                    "will ignore the failure and continue. Exception: "
                )

            if imp is not None:
                tmpdir = tempfile.mkdtemp()
                try:
                    filepath = os.path.join(tmpdir, "feature_importance_{}.json".format(imp_type))
                    with open(filepath, "w") as f:
                        json.dump(imp, f)
                    mlflow.log_artifact(filepath)
                finally:
                    shutil.rmtree(tmpdir)

        # dtrain must exist as the original train function already ran successfully
        dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

        # it is possible that the dataset was constructed before the patched
        #   constructor was applied, so we cannot assume the input_example_info exists
        input_example_info = getattr(dtrain, "input_example_info", None)

        def get_input_example():
            if input_example_info is None:
                raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
            if input_example_info.error_msg is not None:
                raise Exception(input_example_info.error_msg)
            return input_example_info.input_example

        def infer_model_signature(input_example):
            model_output = model.predict(xgboost.DMatrix(input_example))
            model_signature = infer_signature(input_example, model_output)
            return model_signature

        # Only log the model if the autolog() param log_models is set to True.
        if log_models:
            # Will only resolve `input_example` and `signature` if `log_models` is `True`.
            input_example, signature = resolve_input_example_and_signature(
                get_input_example,
                infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )

            log_model(
                model, artifact_path="model", signature=signature, input_example=input_example,
            )

        param_logging_operations.await_completion()
        if early_stopping:
            early_stopping_logging_operations.await_completion()

        return model