Example #1
0
    def _run_and_log_function(self, original, args, kwargs, unlogged_params, callback_arg_index):
        log_fn_args_as_params(original, args, kwargs, unlogged_params)
        early_stop_callback = None

        # Checking if the 'callback' argument of the function is set
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            mlflowKerasCallback = getKerasCallback(metrics_logger)
            if len(args) > callback_arg_index:
                tmp_list = list(args)
                early_stop_callback = _early_stop_check(tmp_list[callback_arg_index])
                tmp_list[callback_arg_index] += [mlflowKerasCallback]
                args = tuple(tmp_list)
            elif kwargs.get("callbacks"):
                early_stop_callback = _early_stop_check(kwargs["callbacks"])
                kwargs["callbacks"] += [mlflowKerasCallback]
            else:
                kwargs["callbacks"] = [mlflowKerasCallback]

            try_mlflow_log(_log_early_stop_callback_params, early_stop_callback)

            history = original(self, *args, **kwargs)

            try_mlflow_log(
                _log_early_stop_callback_metrics, early_stop_callback, history, metrics_logger
            )

        return history
Example #2
0
    def _run_and_log_function(self, original, args, kwargs, unlogged_params,
                              callback_arg_index):
        log_fn_args_as_params(original, list(args), kwargs, unlogged_params)

        callbacks = [cb(self)
                     for cb in self.callback_fns] + (self.callbacks or [])

        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            mlflowFastaiCallback = getFastaiCallback(metrics_logger, self)

            # Checking if the 'callback' argument of the function is set
            if len(args) > callback_arg_index:
                tmp_list = list(args)
                callbacks += list(args[callback_arg_index])
                tmp_list[callback_arg_index] += [mlflowFastaiCallback]
                args = tuple(tmp_list)
            elif kwargs.get("callbacks"):
                callbacks += list(kwargs["callbacks"])
                kwargs["callbacks"] += [mlflowFastaiCallback]
            else:
                kwargs["callbacks"] = [mlflowFastaiCallback]

            early_stop_callback = _find_callback_of_type(
                EarlyStoppingCallback, callbacks)
            one_cycle_callback = _find_callback_of_type(
                OneCycleScheduler, callbacks)

            _log_early_stop_callback_params(early_stop_callback)
            _log_one_cycle_callback_params(one_cycle_callback)

            result = original(self, *args, **kwargs)

        return result
def test_batch_metrics_logger_logs_all_metrics(start_run, ):
    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        for i in range(100):
            metrics_logger.record_metrics({hex(i): i}, i)

    metrics_on_run = mlflow.tracking.MlflowClient().get_run(
        run_id).data.metrics

    for i in range(100):
        assert hex(i) in metrics_on_run
        assert metrics_on_run[hex(i)] == i
def test_batch_metrics_logger_logs_timestamps_as_int_milliseconds(start_run, ):
    with mock.patch.object(MlflowClient,
                           "log_batch") as log_batch_mock, mock.patch(
                               "time.time",
                               return_value=123.45678901234567890):
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            metrics_logger.record_metrics({"x": 1}, step=0)

        _, kwargs = log_batch_mock.call_args

        logged_metric = kwargs["metrics"][0]

        assert logged_metric.timestamp == 123456
def test_batch_metrics_logger_records_time_correctly(start_run, ):
    with mock.patch.object(MlflowClient,
                           "log_batch",
                           wraps=lambda *args, **kwargs: time.sleep(1)):
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            metrics_logger.record_metrics({"x": 1}, step=0)

            assert metrics_logger.total_log_batch_time >= 1

            time.sleep(2)

            metrics_logger.record_metrics({"x": 1}, step=0)

            assert metrics_logger.total_training_time >= 2
Example #6
0
    def fit(original, self, *args, **kwargs):
        # Wrap `fit` execution within a batch metrics logger context.
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            mlflowGluonCallback = getGluonCallback(metrics_logger)
            if len(args) >= 4:
                l = list(args)
                l[3] += [mlflowGluonCallback]
                args = tuple(l)
            elif "event_handlers" in kwargs:
                kwargs["event_handlers"] += [mlflowGluonCallback]
            else:
                kwargs["event_handlers"] = [mlflowGluonCallback]
            result = original(self, *args, **kwargs)

        return result
def test_batch_metrics_logger_chunks_metrics_when_batch_logging(start_run, ):
    with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock:
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            metrics_logger.record_metrics({hex(x): x
                                           for x in range(5000)},
                                          step=0)
            run_id = mlflow.active_run().info.run_id

            for call_idx, call in enumerate(log_batch_mock.call_args_list):
                _, kwargs = call

                assert kwargs["run_id"] == run_id
                assert len(kwargs["metrics"]) == 1000
                for metric_idx, metric in enumerate(kwargs["metrics"]):
                    assert metric.key == hex(call_idx * 1000 + metric_idx)
                    assert metric.value == call_idx * 1000 + metric_idx
                    assert metric.step == 0
def test_batch_metrics_logger_runs_training_and_logging_in_correct_ratio(
    start_run, ):
    with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock:
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            metrics_logger.record_metrics({"x": 1},
                                          step=0)  # data doesn't matter

            # first metrics should be logged immediately to record a previous timestamp and
            #   batch log time
            log_batch_mock.assert_called_once()

            metrics_logger.total_log_batch_time = 1
            metrics_logger.total_training_time = 1

            log_batch_mock.reset_mock()  # resets the 'calls' of this mock

            # the above 'training' took 1 second. So with target training-to-logging time ratio of
            #   10:1, 9 more 'training' should happen without sending the batch and then after the
            #   10th training the batch should be sent.
            for i in range(2, 11):
                metrics_logger.record_metrics({"x": 1}, step=0)
                log_batch_mock.assert_not_called()
                metrics_logger.total_training_time = i

            # at this point, average log batch time is 1, and total training time is 9
            # thus the next record_metrics call should send the batch.
            metrics_logger.record_metrics({"x": 1}, step=0)
            log_batch_mock.assert_called_once()

            # update log_batch time to reflect the 'mocked' training time
            metrics_logger.total_log_batch_time = 2

            log_batch_mock.reset_mock()  # reset the recorded calls

            for i in range(12, 21):
                metrics_logger.record_metrics({"x": 1}, step=0)
                log_batch_mock.assert_not_called()
                metrics_logger.total_training_time = i

            metrics_logger.record_metrics({"x": 1}, step=0)
            log_batch_mock.assert_called_once()
def test_batch_metrics_logger_continues_if_log_batch_fails(start_run, ):
    with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock:
        log_batch_mock.side_effect = [Exception("asdf"), None]

        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            # this call should fail to record since log_batch raised exception
            metrics_logger.record_metrics({"x": 1}, step=0)

            metrics_logger.record_metrics({"y": 2}, step=1)

        # even though the first call to log_batch failed, the BatchMetricsLogger should continue
        #   logging subsequent batches
        last_call = log_batch_mock.call_args_list[-1]

        _, kwargs = last_call

        assert kwargs["run_id"] == run_id
        assert len(kwargs["metrics"]) == 1
        metric = kwargs["metrics"][0]
        assert metric.key == "y"
        assert metric.value == 2
        assert metric.step == 1
Example #10
0
    def train(original, *args, **kwargs):
        def record_eval_results(eval_results, metrics_logger):
            """
            Create a callback function that records evaluation results.
            """
            @exception_safe_function
            def callback(env):
                res = {}
                for data_name, eval_name, value, _ in env.evaluation_result_list:
                    key = data_name + "-" + eval_name
                    res[key] = value
                metrics_logger.record_metrics(res, env.iteration)
                eval_results.append(res)

            return callback

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt

            indices = np.argsort(importance)
            features = np.array(features)[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align="center", height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                # pylint: disable=undefined-loop-variable
                filepath = os.path.join(
                    tmpdir, "feature_importance_{}.png".format(imp_type))
                fig.savefig(filepath)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        params = args[0] if len(args) > 0 else kwargs["params"]
        try_mlflow_log(mlflow.log_params, params)

        unlogged_params = [
            "params",
            "train_set",
            "valid_sets",
            "valid_names",
            "fobj",
            "feval",
            "init_model",
            "evals_result",
            "learning_rates",
            "callbacks",
        ]

        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            callback = record_eval_results(eval_results, metrics_logger)
            if num_pos_args >= callbacks_index + 1:
                tmp_list = list(args)
                tmp_list[callbacks_index] += [callback]
                args = tuple(tmp_list)
            elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
                kwargs["callbacks"] += [callback]
            else:
                kwargs["callbacks"] = [callback]

            # training model
            model = original(*args, **kwargs)

            # If early_stopping_rounds is present, logging metrics at the best iteration
            # as extra metrics with the max step + 1.
            early_stopping_index = all_arg_names.index("early_stopping_rounds")
            early_stopping = (num_pos_args >= early_stopping_index + 1
                              or "early_stopping_rounds" in kwargs)
            if early_stopping:
                extra_step = len(eval_results)

                metrics_logger.record_metrics(
                    {"stopped_iteration": extra_step})
                # best_iteration is set even if training does not stop early.
                metrics_logger.record_metrics(
                    {"best_iteration": model.best_iteration})
                # iteration starts from 1 in LightGBM.
                results = eval_results[model.best_iteration - 1]
                metrics_logger.record_metrics(results, step=extra_step)

        # logging feature importance as artifacts.
        for imp_type in ["split", "gain"]:
            features = model.feature_name()
            importance = model.feature_importance(importance_type=imp_type)
            try:
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:
                _logger.exception(
                    "Failed to log feature importance plot. LightGBM autologging "
                    "will ignore the failure and continue. Exception: ")

            imp = {ft: imp for ft, imp in zip(features, importance.tolist())}
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(
                    tmpdir, "feature_importance_{}.json".format(imp_type))
                with open(filepath, "w") as f:
                    json.dump(imp, f, indent=2)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                shutil.rmtree(tmpdir)

        # train_set must exist as the original train function already ran successfully
        train_set = args[1] if len(args) > 1 else kwargs.get("train_set")

        # it is possible that the dataset was constructed before the patched
        #   constructor was applied, so we cannot assume the input_example_info exists
        input_example_info = getattr(train_set, "input_example_info", None)

        def get_input_example():
            if input_example_info is None:
                raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
            if input_example_info.error_msg is not None:
                raise Exception(input_example_info.error_msg)
            return input_example_info.input_example

        def infer_model_signature(input_example):
            model_output = model.predict(input_example)
            model_signature = infer_signature(input_example, model_output)
            return model_signature

        # Whether to automatically log the trained model based on boolean flag.
        if log_models:
            # Will only resolve `input_example` and `signature` if `log_models` is `True`.
            input_example, signature = resolve_input_example_and_signature(
                get_input_example,
                infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )

            try_mlflow_log(
                log_model,
                model,
                artifact_path="model",
                signature=signature,
                input_example=input_example,
            )

        return model
Example #11
0
    def train(original, *args, **kwargs):
        def record_eval_results(eval_results, metrics_logger):
            """
            Create a callback function that records evaluation results.
            """

            if LooseVersion(xgboost.__version__) >= LooseVersion("1.3.0"):
                # In xgboost >= 1.3.0, user-defined callbacks should inherit
                # `xgboost.callback.TrainingCallback`:
                # https://xgboost.readthedocs.io/en/latest/python/callbacks.html#defining-your-own-callback  # noqa

                class Callback(
                        xgboost.callback.TrainingCallback,
                        metaclass=ExceptionSafeAbstractClass,
                ):
                    def after_iteration(self, model, epoch, evals_log):
                        """
                        Run after each iteration. Return True when training should stop.
                        """
                        # `evals_log` is a nested dict (type: Dict[str, Dict[str, List[float]]])
                        # that looks like this:
                        # {
                        #   "train": {
                        #     "auc": [0.5, 0.6, 0.7, ...],
                        #     ...
                        #   },
                        #   ...
                        # }
                        evaluation_result_dict = {}
                        for data_name, metric_dict in evals_log.items():
                            for metric_name, metric_values_on_each_iter in metric_dict.items(
                            ):
                                key = "{}-{}".format(data_name, metric_name)
                                # The last element in `metric_values_on_each_iter` corresponds to
                                # the meric on the current iteration
                                evaluation_result_dict[
                                    key] = metric_values_on_each_iter[-1]

                        metrics_logger.record_metrics(evaluation_result_dict,
                                                      epoch)
                        eval_results.append(evaluation_result_dict)

                        # Return `False` to indicate training should not stop
                        return False

                return Callback()

            else:

                @exception_safe_function
                def callback(env):
                    metrics_logger.record_metrics(
                        dict(env.evaluation_result_list), env.iteration)
                    eval_results.append(dict(env.evaluation_result_list))

                return callback

        def log_feature_importance_plot(features, importance, importance_type):
            """
            Log feature importance plot.
            """
            import matplotlib.pyplot as plt

            features = np.array(features)
            importance = np.array(importance)
            indices = np.argsort(importance)
            features = features[indices]
            importance = importance[indices]
            num_features = len(features)

            # If num_features > 10, increase the figure height to prevent the plot
            # from being too dense.
            w, h = [6.4, 4.8]  # matplotlib's default figure size
            h = h + 0.1 * num_features if num_features > 10 else h
            fig, ax = plt.subplots(figsize=(w, h))

            yloc = np.arange(num_features)
            ax.barh(yloc, importance, align="center", height=0.5)
            ax.set_yticks(yloc)
            ax.set_yticklabels(features)
            ax.set_xlabel("Importance")
            ax.set_title("Feature Importance ({})".format(importance_type))
            fig.tight_layout()

            tmpdir = tempfile.mkdtemp()
            try:
                # pylint: disable=undefined-loop-variable
                filepath = os.path.join(
                    tmpdir, "feature_importance_{}.png".format(imp_type))
                fig.savefig(filepath)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                plt.close(fig)
                shutil.rmtree(tmpdir)

        # logging booster params separately via mlflow.log_params to extract key/value pairs
        # and make it easier to compare them across runs.
        params = args[0] if len(args) > 0 else kwargs["params"]
        try_mlflow_log(mlflow.log_params, params)

        unlogged_params = [
            "params",
            "dtrain",
            "evals",
            "obj",
            "feval",
            "evals_result",
            "xgb_model",
            "callbacks",
            "learning_rates",
        ]
        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
        num_pos_args = len(args)

        # adding a callback that records evaluation results.
        eval_results = []
        callbacks_index = all_arg_names.index("callbacks")

        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            callback = record_eval_results(eval_results, metrics_logger)
            if num_pos_args >= callbacks_index + 1:
                tmp_list = list(args)
                tmp_list[callbacks_index] += [callback]
                args = tuple(tmp_list)
            elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
                kwargs["callbacks"] += [callback]
            else:
                kwargs["callbacks"] = [callback]

            # training model
            model = original(*args, **kwargs)

            # If early_stopping_rounds is present, logging metrics at the best iteration
            # as extra metrics with the max step + 1.
            early_stopping_index = all_arg_names.index("early_stopping_rounds")
            early_stopping = (num_pos_args >= early_stopping_index + 1
                              or "early_stopping_rounds" in kwargs)
            if early_stopping:
                extra_step = len(eval_results)
                metrics_logger.record_metrics(
                    {"stopped_iteration": extra_step - 1})
                metrics_logger.record_metrics(
                    {"best_iteration": model.best_iteration})
                metrics_logger.record_metrics(
                    eval_results[model.best_iteration], extra_step)

        # logging feature importance as artifacts.
        for imp_type in importance_types:
            imp = None
            try:
                imp = model.get_score(importance_type=imp_type)
                features, importance = zip(*imp.items())
                log_feature_importance_plot(features, importance, imp_type)
            except Exception:
                _logger.exception(
                    "Failed to log feature importance plot. XGBoost autologging "
                    "will ignore the failure and continue. Exception: ")

            if imp is not None:
                tmpdir = tempfile.mkdtemp()
                try:
                    filepath = os.path.join(
                        tmpdir, "feature_importance_{}.json".format(imp_type))
                    with open(filepath, "w") as f:
                        json.dump(imp, f)
                    try_mlflow_log(mlflow.log_artifact, filepath)
                finally:
                    shutil.rmtree(tmpdir)

        # dtrain must exist as the original train function already ran successfully
        dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

        # it is possible that the dataset was constructed before the patched
        #   constructor was applied, so we cannot assume the input_example_info exists
        input_example_info = getattr(dtrain, "input_example_info", None)

        def get_input_example():
            if input_example_info is None:
                raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
            if input_example_info.error_msg is not None:
                raise Exception(input_example_info.error_msg)
            return input_example_info.input_example

        def infer_model_signature(input_example):
            model_output = model.predict(xgboost.DMatrix(input_example))
            model_signature = infer_signature(input_example, model_output)
            return model_signature

        # Only log the model if the autolog() param log_models is set to True.
        if log_models:
            # Will only resolve `input_example` and `signature` if `log_models` is `True`.
            input_example, signature = resolve_input_example_and_signature(
                get_input_example,
                infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )

            try_mlflow_log(
                log_model,
                model,
                artifact_path="model",
                signature=signature,
                input_example=input_example,
            )

        return model