def _run_and_log_function(self, original, args, kwargs, unlogged_params, callback_arg_index):
    log_fn_args_as_params(original, args, kwargs, unlogged_params)
    early_stop_callback = None

    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        mlflowKerasCallback = getKerasCallback(metrics_logger)
        # Checking if the 'callback' argument of the function is set
        if len(args) > callback_arg_index:
            tmp_list = list(args)
            early_stop_callback = _early_stop_check(tmp_list[callback_arg_index])
            tmp_list[callback_arg_index] += [mlflowKerasCallback]
            args = tuple(tmp_list)
        elif kwargs.get("callbacks"):
            early_stop_callback = _early_stop_check(kwargs["callbacks"])
            kwargs["callbacks"] += [mlflowKerasCallback]
        else:
            kwargs["callbacks"] = [mlflowKerasCallback]

        try_mlflow_log(_log_early_stop_callback_params, early_stop_callback)

        history = original(self, *args, **kwargs)

        try_mlflow_log(
            _log_early_stop_callback_metrics, early_stop_callback, history, metrics_logger
        )

    return history
def _run_and_log_function(self, original, args, kwargs, unlogged_params, callback_arg_index):
    log_fn_args_as_params(original, list(args), kwargs, unlogged_params)

    callbacks = [cb(self) for cb in self.callback_fns] + (self.callbacks or [])

    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        mlflowFastaiCallback = getFastaiCallback(metrics_logger, self)

        # Checking if the 'callback' argument of the function is set
        if len(args) > callback_arg_index:
            tmp_list = list(args)
            callbacks += list(args[callback_arg_index])
            tmp_list[callback_arg_index] += [mlflowFastaiCallback]
            args = tuple(tmp_list)
        elif kwargs.get("callbacks"):
            callbacks += list(kwargs["callbacks"])
            kwargs["callbacks"] += [mlflowFastaiCallback]
        else:
            kwargs["callbacks"] = [mlflowFastaiCallback]

        early_stop_callback = _find_callback_of_type(EarlyStoppingCallback, callbacks)
        one_cycle_callback = _find_callback_of_type(OneCycleScheduler, callbacks)

        _log_early_stop_callback_params(early_stop_callback)
        _log_one_cycle_callback_params(one_cycle_callback)

        result = original(self, *args, **kwargs)

    return result
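# The two wrappers above (and the gluon/lightgbm/xgboost ones below) share one pattern:
# inject an MLflow callback into either the positional callbacks argument or the
# "callbacks" keyword argument of the patched training function. A minimal sketch of that
# pattern follows; `_inject_callback` is a hypothetical helper, not an MLflow API, and it
# copies the caller's callback list instead of extending it in place.
def _inject_callback(args, kwargs, callback, callback_arg_index, kwarg_name="callbacks"):
    if len(args) > callback_arg_index:
        tmp_list = list(args)
        tmp_list[callback_arg_index] = list(tmp_list[callback_arg_index]) + [callback]
        return tuple(tmp_list), kwargs
    if kwargs.get(kwarg_name):
        kwargs[kwarg_name] = list(kwargs[kwarg_name]) + [callback]
    else:
        kwargs[kwarg_name] = [callback]
    return args, kwargs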
def test_batch_metrics_logger_logs_all_metrics(start_run,):
    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        for i in range(100):
            metrics_logger.record_metrics({hex(i): i}, i)

    metrics_on_run = mlflow.tracking.MlflowClient().get_run(run_id).data.metrics

    for i in range(100):
        assert hex(i) in metrics_on_run
        assert metrics_on_run[hex(i)] == i
def test_batch_metrics_logger_logs_timestamps_as_int_milliseconds(start_run,):
    with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock, mock.patch(
        "time.time", return_value=123.45678901234567890
    ):
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            metrics_logger.record_metrics({"x": 1}, step=0)

        _, kwargs = log_batch_mock.call_args
        logged_metric = kwargs["metrics"][0]

        assert logged_metric.timestamp == 123456
def test_batch_metrics_logger_records_time_correctly(start_run,):
    with mock.patch.object(MlflowClient, "log_batch", wraps=lambda *args, **kwargs: time.sleep(1)):
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            metrics_logger.record_metrics({"x": 1}, step=0)

            assert metrics_logger.total_log_batch_time >= 1

            time.sleep(2)

            metrics_logger.record_metrics({"x": 1}, step=0)

            assert metrics_logger.total_training_time >= 2
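# A minimal usage sketch of the pattern the tests above exercise: open a batch metrics
# logger for the active run and record metrics per step; flushing to MlflowClient.log_batch
# is handled by the context manager. The import path below is an assumption about where
# batch_metrics_logger lives; the training loop and "loss" values are placeholders.
import mlflow
from mlflow.utils.autologging_utils import batch_metrics_logger  # assumed location

with mlflow.start_run() as run:
    with batch_metrics_logger(run.info.run_id) as metrics_logger:
        for step in range(10):
            loss = 1.0 / (step + 1)  # placeholder "training" metric
            metrics_logger.record_metrics({"loss": loss}, step)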
def fit(original, self, *args, **kwargs):
    # Wrap `fit` execution within a batch metrics logger context.
    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        mlflowGluonCallback = getGluonCallback(metrics_logger)
        if len(args) >= 4:
            tmp_list = list(args)
            tmp_list[3] += [mlflowGluonCallback]
            args = tuple(tmp_list)
        elif "event_handlers" in kwargs:
            kwargs["event_handlers"] += [mlflowGluonCallback]
        else:
            kwargs["event_handlers"] = [mlflowGluonCallback]

        result = original(self, *args, **kwargs)

    return result
def test_batch_metrics_logger_chunks_metrics_when_batch_logging(start_run,):
    with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock:
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            metrics_logger.record_metrics({hex(x): x for x in range(5000)}, step=0)

        for call_idx, call in enumerate(log_batch_mock.call_args_list):
            _, kwargs = call

            assert kwargs["run_id"] == run_id
            assert len(kwargs["metrics"]) == 1000
            for metric_idx, metric in enumerate(kwargs["metrics"]):
                assert metric.key == hex(call_idx * 1000 + metric_idx)
                assert metric.value == call_idx * 1000 + metric_idx
                assert metric.step == 0
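# A sketch of the chunking behavior asserted above: 5000 recorded metrics arrive at
# MlflowClient.log_batch in consecutive chunks of 1000 Metric entities. The helper below
# is hypothetical (not the BatchMetricsLogger implementation); it only illustrates the
# grouping that the test expects.
from mlflow.entities import Metric


def _chunk_metrics(metrics, chunk_size=1000):
    """Yield lists of at most `chunk_size` metrics, preserving insertion order."""
    for start in range(0, len(metrics), chunk_size):
        yield metrics[start:start + chunk_size]


metrics = [Metric(key=hex(i), value=i, timestamp=0, step=0) for i in range(5000)]
assert [len(chunk) for chunk in _chunk_metrics(metrics)] == [1000] * 5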
def test_batch_metrics_logger_runs_training_and_logging_in_correct_ratio(start_run,):
    with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock:
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            metrics_logger.record_metrics({"x": 1}, step=0)  # data doesn't matter

            # first metrics should be logged immediately to record a previous timestamp and
            # batch log time
            log_batch_mock.assert_called_once()

            metrics_logger.total_log_batch_time = 1
            metrics_logger.total_training_time = 1

            log_batch_mock.reset_mock()  # resets the 'calls' of this mock

            # the above 'training' took 1 second. So with a target training-to-logging time
            # ratio of 10:1, 9 more 'training' iterations should happen without sending the
            # batch, and then after the 10th training iteration the batch should be sent.
            for i in range(2, 11):
                metrics_logger.record_metrics({"x": 1}, step=0)
                log_batch_mock.assert_not_called()
                metrics_logger.total_training_time = i

            # at this point, average log batch time is 1, and total training time is 9,
            # thus the next record_metrics call should send the batch.
            metrics_logger.record_metrics({"x": 1}, step=0)
            log_batch_mock.assert_called_once()

            # update log_batch time to reflect the 'mocked' training time
            metrics_logger.total_log_batch_time = 2

            log_batch_mock.reset_mock()  # reset the recorded calls

            for i in range(12, 21):
                metrics_logger.record_metrics({"x": 1}, step=0)
                log_batch_mock.assert_not_called()
                metrics_logger.total_training_time = i

            metrics_logger.record_metrics({"x": 1}, step=0)
            log_batch_mock.assert_called_once()
def test_batch_metrics_logger_continues_if_log_batch_fails(start_run,):
    with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock:
        log_batch_mock.side_effect = [Exception("asdf"), None]

        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            # this call should fail to record since log_batch raised exception
            metrics_logger.record_metrics({"x": 1}, step=0)

            metrics_logger.record_metrics({"y": 2}, step=1)

        # even though the first call to log_batch failed, the BatchMetricsLogger should
        # continue logging subsequent batches
        last_call = log_batch_mock.call_args_list[-1]
        _, kwargs = last_call

        assert kwargs["run_id"] == run_id
        assert len(kwargs["metrics"]) == 1
        metric = kwargs["metrics"][0]
        assert metric.key == "y"
        assert metric.value == 2
        assert metric.step == 1
def train(original, *args, **kwargs):
    def record_eval_results(eval_results, metrics_logger):
        """
        Create a callback function that records evaluation results.
        """

        @exception_safe_function
        def callback(env):
            res = {}
            for data_name, eval_name, value, _ in env.evaluation_result_list:
                key = data_name + "-" + eval_name
                res[key] = value
            metrics_logger.record_metrics(res, env.iteration)
            eval_results.append(res)

        return callback

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt

        indices = np.argsort(importance)
        features = np.array(features)[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align="center", height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel("Importance")
        ax.set_title("Feature Importance ({})".format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # pylint: disable=undefined-loop-variable
            filepath = os.path.join(tmpdir, "feature_importance_{}.png".format(imp_type))
            fig.savefig(filepath)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs["params"]
    try_mlflow_log(mlflow.log_params, params)

    unlogged_params = [
        "params",
        "train_set",
        "valid_sets",
        "valid_names",
        "fobj",
        "feval",
        "init_model",
        "evals_result",
        "learning_rates",
        "callbacks",
    ]
    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index("callbacks")

    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        callback = record_eval_results(eval_results, metrics_logger)
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            tmp_list[callbacks_index] += [callback]
            args = tuple(tmp_list)
        elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
            kwargs["callbacks"] += [callback]
        else:
            kwargs["callbacks"] = [callback]

        # training model
        model = original(*args, **kwargs)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index("early_stopping_rounds")
        early_stopping = (
            num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
        )
        if early_stopping:
            extra_step = len(eval_results)
            metrics_logger.record_metrics({"stopped_iteration": extra_step})
            # best_iteration is set even if training does not stop early.
            metrics_logger.record_metrics({"best_iteration": model.best_iteration})
            # iteration starts from 1 in LightGBM.
            results = eval_results[model.best_iteration - 1]
            metrics_logger.record_metrics(results, step=extra_step)

    # logging feature importance as artifacts.
for imp_type in ["split", "gain"]: features = model.feature_name() importance = model.feature_importance(importance_type=imp_type) try: log_feature_importance_plot(features, importance, imp_type) except Exception: _logger.exception( "Failed to log feature importance plot. LightGBM autologging " "will ignore the failure and continue. Exception: ") imp = {ft: imp for ft, imp in zip(features, importance.tolist())} tmpdir = tempfile.mkdtemp() try: filepath = os.path.join( tmpdir, "feature_importance_{}.json".format(imp_type)) with open(filepath, "w") as f: json.dump(imp, f, indent=2) try_mlflow_log(mlflow.log_artifact, filepath) finally: shutil.rmtree(tmpdir) # train_set must exist as the original train function already ran successfully train_set = args[1] if len(args) > 1 else kwargs.get("train_set") # it is possible that the dataset was constructed before the patched # constructor was applied, so we cannot assume the input_example_info exists input_example_info = getattr(train_set, "input_example_info", None) def get_input_example(): if input_example_info is None: raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT) if input_example_info.error_msg is not None: raise Exception(input_example_info.error_msg) return input_example_info.input_example def infer_model_signature(input_example): model_output = model.predict(input_example) model_signature = infer_signature(input_example, model_output) return model_signature # Whether to automatically log the trained model based on boolean flag. if log_models: # Will only resolve `input_example` and `signature` if `log_models` is `True`. input_example, signature = resolve_input_example_and_signature( get_input_example, infer_model_signature, log_input_examples, log_model_signatures, _logger, ) try_mlflow_log( log_model, model, artifact_path="model", signature=signature, input_example=input_example, ) return model
def train(original, *args, **kwargs):
    def record_eval_results(eval_results, metrics_logger):
        """
        Create a callback function that records evaluation results.
        """
        if LooseVersion(xgboost.__version__) >= LooseVersion("1.3.0"):
            # In xgboost >= 1.3.0, user-defined callbacks should inherit
            # `xgboost.callback.TrainingCallback`:
            # https://xgboost.readthedocs.io/en/latest/python/callbacks.html#defining-your-own-callback  # noqa

            class Callback(
                xgboost.callback.TrainingCallback, metaclass=ExceptionSafeAbstractClass,
            ):
                def after_iteration(self, model, epoch, evals_log):
                    """
                    Run after each iteration. Return True when training should stop.
                    """
                    # `evals_log` is a nested dict (type: Dict[str, Dict[str, List[float]]])
                    # that looks like this:
                    # {
                    #   "train": {
                    #     "auc": [0.5, 0.6, 0.7, ...],
                    #     ...
                    #   },
                    #   ...
                    # }
                    evaluation_result_dict = {}
                    for data_name, metric_dict in evals_log.items():
                        for metric_name, metric_values_on_each_iter in metric_dict.items():
                            key = "{}-{}".format(data_name, metric_name)
                            # The last element in `metric_values_on_each_iter` corresponds to
                            # the metric on the current iteration
                            evaluation_result_dict[key] = metric_values_on_each_iter[-1]

                    metrics_logger.record_metrics(evaluation_result_dict, epoch)
                    eval_results.append(evaluation_result_dict)

                    # Return `False` to indicate training should not stop
                    return False

            return Callback()

        else:

            @exception_safe_function
            def callback(env):
                metrics_logger.record_metrics(dict(env.evaluation_result_list), env.iteration)
                eval_results.append(dict(env.evaluation_result_list))

            return callback

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt

        features = np.array(features)
        importance = np.array(importance)
        indices = np.argsort(importance)
        features = features[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align="center", height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel("Importance")
        ax.set_title("Feature Importance ({})".format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # pylint: disable=undefined-loop-variable
            filepath = os.path.join(tmpdir, "feature_importance_{}.png".format(imp_type))
            fig.savefig(filepath)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs["params"]
    try_mlflow_log(mlflow.log_params, params)

    unlogged_params = [
        "params",
        "dtrain",
        "evals",
        "obj",
        "feval",
        "evals_result",
        "xgb_model",
        "callbacks",
        "learning_rates",
    ]
    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
eval_results = [] callbacks_index = all_arg_names.index("callbacks") run_id = mlflow.active_run().info.run_id with batch_metrics_logger(run_id) as metrics_logger: callback = record_eval_results(eval_results, metrics_logger) if num_pos_args >= callbacks_index + 1: tmp_list = list(args) tmp_list[callbacks_index] += [callback] args = tuple(tmp_list) elif "callbacks" in kwargs and kwargs["callbacks"] is not None: kwargs["callbacks"] += [callback] else: kwargs["callbacks"] = [callback] # training model model = original(*args, **kwargs) # If early_stopping_rounds is present, logging metrics at the best iteration # as extra metrics with the max step + 1. early_stopping_index = all_arg_names.index("early_stopping_rounds") early_stopping = (num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs) if early_stopping: extra_step = len(eval_results) metrics_logger.record_metrics( {"stopped_iteration": extra_step - 1}) metrics_logger.record_metrics( {"best_iteration": model.best_iteration}) metrics_logger.record_metrics( eval_results[model.best_iteration], extra_step) # logging feature importance as artifacts. for imp_type in importance_types: imp = None try: imp = model.get_score(importance_type=imp_type) features, importance = zip(*imp.items()) log_feature_importance_plot(features, importance, imp_type) except Exception: _logger.exception( "Failed to log feature importance plot. XGBoost autologging " "will ignore the failure and continue. Exception: ") if imp is not None: tmpdir = tempfile.mkdtemp() try: filepath = os.path.join( tmpdir, "feature_importance_{}.json".format(imp_type)) with open(filepath, "w") as f: json.dump(imp, f) try_mlflow_log(mlflow.log_artifact, filepath) finally: shutil.rmtree(tmpdir) # dtrain must exist as the original train function already ran successfully dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain") # it is possible that the dataset was constructed before the patched # constructor was applied, so we cannot assume the input_example_info exists input_example_info = getattr(dtrain, "input_example_info", None) def get_input_example(): if input_example_info is None: raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT) if input_example_info.error_msg is not None: raise Exception(input_example_info.error_msg) return input_example_info.input_example def infer_model_signature(input_example): model_output = model.predict(xgboost.DMatrix(input_example)) model_signature = infer_signature(input_example, model_output) return model_signature # Only log the model if the autolog() param log_models is set to True. if log_models: # Will only resolve `input_example` and `signature` if `log_models` is `True`. input_example, signature = resolve_input_example_and_signature( get_input_example, infer_model_signature, log_input_examples, log_model_signatures, _logger, ) try_mlflow_log( log_model, model, artifact_path="model", signature=signature, input_example=input_example, ) return model