def _run_and_log_function(self, original, args, kwargs, unlogged_params, is_fine_tune=False):
    # Check whether `fit` is being called from within a `fine_tune` call
    mlflow_cbs = [cb for cb in self.cbs if cb.name == "___mlflow_fastai"]
    fit_in_fine_tune = (
        original.__name__ == "fit" and len(mlflow_cbs) > 0 and mlflow_cbs[0].is_fine_tune
    )

    if not fit_in_fine_tune:
        log_fn_args_as_params(original, list(args), kwargs, unlogged_params)

    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        if not fit_in_fine_tune:
            early_stop_callback = _find_callback_of_type(EarlyStoppingCallback, self.cbs)
            _log_early_stop_callback_params(early_stop_callback)

            # First, remove any MLflow callback that is already registered
            self.remove_cbs(mlflow_cbs)

            # Log information regarding model and data without bar and print-out
            with self.no_bar(), self.no_logging():
                _log_model_info(learner=self)

            mlflowFastaiCallback = getFastaiCallback(
                metrics_logger=metrics_logger, is_fine_tune=is_fine_tune
            )

            # Add the new callback
            self.add_cb(mlflowFastaiCallback)

        result = original(self, *args, **kwargs)

    return result
def _patch_implementation(
    self, original, inst, *args, **kwargs
):  # pylint: disable=arguments-differ
    unlogged_params = ["self", "generator", "callbacks", "validation_data", "verbose"]

    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    run_id = mlflow.active_run().info.run_id

    with batch_metrics_logger(run_id) as metrics_logger:
        # Checking if the 'callback' argument of fit() is set
        if len(args) >= 5:
            tmp_list = list(args)
            tmp_list[4], self.log_dir = _setup_callbacks(
                tmp_list[4], log_models, metrics_logger
            )
            args = tuple(tmp_list)
        elif kwargs.get("callbacks"):
            kwargs["callbacks"], self.log_dir = _setup_callbacks(
                kwargs["callbacks"], log_models, metrics_logger
            )
        else:
            kwargs["callbacks"], self.log_dir = _setup_callbacks(
                [], log_models, metrics_logger
            )

        result = original(inst, *args, **kwargs)

    _flush_queue()
    _log_artifacts_with_warning(
        local_dir=self.log_dir.location, artifact_path="tensorboard_logs"
    )
    if self.log_dir.is_temp:
        shutil.rmtree(self.log_dir.location)

    return result
def _run_and_log_function(self, original, args, kwargs, unlogged_params, callback_arg_index):
    log_fn_args_as_params(original, list(args), kwargs, unlogged_params)

    callbacks = [cb(self) for cb in self.callback_fns] + (self.callbacks or [])

    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        mlflowFastaiCallback = getFastaiCallback(metrics_logger, self)

        # Checking if the 'callbacks' argument of the function is set
        if len(args) > callback_arg_index:
            tmp_list = list(args)
            callbacks += list(args[callback_arg_index])
            tmp_list[callback_arg_index] += [mlflowFastaiCallback]
            args = tuple(tmp_list)
        elif kwargs.get("callbacks"):
            callbacks += list(kwargs["callbacks"])
            kwargs["callbacks"] += [mlflowFastaiCallback]
        else:
            kwargs["callbacks"] = [mlflowFastaiCallback]

        early_stop_callback = _find_callback_of_type(EarlyStoppingCallback, callbacks)
        one_cycle_callback = _find_callback_of_type(OneCycleScheduler, callbacks)
        _log_early_stop_callback_params(early_stop_callback)
        _log_one_cycle_callback_params(one_cycle_callback)

        result = original(self, *args, **kwargs)

    return result
def test_batch_metrics_logger_continues_if_log_batch_fails(
    start_run,
):  # pylint: disable=unused-argument
    with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock:
        log_batch_mock.side_effect = [Exception("asdf"), None]

        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            # this call should fail to record since log_batch raised exception
            metrics_logger.record_metrics({"x": 1}, step=0)

            metrics_logger.record_metrics({"y": 2}, step=1)

        # even though the first call to log_batch failed, the BatchMetricsLogger should
        # continue logging subsequent batches
        last_call = log_batch_mock.call_args_list[-1]

        _, kwargs = last_call
        assert kwargs["run_id"] == run_id
        assert len(kwargs["metrics"]) == 1
        metric = kwargs["metrics"][0]
        assert metric.key == "y"
        assert metric.value == 2
        assert metric.step == 1
def fit(self, *args, **kwargs):
    if not mlflow.active_run():
        try_mlflow_log(mlflow.start_run)
        auto_end_run = True
    else:
        auto_end_run = False

    original = gorilla.get_original_attribute(Estimator, "fit")

    # Wrap `fit` execution within a batch metrics logger context.
    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        mlflowGluonCallback = getGluonCallback(metrics_logger)
        if len(args) >= 4:
            args_list = list(args)
            args_list[3] += [mlflowGluonCallback]
            args = tuple(args_list)
        elif "event_handlers" in kwargs:
            kwargs["event_handlers"] += [mlflowGluonCallback]
        else:
            kwargs["event_handlers"] = [mlflowGluonCallback]

        result = original(self, *args, **kwargs)

    if auto_end_run:
        try_mlflow_log(mlflow.end_run)

    return result
def _run_and_log_function(self, original, args, kwargs, unlogged_params, callback_arg_index):
    log_fn_args_as_params(original, args, kwargs, unlogged_params)
    early_stop_callback = None

    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        mlflowKerasCallback = getKerasCallback(metrics_logger)

        # Checking if the 'callbacks' argument of the function is set
        if len(args) > callback_arg_index:
            tmp_list = list(args)
            early_stop_callback = _early_stop_check(tmp_list[callback_arg_index])
            tmp_list[callback_arg_index] += [mlflowKerasCallback]
            args = tuple(tmp_list)
        elif kwargs.get("callbacks"):
            early_stop_callback = _early_stop_check(kwargs["callbacks"])
            kwargs["callbacks"] += [mlflowKerasCallback]
        else:
            kwargs["callbacks"] = [mlflowKerasCallback]

        try_mlflow_log(_log_early_stop_callback_params, early_stop_callback)

        history = original(self, *args, **kwargs)

        try_mlflow_log(
            _log_early_stop_callback_metrics, early_stop_callback, history, metrics_logger
        )

    return history
def test_batch_metrics_logger_logs_all_metrics(start_run,):
    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        for i in range(100):
            metrics_logger.record_metrics({hex(i): i}, i)

    metrics_on_run = mlflow.tracking.MlflowClient().get_run(run_id).data.metrics

    for i in range(100):
        assert hex(i) in metrics_on_run
        assert metrics_on_run[hex(i)] == i
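# A minimal usage sketch (not taken from the snippets above): the surrounding code suggests
# that `batch_metrics_logger(run_id)` is a context manager whose yielded object exposes
# `record_metrics(metrics_dict, step)` and batches calls to the MLflow client's `log_batch`.
# The import path below is an assumption and may differ between MLflow versions.
import mlflow
from mlflow.utils.autologging_utils import batch_metrics_logger  # assumed import path

with mlflow.start_run() as run:
    with batch_metrics_logger(run.info.run_id) as metrics_logger:
        for step in range(10):
            loss = 1.0 / (step + 1)  # placeholder for a real training loss
            metrics_logger.record_metrics({"loss": loss}, step)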
def _patch_implementation(
    self, original, inst, *args, **kwargs
):  # pylint: disable=arguments-differ
    unlogged_params = ["self", "x", "y", "callbacks", "validation_data", "verbose"]

    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    early_stop_callback = None

    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        # Check if the 'callbacks' argument of fit() is set positionally
        if len(args) >= 6:
            # Convert the positional training function arguments to a list in order to
            # mutate the contents
            args = list(args)
            # Make a shallow copy of the preexisting callbacks to avoid permanently
            # modifying their contents for future training invocations. Introduce
            # TensorBoard & tf.keras callbacks if necessary
            callbacks = list(args[5])
            callbacks, self.log_dir = _setup_callbacks(callbacks, log_models, metrics_logger)
            # Replace the callbacks positional entry in the copied arguments and convert
            # the arguments back to tuple form for usage in the training function
            args[5] = callbacks
            args = tuple(args)
        else:
            # Make a shallow copy of the preexisting callbacks and introduce TensorBoard
            # & tf.keras callbacks if necessary
            callbacks = list(kwargs.get("callbacks") or [])
            kwargs["callbacks"], self.log_dir = _setup_callbacks(
                callbacks, log_models, metrics_logger
            )

        early_stop_callback = _get_early_stop_callback(callbacks)
        _log_early_stop_callback_params(early_stop_callback)

        history = original(inst, *args, **kwargs)

        _log_early_stop_callback_metrics(
            callback=early_stop_callback,
            history=history,
            metrics_logger=metrics_logger,
        )

    _flush_queue()
    mlflow.log_artifacts(
        local_dir=self.log_dir.location,
        artifact_path="tensorboard_logs",
    )
    if self.log_dir.is_temp:
        shutil.rmtree(self.log_dir.location)

    return history
def test_batch_metrics_logger_records_time_correctly(start_run,):
    with mock.patch.object(
        MlflowClient, "log_batch", wraps=lambda *args, **kwargs: time.sleep(1)
    ):
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            metrics_logger.record_metrics({"x": 1}, step=0)

            assert metrics_logger.total_log_batch_time >= 1

            time.sleep(2)

            metrics_logger.record_metrics({"x": 1}, step=0)

            assert metrics_logger.total_training_time >= 2
def test_batch_metrics_logger_logs_timestamps_as_int_milliseconds(start_run,):
    with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock, mock.patch(
        "time.time", return_value=123.45678901234567890
    ):
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            metrics_logger.record_metrics({"x": 1}, step=0)

        _, kwargs = log_batch_mock.call_args

        logged_metric = kwargs["metrics"][0]

        assert logged_metric.timestamp == 123456
def fit(original, self, *args, **kwargs):
    # Wrap `fit` execution within a batch metrics logger context.
    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        mlflowGluonCallback = getGluonCallback(metrics_logger)
        if len(args) >= 4:
            args = (*args[:3], args[3] + [mlflowGluonCallback], *args[4:])
        elif "event_handlers" in kwargs:
            kwargs["event_handlers"] += [mlflowGluonCallback]
        else:
            kwargs["event_handlers"] = [mlflowGluonCallback]

        result = original(self, *args, **kwargs)

    return result
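# The patches above repeat one pattern: append an MLflow callback to the training call's
# callback list, whether it was passed positionally or as a keyword argument. Below is a
# hedged, generic sketch of that pattern; `_inject_callback` and its parameters are
# hypothetical names introduced here for illustration, not part of the MLflow API.
def _inject_callback(args, kwargs, cb, position, kwarg_name):
    """Return (args, kwargs) with `cb` appended to the callback list found at positional
    index `position` or under `kwarg_name`, without mutating the caller's original list."""
    if len(args) > position:
        args = list(args)
        args[position] = list(args[position]) + [cb]
        return tuple(args), kwargs
    kwargs = dict(kwargs)
    kwargs[kwarg_name] = list(kwargs.get(kwarg_name) or []) + [cb]
    return args, kwargs


# Example (hypothetical), mirroring the gluon patch above, where the callback list is the
# 4th positional argument or the "event_handlers" keyword argument of Estimator.fit:
# args, kwargs = _inject_callback(args, kwargs, mlflowGluonCallback, 3, "event_handlers")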
def test_batch_metrics_logger_chunks_metrics_when_batch_logging(start_run,):
    with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock:
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            metrics_logger.record_metrics({hex(x): x for x in range(5000)}, step=0)
            run_id = mlflow.active_run().info.run_id

            for call_idx, call in enumerate(log_batch_mock.call_args_list):
                _, kwargs = call

                assert kwargs["run_id"] == run_id
                assert len(kwargs["metrics"]) == 1000
                for metric_idx, metric in enumerate(kwargs["metrics"]):
                    assert metric.key == hex(call_idx * 1000 + metric_idx)
                    assert metric.value == call_idx * 1000 + metric_idx
                    assert metric.step == 0
def fit(self, *args, **kwargs):
    with _manage_active_run() as run:
        original = gorilla.get_original_attribute(tensorflow.keras.Model, "fit")

        unlogged_params = ["self", "x", "y", "callbacks", "validation_data", "verbose"]

        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        early_stop_callback = None

        run_id = run.info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            # Checking if the 'callbacks' argument of fit() is set
            if len(args) >= 6:
                tmp_list = list(args)
                early_stop_callback = _early_stop_check(tmp_list[5])
                tmp_list[5], log_dir = _setup_callbacks(tmp_list[5], log_models, metrics_logger)
                args = tuple(tmp_list)
            elif kwargs.get("callbacks"):
                early_stop_callback = _early_stop_check(kwargs["callbacks"])
                kwargs["callbacks"], log_dir = _setup_callbacks(
                    kwargs["callbacks"], log_models, metrics_logger
                )
            else:
                kwargs["callbacks"], log_dir = _setup_callbacks([], log_models, metrics_logger)

            _log_early_stop_callback_params(early_stop_callback)

            history = original(self, *args, **kwargs)

            _log_early_stop_callback_metrics(early_stop_callback, history, metrics_logger)

        _flush_queue()
        _log_artifacts_with_warning(
            local_dir=log_dir.location, artifact_path="tensorboard_logs"
        )
        if log_dir.is_temp:
            shutil.rmtree(log_dir.location)

        return history
def test_batch_metrics_logger_runs_training_and_logging_in_correct_ratio(
    start_run,
):
    with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock:
        run_id = mlflow.active_run().info.run_id
        with batch_metrics_logger(run_id) as metrics_logger:
            metrics_logger.record_metrics({"x": 1}, step=0)  # data doesn't matter

            # first metrics should be logged immediately to record a previous timestamp and
            # batch log time
            log_batch_mock.assert_called_once()

            metrics_logger.total_log_batch_time = 1
            metrics_logger.total_training_time = 1

            log_batch_mock.reset_mock()  # resets the 'calls' of this mock

            # the above 'training' took 1 second. So with target training-to-logging time
            # ratio of 10:1, 9 more 'training' should happen without sending the batch and
            # then after the 10th training the batch should be sent.
            for i in range(2, 11):
                metrics_logger.record_metrics({"x": 1}, step=0)
                log_batch_mock.assert_not_called()
                metrics_logger.total_training_time = i

            # at this point, average log batch time is 1, and total training time is 9
            # thus the next record_metrics call should send the batch.
            metrics_logger.record_metrics({"x": 1}, step=0)
            log_batch_mock.assert_called_once()

            # update log_batch time to reflect the 'mocked' training time
            metrics_logger.total_log_batch_time = 2

            log_batch_mock.reset_mock()  # reset the recorded calls

            for i in range(12, 21):
                metrics_logger.record_metrics({"x": 1}, step=0)
                log_batch_mock.assert_not_called()
                metrics_logger.total_training_time = i

            metrics_logger.record_metrics({"x": 1}, step=0)
            log_batch_mock.assert_called_once()
def fit_generator(self, *args, **kwargs):
    """
    NOTE: `fit_generator()` is deprecated in TF >= 2.1.0 and simply wraps `fit()`.
    To avoid unintentional creation of nested MLflow runs caused by a patched
    `fit_generator()` method calling a patched `fit()` method, we only patch
    `fit_generator()` in TF < 2.1.0.
    """
    with _manage_active_run() as run:
        original = gorilla.get_original_attribute(tensorflow.keras.Model, "fit_generator")

        unlogged_params = ["self", "generator", "callbacks", "validation_data", "verbose"]

        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        run_id = run.info.run_id

        with batch_metrics_logger(run_id) as metrics_logger:
            # Checking if the 'callbacks' argument of fit_generator() is set
            if len(args) >= 5:
                tmp_list = list(args)
                tmp_list[4], log_dir = _setup_callbacks(tmp_list[4], log_models, metrics_logger)
                args = tuple(tmp_list)
            elif kwargs.get("callbacks"):
                kwargs["callbacks"], log_dir = _setup_callbacks(
                    kwargs["callbacks"], log_models, metrics_logger
                )
            else:
                kwargs["callbacks"], log_dir = _setup_callbacks([], log_models, metrics_logger)

            result = original(self, *args, **kwargs)

        _flush_queue()
        _log_artifacts_with_warning(
            local_dir=log_dir.location, artifact_path="tensorboard_logs"
        )
        if log_dir.is_temp:
            shutil.rmtree(log_dir.location)

        return result
def train(*args, **kwargs):
    def record_eval_results(eval_results, metrics_logger):
        """
        Create a callback function that records evaluation results.
        """

        def callback(env):
            metrics_logger.record_metrics(dict(env.evaluation_result_list), env.iteration)
            eval_results.append(dict(env.evaluation_result_list))

        return callback

    if not mlflow.active_run():
        try_mlflow_log(mlflow.start_run)
        auto_end_run = True
    else:
        auto_end_run = False

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt

        features = np.array(features)
        importance = np.array(importance)
        indices = np.argsort(importance)
        features = features[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align="center", height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel("Importance")
        ax.set_title("Feature Importance ({})".format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # pylint: disable=undefined-loop-variable
            filepath = os.path.join(tmpdir, "feature_importance_{}.png".format(imp_type))
            fig.savefig(filepath)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    original = gorilla.get_original_attribute(xgboost, "train")

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs["params"]
    try_mlflow_log(mlflow.log_params, params)

    unlogged_params = [
        "params",
        "dtrain",
        "evals",
        "obj",
        "feval",
        "evals_result",
        "xgb_model",
        "callbacks",
        "learning_rates",
    ]
    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index("callbacks")

    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        callback = record_eval_results(eval_results, metrics_logger)
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            tmp_list[callbacks_index] += [callback]
            args = tuple(tmp_list)
        elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
            kwargs["callbacks"] += [callback]
        else:
            kwargs["callbacks"] = [callback]

        # training model
        model = original(*args, **kwargs)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index("early_stopping_rounds")
        early_stopping = (
            num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
        )
        if early_stopping:
            extra_step = len(eval_results)
            try_mlflow_log(mlflow.log_metric, "stopped_iteration", len(eval_results) - 1)
            try_mlflow_log(mlflow.log_metric, "best_iteration", model.best_iteration)
            try_mlflow_log(mlflow.log_metrics, eval_results[model.best_iteration], step=extra_step)

    # logging feature importance as artifacts.
    for imp_type in importance_types:
        imp = None
        try:
            imp = model.get_score(importance_type=imp_type)
            features, importance = zip(*imp.items())
            log_feature_importance_plot(features, importance, imp_type)
        except Exception:  # pylint: disable=broad-except
            _logger.exception(
                "Failed to log feature importance plot. XGBoost autologging "
                "will ignore the failure and continue. Exception: "
            )

        if imp is not None:
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, "feature_importance_{}.json".format(imp_type))
                with open(filepath, "w") as f:
                    json.dump(imp, f)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                shutil.rmtree(tmpdir)

    # dtrain must exist as the original train function already ran successfully
    dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

    # it is possible that the dataset was constructed before the patched
    # constructor was applied, so we cannot assume the input_example_info exists
    input_example_info = getattr(dtrain, "input_example_info", None)

    def get_input_example():
        if input_example_info is None:
            raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
        if input_example_info.error_msg is not None:
            raise Exception(input_example_info.error_msg)
        return input_example_info.input_example

    def infer_model_signature(input_example):
        model_output = model.predict(xgboost.DMatrix(input_example))
        model_signature = infer_signature(input_example, model_output)
        return model_signature

    # Only log the model if the autolog() param log_models is set to True.
    if log_models:
        # Will only resolve `input_example` and `signature` if `log_models` is `True`.
        input_example, signature = resolve_input_example_and_signature(
            get_input_example,
            infer_model_signature,
            log_input_examples,
            log_model_signatures,
            _logger,
        )

        try_mlflow_log(
            log_model,
            model,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
        )

    if auto_end_run:
        try_mlflow_log(mlflow.end_run)

    return model
def train(original, *args, **kwargs):
    def record_eval_results(eval_results, metrics_logger):
        """
        Create a callback function that records evaluation results.
        """
        # TODO: Remove `replace("SNAPSHOT", "dev")` once the following issue is addressed:
        # https://github.com/dmlc/xgboost/issues/6984
        if Version(xgboost.__version__.replace("SNAPSHOT", "dev")) >= Version("1.3.0"):
            # In xgboost >= 1.3.0, user-defined callbacks should inherit
            # `xgboost.callback.TrainingCallback`:
            # https://xgboost.readthedocs.io/en/latest/python/callbacks.html#defining-your-own-callback # noqa

            class Callback(
                xgboost.callback.TrainingCallback,
                metaclass=ExceptionSafeAbstractClass,
            ):
                def after_iteration(self, model, epoch, evals_log):
                    """
                    Run after each iteration. Return True when training should stop.
                    """
                    # `evals_log` is a nested dict (type: Dict[str, Dict[str, List[float]]])
                    # that looks like this:
                    # {
                    #   "train": {
                    #     "auc": [0.5, 0.6, 0.7, ...],
                    #     ...
                    #   },
                    #   ...
                    # }
                    evaluation_result_dict = {}
                    for data_name, metric_dict in evals_log.items():
                        for metric_name, metric_values_on_each_iter in metric_dict.items():
                            key = "{}-{}".format(data_name, metric_name)
                            # The last element in `metric_values_on_each_iter` corresponds to
                            # the metric on the current iteration
                            evaluation_result_dict[key] = metric_values_on_each_iter[-1]

                    metrics_logger.record_metrics(evaluation_result_dict, epoch)
                    eval_results.append(evaluation_result_dict)

                    # Return `False` to indicate training should not stop
                    return False

            return Callback()

        else:

            @exception_safe_function
            def callback(env):
                metrics_logger.record_metrics(dict(env.evaluation_result_list), env.iteration)
                eval_results.append(dict(env.evaluation_result_list))

            return callback

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt

        features = np.array(features)
        importance = np.array(importance)
        indices = np.argsort(importance)
        features = features[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align="center", height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel("Importance")
        ax.set_title("Feature Importance ({})".format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # pylint: disable=undefined-loop-variable
            filepath = os.path.join(tmpdir, "feature_importance_{}.png".format(imp_type))
            fig.savefig(filepath)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs["params"]
    try_mlflow_log(mlflow.log_params, params)

    unlogged_params = [
        "params",
        "dtrain",
        "evals",
        "obj",
        "feval",
        "evals_result",
        "xgb_model",
        "callbacks",
        "learning_rates",
    ]
    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index("callbacks")

    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        callback = record_eval_results(eval_results, metrics_logger)
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            tmp_list[callbacks_index] += [callback]
            args = tuple(tmp_list)
        elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
            kwargs["callbacks"] += [callback]
        else:
            kwargs["callbacks"] = [callback]

        # training model
        model = original(*args, **kwargs)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index("early_stopping_rounds")
        early_stopping = (
            num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
        )
        if early_stopping:
            extra_step = len(eval_results)
            metrics_logger.record_metrics({"stopped_iteration": extra_step - 1})
            metrics_logger.record_metrics({"best_iteration": model.best_iteration})
            metrics_logger.record_metrics(eval_results[model.best_iteration], extra_step)

    # logging feature importance as artifacts.
    for imp_type in importance_types:
        imp = None
        try:
            imp = model.get_score(importance_type=imp_type)
            features, importance = zip(*imp.items())
            log_feature_importance_plot(features, importance, imp_type)
        except Exception:
            _logger.exception(
                "Failed to log feature importance plot. XGBoost autologging "
                "will ignore the failure and continue. Exception: "
            )

        if imp is not None:
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, "feature_importance_{}.json".format(imp_type))
                with open(filepath, "w") as f:
                    json.dump(imp, f)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                shutil.rmtree(tmpdir)

    # dtrain must exist as the original train function already ran successfully
    dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

    # it is possible that the dataset was constructed before the patched
    # constructor was applied, so we cannot assume the input_example_info exists
    input_example_info = getattr(dtrain, "input_example_info", None)

    def get_input_example():
        if input_example_info is None:
            raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
        if input_example_info.error_msg is not None:
            raise Exception(input_example_info.error_msg)
        return input_example_info.input_example

    def infer_model_signature(input_example):
        model_output = model.predict(xgboost.DMatrix(input_example))
        model_signature = infer_signature(input_example, model_output)
        return model_signature

    # Only log the model if the autolog() param log_models is set to True.
    if log_models:
        # Will only resolve `input_example` and `signature` if `log_models` is `True`.
        input_example, signature = resolve_input_example_and_signature(
            get_input_example,
            infer_model_signature,
            log_input_examples,
            log_model_signatures,
            _logger,
        )

        try_mlflow_log(
            log_model,
            model,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
        )

    return model
def train(original, *args, **kwargs):
    def record_eval_results(eval_results, metrics_logger):
        """
        Create a callback function that records evaluation results.
        """
        # TODO: Remove `replace("SNAPSHOT", "dev")` once the following issue is addressed:
        # https://github.com/dmlc/xgboost/issues/6984
        if Version(xgboost.__version__.replace("SNAPSHOT", "dev")) >= Version("1.3.0"):
            # In xgboost >= 1.3.0, user-defined callbacks should inherit
            # `xgboost.callback.TrainingCallback`:
            # https://xgboost.readthedocs.io/en/latest/python/callbacks.html#defining-your-own-callback # noqa

            class Callback(
                xgboost.callback.TrainingCallback,
                metaclass=ExceptionSafeAbstractClass,
            ):
                def after_iteration(self, model, epoch, evals_log):
                    """
                    Run after each iteration. Return True when training should stop.
                    """
                    # `evals_log` is a nested dict (type: Dict[str, Dict[str, List[float]]])
                    # that looks like this:
                    # {
                    #   "train": {
                    #     "auc": [0.5, 0.6, 0.7, ...],
                    #     ...
                    #   },
                    #   ...
                    # }
                    evaluation_result_dict = {}
                    for data_name, metric_dict in evals_log.items():
                        for metric_name, metric_values_on_each_iter in metric_dict.items():
                            key = "{}-{}".format(data_name, metric_name)
                            # The last element in `metric_values_on_each_iter` corresponds to
                            # the metric on the current iteration
                            evaluation_result_dict[key] = metric_values_on_each_iter[-1]

                    metrics_logger.record_metrics(evaluation_result_dict, epoch)
                    eval_results.append(evaluation_result_dict)

                    # Return `False` to indicate training should not stop
                    return False

            return Callback()

        else:

            @exception_safe_function
            def callback(env):
                metrics_logger.record_metrics(dict(env.evaluation_result_list), env.iteration)
                eval_results.append(dict(env.evaluation_result_list))

            return callback

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt
        from cycler import cycler

        features = np.array(features)

        # Structure the supplied `importance` values as a `num_features`-by-`num_classes` matrix
        importances_per_class_by_feature = np.array(importance)
        if importances_per_class_by_feature.ndim <= 1:
            # In this case, the supplied `importance` values are not given per class. Rather,
            # one importance value is given per feature. For consistency with the assumed
            # `num_features`-by-`num_classes` matrix structure, we coerce the importance
            # values to a `num_features`-by-1 matrix
            indices = np.argsort(importance)
            # Sort features and importance values by magnitude during transformation to a
            # `num_features`-by-`num_classes` matrix
            features = features[indices]
            importances_per_class_by_feature = np.array(
                [[importance] for importance in importances_per_class_by_feature[indices]]
            )

            # In this case, do not include class labels on the feature importance plot because
            # only one importance value has been provided per feature, rather than one
            # importance value for each class per feature
            label_classes_on_plot = False
        else:
            importance_value_magnitudes = np.abs(importances_per_class_by_feature).sum(axis=1)
            indices = np.argsort(importance_value_magnitudes)
            features = features[indices]
            importances_per_class_by_feature = importances_per_class_by_feature[indices]
            label_classes_on_plot = True

        num_classes = importances_per_class_by_feature.shape[1]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        h = h + 0.1 * num_classes if num_classes > 1 else h
        fig, ax = plt.subplots(figsize=(w, h))

        # When importance values are provided for each class per feature, we want to ensure
        # that the same color is used for all bars in the bar chart that have the same class
        colors_to_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"][:num_classes]
        color_cycler = cycler(color=colors_to_cycle)
        ax.set_prop_cycle(color_cycler)

        # The following logic operates on one feature at a time, adding a bar to the bar chart
        # for each class that reflects the importance of the feature to predictions of that
        # class
        feature_ylocs = np.arange(num_features)
        # Define offsets on the y-axis that are used to evenly space the bars for each class
        # around the y-axis position of each feature
        offsets_per_yloc = np.linspace(-0.5, 0.5, num_classes) / 2 if num_classes > 1 else [0]
        for feature_idx, (feature_yloc, importances_per_class) in enumerate(
            zip(feature_ylocs, importances_per_class_by_feature)
        ):
            for class_idx, (offset, class_importance) in enumerate(
                zip(offsets_per_yloc, importances_per_class)
            ):
                (bar,) = ax.barh(
                    feature_yloc + offset,
                    class_importance,
                    align="center",
                    # Set the bar height such that importance value bars for a particular
                    # feature are spaced properly relative to each other (no overlap or gaps)
                    # and relative to importance value bars for other features
                    height=(0.5 / max(num_classes - 1, 1)),
                )
                if label_classes_on_plot and feature_idx == 0:
                    # Only set a label the first time a bar for a particular class is plotted to
                    # avoid duplicate legend entries. If we were to set a label for every bar,
                    # the legend would contain `num_features` labels for each class.
                    bar.set_label("Class {}".format(class_idx))

        ax.set_yticks(feature_ylocs)
        ax.set_yticklabels(features)
        ax.set_xlabel("Importance")
        ax.set_title("Feature Importance ({})".format(importance_type))
        if label_classes_on_plot:
            ax.legend()
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # pylint: disable=undefined-loop-variable
            filepath = os.path.join(tmpdir, "feature_importance_{}.png".format(imp_type))
            fig.savefig(filepath)
            mlflow.log_artifact(filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    autologging_client = MlflowAutologgingQueueingClient()

    # logging booster params separately to extract key/value pairs and make it easier to
    # compare them across runs.
    booster_params = args[0] if len(args) > 0 else kwargs["params"]
    autologging_client.log_params(run_id=mlflow.active_run().info.run_id, params=booster_params)

    unlogged_params = [
        "params",
        "dtrain",
        "evals",
        "obj",
        "feval",
        "evals_result",
        "xgb_model",
        "callbacks",
        "learning_rates",
    ]
    params_to_log_for_fn = get_mlflow_run_params_for_fn_args(
        original, args, kwargs, unlogged_params
    )
    autologging_client.log_params(
        run_id=mlflow.active_run().info.run_id, params=params_to_log_for_fn
    )

    param_logging_operations = autologging_client.flush(synchronous=False)

    all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index("callbacks")

    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        callback = record_eval_results(eval_results, metrics_logger)
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            tmp_list[callbacks_index] += [callback]
            args = tuple(tmp_list)
        elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
            kwargs["callbacks"] += [callback]
        else:
            kwargs["callbacks"] = [callback]

        # training model
        model = original(*args, **kwargs)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index("early_stopping_rounds")
        early_stopping = (
            num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
        )
        if early_stopping:
            extra_step = len(eval_results)
            autologging_client.log_metrics(
                run_id=mlflow.active_run().info.run_id,
                metrics={
                    "stopped_iteration": extra_step - 1,
                    "best_iteration": model.best_iteration,
                },
            )
            autologging_client.log_metrics(
                run_id=mlflow.active_run().info.run_id,
                metrics=eval_results[model.best_iteration],
                step=extra_step,
            )
            early_stopping_logging_operations = autologging_client.flush(synchronous=False)

    # logging feature importance as artifacts.
    for imp_type in importance_types:
        imp = None
        try:
            imp = model.get_score(importance_type=imp_type)
            features, importance = zip(*imp.items())
            log_feature_importance_plot(features, importance, imp_type)
        except Exception:
            _logger.exception(
                "Failed to log feature importance plot. XGBoost autologging "
                "will ignore the failure and continue. Exception: "
            )

        if imp is not None:
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, "feature_importance_{}.json".format(imp_type))
                with open(filepath, "w") as f:
                    json.dump(imp, f)
                mlflow.log_artifact(filepath)
            finally:
                shutil.rmtree(tmpdir)

    # dtrain must exist as the original train function already ran successfully
    dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

    # it is possible that the dataset was constructed before the patched
    # constructor was applied, so we cannot assume the input_example_info exists
    input_example_info = getattr(dtrain, "input_example_info", None)

    def get_input_example():
        if input_example_info is None:
            raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
        if input_example_info.error_msg is not None:
            raise Exception(input_example_info.error_msg)
        return input_example_info.input_example

    def infer_model_signature(input_example):
        model_output = model.predict(xgboost.DMatrix(input_example))
        model_signature = infer_signature(input_example, model_output)
        return model_signature

    # Only log the model if the autolog() param log_models is set to True.
    if log_models:
        # Will only resolve `input_example` and `signature` if `log_models` is `True`.
        input_example, signature = resolve_input_example_and_signature(
            get_input_example,
            infer_model_signature,
            log_input_examples,
            log_model_signatures,
            _logger,
        )

        log_model(
            model,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
        )

    param_logging_operations.await_completion()
    if early_stopping:
        early_stopping_logging_operations.await_completion()

    return model
def train(original, *args, **kwargs):
    def record_eval_results(eval_results, metrics_logger):
        """
        Create a callback function that records evaluation results.
        """

        @exception_safe_function
        def callback(env):
            res = {}
            for data_name, eval_name, value, _ in env.evaluation_result_list:
                key = data_name + "-" + eval_name
                res[key] = value
            metrics_logger.record_metrics(res, env.iteration)
            eval_results.append(res)

        return callback

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt

        indices = np.argsort(importance)
        features = np.array(features)[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align="center", height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel("Importance")
        ax.set_title("Feature Importance ({})".format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # pylint: disable=undefined-loop-variable
            filepath = os.path.join(tmpdir, "feature_importance_{}.png".format(imp_type))
            fig.savefig(filepath)
            mlflow.log_artifact(filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    autologging_client = MlflowAutologgingQueueingClient()

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    booster_params = args[0] if len(args) > 0 else kwargs["params"]
    autologging_client.log_params(run_id=mlflow.active_run().info.run_id, params=booster_params)

    unlogged_params = [
        "params",
        "train_set",
        "valid_sets",
        "valid_names",
        "fobj",
        "feval",
        "init_model",
        "evals_result",
        "learning_rates",
        "callbacks",
    ]
    params_to_log_for_fn = get_mlflow_run_params_for_fn_args(
        original, args, kwargs, unlogged_params
    )
    autologging_client.log_params(
        run_id=mlflow.active_run().info.run_id, params=params_to_log_for_fn
    )

    param_logging_operations = autologging_client.flush(synchronous=False)

    all_arg_names = _get_arg_names(original)
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index("callbacks")

    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        callback = record_eval_results(eval_results, metrics_logger)
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            tmp_list[callbacks_index] += [callback]
            args = tuple(tmp_list)
        elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
            kwargs["callbacks"] += [callback]
        else:
            kwargs["callbacks"] = [callback]

        # training model
        model = original(*args, **kwargs)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index("early_stopping_rounds")
        early_stopping = (
            num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
        )
        if early_stopping:
            extra_step = len(eval_results)
            autologging_client.log_metrics(
                run_id=mlflow.active_run().info.run_id,
                metrics={
                    "stopped_iteration": extra_step,
                    # best_iteration is set even if training does not stop early.
                    "best_iteration": model.best_iteration,
                },
            )
            # iteration starts from 1 in LightGBM.
            last_iter_results = eval_results[model.best_iteration - 1]
            autologging_client.log_metrics(
                run_id=mlflow.active_run().info.run_id,
                metrics=last_iter_results,
                step=extra_step,
            )
            early_stopping_logging_operations = autologging_client.flush(synchronous=False)

    # logging feature importance as artifacts.
    for imp_type in ["split", "gain"]:
        features = model.feature_name()
        importance = model.feature_importance(importance_type=imp_type)
        try:
            log_feature_importance_plot(features, importance, imp_type)
        except Exception:
            _logger.exception(
                "Failed to log feature importance plot. LightGBM autologging "
                "will ignore the failure and continue. Exception: "
            )

        imp = {ft: imp for ft, imp in zip(features, importance.tolist())}
        tmpdir = tempfile.mkdtemp()
        try:
            filepath = os.path.join(tmpdir, "feature_importance_{}.json".format(imp_type))
            with open(filepath, "w") as f:
                json.dump(imp, f, indent=2)
            mlflow.log_artifact(filepath)
        finally:
            shutil.rmtree(tmpdir)

    # train_set must exist as the original train function already ran successfully
    train_set = args[1] if len(args) > 1 else kwargs.get("train_set")

    # it is possible that the dataset was constructed before the patched
    # constructor was applied, so we cannot assume the input_example_info exists
    input_example_info = getattr(train_set, "input_example_info", None)

    def get_input_example():
        if input_example_info is None:
            raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
        if input_example_info.error_msg is not None:
            raise Exception(input_example_info.error_msg)
        return input_example_info.input_example

    def infer_model_signature(input_example):
        model_output = model.predict(input_example)
        model_signature = infer_signature(input_example, model_output)
        return model_signature

    # Whether to automatically log the trained model based on boolean flag.
    if log_models:
        # Will only resolve `input_example` and `signature` if `log_models` is `True`.
        input_example, signature = resolve_input_example_and_signature(
            get_input_example,
            infer_model_signature,
            log_input_examples,
            log_model_signatures,
            _logger,
        )

        log_model(
            model,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
        )

    param_logging_operations.await_completion()
    if early_stopping:
        early_stopping_logging_operations.await_completion()

    return model