def save_parameters(path, params=None):
    """
    Save parameters through the un-patched ``nn.save_parameters`` and then log the
    file at ``path`` to MLflow as an artifact.

    :param path: Path forwarded to the original ``save_parameters``; the same path is
                 afterwards handed to ``mlflow.log_artifact``.
    :param params: Optional value forwarded unchanged to the original function.
    """
    unpatched_save = gorilla.get_original_attribute(nn, 'save_parameters')
    unpatched_save(path, params)

    # NOTE(review): `mlflow.active_run()` is dereferenced without a None check, so this
    # presumably relies on a run already being active -- confirm against the caller.
    active_run_id = mlflow.active_run().info.run_id
    # NOTE(review): this string is passed as the second positional argument of
    # `mlflow.log_artifact`; confirm that a 'runs:...' value is what is intended there.
    artifact_location = 'runs:{}/{}'.format(active_run_id, 'parameters')
    try_mlflow_log(mlflow.log_artifact, path, artifact_location)
def train(*args, **kwargs):
    """
    Patched replacement for ``lightgbm.train`` that records the training with MLflow.

    Logs the booster params, the remaining call arguments, the evaluation results of
    every iteration, early-stopping information, feature importances (plot and JSON)
    and the trained model together with an input example / signature. If no run is
    active on entry, a run is started here and ended before returning; it is ended
    with status ``FAILED`` when the original training function raises.

    :param args: Positional arguments forwarded to the original ``lightgbm.train``.
    :param kwargs: Keyword arguments forwarded to the original ``lightgbm.train``.
    :return: The model returned by the original ``lightgbm.train``.
    """

    def record_eval_results(eval_results):
        """
        Create a callback function that records evaluation results.
        """

        def callback(env):
            res = {}
            for data_name, eval_name, value, _ in env.evaluation_result_list:
                key = data_name + "-" + eval_name
                res[key] = value
            eval_results.append(res)

        return callback

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt

        indices = np.argsort(importance)
        features = np.array(features)[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align="center", height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel("Importance")
        ax.set_title("Feature Importance ({})".format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # Use the function's own parameter rather than the enclosing loop variable.
            filepath = os.path.join(
                tmpdir, "feature_importance_{}.png".format(importance_type)
            )
            fig.savefig(filepath)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    if not mlflow.active_run():
        try_mlflow_log(mlflow.start_run)
        auto_end_run = True
    else:
        auto_end_run = False

    original = gorilla.get_original_attribute(lightgbm, "train")

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs["params"]
    try_mlflow_log(mlflow.log_params, params)

    unlogged_params = [
        "params",
        "train_set",
        "valid_sets",
        "valid_names",
        "fobj",
        "feval",
        "init_model",
        "evals_result",
        "learning_rates",
        "callbacks",
    ]

    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    # `inspect.getargspec` no longer exists on Python 3.11+; prefer `getfullargspec`
    # when it is available. Both return the positional argument names first.
    get_arg_spec = (
        getattr(inspect, "getfullargspec", None) or inspect.getargspec  # pylint: disable=W1505
    )
    all_arg_names = get_arg_spec(original)[0]
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index("callbacks")
    callback = record_eval_results(eval_results)
    # Build a new list instead of using `+=` so the caller's callbacks list is not
    # mutated, and so that `callbacks=None` (positional or keyword) is accepted.
    if num_pos_args >= callbacks_index + 1:
        tmp_list = list(args)
        tmp_list[callbacks_index] = list(tmp_list[callbacks_index] or []) + [callback]
        args = tuple(tmp_list)
    else:
        kwargs["callbacks"] = list(kwargs.get("callbacks") or []) + [callback]

    # training model
    try:
        model = original(*args, **kwargs)
    except Exception:
        # Do not leave a run that was started here active when training fails.
        if auto_end_run:
            try_mlflow_log(mlflow.end_run, "FAILED")
        raise

    # logging metrics on each iteration.
    for idx, metrics in enumerate(eval_results):
        try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

    # If early_stopping_rounds is present, logging metrics at the best iteration
    # as extra metrics with the max step + 1.
    early_stopping_index = all_arg_names.index("early_stopping_rounds")
    early_stopping = (
        num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
    )
    if early_stopping:
        extra_step = len(eval_results)
        try_mlflow_log(mlflow.log_metric, "stopped_iteration", len(eval_results))
        # best_iteration is set even if training does not stop early.
        try_mlflow_log(mlflow.log_metric, "best_iteration", model.best_iteration)
        # Nothing to index when no evaluation results were recorded.
        if eval_results:
            # iteration starts from 1 in LightGBM.
            try_mlflow_log(
                mlflow.log_metrics, eval_results[model.best_iteration - 1], step=extra_step
            )

    # logging feature importance as artifacts.
    for imp_type in ["split", "gain"]:
        features = model.feature_name()
        importance = model.feature_importance(importance_type=imp_type)
        try:
            log_feature_importance_plot(features, importance, imp_type)
        except Exception:  # pylint: disable=broad-except
            _logger.exception(
                "Failed to log feature importance plot. LightGBM autologging "
                "will ignore the failure and continue. Exception: "
            )

        imp = {ft: value for ft, value in zip(features, importance.tolist())}
        tmpdir = tempfile.mkdtemp()
        try:
            filepath = os.path.join(tmpdir, "feature_importance_{}.json".format(imp_type))
            with open(filepath, "w") as f:
                json.dump(imp, f, indent=2)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            shutil.rmtree(tmpdir)

    # train_set must exist as the original train function already ran successfully
    train_set = args[1] if len(args) > 1 else kwargs.get("train_set")

    # it is possible that the dataset was constructed before the patched
    # constructor was applied, so we cannot assume the input_example_info exists
    input_example_info = getattr(train_set, "input_example_info", None)

    def get_input_example():
        if input_example_info is None:
            raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
        if input_example_info.error_msg is not None:
            raise Exception(input_example_info.error_msg)
        return input_example_info.input_example

    def infer_model_signature(input_example):
        model_output = model.predict(input_example)
        model_signature = infer_signature(input_example, model_output)
        return model_signature

    input_example, signature = resolve_input_example_and_signature(
        get_input_example,
        infer_model_signature,
        log_input_example,
        log_model_signature,
        _logger,
    )

    try_mlflow_log(
        log_model,
        model,
        artifact_path="model",
        signature=signature,
        input_example=input_example,
    )

    if auto_end_run:
        try_mlflow_log(mlflow.end_run)
    return model
def train_begin(self, estimator, *args, **kwargs):
    """
    Log network and optimizer settings of ``estimator`` as MLflow params.

    Logs ``num_layers`` (``len(estimator.net)``), ``epochs`` / ``batches`` when
    ``max_epoch`` / ``max_batch`` are not None, the optimizer class name, and the
    optimizer's ``lr`` / ``epsilon`` attributes when they exist.

    :param estimator: Object exposing ``net``, ``max_epoch``, ``max_batch`` and
                      ``trainer.optimizer``.
    :param args: Unused.
    :param kwargs: Unused.
    """
    try_mlflow_log(mlflow.log_param, "num_layers", len(estimator.net))

    # Training limits are logged only when they have been set.
    for param_key, limit in (("epochs", estimator.max_epoch), ("batches", estimator.max_batch)):
        if limit is not None:
            try_mlflow_log(mlflow.log_param, param_key, limit)

    optimizer = estimator.trainer.optimizer
    try_mlflow_log(mlflow.log_param, "optimizer_name", type(optimizer).__name__)

    # (attribute on the optimizer, name of the logged param)
    for attr_name, param_key in (("lr", "learning_rate"), ("epsilon", "epsilon")):
        if hasattr(optimizer, attr_name):
            try_mlflow_log(mlflow.log_param, param_key, getattr(optimizer, attr_name))
def on_train_begin(self, logs=None):  # pylint: disable=unused-argument
    """
    Log optimizer settings and the model summary when training begins.

    Logs ``optimizer_name``, ``learning_rate`` and ``epsilon`` as params (each only
    when the matching attribute is found on the optimizer) and writes the text
    produced by ``self.model.summary`` to ``model_summary.txt``, which is logged as
    an artifact.

    :param logs: Unused.
    """

    def to_loggable(setting):
        # Plain floats are logged as-is; anything else goes through the Keras backend.
        if type(setting) is float:
            return setting
        return tensorflow.keras.backend.eval(setting)

    optimizer = self.model.optimizer
    if hasattr(optimizer, '_name'):
        try_mlflow_log(mlflow.log_param, 'optimizer_name', optimizer._name)
    # Elif checks are if the optimizer is a TensorFlow optimizer rather than a Keras one.
    elif hasattr(optimizer, 'optimizer'):
        # TensorFlow optimizer parameters are associated with the inner optimizer variable.
        # Therefore, the inner optimizer is used for logging the remaining parameters.
        optimizer = optimizer.optimizer
        try_mlflow_log(mlflow.log_param, 'optimizer_name', type(optimizer).__name__)

    # For each param, the public attribute is tried first and the underscore-prefixed
    # one is the fallback; at most one of them is logged.
    candidates = (
        ('learning_rate', ('lr', '_lr')),
        ('epsilon', ('epsilon', '_epsilon')),
    )
    for param_key, attr_names in candidates:
        for attr_name in attr_names:
            if hasattr(optimizer, attr_name):
                value = to_loggable(getattr(optimizer, attr_name))
                try_mlflow_log(mlflow.log_param, param_key, value)
                break

    summary_lines = []
    self.model.summary(print_fn=summary_lines.append)
    summary_text = '\n'.join(summary_lines)

    scratch_dir = tempfile.mkdtemp()
    try:
        summary_path = os.path.join(scratch_dir, "model_summary.txt")
        with open(summary_path, 'w') as handle:
            handle.write(summary_text)
        try_mlflow_log(mlflow.log_artifact, local_path=summary_path)
    finally:
        # The temporary directory is removed whether or not logging succeeded.
        shutil.rmtree(scratch_dir)
def on_train_end(self, logs=None):  # pylint: disable=unused-argument
    """
    Log ``self.model`` with ``mlflow.keras.log_model`` under the artifact path "model".

    :param logs: Unused.
    """
    log_fn = mlflow.keras.log_model
    trained_model = self.model
    try_mlflow_log(log_fn, trained_model, artifact_path='model')
def on_train_end(self, logs=None):
    """
    Log ``self.model`` with ``log_model`` under the artifact path "model".

    :param logs: Unused.
    """
    log_fn = log_model
    trained_model = self.model
    try_mlflow_log(log_fn, trained_model, artifact_path="model")
def on_train_end(self, **kwargs):
    """
    Log ``self.learner`` with ``log_model`` under the artifact path "model".

    :param kwargs: Unused.
    """
    log_fn = log_model
    trained_learner = self.learner
    try_mlflow_log(log_fn, trained_learner, artifact_path="model")
def train(*args, **kwargs):
    """
    Patched replacement for ``lightgbm.train`` that records the training with MLflow.

    Logs the booster params, the remaining call arguments, the evaluation results of
    every iteration, early-stopping information, feature importances (JSON) and the
    trained model. If no run is active on entry, a run is started here and ended
    before returning; it is ended with status ``FAILED`` when the original training
    function raises.

    :param args: Positional arguments forwarded to the original ``lightgbm.train``.
    :param kwargs: Keyword arguments forwarded to the original ``lightgbm.train``.
    :return: The model returned by the original ``lightgbm.train``.
    """

    def record_eval_results(eval_results):
        """
        Create a callback function that records evaluation results.
        """

        def callback(env):
            res = {}
            for data_name, eval_name, value, _ in env.evaluation_result_list:
                key = data_name + '-' + eval_name
                res[key] = value
            eval_results.append(res)

        return callback

    if not mlflow.active_run():
        try_mlflow_log(mlflow.start_run)
        auto_end_run = True
    else:
        auto_end_run = False

    original = gorilla.get_original_attribute(lightgbm, 'train')

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs['params']
    try_mlflow_log(mlflow.log_params, params)

    unlogged_params = ['params', 'train_set', 'valid_sets', 'valid_names', 'fobj', 'feval',
                       'init_model', 'evals_result', 'learning_rates', 'callbacks']

    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    # `inspect.getargspec` no longer exists on Python 3.11+; prefer `getfullargspec`
    # when it is available. Both return the positional argument names first.
    get_arg_spec = (
        getattr(inspect, 'getfullargspec', None) or inspect.getargspec  # pylint: disable=W1505
    )
    all_arg_names = get_arg_spec(original)[0]
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index('callbacks')
    callback = record_eval_results(eval_results)
    # Build a new list instead of using `+=` so the caller's callbacks list is not
    # mutated, and so that `callbacks=None` (positional or keyword) is accepted.
    if num_pos_args >= callbacks_index + 1:
        tmp_list = list(args)
        tmp_list[callbacks_index] = list(tmp_list[callbacks_index] or []) + [callback]
        args = tuple(tmp_list)
    else:
        kwargs['callbacks'] = list(kwargs.get('callbacks') or []) + [callback]

    # training model
    try:
        model = original(*args, **kwargs)
    except Exception:
        # Do not leave a run that was started here active when training fails.
        if auto_end_run:
            try_mlflow_log(mlflow.end_run, 'FAILED')
        raise

    # logging metrics on each iteration.
    for idx, metrics in enumerate(eval_results):
        try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

    # If early_stopping_rounds is present, logging metrics at the best iteration
    # as extra metrics with the max step + 1.
    early_stopping_index = all_arg_names.index('early_stopping_rounds')
    early_stopping = (num_pos_args >= early_stopping_index + 1 or
                      'early_stopping_rounds' in kwargs)
    if early_stopping:
        extra_step = len(eval_results)
        try_mlflow_log(mlflow.log_metric, 'stopped_iteration', len(eval_results))
        # best_iteration is set even if training does not stop early.
        try_mlflow_log(mlflow.log_metric, 'best_iteration', model.best_iteration)
        # Nothing to index when no evaluation results were recorded.
        if eval_results:
            # iteration starts from 1 in LightGBM.
            try_mlflow_log(mlflow.log_metrics, eval_results[model.best_iteration - 1],
                           step=extra_step)

    # logging feature importance as artifacts.
    for imp_type in ['split', 'gain']:
        features = model.feature_name()
        importance = model.feature_importance(importance_type=imp_type)
        imp = {ft: value for ft, value in zip(features, importance.tolist())}
        tmpdir = tempfile.mkdtemp()
        try:
            filepath = os.path.join(tmpdir, 'feature_importance_{}.json'.format(imp_type))
            with open(filepath, 'w') as f:
                json.dump(imp, f)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            shutil.rmtree(tmpdir)

    try_mlflow_log(log_model, model, artifact_path='model')

    if auto_end_run:
        try_mlflow_log(mlflow.end_run)
    return model
def on_checkpoint(self, checkpoint_path):
    """
    Log ``checkpoint_path`` as an artifact of the run ``self._run_id`` through
    ``self.client.log_artifact``.

    :param checkpoint_path: Local path handed to ``self.client.log_artifact``.
    """
    log_artifact = self.client.log_artifact
    target_run_id = self._run_id
    try_mlflow_log(log_artifact, target_run_id, checkpoint_path)
def _log_estimator_params(param_map):
    """
    Log the entries of ``param_map`` as MLflow params, chunk by chunk.

    :param param_map: Mapping passed to ``_chunk_dict``; every resulting chunk is
                      truncated with ``_truncate_dict`` and then logged.
    """
    # Chunk model parameters to avoid hitting the log_batch API limit
    param_chunks = _chunk_dict(param_map, chunk_size=MAX_PARAMS_TAGS_PER_BATCH)
    for param_chunk in param_chunks:
        safe_chunk = _truncate_dict(param_chunk, MAX_ENTITY_KEY_LENGTH, MAX_PARAM_VAL_LENGTH)
        try_mlflow_log(mlflow.log_params, safe_chunk)
def train(original, *args, **kwargs):
    """
    Run ``original`` (the un-patched training function) and record the training with
    MLflow.

    Logs the booster params, the remaining call arguments, the evaluation results of
    every iteration (through a batch metrics logger), early-stopping information,
    feature importances (plot and JSON) and, when ``log_models`` is set, the trained
    model together with an input example / signature.

    :param original: The un-patched training function that is called to train.
    :param args: Positional arguments forwarded to ``original``.
    :param kwargs: Keyword arguments forwarded to ``original``.
    :return: The model returned by ``original``.
    """

    def record_eval_results(eval_results, metrics_logger):
        """
        Create a callback function that records evaluation results.
        """

        @exception_safe_function
        def callback(env):
            metrics_logger.record_metrics(dict(env.evaluation_result_list), env.iteration)
            eval_results.append(dict(env.evaluation_result_list))

        return callback

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt

        features = np.array(features)
        importance = np.array(importance)
        indices = np.argsort(importance)
        features = features[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align="center", height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel("Importance")
        ax.set_title("Feature Importance ({})".format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # Use the function's own parameter rather than the enclosing loop variable.
            filepath = os.path.join(
                tmpdir, "feature_importance_{}.png".format(importance_type))
            fig.savefig(filepath)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs["params"]
    try_mlflow_log(mlflow.log_params, params)

    unlogged_params = [
        "params",
        "dtrain",
        "evals",
        "obj",
        "feval",
        "evals_result",
        "xgb_model",
        "callbacks",
        "learning_rates",
    ]
    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    # `inspect.getargspec` no longer exists on Python 3.11+; prefer `getfullargspec`
    # when it is available. Both return the positional argument names first.
    get_arg_spec = (
        getattr(inspect, "getfullargspec", None) or inspect.getargspec  # pylint: disable=W1505
    )
    all_arg_names = get_arg_spec(original)[0]
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index("callbacks")

    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        callback = record_eval_results(eval_results, metrics_logger)
        # Build a new list instead of using `+=` so the caller's callbacks list is not
        # mutated, and so that `callbacks=None` (positional or keyword) is accepted.
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            tmp_list[callbacks_index] = list(tmp_list[callbacks_index] or []) + [callback]
            args = tuple(tmp_list)
        else:
            kwargs["callbacks"] = list(kwargs.get("callbacks") or []) + [callback]

        # training model
        model = original(*args, **kwargs)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index("early_stopping_rounds")
        early_stopping = (num_pos_args >= early_stopping_index + 1
                          or "early_stopping_rounds" in kwargs)
        if early_stopping:
            extra_step = len(eval_results)
            metrics_logger.record_metrics(
                {"stopped_iteration": extra_step - 1})
            metrics_logger.record_metrics(
                {"best_iteration": model.best_iteration})
            metrics_logger.record_metrics(
                eval_results[model.best_iteration], extra_step)

    # logging feature importance as artifacts.
    for imp_type in importance_types:
        imp = None
        try:
            imp = model.get_score(importance_type=imp_type)
            features, importance = zip(*imp.items())
            log_feature_importance_plot(features, importance, imp_type)
        except Exception:
            _logger.exception(
                "Failed to log feature importance plot. XGBoost autologging "
                "will ignore the failure and continue. Exception: ")

        # The JSON artifact is still written when only the plot failed; it is skipped
        # when the scores themselves could not be obtained.
        if imp is not None:
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(
                    tmpdir, "feature_importance_{}.json".format(imp_type))
                with open(filepath, "w") as f:
                    json.dump(imp, f)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                shutil.rmtree(tmpdir)

    # dtrain must exist as the original train function already ran successfully
    dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

    # it is possible that the dataset was constructed before the patched
    # constructor was applied, so we cannot assume the input_example_info exists
    input_example_info = getattr(dtrain, "input_example_info", None)

    def get_input_example():
        if input_example_info is None:
            raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
        if input_example_info.error_msg is not None:
            raise Exception(input_example_info.error_msg)
        return input_example_info.input_example

    def infer_model_signature(input_example):
        model_output = model.predict(xgboost.DMatrix(input_example))
        model_signature = infer_signature(input_example, model_output)
        return model_signature

    # Only log the model if the autolog() param log_models is set to True.
    if log_models:
        # Will only resolve `input_example` and `signature` if `log_models` is `True`.
        input_example, signature = resolve_input_example_and_signature(
            get_input_example,
            infer_model_signature,
            log_input_examples,
            log_model_signatures,
            _logger,
        )

        try_mlflow_log(
            log_model,
            model,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
        )

    return model
def on_train_start(self, trainer, pl_module):
    """
    Logs Optimizer related metrics when the train begins

    Sets the "Mode" tag to "training", logs ``trainer.max_epochs`` as "epochs",
    records early-stopping callbacks found on the trainer, logs the name and selected
    ``defaults`` entries of every optimizer, and logs the full model summary as the
    artifact ``model_summary.txt``.

    :param trainer: pytorch lightning trainer instance
    :param pl_module: pytorch lightning base module
    """
    try_mlflow_log(mlflow.set_tag, "Mode", "training")
    try_mlflow_log(mlflow.log_param, "epochs", trainer.max_epochs)

    for callback in trainer.callbacks:
        if not isinstance(callback, pl.callbacks.early_stopping.EarlyStopping):
            continue
        self.early_stopping = True
        self._log_early_stop_params(callback)

    # (key looked up in `optimizer.defaults`, prefix of the logged param name)
    tracked_defaults = (
        ("lr", "learning_rate_"),
        ("eps", "epsilon_"),
        ("betas", "betas_"),
        ("weight_decay", "weight_decay_"),
    )

    if hasattr(trainer, "optimizers"):
        for optimizer in trainer.optimizers:
            class_name = type(optimizer).__name__
            try_mlflow_log(mlflow.log_param, "optimizer_name", class_name)

            if not hasattr(optimizer, "defaults"):
                continue

            # Param names are suffixed with the lower-cased optimizer class name.
            name_suffix = class_name.lower() + "_optimizer"
            defaults = optimizer.defaults
            for default_key, param_prefix in tracked_defaults:
                if default_key in defaults:
                    try_mlflow_log(
                        mlflow.log_param, param_prefix + name_suffix, defaults[default_key]
                    )

    summary_text = str(ModelSummary(pl_module, mode="full"))

    scratch_dir = tempfile.mkdtemp()
    try:
        summary_path = os.path.join(scratch_dir, "model_summary.txt")
        with open(summary_path, "w") as handle:
            handle.write(summary_text)
        try_mlflow_log(mlflow.log_artifact, local_path=summary_path)
    finally:
        # The temporary directory is removed whether or not logging succeeded.
        shutil.rmtree(scratch_dir)
def on_train_begin(self, logs=None):  # pylint: disable=unused-argument
    """
    Log model and optimizer settings and the model summary when training begins.

    Logs ``num_layers``, ``optimizer_name`` and, when the optimizer has the matching
    attribute, ``learning_rate`` and ``epsilon`` as params. The text produced by
    ``self.model.summary`` is set as the ``model_summary`` tag and also logged as the
    artifact ``model_summary.txt``.

    :param logs: Unused.
    """

    def to_loggable(setting):
        # Plain floats are logged as-is; anything else goes through the Keras backend.
        if type(setting) is float:
            return setting
        return keras.backend.eval(setting)

    try_mlflow_log(mlflow.log_param, 'num_layers', len(self.model.layers))

    optimizer = self.model.optimizer
    try_mlflow_log(mlflow.log_param, 'optimizer_name', type(optimizer).__name__)

    # (attribute on the optimizer, name of the logged param)
    for attr_name, param_key in (('lr', 'learning_rate'), ('epsilon', 'epsilon')):
        if hasattr(optimizer, attr_name):
            value = to_loggable(getattr(optimizer, attr_name))
            try_mlflow_log(mlflow.log_param, param_key, value)

    summary_lines = []
    self.model.summary(print_fn=summary_lines.append)
    summary_text = '\n'.join(summary_lines)
    try_mlflow_log(mlflow.set_tag, 'model_summary', summary_text)

    scratch_dir = tempfile.mkdtemp()
    try:
        summary_path = os.path.join(scratch_dir, "model_summary.txt")
        with open(summary_path, 'w') as handle:
            handle.write(summary_text)
        try_mlflow_log(mlflow.log_artifact, local_path=summary_path)
    finally:
        # The temporary directory is removed whether or not logging succeeded.
        shutil.rmtree(scratch_dir)
def fit_mlflow(self, func_name, *args, **kwargs):
    """
    Run the original training method named ``func_name`` on ``self`` and record the
    training with MLflow.

    Logs the estimator's parameters and name/class tags, runs the original method,
    logs ``training_score`` when ``self.score`` exists and succeeds, and logs the
    estimator as a model. A run is started here when none is active; that run is
    ended with status FAILED if training raises, and ended normally otherwise.

    :param func_name: Name of the attribute on ``self`` whose original (un-patched)
                      implementation is invoked.
    :param args: Positional arguments forwarded to the original method.
    :param kwargs: Keyword arguments forwarded to the original method.
    :return: Whatever the original method returns.
    """
    started_run_here = mlflow.active_run() is None
    if started_run_here:
        try_mlflow_log(mlflow.start_run)

    # TODO: We should not log nested estimator parameters for
    # parameter search estimators (GridSearchCV, RandomizedSearchCV)

    # Chunk and truncate model parameters to avoid hitting the log_batch API limit
    estimator_params = self.get_params(deep=True)
    for param_chunk in _chunk_dict(estimator_params, chunk_size=MAX_PARAMS_TAGS_PER_BATCH):
        safe_chunk = _truncate_dict(param_chunk, MAX_ENTITY_KEY_LENGTH, MAX_PARAM_VAL_LENGTH)
        try_mlflow_log(mlflow.log_params, safe_chunk)

    estimator_cls = self.__class__
    estimator_tags = {
        "estimator_name": estimator_cls.__name__,
        "estimator_class": estimator_cls.__module__ + "." + estimator_cls.__name__,
    }
    try_mlflow_log(mlflow.set_tags, estimator_tags)

    unpatched_fit = gorilla.get_original_attribute(self, func_name)
    try:
        fit_output = unpatched_fit(*args, **kwargs)
    except Exception as e:
        # Only a run that was started here is closed on failure.
        if started_run_here:
            try_mlflow_log(mlflow.end_run, RunStatus.to_string(RunStatus.FAILED))
        raise e

    if hasattr(self, "score"):
        try:
            score_args = _get_args_for_score(self.score, self.fit, args, kwargs)
            training_score = self.score(*score_args)
        except Exception as e:  # pylint: disable=broad-except
            # A scoring failure is reported but does not abort the logging.
            _logger.warning(
                self.score.__qualname__
                + " failed. The 'training_score' metric will not be recorded. Scoring error: "
                + str(e)
            )
        else:
            try_mlflow_log(mlflow.log_metric, "training_score", training_score)

    try_mlflow_log(log_model, self, artifact_path="model")

    if started_run_here:
        try_mlflow_log(mlflow.end_run)

    return fit_output
def _log_specialized_estimator_content(fitted_estimator, run_id, prefix, X, y_true,
                                       sample_weight=None):
    """
    Compute and log metrics (classifiers and regressors) and artifacts (classifiers
    only) for a fitted estimator.

    Failures while computing metrics or collecting artifacts are reported as warnings
    and do not raise.

    :param fitted_estimator: The fitted estimator to evaluate.
    :param run_id: ID of the run that metrics and artifacts are logged to.
    :param prefix: Forwarded to the metric / artifact helper functions.
    :param X: Forwarded to the metric / artifact helper functions.
    :param y_true: Forwarded to the metric / artifact helper functions.
    :param sample_weight: Forwarded to the metric / artifact helper functions.
    :return: The dict of computed metrics; empty when no metrics were computed. The
             dict is returned on every path, including when artifact collection fails.
    """
    import sklearn

    mlflow_client = MlflowClient()
    metrics = dict()
    try:
        if sklearn.base.is_classifier(fitted_estimator):
            metrics = _get_classifier_metrics(fitted_estimator, prefix, X, y_true,
                                              sample_weight)
        elif sklearn.base.is_regressor(fitted_estimator):
            metrics = _get_regressor_metrics(fitted_estimator, prefix, X, y_true,
                                             sample_weight)
    except Exception as err:
        msg = ("Failed to autolog metrics for " + fitted_estimator.__class__.__name__ +
               ". Logging error: " + str(err))
        _logger.warning(msg)
    else:
        # batch log all metrics
        try_mlflow_log(
            mlflow_client.log_batch,
            run_id,
            metrics=[
                Metric(key=str(key), value=value, timestamp=int(time.time() * 1000), step=0)
                for key, value in metrics.items()
            ],
        )

    if sklearn.base.is_classifier(fitted_estimator):
        try:
            artifacts = _get_classifier_artifacts(fitted_estimator, prefix, X, y_true,
                                                  sample_weight)
        except Exception as e:
            msg = ("Failed to autolog artifacts for " + fitted_estimator.__class__.__name__ +
                   ". Logging error: " + str(e))
            _logger.warning(msg)
            # Keep the return type consistent: the metrics computed above are still
            # handed back when artifacts cannot be collected.
            return metrics

        with TempDir() as tmp_dir:
            for artifact in artifacts:
                try:
                    display = artifact.function(**artifact.arguments)
                    display.ax_.set_title(artifact.title)
                    artifact_path = "{}.png".format(artifact.name)
                    filepath = tmp_dir.path(artifact_path)
                    display.figure_.savefig(filepath)
                    import matplotlib.pyplot as plt

                    plt.close(display.figure_)
                except Exception as e:
                    # A failing artifact is reported and the remaining ones still run.
                    _log_warning_for_artifacts(artifact.name, artifact.function, e)

            try_mlflow_log(mlflow_client.log_artifacts, run_id, tmp_dir.path())

    return metrics
def _log_posttraining_metadata(estimator, *args, **kwargs):
    """
    Records metadata for a scikit-learn estimator after training has completed.
    This is intended to be invoked within a patched scikit-learn training routine
    (e.g., `fit()`, `fit_transform()`, ...) and assumes the existence of an active
    MLflow run that can be referenced via the fluent Tracking API.

    :param estimator: The scikit-learn estimator for which to log metadata.
    :param args: The arguments passed to the scikit-learn training routine (e.g.,
                 `fit()`, `fit_transform()`, ...).
    :param kwargs: The keyword arguments passed to the scikit-learn training routine.
    """

    def get_input_example():
        # Fetch an input example using the first several rows of the array-like
        # training data supplied to the training routine (e.g., `fit()`)
        X_var_name, y_var_name = _get_arg_names(estimator.fit)[:2]
        features = _get_Xy(args, kwargs, X_var_name, y_var_name)[0]
        return features[:INPUT_EXAMPLE_SAMPLE_ROWS]

    def infer_model_signature(input_example):
        if not hasattr(estimator, "predict"):
            raise Exception(
                "the trained model does not specify a `predict` function, "
                + "which is required in order to infer the signature"
            )

        predictions = estimator.predict(input_example)
        return infer_signature(input_example, predictions)

    if hasattr(estimator, "score"):
        try:
            score_args = _get_args_for_score(estimator.score, estimator.fit, args, kwargs)
            training_score = estimator.score(*score_args)
        except Exception as e:
            # A scoring failure is reported but does not abort the logging.
            _logger.warning(
                estimator.score.__qualname__
                + " failed. The 'training_score' metric will not be recorded. Scoring error: "
                + str(e)
            )
        else:
            try_mlflow_log(mlflow.log_metric, "training_score", training_score)

    # log common metrics and artifacts for estimators (classifier, regressor)
    _log_specialized_estimator_content(estimator, mlflow.active_run().info.run_id, args, kwargs)

    if log_models:
        # Will only resolve `input_example` and `signature` if `log_models` is `True`.
        input_example, signature = resolve_input_example_and_signature(
            get_input_example,
            infer_model_signature,
            log_input_examples,
            log_model_signatures,
            _logger,
        )

        try_mlflow_log(
            log_model,
            estimator,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
        )

    # Everything below applies to parameter search estimators only.
    if not _is_parameter_search_estimator(estimator):
        return

    # `input_example` / `signature` exist here because this branch also requires
    # `log_models`.
    if hasattr(estimator, "best_estimator_") and log_models:
        try_mlflow_log(
            log_model,
            estimator.best_estimator_,
            artifact_path="best_estimator",
            signature=signature,
            input_example=input_example,
        )

    if hasattr(estimator, "best_score_"):
        try_mlflow_log(mlflow.log_metric, "best_cv_score", estimator.best_score_)

    if hasattr(estimator, "best_params_"):
        prefixed_best_params = {}
        for param_name, param_value in estimator.best_params_.items():
            prefixed_best_params["best_{param_name}".format(param_name=param_name)] = param_value
        try_mlflow_log(mlflow.log_params, prefixed_best_params)

    if not hasattr(estimator, "cv_results_"):
        return

    try:
        # Fetch environment-specific tags (e.g., user and source) to ensure that lineage
        # information is consistent with the parent run
        child_tags = context_registry.resolve_tags()
        child_tags.update({MLFLOW_AUTOLOGGING: FLAVOR_NAME})
        _create_child_runs_for_parameter_search(
            cv_estimator=estimator,
            parent_run=mlflow.active_run(),
            child_tags=child_tags,
        )
    except Exception as e:
        _logger.warning(
            "Encountered exception during creation of child runs for parameter search."
            " Child runs may be missing. Exception: {}".format(str(e))
        )

    try:
        cv_results_df = pd.DataFrame.from_dict(estimator.cv_results_)
        _log_parameter_search_results_as_artifact(
            cv_results_df, mlflow.active_run().info.run_id
        )
    except Exception as e:
        _logger.warning(
            "Failed to log parameter search results as an artifact."
            " Exception: {}".format(str(e))
        )
def on_epoch_end(self, epoch, logs=None):
    """
    Log the entries of ``logs`` as MLflow metrics at step ``epoch``.

    :param epoch: Value used as the ``step`` of the logged metrics.
    :param logs: Metrics passed to ``mlflow.log_metrics``; nothing is logged when it
                 is None or empty.
    """
    if logs:
        try_mlflow_log(mlflow.log_metrics, logs, step=epoch)
def train(*args, **kwargs):
    """
    Patched replacement for ``lightgbm.train`` that records the training with MLflow.

    Logs the booster params, the remaining call arguments, the evaluation results of
    every iteration, early-stopping information, feature importances (plot and JSON)
    and the trained model. If no run is active on entry, a run is started here and
    ended before returning; it is ended with status ``FAILED`` when the original
    training function raises.

    :param args: Positional arguments forwarded to the original ``lightgbm.train``.
    :param kwargs: Keyword arguments forwarded to the original ``lightgbm.train``.
    :return: The model returned by the original ``lightgbm.train``.
    """

    def record_eval_results(eval_results):
        """
        Create a callback function that records evaluation results.
        """

        def callback(env):
            res = {}
            for data_name, eval_name, value, _ in env.evaluation_result_list:
                key = data_name + '-' + eval_name
                res[key] = value
            eval_results.append(res)

        return callback

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        indices = np.argsort(importance)
        features = np.array(features)[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align='center', height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel('Importance')
        ax.set_title('Feature Importance ({})'.format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # Use the function's own parameter rather than the enclosing loop variable.
            filepath = os.path.join(tmpdir,
                                    'feature_importance_{}.png'.format(importance_type))
            fig.savefig(filepath)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    if not mlflow.active_run():
        try_mlflow_log(mlflow.start_run)
        auto_end_run = True
    else:
        auto_end_run = False

    original = gorilla.get_original_attribute(lightgbm, 'train')

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs['params']
    try_mlflow_log(mlflow.log_params, params)

    unlogged_params = ['params', 'train_set', 'valid_sets', 'valid_names', 'fobj', 'feval',
                       'init_model', 'evals_result', 'learning_rates', 'callbacks']

    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    # `inspect.getargspec` no longer exists on Python 3.11+; prefer `getfullargspec`
    # when it is available. Both return the positional argument names first.
    get_arg_spec = (
        getattr(inspect, 'getfullargspec', None) or inspect.getargspec  # pylint: disable=W1505
    )
    all_arg_names = get_arg_spec(original)[0]
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index('callbacks')
    callback = record_eval_results(eval_results)
    # Build a new list instead of using `+=` so the caller's callbacks list is not
    # mutated, and so that `callbacks=None` (positional or keyword) is accepted.
    if num_pos_args >= callbacks_index + 1:
        tmp_list = list(args)
        tmp_list[callbacks_index] = list(tmp_list[callbacks_index] or []) + [callback]
        args = tuple(tmp_list)
    else:
        kwargs['callbacks'] = list(kwargs.get('callbacks') or []) + [callback]

    # training model
    try:
        model = original(*args, **kwargs)
    except Exception:
        # Do not leave a run that was started here active when training fails.
        if auto_end_run:
            try_mlflow_log(mlflow.end_run, 'FAILED')
        raise

    # logging metrics on each iteration.
    for idx, metrics in enumerate(eval_results):
        try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

    # If early_stopping_rounds is present, logging metrics at the best iteration
    # as extra metrics with the max step + 1.
    early_stopping_index = all_arg_names.index('early_stopping_rounds')
    early_stopping = (num_pos_args >= early_stopping_index + 1 or
                      'early_stopping_rounds' in kwargs)
    if early_stopping:
        extra_step = len(eval_results)
        try_mlflow_log(mlflow.log_metric, 'stopped_iteration', len(eval_results))
        # best_iteration is set even if training does not stop early.
        try_mlflow_log(mlflow.log_metric, 'best_iteration', model.best_iteration)
        # Nothing to index when no evaluation results were recorded.
        if eval_results:
            # iteration starts from 1 in LightGBM.
            try_mlflow_log(mlflow.log_metrics, eval_results[model.best_iteration - 1],
                           step=extra_step)

    # logging feature importance as artifacts.
    for imp_type in ['split', 'gain']:
        features = model.feature_name()
        importance = model.feature_importance(importance_type=imp_type)
        try:
            log_feature_importance_plot(features, importance, imp_type)
        except Exception:  # pylint: disable=broad-except
            _logger.exception('Failed to log feature importance plot. LightGBM autologging '
                              'will ignore the failure and continue. Exception: ')

        imp = {ft: value for ft, value in zip(features, importance.tolist())}
        tmpdir = tempfile.mkdtemp()
        try:
            filepath = os.path.join(tmpdir, 'feature_importance_{}.json'.format(imp_type))
            with open(filepath, 'w') as f:
                json.dump(imp, f, indent=2)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            shutil.rmtree(tmpdir)

    try_mlflow_log(log_model, model, artifact_path='model')

    if auto_end_run:
        try_mlflow_log(mlflow.end_run)
    return model
def on_train_begin(self, **kwargs):
    """
    Log learner and optimizer settings and the model summary when training begins.

    Logs ``num_layers``, ``opt_func`` and, when ``self.opt`` has the matching
    attribute, ``true_wd``, ``bn_wd`` and ``train_bn`` as params. The model summary is
    set as the ``model_summary`` tag and also logged as the artifact
    ``model_summary.txt``.

    :param kwargs: Unused.
    """
    layer_info = layers_info(self.learner)
    try_mlflow_log(mlflow.log_param, "num_layers", len(layer_info))
    try_mlflow_log(mlflow.log_param, "opt_func", self.opt_func.func.__name__)

    # These settings are read from the optimizer when it exposes them.
    for attr_name in ("true_wd", "bn_wd"):
        if hasattr(self.opt, attr_name):
            try_mlflow_log(mlflow.log_param, attr_name, getattr(self.opt, attr_name))

    # NOTE(review): the presence check is made on `self.opt` while the value is read
    # from `self` -- confirm this asymmetry is intended.
    if hasattr(self.opt, "train_bn"):
        try_mlflow_log(mlflow.log_param, "train_bn", self.train_bn)

    summary_text = model_summary(self.learner)
    try_mlflow_log(mlflow.set_tag, "model_summary", summary_text)

    scratch_dir = tempfile.mkdtemp()
    try:
        summary_path = os.path.join(scratch_dir, "model_summary.txt")
        with open(summary_path, "w") as handle:
            handle.write(summary_text)
        try_mlflow_log(mlflow.log_artifact, local_path=summary_path)
    finally:
        # The temporary directory is removed whether or not logging succeeded.
        shutil.rmtree(scratch_dir)
def train(original, *args, **kwargs):
    """
    Run ``original`` (the unpatched training function) and log its inputs and results.

    Logs the booster params, the remaining call arguments, per-iteration evaluation
    metrics, feature-importance plots/JSON and, when ``log_models`` is truthy, the
    trained model. ``importance_types``, ``log_models``, ``log_input_examples`` and
    ``log_model_signatures`` are not parameters; they are resolved from the enclosing
    scope.

    :param original: The unpatched training function.
    :param args: Positional arguments forwarded to ``original``. ``args[0]`` is read as
                 the booster params and ``args[1]`` as the training data.
    :param kwargs: Keyword arguments forwarded to ``original``.
    :return: The model returned by ``original``.
    """

    def record_eval_results(eval_results, metrics_logger):
        """
        Create a callback function that records evaluation results.
        """
        # TODO: Remove `replace("SNAPSHOT", "dev")` once the following issue is addressed:
        # https://github.com/dmlc/xgboost/issues/6984
        if Version(xgboost.__version__.replace("SNAPSHOT", "dev")) >= Version("1.3.0"):
            # In xgboost >= 1.3.0, user-defined callbacks should inherit
            # `xgboost.callback.TrainingCallback`:
            # https://xgboost.readthedocs.io/en/latest/python/callbacks.html#defining-your-own-callback  # noqa

            class Callback(
                xgboost.callback.TrainingCallback,
                metaclass=ExceptionSafeAbstractClass,
            ):
                def after_iteration(self, model, epoch, evals_log):
                    """
                    Run after each iteration. Return True when training should stop.
                    """
                    # `evals_log` is a nested dict (type: Dict[str, Dict[str, List[float]]])
                    # that looks like this:
                    # {
                    #   "train": {
                    #     "auc": [0.5, 0.6, 0.7, ...],
                    #     ...
                    #   },
                    #   ...
                    # }
                    evaluation_result_dict = {}
                    for data_name, metric_dict in evals_log.items():
                        for metric_name, metric_values_on_each_iter in metric_dict.items():
                            key = "{}-{}".format(data_name, metric_name)
                            # The last element in `metric_values_on_each_iter` corresponds to
                            # the metric on the current iteration
                            evaluation_result_dict[key] = metric_values_on_each_iter[-1]

                    metrics_logger.record_metrics(evaluation_result_dict, epoch)
                    eval_results.append(evaluation_result_dict)

                    # Return `False` to indicate training should not stop
                    return False

            return Callback()

        else:

            @exception_safe_function
            def callback(env):
                metrics_logger.record_metrics(dict(env.evaluation_result_list), env.iteration)
                eval_results.append(dict(env.evaluation_result_list))

            return callback

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt

        features = np.array(features)
        importance = np.array(importance)
        indices = np.argsort(importance)
        features = features[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align="center", height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel("Importance")
        ax.set_title("Feature Importance ({})".format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # Use this function's own parameter for the filename instead of
            # reaching for the loop variable of the enclosing scope.
            filepath = os.path.join(
                tmpdir, "feature_importance_{}.png".format(importance_type)
            )
            fig.savefig(filepath)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs["params"]
    try_mlflow_log(mlflow.log_params, params)

    unlogged_params = [
        "params",
        "dtrain",
        "evals",
        "obj",
        "feval",
        "evals_result",
        "xgb_model",
        "callbacks",
        "learning_rates",
    ]
    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    # `inspect.getargspec` was removed in Python 3.11; `getfullargspec` yields
    # the same positional-argument names as its first element.
    all_arg_names = inspect.getfullargspec(original)[0]
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index("callbacks")

    run_id = mlflow.active_run().info.run_id
    with batch_metrics_logger(run_id) as metrics_logger:
        callback = record_eval_results(eval_results, metrics_logger)
        # Build a new callbacks list rather than extending the caller's list in
        # place, so the recording callback does not leak into user-owned objects
        # (and a positional `None` or a tuple of callbacks is tolerated).
        if num_pos_args >= callbacks_index + 1:
            tmp_list = list(args)
            tmp_list[callbacks_index] = list(tmp_list[callbacks_index] or []) + [callback]
            args = tuple(tmp_list)
        elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
            kwargs["callbacks"] = list(kwargs["callbacks"]) + [callback]
        else:
            kwargs["callbacks"] = [callback]

        # training model
        model = original(*args, **kwargs)

        # If early_stopping_rounds is present, logging metrics at the best iteration
        # as extra metrics with the max step + 1.
        early_stopping_index = all_arg_names.index("early_stopping_rounds")
        early_stopping = (
            num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
        )
        if early_stopping:
            extra_step = len(eval_results)
            metrics_logger.record_metrics({"stopped_iteration": extra_step - 1})
            metrics_logger.record_metrics({"best_iteration": model.best_iteration})
            metrics_logger.record_metrics(eval_results[model.best_iteration], extra_step)

    # logging feature importance as artifacts.
    for imp_type in importance_types:
        imp = None
        try:
            imp = model.get_score(importance_type=imp_type)
            features, importance = zip(*imp.items())
            log_feature_importance_plot(features, importance, imp_type)
        except Exception:
            _logger.exception(
                "Failed to log feature importance plot. XGBoost autologging "
                "will ignore the failure and continue. Exception: "
            )

        # The JSON dump is still attempted when only the plot failed.
        if imp is not None:
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, "feature_importance_{}.json".format(imp_type))
                with open(filepath, "w") as f:
                    json.dump(imp, f)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                shutil.rmtree(tmpdir)

    # dtrain must exist as the original train function already ran successfully
    dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

    # it is possible that the dataset was constructed before the patched
    # constructor was applied, so we cannot assume the input_example_info exists
    input_example_info = getattr(dtrain, "input_example_info", None)

    def get_input_example():
        if input_example_info is None:
            raise Exception(ENSURE_AUTOLOGGING_ENABLED_TEXT)
        if input_example_info.error_msg is not None:
            raise Exception(input_example_info.error_msg)
        return input_example_info.input_example

    def infer_model_signature(input_example):
        model_output = model.predict(xgboost.DMatrix(input_example))
        model_signature = infer_signature(input_example, model_output)
        return model_signature

    # Only log the model if the autolog() param log_models is set to True.
    if log_models:
        # Will only resolve `input_example` and `signature` if `log_models` is `True`.
        input_example, signature = resolve_input_example_and_signature(
            get_input_example,
            infer_model_signature,
            log_input_examples,
            log_model_signatures,
            _logger,
        )

        try_mlflow_log(
            log_model,
            model,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
        )

    return model
def before_fit(self):
    """
    Log optimizer hyperparameters and parameter-schedule summaries to MLflow.

    Nothing is logged when the instance has an ``lr_finder`` or ``gather_preds``
    attribute. For the first ``ParamScheduler`` found in ``self.cbs``, each
    scheduled parameter is sampled at 100 points in ``[0, 1)``; its min, max,
    initial and final values are logged as params and a plot of the samples is
    logged as an artifact. Remaining hyperparameters from ``self.opt.hypers`` are
    logged as lists. Sets ``self.freeze_prefix`` as a side effect.
    """
    from fastai.callback.all import ParamScheduler

    # Do not record in case of predicting or lr_finder
    if hasattr(self, "lr_finder") or hasattr(self, "gather_preds"):
        return

    if self.is_fine_tune and len(self.opt.param_lists) == 1:
        _logger.warning(
            "Using `fine_tune` with model which cannot be frozen."
            " Current model have only one param group which makes it impossible to freeze."
            " Because of this it will record some fitting params twice (overriding exception)"
        )

    frozen = self.opt.frozen_idx != 0
    if frozen and self.is_fine_tune:
        self.freeze_prefix = "freeze_"
        try_mlflow_log(mlflow.log_param, "frozen_idx", self.opt.frozen_idx)
    else:
        self.freeze_prefix = ""

    # Extract function name when `opt_func` is partial function
    if isinstance(self.opt_func, partial):
        try_mlflow_log(
            mlflow.log_param,
            self.freeze_prefix + "opt_func",
            self.opt_func.keywords["opt"].__name__,
        )
    else:
        try_mlflow_log(mlflow.log_param, self.freeze_prefix + "opt_func", self.opt_func.__name__)

    params_not_to_log = []
    for cb in self.cbs:
        if isinstance(cb, ParamScheduler):
            # Scheduled params are summarized below, so they are excluded from
            # the plain hyperparameter logging further down.
            params_not_to_log = list(cb.scheds.keys())
            for param, f in cb.scheds.items():
                values = np.array(
                    [f(step) for step in np.linspace(0, 1, num=100, endpoint=False)]
                )

                # Log params main values from scheduling
                try_mlflow_log(
                    mlflow.log_param, self.freeze_prefix + param + "_min", np.min(values, 0)
                )
                try_mlflow_log(
                    mlflow.log_param, self.freeze_prefix + param + "_max", np.max(values, 0)
                )
                try_mlflow_log(mlflow.log_param, self.freeze_prefix + param + "_init", values[0])
                try_mlflow_log(
                    mlflow.log_param, self.freeze_prefix + param + "_final", values[-1]
                )

                # Plot and save image of scheduling
                fig = plt.figure()
                try:
                    plt.plot(values)
                    plt.ylabel(param)

                    tempdir = tempfile.mkdtemp()
                    try:
                        scheds_file = os.path.join(
                            tempdir, self.freeze_prefix + param + ".png"
                        )
                        plt.savefig(scheds_file)
                        try_mlflow_log(mlflow.log_artifact, local_path=scheds_file)
                    finally:
                        shutil.rmtree(tempdir)
                finally:
                    # Close the figure on every path so a failed plot/save does
                    # not leave an open figure behind.
                    plt.close(fig)
            # Only the first ParamScheduler callback is considered.
            break

    for param in self.opt.hypers[0]:
        if param not in params_not_to_log:
            try_mlflow_log(
                mlflow.log_param,
                self.freeze_prefix + param,
                [h[param] for h in self.opt.hypers],
            )

    if hasattr(self.opt, "true_wd"):
        try_mlflow_log(mlflow.log_param, self.freeze_prefix + "true_wd", self.opt.true_wd)

    if hasattr(self.opt, "bn_wd"):
        try_mlflow_log(mlflow.log_param, self.freeze_prefix + "bn_wd", self.opt.bn_wd)

    if hasattr(self.opt, "train_bn"):
        try_mlflow_log(mlflow.log_param, self.freeze_prefix + "train_bn", self.opt.train_bn)
def test_no_force_try_mlflow_log_to_fail():
    """
    Call ``try_mlflow_log`` with a callable that raises ``ZeroDivisionError``
    inside an active run; the test passes when no exception propagates.
    """

    def _divide_by_zero():
        return 1 / 0

    with mlflow.start_run():
        try_mlflow_log(_divide_by_zero)
def on_epoch_end(self, epoch, logs=None):
    """
    Log ``logs`` as MLflow metrics at step ``epoch`` when ``epoch - 1`` is a
    multiple of ``_LOG_EVERY_N_STEPS``; do nothing otherwise.
    """
    off_interval = (epoch - 1) % _LOG_EVERY_N_STEPS != 0
    if off_interval:
        return
    try_mlflow_log(mlflow.log_metrics, logs, step=epoch)
def on_train_end(self, logs=None):
    """
    Log model metadata to MLflow: layer count, optimizer class name, the
    optimizer's ``lr`` and ``epsilon`` when present, the text summary as a tag,
    and finally the model itself under the ``model`` artifact path.
    """

    def _plain_value(setting):
        # Exact floats are logged as-is; anything else is evaluated through
        # the Keras backend first.
        if type(setting) is float:
            return setting
        return keras.backend.eval(setting)

    model = self.model
    try_mlflow_log(mlflow.log_param, 'num_layers', len(model.layers))

    optimizer = model.optimizer
    try_mlflow_log(mlflow.log_param, 'optimizer_name', type(optimizer).__name__)

    for attr_name, param_name in (('lr', 'learning_rate'), ('epsilon', 'epsilon')):
        if hasattr(optimizer, attr_name):
            resolved = _plain_value(getattr(optimizer, attr_name))
            try_mlflow_log(mlflow.log_param, param_name, resolved)

    # `summary` emits one line per `print_fn` call; collect and join them.
    summary_lines = []
    model.summary(print_fn=summary_lines.append)
    try_mlflow_log(mlflow.set_tag, 'summary', '\n'.join(summary_lines))

    try_mlflow_log(log_model, model, artifact_path='model')
def _log_artifacts_with_warning(**kwargs):
    """
    Forward ``kwargs`` to ``mlflow.log_artifacts`` by way of ``try_mlflow_log``.
    """
    log_artifacts = mlflow.log_artifacts
    try_mlflow_log(log_artifacts, **kwargs)
def _log_posttraining_metadata(estimator, spark_model, params):
    """
    Log post-training metadata for a fitted Spark ML estimator.

    For parameter-search estimators this creates child runs, logs the search
    results as an artifact, and logs the best parameter map both as
    ``best_parameters.json`` and as ``best_``-prefixed params. When ``log_models``
    (resolved from the enclosing scope) is truthy, the fitted model is logged if
    ``_should_log_model`` allows it; otherwise a warning is emitted.

    :param estimator: The estimator that was fitted.
    :param spark_model: The model produced by fitting ``estimator``.
    :param params: Not read by this function body.
    """
    if _is_parameter_search_estimator(estimator):
        try:
            # Fetch environment-specific tags (e.g., user and source) to ensure that lineage
            # information is consistent with the parent run
            child_tags = context_registry.resolve_tags()
            child_tags.update(
                {MLFLOW_AUTOLOGGING: AUTOLOGGING_INTEGRATION_NAME})
            _create_child_runs_for_parameter_search(
                parent_estimator=estimator,
                parent_model=spark_model,
                parent_run=mlflow.active_run(),
                child_tags=child_tags,
            )
        except Exception:
            # Child-run creation is best-effort: the failure is reported with its
            # full traceback and the remaining logging still runs.
            import traceback
            msg = (
                "Encountered exception during creation of child runs for parameter search."
                " Child runs may be missing. Exception: {}".format(
                    traceback.format_exc()))
            _logger.warning(msg)

        estimator_param_maps = _get_tuning_param_maps(
            estimator, estimator._autologging_metadata.uid_to_indexed_name_map)

        metrics_dict, best_index = _get_param_search_metrics_and_best_index(
            estimator, spark_model)
        _log_parameter_search_results_as_artifact(
            estimator_param_maps, metrics_dict, mlflow.active_run().info.run_id)

        # Log best_param_map as JSON artifact
        best_param_map = estimator_param_maps[best_index]
        try_mlflow_log(mlflow.log_dict, best_param_map, artifact_file="best_parameters.json")

        # Log best_param_map as autologging parameters as well
        _log_estimator_params({
            f"best_{param_name}": param_value
            for param_name, param_value in best_param_map.items()
        })

    if log_models:
        if _should_log_model(spark_model):
            # TODO: support model signature
            try_mlflow_log(
                mlflow.spark.log_model,
                spark_model,
                artifact_path="model",
            )
            # For parameter-search models, the best sub-model is logged separately.
            if _is_parameter_search_model(spark_model):
                try_mlflow_log(
                    mlflow.spark.log_model,
                    spark_model.bestModel,
                    artifact_path="best_model",
                )
        else:
            _logger.warning(
                _get_warning_msg_for_skip_log_model(spark_model))
def train(*args, **kwargs):
    """
    Patched ``xgboost.train`` that logs inputs and results to MLflow.

    Starts a run when none is active (and ends it afterwards), logs the booster
    params and remaining call arguments, trains via the original
    ``xgboost.train``, then logs per-iteration metrics, early-stopping metrics,
    feature-importance plots/JSON and the trained model with an inferred
    signature and input example when available. ``importance_types`` is resolved
    from the enclosing scope.

    :param args: Positional arguments forwarded to the original ``xgboost.train``.
                 ``args[0]`` is read as the booster params and ``args[1]`` as the
                 training data.
    :param kwargs: Keyword arguments forwarded to the original ``xgboost.train``.
    :return: The model returned by the original ``xgboost.train``.
    """

    def record_eval_results(eval_results):
        """
        Create a callback function that records evaluation results.
        """

        def callback(env):
            eval_results.append(dict(env.evaluation_result_list))

        return callback

    if not mlflow.active_run():
        try_mlflow_log(mlflow.start_run)
        auto_end_run = True
    else:
        auto_end_run = False

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt

        features = np.array(features)
        importance = np.array(importance)
        indices = np.argsort(importance)
        features = features[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align="center", height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel("Importance")
        ax.set_title("Feature Importance ({})".format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # Use this function's own parameter for the filename instead of
            # reaching for the loop variable of the enclosing scope.
            filepath = os.path.join(
                tmpdir, "feature_importance_{}.png".format(importance_type)
            )
            fig.savefig(filepath)
            try_mlflow_log(mlflow.log_artifact, filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    original = gorilla.get_original_attribute(xgboost, "train")

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs["params"]
    try_mlflow_log(mlflow.log_params, params)

    unlogged_params = [
        "params",
        "dtrain",
        "evals",
        "obj",
        "feval",
        "evals_result",
        "xgb_model",
        "callbacks",
        "learning_rates",
    ]
    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    # `inspect.getargspec` was removed in Python 3.11; `getfullargspec` yields
    # the same positional-argument names as its first element.
    all_arg_names = inspect.getfullargspec(original)[0]
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index("callbacks")
    callback = record_eval_results(eval_results)
    # Build a new callbacks list rather than extending the caller's list in
    # place, so the recording callback does not leak into user-owned objects
    # (and a positional `None` or a tuple of callbacks is tolerated).
    if num_pos_args >= callbacks_index + 1:
        tmp_list = list(args)
        tmp_list[callbacks_index] = list(tmp_list[callbacks_index] or []) + [callback]
        args = tuple(tmp_list)
    elif "callbacks" in kwargs and kwargs["callbacks"] is not None:
        kwargs["callbacks"] = list(kwargs["callbacks"]) + [callback]
    else:
        kwargs["callbacks"] = [callback]

    # training model
    model = original(*args, **kwargs)

    # logging metrics on each iteration.
    for idx, metrics in enumerate(eval_results):
        try_mlflow_log(mlflow.log_metrics, metrics, step=idx)

    # If early_stopping_rounds is present, logging metrics at the best iteration
    # as extra metrics with the max step + 1.
    early_stopping_index = all_arg_names.index("early_stopping_rounds")
    early_stopping = (
        num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
    )
    if early_stopping:
        extra_step = len(eval_results)
        try_mlflow_log(mlflow.log_metric, "stopped_iteration", len(eval_results) - 1)
        try_mlflow_log(mlflow.log_metric, "best_iteration", model.best_iteration)
        try_mlflow_log(mlflow.log_metrics, eval_results[model.best_iteration], step=extra_step)

    # logging feature importance as artifacts.
    for imp_type in importance_types:
        imp = None
        try:
            imp = model.get_score(importance_type=imp_type)
            features, importance = zip(*imp.items())
            log_feature_importance_plot(features, importance, imp_type)
        except Exception:  # pylint: disable=broad-except
            _logger.exception(
                "Failed to log feature importance plot. XGBoost autologging "
                "will ignore the failure and continue. Exception: "
            )

        # The JSON dump is still attempted when only the plot failed.
        if imp is not None:
            tmpdir = tempfile.mkdtemp()
            try:
                filepath = os.path.join(tmpdir, "feature_importance_{}.json".format(imp_type))
                with open(filepath, "w") as f:
                    json.dump(imp, f)
                try_mlflow_log(mlflow.log_artifact, filepath)
            finally:
                shutil.rmtree(tmpdir)

    # dtrain must exist as the original train function already ran successfully
    dtrain = args[1] if len(args) > 1 else kwargs.get("dtrain")

    input_example = None
    signature = None
    try:
        # it is possible that the dataset was constructed before the patched
        # constructor was applied, so we cannot assume the input_example_info exists
        input_example_info = getattr(dtrain, "input_example_info", None)

        if input_example_info is None:
            raise Exception(
                "please ensure that autologging is "
                + "enabled before constructing the dataset."
            )

        input_example = input_example_info.input_example
        if input_example is None:
            # input example collection failed
            raise Exception(input_example_info.error_msg)

        model_output = model.predict(xgboost.DMatrix(input_example))
        signature = infer_signature(input_example, model_output)
    except Exception as e:  # pylint: disable=broad-except
        # The model is still logged below, just without example/signature.
        input_example = None
        msg = "Failed to gather example input and model signature: " + str(e)
        _logger.warning(msg)

    try_mlflow_log(
        log_model,
        model,
        artifact_path="model",
        signature=signature,
        input_example=input_example,
    )

    if auto_end_run:
        try_mlflow_log(mlflow.end_run)
    return model
def _log_posttraining_metadata(estimator, *args, **kwargs):
    """
    Records metadata for a scikit-learn estimator after training has completed.
    This is intended to be invoked within a patched scikit-learn training routine
    (e.g., `fit()`, `fit_transform()`, ...) and assumes the existence of an active
    MLflow run that can be referenced via the fluent Tracking API.

    Logged items, in order: the ``training_score`` metric (when the estimator has
    ``score`` and scoring succeeds), specialized estimator content, the model
    (with an input example and signature when they can be inferred), and, for
    parameter-search estimators, the best estimator, ``best_``-prefixed params,
    child runs and the search results artifact.

    :param estimator: The scikit-learn estimator for which to log metadata.
    :param args: The arguments passed to the scikit-learn training routine (e.g.,
                 `fit()`, `fit_transform()`, ...).
    :param kwargs: The keyword arguments passed to the scikit-learn training routine.
    """
    if hasattr(estimator, "score"):
        try:
            score_args = _get_args_for_score(estimator.score, estimator.fit, args, kwargs)
            training_score = estimator.score(*score_args)
        except Exception as e:  # pylint: disable=broad-except
            # Scoring is best-effort: warn and skip the metric.
            msg = (
                estimator.score.__qualname__ +
                " failed. The 'training_score' metric will not be recorded. Scoring error: "
                + str(e))
            _logger.warning(msg)
        else:
            try_mlflow_log(mlflow.log_metric, "training_score", training_score)

    # log common metrics and artifacts for estimators (classifier, regressor)
    _log_specialized_estimator_content(estimator, mlflow.active_run().info.run_id, args, kwargs)

    input_example = None
    signature = None
    if hasattr(estimator, "predict"):
        try:
            # Fetch an input example using the first several rows of the array-like
            # training data supplied to the training routine (e.g., `fit()`)
            SAMPLE_ROWS = 5
            fit_arg_names = _get_arg_names(estimator.fit)
            X_var_name, y_var_name = fit_arg_names[:2]
            input_example = _get_Xy(args, kwargs, X_var_name, y_var_name)[0][:SAMPLE_ROWS]

            model_output = estimator.predict(input_example)
            signature = infer_signature(input_example, model_output)
        except Exception as e:  # pylint: disable=broad-except
            # Reset the example so it is never logged without a signature.
            input_example = None
            msg = "Failed to infer an input example and model signature: " + str(
                e)
            _logger.warning(msg)

    # The model is logged even when no example/signature could be inferred.
    try_mlflow_log(
        log_model,
        estimator,
        artifact_path="model",
        signature=signature,
        input_example=input_example,
    )

    if _is_parameter_search_estimator(estimator):
        if hasattr(estimator, "best_estimator_"):
            try_mlflow_log(
                log_model,
                estimator.best_estimator_,
                artifact_path="best_estimator",
                signature=signature,
                input_example=input_example,
            )

        if hasattr(estimator, "best_params_"):
            best_params = {
                "best_{param_name}".format(param_name=param_name): param_value
                for param_name, param_value in estimator.best_params_.items()
            }
            try_mlflow_log(mlflow.log_params, best_params)

        if hasattr(estimator, "cv_results_"):
            try:
                # Fetch environment-specific tags (e.g., user and source) to ensure that lineage
                # information is consistent with the parent run
                environment_tags = context_registry.resolve_tags()
                _create_child_runs_for_parameter_search(
                    cv_estimator=estimator,
                    parent_run=mlflow.active_run(),
                    child_tags=environment_tags,
                )
            except Exception as e:  # pylint: disable=broad-except
                msg = (
                    "Encountered exception during creation of child runs for parameter search."
                    " Child runs may be missing. Exception: {}".format(
                        str(e)))
                _logger.warning(msg)

            # Logged independently of child-run creation: a failure above does
            # not prevent the results artifact from being attempted.
            try:
                cv_results_df = pd.DataFrame.from_dict(
                    estimator.cv_results_)
                _log_parameter_search_results_as_artifact(
                    cv_results_df, mlflow.active_run().info.run_id)
            except Exception as e:  # pylint: disable=broad-except
                msg = (
                    "Failed to log parameter search results as an artifact."
                    " Exception: {}".format(str(e)))
                _logger.warning(msg)
def train_end(self, estimator, *args, **kwargs):
    """
    Log ``estimator.net`` under the ``model`` artifact path when it is a
    ``HybridSequential``; other network types are ignored.
    """
    network = estimator.net
    if not isinstance(network, HybridSequential):
        return
    try_mlflow_log(log_model, network, artifact_path="model")
def add_series(self, index, value):
    """
    Log the buffered mean to MLflow on interval boundaries, then delegate to the
    original ``MonitorSeries.add``.

    When ``_check_interval(index, self.flush_at, self.interval)`` is truthy, the
    mean of ``self.buf`` plus the incoming ``value`` is logged as metric
    ``self.name`` at step ``index``.

    :param index: Step index of the sample; used as the MLflow metric step.
    :param value: Raw sample; always forwarded unchanged to the original ``add``.
    """
    if _check_interval(index, self.flush_at, self.interval):
        # Keep the mean in its own local: rebinding `value` would hand the
        # mean, not the raw sample, to the original `add` below.
        mean_value = sum(self.buf + [value]) / (len(self.buf) + 1)
        try_mlflow_log(mlflow.log_metric, self.name, mean_value, step=index)
    original = gorilla.get_original_attribute(MonitorSeries, 'add')
    original(self, index, value)