def _run_and_log_function(self, original, args, kwargs, unlogged_params, callback_arg_index):
    """
    Call ``original`` with an ``__MLflowKerasCallback`` attached and log around the call.

    A run is started when none is active and ended again after the call. The call
    arguments are logged through ``log_fn_args_as_params``; early-stopping params
    are logged before the call and early-stopping metrics after it.

    :param self: Object forwarded as first argument to ``original``.
    :param original: The wrapped function.
    :param args: Positional arguments of the call (without ``self``).
    :param kwargs: Keyword arguments of the call; its ``'callbacks'`` entry may be
                   (re)assigned here.
    :param unlogged_params: Forwarded unchanged to ``log_fn_args_as_params``.
    :param callback_arg_index: Position in ``args`` of the callbacks argument when
                               it is passed positionally.
    :return: The value returned by ``original``.
    """
    auto_end_run = not kiwi.active_run()
    if auto_end_run:
        try_mlflow_log(kiwi.start_run)

    log_fn_args_as_params(original, args, kwargs, unlogged_params)
    early_stop_callback = None

    # Checking if the 'callback' argument of the function is set
    if len(args) > callback_arg_index:
        tmp_list = list(args)
        # Copy (and treat None as empty) so the caller's own list is never
        # mutated; with `+=` the logging callback piled up on every call.
        user_callbacks = list(tmp_list[callback_arg_index] or [])
        early_stop_callback = _early_stop_check(user_callbacks)
        tmp_list[callback_arg_index] = user_callbacks + [__MLflowKerasCallback()]
        args = tuple(tmp_list)
    elif 'callbacks' in kwargs:
        user_callbacks = list(kwargs['callbacks'] or [])
        early_stop_callback = _early_stop_check(user_callbacks)
        kwargs['callbacks'] = user_callbacks + [__MLflowKerasCallback()]
    else:
        kwargs['callbacks'] = [__MLflowKerasCallback()]

    _log_early_stop_callback_params(early_stop_callback)

    history = original(self, *args, **kwargs)

    _log_early_stop_callback_metrics(early_stop_callback, history)

    if auto_end_run:
        try_mlflow_log(kiwi.end_run)

    return history
def on_train_begin(self, logs=None):  # pylint: disable=unused-argument
    """
    Log layer count, optimizer name, learning rate / epsilon (when the
    optimizer has them) and upload the model summary as a text artifact.

    :param logs: Unused.
    """
    optimizer = self.model.optimizer
    try_mlflow_log(kiwi.log_param, 'num_layers', len(self.model.layers))
    try_mlflow_log(kiwi.log_param, 'optimizer_name', type(optimizer).__name__)

    # Plain floats are logged directly; anything else is evaluated by the backend.
    for attr_name, param_name in (('lr', 'learning_rate'), ('epsilon', 'epsilon')):
        if not hasattr(optimizer, attr_name):
            continue
        raw = getattr(optimizer, attr_name)
        value = raw if type(raw) is float else keras.backend.eval(raw)
        try_mlflow_log(kiwi.log_param, param_name, value)

    summary_lines = []
    self.model.summary(print_fn=summary_lines.append)
    summary_text = '\n'.join(summary_lines)

    tempdir = tempfile.mkdtemp()
    try:
        summary_file = os.path.join(tempdir, "model_summary.txt")
        with open(summary_file, 'w') as handle:
            handle.write(summary_text)
        try_mlflow_log(kiwi.log_artifact, local_path=summary_file)
    finally:
        # The temporary directory is removed whether or not logging worked.
        shutil.rmtree(tempdir)
def _run_and_log_function(self, original, args, kwargs, unlogged_params, callback_arg_index):
    """
    Call ``original`` with an ``__MLflowFastaiCallback`` attached and log around the call.

    A run is started when none is active and ended again after the call. The call
    arguments (with ``self`` prepended) are logged through ``log_fn_args_as_params``.
    Early-stopping and one-cycle callbacks are looked up among the callbacks built
    from ``self.callback_fns``, ``self.callbacks`` and the ones passed to this call,
    and their params are logged before ``original`` runs.

    :param self: Object forwarded as first argument to ``original``; also passed
                 to the logging callback's constructor.
    :param original: The wrapped function.
    :param args: Positional arguments of the call (without ``self``).
    :param kwargs: Keyword arguments of the call; its ``'callbacks'`` entry may be
                   (re)assigned here.
    :param unlogged_params: Forwarded unchanged to ``log_fn_args_as_params``.
    :param callback_arg_index: Position in ``args`` of the callbacks argument when
                               it is passed positionally.
    :return: The value returned by ``original``.
    """
    auto_end_run = not kiwi.active_run()
    if auto_end_run:
        try_mlflow_log(kiwi.start_run)

    log_fn_args_as_params(original, [self] + list(args), kwargs, unlogged_params)

    callbacks = [cb(self) for cb in self.callback_fns] + (self.callbacks or [])

    # Checking if the 'callback' argument of the function is set
    if len(args) > callback_arg_index:
        tmp_list = list(args)
        # Copy (and treat None as empty) so the caller's own list is never
        # mutated; with `+=` the logging callback piled up on every call.
        user_callbacks = list(tmp_list[callback_arg_index] or [])
        callbacks += user_callbacks
        tmp_list[callback_arg_index] = user_callbacks + [__MLflowFastaiCallback(self)]
        args = tuple(tmp_list)
    elif 'callbacks' in kwargs:
        user_callbacks = list(kwargs['callbacks'] or [])
        callbacks += user_callbacks
        kwargs['callbacks'] = user_callbacks + [__MLflowFastaiCallback(self)]
    else:
        kwargs['callbacks'] = [__MLflowFastaiCallback(self)]

    early_stop_callback = _find_callback_of_type(EarlyStoppingCallback, callbacks)
    one_cycle_callback = _find_callback_of_type(OneCycleScheduler, callbacks)

    _log_early_stop_callback_params(early_stop_callback)
    _log_one_cycle_callback_params(one_cycle_callback)

    result = original(self, *args, **kwargs)

    if auto_end_run:
        try_mlflow_log(kiwi.end_run)

    return result
def log_feature_importance_plot(features, importance, importance_type):
    """
    Log feature importance plot.

    Draws a horizontal bar chart of ``importance`` sorted ascending, saves it as
    ``feature_importance_<importance_type>.png`` in a temporary directory and logs
    that file as an artifact.

    :param features: Sequence of feature names, aligned with ``importance``.
    :param importance: Sequence of importance values, one per feature.
    :param importance_type: Label used in the plot title and in the file name.
    """
    import matplotlib.pyplot as plt

    # Convert both inputs so fancy indexing works for lists/tuples as well as arrays.
    features = np.asarray(features)
    importance = np.asarray(importance)
    indices = np.argsort(importance)
    features = features[indices]
    importance = importance[indices]
    num_features = len(features)

    # If num_features > 10, increase the figure height to prevent the plot
    # from being too dense.
    w, h = [6.4, 4.8]  # matplotlib's default figure size
    h = h + 0.1 * num_features if num_features > 10 else h
    fig, ax = plt.subplots(figsize=(w, h))

    yloc = np.arange(num_features)
    ax.barh(yloc, importance, align='center', height=0.5)
    ax.set_yticks(yloc)
    ax.set_yticklabels(features)
    ax.set_xlabel('Importance')
    ax.set_title('Feature Importance ({})'.format(importance_type))
    fig.tight_layout()

    tmpdir = tempfile.mkdtemp()
    try:
        # Use this function's own parameter; the previous code read `imp_type`,
        # a name that only exists if a caller's loop variable happens to leak in.
        filepath = os.path.join(
            tmpdir, 'feature_importance_{}.png'.format(importance_type))
        fig.savefig(filepath)
        try_mlflow_log(kiwi.log_artifact, filepath)
    finally:
        plt.close(fig)
        shutil.rmtree(tmpdir)
def epoch_end(self, estimator, *args, **kwargs):
    """
    Log the current value of every train and validation metric of ``estimator``
    at step ``self.current_epoch``, then advance ``self.current_epoch`` by one.

    :param estimator: Object exposing ``train_metrics`` and ``val_metrics``; each
                      metric's ``get()`` yields a ``(name, value)`` pair.
    """
    # Validation metrics come second, so they win on a name clash.
    collected = dict(
        metric.get()
        for group in (estimator.train_metrics, estimator.val_metrics)
        for metric in group
    )
    try_mlflow_log(kiwi.log_metrics, collected, step=self.current_epoch)
    self.current_epoch += 1
def _log_early_stop_callback_params(callback): if callback: try: earlystopping_params = {'early_stop_monitor': callback.monitor, 'early_stop_min_delta': callback.min_delta, 'early_stop_patience': callback.patience, 'early_stop_mode': callback.mode} try_mlflow_log(kiwi.log_params, earlystopping_params) except Exception: # pylint: disable=W0703 return
def _manage_active_run():
    """
    Generator that makes sure a run is active while the caller's block executes.

    When no run is active, one is started and its id is remembered in the
    module-level ``_AUTOLOG_RUN_ID``. The active run is yielded. Afterwards the
    run is ended only if it is the one recorded in ``_AUTOLOG_RUN_ID``.

    NOTE(review): this is used as ``with _manage_active_run():`` elsewhere, so it
    is presumably wrapped by ``contextlib.contextmanager`` outside this block.

    :return: Generator yielding the value of ``kiwi.active_run()``.
    """
    global _AUTOLOG_RUN_ID
    if not kiwi.active_run():
        try_mlflow_log(kiwi.start_run)
        started_run = kiwi.active_run()
        if started_run is not None:  # defensive check in case `mlflow.start_run` fails
            _AUTOLOG_RUN_ID = started_run.info.run_id
    try:
        yield kiwi.active_run()
    finally:
        # Run the cleanup even when the managed block raises; otherwise the
        # auto-started run stays active and later logging lands in it.
        current_run = kiwi.active_run()
        if current_run is not None and current_run.info.run_id == _AUTOLOG_RUN_ID:
            try_mlflow_log(kiwi.end_run)
def on_epoch_end(self, **kwargs):
    """
    Log loss and other metrics values after each epoch

    Returns without logging when ``smooth_loss`` or ``last_metrics`` is ``None``.
    Values are paired with ``self.metrics_names`` in order, smooth loss first.
    """
    smooth_loss = kwargs['smooth_loss']
    if smooth_loss is None:
        return
    last_metrics = kwargs["last_metrics"]
    if last_metrics is None:
        return

    step = kwargs['epoch']
    raw_values = [smooth_loss] + last_metrics
    # Lazy conversion: only values that get paired with a name are converted.
    as_floats = (float(value) for value in raw_values)
    named_metrics = dict(zip(self.metrics_names, as_floats))
    try_mlflow_log(kiwi.log_metrics, named_metrics, step=step)
def _log_early_stop_callback_params(callback): if callback: try: earlystopping_params = { 'monitor': callback.monitor, 'min_delta': callback.min_delta, 'patience': callback.patience, 'baseline': callback.baseline, 'restore_best_weights': callback.restore_best_weights } try_mlflow_log(kiwi.log_params, earlystopping_params) except Exception: # pylint: disable=W0703 return
def _flush_queue():
    """
    Flush the metric queue and log contents in batches to MLflow.
    Queue is divided into batches according to run id.

    The module-level ``_metric_queue`` is replaced by an empty list afterwards.
    """
    global _metric_queue
    client = kiwi.tracking.MlflowClient()
    batches = _assoc_list_to_map(_metric_queue)
    for batch_key, batch_metrics in batches.items():
        try_mlflow_log(client.log_batch, batch_key,
                       metrics=batch_metrics, params=[], tags=[])
    _metric_queue = []
def _log_one_cycle_callback_params(callback): if callback: try: params = { 'lr_max': callback.lr_max, 'div_factor': callback.div_factor, 'pct_start': callback.pct_start, 'final_div': callback.final_div, 'tot_epochs': callback.tot_epochs, 'start_epoch': callback.start_epoch, 'moms': callback.moms, } try_mlflow_log(kiwi.log_params, params) except Exception: # pylint: disable=W0703 return
def on_train_begin(self, logs=None):  # pylint: disable=unused-argument
    """
    Log every entry of the optimizer config as an ``opt_``-prefixed param and
    upload the model summary as a text artifact.

    :param logs: Unused.
    """
    for name, value in self.model.optimizer.get_config().items():
        try_mlflow_log(kiwi.log_param, "opt_" + name, value)

    summary_lines = []
    self.model.summary(print_fn=summary_lines.append)
    summary_text = '\n'.join(summary_lines)

    tempdir = tempfile.mkdtemp()
    try:
        summary_file = os.path.join(tempdir, "model_summary.txt")
        with open(summary_file, 'w') as handle:
            handle.write(summary_text)
        try_mlflow_log(kiwi.log_artifact, local_path=summary_file)
    finally:
        # The temporary directory is removed whether or not logging worked.
        shutil.rmtree(tempdir)
def _log_event(event):
    """
    Extracts metric information from the event protobuf

    Starts a run when none is active (recording its id in ``_AUTOLOG_RUN_ID``).
    For a ``summary`` event whose step satisfies
    ``(step - 1) % _LOG_EVERY_N_STEPS == 0``, every value carrying a
    ``simple_value`` is submitted to ``_thread_pool`` for ``_add_to_queue``.

    :param event: Event protobuf with ``step`` and, for summary events, ``summary.value``.
    """
    global _AUTOLOG_RUN_ID
    run = kiwi.active_run()
    if not run:
        try_mlflow_log(kiwi.start_run)
        run = kiwi.active_run()
        if run is None:
            # `start_run` is best-effort; without a run there is nothing to
            # attach metrics to, so bail out instead of raising AttributeError.
            return
        _AUTOLOG_RUN_ID = run.info.run_id
    run_id = run.info.run_id

    if event.WhichOneof('what') != 'summary':
        return
    # The step test does not depend on the individual value, so do it once.
    if (event.step - 1) % _LOG_EVERY_N_STEPS != 0:
        return

    for v in event.summary.value:
        if v.HasField('simple_value'):
            _thread_pool.submit(_add_to_queue, key=v.tag,
                                value=v.simple_value, step=event.step,
                                time=int(time.time() * 1000),
                                run_id=run_id)
def _log_early_stop_callback_metrics(callback, history): if callback: callback_attrs = _get_early_stop_callback_attrs(callback) if callback_attrs is None: return stopped_epoch, restore_best_weights, patience = callback_attrs try_mlflow_log(kiwi.log_metric, 'stopped_epoch', stopped_epoch) # Weights are restored only if early stopping occurs if stopped_epoch != 0 and restore_best_weights: restored_epoch = stopped_epoch - max(1, patience) try_mlflow_log(kiwi.log_metric, 'restored_epoch', restored_epoch) restored_metrics = { key: history.history[key][restored_epoch] for key in history.history.keys() } # Metrics are logged as 'epoch_loss' and 'epoch_acc' in TF 1.X if LooseVersion( tensorflow.__version__) < LooseVersion('2.0.0'): if 'loss' in restored_metrics: restored_metrics['epoch_loss'] = restored_metrics.pop( 'loss') if 'acc' in restored_metrics: restored_metrics['epoch_acc'] = restored_metrics.pop( 'acc') # Checking that a metric history exists metric_key = next(iter(history.history), None) if metric_key is not None: last_epoch = len(history.history[metric_key]) try_mlflow_log(kiwi.log_metrics, restored_metrics, step=last_epoch)
def train(self, *args, **kwargs):
    """
    Run the original ``tensorflow.estimator.Estimator.train`` inside
    ``_manage_active_run()``, logging ``steps`` / ``max_steps`` as params first
    when they are given positionally (third / fourth argument) or by keyword.

    :return: The value returned by the original ``train``.
    """
    with _manage_active_run():
        original = gorilla.get_original_attribute(
            tensorflow.estimator.Estimator, 'train')

        # Checking step and max_step parameters for logging
        for position, name in ((2, 'steps'), (3, 'max_steps')):
            if len(args) > position:
                try_mlflow_log(kiwi.log_param, name, args[position])
        for name in ('steps', 'max_steps'):
            if name in kwargs:
                try_mlflow_log(kiwi.log_param, name, kwargs[name])

        return original(self, *args, **kwargs)
def export_savedmodel(self, *args, **kwargs):
    """
    Run the original ``Estimator.export_savedmodel`` and log the exported model.

    When no run is active, the run recorded in ``_AUTOLOG_RUN_ID`` is resumed if
    one is recorded; otherwise a fresh run is started. After logging, the run is
    ended if it is the recorded autolog run or was started fresh here.

    :return: The value returned by the original ``export_savedmodel``.
    """
    global _AUTOLOG_RUN_ID
    started_fresh_run = False
    if not kiwi.active_run():
        if _AUTOLOG_RUN_ID:
            try_mlflow_log(kiwi.start_run, _AUTOLOG_RUN_ID)
        else:
            try_mlflow_log(kiwi.start_run)
            started_fresh_run = True

    original = gorilla.get_original_attribute(
        tensorflow.estimator.Estimator, 'export_savedmodel')
    serialized = original(self, *args, **kwargs)

    try_mlflow_log(log_model,
                   tf_saved_model_dir=serialized.decode('utf-8'),
                   tf_meta_graph_tags=[tag_constants.SERVING],
                   tf_signature_def_key='predict',
                   artifact_path='model')

    in_autolog_run = (kiwi.active_run() is not None
                      and kiwi.active_run().info.run_id == _AUTOLOG_RUN_ID)
    if in_autolog_run or started_fresh_run:
        try_mlflow_log(kiwi.end_run)
    return serialized
def _log_early_stop_callback_metrics(callback, history): if callback: callback_attrs = _get_early_stop_callback_attrs(callback) if callback_attrs is None: return stopped_epoch, restore_best_weights, patience = callback_attrs try_mlflow_log(kiwi.log_metric, 'stopped_epoch', stopped_epoch) # Weights are restored only if early stopping occurs if stopped_epoch != 0 and restore_best_weights: restored_epoch = stopped_epoch - max(1, patience) try_mlflow_log(kiwi.log_metric, 'restored_epoch', restored_epoch) restored_metrics = { key: history.history[key][restored_epoch] for key in history.history.keys() } # Checking that a metric history exists metric_key = next(iter(history.history), None) if metric_key is not None: last_epoch = len(history.history[metric_key]) try_mlflow_log(kiwi.log_metrics, restored_metrics, step=last_epoch)
def on_epoch_end(self, epoch, logs=None):
    """
    Log ``logs`` as metrics at step ``epoch``; nothing is logged when ``logs``
    is ``None`` or empty.

    :param epoch: Step value passed to the metrics call.
    :param logs: Mapping of metric names to values, or a falsy value.
    """
    if logs:
        try_mlflow_log(kiwi.log_metrics, logs, step=epoch)
def _log_artifacts_with_warning(**kwargs):
    """
    Call ``kiwi.log_artifacts`` with the given keyword arguments through
    ``try_mlflow_log``.
    """
    log_artifacts = kiwi.log_artifacts
    try_mlflow_log(log_artifacts, **kwargs)
def on_train_end(self, **kwargs):
    """
    Log ``self.learner`` as a model under the artifact path ``model``.
    """
    learner = self.learner
    try_mlflow_log(log_model, learner, artifact_path='model')
def on_train_begin(self, **kwargs):
    """
    Log layer count, optimizer function name and the optimizer flags that are
    present, then record the model summary both as a tag and as a text artifact.
    """
    layer_info = layers_info(self.learner)
    try_mlflow_log(kiwi.log_param, 'num_layers', len(layer_info))
    try_mlflow_log(kiwi.log_param, 'opt_func', self.opt_func.func.__name__)

    for flag in ('true_wd', 'bn_wd'):
        if hasattr(self.opt, flag):
            try_mlflow_log(kiwi.log_param, flag, getattr(self.opt, flag))

    if hasattr(self.opt, 'train_bn'):
        # NOTE(review): the guard checks `self.opt` but the value is read from
        # `self`, unlike the two flags above — confirm which one is intended.
        try_mlflow_log(kiwi.log_param, 'train_bn', self.train_bn)

    summary_text = model_summary(self.learner)
    try_mlflow_log(kiwi.set_tag, 'model_summary', summary_text)

    tempdir = tempfile.mkdtemp()
    try:
        summary_file = os.path.join(tempdir, "model_summary.txt")
        with open(summary_file, 'w') as handle:
            handle.write(summary_text)
        try_mlflow_log(kiwi.log_artifact, local_path=summary_file)
    finally:
        # The temporary directory is removed whether or not logging worked.
        shutil.rmtree(tempdir)
def on_train_begin(self, logs=None):  # pylint: disable=unused-argument
    """
    Log optimizer name, learning rate and epsilon (when available) and upload
    the model summary as a text artifact.

    Learning rate and epsilon are looked up under their public attribute name
    first and under the underscore-prefixed name otherwise.

    :param logs: Unused.
    """
    opt = self.model.optimizer
    if hasattr(opt, '_name'):
        try_mlflow_log(kiwi.log_param, 'optimizer_name', opt._name)
    # Elif checks are if the optimizer is a TensorFlow optimizer rather than a Keras one.
    elif hasattr(opt, 'optimizer'):
        # TensorFlow optimizer parameters are associated with the inner optimizer variable.
        # Therefore, we assign opt to be opt.optimizer for logging parameters.
        opt = opt.optimizer
        try_mlflow_log(kiwi.log_param, 'optimizer_name', type(opt).__name__)

    lookups = (
        ('learning_rate', ('lr', '_lr')),
        ('epsilon', ('epsilon', '_epsilon')),
    )
    for param_name, candidate_attrs in lookups:
        for attr_name in candidate_attrs:
            if not hasattr(opt, attr_name):
                continue
            raw = getattr(opt, attr_name)
            # Plain floats are logged directly; anything else is evaluated by the backend.
            value = raw if type(raw) is float else tensorflow.keras.backend.eval(raw)
            try_mlflow_log(kiwi.log_param, param_name, value)
            break  # first matching attribute wins

    summary_lines = []
    self.model.summary(print_fn=summary_lines.append)
    summary_text = '\n'.join(summary_lines)

    tempdir = tempfile.mkdtemp()
    try:
        summary_file = os.path.join(tempdir, "model_summary.txt")
        with open(summary_file, 'w') as handle:
            handle.write(summary_text)
        try_mlflow_log(kiwi.log_artifact, local_path=summary_file)
    finally:
        # The temporary directory is removed whether or not logging worked.
        shutil.rmtree(tempdir)
def on_train_end(self, logs=None):
    """
    Log ``self.model`` as a model under the artifact path ``model``.

    :param logs: Unused.
    """
    trained_model = self.model
    try_mlflow_log(log_model, trained_model, artifact_path='model')
def on_train_end(self, logs=None):  # pylint: disable=unused-argument
    """
    Log ``self.model`` with ``kiwi.keras.log_model`` under the artifact path
    ``model``.

    :param logs: Unused.
    """
    keras_log_model = kiwi.keras.log_model
    try_mlflow_log(keras_log_model, self.model, artifact_path='model')
def train_end(self, estimator, *args, **kwargs):
    """
    Log ``estimator.net`` as a model under the artifact path ``model``, but only
    when it is a ``HybridSequential``.

    :param estimator: Object exposing the trained network as ``net``.
    """
    network = estimator.net
    if not isinstance(network, HybridSequential):
        return
    try_mlflow_log(log_model, estimator.net, artifact_path="model")
def train_begin(self, estimator, *args, **kwargs):
    """
    Log layer count, epoch / batch limits (when set), optimizer name and the
    optimizer's learning rate and epsilon (when it has them) as params.

    :param estimator: Object exposing ``net``, ``max_epoch``, ``max_batch`` and
                      ``trainer.optimizer``.
    """
    try_mlflow_log(kiwi.log_param, "num_layers", len(estimator.net))

    for param_name, limit in (("epochs", estimator.max_epoch),
                              ("batches", estimator.max_batch)):
        if limit is not None:
            try_mlflow_log(kiwi.log_param, param_name, limit)

    optimizer = estimator.trainer.optimizer
    try_mlflow_log(kiwi.log_param, "optimizer_name", type(optimizer).__name__)

    for attr_name, param_name in (("lr", "learning_rate"), ("epsilon", "epsilon")):
        if hasattr(optimizer, attr_name):
            try_mlflow_log(kiwi.log_param, param_name, getattr(optimizer, attr_name))
def train(*args, **kwargs):
    """
    Run the original ``xgboost.train`` with autologging around it.

    Logs the booster params and the remaining call arguments, records evaluation
    results per iteration through an added callback, logs early-stopping
    metrics when ``early_stopping_rounds`` was passed, logs feature importance
    (plot and JSON) for every entry of ``importance_types`` and finally logs the
    trained model. A run is started when none is active and ended afterwards.

    NOTE(review): ``importance_types`` is not defined in this block — presumably
    it comes from an enclosing scope; confirm.

    :param args: Positional arguments for ``xgboost.train``.
    :param kwargs: Keyword arguments for ``xgboost.train``.
    :return: The model returned by the original ``xgboost.train``.
    """

    def record_eval_results(eval_results):
        """
        Create a callback function that records evaluation results.
        """
        def callback(env):
            eval_results.append(dict(env.evaluation_result_list))
        return callback

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt

        features = np.array(features)
        importance = np.array(importance)
        indices = np.argsort(importance)
        features = features[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align='center', height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel('Importance')
        ax.set_title('Feature Importance ({})'.format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # Use the helper's own parameter instead of the enclosing loop variable.
            filepath = os.path.join(
                tmpdir, 'feature_importance_{}.png'.format(importance_type))
            fig.savefig(filepath)
            try_mlflow_log(kiwi.log_artifact, filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    if not kiwi.active_run():
        try_mlflow_log(kiwi.start_run)
        auto_end_run = True
    else:
        auto_end_run = False

    original = gorilla.get_original_attribute(xgboost, 'train')

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs['params']
    try_mlflow_log(kiwi.log_params, params)

    unlogged_params = ['params', 'dtrain', 'evals', 'obj', 'feval', 'evals_result',
                       'xgb_model', 'callbacks', 'learning_rates']
    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    # `inspect.getargspec` no longer exists on Python 3.11+; prefer
    # `getfullargspec` and fall back only where it is unavailable.
    argspec_fn = getattr(inspect, 'getfullargspec', None) or inspect.getargspec  # pylint: disable=W1505
    all_arg_names = argspec_fn(original)[0]
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index('callbacks')
    callback = record_eval_results(eval_results)
    # Build new lists (treating None as empty) so the caller's callbacks list
    # is not mutated and does not accumulate recorders across calls.
    if num_pos_args >= callbacks_index + 1:
        tmp_list = list(args)
        tmp_list[callbacks_index] = list(tmp_list[callbacks_index] or []) + [callback]
        args = tuple(tmp_list)
    elif 'callbacks' in kwargs and kwargs['callbacks'] is not None:
        kwargs['callbacks'] = list(kwargs['callbacks']) + [callback]
    else:
        kwargs['callbacks'] = [callback]

    # training model
    model = original(*args, **kwargs)

    # logging metrics on each iteration.
    for idx, metrics in enumerate(eval_results):
        try_mlflow_log(kiwi.log_metrics, metrics, step=idx)

    # If early_stopping_rounds is present, logging metrics at the best iteration
    # as extra metrics with the max step + 1.
    early_stopping_index = all_arg_names.index('early_stopping_rounds')
    early_stopping = (num_pos_args >= early_stopping_index + 1 or
                      'early_stopping_rounds' in kwargs)
    if early_stopping:
        extra_step = len(eval_results)
        try_mlflow_log(kiwi.log_metric, 'stopped_iteration', len(eval_results) - 1)
        try_mlflow_log(kiwi.log_metric, 'best_iteration', model.best_iteration)
        try_mlflow_log(kiwi.log_metrics, eval_results[model.best_iteration],
                       step=extra_step)

    # logging feature importance as artifacts.
    for imp_type in importance_types:
        imp = model.get_score(importance_type=imp_type)
        try:
            # Unpacking lives inside the best-effort block: an empty score dict
            # must not abort the wrapper after training already succeeded.
            features, importance = zip(*imp.items())
            log_feature_importance_plot(features, importance, imp_type)
        except Exception:  # pylint: disable=broad-except
            _logger.exception('Failed to log feature importance plot. XGBoost autologging '
                              'will ignore the failure and continue. Exception: ')

        tmpdir = tempfile.mkdtemp()
        try:
            filepath = os.path.join(tmpdir, 'feature_importance_{}.json'.format(imp_type))
            with open(filepath, 'w') as f:
                json.dump(imp, f)
            try_mlflow_log(kiwi.log_artifact, filepath)
        finally:
            shutil.rmtree(tmpdir)

    try_mlflow_log(log_model, model, artifact_path='model')

    if auto_end_run:
        try_mlflow_log(kiwi.end_run)
    return model
def on_epoch_end(self, epoch, logs=None):
    """
    Log ``logs`` as metrics at step ``epoch``, but only for epochs where
    ``(epoch - 1)`` is a multiple of ``_LOG_EVERY_N_STEPS``.

    :param epoch: Epoch number, also used as the step value.
    :param logs: Metrics passed on to the metrics call.
    """
    if (epoch - 1) % _LOG_EVERY_N_STEPS != 0:
        return
    try_mlflow_log(kiwi.log_metrics, logs, step=epoch)