def train(inputs_path: str):
    spark = SparkUtils.build_or_get_session('training')
    df_kids = spark.read.parquet(inputs_path)
    label_col = 'final_status'

    mlflow_tracking_uri = 'http://35.246.84.226'
    mlflow_experiment_name = 'kickstarter'
    mlflow.set_tracking_uri(mlflow_tracking_uri)
    mlflow.set_experiment(experiment_name=mlflow_experiment_name)

    numerical_columns = ['days_campaign', 'hours_prepa', 'goal']
    categorical_columns = ['country_clean', 'currency_clean']
    features = numerical_columns + categorical_columns
    df = df_kids.select(features + [label_col])

    max_iter = 15
    model_specs: Pipeline = build_model(
        numerical_columns=numerical_columns,
        categorical_columns=categorical_columns,
        label_col=label_col,
        max_iter=max_iter)

    df_train, df_test = df.randomSplit([0.8, 0.2], seed=12345)
    df_train = df_train.cache()

    evaluator = BinaryClassificationEvaluator() \
        .setMetricName('areaUnderROC') \
        .setRawPredictionCol('rawPrediction') \
        .setLabelCol('final_status')

    gbt = model_specs.getStages()[-1]
    params_grid = ParamGridBuilder() \
        .addGrid(gbt.maxDepth, [6]) \
        .addGrid(gbt.maxIter, [15]) \
        .addGrid(gbt.maxBins, [32]) \
        .build()

    cross_val = CrossValidator(estimator=model_specs,
                               estimatorParamMaps=params_grid,
                               evaluator=evaluator,
                               numFolds=2)

    with mlflow.start_run() as active_run:
        logger.info(f'Cross evaluating model on {df_train.count()} lines')
        cross_val_model: CrossValidatorModel = cross_val.fit(df_train)
        model = cross_val_model.bestModel

        logger.info('Evaluating model')
        train_metrics = evaluator.evaluate(model.transform(df_train))
        metrics = {'train_auc': train_metrics}
        test_metrics = evaluator.evaluate(model.transform(df_test))
        metrics.update({'test_auc': test_metrics})
        logger.info(f'Model metrics: {metrics}')

        logger.info('Logging to mlflow')
        mlflow_params = {'model_class': 'gbt', 'max_iter': max_iter}
        mlflow.log_params(mlflow_params)
        mlflow.log_metrics(metrics)
        log_model(model, 'model')
        model_uri = mlflow.get_artifact_uri(artifact_path='model')
        logger.info(f'Model successfully trained and saved @ {model_uri}')
import pandas as pd
import mlflow
from joblib import dump
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

from helper import get_git_info

feature_names = [
    'sepal length (cm)',
    'sepal width (cm)',
    'petal length (cm)',
    'petal width (cm)',
]

train_df = pd.read_csv('data/prepared/train.csv')
test_df = pd.read_csv('data/prepared/test.csv')

clf = DecisionTreeClassifier(random_state=0)
clf.fit(train_df[feature_names], train_df['species'])

y_pred = clf.predict(test_df[feature_names])
score = accuracy_score(test_df['species'], y_pred)
print(f"accuracy_score = {score}")
dump(clf, 'model.pkl')

git_branch_name, git_origin_url = get_git_info()
mlflow.set_experiment('iris')
with mlflow.start_run():
    tags = {
        'model': 'DecisionTree',
        'git_branch_name': git_branch_name,
        'git_origin_url': git_origin_url,
    }
    metrics = {'score': score}
    mlflow.set_tags(tags)
    mlflow.log_metrics(metrics)
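# The snippet above imports `get_git_info` from a local `helper` module that
# is not shown. A minimal sketch of what such a helper might look like,
# assuming it shells out to the git CLI; the implementation below is an
# assumption inferred from the call site (return order: branch, origin URL).
import subprocess


def get_git_info():
    """Return (branch_name, origin_url) for the current repository."""
    branch = subprocess.check_output(
        ['git', 'rev-parse', '--abbrev-ref', 'HEAD'], text=True).strip()
    origin = subprocess.check_output(
        ['git', 'config', '--get', 'remote.origin.url'], text=True).strip()
    return branch, origin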
def run(max_runs, max_p):
    tracking_client = mlflow.tracking.MlflowClient()
    np.random.seed(_random_state)

    def new_eval(experiment_id):
        def eval(params):
            md, msl = params
            with mlflow.start_run(nested=True) as child_run:
                p = mlflow.projects.run(
                    run_id=child_run.info.run_id,
                    uri=".",
                    entry_point="train",
                    parameters={
                        "max_depth": md,
                        "min_samples_leaf": msl,
                    },
                    experiment_id=experiment_id,
                    synchronous=False,
                )
                succeeded = p.wait()
                if succeeded:
                    training_run = tracking_client.get_run(p.run_id)
                    metrics = training_run.data.metrics
                    train_loss = metrics["train_acc"]
                    test_loss = metrics["test_acc"]
                else:
                    # run failed => report the worst possible accuracy
                    tracking_client.set_terminated(p.run_id, "FAILED")
                    train_loss = -np.finfo(np.float64).max
                    test_loss = -np.finfo(np.float64).max
                mlflow.log_params({
                    "param_max_depth": md,
                    "param_min_samples_leaf": msl,
                })
                mlflow.log_metrics({
                    "train_acc": train_loss,
                    "test_acc": test_loss,
                })
            return p.run_id

        return eval

    with mlflow.start_run() as run:
        experiment_id = run.info.experiment_id
        runs = [(np.random.randint(1, 10), np.random.randint(1, 10))
                for _ in range(max_runs)]
        with ThreadPoolExecutor(max_workers=max_p) as executor:
            _ = executor.map(
                new_eval(experiment_id),
                runs,
            )

        # find the best run, log its metrics as the final metrics of this run.
        client = MlflowClient()
        runs = client.search_runs(
            [experiment_id],
            "tags.mlflow.parentRunId = '{run_id}' ".format(run_id=run.info.run_id))
        best_val_train = -np.finfo(np.float64).max
        best_val_test = -np.finfo(np.float64).max
        best_run = None
        for r in runs:
            if r.data.metrics["test_acc"] > best_val_test:
                best_run = r
                best_val_train = r.data.metrics["train_acc"]
                best_val_test = r.data.metrics["test_acc"]
        mlflow.set_tag("best_run", best_run.info.run_id)
        mlflow.log_metrics({
            "train_acc": best_val_train,
            "test_acc": best_val_test,
        })
def run_pipeline(self, pipe, features: list, model_name: str, cv: int = 5,
                 save_model: bool = False) -> dict:
    """
    Run MLflow pipeline: Train model, predict on train/test data, calculate
    cross-validated F1 scores, plot results and log params, metrics,
    artifacts and model.

    :param pipe: sklearn Pipeline
    :param features: List of features used for model
    :param model_name: Name of model running through pipeline
    :param cv: Cross-validation parameter used in sklearn cross_val_score method
    :param save_model: Boolean indicating whether to save model to disk
    :returns: Dict containing sklearn model object, predictions on
              self.X_train and self.X_test, and misclassifications
    """
    with mlflow.start_run():
        # define pipeline, fit model and predict
        print(f"Running pipeline for '{model_name}' model")
        model = pipe.fit(self.X_train[features], self.y_train)
        y_pred_train = model.predict(self.X_train[features])
        y_pred_test = model.predict(self.X_test[features])

        # get misclassifications
        misses_train = self.X_train[features][self.y_train != y_pred_train].index
        misses_test = self.X_test[features][self.y_test != y_pred_test].index

        # calculate (cross-validated) metrics
        acc_train = accuracy_score(self.y_train, y_pred_train)
        acc_test = accuracy_score(self.y_test, y_pred_test)
        f1_train = f1_score(self.y_train, y_pred_train)
        f1_test = f1_score(self.y_test, y_pred_test)
        cv_acc_train = np.average(cross_val_score(model, self.X_train[features],
                                                  self.y_train, scoring="accuracy",
                                                  cv=cv, n_jobs=-1))
        cv_f1_train = np.average(cross_val_score(model, self.X_train[features],
                                                 self.y_train, scoring="f1",
                                                 cv=cv, n_jobs=-1))

        # print results
        print(f"Accuracy train: {np.round(acc_train, 4)}",
              f"CV accuracy train: {np.round(cv_acc_train, 4)}",
              f"Accuracy test: {np.round(acc_test, 4)}",
              f"F1-score train: {np.round(f1_train, 4)}",
              f"CV F1-score train: {np.round(cv_f1_train, 4)}",
              f"F1-score test: {np.round(f1_test, 4)}",
              sep="\n")
        print("Classification report")
        print(classification_report(self.y_test, y_pred_test))

        # plot confusion matrix
        if "encode" not in model.named_steps:
            plot_confusion_matrix(model.named_steps["model"],
                                  self.X_test[features], self.y_test,
                                  cmap="Blues", normalize=None,
                                  values_format="d")

        # plot feature importances
        if hasattr(model.named_steps["model"], "feature_importances_"):
            feat_importances = self._get_feature_importances(
                features, model.named_steps["model"])
            feat_imp_plot = self._plot_feature_importances(feat_importances)
            save_fig(feat_imp_plot, self.figure_path,
                     f"{model_name}_feat_importances_plot")
            mlflow.log_artifact(self.figure_path.joinpath(
                f"{model_name}_feat_importances_plot.svg"))

        # log params, metrics, artifacts and model
        mlflow.log_params(model.named_steps["model"].get_params())
        mlflow.log_metrics({
            "accuracy_train": acc_train,
            "cv_accuracy_train": cv_acc_train,
            "accuracy_test": acc_test,
            "f1_score_train": f1_train,
            "cv_f1_score_train": cv_f1_train,
            "f1_score_test": f1_test})
        mlflow.sklearn.log_model(model, model_name)
        if save_model:
            mlflow.sklearn.save_model(model, self.model_path.joinpath(model_name))

        result = {"model": model,
                  "y_pred_test": y_pred_test,
                  "y_pred_train": y_pred_train,
                  "misses_train": misses_train,
                  "misses_test": misses_test}
        return result
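# A minimal usage sketch for run_pipeline, assuming the enclosing class is
# constructed with X_train/X_test/y_train/y_test splits. The class name
# `ModelRunner` and the feature names are hypothetical, not from the original.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

runner = ModelRunner(X_train, X_test, y_train, y_test)  # hypothetical class
pipe = Pipeline([("scale", StandardScaler()),
                 ("model", LogisticRegression(max_iter=1000))])
result = runner.run_pipeline(pipe,
                             features=["age", "income"],  # hypothetical columns
                             model_name="logreg_baseline",
                             cv=5)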
def train_epochs(epochs, batch_size, token_size, hidden_size, embedding_size):
    # Read data
    x_train_full = open("../input/wili-2018/x_train.txt").read().splitlines()
    y_train_full = open("../input/wili-2018/y_train.txt").read().splitlines()
    x_test_full = open("../input/wili-2018/x_test.txt").read().splitlines()
    y_test_full = open("../input/wili-2018/y_test.txt").read().splitlines()

    # Get encoders
    char_vocab = Dictionary().char_dict(x_train_full)
    lang_vocab = Dictionary().lang_dict(y_train_full)

    # Convert data
    x_train_idx, y_train_idx = Encoder().encode_labeled_data(
        x_train_full, y_train_full, char_vocab, lang_vocab)
    x_test_idx, y_test_idx = Encoder().encode_labeled_data(
        x_test_full, y_test_full, char_vocab, lang_vocab)
    x_train, x_val, y_train, y_val = train_test_split(x_train_idx, y_train_idx,
                                                      test_size=0.15)

    train_data = [(x, y) for x, y in zip(x_train, y_train)]
    val_data = [(x, y) for x, y in zip(x_val, y_val)]
    test_data = [(x, y) for x, y in zip(x_test_idx, y_test_idx)]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if not torch.cuda.is_available():
        logging.warning("WARNING: CUDA is not available.")

    criterion = torch.nn.CrossEntropyLoss(reduction='sum')
    bidirectional = False
    ntokens = len(char_vocab)
    nlabels = len(lang_vocab)
    pad_index = char_vocab.pad_index
    model, optimizer = get_model(ntokens, embedding_size, hidden_size, nlabels,
                                 bidirectional, pad_index, device)

    with mlflow.start_run():
        mlflow.log_metrics({
            "train samples": len(train_data),
            "val samples": len(val_data),
            "test samples": len(test_data),
        })
        mlflow.log_dict(lang_vocab.token2idx, "lang_vocab.json")
        mlflow.log_dict(char_vocab.token2idx, "char_vocab.json")
        params = {
            'epochs': epochs,
            'batch_size': batch_size,
            'token_size': token_size,
            'hidden_size': hidden_size,
            'embedding_size': embedding_size,
        }
        mlflow.log_dict(params, "params.json")

        logging.info(f'Training model for {epochs} epochs')
        for epoch in range(epochs):
            train_acc = train(model, optimizer, train_data, batch_size,
                              token_size, criterion, device)
            logging.info(f'| epoch {epoch:02d} | train accuracy={train_acc:.1f}%')
            validate(model, val_data, batch_size, token_size, device,
                     lang_vocab, tag='val', epoch=epoch)
            validate(model, test_data, batch_size, token_size, device,
                     lang_vocab, tag='test', epoch=epoch)
            mlflow.pytorch.log_model(model, f'{epoch:02d}.model')
        mlflow.pytorch.log_model(model, 'model')
"confidence": CONFIDENCE, }, } mlflow.log_params(params) # run game trials each with randomly choosen seed # since the seed has be be rendered by MLFlow using JS, # make sure seed stays within JS's Number.MAX_SAFE_INTEGER seeds = [randint(0, 2**53) for _ in range(N_TRIALS)] scores = list(tqdm(pool.imap(run_trial, seeds), total=N_TRIALS)) for i_trial, score, seed in zip(range(N_TRIALS), scores, seeds): # log scores for each trial mlflow.log_metrics( metrics={ f"team_{TEAM_NAME[0]}_score": score[0], f"team_{TEAM_NAME[1]}_score": score[1], "rng_seed": seed, }, step=i_trial, ) # log game trial wins to MLFlow stats = compute_statistics(scores) mlflow.log_metrics( { f"team_{TEAM_NAME[0]}_wins": stats["team_blue_wins"], f"team_{TEAM_NAME[0]}_win_ratio": stats["team_blue_win_ratio"], f"team_{TEAM_NAME[0]}_ci_lower": stats["team_blue_ci"][0], f"team_{TEAM_NAME[0]}_ci_upper":
def log_metrics(metrics: Dict[str, Any], step: Optional[int] = None):
    mlflow.log_metrics(metrics, step)
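# A short usage sketch for the wrapper above: logging per-epoch metrics under
# an active run. The metric names and values are illustrative only.
import mlflow

with mlflow.start_run():
    for epoch in range(3):
        # `step` lets the MLflow UI chart each metric over epochs
        log_metrics({"loss": 1.0 / (epoch + 1), "acc": 0.5 + 0.1 * epoch},
                    step=epoch)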
with torch.no_grad():
    # Forward pass, calculate logit predictions
    logits = module(b_input_ids,
                    token_type_ids=None,
                    attention_mask=b_input_mask)

# Move logits and labels to CPU
logits = logits[0].to('cpu').numpy()
label_ids = b_labels.to('cpu').numpy()

pred_flat = np.argmax(logits, axis=1).flatten()
labels_flat = label_ids.flatten()
pred_y.extend(pred_flat)
true_y.extend(labels_flat)

df_metrics = pd.DataFrame({'Epoch': module.hparams.epochs,
                           'Actual_class': labels_flat,
                           'Predicted_class': pred_flat})

tmp_eval_accuracy = accuracy_score(labels_flat, pred_flat)
tmp_eval_mcc_accuracy = matthews_corrcoef(labels_flat, pred_flat)

eval_accuracy += tmp_eval_accuracy
eval_mcc_accuracy += tmp_eval_mcc_accuracy
nb_eval_steps += 1

print(f'\n\tValidation Accuracy: {eval_accuracy/nb_eval_steps}')
print(f'\n\tValidation MCC Accuracy: {eval_mcc_accuracy/nb_eval_steps}')

mlflow.log_metrics({
    'accuracy': acc(torch.tensor(pred_y), torch.tensor(true_y)).item(),
    'f1': f1(torch.tensor(pred_y), torch.tensor(true_y)).item(),
    'precision': precision(torch.tensor(pred_y), torch.tensor(true_y)).item(),
    'recall': recall(torch.tensor(pred_y), torch.tensor(true_y)).item(),
})
mlflow.set_tag('Version', 'LRFinder')
mlflow.set_tag('Stage', 'train')
mlflow.set_tag('Commit', get_commit())
mlflow.set_tag('Time', get_commit_time())
mlflow.set_tag('Model', module.model_name)
def run(
    training_data,
    max_runs,
    batch_size,
    max_p,
    epochs,
    metric,
    gpy_model,
    gpy_acquisition,
    initial_design,
    seed,
):
    bounds = [
        {"name": "lr", "type": "continuous", "domain": (1e-5, 1e-1)},
        {"name": "momentum", "type": "continuous", "domain": (0.0, 1.0)},
    ]
    # create random file to store run ids of the training tasks
    tracking_client = mlflow.tracking.MlflowClient()

    def new_eval(nepochs, experiment_id, null_train_loss, null_valid_loss,
                 null_test_loss, return_all=False):
        """
        Create a new eval function.

        :param nepochs: Number of epochs to train the model.
        :param experiment_id: Experiment id for the training run.
        :param null_train_loss: Loss of a null model on the training dataset.
        :param null_valid_loss: Loss of a null model on the validation dataset.
        :param null_test_loss: Loss of a null model on the test dataset.
        :param return_all: Return train, validation and test loss if set.
        :return: new eval function.
        """

        def eval(params):
            """
            Train a Keras model with the given parameters by invoking an MLflow run.

            Notice we store the runUuid and the resulting metric in a file. We
            will later use these to pick the best run and to log the runUuids
            of the child runs as an artifact. This is a temporary workaround
            until MLflow offers a better mechanism for linking runs together.

            :param params: Parameters of the train_keras script we optimize over:
                           learning_rate, momentum
            :return: The metric value evaluated on the validation data.
            """
            lr, momentum = params[0]
            with mlflow.start_run(nested=True) as child_run:
                p = mlflow.projects.run(
                    run_id=child_run.info.run_id,
                    uri=".",
                    entry_point="train",
                    parameters={
                        "training_data": training_data,
                        "epochs": str(nepochs),
                        "learning_rate": str(lr),
                        "momentum": str(momentum),
                        "seed": str(seed),
                    },
                    experiment_id=experiment_id,
                    synchronous=False,
                )
                succeeded = p.wait()
                if succeeded:
                    training_run = tracking_client.get_run(p.run_id)
                    metrics = training_run.data.metrics
                    # cap the loss at the loss of the null model
                    train_loss = min(null_train_loss,
                                     metrics["train_{}".format(metric)])
                    valid_loss = min(null_valid_loss,
                                     metrics["val_{}".format(metric)])
                    test_loss = min(null_test_loss,
                                    metrics["test_{}".format(metric)])
                else:
                    # run failed => return null loss
                    tracking_client.set_terminated(p.run_id, "FAILED")
                    train_loss = null_train_loss
                    valid_loss = null_valid_loss
                    test_loss = null_test_loss
                mlflow.log_metrics({
                    "train_{}".format(metric): train_loss,
                    "val_{}".format(metric): valid_loss,
                    "test_{}".format(metric): test_loss,
                })
            if return_all:
                return train_loss, valid_loss, test_loss
            return valid_loss

        return eval

    with mlflow.start_run() as run:
        experiment_id = run.info.experiment_id
        # Evaluate the null model first.
        # We use the null model (predict everything to the mean) as a reasonable
        # upper bound on the loss. We need an upper bound to handle the failed
        # runs (e.g. runs returning NaNs) because GPyOpt cannot handle Infs.
        # Always including a null model in the results is also good ML practice.
        train_null_loss, valid_null_loss, test_null_loss = new_eval(
            0, experiment_id, _inf, _inf, _inf, True)(params=[[0, 0]])
        myProblem = GPyOpt.methods.BayesianOptimization(
            new_eval(epochs, experiment_id, train_null_loss, valid_null_loss,
                     test_null_loss),
            bounds,
            evaluator_type="local_penalization"
            if min(batch_size, max_p) > 1 else "sequential",
            batch_size=batch_size,
            num_cores=max_p,
            model_type=gpy_model,
            acquisition_type=gpy_acquisition,
            initial_design_type=initial_design,
            initial_design_numdata=max_runs >> 2,
            exact_feval=False,
        )
        myProblem.run_optimization(max_runs)
        matplotlib.use("agg")
        plt.switch_backend("agg")
        with TempDir() as tmp:
            acquisition_plot = tmp.path("acquisition_plot.png")
            convergence_plot = tmp.path("convergence_plot.png")
            myProblem.plot_acquisition(filename=acquisition_plot)
            myProblem.plot_convergence(filename=convergence_plot)
            if os.path.exists(convergence_plot):
                mlflow.log_artifact(convergence_plot, "convergence_plot")
            if os.path.exists(acquisition_plot):
                mlflow.log_artifact(acquisition_plot, "acquisition_plot")

        # find the best run, log its metrics as the final metrics of this run.
        client = MlflowClient()
        runs = client.search_runs(
            [experiment_id],
            "tags.mlflow.parentRunId = '{run_id}' ".format(run_id=run.info.run_id))
        best_val_train = _inf
        best_val_valid = _inf
        best_val_test = _inf
        best_run = None
        for r in runs:
            if r.data.metrics["val_{}".format(metric)] < best_val_valid:
                best_run = r
                best_val_train = r.data.metrics["train_{}".format(metric)]
                best_val_valid = r.data.metrics["val_{}".format(metric)]
                best_val_test = r.data.metrics["test_{}".format(metric)]
        mlflow.set_tag("best_run", best_run.info.run_id)
        mlflow.log_metrics({
            "train_{}".format(metric): best_val_train,
            "val_{}".format(metric): best_val_valid,
            "test_{}".format(metric): best_val_test,
        })
def train_tabular_mcts(
    cls,
    policy_fn,
    n,
    sum_ret,
    num_rollouts=100000,
    max_steps_per_episode=500,
    epsilon=0.9,
    discount_rate=0.95,
    perc_rollouts_full_random=10,
    rollout_start_count=0,
    init_board=None,
    random_seed=None,
    print_stats=False,
    print_freq=100,
):
    global num_revisits

    for rollout_num in range(num_rollouts):
        # offset the rollout number when resuming from a previous count
        rollout_num += rollout_start_count

        num_rollouts_full_random = num_rollouts * perc_rollouts_full_random / 100
        if rollout_num <= num_rollouts_full_random:
            prob_rand_action = 1
        else:
            prob_rand_action = min(
                1, 10 * epsilon / (rollout_num - num_rollouts_full_random + 1))
        # print("prob of rand action: %s" % prob_rand_action)

        game = cls(random_seed=random_seed)
        if init_board:
            game.score = 0
            game.set_board(init_board)
        curr_board, score, done = game.get_state()

        states, actions, rewards, is_duplicate = [], [], [], []
        state_action_pairs = set()
        step_num = 0
        while step_num <= max_steps_per_episode and not done:
            if random.random() < prob_rand_action:
                action = game.action_space.sample()
            else:
                action = policy_fn(curr_board)

            states.append(curr_board)
            actions.append(action)
            is_duplicate.append((tuple(curr_board), action) in state_action_pairs)
            state_action_pairs.add((tuple(curr_board), action))

            new_board, reward, done, _ = game.step(action)
            rewards.append(reward)
            # TODO: use a DNN as a value function, and use n-step Temporal
            # Difference learning.
            curr_board = new_board
            step_num += 1

        # calculate returns (i.e., discounted future rewards) using Bellman
        # backups for the rollout just completed
        returns = [0] * len(rewards)
        returns[-1] = rewards[-1]
        num_revisits_this_rollout = 0
        for i in range(len(rewards) - 2, -1, -1):
            if (tuple(states[i]), actions[i]) in sum_ret:
                num_revisits_this_rollout += 1
            returns[i] = rewards[i] + discount_rate * returns[i + 1]
            if not is_duplicate[i]:
                n[(tuple(states[i]), actions[i])] += 1
                sum_ret[(tuple(states[i]), actions[i])] += returns[i]
        num_revisits += num_revisits_this_rollout

        stats = {
            "num_steps": len(rewards),
            "game_score": sum(rewards),
            "prob random action": prob_rand_action,
            "len sum_ret dict": len(sum_ret),
            "total num states-action pairs revisited": num_revisits,
            "percent state-action pairs in this rollout seen already":
                num_revisits_this_rollout / step_num * 100.0,
        }
        if print_stats and rollout_num % print_freq == 0:
            print("rollout num %s" % rollout_num)
            for k, v in stats.items():
                print("%s: %s" % (k, v))
            print()
        mlflow.log_metrics(stats, step=rollout_num)

    return num_revisits
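# For context: `n` and `sum_ret` are tables keyed by (state, action) pairs, so
# a Monte-Carlo action-value estimate is sum_ret / n. A minimal sketch of a
# greedy policy_fn built on those tables; NUM_ACTIONS and the random
# tie-breaking are assumptions, not part of the original code.
import random

NUM_ACTIONS = 4  # hypothetical size of the game's action space


def make_greedy_policy(n, sum_ret):
    """Return a policy that picks the action with the highest mean return."""
    def policy_fn(board):
        key = tuple(board)
        # mean return per action; unvisited actions default to 0
        q = [sum_ret.get((key, a), 0.0) / max(n.get((key, a), 1), 1)
             for a in range(NUM_ACTIONS)]
        best = max(q)
        # break ties randomly among the best-scoring actions
        return random.choice([a for a, v in enumerate(q) if v == best])
    return policy_fn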
    'XGBoost': 'x_val',
    'RandomForestClassifier': 'x_val',
    'LightGBM': 'x_val_2'}

# Develop baseline models
for model_name, model in models_dict.items():
    print(f"Running {model_name}")
    with mlflow.start_run(run_name="Baseline: " + model_name):
        # Compute cross validation score
        cv_score = cross_val_score(estimator=model,
                                   X=eval(model_data[model_name]),
                                   y=y_train,
                                   scoring='f1',
                                   cv=5)

        # Get cv mean and std score
        mean_cv_score = np.mean(cv_score)
        std_cv_score = np.std(cv_score)

        # Fit model
        model.fit(eval(model_data[model_name]), y_train)

        # Get scores for validation data
        val_pred_prob = model.predict_proba(eval(val_data[model_name]))[:, -1]
        val_f1_score = f1_score(y_val, model.predict(eval(val_data[model_name])))
        val_roc_score = roc_auc_score(y_val, val_pred_prob)
        val_log_loss = log_loss(y_val, val_pred_prob)

        # Log metrics
        mlflow.log_metrics({'mean_cv_f1_score': mean_cv_score,
                            'std_cv_f1_score': std_cv_score,
                            'validation_f1_score': val_f1_score,
                            'validation_auc_roc': val_roc_score,
                            'validation_log_loss': val_log_loss})
if opt.view_result is True:
    plt.plot(recall, precision, '-o')
    area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]]
    area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]]
    plt.fill_between(area_under_curve_x, 0, area_under_curve_y,
                     alpha=0.2, edgecolor='r')

    fig = plt.gcf()  # gcf - get current figure
    fig.canvas.set_window_title('AP ' + class_name)

    text = 'AP for ' + str(class_name) + ': {0:.4f}'.format(ap)
    plt.title(text)
    plt.xlabel('Recall')
    plt.ylabel('Precision')

    axes = plt.gca()  # gca - get current axes
    axes.set_xlim([0.0, 1.0])
    axes.set_ylim([0.0, 1.05])  # .05 to give some extra space
    plt.show()
    cv2.waitKey(0)

mean_AP = sum_AP / (num_classes - not_exist_class)
print('mAP: %.4f' % mean_AP)

# 5. store recall and precision values for the plot
# 6. store in mlflow
if opt.mlflow is True and dataset_name == 'test':
    with mlflow.start_run() as run:
        mlflow.log_params(config['params'])
        mlflow.log_params(config['model'])
        mlflow.log_params(config['data'])
        mlflow.log_metric('mAP', mean_AP)
        mlflow.log_metrics(ap_per_class)
with open('config/config.json', 'r') as file:
    project_name = json.load(file)['project_name']
mlflow.set_experiment(project_name)

df = Spreadsheet().get_data('../data/raw/train.csv')
p = Preprocessing()
df = p.clean_data(df)
df = p.categ_encoding(df)

X = df.drop(columns=["Survived"])
y = df["Survived"]

algos = [
    RandomForestClassifier,
    GradientBoostingClassifier,
    LogisticRegression,
]
for algo in algos:
    with mlflow.start_run() as run:
        model = TrainerSklearn().train(X, y,
                                       classification=True,
                                       algorithm=algo,
                                       data_split=('cv', {'cv': 8}),
                                       preprocessing=p)
        mlflow.log_params({'algorithm': algo})
        mlflow.log_metrics(model.get_metrics())
        mlflow.sklearn.log_model(model.get_model(), 'model')
    ('clf', MultinomialNB())
])
model.fit(X_train, y_train)
mlflow.sklearn.log_model(model, 'model')

train_predictions = model.predict(X_train)
test_predictions = model.predict(X_test)

test_scores = {}
test_scores['train_accuracy_score'] = accuracy_score(train_predictions, y_train)
test_scores['test_accuracy_score'] = accuracy_score(test_predictions, y_test)
mlflow.log_metrics(test_scores)

# Create custom tags
mlflow.set_tag("documentType", documentType)
mlflow.set_tag("taskType", taskType)
mlflow.set_tag("engagementId", engagementId)
mlflow.set_tag("filePaths", filePaths)

# Get model metadata and metrics to return by the job
experiment_info = {
    "artifact_uri": run.info.artifact_uri,
    "experiment_id": run.info.experiment_id,
    "run_id": run.info.run_id,
    "train_accuracy_score": test_scores['train_accuracy_score'],
    "test_accuracy_score": test_scores['test_accuracy_score'],
}
def log_metrics(self, metrics, step=None):
    mlflow.log_metrics(metrics, step)
def log_metrics(cls, metrics, step):
    try:
        mlflow.log_metrics(metrics, step=step)
    except ConnectionError:
        logger.warning("ConnectionError in logging metrics to MLflow.")
def log_metrics(cls, metrics, step):
    mlflow.log_metrics(metrics, step=step)
def run_algo(df_train, df_test, table_name, version, features, algo, params,
             experiment_id, run_name=None, do_shap=False, verbose=True,
             nested=False):
    _print = print if verbose else lambda x: x

    pca_params = params['pca_params']
    algo_params = params['algo_params']

    model = BostonModel(algo=algo, pca_params=pca_params,
                        algo_params=algo_params).train(df_train)
    _print('done: trained model')

    y_train_pred = model.predict_pd(df_train)
    _print('done: predicting train')
    y_test_pred = model.predict_pd(df_test)
    _print('done: predicting test')

    metrics_train = evaluate(df_train['PRICE'], y_train_pred, prefix='train')
    _print('metrics train: {}'.format(metrics_train))
    metrics_test = evaluate(df_test['PRICE'], y_test_pred, prefix='test')
    _print('metrics test: {}'.format(metrics_test))

    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name,
                          nested=nested) as run:
        mlflow.log_params(pca_params)
        mlflow.log_params(algo_params)
        mlflow.log_params({'algo_name': get_algo_name(algo)})
        mlflow.log_params({'table_name': table_name})
        mlflow.log_params({'table_version': version})
        mlflow.log_metrics(metrics_train)
        mlflow.log_metrics(metrics_test)
        mlflow.set_tag('features', features)
        mlflow.set_tag('run_id', run.info.run_uuid)
        mlflow.sklearn.log_model(model, 'model')

        scatter_test = get_scatter(y_test_pred, df_test['PRICE'], 'test')
        mlflow.log_artifact('scatter_test.png', 'charts')
        hist_test = get_hist(y_test_pred, df_test['PRICE'], 'test')
        mlflow.log_artifact('hist_test.png', 'charts')

        if do_shap:
            shap_mean = get_shap_mean_values(model, df_test, FEATURES)
            plot(get_mean_shap_feaute_importance(shap_mean, FEATURES),
                 filename='shap.html')
            mlflow.log_artifact('shap.html', 'charts')

        _print('done run: id={}'.format(run.info.run_uuid))

    plt.close('all')
    return run.info, {'y_pred': y_test_pred, 'df': df_test, 'model': model}
def run(dpath, img_size=160, epochs=10, batch_size=32, learning_rate=0.0001,
        output='model', dset=None):
    global g_image_size
    g_image_size = img_size
    img_shape = (img_size, img_size, 3)

    info('Loading Data Set')
    train = load_dataset(dpath, dset)
    train_data, train_labels = zip(*train)
    train_ds = Dataset.zip((Dataset.from_tensor_slices(list(train_data)),
                            Dataset.from_tensor_slices(list(train_labels)),
                            Dataset.from_tensor_slices([img_size] * len(train_data))))  # noqa: E501
    print(train_ds)
    train_ds = train_ds.map(map_func=process_image, num_parallel_calls=5)
    train_ds = train_ds.apply(tf.data.experimental.ignore_errors())
    train_ds = train_ds.batch(batch_size)
    train_ds = train_ds.prefetch(buffer_size=5)
    train_ds = train_ds.repeat()

    info('Creating Model')
    base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
                                                   include_top=False,
                                                   weights='imagenet')
    base_model.trainable = True
    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(lr=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    info('Training')
    steps_per_epoch = math.ceil(len(train) / batch_size)
    mlflow.tensorflow.autolog()
    model.fit(train_ds, epochs=epochs, steps_per_epoch=steps_per_epoch)

    # Log metric
    accuracy = random()  # dummy score
    metric = {
        'name': 'accuracy-score',
        'numberValue': accuracy,
        'format': "PERCENTAGE",
    }
    metrics = {'metrics': [metric]}
    mlflow.log_metrics({"accuracy": accuracy})

    info('Writing Pipeline Metric')
    with file_io.FileIO('/mlpipeline-metrics.json', 'w') as f:
        json.dump(metrics, f)

    info('Saving Model')
    output = check_dir(output)
    print('Serializing into saved_model format')
    tf.saved_model.save(model, str(output))
    print('Done!')

    file_output = str(Path(output).joinpath('latest.h5'))
    print('Serializing h5 model to:\n{}'.format(file_output))
    model.save(file_output)

    return generate_hash(file_output, 'kf_pipeline')
def handle_result(self, results, **info):
    # For each result that's being reported by ``train.report()``,
    # we get the result from the rank 0 worker (i.e. first worker) and
    # report it to MLflow.
    rank_zero_results = results[0]
    mlflow.log_metrics(rank_zero_results)
def choose_model(X, y, fraction, n_splits, n_repeats, n_jobs, mlflow_tracking):
    print("\nStart model selection...")

    # Define dataset for modeling
    X_fit, y_fit = get_fractioned_data(X, y, fraction)

    # Get the list of basic models to be estimated
    basic_models = get_list_of_basic_models()

    # # Create dict for modeling results
    # baseline_score = get_baseline_score(y)
    # basic_results = {
    #     "Baseline": {
    #         "cv_score_mean": baseline_score,
    #         "cv_score_std": None,
    #         "time_spent": None,
    #     }
    # }
    basic_results = {}

    # Define num. of CV splits and K-repeats
    scorer, cv = set_custom_scorer_cv(n_splits, n_repeats)

    # Start MLflow tracking
    if mlflow_tracking:
        # Setup MLflow tracking server
        exp_id = mlflow_set_exp_id("Model:Choose")

    # Run loop through the list of basic models
    for basic_model in basic_models:
        model_name = type(basic_model).__name__
        print(f"Modeling {model_name}...")

        # Fit each basic model via cross-validation
        tic = time.time()
        basic_model_scores = model_selection.cross_val_score(
            X=X_fit,
            y=y_fit,
            estimator=basic_model,
            scoring=scorer,
            cv=cv,
            n_jobs=n_jobs,  # -1 means using all processors
            verbose=0,  # The verbosity level. default=0
        )
        # Calculate time spent
        min, sec = divmod(time.time() - tic, 60)
        time_spent = f"{int(min)}min {int(sec)}sec"

        # Save results to dict
        basic_results.update(
            {
                basic_model: {
                    "cv_score_mean": basic_model_scores.mean(),
                    "cv_score_std": basic_model_scores.std(),
                    "time_spent": time_spent,
                }
            }
        )

        ##* Log models with MLflow logging
        if mlflow_tracking:
            print(f"\tLogging {model_name} results to runs...")
            with mlflow.start_run(experiment_id=exp_id, run_name=model_name):
                mlflow.log_params(
                    {
                        "time_spent": time_spent,
                        "fraction": fraction,
                        "cv_n_splits": n_splits,
                        "cv_n_repeats": n_repeats,
                        "random_state": rnd_state,
                    }
                )
                mlflow.log_metrics(
                    {
                        "cv_score_mean": basic_model_scores.mean(),
                        "cv_score_std": basic_model_scores.std(),
                    }
                )

    # Sort dict by score
    basic_results = dict(
        sorted(
            basic_results.items(),
            key=lambda x: (
                x[1]["cv_score_mean"],
                x[1]["cv_score_std"],
                x[1]["time_spent"],
            ),
        )
    )
    print(" ")
    print("-------------- Models' rating --------------")
    pprint(basic_results, sort_dicts=False)

    # Pick the best model from the basic set
    chosen_model = list(basic_results.keys())[0]

    return basic_results, chosen_model
def run(self):
    print('Search Random # 1')
    random.seed(self.random_seed)
    np.random.seed(self.random_seed)

    params_space = [
        {
            # removed saga (because it is way too slow, x10 or so)
            # also newton-cg, because it is x100 slower
            # 'solver': random.choice(['newton-cg', 'sag', 'saga', 'lbfgs']),
            'solver': random.choice(['sag', 'lbfgs']),
            'C': np.random.uniform(0, 1.0),
        }
        for _ in range(self.max_runs)
    ]

    print('Search Random # 2')
    with mlflow.start_run() as run:
        experiment_id = run.info.experiment_id

        print('Search Random # 3')
        # run all random tasks in parallel
        # TODO: pass train, test, and validation sets?
        tasks = yield [
            LogMetrics(
                model_name=self.model_name,
                model_params={
                    **params,
                    'random_seed': self.random_seed,
                },
                experiment_id=experiment_id,
            )
            for params in params_space
        ]

        print('Search Random # 4')
        # find the best params (based on the validation metric)
        best_run = None
        best_val_train = -_inf
        best_val_valid = -_inf
        best_val_test = -_inf
        for model_output in tasks:
            # get the score and compare it with the best so far
            with model_output['score'].open('r') as f:
                res = yaml.safe_load(f)

            # TODO: we don't have a validation set yet (should add one)
            # new_val_valid = res['valid'][self.metric]
            new_val_valid = res['test'][self.metric]

            # TODO: in the case of accuracy it is "<";
            # in the case of loss it should be ">"
            print('best_val_valid < new_val_valid', best_val_valid, new_val_valid)
            if best_val_valid < new_val_valid:
                print('found better', new_val_valid)
                best_run = res['run_id']
                best_val_train = res['train'][self.metric]
                best_val_valid = new_val_valid
                best_val_test = res['test'][self.metric]

        metrics = {
            f'train_{self.metric}': float(best_val_train),
            # f'val_{self.metric}': best_val_valid,
            f'test_{self.metric}': float(best_val_test),
        }
        mlflow.set_tag('best_run', best_run)
        mlflow.log_metrics(metrics)

    with self.output().open('w') as f:
        yaml.dump({
            'metrics': metrics,
            'best_run_id': best_run,
        }, f, default_flow_style=False)
            make_linear_preprocessor(),
            LogisticRegression(
                C=C, class_weight="balanced", max_iter=1000, random_state=0
            ),
        )
        clf.fit(X_train, y_train)
        # scores = cross_val_score(clf, X_train, y_train, cv=5)
        y_pred = clf.predict(X_test)
        precision, recall, fscore, _ = precision_recall_fscore_support(
            y_test, y_pred, average="binary"
        )
        mlflow.log_metrics(
            {"precision": precision, "recall": recall, "fscore": fscore}
        )
        # return score
        return -recall

    return eval_fn


X, y = fetch_censusdata()
print(X.shape, y.shape)

space = [hp.quniform("C", 1.0, 100.0, 0.5)]

with mlflow.start_run() as _:
    mlflow.log_param("max_evals", max_evals)
    mlflow.set_tags({"training_type": "Hyperopt"})
})
clf = Supervised(model=m,
                 task='binary',
                 X_train=X_train,
                 y_train=y_train,
                 num_cv=cv,
                 class_weight=class_weight,
                 seed=SEED)
score = clf.train_cv()

avg_f1_test = np.mean(score['test_f1'])
std_f1_test = np.std(score['test_f1'])
avg_micro_f1_test = np.mean(score['test_f1_micro'])
std_micro_f1_test = np.std(score['test_f1_micro'])

mlflow.log_metrics({
    'avg_f1_test': avg_f1_test,
    'std_f1_test': std_f1_test,
    'avg_micro_f1_test': avg_micro_f1_test,
    'std_micro_f1_test': std_micro_f1_test,
})

y_pred = clf.evaluate(X_test)
y_prob = clf.predict_proba(X_test)[:, 1]
model_scores = calculate_model_score(y_test, y_pred)

mlflow.log_metrics({
    'accuracy': model_scores['accuracy'],
    'f1': model_scores['f1'],
    'f1_micro': model_scores['f1_micro'],
    'f1_macro': model_scores['f1_macro'],
    'precision': model_scores['precision'],
    'recall': model_scores['recall'],
    'roc_auc': model_scores['roc_auc'],
})
def train_model(
    model,
    dataloaders,
    criterion,
    optimizer,
    scheduler,
    train_config,
    device,
):
    """Trains the model."""
    num_epochs = train_config["training"]["epoch"]
    model_name = train_config["models"]["name"]
    dataset_name = dataloaders["train"].dataset.__class__.__name__

    tic = time.time()

    model = model.to(device)
    best_model_weights = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    contexts = ["train", "test"]
    dataset_sizes = {x: len(dataloaders[x].dataset) for x in contexts}

    # MLflow setup
    host = train_config["mlflow"]["host"]
    port = train_config["mlflow"]["port"]
    mlflow.set_tracking_uri(f"http://{host}:{port}")
    mlflow.set_experiment(f"{model_name}_{dataset_name}")

    with mlflow.start_run():
        for epoch in range(num_epochs):
            print("Epoch {}/{}".format(epoch, num_epochs - 1))
            print("-" * 10)

            losses = {}
            accuracies = {}

            # Each epoch has a training and testing phase (Note: for the
            # Stanford car dataset, validation and test are synonymous)
            for phase in contexts:
                if phase == "train":
                    model.train()
                else:
                    model.eval()

                running_loss = 0.0
                running_corrects = 0

                # Iterate over data
                for images, labels in tqdm(dataloaders[phase]):
                    images = images.to(device)
                    labels = labels.to(device)

                    # https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch
                    optimizer.zero_grad()

                    # forward
                    # track history only in train
                    with torch.set_grad_enabled(phase == "train"):
                        outputs = model(images)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        # backward + optimize
                        if phase == "train":
                            loss.backward()
                            optimizer.step()

                    # Running statistics (running means within the epoch)
                    running_loss += loss.item() * images.size(0)
                    running_corrects += torch.sum(preds == labels.data)

                if phase == "train":
                    scheduler.step()

                # Epoch statistics
                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]
                print("{} phase. Loss: {:.2f} Acc: {:.2f}".format(
                    phase, epoch_loss, epoch_acc))

                losses[phase] = epoch_loss
                accuracies[phase] = epoch_acc.item()

                # deep copy and save out the model
                if phase == "test" and epoch_acc > best_acc:
                    save_path = (Path(".") / "checkpoints" /
                                 "{}_{}_{:.2f}.pth".format(model_name, epoch, epoch_loss))
                    print("Saving best model")
                    best_acc = epoch_acc
                    best_model_weights = copy.deepcopy(model.state_dict())
                    save_ckpt(model, optimizer, epoch, losses, accuracies, save_path)
                    # Logging as mlflow artifacts
                    mlflow.pytorch.log_model(model, "models")

            mlflow.log_metrics(
                {
                    "train_acc": accuracies["train"],
                    "train_loss": losses["train"],
                    "test_acc": accuracies["test"],
                    "test_loss": losses["test"],
                },
                step=epoch,
            )

    time_elapsed = time.time() - tic
    print("Training complete in {:.0f}m {:.0f}s".format(
        time_elapsed // 60, time_elapsed % 60))
    print("Best test Acc: {:4f}".format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_weights)
    return model
def run(self):
    """Runs the continuation strategy.

    A continuation strategy that defines how predictor and corrector
    components of the algorithm interact with the states of the
    mathematical system.
    """
    for i in range(self.continuation_steps):
        self._state_wrap.counter = i
        self._bparam_wrap.counter = i
        self._value_wrap.counter = i
        self._quality_wrap.counter = i

        if i == 0 and self.hparams["natural_start"]:
            print(" unconstrained solver for 1st step")
            concat_states = [
                self._prev_state,
                pytree_element_add(self._prev_bparam, 0.03),
            ]
            corrector = UnconstrainedCorrector(
                objective=self.objective,
                concat_states=concat_states,
                grad_fn=self.compute_grad_fn,
                value_fn=self.value_func,
                accuracy_fn=self.accuracy_fn1,
                hparams=self.hparams,
                dataset_tuple=self.dataset_tuple,
            )
            state, bparam, quality, value, val_loss, val_acc = corrector.correction_step()

            if self.hparams["double_natural_start"]:
                # TODO: refactor natural and double natural start
                self._prev_state = state
                self._prev_bparam = bparam
                print(" unconstrained solver for 2nd step")
                concat_states = [
                    self._prev_state,
                    pytree_element_add(self._prev_bparam, 0.07),
                ]
                corrector = UnconstrainedCorrector(
                    objective=self.objective,
                    concat_states=concat_states,
                    grad_fn=self.compute_grad_fn,
                    value_fn=self.value_func,
                    accuracy_fn=self.accuracy_fn1,
                    hparams=self.hparams,
                    dataset_tuple=self.dataset_tuple,
                )
                state, bparam, quality, value, val_loss, val_acc = corrector.correction_step()

            self._state_wrap.state = state
            self._bparam_wrap.state = bparam

        print(
            "delta_s",
            self._value_wrap.get_record(),
            self._bparam_wrap.get_record(),
            self._delta_s,
        )
        self.sw.write([
            self._state_wrap.get_record(),
            self._bparam_wrap.get_record(),
            self._value_wrap.get_record(),
            self._quality_wrap.get_record(),
        ])

        concat_states = [
            (self._prev_state, self._prev_bparam),
            (self._state_wrap.state, self._bparam_wrap.state),
            self.prev_secant_direction,
        ]
        predictor = SecantPredictor(
            concat_states=concat_states,
            delta_s=self._delta_s,
            prev_delta_s=self._prev_delta_s,
            omega=self._omega,
            net_spacing_param=self.hparams["net_spacing_param"],
            net_spacing_bparam=self.hparams["net_spacing_bparam"],
            hparams=self.hparams,
        )
        predictor.prediction_step()

        self.prev_secant_direction = predictor.secant_direction
        self.hparams["sphere_radius"] = (
            self.hparams["sphere_radius_m"] * self._delta_s
        )  # l2_norm(predictor.secant_direction)

        mlflow.log_metric(f"sphere_radius{self.perturb_index}",
                          self.hparams["sphere_radius"], i)
        mlflow.log_metric(f"delta_s{self.perturb_index}", self._delta_s, i)

        concat_states = [
            predictor.state,
            predictor.bparam,
            predictor.secant_direction,
            {"state": predictor.state, "bparam": predictor.bparam},
        ]
        corrector = PerturbedFixedCorrecter(
            objective=self.objective,
            dual_objective=self.dual_objective,
            accuracy_fn1=self.accuracy_fn1,
            value_fn=self.value_func,
            concat_states=concat_states,
            key_state=self.key_state,
            compute_min_grad_fn=self.compute_min_grad_fn,
            compute_grad_fn=self.compute_grad_fn,
            hparams=self.hparams,
            delta_s=self._delta_s,
            pred_state=[self._state_wrap.state, self._bparam_wrap.state],
            pred_prev_state=[self._state_wrap.state, self._bparam_wrap.state],
            counter=self.continuation_steps,
            dataset_tuple=self.dataset_tuple,
        )
        self._prev_state = copy.deepcopy(self._state_wrap.state)
        self._prev_bparam = copy.deepcopy(self._bparam_wrap.state)

        (
            state,
            bparam,
            quality,
            value,
            val_loss,
            val_acc,
            corrector_omega,
        ) = corrector.correction_step()
        # TODO: make predictor and corrector expose similar APIs
        # TODO: Enable MLFlow

        bparam = tree_map(self.clip_lambda_max, bparam)
        bparam = tree_map(self.clip_lambda_min, bparam)

        self._state_wrap.state = state
        self._bparam_wrap.state = bparam
        self._value_wrap.state = value
        self._quality_wrap.state = quality
        # self._omega = corrector_omega
        self._prev_delta_s = self._delta_s
        self._delta_s = corrector_omega * self._delta_s
        self._delta_s = min(self._delta_s, self.hparams["max_arc_len"])
        self._delta_s = max(self._delta_s, self.hparams["min_arc_len"])

        if (bparam[0] >= self.hparams["lambda_max"]) or (
                bparam[0] <= self.hparams["lambda_min"]):
            self.sw.write([
                self._state_wrap.get_record(),
                self._bparam_wrap.get_record(),
                self._value_wrap.get_record(),
                self._quality_wrap.get_record(),
            ])
            break

        mlflow.log_metrics(
            {
                f"train_loss{self.perturb_index}": float(self._value_wrap.state),
                f"delta_s{self.perturb_index}": float(self._delta_s),
                f"bparam{self.perturb_index}": float(self._bparam_wrap.state[0]),
                f"norm grads{self.perturb_index}": float(self._quality_wrap.state),
                f"val_loss{self.perturb_index}": float(val_loss),
                f"val_acc{self.perturb_index}": float(val_acc),
                f"corrector_omega{self.perturb_index}": float(corrector_omega),
            },
            i,
        )
def _log(self, d: Dict[str, float]):
    with self.start_active_run():
        log_metrics(d, self.gens[self.active_run])
def mlflow_log_metrics(metrics: dict):
    mlflow.log_metrics(metrics)
def main(args: DictConfig):
    # Non-strict access to fields
    OmegaConf.set_struct(args, False)

    # Adding default estimator params
    default_names, _, _, default_values, _, _, _ = \
        inspect.getfullargspec(instantiate(args.estimator, context_size=0).__class__.__init__)
    if default_values is not None:
        args.estimator['defaults'] = {
            n: str(v)
            for (n, v) in zip(default_names[len(default_names) - len(default_values):],
                              default_values)
        }
        args.estimator['defaults'].pop('cat_context')
    logger.info(OmegaConf.to_yaml(args, resolve=True))

    # Data generator init
    dag = DirectedAcyclicGraph.random_dag(**args.data_generator.dag)
    # if 'interpolation_switch' in args.data_generator.sem:
    #     args.data_generator.sem.interpolation_switch = args.data.n_train + args.data.n_test
    sem = instantiate(args.data_generator.sem, dag=dag)

    # Experiment tracking
    mlflow.set_tracking_uri(args.exp.mlflow_uri)
    mlflow.set_experiment(args.data_generator.sem_type)

    # Checking if the run already exists
    if check_existing_hash(args, args.data_generator.sem_type):
        logger.info('Skipping existing run.')
        return
    else:
        logger.info('No runs found - performing one.')

    mlflow.start_run()
    mlflow.log_params(flatten_dict(args))

    # Generating train/test dataframes
    train_df = pd.DataFrame(sem.sample(size=args.data.n_train, seed=args.data.train_seed).numpy(),
                            columns=dag.var_names)
    test_df = pd.DataFrame(sem.sample(size=args.data.n_test, seed=args.data.test_seed).numpy(),
                           columns=dag.var_names)

    # Saving artifacts
    train_df.to_csv(hydra.utils.to_absolute_path(f'{mlflow.get_artifact_uri()}/train.csv'), index=False)
    test_df.to_csv(hydra.utils.to_absolute_path(f'{mlflow.get_artifact_uri()}/test.csv'), index=False)
    sem.dag.plot_dag()
    plt.savefig(hydra.utils.to_absolute_path(f'{mlflow.get_artifact_uri()}/dag.png'))
    if len(dag.var_names) <= 20:
        df = pd.concat([train_df, test_df], keys=['train', 'test']).reset_index().drop(columns=['level_1'])
        g = sns.pairplot(df, plot_kws={'alpha': 0.25}, hue='level_0')
        g.fig.suptitle(sem.__class__.__name__)
        plt.savefig(hydra.utils.to_absolute_path(f'{mlflow.get_artifact_uri()}/data.png'))

    metrics = {}

    for var_ind, target_var in enumerate(dag.var_names):
        var_results = {}

        # Considering all the variables for input
        input_vars = [var for var in dag.var_names if var != target_var]
        y_train, X_train = train_df.loc[:, target_var].values, train_df.loc[:, input_vars].values
        y_test, X_test = test_df.loc[:, target_var].values, test_df.loc[:, input_vars].values

        # Initialising risks
        risks = {}
        for risk in args.predictors.risks:
            risks[risk] = getattr(importlib.import_module('sklearn.metrics'), risk)

        # Fitting predictive models
        models = {}
        for pred_model in args.predictors.pred_models:
            logger.info(f'Fitting {pred_model._target_} for target = {target_var} and inputs {input_vars}')
            model = instantiate(pred_model)
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            models[pred_model._target_] = model
            for risk, risk_func in risks.items():
                var_results[f'test_{risk}_{pred_model._target_}'] = risk_func(y_test, y_pred)

        sampler = instantiate(args.estimator.sampler, X_train=X_train,
                              fit_method=args.estimator.fit_method,
                              fit_params=args.estimator.fit_params)

        # =================== Relative feature importance ===================
        # 1. G = MB(target_var), FoI = input_vars / MB(target_var)
        G_vars_1 = list(sem.get_markov_blanket(target_var))
        fsoi_vars_1 = [var for var in input_vars
                       if var not in list(sem.get_markov_blanket(target_var))]
        prefix_1 = 'mb'

        # 2. G = input_vars / MB(target_var), FoI = MB(target_var)
        fsoi_vars_2 = list(sem.get_markov_blanket(target_var))
        G_vars_2 = [var for var in input_vars
                    if var not in list(sem.get_markov_blanket(target_var))]
        prefix_2 = 'non_mb'

        for (G_vars, fsoi_vars, prefix) in zip([G_vars_1, G_vars_2],
                                               [fsoi_vars_1, fsoi_vars_2],
                                               [prefix_1, prefix_2]):
            G = search_nonsorted(input_vars, G_vars)
            fsoi = search_nonsorted(input_vars, fsoi_vars)

            rfi_gof_metrics = {}
            for f, f_var in zip(fsoi, fsoi_vars):
                estimator = sampler.train([f], G)

                # GoF diagnostics
                rfi_gof_results = {}
                if estimator is not None:
                    rfi_gof_results[f'rfi/gof/{prefix}_mean_log_lik'] = \
                        estimator.log_prob(inputs=X_test[:, f], context=X_test[:, G]).mean()

                # Advanced conditional GoF metrics
                if sem.get_markov_blanket(f_var).issubset(set(G_vars)):
                    cond_mode = 'all'
                if isinstance(sem, LinearGaussianNoiseSEM):
                    cond_mode = 'arbitrary'

                if sem.get_markov_blanket(f_var).issubset(set(G_vars)) or isinstance(sem, LinearGaussianNoiseSEM):
                    rfi_gof_results[f'rfi/gof/{prefix}_kld'] = \
                        conditional_kl_divergence(estimator, sem, f_var, G_vars, args.exp, cond_mode, test_df)
                    rfi_gof_results[f'rfi/gof/{prefix}_hd'] = \
                        conditional_hellinger_distance(estimator, sem, f_var, G_vars, args.exp, cond_mode, test_df)
                    rfi_gof_results[f'rfi/gof/{prefix}_jsd'] = \
                        conditional_js_divergence(estimator, sem, f_var, G_vars, args.exp, cond_mode, test_df)

                rfi_gof_metrics = {k: rfi_gof_metrics.get(k, []) + [rfi_gof_results.get(k, np.nan)]
                                   for k in set(list(rfi_gof_metrics.keys()) + list(rfi_gof_results.keys()))}

            # Feature importance
            if len(fsoi) > 0:
                var_results[f'rfi/{prefix}_cond_size'] = len(G_vars)
                for model_name, model in models.items():
                    for risk, risk_func in risks.items():
                        rfi_explainer = explainer.Explainer(model.predict, fsoi, X_train, sampler=sampler,
                                                            loss=risk_func, fs_names=input_vars)
                        mb_explanation = rfi_explainer.rfi(X_test, y_test, G, nr_runs=args.exp.rfi.nr_runs)
                        var_results[f'rfi/{prefix}_mean_rfi_{risk}_{model_name}'] = \
                            np.abs(mb_explanation.fi_vals(return_np=True)).mean()

            var_results = {**var_results,
                           **{k: np.nanmean(v) if len(G_vars) > 0 else np.nan
                              for (k, v) in rfi_gof_metrics.items()}}

        # TODO: =================== Global SAGE ===================

        mlflow.log_metrics(var_results, step=var_ind)
        metrics = {k: metrics.get(k, []) + [var_results.get(k, np.nan)]
                   for k in set(list(metrics.keys()) + list(var_results.keys()))}

    # Logging mean statistics
    mlflow.log_metrics({k: np.nanmean(v) for (k, v) in metrics.items()}, step=len(dag.var_names))
    mlflow.end_run()
def ml_run(self, run_id=None):
    seed_randomness(self.random_seed)

    params_space = {
        'optimizer_props': {
            'lr': hp.uniform('lr', 0.00001, 0.001),  # (low, high)
            'beta_1': hp.uniform('beta_1', .0, 0.9999),
        }
    }

    # Solution:
    # 1) pass a custom method, and if we haven't calculated the loss, break hyperopt
    # 2) once we get outside, we try to yield the last unsolved task
    # 3) because luigi would run the current task again.
    # TODO: to solve this problem I would need to pickle the inner state of hyperopt's Trials
    # https://github.com/hyperopt/hyperopt/wiki/FMin#13-the-trials-object
    # TODO: how to leverage parallelism?
    # https://github.com/hyperopt/hyperopt/wiki/Parallelizing-Evaluations-During-Search-via-MongoDB
    best = None
    while not best:
        try:
            best = fmin(fn=self._fn,
                        space=params_space,
                        algo=tpe.suggest if self.algo == "tpe.suggest" else rand.suggest,
                        max_evals=self.max_runs,
                        rstate=np.random.RandomState(self.random_seed),
                        show_progressbar=False)
        except NewValueForOptimizer as e:
            # TODO: maybe we can run multiple runs in parallel?
            params = e.new_value
            model_task = get_model_task_by_name(self.model_name)
            model_result = yield model_task(
                parent_run_id=run_id,
                random_seed=self.random_seed,
                **params,
            )
            model_run_id = self.get_run_id_from_result(model_result)
            with model_result['metrics'].open('r') as f:
                model_metrics = yaml.safe_load(f)

            hyper_opt_runs = self._get_hyper_opt_runs() or {}
            hyper_opt_runs[get_key_by_params(params)] = {
                'metrics': model_metrics,
                'run_id': model_run_id,
            }
            with self.output()['hyper_opt_runs'].open('w') as f:
                pickle.dump(hyper_opt_runs, f)

    print('we got the best params:', best)

    hyper_opt_runs = self._get_hyper_opt_runs() or {}
    # TODO: hyperopt drops 'optimizer_props' for some reason; need to protect it
    best_model_state = hyper_opt_runs.get(get_key_by_params({
        'optimizer_props': best
    }))
    if best_model_state is None:
        raise Exception('it seems we do not have any runs here')

    # TODO: the format of the best model metrics should look the same
    # as the metrics of experiments
    metrics = {
        f'train_{self.metric}': float(best_model_state['metrics'][self.metric]['train']),
        f'val_{self.metric}': float(best_model_state['metrics'][self.metric]['val']),
        f'test_{self.metric}': float(best_model_state['metrics'][self.metric]['test']),
    }
    mlflow.log_metrics(metrics)

    # the child task could be an mlflow_task, so we can get run_id from its 'mlflow' state
    best_run = best_model_state.get('run_id', None)
    if best_run:
        mlflow.set_tag('best_run', best_run)

    with self.output()['metrics'].open('w') as f:
        yaml.dump(best_model_state, f, default_flow_style=False)