def compute_log_metrics(clf, x_val, y_val):
    """Compute and log metrics."""
    y_prob = clf.predict_proba(x_val)[:, 1]

    # Select the best threshold by maximising Youden's J statistic (TPR - FPR)
    fpr, tpr, thresholds = metrics.roc_curve(y_val, y_prob)
    best_threshold = thresholds[np.argmax(tpr - fpr)]
    y_pred = (y_prob > best_threshold).astype(int)

    acc = metrics.accuracy_score(y_val, y_pred)
    roc_auc = metrics.roc_auc_score(y_val, y_prob)
    avg_prc = metrics.average_precision_score(y_val, y_prob)
    print("Evaluation\n"
          f"  Accuracy          = {acc:.4f}\n"
          f"  ROC AUC           = {roc_auc:.4f}\n"
          f"  Average precision = {avg_prc:.4f}\n")
    print(metrics.classification_report(y_val, y_pred, digits=4))

    # Log metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("ROC AUC", roc_auc)
    bedrock.log_metric("Avg precision", avg_prc)
    bedrock.log_chart_data(y_val.astype(int).tolist(),
                           y_prob.flatten().tolist())

    # Calculate and upload xafai metrics
    analyzer = ModelAnalyzer(clf, "tree_model",
                             model_type=ModelTypes.TREE).test_features(x_val)
    analyzer.fairness_config(CONFIG_FAI).test_labels(y_val).test_inference(y_pred)
    analyzer.analyze()

def compute_log_metrics(clf, x_val, y_val):
    """Compute and log metrics."""
    y_prob = clf.predict_proba(x_val)[:, 1]
    y_pred = (y_prob > 0.5).astype(int)

    acc = metrics.accuracy_score(y_val, y_pred)
    precision = metrics.precision_score(y_val, y_pred)
    recall = metrics.recall_score(y_val, y_pred)
    f1_score = metrics.f1_score(y_val, y_pred)
    roc_auc = metrics.roc_auc_score(y_val, y_prob)
    avg_prc = metrics.average_precision_score(y_val, y_prob)
    print("Evaluation\n"
          f"  Accuracy          = {acc:.4f}\n"
          f"  Precision         = {precision:.4f}\n"
          f"  Recall            = {recall:.4f}\n"
          f"  F1 score          = {f1_score:.4f}\n"
          f"  ROC AUC           = {roc_auc:.4f}\n"
          f"  Average precision = {avg_prc:.4f}")

    # Log metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("ROC AUC", roc_auc)
    bedrock.log_metric("Avg precision", avg_prc)
    bedrock.log_chart_data(y_val.astype(int).tolist(),
                           y_prob.flatten().tolist())

    # Calculate and upload xafai metrics
    analyzer = ModelAnalyzer(clf, "tree_model",
                             model_type=ModelTypes.TREE).test_features(x_val)
    analyzer.fairness_config(PROTECTED_FEATURES).test_labels(y_val).test_inference(y_pred)
    analyzer.analyze()

def compute_log_metrics(model,
                        x_train,
                        x_test,
                        y_test,
                        best_th=0.5,
                        model_name="tree_model",
                        model_type=ModelTypes.TREE):
    """Compute and log metrics."""
    test_prob = model.predict_proba(x_test)[:, 1]
    test_pred = np.where(test_prob > best_th, 1, 0)

    acc = metrics.accuracy_score(y_test, test_pred)
    precision = metrics.precision_score(y_test, test_pred)
    recall = metrics.recall_score(y_test, test_pred)
    f1_score = metrics.f1_score(y_test, test_pred)
    roc_auc = metrics.roc_auc_score(y_test, test_prob)
    avg_prc = metrics.average_precision_score(y_test, test_prob)
    print("Evaluation\n"
          f"  Accuracy          = {acc:.4f}\n"
          f"  Precision         = {precision:.4f}\n"
          f"  Recall            = {recall:.4f}\n"
          f"  F1 score          = {f1_score:.4f}\n"
          f"  ROC AUC           = {roc_auc:.4f}\n"
          f"  Average precision = {avg_prc:.4f}")

    # Bedrock Logger: captures model metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_chart_data(
        y_test.astype(int).tolist(),
        test_prob.flatten().tolist())
    bedrock.log_metric("Accuracy", acc)

    # TODO - Fill in the blanks
    # Add ROC AUC and Avg precision
    bedrock.log_metric("ROC AUC", roc_auc)
    bedrock.log_metric("Avg precision", avg_prc)

    # TODO - Fill in the blanks
    # Bedrock Model Analyzer: generates model explainability and fairness metrics
    # Requires model object from pipeline to be passed in
    analyzer = ModelAnalyzer(model[1], model_name=model_name, model_type=model_type)\
        .train_features(x_train)\
        .test_features(x_test)

    # TODO - Fill in the blanks
    # Apply fairness config to the Bedrock Model Analyzer instance
    analyzer.fairness_config(CONFIG_FAI)\
        .test_labels(y_test)\
        .test_inference(test_pred)

    # Return the 4 metrics
    return analyzer.analyze()

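# --- Usage sketch (illustrative, not from the original snippet) ---
# Assumes a two-step sklearn Pipeline so that model[1] is the fitted estimator that
# ModelAnalyzer needs, a toy dataset, and that CONFIG_FAI and the sklearn `metrics`
# import are already defined at module level. Names here are placeholders.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

x, y = make_classification(n_samples=500, random_state=0)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

# Step 0 preprocesses, step 1 is the classifier, so model[1] indexes the estimator.
model = Pipeline([("scale", StandardScaler()),
                  ("clf", RandomForestClassifier(random_state=0))])
model.fit(x_train, y_train)

analyzer_output = compute_log_metrics(model, x_train, x_test, y_test,
                                      best_th=0.5, model_name="tree_model",
                                      model_type=ModelTypes.TREE)
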
def save_model_metrics(best_model):
    """Saves the best model and logs the relevant metrics."""
    joblib.dump(best_model['model'], OUTPUT_MODEL_PATH)

    logger = logging.getLogger(__name__)
    bedrock = BedrockApi(logger)
    for param, value in best_model['params'].items():
        bedrock.log_metric(param, value)
    bedrock.log_metric("ROC AUC", best_model['roc_auc'])
    bedrock.log_metric("PR AUC", best_model['pr_auc'])

def mnist_model(verbose=1, callbacks=[]):
    m = build_model()
    (xtrain, ytrain), (xtest, ytest) = mnist_data()

    if int(keras.__version__.split(".")[0]) == 2:
        training_log = m.fit(
            xtrain, ytrain,
            validation_data=(xtest, ytest),
            epochs=int(os.environ.get("N_EPOCH", 10)),
            batch_size=int(os.environ.get("BATCH_SIZE", 32)),
            verbose=verbose,
            callbacks=callbacks
        )
    else:
        training_log = m.fit(
            xtrain, ytrain,
            validation_data=(xtest, ytest),
            nb_epoch=int(os.environ.get("N_EPOCH", 10)),
            batch_size=int(os.environ.get("BATCH_SIZE", 32)),
            verbose=verbose,
            callbacks=callbacks
        )

    logger = logging.getLogger(__name__)
    bedrock = BedrockApi(logger)
    bedrock.log_metric("Accuracy", training_log.history['accuracy'][-1])
    bedrock.log_metric("Loss", training_log.history['loss'][-1])
    bedrock.log_metric("Validation Accuracy", training_log.history['val_accuracy'][-1])
    bedrock.log_metric("Validation Loss", training_log.history['val_loss'][-1])

    y_pred = m.predict(xtest)
    # Class labels via argmax over the class axis; keep the top probability for charting
    pred_label = np.argmax(y_pred, axis=-1)
    y_label = np.argmax(ytest, axis=-1)
    y_pred = np.max(y_pred, axis=-1)
    correct = np.equal(pred_label, y_label)
    bedrock.log_chart_data(correct.astype(int).tolist(), y_pred.flatten().tolist())

    # serialize model to JSON
    model_json = m.to_json()
    with open("/artefact/model.json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    m.save_weights("/artefact/model.h5")
    print("Saved model to disk")

import logging
import os
import re
import shlex
import shutil
import subprocess

from bedrock_client.bedrock.api import BedrockApi

logger = logging.getLogger(__name__)
bedrock = BedrockApi(logger)


def log_learnloss(output):
    bedrock.log_metric(
        "loss",
        float(re.search('(?<=, ).*(?= avg)', output).group().strip()))
    bedrock.log_metric(
        "loss_iteration",
        int(re.search(r"\d*", output.split()[0]).group().strip()))
    bedrock.log_metric(
        "learn",
        float(re.search('(?<=loss, ).*(?= rate)', output).group().strip()))
    bedrock.log_metric(
        "learn_iteration",
        int(re.search(r"\d*", output.split()[0]).group().strip()))


def run_command(command, logfile):
    logfile.write("This is the run command: %s\n" % command)
    process = subprocess.Popen(shlex.split(command),

def compute_log_metrics(clf, x_val, y_val):
    """Compute and log metrics."""
    print("\tEvaluating using validation data")
    y_prob = clf.predict_proba(x_val)[:, 1]
    y_pred = (y_prob > 0.5).astype(int)

    acc = metrics.accuracy_score(y_val, y_pred)
    precision = metrics.precision_score(y_val, y_pred)
    recall = metrics.recall_score(y_val, y_pred)
    f1_score = metrics.f1_score(y_val, y_pred)
    roc_auc = metrics.roc_auc_score(y_val, y_prob)
    avg_prc = metrics.average_precision_score(y_val, y_prob)
    print(f"Accuracy          = {acc:.6f}")
    print(f"Precision         = {precision:.6f}")
    print(f"Recall            = {recall:.6f}")
    print(f"F1 score          = {f1_score:.6f}")
    print(f"ROC AUC           = {roc_auc:.6f}")
    print(f"Average precision = {avg_prc:.6f}")

    # Log metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("Precision", precision)
    bedrock.log_metric("Recall", recall)
    bedrock.log_metric("F1 score", f1_score)
    bedrock.log_metric("ROC AUC", roc_auc)
    bedrock.log_metric("Avg precision", avg_prc)
    bedrock.log_chart_data(
        y_val.astype(int).tolist(),
        y_prob.flatten().tolist())

def compute_log_metrics(bst, xgtest, test_data):
    """Compute and log metrics."""
    print("\tEvaluating using validation data")
    preds = bst.predict(xgtest)
    y_target = test_data['y_yes']
    y_preds = np.round(preds).astype(int)

    acc = metrics.accuracy_score(y_target, y_preds)
    f1 = metrics.f1_score(y_target, y_preds)
    precision = metrics.precision_score(y_target, y_preds)
    recall = metrics.recall_score(y_target, y_preds)
    print("Accuracy  = {:.6f}".format(acc))
    print("Precision = {:.6f}".format(precision))
    print("Recall    = {:.6f}".format(recall))
    print("F1 score  = {:.6f}".format(f1))

    # Log metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("Precision", precision)
    bedrock.log_metric("Recall", recall)
    bedrock.log_metric("F1 score", f1)
    bedrock.log_chart_data(
        y_target.astype(int).tolist(),
        y_preds.flatten().tolist())

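# --- Usage sketch (illustrative, not from the original snippet) ---
# bst.predict(xgtest) implies xgtest is an xgboost DMatrix; this sketch builds one from a
# toy DataFrame whose "y_yes" column holds the binary target, then trains a small booster
# on the same toy data just so the call below can run. Column and object names are assumptions.
import numpy as np
import pandas as pd
import xgboost as xgb

rng = np.random.default_rng(0)
test_data = pd.DataFrame(rng.normal(size=(200, 4)),
                         columns=[f"f{i}" for i in range(4)])
test_data["y_yes"] = (test_data["f0"] > 0).astype(int)

feature_cols = [c for c in test_data.columns if c != "y_yes"]
xgtest = xgb.DMatrix(test_data[feature_cols], label=test_data["y_yes"])

bst = xgb.train({"objective": "binary:logistic"}, xgtest, num_boost_round=10)
compute_log_metrics(bst, xgtest, test_data)
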
def compute_log_metrics(gbm, x_val, y_val):
    """Compute and log metrics."""
    print("\tEvaluating using validation data")
    y_prob = gbm.predict_proba(x_val)[:, 1]
    y_pred = (y_prob > 0.5).astype(int)

    acc = metrics.accuracy_score(y_val, y_pred)
    precision = metrics.precision_score(y_val, y_pred)
    recall = metrics.recall_score(y_val, y_pred)
    f1_score = metrics.f1_score(y_val, y_pred)
    auc = metrics.roc_auc_score(y_val, y_prob)
    avg_prc = metrics.average_precision_score(y_val, y_prob)
    print("Accuracy =", acc)
    print("Precision =", precision)
    print("Recall =", recall)
    print("F1 score =", f1_score)
    print("AUC =", auc)
    print("Average precision =", avg_prc)

    # Log metrics
    logger = logging.getLogger(__name__)
    bedrock = BedrockApi(logger)
    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("Precision", precision)
    bedrock.log_metric("Recall", recall)
    bedrock.log_metric("F1 score", f1_score)
    bedrock.log_metric("AUC", auc)
    bedrock.log_metric("Avg precision", avg_prc)
    bedrock.log_chart_data(y_val.astype(int).tolist(),
                           y_prob.flatten().tolist())

def compute_log_metrics(pipe: Pipeline,
                        x_test: pd.core.frame.DataFrame,
                        y_test: np.ndarray,
                        y_test_onehot: np.ndarray):
    """Computes, prints and logs metrics.

    :param pipe: Pipeline of transforms with a trained final estimator
    :type pipe: sklearn.pipeline.Pipeline
    :param x_test: Features for testing
    :type x_test: pandas.core.frame.DataFrame
    :param y_test: Target variable data for testing
    :type y_test: numpy.ndarray
    :param y_test_onehot: One hot encoded target variable data
    :type y_test_onehot: numpy.ndarray
    :return: Test predicted probability and predictions
    :rtype: tuple[numpy.ndarray, numpy.ndarray]
    """
    test_prob = pipe.predict_proba(x_test)
    test_pred = pipe.predict(x_test)

    acc = metrics.accuracy_score(y_test, test_pred)
    precision = metrics.precision_score(y_test, test_pred, average='macro')
    recall = metrics.recall_score(y_test, test_pred, average='macro')
    f1_score = metrics.f1_score(y_test, test_pred, average='macro')
    roc_auc = metrics.roc_auc_score(y_test_onehot, test_prob,
                                    average='macro', multi_class='ovr')
    avg_prc = metrics.average_precision_score(y_test_onehot, test_prob,
                                              average='macro')
    print("\nEVALUATION\n"
          f"\tAccuracy                  = {acc:.4f}\n"
          f"\tPrecision (macro)         = {precision:.4f}\n"
          f"\tRecall (macro)            = {recall:.4f}\n"
          f"\tF1 score (macro)          = {f1_score:.4f}\n"
          f"\tROC AUC (macro)           = {roc_auc:.4f}\n"
          f"\tAverage precision (macro) = {avg_prc:.4f}")

    # Bedrock Logger: captures model metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    # `log_chart_data` assumes binary classification.
    # For multiclass labels, we can use a "micro-average" by
    # quantifying the score on all classes jointly.
    # See https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html  # noqa: E501
    # This will allow us to use the same `log_chart_data` method.
    bedrock.log_chart_data(
        y_test_onehot.ravel().astype(int).tolist(),  # list of int
        test_prob.ravel().astype(float).tolist()     # list of float
    )
    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("Precision (macro)", precision)
    bedrock.log_metric("Recall (macro)", recall)
    bedrock.log_metric("F1 Score (macro)", f1_score)
    bedrock.log_metric("ROC AUC (macro)", roc_auc)
    bedrock.log_metric("Avg precision (macro)", avg_prc)

    return test_prob, test_pred

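# --- Usage sketch (illustrative, not from the original snippet) ---
# Shows one way y_test_onehot could be prepared for the multiclass metrics above:
# sklearn's label_binarize yields the one-hot matrix that roc_auc_score and
# average_precision_score expect. The iris data and pipeline here are stand-ins.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, label_binarize

data = load_iris(as_frame=True)
x_train, x_test, y_train, y_test = train_test_split(
    data.data, data.target.to_numpy(), random_state=0)

pipe = Pipeline([("scale", StandardScaler()),
                 ("clf", LogisticRegression(max_iter=1000))])
pipe.fit(x_train, y_train)

y_test_onehot = label_binarize(y_test, classes=np.unique(y_train))
test_prob, test_pred = compute_log_metrics(pipe, x_test, y_test, y_test_onehot)
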
def log_metrics(run_dir):
    """Log metrics."""
    # Validation results are found in the last 7 elements of the last line of results.txt
    with open(run_dir + "results.txt", "r") as f:
        lines = f.readlines()
    precision, recall, map50, map50_95, val_giou, val_obj, val_cls = \
        [float(v) for v in lines[-1].split()[-7:]]
    print(f"  Precision          = {precision:.6f}")
    print(f"  Recall             = {recall:.6f}")
    print(f"  mAP@0.5            = {map50:.6f}")
    print(f"  mAP@0.5:0.95       = {map50_95:.6f}")
    print(f"  val GIoU           = {val_giou:.6f}")
    print(f"  val Objectness     = {val_obj:.6f}")
    print(f"  val Classification = {val_cls:.6f}")

    # Log metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_metric("Precision", precision)
    bedrock.log_metric("Recall", recall)
    bedrock.log_metric("mAP@0.5", map50)
    bedrock.log_metric("mAP@0.5:0.95", map50_95)
    bedrock.log_metric("val GIoU", val_giou)
    bedrock.log_metric("val Objectness", val_obj)
    bedrock.log_metric("val Classification", val_cls)