Example 1
def compute_log_metrics(clf, x_val, y_val):
    """Compute and log metrics."""
    y_prob = clf.predict_proba(x_val)[:, 1]
    
    # Select the threshold that maximises Youden's J statistic (TPR - FPR)
    fpr, tpr, thresholds = metrics.roc_curve(y_val, y_prob)
    best_threshold = thresholds[np.argmax(tpr - fpr)]
    
    y_pred = (y_prob > best_threshold).astype(int)

    acc = metrics.accuracy_score(y_val, y_pred)
    roc_auc = metrics.roc_auc_score(y_val, y_prob)
    avg_prc = metrics.average_precision_score(y_val, y_prob)
    print("Evaluation\n"
          f"  Accuracy          = {acc:.4f}\n"
          f"  ROC AUC           = {roc_auc:.4f}\n"
          f"  Average precision = {avg_prc:.4f}\n")
    print(metrics.classification_report(y_val, y_pred, digits=4))

    # Log metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("ROC AUC", roc_auc)
    bedrock.log_metric("Avg precision", avg_prc)
    bedrock.log_chart_data(y_val.astype(int).tolist(),
                           y_prob.flatten().tolist())

    # Calculate and upload xafai metrics
    analyzer = ModelAnalyzer(clf, "tree_model", model_type=ModelTypes.TREE).test_features(x_val)
    analyzer.fairness_config(CONFIG_FAI).test_labels(y_val).test_inference(y_pred)
    analyzer.analyze()
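These snippets show only the function bodies. A minimal preamble they appear to rely on is sketched below; the BedrockApi import matches Example 6, while the ModelAnalyzer/ModelTypes import paths and the contents of CONFIG_FAI are illustrative assumptions rather than code from the original projects.

# Assumed preamble for the compute_log_metrics snippets on this page.
# The ModelAnalyzer/ModelTypes import paths and CONFIG_FAI below are
# illustrative assumptions, not verbatim from the original sources.
import logging

import numpy as np
from sklearn import metrics

from bedrock_client.bedrock.api import BedrockApi
from bedrock_client.bedrock.analyzer.model_analyzer import ModelAnalyzer
from bedrock_client.bedrock.analyzer import ModelTypes

# Fairness configuration keyed by the protected attribute (placeholder values)
CONFIG_FAI = {
    "gender": {
        "unprivileged_attribute_values": ["female"],
        "privileged_attribute_values": ["male"],
    }
}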
Example 2
def compute_log_metrics(clf, x_val, y_val):
    """Compute and log metrics."""
    y_prob = clf.predict_proba(x_val)[:, 1]
    y_pred = (y_prob > 0.5).astype(int)

    acc = metrics.accuracy_score(y_val, y_pred)
    precision = metrics.precision_score(y_val, y_pred)
    recall = metrics.recall_score(y_val, y_pred)
    f1_score = metrics.f1_score(y_val, y_pred)
    roc_auc = metrics.roc_auc_score(y_val, y_prob)
    avg_prc = metrics.average_precision_score(y_val, y_prob)
    print("Evaluation\n"
          f"  Accuracy          = {acc:.4f}\n"
          f"  Precision         = {precision:.4f}\n"
          f"  Recall            = {recall:.4f}\n"
          f"  F1 score          = {f1_score:.4f}\n"
          f"  ROC AUC           = {roc_auc:.4f}\n"
          f"  Average precision = {avg_prc:.4f}")

    # Log metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("ROC AUC", roc_auc)
    bedrock.log_metric("Avg precision", avg_prc)
    bedrock.log_chart_data(y_val.astype(int).tolist(),
                           y_prob.flatten().tolist())

    # Calculate and upload xafai metrics
    analyzer = ModelAnalyzer(clf, "tree_model", model_type=ModelTypes.TREE).test_features(x_val)
    analyzer.fairness_config(PROTECTED_FEATURES).test_labels(y_val).test_inference(y_pred)
    analyzer.analyze()
Example 3
def compute_log_metrics(model,
                        x_train,
                        x_test,
                        y_test,
                        best_th=0.5,
                        model_name="tree_model",
                        model_type=ModelTypes.TREE):
    """Compute and log metrics."""
    test_prob = model.predict_proba(x_test)[:, 1]
    test_pred = np.where(test_prob > best_th, 1, 0)

    acc = metrics.accuracy_score(y_test, test_pred)
    precision = metrics.precision_score(y_test, test_pred)
    recall = metrics.recall_score(y_test, test_pred)
    f1_score = metrics.f1_score(y_test, test_pred)
    roc_auc = metrics.roc_auc_score(y_test, test_prob)
    avg_prc = metrics.average_precision_score(y_test, test_prob)
    print("Evaluation\n"
          f"  Accuracy          = {acc:.4f}\n"
          f"  Precision         = {precision:.4f}\n"
          f"  Recall            = {recall:.4f}\n"
          f"  F1 score          = {f1_score:.4f}\n"
          f"  ROC AUC           = {roc_auc:.4f}\n"
          f"  Average precision = {avg_prc:.4f}")

    # Bedrock Logger: captures model metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_chart_data(
        y_test.astype(int).tolist(),
        test_prob.flatten().tolist())

    bedrock.log_metric("Accuracy", acc)
    # Log ROC AUC and average precision
    bedrock.log_metric("ROC AUC", roc_auc)
    bedrock.log_metric("Avg precision", avg_prc)

    # Bedrock Model Analyzer: generates model explainability and fairness metrics
    # Requires the fitted estimator from the pipeline (model[1]) to be passed in
    analyzer = ModelAnalyzer(model[1], model_name=model_name, model_type=model_type)\
                    .train_features(x_train)\
                    .test_features(x_test)

    # Apply the fairness config to the Bedrock Model Analyzer instance
    analyzer.fairness_config(CONFIG_FAI)\
        .test_labels(y_test)\
        .test_inference(test_pred)

    # Return the 4 metrics
    return analyzer.analyze()
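The ModelAnalyzer call above takes model[1], so the function appears to expect a two-step scikit-learn Pipeline whose second step is the estimator. A hedged usage sketch with synthetic data (step names, data and settings are illustrative only):

# Illustrative only: the pipeline steps and synthetic data are assumptions.
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

x_train = np.random.rand(200, 5)
y_train = np.random.randint(0, 2, 200)
x_test = np.random.rand(50, 5)
y_test = np.random.randint(0, 2, 50)

model = Pipeline([
    ("scaler", StandardScaler()),        # model[0]: preprocessing step
    ("clf", RandomForestClassifier()),   # model[1]: estimator passed to ModelAnalyzer
])
model.fit(x_train, y_train)

compute_log_metrics(model, x_train, x_test, y_test,
                    best_th=0.5,
                    model_name="tree_model",
                    model_type=ModelTypes.TREE)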
Example 4
def save_model_metrics(best_model):
    """Saves the best model and logs the relevant metrics"""
    joblib.dump(best_model['model'], OUTPUT_MODEL_PATH)

    logger = logging.getLogger(__name__)
    bedrock = BedrockApi(logger)
    for param, value in best_model['params'].items():
        bedrock.log_metric(param, value)
    bedrock.log_metric("ROC AUC", best_model['roc_auc'])
    bedrock.log_metric("PR AUC", best_model['pr_auc'])
Example 5
def mnist_model(verbose=1, callbacks=None):
    # Avoid a mutable default argument for the callbacks list
    callbacks = callbacks if callbacks is not None else []
    m = build_model()
    (xtrain, ytrain), (xtest, ytest) = mnist_data()
    # Keras 2+ uses `epochs`; Keras 1.x used `nb_epoch`
    if int(keras.__version__.split(".")[0]) >= 2:
        training_log = m.fit(
            xtrain,
            ytrain,
            validation_data=(xtest, ytest),
            epochs=int(os.environ.get("N_EPOCH", 10)),
            batch_size=int(os.environ.get("BATCH_SIZE", 32)),
            verbose=verbose,
            callbacks=callbacks
        )
    else:
        training_log = m.fit(
            xtrain,
            ytrain,
            validation_data=(xtest, ytest),
            nb_epoch=int(os.environ.get("N_EPOCH", 10)),
            batch_size=int(os.environ.get("BATCH_SIZE", 32)),
            verbose=verbose,
            callbacks=callbacks
        )
    
    logger = logging.getLogger(__name__)
    bedrock = BedrockApi(logger)

    bedrock.log_metric("Accuracy", training_log.history['accuracy'][-1])
    bedrock.log_metric("Loss", training_log.history['loss'][-1])
    
    bedrock.log_metric("Validation Accuracy", training_log.history['val_accuracy'][-1])
    bedrock.log_metric("Validation Loss", training_log.history['val_loss'][-1])
    
    y_pred = m.predict(xtest)
    # Class labels come from the index of the highest score, not the score itself
    pred_label = np.argmax(y_pred, axis=-1)
    y_label = np.argmax(ytest, axis=-1)
    # Keep the winning-class probability for the chart data
    y_pred = np.max(y_pred, axis=-1)
    correct = np.equal(pred_label, y_label)

    bedrock.log_chart_data(correct.astype(int).tolist(), y_pred.flatten().tolist())
    
    # serialize model to JSON
    model_json = m.to_json()
    with open("/artefact/model.json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    m.save_weights("/artefact/model.h5")
    print("Saved model to disk")
Example 6
import logging
from bedrock_client.bedrock.api import BedrockApi
import shlex
import os
import subprocess
import re
import shutil

logger = logging.getLogger(__name__)
bedrock = BedrockApi(logger)


def log_learnloss(output):
    """Parse a training log line and log the loss, learning rate and iteration."""
    bedrock.log_metric(
        "loss", float(re.search('(?<=, ).*(?= avg)', output).group().strip()))
    bedrock.log_metric(
        "loss_iteration",
        int(re.search(r"\d*",
                      output.split()[0]).group().strip()))
    bedrock.log_metric(
        "learn",
        float(re.search('(?<=loss, ).*(?= rate)', output).group().strip()))
    bedrock.log_metric(
        "learn_iteration",
        int(re.search(r"\d*",
                      output.split()[0]).group().strip()))


def run_command(command, logfile):
    logfile.write("This is the run command: %s\n" % command)
    process = subprocess.Popen(shlex.split(command),
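The snippet ends mid-call. A hedged sketch of how such a wrapper typically completes, streaming each output line to the logfile and to log_learnloss; the keyword arguments and the filtering condition are assumptions, not the original code:

# Hedged sketch only; not the original run_command implementation.
def run_command_sketch(command, logfile):
    logfile.write("This is the run command: %s\n" % command)
    process = subprocess.Popen(shlex.split(command),
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               universal_newlines=True)
    for line in process.stdout:
        logfile.write(line)
        # Assumed heuristic: only progress lines mention "avg" and "rate"
        if "avg" in line and "rate" in line:
            log_learnloss(line)
    process.wait()
    return process.returncode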
Example 7
def compute_log_metrics(clf, x_val, y_val):
    """Compute and log metrics."""
    print("\tEvaluating using validation data")
    y_prob = clf.predict_proba(x_val)[:, 1]
    y_pred = (y_prob > 0.5).astype(int)

    acc = metrics.accuracy_score(y_val, y_pred)
    precision = metrics.precision_score(y_val, y_pred)
    recall = metrics.recall_score(y_val, y_pred)
    f1_score = metrics.f1_score(y_val, y_pred)
    roc_auc = metrics.roc_auc_score(y_val, y_prob)
    avg_prc = metrics.average_precision_score(y_val, y_prob)

    print(f"Accuracy          = {acc:.6f}")
    print(f"Precision         = {precision:.6f}")
    print(f"Recall            = {recall:.6f}")
    print(f"F1 score          = {f1_score:.6f}")
    print(f"ROC AUC           = {roc_auc:.6f}")
    print(f"Average precision = {avg_prc:.6f}")

    # Log metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("Precision", precision)
    bedrock.log_metric("Recall", recall)
    bedrock.log_metric("F1 score", f1_score)
    bedrock.log_metric("ROC AUC", roc_auc)
    bedrock.log_metric("Avg precision", avg_prc)
    bedrock.log_chart_data(
        y_val.astype(int).tolist(),
        y_prob.flatten().tolist())
Example 8
def compute_log_metrics(bst, xgtest, test_data):
    """Compute and log metrics."""
    print("\tEvaluating using validation data")
    preds = bst.predict(xgtest)
    y_target = test_data['y_yes']
    y_preds = np.round(preds).astype(int)

    acc = metrics.accuracy_score(y_target, y_preds)
    f1 = metrics.f1_score(y_target, y_preds)
    precision = metrics.precision_score(y_target, y_preds)
    recall = metrics.recall_score(y_target, y_preds)

    print("Accuracy = {:.6f}".format(acc))
    print("Precision = {:.6f}".format(precision))
    print("Recall = {:.6f}".format(recall))
    print("F1 score = {:.6f}".format(f1))

    # Log metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("Precision", precision)
    bedrock.log_metric("Recall", recall)
    bedrock.log_metric("F1 score", f1)
    bedrock.log_chart_data(
        y_target.astype(int).tolist(),
        y_preds.flatten().tolist())
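The function expects a trained xgboost Booster, a DMatrix built from the test features, and the test DataFrame still carrying the binary target column y_yes. A hedged usage sketch (variable names and column layout are assumptions):

# Illustrative only: assumes `bst` is a trained xgboost Booster and
# `test_data` is a pandas DataFrame whose target column is 'y_yes'.
import xgboost as xgb

features = test_data.drop(columns=["y_yes"])
xgtest = xgb.DMatrix(features)
compute_log_metrics(bst, xgtest, test_data)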
Example 9
def compute_log_metrics(gbm, x_val, y_val):
    """Compute and log metrics."""
    print("\tEvaluating using validation data")
    y_prob = gbm.predict_proba(x_val)[:, 1]
    y_pred = (y_prob > 0.5).astype(int)
    acc = metrics.accuracy_score(y_val, y_pred)
    precision = metrics.precision_score(y_val, y_pred)
    recall = metrics.recall_score(y_val, y_pred)
    f1_score = metrics.f1_score(y_val, y_pred)
    auc = metrics.roc_auc_score(y_val, y_prob)
    avg_prc = metrics.average_precision_score(y_val, y_prob)
    print("Accuracy =", acc)
    print("Precision =", precision)
    print("Recall =", recall)
    print("F1 score =", f1_score)
    print("AUC =", auc)
    print("Average precision =", avg_prc)

    # Log metrics
    logger = logging.getLogger(__name__)

    bedrock = BedrockApi(logger)
    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("Precision", precision)
    bedrock.log_metric("Recall", recall)
    bedrock.log_metric("F1 score", f1_score)
    bedrock.log_metric("AUC", auc)
    bedrock.log_metric("Avg precision", avg_prc)
    bedrock.log_chart_data(y_val.astype(int).tolist(),
                           y_prob.flatten().tolist())
Example 10
def compute_log_metrics(pipe: Pipeline, x_test: pd.core.frame.DataFrame,
                        y_test: np.ndarray, y_test_onehot: np.ndarray):
    """
    Computes, prints and log metrics.

    :param pipe: Pipeline of transforms with a trained final estimator
    :type pipe: sklearn.pipeline.Pipeline
    :param x_test: Features for testing
    :type x_test: pandas.core.frame.DataFrame
    :param y_test: Target variable data for testing
    :type y_test: numpy.ndarray
    :param y_test_onehot: One hot encoded target variable data
    :type y_test_onehot: numpy.ndarray
    :return: Test predicted probability and predictions
    :rtype: tuple[numpy.ndarray, numpy.ndarray]
    """
    test_prob = pipe.predict_proba(x_test)
    test_pred = pipe.predict(x_test)

    acc = metrics.accuracy_score(y_test, test_pred)
    precision = metrics.precision_score(y_test, test_pred, average='macro')
    recall = metrics.recall_score(y_test, test_pred, average='macro')
    f1_score = metrics.f1_score(y_test, test_pred, average='macro')
    roc_auc = metrics.roc_auc_score(y_test_onehot,
                                    test_prob,
                                    average='macro',
                                    multi_class='ovr')
    avg_prc = metrics.average_precision_score(y_test_onehot,
                                              test_prob,
                                              average='macro')
    print("\nEVALUATION\n"
          f"\tAccuracy                  = {acc:.4f}\n"
          f"\tPrecision (macro)         = {precision:.4f}\n"
          f"\tRecall (macro)            = {recall:.4f}\n"
          f"\tF1 score (macro)          = {f1_score:.4f}\n"
          f"\tROC AUC (macro)           = {roc_auc:.4f}\n"
          f"\tAverage precision (macro) = {avg_prc:.4f}")

    # Bedrock Logger: captures model metrics
    bedrock = BedrockApi(logging.getLogger(__name__))

    # `log_chart_data` assumes binary classification
    # For multiclass labels, we can use a "micro-average" by
    # quantifying score on all classes jointly
    # See https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html  # noqa: E501
    # This will allow us to use the same `log_chart_data` method
    bedrock.log_chart_data(
        y_test_onehot.ravel().astype(int).tolist(),  # list of int
        test_prob.ravel().astype(float).tolist()  # list of float
    )

    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("Precision (macro)", precision)
    bedrock.log_metric("Recall (macro)", recall)
    bedrock.log_metric("F1 Score (macro)", f1_score)
    bedrock.log_metric("ROC AUC (macro)", roc_auc)
    bedrock.log_metric("Avg precision (macro)", avg_prc)

    return test_prob, test_pred
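Both the macro-averaged scores and log_chart_data above rely on one-hot encoded labels; a sketch of how y_test_onehot is typically produced before calling the function (the label set is simply inferred from y_test):

# Illustrative only: build the one-hot labels the function expects.
import numpy as np
from sklearn.preprocessing import label_binarize

classes = np.unique(y_test)
y_test_onehot = label_binarize(y_test, classes=classes)

test_prob, test_pred = compute_log_metrics(pipe, x_test, y_test, y_test_onehot)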
Example 11
def log_metrics(run_dir):
    """Log metrics."""
    # Validation results found in the last 7 elements of the last line of results.txt
    with open(run_dir + "results.txt", "r") as f:
        lines = f.readlines()
    precision, recall, map50, map50_95, val_giou, val_obj, val_cls = [float(v) for v in lines[-1].split()[-7:]]

    print(f"  Precision          = {precision:.6f}")
    print(f"  Recall             = {recall:.6f}")
    print(f"  [email protected]            = {map50:.6f}")
    print(f"  [email protected]:0.95       = {map50_95:.6f}")
    print(f"  val GIoU           = {val_giou:.6f}")
    print(f"  val Objectness     = {val_obj:.6f}")
    print(f"  val Classification = {val_cls:.6f}")

    # Log metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_metric("Precision", precision)
    bedrock.log_metric("Recall", recall)
    bedrock.log_metric("[email protected]", map50)
    bedrock.log_metric("[email protected]:0.95", map50_95)
    bedrock.log_metric("val GIoU", val_giou)
    bedrock.log_metric("val Objectness", val_obj)
    bedrock.log_metric("val Classification", val_cls)