Example #1
def compute_log_metrics(model,
                        x_train,
                        x_test,
                        y_test,
                        best_th=0.5,
                        model_name="tree_model",
                        model_type=ModelTypes.TREE):
    """Compute and log metrics."""
    test_prob = model.predict_proba(x_test)
    test_pred = model.predict(x_test)

    acc = metrics.accuracy_score(y_test, test_pred)
    precision = metrics.precision_score(y_test, test_pred, average='macro')
    recall = metrics.recall_score(y_test, test_pred, average='macro')
    f1_score = metrics.f1_score(y_test, test_pred, average='macro')
    roc_auc = metrics.roc_auc_score(y_test, test_prob, multi_class='ovr')
    avg_prc = metrics.average_precision_score(y_test, test_prob)
    print("Evaluation\n"
          f"  Accuracy          = {acc:.4f}\n"
          f"  Precision         = {precision:.4f}\n"
          f"  Recall            = {recall:.4f}\n"
          f"  F1 score          = {f1_score:.4f}\n"
          f"  ROC AUC           = {roc_auc:.4f}\n"
          f"  Average precision = {avg_prc:.4f}")

    # Bedrock Logger: captures model metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_chart_data(
        y_test.astype(int).tolist(),
        test_prob.flatten().tolist())

    bedrock.log_metric("Accuracy", acc)
    # TODO - Bedrock model monitoring: Fill in the blanks
    # Add ROC AUC and Avg precision
    bedrock.log_metric("Precision", precision)
    bedrock.log_metric("Recall", recall)
    bedrock.log_metric("F1 Score", f1_score)
    bedrock.log_metric("ROC AUC", roc_auc)
    bedrock.log_metric("Avg precision", avg_prc)

    # TODO - Explainability metrics: Fill in the blanks
    # Bedrock Model Analyzer: generates model explainability and fairness metrics
    # Requires model object from pipeline to be passed in
    analyzer = ModelAnalyzer(model[1], model_name=model_name, model_type=model_type)\
                    .train_features(x_train)\
                    .test_features(x_test)

    # TODO - Fairness metrics: Fill in the blanks
    # Apply fairness config to the Bedrock Model Analyzer instance
    analyzer.fairness_config(CONFIG_FAI)\
        .test_labels(y_test)\
        .test_inference(test_pred)

    # Return the 4 metrics
    return analyzer.analyze()
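
The ModelAnalyzer call above passes model[1], which implies the model argument is a scikit-learn Pipeline whose second step is the fitted estimator. A minimal sketch of that kind of object is below; the dataset, step names, and estimator choice are assumptions, not part of the original example.

# Illustrative sketch of the kind of `model` object the function above expects:
# a two-step scikit-learn Pipeline, so that model[1] is the fitted estimator.
# The dataset, step names, and estimator choice are assumptions.
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

x, y = load_iris(return_X_y=True, as_frame=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

model = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", RandomForestClassifier(random_state=0)),
])
model.fit(x_train, y_train)

print(model[1])                            # the fitted estimator passed to ModelAnalyzer
print(model.predict_proba(x_test).shape)   # (n_samples, n_classes) probabilities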
Example #2
def main():
    x_train, y_train = load_dataset(filepath=TRAIN_DATA_PATH, target='Type')
    x_test, y_test = load_dataset(filepath=TEST_DATA_PATH, target='Type')
    print('X (train)')
    print(x_train)

    # sklearn `roc_auc_score` and `average_precision_score` expect
    # binary label indicators with shape (n_samples, n_classes)
    enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
    y_train_onehot = enc.fit_transform(y_train.reshape(-1, 1))
    y_test_onehot = enc.transform(y_test.reshape(-1, 1))  # reuse categories fitted on train
    print('\nCATEGORIES')
    for value, category in enumerate(enc.categories_[0]):
        print(f'{category} : {value}')

    # Convert target variable to numeric values
    # ModelMonitoringService.export_text expect both features
    # and inference to be numeric values
    y_train = np.argmax(y_train_onehot, axis=1)
    y_test = np.argmax(y_test_onehot, axis=1)

    pipe = train_log_reg_model(x_train, y_train, seed=0, C=C, verbose=True)
    # Save trained model
    feature_names = x_train.columns.tolist()
    print("\nSAMPLE FEATURES")
    print({
        feature_name: str(x_train[feature_name][0])
        for feature_name in feature_names
    })
    joblib.dump([feature_names, enc, pipe], OUTPUT_MODEL_PATH)
    print('\nSaved trained one hot encoder and logistic regression model.')

    test_prob, test_pred = compute_log_metrics(pipe, x_test, y_test,
                                               y_test_onehot)

    # Save feature and inference distribution
    train_predicted = pipe.predict(x_train).flatten().tolist()
    collectors = [
        FeatureHistogramCollector(
            data=x_train.iteritems(),
            discrete={7, 8},  # Specify which column indices are discrete
        ),
        InferenceHistogramCollector(data=train_predicted, is_discrete=True)
        # Specify inference as discrete
    ]
    encoder = MetricEncoder(collectors=collectors)
    with open(BaselineMetricCollector.DEFAULT_HISTOGRAM_PATH, "wb") as f:
        f.write(encoder.as_text())
    print('Saved feature and inference distribution.')

    # Train Shap model and calculate xafai metrics
    analyzer = (ModelAnalyzer(pipe[1],
                              model_name='logistic',
                              model_type=ModelTypes.LINEAR)
                .train_features(x_train)
                .test_features(x_test)
                .fairness_config(CONFIG_FAI)
                .test_labels(y_test)
                .test_inference(test_pred))
    analyzer.analyze()
    print('Saved Shap model and fairness results.')
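
main() relies on project helpers such as load_dataset and train_log_reg_model that are not shown here. A plausible minimal sketch is below, written to match how main() uses them (pandas features, a numpy target, and a pipeline whose second element pipe[1] is the fitted classifier); the CSV layout, pipeline steps, and parameter defaults are assumptions.

# Hypothetical helpers matching how main() uses them; not the original code.
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


def load_dataset(filepath, target):
    """Load a CSV and split it into a feature DataFrame and a numpy target."""
    df = pd.read_csv(filepath)
    y = df.pop(target).values
    return df, y


def train_log_reg_model(x_train, y_train, seed=0, C=1.0, verbose=False):
    """Fit a two-step pipeline so that pipe[1] is the fitted classifier."""
    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("model", LogisticRegression(C=C, random_state=seed, max_iter=1000)),
    ])
    pipe.fit(x_train, y_train)
    if verbose:
        print(f"Train accuracy: {pipe.score(x_train, y_train):.4f}")
    return pipe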
Example #3
def compute_log_metrics(clf, x_val, y_val):
    """Compute and log metrics."""
    print("\tEvaluating using validation data")
    y_prob = clf.predict_proba(x_val)[:, 1]
    y_pred = (y_prob > 0.5).astype(int)

    acc = metrics.accuracy_score(y_val, y_pred)
    precision = metrics.precision_score(y_val, y_pred)
    recall = metrics.recall_score(y_val, y_pred)
    f1_score = metrics.f1_score(y_val, y_pred)
    roc_auc = metrics.roc_auc_score(y_val, y_prob)
    avg_prc = metrics.average_precision_score(y_val, y_prob)

    print(f"Accuracy          = {acc:.6f}")
    print(f"Precision         = {precision:.6f}")
    print(f"Recall            = {recall:.6f}")
    print(f"F1 score          = {f1_score:.6f}")
    print(f"ROC AUC           = {roc_auc:.6f}")
    print(f"Average precision = {avg_prc:.6f}")

    # Log metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("Precision", precision)
    bedrock.log_metric("Recall", recall)
    bedrock.log_metric("F1 score", f1_score)
    bedrock.log_metric("ROC AUC", roc_auc)
    bedrock.log_metric("Avg precision", avg_prc)
    bedrock.log_chart_data(y_val.astype(int).tolist(),
                           y_prob.flatten().tolist())

    # Calculate and upload xafai metrics
    analyzer = ModelAnalyzer(clf, "tree_model", model_type=ModelTypes.TREE).test_features(x_val)
    analyzer.test_labels(y_val.values).test_inference(y_pred)
    analyzer.analyze()
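
A second variant of the same function follows. Instead of a fixed 0.5 cut-off, it derives the decision threshold from the validation ROC curve by maximising tpr - fpr (Youden's J statistic), then logs a reduced set of metrics together with the classification report and the fairness analysis.
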
def compute_log_metrics(clf, x_val, y_val):
    """Compute and log metrics."""
    y_prob = clf.predict_proba(x_val)[:, 1]
    
    # select best threshold
    fpr, tpr, thresholds = metrics.roc_curve(y_val, y_prob)
    best_threshold = thresholds[np.argmax(tpr-fpr)]
    
    y_pred = (y_prob > best_threshold).astype(int)

    acc = metrics.accuracy_score(y_val, y_pred)
    roc_auc = metrics.roc_auc_score(y_val, y_prob)
    avg_prc = metrics.average_precision_score(y_val, y_prob)
    print("Evaluation\n"
          f"  Accuracy          = {acc:.4f}\n"
          f"  ROC AUC           = {roc_auc:.4f}\n"
          f"  Average precision = {avg_prc:.4f}\n")
    print(metrics.classification_report(y_val, y_pred, digits=4))

    # Log metrics
    bedrock = BedrockApi(logging.getLogger(__name__))
    bedrock.log_metric("Accuracy", acc)
    bedrock.log_metric("ROC AUC", roc_auc)
    bedrock.log_metric("Avg precision", avg_prc)
    bedrock.log_chart_data(y_val.astype(int).tolist(),
                           y_prob.flatten().tolist())

    # Calculate and upload xafai metrics
    analyzer = ModelAnalyzer(clf, "tree_model", model_type=ModelTypes.TREE).test_features(x_val)
    analyzer.fairness_config(CONFIG_FAI).test_labels(y_val).test_inference(y_pred)
    analyzer.analyze()
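
All three examples pass a CONFIG_FAI object to fairness_config() without defining it on this page. A sketch of the shape it typically takes is below; the protected attribute, its values, and even the key names are assumptions based on Basis AI's public Bedrock examples and may differ across bedrock_client versions.

# Assumed shape of the fairness configuration; the attribute name, its values,
# and the key names are illustrative and may differ across bedrock_client versions.
CONFIG_FAI = {
    "SeniorCitizen": {  # hypothetical protected attribute column
        "privileged_attribute_values": [0],
        "unprivileged_attribute_values": [1],
    }
}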