def evaluate(model_name, preds, labels, country, loss_fn=None, reduction=None, loss_weight=None, weight_scale=None, gamma=None):
    """ Evalautes loss and metrics for predictions vs labels.

    Args:
        preds - (tensor) model predictions
        labels - (npy array / tensor) ground truth labels
        loss_fn - (function) function that takes preds and labels and outputs some loss metric
        reduction - (str) "avg" or "sum", where "avg" calculates the average accuracy for each batch
                                          where "sum" tracks total correct and total pixels separately
        loss_weight - (bool) whether we use weighted loss function or not

    Returns:
        loss - (float) the loss the model incurs
        cm - (nparray) confusion matrix given preds and labels
        accuracy - (float) given "avg" reduction, returns accuracy 
        total_correct - (int) given "sum" reduction, gives total correct pixels
        num_pixels - (int) given "sum" reduction, gives total number of valid pixels
    """
    cm = metrics.get_cm(preds, labels, country, model_name)
    
    if model_name in NON_DL_MODELS:
        accuracy = metrics.get_accuracy(model_name, preds, labels, reduction=reduction)
        return None, cm, accuracy, None
    elif model_name in DL_MODELS:
        if reduction == "avg":
            loss, confidence = loss_fn(labels, preds, reduction, country, loss_weight, weight_scale)
            accuracy = metrics.get_accuracy(model_name, preds, labels, reduction=reduction)
            return loss, cm, accuracy, confidence
        elif reduction == "sum":
            loss, confidence, _ = loss_fn(labels, preds, reduction, country, loss_weight, weight_scale) 
            total_correct, num_pixels = metrics.get_accuracy(model_name, preds, labels, reduction=reduction)
            return loss, cm, total_correct, num_pixels, confidence
        else:
            raise ValueError(f"reduction: `{reduction}` not supported")
Example #2
    def evaluate_fairness(self, dataloader, device, N, target_vals,
                          H_T_given_S, verbose):

        IXY_ub = 0
        H_T_given_SY_ub = 0
        accuracy = 0

        with torch.no_grad():
            for it, (x, t, s) in enumerate(dataloader):

                x = x.to(device).float()
                t = t.to(device).float()
                s = s.to(device).float()

                y, y_mean = self.encoder(x)
                output = self.decoder(y, s)

                IXY_ub += self.get_IXY_ub(y_mean, self.prior_type)
                H_T_given_SY_ub += self.get_H_output_given_SY_ub(output, t)
                accuracy += metrics.get_accuracy(output, t,
                                                 target_vals) * len(x)

        # Normalize the accumulated sums by the dataset size N
        IXY_ub /= N
        H_T_given_SY_ub /= N
        accuracy /= N
        IYT_given_S_lb = H_T_given_S - H_T_given_SY_ub.item()
        if verbose:
            print(f'I(X;Y) = {IXY_ub.item()}')
            print(f'I(Y;T|S) = {IYT_given_S_lb}')
            print(f'Accuracy (network): {accuracy}')
        return IXY_ub, IYT_given_S_lb
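A minimal sketch of the accumulation pattern used above, with placeholder batch values: per-batch accuracies are weighted by batch size and normalized by the dataset size N.

running_accuracy = 0.0
N = 0
for batch_accuracy, batch_size in [(0.90, 32), (0.75, 32), (0.80, 16)]:
    running_accuracy += batch_accuracy * batch_size
    N += batch_size
accuracy = running_accuracy / N   # dataset-level accuracy, not a mean of batch means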
Example #3
def classify(train_set, test_set, k):
    predictions = list()
    result_list = list()
    for test_set_entry in test_set:
        # distance_method is assumed to be defined elsewhere in the source module
        neighbors = get_neighbors(train_set, test_set_entry, k,
                                  distance_method)
        result = get_response(neighbors)
        predictions.append(result)
        res_string = '> predicted=' + result + ', actual=' + test_set_entry[-1]
        result_list.append(res_string)
    accuracy = get_accuracy(test_set, predictions)
    return result_list, accuracy
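get_accuracy(test_set, predictions) is not shown in this example; a minimal sketch consistent with the call above (the last element of each test row is the ground-truth label) could look like the following. The name get_accuracy_sketch and the 0-1 scale are assumptions, not the original helper.

def get_accuracy_sketch(test_set, predictions):
    # Count rows whose prediction matches the last column of the row.
    correct = sum(1 for row, pred in zip(test_set, predictions) if row[-1] == pred)
    return correct / len(test_set)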
Example #4
def tune_alpha(max_iterations, train_samples, train_labels, val_samples,
               val_labels, num_feats):
    alphas = [0.1, 0.5, 1, 1.5, 2]
    best = 0
    max_accuracy = 0.0
    for a in alphas:
        classifier = wnn(max_iterations, a)
        classifier.fit(train_samples, train_labels, num_feats)
        results = classifier.predict(val_samples)
        curr_accuracy = get_accuracy(results, val_labels)
        if curr_accuracy > max_accuracy:
            best = a
            max_accuracy = curr_accuracy
    return best
Example #5
def tune_learning_rate(max_iterations, train_samples, train_labels,
                       val_samples, val_labels, num_feats):
    rates = [0.01, 0.05, 0.07, 0.1, 0.5, 0.7, 1]
    best = 0
    max_accuracy = 0.0
    for lr in rates:
        classifier = pc(max_iterations, lr)
        classifier.fit(train_samples, train_labels, num_feats)
        results = classifier.predict(val_samples)
        curr_accuracy = get_accuracy(results, val_labels)
        if curr_accuracy > max_accuracy:
            best = lr
            max_accuracy = curr_accuracy
    return best
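Both tuners above share the same grid-search pattern: fit one model per candidate value and keep the candidate with the highest validation accuracy. A self-contained sketch of that pattern, with a toy scoring function standing in for fit/predict/get_accuracy:

def pick_best(candidates, score_fn):
    best, best_score = None, float('-inf')
    for candidate in candidates:
        score = score_fn(candidate)
        if score > best_score:
            best, best_score = candidate, score
    return best

# Toy scoring function for illustration; a real run would train and evaluate a model here.
print(pick_best([0.01, 0.05, 0.1, 0.5], score_fn=lambda lr: -abs(lr - 0.1)))  # 0.1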
Example #6
def run(simulation_args):
    # Data producer
    ds_features, targets = get_data(simulation_args.data_set)
    num_clusters = len(np.unique(targets))
    # K-Means constructor
    kmeans_alg = KMeans(n_clusters=num_clusters, n_init=5, max_iter=500)

    row_list = list()
    for r in range(5, 105, 5):
        for method_name, transformer in get_dim_reduction_transformer_dict(r, num_clusters).items():
            labels, running_time = produce_fit(kmeans_alg, ds_features, transformer)
            row_list.append([method_name, r, sum_squared_norm_from_centroids(ds_features, labels),
                             get_accuracy(labels, targets), running_time,
                             normalized_mutual_info_score(targets, labels)])

    df = pd.DataFrame(row_list, columns=['dim_reduction_method', 'r', 'Objective value', 'Accuracy', 'Running time',
                                         'normalized_mutual_info_score'])
    df.set_index('r', inplace=True)
    plot_df(simulation_args.data_set, df)
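Besides get_accuracy, each row records normalized_mutual_info_score, which compares a clustering to the targets without requiring cluster ids to match class ids. A small self-contained check:

import numpy as np
from sklearn.metrics import normalized_mutual_info_score

targets = np.array([0, 0, 1, 1])
labels = np.array([1, 1, 0, 0])   # same partition, permuted cluster ids
print(normalized_mutual_info_score(targets, labels))   # 1.0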
Example #7
def test_ffnn():
    params = {
        'n_layers': 4,
        'hidden_nodes': [512, 512, 512, 512],
        'epochs': 10,
        'use_dynamic_features': True,
        'use_mspec': False,
        'as_mat': False,
        'speaker_norm': False,
        'context_length': 17
    }
    net = FFNN(params)
    model = net.train_model()
    net.set_model(model)
    y_true, yp = net.predict_on_test()
    print("FFNN RESULTS")
    print(get_f1_score(y_true, yp))
    print(get_accuracy(y_true, yp))
    print(classification_report(y_true, yp))
Example #8
def test_rnn():
    """Notice as_mat is true here!"""
    params = {
        'n_layers': 2,
        'hidden_nodes': [32, 32],
        'epochs': 100,
        'use_dynamic_features': True,
        'use_mspec': True,
        'as_mat': True,
        'speaker_norm': False,
        'context_length': 35,
        'unroll': True  # assumed value; train_model() below reads params['unroll']
    }
    net = RNN(params)
    model = net.train_model(params['unroll'])
    net.set_model(model)
    y_true, yp = net.predict_on_test()
    print("RNN RESULTS")
    print(get_f1_score(y_true, yp))
    print(get_accuracy(y_true, yp))
    print(classification_report(y_true, yp))
    model.save('rnn-64-64-context-35.h5')
Example #9
def store_results(y_true, yp, net, model_path, model):
    phones = fixed_phones(net)
    with open(os.path.join(os.getcwd(), model_path, 'results.txt'), 'w') as f:
        f.write('acc: {}\n'.format(get_accuracy(y_true, yp)))
        f.write('edit: {}\n'.format(
            eval_edit_dist(y_true, yp, net.test, feature_name=net.feature_name)))
        f.write('f1-score: {}\n'.format(get_f1_score(y_true, yp)))
        report = get_classification_report(y_true, yp, phones)
        f.write(str(report))
    cm = get_confusion_matrix(y_true, yp)
    net.plot_confusion_matrix(
        cm,
        phones,
        os.path.join(os.getcwd(), model_path, 'confusion_matrix.png'),
        normalize=True)
    model.save(os.path.join(os.getcwd(), model_path, 'model.h5'))
Example #10
def get_multi_label_summary_metrics(preds,
                                    probs,
                                    targets,
                                    label_names,
                                    verbose=True):
    """
    Currently designed for multi-label classification
    """
    label_level_accuracy = np.round(metrics.get_accuracy(preds, targets), 3)
    img_level_accuracy = np.round(scipy_metrics.accuracy_score(targets, preds),
                                  3)
    correct_img_idx, correct_label_idx = np.where(preds == targets)
    incorrect_img_idx, incorrect_label_idx = np.where(preds != targets)

    accuracy = metrics.get_accuracy(preds, targets)
    error = np.sum(preds != targets) / len(preds.flatten())
    f2_score = metrics.get_f2_score(preds, targets, 'samples')

    # TP/FP/TN/FN
    TP_img_idx, TP_label_idx = np.where((preds == targets) & (preds == 1))
    FP_img_idx, FP_label_idx = np.where((preds != targets) & (preds == 1))
    TN_img_idx, TN_label_idx = np.where((preds == targets) & (preds == 0))
    FN_img_idx, FN_label_idx = np.where((preds != targets) & (preds == 0))
    TP, FP, TN, FN = TP_label_idx, FP_label_idx, TN_label_idx, FN_label_idx
    n_TP = len(TP_label_idx)
    n_FP = len(FP_label_idx)
    n_TN = len(TN_label_idx)
    n_FN = len(FN_label_idx)

    #Labels
    n_labels = len(preds.flatten())
    correct_labels_cnt = np.count_nonzero(preds == targets)
    incorrect_labels_cnt = np.count_nonzero(preds != targets)
    assert (correct_labels_cnt + incorrect_labels_cnt == n_labels)

    # Total Positive/True/One Labels
    total_positive_labels = np.sum(targets)
    total_positive_labels_by_class = np.sum(targets, axis=0)

    #Images
    n_imgs = len(preds)
    image_idx = np.arange(n_imgs)  # one index per image
    incorrect_images_idx = np.unique(incorrect_img_idx)
    mask = np.in1d(image_idx, incorrect_images_idx)
    correct_images_idx = image_idx[~mask]  # images with no incorrectly predicted label
    n_imgs_correct = len(correct_images_idx)
    n_imgs_incorrect = len(incorrect_images_idx)
    assert (n_imgs_correct + n_imgs_incorrect == n_imgs)

    correct_freq = get_label_freq_bins(correct_label_idx, label_names)
    incorrect_freq = get_label_freq_bins(incorrect_label_idx, label_names)
    total_freq = correct_freq[:, 1] + incorrect_freq[:, 1]
    total_ones = np.sum(targets, axis=0)
    percent_ones = np.round(total_ones / total_freq * 100, 1)
    assert np.sum(incorrect_freq[:, 1]) + np.sum(correct_freq[:, 1]) == n_labels

    # Truth
    tp_freq = get_label_freq_bins(TP_label_idx, label_names)
    fp_freq = get_label_freq_bins(FP_label_idx, label_names)
    tn_freq = get_label_freq_bins(TN_label_idx, label_names)
    fn_freq = get_label_freq_bins(FN_label_idx, label_names)
    assert np.sum(tp_freq[:, 1]) == n_TP
    assert np.sum(fp_freq[:, 1]) == n_FP
    assert np.sum(tn_freq[:, 1]) == n_TN
    assert np.sum(fn_freq[:, 1]) == n_FN

    # Metrics
    error_pct = np.round(incorrect_freq[:, 1] / total_freq * 100, 1)
    weighted_error_pct = np.round(
        incorrect_freq[:, 1] / np.sum(incorrect_freq[:, 1]), 2)
    #http://ml-cheatsheet.readthedocs.io/en/latest/glossary.html?highlight=precision
    total_precision = n_TP / (n_TP + n_FP)
    total_recall = n_TP / (n_TP + n_FN)
    precision_by_label = np.round(
        tp_freq[:, 1] / (tp_freq[:, 1] + fp_freq[:, 1]) * 100, 1)
    recall_by_label = np.round(
        tp_freq[:, 1] / (tp_freq[:, 1] + fn_freq[:, 1]) * 100, 1)
    weighted_fp_pct = np.round(fp_freq / n_FP * 100, 1)[:, 1]
    weighted_fn_pct = np.round(fn_freq / n_FN * 100, 1)[:, 1]
    mean_prob_by_label = np.round(np.mean(probs, axis=0), 2)
    median_prob_by_label = np.round(np.median(probs, axis=0), 2)

    combined_pivot = np.column_stack([
        error_pct, weighted_error_pct, precision_by_label, recall_by_label,
        correct_freq[:, 1], incorrect_freq[:, 1], tp_freq[:, 1], tn_freq[:, 1],
        fp_freq[:, 1], fn_freq[:, 1], weighted_fp_pct, weighted_fn_pct,
        total_ones, percent_ones, mean_prob_by_label, median_prob_by_label
    ])

    columns = [
        'err_pct', 'wt_err_pct', 'precision', 'recall', 'correct_labels',
        'incorrect_labels', 'tp', 'tn', 'fp', 'fn', 'wt_fp_pct', 'wt_fn_pct',
        'total_ones', 'pct_ones', 'mean_prb', 'med_prb'
    ]
    int_columns = [
        'total_ones', 'correct_labels', 'incorrect_labels', 'tp', 'tn', 'fp',
        'fn'
    ]
    float_columns = ['pct_ones', 'err_pct', 'precision', 'recall']
    combined_pivot[np.isnan(combined_pivot)] = 0
    summary_df = pd.DataFrame(combined_pivot, columns=columns)
    summary_df.insert(0, 'lb', pd.Series(label_names, index=summary_df.index))
    # sum_row = summary_df.sum(numeric_only=True)
    # sum_row['lb'] = 'sum'
    # mean_row = np.round(summary_df.mean(numeric_only=True), 1)
    # mean_row['lb'] = 'mean'
    # summary_df = summary_df.append(sum_row, ignore_index=True)
    # summary_df = summary_df.append(mean_row, ignore_index=True)
    summary_df[int_columns] = summary_df[int_columns].astype(int)

    if verbose:
        print("Error", round(error, 4), "\nAcc", round(accuracy, 4),
              "\nn_labels", n_labels, "\nn_labels_correct", correct_labels_cnt,
              "\nn_labels_incorrect", incorrect_labels_cnt, "\nn_imgs", n_imgs,
              "\nn_imgs_correct", n_imgs_correct, "\nn_imgs_incorrect",
              n_imgs_incorrect, '\ntotal_one_labels', total_positive_labels,
              '\nlabel_level_accuracy', label_level_accuracy,
              '\nimg_level_accuracy', img_level_accuracy)

    return summary_df
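The two headline numbers computed at the top of this function differ in granularity: label_level_accuracy scores each label independently, while img_level_accuracy (sklearn's accuracy_score on multi-label arrays) requires an image's whole label vector to match. The sketch below is self-contained and illustrative; metrics.get_accuracy itself is not shown here, so the label-level line is an assumption.

import numpy as np

preds = np.array([[1, 0, 1],
                  [0, 1, 0]])
targets = np.array([[1, 0, 0],
                    [0, 1, 0]])

label_level = np.mean(preds == targets)                  # 5/6 labels correct
img_level = np.mean(np.all(preds == targets, axis=1))    # 1/2 images fully correct
print(label_level, img_level)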
    def get_accuracy(self):
        return metrics.get_accuracy(self.test_labels, self.pred_labels)