def evaluate(model_name, preds, labels, country, loss_fn=None, reduction=None,
             loss_weight=None, weight_scale=None, gamma=None):
    """ Evaluates loss and metrics for predictions vs labels.

    Args:
      preds - (tensor) model predictions
      labels - (npy array / tensor) ground truth labels
      loss_fn - (function) function that takes preds and labels and outputs some loss metric
      reduction - (str) "avg" or "sum"; "avg" calculates the average accuracy per batch,
                  "sum" tracks total correct and total pixels separately
      loss_weight - (bool) whether to use a weighted loss function

    Returns:
      loss - (float) the loss the model incurs
      cm - (nparray) confusion matrix given preds and labels
      accuracy - (float) given "avg" reduction, returns accuracy
      total_correct - (int) given "sum" reduction, gives total correct pixels
      num_pixels - (int) given "sum" reduction, gives total number of valid pixels
    """
    cm = metrics.get_cm(preds, labels, country, model_name)

    if model_name in NON_DL_MODELS:
        accuracy = metrics.get_accuracy(model_name, preds, labels, reduction=reduction)
        return None, cm, accuracy, None
    elif model_name in DL_MODELS:
        if reduction == "avg":
            loss, confidence = loss_fn(labels, preds, reduction, country, loss_weight, weight_scale)
            # Bug fix: accuracy was previously computed on (labels, model_name);
            # it must compare preds against labels.
            accuracy = metrics.get_accuracy(model_name, preds, labels, reduction=reduction)
            return loss, cm, accuracy, confidence
        elif reduction == "sum":
            loss, confidence, _ = loss_fn(labels, preds, reduction, country, loss_weight, weight_scale)
            total_correct, num_pixels = metrics.get_accuracy(model_name, preds, labels, reduction=reduction)
            return loss, cm, total_correct, num_pixels, confidence
        else:
            raise ValueError(f"reduction: `{reduction}` not supported")
    else:
        # Previously fell through and returned None silently for unknown models.
        raise ValueError(f"model: `{model_name}` not supported")
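# A minimal sketch of the metrics.get_accuracy contract that evaluate() above
# relies on, assuming numpy-array inputs (for torch tensors, use .numel()
# instead of .size). Illustration only, not the project's implementation:
# "avg" returns a fraction, "sum" returns the (total_correct, num_pixels)
# pair that the "sum" branch unpacks.
import numpy as np

def get_accuracy_sketch(preds, labels, reduction="avg"):
    correct = int(np.sum(preds == labels))
    num_pixels = labels.size
    if reduction == "avg":
        return correct / num_pixels
    if reduction == "sum":
        return correct, num_pixels
    raise ValueError(f"reduction: `{reduction}` not supported")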
def evaluate_fairness(self, dataloader, device, N, target_vals, H_T_given_S, verbose):
    IXY_ub = 0
    H_T_given_SY_ub = 0
    accuracy = 0
    with torch.no_grad():
        for it, (x, t, s) in enumerate(dataloader):
            x = x.to(device).float()
            t = t.to(device).float()
            s = s.to(device).float()
            y, y_mean = self.encoder(x)
            output = self.decoder(y, s)
            IXY_ub += self.get_IXY_ub(y_mean, self.prior_type)
            H_T_given_SY_ub += self.get_H_output_given_SY_ub(output, t)
            accuracy += metrics.get_accuracy(output, t, target_vals) * len(x)
    IXY_ub /= N
    H_T_given_SY_ub /= N  # stray debug print of this value removed
    accuracy /= N
    IYT_given_S_lb = H_T_given_S - H_T_given_SY_ub.item()
    if verbose:
        print(f'I(X;Y) = {IXY_ub.item()}')
        print(f'I(Y;T|S) = {IYT_given_S_lb}')
        print(f'Accuracy (network): {accuracy}')
    return IXY_ub, IYT_given_S_lb
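# A hypothetical call site for evaluate_fairness above. `model` (an instance
# of the enclosing encoder/decoder class), `loader` (a DataLoader yielding
# (x, t, s) batches), and `h_t_given_s` (a precomputed entropy estimate) are
# all assumptions; only the method signature matches the code above.
import torch

def run_fairness_eval(model, loader, h_t_given_s, target_vals=2):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device).eval()
    n = len(loader.dataset)
    return model.evaluate_fairness(loader, device, n, target_vals,
                                   h_t_given_s, verbose=True)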
def classify(train_set, test_set, k, distance_method):
    # Bug fix: distance_method was referenced but never defined; it is now a parameter.
    predictions = list()
    result_list = list()
    for test_set_entry in test_set:
        neighbors = get_neighbors(train_set, test_set_entry, k, distance_method)
        result = get_response(neighbors)
        predictions.append(result)
        res_string = '> predicted=' + result + ', actual=' + test_set_entry[-1]
        result_list.append(res_string)
    accuracy = get_accuracy(test_set, predictions)
    return result_list, accuracy
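# Minimal sketches of the kNN helpers classify() relies on, assuming each row
# is a list of numeric features with the class label in the last position.
# These illustrate the expected contracts, not the project's implementations.
import math
from collections import Counter

def euclidean_distance(row_a, row_b):
    # Compare feature columns only; the last column holds the label.
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(row_a[:-1], row_b[:-1])))

def get_neighbors_sketch(train_set, test_entry, k, distance_method):
    # The k training rows closest to the test entry under the given metric.
    by_distance = sorted(train_set, key=lambda row: distance_method(row, test_entry))
    return by_distance[:k]

def get_response_sketch(neighbors):
    # Majority vote over the neighbors' labels.
    return Counter(row[-1] for row in neighbors).most_common(1)[0][0]

def get_accuracy_knn_sketch(test_set, predictions):
    correct = sum(1 for row, pred in zip(test_set, predictions) if row[-1] == pred)
    return correct / len(test_set) * 100.0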
def tune_alpha(max_iterations, train_samples, train_labels, val_samples, val_labels, num_feats):
    alphas = [0.1, 0.5, 1, 1.5, 2]
    best = 0
    max_accuracy = 0.0
    for a in alphas:
        # Bug fix: the classifier previously received the undefined name `alpha`
        # instead of the loop variable `a`.
        classifier = wnn(max_iterations, a)
        classifier.fit(train_samples, train_labels, num_feats)
        results = classifier.predict(val_samples)
        curr_accuracy = get_accuracy(results, val_labels)
        if curr_accuracy > max_accuracy:
            best = a
            max_accuracy = curr_accuracy
    return best
def tune_learning_rate(max_iterations, train_samples, train_labels, val_samples, val_labels, num_feats):
    rates = [0.01, 0.05, 0.07, 0.1, 0.5, 0.7, 1]
    best = 0
    max_accuracy = 0.0
    for lr in rates:
        classifier = pc(max_iterations, lr)
        classifier.fit(train_samples, train_labels, num_feats)
        results = classifier.predict(val_samples)
        curr_accuracy = get_accuracy(results, val_labels)
        if curr_accuracy > max_accuracy:
            best = lr
            max_accuracy = curr_accuracy
    return best
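# tune_alpha and tune_learning_rate are instances of the same 1-D grid search.
# A generic sketch, assuming classifiers expose fit/predict as used above;
# `make_classifier` is a hypothetical factory, e.g. lambda v: pc(max_iterations, v).
def tune_hyperparameter(values, make_classifier, train_samples, train_labels,
                        val_samples, val_labels, num_feats):
    best, max_accuracy = None, 0.0
    for v in values:
        classifier = make_classifier(v)
        classifier.fit(train_samples, train_labels, num_feats)
        curr_accuracy = get_accuracy(classifier.predict(val_samples), val_labels)
        if curr_accuracy > max_accuracy:
            best, max_accuracy = v, curr_accuracy
    return best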
def run(simulation_args):
    # Data producer
    ds_features, targets = get_data(simulation_args.data_set)
    num_clusters = len(np.unique(targets))
    # K-Means constructor
    kmeans_alg = KMeans(n_clusters=num_clusters, n_init=5, max_iter=500)
    row_list = list()
    for r in range(5, 105, 5):
        for method_name, transformer in get_dim_reduction_transformer_dict(r, num_clusters).items():
            labels, running_time = produce_fit(kmeans_alg, ds_features, transformer)
            row_list.append([method_name, r,
                             sum_squared_norm_from_centroids(ds_features, labels),
                             get_accuracy(labels, targets),
                             running_time,
                             normalized_mutual_info_score(targets, labels)])
    df = pd.DataFrame(row_list, columns=['dim_reduction_method', 'r', 'Objective value',
                                         'Accuracy', 'Running time',
                                         'normalized_mutual_info_score'])
    df.set_index('r', inplace=True)
    plot_df(simulation_args.data_set, df)
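# A minimal sketch of the produce_fit helper assumed by run() above: reduce
# the features with the given transformer, time the K-Means fit, and return
# the cluster labels with the elapsed wall-clock time. The exact timing scope
# in the real helper may differ; this is an illustration only.
import time
from sklearn.base import clone

def produce_fit_sketch(kmeans_alg, ds_features, transformer):
    reduced = transformer.fit_transform(ds_features) if transformer is not None else ds_features
    start = time.perf_counter()
    labels = clone(kmeans_alg).fit_predict(reduced)  # fresh estimator per call
    return labels, time.perf_counter() - start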
def test_ffnn():
    params = {
        'n_layers': 4,
        'hidden_nodes': [512, 512, 512, 512],
        'epochs': 10,
        'use_dynamic_features': True,
        'use_mspec': False,
        'as_mat': False,
        'speaker_norm': False,
        'context_length': 17
    }
    net = FFNN(params)
    model = net.train_model()
    net.set_model(model)
    y_true, yp = net.predict_on_test()
    print("FFNN RESULTS")
    print(get_f1_score(y_true, yp))
    print(get_accuracy(y_true, yp))
    print(classification_report(y_true, yp))
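# Plausible sketches of the metric helpers used by the test drivers here,
# assuming they wrap scikit-learn (the call to classification_report already
# suggests it). The project's real wrappers may use a different f1 averaging
# mode; 'macro' is an assumption.
from sklearn.metrics import accuracy_score, f1_score

def get_accuracy_wrapper_sketch(y_true, y_pred):
    return accuracy_score(y_true, y_pred)

def get_f1_score_wrapper_sketch(y_true, y_pred):
    return f1_score(y_true, y_pred, average='macro')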
def test_rnn():
    """Notice as_mat is True here!"""
    params = {
        'n_layers': 2,
        'hidden_nodes': [32, 32],
        'epochs': 100,
        'use_dynamic_features': True,
        'use_mspec': True,
        'as_mat': True,
        'speaker_norm': False,
        'context_length': 35,
        # Bug fix: train_model below reads params['unroll'], which was missing
        # from this dict; True is an assumed default.
        'unroll': True
    }
    net = RNN(params)
    model = net.train_model(params['unroll'])
    net.set_model(model)
    y_true, yp = net.predict_on_test()
    print("RNN RESULTS")
    print(get_f1_score(y_true, yp))
    print(get_accuracy(y_true, yp))
    print(classification_report(y_true, yp))
    # Filename aligned with the config above (the original said 64-64 despite
    # hidden_nodes being [32, 32]).
    model.save('rnn-32-32-context-35.h5')
def store_results(y_true, yp, net, model_path, model):
    phones = fixed_phones(net)
    with open(os.path.join(os.getcwd(), model_path, 'results.txt'), 'w') as f:
        f.write('acc: {}\n'.format(get_accuracy(y_true, yp)))
        f.write('edit: {}\n'.format(
            eval_edit_dist(y_true, yp, net.test, feature_name=net.feature_name)))
        f.write('f1-score: {}\n'.format(get_f1_score(y_true, yp)))
        report = get_classification_report(y_true, yp, phones)
        f.write(str(report))
    cm = get_confusion_matrix(y_true, yp)
    net.plot_confusion_matrix(
        cm, phones,
        os.path.join(os.getcwd(), model_path, 'confusion_matrix.png'),
        normalize=True)
    model.save(os.path.join(os.getcwd(), model_path, 'model.h5'))
def get_multi_label_summary_metrics(preds, probs, targets, label_names, verbose=True):
    """ Currently designed for multi-label classification """
    label_level_accuracy = np.round(metrics.get_accuracy(preds, targets), 3)
    img_level_accuracy = np.round(scipy_metrics.accuracy_score(targets, preds), 3)
    correct_img_idx, correct_label_idx = np.where(preds == targets)
    incorrect_img_idx, incorrect_label_idx = np.where(preds != targets)
    accuracy = metrics.get_accuracy(preds, targets)
    error = np.sum(preds != targets) / len(preds.flatten())
    f2_score = metrics.get_f2_score(preds, targets, 'samples')

    # TP/FP/TN/FN
    TP_img_idx, TP_label_idx = np.where((preds == targets) & (preds == 1))
    FP_img_idx, FP_label_idx = np.where((preds != targets) & (preds == 1))
    TN_img_idx, TN_label_idx = np.where((preds == targets) & (preds == 0))
    FN_img_idx, FN_label_idx = np.where((preds != targets) & (preds == 0))
    TP, FP, TN, FN = TP_label_idx, FP_label_idx, TN_label_idx, FN_label_idx
    n_TP = len(TP_label_idx)
    n_FP = len(FP_label_idx)
    n_TN = len(TN_label_idx)
    n_FN = len(FN_label_idx)

    # Labels
    n_labels = len(preds.flatten())
    correct_labels_cnt = np.count_nonzero(preds == targets)
    incorrect_labels_cnt = np.count_nonzero(preds != targets)
    assert (correct_labels_cnt + incorrect_labels_cnt == n_labels)

    # Total Positive/True/One Labels
    total_positive_labels = np.sum(targets)
    total_positive_labels_by_class = np.sum(targets, axis=0)

    # Images
    n_imgs = len(preds)
    # Bug fix: np.unique(np.where(preds == targets)) flattened image and label
    # indices together; only the image indices of correct predictions belong here.
    image_idx = np.unique(correct_img_idx)
    incorrect_images_idx = np.unique(incorrect_img_idx)
    mask = np.in1d(image_idx, incorrect_images_idx)
    correct_images_idx = image_idx[~mask]
    n_imgs_correct = len(correct_images_idx)
    n_imgs_incorrect = len(incorrect_images_idx)
    assert (n_imgs_correct + n_imgs_incorrect == n_imgs)

    correct_freq = get_label_freq_bins(correct_label_idx, label_names)
    incorrect_freq = get_label_freq_bins(incorrect_label_idx, label_names)
    total_freq = correct_freq[:, 1] + incorrect_freq[:, 1]
    total_ones = np.sum(targets, axis=0)
    percent_ones = np.round(total_ones / total_freq * 100, 1)
    assert np.sum(incorrect_freq[:, 1]) + np.sum(correct_freq[:, 1]) == n_labels

    # Truth
    tp_freq = get_label_freq_bins(TP_label_idx, label_names)
    fp_freq = get_label_freq_bins(FP_label_idx, label_names)
    tn_freq = get_label_freq_bins(TN_label_idx, label_names)
    fn_freq = get_label_freq_bins(FN_label_idx, label_names)
    assert np.sum(tp_freq[:, 1]) == n_TP
    assert np.sum(fp_freq[:, 1]) == n_FP
    assert np.sum(tn_freq[:, 1]) == n_TN
    assert np.sum(fn_freq[:, 1]) == n_FN

    # Metrics
    error_pct = np.round(incorrect_freq[:, 1] / total_freq * 100, 1)
    weighted_error_pct = np.round(
        incorrect_freq[:, 1] / np.sum(incorrect_freq[:, 1]), 2)
    # http://ml-cheatsheet.readthedocs.io/en/latest/glossary.html?highlight=precision
    total_precision = n_TP / (n_TP + n_FP)
    total_recall = n_TP / (n_TP + n_FN)
    precision_by_label = np.round(
        tp_freq[:, 1] / (tp_freq[:, 1] + fp_freq[:, 1]) * 100, 1)
    recall_by_label = np.round(
        tp_freq[:, 1] / (tp_freq[:, 1] + fn_freq[:, 1]) * 100, 1)
    weighted_fp_pct = np.round(fp_freq / n_FP * 100, 1)[:, 1]
    weighted_fn_pct = np.round(fn_freq / n_FN * 100, 1)[:, 1]
    mean_prob_by_label = np.round(np.mean(probs, axis=0), 2)
    median_prob_by_label = np.round(np.median(probs, axis=0), 2)

    combined_pivot = np.column_stack([
        error_pct, weighted_error_pct, precision_by_label, recall_by_label,
        correct_freq[:, 1], incorrect_freq[:, 1], tp_freq[:, 1], tn_freq[:, 1],
        fp_freq[:, 1], fn_freq[:, 1], weighted_fp_pct, weighted_fn_pct,
        total_ones, percent_ones, mean_prob_by_label, median_prob_by_label
    ])
    columns = [
        'err_pct', 'wt_err_pct', 'precision', 'recall', 'correct_labels',
        'incorrect_labels', 'tp', 'tn', 'fp', 'fn', 'wt_fp_pct', 'wt_fn_pct',
        'total_ones', 'pct_ones', 'mean_prb', 'med_prb'
    ]
    int_columns = [
        'total_ones', 'correct_labels', 'incorrect_labels', 'tp', 'tn', 'fp', 'fn'
    ]
    float_columns = ['pct_ones', 'err_pct', 'precision', 'recall']
    combined_pivot[np.isnan(combined_pivot)] = 0
    summary_df = pd.DataFrame(combined_pivot, columns=columns)
    summary_df.insert(0, 'lb', pd.Series(label_names, index=summary_df.index))
    # sum_row = summary_df.sum(numeric_only=True)
    # sum_row['lb'] = 'sum'
    # mean_row = np.round(summary_df.mean(numeric_only=True), 1)
    # mean_row['lb'] = 'mean'
    # summary_df = summary_df.append(sum_row, ignore_index=True)
    # summary_df = summary_df.append(mean_row, ignore_index=True)
    summary_df[int_columns] = summary_df[int_columns].astype(int)

    if verbose:
        print("Error", round(error, 4),
              "\nAcc", round(accuracy, 4),
              "\nn_labels", n_labels,
              "\nn_labels_correct", correct_labels_cnt,
              "\nn_labels_incorrect", incorrect_labels_cnt,
              "\nn_imgs", n_imgs,
              "\nn_imgs_correct", n_imgs_correct,
              "\nn_imgs_incorrect", n_imgs_incorrect,
              '\ntotal_one_labels', total_positive_labels,
              '\nlabel_level_accuracy', label_level_accuracy,
              '\nimg_level_accuracy', img_level_accuracy)
    return summary_df
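# A hypothetical smoke test for get_multi_label_summary_metrics on a tiny
# 3-image x 4-label batch. The label names are made up, and metrics.get_accuracy
# plus get_label_freq_bins are assumed to be importable from this project.
import numpy as np

def _demo_summary_metrics():
    rng = np.random.default_rng(0)
    targets = rng.integers(0, 2, size=(3, 4))
    probs = rng.random(size=(3, 4))
    preds = (probs > 0.5).astype(int)  # threshold probabilities at 0.5
    label_names = ['haze', 'clear', 'water', 'road']  # hypothetical labels
    return get_multi_label_summary_metrics(preds, probs, targets, label_names)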
def get_accuracy(self):
    return metrics.get_accuracy(self.test_labels, self.pred_labels)