def get_final_results(self):
    """Find the best setting and use it to test on the test data and print
    the results."""
    best_setting = self.find_best_setting()
    print("\nThe best", self.optimize_for, "was",
          best_setting[self.optimize_for])
    print("It was found with the following settings:")
    print(best_setting)

    if not self.check_test:
        print("check_test is set to False. Will not evaluate performance "
              "on the held-out test set.")
        return

    print("\nAbout to evaluate results on the held-out test set!")
    print("Will use the settings that produced the best", self.optimize_for)

    # Convert the stored parameter values back to the types the model expects
    # (e.g. batch_size back to an int).
    best_setting = self.convert_param_dict_for_use(dict(best_setting))

    print("\nFINAL TEST RESULTS:")
    preds = self.test_on_test(best_setting)
    true_y = self.data_loader.test_Y

    accs = []
    aucs = []
    for i, label in enumerate(LABELS_TO_PREDICT):
        acc, auc, f1, precision, recall = (
            gen_wrap.compute_all_classification_metrics(
                preds[:, i], true_y[:, i]))
        accs.append(acc)
        aucs.append(auc)

        print("\nFINAL TEST RESULTS ON ALL", label, "DATA:")
        print('Acc:', acc, 'AUC:', auc, 'F1:', f1,
              'Precision:', precision, 'Recall:', recall)

        if self.check_noisy_data:
            # Evaluate separately on the noisy and clean portions of the
            # held-out test set.
            noisy_preds = self.predict_on_data(self.data_loader.noisy_test_X)
            acc, auc, f1, precision, recall = (
                gen_wrap.compute_all_classification_metrics(
                    noisy_preds[:, i], self.data_loader.noisy_test_Y[:, i]))
            print("\nFINAL TEST RESULTS ON NOISY", label, "DATA:")
            print('Acc:', acc, 'AUC:', auc, 'F1:', f1,
                  'Precision:', precision, 'Recall:', recall)

            clean_preds = self.predict_on_data(self.data_loader.clean_test_X)
            acc, auc, f1, precision, recall = (
                gen_wrap.compute_all_classification_metrics(
                    clean_preds[:, i], self.data_loader.clean_test_Y[:, i]))
            print("\nFINAL TEST RESULTS ON CLEAN", label, "DATA:")
            print('Acc:', acc, 'AUC:', auc, 'F1:', f1,
                  'Precision:', precision, 'Recall:', recall)

    print("Overall:", 'Acc:', np.mean(accs), 'AUC:', np.mean(aucs))
def get_final_results(self):
    """Find the best setting and use it to test on the test data and print
    the results."""
    best_setting = self.find_best_setting()
    print("\nThe best", self.optimize_for, "was",
          best_setting[self.optimize_for])
    print("It was found with the following settings:")
    print(best_setting)

    if not self.check_test:
        print("check_test is set to False. Will not evaluate performance "
              "on the held-out test set.")
        return

    print("\nAbout to evaluate results on the held-out test set!")
    print("Will use the settings that produced the best", self.optimize_for)

    best_setting = self.convert_param_dict_for_use(best_setting)

    print("\nFINAL TEST RESULTS:")
    loss, preds = self.test_on_test(best_setting)
    true_y = self.classification_data_loader.test_Y

    accs = []
    aucs = []
    for i, label in enumerate(LABELS_TO_PREDICT):
        acc, auc, f1, precision, recall = (
            gen_wrap.compute_all_classification_metrics(
                preds[:, i], true_y[:, i]))
        print(label, 'Acc:', acc, 'AUC:', auc, 'F1:', f1,
              'Precision:', precision, 'Recall:', recall)
        accs.append(acc)
        aucs.append(auc)

    print("Overall:", 'Acc:', np.mean(accs), 'AUC:', np.mean(aucs))
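# Note: gen_wrap.compute_all_classification_metrics() is used throughout this
# section but is not defined here. The sketch below is only an assumption
# about its behavior, reconstructed from its return order
# (acc, auc, f1, precision, recall) and built on sklearn.metrics; it is not
# necessarily the original implementation. It assumes `import numpy as np`.
def compute_all_classification_metrics(preds, true_y):
    """Hypothetical sketch: standard binary classification metrics."""
    from sklearn.metrics import (accuracy_score, roc_auc_score, f1_score,
                                 precision_score, recall_score)
    preds = np.asarray(preds)
    true_y = np.asarray(true_y)
    acc = accuracy_score(true_y, preds)
    # AUC is undefined when only one class is present in true_y; report NaN
    # so that the np.nanmean() aggregation used above can skip it.
    try:
        auc = roc_auc_score(true_y, preds)
    except ValueError:
        auc = np.nan
    f1 = f1_score(true_y, preds)
    precision = precision_score(true_y, preds)
    recall = recall_score(true_y, preds)
    return acc, auc, f1, precision, recall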
def svm_pred_best_result(self, svm_model, X, Y, label, best_acc, best_auc):
    """Given an SVM model and some data, tests whether the SVM's predictions
    beat the existing best accuracy and AUC. Returns the best value of each
    of the two metrics seen so far."""
    preds = svm_model.predict(X)
    acc, auc, f1, precision, recall = (
        gen_wrap.compute_all_classification_metrics(preds, Y[:, label]))
    # Only overwrite the running best if both accuracy and AUC improve.
    if acc > best_acc and auc > best_auc:
        best_acc = acc
        best_auc = auc
    return best_acc, best_auc
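# A hypothetical usage sketch (not part of the original code): sweeping SVM
# regularization values for one label and tracking the best validation
# accuracy/AUC with svm_pred_best_result(). The sklearn import, the default
# c_values, and the data_loader attributes train_X, train_Y, val_X, val_Y
# are assumptions made for illustration only.
def sweep_svm_for_label(self, label_index, c_values=(0.1, 1.0, 10.0)):
    """Hypothetical sketch: pick the best-performing SVM for one label."""
    from sklearn.svm import SVC
    best_acc, best_auc = -np.inf, -np.inf
    for c in c_values:
        svm_model = SVC(C=c)
        # Fit on this fold's training data for the given label.
        svm_model.fit(self.data_loader.train_X,
                      self.data_loader.train_Y[:, label_index])
        # Keep whichever setting gives the best validation accuracy and AUC.
        best_acc, best_auc = self.svm_pred_best_result(
            svm_model, self.data_loader.val_X, self.data_loader.val_Y,
            label_index, best_acc, best_auc)
    return best_acc, best_auc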
def get_cross_validation_results(self, param_dict):
    """Goes through every cross-validation fold in the class's DataLoader,
    assesses all necessary metrics for each fold, and saves them into the
    param_dict.

    Args:
        param_dict: A dictionary with keys representing parameter names and
            values representing settings for those parameters.

    Returns:
        The param_dict augmented with keys for the names of metrics and
        values representing the score on those metrics.
    """
    num_labels = len(self.data_loader.wanted_labels)
    all_acc = np.empty((self.num_cross_folds, num_labels))
    all_auc = np.empty((self.num_cross_folds, num_labels))
    all_f1 = np.empty((self.num_cross_folds, num_labels))
    all_precision = np.empty((self.num_cross_folds, num_labels))
    all_recall = np.empty((self.num_cross_folds, num_labels))
    all_loss = [np.nan] * self.num_cross_folds

    if self.check_noisy_data:
        noisy_acc = np.empty((self.num_cross_folds, num_labels))
        noisy_auc = np.empty((self.num_cross_folds, num_labels))
        clean_acc = np.empty((self.num_cross_folds, num_labels))
        clean_auc = np.empty((self.num_cross_folds, num_labels))

    for f in range(self.num_cross_folds):
        self.data_loader.set_to_cross_validation_fold(f)
        preds = self.train_and_predict(param_dict)
        true_y = self.data_loader.val_Y

        for l in range(num_labels):
            (all_acc[f, l], all_auc[f, l], all_f1[f, l],
             all_precision[f, l], all_recall[f, l]) = (
                gen_wrap.compute_all_classification_metrics(
                    preds[:, l], true_y[:, l]))

        if self.check_noisy_data:
            # Evaluate separately on the noisy and clean portions of this
            # fold's validation data.
            noisy_preds = self.predict_on_data(self.data_loader.noisy_val_X)
            clean_preds = self.predict_on_data(self.data_loader.clean_val_X)
            for l in range(num_labels):
                noisy_acc[f, l], noisy_auc[f, l], _, _, _ = (
                    gen_wrap.compute_all_classification_metrics(
                        noisy_preds[:, l],
                        self.data_loader.noisy_val_Y[:, l]))
                clean_acc[f, l], clean_auc[f, l], _, _, _ = (
                    gen_wrap.compute_all_classification_metrics(
                        clean_preds[:, l],
                        self.data_loader.clean_val_Y[:, l]))

    # Average each metric over all folds and labels.
    param_dict['val_acc'] = np.nanmean(all_acc)
    param_dict['val_auc'] = np.nanmean(all_auc)
    param_dict['val_f1'] = np.nanmean(all_f1)
    param_dict['val_precision'] = np.nanmean(all_precision)
    param_dict['val_recall'] = np.nanmean(all_recall)
    print("Finished training all folds, average acc was", np.nanmean(all_acc))

    for i, label in enumerate(LABELS_TO_PREDICT):
        param_dict['val_acc_' + label] = np.nanmean(all_acc[:, i])
        param_dict['val_auc_' + label] = np.nanmean(all_auc[:, i])
        print("Average accuracy for label", label, "=",
              np.nanmean(all_acc[:, i]))

    if self.check_noisy_data:
        param_dict['noisy_val_acc'] = np.nanmean(noisy_acc)
        param_dict['noisy_val_auc'] = np.nanmean(noisy_auc)
        print("Perf on noisy data:", np.nanmean(noisy_acc), "acc",
              np.nanmean(noisy_auc), "auc")
        param_dict['clean_val_acc'] = np.nanmean(clean_acc)
        param_dict['clean_val_auc'] = np.nanmean(clean_auc)
        print("Perf on clean data:", np.nanmean(clean_acc), "acc",
              np.nanmean(clean_auc), "auc")

        for i, label in enumerate(LABELS_TO_PREDICT):
            param_dict['noisy_val_acc_' + label] = np.nanmean(noisy_acc[:, i])
            param_dict['noisy_val_auc_' + label] = np.nanmean(noisy_auc[:, i])
            param_dict['clean_val_acc_' + label] = np.nanmean(clean_acc[:, i])
            param_dict['clean_val_auc_' + label] = np.nanmean(clean_auc[:, i])

    return param_dict
def get_cross_validation_results(self, param_dict):
    """Goes through every cross-validation fold in the class's DataLoader,
    assesses all necessary metrics for each fold, and saves them into the
    param_dict.

    Args:
        param_dict: A dictionary with keys representing parameter names and
            values representing settings for those parameters.

    Returns:
        The param_dict augmented with keys for the names of metrics and
        values representing the score on those metrics.
    """
    losses = []
    accs = None
    aucs = None
    noisy_accs = None
    noisy_aucs = None
    clean_accs = None
    clean_aucs = None

    # Train the embedding model once (on fold 0) to obtain its loss, then
    # assess embedding classification quality on every fold below.
    self.data_loader.set_to_cross_validation_fold(0)
    losses.append(self.train_and_predict(param_dict))

    for f in range(self.num_cross_folds):
        # self.data_loader.set_to_cross_validation_fold(f)
        self.classification_data_loader.set_to_cross_validation_fold(f)
        # losses.append(self.train_and_predict(param_dict))
        (fold_accs, fold_aucs, f_noisy_accs, f_noisy_aucs,
         f_clean_accs, f_clean_aucs) = (
            self.test_embedding_classification_quality())
        accs = self.append_fold_results(accs, fold_accs)
        aucs = self.append_fold_results(aucs, fold_aucs)
        noisy_accs = self.append_fold_results(noisy_accs, f_noisy_accs)
        noisy_aucs = self.append_fold_results(noisy_aucs, f_noisy_aucs)
        clean_accs = self.append_fold_results(clean_accs, f_clean_accs)
        clean_aucs = self.append_fold_results(clean_aucs, f_clean_aucs)

    print("Losses for each fold:", losses)
    param_dict[self.optimize_for] = np.mean(losses)

    for i, label in enumerate(LABELS_TO_PREDICT):
        param_dict['svm_val_acc_' + label] = np.nanmean(accs[:, i])
        param_dict['svm_val_auc_' + label] = np.nanmean(aucs[:, i])
        print("Average accuracy for label", label, "=",
              np.nanmean(accs[:, i]))
        param_dict['svm_noisy_val_acc_' + label] = np.nanmean(noisy_accs[:, i])
        param_dict['svm_noisy_val_auc_' + label] = np.nanmean(noisy_aucs[:, i])
        param_dict['svm_clean_val_acc_' + label] = np.nanmean(clean_accs[:, i])
        param_dict['svm_clean_val_auc_' + label] = np.nanmean(clean_aucs[:, i])

        # Calculate the F1 score for this label from the stored per-fold
        # predictions and true labels.
        results_val = np.asarray(self.f1s_val)
        preds_val = results_val[:, i, 0]
        trues_val = results_val[:, i, 1]
        _acc, _auc, f1, _precision, _recall = (
            gen_wrap.compute_all_classification_metrics(
                preds_val, trues_val))
        param_dict['svm_val_f1_' + label] = f1
        print("Average F1 score for label", label, "=", f1)

    param_dict['svm_val_acc'] = np.nanmean(accs)
    param_dict['svm_val_auc'] = np.nanmean(aucs)
    param_dict['svm_noisy_val_acc'] = np.nanmean(noisy_accs)
    param_dict['svm_noisy_val_auc'] = np.nanmean(noisy_aucs)
    param_dict['svm_clean_val_acc'] = np.nanmean(clean_accs)
    param_dict['svm_clean_val_auc'] = np.nanmean(clean_aucs)
    print("Average accuracy on noisy data", np.nanmean(noisy_accs))
    print("Average accuracy on clean data", np.nanmean(clean_accs))

    return param_dict
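# Note: append_fold_results() is called above but not defined in this
# section. The sketch below is an assumption about what it does, inferred
# from its usage: the accumulator starts as None and is later indexed as
# accs[:, i], so each fold's per-label results are presumably stacked into a
# (num_folds x num_labels) array. This is a hypothetical helper, not
# necessarily the original implementation; it assumes `import numpy as np`.
def append_fold_results(self, all_results, fold_results):
    """Hypothetical sketch: stack one fold's per-label results onto the
    accumulated (folds x labels) array."""
    fold_row = np.atleast_2d(np.asarray(fold_results, dtype=float))
    if all_results is None:
        # First fold: start the accumulator with a single row.
        return fold_row
    return np.vstack([all_results, fold_row])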