import numpy as np
from sklearn.model_selection import KFold

from evaluation import metrics


def calculate_acc(embeddings1, embeddings2, actual_issame, nrof_folds=10,
                  compare_func=pair_euc_score, sigma_sq1=None, sigma_sq2=None):
    assert embeddings1.shape[0] == embeddings2.shape[0]
    assert embeddings1.shape[1] == embeddings2.shape[1]
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    accuracies = np.zeros(nrof_folds, dtype=np.float32)
    thresholds = np.zeros(nrof_folds, dtype=np.float32)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    dist = compare_func(embeddings1, embeddings2, sigma_sq1, sigma_sq2)

    indices = np.arange(nrof_pairs)
    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # Training: pick the best threshold on the training folds
        _, thresholds[fold_idx] = metrics.accuracy(
            dist[train_set], actual_issame[train_set])
        # Testing: evaluate that threshold on the held-out fold
        accuracies[fold_idx], _ = metrics.accuracy(
            dist[test_set], actual_issame[test_set],
            np.array([thresholds[fold_idx]]))

    accuracy = np.mean(accuracies)
    # Scores are negated distances, so flip the sign to report a distance threshold
    threshold = -np.mean(thresholds)
    return accuracy, threshold
def calculate_roc(embeddings1, embeddings2, actual_issame, compare_func, nrof_folds=10):
    assert embeddings1.shape[0] == embeddings2.shape[0]
    assert embeddings1.shape[1] == embeddings2.shape[1]
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
    accuracies = np.zeros(nrof_folds, dtype=np.float32)
    thresholds = np.zeros(nrof_folds, dtype=np.float32)
    indices = np.arange(nrof_pairs)

    dist = compare_func(embeddings1, embeddings2)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # Find the best threshold for the fold on the training split
        train_score = dist[train_set]
        train_labels = actual_issame[train_set] == 1
        _, thresholds[fold_idx] = metrics.accuracy(train_score, train_labels)
        # Evaluate that threshold on the held-out fold
        test_score = dist[test_set]
        accuracies[fold_idx], _ = metrics.accuracy(
            test_score, actual_issame[test_set] == 1,
            np.array([thresholds[fold_idx]]))

    accuracy = np.mean(accuracies)
    threshold = np.mean(thresholds)
    return accuracy, threshold
def test_standard_proto(self, features, compare_func):
    assert self.standard_folds is not None
    accuracies = np.zeros(10, dtype=np.float32)
    thresholds = np.zeros(10, dtype=np.float32)

    for i in range(10):
        # Training: pool the nine other folds and pick the best threshold
        train_indices1 = np.concatenate(
            [self.standard_folds[j].indices1 for j in range(10) if j != i])
        train_indices2 = np.concatenate(
            [self.standard_folds[j].indices2 for j in range(10) if j != i])
        train_labels = np.concatenate(
            [self.standard_folds[j].labels for j in range(10) if j != i])
        train_features1 = features[train_indices1, :]
        train_features2 = features[train_indices2, :]
        train_score = compare_func(train_features1, train_features2)
        _, thresholds[i] = metrics.accuracy(train_score, train_labels)

        # Testing: apply that threshold to the held-out fold
        fold = self.standard_folds[i]
        test_features1 = features[fold.indices1, :]
        test_features2 = features[fold.indices2, :]
        test_score = compare_func(test_features1, test_features2)
        accuracies[i], _ = metrics.accuracy(
            test_score, fold.labels, np.array([thresholds[i]]))

    accuracy = np.mean(accuracies)
    # As in calculate_acc, the sign is flipped to report a distance threshold
    threshold = -np.mean(thresholds)
    return accuracy, threshold
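# All three routines above delegate threshold selection to
# metrics.accuracy(score, labels[, thresholds]). The helper's source is not
# shown in this section; the sketch below is an assumption about its behaviour
# (sweep candidate thresholds, return the best accuracy and the threshold that
# achieved it), with higher scores taken to mean "same identity".
def _accuracy_sketch(score, labels, thresholds=None):
    if thresholds is None:
        thresholds = np.unique(score)  # try every observed score as a cut-off
    best_acc, best_thr = 0.0, thresholds[0]
    for thr in thresholds:
        acc = np.mean((score >= thr) == labels)
        if acc > best_acc:
            best_acc, best_thr = acc, thr
    return best_acc, best_thr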
def main():
    args = parse_user_arguments()
    preds = parse_pred_file(args.pred_file, args.format, args.confusion_set)
    tp, tn, fp, fn, xyz = contingency_table(
        args.cnfs_file, preds, args.threshold, args.difference)
    total = tp + tn + fp + fn + xyz  # renamed from `all` to avoid shadowing the builtin

    print("### Evaluation of file {}".format(args.cnfs_file))
    print("")
    print("threshold    : %.4f" % (args.threshold or 0))
    print("difference   : %.4f" % (args.difference or 0))
    print("")
    print("# Statistics:")
    print("Prevalence   : %.4f" % metrics.prevalence(tp, tn, fp + xyz, fn))
    print("Bias         : %.4f" % metrics.bias(tp, tn, fp + xyz, fn))
    print("Changes      : %d" % (tp + fp))
    print("")
    print("# WAS evaluation scheme:")
    print("TN x/x/x     : %.4f (%d)" % (tn / float(total), tn))
    print("FP x/x/y     : %.4f (%d)" % (fp / float(total), fp))
    print("FN x/y/x     : %.4f (%d)" % (fn / float(total), fn))
    print("TP x/y/y     : %.4f (%d)" % (tp / float(total), tp))
    print("*  x/y/z     : %.4f (%d)" % (xyz / float(total), xyz))
    print("")

    if args.edit_counts:
        print("# Simple accuracy:")
        acc, aa, ab, skipped = metrics.edits_count(tp, tn, fp, fn, xyz)
        print("All edits    : %.4f" % acc)
        print("A-A edits    : %.4f" % aa)
        print("A-B edits    : %.4f" % ab)
        print("A-B skipped  : %.4f" % skipped)
        print("")

    if args.detection_task:
        print("# Detection task:")
        print("Accuracy     : %.4f" % metrics.accuracy(tp + xyz, tn, fp, fn))
        if args.all:
            print("Specificity  : %.4f" % metrics.specificity(tp + xyz, tn, fp, fn))
            print("Fall-out     : %.4f" % metrics.fall_out(tp + xyz, tn, fp, fn))
        print("Precision    : %.4f" % metrics.precision(tp + xyz, tn, fp, fn))
        print("Recall       : %.4f" % metrics.recall(tp + xyz, tn, fp, fn))
        print("F0.5         : %.4f" % metrics.fscore(tp + xyz, tn, fp, fn))
        print("")

    print("# Correction task:")
    print("Accuracy     : %.4f" % metrics.accuracy(tp, tn, fp + xyz, fn))
    if args.all:
        print("Specificity  : %.4f" % metrics.specificity(tp, tn, fp + xyz, fn))
        print("Fall-out     : %.4f" % metrics.fall_out(tp, tn, fp + xyz, fn))
    print("Precision    : %.4f" % metrics.precision(tp, tn, fp + xyz, fn))
    print("Recall       : %.4f" % metrics.recall(tp, tn, fp + xyz, fn))
    print("F0.5         : %.4f" % metrics.fscore(tp, tn, fp + xyz, fn))
    print("")
def evaluate(model, data_input, gold_output):
    predictions = model.predict(
        data_input, batch_size=keras_models.model_params['batch_size'], verbose=1)

    if len(predictions.shape) == 3:
        # Sequence-labelling output: argmax over the label axis
        predictions_classes = np.argmax(predictions, axis=2)
        train_batch_f1 = metrics.accuracy_per_sentence(predictions_classes, gold_output)
        print("Results (per sentence): ", train_batch_f1)
        # Flatten to a token stream and drop the padding class (label 0)
        train_y_properties_stream = gold_output.reshape(
            gold_output.shape[0] * gold_output.shape[1])
        predictions_classes = predictions_classes.reshape(
            predictions_classes.shape[0] * predictions_classes.shape[1])
        class_mask = train_y_properties_stream != 0
        train_y_properties_stream = train_y_properties_stream[class_mask]
        predictions_classes = predictions_classes[class_mask]
    else:
        predictions_classes = np.argmax(predictions, axis=1)
        train_y_properties_stream = gold_output

    accuracy = metrics.accuracy(predictions_classes, train_y_properties_stream)
    micro_scores = metrics.compute_micro_PRF(
        predictions_classes, train_y_properties_stream,
        empty_label=keras_models.p0_index)
    print("Results: Accuracy: ", accuracy)
    print("Results: Micro-Average F1: ", micro_scores)
    return predictions_classes, predictions
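# compute_micro_PRF is not shown in this section. A hedged sketch of what it
# plausibly computes, assuming micro-averaging pools true/false positives
# across all classes and that empty_label marks the "no relation" class
# excluded from the positive counts:
def _micro_prf_sketch(pred, gold, empty_label=0):
    tp = np.sum((pred == gold) & (gold != empty_label))
    prec = tp / max(np.sum(pred != empty_label), 1)
    rec = tp / max(np.sum(gold != empty_label), 1)
    f1 = 2 * prec * rec / (prec + rec) if (prec + rec) > 0 else 0.0
    return prec, rec, f1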
def run_one_mini_batch(cls, model, criterion, optimizer, input, target, **kwargs):
    """See parent method for documentation.

    Extra Parameters
    ----------------
    optimizer : torch.optim
        The optimizer used to perform the weight update.
    """
    # Compute output
    output = model(input, target_size=target.shape[0])

    # Compute and record the loss
    loss = criterion(output, target)
    MetricLogger().update(key='loss', value=loss.item(), n=len(input))

    # Compute and record the accuracy
    acc = accuracy(output.data, target.data, topk=(1,))[0]
    MetricLogger().update(key='accuracy', value=acc[0], n=len(input))  # TODO check if n is correct

    # Reset gradient
    optimizer.zero_grad()
    # Compute gradients
    loss.backward()
    # Perform a step by updating the weights
    optimizer.step()
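# The accuracy(...) helper used by the mini-batch runners in this section
# matches the signature of the classic top-k helper from the PyTorch ImageNet
# example. Its actual implementation is not shown here; this is a reference
# sketch under that assumption. It returns size-1 tensors so callers can
# index acc[0] as above.
import torch

def _topk_accuracy_sketch(output, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.size(0)
    # Indices of the maxk highest-scoring classes per sample: [batch, maxk]
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()  # [maxk, batch]
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))  # percentage
    return res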
def evaluate(model, data_input, gold_output, model_name):
    predictions = model.predict_generator(
        generate_arrays_from_file1(data_input, batch_size=20),
        steps=int(len(gold_output) / 20), verbose=1)
    sio.savemat(model_name + 'all_prediction.mat', {'data': predictions})
    sio.savemat(model_name + 'all_groundtruth.mat', {'data': gold_output})

    if len(predictions.shape) == 3:
        predictions_classes = np.argmax(predictions, axis=2)
        # Flatten to a token stream and drop the padding class (label 0)
        train_y_properties_stream = gold_output.reshape(
            gold_output.shape[0] * gold_output.shape[1])
        predictions_classes = predictions_classes.reshape(
            predictions_classes.shape[0] * predictions_classes.shape[1])
        class_mask = train_y_properties_stream != 0
        train_y_properties_stream = train_y_properties_stream[class_mask]
        predictions_classes = predictions_classes[class_mask]
        predictions = np.squeeze(predictions, axis=1)
    else:
        predictions_classes = np.argmax(predictions, axis=1)
        train_y_properties_stream = gold_output

    accuracy = metrics.accuracy(predictions_classes, train_y_properties_stream)
    print("Results: Accuracy: ", accuracy)
    print(predictions.shape)

    micro_curve = metrics.compute_precision_recall_curve(
        predictions, train_y_properties_stream, micro=True,
        empty_label=keras_models_further.p0_index)
    with open(model_name + "all_micro_curve.dat", 'w') as out:
        out.write("\n".join(["{}\t{}".format(*t) for t in micro_curve]))
    print("Micro precision-recall curve stored in:", model_name + "all_micro_curve.dat")

    macro_curve = metrics.compute_precision_recall_curve(
        predictions, train_y_properties_stream, micro=False,
        empty_label=keras_models_further.p0_index)
    with open(model_name + "all_macro_curve.dat", 'w') as out:
        out.write("\n".join(["{}\t{}".format(*t) for t in macro_curve]))
    print("Macro precision-recall curve stored in:", model_name + "all_macro_curve.dat")

    return predictions_classes, predictions, micro_curve, macro_curve
def collect_metrics(self, outputs, oovs_target, no_extend_target, epoch=-1):
    num_samples = no_extend_target.size(0)
    metrics = Pack(num_samples=num_samples)
    loss = 0

    logits = outputs.logits  # [batch_size, dec_seq_len, vocab_size]
    nll_loss_ori = self.copy_gen_loss(
        scores=logits.transpose(1, 2).contiguous(),
        align=oovs_target,
        target=no_extend_target)  # [batch_size, tgt_len]
    nll_loss = torch.mean(torch.sum(nll_loss_ori, dim=-1))

    # Perplexity: exp of the total NLL averaged over non-padding tokens
    num_words = no_extend_target.ne(self.padding_idx).sum()
    ppl = nll_loss_ori.sum() / num_words
    ppl = ppl.exp()

    acc = accuracy(logits, no_extend_target, padding_idx=self.padding_idx)
    metrics.add(nll=(nll_loss, num_words), acc=acc, ppl=ppl)

    if self.use_posterior:
        kl_loss = self.kl_loss(torch.log(outputs.prior_attn + 1e-20),
                               outputs.posterior_attn.detach())
        metrics.add(kl=kl_loss)
        if self.stage == 1:
            loss += nll_loss
            loss += kl_loss
            if self.use_bow:
                bow_logits = outputs.bow_logits  # [batch_size, 1, vocab_size]
                bow_logits = bow_logits.repeat(1, no_extend_target.size(-1), 1)
                bow = self.nll_loss(bow_logits, no_extend_target)
                loss += bow
                metrics.add(bow=bow)
    else:
        loss += nll_loss

    metrics.add(loss=loss)
    return metrics
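# collect_metrics derives perplexity as exp(total token NLL / non-padding
# token count). A tiny synthetic check of that identity: ten tokens, each with
# NLL ln(2), should give a perplexity of 2 (coin-flip uncertainty per token).
_nll_per_token = torch.full((2, 5), 0.6931)  # 2 sequences x 5 tokens
_ppl = (_nll_per_token.sum() / 10).exp()
assert abs(float(_ppl) - 2.0) < 1e-3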
def run_one_mini_batch(cls, model, criterion, input, target, multi_run_label, **kwargs):
    """See parent method for documentation"""
    # Compute output
    output = model(input, target_size=target.shape[0])

    # Compute and record the loss
    loss = criterion(output, target)
    MetricLogger().update(key='loss', value=loss.item(), n=len(input))

    # Compute and record the accuracy
    acc = accuracy(output.data, target.data, topk=(1,))[0]
    MetricLogger().update(key='accuracy', value=acc[0], n=len(input))

    # Update the confusion matrix
    MetricLogger().update(key='confusion_matrix',
                          p=np.argmax(output.data.cpu().numpy(), axis=1),
                          t=target.cpu().numpy())

    # Update the classification results
    MetricLogger().update(key='classification_results',
                          p=np.argmax(output.data.cpu().numpy(), axis=1),
                          t=target.cpu().numpy(),
                          f_ind=input.file_name_ind.cpu().numpy())
def run_one_mini_batch(cls, model, criterion, input, target, **kwargs):
    """See parent method for documentation"""
    # Compute output
    output = model(input)

    # Unpack the target
    target = target['category_id']

    # Compute and record the loss
    loss = criterion(output, target)
    MetricLogger().update(key='loss', value=loss.item(), n=len(input))

    # Compute and record the accuracy
    acc = accuracy(output.data, target.data, topk=(1,))[0]
    MetricLogger().update(key='accuracy', value=acc[0], n=len(input))

    # Update the confusion matrix
    MetricLogger().update(key='confusion_matrix',
                          p=np.argmax(output.data.cpu().numpy(), axis=1),
                          t=target.cpu().numpy())
def predict_on_training_quick(paths):
    # Get paths
    data_path = paths['training_data']
    labels_path = paths['training_labels']

    # Get and split data
    training_data = data.get_training_data(data_path, labels_path)
    X_train, X_test, y_train, y_test, z_list = data.split_data(training_data, 0.1)

    # Initiate model and predict
    y_hat_lr = lr.model(alpha=0.55).predict(X_train, X_test, y_train)

    # Evaluate model
    cf_lr = metrics.confusionMatrix(y_hat_lr, y_test, 'logistic_regression', paths)
    print('## Logistic Regression results ##')
    print(metrics.accuracy(cf_lr))
    print(metrics.precision(cf_lr))
    print(metrics.recall(cf_lr))
    print(metrics.fscore(cf_lr))
def predict_on_training_long(paths):
    # Get paths
    data_path = paths['training_data']
    labels_path = paths['training_labels']

    # Get and split data
    training_data = data.get_training_data(data_path, labels_path)
    X_train, X_test, y_train, y_test, z_list = data.split_data(training_data, 0.05)

    # Release variable
    training_data = None

    # Decompose training data into singular values
    U, s, Vt = svd.deconstruct(X_train)

    # Build logistic regression with 90% variance retained
    X_train_reduced, X_test_reduced = svd.reconstruct(U, s, Vt, X_test, 0.9)
    y_hat_lr = lr.model(alpha=0.55).predict(X_train_reduced, X_test_reduced, y_train)

    # Build nearest neighbours with 40% variance retained
    X_train_reduced, X_test_reduced = svd.reconstruct(U, s, Vt, X_test, 0.4)
    y_hat_nn = nn.model(k=20).predict(X_train_reduced, X_test_reduced, y_train)

    # Build naive bayes
    y_hat_nb = nb.model(prior_weight=0).predict(X_train, X_test, y_train)

    # Create ensemble model
    y_hat_em = em.model(y_hat_lr, y_hat_nb, y_hat_nn, 4, 2, 3)

    # Evaluate models
    cf_lr = metrics.confusionMatrix(y_hat_lr, y_test, 'logistic_regression', paths)
    cf_nb = metrics.confusionMatrix(y_hat_nb, y_test, 'naive_bayes', paths)
    cf_nn = metrics.confusionMatrix(y_hat_nn, y_test, 'nearest_neighbours', paths)
    cf_em = metrics.confusionMatrix(y_hat_em, y_test, 'ensemble_model', paths)

    for name, cf in [('Logistic Regression', cf_lr), ('Naive Bayes', cf_nb),
                     ('Nearest Neighbours', cf_nn), ('Ensemble Model', cf_em)]:
        print('## {} results ##'.format(name))
        print(metrics.accuracy(cf))
        print(metrics.precision(cf))
        print(metrics.recall(cf))
        print(metrics.fscore(cf))
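# The metrics helpers in the two prediction routines above consume a confusion
# matrix rather than raw label vectors. Assuming confusionMatrix returns a
# square (n_classes x n_classes) count array with rows as actual classes and
# columns as predicted classes, accuracy is the diagonal mass over the total;
# a minimal sketch under that assumption:
def _accuracy_from_confusion_sketch(cf):
    cf = np.asarray(cf)
    return np.trace(cf) / cf.sum()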
def test_accuracy(self):
    assert accuracy(tp=4, tn=3, fp=2, fn=1) == 0.7
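# The test above pins down the count-based accuracy that main() relies on:
# (tp + tn) / (tp + tn + fp + fn) = (4 + 3) / 10 = 0.7. A matching sketch,
# inferred from the test rather than copied from the metrics module:
def _count_accuracy_sketch(tp, tn, fp, fn):
    total = tp + tn + fp + fn
    return (tp + tn) / total if total else 0.0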
reviews_matrixhl_test = create_sparse_matrix(uniquewordshl, reviews1500hl_test)

svd_model = Classification(SGDClassifier(), reviews_matrix, review_overall_list3)
svd_modelhl = Classification(SGDClassifier(), reviews_matrixhl, review_overall_list3)

predicted_svd = prediction(svd_model, reviews_matrix_test)
predicted_svdhl = prediction(svd_modelhl, reviews_matrixhl_test)

f1_score_svd = f1_score(review_overall_list3_test, predicted_svd, average='macro')
accuracy_svd = accuracy(review_overall_list3_test, predicted_svd)
precision_svd = precision(review_overall_list3_test, predicted_svd, average='macro')
recall_svd = recall(review_overall_list3_test, predicted_svd, average='macro')

f1_score_svdhl = f1_score(review_overall_list3_test, predicted_svdhl, average='macro')