Example #1
def calculate_acc(embeddings1, embeddings2, actual_issame, nrof_folds=10,
                  compare_func=pair_euc_score, sigma_sq1=None, sigma_sq2=None):

    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    accuracies = np.zeros(nrof_folds, dtype=np.float32)
    thresholds = np.zeros(nrof_folds, dtype=np.float32)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    # diff = np.subtract(embeddings1, embeddings2)
    # dist = np.sum(np.square(diff), 1)
    dist = compare_func(embeddings1, embeddings2, sigma_sq1, sigma_sq2)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # Training
        _, thresholds[fold_idx] = metrics.accuracy(dist[train_set], actual_issame[train_set])

        # Testing
        accuracies[fold_idx], _ = metrics.accuracy(dist[test_set], actual_issame[test_set], np.array([thresholds[fold_idx]]))

    accuracy = np.mean(accuracies)
    threshold = - np.mean(thresholds)
    return accuracy, threshold
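The default `compare_func=pair_euc_score` is not defined in this snippet; judging by the commented-out lines, it plays the role of a pairwise score derived from the (squared) Euclidean distance between the two embeddings. Below is a hypothetical stand-in plus a call sketch; the function body, the negated-distance sign convention, and the random data are assumptions, and `evaluation.metrics` still has to come from the original project.

import numpy as np

def pair_euc_score(x1, x2, sigma_sq1=None, sigma_sq2=None):
    # Hypothetical pair score: negated squared Euclidean distance, so that
    # larger values mean "more similar"; sigma_sq1/sigma_sq2 are ignored here.
    x1, x2 = np.asarray(x1), np.asarray(x2)
    return -np.sum(np.square(x1 - x2), axis=1)

# Usage sketch with random pairs (1 = same identity, 0 = different identity).
emb1 = np.random.randn(600, 128).astype(np.float32)
emb2 = np.random.randn(600, 128).astype(np.float32)
issame = np.random.randint(0, 2, size=600)
acc, thr = calculate_acc(emb1, emb2, issame, nrof_folds=10)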
Example #2
def calculate_roc(embeddings1, embeddings2, actual_issame, compare_func, nrof_folds=10):
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    indices = np.arange(nrof_pairs)
    accuracies = np.zeros(nrof_folds, dtype=np.float32)
    thresholds = np.zeros(nrof_folds, dtype=np.float32)

    dist = compare_func(embeddings1, embeddings2)

    from evaluation import metrics

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # Find the best threshold for the fold
        train_score = dist[train_set]
        train_labels = actual_issame[train_set] == 1
        _, thresholds[fold_idx] = metrics.accuracy(train_score, train_labels)

        # Testing
        test_score = dist[test_set]
        accuracies[fold_idx], _ = metrics.accuracy(test_score, actual_issame[test_set] == 1, np.array([thresholds[fold_idx]]))

    accuracy = np.mean(accuracies)
    threshold = np.mean(thresholds)
    return accuracy, threshold
Example #3
    def test_standard_proto(self, features, compare_func):

        assert self.standard_folds is not None
        
        accuracies = np.zeros(10, dtype=np.float32)
        thresholds = np.zeros(10, dtype=np.float32)

        for i in range(10):
            # Training
            train_indices1 = np.concatenate([self.standard_folds[j].indices1 for j in range(10) if j!=i])
            train_indices2 = np.concatenate([self.standard_folds[j].indices2 for j in range(10) if j!=i])
            train_labels = np.concatenate([self.standard_folds[j].labels for j in range(10) if j!=i])

            train_features1 = features[train_indices1,:]
            train_features2 = features[train_indices2,:]
            
            train_score = compare_func(train_features1, train_features2)
            _, thresholds[i] = metrics.accuracy(train_score, train_labels)

            # Testing
            fold = self.standard_folds[i]
            test_features1 = features[fold.indices1,:]
            test_features2 = features[fold.indices2,:]
            
            test_score = compare_func(test_features1, test_features2)
            accuracies[i], _ = metrics.accuracy(test_score, fold.labels, np.array([thresholds[i]]))

        accuracy = np.mean(accuracies)
        threshold = - np.mean(thresholds)
        return accuracy, threshold
Example #4
def main():
    args = parse_user_arguments()

    preds = parse_pred_file(args.pred_file, args.format, args.confusion_set)

    tp, tn, fp, fn, xyz = contingency_table(args.cnfs_file, preds, 
                                            args.threshold, args.difference)
    all = tn + fp + fn + tp + xyz
    
    print "### Evaluation of file {}".format(args.cnfs_file)
    print ""

    print "threshold   : %.4f " % (args.threshold or 0)
    print "difference  : %.4f " % (args.difference or 0)
    print ""

    print "# Statistics:"
    print "Prevalence  : %.4f" % metrics.prevalence(tp, tn, fp + xyz, fn)
    print "Bias        : %.4f" % metrics.bias(tp, tn, fp + xyz, fn)
    print "Changes     : %d" % (tp + fp)
    print ""

    print "# WAS evaluation scheme:"
    print "TN x/x/x    : %.4f (%d)" % (tn/float(all), tn)
    print "FP x/x/y    : %.4f (%d)" % (fp/float(all), fp)
    print "FN x/y/x    : %.4f (%d)" % (fn/float(all), fn)
    print "TP x/y/y    : %.4f (%d)" % (tp/float(all), tp)
    print "*  x/y/z    : %.4f (%d)" % (xyz/float(all), xyz)
    print ""

    if args.edit_counts:
        print "# Simple accuracy:"
        acc, aa, ab, skipped = metrics.edits_count(tp, tn, fp, fn, xyz)
        print "All edits   : %.4f" % acc
        print "A-A edits   : %.4f" % aa
        print "A-B edits   : %.4f" % ab
        print "A-B skipped : %.4f" % skipped
        print ""

    if args.detection_task:
        print "# Detection task:"
        print "Accuracy    : %.4f" % metrics.accuracy(tp + xyz, tn, fp, fn)
        if args.all:
            print "Specificity : %.4f" % metrics.specificity(tp + xyz, tn, fp, fn)
            print "Fall-out    : %.4f" % metrics.fall_out(tp + xyz, tn, fp, fn)
        print "Precision   : %.4f" % metrics.precision(tp + xyz, tn, fp, fn)
        print "Recall      : %.4f" % metrics.recall(tp + xyz, tn, fp, fn)
        print "F0.5        : %.4f" % metrics.fscore(tp + xyz, tn, fp, fn)
        print ""
    
    print "# Correction task:"
    print "Accuracy    : %.4f" % metrics.accuracy(tp, tn, fp + xyz, fn)
    if args.all:
        print "Specificity : %.4f" % metrics.specificity(tp, tn, fp + xyz, fn)
        print "Fall-out    : %.4f" % metrics.fall_out(tp, tn, fp + xyz, fn)
    print "Precision   : %.4f" % metrics.precision(tp, tn, fp + xyz, fn)
    print "Recall      : %.4f" % metrics.recall(tp, tn, fp + xyz, fn)
    print "F0.5        : %.4f" % metrics.fscore(tp, tn, fp + xyz, fn)
    print ""
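The precision, recall, and F0.5 figures above are all derived from the contingency counts. A minimal sketch of the usual count-based definitions, for reference only; the project's own metrics module may implement them differently (e.g. with smoothing or different zero-division handling).

def precision(tp, tn, fp, fn):
    # Fraction of proposed changes that are correct.
    return tp / float(tp + fp) if tp + fp else 0.0

def recall(tp, tn, fp, fn):
    # Fraction of required changes that were actually made.
    return tp / float(tp + fn) if tp + fn else 0.0

def fscore(tp, tn, fp, fn, beta=0.5):
    # F-beta score; beta=0.5 weights precision more heavily than recall.
    p, r = precision(tp, tn, fp, fn), recall(tp, tn, fp, fn)
    return (1 + beta ** 2) * p * r / (beta ** 2 * p + r) if p + r else 0.0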
Example #5
def evaluate(model, data_input, gold_output):
    predictions = model.predict(
        data_input,
        batch_size=keras_models.model_params['batch_size'],
        verbose=1)
    if len(predictions.shape) == 3:
        predictions_classes = np.argmax(predictions, axis=2)
        train_batch_f1 = metrics.accuracy_per_sentence(predictions_classes,
                                                       gold_output)
        print("Results (per sentence): ", train_batch_f1)
        train_y_properties_stream = gold_output.reshape(gold_output.shape[0] *
                                                        gold_output.shape[1])
        predictions_classes = predictions_classes.reshape(
            predictions_classes.shape[0] * predictions_classes.shape[1])
        class_mask = train_y_properties_stream != 0
        train_y_properties_stream = train_y_properties_stream[class_mask]
        predictions_classes = predictions_classes[class_mask]
    else:
        predictions_classes = np.argmax(predictions, axis=1)
        train_y_properties_stream = gold_output
    accuracy = metrics.accuracy(predictions_classes, train_y_properties_stream)
    micro_scores = metrics.compute_micro_PRF(predictions_classes,
                                             train_y_properties_stream,
                                             empty_label=keras_models.p0_index)
    print("Results: Accuracy: ", accuracy)
    print("Results: Micro-Average F1: ", micro_scores)
    return predictions_classes, predictions
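`metrics.compute_micro_PRF` is project-specific; in relation-extraction evaluation it usually denotes micro-averaged precision/recall/F1 pooled over every label except the empty one. A sketch under that assumption (the real helper's signature and return value may differ):

import numpy as np

def compute_micro_prf(pred, gold, empty_label=0):
    # Pool counts over all classes except the "empty"/negative label.
    pred, gold = np.asarray(pred), np.asarray(gold)
    correct = np.sum((pred == gold) & (gold != empty_label))
    predicted = np.sum(pred != empty_label)
    actual = np.sum(gold != empty_label)
    precision = correct / predicted if predicted else 0.0
    recall = correct / actual if actual else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1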
Example #6
    def run_one_mini_batch(cls, model, criterion, optimizer, input, target,
                           **kwargs):
        """See parent method for documentation

        Extra-Parameters
        ----------------
        optimizer : torch.optim
            The optimizer used to perform the weight update.
        """
        # Compute output
        output = model(input, target_size=target.shape[0])

        # Compute and record the loss
        loss = criterion(output, target)
        MetricLogger().update(key='loss', value=loss.item(), n=len(input))

        # Compute and record the accuracy
        acc = accuracy(output.data, target.data, topk=(1, ))[0]
        MetricLogger().update(key='accuracy', value=acc[0], n=len(input))
        # TODO check if n is correct

        # Reset gradient
        optimizer.zero_grad()
        # Compute gradients
        loss.backward()
        # Perform a step by updating the weights
        optimizer.step()
Example #7
def evaluate(model, data_input, gold_output, model_name):
    predictions = model.predict_generator(generate_arrays_from_file1(data_input, batch_size=20),
                                          steps=int(len(gold_output) / 20), verbose=1)
    sio.savemat(model_name+'all_prediction.mat', {'data': predictions})
    sio.savemat(model_name+'all_groundtruth.mat', {'data': gold_output})
    if len(predictions.shape) == 3:
        predictions_classes = np.argmax(predictions, axis=2)
        # train_batch_f1 = metrics.accuracy_per_sentence(predictions_classes, gold_output)
        # print("Results (per sentence): ", train_batch_f1)
        train_y_properties_stream = gold_output.reshape(gold_output.shape[0] * gold_output.shape[1])
        predictions_classes = predictions_classes.reshape(predictions_classes.shape[0] * predictions_classes.shape[1])
        class_mask = train_y_properties_stream != 0
        train_y_properties_stream = train_y_properties_stream[class_mask]
        predictions_classes = predictions_classes[class_mask]
        predictions = np.squeeze(predictions, axis=1)
    else:
        predictions_classes = np.argmax(predictions, axis=1)
        train_y_properties_stream = gold_output

    accuracy = metrics.accuracy(predictions_classes, train_y_properties_stream)
    print("Results: Accuracy: ", accuracy)
    print(predictions.shape)
    micro_curve = metrics.compute_precision_recall_curve(predictions, train_y_properties_stream, micro=True, empty_label=keras_models_further.p0_index)
    with open(model_name + "all_micro_curve.dat", 'w') as out:
        out.write("\n".join(["{}\t{}".format(*t) for t in micro_curve]))
    print("Micro precision-recall-curve stored in:", model_name + "all_micro_curve.dat")
    macro_curve = metrics.compute_precision_recall_curve(predictions, train_y_properties_stream, micro=False, empty_label=keras_models_further.p0_index)
    with open(model_name + "all_macro_curve.dat", 'w') as out:
        out.write("\n".join(["{}\t{}".format(*t) for t in macro_curve]))
    print("Macro precision-recall-curve stored in:", model_name + "all_macro_curve.dat")
    return predictions_classes, predictions, micro_curve, macro_curve
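`metrics.compute_precision_recall_curve` is likewise not shown. A hedged sketch of the micro-averaged variant using scikit-learn, assuming `predictions` holds per-class probabilities with shape (num_samples, num_classes); the original helper's exact behavior and output format may differ.

import numpy as np
from sklearn.metrics import precision_recall_curve

def micro_pr_curve(probs, gold, empty_label=0):
    # Micro-averaged precision-recall curve: pool every (sample, class) decision
    # for all classes except the empty/negative label, then compute one curve.
    classes = [c for c in range(probs.shape[1]) if c != empty_label]
    gold_bin = np.stack([(gold == c).astype(int) for c in classes], axis=1)
    precision, recall, _ = precision_recall_curve(gold_bin.ravel(), probs[:, classes].ravel())
    return list(zip(precision, recall))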
Example #8
    def collect_metrics(self,
                        outputs,
                        oovs_target,
                        no_extend_target,
                        epoch=-1):

        num_samples = no_extend_target.size(0)
        metrics = Pack(num_samples=num_samples)
        loss = 0
        logits = outputs.logits  # [batch_size, dec_seq_len, vocab_size]
        nll_loss_ori = self.copy_gen_loss(
            scores=logits.transpose(1, 2).contiguous(),
            align=oovs_target,
            target=no_extend_target)  # [batch_size, tgt_len]

        nll_loss = torch.mean(torch.sum(nll_loss_ori, dim=-1))

        num_words = no_extend_target.ne(self.padding_idx).sum()  # .item()
        ppl = nll_loss_ori.sum() / num_words
        ppl = ppl.exp()
        acc = accuracy(logits, no_extend_target, padding_idx=self.padding_idx)
        metrics.add(nll=(nll_loss, num_words), acc=acc, ppl=ppl)

        if self.use_posterior:
            kl_loss = self.kl_loss(torch.log(outputs.prior_attn + 1e-20),
                                   outputs.posterior_attn.detach())

            metrics.add(kl=kl_loss)

            if self.stage == 1:
                loss += nll_loss
                loss += kl_loss

            if self.use_bow:
                bow_logits = outputs.bow_logits  # size = [batch_size, 1, vocab_size]
                bow_logits = bow_logits.repeat(1, no_extend_target.size(-1), 1)
                bow = self.nll_loss(bow_logits, no_extend_target)
                loss += bow
                metrics.add(bow=bow)

        else:
            loss += nll_loss

        metrics.add(loss=loss)
        return metrics
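The `accuracy` call above computes token-level accuracy over the decoded sequence while ignoring padding positions. A minimal sketch of such a helper, assuming logits of shape [batch, seq_len, vocab_size]; the project's implementation may differ.

import torch

def accuracy(logits, targets, padding_idx=None):
    # Token-level accuracy over non-padding positions.
    preds = logits.argmax(dim=-1)
    correct = preds.eq(targets).float()
    if padding_idx is not None:
        mask = targets.ne(padding_idx).float()
        return (correct * mask).sum() / mask.sum()
    return correct.mean()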
Example #9
    def run_one_mini_batch(cls, model, criterion, input, target, multi_run_label, **kwargs):
        """See parent method for documentation"""
        # Compute output
        output = model(input, target_size=target.shape[0])

        # Compute and record the loss
        loss = criterion(output, target)
        MetricLogger().update(key='loss', value=loss.item(), n=len(input))

        # Compute and record the accuracy
        acc = accuracy(output.data, target.data, topk=(1,))[0]
        MetricLogger().update(key='accuracy', value=acc[0], n=len(input))

        # Update the confusion matrix
        MetricLogger().update(key='confusion_matrix', p=np.argmax(output.data.cpu().numpy(), axis=1), t=target.cpu().numpy())
        # Update the classification results
        MetricLogger().update(key='classification_results', p=np.argmax(output.data.cpu().numpy(), axis=1), t=target.cpu().numpy(),
                              f_ind=input.file_name_ind.cpu().numpy())
Example #10
    def run_one_mini_batch(cls, model, criterion, input, target, **kwargs):
        """See parent method for documentation"""
        # Compute output
        output = model(input)

        # Unpack the target
        target = target['category_id']

        # Compute and record the loss
        loss = criterion(output, target)
        MetricLogger().update(key='loss', value=loss.item(), n=len(input))

        # Compute and record the accuracy
        acc = accuracy(output.data, target.data, topk=(1, ))[0]
        MetricLogger().update(key='accuracy', value=acc[0], n=len(input))

        # Update the confusion matrix
        MetricLogger().update(key='confusion_matrix',
                              p=np.argmax(output.data.cpu().numpy(), axis=1),
                              t=target.cpu().numpy())
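The `accuracy` helper used in Examples #6, #9, and #10 is not shown; its call pattern (accuracy(output, target, topk=(1,))[0], then indexing [0] again for the scalar) matches the familiar top-k accuracy routine from the PyTorch ImageNet example. A sketch under that assumption:

import torch

def accuracy(output, target, topk=(1,)):
    # For each k, count how often the true class is among the k largest logits.
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))  # percentage as a 1-element tensor
        return res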
Example #11
def predict_on_training_quick(paths):
    
    # Get paths
    data_path = paths['training_data']
    labels_path = paths['training_labels']    
    
    # Get and split data    
    training_data = data.get_training_data(data_path, labels_path)
    X_train, X_test, y_train, y_test, z_list = data.split_data(training_data, 0.1)

    
    # Initiate model and predict
    y_hat_lr = lr.model(alpha = 0.55).predict(X_train, X_test, y_train)

    # Evaluate model
    cf_lr = metrics.confusionMatrix(y_hat_lr, y_test, 'logistic_regression', paths)
    
    print('## Logistic Regression results ##')
    
    print(metrics.accuracy(cf_lr))
    print(metrics.precision(cf_lr))
    print(metrics.recall(cf_lr))
    print(metrics.fscore(cf_lr))
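In this example and the next one, metrics.accuracy, metrics.precision, metrics.recall, and metrics.fscore all operate on a confusion matrix rather than on raw counts. A hedged sketch of confusion-matrix-based versions, assuming rows are true classes and columns are predictions; the project's own averaging scheme may differ.

import numpy as np

def accuracy(cf):
    # Correctly classified samples (the diagonal) over all samples.
    return np.trace(cf) / np.sum(cf)

def precision(cf):
    # Macro-averaged precision: per predicted class, diagonal over column sum.
    with np.errstate(divide='ignore', invalid='ignore'):
        per_class = np.diag(cf) / cf.sum(axis=0)
    return np.nanmean(per_class)

def recall(cf):
    # Macro-averaged recall: per true class, diagonal over row sum.
    with np.errstate(divide='ignore', invalid='ignore'):
        per_class = np.diag(cf) / cf.sum(axis=1)
    return np.nanmean(per_class)

def fscore(cf, beta=1.0):
    # Macro F-beta computed from the macro precision and recall.
    p, r = precision(cf), recall(cf)
    return (1 + beta ** 2) * p * r / (beta ** 2 * p + r) if p + r else 0.0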
Example #12
def predict_on_training_long(paths):
    
    # Get paths
    data_path = paths['training_data']
    labels_path = paths['training_labels']      
    
    # Get and split data
    training_data = data.get_training_data(data_path, labels_path)
    X_train, X_test, y_train, y_test, z_list = data.split_data(training_data, 0.05)
    
    # Release variable
    training_data = None

    # Decompose training data into singular values
    U, s, Vt = svd.deconstruct(X_train)
    
    # Build logistic regression with 90% variance retained
    X_train_reduced, X_test_reduced = svd.reconstruct(U, s, Vt, X_test, 0.9)
    y_hat_lr = lr.model(alpha = 0.55).predict(X_train_reduced, X_test_reduced, y_train)
    
    # Build nearest neighbours with 40% variance retained
    X_train_reduced, X_test_reduced = svd.reconstruct(U, s, Vt, X_test, 0.4)
    y_hat_nn = nn.model(k = 20).predict(X_train_reduced, X_test_reduced, y_train)
    
    # Build naive bayes
    y_hat_nb = nb.model(prior_weight = 0).predict(X_train, X_test, y_train)

    # Create ensemble model
    y_hat_em = em.model(y_hat_lr, y_hat_nb, y_hat_nn, 4, 2, 3)

    # Evaluate models
    cf_lr = metrics.confusionMatrix(y_hat_lr, y_test, 'logistic_regression', paths)
    cf_nb = metrics.confusionMatrix(y_hat_nb, y_test, 'naive_bayes', paths)
    cf_nn = metrics.confusionMatrix(y_hat_nn, y_test, 'nearest_neighbours', paths)
    cf_em = metrics.confusionMatrix(y_hat_em, y_test, 'ensemble_model', paths)
    
    print('## Logistic Regression results ##')
    
    print(metrics.accuracy(cf_lr))
    print(metrics.precision(cf_lr))
    print(metrics.recall(cf_lr))
    print(metrics.fscore(cf_lr))
    
    print('## Naive Bayes results ##')
    
    print(metrics.accuracy(cf_nb))
    print(metrics.precision(cf_nb))
    print(metrics.recall(cf_nb))
    print(metrics.fscore(cf_nb))
    
    print ('## Nearest Neighbours results ##')
    
    print(metrics.accuracy(cf_nn))
    print(metrics.precision(cf_nn))
    print(metrics.recall(cf_nn))
    print(metrics.fscore(cf_nn))
    
    print ('## Ensemble Model results ##')
    
    print(metrics.accuracy(cf_em))
    print(metrics.precision(cf_em))
    print(metrics.recall(cf_em))
    print(metrics.fscore(cf_em))
    
    return
Example #13
    def test_accuracy(self):
        assert accuracy(tp=4, tn=3, fp=2, fn=1) == 0.7
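The test pins down the expected formula: accuracy = (tp + tn) / (tp + tn + fp + fn) = (4 + 3) / 10 = 0.7. A minimal implementation that satisfies it (the project's actual helper may add extra arguments or zero-division guards):

def accuracy(tp, tn, fp, fn):
    # Fraction of all decisions that were correct.
    total = tp + tn + fp + fn
    return (tp + tn) / float(total) if total else 0.0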
Example #14
reviews_matrixhl_test = create_sparse_matrix(uniquewordshl, reviews1500hl_test)

svd_model = Classification(SGDClassifier(), reviews_matrix,
                           review_overall_list3)
svd_modelhl = Classification(SGDClassifier(), reviews_matrixhl,
                             review_overall_list3)
predicted_svd = prediction(svd_model, reviews_matrix_test)
predicted_svdhl = prediction(svd_modelhl, reviews_matrixhl_test)

f1_score_svd = f1_score(review_overall_list3_test,
                        predicted_svd,
                        average='macro')
#
print('F1-SCORE RESULT: ' + str(f1_score_svd))

accuracy_svd = accuracy(review_overall_list3_test, predicted_svd)
#
print('ACCURACY RESULT: ' + str(accuracy_svd))

precision_svd = precision(review_overall_list3_test,
                          predicted_svd,
                          average='macro')
#
print('PRECISION RESULT: ' + str(precision_svd))

recall_svd = recall(review_overall_list3_test, predicted_svd, average='macro')
#
print('RECALL RESULT: ' + str(recall_svd))

f1_score_svdhl = f1_score(review_overall_list3_test,
                          predicted_svdhl,