Example #1
def evaluate_results(net, test_loader, pad_id, cuda, args, epoch):
    logger.info("Evaluating test samples...")
    acc = 0
    out_labels = []
    true_labels = []
    net.eval()
    with torch.no_grad():
        for i, data in tqdm(enumerate(test_loader), total=len(test_loader)):
            x, e1_e2_start, labels, _, _, _ = data
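            # Attend only to non-padding tokens; token type ids are all zero for single-segment input.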
            attention_mask = (x != pad_id).float()
            token_type_ids = torch.zeros((x.shape[0], x.shape[1])).long()

            if args.only_evaluate == 2 and i >= 10:
                break

            if cuda:
                x = x.cuda()
                labels = labels.cuda()
                attention_mask = attention_mask.cuda()
                token_type_ids = token_type_ids.cuda()

            classification_logits = net(x, token_type_ids=token_type_ids,
                                        attention_mask=attention_mask,
                                        e1_e2_start=e1_e2_start)

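            # evaluate_ returns the batch accuracy together with the predicted and gold label ids.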
            accuracy, (o, l) = evaluate_(classification_logits, labels)
            out_labels.extend(o)
            true_labels.extend(l)
            acc += accuracy

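    # Average the per-batch accuracies over the number of batches actually evaluated.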
    accuracy = acc / (i + 1)

    results = {
        "accuracy": accuracy,
        "sklearn f1-macro": sklearn_f1_score(true_labels,
                                             out_labels,
                                             labels=list(range(args.num_classes)),
                                             average='macro'),
        "sklearn f1-micro": sklearn_f1_score(true_labels,
                                             out_labels,
                                             labels=list(range(args.num_classes)),
                                             average='micro')
    }

    if args.task == 'SemEval':
        logger.info("Generating additional files ...")
        semeval_files(args, true_labels, out_labels, epoch + 1)
    elif args.task == 'KBP37':
        KBP37_scorer1(args, true_labels, out_labels)
        KBP37_files(args, true_labels, out_labels, epoch + 1)
    elif args.task == 'TACRED':
        TACRED_scorer(args, true_labels, out_labels)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    return results
Example #2
def f1_score(predictions, ground_truth):
    '''Compute F1 scores.

    .. math::
        F_{score}^{(n)} = \\frac
            {2 * Precision * Recall}
            {Precision + Recall}

    Parameters
    ----------
    predictions, ground_truth : nilmtk.MeterGroup

    Returns
    -------
    f1_scores : pd.Series
        Each index is a meter instance int (or a tuple for MeterGroups).
        Each value is the F1 score for that appliance.  If there are multiple
        chunks then the value is the weighted mean of the F1 score for
        each chunk.
    '''
    # If we import sklearn at top of file then sphinx breaks.
    from sklearn.metrics import f1_score as sklearn_f1_score

    # sklearn produces lots of DeprecationWarnings with PyTables
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    f1_scores = {}
    both_sets_of_meters = iterate_through_submeters_of_two_metergroups(
        predictions, ground_truth)
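    # Pair each predicted submeter with its ground-truth counterpart and score them one by one.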
    for pred_meter, ground_truth_meter in both_sets_of_meters:
        scores_for_meter = pd.DataFrame(columns=['score', 'num_samples'])
        aligned_meters = align_two_meters(
            pred_meter, ground_truth_meter, 'when_on')
        for aligned_states_chunk in aligned_meters:
            aligned_states_chunk.dropna(inplace=True)
            aligned_states_chunk = aligned_states_chunk.astype(int)
            score = sklearn_f1_score(aligned_states_chunk.iloc[:, 0],
                                     aligned_states_chunk.iloc[:, 1])
            scores_for_meter = scores_for_meter.append(
                {'score': score, 'num_samples': len(aligned_states_chunk)},
                ignore_index=True)

        # Calculate weighted mean
        num_samples = scores_for_meter['num_samples'].sum()
        if num_samples > 0:
            scores_for_meter['proportion'] = (
                scores_for_meter['num_samples'] / num_samples)
            avg_score = (
                scores_for_meter['score'] * scores_for_meter['proportion']
            ).sum()
        else:
            warn("No aligned samples when calculating F1-score for prediction"
                 " meter {} and ground truth meter {}."
                 .format(pred_meter, ground_truth_meter))
            avg_score = np.NaN
        f1_scores[pred_meter.instance()] = avg_score

    return pd.Series(f1_scores)
Example #3
def f1_score(predictions, ground_truth):
    '''Compute F1 scores.

    .. math::
        F_{score}^{(n)} = \\frac
            {2 * Precision * Recall}
            {Precision + Recall}

    Parameters
    ----------
    predictions, ground_truth : nilmtk.MeterGroup

    Returns
    -------
    f1_scores : pd.Series
        Each index is a meter instance int (or a tuple for MeterGroups).
        Each value is the F1 score for that appliance.  If there are multiple
        chunks then the value is the weighted mean of the F1 score for
        each chunk.
    '''
    # If we import sklearn at top of file then sphinx breaks.
    from sklearn.metrics import f1_score as sklearn_f1_score

    # sklearn produces lots of DeprecationWarnings with PyTables
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    f1_scores = {}
    both_sets_of_meters = iterate_through_submeters_of_two_metergroups(
        predictions, ground_truth)
    for pred_meter, ground_truth_meter in both_sets_of_meters:
        scores_for_meter = pd.DataFrame(columns=['score', 'num_samples'])
        aligned_meters = align_two_meters(pred_meter, ground_truth_meter,
                                          'when_on')
        for aligned_states_chunk in aligned_meters:
            aligned_states_chunk.dropna(inplace=True)
            aligned_states_chunk = aligned_states_chunk.astype(int)
            score = sklearn_f1_score(aligned_states_chunk.iloc[:, 0],
                                     aligned_states_chunk.iloc[:, 1])
            scores_for_meter = scores_for_meter.append(
                {
                    'score': score,
                    'num_samples': len(aligned_states_chunk)
                },
                ignore_index=True)

        # Calculate weighted mean
        num_samples = scores_for_meter['num_samples'].sum()
        if num_samples > 0:
            scores_for_meter['proportion'] = (scores_for_meter['num_samples'] /
                                              num_samples)
            avg_score = (scores_for_meter['score'] *
                         scores_for_meter['proportion']).sum()
        else:
            warn("No aligned samples when calculating F1-score for prediction"
                 " meter {} and ground truth meter {}.".format(
                     pred_meter, ground_truth_meter))
            avg_score = np.NaN
        f1_scores[pred_meter.instance()] = avg_score

    return pd.Series(f1_scores)
Example #4
def classify_generic(classificator, data):
    start_time = data["start_time"]

    x_test = data["x_test"]
    y_test = data["y_test"]
    x_train = data["x_train"]
    y_train = data["y_train"]

    experiment_hash = data["experiment_hash"]

    name = classificator.__class__.__name__

    console.print(f"\n[yellow]Classificator: [blue]{classificator}\n")

    log('start', 'fit', start_time)
    classificator.fit(x_train, y_train)
    log('end', 'fit', start_time)

    log('start', 'predict', start_time)
    predict = classificator.predict(x_test)
    log('end', 'predict', start_time)

    log('start', 'f1_score', start_time)
    f1_score = utils.round_float(
        sklearn_f1_score(y_test,
                         predict,
                         labels=numpy.unique(predict),
                         average='weighted'))
    log('end', 'f1_score', start_time)

    log('start', 'accuracy', start_time)
    accuracy = utils.round_float(accuracy_score(y_test, predict))
    log('end', 'accuracy', start_time)

    log('start', 'conf_mat', start_time)
    conf_mat = confusion_matrix(y_test, predict)
    log('end', 'conf_mat', start_time)

    log('start', 'save_conf_mat', start_time)
    generate_conf_mat.save_conf_mat(experiment_hash, classificator, name,
                                    x_test, y_test)
    log('end', 'save_conf_mat', start_time)

    log('start', 'classification_report', start_time)
    creport = classification_report(y_test, predict)
    log('end', 'classification_report', start_time)

    time = utils.round_float(utils.get_time_diff(start_time))

    result = {
        'f1_score': f1_score,
        'accuracy': accuracy,
        'conf_mat': conf_mat,
        'creport': creport,
        'time': time
    }

    return result
Example #5
    def validation_step(self, batch, batch_nb):

        input_ids, attention_mask, token_type_ids, slot_labels = batch

        outputs = self(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )

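        # Compute the cross-entropy loss only over non-padded positions (attention_mask == 1).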
        active_loss = attention_mask.view(-1) == 1
        active_logits = outputs.view(-1,
                                     len(self.slot_labels_type))[active_loss]
        active_labels = slot_labels.view(-1)[active_loss]
        loss = F.cross_entropy(active_logits, active_labels)

        _, y_hat = torch.max(outputs, dim=2)
        y_hat = y_hat.detach().cpu().numpy()
        slot_label_ids = slot_labels.detach().cpu().numpy()

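        # Map label ids back to label strings so seqeval can compute entity-level F1.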
        slot_label_map = {
            i: label
            for i, label in enumerate(self.slot_labels_type)
        }
        slot_gt_labels = [[] for _ in range(slot_label_ids.shape[0])]
        slot_pred_labels = [[] for _ in range(slot_label_ids.shape[0])]

        for i in range(slot_label_ids.shape[0]):
            for j in range(slot_label_ids.shape[1]):
                if slot_label_ids[i, j] != self.ignore_index:
                    slot_gt_labels[i].append(
                        slot_label_map[slot_label_ids[i][j]])
                    slot_pred_labels[i].append(slot_label_map[y_hat[i][j]])

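        # seqeval gives an entity-level F1; sklearn's micro-averaged F1 over the flattened tokens gives a token-level score.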
        val_acc = torch.tensor(seqeval_f1_score(slot_gt_labels,
                                                slot_pred_labels),
                               dtype=torch.float32)
        token_val_acc = sklearn_f1_score(
            list(chain.from_iterable(slot_gt_labels)),
            list(chain.from_iterable(slot_pred_labels)),
            average="micro",
        )

        token_val_acc = torch.tensor(token_val_acc, dtype=torch.float32)

        return {
            "val_loss": loss,
            "val_acc": val_acc,
            "token_val_acc": token_val_acc
        }
Example #6
def f1_score(predictions, ground_truth):
    """Compute F1 scores.
    
    .. math::
        F_{score}^{(n)} = \\frac
            {2 * Precision * Recall}
            {Precision + Recall}

    Parameters
    ----------
    predictions, ground_truth : nilmtk.MeterGroup

    Returns
    -------
    f1_scores : pd.Series
        Each index is a meter instance int (or a tuple for MeterGroups).
        Each value is the F1 score for that appliance.  If there are multiple
        chunks then the value is the weighted mean of the F1 score for 
        each chunk.
    """
    # If we import sklearn at top of file then sphinx breaks.
    from sklearn.metrics import f1_score as sklearn_f1_score

    # sklearn produces lots of DeprecationWarnings with PyTables
    import warnings

    warnings.filterwarnings("ignore", category=DeprecationWarning)

    f1_scores = {}
    both_sets_of_meters = iterate_through_submeters_of_two_metergroups(predictions, ground_truth)
    for pred_meter, ground_truth_meter in both_sets_of_meters:
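        # Collect one F1 score per chunk so the scores can be weighted by chunk length below.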
        scores_for_meter = pd.DataFrame(columns=["score", "n_samples"])
        for aligned_states_chunk in align_two_meters(pred_meter, ground_truth_meter, "when_on"):
            aligned_states_chunk.dropna(inplace=True)
            aligned_states_chunk = aligned_states_chunk.astype(int)
            score = sklearn_f1_score(aligned_states_chunk.iloc[:, 0], aligned_states_chunk.iloc[:, 1])
            scores_for_meter = scores_for_meter.append(
                {"score": score, "n_samples": len(aligned_states_chunk)}, ignore_index=True
            )

        # Calculate weighted mean
        tot_samples = scores_for_meter["n_samples"].sum()
        scores_for_meter["proportion"] = scores_for_meter["n_samples"] / tot_samples
        avg_score = (scores_for_meter["score"] * scores_for_meter["proportion"]).sum()
        f1_scores[pred_meter.instance()] = avg_score

    return pd.Series(f1_scores)
Example #7
def do_svm(input_trained_svm, input_test_svm, conj_train=0):
    x_train, y_train = load_svmlight_file(input_trained_svm)
    x_test, y_test = load_svmlight_file(input_test_svm)
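    # load_svmlight_file returns sparse matrices; convert them to dense arrays before fitting.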
    x_train = x_train.toarray()
    x_test = x_test.toarray()
    classificator = svm.SVC()
    classificator.fit(x_train, y_train)
    predict = classificator.predict(x_test)
    f1_score = round_float(
        sklearn_f1_score(y_test,
                         predict,
                         labels=np.unique(predict),
                         average='weighted'))
    accuracy = round_float(accuracy_score(y_test, predict))
    cm = confusion_matrix(y_test, predict)
    # Saving Results
    title = 'svm_c' + str(conj_train)
    with open('out/reports/' + title + '_results.txt', 'w') as f:
        with redirect_stdout(f):
            print(f'Accuracy:  {accuracy}')
            print(f'F1Score:  {f1_score}')
            # Confusion Matrix
            print('Confusion Matrix:\n', cm)
Example #8
                y: np.reshape(y_test, (y_test.shape[0], ))
            })

            # Get predictions for the test set.
            _, y_pred = sess.run([accuracy, predictions],
                                 feed_dict={
                                     X: test_data,
                                     y: np.reshape(y_test, (y_test.shape[0], ))
                                 })

            # Write summaries.
            summary_train_acc = acc_summary.eval(
                feed_dict={
                    X: train_data,
                    y: np.reshape(y_train, (y_train.shape[0], ))
                })
            summary_test_acc = acc_summary.eval(
                feed_dict={
                    X: test_data,
                    y: np.reshape(y_test, (y_test.shape[0], ))
                })
            summary_writer_train.add_summary(summary_train_acc, epoch)
            summary_writer_test.add_summary(summary_test_acc, epoch)

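            # Report the macro-averaged F1 on the test set alongside the accuracy figures.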
            print(
                "Epoch: {} Last batch accuracy: {} Test accuracy: {} F1-Score: {}"
                .format(epoch, acc_train, acc_test,
                        sklearn_f1_score(y_test, y_pred, average='macro')))

        saver.save(sess, LOG_DIR + "/tf_model")