# Assumed imports for the callbacks below; the exact NeMo modules providing
# logging and the helpers (list2str, plot_confusion_matrix,
# get_classification_report, get_f1_scores) may differ between NeMo versions.
import random

import numpy as np
from sklearn.metrics import classification_report

from nemo import logging
from nemo.collections.nlp.utils.callback_utils import (
    get_classification_report, get_f1_scores, list2str, plot_confusion_matrix)


def _eval_epochs_done_callback(task_name,
                               global_vars,
                               label_ids,
                               graph_fold=None,
                               normalize_cm=True):
    labels = np.asarray(global_vars[task_name + '_all_labels'])
    preds = np.asarray(global_vars[task_name + '_all_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    labels = labels[subtokens_mask]
    preds = preds[subtokens_mask]

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy for task {task_name}: {accuracy}')

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i:i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i:i + sample_size]))

    classification_report = get_classification_report(labels, preds, label_ids)
    logging.info(classification_report)

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels,
                              preds,
                              graph_fold,
                              label_ids,
                              normalize=normalize_cm,
                              prefix=task_name)
    return accuracy
def eval_epochs_done_callback(global_vars,
                              label_ids,
                              graph_fold=None,
                              none_label_id=0,
                              normalize_cm=True):
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    labels = labels[subtokens_mask]
    preds = preds[subtokens_mask]

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy: {accuracy}')

    f1_scores = get_f1_scores(labels, preds, average_modes=['weighted', 'macro', 'micro'])
    for k, v in f1_scores.items():
        logging.info(f'{k}: {v}')

    classification_report = get_classification_report(labels, preds, label_ids)
    logging.info(classification_report)

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels, preds, graph_fold, label_ids, normalize=normalize_cm)

    return {'Accuracy': accuracy}
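# A minimal usage sketch (not from the original source) for the callback above.
# The toy arrays illustrate the expected global_vars layout: flat per-subtoken
# label/prediction ids plus a subtoken mask of the same length, and label_ids
# mapping label names to integer ids. All values here are made up.
example_global_vars = {
    'all_labels': [0, 1, 1, 2, 0, 2],
    'all_preds': [0, 1, 2, 2, 0, 2],
    'all_subtokens_mask': [1, 1, 0, 1, 1, 1],  # 0 marks subword pieces to skip
}
example_label_ids = {'O': 0, 'B-ENT': 1, 'I-ENT': 2}
eval_epochs_done_callback(example_global_vars, example_label_ids)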
Example #3
def _eval_epochs_done_callback(task_name, global_vars, label_ids, graph_fold=None, normalize_cm=True):
    labels = np.array(global_vars[task_name + '_labels'])
    preds = np.array(global_vars[task_name + '_preds'])

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels, preds, graph_fold, label_ids, normalize=normalize_cm, prefix=task_name)

    logging.info(f'{get_classification_report(labels, preds, label_ids)}')
    return get_classification_report(labels, preds, label_ids, output_dict=True)
def eval_epochs_done_callback(global_vars, graph_fold):
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])
    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy: {accuracy}')

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i:i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i:i + sample_size]))
    plot_confusion_matrix(labels, preds, graph_fold)
    logging.info(classification_report(labels, preds))
    return dict({"accuracy": accuracy})
Example #5
def eval_epochs_done_callback(global_vars, graph_fold):
    intent_labels = np.asarray(global_vars['all_intent_labels'])
    intent_preds = np.asarray(global_vars['all_intent_preds'])

    slot_labels = np.asarray(global_vars['all_slot_labels'])
    slot_preds = np.asarray(global_vars['all_slot_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    slot_labels = slot_labels[subtokens_mask]
    slot_preds = slot_preds[subtokens_mask]

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if intent_preds.shape[0] > sample_size + 1:
        i = random.randint(0, intent_preds.shape[0] - sample_size - 1)
    logging.info("Sampled i_preds: [%s]" %
                 list2str(intent_preds[i:i + sample_size]))
    logging.info("Sampled intents: [%s]" %
                 list2str(intent_labels[i:i + sample_size]))
    logging.info("Sampled s_preds: [%s]" %
                 list2str(slot_preds[i:i + sample_size]))
    logging.info("Sampled slots: [%s]" %
                 list2str(slot_labels[i:i + sample_size]))

    plot_confusion_matrix(intent_labels, intent_preds, graph_fold)

    logging.info('Intent prediction results')
    correct_preds = sum(intent_labels == intent_preds)
    intent_accuracy = correct_preds / intent_labels.shape[0]
    logging.info(f'Intent accuracy: {intent_accuracy}')
    logging.info(
        f'Classification report:\n{classification_report(intent_labels, intent_preds)}')

    logging.info('Slot prediction results')
    slot_accuracy = sum(slot_labels == slot_preds) / slot_labels.shape[0]
    logging.info(f'Slot accuracy: {slot_accuracy}')
    logging.info(
        f'Classification report:\n{classification_report(slot_labels[:-2], slot_preds[:-2])}')

    return {
        'intent_accuracy': intent_accuracy,
        'slot_accuracy': slot_accuracy
    }
Example #6
def eval_epochs_done_callback(global_vars,
                              label_ids,
                              graph_fold=None,
                              none_label_id=0,
                              normalize_cm=True):
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    labels = labels[subtokens_mask]
    preds = preds[subtokens_mask]

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy: {accuracy}')

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i:i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i:i + sample_size]))

    # remove labels from label_ids that don't appear in the dev set
    used_labels = set(labels) | set(preds)
    label_ids = {
        k: label_ids[k]
        for k, v in label_ids.items() if v in used_labels
    }

    logging.info(classification_report(labels, preds, target_names=label_ids))

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels,
                              preds,
                              graph_fold,
                              label_ids,
                              normalize=normalize_cm)

    return {'Accuracy': accuracy}
Example #7
def eval_epochs_done_callback(global_vars,
                              intents_label_ids,
                              slots_label_ids,
                              graph_fold=None,
                              normalize_cm=True):
    intent_labels = np.asarray(global_vars['all_intent_labels'])
    intent_preds = np.asarray(global_vars['all_intent_preds'])

    slot_labels = np.asarray(global_vars['all_slot_labels'])
    slot_preds = np.asarray(global_vars['all_slot_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    slot_labels = slot_labels[subtokens_mask]
    slot_preds = slot_preds[subtokens_mask]

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if intent_preds.shape[0] > sample_size + 1:
        i = random.randint(0, intent_preds.shape[0] - sample_size - 1)
    logging.info("Sampled i_preds: [%s]" %
                 list2str(intent_preds[i:i + sample_size]))
    logging.info("Sampled intents: [%s]" %
                 list2str(intent_labels[i:i + sample_size]))
    logging.info("Sampled s_preds: [%s]" %
                 list2str(slot_preds[i:i + sample_size]))
    logging.info("Sampled slots: [%s]" %
                 list2str(slot_labels[i:i + sample_size]))

    if graph_fold:
        # calculate, plot and save the confusion_matrix
        plot_confusion_matrix(intent_labels,
                              intent_preds,
                              graph_fold,
                              intents_label_ids,
                              normalize=normalize_cm,
                              prefix='Intent')
        plot_confusion_matrix(slot_labels,
                              slot_preds,
                              graph_fold,
                              slots_label_ids,
                              normalize=normalize_cm,
                              prefix='Slot')

    logging.info('Slot Prediction Results:')
    slot_accuracy = np.mean(slot_labels == slot_preds)
    logging.info(f'Slot Accuracy: {slot_accuracy}')
    f1_scores = get_f1_scores(slot_labels,
                              slot_preds,
                              average_modes=['weighted', 'macro', 'micro'])
    for k, v in f1_scores.items():
        logging.info(f'{k}: {v}')

    logging.info(
        f'\n {get_classification_report(slot_labels, slot_preds, label_ids=slots_label_ids)}'
    )

    logging.info('Intent Prediction Results:')
    intent_accuracy = np.mean(intent_labels == intent_preds)
    logging.info(f'Intent Accuracy: {intent_accuracy}')
    f1_scores = get_f1_scores(intent_labels,
                              intent_preds,
                              average_modes=['weighted', 'macro', 'micro'])
    for k, v in f1_scores.items():
        logging.info(f'{k}: {v}')

    logging.info(
        f'\n {get_classification_report(intent_labels, intent_preds, label_ids=intents_label_ids)}'
    )

    return {
        'intent_accuracy': intent_accuracy,
        'slot_accuracy': slot_accuracy
    }
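# A hypothetical invocation of the joint intent/slot callback above, with
# made-up data, to illustrate the expected keys: one intent id per utterance,
# plus per-subtoken slot ids and a matching subtoken mask.
example_global_vars = {
    'all_intent_labels': [0, 1, 1],
    'all_intent_preds': [0, 1, 0],
    'all_slot_labels': [2, 2, 0, 1, 0, 1],
    'all_slot_preds': [2, 0, 0, 1, 0, 1],
    'all_subtokens_mask': [1, 0, 1, 1, 1, 1],
}
example_intents_label_ids = {'weather.find': 0, 'alarm.set': 1}
example_slots_label_ids = {'O': 0, 'time': 1, 'location': 2}
metrics = eval_epochs_done_callback(example_global_vars,
                                    example_intents_label_ids,
                                    example_slots_label_ids)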