Example #1
    pred_y = []
    current_index = 0
    for sent_y in test_y:
        pred_y.append(flat_pred_y[current_index:current_index + len(sent_y)])
        current_index += len(sent_y)
    assert len(pred_y) == len(test_x_features)

    sents = [[word['word[0]'] for word in words] for words in test_x_features]

    for words, features,\
        true_labels, pred_labels in izip(sents,
                                         test_x_features, test_y, pred_y):
        print_label_error(words,
                          features,
                          true_labels,
                          pred_labels,
                          target_true_label='IC',
                          target_pred_label='AL',
                          print_features=True,
                          model=model,
                          dict_vect=dict_vect,
                          label_encoder=label_encoder)

print "Confusion matrix:"
table = pds.DataFrame(confusion_matrix(list(chain.from_iterable(test_y)),
                                       list(chain.from_iterable(pred_y)),
                                       labels=labels),
                      index=map(lambda s: '{}_true'.format(s), labels),
                      columns=map(lambda s: '{}_pred'.format(s), labels))
print table
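
The snippet above re-groups the flat prediction sequence flat_pred_y back into per-sentence lists and then prints a labelled confusion matrix. A minimal standalone sketch of those two steps (the label set and toy sentences below are invented purely for illustration) could look like this:

from itertools import chain

import pandas as pd
from sklearn.metrics import confusion_matrix

labels = ['AL', 'IC', 'AU']                    # hypothetical label set
test_y = [['AL', 'IC'], ['AL', 'AL', 'AU']]    # true labels, one list per sentence
flat_pred_y = ['AL', 'AL', 'AL', 'IC', 'AU']   # one flat sequence of predictions

# re-group the flat predictions so each sentence gets back its own slice
pred_y = []
current_index = 0
for sent_y in test_y:
    pred_y.append(flat_pred_y[current_index:current_index + len(sent_y)])
    current_index += len(sent_y)

# flatten both sides again and label the confusion-matrix rows/columns
table = pd.DataFrame(
    confusion_matrix(list(chain.from_iterable(test_y)),
                     list(chain.from_iterable(pred_y)),
                     labels=labels),
    index=['{}_true'.format(l) for l in labels],
    columns=['{}_pred'.format(l) for l in labels])
print(table)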
Example #2
def eval_rule_based(output_path, okform_dir,
                    accepted_labels=set(['AL', 'IC']),
                    print_errors=False):
    """
    Return:
    numpy.ndarray: (#label, 3)
    count of #match, #mode, #ref for each label
    
    First word of sentence is ignored
    """
    ret_stat = np.zeros((len(accepted_labels), 3),
                        dtype=np.float64)
    
    n_finished = 0
    n_errorless = 0
    
    with Path(output_path).open('r', encoding='utf8') as prediction_file:
        while True:
            if n_finished % 1000 == 0:
                logger.info('Finished {}/{}'.format(n_errorless, n_finished))
                
            line1 = prediction_file.readline()
            line2 = prediction_file.readline()

            if not line2:
                break

            try:
                id_ = line1.strip()
                pred_json = json.loads(line2.strip())

                if pred_json['resultingHeadline'] is None:
                    continue

                pred_tokens = pred_json['resultingHeadline']
                
                auxil_path = str(Path(okform_dir) /
                                 Path(id_).with_suffix('.auxil'))
                paf_path = str(Path(okform_dir) /
                               Path(id_).with_suffix('.paf'))
                
                title_sents, _ = separate_title_from_body(auxil_path, paf_path)
                
                true_tokens = [item['token']
                               for item in title_sents[0]['features']]
                
                if is_consistent_prediction(pred_tokens, true_tokens):
                    stat = eval_stat(pred_tokens, true_tokens,
                                     accepted_labels)
                    if print_errors:
                        print_label_error(true_tokens,
                                          # we don't have features here
                                          features=None,
                                          instance_id=id_,
                                          excluded_indices=set([0]),
                                          correct_labels=map(get_label,
                                                             true_tokens),
                                          predicted_labels=map(get_label,
                                                               pred_tokens),
                                          target_true_label='IC',
                                          target_pred_label='AL',
                                          print_features=False)
                    ret_stat += stat
                    n_errorless += 1
                else:
                    logger.debug(
                        'Predicted and true tokens inconsistent:\n{}\n{}\n'.format(
                            pred_tokens, true_tokens)
                    )
            except Exception:
                logger.error(traceback.format_exc())
                continue
            finally:
                n_finished += 1

    return ret_stat
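
The docstring only says that the returned array holds per-label counts of #match, #mode and #ref. Assuming the second column is the count produced by the model (predicted occurrences) and the third is the reference count, which is an interpretation of the docstring rather than something stated in the code, a small helper for turning that array into per-label precision/recall/F1 might look like this:

def stat_to_prf(ret_stat, labels):
    """Convert a (#label, 3) count array into per-label precision/recall/F1.

    Column layout assumed here (not stated in the original code):
    column 0 = #match, column 1 = #predicted by the model, column 2 = #reference.
    """
    scores = {}
    for (match, model_count, ref_count), label in zip(ret_stat, labels):
        precision = match / model_count if model_count else 0.0
        recall = match / ref_count if ref_count else 0.0
        f1 = (2 * precision * recall / (precision + recall)
              if precision + recall else 0.0)
        scores[label] = (precision, recall, f1)
    return scores

# e.g. stat_to_prf(eval_rule_based(output_path, okform_dir), ['AL', 'IC'])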
Example #3
    word_counter = Counter()
    feature_counter = Counter()

    for words, s in izip(load_sents(content_path),
                         load_test_data(test_data_path)):
        correct_labels = [l for _, l in s]

        features = [f for f, _ in s]
        predicted_labels = tagger.tag(features)

        # all the predicted/true labels
        # used for confusion matrix
        pred_y += predicted_labels
        true_y += correct_labels

        print_label_error(words, features, correct_labels, predicted_labels,
                          args.true_label, args.pred_label,
                          args.print_features)

    cm = confusion_matrix(true_y, pred_y, labels=labels)

    table = df(cm,
               index=map(lambda s: '{}_true'.format(s), labels),
               columns=map(lambda s: '{}_pred'.format(s), labels))
    print table
    print word_counter.most_common(10)
    print feature_counter.most_common(10)
    # import sys
    # sys.stderr.write("Confusion matrix:\n")
    # sys.stderr.write("{}".format(cm))
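
Example #3 prints word_counter.most_common(10) and feature_counter.most_common(10), but in the code shown neither counter is ever populated. One plausible way to fill them, written here as a hypothetical helper rather than something taken from the original example, is to count the words and per-token features of mislabelled tokens:

from collections import Counter

def count_errors(words, features, correct_labels, predicted_labels,
                 word_counter, feature_counter):
    """Count words and features of mislabelled tokens (hypothetical helper).

    features is assumed to hold one sequence of feature strings per token,
    as produced by a pycrfsuite-style feature extractor.
    """
    for word, feats, true_l, pred_l in zip(words, features,
                                           correct_labels, predicted_labels):
        if true_l != pred_l:
            word_counter[word] += 1
            feature_counter.update(feats)

word_counter, feature_counter = Counter(), Counter()
# inside the loop in Example #3 one could then call:
# count_errors(words, features, correct_labels, predicted_labels,
#              word_counter, feature_counter)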
Example #4
def eval_rule_based(output_path,
                    okform_dir,
                    accepted_labels=set(['AL', 'IC']),
                    print_errors=False):
    """
    Return:
    numpy.ndarray: (#label, 3)
    count of #match, #mode, #ref for each label
    
    First word of sentence is ignored
    """
    ret_stat = np.zeros((len(accepted_labels), 3), dtype=np.float64)

    n_finished = 0
    n_errorless = 0

    with Path(output_path).open('r', encoding='utf8') as prediction_file:
        while True:
            if n_finished % 1000 == 0:
                logger.info('Finished {}/{}'.format(n_errorless, n_finished))

            line1 = prediction_file.readline()
            line2 = prediction_file.readline()

            if not line2:
                break

            try:
                id_ = line1.strip()
                pred_json = json.loads(line2.strip())

                if pred_json['resultingHeadline'] is None:
                    continue

                pred_tokens = pred_json['resultingHeadline']

                auxil_path = str(
                    Path(okform_dir) / Path(id_).with_suffix('.auxil'))
                paf_path = str(
                    Path(okform_dir) / Path(id_).with_suffix('.paf'))

                title_sents, _ = separate_title_from_body(auxil_path, paf_path)

                true_tokens = [
                    item['token'] for item in title_sents[0]['features']
                ]

                if is_consistent_prediction(pred_tokens, true_tokens):
                    stat = eval_stat(pred_tokens, true_tokens, accepted_labels)
                    if print_errors:
                        print_label_error(
                            true_tokens,
                            # we don't have features here
                            features=None,
                            instance_id=id_,
                            excluded_indices=set([0]),
                            correct_labels=map(get_label, true_tokens),
                            predicted_labels=map(get_label, pred_tokens),
                            target_true_label='IC',
                            target_pred_label='AL',
                            print_features=False)
                    ret_stat += stat
                    n_errorless += 1
                else:
                    logger.debug(
                        'Predicted and true tokens inconsistent:\n{}\n{}\n'.
                        format(pred_tokens, true_tokens))
            except Exception:
                logger.error(traceback.format_exc())
                continue
            finally:
                n_finished += 1

    return ret_stat
Example #5
    for words, s in izip(
            load_sents(content_path),
            load_test_data(test_data_path)):
        correct_labels = [l for _, l in s]

        features = [f for f, _ in s]
        predicted_labels = tagger.tag(features)
        
        # all the predicted/true labels
        # used for confusion matrix
        pred_y += predicted_labels
        true_y += correct_labels
        
        print_label_error(words, features,
                          correct_labels, predicted_labels,
                          args.true_label, args.pred_label,
                          args.print_features)

    cm = confusion_matrix(true_y, pred_y,
                          labels=labels)

    table = df(cm,
               index=map(lambda s: '{}_true'.format(s), labels),
               columns=map(lambda s: '{}_pred'.format(s), labels))
    print table
    print word_counter.most_common(10)
    print feature_counter.most_common(10)
    # import sys
    # sys.stderr.write("Confusion matrix:\n")
    # sys.stderr.write("{}".format(cm))
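
For reference, eval_rule_based() reads its prediction file two lines at a time: an article id on the first line, then a JSON object whose resultingHeadline field holds the predicted headline tokens (or null when the rule-based system produced nothing). A sketch of writing a file in that layout, with ids, tokens and the file name made up for illustration, might be:

import json

# hypothetical predictions: (article id, predicted headline tokens or None)
predictions = [
    ('article-0001', ['Obama', 'Visits', 'Berlin']),
    ('article-0002', None),
]

with open('predictions.txt', 'w') as f:
    for id_, tokens in predictions:
        f.write(id_ + '\n')                                        # line 1: id
        f.write(json.dumps({'resultingHeadline': tokens}) + '\n')  # line 2: JSON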