Example #1
0
def eval_exp_train(preds, part='train', postproc=None, zip_fname=None):
    """
    Score the predictions of an experiment on one data partition.

    The CRF predictions are first written out as IOB tags, optionally passed
    through a post-processing step, then converted to Brat annotations, which
    are finally handed to ``calculateMeasures`` for scoring.

    preds:     predictions, forwarded unchanged to ``pred_to_iob``
    part:      name of the partition; selects the input directories and the
               ``_<part>/...`` output directories
    postproc:  optional callable invoked as ``postproc(in_dir, out_dir)`` on
               the predicted IOB directory; when given, its output directory
               is the one converted to Brat
    zip_fname: when given, the Brat directory is passed to ``package``
               together with ``part`` and this name

    Returns the directory holding the predicted Brat annotations.
    """
    # All generated output lives under a '_<part>' prefix
    out_root = '_' + part

    # Reference data for this partition
    local_part_dir = join(LOCAL_DIR, part)
    gold_iob_dir = join(local_part_dir, 'iob')
    label_data = read_labels(join(local_part_dir, part + '_labels.pkl'))

    # CRF predictions -> IOB tags
    iob_out = out_root + '/iob'
    pred_to_iob(preds, label_data['__filenames__'], gold_iob_dir, iob_out)

    # Optional post-processing; later steps read from its output directory
    if postproc:
        pp_out = out_root + '/iob_pp'
        postproc(iob_out, pp_out)
        iob_out = pp_out

    # Predicted IOB tags -> predicted Brat annotations
    txt_dir = join(DATA_DIR, part)
    brat_out = out_root + '/brat'
    iob_to_brat(iob_out, txt_dir, brat_out)

    # Scoring
    calculateMeasures(txt_dir, brat_out, 'rel')

    if zip_fname:
        package(brat_out, part, zip_fname)

    return brat_out
Example #2
0
# Materialise the cross-validation folds once, so that the very same
# partitions are reused for every entity type below.
splits = [*group_k_fold.split(data['feats'], data['Material'],
                              data['filenames'])]

# Step 4: Run CRF classifier
crf = CRF(c1=0.1, c2=0.1, all_possible_transitions=True)
pred = {}

for ent in ENTITIES:
    # Cross-validated predictions for this entity type
    pred[ent] = cross_val_predict(crf, data['feats'], data[ent], cv=splits)

    print('\n' + ent + ':\n')
    # Only the 'B' and 'I' tags are scored; 'O' is left out because it is
    # by far the most frequent class.
    report = flat_classification_report(data[ent], pred[ent], digits=3,
                                        labels=('B', 'I'))
    print(report)


# Step 5: Write the CRF predictions out as IOB tags
pred_iob_dir = '_train/iob'
pred_to_iob(pred, data['filenames'], true_iob_dir, pred_iob_dir)

# Step 6: Turn the predicted IOB tags into predicted Brat annotations
txt_dir = join(DATA_DIR, 'train')
brat_dir = '_train/brat'
iob_to_brat(pred_iob_dir, txt_dir, brat_dir)

# Step 7: Score the predicted Brat annotations
calculateMeasures(txt_dir, brat_dir, 'rel')