Esempio n. 1
0
def eval_exp_train(preds, part='train', postproc=None, zip_fname=None):
    """
    Score the predictions of an experiment on one data partition.

    The predictions are first written out as IOB tags, optionally run
    through a post-processing step, then converted to Brat annotations and
    handed to the official scoring function. All output directories are
    relative paths of the form '_<part>/...'.

    preds:     predictions, passed on unchanged to pred_to_iob
    part:      name of the data partition; selects the sub-directory of
               LOCAL_DIR and DATA_DIR as well as the output prefix
    postproc:  optional callable invoked as postproc(in_dir, out_dir); when
               given, its output directory is used for the Brat conversion
    zip_fname: optional; when given, forwarded to package() together with
               the Brat directory and the partition name

    Returns the directory containing the predicted Brat annotations.
    """
    local_part_dir = join(LOCAL_DIR, part)
    gold_iob_dir = join(local_part_dir, 'iob')

    # The pickled labels also carry the list of files they were built from
    label_data = read_labels(join(local_part_dir, part + '_labels.pkl'))
    filenames = label_data['__filenames__']

    # Every output directory shares the same partition-specific prefix
    out_prefix = '_' + part

    # CRF predictions --> IOB tags
    iob_dir = out_prefix + '/iob'
    pred_to_iob(preds, filenames, gold_iob_dir, iob_dir)

    # Optional post-processing; later steps then read the processed tags
    if postproc:
        raw_iob_dir, iob_dir = iob_dir, out_prefix + '/iob_pp'
        postproc(raw_iob_dir, iob_dir)

    # IOB tags --> Brat annotations
    txt_dir = join(DATA_DIR, part)
    brat_dir = out_prefix + '/brat'
    iob_to_brat(iob_dir, txt_dir, brat_dir)

    # Official scoring
    calculateMeasures(txt_dir, brat_dir, 'rel')

    if zip_fname:
        package(brat_dir, part, zip_fname)

    return brat_dir
Esempio n. 2
0
# Materialise the cross-validation splits as a list so that the very same
# splits can be reused for every entity label in the loop below.
# NOTE(review): data['filenames'] is the third argument to split(), presumably
# the grouping key (keeping tokens of one file in the same fold) -- confirm.
splits = list(
    group_k_fold.split(data['feats'], data['Material'], data['filenames']))

# Step 4: Run CRF classifier
crf = CRF(c1=0.1, c2=0.1, all_possible_transitions=True)
# Maps each entity label to its cross-validated predictions
pred = {}

for ent in ENTITIES:
    pred[ent] = cross_val_predict(crf, data['feats'], data[ent], cv=splits)
    # Report scores directly on I and B tags,
    # disregard 'O' because it is by far the most frequent class
    print('\n' + ent + ':\n')
    print(flat_classification_report(data[ent], pred[ent], digits=3,
                                     labels=('B', 'I')))


# Step 5: Convert CRF prediction to IOB tags
# (written to a relative output directory)
pred_iob_dir = '_train/iob'

pred_to_iob(pred, data['filenames'], true_iob_dir, pred_iob_dir)

# Step 6: Convert predicted IOB tags to predicted Brat annotations
txt_dir = join(DATA_DIR, 'train')
brat_dir = '_train/brat'

iob_to_brat(pred_iob_dir, txt_dir, brat_dir)

# Step 7: Evaluate the predicted Brat annotations against the data in txt_dir
calculateMeasures(txt_dir, brat_dir, 'rel')

Esempio n. 3
0
                        continue
                except IndexError:
                    pass

                span = Span(unique_label, m.start(), m.end())
                if span not in annots:
                    print(annots)
                    print('==> adding span', span, 'for phrase', repr(phrase))
                    spans.append(span)

        out_brat_fname = join(out_brat_dir, basename(in_brat_fname))
        write_brat_file(out_brat_fname, spans, text)


def get_phrase_annots(spans, text):
    """
    Group spans by the piece of text they cover.

    For every span, the substring text[span.begin:span.end] is used as key.
    Returns a defaultdict(list) mapping each such substring to the spans
    covering it, in the order in which they occur in `spans`.
    """
    by_phrase = defaultdict(list)
    for annot in spans:
        by_phrase[text[annot.begin:annot.end]].append(annot)
    return by_phrase


# Post-process the Brat annotations found in 'best/_dev/brat' under EXPS_DIR,
# using the 'dev' data directory, and write the result to a relative
# output directory.
in_brat_dir = join(EXPS_DIR, 'best/_dev/brat')
txt_dir = join(DATA_DIR, 'dev')
out_brat_dir = '_dev/brat'

postproc_brat(in_brat_dir, txt_dir, out_brat_dir)

# Score the annotations before and after post-processing, so that the two
# results can be compared.
calculateMeasures(txt_dir, in_brat_dir, 'rel')
calculateMeasures(txt_dir, out_brat_dir, 'rel')
Esempio n. 4
0
    parser.add_argument('material_dir',
                        help='directory containing tab-delimited files with predicted IOB tags for label "Material" in 3rd column')
    parser.add_argument('process_dir',
                        help='directory containing tab-delimited files with predicted IOB tags for label "Process" in 3rd column')
    parser.add_argument('task_dir',
                        help='directory containing tab-delimited files with predicted IOB tags for label "Task" in 3rd column')
    parser.add_argument('pred_iob_dir',
                        help='directory for writing json files with predicted IOB tags')
    parser.add_argument('pred_brat_dir',
                        help='directory for writing predicted Brat annotation files')

    args = parser.parse_args()

    # Step 1: Convert CRF++ output to IOB tags in JSON format
    crfplus_dirs = {
        'Material': args.material_dir,
        'Process': args.process_dir,
        'Task': args.task_dir
    }

    convert(crfplus_dirs, args.true_iob_dir, args.pred_iob_dir)

    # Step 2: Convert predicted IOB tags to predicted Brat annotations
    iob_to_brat(args.pred_iob_dir, args.true_brat_dir, args.pred_brat_dir)

    # Step 3: Evaluate
    calculateMeasures(args.true_brat_dir, args.pred_brat_dir, 'rel')



Esempio n. 5
0
                      indent=4,
                      sort_keys=True,
                      ensure_ascii=False)
        except Exception as err:
            print('*** ERRROR **', err)
            print(crfplus_fname)
            print(line)
            print()


# Step 1: Convert CRF++ output to IOB tags in JSON format
true_iob_dir = join(LOCAL_DIR, 'train/iob')
pred_iob_dir = '_entityOp_Utpal/iob'

# One directory with CRF++ output per entity label
crfplus_dirs = {
    'Material': '_entityOp_Utpal/materialOp',
    'Process': '_entityOp_Utpal/processOp',
    'Task': '_entityOp_Utpal/taskOp'
}

convert(crfplus_dirs, true_iob_dir, pred_iob_dir)

# Step 2: Convert predicted IOB tags to predicted Brat annotations
true_brat_dir = join(DATA_DIR, 'train')
pred_brat_dir = '_entityOp_Utpal/brat'

iob_to_brat(pred_iob_dir, true_brat_dir, pred_brat_dir)

# Step 3: Evaluate the predicted annotations against those in true_brat_dir
calculateMeasures(true_brat_dir, pred_brat_dir, 'rel')