def eval_exp_train(preds, part='train', postproc=None, zip_fname=None):
    """
    Evaluate predictions from experiment

    Turns the IOB tags predicted by the CRF into Brat annotation files and
    passes those to the official scoring function.

    preds:     CRF predictions, handed unchanged to pred_to_iob
    part:      name of the data partition; selects the sub-directory of
               LOCAL_DIR and DATA_DIR that is read, and the '_<part>'
               directory that output is written under
    postproc:  optional callable, invoked as postproc(in_iob_dir, out_iob_dir);
               when given, its output directory replaces the raw IOB
               directory for the Brat conversion
    zip_fname: optional; when truthy, package(brat_dir, part, zip_fname)
               is called after scoring

    Returns the directory containing the predicted Brat annotations.
    """
    gold_dir = join(LOCAL_DIR, part)
    out_root = '_' + part

    # File names are stored under the '__filenames__' key of the labels
    doc_names = read_labels(join(gold_dir, part + '_labels.pkl'))['__filenames__']

    # CRF prediction -> IOB tags
    iob_dir = out_root + '/iob'
    pred_to_iob(preds, doc_names, join(gold_dir, 'iob'), iob_dir)

    if postproc:
        # Post-processed tags get their own directory, which is the one
        # used from here on
        raw_iob_dir, iob_dir = iob_dir, out_root + '/iob_pp'
        postproc(raw_iob_dir, iob_dir)

    # Predicted IOB tags -> predicted Brat annotations
    text_dir = join(DATA_DIR, part)
    brat_dir = out_root + '/brat'
    iob_to_brat(iob_dir, text_dir, brat_dir)

    # Evaluate
    calculateMeasures(text_dir, brat_dir, 'rel')

    if zip_fname:
        package(brat_dir, part, zip_fname)

    return brat_dir
# Materialise the folds once so that every entity is evaluated on the very
# same splits (a bare generator would be exhausted after the first entity)
fold_iter = group_k_fold.split(
    data['feats'], data['Material'], data['filenames'])
splits = list(fold_iter)

# Step 4: Run CRF classifier
crf = CRF(c1=0.1, c2=0.1, all_possible_transitions=True)
pred = dict()

for ent in ENTITIES:
    gold_tags = data[ent]
    pred[ent] = cross_val_predict(crf, data['feats'], gold_tags, cv=splits)

    # Scores are reported on the 'B' and 'I' tags only;
    # 'O' is left out because it is by far the most frequent class
    print('\n' + ent + ':\n')
    report = flat_classification_report(
        gold_tags, pred[ent], digits=3, labels=('B', 'I'))
    print(report)

# Step 5: Convert CRF prediction to IOB tags
pred_iob_dir = '_train/iob'
pred_to_iob(pred, data['filenames'], true_iob_dir, pred_iob_dir)

# Step 6: Convert predicted IOB tags to predicted Brat annotations
txt_dir = join(DATA_DIR, 'train')
brat_dir = '_train/brat'
iob_to_brat(pred_iob_dir, txt_dir, brat_dir)

# Step 7: Evaluate
calculateMeasures(txt_dir, brat_dir, 'rel')
continue except IndexError: pass span = Span(unique_label, m.start(), m.end()) if span not in annots: print(annots) print('==> adding span', span, 'for phrase', repr(phrase)) spans.append(span) out_brat_fname = join(out_brat_dir, basename(in_brat_fname)) write_brat_file(out_brat_fname, spans, text) def get_phrase_annots(spans, text): phrase2annots = defaultdict(list) for span in spans: phrase = text[span.begin:span.end] phrase2annots[phrase].append(span) return phrase2annots in_brat_dir = join(EXPS_DIR, 'best/_dev/brat') txt_dir = join(DATA_DIR, 'dev') out_brat_dir = '_dev/brat' postproc_brat(in_brat_dir, txt_dir, out_brat_dir) calculateMeasures(txt_dir, in_brat_dir, 'rel') calculateMeasures(txt_dir, out_brat_dir, 'rel')
# Positional arguments: one input directory per entity label, followed by
# the two output directories. Registration order is the command-line order.
for arg_name, arg_help in (
        ('material_dir',
         'directory containing tab-delimited files with predicted IOB tags for label "Material" in 3rd column'),
        ('process_dir',
         'directory containing tab-delimited files with predicted IOB tags for label "Process" in 3rd column'),
        ('task_dir',
         'directory containing tab-delimited files with predicted IOB tags for label "Task" in 3rd column'),
        ('pred_iob_dir',
         'directory for writing json files with predicted IOB tags'),
        ('pred_brat_dir',
         'directory for writing predicted Brat annotation files'),
):
    parser.add_argument(arg_name, help=arg_help)

args = parser.parse_args()

# Step 1: Convert CRF++ output to IOB tags in Json format
crfplus_dirs = {
    'Material': args.material_dir,
    'Process': args.process_dir,
    'Task': args.task_dir,
}
convert(crfplus_dirs, args.true_iob_dir, args.pred_iob_dir)

# Step 2: Convert predicted IOB tags to predicted Brat annotations
iob_to_brat(args.pred_iob_dir, args.true_brat_dir, args.pred_brat_dir)

# Step 3: Evaluate
calculateMeasures(args.true_brat_dir, args.pred_brat_dir, 'rel')
indent=4, sort_keys=True, ensure_ascii=False) except Exception as err: print('*** ERRROR **', err) print(crfplus_fname) print(line) print() # Step 1: Convert CFR++ output to IOB tags in Json format true_iob_dir = join(LOCAL_DIR, 'train/iob') pred_iob_dir = '_entityOp_Utpal/iob' crfplus_dirs = { 'Material': '_entityOp_Utpal/materialOp', 'Process': '_entityOp_Utpal/processOp', 'Task': '_entityOp_Utpal/taskOp' } convert(crfplus_dirs, true_iob_dir, pred_iob_dir) # Step 2: Convert predicted IOB tags to predicted Brat annotations true_brat_dir = join(DATA_DIR, 'train') pred_brat_dir = '_entityOp_Utpal/brat' iob_to_brat(pred_iob_dir, true_brat_dir, pred_brat_dir) # Step 3: Evaluate calculateMeasures(true_brat_dir, pred_brat_dir, 'rel')