Ejemplo n.º 1
0
def eval_exp_train(preds, part='train', postproc=None, zip_fname=None):
    """
    Evaluate predictions from experiment.

    Converts IOB tags predicted by CRF to Brat format and then calls the
    official scoring function.

    Parameters
    ----------
    preds : dict
        Predictions passed straight through to pred_to_iob (presumably a
        mapping from entity label to predicted IOB tags — confirm against
        pred_to_iob).
    part : str
        Data partition to evaluate (e.g. 'train' or 'dev').
    postproc : callable or None
        Optional post-processing step, called as
        postproc(pred_iob_dir, postproc_dir) on the predicted IOB files.
    zip_fname : str or None
        If given, package the predicted Brat files into this zip archive.

    Returns
    -------
    str
        Directory containing the predicted Brat annotation files.
    """
    part_dir = join(LOCAL_DIR, part)
    true_iob_dir = join(part_dir, 'iob')

    labels_fname = join(part_dir, part + '_labels.pkl')
    labels = read_labels(labels_fname)
    filenames = labels['__filenames__']

    # Convert CRF prediction to IOB tags.
    # FIX: build these paths with join() like every other path in this
    # function, instead of hard-coded '/' string concatenation.
    pred_iob_dir = join('_' + part, 'iob')
    pred_to_iob(preds, filenames, true_iob_dir, pred_iob_dir)

    if postproc:
        # Post-processed tags replace the raw predictions for all later steps.
        postproc_dir = join('_' + part, 'iob_pp')
        postproc(pred_iob_dir, postproc_dir)
        pred_iob_dir = postproc_dir

    # Convert predicted IOB tags to predicted Brat annotations
    txt_dir = join(DATA_DIR, part)
    brat_dir = join('_' + part, 'brat')
    iob_to_brat(pred_iob_dir, txt_dir, brat_dir)

    # Evaluate with the official scoring function
    calculateMeasures(txt_dir, brat_dir, 'rel')

    if zip_fname:
        package(brat_dir, part, zip_fname)

    return brat_dir
Ejemplo n.º 2
0
# Cross-validation folds; groups are given by the filenames, so sentences
# from one file stay within a single fold.
splits = list(group_k_fold.split(data['feats'], data['Material'],
                                 data['filenames']))

# Step 4: Run CRF classifier
crf = CRF(c1=0.1, c2=0.1, all_possible_transitions=True)

pred = {}
for ent in ENTITIES:
    ent_pred = cross_val_predict(crf, data['feats'], data[ent], cv=splits)
    pred[ent] = ent_pred
    # Report scores directly on I and B tags; 'O' is disregarded because
    # it is by far the most frequent class.
    print('\n' + ent + ':\n')
    report = flat_classification_report(data[ent], ent_pred, digits=3,
                                        labels=('B', 'I'))
    print(report)


# Step 5: Convert CRF prediction to IOB tags
pred_iob_dir = '_train/iob'
pred_to_iob(pred, data['filenames'], true_iob_dir, pred_iob_dir)

# Step 6: Convert predicted IOB tags to predicted Brat annotations
txt_dir = join(DATA_DIR, 'train')
brat_dir = '_train/brat'
iob_to_brat(pred_iob_dir, txt_dir, brat_dir)

# Step 7: Evaluate
calculateMeasures(txt_dir, brat_dir, 'rel')

Ejemplo n.º 3
0
labeled as Process.

Unless the token string is a substring of a larger token string already labeled as
Process (e.g. "chemical reaction  enhancement").

We take the majority label. If there is a draw, then we skip it.

Reads IOB files and writes new IOB files.
"""

from os.path import join

from eval import calculateMeasures
from sie import EXPS_DIR, DATA_DIR
from sie.brat import iob_to_brat
from sie.postproc import postproc_labels

# Post-process the predicted IOB labels of the best experiment.
# (Alternative input that was used at some point:
#  join(EXPS_DIR, 'prune/_train/iob'))
in_iob_dir = join(EXPS_DIR, 'best/_train/iob')
out_iob_dir = '_train/iob'
postproc_labels(in_iob_dir, out_iob_dir)

# Step 6: Convert predicted IOB tags to predicted Brat annotations
txt_dir = join(DATA_DIR, 'train')
brat_dir = '_train/brat'
iob_to_brat(out_iob_dir, txt_dir, brat_dir)

# Step 7: Evaluate
calculateMeasures(txt_dir, brat_dir, 'rel')
Ejemplo n.º 4
0
    # One input directory per entity type, each holding CRF++ output files
    # with the predicted IOB tag in the 3rd column.
    parser.add_argument('material_dir',
                        help='directory containing tab-delimited files with predicted IOB tags for label "Material" in 3rd column')
    parser.add_argument('process_dir',
                        help='directory containing tab-delimited files with predicted IOB tags for label "Process" in 3rd column')
    parser.add_argument('task_dir',
                        help='directory containing tab-delimited files with predicted IOB tags for label "Task" in 3rd column')
    # Output directories for the two conversion stages below.
    parser.add_argument('pred_iob_dir',
                        help='directory for writing json files with predicted IOB tags')
    parser.add_argument('pred_brat_dir',
                        help='directory for writing predicted Brat annotation files')

    # NOTE(review): args.true_iob_dir and args.true_brat_dir are used below
    # but not added here — presumably defined earlier in this function
    # (outside the visible chunk); confirm.
    args = parser.parse_args()

    # Step 1: Convert CFR++ output to IOB tags in Json format
    crfplus_dirs = {
        'Material': args.material_dir,
        'Process': args.process_dir,
        'Task': args.task_dir
    }

    convert(crfplus_dirs, args.true_iob_dir, args.pred_iob_dir)

    # Step 2: Convert predicted IOB tags to predicted Brat annotations
    iob_to_brat(args.pred_iob_dir, args.true_brat_dir, args.pred_brat_dir)

    # Step 3: Evaluate predicted against true Brat annotations
    calculateMeasures(args.true_brat_dir, args.pred_brat_dir, 'rel')



Ejemplo n.º 5
0
"""
Test of performance loss due to use of IOB scheme

Converts the derived IOB tags for train/dev data back to Brat annotation format
(.ann files) and then uses the evaluation script to compare them to the original
Brat annotation files.
If the conversion were perfect, scores would be perfect.
However, scores are lower because some annotation spans cannot be aligned
to the tokens produced by Spacy.
Also, entities embedded in entities of the same type (e.g. a Material text span
containing another Material text span) cannot be represented in an IOB scheme.
"""

from os.path import join

from sie import DATA_DIR, LOCAL_DIR
from sie.brat import iob_to_brat

from eval import calculateMeasures

# Round-trip each partition: derived IOB tags -> Brat files -> official scores.
for part in ('train', 'dev'):
    derived_iob_dir = join(LOCAL_DIR, part, 'iob')
    original_txt_dir = join(DATA_DIR, part)
    generated_brat_dir = join('_brat', part)

    iob_to_brat(derived_iob_dir, original_txt_dir, generated_brat_dir)

    print('\nScores for {} part:\n'.format(part))
    calculateMeasures(original_txt_dir, generated_brat_dir, 'rel')
Ejemplo n.º 6
0
                      indent=4,
                      sort_keys=True,
                      ensure_ascii=False)
        except Exception as err:
            print('*** ERRROR **', err)
            print(crfplus_fname)
            print(line)
            print()


# Step 1: Convert CRF++ output to IOB tags in Json format
true_iob_dir = join(LOCAL_DIR, 'train/iob')
pred_iob_dir = '_entityOp_Utpal/iob'

# One CRF++ output directory per entity type.
crfplus_dirs = {'Material': '_entityOp_Utpal/materialOp',
                'Process': '_entityOp_Utpal/processOp',
                'Task': '_entityOp_Utpal/taskOp'}
convert(crfplus_dirs, true_iob_dir, pred_iob_dir)

# Step 2: Convert predicted IOB tags to predicted Brat annotations
true_brat_dir = join(DATA_DIR, 'train')
pred_brat_dir = '_entityOp_Utpal/brat'
iob_to_brat(pred_iob_dir, true_brat_dir, pred_brat_dir)

# Step 3: Evaluate predicted against true Brat annotations
calculateMeasures(true_brat_dir, pred_brat_dir, 'rel')