# Command-line script: calls ``convert_iob_to_bio`` once for each of the
# train / validation / test files found under ``--io_dir``.
import argparse
import os

from baseline.utils import convert_iob_to_bio

parser = argparse.ArgumentParser(
    description='Translate input sequence to output sequence')

# (flag, help text, default) for every option the script accepts; they are
# registered in this order so ``--help`` lists them as before.
for _flag, _help, _default in (
    ('--io_dir', 'Input/Output dir', '../data'),
    ('--train_file', 'Training file relative name', 'eng.train'),
    ('--valid_file', 'Validation file relative name', 'eng.testa'),
    ('--test_file', 'Test file relative name', 'eng.testb'),
    ('--suffix', 'Suffix to append', '.bio'),
):
    parser.add_argument(_flag, help=_help, default=_default)

args = parser.parse_args()

# Each split is handled the same way: the first argument is the file's path
# inside ``io_dir``; the second is that same path with ``--suffix`` appended.
for _relative_name in (args.train_file, args.valid_file, args.test_file):
    convert_iob_to_bio(
        os.path.join(args.io_dir, _relative_name),
        os.path.join(args.io_dir, _relative_name + args.suffix),
    )
def test():
    """Convert generated IOB tags and compare them with the generated BIO tags.

    Both tag sequences are produced from the same spans, so the conversion
    result is expected to equal the BIO rendering of those spans.
    """
    # NOTE(review): another function named ``test`` appears in this source;
    # if both live in one module the later definition shadows this one.
    spans = generate_spans()
    iob_tags = generate_iob(spans)
    expected = generate_bio(spans)
    assert convert_iob_to_bio(iob_tags) == expected
def test_iob_bio_i_first():
    """An ``I-X`` tag at the very start of the sequence must come back as ``B-X``."""
    tags = ['I-X', 'O']
    expected = ['B-X', 'O']
    assert convert_iob_to_bio(tags) == expected
def test_iob_bio_i_after_i_same():
    """In a run of ``I-X`` tags following ``O``, only the first must become ``B-X``."""
    tags = ['O', 'I-X', 'I-X', 'O']
    expected = ['O', 'B-X', 'I-X', 'O']
    assert convert_iob_to_bio(tags) == expected
def test_iob_bio_i_after_b_diff():
    """An ``I-Y`` tag directly after ``B-X`` (a different type) must become ``B-Y``."""
    tags = ['O', 'B-X', 'I-Y', 'O']
    expected = ['O', 'B-X', 'B-Y', 'O']
    assert convert_iob_to_bio(tags) == expected
def test():
    """Round-trip check: BIO -> IOB -> BIO must reproduce the original BIO tags."""
    # NOTE(review): another function named ``test`` appears in this source;
    # if both live in one module this later definition shadows the earlier one.
    expected = generate_bio(generate_spans())
    iob_tags = convert_bio_to_iob(expected)
    assert convert_iob_to_bio(iob_tags) == expected