コード例 #1
0
ファイル: jackknife_tagger.py プロジェクト: xgeric/redshift
def do_fold(train_loc, model_loc, test_loc, out_loc, iters=5):
    """Train a tagger on one fold and write its tags for the held-out data.

    Args:
        train_loc: Object exposing ``open()``; its text is parsed with
            ``read_conll`` and used for training.
        model_loc: Model location, passed to ``GreedyTagger`` as
            ``str(model_loc)``.
        test_loc: Object exposing ``open()``; its text is parsed with
            ``read_conll`` and then tagged.
        out_loc: Output location, opened for writing as ``str(out_loc)``.
        iters: Value forwarded to ``tagger.train`` as ``nr_iter``.
    """
    # Single-string form prints the same text under Python 2 and Python 3.
    print('%s %s' % (model_loc, out_loc))
    tagger = GreedyTagger(str(model_loc), clean=True, reuse_idx=True)

    # Read inside a context manager so the handle is closed promptly.
    # Only the surrounding whitespace is stripped; splitting on blank lines
    # and re-joining with the same separator would yield the same string.
    with train_loc.open() as train_file:
        train = read_conll(train_file.read().strip())
    tagger.train(train, nr_iter=iters)
    tagger.save()

    with test_loc.open() as test_file:
        to_tag = read_conll(test_file.read())
    tagger.add_tags(to_tag)

    # Closing the output file explicitly guarantees the tags are flushed to
    # disk before this function returns.
    with open(str(out_loc), 'w') as out_file:
        to_tag.write_tags(out_file)
コード例 #2
0
def do_fold(train_loc, model_loc, test_loc, out_loc, iters=5):
    """Train a tagger on one fold and write its tags for the held-out data.

    Args:
        train_loc: Object exposing ``open()``; its text is parsed with
            ``read_conll`` and used for training.
        model_loc: Model location, passed to ``GreedyTagger`` as
            ``str(model_loc)``.
        test_loc: Object exposing ``open()``; its text is parsed with
            ``read_conll`` and then tagged.
        out_loc: Output location, opened for writing as ``str(out_loc)``.
        iters: Value forwarded to ``tagger.train`` as ``nr_iter``.
    """
    # Single-string form prints the same text under Python 2 and Python 3.
    print('%s %s' % (model_loc, out_loc))
    tagger = GreedyTagger(str(model_loc), clean=True, reuse_idx=True)

    # Read inside a context manager so the handle is closed promptly.
    # Only the surrounding whitespace is stripped; splitting on blank lines
    # and re-joining with the same separator would yield the same string.
    with train_loc.open() as train_file:
        train = read_conll(train_file.read().strip())
    tagger.train(train, nr_iter=iters)
    tagger.save()

    with test_loc.open() as test_file:
        to_tag = read_conll(test_file.read())
    tagger.add_tags(to_tag)

    # Closing the output file explicitly guarantees the tags are flushed to
    # disk before this function returns.
    with open(str(out_loc), 'w') as out_file:
        to_tag.write_tags(out_file)
コード例 #3
0
def main(train_loc, out_loc, label_set="MALT", allow_reattach=False, allow_moves=False):
    """Add gold moves to the training sentences and write them to a file.

    Args:
        train_loc: Path of the input file; its text is parsed with
            ``read_conll``.
        out_loc: Path of the file the moves are written to.
        label_set: Forwarded to ``Parser`` as ``label_set``.
        allow_reattach: Forwarded to ``Parser``; when true, a ``rgrammar``
            path next to ``train_loc`` is also passed as ``grammar_loc``.
        allow_moves: Forwarded to ``Parser`` under the keyword ``allow_move``.
    """
    parser_dir = Path('/tmp').join('parser')
    if not parser_dir.exists():
        parser_dir.mkdir()

    # A grammar location is only supplied when reattachment is enabled.
    if allow_reattach:
        grammar_loc = Path(train_loc).parent().join('rgrammar')
    else:
        grammar_loc = None

    parser = Parser(str(parser_dir), clean=True, label_set=label_set,
                    allow_reattach=allow_reattach, allow_move=allow_moves,
                    grammar_loc=grammar_loc)

    # Read inside a context manager so the input handle is closed promptly,
    # mirroring how the output file is handled below.
    with open(train_loc) as train_file:
        train = read_conll(train_file.read())
    parser.add_gold_moves(train)
    with open(out_loc, 'w') as out_file:
        train.write_moves(out_file)
コード例 #4
0
def main(model_loc, dev_loc):
    """Load a parser and run ``get_edit_stats`` over the sentences in a file.

    Args:
        model_loc: Location handed to ``load_parser``.
        dev_loc: Path of the file whose text is parsed with ``read_conll``.
    """
    parser = load_parser(model_loc)
    # Read inside a context manager so the file handle is closed promptly
    # rather than being left for the garbage collector.
    with open(dev_loc) as dev_file:
        sents = read_conll(dev_file.read())
    get_edit_stats(parser, sents)
コード例 #5
0
ファイル: stats_for_disfl.py プロジェクト: xgeric/redshift
def main(model_loc, dev_loc):
    """Load a parser and run ``get_edit_stats`` over the sentences in a file.

    Args:
        model_loc: Location handed to ``load_parser``.
        dev_loc: Path of the file whose text is parsed with ``read_conll``.
    """
    parser = load_parser(model_loc)
    # Read inside a context manager so the file handle is closed promptly
    # rather than being left for the garbage collector.
    with open(dev_loc) as dev_file:
        sents = read_conll(dev_file.read())
    get_edit_stats(parser, sents)