def do_fold(train_loc, model_loc, test_loc, out_loc, iters=5):
    """Train a tagger on one fold and write its tags for the test data.

    A ``GreedyTagger`` is created at ``model_loc``, trained on the CoNLL
    data read from ``train_loc``, saved, and then applied to the data read
    from ``test_loc``. The tagged test data is written to ``out_loc``.

    Args:
        train_loc: Path-like object exposing ``open()``; training data.
        model_loc: Location of the model; converted with ``str()`` before
            being handed to ``GreedyTagger``.
        test_loc: Path-like object exposing ``open()``; data to be tagged.
        out_loc: Location of the output file; converted with ``str()`` and
            opened for writing (an existing file is truncated).
        iters: Number of training iterations, passed as ``nr_iter``.
    """
    # Single formatted string so the output is the same under the Python 2
    # print statement and the Python 3 print function.
    print('%s %s' % (model_loc, out_loc))
    # The tagger is constructed before any data is read, as in the original
    # statement order.
    tagger = GreedyTagger(str(model_loc), clean=True, reuse_idx=True)

    # Surrounding whitespace is stripped from the training text before it
    # is parsed. Splitting on blank lines and re-joining with the same
    # separator would be a no-op, so only the strip is kept.
    with train_loc.open() as train_file:
        train_text = train_file.read().strip()
    train = read_conll(train_text)
    tagger.train(train, nr_iter=iters)
    tagger.save()

    # The test text is passed through unstripped.
    with test_loc.open() as test_file:
        test_data = test_file.read()
    to_tag = read_conll(test_data)
    tagger.add_tags(to_tag)

    # Close the output file deterministically so the tags are flushed.
    with open(str(out_loc), 'w') as out_file:
        to_tag.write_tags(out_file)
def main(train_loc, out_loc, label_set="MALT", allow_reattach=False,
         allow_moves=False):
    """Add gold moves to the training data and write them to ``out_loc``.

    A ``Parser`` is created in ``/tmp/parser`` (the directory is created if
    it does not exist), the CoNLL data at ``train_loc`` is read, the parser
    adds gold moves to it, and the result is written with
    ``train.write_moves``.

    Args:
        train_loc: Path of the training file, as accepted by ``open``.
        out_loc: Path of the output file; opened for writing (an existing
            file is truncated).
        label_set: Forwarded to ``Parser`` as ``label_set``.
        allow_reattach: Forwarded to ``Parser``. When true, a grammar
            location named ``rgrammar`` next to ``train_loc`` is also
            passed; otherwise ``grammar_loc`` is ``None``.
        allow_moves: Forwarded to ``Parser`` as ``allow_move``.
    """
    parser_dir = Path('/tmp').join('parser')
    if not parser_dir.exists():
        parser_dir.mkdir()

    if allow_reattach:
        grammar_loc = Path(train_loc).parent().join('rgrammar')
    else:
        grammar_loc = None

    parser = Parser(str(parser_dir), clean=True, label_set=label_set,
                    allow_reattach=allow_reattach, allow_move=allow_moves,
                    grammar_loc=grammar_loc)

    # Read through a context manager so the handle is closed promptly.
    with open(train_loc) as train_file:
        train_text = train_file.read()
    train = read_conll(train_text)
    parser.add_gold_moves(train)

    with open(out_loc, 'w') as out_file:
        train.write_moves(out_file)
def main(model_loc, dev_loc):
    """Load a parser and run ``get_edit_stats`` over the development data.

    Args:
        model_loc: Model location, passed to ``load_parser``.
        dev_loc: Path of the CoNLL development file, as accepted by ``open``.

    The return value of ``get_edit_stats`` is discarded.
    """
    parser = load_parser(model_loc)
    # Read through a context manager so the handle is closed promptly.
    with open(dev_loc) as dev_file:
        dev_text = dev_file.read()
    sents = read_conll(dev_text)
    get_edit_stats(parser, sents)