def main(model_dir, train_loc, dev_loc, iters=5, n_sents=0, feat_thresh=5,
         beam_width=4):
    """Train a tagger on ``train_loc`` and print its per-token accuracy on ``dev_loc``.

    Both files are read whole, stripped, and split on newlines into one
    sentence string per line; every ``|`` is rewritten to ``/`` before the
    text is handed on.

    Args:
        model_dir: Forwarded unchanged to ``redshift.tagger.train``.
        train_loc: Path of the training file.
        dev_loc: Path of the evaluation file.
        iters: Forwarded to the trainer as ``nr_iter``.
        n_sents: When non-zero, only the first ``n_sents`` training
            sentences are used; ``0`` means no limit.
        feat_thresh: Forwarded to the trainer as ``feat_thresh``.
        beam_width: Forwarded to the trainer as ``beam_width``.
    """
    # Context manager so the handle is closed as soon as the text is read.
    with open(train_loc) as train_file:
        sent_strs = train_file.read().strip().replace('|', '/').split('\n')
    # Apply limit
    if n_sents != 0:
        sent_strs = sent_strs[:n_sents]
    tagger = redshift.tagger.train('\n'.join(sent_strs), model_dir,
                                   beam_width=beam_width, nr_iter=iters,
                                   feat_thresh=feat_thresh)
    with open(dev_loc) as dev_file:
        dev_strs = dev_file.read().strip().split('\n')
    dev_input = [Input.from_pos(s.replace('|', '/')) for s in dev_strs]
    # The tiny float start value keeps the final division a float division
    # and avoids ZeroDivisionError when no tokens are scored.
    t = 1e-100
    c = 0
    for sent in dev_input:
        # Tags are copied before tagger.tag() runs and compared afterwards;
        # presumably tag() overwrites them in place -- confirm against the
        # tagger implementation.
        gold_tags = [tok.tag for tok in sent.tokens]
        tagger.tag(sent)
        for i, token in enumerate(sent.tokens):
            c += gold_tags[i] == token.tag
            t += 1
    # Parenthesised form prints the same single value under the Python 2
    # print statement and is also valid as a Python 3 call.
    print(c / t)
def test_tag(tagger, sentence):
    """Tag ``sentence`` and check its reported length and leading words."""
    expected_words = ('This', 'is', 'a', 'test', '.')

    tagger.tag(sentence)

    # NOTE(review): the expected length (7) is larger than the five words
    # checked below; the extra two positions are not inspected here.
    assert sentence.length == 7

    tagged = list(sentence.tokens)
    # Index explicitly so a too-short token list still raises IndexError.
    for position in range(len(expected_words)):
        assert tagged[position].word == expected_words[position]
def main(model_dir, train_loc, dev_loc, iters=5, n_sents=0, feat_thresh=5,
         beam_width=4):
    """Train a tagger from ``train_loc``, then print token accuracy on ``dev_loc``.

    Each file is read in full, stripped, and split on newlines (one
    sentence string per line). Every ``|`` in the text is replaced with
    ``/`` before it is passed to the trainer or to ``Input.from_pos``.

    Args:
        model_dir: Passed through to ``redshift.tagger.train``.
        train_loc: Path of the training file.
        dev_loc: Path of the evaluation file.
        iters: Passed to the trainer as ``nr_iter``.
        n_sents: If non-zero, truncate the training data to its first
            ``n_sents`` sentences; ``0`` disables the limit.
        feat_thresh: Passed to the trainer as ``feat_thresh``.
        beam_width: Passed to the trainer as ``beam_width``.
    """
    # Use a context manager so the file handle is released deterministically.
    with open(train_loc) as train_file:
        sent_strs = train_file.read().strip().replace('|', '/').split('\n')
    # Apply limit
    if n_sents != 0:
        sent_strs = sent_strs[:n_sents]
    tagger = redshift.tagger.train('\n'.join(sent_strs), model_dir,
                                   beam_width=beam_width, nr_iter=iters,
                                   feat_thresh=feat_thresh)
    with open(dev_loc) as dev_file:
        dev_lines = dev_file.read().strip().split('\n')
    dev_input = [Input.from_pos(line.replace('|', '/')) for line in dev_lines]
    # Starting the denominator at a tiny float forces float division and
    # prevents ZeroDivisionError if nothing ends up being scored.
    t = 1e-100
    c = 0
    for sent in dev_input:
        # Snapshot the tags before tagging; they are compared with the tags
        # present after tagger.tag() returns (presumably overwritten in
        # place -- verify against the tagger).
        gold_tags = [tok.tag for tok in sent.tokens]
        tagger.tag(sent)
        for i, token in enumerate(sent.tokens):
            c += gold_tags[i] == token.tag
            t += 1
    # Single parenthesised expression: same output with the Python 2 print
    # statement, and valid syntax on Python 3.
    print(c / t)