-
Notifications
You must be signed in to change notification settings - Fork 0
/
pipeline_parser_tagger.py
67 lines (55 loc) · 1.99 KB
/
pipeline_parser_tagger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/python
'''
Pipeline to run sequence labeling, pcfg parsing and dual decomposition.
Created on Sep 21, 2013
@author: swabha
'''
import sys,re, time
import utils, cky, viterbi, dd_parser_tagger
'''
Reads the learnt parameters of pcfg and hmm from respective files
'''
def quick_execute(dev):
print "loading learnt parameters..."
pcfg_prob, nonterms, start = cky.get_pcfg()
hmm, tagset = viterbi.get_hmm_tagset()
print "reading dev data..."
parses = utils.read_parses_no_indent(dev)
i = 0
for parse in parses:
if len(parse) > 100:
parse_list = utils.make_parse_list(parse)
sentence, truetags = utils.get_terminals_tags(parse_list)
print '\n', sentence, '\n'
#print dev_sentences.index(sentence)
print "running dual decomposition..."
num_iterations = dd_parser_tagger.run(sentence, pcfg_prob, nonterms, start, tagset, hmm)
print "\n", truetags, " :true tags"
if num_iterations != -1:
print "converges in ", num_iterations ," iterations \n"
else:
print "does not converge :(\n"
'''
Learns the hmm and the pcfg from the treebank, then runs cky parsing and
viterbi tagging on each dev sentence (dual decomposition is not invoked here).
'''
def execute(treebank, dev):
print "reading treebank..."
parses = utils.read_parses_no_indent(treebank)
parse_lists = []
for parse in parses:
parse_lists.append(utils.make_parse_list(parse))
print "learning pcfg..."
nonterms, terms, start, prob = grammar.learn(parse_lists)
print "learning hmm..."
emission, transition = sequnece_labeler.learn(parse_lists)
print "reading dev data..."
dev_sentences = utils.get_sentences(dev)
print dev_sentences[100]
for sentence in dev_sentences:
parse = cky.run(sentence, nonterms, start, prob)
sequnece = viterbi.run(sentence, emission, transition)
if __name__ == "__main__":
    # Usage: pipeline_parser_tagger.py <treebank> <dev>
    # Both paths are required on the command line, although only the dev
    # file is passed on to quick_execute.
    if len(sys.argv) < 3:
        # Exit with a readable message instead of a bare IndexError traceback.
        sys.exit("usage: %s <treebank> <dev>" % sys.argv[0])
    treebank = sys.argv[1]
    dev = sys.argv[2]
    quick_execute(dev)