# console.py -- interactive console for starting, training, querying and saving the sequence-labelling models.
import sys
from utils import examples, rmap, mapping, queries, SequenceLabeler, SequenceLabeler2, SequenceLabeler3, \
SequenceLabeler4, SequenceLabeler3, chunk, preprocess, test, postprocess, query
import pyvw
from time import time
import os.path
def help():
    """Print the list of console commands with an example of each.

    The text is written to standard output and nothing is returned.

    NOTE: this name shadows the ``help`` builtin inside this module; it is
    kept unchanged so existing references keep working.
    """
    # A single parenthesised argument prints the same bytes under the
    # Python 2 print statement and the Python 3 print function, so this
    # form works on either interpreter.
    print("""
Commands:
\t/start model.vw => initialize the model
\t/train watch_N harry_B potter_I on_N hbo_B => train the model with the sample
\t/query watch big bang => query the model with the sample
\t/save model.vw => save the model and quit
""")
def _load_models(prefix):
    """Build the four VW models and their search tasks for *prefix*.

    Each model uses its own file, ``prefix + ".1"`` through
    ``prefix + ".4"``.  When all four files already exist, every model is
    created with that file as both ``-i`` and ``-f``; otherwise four fresh
    ``--search 3 --search_task hook`` models are created with ``-f``
    pointing at those files.

    Returns a ``(models, tasks)`` pair of parallel four-element lists.
    """
    paths = [prefix + suffix for suffix in (".1", ".2", ".3", ".4")]
    if all(os.path.isfile(path) for path in paths):
        # Resume: read each existing model and write it back to the same file.
        options = ["--quiet -i " + path + " -f " + path for path in paths]
    else:
        # Any file missing: start all four models from scratch.
        options = [
            "--search 3 --quiet --search_task hook --ring_size 2048 -f " + path
            for path in paths
        ]
    models = [pyvw.vw(opts) for opts in options]
    labelers = (SequenceLabeler, SequenceLabeler2, SequenceLabeler3, SequenceLabeler4)
    # One labeler class per model, paired by position.
    tasks = [
        model.init_search_task(labeler)
        for model, labeler in zip(models, labelers)
    ]
    return models, tasks


def main():
    """Run the interactive console loop.

    Reads one command per line from standard input:

    * ``/start NAME`` -- create (or reload) the four models for NAME.
    * ``/train SAMPLE`` -- run ten learning passes over SAMPLE on every model.
    * ``/query SAMPLE`` -- print the distinct post-processed predictions.
    * ``/save`` -- call ``finish()`` on every model and exit with status 0.
      Anything typed after ``/save`` is ignored.
    * anything else -- print the command summary.

    Blank lines are skipped.  End of input leaves the loop without calling
    ``finish()`` on the models.
    """
    vw = []
    sl = []
    while True:
        try:
            inp = raw_input("> ")
        except EOFError:
            # Ctrl-D or exhausted piped input: stop quietly instead of
            # dying with a traceback.
            print("")
            return

        # split() already discards leading/trailing whitespace.
        words = inp.split()
        if not words:
            # Blank line: nothing to dispatch (previously an IndexError).
            continue
        cmd = words[0]
        data = " ".join(words[1:])

        if cmd == "/save":
            for model in vw:
                model.finish()
            # Saving and quitting is the success path, so report status 0.
            sys.exit(0)
        elif cmd == "/train":
            if not sl:
                print("No model loaded; run /start model.vw first.")
                continue
            # Ten passes over the same sample, each pass updating every model.
            for _ in range(10):
                for task in sl:
                    # preprocess() is called afresh each time, as before, in
                    # case its result cannot be reused.
                    task.learn(preprocess([data]))
        elif cmd == "/query":
            if not sl:
                print("No model loaded; run /start model.vw first.")
                continue
            # A set collapses identical answers from different models.
            answers = set(postprocess(query(task, data)) for task in sl)
            for answer in answers:
                # Same bytes as the former `print "\t", out`: Python 2 adds
                # no separating space after an item ending in a tab.
                print("\t%s" % (answer,))
        elif cmd == "/start":
            if not data:
                # Without a name the model files would be ".1" ... ".4".
                print("Usage: /start model.vw")
                continue
            vw, sl = _load_models(data)
        else:
            if cmd != "/help":
                print("Unknown command: %s" % cmd)
            help()
# Start the console only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()