/
kernel_tree.py
114 lines (100 loc) · 2.89 KB
/
kernel_tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import math
import os
from xml.dom import minidom
import xml.etree.ElementTree as ET
import logging
import string
import os
import sys
import time
import zmq
import codecs
from collections import defaultdict
from nltk.corpus import wordnet as wn
import jsonrpclib
from simplejson import loads
from sms_query_1 import input_from_xml
from svm_test import main_function
# Cut-off applied to a question's best SVM score in
# kernel_function.data_from_svm: at or above it, only the training questions
# of the predicted domain are used as candidates; below it, every line of
# kernel_trees.txt is used.
threshold_score = 0.5
# NOTE(review): the triple-quoted string below is disabled code kept as a bare
# string literal; it is never executed. It looks like the one-off script that
# grouped the FAQ questions by domain and opened kernel_trees.txt for writing
# -- confirm before deleting.
'''
tree = ET.parse("/home/rohspeed/Faq_retrieval/FIRE_TRAINING_DATA/FIRE2013_TRAINING_DATA/FAQs/English/eng.xml")
root = tree.getroot()
questions = defaultdict(list)
data = []
for i in range(0,len(root)):
domain = ""
question = ""
for j in range(0,4):
if root[i][j].tag == "DOMAIN":
domain = root[i][j].text
if root[i][j].tag == "QUESTION":
question = root[i][j].text
data.append((domain,question))
for domain,question in data:
questions[domain].append(question)
fname = codecs.open('kernel_trees.txt','w+', encoding='utf-8')
fname.seek(0,0)
'''
class convert_training_data():
    """Turn a raw question string into a parse tree via a JSON-RPC parser.

    The parser is reached at ``http://localhost:8080`` and its JSON reply is
    expected to carry the tree under ``['sentences'][0]['parsetree']``.
    NOTE(review): presumably a Stanford CoreNLP wrapper -- confirm.
    """

    @staticmethod
    def _first_parse_tree(result):
        """Return the first sentence's ``'parsetree'`` entry, or None if absent."""
        if not result:
            return None
        try:
            return result['sentences'][0]['parsetree']
        except (KeyError, IndexError):
            # A non-empty reply without any parsed sentence is treated the
            # same as an empty reply instead of crashing the caller.
            return None

    def convert_to_parse_tree(self, question):
        """Parse *question* and return its parse tree.

        Args:
            question: the question text; surrounding whitespace is stripped
                and non-ASCII characters are dropped before parsing.

        Returns:
            The ``'parsetree'`` value of the first parsed sentence, or
            ``None`` when the text cannot be converted to ASCII or the parser
            reply contains no parse tree.
        """
        server = jsonrpclib.Server("http://localhost:8080")
        try:
            question = question.strip()
            question = question.encode('ascii', 'ignore')
            result = loads(server.parse(question))
        except UnicodeError:
            # UnicodeError covers both UnicodeEncodeError and the
            # UnicodeDecodeError that Python 2 raises when .encode() is
            # called on a byte string holding non-ASCII bytes.
            print("Error in question format")
            return None
        res = self._first_parse_tree(result)
        if res is None:
            # Same marker the script has always printed for an unusable reply.
            print("#################")
        return res
class kernel_function():
    """Pick candidate training questions per query and send them over ZeroMQ.

    Candidates are 1-based positions. When the best SVM score of a question
    reaches ``threshold_score`` only the positions of the predicted domain's
    questions are used; otherwise every line number of ``kernel_trees.txt``
    is used.
    """

    @staticmethod
    def _domain_indices(quests, domain):
        """Return the 1-based positions of *domain*'s questions in *quests*.

        Positions count every question of every domain, in the iteration
        order of ``quests.items()``.
        NOTE(review): this presumably matches the line order of
        kernel_trees.txt -- verify against the code that writes that file.
        """
        indices = []
        position = 1
        for key, domain_questions in quests.items():
            count = len(domain_questions)
            if key == domain:
                indices.extend(range(position, position + count))
            position += count
        return indices

    @staticmethod
    def _all_line_indices(path):
        """Return ``[1, ..., N]`` where N is the number of lines in *path*."""
        # The context manager closes the handle; it used to be opened on
        # every iteration and never closed.
        with open(path, "r") as handle:
            line_total = sum(1 for _ in handle)
        return list(range(1, line_total + 1))

    @staticmethod
    def _format_indices(indices):
        """Render *indices* as ``'1$2$3$'`` (each index followed by ``'$'``)."""
        return "".join(str(index) + "$" for index in indices)

    def data_from_svm(self):
        """Send each question's parse tree and candidate indices to the kernel.

        Reads ``(questions, predict, classes, scores, domains)`` from
        ``main_function()``, connects a ``zmq.REQ`` socket to
        ``tcp://127.0.0.1:5000`` and, for every question, sends
        ``"<parse tree>\\n<idx>$<idx>$..."``. Questions for which no parse
        tree could be obtained are skipped.

        NOTE(review): the per-question placement of ``send`` and
        ``sleep(1000)`` is assumed -- confirm against the intended protocol.
        A REQ socket normally has to receive a reply before it can send
        again, so a second ``send`` without ``recv`` is expected to fail.
        """
        questions, predict, classes, scores, domains = main_function()
        context = zmq.Context()
        sock = context.socket(zmq.REQ)
        sock.connect("tcp://127.0.0.1:5000")
        parser = convert_training_data()

        for i, quest in enumerate(questions):
            # Best score for this question, floored at -4.0 as before.
            max_score = max([-4.0] + list(scores[i]))

            if max_score >= threshold_score:
                print("iam here in one")
                domain = domains[predict[i]]
                dmns, quests = input_from_xml().fetch_input_from_xml_questions()
                train_questions_indices = self._domain_indices(quests, domain)
            else:
                print("iam here in two")
                train_questions_indices = self._all_line_indices(
                    "kernel_trees.txt")

            parse_quest = parser.convert_to_parse_tree(quest)
            if parse_quest is None:
                # convert_to_parse_tree returns None on failure; len(None)
                # used to abort the whole run here.
                print("Skipping question without a parse tree")
                continue
            print(len(parse_quest))

            main_msg = (str(parse_quest) + "\n"
                        + self._format_indices(train_questions_indices))
            sock.send(main_msg)
            time.sleep(1000)
if __name__ == "__main__":
    # Script entry point: run the SVM-to-kernel hand-off once.
    kernel_function().data_from_svm()