-
Notifications
You must be signed in to change notification settings - Fork 0
/
conceptnet_validate.py
88 lines (68 loc) · 2.23 KB
/
conceptnet_validate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
# Kaggle AI2 - ConceptNet5 Model
# Implementation of the conceptnet5 for AI2 Q&A system.
# Generates a Validation Set
import random
import fileUtil
import prelimAnalysis as prenlp
from conceptnet5_client.web.api import Association
from conceptnet5_client.utils.result import Result
def _to_text(value):
    """Return value as a text string, decoding UTF-8 byte strings."""
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value


def computeScore(sim, ans):
    """Score a list of answer terms against a list of similar concepts.

    sim -- sequence of entries where entry[0] is the concept name and
           entry[1] is its numeric score.
    ans -- iterable of answer terms.

    Every (term, concept) pair in which the term occurs as a substring of
    the concept name adds that concept's score to a running total.  The
    total is divided by the number of entries in sim and returned as a
    float.  An empty sim yields 0.0 instead of dividing by zero.
    """
    if not sim:
        return 0.0

    # Compare text with text: mixing byte strings and unicode strings either
    # raises outright or relies on an implicit ASCII conversion that fails on
    # non-ASCII concept names.
    concepts = [(_to_text(s[0]), s[1]) for s in sim]

    pts = 0
    for a in ans:
        term = _to_text(a)
        for name, score in concepts:
            if term in name:
                pts += score
    return float(pts) / len(sim)
# Training set parameter - change accordingly to select a specific training example
# Import Dataset
data = fileUtil.parseTrainingSet('data/training_set.tsv')

# Maps an answer index to its letter; built once rather than per question.
guess = {0: "A", 1: "B", 2: "C", 3: "D"}

# Number of questions whose guessed letter matched the recorded answer.
correct = 0

# Each print below takes a single parenthesised argument so it emits the same
# text whether print is a statement or a function.
for q in data:
    # Perform NLP Preprocessing
    ques = prenlp.preprocess(q.question)
    answers = [prenlp.preprocess(choice) for choice in (q.a, q.b, q.c, q.d)]

    print("Question: " + str(q.question))
    print("A: " + str(q.a))
    print("B: " + str(q.b))
    print("C: " + str(q.c))
    print("D: " + str(q.d))

    # Generate Semantic Graph from Question
    a = Association(filter="/c/en", limit=30)
    semnet = a.get_similar_concepts_by_term_list(ques)
    r = Result(semnet)

    # Parse Similarity
    similar = r.get_similar()

    if similar:
        # Splice Leading API Directory: drop the first 6 characters of each
        # concept name in place (presumably the "/c/en/" prefix - confirm
        # against the API output).
        for word in similar:
            word[0] = word[0][6:]
        print("\n")
        print(similar)
        print("\n")

        # Compute Score Probabilities.  Scores stay numeric; they are only
        # turned into strings for the log line, so no precision is lost
        # before the comparison below.
        prob = [computeScore(similar, answer) for answer in answers]
        print("<SCORE PROBABILITIES>")
        print([str(p) for p in prob])

        if sum(prob) == 0:
            # No answer matched any concept: fall back to a random choice.
            chidx = random.randint(0, 3)
        else:
            # Obtain Letter Choice (first highest-scoring answer)
            chidx = prob.index(max(prob))
    else:
        # No similar concepts were returned: fall back to a random choice.
        chidx = random.randint(0, 3)

    print("GUESS: " + str(guess[chidx]))
    print("ANSWER: " + str(q.ans))
    if guess[chidx] == str(q.ans):
        print("correct")
        correct += 1

# Overall accuracy report (currently disabled):
# print "Total Score: " + str(float(correct)/len(data))