-
Notifications
You must be signed in to change notification settings - Fork 0
/
politifier.py
82 lines (72 loc) · 2.95 KB
/
politifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# from test_documents import TEST_DOCUMENTS
import json
from nltk import word_tokenize, sent_tokenize
from features.vectorizer import PolitenessFeatureVectorizer
from dependency_parse import get_parses
import pprint
from run_model import score_text
vectorizer = PolitenessFeatureVectorizer()
TEST_DOCUMENTS = []
TEST_DOCUMENTS.append(get_parses("Have you found the answer for your question? If yes would you please share it?"))
TEST_DOCUMENTS.append(get_parses("So you think this is correct. Are you sure?"))
TEST_DOCUMENTS.append(get_parses("Sorry :) I dont want to hack the system!! :) is there another way?"))
TEST_DOCUMENTS.append(get_parses("What are you trying to do? Why can't you just store the \"Range\"?"))
TEST_DOCUMENTS.append(get_parses("That is weird. Why can't you just store the \"Range\"?"))
TEST_DOCUMENTS.append(get_parses("This was supposed to have been moved to <url> per the cfd. why wasn't it moved?"))
TEST_DOCUMENTS.append(get_parses("You are wrong. But the approach is correct"))
# TEST_DOCUMENTS.append(get_parses(""))
pp = pprint.PrettyPrinter(indent=4)
fks = False
for each in TEST_DOCUMENTS:
# print each['sentences']
# print
fs = vectorizer.features(each)
for feature,score in fs.items():
if score == 1:
if (feature.startswith('feature_politeness')):
# print feature
if ('==Direct_question==' in feature):
each['sentences'][0] = "Sorry but " + each['sentences'][0]
# if ('==2nd_person==' in feature):
# li = []
# for i in each['sentences'][0].split(" "):
# y = 'we' if i.lower() == 'you' else i
# li.append(y)
# each['sentences'] = sent_tokenize(' '.join(li))
if ('==Direct_start==' in feature):
li = each['sentences'][0].split(" ")
li[0] = 'Do'
each['sentences'][0] = ' '.join(li)
if ('==2nd_person_start==' in feature):
li = each['sentences'][0].split(" ")
li[0] = 'This'
each['sentences'][0] = ' '.join(li)
if('==Direct_question==' in feature):
li = each['sentences'][1].split(" ")
li.pop(0)
each['sentences'][1] = ' '.join(li)
# pp.pprint(TEST_DOCUMENTS)
for each in TEST_DOCUMENTS:
print "\nINPUTS\t:\t", each['text']
print "FEAINP\t:\t",
fs = vectorizer.features(get_parses(each['text']))
infeatures = []
for feature,score in fs.items():
if score != 0:
if (feature.startswith('feature_politeness')):
infeatures.append(feature)
print infeatures
print 'ISCORE\t:\t', score_text(get_parses(each['text']))
print "\nOUTPUT\t:\t", ' '.join(each['sentences'])
print "FEAOUT\t:\t",
fs = vectorizer.features(get_parses(' '.join(each['sentences'])))
outfeatures = []
for feature,score in fs.items():
if score != 0:
if (feature.startswith('feature_politeness')):
outfeatures.append(feature)
print outfeatures
# print (list(set(infeatures) - set(outfeatures)))
print "OSCORE\t:\t", score_text(get_parses(' '.join(each['sentences'])))
print '-'*80
print