/
utils.py
38 lines (31 loc) · 1.2 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from sentimental.models import Sentence, Project
from reverend.thomas import Bayes
class Guesser(object):
    """Guess classifications for a project's unclassified sentences.

    On construction a ``Bayes`` classifier is trained with every sentence
    returned by ``project.classified()``.  ``guess()`` then runs the
    classifier over ``project.to_classify()`` and ``best_matches()`` reduces
    each sentence's guesses to the single highest-scoring one.

    Attributes:
        project: The project object supplied by the caller.
        bayes: The trained ``Bayes`` instance.
        data: Result of the latest ``guess()`` call: a list of dicts with
            keys ``'sentence_id'`` and ``'guesses'``.
        best: Result of the latest non-empty ``best_matches()`` call: a list
            of dicts with keys ``'id'``, ``'guess'`` and ``'certainty'``.
    """

    def __init__(self, project):
        """Store *project*, create the classifier and train it immediately."""
        self.project = project
        self.bayes = Bayes()
        self.data = []
        self.best = []
        self._train()

    def _train(self):
        """Feed every already-classified sentence of the project to the classifier."""
        for sentence in self.project.classified():
            self.bayes.train(sentence.get_classification(), sentence.sentence)

    def guess(self):
        """Run the classifier over every sentence still to be classified.

        The result list is rebuilt on each call, so calling this method
        repeatedly does not pile up duplicate entries in ``self.data``.

        Returns:
            ``self.data``: one ``{'sentence_id': ..., 'guesses': ...}`` dict
            per sentence, where ``'guesses'`` is whatever ``bayes.guess``
            returned for the sentence text.
        """
        self.data = [
            {
                'sentence_id': sentence.id,
                'guesses': self.bayes.guess(sentence.sentence),
            }
            for sentence in self.project.to_classify()
        ]
        return self.data

    def best_matches(self):
        """Pick the highest-scoring guess for every entry in ``self.data``.

        Each guess is expected to be an indexable pair whose second item is
        the score.  Entries in ``self.data`` are left untouched and
        ``self.best`` is rebuilt on every call, so the method can be called
        repeatedly with the same result.

        Returns:
            An empty list when ``guess()`` produced no data; otherwise
            ``self.best``, a list of
            ``{'id': ..., 'guess': ..., 'certainty': ...}`` dicts.  ``guess``
            and ``certainty`` are ``None`` for a sentence without usable
            guesses.
        """
        if not self.data:
            return []

        best = []
        for entry in self.data:
            top = self._top_guess(entry['guesses'])
            best.append({
                'id': entry['sentence_id'],
                'guess': top[0],
                'certainty': top[1],
            })
        self.best = best
        return self.best

    @staticmethod
    def _top_guess(guesses):
        """Return the guess with the highest score, or ``(None, None)``."""
        if not guesses:
            return (None, None)
        try:
            # max() yields the first element with the largest key, the same
            # element a stable descending sort would place first.
            return max(guesses, key=lambda guess: guess[1])
        except (TypeError, IndexError):
            # Malformed guesses are reported as "no guess" rather than
            # aborting the whole batch.
            return (None, None)