/
tutor02.py
75 lines (64 loc) · 2.66 KB
/
tutor02.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/python
# -*- coding: utf8 -*-
from senticnet.senticnet import Senticnet
from nltk.classify import NaiveBayesClassifier
import pymorphy2
import codecs
def word_feats(words):
return dict([(word, True) for word in words])
sn = Senticnet('ru')
morph = pymorphy2.MorphAnalyzer()
# заполняем масссив тэгами SenticNet
positive_vocab = ['#интерес', '#радость', '#сюрприз', '#восхищение']
negative_vocab = ['#попугать', '#гнев', '#печаль', '#отвращение']
# добавляем слова из WordNet
with codecs.open('dict/positive.txt', encoding='utf-8') as file_object:
for line in file_object:
line = line.rstrip('\n\r')
positive_vocab.append(morph.parse(line)[0].normal_form)
with codecs.open('dict/negative.txt', encoding='utf-8') as file_object:
for line in file_object:
line = line.rstrip('\n\r')
negative_vocab.append(morph.parse(line)[0].normal_form)
# наполнаяем множества позитивных и негативных слов и обучаем классификатор
positive_features = [(word_feats(pos), 'pos') for pos in positive_vocab]
negative_features = [(word_feats(neg), 'neg') for neg in negative_vocab]
train_set = negative_features + positive_features
classifier = NaiveBayesClassifier.train(train_set)
# Основная работа
sentence = 'Тест'
sentence = sentence.lower()
words = sentence.split(' ')
neg = 0
pos = 0
mood = 0
for word in words:
word = morph.parse(word.decode('utf8'))[0].normal_form
try: # пробуем получить теги из SenticNet
moodtags = sn.moodtags(word.encode('utf8'))
for item in moodtags:
mood += 1
if item in negative_vocab:
neg += 1
elif item in positive_vocab:
pos += 1
except: # проверяем, не входит ли слово в словарь
if word in negative_vocab:
neg += 1
mood += 1
elif word in positive_vocab:
pos += 1
mood += 1
# если ничего не помогает, пытаемся классифицировать слово через наивный Баесовский классификатор
if mood == 0:
for word in words:
classResult = classifier.classify(word_feats(word))
if classResult == 'neg':
mood += 1
neg += 1
if classResult == 'pos':
pos += 1
mood += 1
print('Positive: ' + str(float(pos) / mood))
print('Negative: ' + str(float(neg) / mood))
print('Result: ' + str((float(pos)-float(neg)) / mood))