Example no. 1
 def parse_sentiment(self, text):
     tokenizer = RegexTokenizer()
     model = FastTextSocialNetworkModel(tokenizer=tokenizer)

     return model.predict([text], k=2)[0]
     
Example no. 2
class SentimentAnalyzer:
    def __init__(self):
        self.tokenizer = RegexTokenizer()
        self.model = FastTextSocialNetworkModel(tokenizer=self.tokenizer)

    def _map_sentiment(self, sentiment):
        # map the model's label to a numeric score: 1, -1 or 0
        if sentiment == "positive":
            return 1
        if sentiment == "negative":
            return -1
        return 0

    def sentiment_label_dataframe(self, df: DataFrame) -> DataFrame:
        df['sentiment'] = None
        for i, row in df.iterrows():
            results = self.model.predict([row['text']], k=30)
            sentiment = max(list(results[0].items()),
                            key=lambda prob: prob[1])[0]
            sentiment = self._map_sentiment(sentiment)
            df.at[i, 'sentiment'] = sentiment

        return df

    def sentiment_label_sentence(self, sentence: str):
        results = self.model.predict([sentence], k=30)
        sentiment = max(list(results[0].items()), key=lambda prob: prob[1])[0]
        return self._map_sentiment(sentiment)
Example no. 3
 def validation_comment(
     self, film_id, user_id
 ):  # review validation: if it passes the check, comm_check is set to 1
     x = self.conn.execute(
         'SELECT rating1, rating2, rating3, rating4, rating5, comm_text '
         'FROM Comments WHERE film_id = (?) and user_id = (?)', (
             film_id[0],
             user_id,
         )).fetchall()
     sr = sum(x[0][0:5]) / 5  # mean of the five user ratings
     tokenizer = RegexTokenizer()
     model = FastTextSocialNetworkModel(tokenizer=tokenizer)
     result = model.predict([x[0][5]], k=5)
     dl = (result[0]['neutral'] + result[0]['negative'] +
           result[0]['positive'])
     res = 10 * (result[0]['neutral'] / dl - result[0]['negative'] / dl +
                 result[0]['positive'] / dl)
     if abs(sr - res) <= valid_const:
         self.set_comment(film_id, user_id, 'comm_check', 1)
         self.set_comment(film_id, user_id, 'second_check', 0)
         self.conn.commit()
Example no. 4
def graph():
    file = filedialog.askopenfilename(filetypes=(("Text files", "*.txt"),
                                                 ("all files", "*.*")))
    with open(file) as f:
        raw = f.read()
    sentences = nltk.sent_tokenize(raw)
    command = 'download'
    arguments = ['fasttext-social-network-model']
    if command == 'download':
        downloader = DataDownloader()
        for filename in arguments:
            if filename not in AVAILABLE_FILES:
                raise ValueError(f'Unknown package: {filename}')
            source, destination = AVAILABLE_FILES[filename]
            destination_path: str = os.path.join(DATA_BASE_PATH, destination)
            if os.path.exists(destination_path):
                continue
            downloader.download(source=source, destination=destination)
    else:
        raise ValueError('Unknown command')

    import dostoevsky
    from dostoevsky.tokenization import RegexTokenizer
    from dostoevsky.models import FastTextSocialNetworkModel

    tokenizer = RegexTokenizer()
    tokens = tokenizer.split(
        'всё очень плохо')  # [('всё', None), ('очень', None), ('плохо', None)]

    model = FastTextSocialNetworkModel(tokenizer=tokenizer)

    messages = sentences

    results = model.predict(messages, k=2)

    # pull the per-message probabilities once; a missing key means 0.0
    positive_values = [sentiment.get('positive', 0.0) for sentiment in results]
    negative_values = [sentiment.get('negative', 0.0) for sentiment in results]
    summary = len(negative_values)

    n_value = np.array(negative_values)
    p_value = np.array(positive_values)
    counts_value = np.arange(summary)
    plt.plot(counts_value, p_value)
    plt.plot(counts_value, n_value)
    plt.show()
Example no. 5
def analyze(all_news):
    tokenizer = RegexTokenizer()
    model = FastTextSocialNetworkModel(tokenizer=tokenizer)

    texts = [news['title'] + ' ' + news['article'] for news in all_news]
    results = model.predict(texts)
    for news, tone in zip(all_news, results):
        news['tone'] = tone
    return all_news
Example no. 6
def analysis_data(messages):
    res = []
    tokenizer = RegexTokenizer()
    model = FastTextSocialNetworkModel(tokenizer=tokenizer)
    for i, message in enumerate(messages, start=1):
        data = model.predict([message], k=2)[0]
        for key, value in data.items():
            if round(value, 1) == 1:
                res.append(key)
        # fall back to 'neutral' when no label scored close to 1
        if len(res) != i:
            res.append('neutral')
    return res
Example no. 7
def get_sentiment(text):

    if not text:
        return 5

    model = FastTextSocialNetworkModel(tokenizer=RegexTokenizer(),
                                       lemmatize=True)

    results = model.predict([text], k=2)
    # net score in [-1, 1]: capped positive minus capped negative probability
    result = (min(results[0].get('positive', 0), 1) -
              min(results[0].get('negative', 0), 1))

    return round((result + 1) * 5)
Example no. 8
def text_analysis(all_comments):

    texts = [comment['comment'] for comment in all_comments]

    tokenizer = RegexTokenizer()

    model = FastTextSocialNetworkModel(tokenizer=tokenizer)

    results = model.predict(texts, k=5)

    for i, sentiment in enumerate(results):
        all_comments[i]['positive'] = sentiment['positive']
        all_comments[i]['negative'] = sentiment['negative']
        all_comments[i]['neutral'] = sentiment['neutral']

    return all_comments
Example no. 9
class DostN:
    def __init__(self):
        self.db = DB()
        self.tokenizer = RegexTokenizer()
        self.model = FastTextSocialNetworkModel(tokenizer=self.tokenizer)
        self.tokens = self.tokenizer.split('всё очень плохо')

    def ready_msg(self, messages, nnID):
        results = self.model.predict(messages, k=2)
        sentiments = list(results)
        # with k=2 a label may be absent from the result, so default to 0
        pos = sentiments[0].get('positive', 0)
        self.db.setPos(nnID, pos)
        neg = sentiments[0].get('negative', 0)
        self.db.setNegative(nnID, neg)
        ne = sentiments[0].get('neutral', 0)
        self.db.setNeutral(nnID, ne)
        return sentiments
Example no. 10
def test_tonality():
    test = [
        '''Ужасное расположение и распределение товаров. Два уровня и на каждом свои кассы. Чтобы купить разные группы товаров нужно отстоять две очереди.''',
        '''Это не шаурма это ужас,куча майонеза,лук одна кожура верхний слой который мы при готовке выкладываем, картофель фри из пачек сухой, мясо порезано тонкими пластами, не пойму как оно приготавливалось явно не на гриле, мясо было не свежее, в итоге самый съедобный оказался лаваш, не рекомендую.''',
        '''Рядом с домом, вкусная картошечка и обалденные молочные коктейли и довольно быстрое обслуживание, приятные кассиры''',
        '''Замечательный телефон, пользуюсь им уже 2 года, очень нравится!''',
        '''Был в этом магазине в прошлом году, больше туда приходить не собираюсь, некомпетентные продавцы, плохое обслуживание((''',
        '''Идеальный мастер!:)''',
        '''Уроды, как можно так поступить с человеком, просто ужас?!'''
    ]

    tokenizer = RegexTokenizer()
    model = FastTextSocialNetworkModel(tokenizer=tokenizer)

    results = model.predict(test, k=2)
    for text, sentiment in zip(test, results):
        print_tonality(text, sentiment.items())
Example no. 11
def sentiment_analysis(data):
    langid.set_languages(['en', 'ru'])
    lang = langid.classify(data['text'][0])[0]
    if lang == 'ru':
        labels = data['from'].unique()
        msg_df = data.loc[data.text != '']
        messages_1 = list(msg_df.text[msg_df['from'] == labels[0]])
        messages_2 = list(msg_df.text[msg_df['from'] == labels[1]])

        tokenizer = RegexTokenizer()

        model = FastTextSocialNetworkModel(tokenizer=tokenizer)

        def tone_of(sentiment):
            # sample model outputs:
            # привет -> {'speech': 1.0000100135803223, 'skip': 0.0020607432816177607}
            # люблю тебя!! -> {'positive': 0.9886782765388489, 'skip': 0.005394937004894018}
            # малолетние дебилы -> {'negative': 0.9525841474533081, 'neutral': 0.13661839067935944}
            return sentiment.get('positive', 0) - sentiment.get('negative', 0)

        results_1 = model.predict(messages_1, k=2)
        sentiments_1 = [tone_of(sentiment) for sentiment in results_1]

        results_2 = model.predict(messages_2, k=2)
        sentiments_2 = [tone_of(sentiment) for sentiment in results_2]

        return sentiments_1, sentiments_2
Example no. 12
def return_sentiment(shcode):

    response = requests.get(
        get_url(f'https://www.instagram.com/p/{shcode}/?__a=1'))

    data = json.loads(response.content)

    tokenizer = RegexTokenizer()
    model = FastTextSocialNetworkModel(tokenizer=tokenizer)
    sentim = []

    edges = data['graphql']['shortcode_media']['edge_media_preview_comment'][
        'edges']

    for com in edges:
        # k=1: only the top label per comment is used below
        results = model.predict([com['node']['text']], k=1)
        for x in results:
            sentim.append(list(x.keys())[0])
    return sentim
Example no. 13
def processSentiment(message, stage):
    duration = '0.0'

    tokenizer = RegexTokenizer()
    model = FastTextSocialNetworkModel(tokenizer=tokenizer)

    phrases = []

    for phrase in message:
        duration = phrase['end_time']
        for item in phrase['alternatives']:
            phrases.append(item['transcript'])

    results = model.predict(phrases, k=2)
    answer, result = None, None  # stay None if every phrase is neutral/skip
    for phrase, sentiments in zip(phrases, results):
        for sentiment in sentiments.keys():
            if sentiment not in ['neutral', 'skip']:
                answer = sentiment
                result = stages[stage][answer]

    # end_time is a string like '12.3s'; strip the trailing unit
    return [answer, result, float(duration[:-1])]
Example no. 14
def tonal_analize(string):
    from dostoevsky.tokenization import RegexTokenizer
    from dostoevsky.models import FastTextSocialNetworkModel

    tokenizer = RegexTokenizer()
    model = FastTextSocialNetworkModel(tokenizer=tokenizer)
    sentiment = model.predict([string], k=2)[0]

    # positive probability minus negative probability
    positive = sentiment.get('positive', 0)
    negative = sentiment.get('negative', 0)
    return positive - negative
Example no. 15
def get_sent(x):
    def one_hot_encode_sent(x):
        """
        (pos, neg, neu)
        """
        if x[0] == 'positive':
            return (1, 0, 0)
        elif x[0] == 'negative':
            return (0, 1, 0)
        else:
            return (0, 0, 1)

    tokenizer = RegexTokenizer()
    model = FastTextSocialNetworkModel(tokenizer=tokenizer)
    results = model.predict(x, k=1)
    results = [(list(r.keys())[0], list(r.values())[0]) for r in results]
    results = list(map(one_hot_encode_sent, results))
    results = [
        results[i] if x[i] != 'EMPTY_TEXT' else (0, 0, 0)
        for i in range(len(results))
    ]
    return [pd.Series(x) for x in zip(*results)]  # return three series
Example no. 16
def begin():
    file = filedialog.askopenfilename(filetypes=(("Text files", "*.txt"),
                                                 ("all files", "*.*")))
    with open(file) as f:
        raw = f.read()
    sentences = nltk.sent_tokenize(raw)
    command = 'download'
    arguments = ['fasttext-social-network-model']
    if command == 'download':
        downloader = DataDownloader()
        for filename in arguments:
            if filename not in AVAILABLE_FILES:
                raise ValueError(f'Unknown package: {filename}')
            source, destination = AVAILABLE_FILES[filename]
            destination_path: str = os.path.join(DATA_BASE_PATH, destination)
            if os.path.exists(destination_path):
                continue
            downloader.download(source=source, destination=destination)
    else:
        raise ValueError('Unknown command')

    tokenizer = RegexTokenizer()
    tokens = tokenizer.split(
        'всё очень плохо')  # [('всё', None), ('очень', None), ('плохо', None)]

    model = FastTextSocialNetworkModel(tokenizer=tokenizer)

    messages = sentences

    results = model.predict(messages, k=2)

    for message, sentiment in zip(messages, results):
        # build a plain string: Text.insert expects str, not a tuple
        analysis_line = f'\n{message}\n->\n{sentiment}\n'
        text.insert(END, analysis_line)
Example no. 17
def main():
    db_con = init_sync()
    tokenizer = RegexTokenizer()
    model = FastTextSocialNetworkModel(tokenizer=tokenizer)
    with db_con:
        cur = db_con.cursor()
        # Query the sentences extracted by the Tomita parser
        cur.execute("SELECT id, text FROM filtered WHERE text IS NOT NULL")

        rows = [(row_id, row_text) for row_id, row_text in cur.fetchall()
                if len(row_text) > 0]

        for id_filtered, text in rows:
            results = model.predict([text], k=2)
            tonality = list(results[0].items())
            insert_tonality(db_con, id_filtered, tonality)
            print_tonality(text, tonality)
Example no. 18
 def _prepare_model():
     tokenizer = RegexTokenizer()
     model = FastTextSocialNetworkModel(tokenizer=tokenizer)
     return model
Example no. 19
import re
import nltk
import json
import logging
from navec import Navec
from slovnet import Syntax
from razdel import sentenize, tokenize
from dostoevsky.tokenization import UDBaselineTokenizer
from dostoevsky.models import FastTextSocialNetworkModel

meaningful_tags = ['positive', 'negative']

# init models:
sentenizer = FastTextSocialNetworkModel(tokenizer=UDBaselineTokenizer())


class SimpleExtractor:

    def __init__(self, path_to_navec_data, path_to_syntax_data):
        self.navec = Navec.load(path_to_navec_data)
        self.syntax = Syntax.load(path_to_syntax_data).navec(self.navec)

    def is_values_lower(self, source, target):
        # True only if every value in source is <= every value in target
        for v in source:
            for d in list(target):
                if v > d:
                    return False
        return True

    def extract_sentnces_with_names(self, text):
        text = re.sub("\\s+", " ", text)
Example no. 20
 def __init__(self):
     self.tokenizer = RegexTokenizer()
     self.model = FastTextSocialNetworkModel(tokenizer=self.tokenizer)
Example no. 21
from dostoevsky.tokenization import RegexTokenizer
from dostoevsky.models import FastTextSocialNetworkModel
from .parse_kaldi import open_text

tokenizer = RegexTokenizer()  # text tokenizer
text_model = FastTextSocialNetworkModel(
    tokenizer=tokenizer)  # sentiment analysis model

to_numbers = {
    'negative': 0,
    'neutral': 1,
    'positive': 2,
    'skip': 1,
    'speech': 1,
}


def choose_sentiment(pred):
    # label with the highest predicted probability
    return max(pred, key=pred.get)


def predict_sentiment(texts):
    preds = text_model.predict(texts)
    return list(map(lambda pred: choose_sentiment(pred), preds))


def predict_by_text_file(text_path):
    text = open_text(text_path).strip()
    if text == '':
        return 1  # numeric code for 'neutral'
    # map the predicted label to its numeric code
    return to_numbers[predict_sentiment([text])[0]]
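A usage sketch for the module above (assuming the fasttext-social-network-model data has been downloaded; the printed outputs are illustrative):

labels = predict_sentiment(['всё очень плохо', 'всё отлично!'])
print(labels)  # e.g. ['negative', 'positive']
print([to_numbers[label] for label in labels])  # e.g. [0, 2]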
Example no. 22
from typing import Dict

from dostoevsky.models import FastTextSocialNetworkModel
from dostoevsky.tokenization import RegexTokenizer

tokenizer = RegexTokenizer()

model = FastTextSocialNetworkModel(tokenizer=tokenizer)


class SentimentData:
    high_negative: float
    low_negative: float
    neutral: float
    skip: float
    speech: float
    positive: float

    def __init__(self, sentiment_data: Dict[str, float] = None):
        # avoid a mutable default argument; fall back to all-zero scores
        if sentiment_data is None:
            sentiment_data = {'negative': 0, 'neutral': 0, 'positive': 0,
                              'speech': 0, 'skip': 0}
        if 'negative' in sentiment_data:
            if sentiment_data['negative'] > 0.59:
                self.high_negative = sentiment_data['negative']
                self.low_negative = 0
            else:
                self.high_negative = 0
                self.low_negative = sentiment_data['negative']
        if 'neutral' in sentiment_data:
            self.neutral = sentiment_data['neutral']
Example no. 23
class CommentsResearcher:
    """ Класс для анализа тональности комментариев """

    def __init__(self):
        self.tokenizer = RegexTokenizer()
        self.model = FastTextSocialNetworkModel(tokenizer=self.tokenizer)

    def get_sentiment(self, file):
        """ Determine the sentiment of the comments.
        Returns 2 values:
        - distribution (dict)
            dictionary with the number of comments in each category
        - sentiments (dict)
            dictionary with the sentiment of each comment
        Detailed information about the results is written to a file named
        after the community. The file is saved to the 'reports' folder.

        Arguments:
        file (_io.TextIOWrapper): text file with comments (separator \t)
        """

        comments = file.read().split('\t')
        f_name = os.path.basename(file.name)
        
        # get the sentiment of the comments using the model
        # (the trailing element after the split is dropped)
        results = self.model.predict(comments[0:-1], k=1)
        output = []
        distribution = {}
        sentiments = {}

        for comment, sentiment in zip(comments, results):
            comment_sentiment = list(sentiment.keys())[0]
            output.append(comment_sentiment)
            sentiments[comment] = comment_sentiment

        # count the results for each category
        distribution['positive'] = output.count('positive')
        distribution['negative'] = output.count('negative')
        distribution['neutral'] = output.count('neutral')
        distribution['skip'] = output.count('skip')
        distribution['speech'] = output.count('speech')

        # build the detailed txt report
        self.detailed_report(f_name, sentiments, distribution)

        return distribution, sentiments

    def detailed_report(self, group_name, sentiments, distribution):
        """ Create a detailed txt report.
        Helper method for get_sentiment.
        Not recommended for use as a standalone function.
        """

        categories = [
            ('negative', 'Negative comments'),
            ('positive', 'Positive comments'),
            ('neutral', 'Neutral comments'),
            ('skip', 'Meaningless comments'),
            ('speech', 'Speech, quoting'),
        ]

        with open(f'reports/{group_name}', 'w') as file:
            file.write(f'Sentiment of comments from community {group_name.replace(".txt", "")}: \n')
            for label, title in categories:
                file.write(f'\n{title}: {distribution[label]}\n\n')
                for comment, sentiment in sentiments.items():
                    if sentiment == label:
                        file.write(comment + '\n')
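Going by the docstrings above, a minimal usage sketch (the 'reports' folder and the tab-separated comments file are assumptions, and the printed counts are illustrative):

import os

os.makedirs('reports', exist_ok=True)
researcher = CommentsResearcher()
with open('community_comments.txt', encoding='utf-8') as file:  # hypothetical input
    distribution, sentiments = researcher.get_sentiment(file)
print(distribution)  # e.g. {'positive': 2, 'negative': 1, 'neutral': 3, 'skip': 0, 'speech': 0}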
Example no. 24
from dostoevsky.tokenization import RegexTokenizer
from dostoevsky.models import FastTextSocialNetworkModel

tokenizer = RegexTokenizer()
tokens = tokenizer.split(
    'всё очень плохо')  # [('всё', None), ('очень', None), ('плохо', None)]

model = FastTextSocialNetworkModel(tokenizer=tokenizer)

messages = ['Волосы у девушки просто огонь!!! Красота!!!']

results = model.predict(messages, k=2)

for message, sentiment in zip(messages, results):
    # привет -> {'speech': 1.0000100135803223, 'skip': 0.0020607432816177607}
    # люблю тебя!! -> {'positive': 0.9886782765388489, 'skip': 0.005394937004894018}
    # малолетние дебилы -> {'negative': 0.9525841474533081, 'neutral': 0.13661839067935944}]
    print(message, '->', sentiment)
Example no. 25
from sklearn.metrics import classification_report
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.base import TransformerMixin
import nltk
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import log_loss, accuracy_score
from gensim.models import Doc2Vec
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

patterns = "[A-Za-z0-9!#$%&'()*+,./:;<=>?@[\]^_`{|}~—\"\-]+"
fasttext.FastText.eprint = lambda x: None
tokenizer = RegexTokenizer()
model = FastTextSocialNetworkModel(tokenizer=tokenizer, lemmatize=True)


def csv_reader(file_obj):
    """
    Read a csv file
    """

    reader = csv.reader(file_obj)
    data = []
    for row in reader:
        row = str(" ".join(row))
        row = row.replace("#", "hashtag")
        if row != "":
            data.append(row)
    return data
Example no. 26
STOP_WORDS = get_stop_words("ru")
MIN_WORD_LEN = 3

FIGURE_SIZE = (20, 8)
# FONT_SIZE = 20
FONT_SIZE = 40

ascii_lowercase = set(ascii_lowercase)
digits = set(digits)

OBSCENE_WORDS_FILTER = ObsceneWordsFilter(conf.bad_words_re,
                                          conf.good_words_re)
OBSCENE_FILTER_FALSES = ["команд", "дубляж"]

LETSGO_WORDS = {"пойдем", "пошли", "пойдемте", "погнали", "го", "хочет"}

SENTIMENT_TOKENIZER = RegexTokenizer()
SENTIMENT_MODEL = FastTextSocialNetworkModel(tokenizer=SENTIMENT_TOKENIZER)


def mode(a):
    mode_value, mode_count = scipy_mode_fn(a)
    return mode_value


def strip_message(msg):
    msg = msg.strip()
    msg = msg.replace('\n', ' ')
    msg = msg.lower()
    return msg
Example no. 27
 def __init__(self):
     self.db = DB()
     self.tokenizer = RegexTokenizer()
     self.model = FastTextSocialNetworkModel(tokenizer=self.tokenizer)
     self.tokens = self.tokenizer.split('всё очень плохо')
Example no. 28
import dostoevsky
from dostoevsky.tokenization import RegexTokenizer
from dostoevsky.models import FastTextSocialNetworkModel

# run once: python -m dostoevsky download fasttext-social-network-model

data = open("words.txt", "r")
words = data.readlines()
data.close()

file = open("estimations.txt", "w")

tokenizer = RegexTokenizer()
model = FastTextSocialNetworkModel(tokenizer=tokenizer)

# process the words in batches of 100
for i in range(0, len(words), 100):
    batch = words[i:i + 100]
    results = model.predict(batch, k=1)
    for word, sentiment in zip(batch, results):
        estimation = list(sentiment.keys())[0]
        # collapse the service labels into 'neutral'
        if estimation in ('skip', 'speech'):
            estimation = 'neutral'

        if estimation == 'negative': estimation = '-1'
        elif estimation == 'neutral': estimation = '0'
        elif estimation == 'positive': estimation = '1'

        file.write(word.split()[0] + ' ' + estimation + '\n')

file.close()
Example no. 29
def sentiment_analysis(text):
    tokenizer = RegexTokenizer()
    model = FastTextSocialNetworkModel(tokenizer=tokenizer)
    # predict expects a list of texts, so wrap the single string
    results = model.predict([text], k=3)
    return results
Example no. 30
from dostoevsky.tokenization import RegexTokenizer
from dostoevsky.models import FastTextSocialNetworkModel
import pymorphy2
import re
import matplotlib.pyplot as plt
import numpy as np
import pylab
# Import the package with helper functions
from matplotlib import mlab

morph = pymorphy2.MorphAnalyzer()
tokenizer = RegexTokenizer()
model = FastTextSocialNetworkModel(tokenizer=tokenizer)

tweets = []  # list of all non-empty tweets
tweets_time = []  # time in minutes
tweets_length = {}  # tweet length -> number of tweets of that length
tweets_score = []  # score of each tweet under the third rule
words_score = {}  # positive/neutral/negative score for each word
adj_positive = {}
adj_negative = {}
tweet_score_1st_rule = {}  # score of each tweet under the first rule
tweet_score_2nd_rule = {}  # score of each tweet under the second rule
tweet_score_3rd_rule = {}  # score of each tweet under the third rule
tweet_score_4th_rule = {}  # score of each tweet under the fourth rule
words_cr = {}  # dictionary mapping each word to its part of speech
words_frequency = {}  # number of tweets in which the word occurred
data = open('data.txt')
tweets_amount = 0