def __init__(self, database):
    """
    (str) ->
    Set up the object around the training file DATABASE.

    Stores the path, starts with no tweet selected, creates the two
    DynamicArray objects that accumulate results, and finally trains the
    classifier by calling self._train_model().
    """
    self.database = database
    self.tweet = None
    # One array for predicted labels, one for per-tweet probability tables.
    self.emotions, self.probabilities = DynamicArray(), DynamicArray()
    # Training goes last: it runs after self.database has been assigned.
    self._classifier = self._train_model()
def get_emotions_probability(self):
    """
    () -> DynamicArray
    Return (emotion, probability) pairs for the current tweet.

    The features of self.tweet are passed to the classifier's
    prob_classify(); every sample of the result is paired with its
    probability. The resulting array is also appended to
    self.probabilities before it is returned.
    """
    scored = DynamicArray()
    features = self._tweets_features(self.tweet)
    distribution = self._classifier.prob_classify(features)
    for label in distribution.samples():
        pair = (label, distribution.prob(label))
        scored.append(pair)
    # Keep the whole table so earlier results stay available on the object.
    self.probabilities.append(scored)
    return scored
def _creating_set(self):
    """
    () -> DynamicArray
    Build the training set from the tab-separated file at self.database.

    The first line of the file is skipped. For every following line the
    first column is turned into a feature dict by self._tweets_features
    and paired with the second column; the pairs are returned in file
    order.

    Raises IndexError if a line contains no tab character, and whatever
    open() raises if the file cannot be read.
    """
    result_set = DynamicArray()
    # The context manager closes the file even when a row fails to parse;
    # previously the handle was never closed.
    with open(self.database, "r", encoding='utf-8') as file:
        file.readline()  # skip the first line (presumably a header row)
        for line in file:
            columns = line.split('\t')
            first_el = self._tweets_features(columns[0])
            # NOTE(review): when the label is the last column it still
            # carries the line terminator - confirm this is intended.
            result_set.append((first_el, columns[1]))
    return result_set
def available_dates(self):
    """
    () -> DynamicArray
    Return (raw_date, day) pairs for the nodes of the linked list.

    The walk starts at self.data.next and follows .next until None.
    For each node, data[0] is split on whitespace and fields 1, 2 and 5
    are joined with spaces to form the day string (presumably month,
    day-of-month and year - confirm against the stored date format).
    Nodes whose date cannot be split into enough fields are skipped.
    """
    available_dates = DynamicArray()
    cur_node = self.data.next
    while cur_node is not None:
        try:
            raw_date = cur_node.data[0]
            date = raw_date.split()
            day = "{} {} {}".format(date[1], date[2], date[5])
        except IndexError:
            # Malformed or missing date: ignore this node.
            pass
        else:
            available_dates.append((raw_date, day))
        # Advance unconditionally. Doing this inside the try block meant a
        # malformed date left cur_node unchanged and the loop never ended.
        cur_node = cur_node.next
    return available_dates
def get_tweet_by_day(self, day):
    """
    (str) -> DynamicArray
    Return every tweet written on the given DAY.

    DAY is compared with the second element of each pair provided by
    available_dates. The text (data[1]) of every node whose raw date
    (data[0]) belongs to that day is collected in list order.

    Raises ValueError if no stored date matches DAY.
    """
    dates = self.available_dates
    if callable(dates):
        # available_dates may be a plain method rather than a property;
        # iterating the bound method itself would raise TypeError.
        dates = dates()

    # Collect *all* raw dates of the requested day. Overwriting `day` with
    # the first match made later comparisons fail and limited the result
    # to tweets sharing one exact timestamp.
    matching_dates = {date[0] for date in dates if date[1] == day}
    if not matching_dates:
        raise ValueError("This DAY is not available")

    days = DynamicArray()
    cur_node = self.data.next
    while cur_node is not None:
        if cur_node.data[0] in matching_dates:
            days.append(cur_node.data[1])
        cur_node = cur_node.next
    return days
class EmotionsList:
    """
    Representing EmotionsList ADT that preserves emotions and their
    probabilities.

    A classifier is trained once at construction time from a tab-separated
    file; afterwards a tweet is selected with set_tweet() and analysed with
    get_tweet_emotion() / get_emotions_probability(). Every result is also
    recorded in self.emotions / self.probabilities.
    """

    def __init__(self, database):
        """
        (str) ->
        Store the path DATABASE, create the two DynamicArray objects that
        accumulate results, and train the model.
        """
        self.database = database
        self.tweet = None
        self.emotions = DynamicArray()
        self.probabilities = DynamicArray()
        # Must come after self.database is set: training reads that file.
        self._classifier = self._train_model()

    def set_tweet(self, tweet):
        """
        (str) ->
        Set the tweet which the ADT will analyze.
        """
        self.tweet = tweet

    @staticmethod
    def _remove_stop_words(tweet):
        """
        (str) -> str
        Return TWEET lower-cased with the words found in STOPWORDS removed.

        Every kept word is followed by a single space, so a non-empty
        result ends with a trailing space.
        """
        kept = []
        for word in tweet.split():
            lowered = word.lower()
            if lowered not in STOPWORDS:
                kept.append(lowered + " ")
        return "".join(kept)

    def _tweets_features(self, tweet):
        """
        (str) -> dict
        Additional method. Wrap the stop-word-free TWEET in the feature
        dictionary used for training and classifying.
        """
        return {'tweet': self._remove_stop_words(tweet)}

    def _creating_set(self):
        """
        () -> DynamicArray
        Build the training set from the tab-separated file at
        self.database.

        The first line of the file is skipped. For every following line
        the first column is converted by self._tweets_features and paired
        with the second column.

        Raises IndexError if a line contains no tab character.
        """
        result_set = DynamicArray()
        # The context manager closes the file even when a row fails to
        # parse; previously the handle was never closed.
        with open(self.database, "r", encoding='utf-8') as file:
            file.readline()  # skip the first line (presumably a header)
            for line in file:
                columns = line.split('\t')
                first_el = self._tweets_features(columns[0])
                # NOTE(review): when the label is the last column it still
                # carries the line terminator - confirm this is intended.
                result_set.append((first_el, columns[1]))
        return result_set

    def _train_model(self):
        """
        () -> NaiveBayesClassifier
        Train a NaiveBayesClassifier on the set built by _creating_set.
        """
        train_set = self._creating_set()
        return NaiveBayesClassifier.train(train_set)

    def get_tweet_emotion(self):
        """
        () -> str
        Return the label the classifier assigns to the current tweet and
        append it to self.emotions.
        """
        label = self._classifier.classify(self._tweets_features(self.tweet))
        self.emotions.append(label)
        return label

    def get_emotions_probability(self):
        """
        () -> DynamicArray
        Return (emotion, probability) pairs for the current tweet and
        append that array to self.probabilities.
        """
        emotions = DynamicArray()
        probabilities = self._classifier.prob_classify(
            self._tweets_features(self.tweet))
        for sample in probabilities.samples():
            emotions.append((sample, probabilities.prob(sample)))
        self.probabilities.append(emotions)
        return emotions