Beispiel #1
0
 def __init__(self, database):
     """
     (str) ->
     Store the DATABASE path, start with no tweet selected, create
     the two DynamicArray containers (emotions and probabilities)
     and build the classifier by calling self._train_model().
     """
     self.tweet = None
     self.database = database
     self.emotions, self.probabilities = DynamicArray(), DynamicArray()
     # Must come after self.database is assigned.
     # NOTE(review): presumably _train_model reads self.database — confirm.
     self._classifier = self._train_model()
 def get_emotions_probability(self):
     """
     () -> DynamicArray
     Classify self.tweet and return a DynamicArray of
     (emotion, probability) tuples, one per sample reported by the
     classifier. The same array is also appended to
     self.probabilities before it is returned.
     """
     scored = DynamicArray()
     features = self._tweets_features(self.tweet)
     distribution = self._classifier.prob_classify(features)
     for label in distribution.samples():
         pair = (label, distribution.prob(label))
         scored.append(pair)
     # Keep a history of every distribution that was computed.
     self.probabilities.append(scored)
     return scored
Beispiel #3
0
 def _creating_set(self):
     """
     () -> DynamicArray
     Build the training set from the file at self.database.

     The file is read as UTF-8. Its first line is discarded and every
     following line is split on tab characters: column 0 is turned
     into a feature dict with self._tweets_features and column 1 is
     used as the label exactly as read.

     Returns a DynamicArray of (features, label) tuples.
     Raises IndexError if a line contains no tab character.
     """
     result_set = DynamicArray()
     # The context manager closes the file even when a line fails to
     # parse; previously the handle was never closed.
     with open(self.database, "r", encoding='utf-8') as file:
         file.readline()  # discard first line (presumably a header row)
         for line in file:
             columns = line.split('\t')
             features = self._tweets_features(columns[0])
             # NOTE(review): if the label is the last column it still
             # carries the trailing newline — confirm this is intended.
             result_set.append((features, columns[1]))
     return result_set
Beispiel #4
0
 def available_dates(self):
     """
     () -> DynamicArray
     Returns the dates on which the user wrote tweets.

     Walks the linked nodes starting at self.data.next. For each node,
     node.data[0] is split on whitespace and tokens 1, 2 and 5 are
     joined into a day string. Each entry of the result is a tuple
     (node.data[0], day).
     NOTE(review): presumably node.data[0] is a timestamp such as
     "Mon Apr 06 22:19:45 PDT 2009" — confirm against the data source.

     Nodes whose data[0] has too few tokens (or whose data is empty)
     are skipped.
     """
     dates = DynamicArray()
     cur_node = self.data.next
     while cur_node is not None:
         try:
             date = cur_node.data[0].split()
             day = "{} {} {}".format(date[1], date[2], date[5])
         except IndexError:
             # Malformed entry: leave it out of the result.
             pass
         else:
             dates.append((cur_node.data[0], day))
         # Advance unconditionally. The previous version advanced
         # inside the try, so a malformed node made the loop spin
         # forever on the same node.
         cur_node = cur_node.next
     return dates
Beispiel #5
0
    def get_tweet_by_day(self, day):
        """
        (str) -> DynamicArray
        Returns the tweets that have been written on the given DAY.

        DAY is compared with the second element of every entry in
        self.available_dates; the first element of each matching entry
        is the value stored in node.data[0]. Every node (starting at
        self.data.next) whose data[0] is one of those values contributes
        its data[1] to the result.

        Raises ValueError if no entry of self.available_dates matches DAY.
        """
        # NOTE(review): available_dates is read as an attribute here, so
        # it is presumably exposed as a property — confirm.
        #
        # Collect every stored value belonging to DAY. The previous
        # version overwrote `day` with the first match, so only tweets
        # sharing that one exact value were returned.
        matching = set()
        for date in self.available_dates:
            if date[1] == day:
                matching.add(date[0])

        if not matching:
            raise ValueError("This DAY is not available")

        days = DynamicArray()
        cur_node = self.data.next
        while cur_node is not None:
            if cur_node.data[0] in matching:
                days.append(cur_node.data[1])
            cur_node = cur_node.next
        return days
Beispiel #6
0
class EmotionsList:
    """EmotionsList ADT that classifies tweets and preserves the
    predicted emotions and their probabilities.

    A classifier is trained once, at construction time, from a
    tab-separated file. Set the text to analyse with set_tweet, then call
    get_tweet_emotion or get_emotions_probability; each call also records
    its result in self.emotions / self.probabilities.
    """

    def __init__(self, database):
        """
        (str) ->
        Store the DATABASE path, create the two DynamicArray history
        containers and train the model.

        Training happens here, so constructing an instance reads the
        whole database file.
        """
        self.database = database
        self.tweet = None
        self.emotions = DynamicArray()
        self.probabilities = DynamicArray()
        # Must run after self.database is set: training reads that file.
        self._classifier = self._train_model()

    def set_tweet(self, tweet):
        """
        (str) ->
        Sets tweet which the ADT will analyze
        """
        self.tweet = tweet

    @staticmethod
    def _remove_stop_words(tweet):
        """
        (str) -> str
        Returns TWEET lower-cased with the words found in STOPWORDS
        removed.

        Words are separated on whitespace. Every kept word is followed by
        a single space, so a non-empty result ends with a trailing space.
        """
        lowered = (word.lower() for word in tweet.split())
        # The per-word trailing space is kept on purpose: the resulting
        # string is the feature value used for both training and
        # classification, so its exact form must not change.
        return "".join(word + " " for word in lowered
                       if word not in STOPWORDS)

    def _tweets_features(self, tweet):
        """
        (str) -> dict
        Additional method.
        Returns the feature dictionary for TWEET: a single key 'tweet'
        mapped to the text with stop words removed.
        """
        tweet = self._remove_stop_words(tweet)
        return {'tweet': tweet}

    def _creating_set(self):
        """
        () -> DynamicArray
        Build the training set from the file at self.database.

        The file is read as UTF-8. Its first line is discarded and every
        following line is split on tab characters: column 0 is turned
        into a feature dict and column 1 is used as the label exactly as
        read.

        Returns a DynamicArray of (features, label) tuples.
        Raises IndexError if a line contains no tab character.
        """
        result_set = DynamicArray()
        # The context manager closes the file even when a line fails to
        # parse; previously the handle was never closed.
        with open(self.database, "r", encoding='utf-8') as file:
            file.readline()  # discard first line (presumably a header row)
            for line in file:
                columns = line.split('\t')
                features = self._tweets_features(columns[0])
                # NOTE(review): if the label is the last column it still
                # carries the trailing newline — confirm this is intended.
                result_set.append((features, columns[1]))
        return result_set

    def _train_model(self):
        """
        () -> NaiveBayesClassifier
        Trains a NaiveBayesClassifier on the set built by _creating_set
        and returns it.
        """
        train_set = self._creating_set()
        return NaiveBayesClassifier.train(train_set)

    def get_tweet_emotion(self):
        """
        () -> str
        Returns the label the classifier assigns to self.tweet and
        appends it to self.emotions.

        set_tweet must be called first: with the initial value None the
        stop-word removal fails on tweet.split().
        """
        label = self._classifier.classify(self._tweets_features(self.tweet))
        self.emotions.append(label)
        return label

    def get_emotions_probability(self):
        """
        () -> DynamicArray
        Returns an array of (emotion, probability) tuples for self.tweet,
        one per sample reported by the classifier, and appends that array
        to self.probabilities.

        set_tweet must be called first (see get_tweet_emotion).
        """
        emotions = DynamicArray()
        probabilities = self._classifier.prob_classify(
            self._tweets_features(self.tweet))
        for sample in probabilities.samples():
            emotions.append((sample, probabilities.prob(sample)))
        self.probabilities.append(emotions)
        return emotions