Example #1
0
    def test(self, testing_data):
        """
        Evaluates the model's precision/recall on held-out data.

        Parameters:
        testing_data (array): data on which to test the model

        Returns:
        Null
        """

        for row in testing_data:
            # Shallow copy so per-conversation updates never touch the
            # model-wide priors.
            priors = dict(self.priors)

            turns = [tokenize(row[key]["text"])
                     for key in ("turn1", "turn2", "turn3")]
            conv = turns[0] + turns[1] + turns[2]

            # Pass 1: update each variable's priors from the full conversation.
            tokens = flatten([
                ngrams_and_remove_stop_words(t, self.ngram_choice)
                for t in turns
            ])
            for var in self.variables:
                scores = normalize(self._classify(
                    self.ngrams[var], tokens, conv, priors[var], var))
                for idx, dim in enumerate(self.variable_dimensions):
                    priors[var][dim] = scores[idx]

            # Pass 2: refine priors using only the first turn.
            tokens = ngrams_and_remove_stop_words(turns[0], self.ngram_choice)
            for var in self.variables:
                scores = normalize(self._classify(
                    self.ngrams[var], tokens, turns[0], priors[var], var))
                for idx, dim in enumerate(self.variable_dimensions):
                    priors[var][dim] = scores[idx]

            # Final pass: classify the third turn for every variable.
            tokens = ngrams_and_remove_stop_words(turns[2], self.ngram_choice)
            var_classification = {dim: "" for dim in self.variables}
            for var in self.variables:
                var_classification[var] = self._classify(
                    self.ngrams[var], tokens, turns[2], priors[var], var,
                    False)

            self.true.append(row["turn3"]["emotion"])
            emo_class = self.__map_to_emotion(var_classification)
            self.pred.append(emo_class[0])

        super(AVModel, self).calculate_scores()
Example #2
0
    def test(self, testing_data):
        """
        Evaluates precision/recall of the model on held-out data,
        tracked per variable.

        Parameters:
        testing_data (array): data on which to test the model

        Returns:
        Null
        """

        # Reset per-variable ground-truth / prediction accumulators.
        for var in self.variables:
            self.true[var] = []
            self.pred[var] = []

        for row in testing_data:
            # Shallow copy so updates stay local to this conversation.
            priors = dict(self.priors)

            turns = [tokenize(row[key]["text"])
                     for key in ("turn1", "turn2", "turn3")]
            conv = turns[0] + turns[1] + turns[2]

            # Pass 1: update each variable's priors from the whole
            # conversation.
            tokens = flatten([
                ngrams_and_remove_stop_words(t, self.ngram_choice)
                for t in turns
            ])
            for var in self.variables:
                scores = normalize(self.__classify(
                    self.ngrams[var], tokens, conv, priors[var], var))
                for idx, dim in enumerate(self.variable_dimensions):
                    priors[var][dim] = scores[idx]

            # Pass 2: refine priors using only the first turn.
            tokens = ngrams_and_remove_stop_words(turns[0], self.ngram_choice)
            for var in self.variables:
                scores = normalize(self.__classify(
                    self.ngrams[var], tokens, turns[0], priors[var], var))
                for idx, dim in enumerate(self.variable_dimensions):
                    priors[var][dim] = scores[idx]

            # Final pass: classify the third turn and record truth/prediction
            # for each variable.
            tokens = ngrams_and_remove_stop_words(turns[2], self.ngram_choice)
            for var in self.variables:
                truth = self.variable_dimensions[
                    int(row["turn3"]["appraisals"][var])]
                self.true[var].append(truth)
                self.pred[var].append(self.__classify(
                    self.ngrams[var], tokens, turns[2], priors[var], var,
                    False))

        self.calculate_scores()
Example #3
0
    def test(self, testing_data):
        """
        Evaluates the emotion model's precision/recall on held-out data.

        Parameters:
        testing_data (array): data on which to test the model

        Returns:
        Null
        """

        for row in testing_data:
            # Shallow copy so updates stay local to this conversation.
            priors = dict(self.priors)

            turns = [tokenize(row[key]["text"])
                     for key in ("turn1", "turn2", "turn3")]
            conv = turns[0] + turns[1] + turns[2]

            # Pass 1: update emotion priors from the whole conversation.
            tokens = flatten([
                ngrams_and_remove_stop_words(t, self.ngram_choice)
                for t in turns
            ])
            scores = normalize(
                self.__classify(self.ngrams, tokens, conv, priors))
            for idx, emotion in enumerate(self.emotions):
                priors[emotion] = scores[idx]

            # Pass 2: refine priors using only the first turn.
            tokens = ngrams_and_remove_stop_words(turns[0], self.ngram_choice)
            scores = normalize(
                self.__classify(self.ngrams, tokens, turns[0], priors))
            for idx, emotion in enumerate(self.emotions):
                priors[emotion] = scores[idx]

            self.true.append(row["turn3"]["emotion"])

            # Final pass: classify the third turn.
            tokens = ngrams_and_remove_stop_words(turns[2], self.ngram_choice)
            prediction = self.__classify(self.ngrams, tokens, turns[2],
                                         priors, False)
            self.pred.append(str(prediction))

        self.calculate_scores()
Example #4
0
    def _train_by_variable(self, training_set, variable, data_points=None):
        """
        Calculates n-gram counts and priors for one appraisal variable.

        Parameters:
        training_set (array): training data used to train the model
        variable (string): variable in use in training
        data_points (dict): optional; currently unused by this method,
            kept for interface compatibility

        Returns:
        Null: priors are stored on self.priors[variable] and the counts
        are passed to self.__calculate_probabilities
        """
        # Fix: the original default was a mutable `{}`, which Python shares
        # across all calls; a None sentinel avoids that pitfall.
        if data_points is None:
            data_points = {}

        words = {}
        words_totals = {dim: 0 for dim in self.variable_dimensions}
        tense_totals = {dim: 0 for dim in self.variable_dimensions}
        pronoun_totals = {dim: 0 for dim in self.variable_dimensions}
        words_vocab = set()
        tense_vocab = set()
        pronoun_vocab = set()

        for row in training_set:
            for turn in ["turn1", "turn2", "turn3"]:
                # Ground-truth dimension label for this turn and variable.
                true_dim = self.variable_dimensions[int(
                    row[turn]["appraisals"][variable])]
                tokenized_res = tokenize(row[turn]["text"])

                # Accumulate tense / pronoun counts from part-of-speech tags.
                pos = parts_of_speech(tokenized_res)
                tense_vocab, tense_totals, pronoun_vocab, pronoun_totals = \
                    self.__build_pos_counts(
                        pos, tense_vocab, variable, true_dim, tense_totals,
                        pronoun_vocab, pronoun_totals)

                # Accumulate n-gram word counts (stop words removed).
                res = ngrams_and_remove_stop_words(
                    tokenized_res, self.ngram_choice)
                words_vocab, words, words_totals = self.__build_word_counts(
                    res, words_vocab, words, true_dim, words_totals)

        # Priors: each dimension's relative frequency over all counted turns.
        denom = sum(words_totals.values())
        self.priors[variable] = {
            dim: float(words_totals[dim]) / float(denom)
            for dim in self.variable_dimensions
        }

        self.__calculate_probabilities(
            words, words_totals, words_vocab, tense_totals, tense_vocab,
            pronoun_totals, pronoun_vocab, variable)
Example #5
0
    def train(self, training_data):
        """
        Builds a trained Emotions model

        Parameters:
        training_data (array): training data used to train the model

        Returns:
        Model: a trained model
        """
        word_counts = {}
        word_vocab = set()
        tense_vocab = set()
        pronoun_vocab = set()
        word_totals = dict.fromkeys(self.emotions, 0)
        tense_totals = dict.fromkeys(self.emotions, 0)
        pronoun_totals = dict.fromkeys(self.emotions, 0)

        for row in training_data:
            for turn in ("turn1", "turn2", "turn3"):
                true_emotion = row[turn]["emotion"]
                tokens = tokenize(row[turn]["text"])

                # Accumulate tense / pronoun counts from POS tags.
                tags = parts_of_speech(tokens)
                (tense_vocab, tense_totals,
                 pronoun_vocab, pronoun_totals) = self.__build_pos_counts(
                     tags, tense_vocab, true_emotion, tense_totals,
                     pronoun_vocab, pronoun_totals)

                # Accumulate n-gram counts with stop words removed.
                grams = ngrams_and_remove_stop_words(tokens,
                                                     self.ngram_choice)
                word_vocab, word_counts, word_totals = \
                    self.__build_word_counts(
                        grams, word_vocab, word_counts, true_emotion,
                        word_totals)

        # Priors: each emotion's share of all counted n-grams.
        grand_total = sum(word_totals.values())
        self.priors = {
            emotion: float(word_totals[emotion]) / float(grand_total)
            for emotion in self.emotions
        }

        self.__calculate_probabilities(word_counts, word_totals, word_vocab,
                                       tense_totals, tense_vocab,
                                       pronoun_totals, pronoun_vocab)