Esempio n. 1
0
    def load_train_validation_data(self):
        """Load the training and validation sets, and the test set if configured.

        The loaded data is stored on ``self.train`` and ``self.validation``.
        When ``self._test_file`` is not ``None`` the test file is loaded into
        ``self.test`` as well.
        """
        self.train = dh.loaddata(self._train_file,
                                 self._word_file_path,
                                 self._split_word_file_path,
                                 self._emoji_file_path,
                                 normalize_text=True,
                                 split_hashtag=True,
                                 ignore_profiles=False)
        print('Training data loading finished...')

        self.validation = dh.loaddata(self._validation_file,
                                      self._word_file_path,
                                      self._split_word_file_path,
                                      self._emoji_file_path,
                                      normalize_text=True,
                                      split_hashtag=True,
                                      ignore_profiles=False)
        print('Validation data loading finished...')

        # Identity comparison is the correct way to test for None.
        if self._test_file is not None:
            # NOTE(review): unlike the two calls above, this one does not pass
            # the split-word and emoji file paths, and it sets
            # ignore_profiles=True -- confirm both are intended.
            self.test = dh.loaddata(self._test_file,
                                    self._word_file_path,
                                    normalize_text=True,
                                    split_hashtag=True,
                                    ignore_profiles=True)
    def load_train_validation_test_data(self):
        """Load the training, validation and (optional) test sets.

        The loaded data is stored on ``self.train`` and ``self.validation``.
        When ``self._test_file`` is not ``None`` the test file is loaded into
        ``self.test`` with ``ignore_profiles=True``.
        """
        print("Loading resource...")
        self.train = dh.loaddata(self._train_file,
                                 self._word_file_path,
                                 self._split_word_file_path,
                                 self._emoji_file_path,
                                 normalize_text=True,
                                 split_hashtag=True,
                                 ignore_profiles=False)

        self.validation = dh.loaddata(self._validation_file,
                                      self._word_file_path,
                                      self._split_word_file_path,
                                      self._emoji_file_path,
                                      normalize_text=True,
                                      split_hashtag=True,
                                      ignore_profiles=False)

        # Identity comparison is the correct way to test for None.
        if self._test_file is not None:
            self.test = dh.loaddata(self._test_file,
                                    self._word_file_path,
                                    self._split_word_file_path,
                                    self._emoji_file_path,
                                    normalize_text=True,
                                    split_hashtag=True,
                                    ignore_profiles=True)
Esempio n. 3
0
    def predict(self, test_file, verbose=False):
        """Load ``test_file``, vectorise it and run the prediction step on it.

        Args:
            test_file: Handed to ``dh.loaddata`` as the data to load.
            verbose: When truthy, print how long loading and preparing the
                test data took.

        Any exception raised along the way is caught and printed to stdout
        instead of being propagated to the caller.
        """
        try:
            # perf_counter() is monotonic, so the measured duration cannot be
            # skewed by system clock adjustments.
            start = time.perf_counter()
            self.test = dh.loaddata(test_file,
                                    self._word_file_path,
                                    self._split_word_file_path,
                                    self._emoji_file_path,
                                    normalize_text=True,
                                    split_hashtag=True,
                                    ignore_profiles=False,
                                    lowercase=False,
                                    n_grams=3,
                                    at_character=True)
            end = time.perf_counter()
            if verbose:
                print('test resource loading time::', (end - start))

            self._vocab = self.load_vocab()
            print('vocab loaded...')

            start = time.perf_counter()
            # Only the first of the five returned values is used here.
            tX, _, _, _, _ = dh.vectorize_word_dimension(
                self.test, self._vocab)
            tX = dh.pad_sequence_1d(tX, maxlen=self._line_maxlen)
            end = time.perf_counter()
            if verbose:
                print('test resource preparation time::', (end - start))

            self.__predict_model(tX, self.test)
        except Exception as e:
            # Best-effort behaviour kept from the original: report, don't raise.
            print('Error:', e)
Esempio n. 4
0
    def predict(self, test_file, verbose=False):
        """Load ``test_file``, build the three padded inputs and run prediction.

        Args:
            test_file: Handed to ``dh.loaddata`` as the data to load.
            verbose: When truthy, print how long loading and preparing the
                test data took.
        """
        # perf_counter() is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments.
        start = time.perf_counter()
        self.test = dh.loaddata(test_file,
                                self._word_file_path,
                                normalize_text=True,
                                split_hashtag=True,
                                ignore_profiles=False)
        end = time.perf_counter()
        if verbose:
            print('test resource loading time::', (end - start))

        self._vocab = self.load_vocab()

        start = time.perf_counter()
        # The second and fifth returned values are not needed for prediction.
        tX, _, tD, tC, _ = dh.vectorize_word_dimension(
            self.test, self._vocab)
        tX = dh.pad_sequence_1d(tX, maxlen=self._line_maxlen)
        tC = dh.pad_sequence_1d(tC, maxlen=self._line_maxlen)
        # NOTE(review): 11 is a hard-coded length for the tD sequences --
        # confirm where this value comes from.
        tD = dh.pad_sequence_1d(tD, maxlen=11)

        end = time.perf_counter()
        if verbose:
            print('test resource preparation time::', (end - start))

        self.__predict_model([tC, tX, tD], self.test)
 def load_train_validation_test_data(self):
     """Load the training, validation and (optional) test sets.

     The loaded data is stored on ``self.train`` and ``self.validation``.
     When ``self._test_file`` is not ``None`` the test file is loaded into
     ``self.test`` with ``ignore_profiles=True``.
     """
     self.train = dh.loaddata(self._train_file,
                              self._word_file_path,
                              normalize_text=True,
                              split_hashtag=True,
                              ignore_profiles=False)
     self.validation = dh.loaddata(self._validation_file,
                                   self._word_file_path,
                                   normalize_text=True,
                                   split_hashtag=True,
                                   ignore_profiles=False)
     # Identity comparison is the correct way to test for None.
     if self._test_file is not None:
         self.test = dh.loaddata(self._test_file,
                                 self._word_file_path,
                                 normalize_text=True,
                                 split_hashtag=True,
                                 ignore_profiles=True)
Esempio n. 6
0
 def load_train_data(self):
     """Read the training file into ``self.train`` and print a completion message."""
     # Positional arguments, in the order dh.loaddata is called with them.
     resource_paths = (
         self._train_file,
         self._word_file_path,
         self._split_word_file_path,
         self._emoji_file_path,
     )
     load_options = {
         'normalize_text': True,
         'split_hashtag': True,
         'ignore_profiles': False,
     }
     self.train = dh.loaddata(*resource_paths, **load_options)
     print('Training data loading finished...')
    def load_train_validation_data(self,
                                   ignore_profiles=True,
                                   lowercase=True,
                                   at_character=False):
        """Load the training and validation files with identical options.

        Args:
            ignore_profiles: Forwarded to ``dh.loaddata`` for both files.
            lowercase: Forwarded to ``dh.loaddata`` for both files.
            at_character: Forwarded to ``dh.loaddata`` for both files.

        The results are stored on ``self.train`` and ``self.validation``.
        """
        # Everything except the data file itself is shared by the two calls.
        resource_paths = (
            self._word_file_path,
            self._split_word_file_path,
            self._emoji_file_path,
        )
        load_options = {
            'normalize_text': True,
            'split_hashtag': True,
            'lowercase': lowercase,
            'ignore_profiles': ignore_profiles,
            'at_character': at_character,
        }

        self.train = dh.loaddata(
            self._train_file, *resource_paths, **load_options)

        self.validation = dh.loaddata(
            self._validation_file, *resource_paths, **load_options)
Esempio n. 8
0
    def load_train_validation_data(self):
        """Load the training and validation sets with the same options.

        The loaded data is stored on ``self.train`` and ``self.validation``;
        a progress message is printed after each file.
        """
        # A single option set is passed to both calls so the two splits cannot
        # drift apart. The validation call previously passed
        # split_hashtag=False while the training call passed True.
        load_options = dict(normalize_text=True,
                            split_hashtag=True,
                            ignore_profiles=False,
                            lowercase=False,
                            n_grams=3,
                            at_character=True)

        self.train = dh.loaddata(self._train_file,
                                 self._word_file_path,
                                 self._split_word_file_path,
                                 self._emoji_file_path,
                                 **load_options)
        print('Training data loading finished...')

        self.validation = dh.loaddata(self._validation_file,
                                      self._word_file_path,
                                      self._split_word_file_path,
                                      self._emoji_file_path,
                                      **load_options)
        print('Validation data loading finished...')
Esempio n. 9
0
    def test_predict(self, verbose=False):
        """Run the model on the configured test file and print the top label.

        The test file is loaded into ``self.test``, a vocabulary is built from
        it (adding an ``'unk'`` entry if missing) and written to
        ``self._vocab_file_path``. The model outputs for all samples are then
        summed element-wise, and the label whose total is largest is printed.

        Args:
            verbose: When truthy, print how long loading the test data took.
        """
        # perf_counter() is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments.
        start = time.perf_counter()
        self.test = dh.loaddata(self._test_file,
                                self._word_file_path,
                                self._split_word_file_path,
                                self._emoji_file_path,
                                normalize_text=True,
                                split_hashtag=True,
                                ignore_profiles=False)
        end = time.perf_counter()
        if verbose:
            print('test resource loading time::', (end - start))

        self._vocab = dh.build_vocab(self.test, min_freq=1)
        if 'unk' not in self._vocab:
            self._vocab['unk'] = len(self._vocab) + 1

        dh.write_vocab(self._vocab_file_path, self._vocab)

        # Only the first of the five returned values is used here.
        tX, _, _, _, _ = dh.vectorize_word_dimension(self.test, self._vocab)
        tX = dh.pad_sequence_1d(tX, maxlen=self._line_maxlen)

        # NOTE(review): the value returned by load_glove_model is not used
        # anywhere below and the path is hard-coded. The call is kept because
        # load_glove_model is defined elsewhere and may have side effects --
        # confirm, and drop it if it has none.
        load_glove_model(
            self._vocab,
            n=300,
            glove_path='/content/SarcasmDetection/src/glove.6B.300d.txt')

        label_dict = {
            0: 'EXTRAVERSION',
            1: 'NEUROTICISM',
            2: 'AGREEABLENESS',
            3: 'CONSCIENTIOUSNESS',
            4: 'OPENNESS'
        }
        predictions = self.model.predict(tX)
        # Sum the outputs over all samples, then take the first index holding
        # the maximum (argmax resolves ties to the lowest index, as the
        # original where(...)[0][0] lookup did).
        total_pred = np.sum(predictions, axis=0)
        result = int(np.argmax(total_pred))
        print("THE RESULT IS " + str(label_dict[result]))