def load_train_validation_data(self):
    """Load the training and validation sets, plus the test set if configured.

    Fills ``self.train`` and ``self.validation`` through ``dh.loaddata`` and
    prints a progress message after each one. When ``self._test_file`` is not
    ``None``, ``self.test`` is loaded as well (with ``ignore_profiles=True``).
    """
    self.train = dh.loaddata(
        self._train_file,
        self._word_file_path,
        self._split_word_file_path,
        self._emoji_file_path,
        normalize_text=True,
        split_hashtag=True,
        ignore_profiles=False,
    )
    print('Training data loading finished...')

    self.validation = dh.loaddata(
        self._validation_file,
        self._word_file_path,
        self._split_word_file_path,
        self._emoji_file_path,
        normalize_text=True,
        split_hashtag=True,
        ignore_profiles=False,
    )
    print('Validation data loading finished...')

    # Identity comparison is the correct test for "no test file configured".
    if self._test_file is not None:
        # NOTE(review): unlike the train/validation calls, this call does not
        # pass the split-word and emoji file paths -- confirm this is intended.
        self.test = dh.loaddata(
            self._test_file,
            self._word_file_path,
            normalize_text=True,
            split_hashtag=True,
            ignore_profiles=True,
        )
def load_train_validation_test_data(self):
    """Load the training, validation and (optionally) test sets.

    Fills ``self.train`` and ``self.validation`` through ``dh.loaddata``.
    ``self.test`` is loaded only when ``self._test_file`` is not ``None``;
    that call uses ``ignore_profiles=True`` whereas the other two use
    ``ignore_profiles=False``.
    """
    print("Loading resource...")

    # Pre-processing switches shared by all three loads.
    text_options = {'normalize_text': True, 'split_hashtag': True}

    self.train = dh.loaddata(
        self._train_file,
        self._word_file_path,
        self._split_word_file_path,
        self._emoji_file_path,
        ignore_profiles=False,
        **text_options
    )
    self.validation = dh.loaddata(
        self._validation_file,
        self._word_file_path,
        self._split_word_file_path,
        self._emoji_file_path,
        ignore_profiles=False,
        **text_options
    )

    # Identity comparison is the correct test for "no test file configured".
    if self._test_file is not None:
        self.test = dh.loaddata(
            self._test_file,
            self._word_file_path,
            self._split_word_file_path,
            self._emoji_file_path,
            ignore_profiles=True,
            **text_options
        )
def predict(self, test_file, verbose=False):
    """Load ``test_file``, vectorize it and run the prediction step.

    The loaded data is stored on ``self.test`` and the vocabulary returned by
    ``self.load_vocab()`` on ``self._vocab``. The padded word-index sequences
    are then handed to ``self.__predict_model`` together with ``self.test``.

    Args:
        test_file: Path handed to ``dh.loaddata`` as the data file.
        verbose: When truthy, print the time spent loading and preparing
            the test data.

    Any exception raised along the way is caught and printed as
    ``Error: <message>``; nothing is re-raised and nothing is returned.
    """
    try:
        started = time.time()
        self.test = dh.loaddata(
            test_file,
            self._word_file_path,
            self._split_word_file_path,
            self._emoji_file_path,
            normalize_text=True,
            split_hashtag=True,
            ignore_profiles=False,
            lowercase=False,
            n_grams=3,
            at_character=True,
        )
        finished = time.time()
        if verbose:
            print('test resource loading time::', (finished - started))

        self._vocab = self.load_vocab()
        print('vocab loaded...')

        started = time.time()
        # Only the word-index sequences are used here; the remaining four
        # outputs are unpacked (so arity is still checked) but ignored.
        tX, _tY, _tD, _tC, _tA = dh.vectorize_word_dimension(
            self.test, self._vocab)
        tX = dh.pad_sequence_1d(tX, maxlen=self._line_maxlen)
        finished = time.time()
        if verbose:
            print('test resource preparation time::', (finished - started))

        self.__predict_model(tX, self.test)
    except Exception as e:
        # Deliberate best-effort behavior: report the failure and carry on.
        print('Error:', e)
def predict(self, test_file, verbose=False):
    """Load ``test_file``, build the three padded inputs and run prediction.

    The loaded data is stored on ``self.test`` and the vocabulary returned by
    ``self.load_vocab()`` on ``self._vocab``. Three of the outputs of
    ``dh.vectorize_word_dimension`` are padded and passed to
    ``self.__predict_model`` as the list ``[tC, tX, tD]`` together with
    ``self.test``.

    Args:
        test_file: Path handed to ``dh.loaddata`` as the data file.
        verbose: When truthy, print the time spent loading and preparing
            the test data.
    """
    started = time.time()
    self.test = dh.loaddata(
        test_file,
        self._word_file_path,
        normalize_text=True,
        split_hashtag=True,
        ignore_profiles=False,
    )
    finished = time.time()
    if verbose:
        print('test resource loading time::', (finished - started))

    self._vocab = self.load_vocab()

    started = time.time()
    # The second and fifth outputs are not needed for prediction; they are
    # still unpacked so that an unexpected arity fails loudly.
    tX, _tY, tD, tC, _tA = dh.vectorize_word_dimension(
        self.test, self._vocab)
    tX = dh.pad_sequence_1d(tX, maxlen=self._line_maxlen)
    tC = dh.pad_sequence_1d(tC, maxlen=self._line_maxlen)
    # NOTE(review): 11 is a hard-coded length for this input; its origin is
    # not visible here -- confirm it matches what the model was built with.
    tD = dh.pad_sequence_1d(tD, maxlen=11)
    finished = time.time()
    if verbose:
        print('test resource preparation time::', (finished - started))

    self.__predict_model([tC, tX, tD], self.test)
def load_train_validation_test_data(self):
    """Load the training, validation and (optionally) test sets.

    Each set is read with ``dh.loaddata`` using only the word file as an
    auxiliary resource. ``self.test`` is loaded only when
    ``self._test_file`` is not ``None``, and that call uses
    ``ignore_profiles=True`` whereas the other two use
    ``ignore_profiles=False``.
    """
    self.train = dh.loaddata(
        self._train_file,
        self._word_file_path,
        normalize_text=True,
        split_hashtag=True,
        ignore_profiles=False,
    )
    self.validation = dh.loaddata(
        self._validation_file,
        self._word_file_path,
        normalize_text=True,
        split_hashtag=True,
        ignore_profiles=False,
    )

    # Identity comparison is the correct test for "no test file configured".
    if self._test_file is not None:
        self.test = dh.loaddata(
            self._test_file,
            self._word_file_path,
            normalize_text=True,
            split_hashtag=True,
            ignore_profiles=True,
        )
def load_train_data(self):
    """Read the training file into ``self.train`` and report completion."""
    # Positional arguments: the data file followed by the three resource files.
    positional = (
        self._train_file,
        self._word_file_path,
        self._split_word_file_path,
        self._emoji_file_path,
    )
    switches = dict(
        normalize_text=True,
        split_hashtag=True,
        ignore_profiles=False,
    )
    self.train = dh.loaddata(*positional, **switches)
    print('Training data loading finished...')
def load_train_validation_data(self, ignore_profiles=True, lowercase=True,
                               at_character=False):
    """Read the training and validation files with identical settings.

    Args:
        ignore_profiles: Forwarded to ``dh.loaddata`` for both sets.
        lowercase: Forwarded to ``dh.loaddata`` for both sets.
        at_character: Forwarded to ``dh.loaddata`` for both sets.

    The results are stored on ``self.train`` and ``self.validation``.
    """

    def _read(data_file):
        # One load with the fixed text switches plus the caller's options.
        return dh.loaddata(
            data_file,
            self._word_file_path,
            self._split_word_file_path,
            self._emoji_file_path,
            normalize_text=True,
            split_hashtag=True,
            lowercase=lowercase,
            ignore_profiles=ignore_profiles,
            at_character=at_character,
        )

    self.train = _read(self._train_file)
    self.validation = _read(self._validation_file)
def load_train_validation_data(self):
    """Read the training and validation files and report progress.

    Both sets are loaded with ``lowercase=False``, ``n_grams=3`` and
    ``at_character=True``. The results are stored on ``self.train`` and
    ``self.validation``, with a message printed after each load.
    """

    def _read(data_file, split_hashtag):
        # Everything except ``split_hashtag`` is the same for both sets.
        return dh.loaddata(
            data_file,
            self._word_file_path,
            self._split_word_file_path,
            self._emoji_file_path,
            normalize_text=True,
            split_hashtag=split_hashtag,
            ignore_profiles=False,
            lowercase=False,
            n_grams=3,
            at_character=True,
        )

    self.train = _read(self._train_file, True)
    print('Training data loading finished...')

    # NOTE(review): validation is loaded with split_hashtag=False while
    # training uses True -- confirm this asymmetry is intentional.
    self.validation = _read(self._validation_file, False)
    print('Validation data loading finished...')
def test_predict(self, verbose=False):
    """Predict on ``self._test_file`` and print the label with the top total.

    Steps, in order:
      1. Load the test file into ``self.test``.
      2. Build a vocabulary from that data (``min_freq=1``), add an ``'unk'``
         entry if it is missing, store it on ``self._vocab`` and write it to
         ``self._vocab_file_path``.
      3. Vectorize and pad the word sequences to ``self._line_maxlen``.
      4. Run ``self.model.predict``, sum the per-sample outputs and print the
         label whose summed score is largest (first one on ties).

    Args:
        verbose: When truthy, print the time spent loading the test data.
    """
    started = time.time()
    self.test = dh.loaddata(
        self._test_file,
        self._word_file_path,
        self._split_word_file_path,
        self._emoji_file_path,
        normalize_text=True,
        split_hashtag=True,
        ignore_profiles=False,
    )
    finished = time.time()
    if verbose:
        print('test resource loading time::', (finished - started))

    # NOTE(review): the vocabulary is rebuilt from the test data and written
    # over the vocab file -- confirm this matches the vocabulary the model
    # was trained with.
    self._vocab = dh.build_vocab(self.test, min_freq=1)
    if 'unk' not in self._vocab:
        self._vocab['unk'] = len(self._vocab) + 1
    dh.write_vocab(self._vocab_file_path, self._vocab)

    # Only the word-index sequences feed the model; the other outputs are
    # unpacked (so arity is still checked) but ignored.
    tX, _tY, _D, _C, _A = dh.vectorize_word_dimension(self.test, self._vocab)
    tX = dh.pad_sequence_1d(tX, maxlen=self._line_maxlen)

    label_dict = {
        0: 'EXTRAVERSION',
        1: 'NEUROTICISM',
        2: 'AGREEABLENESS',
        3: 'CONSCIENTIOUSNESS',
        4: 'OPENNESS',
    }

    predictions = self.model.predict(tX)
    # Column-wise total over all samples, accumulated in float64.
    total_pred = np.sum(predictions, axis=0, dtype=np.float64)
    # argmax returns the first index of the maximum.
    RESULT = int(np.argmax(total_pred))
    print("THE RESULT IS " + str(label_dict[RESULT]))