def perform_expand(self, filename):
    """Read reviews from *filename*, run the special-character pass, and persist the result.

    The processed reviews are written to PROCESSED['spl_char'].
    """
    reader = ReadData(filename, choice=self.choice)
    self.lis = reader.readDataReview('0')
    # process_list() is expected to populate self.res from self.lis.
    self.process_list()
    writer = SaveData(PROCESSED['spl_char'], choice=self.choice)
    writer.saveDataReview(self.res)
def perform_expand(self, filename):
    """Read reviews from *filename*, run the lemmatization pass, and persist the result.

    The processed reviews are written to PROCESSED['lemmatize'].
    """
    reader = ReadData(filename, choice=self.choice)
    self.lis = reader.readDataReview('0')
    # process_list() is expected to populate self.res from self.lis.
    self.process_list()
    writer = SaveData(PROCESSED['lemmatize'], choice=self.choice)
    writer.saveDataReview(self.res)
def commentSentiment(self, model, comment):
    """Classify a single comment as Positive/Negative with a saved Keras model.

    Parameters
    ----------
    model : str
        File name of the trained model, resolved under ``config.MODELS``.
    comment : str
        Raw comment text to classify.

    Returns
    -------
    tuple
        ``(sentiment, probability)`` where *sentiment* is "Positive" or
        "Negative" and *probability* is the raw model output.
    """
    print("Read data..")
    # Re-fit the tokenizer on the training corpus so its word index matches
    # the one the model was trained with.
    lines_processed = ReadData(PROCESSED['lemmatize'],
                               choice=self.choice).readProcessedReview()
    print("tokenize...")
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(lines_processed)
    print("Comment = ", comment, type(comment))
    test_samples_tokens = tokenizer.texts_to_sequences([comment])
    # maxlen=200 must match the sequence length used at training time
    # — TODO confirm against the training pipeline.
    test_samples_tokens_pad = pad_sequences(test_samples_tokens, maxlen=200)
    print("Load Model...")
    # The custom Attention layer must be registered before deserialization.
    with CustomObjectScope({'Attention': Attention}):
        new_model = load_model(os.path.join(config.MODELS, model))
    probability = new_model.predict(x=test_samples_tokens_pad)
    print("Probability = ", probability)
    predictions = (probability > 0.5).astype('int32')
    print("Class = ", type(predictions))
    # Single-sample prediction: index the (1, 1) array explicitly instead of
    # relying on NumPy truth-value coercion via `predictions == 0`.
    if predictions[0][0] == 0:
        sent = "Negative"
    else:
        sent = "Positive"
    print(sent)
    return sent, probability
def readProcessedReview(self, filename):
    """Load processed reviews from *filename* and return them with stopwords removed.

    Parameters
    ----------
    filename : str
        Name of the processed-review file to read.

    Returns
    -------
    list
        Review lines after stopword removal.
    """
    # NOTE(review): the original also read the file via
    # GetDatasetPath(...).readFileProcessed() into an unused dataframe;
    # that dead read has been removed.
    lis = ReadData(filename, choice='imdb').readDataReview('0')
    swr = StopwordRemoval(lis)
    stopwords = swr.stopwordsToRem()
    # Some words are deliberately kept (e.g. negations) — the exact list is
    # defined by removefromStopwordList; verify against StopwordRemoval.
    after_removal = swr.removefromStopwordList(stopwords)
    return swr.stopwordRemoval(after_removal)
def data_preparation(self):
    """Build padded numeric train/test splits and persist them with the word index."""
    reviews = ReadData(PROCESSED['lemmatize']).readDataReview('0')
    cleaned = StopwordRemoval(reviews).perform_removal()
    # Convert tokens to integer sequences padded/truncated to length 200.
    review_pad, word_index = StrToNum(cleaned, 200).convert_str_to_nums()
    SaveData(WORDS['num_words']).saveNumWords(word_index)
    labels_df = GetDatasetPath(PROCESSED['sentiment']).readFileProcessed()
    # 80/20 split between training and test data.
    X_train_pad, y_train, X_test_pad, y_test = TrainTestSplit().train_test(
        review_pad, labels_df, 0.2)
    SaveData().savePrepared([X_train_pad, y_train, X_test_pad, y_test])
def _write_embedding_matrix(self, emb_text, emb_csv, word_index, num_words):
    """Build the 100-d embedding matrix from *emb_text* and write it to *emb_csv* as CSV."""
    embMat = PrepareEmbeddingMatrix(emb_text, 100, num_words)
    embeddings_index = embMat.get_EmbeddingIndex()
    mat = embMat.get_embMatrix(word_index, embeddings_index)
    print("Convert to csv...")
    ReadWriteEmbedding(emb_csv, asarray(mat)).writeEmb()

def embedding(self):
    """Generate fastText and word2vec embeddings, and export all three
    embedding matrices (fastText, word2vec, GloVe) to CSV.
    """
    lines_processed = ReadData(PROCESSED['lemmatize']).readDataReview('0')
    stop_removed = StopwordRemoval(lines_processed).perform_removal()
    lines_processed, word_index = Tokenize().tokenizer(stop_removed)
    num_words = ReadData(WORDS['num_words']).readNumWords()

    print("Embedding as fastext...")
    FastTextEmb(lines_processed, 100, EMBEDDING['fast_text']).generate_Embeddings()
    self._write_embedding_matrix(EMBEDDING['fast_text'], EMBEDDING['fast_emb'],
                                 word_index, num_words)

    print("Embedding as w2v...")
    W2vEmb(lines_processed, 100, EMBEDDING['w2v_text']).generate_Embeddings()
    self._write_embedding_matrix(EMBEDDING['w2v_text'], EMBEDDING['w2v_emb'],
                                 word_index, num_words)

    print("Embedding as glove...")
    # GloVe vectors are pretrained, so there is no generation step here.
    self._write_embedding_matrix(EMBEDDING['glove_text'], EMBEDDING['glove_emb'],
                                 word_index, num_words)
def predictmodel(self):
    """Evaluate each trained model (Fasttext, W2v, Glove) on the test reviews.

    Loads the lemmatized training corpus (needed to rebuild the tokenizer),
    the test reviews and sentiments, then runs every model over the same
    padded test input and prints its single-class results.
    """
    lines_processed = ReadData(PROCESSED['lemmatize'],
                               choice=self.choice).readProcessedReview()
    tes_lis = ReadData(TEST['review'], choice=self.choice).readProcessedReview()
    tes_sen = ReadData(TEST['sentiment'], choice=self.choice).readProcessedSentiment()
    test_samples_tokens_pad = PrepareData(filename=None).prepTestData(
        lines_processed, tes_lis)
    # One pass per embedding: same padded input, different trained weights.
    for label, model_key in (("Fasttext", MODEL['fasttext']),
                             ("W2v", MODEL['w2v']),
                             ("Glove", MODEL['glove'])):
        print("Predicting for {}...".format(label))
        results = PredictModel(choice=self.choice, model=model_key)
        predictions = results.prepModel(test_samples_tokens_pad)
        results.resultsSingleClass(predictions, tes_sen)
def perform_expand(self, filename):
    """Read reviews from *filename*, expand contractions, and persist the result.

    The processed reviews are written to PROCESSED['contractions'].
    """
    self.lis = ReadData(filename, choice=self.choice).readDataReview('0')
    # Seed the result list before processing; process_list() refines self.res.
    self.res = self.lis
    self.process_list()
    writer = SaveData(PROCESSED['contractions'], choice=self.choice)
    writer.saveDataReview(self.res)
def perform_removal(self, filename):
    """Strip HTML tags from the reviews in *filename* and write them back in place."""
    writer = SaveData(filename, choice=self.choice)
    self.lis, self.sen = ReadData(filename, choice=self.choice).readData()
    # htmlTagRemoval() reads self.lis and populates self.res.
    self.htmlTagRemoval()
    writer.saveData(self.res, self.sen)
def readProcessedSentiment(self, filename):
    """Load processed sentiment labels from *filename*.

    Stores the labels on ``self.tes_res`` (as the original did) and also
    returns them, for consistency with ``readProcessedReview``.

    Parameters
    ----------
    filename : str
        Name of the processed-sentiment file to read.

    Returns
    -------
    list
        The sentiment labels read from the file.
    """
    # NOTE(review): the original also read the file via
    # GetDatasetPath(...).readFileProcessed() into an unused dataframe;
    # that dead read has been removed.
    self.tes_res = ReadData(filename, choice='imdb').readDataReview('0')
    return self.tes_res