def readProcessedReview(self, filename):
    """Load column ``'0'`` of ``filename`` and run it through StopwordRemoval.

    Args:
        filename: Name of the processed-review file; it is resolved with
            ``choice='imdb'``.

    Returns:
        The result of ``StopwordRemoval.stopwordRemoval`` for the loaded
        reviews.
    """
    # The file used to be loaded a second time through
    # GetDatasetPath(...).readFileProcessed() with the result discarded;
    # ReadData.readDataReview is expected to do the load on its own, so the
    # redundant read was dropped.
    reviews = ReadData(filename, choice='imdb').readDataReview('0')

    remover = StopwordRemoval(reviews)
    stopwords = remover.stopwordsToRem()
    adjusted_stopwords = remover.removefromStopwordList(stopwords)
    return remover.stopwordRemoval(adjusted_stopwords)
def saveData(self, res, sen):
    """Persist the review data and the sentiment data as two datasets.

    Args:
        res: Data wrapped in a DataFrame and saved under
            ``PROCESSED['html_review']``.
        sen: Data wrapped in a DataFrame and saved under
            ``PROCESSED['sentiment']``.
    """
    # Reviews are written first, sentiments second.
    for processed_key, payload in (('html_review', res), ('sentiment', sen)):
        frame = pd.DataFrame(payload)
        target = GetDatasetPath(PROCESSED[processed_key], choice=self.choice)
        target.saveDataset(frame)
def readDataReview(self, column_name):
    """Return one column of this object's processed file as a list.

    Args:
        column_name: Label of the column to extract from the object that
            ``readFileProcessed()`` returns (presumably a pandas DataFrame).

    Returns:
        The column's values converted with ``.values.tolist()``.
    """
    frame = GetDatasetPath(self.filename, choice=self.choice).readFileProcessed()
    return frame[column_name].values.tolist()
def readProcessedReview(self):
    """Load column ``'0'`` of ``self.filename`` and run it through StopwordRemoval.

    Returns:
        The result of ``StopwordRemoval.stopwordRemoval`` for the loaded
        reviews.
    """
    # An unused GetDatasetPath instance used to be built here; it was never
    # read, so it has been removed.
    reviews = ReadData(self.filename, choice=self.choice).readDataReview('0')

    remover = StopwordRemoval(reviews)
    stopwords = remover.stopwordsToRem()
    adjusted_stopwords = remover.removefromStopwordList(stopwords)
    return remover.stopwordRemoval(adjusted_stopwords)
def data_preparation(self):
    """Turn the lemmatized reviews into numeric train/test splits and save them.

    Steps, in order:
      1. Read column ``'0'`` of ``PROCESSED['lemmatize']``.
      2. Run ``StopwordRemoval.perform_removal`` on it.
      3. Convert the text with ``StrToNum(..., 200).convert_str_to_nums()``,
         which yields the padded reviews and a word index.
      4. Save the word index under ``WORDS['num_words']``.
      5. Read ``PROCESSED['sentiment']`` and split both with
         ``TrainTestSplit().train_test(..., 0.2)``.
      6. Save the four resulting parts via ``SaveData().savePrepared``.
    """
    lemmatized = ReadData(PROCESSED['lemmatize']).readDataReview('0')
    without_stopwords = StopwordRemoval(lemmatized).perform_removal()

    # NOTE(review): 200 is presumably the padded sequence length -- confirm.
    converter = StrToNum(without_stopwords, 200)
    padded_reviews, vocabulary_index = converter.convert_str_to_nums()
    SaveData(WORDS['num_words']).saveNumWords(vocabulary_index)

    sentiments = GetDatasetPath(PROCESSED['sentiment']).readFileProcessed()

    # NOTE(review): 0.2 is presumably the held-out fraction -- confirm.
    splitter = TrainTestSplit()
    x_train, y_train, x_test, y_test = splitter.train_test(
        padded_reviews, sentiments, 0.2)

    SaveData().savePrepared([x_train, y_train, x_test, y_test])
def createmodel(self):
    """Train and save one model for each embedding: fasttext, w2v and glove.

    The prepared splits are loaded once from ``x_train.csv``, ``x_test.csv``,
    ``y_test.csv`` and ``y_train.csv``. Then, for each embedding in turn, the
    embedding matrix is read, a model is built from it, trained on the
    splits, and saved under the matching ``MODEL`` entry.
    """
    print("Getting training and validation files...")
    # Load order kept from the original: x_train, x_test, y_test, y_train.
    X_train_pad = GetDatasetPath('x_train.csv').loadDataset()
    X_test_pad = GetDatasetPath('x_test.csv').loadDataset()
    y_test = GetDatasetPath('y_test.csv').loadDataset()
    y_train = GetDatasetPath('y_train.csv').loadDataset()

    print("Getting embedding file...")

    # One entry per run: (progress banner, EMBEDDING key, MODEL key).
    # The same pipeline was previously written out three times by hand.
    runs = (
        ("Fasttext embedding - model...", 'fast_emb', 'fasttext'),
        ("w2v embedding - model...", 'w2v_emb', 'w2v'),
        ("Glove embedding - model...", 'glove_emb', 'glove'),
    )

    for banner, embedding_key, model_key in runs:
        print(banner)
        embedding_matrix = ReadWriteEmbedding(EMBEDDING[embedding_key]).readEmb()

        print("Creating model...")
        # A fresh builder and trainer are created for every embedding, as
        # before, so no state is shared between runs.
        builder = CreateModel(200, 100)
        model = builder.existing_model(embedding_matrix, 1)

        print("Training model...")
        trainer = TrainModel(128, 50)
        model = trainer.model_train(
            model, X_train_pad, y_train, X_test_pad, y_test)

        print("Saving model...")
        GetDatasetPath(MODEL[model_key]).saveModel(model)
def prepModel(self, test_samples_tokens_pad):
    """Load the model named by ``self.model`` and predict on the given input.

    Args:
        test_samples_tokens_pad: Input handed unchanged to the loaded
            model's ``predict``.

    Returns:
        Whatever the loaded model's ``predict`` returns.
    """
    # Attention is registered as a custom object for the duration of the
    # block (presumably required to deserialize the saved model).
    with CustomObjectScope({'Attention': Attention}):
        loader = GetDatasetPath(self.model, choice=self.choice)
        restored = loader.loadModel()
        return restored.predict(test_samples_tokens_pad)
def saveDataReview(self, res):
    """Wrap ``res`` in a DataFrame and save it to this object's file.

    Args:
        res: Data passed to ``pd.DataFrame`` before saving.
    """
    frame = pd.DataFrame(res)
    GetDatasetPath(self.filename, choice=self.choice).saveDataset(frame)
def savePrepared(self, trainTestVal):
    """Save the four prepared splits under their ``SPLIT`` entries.

    Args:
        trainTestVal: Indexable holding, in order, the X_train_pad, y_train,
            X_test_pad and y_test data (indices 0 to 3).
    """
    # The data travels through the ``data=`` argument of GetDatasetPath;
    # saveDataset is always called with the same empty DataFrame.
    placeholder = pd.DataFrame()
    split_keys = ('X_train_pad', 'y_train', 'X_test_pad', 'y_test')

    for position, split_key in enumerate(split_keys):
        writer = GetDatasetPath(
            SPLIT[split_key],
            data=trainTestVal[position],
            choice=self.choice,
        )
        writer.saveDataset(placeholder)
def prepModel(self):
    """Load the fasttext model and store its predictions on the instance.

    The model is loaded from ``MODEL['fasttext']`` with ``choice='imdb'``
    and run on ``self.test_samples_tokens_pad``. The result is written to
    ``self.predictions``; nothing is returned.
    """
    # Attention is registered as a custom object for the duration of the
    # block (presumably required to deserialize the saved model).
    with CustomObjectScope({'Attention': Attention}):
        loader = GetDatasetPath(MODEL['fasttext'], choice='imdb')
        restored = loader.loadModel()
        self.predictions = restored.predict(self.test_samples_tokens_pad)
def readProcessedSentiment(self, filename):
    """Load column ``'0'`` of ``filename`` into ``self.tes_res``.

    Args:
        filename: Name of the processed file; it is resolved with
            ``choice='imdb'``.

    The loaded list is stored on the instance; nothing is returned.
    """
    # The file used to be loaded a second time through
    # GetDatasetPath(...).readFileProcessed() with the result discarded;
    # ReadData.readDataReview is expected to do the load on its own, so the
    # redundant read was dropped.
    self.tes_res = ReadData(filename, choice='imdb').readDataReview('0')
def readProcessedSentiment(self):
    """Return column ``'0'`` of ``self.filename`` as a list.

    Returns:
        The list produced by ``ReadData.readDataReview('0')`` for this
        object's file and ``choice``.
    """
    # The file used to be loaded a second time through
    # GetDatasetPath(...).readFileProcessed() with the result discarded;
    # ReadData.readDataReview is expected to do the load on its own, so the
    # redundant read was dropped.
    return ReadData(self.filename, choice=self.choice).readDataReview('0')