Esempio n. 1
0
 def perform_expand(self, filename):
     """Load reviews from ``filename``, process them and save the result.

     The value returned by ``ReadData.readDataReview('0')`` is stored on
     ``self.lis``, ``self.process_list()`` is run, and ``self.res`` is
     then written through ``SaveData`` under ``PROCESSED['spl_char']``.

     Args:
         filename: Passed unchanged to ``ReadData``.
     """
     self.lis = ReadData(filename, choice=self.choice).readDataReview('0')
     self.process_list()
     # self.res is presumably populated by process_list() - confirm there.
     writer = SaveData(PROCESSED['spl_char'], choice=self.choice)
     writer.saveDataReview(self.res)
Esempio n. 2
0
    def perform_expand(self, filename):
        """Read reviews from ``filename``, process them and store the output.

        ``self.lis`` receives whatever ``ReadData.readDataReview('0')``
        returns; after ``self.process_list()`` has run, ``self.res`` is
        saved through ``SaveData`` under ``PROCESSED['lemmatize']``.

        Args:
            filename: Passed unchanged to ``ReadData``.
        """
        source = ReadData(filename, choice=self.choice)
        self.lis = source.readDataReview('0')
        self.process_list()

        # self.res is presumably filled in by process_list() - confirm there.
        target = SaveData(PROCESSED['lemmatize'], choice=self.choice)
        target.saveDataReview(self.res)
Esempio n. 3
0
 def commentSentiment(self, model, comment):
     """Classify one comment as "Positive" or "Negative" with a saved model.

     A fresh ``Tokenizer`` is fitted on the processed review corpus
     (``PROCESSED['lemmatize']``), the comment is converted to a token
     sequence padded to length 200, and the model loaded from
     ``config.MODELS`` scores it. Scores above 0.5 are labelled
     "Positive", everything else "Negative".

     Args:
         model: File name of the saved model inside ``config.MODELS``.
         comment: The text to classify.

     Returns:
         A ``(sent, probability)`` tuple: the label string and the raw
         value returned by the model's ``predict`` call.
     """
     print("Read data..")
     lines_processed = ReadData(PROCESSED['lemmatize'],
                                choice=self.choice).readProcessedReview()
     print("tokenize...")
     # The tokenizer is re-fitted on the whole corpus on every call.
     tokenizer = Tokenizer()
     tokenizer.fit_on_texts(lines_processed)
     print("Comment = ", comment, type(comment))
     # A one-element batch: the model is asked about this comment only.
     test_samples_tokens = tokenizer.texts_to_sequences([comment])
     test_samples_tokens_pad = pad_sequences(test_samples_tokens,
                                             maxlen=200)
     print("Load Model...")
     # 'Attention' must be registered so the saved model can be rebuilt.
     with CustomObjectScope({'Attention': Attention}):
         new_model = load_model(os.path.join(config.MODELS, model))
     probability = new_model.predict(x=test_samples_tokens_pad)
     print("Probability = ", probability)
     predictions = (probability > 0.5).astype('int32')
     print("Class = ", type(predictions))
     # NOTE(review): assumes the model yields a single value for the single
     # sample; the comparison is used directly as a truth value - confirm.
     sent = "Negative" if predictions == 0 else "Positive"
     print(sent)
     return sent, probability
Esempio n. 4
0
 def readProcessedReview(self, filename):
     """Return the reviews in ``filename`` after stop-word removal.

     The reviews come from ``ReadData(filename, choice='imdb')`` via
     ``readDataReview('0')`` and are passed through ``StopwordRemoval``:
     ``stopwordsToRem`` -> ``removefromStopwordList`` -> ``stopwordRemoval``.

     Args:
         filename: Passed unchanged to ``GetDatasetPath`` and ``ReadData``.

     Returns:
         Whatever ``StopwordRemoval.stopwordRemoval`` returns.
     """
     # NOTE(review): the processed file is read here but the result is never
     # used; kept because the read itself may matter - confirm and drop.
     GetDatasetPath(filename, choice='imdb').readFileProcessed()

     reviews = ReadData(filename, choice='imdb').readDataReview('0')
     remover = StopwordRemoval(reviews)
     to_remove = remover.stopwordsToRem()
     trimmed_stopwords = remover.removefromStopwordList(to_remove)
     return remover.stopwordRemoval(trimmed_stopwords)
Esempio n. 5
0
 def data_preparation(self):
     """Turn the lemmatized reviews into saved train/test data.

     Steps, in order:
       1. read ``PROCESSED['lemmatize']`` and run stop-word removal;
       2. convert the text with ``StrToNum(..., 200)`` and save the
          resulting word index under ``WORDS['num_words']``;
       3. read ``PROCESSED['sentiment']`` and split with
          ``TrainTestSplit().train_test(review_pad, df, 0.2)``;
       4. save the four resulting pieces with ``SaveData().savePrepared``.
     """
     lemmatized = ReadData(PROCESSED['lemmatize']).readDataReview('0')
     without_stopwords = StopwordRemoval(lemmatized).perform_removal()

     # NOTE(review): 200 is presumably the padded sequence length - confirm.
     converter = StrToNum(without_stopwords, 200)
     review_pad, word_index = converter.convert_str_to_nums()
     SaveData(WORDS['num_words']).saveNumWords(word_index)

     sentiment_df = GetDatasetPath(PROCESSED['sentiment']).readFileProcessed()
     # NOTE(review): 0.2 is presumably the test fraction - confirm.
     splitter = TrainTestSplit()
     X_train_pad, y_train, X_test_pad, y_test = splitter.train_test(
         review_pad, sentiment_df, 0.2)

     SaveData().savePrepared([X_train_pad, y_train, X_test_pad, y_test])
Esempio n. 6
0
    def embedding(self):
        """Build and save the FastText, Word2Vec and GloVe embedding matrices.

        The lemmatized reviews are read, stop words are removed and the
        text is tokenized once. FastText and Word2Vec embeddings are
        generated from those tokens before their matrices are built; the
        GloVe matrix is built directly from ``EMBEDDING['glove_text']``
        with no generation step. Each matrix is written through
        ``ReadWriteEmbedding``.
        """
        dim = 100  # embedding dimension shared by all three variants

        lemmatized = ReadData(PROCESSED['lemmatize']).readDataReview('0')
        stop_removed = StopwordRemoval(lemmatized).perform_removal()
        lines_processed, word_index = Tokenize().tokenizer(stop_removed)
        num_words = ReadData(WORDS['num_words']).readNumWords()

        print("Embedding as fastext...")
        FastTextEmb(lines_processed, dim,
                    EMBEDDING['fast_text']).generate_Embeddings()
        self._save_embedding_matrix(EMBEDDING['fast_text'],
                                    EMBEDDING['fast_emb'],
                                    word_index, num_words, dim)

        print("Embedding as w2v...")
        W2vEmb(lines_processed, dim,
               EMBEDDING['w2v_text']).generate_Embeddings()
        self._save_embedding_matrix(EMBEDDING['w2v_text'],
                                    EMBEDDING['w2v_emb'],
                                    word_index, num_words, dim)

        print("Embedding as glove...")
        # No generation step: the GloVe vectors file is used as it is.
        self._save_embedding_matrix(EMBEDDING['glove_text'],
                                    EMBEDDING['glove_emb'],
                                    word_index, num_words, dim)

    def _save_embedding_matrix(self, vectors_file, matrix_file, word_index,
                               num_words, dim=100):
        """Build the embedding matrix from ``vectors_file`` and write it out.

        Args:
            vectors_file: Embedding source given to ``PrepareEmbeddingMatrix``.
            matrix_file: Destination given to ``ReadWriteEmbedding``.
            word_index: Word index produced by the tokenizer.
            num_words: Value read from ``WORDS['num_words']``.
            dim: Embedding dimension given to ``PrepareEmbeddingMatrix``.
        """
        emb_mat = PrepareEmbeddingMatrix(vectors_file, dim, num_words)
        embeddings_index = emb_mat.get_EmbeddingIndex()
        mat = emb_mat.get_embMatrix(word_index, embeddings_index)
        data = asarray(mat)
        print("Convert to csv...")
        ReadWriteEmbedding(matrix_file, data).writeEmb()
Esempio n. 7
0
    def predictmodel(self):
        """Evaluate the FastText, Word2Vec and GloVe models on the test set.

        The processed training reviews, the test reviews and the test
        sentiments are read once; ``PrepareData.prepTestData`` turns the
        reviews into the padded test input. Each model listed in ``MODEL``
        is then run through ``PredictModel.prepModel`` and reported with
        ``resultsSingleClass`` against the test sentiments.
        """
        lines_processed = ReadData(PROCESSED['lemmatize'],
                                   choice=self.choice).readProcessedReview()
        tes_lis = ReadData(TEST['review'],
                           choice=self.choice).readProcessedReview()
        tes_sen = ReadData(TEST['sentiment'],
                           choice=self.choice).readProcessedSentiment()
        test_samples_tokens_pad = PrepareData(filename=None).prepTestData(
            lines_processed, tes_lis)

        # (progress message, key into MODEL), evaluated in this order.
        runs = (
            ("Predicting for Fasttext...", 'fasttext'),
            ("Predicting for W2v...", 'w2v'),
            ("Predicting for Glove...", 'glove'),
        )
        for message, model_key in runs:
            print(message)
            results = PredictModel(choice=self.choice, model=MODEL[model_key])
            predictions = results.prepModel(test_samples_tokens_pad)
            results.resultsSingleClass(predictions, tes_sen)
 def perform_expand(self, filename):
     """Load reviews from ``filename``, process them and save the result.

     ``self.lis`` receives the value returned by
     ``ReadData.readDataReview('0')`` and ``self.res`` is bound to that
     same object before ``self.process_list()`` runs. ``self.res`` is then
     written through ``SaveData`` under ``PROCESSED['contractions']``.

     Args:
         filename: Passed unchanged to ``ReadData``.
     """
     reader = ReadData(filename, choice=self.choice)
     self.lis = reader.readDataReview('0')
     # Same object as self.lis, not a copy.
     self.res = self.lis
     self.process_list()
     writer = SaveData(PROCESSED['contractions'], choice=self.choice)
     writer.saveDataReview(self.res)
Esempio n. 9
0
 def perform_removal(self, filename):
     """Strip HTML tags from the data in ``filename`` and save it back.

     ``ReadData.readData()`` supplies ``self.lis`` and ``self.sen``;
     after ``self.htmlTagRemoval()`` has run, ``self.res`` and
     ``self.sen`` are handed to a ``SaveData`` built with the same
     ``filename``.

     Args:
         filename: Passed unchanged to both ``SaveData`` and ``ReadData``.
     """
     # The writer is created before the data is read, as in the original.
     writer = SaveData(filename, choice=self.choice)
     reader = ReadData(filename, choice=self.choice)
     self.lis, self.sen = reader.readData()
     self.htmlTagRemoval()
     # self.res is presumably produced by htmlTagRemoval() - confirm there.
     writer.saveData(self.res, self.sen)
Esempio n. 10
0
 def readProcessedSentiment(self, filename):
     """Load the data for ``filename`` and keep it on the instance.

     The value returned by
     ``ReadData(filename, choice='imdb').readDataReview('0')`` is stored
     in ``self.tes_res``.

     Args:
         filename: Passed unchanged to ``GetDatasetPath`` and ``ReadData``.
     """
     review_lines = GetDatasetPath(filename, choice='imdb')
     # NOTE(review): df is not used in the lines visible here - confirm
     # whether this read is still needed.
     df = review_lines.readFileProcessed()
     lis = ReadData(filename, choice='imdb').readDataReview('0')
     # NOTE(review): callers appear to use this method's return value, but
     # no return statement is visible here - confirm.
     self.tes_res = lis