# Load the phrases read from the stanfordSentimentTreebank directory,
# binarise their sentiment scores and tokenise/clean every phrase.
import gensim
import os
import string
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Embedding, GRU
from keras.initializers import Constant
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from read_stanford_sentiment_treebank import read_data

# read_data is a project helper; the code below relies on its result having
# 'Phrase' and 'sentiment_values' columns.
dataset = read_data(
    '/media/sakib/alpha/work/EmotionDetectionDir/stanfordSentimentTreebank')

# binarizing sentiments: scores >= 0.4 become 1.0, everything else 0.0
dataset['sentiment_values'] = pd.to_numeric(
    dataset['sentiment_values'], downcast='float')
dataset['sentiment_values'] = (dataset['sentiment_values'] >= 0.4).astype(float)

review_lines = []
lines = dataset['Phrase'].values.tolist()

# Loop-invariant helpers, built once instead of once per phrase.
table = str.maketrans('', '', string.punctuation)  # deletes punctuation chars
stop_words = set(stopwords.words('english'))

for line in lines:
    tokens = word_tokenize(line)
    # Lower-case the tokens that the following steps actually consume.
    tokens = [w.lower() for w in tokens]
    # Strip punctuation characters from every token.
    stripped = [w.translate(table) for w in tokens]
    # Keep purely alphabetic tokens (drops empty strings and numbers).
    words = [word for word in stripped if word.isalpha()]
    # NOTE(review): the visible source ends here -- `stop_words` is never
    # applied to `words` and nothing is appended to `review_lines`.
    # Presumably the rest of the loop body was lost; confirm against the
    # original script before relying on `review_lines`.
# Load the phrases via read_data, binarise their sentiment scores and
# reduce every phrase to lower-case text containing only letters and spaces.
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from read_stanford_sentiment_treebank import read_data
import gensim
import os
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, GRU
from keras.layers.embeddings import Embedding
from keras.initializers import Constant
import matplotlib.pyplot as plt

# read_data is a project helper; the code below relies on its result having
# 'Phrase' and 'sentiment_values' columns.
dataset = read_data(
    '/media/sakib/alpha/work/EmotionDetectionDir/pretrained embedding/word2vec_embedding/stanfordSentimentTreebank'
)

# binarizing sentiments: scores >= 0.4 become 1.0, everything else 0.0
dataset['sentiment_values'] = pd.to_numeric(
    dataset['sentiment_values'],
    downcast='float',
)
dataset['sentiment_values'] = (dataset['sentiment_values'] >= 0.4).astype(float)

# Labels (taken after binarisation) and the raw phrase strings.
sentiment = dataset['sentiment_values']
lines = dataset['Phrase'].values.tolist()

# Replace every non-letter character with a space, then lower-case the phrase.
review_lines = []
for line in lines:
    review = re.sub('[^a-zA-Z]', ' ', line).lower()
    review_lines.append(review)