import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from read_stanford_sentiment_treebank import read_data
import gensim
import os
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Embedding, GRU
from keras.initializers import Constant
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

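# read_data loads the Stanford Sentiment Treebank into a pandas DataFrame
# with (at least) the 'Phrase' and 'sentiment_values' columns used below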
dataset = read_data(
    '/media/sakib/alpha/work/EmotionDetectionDir/stanfordSentimentTreebank')
# binarizing sentiments
dataset['sentiment_values'] = pd.to_numeric(dataset['sentiment_values'],
                                            downcast='float')
dataset['sentiment_values'] = (dataset['sentiment_values'] >=
                               0.4).astype(float)
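# e.g. a phrase scored 0.35 becomes 0.0 (negative) and one scored 0.62
# becomes 1.0 (positive); 0.4 is simply the cut-off chosen in this script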
review_lines = list()
lines = dataset['Phrase'].values.tolist()

for line in lines:
    tokens = word_tokenize(line)
    # lowercase, strip punctuation, keep alphabetic tokens, drop stopwords
    tokens = [w.lower() for w in tokens]
    table = str.maketrans('', '', string.punctuation)
    stripped = [w.translate(table) for w in tokens]
    words = [word for word in stripped if word.isalpha()]
    stop_words = set(stopwords.words('english'))
    words = [w for w in words if w not in stop_words]
    review_lines.append(words)
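
# A minimal sketch (an assumption, not part of the original excerpt) of how the
# unused Tokenizer / pad_sequences imports above would typically be applied to
# turn the cleaned token lists in review_lines into padded integer sequences:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(review_lines)            # builds the word -> index vocabulary
sequences = tokenizer.texts_to_sequences(review_lines)
word_index = tokenizer.word_index
max_length = max(len(seq) for seq in sequences)
review_pad = pad_sequences(sequences, maxlen=max_length, padding='post')
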
Example 2
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from read_stanford_sentiment_treebank import read_data
import gensim
import os
import numpy as np
from keras.preprocessing.text import Tokenizer 
from keras.preprocessing.sequence import pad_sequences
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, GRU
from keras.initializers import Constant
import matplotlib.pyplot as plt
dataset = read_data(
    '/media/sakib/alpha/work/EmotionDetectionDir/pretrained embedding/word2vec_embedding/stanfordSentimentTreebank')
# binarizing sentiments
dataset['sentiment_values'] = pd.to_numeric(dataset['sentiment_values'],
                                            downcast='float')
dataset['sentiment_values'] = (dataset['sentiment_values'] >= 0.4).astype(float)
review_lines = list()
lines = dataset['Phrase'].values.tolist()

sentiment = dataset['sentiment_values']


for line in lines:
    # keep letters only; replace every other character with a space
    review = re.sub('[^a-zA-Z]', ' ', line)
    review = review.lower()
    review_lines.append(review)
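
# A hedged sketch (not part of the original excerpt) of where this setup is
# usually headed, given the gensim / Constant / Embedding / GRU imports and the
# 'word2vec_embedding' directory in the path: train word2vec on the cleaned
# phrases, copy its vectors into a matrix, and freeze that matrix in a
# Constant-initialized Embedding layer. All names below (EMBEDDING_DIM,
# embedding_matrix, model, ...) are illustrative assumptions.
EMBEDDING_DIM = 100
tokenized_reviews = [review.split() for review in review_lines]
# gensim >= 4 uses vector_size; older gensim releases call this parameter size
w2v_model = gensim.models.Word2Vec(tokenized_reviews, vector_size=EMBEDDING_DIM,
                                   window=5, min_count=1, workers=4)

tokenizer = Tokenizer()
tokenizer.fit_on_texts(review_lines)
sequences = tokenizer.texts_to_sequences(review_lines)
max_length = max(len(seq) for seq in sequences)
review_pad = pad_sequences(sequences, maxlen=max_length, padding='post')

# copy each known word's word2vec vector into row i of the embedding matrix
num_words = len(tokenizer.word_index) + 1
embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
for word, i in tokenizer.word_index.items():
    if word in w2v_model.wv:
        embedding_matrix[i] = w2v_model.wv[word]

model = Sequential()
model.add(Embedding(num_words, EMBEDDING_DIM,
                    embeddings_initializer=Constant(embedding_matrix),
                    input_length=max_length, trainable=False))
model.add(GRU(units=32, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])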