# Wrap cm_bow (presumably the bag-of-words confusion matrix) in a DataFrame
# whose rows and columns are both labelled "0" and "1", then draw it as a
# heatmap with the cell values printed on top.
class_labels = list("01")
matrix_bow = pd.DataFrame(cm_bow, index=class_labels, columns=class_labels)
plt.figure(figsize=(10, 7))
sns.heatmap(matrix_bow, annot=True)
"""## Nueral Net

"""

from keras.preprocessing.text import Tokenizer

# The reviews have to be turned into integer token sequences before the model
# can consume them. max_features caps the vocabulary the tokenizer keeps.
max_features = 600000
# Every review is padded to this many tokens so all inputs share one length.
maxlen = 100

# Positive reviews first, then negative ones; the labels below are
# concatenated in the same order so each row of x lines up with its entry in y.
train_reviews = review_p_train + review_n_train

# Fit the vocabulary on the training reviews only, then encode them.
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(train_reviews)
list_tokenized_train = tokenizer.texts_to_sequences(train_reviews)

# Pad the token sequences to a common length so they can be passed to the
# neural net as one array.
x = pad_sequences(list_tokenized_train, maxlen=maxlen)
y = np.array(sentiment_p_train + sentiment_n_train)

# Same treatment for the test reviews, reusing the tokenizer fitted above.
y_test = np.array(sentiment_p_test + sentiment_n_test)
test_reviews = review_p_test + review_n_test
list_tokenized_test = tokenizer.texts_to_sequences(test_reviews)
x_test = pad_sequences(list_tokenized_test, maxlen=maxlen)

# define the neural net
# Example #2
# 0
# Discard rows with missing values before building the train/test split.
dataset2 = dataset.dropna()

# Split the dataset: the 'clean_text' column holds the inputs and the
# 'category' column the targets; 20% of the rows are held out for testing.
# NOTE: no random_state is passed, so the split differs from run to run.
from sklearn.model_selection import train_test_split

texts = dataset2['clean_text']
categories = dataset2['category']
X_train, X_test, y_train1, y_test1 = train_test_split(texts,
                                                      categories,
                                                      test_size=0.2)

# Hyper-parameters: 41 news groups (presumably the number of output classes),
# the vocabulary size shared by the tokenizer and the model's input layer,
# and the batch size (presumably used when training further down).
num_labels, vocab_size, batch_size = 41, 12000, 100

# Build the vocabulary from the training texts only, capped at vocab_size.
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(X_train)

# Encode each document of both splits as a TF-IDF weighted matrix row.
x_train, x_test = (tokenizer.texts_to_matrix(docs, mode='tfidf')
                   for docs in (X_train, X_test))

# Learn the label encoding from the training categories, then apply that
# same encoding to the training and the test labels.
encoder = LabelBinarizer().fit(y_train1)
y_train, y_test = encoder.transform(y_train1), encoder.transform(y_test1)

# Start of the feed-forward network: the input is a vocab_size-wide vector,
# followed by two 512-unit dense layers with ReLU activations and a 30%
# dropout after the first one. Further layers can still be appended with
# model.add(...).
model = Sequential([
    Dense(512, input_shape=(vocab_size, )),
    Activation('relu'),
    Dropout(0.3),
    Dense(512),
    Activation('relu'),
])