matrix_bow = pd.DataFrame(cm_bow, index=list("01"), columns=list("01"))
plt.figure(figsize=(10, 7))
sns.heatmap(matrix_bow, annot=True)

"""## Neural Net"""

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Turn the reviews into integer token sequences for the model
max_features = 600000
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(review_p_train + review_n_train)
list_tokenized_train = tokenizer.texts_to_sequences(review_p_train + review_n_train)

# Pad the token sequences so the reviews are all the same length before
# passing them into the neural net
maxlen = 100
x = pad_sequences(list_tokenized_train, maxlen=maxlen)
y = np.array(sentiment_p_train + sentiment_n_train)

# Do the same for the test reviews
y_test = np.array(sentiment_p_test + sentiment_n_test)
list_tokenized_test = tokenizer.texts_to_sequences(review_p_test + review_n_test)
x_test = pad_sequences(list_tokenized_test, maxlen=maxlen)

# Define the neural net
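# The model definition itself is not shown in this cell, so what follows is a
# minimal sketch of one possible architecture for this binary task: an
# Embedding layer feeding an LSTM with a sigmoid head. The embedding size
# (128), LSTM units (64), and training settings are illustrative assumptions,
# not the notebook's original choices.
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen))  # token ids -> dense vectors
model.add(LSTM(64))                                           # sequence -> fixed-size summary
model.add(Dense(1, activation='sigmoid'))                     # P(review is positive)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x, y, batch_size=32, epochs=2, validation_split=0.1)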
dataset2 = dataset.dropna()

# Splitting the dataset into train and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train1, y_test1 = train_test_split(dataset2['clean_text'], dataset2['category'], test_size=0.2)

# 41 news groups
num_labels = 41
vocab_size = 12000
batch_size = 100

# Define a Tokenizer with the vocab size and build TF-IDF document-term matrices
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(X_train)
x_train = tokenizer.texts_to_matrix(X_train, mode='tfidf')
x_test = tokenizer.texts_to_matrix(X_test, mode='tfidf')

# One-hot encode the category labels
from sklearn.preprocessing import LabelBinarizer
encoder = LabelBinarizer()
encoder.fit(y_train1)
y_train = encoder.transform(y_train1)
y_test = encoder.transform(y_test1)

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

model = Sequential()
model.add(Dense(512, input_shape=(vocab_size,)))
model.add(Activation('relu'))
model.add(Dropout(0.3))
model.add(Dense(512))
model.add(Activation('relu'))
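# Note: each row of x_train from texts_to_matrix(mode='tfidf') is a
# vocab_size-long TF-IDF vector, which is why the first Dense layer takes
# input_shape=(vocab_size,); the one-hot labels from LabelBinarizer have
# num_labels columns, one per news group. An illustrative sanity check,
# using the variables defined above:
print(x_train.shape, y_train.shape)  # expected: (n_train, vocab_size) and (n_train, num_labels)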