# Evaluate the previously fitted classifier on the held-out split.
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# --- One-hot index encoding of the text corpus ---
vocab_size = 5000
one_hot_repr = [one_hot(words, vocab_size) for words in corpus]
one_hot_repr  # notebook display only

# Left-pad every sequence so all inputs share a fixed length.
sent_length = 40
embedded_docs = pad_sequences(one_hot_repr, padding='pre', maxlen=sent_length)
embedded_docs  # notebook display only

## Creating model
embedding_vector_features = 80
model = Sequential()
model.add(Embedding(vocab_size, embedding_vector_features,
                    input_length=sent_length))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit: binary task
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

import numpy as np

# Convert to arrays for train_test_split / model.fit.
X_final = np.array(embedded_docs)
y_final = np.array(y)

from sklearn.model_selection import train_test_split

X_train1, X_test1, y_train1, y_test1 = train_test_split(
    X_final, y_final, test_size=0.2, random_state=42)

### Finally Training
model.fit(X_train1, y_train1,
          validation_data=(X_test1, y_test1),
          epochs=10, batch_size=64)
# Convert integer class labels to one-hot vectors (two columns per row).
from keras.utils.np_utils import to_categorical
labels = to_categorical(labels)

#### Loading all essential layer
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding, Dropout

# print(padsequences.shape)

### Again splitting data into train and test set
from sklearn.model_selection import train_test_split
feature_train, feature_test, label_train, label_test = train_test_split(
    padsequences, labels, test_size=0.1, random_state=42)

# creating LSTM
model = Sequential()
model.add(Embedding(27, 64, input_length=15))
# NOTE(review): 2048 units is very large for a 64-dim embedding — confirm
# it is intentional and fits in memory.
model.add(LSTM(2048, return_sequences=True))
model.add(LSTM(256, return_sequences=False))
model.add(Dropout(0.2))
# FIX: labels are one-hot encoded (mutually exclusive classes), so the
# output must be a softmax trained with categorical_crossentropy. The
# original sigmoid + binary_crossentropy treated the two outputs as
# independent multi-label targets and reported inflated binary accuracy.
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.summary()

# fitting the model
model.fit(feature_train, label_train, epochs=10,
          validation_data=(feature_test, label_test), batch_size=1000)
# Drop the trailing intercept columns from the feature matrices (the last
# model.intercept_.shape[0] columns appear to be bias features appended
# earlier — TODO confirm against the code that built these matrices).
train_matrix = train_matrix[0: train_matrix.shape[0],
                            0: train_matrix.shape[1] - model.intercept_.shape[0]]
# FIX: the row slice previously used train_matrix.shape[0], which truncates
# (or under-selects) the test rows whenever the train/test splits differ in
# size, misaligning predictions with test_labels. Slice by the test matrix's
# own row count (a no-op on rows, as intended).
test_matrix = test_matrix[0: test_matrix.shape[0],
                          0: test_matrix.shape[1] - model.intercept_.shape[0]]

# NOTE(review): `results` here comes from a prediction made before this
# chunk — verify it is still in scope and matches the sliced matrices.
print("Accuracy = " + repr(sklearn.metrics.accuracy_score(test_labels, results)))
print(sklearn.metrics.classification_report(test_labels, results))

print("Method = KNN with word mover's distance as described in 'From Word Embeddings To Document Distances'")
model = WordMoversKNN(W_embed=embedding_weights, n_neighbors=3)
model.fit(train_matrix, train_labels)
results = model.predict(test_matrix)
print("Accuracy = " + repr(sklearn.metrics.accuracy_score(test_labels, results)))
print(sklearn.metrics.classification_report(test_labels, results))

print("Method = MLP with bag-of-words features")
np.random.seed(0)  # reproducible weight init
model = Sequential()
# Old Keras 1.x API (init=, class_mode=, nb_epoch=) — kept consistent with
# the rest of the file.
model.add(Dense(embeddings_dim, input_dim=train_matrix.shape[1],
                init='uniform', activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(embeddings_dim, activation='relu'))
model.add(Dropout(0.25))
# NOTE(review): a single sigmoid unit only supports the binary branch below;
# the categorical_crossentropy branch would need num_classes softmax units —
# confirm num_classes == 2 in practice.
model.add(Dense(1, activation='sigmoid'))
if num_classes == 2:
    model.compile(loss='binary_crossentropy', optimizer='adam', class_mode='binary')
else:
    model.compile(loss='categorical_crossentropy', optimizer='adam')
model.fit(train_matrix, train_labels, nb_epoch=30, batch_size=32)
results = model.predict_classes(test_matrix)
print("Accuracy = " + repr(sklearn.metrics.accuracy_score(test_labels, results)))
print(sklearn.metrics.classification_report(test_labels, results))

print("Method = Stack of two LSTMs")
np.random.seed(0)
model = Sequential()
# Pretrained embedding weights, zero index masked as padding.
model.add(Embedding(max_features, embeddings_dim, input_length=max_sent_len,
                    mask_zero=True, weights=[embedding_weights]))
# Add the cleaned sentence to the running corpus.
corpus.append(cleaning)

# Hyperparameters for encoding, padding, and the embedding layer.
voc_size = 5000                   # Number of words for the one hot encoding
sent_length = 20                  # Max length for padding
embedding_vector_features = 40    # Number of vector features for embedding

# One hot encoding
onehot_repr = [one_hot(sentence, voc_size) for sentence in corpus]

# Padding
embedded_docs = pad_sequences(onehot_repr, padding='pre', maxlen=sent_length)

# Model: three stacked LSTM layers, each followed by batch normalization
# and dropout, ending in a single sigmoid output unit.
model = Sequential()
model.add(Embedding(voc_size, embedding_vector_features,
                    input_length=sent_length))
model.add(Dropout(0.4))

model.add(LSTM(100, return_sequences=True))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(LSTM(100, return_sequences=True))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(LSTM(100))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(1, activation='sigmoid'))