Ejemplo n.º 1
0
# Example 1: binary text classifier (Embedding -> LSTM -> sigmoid).
# Relies on `corpus`, `y`, `one_hot`, `pad_sequences`, `Sequential`,
# `Embedding`, `LSTM` and `Dense` being defined/imported before this point.
import numpy as np
from sklearn.model_selection import train_test_split

# Encode every document in `corpus` as a list of integer word ids.
# NOTE(review): `one_hot` is presumably keras.preprocessing.text.one_hot
# (hashing-based encoder) -- confirm against the imports above this excerpt.
vocab_size = 5000
one_hot_repr = [one_hot(words, vocab_size) for words in corpus]
one_hot_repr

# Left-pad the encoded documents so they all share the length `sent_length`.
sent_length = 40
embedded_docs = pad_sequences(one_hot_repr, padding='pre', maxlen=sent_length)
embedded_docs

## Creating model
embedding_vector_features = 80
model = Sequential()
model.add(Embedding(vocab_size, embedding_vector_features, input_length=sent_length))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

X_final = np.array(embedded_docs)
y_final = np.array(y)

# Hold out 20% of the samples; the same split is used as validation data.
X_train1, X_test1, y_train1, y_test1 = train_test_split(
    X_final, y_final, test_size=0.2, random_state=42)

### Finally Training
model.fit(X_train1, y_train1, validation_data=(X_test1, y_test1), epochs=10, batch_size=64)

# `Sequential.predict_classes` no longer exists in current tf.keras releases.
# For a single sigmoid output unit it was defined as thresholding the
# predicted probability at 0.5, which is reproduced here explicitly.
y_pred = (model.predict(X_test1) > 0.5).astype("int32")
y_pred
Ejemplo n.º 2
0
# Example 2 (excerpt): finishes a stacked-LSTM binary classifier and trains it.
# `model`, `embedded_docs`, `y`, `np` and the Keras layer classes come from
# code that precedes this excerpt.
from sklearn.model_selection import train_test_split

# Remaining layers, appended to the existing model in exactly this order:
# a dropout closing the previous stage, then two LSTM stages (the first one
# returns full sequences so the second LSTM can consume them), each followed
# by batch normalisation and dropout, and finally the sigmoid output unit.
remaining_layers = (
    Dropout(0.4),
    LSTM(100, return_sequences=True),
    BatchNormalization(),
    Dropout(0.4),
    LSTM(100),
    BatchNormalization(),
    Dropout(0.4),
    Dense(1, activation='sigmoid'),
)
for layer in remaining_layers:
    model.add(layer)

model.summary()

# Compile with binary cross-entropy and Adam at learning rate 0.01.
adam_optimizer = Adam(learning_rate=0.01)
model.compile(
    loss='binary_crossentropy',
    optimizer=adam_optimizer,
    metrics=['accuracy'],
)

# Convert features and labels to numpy arrays.
X_final = np.array(embedded_docs)
y_final = np.array(y)

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y_final,
    test_size=0.2,
    random_state=77,
)

# Train; the held-out split is reported as validation data after each epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=100,
)
Ejemplo n.º 3
0
# Experiment: 3-nearest-neighbour classifier built from `WordMoversKNN`,
# parameterised with the pre-trained `embedding_weights`. The class, the
# train/test matrices and the labels are defined outside this excerpt.
print("Method = KNN with word mover's distance as described in "
      "'From Word Embeddings To Document Distances'")
model = WordMoversKNN(W_embed=embedding_weights, n_neighbors=3)
model.fit(train_matrix, train_labels)
results = model.predict(test_matrix)

# Report overall accuracy followed by the per-class report.
knn_accuracy = sklearn.metrics.accuracy_score(test_labels, results)
print("Accuracy = %r" % (knn_accuracy,))
knn_report = sklearn.metrics.classification_report(test_labels, results)
print(knn_report)

# Experiment: MLP with two hidden ReLU layers over the bag-of-words matrix.
# `train_matrix`, `train_labels`, `test_matrix`, `test_labels`,
# `embeddings_dim` and `num_classes` are defined outside this excerpt.
#
# The Keras-1 keywords were replaced by their current names:
#   init=      -> kernel_initializer=
#   nb_epoch=  -> epochs=
#   class_mode= (compile) has no replacement and is dropped.
print("Method = MLP with bag-of-words features")
np.random.seed(0)
model = Sequential()
model.add(Dense(embeddings_dim, input_dim=train_matrix.shape[1],
                kernel_initializer='random_uniform', activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(embeddings_dim, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(1, activation='sigmoid'))
# NOTE(review): the output layer is a single sigmoid unit in both branches;
# pairing it with categorical_crossentropy when num_classes != 2 looks
# inconsistent -- confirm the intended multi-class head and label encoding.
if num_classes == 2:
    model.compile(loss='binary_crossentropy', optimizer='adam')
else:
    model.compile(loss='categorical_crossentropy', optimizer='adam')
model.fit(train_matrix, train_labels, epochs=30, batch_size=32)
# `predict_classes` is gone from current Keras; with a one-unit output it
# was a 0.5 threshold on the predicted probability, reproduced here.
results = (model.predict(test_matrix) > 0.5).astype('int32')
print("Accuracy = " + repr(sklearn.metrics.accuracy_score(test_labels, results)))
print(sklearn.metrics.classification_report(test_labels, results))

# Experiment: embedding layer seeded with `embedding_weights`, followed by
# two stacked LSTMs and a single linear output unit. Only the model
# construction is visible in this excerpt (no compile/fit follows here).
#
# Keras-1 keywords replaced by their current names:
#   output_dim= -> units=
#   init='zero' -> kernel_initializer='zeros'
print("Method = Stack of two LSTMs")
np.random.seed(0)
model = Sequential()
# mask_zero=True makes index 0 act as padding that downstream layers skip.
model.add(Embedding(max_features, embeddings_dim, input_length=max_sent_len,
                    mask_zero=True, weights=[embedding_weights]))
model.add(Dropout(0.1))
# First LSTM returns the full sequence so the second LSTM can consume it.
model.add(LSTM(units=embeddings_dim, activation='relu',
               return_sequences=True, kernel_initializer='zeros'))
model.add(Dropout(0.1))
model.add(LSTM(units=embeddings_dim, activation='relu',
               kernel_initializer='zeros'))
model.add(Dense(1, kernel_initializer='zeros', activation='linear'))
# Experiment: MLP with two hidden ReLU layers over the bag-of-words matrix.
# `train_matrix`, `train_labels`, `test_matrix`, `test_labels`,
# `embeddings_dim` and `num_classes` are defined outside this excerpt.
#
# The Keras-1 keywords were replaced by their current names:
#   init=      -> kernel_initializer=
#   nb_epoch=  -> epochs=
#   class_mode= (compile) has no replacement and is dropped.
print("Method = MLP with bag-of-words features")
np.random.seed(0)
model = Sequential()
model.add(
    Dense(embeddings_dim,
          input_dim=train_matrix.shape[1],
          kernel_initializer='random_uniform',
          activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(embeddings_dim, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(1, activation='sigmoid'))
# NOTE(review): the output layer is a single sigmoid unit in both branches;
# pairing it with categorical_crossentropy when num_classes != 2 looks
# inconsistent -- confirm the intended multi-class head and label encoding.
if num_classes == 2:
    model.compile(loss='binary_crossentropy', optimizer='adam')
else:
    model.compile(loss='categorical_crossentropy', optimizer='adam')
model.fit(train_matrix, train_labels, epochs=30, batch_size=32)
# `predict_classes` is gone from current Keras; with a one-unit output it
# was a 0.5 threshold on the predicted probability, reproduced here.
results = (model.predict(test_matrix) > 0.5).astype('int32')
print("Accuracy = " +
      repr(sklearn.metrics.accuracy_score(test_labels, results)))
print(sklearn.metrics.classification_report(test_labels, results))

print("Method = Stack of two LSTMs")
np.random.seed(0)
model = Sequential()
model.add(
    Embedding(max_features,
              embeddings_dim,