Example 1
from sklearn.metrics import accuracy_score

# Train the previously defined classifier and score it on the held-out set
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Integer-encode each document with Keras' hashing-based one_hot
from tensorflow.keras.preprocessing.text import one_hot

vocab_size = 5000
one_hot_repr = [one_hot(words, vocab_size) for words in corpus]
one_hot_repr  # notebook display: one list of word indices per document

# Pre-pad every sequence to a fixed length of 40 tokens
from tensorflow.keras.preprocessing.sequence import pad_sequences

sent_length = 40
embedded_docs = pad_sequences(one_hot_repr, padding='pre', maxlen=sent_length)
embedded_docs
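A quick illustration of what these two steps produce on a toy corpus (using one_hot and pad_sequences as imported above; the exact indices vary, since one_hot hashes words):

demo = ["the food was good", "the food was bad"]
encoded = [one_hot(s, 50) for s in demo]  # e.g. [[12, 7, 31, 44], [12, 7, 31, 9]]
padded = pad_sequences(encoded, padding='pre', maxlen=6)
# padded is a (2, 6) int array, zero-filled on the left:
# [[ 0  0 12  7 31 44]
#  [ 0  0 12  7 31  9]]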

## Creating model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

embedding_vector_features = 80
model = Sequential()
model.add(Embedding(vocab_size, embedding_vector_features, input_length=sent_length))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit for the binary label
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
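The summary's parameter count can be verified by hand: the embedding holds 5000 × 80 = 400,000 weights, the LSTM has 4 × (80 + 100 + 1) × 100 = 72,400 (four gates, each with input, recurrent, and bias weights), and the dense head adds 100 + 1 = 101, for 472,501 trainable parameters in total.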

import numpy as np

# Stack the padded sequences and their labels into arrays for Keras
X_final = np.array(embedded_docs)
y_final = np.array(y)

from sklearn.model_selection import train_test_split
X_train1, X_test1, y_train1, y_test1 = train_test_split(X_final, y_final, test_size=0.2, random_state=42)

### Finally, training
model.fit(X_train1, y_train1, validation_data=(X_test1, y_test1), epochs=10, batch_size=64)
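Once trained, any new text must go through the same one_hot/pad_sequences preprocessing before calling predict; a minimal sketch (the review text is a made-up example):

new_review = "the movie was surprisingly good"
padded = pad_sequences([one_hot(new_review, vocab_size)], padding='pre', maxlen=sent_length)
prob = model.predict(padded)[0][0]  # sigmoid output in [0, 1]
print("positive" if prob > 0.5 else "negative", prob)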
Example 2
# One-hot encode the integer labels (one column per class)
from tensorflow.keras.utils import to_categorical
labels = to_categorical(labels)

#### Loading all essential layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout
#print(padsequences.shape)

### Again, splitting the data into train and test sets
from sklearn.model_selection import train_test_split
feature_train, feature_test, label_train, label_test = train_test_split(
    padsequences, labels, test_size=0.1, random_state=42)

# creating the LSTM
model = Sequential()
model.add(Embedding(27, 64, input_length=15))  # vocabulary of 27 tokens, 64-dim vectors, inputs of length 15
model.add(LSTM(2048, return_sequences=True))   # returns the full sequence for the next LSTM; this layer
                                               # alone holds 4*(64+2048+1)*2048 ≈ 17.3M parameters
model.add(LSTM(256, return_sequences=False))   # returns only the final state
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))      # labels are one-hot (to_categorical), so softmax fits better than sigmoid
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

# fitting the model
model.fit(feature_train,
          label_train,
          epochs=10,
          validation_data=(feature_test, label_test),
          batch_size=1000)
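After fitting, the held-out split can be scored directly; a short follow-up under the same variable names:

loss, acc = model.evaluate(feature_test, label_test, verbose=0)
print("test accuracy:", round(acc, 3))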
Example 3
# Trim the trailing model.intercept_.shape[0] columns (one per class) from both feature matrices
train_matrix = train_matrix[:, 0: train_matrix.shape[1] - model.intercept_.shape[0]]
test_matrix = test_matrix[:, 0: test_matrix.shape[1] - model.intercept_.shape[0]]
print("Accuracy = " + repr(sklearn.metrics.accuracy_score(test_labels, results)))
print(sklearn.metrics.classification_report(test_labels, results))

print ("Method = KNN with word mover's distance as described in 'From Word Embeddings To Document Distances'")
model = WordMoversKNN(W_embed=embedding_weights , n_neighbors=3)
model.fit( train_matrix , train_labels )
results = model.predict( test_matrix )
print ("Accuracy = " + repr( sklearn.metrics.accuracy_score( test_labels , results )  ))
print (sklearn.metrics.classification_report( test_labels , results ))
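As a point of reference, gensim exposes the same Word Mover's Distance directly on trained word vectors; a minimal sketch (needs gensim plus its optional POT/pyemd dependency, and downloads small GloVe vectors):

import gensim.downloader as api

wv = api.load("glove-wiki-gigaword-50")  # small pretrained vectors, for illustration only
d1 = "obama speaks to the media in illinois".split()
d2 = "the president greets the press in chicago".split()
print(wv.wmdistance(d1, d2))  # smaller distance = more similar documents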

print ("Method = MLP with bag-of-words features")
np.random.seed(0)
model = Sequential()
model.add(Dense(embeddings_dim, input_dim=train_matrix.shape[1], init='uniform', activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(embeddings_dim, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(1, activation='sigmoid'))
if num_classes == 2: model.compile(loss='binary_crossentropy', optimizer='adam', class_mode='binary')
else: model.compile(loss='categorical_crossentropy', optimizer='adam')
model.fit( train_matrix , train_labels , nb_epoch=30, batch_size=32)
results = model.predict_classes( test_matrix )
print ("Accuracy = " + repr( sklearn.metrics.accuracy_score( test_labels , results )  ))
print (sklearn.metrics.classification_report( test_labels , results ))

print ("Method = Stack of two LSTMs")
np.random.seed(0)
model = Sequential()
model.add(Embedding(max_features, embeddings_dim, input_length=max_sent_len, mask_zero=True, weights=[embedding_weights] ))
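The snippet breaks off after the embedding layer; a minimal completion of the stack of two LSTMs that the print announces, with illustrative layer sizes rather than the original's:

model.add(LSTM(embeddings_dim, return_sequences=True))  # first LSTM passes its full sequence onward
model.add(LSTM(embeddings_dim))                         # second LSTM returns only its final state
model.add(Dense(1 if num_classes == 2 else num_classes,
                activation='sigmoid' if num_classes == 2 else 'softmax'))
model.compile(loss='binary_crossentropy' if num_classes == 2 else 'categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])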
Example 4
  corpus.append(cleaning)  # tail of the text-cleaning loop: each cleaned document is collected

from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences

voc_size = 5000  # number of words for the one-hot encoding
sent_length = 20  # max length for padding
embedding_vector_features = 40  # number of vector features for embedding

# One-hot encoding
onehot_repr = [one_hot(sentence, voc_size) for sentence in corpus]

# Padding
embedded_docs = pad_sequences(onehot_repr, padding='pre', maxlen=sent_length)

# Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, BatchNormalization

model = Sequential()
model.add(Embedding(voc_size, embedding_vector_features, input_length=sent_length))
model.add(Dropout(0.4))

model.add(LSTM(100, return_sequences=True))  # intermediate LSTMs return the full sequence for the next layer
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(LSTM(100, return_sequences=True))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(LSTM(100))  # the last LSTM returns only its final state
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(1, activation='sigmoid'))
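The example stops before compilation; a minimal completion mirroring the compile/fit settings of Example 1 (the label array `y` and the split names are assumptions, not from the original):

import numpy as np
from sklearn.model_selection import train_test_split

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

X_final = np.array(embedded_docs)
y_final = np.array(y)  # y: binary labels aligned with `corpus`, assumed defined upstream
X_tr, X_te, y_tr, y_te = train_test_split(X_final, y_final, test_size=0.2, random_state=42)
model.fit(X_tr, y_tr, validation_data=(X_te, y_te), epochs=10, batch_size=64)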