# Example no. 1
    # NOTE(review): this is the tail of load_gender_data — the function header
    # and the construction of `data`, `X`, and `ntest` are above this chunk.
    # Presumably `data` is a pandas-style frame and `ntest` the number of
    # held-out examples — confirm against the missing lines.
    Y = data['gender'].values  # label array aligned row-for-row with X
    trX = X[:-ntest]  # training features: everything except the last ntest rows
    teX = X[-ntest:]  # test features: the final ntest rows
    trY = Y[:-ntest]  # training labels, aligned with trX
    teY = Y[-ntest:]  # test labels, aligned with teX
    return trX, teX, trY, teY


trX, teX, trY, teY = load_gender_data(
    ntrain=10000)  #Can increase up to 250K or so

tokenizer = Tokenizer(min_df=10, max_features=50000)
print trX[1:2]  #see a blog example
trX = tokenizer.fit_transform(trX)
teX = tokenizer.transform(teX)
print tokenizer.inverse_transform(trX[1:2])  #see what words are kept
print tokenizer.n_features

# Network: embedding lookup -> gated recurrent layer -> sigmoid output unit.
layers = [
    # Token embeddings sized to the tokenizer's fitted vocabulary.
    Embedding(size=128, n_features=tokenizer.n_features),
    # seq_output=False: only the final hidden state is passed onward.
    GatedRecurrent(size=256, activation='tanh',
                   gate_activation='steeper_sigmoid', init='orthogonal',
                   seq_output=False),
    # Single sigmoid unit — binary classification.
    Dense(size=1, activation='sigmoid', init='orthogonal'),
]

model = RNN(
    layers=layers, cost='bce'