    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)  # np.bool was removed in NumPy 1.24
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1
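
# Quick sanity check on the vectorization: decode the first one-hot-encoded
# sentence back to text and confirm it round-trips. This assumes char_indices
# was built as dict((c, i) for i, c in enumerate(chars)), the conventional
# mapping in this kind of script; the inverse is rebuilt locally so the check
# is self-contained.
indices_char = dict((i, c) for i, c in enumerate(chars))
decoded = ''.join(indices_char[int(np.argmax(X[0, t]))] for t in range(maxlen))
assert decoded == sentences[0], 'one-hot vectorization did not round-trip'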


# build the model: two stacked LSTM layers with dropout, then a softmax over
# the character vocabulary
print('Build model...')
model = Model()
model.add(LSTMLayer((512, 1, 1, 1), return_sequences=True))   # full sequence feeds the next LSTM
model.add(DropoutLayer(0.2))
model.add(LSTMLayer((512, 1, 1, 1), return_sequences=False))  # only the last output feeds the dense head
model.add(DropoutLayer(0.2))
model.add(FullyConnectedLayer(len(chars)))                    # one unit per character
model.add(ActivationLayer(Activations.softmax))


def sample(a, temperature=1.0):
    # helper function to sample an index from a probability array;
    # temperature rescales the log-probabilities before renormalizing
    a = np.asarray(a).astype('float64')  # float64 keeps the sum close enough to 1 for multinomial
    a = np.log(a) / temperature
    a = np.exp(a) / np.sum(np.exp(a))
    return np.argmax(np.random.multinomial(1, a, 1))
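
# Illustration of the temperature knob: low temperature sharpens the
# distribution so sampling almost always returns the argmax, while high
# temperature flattens it toward uniform. Purely a demo; remove if noisy.
demo_probs = np.array([0.7, 0.2, 0.1])
print([sample(demo_probs, temperature=0.2) for _ in range(10)])  # mostly index 0
print([sample(demo_probs, temperature=2.0) for _ in range(10)])  # more spread out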

# train the model, output generated text after each iteration
batch_size = 128
h_layer_size = 50
d_layer_size = 20
learning_rate = 0.01
l1_param = 0.01
l2_param = 0.01
dropout_p = 0.5
max_num_layers = 10
max_layer_size = 50
rmsprop_momentum = 0.9

num_epochs = 5


start_time_1 = time.perf_counter()  # time.clock() was removed in Python 3.8

# second model: two GRU blocks (batch norm, PReLU, dropout after each), a
# dense hidden layer, and a softmax head over the character vocabulary
model = Model()
model.add(GRULayer((h_layer_size, 1, 1, 1), return_sequences=True))  # must emit the full sequence for the stacked GRU below
model.add(BatchNormalizationLayer())
model.add(PReLULayer())
model.add(DropoutLayer(dropout_p))
model.add(GRULayer((h_layer_size, 1, 1, 1), return_sequences=False))
model.add(BatchNormalizationLayer())
model.add(PReLULayer())
model.add(DropoutLayer(dropout_p))
model.add(FullyConnectedLayer((d_layer_size, 1, 1, 1)))
model.add(BatchNormalizationLayer())
model.add(PReLULayer())
model.add(DropoutLayer(dropout_p))
model.add(FullyConnectedLayer(len(chars)))  # one unit per character, matching the first model's head
model.add(ActivationLayer(Activations.softmax))
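
# Training/generation sketch. Model, GRULayer, etc. come from a custom
# framework whose training API is not shown in this file, so the fit/predict
# calls below are hypothetical, Keras-style placeholders rather than the
# framework's confirmed interface; wire them to the real methods before use.
# for epoch in range(num_epochs):
#     model.fit(X, y, batch_size=batch_size)        # hypothetical method/signature
#     preds = model.predict(X[:1])[0]               # hypothetical method/signature
#     print('sampled char index:', sample(preds, temperature=0.5))
print('model build time: %.2fs' % (time.perf_counter() - start_time_1))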