예제 #1
0
# Training model setup: layer sizes first, then the initializer objects that
# the LSTM layer further down reuses for its parameters.

# Vocabulary size = number of entries in the training iterator's idx_to_word.
vocab_size = len(train_data_iterator.idx_to_word)
# First dimension of the 'cnn_embedding' weight shape (presumably the width of
# the incoming CNN feature vectors -- confirm against the feature extractor).
cnn_features_dim = 4096
# Word embeddings and the LSTM hidden state are given the same width.
word_embed_dim = hidden_state_dim = 1024

# Each initializer is constructed from a shape tuple. The LSTM layer receives
# normal_init for every W_*_init, orthog_init for every R_*_init and b_init
# for every b_*_init.
# NOTE(review): a single initializer object is shared by all four parameter
# groups of each kind -- confirm the layer draws separate values from it
# rather than ending up with one shared array.
normal_init = Normal((word_embed_dim, hidden_state_dim))
orthog_init = Orthogonal((hidden_state_dim,) * 2)
b_init = Constant((hidden_state_dim,))

# Word embedding layer; its initializer is built with shape
# (vocab_size, word_embed_dim).
word_embedding_layer = EmbeddingLayer(
    name='word_embedding',
    embedding_init=Normal((vocab_size, word_embed_dim)),
)

# Dense layer whose weight initializer has shape
# (cnn_features_dim, word_embed_dim) -- presumably maps CNN features to the
# same width as the word embeddings; confirm against DenseLayer.
cnn_embedding_layer = DenseLayer(
    name='cnn_embedding',
    W_init=Normal((cnn_features_dim, word_embed_dim)),
)

# The layer name is passed positionally here, exactly as in the original call.
# NOTE(review): presumably stacks the CNN embedding and the word embeddings
# row-wise into one sequence -- verify against RowStackLayer.
row_stack_layer = RowStackLayer('row_stack')

# Dropout layer configured with dropout_prob=0.5.
embedding_dropout_layer = DropoutLayer(
    name='embedding_dropout',
    dropout_prob=0.5,
)
# LSTM layer. Keyword arguments are grouped by parameter suffix (z, i, f, o);
# within each group W_* gets normal_init, R_* gets orthog_init and b_* gets
# b_init, so every group is initialized the same way.
# NOTE(review): z/i/f/o presumably denote block input and the input, forget
# and output gates -- confirm against LstmLayer's documentation.
lstm_layer = LstmLayer(
    name='lstm',
    # z
    W_z_init=normal_init,
    R_z_init=orthog_init,
    b_z_init=b_init,
    # i
    W_i_init=normal_init,
    R_i_init=orthog_init,
    b_i_init=b_init,
    # f
    W_f_init=normal_init,
    R_f_init=orthog_init,
    b_f_init=b_init,
    # o
    W_o_init=normal_init,
    R_o_init=orthog_init,
    b_o_init=b_init,
)