Example #1
# NOTE: this snippet starts mid-file; the layer classes below (DenseLayer,
# NonlinearityLayer, DropoutLayer, Constant, nonlinearities,
# get_nesterov_momentum_updates) come from the surrounding project, as do the
# earlier-defined word_embedding_layer, cnn_embedding_layer, row_stack_layer
# and lstm_layer.
import numpy as np
import theano
import theano.tensor as T

# (reconstructed) dropout layers -- only the trailing `dropout_prob=0.5)` of
# these definitions survived truncation; the layer names are assumptions
embedding_dropout_layer = DropoutLayer(name='embedding_dropout',
                                       dropout_prob=0.5)
hidden_states_dropout_layer = DropoutLayer(name='hidden_states_dropout',
                                           dropout_prob=0.5)

# output projection to vocabulary logits, followed by a softmax nonlinearity
pre_softmax_layer = DenseLayer(W_init=Constant((hidden_state_dim, vocab_size)),
                               b_init=Constant((vocab_size, )),
                               name='pre_softmax')
softmax_layer = NonlinearityLayer(name='softmax',
                                  nonlinearity=nonlinearities.softmax)

# define the forward-propagation expressions for the training model and the loss
learning_rate = theano.shared(np.float32(0.01))
word_indices = T.ivector()   # input word ids
cnn_features = T.fvector()   # CNN image-feature vector
true_dist = T.ivector()      # target word ids (class indices, despite the name)

# embed the word ids and the CNN features, stack them into a single input
# sequence, and apply dropout before the LSTM
word_embeddings = word_embedding_layer.get_output_expr(word_indices)
cnn_embeddings = cnn_embedding_layer.get_output_expr(cnn_features)
embeddings = row_stack_layer.get_output_expr(cnn_embeddings, word_embeddings)
masked_embeddings = embedding_dropout_layer.get_output_expr(embeddings)

# run the LSTM; the first hidden state is skipped, so each remaining state
# predicts the next token in the sequence
h = lstm_layer.get_output_expr(masked_embeddings)
masked_h = hidden_states_dropout_layer.get_output_expr(h[1:])

# project hidden states to vocabulary logits, normalize with softmax, and take
# the mean cross-entropy against the target ids as the loss
unnormalized_probs = pre_softmax_layer.get_output_expr(masked_h)
probs = softmax_layer.get_output_expr(unnormalized_probs)
loss = T.mean(T.nnet.categorical_crossentropy(probs, true_dist))
# split the parameters: the embedding lookup gets sparse updates, everything
# else gets dense updates (parameter-free layers such as the dropout and
# softmax layers presumably contribute empty lists)
updates = get_nesterov_momentum_updates(loss_expr=loss,
                                        dense_parameters=cnn_embedding_layer.get_parameters() +
                                                         row_stack_layer.get_parameters() +
                                                         embedding_dropout_layer.get_parameters() +
                                                         lstm_layer.get_parameters() +
                                                         hidden_states_dropout_layer.get_parameters() +
                                                         pre_softmax_layer.get_parameters() +
                                                         softmax_layer.get_parameters(),
                                        sparse_parameters=word_embedding_layer.get_parameters(),
                                        # the call is truncated in the source; passing the
                                        # shared learning_rate defined above is an assumption
                                        learning_rate=learning_rate)
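
The expressions above only build the symbolic graph; training requires compiling them into a callable. A minimal sketch of that step, assuming the variables from this example (theano.function is the standard Theano API, but word_ids, image_features, and target_ids are hypothetical placeholder arrays, and how the targets line up with the predictions from h[1:] depends on the model):

# compile the training step: calling it runs forward prop, returns the loss,
# and applies the Nesterov-momentum updates as a side effect
train_fn = theano.function(inputs=[word_indices, cnn_features, true_dist],
                           outputs=loss,
                           updates=updates)

# one training step on a single (image, caption) pair; the arrays here are
# hypothetical -- word_ids: int32 vector, image_features: float32 vector,
# target_ids: int32 vector aligned with the predictions from h[1:]
step_loss = train_fn(word_ids, image_features, target_ids)

Keeping the word-embedding parameters in sparse_parameters is the usual reason for the dense/sparse split: a batch touches only a few embedding rows, so the optimizer can update just those rows instead of the full embedding matrix.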