dropout_prob=0.5)  # NOTE(review): closes a call that begins before this chunk

# Output head: a dense layer followed by a softmax nonlinearity.
# Presumably the tuples given to Constant are the shapes of the weight matrix
# (hidden_state_dim x vocab_size) and of the bias (vocab_size,) -- confirm
# against the Constant initializer.
pre_softmax_layer = DenseLayer(W_init=Constant((hidden_state_dim, vocab_size)),
                               b_init=Constant((vocab_size, )),
                               name='pre_softmax')
softmax_layer = NonlinearityLayer(name='softmax',
                                  nonlinearity=nonlinearities.softmax)

# Define the forward-propagation expression for the training model and the
# loss function.

# The learning rate is held in a Theano shared variable (float32, 0.01).
learning_rate = theano.shared(np.float32(0.01))

# Symbolic inputs: two integer vectors and one float vector.
word_indices = T.ivector()
cnn_features = T.fvector()
# Integer vector despite the name; it is passed as the target argument of
# categorical_crossentropy below.
true_dist = T.ivector()

# Embed the word indices and the CNN features, then combine both embeddings
# through row_stack_layer and apply the embedding dropout layer.
word_embedings = word_embedding_layer.get_output_expr(word_indices)
cnn_embedings = cnn_embedding_layer.get_output_expr(cnn_features)
embedings = row_stack_layer.get_output_expr(cnn_embedings, word_embedings)
masked_embedings = embedding_dropout_layer.get_output_expr(embedings)

# Run the LSTM over the combined embeddings. h[1:] drops the first entry of
# the LSTM output before dropout -- presumably the step that corresponds to
# the CNN embedding; TODO confirm against row_stack_layer's ordering.
h = lstm_layer.get_output_expr(masked_embedings)
masked_h = hidden_states_dropout_layer.get_output_expr(h[1:])

# Dense projection followed by softmax gives the predicted probabilities.
unnormalized_probs = pre_softmax_layer.get_output_expr(masked_h)
probs = softmax_layer.get_output_expr(unnormalized_probs)

# Training loss: mean categorical cross-entropy between probs and true_dist.
loss = T.mean(T.nnet.categorical_crossentropy(probs, true_dist))

# Parameter updates: the word embedding layer's parameters are passed as
# sparse_parameters; the parameters of every other layer are concatenated
# and passed as dense_parameters.
# NOTE(review): this call continues past the end of the visible chunk; its
# remaining arguments are not shown here.
updates = get_nesterov_momentum_updates(loss_expr=loss,
                                        dense_parameters=cnn_embedding_layer.get_parameters() + \
                                                         row_stack_layer.get_parameters() + \
                                                         embedding_dropout_layer.get_parameters() + \
                                                         lstm_layer.get_parameters() + \
                                                         hidden_states_dropout_layer.get_parameters() + \
                                                         pre_softmax_layer.get_parameters() + \
                                                         softmax_layer.get_parameters(),
                                        sparse_parameters=word_embedding_layer.get_parameters(),
# Output head: a dense layer followed by a softmax nonlinearity.
# Presumably the tuples given to Constant are the shapes of the weight matrix
# (hidden_state_dim x vocab_size) and of the bias (vocab_size,) -- confirm
# against the Constant initializer.
pre_softmax_layer = DenseLayer(W_init=Constant((hidden_state_dim, vocab_size)),
                               b_init=Constant((vocab_size, )),
                               name='pre_softmax')
softmax_layer = NonlinearityLayer(name='softmax',
                                  nonlinearity=nonlinearities.softmax)

# Define the forward-propagation expression for the training model and the
# loss function.

# The learning rate is held in a Theano shared variable (float32, 0.01).
learning_rate = theano.shared(np.float32(0.01))

# Symbolic inputs: two integer vectors and one float vector.
word_indices = T.ivector()
cnn_features = T.fvector()
# Integer vector despite the name; it is passed as the target argument of
# categorical_crossentropy below.
true_dist = T.ivector()

# Embed the word indices and the CNN features, then combine both embeddings
# through row_stack_layer and apply the embedding dropout layer.
word_embedings = word_embedding_layer.get_output_expr(word_indices)
cnn_embedings = cnn_embedding_layer.get_output_expr(cnn_features)
embedings = row_stack_layer.get_output_expr(cnn_embedings, word_embedings)
masked_embedings = embedding_dropout_layer.get_output_expr(embedings)

# Run the LSTM over the combined embeddings. h[1:] drops the first entry of
# the LSTM output before dropout -- presumably the step that corresponds to
# the CNN embedding; TODO confirm against row_stack_layer's ordering.
h = lstm_layer.get_output_expr(masked_embedings)
masked_h = hidden_states_dropout_layer.get_output_expr(h[1:])

# Dense projection followed by softmax gives the predicted probabilities.
unnormalized_probs = pre_softmax_layer.get_output_expr(masked_h)
probs = softmax_layer.get_output_expr(unnormalized_probs)

# Training loss: mean categorical cross-entropy between probs and true_dist.
loss = T.mean(T.nnet.categorical_crossentropy(probs, true_dist))

# Parameter updates: the word embedding layer's parameters are passed as
# sparse_parameters; the parameters of every other layer are concatenated
# and passed as dense_parameters.
# NOTE(review): this call continues past the end of the visible chunk; its
# remaining arguments are not shown here.
updates = get_nesterov_momentum_updates(loss_expr=loss,
                                        dense_parameters=cnn_embedding_layer.get_parameters() + \
                                                         row_stack_layer.get_parameters() + \
                                                         embedding_dropout_layer.get_parameters() + \
                                                         lstm_layer.get_parameters() + \
                                                         hidden_states_dropout_layer.get_parameters() + \
                                                         pre_softmax_layer.get_parameters() + \
                                                         softmax_layer.get_parameters(),
                                        sparse_parameters=word_embedding_layer.get_parameters(),