Example #1
# Symbolic inputs for a single training caption.
word_indices = T.ivector()  # int32 vector: word ids of the caption
cnn_features = T.fvector()  # float32 vector: CNN feature vector of the image
true_dist = T.ivector()     # int32 vector: target word ids (next-word labels)

# Forward graph: embed the caption words and the image, then feed both to the LSTM.
word_embedings = word_embedding_layer.get_output_expr(word_indices)
cnn_embedings = cnn_embedding_layer.get_output_expr(cnn_features)
# Presumably stacks the image embedding row above the word embedding rows so the
# LSTM consumes the image first, then the words — TODO confirm row_stack_layer semantics.
embedings = row_stack_layer.get_output_expr(cnn_embedings, word_embedings)
masked_embedings = embedding_dropout_layer.get_output_expr(embedings)  # dropout on inputs
h = lstm_layer.get_output_expr(masked_embedings)
# h[1:] skips the first hidden state before predicting words.
# NOTE(review): assumes axis 0 of h is time and the first step corresponds to
# the image embedding — confirm against lstm_layer/row_stack_layer.
masked_h = hidden_states_dropout_layer.get_output_expr(h[1:])  # dropout on hidden states
unnormalized_probs = pre_softmax_layer.get_output_expr(masked_h)  # logits over vocabulary
probs = softmax_layer.get_output_expr(unnormalized_probs)
# Training loss: mean categorical cross-entropy against the target word ids.
loss = T.mean(T.nnet.categorical_crossentropy(probs, true_dist))
# Nesterov-momentum (momentum=0.9) update rules. The word embedding table is
# passed separately as sparse_parameters (presumably so only the rows indexed
# this step are updated — verify in get_nesterov_momentum_updates); all other
# layers' parameters are treated as dense.
updates = get_nesterov_momentum_updates(loss_expr=loss,
                                        dense_parameters=cnn_embedding_layer.get_parameters() + \
                                                         row_stack_layer.get_parameters() + \
                                                         embedding_dropout_layer.get_parameters() + \
                                                         lstm_layer.get_parameters() + \
                                                         hidden_states_dropout_layer.get_parameters() + \
                                                         pre_softmax_layer.get_parameters() + \
                                                         softmax_layer.get_parameters(),
                                        sparse_parameters=word_embedding_layer.get_parameters(),
                                        learning_rate=learning_rate, momentum=0.9)

# compile model training function
# Scalar indices selecting one training sample from (unseen) shared storage:
cnn_features_idx = T.iscalar()  # presumably a row index into a CNN-features matrix — verify
caption_begin = T.iscalar()     # presumably slice bounds of the caption — verify
caption_end = T.iscalar()
train_model = theano.function(
    inputs=[cnn_features_idx, caption_begin, caption_end],
    outputs=loss,
    # NOTE(review): the source snippet is truncated here — the theano.function
    # call is never closed; updates=updates and any givens= mapping are missing.
Example #2
# Symbolic inputs for a single training caption.
word_indices = T.ivector()  # int32 vector: word ids of the caption
cnn_features = T.fvector()  # float32 vector: CNN feature vector of the image
true_dist = T.ivector()     # int32 vector: target word ids (next-word labels)

# Forward graph: embed the caption words and the image, then feed both to the LSTM.
word_embedings = word_embedding_layer.get_output_expr(word_indices)
cnn_embedings = cnn_embedding_layer.get_output_expr(cnn_features)
# Presumably stacks the image embedding row above the word embedding rows so the
# LSTM consumes the image first, then the words — TODO confirm row_stack_layer semantics.
embedings = row_stack_layer.get_output_expr(cnn_embedings, word_embedings)
masked_embedings = embedding_dropout_layer.get_output_expr(embedings)  # dropout on inputs
h = lstm_layer.get_output_expr(masked_embedings)
# h[1:] skips the first hidden state before predicting words.
# NOTE(review): assumes axis 0 of h is time and the first step corresponds to
# the image embedding — confirm against lstm_layer/row_stack_layer.
masked_h = hidden_states_dropout_layer.get_output_expr(h[1:])  # dropout on hidden states
unnormalized_probs = pre_softmax_layer.get_output_expr(masked_h)  # logits over vocabulary
probs = softmax_layer.get_output_expr(unnormalized_probs)
# Training loss: mean categorical cross-entropy against the target word ids.
loss = T.mean(T.nnet.categorical_crossentropy(probs, true_dist))
# Nesterov-momentum (momentum=0.9) update rules. The word embedding table is
# passed separately as sparse_parameters (presumably so only the rows indexed
# this step are updated — verify in get_nesterov_momentum_updates); all other
# layers' parameters are treated as dense.
updates = get_nesterov_momentum_updates(loss_expr=loss,
                                        dense_parameters=cnn_embedding_layer.get_parameters() + \
                                                         row_stack_layer.get_parameters() + \
                                                         embedding_dropout_layer.get_parameters() + \
                                                         lstm_layer.get_parameters() + \
                                                         hidden_states_dropout_layer.get_parameters() + \
                                                         pre_softmax_layer.get_parameters() + \
                                                         softmax_layer.get_parameters(),
                                        sparse_parameters=word_embedding_layer.get_parameters(),
                                        learning_rate=learning_rate, momentum=0.9)


# compile model training function
# Scalar indices selecting one training sample from (unseen) shared storage:
cnn_features_idx = T.iscalar()  # presumably a row index into a CNN-features matrix — verify
caption_begin = T.iscalar()     # presumably slice bounds of the caption — verify
caption_end = T.iscalar()
train_model = theano.function(
        inputs=[cnn_features_idx, caption_begin, caption_end],
        # NOTE(review): the excerpt ends mid-call — outputs=, updates= and any
        # givens= of this theano.function call continue beyond the visible source.