Example #1
import cPickle

import theano
import theano.tensor as T

# EmbeddingLayer, DenseLayer, RowStackLayer, ScaleLayer, LstmLayer and
# NonlinearityLayer are the project's own layer classes; their import path
# is not shown in the snippet
def get_predict_function(file_path):
    # the model state is a pickled dict of layer states, so open the file in binary mode
    with open(file_path, 'rb') as f:
        model_state = cPickle.load(f)

    # define model architecture
    word_embedding_layer = EmbeddingLayer.create_from_state(model_state['word_embedding_layer'])
    cnn_embedding_layer = DenseLayer.create_from_state(model_state['cnn_embedding_layer'])
    row_stack_layer = RowStackLayer.create_from_state(model_state['row_stack_layer'])
    embedding_scale_layer = ScaleLayer.create_from_state(model_state['embedding_scale_layer'])
    lstm_layer = LstmLayer.create_from_state(model_state['lstm_layer'])
    hidden_states_scale_layer = ScaleLayer.create_from_state(model_state['hidden_states_scale_layer'])
    pre_softmax_layer = DenseLayer.create_from_state(model_state['pre_softmax_layer'])
    softmax_layer = NonlinearityLayer.create_from_state(model_state['softmax_layer'])

    # define forward propagation expression for model
    word_indices = T.ivector()
    cnn_features = T.fvector()
    word_embeddings = word_embedding_layer.get_output_expr(word_indices)
    cnn_embeddings = cnn_embedding_layer.get_output_expr(cnn_features)
    embeddings = row_stack_layer.get_output_expr(cnn_embeddings, word_embeddings)
    scaled_embeddings = embedding_scale_layer.get_output_expr(embeddings)
    h = lstm_layer.get_output_expr(scaled_embeddings)
    # only the last LSTM hidden state feeds the softmax that predicts the next word
    scaled_h = hidden_states_scale_layer.get_output_expr(h[h.shape[0] - 1])
    unnormalized_probs = pre_softmax_layer.get_output_expr(scaled_h)
    probs = softmax_layer.get_output_expr(unnormalized_probs)

    predict_probs = theano.function(inputs=[word_indices, cnn_features],
                                    outputs=probs)

    return predict_probs, model_state['word_to_idx'], model_state['idx_to_word']
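
A minimal usage sketch for the returned prediction function, assuming Python 2 / Theano, a pickled model at a hypothetical path model.pkl, 4096-dimensional CNN features, and that word_to_idx / idx_to_word map caption tokens to and from integer indices (the start-token name below is illustrative):

import numpy as np

predict_probs, word_to_idx, idx_to_word = get_predict_function('model.pkl')  # hypothetical path

# words generated so far (here just a start token) and the image's CNN features
word_indices = np.array([word_to_idx['<s>']], dtype=np.int32)  # '<s>' is an assumed token name
cnn_features = np.zeros(4096, dtype=np.float32)                # would come from the CNN in practice

probs = predict_probs(word_indices, cnn_features)              # probabilities over the vocabulary
next_word = idx_to_word[int(probs.argmax())]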
Example #2
# define the training model
vocab_size = len(train_data_iterator.idx_to_word)
cnn_features_dim = 4096
word_embed_dim = 1024
hidden_state_dim = 1024
normal_init = Normal((word_embed_dim, hidden_state_dim))
orthog_init = Orthogonal((hidden_state_dim, hidden_state_dim))
b_init = Constant((hidden_state_dim, ))

word_embedding_layer = EmbeddingLayer(name='word_embedding',
                                      embedding_init=Normal(
                                          (vocab_size, word_embed_dim)))
cnn_embedding_layer = DenseLayer(name='cnn_embedding',
                                 W_init=Normal(
                                     (cnn_features_dim, word_embed_dim)))
row_stack_layer = RowStackLayer('row_stack')
embedding_dropout_layer = DropoutLayer(name='embedding_dropout',
                                       dropout_prob=0.5)
lstm_layer = LstmLayer(name='lstm',
                       W_z_init=normal_init,
                       W_i_init=normal_init,
                       W_f_init=normal_init,
                       W_o_init=normal_init,
                       R_z_init=orthog_init,
                       R_i_init=orthog_init,
                       R_f_init=orthog_init,
                       R_o_init=orthog_init,
                       b_z_init=b_init,
                       b_i_init=b_init,
                       b_f_init=b_init,
                       b_o_init=b_init)
Example #3
# define the training model
vocab_size = len(train_data_iterator.idx_to_word)
cnn_features_dim = 4096
word_embed_dim = 1024
hidden_state_dim = 1024
normal_init = Normal((word_embed_dim, hidden_state_dim))
orthog_init = Orthogonal((hidden_state_dim, hidden_state_dim))
b_init = Constant((hidden_state_dim, ))


word_embedding_layer = EmbeddingLayer(name='word_embedding',
                                      embedding_init=Normal((vocab_size, word_embed_dim)))
cnn_embedding_layer = DenseLayer(name='cnn_embedding',
                                 W_init=Normal((cnn_features_dim, word_embed_dim)))
row_stack_layer = RowStackLayer('row_stack')
embedding_dropout_layer = DropoutLayer(name='embedding_dropout',
                                       dropout_prob=0.5)
lstm_layer = LstmLayer(name='lstm',
                       W_z_init=normal_init, W_i_init=normal_init, W_f_init=normal_init, W_o_init=normal_init,
                       R_z_init=orthog_init, R_i_init=orthog_init, R_f_init=orthog_init, R_o_init=orthog_init,
                       b_z_init=b_init,      b_i_init=b_init,      b_f_init=b_init,      b_o_init=b_init)
hidden_states_dropout_layer = DropoutLayer(name='hidden_states_dropout',
                                           dropout_prob=0.5)
pre_softmax_layer = DenseLayer(W_init=Constant((hidden_state_dim, vocab_size)),
                               b_init=Constant((vocab_size, )),
                               name='pre_softmax')
softmax_layer = NonlinearityLayer(name='softmax',
                                  nonlinearity=nonlinearities.softmax)
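
These snippets stop at the layer definitions; the training-time forward pass is not shown. It plausibly mirrors the prediction graph of Example #1, with the dropout layers taking the place of the scale layers. A minimal sketch under that assumption, further assuming DropoutLayer exposes the same get_output_expr interface as the other layers:

import theano.tensor as T

# symbolic inputs: caption word indices and the image's CNN feature vector
word_indices = T.ivector()
cnn_features = T.fvector()

word_embeddings = word_embedding_layer.get_output_expr(word_indices)
cnn_embeddings = cnn_embedding_layer.get_output_expr(cnn_features)
embeddings = row_stack_layer.get_output_expr(cnn_embeddings, word_embeddings)
embeddings = embedding_dropout_layer.get_output_expr(embeddings)          # assumed API, as noted above
h = lstm_layer.get_output_expr(embeddings)
h_last = hidden_states_dropout_layer.get_output_expr(h[h.shape[0] - 1])   # assumed API, as noted above
unnormalized_probs = pre_softmax_layer.get_output_expr(h_last)
probs = softmax_layer.get_output_expr(unnormalized_probs)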