import cPickle

import numpy as np
import theano
import theano.tensor as T

# EmbeddingLayer, DenseLayer, RowStackLayer, ScaleLayer, LstmLayer,
# NonlinearityLayer, DropoutLayer and the Normal/Orthogonal/Constant
# initializers are assumed to come from the author's own layer library


def get_predict_function(file_path):
    with open(file_path) as f:
        model_state = cPickle.load(f)

    # define model architecture (restored from the pickled state); at prediction
    # time the dropout layers used during training are replaced by scale layers
    word_embedding_layer = EmbeddingLayer.create_from_state(model_state['word_embedding_layer'])
    cnn_embedding_layer = DenseLayer.create_from_state(model_state['cnn_embedding_layer'])
    row_stack_layer = RowStackLayer.create_from_state(model_state['row_stack_layer'])
    embedding_scale_layer = ScaleLayer.create_from_state(model_state['embedding_scale_layer'])
    lstm_layer = LstmLayer.create_from_state(model_state['lstm_layer'])
    hidden_states_scale_layer = ScaleLayer.create_from_state(model_state['hidden_states_scale_layer'])
    pre_softmax_layer = DenseLayer.create_from_state(model_state['pre_softmax_layer'])
    softmax_layer = NonlinearityLayer.create_from_state(model_state['softmax_layer'])

    # define the forward propagation expression for the model
    word_indices = T.ivector()
    cnn_features = T.fvector()
    word_embeddings = word_embedding_layer.get_output_expr(word_indices)
    cnn_embeddings = cnn_embedding_layer.get_output_expr(cnn_features)
    # stack the CNN embedding on top of the word embeddings to form the LSTM input
    embeddings = row_stack_layer.get_output_expr(cnn_embeddings, word_embeddings)
    scaled_embeddings = embedding_scale_layer.get_output_expr(embeddings)
    h = lstm_layer.get_output_expr(scaled_embeddings)
    # only the hidden state at the last time step is used for prediction
    scaled_h = hidden_states_scale_layer.get_output_expr(h[h.shape[0] - 1])
    unnormalized_probs = pre_softmax_layer.get_output_expr(scaled_h)
    probs = softmax_layer.get_output_expr(unnormalized_probs)

    predict_probs = theano.function(inputs=[word_indices, cnn_features], outputs=probs)
    return predict_probs, model_state['word_to_idx'], model_state['idx_to_word']
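For illustration, here is a minimal usage sketch of the returned function. The model path, the seed words, and the zero feature vector are hypothetical stand-ins; the only real constraints are the input types, which must match the ivector/fvector declarations above (int32 indices, float32 features).

predict_probs, word_to_idx, idx_to_word = get_predict_function('model.pkl')  # hypothetical path

# hypothetical inputs: the caption generated so far and the image's CNN features
word_indices = np.asarray([word_to_idx[w] for w in ['a', 'cat']], dtype=np.int32)
cnn_features = np.zeros(cnn_features_dim, dtype=np.float32)  # real features would come from the CNN

probs = predict_probs(word_indices, cnn_features)  # distribution over the vocabulary
next_word = idx_to_word[int(probs.argmax())]       # greedy choice of the next word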
word_embed_dim = 1024
hidden_state_dim = 1024
normal_init = Normal((word_embed_dim, hidden_state_dim))
orthog_init = Orthogonal((hidden_state_dim, hidden_state_dim))
b_init = Constant((hidden_state_dim, ))

# vocab_size and cnn_features_dim are assumed to be defined earlier
word_embedding_layer = EmbeddingLayer(name='word_embedding',
                                      embedding_init=Normal((vocab_size, word_embed_dim)))
cnn_embedding_layer = DenseLayer(name='cnn_embedding',
                                 W_init=Normal((cnn_features_dim, word_embed_dim)))
row_stack_layer = RowStackLayer('row_stack')
embedding_dropout_layer = DropoutLayer(name='embedding_dropout', dropout_prob=0.5)
lstm_layer = LstmLayer(name='lstm',
                       W_z_init=normal_init, W_i_init=normal_init,
                       W_f_init=normal_init, W_o_init=normal_init,
                       R_z_init=orthog_init, R_i_init=orthog_init,
                       R_f_init=orthog_init, R_o_init=orthog_init,
                       b_z_init=b_init, b_i_init=b_init,
                       b_f_init=b_init, b_o_init=b_init)
hidden_states_dropout_layer = DropoutLayer(name='hidden_states_dropout', dropout_prob=0.5)
pre_softmax_layer = DenseLayer(name='pre_softmax',
                               W_init=Constant((hidden_state_dim, vocab_size)),
                               b_init=Constant((vocab_size, )))
softmax_layer = NonlinearityLayer(name='softmax', nonlinearity=nonlinearities.softmax)

# define the forward propagation expression for the training model and the loss function
learning_rate = theano.shared(np.float32(0.01))
word_indices = T.ivector()
cnn_features = T.fvector()
true_dist = T.ivector()
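The snippet stops right after the symbolic inputs are declared. A plausible continuation of the training graph is sketched below, mirroring the forward pass of get_predict_function but with the dropout layers in place of the scale layers; the cross-entropy loss, the alignment between hidden states and target words, the get_parameters() accessor, and the SGD update are all assumptions, not the author's verbatim code.

word_embeddings = word_embedding_layer.get_output_expr(word_indices)
cnn_embeddings = cnn_embedding_layer.get_output_expr(cnn_features)
embeddings = row_stack_layer.get_output_expr(cnn_embeddings, word_embeddings)
dropped_embeddings = embedding_dropout_layer.get_output_expr(embeddings)
h = lstm_layer.get_output_expr(dropped_embeddings)
dropped_h = hidden_states_dropout_layer.get_output_expr(h)
unnormalized_probs = pre_softmax_layer.get_output_expr(dropped_h)
probs = softmax_layer.get_output_expr(unnormalized_probs)

# mean cross-entropy between the predicted distributions and the true next words;
# true_dist holds the target word indices (the exact time-step alignment is an assumption)
loss = T.mean(T.nnet.categorical_crossentropy(probs, true_dist))

# plain SGD with the shared learning rate; get_parameters() is a hypothetical
# accessor standing in for however the layer library exposes its parameters
params = (word_embedding_layer.get_parameters() + cnn_embedding_layer.get_parameters() +
          lstm_layer.get_parameters() + pre_softmax_layer.get_parameters())
grads = T.grad(loss, params)
updates = [(p, p - learning_rate * g) for p, g in zip(params, grads)]
train = theano.function(inputs=[word_indices, cnn_features, true_dist],
                        outputs=loss, updates=updates)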