def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128,
             n_topics=2, topic_embedding_dim=5, input_params=None):
    """Build the symbolic graph of the style-conditioned encoder-decoder.

    The graph encodes ``question`` and ``style`` with two separate encoder
    layers, concatenates their last hidden states, feeds that to the
    decoder together with ``answer``, and scores the next answer word with
    a maxout layer.  A softmax layer on the question encoding is scored
    against ``topic``.

    Args:
        n_words: Number of rows of the word embedding matrix.
        word_embedding_dim: Width of a word embedding.
        hidden_status_dim: Width of the encoder/decoder hidden state.
        n_topics: Stored in ``self.options``; not read in this method.
        topic_embedding_dim: Stored in ``self.options``; not read in this
            method.
        input_params: Forwarded unchanged to ``self._set_parameters``.
    """
    self.options = options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        'n_topics': n_topics,
        'topic_embedding_dim': topic_embedding_dim,
        # Learning rate for sgd (not used for adadelta and rmsprop).
        'learning_rate': 0.0001,
        # sgd, adadelta and rmsprop available; sgd is very hard to use and
        # not recommended (probably needs momentum and a decaying rate).
        'optimizer': self.adadelta,
    }
    word_dim = options['word_embedding_dim']
    hidden_dim = options['hidden_status_dim']

    # Global parameters first, then their Theano counterparts.
    params = self.init_global_params(options)
    self.tparams = self.init_tparams(params)

    # Construct the network.
    theano.config.compute_test_value = 'off'
    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask',
                                       dtype=config.globalFloatType())
    self.style = tensor.matrix('style', dtype='int64')
    self.style_mask = tensor.matrix('style_mask',
                                    dtype=config.globalFloatType())
    self.style_embedding = self.tparams['Wemb_e'][
        self.style.flatten()].reshape(
            [self.style.shape[0], self.style.shape[1], word_dim])
    self.topic = tensor.matrix('topic', dtype=config.globalFloatType())
    self.question_embedding = self.tparams['Wemb_e'][
        self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], word_dim])

    # 1. encoder layers (question and style are encoded separately).
    self.encoder_layer = EncoderLayer(
        word_embedding_dim=word_dim,
        hidden_status_dim=hidden_dim,
        tparams=self.tparams)
    self.encoder_hidden_status1 = self.encoder_layer.getOutput(
        inputs=(self.question_embedding, self.question_mask))
    self.encoder_layer_style = EncoderLayer(
        word_embedding_dim=word_dim,
        hidden_status_dim=hidden_dim,
        tparams=self.tparams,
        prefix='encoder_layer_style')
    self.encoder_hidden_status_style = self.encoder_layer_style.getOutput(
        inputs=(self.style_embedding, self.style_mask))
    # Last state of both encoders side by side, with a leading axis of
    # length 1 in front.
    self.encoder_hidden_status = tensor.concatenate(
        [self.encoder_hidden_status1[-1],
         self.encoder_hidden_status_style[-1]],
        axis=1).reshape([
            1,
            self.encoder_hidden_status_style.shape[1],
            2 * self.encoder_hidden_status_style.shape[2],
        ])

    # 2. decoder layer
    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_mask = tensor.matrix('answer_mask',
                                     dtype=config.globalFloatType())
    self.answer_embedding = self.tparams['Wemb_e'][
        self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], word_dim])
    self.decoder_layer = DecoderLayer_Cho(
        word_embedding_dim=word_dim + 2 * hidden_dim,
        hidden_status_dim=hidden_dim,
        tparams=self.tparams)
    self.decoder_hidden_status = self.decoder_layer.getOutput(
        inputs=(self.answer_embedding, self.answer_mask,
                self.encoder_hidden_status))

    # 2.5 softmax layer: scores the last question state against `topic`.
    self.softmax_layer = SoftmaxLayer(n_in=options["hidden_status_dim"],
                                      n_out=2,
                                      tparams=self.tparams)
    self.softmax_input = self.encoder_hidden_status1[-1]
    class_error_vector = self.softmax_layer.negative_log_likelihood(
        self.softmax_input, tensor.cast(self.topic.flatten(), 'int64'))
    self.class_cost = class_error_vector.sum() / self.question.shape[1]

    # 3. maxout layer
    self.maxout_layer = MaxoutLayer(
        base_dim=word_dim,
        refer_dim=3 * hidden_dim + word_dim,
        tparams=self.tparams,
        prefix="maxout")
    n_steps = self.answer.shape[0] - 1
    n_columns = self.answer.shape[1]
    n_rows = n_steps * n_columns
    self.maxout_input = tensor.concatenate([
        self.decoder_hidden_status[:-1, :, :].reshape([n_rows, hidden_dim]),
        # Repeat the encoder summary for every decoding step.
        tensor.alloc(self.encoder_hidden_status[-1, :, :],
                     n_steps, n_columns, 2 * hidden_dim).reshape(
                         [n_rows, 2 * hidden_dim]),
        self.answer_embedding[:-1, :, :].reshape([n_rows, word_dim]),
    ], axis=1)
    output_error_vector = self.maxout_layer.negative_log_likelihood(
        self.tparams['Wemb_e'],
        self.maxout_input,
        y=self.answer[1:, :].flatten())

    # `topic` is repeated for every decoding step and used as a per-token
    # weight in the cost.
    self.topic_matrix = tensor.alloc(self.topic.flatten(),
                                     n_steps, n_columns).flatten()
    self.topic_matrix_change = self.topic_matrix
    m = self.answer_mask[1:, :]
    self.cost = -1.0 * tensor.dot(
        output_error_vector,
        m.flatten() * self.topic_matrix_change) / m.sum()
    self.cost = self.cost - self.class_cost

    # Error per column of `answer` (presumably one column per sample).
    # The reduction must run on the reshaped, masked matrix: reducing the
    # flat per-token vector would ignore the mask and collapse the
    # numerator to a single scalar.
    self.output_error_vector = output_error_vector.reshape(
        [n_steps, n_columns])
    self.output_error_vector = self.output_error_vector * m
    self.output_error_vector = (-self.output_error_vector.sum(axis=0)
                                / m.sum(axis=0))

    # Params from list to TensorVariable.
    self._set_parameters(input_params)
def __init__(self, n_words, word_embedding_dim, n_style, style_embedding_dim,
             hidden_status_dim, input_params=None):
    """Build the symbolic graph of the style-mixture encoder-decoder.

    Every answer is decoded once per style: the answer tensors and the last
    encoder state are replicated ``n_style`` times, each copy is paired with
    one row of the style embedding ``Semb``, and the per-style sentence
    log-likelihoods are combined with the log of ``self.style_prob``.

    Args:
        n_words: Number of rows of the word embedding matrices; also the
            last axis of the reshaped prediction probabilities.
        word_embedding_dim: Width of a word embedding.
        n_style: Number of styles (rows of ``Semb``).
        style_embedding_dim: Width of a style embedding.
        hidden_status_dim: Width of the encoder/decoder hidden state.
        input_params: Forwarded unchanged to ``self.set_parameters``.
    """
    self.options = options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'n_style': n_style,
        'style_embedding_dim': style_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        # Learning rate for sgd (not used for adadelta and rmsprop).
        'learning_rate': 0.0001,
        # sgd, adadelta and rmsprop available; sgd is very hard to use and
        # not recommended (probably needs momentum and a decaying rate).
        'optimizer': self.rmsprop,
    }
    word_dim = options['word_embedding_dim']
    hidden_dim = options['hidden_status_dim']
    style_dim = options['style_embedding_dim']
    n_styles = options['n_style']

    # Global parameters first, then their Theano counterparts.
    params = self.init_global_params(options)
    self.tparams = self.init_tparams(params)

    # Construct the network.
    theano.config.compute_test_value = 'off'
    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask',
                                       dtype=config.globalFloatType())
    self.question_embedding = self.tparams['Wemb_e'][
        self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], word_dim])

    # 1. encoder layer
    self.encoder_layer = EncoderLayer(word_embedding_dim=word_dim,
                                      hidden_status_dim=hidden_dim,
                                      tparams=self.tparams)
    self.encoder_hidden_status = self.encoder_layer.get_output(
        inputs=(self.question_embedding, self.question_mask))

    # 2. style layer
    self.style_layer = MaxoutLayer(base_dim=style_dim,
                                   refer_dim=options["hidden_status_dim"],
                                   tparams=self.tparams,
                                   prefix="style")
    # (samples, n_style)
    self.style_prob = self.style_layer.probability(
        base_data=self.tparams['Semb'],
        refer_data=self.encoder_hidden_status[-1, :, :])

    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_mask = tensor.matrix('answer_mask',
                                     dtype=config.globalFloatType())
    self.decoder_layer = DecoderLayer_Cho(
        word_embedding_dim=word_dim + hidden_dim + style_dim,
        hidden_status_dim=hidden_dim,
        tparams=self.tparams)
    self.maxout_layer = MaxoutLayer(
        base_dim=word_dim,
        refer_dim=2 * options["hidden_status_dim"] + word_dim + style_dim,
        tparams=self.tparams,
        prefix="maxout")
    self.answer_embedding = self.tparams['Wemb_d'][
        self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], word_dim])

    # Extend the data: one copy of every sample per style.  After the
    # reshapes below the replicated axis is ordered style-major, i.e. all
    # samples for style 0, then all samples for style 1, and so on.
    n_answer_time_steps = self.answer.shape[0]
    n_samples = self.question.shape[1]
    extent_data_size = n_samples * n_styles

    answer_mask = tensor.alloc(
        self.answer_mask, n_styles, n_answer_time_steps, n_samples
    ).dimshuffle([1, 0, 2]).reshape([n_answer_time_steps, extent_data_size])
    answer = tensor.alloc(
        self.answer, n_styles, n_answer_time_steps, n_samples
    ).dimshuffle([1, 0, 2]).reshape([n_answer_time_steps, extent_data_size])
    answer_embedding = tensor.alloc(
        self.answer_embedding,
        n_styles, n_answer_time_steps, n_samples, word_dim
    ).dimshuffle(1, 0, 2, 3).reshape(
        [n_answer_time_steps, extent_data_size, word_dim])

    # Hidden status with style: last encoder state plus a style embedding.
    encoder_hidden_status = tensor.alloc(
        self.encoder_hidden_status[-1], n_styles, n_samples, hidden_dim
    ).reshape([1, extent_data_size, hidden_dim])
    style_embeddings = tensor.alloc(
        self.tparams['Semb'], n_samples, n_styles, style_dim
    ).dimshuffle(1, 0, 2).reshape([1, extent_data_size, style_dim])
    encoder_hidden_status = tensor.concatenate(
        [encoder_hidden_status, style_embeddings], axis=2)

    # 3. decoder layer
    decoder_hidden_status = self.decoder_layer.get_output(
        inputs=[answer_embedding, answer_mask, encoder_hidden_status])

    # 4. maxout layer
    n_train_rows = (n_answer_time_steps - 1) * extent_data_size
    self.maxout_input = tensor.concatenate([
        decoder_hidden_status[:-1, :, :].reshape([n_train_rows, hidden_dim]),
        tensor.alloc(encoder_hidden_status[-1, :, :],
                     n_answer_time_steps - 1,
                     extent_data_size,
                     style_dim + hidden_dim).reshape(
                         [n_train_rows, hidden_dim + style_dim]),
        answer_embedding[:-1, :, :].reshape([n_train_rows, word_dim]),
    ], axis=1)
    # Same layout, built from the last time step only, for prediction.
    predict_maxout_input = tensor.concatenate([
        decoder_hidden_status[-1, :, :].reshape(
            [extent_data_size, hidden_dim]),
        tensor.alloc(encoder_hidden_status[-1, :, :],
                     1,
                     extent_data_size,
                     style_dim + hidden_dim).reshape(
                         [extent_data_size, hidden_dim + style_dim]),
        answer_embedding[-1, :, :].reshape([extent_data_size, word_dim]),
    ], axis=1)

    likelihood_vector = self.maxout_layer.likelihood(
        base_data=self.tparams['Wemb_d'],
        refer_data=self.maxout_input,
        y=answer[1:, :].flatten())
    pred_word, predict_probability = self.maxout_layer.get_output(
        base_data=self.tparams['Wemb_d'],
        refer_data=predict_maxout_input)

    # Carefully check: these reshapes rely on the style-major ordering.
    likelihood_vector = likelihood_vector.reshape(
        [n_answer_time_steps - 1, n_styles, n_samples])
    predict_probability = predict_probability.reshape(
        [n_styles, n_samples, options['n_words']])
    self.predict_probability = predict_probability
    pred_word = pred_word.reshape([n_styles, n_samples])
    self.pred_word = pred_word

    # Transform the multiplication into an addition: masked sum of token
    # log-likelihoods over time, giving (n_style, n_samples).
    log_likelihood_vector = (
        tensor.log(likelihood_vector)
        * self.answer_mask[1:, :].dimshuffle(0, 'x', 1))
    log_likelihood_vector = log_likelihood_vector.sum(axis=0)
    self.log_likelihood_vector_on_diff_styles = log_likelihood_vector

    # Module-level `tensor.log` is used for both log terms.
    sentence_probability = (
        log_likelihood_vector
        + tensor.transpose(tensor.log(self.style_prob)))
    sentence_probability = self.remove_min_neg_log_prob(sentence_probability)
    sentence_probability = self.total_probability_precise(
        sentence_probability, 0, options['n_style'])

    negative_log_sentence_probability = -sentence_probability
    self.likelihood_vector = negative_log_sentence_probability
    self.cost = (negative_log_sentence_probability.sum()
                 / self.answer_mask[1:, :].sum())
    # Error normalised by each column's unmasked length, then averaged.
    self.zi_error = (negative_log_sentence_probability
                     / tensor.sum(self.answer_mask[1:, :], 0))
    self.zi_error = self.zi_error.mean()
    # Plain average of the sentence-level error.
    self.st_error = negative_log_sentence_probability.mean()

    # Params from list to TensorVariable.
    self.set_parameters(input_params)
class RnnEncoderDecoderNetwork(Network):
    """
    This class processes a dialog pair with an encoder-decoder network.
    It has 2 abilities:
        1. Train the language model.
        2. Model the relationship of Q&A
    """

    def init_global_params(self, options):
        """
        Global (not LSTM) parameter. For the embedding and the classifier.

        Returns an ``OrderedDict`` holding ``'Wemb_e'``, an
        ``(n_words, word_embedding_dim)`` array of uniform values scaled
        by 0.1.
        """
        params = OrderedDict()
        randn = numpy.random.rand(options['n_words'],
                                  options['word_embedding_dim'])
        params['Wemb_e'] = (0.1 * randn).astype(config.globalFloatType())
        # NOTE: the result of this second draw is unused.  The call is kept
        # so the global numpy RNG is advanced exactly as before and any
        # later random initialisation under a fixed seed is unchanged.
        numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        return params

    def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128,
                 n_topics=2, topic_embedding_dim=5, input_params=None):
        """Build the symbolic graph of the style-conditioned encoder-decoder.

        The graph encodes ``question`` and ``style`` with two separate
        encoder layers, concatenates their last hidden states, feeds that
        to the decoder together with ``answer``, and scores the next answer
        word with a maxout layer.  A softmax layer on the question encoding
        is scored against ``topic``.

        Args:
            n_words: Number of rows of the word embedding matrix.
            word_embedding_dim: Width of a word embedding.
            hidden_status_dim: Width of the encoder/decoder hidden state.
            n_topics: Stored in ``self.options``; not read in this method.
            topic_embedding_dim: Stored in ``self.options``; not read in
                this method.
            input_params: Forwarded unchanged to ``self._set_parameters``.
        """
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'n_topics': n_topics,
            'topic_embedding_dim': topic_embedding_dim,
            # Learning rate for sgd (not used for adadelta and rmsprop).
            'learning_rate': 0.0001,
            # sgd, adadelta and rmsprop available; sgd is very hard to use
            # and not recommended (probably needs momentum and a decaying
            # learning rate).
            'optimizer': self.adadelta,
        }
        word_dim = options['word_embedding_dim']
        hidden_dim = options['hidden_status_dim']

        # Global parameters first, then their Theano counterparts.
        params = self.init_global_params(options)
        self.tparams = self.init_tparams(params)

        # Construct the network.
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask',
                                           dtype=config.globalFloatType())
        self.style = tensor.matrix('style', dtype='int64')
        self.style_mask = tensor.matrix('style_mask',
                                        dtype=config.globalFloatType())
        self.style_embedding = self.tparams['Wemb_e'][
            self.style.flatten()].reshape(
                [self.style.shape[0], self.style.shape[1], word_dim])
        self.topic = tensor.matrix('topic', dtype=config.globalFloatType())
        self.question_embedding = self.tparams['Wemb_e'][
            self.question.flatten()].reshape(
                [self.question.shape[0], self.question.shape[1], word_dim])

        # 1. encoder layers (question and style are encoded separately).
        self.encoder_layer = EncoderLayer(
            word_embedding_dim=word_dim,
            hidden_status_dim=hidden_dim,
            tparams=self.tparams)
        self.encoder_hidden_status1 = self.encoder_layer.getOutput(
            inputs=(self.question_embedding, self.question_mask))
        self.encoder_layer_style = EncoderLayer(
            word_embedding_dim=word_dim,
            hidden_status_dim=hidden_dim,
            tparams=self.tparams,
            prefix='encoder_layer_style')
        self.encoder_hidden_status_style = \
            self.encoder_layer_style.getOutput(
                inputs=(self.style_embedding, self.style_mask))
        # Last state of both encoders side by side, with a leading axis of
        # length 1 in front.
        self.encoder_hidden_status = tensor.concatenate(
            [self.encoder_hidden_status1[-1],
             self.encoder_hidden_status_style[-1]],
            axis=1).reshape([
                1,
                self.encoder_hidden_status_style.shape[1],
                2 * self.encoder_hidden_status_style.shape[2],
            ])

        # 2. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask',
                                         dtype=config.globalFloatType())
        self.answer_embedding = self.tparams['Wemb_e'][
            self.answer.flatten()].reshape(
                [self.answer.shape[0], self.answer.shape[1], word_dim])
        self.decoder_layer = DecoderLayer_Cho(
            word_embedding_dim=word_dim + 2 * hidden_dim,
            hidden_status_dim=hidden_dim,
            tparams=self.tparams)
        self.decoder_hidden_status = self.decoder_layer.getOutput(
            inputs=(self.answer_embedding, self.answer_mask,
                    self.encoder_hidden_status))

        # 2.5 softmax layer: scores the last question state against `topic`.
        self.softmax_layer = SoftmaxLayer(n_in=options["hidden_status_dim"],
                                          n_out=2,
                                          tparams=self.tparams)
        self.softmax_input = self.encoder_hidden_status1[-1]
        class_error_vector = self.softmax_layer.negative_log_likelihood(
            self.softmax_input, tensor.cast(self.topic.flatten(), 'int64'))
        self.class_cost = class_error_vector.sum() / self.question.shape[1]

        # 3. maxout layer
        self.maxout_layer = MaxoutLayer(
            base_dim=word_dim,
            refer_dim=3 * hidden_dim + word_dim,
            tparams=self.tparams,
            prefix="maxout")
        n_steps = self.answer.shape[0] - 1
        n_columns = self.answer.shape[1]
        n_rows = n_steps * n_columns
        self.maxout_input = tensor.concatenate([
            self.decoder_hidden_status[:-1, :, :].reshape(
                [n_rows, hidden_dim]),
            # Repeat the encoder summary for every decoding step.
            tensor.alloc(self.encoder_hidden_status[-1, :, :],
                         n_steps, n_columns, 2 * hidden_dim).reshape(
                             [n_rows, 2 * hidden_dim]),
            self.answer_embedding[:-1, :, :].reshape([n_rows, word_dim]),
        ], axis=1)
        output_error_vector = self.maxout_layer.negative_log_likelihood(
            self.tparams['Wemb_e'],
            self.maxout_input,
            y=self.answer[1:, :].flatten())

        # `topic` is repeated for every decoding step and used as a
        # per-token weight in the cost.
        self.topic_matrix = tensor.alloc(self.topic.flatten(),
                                         n_steps, n_columns).flatten()
        self.topic_matrix_change = self.topic_matrix
        m = self.answer_mask[1:, :]
        self.cost = -1.0 * tensor.dot(
            output_error_vector,
            m.flatten() * self.topic_matrix_change) / m.sum()
        self.cost = self.cost - self.class_cost

        # Error per column of `answer` (presumably one column per sample).
        # The reduction must run on the reshaped, masked matrix: reducing
        # the flat per-token vector would ignore the mask and collapse the
        # numerator to a single scalar.
        self.output_error_vector = output_error_vector.reshape(
            [n_steps, n_columns])
        self.output_error_vector = self.output_error_vector * m
        self.output_error_vector = (-self.output_error_vector.sum(axis=0)
                                    / m.sum(axis=0))

        # Params from list to TensorVariable.
        self._set_parameters(input_params)

    def get_training_function(self, cr, error_type="RMSE", batch_size=10,
                              batch_repeat=1):
        """Return ``update_function(index)`` that trains on one batch.

        Args:
            cr: Data source; ``cr.get_train_set([start, stop])`` must return
                a 6-tuple whose first four items are the (data, mask) pairs
                for question, answer, style and topic.
            error_type: Unused here; kept for interface compatibility.
            batch_size: Number of samples between ``start`` and ``stop``.
            batch_repeat: How many optimizer steps to take on each batch.

        Returns:
            A function mapping a batch index to the value returned by the
            last gradient step, or ``None`` when ``batch_repeat`` is less
            than 1.
        """
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        # list() keeps this valid where dict.values() returns a view.
        grads = tensor.grad(self.cost, wrt=list(self.tparams.values()))
        f_grad_shared, f_update = optimizer(
            lr, self.tparams, grads,
            [self.question, self.question_mask,
             self.answer, self.answer_mask,
             self.style, self.style_mask,
             self.topic],
            [self.cost])

        def update_function(index):
            (question, question_mask), (answer, answer_mask), \
                (style, style_mask), (topic, topic_mask), _, _ = \
                cr.get_train_set([index * batch_size,
                                  (index + 1) * batch_size])
            cost = None  # stays None when batch_repeat < 1
            for _ in range(batch_repeat):
                cost = f_grad_shared(question, question_mask,
                                     answer, answer_mask,
                                     style, style_mask, topic)
                f_update(self.options["learning_rate"])
            return cost

        return update_function

    def get_validing_function(self, cr):
        """Return a zero-argument function evaluating the cost on
        ``cr.get_valid_set()``.

        The validation data is bound through ``givens``.
        """
        (question, question_mask), (answer, answer_mask), \
            (style, style_mask), (topic, topic_mask), _, _ = \
            cr.get_valid_set()
        valid_function = theano.function(
            inputs=[],
            outputs=[self.cost],
            givens={
                self.question: question,
                self.question_mask: question_mask,
                self.answer: answer,
                self.answer_mask: answer_mask,
                self.style: style,
                self.style_mask: style_mask,
                self.topic: topic,
            },
            name='valid_function')
        return valid_function

    def get_testing_function(self, cr):
        """Return ``(test_function, pr_function)``.

        ``test_function`` evaluates the cost on ``cr.get_test_set()``;
        ``pr_function`` evaluates ``self.output_error_vector`` on
        ``cr.get_pr_set()``.  Both take no arguments; their data is bound
        through ``givens``.
        """
        (question, question_mask), (answer, answer_mask), \
            (style, style_mask), (topic, topic_mask), _, _ = \
            cr.get_test_set()
        test_function = theano.function(
            inputs=[],
            outputs=[self.cost],
            givens={
                self.question: question,
                self.question_mask: question_mask,
                self.answer: answer,
                self.answer_mask: answer_mask,
                self.style: style,
                self.style_mask: style_mask,
                self.topic: topic,
            },
            name='test_function')

        (question, question_mask), (answer, answer_mask), \
            (style, style_mask), (topic, topic_mask), _, _ = \
            cr.get_pr_set()
        pr_function = theano.function(
            inputs=[],
            outputs=[self.output_error_vector],
            givens={
                self.question: question,
                self.question_mask: question_mask,
                self.answer: answer,
                self.answer_mask: answer_mask,
                self.style: style,
                self.style_mask: style_mask,
                self.topic: topic,
            },
            on_unused_input='ignore',
            name='pr_function')
        return test_function, pr_function

    def get_deploy_function(self):
        """Return a function producing next-word candidates.

        The compiled function takes question, question_mask, answer,
        answer_mask, style, style_mask and topic, and returns the indices
        of the (up to) 1000 highest-probability words per row, in
        ascending order of probability, together with those probabilities.
        """
        maxout_input = tensor.concatenate([
            self.decoder_hidden_status[-1, :, :],
            self.encoder_hidden_status[-1, :, :],
            self.answer_embedding[-1, :, :],
        ], axis=1)
        _pred_word, pred_word_probability = self.maxout_layer.getOutput(
            self.tparams['Wemb_e'], maxout_input)
        # argsort is ascending, so the tail holds the most probable words.
        pred_words_array = theano.tensor.argsort(
            pred_word_probability)[:, -1000:]
        # Gather the probability of every selected word, row by row.
        pred_word_probability_array = theano.tensor.transpose(
            pred_word_probability[
                theano.tensor.arange(pred_words_array.shape[0]),
                theano.tensor.transpose(pred_words_array)])
        deploy_function = theano.function(
            inputs=[self.question, self.question_mask,
                    self.answer, self.answer_mask,
                    self.style, self.style_mask,
                    self.topic],
            outputs=[pred_words_array, pred_word_probability_array],
            on_unused_input='ignore',
            name='deploy_function')
        return deploy_function

    def get_cost(self):
        """Return a compiled function that evaluates ``self.cost``.

        The function takes question, question_mask, answer, answer_mask,
        style, style_mask and topic, in that order.  ``style`` and
        ``style_mask`` are required because the cost graph is built from
        the style encoder output.
        """
        deploy_function = theano.function(
            inputs=[self.question, self.question_mask,
                    self.answer, self.answer_mask,
                    self.style, self.style_mask,
                    self.topic],
            outputs=self.cost)
        return deploy_function
def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128,
             input_params=None):
    """Build the symbolic graph of the attention encoder-decoder.

    The question is encoded twice, once as given and once reversed along
    its first axis; both hidden-state sequences are concatenated and
    handed to an attention decoder together with the answer.  A maxout
    layer then scores each next answer word.

    Args:
        n_words: Stored in ``self.options`` and passed to
            ``self.init_global_params``.
        word_embedding_dim: Width of a word embedding.
        hidden_status_dim: Width of each encoder's and the decoder's
            hidden state.
        input_params: Forwarded unchanged to ``self._set_parameters``.
    """
    options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        # Learning rate for sgd (not used for adadelta and rmsprop).
        'learning_rate': 0.0001,
        # sgd, adadelta and rmsprop available; sgd is very hard to use and
        # not recommended (probably needs momentum and a decaying rate).
        'optimizer': self.rmsprop,
    }
    self.options = options
    word_dim = options['word_embedding_dim']
    hidden_dim = options['hidden_status_dim']

    # Global parameters, wrapped as Theano parameters.
    self.tparams = self.init_tparams(self.init_global_params(options))

    # Construct the network.
    theano.config.compute_test_value = 'off'
    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask',
                                       dtype=config.globalFloatType())
    question_rows = self.tparams['Wemb_e'][self.question.flatten()]
    self.question_embedding = question_rows.reshape(
        [self.question.shape[0], self.question.shape[1], word_dim])

    # 1. forward encoder layer
    self.forward_encoder_layer = EncoderLayer(
        word_embedding_dim=word_dim,
        hidden_status_dim=hidden_dim,
        tparams=self.tparams,
        prefix='forward_Encoder')
    forward_inputs = (self.question_embedding, self.question_mask)
    self.forward_encoder_hidden_status = \
        self.forward_encoder_layer.getOutput(inputs=forward_inputs)

    # 2. backward encoder layer: same inputs, reversed along axis 0.
    self.backward_encoder_layer = EncoderLayer(
        word_embedding_dim=word_dim,
        hidden_status_dim=hidden_dim,
        tparams=self.tparams,
        prefix='backward_Encoder')
    backward_inputs = (self.question_embedding[::-1, :, :],
                       self.question_mask[::-1, :])
    self.backward_encoder_hidden_status = \
        self.backward_encoder_layer.getOutput(inputs=backward_inputs)
    # Flip the backward states back before joining them with the forward
    # states along the last axis.
    realigned_backward = self.backward_encoder_hidden_status[::-1, :, :]
    self.encoder_hidden_status = tensor.concatenate(
        [self.forward_encoder_hidden_status, realigned_backward], axis=2)

    # 3. decoder layer
    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_mask = tensor.matrix('answer_mask',
                                     dtype=config.globalFloatType())
    answer_rows = self.tparams['Wemb_d'][self.answer.flatten()]
    self.answer_embedding = answer_rows.reshape(
        [self.answer.shape[0], self.answer.shape[1], word_dim])
    self.decoder_layer = AttentionDecoderLayer(
        word_embedding_dim=word_dim + 2 * hidden_dim,
        hidden_status_dim=hidden_dim,
        encoder_hidden_dim=2 * hidden_dim,
        tparams=self.tparams,
        prefix='Decoder')
    decoder_inputs = (self.answer_embedding, self.answer_mask,
                      self.encoder_hidden_status, self.question_mask)
    self.decoder_hidden_status, self.context = \
        self.decoder_layer.getOutput(inputs=decoder_inputs)

    # 4. maxout layer
    self.maxout_layer = MaxoutLayer(
        base_dim=word_dim,
        refer_dim=3 * options["hidden_status_dim"] + word_dim,
        tparams=self.tparams,
        prefix="maxout")
    decoder_part = self.decoder_hidden_status[:-1, :, :].reshape(
        [(self.answer.shape[0] - 1) * self.answer.shape[1], hidden_dim])
    context_part = self.context[:-1, :, :].reshape(
        [(self.answer.shape[0] - 1) * self.answer.shape[1], 2 * hidden_dim])
    embedding_part = self.answer_embedding[:-1, :, :].reshape(
        [(self.answer.shape[0] - 1) * self.answer.shape[1], word_dim])
    self.maxout_input = tensor.concatenate(
        [decoder_part, context_part, embedding_part], axis=1)

    token_likelihood = self.maxout_layer.likelihood(
        base_data=self.tparams['Wemb_d'],
        refer_data=self.maxout_input,
        y=self.answer[1:, :].flatten())
    token_neg_log = - tensor.log(token_likelihood)
    target_mask = self.answer_mask[1:, :]

    # cost: masked sum of negative logs divided by question.shape[1].
    self.cost = (tensor.dot(token_neg_log, target_mask.flatten())
                 / self.question.shape[1])

    # pr: masked negative logs summed over axis 0 of the answer.
    neg_log_matrix = token_neg_log.reshape(
        [self.answer.shape[0] - 1, self.answer.shape[1]])
    self.likihood_vector = tensor.sum(neg_log_matrix * target_mask, axis=0)

    # Params from list to TensorVariable.
    self._set_parameters(input_params)
class StyleEncoderDecoderNetwork(StyleBase):
    """
    This class will process the dialog pair with an encoder-decoder network.

    It has 3 abilities:
        1. Train the language model.
        2. Model the relationship of Q&A.
        3. Model the style.

    The answer is decoded once for every row of the style embedding table
    ``Semb``; the per-style log-likelihoods are then added to the log of the
    style probabilities predicted from the question and reduced with the
    helpers ``remove_min_neg_log_prob`` and ``total_probability_precise``
    (both defined outside this class).
    """

    def init_global_params(self, options):
        """
        Create the global (not LSTM) parameters: the embedding tables.

        :param options: dict providing ``n_words``, ``word_embedding_dim``,
            ``n_style`` and ``style_embedding_dim``.
        :return: OrderedDict with ``Wemb_e`` and ``Wemb_d`` (both of shape
            ``(n_words, word_embedding_dim)``) and ``Semb`` (shape
            ``(n_style, style_embedding_dim)``).
        """
        params = OrderedDict()
        float_type = config.globalFloatType()
        # numpy.random.rand draws uniform samples from [0, 1); the tables are
        # scaled down afterwards.
        word_shape = (options['n_words'], options['word_embedding_dim'])
        params['Wemb_e'] = (0.01 * numpy.random.rand(*word_shape)).astype(float_type)
        params['Wemb_d'] = (0.01 * numpy.random.rand(*word_shape)).astype(float_type)
        style_shape = (options['n_style'], options['style_embedding_dim'])
        params['Semb'] = (0.2 * numpy.random.rand(*style_shape)).astype(float_type)  # style embedding
        return params

    def __init__(self, n_words, word_embedding_dim, n_style, style_embedding_dim,
                 hidden_status_dim, input_params=None):
        """
        Build the symbolic graph of the network.

        :param n_words: vocabulary size (rows of both word embedding tables).
        :param word_embedding_dim: width of the word embeddings.
        :param n_style: number of style embeddings.
        :param style_embedding_dim: width of the style embeddings.
        :param hidden_status_dim: width of the recurrent hidden state.
        :param input_params: forwarded unchanged to ``self.set_parameters``.
        """
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'n_style': n_style,
            'style_embedding_dim': style_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            # sgd, adadelta and rmsprop available, sgd very hard to use, not
            # recommended (probably need momentum and decaying learning rate).
            'optimizer': self.rmsprop,
        }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)

        # construct network
        # NOTE: this changes a process-wide Theano setting.
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        # Look up one embedding row per word id; axis 0 is treated as time and
        # axis 1 as samples further down (see n_answer_time_stpes / n_samples).
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])

        # NOTE: the layers below are constructed in a fixed order on purpose;
        # each receives self.tparams, so reordering them may change the
        # parameter order seen by set_parameters — TODO confirm.

        # 1. encoder layer
        self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                          hidden_status_dim=options['hidden_status_dim'],
                                          tparams=self.tparams)
        self.encoder_hidden_status = self.encoder_layer.get_output(
            inputs=(self.question_embedding, self.question_mask))

        # 2. style layer
        self.style_layer = MaxoutLayer(base_dim=options['style_embedding_dim'],
                                       refer_dim=options["hidden_status_dim"],
                                       tparams=self.tparams,
                                       prefix="style")
        self.style_prob = self.style_layer.probability(
            base_data=self.tparams['Semb'],
            refer_data=self.encoder_hidden_status[-1, :, :])  # (samples,n_style)

        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        self.decoder_layer = DecoderLayer_Cho(
            word_embedding_dim=options['word_embedding_dim'] +
            options['hidden_status_dim'] + options['style_embedding_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            tparams=self.tparams)
        self.maxout_layer = MaxoutLayer(
            base_dim=options['word_embedding_dim'],
            refer_dim=2 * options["hidden_status_dim"] +
            options['word_embedding_dim'] + options['style_embedding_dim'],
            tparams=self.tparams,
            prefix="maxout")
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])

        # extend the data: every sample is replicated once per style, so the
        # "batch" axis of the decoder has n_samples * n_style entries.
        n_answer_time_stpes = self.answer.shape[0]
        n_samples = self.question.shape[1]
        extent_data_size = n_samples * self.options['n_style']

        # hidden status with style
        answer_mask = tensor.alloc(self.answer_mask,
                                   self.options['n_style'],
                                   n_answer_time_stpes,
                                   n_samples)\
            .dimshuffle([1, 0, 2])\
            .reshape([n_answer_time_stpes, extent_data_size])
        answer = tensor.alloc(self.answer,
                              self.options['n_style'],
                              n_answer_time_stpes,
                              n_samples)\
            .dimshuffle([1, 0, 2])\
            .reshape([n_answer_time_stpes, extent_data_size])
        answer_embedding = tensor.alloc(self.answer_embedding,
                                        self.options['n_style'],
                                        n_answer_time_stpes,
                                        n_samples,
                                        options['word_embedding_dim'])\
            .dimshuffle(1, 0, 2, 3)\
            .reshape([n_answer_time_stpes, extent_data_size, options['word_embedding_dim']])
        # Only the last encoder state is kept, repeated for every style.
        encoder_hidden_status = tensor.alloc(self.encoder_hidden_status[-1],
                                             self.options['n_style'],
                                             n_samples,
                                             options['hidden_status_dim'])\
            .reshape([1, extent_data_size, options['hidden_status_dim']])
        # Style table repeated for every sample, then reordered so that the
        # style index is the slow axis, matching the tensors above.
        style_embeddings = tensor.alloc(self.tparams['Semb'],
                                        n_samples,
                                        self.options['n_style'],
                                        self.options['style_embedding_dim'])\
            .dimshuffle(1, 0, 2)\
            .reshape([1, extent_data_size, self.options['style_embedding_dim']])
        encoder_hidden_status = tensor.concatenate([encoder_hidden_status, style_embeddings], axis=2)

        # 3. decoder layer
        decoder_hidden_status = self.decoder_layer.get_output(
            inputs=[answer_embedding, answer_mask, encoder_hidden_status])

        # 4. maxout layer
        # Training input: all time steps but the last one.
        self.maxout_input = tensor.concatenate(
            [decoder_hidden_status[:-1, :, :].reshape(
                [(n_answer_time_stpes - 1) * extent_data_size,
                 options['hidden_status_dim']]),
             tensor.alloc(encoder_hidden_status[-1, :, :],
                          n_answer_time_stpes - 1,
                          extent_data_size,
                          options['style_embedding_dim'] + options['hidden_status_dim']).reshape(
                 [(n_answer_time_stpes - 1) * extent_data_size,
                  options['hidden_status_dim'] + options['style_embedding_dim']]),
             answer_embedding[:-1, :, :].reshape(
                 [(n_answer_time_stpes - 1) * extent_data_size,
                  options['word_embedding_dim']])],
            axis=1)
        # Prediction input: the last time step only.
        predict_maxout_input = tensor.concatenate(
            [decoder_hidden_status[-1, :, :].reshape(
                [extent_data_size, options['hidden_status_dim']]),
             tensor.alloc(encoder_hidden_status[-1, :, :],
                          1,
                          extent_data_size,
                          options['style_embedding_dim'] + options['hidden_status_dim']).reshape(
                 [extent_data_size,
                  options['hidden_status_dim'] + options['style_embedding_dim']]),
             answer_embedding[-1, :, :].reshape(
                 [extent_data_size, options['word_embedding_dim']])],
            axis=1)

        likelihood_vector = self.maxout_layer.likelihood(base_data=self.tparams['Wemb_d'],
                                                         refer_data=self.maxout_input,
                                                         y=answer[1:, :].flatten())
        pred_word, predict_probability = self.maxout_layer.get_output(
            base_data=self.tparams['Wemb_d'],
            refer_data=predict_maxout_input)

        # carefully check
        likelihood_vector = likelihood_vector.reshape(
            [n_answer_time_stpes - 1, options['n_style'], n_samples])
        predict_probability = predict_probability.reshape(
            [options['n_style'], n_samples, options['n_words']])
        self.predict_probability = predict_probability
        pred_word = pred_word.reshape([options['n_style'], n_samples])
        self.pred_word = pred_word

        # Transform the multiplication into add: sum masked log-likelihoods
        # over the time axis, leaving (n_style, n_samples).
        log_likelihood_vector = tensor.log(likelihood_vector) * \
            self.answer_mask[1:, :].dimshuffle(0, 'x', 1)
        log_likelihood_vector = log_likelihood_vector.sum(axis=0)
        self.log_likelihood_vector_on_diff_styles = log_likelihood_vector

        sentence_probability = log_likelihood_vector + tensor.transpose(self.style_prob.log())
        sentence_probability = self.remove_min_neg_log_prob(sentence_probability)
        sentence_probability = self.total_probability_precise(sentence_probability, 0,
                                                              options['n_style'])
        negative_log_sentence_probability = -sentence_probability
        self.likelihood_vector = negative_log_sentence_probability

        # cost: total negative log probability divided by the mask total.
        self.cost = negative_log_sentence_probability.sum() / self.answer_mask[1:, :].sum()
        # zi_error: per-sample value divided by that sample's mask sum, then averaged.
        self.zi_error = negative_log_sentence_probability / tensor.sum(self.answer_mask[1:, :], 0)
        self.zi_error = self.zi_error.mean()
        # st_error: plain mean over samples.
        self.st_error = negative_log_sentence_probability.mean()

        self.set_parameters(input_params)  # params from list to TensorVariable

    def get_training_function(self, cr, error_type="RMSE", batch_size=10, batch_repeat=1):
        """
        Build the per-batch training step.

        :param cr: data reader; must provide ``get_train_set([start, end])``.
        :param error_type: unused, kept for interface compatibility.
        :param batch_size: number of samples per batch index.
        :param batch_repeat: how many update steps to run on each batch.
        :return: ``update_function(index)`` which returns the result of the
            last ``f_grad_shared`` call, or ``None`` when ``batch_repeat < 1``.
        """
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        # Materialise the values as a list: on Python 3 ``values()`` is a view.
        grads = tensor.grad(self.cost, wrt=list(self.tparams.values()))
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads,
                                            [self.question, self.question_mask,
                                             self.answer, self.answer_mask],
                                            [self.cost])

        def update_function(index):
            (question, question_mask), (answer, answer_mask), _, _ = \
                cr.get_train_set([index * batch_size, (index + 1) * batch_size])
            cost = None  # stays None if no update step is executed
            for _ in range(batch_repeat):
                cost = f_grad_shared(question, question_mask, answer, answer_mask)
                f_update(self.options["learning_rate"])
            return cost

        return update_function

    def get_validing_function(self, cr):
        """
        Compile a zero-argument function evaluating ``[zi_error, st_error]``
        on the set returned by ``cr.get_valid_set()``.
        """
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_valid_set()
        valid_function = theano.function(inputs=[],
                                         outputs=[self.zi_error, self.st_error],
                                         givens={self.question: question,
                                                 self.question_mask: question_mask,
                                                 self.answer: answer,
                                                 self.answer_mask: answer_mask},
                                         name='valid_function')
        return valid_function

    def get_testing_function(self, cr):
        """
        Compile the test and "pr" functions.

        :return: ``(test_function, pr_function)``; the first evaluates
            ``[cost, zi_error, st_error]`` on ``cr.get_test_set()``, the second
            evaluates ``[likelihood_vector]`` on ``cr.get_pr_set()``.
        """
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_test_set()
        test_function = theano.function(inputs=[],
                                        outputs=[self.cost, self.zi_error, self.st_error],
                                        givens={self.question: question,
                                                self.question_mask: question_mask,
                                                self.answer: answer,
                                                self.answer_mask: answer_mask},
                                        name='test_function')
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_pr_set()
        pr_function = theano.function(inputs=[],
                                      outputs=[self.likelihood_vector],
                                      givens={self.question: question,
                                              self.question_mask: question_mask,
                                              self.answer: answer,
                                              self.answer_mask: answer_mask},
                                      name='pr_function')
        return test_function, pr_function
def __init__(self, n_words, word_embedding_dim, hidden_status_dim, style_number, style_dim, input_params=None):
    """
    Build the symbolic graph: forward and backward encoders over the
    question, an attention decoder over the answer, a style layer and a
    maxout output layer, then the cost expressions.

    :param n_words: vocabulary size.
    :param word_embedding_dim: width of the word embeddings.
    :param hidden_status_dim: width of each recurrent hidden state.
    :param style_number: forwarded as first argument to ``StyleLayer``.
    :param style_dim: width of the style vector fed to the maxout layer.
    :param input_params: forwarded unchanged to ``self._set_parameters``.
    """
    options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        'style_number': style_number,
        'style_dim': style_dim,
        'learning_rate': 0.0001,  # only sgd reads this; adadelta and rmsprop ignore it
        'optimizer': self.rmsprop,  # alternatives: sgd (hard to tune), adadelta
    }
    self.options = options

    # Global parameters first, then their Theano counterparts.
    self.tparams = self.init_tparams(self.init_global_params(options))

    float_type = config.globalFloatType()
    word_dim = options['word_embedding_dim']
    hidden_dim = options['hidden_status_dim']

    # Process-wide Theano switch.
    theano.config.compute_test_value = 'off'

    # ---- question side -------------------------------------------------
    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask', dtype=float_type)
    question_rows = self.tparams['Wemb_e'][self.question.flatten()]
    self.question_embedding = question_rows.reshape(
        [self.question.shape[0], self.question.shape[1], word_dim])

    # 1. forward encoder layer
    self.forward_encoder_layer = EncoderLayer(
        word_embedding_dim=word_dim,
        hidden_status_dim=hidden_dim,
        tparams=self.tparams,
        prefix='forward_Encoder')
    self.forward_encoder_hidden_status = self.forward_encoder_layer.getOutput(
        inputs=(self.question_embedding, self.question_mask))

    # 2. backward encoder layer: reads the question reversed along axis 0
    self.backward_encoder_layer = EncoderLayer(
        word_embedding_dim=word_dim,
        hidden_status_dim=hidden_dim,
        tparams=self.tparams,
        prefix='backward_Encoder')
    reversed_inputs = (self.question_embedding[::-1, :, :],
                       self.question_mask[::-1, :])
    self.backward_encoder_hidden_status = self.backward_encoder_layer.getOutput(
        inputs=reversed_inputs)
    # Flip the backward states back before joining them with the forward ones.
    self.encoder_hidden_status = tensor.concatenate(
        [self.forward_encoder_hidden_status,
         self.backward_encoder_hidden_status[::-1, :, :]],
        axis=2)

    # ---- answer side ---------------------------------------------------
    # 3. decoder layer
    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_mask = tensor.matrix('answer_mask', dtype=float_type)
    answer_rows = self.tparams['Wemb_d'][self.answer.flatten()]
    self.answer_embedding = answer_rows.reshape(
        [self.answer.shape[0], self.answer.shape[1], word_dim])
    self.decoder_layer = AttentionDecoderLayer(
        word_embedding_dim=word_dim + 2 * hidden_dim,
        hidden_status_dim=hidden_dim,
        encoder_hidden_dim=2 * hidden_dim,
        tparams=self.tparams,
        prefix='Decoder')
    decoder_outputs = self.decoder_layer.getOutput(
        inputs=(self.answer_embedding, self.answer_mask,
                self.encoder_hidden_status, self.question_mask))
    self.decoder_hidden_status, self.context = decoder_outputs

    # 4. style layer
    self.style_layer = StyleLayer(style_number, style_dim, 3 * hidden_status_dim, self.tparams, 'style')
    per_sample_shape = [self.answer.shape[1], hidden_dim]
    forward_last = self.forward_encoder_hidden_status[-1, :, :].reshape(per_sample_shape)
    backward_last = self.backward_encoder_hidden_status[-1, :, :].reshape(per_sample_shape)
    # The third slot is also taken from the backward encoder.
    third_last = self.backward_encoder_hidden_status[-1, :, :].reshape(per_sample_shape)
    pair_embedding = tensor.concatenate([forward_last, backward_last, third_last], axis=1)
    self.style = self.style_layer.getOutput(pair_embedding)

    # 5. maxout layer
    self.maxout_layer = MaxoutLayer(
        base_dim=word_dim,
        refer_dim=3 * options["hidden_status_dim"] + word_dim + options['style_dim'],
        tparams=self.tparams,
        prefix="maxout")

    steps_but_last = self.answer.shape[0] - 1
    flat_rows = steps_but_last * self.answer.shape[1]
    decoder_part = self.decoder_hidden_status[:-1, :, :].reshape([flat_rows, hidden_dim])
    context_part = self.context[:-1, :, :].reshape([flat_rows, 2 * hidden_dim])
    word_part = self.answer_embedding[:-1, :, :].reshape([flat_rows, word_dim])
    # Repeat the style vector for each of the time steps being scored.
    style_part = tensor.alloc(self.style.dimshuffle('x', 0, 1),
                              steps_but_last,
                              self.answer.shape[1],
                              options['style_dim'])
    style_part = style_part.reshape([flat_rows, options['style_dim']])
    self.maxout_input = tensor.concatenate(
        [decoder_part, context_part, word_part, style_part], axis=1)

    targets = self.answer[1:, :].flatten()
    output_error_vector = self.maxout_layer.negative_log_likelihood(
        self.tparams['Wemb_d'], self.maxout_input, y=targets)

    m = self.answer_mask[1:, :]
    masked_total = -1.0 * tensor.dot(output_error_vector, m.flatten())
    self.cost = masked_total / m.sum()

    # Per-sample error: masked sum over axis 0 divided by the mask sum.
    per_step_error = output_error_vector.reshape([steps_but_last, self.answer.shape[1]])
    self.output_error_vector = per_step_error
    self.output_error_vector = self.output_error_vector * m
    self.output_error_vector = -self.output_error_vector.sum(axis=0) / m.sum(axis=0)

    self._set_parameters(input_params)  # params from list to TensorVariable
class BiEncoderAttentionDecoderNetwork(Network):
    """
    This class will process the dialog pair with an encoder-decoder network.

    It has 2 abilities:
        1. Train the language model.
        2. Model the relationship of Q&A.

    The question is read by a forward and a backward encoder; the answer is
    produced by an attention decoder followed by a maxout output layer.
    """

    def init_global_params(self, options):
        """
        Create the global (not LSTM) parameters: the embedding tables.

        :param options: dict providing ``n_words`` and ``word_embedding_dim``.
        :return: OrderedDict with ``Wemb_e`` and ``Wemb_d``, both of shape
            ``(n_words, word_embedding_dim)``.
        """
        params = OrderedDict()
        float_type = config.globalFloatType()
        # numpy.random.rand draws uniform samples from [0, 1).
        word_shape = (options['n_words'], options['word_embedding_dim'])
        params['Wemb_e'] = (0.01 * numpy.random.rand(*word_shape)).astype(float_type)
        params['Wemb_d'] = (0.01 * numpy.random.rand(*word_shape)).astype(float_type)
        return params

    def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128, input_params=None):
        """
        Build the symbolic graph of the network.

        :param n_words: vocabulary size.
        :param word_embedding_dim: width of the word embeddings.
        :param hidden_status_dim: width of each recurrent hidden state.
        :param input_params: forwarded unchanged to ``self._set_parameters``.
        """
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            # sgd, adadelta and rmsprop available, sgd very hard to use, not
            # recommended (probably need momentum and decaying learning rate).
            'optimizer': self.rmsprop,
        }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)

        # construct network
        # NOTE: this changes a process-wide Theano setting.
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])

        # NOTE: the layers below are constructed in a fixed order on purpose;
        # each receives self.tparams, so reordering them may change the
        # parameter order seen by _set_parameters — TODO confirm.

        # 1. forward encoder layer
        self.forward_encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                                  hidden_status_dim=options['hidden_status_dim'],
                                                  tparams=self.tparams,
                                                  prefix='forward_Encoder')
        self.forward_encoder_hidden_status = self.forward_encoder_layer.getOutput(
            inputs=(self.question_embedding, self.question_mask))

        # 2. backward encoder layer (reads the question reversed along axis 0)
        self.backward_encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                                   hidden_status_dim=options['hidden_status_dim'],
                                                   tparams=self.tparams,
                                                   prefix='backward_Encoder')
        self.backward_encoder_hidden_status = self.backward_encoder_layer.getOutput(
            inputs=(self.question_embedding[::-1, :, :], self.question_mask[::-1, :]))
        # Flip the backward states back before joining them with the forward ones.
        self.encoder_hidden_status = tensor.concatenate(
            [self.forward_encoder_hidden_status,
             self.backward_encoder_hidden_status[::-1, :, :]],
            axis=2)

        # 3. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        self.decoder_layer = AttentionDecoderLayer(
            word_embedding_dim=options['word_embedding_dim'] + 2 * options['hidden_status_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            encoder_hidden_dim=2 * options['hidden_status_dim'],
            tparams=self.tparams,
            prefix='Decoder')
        self.decoder_hidden_status, self.context = self.decoder_layer.getOutput(
            inputs=(self.answer_embedding, self.answer_mask,
                    self.encoder_hidden_status, self.question_mask))

        # 4. maxout layer
        self.maxout_layer = MaxoutLayer(
            base_dim=options['word_embedding_dim'],
            refer_dim=3 * options["hidden_status_dim"] + options['word_embedding_dim'],
            tparams=self.tparams,
            prefix="maxout")
        # All time steps but the last one, flattened to one row per (step, sample).
        self.maxout_input = tensor.concatenate(
            [self.decoder_hidden_status[:-1, :, :].reshape(
                [(self.answer.shape[0] - 1) * self.answer.shape[1],
                 options['hidden_status_dim']]),
             self.context[:-1, :, :].reshape(
                 [(self.answer.shape[0] - 1) * self.answer.shape[1],
                  2 * options['hidden_status_dim']]),
             self.answer_embedding[:-1, :, :].reshape(
                 [(self.answer.shape[0] - 1) * self.answer.shape[1],
                  options['word_embedding_dim']])],
            axis=1)

        likihood_vector = self.maxout_layer.likelihood(base_data=self.tparams['Wemb_d'],
                                                       refer_data=self.maxout_input,
                                                       y=self.answer[1:, :].flatten())
        likihood_vector = - tensor.log(likihood_vector)
        m = self.answer_mask[1:, :]
        # cost: masked negative log-likelihood divided by question.shape[1]
        self.cost = tensor.dot(likihood_vector, m.flatten()) / self.question.shape[1]
        # pr: masked negative log-likelihood summed over axis 0
        self.likihood_vector = likihood_vector.reshape(
            [self.answer.shape[0] - 1, self.answer.shape[1]])
        self.likihood_vector = tensor.sum(self.likihood_vector * m, axis=0)

        self._set_parameters(input_params)  # params from list to TensorVariable

    def get_training_function(self, cr, error_type="RMSE", batch_size=10, batch_repeat=1):
        """
        Build the per-batch training step.

        :param cr: data reader; must provide ``get_train_set([start, end])``.
        :param error_type: unused, kept for interface compatibility.
        :param batch_size: number of samples per batch index.
        :param batch_repeat: how many update steps to run on each batch.
        :return: ``update_function(index)`` which returns the result of the
            last ``f_grad_shared`` call, or ``None`` when ``batch_repeat < 1``.
        """
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        # Materialise the values as a list: on Python 3 ``values()`` is a view.
        grads = tensor.grad(self.cost, wrt=list(self.tparams.values()))
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads,
                                            [self.question, self.question_mask,
                                             self.answer, self.answer_mask],
                                            [self.cost])

        def update_function(index):
            (question, question_mask), (answer, answer_mask), _, _ = \
                cr.get_train_set([index * batch_size, (index + 1) * batch_size])
            cost = None  # stays None if no update step is executed
            for _ in range(batch_repeat):
                cost = f_grad_shared(question, question_mask, answer, answer_mask)
                f_update(self.options["learning_rate"])
            return cost

        return update_function

    def get_validing_function(self, cr):
        """
        Compile a zero-argument function evaluating ``[cost]`` on the set
        returned by ``cr.get_valid_set()``.
        """
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_valid_set()
        valid_function = theano.function(inputs=[],
                                         outputs=[self.cost],
                                         givens={self.question: question,
                                                 self.question_mask: question_mask,
                                                 self.answer: answer,
                                                 self.answer_mask: answer_mask},
                                         name='valid_function')
        return valid_function

    def get_testing_function(self, cr):
        """
        Compile the test and "pr" functions.

        :return: ``(test_function, pr_function)``; the first evaluates
            ``[cost]`` on ``cr.get_test_set()``, the second evaluates
            ``[likihood_vector]`` on ``cr.get_pr_set()``.
        """
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_test_set()
        test_function = theano.function(inputs=[],
                                        outputs=[self.cost],
                                        givens={self.question: question,
                                                self.question_mask: question_mask,
                                                self.answer: answer,
                                                self.answer_mask: answer_mask},
                                        name='test_function')
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_pr_set()
        pr_function = theano.function(inputs=[],
                                      outputs=[self.likihood_vector],
                                      givens={self.question: question,
                                              self.question_mask: question_mask,
                                              self.answer: answer,
                                              self.answer_mask: answer_mask},
                                      name='pr_function')
        return test_function, pr_function

    def get_deploy_function(self):
        """
        Compile the prediction function for the last answer time step.

        :return: function taking ``(question, question_mask, answer,
            answer_mask)`` and returning the two outputs of
            ``maxout_layer.getOutput`` (named ``pred_word`` and
            ``pred_word_probability`` here).
        """
        # NOTE(review): training feeds the attention context (self.context) in
        # the middle slot, while this path feeds the last encoder state. The
        # widths agree (2 * hidden_status_dim) but the inputs differ — confirm
        # this is intended.
        maxout_input = tensor.concatenate([self.decoder_hidden_status[-1, :, :],
                                           self.encoder_hidden_status[-1, :, :],
                                           self.answer_embedding[-1, :, :]],
                                          axis=1)
        pred_word, pred_word_probability = self.maxout_layer.getOutput(self.tparams['Wemb_d'],
                                                                       maxout_input)
        deploy_function = theano.function(inputs=[self.question, self.question_mask,
                                                  self.answer, self.answer_mask],
                                          outputs=[pred_word, pred_word_probability],
                                          name='deploy_function')
        return deploy_function
class BiEncoderAttentionDecoderStyleNetwork(Network):
    """
    This class will process the dialog pair with an encoder-decoder network.

    It has 4 abilities:
        1. Train the language model.
        2. Model the relationship of Q&A.
        3. Model the attention.
        4. Model the style.
    """

    def init_global_params(self, options):
        """
        Create the global (not LSTM) parameters: the embedding tables.

        :param options: dict providing ``n_words`` and ``word_embedding_dim``.
        :return: OrderedDict with ``Wemb_e`` and ``Wemb_d``, both of shape
            ``(n_words, word_embedding_dim)``.
        """
        params = OrderedDict()
        float_type = config.globalFloatType()
        # numpy.random.rand draws uniform samples from [0, 1).
        word_shape = (options['n_words'], options['word_embedding_dim'])
        params['Wemb_e'] = (0.01 * numpy.random.rand(*word_shape)).astype(float_type)
        params['Wemb_d'] = (0.01 * numpy.random.rand(*word_shape)).astype(float_type)
        return params

    def __init__(self, n_words, word_embedding_dim, hidden_status_dim, style_number, style_dim,
                 input_params=None):
        """
        Build the symbolic graph of the network.

        :param n_words: vocabulary size.
        :param word_embedding_dim: width of the word embeddings.
        :param hidden_status_dim: width of each recurrent hidden state.
        :param style_number: forwarded as first argument to ``StyleLayer``.
        :param style_dim: width of the style vector fed to the maxout layer.
        :param input_params: forwarded unchanged to ``self._set_parameters``.
        """
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'style_number': style_number,
            'style_dim': style_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            # sgd, adadelta and rmsprop available, sgd very hard to use, not
            # recommended (probably need momentum and decaying learning rate).
            'optimizer': self.rmsprop,
        }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)

        # construct network
        # NOTE: this changes a process-wide Theano setting.
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])

        # NOTE: the layers below are constructed in a fixed order on purpose;
        # each receives self.tparams, so reordering them may change the
        # parameter order seen by _set_parameters — TODO confirm.

        # 1. forward encoder layer
        self.forward_encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                                  hidden_status_dim=options['hidden_status_dim'],
                                                  tparams=self.tparams,
                                                  prefix='forward_Encoder')
        self.forward_encoder_hidden_status = self.forward_encoder_layer.getOutput(
            inputs=(self.question_embedding, self.question_mask))

        # 2. backward encoder layer (reads the question reversed along axis 0)
        self.backward_encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                                   hidden_status_dim=options['hidden_status_dim'],
                                                   tparams=self.tparams,
                                                   prefix='backward_Encoder')
        self.backward_encoder_hidden_status = self.backward_encoder_layer.getOutput(
            inputs=(self.question_embedding[::-1, :, :], self.question_mask[::-1, :]))
        # Flip the backward states back before joining them with the forward ones.
        self.encoder_hidden_status = tensor.concatenate(
            [self.forward_encoder_hidden_status,
             self.backward_encoder_hidden_status[::-1, :, :]],
            axis=2)

        # 3. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        self.decoder_layer = AttentionDecoderLayer(
            word_embedding_dim=options['word_embedding_dim'] + 2 * options['hidden_status_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            encoder_hidden_dim=2 * options['hidden_status_dim'],
            tparams=self.tparams,
            prefix='Decoder')
        self.decoder_hidden_status, self.context = self.decoder_layer.getOutput(
            inputs=(self.answer_embedding, self.answer_mask,
                    self.encoder_hidden_status, self.question_mask))

        # 4. style layer
        self.style_layer = StyleLayer(style_number, style_dim, 3 * hidden_status_dim,
                                      self.tparams, 'style')
        question_style_input_embedding1 = self.forward_encoder_hidden_status[-1, :, :].reshape(
            [self.answer.shape[1], options['hidden_status_dim']])
        question_style_input_embedding2 = self.backward_encoder_hidden_status[-1, :, :].reshape(
            [self.answer.shape[1], options['hidden_status_dim']])
        # NOTE(review): despite its name this is taken from the backward
        # *encoder* state, i.e. it duplicates question_style_input_embedding2 —
        # confirm whether an answer-side state was intended.
        answer_style_input_embedding = self.backward_encoder_hidden_status[-1, :, :].reshape(
            [self.answer.shape[1], options['hidden_status_dim']])
        pair_embedding = tensor.concatenate([question_style_input_embedding1,
                                             question_style_input_embedding2,
                                             answer_style_input_embedding],
                                            axis=1)
        self.style = self.style_layer.getOutput(pair_embedding)

        # 5. maxout layer
        self.maxout_layer = MaxoutLayer(
            base_dim=options['word_embedding_dim'],
            refer_dim=3 * options["hidden_status_dim"] + options['word_embedding_dim'] +
            options['style_dim'],
            tparams=self.tparams,
            prefix="maxout")
        # All time steps but the last one, flattened to one row per (step, sample).
        maxout_decoder_hidden_status_input = self.decoder_hidden_status[:-1, :, :].reshape(
            [(self.answer.shape[0] - 1) * self.answer.shape[1], options['hidden_status_dim']])
        maxout_context_hidden_status_input = self.context[:-1, :, :].reshape(
            [(self.answer.shape[0] - 1) * self.answer.shape[1], 2 * options['hidden_status_dim']])
        maxout_answer_wordEmbedding_input = self.answer_embedding[:-1, :, :].reshape(
            [(self.answer.shape[0] - 1) * self.answer.shape[1], options['word_embedding_dim']])
        # Repeat the style vector for each of the time steps being scored.
        maxout_answer_style_input = tensor.alloc(self.style.dimshuffle('x', 0, 1),
                                                 self.answer.shape[0] - 1,
                                                 self.answer.shape[1],
                                                 options['style_dim'])
        maxout_answer_style_input = maxout_answer_style_input.reshape(
            [(self.answer.shape[0] - 1) * self.answer.shape[1], options['style_dim']])
        self.maxout_input = tensor.concatenate([maxout_decoder_hidden_status_input,
                                                maxout_context_hidden_status_input,
                                                maxout_answer_wordEmbedding_input,
                                                maxout_answer_style_input],
                                               axis=1)

        output_error_vector = self.maxout_layer.negative_log_likelihood(
            self.tparams['Wemb_d'], self.maxout_input, y=self.answer[1:, :].flatten())
        m = self.answer_mask[1:, :]
        # NOTE(review): the helper's result is negated here; its sign
        # convention is defined in MaxoutLayer — confirm.
        self.cost = -1.0 * tensor.dot(output_error_vector, m.flatten()) / m.sum()
        self.output_error_vector = output_error_vector.reshape(
            [self.answer.shape[0] - 1, self.answer.shape[1]])
        self.output_error_vector = self.output_error_vector * m
        # Per-sample value: masked sum over axis 0 divided by the mask sum.
        self.output_error_vector = -self.output_error_vector.sum(axis=0) / m.sum(axis=0)

        self._set_parameters(input_params)  # params from list to TensorVariable

    def get_training_function(self, cr, error_type="RMSE", batch_size=10, batch_repeat=1):
        """
        Build the per-batch training step.

        :param cr: data reader; must provide ``get_train_set([start, end])``.
        :param error_type: unused, kept for interface compatibility.
        :param batch_size: number of samples per batch index.
        :param batch_repeat: how many update steps to run on each batch.
        :return: ``update_function(index)`` which returns the result of the
            last ``f_grad_shared`` call, or ``None`` when ``batch_repeat < 1``.
        """
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        # Materialise the values as a list: on Python 3 ``values()`` is a view.
        grads = tensor.grad(self.cost, wrt=list(self.tparams.values()))
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads,
                                            [self.question, self.question_mask,
                                             self.answer, self.answer_mask],
                                            [self.cost])

        def update_function(index):
            (question, question_mask), (answer, answer_mask), _, _ = \
                cr.get_train_set([index * batch_size, (index + 1) * batch_size])
            cost = None  # stays None if no update step is executed
            for _ in range(batch_repeat):
                cost = f_grad_shared(question, question_mask, answer, answer_mask)
                f_update(self.options["learning_rate"])
            return cost

        return update_function

    def get_validing_function(self, cr):
        """
        Compile a zero-argument function evaluating ``[cost]`` on the set
        returned by ``cr.get_valid_set()``.
        """
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_valid_set()
        valid_function = theano.function(inputs=[],
                                         outputs=[self.cost],
                                         givens={self.question: question,
                                                 self.question_mask: question_mask,
                                                 self.answer: answer,
                                                 self.answer_mask: answer_mask},
                                         name='valid_function')
        return valid_function

    def get_testing_function(self, cr):
        """
        Compile the test and "pr" functions.

        :return: ``(test_function, pr_function)``; the first evaluates
            ``[cost]`` on ``cr.get_test_set()``, the second evaluates
            ``[output_error_vector]`` on ``cr.get_pr_set()``.
        """
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_test_set()
        test_function = theano.function(inputs=[],
                                        outputs=[self.cost],
                                        givens={self.question: question,
                                                self.question_mask: question_mask,
                                                self.answer: answer,
                                                self.answer_mask: answer_mask},
                                        name='test_function')
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_pr_set()
        pr_function = theano.function(inputs=[],
                                      outputs=[self.output_error_vector],
                                      givens={self.question: question,
                                              self.question_mask: question_mask,
                                              self.answer: answer,
                                              self.answer_mask: answer_mask},
                                      name='pr_function')
        return test_function, pr_function

    def get_deploy_function(self, style_type):
        """
        Compile the prediction function for the last answer time step with
        the style fixed to rows of the style matrix.

        :param style_type: index into ``style_layer.getStyleMatrix()``; the
            selected value replaces ``self.style`` through ``givens``, so it
            presumably has to select one row per sample to match the rank of
            ``self.style`` — TODO confirm against the caller.
        :return: function taking ``(question, question_mask, answer,
            answer_mask)`` and returning the two outputs of
            ``maxout_layer.getOutput``.
        """
        # The maxout layer is declared with refer_dim = 3 * hidden + word +
        # style_dim, so the style vector has to be part of its input; without
        # it the input is style_dim columns short and the ``givens``
        # substitution of self.style below has nothing to act on.
        # NOTE(review): training feeds the attention context (self.context) in
        # the second slot, while this path feeds the last encoder state —
        # confirm this is intended.
        maxout_input = tensor.concatenate([self.decoder_hidden_status[-1, :, :],
                                           self.encoder_hidden_status[-1, :, :],
                                           self.answer_embedding[-1, :, :],
                                           self.style],
                                          axis=1)
        b = self.style_layer.getStyleMatrix().get_value()[style_type]
        pred_word, pred_word_probability = self.maxout_layer.getOutput(self.tparams['Wemb_d'],
                                                                       maxout_input)
        deploy_function = theano.function(inputs=[self.question, self.question_mask,
                                                  self.answer, self.answer_mask],
                                          outputs=[pred_word, pred_word_probability],
                                          givens={self.style: b},
                                          name='deploy_function')
        return deploy_function
def __init__(self, n_words, word_embedding_dim, n_style, style_embedding_dim, hidden_status_dim, input_params=None):
    """Build the symbolic graph of an encoder-decoder with one decoder per style.

    The question is encoded once; a ``MaxoutBiasedLayer`` produces
    ``self.style_prob`` from the last encoder state.  For every style index a
    separate ``DecoderLayer_Cho`` and ``MaxoutLayer`` are created, and the
    per-style sentence likelihoods are weighted by ``self.style_prob`` and
    summed to obtain the training cost.

    Parameters:
        n_words: vocabulary size (stored in ``options['n_words']``).
        word_embedding_dim: width of the word embeddings.
        n_style: number of styles, i.e. number of decoder/maxout pairs built.
        style_embedding_dim: width of each row of ``tparams['Semb']``.
        hidden_status_dim: hidden state width of encoder and decoders.
        input_params: forwarded unchanged to ``self.set_parameters``.
    """
    self.options = options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'n_style': n_style,
        'style_embedding_dim': style_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
        'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop available; sgd is very hard to use and not recommended (probably needs momentum and a decaying learning rate).
        'tiny_float': 1e-20
    }
    # Global parameters.
    params = self.init_global_params(options)
    # Theano parameters.
    self.tparams = self.init_tparams(params)
    # Used for dropout.
    # self.use_noise = theano.shared(numpy_floatX(0.))
    # Construct the network.
    theano.config.compute_test_value = 'off'
    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
    # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
    # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
    # Embedding lookup on the flattened ids, reshaped to
    # (question.shape[0], question.shape[1], word_embedding_dim).
    self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
        [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
    # 1. encoder layer
    self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                      hidden_status_dim=options['hidden_status_dim'],
                                      tparams=self.tparams)
    self.encoder_hidden_status = self.encoder_layer.get_output(inputs=(self.question_embedding, self.question_mask))
    # 2. style layer
    self.style_layer = MaxoutBiasedLayer(base_dim=options['style_embedding_dim'],
                                         refer_dim=options["hidden_status_dim"],
                                         tparams=self.tparams,
                                         prefix="style")
    self.style_prob = self.style_layer.probability(base_data=self.tparams['Semb'],
                                                   refer_data=self.encoder_hidden_status[-1, :, :])
    # (samples,n_style)
    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
        [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
    self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
    # Extend the data: symbolic sizes reused below.
    n_question_time_stpes = self.question.shape[0]
    n_answer_time_stpes = self.answer.shape[0]
    n_samples = self.question.shape[1]
    self.cost = 0
    self.zi_error = 0
    self.st_error = 0
    self.predict_probability_list = []
    log_likelihood_vector_list = []
    # One decoder + maxout pair per style; parameter prefixes carry the style index.
    for style_index in range(self.options['n_style']):
        decoder_layer = DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + \
                                         options['hidden_status_dim'] + options['style_embedding_dim'],
                                         hidden_status_dim=options['hidden_status_dim'],
                                         tparams=self.tparams,
                                         prefix='decoder' + str(style_index))
        # Broadcast this style's embedding to every (time step, sample) of the
        # question and append it to the encoder states along the last axis.
        style_embedding = tensor.alloc(self.tparams['Semb'][style_index],
                                       n_question_time_stpes, n_samples,
                                       self.options['style_embedding_dim'])
        encoder_hidden_status = tensor.concatenate([self.encoder_hidden_status, style_embedding], axis=2)
        # 3. decoder layer
        decoder_hidden_status = decoder_layer.get_output(inputs=[self.answer_embedding, self.answer_mask,
                                                                 encoder_hidden_status])
        # 4. maxout layer
        # Rows are (time step, sample) pairs for all but the last answer step;
        # columns are decoder state | last encoder+style state | answer embedding.
        maxout_input = tensor.concatenate([decoder_hidden_status[:-1, :, :].
                                           reshape([(n_answer_time_stpes - 1) * n_samples,
                                                    options['hidden_status_dim']]),
                                           tensor.alloc(encoder_hidden_status[-1, :, :],
                                                        n_answer_time_stpes - 1,
                                                        n_samples,
                                                        options['style_embedding_dim'] + \
                                                        options['hidden_status_dim']).
                                           reshape([(n_answer_time_stpes - 1) * n_samples,
                                                    options['hidden_status_dim'] + \
                                                    options['style_embedding_dim']]),
                                           self.answer_embedding[:-1, :, :].
                                           reshape([(n_answer_time_stpes - 1) * n_samples,
                                                    options['word_embedding_dim']])],
                                          axis=1)
        # Per column: (sum of the mask over axis 0) - 1, used to index the
        # answer embedding for the prediction input.
        a_index = tensor.cast(self.answer_mask, 'int64').sum(axis=0) - 1
        predict_maxout_input = tensor.concatenate([decoder_hidden_status[-1, :, :].
                                                   reshape([n_samples, options['hidden_status_dim']]),
                                                   encoder_hidden_status[-1, :, :].
                                                   reshape([n_samples, options['hidden_status_dim'] + \
                                                            options['style_embedding_dim']]),
                                                   self.answer_embedding[a_index, tensor.arange(a_index.shape[0]), :].
                                                   reshape([n_samples, options['word_embedding_dim']])],
                                                  axis=1)
        maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                   refer_dim=2 * options["hidden_status_dim"] + \
                                   options['word_embedding_dim'] + options['style_embedding_dim'],
                                   tparams=self.tparams,
                                   prefix='decoder_maxout' + str(style_index))
        likelihood_vector = \
            maxout_layer.likelihood(base_data=self.tparams['Wemb_d'],
                                    refer_data=maxout_input,
                                    y=self.answer[1:, :].flatten())
        pred_word, predict_probability0 = \
            maxout_layer.get_output(base_data=self.tparams['Wemb_d'],
                                    refer_data=predict_maxout_input)
        # carefully check
        likelihood_vector = likelihood_vector.reshape(
            [n_answer_time_stpes - 1, n_samples])
        # tiny_float keeps the log finite; masked positions contribute zero.
        log_likelihood_vector0 = tensor.log(likelihood_vector + self.options['tiny_float']) * self.answer_mask[1:, :]
        log_likelihood_vector0 = log_likelihood_vector0.sum(axis=0)
        # Add a trailing broadcast axis so the per-style results can be
        # concatenated along axis 1 after the loop.
        log_likelihood_vector_list.append(log_likelihood_vector0.dimshuffle(0, 'x'))
        predict_probability0 = predict_probability0.reshape(
            [n_samples, options['n_words']])
        self.predict_probability_list.append(predict_probability0)
    # options['n_style'], n_answer_time_stpes - 1, n_samples
    # Transform the multiplication into add.
    log_likelihood_vector = tensor.concatenate(log_likelihood_vector_list, axis=1)
    # Weight each style's sentence likelihood by the style probability and sum
    # over styles (axis 1).
    sentence_probability = log_likelihood_vector.exp() * self.style_prob
    sentence_probability = sentence_probability + self.options['tiny_float']
    sentence_probability = tensor.sum(sentence_probability, axis=1)
    negative_log_sentence_probability = -tensor.log(sentence_probability)
    self.likelihood_vector = negative_log_sentence_probability
    # Cost: total negative log-probability divided by the mask total of answer[1:].
    self.cost = negative_log_sentence_probability.sum() / self.answer_mask[1:, :].sum()
    self.zi_error = negative_log_sentence_probability / tensor.sum(self.answer_mask[1:, :], 0)
    self.zi_error = self.zi_error.mean()
    self.st_error = negative_log_sentence_probability.mean()
    # NOTE(review): pred_word here is the value left by the last loop
    # iteration (the last style) — confirm this is intended.
    self.pred_word = pred_word
    self.set_parameters(input_params)  # params from list to TensorVariable
def __init__(self, n_words, word_embedding_dim, hidden_status_dim, input_params):
    """Build the symbolic graph of the encoder-decoder network.

    Creates the embedding parameters, the encoder, decoder and maxout layers
    (in that order), and defines ``self.cost`` and ``self.likihood_vector``
    from the maxout likelihood of ``answer[1:]``.

    Parameters:
        n_words: vocabulary size (stored in ``options['n_words']``).
        word_embedding_dim: width of the word embeddings.
        hidden_status_dim: hidden state width of encoder and decoder.
        input_params: forwarded unchanged to ``self.set_parameters``.
    """
    self.options = options = {
        'n_words': n_words,
        'word_embedding_dim': word_embedding_dim,
        'hidden_status_dim': hidden_status_dim,
        'learning_rate': 0.0001
    }
    # Global parameters.
    params = self.init_global_params(options)
    # Theano parameters.
    self.tparams = self.init_tparams(params)
    # Construct the network: symbolic inputs and their embedding lookups,
    # reshaped to (shape[0], shape[1], word_embedding_dim).
    self.question = tensor.matrix('question', dtype='int64')
    self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
    self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
        [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
    self.answer = tensor.matrix('answer', dtype='int64')
    self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
    self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
        [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
    # The string below is disabled debug code (a no-op expression statement).
    '''
    theano.config.compute_test_value = 'off'
    self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
    self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
    self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
    self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
    '''
    # Layer objects.
    # 1. encoder layer
    self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                      hidden_status_dim=options['hidden_status_dim'],
                                      tparams=self.tparams, node_type=GRUNode)
    # 2. decoder layer (its input width is word embedding + hidden state)
    self.decoder_layer = \
        DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + \
                         options['hidden_status_dim'], \
                         hidden_status_dim=options['hidden_status_dim'],
                         tparams=self.tparams, node_type=GRUNode)
    # 3. maxout layer
    self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                    refer_dim=2 * options['hidden_status_dim'] + \
                                    options['word_embedding_dim'], \
                                    tparams=self.tparams, prefix='maxout')
    # Layer outputs.
    # 1. encoder layer
    self.encoder_hidden_status = \
        self.encoder_layer.get_output(inputs=(self.question_embedding, self.question_mask))
    # 2. decoder layer
    self.decoder_hidden_status = \
        self.decoder_layer.get_output(inputs=(self.answer_embedding, self.answer_mask, \
                                              self.encoder_hidden_status))
    # 3. maxout layer
    # Rows are (time step, sample) pairs for all but the last answer step;
    # columns are decoder state | last encoder state (repeated) | answer embedding.
    self.maxout_input = \
        tensor.concatenate([self.decoder_hidden_status[:-1, :, :]\
                                .reshape([(self.answer.shape[0] - 1) * self.answer.shape[1], \
                                          options['hidden_status_dim']]), \
                            tensor.alloc(self.encoder_hidden_status[-1, :, :], \
                                         self.answer.shape[0] - 1, \
                                         self.answer.shape[1], \
                                         options['hidden_status_dim'])\
                                .reshape([(self.answer.shape[0] - 1) * self.answer.shape[1], \
                                          options['hidden_status_dim']]), \
                            self.answer_embedding[:-1, :, :]\
                                .reshape([(self.answer.shape[0] - 1) * self.answer.shape[1], \
                                          options['word_embedding_dim']])], \
                           axis=1)
    likihood_vector = \
        self.maxout_layer.likelihood(base_data=self.tparams['Wemb_d'], \
                                     refer_data=self.maxout_input, \
                                     y=self.answer[1:, :].flatten())
    # Evaluation and cost: masked negative log-likelihood divided by the mask total.
    likihood_vector = -tensor.log(likihood_vector)
    self.cost = tensor.dot(likihood_vector.flatten(), self.answer_mask[1:, :].flatten()) \
        / self.answer_mask[1:, :].sum()
    prob_matrix = likihood_vector.reshape([self.answer_mask[1:,:].shape[0], \
                                           self.answer_mask[1:,:].shape[1]])
    # Per-column masked average of the negative log-likelihood (axis 0 reduced).
    self.likihood_vector = tensor.sum(prob_matrix * self.answer_mask[1:, :], axis=0) \
        / tensor.sum(self.answer_mask[1:,:], axis=0)
    self.set_parameters(input_params)  # params from list to TensorVariable
class ChoEncoderDecoderNetwork(Network):
    """
    This class processes a dialog pair with an encoder-decoder network.
    It has 2 abilities:
        1. Train the language model.
        2. Model the relationship of Q&A
    """

    def init_global_params(self, options):
        """
        Create the global (non-recurrent) parameters: the encoder and decoder
        word embedding tables.

        Returns an ``OrderedDict`` with ``'Wemb_e'`` and ``'Wemb_d'``, each of
        shape (n_words, word_embedding_dim), drawn from ``numpy.random.rand``
        and scaled by 0.01.
        """
        params = OrderedDict()
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_e'] = (0.01 * randn).astype(config.globalFloatType())
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_d'] = (0.01 * randn).astype(config.globalFloatType())
        return params

    def __init__(self, n_words, word_embedding_dim, hidden_status_dim, input_params):
        """Build the symbolic graph of the encoder-decoder network.

        Creates the embedding parameters, the encoder, decoder and maxout
        layers (in that order), and defines ``self.cost`` and
        ``self.likihood_vector`` from the maxout likelihood of ``answer[1:]``.

        Parameters:
            n_words: vocabulary size.
            word_embedding_dim: width of the word embeddings.
            hidden_status_dim: hidden state width of encoder and decoder.
            input_params: forwarded unchanged to ``self.set_parameters``.
        """
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001
        }
        # Global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Construct the network: symbolic inputs and their embedding lookups,
        # reshaped to (shape[0], shape[1], word_embedding_dim).
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        # The string below is disabled debug code (a no-op expression statement).
        '''
        theano.config.compute_test_value = 'off'
        self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        '''
        # Layer objects.  Their construction order is kept as-is because
        # parameters are later restored "from list" by set_parameters.
        # 1. encoder layer
        self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                          hidden_status_dim=options['hidden_status_dim'],
                                          tparams=self.tparams, node_type=GRUNode)
        # 2. decoder layer (its input width is word embedding + hidden state)
        self.decoder_layer = \
            DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + \
                             options['hidden_status_dim'], \
                             hidden_status_dim=options['hidden_status_dim'],
                             tparams=self.tparams, node_type=GRUNode)
        # 3. maxout layer
        self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                        refer_dim=2 * options['hidden_status_dim'] + \
                                        options['word_embedding_dim'], \
                                        tparams=self.tparams, prefix='maxout')
        # Layer outputs.
        # 1. encoder layer
        self.encoder_hidden_status = \
            self.encoder_layer.get_output(inputs=(self.question_embedding, self.question_mask))
        # 2. decoder layer
        self.decoder_hidden_status = \
            self.decoder_layer.get_output(inputs=(self.answer_embedding, self.answer_mask, \
                                                  self.encoder_hidden_status))
        # 3. maxout layer
        # Rows are (time step, sample) pairs for all but the last answer step;
        # columns are decoder state | last encoder state (repeated) | answer embedding.
        self.maxout_input = \
            tensor.concatenate([self.decoder_hidden_status[:-1, :, :]\
                                    .reshape([(self.answer.shape[0] - 1) * self.answer.shape[1], \
                                              options['hidden_status_dim']]), \
                                tensor.alloc(self.encoder_hidden_status[-1, :, :], \
                                             self.answer.shape[0] - 1, \
                                             self.answer.shape[1], \
                                             options['hidden_status_dim'])\
                                    .reshape([(self.answer.shape[0] - 1) * self.answer.shape[1], \
                                              options['hidden_status_dim']]), \
                                self.answer_embedding[:-1, :, :]\
                                    .reshape([(self.answer.shape[0] - 1) * self.answer.shape[1], \
                                              options['word_embedding_dim']])], \
                               axis=1)
        likihood_vector = \
            self.maxout_layer.likelihood(base_data=self.tparams['Wemb_d'], \
                                         refer_data=self.maxout_input, \
                                         y=self.answer[1:, :].flatten())
        # Evaluation and cost: masked negative log-likelihood divided by the mask total.
        likihood_vector = -tensor.log(likihood_vector)
        self.cost = tensor.dot(likihood_vector.flatten(), self.answer_mask[1:, :].flatten()) \
            / self.answer_mask[1:, :].sum()
        prob_matrix = likihood_vector.reshape([self.answer_mask[1:,:].shape[0], \
                                               self.answer_mask[1:,:].shape[1]])
        # Per-column masked average of the negative log-likelihood (axis 0 reduced).
        self.likihood_vector = tensor.sum(prob_matrix * self.answer_mask[1:, :], axis=0) \
            / tensor.sum(self.answer_mask[1:,:], axis=0)
        self.set_parameters(input_params)  # params from list to TensorVariable

    def get_training_function(self, cr, batch_size, batch_repeat=1):
        """Build a closure that runs one adadelta training step on a mini-batch.

        Parameters:
            cr: data reader providing ``get_trainset([start, end])`` which
                returns ``(question, question_mask, answer, answer_mask)``.
            batch_size: number of samples per mini-batch.
            batch_repeat: how many times each mini-batch is fed to the optimizer.

        Returns:
            ``update_function(index)`` returning the cost of the last
            repetition (``None`` when ``batch_repeat`` is 0).
        """
        lr = tensor.scalar(name='lr')
        # list(...) keeps `wrt` a real list even where dict.values() is a view.
        grads = tensor.grad(self.cost, wrt=list(self.tparams.values()))
        f_grad_shared, f_update = self.adadelta(lr, self.tparams, grads,
                                                [self.question, self.question_mask,
                                                 self.answer, self.answer_mask],
                                                [self.cost])

        def update_function(index):
            question, question_mask, answer, answer_mask = \
                cr.get_trainset([index * batch_size, (index + 1) * batch_size])
            # Initialised so that batch_repeat == 0 returns None instead of
            # raising UnboundLocalError.
            cost = None
            for _ in range(batch_repeat):
                cost = f_grad_shared(question, question_mask, answer, answer_mask)
                f_update(self.options["learning_rate"])
            return cost

        return update_function

    def get_validing_function(self, cr, batch_size=200):
        """Build a closure returning ``[mean per-batch cost]`` on the validation set.

        The set size is read from ``cr.get_size()[1]`` and processed in
        batches of ``batch_size`` through ``cr.get_validset``.
        """
        valid_function = self.get_cost_function()

        def update_function():
            n_validset = cr.get_size()[1]
            # Ceiling division; `//` keeps the count an int under true division.
            n_batches = (n_validset - 1) // batch_size + 1
            cost = 0.0
            for index in range(n_batches):
                question, question_mask, answer, answer_mask = \
                    cr.get_validset([index * batch_size, (index + 1) * batch_size])
                cost += valid_function(question, question_mask, answer, answer_mask)[0]
            cost = cost / n_batches
            return [cost]

        return update_function

    def get_testing_function(self, cr, batch_size=100):
        """Build a closure returning ``[mean per-batch cost]`` on the test set.

        The set size is read from ``cr.get_size()[2]`` and processed in
        batches of ``batch_size`` through ``cr.get_testset``.
        """
        test_function = self.get_cost_function()

        def update_function():
            n_testset = cr.get_size()[2]
            # Ceiling division; `//` keeps the count an int under true division.
            n_batches = (n_testset - 1) // batch_size + 1
            cost = 0.0
            for index in range(n_batches):
                question, question_mask, answer, answer_mask = \
                    cr.get_testset([index * batch_size, (index + 1) * batch_size])
                cost += test_function(question, question_mask, answer, answer_mask)[0]
            cost = cost / n_batches
            return [cost]

        return update_function

    def get_pr_function(self, cr, batch_size=100):
        """Build a closure returning ``[Pearson r]`` between model scores and
        ``cr.get_pr_score()``.

        Scores are the per-sample values of ``self.likihood_vector`` collected
        over all batches from ``cr.get_prset``.
        """
        pr_function = theano.function(inputs=[self.question, self.question_mask,
                                              self.answer, self.answer_mask],
                                      outputs=[self.likihood_vector], name='pr_function',
                                      on_unused_input='ignore')

        def update_function():
            # NOTE(review): index 2 is the same one get_testing_function uses
            # for the test set — confirm the PR set really has that size.
            n_prset = cr.get_size()[2]
            # Ceiling division; `//` keeps the count an int under true division.
            n_batches = (n_prset - 1) // batch_size + 1
            cost_list = list()
            for index in range(n_batches):
                question, question_mask, answer, answer_mask = \
                    cr.get_prset([index * batch_size, (index + 1) * batch_size])
                score_list = pr_function(question, question_mask, answer, answer_mask)[0]
                cost_list.extend(score_list)
            corre_score = pearsonr(cost_list, cr.get_pr_score())
            return [corre_score[0]]

        return update_function

    def get_cost_function(self):
        """Compile a function of ``(question, question_mask, answer, answer_mask)``
        returning ``[cost]``."""
        return theano.function(inputs=[self.question, self.question_mask,
                                       self.answer, self.answer_mask],
                               outputs=[self.cost], name='cost_function',
                               on_unused_input='ignore')

    def get_deploy_function(self):
        """Compile the next-word prediction function.

        Returns a function of ``(question, question_mask, answer, answer_mask)``
        yielding ``[pred_word, pred_word_probability]`` computed from the last
        time step of the decoder state, encoder state and answer embedding.
        """
        maxout_input = tensor.concatenate([self.decoder_hidden_status[-1, :, :],
                                           self.encoder_hidden_status[-1, :, :],
                                           self.answer_embedding[-1, :, :]],
                                          axis=1)
        pred_word, pred_word_probability = \
            self.maxout_layer.get_output(base_data=self.tparams['Wemb_d'], refer_data=maxout_input)
        deploy_function = theano.function(inputs=[self.question, self.question_mask,
                                                  self.answer, self.answer_mask],
                                          outputs=[pred_word, pred_word_probability],
                                          name='deploy_function')
        return deploy_function

    def get_observe_function(self):
        """Compile a function of ``(question, question_mask)`` returning the
        last-time-step encoder hidden state."""
        observe_function = theano.function(inputs=[self.question, self.question_mask],
                                           outputs=[self.encoder_hidden_status[-1, :, :]],
                                           name='observe_function')
        return observe_function
class StyleEncoderDecoderNetwork(Network):
    """
    This class processes a dialog pair with an encoder-decoder network.
    It has 3 abilities:
        1. Train the language model.
        2. Model the relationship of Q&A
        3. Model the style
    """

    def init_global_params(self, options):
        """
        Create the global (non-recurrent) parameters.

        Returns an ``OrderedDict`` with the encoder/decoder word embeddings
        (``'Wemb_e'``, ``'Wemb_d'``, scaled by 0.01), the style embedding
        ``'Semb'`` of shape (n_style, style_embedding_dim) and the topic
        embedding ``'Temb'`` of shape (n_topic, topic_embedding_dim), both
        scaled by 0.2.  All values come from ``numpy.random.rand``.
        """
        params = OrderedDict()
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_e'] = (0.01 * randn).astype(config.globalFloatType())
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_d'] = (0.01 * randn).astype(config.globalFloatType())
        randn = numpy.random.rand(options['n_style'], options['style_embedding_dim'])
        params['Semb'] = (0.2 * randn).astype(config.globalFloatType())  # style embedding
        randn = numpy.random.rand(options['n_topic'], options['topic_embedding_dim'])
        params['Temb'] = (0.2 * randn).astype(config.globalFloatType())  # topic embedding
        return params

    def __init__(self, n_words, word_embedding_dim, n_style, style_embedding_dim,
                 n_topic, topic_embedding_dim, hidden_status_dim, input_params=None):
        """Build the symbolic graph of the topic- and style-aware encoder-decoder.

        The question is encoded, mixed with the topic embedding through one
        ``GRUNode`` update, then with every style embedding through a second
        ``GRUNode`` update.  A single shared decoder is run on the batch
        replicated ``n_style`` times, and the per-style sentence
        log-likelihoods are combined with ``log(style_prob)`` and the topic
        likelihood to form the cost.

        Parameters:
            n_words: vocabulary size.
            word_embedding_dim: width of the word embeddings.
            n_style: number of styles.
            style_embedding_dim: width of each row of ``tparams['Semb']``.
            n_topic: number of topics.
            topic_embedding_dim: width of each row of ``tparams['Temb']``.
            hidden_status_dim: hidden state width of encoder and decoder.
            input_params: forwarded unchanged to ``self.set_parameters``.
        """
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'n_style': n_style,
            'style_embedding_dim': style_embedding_dim,
            'n_topic': n_topic,
            'topic_embedding_dim': topic_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop available; sgd is very hard to use and not recommended (probably needs momentum and a decaying learning rate).
            'tiny_float': 1e-20
        }
        # Global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))
        # Construct the network.
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        n_question_time_stpes = self.question.shape[0]
        n_answer_time_stpes = self.answer.shape[0]
        n_samples = self.question.shape[1]
        # Size of the batch once it has been replicated for every style.
        extent_style_size = n_samples * self.options['n_style']
        self.topic = tensor.vector('topic', dtype='int64')
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [n_question_time_stpes, n_samples, options['word_embedding_dim']])
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [n_answer_time_stpes, n_samples, options['word_embedding_dim']])
        self.topic_embedding = self.tparams['Temb'][self.topic.flatten()].reshape(
            [1, n_samples, options['topic_embedding_dim']])
        # for debug (the string below is disabled code, a no-op expression statement)
        '''
        self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]])
        self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]])
        self.answer.tag.test_value = numpy.array([[10, 2, 0, 0], [5, 9, 2, 4]])
        self.answer_mask.tag.test_value = numpy.array([[1, 1, 0, 0], [1, 1, 1, 1]])
        self.topic.tag.test_value = numpy.array([0, 2]) # for debug
        '''
        # Declaration of layers.  Their construction order is kept as-is
        # because parameters are later restored "from list" by set_parameters.
        # 1. encoder_layer
        self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                          hidden_status_dim=options['hidden_status_dim'],
                                          tparams=self.tparams)
        # 2. topic_layer
        self.topic_layer = MaxoutLayer(base_dim=options['topic_embedding_dim'],
                                       refer_dim=options['hidden_status_dim'],
                                       tparams=self.tparams,
                                       prefix='topic')
        # 3. style_layer
        self.style_layer = MaxoutLayer(base_dim=options['style_embedding_dim'],
                                       refer_dim=options['hidden_status_dim'],
                                       tparams=self.tparams,
                                       prefix="style")
        # 4. decoder_layer
        self.decoder_layer = DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + \
                                              options['hidden_status_dim'],
                                              hidden_status_dim=options['hidden_status_dim'],
                                              tparams=self.tparams)
        # 5. output_layer
        self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                        refer_dim=2*options["hidden_status_dim"] + \
                                        options['word_embedding_dim'],
                                        tparams=self.tparams,
                                        prefix="maxout")
        # Definition of layer variables.
        # 1. encoder layer
        self.encoder_hidden_status = \
            self.encoder_layer.get_output(inputs=(self.question_embedding, self.question_mask))
        # 2. topic layer
        self.topic_prob = \
            self.topic_layer.probability(base_data=self.tparams['Temb'],
                                         refer_data=self.encoder_hidden_status[-1, :, :])
        self.topic_likelihood = \
            self.topic_layer.likelihood(base_data=self.tparams['Temb'],
                                        refer_data=self.encoder_hidden_status[-1, :, :],
                                        y=self.topic)
        # Hidden status mixed with the topic: one GRU update of the last
        # encoder state with the projected topic embedding as input.
        topic_mixed_node = GRUNode(word_embedding_dim=self.options['topic_embedding_dim'],
                                   hidden_status_dim=options['hidden_status_dim'],
                                   tparams=self.tparams, prefix='topic_mixed_node')
        encoder_hidden_status = tensor.alloc(self.encoder_hidden_status[-1, :, :], \
                                             1, n_samples, options['hidden_status_dim'])
        topic_mask = tensor.alloc(numpy.ones((1,), dtype=config.globalFloatType()), n_samples)
        state_below = tensor.dot(self.topic_embedding, topic_mixed_node.get_params_W())
        self.topic_hidden_status = \
            topic_mixed_node.node_update(m_=topic_mask, x_=state_below, h_=encoder_hidden_status)
        # 3. style layer
        self.style_prob = self.style_layer.probability(base_data=self.tparams['Semb'],
                                                       refer_data=self.topic_hidden_status[-1,:,:])
        # Hidden status mixed with the style: the last encoder state is
        # replicated n_style times and updated with [style | topic] embeddings.
        style_mixed_node = GRUNode(word_embedding_dim=options['style_embedding_dim'] + options['topic_embedding_dim'],
                                   hidden_status_dim=options['hidden_status_dim'],
                                   tparams=self.tparams, prefix='style_mixed_node')
        encoder_hidden_status = \
            tensor.alloc(self.encoder_hidden_status[-1, :, :],
                         self.options['n_style'], n_samples, options['hidden_status_dim'])\
            .reshape([1, extent_style_size, options['hidden_status_dim']])
        style_embeddings = \
            tensor.concatenate([ \
                tensor.alloc(self.tparams['Semb'],
                             n_samples, self.options['n_style'], self.options['style_embedding_dim'])\
                .dimshuffle(1, 0, 2)\
                .reshape([1, extent_style_size, self.options['style_embedding_dim']]), \
                tensor.alloc(self.topic_embedding[-1, :, :],
                             self.options['n_style'], n_samples, self.options['topic_embedding_dim'])\
                .reshape([1, extent_style_size, self.options['topic_embedding_dim']])],
                axis=2)
        style_mask = tensor.alloc(numpy.ones((1,), dtype=config.globalFloatType()), extent_style_size)
        state_below = tensor.dot(style_embeddings, style_mixed_node.get_params_W())
        self.style_hidden_status = \
            style_mixed_node.node_update(m_=style_mask, x_=state_below, h_=encoder_hidden_status)
        # 4. decoder layer
        # Replicate the answer tensors n_style times and fold the style axis
        # into the sample axis: (time, n_style * n_samples[, dim]).
        answer_mask = tensor.alloc(self.answer_mask, \
                                   self.options['n_style'], n_answer_time_stpes, n_samples) \
            .dimshuffle([1, 0, 2]).reshape([n_answer_time_stpes, extent_style_size])
        answer = tensor.alloc(self.answer, \
                              self.options['n_style'], n_answer_time_stpes, n_samples) \
            .dimshuffle([1, 0, 2]).reshape([n_answer_time_stpes, extent_style_size])
        answer_embedding = tensor.alloc(self.answer_embedding, \
                                        self.options['n_style'], n_answer_time_stpes, \
                                        n_samples, options['word_embedding_dim'])\
            .dimshuffle(1, 0, 2, 3)\
            .reshape([n_answer_time_stpes, extent_style_size, options['word_embedding_dim']])
        mixed_hidden_status = self.style_hidden_status
        self.decoder_hidden_status = \
            self.decoder_layer.get_output(inputs=[answer_embedding, answer_mask, mixed_hidden_status])
        # 5. maxout layer
        # Columns: decoder state | last mixed state (repeated) | answer embedding.
        self.maxout_input = tensor.concatenate([self.decoder_hidden_status[:-1, :, :].\
                                                reshape([(n_answer_time_stpes - 1) * extent_style_size,
                                                         options['hidden_status_dim']]),
                                                tensor.alloc(mixed_hidden_status[-1, :, :],
                                                             n_answer_time_stpes - 1,
                                                             extent_style_size,
                                                             options['hidden_status_dim']).\
                                                reshape([(n_answer_time_stpes - 1) * extent_style_size,
                                                         options['hidden_status_dim']]),
                                                answer_embedding[:-1, :, :].\
                                                reshape([(n_answer_time_stpes - 1) * extent_style_size,
                                                         options['word_embedding_dim']])],
                                               axis=1)
        predict_maxout_input = tensor.concatenate([self.decoder_hidden_status[-1, :, :].\
                                                   reshape([extent_style_size,
                                                            options['hidden_status_dim']]),
                                                   mixed_hidden_status[-1, :, :].\
                                                   reshape([extent_style_size,
                                                            options['hidden_status_dim']]),
                                                   answer_embedding[-1, :, :].\
                                                   reshape([extent_style_size,
                                                            options['word_embedding_dim']])],
                                                  axis=1)
        self.maxout_likelihood = \
            self.maxout_layer.likelihood(base_data=self.tparams['Wemb_d'],
                                         refer_data=self.maxout_input,
                                         y=answer[1:, :].flatten())
        pred_word, predict_probability = \
            self.maxout_layer.get_output(base_data=self.tparams['Wemb_d'],
                                         refer_data=predict_maxout_input)
        # carefully check
        self.maxout_likelihood = self.maxout_likelihood.reshape(
            [n_answer_time_stpes - 1, options['n_style'], n_samples])
        predict_probability = predict_probability.reshape(
            [options['n_style'], n_samples, options['n_words']])
        self.predict_probability = predict_probability
        pred_word = pred_word.reshape(
            [options['n_style'], n_samples])
        self.pred_word = pred_word
        # options['n_style'], n_answer_time_stpes - 1, n_samples
        # Transform the multiplication into add.
        log_likelihood_vector = tensor.log(self.maxout_likelihood) * self.answer_mask[1:, :].dimshuffle(0, 'x', 1)
        log_likelihood_vector = log_likelihood_vector.sum(axis=0)
        self.log_likelihood_vector_on_diff_styles = log_likelihood_vector
        # Joint log-probability of (sentence, style), then combined over styles
        # by the helpers remove_min_neg_log_prob / total_probability_precise.
        sentence_probability = log_likelihood_vector + tensor.transpose(self.style_prob.log())
        sentence_probability = self.remove_min_neg_log_prob(sentence_probability)
        sentence_probability = self.total_probability_precise(sentence_probability, 0, options['n_style'])
        sentence_probability += tensor.log(self.topic_likelihood)
        negative_log_sentence_probability = -sentence_probability
        self.likelihood_vector = negative_log_sentence_probability
        # Cost: total negative log-probability divided by the mask total of answer[1:].
        self.cost = negative_log_sentence_probability.sum() / self.answer_mask[1:, :].sum()
        self.zi_error = negative_log_sentence_probability / tensor.sum(self.answer_mask[1:, :], 0)
        self.zi_error = self.zi_error.mean()
        self.st_error = negative_log_sentence_probability.mean()
        self.set_parameters(input_params)  # params from list to TensorVariable

    def get_training_function(self, cr, error_type="RMSE", batch_size=10, batch_repeat=1):
        """Build a closure that runs one training step on a mini-batch.

        Parameters:
            cr: data reader; ``get_train_set([start, end])`` must return
                ``((question, question_mask), (answer, answer_mask), topic, _, _)``.
            error_type: accepted for interface compatibility; not used here.
            batch_size: number of samples per mini-batch.
            batch_repeat: how many times each mini-batch is fed to the optimizer.

        Returns:
            ``update_function(index)`` returning the cost of the last
            repetition (``None`` when ``batch_repeat`` is 0).
        """
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        # list(...) keeps `wrt` a real list even where dict.values() is a view.
        grads = tensor.grad(self.cost, wrt=list(self.tparams.values()))
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads,
                                            [self.question, self.question_mask,
                                             self.answer, self.answer_mask, self.topic],
                                            [self.cost])

        def update_function(index):
            (question, question_mask), (answer, answer_mask), topic, _, _ = \
                cr.get_train_set([index * batch_size, (index + 1) * batch_size])
            # Initialised so that batch_repeat == 0 returns None instead of
            # raising UnboundLocalError.
            cost = None
            for _ in range(batch_repeat):
                cost = f_grad_shared(question, question_mask, answer, answer_mask, topic)
                f_update(self.options["learning_rate"])
            return cost

        return update_function

    def get_validing_function(self, cr):
        """Compile a zero-argument function returning ``[zi_error, st_error]``
        on the data from ``cr.get_valid_set()`` (bound through ``givens``)."""
        (question, question_mask), (answer, answer_mask), topic, _, _ = cr.get_valid_set()
        valid_function = theano.function(inputs=[],
                                         outputs=[self.zi_error, self.st_error],
                                         givens={self.question: question,
                                                 self.question_mask: question_mask,
                                                 self.answer: answer,
                                                 self.answer_mask: answer_mask,
                                                 self.topic: topic},
                                         name='valid_function')
        return valid_function

    def get_testing_function(self, cr):
        """Compile the test-set evaluation function.

        Returns:
            ``(test_function, None)``: ``test_function`` takes no arguments
            and yields ``[cost, zi_error, st_error]`` on ``cr.get_test_set()``.
            The second element is a placeholder; the PR function is disabled.
        """
        (question, question_mask), (answer, answer_mask), topic, _, _ = cr.get_test_set()
        test_function = theano.function(inputs=[],
                                        outputs=[self.cost, self.zi_error, self.st_error],
                                        givens={self.question: question,
                                                self.question_mask: question_mask,
                                                self.answer: answer,
                                                self.answer_mask: answer_mask,
                                                self.topic: topic},
                                        name='test_function')
        # Disabled PR-function code, kept as a no-op string.
        '''
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_pr_set()
        pr_function = theano.function(inputs=[],
                                      outputs=[self.likelihood_vector],
                                      givens={self.question: question,
                                              self.question_mask: question_mask,
                                              self.answer: answer,
                                              self.answer_mask: answer_mask},
                                      name='pr_function')
        '''
        return test_function, None

    def get_style_distribution_function(self):
        """Compile a function of ``(question, question_mask, topic)`` returning
        ``[style_prob]``."""
        style_distribution_function = \
            theano.function(inputs=[self.question, self.question_mask, self.topic],
                            outputs=[self.style_prob],
                            name='style_function')
        return style_distribution_function

    def get_topic_distribution_function(self):
        """Compile a function of ``(question, question_mask)`` returning
        ``[encoder_hidden_status, topic_prob]``."""
        topic_distribution_function = \
            theano.function(inputs=[self.question, self.question_mask],
                            outputs=[self.encoder_hidden_status, self.topic_prob],
                            name='topic_function')
        return topic_distribution_function

    def get_deploy_function(self):
        """Compile the next-word prediction function for a chosen style.

        The compiled function takes ``(question, question_mask, answer,
        answer_mask, encoder_hidden_status, topic, style_type)`` and returns
        ``[pred_word[style_type], predict_probability[style_type]]``.
        """
        st = tensor.scalar('style_type', dtype='int64')
        prob = self.predict_probability[st]
        p_w = self.pred_word[st]
        # NOTE(review): encoder_hidden_status is a computed variable passed as
        # an input; together with on_unused_input='warn' this looks like a
        # deliberate way to feed a precomputed encoder state — confirm.
        deploy_function = theano.function(inputs=[self.question, self.question_mask,
                                                  self.answer, self.answer_mask,
                                                  self.encoder_hidden_status,
                                                  self.topic, st],
                                          outputs=[p_w, prob],
                                          name='style_deploy_function',
                                          on_unused_input='warn')
        return deploy_function

    def get_outtest_function(self, cr, max_iter, batch_size=200):
        """Build a closure averaging ``(zae, sae, bae)`` over ``max_iter`` batches
        from ``cr.get_outtest_set``.

        NOTE(review): ``self.zae``, ``self.sae`` and ``self.bae`` are not
        assigned anywhere in this class, and ``self.topic`` is not among the
        inputs although the graph built in ``__init__`` uses it; this method
        presumably predates the topic extension — verify before use.
        """
        outtest_function = theano.function(inputs=[self.question, self.question_mask,
                                                   self.answer, self.answer_mask],
                                           outputs=[self.zae, self.sae, self.bae],
                                           name='outtest_function')

        def update_function():
            zae, sae, bae = 0.0, 0.0, 0.0
            for idx in range(max_iter):
                (question, question_mask), (answer, answer_mask), _, _ = \
                    cr.get_outtest_set([idx * batch_size, (idx + 1) * batch_size])
                z, s, b = outtest_function(question, question_mask, answer, answer_mask)
                zae += z
                sae += s
                bae += b
            zae /= max_iter
            sae /= max_iter
            bae /= max_iter
            return zae, sae, bae

        return update_function