Example #1
    def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128, input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop are available; sgd is very hard to use and not recommended (it probably needs momentum and a decaying learning rate).
            }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
        #   1. encoder layer
        self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                          hidden_status_dim=options['hidden_status_dim'],
                                          tparams=self.tparams)
        self.encoder_hidden_status = self.encoder_layer.getOutput(inputs=(question_embedding, self.question_mask))
        
        #   2. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        # self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        # self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        self.decoder_layer = DecoderLayer_Seq2Seq(word_embedding_dim=options['word_embedding_dim'],
                                                  hidden_status_dim=options['hidden_status_dim'],
                                                  tparams=self.tparams)
        self.decoder_hidden_status = self.decoder_layer.getOutput(inputs=(answer_embedding, self.answer_mask,
                                                                          self.encoder_hidden_status))
    
        #   3. softmax layer
        self.softmax_layer = SoftmaxLayer(n_in=options["hidden_status_dim"] ,
                                          n_out=options["n_words"] ,
                                          tparams=self.tparams)
        self.softmax_input = self.decoder_hidden_status[:-1, :, :].reshape(
            [(self.answer.shape[0] - 1) * self.answer.shape[1], options['hidden_status_dim']])
        output_error_vector = self.softmax_layer.negative_log_likelihood(self.softmax_input,
                                                                     y=self.answer[1:, :].flatten())
        
        m = self.answer_mask[1:, :]
        self.cost = -1.0 * tensor.dot(output_error_vector, m.flatten()) / m.sum()
        self.output_error_vector = output_error_vector.reshape([self.answer.shape[0] - 1 , self.answer.shape[1]]) 
        self.output_error_vector = self.output_error_vector * m
        self.output_error_vector = -self.output_error_vector.sum(axis=0) / m.sum(axis=0)
        
        self._set_parameters(input_params)  # params from list to TensorVariable
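The cost above is a mask-weighted average: every position of the shifted answer contributes only where answer_mask is 1, and the sum is divided by the number of unmasked tokens. A minimal NumPy sketch of the same arithmetic with hypothetical toy values, assuming output_error_vector holds per-token log-likelihoods (which the -1.0 negation suggests):

import numpy

# hypothetical log-likelihoods for a (T-1 = 2) x (batch = 3) answer, flattened row-major
log_likelihood = numpy.array([-0.5, -1.2, -0.3, -2.0, -0.1, -0.7])
# answer_mask[1:, :]; the middle sequence is one token shorter than the others
m = numpy.array([[1.0, 1.0, 1.0],
                 [1.0, 0.0, 1.0]])

# same formula as self.cost above
cost = -1.0 * numpy.dot(log_likelihood, m.flatten()) / m.sum()
print(cost)  # 0.94: mean negative log-likelihood over the 5 unmasked positions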
Example #2
    def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128, input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop are available; sgd is very hard to use and not recommended (it probably needs momentum and a decaying learning rate).
            }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
        #   1. encoder layer
        self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                          hidden_status_dim=options['hidden_status_dim'],
                                          tparams=self.tparams)
        self.encoder_hidden_status = self.encoder_layer.get_output(inputs=(question_embedding, self.question_mask))
        
        #   2. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        # self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        # self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        self.decoder_layer = DecoderLayer_Seq2Seq(word_embedding_dim=options['word_embedding_dim'],
                                                  hidden_status_dim=options['hidden_status_dim'],
                                                  tparams=self.tparams)
        self.decoder_hidden_status = self.decoder_layer.get_output(inputs=(answer_embedding, self.answer_mask,
                                                                          self.encoder_hidden_status))
    
        #   3. softmax layer
        self.softmax_layer = SoftmaxLayer(n_in=options["hidden_status_dim"] ,
                                          n_out=options["n_words"] ,
                                          tparams=self.tparams)
        self.softmax_input = self.decoder_hidden_status[:-1, :, :].reshape(
            [(self.answer.shape[0] - 1) * self.answer.shape[1], options['hidden_status_dim']])
        
        likihood_vector = \
            self.softmax_layer.likelihood(input_data=self.softmax_input,
                                          y=self.answer[1:, :].flatten())
        # get evaluation and cost
        likihood_vector = - tensor.log(likihood_vector)
        self.zae, self.sae, self.bae, self.likihood_vector = \
            self.get_evaluation(likihood_vector, self.answer_mask[1:, :], 
                                self.answer.shape[0], self.answer.shape[1])
        self.cost = self.zae
        
        self.set_parameters(input_params)  # params from list to TensorVariable
Example #3
class SkipThoughtNetwork(Network):
    """
    This class will process the dialog pair with an encoder-decoder network.
    It has 2 abilities:
        1. Train the language model.
        2. Model the relationship of Q&A
    """

    def init_global_params(self, options):
        """
        Global (not LSTM) parameters, for the embedding and the classifier.
        """
        params = OrderedDict()
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_e'] = (0.1 * randn).astype(config.globalFloatType()) 
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_d'] = (0.1 * randn).astype(config.globalFloatType()) 

        return params


    def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128, input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop are available; sgd is very hard to use and not recommended (it probably needs momentum and a decaying learning rate).
            }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
        #   1. encoder layer
        self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                          hidden_status_dim=options['hidden_status_dim'],
                                          tparams=self.tparams, node_type=GRUNode)
        self.encoder_hidden_status = \
            self.encoder_layer.get_output(inputs=(self.question_embedding, self.question_mask))
        
        #   2. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        # self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        # self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        self.decoder_layer = \
            DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + options['hidden_status_dim'],
                             hidden_status_dim=options['hidden_status_dim'],
                             tparams=self.tparams, node_type=GRUNode)
        self.decoder_hidden_status = \
            self.decoder_layer.get_output(inputs=(self.answer_embedding, self.answer_mask,
                                                  self.encoder_hidden_status))
        
        #   3. softmax layer
        self.softmax_layer = SoftmaxLayer(n_out=options['word_embedding_dim'],
                                        n_in=2 * options["hidden_status_dim"] + options['word_embedding_dim'],
                                        tparams=self.tparams,
                                        prefix="softmax")
        self.maxout_input = tensor.concatenate([self.decoder_hidden_status[:-1, :, :].
                                                    reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                                             options['hidden_status_dim']]),
                                                 tensor.alloc(self.encoder_hidden_status[-1, :, :],
                                                              self.answer.shape[0] - 1,
                                                              self.answer.shape[1],
                                                              options['hidden_status_dim']).
                                                    reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                                             options['hidden_status_dim']]),
                                                 self.answer_embedding[:-1, :, :].
                                                    reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                                             options['word_embedding_dim']])],
                                                axis=1)
        likihood_vector = \
            self.softmax_layer.likelihood(input_data=self.maxout_input,
                                          y=self.answer[1:, :].flatten())
        # get evaluation and cost
        likihood_vector = - tensor.log(likihood_vector)
        self.zae, self.sae, self.bae, self.likihood_vector = \
            self.get_evaluation(likihood_vector, self.answer_mask[1:, :], 
                                self.answer.shape[0], self.answer.shape[1])
        self.cost = self.zae
        
        self.set_parameters(input_params)  # params from list to TensorVariable
    

    def get_training_function(self, cr, error_type="RMSE", batch_size=10, batch_repeat=1):
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        grads = tensor.grad(self.cost, wrt=self.tparams.values())
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads,
                                            [self.question, self.question_mask,
                                             self.answer, self.answer_mask],
                                            [self.cost])
        
        def update_function(index):
            (answer_up, answer_up_mask), (question, question_mask), (answer_down, answer_down_mask), _, _ = \
                cr.get_train_set([index * batch_size, (index + 1) * batch_size])
            # alignment
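            # answer_up and answer_down can have different time lengths; zero-pad the shorter
            # one (and its mask) along the time axis so both can be concatenated batch-wise below.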
            if answer_up.shape[0] > answer_down.shape[0] :
                answer_down = numpy.concatenate([answer_down, 
                                                 numpy.zeros((answer_up.shape[0] - answer_down.shape[0],
                                                             answer_down.shape[1]), dtype='int64')], axis=0)
                answer_down_mask = numpy.concatenate([answer_down_mask, 
                                                 numpy.zeros((answer_up_mask.shape[0] - answer_down_mask.shape[0],
                                                            answer_down_mask.shape[1]), 
                                                            dtype=config.globalFloatType())], axis=0)
            elif answer_up.shape[0] < answer_down.shape[0] :
                answer_up = numpy.concatenate([answer_up, 
                                               numpy.zeros((answer_down.shape[0] - answer_up.shape[0],
                                                           answer_up.shape[1]), dtype='int64')], axis=0)
                answer_up_mask = numpy.concatenate([answer_up_mask, 
                                                 numpy.zeros((answer_down_mask.shape[0] - answer_up_mask.shape[0],
                                                            answer_up_mask.shape[1]),
                                                            dtype=config.globalFloatType())], axis=0)
            for _ in xrange(batch_repeat):
                cost = f_grad_shared(numpy.concatenate([question, question], axis=1),
                                     numpy.concatenate([question_mask, question_mask], axis=1),
                                     numpy.concatenate([answer_up, answer_down], axis=1),
                                     numpy.concatenate([answer_up_mask, answer_down_mask], axis=1))
                f_update(self.options["learning_rate"])
            return cost
        
        return update_function
    

    def get_validing_function(self, cr):
        (answer_up, answer_up_mask), (question, question_mask), (answer_down, answer_down_mask), _, _ = \
            cr.get_valid_set()
        # alignment
        if answer_up.shape[0] > answer_down.shape[0] :
            answer_down = numpy.concatenate([answer_down, 
                                             numpy.zeros((answer_up.shape[0] - answer_down.shape[0],
                                                         answer_down.shape[1]), dtype='int64')], axis=0)
            answer_down_mask = numpy.concatenate([answer_down_mask, 
                                             numpy.zeros((answer_up_mask.shape[0] - answer_down_mask.shape[0],
                                                        answer_down_mask.shape[1]), 
                                                        dtype=config.globalFloatType())], axis=0)
        elif answer_up.shape[0] < answer_down.shape[0] :
            answer_up = numpy.concatenate([answer_up, 
                                           numpy.zeros((answer_down.shape[0] - answer_up.shape[0],
                                                       answer_up.shape[1]), dtype='int64')], axis=0)
            answer_up_mask = numpy.concatenate([answer_up_mask, 
                                             numpy.zeros((answer_down_mask.shape[0] - answer_up_mask.shape[0],
                                                        answer_up_mask.shape[1]),
                                                        dtype=config.globalFloatType())], axis=0)
        valid_function = \
            theano.function(inputs=[],
                            outputs=[self.cost],
                            givens={self.question: numpy.concatenate([question, question], axis=1),
                                    self.question_mask: numpy.concatenate([question_mask, question_mask], axis=1),
                                    self.answer: numpy.concatenate([answer_up, answer_down], axis=1),
                                    self.answer_mask: numpy.concatenate([answer_up_mask, answer_down_mask], axis=1)},
                            name='valid_function')
        
        return valid_function
    

    def get_testing_function(self, cr):
        (answer_up, answer_up_mask), (question, question_mask), (answer_down, answer_down_mask), _, _ = \
            cr.get_test_set()
        # alignment
        if answer_up.shape[0] > answer_down.shape[0] :
            answer_down = numpy.concatenate([answer_down, 
                                             numpy.zeros((answer_up.shape[0] - answer_down.shape[0],
                                                         answer_down.shape[1]), dtype='int64')], axis=0)
            answer_down_mask = numpy.concatenate([answer_down_mask, 
                                             numpy.zeros((answer_up_mask.shape[0] - answer_down_mask.shape[0],
                                                        answer_down_mask.shape[1]), 
                                                        dtype=config.globalFloatType())], axis=0)
        elif answer_up.shape[0] < answer_down.shape[0] :
            answer_up = numpy.concatenate([answer_up, 
                                           numpy.zeros((answer_down.shape[0] - answer_up.shape[0],
                                                       answer_up.shape[1]), dtype='int64')], axis=0)
            answer_up_mask = numpy.concatenate([answer_up_mask, 
                                             numpy.zeros((answer_down_mask.shape[0] - answer_up_mask.shape[0],
                                                        answer_up_mask.shape[1]),
                                                        dtype=config.globalFloatType())], axis=0)
        test_function = \
            theano.function(inputs=[],
                            outputs=[self.zae, self.sae, self.bae],
                            givens={self.question: numpy.concatenate([question, question], axis=1),
                                    self.question_mask: numpy.concatenate([question_mask, question_mask], axis=1),
                                    self.answer: numpy.concatenate([answer_up, answer_down], axis=1),
                                    self.answer_mask: numpy.concatenate([answer_up_mask, answer_down_mask], axis=1)},
                            name='test_function')
        '''
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_pr_set()
        pr_function = theano.function(inputs=[],
                                      outputs=[self.likihood_vector],
                                      givens={self.question: question,
                                              self.question_mask: question_mask,
                                              self.answer: answer,
                                              self.answer_mask: answer_mask},
                                      name='pr_function')
        '''
        return test_function, None
    

    def get_deploy_function(self):
        deploy_function = theano.function(inputs=[self.question, self.question_mask],
                                          outputs=[self.encoder_hidden_status[-1,:,:]],
                                          name='deploy_function')
        
        return deploy_function
    
    
    def get_evaluation_function(self):
        evaluate_function = theano.function(inputs=[self.question, self.question_mask,
                                                    self.answer, self.answer_mask],
                                        outputs=[self.zae, self.sae, self.bae],
                                        name='evaluate_function')
        
        return evaluate_function
Example #4
    def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128, input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop are available; sgd is very hard to use and not recommended (it probably needs momentum and a decaying learning rate).
            }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
        #   1. forward encoder layer
        self.forward_encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                                  hidden_status_dim=options['hidden_status_dim'],
                                                  tparams=self.tparams, prefix='forward_Encoder')
        self.forward_encoder_hidden_status = \
            self.forward_encoder_layer.getOutput(inputs=(self.question_embedding, self.question_mask))
            
        #   2. backward encoder layer
        self.backward_encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                                   hidden_status_dim=options['hidden_status_dim'],
                                                   tparams=self.tparams, prefix='backward_Encoder')
        self.backward_encoder_hidden_status = \
            self.backward_encoder_layer.getOutput(inputs=(self.question_embedding[::-1, :, :],
                                                          self.question_mask[::-1, :]))
        self.encoder_hidden_status = tensor.concatenate([self.forward_encoder_hidden_status,
                                                         self.backward_encoder_hidden_status[::-1, :, :]],
                                                        axis=2)
        
        #   3. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        # self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        # self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        self.decoder_layer = \
            AttentionDecoderLayer(word_embedding_dim=options['word_embedding_dim'] + 2 * options['hidden_status_dim'],
                                  hidden_status_dim=options['hidden_status_dim'],
                                  encoder_hidden_dim=2 * options['hidden_status_dim'],
                                  tparams=self.tparams, prefix='Decoder')
        self.decoder_hidden_status, self.context = \
            self.decoder_layer.getOutput(inputs=(self.answer_embedding, self.answer_mask,
                                                 self.encoder_hidden_status, self.question_mask))
        
        #   4. maxout  layer
        self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                        refer_dim=3 * options["hidden_status_dim"] + options['word_embedding_dim'],
                                        tparams=self.tparams,
                                        prefix="maxout")
        self.maxout_input = \
        tensor.concatenate(\
                           [self.decoder_hidden_status[:-1, :, :].
                                reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                         options['hidden_status_dim']]),
                             self.context[:-1, :, :].
                                reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                         2 * options['hidden_status_dim']]),
                             self.answer_embedding[:-1, :, :].
                                reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                         options['word_embedding_dim']])],
                            axis=1)
        likihood_vector = \
            self.maxout_layer.likelihood(base_data=self.tparams['Wemb_d'],
                                         refer_data=self.maxout_input,
                                         y=self.answer[1:, :].flatten())
        likihood_vector = - tensor.log(likihood_vector)
        m = self.answer_mask[1:, :]
        # cost
        self.cost = tensor.dot(likihood_vector, m.flatten()) / self.question.shape[1]
        # pr
        self.likihood_vector = likihood_vector.reshape([self.answer.shape[0] - 1 , self.answer.shape[1]]) 
        self.likihood_vector = tensor.sum(self.likihood_vector * m, axis=0)
        
        self._set_parameters(input_params)  # params from list to TensorVariable
Example #5
class BiEncoderAttentionDecoderNetwork(Network):
    """
    This class will process the dialog pair with an encoder-decoder network.
    It has 2 abilities:
        1. Train the language model.
        2. Model the relationship of Q&A
    """

    def init_global_params(self, options):
        """
        Global (not LSTM) parameters, for the embedding and the classifier.
        """
        params = OrderedDict()
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_e'] = (0.01 * randn).astype(config.globalFloatType()) 
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_d'] = (0.01 * randn).astype(config.globalFloatType()) 

        return params


    def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128, input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop are available; sgd is very hard to use and not recommended (it probably needs momentum and a decaying learning rate).
            }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
        #   1. forward encoder layer
        self.forward_encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                                  hidden_status_dim=options['hidden_status_dim'],
                                                  tparams=self.tparams, prefix='forward_Encoder')
        self.forward_encoder_hidden_status = \
            self.forward_encoder_layer.getOutput(inputs=(self.question_embedding, self.question_mask))
            
        #   2. backward encoder layer
        self.backward_encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                                   hidden_status_dim=options['hidden_status_dim'],
                                                   tparams=self.tparams, prefix='backward_Encoder')
        self.backward_encoder_hidden_status = \
            self.backward_encoder_layer.getOutput(inputs=(self.question_embedding[::-1, :, :],
                                                          self.question_mask[::-1, :]))
        self.encoder_hidden_status = tensor.concatenate([self.forward_encoder_hidden_status,
                                                         self.backward_encoder_hidden_status[::-1, :, :]],
                                                        axis=2)
        
        #   3. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        # self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        # self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        self.decoder_layer = \
            AttentionDecoderLayer(word_embedding_dim=options['word_embedding_dim'] + 2 * options['hidden_status_dim'],
                                  hidden_status_dim=options['hidden_status_dim'],
                                  encoder_hidden_dim=2 * options['hidden_status_dim'],
                                  tparams=self.tparams, prefix='Decoder')
        self.decoder_hidden_status, self.context = \
            self.decoder_layer.getOutput(inputs=(self.answer_embedding, self.answer_mask,
                                                 self.encoder_hidden_status, self.question_mask))
        
        #   4. maxout  layer
        self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                        refer_dim=3 * options["hidden_status_dim"] + options['word_embedding_dim'],
                                        tparams=self.tparams,
                                        prefix="maxout")
        self.maxout_input = \
        tensor.concatenate(\
                           [self.decoder_hidden_status[:-1, :, :].
                                reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                         options['hidden_status_dim']]),
                             self.context[:-1, :, :].
                                reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                         2 * options['hidden_status_dim']]),
                             self.answer_embedding[:-1, :, :].
                                reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                         options['word_embedding_dim']])],
                            axis=1)
        likihood_vector = \
            self.maxout_layer.likelihood(base_data=self.tparams['Wemb_d'],
                                         refer_data=self.maxout_input,
                                         y=self.answer[1:, :].flatten())
        likihood_vector = - tensor.log(likihood_vector)
        m = self.answer_mask[1:, :]
        # cost
        self.cost = tensor.dot(likihood_vector, m.flatten()) / self.question.shape[1]
        # pr
        self.likihood_vector = likihood_vector.reshape([self.answer.shape[0] - 1 , self.answer.shape[1]]) 
        self.likihood_vector = tensor.sum(self.likihood_vector * m, axis=0)
        
        self._set_parameters(input_params)  # params from list to TensorVariable
    

    def get_training_function(self, cr, error_type="RMSE", batch_size=10, batch_repeat=1):
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        grads = tensor.grad(self.cost, wrt=self.tparams.values())
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads,
                                            [self.question, self.question_mask,
                                             self.answer, self.answer_mask],
                                            [self.cost])
        
        def update_function(index):
            (question, question_mask), (answer, answer_mask), _, _ = \
                cr.get_train_set([index * batch_size, (index + 1) * batch_size])
            for _ in xrange(batch_repeat):
                cost = f_grad_shared(question, question_mask, answer, answer_mask)
                f_update(self.options["learning_rate"])
            return cost
        
        return update_function
    

    def get_validing_function(self, cr):
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_valid_set()
        valid_function = theano.function(inputs=[],
                                         outputs=[self.cost],
                                         givens={self.question: question,
                                                 self.question_mask: question_mask,
                                                 self.answer: answer,
                                                 self.answer_mask: answer_mask},
                                         name='valid_function')
        
        return valid_function
    

    def get_testing_function(self, cr):
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_test_set()
        test_function = theano.function(inputs=[],
                                        outputs=[self.cost],
                                        givens={self.question: question,
                                                self.question_mask: question_mask,
                                                self.answer: answer,
                                                self.answer_mask: answer_mask},
                                        name='test_function')
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_pr_set()
        pr_function = theano.function(inputs=[],
                                      outputs=[self.likihood_vector],
                                      givens={self.question: question,
                                              self.question_mask: question_mask,
                                              self.answer: answer,
                                              self.answer_mask: answer_mask},
                                      name='pr_function')
        
        return test_function, pr_function
    

    def get_deploy_function(self):
        maxout_input = tensor.concatenate([self.decoder_hidden_status[-1, :, :],
                                            self.encoder_hidden_status[-1, :, :],
                                            self.answer_embedding[-1, :, :]],
                                           axis=1)
        pred_word, pred_word_probability = self.maxout_layer.getOutput(self.tparams['Wemb_d'], maxout_input)
        deploy_function = theano.function(inputs=[self.question, self.question_mask,
                                                  self.answer, self.answer_mask],
                                          outputs=[pred_word, pred_word_probability],
                                          name='deploy_function')
        
        return deploy_function
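A minimal sketch of how this class might be driven end to end. The corpus reader cr, the formats returned by its get_train_set/get_valid_set methods, the vocabulary size and the batch counts are assumptions here, not part of the example:

network = BiEncoderAttentionDecoderNetwork(n_words=20000)        # hypothetical vocabulary size
train_step = network.get_training_function(cr, batch_size=10)    # cr: assumed corpus reader
valid = network.get_validing_function(cr)
n_batches = 1000                                                  # hypothetical batches per epoch
for epoch in xrange(5):
    for batch_index in xrange(n_batches):
        cost = train_step(batch_index)
    print(epoch, cost, valid())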
Example #6
class RnnEncoderDecoderNetwork(Network):
    """
    This class will process the dialog pair with an encoder-decoder network.
    It has 2 abilities:
        1. Train the language model.
        2. Model the relationship of Q&A
    """
    def init_global_params(self, options):
        """
        Global (not LSTM) parameters, for the embedding and the classifier.
        """
        params = OrderedDict()
        randn = numpy.random.rand(options['n_words'],
                                  options['word_embedding_dim'])
        params['Wemb_e'] = (0.01 * randn).astype(config.globalFloatType())
        randn = numpy.random.rand(options['n_words'],
                                  options['word_embedding_dim'])
        params['Wemb_d'] = (0.01 * randn).astype(config.globalFloatType())

        return params

    def __init__(self,
                 n_words,
                 word_embedding_dim=128,
                 hidden_status_dim=128,
                 input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop are available; sgd is very hard to use and not recommended (it probably needs momentum and a decaying learning rate).
        }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask',
                                           dtype=config.globalFloatType())
        # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.question_embedding = self.tparams['Wemb_e'][
            self.question.flatten()].reshape([
                self.question.shape[0], self.question.shape[1],
                options['word_embedding_dim']
            ])
        #   1. encoder layer
        self.encoder_layer = EncoderLayer(
            word_embedding_dim=options['word_embedding_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            tparams=self.tparams)
        self.encoder_hidden_status = self.encoder_layer.getOutput(
            inputs=(self.question_embedding, self.question_mask))

        #   2. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask',
                                         dtype=config.globalFloatType())
        # self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        # self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        self.answer_embedding = self.tparams['Wemb_d'][
            self.answer.flatten()].reshape([
                self.answer.shape[0], self.answer.shape[1],
                options['word_embedding_dim']
            ])
        self.decoder_layer = DecoderLayer_Cho(
            word_embedding_dim=options['word_embedding_dim'] +
            options['hidden_status_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            tparams=self.tparams)
        self.decoder_hidden_status = self.decoder_layer.getOutput(
            inputs=(self.answer_embedding, self.answer_mask,
                    self.encoder_hidden_status))

        #   3. softmax layer
        self.softmax_layer = SoftmaxLayer(n_in=options["hidden_status_dim"],
                                          n_out=options["n_words"],
                                          tparams=self.tparams)
        self.softmax_input = self.decoder_hidden_status[:-1, :, :].reshape([
            (self.answer.shape[0] - 1) * self.answer.shape[1],
            options['hidden_status_dim']
        ])
        output_error_vector = self.softmax_layer.negative_log_likelihood(
            self.softmax_input, y=self.answer[1:, :].flatten())

        m = self.answer_mask[1:, :]
        self.cost = -1.0 * tensor.dot(output_error_vector,
                                      m.flatten()) / m.sum()
        self.output_error_vector = output_error_vector.reshape(
            [self.answer.shape[0] - 1, self.answer.shape[1]])
        self.output_error_vector = self.output_error_vector * m
        self.output_error_vector = -self.output_error_vector.sum(
            axis=0) / m.sum(axis=0)

        self._set_parameters(input_params)  # params from list to TensorVariable

    def get_training_function(self,
                              cr,
                              error_type="RMSE",
                              batch_size=10,
                              batch_repeat=1):
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        grads = tensor.grad(self.cost, wrt=self.tparams.values())
        f_grad_shared, f_update = optimizer(
            lr, self.tparams, grads,
            [self.question, self.question_mask, self.answer, self.answer_mask],
            [self.cost])

        def update_function(index):
            (question, question_mask), (answer, answer_mask), _, _ = \
                cr.get_train_set([index * batch_size, (index + 1) * batch_size])
            for _ in xrange(batch_repeat):
                cost = f_grad_shared(question, question_mask, answer,
                                     answer_mask)
                f_update(self.options["learning_rate"])
            return cost

        return update_function

    def get_validing_function(self, cr):
        (question, question_mask), (answer,
                                    answer_mask), _, _ = cr.get_valid_set()
        valid_function = theano.function(inputs=[],
                                         outputs=[self.cost],
                                         givens={
                                             self.question: question,
                                             self.question_mask: question_mask,
                                             self.answer: answer,
                                             self.answer_mask: answer_mask
                                         },
                                         name='valid_function')

        return valid_function

    def get_testing_function(self, cr):
        (question, question_mask), (answer,
                                    answer_mask), _, _ = cr.get_test_set()
        test_function = theano.function(inputs=[],
                                        outputs=[self.cost],
                                        givens={
                                            self.question: question,
                                            self.question_mask: question_mask,
                                            self.answer: answer,
                                            self.answer_mask: answer_mask
                                        },
                                        name='test_function')
        (question, question_mask), (answer,
                                    answer_mask), _, _ = cr.get_pr_set()
        pr_function = theano.function(inputs=[],
                                      outputs=[self.output_error_vector],
                                      givens={
                                          self.question: question,
                                          self.question_mask: question_mask,
                                          self.answer: answer,
                                          self.answer_mask: answer_mask
                                      },
                                      name='pr_function')

        return test_function, pr_function

    def get_deploy_function(self):
        softmax_input = self.decoder_hidden_status[-1, :, :]
        pred_word, pred_word_probability = self.softmax_layer.getOutput(
            softmax_input)
        deploy_function = theano.function(
            inputs=[
                self.question, self.question_mask, self.answer,
                self.answer_mask
            ],
            outputs=[pred_word, pred_word_probability],
            name='deploy_function')

        return deploy_function
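Because the deploy function predicts the next answer word from the question and the answer prefix generated so far, it can be wrapped in a simple greedy decoding loop. A rough sketch under stated assumptions: a single-column batch, float32 masks, word id 0 as the start symbol, word id 1 as the end-of-sentence marker, and pred_word holding one word id per batch column (all of these are assumptions, not taken from this code):

# network: a previously constructed RnnEncoderDecoderNetwork with trained parameters (assumed)
predict = network.get_deploy_function()
question = numpy.array([[3], [7], [2]], dtype='int64')        # hypothetical word ids, shape (T, 1)
question_mask = numpy.ones_like(question, dtype='float32')    # assumes globalFloatType() is float32
answer = numpy.array([[0]], dtype='int64')                    # assumed start symbol
answer_mask = numpy.ones_like(answer, dtype='float32')
for _ in xrange(30):                                          # hypothetical length limit
    pred_word, pred_word_probability = predict(question, question_mask, answer, answer_mask)
    next_word = int(pred_word[-1])
    if next_word == 1:                                        # assumed end-of-sentence id
        break
    answer = numpy.concatenate([answer, numpy.array([[next_word]], dtype='int64')], axis=0)
    answer_mask = numpy.ones_like(answer, dtype='float32')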
Example #7
    def __init__(self, n_words, word_embedding_dim, hidden_status_dim, style_number, style_dim, input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'style_number': style_number,
            'style_dim': style_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop are available; sgd is very hard to use and not recommended (it probably needs momentum and a decaying learning rate).
            }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
        #   1. forward encoder layer
        self.forward_encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                                  hidden_status_dim=options['hidden_status_dim'],
                                                  tparams=self.tparams, prefix='forward_Encoder')
        self.forward_encoder_hidden_status = \
            self.forward_encoder_layer.getOutput(inputs=(self.question_embedding, self.question_mask))
            
        #   2. backward encoder layer
        self.backward_encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                                   hidden_status_dim=options['hidden_status_dim'],
                                                   tparams=self.tparams, prefix='backward_Encoder')
        self.backward_encoder_hidden_status = \
            self.backward_encoder_layer.getOutput(inputs=(self.question_embedding[::-1, :, :],
                                                          self.question_mask[::-1, :]))
        self.encoder_hidden_status = tensor.concatenate([self.forward_encoder_hidden_status,
                                                         self.backward_encoder_hidden_status[::-1, :, :]],
                                                        axis=2)
        
        #   3. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        # self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        # self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        self.decoder_layer = \
            AttentionDecoderLayer(word_embedding_dim=options['word_embedding_dim'] + 2 * options['hidden_status_dim'],
                                  hidden_status_dim=options['hidden_status_dim'],
                                  encoder_hidden_dim=2 * options['hidden_status_dim'],
                                  tparams=self.tparams, prefix='Decoder')
        self.decoder_hidden_status, self.context = \
            self.decoder_layer.getOutput(inputs=(self.answer_embedding, self.answer_mask,
                                                 self.encoder_hidden_status, self.question_mask))
        #   4. style  layer
        
        self.style_layer = StyleLayer(style_number, style_dim, 3 * hidden_status_dim, self.tparams, 'style')
        
        question_style_input_embedding1 = self.forward_encoder_hidden_status[-1, :, :].reshape([self.answer.shape[1], options['hidden_status_dim']])
        question_style_input_embedding2 = self.backward_encoder_hidden_status[-1, :, :].reshape([self.answer.shape[1], options['hidden_status_dim']])
        answer_style_input_embedding = self.backward_encoder_hidden_status[-1, :, :].reshape([self.answer.shape[1], options['hidden_status_dim']])
        
        pair_embedding = tensor.concatenate([question_style_input_embedding1,
                                             question_style_input_embedding2,
                                             answer_style_input_embedding],
                                            axis=1)
        self.style = self.style_layer.getOutput(pair_embedding)
        
        
        #   5. maxout  layer
        self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                        refer_dim=3 * options["hidden_status_dim"] + options['word_embedding_dim'] + options['style_dim'],
                                        tparams=self.tparams,
                                        prefix="maxout")
        
        maxout_decoder_hidden_status_input = self.decoder_hidden_status[:-1, :, :].\
                                        reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                         options['hidden_status_dim']])
        
        maxout_context_hidden_status_input = self.context[:-1, :, :].\
                                reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                         2 * options['hidden_status_dim']])
        
        maxout_answer_wordEmbedding_input = self.answer_embedding[:-1, :, :].\
                                reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                         options['word_embedding_dim']])
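        # (The next statement broadcasts the per-pair style vector to every answer position via
        # dimshuffle + alloc, then flattens time and batch like the other maxout inputs.)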
        
        maxout_answer_style_input = tensor.alloc(self.style.dimshuffle('x', 0, 1),
                                                 self.answer.shape[0] - 1,
                                                 self.answer.shape[1],
                                                 options['style_dim'])
        maxout_answer_style_input = maxout_answer_style_input.reshape(
            [(self.answer.shape[0] - 1) * self.answer.shape[1], options['style_dim']])
        self.maxout_input = \
            tensor.concatenate(\
                               [maxout_decoder_hidden_status_input,
                                maxout_context_hidden_status_input,
                                maxout_answer_wordEmbedding_input,
                                maxout_answer_style_input],
                                axis=1)
        output_error_vector = self.maxout_layer.negative_log_likelihood(
                                                                     self.tparams['Wemb_d'],
                                                                     self.maxout_input,
                                                                     y=self.answer[1:, :].flatten())
        m = self.answer_mask[1:, :]
        self.cost = -1.0 * tensor.dot(output_error_vector, m.flatten()) / m.sum()
        self.output_error_vector = output_error_vector.reshape([self.answer.shape[0] - 1 , self.answer.shape[1]]) 
        self.output_error_vector = self.output_error_vector * m
        self.output_error_vector = -self.output_error_vector.sum(axis=0) / m.sum(axis=0)
        
        self._set_parameters(input_params)  # params from list to TensorVariable
Example #8
class RnnEncoderDecoderNetwork(Network):
    """
    This class will process the dialog pair with an encoder-decoder network.
    It has 2 abilities:
        1. Train the language model.
        2. Model the relationship of Q&A
    """
    def init_global_params(self, options):
        """
        Global (not LSTM) parameters, for the embedding and the classifier.
        """
        params = OrderedDict()
        randn = numpy.random.rand(options['n_words'],
                                  options['word_embedding_dim'])
        params['Wemb_e'] = (0.1 * randn).astype(config.globalFloatType())
        randn = numpy.random.rand(options['n_words'],
                                  options['word_embedding_dim'])
        #params['Wemb_e'] = (0.1 * randn).astype(config.globalFloatType())
        #randn = numpy.random.rand(options['topic_embedding_dim'], options['topic_embedding_dim'])/options['topic_embedding_dim']*2
        #params['QTA']=(1.0 * randn).astype(config.globalFloatType())
        #randn = numpy.random.rand(options['n_topics'], options['topic_embedding_dim'])
        #params['Temb'] = (0.1 * randn).astype(config.globalFloatType())
        #params['Temb'] = numpy.dot(params['Qemb'],params['QTA'])
        return params

    def __init__(self,
                 n_words,
                 word_embedding_dim=128,
                 hidden_status_dim=128,
                 n_topics=2,
                 topic_embedding_dim=5,
                 input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'n_topics': n_topics,
            'topic_embedding_dim': topic_embedding_dim,
            'learning_rate':
            0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.
            adadelta,  # sgd, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate).
        }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        #print self.tparams['Temb']
        #self.answer_emb=T.dot(self.tparams['Qemb'],self.tparams['QTA'])
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask',
                                           dtype=config.globalFloatType())
        self.style = tensor.matrix('style', dtype='int64')
        self.style_mask = tensor.matrix('style_mask',
                                        dtype=config.globalFloatType())
        self.style_embedding = self.tparams['Wemb_e'][
            self.style.flatten()].reshape([
                self.style.shape[0], self.style.shape[1],
                options['word_embedding_dim']
            ])
        #self.question_mask = tensor.matrix('question_mask', dtype='int64')
        self.topic = tensor.matrix('topic', dtype=config.globalFloatType())
        # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.question_embedding = self.tparams['Wemb_e'][
            self.question.flatten()].reshape([
                self.question.shape[0], self.question.shape[1],
                options['word_embedding_dim']
            ])
        #   1. encoder layer
        self.encoder_layer = EncoderLayer(
            word_embedding_dim=options['word_embedding_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            tparams=self.tparams)
        self.encoder_hidden_status1 = self.encoder_layer.getOutput(
            inputs=(self.question_embedding, self.question_mask))
        self.encoder_layer_style = EncoderLayer(
            word_embedding_dim=options['word_embedding_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            tparams=self.tparams,
            prefix='encoder_layer_style')
        self.encoder_hidden_status_style = self.encoder_layer_style.getOutput(
            inputs=(self.style_embedding, self.style_mask))
        self.encoder_hidden_status = tensor.concatenate(
            [
                self.encoder_hidden_status1[-1],
                self.encoder_hidden_status_style[-1]
            ],
            axis=1).reshape([
                1, self.encoder_hidden_status_style.shape[1],
                2 * self.encoder_hidden_status_style.shape[2]
            ])
        #self.topic_states = self.tparams['Temb'][self.topic.flatten()].reshape([1,self.question.shape[1], options['topic_embedding_dim']])
        #self.topic_change=T.alloc(self.topic_states,self.question.shape[0], self.question.shape[1], options['topic_embedding_dim'])
        #self.encoder_hidden_status = T.concatenate([self.encoder_hidden_status,self.topic_change], axis=2)
        #   2. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask',
                                         dtype=config.globalFloatType())
        # self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        # self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        self.answer_embedding = self.tparams['Wemb_e'][
            self.answer.flatten()].reshape([
                self.answer.shape[0], self.answer.shape[1],
                options['word_embedding_dim']
            ])
        self.decoder_layer = DecoderLayer_Cho(
            word_embedding_dim=options['word_embedding_dim'] +
            2 * options['hidden_status_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            tparams=self.tparams)
        self.decoder_hidden_status = self.decoder_layer.getOutput(
            inputs=(self.answer_embedding, self.answer_mask,
                    self.encoder_hidden_status))

        #   2.5 softmax layer
        self.softmax_layer = SoftmaxLayer(n_in=options["hidden_status_dim"],
                                          n_out=2,
                                          tparams=self.tparams)
        self.softmax_input = self.encoder_hidden_status1[-1]
        self.output_error_vector = self.softmax_layer.negative_log_likelihood(
            self.softmax_input, tensor.cast(self.topic.flatten(), 'int64'))
        self.class_cost = self.output_error_vector.sum() / self.question.shape[1]
        #   3. maxout  layer
        self.maxout_layer = MaxoutLayer(
            base_dim=options['word_embedding_dim'],
            refer_dim=3 * options["hidden_status_dim"] +
            options['word_embedding_dim'],
            tparams=self.tparams,
            prefix="maxout")
        self.maxout_input = tensor.concatenate(
            [self.decoder_hidden_status[:-1, :, :].reshape(
                [(self.answer.shape[0] - 1) * self.answer.shape[1],
                 options['hidden_status_dim']]),
             tensor.alloc(self.encoder_hidden_status[-1, :, :],
                          self.answer.shape[0] - 1, self.answer.shape[1],
                          2 * options['hidden_status_dim']).reshape(
                [(self.answer.shape[0] - 1) * self.answer.shape[1],
                 2 * options['hidden_status_dim']]),
             self.answer_embedding[:-1, :, :].reshape(
                [(self.answer.shape[0] - 1) * self.answer.shape[1],
                 options['word_embedding_dim']])],
            axis=1)
        output_error_vector = self.maxout_layer.negative_log_likelihood(
            self.tparams['Wemb_e'],
            self.maxout_input,
            y=self.answer[1:, :].flatten())
        self.topic_matrix = tensor.alloc(self.topic.flatten(),
                                         self.answer.shape[0] - 1,
                                         self.answer.shape[1]).flatten()
        #self.topic_matrix_change=2*(self.topic_matrix-0.5)
        self.topic_matrix_change = self.topic_matrix
        m = self.answer_mask[1:, :]
        self.cost = -1.0 * tensor.dot(
            output_error_vector,
            m.flatten() * self.topic_matrix_change) / m.sum()
        self.cost = self.cost - self.class_cost
        self.output_error_vector = output_error_vector.reshape(
            [self.answer.shape[0] - 1, self.answer.shape[1]])
        self.output_error_vector = self.output_error_vector * m
        # per-sequence masked negative log-likelihood
        self.output_error_vector = -self.output_error_vector.sum(axis=0) / m.sum(axis=0)

        self._set_parameters(input_params)  # params from list to TensorVariable

    def get_training_function(self,
                              cr,
                              error_type="RMSE",
                              batch_size=10,
                              batch_repeat=1):
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        grads = tensor.grad(self.cost, wrt=self.tparams.values())
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads, [
            self.question, self.question_mask, self.answer, self.answer_mask,
            self.style, self.style_mask, self.topic
        ], [self.cost])

        def update_function(index):
            (question, question_mask), (answer, answer_mask), (style, style_mask), (topic, topic_mask), _, _ = \
                cr.get_train_set([index * batch_size, (index + 1) * batch_size])
            for _ in xrange(batch_repeat):
                cost = f_grad_shared(question, question_mask, answer,
                                     answer_mask, style, style_mask, topic)
                f_update(self.options["learning_rate"])
            return cost

        return update_function

    def get_validing_function(self, cr):
        (question, question_mask), (answer, answer_mask), (
            style, style_mask), (topic, topic_mask), _, _ = cr.get_valid_set()
        #print topic
        valid_function = theano.function(inputs=[],
                                         outputs=[self.cost],
                                         givens={
                                             self.question: question,
                                             self.question_mask: question_mask,
                                             self.answer: answer,
                                             self.answer_mask: answer_mask,
                                             self.style: style,
                                             self.style_mask: style_mask,
                                             self.topic: topic
                                         },
                                         name='valid_function')

        return valid_function

    def get_testing_function(self, cr):
        (question, question_mask), (answer, answer_mask), (
            style, style_mask), (topic, topic_mask), _, _ = cr.get_test_set()
        test_function = theano.function(inputs=[],
                                        outputs=[self.cost],
                                        givens={
                                            self.question: question,
                                            self.question_mask: question_mask,
                                            self.answer: answer,
                                            self.answer_mask: answer_mask,
                                            self.style: style,
                                            self.style_mask: style_mask,
                                            self.topic: topic
                                        },
                                        name='test_function')
        (question, question_mask), (answer, answer_mask), (
            style, style_mask), (topic, topic_mask), _, _ = cr.get_pr_set()
        pr_function = theano.function(inputs=[],
                                      outputs=[self.output_error_vector],
                                      givens={
                                          self.question: question,
                                          self.question_mask: question_mask,
                                          self.answer: answer,
                                          self.answer_mask: answer_mask,
                                          self.style: style,
                                          self.style_mask: style_mask,
                                          self.topic: topic
                                      },
                                      on_unused_input='ignore',
                                      name='pr_function')

        return test_function, pr_function

    def get_deploy_function(self):
        maxout_input = tensor.concatenate([
            self.decoder_hidden_status[-1, :, :],
            self.encoder_hidden_status[-1, :, :],
            self.answer_embedding[-1, :, :]
        ],
                                          axis=1)
        pred_word, pred_word_probability = self.maxout_layer.getOutput(
            self.tparams['Wemb_e'], maxout_input)
        pred_words_array = theano.tensor.argsort(pred_word_probability)[:,
                                                                        -1000:]
        pred_word_probability_array = theano.tensor.transpose(
            pred_word_probability[
                theano.tensor.arange(pred_words_array.shape[0]),
                theano.tensor.transpose(pred_words_array)])
        deploy_function = theano.function(
            inputs=[
                self.question, self.question_mask, self.answer,
                self.answer_mask, self.style, self.style_mask, self.topic
            ],
            outputs=[pred_words_array, pred_word_probability_array],
            on_unused_input='ignore',
            name='deploy_function')

        return deploy_function

    def get_cost(self):
        deploy_function = theano.function(inputs=[
            self.question, self.question_mask, self.answer, self.answer_mask,
            self.topic
        ],
                                          outputs=self.cost)
        return deploy_function
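A rough sketch of how this style-and-topic network might be driven once a corpus reader is available. The corpus reader `cr` and its get_train_set/get_valid_set interface are assumed from the snippet above and are not defined here; the batch and epoch counts are illustrative.

def train(network, cr, n_batches, n_epochs=5):
    # `network` is an RnnEncoderDecoderNetwork as defined above; `cr` is the
    # project's corpus reader (assumed, not shown in this example).
    train_step = network.get_training_function(cr, batch_size=10)
    valid = network.get_validing_function(cr)
    for epoch in range(n_epochs):
        for index in range(n_batches):
            cost = train_step(index)
        print('epoch %d: last batch cost %s, valid cost %s'
              % (epoch, cost, valid()))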
Ejemplo n.º 9
0
class RnnEncoderDecoderNetwork(Network):
    """
    This class will process the dialog pair with an encoder-decoder network.
    It has 2 abilities:
        1. Train the language model.
        2. Model the relationship of Q&A
    """

    def init_global_params(self, options):
        """
        Global (not LSTM) parameters, for the embedding and the classifier.
        """
        params = OrderedDict()
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_e'] = (0.01 * randn).astype(config.globalFloatType()) 
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_d'] = (0.01 * randn).astype(config.globalFloatType()) 

        return params


    def __init__(self, n_words, word_embedding_dim=128, hidden_status_dim=128, input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop available; sgd is very hard to use and not recommended (probably needs momentum and a decaying learning rate).
            }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
        #   1. encoder layer
        self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                          hidden_status_dim=options['hidden_status_dim'],
                                          tparams=self.tparams)
        self.encoder_hidden_status = self.encoder_layer.get_output(inputs=(question_embedding, self.question_mask))
        
        #   2. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        # self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        # self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        self.decoder_layer = DecoderLayer_Seq2Seq(word_embedding_dim=options['word_embedding_dim'],
                                                  hidden_status_dim=options['hidden_status_dim'],
                                                  tparams=self.tparams)
        self.decoder_hidden_status = self.decoder_layer.get_output(inputs=(answer_embedding, self.answer_mask,
                                                                          self.encoder_hidden_status))
    
        #   3. softmax layer
        self.softmax_layer = SoftmaxLayer(n_in=options["hidden_status_dim"] ,
                                          n_out=options["n_words"] ,
                                          tparams=self.tparams)
        self.softmax_input = self.decoder_hidden_status[:-1, :, :].reshape(
            [(self.answer.shape[0] - 1) * self.answer.shape[1], options['hidden_status_dim']])
        
        likihood_vector = \
            self.softmax_layer.likelihood(input_data=self.softmax_input,
                                          y=self.answer[1:, :].flatten())
        # get evaluation and cost
        likihood_vector = - tensor.log(likihood_vector)
        self.zae, self.sae, self.bae, self.likihood_vector = \
            self.get_evaluation(likihood_vector, self.answer_mask[1:, :], 
                                self.answer.shape[0], self.answer.shape[1])
        self.cost = self.zae
        
        self.set_parameters(input_params)  # params from list to TensorVariable
    

    def get_training_function(self, cr, error_type="RMSE", batch_size=10, batch_repeat=1):
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        grads = tensor.grad(self.cost, wrt=self.tparams.values())
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads,
                                            [self.question, self.question_mask,
                                             self.answer, self.answer_mask],
                                            [self.cost])
        
        def update_function(index):
            (question, question_mask), (answer, answer_mask), _, _ = \
                cr.get_train_set([index * batch_size, (index + 1) * batch_size])
            for _ in xrange(batch_repeat):
                cost = f_grad_shared(question, question_mask, answer, answer_mask)
                f_update(self.options["learning_rate"])
            return cost
        
        return update_function
    

    def get_validing_function(self, cr):
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_valid_set()
        valid_function = theano.function(inputs=[],
                                         outputs=[self.cost],
                                         givens={self.question: question,
                                                 self.question_mask: question_mask,
                                                 self.answer: answer,
                                                 self.answer_mask: answer_mask},
                                         name='valid_function')
        
        return valid_function
    

    def get_testing_function(self, cr):
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_test_set()
        test_function = theano.function(inputs=[],
                                        outputs=[self.cost],
                                        givens={self.question: question,
                                                self.question_mask: question_mask,
                                                self.answer: answer,
                                                self.answer_mask: answer_mask},
                                        name='test_function')
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_pr_set()
        pr_function = theano.function(inputs=[],
                                      outputs=[self.likihood_vector],
                                      givens={self.question: question,
                                              self.question_mask: question_mask,
                                              self.answer: answer,
                                              self.answer_mask: answer_mask},
                                      name='pr_function')
        
        return test_function, pr_function
    

    def get_deploy_function(self):
        softmax_input = self.decoder_hidden_status[-1, :, :]
        pred_word, pred_word_probability = self.softmax_layer.get_output(softmax_input)
        deploy_function = theano.function(inputs=[self.question, self.question_mask,
                                                  self.answer, self.answer_mask],
                                          outputs=[pred_word, pred_word_probability],
                                          name='deploy_function')
        
        return deploy_function
    
    
    def get_outtest_function(self, cr, max_iter, batch_size=200):
        
        outtest_function = theano.function(inputs=[self.question, self.question_mask, self.answer, self.answer_mask],
                                           outputs=[self.zae, self.sae, self.bae],
                                           name='outtest_function')
        def update_function():
            zae, sae, bae = 0.0, 0.0, 0.0
            for idx in range(max_iter):
                (question, question_mask), (answer, answer_mask), _, _ = \
                    cr.get_outtest_set([idx * batch_size, (idx + 1) * batch_size])
                z, s, b = outtest_function(question, question_mask, answer, answer_mask)
                zae += z
                sae += s
                bae += b
            zae /= max_iter
            sae /= max_iter
            bae /= max_iter
            return zae, sae, bae
        
        return update_function
    def __init__(self, n_words, word_embedding_dim, n_style, style_embedding_dim,
                 hidden_status_dim, input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'n_style': n_style,
            'style_embedding_dim': style_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop available; sgd is very hard to use and not recommended (probably needs momentum and a decaying learning rate).
            'tiny_float': 1e-20
            }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
        #   1. encoder layer
        self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                          hidden_status_dim=options['hidden_status_dim'],
                                          tparams=self.tparams)
        self.encoder_hidden_status = self.encoder_layer.get_output(inputs=(self.question_embedding, self.question_mask))
        
        #   2. style layer
        self.style_layer = MaxoutBiasedLayer(base_dim=options['style_embedding_dim'],
                                       refer_dim=options["hidden_status_dim"],
                                       tparams=self.tparams,
                                       prefix="style")
        self.style_prob = self.style_layer.probability(base_data=self.tparams['Semb'],
                                                      refer_data=self.encoder_hidden_status[-1, :, :])  # (samples,n_style)
        
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        
        # extend the data
        n_question_time_stpes = self.question.shape[0]
        n_answer_time_stpes = self.answer.shape[0]
        n_samples = self.question.shape[1]
        
        
        self.cost = 0
        self.zi_error = 0
        self.st_error = 0
        self.predict_probability_list = []
        log_likelihood_vector_list = []
        for style_index in range(self.options['n_style']):
            decoder_layer = DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + \
                                                  options['hidden_status_dim'] + options['style_embedding_dim'],
                                                  hidden_status_dim=options['hidden_status_dim'],
                                                  tparams=self.tparams,
                                                  prefix='decoder' + str(style_index))
            
            style_embedding = tensor.alloc(self.tparams['Semb'][style_index],
                                n_question_time_stpes,
                                n_samples,
                                self.options['style_embedding_dim'])
            
            encoder_hidden_status = tensor.concatenate([self.encoder_hidden_status, style_embedding], axis=2)
            
            #   3. decoder layer
            decoder_hidden_status = decoder_layer.get_output(inputs=[self.answer_embedding, self.answer_mask,
                                                                              encoder_hidden_status])
            #   4. maxout  layer
            maxout_input = tensor.concatenate([decoder_hidden_status[:-1, :, :].
                                                        reshape([(n_answer_time_stpes - 1) * n_samples,
                                                                 options['hidden_status_dim']]),
                                                     tensor.alloc(encoder_hidden_status[-1, :, :],
                                                                  n_answer_time_stpes - 1,
                                                                  n_samples,
                                                                  options['style_embedding_dim'] + \
                                                                  options['hidden_status_dim']).
                                                        reshape([(n_answer_time_stpes - 1) * n_samples,
                                                                 options['hidden_status_dim'] + \
                                                                 options['style_embedding_dim']]),
                                                     self.answer_embedding[:-1, :, :].
                                                        reshape([(n_answer_time_stpes - 1) * n_samples,
                                                                 options['word_embedding_dim']])],
                                                    axis=1)
            a_index = tensor.cast(self.answer_mask, 'int64').sum(axis=0) - 1
            predict_maxout_input = tensor.concatenate([decoder_hidden_status[-1, :, :].
                                                            reshape([n_samples,
                                                            options['hidden_status_dim']]),
                                                       encoder_hidden_status[-1, :, :].
                                                        reshape([n_samples,
                                                                 options['hidden_status_dim'] + \
                                                                 options['style_embedding_dim']]),
                                                       self.answer_embedding[a_index, tensor.arange(a_index.shape[0]), :].
                                                            reshape([n_samples,
                                                                     options['word_embedding_dim']])],
                                                    axis=1)
            
            maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                            refer_dim=2 * options["hidden_status_dim"] + \
                                            options['word_embedding_dim'] + options['style_embedding_dim'],
                                            tparams=self.tparams,
                                            prefix='decoder_maxout' + str(style_index))
            
            likelihood_vector = \
                maxout_layer.likelihood(base_data=self.tparams['Wemb_d'],
                                             refer_data=maxout_input,
                                             y=self.answer[1:, :].flatten())
                
            pred_word, predict_probability0 = \
                maxout_layer.get_output(base_data=self.tparams['Wemb_d'],
                                             refer_data=predict_maxout_input)
            
            # carefully check
            likelihood_vector = likelihood_vector.reshape(
                  [n_answer_time_stpes - 1, n_samples])
            log_likelihood_vector0 = tensor.log(likelihood_vector + self.options['tiny_float']) * self.answer_mask[1:, :]
            log_likelihood_vector0 = log_likelihood_vector0.sum(axis=0)
            log_likelihood_vector_list.append(log_likelihood_vector0.dimshuffle(0, 'x'))
            
            predict_probability0 = predict_probability0.reshape(
                  [n_samples, options['n_words']])
            self.predict_probability_list.append(predict_probability0)
            
           
            # options['n_style'], n_answer_time_stpes - 1,  n_samples
            # Transform the multiplication into addition by summing in log space.
            
        log_likelihood_vector = tensor.concatenate(log_likelihood_vector_list, axis=1)
        
        sentence_probability = tensor.exp(log_likelihood_vector) * self.style_prob
        sentence_probability = sentence_probability + self.options['tiny_float']
        sentence_probability = tensor.sum(sentence_probability, axis=1)
        
        negative_log_sentence_probability = -tensor.log(sentence_probability)
        self.likelihood_vector = negative_log_sentence_probability
        self.cost = negative_log_sentence_probability.sum() / self.answer_mask[1:, :].sum()
        
        self.zi_error = negative_log_sentence_probability / tensor.sum(self.answer_mask[1:, :], 0)
        self.zi_error = self.zi_error.mean()
        self.st_error = negative_log_sentence_probability.mean()
            
        self.pred_word = pred_word
        self.set_parameters(input_params)  # params from list to TensorVariable
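The loop above builds one decoder and one maxout output layer per style and then mixes them: the probability of the answer is the sum over styles of p(style | question) times the product of the per-word probabilities. The snippet exponentiates the summed log-likelihoods and multiplies by the style probabilities directly (plus a tiny constant); the same mixture can be written more stably with log-sum-exp. A small NumPy illustration of that idea, not part of the original code:

import numpy

def mixture_sentence_nll(style_log_probs, per_style_token_log_probs, mask):
    # style_log_probs: (n_samples, n_style) log p(style | question).
    # per_style_token_log_probs: list of n_style arrays, each of shape
    #   (n_steps - 1, n_samples) with log p(word | history, style).
    # mask: (n_steps - 1, n_samples), 1 for real tokens, 0 for padding.
    per_style = [(lp * mask).sum(axis=0) for lp in per_style_token_log_probs]
    log_joint = numpy.stack(per_style, axis=1) + style_log_probs
    # log-sum-exp over the style axis instead of exp() followed by sum().
    m = log_joint.max(axis=1, keepdims=True)
    log_sentence_prob = m[:, 0] + numpy.log(numpy.exp(log_joint - m).sum(axis=1))
    return -log_sentence_prob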
Ejemplo n.º 11
0
    def __init__(self,
                 n_words,
                 word_embedding_dim=128,
                 hidden_status_dim=128,
                 n_topics=2,
                 topic_embedding_dim=5,
                 input_params=None,
                 word_embedings=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'n_topics': n_topics,
            'topic_embedding_dim': topic_embedding_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.adadelta,  # sgd, adadelta and rmsprop available; sgd is very hard to use and not recommended (probably needs momentum and a decaying learning rate).
        }
        # global parameters.
        params = self.init_global_params(options, word_embedings)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        #print self.tparams['Temb']
        #self.answer_emb=T.dot(self.tparams['Qemb'],self.tparams['QTA'])
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.reference = tensor.matrix('reference', dtype='int64')
        self.reference_mask = tensor.matrix('reference_mask',
                                            dtype=config.globalFloatType())
        #self.reference_mask = tensor.matrix('reference_mask', dtype='int64')
        self.topic = tensor.matrix('topic', dtype=config.globalFloatType())
        self.context = tensor.tensor3('context', dtype='int64')
        self.context_mask = tensor.tensor3('context_mask',
                                           dtype=config.globalFloatType())
        self.context_mask2 = tensor.matrix('context_mask2',
                                           dtype=config.globalFloatType())
        # self.reference.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.reference_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.reference_embedding = self.tparams['Wemb_e'][
            self.reference.flatten()].reshape([
                self.reference.shape[0], self.reference.shape[1],
                options['word_embedding_dim']
            ])
        #   1. encoder layer
        self.encoder_layer_reference = EncoderLayer(
            word_embedding_dim=options['word_embedding_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            tparams=self.tparams,
            prefix='Encoder')
        self.encoder_hidden_status_reference = self.encoder_layer_reference.getOutput(
            inputs=(self.reference_embedding, self.reference_mask))
        #self.topic_states = self.tparams['Temb'][self.topic.flatten()].reshape([1,self.reference.shape[1], options['topic_embedding_dim']])
        #self.topic_change=T.alloc(self.topic_states,self.reference.shape[0], self.reference.shape[1], options['topic_embedding_dim'])
        #self.encoder_hidden_status = T.concatenate([self.encoder_hidden_status,self.topic_change], axis=2)
        #   2. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask',
                                         dtype=config.globalFloatType())
        # self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        # self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        self.answer_embedding = self.tparams['Wemb_e'][
            self.answer.flatten()].reshape([
                self.answer.shape[0], self.answer.shape[1],
                options['word_embedding_dim']
            ])
        self.encoder_hidden_status_answer = self.encoder_layer_reference.getOutput(
            inputs=(self.answer_embedding, self.answer_mask))
        self.context_emdedding = self.tparams['Wemb_e'][
            self.context.flatten()].reshape([
                self.context.shape[0],
                self.context.shape[1] * self.context.shape[2],
                options['word_embedding_dim']
            ])
        self.encoder_hidden_status_context1 = self.encoder_layer_reference.getOutput(
            inputs=(self.context_emdedding,
                    self.context_mask.flatten().reshape([
                        self.context.shape[0], self.context.shape[1] *
                        self.context.shape[2]
                    ])))
        self.encoder_layer_context2 = EncoderLayer(
            word_embedding_dim=options['hidden_status_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            tparams=self.tparams,
            prefix='Encoder2')
        self.encoder_hidden_status_context2 = self.encoder_layer_context2.getOutput(
            inputs=(self.encoder_hidden_status_context1[-1, :, :].reshape([
                self.context.shape[1], self.context.shape[2],
                options['hidden_status_dim']
            ]), self.context_mask2))
        #self.context_processed=tensor.transpose(tensor.transpose(self.encoder_hidden_status_context2[-1])*self.topic.flatten())
        self.context_processed = self.encoder_hidden_status_context2[-1]
        #self.rcm=tensor.dot(tensor.concatenate([self.encoder_hidden_status_reference[-1],self.context_processed],axis=1),self.tparams['P_M'])
        #self.acm=tensor.dot(tensor.concatenate([self.encoder_hidden_status_answer[-1],self.context_processed],axis=1),self.tparams['P_M'])
        #self.softmax_input=tensor.dot(tensor.concatenate([self.acm,self.rcm],axis=1),self.tparams['P_N'])
        self.rmc = tensor.batched_dot(
            tensor.dot(self.encoder_hidden_status_reference[-1],
                       self.tparams['P_M']), self.context_processed)
        self.amc = tensor.batched_dot(
            tensor.dot(self.encoder_hidden_status_answer[-1],
                       self.tparams['P_M']), self.context_processed)
        self.softmax_input = tensor.dot(
            tensor.concatenate([self.rmc, self.amc], axis=1),
            self.tparams['P_N'])
        #self.softmax_input=self.encoder_hidden_status_reference[-1]-self.encoder_hidden_status_reference[-1]+self.encoder_hidden_status_context2[-1]-self.encoder_hidden_status_answer[-1]+self.encoder_hidden_status_answer[-1]
        #self.softmax_input=self.rcm-self.acm
        #self.softmax_layer=SoftmaxLayer(n_in=options['hidden_status_dim'],n_out=3,tparams=self.tparams)
        self.softmax_layer = SoftmaxLayer(n_in=options['hidden_status_dim'],
                                          n_out=3,
                                          tparams=self.tparams)
        self.output_vector = self.softmax_layer.negative_log_likelihood(
            self.softmax_input,
            tensor.cast(
                self.topic.flatten() + tensor.ones_like(self.topic.flatten()),
                'int64'))
        self.cost = -1.0 * self.output_vector.sum() / self.context.shape[2]
        #self.cost=((tensor.dot(mutti_m_am,(score-topic.flatten()))**2).sum()+0.01*self.l2)/(self.context.shape[2]/2)
        #self.cost=((tensor.max(tensor.dot(mutti_m_am,(topic.flatten()-score))*tensor.sgn(tensor.dot(mutti_m_am,(topic.flatten()))-tensor.ones(self.context.shape[2]/2)/2),tensor.zeros(self.context.shape[2]/2))**2).sum()+0.01*self.l2)/(self.context.shape[2]/2)
        '''
        self.ground_truth=tensor.dot(mutti_m_am,topic.flatten())
        self.score_diff=tensor.dot(mutti_m_am,score)
        self.ground_minus_score=(self.ground_truth-self.score_diff)
        #self.cost_max=(tensor.max(tensor.zeros_like(self.ground_truth),self.ground_truth*self.ground_minus_score))**2
        self.cost_max=(tensor.max(tensor.concatenate(([tensor.zeros_like(self.ground_truth)],[self.ground_truth*self.ground_minus_score]),axis=0),axis=0))**2+(tensor.ones_like(self.ground_truth)-tensor.abs_(self.ground_truth))*(self.ground_minus_score)**2
        self.cost=(self.cost_max.sum()+0.01*self.l2)/(self.context.shape[2]/2)
        '''
        #self.cost=((tensor.dot(mutti_m_am,(score-topic.flatten()))**2).sum()+((score-topic.flatten())**2).sum()+0.01*self.l2)/(self.context.shape[2]/2)
        '''
        self.decoder_layer = DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + options['hidden_status_dim'],
                                              hidden_status_dim=options['hidden_status_dim'],
                                              tparams=self.tparams)
        self.decoder_hidden_status = self.decoder_layer.getOutput(inputs=(self.answer_embedding, self.answer_mask,
                                                                          self.encoder_hidden_status))

        #   3. maxout  layer
        self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                                    refer_dim=2 * options["hidden_status_dim"] + options['word_embedding_dim'],
                                                    tparams=self.tparams,
                                                    prefix="maxout")
        self.maxout_input = tensor.concatenate([self.decoder_hidden_status[:-1, :, :].
                                                    reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                                             options['hidden_status_dim']]),
                                                 tensor.alloc(self.encoder_hidden_status[-1, :, :],
                                                              self.answer.shape[0] - 1,
                                                              self.answer.shape[1],
                                                              options['hidden_status_dim']).
                                                    reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                                             options['hidden_status_dim']]),
                                                 self.answer_embedding[:-1, :, :].
                                                    reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                                             options['word_embedding_dim']])],
                                                axis=1)
        output_error_vector = self.maxout_layer.negative_log_likelihood(self.tparams['Wemb_d'],
                                                                    self.maxout_input,
                                                                    y=self.answer[1:, :].flatten())
        self.topic_matrix=tensor.alloc(self.topic.flatten(),self.answer.shape[0] - 1,self.answer.shape[1]).flatten()
        #self.topic_matrix_change=2*(self.topic_matrix-0.5)
        self.topic_matrix_change=self.topic_matrix
        m = self.answer_mask[1:, :]
        self.cost = -1.0 * tensor.dot(output_error_vector, m.flatten()*self.topic_matrix_change) / m.sum()
        self.output_error_vector = output_error_vector.reshape([self.answer.shape[0] - 1 , self.answer.shape[1]])
        self.output_error_vector = self.output_error_vector * m
        self.output_error_vector = -output_error_vector.sum(axis=0) / m.sum(axis=0)
        '''
        self.output_error_vector = self.cost
        self._set_parameters(input_params)  # params from list to TensorVariable
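In this example the network scores a (reference, answer, context) triple rather than generating text: the final encoder states of the reference and of the answer are each matched against the encoded context through the third-order tensor P_M, the two matching vectors are concatenated and projected by P_N, and a 3-way softmax over the shifted topic label gives the cost. A small NumPy restatement of the matching computation (the function itself is only an illustration; the parameter names follow the snippet):

import numpy

def bilinear_match(reference_state, answer_state, context_state, P_M, P_N):
    # reference_state, answer_state, context_state: (n_samples, hidden)
    #   final encoder hidden states.
    # P_M: (hidden, hidden, hidden); P_N: (2 * hidden, hidden).
    # rmc[n] combines the reference and context states through the 3-D tensor
    # P_M (one hidden x hidden slice per output feature), mirroring
    # tensor.batched_dot(tensor.dot(r, P_M), c) in the snippet above.
    rmc = numpy.einsum('nj,ijk,nk->ni', reference_state, P_M, context_state)
    amc = numpy.einsum('nj,ijk,nk->ni', answer_state, P_M, context_state)
    return numpy.dot(numpy.concatenate([rmc, amc], axis=1), P_N)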
Ejemplo n.º 12
0
class RnnEncoderDecoderNetwork(Network):
    """
    This class will process the dialog pair with an encoder-decoder network.
    It has 2 abilities:
        1. Train the language model.
        2. Model the relationship of Q&A
    """
    def init_global_params(self, options, word_embedings):
        """
        Global (not LSTM) parameters, for the embedding and the classifier.
        """
        params = OrderedDict()
        randn = numpy.random.rand(options['n_words'],
                                  options['word_embedding_dim'])
        params['Wemb_e'] = (0.1 * randn).astype(config.globalFloatType())
        #params['Wemb_e'] =word_embedings
        #randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        #params['Wemb_e'] = (0.1 * randn).astype(config.globalFloatType())
        randn = numpy.random.rand(options['hidden_status_dim'],
                                  options['hidden_status_dim'],
                                  options['hidden_status_dim'])
        params['P_M'] = (0.1 * randn).astype(config.globalFloatType())
        randn = numpy.random.rand(2 * options['hidden_status_dim'],
                                  options['hidden_status_dim'])
        params['P_N'] = (0.1 * randn).astype(config.globalFloatType())
        '''
        randn = numpy.random.rand(1)
        params['P_alpha']= (1 * randn).astype(config.globalFloatType())
        randn = numpy.random.rand(1)
        params['P_beta']= (1 * randn).astype(config.globalFloatType())
        '''
        #randn = numpy.random.rand(options['topic_embedding_dim'], options['topic_embedding_dim'])/options['topic_embedding_dim']*2
        #params['QTA']=(1.0 * randn).astype(config.globalFloatType())
        #randn = numpy.random.rand(options['n_topics'], options['topic_embedding_dim'])
        #params['Temb'] = (0.1 * randn).astype(config.globalFloatType())
        #params['Temb'] = numpy.dot(params['Qemb'],params['QTA'])
        return params

    def __init__(self,
                 n_words,
                 word_embedding_dim=128,
                 hidden_status_dim=128,
                 n_topics=2,
                 topic_embedding_dim=5,
                 input_params=None,
                 word_embedings=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'n_topics': n_topics,
            'topic_embedding_dim': topic_embedding_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.adadelta,  # sgd, adadelta and rmsprop available; sgd is very hard to use and not recommended (probably needs momentum and a decaying learning rate).
        }
        # global parameters.
        params = self.init_global_params(options, word_embedings)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        #print self.tparams['Temb']
        #self.answer_emb=T.dot(self.tparams['Qemb'],self.tparams['QTA'])
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.reference = tensor.matrix('reference', dtype='int64')
        self.reference_mask = tensor.matrix('reference_mask',
                                            dtype=config.globalFloatType())
        #self.reference_mask = tensor.matrix('reference_mask', dtype='int64')
        self.topic = tensor.matrix('topic', dtype=config.globalFloatType())
        self.context = tensor.tensor3('context', dtype='int64')
        self.context_mask = tensor.tensor3('context_mask',
                                           dtype=config.globalFloatType())
        self.context_mask2 = tensor.matrix('context_mask2',
                                           dtype=config.globalFloatType())
        # self.reference.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.reference_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.reference_embedding = self.tparams['Wemb_e'][
            self.reference.flatten()].reshape([
                self.reference.shape[0], self.reference.shape[1],
                options['word_embedding_dim']
            ])
        #   1. encoder layer
        self.encoder_layer_reference = EncoderLayer(
            word_embedding_dim=options['word_embedding_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            tparams=self.tparams,
            prefix='Encoder')
        self.encoder_hidden_status_reference = self.encoder_layer_reference.getOutput(
            inputs=(self.reference_embedding, self.reference_mask))
        #self.topic_states = self.tparams['Temb'][self.topic.flatten()].reshape([1,self.reference.shape[1], options['topic_embedding_dim']])
        #self.topic_change=T.alloc(self.topic_states,self.reference.shape[0], self.reference.shape[1], options['topic_embedding_dim'])
        #self.encoder_hidden_status = T.concatenate([self.encoder_hidden_status,self.topic_change], axis=2)
        #   2. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask',
                                         dtype=config.globalFloatType())
        # self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        # self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        self.answer_embedding = self.tparams['Wemb_e'][
            self.answer.flatten()].reshape([
                self.answer.shape[0], self.answer.shape[1],
                options['word_embedding_dim']
            ])
        self.encoder_hidden_status_answer = self.encoder_layer_reference.getOutput(
            inputs=(self.answer_embedding, self.answer_mask))
        self.context_emdedding = self.tparams['Wemb_e'][
            self.context.flatten()].reshape([
                self.context.shape[0],
                self.context.shape[1] * self.context.shape[2],
                options['word_embedding_dim']
            ])
        self.encoder_hidden_status_context1 = self.encoder_layer_reference.getOutput(
            inputs=(self.context_emdedding,
                    self.context_mask.flatten().reshape([
                        self.context.shape[0], self.context.shape[1] *
                        self.context.shape[2]
                    ])))
        self.encoder_layer_context2 = EncoderLayer(
            word_embedding_dim=options['hidden_status_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            tparams=self.tparams,
            prefix='Encoder2')
        self.encoder_hidden_status_context2 = self.encoder_layer_context2.getOutput(
            inputs=(self.encoder_hidden_status_context1[-1, :, :].reshape([
                self.context.shape[1], self.context.shape[2],
                options['hidden_status_dim']
            ]), self.context_mask2))
        #self.context_processed=tensor.transpose(tensor.transpose(self.encoder_hidden_status_context2[-1])*self.topic.flatten())
        self.context_processed = self.encoder_hidden_status_context2[-1]
        #self.rcm=tensor.dot(tensor.concatenate([self.encoder_hidden_status_reference[-1],self.context_processed],axis=1),self.tparams['P_M'])
        #self.acm=tensor.dot(tensor.concatenate([self.encoder_hidden_status_answer[-1],self.context_processed],axis=1),self.tparams['P_M'])
        #self.softmax_input=tensor.dot(tensor.concatenate([self.acm,self.rcm],axis=1),self.tparams['P_N'])
        self.rmc = tensor.batched_dot(
            tensor.dot(self.encoder_hidden_status_reference[-1],
                       self.tparams['P_M']), self.context_processed)
        self.amc = tensor.batched_dot(
            tensor.dot(self.encoder_hidden_status_answer[-1],
                       self.tparams['P_M']), self.context_processed)
        self.softmax_input = tensor.dot(
            tensor.concatenate([self.rmc, self.amc], axis=1),
            self.tparams['P_N'])
        #self.softmax_input=self.encoder_hidden_status_reference[-1]-self.encoder_hidden_status_reference[-1]+self.encoder_hidden_status_context2[-1]-self.encoder_hidden_status_answer[-1]+self.encoder_hidden_status_answer[-1]
        #self.softmax_input=self.rcm-self.acm
        #self.softmax_layer=SoftmaxLayer(n_in=options['hidden_status_dim'],n_out=3,tparams=self.tparams)
        self.softmax_layer = SoftmaxLayer(n_in=options['hidden_status_dim'],
                                          n_out=3,
                                          tparams=self.tparams)
        self.output_vector = self.softmax_layer.negative_log_likelihood(
            self.softmax_input,
            tensor.cast(
                self.topic.flatten() + tensor.ones_like(self.topic.flatten()),
                'int64'))
        self.cost = -1.0 * self.output_vector.sum() / self.context.shape[2]
        #self.cost=((tensor.dot(mutti_m_am,(score-topic.flatten()))**2).sum()+0.01*self.l2)/(self.context.shape[2]/2)
        #self.cost=((tensor.max(tensor.dot(mutti_m_am,(topic.flatten()-score))*tensor.sgn(tensor.dot(mutti_m_am,(topic.flatten()))-tensor.ones(self.context.shape[2]/2)/2),tensor.zeros(self.context.shape[2]/2))**2).sum()+0.01*self.l2)/(self.context.shape[2]/2)
        '''
        self.ground_truth=tensor.dot(mutti_m_am,topic.flatten())
        self.score_diff=tensor.dot(mutti_m_am,score)
        self.ground_minus_score=(self.ground_truth-self.score_diff)
        #self.cost_max=(tensor.max(tensor.zeros_like(self.ground_truth),self.ground_truth*self.ground_minus_score))**2
        self.cost_max=(tensor.max(tensor.concatenate(([tensor.zeros_like(self.ground_truth)],[self.ground_truth*self.ground_minus_score]),axis=0),axis=0))**2+(tensor.ones_like(self.ground_truth)-tensor.abs_(self.ground_truth))*(self.ground_minus_score)**2
        self.cost=(self.cost_max.sum()+0.01*self.l2)/(self.context.shape[2]/2)
        '''
        #self.cost=((tensor.dot(mutti_m_am,(score-topic.flatten()))**2).sum()+((score-topic.flatten())**2).sum()+0.01*self.l2)/(self.context.shape[2]/2)
        '''
        self.decoder_layer = DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + options['hidden_status_dim'],
                                              hidden_status_dim=options['hidden_status_dim'],
                                              tparams=self.tparams)
        self.decoder_hidden_status = self.decoder_layer.getOutput(inputs=(self.answer_embedding, self.answer_mask,
                                                                          self.encoder_hidden_status))

        #   3. maxout  layer
        self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                                    refer_dim=2 * options["hidden_status_dim"] + options['word_embedding_dim'],
                                                    tparams=self.tparams,
                                                    prefix="maxout")
        self.maxout_input = tensor.concatenate([self.decoder_hidden_status[:-1, :, :].
                                                    reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                                             options['hidden_status_dim']]),
                                                 tensor.alloc(self.encoder_hidden_status[-1, :, :],
                                                              self.answer.shape[0] - 1,
                                                              self.answer.shape[1],
                                                              options['hidden_status_dim']).
                                                    reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                                             options['hidden_status_dim']]),
                                                 self.answer_embedding[:-1, :, :].
                                                    reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                                             options['word_embedding_dim']])],
                                                axis=1)
        output_error_vector = self.maxout_layer.negative_log_likelihood(self.tparams['Wemb_d'],
                                                                    self.maxout_input,
                                                                    y=self.answer[1:, :].flatten())
        self.topic_matrix=tensor.alloc(self.topic.flatten(),self.answer.shape[0] - 1,self.answer.shape[1]).flatten()
        #self.topic_matrix_change=2*(self.topic_matrix-0.5)
        self.topic_matrix_change=self.topic_matrix
        m = self.answer_mask[1:, :]
        self.cost = -1.0 * tensor.dot(output_error_vector, m.flatten()*self.topic_matrix_change) / m.sum()
        self.output_error_vector = output_error_vector.reshape([self.answer.shape[0] - 1 , self.answer.shape[1]])
        self.output_error_vector = self.output_error_vector * m
        self.output_error_vector = -output_error_vector.sum(axis=0) / m.sum(axis=0)
        '''
        self.output_error_vector = self.cost
        self._set_parameters(input_params)  # params from list to TensorVariable

    def get_training_function(self,
                              cr,
                              error_type="RMSE",
                              batch_size=10,
                              batch_repeat=1):
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        grads = tensor.grad(self.cost, wrt=self.tparams.values())
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads, [
            self.reference, self.reference_mask, self.answer, self.answer_mask,
            self.topic, self.context, self.context_mask, self.context_mask2
        ], [self.cost])

        def update_function(index):
            (reference, reference_mask), (answer, answer_mask), (topic, topic_mask), (context, context_mask, context_mask2), _, _ = \
                cr.get_train_set([index * batch_size, (index + 1) * batch_size])
            for _ in xrange(batch_repeat):
                cost = f_grad_shared(reference, reference_mask, answer,
                                     answer_mask, topic, context, context_mask,
                                     context_mask2)
                f_update(self.options["learning_rate"])
            return cost

        return update_function

    def get_validing_function(self, cr):
        (reference, reference_mask), (answer, answer_mask), (
            topic, topic_mask), (context, context_mask,
                                 context_mask2), _, _ = cr.get_valid_set()
        #print len(reference[0])
        #print len(answer[0])
        #print len(topic[0])
        #print len(context[0])
        #print topic
        valid_function = theano.function(inputs=[],
                                         outputs=[self.cost],
                                         givens={
                                             self.reference: reference,
                                             self.reference_mask:
                                             reference_mask,
                                             self.answer: answer,
                                             self.answer_mask: answer_mask,
                                             self.topic: topic,
                                             self.context: context,
                                             self.context_mask: context_mask,
                                             self.context_mask2: context_mask2
                                         },
                                         name='valid_function')

        return valid_function

    def get_testing_function(self, cr):
        (reference, reference_mask), (answer, answer_mask), (
            topic, topic_mask), (context, context_mask,
                                 context_mask2), _, _ = cr.get_test_set()
        test_function = theano.function(inputs=[],
                                        outputs=[self.cost],
                                        givens={
                                            self.reference: reference,
                                            self.reference_mask:
                                            reference_mask,
                                            self.answer: answer,
                                            self.answer_mask: answer_mask,
                                            self.topic: topic,
                                            self.context: context,
                                            self.context_mask: context_mask,
                                            self.context_mask2: context_mask2
                                        },
                                        name='test_function')
        (reference, reference_mask), (answer, answer_mask), (
            topic, topic_mask), (context, context_mask,
                                 context_mask2), _, _ = cr.get_pr_set()
        #print context,context_mask,context_mask2
        pr_function = theano.function(inputs=[],
                                      outputs=[self.output_error_vector],
                                      givens={
                                          self.reference: reference,
                                          self.reference_mask: reference_mask,
                                          self.answer: answer,
                                          self.answer_mask: answer_mask,
                                          self.topic: topic,
                                          self.context: context,
                                          self.context_mask: context_mask,
                                          self.context_mask2: context_mask2
                                      },
                                      on_unused_input='ignore',
                                      name='pr_function')

        return test_function, pr_function

    def get_deploy_function(self):
        maxout_input = tensor.concatenate([
            self.decoder_hidden_status[-1, :, :],
            self.encoder_hidden_status[-1, :, :],
            self.answer_embedding[-1, :, :]
        ],
                                          axis=1)
        pred_word, pred_word_probability = self.maxout_layer.getOutput(
            self.tparams['Wemb_d'], maxout_input)
        pred_words_array = theano.tensor.argsort(pred_word_probability)[:,
                                                                        -10:]
        pred_word_probability_array = theano.tensor.transpose(
            pred_word_probability[
                theano.tensor.arange(pred_words_array.shape[0]),
                theano.tensor.transpose(pred_words_array)])
        deploy_function = theano.function(
            inputs=[
                self.reference, self.reference_mask, self.answer,
                self.answer_mask, self.topic
            ],
            outputs=[pred_words_array, pred_word_probability_array],
            on_unused_input='ignore',
            name='deploy_function')

        return deploy_function
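
# Standalone illustration (not part of the class above): the deploy function keeps
# the 10 most probable next words per sample with argsort and gathers their
# probabilities by fancy indexing. The same indexing in numpy (toy sizes, k=3):
import numpy

probs = numpy.random.rand(4, 7)                       # (n_samples, n_words)
probs = probs / probs.sum(axis=1, keepdims=True)
top_words = numpy.argsort(probs)[:, -3:]              # ascending, last column is the argmax
top_probs = numpy.transpose(
    probs[numpy.arange(top_words.shape[0]), numpy.transpose(top_words)])
assert top_probs.shape == (4, 3)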

    def classification_deploy(self):
        pred_word, pred_word_probability = self.softmax_layer.getOutput(
            self.softmax_input)
        deploy_function = theano.function(inputs=[
            self.reference, self.reference_mask, self.answer, self.answer_mask,
            self.context, self.context_mask, self.context_mask2
        ],
                                          outputs=[pred_word],
                                          on_unused_input='ignore',
                                          name='deploy_function')

        return deploy_function

    def get_cost(self):
        deploy_function = theano.function(inputs=[
            self.reference, self.reference_mask, self.answer, self.answer_mask,
            self.context, self.context_mask, self.context_mask2
        ],
                                          outputs=self.score)
        return deploy_function
Ejemplo n.º 13
0
    def __init__(self, n_words, word_embedding_dim, hidden_status_dim, input_params):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001
            }
        # global parameters
        params = self.init_global_params(options)
        # Theano parameters
        self.tparams = self.init_tparams(params)

        # construct network
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        '''
        theano.config.compute_test_value = 'off'
        self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        '''
        #   1. encoder layer
        self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                          hidden_status_dim=options['hidden_status_dim'],
                                          tparams=self.tparams, node_type=GRUNode)
        #   2. decoder layer
        self.decoder_layer = \
            DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + \
                             options['hidden_status_dim'], \
                             hidden_status_dim=options['hidden_status_dim'],
                             tparams=self.tparams, node_type=GRUNode)
        #   3. maxout layer
        self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                        refer_dim=2 * options['hidden_status_dim'] + \
                                         options['word_embedding_dim'], \
                                         tparams=self.tparams, prefix='maxout')
        
        #   1. encoder layer
        self.encoder_hidden_status = \
            self.encoder_layer.get_output(inputs=(self.question_embedding, self.question_mask))
        #   2. decoder layer
        self.decoder_hidden_status = \
            self.decoder_layer.get_output(inputs=(self.answer_embedding, self.answer_mask, \
                                                  self.encoder_hidden_status))
        #   3. maxout layer
        self.maxout_input = \
            tensor.concatenate([self.decoder_hidden_status[:-1, :, :]\
                                .reshape([(self.answer.shape[0] - 1) * self.answer.shape[1], \
                                          options['hidden_status_dim']]), \
                                tensor.alloc(self.encoder_hidden_status[-1, :, :], \
                                             self.answer.shape[0] - 1, \
                                             self.answer.shape[1], \
                                             options['hidden_status_dim'])\
                                .reshape([(self.answer.shape[0] - 1) * self.answer.shape[1], \
                                          options['hidden_status_dim']]), \
                                self.answer_embedding[:-1, :, :]\
                                .reshape([(self.answer.shape[0] - 1) * self.answer.shape[1], \
                                          options['word_embedding_dim']])], \
                               axis=1)
        likihood_vector = \
            self.maxout_layer.likelihood(base_data=self.tparams['Wemb_d'], \
                                         refer_data=self.maxout_input, \
                                         y=self.answer[1:, :].flatten())
        # get evaluation and cost
        likihood_vector = -tensor.log(likihood_vector)
        self.cost = tensor.dot(likihood_vector.flatten(), self.answer_mask[1:, :].flatten()) \
            / self.answer_mask[1:, :].sum()
        prob_matrix = likihood_vector.reshape([self.answer_mask[1:,:].shape[0], \
                                               self.answer_mask[1:,:].shape[1]])
        self.likihood_vector = tensor.sum(prob_matrix * self.answer_mask[1:, :], axis=0) \
            / tensor.sum(self.answer_mask[1:,:], axis=0)
        
        self.set_parameters(input_params)  # params from list to TensorVariable
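
# Standalone illustration (not part of the snippet above): the last few lines turn
# per-token likelihoods into one scalar cost over all unmasked tokens and one
# average negative log-likelihood per sample. A minimal numpy sketch on toy values:
import numpy

likelihood = numpy.array([[0.5, 0.2],   # (answer_len - 1, n_samples)
                          [0.1, 0.4]])
mask = numpy.array([[1.0, 1.0],
                    [1.0, 0.0]])
nll = -numpy.log(likelihood)
cost = numpy.dot(nll.flatten(), mask.flatten()) / mask.sum()      # scalar, corpus level
per_sample = (nll * mask).sum(axis=0) / mask.sum(axis=0)          # one value per sample
print(cost, per_sample)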
Ejemplo n.º 14
0
class ChoEncoderDecoderNetwork(Network):
    """
    This class processes a dialog pair with an encoder-decoder network.
    It has 2 abilities:
        1. Train the language model.
        2. Model the relationship between question and answer.
    """

    def init_global_params(self, options):
        """
        Global (non-LSTM) parameters, for the embeddings and the classifier.
        """
        params = OrderedDict()
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_e'] = (0.01 * randn).astype(config.globalFloatType()) 
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_d'] = (0.01 * randn).astype(config.globalFloatType()) 

        return params


    def __init__(self, n_words, word_embedding_dim, hidden_status_dim, input_params):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001
            }
        # global parameters
        params = self.init_global_params(options)
        # Theano parameters
        self.tparams = self.init_tparams(params)

        # construct network
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        '''
        theano.config.compute_test_value = 'off'
        self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        '''
        #   1. encoder layer
        self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                          hidden_status_dim=options['hidden_status_dim'],
                                          tparams=self.tparams, node_type=GRUNode)
        #   2. decoder layer
        self.decoder_layer = \
            DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + \
                             options['hidden_status_dim'], \
                             hidden_status_dim=options['hidden_status_dim'],
                             tparams=self.tparams, node_type=GRUNode)
        #   3. maxout layer
        self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                        refer_dim=2 * options['hidden_status_dim'] + \
                                         options['word_embedding_dim'], \
                                         tparams=self.tparams, prefix='maxout')
        
        #   1. encoder layer
        self.encoder_hidden_status = \
            self.encoder_layer.get_output(inputs=(self.question_embedding, self.question_mask))
        #   2. decoder layer
        self.decoder_hidden_status = \
            self.decoder_layer.get_output(inputs=(self.answer_embedding, self.answer_mask, \
                                                  self.encoder_hidden_status))
        #   3. maxout layer
        self.maxout_input = \
            tensor.concatenate([self.decoder_hidden_status[:-1, :, :]\
                                .reshape([(self.answer.shape[0] - 1) * self.answer.shape[1], \
                                          options['hidden_status_dim']]), \
                                tensor.alloc(self.encoder_hidden_status[-1, :, :], \
                                             self.answer.shape[0] - 1, \
                                             self.answer.shape[1], \
                                             options['hidden_status_dim'])\
                                .reshape([(self.answer.shape[0] - 1) * self.answer.shape[1], \
                                          options['hidden_status_dim']]), \
                                self.answer_embedding[:-1, :, :]\
                                .reshape([(self.answer.shape[0] - 1) * self.answer.shape[1], \
                                          options['word_embedding_dim']])], \
                               axis=1)
        likihood_vector = \
            self.maxout_layer.likelihood(base_data=self.tparams['Wemb_d'], \
                                         refer_data=self.maxout_input, \
                                         y=self.answer[1:, :].flatten())
        # get evaluation and cost
        likihood_vector = -tensor.log(likihood_vector)
        self.cost = tensor.dot(likihood_vector.flatten(), self.answer_mask[1:, :].flatten()) \
            / self.answer_mask[1:, :].sum()
        prob_matrix = likihood_vector.reshape([self.answer_mask[1:,:].shape[0], \
                                               self.answer_mask[1:,:].shape[1]])
        self.likihood_vector = tensor.sum(prob_matrix * self.answer_mask[1:, :], axis=0) \
            / tensor.sum(self.answer_mask[1:,:], axis=0)
        
        self.set_parameters(input_params)  # params from list to TensorVariable
    


    def get_training_function(self, cr, batch_size, batch_repeat=1):
        lr = tensor.scalar(name='lr')
        grads = tensor.grad(self.cost, wrt=self.tparams.values())
        f_grad_shared, f_update = self.adadelta(lr, self.tparams, grads, \
                                                [self.question, self.question_mask, \
                                                 self.answer, self.answer_mask], \
                                                [self.cost])
        
        def update_function(index):
            question, question_mask, answer, answer_mask = \
                cr.get_trainset([index * batch_size, (index + 1) * batch_size])
            for _ in xrange(batch_repeat):
                cost = f_grad_shared(question, question_mask, answer, answer_mask)
                f_update(self.options["learning_rate"])
            return cost
        
        return update_function
    
    

    def get_validing_function(self, cr, batch_size=200):
        valid_function = self.get_cost_function()
        
        def update_function():
            n_validset = cr.get_size()[1]
            n_batches = (n_validset - 1) / batch_size + 1
            cost = 0.0
            for index in range(n_batches) :
                question, question_mask, answer, answer_mask = \
                    cr.get_validset([index * batch_size, (index + 1) * batch_size])
                cost += valid_function(question, question_mask, answer, answer_mask)[0]
            cost = cost / n_batches
            return [cost]
        
        return update_function
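
# Standalone illustration (not part of the class above): the loop above covers the
# whole validation set with ceiling division, so a final partial batch is still
# processed (the corpus reader is presumably expected to clip the upper bound).
n_items, batch_size = 1050, 200
n_batches = (n_items - 1) / batch_size + 1    # 6 under Python 2 integer division
ranges = [[i * batch_size, (i + 1) * batch_size] for i in range(n_batches)]
print(ranges[-1])                             # [1000, 1200]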
    

    def get_testing_function(self, cr, batch_size=100):
        test_function = self.get_cost_function()
        
        def update_function():
            n_testset = cr.get_size()[2]
            n_batches = (n_testset - 1) / batch_size + 1
            cost = 0.0
            for index in range(n_batches) :
                question, question_mask, answer, answer_mask = \
                    cr.get_testset([index * batch_size, (index + 1) * batch_size])
                cost += test_function(question, question_mask, answer, answer_mask)[0]
            cost = cost / n_batches
            return [cost]
        return update_function
    

    def get_pr_function(self, cr, batch_size=100):
        pr_function = theano.function(inputs=[self.question, self.question_mask, \
                                              self.answer, self.answer_mask], \
                                      outputs=[self.likihood_vector], name='pr_function', \
                                      on_unused_input='ignore')
        
        def update_function():
            n_prset = cr.get_size()[2]
            n_batches = (n_prset - 1) / batch_size + 1
            cost_list = list()
            for index in range(n_batches) :
                question, question_mask, answer, answer_mask = \
                    cr.get_prset([index * batch_size, (index + 1) * batch_size])
                score_list = pr_function(question, question_mask, answer, answer_mask)[0]
                cost_list.extend(score_list)
            corre_score = pearsonr(cost_list, cr.get_pr_score()) 
            return [corre_score[0]]
        return update_function
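
# Standalone illustration (not part of the class above): get_pr_function correlates
# the model's per-sentence scores with the reference scores from the corpus reader.
# scipy.stats.pearsonr returns a (correlation, p-value) pair, which is why only
# element [0] is kept above. For example:
from scipy.stats import pearsonr

r, p_value = pearsonr([1.0, 2.0, 3.0], [1.1, 1.9, 3.2])   # r is close to 1.0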
    

    def get_cost_function(self):
        return theano.function(inputs=[self.question, self.question_mask, \
                                       self.answer, self.answer_mask], \
                               outputs=[self.cost], name='cost_function', \
                               on_unused_input='ignore')


    def get_deploy_function(self):
        maxout_input = tensor.concatenate([self.decoder_hidden_status[-1, :, :], \
                                           self.encoder_hidden_status[-1, :, :], \
                                           self.answer_embedding[-1, :, :]], \
                                          axis=1) 
        pred_word, pred_word_probability = \
                                    self.maxout_layer.get_output(base_data=self.tparams['Wemb_d'],
                                                                 refer_data=maxout_input)
        deploy_function = theano.function(inputs=[self.question, self.question_mask, \
                                                  self.answer, self.answer_mask], \
                                          outputs=[pred_word, pred_word_probability], \
                                          name='deploy_function')
        return deploy_function
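
# Standalone illustration (not part of the class above): one common way to use
# get_deploy_function is step-by-step greedy decoding. This is only a sketch under
# assumed conventions -- a single sample, hypothetical BOS_ID / EOS_ID values, and
# float32 masks -- not code taken from the original project.
import numpy

def greedy_decode(deploy_function, question_ids, BOS_ID=1, EOS_ID=2, max_len=30):
    # question_ids: list of word ids for one question (time-major, one sample)
    question = numpy.asarray(question_ids, dtype='int64').reshape(-1, 1)
    question_mask = numpy.ones_like(question, dtype='float32')
    answer_ids = [BOS_ID]
    while len(answer_ids) < max_len:
        answer = numpy.asarray(answer_ids, dtype='int64').reshape(-1, 1)
        answer_mask = numpy.ones_like(answer, dtype='float32')
        # assumes pred_word holds one predicted word id per sample
        pred_word, _ = deploy_function(question, question_mask, answer, answer_mask)
        next_id = int(pred_word[0])
        answer_ids.append(next_id)
        if next_id == EOS_ID:
            break
    return answer_ids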
    

    def get_observe_function(self):
        observe_function = theano.function(inputs=[self.question, self.question_mask], \
                                           outputs=[self.encoder_hidden_status[-1, :, :]], \
                                           name='observe_function')
        
        return observe_function
class StyleEncoderDecoderNetwork(StyleBase):
    """
    This class processes a dialog pair with an encoder-decoder network.
    It has 3 abilities:
        1. Train the language model.
        2. Model the relationship between question and answer.
        3. Model the style.
    """

    def init_global_params(self, options):
        """
        Global (non-LSTM) parameters, for the embeddings and the classifier.
        """
        params = OrderedDict()
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_e'] = (0.01 * randn).astype(config.globalFloatType()) 
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_d'] = (0.01 * randn).astype(config.globalFloatType()) 
        randn = numpy.random.rand(options['n_style'], options['style_embedding_dim'])
        params['Semb'] = (0.2 * randn).astype(config.globalFloatType())  # style embedding

        return params


    def __init__(self, n_words, word_embedding_dim, n_style, style_embedding_dim,
                 hidden_status_dim, input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'n_style': n_style,
            'style_embedding_dim': style_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop are available; sgd is very hard to use and not recommended (it probably needs momentum and a decaying learning rate).
            }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
        #   1. encoder layer
        self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                          hidden_status_dim=options['hidden_status_dim'],
                                          tparams=self.tparams)
        self.encoder_hidden_status = self.encoder_layer.get_output(inputs=(self.question_embedding, self.question_mask))
        
        #   2. style layer
        self.style_layer = MaxoutLayer(base_dim=options['style_embedding_dim'],
                                       refer_dim=options["hidden_status_dim"],
                                       tparams=self.tparams,
                                       prefix="style")
        self.style_prob = self.style_layer.probability(base_data=self.tparams['Semb'],
                                                      refer_data=self.encoder_hidden_status[-1, :, :])  # (samples,n_style)
        
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        self.decoder_layer = DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + \
                                              options['hidden_status_dim'] + options['style_embedding_dim'],
                                              hidden_status_dim=options['hidden_status_dim'],
                                              tparams=self.tparams)
        self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                        refer_dim=2 * options["hidden_status_dim"] + \
                                        options['word_embedding_dim'] + options['style_embedding_dim'],
                                        tparams=self.tparams,
                                        prefix="maxout")
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        
        # extend the data: tile every sample n_style times so each answer is scored under every style
        n_question_time_stpes = self.question.shape[0]
        n_answer_time_stpes = self.answer.shape[0]
        n_samples = self.question.shape[1]
        extent_data_size = n_samples * self.options['n_style']
        
        # hidden status with style
        answer_mask = tensor.alloc(self.answer_mask, \
                                    self.options['n_style'], \
                                    n_answer_time_stpes, \
                                    n_samples)\
                                    .dimshuffle([1, 0, 2])\
                                    .reshape([n_answer_time_stpes, extent_data_size])
        
        answer = tensor.alloc(self.answer, \
                            self.options['n_style'], \
                            n_answer_time_stpes, \
                            n_samples)\
                            .dimshuffle([1, 0, 2])\
                            .reshape([n_answer_time_stpes, extent_data_size])
        
        answer_embedding = tensor.alloc(self.answer_embedding, \
                                        self.options['n_style'], \
                                        n_answer_time_stpes, \
                                        n_samples, \
                                        options['word_embedding_dim'])\
                                        .dimshuffle(1, 0, 2, 3)\
                                        .reshape([n_answer_time_stpes, extent_data_size, options['word_embedding_dim']])
        

        
        encoder_hidden_status = tensor.alloc(self.encoder_hidden_status[-1], \
                                            self.options['n_style'], \
                                            n_samples, \
                                            options['hidden_status_dim'])\
                                        .reshape([1, extent_data_size, options['hidden_status_dim']])
        
        style_embeddings = tensor.alloc(self.tparams['Semb'], \
                                                    n_samples, \
                                                    self.options['n_style'], \
                                                    self.options['style_embedding_dim'])\
                                        .dimshuffle(1, 0, 2)\
                                        .reshape([1, extent_data_size, self.options['style_embedding_dim']])
        
        encoder_hidden_status = tensor.concatenate([encoder_hidden_status, style_embeddings], axis=2)
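        # At this point every per-sample tensor has been tiled n_style times:
        #   answer, answer_mask:    (n_answer_time_steps, n_style * n_samples)
        #   answer_embedding:       (n_answer_time_steps, n_style * n_samples, word_embedding_dim)
        #   encoder_hidden_status:  (1, n_style * n_samples, hidden_status_dim + style_embedding_dim)
        # so the decoder below scores each answer once under every style embedding.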
        
        #   3. decoder layer
        decoder_hidden_status = self.decoder_layer.get_output(inputs=[answer_embedding, answer_mask,
                                                                          encoder_hidden_status])
        #   4. maxout  layer
        self.maxout_input = tensor.concatenate([decoder_hidden_status[:-1, :, :].
                                                    reshape([(n_answer_time_stpes - 1) * extent_data_size,
                                                             options['hidden_status_dim']]),
                                                 tensor.alloc(encoder_hidden_status[-1, :, :],
                                                              n_answer_time_stpes - 1,
                                                              extent_data_size,
                                                              options['style_embedding_dim'] + \
                                                              options['hidden_status_dim']).
                                                    reshape([(n_answer_time_stpes - 1) * extent_data_size,
                                                             options['hidden_status_dim'] + \
                                                             options['style_embedding_dim']]),
                                                 answer_embedding[:-1, :, :].
                                                    reshape([(n_answer_time_stpes - 1) * extent_data_size,
                                                             options['word_embedding_dim']])],
                                                axis=1)
        
        predict_maxout_input = tensor.concatenate([decoder_hidden_status[-1, :, :].
                                                    reshape([extent_data_size,
                                                             options['hidden_status_dim']]),
                                                 tensor.alloc(encoder_hidden_status[-1, :, :],
                                                              1,
                                                              extent_data_size,
                                                              options['style_embedding_dim'] + \
                                                              options['hidden_status_dim']).
                                                    reshape([extent_data_size,
                                                             options['hidden_status_dim'] + \
                                                             options['style_embedding_dim']]),
                                                 answer_embedding[-1, :, :].
                                                    reshape([extent_data_size,
                                                             options['word_embedding_dim']])],
                                                axis=1)
        
        likelihood_vector = \
            self.maxout_layer.likelihood(base_data=self.tparams['Wemb_d'],
                                         refer_data=self.maxout_input,
                                         y=answer[1:, :].flatten())
            
        pred_word, predict_probability = \
            self.maxout_layer.get_output(base_data=self.tparams['Wemb_d'],
                                         refer_data=predict_maxout_input)
        
        # carefully check
        likelihood_vector = likelihood_vector.reshape(
              [n_answer_time_stpes - 1, options['n_style'], n_samples])
        
        predict_probability = predict_probability.reshape(
              [options['n_style'], n_samples, options['n_words']])
        self.predict_probability = predict_probability
        
        pred_word = pred_word.reshape(
              [options['n_style'], n_samples])
        self.pred_word = pred_word
        
        # likelihood_vector shape: (n_answer_time_steps - 1, n_style, n_samples)
        # Work in log space: the product of per-word probabilities becomes a sum of log-probabilities.
        
        log_likelihood_vector = tensor.log(likelihood_vector) * self.answer_mask[1:, :].dimshuffle(0, 'x', 1)
        log_likelihood_vector = log_likelihood_vector.sum(axis=0)
        self.log_likelihood_vector_on_diff_styles = log_likelihood_vector
        
        sentence_probability = log_likelihood_vector + tensor.transpose(self.style_prob.log())
        sentence_probability = self.remove_min_neg_log_prob(sentence_probability)
        
        sentence_probability = self.total_probability_precise(sentence_probability, 0, options['n_style'])
        
        negative_log_sentence_probability = -sentence_probability

        
        self.likelihood_vector = negative_log_sentence_probability
        self.cost = negative_log_sentence_probability.sum() / self.answer_mask[1:, :].sum()
        
        self.zi_error = negative_log_sentence_probability / tensor.sum(self.answer_mask[1:, :], 0)
        self.zi_error = self.zi_error.mean()
        self.st_error = negative_log_sentence_probability.mean()
        
        self.set_parameters(input_params)  # params from list to TensorVariable
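
# Standalone illustration (not part of the class above): remove_min_neg_log_prob and
# total_probability_precise are defined elsewhere; the quantity they compute,
# log p(answer | question) = log sum_s p(style s | question) * p(answer | question, style s),
# can be sketched as a numerically stable log-sum-exp over the style axis (this is an
# assumption about their intent, not a copy of those helpers):
import numpy

def log_marginal_over_styles(log_p_answer_given_style, log_p_style):
    # both arguments: (n_style, n_samples), already in log space
    scores = log_p_answer_given_style + log_p_style
    m = scores.max(axis=0)                                    # subtract the max for stability
    return m + numpy.log(numpy.exp(scores - m).sum(axis=0))   # (n_samples,)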
    

    def get_training_function(self, cr, error_type="RMSE", batch_size=10, batch_repeat=1):
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        grads = tensor.grad(self.cost, wrt=self.tparams.values())
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads,
                                            [self.question, self.question_mask,
                                             self.answer, self.answer_mask],
                                            [self.cost])
        
        def update_function(index):
            (question, question_mask), (answer, answer_mask), _, _ = \
                cr.get_train_set([index * batch_size, (index + 1) * batch_size])
            for _ in xrange(batch_repeat):
                cost = f_grad_shared(question, question_mask, answer, answer_mask)
                f_update(self.options["learning_rate"])
            return cost
        
        return update_function
    

    def get_validing_function(self, cr):
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_valid_set()
        valid_function = theano.function(inputs=[],
                                         outputs=[self.zi_error, self.st_error],
                                         givens={self.question: question,
                                                 self.question_mask: question_mask,
                                                 self.answer: answer,
                                                 self.answer_mask: answer_mask},
                                         name='valid_function')
        
        return valid_function
    

    def get_testing_function(self, cr):
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_test_set()
        test_function = theano.function(inputs=[],
                                        outputs=[self.cost, self.zi_error, self.st_error],
                                        givens={self.question: question,
                                                self.question_mask: question_mask,
                                                self.answer: answer,
                                                self.answer_mask: answer_mask},
                                        name='test_function')
        
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_pr_set()
        pr_function = theano.function(inputs=[],
                                      outputs=[self.likelihood_vector],
                                      givens={self.question: question,
                                              self.question_mask: question_mask,
                                              self.answer: answer,
                                              self.answer_mask: answer_mask},
                                      name='pr_function')
        
        return test_function, pr_function
class BiEncoderAttentionDecoderStyleNetwork(Network):
    """
    This class processes a dialog pair with an encoder-decoder network.
    It has 4 abilities:
        1. Train the language model.
        2. Model the relationship between question and answer.
        3. Model the attention.
        4. Model the style.
    """

    def init_global_params(self, options):
        """
        Global (non-LSTM) parameters, for the embeddings and the classifier.
        """
        params = OrderedDict()
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_e'] = (0.01 * randn).astype(config.globalFloatType()) 
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_d'] = (0.01 * randn).astype(config.globalFloatType()) 

        return params


    def __init__(self, n_words, word_embedding_dim, hidden_status_dim, style_number, style_dim, input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'style_number': style_number,
            'style_dim': style_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop are available; sgd is very hard to use and not recommended (it probably needs momentum and a decaying learning rate).
            }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [self.question.shape[0], self.question.shape[1], options['word_embedding_dim']])
        #   1. forward encoder layer
        self.forward_encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                                  hidden_status_dim=options['hidden_status_dim'],
                                                  tparams=self.tparams, prefix='forward_Encoder')
        self.forward_encoder_hidden_status = \
            self.forward_encoder_layer.getOutput(inputs=(self.question_embedding, self.question_mask))
            
        #   2. backward encoder layer
        self.backward_encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                                   hidden_status_dim=options['hidden_status_dim'],
                                                   tparams=self.tparams, prefix='backward_Encoder')
        self.backward_encoder_hidden_status = \
            self.backward_encoder_layer.getOutput(inputs=(self.question_embedding[::-1, :, :],
                                                          self.question_mask[::-1, :]))
        self.encoder_hidden_status = tensor.concatenate([self.forward_encoder_hidden_status,
                                                         self.backward_encoder_hidden_status[::-1, :, :]],
                                                        axis=2)
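        # The backward encoder runs over the reversed question; reversing its outputs
        # again ([::-1, :, :]) re-aligns them with the forward pass, so every time step
        # carries a 2 * hidden_status_dim representation built from both directions.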
        
        #   3. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        # self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        # self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [self.answer.shape[0], self.answer.shape[1], options['word_embedding_dim']])
        self.decoder_layer = \
            AttentionDecoderLayer(word_embedding_dim=options['word_embedding_dim'] + 2 * options['hidden_status_dim'],
                                  hidden_status_dim=options['hidden_status_dim'],
                                  encoder_hidden_dim=2 * options['hidden_status_dim'],
                                  tparams=self.tparams, prefix='Decoder')
        self.decoder_hidden_status, self.context = \
            self.decoder_layer.getOutput(inputs=(self.answer_embedding, self.answer_mask,
                                                 self.encoder_hidden_status, self.question_mask))
        #   4. style  layer
        
        self.style_layer = StyleLayer(style_number, style_dim, 3 * hidden_status_dim, self.tparams, 'style')
        
        question_style_input_embedding1 = self.forward_encoder_hidden_status[-1, :, :].reshape([self.answer.shape[1], options['hidden_status_dim']])
        question_style_input_embedding2 = self.backward_encoder_hidden_status[-1, :, :].reshape([self.answer.shape[1], options['hidden_status_dim']])
        answer_style_input_embedding = self.backward_encoder_hidden_status[-1, :, :].reshape([self.answer.shape[1], options['hidden_status_dim']])
        
        pair_embedding = tensor.concatenate([question_style_input_embedding1,
                                             question_style_input_embedding2,
                                             answer_style_input_embedding],
                                            axis=1)
        self.style = self.style_layer.getOutput(pair_embedding)
        
        
        #   5. maxout  layer
        self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                        refer_dim=3 * options["hidden_status_dim"] +
                                            options['word_embedding_dim'] + options['style_dim'],
                                        tparams=self.tparams,
                                        prefix="maxout")
        
        maxout_decoder_hidden_status_input = self.decoder_hidden_status[:-1, :, :].\
                                        reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                         options['hidden_status_dim']])
        
        maxout_context_hidden_status_input = self.context[:-1, :, :].\
                                reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                         2 * options['hidden_status_dim']])
        
        maxout_answer_wordEmbedding_input = self.answer_embedding[:-1, :, :].\
                                reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                                         options['word_embedding_dim']])
        
        maxout_answer_style_input = tensor.alloc(self.style.dimshuffle('x', 0, 1),
                                                 self.answer.shape[0] - 1,
                                                 self.answer.shape[1],
                                                 options['style_dim'])

        maxout_answer_style_input = maxout_answer_style_input.\
            reshape([(self.answer.shape[0] - 1) * self.answer.shape[1],
                     options['style_dim']])
        self.maxout_input = \
            tensor.concatenate(\
                               [maxout_decoder_hidden_status_input,
                                maxout_context_hidden_status_input,
                                maxout_answer_wordEmbedding_input,
                                maxout_answer_style_input],
                                axis=1)
        output_error_vector = self.maxout_layer.negative_log_likelihood(
                                                                     self.tparams['Wemb_d'],
                                                                     self.maxout_input,
                                                                     y=self.answer[1:, :].flatten())
        m = self.answer_mask[1:, :]
        self.cost = -1.0 * tensor.dot(output_error_vector, m.flatten()) / m.sum()
        self.output_error_vector = output_error_vector.reshape([self.answer.shape[0] - 1 , self.answer.shape[1]]) 
        self.output_error_vector = self.output_error_vector * m
        self.output_error_vector = -self.output_error_vector.sum(axis=0) / m.sum(axis=0)
        
        self._set_parameters(input_params)  # params from list to TensorVariable
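
# Standalone illustration (not part of the class above): the maxout input repeats each
# sample's style vector at every decoder time step before time and batch are flattened
# into one axis. The dimshuffle('x', 0, 1) + alloc + reshape above corresponds to this
# numpy broadcast (toy sizes, illustrative names):
import numpy

style = numpy.random.rand(4, 8)                 # (n_samples, style_dim)
answer_len = 6
tiled = numpy.broadcast_to(style[None, :, :],   # (answer_len - 1, n_samples, style_dim)
                           (answer_len - 1, style.shape[0], style.shape[1]))
flat = tiled.reshape((answer_len - 1) * style.shape[0], style.shape[1])
assert flat.shape == (20, 8)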
    

    def get_training_function(self, cr, error_type="RMSE", batch_size=10, batch_repeat=1):
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        grads = tensor.grad(self.cost, wrt=self.tparams.values())
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads,
                                            [self.question, self.question_mask,
                                             self.answer, self.answer_mask],
                                            [self.cost])
        
        def update_function(index):
            (question, question_mask), (answer, answer_mask), _, _ = \
                cr.get_train_set([index * batch_size, (index + 1) * batch_size])
            for _ in xrange(batch_repeat):
                cost = f_grad_shared(question, question_mask, answer, answer_mask)
                f_update(self.options["learning_rate"])
            return cost
        
        return update_function
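    # Usage sketch (illustrative only; not part of the original code). It assumes
    # `net` is an instance of this class, `cr` is the same corpus-reader object
    # whose get_train_set slices are consumed above, and `n_train_batches` is the
    # number of mini-batches in the training set:
    #
    #     train_one_batch = net.get_training_function(cr, batch_size=10)
    #     for batch_index in xrange(n_train_batches):
    #         cost = train_one_batch(batch_index)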
    

    def get_validing_function(self, cr):
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_valid_set()
        valid_function = theano.function(inputs=[],
                                         outputs=[self.cost],
                                         givens={self.question: question,
                                                 self.question_mask: question_mask,
                                                 self.answer: answer,
                                                 self.answer_mask: answer_mask},
                                         name='valid_function')
        
        return valid_function
    

    def get_testing_function(self, cr):
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_test_set()
        test_function = theano.function(inputs=[],
                                        outputs=[self.cost],
                                        givens={self.question: question,
                                                self.question_mask: question_mask,
                                                self.answer: answer,
                                                self.answer_mask: answer_mask},
                                        name='test_function')
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_pr_set()
        pr_function = theano.function(inputs=[],
                                      outputs=[self.output_error_vector],
                                      givens={self.question: question,
                                              self.question_mask: question_mask,
                                              self.answer: answer,
                                              self.answer_mask: answer_mask},
                                      name='pr_function')
        
        return test_function, pr_function
    

    def get_deploy_function(self, style_type):
        maxout_input = tensor.concatenate([self.decoder_hidden_status[-1, :, :],
                                            self.encoder_hidden_status[-1, :, :],
                                            self.answer_embedding[-1, :, :]],
                                           axis=1)
        b = self.style_layer.getStyleMatrix().get_value()[style_type]
        
        pred_word, pred_word_probability = self.maxout_layer.getOutput(self.tparams['Wemb_d'], maxout_input)
        deploy_function = theano.function(inputs=[self.question, self.question_mask,
                                                  self.answer, self.answer_mask],
                                          outputs=[pred_word, pred_word_probability],
                                          givens={self.style: b},
                                          name='deploy_function')
        
        return deploy_function
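    # Greedy decoding sketch (illustrative only; not part of the original code).
    # It assumes `net` is a trained instance of this class, `question` and
    # `question_mask` are an already-padded source sentence, and `start_token_id`,
    # `end_token_id` and `max_answer_length` are supplied by the surrounding code.
    # The answer prefix is grown one token at a time and fed back into the
    # compiled deploy function:
    #
    #     deploy = net.get_deploy_function(style_type=0)
    #     answer = numpy.array([[start_token_id]], dtype='int64')      # (time, batch=1)
    #     answer_mask = numpy.ones_like(answer, dtype=config.globalFloatType())
    #     for _ in xrange(max_answer_length):
    #         pred_word, pred_word_probability = deploy(question, question_mask,
    #                                                   answer, answer_mask)
    #         next_word = pred_word[0]
    #         answer = numpy.vstack([answer, [[next_word]]]).astype('int64')
    #         answer_mask = numpy.ones_like(answer, dtype=config.globalFloatType())
    #         if next_word == end_token_id:
    #             break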
Example No. 18
    def __init__(self,
                 n_words,
                 word_embedding_dim=128,
                 hidden_status_dim=128,
                 n_topics=2,
                 topic_embedding_dim=5,
                 input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'n_topics': n_topics,
            'topic_embedding_dim': topic_embedding_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.adadelta,  # sgd, adadelta and rmsprop available; plain sgd is hard to use and not recommended (it usually needs momentum and a decaying learning rate).
        }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        #print self.tparams['Temb']
        #self.answer_emb=T.dot(self.tparams['Qemb'],self.tparams['QTA'])
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask',
                                           dtype=config.globalFloatType())
        self.style = tensor.matrix('style', dtype='int64')
        self.style_mask = tensor.matrix('style_mask',
                                        dtype=config.globalFloatType())
        self.style_embedding = self.tparams['Wemb_e'][
            self.style.flatten()].reshape([
                self.style.shape[0], self.style.shape[1],
                options['word_embedding_dim']
            ])
        #self.question_mask = tensor.matrix('question_mask', dtype='int64')
        self.topic = tensor.matrix('topic', dtype=config.globalFloatType())
        # self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]]) # for debug
        # self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]]) # for debug
        self.question_embedding = self.tparams['Wemb_e'][
            self.question.flatten()].reshape([
                self.question.shape[0], self.question.shape[1],
                options['word_embedding_dim']
            ])
        #   1. encoder layer
        self.encoder_layer = EncoderLayer(
            word_embedding_dim=options['word_embedding_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            tparams=self.tparams)
        self.encoder_hidden_status1 = self.encoder_layer.getOutput(
            inputs=(self.question_embedding, self.question_mask))
        self.encoder_layer_style = EncoderLayer(
            word_embedding_dim=options['word_embedding_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            tparams=self.tparams,
            prefix='encoder_layer_style')
        self.encoder_hidden_status_style = self.encoder_layer_style.getOutput(
            inputs=(self.style_embedding, self.style_mask))
        self.encoder_hidden_status = tensor.concatenate(
            [
                self.encoder_hidden_status1[-1],
                self.encoder_hidden_status_style[-1]
            ],
            axis=1).reshape([
                1, self.encoder_hidden_status_style.shape[1],
                2 * self.encoder_hidden_status_style.shape[2]
            ])
        #self.topic_states = self.tparams['Temb'][self.topic.flatten()].reshape([1,self.question.shape[1], options['topic_embedding_dim']])
        #self.topic_change=T.alloc(self.topic_states,self.question.shape[0], self.question.shape[1], options['topic_embedding_dim'])
        #self.encoder_hidden_status = T.concatenate([self.encoder_hidden_status,self.topic_change], axis=2)
        #   2. decoder layer
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask',
                                         dtype=config.globalFloatType())
        # self.answer.tag.test_value = numpy.array([[11, 10, 2], [5, 2, 0]]) # for debug
        # self.answer_mask.tag.test_value = numpy.array([[1, 1, 1], [1, 1, 0]]) # for debug
        self.answer_embedding = self.tparams['Wemb_e'][
            self.answer.flatten()].reshape([
                self.answer.shape[0], self.answer.shape[1],
                options['word_embedding_dim']
            ])
        self.decoder_layer = DecoderLayer_Cho(
            word_embedding_dim=options['word_embedding_dim'] +
            2 * options['hidden_status_dim'],
            hidden_status_dim=options['hidden_status_dim'],
            tparams=self.tparams)
        self.decoder_hidden_status = self.decoder_layer.getOutput(
            inputs=(self.answer_embedding, self.answer_mask,
                    self.encoder_hidden_status))

        #   2.5 softmax layer
        self.softmax_layer = SoftmaxLayer(n_in=options["hidden_status_dim"],
                                          n_out=2,
                                          tparams=self.tparams)
        self.softmax_input = self.encoder_hidden_status1[-1]
        self.output_error_vector = self.softmax_layer.negative_log_likelihood(
            self.softmax_input, tensor.cast(self.topic.flatten(), 'int64'))
        self.class_cost = self.output_error_vector.sum() / self.question.shape[1]
        #   3. maxout  layer
        self.maxout_layer = MaxoutLayer(
            base_dim=options['word_embedding_dim'],
            refer_dim=3 * options["hidden_status_dim"] +
            options['word_embedding_dim'],
            tparams=self.tparams,
            prefix="maxout")
        self.maxout_input = tensor.concatenate(
            [self.decoder_hidden_status[:-1, :, :].reshape(
                 [(self.answer.shape[0] - 1) * self.answer.shape[1],
                  options['hidden_status_dim']]),
             tensor.alloc(self.encoder_hidden_status[-1, :, :],
                          self.answer.shape[0] - 1, self.answer.shape[1],
                          2 * options['hidden_status_dim']).reshape(
                 [(self.answer.shape[0] - 1) * self.answer.shape[1],
                  2 * options['hidden_status_dim']]),
             self.answer_embedding[:-1, :, :].reshape(
                 [(self.answer.shape[0] - 1) * self.answer.shape[1],
                  options['word_embedding_dim']])],
            axis=1)
        output_error_vector = self.maxout_layer.negative_log_likelihood(
            self.tparams['Wemb_e'],
            self.maxout_input,
            y=self.answer[1:, :].flatten())
        # Tile the per-sample topic labels over the (answer_length - 1) predicted
        # time steps, so every token-level term is weighted by its sample's topic.
        self.topic_matrix = tensor.alloc(self.topic.flatten(),
                                         self.answer.shape[0] - 1,
                                         self.answer.shape[1]).flatten()
        # self.topic_matrix_change = 2 * (self.topic_matrix - 0.5)
        self.topic_matrix_change = self.topic_matrix
        m = self.answer_mask[1:, :]
        # Topic-weighted, mask-normalized negative log-likelihood; subtracting
        # self.class_cost trains the topic classifier jointly with the decoder.
        self.cost = -1.0 * tensor.dot(
            output_error_vector,
            m.flatten() * self.topic_matrix_change) / m.sum()
        self.cost = self.cost - self.class_cost
        self.output_error_vector = output_error_vector.reshape(
            [self.answer.shape[0] - 1, self.answer.shape[1]])
        self.output_error_vector = self.output_error_vector * m
        # Per-sentence error: sum the masked, reshaped token terms over time
        # (as in the other networks above).
        self.output_error_vector = -self.output_error_vector.sum(axis=0) / m.sum(axis=0)

        self._set_parameters(input_params)  # params from list to TensorVariable
class StyleEncoderDecoderNetwork(Network):
    """
    This class will process the dialog pair with a encoder-decoder network.
    It has 2 abilities:
        1. Train the language model.
        2. Model the relationship of Q&A
        3. Model the style
    """

    def init_global_params(self, options):
        """
        Global (not LSTM) parameters, for the embedding and the classifier.
        """
        params = OrderedDict()
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_e'] = (0.01 * randn).astype(config.globalFloatType()) 
        randn = numpy.random.rand(options['n_words'], options['word_embedding_dim'])
        params['Wemb_d'] = (0.01 * randn).astype(config.globalFloatType()) 
        randn = numpy.random.rand(options['n_style'], options['style_embedding_dim'])
        params['Semb'] = (0.2 * randn).astype(config.globalFloatType())  # style embedding
        randn = numpy.random.rand(options['n_topic'], options['topic_embedding_dim'])
        params['Temb'] = (0.2 * randn).astype(config.globalFloatType())  # topic embedding

        return params


    def __init__(self, n_words, word_embedding_dim, n_style, style_embedding_dim,
                 n_topic, topic_embedding_dim, hidden_status_dim, input_params=None):
        self.options = options = {
            'n_words': n_words,
            'word_embedding_dim': word_embedding_dim,
            'n_style': n_style,
            'style_embedding_dim': style_embedding_dim,
            'n_topic': n_topic,
            'topic_embedding_dim': topic_embedding_dim,
            'hidden_status_dim': hidden_status_dim,
            'learning_rate': 0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            'optimizer': self.rmsprop,  # sgd, adadelta and rmsprop available; plain sgd is hard to use and not recommended (it usually needs momentum and a decaying learning rate).
            'tiny_float': 1e-20
            }
        # global parameters.
        params = self.init_global_params(options)
        # Theano parameters.
        self.tparams = self.init_tparams(params)
        # Used for dropout.
        # self.use_noise = theano.shared(numpy_floatX(0.))

        # construct network
        theano.config.compute_test_value = 'off'
        self.question = tensor.matrix('question', dtype='int64')
        self.question_mask = tensor.matrix('question_mask', dtype=config.globalFloatType())
        self.answer = tensor.matrix('answer', dtype='int64')
        self.answer_mask = tensor.matrix('answer_mask', dtype=config.globalFloatType())
        n_question_time_stpes = self.question.shape[0]
        n_answer_time_stpes = self.answer.shape[0]
        n_samples = self.question.shape[1]
        extent_style_size = n_samples * self.options['n_style']
        self.topic = tensor.vector('topic', dtype='int64')
        self.question_embedding = self.tparams['Wemb_e'][self.question.flatten()].reshape(
            [n_question_time_stpes, n_samples, options['word_embedding_dim']])
        self.answer_embedding = self.tparams['Wemb_d'][self.answer.flatten()].reshape(
            [n_answer_time_stpes, n_samples, options['word_embedding_dim']])
        self.topic_embedding = self.tparams['Temb'][self.topic.flatten()].reshape(
            [1, n_samples, options['topic_embedding_dim']])
        # for debug
        '''
        self.question.tag.test_value = numpy.array([[10, 2, 0], [5, 9, 2]])
        self.question_mask.tag.test_value = numpy.array([[1, 1, 0], [1, 1, 1]])
        self.answer.tag.test_value = numpy.array([[10, 2, 0, 0], [5, 9, 2, 4]])
        self.answer_mask.tag.test_value = numpy.array([[1, 1, 0, 0], [1, 1, 1, 1]])
        self.topic.tag.test_value = numpy.array([0, 2]) # for debug
        '''
        # statement of layer
        #    1. encoder_layer
        self.encoder_layer = EncoderLayer(word_embedding_dim=options['word_embedding_dim'],
                                          hidden_status_dim=options['hidden_status_dim'],
                                          tparams=self.tparams)
        #    2. topic_layer
        self.topic_layer = MaxoutLayer(base_dim=options['topic_embedding_dim'],
                                       refer_dim=options['hidden_status_dim'],
                                       tparams=self.tparams,
                                       prefix='topic')
        #    3. style_layer
        self.style_layer = MaxoutLayer(base_dim=options['style_embedding_dim'],
                                       refer_dim=options['hidden_status_dim'],
                                       tparams=self.tparams,
                                       prefix="style")
        #    4. decoder_layer
        self.decoder_layer = DecoderLayer_Cho(word_embedding_dim=options['word_embedding_dim'] + \
                                              options['hidden_status_dim'],
                                              hidden_status_dim=options['hidden_status_dim'],
                                              tparams=self.tparams)
        #    5. output_layer
        self.maxout_layer = MaxoutLayer(base_dim=options['word_embedding_dim'],
                                        refer_dim=2*options["hidden_status_dim"] + \
                                        options['word_embedding_dim'],
                                        tparams=self.tparams,
                                        prefix="maxout")
        
        # definition of layer variables
        #    1. encoder layer
        self.encoder_hidden_status = \
            self.encoder_layer.get_output(inputs=(self.question_embedding, self.question_mask))
        
        #    2. topic layer
        self.topic_prob = \
            self.topic_layer.probability(base_data=self.tparams['Temb'],
                                         refer_data=self.encoder_hidden_status[-1, :, :])
        self.topic_likelihood = \
            self.topic_layer.likelihood(base_data=self.tparams['Temb'],
                                        refer_data=self.encoder_hidden_status[-1, :, :],
                                        y=self.topic)
        # hidden status with topic
        topic_mixed_node = GRUNode(word_embedding_dim=self.options['topic_embedding_dim'],
                                   hidden_status_dim=options['hidden_status_dim'],
                                   tparams=self.tparams, prefix='topic_mixed_node')
        encoder_hidden_status = tensor.alloc(self.encoder_hidden_status[-1, :, :], \
                                             1, n_samples, options['hidden_status_dim'])
        topic_mask = tensor.alloc(numpy.ones((1,), dtype=config.globalFloatType()), n_samples)
        state_below = tensor.dot(self.topic_embedding, topic_mixed_node.get_params_W())
        self.topic_hidden_status = \
            topic_mixed_node.node_update(m_=topic_mask, x_=state_below, h_=encoder_hidden_status)
        
        #    3. style layer
        self.style_prob = self.style_layer.probability(base_data=self.tparams['Semb'],
                                                       refer_data=self.topic_hidden_status[-1,:,:])
        # hidden status with style
        style_mixed_node = GRUNode(word_embedding_dim=options['style_embedding_dim'] +
                                   options['topic_embedding_dim'],
                                   hidden_status_dim=options['hidden_status_dim'],
                                   tparams=self.tparams, prefix='style_mixed_node')
        encoder_hidden_status = \
            tensor.alloc(self.encoder_hidden_status[-1, :, :], 
                         self.options['n_style'], n_samples, options['hidden_status_dim'])\
                         .reshape([1, extent_style_size, options['hidden_status_dim']])
        style_embeddings = \
            tensor.concatenate([ \
                tensor.alloc(self.tparams['Semb'],
                             n_samples, self.options['n_style'], self.options['style_embedding_dim'])\
                             .dimshuffle(1, 0, 2)\
                             .reshape([1, extent_style_size, self.options['style_embedding_dim']]), \
                tensor.alloc(self.topic_embedding[-1, :, :],
                             self.options['n_style'], n_samples, self.options['topic_embedding_dim'])\
                             .reshape([1, extent_style_size, self.options['topic_embedding_dim']])], axis=2)
        style_mask = tensor.alloc(numpy.ones((1,), dtype=config.globalFloatType()), extent_style_size)
        state_below = tensor.dot(style_embeddings, style_mixed_node.get_params_W())
        self.style_hidden_status = \
            style_mixed_node.node_update(m_=style_mask, x_=state_below, h_=encoder_hidden_status)
        
        #    4. decoder layer
        answer_mask = tensor.alloc(self.answer_mask, \
                                   self.options['n_style'], n_answer_time_stpes, n_samples) \
                                   .dimshuffle([1, 0, 2]).reshape([n_answer_time_stpes, extent_style_size])
        answer = tensor.alloc(self.answer, \
                              self.options['n_style'], n_answer_time_stpes, n_samples) \
                              .dimshuffle([1, 0, 2]).reshape([n_answer_time_stpes, extent_style_size])
        answer_embedding = tensor.alloc(self.answer_embedding, \
                                        self.options['n_style'], n_answer_time_stpes, \
                                        n_samples, options['word_embedding_dim'])\
                                        .dimshuffle(1, 0, 2, 3)\
                                        .reshape([n_answer_time_stpes, extent_style_size, 
                                                  options['word_embedding_dim']])
        mixed_hidden_status = self.style_hidden_status
        self.decoder_hidden_status = \
            self.decoder_layer.get_output(inputs=[answer_embedding, answer_mask, mixed_hidden_status])
            
        #    5. maxout layer
        self.maxout_input = tensor.concatenate([self.decoder_hidden_status[:-1, :, :].\
                                                    reshape([(n_answer_time_stpes - 1) * extent_style_size,
                                                             options['hidden_status_dim']]),
                                                 tensor.alloc(mixed_hidden_status[-1, :, :],
                                                              n_answer_time_stpes - 1,
                                                              extent_style_size,
                                                              options['hidden_status_dim']).\
                                                    reshape([(n_answer_time_stpes - 1) * extent_style_size,
                                                             options['hidden_status_dim']]),
                                                 answer_embedding[:-1, :, :].\
                                                    reshape([(n_answer_time_stpes - 1) * extent_style_size,
                                                             options['word_embedding_dim']])],
                                                axis=1)
        predict_maxout_input = tensor.concatenate([self.decoder_hidden_status[-1, :, :].\
                                                    reshape([extent_style_size,
                                                             options['hidden_status_dim']]),
                                                 mixed_hidden_status[-1, :, :].\
                                                    reshape([extent_style_size,
                                                             options['hidden_status_dim']]),
                                                 answer_embedding[-1, :, :].\
                                                    reshape([extent_style_size,
                                                             options['word_embedding_dim']])],
                                                axis=1)
        self.maxout_likelihood = \
            self.maxout_layer.likelihood(base_data=self.tparams['Wemb_d'],
                                         refer_data=self.maxout_input,
                                         y=answer[1:, :].flatten())
            
        pred_word, predict_probability = \
            self.maxout_layer.get_output(base_data=self.tparams['Wemb_d'],
                                         refer_data=predict_maxout_input)
        
        # carefully check
        self.maxout_likelihood = self.maxout_likelihood.reshape(
              [n_answer_time_stpes - 1, options['n_style'], n_samples])
        predict_probability = predict_probability.reshape(
              [options['n_style'], n_samples, options['n_words']])
        self.predict_probability = predict_probability
        pred_word = pred_word.reshape(
              [options['n_style'], n_samples])
        self.pred_word = pred_word
        # self.maxout_likelihood has shape (n_answer_time_stpes - 1, n_style, n_samples).
        # Work in log space so the product of per-token likelihoods becomes a sum
        # of log-likelihoods over the answer time steps.
        log_likelihood_vector = tensor.log(self.maxout_likelihood) * self.answer_mask[1:, :].dimshuffle(0, 'x', 1)
        log_likelihood_vector = log_likelihood_vector.sum(axis=0)
        self.log_likelihood_vector_on_diff_styles = log_likelihood_vector
        
        sentence_probability = log_likelihood_vector + tensor.transpose(self.style_prob.log())
        sentence_probability = self.remove_min_neg_log_prob(sentence_probability)
        sentence_probability = self.total_probability_precise(sentence_probability, 0, options['n_style'])
        sentence_probability += tensor.log(self.topic_likelihood)
        negative_log_sentence_probability = -sentence_probability
        
        self.likelihood_vector = negative_log_sentence_probability
        self.cost = negative_log_sentence_probability.sum() / self.answer_mask[1:, :].sum()
        
        self.zi_error = negative_log_sentence_probability / tensor.sum(self.answer_mask[1:, :], 0)
        self.zi_error = self.zi_error.mean()
        self.st_error = negative_log_sentence_probability.mean()
        
        self.set_parameters(input_params)  # params from list to TensorVariable
    

    def get_training_function(self, cr, error_type="RMSE", batch_size=10, batch_repeat=1):
        optimizer = self.options["optimizer"]
        lr = tensor.scalar(name='lr')
        grads = tensor.grad(self.cost, wrt=self.tparams.values())
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads,
                                            [self.question, self.question_mask,
                                             self.answer, self.answer_mask, self.topic],
                                            [self.cost])
        
        def update_function(index):
            (question, question_mask), (answer, answer_mask), topic, _, _ = \
                cr.get_train_set([index * batch_size, (index + 1) * batch_size])
            for _ in xrange(batch_repeat):
                cost = f_grad_shared(question, question_mask, answer, answer_mask, topic)
                f_update(self.options["learning_rate"])
            return cost
        
        return update_function
    

    def get_validing_function(self, cr):
        (question, question_mask), (answer, answer_mask), topic, _, _ = cr.get_valid_set()
        valid_function = theano.function(inputs=[],
                                         outputs=[self.zi_error, self.st_error],
                                         givens={self.question: question,
                                                 self.question_mask: question_mask,
                                                 self.answer: answer,
                                                 self.answer_mask: answer_mask,
                                                 self.topic: topic},
                                         name='valid_function')
        
        return valid_function
    

    def get_testing_function(self, cr):
        (question, question_mask), (answer, answer_mask), topic, _, _ = cr.get_test_set()
        test_function = theano.function(inputs=[],
                                        outputs=[self.cost, self.zi_error, self.st_error],
                                        givens={self.question: question,
                                                self.question_mask: question_mask,
                                                self.answer: answer,
                                                self.answer_mask: answer_mask,
                                                self.topic: topic},
                                        name='test_function')
        '''
        (question, question_mask), (answer, answer_mask), _, _ = cr.get_pr_set()
        pr_function = theano.function(inputs=[],
                                      outputs=[self.likelihood_vector],
                                      givens={self.question: question,
                                              self.question_mask: question_mask,
                                              self.answer: answer,
                                              self.answer_mask: answer_mask},
                                      name='pr_function')
        '''
        return test_function, None
    
    
    def get_style_distribution_function(self):
        style_distribution_function = \
            theano.function(inputs=[self.question, self.question_mask, self.topic],
                            outputs=[self.style_prob],
                            name='style_function')
        
        return style_distribution_function
    
    
    def get_topic_distribution_function(self):
        topic_distribution_function = \
            theano.function(inputs=[self.question, self.question_mask],
                            outputs=[self.encoder_hidden_status, self.topic_prob],
                            name='topic_function')
        
        return topic_distribution_function
    
    
    def get_deploy_function(self):
        st = tensor.scalar('style_type', dtype='int64')
        prob = self.predict_probability[st]
        p_w = self.pred_word[st]
        deploy_function = theano.function(inputs=[self.question, self.question_mask,
                                                  self.answer, self.answer_mask,
                                                  self.encoder_hidden_status, 
                                                  self.topic, st],
                                          outputs=[p_w, prob],
                                          name='style_deploy_function',
                                          on_unused_input='warn')
        
        return deploy_function
    
    
    def get_outtest_function(self, cr, max_iter, batch_size=200):
        
        # NOTE: self.zae, self.sae and self.bae are assumed to be defined elsewhere
        # in this class; they are not created in the __init__ shown above.
        outtest_function = theano.function(inputs=[self.question, self.question_mask, self.answer, self.answer_mask],
                                           outputs=[self.zae, self.sae, self.bae],
                                           name='outtest_function')

        def update_function():
            zae, sae, bae = 0.0, 0.0, 0.0
            for idx in range(max_iter):
                (question, question_mask), (answer, answer_mask), _, _ = \
                    cr.get_outtest_set([idx * batch_size, (idx + 1) * batch_size])
                z, s, b = outtest_function(question, question_mask, answer, answer_mask)
                zae += z
                sae += s
                bae += b
            zae /= max_iter
            sae /= max_iter
            bae /= max_iter
            return zae, sae, bae
        
        return update_function