Example #1
 def adadelta(self, lr, tparams, grads, model_input, cost, givens=None):
     """
     An adaptive learning rate optimizer
 
     Parameters
     ----------
     lr : Theano SharedVariable
         Initial learning rate
     tparams : Theano SharedVariable
         Model parameters
     grads : Theano variable
         Gradients of cost w.r.t. parameters
     model_input : list of Theano variables
         Model inputs
     cost : Theano variable
         Objective function to minimize
 
     Notes
     -----
     For more information, see [ADADELTA]_.
 
     .. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning
        Rate Method*, arXiv:1212.5701.
     """
 
     zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                   name='%s_grad' % k)
                     for k, p in tparams.iteritems()]
     running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                  name='%s_rup2' % k)
                    for k, p in tparams.iteritems()]
     running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                     name='%s_rgrad2' % k)
                       for k, p in tparams.iteritems()]
 
     zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
     rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
              for rg2, g in zip(running_grads2, grads)]
 
     f_grad_shared = theano.function(model_input, cost, updates=zgup + rg2up,
                                     name='adadelta_f_grad_shared', givens=givens)
 
     updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg
              for zg, ru2, rg2 in zip(zipped_grads,
                                      running_up2,
                                      running_grads2)]
     ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
              for ru2, ud in zip(running_up2, updir)]
     param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)]
 
     f_update = theano.function([lr], [], updates=ru2up + param_up,
                                on_unused_input='ignore',
                                name='adadelta_f_update',
                                givens=givens)
 
     return f_grad_shared, f_update
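The two compiled functions split a training step in two: `f_grad_shared` runs the forward/backward pass and refreshes the gradient accumulators, and `f_update` then applies the ADADELTA step to the parameters. A minimal usage sketch under stated assumptions (`model` is the object defining this method; `tparams`, `grads`, `model_input`, `cost` and the minibatch iterator are built elsewhere and the `(x, mask, y)` triple is hypothetical; note also that `tparams.iteritems()` is Python 2 — on Python 3 it would be `tparams.items()`):

    import theano.tensor as tensor

    # ADADELTA ignores the learning rate (hence on_unused_input='ignore' above),
    # but the shared optimizer interface still expects it as an input.
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = model.adadelta(lr, tparams, grads, model_input, cost)

    for epoch in range(n_epochs):
        for x, mask, y in iterate_minibatches():     # hypothetical batch iterator
            batch_cost = f_grad_shared(x, mask, y)   # cost + accumulator updates
            f_update(0.0)                            # apply the parameter update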
Example #2
 def adadelta(self, lr, tparams, grads, model_input, cost, givens=None):
     """
     An adaptive learning rate optimizer
 
     Parameters
     ----------
     lr : Theano SharedVariable
         Initial learning rate
     tparams : Theano SharedVariable
         Model parameters
     grads : Theano variable
         Gradients of cost w.r.t. parameters
     model_input : list of Theano variables
         Model inputs
     cost : Theano variable
         Objective function to minimize
 
     Notes
     -----
     For more information, see [ADADELTA]_.
 
     .. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning
        Rate Method*, arXiv:1212.5701.
     """
 
     zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                   name='%s_grad' % k)
                     for k, p in tparams.iteritems()]
     running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                  name='%s_rup2' % k)
                    for k, p in tparams.iteritems()]
     running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                     name='%s_rgrad2' % k)
                       for k, p in tparams.iteritems()]
 
     zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
     rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
              for rg2, g in zip(running_grads2, grads)]
 
     f_grad_shared = theano.function(model_input, cost, updates=zgup + rg2up,
                                     name='adadelta_f_grad_shared', givens=givens)
 
     updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg
              for zg, ru2, rg2 in zip(zipped_grads,
                                      running_up2,
                                      running_grads2)]
     ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
              for ru2, ud in zip(running_up2, updir)]
     param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)]
 
     f_update = theano.function([lr], [], updates=ru2up + param_up,
                                on_unused_input='ignore',
                                name='adadelta_f_update',
                                givens=givens)
 
     return f_grad_shared, f_update
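For reference, the hard-coded constants above (decay 0.95, epsilon 1e-6) implement the accumulators of the cited ADADELTA paper: `running_grads2` holds E[g^2], `running_up2` holds E[Δx^2], and `updir` is the rescaled step, i.e.

    E[g^2]_t        = 0.95 \, E[g^2]_{t-1} + 0.05 \, g_t^2
    \Delta x_t      = -\frac{\sqrt{E[\Delta x^2]_{t-1} + 10^{-6}}}{\sqrt{E[g^2]_t + 10^{-6}}} \; g_t
    E[\Delta x^2]_t = 0.95 \, E[\Delta x^2]_{t-1} + 0.05 \, \Delta x_t^2
    x_{t+1}         = x_t + \Delta x_t

so the effective step size adapts per parameter without an externally tuned learning rate.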
Example #3
    def getOutput(self, inputs):
        """
        Get the outputs of the encoder layer.
        Return all of the hidden states.
        """
        (self.sentence, self.mask) = inputs
        
        assert self.mask is not None
        n_steps = self.sentence.shape[0]
        if self.sentence.ndim == 3:
            n_samples = self.sentence.shape[1]
        else:
            n_samples = 1

        last_h = tensor.alloc(numpy_floatX(0.), n_samples, self.hidden_status_dim)
        state_below = tensor.dot(self.sentence, self.node.get_params_W())

        results, _ = theano.scan(self.node.node_update,
                                 sequences=[self.mask, state_below],
                                 outputs_info=[last_h],
                                 name=self._p(self.prefix, '_scan'),
                                 n_steps=n_steps)
        hidden_status_outputs = results
        
        return hidden_status_outputs
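Here `theano.scan` calls `self.node.node_update` once per timestep with the sliced sequences and the previous output, i.e. with the signature `(mask_t, x_t, h_prev) -> h_t`. A minimal sketch of a step function compatible with this loop, assuming a plain tanh recurrence (the actual node is likely an LSTM/GRU cell, and `U` stands in for its recurrent weight matrix):

    def node_update(m_, x_, h_):
        # m_ : (n_samples,)       mask for this timestep (0 at padding positions)
        # x_ : (n_samples, dim)   input already projected through get_params_W()
        # h_ : (n_samples, dim)   previous hidden state
        h_new = tensor.tanh(x_ + tensor.dot(h_, U))
        # where the mask is 0, carry the previous state forward unchanged
        return m_[:, None] * h_new + (1. - m_)[:, None] * h_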
Example #4
    def get_output(self, inputs):
        """
        Get the outputs of the encoder layer.
        Return all of the hidden states.
        """
        (self.sentence, self.mask, self.encoder_hidden_status, self.question_mask) = inputs
        
        assert self.mask is not None
        n_steps = self.sentence.shape[0]
        if self.sentence.ndim == 3:
            n_samples = self.sentence.shape[1]
        else:
            n_samples = 1
            
        # last_s = tensor.dot(self.encoder_hidden_status[0, :, self.hidden_status_dim:],
        #                     self.params[self._p(self.prefix, 'Ws')])
        last_s = tensor.alloc(numpy_floatX(0.), n_samples, self.hidden_status_dim)
        state_below = self.sentence

        def upd(am_, x_, s_, h_, qm_):
            c, alpha = self.attention_node.node_update(s_, h_, qm_)
            x_ = tensor.dot(tensor.concatenate([x_, c], axis=1), self.node.get_params_W())
            s = self.node.node_update(am_, x_, s_)
            
            return s, c, alpha

        results, _ = theano.scan(upd,
                                 sequences=[self.mask, state_below],
                                 outputs_info=[last_s, None, None],
                                 non_sequences=[self.encoder_hidden_status, self.question_mask],
                                 name=self._p(self.prefix, '_scan'),
                                 n_steps=n_steps)
        hidden_status_outputs, context_outputs, alpha_outputs = results
        
        return hidden_status_outputs, context_outputs, alpha_outputs
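Within the loop, `attention_node.node_update(s_, h_, qm_)` condenses the encoder hidden states `h_` into a context vector `c` and attention weights `alpha` for the current step, conditioned on the previous decoder state `s_` and masked by `qm_`. One way such a step can look, as a hedged additive-attention sketch (the projections `W_a`, `U_a` and vector `v_a` are hypothetical shared variables; the real node may differ):

    def attention_update(s_, h_, qm_):
        # s_  : (n_samples, hidden_status_dim)    previous decoder state
        # h_  : (n_steps_q, n_samples, enc_dim)   encoder hidden states
        # qm_ : (n_steps_q, n_samples)            encoder-side (question) mask
        e = tensor.dot(tensor.tanh(tensor.dot(h_, U_a)
                                   + tensor.dot(s_, W_a)[None, :, :]), v_a)
        # masked softmax over the encoder time axis
        alpha = tensor.exp(e - e.max(axis=0, keepdims=True)) * qm_
        alpha = alpha / (alpha.sum(axis=0, keepdims=True) + 1e-8)
        c = (alpha[:, :, None] * h_).sum(axis=0)  # (n_samples, enc_dim)
        return c, alpha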
Example #5
    def getOutput(self, inputs):
        """
        Get the outputs of the encoder layer.
        Return all of the hidden states.
        """
        (self.sentence, self.mask) = inputs

        assert self.mask is not None
        n_steps = self.sentence.shape[0]
        if self.sentence.ndim == 3:
            n_samples = self.sentence.shape[1]
        else:
            n_samples = 1

        last_h = tensor.alloc(numpy_floatX(0.), n_samples,
                              self.hidden_status_dim)
        state_below = tensor.dot(self.sentence, self.node.get_params_W())

        results, _ = theano.scan(self.node.node_update,
                                 sequences=[self.mask, state_below],
                                 outputs_info=[last_h],
                                 name=self._p(self.prefix, '_scan'),
                                 n_steps=n_steps)
        hidden_status_outputs = results

        return hidden_status_outputs
Example #6
    def get_output(self, inputs):
        """
        Get the outputs of the encoder layer.
        Return all of the hidden states.
        """
        (self.sentence, self.mask) = inputs
        
        assert self.mask is not None
        n_steps = self.sentence.shape[0]
        if self.sentence.ndim == 3:
            n_samples = self.sentence.shape[1]
        else:
            n_samples = 1

        hidden_states_list = [self.sentence]
        for idx in range(self.n_layers):
            sentence = hidden_states_list[idx]
            state_below = tensor.dot(sentence, self.node_list[idx].get_params_W())
            last_h = tensor.alloc(numpy_floatX(0.), n_samples, self.hidden_status_dim)
            results, _ = theano.scan(self.node_list[idx].node_update,
                                     sequences=[self.mask, state_below],
                                     outputs_info=[last_h],
                                     name=self._p(self.prefix, '_scan'+str(idx)),
                                     n_steps=n_steps)
            hidden_states_list.append(results)
        hidden_status_outputs = hidden_states_list
        
        return hidden_status_outputs
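Because the list is seeded with `self.sentence` itself, index 0 of the returned list is the input sequence and each subsequent entry is one layer's hidden states, so a caller that only needs the top layer takes the last entry. A small hedged usage sketch (the `encoder` instance and its inputs are hypothetical):

    outputs = encoder.get_output((sentence, mask))   # list of length n_layers + 1
    top_layer = outputs[-1]                          # (n_steps, n_samples, hidden_status_dim)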
Example #7
    def get_output(self, inputs):
        """
        Get the outputs of the encoder layer.
        Return all of the hidden states.
        """
        (self.sentence, self.mask, self.question_children, self.question_children_mask, self.max_offset) = inputs
        
        assert self.mask is not None
        n_steps = self.sentence.shape[0]
        if self.sentence.ndim == 3:
            n_samples = self.sentence.shape[1]
        else:
            n_samples = 1

        queue_buffer = tensor.alloc(numpy_floatX(0.), n_samples,
                            self.max_offset, self.hidden_status_dim)
        state_below = tensor.dot(self.sentence, self.node.get_params_W())

        non_seq = self.node.get_non_seq_parameter(n_samples)
        self.question_children_mask = self.question_children_mask.dimshuffle([0, 1, 2, 'x'])
        results, _ = theano.scan(self.node.node_update,
             sequences=[self.mask, state_below, self.question_children, self.question_children_mask],
             outputs_info=[queue_buffer, queue_buffer[:, -1, :]],
             non_sequences=non_seq,
             name=self._p(self.prefix, '_scan'),
             n_steps=n_steps)
        hidden_status_outputs = results[1]
        return hidden_status_outputs
Example #8
    def get_output(self, inputs):
        """
        Get the outputs of the encoder layer.
        Return all of the hidden states.
        """

        if len(inputs) == 4:
            (self.sentence, self.mask, self.forward_hidden_status,
             self.direction) = inputs

        if len(inputs) == 5:
            (self.sentence, self.mask, self.forward_hidden_status,
             self.direction, _) = inputs

        assert self.mask is not None

        n_steps = self.sentence.shape[0]
        if self.sentence.ndim == 3:
            n_samples = self.sentence.shape[1]
        else:
            n_samples = 1

        if len(inputs) == 4:
            last_h = tensor.alloc(numpy_floatX(0.), n_samples,
                                  self.layer_number, self.hidden_status_dim)

        if len(inputs) == 5:
            last_h = inputs[4]
            last_h = tensor.alloc(last_h, self.layer_number, last_h.shape[0],
                                  last_h.shape[1]).dimshuffle([1, 0, 2])

        state_below = tensor.dot(
            tensor.concatenate([
                self.sentence,
                tensor.alloc(self.forward_hidden_status[-1, :, :],
                             self.sentence.shape[0],
                             self.forward_hidden_status.shape[1],
                             self.hidden_status_dim)
            ],
                               axis=2), self.node.get_params_W())

        results, _ = theano.scan(
            self.node.node_update,
            sequences=[self.mask, state_below, self.direction],
            outputs_info=[last_h],
            name=self._p(self.prefix, '_scan'),
            n_steps=n_steps)
        hidden_status_outputs = results

        #         p = printing.Print('hidden_status_outputs')
        #         hidden_status_outputs = p(hidden_status_outputs)

        return hidden_status_outputs
Example #9
    def get_output(self, inputs):
        """
        Get the outputs of the encoder layer.
        Return all of the hidden states.
        """
        (self.sentence, self.mask, self.encoder_hidden_status,
         self.question_mask) = inputs

        assert self.mask is not None
        n_steps = self.sentence.shape[0]
        if self.sentence.ndim == 3:
            n_samples = self.sentence.shape[1]
        else:
            n_samples = 1

        # last_s = tensor.dot(self.encoder_hidden_status[0, :, self.hidden_status_dim:],
        #                     self.params[self._p(self.prefix, 'Ws')])
        last_s = tensor.alloc(numpy_floatX(0.), n_samples,
                              self.hidden_status_dim)
        state_below = self.sentence

        def upd(am_, x_, s_, h_, qm_):
            c, alpha = self.attention_node.node_update(s_, h_, qm_)
            x_ = tensor.dot(tensor.concatenate([x_, c], axis=1),
                            self.node.get_params_W())
            s = self.node.node_update(am_, x_, s_)

            return s, c, alpha

        results, _ = theano.scan(
            upd,
            sequences=[self.mask, state_below],
            outputs_info=[last_s, None, None],
            non_sequences=[self.encoder_hidden_status, self.question_mask],
            name=self._p(self.prefix, '_scan'),
            n_steps=n_steps)
        hidden_status_outputs, context_outputs, alpha_outputs = results

        return hidden_status_outputs, context_outputs, alpha_outputs
Example #10
    def rmsprop(self, lr, tparams, grads, model_input, cost, givens=None):
        """
        A variant of SGD that scales the step size by a running average of the
        recent step norms.
    
        Parameters
        ----------
        lr : Theano SharedVariable
            Initial learning rate
        tparams : Theano SharedVariable
            Model parameters
        grads : Theano variable
            Gradients of cost w.r.t. parameters
        model_input : list of Theano variables
            Model inputs
        cost : Theano variable
            Objective function to minimize
    
        Notes
        -----
        For more information, see [Hint2014]_.
    
        .. [Hint2014] Geoff Hinton, *Neural Networks for Machine Learning*,
           lecture 6a,
           http://cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
        """

        zipped_grads = [
            theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k)
            for k, p in tparams.iteritems()
        ]
        running_grads = [
            theano.shared(p.get_value() * numpy_floatX(0.),
                          name='%s_rgrad' % k) for k, p in tparams.iteritems()
        ]
        running_grads2 = [
            theano.shared(p.get_value() * numpy_floatX(0.),
                          name='%s_rgrad2' % k)
            for k, p in tparams.iteritems()
        ]

        zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
        rgup = [(rg, 0.95 * rg + 0.05 * g)
                for rg, g in zip(running_grads, grads)]
        rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g**2))
                 for rg2, g in zip(running_grads2, grads)]

        f_grad_shared = theano.function(model_input,
                                        cost,
                                        updates=zgup + rgup + rg2up,
                                        name='rmsprop_f_grad_shared',
                                        givens=givens)

        updir = [
            theano.shared(p.get_value() * numpy_floatX(0.),
                          name='%s_updir' % k) for k, p in tparams.iteritems()
        ]
        updir_new = [(ud, 0.9 * ud - 1e-4 * zg / T.sqrt(rg2 - rg**2 + 1e-4))
                     for ud, zg, rg, rg2 in zip(updir, zipped_grads,
                                                running_grads, running_grads2)]
        param_up = [(p, p + udn[1])
                    for p, udn in zip(tparams.values(), updir_new)]
        f_update = theano.function([lr], [],
                                   updates=updir_new + param_up,
                                   on_unused_input='ignore',
                                   name='rmsprop_f_update',
                                   givens=givens)

        return f_grad_shared, f_update
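The hard-coded constants make this a momentum-smoothed, variance-normalised RMSProp: `running_grads` tracks E[g], `running_grads2` tracks E[g^2], and `updir` keeps 0.9 momentum while dividing the raw gradient by a running estimate of its standard deviation:

    E[g]_t   = 0.95 \, E[g]_{t-1} + 0.05 \, g_t
    E[g^2]_t = 0.95 \, E[g^2]_{t-1} + 0.05 \, g_t^2
    u_t      = 0.9 \, u_{t-1} - 10^{-4} \, \frac{g_t}{\sqrt{E[g^2]_t - E[g]_t^2 + 10^{-4}}}
    x_{t+1}  = x_t + u_t

Since E[g^2] - E[g]^2 estimates the gradient variance, steps are scaled by how noisy each parameter's gradient is rather than by its raw magnitude; the 1e-4 factor acts as the base learning rate.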
Example #11
 def rmsprop(self, lr, tparams, grads, model_input, cost, givens=None):
     """
     A variant of SGD that scales the step size by a running average of the
     recent step norms.
 
     Parameters
     ----------
     lr : Theano SharedVariable
         Initial learning rate
     tparams : Theano SharedVariable
         Model parameters
     grads : Theano variable
         Gradients of cost w.r.t. parameters
     model_input : list of Theano variables
         Model inputs
     cost : Theano variable
         Objective function to minimize
 
     Notes
     -----
     For more information, see [Hint2014]_.
 
     .. [Hint2014] Geoff Hinton, *Neural Networks for Machine Learning*,
        lecture 6a,
        http://cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
     """
 
     zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                   name='%s_grad' % k)
                     for k, p in tparams.iteritems()]
     running_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                    name='%s_rgrad' % k)
                      for k, p in tparams.iteritems()]
     running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                     name='%s_rgrad2' % k)
                       for k, p in tparams.iteritems()]
 
     zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
     rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
     rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
              for rg2, g in zip(running_grads2, grads)]
 
     f_grad_shared = theano.function(model_input, cost,
                                     updates=zgup + rgup + rg2up,
                                     name='rmsprop_f_grad_shared',
                                     givens=givens)
 
     updir = [theano.shared(p.get_value() * numpy_floatX(0.),
                            name='%s_updir' % k)
              for k, p in tparams.iteritems()]
     updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4))
                  for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads,
                                             running_grads2)]
     param_up = [(p, p + udn[1])
                 for p, udn in zip(tparams.values(), updir_new)]
     f_update = theano.function([lr], [], updates=updir_new + param_up,
                                on_unused_input='ignore',
                                name='rmsprop_f_update', givens=givens)
 
     return f_grad_shared, f_update