Code example #1
File: updates.py Project: zhanghonglishanzai/drmad
def update_fun(param, grad, dataset, history, opt, learnParams, params):
    """
        Computing the update from gradient. 
        Adaptive step sizes, learning rate, momentum etc. 
    """
    epsilon = np.asarray(0.0, dtype=theano.config.floatX)

    # specification of learning rate, (hyper)param specific
    globalLR1, globalLR2, momentParam1, momentParam2 = learnParams
    assert dataset in ['T1', 'T2']
    lr = globalLR1 if dataset == 'T1' else separateLR(params, param.name, globalLR1, globalLR2) 
 
    # update with sgd
    if opt is None:
        updates = []
        if params.trackGrads:
            updates, trackGrads = grad_monitor(param, grad, updates, params, opt)
            other = [grad]
        else:    
            trackGrads = []
            other = [grad]                          
        up = - lr * grad

    # update with adam    
    else:
        up, updates, trackGrads, other = opt.up(param, grad, params, lr, dataset)

    # dictionary param to grad (first time around)
    if params.useT2 and dataset == 'T1':
        history['grad'][param] = grad
        history['up'][param] = up

    # momentum
    if params.use_momentum:
        oldup = theano.shared(np.asarray(param.get_value() * 0., dtype='float32'),
                              broadcastable=param.broadcastable,
                              name='oldup_%s' % param.name)
        momentParam = momentParam1 if dataset == 'T1' else momentParam2
        up += momentParam * oldup
        updates += [(oldup, up)]

    # new parameter
    newparam = param + up

    # min value (assumption: all hyperparams >= 0)
    if dataset == 'T2':
        newparam = T.maximum(epsilon, newparam)

    updates += [(param, newparam)]
    adamGrad = [other]
    return updates, trackGrads, adamGrad
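The `updates` list returned by `update_fun` is meant to be handed to `theano.function`, which applies every `(shared_variable, new_expression)` pair on each call of the compiled function. Below is a minimal wiring sketch, assuming hypothetical `cost`, `x`, `y`, and `params_list` variables (and `history`/`opt`/`learnParams`/`params` objects set up as the project expects) that are not part of the snippet above:

import theano
import theano.tensor as T

# Hypothetical training-step wiring (the names below are assumed, not taken from
# the project): collect the per-parameter update pairs and compile them once.
all_updates = []
for p in params_list:                        # params_list: list of shared model parameters
    g = T.grad(cost, p)                      # cost: scalar training loss
    upd, track, adam_g = update_fun(p, g, 'T1', history, opt, learnParams, params)
    all_updates += upd
train_step = theano.function([x, y], cost, updates=all_updates)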
Code example #2
File: updates.py Project: jelennal/t1t2
def update_fun(param, grad, dataset, history, opt, learnParams, params):
    '''
        Compute the update from the gradient:
        adaptive step sizes, learning rate, momentum, etc.
    '''
    epsilon = np.asarray(0.0, dtype=theano.config.floatX)

    # specification of learning rate, (hyper)param specific
    globalLR1, globalLR2, momentParam1, momentParam2 = learnParams
    assert dataset in ['T1', 'T2']
    lr = globalLR1 if dataset == 'T1' else separateLR(params, param.name, globalLR1, globalLR2) 
 
    # update without adam
    if opt is None:
        updates = []
        if params.trackGrads:
            updates, trackGrads = grad_monitor(param, grad, updates, params, opt)
            other = [grad]
        else:    
            trackGrads = []
            other = [grad]                          
        up = - lr * grad

    # update with adam    
    else:
        up, updates, trackGrads, other = opt.up(param, grad, params, lr, dataset)

    # dictionary param to grad (first time around)
    if params.useT2 and dataset == 'T1':
        history['grad'][param] = grad
        history['up'][param] = up

    # momentum
    if params.use_momentum:
        oldup = theano.shared(np.asarray(param.get_value() * 0., dtype='float32'),
                              broadcastable=param.broadcastable,
                              name='oldup_%s' % param.name)
        momentParam = momentParam1 if dataset == 'T1' else momentParam2
        up += momentParam * oldup
        updates += [(oldup, up)]

    # new parameter
    newparam = param + up

    # min value (assumption: all hyperparams >= 0)
    if dataset == 'T2':
        newparam = T.maximum(epsilon, newparam)

    updates += [(param, newparam)]
    adamGrad = [other]
    return updates, trackGrads, adamGrad
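The momentum branch in both versions keeps the previous step in a per-parameter shared variable `oldup` and adds `momentParam * oldup` to the fresh step, i.e. classical momentum. A standalone NumPy sketch of that same recurrence (the values and learning rate are made up for illustration and are not taken from either project):

import numpy as np

lr, momentParam = 0.1, 0.9
param = np.ones(3, dtype='float32')
oldup = np.zeros(3, dtype='float32')         # plays the role of the `oldup` shared variable
for step in range(3):
    grad = 0.5 * param                       # stand-in gradient
    up = -lr * grad + momentParam * oldup    # up += momentParam * oldup
    param = param + up                       # newparam = param + up
    oldup = up                               # updates += [(oldup, up)]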
Code example #3
File: adaptive.py Project: jelennal/t1t2
    def up(self, param, grad, params, lr=1e-4, dataset='T1'):
        zero = np.float32(0.)
        one = np.float32(1.)
        updates = []
        trackGrads = []
        other = []

        # initialize adam shared variables
        m = theano.shared(np.float32(param.get_value()) * zero,
                          name="m_%s" % param.name)
        v = theano.shared(np.float32(param.get_value()) * zero,
                          name="v_%s" % param.name)

        fix1 = one - self.b1**self.i
        fix2 = one - self.b2**self.i
        b1_t = self.b1 * self.lam**(self.i - 1)

        lr_t = lr * (T.sqrt(fix2) / fix1)
        m_t = ((one - b1_t) * grad) + (b1_t * m)
        #       m_t = ((one - self.b1) * grad) + (self.b1 * m)
        v_t = ((one - self.b2) * T.sqr(grad)) + (self.b2 * v)
        g_t = m_t / (T.sqrt(v_t) + self.e)
        p_t = -(lr_t * g_t)

        # update Adam shared variables
        updates.append((m, m_t))
        updates.append((v, v_t))

        # in case of gradient tracking
        if params.trackGrads:
            updates, trackGrads = grad_monitor(param, grad, updates, params,
                                               'adam', g_t, m, v, self.e)

        # if approximating gradC2 with adam
        if params.avC2grad in ['adam', 'momentum']:
            other = g_t * (T.sqrt(fix2) / fix1)  # alt: -lr_t*g_t or m_t

        return p_t, updates, trackGrads, other
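This is the standard bias-corrected Adam step, with the corrections `fix1` and `fix2` folded into the step size `lr_t` and the first-moment coefficient decayed as `b1_t = b1 * lam**(i - 1)`. A plain NumPy sketch of a single step that mirrors the symbolic expressions above (the hyperparameter values are assumed, not read from the class constructor):

import numpy as np

b1, b2, e, lam, lr, i = 0.9, 0.999, 1e-8, 1.0 - 1e-8, 1e-4, 1
m = np.zeros(3)                              # first moment, like the shared variable m
v = np.zeros(3)                              # second moment, like the shared variable v
grad = np.array([0.1, -0.2, 0.3])

fix1 = 1.0 - b1 ** i                         # bias correction for m
fix2 = 1.0 - b2 ** i                         # bias correction for v
b1_t = b1 * lam ** (i - 1)                   # decayed beta1

lr_t = lr * (np.sqrt(fix2) / fix1)
m = (1.0 - b1_t) * grad + b1_t * m           # m_t
v = (1.0 - b2) * grad ** 2 + b2 * v          # v_t
step = -lr_t * m / (np.sqrt(v) + e)          # p_t, the update returned as the first value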
Code example #4
File: adaptive.py Project: jelennal/t1t2
    def up(self, param, grad, params, lr=1e-4, dataset='T1'):
        zero = np.float32(0.)
        one = np.float32(1.)
        updates = []
        trackGrads = []
        other = []
      
        # initialize adam shared variables
        m = theano.shared(np.float32(param.get_value()) * zero, name="m_%s" % param.name)
        v = theano.shared(np.float32(param.get_value()) * zero, name="v_%s" % param.name)

        fix1 = one - self.b1 ** self.i
        fix2 = one - self.b2 ** self.i
        b1_t = self.b1 * self.lam ** (self.i - 1)
        
        lr_t = lr * (T.sqrt(fix2) / fix1)        
        m_t = ((one - b1_t) * grad) + (b1_t * m)
        #       m_t = ((one - self.b1) * grad) + (self.b1 * m)
        v_t = ((one - self.b2) * T.sqr(grad)) + (self.b2 * v)
        g_t = m_t / (T.sqrt(v_t) + self.e)
        p_t = - (lr_t * g_t)

        # update Adam shared variables
        updates.append((m, m_t))
        updates.append((v, v_t))

        # in case of gradient tracking
        if params.trackGrads:
            updates, trackGrads = grad_monitor(param, grad, updates, params, 'adam', g_t, m, v, self.e)

        # if approximating gradC2 with adam
        if params.avC2grad in ['adam', 'momentum']:
            other = g_t * (T.sqrt(fix2) / fix1)  # alt: -lr_t*g_t or m_t

        return p_t, updates, trackGrads, other
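The only departure from textbook Adam in these two methods is the decayed first-moment coefficient `b1_t = b1 * lam**(i - 1)`, which slowly drives the effective beta1 toward zero, in the spirit of the lambda-decay variant discussed in the original Adam paper. A tiny sketch of how slowly that schedule moves (the b1 and lam values are assumed for illustration):

# Effective beta1 over time under the lam decay (illustrative values only).
b1, lam = 0.9, 1.0 - 1e-8
for i in (1, 1000, 1000000):
    print(i, b1 * lam ** (i - 1))            # stays close to 0.9 for a very long time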