Example #1
0
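# Context assumed by the fd1/fd2/fd3 snippets in this section (defined elsewhere
# in the code base and not shown here): `import theano`, `import theano.tensor as T`,
# the `adam` optimizer class, the `update_fun` helper, a module-level
# finite-difference step size `step`, and an `fdm` object exposing the shared
# variables updateC1T1, updateC2T1 and gradC1T2.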
def fd2(mlp, fdm, params, globalLR1, globalLR2, momentParam1, momentParam2):

    cost2 = mlp.classError2
    gradC2 = T.grad(cost2, mlp.paramsT1)

    tempUps = []
    history = {'grad': dict(), 'up': dict()}

    if params.avC2grad in ['adam', 'momentum']:
        if params.avC2grad == 'adam': opt3 = adam()
        else: opt3 = None
        tempUps = [] if opt3 is None else opt3.initial_updates()

        newC2 = []
        for param, grad in zip(mlp.paramsT1, gradC2):
            tempUp, _, newGrad = update_fun(param,
                                            T.reshape(grad, param.shape), None,
                                            'T1', history, opt3, params,
                                            globalLR1, globalLR2, momentParam1,
                                            momentParam2)
            newC2 += newGrad
            tempUps += tempUp[:-1]
        gradC2 = newC2

    updateT1 = []
    updateT2 = []
    # save grad W of C2 as shared (3), update W - (1) + (3)
    for param, grad, uC1, uC2 in zip(mlp.paramsT1, gradC2, fdm.updateC1T1,
                                     fdm.updateC2T1):
        updateT1 += [(uC2, -step * globalLR1 * grad)]
        updateT1 += [(param, param - uC1 - step * globalLR1 * grad)]

    return updateT1 + updateT2 + tempUps
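The update pairs in fd2 are easiest to read as plain arithmetic. An illustrative summary, assuming fd1 (shown further below) has already applied its update uC1 so that each weight currently holds W0 + uC1:

# For each (param, grad, uC1, uC2) in fd2, with grad = dC2/dW:
#   uC2   <- -step * globalLR1 * grad                  # save term (3)
#   param <- param - uC1 - step * globalLR1 * grad     # = W0 + uC2, the perturbed weights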
Example #2
0
def fd3(mlp, fdm, params, globalLR1, globalLR2, momentParam1, momentParam2):

    cost1 = mlp.classError1 + mlp.penalty
    gradT1reg = T.grad(cost1, mlp.paramsT2)

    updateT1 = []
    updateT2 = []
    onlyT2param = []
    # take opt from Adam?
    if params.opt2 in ['adam']: opt2 = adam()
    else: opt2 = None

    # update W - (1) + (3)
    for param, uC1, uC2 in zip(mlp.paramsT1, fdm.updateC1T1, fdm.updateC2T1):
        updateT1 += [(param, param + uC1 - uC2)]

    # compute grad T2 of C1,  update T2 - [(4) - (2) ] / lr1
    for param, grad, gT2 in zip(mlp.paramsT2, gradT1reg, fdm.gradC1T2):
        if params.T2onlySGN:
            grad_proxi = T.sgn((grad - gT2) / step * globalLR1)
        else:
            grad_proxi = (grad - gT2) / step * globalLR1

        tempUp, tempPair, _ = update_fun(param,
                                         T.reshape(grad_proxi,
                                                   param.shape), None, 'T2',
                                         {}, opt2, params, globalLR1,
                                         globalLR2, momentParam1, momentParam2)
        updateT2 += tempUp
        onlyT2param += tempPair

    debugs = [check for (_, check) in onlyT2param]
    return updateT1 + updateT2, debugs
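Taken together with fd1 (shown further below), fd2 and fd3 appear to implement a finite-difference approximation of the gradient of the cost C2 with respect to the T2 (regularization) parameters. The sketch below summarizes the bookkeeping implied by the numbered comments, with W0 denoting the T1 weights before fd1 runs; it is illustrative and not part of the original code.

# fd1: apply the ordinary T1 update uC1 from C1 (W <- W0 + uC1) and save
#      gT2 = dC1/dT2, evaluated before the T1 update is applied      -- term (2)
# fd2: save uC2 = -step*globalLR1*dC2/dW (dC2/dW taken at W0 + uC1) and move
#      the weights to the perturbed point W0 + uC2                   -- term (3)
# fd3: build the T2 gradient proxy from the change in dC1/dT2 between the
#      perturbed and unperturbed weights, i.e. terms (4) - (2), rescaled by
#      step and globalLR1, then restore the weights to W0 + uC1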
Example #3
0
def fd1(mlp, fdm, params, globalLR1, globalLR2, momentParam1, momentParam2):

    # gradient of T1 ----------------------------------- GRADS
    cost1 = mlp.classError1 + mlp.penalty
    gradT1 = T.grad(cost1, mlp.paramsT1)
    gradT1reg = T.grad(cost1, mlp.paramsT2)

    # take opt from Adam?
    if params.opt1 in ['adam']: opt1 = adam()
    else: opt1 = None
    if params.opt2 in ['adam']: opt2 = adam()
    else: opt2 = None

    updateT1 = [] if opt1 is None else opt1.initial_updates()
    updateT2 = [] if opt2 is None else opt2.initial_updates()

    onlyT1param = []
    history = {'grad': dict(), 'up': dict()}

    assert len(mlp.paramsT1) == len(gradT1)
    assert len(mlp.paramsT1) == len(fdm.updateC1T1)
    assert len(mlp.paramsT2) == len(gradT1reg)
    assert len(mlp.paramsT2) == len(fdm.gradC1T2)

    for param, grad, uC1 in zip(mlp.paramsT1, gradT1, fdm.updateC1T1):
        tempUp, tempPair, _ = update_fun(param, grad,
                                         mlp.penaltyMaxParams.get(param, None),
                                         'T1', history, opt1, params, globalLR1,
                                         globalLR2, momentParam1, momentParam2)
        updateT1 += tempUp
        onlyT1param += tempPair

        newparam = tempUp[-1][-1]
        just_up = newparam - param
        updateT1 += [(uC1, just_up)]

    # save grad T2 of C1 as shared (2) in fdm.gradC1T2
    for param, grad, gT2 in zip(mlp.paramsT2, gradT1reg, fdm.gradC1T2):
        updateT2 += [(gT2, grad)]

    debugs = [check for (_, check) in onlyT1param]
    return updateT1 + updateT2, debugs  #, T2_grads
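One way the three phases might be wired together at training time is sketched below. This is purely illustrative: the symbolic inputs `x` and `y`, the cost outputs, and the batch iterator are placeholders whose real counterparts depend on how `mlp` and the training loop are defined elsewhere.

ups1, debugs1 = fd1(mlp, fdm, params, globalLR1, globalLR2, momentParam1, momentParam2)
ups2 = fd2(mlp, fdm, params, globalLR1, globalLR2, momentParam1, momentParam2)
ups3, debugs3 = fd3(mlp, fdm, params, globalLR1, globalLR2, momentParam1, momentParam2)

# `x` and `y` stand for whatever symbolic inputs the mlp graph was built on.
phase1 = theano.function([x, y], mlp.classError1, updates=ups1)  # T1 step + bookkeeping
phase2 = theano.function([x, y], mlp.classError2, updates=ups2)  # perturb W with dC2/dW
phase3 = theano.function([x, y], mlp.classError1, updates=ups3)  # T2 step, restore W

for (x_tr, y_tr), (x_val, y_val) in batches:  # hypothetical paired batch iterator
    phase1(x_tr, y_tr)    # training batch
    phase2(x_val, y_val)  # held-out batch for C2
    phase3(x_tr, y_tr)    # same training batch for the finite difference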
Example #4
0
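# Context assumed by updates() (defined elsewhere in the code base): `import theano`,
# `import theano.tensor as T`, `import numpy as np`, the `adam` optimizer, and the
# helpers update_fun, scale_norm, remove_nans, clip_grad and hypergrad.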
def updates(mlp, params, globalLR1, globalLR2, momentParam1, momentParam2):
    
    '''
        Computing the updates of the T1 and T2 parameters.

    Inputs:
        mlp :: model
        params :: specification of the model and training
        globalLR1, globalLR2 :: global learning rates for T1 and T2
        momentParam1, momentParam2 :: momentum parameters for T1 and T2

    Outputs:
        updateT1 :: updates of T1 parameters and related shared variables
        updateT2 :: updates of T2 parameters and related shared variables
                    (includes temporary optimizer updates)
        updateC2grad :: updates saving dC2/dT1 into shared variables
        debugs :: variables tracked for debugging
    '''

    # gradients
    cost1 = mlp.trainCost + mlp.penalty
    cost2 = mlp.trainCost

    # dC1/dT1
    gradC1T1 = T.grad(cost1, mlp.paramsT1)
    gradC2T1temp = T.grad(cost2, mlp.paramsT1)
        
    # initializations
    opt1 = adam() if params.opt1 in ['adam'] else None
    opt2 = adam() if params.opt2 in ['adam'] else None
    updateT1 = [] if opt1 is None else opt1.initial_updates()
    updateT2 = [] if opt2 is None else opt2.initial_updates() 

    updateC2grad = []; gradC2T1 = []; gradC2T2 = []; tempUps = []
    trackT1grads = []; trackT2grads = []
    history = {'grad': dict(), 'up': dict()}
    historyC2 = {'grad': dict(), 'up': dict()}

    learnParams = [globalLR1, globalLR2, momentParam1, momentParam2]

                   
    '''
        Updating T1 params
    '''
    for param, grad in zip(mlp.paramsT1, gradC1T1):
        grad = scale_norm(remove_nans(grad), threshold=3.)
        ups, track, _ = update_fun(param, grad, 'T1',
                                   history, opt1, learnParams, params)
        updateT1 += ups
        trackT1grads += [track]

    ''' 
        Updating T2 params
    '''

    if params.useT2:

        '''
            Save grads C2T1 for the T2 update:
        '''
        for param, grad in zip(mlp.paramsT1, gradC2T1temp):
            grad = scale_norm(remove_nans(grad), threshold=3.)
            grad = clip_grad(grad, threshold=10.)
            saveGrad = theano.shared(np.asarray(param.get_value() * 0., dtype='float32'),
                                     broadcastable=param.broadcastable,
                                     name='gradC2T1_%s' % param.name)
            updateC2grad += [(saveGrad, grad)]
            gradC2T1 += [saveGrad]

        '''
            If gradient dC2/dT1 is also estimated with adam
        '''
        if params.avC2grad in ['adam', 'momentum']:
            #gradC2T1 = T.grad(cost2, mlp.paramsT1)
            if params.avC2grad == 'adam': opt3 = adam()
            else: opt3 = None
            tempUps = [] if opt3 is None else opt3.initial_updates()

            # the saved dC2/dT1 grads were already scaled and clipped above
            newC2 = []
            for param, grad in zip(mlp.paramsT1, gradC2T1):
                tempUp, _, newGrad = update_fun(param, T.reshape(grad, param.shape), 'T1',
                                                historyC2, opt3, learnParams, params)
                tempUps += tempUp[:-1]
                newC2 += newGrad
            gradC2T1 = newC2
                
        
        paramsT2, gradC2T2 = hypergrad(mlp.paramsT1, mlp.paramsT2, gradC2T1, 
                                       mlp.trainCost, mlp.trainCost, mlp.penalty)            

        for param, grad in zip(mlp.paramsT2, gradC2T2):
            paramName, _ = param.name.split('_')
            if params.decayT2 > 0. and paramName not in ['L2', 'L1']:
                grad += params.decayT2*param 

            grad = scale_norm(remove_nans(grad), threshold=3.) 
            grad = clip_grad(grad, threshold=10.)                              
            tempUp, track, _ = update_fun(param, T.reshape(grad, param.shape),'T2',
                                          {}, opt2, learnParams, params)
            updateT2 += tempUp
            trackT2grads += [track]       
                         
    # monitored variables for output                         
    if (not params.useT2) and params.trackGrads:
        debugs = trackT1grads
    elif params.trackGrads:
        debugs = trackT1grads + trackT2grads    
    else:
        debugs = []
    print("Parameters %s are trained on T2" % ", ".join([p.name for p in mlp.paramsT2]))

    return updateT1, updateT2+tempUps, updateC2grad, debugs
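A hedged sketch of how the four returned update lists might be compiled into training steps. The symbolic inputs `x` and `y` and the choice to bundle updateC2grad with the T1 step are assumptions, not part of the original code.

updT1, updT2, updC2grad, debugs = updates(mlp, params, globalLR1, globalLR2,
                                          momentParam1, momentParam2)
# `x` and `y` stand for whatever symbolic inputs the mlp graph was built on.
step_T1 = theano.function([x, y], mlp.trainCost, updates=updT1 + updC2grad)  # fed training batches
step_T2 = theano.function([x, y], mlp.trainCost, updates=updT2)              # fed held-out batches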