Example #1
if OPT_METHOD == 'adam':
    updates = lasagne.updates.adam(loss, all_params)
elif OPT_METHOD == 'SGD':
    # Start from a fixed initial learning rate, kept in a shared
    # variable so it can be annealed during training.
    learning_rate0 = 0.5
    learning_rate = learning_rate0
    lr = theano.shared(np.asarray(learning_rate, dtype=theano.config.floatX),
                       borrow=True)

    updates = lasagne.updates.sgd(scaled_grads, all_params, lr)
else:
    raise ValueError('unknown optimization method: %s' % OPT_METHOD)

# For each parameter constrained to the manifold, measure how much of
# its update lies in the tangent space, and optionally replace the
# update with its tangent-space projection.
angle = T.constant(0)
for param in all_params:
    if param in param2orthogonlize:
        delta = updates[param] - param
        tan_grad = tangent_grad(param, delta)
        # Average norm ratio ||P_tangent(delta)|| / ||delta|| over the
        # constrained parameters (1 means the update is fully tangent).
        angle = angle + T.sqrt((tan_grad ** 2).sum() / (delta ** 2).sum()) / len(param2orthogonlize)
        if PROJ_GRAD:
            updates[param] = param + tan_grad
                
# Updates that map each constrained parameter back onto the manifold.
retract_updates = []
for p in param2orthogonlize:
    retract_updates.append((p, GAIN * retraction(p)))

# Symbolic Jacobian of the loss w.r.t. the constrained parameters
# (compiled below as probe_J) and the concatenated hidden-layer outputs.
J = theano.gradient.jacobian(loss, param2orthogonlize)
hidden = lasagne.layers.get_output(layers_to_concat)

# Compile functions for training and for probing the loss and Jacobian
train = theano.function([l_in.input_var, target], [loss, angle, grad_norm],
                        updates=updates, allow_input_downcast=True)
probe_loss = theano.function([l_in.input_var, target], loss,
                             allow_input_downcast=True)
probe_J = theano.function([l_in.input_var, target], J,
                          allow_input_downcast=True)
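Both examples call tangent_grad and retraction without showing their definitions. Below is a minimal sketch of what they could look like, assuming the constrained parameters live on the Stiefel manifold (matrices W with W^T W = I); the projection formula and the polar retraction are standard choices for this manifold, not necessarily the original implementation.

import theano.tensor as T
from theano.tensor import nlinalg

def tangent_grad(X, grad):
    # Project grad onto the tangent space of the Stiefel manifold at X:
    # P_X(G) = G - X * sym(X^T G), where sym(A) = (A + A^T) / 2.
    XG = T.dot(X.T, grad)
    sym = (XG + XG.T) / 2.
    return grad - T.dot(X, sym)

def retraction(X):
    # Polar retraction: X (X^T X)^{-1/2} is the nearest orthogonal
    # matrix to X in Frobenius norm. Computed via an eigendecomposition
    # of the (symmetric, positive-definite) Gram matrix X^T X.
    s, V = nlinalg.eigh(T.dot(X.T, X))
    inv_sqrt = T.dot(V * (s ** -0.5), V.T)
    return T.dot(X, inv_sqrt)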
Example #2
# Compute symbolic expression for predicted values
network_output = lasagne.layers.get_output(l_out)
# Remove a dimension from the output
predicted_values = network_output[:, -1]
target_values = T.vector('target_values')
# Our cost will be mean-squared error
loss = T.mean((predicted_values - target_values)**2)
# Retrieve all parameters from the network
all_params = lasagne.layers.get_all_params(l_out)
# Compute SGD updates for training
updates = compute_updates_options[compute_updates](
    loss, all_params, learning_rate)
# Project gradient updates for recurrent hid-to-hid matrix
if orthogonalize:
    new_update = util.tangent_grad(
        l_rec.W_hid_to_hid,
        updates[l_rec.W_hid_to_hid] - l_rec.W_hid_to_hid)
    updates[l_rec.W_hid_to_hid] = l_rec.W_hid_to_hid + new_update
# Theano functions for training and computing cost
train = theano.function(
    [l_in.input_var, target_values, l_mask.input_var],
    loss, updates=updates)
# Accuracy is defined as the proportion of examples whose absolute
# error is less than .04
accuracy = T.mean(abs(predicted_values - target_values) < .04)
# Theano function for computing accuracy
compute_accuracy = theano.function(
    [l_in.input_var, target_values, l_mask.input_var], accuracy)
# Function for orthogonalizing the recurrent weight matrix; the original
# snippet is truncated here, so the retraction update is an assumed completion
retract_w = theano.function(
    [], [],
    updates=[(l_rec.W_hid_to_hid,
              util.retraction(l_rec.W_hid_to_hid))])
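In a training loop, the retraction is typically applied after each (or every few) gradient steps so that W_hid_to_hid stays on the manifold despite numerical drift. A hypothetical driver loop, where gen_batches, num_epochs, X_val, y_val, and mask_val are placeholders for the caller's data pipeline rather than names from the original code:

for epoch in range(num_epochs):
    for X, y, mask in gen_batches():  # placeholder batch iterator
        batch_loss = train(X, y, mask)
        if orthogonalize:
            retract_w()  # snap W_hid_to_hid back onto the manifold
    print('epoch %d: accuracy %.3f'
          % (epoch, compute_accuracy(X_val, y_val, mask_val)))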