if OPT_METHOD == 'ADAM':
    updates = lasagne.updates.adam(loss, all_params)
elif OPT_METHOD == 'SGD':
    learning_rate0 = 0.5
    learning_rate = learning_rate0
    lr = theano.shared(np.asarray(learning_rate, dtype=theano.config.floatX),
                       borrow=True)
    updates = lasagne.updates.sgd(scaled_grads, all_params, lr)
else:
    print('unknown optimization method')

# Accumulate, over the parameters being orthogonalized, the average ratio
# ||P_tan(delta)|| / ||delta||: the cosine of the angle between each raw
# update and the tangent space of the orthogonal manifold
angle = T.constant(0)
# angle = theano.typed_list.TypedListType(T.dscalar)('angle')
for param in all_params:
    if param in param2orthogonlize:
        delta = updates[param] - param
        tan_grad = tangent_grad(param, delta)
        angle = angle + (T.sqrt((tan_grad**2).sum() / (delta**2).sum())
                         / len(param2orthogonlize))
        if PROJ_GRAD:
            # Replace the raw update with its tangent-space projection
            updates[param] = param + tan_grad

# Updates that retract each orthogonalized parameter back onto the manifold
retract_updates = []
for p in param2orthogonlize:
    retract_updates.append((p, GAIN * retraction(p)))

J = theano.gradient.jacobian(loss, param2orthogonlize)
hidden = lasagne.layers.get_output(layers_to_concat)

# Compile functions for training and computing output
train = theano.function([l_in.input_var, target], [loss, angle, grad_norm],
                        updates=updates, allow_input_downcast=True)
probe_loss = theano.function([l_in.input_var, target], loss,
                             allow_input_downcast=True)
probe_J = theano.function([l_in.input_var, target], J,
                          allow_input_downcast=True)
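# The code above assumes helper functions `tangent_grad` and `retraction`
# defined elsewhere.  Below is a minimal, self-contained sketch (an
# assumption, not this codebase's actual implementation) of one standard
# choice for a square matrix W with W^T W = I: project an update onto the
# tangent space of the orthogonal manifold by removing the symmetric part
# of W^T delta, and retract W back onto the manifold via the orthogonal
# (polar) factor of its singular value decomposition.
import theano.tensor as T
from theano.tensor import nlinalg


def tangent_grad_sketch(W, delta):
    # P(delta) = delta - W * sym(W^T delta), with sym(A) = (A + A^T) / 2;
    # the result satisfies W^T P + P^T W = 0, the tangent-space condition.
    WtD = T.dot(W.T, delta)
    return delta - T.dot(W, 0.5 * (WtD + WtD.T))


def retraction_sketch(W):
    # Polar retraction: W = U diag(s) V^T maps to U V^T, the closest
    # orthogonal matrix to W in Frobenius norm.
    U, s, VT = nlinalg.svd(W)
    return T.dot(U, VT)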
# Compute symbolic expression for predicted values
network_output = lasagne.layers.get_output(l_out)
# Remove a dimension from the output
predicted_values = network_output[:, -1]
target_values = T.vector('target_values')
# Our cost will be mean-squared error
loss = T.mean((predicted_values - target_values)**2)
# Retrieve all parameters from the network
all_params = lasagne.layers.get_all_params(l_out)
# Compute SGD updates for training
updates = compute_updates_options[compute_updates](
    loss, all_params, learning_rate)
# Project gradient updates for recurrent hid-to-hid matrix
if orthogonalize:
    new_update = util.tangent_grad(
        l_rec.W_hid_to_hid,
        updates[l_rec.W_hid_to_hid] - l_rec.W_hid_to_hid)
    updates[l_rec.W_hid_to_hid] = l_rec.W_hid_to_hid + new_update
# Theano functions for training and computing cost
train = theano.function(
    [l_in.input_var, target_values, l_mask.input_var],
    loss, updates=updates)
# Accuracy is defined as the proportion of examples whose absolute
# error is less than .04
accuracy = T.mean(abs(predicted_values - target_values) < .04)
# Theano function for computing accuracy
compute_accuracy = theano.function(
    [l_in.input_var, target_values, l_mask.input_var], accuracy)
# Function for orthogonalizing weight matrix
retract_w = theano.function(
    [], [], updates=[(l_rec.W_hid_to_hid,
                      util.retraction(l_rec.W_hid_to_hid))])
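# Hypothetical usage (a sketch, not part of the original script): the
# compiled functions above would typically be driven by a loop like the one
# below.  `gen_batch` and `NUM_EPOCHS` are assumed placeholders for the real
# data pipeline.  Because the projected update only keeps W_hid_to_hid on
# the tangent plane to first order, `retract_w()` is called after each step
# to pull the matrix back onto the orthogonal manifold.
#
#     for epoch in range(NUM_EPOCHS):
#         X, y, mask = gen_batch()
#         batch_loss = train(X, y, mask)
#         if orthogonalize:
#             retract_w()  # re-orthogonalize W_hid_to_hid
#         print('epoch %d: loss %f, accuracy %f'
#               % (epoch, batch_loss, compute_accuracy(X, y, mask)))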