else:
    raise ValueError('unknown optimization method')

# Running average of the ratio between the tangent-projected update norm
# and the raw update norm for the parameters kept (approximately) orthogonal.
angle = T.constant(0)
# angle = theano.typed_list.TypedListType(T.dscalar)('angle')
for param in all_params:
    if param in param2orthogonlize:
        delta = updates[param] - param
        tan_grad = tangent_grad(param, delta)
        angle = angle + T.sqrt((tan_grad ** 2).sum()
                               / (delta ** 2).sum()) / len(param2orthogonlize)
        # Optionally replace the raw update with its projection onto the
        # tangent space of the orthogonal manifold at param.
        if PROJ_GRAD:
            updates[param] = param + tan_grad

# Updates that retract the orthogonalized parameters back onto the manifold.
retract_updates = []
for p in param2orthogonlize:
    retract_updates.append((p, GAIN * retraction(p)))

J = theano.gradient.jacobian(loss, param2orthogonlize)
hidden = lasagne.layers.get_output(layers_to_concat)

# Compile functions for training and computing output
train = theano.function([l_in.input_var, target], [loss, angle, grad_norm],
                        updates=updates, allow_input_downcast=True)
probe_loss = theano.function([l_in.input_var, target], loss,
                             allow_input_downcast=True)
probe_J = theano.function([l_in.input_var, target], J,
                          allow_input_downcast=True)
retract_w = theano.function([], [], updates=retract_updates,
                            allow_input_downcast=True)
get_output = theano.function([l_in.input_var], network_output,
                             allow_input_downcast=True)
probe_hidden = theano.function([l_in.input_var], hidden,
                               allow_input_downcast=True)

track_train_error = []
track_valid_error = []
track_test_error = []
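The listing above relies on two helpers defined elsewhere: tangent_grad, which projects a raw parameter update onto the tangent space of the orthogonal manifold at the current weights, and retraction, which maps the drifted weights back onto the manifold (the next listing calls the same pair through util). Their definitions do not appear in this section; the following is a minimal sketch of plausible Theano implementations, assuming the standard tangent-space projection delta - W sym(W^T delta) and a polar-decomposition retraction. The actual helpers may differ.

import theano.tensor as T
from theano.tensor import nlinalg

def tangent_grad(W, delta):
    # Hypothetical sketch: project delta onto the tangent space of the
    # orthogonal manifold at W by removing the symmetric part of W^T delta.
    A = T.dot(W.T, delta)
    sym = 0.5 * (A + A.T)
    return delta - T.dot(W, sym)

def retraction(W):
    # Hypothetical sketch: retract W onto the manifold through its polar
    # factor U V^T, taken from the SVD W = U S V^T (nlinalg.svd returns
    # V already transposed, following numpy).
    u, s, v = nlinalg.svd(W)
    return T.dot(u, v)

With W orthogonal, the projected update preserves W^T W = I to first order, so the periodic retract_w call only has to remove the accumulated second-order drift.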
    updates[l_rec.W_hid_to_hid] - l_rec.W_hid_to_hid)
updates[l_rec.W_hid_to_hid] = l_rec.W_hid_to_hid + new_update
# Theano functions for training and computing cost
train = theano.function(
    [l_in.input_var, target_values, l_mask.input_var],
    loss, updates=updates)
# Accuracy is defined as the proportion of examples whose absolute
# error is less than .04
accuracy = T.mean(abs(predicted_values - target_values) < .04)
# Theano function for computing accuracy
compute_accuracy = theano.function(
    [l_in.input_var, target_values, l_mask.input_var], accuracy)
# Function for orthogonalizing the weight matrix
retract_w = theano.function(
    [], [],
    updates={l_rec.W_hid_to_hid: util.retraction(l_rec.W_hid_to_hid)})
# Keep track of the number of samples used to train
samples_trained = 0
# Did we converge?
success = True
# Store cost over minibatches
cost = 0
while samples_trained < N_SAMPLES:
    # Generate a batch of data
    X, y, mask = task_options[task](sequence_length, BATCH_SIZE)
    cost += train(X.astype(theano.config.floatX),
                  y.astype(theano.config.floatX),
                  mask.astype(theano.config.floatX))
    # Quit when a non-finite value is found
    if any([not np.isfinite(cost),
            any([not np.all(np.isfinite(p.get_value()))