def rnn_cell_fa(rnn_input, state, W, U, B):
    """Compute one recurrent state update using ``tf_matmul_r`` products.

    Args:
        rnn_input: input tensor; only every second column, excluding the
            last one (``[:, 0:-1:2]``), is fed to the cell.
        state: previous hidden state; a column of ones is appended so the
            last row of ``W`` acts as a bias.
        W: recurrent weights applied to the bias-augmented state.
        U: input weights applied to the selected input columns.
        B: matrix handed to ``tf_matmul_r`` alongside ``W``; its first two
            rows are handed over alongside ``U``.

    Returns:
        ``activation`` applied to the sum of the input and recurrent terms.

    Relies on ``batch_size``, ``activation`` and ``tf_matmul_r`` from the
    enclosing scope.
    NOTE(review): ``tf_matmul_r`` is defined outside this block; it looks
    like a matmul whose backward pass uses the third argument instead of
    the second -- confirm.
    """
    # Bias-augment the state with a constant-one column.
    bias_column = tf.ones([batch_size, 1], tf.float32)
    augmented_state = tf.concat([state, bias_column], 1)

    # Alternative kept from the source, with a plain matmul on the input:
    # activation(tf.matmul(rnn_input[:, 0:-1:2], U)
    #            + tf_matmul_r(augmented_state, W, B))
    input_drive = tf_matmul_r(rnn_input[:, 0:-1:2], U, B[0:2, :])
    recurrent_drive = tf_matmul_r(augmented_state, W, B)
    return activation(input_drive + recurrent_drive)
# Feedback alignment (FA), computed manually.
# NOTE(review): h_aug_man, h_man, x_aug, y, W, B and m are defined before
# this point, outside the visible part of the script.
y_p_man = tf.matmul(h_aug_man, W)
loss_man = tf.reduce_sum(tf.pow(y_p_man - y, 2)) / 2
grad_W_man = tf.gradients(loss_man, W)[0]

# Output error sent back through the first m rows of B, then gated by the
# sigmoid derivative h * (1 - h).
e_man = y_p_man - y
h_prime_man = h_man * (1 - h_man)
fb_error_man = tf.matmul(e_man, tf.transpose(B[0:m, :]))
delta_h_man = tf.multiply(h_prime_man, fb_error_man)
grad_A_manual = tf.matmul(tf.transpose(x_aug), delta_h_man)

# FA, computed automatically
x_aug = tf.concat([x, e0], 1)
h = tf.sigmoid(tf.matmul(x_aug, A))
h_aug = tf.concat([h, e1], 1)

# The key line! Replace W with B in any backprop step
y_p = tf_matmul_r(h_aug, W, B)
# y_p = tf.matmul(h_aug, W)
loss = tf.reduce_sum(tf.pow(y_p - y, 2)) / 2
grad_W_auto = tf.gradients(loss, W)[0]
grad_A_auto = tf.gradients(loss, A)[0]

norms_W = np.zeros(n_tests)
norms_A = np.zeros(n_tests)

# Compare to overridden matmul functions
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(n_tests):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        feed_dict = {x: batch_x, y: batch_y}
def main():
    """Train an RNN controller on simulated cart-pole dynamics.

    Reads ``method`` and ``save`` from ``get_args()``, builds a TensorFlow
    graph that unrolls ``num_steps`` controller/physics updates, runs the
    training loop, and (when ``save`` is truthy) pickles losses, alignments,
    parameters and the recorded trajectories to ``fn_out``.

    ``method`` selects both the RNN cell and the gradient routine:
    ``'backprop'`` uses plain matmuls, any other value uses ``tf_matmul_r``
    with the feedback matrices ``B``/``C``; ``'nodepert'`` additionally
    accumulates an auxiliary gradient for ``B``.

    Raises:
        NotImplementedError: if ``method`` is not one of ``'backprop'``,
            ``'feedbackalignment'`` or ``'nodepert'``.
    """
    args = get_args()
    method = args.method
    save = bool(args.save)

    # Global config variables
    anneal = True
    num_steps = 10  # number of truncated backprop steps
    batch_size = 20
    in_dim = 4
    state_size = 50
    learning_rate = 1e-3
    alpha2 = 1
    activation = tf.tanh
    act_prime = lambda x: 1.0 - tf.multiply(x, x)
    acclimatize = True
    grad_max = 10
    N_epochs = 10000
    N_episodes = 10
    n_runs = 1
    delay = 2

    # Number of input columns the network observes (every second column of
    # x). Kept as a true integer: ``in_dim / 2`` is a float under Python 3
    # and is not a valid tensor-shape entry.
    n_obs = in_dim // 2

    # Node pert params
    lmbda = 5e-3
    var_xi = 0.01
    p_fire = 1.0  # prob of firing
    beta = 0.1
    report_rate = 100

    fn_out = './experiments/cartpole_rnn_partialobs_sgdnp/%s_learning_rate_%f_lmbda_%f_varxi_%f_multipleruns.npz' % (
        method, learning_rate, lmbda, var_xi)

    # Things to save with output
    params = {
        'num_steps': num_steps,
        'batch_size': batch_size,
        'in_dim': in_dim,
        'state_size': state_size,
        'learning_rate': learning_rate,
        'alpha2': alpha2,
        'lmbda': lmbda,
        'var_xi': var_xi,
        'p_fire': p_fire,
        'grad_max': grad_max,
        'N_epochs': N_epochs,
        'N_episodes': N_episodes,
        'acclimatize': acclimatize
    }

    print("Using %s" % method)
    print("For %d epochs" % N_epochs)
    print("Learning rate: %f" % learning_rate)
    print("Lambda learning rate: %f" % lmbda)
    print("Variance xi: %f" % var_xi)
    print("Saving results: %d" % save)

    def rnn_cell_bp(rnn_input, state, W, U, B):
        """State update with ordinary matmuls (``B`` is accepted but unused)."""
        ones0 = tf.ones([batch_size, 1], tf.float32)
        state_p = tf.concat([state, ones0], 1)
        return activation(
            tf.matmul(rnn_input[:, 0:-1:2], U) + tf.matmul(state_p, W))

    def rnn_cell_fa(rnn_input, state, W, U, B):
        """State update with ``tf_matmul_r`` products.

        The first ``n_obs`` rows of ``B`` are paired with ``U``, all of
        ``B`` with ``W``.
        """
        ones0 = tf.ones([batch_size, 1], tf.float32)
        state_p = tf.concat([state, ones0], 1)
        return activation(
            tf_matmul_r(rnn_input[:, 0:-1:2], U, B[0:n_obs, :]) +
            tf_matmul_r(state_p, W, B))

    if method == 'backprop':
        rnn_cell = rnn_cell_bp
    else:
        rnn_cell = rnn_cell_fa

    def train_network(num_episodes,
                      num_steps,
                      state_size=state_size,
                      verbose=True,
                      n_runs=5):
        """Run the training loop.

        Returns ``(training_losses, step, alignments, xs)`` where ``xs``
        holds the recorded inputs for every run/epoch/episode/step and
        ``step`` is the last episode index.
        """
        xs = np.zeros(
            (n_runs, N_epochs, num_episodes, num_steps, batch_size, in_dim))
        for run_idx in range(n_runs):
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                training_losses = []
                alignments = []
                for idx in range(N_epochs):
                    print("Epoch: %d" % idx)
                    # For the first few epochs only the feedback weights
                    # (B, C) are updated when acclimatize is set.
                    if idx < 4 and acclimatize:
                        ts = train_step_B
                    else:
                        ts = train_step
                    training_loss = 0
                    training_x = np.zeros((batch_size, in_dim))
                    training_state = np.zeros((batch_size, state_size))
                    for step in range(num_episodes):
                        tr_init_gradW = np.zeros((state_size + 1, state_size))
                        tr_init_gradB = np.zeros((state_size + 1, state_size))
                        tr_init_gradC = np.zeros((state_size + 1, 1))
                        tr_init_gradU = np.zeros((n_obs, state_size))
                        # Final state/x of one episode seed the next one.
                        (tr_loss, tr_losses, training_loss_, training_state,
                         training_x, _, align, x_o) = sess.run(
                             [loss, losses, total_loss, final_state, final_x,
                              ts, aments, rnn_inputs],
                             feed_dict={
                                 init_state: training_state,
                                 init_x: training_x,
                                 init_gradU: tr_init_gradU,
                                 init_gradW: tr_init_gradW,
                                 init_gradB: tr_init_gradB,
                                 init_gradC: tr_init_gradC
                             })
                        xs[run_idx, idx, step, :, :, :] = np.array(x_o)[:, :, :]
                        training_loss += training_loss_
                        # NOTE(review): the nesting of this reporting block
                        # (inside vs. after the episode loop) could not be
                        # recovered from the collapsed source -- confirm.
                        if idx % report_rate == 0 and idx > 0:
                            if verbose:
                                print("Average loss at epoch %d for last %d steps: %f" % (
                                    idx, report_rate,
                                    training_loss / report_rate / num_episodes))
                            training_losses.append(
                                training_loss / report_rate / num_episodes)
                            alignments.append(align)
                            training_loss = 0
        return training_losses, step, alignments, xs

    ##############
    ## BACKPROP ##
    ##############

    def backprop():
        """Gradients of ``total_loss`` w.r.t. U, W, V via ``tf.gradients``.

        ``grad_B``/``grad_C`` are just the (zero) initial tensors.
        """
        grad_B = init_gradB
        grad_C = init_gradC
        alnments = []
        grad_V = tf.gradients(xs=V, ys=total_loss)[0]
        grad_W = tf.gradients(xs=W, ys=total_loss)[0]
        grad_U = tf.gradients(xs=U, ys=total_loss)[0]
        return grad_U, grad_W, grad_B, grad_C, grad_V, alnments

    ###############
    ## NODE PERT ##
    ###############

    def nodepert():
        """Like ``backprop`` but also accumulates a gradient for ``B``.

        For each step pair (i, j) with j <= i - delay, ``B`` is trained so
        that the gradient of ``loss[i]`` w.r.t. ``rnn_outputs[j]`` matches a
        perturbation-based estimate.
        NOTE(review): ``loss_pert`` is built from the same tensors as
        ``loss`` in this function, so ``loss_pert[i] - loss[i]`` is zero as
        written -- confirm whether the perturbed rollout was meant to feed
        ``loss_pert``.
        """
        grad_B = init_gradB
        grad_C = init_gradC
        alnments = []
        for i in range(num_steps):
            for j in range(i + 1 - delay)[::-1]:
                print(i, j)
                np_est = tf.matmul(
                    tf.diag(loss_pert[i] - loss[i]) / var_xi / var_xi,
                    noise_outputs[j])
                delta = tf.gradients(xs=rnn_outputs[j], ys=loss[i])[0]
                aux_loss = tf.reduce_sum(tf.pow(np_est - delta, 2))
                grad_B += tf.squeeze(tf.gradients(xs=B, ys=aux_loss))
        grad_V = tf.gradients(xs=V, ys=total_loss)[0]
        grad_W = tf.gradients(xs=W, ys=total_loss)[0]
        grad_U = tf.gradients(xs=U, ys=total_loss)[0]
        return grad_U, grad_W, grad_B, grad_C, grad_V, alnments

    if method == 'backprop':
        trainer = backprop
    elif method == 'feedbackalignment':
        # Same routine as backprop; the difference comes from the cell and
        # action using tf_matmul_r.
        trainer = backprop
    elif method == 'nodepert':
        trainer = nodepert
    else:
        raise NotImplementedError

    # Initial tensors; these are overridden through feed_dict during training.
    init_x = tf.zeros([batch_size, in_dim], dtype=np.float32)
    init_state = tf.zeros([batch_size, state_size], dtype=np.float32)
    init_gradW = tf.zeros([state_size + 1, state_size], dtype=np.float32)
    init_gradB = tf.zeros([state_size + 1, state_size], dtype=np.float32)
    init_gradC = tf.zeros([state_size + 1, 1], dtype=np.float32)
    init_gradU = tf.zeros([n_obs, state_size], dtype=np.float32)
    alignment = tf.zeros([n_obs, state_size], dtype=np.float32)
    ones0 = tf.ones([batch_size, 1], tf.float32)

    # The "+ 1" rows hold the bias terms (inputs are augmented with ones).
    U = tf.Variable(rng.randn(n_obs, state_size) * alpha2,
                    name="input_weights",
                    dtype=tf.float32)
    W = tf.Variable(rng.randn(state_size + 1, state_size) * alpha2,
                    name="feedforward_weights",
                    dtype=tf.float32)
    V = tf.Variable(rng.randn(state_size + 1, 1) * alpha2,
                    name="output_weights",
                    dtype=tf.float32)
    B = tf.Variable(rng.randn(state_size + 1, state_size) * alpha2,
                    name="feedback_weights",
                    dtype=tf.float32)
    C = tf.Variable(rng.randn(state_size + 1, 1) * alpha2,
                    name="feedback_weights",
                    dtype=tf.float32)

    ##############################################
    ## Define the cartpole dynamics and network ##
    ##############################################

    x = init_x
    state = init_state
    state_p = init_state

    rnn_inputs = []
    rnn_outputs = []
    rnn_pert_outputs = []
    noise_outputs = []
    heights = []
    hs = []
    actions = []

    m = 1.1
    mp = 0.1
    g = 9.8
    l = 0.5
    tau = 0.04
    Fmax = 10
    max_h = 3
    gamma = 10

    # Equations of motion:
    # theta_dd = (m*g*sin(theta) - cos(theta)*(F + mp*l*theta_d*theta_d*sin(theta)))
    #            / ((4/3)*m*l - mp*l*cos(theta)*cos(theta))
    # theta_d += tau*theta_dd
    # theta   += tau*theta_d
    # h_dd = (F + mp*l*(theta_d*theta_d*sin(theta) - theta_dd*cos(theta)))/m
    # h_d += tau*h_dd
    # h   += tau*h_d

    for idx in range(num_steps):
        # Noise applied to the perturbed copy of the state; each unit is
        # perturbed with probability p_fire.
        mask = tf.random_uniform([batch_size, state_size]) < p_fire
        xi = tf.multiply(
            tf.random_normal([batch_size, state_size]) * var_xi,
            tf.to_float(mask))
        phi = tf.random_normal((batch_size, 1)) * Fmax / 500

        # Compute new state
        state = rnn_cell(x, state, W, U, B)
        state_p = rnn_cell(x, state_p, W, U, B) + xi[:, 0:state_size]

        # Compute action
        if method == 'backprop':
            action = tf.matmul(tf.concat([state, ones0], 1), V)
        else:
            action = tf_matmul_r(tf.concat([state, ones0], 1), V, C)
        actions.append(action)

        # The force applied now comes from the action `delay` steps ago.
        d_idx = max(0, idx - delay)
        F = tf.squeeze(Fmax * activation(actions[d_idx]) + phi)

        # Compute new x; columns are (theta_dot, theta, h_dot, h).
        theta_d = x[:, 0]
        theta = x[:, 1]
        h_d = x[:, 2]
        h_pos = x[:, 3]
        sin_theta = tf.sin(theta)
        cos_theta = tf.cos(theta)

        theta_dd = (m * g * sin_theta - cos_theta *
                    (F + mp * l * theta_d * theta_d * sin_theta)) / (
                        (4 / 3) * m * l - mp * l * cos_theta * cos_theta)
        h_dd = (F + mp * l *
                (theta_d * theta_d * sin_theta - theta_dd * cos_theta)) / m

        # Explicit Euler step with step size tau.
        x_list = [
            theta_d + tau * theta_dd,  # x0 = theta_dot
            theta + tau * theta_d,  # x1 = theta
            h_d + tau * h_dd,  # x2 = h_dot
            h_pos + tau * h_d,  # x3 = h
        ]
        x = tf.stack(x_list, axis=1)

        # The quantity penalised as "height" is the updated angle itself.
        height = x[:, 1]
        heights.append(height)
        hs.append(x[:, 2])

        rnn_inputs.append(x)
        rnn_outputs.append(state)
        rnn_pert_outputs.append(state_p)
        noise_outputs.append(xi)

    final_x = rnn_inputs[-1]
    final_state = rnn_outputs[-1]

    # Define loss function: per-sample (loss) and batch-summed (losses)
    # versions of the same two penalty terms.
    loss = [
        gamma * tf.pow(height, 2) / 2 +
        tf.pow(tf.maximum(0.0, tf.abs(h) - max_h), 2) / 2
        for h, height in zip(hs, heights)
    ]
    losses = [
        gamma * tf.reduce_sum(tf.pow(height, 2)) / 2 +
        tf.pow(tf.maximum(0.0, tf.abs(h) - max_h), 2) / 2
        for h, height in zip(hs, heights)
    ]
    total_loss = tf.reduce_mean(losses)

    # Perturbed outputs and loss.
    # NOTE(review): these use the same hs/heights as the unperturbed loss.
    loss_pert = [
        gamma * tf.pow(height, 2) / 2 +
        tf.pow(tf.maximum(0.0, tf.abs(h) - max_h), 2) / 2
        for h, height in zip(hs, heights)
    ]
    losses_pert = [
        gamma * tf.reduce_sum(tf.pow(height, 2)) / 2 +
        tf.pow(tf.maximum(0.0, tf.abs(h) - max_h), 2) / 2
        for h, height in zip(hs, heights)
    ]
    total_loss_pert = tf.reduce_mean(losses_pert)

    ##################################################

    grad_U, grad_W, grad_B, grad_C, grad_V, aments = trainer()

    # Plain gradient-descent updates; feedback-weight gradients are clipped.
    new_U = U.assign(U - learning_rate * grad_U)
    new_W = W.assign(W - learning_rate * grad_W)
    new_V = V.assign(V - learning_rate * grad_V)
    new_C = C.assign(
        C - lmbda * tf.clip_by_value(grad_C, -grad_max, grad_max, name=None))
    new_B = B.assign(
        B - lmbda * tf.clip_by_value(grad_B, -grad_max, grad_max, name=None))

    train_step_B = [new_B, new_C]
    train_step = [new_U, new_W, new_V, new_B, new_C]

    # Save training losses, params, number of runs in epoch, alignment to BP
    all_losses = []
    all_alignments = []

    training_losses, n_in_epoch, alignments, xs = train_network(
        N_episodes, num_steps, n_runs=n_runs)
    all_losses.append(training_losses)
    all_alignments.append(alignments)

    if save:
        # The file is written with pickle even though fn_out ends in .npz.
        with open(fn_out, 'wb') as f:
            to_save = {
                'all_losses': all_losses,
                'all_alignments': all_alignments,
                'n_in_epoch': n_in_epoch,
                'params': params,
                'xs': xs
            }
            pickle.dump(to_save, f)
def build_model(self):
    """Build the graph for a two-hidden-layer sigmoid network.

    Creates the ``is_training``, ``x`` and ``y`` placeholders, the weight
    matrices (each with one extra row for the bias), the forward pass, the
    squared-error loss, plain gradient-descent assign ops
    (``self.train_step``) and ``self.accuracy``; finally registers training
    metrics through ``self._set_training_metrics``.

    Reads ``self.config.state_size``, ``self.config.batch_size`` and
    ``self.config.learning_rate``.
    NOTE(review): ``tf_matmul_r`` is defined outside this block; it looks
    like a matmul whose backward pass uses the third argument instead of
    the second -- confirm.
    """
    self.is_training = tf.placeholder(tf.bool)
    self.x = tf.placeholder(tf.float32, shape=[None] + self.config.state_size)
    self.y = tf.placeholder(tf.float32, shape=[None, 10])

    # Layer sizes: input p -> hidden m -> hidden j -> output n
    m = 50
    j = 20
    n = 10
    p = self.config.state_size[0]

    # Scale weight initialization
    alpha0 = np.sqrt(2.0/p)
    alpha1 = np.sqrt(2.0/m)
    alpha2 = np.sqrt(2.0/j)

    # Plus one for bias terms
    A = tf.Variable(rng.randn(p+1, m)*alpha0, name="hidden_weights", dtype=tf.float32)
    W1 = tf.Variable(rng.randn(m+1, j)*alpha1, name="hidden_weights2", dtype=tf.float32)
    W2 = tf.Variable(rng.randn(j+1, n)*alpha2, name="output_weights", dtype=tf.float32)
    B1 = tf.Variable(rng.randn(m+1, j)*alpha1, name="feedback_weights1", dtype=tf.float32)
    B2 = tf.Variable(rng.randn(j+1, n)*alpha2, name="feedback_weights2", dtype=tf.float32)

    # Network architecture with ones added for bias terms. The ones columns
    # use the configured batch size, so fed batches must have that size.
    e0 = tf.ones([self.config.batch_size, 1], tf.float32)
    e1 = tf.ones([self.config.batch_size, 1], tf.float32)
    x_aug = tf.concat([self.x, e0], 1)
    h1 = tf.sigmoid(tf.matmul(x_aug, A))
    h1_aug = tf.concat([h1, e1], 1)
    h2 = tf.sigmoid(tf_matmul_r(h1_aug, W1, B1))
    h2_aug = tf.concat([h2, e1], 1)
    y_p = tf_matmul_r(h2_aug, W2, B2)

    with tf.name_scope("loss"):
        # Summed (not batch-averaged) squared error.
        self.loss = tf.reduce_sum(tf.pow(y_p-self.y, 2))/2
        grad_W2 = tf.gradients(xs=W2, ys=self.loss)[0]
        grad_W1 = tf.gradients(xs=W1, ys=self.loss)[0]
        grad_A = tf.gradients(xs=A, ys=self.loss)[0]

        e = (y_p - self.y)
        # Sigmoid derivatives, with the bias column sliced off.
        # h1_prime is not used below in this method.
        h1_prime = tf.multiply(h1_aug, 1-h1_aug)[:, 0:m]
        h2_prime = tf.multiply(h2_aug, 1-h2_aug)[:, 0:j]

        # FA: error sent back through B2 (bias row excluded).
        lmda2 = tf.matmul(e, tf.transpose(B2[0:j, :]))
        # Use the TensorFlow op: NumPy ufuncs are not defined on symbolic
        # tensors.
        d2 = tf.multiply(h2_prime, lmda2)

        new_W2 = W2.assign(W2 - self.config.learning_rate*grad_W2)
        new_W1 = W1.assign(W1 - self.config.learning_rate*grad_W1)
        new_A = A.assign(A - self.config.learning_rate*grad_A)
        self.train_step = [new_W2, new_W1, new_A]

        correct_prediction = tf.equal(tf.argmax(y_p, 1), tf.argmax(self.y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Save training metrics
    Bs = [B1, B2]
    Ws = [W1, W2]
    es = [d2, e]
    self._set_training_metrics(es, Bs, Ws)
def fa_layer(prev, input_size, output_size):
    """Create one layer computed with ``tf_matmul_r`` plus a bias.

    Args:
        prev: input to the layer.
        input_size: number of rows of the two weight matrices.
        output_size: number of columns of the weight matrices and length
            of the bias.

    Returns:
        ``tf_matmul_r(prev, W, B) + b`` for freshly created ``W``, ``B``
        and ``b``.

    NOTE(review): ``weight_variable``, ``bias_variable`` and
    ``tf_matmul_r`` are defined outside this block; the second weight
    matrix presumably serves as the feedback matrix -- confirm.
    """
    # Creation order (forward, feedback, bias) is kept as-is.
    forward_weights = weight_variable([input_size, output_size])
    feedback_weights = weight_variable([input_size, output_size])
    bias = bias_variable([output_size])
    projected = tf_matmul_r(prev, forward_weights, feedback_weights)
    return projected + bias
def build_model(self):
    """Build the graph for a two-hidden-layer sigmoid network.

    Creates the ``is_training``, ``x`` and ``y`` placeholders, the weight
    matrices (each with one extra row for the bias), the forward pass, the
    squared-error loss, feedback-signal diagnostics, plain gradient-descent
    assign ops (``self.train_step``) and ``self.accuracy``; finally
    registers training metrics through ``self._set_training_metrics``.

    Reads ``self.config.state_size``, ``self.config.batch_size`` and
    ``self.config.learning_rate``.
    NOTE(review): ``tf_matmul_r`` and ``tf_align`` are defined outside this
    block; ``tf_matmul_r`` looks like a matmul whose backward pass uses the
    third argument instead of the second -- confirm.
    """
    self.is_training = tf.placeholder(tf.bool)
    self.x = tf.placeholder(tf.float32, shape=[None] + self.config.state_size)
    self.y = tf.placeholder(tf.float32, shape=[None, 10])

    # Layer sizes: input p -> hidden m -> hidden j -> output n
    m = 512
    j = 200
    n = 10
    p = self.config.state_size[0]

    # Scale weight initialization
    alpha0 = np.sqrt(2.0/p)
    alpha1 = np.sqrt(2.0/m)
    alpha2 = np.sqrt(2.0/j)

    # Plus one for bias terms
    A = tf.Variable(rng.randn(p+1, m)*alpha0, name="hidden_weights", dtype=tf.float32)
    W1 = tf.Variable(rng.randn(m+1, j)*alpha1, name="hidden_weights2", dtype=tf.float32)
    W2 = tf.Variable(rng.randn(j+1, n)*alpha2, name="output_weights", dtype=tf.float32)
    B1 = tf.Variable(rng.randn(m+1, j)*alpha1, name="feedback_weights1", dtype=tf.float32)
    B2 = tf.Variable(rng.randn(j+1, n)*alpha2, name="feedback_weights2", dtype=tf.float32)

    # Network architecture with ones added for bias terms. The ones columns
    # use the configured batch size, so fed batches must have that size.
    e0 = tf.ones([self.config.batch_size, 1], tf.float32)
    e1 = tf.ones([self.config.batch_size, 1], tf.float32)
    x_aug = tf.concat([self.x, e0], 1)
    h1 = tf.sigmoid(tf.matmul(x_aug, A))
    h1_aug = tf.concat([h1, e1], 1)
    h2 = tf.sigmoid(tf_matmul_r(h1_aug, W1, B1))
    h2_aug = tf.concat([h2, e1], 1)
    y_p = tf_matmul_r(h2_aug, W2, B2)

    with tf.name_scope("loss"):
        # Summed (not batch-averaged) squared error.
        self.loss = tf.reduce_sum(tf.pow(y_p-self.y, 2))/2
        grad_W2 = tf.gradients(xs=W2, ys=self.loss)[0]
        grad_W1 = tf.gradients(xs=W1, ys=self.loss)[0]
        grad_A = tf.gradients(xs=A, ys=self.loss)[0]

        e = (y_p - self.y)
        # Sigmoid derivatives, with the bias column sliced off.
        # h1_prime is not used below in this method.
        h1_prime = tf.multiply(h1_aug, 1-h1_aug)[:, 0:m]
        h2_prime = tf.multiply(h2_aug, 1-h2_aug)[:, 0:j]

        # FA: error sent back through B2 (bias row excluded).
        lmda2 = tf.matmul(e, tf.transpose(B2[0:j, :]))
        # Use the TensorFlow op: NumPy ufuncs are not defined on symbolic
        # tensors.
        d2 = tf.multiply(h2_prime, lmda2)

        # Feedback data for saving; only the first item of the batch.
        # W2/B2 have j+1 rows (last row is the bias), so slice to j rows to
        # drop the bias row, as lmda2 does; W1/B1 have m+1 rows.
        delta_bp2 = tf.matmul(e, tf.transpose(W2[0:j, :]))[0, :]
        delta_fa2 = tf.matmul(e, tf.transpose(B2[0:j, :]))[0, :]
        delta_bp1 = tf.matmul(d2, tf.transpose(W1[0:m, :]))[0, :]
        delta_fa1 = tf.matmul(d2, tf.transpose(B1[0:m, :]))[0, :]

        # Diagnostics comparing the W-based and B-based feedback signals.
        # They are built here but not passed on below.
        norm_W1 = tf.norm(W1)
        norm_B1 = tf.norm(B1)
        norm_W2 = tf.norm(W2)
        norm_B2 = tf.norm(B2)
        error_FA1 = tf.norm(delta_bp1 - delta_fa1)
        error_FA2 = tf.norm(delta_bp2 - delta_fa2)
        alignment1 = tf_align(delta_fa1, delta_bp1)
        alignment2 = tf_align(delta_fa2, delta_bp2)

        new_W2 = W2.assign(W2 - self.config.learning_rate*grad_W2)
        new_W1 = W1.assign(W1 - self.config.learning_rate*grad_W1)
        new_A = A.assign(A - self.config.learning_rate*grad_A)
        self.train_step = [new_W2, new_W1, new_A]

        correct_prediction = tf.equal(tf.argmax(y_p, 1), tf.argmax(self.y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Save training metrics
    Bs = [B1, B2]
    Ws = [W1, W2]
    es = [d2, e]
    dls = []
    ls = [h1_aug, h2_aug]
    self._set_training_metrics(es, Bs, Ws, dls, ls, self.config.learning_rate)