def __init__(self, **kwargs):
    super(Control, self).__init__(**kwargs)

    self.old_target = [None, None]

    # load up our network
    import glob
    # this code goes into the weights folder, finds the most
    # recent trial, and loads up the weights
    files = sorted(glob.glob('controllers/weights/rnn*'))
    print('loading weights from %s' % files[-1])
    W = np.load(files[-1])['arr_0']

    num_states = 4
    self.rnn = RNNet(
        shape=[num_states * 2, 32, 32, num_states, num_states],
        layers=[Linear(), Tanh(), Tanh(), Linear(), Linear()],
        rec_layers=[1, 2],
        conns={0: [1, 2], 1: [2], 2: [3], 3: [4]},
        load_weights=W,
        use_GPU=False)

    # set up masking so that the weights between the network output and
    # the final layer aren't modified, held fixed at the identity mapping
    offset, W_end, b_end = self.rnn.offsets[(3, 4)]
    self.rnn.mask = np.zeros(self.rnn.W.shape, dtype=bool)
    self.rnn.mask[offset:b_end] = True
    self.rnn.W[offset:W_end] = np.eye(4).flatten()

    self.joint_targets = None
    self.act = None

    # set up recorders
    if self.write_to_file is True:
        from recorder import Recorder
        self.u_recorder = Recorder('control signal', self.task, 'hf')
        self.xy_recorder = Recorder('end-effector position', self.task, 'hf')
        self.dist_recorder = Recorder('distance from target', self.task, 'hf')
        self.recorders = [self.u_recorder,
                          self.xy_recorder,
                          self.dist_recorder]
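# A hedged usage sketch (not from the original controller): one way the
# loaded network above might be queried for a joint-space command each time
# step. The method name `query_network`, the [state, target] input ordering,
# and the single-time-step input shape are assumptions; the only call relied
# on is RNNet.forward, which returns one activation array per layer
# (batch x time x units), as used by the plotting code elsewhere in this
# project.
def query_network(self, state, target_state):
    # stack the current state and the target into one network input
    inp = np.hstack([state, target_state])[None, None, :]
    activations = self.rnn.forward(inp, self.rnn.W)
    # the final linear layer receives the layer-3 output through the frozen
    # identity weights set up in __init__, so its activation is taken here
    # as the commanded joint state
    return activations[-1][0, -1]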
def test_plant():
    """Example of a network using a dynamic plant as the output layer."""

    eps = 1e-6  # value to use for finite differences computations
    dt = 1e-2  # size of time step
    sig_len = 40  # how many time steps to train over
    batch_size = 32  # how many updates to perform with static input
    num_batches = 20000  # how many batches to run total

    import sys
    # NOTE: Change to wherever you keep your arm models
    sys.path.append("../../../studywolf_control/studywolf_control/")
    from arms.two_link.arm_python import Arm as Arm
    print('Plant is: ', Arm)
    arm = Arm(dt=dt, init_q=[0.736134824578, 1.85227640003])

    num_states = arm.DOF * 2  # our states are [positions, velocities]
    targets = gen_targets(arm=arm, sig_len=sig_len)  # target joint angles
    init_state = np.zeros((len(targets), num_states))  # initial velocity = 0
    init_state[:, :arm.DOF] = arm.init_q  # set up the initial joint angles
    plant = PlantArm(arm=arm, targets=targets,
                     init_state=init_state, eps=eps)

    # open up the weights folder and check for saved weights
    import glob
    files = sorted(glob.glob('weights/rnn*'))
    if len(files) > 0:
        # if weights found, load them up and keep going from the last trial
        W = np.load(files[-1])['arr_0']
        print('loading from ', files[-1])
        last_trial = int(
            files[-1].split('weights/rnn_weights-trial')[1].split('-err')[0])
        print('last_trial: ', last_trial)
    else:
        # if no weights found, start fresh with a new random seed
        W = None
        last_trial = -1
        seed = np.random.randint(100000000)
        print('seed : ', seed)
        np.random.seed(seed)

    # specify the network structure and loss functions
    from hessianfree.loss_funcs import SquaredError, SparseL2
    rnn = RNNet(
        # specify the number of nodes in each layer
        shape=[num_states * 2, 32, 32, num_states, num_states],
        # specify the function of the nodes in each layer
        layers=[Linear(), Tanh(), Tanh(), Linear(), plant],
        # specify the layers that have recurrent connections
        rec_layers=[1, 2],
        # specify the connections between layers
        conns={0: [1, 2], 1: [2], 2: [3], 3: [4]},
        # specify the loss function
        loss_type=[
            # squared error between plant output and targets
            SquaredError()],
        load_weights=W,
        use_GPU=False)

    # set up masking so that weights between network output
    # and the plant aren't modified in learning, held at the identity
    offset, W_end, b_end = rnn.offsets[(3, 4)]
    rnn.mask = np.zeros(rnn.W.shape, dtype=bool)
    rnn.mask[offset:b_end] = True
    rnn.W[offset:W_end] = np.eye(4).flatten()

    for ii in range(last_trial + 1, num_batches):
        print('=============================================')
        print('training batch ', ii)
        rnn.run_epochs(plant, None, max_epochs=batch_size,
                       optimizer=HessianFree(CG_iter=96, init_damping=100))

        # save the weights to file, tracking the trial and error
        err = rnn.best_error
        name = 'weights/rnn_weights-trial%04i-err%.5f' % (ii, err)
        np.savez_compressed(name, rnn.W)

        print('=============================================')
        print('network: ', name)
        print('final error: ', err)
        print('=============================================')

    return rnn.best_error
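# gen_targets and PlantArm are defined elsewhere in this project (the plotting
# code below imports them from train_hf). The following is only a minimal
# sketch of what such a joint-space target generator could look like, assuming
# each reach holds one constant joint-angle target for the full signal length
# with a zero velocity target; the reach count and angular spread are
# illustrative values, not taken from the original code.
def gen_targets_sketch(arm, sig_len=40, num_reaches=8, spread=0.5):
    num_states = arm.DOF * 2
    targets = np.zeros((num_reaches, sig_len, num_states), dtype=np.float32)
    # spread the joint-angle targets evenly around the arm's initial posture
    offsets = np.linspace(-spread, spread, num_reaches)
    for ii, offset in enumerate(offsets):
        targets[ii, :, :arm.DOF] = np.asarray(arm.init_q) + offset
    # the velocity targets (the remaining columns) stay at zero: reach and stop
    return targets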
def gen_data_plot(folder="weights", index=-1, show_plot=True,
                  save_plot=None, save_paths=False, verbose=True):

    files = sorted(glob.glob('%s/rnn*' % folder))

    # plot the error values over time
    vals = []
    for ii, name in enumerate(files[:index]):
        if verbose:
            print(name)
        name = name.split('err')[1]
        name = name.split('.npz')[0]
        vals.append(float(name))
    vals = np.array(vals)

    plt.figure(figsize=(10, 3))
    ax = plt.subplot2grid((1, 3), (0, 0), colspan=2)
    ax.loglog(vals)
    ax.loglog(range(len(vals)), np.ones(len(vals)) * min(vals), 'r--')
    plt.xlim([0, len(files)])
    plt.ylim([10**-5, 10])
    plt.title('AHF training error')
    plt.xlabel('Training iterations')
    plt.ylabel('Error')
    plt.yscale('log')

    # load in the weights and see how well they control the arm
    dt = 1e-2
    sig_len = 40

    # HACK: append system path to have access to the arm code
    # NOTE: Change this path to wherever your plant model is kept!
    sys.path.append("../../../studywolf_control/studywolf_control/")
    from arms.two_link.arm_python import Arm as Arm
    if verbose:
        print('Plant is: ', Arm)
    arm = Arm(dt=dt, init_q=[0.736134824578, 1.85227640003])

    from hessianfree import RNNet
    from hessianfree.nonlinearities import (Tanh, Linear)
    from train_hf import PlantArm, gen_targets

    init_type = "sparse"
    rec_coeff = [1, 1]
    rec_type = "sparse"
    eps = 1e-6

    num_states = 4
    targets = gen_targets(arm, sig_len=sig_len)
    init_state = np.zeros((len(targets), num_states))
    init_state[:, 0] = 0.736134824578
    init_state[:, 1] = 1.85227640003
    plant = PlantArm(arm, targets=targets,
                     init_state=init_state, eps=eps)

    W = np.load(files[index])['arr_0']
    last_trial = -1

    # make sure this network is the same as the one you trained!
    from hessianfree.loss_funcs import SquaredError, SparseL2
    suts_gain = 10e-2 * 1e-2  # for centre-out reaching task
    net_size = 96
    if '32' in folder:
        net_size = 32
    rnn = RNNet(
        shape=[num_states * 2, net_size, net_size, num_states, num_states],
        layers=[Linear(), Tanh(), Tanh(), Linear(), plant],
        debug=False,
        rec_layers=[1, 2],
        conns={0: [1, 2], 1: [2], 2: [3], 3: [4]},
        W_rec_params={"coeff": rec_coeff, "init_type": rec_type},
        load_weights=W,
        use_GPU=False)

    full_output = rnn.forward(plant, rnn.W)
    outputs = full_output[-1]

    states = np.asarray(plant.get_vecs()[0][:, :, num_states:])
    targets = np.asarray(plant.get_vecs()[1])

    def kin(q):
        # forward kinematics for the two-link arm
        L = [.31, .27]
        q0 = q[:, 0]
        q1 = q[:, 1]
        return [L[0] * np.cos(q0) + L[1] * np.cos(q0 + q1),
                L[0] * np.sin(q0) + L[1] * np.sin(q0 + q1)]

    ax = plt.subplot2grid((1, 3), (0, 2))
    # plot start point
    initx, inity = kin(init_state)
    ax.plot(initx, inity, 'x', mew=10)
    for jj in range(0, len(targets)):
        # plot target
        targetx, targety = kin(targets[jj])
        ax.plot(targetx, targety, 'rx', mew=1)
        # plot path
        pathx, pathy = kin(states[jj, :, :])
        path = np.hstack([pathx[:, None], pathy[:, None]])
        if save_paths is True:
            np.savez_compressed('end-effector position%.3i.npz' % int(jj / 8),
                                array1=path)
        ax.plot(path[:, 0], path[:, 1])

    plt.tight_layout()
    plt.xlim([-.1, .1])
    plt.ylim([.25, .45])
    plt.title('Hand trajectory')
    plt.xlabel('x')
    plt.ylabel('y')

    if save_plot is not None:
        plt.savefig(save_plot)
    if show_plot is True:
        plt.show()
    plt.close()
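# A hedged follow-up sketch: loading the end-effector paths that
# gen_data_plot saves when save_paths=True back in for further analysis.
# The file name pattern and the 'array1' key match the np.savez_compressed
# call above; the function name and the path count are illustrative
# assumptions.
def load_saved_paths_sketch(num_paths=8):
    paths = []
    for ii in range(num_paths):
        data = np.load('end-effector position%.3i.npz' % ii)
        paths.append(data['array1'])
    return paths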
def gen_data_plot(folder="weights", index=None, show_plot=True,
                  save_plot=None, save_paths=False, verbose=True):

    files = sorted(glob.glob('%s/rnn*' % folder))
    # only plot the error values up to the requested index, but keep the
    # full file list around so files[index] below still refers to the
    # weights the caller asked for
    plot_files = files[:index] if index is not None else files

    # plot the error values over time
    vals = []
    for ii, name in enumerate(plot_files):
        if verbose:
            print(name)
        name = name.split('err')[1]
        name = name.split('.npz')[0]
        vals.append(float(name))
    vals = np.array(vals)

    plt.figure(figsize=(10, 3))
    ax = plt.subplot2grid((1, 3), (0, 0), colspan=2)
    ax.loglog(vals)
    ax.loglog(range(len(vals)), np.ones(len(vals)) * min(vals), 'r--')
    plt.xlim([0, len(plot_files)])
    plt.ylim([10**-5, 10])
    plt.title('AHF training error')
    plt.xlabel('Training iterations')
    plt.ylabel('Error')
    plt.yscale('log')

    # load in the weights and see how well they control the arm
    dt = 1e-2
    sig_len = 40

    # HACK: append system path to have access to the arm code
    # NOTE: Change this path to wherever your plant model is kept!
    sys.path.append("../../../studywolf_control/studywolf_control/")
    # from arms.two_link.arm_python import Arm as Arm
    from arms.three_link.arm import Arm as Arm
    if verbose:
        print('Plant is: %s' % str(Arm))
    arm = Arm(dt=dt)

    from hessianfree import RNNet
    from hessianfree.nonlinearities import (Tanh, Linear)
    from train_hf_3link import PlantArm, gen_targets

    rec_coeff = [1, 1]
    rec_type = "sparse"
    eps = 1e-6

    num_states = arm.DOF * 2
    targets = gen_targets(arm, sig_len=sig_len)
    init_state = np.zeros((len(targets), num_states), dtype=np.float32)
    init_state[:, :arm.DOF] = arm.init_q  # set up the initial joint angles
    plant = PlantArm(arm, targets=targets,
                     init_state=init_state, eps=eps)

    index = -1 if index is None else index
    W = np.load(files[index])['arr_0']

    # make sure this network is the same as the one you trained!
    net_size = 96
    if '32' in folder:
        net_size = 32
    rnn = RNNet(
        shape=[num_states * 2, net_size, net_size, num_states, num_states],
        layers=[Linear(), Tanh(), Tanh(), Linear(), plant],
        debug=False,
        rec_layers=[1, 2],
        conns={0: [1, 2], 1: [2], 2: [3], 3: [4]},
        W_rec_params={"coeff": rec_coeff, "init_type": rec_type},
        load_weights=W,
        use_GPU=False)

    rnn.forward(plant, rnn.W)

    states = np.asarray(plant.get_vecs()[0][:, :, num_states:])
    targets = np.asarray(plant.get_vecs()[1])

    def kin(q):
        # forward kinematics: sum the contribution of each link
        x = np.sum([arm.L[ii] * np.cos(np.sum(q[:, :ii+1], axis=1))
                    for ii in range(arm.DOF)], axis=0)
        y = np.sum([arm.L[ii] * np.sin(np.sum(q[:, :ii+1], axis=1))
                    for ii in range(arm.DOF)], axis=0)
        return x, y

    ax = plt.subplot2grid((1, 3), (0, 2))
    # plot start point
    initx, inity = kin(init_state)
    ax.plot(initx, inity, 'x', mew=10)
    for jj in range(0, len(targets)):
        # plot target
        targetx, targety = kin(targets[jj])
        ax.plot(targetx, targety, 'rx', mew=1)
        # plot path
        pathx, pathy = kin(states[jj, :, :])
        path = np.hstack([pathx[:, None], pathy[:, None]])
        if save_paths is True:
            np.savez_compressed('end-effector position%.3i.npz' % int(jj / 8),
                                array1=path)
        ax.plot(path[:, 0], path[:, 1])

    plt.tight_layout()
    # plt.xlim([-.1, .1])
    # plt.ylim([.25, .45])
    plt.title('Hand trajectory')
    plt.xlabel('x')
    plt.ylabel('y')

    if save_plot is not None:
        plt.savefig(save_plot)
    if show_plot is True:
        plt.show()
    plt.close()
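# A hedged usage sketch (assumption): how the three-link plotting routine
# above might be driven from the command line after a training run. The
# argument names are illustrative; the default folder matches where the
# training code saves its weights.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description='Plot AHF training error and hand trajectories')
    parser.add_argument('--folder', default='weights',
                        help='folder containing the saved rnn weight files')
    parser.add_argument('--save', default=None,
                        help='filename to save the figure to')
    args = parser.parse_args()
    gen_data_plot(folder=args.folder, save_plot=args.save)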
def test_plant(use_GPU):
    n_inputs = 32
    sig_len = 15

    class Plant(hf.nl.Plant):
        # this plant implements a simple dynamic system, with two-dimensional
        # state representing [position, velocity]
        def __init__(self, A, B, targets, init_state):
            super(Plant, self).__init__(stateful=True)

            self.A = np.asarray(A)
            self.B = B

            self.targets = targets
            self.init_state = init_state

            self.shape = [n_inputs, sig_len, len(A)]

            # derivative of output with respect to state (constant, so just
            # compute it once here)
            self.d_output = np.resize(np.eye(self.shape[-1]),
                                      (n_inputs, self.shape[-1],
                                       self.shape[-1], 1))

            self.reset()

        def activation(self, x):
            self.act_count += 1

            # this implements a basic s_{t+1} = A*s_t + B*x dynamic system.
            # but to make things a little more complicated we allow the B
            # matrix to be dynamic, so it's actually
            # s_{t+1} = A*s_t + B(s_t)*x

            self.B_matrix, self.d_B_matrix = self.B(self.state)

            self.state = (np.dot(self.state, self.A) +
                          np.einsum("ij,ijk->ik", x, self.B_matrix))

            return self.state[:x.shape[0]]

        def d_activation(self, x, _):
            self.d_act_count += 1
            assert self.act_count == self.d_act_count

            # derivative of state with respect to input
            d_input = self.B_matrix.transpose((0, 2, 1))[..., None]

            # derivative of state with respect to previous state
            d_state = np.resize(self.A.T, np.concatenate(([x.shape[0]],
                                                          self.A.shape)))
            d_state[:, 1, 0] += x[:, 1] * self.d_B_matrix[:, 1, 1]
            d_state = d_state[..., None]

            return np.concatenate((d_input, d_state, self.d_output), axis=-1)

        def __call__(self, _):
            self.inputs = np.concatenate((self.inputs,
                                          self.state[:, None, :]), axis=1)
            return self.state

        def get_inputs(self):
            return self.inputs

        def get_targets(self):
            return self.targets

        def reset(self, init=None):
            self.act_count = 0
            self.d_act_count = 0
            self.state = (self.init_state.copy() if init is None else
                          init.copy())
            self.inputs = np.zeros((self.shape[0], 0, self.shape[-1]),
                                   dtype=np.float32)
            self.B_matrix = self.d_B_matrix = None

    # static A matrix (converts velocity into a change in position)
    A = [[1, 0],
         [0.2, 1]]

    # dynamic B(s) matrix (converts input into velocity, modulated by current
    # state)
    # note that this dynamic B matrix doesn't really make much sense, it's
    # just here to demonstrate what happens with a plant whose dynamics
    # change over time
    def B(state):
        B = np.zeros((state.shape[0], state.shape[1], state.shape[1]))
        B[:, 1, 1] = np.tanh(state[:, 0])

        d_B = np.zeros((state.shape[0], state.shape[1], state.shape[1]))
        d_B[:, 1, 1] = 1 - np.tanh(state[:, 0])**2

        return B, d_B

    # initial position
    init_state = np.zeros((n_inputs, 2))
    init_state[:, 0] = np.linspace(-1, 1, n_inputs)

    # the target will be to end at position 1 with velocity 0
    targets = np.ones((n_inputs, sig_len, 2), dtype=np.float32)
    targets[:, :, 1] = 0
    targets[:, :-1, :] = np.nan

    plant = Plant(A, B, targets, init_state)

    rnn = hf.RNNet(shape=[2, 16, 2],
                   layers=[Linear(), Tanh(), plant],
                   W_init_params={"coeff": 0.1},
                   W_rec_params={"coeff": 0.1},
                   use_GPU=use_GPU,
                   rng=np.random.RandomState(0),
                   debug=False)

    rnn.run_batches(plant, None, HessianFree(CG_iter=20, init_damping=10),
                    max_epochs=150, plotting=True, print_period=None)

    outputs = rnn.forward(plant, rnn.W)

    try:
        assert rnn.loss.batch_loss(outputs, targets) < 1e-2
    except AssertionError:
        plt.figure()
        plt.plot(outputs[-1][:, :, 0].squeeze().T)
        plt.plot(outputs[-1][:, :, 1].squeeze().T)
        plt.title("outputs")
        plt.savefig("test_plant_outputs.png")
        raise
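# A hedged usage sketch (assumption): test_plant looks like it is normally
# run by a test framework, so this simply calls it once on the CPU for
# illustration.
if __name__ == '__main__':
    test_plant(use_GPU=False)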