def build_dataset(idims=9, odims=6, angi=None, f=test_func1, n_train=500,
                  n_test=50, input_noise=0.01, output_noise=0.01,
                  rand_seed=None):
    """Generate a synthetic regression dataset from the scalar function ``f``.

    Training inputs are drawn uniformly from [-3.75, 11.25) and test inputs
    from [-60, 60) in every input dimension.  Output column ``i`` is
    ``(i + 1) * f(x)``; training outputs additionally receive zero-mean
    Gaussian noise scaled by ``output_noise``, test outputs are noise free.

    Args:
        idims: number of input dimensions.
        odims: number of output dimensions (columns of ``y``).
        angi: indices of the input dimensions forwarded to
            ``utils.gTrig_np`` / ``utils.gTrig2_np``.  ``None`` (the default)
            is treated as an empty list.
        f: callable applied to the whole ``(n, idims)`` input array; its
            result is assigned to a length-``n`` output column.
        n_train: number of training points.
        n_test: number of test points.
        input_noise: scale of the identity covariance attached to every
            test input.
        output_noise: standard deviation of the noise added to the training
            outputs.
        rand_seed: if not ``None``, seeds NumPy's global random generator
            before any sampling.

    Returns:
        ``((x_train, y_train), (x_test, y_test, s_test))`` where ``s_test``
        holds one ``input_noise * I`` covariance per test point, i.e. shape
        ``(n_test, idims, idims)`` before any ``gTrig2_np`` transformation.

    Note:
        ``x_train`` is always passed through ``utils.gTrig_np`` (after the
        outputs have been computed from the raw inputs), whereas
        ``x_test``/``s_test`` are only transformed when ``angi`` is
        non-empty.
    """
    # A None sentinel avoids sharing a single mutable list between calls.
    if angi is None:
        angi = []

    if rand_seed is not None:
        np.random.seed(rand_seed)

    # ================== train dataset ==================
    # sample training points
    x_train = 15 * (np.random.rand(n_train, idims) - 0.25)

    # generate the (noisy) output at the training points; the noise is drawn
    # one column at a time so that results for a given seed stay reproducible
    y_train = np.empty((n_train, odims))
    for i in range(odims):
        noise = output_noise * np.random.randn(n_train)
        y_train[:, i] = (i + 1) * f(x_train) + noise

    # outputs above were computed from the raw inputs; only now are the
    # inputs handed to the angle transformation
    x_train = utils.gTrig_np(x_train, angi)

    # ================== test dataset ==================
    # every test point gets the same isotropic input covariance
    # NOTE: a correlated alternative that was previously disabled:
    #   kk = input_noise*convolve2d(np.array([[1,2,3,2,1]]),
    #                               np.array([[1,2,3,2,1]]).T)/9.0
    #   s_test = convolve2d(np.eye(idims), kk, 'same')
    s_test = input_noise * np.eye(idims)
    s_test = np.tile(s_test, (n_test, 1)).reshape(n_test, idims, idims)

    # generate testing points (much wider range than the training inputs)
    x_test = 120 * (np.random.rand(n_test, idims) - 0.5)

    # generate the noise-free output at the test points
    y_test = np.empty((n_test, odims))
    for i in range(odims):
        y_test[:, i] = (i + 1) * f(x_test)

    if len(angi) > 0:
        x_test, s_test = utils.gTrig2_np(x_test, s_test, angi, idims)

    return (x_train, y_train), (x_test, y_test, s_test)
def init_params(self):
    """Initialise nominal controls, nominal states, gains and the filename.

    The number of time steps is ``ceil(self.H / self.dt)``.  The method
    reads ``self.H``, ``self.dt``, ``self.maxU``, ``self.m0``, ``self.S0``,
    ``self.angle_dims`` and ``self.name`` and (re)creates:

    * ``self.state_changed`` -- reset to ``False``;
    * ``self.u_nominal`` -- shared variable with one random control per
      step, sampled as ``maxU * (2 * r - 1)`` with ``r`` uniform in [0, 1);
    * ``self.triu_indices`` -- upper-triangular indices for a square matrix
      whose side is the size of the mean returned by ``utils.gTrig2_np``;
    * ``self.z_nominal`` -- shared variable holding the same initial state
      vector (flattened mean followed by the upper-triangular covariance
      entries) repeated for every step;
    * ``self.I`` / ``self.L`` -- zero-initialised shared variables for the
      open loop and feedback matrices;
    * ``self.filename`` -- ``<name>_<len(m0)>_<len(maxU)>``.
    """
    n_steps = int(np.ceil(self.H / self.dt))
    self.state_changed = False
    n_controls = len(self.maxU)

    # nominal controls: uniform samples in [-1, 1) scaled by maxU
    unit_samples = np.random.random((n_steps, n_controls))
    self.u_nominal = theano.shared(self.maxU * (2 * unit_samples - 1))

    # initial state distribution, with a leading batch axis of length one,
    # passed through utils.gTrig2_np so the nominal states have the
    # appropriate size
    mean_in = np.array(self.m0)[np.newaxis, :]
    cov_in = np.array(self.S0)[np.newaxis, :, :]
    mean_out, cov_out = utils.gTrig2_np(
        mean_in, cov_in, self.angle_dims, len(self.m0))

    # state vector = flattened mean + upper triangle of the covariance
    self.triu_indices = np.triu_indices(mean_out.size)
    z_init = np.concatenate(
        (mean_out.flatten(), cov_out[0][self.triu_indices]))
    self.z_nominal = theano.shared(np.tile(z_init, (n_steps, 1)))

    # open loop and feedback matrices start at zero
    self.I = theano.shared(np.zeros((n_steps, n_controls)))
    self.L = theano.shared(np.zeros((n_steps, n_controls, z_init.size)))

    # set a meaningful filename
    state_tag = str(len(self.m0))
    control_tag = str(n_controls)
    self.filename = self.name + '_' + state_tag + '_' + control_tag
def default_params():
    """Return the default nested parameter dictionary.

    The returned dict has the top-level keys ``state0_dist``,
    ``angle_dims``, ``min_steps``, ``max_steps``, ``discount``, ``plant``,
    ``policy``, ``dynamics_model``, ``cost`` and ``optimizer``; the last
    five map to their own parameter dicts.  The same ``angle_dims`` list
    and the same initial-state ``Gaussian`` instance are shared by every
    sub-dict that references them.
    """
    # setup learner parameters
    angi = [0]
    x0 = np.array([0, 0])
    S0 = np.eye(len(x0)) * (0.2**2)
    p0 = utils.distributions.Gaussian(x0, S0)
    # only the size of the transformed mean is needed below
    # (presumably gTrig2_np expands the angle dimensions -- confirm in utils)
    x0a, _ = utils.gTrig2_np(
        x0[None, :], np.array(S0)[None, :, :], angi, len(x0))

    dt = 0.1
    pole_length = 1.0
    max_u = [2.5]
    # control horizon, used for both the minimum and maximum step count
    horizon_steps = int(4.0 / dt)

    # plant parameters
    plant_params = {
        'dt': dt,
        'pole_length': pole_length,
        'pole_mass': 1.0,
        'friction': 0.01,
        'gravity': 9.82,
        'state0_dist': p0,
        'noise_dist': utils.distributions.Gaussian(
            np.zeros((p0.dim, )), np.diag([0.01, 0.1])**2),
    }

    # policy parameters
    policy_params = {
        'state0_dist': p0,
        'angle_dims': angi,
        'n_inducing': 20,
        'maxU': max_u,
    }

    # dynamics model parameters: inputs are the transformed state plus the
    # controls, outputs have the size of the original state
    dynmodel_params = {
        'idims': x0a.size + len(max_u),
        'odims': x0.size,
        'n_inducing': 100,
    }

    # cost function parameters
    cost_params = {
        'angle_dims': angi,
        'target': [np.pi, 0],
        'cw': 0.5,
        'expl': 0.0,
        'pole_length': pole_length,
        'loss_func': cost.quadratic_saturating_loss,
    }

    # optimizer params
    opt_params = {
        'max_evals': 100,
        'conv_thr': 1e-16,
        'min_method': 'L-BFGS-B',
    }

    # general parameters
    return {
        'state0_dist': p0,
        'angle_dims': angi,
        'min_steps': horizon_steps,
        'max_steps': horizon_steps,
        'discount': 1.0,  # discount factor
        'plant': plant_params,
        'policy': policy_params,
        'dynamics_model': dynmodel_params,
        'cost': cost_params,
        'optimizer': opt_params,
    }