Esempio n. 1
0
def build_dataset(
        idims=9,
        odims=6,
        angi=[],  # NOTE(review): mutable default; not mutated in this function
        f=test_func1,
        n_train=500,
        n_test=50,
        input_noise=0.01,
        output_noise=0.01,
        rand_seed=None):
    """Build a synthetic regression dataset from the scalar function ``f``.

    Training inputs are drawn uniformly from [-3.75, 11.25) and test inputs
    from [-60, 60). Output column ``k`` (0-based) is ``(k + 1) * f(inputs)``;
    only the training targets receive additive Gaussian noise.

    Args:
        idims: number of input dimensions before any angle processing.
        odims: number of output columns.
        angi: angle dimension indices forwarded to ``utils.gTrig_np`` /
            ``utils.gTrig2_np`` (presumably the dimensions replaced by
            sin/cos features -- confirm against ``utils``).
        f: callable applied to the raw input matrix; its result is written
            into one output column, so it should yield one value per row.
        n_train: number of training samples.
        n_test: number of test samples.
        input_noise: scale of the identity covariance attached to every
            test input.
        output_noise: standard deviation of the noise added to the training
            targets.
        rand_seed: if not None, seeds numpy's global RNG first.

    Returns:
        ``((x_train, y_train), (x_test, y_test, s_test))`` where ``s_test``
        holds one input covariance matrix per test point.
    """
    if rand_seed is not None:
        np.random.seed(rand_seed)

    # ------------------------- training set -------------------------
    train_inputs = 15 * (np.random.rand(n_train, idims) - 0.25)
    train_targets = np.empty((n_train, odims))
    for col in range(odims):
        # f is evaluated on the raw inputs, before the angle transform below
        clean = (col + 1) * f(train_inputs)
        train_targets[:, col] = clean + output_noise * np.random.randn(n_train)
    # applied even when angi is empty (unlike the test branch below)
    train_inputs = utils.gTrig_np(train_inputs, angi)

    # --------------------------- test set ---------------------------
    # every test point gets the same isotropic input covariance
    test_cov = np.tile(input_noise * np.eye(idims), (n_test, 1, 1))
    test_inputs = 120 * (np.random.rand(n_test, idims) - 0.5)
    test_targets = np.empty((n_test, odims))
    for col in range(odims):
        test_targets[:, col] = (col + 1) * f(test_inputs)
    if len(angi) > 0:
        test_inputs, test_cov = utils.gTrig2_np(
            test_inputs, test_cov, angi, idims)

    return (train_inputs, train_targets), (test_inputs, test_targets, test_cov)
Esempio n. 2
0
    def init_params(self):
        """Allocate the nominal trajectory and controller gains.

        Creates Theano shared variables for the nominal controls
        (``u_nominal``), the nominal states (``z_nominal``), the open loop
        terms (``I``) and the feedback matrices (``L``), one entry per time
        step of the horizon ``ceil(H / dt)``. Also resets ``state_changed``,
        stores ``triu_indices`` and builds ``filename``.
        """
        n_steps = int(np.ceil(self.H / self.dt))
        self.state_changed = False
        n_controls = len(self.maxU)

        # nominal controls: uniform samples in [-1, 1), scaled by maxU
        unit_noise = 2 * np.random.random((n_steps, n_controls)) - 1
        self.u_nominal = theano.shared(self.maxU * unit_noise)

        # initial state distribution, passed through utils.gTrig2_np with a
        # leading batch axis of size one
        batched_mean = np.array(self.m0)[None, :]
        batched_cov = np.array(self.S0)[None, :, :]
        mean0, cov0 = utils.gTrig2_np(
            batched_mean, batched_cov, self.angle_dims, len(self.m0))

        # each nominal state is the flattened mean followed by the upper
        # triangle of the covariance; the same vector is repeated per step
        self.triu_indices = np.triu_indices(mean0.size)
        packed0 = np.concatenate(
            [mean0.flatten(), cov0[0][self.triu_indices]])
        self.z_nominal = theano.shared(np.tile(packed0, (n_steps, 1)))

        # open loop terms and feedback matrices start at zero
        self.I = theano.shared(np.zeros((n_steps, n_controls)))
        self.L = theano.shared(np.zeros((n_steps, n_controls, packed0.size)))

        # filename: <name>_<state dims>_<control dims>
        state_tag = str(len(self.m0))
        control_tag = str(n_controls)
        self.filename = self.name + '_' + state_tag + '_' + control_tag
Esempio n. 3
0
def default_params():
    """Return the default nested parameter dictionary.

    The result has top-level entries for the initial state distribution,
    angle dimensions, step limits and discount, plus sub-dictionaries under
    ``'plant'``, ``'policy'``, ``'dynamics_model'``, ``'cost'`` and
    ``'optimizer'``.
    """
    # initial state distribution; dimension 0 is listed as an angle
    angle_dims = [0]
    mean0 = np.array([0, 0])
    cov0 = np.eye(len(mean0)) * (0.2**2)
    init_dist = utils.distributions.Gaussian(mean0, cov0)
    # only the size of the transformed mean is used below
    mean0_aug, _ = utils.gTrig2_np(
        mean0[None, :], np.array(cov0)[None, :, :], angle_dims, len(mean0))

    # plant parameters
    plant = {
        'dt': 0.1,
        'pole_length': 1.0,
        'pole_mass': 1.0,
        'friction': 0.01,
        'gravity': 9.82,
        'state0_dist': init_dist,
        'noise_dist': utils.distributions.Gaussian(
            np.zeros((init_dist.dim, )),
            np.diag([0.01, 0.1])**2),
    }

    # policy parameters
    policy = {
        'state0_dist': init_dist,
        'angle_dims': angle_dims,
        'n_inducing': 20,
        'maxU': [2.5],
    }

    # dynamics model: inputs are the transformed state plus the controls,
    # outputs have the size of the raw state
    dynamics = {
        'idims': mean0_aug.size + len(policy['maxU']),
        'odims': mean0.size,
        'n_inducing': 100,
    }

    # cost function parameters
    cost_cfg = {
        'angle_dims': angle_dims,
        'target': [np.pi, 0],
        'cw': 0.5,
        'expl': 0.0,
        'pole_length': plant['pole_length'],
        'loss_func': cost.quadratic_saturating_loss,
    }

    # optimizer parameters
    optimizer = {
        'max_evals': 100,
        'conv_thr': 1e-16,
        'min_method': 'L-BFGS-B',
    }

    # control horizon: 4.0 time units expressed in plant steps
    horizon_steps = int(4.0 / plant['dt'])

    # general parameters
    return {
        'state0_dist': init_dist,
        'angle_dims': angle_dims,
        'min_steps': horizon_steps,
        'max_steps': horizon_steps,
        'discount': 1.0,  # discount factor
        'plant': plant,
        'policy': policy,
        'dynamics_model': dynamics,
        'cost': cost_cfg,
        'optimizer': optimizer,
    }