# ---------------------------------------------------------------------------
# Example #1: cartpole swing-up experiment
# ---------------------------------------------------------------------------
    # NOTE(review): the enclosing `def` line appears to have been lost in extraction —
    # by analogy with Example #2's p_x0 this is presumably the body of the
    # initial-value-distribution sampler p(x_0); confirm against the original file.
    # State layout presumably [cart position, pole angle, cart velocity, angular
    # velocity] — TODO confirm against the CartPole environment definition.
    x0 = [np.random.uniform(-0.005, 0.005), np.pi, 0, 0]
    return x0


# Fixed (deterministic) initial state for the cartpole environment.
x0 = [0, np.pi, 0, 0]

t = 5               # time of an episode
dt = args.time_step # time step-size

env = CartPole(c_k, x0, dt)

# Per-experiment output directory.
path = f"{args.path}{args.exp_id}/"

# Collect the MBRL configuration in one place, then construct the algorithm.
mbrl_config = dict(
    test_t=args.test_t,
    path=path,
    horizon=2.,
    fcost=None,
    warm_up_episodes=args.warm_up_episodes,
    use_mpc=args.use_mpc,
    ilqr_print=True,
    ilqr_save=False,
    aggregation_interval=args.agg,
    training_epochs=args.epochs,
    weight_decay=args.weight_decay,
    prediction_error_bound=args.pred_err_bound,
    dyn_lr=1e-5,
)
rl_algorithm = MBRL(env, t, dt, **mbrl_config)

# rl_algorithm.load()  # uncomment to resume from a previously saved run
rl_algorithm.run_learning(50)

# ---------------------------------------------------------------------------
# Example #2: double cartpole swing-up experiment
# ---------------------------------------------------------------------------
# define the function, that represents the initial value distribution p(x_0)
def p_x0():
    """Sample an initial state from the initial-value distribution p(x_0).

    The first component (presumably the cart position — confirm against the
    CartPoleDouble environment) is drawn uniformly from [-0.005, 0.005];
    the remaining five components are fixed at [pi, pi, 0, 0, 0].
    """
    position = np.random.uniform(-0.005, 0.005)
    return [position, np.pi, np.pi, 0, 0, 0]

t = 3.5             # time of an episode
dt = args.time_step # time step-size

env = CartPoleDouble(c_k, p_x0, dt)

# Per-experiment output directory.
path = f"{args.path}{args.exp_id}/"

rl_algorithm = MBRL(
    env, t, dt,
    path=path,
    horizon=2.,
    fcost=c_N,
    warm_up_episodes=args.warm_up_episodes,
    use_mpc=args.use_mpc,
    ilqr_print=True,
    ilqr_save=True,
    aggregation_interval=args.agg,
    training_epochs=args.epochs,
    weight_decay=args.weight_decay,
    data_noise=args.data_noise,
)

# Optionally seed the random dataset D_rand from a pre-recorded data set.
if args.data_set != '':
    rl_algorithm.D_rand.load(args.data_set)

rl_algorithm.run_learning(args.episodes)


# ---------------------------------------------------------------------------
# Example #3: pendulum swing-up experiment
# ---------------------------------------------------------------------------
t = args.t          # time of an episode
dt = args.time_step # time step-size

env = Pendulum(c_k, x0, dt)

# Rescale uMax (presumably the actuation limit — confirm against the Pendulum
# environment). Expression order kept verbatim for bit-exact float results.
env.uMax = env.uMax / 3.5 * 5

# Per-experiment output directory.
path = f"{args.path}{args.exp_id}/"

# Full MBRL configuration; the planning horizon equals the episode length here.
mbrl_config = dict(
    path=path,
    horizon=t,
    fcost=c_N,
    test_t=args.test_t,
    warm_up_episodes=args.warm_up_episodes,
    use_mpc=args.use_mpc,
    ilqr_print=True,
    ilqr_save=True,
    aggregation_interval=args.agg,
    training_epochs=args.epochs,
    weight_decay=args.weight_decay,
    data_noise=args.data_noise,
    prediction_error_bound=args.pred_err_bound,
    maxIters=100,
    sparse_dyn=True,
)
rl_algorithm = MBRL(env, t, dt, **mbrl_config)

rl_algorithm.run_learning(args.episodes)