# NOTE(review): the `def p_x0():` header was lost when this file was mangled
# onto one line (a bare `return x0` appeared at top level). Reconstructed here
# by analogy with the sibling experiment scripts -- confirm against VCS.
def p_x0():
    """Sample an initial state: small random cart offset, pole hanging down
    (theta = pi), zero velocities.
    """
    x0 = [np.random.uniform(-0.005, 0.005), np.pi, 0, 0]
    return x0


# Deterministic initial state actually passed to the environment below;
# p_x0 is kept available but unused in this script as written.
x0 = [0, np.pi, 0, 0]
t = 5  # time of an episode
dt = args.time_step  # time step-size

env = CartPole(c_k, x0, dt)
path = args.path + str(args.exp_id) + '/'

rl_algorithm = MBRL(env, t, dt,
                    test_t=args.test_t,
                    path=path,
                    horizon=2.,
                    fcost=None,
                    warm_up_episodes=args.warm_up_episodes,
                    use_mpc=args.use_mpc,
                    ilqr_print=True,
                    ilqr_save=False,
                    aggregation_interval=args.agg,
                    training_epochs=args.epochs,
                    weight_decay=args.weight_decay,
                    prediction_error_bound=args.pred_err_bound,
                    dyn_lr=1e-5)

# rl_algorithm.load()  # uncomment to resume from a previously saved agent
# NOTE(review): episode count is hard-coded to 50 here, while the sibling
# scripts use args.episodes -- confirm this is intentional.
rl_algorithm.run_learning(50)
# Define the function that represents the initial value distribution p(x_0):
# small random cart offset, both pole angles hanging down (theta = pi),
# zero velocities. (Six entries -- presumably [x, th1, th2, dx, dth1, dth2];
# verify against the CartPoleDouble state ordering.)
def p_x0():
    x0 = [np.random.uniform(-0.005, 0.005), np.pi, np.pi, 0, 0, 0]
    return x0


t = 3.5  # time of an episode
dt = args.time_step  # time step-size

env = CartPoleDouble(c_k, p_x0, dt)
path = args.path + str(args.exp_id) + '/'

rl_algorithm = MBRL(env, t, dt,
                    path=path,
                    horizon=2.,
                    fcost=c_N,
                    warm_up_episodes=args.warm_up_episodes,
                    use_mpc=args.use_mpc,
                    ilqr_print=True,
                    ilqr_save=True,
                    aggregation_interval=args.agg,
                    training_epochs=args.epochs,
                    weight_decay=args.weight_decay,
                    data_noise=args.data_noise)

# Optionally warm-start the random-data buffer from a stored data set.
if args.data_set != '':
    rl_algorithm.D_rand.load(args.data_set)

rl_algorithm.run_learning(args.episodes)
t = args.t  # time of an episode
dt = args.time_step  # time step-size

# NOTE(review): `x0` is defined elsewhere in this file (not visible in this
# chunk) -- this script assumes it is already in scope.
env = Pendulum(c_k, x0, dt)
# Rescale the control limit from the environment's default.
# Kept as `/ 3.5 * 5` (not `* (5 / 3.5)`) to preserve exact float evaluation
# order; presumably converts a 3.5-unit default to a 5-unit limit -- confirm.
env.uMax = env.uMax / 3.5 * 5

path = args.path + str(args.exp_id) + '/'

rl_algorithm = MBRL(env, t, dt,
                    path=path,
                    horizon=t,
                    fcost=c_N,
                    test_t=args.test_t,
                    warm_up_episodes=args.warm_up_episodes,
                    use_mpc=args.use_mpc,
                    ilqr_print=True,
                    ilqr_save=True,
                    aggregation_interval=args.agg,
                    training_epochs=args.epochs,
                    weight_decay=args.weight_decay,
                    data_noise=args.data_noise,
                    prediction_error_bound=args.pred_err_bound,
                    maxIters=100,
                    sparse_dyn=True)

rl_algorithm.run_learning(args.episodes)