def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--env_name', type=str, default='HalfCheetah-v2')
    # Experiment meta-params
    parser.add_argument('--exp_name', type=str, default='mb_mpc')
    parser.add_argument('--seed', type=int, default=3)
    parser.add_argument('--render', action='store_true')
    # Training args
    parser.add_argument('--learning_rate', '-lr', type=float, default=1e-3)
    parser.add_argument('--onpol_iters', '-n', type=int, default=5)      # aggregation iterations
    parser.add_argument('--dyn_iters', '-nd', type=int, default=60)      # dynamics-model epochs
    parser.add_argument('--batch_size', '-b', type=int, default=512)
    # Data collection
    parser.add_argument('--random_paths', '-r', type=int, default=700)   # number of random paths
    parser.add_argument('--onpol_paths', '-d', type=int, default=10)     # number of MPC paths
    parser.add_argument('--ep_len', '-ep', type=int, default=1000)       # path length
    # Neural network architecture args
    parser.add_argument('--n_layers', '-l', type=int, default=2)
    parser.add_argument('--size', '-s', type=int, default=500)
    # MPC controller
    parser.add_argument('--mpc_horizon', '-m', type=int, default=15)     # MPC simulation horizon H
    parser.add_argument('--simulated_paths', '-sp', type=int, default=10000)  # number of MPC candidates K
    args = parser.parse_args()
    print(args)

    # Set seeds
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    # Make env (note: string comparison must use '==', not 'is')
    if args.env_name == 'HalfCheetah-v2':
        env = HalfCheetahEnvNew()
        cost_fn = cheetah_cost_fn
    env_name = args.env_name  # e.g. HalfCheetah-v2 or My3LineDirect-v1
    cost_fn = cheetah_cost_fn
    env = gym.make(env_name)
    # env.set_goals(45 * 3.14 / 180.0)  # the goal angle must be given in radians

    # Make the log directory if it does not already exist
    logdir = configure_log_dir(logname=env_name, txt='-train')
    utils.LOG_PATH = logdir
    with open(logdir + '/info.txt', 'wt') as f:
        print('Hello World!\n', file=f)
        print(args, file=f)

    train(
        env=env,
        cost_fn=cost_fn,
        logdir=logdir,
        render=args.render,
        learning_rate=args.learning_rate,
        onpol_iters=args.onpol_iters,
        dynamics_iters=args.dyn_iters,
        batch_size=args.batch_size,
        num_paths_random=args.random_paths,
        num_paths_onpol=args.onpol_paths,
        num_simulated_paths=args.simulated_paths,
        env_horizon=args.ep_len,
        mpc_horizon=args.mpc_horizon,
        n_layers=args.n_layers,
        size=args.size,
        activation='relu',
        output_activation=None,
    )
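# `cheetah_cost_fn` is referenced above but not defined in this file. The
# following is only a minimal sketch in the same spirit, assuming the cost
# rewards forward progress of the torso with a small control penalty; the
# observation layout (index 0 = torso x-position) and the 0.1 weight are
# assumptions, not this repo's actual definition.
import numpy as np

def cheetah_cost_fn_sketch(state, action, next_state):
    """Hypothetical per-step cost: negative forward displacement plus a
    small quadratic control penalty. Works on single or batched inputs."""
    forward_progress = next_state[..., 0] - state[..., 0]
    ctrl_penalty = 0.1 * np.sum(np.square(action), axis=-1)
    return -forward_progress + ctrl_penalty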
parser.add_argument('--pop_size', type=int, default=8)
parser.add_argument('--max_gen', type=int, default=1000)
parser.add_argument('--CXPB', type=float, default=0.8)
parser.add_argument('--MUTPB', type=float, default=0.1)
parser.add_argument('--gain_max', type=float, default=2.0)
parser.add_argument('--speed_max', type=float, default=2.0)
args = parser.parse_args()

env_name = args.env_name
env = gym.make(env_name)
log_name = 'PSO4_open'

# Set up logging; this also creates the log files
log_dir = configure_log_dir(env_name, txt=log_name, No_time=False)
logging_output(log_dir)
logger = LoggerCsv(log_dir, csvname='log_results')
results_IO = IO(os.path.join(log_dir, 'results.pkl'))
args_IO = IO(os.path.join(log_dir, 'args.pkl')).to_pickle(args)


def parmeter_generate(pmin, pmax):
    # Draw the 27 CPG parameters uniformly from [pmin, pmax]
    parm_list = [random.uniform(pmin, pmax) for _ in range(27)]
    return parm_list


def generate(size, pmin, pmax, smin, smax):
    part = creator.Particle(parmeter_generate(pmin, pmax))
    part.speed = [random.uniform(smin, smax) for _ in range(size)]
    # The original snippet breaks off here; a DEAP particle normally also
    # stores its speed limits and is returned to the caller:
    part.smin = smin
    part.smax = smax
    return part
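# `generate` presupposes a DEAP `creator.Particle` class. Below is a
# minimal sketch of the registration it needs, following DEAP's standard
# PSO example; the fitness weights and the particle/population wiring are
# assumptions about this repo's actual configuration.
from deap import base, creator, tools

creator.create('FitnessMax', base.Fitness, weights=(1.0,))
creator.create('Particle', list, fitness=creator.FitnessMax,
               speed=list, smin=None, smax=None, best=None)

toolbox = base.Toolbox()
toolbox.register('particle', generate, size=27,
                 pmin=0.0, pmax=args.gain_max,
                 smin=-args.speed_max, smax=args.speed_max)
toolbox.register('population', tools.initRepeat, list, toolbox.particle)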
        CPG_parm_num,
    ]
else:
    raise NotImplementedError(
        'env: {} task is not implemented.'.format(args.env_name))

env = gym.make(env_name)
log_name = args.env_name + '_PSO_' + args.task_mode
evaluate_fun = partial(oscillator_nw,
                       env_name=env_name,
                       max_time=args.max_time,
                       fitness_option=args.fitness_mode)

# Create log files
exp_group_dir = args.exp_group_dir
log_dir = configure_log_dir(env_name, txt=log_name, No_time=False,
                            log_group=exp_group_dir)
logging_output(log_dir)
logger = LoggerCsv(log_dir, csvname='log_results')
results_IO = IO(os.path.join(log_dir, 'results.pkl'))
args_IO = IO(os.path.join(log_dir, 'args.pkl')).to_pickle(args)

gain_max = args.gain_max
bias_max = args.bias_max
phase_max = args.phase_max

log.info('[System] parameters: {}'.format(args))
log.info('*********************************************')
log.info('ENV : {} task_mode: {}'.format(env_name, task_mode))
log.info('ENV : {} fitness: {}'.format(env_name, args.fitness_mode))
log.info('ENV : {} gain_max: {}'.format(env_name, gain_max))
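# `evaluate_fun` is what the optimizer calls on each candidate parameter
# vector. A hedged sketch of wiring it into a DEAP toolbox (assuming the
# toolbox sketched earlier, and that `oscillator_nw` returns one scalar
# fitness; DEAP expects fitnesses as tuples):
def evaluate_particle(part):
    fitness = evaluate_fun(list(part))
    return (fitness,)

toolbox.register('evaluate', evaluate_particle)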
def main(args):
    tf.set_random_seed(args.seed)
    np.random.seed(args.seed)

    env_name = args.env_name  # e.g. HalfCheetah-v2 or My3LineDirect-v1
    print(env_name)
    if args.env_name == 'HalfCheetahEnvDisableEnv-v0':
        cost_fn = cheetah_cost_fn
        sample_task_fun = np.random.randint
    elif args.env_name == 'HalfCheetahVaryingEnv-v0':
        cost_fn = cheetah_cost_fn
        sample_task_fun = np.random.uniform
    else:
        raise ValueError('Unsupported env: {}'.format(args.env_name))

    env = gym.make(env_name)
    dim_input = env.observation_space.shape[0] + env.action_space.shape[0]
    dim_output = env.observation_space.shape[0]

    logdir = configure_log_dir(logname=env_name, txt=args.note)
    # Save the run's argument values
    with open(logdir + '/info.txt', 'wt') as f:
        print('Hello World!\n', file=f)
        print(args, file=f)

    mpc_horizon = args.mpc_horizon
    num_simulated_paths = args.simulated_paths

    dyn_model = Dynamics(
        args.env_name,
        args.NumOfExp,
        args.model_type,
        args.loss_type,
        dim_input,
        dim_output,
        beta=args.beta,
        max_epochs=args.max_epochs,
        is_train=args.is_train,
        norm=args.norm,
        task_Note=args.note,
        restore_checkpoint=args.restore_checkpoint,
        restore_dir=args.restore_dir,
        logdir=logdir)

    mpc_controller = MPCcontroller(
        env=env,
        dyn_model=dyn_model,
        horizon=mpc_horizon,
        cost_fn=cost_fn,
        num_simulated_paths=num_simulated_paths,
    )

    logger = Logger(logdir, csvname='log')
    num_itr = args.num_itr
    experiences, costs = [], []
    print('MPC is beginning...')
    for itr in range(num_itr):
        reward, model_loss_mean = rollout(
            env,
            mpc_controller,
            task_goal=args.task_goal,
            dyn_model=dyn_model,
            experiences=experiences,
            NumOfExp=args.NumOfExp,
            horizon=args.horizon,
            cost_fn=cheetah_cost_fn,
            render=False,
            verbose=False,
            save_video=False,
            ignore_done=True,
        )
        log.infov(
            'Itr {}/{} Accumulated Reward: {:.4f} Model loss mean: {:.4f}'.format(
                itr, num_itr, reward, model_loss_mean))
        logger.log({
            'itr': itr,
            'Accumulated Reward': reward,
            'Model loss mean': model_loss_mean,
        })
    print('MPC is over.')
    logger.write(display=False)
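# `MPCcontroller` above implements random-shooting MPC: sample K random
# action sequences of length H, roll each through the learned dynamics
# model, and execute the first action of the cheapest sequence. A minimal
# sketch of one such step; `dyn_model.predict(states, actions)` returning
# batched next states is an assumed API, and the per-step cost summation
# is a simplification of the repo's actual trajectory cost.
import numpy as np

def random_shooting_action(env, dyn_model, state, cost_fn, horizon, n_paths):
    # K x H x act_dim candidate action sequences, uniform over action bounds
    acts = np.random.uniform(env.action_space.low, env.action_space.high,
                             size=(n_paths, horizon) + env.action_space.shape)
    states = np.tile(state, (n_paths, 1))
    total_cost = np.zeros(n_paths)
    for t in range(horizon):
        next_states = dyn_model.predict(states, acts[:, t])  # assumed API
        total_cost += cost_fn(states, acts[:, t], next_states)
        states = next_states
    # Execute only the first action of the cheapest candidate sequence
    return acts[np.argmin(total_cost), 0]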
epoch_size = 1000
learning_rate = 0.0001

# DAgger
n_episode = 4
steps = 1000  # maximum steps per episode
dagger_epoch_size = 1000
dagger_batch_size = 1024

# MPC
dyn_model = torch.load('data/best_A01_net.pkl')
cost_fn = cheetah_cost_fn
mpc_horizon = 15
num_simulated_paths = 10000

logdir = configure_log_dir(logname=env_name, txt='-Dagger-scale')


def compute_normalization(data):
    """Fit a normalizer on the dataset.

    The assignment text asks for six statistics -- the means and stds of
    s_t, of (s_{t+1} - s_t), and of the actions. A fitted StandardScaler
    encapsulates the per-column means and stds of whatever it is fit on:
    X_scaled = scaler.transform(X) normalizes, and
    X_inv = scaler.inverse_transform(X_scaled) undoes it.
    """
    scaler = preprocessing.StandardScaler().fit(data)
    return scaler
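# A short, self-contained usage sketch of `compute_normalization` when the
# dynamics model is trained on (s_t, a_t) -> (s_{t+1} - s_t) pairs. All
# shapes and variable names here are illustrative, not the repo's.
import numpy as np

states = np.random.randn(256, 20)                 # toy rollout data
actions = np.random.randn(256, 6)
next_states = states + 0.01 * np.random.randn(256, 20)

inputs = np.concatenate([states, actions], axis=1)
deltas = next_states - states

in_scaler = compute_normalization(inputs)         # fit on (s_t, a_t)
out_scaler = compute_normalization(deltas)        # fit on state deltas

x = in_scaler.transform(inputs)                   # normalized network input
y = out_scaler.transform(deltas)                  # normalized target

# At prediction time, un-normalize the model output and add it to s_t
delta_pred = out_scaler.inverse_transform(y)      # stand-in for model output
next_state_pred = states + delta_pred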
epoch_size = 500
learning_rate = 0.0001

# DAgger
n_episode = 10  # number of rollouts
steps = 10000  # model-based rollout length
dagger_epoch_size = 1000
dagger_batch_size = 1024

# MPC
dyn_model = torch.load('data/net.pkl')
cost_fn = cheetah_cost_fn
mpc_horizon = 15
num_simulated_paths = 10000

logdir = configure_log_dir(logname=env_name, txt='-Test_policy')


def compute_normalization(data):
    """Fit a normalizer on the dataset (same helper as in the DAgger
    script): a fitted StandardScaler holds the per-column means and stds;
    transform() normalizes and inverse_transform() undoes it.
    """
    scaler = preprocessing.StandardScaler().fit(data)
    return scaler
(x_train, y_train), (x_test, y_test) = load_data('log-test1.csv',
                                                 test_percentage=1,
                                                 data_num=1000)
num_predict = 1000

# Reload the trained dynamics model and roll it out on the test set
dyn_model = torch.load('net.pkl')
states_eval = predict_error_scaled(dyn_model, x_test, y_test,
                                   lengthOfRollout=num_predict)

# Create log files
logdir = configure_log_dir(logname=env_name, txt='ModelTest')

# Save the trajectory of the evaluation
logger = Logger(logdir, csvname='log_test')
trajectory = {}
tra_name = [
    's1-qpos1', 's2-qpos2', 's3-qpos3', 's4-qpos4', 's5-qpos5',
    's6-qpos6', 's7-qpos7', 's8-qpos8', 's9-qvel0', 's10-qvel1',
    's11-qvel2', 's12-qvel3', 's13-qvel4', 's14-qvel5', 's15-qvel6',
    's16-qvel7', 's17-qvel8', 's18-com0', 's19-com1', 's20-com2'
]
for j in range(states_eval.shape[0]):
    for i in range(states_eval.shape[1]):
        trajectory[tra_name[i]] = states_eval[j][i]
    logger.log(trajectory)
logger.write(display=False)
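# `predict_error_scaled` is not shown in this file. A hedged sketch of the
# open-loop rollout it presumably performs: start from the first test
# input and iterate the model for `lengthOfRollout` steps, feeding each
# prediction back in as the next state. The state dimension, input layout
# (state first, action after), and direct state-prediction output are all
# assumptions.
import numpy as np
import torch

def open_loop_rollout_sketch(model, x_test, length, s_dim=20):
    states = []
    x = np.array(x_test[0], dtype=np.float32)
    for t in range(length):
        with torch.no_grad():
            next_state = model(torch.as_tensor(x)).numpy()
        states.append(next_state)
        # Take the recorded action for the next step, but replace the
        # recorded state with the model's own prediction
        if t + 1 < len(x_test):
            x = np.array(x_test[t + 1], dtype=np.float32)
        x[:s_dim] = next_state
    return np.asarray(states)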