def main(args_list: list = None):
    """Parse the env type, load env-specific args, and run the Learner.

    Args:
        args_list: optional CLI-style token list. Defaults to an empty list,
            so by default sys.argv is NOT consulted (parse_known_args([])).
    """
    # Fix: the original used a mutable default argument (args_list=[]),
    # which is shared across calls. A None sentinel preserves the same
    # behavior (empty list => parse nothing) without the shared-state trap.
    if args_list is None:
        args_list = []
    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    # parser.add_argument('--env-type', default='ant_semicircle_sparse')
    args, rest_args = parser.parse_known_args(args_list)
    env = args.env_type

    # Delegate the remaining CLI tokens to the env-specific arg module.
    # --- GridWorld ---
    if env == 'gridworld':
        args = args_gridworld.get_args(rest_args)
    # --- PointRobot ---
    elif env == 'point_robot_sparse':
        args = args_point_robot_sparse.get_args(rest_args)
    # --- Mujoco ---
    elif env == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
    elif env == 'ant_semicircle_sparse':
        args = args_ant_semicircle_sparse.get_args(rest_args)

    set_gpu_mode(torch.cuda.is_available())

    # Only some arg modules define save_buffer, hence the hasattr guard.
    if hasattr(args, 'save_buffer') and args.save_buffer:
        os.makedirs(args.main_save_dir, exist_ok=True)

    learner = Learner(args)
    learner.train()
def main():
    """Build the offline dataset (with optional relabelling) and train the VAE."""
    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    # parser.add_argument('--env-type', default='ant_semicircle_sparse')
    parser.add_argument('--env-type', default='point_robot_wind')
    # parser.add_argument('--env-type', default='escape_room')
    args, rest_args = parser.parse_known_args()
    env = args.env_type

    # Dispatch the leftover CLI tokens to the matching env arg module.
    arg_modules = {
        'gridworld': args_gridworld,                       # --- GridWorld ---
        'point_robot_sparse': args_point_robot_sparse,     # --- PointRobot ---
        'escape_room': args_point_robot_barrier,
        'point_robot_wind': args_point_robot_rand_params,
        'cheetah_vel': args_cheetah_vel,                   # --- Mujoco ---
        'ant_semicircle_sparse': args_ant_semicircle_sparse,
    }
    chosen = arg_modules.get(env)
    if chosen is not None:
        args = chosen.get_args(rest_args)

    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)

    args, env = off_utl.expand_args(args)
    dataset, goals = off_utl.load_dataset(data_dir=args.data_dir, args=args, arr_type='numpy')
    # dataset, goals = off_utl.load_dataset(args)

    if args.hindsight_relabelling:
        print('Perform reward relabelling...')
        dataset, goals = off_utl.mix_task_rollouts(dataset, env, goals, args)

    if args.policy_replaying:
        mix_dataset, mix_goals = off_utl.load_replaying_dataset(
            data_dir=args.replaying_data_dir, args=args)
        print('Perform policy replaying...')
        dataset, goals = off_utl.mix_policy_rollouts(dataset, goals, mix_dataset, mix_goals, args)

    # vis test tasks
    # vis_train_tasks(env.unwrapped, goals)  # not with GridNavi

    if args.save_model:
        prefix = args.save_dir_prefix if hasattr(args, 'save_dir_prefix') \
            and args.save_dir_prefix is not None else ''
        stamp = datetime.datetime.now().strftime('__%d_%m_%H_%M_%S')
        args.full_save_path = os.path.join(args.save_dir, args.env_name, prefix + stamp)
        os.makedirs(args.full_save_path, exist_ok=True)
        config_utl.save_config_file(args, args.full_save_path)

    vae = VAE(args)
    train(vae, dataset, goals, args)
def offline_experiment(doodad_config, variant):
    """Doodad entry point: optionally state-relabel the data, then run offline meta-RL."""
    save_doodad_config(doodad_config)
    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    parser.add_argument('--env-type', default='ant_semicircle_sparse')
    args, rest_args = parser.parse_known_args(args=[])
    env = args.env_type

    # Route leftover CLI tokens to the env-specific arg module.
    # --- GridWorld ---
    if env == 'gridworld':
        args = args_gridworld.get_args(rest_args)
    # --- PointRobot ---
    elif env == 'point_robot_sparse':
        args = args_point_robot_sparse.get_args(rest_args)
    # --- Mujoco ---
    elif env == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
    elif env == 'ant_semicircle_sparse':
        args = args_ant_semicircle_sparse.get_args(rest_args)

    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)

    # Fold the saved VAE config into the runtime args; merge_configs gives the
    # second argument precedence, so the argument order matters here.
    config_path = os.path.join(
        args.vae_dir, args.env_name, args.vae_model_name, 'online_config.json')
    vae_args = config_utl.load_config_file(config_path)
    args = config_utl.merge_configs(vae_args, args)

    # Transform data to BAMDP (state relabelling)
    if args.transform_data_bamdp:
        # Load the trained VAE used for state relabelling.
        vae_models_path = os.path.join(
            args.vae_dir, args.env_name, args.vae_model_name, 'models')
        vae = VAE(args)
        off_utl.load_trained_vae(vae, vae_models_path)

        save_data_path = os.path.join(
            args.main_data_dir, args.env_name, args.relabelled_data_dir)
        # NOTE(review): raises if the directory exists — presumably a deliberate
        # guard against overwriting already-relabelled data; confirm before changing.
        os.makedirs(save_data_path)

        dataset, goals = off_utl.load_dataset(
            data_dir=args.data_dir, args=args, arr_type='numpy')
        bamdp_dataset = off_utl.transform_mdps_ds_to_bamdp_ds(dataset, vae, args)
        # Persist the relabelled dataset.
        off_utl.save_dataset(save_data_path, bamdp_dataset, goals)

    learner = OfflineMetaLearner(args)
    learner.train()
def main(args_list: list = None):
    """Parse the env type, prepare log dirs, and launch SAC-based meta-training.

    Args:
        args_list: optional CLI-style token list. Defaults to an empty list,
            so by default sys.argv is NOT consulted (parse_known_args([])).

    Raises:
        NotImplementedError: if args.output_file_prefix names an unknown learner.
    """
    # Fix: the original used a mutable default argument (args_list=[]),
    # shared across calls. The None sentinel keeps the same observable
    # behavior for callers that pass nothing.
    if args_list is None:
        args_list = []
    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='cheetah_vel')
    parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='gridworld')
    args, rest_args = parser.parse_known_args(args_list)
    env = args.env_type

    # --- GridWorld ---
    if env == 'gridworld':
        args = args_gridworld.get_args(rest_args)
    # --- PointRobot ---
    elif env == 'point_robot':
        args = args_point_robot.get_args(rest_args)
    elif env == 'point_robot_sparse':
        args = args_point_robot_sparse.get_args(rest_args)
    # --- Mujoco ---
    elif env == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
    elif env == 'ant_semicircle':
        args = args_ant_semicircle.get_args(rest_args)
    elif env == 'ant_semicircle_sparse':
        args = args_ant_semicircle_sparse.get_args(rest_args)

    # Make sure we have log directories; if one already exists, clear out any
    # stale monitor files instead of failing.
    try:
        os.makedirs(args.agent_log_dir)
    except OSError:
        files = glob.glob(os.path.join(args.agent_log_dir, '*.monitor.csv'))
        for f in files:
            os.remove(f)
    eval_log_dir = args.agent_log_dir + "_eval"
    try:
        os.makedirs(eval_log_dir)
    except OSError:
        files = glob.glob(os.path.join(eval_log_dir, '*.monitor.csv'))
        for f in files:
            os.remove(f)

    # set gpu
    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)

    # start training
    if args.output_file_prefix == 'only_sac':
        learner = MetaLearnerOnlySAC(args)
    elif args.output_file_prefix == 'sacmer':
        learner = MetaLearnerSACMER(args)
    else:
        raise NotImplementedError
    learner.train()
def main():
    """Run ten consecutive training rounds with seeds 30 through 39."""
    for round_idx in range(10):
        seed = round_idx + 30
        parser = argparse.ArgumentParser()
        # parser.add_argument('--env-type', default='gridworld')
        # parser.add_argument('--env-type', default='point_robot_sparse')
        # parser.add_argument('--env-type', default='cheetah_vel')
        # parser.add_argument('--env-type', default='ant_semicircle_sparse')
        parser.add_argument('--env-type', default='point_robot_wind')
        # parser.add_argument('--env-type', default='escape_room')
        args, rest_args = parser.parse_known_args()
        env = args.env_type

        # Dispatch leftover CLI tokens to the env-specific arg module.
        arg_modules = {
            'gridworld': args_gridworld,                       # --- GridWorld ---
            'point_robot_sparse': args_point_robot_sparse,     # --- PointRobot ---
            'point_robot_wind': args_point_robot_rand_params,
            'escape_room': args_point_robot_barrier,
            'cheetah_vel': args_cheetah_vel,                   # --- Mujoco ---
            'ant_semicircle_sparse': args_ant_semicircle_sparse,
        }
        chosen = arg_modules.get(env)
        if chosen is not None:
            args = chosen.get_args(rest_args)

        set_gpu_mode(torch.cuda.is_available())

        print("start new round - seed ", seed)
        args.seed = seed

        # Only some arg modules define save_buffer, hence the hasattr guard.
        if hasattr(args, 'save_buffer') and args.save_buffer:
            os.makedirs(args.main_save_dir, exist_ok=True)

        learner = Learner(args)
        learner.train()
def collect_hindsight_data():
    """Collect hindsight rollouts for every training goal using saved agents."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--env-type', default='point_robot_wind')
    # parser.add_argument('--env-type', default='escape_room')
    args, rest_args = parser.parse_known_args()
    env = args.env_type
    if env == 'point_robot_wind':
        args = args_point_robot_rand_params.get_args(rest_args)
    elif env == 'escape_room':
        args = args_point_robot_barrier.get_args(rest_args)

    # Necessary args because we reuse the VAE data-loading helpers below.
    args.main_data_dir = args.main_save_dir
    args.trajectory_len = 50
    args.num_trajs_per_task = None
    args.num_rollouts = 10

    set_gpu_mode(torch.cuda.is_available())

    if hasattr(args, 'save_buffer') and args.save_buffer:
        os.makedirs(args.main_save_dir, exist_ok=True)

    # Only the goals are needed here; the transitions themselves are discarded.
    _, goals = off_utl.load_dataset(data_dir=args.save_dir, args=args, arr_type='numpy')

    args.save_dir = "hindsight_data"
    args.save_data_path = os.path.join(args.main_data_dir, args.env_name, args.save_dir)
    # NOTE(review): raises if the directory exists — presumably an intentional
    # guard against overwriting previously collected data; confirm before changing.
    os.makedirs(args.save_data_path)

    agent_dirs = os.listdir('./trained_agents')
    for task_idx, goal in enumerate(goals):
        print("start collect rollouts for task number ", task_idx + 1)
        collect_rollout_per_goal(args, goal, agent_dirs)
def _train_vae(log_dir, offline_buffer_path, saved_tasks_path, env_type, seed,
               path_length, meta_episode_len, load_buffer_kwargs=None, **kwargs):
    """Train a VAE on an offline (PEARL-style) buffer.

    Args:
        log_dir: directory for progress markers and the trained VAE.
        offline_buffer_path: path to the pretraining buffer to load.
        saved_tasks_path: joblib file holding the task list under key 'tasks'.
        env_type: one of 'cheetah_vel', 'ant_dir', 'walker', 'hopper', 'humanoid'.
        seed: RNG seed applied to random, numpy, and torch.
        path_length: rollout length, stored as args.trajectory_len.
        meta_episode_len: meta-episode length forwarded to the buffer loader.
        load_buffer_kwargs: extra kwargs for off_utl.load_pearl_buffer.
        **kwargs: forwarded to the env-specific arg parser as '--key value'.

    Raises:
        ValueError: if env_type is not recognized.
    """
    with open(os.path.join(log_dir, 'test.txt'), 'w') as f:
        f.write("hello from train_vae_offline.py")
    if load_buffer_kwargs is None:
        load_buffer_kwargs = {}
    # Seed every RNG source for reproducibility.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    parser.add_argument('--env-type', default='ant_semicircle_sparse')
    # Forward **kwargs as CLI-style tokens so the env-specific parser sees them.
    extra_args = []
    for k, v in kwargs.items():
        extra_args.append('--{}'.format(k))
        extra_args.append(str(v))
    args, rest_args = parser.parse_known_args(args=extra_args)

    # --- Mujoco ---  (comment fixed: these branches are all Mujoco, not GridWorld)
    if env_type == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
        args.env_name = 'HalfCheetahVel-v0'
    elif env_type == 'ant_dir':
        # TODO: replace with ant_dir env
        args = args_ant_semicircle_sparse.get_args(rest_args)
        # Fix: the original also called parser.add_argument('--env-name', ...)
        # here, AFTER parse_known_args() had run — a no-op, removed. The direct
        # assignment below is what actually takes effect.
        args.env_name = 'AntDir-v0'
    elif env_type == 'walker':
        args = args_walker_param.get_args(rest_args)
    elif env_type == 'hopper':
        args = args_hopper_param.get_args(rest_args)
    elif env_type == 'humanoid':
        args = args_humanoid_dir.get_args(rest_args)
    else:
        raise ValueError('Unknown env_type: {}'.format(env_type))

    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)
    args, env = off_utl.expand_args(args)
    args.save_dir = os.path.join(log_dir, 'trained_vae')
    args.trajectory_len = path_length

    task_data = joblib.load(saved_tasks_path)
    tasks = task_data['tasks']

    print("loading dataset")
    with open(os.path.join(log_dir, 'tmp1.txt'), 'w') as f:
        f.write("train_vae_offline.py: start loading dataset")
    dataset, goals = off_utl.load_pearl_buffer(
        pretrain_buffer_path=offline_buffer_path,
        tasks=tasks,
        add_done_info=env.add_done_info,
        path_length=path_length,
        meta_episode_len=meta_episode_len,
        **load_buffer_kwargs)
    with open(os.path.join(log_dir, 'tmp1.txt'), 'a') as f:
        f.write("train_vae_offline.py: done loading dataset")
    print("done loading dataset")
    for data in dataset:
        print(data[0].shape)
    # Downcast once up front so the VAE trains on a uniform float32 dtype.
    dataset = [[x.astype(np.float32) for x in d] for d in dataset]

    if args.save_model:
        dir_prefix = args.save_dir_prefix if hasattr(args, 'save_dir_prefix') \
            and args.save_dir_prefix is not None else ''
        args.full_save_path = os.path.join(
            args.save_dir, args.env_name,
            dir_prefix + datetime.datetime.now().strftime('__%d_%m_%H_%M_%S'))
        os.makedirs(args.full_save_path, exist_ok=True)
        config_utl.save_config_file(args, args.full_save_path)

    vae = VAE(args)
    train(vae, dataset, args)
def _borel(
    log_dir,
    pretrained_vae_dir,
    env_type,
    transform_data_bamdp,
    seed,
    path_length,
    meta_episode_len,
    relabelled_data_dir=None,
    offline_buffer_path_to_save_to=None,
    offline_buffer_path='',
    saved_tasks_path='',
    debug=False,
    vae_model_name=None,
    load_buffer_kwargs=None,
    gpu_id=0,
    **kwargs,
):
    """Run offline meta-RL (BOReL) with a pretrained VAE, optionally state-relabelling first."""
    if load_buffer_kwargs is None:
        load_buffer_kwargs = {}
    # Seed every RNG source for reproducibility.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    parser = argparse.ArgumentParser()
    torch.autograd.set_detect_anomaly(True)
    if offline_buffer_path_to_save_to is None:
        offline_buffer_path_to_save_to = os.path.join(log_dir, 'transformed_data')
    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    parser.add_argument('--env-type', default=env_type)
    # Forward **kwargs as CLI-style tokens for the env-specific parser.
    cli_flags = []
    for key, value in kwargs.items():
        cli_flags.extend(('--{}'.format(key), str(value)))
    args, rest_args = parser.parse_known_args(args=cli_flags)
    args = env_name_to_args[env_type].get_args(rest_args)

    set_gpu_mode(torch.cuda.is_available() and args.use_gpu, gpu_id=gpu_id)

    # Default to the first saved model for this env if none was specified.
    if vae_model_name is None:
        vae_model_name = os.listdir(
            os.path.join(pretrained_vae_dir, args.env_name))[0]
    vae_args = config_utl.load_config_file(
        os.path.join(pretrained_vae_dir, args.env_name, vae_model_name,
                     'online_config.json'))
    # order of input to this function is important
    args = config_utl.merge_configs(vae_args, args)

    # _, env = off_utl.expand_args(args)
    from environments.make_env import make_env
    task_data = joblib.load(saved_tasks_path)
    tasks = task_data['tasks']
    args.presampled_tasks = tasks
    env = make_env(args.env_name, args.max_rollouts_per_task,
                   presampled_tasks=tasks, seed=args.seed)  # , n_tasks=1)

    args.vae_dir = pretrained_vae_dir
    args.data_dir = None
    args.vae_model_name = vae_model_name

    if transform_data_bamdp:
        # Transform data to BAMDP (state relabelling): load the pretrained VAE,
        # relabel the offline buffer, and persist the result.
        print("performing state-relabeling")
        vae_models_path = os.path.join(
            pretrained_vae_dir, args.env_name, vae_model_name, 'models')
        vae = VAE(args)
        off_utl.load_trained_vae(vae, vae_models_path)

        os.makedirs(offline_buffer_path_to_save_to, exist_ok=True)
        dataset, goals = off_utl.load_pearl_buffer(
            offline_buffer_path,
            tasks,
            add_done_info=env.add_done_info,
            path_length=path_length,
            meta_episode_len=meta_episode_len,
            **load_buffer_kwargs)
        dataset = [[x.astype(np.float32) for x in d] for d in dataset]
        bamdp_dataset = off_utl.transform_mdps_ds_to_bamdp_ds(dataset, vae, args)
        print("saving state-relabeled data to ", offline_buffer_path_to_save_to)
        off_utl.save_dataset(offline_buffer_path_to_save_to, bamdp_dataset, goals)
        relabelled_data_dir = offline_buffer_path_to_save_to

    args.relabelled_data_dir = relabelled_data_dir
    args.max_rollouts_per_task = 3
    args.results_log_dir = log_dir
    if debug:
        print("DEBUG MODE ON")
        args.rl_updates_per_iter = 1
        args.log_interval = 1

    learner = OfflineMetaLearner(args)
    learner.train()