def get_parser():
    """Build the command-line parser for evaluating a trained policy.

    Local evaluation flags (checkpoint path, episode selection, rendering
    and recording options) are registered first, then the shared
    environment- and student-argument groups are added by the project's
    argument handlers.

    Returns:
        argparse.ArgumentParser: the fully configured parser.
    """
    parser = argparse.ArgumentParser()
    # Path of the experiment folder holding the trained policy.
    parser.add_argument('--fpath', type=str)
    # Maximum episode length (0 lets the environment decide).
    parser.add_argument('--len', '-l', type=int, default=0)
    parser.add_argument('--norender', '-nr', action='store_true')
    # Checkpoint iteration to load (-1 means the last one).
    parser.add_argument('--itr', '-i', type=int, default=-1)
    parser.add_argument('--deterministic', '-d', action='store_true')
    # "/"-separated episode ids, or "-1" for the whole test set.
    parser.add_argument('--episode_ids', '-id', type=str, default="0")
    parser.add_argument('--bests', type=str2bool, default=None)
    parser.add_argument('--fixed_test_set', '-ts', type=str, default=None)
    parser.add_argument('--load_env', action='store_true')
    parser.add_argument('--use_test_env', action='store_true')
    parser.add_argument('--record', type=str2bool, default=False)
    parser.add_argument('--recording_path', type=str, default=None)
    # Shared argument groups defined by the project handlers.
    EnvironmentArgsHandler.set_parser_arguments(parser)
    StudentArgsHandler.set_parser_arguments(parser)
    return parser
"walker_type": ["old_classic_bipedal"], "movable_creepers": None, }, # { # 'env':'parametric-continuous-flat-parkour-v0', # "walker_type": ["classic_bipedal", "fish"], # "motors_torque": 80, # "water_level": 4, # "dummy_param": [0, 1] # } ] parser = argparse.ArgumentParser() parser.add_argument('--seed', '-s', type=int, default=0) StudentArgsHandler.set_parser_arguments(parser) EnvironmentArgsHandler.set_parser_arguments(parser) TeacherArgsHandler.set_parser_arguments(parser) for _env in envs: _env["teacher"] = 'Random' _env["nb_test_episodes"] = 1 _env["seed"] = 43 current_args_dict = deepcopy(_env) for walker_type in _env["walker_type"]: print("##### Benchmarking {0} with {1} body #####".format( _env['env'], walker_type)) current_args_dict["walker_type"] = walker_type args_str = dict_to_args_str(current_args_dict) args = parser.parse_args(args_str) for j in range(2): if j == 0:
def main(args):
    """Load a trained policy and run it on test-set episodes.

    Parameters
    ----------
    args : argparse.Namespace
        Arguments produced by ``get_parser()`` (checkpoint path, episode
        selection, rendering/recording flags, plus the environment and
        student argument groups).

    Returns
    -------
    list
        Cumulative reward obtained on each requested test episode.

    Raises
    ------
    Exception
        If the student type stored with the checkpoint is neither
        ``'spinup'`` nor ``'baselines'``.
    """
    # Pick the test set: either the one saved during training, or a
    # user-provided fixed test set.
    if args.fixed_test_set is None:
        test_set_params, _ = load_training_test_set(args.fpath, args.bests)
    else:
        test_set_params = load_fixed_test_set(args.fpath, args.fixed_test_set)

    # Evaluation does not need a GPU; force CPU execution.
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    student_type = get_student_type(args.fpath)

    env = None
    if args.load_env:
        # BUGFIX: `--use_test_env` is a store_true flag, so it is always a
        # bool and `args.use_test_env is not None` was always True. Pass the
        # flag's actual value instead.
        env = load_env(args.fpath, args.use_test_env)
    if env is None:
        # Rebuild the environment from the parsed arguments.
        env_fn, _, _, _ = EnvironmentArgsHandler.get_object_from_arguments(
            args)
        if student_type == "spinup":
            env = env_fn()
        elif student_type == "baselines":
            env, _ = create_custom_vec_normalized_envs(env_fn)
            load_vectorized_env(args.fpath, env)

    if student_type == 'spinup':
        get_action = spinup_load_policy(
            args.fpath,
            args.itr if args.itr >= 0 else 'last',
            args.deterministic)
        env._SET_RENDERING_VIEWPORT_SIZE(600, 400)
    elif student_type == 'baselines':
        ac_kwargs = dict()
        ac_kwargs['hidden_sizes'] = [
            int(layer) for layer in args.hidden_sizes.split("/")
        ]
        nbatch_train = args.nb_env_steps * 1e6 // int(
            args.sample_size // args.batch_size)
        model = get_baselines_model(network=args.network,
                                    nbatch_train=nbatch_train,
                                    ob_space=env.observation_space,
                                    ac_space=env.action_space,
                                    env=env,
                                    nsteps=args.sample_size,
                                    ent_coef=args.ent_coef,
                                    vf_coef=args.vf_coef,
                                    hidden_sizes=ac_kwargs['hidden_sizes'])
        last_checkpoint = get_baselines_last_checkpoint(
            args.fpath + "/checkpoints/")
        model.load(args.fpath + "/checkpoints/" + last_checkpoint)
        # Careful: the recurrent version is not implemented here yet.
        get_action = lambda o: model.step(o)[0]
        env.get_raw_env()._SET_RENDERING_VIEWPORT_SIZE(600, 400)
    else:
        raise Exception('Unknown student type.')

    # "-1" means "evaluate on every episode of the test set"; otherwise the
    # ids are given as a "/"-separated list.
    if args.episode_ids == "-1":
        print("Testing the policy on the whole test set...")
        episodes = list(range(len(test_set_params)))
    else:
        # Renamed loop variable: `id` shadowed the builtin.
        episodes = [int(ep_id) for ep_id in args.episode_ids.split("/")]

    rewards = []
    for episode_id in episodes:
        r = run_policy(env,
                       get_action,
                       test_set_params,
                       args.len,
                       episode_id,
                       args.record,
                       args.recording_path,
                       args.norender,
                       use_baselines=student_type == 'baselines')
        rewards.append(r)
    env.close()
    return rewards
import os from TeachMyAgent.run_utils.environment_args_handler import EnvironmentArgsHandler from TeachMyAgent.run_utils.teacher_args_handler import TeacherArgsHandler from TeachMyAgent.run_utils.student_args_handler import StudentArgsHandler if __name__ == '__main__': # Argument definition print('Preparing the parsing...') parser = argparse.ArgumentParser() parser.add_argument('--exp_name', type=str, default='test') parser.add_argument('--seed', '-s', type=int, default=0) StudentArgsHandler.set_parser_arguments(parser) EnvironmentArgsHandler.set_parser_arguments(parser) TeacherArgsHandler.set_parser_arguments(parser) # Argument parsing args = parser.parse_args() # Bind this run to specific GPU if there is one if args.gpu_id is not None: os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id) print('Setting up the environment...') env_f, param_env_bounds, initial_dist, target_dist = EnvironmentArgsHandler.get_object_from_arguments( args) print('Setting up the teacher algorithm...') Teacher = TeacherArgsHandler.get_object_from_arguments( args, param_env_bounds, initial_dist, target_dist)