예제 #1
0
def get_parser():
    """Create the command-line parser used by this script.

    The parser first receives the options declared here, in the order of the
    table below, and is then handed to ``EnvironmentArgsHandler`` and
    ``StudentArgsHandler`` so each can register its own options.

    Returns:
        argparse.ArgumentParser: the fully configured parser.
    """
    cli = argparse.ArgumentParser()

    # (option strings, add_argument keyword options), in registration order.
    option_table = (
        (('--fpath',), {'type': str}),
        (('--len', '-l'), {'type': int, 'default': 0}),
        (('--norender', '-nr'), {'action': 'store_true'}),
        (('--itr', '-i'), {'type': int, 'default': -1}),
        (('--deterministic', '-d'), {'action': 'store_true'}),
        (('--episode_ids', '-id'), {'type': str, 'default': "0"}),
        (('--bests',), {'type': str2bool, 'default': None}),
        (('--fixed_test_set', '-ts'), {'type': str, 'default': None}),
        (('--load_env',), {'action': 'store_true'}),
        (('--use_test_env',), {'action': 'store_true'}),
        (('--record',), {'type': str2bool, 'default': False}),
        (('--recording_path',), {'type': str, 'default': None}),
    )
    for option_strings, options in option_table:
        cli.add_argument(*option_strings, **options)

    # Environment options are registered before student options.
    for handler in (EnvironmentArgsHandler, StudentArgsHandler):
        handler.set_parser_arguments(cli)

    return cli
예제 #2
0
            "walker_type": ["old_classic_bipedal"],
            "movable_creepers": None,
        },
        # {
        #     'env':'parametric-continuous-flat-parkour-v0',
        #     "walker_type": ["classic_bipedal", "fish"],
        #     "motors_torque": 80,
        #     "water_level": 4,
        #     "dummy_param": [0, 1]
        # }
    ]

    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', '-s', type=int, default=0)
    StudentArgsHandler.set_parser_arguments(parser)
    EnvironmentArgsHandler.set_parser_arguments(parser)
    TeacherArgsHandler.set_parser_arguments(parser)
    for _env in envs:
        _env["teacher"] = 'Random'
        _env["nb_test_episodes"] = 1
        _env["seed"] = 43
        current_args_dict = deepcopy(_env)
        for walker_type in _env["walker_type"]:
            print("##### Benchmarking {0} with {1} body #####".format(
                _env['env'], walker_type))
            current_args_dict["walker_type"] = walker_type
            args_str = dict_to_args_str(current_args_dict)
            args = parser.parse_args(args_str)

            for j in range(2):
                if j == 0:
예제 #3
0
def main(args):
    """Run a saved student policy on test-set episodes and collect the results.

    Args:
        args: Parsed command-line namespace. Always read: ``fpath``,
            ``fixed_test_set``, ``bests``, ``load_env``, ``use_test_env``,
            ``episode_ids``, ``len``, ``record``, ``recording_path`` and
            ``norender``. Read for ``'spinup'`` students: ``itr`` and
            ``deterministic``. Read for ``'baselines'`` students:
            ``hidden_sizes``, ``nb_env_steps``, ``sample_size``,
            ``batch_size``, ``network``, ``ent_coef`` and ``vf_coef``.

    Returns:
        list: one item per requested episode, each being the value returned
        by ``run_policy`` for that episode.

    Raises:
        Exception: if the student type found for ``args.fpath`` is neither
            ``'spinup'`` nor ``'baselines'``.
    """
    if args.fixed_test_set is None:
        test_set_params, _ = load_training_test_set(args.fpath, args.bests)
    else:
        test_set_params = load_fixed_test_set(args.fpath, args.fixed_test_set)

    # CUDA_VISIBLE_DEVICES=-1: intended to keep this run off the GPUs.
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    student_type = get_student_type(args.fpath)

    env = None
    if args.load_env:
        # `use_test_env` is a boolean flag (argparse `store_true`), so it is
        # never None: the former `args.use_test_env is not None` was always
        # True and made the flag meaningless. Forward the flag itself.
        # NOTE(review): assumes load_env treats its second argument as a
        # truthy "use the test env" switch - confirm against its definition.
        env = load_env(args.fpath, args.use_test_env)

    if env is None:
        # Either loading was not requested or load_env returned None:
        # build the environment from the command-line arguments instead.
        env_fn, _, _, _ = EnvironmentArgsHandler.get_object_from_arguments(
            args)
        if student_type == "spinup":
            env = env_fn()
        elif student_type == "baselines":
            env, _ = create_custom_vec_normalized_envs(env_fn)
            load_vectorized_env(args.fpath, env)

    try:
        if student_type == 'spinup':
            get_action = spinup_load_policy(
                args.fpath,
                args.itr if args.itr >= 0 else 'last',
                args.deterministic)
            env._SET_RENDERING_VIEWPORT_SIZE(600, 400)
        elif student_type == 'baselines':
            # Hidden layer sizes are given as a "/"-separated string.
            hidden_sizes = [
                int(layer) for layer in args.hidden_sizes.split("/")
            ]
            # NOTE(review): the `* 1e6` makes this a float; kept as in the
            # original - confirm get_baselines_model accepts it.
            nbatch_train = args.nb_env_steps * 1e6 // int(
                args.sample_size // args.batch_size)

            model = get_baselines_model(network=args.network,
                                        nbatch_train=nbatch_train,
                                        ob_space=env.observation_space,
                                        ac_space=env.action_space,
                                        env=env,
                                        nsteps=args.sample_size,
                                        ent_coef=args.ent_coef,
                                        vf_coef=args.vf_coef,
                                        hidden_sizes=hidden_sizes)
            checkpoints_dir = args.fpath + "/checkpoints/"
            last_checkpoint = get_baselines_last_checkpoint(checkpoints_dir)
            model.load(checkpoints_dir + last_checkpoint)

            # Careful : The recurrent version is not implemented here yet
            def get_action(o):
                # Only the first element of model.step's result is used.
                return model.step(o)[0]

            env.get_raw_env()._SET_RENDERING_VIEWPORT_SIZE(600, 400)
        else:
            raise Exception('Unknown student type.')

        if args.episode_ids == "-1":
            print("Testing the policy on the whole test set...")
            episodes = list(range(len(test_set_params)))
        else:
            # Episode ids are given as a "/"-separated string, e.g. "0/3/7".
            episodes = [
                int(episode_str) for episode_str in args.episode_ids.split("/")
            ]

        return [
            run_policy(env,
                       get_action,
                       test_set_params,
                       args.len,
                       episode_id,
                       args.record,
                       args.recording_path,
                       args.norender,
                       use_baselines=student_type == 'baselines')
            for episode_id in episodes
        ]
    finally:
        # Close the environment even when loading the policy or running an
        # episode fails; env is still None for an unknown student type when
        # no environment was loaded.
        if env is not None:
            env.close()
예제 #4
0
import os

from TeachMyAgent.run_utils.environment_args_handler import EnvironmentArgsHandler
from TeachMyAgent.run_utils.teacher_args_handler import TeacherArgsHandler
from TeachMyAgent.run_utils.student_args_handler import StudentArgsHandler

if __name__ == '__main__':
    # Script entry point: declare and parse the command-line options, then
    # build the environment objects and the teacher from the parsed values.
    # NOTE(review): `argparse` is used below but is not among the import
    # lines visible just above this block - presumably imported earlier in
    # the file; confirm.

    # Argument definition
    print('Preparing the parsing...')
    parser = argparse.ArgumentParser()

    # Options declared directly by this script.
    parser.add_argument('--exp_name', type=str, default='test')
    parser.add_argument('--seed', '-s', type=int, default=0)

    # Each handler registers its own options on the shared parser.
    StudentArgsHandler.set_parser_arguments(parser)
    EnvironmentArgsHandler.set_parser_arguments(parser)
    TeacherArgsHandler.set_parser_arguments(parser)

    # Argument parsing
    args = parser.parse_args()
    # Bind this run to specific GPU if there is one
    # NOTE(review): `--gpu_id` is not declared in this block; presumably one
    # of the handlers above registers it - confirm.
    if args.gpu_id is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)

    print('Setting up the environment...')
    # The environment handler returns four values; the last three are passed
    # on to the teacher handler below.
    env_f, param_env_bounds, initial_dist, target_dist = EnvironmentArgsHandler.get_object_from_arguments(
        args)

    print('Setting up the teacher algorithm...')
    Teacher = TeacherArgsHandler.get_object_from_arguments(
        args, param_env_bounds, initial_dist, target_dist)