Example #1
def create_test_env(env_id, n_envs=1, is_atari=False,
                    stats_path=None, seed=0,
                    log_dir='', should_render=True, hyperparams=None):
    """
    Create environment for testing a trained agent

    :param env_id: (str) the environment ID
    :param n_envs: (int) number of processes
    :param is_atari: (bool) whether the env is an Atari env
    :param stats_path: (str) path to folder containing saved running averages
    :param seed: (int) seed for the random number generator
    :param log_dir: (str) where to log rewards
    :param should_render: (bool) for PyBullet envs, display the GUI
    :param hyperparams: (dict) additional hyperparams (ex: n_stack); may contain
                        an 'env_wrapper' entry specifying a gym.Wrapper to wrap
                        the created env with
    :return: (gym.Env)
    """
    # HACK to save logs
    if log_dir is not None:
        os.environ["OPENAI_LOG_FORMAT"] = 'csv'
        os.environ["OPENAI_LOGDIR"] = os.path.abspath(log_dir)
        os.makedirs(log_dir, exist_ok=True)
        logger.configure()

    if hyperparams is None:
        hyperparams = {}

    # Create the environment and wrap it if necessary
    env_wrapper = get_wrapper_class(hyperparams)
    if 'env_wrapper' in hyperparams.keys():
        del hyperparams['env_wrapper']

    if is_atari:
        print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    elif n_envs > 1:
        # start_method = 'spawn' for thread safety
        env = SubprocVecEnv([make_env(env_id, i, seed, log_dir, wrapper_class=env_wrapper) for i in range(n_envs)])
    # PyBullet envs do not follow the gym.render() interface
    elif "Bullet" in env_id:
        spec = gym.envs.registry.env_specs[env_id]
        try:
            class_ = load(spec.entry_point)
        except AttributeError:
            # Backward compatibility with gym
            class_ = load(spec._entry_point)
        # HACK: force SubprocVecEnv for Bullet env that does not
        # have a render argument
        render_name = None
        use_subproc = 'renders' not in inspect.getfullargspec(class_.__init__).args
        if not use_subproc:
            render_name = 'renders'
        # Dev branch of pybullet
        # use_subproc = use_subproc and 'render' not in inspect.getfullargspec(class_.__init__).args
        # if not use_subproc and render_name is None:
        #     render_name = 'render'

        # Create the env, with the original kwargs, and the new ones overriding them if needed
        def _init():
            # TODO: fix for pybullet locomotion envs
            env = class_(**{**spec._kwargs, render_name: should_render})
            env.seed(0)
            if log_dir is not None:
                env = Monitor(env, os.path.join(log_dir, "0"), allow_early_resets=True)
            return env

        if use_subproc:
            env = SubprocVecEnv([make_env(env_id, 0, seed, log_dir, wrapper_class=env_wrapper)])
        else:
            env = DummyVecEnv([_init])
    else:
        env = DummyVecEnv([make_env(env_id, 0, seed, log_dir, wrapper_class=env_wrapper)])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        if hyperparams['normalize']:
            print("Loading running average")
            print("with params: {}".format(hyperparams['normalize_kwargs']))
            env = VecNormalize(env, training=False, **hyperparams['normalize_kwargs'])
            env.load_running_average(stats_path)

        n_stack = hyperparams.get('frame_stack', 0)
        if n_stack > 0:
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env
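For reference, a minimal usage sketch of create_test_env follows. The environment ID, stats path, and hyperparams below are placeholder values chosen for illustration, not taken from the example above.

# Hypothetical usage of create_test_env (all paths and values are placeholders)
hyperparams = {'normalize': True,
               'normalize_kwargs': {'norm_reward': False}}
env = create_test_env('HalfCheetahBulletEnv-v0', n_envs=1, is_atari=False,
                      stats_path='logs/ppo2/HalfCheetahBulletEnv-v0_1/',
                      seed=0, log_dir=None, should_render=True,
                      hyperparams=hyperparams)
obs = env.reset()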
Example #2
    actor_options['tensorboard_log'] = os.path.join(
        tensorboard_logdir, 'RemoteCenterPegInsertionNoHole-v0')
    print('\t--learning')
    model = PPO2(MlpPolicy, env, **actor_options)

    model.learn(**learning_options)
    save_path = os.path.join(actor_options['tensorboard_log'], 'model')
    running_average_path = actor_options['tensorboard_log']
    model.save(save_path)
    env.save_running_average(running_average_path)
finally:
    env.close()

# Visualize the solution
env = gym.make('RemoteCenterPegInsertionNoHole-v0')
env = DummyVecEnv([lambda: env])
env = VecNormalize(env,
                   training=False,
                   norm_reward=False,
                   clip_obs=np.inf,
                   clip_reward=np.inf)
env.load_running_average(running_average_path)
obs = env.reset()
while True:
    action, _states = model.predict(obs)
    clipped_action = np.clip(action, env.action_space.low,
                             env.action_space.high)
    obs, rewards, dones, info = env.step(clipped_action)
    env.render()
    if dones[0]:
        env.reset()
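The snippet above begins inside a try/finally block, so the setup it depends on is not shown. A plausible preamble, assumed here rather than taken from the source, would create and normalize the environment and define the option dictionaries roughly as follows:

import os
import gym
import numpy as np
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

tensorboard_logdir = '/tmp/tensorboard'          # placeholder log directory
actor_options = {'verbose': 1}                   # assumed PPO2 keyword arguments
learning_options = {'total_timesteps': 1000000}  # assumed training budget

env = gym.make('RemoteCenterPegInsertionNoHole-v0')
env = DummyVecEnv([lambda: env])
env = VecNormalize(env, clip_obs=np.inf, clip_reward=np.inf)
# ...followed by the try/finally block shown in the example.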
Example #3
def evaluate_model_on_set(
    set_path,
    model,
    config_path=None,
    config_kw=None,
    metrics=("success", "control_variation", "rise_time", "overshoot",
             "settling_time"),
    norm_data_path=None,
    num_envs=1,
    turbulence_intensity="none",
    use_pid=False,
    writer=None,
    timestep=None,
):
    """
    :param set_path: (str) path to test set file
    :param model: (PPO2 object or [PIDController]) the controller to be evaluated
    :param config_path: (str) path to gym environment configuration file
    :param config_kw: (dict) dictionary of key value pairs to override settings in the configuration file of the gym environment
    :param metrics: ([str]) list of metrics to be computed and recorded
    :param norm_data_path: (str) path to folder containing normalization statistics
    :param num_envs: (int) number of gym environments to run in parallel using multiprocessing
    :param turbulence_intensity: (str) the intensity setting of the wind turbulence
    :param use_pid: (bool) Whether the evaluated controller is a PID controller or not
    :param writer: (tensorboard writer) If supplied, evaluation results are written to the tensorboard log; if not, results are printed to standard output
    :param timestep: (int) What timestep results are written to when using tensorboard logging
    :return: (dict) the metrics computed for the evaluated controller on the test set
    """
    scenarios = list(np.load(set_path, allow_pickle=True))
    scenario_count = len(scenarios)

    if config_kw is None:
        config_kw = {}

    config_kw.update({
        "steps_max": 1500,
        "target": {
            "on_success": "done",
            "success_streak_fraction": 1,
            "success_streak_req": 100,
            "states": {
                0: {
                    "bound": 5
                },
                1: {
                    "bound": 5
                },
                2: {
                    "bound": 2
                }
            },
        },
    })

    if use_pid:
        config_kw["action"] = {"scale_space": False}

    sim_config_kw = {
        "turbulence": turbulence_intensity != "None",
        "turbulence_intensity": turbulence_intensity,
    }

    test_env = SubprocVecEnv([
        make_env(config_path,
                 i,
                 config_kw=config_kw,
                 sim_config_kw=sim_config_kw) for i in range(num_envs)
    ])
    if use_pid:
        dt = test_env.get_attr("simulator")[0].dt
        for pid in model:
            pid.dt = dt
        env_cfg = test_env.get_attr("cfg")[0]
        obs_states = [var["name"] for var in env_cfg["observation"]["states"]]
        try:
            phi_i, theta_i, Va_i = (
                obs_states.index("roll"),
                obs_states.index("pitch"),
                obs_states.index("Va"),
            )
            omega_i = [
                obs_states.index("omega_p"),
                obs_states.index("omega_q"),
                obs_states.index("omega_r"),
            ]
        except ValueError:
            raise ValueError(
                "When using PID, roll, pitch, Va, omega_p, omega_q, omega_r "
                "must be part of the observation vector."
            )
    else:
        test_env = VecNormalize(test_env)
        if model.env is not None:
            test_env.obs_rms = model.env.obs_rms
            test_env.ret_rms = model.env.ret_rms
        else:
            assert norm_data_path is not None
            test_env.load_running_average(norm_data_path)
        test_env.training = False

    res = {metric: {} for metric in metrics}
    res["rewards"] = [[] for i in range(scenario_count)]
    active_envs = [i < scenario_count for i in range(num_envs)]
    env_scen_i = [i for i in range(num_envs)]
    test_done = False
    obs = np.array(
        [np.zeros(test_env.observation_space.shape) for i in range(num_envs)])
    done = [True for i in range(num_envs)]
    info = None

    while not test_done:
        for i, env_done in enumerate(done):
            if env_done:
                if len(scenarios) > 0 or active_envs[i]:
                    if len(scenarios) > 0:
                        print("{}/{} scenarios left".format(
                            len(scenarios), scenario_count))
                        scenario = scenarios.pop(0)
                        env_scen_i[i] = (scenario_count - 1) - len(scenarios)
                        obs[i] = test_env.env_method("reset",
                                                     indices=i,
                                                     **scenario)[0]
                        if use_pid:
                            model[i].reset()
                            model[i].set_reference(
                                scenario["target"]["roll"],
                                scenario["target"]["pitch"],
                                scenario["target"]["Va"],
                            )
                    else:
                        active_envs[i] = False
                    if info is not None:
                        for metric in metrics:
                            if isinstance(info[i][metric], dict):
                                for state, value in info[i][metric].items():
                                    if state not in res[metric]:
                                        res[metric][state] = []
                                    res[metric][state].append(value)
                            else:
                                if "all" not in res[metric]:
                                    res[metric]["all"] = []
                                res[metric]["all"].append(info[i][metric])

        if len(scenarios) == 0:
            test_done = not any(active_envs)
        if use_pid:
            actions = []
            for i, pid in enumerate(model):
                roll, pitch, Va = obs[i, phi_i], obs[i, theta_i], obs[i, Va_i]
                omega = obs[i, omega_i]
                if info is not None and "target" in info[i]:
                    pid.set_reference(
                        phi=info[i]["target"]["roll"],
                        theta=info[i]["target"]["pitch"],
                        va=info[i]["target"]["Va"],
                    )
                actions.append(pid.get_action(roll, pitch, Va, omega))
            actions = np.array(actions)
        else:
            actions, _ = model.predict(obs, deterministic=True)
        obs, rew, done, info = test_env.step(actions)
        for i, env_rew in enumerate(rew):
            res["rewards"][env_scen_i[i]].append(env_rew)

    if writer is not None:
        summaries = []
        for metric, metric_v in res.items():
            if isinstance(metric_v, dict):
                for state, v in metric_v.items():
                    summaries.append(
                        tf.Summary.Value(
                            tag="test_set/{}_{}".format(metric, state),
                            simple_value=np.nanmean(v),
                        ))
        writer.add_summary(tf.Summary(value=summaries), timestep)
    else:
        print_results(res)

    return res
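A hypothetical call of evaluate_model_on_set for a trained PPO2 agent could look like the sketch below; the file paths are placeholders, and the saved normalization statistics are assumed to live in the model folder since a loaded model has no attached environment.

from stable_baselines import PPO2

model = PPO2.load("models/ppo2_attitude_controller")    # placeholder model path
results = evaluate_model_on_set(
    "test_sets/test_set_moderate.npy",                  # placeholder test-set file
    model,
    config_path="configs/fixed_wing_config.json",       # placeholder env config
    norm_data_path="models/",                           # folder with saved obs/ret statistics
    num_envs=4,
    turbulence_intensity="moderate",
)
# results maps each metric name to the values recorded over the test set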
Example #4
model = PPO2(CustomPolicy,
             env,
             n_steps=int(2048 / 128),
             nminibatches=64,
             noptepochs=10,
             lam=0.98,
             verbose=1,
             tensorboard_log='/home/xi/model/log')
# model = PPO2.load("ppo2_ipadgame")
# model.set_env(env)
# model.tensorboard_log='/home/xi/model/log'
# env.load_running_average("/home/xi/model/")

model.learn(total_timesteps=50000)

# model.save("ppo2_ipadgame")
# env.save_running_average("/home/xi/model/")
# print ('done')

env = gym.make(env_id)
env = DummyVecEnv([lambda: env])
env = VecNormalize(env)
obs = env.reset()
model = PPO2.load("ppo2_ipadgame")
env.load_running_average("/home/xi/model/")

for i in range(1000):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
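If the visualization loop should stop when the episode ends instead of after a fixed number of steps, it can check the dones array returned by the vectorized environment, as Example #2 does; a minimal variant under the same setup:

obs = env.reset()
done = False
while not done:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
    done = dones[0]  # DummyVecEnv returns arrays; index the single environment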