# NOTE: assumed imports for the snippets below (stable-baselines 2.x API;
# the original scrape omitted them). In test_vec_env, `make_env` is a
# zero-argument test helper and `check_vec_norm_equal` a comparison helper,
# both defined in the original test module.
import argparse
import os
import sys

import gym
import numpy as np
import pandas as pd

from stable_baselines import PPO2, bench, logger
from stable_baselines.common import set_global_seeds
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize


def test_vec_env(tmpdir):
    """Check that VecNormalize clips observations and rewards, and that its
    statistics survive a save/load round trip."""
    clip_obs = 0.5
    clip_reward = 5.0

    orig_venv = DummyVecEnv([make_env])
    norm_venv = VecNormalize(orig_venv,
                             norm_obs=True,
                             norm_reward=True,
                             clip_obs=clip_obs,
                             clip_reward=clip_reward)
    _, done = norm_venv.reset(), [False]
    while not done[0]:
        actions = [norm_venv.action_space.sample()]
        obs, rew, done, _ = norm_venv.step(actions)
        assert np.max(np.abs(obs)) <= clip_obs
        assert np.max(np.abs(rew)) <= clip_reward

    path = str(tmpdir.join("vec_normalize"))
    norm_venv.save(path)
    deserialized = VecNormalize.load(path, venv=orig_venv)
    check_vec_norm_equal(norm_venv, deserialized)


def make_env(env_id, rank=0, seed=0):
    """
    Returns a thunk that creates a seeded, monitored copy of an environment
    (signature reconstructed; the original def line was lost in extraction).

    :param env_id: (str) the environment ID
    :param seed: (int) the initial seed for RNG
    :param rank: (int) index of the subprocess
    """
    def _init():
        env = gym.make(env_id)
        env.seed(seed + rank)
        env = bench.Monitor(env, logger.get_dir(), allow_early_resets=True)
        return env

    set_global_seeds(seed)
    return _init
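
# Usage sketch (assumption: the factory is consumed by DummyVecEnv/SubprocVecEnv,
# which take a list of zero-argument environment constructors):
#
#     venv = SubprocVecEnv([make_env('ROAMHandGraspCube-v1', rank=i) for i in range(4)])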


if __name__ == '__main__':

    # experiment name passed on the command line; the trained model and
    # normalization statistics live under logs/<log_str>/
    log_str = sys.argv[1]

    env_id = 'ROAMHandGraspCube-v1'

    model = PPO2.load("logs/{}/trained_model".format(log_str))

    # Render the trained agent: recreate the normalized environment and
    # restore the observation statistics saved during training
    # (load_running_average is the older stable-baselines API for this)
    env = VecNormalize(DummyVecEnv([lambda: gym.make(env_id)]),
                       norm_reward=False)
    env.load_running_average("logs/{}".format(log_str))

    obs = env.reset()
    while True:
        action, _states = model.predict(obs, deterministic=True)
        obs, rewards, dones, info = env.step(action)
        env.render()
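        # Note: stable-baselines vectorized environments auto-reset a
        # sub-environment when it reports done, so this render loop needs
        # no explicit reset call.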


def test_executive():
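    """Evaluate a trained grasping policy on the baseline test set and return
    a per-episode success vector."""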

    episodes = 50
    max_timesteps = 1000

    # Command-line arguments selecting the trained policy and evaluation setup
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--expert_policy_dir',
        type=str)  # trained policy directory, not needed for openloop
    arg_parser.add_argument('--type', type=str)  # ars, ppo1, ppo2, openloop
    arg_parser.add_argument('--mode',
                            type=str)  # linearbias, mlp, 3finger, 2finger
    arg_parser.add_argument('--render', type=str)  # on, off
    args = arg_parser.parse_args()
    mode_dict = {}
    mode_dict['mode'] = args.mode

    # Parse the CSV of per-object test parameters used to reset the environment
    categ = ['shape', 'x', 'y', 'rot_z', 'len', 'width', 'height', 'radius']
    path = os.path.join(os.environ['GRASPING_RL'],
                        'evaluation/baseline_testset_calcs.csv')
    csv_file = pd.read_csv(path,
                           sep='\t',
                           header=None,
                           names=categ,
                           skiprows=1)
    parser = BaselineParser(csv_file)

    # Environment creation: PPO policies were trained on a normalized VecEnv,
    # so rebuild the same wrapper and restore its running statistics
    env_id = 'ROAMHandGraspCube-v1'
    if "ppo" in args.type:
        env = VecNormalize(DummyVecEnv([lambda: gym.make(env_id)]),
                           norm_reward=False)
        env.set_attr('_max_episode_steps', max_timesteps)
        env.env_method('set_evaluation')
        env.load_running_average(args.expert_policy_dir)
    else:
        env = gym.make(env_id)
        env._max_episode_steps = max_timesteps
        env.env.set_evaluation()

    # Testing loop: evaluate a grasp on each of the 50 test objects
    total_successes = np.zeros(episodes)
    for i in range(episodes):
        obs = env.reset()
        params = parser.get_testcase(i)

        if "ppo" in args.type:
            env.env_method('set_object', params)
            success = SuccessCriterion(env.get_attr('sim')[0])
        else:
            env.env.set_object(params)
            success = SuccessCriterion(env.env.sim)

        agent = MetaAgent(env=env,
                          load_dir=args.expert_policy_dir,
                          load_type=args.type,
                          **mode_dict)

        # Per-episode simulation and evaluation
        success_array = np.zeros(max_timesteps)
        for j in range(max_timesteps):
            action = agent.act(obs)
            if args.type == 'openloop':
                env.env.sim.step()
            else:
                obs, reward, done, info = env.step(action)
            if args.render != 'off':
                env.render()
            success_array[j] = success.grasp_criteria()

        # Success criterion: the grasp must register for at least 250 timesteps
        timesteps_held = np.sum(success_array)
        if timesteps_held >= 250:
            total_successes[i] = 1
            print("Baseline {} is a Success!".format(i), timesteps_held)
        else:
            total_successes[i] = 0
            print("Baseline {} is a Failure!".format(i), timesteps_held)

    return total_successes
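

# Invocation sketch (hypothetical script name; flags as parsed above):
#
#     python test_executive.py --type ppo2 --mode mlp --render off \
#         --expert_policy_dir logs/my_run
#
# successes = test_executive()
# print("{}/{} grasps succeeded".format(int(successes.sum()), len(successes)))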