Example no. 1
    def f(root_dir, env, env_name, index, transparent_params):
        denv = FakeSingleSpacesVec(env, agent_id=index)
        pylog.info(
            f"Loading Stable Baselines policy for '{cls}' from '{root_dir}'")
        model = load_backward_compatible_model(cls, root_dir, denv)
        try:
            vec_normalize = VecNormalize(denv, training=False)
            vec_normalize.load_running_average(root_dir)
            model = NormalizeModel(model, vec_normalize)
            pylog.info(f"Loaded normalization statistics from '{root_dir}'")
        except FileNotFoundError:
            # We did not use VecNormalize during training, skip
            pass

        return model
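The loader above restores normalization statistics saved next to the checkpoint and wraps the policy so they are applied at prediction time. A minimal stand-alone sketch of the same inference-time pattern in plain Stable Baselines 2.x follows; the run directory, environment and file names are hypothetical, not taken from the source.

import gym
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

run_dir = "path/to/run_dir"  # hypothetical directory holding model + statistics

venv = DummyVecEnv([lambda: gym.make("CartPole-v1")])
# training=False freezes the running averages; norm_reward=False keeps raw rewards.
venv = VecNormalize(venv, training=False, norm_reward=False)
venv.load_running_average(run_dir)  # reads obs_rms.pkl / ret_rms.pkl from run_dir

model = PPO2.load("path/to/run_dir/model.pkl")  # hypothetical checkpoint
obs = venv.reset()
action, _states = model.predict(obs, deterministic=True)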
Example no. 2
def single_wrappers(single_venv, scheduler, our_idx, normalize, rew_shape,
                    rew_shape_params, victim_index, victim_path, victim_type,
                    debug, env_name, load_policy, lookback_params,
                    transparent_params, log_callbacks, save_callbacks):
    if rew_shape:
        rew_shape_venv = apply_reward_wrapper(single_env=single_venv,
                                              scheduler=scheduler,
                                              shaping_params=rew_shape_params,
                                              agent_idx=our_idx)
        log_callbacks.append(
            lambda logger, locals, globals: rew_shape_venv.log_callback(logger))
        single_venv = rew_shape_venv

        for anneal_type in ['noise', 'rew_shape']:
            if scheduler.is_conditional(anneal_type):
                scheduler.set_annealer_get_logs(anneal_type,
                                                rew_shape_venv.get_logs)

    if lookback_params['lb_num'] > 0:
        lookback_venv = LookbackRewardVecWrapper(single_venv, env_name, debug,
                                                 victim_index, victim_path,
                                                 victim_type,
                                                 transparent_params,
                                                 **lookback_params)
        single_venv = lookback_venv

    if normalize:
        normalized_venv = VecNormalize(single_venv)

        if load_policy['path'] is not None:
            if load_policy['type'] == 'zoo':
                raise ValueError(
                    "Trying to normalize twice. Bansal et al's Zoo agents normalize "
                    "implicitly. Please set normalize=False to disable VecNormalize."
                )
            # Only reached for non-zoo policies: restore the saved running averages.
            normalized_venv.load_running_average(load_policy['path'])

        save_callbacks.append(
            lambda root_dir: normalized_venv.save_running_average(root_dir))
        single_venv = normalized_venv

    return single_venv
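The log_callbacks and save_callbacks lists are only populated here. A hypothetical driver (not shown in the source) would consume them roughly as below, matching the (logger, locals, globals) and (root_dir) signatures of the lambdas registered above.

import os


def make_learn_callback(logger, log_callbacks):
    # Builds a Stable Baselines learn(callback=...) hook that fans out to log_callbacks.
    def callback(locals_, globals_):
        for cb in log_callbacks:
            cb(logger, locals_, globals_)
        return True  # returning False would stop training early
    return callback


def checkpoint(root_dir, model, save_callbacks):
    # Saves the model, then lets each callback persist its own state (e.g. VecNormalize).
    model.save(os.path.join(root_dir, "model.pkl"))
    for cb in save_callbacks:
        cb(root_dir)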
Example no. 3
def single_wrappers(
    single_venv,
    scheduler,
    our_idx,
    normalize,
    normalize_observations,
    rew_shape,
    rew_shape_params,
    embed_index,
    embed_paths,
    embed_types,
    debug,
    env_name,
    load_policy,
    lookback_params,
    transparent_params,
    log_callbacks,
    save_callbacks,
):
    if rew_shape:
        rew_shape_venv = apply_reward_wrapper(
            single_env=single_venv,
            scheduler=scheduler,
            shaping_params=rew_shape_params,
            agent_idx=our_idx,
        )
        log_callbacks.append(LoggerOnlyLogCallback(rew_shape_venv))
        single_venv = rew_shape_venv

        for anneal_type in ["noise", "rew_shape"]:
            if scheduler.is_conditional(anneal_type):
                scheduler.set_annealer_get_logs(anneal_type,
                                                rew_shape_venv.get_logs)

    if lookback_params["lb_num"] > 0:
        if len(embed_types) > 1:
            raise ValueError(
                "Lookback is not supported with multiple embedded agents")
        embed_path = embed_paths[0]
        embed_type = embed_types[0]
        lookback_venv = LookbackRewardVecWrapper(
            single_venv,
            env_name,
            debug,
            embed_index,
            embed_path,
            embed_type,
            transparent_params,
            **lookback_params,
        )
        single_venv = lookback_venv

    if normalize:
        if normalize_observations:
            if load_policy["path"] is not None:
                if load_policy["type"] == "zoo":
                    raise ValueError(
                        "Trying to normalize twice. Bansal et al's Zoo agents normalize "
                        "implicitly. Please set normalize=False to disable VecNormalize."
                    )
            normalized_venv = VecNormalize(single_venv)
        else:
            normalized_venv = VecNormalize(single_venv, norm_obs=False)

        if load_policy["path"] is not None and load_policy["type"] != "zoo":
            normalized_venv.load_running_average(load_policy["path"])

        save_callbacks.append(lambda root_dir: normalized_venv.save(
            os.path.join(root_dir, "vec_normalize.pkl")))
        single_venv = normalized_venv

    return single_venv
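Compared with the previous example, this revision writes the normalization state to a single vec_normalize.pkl via VecNormalize.save rather than the directory-based save_running_average. At evaluation time the matching classmethod restores it; a short sketch assuming the stable-baselines >= 2.9 / stable-baselines3 API, with a hypothetical path and environment:

import gym
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

stats_path = "runs/example/vec_normalize.pkl"  # hypothetical checkpoint location

eval_venv = DummyVecEnv([lambda: gym.make("CartPole-v1")])
eval_venv = VecNormalize.load(stats_path, eval_venv)
eval_venv.training = False     # freeze running statistics during evaluation
eval_venv.norm_reward = False  # report unnormalized rewards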
Example no. 4
# NOTE: this excerpt begins mid-docstring; the factory's name and signature are
# reconstructed here from the parameters it documents and uses below.
def make_env(env_id, rank, seed=0):
    """
    Utility function for a multiprocessed env.

    :param env_id: (str) the environment ID
    :param seed: (int) the initial seed for RNG
    :param rank: (int) index of the subprocess
    """
    def _init():
        env = gym.make(env_id)
        env.seed(seed + rank)
        env = bench.Monitor(env, logger.get_dir(), allow_early_resets=True)
        return env

    set_global_seeds(seed)
    return _init


if __name__ == '__main__':

    log_str = sys.argv[1]

    env_id = 'ROAMHandGraspCube-v1'

    model = PPO2.load("logs/{}/trained_model".format(log_str))

    # render trained agent
    env = VecNormalize(DummyVecEnv([lambda: gym.make(env_id)]),
                       norm_reward=False)
    env.load_running_average("logs/{}".format(log_str))

    obs = env.reset()
    while True:
        action, _states = model.predict(obs, deterministic=True)
        obs, rewards, dones, info = env.step(action)
        env.render()
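The _init closure in this excerpt is the usual per-worker factory: each sub-environment gets seed + rank, so workers do not share a random stream. Assuming the truncated factory above matches the reconstructed make_env(env_id, rank, seed=0) signature, it would typically be used like this:

from stable_baselines.common.vec_env import SubprocVecEnv

n_envs = 4
# One worker process per environment, each seeded with seed + rank.
venv = SubprocVecEnv(
    [make_env('ROAMHandGraspCube-v1', rank=i) for i in range(n_envs)])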
Example no. 5
def test_executive():

    episodes = 50
    max_timesteps = 1000

    # Input elements for various trained policies
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--expert_policy_dir',
        type=str)  # trained policy directory, not necessary for openloop
    arg_parser.add_argument('--type', type=str)  # ars, ppo1, ppo2, openloop
    arg_parser.add_argument('--mode',
                            type=str)  # linearbias, mlp, 3finger, 2finger
    arg_parser.add_argument('--render', type=str)  # on, off
    args = arg_parser.parse_args()
    mode_dict = {}
    mode_dict['mode'] = args.mode

    # Parsing parameters/file to reset the environment for test objects
    categ = ['shape', 'x', 'y', 'rot_z', 'len', 'width', 'height', 'radius']
    path = os.path.join(os.environ['GRASPING_RL'],
                        'evaluation/baseline_testset_calcs.csv')
    csv_file = pd.read_csv(path,
                           sep='\t',
                           header=None,
                           names=categ,
                           skiprows=1)
    parser = BaselineParser(csv_file)

    # Environment creation
    env_id = 'ROAMHandGraspCube-v1'
    if "ppo" in args.type:
        env = VecNormalize(DummyVecEnv([lambda: gym.make(env_id)]),
                           norm_reward=False)
        env.set_attr('_max_episode_steps', max_timesteps)
        env.env_method('set_evaluation')
        env.load_running_average(args.expert_policy_dir)
    else:
        env = gym.make(env_id)
        env._max_episode_steps = max_timesteps
        env.env.set_evaluation()

    # Testing loop to evaluate grasps on 50 objects
    total_successes = np.zeros(episodes)
    for i in range(episodes):
        obs = env.reset()
        params = parser.get_testcase(i)

        if "ppo" in args.type:
            env.env_method('set_object', params)
            success = SuccessCriterion(env.get_attr('sim')[0])
        else:
            env.env.set_object(params)
            success = SuccessCriterion(env.env.sim)

        agent = MetaAgent(env=env,
                          load_dir=args.expert_policy_dir,
                          load_type=args.type,
                          **mode_dict)

        # Per-episode simulation and evaluation
        success_array = np.zeros(max_timesteps)
        for j in range(max_timesteps):
            action = agent.act(obs)
            if args.type == 'openloop':
                env.env.sim.step()
            else:
                obs, reward, done, info = env.step(action)
            if args.render != 'off':
                env.render()
            success_array[j] = success.grasp_criteria()

        # Success criterion: the grasp must hold for at least 250 timesteps
        if np.sum(success_array) >= 250:
            total_successes[i] = 1
            print("Baseline {} is a Success!".format(i), np.sum(success_array))
        else:
            total_successes[i] = 0
            print("Baseline {} is a Failure!".format(i), np.sum(success_array))

    return total_successes
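An object counts as a success when the grasp criterion holds for at least 250 of the 1000 simulated timesteps. When the script is run with the command-line flags defined above, the returned array makes the aggregate rate easy to report, e.g.:

results = test_executive()  # length-50 array of 0/1 success flags, one per object
print("Grasp success rate: {}/{} ({:.1%})".format(
    int(results.sum()), len(results), results.mean()))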