Example #1
def experiment(variant):
    # look up the expert demos referenced by this variant in the YAML listing
    with open('expert_demos_listing.yaml', 'r') as f:
        listings = yaml.safe_load(f.read())
    demos_path = listings[variant['expert_name']]['file_paths'][
        variant['expert_idx']]
    print(demos_path)
    buffer_save_dict = joblib.load(demos_path)
    target_state_buffer = buffer_save_dict['data']
    # target_state_buffer /= variant['rescale']
    state_indices = torch.LongTensor(variant['state_indices'])

    env_specs = variant['env_specs']
    env = get_env(env_specs)
    env.seed(env_specs['eval_env_seed'])

    print('\n\nEnv: {}'.format(env_specs['env_name']))
    print('kwargs: {}'.format(env_specs['env_kwargs']))
    print('Obs Space: {}'.format(env.observation_space))
    print('Act Space: {}\n\n'.format(env.action_space))

    policy = joblib.load(variant['policy_checkpoint'])['exploration_policy']
    if variant['eval_deterministic']:
        policy = MakeDeterministic(policy)
    policy.to(ptu.device)

    eval_sampler = PathSampler(env,
                               policy,
                               variant['num_eval_steps'],
                               variant['max_path_length'],
                               no_terminal=variant['no_terminal'],
                               render=variant['render'],
                               render_kwargs=variant['render_kwargs'])
    test_paths = eval_sampler.obtain_samples()
    obs = []
    for path in test_paths:
        obs += path['observations']
    x = [o[0] for o in obs]
    y = [o[1] for o in obs]

    fig, ax = plt.subplots(figsize=(6, 6))
    plt.scatter(x, y)
    plt.xlim(-1.25, 20)
    plt.ylim(-1.25, 10)
    ax.set_yticks([0, 5, 10])
    ax.set_xticks([0, 5, 10, 15, 20])
    plt.savefig('./figs/' + variant['env_specs']['task_name'] + '.pdf',
                bbox_inches='tight')

    return 1
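
For reference, this first experiment is driven entirely by a variant dictionary. The sketch below lists the keys the function body above actually reads; every value is a placeholder assumption rather than a setting from the original configs.

# Hypothetical variant for the plotting experiment above.
# Only the keys are grounded in the function body; all values are illustrative.
plot_variant = {
    'expert_name': 'some_expert_listing_key',    # entry in expert_demos_listing.yaml (assumed)
    'expert_idx': 0,
    'state_indices': [0, 1],                     # observation dimensions treated as (x, y)
    'env_specs': {
        'env_name': 'some_env',                  # placeholder
        'env_kwargs': {},
        'eval_env_seed': 0,
        'task_name': 'some_task',                # used for the output .pdf filename
    },
    'policy_checkpoint': '/path/to/params.pkl',  # placeholder path
    'eval_deterministic': True,
    'num_eval_steps': 20000,
    'max_path_length': 1000,
    'no_terminal': False,
    'render': False,
    'render_kwargs': {},
}
# experiment(plot_variant)  # scatter-plots the visited (x, y) states to ./figs/
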
Example #2
def experiment(variant):
    env_specs = variant['env_specs']
    env = get_env(env_specs)
    env.seed(env_specs['eval_env_seed'])

    print('\n\nEnv: {}'.format(env_specs['env_name']))
    print('kwargs: {}'.format(env_specs['env_kwargs']))
    print('Obs Space: {}'.format(env.observation_space))
    print('Act Space: {}\n\n'.format(env.action_space))

    if variant['scale_env_with_demo_stats']:
        with open('expert_demos_listing.yaml', 'r') as f:
            listings = yaml.safe_load(f.read())
        expert_demos_path = listings[variant['expert_name']]['file_paths'][
            variant['expert_idx']]
        buffer_save_dict = joblib.load(expert_demos_path)
        env = ScaledEnv(
            env,
            obs_mean=buffer_save_dict['obs_mean'],
            obs_std=buffer_save_dict['obs_std'],
            acts_mean=buffer_save_dict['acts_mean'],
            acts_std=buffer_save_dict['acts_std'],
        )

    policy = joblib.load(variant['policy_checkpoint'])['exploration_policy']
    if variant['eval_deterministic']:
        policy = MakeDeterministic(policy)
    policy.to(ptu.device)

    eval_sampler = PathSampler(env,
                               policy,
                               variant['num_eval_steps'],
                               variant['max_path_length'],
                               no_terminal=variant['no_terminal'],
                               render=variant['render'],
                               render_kwargs=variant['render_kwargs'])
    test_paths = eval_sampler.obtain_samples()
    average_returns = eval_util.get_average_returns(test_paths)
    print(average_returns)

    return 1
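
When scale_env_with_demo_stats is set, the environment is wrapped in ScaledEnv with the expert buffer's observation and action statistics, presumably so the policy is evaluated on the same normalized scale it was trained on. Below is a minimal sketch of what such a wrapper might do for the observation side only; it is an illustration under that assumption, not the repository's actual ScaledEnv.

import numpy as np

class NormalizedObsEnv:
    """Illustrative stand-in for ScaledEnv: standardizes observations with
    expert-demo statistics before handing them to the policy."""

    def __init__(self, env, obs_mean, obs_std, eps=1e-8):
        self.env = env
        self.obs_mean = np.asarray(obs_mean)
        self.obs_std = np.asarray(obs_std)
        self.eps = eps

    def _normalize(self, obs):
        return (obs - self.obs_mean) / (self.obs_std + self.eps)

    def reset(self, **kwargs):
        return self._normalize(self.env.reset(**kwargs))

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        return self._normalize(obs), reward, done, info

    def __getattr__(self, name):
        # delegate everything else (seed, action_space, render, ...) to the wrapped env
        return getattr(self.env, name)

The call above also passes acts_mean and acts_std, so the real ScaledEnv presumably applies an analogous transformation on the action side; the sketch keeps only observations for brevity.
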
Example #3
# env and env_specs are set up via get_env(...) as in the examples above (not shown in this excerpt)
env.seed(env_specs['eval_env_seed'])
with open('expert_demos_listing.yaml', 'r') as f:
    listings = yaml.safe_load(f.read())
expert_demos_path = listings['norm_halfcheetah_32_demos_sub_20']['file_paths'][0]
buffer_save_dict = joblib.load(expert_demos_path)
env = ScaledEnv(
    env,
    obs_mean=buffer_save_dict['obs_mean'],
    obs_std=buffer_save_dict['obs_std'],
    acts_mean=buffer_save_dict['acts_mean'],
    acts_std=buffer_save_dict['acts_std'],
)

bc_policy = joblib.load('/scratch/hdd001/home/kamyar/output/paper-version-hc-bc/paper_version_hc_bc_2019_05_19_00_32_05_0000--s-0/params.pkl')['exploration_policy']
bc_policy = MakeDeterministic(bc_policy)
bc_policy.to(ptu.device)

dagger_policy = joblib.load('/scratch/hdd001/home/kamyar/output/dagger-halfcheetah/dagger_halfcheetah_2019_08_20_16_30_36_0000--s-0/params.pkl')['exploration_policy']
dagger_policy = MakeDeterministic(dagger_policy)
dagger_policy.to(ptu.device)

irl_policy = joblib.load('/scratch/hdd001/home/kamyar/output/hc_airl_ckpt/params.pkl')['exploration_policy']
irl_policy = MakeDeterministic(irl_policy)
irl_policy.to(ptu.device)

fig, ax = plt.subplots(1)

eval_sampler = PathSampler(
    env,
    bc_policy,
    20000,
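
The listing breaks off inside this first PathSampler call. Judging from the other examples on this page, the evaluation of each policy presumably continues along the lines below; the max_path_length value, the keyword arguments, and the use of eval_util are assumptions.

# Assumed continuation, mirroring the PathSampler usage in the examples above.
eval_sampler = PathSampler(
    env,
    bc_policy,
    20000,   # num_eval_steps, as in the truncated call
    1000,    # max_path_length (assumed)
    no_terminal=False,
    render=False,
    render_kwargs={},
)
test_paths = eval_sampler.obtain_samples()
print(eval_util.get_average_returns(test_paths))  # and likewise for dagger_policy and irl_policy
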
Example #4
def experiment(specs):
    if not specs['use_scripted_policy']:
        policy_is_scripted = False
        policy = joblib.load(specs['expert_path'])['policy']
    else:
        policy_is_scripted = True
        policy = get_scripted_policy(specs['scripted_policy_name'])

    if specs['use_deterministic_expert']:
        policy = MakeDeterministic(policy)
    if ptu.gpu_enabled():
        policy.to(ptu.device)

    env = get_env(specs['env_specs'])
    env.seed(specs['env_specs']['env_seed'])

    # make the replay buffers
    max_path_length = specs['max_path_length']
    if 'wrap_absorbing' in specs and specs['wrap_absorbing']:
        """
        There was an initial implementation for this in v1.0
        in gen_irl_expert_trajs.py
        """
        raise NotImplementedError()
        _max_buffer_size = (max_path_length + 2) * specs['num_rollouts']
    else:
        _max_buffer_size = max_path_length * specs['num_rollouts']
    _max_buffer_size = int(
        np.ceil(_max_buffer_size / float(specs['subsample_factor'])))
    buffer_constructor = lambda: EnvReplayBuffer(
        _max_buffer_size,
        env,
    )

    train_buffer = buffer_constructor()
    test_buffer = buffer_constructor()

    render = specs['render']
    render_kwargs = specs['render_kwargs']
    check_for_success = specs['check_for_success']

    print('\n')
    # fill the train buffer
    fill_buffer(train_buffer,
                env,
                policy,
                specs['num_rollouts'],
                max_path_length,
                no_terminal=specs['no_terminal'],
                policy_is_scripted=policy_is_scripted,
                render=render,
                render_kwargs=render_kwargs,
                check_for_success=check_for_success,
                wrap_absorbing=False,
                subsample_factor=specs['subsample_factor'])

    # fill the test buffer
    fill_buffer(test_buffer,
                env,
                policy,
                specs['num_rollouts'],
                max_path_length,
                no_terminal=specs['no_terminal'],
                policy_is_scripted=policy_is_scripted,
                render=render,
                render_kwargs=render_kwargs,
                check_for_success=check_for_success,
                wrap_absorbing=False,
                subsample_factor=specs['subsample_factor'])

    # save the replay buffers
    logger.save_extra_data(
        {'train': train_buffer, 'test': test_buffer},
        name='expert_demos.pkl',
    )

    return 1
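
As with the other entry points, this demo-collection experiment is configured through a plain specs dictionary. The sketch below lists the keys the function reads, with placeholder values; the buffer-size arithmetic in the final comment just evaluates the np.ceil expression above.

# Hypothetical specs for the demo-collection experiment above.
# Keys follow the function body; all values are placeholders.
demo_specs = {
    'use_scripted_policy': False,
    'expert_path': '/path/to/expert_params.pkl',  # placeholder
    'scripted_policy_name': None,                 # only read when use_scripted_policy is True
    'use_deterministic_expert': True,
    'env_specs': {'env_name': 'some_env', 'env_kwargs': {}, 'env_seed': 0},
    'max_path_length': 1000,
    'num_rollouts': 32,
    'subsample_factor': 20,
    'no_terminal': False,
    'render': False,
    'render_kwargs': {},
    'check_for_success': False,
}
# Each buffer then holds at most ceil(1000 * 32 / 20) = 1600 transitions.
# experiment(demo_specs)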