from railrl.visualization import plot_util as plot

# Project helper (defined outside this excerpt) called with the matplotlib
# module before any figure is drawn; presumably applies shared plot
# settings -- TODO confirm against its definition.
configure_matplotlib(matplotlib)

# Filter built from a single flat-parameter criterion; it is handed to
# load_exps below.
f = plot.filter_by_flat_params({
    'replay_kwargs.fraction_goals_are_env_goals': 0.5,
})

# One experiment directory, addressed relative to ashvin_base_dir.
reward_variant_dirs = [
    ashvin_base_dir
    + 's3doodad/share/steven/pushing-multipushing/pusher-reward-variants',
]
exps = plot.load_exps(reward_variant_dirs, f, suppress_output=True)

# Tag the loaded experiments with key "name" and value "dsae".
plot.tag_exps(exps, "name", "dsae")

# Compare the loaded experiments on the two final-distance metrics, varying
# over the reward_params.type setting.
# NOTE(review): the default_vary key has no "vae_wrapped_env_kwargs." prefix
# while the vary key does -- confirm this mismatch is intended.
distance_metrics = [
    "Final  puck_distance Mean",
    "Final  hand_distance Mean",
]
plot.comparison(
    exps,
    distance_metrics,
    figsize=(6, 4),
    vary=["vae_wrapped_env_kwargs.reward_params.type"],
    default_vary={"reward_params.type": "unknown"},
    smooth=plot.padded_ma_filter(10),
    xlim=(0, 250000),
    ylim=(0.15, 0.22),
    method_order=None,
)

# Decorate the current axes: x tick labels go through format_func, the
# y label is left blank and the legend is given an empty label list.
current_axes = plt.gca()
current_axes.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
plt.title("Visual Pusher")
plt.xlabel("Timesteps")
plt.ylabel("")
plt.legend([])
plt.tight_layout()

reward_ablation_pdf = output_dir + "pusher_reward_type_ablation.pdf"
plt.savefig(reward_ablation_pdf)
# Selection criteria for the pusher trials plotted as "Offline" below:
# rdim of 250 and a should_train_vae schedule of never_train.
offline_pusher_criteria = {
    'rdim': 250,
    'algo_kwargs.should_train_vae.$function':
        'railrl.torch.vae.vae_schedules.never_train',
}
offline_pusher = dp.get_trials(pusher_dir, criteria=offline_pusher_criteria)
# Online-vs-offline comparison for the pusher, drawn on a fresh figure.
plt.figure(figsize=(6, 5))

# Insertion order is preserved: "Online" first, then "Offline".
pusher_curves = OrderedDict()
pusher_curves["Online"] = online_pusher
pusher_curves["Offline"] = offline_pusher

plot.plot_trials(
    pusher_curves,
    y_keys="Final  sum_distance Mean",
    x_key="Number of env steps total",
    process_time_series=plot.padded_ma_filter(100),
)

# Axis decoration; x tick labels are rendered through format_func.
x_tick_formatter = plt.FuncFormatter(format_func)
plt.gca().xaxis.set_major_formatter(x_tick_formatter)
plt.title("Visual Pusher, Online Ablation")
plt.xlabel("Timesteps")
plt.ylabel("Final Distance to Goal")

# Legend anchored below the axes (negative y in bbox_to_anchor).
lgnd = plt.legend(
    ["Online", "Offline"],
    bbox_to_anchor=(0.49, -0.2),
    loc="upper center",
    ncol=4,
    handlelength=1,
)
plt.tight_layout()

# Save the figure and report where it went.
pusher_ablation_pdf = output_dir + "pusher_online_ablation.pdf"
plt.savefig(pusher_ablation_pdf)
print("File saved to", pusher_ablation_pdf)
# Example no. 3
# 0
# vae_trials = get_trials(
#     '/home/vitchyr/git/railrl/data/doodads3/05-12-sawyer-reach-vae-rl-reproduce-2/',
#     criteria={
#         'replay_kwargs.fraction_resampled_goals_are_env_goals': 0.5,
#         'replay_kwargs.fraction_goals_are_rollout_goals': 0.2,
#     }
# )

# Metric plotted for every group of trials.
y_keys = ['Final  distance Mean']

# Named groups of trials handed to plot_trials; the trial variables are
# defined outside this excerpt.
reach_trials = {
    'State - HER TD3': state_her_td3,
    'State - TDM DDPG': state_tdm_ddpg,
    'VAE - HER TD3': vae_trials,
    # 'VAE - TD3': vae_td3_trials,
}
plot_trials(
    reach_trials,
    y_keys=y_keys,
    process_time_series=padded_ma_filter(3),
    # x_key=x_key,
)

plt.ylabel('Final distance to Goal')
plt.xlabel('Number of Environment Steps Total')

# Write the figure to disk first, then display it.
reach_plot_path = (
    '/home/vitchyr/git/railrl/experiments/vitchyr/nips2018/plots'
    '/reach.jpg'
)
plt.savefig(reach_plot_path)
plt.show()

# plt.savefig("/home/ashvin/data/s3doodad/media/plots/pusher2d.pdf")
# Example no. 4
# 0
    })
# Selection criteria for the reacher trials plotted as "Offline" below:
# a should_train_vae schedule of never_train.
offline_reacher_criteria = {
    'algo_kwargs.should_train_vae.$function':
        'railrl.torch.vae.vae_schedules.never_train',
}
offline_reacher = dp.get_trials(reacher_dir, criteria=offline_reacher_criteria)

# Online-vs-offline comparison for the reacher, drawn on a fresh figure.
plt.figure(figsize=(6, 5))

# Insertion order is preserved: "Online" first, then "Offline".
reacher_curves = OrderedDict()
reacher_curves["Online"] = online_reacher
reacher_curves["Offline"] = offline_reacher

# Smoothing filter with window 10 and avg_only_from_left enabled.
reacher_smoothing = plot.padded_ma_filter(10, avg_only_from_left=True)
plot.plot_trials(
    reacher_curves,
    y_keys="Final  distance Mean",
    x_key="Number of env steps total",
    process_time_series=reacher_smoothing,
)

# Axis decoration; x tick labels are rendered through format_func.
x_tick_formatter = plt.FuncFormatter(format_func)
plt.gca().xaxis.set_major_formatter(x_tick_formatter)
plt.title("Visual Reacher Online Ablation")
plt.xlabel("Timesteps")
plt.ylabel("Final Distance to Goal")

# Legend anchored below the axes (negative y in bbox_to_anchor).
plt.legend(
    ["Online", "Offline"],
    bbox_to_anchor=(0.49, -0.2),
    loc="upper center",
    ncol=4,
    handlelength=1,
)
plt.tight_layout()

# Save the figure and report where it went.
reacher_ablation_pdf = output_dir + "reacher_online_ablation.pdf"
plt.savefig(reacher_ablation_pdf)
print("File saved to", reacher_ablation_pdf)