Example #1
    def test_save_path(self):
        n_store_episodes = 10
        obs = np.ones(shape=self.env.observation_space.shape, dtype=np.float32)
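        # Fill the replay buffer once per episode, then dump the full buffer with save_path.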
        for epi in range(n_store_episodes):
            for i in range(self.replay_buffer.get_buffer_size()):
                self.replay_buffer.add(obs=obs * i,
                                       act=i,
                                       rew=0.,
                                       next_obs=obs * (i + 1),
                                       done=False)
            save_path(
                self.replay_buffer.sample(
                    self.replay_buffer.get_buffer_size()),
                os.path.join(self.output_dir,
                             "step_0_epi_{}_return_0.0.pkl".format(epi)))
        data = restore_latest_n_traj(self.output_dir)
        self.assertEqual(
            data["obses"].shape[0],
            self.replay_buffer.get_buffer_size() * n_store_episodes)

        max_steps = 10
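        # With n_path=1 and max_steps set, only the latest trajectory is restored,
        # truncated to max_steps transitions.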
        data = restore_latest_n_traj(self.output_dir, 1, max_steps)
        self.assertEqual(data["obses"].shape[0], max_steps)
        self.assertEqual(data["acts"].shape[0], max_steps)
        self.assertEqual(data["next_obses"].shape[0], max_steps)
Example #2
    if args.expert_path_dir is None:
        print("Plaese generate demonstrations first")
        print("python examples/run_sac.py --env-name=RoboschoolReacher-v1 --save-test-path --test-interval=50000")
        exit()

    # These hidden-layer sizes are reused for the DDPG actor/critic networks and the VAIL discriminator.
    units = [400, 300]

    env = gym.make(args.env_name)
    test_env = gym.make(args.env_name)
    policy = DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        max_action=env.action_space.high[0],
        gpu=args.gpu,
        actor_units=units,
        critic_units=units,
        n_warmup=10000,
        batch_size=100)
    irl = VAIL(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        units=units,
        enable_sn=args.enable_sn,
        batch_size=32,
        gpu=args.gpu)
    expert_trajs = restore_latest_n_traj(
        args.expert_path_dir, n_path=20, max_steps=1000)
    trainer = IRLTrainer(policy, env, args, irl, expert_trajs["obses"],
                         expert_trajs["next_obses"], expert_trajs["acts"], test_env)
    trainer()
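As an optional, hypothetical sanity check (not part of the original script), the restored demonstration arrays can be verified against the environment before the IRLTrainer is constructed; this assumes a flat observation space such as RoboschoolReacher's.

    # Hypothetical checks, placed before the IRLTrainer construction above.
    assert expert_trajs["obses"].shape[0] == expert_trajs["acts"].shape[0]
    assert expert_trajs["obses"].shape == expert_trajs["next_obses"].shape
    assert expert_trajs["obses"].shape[1:] == env.observation_space.shape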