import os

import numpy as np

from tf2rl.experiments.utils import restore_latest_n_traj, save_path


def test_save_path(self):
    """Round-trip: pickle sampled transitions per episode, then reload them."""
    n_store_episodes = 10
    obs = np.ones(shape=self.env.observation_space.shape, dtype=np.float32)
    for epi in range(n_store_episodes):
        # Fill the buffer with one "episode" of dummy transitions.
        for i in range(self.replay_buffer.get_buffer_size()):
            self.replay_buffer.add(
                obs=obs * i, act=i, rew=0.,
                next_obs=obs * (i + 1), done=False)
        # Dump the whole buffer to step_0_epi_<epi>_return_0.0.pkl.
        save_path(
            self.replay_buffer.sample(self.replay_buffer.get_buffer_size()),
            os.path.join(self.output_dir,
                         "step_0_epi_{}_return_0.0.pkl".format(epi)))

    # Without limits, every stored transition comes back.
    data = restore_latest_n_traj(self.output_dir)
    self.assertEqual(
        data["obses"].shape[0],
        self.replay_buffer.get_buffer_size() * n_store_episodes)

    # With n_path=1 and max_steps=10, the result is truncated to 10 steps.
    max_steps = 10
    data = restore_latest_n_traj(self.output_dir, 1, max_steps)
    self.assertEqual(data["obses"].shape[0], max_steps)
    self.assertEqual(data["acts"].shape[0], max_steps)
    self.assertEqual(data["next_obses"].shape[0], max_steps)
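# ---------------------------------------------------------------------------
# Standalone round-trip sketch of the same save_path / restore_latest_n_traj
# contract the test above exercises. Illustrative only: the dummy transition
# dict below stands in for ReplayBuffer.sample(), and the
# "step_<step>_epi_<epi>_return_<return>.pkl" filename pattern is taken from
# the test, not from library documentation.
import os
import tempfile

import numpy as np

from tf2rl.experiments.utils import restore_latest_n_traj, save_path

output_dir = tempfile.mkdtemp()
obs_dim, n_transitions = 3, 5
for epi in range(2):
    # Parallel arrays keyed like a replay-buffer sample.
    samples = {
        "obs": np.zeros((n_transitions, obs_dim), dtype=np.float32),
        "act": np.zeros((n_transitions, 1), dtype=np.float32),
        "next_obs": np.zeros((n_transitions, obs_dim), dtype=np.float32),
        "rew": np.zeros((n_transitions, 1), dtype=np.float32),
        "done": np.zeros((n_transitions, 1), dtype=np.float32)}
    save_path(
        samples,
        os.path.join(output_dir, "step_0_epi_{}_return_0.0.pkl".format(epi)))

# Reload only the newest file, truncated to 3 steps.
data = restore_latest_n_traj(output_dir, n_path=1, max_steps=3)
print(data["obses"].shape, data["acts"].shape, data["next_obses"].shape)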
import gym

from tf2rl.algos.ddpg import DDPG
from tf2rl.algos.vail import VAIL
from tf2rl.experiments.irl_trainer import IRLTrainer
from tf2rl.experiments.utils import restore_latest_n_traj


if args.expert_path_dir is None:
    print("Please generate demonstrations first")
    print("python examples/run_sac.py --env-name=RoboschoolReacher-v1 "
          "--save-test-path --test-interval=50000")
    exit()

units = [400, 300]
env = gym.make(args.env_name)
test_env = gym.make(args.env_name)

# DDPG generator policy; actor and critic both use 400-300 hidden units.
policy = DDPG(
    state_shape=env.observation_space.shape,
    action_dim=env.action_space.high.size,
    max_action=env.action_space.high[0],
    gpu=args.gpu,
    actor_units=units,
    critic_units=units,
    n_warmup=10000,
    batch_size=100)

# VAIL discriminator that supplies the imitation reward.
irl = VAIL(
    state_shape=env.observation_space.shape,
    action_dim=env.action_space.high.size,
    units=units,
    enable_sn=args.enable_sn,
    batch_size=32,
    gpu=args.gpu)

# Load up to 20 expert trajectories of at most 1000 steps each.
expert_trajs = restore_latest_n_traj(
    args.expert_path_dir, n_path=20, max_steps=1000)

trainer = IRLTrainer(
    policy, env, args, irl,
    expert_trajs["obses"],
    expert_trajs["next_obses"],
    expert_trajs["acts"],
    test_env)
trainer()
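# ---------------------------------------------------------------------------
# The `args` namespace consumed above is built before this block runs. A
# hypothetical sketch, assuming the library's usual get_argument() chaining
# (method names and defaults here are assumptions, not verified API):
#
#     parser = IRLTrainer.get_argument()
#     parser = VAIL.get_argument(parser)
#     parser.add_argument('--env-name', type=str, default="RoboschoolReacher-v1")
#     args = parser.parse_args()
#
# Example invocation, assuming this script lives at examples/run_vail_ddpg.py
# (a hypothetical path) and demonstrations were generated as hinted above:
#
#     python examples/run_vail_ddpg.py --env-name=RoboschoolReacher-v1 \
#         --expert-path-dir=<directory of step_*_epi_*_return_*.pkl files>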