def test_config_init(self, train_config):
    """Build a PPO frame from a generated config, feed it one short episode,
    and make sure ``update`` runs without error."""
    cfg = train_config
    config = PPO.generate_config({})
    config["frame_config"]["models"] = ["Actor", "Critic"]
    config["frame_config"]["model_kwargs"] = [
        {"state_dim": cfg.observe_dim, "action_num": cfg.action_num},
        {"state_dim": cfg.observe_dim},
    ]
    ppo = PPO.init_from_config(config)

    # Dummy zero observation/action tensors; state is reused for next_state.
    state = t.zeros([1, cfg.observe_dim], dtype=t.float32)
    action = t.zeros([1, 1], dtype=t.float32)
    step = {
        "state": {"state": state},
        "action": {"action": action},
        "next_state": {"state": state},
        "reward": 0,
        "terminal": False,
    }
    # Three identical transitions are enough to drive one update pass.
    ppo.store_episode([dict(step) for _ in range(3)])
    ppo.update()
def gail(self, train_config, device, dtype):
    # not used for training, only used for testing apis
    cfg = train_config
    actor_net = Actor(cfg.observe_dim, cfg.action_num).type(dtype).to(device)
    critic_net = Critic(cfg.observe_dim).type(dtype).to(device)
    disc_net = Discriminator(cfg.observe_dim, cfg.action_num).type(dtype).to(device)

    actor = smw(actor_net, device, device)
    critic = smw(critic_net, device, device)
    discriminator = smw(disc_net, device, device)

    # Inner PPO frame that GAIL wraps.
    ppo = PPO(
        actor,
        critic,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=cfg.replay_size,
    )
    return GAIL(
        discriminator,
        ppo,
        t.optim.Adam,
        expert_replay_device="cpu",
        expert_replay_size=cfg.replay_size,
    )
def ppo_train(self, train_config):
    """CPU-only PPO fixture used by the training tests."""
    cfg = train_config
    actor = smw(Actor(cfg.observe_dim, cfg.action_num), "cpu", "cpu")
    critic = smw(Critic(cfg.observe_dim), "cpu", "cpu")
    return PPO(
        actor,
        critic,
        t.optim.Adam,
        nn.MSELoss(reduction='sum'),
        replay_device="cpu",
        replay_size=cfg.replay_size,
    )
def ppo(self, train_config, device, dtype):
    """PPO fixture placed on the requested device with the requested dtype."""
    cfg = train_config
    actor_net = Actor(cfg.observe_dim, cfg.action_num).type(dtype).to(device)
    critic_net = Critic(cfg.observe_dim).type(dtype).to(device)
    actor = smw(actor_net, device, device)
    critic = smw(critic_net, device, device)
    return PPO(
        actor,
        critic,
        t.optim.Adam,
        nn.MSELoss(reduction='sum'),
        replay_device="cpu",
        replay_size=cfg.replay_size,
    )
def ppo_vis(self, train_config, tmpdir):
    # not used for training, only used for testing apis
    cfg = train_config
    # Visualization output goes into a fresh numbered temp directory.
    vis_dir = tmpdir.make_numbered_dir()
    actor = smw(
        Actor(cfg.observe_dim, cfg.action_num).to(cfg.device),
        cfg.device,
        cfg.device,
    )
    critic = smw(
        Critic(cfg.observe_dim).to(cfg.device),
        cfg.device,
        cfg.device,
    )
    return PPO(
        actor,
        critic,
        t.optim.Adam,
        nn.MSELoss(reduction='sum'),
        replay_device=cfg.device,
        replay_size=cfg.replay_size,
        visualize=True,
        visualize_dir=str(vis_dir),
    )
def gail_lr(self, train_config, device, dtype):
    # not used for training, only used for testing apis
    cfg = train_config
    actor = smw(
        Actor(cfg.observe_dim, cfg.action_num).type(dtype).to(device),
        device,
        device,
    )
    critic = smw(
        Critic(cfg.observe_dim).type(dtype).to(device),
        device,
        device,
    )
    discriminator = smw(
        Discriminator(cfg.observe_dim, cfg.action_num).type(dtype).to(device),
        device,
        device,
    )
    ppo = PPO(
        actor,
        critic,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=cfg.replay_size,
    )
    # Piecewise learning-rate schedule: 1e-3 until step 200000, then 3e-4.
    lr_func = gen_learning_rate_func(
        [(0, 1e-3), (200000, 3e-4)], logger=logger
    )
    # Passing a scheduler class without its required args must fail loudly.
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        _ = GAIL(
            discriminator,
            ppo,
            t.optim.Adam,
            expert_replay_device="cpu",
            expert_replay_size=cfg.replay_size,
            lr_scheduler=LambdaLR,
        )
    return GAIL(
        discriminator,
        ppo,
        t.optim.Adam,
        expert_replay_device="cpu",
        expert_replay_size=cfg.replay_size,
        lr_scheduler=LambdaLR,
        lr_scheduler_args=((lr_func, ), ),
    )
def gail_train(self, train_config):
    """CPU-only GAIL fixture (discriminator + inner PPO) for training tests."""
    cfg = train_config
    actor = smw(Actor(cfg.observe_dim, cfg.action_num), "cpu", "cpu")
    critic = smw(Critic(cfg.observe_dim), "cpu", "cpu")
    discriminator = smw(
        Discriminator(cfg.observe_dim, cfg.action_num), "cpu", "cpu"
    )
    inner_ppo = PPO(
        actor,
        critic,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=cfg.replay_size,
    )
    return GAIL(
        discriminator,
        inner_ppo,
        t.optim.Adam,
        expert_replay_device="cpu",
        expert_replay_size=cfg.replay_size,
    )