def test_config_init(self, train_config):
    """Build DDPG from a generated config, store one transition and update.

    The config lists two "Actor" and two "Critic" models; the constructor
    keyword arguments for each are taken from ``train_config``.
    """
    c = train_config
    config = DDPG.generate_config({})

    # The two actor entries share one kwargs dict and the two critic
    # entries share another.
    actor_kwargs = {
        "state_dim": c.observe_dim,
        "action_dim": c.action_dim,
        "action_range": c.action_range,
    }
    critic_kwargs = {"state_dim": c.observe_dim, "action_dim": c.action_dim}

    config["frame_config"]["models"] = ["Actor", "Actor", "Critic", "Critic"]
    config["frame_config"]["model_kwargs"] = [
        actor_kwargs,
        actor_kwargs,
        critic_kwargs,
        critic_kwargs,
    ]
    ddpg = DDPG.init_from_config(config)

    # A single all-zero transition; the same tensor object serves as both
    # the current and the next state.
    old_state = t.zeros([1, c.observe_dim], dtype=t.float32)
    state = old_state
    action = t.zeros([1, c.action_dim], dtype=t.float32)
    transition = {
        "state": {"state": old_state},
        "action": {"action": action},
        "next_state": {"state": state},
        "reward": 0,
        "terminal": False,
    }
    ddpg.store_transition(transition)
    ddpg.update()
def ddpg(self, train_config, device, dtype):
    """Return a DDPG instance built from two Actor and two Critic networks.

    Every network is cast to ``dtype``, moved to ``device`` and wrapped with
    ``smw`` before being handed to DDPG. The replay buffer is kept on "cpu".
    """
    cfg = train_config

    def wrap(net):
        # Cast and move first, then wrap with the same device given twice.
        return smw(net.type(dtype).to(device), device, device)

    def new_actor():
        return wrap(Actor(cfg.observe_dim, cfg.action_dim, cfg.action_range))

    def new_critic():
        return wrap(Critic(cfg.observe_dim, cfg.action_dim))

    # Construction order matters for DDPG's positional arguments:
    # actor, actor_t, critic, critic_t.
    networks = [new_actor(), new_actor(), new_critic(), new_critic()]
    return DDPG(
        *networks,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=cfg.replay_size,
    )
def ddpg_disc(self, train_config, device, dtype):
    """Return a DDPG instance whose actors are ``ActorDiscrete`` networks.

    Not used for training, only used for testing apis.
    """
    cfg = train_config
    obs_dim, act_dim = cfg.observe_dim, cfg.action_dim

    def prepare(net):
        # Cast to dtype, move to device, then wrap with smw.
        return smw(net.type(dtype).to(device), device, device)

    actor_net = prepare(ActorDiscrete(obs_dim, act_dim))
    actor_net_t = prepare(ActorDiscrete(obs_dim, act_dim))
    critic_net = prepare(Critic(obs_dim, act_dim))
    critic_net_t = prepare(Critic(obs_dim, act_dim))

    criterion = nn.MSELoss(reduction="sum")
    return DDPG(
        actor_net,
        actor_net_t,
        critic_net,
        critic_net_t,
        t.optim.Adam,
        criterion,
        replay_device="cpu",
        replay_size=cfg.replay_size,
    )
def ddpg_vis(self, train_config, device, dtype, tmpdir):
    """Return a DDPG instance created with ``visualize=True``.

    Not used for training, only used for testing apis. The visualization
    directory is a fresh numbered directory made from ``tmpdir``.
    """
    cfg = train_config
    vis_dir = tmpdir.make_numbered_dir()

    def on_device(net):
        # Cast to dtype, move to device, then wrap with smw.
        return smw(net.type(dtype).to(device), device, device)

    # Built in the order DDPG takes them: actor, actor_t, critic, critic_t.
    actors = [
        on_device(Actor(cfg.observe_dim, cfg.action_dim, cfg.action_range))
        for _ in range(2)
    ]
    critics = [
        on_device(Critic(cfg.observe_dim, cfg.action_dim)) for _ in range(2)
    ]

    return DDPG(
        *actors,
        *critics,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=cfg.replay_size,
        visualize=True,
        visualize_dir=str(vis_dir),
    )
def ddpg_lr(self, train_config, device, dtype):
    """Return a DDPG instance configured with a ``LambdaLR`` scheduler.

    Not used for training, only used for testing apis. Before building the
    returned instance, this checks that passing ``lr_scheduler=LambdaLR``
    without ``lr_scheduler_args`` raises a ``TypeError`` about a missing
    positional argument.
    """
    cfg = train_config

    def prepare(net):
        # Cast to dtype, move to device, then wrap with smw.
        return smw(net.type(dtype).to(device), device, device)

    actor_net = prepare(ActorDiscrete(cfg.observe_dim, cfg.action_dim))
    actor_net_t = prepare(ActorDiscrete(cfg.observe_dim, cfg.action_dim))
    critic_net = prepare(Critic(cfg.observe_dim, cfg.action_dim))
    critic_net_t = prepare(Critic(cfg.observe_dim, cfg.action_dim))
    models = (actor_net, actor_net_t, critic_net, critic_net_t)

    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)], logger=logger)

    # Scheduler class given without its arguments: construction must fail.
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        DDPG(
            *models,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=cfg.replay_size,
            lr_scheduler=LambdaLR,
        )

    # Same construction, now with one argument tuple per scheduler.
    scheduler_args = ((lr_func,), (lr_func,))
    return DDPG(
        *models,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=cfg.replay_size,
        lr_scheduler=LambdaLR,
        lr_scheduler_args=scheduler_args,
    )
def ddpg_train(self, train_config):
    """Return a DDPG instance whose networks are all wrapped for "cpu".

    cpu is faster for testing full training.
    """
    cfg = train_config
    cpu = "cpu"

    def new_actor():
        return smw(Actor(cfg.observe_dim, cfg.action_dim, cfg.action_range), cpu, cpu)

    def new_critic():
        return smw(Critic(cfg.observe_dim, cfg.action_dim), cpu, cpu)

    # Built in the order DDPG takes them: actor, actor_t, critic, critic_t.
    networks = [new_actor(), new_actor(), new_critic(), new_critic()]
    return DDPG(
        *networks,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device=cpu,
        replay_size=cfg.replay_size,
    )
def ddpg(self, train_config):
    """Return a DDPG instance with every network on ``train_config.device``.

    The replay buffer is placed on that same device.
    """
    cfg = train_config

    def on_device(net):
        # Move to the configured device, then wrap with smw.
        return smw(net.to(cfg.device), cfg.device, cfg.device)

    actor_net = on_device(Actor(cfg.observe_dim, cfg.action_dim, cfg.action_range))
    actor_net_t = on_device(Actor(cfg.observe_dim, cfg.action_dim, cfg.action_range))
    critic_net = on_device(Critic(cfg.observe_dim, cfg.action_dim))
    critic_net_t = on_device(Critic(cfg.observe_dim, cfg.action_dim))

    return DDPG(
        actor_net,
        actor_net_t,
        critic_net,
        critic_net_t,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device=cfg.device,
        replay_size=cfg.replay_size,
    )
def ddpg_disc(self, train_config):
    """Return a DDPG instance whose actors are ``ActorDiscreet`` networks.

    Not used for training, only used for testing apis. All networks and the
    replay buffer use ``train_config.device``.
    """
    cfg = train_config

    def on_device(net):
        # Move to the configured device, then wrap with smw.
        return smw(net.to(cfg.device), cfg.device, cfg.device)

    def new_actor():
        return on_device(ActorDiscreet(cfg.observe_dim, cfg.action_dim))

    def new_critic():
        return on_device(Critic(cfg.observe_dim, cfg.action_dim))

    # Built in the order DDPG takes them: actor, actor_t, critic, critic_t.
    networks = [new_actor(), new_actor(), new_critic(), new_critic()]
    return DDPG(
        *networks,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device=cfg.device,
        replay_size=cfg.replay_size,
    )