def maddpg_lr(self, train_config, device, dtype):
    c = train_config
    actor = smw(
        Actor(c.test_observe_dim, c.test_action_dim, c.test_action_range)
        .type(dtype)
        .to(device),
        device,
        device,
    )
    critic = smw(
        Critic(
            c.test_observe_dim * c.test_agent_num,
            c.test_action_dim * c.test_agent_num,
        )
        .type(dtype)
        .to(device),
        device,
        device,
    )
    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)], logger=logger)
    # passing lr_scheduler without lr_scheduler_args should fail, since LambdaLR
    # cannot be constructed without its lambda function
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        _ = MADDPG(
            [deepcopy(actor) for _ in range(c.test_agent_num)],
            [deepcopy(actor) for _ in range(c.test_agent_num)],
            [deepcopy(critic) for _ in range(c.test_agent_num)],
            [deepcopy(critic) for _ in range(c.test_agent_num)],
            [list(range(c.test_agent_num))] * c.test_agent_num,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=c.replay_size,
            lr_scheduler=LambdaLR,
        )
    maddpg = MADDPG(
        [deepcopy(actor) for _ in range(c.test_agent_num)],
        [deepcopy(actor) for _ in range(c.test_agent_num)],
        [deepcopy(critic) for _ in range(c.test_agent_num)],
        [deepcopy(critic) for _ in range(c.test_agent_num)],
        [list(range(c.test_agent_num))] * c.test_agent_num,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=c.replay_size,
        lr_scheduler=LambdaLR,
        lr_scheduler_args=((lr_func,), (lr_func,)),
    )
    return maddpg
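# Illustrative sketch, not part of the original suite: the lr map given to
# gen_learning_rate_func above is assumed to behave like the stand-in below,
# i.e. 1e-3 until optimizer step 200000 and 3e-4 afterwards. How LambdaLR
# combines this value with the optimizer's base lr depends on machin's
# implementation, so treat this as an approximation, not the library's API.
def example_lr_schedule_sketch(self, step):
    # hypothetical equivalent of gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)])
    return 1e-3 if step < 200000 else 3e-4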
def test_config_init(self, train_config):
    c = train_config
    config = MADDPG.generate_config({})
    config["frame_config"]["models"] = [
        ["Actor"] * c.test_agent_num,
        ["Actor"] * c.test_agent_num,
        ["Critic"] * c.test_agent_num,
        ["Critic"] * c.test_agent_num,
    ]
    config["frame_config"]["model_args"] = [[()] * c.test_agent_num] * 4
    config["frame_config"]["model_kwargs"] = (
        [
            [
                {
                    "state_dim": c.test_observe_dim,
                    "action_dim": c.test_action_dim,
                    "action_range": c.test_action_range,
                }
            ]
            * c.test_agent_num
        ]
        * 2
        + [
            [
                {
                    "state_dim": c.test_observe_dim * c.test_agent_num,
                    "action_dim": c.test_action_dim * c.test_agent_num,
                }
            ]
            * c.test_agent_num
        ]
        * 2
    )
    maddpg = MADDPG.init_from_config(config)

    old_state = state = t.zeros([1, c.test_observe_dim], dtype=t.float32)
    action = t.zeros([1, c.test_action_dim], dtype=t.float32)
    maddpg.store_episodes(
        [
            [
                {
                    "state": {"state": old_state},
                    "action": {"action": action},
                    "next_state": {"state": state},
                    "reward": 0,
                    "terminal": False,
                }
            ]
        ]
        * c.test_agent_num
    )
    maddpg.update()
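# Illustrative sketch, not part of the original suite: store_episodes takes one
# episode list per agent (outer length == agent number), each a list of transition
# dicts with the same keys used in test_config_init above. A two-step episode for
# the continuous fixture defined below (maddpg_cont) could be stored like this;
# it is a usage sketch only and makes no extra assertions.
def example_store_episodes_sketch(self, train_config, maddpg_cont):
    c = train_config
    state = t.zeros([1, c.test_observe_dim], dtype=t.float32)
    action = t.zeros([1, c.test_action_dim], dtype=t.float32)
    episode = [
        {
            "state": {"state": state},
            "action": {"action": action},
            "next_state": {"state": state},
            "reward": 0.0,
            "terminal": step == 1,
        }
        for step in range(2)
    ]
    # one identical episode per agent, purely for illustration
    maddpg_cont.store_episodes([episode] * c.test_agent_num)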
def maddpg_vis(self, train_config, device, dtype, tmpdir):
    c = train_config
    tmp_dir = tmpdir.make_numbered_dir()
    actor = smw(
        Actor(c.test_observe_dim, c.test_action_dim, c.test_action_range)
        .type(dtype)
        .to(device),
        device,
        device,
    )
    critic = smw(
        Critic(
            c.test_observe_dim * c.test_agent_num,
            c.test_action_dim * c.test_agent_num,
        )
        .type(dtype)
        .to(device),
        device,
        device,
    )
    maddpg = MADDPG(
        [deepcopy(actor) for _ in range(c.test_agent_num)],
        [deepcopy(actor) for _ in range(c.test_agent_num)],
        [deepcopy(critic) for _ in range(c.test_agent_num)],
        [deepcopy(critic) for _ in range(c.test_agent_num)],
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=c.replay_size,
        visualize=True,
        visualize_dir=str(tmp_dir),
    )
    return maddpg
def maddpg_cont(self, train_config, device, dtype):
    c = train_config
    actor = smw(
        Actor(c.test_observe_dim, c.test_action_dim, c.test_action_range)
        .type(dtype)
        .to(device),
        device,
        device,
    )
    critic = smw(
        Critic(
            c.test_observe_dim * c.test_agent_num,
            c.test_action_dim * c.test_agent_num,
        )
        .type(dtype)
        .to(device),
        device,
        device,
    )
    maddpg = MADDPG(
        [deepcopy(actor) for _ in range(c.test_agent_num)],
        [deepcopy(actor) for _ in range(c.test_agent_num)],
        [deepcopy(critic) for _ in range(c.test_agent_num)],
        [deepcopy(critic) for _ in range(c.test_agent_num)],
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=c.replay_size,
    )
    return maddpg
def maddpg(self, train_config, device, dtype):
    c = train_config
    # For simplicity, the prey is trained together with the predators;
    # predators can get the observation of the prey, and vice versa.
    actor = smw(
        ActorDiscrete(c.observe_dim, c.action_num).type(dtype).to(device),
        device,
        device,
    )
    critic = smw(
        Critic(c.observe_dim * c.agent_num, c.action_num * c.agent_num)
        .type(dtype)
        .to(device),
        device,
        device,
    )
    # Setting the visible indexes to [[0], [1], [2]] would be equivalent to using DDPG.
    maddpg = MADDPG(
        [deepcopy(actor) for _ in range(c.agent_num)],
        [deepcopy(actor) for _ in range(c.agent_num)],
        [deepcopy(critic) for _ in range(c.agent_num)],
        [deepcopy(critic) for _ in range(c.agent_num)],
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=c.replay_size,
        pool_type="thread",
    )
    return maddpg
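# Illustrative sketch, not part of the original suite: the DDPG-equivalence noted
# in the comment above could be expressed by restricting each critic's visible
# actors to its own index, so every critic only receives its own agent's
# observation and action. The positional slot for the visibility list mirrors the
# form used in maddpg_lr and maddpg_disc; both that placement and the per-agent
# critic dimensions are assumptions that may differ between machin versions.
def example_ddpg_equivalent_visibility_sketch(self, train_config, device, dtype):
    c = train_config
    actor = smw(
        ActorDiscrete(c.observe_dim, c.action_num).type(dtype).to(device),
        device,
        device,
    )
    # each critic sees a single agent, so its input covers one agent's dims
    critic = smw(
        Critic(c.observe_dim, c.action_num).type(dtype).to(device),
        device,
        device,
    )
    return MADDPG(
        [deepcopy(actor) for _ in range(c.agent_num)],
        [deepcopy(actor) for _ in range(c.agent_num)],
        [deepcopy(critic) for _ in range(c.agent_num)],
        [deepcopy(critic) for _ in range(c.agent_num)],
        [[i] for i in range(c.agent_num)],  # e.g. [[0], [1], [2]] for three agents
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=c.replay_size,
    )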
def maddpg_disc(self, train_config, device, dtype):
    c = train_config
    actor = smw(
        ActorDiscrete(c.test_observe_dim, c.test_action_dim).type(dtype).to(device),
        device,
        device,
    )
    critic = smw(
        Critic(
            c.test_observe_dim * c.test_agent_num,
            c.test_action_dim * c.test_agent_num,
        )
        .type(dtype)
        .to(device),
        device,
        device,
    )
    maddpg = MADDPG(
        [deepcopy(actor) for _ in range(c.test_agent_num)],
        [deepcopy(actor) for _ in range(c.test_agent_num)],
        [deepcopy(critic) for _ in range(c.test_agent_num)],
        [deepcopy(critic) for _ in range(c.test_agent_num)],
        [list(range(c.test_agent_num))] * c.test_agent_num,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=c.replay_size,
    )
    return maddpg