Example #1
    def maddpg_lr(self, train_config, device, dtype):
        c = train_config
        actor = smw(
            Actor(c.test_observe_dim, c.test_action_dim,
                  c.test_action_range).type(dtype).to(device), device, device)
        critic = smw(
            Critic(c.test_observe_dim * c.test_agent_num, c.test_action_dim *
                   c.test_agent_num).type(dtype).to(device), device, device)
        lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                         logger=logger)
        with pytest.raises(TypeError, match="missing .+ positional argument"):
            _ = MADDPG([deepcopy(actor) for _ in range(c.test_agent_num)],
                       [deepcopy(actor) for _ in range(c.test_agent_num)],
                       [deepcopy(critic) for _ in range(c.test_agent_num)],
                       [deepcopy(critic) for _ in range(c.test_agent_num)],
                       [list(range(c.test_agent_num))] * c.test_agent_num,
                       t.optim.Adam,
                       nn.MSELoss(reduction='sum'),
                       replay_device="cpu",
                       replay_size=c.replay_size,
                       lr_scheduler=LambdaLR)
        maddpg = MADDPG([deepcopy(actor) for _ in range(c.test_agent_num)],
                        [deepcopy(actor) for _ in range(c.test_agent_num)],
                        [deepcopy(critic) for _ in range(c.test_agent_num)],
                        [deepcopy(critic) for _ in range(c.test_agent_num)],
                        [list(range(c.test_agent_num))] * c.test_agent_num,
                        t.optim.Adam,
                        nn.MSELoss(reduction='sum'),
                        replay_device="cpu",
                        replay_size=c.replay_size,
                        lr_scheduler=LambdaLR,
                        lr_scheduler_args=((lr_func, ), (lr_func, )))
        return maddpg
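A minimal sketch of a test that could consume the fixture above. It assumes machin's MADDPG exposes an update_lr_scheduler() method, mirroring the single-agent frameworks; the test name and body are illustrative, not taken from the original suite.

    def test_lr_scheduler(self, train_config, maddpg_lr):
        # Assumed API: steps every LambdaLR built from lr_func above, so the
        # actor/critic learning rates follow the (0, 1e-3) -> (200000, 3e-4)
        # schedule as updates accumulate.
        maddpg_lr.update_lr_scheduler()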
Example #2
    def test_config_init(self, train_config):
        c = train_config
        config = MADDPG.generate_config({})
        config["frame_config"]["models"] = [
            ["Actor"] * c.test_agent_num,
            ["Actor"] * c.test_agent_num,
            ["Critic"] * c.test_agent_num,
            ["Critic"] * c.test_agent_num,
        ]
        config["frame_config"]["model_args"] = [[()] * c.test_agent_num] * 4
        config["frame_config"]["model_kwargs"] = (
            [
                [
                    {
                        "state_dim": c.test_observe_dim,
                        "action_dim": c.test_action_dim,
                        "action_range": c.test_action_range,
                    }
                ]
                * c.test_agent_num
            ]
            * 2
            + [
                [
                    {
                        "state_dim": c.test_observe_dim * c.test_agent_num,
                        "action_dim": c.test_action_dim * c.test_agent_num,
                    }
                ]
                * c.test_agent_num
            ]
            * 2
        )

        maddpg = MADDPG.init_from_config(config)

        old_state = state = t.zeros([1, c.test_observe_dim], dtype=t.float32)
        action = t.zeros([1, c.test_action_dim], dtype=t.float32)
        maddpg.store_episodes(
            [
                [
                    {
                        "state": {"state": old_state},
                        "action": {"action": action},
                        "next_state": {"state": state},
                        "reward": 0,
                        "terminal": False,
                    }
                ]
            ]
            * c.test_agent_num
        )
        maddpg.update()
Example #3
    def maddpg_vis(self, train_config, device, dtype, tmpdir):
        c = train_config
        tmp_dir = tmpdir.make_numbered_dir()
        actor = smw(
            Actor(c.test_observe_dim, c.test_action_dim, c.test_action_range)
            .type(dtype)
            .to(device),
            device,
            device,
        )
        critic = smw(
            Critic(
                c.test_observe_dim * c.test_agent_num,
                c.test_action_dim * c.test_agent_num,
            )
            .type(dtype)
            .to(device),
            device,
            device,
        )

        maddpg = MADDPG(
            [deepcopy(actor) for _ in range(c.test_agent_num)],
            [deepcopy(actor) for _ in range(c.test_agent_num)],
            [deepcopy(critic) for _ in range(c.test_agent_num)],
            [deepcopy(critic) for _ in range(c.test_agent_num)],
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=c.replay_size,
            visualize=True,
            visualize_dir=str(tmp_dir),
        )
        return maddpg
Example #4
    def maddpg_cont(self, train_config, device, dtype):
        c = train_config
        actor = smw(
            Actor(c.test_observe_dim, c.test_action_dim, c.test_action_range)
            .type(dtype)
            .to(device),
            device,
            device,
        )
        critic = smw(
            Critic(
                c.test_observe_dim * c.test_agent_num,
                c.test_action_dim * c.test_agent_num,
            )
            .type(dtype)
            .to(device),
            device,
            device,
        )

        maddpg = MADDPG(
            [deepcopy(actor) for _ in range(c.test_agent_num)],
            [deepcopy(actor) for _ in range(c.test_agent_num)],
            [deepcopy(critic) for _ in range(c.test_agent_num)],
            [deepcopy(critic) for _ in range(c.test_agent_num)],
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=c.replay_size,
        )
        return maddpg
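The fixture above only constructs the framework. Below is a hedged sketch of drawing exploratory actions from it, assuming MADDPG mirrors DDPG's act_with_noise interface in machin (one observation dict per agent, Gaussian noise selected with noise_param and mode="normal"); the keyword names and the list-of-actions return value are assumptions, not taken from the original suite.

    def test_act_with_noise(self, train_config, maddpg_cont):
        c = train_config
        state = t.zeros([1, c.test_observe_dim], dtype=t.float32)
        # Assumed interface: additive Gaussian exploration noise on each
        # agent's deterministic action; one result per agent.
        actions = maddpg_cont.act_with_noise(
            [{"state": state}] * c.test_agent_num,
            noise_param=(0.0, 0.2),
            mode="normal",
        )
        assert len(actions) == c.test_agent_num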
Example #5
    def maddpg(self, train_config, device, dtype):
        c = train_config
        # For simplicity, the prey is trained together with the predators;
        # predators can observe the prey, and the prey can observe the predators.
        actor = smw(
            ActorDiscrete(c.observe_dim, c.action_num).type(dtype).to(device),
            device,
            device,
        )
        critic = smw(
            Critic(c.observe_dim * c.agent_num, c.action_num * c.agent_num)
            .type(dtype)
            .to(device),
            device,
            device,
        )
        # Setting the visible indexes to [[0], [1], [2]] would be equivalent
        # to training each agent with its own independent DDPG.
        maddpg = MADDPG(
            [deepcopy(actor) for _ in range(c.agent_num)],
            [deepcopy(actor) for _ in range(c.agent_num)],
            [deepcopy(critic) for _ in range(c.agent_num)],
            [deepcopy(critic) for _ in range(c.agent_num)],
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=c.replay_size,
            pool_type="thread",
        )
        return maddpg
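The comment above mentions per-critic visibility, but this particular call leaves it at its default (every critic sees every actor). Examples #1 and #6 pass the same kind of value positionally; in newer machin versions it appears as a keyword, commonly critic_visible_actors. Below is a hedged sketch of the keyword form; the parameter name, its exact semantics, and the reduced critic dimensions are assumptions.

        # Hedged sketch: restricting each critic to seeing only its own actor
        # reduces MADDPG to per-agent DDPG. The keyword name
        # critic_visible_actors is an assumption (older versions take the same
        # value positionally, as in Examples #1 and #6); note the critic input
        # dimensions shrink to a single agent's observation and action.
        solo_critic = smw(
            Critic(c.observe_dim, c.action_num).type(dtype).to(device),
            device,
            device,
        )
        maddpg = MADDPG(
            [deepcopy(actor) for _ in range(c.agent_num)],
            [deepcopy(actor) for _ in range(c.agent_num)],
            [deepcopy(solo_critic) for _ in range(c.agent_num)],
            [deepcopy(solo_critic) for _ in range(c.agent_num)],
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            critic_visible_actors=[[i] for i in range(c.agent_num)],  # e.g. [[0], [1], [2]]
            replay_device="cpu",
            replay_size=c.replay_size,
        )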
Example #6
    def maddpg_disc(self, train_config, device, dtype):
        c = train_config
        actor = smw(
            ActorDiscrete(c.test_observe_dim,
                          c.test_action_dim).type(dtype).to(device), device,
            device)
        critic = smw(
            Critic(c.test_observe_dim * c.test_agent_num, c.test_action_dim *
                   c.test_agent_num).type(dtype).to(device), device, device)

        maddpg = MADDPG([deepcopy(actor) for _ in range(c.test_agent_num)],
                        [deepcopy(actor) for _ in range(c.test_agent_num)],
                        [deepcopy(critic) for _ in range(c.test_agent_num)],
                        [deepcopy(critic) for _ in range(c.test_agent_num)],
                        [list(range(c.test_agent_num))] * c.test_agent_num,
                        t.optim.Adam,
                        nn.MSELoss(reduction='sum'),
                        replay_device="cpu",
                        replay_size=c.replay_size)
        return maddpg
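A hedged sketch of one replay-and-update step with this discrete fixture, following the storage pattern shown in Example #2. The assumption is that the discrete variant replays a per-agent probability (or one-hot) action tensor of shape [1, test_action_dim], matching the critic's per-agent action input size, rather than an integer action index.

    def test_store_and_update(self, train_config, maddpg_disc):
        c = train_config
        state = t.zeros([1, c.test_observe_dim], dtype=t.float32)
        # Assumption: the replayed action is a probability / one-hot tensor
        # sized to the critic's per-agent action slice (test_action_dim).
        action = t.zeros([1, c.test_action_dim], dtype=t.float32)
        maddpg_disc.store_episodes(
            [
                [
                    {
                        "state": {"state": state},
                        "action": {"action": action},
                        "next_state": {"state": state},
                        "reward": 0,
                        "terminal": False,
                    }
                ]
            ]
            * c.test_agent_num
        )
        maddpg_disc.update()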