Example 1
def impala(device, dtype, use_lr_sch=False):
    c = TestIMPALA.c
    actor = smw(
        Actor(c.observe_dim, c.action_num).type(dtype).to(device), device,
        device)
    critic = smw(
        Critic(c.observe_dim).type(dtype).to(device), device, device)
    servers = model_server_helper(model_num=1)
    world = get_world()
    # processes 0 and 1 will be workers, and 2 will be the trainer
    impala_group = world.create_rpc_group("impala", ["0", "1", "2"])

    if use_lr_sch:
        lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                         logger=default_logger)
        impala = IMPALA(actor,
                        critic,
                        t.optim.Adam,
                        nn.MSELoss(reduction='sum'),
                        impala_group,
                        servers,
                        lr_scheduler=LambdaLR,
                        lr_scheduler_args=((lr_func, ), (lr_func, )))
    else:
        impala = IMPALA(actor, critic, t.optim.Adam,
                        nn.MSELoss(reduction='sum'), impala_group, servers)
    return impala
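
The lr_scheduler_args value holds one tuple of positional arguments per optimizer, so ((lr_func, ), (lr_func, )) attaches the same schedule to both the actor and the critic optimizers. A minimal sketch of what that wiring amounts to in plain torch (the Linear stand-ins and the inline lr_func are hypothetical, not part of the framework):

    import torch as t
    from torch.optim.lr_scheduler import LambdaLR

    actor_net, critic_net = t.nn.Linear(4, 2), t.nn.Linear(4, 1)  # stand-ins
    actor_opt = t.optim.Adam(actor_net.parameters(), lr=1.0)
    critic_opt = t.optim.Adam(critic_net.parameters(), lr=1.0)

    # Hypothetical stand-in for the function gen_learning_rate_func returns.
    def lr_func(step):
        return 1e-3 if step < 200000 else 3e-4

    # One LambdaLR per optimizer; each (lr_func, ) tuple becomes the
    # scheduler's positional arguments after the optimizer itself.
    actor_sch = LambdaLR(actor_opt, lr_func)
    critic_sch = LambdaLR(critic_opt, lr_func)
    actor_sch.step()
    critic_sch.step()

Note that LambdaLR multiplies the optimizer's initial learning rate by the lambda's return value; the initial rate is set to 1.0 in this sketch so that the returned values behave as absolute rates.
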
Example 2
def a2c_lr(self, train_config):
    # not used for training, only used for testing apis
    c = train_config
    actor = smw(
        Actor(c.observe_dim, c.action_num).to(c.device), c.device,
        c.device)
    critic = smw(Critic(c.observe_dim).to(c.device), c.device, c.device)
    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                     logger=logger)
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        _ = A2C(actor,
                critic,
                t.optim.Adam,
                nn.MSELoss(reduction='sum'),
                replay_device=c.device,
                replay_size=c.replay_size,
                lr_scheduler=LambdaLR)
    a2c = A2C(actor,
              critic,
              t.optim.Adam,
              nn.MSELoss(reduction='sum'),
              replay_device=c.device,
              replay_size=c.replay_size,
              lr_scheduler=LambdaLR,
              lr_scheduler_args=((lr_func, ), (lr_func, )))
    return a2c
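
The pytest.raises block exercises the failure path: torch's LambdaLR requires lr_lambda as a positional argument, so passing lr_scheduler=LambdaLR without lr_scheduler_args presumably leaves the framework unable to construct the scheduler, and the resulting TypeError ("missing 1 required positional argument") matches the "missing .+ positional argument" pattern.
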
Example 3
def dqn_lr(self, train_config):
    # not used for training, only used for testing apis
    c = train_config
    q_net = smw(
        QNet(c.observe_dim, c.action_num).to(c.device), c.device, c.device)
    q_net_t = smw(
        QNet(c.observe_dim, c.action_num).to(c.device), c.device, c.device)
    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                     logger=logger)
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        _ = DQN(q_net,
                q_net_t,
                t.optim.Adam,
                nn.MSELoss(reduction='sum'),
                replay_device=c.device,
                replay_size=c.replay_size,
                lr_scheduler=LambdaLR)
    dqn = DQN(q_net,
              q_net_t,
              t.optim.Adam,
              nn.MSELoss(reduction='sum'),
              replay_device=c.device,
              replay_size=c.replay_size,
              lr_scheduler=LambdaLR,
              lr_scheduler_args=((lr_func, ), ))
    return dqn
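
Unlike A2C above, DQN is given a single argument tuple, ((lr_func, ), ). This is consistent with the target network q_net_t being updated by copying parameters from q_net rather than by its own gradient steps, so only the online network's optimizer appears to need a scheduler.
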
Example 4
def maddpg_lr(self, train_config, device, dtype):
    c = train_config
    actor = smw(
        Actor(c.test_observe_dim, c.test_action_dim,
              c.test_action_range).type(dtype).to(device), device, device)
    critic = smw(
        Critic(c.test_observe_dim * c.test_agent_num, c.test_action_dim *
               c.test_agent_num).type(dtype).to(device), device, device)
    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                     logger=logger)
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        _ = MADDPG([deepcopy(actor) for _ in range(c.test_agent_num)],
                   [deepcopy(actor) for _ in range(c.test_agent_num)],
                   [deepcopy(critic) for _ in range(c.test_agent_num)],
                   [deepcopy(critic) for _ in range(c.test_agent_num)],
                   [list(range(c.test_agent_num))] * c.test_agent_num,
                   t.optim.Adam,
                   nn.MSELoss(reduction='sum'),
                   replay_device="cpu",
                   replay_size=c.replay_size,
                   lr_scheduler=LambdaLR)
    maddpg = MADDPG([deepcopy(actor) for _ in range(c.test_agent_num)],
                    [deepcopy(actor) for _ in range(c.test_agent_num)],
                    [deepcopy(critic) for _ in range(c.test_agent_num)],
                    [deepcopy(critic) for _ in range(c.test_agent_num)],
                    [list(range(c.test_agent_num))] * c.test_agent_num,
                    t.optim.Adam,
                    nn.MSELoss(reduction='sum'),
                    replay_device="cpu",
                    replay_size=c.replay_size,
                    lr_scheduler=LambdaLR,
                    lr_scheduler_args=((lr_func, ), (lr_func, )))
    return maddpg
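
Each agent receives independent deep copies of the networks (the first two lists are the online and target actors, the next two the online and target critics), and [list(range(c.test_agent_num))] * c.test_agent_num appears to let every agent's centralized critic observe all agents, in line with MADDPG's centralized-critic design. The two scheduler tuples presumably cover the actor and critic optimizers of each agent.
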
Example 5
def td3_lr(self, train_config, device, dtype):
    # not used for training, only used for testing apis
    c = train_config
    actor = smw(ActorDiscrete(c.observe_dim, c.action_dim)
                .type(dtype).to(device), device, device)
    actor_t = smw(ActorDiscrete(c.observe_dim, c.action_dim)
                  .type(dtype).to(device), device, device)
    critic = smw(Critic(c.observe_dim, c.action_dim)
                 .type(dtype).to(device), device, device)
    critic_t = smw(Critic(c.observe_dim, c.action_dim)
                   .type(dtype).to(device), device, device)
    critic2 = smw(Critic(c.observe_dim, c.action_dim)
                  .type(dtype).to(device), device, device)
    critic2_t = smw(Critic(c.observe_dim, c.action_dim)
                    .type(dtype).to(device), device, device)
    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                     logger=logger)
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        _ = TD3(actor, actor_t, critic, critic_t, critic2, critic2_t,
                t.optim.Adam,
                nn.MSELoss(reduction='sum'),
                replay_device="cpu",
                replay_size=c.replay_size,
                lr_scheduler=LambdaLR)
    td3 = TD3(actor, actor_t, critic, critic_t, critic2, critic2_t,
              t.optim.Adam,
              nn.MSELoss(reduction='sum'),
              replay_device="cpu",
              replay_size=c.replay_size,
              lr_scheduler=LambdaLR,
              lr_scheduler_args=((lr_func,), (lr_func,), (lr_func,)))
    return td3
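
The three scheduler tuples line up with TD3's three trained networks, one actor and two critics; the target copies (actor_t, critic_t, critic2_t) are maintained by soft updates and so get no scheduler of their own.
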
Example 6
def ars_lr(device, dtype):
    c = TestARS.c
    actor = smw(
        ActorDiscrete(c.observe_dim, c.action_num).type(dtype).to(device),
        device, device)
    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                     logger=default_logger)
    servers = model_server_helper(model_num=1)
    world = get_world()
    ars_group = world.create_rpc_group("ars", ["0", "1", "2"])
    ars = ARS(actor,
              t.optim.SGD,
              ars_group,
              servers,
              noise_size=1000000,
              lr_scheduler=LambdaLR,
              lr_scheduler_args=((lr_func, ), ))
    return ars
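
ARS trains only a single actor and no critic, so one (lr_func, ) tuple suffices. As in the IMPALA example, the setup is distributed: an RPC group enlists processes "0", "1" and "2", plus a model server created by model_server_helper.
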
Example 7
def gail_lr(self, train_config, device, dtype):
    # not used for training, only used for testing apis
    c = train_config
    actor = smw(
        Actor(c.observe_dim, c.action_num).type(dtype).to(device), device,
        device)
    critic = smw(
        Critic(c.observe_dim).type(dtype).to(device), device, device)
    discriminator = smw(
        Discriminator(c.observe_dim, c.action_num).type(dtype).to(device),
        device,
        device,
    )
    ppo = PPO(
        actor,
        critic,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=c.replay_size,
    )
    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                     logger=logger)
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        _ = GAIL(
            discriminator,
            ppo,
            t.optim.Adam,
            expert_replay_device="cpu",
            expert_replay_size=c.replay_size,
            lr_scheduler=LambdaLR,
        )

    gail = GAIL(
        discriminator,
        ppo,
        t.optim.Adam,
        expert_replay_device="cpu",
        expert_replay_size=c.replay_size,
        lr_scheduler=LambdaLR,
        lr_scheduler_args=((lr_func, ), ),
    )
    return gail
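
GAIL here wraps an already constructed PPO instance and adds a discriminator with its own optimizer and expert replay buffer; the single (lr_func, ) tuple presumably schedules only the discriminator's optimizer, while the inner PPO keeps the configuration it was built with (no scheduler in this example).
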
Example 8
def test_gen_learning_rate_func():
    func = gen_learning_rate_func([(0, 1e-3), (20000, 1e-3)], default_logger)
    func(10000)
    func(20001)
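
The schedule points are (start_step, learning_rate) pairs, with each rate presumably applying from its step onward, so both probe calls fall inside the schedule. A minimal sketch of such a piecewise-constant function (a hypothetical stand-in, not the library's actual implementation):

    def make_piecewise_lr(points):
        # points: [(start_step, lr), ...] sorted by start_step;
        # each lr applies from its start_step onward.
        def lr_func(step):
            lr = points[0][1]
            for start, value in points:
                if step >= start:
                    lr = value
            return lr
        return lr_func

    func = make_piecewise_lr([(0, 1e-3), (20000, 1e-3)])
    assert func(10000) == 1e-3
    assert func(20001) == 1e-3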