def impala(device, dtype, use_lr_sch=False):
    """Construct an IMPALA framework instance for distributed tests.

    Args:
        device: Device the actor/critic networks are moved to.
        dtype: Floating dtype applied to the networks via ``.type``.
        use_lr_sch: When True, attach a ``LambdaLR`` scheduler to both the
            actor and the critic optimizers.

    Returns:
        A configured ``IMPALA`` instance.
    """
    cfg = TestIMPALA.c
    wrapped_actor = smw(
        Actor(cfg.observe_dim, cfg.action_num).type(dtype).to(device),
        device, device)
    wrapped_critic = smw(
        Critic(cfg.observe_dim).type(dtype).to(device),
        device, device)
    servers = model_server_helper(model_num=1)
    world = get_world()
    # process 0 and 1 will be workers, and 2 will be trainer
    impala_group = world.create_rpc_group("impala", ["0", "1", "2"])

    # Collect the optional scheduler keywords in one place so a single
    # constructor call covers both configurations.
    scheduler_kwargs = {}
    if use_lr_sch:
        lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                         logger=default_logger)
        scheduler_kwargs = {
            "lr_scheduler": LambdaLR,
            # One args tuple per optimizer (actor, critic).
            "lr_scheduler_args": ((lr_func,), (lr_func,)),
        }
    return IMPALA(wrapped_actor, wrapped_critic,
                  t.optim.Adam,
                  nn.MSELoss(reduction='sum'),
                  impala_group,
                  servers,
                  **scheduler_kwargs)
def a2c_lr(self, train_config):
    """Build an A2C instance wired to a ``LambdaLR`` scheduler.

    Not used for training — only exercises the scheduler-related
    constructor API, including the failure mode where the scheduler
    class is given without its argument tuples.
    """
    cfg = train_config
    wrapped_actor = smw(
        Actor(cfg.observe_dim, cfg.action_num).to(cfg.device),
        cfg.device, cfg.device)
    wrapped_critic = smw(Critic(cfg.observe_dim).to(cfg.device),
                         cfg.device, cfg.device)
    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                     logger=logger)
    # A scheduler class without its args must raise a TypeError.
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        _ = A2C(wrapped_actor, wrapped_critic,
                t.optim.Adam,
                nn.MSELoss(reduction='sum'),
                replay_device=cfg.device,
                replay_size=cfg.replay_size,
                lr_scheduler=LambdaLR)
    # One args tuple per optimizer (actor, critic).
    return A2C(wrapped_actor, wrapped_critic,
               t.optim.Adam,
               nn.MSELoss(reduction='sum'),
               replay_device=cfg.device,
               replay_size=cfg.replay_size,
               lr_scheduler=LambdaLR,
               lr_scheduler_args=((lr_func,), (lr_func,)))
def dqn_lr(self, train_config):
    """Build a DQN instance wired to a ``LambdaLR`` scheduler.

    Not used for training — only exercises the scheduler-related
    constructor API, including the failure mode where the scheduler
    class is given without its argument tuples.
    """
    cfg = train_config
    online_net = smw(
        QNet(cfg.observe_dim, cfg.action_num).to(cfg.device),
        cfg.device, cfg.device)
    target_net = smw(
        QNet(cfg.observe_dim, cfg.action_num).to(cfg.device),
        cfg.device, cfg.device)
    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                     logger=logger)
    # A scheduler class without its args must raise a TypeError.
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        _ = DQN(online_net, target_net,
                t.optim.Adam,
                nn.MSELoss(reduction='sum'),
                replay_device=cfg.device,
                replay_size=cfg.replay_size,
                lr_scheduler=LambdaLR)
    # DQN has a single optimizer, hence a single args tuple.
    return DQN(online_net, target_net,
               t.optim.Adam,
               nn.MSELoss(reduction='sum'),
               replay_device=cfg.device,
               replay_size=cfg.replay_size,
               lr_scheduler=LambdaLR,
               lr_scheduler_args=((lr_func,),))
def maddpg_lr(self, train_config, device, dtype):
    """Build a MADDPG instance wired to ``LambdaLR`` schedulers.

    Only exercises the scheduler-related constructor API, including the
    failure mode where the scheduler class is given without its argument
    tuples.
    """
    cfg = train_config
    actor_template = smw(
        Actor(cfg.test_observe_dim, cfg.test_action_dim,
              cfg.test_action_range).type(dtype).to(device),
        device, device)
    critic_template = smw(
        Critic(cfg.test_observe_dim * cfg.test_agent_num,
               cfg.test_action_dim * cfg.test_agent_num)
        .type(dtype).to(device),
        device, device)
    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                     logger=logger)

    def clones(template):
        # One independent copy of the wrapped model per agent.
        return [deepcopy(template) for _ in range(cfg.test_agent_num)]

    # Every agent observes every agent.
    visibility = [list(range(cfg.test_agent_num))] * cfg.test_agent_num

    # A scheduler class without its args must raise a TypeError.
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        _ = MADDPG(clones(actor_template), clones(actor_template),
                   clones(critic_template), clones(critic_template),
                   visibility,
                   t.optim.Adam,
                   nn.MSELoss(reduction='sum'),
                   replay_device="cpu",
                   replay_size=cfg.replay_size,
                   lr_scheduler=LambdaLR)
    return MADDPG(clones(actor_template), clones(actor_template),
                  clones(critic_template), clones(critic_template),
                  visibility,
                  t.optim.Adam,
                  nn.MSELoss(reduction='sum'),
                  replay_device="cpu",
                  replay_size=cfg.replay_size,
                  lr_scheduler=LambdaLR,
                  lr_scheduler_args=((lr_func,), (lr_func,)))
def td3_lr(self, train_config, device, dtype):
    """Build a TD3 instance wired to ``LambdaLR`` schedulers.

    Not used for training — only exercises the scheduler-related
    constructor API, including the failure mode where the scheduler
    class is given without its argument tuples.
    """
    cfg = train_config

    def wrap(net):
        # Cast to the requested dtype, move to the device, then wrap.
        return smw(net.type(dtype).to(device), device, device)

    actor = wrap(ActorDiscrete(cfg.observe_dim, cfg.action_dim))
    actor_t = wrap(ActorDiscrete(cfg.observe_dim, cfg.action_dim))
    critic = wrap(Critic(cfg.observe_dim, cfg.action_dim))
    critic_t = wrap(Critic(cfg.observe_dim, cfg.action_dim))
    critic2 = wrap(Critic(cfg.observe_dim, cfg.action_dim))
    critic2_t = wrap(Critic(cfg.observe_dim, cfg.action_dim))
    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                     logger=logger)
    # A scheduler class without its args must raise a TypeError.
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        _ = TD3(actor, actor_t, critic, critic_t, critic2, critic2_t,
                t.optim.Adam,
                nn.MSELoss(reduction='sum'),
                replay_device="cpu",
                replay_size=cfg.replay_size,
                lr_scheduler=LambdaLR)
    # Three optimizers (actor, critic, critic2) -> three args tuples.
    return TD3(actor, actor_t, critic, critic_t, critic2, critic2_t,
               t.optim.Adam,
               nn.MSELoss(reduction='sum'),
               replay_device="cpu",
               replay_size=cfg.replay_size,
               lr_scheduler=LambdaLR,
               lr_scheduler_args=((lr_func,), (lr_func,), (lr_func,)))
def ars_lr(device, dtype):
    """Construct an ARS framework instance with a ``LambdaLR`` scheduler.

    Args:
        device: Device the actor network is moved to.
        dtype: Floating dtype applied to the network via ``.type``.

    Returns:
        A configured ``ARS`` instance.
    """
    cfg = TestARS.c
    wrapped_actor = smw(
        ActorDiscrete(cfg.observe_dim, cfg.action_num)
        .type(dtype).to(device),
        device, device)
    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                     logger=default_logger)
    servers = model_server_helper(model_num=1)
    world = get_world()
    ars_group = world.create_rpc_group("ars", ["0", "1", "2"])
    return ARS(wrapped_actor,
               t.optim.SGD,
               ars_group,
               servers,
               noise_size=1000000,
               lr_scheduler=LambdaLR,
               lr_scheduler_args=((lr_func,),))
def gail_lr(self, train_config, device, dtype):
    """Build a GAIL instance wired to a ``LambdaLR`` scheduler.

    Not used for training — only exercises the scheduler-related
    constructor API, including the failure mode where the scheduler
    class is given without its argument tuples.
    """
    cfg = train_config
    wrapped_actor = smw(
        Actor(cfg.observe_dim, cfg.action_num).type(dtype).to(device),
        device, device)
    wrapped_critic = smw(
        Critic(cfg.observe_dim).type(dtype).to(device),
        device, device)
    wrapped_discriminator = smw(
        Discriminator(cfg.observe_dim, cfg.action_num)
        .type(dtype).to(device),
        device, device,
    )
    # GAIL wraps an inner PPO framework.
    ppo = PPO(
        wrapped_actor,
        wrapped_critic,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=cfg.replay_size,
    )
    lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                     logger=logger)
    # A scheduler class without its args must raise a TypeError.
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        _ = GAIL(
            wrapped_discriminator,
            ppo,
            t.optim.Adam,
            expert_replay_device="cpu",
            expert_replay_size=cfg.replay_size,
            lr_scheduler=LambdaLR,
        )
    return GAIL(
        wrapped_discriminator,
        ppo,
        t.optim.Adam,
        expert_replay_device="cpu",
        expert_replay_size=cfg.replay_size,
        lr_scheduler=LambdaLR,
        lr_scheduler_args=((lr_func,),),
    )
def test_gen_learning_rate_func():
    """Smoke-test the generated schedule inside and past the knot range."""
    schedule = gen_learning_rate_func([(0, 1e-3), (20000, 1e-3)],
                                      default_logger)
    for step in (10000, 20001):
        schedule(step)