def a2c_lr(self, train_config):
    """Build an A2C instance with a LambdaLR scheduler (API-testing only).

    Also asserts that constructing A2C with ``lr_scheduler`` but without
    ``lr_scheduler_args`` raises a TypeError from the scheduler.
    """
    cfg = train_config
    actor_net = smw(
        Actor(cfg.observe_dim, cfg.action_num).to(cfg.device),
        cfg.device,
        cfg.device,
    )
    critic_net = smw(
        Critic(cfg.observe_dim).to(cfg.device), cfg.device, cfg.device
    )
    # Piecewise learning-rate schedule: 1e-3 until step 200000, then 3e-4.
    schedule_fn = gen_learning_rate_func(
        [(0, 1e-3), (200000, 3e-4)], logger=logger
    )
    # Missing lr_scheduler_args must surface as a TypeError.
    with pytest.raises(TypeError, match="missing .+ positional argument"):
        _ = A2C(
            actor_net,
            critic_net,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device=cfg.device,
            replay_size=cfg.replay_size,
            lr_scheduler=LambdaLR,
        )
    return A2C(
        actor_net,
        critic_net,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device=cfg.device,
        replay_size=cfg.replay_size,
        lr_scheduler=LambdaLR,
        # One schedule function per optimizer (actor, critic).
        lr_scheduler_args=((schedule_fn,), (schedule_fn,)),
    )
def test_config_init(self, train_config):
    """A2C built from a generated config can store an episode and update."""
    cfg = train_config
    config = A2C.generate_config({})
    config["frame_config"]["models"] = ["Actor", "Critic"]
    config["frame_config"]["model_kwargs"] = [
        {"state_dim": cfg.observe_dim, "action_num": cfg.action_num},
        {"state_dim": cfg.observe_dim},
    ]
    framework = A2C.init_from_config(config)

    state = t.zeros([1, cfg.observe_dim], dtype=t.float32)
    old_state = state
    action = t.zeros([1, 1], dtype=t.int)
    transition = {
        "state": {"state": old_state},
        "action": {"action": action},
        "next_state": {"state": state},
        "reward": 0,
        "terminal": False,
    }
    # Three identical transitions are enough to exercise store/update.
    framework.store_episode([dict(transition) for _ in range(3)])
    framework.update()
def a2c_train(self, train_config):
    """Build a CPU-only A2C instance for full-training tests."""
    cfg = train_config
    # cpu is faster for testing full training.
    actor_net = smw(Actor(cfg.observe_dim, cfg.action_num), "cpu", "cpu")
    critic_net = smw(Critic(cfg.observe_dim), "cpu", "cpu")
    return A2C(
        actor_net,
        critic_net,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=cfg.replay_size,
    )
def a2c(self, train_config):
    """Build a plain A2C instance on the configured device."""
    cfg = train_config
    actor_net = smw(
        Actor(cfg.observe_dim, cfg.action_num).to(cfg.device),
        cfg.device,
        cfg.device,
    )
    critic_net = smw(
        Critic(cfg.observe_dim).to(cfg.device), cfg.device, cfg.device
    )
    return A2C(
        actor_net,
        critic_net,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device=cfg.device,
        replay_size=cfg.replay_size,
    )
def a2c(self, train_config, device, dtype):
    """Build an A2C instance with models cast to ``dtype`` on ``device``.

    The replay buffer stays on cpu regardless of the model device.
    """
    cfg = train_config
    actor_net = smw(
        Actor(cfg.observe_dim, cfg.action_num).type(dtype).to(device),
        device,
        device,
    )
    critic_net = smw(
        Critic(cfg.observe_dim).type(dtype).to(device), device, device
    )
    return A2C(
        actor_net,
        critic_net,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device="cpu",
        replay_size=cfg.replay_size,
    )
def a2c_vis(self, train_config, tmpdir):
    """Build an A2C instance with visualization enabled (API-testing only).

    Visualization output goes to a fresh numbered temp directory.
    """
    cfg = train_config
    vis_dir = tmpdir.make_numbered_dir()
    actor_net = smw(
        Actor(cfg.observe_dim, cfg.action_num).to(cfg.device),
        cfg.device,
        cfg.device,
    )
    critic_net = smw(
        Critic(cfg.observe_dim).to(cfg.device), cfg.device, cfg.device
    )
    return A2C(
        actor_net,
        critic_net,
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        replay_device=cfg.device,
        replay_size=cfg.replay_size,
        visualize=True,
        visualize_dir=str(vis_dir),
    )