Esempio n. 1
0
 def create_agent(self, env):
     """Build an A3C agent for ``env`` with separate policy and value parts.

     The policy and value function are produced by the
     ``create_stochastic_policy_for_env`` and ``create_v_function_for_env``
     helpers, wrapped in an ``A3CSeparateModel`` and optimized with an
     ``Adam`` optimizer constructed with its default arguments.

     Args:
         env: Environment handed to both helper factories.

     Returns:
         An ``agents.A3C`` instance configured with ``t_max=1`` and
         ``gamma=0.99``.
     """
     # Policy first, then value function: same evaluation order as when
     # both are built inline as keyword arguments.
     policy = create_stochastic_policy_for_env(env)
     value_function = create_v_function_for_env(env)
     separate_model = agents.a3c.A3CSeparateModel(pi=policy, v=value_function)

     optimizer = optimizers.Adam()
     optimizer.setup(separate_model)

     return agents.A3C(separate_model, optimizer, t_max=1, gamma=0.99)
Esempio n. 2
0
 def _test_load_a3c(self, gpu):
     """Load the downloaded A3C Breakout model into a freshly built agent.

     A new ``A3CFF(4)`` network and ``RMSpropAsync`` optimizer are wrapped
     in an ``agents.A3C`` agent, then the result of ``download_model`` for
     ``"BreakoutNoFrameskip-v4"`` (of type ``self.pretrained_type``) is
     passed to ``agent.load``.

     Args:
         gpu: Not used by this helper.
     """

     def identity(x):
         # Observation preprocessor that hands its input back unchanged.
         return x

     network = A3CFF(4)
     optimizer = rmsprop_async.RMSpropAsync(lr=7e-4, eps=1e-1, alpha=0.99)
     optimizer.setup(network)
     agent = agents.A3C(
         network, optimizer, t_max=5, gamma=0.99, beta=1e-2, phi=identity)

     # download_model returns a pair; the first item is what agent.load
     # consumes. The second is presumably a "was already cached" flag,
     # judging by the environment variable checked below -- TODO confirm.
     pretrained, exists = download_model(
         "A3C", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type)
     agent.load(pretrained)

     # Only enforce the flag when the environment explicitly asks for it.
     enforce_cached = os.environ.get(
         'CHAINERRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED')
     if enforce_cached:
         assert exists