def test_writes_loss(self):
     """Check that ``_writer.write_loss`` reflects the ``write_loss`` argument."""

     def writer_flag(flag):
         # Build a fresh quiet experiment and report the flag held by its
         # writer.
         created = MockExperiment(a2c(),
                                  self.env,
                                  quiet=True,
                                  write_loss=flag)
         return created._writer.write_loss

     self.assertTrue(writer_flag(True))
     self.assertFalse(writer_flag(False))
 def setUp(self):
     """Seed the RNGs and build the shared environment and experiment."""
     # Pin both the PyTorch and NumPy generators to seed 0 before anything
     # else is constructed.
     torch.manual_seed(0)
     np.random.seed(0)
     self.env = GymEnvironment('CartPole-v0')
     self.experiment = MockExperiment(a2c(), self.env, quiet=True)
     # Each environment held by the experiment is seeded with its own index.
     for index, sub_env in enumerate(self.experiment._envs):
         sub_env.seed(index)
 def test_runs_multi_env(self):
     """Run ``a2c(n_envs=3)`` for 3 episodes and expect 3 logged returns."""
     multi_env_run = MockExperiment(a2c(n_envs=3),
                                    self.env,
                                    quiet=True,
                                    episodes=3)
     # The mock writer keeps logged series in ``data``; read the values
     # recorded under the per-episode evaluation returns key.
     logged = multi_env_run._writer.data["evaluation/returns/episode"]
     episode_returns = logged["values"]
     self.assertEqual(len(episode_returns), 3)
def main():
    """Run DQN and A2C on CartPole and Acrobot on the CPU, then plot returns."""
    device = 'cpu'
    timesteps = 40000
    # Agents are created before the environments, as in the original call.
    agents = [dqn(), a2c()]
    envs = [
        GymEnvironment(env_name, device)
        for env_name in ('CartPole-v0', 'Acrobot-v1')
    ]
    run_experiment(agents, envs, timesteps)
    # Plot from the 'runs' directory using the same timestep budget.
    plot_returns_100('runs', timesteps=timesteps)
Ejemplo n.º 5
0
 def test_a2c_(self):
     """Pass a default ``a2c()`` instance to ``self.validate``."""
     agent = a2c()
     self.validate(agent)