def test_writes_loss(self):
    """The ``write_loss`` argument should be reflected on the experiment's writer."""
    # Pair each flag value with the assertion that must hold for it;
    # the True case is exercised first, then the False case.
    cases = (
        (True, self.assertTrue),
        (False, self.assertFalse),
    )
    for flag, check in cases:
        experiment = MockExperiment(a2c(), self.env, quiet=True, write_loss=flag)
        check(experiment._writer.write_loss)
def setUp(self):
    """Seed the RNGs, then build the environment and experiment used by the tests."""
    # Fix both the NumPy and the PyTorch seeds before anything is constructed.
    np.random.seed(0)
    torch.manual_seed(0)

    self.env = GymEnvironment('CartPole-v0')
    experiment = MockExperiment(a2c(), self.env, quiet=True)
    self.experiment = experiment

    # Seed each of the experiment's environments with its own index.
    for seed, sub_env in enumerate(experiment._envs):
        sub_env.seed(seed)
def test_runs_multi_env(self):
    """An experiment with ``n_envs=3`` and ``episodes=3`` should log three episode returns."""
    experiment = MockExperiment(
        a2c(n_envs=3),
        self.env,
        quiet=True,
        episodes=3,
    )
    logged = experiment._writer.data["evaluation/returns/episode"]
    episode_returns = logged["values"]
    self.assertEqual(len(episode_returns), 3)
def main():
    """Run dqn and a2c on CartPole-v0 and Acrobot-v1, then plot the returns."""
    device = 'cpu'
    timesteps = 40000

    # Agents are constructed before the environments, matching the order in
    # which the arguments to run_experiment were originally evaluated.
    agents = [dqn(), a2c()]
    environments = [
        GymEnvironment(env_name, device)
        for env_name in ('CartPole-v0', 'Acrobot-v1')
    ]

    run_experiment(agents, environments, timesteps)
    plot_returns_100('runs', timesteps=timesteps)
def test_a2c_(self):
    """Pass a default-constructed ``a2c()`` to ``self.validate``."""
    validate = self.validate
    preset = a2c()
    validate(preset)