Esempio n. 1
0
class TestSingleEnvExperiment(unittest.TestCase):
    """Checks what a MockExperiment built around ``dqn()`` hands to its writer.

    Every assertion reads from ``experiment._writer``: its ``label``, its
    ``write_loss`` flag, or its ``data`` mapping of logged series.
    """

    def setUp(self):
        # Seed NumPy, PyTorch and the environment with 0 before each test.
        # NOTE(review): the literal returns asserted below presumably hold
        # only for this seeding -- confirm before changing any seed.
        np.random.seed(0)
        torch.manual_seed(0)
        cartpole = GymEnvironment('CartPole-v0')
        self.env = cartpole
        cartpole.seed(0)
        self.experiment = None

    def _make_experiment(self, **options):
        """Return a quiet MockExperiment for a new ``dqn()`` on ``self.env``.

        Extra keyword arguments are forwarded to MockExperiment unchanged.
        """
        return MockExperiment(dqn(), self.env, quiet=True, **options)

    def test_adds_label(self):
        # The writer label is compared against a fixed expected string.
        label = self._make_experiment()._writer.label
        self.assertEqual(label, "_dqn_CartPole-v0")

    def test_writes_training_returns_eps(self):
        # After three training episodes, the per-episode series must hold the
        # expected returns at steps 1, 2 and 3.
        experiment = self._make_experiment()
        experiment.train(episodes=3)
        episode_log = experiment._writer.data["evaluation/returns/episode"]
        np.testing.assert_equal(
            episode_log["values"],
            np.array([22.0, 20.0, 24.0]),
        )
        np.testing.assert_equal(
            episode_log["steps"],
            np.array([1, 2, 3]),
        )

    def test_writes_test_returns(self):
        # Train for five episodes, run four test episodes, then compare both
        # the returned values and the logged summary statistics.
        experiment = self._make_experiment()
        experiment.train(episodes=5)
        returns = experiment.test(episodes=4)
        expected_mean, expected_std = 9.5, 0.5
        np.testing.assert_equal(np.mean(returns), expected_mean)

        logged = experiment._writer.data
        mean_log = logged["evaluation/returns-test/mean"]
        np.testing.assert_equal(
            mean_log["values"],
            np.array([expected_mean]),
        )
        np.testing.assert_equal(
            logged["evaluation/returns-test/std"]["values"],
            np.array([expected_std]),
        )
        # The single mean entry is expected at step 95.
        np.testing.assert_equal(
            mean_log["steps"],
            np.array([95.]),
        )

    def test_writes_loss(self):
        # The ``write_loss`` argument must show up on the writer, first for
        # True and then for False (same order as two separate constructions).
        for flag in (True, False):
            experiment = self._make_experiment(write_loss=flag)
            check = self.assertTrue if flag else self.assertFalse
            check(experiment._writer.write_loss)
Esempio n. 2
0
class TestParallelEnvExperiment(unittest.TestCase):
    """Checks what a MockExperiment built around an ``a2c`` agent logs.

    ``setUp`` creates one shared experiment; the tests inspect its
    ``_writer`` (``label``, ``write_loss`` and the ``data`` mapping).
    """

    def setUp(self):
        # Seed NumPy, PyTorch and the base environment with 0, build the
        # shared experiment, then seed each entry of ``_envs`` with its index.
        # NOTE(review): the literal returns asserted below presumably depend
        # on this exact seeding -- confirm before changing it.
        np.random.seed(0)
        torch.manual_seed(0)
        base_env = GymEnvironment('CartPole-v0')
        self.env = base_env
        base_env.seed(0)
        self.experiment = self._experiment_on(self.env, quiet=True)
        for index, sub_env in enumerate(self.experiment._envs):
            sub_env.seed(index)

    def make_agent(self):
        """Build the agent via the ``a2c`` builder, on CPU, for ``self.env``."""
        return a2c.device('cpu').env(self.env).build()

    def _experiment_on(self, env, **options):
        """Return a MockExperiment for a new agent on ``env``.

        Keyword arguments are forwarded to MockExperiment unchanged.
        """
        return MockExperiment(self.make_agent(), env, **options)

    def test_adds_default_label(self):
        # With no explicit name, the shared experiment's writer label is
        # compared against a fixed expected string.
        label = self.experiment._writer.label
        self.assertEqual(label, "a2c_CartPole-v0")

    def test_adds_custom_label(self):
        # A separate environment and an explicit ``name`` are used here.
        fresh_env = GymEnvironment('CartPole-v0')
        experiment = self._experiment_on(fresh_env, name='a2c', quiet=True)
        self.assertEqual(experiment._writer.label, "a2c_CartPole-v0")

    def test_writes_training_returns_eps(self):
        # After three training episodes, the per-episode series must sit at
        # steps 1, 2 and 3 with the expected returns.
        self.experiment.train(episodes=3)
        episode_log = self.experiment._writer.data["evaluation/returns/episode"]
        np.testing.assert_equal(
            episode_log["steps"],
            np.array([1, 2, 3]),
        )
        np.testing.assert_equal(
            episode_log["values"],
            np.array([10., 11., 17.]),
        )

    def test_writes_test_returns(self):
        # Four test episodes must yield four returns, and the logged mean and
        # std must equal the statistics recomputed from those returns.
        self.experiment.train(episodes=5)
        returns = self.experiment.test(episodes=4)
        self.assertEqual(len(returns), 4)

        logged = self.experiment._writer.data
        expected_stats = (
            ("evaluation/returns-test/mean", np.mean),
            ("evaluation/returns-test/std", np.std),
        )
        for key, statistic in expected_stats:
            np.testing.assert_equal(
                logged[key]["values"],
                np.array([statistic(returns)]),
            )

    def test_writes_loss(self):
        # The ``write_loss`` argument must show up on the writer, first for
        # True and then for False.
        for flag in (True, False):
            experiment = self._experiment_on(
                self.env, quiet=True, write_loss=flag)
            check = self.assertTrue if flag else self.assertFalse
            check(experiment._writer.write_loss)
class TestExperiment(unittest.TestCase):
    """Checks the writer of a MockExperiment constructed with ``episodes=3``.

    Each test builds the experiment and inspects ``experiment._writer``
    straight away; no test calls ``train`` explicitly.
    """

    def setUp(self):
        # Seed NumPy, PyTorch and the environment with 0 before each test.
        # NOTE(review): the literal returns asserted below presumably hold
        # only for this seeding -- confirm before changing any seed.
        np.random.seed(0)
        torch.manual_seed(0)
        cartpole = GymEnvironment('CartPole-v0')
        self.env = cartpole
        cartpole.seed(0)
        self.experiment = None

    def _build(self, agent, **options):
        """Return a quiet MockExperiment for ``agent`` on ``self.env``.

        Keyword arguments are forwarded to MockExperiment unchanged.
        """
        return MockExperiment(agent, self.env, quiet=True, **options)

    def test_adds_label(self):
        # The writer label is compared against a fixed expected string.
        label = self._build(dqn(), episodes=3)._writer.label
        self.assertEqual(label, "_dqn_CartPole-v0")

    def test_writes_returns_eps(self):
        # The per-episode series must hold the expected returns at
        # steps 1, 2 and 3.
        experiment = self._build(dqn(), episodes=3)
        episode_log = experiment._writer.data["evaluation/returns/episode"]
        np.testing.assert_equal(
            episode_log["values"],
            np.array([14.0, 19.0, 26.0]),
        )
        np.testing.assert_equal(
            episode_log["steps"],
            np.array([1, 2, 3]),
        )

    def test_writes_loss(self):
        # The ``write_loss`` argument must show up on the writer, first for
        # True and then for False.
        for flag in (True, False):
            experiment = self._build(dqn(), write_loss=flag, episodes=3)
            check = self.assertTrue if flag else self.assertFalse
            check(experiment._writer.write_loss)

    def test_runs_multi_env(self):
        # With an ``a2c`` agent configured for three environments, exactly
        # three episode returns must be logged.
        experiment = self._build(a2c(n_envs=3), episodes=3)
        logged_returns = (
            experiment._writer.data["evaluation/returns/episode"]["values"])
        self.assertEqual(len(logged_returns), 3)