class TestReinforce(TestCase):
    """Unit tests for the Reinforce model built from argparse-driven hparams."""

    def setUp(self) -> None:
        """Build the CartPole env, agent, experience source, and Reinforce model.

        NOTE: the original file defined ``setUp`` twice in this class with
        identical behavior; the duplicate has been removed so only one
        definition remains.
        """
        self.env = ToTensor(gym.make("CartPole-v0"))
        self.obs_shape = self.env.observation_space.shape
        self.n_actions = self.env.action_space.n
        self.net = MLP(self.obs_shape, self.n_actions)
        self.agent = Agent(self.net)
        self.exp_source = DiscountedExperienceSource(self.env, self.agent)

        # Parse the model-specific CLI args the same way the training entry
        # point does, so the test exercises the real argument plumbing.
        parent_parser = argparse.ArgumentParser(add_help=False)
        parent_parser = Reinforce.add_model_specific_args(parent_parser)
        args_list = [
            "--env", "CartPole-v0",
            "--batch_size", "32",
            "--gamma", "0.99",
        ]
        self.hparams = parent_parser.parse_args(args_list)
        self.model = Reinforce(**vars(self.hparams))

        self.rl_dataloader = self.model.train_dataloader()

    def test_loss(self):
        """The reinforce loss of a random batch is a torch.Tensor."""
        batch_states = torch.rand(16, 4)
        batch_actions = torch.rand(16).long()
        batch_qvals = torch.rand(16)

        loss = self.model.loss(batch_states, batch_actions, batch_qvals)

        self.assertIsInstance(loss, torch.Tensor)

    def test_get_qvals(self):
        """calc_qvals returns per-step discounted returns for an episode."""
        rewards = np.ones(32)
        qvals = self.model.calc_qvals(rewards)

        self.assertIsInstance(qvals[0], float)
        # Discounted-return recurrence: Q[t] = r[t] + gamma * Q[t+1].
        self.assertEqual(qvals[0], (qvals[1] * self.hparams.gamma) + 1.0)

    def test_calc_q_vals(self):
        """calc_qvals matches hand-computed discounted returns for gamma=0.99."""
        rewards = np.ones(4)
        gt_qvals = [3.9403989999999998, 2.9701, 1.99, 1.0]

        qvals = self.model.calc_qvals(rewards)

        self.assertEqual(gt_qvals, qvals)

    def test_reinforce(self):
        """Smoke test that the reinforce model runs."""
        # NOTE(review): self.trainer is never created in setUp, so this test
        # raises AttributeError as written. TODO: construct the trainer in
        # setUp (e.g. pl.Trainer(fast_dev_run=True)) before relying on it.
        model = Reinforce(self.hparams.env)
        result = self.trainer.fit(model)

        self.assertEqual(result, 1)
# --- Example #4 (scraped-example separator; kept as a comment so the file parses) ---
    def setUp(self) -> None:
        """Create the CartPole env, episodic experience stream, and model
        configured from DQN-style CLI arguments."""
        env = ToTensor(gym.make("CartPole-v0"))
        self.env = env
        self.obs_shape = env.observation_space.shape
        self.n_actions = env.action_space.n
        self.net = MLP(self.obs_shape, self.n_actions)
        self.agent = Agent(self.net)
        self.xp_stream = EpisodicExperienceStream(env, self.agent, Mock(), episodes=4)
        self.rl_dataloader = DataLoader(self.xp_stream)

        # Assemble the parser exactly as the CLI entry point would.
        parser = argparse.ArgumentParser(add_help=False)
        parser = cli.add_base_args(parent=parser)
        parser = DQN.add_model_specific_args(parser)
        cli_args = [
            "--algo", "dqn",
            "--warm_start_steps", "500",
            "--episode_length", "100",
            "--env", "CartPole-v0",
        ]
        self.hparams = parser.parse_args(cli_args)
        # NOTE(review): hparams come from the DQN parser but are fed into
        # Reinforce — presumably the shared args overlap; verify upstream.
        self.model = Reinforce(**vars(self.hparams))
# --- Example #5 (scraped-example separator; kept as a comment so the file parses) ---
    def test_reinforce(self):
        """Smoke test that the reinforce model runs."""
        # Only checks that a full fit completes without raising.
        reinforce_model = Reinforce(self.hparams.env)
        self.trainer.fit(reinforce_model)
# --- Example #6 (scraped-example separator; kept as a comment so the file parses) ---
class TestReinforce(TestCase):
    """Unit tests for the Reinforce model driven by an episodic experience stream."""

    def setUp(self) -> None:
        """Build the CartPole env, agent, experience stream, and model from CLI args."""
        self.env = ToTensor(gym.make("CartPole-v0"))
        self.obs_shape = self.env.observation_space.shape
        self.n_actions = self.env.action_space.n
        self.net = MLP(self.obs_shape, self.n_actions)
        self.agent = Agent(self.net)
        self.xp_stream = EpisodicExperienceStream(self.env, self.agent, Mock(), episodes=4)
        self.rl_dataloader = DataLoader(self.xp_stream)

        # Parse CLI args the same way the training entry point does.
        parent_parser = argparse.ArgumentParser(add_help=False)
        parent_parser = cli.add_base_args(parent=parent_parser)
        parent_parser = DQN.add_model_specific_args(parent_parser)
        args_list = [
            "--algo", "dqn",
            "--warm_start_steps", "500",
            "--episode_length", "100",
            "--env", "CartPole-v0",
        ]
        self.hparams = parent_parser.parse_args(args_list)
        self.model = Reinforce(**vars(self.hparams))

    def test_loss(self):
        """The reinforce loss of a processed experience batch is a torch.Tensor."""
        self.model.net = self.net
        self.model.agent = self.agent

        for batch in self.rl_dataloader:
            batch_qvals, batch_states, batch_actions, _ = self.model.process_batch(batch)

            loss = self.model.loss(batch_qvals, batch_states, batch_actions)

            self.assertIsInstance(loss, torch.Tensor)
            break  # one batch is enough for a smoke check

    def test_get_qvals(self):
        """Given a batch of episodes, calc_qvals returns one qval list per episode."""
        batch_qvals = []
        for batch in self.rl_dataloader:
            for episode in batch:
                # step layout: (state, action, reward, ...) — index 2 is the reward
                rewards = [step[2] for step in episode]
                batch_qvals.append(self.model.calc_qvals(rewards))

            self.assertEqual(len(batch_qvals), len(batch))
            self.assertIsInstance(batch_qvals[0][0], torch.Tensor)
            # Discounted-return recurrence: Q[t] = r[t] + gamma * Q[t+1].
            self.assertEqual(batch_qvals[0][0], (batch_qvals[0][1] * self.hparams.gamma) + 1.0)
            break

    def test_process_batch(self):
        """process_batch flattens a batch of episodes into 1D q_vals/actions/rewards
        and 2D states, all with one row per step."""
        for batch in self.rl_dataloader:
            # BUG FIX: the counter must be computed per batch. Previously it
            # was initialized once before the loop and accumulated across
            # batches, so the length assertions failed from the second batch on.
            batch_len = sum(len(mini_batch) for mini_batch in batch)

            q_vals, states, actions, rewards = self.model.process_batch(batch)

            self.assertEqual(len(q_vals), batch_len)
            self.assertEqual(len(states), batch_len)
            self.assertEqual(len(actions), batch_len)
            self.assertEqual(len(rewards), batch_len)

            self.assertEqual(len(q_vals.shape), 1)
            self.assertEqual(len(states.shape), 2)
            self.assertEqual(len(actions.shape), 1)
            self.assertEqual(len(rewards.shape), 1)

    def test_calc_q_vals(self):
        """calc_qvals matches hand-computed discounted returns for gamma=0.99."""
        rewards = [1, 1, 1, 1]
        gt_qvals = [3.9403989999999998, 2.9701, 1.99, 1.0]

        qvals = self.model.calc_qvals(rewards)

        self.assertEqual(gt_qvals, qvals)