class TestReinforce(TestCase):
    """Unit tests for the Reinforce model: loss computation and q-value math."""

    def setUp(self) -> None:
        """Build the CartPole env, agent, experience source, and model under test."""
        self.env = ToTensor(gym.make("CartPole-v0"))
        self.obs_shape = self.env.observation_space.shape
        self.n_actions = self.env.action_space.n

        self.net = MLP(self.obs_shape, self.n_actions)
        self.agent = Agent(self.net)
        self.exp_source = DiscountedExperienceSource(self.env, self.agent)

        # Instantiate the model from its own CLI defaults, overriding a few values.
        parser = argparse.ArgumentParser(add_help=False)
        parser = Reinforce.add_model_specific_args(parser)
        cli_args = [
            "--env",
            "CartPole-v0",
            "--batch_size",
            "32",
            "--gamma",
            "0.99",
        ]
        self.hparams = parser.parse_args(cli_args)
        self.model = Reinforce(**vars(self.hparams))
        self.rl_dataloader = self.model.train_dataloader()

    def test_loss(self):
        """The reinforce loss over a random batch is a torch Tensor."""
        states = torch.rand(16, 4)
        actions = torch.rand(16).long()
        qvals = torch.rand(16)

        loss = self.model.loss(states, actions, qvals)
        self.assertIsInstance(loss, torch.Tensor)

    def test_get_qvals(self):
        """calc_qvals returns per-step discounted returns for an episode's rewards."""
        qvals = self.model.calc_qvals(np.ones(32))

        self.assertIsInstance(qvals[0], float)
        # Each q-value is the next one discounted by gamma, plus the step reward (1.0).
        self.assertEqual(qvals[0], (qvals[1] * self.hparams.gamma) + 1.0)

    def test_calc_q_vals(self):
        """Discounted returns for four unit rewards match the known gamma=0.99 values."""
        expected = [3.9403989999999998, 2.9701, 1.99, 1.0]
        actual = self.model.calc_qvals(np.ones(4))
        self.assertEqual(expected, actual)
def test_reinforce(self):
    """Smoke test that the reinforce model runs"""
    # NOTE(review): relies on self.trainer being configured elsewhere — confirm.
    model = Reinforce(self.hparams.env)
    fit_result = self.trainer.fit(model)
    self.assertEqual(fit_result, 1)
def setUp(self) -> None:
    """Create the env, agent, experience source, and Reinforce model for each test."""
    self.env = ToTensor(gym.make("CartPole-v0"))
    self.obs_shape = self.env.observation_space.shape
    self.n_actions = self.env.action_space.n
    self.net = MLP(self.obs_shape, self.n_actions)
    self.agent = Agent(self.net)
    self.exp_source = DiscountedExperienceSource(self.env, self.agent)

    # Model hyper-parameters come from the model's own argparse defaults.
    parser = argparse.ArgumentParser(add_help=False)
    parser = Reinforce.add_model_specific_args(parser)
    self.hparams = parser.parse_args(
        ["--env", "CartPole-v0", "--batch_size", "32", "--gamma", "0.99"]
    )
    self.model = Reinforce(**vars(self.hparams))
    self.rl_dataloader = self.model.train_dataloader()
def setUp(self) -> None:
    """Build env/agent, an episodic experience stream, and the model under test."""
    self.env = ToTensor(gym.make("CartPole-v0"))
    self.obs_shape = self.env.observation_space.shape
    self.n_actions = self.env.action_space.n
    self.net = MLP(self.obs_shape, self.n_actions)
    self.agent = Agent(self.net)

    # Stream of 4 full episodes wrapped in a DataLoader; logger is mocked out.
    self.xp_stream = EpisodicExperienceStream(self.env, self.agent, Mock(), episodes=4)
    self.rl_dataloader = DataLoader(self.xp_stream)

    # NOTE(review): hyper-parameters are parsed from DQN's arg set but fed to
    # Reinforce — presumably the shared subset suffices; confirm.
    parser = argparse.ArgumentParser(add_help=False)
    parser = cli.add_base_args(parent=parser)
    parser = DQN.add_model_specific_args(parser)
    cli_args = [
        "--algo", "dqn",
        "--warm_start_steps", "500",
        "--episode_length", "100",
        "--env", "CartPole-v0",
    ]
    self.hparams = parser.parse_args(cli_args)
    self.model = Reinforce(**vars(self.hparams))
def test_reinforce(self):
    """Smoke test that the reinforce model runs"""
    # Just verifies fit() completes without raising; no return value checked.
    self.trainer.fit(Reinforce(self.hparams.env))
class TestReinforce(TestCase):
    """Tests for the Reinforce model driven by an episodic experience stream."""

    def setUp(self) -> None:
        """Build env/agent, a 4-episode experience stream, and the model under test."""
        self.env = ToTensor(gym.make("CartPole-v0"))
        self.obs_shape = self.env.observation_space.shape
        self.n_actions = self.env.action_space.n
        self.net = MLP(self.obs_shape, self.n_actions)
        self.agent = Agent(self.net)

        # Episodic stream wrapped in a DataLoader; the logger is mocked out.
        self.xp_stream = EpisodicExperienceStream(self.env, self.agent, Mock(), episodes=4)
        self.rl_dataloader = DataLoader(self.xp_stream)

        # NOTE(review): args are parsed from DQN's arg set but fed to Reinforce —
        # presumably the shared subset suffices; confirm.
        parser = argparse.ArgumentParser(add_help=False)
        parser = cli.add_base_args(parent=parser)
        parser = DQN.add_model_specific_args(parser)
        cli_args = [
            "--algo", "dqn",
            "--warm_start_steps", "500",
            "--episode_length", "100",
            "--env", "CartPole-v0",
        ]
        self.hparams = parser.parse_args(cli_args)
        self.model = Reinforce(**vars(self.hparams))

    def test_loss(self):
        """The loss computed from one processed batch is a torch Tensor."""
        self.model.net = self.net
        self.model.agent = self.agent

        for batch in self.rl_dataloader:
            qvals, states, actions, _ = self.model.process_batch(batch)
            loss = self.model.loss(qvals, states, actions)
            self.assertIsInstance(loss, torch.Tensor)
            break  # one batch is enough for a smoke test

    def test_get_qvals(self):
        """calc_qvals yields one list of discounted returns per episode in a batch."""
        per_episode_qvals = []
        for batch in self.rl_dataloader:
            for episode in batch:
                episode_rewards = [transition[2] for transition in episode]
                per_episode_qvals.append(self.model.calc_qvals(episode_rewards))

            self.assertEqual(len(per_episode_qvals), len(batch))
            self.assertIsInstance(per_episode_qvals[0][0], torch.Tensor)
            # Each q-value equals the next one discounted by gamma plus the reward (1.0).
            self.assertEqual(
                per_episode_qvals[0][0],
                (per_episode_qvals[0][1] * self.hparams.gamma) + 1.0,
            )
            break

    def test_process_batch(self):
        """process_batch flattens episodes into q_vals, states, actions and rewards."""
        total_steps = 0
        for batch in self.rl_dataloader:
            total_steps += sum(len(episode) for episode in batch)
            q_vals, states, actions, rewards = self.model.process_batch(batch)

            # One entry per environment step in every flattened output.
            self.assertEqual(len(q_vals), total_steps)
            self.assertEqual(len(states), total_steps)
            self.assertEqual(len(actions), total_steps)
            self.assertEqual(len(rewards), total_steps)

            # q_vals/actions/rewards are flat vectors; states keep the feature dim.
            self.assertEqual(len(q_vals.shape), 1)
            self.assertEqual(len(states.shape), 2)
            self.assertEqual(len(actions.shape), 1)
            self.assertEqual(len(rewards.shape), 1)

    def test_calc_q_vals(self):
        """Discounted returns for four unit rewards match the known gamma=0.99 values."""
        expected = [3.9403989999999998, 2.9701, 1.99, 1.0]
        actual = self.model.calc_qvals([1, 1, 1, 1])
        self.assertEqual(expected, actual)