class TestMultiagentAtariPresets(unittest.TestCase):
    # Smoke tests for multi-agent presets on the 'pong_v2' environment: build
    # the agents, let each act once, and round-trip the preset through
    # save/load.

    # Scratch file written by validate_preset and removed again in tearDown.
    PRESET_PATH = 'test_preset.pt'

    def setUp(self):
        # One CPU environment per test, reset up front because the tests read
        # its current state through last().
        self.env = MultiagentAtariEnv('pong_v2', device='cpu')
        self.env.reset()

    def tearDown(self):
        # EAFP instead of exists()+remove(): a test can fail before the file
        # is ever written, so a missing file is expected and not an error.
        try:
            os.remove(self.PRESET_PATH)
        except FileNotFoundError:
            pass

    def test_independent(self):
        # Reuse the environment prepared in setUp rather than constructing a
        # second, never-reset emulator just to read its agents and subenvs.
        env = self.env
        presets = {
            agent_id: dqn.device('cpu').env(env.subenvs[agent_id]).build()
            for agent_id in env.agents
        }
        self.validate_preset(
            IndependentMultiagentPreset('independent', 'cpu', presets), env)

    def validate_preset(self, preset, env):
        # Exercise a preset end to end against `env`.
        #
        # preset: the multi-agent preset under test.
        # env: environment supplying the state to act on; it must already
        #      have been reset, since its state is read through last().
        #
        # The checks only require that nothing raises; no return values are
        # inspected.

        # normal agent
        agent = preset.agent(writer=DummyWriter(), train_steps=100000)
        agent.act(env.last())
        # test agent
        test_agent = preset.test_agent()
        test_agent.act(env.last())
        # test save/load
        preset.save(self.PRESET_PATH)
        # NOTE(review): torch.load unpickles arbitrary objects; acceptable
        # here because the file was written by this very test. Recent torch
        # releases default to weights_only=True -- confirm against the pinned
        # torch version whether weights_only=False must be passed.
        preset = torch.load(self.PRESET_PATH)
        test_agent = preset.test_agent()
        test_agent.act(env.last())
Exemple #2
0
 def test_last(self):
     # Right after reset(), last() should describe agent 'first_0' with a
     # (1, 84, 84) observation, zero reward, not done, and a mask of 1.
     game = MultiagentAtariEnv('pong_v1', device='cpu')
     game.reset()
     current = game.last()
     expected_shape = (1, 84, 84)
     self.assertEqual(current.observation.shape, expected_shape)
     self.assertEqual(current.reward, 0)
     self.assertEqual(current.done, False)
     self.assertEqual(current.mask, 1.)
     self.assertEqual(current['agent'], 'first_0')
Exemple #3
0
 def test_step_tensor(self):
     # step() is given the action as a one-element tensor; the returned state
     # should then belong to 'second_0' and still be a non-terminal
     # (1, 84, 84) frame with zero reward and a mask of 1.
     game = MultiagentAtariEnv('pong_v1', device='cpu')
     game.reset()
     action = torch.tensor([0])
     after_step = game.step(action)
     expected_shape = (1, 84, 84)
     self.assertEqual(after_step.observation.shape, expected_shape)
     self.assertEqual(after_step.reward, 0)
     self.assertEqual(after_step.done, False)
     self.assertEqual(after_step.mask, 1.)
     self.assertEqual(after_step['agent'], 'second_0')
Exemple #4
0
 def test_is_done(self):
     # Immediately after reset(), neither agent should be reported as done.
     game = MultiagentAtariEnv('pong_v1', device='cpu')
     game.reset()
     for player in ('first_0', 'second_0'):
         self.assertFalse(game.is_done(player))
Exemple #5
0
 def test_agent_iter(self):
     # The first agent produced by agent_iter() after reset() should be
     # 'first_0'.
     game = MultiagentAtariEnv('pong_v1', device='cpu')
     game.reset()
     turn_order = iter(game.agent_iter())
     first_up = next(turn_order)
     self.assertEqual(first_up, 'first_0')