class TestMultiagentAtariPresets(unittest.TestCase):
    """Tests that multiagent Atari presets can build agents, act, and round-trip through save/load."""

    def setUp(self):
        # Shared environment used by validate_preset to produce observations.
        self.env = MultiagentAtariEnv('pong_v2', device='cpu')
        self.env.reset()

    def tearDown(self):
        # Remove the checkpoint written by validate_preset, if the test got that far.
        if os.path.exists('test_preset.pt'):
            os.remove('test_preset.pt')

    def test_independent(self):
        # Build one DQN preset per agent, each wired to that agent's sub-environment.
        env = MultiagentAtariEnv('pong_v2', device='cpu')
        presets = {
            agent_id: dqn.device('cpu').env(env.subenvs[agent_id]).build()
            for agent_id in env.agents
        }
        self.validate_preset(
            IndependentMultiagentPreset('independent', 'cpu', presets), env)

    def validate_preset(self, preset, env):
        # NOTE(review): the `env` parameter is unused — observations come from
        # self.env (reset in setUp), not the env passed in. Confirm intentional.
        # normal agent
        agent = preset.agent(writer=DummyWriter(), train_steps=100000)
        agent.act(self.env.last())
        # test agent
        test_agent = preset.test_agent()
        test_agent.act(self.env.last())
        # test save/load
        preset.save('test_preset.pt')
        preset = torch.load('test_preset.pt')
        test_agent = preset.test_agent()
        test_agent.act(self.env.last())
def test_reset(self):
    """Resetting the env should yield a fresh initial state for the first agent."""
    env = MultiagentAtariEnv('pong_v1', device='cpu')
    initial = env.reset()
    # A single-channel 84x84 frame with zeroed reward/done and full mask.
    self.assertEqual(initial.observation.shape, (1, 84, 84))
    self.assertEqual(initial.reward, 0)
    self.assertEqual(initial.done, False)
    self.assertEqual(initial.mask, 1.)
    self.assertEqual(initial['agent'], 'first_0')
def test_step_tensor(self):
    """Stepping with a tensor action should hand the turn to the second agent."""
    env = MultiagentAtariEnv('pong_v1', device='cpu')
    env.reset()
    result = env.step(torch.tensor([0]))
    self.assertEqual(result.observation.shape, (1, 84, 84))
    self.assertEqual(result.reward, 0)
    self.assertEqual(result.done, False)
    self.assertEqual(result.mask, 1.)
    self.assertEqual(result['agent'], 'second_0')
def test_independent_cuda(self):
    """Build one DQN preset per agent on CUDA and validate joint training."""
    env = MultiagentAtariEnv('pong_v2', max_cycles=1000, device=CUDA)
    presets = {}
    for agent_id in env.agents:
        presets[agent_id] = dqn.device(CUDA).env(env.subenvs[agent_id]).build()
    preset = IndependentMultiagentPreset('independent', CUDA, presets)
    validate_multiagent(preset, env)
def test_independent(self):
    """Each agent gets its own independently-built DQN preset."""
    env = MultiagentAtariEnv('pong_v1', device='cpu')
    presets = {}
    for agent_id in env.agents:
        presets[agent_id] = dqn.device('cpu').env(env.subenvs[agent_id]).build()
    self.validate_preset(IndependentMultiagentPreset('independent', 'cpu', presets), env)
def str_to_bool(value):
    """Parse a command-line boolean string ('True', '1', 'yes', ... -> True).

    argparse's ``type=bool`` is a known pitfall: ``bool("False")`` is True,
    so any value supplied on the command line would enable the flag.
    """
    if isinstance(value, bool):
        return value
    return value.strip().lower() in ('true', '1', 'yes', 'y')


def main():
    """Parse CLI arguments and run a multiagent Atari training benchmark."""
    parser = argparse.ArgumentParser(description="Run a multiagent Atari benchmark.")
    parser.add_argument("env", help="Name of the Atari game (e.g. Pong).")
    parser.add_argument(
        "agent", help="Name of the agent (e.g. dqn). See presets for available agents."
    )
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    parser.add_argument(
        # int(40e6): keep the default an int — with type=int the conversion only
        # applies to command-line strings, not to the default value itself.
        "--frames", type=int, default=int(40e6), help="The number of training frames."
    )
    parser.add_argument(
        # str_to_bool keeps `--render True` / `--render False` working correctly
        # (the original type=bool treated every supplied value as True).
        "--render", type=str_to_bool, default=False, help="Render the environment."
    )
    parser.add_argument(
        "--writer",
        default='tensorboard',
        help="The backend used for tracking experiment metrics.",
    )
    args = parser.parse_args()
    env = MultiagentAtariEnv(args.env, device=args.device)
    agent_name = args.agent
    # Look the preset builder up by name so any preset in multiagent_atari works.
    agent = getattr(multiagent_atari, agent_name)
    experiment = MultiagentEnvExperiment(
        agent(device=args.device), env, write_loss=False, writer=args.writer
    )
    experiment.train(frames=args.frames)
def main():
    """CLI entry point: load a saved multiagent Atari model and watch it play."""
    parser = argparse.ArgumentParser(
        description="Watch pretrained multiagent atari")
    # Positional arguments: which game, and which checkpoint to load.
    parser.add_argument("env", help="Name of the Atari game (e.g. pong-v1)")
    parser.add_argument("filename", help="File where the model was saved.")
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)",
    )
    parser.add_argument("--fps", type=int, default=30, help="Playback speed")
    parser.add_argument(
        "--reload",
        action="store_true",
        default=False,
        help="Reload the model from disk after every episode",
    )
    args = parser.parse_args()
    env = MultiagentAtariEnv(args.env, device=args.device)
    watch(env, args.filename, args.fps, args.reload)
def test_is_done(self):
    """Neither agent should be done immediately after a reset."""
    env = MultiagentAtariEnv('pong_v1', device='cpu')
    env.reset()
    for agent in ('first_0', 'second_0'):
        self.assertFalse(env.is_done(agent))
def test_list_agents(self):
    """The pong env exposes exactly two agents, in turn order."""
    agents = MultiagentAtariEnv('pong_v1', device='cpu').agents
    self.assertEqual(agents, ['first_0', 'second_0'])
def test_init(self):
    """Constructing several supported games should not raise."""
    for game in ('pong_v1', 'mario_bros_v2', 'entombed_cooperative_v2'):
        MultiagentAtariEnv(game, device='cpu')
def test_action_spaces(self):
    """Both pong agents expose the full 18-action Atari action space."""
    spaces = MultiagentAtariEnv('pong_v1', device='cpu').action_spaces
    for agent in ('first_0', 'second_0'):
        self.assertEqual(spaces[agent].n, 18)
def test_state_spaces(self):
    """Every agent observes a single-channel 84x84 frame."""
    spaces = MultiagentAtariEnv('pong_v1', device='cpu').state_spaces
    for agent in ('first_0', 'second_0'):
        self.assertEqual(spaces[agent].shape, (1, 84, 84))
def setUp(self):
    """Create a seeded space-invaders environment before each test."""
    # Fix every source of randomness so test outcomes are deterministic.
    torch.manual_seed(0)
    np.random.seed(0)
    self.env = MultiagentAtariEnv('space_invaders_v1', device='cpu')
    self.env.seed(0)
    # Populated by the individual tests as needed.
    self.experiment = None
def test_name(self):
    """The env reports the id it was constructed with."""
    self.assertEqual(
        MultiagentAtariEnv('pong_v1', device='cpu').name,
        'pong_v1'
    )
def setUp(self):
    """Create and reset a fresh pong environment before each test."""
    env = MultiagentAtariEnv('pong_v2', device='cpu')
    env.reset()
    self.env = env
def test_agent_iter(self):
    """agent_iter should yield the first agent first after a reset."""
    env = MultiagentAtariEnv('pong_v1', device='cpu')
    env.reset()
    iterator = iter(env.agent_iter())
    self.assertEqual(next(iterator), 'first_0')
class TestMultiagentEnvExperiment(unittest.TestCase):
    """Tests for MultiagentEnvExperiment naming, metric logging, and loss-writing behavior."""

    def setUp(self):
        # Seed every RNG source so the hard-coded returns below are reproducible.
        np.random.seed(0)
        torch.manual_seed(0)
        self.env = MultiagentAtariEnv('space_invaders_v1', device='cpu')
        self.env.seed(0)
        self.experiment = None

    def test_adds_default_name(self):
        # Default experiment label is "<preset name>_<env name>".
        experiment = MockExperiment(self.make_preset(), self.env, quiet=True, save_freq=float('inf'))
        self.assertEqual(experiment._writer.label, "independent_space_invaders_v1")

    def test_adds_custom_name(self):
        # A user-supplied name replaces the preset name in the label.
        experiment = MockExperiment(self.make_preset(), self.env, name='custom', quiet=True, save_freq=float('inf'))
        self.assertEqual(experiment._writer.label, "custom_space_invaders_v1")

    def test_writes_training_returns(self):
        experiment = MockExperiment(self.make_preset(), self.env, quiet=True, save_freq=float('inf'))
        experiment.train(episodes=3)
        # Exact values/steps depend on the seeding done in setUp.
        self.assertEqual(
            experiment._writer.data, {
                'evaluation/first_0/returns/frame': {
                    'values': [465.0, 235.0, 735.0, 415.0],
                    'steps': [766, 1524, 2440, 3038]
                },
                'evaluation/second_0/returns/frame': {
                    'values': [235.0, 465.0, 170.0, 295.0],
                    'steps': [766, 1524, 2440, 3038]
                }
            })

    def test_writes_test_returns(self):
        experiment = MockExperiment(self.make_preset(), self.env, quiet=True, save_freq=float('inf'))
        experiment.train(episodes=3)
        # Clear training metrics so only test-phase metrics remain visible.
        experiment._writer.data = {}
        experiment.test(episodes=3)
        self.assertEqual(list(experiment._writer.data.keys()), [
            'evaluation/first_0/returns-test/mean',
            'evaluation/first_0/returns-test/std',
            'evaluation/second_0/returns-test/mean',
            'evaluation/second_0/returns-test/std'
        ])
        # All four metrics must be logged exactly once, at the same step.
        steps = experiment._writer.data[
            'evaluation/first_0/returns-test/mean']['steps'][0]
        for datum in experiment._writer.data.values():
            self.assertEqual(len(datum['values']), 1)
            self.assertGreaterEqual(datum['values'][0], 0.0)
            self.assertEqual(len(datum['steps']), 1)
            self.assertEqual(datum['steps'][0], steps)

    def test_writes_loss(self):
        # The write_loss flag should be forwarded straight to the writer.
        experiment = MockExperiment(self.make_preset(), self.env, quiet=True, write_loss=True, save_freq=float('inf'))
        self.assertTrue(experiment._writer.write_loss)
        experiment = MockExperiment(self.make_preset(), self.env, quiet=True, write_loss=False, save_freq=float('inf'))
        self.assertFalse(experiment._writer.write_loss)

    def make_preset(self):
        # One independently-trained DQN preset per sub-environment.
        return IndependentMultiagentPreset(
            'independent', 'cpu', {
                agent: dqn.device('cpu').env(env).build()
                for agent, env in self.env.subenvs.items()
            })