def create_game():
    """Create the asymmetric game.

    Builds a tabular `game.Game` from a three-row ASCII layout holding three
    items ('a', 'b', 'c') and two players ('A', 'B'), displays it, registers
    the rewards and terminating events, and wraps the result in a
    frame-by-frame `visualizer.Visualizer`.

    Returns:
        The visualizer-wrapped game environment.
    """
    # NOTE(review): as written, the middle row is shorter than the two border
    # rows -- confirm the intended spacing of the layout.
    layout = ['############', '#a b ABc #', '############']

    items = {
        'a': game.Item(color=(0, 254, 254)),
        'b': game.Item(color=(254, 254, 0)),
        'c': game.Item(color=(0, 254, 254)),
    }
    players = {
        'A': game.Player(color=(0, 100, 254)),
        'B': game.Player(color=(254, 100, 0)),
    }

    env = game.Game(layout, items, players, tabular=True)
    env.display()

    # (event name, reward per player) pairs, registered in this order.
    reward_table = (
        ('A_moves', {'A': -10}),
        ('B_moves', {'B': -10}),
        ('A_collects_a', {'A': 100}),
        ('A_collects_c', {'A': 100}),
        ('B_collects_b', {'B': 100}),
    )
    for event, payoff in reward_table:
        env.add_reward(event, payoff)

    # Events registered as terminaisons of the game.
    for event in ('A_collects_a', 'A_collects_c', 'B_collects_b'):
        env.add_terminaison(event)

    # For fast visualization use instead:
    # visualizer.Visualizer(env, fps=1000, by_episode=True)
    # For frame-by-frame visualization:
    return visualizer.Visualizer(env, fps=2, by_episode=False)
def prisoners_dilemma():
    """Return the prisoners dilemma game from the examples.

    Used to test complex behaviours like transitions, events info, etc.

    The game is a tabular `game.Game` with items 'a', 'b', 'd' and players
    'A', 'B'; rewards are registered for the move and collect events, and
    every collect event is registered as a terminaison.

    Returns:
        The configured `game.Game` instance (not wrapped in a visualizer).
    """
    # NOTE(review): as written, the middle row is shorter than the two border
    # rows -- confirm the intended spacing of the layout.
    layout = ['####d####', 'a A B b', '#########']

    items = {
        'a': game.Item(color=(0, 254, 254)),
        'b': game.Item(color=(254, 254, 0)),
        'd': game.Item(color=(0, 254, 254)),
    }
    players = {
        'A': game.Player(color=(0, 100, 254)),
        'B': game.Player(color=(254, 100, 0)),
    }

    env = game.Game(layout, items, players, tabular=True)

    # (event name, reward per player) pairs, registered in this order.
    reward_table = (
        ('A_moves', {'A': -1}),
        ('B_moves', {'B': -1}),
        ('A_collects_a', {'A': 100}),
        ('B_collects_b', {'B': 100}),
        ('A_collects_d', {'A': 100}),
        ('B_collects_d', {'B': 100}),
    )
    for event, payoff in reward_table:
        env.add_reward(event, payoff)

    # Events registered as terminaisons of the game.
    ending_events = (
        'A_collects_d',
        'B_collects_d',
        'A_collects_a',
        'B_collects_b',
    )
    for event in ending_events:
        env.add_terminaison(event)

    return env
def create_game():
    """Create the temptation game.

    Builds a tabular `game.Game` with ten items ('a' .. 'j') and two players
    ('A', 'B'), displays it, registers a -1 reward on each player's move
    event and a collect reward of 10, 20, ..., 100 for items 'a' .. 'j',
    registers every collect event as a terminaison, and wraps the result in
    a frame-by-frame `visualizer.Visualizer`.

    Returns:
        The visualizer-wrapped game environment.
    """
    # NOTE(review): as written, the inner rows are shorter than the border
    # rows -- confirm the intended spacing of the layout.
    layout = [
        '########',
        'a AB a',
        'b b',
        'c c',
        'd d',
        'e e',
        'f f',
        'g g',
        'h h',
        'i i',
        'j j',
        '########',
    ]

    letters = 'abcdefghij'
    # First colour channel of each item; the last value is 254, not 240.
    first_channel = (60, 80, 100, 120, 140, 160, 180, 200, 220, 254)
    items = {
        letter: game.Item(color=(shade, 254, 254))
        for letter, shade in zip(letters, first_channel)
    }
    players = {
        'A': game.Player(color=(0, 100, 254)),
        'B': game.Player(color=(254, 100, 0)),
    }

    env = game.Game(layout, items, players, tabular=True)
    env.display()

    env.add_reward('A_moves', {'A': -1})
    env.add_reward('B_moves', {'B': -1})

    # The collect reward grows with the item's position in `letters`.
    for rank, letter in enumerate(letters, start=1):
        payoff = rank * 10
        a_event = 'A_collects_' + letter
        b_event = 'B_collects_' + letter
        env.add_reward(a_event, {'A': payoff})
        env.add_reward(b_event, {'B': payoff})
        env.add_terminaison(a_event)
        env.add_terminaison(b_event)

    # For fast visualization use instead:
    # visualizer.Visualizer(env, fps=1000, by_episode=True)
    # For frame-by-frame visualization:
    return visualizer.Visualizer(env, fps=2, by_episode=False)
def test_transition(self):
    """Check the events and rewards reported for several joint actions.

    Each scenario resets the environment, steps it with a pair of actions
    (one per player), and asserts on `infos['event_list']` and on the
    returned rewards.
    """
    # (this is the prisoners dilemma grid game from examples)
    # NOTE(review): as written, the middle row is shorter than the two border
    # rows -- confirm the intended spacing of the layout.
    layout = ['####d####', 'a A B b', '#########']
    items = {
        'a': game.Item(color=(0, 254, 254)),
        'b': game.Item(color=(254, 254, 0)),
        'd': game.Item(color=(254, 254, 254)),
    }
    players = {
        'A': game.Player(color=(0, 100, 254)),
        'B': game.Player(color=(254, 100, 0)),
    }
    env = game.Game(layout, items, players, tabular=True, max_steps=50)

    # (event name, reward per player) pairs, registered in this order.
    reward_table = (
        ('A_moves', {'A': -1}),
        ('B_moves', {'B': -1}),
        ('A_collects_a', {'A': 100}),
        ('B_collects_b', {'B': 100}),
        ('A_collects_d', {'A': 100}),
        ('B_collects_d', {'B': 100}),
    )
    for event, payoff in reward_table:
        env.add_reward(event, payoff)

    ending_events = (
        'A_collects_d',
        'B_collects_d',
        'A_collects_a',
        'B_collects_b',
    )
    for event in ending_events:
        env.add_terminaison(event)

    # A goes in wall
    env.reset()
    _, rewards, _, infos = env.step([1, 0])
    events = infos['event_list']
    self.assertIn('A_moves', events)
    self.assertIn('A_goes_in_walls', events)
    self.assertEqual(rewards[0], -1)

    # A goes in B
    env.reset()
    env.step([4, 0])
    _, rewards, _, infos = env.step([4, 0])
    events = infos['event_list']
    self.assertIn('A_moves', events)
    self.assertIn('A_blocked_by_B', events)
    self.assertEqual(rewards[0], -1)

    # A and B reach same cell
    env.reset()
    _, rewards, _, infos = env.step([4, 3])
    events = infos['event_list']
    self.assertIn('A_moves', events)
    self.assertIn('B_moves', events)
    # Either player may be the one reported as losing the draw.
    self.assertTrue(
        'A_lost_the_drawn' in events or 'B_lost_the_drawn' in events)
    self.assertEqual(rewards[0], -1)
    self.assertEqual(rewards[1], -1)

    # A and B block each other
    env.reset()
    env.step([4, 0])
    _, rewards, _, infos = env.step([4, 3])
    events = infos['event_list']
    self.assertIn('A_moves', events)
    self.assertIn('B_moves', events)
    self.assertIn('A_blocked_by_B', events)
    self.assertIn('B_blocked_by_A', events)
    self.assertEqual(rewards[0], -1)
    self.assertEqual(rewards[1], -1)

    # A reaches reward
    env.reset()
    env.step([3, 0])
    env.step([3, 0])
    _, rewards, _, infos = env.step([3, 0])
    events = infos['event_list']
    self.assertIn('A_moves', events)
    self.assertIn('A_collects_a', events)
    # Collect reward plus the move penalty of the same step.
    self.assertEqual(rewards[0], 100 - 1)

    env.reset()