Beispiel #1
0
    def step(self, action):
        """Buffer the current agent's action and advance the turn order.

        Actions are collected in ``self._actions``. Once the selector reports
        that the current agent is the last of the cycle, the buffered actions
        are handed to ``self.env.step`` and shallow copies of its results are
        stored on this object. If the current agent's ``dones`` entry is
        already set, its buffered action is dropped and the result of
        ``self._was_done_step(action)`` is returned instead.
        """
        current = self.agent_selection
        if self.dones[current]:
            del self._actions[current]
            return self._was_done_step(action)

        self._actions[current] = action

        if not self._agent_selector.is_last():
            # Mid-cycle: rewards are cleared when leaving the first agent.
            if self._agent_selector.is_first():
                self._clear_rewards()
            self.agent_selection = self._agent_selector.next()
            return None

        # End of cycle: step the wrapped environment with all buffered actions.
        obss, rews, dones, infos = self.env.step(self._actions)

        self._observations = copy.copy(obss)
        self.dones = copy.copy(dones)
        self.infos = copy.copy(infos)
        self.rewards = copy.copy(rews)
        self.agents = self.env.agents[:]

        still_alive = []
        for name in self.agents:
            if not dones[name]:
                still_alive.append(name)
        self._live_agents = still_alive

        # Only rebuild the selector when somebody is left to act.
        if still_alive:
            self._agent_selector = agent_selector(still_alive)
            self.agent_selection = self._agent_selector.reset()

        self._cumulative_rewards = copy.copy(rews)
        self._dones_step_first()
        return None
Beispiel #2
0
    def step(self, action, observe=True):
        """Store the current agent's action and move on to the next agent.

        The action is written into ``self._actions`` at the position given by
        ``self._agent_mapper``. When the current agent is the last one of the
        cycle, ``self.env.step`` is called with all stored actions and its
        sequence results are paired with ``self.agents`` to rebuild the
        per-agent dictionaries.

        Returns ``self.observe(self.agent_selection)`` when ``observe`` is
        truthy, otherwise ``None``.
        """
        slot = self._agent_mapper[self.agent_selection]
        self._actions[slot] = action

        if not self._agent_selector.is_last():
            self.agent_selection = self._agent_selector.next()
        else:
            obss, rews, dones, infos = self.env.step(self._actions)
            self._observations = obss

            self.dones = dict(zip(self.agents, dones))
            self.infos = dict(zip(self.agents, infos))
            self.rewards = dict(zip(self.agents, rews))

            survivors = []
            for done, name in zip(dones, self.agents):
                if not done:
                    survivors.append(name)
            self._live_agents = survivors

            # Keep the old selector when nobody is left to act.
            if survivors:
                self._agent_selector = agent_selector(survivors)
                self.agent_selection = self._agent_selector.reset()

        if observe:
            return self.observe(self.agent_selection)
        return None
Beispiel #3
0
    def __init__(self):
        """Create a two-player environment on a fresh ``Board``.

        Each agent gets a 9-way discrete action space and a dict observation
        space holding a (3, 3, 2) ``observation`` box and a 9-element
        ``action_mask`` box. Rewards start at 0, nobody is done, and every
        agent's ``legal_moves`` info lists all nine actions.
        """
        super().__init__()
        self.board = Board()

        self.agents = ["player_1", "player_2"]
        self.possible_agents = self.agents[:]

        self.action_spaces = {name: spaces.Discrete(9) for name in self.agents}

        self.observation_spaces = {}
        for name in self.agents:
            board_box = spaces.Box(low=0, high=1, shape=(3, 3, 2), dtype=np.int8)
            mask_box = spaces.Box(low=0, high=1, shape=(9, ), dtype=np.int8)
            self.observation_spaces[name] = spaces.Dict({
                'observation': board_box,
                'action_mask': mask_box,
            })

        self.rewards = dict.fromkeys(self.agents, 0)
        self.dones = dict.fromkeys(self.agents, False)
        # Every agent gets its own list of move indices.
        self.infos = {name: {'legal_moves': list(range(9))} for name in self.agents}

        self._agent_selector = agent_selector(self.agents)
        self.agent_selection = self._agent_selector.reset()
    def __init__(self):
        """Create a two-player environment around a new ``Game``.

        Declares a discrete action space of ``26 * 26 * 2 + 1`` actions per
        agent and a 198-element float32 box observation space whose upper
        bounds are 1 except for the entries raised to 6.0 / 7.5 below.
        """
        super().__init__()
        self.game = Game()
        self.seed()

        self.agents = ["player_{}".format(idx) for idx in range(2)]
        self.possible_agents = self.agents[:]
        self._agent_order = list(self.agents)
        self._agent_selector = agent_selector(self._agent_order)
        self.infos = {name: {'legal_moves': []} for name in self.agents}

        action_count = 26 * 26 * 2 + 1
        self.action_spaces = {
            name: spaces.Discrete(action_count) for name in self.agents
        }

        low = np.zeros((198, ))
        high = np.ones((198, ))
        # Every fourth entry of each half has a larger bound; one entry per
        # half is larger still.
        high[3:97:4] = 6.0
        high[96] = 7.5
        high[101:195:4] = 6.0
        high[194] = 7.5

        self.observation_spaces = {}
        for name in self.agents:
            self.observation_spaces[name] = spaces.Box(
                low=np.float32(low),
                high=np.float32(high),
                dtype=np.float32,
            )

        self.double_roll = 0
Beispiel #5
0
    def step(self, action):
        """Buffer the current agent's action and advance the turn order.

        When the current agent is the last of the cycle, all buffered actions
        are passed to ``self.env.step`` and shallow copies of the results are
        stored. ``self.agents`` then becomes the wrapped environment's agents
        followed by any other observed agents in sorted order. If the current
        agent's ``dones`` entry is already set, its buffered action is dropped
        and the result of ``self._was_done_step(action)`` is returned.
        """
        current = self.agent_selection
        if self.dones[current]:
            del self._actions[current]
            return self._was_done_step(action)

        self._actions[current] = action

        if not self._agent_selector.is_last():
            # Mid-cycle: rewards are cleared when leaving the first agent.
            if self._agent_selector.is_first():
                self._clear_rewards()
            self.agent_selection = self._agent_selector.next()
            return None

        obss, rews, dones, infos = self.env.step(self._actions)

        self._observations = copy.copy(obss)
        self.dones = copy.copy(dones)
        self.infos = copy.copy(infos)
        self.rewards = copy.copy(rews)
        self._cumulative_rewards = copy.copy(rews)

        active = self.env.agents
        active_lookup = set(active)

        # Agents that were observed this step but are no longer listed by the
        # wrapped environment are appended after the active ones.
        leftovers = []
        for name in sorted(self._observations.keys()):
            if name not in active_lookup:
                leftovers.append(name)
        self.agents = active + leftovers

        if len(active) > 0:
            self._agent_selector = agent_selector(active)
            self.agent_selection = self._agent_selector.reset()

        self._dones_step_first()
        return None
Beispiel #6
0
 def reinit(self):
     """Reset the selector and all per-agent bookkeeping for ``self.agents``."""
     roster = self.agents
     self._agent_selector = agent_selector(roster)
     self.agent_selection = self._agent_selector.next()

     self.rewards = dict.fromkeys(roster, 0)
     self.dones = dict.fromkeys(roster, False)
     self.infos = {name: {} for name in roster}
     # NOTE(review): `none` is not defined in this block; presumably a
     # module-level constant meaning "no move yet" - confirm.
     self.state = {name: none for name in roster}
     self.observations = {name: none for name in roster}

     self.num_moves = 0
Beispiel #7
0
    def __init__(self, seed=None, continuous=False, vector_observation=False, max_frames=900, num_floors=4, synchronized_start=False, identical_aliens=False, random_aliens=False):
        """Build the environment: agents, images, spaces, walls and prisoners.

        Two agents are created per floor. ``identical_aliens`` wins over
        ``random_aliens``: when the former is truthy the latter is stored as
        ``False``. Action spaces are a 1-element float box bounded by the
        velocity when ``continuous`` is set, otherwise 3-way discrete.
        Observation spaces are a 1-element float box when
        ``vector_observation`` is set, otherwise a (100, 300, 3) uint8 image.
        Construction ends by calling ``self.reinit()``.
        """
        EzPickle.__init__(self, seed, continuous, vector_observation, max_frames, num_floors, synchronized_start, identical_aliens, random_aliens)
        self.num_agents = 2 * num_floors
        self.agents = ["prisoner_" + str(idx) for idx in range(self.num_agents)]
        self._agent_selector = agent_selector(self.agents)
        self.sprite_list = [
            "sprites/alien",
            "sprites/drone",
            "sprites/glowy",
            "sprites/reptile",
            "sprites/ufo",
            "sprites/bunny",
            "sprites/robot",
            "sprites/tank",
        ]
        self.sprite_img_heights = [40, 40, 46, 48, 32, 54, 48, 53]
        self.metadata = {'render.modes': ['human']}
        self.infos = {}
        self.rendering = False
        self.max_frames = max_frames

        pygame.init()
        self.clock = pygame.time.Clock()
        self.num_frames = 0
        self.done_val = False
        self.num_floors = num_floors

        self.background = get_image('background.png')
        self.background_append = get_image('background_append.png')
        self.dynamic_background = get_image('blit_background.png')
        self.dynamic_background_append = get_image('blit_background_append.png')

        self.velocity = 24
        self.continuous = continuous
        self.vector_obs = vector_observation
        self.synchronized_start = synchronized_start
        self.identical_aliens = identical_aliens
        # Identical aliens and random aliens are mutually exclusive.
        self.random_aliens = False if self.identical_aliens else random_aliens
        self.np_random, seed = seeding.np_random(seed)
        self.closed = False

        if continuous:
            self.action_spaces = {
                name: spaces.Box(low=-self.velocity, high=self.velocity, shape=(1,), dtype=np.float32)
                for name in self.agents
            }
        else:
            self.action_spaces = {name: spaces.Discrete(3) for name in self.agents}

        if vector_observation:
            self.observation_spaces = {
                name: spaces.Box(low=-300, high=300, shape=(1,), dtype=np.float32)
                for name in self.agents
            }
        else:
            self.observation_spaces = {
                name: spaces.Box(low=0, high=255, shape=(100, 300, 3), dtype=np.uint8)
                for name in self.agents
            }
        self.last_observation = dict.fromkeys(self.agents)

        self.walls = []
        self.create_walls(num_floors)

        self.spawn_prisoners()
        self.has_reset = False

        self.reinit()
Beispiel #8
0
    def __init__(self, board_size):
        """Create a single-agent snake environment on a square board.

        Args:
            board_size: side length of the playable area; the observation
                grid is two cells larger in each dimension to hold the border.
        """
        super().__init__()
        self.board_size = board_size
        self.agents = ['player_1']

        # The snake starts one cell long. The body list includes the head,
        # with the tail at the end. (0, 0) is a border corner, so the snake
        # starts at (1, 1).
        start_cell = (1, 1)
        self.head = start_cell
        self.body = [self.head]
        # NOTE(review): `fruit_loc`, read below, is presumably set by this call - confirm.
        self._place_fruit()
        self.direction = 'R'

        # Three actions: 0 - forward, 1 - turn left, 2 - turn right.
        self.action_spaces = {name: spaces.Discrete(3) for name in self.agents}

        # Observation cells: 0 - blank, 1 - snake body, 2 - fruit, 3 - border.
        grid_side = self.board_size + 2
        self.observation_spaces = {
            name: spaces.Box(low=0, high=3, shape=(grid_side, grid_side), dtype=np.uint8)
            for name in self.agents
        }

        # Nothing has been earned and the game is not over at the start.
        self.rewards = dict.fromkeys(self.agents, 0)
        self.dones = dict.fromkeys(self.agents, False)

        # Info holds head, body, direction of travel and fruit location,
        # indexed with respect to the full (bordered) observation grid.
        initial_info = {
            'head': start_cell,
            'body': [start_cell],
            'direction': self.direction,
            'fruit': self.fruit_loc,
        }
        self.infos = {'player_1': initial_info}

        # For each heading: the heading after going forward, turning left,
        # or turning right (indexed by action).
        self.TURN_RES = {
            'U': ['U', 'L', 'R'],
            'D': ['D', 'R', 'L'],
            'L': ['L', 'D', 'U'],
            'R': ['R', 'U', 'D'],
        }
        # (row delta, column delta) for one step in each heading; e.g. going
        # up decreases the row by one and leaves the column unchanged.
        self.DIR_CELL_CHANGE = {
            'U': (-1, 0),
            'D': (1, 0),
            'L': (0, -1),
            'R': (0, 1),
        }

        self._cumulative_rewards = dict.fromkeys(self.agents, 0)
        self._agent_selector = agent_selector(self.agents)
        self.agent_selection = self._agent_selector.reset()
Beispiel #9
0
 def reset(self):
     """Reset the wrapped environment and rebuild all per-agent state."""
     self._observations = self.env.reset()

     roster = self.env.agents[:]
     self.agents = roster
     self._live_agents = roster[:]
     self._actions = dict.fromkeys(roster)

     self._agent_selector = agent_selector(self._live_agents)
     self.agent_selection = self._agent_selector.reset()

     self.dones = dict.fromkeys(roster, False)
     self.infos = {name: {} for name in roster}
     self.rewards = dict.fromkeys(roster, 0)
     self._cumulative_rewards = dict.fromkeys(roster, 0)
Beispiel #10
0
 def reinit(self):
     """Restore the full agent list and clear all per-agent bookkeeping."""
     self.agents = self.possible_agents[:]
     roster = self.agents

     self._agent_selector = agent_selector(roster)
     self.agent_selection = self._agent_selector.next()

     self.rewards = dict.fromkeys(roster, 0)
     self._cumulative_rewards = dict.fromkeys(roster, 0)
     self.dones = dict.fromkeys(roster, False)
     self.infos = {name: {} for name in roster}
     # NOTE(review): `NONE` is not defined in this block; presumably a
     # module-level constant meaning "no move yet" - confirm.
     self.state = {name: NONE for name in roster}
     self.observations = {name: NONE for name in roster}

     self.num_moves = 0
    def reset(self):
        """Restore all agents, clear their bookkeeping and reset the board state."""
        roster = self.possible_agents[:]
        self.agents = roster

        self.rewards = dict.fromkeys(roster, 0)
        self._cumulative_rewards = dict.fromkeys(roster, 0)
        self.dones = dict.fromkeys(roster, False)
        self.infos = {name: {} for name in roster}

        self._agent_selector = agent_selector(self.agents)
        self.agent_selection = self._agent_selector.next()

        self.board_state.reset()
Beispiel #12
0
    def _reset_agents(self, player_number: int):
        """Rearrange self.agents as pyhanabi starts a different player after each reset().

        ``self.agents`` is rotated so that ``'player_<player_number>'`` comes
        first while the cyclic order of the remaining agents is preserved. The
        agent selector is then rebuilt on the rotated list and
        ``self.agent_selection`` is set to its first agent.

        Args:
            player_number: numeric suffix of the agent that should act first.

        Raises:
            ValueError: if ``'player_<player_number>'`` is not in
                ``self.agents``. A rotate-until-match loop would never
                terminate in that case.
        """
        first_agent = 'player_' + str(player_number)

        # One lookup plus one slice rotation replaces shifting the list one
        # position at a time.
        try:
            start = self.agents.index(first_agent)
        except ValueError:
            raise ValueError(
                "{!r} is not one of the agents {!r}".format(first_agent, self.agents)
            ) from None

        # Leave the list object untouched when the order is already correct.
        if start:
            self.agents = self.agents[start:] + self.agents[:start]

        # Agent order list, on which the agent selector operates.
        self._agent_selector = agent_selector(self.agents)

        # Reset agent_selection
        self.agent_selection = self._agent_selector.reset()
Beispiel #13
0
    def reset(self, observe=True):
        """Clear buffered actions and per-agent state, then reset the wrapped env.

        Returns ``self.observe(self.agent_selection)`` when ``observe`` is
        truthy, otherwise ``None``.
        """
        self._actions = [None] * self.num_agents

        roster = self.agents
        self._live_agents = roster[:]
        self._agent_selector = agent_selector(self._live_agents)
        self.agent_selection = self._agent_selector.reset()

        self.dones = dict.fromkeys(roster, False)
        self.infos = {name: {} for name in roster}
        self.rewards = dict.fromkeys(roster, 0)

        self._observations = self.env.reset()

        if observe:
            return self.observe(self.agent_selection)
        return None
Beispiel #14
0
    def __init__(self, *args, **kwargs):
        """Wrap a new ``_env`` and expose one ``walker_<i>`` agent per env agent.

        All positional and keyword arguments are forwarded unchanged to both
        ``EzPickle.__init__`` and ``_env``.
        """
        EzPickle.__init__(self, *args, **kwargs)
        self.env = _env(*args, **kwargs)

        agent_count = self.env.num_agents
        self.agents = ["walker_" + str(idx) for idx in range(agent_count)]
        self.possible_agents = self.agents[:]
        # NOTE(review): `self.num_agents` is not assigned in this block;
        # presumably provided by the base class - confirm.
        self.agent_name_mapping = dict(zip(self.agents, range(self.num_agents)))
        self._agent_selector = agent_selector(self.agents)

        # spaces: pair each agent with the matching entry of the env's spaces
        self.action_spaces = {
            name: space for name, space in zip(self.agents, self.env.action_space)
        }
        self.observation_spaces = {
            name: space for name, space in zip(self.agents, self.env.observation_space)
        }

        self.steps = 0
        self.display_wait = 0.04
Beispiel #15
0
    def __init__(self, *args, **kwargs):
        """Wrap a new ``_env`` and expose one ``pursuer_<i>`` agent per env agent.

        All positional and keyword arguments are forwarded unchanged to ``_env``.
        """
        super().__init__()
        self.env = _env(*args, **kwargs)

        agent_count = self.env.num_agents
        self.agents = ["pursuer_" + str(idx) for idx in range(agent_count)]
        self.possible_agents = self.agents[:]
        # NOTE(review): `self.num_agents` is not assigned in this block;
        # presumably provided by the base class - confirm.
        self.agent_name_mapping = dict(zip(self.agents, range(self.num_agents)))
        self._agent_selector = agent_selector(self.agents)

        # spaces: pair each agent with the matching entry of the env's spaces
        self.action_spaces = {
            name: space for name, space in zip(self.agents, self.env.action_space)
        }
        self.observation_spaces = {
            name: space for name, space in zip(self.agents, self.env.observation_space)
        }

        self.has_reset = False
Beispiel #16
0
def test_agent_order(env):
    """Check that ``env`` cycles through agents in the order it advertises.

    A reference ``agent_selector`` is built from ``env.agent_order`` and
    stepped alongside the environment for up to 200 steps (stopping early once
    every agent is done). After each step the environment's
    ``agent_selection`` must match the reference. If the environment changes
    its ``agent_order`` mid-run, the reference selector is re-initialised on
    the new order and advanced past the position of the agent that just acted.
    """
    env.reset()
    if hasattr(env, "_agent_selector"):
        if not isinstance(env._agent_selector, agent_selector):
            warnings.warn(
                "You created your own agent_selector utility. You might want to use ours, in utils/agent_selector.py"
            )
    else:
        warnings.warn(
            "Env has no object named _agent_selector. We recommend handling agent cycling with the agent_selector utility from utils/agent_selector.py."
        )

    assert hasattr(env, "agent_order"), "Env does not have agent_order"

    env.reset(observe=False)
    expected_order = copy(env.agent_order)
    reference = agent_selector(expected_order)
    expected_agent = reference.next()

    if hasattr(env, "_agent_selector"):
        assert env._agent_selector == reference, "env._agent_selector is initialized incorrectly"

    assert env.agent_selection == expected_agent, "env.agent_selection is not the same as the first agent in agent_order"

    for _ in range(200):
        acting = expected_agent
        acting_info = env.infos[acting]
        if 'legal_moves' in acting_info:
            action = random.choice(acting_info['legal_moves'])
        else:
            action = env.action_spaces[acting].sample()
        env.step(action, observe=False)

        if all(env.dones.values()):
            break

        order_unchanged = expected_order == env.agent_order
        if order_unchanged:
            expected_agent = reference.next()
        else:
            # The env re-ordered its agents: restart the reference selector on
            # the new order and skip ahead past the previous agent's index.
            previous_index = expected_order.index(expected_agent)
            expected_order = copy(env.agent_order)
            reference.reinit(expected_order)
            skips = (previous_index + 1) % len(env.agents)
            for _ in range(skips + 1):
                expected_agent = reference.next()

        assert env.agent_selection == expected_agent, "env.agent_selection ({}) is not the same as the next agent in agent_order {}".format(
            env.agent_selection, env.agent_order)
Beispiel #17
0
    def __init__(self, seed=None, *args, **kwargs):
        """Wrap a new ``_env`` and expose one ``pursuer_<i>`` agent per env agent.

        ``seed`` is passed to ``_env`` as its first positional argument,
        followed by the remaining positional and keyword arguments.
        """
        super().__init__()
        self.env = _env(seed, *args, **kwargs)

        self.num_agents = self.env.num_agents
        self.agents = ["pursuer_" + str(idx) for idx in range(self.num_agents)]
        self.agent_name_mapping = dict(zip(self.agents, range(self.num_agents)))
        self._agent_selector = agent_selector(self.agents)

        # spaces: pair each agent with the matching entry of the env's spaces
        self.action_spaces = {
            name: space for name, space in zip(self.agents, self.env.action_space)
        }
        self.observation_spaces = {
            name: space for name, space in zip(self.agents, self.env.observation_space)
        }

        self.steps = 0
        self.display_wait = 0.03
        self.has_reset = False
Beispiel #18
0
 def __init__(self, *args, **kwargs):
     """Wrap a new ``_env``, initialise pygame and expose ``pursuer_<i>`` agents.

     All positional and keyword arguments are forwarded unchanged to both
     ``EzPickle.__init__`` and ``_env``.
     """
     EzPickle.__init__(self, *args, **kwargs)
     self.env = _env(*args, **kwargs)
     pygame.init()

     agent_count = self.env.num_agents
     self.agents = ["pursuer_" + str(idx) for idx in range(agent_count)]
     self.possible_agents = self.agents[:]
     # NOTE(review): `self.num_agents` is not assigned in this block;
     # presumably provided by the base class - confirm.
     self.agent_name_mapping = dict(zip(self.agents, range(self.num_agents)))
     self._agent_selector = agent_selector(self.agents)

     # spaces: pair each agent with the matching entry of the env's spaces
     self.n_act_agents = self.env.act_dims[0]
     self.action_spaces = {
         name: space for name, space in zip(self.agents, self.env.action_space)
     }
     self.observation_spaces = {
         name: space for name, space in zip(self.agents, self.env.observation_space)
     }

     self.steps = 0
     self.closed = False
Beispiel #19
0
 def __init__(self, seed=None, *args, **kwargs):
     """Wrap a new ``_env``, initialise pygame and expose ``pursuer_<i>`` agents.

     ``seed`` is recorded first by ``EzPickle.__init__`` but is passed to
     ``_env`` *after* the other positional arguments.
     """
     EzPickle.__init__(self, seed, *args, **kwargs)
     self.env = _env(*args, seed, **kwargs)
     pygame.init()

     self.num_agents = self.env.num_agents
     self.agents = ["pursuer_" + str(idx) for idx in range(self.num_agents)]
     self.agent_name_mapping = dict(zip(self.agents, range(self.num_agents)))
     self._agent_selector = agent_selector(self.agents)
     self.has_reset = False

     # spaces: pair each agent with the matching entry of the env's spaces
     self.n_act_agents = self.env.act_dims[0]
     self.action_spaces = {
         name: space for name, space in zip(self.agents, self.env.action_space)
     }
     self.observation_spaces = {
         name: space for name, space in zip(self.agents, self.env.observation_space)
     }

     self.steps = 0
     self.display_wait = 0.0
     self.closed = False
    def step(self, action, observe=True):
        """Store the current agent's action and move on to the next agent.

        When the current agent is the last one of the cycle, ``self.env.step``
        is called with all stored actions and its results replace this
        object's observations, dones, infos and rewards (stored by reference,
        not copied).

        Returns ``self.observe(self.agent_selection)`` when ``observe`` is
        truthy, otherwise ``None``.
        """
        self._actions[self.agent_selection] = action

        if not self._agent_selector.is_last():
            self.agent_selection = self._agent_selector.next()
        else:
            obss, rews, dones, infos = self.env.step(self._actions)

            self._observations = obss
            self.dones = dones
            self.infos = infos
            self.rewards = rews

            survivors = []
            for name in self.agents:
                if not dones[name]:
                    survivors.append(name)
            self._live_agents = survivors

            # Keep the old selector when nobody is left to act.
            if survivors:
                self._agent_selector = agent_selector(survivors)
                self.agent_selection = self._agent_selector.reset()

        if observe:
            return self.observe(self.agent_selection)
        return None
Beispiel #21
0
    def __init__(self, scenario: MultiAgentScenario, reset_mode='grid'):
        """Create the environment for ``scenario`` and initialise its world.

        Args:
            scenario: supplies the agents (iterated and sorted to form
                ``possible_agents``) and the ``world`` that is initialised.
            reset_mode: stored as ``self._reset_mode`` for later use.
        """
        self._scenario = scenario
        self._reset_mode = reset_mode

        self.possible_agents = sorted(scenario.agents)
        self.agents = self.possible_agents[:]
        self._agent_selector = agent_selector(self.possible_agents)
        self.agent_selection = self.possible_agents[0]

        roster = self.agents
        self.rewards = dict.fromkeys(roster, 0)
        self._cumulative_rewards = dict.fromkeys(roster, 0)
        self.dones = dict.fromkeys(roster, False)
        # Each agent gets its own empty dict in every table below.
        self.infos = {name: {} for name in roster}
        self.state = {name: {} for name in roster}
        self.observations = {name: {} for name in roster}
        self.actions = {name: {} for name in roster}
        self.num_moves = 0

        self._initialized = False
        self._scenario.world.init()
        super().__init__()
Beispiel #22
0
    def reset(self):
        """Restore all agents, reset each one at a starting pose, then reset the world.

        For every agent a starting position is requested from the world using
        the stored reset mode, the agent is reset at that pose, and its
        per-agent bookkeeping is cleared. Afterwards the world is reset and
        updated and its state is stored in ``self.state``.
        """
        self.agents = self.possible_agents.copy()
        self._agent_selector = agent_selector(self.agents)
        self.agent_selection = self._agent_selector.next()

        scenario = self._scenario
        for name in self.agents:
            entity = scenario.agents[name]
            pose = scenario.world.get_starting_position(
                agent=entity, mode=self._reset_mode)

            # The value returned by the agent's reset is stored first and then
            # replaced by a fresh vehicle observation a few lines below.
            self.observations[name] = entity.reset(pose=pose)
            self.rewards[name] = 0
            self._cumulative_rewards[name] = 0
            self.dones[name] = False
            self.infos[name] = {}
            self.observations[name] = scenario.agents[name].vehicle.observe()
            self.actions[name] = {}
            # NOTE(review): reset once per agent, so it is skipped when there
            # are no agents - confirm this is intended.
            self.num_moves = 0

        scenario.world.reset()
        scenario.world.update()
        self.state = scenario.world.state()
Beispiel #23
0
    def __init__(self):
        """Create a two-player environment around a new ``Game``.

        Declares a discrete action space of ``26 * 26 * 2 + 1`` actions per
        agent and a dict observation space with a 198-element float32
        ``observation`` box and a 1353-element int8 ``action_mask`` box.
        """
        super().__init__()
        self.game = Game()
        self.seed()

        self.agents = [f"player_{idx}" for idx in range(2)]
        self.possible_agents = self.agents[:]
        self._agent_order = list(self.agents)
        self._agent_selector = agent_selector(self._agent_order)
        self.infos = {name: {} for name in self.agents}

        action_count = 26 * 26 * 2 + 1
        self.action_spaces = {
            name: spaces.Discrete(action_count) for name in self.agents
        }

        low = np.zeros((198, ))
        high = np.ones((198, ))
        # Every fourth entry of each half has a larger bound; one entry per
        # half is larger still.
        high[3:97:4] = 6.0
        high[96] = 7.5
        high[101:195:4] = 6.0
        high[194] = 7.5

        self.observation_spaces = {}
        for name in self.agents:
            board_box = spaces.Box(low=np.float32(low),
                                   high=np.float32(high),
                                   dtype=np.float32)
            mask_box = spaces.Box(low=0, high=1, shape=(1353, ), dtype=np.int8)
            self.observation_spaces[name] = spaces.Dict({
                'observation': board_box,
                'action_mask': mask_box,
            })

        self.double_roll = 0
Beispiel #24
0
    def __init__(self,
                 game,
                 num_players,
                 mode_num=None,
                 seed=None,
                 obs_type='rgb_image',
                 frameskip=4,
                 repeat_action_probability=0.25,
                 full_action_space=True):
        """Load a ROM into a multi-agent ALE interface and declare agent spaces.

        Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int.
        NOTE(review): this constructor forwards ``frameskip`` directly to
        ``ale.setInt``; confirm whether tuple values are handled elsewhere.

        Args:
            game: ROM name; the file is looked up as ``ROM/<game>/<game>.bin``
                inside the ``multi_agent_ale_py`` package directory.
            num_players: number of agents; must equal the number of players
                the selected mode activates.
            mode_num: game mode to select, or ``None`` to use the first mode
                available for ``num_players``.
            seed: seed for ``self.np_random`` and the emulator. When ``None``
                an emulator seed is created with ``seeding.create_seed``.
            obs_type: ``'ram'``, ``'rgb_image'`` or ``'grayscale_image'``.
            frameskip: value written to the emulator's ``frame_skip`` setting.
            repeat_action_probability: value written to the emulator's
                ``repeat_action_probability`` setting.
            full_action_space: use 18 actions when true, otherwise the size of
                the emulator's minimal action set.

        Raises:
            AssertionError: if ``obs_type`` or ``mode_num`` is not supported,
                or the active player count differs from ``num_players``.
            IOError: if the ROM file does not exist.
        """

        assert obs_type in (
            'ram', 'rgb_image', "grayscale_image"
        ), "obs_type must  either be 'ram' or 'rgb_image' or 'grayscale_image'"
        self.obs_type = obs_type
        self.full_action_space = full_action_space
        self.num_players = num_players
        # seeding.np_random returns an (rng, seed) pair; keep only the
        # generator so self.np_random is usable as an RNG. The returned seed
        # is discarded so the `seed is None` handling below is unaffected.
        self.np_random, _ = seeding.np_random(seed)

        multi_agent_ale_py.ALEInterface.setLoggerMode("error")
        self.ale = multi_agent_ale_py.ALEInterface()

        if seed is None:
            seed = seeding.create_seed(seed, max_bytes=4)

        self.ale.setInt(b"random_seed", seed)
        self.ale.setInt(b"frame_skip", frameskip)
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)

        pathstart = os.path.dirname(multi_agent_ale_py.__file__)
        final_path = os.path.join(pathstart, "ROM", game, game + ".bin")
        if not os.path.exists(final_path):
            raise IOError(
                "rom {} is not installed. Please install roms using AutoROM tool (https://github.com/PettingZoo-Team/AutoROM)"
                .format(game))

        self.ale.loadROM(final_path)

        all_modes = self.ale.getAvailableModes(num_players)

        if mode_num is None:
            mode = all_modes[0]
        else:
            mode = mode_num
            assert mode in all_modes, "mode_num parameter is wrong. Mode {} selected, only {} modes are supported".format(
                mode_num, str(list(all_modes)))

        self.ale.setMode(mode)
        assert num_players == self.ale.numPlayersActive()

        if full_action_space:
            action_size = 18
        else:
            action_size = len(self.ale.getMinimalActionSet())

        if obs_type == 'ram':
            observation_space = gym.spaces.Box(low=0,
                                               high=255,
                                               dtype=np.uint8,
                                               shape=(128, ))
        else:
            (screen_width, screen_height) = self.ale.getScreenDims()
            # obs_type was validated above, so one of these branches matches.
            if obs_type == 'rgb_image':
                num_channels = 3
            elif obs_type == 'grayscale_image':
                num_channels = 1
            observation_space = spaces.Box(low=0,
                                           high=255,
                                           shape=(screen_height, screen_width,
                                                  num_channels),
                                           dtype=np.uint8)

        self.num_agents = num_players
        player_names = ["first", "second", "third", "fourth"]
        self.agents = [f"{player_names[n]}_0" for n in range(self.num_agents)]

        self.action_spaces = {
            agent: gym.spaces.Discrete(action_size)
            for agent in self.agents
        }
        # All agents share the single observation space object built above.
        self.observation_spaces = {
            agent: observation_space
            for agent in self.agents
        }
        self.infos = {agent: {} for agent in self.agents}

        self._agent_selector = agent_selector(self.agents)

        self._screen = None
Beispiel #25
0
    def __init__(self,
                 local_ratio=0.02,
                 continuous=False,
                 random_drop=True,
                 starting_angular_momentum=True,
                 ball_mass=0.75,
                 ball_friction=0.3,
                 ball_elasticity=1.5,
                 max_frames=900):
        """Build the 20-piston environment: agents, spaces, physics space, pistons and ball.

        Args:
            local_ratio: weight of the local reward; the global reward weight
                is ``1 - local_ratio``.
            continuous: when truthy each agent gets a 1-element box action
                space in [-1, 1]; otherwise a 3-way discrete action space.
            random_drop: when truthy the ball's x position is shifted by a
                random integer offset in [-30, 30].
            starting_angular_momentum: stored on the instance; not otherwise
                used in this constructor.
            ball_mass: forwarded to ``self.add_ball``.
            ball_friction: forwarded to ``self.add_ball``.
            ball_elasticity: forwarded to ``self.add_ball``.
            max_frames: stored as ``self.max_frames``.
        """
        EzPickle.__init__(self, local_ratio, continuous, random_drop,
                          starting_angular_momentum, ball_mass, ball_friction,
                          ball_elasticity, max_frames)
        self.agents = ["piston_" + str(r) for r in range(20)]
        self.agent_name_mapping = dict(zip(self.agents, list(range(20))))
        self._agent_selector = agent_selector(self.agents)
        self.continuous = continuous
        # Note: `[space] * 20` repeats one space object, so all agents share
        # the same action-space (and observation-space) instance.
        if self.continuous:
            self.action_spaces = dict(
                zip(self.agents,
                    [gym.spaces.Box(low=-1, high=1, shape=(1, ))] * 20))
        else:
            self.action_spaces = dict(
                zip(self.agents, [gym.spaces.Discrete(3)] * 20))
        self.observation_spaces = dict(
            zip(self.agents, [
                gym.spaces.Box(
                    low=0, high=255, shape=(200, 120, 3), dtype=np.uint8)
            ] * 20))
        pygame.init()
        pymunk.pygame_util.positive_y_is_up = False
        self.clock = pygame.time.Clock()

        self.renderOn = False
        # Off-screen drawing surface.
        self.screen = pygame.Surface((960, 560))
        self.max_frames = max_frames

        self.pistonSprite = get_image('piston.png')
        self.background = get_image('background.png')
        self.random_drop = random_drop
        self.starting_angular_momentum = starting_angular_momentum

        self.space = pymunk.Space(threaded=True)
        self.space.threads = 2
        self.space.gravity = (0.0, 750.0)
        self.space.collision_bias = .0001
        self.space.iterations = 10  # 10 is default in PyMunk

        self.pistonList = []
        self.pistonRewards = []  # Keeps track of individual rewards
        # Defines what "recent" means in terms of number of frames.
        self.recentFrameLimit = 20
        self.recentPistons = set(
        )  # Set of pistons that have touched the ball recently
        # The two weights sum to 1; the local weight equals local_ratio up to
        # floating-point rounding.
        self.global_reward_weight = 1 - local_ratio
        self.local_reward_weight = 1 - self.global_reward_weight

        self.add_walls()

        self.done = False

        self.velocity = 4
        self.resolution = 16

        # NOTE(review): self.seed(0) presumably creates self.np_random, which
        # is used below - confirm.
        self.seed(0)
        for i in range(20):
            # Candidate height offsets: multiples of the velocity from 0 up to
            # (but excluding) half of velocity * resolution.
            temp_range = np.arange(0, .5 * self.velocity * self.resolution,
                                   self.velocity)
            # Pistons are spaced 40 px apart in x; each starts at a randomly
            # chosen offset above y = 451.
            piston = self.add_piston(
                self.space, 85 + 40 * i,
                451 - temp_range[self.np_random.randint(0, len(temp_range))])
            self.pistonList.append(piston)

        self.offset = 0
        if self.random_drop:
            self.offset = self.np_random.randint(-30, 30 + 1)
        self.ball = self.add_ball(800 + self.offset,
                                  350 + self.np_random.randint(-15, 15 + 1),
                                  ball_mass, ball_friction, ball_elasticity)
        self.lastX = int(self.ball.position[0] - 40)
        self.distance = self.lastX - 80

        self.screen.blit(self.background, (0, 0))

        self.rect = pygame.Rect(80, 80, 800, 377)

        # blit background image if ball goes out of bounds. Ball radius is 40
        self.valid_ball_position_rect = pygame.Rect(self.rect.left + 40,
                                                    self.rect.top + 40,
                                                    self.rect.width - 80,
                                                    self.rect.height - 80)

        self.frames = 0
        self.display_wait = 0.0

        self.num_agents = len(self.agents)
        self.has_reset = False
        self.closed = False
Beispiel #26
0
    def __init__(
        self,
        ind_reward=0.8,
        group_reward=0.1,
        other_group_reward=0.1,
        prospec_find_gold_reward=1,
        prospec_handoff_gold_reward=1,
        banker_receive_gold_reward=1,
        banker_deposit_gold_reward=1,
        max_frames=900,
    ):
        """Set up agents, spaces, the physics world and its collision handlers.

        Every event reward is split between agents with three weights: the
        agent that triggered the event gets ``ind_reward`` times the event
        reward, the other agents of the same kind get ``group_reward`` times
        it, and the agents of the other kind get ``other_group_reward`` times
        it.

        Args:
            ind_reward: Weight for the agent that triggered the event.
            group_reward: Weight for the other agents of the same kind.
            other_group_reward: Weight for the agents of the other kind.
            prospec_find_gold_reward: Event reward paid when a
                prospector/water collision begins.
            prospec_handoff_gold_reward: Event reward paid (prospector side)
                when gold is transferred from a prospector to a banker.
            banker_receive_gold_reward: Event reward paid (banker side) for
                the same transfer.
            banker_deposit_gold_reward: Event reward paid when gold carried
                by a banker begins colliding with a bank.
            max_frames: Stored as ``self.max_frames``; not otherwise used in
                this method.

        Raises:
            ValueError: If the three weights do not sum to approximately 1.0.
        """
        EzPickle.__init__(
            self,
            ind_reward,
            group_reward,
            other_group_reward,
            prospec_find_gold_reward,
            prospec_handoff_gold_reward,
            banker_receive_gold_reward,
            banker_deposit_gold_reward,
            max_frames,
        )

        # Compare with a tolerance: sums such as 0.7 + 0.2 + 0.1 are not
        # exactly 1.0 in floating point.
        total_reward_factor = ind_reward + group_reward + other_group_reward
        if not math.isclose(total_reward_factor, 1.0, rel_tol=1e-09):
            raise ValueError(
                "The sum of the individual reward, group reward, and other "
                "group reward should add up to approximately 1.0"
            )

        self.num_agents = const.NUM_AGENTS
        self.agents = []

        self.sprite_list = [
            "bankers/0.png",
            "bankers/1.png",
            "bankers/2.png",
            "prospector.png",
        ]
        self.max_frames = max_frames

        pg.init()
        # seed() runs before anything that consumes self.rng below.
        self.seed()
        self.clock = pg.time.Clock()
        self.closed = False

        self.background = Background(self.rng)

        self.space = pm.Space()
        self.space.gravity = Vec2d(0.0, 0.0)
        self.space.iterations = 20  # for decreasing bounciness
        self.space.damping = 0.0

        self.all_sprites = pg.sprite.RenderUpdates()
        # Nuggets currently in play; the collision handlers look them up by id.
        self.gold = []

        self.water = Water(
            const.WATER_INFO[0], const.WATER_INFO[1], self.space, self.rng
        )

        # Generate random positions for each prospector agent
        prospector_info = [
            (i, utils.rand_pos("prospector", self.rng))
            for i in range(const.NUM_PROSPECTORS)
        ]
        self.prospectors = {}
        for num, pos in prospector_info:
            prospector = Prospector(pos, self.space, num, self.all_sprites)
            identifier = f"prospector_{num}"
            self.prospectors[identifier] = prospector
            self.agents.append(identifier)

        # Same for the bankers; they are appended after all prospectors, so
        # self.agents lists prospectors first.
        banker_info = [
            (i, utils.rand_pos("banker", self.rng)) for i in range(const.NUM_BANKERS)
        ]
        self.bankers = {}
        for num, pos in banker_info:
            banker = Banker(pos, self.space, num, self.all_sprites)
            identifier = f"banker_{num}"
            self.bankers[identifier] = banker
            self.agents.append(identifier)

        self.banks = []
        for pos, verts in const.BANK_INFO:
            self.banks.append(Bank(pos, verts, self.space, self.all_sprites))

        self.fences = []
        for w_type, s_pos, b_pos, verts in const.FENCE_INFO:
            f = Fence(w_type, s_pos, b_pos, verts, self.space)
            self.fences.append(f)

        self.metadata = {"render.modes": ["human", "rgb_array"]}

        # Prospectors act in a 3-component box, bankers in a 2-component box.
        self.action_spaces = {}
        for p in self.prospectors:
            self.action_spaces[p] = spaces.Box(
                low=np.float32(-1.0), high=np.float32(1.0), shape=(3,)
            )

        for b in self.bankers:
            self.action_spaces[b] = spaces.Box(
                low=np.float32(-1.0), high=np.float32(1.0), shape=(2,)
            )

        self.observation_spaces = {}
        self.last_observation = {}

        for p in self.prospectors:
            self.last_observation[p] = None
            self.observation_spaces[p] = spaces.Box(
                low=0, high=255, shape=const.PROSPEC_OBSERV_SHAPE, dtype=np.uint8
            )

        for b in self.bankers:
            self.last_observation[b] = None
            self.observation_spaces[b] = spaces.Box(
                low=0, high=255, shape=const.BANKER_OBSERV_SHAPE, dtype=np.uint8
            )

        self._agent_selector = agent_selector(self.agents)
        self.agent_selection = self._agent_selector.next()
        self.reset()

        # Collision Handler Functions --------------------------------------------
        # Water to Prospector
        def add_gold(arbiter, space, data):
            """Pay the find-gold reward and give the prospector a nugget if it has none."""
            prospec_shape = arbiter.shapes[0]
            prospec_body = prospec_shape.body

            for k, v in self.prospectors.items():
                if v.body is prospec_body:
                    self.rewards[k] += ind_reward * prospec_find_gold_reward
                else:
                    self.rewards[k] += group_reward * prospec_find_gold_reward

            for k in self.bankers:
                self.rewards[k] += other_group_reward * prospec_find_gold_reward

            # A prospector carries at most one nugget at a time.
            if prospec_body.nugget is None:
                position = arbiter.contact_point_set.points[0].point_a

                gold = Gold(position, prospec_body, self.space, self.all_sprites)
                self.gold.append(gold)
                prospec_body.nugget = gold

            return True

        # Prospector to banker
        def handoff_gold_handler(arbiter, space, data):
            """Move a prospector's nugget to the banker when the contact angle allows it."""
            banker_shape, gold_shape = arbiter.shapes

            gold_sprite = None
            for g in self.gold:
                if g.id == gold_shape.id:
                    gold_sprite = g

            # gold_sprite is None if gold was handed off to the bank right before
            #   calling this collision handler
            # This collision handler is only for prospector -> banker gold handoffs
            if (
                gold_sprite is None
                or gold_sprite.parent_body.sprite_type != "prospector"
            ):
                return True

            banker_body = banker_shape.body
            prospec_body = gold_sprite.parent_body

            normal = arbiter.contact_point_set.normal
            # Correct the angle because banker's head is rotated pi/2
            corrected = utils.normalize_angle(banker_body.angle + (math.pi / 2))
            normalized_normal = utils.normalize_angle(normal.angle)
            if (
                corrected - const.BANKER_HANDOFF_TOLERANCE
                <= normalized_normal
                <= corrected + const.BANKER_HANDOFF_TOLERANCE
            ):

                # transfer gold
                gold_sprite.parent_body.nugget = None
                gold_sprite.parent_body = banker_body
                banker_body.nugget = gold_sprite
                banker_body.nugget_offset = normal.angle

                # Each side is paid its own event reward (weighted by role)
                # plus the other side's event reward at the other-group weight.
                for k, v in self.prospectors.items():
                    self.rewards[k] += other_group_reward * banker_receive_gold_reward
                    if v.body is prospec_body:
                        self.rewards[k] += ind_reward * prospec_handoff_gold_reward
                    else:
                        self.rewards[k] += group_reward * prospec_handoff_gold_reward

                for k, v in self.bankers.items():
                    self.rewards[k] += other_group_reward * prospec_handoff_gold_reward
                    if v.body is banker_body:
                        self.rewards[k] += ind_reward * banker_receive_gold_reward
                    else:
                        self.rewards[k] += group_reward * banker_receive_gold_reward

            return True

        # Banker to bank
        def gold_score_handler(arbiter, space, data):
            """Remove a banker-held nugget that reaches a bank and pay the deposit reward."""
            gold_shape, _ = arbiter.shapes

            gold_class = None
            for g in self.gold:
                if g.id == gold_shape.id:
                    gold_class = g

            # The shape matches no tracked nugget (for example it was already
            # removed): there is nothing to score, so ignore the contact
            # instead of failing on an unbound name.
            if gold_class is None:
                return False

            if gold_class.parent_body.sprite_type == "banker":
                self.space.remove(gold_shape, gold_shape.body)
                gold_class.parent_body.nugget = None
                banker_body = gold_class.parent_body

                for k, v in self.bankers.items():
                    if v.body is banker_body:
                        self.rewards[k] += ind_reward * banker_deposit_gold_reward
                    else:
                        self.rewards[k] += group_reward * banker_deposit_gold_reward

                for k in self.prospectors:
                    self.rewards[k] += other_group_reward * banker_deposit_gold_reward

                self.gold.remove(gold_class)
                self.all_sprites.remove(gold_class)

            return False

        # Create the collision event generators
        gold_dispenser = self.space.add_collision_handler(
            CollisionTypes.PROSPECTOR, CollisionTypes.WATER
        )

        gold_dispenser.begin = add_gold

        handoff_gold = self.space.add_collision_handler(
            CollisionTypes.BANKER, CollisionTypes.GOLD
        )

        handoff_gold.begin = handoff_gold_handler

        gold_score = self.space.add_collision_handler(
            CollisionTypes.GOLD, CollisionTypes.BANK
        )

        gold_score.begin = gold_score_handler
Beispiel #27
0
    def __init__(
        self,
        spawn_rate=20,
        num_archers=2,
        num_knights=2,
        killable_knights=True,
        killable_archers=True,
        pad_observation=True,
        line_death=False,
        max_cycles=900,
    ):
        """Create the game state, the player sprites and the per-agent spaces.

        Stores the configuration arguments on the instance, builds one
        ``Archer`` per ``num_archers`` and one ``Knight`` per ``num_knights``,
        declares a 512x512x3 uint8 observation space and a ``Discrete(6)``
        action space for every agent, and finishes by calling ``reinit()``.
        """
        EzPickle.__init__(
            self,
            spawn_rate,
            num_archers,
            num_knights,
            killable_knights,
            killable_archers,
            pad_observation,
            line_death,
            max_cycles,
        )

        # --- configuration / constants ---
        self.ZOMBIE_SPAWN = spawn_rate
        self.WIDTH = 1280
        self.HEIGHT = 720
        self.max_cycles = max_cycles
        self.frames = 0
        self.pad_observation = pad_observation
        self.killable_knights = killable_knights
        self.killable_archers = killable_archers
        self.line_death = line_death
        self.has_reset = False
        self.seed()

        # --- lookup tables for players and their weapons ---
        self.archer_dict = {}
        self.knight_dict = {}
        self.arrow_dict = {}
        self.sword_dict = {}

        # --- mutable game variables ---
        self.score = 0
        self.run = True
        self.arrow_spawn_rate = 0
        self.sword_spawn_rate = 0
        self.zombie_spawn_rate = 0
        self.knight_player_num = 0
        self.archer_player_num = 0
        self.archer_killed = False
        self.knight_killed = False
        self.sword_killed = False
        self.closed = False

        # --- sprite groups ---
        self.all_sprites = pygame.sprite.Group()
        self.zombie_list = pygame.sprite.Group()
        self.arrow_list = pygame.sprite.Group()
        self.sword_list = pygame.sprite.Group()
        self.archer_list = pygame.sprite.Group()
        self.knight_list = pygame.sprite.Group()

        self.num_archers = num_archers
        self.num_knights = num_knights

        # Agents queued for removal at the end of the current cycle
        self.kill_list = []

        # --- pygame setup: the game is drawn onto an off-screen surface ---
        self.render_on = False
        pygame.init()
        self.WINDOW = pygame.Surface((self.WIDTH, self.HEIGHT))
        pygame.display.set_caption("Knights, Archers, Zombies")
        self.left_wall = get_image(os.path.join("img", "left_wall.png"))
        self.right_wall = get_image(os.path.join("img", "right_wall.png"))
        self.right_wall_rect = self.right_wall.get_rect()
        # Pin the right wall against the right-hand edge of the window.
        self.right_wall_rect.left = self.WIDTH - self.right_wall_rect.width
        self.floor_patch1 = get_image(os.path.join("img", "patch1.png"))
        self.floor_patch2 = get_image(os.path.join("img", "patch2.png"))
        self.floor_patch3 = get_image(os.path.join("img", "patch3.png"))
        self.floor_patch4 = get_image(os.path.join("img", "patch4.png"))

        self.agent_list = []
        self.agents = []

        # --- archers: each one is shifted 50 px further than the previous ---
        final_archer = num_archers - 1
        for index in range(num_archers):
            archer = Archer(agent_name="archer_" + str(index))
            self.archer_dict["archer{0}".format(self.archer_player_num)] = archer
            archer.offset(index * 50, 0)
            self.archer_list.add(archer)
            self.all_sprites.add(archer)
            self.agent_list.append(archer)
            # The counter is left pointing at the last archer created.
            if index != final_archer:
                self.archer_player_num += 1

        # --- knights: same construction as the archers ---
        final_knight = num_knights - 1
        for index in range(num_knights):
            knight = Knight(agent_name="knight_" + str(index))
            self.knight_dict["knight{0}".format(self.knight_player_num)] = knight
            knight.offset(index * 50, 0)
            self.knight_list.add(knight)
            self.all_sprites.add(knight)
            self.agent_list.append(knight)
            if index != final_knight:
                self.knight_player_num += 1

        # --- agent names (archers first, then knights) and their indices ---
        self.agents.extend("archer_" + str(index) for index in range(num_archers))
        self.agents.extend("knight_" + str(index) for index in range(num_knights))
        self.agent_name_mapping = {
            agent: position for position, agent in enumerate(self.agents)
        }

        # --- one independent space object per agent ---
        self.observation_spaces = {
            agent: Box(low=0, high=255, shape=(512, 512, 3), dtype=np.uint8)
            for agent in self.agents
        }
        self.action_spaces = {agent: Discrete(6) for agent in self.agents}
        self.display_wait = 0.0
        self.possible_agents = self.agents[:]

        self._agent_selector = agent_selector(self.agents)
        self.reinit()
Beispiel #28
0
    def __init__(
        self,
        spawn_rate=20,
        num_archers=2,
        num_knights=2,
        max_zombies=10,
        max_arrows=10,
        killable_knights=True,
        killable_archers=True,
        pad_observation=True,
        line_death=False,
        max_cycles=900,
        vector_state=True,
        use_typemasks=False,
        transformer=False,
    ):
        """Store the configuration, declare the spaces and load the game assets.

        Args:
            spawn_rate: Stored as ``self.spawn_rate``.
            num_archers: Number of ``archer_<i>`` agents.
            num_knights: Number of ``knight_<i>`` agents.
            max_zombies: Stored; also counted in ``self.num_tracked``.
            max_arrows: Stored; also counted in ``self.num_tracked``.
            killable_knights: Stored on the instance.
            killable_archers: Stored on the instance.
            pad_observation: Stored on the instance.
            line_death: Stored on the instance.
            max_cycles: Stored on the instance.
            vector_state: When true the spaces are float64 boxes in
                ``[-1, 1]`` sized from ``num_tracked`` and ``vector_width``;
                otherwise they are uint8 image boxes.
            use_typemasks: Requests typemask columns in the vector rows.
            transformer: Stored on the instance; when true it forces
                ``self.use_typemasks`` to ``True``.
        """
        EzPickle.__init__(
            self,
            spawn_rate,
            num_archers,
            num_knights,
            max_zombies,
            max_arrows,
            killable_knights,
            killable_archers,
            pad_observation,
            line_death,
            max_cycles,
            vector_state,
            use_typemasks,
            transformer,
        )
        # variable state space
        self.transformer = transformer

        # whether we want RGB state or vector state
        self.vector_state = vector_state
        # agents + zombies + weapons
        # (num_knights appears twice: presumably one sword per knight -- TODO confirm)
        self.num_tracked = (num_archers + num_knights + max_zombies +
                            num_knights + max_arrows)
        self.use_typemasks = True if transformer else use_typemasks
        self.typemask_width = 6
        # Use the effective flag, not the raw argument: transformer mode turns
        # typemasks on even when use_typemasks=False, and the declared width
        # has to include those columns.
        self.vector_width = (4 + self.typemask_width
                             if self.use_typemasks else 4)

        # Game Status
        self.frames = 0
        self.closed = False
        self.has_reset = False
        self.render_on = False

        # Game Constants
        self.seed()
        self.spawn_rate = spawn_rate
        self.max_cycles = max_cycles
        self.pad_observation = pad_observation
        self.killable_knights = killable_knights
        self.killable_archers = killable_archers
        self.line_death = line_death
        self.num_archers = num_archers
        self.num_knights = num_knights
        self.max_zombies = max_zombies
        self.max_arrows = max_arrows

        # Represents agents to remove at end of cycle
        self.kill_list = []
        self.agent_list = []
        self.agents = []
        self.dead_agents = []

        # Archers are listed first, then knights; the mapping gives each
        # agent name its index in that order.
        self.agents.extend(
            "archer_" + str(i) for i in range(self.num_archers))
        self.agents.extend(
            "knight_" + str(i) for i in range(self.num_knights))
        self.agent_name_mapping = {
            name: index for index, name in enumerate(self.agents)
        }

        # Bounds and dtype are shared by the observation and state spaces;
        # only the shapes differ.
        if self.vector_state:
            low, high, dtype = -1.0, 1.0, np.float64
            # Per-agent observations carry one extra row and one extra column
            # compared with the global state.
            obs_shape = [self.num_tracked + 1, self.vector_width + 1]
            state_shape = [self.num_tracked, self.vector_width]
        else:
            low, high, dtype = 0, 255, np.uint8
            obs_shape = [512, 512, 3]
            state_shape = [const.SCREEN_HEIGHT, const.SCREEN_WIDTH, 3]

        self.observation_spaces = {
            agent: Box(low=low, high=high, shape=obs_shape, dtype=dtype)
            for agent in self.agents
        }
        self.action_spaces = {agent: Discrete(6) for agent in self.agents}

        self.state_space = Box(
            low=low,
            high=high,
            shape=state_shape,
            dtype=dtype,
        )
        # Copy, so later changes to self.agents cannot alter the full roster.
        self.possible_agents = self.agents[:]

        # Initializing Pygame
        pygame.init()
        # The game is drawn onto an off-screen surface rather than a window.
        self.WINDOW = pygame.Surface((const.SCREEN_WIDTH, const.SCREEN_HEIGHT))
        pygame.display.set_caption("Knights, Archers, Zombies")
        self.left_wall = get_image(os.path.join("img", "left_wall.png"))
        self.right_wall = get_image(os.path.join("img", "right_wall.png"))
        self.right_wall_rect = self.right_wall.get_rect()
        self.right_wall_rect.left = const.SCREEN_WIDTH - self.right_wall_rect.width
        self.floor_patch1 = get_image(os.path.join("img", "patch1.png"))
        self.floor_patch2 = get_image(os.path.join("img", "patch2.png"))
        self.floor_patch3 = get_image(os.path.join("img", "patch3.png"))
        self.floor_patch4 = get_image(os.path.join("img", "patch4.png"))

        self._agent_selector = agent_selector(self.agents)
        self.reinit()
Beispiel #29
0
    def __init__(self,
                 n_pistons=20,
                 local_ratio=0,
                 time_penalty=-0.1,
                 continuous=True,
                 random_drop=True,
                 random_rotate=True,
                 ball_mass=0.75,
                 ball_friction=0.3,
                 ball_elasticity=1.5,
                 max_cycles=125):
        """Lay out the piston screen, declare the agent spaces and load sprites.

        The screen is ``n_pistons`` piston widths plus two wall widths wide.
        Each agent ``piston_<i>`` gets a uint8 image observation three piston
        widths wide and either a 1-component ``Box`` action in ``[-1, 1]``
        (``continuous=True``) or a ``Discrete(3)`` action. The remaining
        arguments are stored on the instance for later use.
        """
        EzPickle.__init__(self, n_pistons, local_ratio, time_penalty,
                          continuous, random_drop, random_rotate, ball_mass,
                          ball_friction, ball_elasticity, max_cycles)

        # --- fixed geometry, in pixels ---
        self.n_pistons = n_pistons
        self.piston_head_height = 11
        self.piston_width = 40
        self.piston_height = 40
        self.piston_body_height = 23
        self.piston_radius = 5
        self.wall_width = 40
        self.ball_radius = 40
        self.screen_width = (self.piston_width * self.n_pistons) + (2 * self.wall_width)
        self.screen_height = 560

        # Observation height: the screen minus both walls and the piston body.
        top_edge = self.wall_width
        bottom_edge = self.screen_height - self.wall_width - self.piston_body_height
        obs_height = bottom_edge - top_edge

        assert self.piston_width == self.wall_width, "Wall width and piston width must be equal for observation calculation"

        # --- agents ---
        self.agents = ["piston_" + str(index) for index in range(self.n_pistons)]
        self.possible_agents = self.agents[:]
        self.agent_name_mapping = {
            name: index for index, name in enumerate(self.agents)
        }
        self._agent_selector = agent_selector(self.agents)

        # --- spaces: a single space object is shared by all agents ---
        shared_observation_space = gym.spaces.Box(
            low=0,
            high=255,
            shape=(obs_height, self.piston_width * 3, 3),
            dtype=np.uint8,
        )
        self.observation_spaces = {
            name: shared_observation_space for name in self.agents
        }

        self.continuous = continuous
        if self.continuous:
            shared_action_space = gym.spaces.Box(low=-1, high=1, shape=(1, ))
        else:
            shared_action_space = gym.spaces.Discrete(3)
        self.action_spaces = {
            name: shared_action_space for name in self.agents
        }

        # --- pygame / pymunk setup ---
        pygame.init()
        pymunk.pygame_util.positive_y_is_up = False

        self.renderOn = False
        self.screen = pygame.Surface((self.screen_width, self.screen_height))
        self.max_cycles = max_cycles

        self.piston_sprite = get_image('piston.png')
        self.piston_body_sprite = get_image('piston_body.png')
        self.background = get_image('background.png')
        self.random_drop = random_drop
        self.random_rotate = random_rotate

        # --- per-episode bookkeeping ---
        self.pistonList = []
        # Individual reward of each piston
        self.pistonRewards = []
        # How many frames still count as "recent"
        self.recentFrameLimit = 20
        # Pistons that touched the ball recently
        self.recentPistons = set()
        self.time_penalty = time_penalty
        self.local_ratio = local_ratio
        self.ball_mass = ball_mass
        self.ball_friction = ball_friction
        self.ball_elasticity = ball_elasticity

        self.done = False

        self.pixels_per_position = 4
        self.n_piston_positions = 16

        # Start from a black screen, then paint the background onto it.
        self.screen.fill((0, 0, 0))
        self.draw_background()

        # Screen area inside the walls and above the piston bodies.
        playfield_width = self.screen_width - (2 * self.wall_width)
        playfield_height = (self.screen_height - (2 * self.wall_width) -
                            self.piston_body_height)
        self.render_rect = pygame.Rect(self.wall_width, self.wall_width,
                                       playfield_width, playfield_height)

        # Blit background image if ball goes out of bounds. This rectangle is
        # render_rect shrunk by one ball radius on every side.
        ball_diameter = 2 * self.ball_radius
        self.valid_ball_position_rect = pygame.Rect(
            self.render_rect.left + self.ball_radius,
            self.render_rect.top + self.ball_radius,
            self.render_rect.width - ball_diameter,
            self.render_rect.height - ball_diameter,
        )

        self.frames = 0
        self.display_wait = 0.0

        self.has_reset = False
        self.closed = False
        self.seed()
Beispiel #30
0
    def __init__(
        self,
        seed=None,
        ind_reward=0.8,
        group_reward=0.1,
        other_group_reward=0.1,
        prospec_find_gold_reward=1,
        prospec_handoff_gold_reward=1,
        banker_receive_gold_reward=1,
        banker_deposit_gold_reward=1,
        max_frames=900,
    ):
        """Set up agents, spaces, the physics world and its collision handlers.

        Every event reward is split between agents with three weights: the
        agent that triggered the event gets ``ind_reward`` times the event
        reward, the other agents of the same kind get ``group_reward`` times
        it, and the agents of the other kind get ``other_group_reward`` times
        it.

        Args:
            seed: Passed to ``seeding.np_random``; the resulting generator is
                stored as ``self.rng``.
            ind_reward: Weight for the agent that triggered the event.
            group_reward: Weight for the other agents of the same kind.
            other_group_reward: Weight for the agents of the other kind.
            prospec_find_gold_reward: Event reward paid when a
                prospector/water collision begins.
            prospec_handoff_gold_reward: Event reward paid (prospector side)
                when gold is transferred from a prospector to a banker.
            banker_receive_gold_reward: Event reward paid (banker side) for
                the same transfer.
            banker_deposit_gold_reward: Event reward paid when gold carried
                by a banker begins colliding with a bank.
            max_frames: Stored as ``self.max_frames``; not otherwise used in
                this method.

        Raises:
            ValueError: If the three weights do not sum to approximately 1.0.
        """
        # Compare with a tolerance: an exact != 1.0 test rejects valid
        # weightings such as 0.7 + 0.2 + 0.1, which is not exactly 1.0 in
        # floating point.
        total_reward_factor = ind_reward + group_reward + other_group_reward
        if not math.isclose(total_reward_factor, 1.0, rel_tol=1e-09):
            raise ValueError(
                "Individual reward, group reward, and other group reward should "
                "add up to 1.0")

        self.num_agents = const.NUM_AGENTS
        self.agents = []

        self.sprite_list = [
            "bankers/1-big.png",
            "bankers/2-big.png",
            "bankers/3-big.png",
            "prospector-pickaxe-big.png",
        ]
        self.rendering = False
        self.max_frames = max_frames
        self.frame = 0

        pg.init()
        self.rng, seed = seeding.np_random(seed)
        self.screen = pg.Surface(const.SCREEN_SIZE)
        self.clock = pg.time.Clock()
        self.done = False
        self.closed = False

        self.background = utils.load_image(["background-debris.png"])
        self.background_rect = pg.Rect(0, 0, *const.SCREEN_SIZE)
        self.screen.blit(self.background, self.background_rect)

        self.space = pm.Space()
        self.space.gravity = Vec2d(0.0, 0.0)
        self.space.damping = 0.0

        self.all_sprites = pg.sprite.RenderUpdates()
        # Nuggets currently in play; the collision handlers look them up by id.
        self.gold = []

        # Generate random positions for each prospector agent
        prospector_info = [(i, utils.rand_pos("prospector", self.rng))
                           for i in range(const.NUM_PROSPECTORS)]
        self.prospectors = {}
        for num, pos in prospector_info:
            prospector = Prospector(pos, self.space, num, self.all_sprites)
            identifier = f"prospector_{num}"
            self.prospectors[identifier] = prospector
            self.agents.append(identifier)

        # Same for the bankers; they are appended after all prospectors, so
        # self.agents lists prospectors first.
        banker_info = [(i, utils.rand_pos("banker", self.rng))
                       for i in range(const.NUM_BANKERS)]
        self.bankers = {}
        for num, pos in banker_info:
            banker = Banker(pos, self.space, num, self.all_sprites)
            identifier = f"banker_{num}"
            self.bankers[identifier] = banker
            self.agents.append(identifier)

        self.banks = []
        for pos, verts in const.BANK_INFO:
            self.banks.append(Bank(pos, verts, self.space, self.all_sprites))

        # Fences and water are constructed with the space and sprite group
        # but no reference to them is kept on self.
        for w_type, s_pos, b_pos, verts in const.FENCE_INFO:
            Fence(w_type, s_pos, b_pos, verts, self.space, self.all_sprites)

        Water(const.WATER_INFO[0], const.WATER_INFO[1], self.space,
              self.all_sprites)

        self.metadata = {"render.modes": ["human"]}

        # Both agent kinds act in a 3-component box in [-1, 1].
        self.action_spaces = {}
        for p in self.prospectors:
            self.action_spaces[p] = spaces.Box(low=np.float32(-1.),
                                               high=np.float32(1.),
                                               shape=(3, ))

        for b in self.bankers:
            self.action_spaces[b] = spaces.Box(low=np.float32(-1.),
                                               high=np.float32(1.),
                                               shape=(3, ))

        self.observation_spaces = {}
        self.last_observation = {}
        for p in self.prospectors:
            self.last_observation[p] = None
            self.observation_spaces[p] = spaces.Box(
                low=0,
                high=255,
                shape=const.PROSPEC_OBSERV_SHAPE,
                dtype=np.uint8)

        for b in self.bankers:
            self.last_observation[b] = None
            self.observation_spaces[b] = spaces.Box(
                low=0,
                high=255,
                shape=const.BANKER_OBSERV_SHAPE,
                dtype=np.uint8)

        self.agent_order = self.agents[:]
        self._agent_selector = agent_selector(self.agent_order)
        self.agent_selection = self._agent_selector.next()
        self.reset()

        # Collision Handler Functions --------------------------------------------
        # Water to Prospector
        def add_gold(arbiter, space, data):
            """Stop the prospector at the water, pay the find-gold reward, add a nugget."""
            prospec_shape = arbiter.shapes[0]
            prospec_body = prospec_shape.body

            position = arbiter.contact_point_set.points[0].point_a
            normal = arbiter.contact_point_set.normal

            # Place the body 24 units back from the contact point along the
            # contact normal and stop it.
            prospec_body.position = position - (24 * normal)
            prospec_body.velocity = (0, 0)

            for k, v in self.prospectors.items():
                if v.body is prospec_body:
                    self.rewards[k] += ind_reward * prospec_find_gold_reward
                else:
                    self.rewards[k] += group_reward * prospec_find_gold_reward

            for k in self.bankers:
                self.rewards[
                    k] += other_group_reward * prospec_find_gold_reward

            # A prospector carries at most one nugget at a time.
            if prospec_body.nugget is None:
                gold = Gold(position, prospec_body, self.space,
                            self.all_sprites)
                self.gold.append(gold)
                prospec_body.nugget = gold

            return True

        # Prospector to banker
        def handoff_gold_handler(arbiter, space, data):
            """Move a prospector's nugget to the banker when the contact angle allows it."""
            banker_shape, gold_shape = arbiter.shapes[0], arbiter.shapes[1]

            gold_sprite = None
            for g in self.gold:
                if g.id == gold_shape.id:
                    gold_sprite = g

            # gold_sprite stays None when no tracked nugget matches the shape.
            # This collision handler is only for prospector -> banker gold handoffs
            if (gold_sprite is None
                    or gold_sprite.parent_body.sprite_type != "prospector"):
                return True

            banker_body = banker_shape.body
            prospec_body = gold_sprite.parent_body

            normal = arbiter.contact_point_set.normal
            # Correct the angle because banker's head is rotated pi/2
            corrected = utils.normalize_angle(banker_body.angle +
                                              (math.pi / 2))
            # Normalize the contact angle the same way so both sides of the
            # comparison are expressed in the same range.
            normalized_normal = utils.normalize_angle(normal.angle)
            if (corrected - const.BANKER_HANDOFF_TOLERANCE <=
                    normalized_normal <=
                    corrected + const.BANKER_HANDOFF_TOLERANCE):
                # transfer gold
                gold_sprite.parent_body.nugget = None

                gold_sprite.parent_body = banker_body
                banker_body.nugget = gold_sprite
                banker_body.nugget_offset = normal.angle

                # Rewards are paid only when the gold actually changes hands.
                # Each side is paid its own event reward (weighted by role)
                # plus the other side's event reward at the other-group weight.
                for k, v in self.prospectors.items():
                    self.rewards[
                        k] += other_group_reward * banker_receive_gold_reward
                    if v.body is prospec_body:
                        self.rewards[
                            k] += ind_reward * prospec_handoff_gold_reward
                    else:
                        self.rewards[
                            k] += group_reward * prospec_handoff_gold_reward

                for k, v in self.bankers.items():
                    self.rewards[
                        k] += other_group_reward * prospec_handoff_gold_reward
                    if v.body is banker_body:
                        self.rewards[
                            k] += ind_reward * banker_receive_gold_reward
                    else:
                        self.rewards[
                            k] += group_reward * banker_receive_gold_reward

            return True

        # Banker to bank
        def gold_score_handler(arbiter, space, data):
            """Remove a banker-held nugget that reaches a bank and pay the deposit reward."""
            gold_shape, _ = arbiter.shapes[0], arbiter.shapes[1]

            gold_class = None
            for g in self.gold:
                if g.id == gold_shape.id:
                    gold_class = g

            # The shape matches no tracked nugget (for example it was already
            # removed): there is nothing to score, so ignore the contact
            # instead of failing on an unbound name.
            if gold_class is None:
                return False

            if gold_class.parent_body.sprite_type == "banker":
                self.space.remove(gold_shape, gold_shape.body)
                gold_class.parent_body.nugget = None
                banker_body = gold_class.parent_body

                for k, v in self.bankers.items():
                    if v.body is banker_body:
                        self.rewards[
                            k] += ind_reward * banker_deposit_gold_reward
                    else:
                        self.rewards[
                            k] += group_reward * banker_deposit_gold_reward

                for k in self.prospectors:
                    self.rewards[
                        k] += other_group_reward * banker_deposit_gold_reward

                self.gold.remove(gold_class)
                self.all_sprites.remove(gold_class)

            return False

        # Create the collision event generators
        gold_dispenser = self.space.add_collision_handler(
            CollisionTypes.PROSPECTOR, CollisionTypes.WATER)

        gold_dispenser.begin = add_gold

        handoff_gold = self.space.add_collision_handler(
            CollisionTypes.BANKER, CollisionTypes.GOLD)

        handoff_gold.begin = handoff_gold_handler

        gold_score = self.space.add_collision_handler(CollisionTypes.GOLD,
                                                      CollisionTypes.BANK)

        gold_score.begin = gold_score_handler