def step(self, action):
    """Buffer ``action`` for the selected agent and advance the turn.

    A selected agent that is already done has its buffered action removed
    and is handed to ``self._was_done_step``. Otherwise the action is
    stored; when the selector reports the last agent of the cycle, the
    wrapped environment is stepped with all buffered actions and the
    per-agent bookkeeping is refreshed from its return values.
    """
    current = self.agent_selection

    if self.dones[current]:
        del self._actions[current]
        return self._was_done_step(action)

    self._actions[current] = action

    if not self._agent_selector.is_last():
        # Mid-cycle: clear rewards at the start of a cycle, then just
        # hand the turn to the next agent.
        if self._agent_selector.is_first():
            self._clear_rewards()
        self.agent_selection = self._agent_selector.next()
        return None

    # Every agent in the cycle has acted: step the wrapped environment.
    observations, rewards, dones, infos = self.env.step(self._actions)
    self._observations = copy.copy(observations)
    self.dones = copy.copy(dones)
    self.infos = copy.copy(infos)
    self.rewards = copy.copy(rewards)

    self.agents = self.env.agents[:]
    self._live_agents = [name for name in self.agents if not dones[name]]

    if self._live_agents:
        # Only agents that are not done take part in the next cycle.
        self._agent_selector = agent_selector(self._live_agents)
        self.agent_selection = self._agent_selector.reset()

    self._cumulative_rewards = copy.copy(rewards)
    self._dones_step_first()
    return None
def step(self, action, observe=True):
    """Store ``action`` for the selected agent and advance the turn.

    Actions are kept in a sequence indexed through ``self._agent_mapper``.
    When the selector reports the last agent of the cycle, the wrapped
    environment is stepped and its sequence-valued results are re-keyed by
    agent name.

    Returns the observation of the newly selected agent when ``observe``
    is true, otherwise ``None``.
    """
    slot = self._agent_mapper[self.agent_selection]
    self._actions[slot] = action

    if self._agent_selector.is_last():
        obss, rews, dones, infos = self.env.step(self._actions)
        self._observations = obss

        # Pair each positional result with the agent at the same index.
        self.dones = dict(zip(self.agents, dones))
        self.infos = dict(zip(self.agents, infos))
        self.rewards = dict(zip(self.agents, rews))

        self._live_agents = [
            name for finished, name in zip(dones, self.agents) if not finished
        ]
        if self._live_agents:
            self._agent_selector = agent_selector(self._live_agents)
            self.agent_selection = self._agent_selector.reset()
    else:
        self.agent_selection = self._agent_selector.next()

    if observe:
        return self.observe(self.agent_selection)
    return None
def __init__(self):
    """Create the board, the two players, their spaces and turn order."""
    super().__init__()
    self.board = Board()

    self.agents = ["player_1", "player_2"]
    self.possible_agents = self.agents[:]

    # Nine discrete actions per player.
    self.action_spaces = {name: spaces.Discrete(9) for name in self.agents}

    # Each player observes a (3, 3, 2) binary tensor plus a 9-entry mask.
    self.observation_spaces = {}
    for name in self.agents:
        self.observation_spaces[name] = spaces.Dict({
            'observation': spaces.Box(low=0, high=1, shape=(3, 3, 2), dtype=np.int8),
            'action_mask': spaces.Box(low=0, high=1, shape=(9, ), dtype=np.int8)
        })

    self.rewards = dict.fromkeys(self.agents, 0)
    self.dones = dict.fromkeys(self.agents, False)
    # Every move is listed as legal at construction time; each agent gets
    # its own list object.
    self.infos = {name: {'legal_moves': list(range(9))} for name in self.agents}

    self._agent_selector = agent_selector(self.agents)
    self.agent_selection = self._agent_selector.reset()
def __init__(self):
    """Create the game, the two players, and their action/observation spaces.

    Observations are 198-dimensional float vectors bounded in [0, 1],
    except for the entries whose upper bound is raised below.
    """
    super().__init__()
    self.game = Game()
    self.seed()

    self.agents = ["player_{}".format(i) for i in range(2)]
    self.possible_agents = self.agents[:]
    self._agent_order = list(self.agents)
    self._agent_selector = agent_selector(self._agent_order)
    self.infos = {name: {'legal_moves': []} for name in self.agents}

    n_actions = 26 * 26 * 2 + 1
    self.action_spaces = {name: spaces.Discrete(n_actions) for name in self.agents}

    low = np.zeros((198, ))
    high = np.ones((198, ))
    # Every fourth entry in each half has an upper bound of 6.0 ...
    high[3:97:4] = 6.0
    high[101:195:4] = 6.0
    # ... and one entry per half has an upper bound of 7.5.
    high[96] = 7.5
    high[194] = 7.5

    self.observation_spaces = {}
    for name in self.agents:
        self.observation_spaces[name] = spaces.Box(
            low=np.float32(low), high=np.float32(high), dtype=np.float32)

    self.double_roll = 0
def step(self, action):
    """Buffer ``action`` for the selected agent and advance the turn.

    A selected agent that is already done has its buffered action removed
    and is handed to ``self._was_done_step``. Otherwise the action is
    stored; when the selector reports the last agent of the cycle, the
    wrapped environment is stepped with all buffered actions and the
    per-agent bookkeeping is refreshed from its return values.
    """
    acting_agent = self.agent_selection

    if self.dones[acting_agent]:
        del self._actions[acting_agent]
        return self._was_done_step(action)

    self._actions[acting_agent] = action

    if not self._agent_selector.is_last():
        # Mid-cycle: clear rewards at the start of a cycle, then just
        # hand the turn to the next agent.
        if self._agent_selector.is_first():
            self._clear_rewards()
        self.agent_selection = self._agent_selector.next()
        return None

    observations, rewards, dones, infos = self.env.step(self._actions)
    self._observations = copy.copy(observations)
    self.dones = copy.copy(dones)
    self.infos = copy.copy(infos)
    self.rewards = copy.copy(rewards)
    self._cumulative_rewards = copy.copy(rewards)

    # Keep the wrapped env's agents first, then append (sorted) any agent
    # that produced an observation but is no longer listed by the env.
    still_listed = set(self.env.agents)
    leftovers = [
        name for name in sorted(self._observations)
        if name not in still_listed
    ]
    self.agents = self.env.agents + leftovers

    if len(self.env.agents):
        self._agent_selector = agent_selector(self.env.agents)
        self.agent_selection = self._agent_selector.reset()

    self._dones_step_first()
    return None
def reinit(self):
    """Reset turn order and all per-agent bookkeeping to initial values."""
    self._agent_selector = agent_selector(self.agents)
    self.agent_selection = self._agent_selector.next()

    self.rewards = dict.fromkeys(self.agents, 0)
    self.dones = dict.fromkeys(self.agents, False)
    self.infos = {name: {} for name in self.agents}

    # ``none`` is a name defined outside this method; every agent starts
    # with it as both state and observation.
    self.state = dict.fromkeys(self.agents, none)
    self.observations = dict.fromkeys(self.agents, none)

    self.num_moves = 0
def __init__(self, seed=None, continuous=False, vector_observation=False,
             max_frames=900, num_floors=4, synchronized_start=False,
             identical_aliens=False, random_aliens=False):
    """Build the environment: agents, assets, spaces, walls and prisoners.

    Args:
        seed: Passed to ``seeding.np_random``.
        continuous: If true, each agent gets a 1-D Box action space bounded
            by +/- ``self.velocity``; otherwise a 3-way Discrete space.
        vector_observation: If true, observations are a 1-D Box in
            [-300, 300]; otherwise a (100, 300, 3) uint8 image Box.
        max_frames: Stored as ``self.max_frames``.
        num_floors: Number of floors; there are two agents per floor.
        synchronized_start: Stored as ``self.synchronized_start``.
        identical_aliens: Stored; when true it forces ``random_aliens`` off.
        random_aliens: Stored unless ``identical_aliens`` is true.
    """
    # Record constructor arguments for pickling.
    EzPickle.__init__(self, seed, continuous, vector_observation, max_frames,
                      num_floors, synchronized_start, identical_aliens,
                      random_aliens)
    self.num_agents = 2 * num_floors
    self.agents = ["prisoner_" + str(s) for s in range(0, self.num_agents)]
    self._agent_selector = agent_selector(self.agents)
    # sprite_list and sprite_img_heights are parallel lists (8 entries each).
    self.sprite_list = ["sprites/alien", "sprites/drone", "sprites/glowy",
                        "sprites/reptile", "sprites/ufo", "sprites/bunny",
                        "sprites/robot", "sprites/tank"]
    self.sprite_img_heights = [40, 40, 46, 48, 32, 54, 48, 53]
    self.metadata = {'render.modes': ['human']}
    self.infos = {}
    self.rendering = False
    self.max_frames = max_frames
    pygame.init()
    self.clock = pygame.time.Clock()
    self.num_frames = 0
    self.done_val = False
    self.num_floors = num_floors
    self.background = get_image('background.png')
    self.background_append = get_image('background_append.png')
    self.dynamic_background = get_image('blit_background.png')
    self.dynamic_background_append = get_image('blit_background_append.png')
    self.velocity = 24
    self.continuous = continuous
    self.vector_obs = vector_observation
    self.synchronized_start = synchronized_start
    self.identical_aliens = identical_aliens
    # identical_aliens takes precedence over random_aliens.
    if (self.identical_aliens):
        self.random_aliens = False
    else:
        self.random_aliens = random_aliens
    self.np_random, seed = seeding.np_random(seed)
    self.closed = False
    # Action spaces: one per agent, continuous or discrete.
    self.action_spaces = {}
    if continuous:
        for a in self.agents:
            self.action_spaces[a] = spaces.Box(low=-self.velocity,
                                               high=self.velocity,
                                               shape=(1,), dtype=np.float32)
    else:
        for a in self.agents:
            self.action_spaces[a] = spaces.Discrete(3)
    # Observation spaces: one per agent, vector or image.
    self.observation_spaces = {}
    self.last_observation = {}
    for a in self.agents:
        self.last_observation[a] = None
        if vector_observation:
            self.observation_spaces[a] = spaces.Box(low=-300, high=300,
                                                    shape=(1,), dtype=np.float32)
        else:
            self.observation_spaces[a] = spaces.Box(low=0, 
                                                    high=255, shape=(100, 300, 3),
                                                    dtype=np.uint8)
    self.walls = []
    self.create_walls(num_floors)
    self.spawn_prisoners()
    self.has_reset = False
    # reinit() is defined elsewhere in the class; it runs last, after walls
    # and prisoners exist.
    self.reinit()
def __init__(self, board_size):
    """Set up a single-player snake game on a ``board_size`` square board.

    The snake starts as one cell at (1, 1) heading right. Coordinates are
    relative to the full observation grid, which includes a one-cell
    border on every side.
    """
    super().__init__()
    self.board_size = board_size
    self.agents = ['player_1']

    # The body list includes the head; the tail is the last element.
    self.head = (1, 1)
    self.body = [self.head]
    self._place_fruit()
    self.direction = 'R'

    # Three actions: 0 - forward, 1 - turn left, 2 - turn right.
    self.action_spaces = {name: spaces.Discrete(3) for name in self.agents}

    # Cell codes: 0 - blank, 1 - snake body, 2 - fruit, 3 - border.
    # The border adds 2 to each side length.
    self.observation_spaces = {
        name: spaces.Box(low=0,
                         high=3,
                         shape=(self.board_size + 2, self.board_size + 2),
                         dtype=np.uint8)
        for name in self.agents
    }

    self.rewards = dict.fromkeys(self.agents, 0)
    self.dones = dict.fromkeys(self.agents, False)

    # Head, body, travel direction and fruit location for the only player.
    # (0, 0) is a border corner, hence the (1, 1) start.
    # presumably self.fruit_loc is set by _place_fruit() above - confirm.
    player_info = {
        'head': (1, 1),
        'body': [(1, 1)],
        'direction': self.direction,
        'fruit': self.fruit_loc
    }
    self.infos = {'player_1': player_info}

    # For each current direction: the new direction for actions 0, 1, 2.
    self.TURN_RES = {
        'U': ['U', 'L', 'R'],
        'D': ['D', 'R', 'L'],
        'L': ['L', 'D', 'U'],
        'R': ['R', 'U', 'D']
    }

    # (row delta, column delta) for one step in each direction.
    self.DIR_CELL_CHANGE = {
        'U': (-1, 0),
        'D': (1, 0),
        'L': (0, -1),
        'R': (0, 1)
    }

    self._cumulative_rewards = dict.fromkeys(self.agents, 0)
    self._agent_selector = agent_selector(self.agents)
    self.agent_selection = self._agent_selector.reset()
def reset(self):
    """Reset the wrapped environment and rebuild all per-agent state."""
    self._observations = self.env.reset()

    # Independent copies: the agent roster and the list of live agents.
    self.agents = self.env.agents[:]
    self._live_agents = self.agents[:]

    # No action is buffered for anyone yet.
    self._actions = dict.fromkeys(self.agents)

    self._agent_selector = agent_selector(self._live_agents)
    self.agent_selection = self._agent_selector.reset()

    self.dones = dict.fromkeys(self.agents, False)
    self.infos = {name: {} for name in self.agents}
    self.rewards = dict.fromkeys(self.agents, 0)
    self._cumulative_rewards = dict.fromkeys(self.agents, 0)
def reinit(self):
    """Restore the full agent roster and reset all per-agent bookkeeping."""
    self.agents = self.possible_agents[:]
    self._agent_selector = agent_selector(self.agents)
    self.agent_selection = self._agent_selector.next()

    self.rewards = dict.fromkeys(self.agents, 0)
    self._cumulative_rewards = dict.fromkeys(self.agents, 0)
    self.dones = dict.fromkeys(self.agents, False)
    self.infos = {name: {} for name in self.agents}

    # ``NONE`` is a name defined outside this method; every agent starts
    # with it as both state and observation.
    self.state = dict.fromkeys(self.agents, NONE)
    self.observations = dict.fromkeys(self.agents, NONE)

    self.num_moves = 0
def reset(self):
    """Restore the agent roster, clear bookkeeping and reset the board."""
    self.agents = self.possible_agents[:]

    self.rewards = dict.fromkeys(self.agents, 0)
    self._cumulative_rewards = dict.fromkeys(self.agents, 0)
    self.dones = dict.fromkeys(self.agents, False)
    self.infos = {name: {} for name in self.agents}

    self._agent_selector = agent_selector(self.agents)
    self.agent_selection = self._agent_selector.next()

    self.board_state.reset()
def _reset_agents(self, player_number: int):
    """Rotate ``self.agents`` so that ``player_<player_number>`` is first.

    pyhanabi starts a different player after each reset(), so the agent
    list is rotated to begin with that player, and the agent selector and
    current selection are rebuilt from the rotated list.

    Args:
        player_number: Index of the starting player; the agent looked up
            is ``'player_' + str(player_number)``.

    Raises:
        ValueError: If that agent is not in ``self.agents``. Rotating
            until the first entry matches would otherwise never finish.
    """
    starting_agent = 'player_' + str(player_number)

    try:
        offset = self.agents.index(starting_agent)
    except ValueError:
        raise ValueError(
            "Starting agent {!r} is not in self.agents: {!r}".format(
                starting_agent, self.agents)) from None

    # Leave the list object untouched when the order is already correct.
    if offset:
        self.agents = self.agents[offset:] + self.agents[:offset]

    # Agent order list, on which the agent selector operates.
    self._agent_selector = agent_selector(self.agents)

    # Reset agent_selection to the first agent of the new order.
    self.agent_selection = self._agent_selector.reset()
def reset(self, observe=True):
    """Reset bookkeeping and the wrapped environment.

    Returns the observation of the first selected agent when ``observe``
    is true, otherwise ``None``.
    """
    # One empty action slot per agent.
    self._actions = [None] * self.num_agents

    self._live_agents = self.agents[:]
    self._agent_selector = agent_selector(self._live_agents)
    self.agent_selection = self._agent_selector.reset()

    self.dones = dict.fromkeys(self.agents, False)
    self.infos = {name: {} for name in self.agents}
    self.rewards = dict.fromkeys(self.agents, 0)

    self._observations = self.env.reset()

    if observe:
        return self.observe(self.agent_selection)
    return None
def __init__(self, *args, **kwargs):
    """Wrap the underlying ``_env`` and expose one agent per walker.

    All positional and keyword arguments are forwarded unchanged to both
    ``EzPickle`` and ``_env``.
    """
    EzPickle.__init__(self, *args, **kwargs)
    self.env = _env(*args, **kwargs)

    self.agents = ["walker_" + str(idx) for idx in range(self.env.num_agents)]
    self.possible_agents = self.agents[:]
    # Agent name -> positional index.
    self.agent_name_mapping = dict(zip(self.agents, range(self.num_agents)))
    self._agent_selector = agent_selector(self.agents)

    # The wrapped env exposes its spaces as sequences; pair them with
    # the agent names by position.
    self.action_spaces = dict(zip(self.agents, self.env.action_space))
    self.observation_spaces = dict(zip(self.agents, self.env.observation_space))

    self.steps = 0
    self.display_wait = 0.04
def __init__(self, *args, **kwargs):
    """Wrap the underlying ``_env`` and expose one agent per pursuer.

    All positional and keyword arguments are forwarded to ``_env``.
    """
    super().__init__()
    self.env = _env(*args, **kwargs)

    self.agents = ["pursuer_" + str(idx) for idx in range(self.env.num_agents)]
    self.possible_agents = self.agents[:]
    # Agent name -> positional index.
    self.agent_name_mapping = dict(zip(self.agents, range(self.num_agents)))
    self._agent_selector = agent_selector(self.agents)

    # The wrapped env exposes its spaces as sequences; pair them with
    # the agent names by position.
    self.action_spaces = dict(zip(self.agents, self.env.action_space))
    self.observation_spaces = dict(zip(self.agents, self.env.observation_space))

    self.has_reset = False
def test_agent_order(env):
    """Check that ``env`` cycles agents in the order given by ``agent_order``.

    Warns when the env has no ``_agent_selector`` or uses a selector that
    is not an ``agent_selector`` instance, then plays up to 200 steps and
    asserts after each one that ``env.agent_selection`` matches a reference
    selector built from ``env.agent_order``.

    Args:
        env: Environment under test. It must expose ``agent_order``,
            ``agent_selection``, ``infos``, ``dones``, ``action_spaces``,
            ``agents``, ``reset`` and ``step``.

    Raises:
        AssertionError: If ``agent_order`` is missing or the selection
            diverges from the reference selector.
    """
    env.reset()
    if not hasattr(env, "_agent_selector"):
        warnings.warn(
            "Env has no object named _agent_selector. We recommend handling agent cycling with the agent_selector utility from utils/agent_selector.py."
        )
    elif not isinstance(env._agent_selector, agent_selector):
        warnings.warn(
            "You created your own agent_selector utility. You might want to use ours, in utils/agent_selector.py"
        )
    assert hasattr(env, "agent_order"), "Env does not have agent_order"
    env.reset(observe=False)
    # Build an independent reference selector from the env's own order.
    agent_order = copy(env.agent_order)
    _agent_selector = agent_selector(agent_order)
    agent_selection = _agent_selector.next()
    if hasattr(env, "_agent_selector"):
        assert env._agent_selector == _agent_selector, "env._agent_selector is initialized incorrectly"
    assert env.agent_selection == agent_selection, "env.agent_selection is not the same as the first agent in agent_order"
    for _ in range(200):
        agent = agent_selection
        # Prefer a legal move when the env advertises them; otherwise
        # sample from the agent's action space.
        if 'legal_moves' in env.infos[agent]:
            action = random.choice(env.infos[agent]['legal_moves'])
        else:
            action = env.action_spaces[agent].sample()
        env.step(action, observe=False)
        if all(env.dones.values()):
            break
        if agent_order == env.agent_order:
            # Order unchanged: the reference selector simply advances.
            agent_selection = _agent_selector.next()
            assert env.agent_selection == agent_selection, "env.agent_selection ({}) is not the same as the next agent in agent_order {}".format(
                env.agent_selection, env.agent_order)
        else:
            # Order changed during the step: rebuild the reference selector
            # on the new order and advance it past the slot following the
            # agent that just acted.
            previous_agent_selection_index = agent_order.index(agent_selection)
            agent_order = copy(env.agent_order)
            _agent_selector.reinit(agent_order)
            skips = (previous_agent_selection_index + 1) % len(env.agents)
            for _ in range(skips + 1):
                agent_selection = _agent_selector.next()
            assert env.agent_selection == agent_selection, "env.agent_selection ({}) is not the same as the next agent in agent_order {}".format(
                env.agent_selection, env.agent_order)
def __init__(self, seed=None, *args, **kwargs):
    """Wrap the underlying ``_env`` and expose one agent per pursuer.

    ``seed`` is passed to ``_env`` as its first positional argument,
    followed by the remaining positional and keyword arguments.
    """
    super().__init__()
    self.env = _env(seed, *args, **kwargs)

    self.num_agents = self.env.num_agents
    self.agents = ["pursuer_" + str(idx) for idx in range(self.num_agents)]
    # Agent name -> positional index.
    self.agent_name_mapping = dict(zip(self.agents, range(self.num_agents)))
    self._agent_selector = agent_selector(self.agents)

    # The wrapped env exposes its spaces as sequences; pair them with
    # the agent names by position.
    self.action_spaces = dict(zip(self.agents, self.env.action_space))
    self.observation_spaces = dict(zip(self.agents, self.env.observation_space))

    self.steps = 0
    self.display_wait = 0.03
    self.has_reset = False
def __init__(self, *args, **kwargs):
    """Wrap the underlying ``_env`` and expose one agent per pursuer.

    All positional and keyword arguments are forwarded unchanged to both
    ``EzPickle`` and ``_env``.
    """
    EzPickle.__init__(self, *args, **kwargs)
    self.env = _env(*args, **kwargs)
    pygame.init()

    self.agents = ["pursuer_" + str(idx) for idx in range(self.env.num_agents)]
    self.possible_agents = self.agents[:]
    # Agent name -> positional index.
    self.agent_name_mapping = dict(zip(self.agents, range(self.num_agents)))
    self._agent_selector = agent_selector(self.agents)

    # Spaces: the first entry of the env's act_dims, then the per-agent
    # spaces paired with agent names by position.
    self.n_act_agents = self.env.act_dims[0]
    self.action_spaces = dict(zip(self.agents, self.env.action_space))
    self.observation_spaces = dict(zip(self.agents, self.env.observation_space))

    self.steps = 0
    self.closed = False
def __init__(self, seed=None, *args, **kwargs):
    """Wrap the underlying ``_env`` and expose one agent per pursuer.

    ``EzPickle`` receives ``seed`` first; ``_env`` receives it after the
    other positional arguments.
    """
    EzPickle.__init__(self, seed, *args, **kwargs)
    self.env = _env(*args, seed, **kwargs)
    pygame.init()

    self.num_agents = self.env.num_agents
    self.agents = ["pursuer_" + str(idx) for idx in range(self.num_agents)]
    # Agent name -> positional index.
    self.agent_name_mapping = dict(zip(self.agents, range(self.num_agents)))
    self._agent_selector = agent_selector(self.agents)
    self.has_reset = False

    # Spaces: the first entry of the env's act_dims, then the per-agent
    # spaces paired with agent names by position.
    self.n_act_agents = self.env.act_dims[0]
    self.action_spaces = dict(zip(self.agents, self.env.action_space))
    self.observation_spaces = dict(zip(self.agents, self.env.observation_space))

    self.steps = 0
    self.display_wait = 0.0
    self.closed = False
def step(self, action, observe=True):
    """Store ``action`` for the selected agent and advance the turn.

    When the selector reports the last agent of the cycle, the wrapped
    environment is stepped with all buffered actions and its results
    replace the current observations, dones, infos and rewards.

    Returns the observation of the newly selected agent when ``observe``
    is true, otherwise ``None``.
    """
    self._actions[self.agent_selection] = action

    if not self._agent_selector.is_last():
        self.agent_selection = self._agent_selector.next()
    else:
        obss, rews, dones, infos = self.env.step(self._actions)
        self._observations = obss
        self.dones = dones
        self.infos = infos
        self.rewards = rews

        # Only agents that are not done take part in the next cycle.
        self._live_agents = [name for name in self.agents if not dones[name]]
        if self._live_agents:
            self._agent_selector = agent_selector(self._live_agents)
            self.agent_selection = self._agent_selector.reset()

    if observe:
        return self.observe(self.agent_selection)
    return None
def __init__(self, scenario: MultiAgentScenario, reset_mode='grid'):
    """Bind the environment to ``scenario`` and create empty bookkeeping.

    Args:
        scenario: Scenario whose ``agents`` and ``world`` are used.
        reset_mode: Stored as ``self._reset_mode``.
    """
    self._scenario = scenario
    self._reset_mode = reset_mode

    # Agent ids in sorted order; the first one is selected initially.
    self.possible_agents = sorted(scenario.agents)
    self.agents = self.possible_agents[:]
    self._agent_selector = agent_selector(self.possible_agents)
    self.agent_selection = self.possible_agents[0]

    self.rewards = dict.fromkeys(self.agents, 0)
    self._cumulative_rewards = dict.fromkeys(self.agents, 0)
    self.dones = dict.fromkeys(self.agents, False)

    # Each of these maps needs its own empty dict per agent.
    self.infos = {agent_id: {} for agent_id in self.agents}
    self.state = {agent_id: {} for agent_id in self.agents}
    self.observations = {agent_id: {} for agent_id in self.agents}
    self.actions = {agent_id: {} for agent_id in self.agents}

    self.num_moves = 0
    self._initialized = False
    self._scenario.world.init()
    super().__init__()
def reset(self):
    """Reset every agent, the bookkeeping maps and the world."""
    self.agents = self.possible_agents.copy()
    self._agent_selector = agent_selector(self.agents)
    self.agent_selection = self._agent_selector.next()

    world = self._scenario.world
    for agent_id in self.agents:
        agent = self._scenario.agents[agent_id]
        start_position = world.get_starting_position(
            agent=agent, mode=self._reset_mode)
        # The observation returned by agent.reset() is stored first and
        # then replaced below by the vehicle's own observation.
        self.observations[agent_id] = agent.reset(pose=start_position)

        self.rewards[agent_id] = 0
        self._cumulative_rewards[agent_id] = 0
        self.dones[agent_id] = False
        self.infos[agent_id] = {}
        self.observations[agent_id] = (
            self._scenario.agents[agent_id].vehicle.observe())
        self.actions[agent_id] = {}

    self.num_moves = 0
    self._scenario.world.reset()
    self._scenario.world.update()
    self.state = self._scenario.world.state()
def __init__(self):
    """Create the game, the two players, and their action/observation spaces.

    Each observation is a Dict with a 198-dimensional float 'observation'
    vector and a 1353-entry 'action_mask'.
    """
    super().__init__()
    self.game = Game()
    self.seed()

    self.agents = [f"player_{i}" for i in range(2)]
    self.possible_agents = self.agents[:]
    self._agent_order = list(self.agents)
    self._agent_selector = agent_selector(self._agent_order)
    self.infos = {name: {} for name in self.agents}

    n_actions = 26 * 26 * 2 + 1
    self.action_spaces = {name: spaces.Discrete(n_actions) for name in self.agents}

    low = np.zeros((198, ))
    high = np.ones((198, ))
    # Every fourth entry in each half has an upper bound of 6.0 ...
    high[3:97:4] = 6.0
    high[101:195:4] = 6.0
    # ... and one entry per half has an upper bound of 7.5.
    high[96] = 7.5
    high[194] = 7.5

    self.observation_spaces = {}
    for name in self.agents:
        self.observation_spaces[name] = spaces.Dict({
            'observation': spaces.Box(low=np.float32(low),
                                      high=np.float32(high),
                                      dtype=np.float32),
            'action_mask': spaces.Box(low=0, high=1, shape=(1353, ), dtype=np.int8)
        })

    self.double_roll = 0
def __init__(self, game, num_players, mode_num=None, seed=None,
             obs_type='rgb_image', frameskip=4,
             repeat_action_probability=0.25, full_action_space=True):
    """Load an ALE ROM and set up per-player action/observation spaces.

    Args:
        game: ROM name; the file looked up is ``ROM/<game>/<game>.bin``
            under the ``multi_agent_ale_py`` package directory.
        num_players: Number of players; asserted against the emulator's
            active player count after the mode is set.
        mode_num: Game mode. ``None`` selects the first available mode for
            ``num_players``; any other value must be an available mode.
        seed: Emulator seed. ``None`` draws one via ``seeding.create_seed``.
        obs_type: One of ``'ram'``, ``'rgb_image'``, ``'grayscale_image'``.
        frameskip: Should be either a tuple (indicating a random range to
            choose from, with the top value excluded), or an int. It is
            passed to the emulator's ``frame_skip`` setting as given.
        repeat_action_probability: Passed to the emulator setting of the
            same name.
        full_action_space: If true, 18 actions are exposed; otherwise the
            size of the emulator's minimal action set.

    Raises:
        AssertionError: On an invalid ``obs_type``, an unsupported
            ``mode_num``, or a player-count mismatch.
        IOError: If the ROM file does not exist.
    """
    assert obs_type in (
        'ram', 'rgb_image', "grayscale_image"
    ), "obs_type must either be 'ram' or 'rgb_image' or 'grayscale_image'"
    self.obs_type = obs_type
    self.full_action_space = full_action_space
    self.num_players = num_players
    # NOTE(review): the whole return value of seeding.np_random is stored
    # here; gym-style helpers usually return an (rng, seed) pair - confirm
    # that a tuple is what users of self.np_random expect.
    self.np_random = seeding.np_random(seed)
    multi_agent_ale_py.ALEInterface.setLoggerMode("error")
    self.ale = multi_agent_ale_py.ALEInterface()
    if seed is None:
        seed = seeding.create_seed(seed, max_bytes=4)
    self.ale.setInt(b"random_seed", seed)
    self.ale.setInt(b"frame_skip", frameskip)
    self.ale.setFloat(b'repeat_action_probability',
                      repeat_action_probability)
    # ROMs are looked up next to the installed multi_agent_ale_py package.
    pathstart = os.path.dirname(multi_agent_ale_py.__file__)
    final_path = os.path.join(pathstart, "ROM", game, game + ".bin")
    if not os.path.exists(final_path):
        raise IOError(
            "rom {} is not installed. Please install roms using AutoROM tool (https://github.com/PettingZoo-Team/AutoROM)"
            .format(game))
    self.ale.loadROM(final_path)
    all_modes = self.ale.getAvailableModes(num_players)
    if mode_num is None:
        mode = all_modes[0]
    else:
        mode = mode_num
        assert mode in all_modes, "mode_num parameter is wrong. Mode {} selected, only {} modes are supported".format(
            mode_num, str(list(all_modes)))
    self.ale.setMode(mode)
    assert num_players == self.ale.numPlayersActive()
    if full_action_space:
        action_size = 18
    else:
        action_size = len(self.ale.getMinimalActionSet())
    if obs_type == 'ram':
        observation_space = gym.spaces.Box(low=0,
                                           high=255,
                                           dtype=np.uint8,
                                           shape=(128, ))
    else:
        (screen_width, screen_height) = self.ale.getScreenDims()
        # The assert at the top guarantees one of these branches is taken.
        if obs_type == 'rgb_image':
            num_channels = 3
        elif obs_type == 'grayscale_image':
            num_channels = 1
        observation_space = spaces.Box(low=0,
                                       high=255,
                                       shape=(screen_height, screen_width,
                                              num_channels),
                                       dtype=np.uint8)
    self.num_agents = num_players
    # Only four names are available, so at most four players can be named.
    player_names = ["first", "second", "third", "fourth"]
    self.agents = [f"{player_names[n]}_0" for n in range(self.num_agents)]
    self.action_spaces = {
        agent: gym.spaces.Discrete(action_size)
        for agent in self.agents
    }
    # All agents share the same observation space object.
    self.observation_spaces = {
        agent: observation_space
        for agent in self.agents
    }
    self.infos = {agent: {} for agent in self.agents}
    self._agent_selector = agent_selector(self.agents)
    self._screen = None
def __init__(self, local_ratio=0.02, continuous=False, random_drop=True,
             starting_angular_momentum=True, ball_mass=0.75,
             ball_friction=0.3, ball_elasticity=1.5, max_frames=900):
    """Build the 20-piston environment: spaces, physics space, pistons, ball.

    Args:
        local_ratio: Weight of the local reward; the global reward weight
            is ``1 - local_ratio``.
        continuous: If true, each piston gets a 1-D Box action space in
            [-1, 1]; otherwise a 3-way Discrete space.
        random_drop: If true, the ball's x position gets a random offset
            drawn from [-30, 30].
        starting_angular_momentum: Stored on the instance.
        ball_mass: Passed to ``add_ball``.
        ball_friction: Passed to ``add_ball``.
        ball_elasticity: Passed to ``add_ball``.
        max_frames: Stored as ``self.max_frames``.
    """
    # Record constructor arguments for pickling.
    EzPickle.__init__(self, local_ratio, continuous, random_drop,
                      starting_angular_momentum, ball_mass, ball_friction,
                      ball_elasticity, max_frames)
    self.agents = ["piston_" + str(r) for r in range(20)]
    self.agent_name_mapping = dict(zip(self.agents, list(range(20))))
    self._agent_selector = agent_selector(self.agents)
    self.continuous = continuous
    # Note: `[space] * 20` repeats one space object, so all agents share
    # the same action (and observation) space instance.
    if self.continuous:
        self.action_spaces = dict(
            zip(self.agents,
                [gym.spaces.Box(low=-1, high=1, shape=(1, ))] * 20))
    else:
        self.action_spaces = dict(
            zip(self.agents, [gym.spaces.Discrete(3)] * 20))
    self.observation_spaces = dict(
        zip(self.agents, [
            gym.spaces.Box(
                low=0, high=255, shape=(200, 120, 3), dtype=np.uint8)
        ] * 20))
    pygame.init()
    pymunk.pygame_util.positive_y_is_up = False
    self.clock = pygame.time.Clock()
    self.renderOn = False
    # Off-screen surface that frames are drawn onto.
    self.screen = pygame.Surface((960, 560))
    self.max_frames = max_frames
    self.pistonSprite = get_image('piston.png')
    self.background = get_image('background.png')
    self.random_drop = random_drop
    self.starting_angular_momentum = starting_angular_momentum
    # Physics space configuration.
    self.space = pymunk.Space(threaded=True)
    self.space.threads = 2
    self.space.gravity = (0.0, 750.0)
    self.space.collision_bias = .0001
    self.space.iterations = 10  # 10 is default in PyMunk
    self.pistonList = []
    self.pistonRewards = []  # Keeps track of individual rewards
    # Defines what "recent" means in terms of number of frames. 
    self.recentFrameLimit = 20
    self.recentPistons = set(
    )  # Set of pistons that have touched the ball recently
    self.global_reward_weight = 1 - local_ratio
    self.local_reward_weight = 1 - self.global_reward_weight
    self.add_walls()
    self.done = False
    self.velocity = 4
    self.resolution = 16
    # The constructor always seeds with 0 before placing pistons and ball.
    self.seed(0)
    for i in range(20):
        # Candidate height offsets: multiples of velocity below
        # 0.5 * velocity * resolution; one is picked at random per piston.
        temp_range = np.arange(0, .5 * self.velocity * self.resolution,
                               self.velocity)
        piston = self.add_piston(
            self.space, 85 + 40 * i,
            451 - temp_range[self.np_random.randint(0, len(temp_range))])
        self.pistonList.append(piston)
    self.offset = 0
    if self.random_drop:
        self.offset = self.np_random.randint(-30, 30 + 1)
    self.ball = self.add_ball(800 + self.offset,
                              350 + self.np_random.randint(-15, 15 + 1),
                              ball_mass, ball_friction, ball_elasticity)
    self.lastX = int(self.ball.position[0] - 40)
    self.distance = self.lastX - 80
    self.screen.blit(self.background, (0, 0))
    self.rect = pygame.Rect(80, 80, 800, 377)
    # blit background image if ball goes out of bounds. Ball radius is 40
    self.valid_ball_position_rect = pygame.Rect(self.rect.left + 40,
                                                self.rect.top + 40,
                                                self.rect.width - 80,
                                                self.rect.height - 80)
    self.frames = 0
    self.display_wait = 0.0
    self.num_agents = len(self.agents)
    self.has_reset = False
    self.closed = False
def __init__(
    self,
    ind_reward=0.8,
    group_reward=0.1,
    other_group_reward=0.1,
    prospec_find_gold_reward=1,
    prospec_handoff_gold_reward=1,
    banker_receive_gold_reward=1,
    banker_deposit_gold_reward=1,
    max_frames=900,
):
    """Build the prospector/banker world and register collision handlers.

    Args:
        ind_reward: Share of an event's reward given to the agent that
            caused it.
        group_reward: Share given to the other agents of the same type.
        other_group_reward: Share given to agents of the other type.
        prospec_find_gold_reward: Base reward when a prospector touches
            water.
        prospec_handoff_gold_reward: Base reward for the prospector side
            of a gold handoff.
        banker_receive_gold_reward: Base reward for the banker side of a
            gold handoff.
        banker_deposit_gold_reward: Base reward when a banker's gold
            reaches a bank.
        max_frames: Stored as ``self.max_frames``.

    Raises:
        ValueError: If the three reward shares do not sum to ~1.0.
    """
    # Record constructor arguments for pickling.
    EzPickle.__init__(
        self,
        ind_reward,
        group_reward,
        other_group_reward,
        prospec_find_gold_reward,
        prospec_handoff_gold_reward,
        banker_receive_gold_reward,
        banker_deposit_gold_reward,
        max_frames,
    )
    total_reward_factor = ind_reward + group_reward + other_group_reward
    if not math.isclose(total_reward_factor, 1.0, rel_tol=1e-09):
        raise ValueError(
            "The sum of the individual reward, group reward, and other "
            "group reward should add up to approximately 1.0"
        )
    self.num_agents = const.NUM_AGENTS
    self.agents = []
    self.sprite_list = [
        "bankers/0.png",
        "bankers/1.png",
        "bankers/2.png",
        "prospector.png",
    ]
    self.max_frames = max_frames
    pg.init()
    # presumably seed() creates self.rng, which is used below - confirm.
    self.seed()
    self.clock = pg.time.Clock()
    self.closed = False
    self.background = Background(self.rng)
    self.space = pm.Space()
    self.space.gravity = Vec2d(0.0, 0.0)
    self.space.iterations = 20  # for decreasing bounciness
    self.space.damping = 0.0
    self.all_sprites = pg.sprite.RenderUpdates()
    self.gold = []
    self.water = Water(
        const.WATER_INFO[0], const.WATER_INFO[1], self.space, self.rng
    )
    # Generate random positions for each prospector agent
    prospector_info = [
        (i, utils.rand_pos("prospector", self.rng))
        for i in range(const.NUM_PROSPECTORS)
    ]
    self.prospectors = {}
    for num, pos in prospector_info:
        prospector = Prospector(pos, self.space, num, self.all_sprites)
        identifier = f"prospector_{num}"
        self.prospectors[identifier] = prospector
        self.agents.append(identifier)
    # Bankers are appended after all prospectors, fixing the agent order.
    banker_info = [
        (i, utils.rand_pos("banker", self.rng))
        for i in range(const.NUM_BANKERS)
    ]
    self.bankers = {}
    for num, pos in banker_info:
        banker = Banker(pos, self.space, num, self.all_sprites)
        identifier = f"banker_{num}"
        self.bankers[identifier] = banker
        self.agents.append(identifier)
    self.banks = []
    for pos, verts in const.BANK_INFO:
        self.banks.append(Bank(pos, 
                               verts, self.space, self.all_sprites))
    self.fences = []
    for w_type, s_pos, b_pos, verts in const.FENCE_INFO:
        f = Fence(w_type, s_pos, b_pos, verts, self.space)
        self.fences.append(f)
    self.metadata = {"render.modes": ["human", "rgb_array"]}
    # Prospectors get 3-dimensional actions, bankers 2-dimensional ones.
    self.action_spaces = {}
    for p in self.prospectors:
        self.action_spaces[p] = spaces.Box(
            low=np.float32(-1.0), high=np.float32(1.0), shape=(3,)
        )
    for b in self.bankers:
        self.action_spaces[b] = spaces.Box(
            low=np.float32(-1.0), high=np.float32(1.0), shape=(2,)
        )
    self.observation_spaces = {}
    self.last_observation = {}
    for p in self.prospectors:
        self.last_observation[p] = None
        self.observation_spaces[p] = spaces.Box(
            low=0, high=255, shape=const.PROSPEC_OBSERV_SHAPE, dtype=np.uint8
        )
    for b in self.bankers:
        self.last_observation[b] = None
        self.observation_spaces[b] = spaces.Box(
            low=0, high=255, shape=const.BANKER_OBSERV_SHAPE, dtype=np.uint8
        )
    self._agent_selector = agent_selector(self.agents)
    self.agent_selection = self._agent_selector.next()
    # reset() runs before the collision handlers below are registered.
    # presumably it creates self.rewards, which the handlers update - confirm.
    self.reset()

    # Collision Handler Functions --------------------------------------------
    # Water to Prospector
    def add_gold(arbiter, space, data):
        """Reward a water touch and give the prospector a nugget if it has none."""
        prospec_shape = arbiter.shapes[0]
        prospec_body = prospec_shape.body
        for k, v in self.prospectors.items():
            if v.body is prospec_body:
                self.rewards[k] += ind_reward * prospec_find_gold_reward
            else:
                self.rewards[k] += group_reward * prospec_find_gold_reward
        for k in self.bankers:
            self.rewards[k] += other_group_reward * prospec_find_gold_reward
        # Rewards above are granted on every contact; a new Gold is only
        # created when the prospector is not already carrying one.
        if prospec_body.nugget is None:
            position = arbiter.contact_point_set.points[0].point_a
            gold = Gold(position, prospec_body, self.space, self.all_sprites)
            self.gold.append(gold)
            prospec_body.nugget = gold
        return True

    # Prospector to banker
    def handoff_gold_handler(arbiter, space, data):
        """Transfer gold from a prospector to a banker it touches head-on."""
        banker_shape, gold_shape = arbiter.shapes
        gold_sprite = None
        for g in self.gold:
            if g.id == gold_shape.id:
                gold_sprite = g
        # gold_sprite is None if gold was handed off to the bank right before
        # calling this collision handler
        # This 
        # collision handler is only for prospector -> banker gold handoffs
        if (
            gold_sprite is None
            or gold_sprite.parent_body.sprite_type != "prospector"
        ):
            return True
        banker_body = banker_shape.body
        prospec_body = gold_sprite.parent_body
        normal = arbiter.contact_point_set.normal
        # Correct the angle because banker's head is rotated pi/2
        corrected = utils.normalize_angle(banker_body.angle + (math.pi / 2))
        normalized_normal = utils.normalize_angle(normal.angle)
        # The handoff only happens when the contact normal is within the
        # tolerance of the banker's (corrected) facing angle.
        if (
            corrected - const.BANKER_HANDOFF_TOLERANCE
            <= normalized_normal
            <= corrected + const.BANKER_HANDOFF_TOLERANCE
        ):
            # transfer gold
            gold_sprite.parent_body.nugget = None
            gold_sprite.parent_body = banker_body
            banker_body.nugget = gold_sprite
            banker_body.nugget_offset = normal.angle
            # Each agent receives the other type's event as other-group
            # reward and its own type's event as individual or group reward.
            for k, v in self.prospectors.items():
                self.rewards[k] += other_group_reward * banker_receive_gold_reward
                if v.body is prospec_body:
                    self.rewards[k] += ind_reward * prospec_handoff_gold_reward
                else:
                    self.rewards[k] += group_reward * prospec_handoff_gold_reward
            for k, v in self.bankers.items():
                self.rewards[k] += other_group_reward * prospec_handoff_gold_reward
                if v.body is banker_body:
                    self.rewards[k] += ind_reward * banker_receive_gold_reward
                else:
                    self.rewards[k] += group_reward * banker_receive_gold_reward
        return True

    # Banker to bank
    def gold_score_handler(arbiter, space, data):
        """Score and remove gold that a banker brings into contact with a bank."""
        gold_shape, _ = arbiter.shapes
        # NOTE(review): unlike handoff_gold_handler, gold_class has no
        # default; if no entry of self.gold matches the shape id, the next
        # statement would hit an unbound local - confirm this cannot occur.
        for g in self.gold:
            if g.id == gold_shape.id:
                gold_class = g
        if gold_class.parent_body.sprite_type == "banker":
            self.space.remove(gold_shape, gold_shape.body)
            gold_class.parent_body.nugget = None
            banker_body = gold_class.parent_body
            for k, v in self.bankers.items():
                if v.body is banker_body:
                    self.rewards[k] += ind_reward * banker_deposit_gold_reward
                else:
                    self.rewards[k] += group_reward * banker_deposit_gold_reward
            for k in self.prospectors:
                self.rewards[k] += other_group_reward * banker_deposit_gold_reward
            self.gold.remove(gold_class)
            self.all_sprites.remove(gold_class)
        return False

    # Create the collision event generators 
    gold_dispenser = self.space.add_collision_handler(
        CollisionTypes.PROSPECTOR, CollisionTypes.WATER
    )
    gold_dispenser.begin = add_gold
    handoff_gold = self.space.add_collision_handler(
        CollisionTypes.BANKER, CollisionTypes.GOLD
    )
    handoff_gold.begin = handoff_gold_handler
    gold_score = self.space.add_collision_handler(
        CollisionTypes.GOLD, CollisionTypes.BANK
    )
    gold_score.begin = gold_score_handler
def __init__(
    self,
    spawn_rate=20,
    num_archers=2,
    num_knights=2,
    killable_knights=True,
    killable_archers=True,
    pad_observation=True,
    line_death=False,
    max_cycles=900,
):
    """Build the game state, sprites, agent list and spaces.

    Stores the configuration arguments on the instance, creates one
    ``Archer`` / ``Knight`` sprite per requested agent, derives the agent
    names together with their name -> index mapping, declares the per-agent
    observation and action spaces and finally calls ``self.reinit()``.
    """
    EzPickle.__init__(
        self,
        spawn_rate,
        num_archers,
        num_knights,
        killable_knights,
        killable_archers,
        pad_observation,
        line_death,
        max_cycles,
    )

    # Game constants and configuration flags.
    self.ZOMBIE_SPAWN = spawn_rate
    self.WIDTH = 1280
    self.HEIGHT = 720
    self.max_cycles = max_cycles
    self.frames = 0
    self.pad_observation = pad_observation
    self.killable_knights = killable_knights
    self.killable_archers = killable_archers
    self.line_death = line_death
    self.has_reset = False
    self.seed()

    # Look-up tables for the players and their weapons.
    self.archer_dict = {}
    self.knight_dict = {}
    self.arrow_dict = {}
    self.sword_dict = {}

    # Game variables.
    self.score = 0
    self.run = True
    self.arrow_spawn_rate = 0
    self.sword_spawn_rate = 0
    self.zombie_spawn_rate = 0
    self.knight_player_num = 0
    self.archer_player_num = 0
    self.archer_killed = False
    self.knight_killed = False
    self.sword_killed = False
    self.closed = False

    # Sprite groups.
    self.all_sprites = pygame.sprite.Group()
    self.zombie_list = pygame.sprite.Group()
    self.arrow_list = pygame.sprite.Group()
    self.sword_list = pygame.sprite.Group()
    self.archer_list = pygame.sprite.Group()
    self.knight_list = pygame.sprite.Group()

    self.num_archers = num_archers
    self.num_knights = num_knights

    # Agents to remove at the end of the cycle.
    self.kill_list = []

    # Pygame set-up: the game is drawn onto an off-screen surface.
    self.render_on = False
    pygame.init()
    # self.WINDOW = pygame.display.set_mode([self.WIDTH, self.HEIGHT])
    self.WINDOW = pygame.Surface((self.WIDTH, self.HEIGHT))
    pygame.display.set_caption("Knights, Archers, Zombies")

    self.left_wall = get_image(os.path.join('img', 'left_wall.png'))
    self.right_wall = get_image(os.path.join('img', 'right_wall.png'))
    self.right_wall_rect = self.right_wall.get_rect()
    self.right_wall_rect.left = self.WIDTH - self.right_wall_rect.width
    self.floor_patch1 = get_image(os.path.join('img', 'patch1.png'))
    self.floor_patch2 = get_image(os.path.join('img', 'patch2.png'))
    self.floor_patch3 = get_image(os.path.join('img', 'patch3.png'))
    self.floor_patch4 = get_image(os.path.join('img', 'patch4.png'))

    self.agent_list = []
    self.agents = []

    # One sprite per archer, each shifted 50 px further than the previous.
    for idx in range(num_archers):
        archer = Archer(agent_name="archer_" + str(idx))
        self.archer_dict["archer{0}".format(self.archer_player_num)] = archer
        archer.offset(idx * 50, 0)
        self.archer_list.add(archer)
        self.all_sprites.add(archer)
        self.agent_list.append(archer)
        # The counter is left pointing at the last archer created.
        if idx < num_archers - 1:
            self.archer_player_num += 1

    # Same procedure for the knights.
    for idx in range(num_knights):
        knight = Knight(agent_name="knight_" + str(idx))
        self.knight_dict["knight{0}".format(self.knight_player_num)] = knight
        knight.offset(idx * 50, 0)
        self.knight_list.add(knight)
        self.all_sprites.add(knight)
        self.agent_list.append(knight)
        if idx < num_knights - 1:
            self.knight_player_num += 1

    # Agent names: all archers first, then all knights; the mapping gives
    # each name its position in that ordering.
    self.agents.extend("archer_" + str(idx) for idx in range(num_archers))
    self.agents.extend("knight_" + str(idx) for idx in range(num_knights))
    self.agent_name_mapping = {
        agent_name: position for position, agent_name in enumerate(self.agents)
    }

    # Every agent gets its own space instances.
    self.observation_spaces = {
        agent_name: Box(low=0, high=255, shape=(512, 512, 3), dtype=np.uint8)
        for agent_name in self.agents
    }
    self.action_spaces = {agent_name: Discrete(6) for agent_name in self.agents}

    self.display_wait = 0.0
    self.possible_agents = self.agents[:]
    self._agent_selector = agent_selector(self.agents)
    self.reinit()
def __init__(
    self,
    spawn_rate=20,
    num_archers=2,
    num_knights=2,
    max_zombies=10,
    max_arrows=10,
    killable_knights=True,
    killable_archers=True,
    pad_observation=True,
    line_death=False,
    max_cycles=900,
    vector_state=True,
    use_typemasks=False,
    transformer=False,
):
    """Store the configuration, declare agents and spaces, set up pygame.

    Args:
        spawn_rate, max_cycles, pad_observation, killable_knights,
        killable_archers, line_death, max_zombies, max_arrows: stored
            unchanged on the instance under the same names.
        num_archers, num_knights: number of ``archer_<i>`` / ``knight_<i>``
            agents to declare.
        vector_state: when true the spaces are float vectors in [-1, 1];
            otherwise they are uint8 images in [0, 255].
        use_typemasks: request typemask columns in the vector state.
        transformer: stored on the instance; when true it also forces
            ``self.use_typemasks`` on.

    The method ends by calling ``self.reinit()``.
    """
    EzPickle.__init__(
        self,
        spawn_rate,
        num_archers,
        num_knights,
        max_zombies,
        max_arrows,
        killable_knights,
        killable_archers,
        pad_observation,
        line_death,
        max_cycles,
        vector_state,
        use_typemasks,
        transformer,
    )

    # variable state space
    self.transformer = transformer

    # whether we want RGB state or vector state
    self.vector_state = vector_state

    # agents + zombies + weapons
    # NOTE(review): num_knights is counted twice; the second term
    # presumably accounts for one sword per knight - confirm.
    self.num_tracked = (
        num_archers + num_knights + max_zombies + num_knights + max_arrows
    )

    self.use_typemasks = True if transformer else use_typemasks
    self.typemask_width = 6
    # Use the effective flag, not the raw argument: `transformer` forces
    # typemasks on, and the declared width has to account for them.
    self.vector_width = 4 + self.typemask_width if self.use_typemasks else 4

    # Game Status
    self.frames = 0
    self.closed = False
    self.has_reset = False
    self.render_on = False

    # Game Constants
    self.seed()
    self.spawn_rate = spawn_rate
    self.max_cycles = max_cycles
    self.pad_observation = pad_observation
    self.killable_knights = killable_knights
    self.killable_archers = killable_archers
    self.line_death = line_death
    self.num_archers = num_archers
    self.num_knights = num_knights
    self.max_zombies = max_zombies
    self.max_arrows = max_arrows

    # Represents agents to remove at end of cycle
    self.kill_list = []
    self.agent_list = []
    self.dead_agents = []

    # Agent names: all archers first, then all knights; the mapping gives
    # each name its position in that ordering.
    self.agents = [f"archer_{i}" for i in range(self.num_archers)]
    self.agents += [f"knight_{i}" for i in range(self.num_knights)]
    self.agent_name_mapping = {
        name: index for index, name in enumerate(self.agents)
    }

    # Bounds and dtype are shared by the observation and state spaces;
    # only the shapes differ.
    if self.vector_state:
        low, high, dtype = -1.0, 1.0, np.float64
        # The per-agent observation has one more row and one more column
        # than the global state.
        obs_shape = [self.num_tracked + 1, self.vector_width + 1]
        state_shape = [self.num_tracked, self.vector_width]
    else:
        low, high, dtype = 0, 255, np.uint8
        obs_shape = [512, 512, 3]
        state_shape = [const.SCREEN_HEIGHT, const.SCREEN_WIDTH, 3]

    # Every agent gets its own space instances.
    self.observation_spaces = {
        name: Box(low=low, high=high, shape=obs_shape, dtype=dtype)
        for name in self.agents
    }
    self.action_spaces = {name: Discrete(6) for name in self.agents}
    self.state_space = Box(low=low, high=high, shape=state_shape, dtype=dtype)

    # Copy so that later in-place changes to `self.agents` cannot alter
    # the list of possible agents.
    self.possible_agents = self.agents[:]

    # Initializing Pygame
    pygame.init()
    # self.WINDOW = pygame.display.set_mode([self.WIDTH, self.HEIGHT])
    self.WINDOW = pygame.Surface((const.SCREEN_WIDTH, const.SCREEN_HEIGHT))
    pygame.display.set_caption("Knights, Archers, Zombies")

    self.left_wall = get_image(os.path.join("img", "left_wall.png"))
    self.right_wall = get_image(os.path.join("img", "right_wall.png"))
    self.right_wall_rect = self.right_wall.get_rect()
    self.right_wall_rect.left = const.SCREEN_WIDTH - self.right_wall_rect.width
    self.floor_patch1 = get_image(os.path.join("img", "patch1.png"))
    self.floor_patch2 = get_image(os.path.join("img", "patch2.png"))
    self.floor_patch3 = get_image(os.path.join("img", "patch3.png"))
    self.floor_patch4 = get_image(os.path.join("img", "patch4.png"))

    self._agent_selector = agent_selector(self.agents)
    self.reinit()
def __init__(
    self,
    n_pistons=20,
    local_ratio=0,
    time_penalty=-0.1,
    continuous=True,
    random_drop=True,
    random_rotate=True,
    ball_mass=0.75,
    ball_friction=0.3,
    ball_elasticity=1.5,
    max_cycles=125,
):
    """Store the configuration, declare agents and spaces, prepare the screen.

    One ``piston_<i>`` agent is declared per piston. The action space is a
    1-element box in [-1, 1] when ``continuous`` is true and ``Discrete(3)``
    otherwise. The remaining arguments are stored unchanged on the instance.
    The method ends by calling ``self.seed()``.
    """
    EzPickle.__init__(
        self,
        n_pistons,
        local_ratio,
        time_penalty,
        continuous,
        random_drop,
        random_rotate,
        ball_mass,
        ball_friction,
        ball_elasticity,
        max_cycles,
    )

    # Geometry, in pixels.
    self.n_pistons = n_pistons
    self.piston_head_height = 11
    self.piston_width = 40
    self.piston_height = 40
    self.piston_body_height = 23
    self.piston_radius = 5
    self.wall_width = 40
    self.ball_radius = 40
    self.screen_width = (2 * self.wall_width) + (self.piston_width * self.n_pistons)
    self.screen_height = 560

    # Height of an observation: the screen minus the bottom wall and
    # piston body (upper bound) and minus the top wall (lower bound).
    upper = self.screen_height - self.wall_width - self.piston_body_height
    lower = self.wall_width
    obs_height = upper - lower

    assert self.piston_width == self.wall_width, "Wall width and piston width must be equal for observation calculation"

    agent_names = []
    for index in range(self.n_pistons):
        agent_names.append("piston_" + str(index))
    self.agents = agent_names
    self.possible_agents = self.agents[:]
    self.agent_name_mapping = {
        name: index for index, name in enumerate(self.agents)
    }
    self._agent_selector = agent_selector(self.agents)

    # A single observation-space object is shared by all agents.
    shared_obs_space = gym.spaces.Box(
        low=0,
        high=255,
        shape=(obs_height, self.piston_width * 3, 3),
        dtype=np.uint8,
    )
    self.observation_spaces = {name: shared_obs_space for name in self.agents}

    # Likewise a single action-space object is shared by all agents.
    self.continuous = continuous
    shared_act_space = (
        gym.spaces.Box(low=-1, high=1, shape=(1, ))
        if self.continuous
        else gym.spaces.Discrete(3)
    )
    self.action_spaces = {name: shared_act_space for name in self.agents}

    pygame.init()
    pymunk.pygame_util.positive_y_is_up = False

    self.renderOn = False
    self.screen = pygame.Surface((self.screen_width, self.screen_height))
    self.max_cycles = max_cycles

    self.piston_sprite = get_image('piston.png')
    self.piston_body_sprite = get_image('piston_body.png')
    self.background = get_image('background.png')
    self.random_drop = random_drop
    self.random_rotate = random_rotate

    self.pistonList = []
    # Keeps track of individual rewards
    self.pistonRewards = []
    # Defines what "recent" means in terms of number of frames.
    self.recentFrameLimit = 20
    # Set of pistons that have touched the ball recently
    self.recentPistons = set()

    self.time_penalty = time_penalty
    self.local_ratio = local_ratio
    self.ball_mass = ball_mass
    self.ball_friction = ball_friction
    self.ball_elasticity = ball_elasticity

    self.done = False

    self.pixels_per_position = 4
    self.n_piston_positions = 16

    self.screen.fill((0, 0, 0))
    self.draw_background()
    # self.screen.blit(self.background, (0, 0))

    # Area inside the walls and above the piston bodies.
    render_left = self.wall_width
    render_top = self.wall_width
    render_width = self.screen_width - (2 * self.wall_width)
    render_height = (
        self.screen_height - (2 * self.wall_width) - self.piston_body_height
    )
    self.render_rect = pygame.Rect(
        render_left, render_top, render_width, render_height
    )

    # Blit background image if ball goes out of bounds. Ball radius is 40
    # The render area shrunk by one ball radius on every side.
    self.valid_ball_position_rect = pygame.Rect(
        self.render_rect.left + self.ball_radius,
        self.render_rect.top + self.ball_radius,
        self.render_rect.width - (2 * self.ball_radius),
        self.render_rect.height - (2 * self.ball_radius),
    )

    self.frames = 0
    self.display_wait = 0.0
    self.has_reset = False
    self.closed = False
    self.seed()
def __init__(
    self,
    seed=None,
    ind_reward=0.8,
    group_reward=0.1,
    other_group_reward=0.1,
    prospec_find_gold_reward=1,
    prospec_handoff_gold_reward=1,
    banker_receive_gold_reward=1,
    banker_deposit_gold_reward=1,
    max_frames=900,
):
    """Build the physics space, sprites, spaces and collision handlers.

    Args:
        seed: passed to ``seeding.np_random`` to create ``self.rng``.
        ind_reward: weight applied to a reward for the agent that
            triggered the event.
        group_reward: weight applied for the other agents of the same type.
        other_group_reward: weight applied for agents of the other type.
        prospec_find_gold_reward: base reward when a prospector touches
            water.
        prospec_handoff_gold_reward: base reward for the prospector side
            of a prospector -> banker handoff.
        banker_receive_gold_reward: base reward for the banker side of
            that handoff.
        banker_deposit_gold_reward: base reward when banker-held gold
            reaches a bank.
        max_frames: stored as ``self.max_frames``.

    Raises:
        ValueError: if the three reward weights do not add up to 1.0.
    """
    # Compare with a tolerance: exact float equality rejects valid
    # weights such as 0.7 + 0.2 + 0.1, which sums to 0.9999999999999999.
    if not math.isclose(ind_reward + group_reward + other_group_reward, 1.0):
        raise ValueError(
            "Individual reward, group reward, and other group reward should "
            "add up to 1.0")

    self.num_agents = const.NUM_AGENTS
    self.agents = []

    self.sprite_list = [
        "bankers/1-big.png",
        "bankers/2-big.png",
        "bankers/3-big.png",
        "prospector-pickaxe-big.png",
    ]
    self.rendering = False
    self.max_frames = max_frames
    self.frame = 0

    pg.init()
    self.rng, seed = seeding.np_random(seed)
    self.screen = pg.Surface(const.SCREEN_SIZE)
    self.clock = pg.time.Clock()
    self.done = False
    self.closed = False

    self.background = utils.load_image(["background-debris.png"])
    self.background_rect = pg.Rect(0, 0, *const.SCREEN_SIZE)
    self.screen.blit(self.background, self.background_rect)

    # Physics space without gravity or damping.
    self.space = pm.Space()
    self.space.gravity = Vec2d(0.0, 0.0)
    self.space.damping = 0.0

    self.all_sprites = pg.sprite.RenderUpdates()
    self.gold = []

    # Generate random positions for each prospector agent. All positions
    # are drawn before any sprite is built, so the order of random draws
    # is fixed.
    prospector_info = [(i, utils.rand_pos("prospector", self.rng))
                       for i in range(const.NUM_PROSPECTORS)]
    self.prospectors = {}
    for num, pos in prospector_info:
        identifier = f"prospector_{num}"
        self.prospectors[identifier] = Prospector(
            pos, self.space, num, self.all_sprites)
        self.agents.append(identifier)

    banker_info = [(i, utils.rand_pos("banker", self.rng))
                   for i in range(const.NUM_BANKERS)]
    self.bankers = {}
    for num, pos in banker_info:
        identifier = f"banker_{num}"
        self.bankers[identifier] = Banker(
            pos, self.space, num, self.all_sprites)
        self.agents.append(identifier)

    self.banks = [
        Bank(pos, verts, self.space, self.all_sprites)
        for pos, verts in const.BANK_INFO
    ]

    # Fences and water are only constructed here; no reference is kept.
    for w_type, s_pos, b_pos, verts in const.FENCE_INFO:
        Fence(w_type, s_pos, b_pos, verts, self.space, self.all_sprites)

    Water(const.WATER_INFO[0], const.WATER_INFO[1], self.space,
          self.all_sprites)

    self.metadata = {"render.modes": ["human"]}

    # Both agent types use a 3-element action in [-1, 1].
    self.action_spaces = {}
    for p in self.prospectors:
        self.action_spaces[p] = spaces.Box(low=np.float32(-1.),
                                           high=np.float32(1.),
                                           shape=(3, ))
    for b in self.bankers:
        self.action_spaces[b] = spaces.Box(low=np.float32(-1.),
                                           high=np.float32(1.),
                                           shape=(3, ))

    self.observation_spaces = {}
    self.last_observation = {}
    for p in self.prospectors:
        self.last_observation[p] = None
        self.observation_spaces[p] = spaces.Box(
            low=0,
            high=255,
            shape=const.PROSPEC_OBSERV_SHAPE,
            dtype=np.uint8)
    for b in self.bankers:
        self.last_observation[b] = None
        self.observation_spaces[b] = spaces.Box(
            low=0,
            high=255,
            shape=const.BANKER_OBSERV_SHAPE,
            dtype=np.uint8)

    self.agent_order = self.agents[:]
    self._agent_selector = agent_selector(self.agent_order)
    self.agent_selection = self._agent_selector.next()
    # NOTE(review): the handlers below update `self.rewards`, which is not
    # assigned in this method; presumably `reset()` creates it - confirm.
    self.reset()

    # Collision Handler Functions --------------------------------------------
    # Water to Prospector
    def add_gold(arbiter, space, data):
        """Reward a prospector touching water and give it a nugget."""
        prospec_shape = arbiter.shapes[0]
        prospec_body = prospec_shape.body

        position = arbiter.contact_point_set.points[0].point_a
        normal = arbiter.contact_point_set.normal

        # Move the prospector 24 units back along the contact normal and
        # stop it.
        prospec_body.position = position - (24 * normal)
        prospec_body.velocity = (0, 0)

        for k, v in self.prospectors.items():
            if v.body is prospec_body:
                self.rewards[k] += ind_reward * prospec_find_gold_reward
            else:
                self.rewards[k] += group_reward * prospec_find_gold_reward

        for k in self.bankers:
            self.rewards[k] += other_group_reward * prospec_find_gold_reward

        if prospec_body.nugget is None:
            # The contact point is deliberately read again after the body
            # was repositioned above, exactly as the original code did.
            # NOTE(review): confirm whether the value can differ from the
            # first read.
            position = arbiter.contact_point_set.points[0].point_a

            gold = Gold(position, prospec_body, self.space,
                        self.all_sprites)
            self.gold.append(gold)
            prospec_body.nugget = gold

        return True

    # Prospector to banker
    def handoff_gold_handler(arbiter, space, data):
        """Move a nugget from a prospector to a banker and pay rewards."""
        banker_shape, gold_shape = arbiter.shapes[0], arbiter.shapes[1]

        gold_sprite = None
        for g in self.gold:
            if g.id == gold_shape.id:
                gold_sprite = g

        # This collision handler is only for prospector -> banker gold
        # handoffs. An unknown gold shape is ignored here too, instead of
        # failing on `None.parent_body`.
        if (
            gold_sprite is None
            or gold_sprite.parent_body.sprite_type != "prospector"
        ):
            return True

        banker_body = banker_shape.body
        prospec_body = gold_sprite.parent_body

        normal = arbiter.contact_point_set.normal
        # Correct the angle because banker's head is rotated pi/2
        corrected = utils.normalize_angle(banker_body.angle + (math.pi / 2))
        # Normalize the contact angle as well, so both sides of the
        # comparison go through the same normalization.
        normalized_normal = utils.normalize_angle(normal.angle)

        if (
            corrected - const.BANKER_HANDOFF_TOLERANCE
            <= normalized_normal
            <= corrected + const.BANKER_HANDOFF_TOLERANCE
        ):
            # Transfer the gold.
            gold_sprite.parent_body.nugget = None
            gold_sprite.parent_body = banker_body
            banker_body.nugget = gold_sprite
            banker_body.nugget_offset = normal.angle

            # Rewards are paid only when the handoff actually happened,
            # and the acting agents get the `ind_reward` weight like in
            # `add_gold`.
            for k, v in self.prospectors.items():
                self.rewards[k] += (other_group_reward
                                    * banker_receive_gold_reward)
                if v.body is prospec_body:
                    self.rewards[k] += (ind_reward
                                        * prospec_handoff_gold_reward)
                else:
                    self.rewards[k] += (group_reward
                                        * prospec_handoff_gold_reward)

            for k, v in self.bankers.items():
                self.rewards[k] += (other_group_reward
                                    * prospec_handoff_gold_reward)
                if v.body is banker_body:
                    self.rewards[k] += (ind_reward
                                        * banker_receive_gold_reward)
                else:
                    self.rewards[k] += (group_reward
                                        * banker_receive_gold_reward)

        return True

    # Banker to bank
    def gold_score_handler(arbiter, space, data):
        """Remove banker-held gold that reached a bank and pay rewards."""
        gold_shape = arbiter.shapes[0]

        gold_class = None
        for g in self.gold:
            if g.id == gold_shape.id:
                gold_class = g

        # Unknown gold shape: nothing to score. Previously this path
        # failed on an unbound local.
        if gold_class is None:
            return False

        if gold_class.parent_body.sprite_type == "banker":
            self.space.remove(gold_shape, gold_shape.body)
            gold_class.parent_body.nugget = None
            banker_body = gold_class.parent_body

            for k, v in self.bankers.items():
                if v.body is banker_body:
                    # Weighted by `ind_reward`, like every other
                    # individual reward.
                    self.rewards[k] += (ind_reward
                                        * banker_deposit_gold_reward)
                else:
                    self.rewards[k] += (group_reward
                                        * banker_deposit_gold_reward)

            for k in self.prospectors:
                self.rewards[k] += (other_group_reward
                                    * banker_deposit_gold_reward)

            self.gold.remove(gold_class)
            self.all_sprites.remove(gold_class)

        return False

    # Create the collision event generators
    gold_dispenser = self.space.add_collision_handler(
        CollisionTypes.PROSPECTOR, CollisionTypes.WATER)
    gold_dispenser.begin = add_gold

    handoff_gold = self.space.add_collision_handler(
        CollisionTypes.BANKER, CollisionTypes.GOLD)
    handoff_gold.begin = handoff_gold_handler

    gold_score = self.space.add_collision_handler(CollisionTypes.GOLD,
                                                  CollisionTypes.BANK)
    gold_score.begin = gold_score_handler