def __init__(self, scenario, world, max_frames, seed, local_ratio=None):
    """Wire a scenario and world together and declare per-agent spaces.

    Args:
        scenario: Object providing ``reset_world(world, np_random)`` and
            ``observation(agent, world)``.
        world: Object exposing ``agents``, ``dim_p`` and ``dim_c``.
        max_frames: Stored on ``self.max_frames``; not interpreted here.
        seed: Forwarded to ``seeding.np_random``.
        local_ratio: Stored on ``self.local_ratio``; not interpreted here.
    """
    super(SimpleEnv, self).__init__()

    self.np_random, _ = seeding.np_random(seed)
    self.max_frames = max_frames
    self.scenario = scenario
    self.world = world
    self.local_ratio = local_ratio

    # The world is reset before anything is read from it, so agent names
    # and observation sizes below reflect the reset state.
    self.scenario.reset_world(self.world, self.np_random)

    self.num_agents = len(self.world.agents)
    self.agents = [agent.name for agent in self.world.agents]
    self._index_map = {name: idx for idx, name in enumerate(self.agents)}
    self.agent_order = list(self.agents)
    self._agent_selector = agent_selector(self.agent_order)

    # set spaces
    self.action_spaces = dict()
    self.observation_spaces = dict()
    unbounded = np.float32(np.inf)
    for agent in self.world.agents:
        # Action count is the product of ``dim_p * 2 + 1`` (movable agents
        # only) and ``dim_c`` (non-silent agents only).
        move_factor = self.world.dim_p * 2 + 1 if agent.movable else 1
        comm_factor = 1 if agent.silent else self.world.dim_c
        self.action_spaces[agent.name] = spaces.Discrete(
            move_factor * comm_factor)

        # The observation length is measured from an actual observation.
        obs_len = len(self.scenario.observation(agent, self.world))
        self.observation_spaces[agent.name] = spaces.Box(
            low=-unbounded,
            high=unbounded,
            shape=(obs_len, ),
            dtype=np.float32,
        )

    self.steps = 0
    self.current_actions = [None] * self.num_agents
    self.viewer = None
def __init__(self, seed=None):
    """Create the RLCard ``'mahjong'`` game and declare agents and spaces.

    Args:
        seed: Passed to ``rlcard.make`` as ``config={"seed": seed}``.
    """
    super().__init__()
    self.env = rlcard.make('mahjong', config={"seed": seed})
    self.agents = ['player_0', 'player_1', 'player_2', 'player_3']
    self.num_agents = len(self.agents)
    self.has_reset = False

    # ``np.bool`` was only an alias of the builtin ``bool``; the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, so the builtin is used
    # directly. The resulting dtype is the same.
    self.observation_spaces = self._convert_to_dict([
        spaces.Box(low=0.0, high=1.0, shape=(6, 34, 4), dtype=bool)
        for _ in range(self.num_agents)
    ])
    # The action count is read from the underlying RLCard game.
    self.action_spaces = self._convert_to_dict([
        spaces.Discrete(self.env.game.get_action_num())
        for _ in range(self.num_agents)
    ])

    # ``agent_order`` is a copy so reordering it never touches ``agents``.
    self.agent_order = list(self.agents)
    self._agent_selector = agent_selector(self.agent_order)
def __init__(self, seed=None):
    """Create the RLCard ``'doudizhu'`` game and declare agents and spaces.

    Args:
        seed: Passed to ``rlcard.make`` as ``config={"seed": seed}``.
    """
    super().__init__()
    self.env = rlcard.make('doudizhu', config={"seed": seed})
    self.agents = ['landlord_0', 'peasant_0', 'peasant_1']
    self.num_agents = len(self.agents)
    self.has_reset = False

    # ``np.bool`` was only an alias of the builtin ``bool``; the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, so the builtin is used
    # directly. The resulting dtype is the same.
    self.observation_spaces = self._convert_to_dict([
        spaces.Box(low=0.0, high=1.0, shape=(6, 5, 15), dtype=bool)
        for _ in range(self.num_agents)
    ])
    # The action count is read from the underlying RLCard game.
    self.action_spaces = self._convert_to_dict([
        spaces.Discrete(self.env.game.get_action_num())
        for _ in range(self.num_agents)
    ])

    # Copy rather than alias: mutating ``agent_order`` must not silently
    # change ``agents`` as well.
    self.agent_order = list(self.agents)
    self._agent_selector = agent_selector(self.agent_order)
def __init__(self):
    """Set up a chess board, two agents and their action/observation spaces.

    ``rewards``, ``dones`` and ``agent_selection`` are left as ``None``
    here; this method does not populate them.
    """
    super().__init__()

    self.board = chess.Board()

    self.agents = ["player_{}".format(i) for i in range(2)]
    self.possible_agents = self.agents[:]

    self._agent_selector = agent_selector(self.agents)

    self.action_spaces = {
        name: spaces.Discrete(8 * 8 * 73) for name in self.agents
    }
    # ``np.bool`` was only an alias of the builtin ``bool``; the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, so the builtin is used
    # directly. The resulting dtype is the same.
    self.observation_spaces = {
        name: spaces.Box(low=0, high=1, shape=(8, 8, 20), dtype=bool)
        for name in self.agents
    }

    self.rewards = None
    self.dones = None
    self.infos = {name: {} for name in self.agents}

    self.agent_selection = None
def __init__(self, **kwargs):
    """Wrap a ``SumoEnvironment`` built from ``kwargs``.

    Args:
        **kwargs: Recorded through ``EzPickle.__init__``, kept on
            ``self._kwargs`` and forwarded unchanged to ``SumoEnvironment``.
    """
    EzPickle.__init__(self, **kwargs)
    self._kwargs = kwargs

    self.seed()
    self.env = SumoEnvironment(**self._kwargs)

    # Independent copies: previously ``agents``, ``possible_agents`` and
    # ``env.ts_ids`` were one and the same list object, so removing an
    # agent from ``agents`` would also have shrunk the other two.
    self.agents = list(self.env.ts_ids)
    self.possible_agents = list(self.env.ts_ids)
    self._agent_selector = agent_selector(self.agents)
    self.agent_selection = self._agent_selector.reset()

    # spaces (looked up per agent id through the wrapped environment)
    self.action_spaces = {a: self.env.action_spaces(a) for a in self.agents}
    self.observation_spaces = {a: self.env.observation_spaces(a) for a in self.agents}

    # dicts
    self.rewards = {a: 0 for a in self.agents}
    self.dones = {a: False for a in self.agents}
    self.infos = {a: {} for a in self.agents}
def reset(self, observe=True):
    """Reset the wrapped game and rebuild all per-agent bookkeeping.

    Args:
        observe: When true, the ``'obs'`` entry returned by the wrapped
            environment's reset is returned; otherwise ``None`` is returned.

    Returns:
        ``obs['obs']`` from the wrapped reset, or ``None``.
    """
    self.has_reset = True
    obs, player_id = self.env.reset()

    # Fresh turn order and selector for the new game.
    self.agent_order = list(self.agents)
    self._agent_selector = agent_selector(self.agent_order)
    self.agent_selection = self._agent_selector.reset()

    agent_count = self.num_agents
    self.rewards = self._convert_to_dict(self.env.get_payoffs())
    self.dones = self._convert_to_dict([False] * agent_count)
    # Every agent starts with its own empty legal-move list ...
    self.infos = self._convert_to_dict(
        [{'legal_moves': []} for _ in range(agent_count)])
    # ... and only the player named by the wrapped reset gets real moves.
    acting_agent = self._int_to_name(player_id)
    self.infos[acting_agent]['legal_moves'] = obs['legal_actions']

    self._last_obs = obs['obs']
    if not observe:
        return None
    return obs['obs']
def __init__(self, seed=None):
    """Create the RLCard ``'leduc-holdem'`` game and declare agents and spaces.

    Args:
        seed: When not ``None``, seeds the global NumPy and ``random``
            generators; it is always passed to ``rlcard.make`` as
            ``config={"seed": seed}``.
    """
    super().__init__()

    # NOTE: this seeds the process-wide generators, not private ones.
    if seed is not None:
        np.random.seed(seed)
        random.seed(seed)

    self.env = rlcard.make('leduc-holdem', config={"seed": seed})
    self.agents = ['player_0', 'player_1']
    self.num_agents = len(self.agents)
    self.has_reset = False

    self.observation_spaces = self._convert_to_dict([
        spaces.Box(low=0.0, high=1.0, shape=(36, ), dtype=np.float64)
        for _ in range(self.num_agents)
    ])
    # The action count is read from the underlying RLCard game.
    self.action_spaces = self._convert_to_dict([
        spaces.Discrete(self.env.game.get_action_num())
        for _ in range(self.num_agents)
    ])

    # Copy rather than alias: mutating ``agent_order`` must not silently
    # change ``agents`` as well.
    self.agent_order = list(self.agents)
    self._agent_selector = agent_selector(self.agent_order)
def __init__(self, seed=None, **kwargs):
    """Build a ``CooperativePong`` game and mirror its state on the wrapper.

    Args:
        seed: Forwarded to ``seeding.np_random`` to create the randomizer.
        **kwargs: Forwarded unchanged to ``CooperativePong``.
    """
    super().__init__()

    self.randomizer, _ = seeding.np_random(seed)
    game = CooperativePong(self.randomizer, **kwargs)
    self.env = game

    self.agents = game.agents
    self.num_agents = len(self.agents)
    self._agent_selector = agent_selector(self.agents)
    self.agent_selection = self._agent_selector.reset()

    # spaces: agent names are paired positionally with the game's spaces
    self.action_spaces = {
        name: space for name, space in zip(self.agents, game.action_space)
    }
    self.observation_spaces = {
        name: space
        for name, space in zip(self.agents, game.observation_space)
    }

    # dicts: bound to whatever the game currently holds; nothing is copied
    self.observations = {}
    self.rewards = game.rewards
    self.dones = game.dones
    self.infos = game.infos

    self.score = game.score
    self.display_wait = 0.0
def __init__(self, **kwargs):
    """Record the constructor arguments, seed, and mirror the game's state.

    Args:
        **kwargs: Recorded through ``EzPickle.__init__`` and kept on
            ``self._kwargs``.
    """
    EzPickle.__init__(self, **kwargs)
    self._kwargs = kwargs

    # NOTE(review): ``self.env`` is never assigned in this method, so it is
    # presumably created inside ``self.seed()`` (likely from
    # ``self._kwargs``) -- confirm. ``_kwargs`` is set first for that reason.
    self.seed()
    game = self.env

    self.agents = game.agents[:]
    self.possible_agents = self.agents[:]
    self._agent_selector = agent_selector(self.agents)
    self.agent_selection = self._agent_selector.reset()

    # spaces: agent names are paired positionally with the game's spaces
    self.action_spaces = {
        name: space for name, space in zip(self.agents, game.action_space)
    }
    self.observation_spaces = {
        name: space
        for name, space in zip(self.agents, game.observation_space)
    }

    # dicts: bound to whatever the game currently holds; nothing is copied
    self.observations = {}
    self.rewards = game.rewards
    self.dones = game.dones
    self.infos = game.infos

    self.score = game.score
    self.display_wait = 0.0
def __init__(self):
    """Set up a chess board, a PGN game record, two agents and their spaces.

    ``rewards``, ``dones`` and ``agent_selection`` are left as ``None``
    here; this method does not populate them.
    """
    super().__init__()

    self.board = chess.Board()

    self.agents = ["player_{}".format(i) for i in range(2)]
    self.possible_agents = self.agents[:]

    # pgn move stack export
    self.game = chess.pgn.Game()
    self.game.headers["White"] = self.agents[0]
    self.game.headers["Black"] = self.agents[1]
    self.game.headers["Date"] = str(date.today())
    self.game.setup(self.board)
    # ``node`` starts at the game root.
    self.node = self.game

    self._agent_selector = agent_selector(self.agents)

    self.action_spaces = {
        name: spaces.Discrete(8 * 8 * 73)
        for name in self.agents
    }
    # ``np.bool`` was only an alias of the builtin ``bool``; the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, so the builtin is used
    # directly. The resulting dtype is the same.
    self.observation_spaces = {
        name: spaces.Dict({
            'observation': spaces.Box(low=0, high=1, shape=(8, 8, 20), dtype=bool),
            'action_mask': spaces.Box(low=0, high=1, shape=(4672, ), dtype=np.int8)
        })
        for name in self.agents
    }

    self.rewards = None
    self.dones = None
    self.infos = {name: {} for name in self.agents}

    self.agent_selection = None
def __init__(self, seed=None):
    """Create the RLCard ``'no-limit-holdem'`` game and declare agents and spaces.

    Args:
        seed: When not ``None``, seeds the global NumPy and ``random``
            generators; it is always passed to ``rlcard.make`` as
            ``config={"seed": seed}``.
    """
    super().__init__()

    # NOTE: this seeds the process-wide generators, not private ones.
    if seed is not None:
        np.random.seed(seed)
        random.seed(seed)

    self.env = rlcard.make('no-limit-holdem', config={"seed": seed})
    self.agents = ['player_0', 'player_1']
    self.num_agents = len(self.agents)
    self.has_reset = False

    # 54 observation entries: the first 52 are bounded by [0, 1], the
    # last two by [0, 100].
    self.observation_spaces = self._convert_to_dict([
        spaces.Box(low=np.zeros(54, ),
                   high=np.append(np.ones(52, ), [100, 100]),
                   dtype=np.float32)
        for _ in range(self.num_agents)
    ])
    # The action count is read from the underlying RLCard game.
    self.action_spaces = self._convert_to_dict([
        spaces.Discrete(self.env.game.get_action_num())
        for _ in range(self.num_agents)
    ])

    # Copy rather than alias: mutating ``agent_order`` must not silently
    # change ``agents`` as well.
    self.agent_order = list(self.agents)
    self._agent_selector = agent_selector(self.agent_order)
def __init__(self):
    """Set up a chess board, two agents, their spaces and a board-history buffer.

    ``rewards``, ``dones`` and ``agent_selection`` are left as ``None``
    here; this method does not populate them.
    """
    super().__init__()

    self.board = chess.Board()
    self.board_history = np.zeros((8, 8, 104), dtype=bool)

    self.agents = [f"player_{idx}" for idx in (0, 1)]
    self.possible_agents = self.agents[:]
    self._agent_selector = agent_selector(self.agents)

    # Each agent gets its own space objects; nothing is shared between them.
    self.action_spaces = {}
    self.observation_spaces = {}
    for name in self.agents:
        self.action_spaces[name] = spaces.Discrete(8 * 8 * 73)
        self.observation_spaces[name] = spaces.Dict({
            'observation': spaces.Box(
                low=0, high=1, shape=(8, 8, 111), dtype=bool),
            'action_mask': spaces.Box(
                low=0, high=1, shape=(4672,), dtype=np.int8),
        })

    self.rewards = None
    self.dones = None
    self.infos = {name: {} for name in self.agents}
    self.agent_selection = None
def __init__(self, board_size: int = 19, komi: float = 7.5):
    """Configure a Go game and declare agents and spaces.

    Args:
        board_size: Board side length (the board is
            ``board_size x board_size``).
        komi: Points given to the second player.
    """
    super().__init__()

    # NOTE(review): ``self._N`` is read below but not assigned in this
    # method; presumably this call sets it from ``board_size`` -- confirm.
    self._overwrite_go_global_variables(board_size=board_size)
    self._komi = komi

    self.agents = ['black_0', 'white_0']
    self.num_agents = len(self.agents)
    self.has_reset = False

    # ``np.bool`` was only an alias of the builtin ``bool``; the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, so the builtin is used
    # directly. The resulting dtype is the same.
    self.observation_spaces = self._convert_to_dict([
        spaces.Box(low=0, high=1, shape=(self._N, self._N, 3), dtype=bool)
        for _ in range(self.num_agents)
    ])
    # One action per board point plus one extra action.
    self.action_spaces = self._convert_to_dict([
        spaces.Discrete(self._N * self._N + 1)
        for _ in range(self.num_agents)
    ])

    self._agent_selector = agent_selector(self.agents)
def __init__(self, board_size: int = 19, komi: float = 7.5):
    """Configure a Go game and declare agents, spaces and a history buffer.

    Args:
        board_size: Board side length (the board is
            ``board_size x board_size``).
        komi: Points given to the second player.
    """
    super().__init__()

    # NOTE(review): ``self._N`` is read below but not assigned in this
    # method; presumably this call sets it from ``board_size`` -- confirm.
    self._overwrite_go_global_variables(board_size=board_size)
    self._komi = komi

    self.agents = ['black_0', 'white_0']
    self.possible_agents = self.agents[:]

    self.has_reset = False
    self.screen = None

    # NOTE(review): ``self.num_agents`` is read but not assigned here;
    # presumably the base class provides it -- confirm.
    per_agent_obs = []
    for _ in range(self.num_agents):
        per_agent_obs.append(spaces.Dict({
            'observation': spaces.Box(
                low=0, high=1, shape=(self._N, self._N, 17), dtype=bool),
            'action_mask': spaces.Box(
                low=0, high=1, shape=((self._N * self._N) + 1,),
                dtype=np.int8),
        }))
    self.observation_spaces = self._convert_to_dict(per_agent_obs)

    # One action per board point plus one extra action.
    per_agent_act = []
    for _ in range(self.num_agents):
        per_agent_act.append(spaces.Discrete(self._N * self._N + 1))
    self.action_spaces = self._convert_to_dict(per_agent_act)

    self._agent_selector = agent_selector(self.agents)

    self.board_history = np.zeros((self._N, self._N, 16), dtype=bool)
def __init__(self, seed=None, knock_reward: float = 0.5, gin_reward: float = 1.0):
    """Create the RLCard ``'gin-rummy'`` game and declare agents and spaces.

    Args:
        seed: Passed to ``rlcard.make`` as ``config={"seed": seed}``.
        knock_reward: Stored on ``self._knock_reward``; presumably consumed
            by ``self._get_payoff`` (not visible here).
        gin_reward: Stored on ``self._gin_reward``; presumably consumed by
            ``self._get_payoff`` (not visible here).
    """
    super().__init__()
    self._knock_reward = knock_reward
    self._gin_reward = gin_reward

    self.env = rlcard.make('gin-rummy', config={"seed": seed})
    self.agents = ['player_0', 'player_1']
    self.num_agents = len(self.agents)
    self.has_reset = False

    # ``np.bool`` was only an alias of the builtin ``bool``; the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, so the builtin is used
    # directly. The resulting dtype is the same.
    self.observation_spaces = self._convert_to_dict([
        spaces.Box(low=0.0, high=1.0, shape=(5, 52), dtype=bool)
        for _ in range(self.num_agents)
    ])
    # The action count is read from the underlying RLCard game.
    self.action_spaces = self._convert_to_dict([
        spaces.Discrete(self.env.game.get_action_num())
        for _ in range(self.num_agents)
    ])

    # Copy rather than alias: mutating ``agent_order`` must not silently
    # change ``agents`` as well.
    self.agent_order = list(self.agents)
    self._agent_selector = agent_selector(self.agent_order)

    # Replace the scorer's payoff function with this wrapper's own.
    self.env.game.judge.scorer.get_payoff = self._get_payoff
def reset(self):
    """Start a new game: fresh board, fresh PGN record, cleared agent state."""
    self.has_reset = True

    self.agents = self.possible_agents[:]
    white, black = self.agents[0], self.agents[1]

    self.board = chess.Board()

    # New PGN record whose root doubles as the current node.
    record = chess.pgn.Game()
    self.game = record
    record.headers["White"] = white
    record.headers["Black"] = black
    record.headers["Date"] = str(date.today())
    record.setup(self.board)
    self.node = record

    self._agent_selector = agent_selector(self.agents)
    self.agent_selection = self._agent_selector.reset()

    # Per-agent bookkeeping starts from neutral values.
    self.rewards = dict.fromkeys(self.agents, 0)
    self._cumulative_rewards = dict.fromkeys(self.agents, 0)
    self.dones = dict.fromkeys(self.agents, False)
    # A comprehension, so each agent gets its own info dict.
    self.infos = {name: {} for name in self.agents}
def __init__(self, env_constructors, num_cpus=None, return_copy=True):
    """Start worker processes that share pre-allocated buffers for several envs.

    Args:
        env_constructors: Non-empty sequence of environment factories. Only
            the first is checked for callability and called in this process
            (to read agent names and spaces); slices of the sequence are
            handed to the worker processes.
        num_cpus: Upper bound on the number of worker processes. Defaults
            to ``mp.cpu_count()`` and is capped at the number of
            environments.
        return_copy: Stored on ``self.return_copy``; not interpreted here.

    Raises:
        AssertionError: If ``env_constructors`` is empty or its first
            element is not callable.
    """
    # set signaling so that crashing is handled gracefully
    init_parallel_env()

    num_envs = len(env_constructors)
    if num_cpus is None:
        num_cpus = mp.cpu_count()
    num_cpus = min(num_cpus, num_envs)
    assert num_envs > 0
    assert callable(env_constructors[0]), "env_constructor must be a callable object (i.e function) that create an environment"

    # A local instance of the first environment is used only as a template
    # for agent names and spaces.
    self.env = env_constructors[0]()
    self.possible_agents = self.env.possible_agents
    self.max_num_agents = len(self.possible_agents)
    self.observation_spaces = copy.copy(self.env.observation_spaces)
    self.action_spaces = copy.copy(self.env.action_spaces)
    self.order_is_nondeterministic = False
    self.num_envs = num_envs
    self.agent_indexes = {agent: i for i, agent in enumerate(self.possible_agents)}
    self._agent_selector = agent_selector(self.possible_agents)

    # Raw per-agent buffers; these are also passed to every worker.
    all_arrays = {
        agent: create_shared_data(
            num_envs,
            SpaceWrapper(self.observation_spaces[agent]),
            SpaceWrapper(self.action_spaces[agent]),
        )
        for agent in self.possible_agents
    }
    # Parent-side wrappers over the same buffers.
    self.shared_datas = {
        agent: AgentSharedData(
            num_envs,
            SpaceWrapper(self.env.observation_spaces[agent]),
            SpaceWrapper(self.env.action_spaces[agent]),
            all_arrays[agent],
        )
        for agent in self.possible_agents
    }

    env_arrays = create_env_data(num_envs)
    self.env_datas = EnvSharedData(num_envs, env_arrays)

    self.return_copy = return_copy

    self.procs = []
    self.pipes = [mp.Pipe() for _ in range(num_cpus)]
    self.con_ins = [con_in for con_in, con_out in self.pipes]
    self.con_outs = [con_out for con_in, con_out in self.pipes]
    self.env_starts = []

    env_counter = 0
    for pidx in range(num_cpus):
        envs_left = num_envs - env_counter
        # Each worker takes up to ceil(num_envs / num_cpus) environments.
        # Kept inside the loop on purpose: with ``num_cpus == 0`` the loop
        # body (and this division) never runs.
        # NOTE(review): late workers can be allocated zero environments
        # (e.g. 5 envs on 4 workers -> 2, 2, 1, 0) -- confirm that
        # ``env_worker`` tolerates that.
        allocated_envs = min(envs_left, (num_envs + num_cpus - 1) // num_cpus)
        proc_constructors = env_constructors[env_counter:env_counter + allocated_envs]
        proc = mp.Process(
            target=env_worker,
            args=(
                proc_constructors,
                num_envs,
                env_counter,
                allocated_envs,
                all_arrays,
                env_arrays,
                self.con_outs[pidx],
            ),
        )
        self.procs.append(proc)
        self.env_starts.append(env_counter)

        proc.start()
        env_counter += allocated_envs
def __init__(self, scenario, world, max_cycles, continuous_actions=False, local_ratio=None):
    """Wire a scenario and world together and declare agent and state spaces.

    Args:
        scenario: Object providing ``reset_world(world, np_random)`` and
            ``observation(agent, world)``.
        world: Object exposing ``agents``, ``dim_p`` and ``dim_c``.
        max_cycles: Stored on ``self.max_cycles``; not interpreted here.
        continuous_actions: Selects ``Box`` action spaces when true and
            ``Discrete`` ones otherwise.
        local_ratio: Stored on ``self.local_ratio``; not interpreted here.
    """
    super(SimpleEnv, self).__init__()

    # NOTE(review): ``self.np_random`` is used below but not assigned in
    # this method; presumably ``self.seed()`` creates it -- confirm.
    self.seed()

    self.metadata = {'render.modes': ['human', 'rgb_array']}

    self.max_cycles = max_cycles
    self.scenario = scenario
    self.world = world
    self.continuous_actions = continuous_actions
    self.local_ratio = local_ratio

    # The world is reset before anything is read from it, so agent names
    # and observation sizes below reflect the reset state.
    self.scenario.reset_world(self.world, self.np_random)

    self.agents = [agent.name for agent in self.world.agents]
    self.possible_agents = self.agents[:]
    self._index_map = {name: idx for idx, name in enumerate(self.agents)}

    self._agent_selector = agent_selector(self.agents)

    # set spaces
    self.action_spaces = dict()
    self.observation_spaces = dict()
    unbounded = np.float32(np.inf)
    total_obs_len = 0
    for agent in self.world.agents:
        # Movement part: ``dim_p * 2 + 1`` when movable; otherwise the
        # neutral element of the combination used below (0 for the
        # additive continuous case, 1 for the multiplicative discrete one).
        if agent.movable:
            action_size = self.world.dim_p * 2 + 1
        else:
            action_size = 0 if self.continuous_actions else 1

        # Communication part: added for continuous actions, multiplied in
        # for discrete ones.
        if not agent.silent:
            if self.continuous_actions:
                action_size = action_size + self.world.dim_c
            else:
                action_size = action_size * self.world.dim_c

        # The observation length is measured from an actual observation.
        obs_len = len(self.scenario.observation(agent, self.world))
        total_obs_len += obs_len

        if self.continuous_actions:
            self.action_spaces[agent.name] = spaces.Box(
                low=0, high=1, shape=(action_size, ))
        else:
            self.action_spaces[agent.name] = spaces.Discrete(action_size)
        self.observation_spaces[agent.name] = spaces.Box(
            low=-unbounded,
            high=unbounded,
            shape=(obs_len, ),
            dtype=np.float32,
        )

    # The state space is as long as all agent observations put together.
    self.state_space = spaces.Box(
        low=-unbounded,
        high=unbounded,
        shape=(total_obs_len, ),
        dtype=np.float32,
    )

    self.steps = 0

    self.current_actions = [None] * self.num_agents

    self.viewer = None