Code Example #1
File: simple_env.py Project: lssr/PettingZoo
    def __init__(self, scenario, world, max_frames, seed, local_ratio=None):
        super().__init__()

        self.np_random, seed = seeding.np_random(seed)

        self.max_frames = max_frames
        self.scenario = scenario
        self.world = world
        self.local_ratio = local_ratio

        self.scenario.reset_world(self.world, self.np_random)

        self.num_agents = len(self.world.agents)
        self.agents = [agent.name for agent in self.world.agents]
        self._index_map = {
            agent.name: idx
            for idx, agent in enumerate(self.world.agents)
        }

        self.agent_order = list(self.agents)

        self._agent_selector = agent_selector(self.agent_order)

        # set spaces
        self.action_spaces = dict()
        self.observation_spaces = dict()
        for agent in self.world.agents:
            space_dim = 1
            if agent.movable:
                space_dim *= self.world.dim_p * 2 + 1
            if not agent.silent:
                space_dim *= self.world.dim_c

            obs_dim = len(self.scenario.observation(agent, self.world))
            self.action_spaces[agent.name] = spaces.Discrete(space_dim)
            self.observation_spaces[agent.name] = spaces.Box(
                low=-np.float32(np.inf),
                high=+np.float32(np.inf),
                shape=(obs_dim, ),
                dtype=np.float32)

        self.steps = 0

        self.current_actions = [None] * self.num_agents

        self.viewer = None
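
All of these PettingZoo environments drive turn order through the same agent_selector utility constructed above. For reference, a minimal sketch of how such a selector is typically advanced; the agent names here are made up:

    from pettingzoo.utils import agent_selector

    # Hypothetical turn order, standing in for self.agent_order above.
    order = ["agent_0", "agent_1", "agent_2"]
    selector = agent_selector(order)

    agent = selector.reset()     # first agent in the order: "agent_0"
    for _ in range(6):
        if selector.is_last():   # True while the last agent in the order is selected
            pass                 # e.g. advance the world once per full cycle
        agent = selector.next()  # cycles agent_1, agent_2, agent_0, ...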
Code Example #2
File: mahjong.py Project: lssr/PettingZoo
    def __init__(self, seed=None):
        super().__init__()
        self.env = rlcard.make('mahjong', config={"seed": seed})
        self.agents = ['player_0', 'player_1', 'player_2', 'player_3']
        self.num_agents = len(self.agents)
        self.has_reset = False

        self.observation_spaces = self._convert_to_dict([
            spaces.Box(low=0.0, high=1.0, shape=(6, 34, 4), dtype=bool)
            for _ in range(self.num_agents)
        ])
        self.action_spaces = self._convert_to_dict([
            spaces.Discrete(self.env.game.get_action_num())
            for _ in range(self.num_agents)
        ])

        self.agent_order = list(self.agents)
        self._agent_selector = agent_selector(self.agent_order)
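
The _convert_to_dict helper used throughout these rlcard-based wrappers does not appear in the excerpts. Judging from its call sites, it presumably just pairs each agent name with the corresponding per-agent entry; a sketch under that assumption:

    def _convert_to_dict(self, list_of_list):
        # Assumed: map a per-agent list onto the agent names, in order.
        return dict(zip(self.agents, list_of_list))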
Code Example #3
File: dou_dizhu.py Project: lssr/PettingZoo
    def __init__(self, seed=None):
        super().__init__()
        self.env = rlcard.make('doudizhu', config={"seed": seed})
        self.agents = ['landlord_0', 'peasant_0', 'peasant_1']
        self.num_agents = len(self.agents)
        self.has_reset = False

        self.observation_spaces = self._convert_to_dict([
            spaces.Box(low=0.0, high=1.0, shape=(6, 5, 15), dtype=bool)
            for _ in range(self.num_agents)
        ])
        self.action_spaces = self._convert_to_dict([
            spaces.Discrete(self.env.game.get_action_num())
            for _ in range(self.num_agents)
        ])

        self.agent_order = self.agents
        self._agent_selector = agent_selector(self.agent_order)
Code Example #4
File: chess_env.py Project: zhoushiyang12/PettingZoo
    def __init__(self):
        super().__init__()

        self.board = chess.Board()

        self.agents = ["player_{}".format(i) for i in range(2)]
        self.possible_agents = self.agents[:]

        self._agent_selector = agent_selector(self.agents)

        self.action_spaces = {name: spaces.Discrete(8 * 8 * 73) for name in self.agents}
        self.observation_spaces = {name: spaces.Box(low=0, high=1, shape=(8, 8, 20), dtype=bool) for name in self.agents}

        self.rewards = None
        self.dones = None
        self.infos = {name: {} for name in self.agents}

        self.agent_selection = None
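
The Discrete(8 * 8 * 73) action space follows the AlphaZero move encoding: 73 move-type planes for each of the 64 source squares, i.e. 8 x 8 x 73 = 4672 actions. This is also why the action_mask boxes in the later chess examples have shape (4672,).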
Code Example #5
    def __init__(self, **kwargs):
        EzPickle.__init__(self, **kwargs)
        self._kwargs = kwargs

        self.seed()
        self.env = SumoEnvironment(**self._kwargs)

        self.agents = self.env.ts_ids
        self.possible_agents = self.env.ts_ids
        self._agent_selector = agent_selector(self.agents)
        self.agent_selection = self._agent_selector.reset()
        # spaces
        self.action_spaces = {a: self.env.action_spaces(a) for a in self.agents}
        self.observation_spaces = {a: self.env.observation_spaces(a) for a in self.agents}

        # dicts
        self.rewards = {a: 0 for a in self.agents}
        self.dones = {a: False for a in self.agents}
        self.infos = {a: {} for a in self.agents}
Code Example #6
File: mahjong.py Project: lssr/PettingZoo
    def reset(self, observe=True):
        self.has_reset = True
        obs, player_id = self.env.reset()
        self.agent_order = list(self.agents)
        self._agent_selector = agent_selector(self.agent_order)
        self.agent_selection = self._agent_selector.reset()
        self.rewards = self._convert_to_dict(self.env.get_payoffs())
        self.dones = self._convert_to_dict(
            [False for _ in range(self.num_agents)])
        self.infos = self._convert_to_dict([{
            'legal_moves': []
        } for _ in range(self.num_agents)])
        self.infos[self._int_to_name(
            player_id)]['legal_moves'] = obs['legal_actions']
        self._last_obs = obs['obs']
        if observe:
            return obs['obs']
        else:
            return
Code Example #7
File: leduc_holdem.py Project: Ujwal2910/PettingZoo
    def __init__(self, seed=None):
        super().__init__()
        if seed is not None:
            np.random.seed(seed)
            random.seed(seed)
        self.env = rlcard.make('leduc-holdem', config={"seed": seed})
        self.agents = ['player_0', 'player_1']
        self.num_agents = len(self.agents)
        self.has_reset = False

        self.observation_spaces = self._convert_to_dict([
            spaces.Box(low=0.0, high=1.0, shape=(36, ), dtype=np.float64)
            for _ in range(self.num_agents)
        ])
        self.action_spaces = self._convert_to_dict([
            spaces.Discrete(self.env.game.get_action_num())
            for _ in range(self.num_agents)
        ])
        self.agent_order = self.agents
        self._agent_selector = agent_selector(self.agent_order)
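
Unlike the other rlcard wrappers here, this one also seeds the global np.random and random modules, so constructing the environment has a process-wide side effect; simple_env.py in Code Example #1 instead keeps a per-environment generator via seeding.np_random.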
Code Example #8
    def __init__(self, seed=None, **kwargs):
        super().__init__()
        self.randomizer, seed = seeding.np_random(seed)
        self.env = CooperativePong(self.randomizer, **kwargs)

        self.agents = self.env.agents
        self.num_agents = len(self.agents)
        self._agent_selector = agent_selector(self.agents)
        self.agent_selection = self._agent_selector.reset()
        # spaces
        self.action_spaces = dict(zip(self.agents, self.env.action_space))
        self.observation_spaces = dict(
            zip(self.agents, self.env.observation_space))
        # dicts
        self.observations = {}
        self.rewards = self.env.rewards
        self.dones = self.env.dones
        self.infos = self.env.infos

        self.score = self.env.score
        self.display_wait = 0.0
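
For reference, gym.utils.seeding.np_random, used here and in Code Example #1, returns a seeded NumPy random generator together with the seed that was actually used, keeping randomness per-environment rather than global. A minimal sketch:

    from gym.utils import seeding

    rng, used_seed = seeding.np_random(42)
    sample = rng.uniform(0.0, 1.0)  # drawn from the per-env generator,
                                    # not from the global np.random state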
Code Example #9
File: cooperative_pong.py Project: fratim/PettingZoo
    def __init__(self, **kwargs):
        EzPickle.__init__(self, **kwargs)
        self._kwargs = kwargs

        self.seed()

        self.agents = self.env.agents[:]
        self.possible_agents = self.agents[:]
        self._agent_selector = agent_selector(self.agents)
        self.agent_selection = self._agent_selector.reset()
        # spaces
        self.action_spaces = dict(zip(self.agents, self.env.action_space))
        self.observation_spaces = dict(zip(self.agents, self.env.observation_space))
        # dicts
        self.observations = {}
        self.rewards = self.env.rewards
        self.dones = self.env.dones
        self.infos = self.env.infos

        self.score = self.env.score
        self.display_wait = 0.0
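
Note that __init__ here reads self.env.agents immediately after calling self.seed(), so the seed method must be what constructs the wrapped game. That method is not part of this excerpt; a plausible sketch, modeled on Code Example #8:

    def seed(self, seed=None):
        # Assumed: reseed and rebuild the underlying game so that self.env
        # exists by the time __init__ reads self.env.agents.
        self.randomizer, seed = seeding.np_random(seed)
        self.env = CooperativePong(self.randomizer, **self._kwargs)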
Code Example #10
    def __init__(self):
        super().__init__()

        self.board = chess.Board()

        self.agents = ["player_{}".format(i) for i in range(2)]
        self.possible_agents = self.agents[:]

        # pgn move stack export
        self.game = chess.pgn.Game()

        self.game.headers["White"] = self.agents[0]
        self.game.headers["Black"] = self.agents[1]
        self.game.headers["Date"] = str(date.today())

        self.game.setup(self.board)

        self.node = self.game

        self._agent_selector = agent_selector(self.agents)

        self.action_spaces = {
            name: spaces.Discrete(8 * 8 * 73)
            for name in self.agents
        }
        self.observation_spaces = {
            name: spaces.Dict({
                'observation':
                spaces.Box(low=0, high=1, shape=(8, 8, 20), dtype=bool),
                'action_mask':
                spaces.Box(low=0, high=1, shape=(4672, ), dtype=np.int8)
            })
            for name in self.agents
        }

        self.rewards = None
        self.dones = None
        self.infos = {name: {} for name in self.agents}

        self.agent_selection = None
Code Example #11
    def __init__(self, seed=None):
        super().__init__()
        if seed is not None:
            np.random.seed(seed)
            random.seed(seed)
        self.env = rlcard.make('no-limit-holdem', config={"seed": seed})
        self.agents = ['player_0', 'player_1']
        self.num_agents = len(self.agents)
        self.has_reset = False

        self.observation_spaces = self._convert_to_dict([
            spaces.Box(low=np.zeros(54, ),
                       high=np.append(np.ones(52, ), [100, 100]),
                       dtype=np.float32) for _ in range(self.num_agents)
        ])
        self.action_spaces = self._convert_to_dict([
            spaces.Discrete(self.env.game.get_action_num())
            for _ in range(self.num_agents)
        ])

        self.agent_order = self.agents
        self._agent_selector = agent_selector(self.agent_order)
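
The asymmetric bounds spell out the observation layout: 52 binary card indicators followed by what are presumably two chip counters capped at 100, hence low = np.zeros(54,) and high = 52 ones followed by [100, 100].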
Code Example #12
    def __init__(self):
        super().__init__()

        self.board = chess.Board()

        self.agents = [f"player_{i}" for i in range(2)]
        self.possible_agents = self.agents[:]

        self._agent_selector = agent_selector(self.agents)

        self.action_spaces = {name: spaces.Discrete(8 * 8 * 73) for name in self.agents}
        self.observation_spaces = {name: spaces.Dict({
            'observation': spaces.Box(low=0, high=1, shape=(8, 8, 111), dtype=bool),
            'action_mask': spaces.Box(low=0, high=1, shape=(4672,), dtype=np.int8)
        }) for name in self.agents}

        self.rewards = None
        self.dones = None
        self.infos = {name: {} for name in self.agents}

        self.agent_selection = None

        self.board_history = np.zeros((8, 8, 104), dtype=bool)
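
Relative to Code Example #10, the observation grows from 20 to 111 planes because this version stacks position history: board_history holds the extra 104 planes, presumably the last eight board states at 13 planes each in the AlphaZero style, updated as the game progresses.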
Code Example #13
File: go_env.py Project: zeta1999/PettingZoo
    def __init__(self, board_size: int = 19, komi: float = 7.5):
        # board_size: an int, the side length of the (board_size x board_size) board
        # komi: a float, the compensation points given to the second player
        super().__init__()

        self._overwrite_go_global_variables(board_size=board_size)
        self._komi = komi

        self.agents = ['black_0', 'white_0']
        self.num_agents = len(self.agents)
        self.has_reset = False

        self.observation_spaces = self._convert_to_dict([
            spaces.Box(low=0,
                       high=1,
                       shape=(self._N, self._N, 3),
                       dtype=bool) for _ in range(self.num_agents)
        ])
        self.action_spaces = self._convert_to_dict([
            spaces.Discrete(self._N * self._N + 1)
            for _ in range(self.num_agents)
        ])

        self._agent_selector = agent_selector(self.agents)
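
The action space Discrete(self._N * self._N + 1) allots one action per board intersection plus one extra action, presumably the pass move: on a standard 19 x 19 board that is 361 + 1 = 362 actions.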
Code Example #14
    def __init__(self, board_size: int = 19, komi: float = 7.5):
        # board_size: an int, the side length of the (board_size x board_size) board
        # komi: a float, the compensation points given to the second player
        super().__init__()

        self._overwrite_go_global_variables(board_size=board_size)
        self._komi = komi

        self.agents = ['black_0', 'white_0']
        self.possible_agents = self.agents[:]
        self.has_reset = False

        self.screen = None

        self.observation_spaces = self._convert_to_dict(
            [spaces.Dict({'observation': spaces.Box(low=0, high=1, shape=(self._N, self._N, 17), dtype=bool),
                          'action_mask': spaces.Box(low=0, high=1, shape=((self._N * self._N) + 1,), dtype=np.int8)})
             for _ in range(self.num_agents)])

        self.action_spaces = self._convert_to_dict([spaces.Discrete(self._N * self._N + 1) for _ in range(self.num_agents)])

        self._agent_selector = agent_selector(self.agents)

        self.board_history = np.zeros((self._N, self._N, 16), dtype=bool)
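
The observation widens here to 17 planes, backed by a 16-plane board_history buffer, which matches the AlphaGo Zero input encoding: presumably the last eight positions for each colour (16 planes) plus one plane marking the player to move.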
Code Example #15
File: gin_rummy.py Project: Ujwal2910/PettingZoo
    def __init__(self,
                 seed=None,
                 knock_reward: float = 0.5,
                 gin_reward: float = 1.0):
        super().__init__()
        self._knock_reward = knock_reward
        self._gin_reward = gin_reward
        self.env = rlcard.make('gin-rummy', config={"seed": seed})
        self.agents = ['player_0', 'player_1']
        self.num_agents = len(self.agents)
        self.has_reset = False

        self.observation_spaces = self._convert_to_dict([
            spaces.Box(low=0.0, high=1.0, shape=(5, 52), dtype=bool)
            for _ in range(self.num_agents)
        ])
        self.action_spaces = self._convert_to_dict([
            spaces.Discrete(self.env.game.get_action_num())
            for _ in range(self.num_agents)
        ])

        self.agent_order = self.agents
        self._agent_selector = agent_selector(self.agent_order)
        self.env.game.judge.scorer.get_payoff = self._get_payoff
Code Example #16
    def reset(self):
        self.has_reset = True

        self.agents = self.possible_agents[:]

        self.board = chess.Board()

        self.game = chess.pgn.Game()

        self.game.headers["White"] = self.agents[0]
        self.game.headers["Black"] = self.agents[1]
        self.game.headers["Date"] = str(date.today())

        self.game.setup(self.board)

        self.node = self.game

        self._agent_selector = agent_selector(self.agents)
        self.agent_selection = self._agent_selector.reset()

        self.rewards = {name: 0 for name in self.agents}
        self._cumulative_rewards = {name: 0 for name in self.agents}
        self.dones = {name: False for name in self.agents}
        self.infos = {name: {} for name in self.agents}
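
This reset also zeroes _cumulative_rewards alongside the usual rewards, dones, and infos dicts: in PettingZoo's AEC API that dict accumulates each agent's rewards between its own turns and is what env.last() reports.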
Code Example #17
File: async_vector_env.py Project: vwxyzjn/SuperSuit
    def __init__(self, env_constructors, num_cpus=None, return_copy=True):
        # set signaling so that crashing is handled gracefully
        init_parallel_env()

        num_envs = len(env_constructors)

        if num_cpus is None:
            num_cpus = mp.cpu_count()

        num_cpus = min(num_cpus, num_envs)
        assert num_envs >= 1
        assert callable(env_constructors[0]), "env_constructor must be a callable object (i.e. a function) that creates an environment"
        self.env = env = env_constructors[0]()
        self.max_num_agents = len(self.env.possible_agents)
        self.possible_agents = self.env.possible_agents
        self.observation_spaces = copy.copy(self.env.observation_spaces)
        self.action_spaces = copy.copy(self.env.action_spaces)
        self.order_is_nondeterministic = False
        self.num_envs = num_envs

        self.agent_indexes = {agent: i for i, agent in enumerate(self.env.possible_agents)}

        self._agent_selector = agent_selector(self.possible_agents)

        all_arrays = {
            agent: create_shared_data(
                num_envs,
                SpaceWrapper(self.observation_spaces[agent]),
                SpaceWrapper(self.action_spaces[agent]),
            )
            for agent in self.possible_agents
        }

        self.shared_datas = {
            agent: AgentSharedData(num_envs, SpaceWrapper(env.observation_spaces[agent]), SpaceWrapper(env.action_spaces[agent]), all_arrays[agent])
            for agent in env.possible_agents
        }

        env_arrays = create_env_data(num_envs)

        self.env_datas = EnvSharedData(num_envs, env_arrays)
        self.return_copy = return_copy

        self.procs = []
        self.pipes = [mp.Pipe() for _ in range(num_cpus)]
        self.con_ins = [con_in for con_in, con_out in self.pipes]
        self.con_outs = [con_out for con_in, con_out in self.pipes]
        self.env_starts = []
        env_counter = 0
        for pidx in range(num_cpus):
            envs_left = num_envs - env_counter
            allocated_envs = min(envs_left, (num_envs + num_cpus - 1) // num_cpus)
            proc_constructors = env_constructors[env_counter : env_counter + allocated_envs]
            proc = mp.Process(
                target=env_worker, args=(proc_constructors, num_envs, env_counter, allocated_envs, all_arrays, env_arrays, self.con_outs[pidx])
            )
            self.procs.append(proc)
            self.env_starts.append(env_counter)

            proc.start()
            env_counter += allocated_envs
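
The allocation loop above hands each worker a ceiling-division share of the environments, so the load stays nearly even. A worked example with assumed counts:

    num_envs, num_cpus = 10, 4
    per_proc = (num_envs + num_cpus - 1) // num_cpus   # ceil(10 / 4) = 3

    shares, env_counter = [], 0
    for _ in range(num_cpus):
        allocated = min(num_envs - env_counter, per_proc)
        shares.append(allocated)
        env_counter += allocated
    print(shares)  # [3, 3, 3, 1] -- the last worker takes the remainder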
Code Example #18
    def __init__(self,
                 scenario,
                 world,
                 max_cycles,
                 continuous_actions=False,
                 local_ratio=None):
        super().__init__()

        self.seed()

        self.metadata = {'render.modes': ['human', 'rgb_array']}

        self.max_cycles = max_cycles
        self.scenario = scenario
        self.world = world
        self.continuous_actions = continuous_actions
        self.local_ratio = local_ratio

        self.scenario.reset_world(self.world, self.np_random)

        self.agents = [agent.name for agent in self.world.agents]
        self.possible_agents = self.agents[:]
        self._index_map = {
            agent.name: idx
            for idx, agent in enumerate(self.world.agents)
        }

        self._agent_selector = agent_selector(self.agents)

        # set spaces
        self.action_spaces = dict()
        self.observation_spaces = dict()
        state_dim = 0
        for agent in self.world.agents:
            if agent.movable:
                space_dim = self.world.dim_p * 2 + 1
            elif self.continuous_actions:
                space_dim = 0
            else:
                space_dim = 1
            if not agent.silent:
                if self.continuous_actions:
                    space_dim += self.world.dim_c
                else:
                    space_dim *= self.world.dim_c

            obs_dim = len(self.scenario.observation(agent, self.world))
            state_dim += obs_dim
            if self.continuous_actions:
                self.action_spaces[agent.name] = spaces.Box(
                    low=0, high=1, shape=(space_dim, ))
            else:
                self.action_spaces[agent.name] = spaces.Discrete(space_dim)
            self.observation_spaces[agent.name] = spaces.Box(
                low=-np.float32(np.inf),
                high=+np.float32(np.inf),
                shape=(obs_dim, ),
                dtype=np.float32)

        self.state_space = spaces.Box(low=-np.float32(np.inf),
                                      high=+np.float32(np.inf),
                                      shape=(state_dim, ),
                                      dtype=np.float32)

        self.steps = 0

        self.current_actions = [None] * self.num_agents

        self.viewer = None
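
A worked example of the space-dimension logic above, with assumed world parameters:

    # Assume dim_p = 2 physical dimensions and dim_c = 4 communication channels,
    # for an agent that is movable and not silent.
    dim_p, dim_c = 2, 4

    space_dim = dim_p * 2 + 1      # 5 moves: no-op, +x, -x, +y, -y
    space_dim *= dim_c             # discrete case: every (move, utterance) pair
    print(space_dim)               # 20 -> spaces.Discrete(20)

    continuous_dim = (dim_p * 2 + 1) + dim_c  # continuous case adds channels
    print(continuous_dim)          # 9 -> spaces.Box(low=0, high=1, shape=(9,))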