Code example #1
    def test_init_game(self):
        game = Game()
        state, current_player = game.init_game()
        self.assertEqual(current_player, 0)
        for player in game.players:
            self.assertEqual(len(player.hand), utils.INITIAL_NUMBER_OF_CARDS)
        self.assertEqual(len(game.round.discard_pile), 1)
        self.assertEqual(len(game.round.discard_pile[0]), 1)
Code example #2
    def test_end_after_n_steps(self):
        game = Game()
        game.init_game()
        game._early_end_reward = -1
        game._end_after_n_steps = 100
        while not game.is_over():
            legal_actions = game.get_legal_actions()
            if utils.DRAW_CARD_ACTION in legal_actions:
                action = utils.DRAW_CARD_ACTION
            else:
                action = np.random.choice(
                    [a for a in legal_actions if a != utils.YANIV_ACTION])

            _, _ = game.step(action)
            self.assertLessEqual(len(game.actions), 100)

        # the step cap should cut the game off at exactly 100 actions
        self.assertEqual(len(game.actions), 100)
        for p in game.get_payoffs():
            self.assertEqual(p, -1)
Code example #3
    def test_end_after_n_deck_replacements(self):
        game = Game()
        game.init_game()
        game._early_end_reward = -1
        game._end_after_n_deck_replacements = 1
        while not game.is_over():
            legal_actions = game.get_legal_actions()
            if utils.DRAW_CARD_ACTION in legal_actions:
                action = utils.DRAW_CARD_ACTION
            else:
                action = np.random.choice(
                    [a for a in legal_actions if a != utils.YANIV_ACTION])

            _, _ = game.step(action)
            self.assertLessEqual(len(game.actions), 84)

        # should take 84 actions
        self.assertEqual(game.round.deck_replacements, 1)
        self.assertEqual(len(game.actions), 84)
        self.assertEqual(game.actions[-1], "draw_card")
        for p in game.get_payoffs():
            self.assertEqual(p, -1)
Code example #4
    def test_get_player_num(self):
        game = Game()
        player_num = game.get_player_num()
        self.assertEqual(player_num, 2)

        game = Game(3)
        player_num = game.get_player_num()
        self.assertEqual(player_num, 3)
Code example #5
    def test_pickup_discard(self):
        game = Game()
        _, current_player = game.init_game()

        card = game.players[current_player].hand[0]
        action = str(card)
        _, current_player = game.step(action)

        _, next_player = game.step(utils.DRAW_CARD_ACTION)
        _, next_player = game.step(
            np.random.choice(
                [a for a in game.get_legal_actions() if a != "yaniv"]))
        _, current_player = game.step(utils.PICKUP_TOP_DISCARD_ACTION)

        self.assertIn(card, game.players[next_player].hand)
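The assertion documents how pickups work in this implementation: the discard available for pickup is the previous player's discard (the fresh one is not yet available), so after the second player discards and then plays utils.PICKUP_TOP_DISCARD_ACTION, the first player's card lands in the second player's hand.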
Code example #6
    def test_proceed_game(self):
        game = Game()
        game.init_game()
        while not game.is_over():
            legal_actions = game.get_legal_actions()
            action = np.random.choice(legal_actions)
            self.assertIn(action, utils.ACTION_LIST)
            _, _ = game.step(action)

        if game.round.winner != -1:
            self.assertEqual(game.actions[-1], "yaniv")
        else:
            self.assertEqual(game.actions[-1], "draw_card")
Code example #7
    def test_step(self):
        game = Game()
        _, current_player = game.init_game()

        # discard first
        action = np.random.choice(
            [a for a in game.get_legal_actions() if a != "yaniv"])
        self.assertNotIn(action, utils.pickup_actions)
        self.assertIn(action, utils.ACTION_LIST)
        _, next_player = game.step(action)
        self.assertEqual(next_player, current_player)

        # then pickup
        action = np.random.choice(game.get_legal_actions())
        self.assertIn(action, utils.pickup_actions)
        _, next_player = game.step(action)
        self.assertEqual(next_player,
                         (current_player + 1) % game.get_player_num())
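This test pins down the two-phase turn structure: a discard action leaves the turn with the same player, and the following pickup/draw action passes the turn to the next player.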
Code example #8
    def test_get_payoffs(self):
        game = Game()
        game.init_game()
        while not game.is_over():
            actions = game.get_legal_actions()
            action = np.random.choice(actions)
            state, _ = game.step(action)

        payoffs = game.get_payoffs()

        for player in game.players:
            player_id = player.get_player_id()
            payoff = payoffs[player_id]

            if game.round.winner == -1:
                self.assertEqual(payoff, -1)
            elif game.round.winner == player_id:
                self.assertEqual(payoff, 1)
            else:
                self.assertEqual(payoff, -(game.round.scores[player_id] / 50))
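The payoff scheme the test pins down: the round winner receives +1, every player receives -1 when the round is cut off without a winner (winner == -1), and each loser pays a penalty proportional to their end-of-round score, scaled by 1/50. Restated as a standalone helper (hypothetical, written here only to mirror the assertions above):

def expected_payoff(winner, scores, player_id):
    # winner == -1 marks a round that ended without a Yaniv call
    if winner == -1:
        return -1
    if winner == player_id:
        return 1
    # losers pay their hand score scaled by 1/50
    return -(scores[player_id] / 50)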
Code example #9
class YanivEnv(MultiAgentEnv):
    def __init__(self, config=None):
        super().__init__()
        config = config or {}  # avoid the mutable-default-argument pitfall
        conf = DEFAULT_GAME_CONFIG.copy()
        conf.update(config)
        self.config = conf
        self.player_step_fn = conf.pop("player_step_fn", {})

        self.single_step = self.config.get("single_step", True)
        self.obs_scheme = self.config.get("observation_scheme", 0)
        self.num_players = self.config.get("n_players")
        self.state_n_players = self.config.get("state_n_players")
        self.step_reward = self.config.get("step_reward", 0)

        self.game = Game(single_step_actions=self.single_step,
                         num_players=self.num_players)
        self.game.configure(self.config)

        self.action_space = Discrete(self.game.get_action_num())
        self.observation_space = Dict({
            "action_mask":
            Box(0, 1, shape=(self.action_space.n, )),
            "state":
            Box(shape=(self._get_state_shape(), ), low=0, high=1, dtype=int),
        })
        self.reward_range = (-1.0, 1.0)

        self.timestep = 0

    @property
    def current_player(self):
        return self.game.round.current_player

    @property
    def current_player_string(self):
        return self._get_player_string(self.current_player)

    def reset(self):
        self.game.init_game()
        self.timestep = 0
        self.step_player_fn()

        return {
            self.current_player_string:
            self._get_players_observation(self.current_player)
        }

    def step_player_fn(self):
        while (self.current_player_string in self.player_step_fn
               and not self.game.is_over()):
            self.player_step_fn[self.current_player_string](self)

    def step(self, action_dict, raw_action=False):
        action = action_dict[self.current_player_string]
        if not raw_action:
            action = self._decode_action(action)

        self.game.step(action)
        self.step_player_fn()

        done = self.game.is_over()
        dones = {p: done for p in self._get_players()}
        dones["__all__"] = done

        if done:
            payoffs = self.game.get_payoffs()
            rewards = {
                self._get_player_string(i): payoffs[i]
                for i in range(self.num_players)
            }
            observations = {
                p: {
                    "state":
                    np.zeros(self.observation_space.spaces["state"].shape),
                    "action_mask": np.zeros(self.action_space.n),
                }
                for p in self._get_players()
            }
        else:
            rewards = {self.current_player_string: self.step_reward}
            observations = {
                self.current_player_string:
                self._get_players_observation(self.current_player)
            }

        infos = {p: {} for p in self._get_players()}

        self.timestep += 1
        return observations, rewards, dones, infos

    def _decode_action(self, action_id):
        if self.single_step:
            return utils.JOINED_ACTION_LIST[action_id]
        else:
            return utils.ACTION_LIST[action_id]

    def _get_observations(self):
        observations = {}
        for i in range(self.num_players):
            # RLlib only expects an observation for the agent that acts next
            if i != self.current_player:
                continue

            obs = self._get_players_observation(i)
            observations[self._get_player_string(i)] = obs

        return observations

    def _get_players_observation(self, player_id):
        if self.obs_scheme == 0:
            state = self._extract_state_0(player_id)
        elif self.obs_scheme == 1:
            state = self._extract_state_1(player_id)
        else:
            raise ValueError(
                "unknown observation scheme: {}".format(self.obs_scheme))

        return {
            "state": state,
            "action_mask": self._get_action_mask(player_id),
        }

    def _get_action_mask(self, player_id):
        if player_id != self.current_player:
            return np.zeros(self.action_space.n)

        legal_actions = self.game.get_legal_actions()
        if self.game._single_step_actions:
            legal_ids = [
                utils.JOINED_ACTION_SPACE[action] for action in legal_actions
            ]
        else:
            legal_ids = [
                utils.ACTION_SPACE[action] for action in legal_actions
            ]

        action_mask = np.zeros(self.action_space.n)
        np.put(action_mask, ind=legal_ids, v=1)

        return action_mask

    def _extract_state_0(self, player_id):
        if self.game.is_over():
            return np.zeros(self._get_state_shape())

        discard_pile = self.game.round.discard_pile
        if self.game.round.discarding:
            last_discard = discard_pile[-1]
        else:
            last_discard = discard_pile[-2]

        available_discard = set([last_discard[0], last_discard[-1]])
        deadcards = [
            c for d in discard_pile for c in d if c not in available_discard
        ]

        current_player = self.game.players[player_id]

        known_cards = []
        hand_sizes = []
        for i in range(self.state_n_players - 1):
            next_id = self.game.round._get_next_player(player_id + i)
            next_player = self.game.players[next_id]
            known_cards.append(self.game.round.known_cards[next_id])
            opponent_hand_size = np.zeros(6)
            opponent_hand_size[len(next_player.hand)] = 1
            hand_sizes.append(opponent_hand_size)

        card_obs = [
            current_player.hand,
            available_discard,
            deadcards,
            *known_cards,
        ]

        if self.config["use_unkown_cards_in_state"]:
            unknown_cards = self.game.round.dealer.deck + [
                c for c in next_player.hand if c not in known_cards
            ]
            card_obs.append(unknown_cards)

        card_obs = np.ravel(list(map(utils.encode_cards, card_obs)))

        hand_sizes = np.ravel(hand_sizes)

        obs = np.concatenate((card_obs, opponent_hand_size))

        return obs

    def _extract_state_1(self, player_id):
        if self.game.is_over():
            # match the configured state shape instead of hardcoding 262,
            # which is only correct when dead cards are included
            return np.zeros((self._get_state_shape(), ))

        discard_pile = self.game.round.discard_pile
        if self.game.round.discarding:
            last_discard = discard_pile[-1]
        else:
            last_discard = discard_pile[-2]

        top_card = last_discard[0]
        bottom_card = last_discard[-1]

        deadcards = [
            c for d in discard_pile for c in d
            if c not in (top_card, bottom_card)
        ]

        current_player = self.game.players[player_id]
        next_id = self.game.round._get_next_player(player_id)
        next_player = self.game.players[next_id]
        known_cards = self.game.round.known_cards[next_id]

        hand_enc = np.zeros(85)
        known_enc = np.zeros(85)

        if len(current_player.hand) > 0:
            hand_one_hot = utils.one_hot_encode_cards(current_player.hand)
            hand_enc[:hand_one_hot.shape[0]] = hand_one_hot

        if len(known_cards) > 0:
            known_one_hot = utils.one_hot_encode_cards(known_cards)
            known_enc[:known_one_hot.shape[0]] = known_one_hot

        opponent_hand_size = np.zeros(6)
        opponent_hand_size[len(next_player.hand)] = 1

        obs = [
            hand_enc,
            known_enc,
            opponent_hand_size,
            utils.one_hot_encode_card(top_card),
            utils.one_hot_encode_card(bottom_card),
            utils.encode_cards(deadcards)
            if self.config["use_dead_cards_in_state"] else [],
        ]

        obs = np.concatenate(obs)

        return obs

    def _get_player_string(self, player_id):
        return "player_{}".format(player_id)

    def _get_players(self):
        return [self._get_player_string(i) for i in range(self.num_players)]

    def _get_state_shape(self):
        if self.obs_scheme == 0:
            # own hand + available discard + dead cards: 3 * 52 = 156
            shape = 156
            if self.config["use_unkown_cards_in_state"]:
                shape += 52

            # per opponent: 52 known-card bits plus a 6-wide one-hot hand size
            shape += 58 * (self.state_n_players - 1)

            return shape
        elif self.obs_scheme == 1:
            shape = 210
            if self.config["use_dead_cards_in_state"]:
                shape += 52

            return shape
        else:
            raise ValueError(
                "unknown observation scheme: {}".format(self.obs_scheme))
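For orientation, here is a minimal random-play driver for the RLlib-style env above. This is a sketch, not part of the source; it assumes YanivEnv and its config keys are importable exactly as defined here and uses a two-player setup:

import numpy as np

# Sketch: drive the multi-agent env with uniformly random legal actions.
# Exactly one agent acts per step, so each observation dict holds a single
# entry keyed by the current player's string.
env = YanivEnv(config={"n_players": 2, "state_n_players": 2})
obs = env.reset()
done = False
while not done:
    player, player_obs = next(iter(obs.items()))
    legal_ids = np.flatnonzero(player_obs["action_mask"])
    action = int(np.random.choice(legal_ids))
    obs, rewards, dones, infos = env.step({player: action})
    done = dones["__all__"]
print(rewards)  # final payoffs keyed by player string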
Code example #10
class YanivEnv(Env):
    def __init__(self, config=None):
        config = config or {}  # avoid the mutable-default-argument pitfall
        self.name = "yaniv"
        self.single_step = config.get("single_step_actions", False)
        self.game = Game(single_step_actions=self.single_step)
        self.default_game_config = DEFAULT_GAME_CONFIG
        self.reward_func = calculate_reward
        # configure game
        super().__init__(config)
        self.state_shape = [266]

        _game_config = self.default_game_config.copy()
        for key in config:
            if key in _game_config:
                _game_config[key] = config[key]
        self.game.configure(_game_config)

    def _extract_state(self, state):
        if self.game.is_over():
            return {
                "obs": np.zeros(self.state_shape),
                "legal_actions": self._get_legal_actions(),
            }

        discard_pile = self.game.round.discard_pile
        if self.game.round.discarding:
            last_discard = discard_pile[-1]
        else:
            last_discard = discard_pile[-2]

        available_discard = set([last_discard[0], last_discard[-1]])
        deadcards = [
            c for d in discard_pile for c in d if c not in available_discard
        ]

        current_player = self.game.players[self.game.round.current_player]
        next_player = self.game.players[self.game.round.get_next_player()]
        known_cards = self.game.round.known_cards[0]
        unknown_cards = self.game.round.dealer.deck + [
            c for c in next_player.hand if c not in known_cards
        ]

        card_obs = [
            current_player.hand,
            available_discard,
            deadcards,
            known_cards,
            unknown_cards,
        ]
        card_obs = np.ravel(list(map(utils.encode_cards, card_obs)))

        opponent_hand_size = np.zeros(6)
        opponent_hand_size[len(next_player.hand)] = 1

        obs = np.concatenate((card_obs, opponent_hand_size))

        extracted_state = {
            "obs": obs,
            "legal_actions": self._get_legal_actions()
        }

        if self.allow_raw_data:
            extracted_state["raw_obs"] = state
            extracted_state["raw_legal_actions"] = [
                a for a in state["legal_actions"]
            ]

        if self.record_action:
            extracted_state["action_record"] = self.action_recorder

        return extracted_state

    def get_payoffs(self):
        return np.array(self.game.get_payoffs())

    def _decode_action(self, action_id):
        if self.single_step:
            return utils.JOINED_ACTION_LIST[action_id]
        else:
            return utils.ACTION_LIST[action_id]

    def _get_legal_actions(self):
        legal_actions = self.game.get_legal_actions()
        if self.game._single_step_actions:
            legal_ids = [
                utils.JOINED_ACTION_SPACE[action] for action in legal_actions
            ]
        else:
            legal_ids = [
                utils.ACTION_SPACE[action] for action in legal_actions
            ]

        return legal_ids

    def _load_model(self):
        """Load pretrained/rule model

        Returns:
            model (Model): A Model object
        """
        raise NotImplementedError

    def step(self, action, raw_action=False):
        if not raw_action:
            action = self._decode_action(action)
        if self.single_agent_mode:
            return self._single_agent_step(action)

        self.timestep += 1
        # Record the action for human interface
        if self.record_action:
            self.action_recorder.append([self.get_player_id(), action])
        next_state, player_id = self.game.step(action)

        return self._extract_state(next_state), player_id

    def run(self, is_training=False):
        """
        Run a complete game, either for evaluation or for training an RL agent.

        Args:
            is_training (boolean): True if for training purposes.

        Returns:
            (tuple) Tuple containing:

                (list): A list of trajectories generated from the environment.
                (list): A list of payoffs. Each entry corresponds to one player.

        Note: The trajectories are a three-dimensional list. The first dimension is for different players.
              The second dimension is for different transitions. The third dimension is for the contents of each transition.
        """
        if self.single_agent_mode:
            raise ValueError("Run in single agent not allowed.")

        trajectories = [[] for _ in range(self.player_num)]
        state, player_id = self.reset()

        # Loop to play the game
        trajectories[player_id].append(state)

        while not self.is_over():
            # Agent plays
            if is_training:
                action = self.agents[player_id].step(state)
            else:
                action, _ = self.agents[player_id].eval_step(state)

            # Environment steps
            next_state, next_player_id = self.step(
                action, self.agents[player_id].use_raw)
            # Save action
            trajectories[player_id].append(action)

            decoded_action = action
            if not self.agents[player_id].use_raw:
                decoded_action = self._decode_action(action)
            trajectories[player_id].append(
                self.reward_func(state, next_state, decoded_action))

            # Set the state and player
            state = next_state
            player_id = next_player_id

            # Save state.
            if not self.game.is_over():
                trajectories[player_id].append(state)

        # Add a final state to all the players
        for player_id in range(self.player_num):
            state = self.get_state(player_id)
            trajectories[player_id].append(state)

        # Payoffs
        payoffs = self.get_payoffs()

        # Reorganize the trajectories
        trajectories = reorganize(trajectories, payoffs)

        return trajectories, payoffs
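A smoke test for this RLCard-style env might look like the sketch below (illustrative, not from the source). It assumes the rlcard Env base class provides set_agents(), and it supplies a toy agent exposing the step/eval_step interface that run() calls:

import numpy as np

class ToyRandomAgent:
    # Minimal stand-in agent: choose a uniformly random legal action id.
    use_raw = False

    def step(self, state):
        return np.random.choice(state["legal_actions"])

    def eval_step(self, state):
        return self.step(state), {}

env = YanivEnv(config={"single_step_actions": True})
env.set_agents([ToyRandomAgent(), ToyRandomAgent()])
trajectories, payoffs = env.run(is_training=False)
print(payoffs)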
Code example #11
class YanivEnv(gym.Env):
    metadata = {"render.modes": ["human"], "name": "Yaniv-v0"}

    def __init__(self, single_step=True, config=None):
        super().__init__()
        config = config or {}  # avoid the mutable-default-argument pitfall
        self.single_step = single_step
        self.num_players = 2

        self.game = Game(single_step_actions=single_step,
                         num_players=self.num_players)

        conf = DEFAULT_GAME_CONFIG.copy()
        conf.update(config)
        self.game.configure(conf)

        single_aspace = spaces.Discrete(self.game.get_action_num())
        self.action_space = spaces.Tuple(
            [single_aspace for _ in range(self.num_players)])

        single_obs = spaces.Box(shape=(266, ), low=0, high=1, dtype=int)
        self.observation_space = spaces.Tuple(
            [single_obs for _ in range(self.num_players)])

        self.timestep = 0
        self.current_player = None

    def reset(self):
        state, player_id = self.game.init_game()
        self.current_player = player_id
        self.timestep = 0

        return self._get_observations()

    def step(self, action, raw_action=False):
        if not raw_action:
            action = self._decode_action(action)

        self.timestep += 1
        _, player_id = self.game.step(action)

        done = self.game.is_over()
        if done:
            rewards = self.game.get_payoffs()
        else:
            rewards = [-0.1 for _ in range(self.num_players)]

        self.current_player = player_id
        return (
            self._get_observations(),
            rewards,
            done,
            {
                "current_player": player_id,
                "legal_actions": self._get_legal_actions(),
            },
        )

    def _get_observations(self):
        observations = []
        for i in range(self.num_players):
            obs = self._extract_state(i)
            observations.append(obs)

        return observations

    def _decode_action(self, action_id):
        if self.single_step:
            return utils.JOINED_ACTION_LIST[action_id]
        else:
            return utils.ACTION_LIST[action_id]

    def _get_legal_actions(self):
        legal_actions = self.game.get_legal_actions()
        if self.game._single_step_actions:
            legal_ids = [
                utils.JOINED_ACTION_SPACE[action] for action in legal_actions
            ]
        else:
            legal_ids = [
                utils.ACTION_SPACE[action] for action in legal_actions
            ]

        return legal_ids

    def get_moves(self):
        return self._get_legal_actions()

    def _extract_state(self, player_id):
        if self.game.is_over():
            return np.zeros(self.observation_space.spaces[0].shape)

        discard_pile = self.game.round.discard_pile
        if self.game.round.discarding:
            last_discard = discard_pile[-1]
        else:
            last_discard = discard_pile[-2]

        available_discard = set([last_discard[0], last_discard[-1]])
        deadcards = [
            c for d in discard_pile for c in d if c not in available_discard
        ]

        current_player = self.game.players[player_id]
        next_player = self.game.players[self.game.round._get_next_player(
            player_id)]
        known_cards = self.game.round.known_cards[player_id]
        unknown_cards = self.game.round.dealer.deck + [
            c for c in next_player.hand if c not in known_cards
        ]

        card_obs = [
            current_player.hand,
            available_discard,
            deadcards,
            known_cards,
            unknown_cards,
        ]
        card_obs = np.ravel(list(map(utils.encode_cards, card_obs)))

        opponent_hand_size = np.zeros(6)
        opponent_hand_size[len(next_player.hand)] = 1

        obs = np.concatenate((card_obs, opponent_hand_size))

        return obs

    def render(self, mode="human"):
        state = self.game.get_state(self.current_player)
        _print_state(state, [])
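And a rough driver loop for the Gym-flavoured variant (again a sketch under the assumption that YanivEnv is importable as defined above; it leans on the get_moves() helper for the first set of legal action ids):

import numpy as np

env = YanivEnv(single_step=True)
obs = env.reset()
legal_ids = env.get_moves()
done = False
while not done:
    # only the current player's action matters; pick a random legal one
    action = int(np.random.choice(legal_ids))
    obs, rewards, done, info = env.step(action)
    if not done:
        legal_ids = info["legal_actions"]
print(rewards)  # per-player payoffs once the game is over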
Code example #12
    def test_get_action_num(self):
        game = Game()
        action_num = game.get_action_num()
        self.assertEqual(action_num, 488)