Beispiel #1
0
 def test_empty(self):
     """Draw from a fresh default deck until it reports empty and count the draws."""
     deck = HanabiDeck()
     drawn = 0
     while True:
         if deck.empty:
             break
         deck.draw()
         drawn += 1
     # A default deck is expected to hold exactly 50 cards.
     self.assertEqual(50, drawn, "The number of drawn cards is not equal to the deck size.")
Beispiel #2
0
    def test_deck_pointer(self):
        """After setting the deck pointer to 25, ``render`` must return 25 played cards."""
        deck = HanabiDeck()
        # Render once before touching the pointer; the result itself is not inspected.
        _before, _ = deck.render()
        deck._pointer = 25

        rendered = deck.render()
        _remaining, played = rendered
        self.assertEqual(25, len(played), "Number of played is not consistent with deck pointer.")
Beispiel #3
0
    def test_deck(self):
        """Check the number of cards in the deck for the 6-color and the 5-color setup."""
        # (color factory, expected deck size, failure message); the factory is
        # only called inside the loop, right before the deck is built.
        cases = (
            (Colors.difficult, 60, "Wrong number of cards for 6 colors"),
            (Colors.normal, 50, "Wrong number of cards for 5 colors"),
        )
        for color_factory, expected_size, message in cases:
            deck = HanabiDeck(colors=color_factory())
            deck.render()
            self.assertEqual(expected_size, len(deck._cards), message)
Beispiel #4
0
    def __init__(self, agents: list, hand_size=None, number_of_colors=5):
        """Set up a new game: colors, deck, one hand per agent, logs and state.

        Args:
            agents: The participating agents; between 2 and 5 are required.
            hand_size: Cards per hand. When ``None`` it defaults to 5 for up
                to three agents and to 4 otherwise.
            number_of_colors: How many of the colors returned by
                ``Colors.difficult()`` are used, taken from the front.
        """
        player_count = len(agents)
        assert 2 <= player_count <= 5, f"Requires 2-5 players, you entered {player_count}."

        self.agents = agents
        if hand_size is None:
            hand_size = 5 if player_count <= 3 else 4
        self.hand_size = hand_size

        # Keep only the leading ``number_of_colors`` colors.
        chosen_colors = []
        for position, color in enumerate(Colors.difficult()):
            if position < number_of_colors:
                chosen_colors.append(color)
        self.colors = chosen_colors

        self.deck = HanabiDeck(colors=self.colors)
        players = []
        for player_id in range(player_count):
            hand = self.deck.provide_hand(self.hand_size)
            players.append(HanabiPlayer(player_id, hand))
        self.players = players
        self.obs_index, self.observation = self._create_observation_index()

        self.log = defaultdict(list)
        self.log_moves = {}

        self.info = self._create_info()

        # Dispatch table from the action id to the handler that executes it.
        self.action_mapping = {
            0: lambda move: self._action_play(move),
            1: lambda move: self._action_inform_color(move),
            2: lambda move: self._action_inform_rank(move),
            3: lambda move: self._action_discard(move),
        }
class TestHanabiPlayer(unittest.TestCase):
    """Unit tests for ``HanabiPlayer``: rendering, hints, play, discard and adding a card."""

    def setUp(self) -> None:
        """Build a deck with ``ranks=[Rank.ONE]`` and deal the hand used by every test."""
        self.deck = HanabiDeck(ranks=[Rank.ONE])
        self.cards = self.deck.provide_hand(hand_size=6)
        # NOTE(review): the hand is dealt with hand_size=6, yet the assertions
        # below expect 4 cards after one play/discard and 6 after one add, which
        # implies ``_hand`` starts with 5 cards - confirm against HanabiPlayer.

    def _new_player(self):
        """Return a player with id 0 holding the hand dealt in ``setUp``."""
        return HanabiPlayer(player_id=0, cards=self.cards)

    def test_init(self):
        """A freshly created player can be rendered."""
        self._new_player().render()

    def test_info_hidden(self):
        """A player can be rendered with ``can_see=False``."""
        self._new_player().render(can_see=False)

    def test_info_color(self):
        """A player can be rendered after receiving a color hint."""
        player = self._new_player()
        player.inform_color(Colors.BLACK)
        player.render()

    def test_info_rank(self):
        """A player can be rendered after receiving a rank hint."""
        player = self._new_player()
        player.inform_rank(Rank.ONE.value)
        player.render()

    def test_play(self):
        """Playing the card at index 3 removes it from the hand and returns it."""
        player = self._new_player()
        card = player.play(3)
        player.render()
        self.assertEqual(4, len(player._hand),
                         "Player didn't properly play a card.")
        self.assertIsInstance(card, HanabiCard,
                              "return value from play isn't a card.")

    def test_discard(self):
        """Discarding the card at index 3 removes it from the hand and returns it."""
        player = self._new_player()
        card = player.discard(3)
        player.render()
        self.assertEqual(4, len(player._hand),
                         "Player didn't properly discard a card.")
        self.assertIsInstance(card, HanabiCard,
                              "return value from discard isn't a card.")

    def test_add_card(self):
        """Adding a card grows the hand to the expected size."""
        player = self._new_player()
        player.add_card(HanabiCard(color=Colors.RED, rank=Rank.TWO))
        player.render()
        self.assertEqual(
            6, len(player._hand),
            "Player didn't properly add a new card to his hand.")
Beispiel #6
0
class Hanabi:
    """Hanabi game environment exposing ``reset``, ``step`` and ``render``.

    State is kept in three places:

    * ``self.info`` - token counts, turn counters, the played stacks
      (``cards_played``) and the discard piles (``cards_discarded``), both
      keyed by color value.
    * ``self.log`` - human readable messages per turn number.
    * ``self.log_moves`` - ``(player, action)`` per turn number.
    """

    # Upper bound for hint tokens; also the starting amount (see ``_create_info``).
    MAX_HINTS = 8
    # Number of fuse tokens at which the game ends (see ``_handle_returns``).
    MAX_FUSES = 3
    # Copies of every rank per color: three 1s, two each of 2-4 and a single 5.
    # NOTE(review): assumes the default ``HanabiDeck`` composition - confirm.
    _COPIES_PER_RANK = {1: 3, 2: 2, 3: 2, 4: 2, 5: 1}

    def __init__(self, agents: list, hand_size=None, number_of_colors=5):
        """Set up a new game: colors, deck, one hand per agent, logs and state.

        Args:
            agents: The participating agents; between 2 and 5 are required.
            hand_size: Cards per hand. When ``None`` it defaults to 5 for up
                to three agents and to 4 otherwise.
            number_of_colors: How many of the colors returned by
                ``Colors.difficult()`` are used, taken from the front.
        """
        assert 2 <= len(agents) <= 5, f"Requires 2-5 players, you entered {len(agents)}."

        self.agents = agents
        self.hand_size = hand_size if hand_size is not None else 5 if len(agents) <= 3 else 4
        self.colors = [color for idx, color in enumerate(Colors.difficult()) if idx < number_of_colors]

        self.deck = HanabiDeck(colors=self.colors)
        self.players = [HanabiPlayer(idx, self.deck.provide_hand(self.hand_size)) for idx in range(len(agents))]
        self.obs_index, self.observation = self._create_observation_index()

        self.log = defaultdict(list)
        self.log_moves = dict()

        self.info = self._create_info()

        # Dispatch table from ``action.id`` to the handler executing the action.
        self.action_mapping = {
            0: lambda action: self._action_play(action),
            1: lambda action: self._action_inform_color(action),
            2: lambda action: self._action_inform_rank(action),
            3: lambda action: self._action_discard(action),
        }

    @property
    def hints(self):
        """Number of hint tokens currently available."""
        return self.info['hints']

    @property
    def current_player(self):
        """Index of the current player as stored in ``self.info``."""
        return self.info['current_player']

    def render(self):
        """Print the general game state, every player's hand and the last turn's log."""
        print(f"\n\nGeneral information:"
              f"\n\t- Current player: {self.current_player}, on turn: {self.info['turns_played']}"
              f"\n\t- Hint tokens: {self.info['hints']}"
              f"\n\t- Fuse tokens: {self.info['fuses']}"
              f"\n\t- Cards remaining: {self.deck.remaining}")
        # The current player's own hand is rendered with can_see=False.
        for player in self.players:
            player.render(can_see=player.id != self.current_player)
        self.render_step()

    def render_step(self):
        """Print the log messages recorded for the most recently completed turn."""
        print(f"\nPlayer: {self.current_player}:\n\t-", '\n\t- '.join(self.log[self.info['turns_played'] - 1]))

    def step(self, action: 'Actions'):
        """Advance to the next player and execute ``action`` for that player.

        Args:
            action: Object with an ``id`` selecting the handler in
                ``self.action_mapping``; handlers additionally read
                ``action.index`` and, for hints, ``action.player``.

        Returns:
            Tuple ``(observation, reward, done, info)`` from the handler.

        Raises:
            ValueError: If a hint is requested while no hint tokens are left.
                Any exception from the handler is re-raised after the player
                pointer and the move log have been restored, so a rejected
                move does not consume the turn.
        """
        previous_player = self.current_player
        turn = self.info['turns_played']

        self.info['current_player'] = (previous_player + 1) % len(self.agents)
        self.log_moves[turn] = (self.current_player, action)
        try:
            obs, reward, done, info = self.action_mapping.get(action.id)(action)
        except Exception:
            # Best-effort rollback: undo the bookkeeping done above so the same
            # player can submit another move.
            self.info['current_player'] = previous_player
            self.log_moves.pop(turn, None)
            raise
        self.info['turns_played'] += 1
        return obs, reward, done, info

    def reset(self):
        """Start a new game and return its initial observation."""
        self.deck.reset()
        self.players = [HanabiPlayer(idx, self.deck.provide_hand(self.hand_size)) for idx in range(len(self.agents))]
        self.log = defaultdict(list)
        # Drop the move history too; otherwise moves of the previous game
        # show up in the observations of the new one.
        self.log_moves = dict()
        self.info = self._create_info()
        return self._get_observation()

    def _action_play(self, action):
        """Play the current player's card at ``action.index`` and draw a replacement."""
        player = self.players[self.current_player]
        card = player.play(index=action.index)
        self._handle_play(card)
        self._handle_draw(player)
        return self._handle_returns()

    def _action_inform_color(self, action):
        """Give a color hint; raises ``ValueError`` when no hint tokens are left."""
        if self.info['hints'] <= 0:
            raise ValueError("There were no hint tokens left, invalid move.")
        self._handle_inform_color(action)
        return self._handle_returns()

    def _action_inform_rank(self, action):
        """Give a rank hint; raises ``ValueError`` when no hint tokens are left."""
        if self.info['hints'] <= 0:
            raise ValueError("There were no hint tokens left, invalid move.")
        self._handle_inform_rank(action)
        return self._handle_returns()

    def _action_discard(self, action):
        """Discard the current player's card at ``action.index`` and draw a replacement."""
        player = self.players[self.current_player]
        card = player.discard(index=action.index)
        self._handle_discard(card)
        self._handle_draw(player)
        return self._handle_returns()

    def _handle_play(self, card):
        """Put ``card`` on its color stack if it is the next rank.

        A card that does not fit goes to the discard pile and costs one fuse
        token. Returns ``True`` for a successful play, ``False`` otherwise.
        """
        turn = self.info['turns_played']
        if self.info['cards_played'][card.color] == card.rank - 1:
            self.info['cards_played'][card.color] += 1
            self.log[turn].append(f"Played card: {card}")
            return True
        self.info['cards_discarded'][card.color].append(card.rank)
        self.info['fuses'] += 1
        self.log[turn].append(f"Tried to play card: {card}, but was not possible")
        return False

    def _handle_discard(self, card):
        """Move ``card`` to the discard pile and hand back one hint token.

        The hint tokens never exceed ``MAX_HINTS``. Always returns ``True``.
        """
        self.info['cards_discarded'][card.color].append(card.rank)
        self.info['hints'] = min(self.MAX_HINTS, self.info['hints'] + 1)
        self.log[self.info['turns_played']].append(f"Discard card: {card}")
        return True

    def _handle_inform_color(self, action):
        """Tell ``action.player`` about the color with index ``action.index``; costs one hint."""
        color = Colors.color(action.index)
        self.players[action.player].inform_color(color)
        self.info['hints'] -= 1
        self.log[self.info['turns_played']].append(f"Informed player: {action.player} of color: {color.get}")
        return True

    def _handle_inform_rank(self, action):
        """Tell ``action.player`` about the rank ``action.index``; costs one hint."""
        self.players[action.player].inform_rank(action.index)
        self.info['hints'] -= 1
        self.log[self.info['turns_played']].append(f"Informed player: {action.player} of rank: {action.index}")
        return True

    def _handle_draw(self, player):
        """Give ``player`` a new card, or count down ``turns_left`` if the deck is empty.

        Returns ``True`` when a card was drawn, ``False`` otherwise.
        """
        # NOTE(review): the countdown only runs here, i.e. on play/discard
        # actions; hint actions taken with an empty deck do not reduce
        # ``turns_left`` - confirm this is intended.
        if self.deck.empty:
            self.info['turns_left'] -= 1
            return False
        new_card = self.deck.draw()
        player.add_card(new_card)
        self.log[self.info['turns_played']].append(f"Drew card: {new_card}")
        return True

    def _handle_returns(self):
        """Build the ``(observation, reward, done, info)`` tuple for the finished action.

        The game is done when no turns are left, ``MAX_FUSES`` fuses are burnt
        or no stack can be extended any more. The reward is 0 until the game
        is done and the sum of all stack heights afterwards.
        """
        done = (self.info['turns_left'] <= 0
                or self.info['fuses'] >= self.MAX_FUSES
                or not self._check_valid_moves_left())
        reward = 0 if not done else sum(self.info['cards_played'].values())
        return self._get_observation(), reward, done, self.info

    def _check_valid_moves_left(self):
        """Return ``True`` while at least one color stack can still be extended.

        A stack can be extended when it is not complete and fewer copies of
        its next rank have been discarded than exist per color.
        """
        for color, top_rank in self.info['cards_played'].items():
            copies = self._COPIES_PER_RANK.get(top_rank + 1)
            if copies is None:
                # No rank above the current top: this stack is complete.
                continue
            lost = sum(1 for rank in self.info['cards_discarded'][color] if rank - 1 == top_rank)
            if lost < copies:
                return True
        return False

    def _get_observation(self):
        """Collect the current game state into an observation dictionary."""
        data = dict()
        data['hands'] = np.array([player.obs() for player in self.players], dtype=np.uint8)
        data['played'] = np.array(list(self.info['cards_played'].values()))
        # Empty piles are represented as [0].
        # NOTE(review): piles of different length make this a ragged array,
        # which recent NumPy versions reject - confirm the intended shape.
        data['discard'] = np.array([pile if pile else [0] for pile in self.info['cards_discarded'].values()])
        data['info'] = {key: value for key, value in self.info.items() if not key.startswith('cards_')}
        # Only the most recent moves (one per other player) are exposed.
        recent = len(self.players) - 1
        data['moves'] = list(self.log_moves.values())[-recent:] if recent > 0 else []
        data['colors'] = self.colors
        data['turns log'] = self.log
        return data

    def _create_info(self):
        """Return the state dictionary for a fresh game; the starting player is random."""
        return dict(
                hints=self.MAX_HINTS,
                fuses=0,
                turns_left=len(self.agents) - 1,
                turns_played=0,

                players=len(self.players),
                current_player=random.randint(0, len(self.agents) - 1),

                cards_played={color.value: 0 for color in self.colors},
                cards_discarded={color.value: [] for color in self.colors},
        )

    def _create_observation_index(self):
        """Compute row offsets of a flat observation buffer and allocate it.

        Returns:
            Tuple of the offsets (named tuple with ``players``, ``played``,
            ``discard``, ``info`` and ``end``) and a zeroed ``uint8`` array of
            shape ``(end, 10)``.
        """
        index_player = 0
        index_played = index_player + len(self.players) * self.hand_size
        index_discard = index_played + len(self.colors)
        index_info = index_discard + len(self.colors)
        index_end = index_info + 1

        index = namedtuple('obs', ('players', 'played', 'discard', 'info', 'end'))
        obs_index = index(index_player, index_played, index_discard, index_info, index_end)

        observation = np.zeros((obs_index.end, 10), dtype=np.uint8)
        return obs_index, observation
Beispiel #7
0
 def test_empty_with_player_hands(self):
     """After dealing one hand of five, every draw must lower ``remaining`` by one, from 44 to 0."""
     deck = HanabiDeck()
     deck.provide_hand(hand_size=5)
     expected_left = 45
     while expected_left > 0:
         expected_left -= 1
         deck.draw()
         self.assertEqual(expected_left, deck.remaining, "The number of drawn cards is not equal to the deck size.")
Beispiel #8
0
 def test_remaining(self):
     """Every draw from a fresh default deck must lower ``remaining`` by one, from 49 to 0."""
     deck = HanabiDeck()
     for expected_left in range(49, -1, -1):
         deck.draw()
         self.assertEqual(expected_left, deck.remaining, "The number of drawn cards is not equal to the deck size.")
Beispiel #9
0
 def test_hand_cards(self):
     """``provide_hand`` must return as many cards as requested for hand sizes 1 to 5."""
     deck = HanabiDeck()
     for requested in (1, 2, 3, 4, 5):
         hand = deck.provide_hand(hand_size=requested)
         self.assertEqual(requested, len(hand))
 def setUp(self) -> None:
     """Build a deck with ``ranks=[Rank.ONE]`` and deal a hand from it for every test."""
     deck = HanabiDeck(ranks=[Rank.ONE])
     self.deck = deck
     self.cards = deck.provide_hand(hand_size=6)