Esempio n. 1
0
    def get_cards_order(self, trick, played_tricks, suits_order,
                        contract_value):
        """Rank card indices by the network's preference for the current trick.

        The network input is the concatenation of four pieces, in this order:
        the encoding of every card found in ``played_tricks``, the encoding of
        this player's hand, the encoding of the cards already in ``trick``,
        and the two scalars ``contract_value / 9`` and ``self.attacker``.

        :param trick: current trick; only its ``cards`` attribute is read.
        :param played_tricks: iterable of finished tricks, each exposing
            ``cards``.
        :param suits_order: suit ordering forwarded to the conversion helpers.
        :param contract_value: contract level; divided by 9 before being fed
            to the network (presumably a 0..9 level normalised to [0, 1] --
            TODO confirm against the caller).
        :return: whatever ``convert_index_to_cards`` builds from the card
            indices sorted by decreasing masked score.
        """
        # The loop variable is deliberately NOT named ``trick``: under
        # Python 2 a list-comprehension variable leaks into the enclosing
        # scope and would overwrite the ``trick`` argument read just below.
        played_cards = [
            card for played_trick in played_tricks
            for card in played_trick.cards
        ]
        played_cards_observation = convert_cards_to_vector(
            played_cards, suits_order)
        player_cards_observation = convert_cards_to_vector(
            self.cards, suits_order)
        trick_cards_observation = convert_cards_to_vector(
            trick.cards, suits_order)
        trick_observation = np.concatenate(
            (played_cards_observation, player_cards_observation,
             trick_cards_observation, [contract_value / 9, self.attacker]))

        # The network is fed a batch, so add a leading axis of size 1 and
        # strip it again from the output.
        trick_observation = np.expand_dims(trick_observation, axis=0)
        with self.sess.as_default(), self.graph.as_default():
            actions = self.sess.run(self.output_tensor,
                                    {self.input_tensor: trick_observation})
        action = np.squeeze(actions, axis=0)

        # Weight the network scores by the hand encoding computed above (the
        # hand has not changed since, so it is not converted a second time).
        # Assumes the encoding is a 0/1 indicator so that cards not in hand
        # get a zero score -- TODO confirm in convert_cards_to_vector.
        player_action_masked = player_cards_observation * action

        # Play cards in probability order; when no card in hand received a
        # positive score, fall back to ordering by the hand encoding itself.
        if np.max(player_action_masked) > 0:
            cards_index_order = np.argsort(-player_action_masked)
        else:
            cards_index_order = np.argsort(-player_cards_observation)
        return convert_index_to_cards(cards_index_order, suits_order)
Esempio n. 2
0
    def _set_contrat(self, contrat_model):
        """Pick the attacking team, trump suit and contract value from a model.

        ``decision_process`` is evaluated once per partnership (players 0 and 2,
        then players 1 and 3). The partnership with the strictly larger
        expected reward attacks; on a tie the second partnership is chosen.

        Sets ``self.atout_suit``, ``self.suits_order``, ``self.value`` and
        ``self.attacker_team``.

        :param contrat_model: model object forwarded to ``decision_process``.
        """
        default_suit_order = list(Suit)

        # Encode the four hands with the default (unshifted) suit ordering.
        hands = [
            convert_cards_to_vector(self.players[seat].cards, default_suit_order)
            for seat in range(4)
        ]

        shift_even, reward_even = decision_process(hands[0], hands[2], contrat_model)
        shift_odd, reward_odd = decision_process(hands[1], hands[3], contrat_model)

        if reward_even > reward_odd:
            attacker_team, shift, best_reward = 0, shift_even, reward_even
        else:
            attacker_team, shift, best_reward = 1, shift_odd, reward_odd

        # Careful: shifting is anti-clockwise, hence the negative index.
        self.atout_suit = default_suit_order[-shift]
        self.suits_order = Suit.create_order(self.atout_suit)

        # Tens of expected reward above 80, floored at 0, then scaled by 1/9.
        contract_level = (best_reward // 10) - 8
        self.value = np.max([0, contract_level]) / 9
        self.attacker_team = attacker_team
Esempio n. 3
0
 def _get_trick_observation(self):
     """Assemble the observation for the player about to act in the trick.

     The result concatenates, in order: the encoding of all cards from
     ``self.played_tricks``, the encoding of the hand of the first player in
     ``self.current_trick_rotation``, the encoding of the cards in
     ``self.trick``, and the pair ``[self.value, player.attacker]``.

     :return: the concatenated observation array.
     """
     # Layout noted by the original author:
     # self.observation_space = [spaces.Discrete(2)] * (32 + 32 + 32) + [spaces.Discrete(10), spaces.Discrete(2)]
     already_played = []
     for past_trick in self.played_tricks:
         already_played.extend(past_trick.cards)
     played_vector = convert_cards_to_vector(already_played, self.suits_order)

     # The head of the rotation is the player whose turn it is.
     acting_player = self.current_trick_rotation[0]
     hand_vector = convert_cards_to_vector(acting_player.cards, self.suits_order)
     trick_vector = convert_cards_to_vector(self.trick.cards, self.suits_order)

     segments = [
         played_vector,
         hand_vector,
         trick_vector,
         [self.value, acting_player.attacker],
     ]
     return np.concatenate(segments)
Esempio n. 4
0
    def reset(self):
        """
        Start a new round and return its first observation.

        reset is mandatory to use the gym framework; it is called at the end
        of each round (8 tricks). It rebuilds and deals the deck, fixes the
        contract (randomly, or through ``self.contrat_model`` when one is
        set), flags the attacking players, snapshots the dealt hands in
        ``self.original_hands`` and plays forward until the AI has to act.

        :return: observation
        """
        self.round_number += 1

        # The deck is rebuilt from the tricks won during the previous round
        # before new hands are dealt.
        self._rebuild_deck(self.played_tricks)
        self._deal_cards()
        self.played_tricks = []

        # Contract value, attacking team and suit ordering.
        if self.contrat_model is not None:
            self._set_contrat(self.contrat_model)
        else:
            # No contract model: draw everything at random.
            self.atout_suit = random.choice(list(Suit))
            # Can only announce 80 or 90 to begin with.
            # NOTE(review): this stores a raw 0/1 whereas _set_contrat stores
            # a value divided by 9 -- confirm which scale is intended.
            self.value = random.randint(0, 1)
            # 0 if it is team 0 (player 0 and player 2) else 1 for team 1.
            self.attacker_team = random.randint(0, 1)
            self.suits_order = Suit.create_order(self.atout_suit)

        # A player attacks when the parity of its index matches the team.
        for player in self.players:
            player.attacker = int(self.attacker_team == player.index % 2)

        # Snapshot of the dealt hands, encoded with the round's suit ordering.
        hand_keys = ("player0-hand", "player1-hand", "player2-hand",
                     "player3-hand")
        snapshot = {
            key: convert_cards_to_vector(self.players[seat].cards,
                                         self.suits_order)
            for seat, key in enumerate(hand_keys)
        }
        snapshot["attacker_team"] = [
            player.attacker for player in self.players
        ]
        self.original_hands = snapshot

        self.total_score = 0
        self.trick = Trick(self.atout_suit, trick_number=1)
        # The starting seat rotates with the round number.
        self.current_trick_rotation = self._create_trick_rotation(
            self.round_number % 4)

        # Play until AI
        self._play_until_end_of_rotation_or_ai_play()
        return self._get_trick_observation()
Esempio n. 5
0
 def _get_round_observation(self):
     """Build the observation returned at round level.

     Same layout as the per-trick observation, but the first segment is all
     ones, the second segment is all zeros, and the trailing attacker flag is
     the constant 1. Only the current trick's cards and ``self.value`` are
     read from the environment state.

     :return: the concatenated observation array.
     """
     # Layout noted by the original author:
     # self.observation_space = [spaces.Discrete(2)] * (32 + 32 + 32) + [spaces.Discrete(10), spaces.Discrete(2)]
     segments = [
         np.ones(32),   # "played cards" segment: every entry set
         np.zeros(32),  # "player hand" segment: every entry cleared
         convert_cards_to_vector(self.trick.cards, self.suits_order),
         [self.value, 1],
     ]
     return np.concatenate(segments)
Esempio n. 6
0
    def get_cards_order(self, _trick, _played_tricks, suits_order,
                        _contract_value):
        """Order the cards according to the externally supplied action.

        ``self.next_action`` must have been set beforehand; it is consumed
        (reset to ``None``) once the ordering has been built. The trick,
        played tricks and contract value arguments are ignored.

        :param suits_order: suit ordering forwarded to the conversion helpers.
        :return: result of ``convert_index_to_cards`` for the card indices
            sorted by decreasing masked score.
        :raises RuntimeError: when ``self.next_action`` is ``None``.
        """
        pending_action = self.next_action
        if pending_action is None:
            raise RuntimeError("Action should be filled.")

        hand_vector = convert_cards_to_vector(self.cards, suits_order)
        masked_scores = hand_vector * pending_action

        # Play cards in probability order; when no card in hand has a positive
        # score, rank by the hand encoding itself instead.
        ranking_key = masked_scores if np.max(masked_scores) > 0 else hand_vector
        ordered_cards = convert_index_to_cards(np.argsort(-ranking_key),
                                               suits_order)

        # The action is single-use: clear it so a stale one is never replayed.
        self.next_action = None
        return ordered_cards