def get_cards_order(self, trick, played_tricks, suits_order, contract_value):
    """
    Rank the cards to play from the output of the player's model.

    The observation fed to the model is the concatenation of the vector of
    already played cards, the vector of the player's hand, the vector of the
    cards in the current trick, and the pair
    ``[contract_value / 9, self.attacker]``.

    :param trick: current trick; its ``cards`` are encoded in the observation
    :param played_tricks: iterable of previous tricks, each exposing ``cards``
    :param suits_order: suit order forwarded to the card <-> vector helpers
    :param contract_value: contract value, divided by 9 before being fed to
        the model
    :return: result of ``convert_index_to_cards`` applied to the ranked indices
    """
    # Create observation
    played_cards = [
        card
        for played_trick in played_tricks
        for card in played_trick.cards
    ]
    played_cards_observation = convert_cards_to_vector(played_cards, suits_order)
    # Computed once: used both as a model input and as the mask applied to
    # the model output below.
    player_cards_observation = convert_cards_to_vector(self.cards, suits_order)
    trick_cards_observation = convert_cards_to_vector(trick.cards, suits_order)

    trick_observation = np.concatenate((
        played_cards_observation,
        player_cards_observation,
        trick_cards_observation,
        [contract_value / 9, self.attacker],
    ))
    # Add a leading axis before feeding the model (presumably the batch
    # dimension -- confirm against the model's input tensor).
    trick_observation = np.expand_dims(trick_observation, axis=0)

    with self.sess.as_default(), self.graph.as_default():
        actions = self.sess.run(
            self.output_tensor, {self.input_tensor: trick_observation})
    action = np.squeeze(actions, axis=0)

    # Keep only the scores of positions set in the hand vector.
    player_action_masked = player_cards_observation * action

    # Play cards in probability order; when no position has a positive
    # masked score, fall back to ordering by the hand vector itself.
    if np.max(player_action_masked) > 0:
        cards_index_order = np.argsort(-player_action_masked)
    else:
        cards_index_order = np.argsort(-player_cards_observation)
    return convert_index_to_cards(cards_index_order, suits_order)
def _set_contrat(self, contrat_model):
    """
    Set the contract of the round from ``contrat_model``.

    ``decision_process`` is evaluated once per team (players 0 and 2, then
    players 1 and 3). The team with the strictly larger expected reward
    becomes the attacker; on a tie the second team is chosen.

    Updates ``self.atout_suit``, ``self.suits_order``, ``self.value`` and
    ``self.attacker_team``.

    :param contrat_model: model forwarded to ``decision_process``
    """
    default_suit_order = list(Suit)

    # Hands of the four players, encoded with the default suit order.
    hands = [
        convert_cards_to_vector(self.players[seat].cards, default_suit_order)
        for seat in range(4)
    ]

    shift_even, reward_even = decision_process(hands[0], hands[2], contrat_model)
    shift_odd, reward_odd = decision_process(hands[1], hands[3], contrat_model)

    if reward_even > reward_odd:
        attacker_team, shift, best_reward = 0, shift_even, reward_even
    else:
        attacker_team, shift, best_reward = 1, shift_odd, reward_odd

    # Careful: shifting is anti-clockwise, hence the negative index.
    self.atout_suit = default_suit_order[-shift]
    self.suits_order = Suit.create_order(self.atout_suit)
    # Tens of the expected reward above 80, floored at 0, then divided by 9.
    self.value = np.max([0, (best_reward // 10) - 8]) / 9
    self.attacker_team = attacker_team
def _get_trick_observation(self):
    """
    Build the observation for the first player of the current trick rotation.

    The observation concatenates, in this order: the vector of cards from
    ``self.played_tricks``, the vector of that player's cards, the vector of
    the cards in ``self.trick``, and ``[self.value, player.attacker]``.

    :return: observation as a single concatenated array
    """
    # self.observation_space = [spaces.Discrete(2)] * (32 + 32 + 32) + [spaces.Discrete(10), spaces.Discrete(2)]
    cards_already_played = []
    for past_trick in self.played_tricks:
        cards_already_played.extend(past_trick.cards)
    played_vector = convert_cards_to_vector(cards_already_played, self.suits_order)

    # The observation is built for whoever is first in the rotation.
    player = self.current_trick_rotation[0]
    hand_vector = convert_cards_to_vector(player.cards, self.suits_order)
    trick_vector = convert_cards_to_vector(self.trick.cards, self.suits_order)

    segments = (
        played_vector,
        hand_vector,
        trick_vector,
        [self.value, player.attacker],
    )
    return np.concatenate(segments)
def reset(self):
    """
    Start a new round and return its first observation.

    reset is mandatory to use the gym framework. It is called at the end of
    each round (8 tricks).

    :return: observation produced by ``_get_trick_observation``
    """
    # New round
    self.round_number += 1

    # Rebuild the deck from the tricks won by each player during the
    # previous round, then deal the cards again.
    self._rebuild_deck(self.played_tricks)
    self._deal_cards()
    self.played_tricks = []

    # Get value of the contract and attacker team and update the suits order
    if self.contrat_model is not None:
        self._set_contrat(self.contrat_model)
    else:
        # No contract model: everything is drawn at random.
        self.atout_suit = random.choice(list(Suit))
        # Can only announce 80 or 90 to begin with.
        # NOTE(review): _set_contrat stores the value divided by 9, whereas
        # this branch stores 0 or 1 -- confirm both scales are intended.
        self.value = random.randint(0, 1)
        # 0 if it is team 0 (player 0 and player 2) else 1 for team 1
        self.attacker_team = random.randint(0, 1)
        self.suits_order = Suit.create_order(self.atout_suit)

    # Set players attacker
    for player in self.players:
        player.attacker = int(self.attacker_team == player.index % 2)

    # Snapshot of the hands as dealt, encoded with the round's suit order.
    hand_vectors = [
        convert_cards_to_vector(self.players[seat].cards, self.suits_order)
        for seat in range(4)
    ]
    self.original_hands = {
        "player0-hand": hand_vectors[0],
        "player1-hand": hand_vectors[1],
        "player2-hand": hand_vectors[2],
        "player3-hand": hand_vectors[3],
        "attacker_team": [player.attacker for player in self.players],
    }

    self.total_score = 0
    self.trick = Trick(self.atout_suit, trick_number=1)
    # The starting position of the rotation depends on the round number.
    self.current_trick_rotation = self._create_trick_rotation(
        self.round_number % 4)

    # Play until AI
    self._play_until_end_of_rotation_or_ai_play()

    return self._get_trick_observation()
def _get_round_observation(self):
    """
    Build the round-level observation.

    Same layout as the trick observation, but the played-cards segment is
    filled with ones, the player-cards segment with zeros and the attacker
    flag is fixed to 1. Only the current trick cards and ``self.value`` are
    taken from the environment state.

    :return: observation as a single concatenated array
    """
    # self.observation_space = [spaces.Discrete(2)] * (32 + 32 + 32) + [spaces.Discrete(10), spaces.Discrete(2)]
    all_played = np.ones(32)
    empty_hand = np.zeros(32)
    trick_vector = convert_cards_to_vector(self.trick.cards, self.suits_order)

    segments = (all_played, empty_hand, trick_vector, [self.value, 1])
    return np.concatenate(segments)
def get_cards_order(self, _trick, _played_tricks, suits_order, _contract_value):
    """
    Rank the cards to play from the action stored in ``self.next_action``.

    The stored action is consumed: ``self.next_action`` is reset to ``None``
    once the order has been computed.

    :param _trick: unused
    :param _played_tricks: unused
    :param suits_order: suit order forwarded to the card <-> vector helpers
    :param _contract_value: unused
    :return: result of ``convert_index_to_cards`` applied to the ranked indices
    :raises RuntimeError: if ``self.next_action`` is ``None``
    """
    if self.next_action is None:
        raise RuntimeError("Action should be filled.")

    hand_vector = convert_cards_to_vector(self.cards, suits_order)
    # Keep only the action scores of positions set in the hand vector.
    masked_scores = hand_vector * self.next_action

    # Play cards in probability order; when no position has a positive
    # masked score, rank by the hand vector instead.
    ranking_key = masked_scores if np.max(masked_scores) > 0 else hand_vector
    ranked_indices = np.argsort(-ranking_key)

    ordered_cards = convert_index_to_cards(ranked_indices, suits_order)
    # The action is single-use: the next call needs a freshly set one.
    self.next_action = None
    return ordered_cards