Example #1
0
    def select_card(self, game_state):
        """Choose a card with the policy network and play it.

        Returns the selected card together with the probability the
        policy assigned to the chosen action.  Also updates
        ``self.davongelaufen`` when the move constitutes Davonlaufen
        ("running away" from the called ace) in a Sauspiel.
        """
        # Action mask: the first 9 + 2 slots belong to non-card actions,
        # so only the currently allowed cards are enabled.
        allowed_mask = np.concatenate((
            np.zeros(9),
            np.zeros(2),
            one_hot_cards(self.rules.allowed_cards(game_state, self)),
        ))
        action, prob = self.act(
            self.policy.preprocess(game_state, self),
            torch.tensor(allowed_mask).float())

        # Card actions are offset by 11 in the action space.
        selected_card = self.rules.cards[action - 11]
        self.cards.remove(selected_card)

        # Davonlaufen needs to be tracked (only relevant in a Sauspiel).
        if game_state.game_type[1] == 0:  # Sauspiel
            if game_state.trick_number == 0:
                first_player_of_trick = game_state.first_player
            else:
                first_player_of_trick = game_state.trick_owner[
                    game_state.trick_number - 1]
            rufsau = [game_state.game_type[0], 7]
            # Ego leads the trick with a non-trump card of the called
            # color that is not the Rufsau itself while still holding it.
            if (game_state.game_type[0] == selected_card[0]
                    and selected_card != rufsau
                    and first_player_of_trick == self.id
                    and selected_card not in self.rules.get_sorted_trumps(
                        game_state.game_type)
                    and rufsau in self.cards):
                self.davongelaufen = True
        return selected_card, prob
Example #2
0
    def preprocess(self, game_state, player):
        """Encode the game state into one flat 328-dim tensor, rotated so
        that *player* (the ego player) sits at relative position 0.

        state_size:
        - info_vector: 328
          - game_type: 8
          - game_player: 4
          - first_player: 4
          - current_scores: 4 (divided by 120 for normalization purpose)
          - remaining cards: 32
          - teams: 4 [bits of players are set to 1]
          - played cards by player: 4*32
          - current_trick: 4 * 36

        Returns a single-element list holding the state tensor on
        ``self.device``.
        """

        ego_player = player.id

        # Game declaration (one-hot over game types).
        game_enc = one_hot_games([game_state.game_type])

        # Declaring player relative to ego; stays all-zero while the
        # game player is still undetermined.
        game_player_enc = np.zeros(4)
        if game_state.game_player is not None:
            game_player_enc[(game_state.game_player - ego_player) % 4] = 1

        first_player_enc = np.zeros(4)
        first_player_enc[(game_state.first_player - ego_player) % 4] = 1

        # Team bits only once membership is known: a non-Sauspiel has a
        # single declarer, a Sauspiel a two-player team.
        team_encoding = np.zeros(4)
        if game_state.get_player_team() != [None]:
            player_team = [(t - ego_player) % 4
                           for t in game_state.get_player_team()]
            if game_state.game_type[1] != 0 and len(player_team) == 1:
                team_encoding[player_team] = 1
            elif game_state.game_type[1] == 0 and len(player_team) == 2:
                team_encoding[player_team] = 1

        # 32-bit card vector per player, rotated to the ego perspective.
        played_cards = np.zeros(32 * 4)
        for p in range(4):
            cards = [
                game_state.course_of_game[trick][p] for trick in range(8)
                if game_state.course_of_game[trick][p] != [None, None]
            ]
            p_id = (p - ego_player) % 4
            played_cards[p_id * 32:(p_id + 1) * 32] = one_hot_cards(cards)

        # Current trick: per position a 32-bit card one-hot plus a 4-bit
        # relative-player one-hot (= 36 per position).
        current_trick_enc = np.zeros(36 * 4)
        trick = game_state.trick_number
        for card in range(4):
            if game_state.course_of_game[trick][card] == [None, None]:
                continue
            # Leader of this trick: winner of the previous trick, or the
            # game's first player on trick 0.
            card_player = game_state.first_player
            if trick != 0:
                card_player = game_state.trick_owner[trick - 1]
            card_player = (card_player + card) % 4
            card_player_enc = np.zeros(4)
            card_player_enc[(card_player - ego_player) % 4] = 1

            card_enc = one_hot_cards([game_state.course_of_game[trick][card]])
            current_trick_enc[card * 36:(card + 1) * 36] = np.concatenate(
                (card_enc, card_player_enc))

        state_vector = np.concatenate(
            (game_enc, game_player_enc, first_player_enc,
             np.true_divide(game_state.scores, 120),
             one_hot_cards(player.cards), played_cards, current_trick_enc,
             team_encoding))

        return [torch.tensor(state_vector).float().to(device=self.device)]
Example #3
0
    def preprocess(self, game_state, player):
        """Encode the game state as (info vector, game history, current
        trick), rotated so that *player* (the ego player) sits at
        relative position 0.

        state_size:
        - info_vector: 55
          - game_type: 7 [two bit encoding]
          - game_player: 4
          - first_player: 4
          - current_scores: 4 (divided by 120 for normalization purpose)
          - remaining cards: 32
          - teams: 4 [bits of players are set to 1]
        - game_history: x * 16
            - course_of_game: x * (12 + 4) each played card in order plus the player that played it
        - current_trick: x * 16
            - current_trick: x * (12 + 4) each played card in order plus the player that played it

        Returns ``[info_vector, course_of_game_enc, current_trick_enc]``
        as float tensors on ``self.device``; the sequence tensors have
        shape (steps, 1, 16).
        """

        ego_player = player.id

        # Game declaration (two-hot encoding).
        game_enc = two_hot_encode_game(game_state.game_type)

        # Declaring player relative to ego; all-zero while undetermined.
        game_player_enc = np.zeros(4)
        if game_state.game_player is not None:
            game_player_enc[(game_state.game_player - ego_player) % 4] = 1

        first_player_enc = np.zeros(4)
        first_player_enc[(game_state.first_player - ego_player) % 4] = 1

        # Team bits only once membership is known: a non-Sauspiel has a
        # single declarer, a Sauspiel a two-player team.
        team_encoding = np.zeros(4)
        if game_state.get_player_team() != [None]:
            player_team = [(t - ego_player) % 4
                           for t in game_state.get_player_team()]
            if game_state.game_type[1] != 0 and len(player_team) == 1:
                team_encoding[player_team] = 1
            elif game_state.game_type[1] == 0 and len(player_team) == 2:
                team_encoding[player_team] = 1

        # Collect one 16-dim row (12-dim card + 4-dim player one-hot) per
        # played card.  Rows are gathered in lists and stacked once,
        # instead of np.vstack-ing inside the loop (quadratic in the
        # number of played cards).
        history_rows = []
        trick_rows = []
        for trick in range(len(game_state.course_of_game)):
            for card in range(len(game_state.course_of_game[trick])):
                if game_state.course_of_game[trick][card] == [None, None]:
                    continue
                # Leader of the trick: winner of the previous trick, or
                # the game's first player on trick 0.
                card_player = game_state.first_player
                if trick != 0:
                    card_player = game_state.trick_owner[trick - 1]
                card_player = (card_player + card) % 4
                card_player_enc = np.zeros(4)
                card_player_enc[(card_player - ego_player) % 4] = 1
                row = np.append(
                    np.array(
                        two_hot_encode_card(
                            game_state.course_of_game[trick][card])),
                    card_player_enc)
                if trick != game_state.trick_number:
                    history_rows.append(row)
                else:
                    trick_rows.append(row)

        # An empty sequence is represented by one all-zero row, matching
        # the original zero-row sentinel behaviour.
        course_of_game_enc = (np.vstack(history_rows)
                              if history_rows else np.zeros((1, 16)))
        current_trick_enc = (np.vstack(trick_rows)
                             if trick_rows else np.zeros((1, 16)))

        info_vector = np.concatenate(
            (game_enc, game_player_enc, first_player_enc,
             np.true_divide(game_state.scores, 120),
             one_hot_cards(player.cards), team_encoding))

        course_of_game_enc = torch.tensor(course_of_game_enc).float().to(
            device=self.device).view(-1, 1, 16)
        current_trick_enc = torch.tensor(current_trick_enc).float().to(
            device=self.device).view(-1, 1, 16)

        return [
            torch.tensor(info_vector).float().to(device=self.device),
            course_of_game_enc, current_trick_enc
        ]