def select_card(self, game_state):
    # Query the policy with the preprocessed state and a mask over the
    # action space: the first 11 slots (non-card actions) are zeroed out,
    # only the currently allowed cards are set.
    action, prob = self.act(
        self.policy.preprocess(game_state, self),
        torch.tensor(
            np.concatenate(
                (np.zeros(9), np.zeros(2),
                 one_hot_cards(self.rules.allowed_cards(game_state, self))))).float())
    selected_card = self.rules.cards[action - 11]
    self.cards.remove(selected_card)

    # Davonlaufen (running away from the Rufsau) needs to be tracked
    if game_state.game_type[1] == 0:  # Sauspiel
        first_player_of_trick = game_state.first_player if game_state.trick_number == 0 \
            else game_state.trick_owner[game_state.trick_number - 1]
        rufsau = [game_state.game_type[0], 7]
        if game_state.game_type[0] == selected_card[0] \
                and selected_card != rufsau \
                and first_player_of_trick == self.id \
                and selected_card not in self.rules.get_sorted_trumps(game_state.game_type) \
                and rufsau in self.cards:
            self.davongelaufen = True
    return selected_card, prob
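# The act() call above is assumed to sample a card index from the policy's
# output distribution, restricted by the allowed-cards mask that is passed in
# alongside the preprocessed state. The following is a hypothetical sketch of
# such a method, under the assumption of a 43-dimensional action space
# (11 non-card actions followed by the 32 cards); it is not the actual
# implementation used here.
import torch
from torch.distributions import Categorical


def act(self, state, allowed_actions):
    # Get probabilities over all actions, zero out the forbidden ones,
    # renormalize and sample; return the index and its probability.
    with torch.no_grad():
        action_probs = self.policy(state)
    masked_probs = action_probs * allowed_actions
    masked_probs = masked_probs / masked_probs.sum()
    distribution = Categorical(probs=masked_probs)
    action = distribution.sample()
    return action.item(), masked_probs[action].item()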
def preprocess(self, game_state, player):
    """
    state_size:
    - info_vector: 328
        - game_type: 8
        - game_player: 4
        - first_player: 4
        - current_scores: 4 (divided by 120 for normalization purposes)
        - remaining cards: 32
        - teams: 4 [bits of players are set to 1]
        - played cards by player: 4 * 32
        - current_trick: 4 * 36
    """
    ego_player = player.id

    # game state
    game_enc = one_hot_games([game_state.game_type])

    game_player_enc = np.zeros(4)
    if game_state.game_player is not None:
        game_player_enc[(game_state.game_player - ego_player) % 4] = 1

    first_player_enc = np.zeros(4)
    first_player_enc[(game_state.first_player - ego_player) % 4] = 1

    team_encoding = np.zeros(4)
    if game_state.get_player_team() != [None]:
        player_team = [(t - ego_player) % 4 for t in game_state.get_player_team()]
        if game_state.game_type[1] != 0 and len(player_team) == 1:
            team_encoding[player_team] = 1
        elif game_state.game_type[1] == 0 and len(player_team) == 2:
            team_encoding[player_team] = 1

    # cards already played, grouped by (relative) player
    played_cards = np.zeros(32 * 4)
    for p in range(4):
        cards = [
            game_state.course_of_game[trick][p] for trick in range(8)
            if game_state.course_of_game[trick][p] != [None, None]
        ]
        enc_cards = one_hot_cards(cards)
        p_id = (p - ego_player) % 4
        played_cards[p_id * 32:(p_id + 1) * 32] = enc_cards

    # current trick: per position a card one-hot (32) plus the player who played it (4)
    current_trick_enc = np.zeros(36 * 4)
    trick = game_state.trick_number
    for card in range(4):
        if game_state.course_of_game[trick][card] == [None, None]:
            continue
        card_player = game_state.first_player
        if trick != 0:
            card_player = game_state.trick_owner[trick - 1]
        card_player = (card_player + card) % 4
        card_player_enc = np.zeros(4)
        card_player_enc[(card_player - ego_player) % 4] = 1
        card_enc = one_hot_cards([game_state.course_of_game[trick][card]])
        current_trick_enc[card * 36:(card + 1) * 36] = np.concatenate(
            (card_enc, card_player_enc))

    state_vector = np.concatenate(
        (game_enc, game_player_enc, first_player_enc,
         np.true_divide(game_state.scores, 120),
         one_hot_cards(player.cards), played_cards,
         current_trick_enc, team_encoding))

    return [torch.tensor(state_vector).float().to(device=self.device)]
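# The fixed 328-dimensional vector produced above lends itself to a plain
# feed-forward policy network. The sketch below is hypothetical (layer sizes
# and the assumed 43-dimensional action space are not taken from this code),
# and only illustrates how such a network would consume this encoding.
import torch
import torch.nn as nn
import torch.nn.functional as F


class LinearPolicy(nn.Module):
    def __init__(self, state_size=328, action_size=43, hidden_size=256):
        super().__init__()
        self.fc1 = nn.Linear(state_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, action_size)

    def forward(self, state):
        # preprocess() returns a one-element list holding the state vector
        x = F.relu(self.fc1(state[0]))
        x = F.relu(self.fc2(x))
        return F.softmax(self.out(x), dim=-1)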
def preprocess(self, game_state, player):
    """
    state_size:
    - info_vector: 55
        - game_type: 7 [two-hot encoding]
        - game_player: 4
        - first_player: 4
        - current_scores: 4 (divided by 120 for normalization purposes)
        - remaining cards: 32
        - teams: 4 [bits of players are set to 1]
    - course_of_game: x * 16
        each played card in order [two-hot encoding, 12] plus the player that played it [4]
    - current_trick: x * 16
        each played card in order [two-hot encoding, 12] plus the player that played it [4]
    """
    ego_player = player.id

    # game state
    game_enc = two_hot_encode_game(game_state.game_type)

    game_player_enc = np.zeros(4)
    if game_state.game_player is not None:
        game_player_enc[(game_state.game_player - ego_player) % 4] = 1

    first_player_enc = np.zeros(4)
    first_player_enc[(game_state.first_player - ego_player) % 4] = 1

    team_encoding = np.zeros(4)
    if game_state.get_player_team() != [None]:
        player_team = [(t - ego_player) % 4 for t in game_state.get_player_team()]
        if game_state.game_type[1] != 0 and len(player_team) == 1:
            team_encoding[player_team] = 1
        elif game_state.game_type[1] == 0 and len(player_team) == 2:
            team_encoding[player_team] = 1

    # course of game: one 16-dim row per played card (12-dim two-hot card
    # encoding + 4-dim relative player encoding); cards of the current trick
    # are collected in a separate sequence
    course_of_game_enc = np.zeros((1, 16))
    current_trick_enc = np.zeros((1, 16))
    for trick in range(len(game_state.course_of_game)):
        for card in range(len(game_state.course_of_game[trick])):
            if game_state.course_of_game[trick][card] == [None, None]:
                continue
            card_player = game_state.first_player
            if trick != 0:
                card_player = game_state.trick_owner[trick - 1]
            card_player = (card_player + card) % 4
            card_player_enc = np.zeros(4)
            card_player_enc[(card_player - ego_player) % 4] = 1
            card_enc = np.append(
                np.array(two_hot_encode_card(game_state.course_of_game[trick][card])),
                card_player_enc)
            if trick != game_state.trick_number:
                course_of_game_enc = np.vstack((course_of_game_enc, card_enc))
            else:
                current_trick_enc = np.vstack((current_trick_enc, card_enc))

    info_vector = np.concatenate(
        (game_enc, game_player_enc, first_player_enc,
         np.true_divide(game_state.scores, 120),
         one_hot_cards(player.cards), team_encoding))

    # drop the initial all-zero row once at least one card has been appended
    if course_of_game_enc.shape[0] > 1:
        course_of_game_enc = np.delete(course_of_game_enc, 0, 0)
    course_of_game_enc = torch.tensor(course_of_game_enc).float().to(device=self.device)
    course_of_game_enc = course_of_game_enc.view(len(course_of_game_enc), 1, 16)

    if current_trick_enc.shape[0] > 1:
        current_trick_enc = np.delete(current_trick_enc, 0, 0)
    current_trick_enc = torch.tensor(current_trick_enc).float().to(device=self.device)
    current_trick_enc = current_trick_enc.view(len(current_trick_enc), 1, 16)

    return [
        torch.tensor(info_vector).float().to(device=self.device),
        course_of_game_enc,
        current_trick_enc
    ]
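# The sequences returned here are shaped (sequence_length, 1, 16), which
# matches PyTorch's default (seq, batch, feature) LSTM input layout. The
# following is a hypothetical recurrent policy head (layer sizes and names
# are assumptions, not the actual model) combining the two sequence
# summaries with the 55-dimensional info vector.
import torch
import torch.nn as nn
import torch.nn.functional as F


class LSTMPolicy(nn.Module):
    def __init__(self, info_size=55, card_size=16, hidden_size=64, action_size=43):
        super().__init__()
        self.history_lstm = nn.LSTM(card_size, hidden_size)  # full course of game
        self.trick_lstm = nn.LSTM(card_size, hidden_size)    # current trick only
        self.fc = nn.Linear(info_size + 2 * hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, action_size)

    def forward(self, state):
        info_vector, course_of_game, current_trick = state
        # Use the final hidden state of each LSTM as a fixed-size summary
        # of the variable-length card sequence.
        _, (h_history, _) = self.history_lstm(course_of_game)
        _, (h_trick, _) = self.trick_lstm(current_trick)
        x = torch.cat((info_vector, h_history[-1].squeeze(0), h_trick[-1].squeeze(0)))
        x = F.relu(self.fc(x))
        return F.softmax(self.out(x), dim=-1)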