def refresh(self):
    """Reset the world to the start of a brand new total-watten game.

    Creates a fresh sub-watten game, resets players and overall scores,
    deals a new hand via ``_refresh_state_single_hand`` and records a
    textual snapshot of the starting state for debugging.
    """
    # sub_watten game used to play out the cards of each hand
    self.sub_watten_game = WattenSubGame()

    # players are encoded as 1 (player A) and -1 (player B)
    self.current_player = 1

    # the dealer; it is switched every game, and the dealer's opponent
    # picks the rank and plays the first move
    self.distributing_cards_player = -1

    # overall score of the game, and the points needed to win it
    self.player_A_score = self.player_B_score = 0
    self.win_threshold = 15

    # scores and threshold must be set before dealing: the initial game
    # prize is derived from them
    self._refresh_state_single_hand()

    # player who won the game (unknown until somebody reaches the threshold)
    self.winning_player = None

    self.moves = moves

    # list of actions taken in a game, used for debugging purposes
    self.moves_series = []

    self.starting_state = (
        f"\n{self.current_player}, {self.distributing_cards_player}, "
        f"{self.player_A_score}, {self.player_B_score}, "
        f"{self.player_A_hand}, {self.player_B_hand}, {self.played_cards}, "
        f"{self.current_game_player_A_score}, {self.current_game_player_B_score}, "
        f"{self.current_game_prize}, {self.is_last_move_raise}, "
        f"{self.is_last_move_accepted_raise}, {self.is_last_hand_raise_valid}, "
        f"{self.first_card_deck}, {self.last_card_deck}, {self.rank}, {self.suit}"
    )
def test_get_cur_player(self):
    """Board player 1 maps to player index 0, board player -1 to index 1."""
    game = WattenSubGame()
    for board_player, expected_index in ((1, 0), (-1, 1)):
        game.trueboard.current_player = board_player
        self.assertEqual(game.get_cur_player(), expected_index)
def test_game_complete(self):
    """Start from a fixed deal and check the first two decisions (rank, then suit)."""
    # Recorded move series this scenario was taken from:
    # [36, 45, 14, 5, 25, 20, 46, 48, 1, 46, 48, 13, 35, 45, 15, 18, 7, 2,
    #  30, 46, 47, 36, 43, 27, 5, 46, 48, 6, 2, 22, 46, 47, 35, 44, 29, 8, 46,
    #  48, 18, 21, 27, 26, 14, 11, 9, 46, 48, 17, 34, 43, 10, 1, 46, 47, 41,
    #  42, 24, 17, 46, 48, 0, 14, 3, 29, 46, 48, 41, 45, 19, 8, 30, 24, 20,
    #  16, 34, 46, 47, 39, 44, 16, 30, 18, 46, 48, 21, 46, 47, 39, 43, 24,
    #  26, 14, 22, 29, 6, 5, 7]
    fixed_world = WorldSubWatten()
    fixed_world.init_world_to_state(
        1, -1,
        [25, 9, 1, 32, 14],
        [5, 13, 7, 10, 20],
        [],
        0, 0,
        16, 28,
        None, None,
    )
    game = WattenSubGame()
    game.trueboard = fixed_world

    # first decision: player index 0 may only pick a rank
    self.assertEqual(game.get_cur_player(), 0)
    rank_actions = [33, 34, 35, 36, 37, 38, 39, 40, 41]
    self.assertEqual(game.get_valid_moves_no_zeros(), rank_actions)

    self.assertEqual(game.make_move(36), (0.0, 1))

    # second decision: player index 1 may only pick a suit
    self.assertEqual(game.get_cur_player(), 1)
    suit_actions = [42, 43, 44, 45]
    self.assertEqual(game.get_valid_moves_no_zeros(), suit_actions)
def test_cloned_prediction(self):
    """A cloned game must yield the same policy and value as the original."""
    env = es.EnvironmentSelector()

    # get agent
    agent = env.sub_watten_non_human_agent_for_total_watten()

    sub_watten_game = WattenSubGame()
    clone_sub_watten_game = sub_watten_game.clone()

    pi_values, v = agent.predict(sub_watten_game, sub_watten_game.get_cur_player())
    clone_pi_values, clone_v = agent.predict(
        clone_sub_watten_game, clone_sub_watten_game.get_cur_player())

    # Compare the policies element by element. The previous check,
    # ``pi_values.all() == clone_pi_values.all()``, only compared two booleans
    # ("are all entries non-zero?") and passed for nearly any pair of arrays.
    self.assertTrue((pi_values == clone_pi_values).all())
    self.assertEqual(v, clone_v)
def sub_watten_non_human_agent_for_total_watten(self):
    """Build a neural-network sub_watten agent with the best saved weights loaded.

    The weights are first looked up relative to the project root; if that
    raises ``OSError`` a path relative to the test directory is tried
    instead. An ``OSError`` from the second attempt propagates to the caller.

    :return: the ``AgentNNet`` wrapping a ``SubWattenNNet`` with loaded weights
    """
    game = WattenSubGame()
    x, y = game.get_observation_size()
    nnet = SubWattenNNet(x, y, 1, game.get_action_size())
    agent_nnet = AgentNNet(nnet)

    print('Building sub_watten non human agent for total_watten')

    primary_path = "games/sub_watten/training/default_nn/best.h5"
    fallback_path = "../../sub_watten/training/default_nn/best.h5"

    # load here best sub_watten model
    try:
        agent_nnet.load(primary_path)
    except OSError:
        # Report the path that was actually tried; the old message named a
        # different file (without the default_nn directory).
        print(f"File not found with {primary_path}")
        print(
            "Maybe you are creating an agent for test purposes. I'll try to load the model from a different path"
        )
        agent_nnet.load(fallback_path)

    return agent_nnet
def test_get_score(self):
    """Scores are 0 while nobody has won, and +/-1 once a winner is set."""
    # (board player who moves and wins, score field set to 3,
    #  expected get_score(1), expected get_score(0))
    scenarios = (
        (1, "current_game_player_A_score", -1.0, 1.0),
        (-1, "current_game_player_B_score", 1.0, -1.0),
    )
    for side, score_field, expected_for_1, expected_for_0 in scenarios:
        game = WattenSubGame()
        game.trueboard.current_player = side

        # no winner yet: both players score zero
        self.assertEqual(game.get_score(1), 0.0)
        self.assertEqual(game.get_score(0), 0.0)

        game.trueboard.winning_player = side
        setattr(game.trueboard, score_field, 3)

        self.assertEqual(game.get_score(1), expected_for_1)
        self.assertEqual(game.get_score(0), expected_for_0)
def test_make_move(self):
    """make_move returns (score, next player index) for several board setups."""
    # move 40 from a fresh game does not end it
    fresh_game = WattenSubGame()
    fresh_game.trueboard.current_player = 1
    self.assertEqual(fresh_game.make_move(40), (0.0, 1))

    # Player A plays card 1 on top of card 0 while holding 2 points; only the
    # dealer differs between the two runs, and so does the reported next player.
    for dealer, expected_next in ((-1, 1), (1, 0)):
        game = WattenSubGame()
        board = game.trueboard
        board.distributing_cards_player = dealer
        board.suit = 1
        board.rank = 3
        board.current_player = 1
        board.current_game_player_A_score = 2
        board.played_cards = [0]
        board.player_A_hand = [1]
        self.assertEqual(game.make_move(1), (1.0, expected_next))
def test_nn_agent_prediction(self):
    """A freshly loaded network must predict identically for a game and its clone."""
    sub_watten_game = WattenSubGame()
    clone_sub_watten_game = sub_watten_game.clone()

    x, y = sub_watten_game.get_observation_size()
    nnet = SubWattenNNet(x, y, 1, sub_watten_game.get_action_size())
    agent_nnet = AgentNNet(nnet)
    agent_nnet.load("../../sub_watten/training/best.h5")

    pi_values, v = agent_nnet.predict(sub_watten_game, sub_watten_game.get_cur_player())
    clone_pi_values, clone_v = agent_nnet.predict(
        clone_sub_watten_game, clone_sub_watten_game.get_cur_player())

    # Element-wise comparison of the policies. Comparing ``a.all()`` with
    # ``b.all()`` (as before) only compared two booleans and could not
    # detect differing predictions.
    self.assertTrue((pi_values == clone_pi_values).all())
    self.assertEqual(v, clone_v)
def sub_watten_human_agent_for_total_watten(self):
    """Return a human-controlled sub_watten agent bound to a new WattenSubGame."""
    return SubWattenHumanAgent(WattenSubGame())
def __init__(self):
    """Register the available games, the agent builders and the agent profiles."""
    super().__init__()

    # short alias for the class holding the profile constants
    sel = EnvironmentSelector

    # game profile -> game instance
    self.game_mapping = {
        # sel.GAME_CHECKERS_DEFAULT: CheckersGame(8, history_n=7),
        sel.GAME_TICTACTOE_DEFAULT: TicTacToeGame(),
        sel.GAME_DURAK_DEFAULT: DurakGame(),
        sel.GAME_WATTEN_DEFAULT: WattenGame(),
        sel.GAME_SUB_WATTEN_DEFAULT: WattenSubGame(),
        sel.GAME_ASYMMETRIC_SUB_WATTEN_DEFAULT: AsymmetricSubWattenGame(),
        sel.GAME_ASYMMETRIC_SUB_WATTEN_EVALUATE: WattenSubGame(),
        # Total-watten games are currently disabled:
        # sel.GAME_TOTAL_WATTEN_DEFAULT: TotalWattenGame(
        #     self.sub_watten_non_human_agent_for_total_watten(),
        #     self.sub_watten_non_human_agent_for_total_watten()
        # ),
        # sel.GAME_TOTAL_WATTEN_H_VS_H: TotalWattenGame(
        #     self.sub_watten_human_agent_for_total_watten(),
        #     self.sub_watten_human_agent_for_total_watten()
        # ),
        # sel.GAME_TOTAL_WATTEN_H_VS_NH: TotalWattenGame(
        #     self.sub_watten_human_agent_for_total_watten(),
        #     self.sub_watten_non_human_agent_for_total_watten()
        # ),
        # sel.GAME_TOTAL_WATTEN_NH_VS_H: TotalWattenGame(
        #     self.sub_watten_non_human_agent_for_total_watten(),
        #     self.sub_watten_human_agent_for_total_watten()
        # ),
        sel.GAME_HAND_WATTEN: HandWattenGame(),
        sel.GAME_HAND_WATTEN_CNN: HandWattenGame(cnn=True),
    }

    # agent profile -> bound builder method
    self.agent_builder_mapping = {
        sel.TICTACTOE_AGENT_TRAIN: self.build_tictactoe_train_agent,
        sel.TICTACTOE_AGENT_RANDOM: self.build_tictactoe_agent,
        sel.TICTACTOE_AGENT_HUMAN: self.build_tictactoe_agent,

        sel.DURAK_AGENT_TRAIN: self.build_durak_train_agent,
        sel.DURAK_AGENT_RANDOM: self.build_durak_agent,
        sel.DURAK_AGENT_HUMAN: self.build_durak_agent,

        sel.WATTEN_AGENT_TRAIN: self.build_watten_train_agent,
        sel.WATTEN_AGENT_BIG_TRAIN: self.build_watten_train_big_agent,
        sel.WATTEN_AGENT_4_512_TRAIN: self.build_watten_train_4_512_agent,
        sel.WATTEN_AGENT_EVALUATE: self.build_watten_train_agent,
        sel.WATTEN_AGENT_BIG_EVALUATE: self.build_watten_train_big_agent,
        sel.WATTEN_AGENT_4_512_EVALUATE: self.build_watten_train_4_512_agent,
        sel.WATTEN_AGENT_RANDOM: self.build_watten_agent,
        sel.WATTEN_AGENT_HUMAN: self.build_watten_agent,
        sel.WATTEN_AGENT_NNET: self.build_watten_train_4_512_agent,

        sel.SUB_WATTEN_AGENT_TRAIN: self.build_sub_watten_train_agent,
        sel.SUB_WATTEN_AGENT_TRAIN_SIMPLE: self.build_sub_watten_train_agent,
        sel.SUB_WATTEN_AGENT_EVALUATE: self.build_sub_watten_evaluate_agent,
        sel.SUB_WATTEN_AGENT_EVALUATE_SIMPLE: self.build_sub_watten_evaluate_agent,
        sel.SUB_WATTEN_AGENT_BAGGING: self.build_sub_watten_agent,
        sel.SUB_WATTEN_AGENT_RANDOM: self.build_sub_watten_agent,
        sel.SUB_WATTEN_AGENT_HUMAN: self.build_sub_watten_agent,

        sel.ASYMMETRIC_SUB_WATTEN_AGENT_TRAIN: self.build_asymmetric_sub_watten_train_agent,
        sel.ASYMMETRIC_SUB_WATTEN_AGENT_EVALUATE: self.build_asymmetric_sub_watten_evaluate_agent,
        sel.ASYMMETRIC_SUB_WATTEN_AGENT_RANDOM: self.build_asymmetric_sub_watten_agent,

        sel.TOTAL_WATTEN_AGENT_TRAIN: self.build_total_watten_train_agent,
        sel.TOTAL_WATTEN_AGENT_EVALUATE: self.build_total_watten_evaluate_agent,
        sel.TOTAL_WATTEN_AGENT_RANDOM: self.build_total_watten_agent,
        sel.TOTAL_WATTEN_AGENT_HUMAN: self.build_total_watten_agent,

        sel.HAND_WATTEN_TRAIN: self.build_hand_watten_train_agent,
        sel.HAND_WATTEN_TRAIN_S_S: self.build_hand_watten_train_agent,
        sel.HAND_WATTEN_TRAIN_M_M: self.build_hand_watten_train_agent,
        sel.HAND_WATTEN_TRAIN_CNN: self.build_hand_watten_train_agent,
        sel.HAND_WATTEN_EVALUATE: self.build_hand_watten_evaluate_agent,
        sel.HAND_WATTEN_EVALUATE_S_S: self.build_hand_watten_evaluate_agent,
        sel.HAND_WATTEN_EVALUATE_M_M: self.build_hand_watten_evaluate_agent,
        sel.HAND_WATTEN_EVALUATE_CNN: self.build_hand_watten_evaluate_agent,
        sel.HAND_WATTEN_RANDOM: self.build_hand_watten_agent,
        sel.HAND_WATTEN_HUMAN: self.build_hand_watten_agent,
    }

    # Explicit list of agent profiles. Its order is not identical to the
    # mapping above (EVALUATE_M_M precedes EVALUATE_S_S here), so it is
    # spelled out rather than derived from the mapping keys.
    self.agent_profiles = [
        sel.TICTACTOE_AGENT_TRAIN,
        sel.TICTACTOE_AGENT_RANDOM,
        sel.TICTACTOE_AGENT_HUMAN,

        sel.DURAK_AGENT_TRAIN,
        sel.DURAK_AGENT_RANDOM,
        sel.DURAK_AGENT_HUMAN,

        sel.WATTEN_AGENT_TRAIN,
        sel.WATTEN_AGENT_BIG_TRAIN,
        sel.WATTEN_AGENT_4_512_TRAIN,
        sel.WATTEN_AGENT_EVALUATE,
        sel.WATTEN_AGENT_BIG_EVALUATE,
        sel.WATTEN_AGENT_4_512_EVALUATE,
        sel.WATTEN_AGENT_RANDOM,
        sel.WATTEN_AGENT_HUMAN,
        sel.WATTEN_AGENT_NNET,

        sel.SUB_WATTEN_AGENT_TRAIN,
        sel.SUB_WATTEN_AGENT_TRAIN_SIMPLE,
        sel.SUB_WATTEN_AGENT_EVALUATE,
        sel.SUB_WATTEN_AGENT_EVALUATE_SIMPLE,
        sel.SUB_WATTEN_AGENT_BAGGING,
        sel.SUB_WATTEN_AGENT_RANDOM,
        sel.SUB_WATTEN_AGENT_HUMAN,

        sel.ASYMMETRIC_SUB_WATTEN_AGENT_TRAIN,
        sel.ASYMMETRIC_SUB_WATTEN_AGENT_EVALUATE,
        sel.ASYMMETRIC_SUB_WATTEN_AGENT_RANDOM,

        sel.TOTAL_WATTEN_AGENT_TRAIN,
        sel.TOTAL_WATTEN_AGENT_EVALUATE,
        sel.TOTAL_WATTEN_AGENT_RANDOM,
        sel.TOTAL_WATTEN_AGENT_HUMAN,

        sel.HAND_WATTEN_TRAIN,
        sel.HAND_WATTEN_TRAIN_S_S,
        sel.HAND_WATTEN_TRAIN_M_M,
        sel.HAND_WATTEN_TRAIN_CNN,
        sel.HAND_WATTEN_EVALUATE,
        sel.HAND_WATTEN_EVALUATE_M_M,
        sel.HAND_WATTEN_EVALUATE_S_S,
        sel.HAND_WATTEN_EVALUATE_CNN,
        sel.HAND_WATTEN_RANDOM,
        sel.HAND_WATTEN_HUMAN,
    ]
class WorldTotalWatten(object):
    """State and rules of a complete (total) Watten game between two players.

    Players are encoded as ``1`` (player A) and ``-1`` (player B). A game is
    a sequence of hands; each hand is worth ``current_game_prize`` points and
    the first player whose overall score reaches ``win_threshold`` wins.

    Card play itself is delegated to a sub_watten game/agent: the actions of
    this world are only "raise", "fold", "accept raise" and "let the
    sub_watten agent make its best move" (see the module-level ``moves``).
    """

    def __init__(self, logger=stdout_logger):
        """Create a world in its initial state.

        :param logger: logger used for debug/info output
        """
        self.LOG = logger
        self.refresh()

    def refresh(self):
        """Reset the world to the start of a brand new game and deal a hand."""
        # create a sub_watten game
        self.sub_watten_game = WattenSubGame()

        # player can be either 1 or -1
        # player 1 is A
        # player -1 is B
        self.current_player = 1

        # player who distributes cards when the game starts; each game the starting player is switched
        # the opponent picks rank and plays the first move
        self.distributing_cards_player = -1

        # overall score of the game
        self.player_A_score = 0
        self.player_B_score = 0

        # points to achieve for winning a game
        self.win_threshold = 15

        # scores and threshold must be set before this call: the initial
        # game prize depends on them
        self._refresh_state_single_hand()

        # player who won the game
        self.winning_player = None

        self.moves = moves

        # list of actions taken in a game, used for debugging purposes
        self.moves_series = []

        self.starting_state = (
            f"\n{self.current_player}, {self.distributing_cards_player}, "
            f"{self.player_A_score}, {self.player_B_score}, "
            f"{self.player_A_hand}, {self.player_B_hand}, {self.played_cards}, "
            f"{self.current_game_player_A_score}, {self.current_game_player_B_score}, "
            f"{self.current_game_prize}, {self.is_last_move_raise}, "
            f"{self.is_last_move_accepted_raise}, {self.is_last_hand_raise_valid}, "
            f"{self.first_card_deck}, {self.last_card_deck}, {self.rank}, {self.suit}"
        )

    def _refresh_state_single_hand(self):
        """Shuffle a new deck, deal both hands and reset all per-hand state.

        :raises InconsistentStateError: if a dealt card is still in the deck
        """
        # init deck
        self.deck = list(range(33))
        np.random.shuffle(self.deck)

        # give cards to players: five cards each, taken from the end of the deck
        self.player_A_hand = self.deck[-5:]
        del self.deck[-5:]
        self.player_B_hand = self.deck[-5:]
        del self.deck[-5:]

        # init board
        self.played_cards = []

        # init player scores, needs 3 for winning the hand
        # do not confuse those two fields with the total score achieved
        self.current_game_player_A_score = 0
        self.current_game_player_B_score = 0

        # is True only if the last move was a raise
        self.is_last_move_raise = False
        self.is_last_move_accepted_raise = False

        # raise in last hand implies some specific rules. see act method
        self.is_last_hand_raise_valid = None

        # first and last card in deck (it does not really matter where those cards are taken from)
        self.first_card_deck = self.deck.pop()
        self.last_card_deck = self.deck.pop()

        self.rank = None  # schlag
        self.suit = None  # farb

        self._set_initial_game_prize()

        self.sub_watten_moves_series = []

        # sanity check: dealt cards must have left the deck
        for card in self.player_A_hand:
            if card in self.deck:
                raise InconsistentStateError("Card %d cannot be in deck." % card)
        for card in self.player_B_hand:
            if card in self.deck:
                raise InconsistentStateError("Card %d cannot be in deck." % card)

    def _set_initial_game_prize(self):
        """Set the prize of the new hand from the overall scores.

        The prize is 2 unless one player is within 2 points of the win
        threshold; then it is 4 if the opponent has fewer than 10 points and
        3 otherwise. Player A's situation is checked first.
        """
        if (self.win_threshold - self.player_A_score) <= 2:
            if self.player_B_score < 10:
                self.current_game_prize = 4
            else:
                self.current_game_prize = 3
            return

        if (self.win_threshold - self.player_B_score) <= 2:
            if self.player_A_score < 10:
                self.current_game_prize = 4
            else:
                self.current_game_prize = 3
            return

        self.current_game_prize = 2

    def get_valid_moves_zeros(self):
        """Return the valid moves as a 0/1 mask of length 5.

        :raises ValidMovesError: if there is no valid move
        """
        valid_moves = self.get_valid_moves()
        if not valid_moves:
            self.display()
            raise ValidMovesError("Valid moves cannot be 0!")

        valid_moves_zeros = [0] * 5  # number of possible moves
        for valid_move in valid_moves:
            valid_moves_zeros[valid_move] = 1
        return valid_moves_zeros

    def get_valid_moves(self):
        """Return the list of action ids the current player may take.

        :rtype: list
        """
        # a player can raise at any time
        # if the last move was a raise then the player can fold or accept it
        if self.is_last_move_raise and not self.is_last_move_accepted_raise:
            valid_moves = [moves["fold_hand"], moves["accept_raise"]]
            if self.is_last_hand_raise_valid is not None:
                valid_moves.append(moves["fold_hand_and_show_valid_raise"])
            self.LOG.debug(f"Valid moves for player [{self.current_player}] are {valid_moves}")
            return valid_moves

        # if last move was not a raise, then the player can make the best sub_watten move
        valid_moves = [moves["make_best_move"]]

        if (not self.is_last_move_raise) and (not self.is_last_move_accepted_raise) and \
                (self.is_last_hand_raise_valid is None) and self.check_allowed_raise_situation():
            valid_moves.append(self.moves["raise_points"])

        self.LOG.debug(f"Valid moves for player [{self.current_player}] are {valid_moves}")
        return valid_moves

    def check_allowed_raise_situation(self):
        """Return True if the current player cannot already win with the current prize."""
        # it makes sense to raise only if a player can't win the game with the current game prize
        if self.current_player == 1 and (self.player_A_score + self.current_game_prize) < self.win_threshold:
            return True
        if self.current_player == -1 and (self.player_B_score + self.current_game_prize) < self.win_threshold:
            return True
        return False

    def act(self, action, agent):
        """Apply a single action and update the inner state of the world.

        :param action: action id, one of the values of the module-level ``moves``
        :param agent: sub_watten agent whose ``predict`` picks the concrete
            sub_watten move when ``action`` is "make_best_move"
        :return: tuple ``(outcome, next_player)`` where outcome is
            ``"continue"`` (the hand goes on) or ``"end"`` (the hand finished
            because a player won 3 tricks or somebody folded) and
            next_player is 1 or -1
        :raises InvalidActionError: if the action is not allowed in this state
        :raises InconsistentStateError: if the state or the predicted
            sub_watten move is inconsistent
        """
        num_played_cards = len(self.played_cards)

        if action not in self.get_valid_moves():
            raise InvalidActionError("Action %d cannot be played" % action)

        if action > 4:
            raise InvalidActionError("Action %d is not valid" % action)

        if self.current_game_player_A_score > 3 or self.current_game_player_B_score > 3:
            raise InconsistentStateError(
                "Current game score cannot exceed 3. Player 1 [%d] and player -1 [%d]"
                % (self.current_game_player_A_score, self.current_game_player_B_score))

        self.moves_series.append(action)

        if action == moves["raise_points"]:
            if self.is_last_move_raise or self.is_last_move_accepted_raise \
                    or self.is_last_hand_raise_valid is not None:
                raise InvalidActionError("Cannot raise if the previous move was a raise")
            self.LOG.debug(f"{self.current_player} raised points")
            self.is_last_move_raise = True
            # a raise during the last trick is subject to extra rules
            if num_played_cards >= 8:
                self.is_last_hand_raise_valid = self._last_hand_raise_valid()
            self.current_game_prize += 1
            return self._act_continue_move()

        if action == moves["accept_raise"]:
            if self.is_last_move_raise is False or self.is_last_move_accepted_raise:
                raise InvalidActionError("Cannot accept raise if the previous move was not a raise")
            self.LOG.debug(f"{self.current_player} accepted raise")
            self.is_last_move_accepted_raise = True
            self.is_last_move_raise = False
            return self._act_continue_move()

        # if a player folds, then the prize is given to the opponent
        if action == moves["fold_hand"] or action == moves["fold_hand_and_show_valid_raise"]:
            if self.is_last_move_raise is False or self.is_last_move_accepted_raise:
                raise InvalidActionError("Cannot fold hand if the previous move was not a raise")
            self.LOG.debug(f"{self.current_player} folds hand")
            self._assign_points_fold()
            self._assign_winning_player()
            self.current_player = self.distributing_cards_player
            self.distributing_cards_player = self.distributing_cards_player * -1
            self._refresh_state_single_hand()
            return "end", self.current_player

        # if an action is not a raise, an accept raise or a fold, then the next move is definitely going to
        # reset the chance for raising
        self.is_last_move_accepted_raise = False
        self.is_last_move_raise = False

        # if an action is make best move, then the sub_watten agent will predict the move
        if action == moves["make_best_move"]:
            # NOTE: a guard testing is_last_move_raise used to sit here; it
            # could never fire because the flag is reset just above, and the
            # situation is already rejected by the get_valid_moves() check.
            self.LOG.debug(f"{self.current_player} made best sub_watten move")

            # set sub_watten game to represent the current state
            self.sub_watten_game.trueboard.init_world_to_state(
                self.current_player, self.distributing_cards_player,
                self.player_A_hand, self.player_B_hand, self.played_cards,
                self.current_game_player_A_score, self.current_game_player_B_score,
                self.first_card_deck, self.last_card_deck, self.rank, self.suit)

            # get predictions from nnet or human
            best_move_array, v = agent.predict(self.sub_watten_game, self.sub_watten_game.get_cur_player())
            if isinstance(best_move_array, list):
                best_move_array = np.array(best_move_array, dtype=float)

            # mask invalid moves
            valid_moves = self.sub_watten_game.get_valid_moves(self.sub_watten_game.get_cur_player())
            best_move_array = best_move_array * valid_moves

            # index of the move in sub_watten
            move = np.argmax(best_move_array)

            self.sub_watten_moves_series.append(move)

            # rank is between 33 and 42
            if 33 <= move < 42:
                self.rank = move % 33
                self.LOG.debug(f"{self.current_player} picked rank [{self.rank}]")
                return self._act_continue_move()

            # suit is between 42 and 46
            if 42 <= move < 46:
                self.suit = move % 42
                self.LOG.debug(f"{self.current_player} picked suit [{self.suit}]")
                return self._act_continue_move()

            if 0 <= move < 33:
                hand = self._get_current_player_hand()
                if move not in hand:
                    self.display()
                    raise InconsistentStateError(
                        'Played card [%d] not in %s of player %d' % (move, hand, self.current_player))

                self.LOG.debug(f"{self.current_player} played card {move}")
                self._remove_card_from_hand(move, self.current_player)

                if num_played_cards % 2 == 0:
                    # first card of a trick
                    if self.is_last_hand_raise_valid is not None and not self.is_last_hand_raise_valid:
                        # played cards are 8 and current player also raised without respecting the conditions
                        if self.current_player == 1:
                            self.player_B_score += self.current_game_prize
                        else:
                            self.player_A_score += self.current_game_prize
                        return self._hand_is_done_after_card_is_played_common()

                    self.played_cards.append(move)
                    return self._act_continue_move()
                else:
                    # second card of a trick
                    if self.is_last_hand_raise_valid is not None and not self.is_last_hand_raise_valid:
                        # played cards are 9 and current player also raised without respecting the conditions
                        if self.current_player == 1:
                            self.player_B_score += self.current_game_prize
                        else:
                            self.player_A_score += self.current_game_prize
                        return self._hand_is_done_after_card_is_played_common()

                    last_played_card = self._get_last_played_card()
                    self.played_cards.append(move)
                    current_played_card = move

                    # compare_cards is True when the first (earlier) card wins
                    current_player_wins = not self.compare_cards(last_played_card, current_played_card)
                    next_player_move = self._assign_points_move(current_player_wins)

                    if self.current_game_player_A_score == 3 or self.current_game_player_B_score == 3:
                        if self.current_game_player_A_score == 3:
                            self.player_A_score += self.current_game_prize
                        else:
                            self.player_B_score += self.current_game_prize
                        return self._hand_is_done_after_card_is_played_common()

                    self.current_player = next_player_move
                    return "continue", next_player_move

            raise InconsistentStateError("Best_move %d is not allowed." % move)

        self.display()
        raise InconsistentStateError("Action %d is not allowed." % action)

    def _hand_is_done_after_card_is_played_common(self):
        """Close the current hand, switch the dealer and deal a new hand.

        :return: ``("end", next_player)``
        """
        self._assign_winning_player()
        self.current_player = self.distributing_cards_player
        self.distributing_cards_player = self.distributing_cards_player * -1
        self._refresh_state_single_hand()
        return "end", self.current_player

    def _act_continue_move(self):
        """Pass the turn to the other player and return ``("continue", next_player)``."""
        self.current_player = self.current_player * -1
        return "continue", self.current_player

    def _assign_winning_player(self):
        """Record the winner if a player's overall score reached the threshold."""
        if self.player_A_score >= self.win_threshold:
            self.winning_player = 1
        elif self.player_B_score >= self.win_threshold:
            self.winning_player = -1

    def _remove_card_from_hand(self, action, player):
        """Remove card ``action`` from the hand of ``player``.

        :raises InvalidActionError: if player is neither 1 nor -1
        """
        if player == 1:
            self.player_A_hand.remove(action)
            return
        if player == -1:
            self.player_B_hand.remove(action)
            return
        raise InvalidActionError("Player should be either 1 or -1. Got %d" % player)

    # if a player folds, then the prize is given to the opponent except when the raise was done in a not valid situation
    def _assign_points_fold(self):
        """Assign the points of a fold (current prize minus one).

        The points go to the opponent of the folding (current) player, unless
        the raise was made in the last trick without satisfying the
        conditions, in which case they go to the folding player.
        """
        fold_points = self.current_game_prize - 1
        if self.is_last_hand_raise_valid is None or self.is_last_hand_raise_valid:
            if self.current_player == 1:
                self.player_B_score += fold_points
            if self.current_player == -1:
                self.player_A_score += fold_points
        else:
            if self.current_player == 1:
                self.player_A_score += fold_points
            if self.current_player == -1:
                self.player_B_score += fold_points

    # returns true if the player who raised the current turn satisfies the following rules:
    # - he has a trumpf
    # - his card has the same suit of the one played by the previous player
    # - his card wins against the one played by the opponent player
    def _last_hand_raise_valid(self):
        """Check whether a raise in the last trick respects the rules above.

        :raises InconsistentStateError: if the number of played cards is not 8 or 9
        """
        num_played_cards = len(self.played_cards)
        if num_played_cards not in (8, 9):
            raise InconsistentStateError(
                "Num played cards when fold occurs in last hand can be either 8 or 9. Got %d."
                % num_played_cards)

        # the first card of the raising player's hand is the one evaluated
        hidden_card = self._get_current_player_hand()[0]
        hidd_r, hidd_s = get_rs(hidden_card)
        if self.is_trumpf(hidd_r, hidd_s):
            return True

        if num_played_cards == 9:
            last_played_card = self._get_last_played_card()
            _, last_played_card_suit = get_rs(last_played_card)
            if hidd_s == last_played_card_suit or not self.compare_cards(last_played_card, hidden_card):
                return True

        return False

    # after two cards have been compared, assign points and returns the player that should play the next move
    def _assign_points_move(self, current_player_wins):
        """Give one trick point to the trick winner and return that player (1 or -1)."""
        if current_player_wins:
            if self.current_player == 1:
                self.current_game_player_A_score += 1
                return 1
            else:
                self.current_game_player_B_score += 1
                return -1
        else:
            if self.current_player == 1:
                self.current_game_player_B_score += 1
                return -1
            else:
                self.current_game_player_A_score += 1
                return 1

    # routine for deciding whether a card (card1) wins over another card (card2)
    # returns true if the first card wins, false otherwise
    # the first card is expected to be played before the second one
    #
    # ORDER OF IMPORTANCE:
    # - Rechte (card with the same suit and rank chosen when the game started)
    # - Blinden (cards with the same rank of the chosen rank)
    # - Trümpfe (cards with the same suit of the chosen suit)
    # - Other cards (importance given by the rank)
    def compare_cards(self, card1, card2):
        """Return True if ``card1`` (played first) beats ``card2``."""
        card1_rank, card1_suit = get_rs(card1)
        card2_rank, card2_suit = get_rs(card2)

        #######################################################
        # RECHTE
        #######################################################
        # rechte is the strongest card
        if self.is_rechte(card1_rank, card1_suit):
            return True
        if self.is_rechte(card2_rank, card2_suit):
            return False

        #######################################################
        # BLINDEN
        #######################################################
        # the second strongest cards after the rechte are the blinde
        if self.is_blinde(card1_rank):
            return True
        if self.is_blinde(card2_rank):
            return False

        #######################################################
        # TRÜMPFEN
        #######################################################
        # if a played card has the same chosen suit, then the opponent for winning the hand should play
        # a card of the same suit but with higher rank
        if self.is_trumpf(card1_rank, card1_suit):
            if self.is_trumpf(card2_rank, card2_suit):
                # when both cards are trümpfe then wins the card with the highest rank
                return self.is_rank_higher(card1_rank, card2_rank)
            # a card of the chosen suit wins against a card without the chosen suit
            else:
                return True

        # if the first card is not trümpfe and the second is trümpfe, then the second card wins
        if self.suit == card2_suit:
            return False

        #######################################################
        # OTHER CARDS
        #######################################################
        # at this point if the second card has a different suit from the first card, then the first wins
        if card1_suit != card2_suit:
            return True

        # if the first and the second card are not trümpfe and have the same suit,
        # then the card with the highest rank wins
        return self.is_rank_higher(card1_rank, card2_rank)

    def is_rechte(self, card_rank, card_suit):
        """Return True if the card is the rechte.

        That is the card with the chosen rank and suit, or any rank-8 card
        when the chosen rank is 8.
        """
        return bool(
            (self.rank == 8 and card_rank == 8)
            or (card_rank == self.rank and card_suit == self.suit)
        )

    def is_blinde(self, card_rank):
        """Return True if the card has the chosen rank."""
        return bool(card_rank == self.rank)

    def is_trumpf(self, card_rank, card_suit):
        """Return True if the card has the chosen suit and is not the rechte.

        Always returns a bool (the previous implementation fell through and
        returned ``None`` for cards of another suit).
        """
        if self.is_rechte(card_rank, card_suit):
            return False
        return bool(self.suit == card_suit)

    def is_rank_higher(self, card1_rank, card2_rank):
        """Return True if ``card1_rank`` is higher; rank 8 (the weli) is the lowest."""
        # the weli has the lowest rank
        if card1_rank == 8:
            return False
        # the weli has the lowest rank
        if card2_rank == 8:
            return True
        return card1_rank > card2_rank

    def is_game_end(self):
        """Return True once either overall score reached the win threshold."""
        return self.player_A_score >= self.win_threshold or self.player_B_score >= self.win_threshold

    # this is called after act, player is the next player
    def is_won(self, player):
        """Return True if the opponent of ``player`` reached the win threshold.

        :param player: the player to move next (1 or -1)
        :raises InvalidInputError: if player is neither 1 nor -1
        :raises InconsistentStateError: if both players reached the threshold
        """
        if player not in [1, -1]:
            raise InvalidInputError("Player should be either 1 or -1. Input is %d." % player)

        if self.player_A_score >= self.win_threshold and self.player_B_score >= self.win_threshold:
            raise InconsistentStateError("Both player cannot exceed score threshold. Only one winner is allowed.")

        if player == -1 and self.player_A_score >= self.win_threshold:
            return True
        if player == 1 and self.player_B_score >= self.win_threshold:
            return True
        return False

    def get_player(self):
        """Return the player to move (1 or -1)."""
        return self.current_player

    # should return a unique id with the state of the game
    # the needed info are:
    # - observation value of current sub_watten state (32)
    # - points current hand current player (max 2)
    # - points current hand opponent player (max 2)
    # - points game current player (max 14)
    # - points game opponent player (max 14)
    # - last move raise (1)
    # - last move accepted raise (1)
    # - last hand raise valid (1)
    # - current prize (13)
    # - isRankNone (1)
    # - isSuitNone (1)
    def observe(self, player, agent):
        """Encode the state, seen from ``player``, as an array of shape (82, 1).

        :param player: observing player (1 or -1)
        :param agent: sub_watten agent; unless it is a
            ``SubWattenHumanAgent`` its predicted value of the sub_watten
            state is encoded in the first 32 entries
        :raises InvalidInputError: if player is neither 1 nor -1
        """
        if player not in [1, -1]:
            raise InvalidInputError("Player should be either 1 or -1. Input is %d." % player)

        observation = np.zeros((82,))

        # check if agent is human or not
        if isinstance(agent, SubWattenHumanAgent):
            v = 0
        else:
            # When last move is a raise, in the sub_game the player moving should
            # be the one that played
            if self.is_last_move_raise:
                sub_game_current_player = self.current_player * -1
            else:
                sub_game_current_player = self.current_player

            # set sub_watten game to current state
            self.sub_watten_game.trueboard.init_world_to_state(
                sub_game_current_player, self.distributing_cards_player,
                self.player_A_hand, self.player_B_hand, self.played_cards,
                self.current_game_player_A_score, self.current_game_player_B_score,
                self.first_card_deck, self.last_card_deck, self.rank, self.suit)

            observing_player = self.sub_watten_game.players[self.current_player]

            # observation value of sub_watten state
            best_move_array, v = agent.predict(self.sub_watten_game, observing_player)

        # the value is stored as 32 binary digits
        v_bin_string = int_to_binary(float32_bit_pattern(v), 32)
        for i in range(32):
            observation[i] = int(v_bin_string[i])

        # points current hand current player
        index = 32  # 32
        points_current_hand_current = \
            self.current_game_player_A_score if player == 1 else self.current_game_player_B_score
        if points_current_hand_current != 0:
            observation[index + points_current_hand_current - 1] = 1

        # points current hand opponent player
        index += 2  # 34
        points_current_hand_opponent = \
            self.current_game_player_B_score if player == 1 else self.current_game_player_A_score
        if points_current_hand_opponent != 0:
            observation[index + points_current_hand_opponent - 1] = 1

        # points game current player
        index += 2  # 36
        points_game_current = self.player_A_score if player == 1 else self.player_B_score
        if points_game_current != 0:
            observation[index + points_game_current - 1] = 1

        # points game opponent player
        index += 14  # 50
        points_game_opponent = self.player_B_score if player == 1 else self.player_A_score
        if points_game_opponent != 0:
            observation[index + points_game_opponent - 1] = 1

        index += 14  # 64
        if self.is_last_move_raise:
            observation[index] = 1

        index += 1  # 65
        if self.is_last_move_accepted_raise:
            observation[index] = 1

        index += 1  # 66
        if self.is_last_hand_raise_valid is None:
            observation[index] = 0
        else:
            observation[index] = 1

        index += 1  # 67
        if self.current_game_prize - 3 >= 0:
            observation[index + self.current_game_prize - 3] = 1

        index += 13  # 80
        if self.rank is not None:
            observation[index] = 1

        index += 1  # 81
        if self.suit is not None:
            observation[index] = 1

        # total size = 81 + 1 = 82
        observation = observation.reshape((82, 1))
        return observation

    # def observation_str_raw(self, observe):
    #     new_observe = np.concatenate((observe, np.array([[1 if self.current_player == 1 else 0]])))
    #     print(new_observe)

    def _get_last_played_card(self):
        """Return the most recently played card, or None if no card was played."""
        if not self.played_cards:
            return None
        return self.played_cards[-1]

    def _get_opponent_hand(self):
        """Return the hand of the opponent of the current player."""
        return self.player_B_hand if self.current_player == 1 else self.player_A_hand

    def _get_current_player_hand(self):
        """Return the hand of the current player."""
        return self.player_A_hand if self.current_player == 1 else self.player_B_hand

    @staticmethod
    def _lookup_name(names, key):
        """Return ``names[key]``, or "undefined" when the lookup fails.

        Used by ``display`` so that logging a state whose rank or suit is
        not chosen yet cannot raise and hide the error being reported.
        """
        try:
            return names[key]
        except (KeyError, IndexError, TypeError):
            return "undefined"

    def display(self):
        """Log a human readable dump of the whole state (used for debugging)."""
        str_raise = ""
        if self.is_last_move_raise:
            str_raise = "- RAISE"
        if self.is_last_move_accepted_raise:
            str_raise = "- ACCEPTED RAISE"

        # rank/suit may still be None; never let the lookup raise here
        rank_name = self._lookup_name(rank_names, self.rank)
        suit_name = self._lookup_name(suit_names, self.suit)

        self.LOG.info(f"--- State of the game ---\nCurrent player: |{self.current_player}| "
                      f"and current game prize |{self.current_game_prize}| {str_raise}"
                      f"\nPlayer 1 points: |{self.player_A_score}| - Player -1 points: |{self.player_B_score}|"
                      f"\nPlayer 1 current: |{self.current_game_player_A_score}| - "
                      f"Player -1 current: |{self.current_game_player_B_score}|"
                      f"\nPlayer 1 hand: {self._str_cards(self.player_A_hand)} - {self.player_A_hand}"
                      f"\nPlayer -1 hand: {self._str_cards(self.player_B_hand)} - {self.player_B_hand}"
                      f"\nRank: |{self.rank} - {rank_name}|, Suit: |{self.suit} - {suit_name}|"
                      f"\nPlayed cards: {self._str_cards(self.played_cards)}"
                      f"\nDist: {self.distributing_cards_player}, lhrv: {self.is_last_hand_raise_valid}, "
                      f"first card: {self.first_card_deck}, last card: {self.last_card_deck}")
        self.LOG.info(f"Starting state: {self.starting_state}")
        self.LOG.info(f"Moves series: {self.moves_series}")
        self.LOG.info(f"SubWatten moves series: {self.sub_watten_moves_series}")

    def _str_cards(self, cards):
        """Format cards as "<readable name> (<id>)" joined by ", "."""
        return ", ".join(f"{human_readable_card(card)} ({card})" for card in cards)

    def deepcopy(self):
        """Return an independent copy of this world.

        Lists and the sub_watten game are copied; the logger is shared.
        """
        new_world = WorldTotalWatten(self.LOG)

        new_world.current_player = self.current_player
        new_world.distributing_cards_player = self.distributing_cards_player
        new_world.deck = self.deck.copy()
        new_world.player_A_hand = self.player_A_hand.copy()
        new_world.player_B_hand = self.player_B_hand.copy()
        new_world.played_cards = self.played_cards.copy()
        new_world.player_A_score = self.player_A_score
        new_world.player_B_score = self.player_B_score
        new_world.current_game_player_A_score = self.current_game_player_A_score
        new_world.current_game_player_B_score = self.current_game_player_B_score
        new_world.current_game_prize = self.current_game_prize
        new_world.is_last_move_raise = self.is_last_move_raise
        new_world.is_last_move_accepted_raise = self.is_last_move_accepted_raise
        new_world.win_threshold = self.win_threshold
        new_world.first_card_deck = self.first_card_deck
        new_world.last_card_deck = self.last_card_deck
        new_world.rank = self.rank
        new_world.suit = self.suit
        new_world.is_last_hand_raise_valid = self.is_last_hand_raise_valid
        new_world.winning_player = self.winning_player
        new_world.sub_watten_game = self.sub_watten_game.clone()
        new_world.moves = self.moves.copy()
        new_world.starting_state = self.starting_state
        new_world.moves_series = self.moves_series.copy()
        new_world.sub_watten_moves_series = self.sub_watten_moves_series.copy()

        return new_world

    def init_world_to_state(self, current_player, distributing_cards_player, player_A_score, player_B_score,
                            player_A_hand, player_B_hand, played_cards, current_game_player_A_score,
                            current_game_player_B_score, current_game_prize, is_last_move_raise,
                            is_last_move_accepted_raise, is_last_hand_raise_valid, first_card_deck,
                            last_card_deck, rank, suit):
        """Overwrite the state with the given values (used to set up specific positions).

        The hand and played-card lists are stored as passed, not copied.
        """
        self.current_player = current_player
        self.distributing_cards_player = distributing_cards_player
        self.player_A_score = player_A_score
        self.player_B_score = player_B_score
        self.player_A_hand = player_A_hand
        self.player_B_hand = player_B_hand
        self.played_cards = played_cards
        self.current_game_player_A_score = current_game_player_A_score
        self.current_game_player_B_score = current_game_player_B_score
        self.current_game_prize = current_game_prize
        self.is_last_move_raise = is_last_move_raise
        self.is_last_move_accepted_raise = is_last_move_accepted_raise
        self.is_last_hand_raise_valid = is_last_hand_raise_valid
        self.first_card_deck = first_card_deck
        self.last_card_deck = last_card_deck
        self.rank = rank
        self.suit = suit
def test_get_observation_size(self):
    """Check that a fresh sub-game reports an observation size of (221, 1)."""
    expected_size = (221, 1)
    game = WattenSubGame()
    self.assertEqual(game.get_observation_size(), expected_size)
def test_get_action_size(self):
    """Check that a fresh sub-game reports an action size of 46."""
    expected_actions = 46
    game = WattenSubGame()
    self.assertEqual(game.get_action_size(), expected_actions)
def test_get_player_num(self):
    """Check that a fresh sub-game reports exactly two players."""
    expected_players = 2
    game = WattenSubGame()
    self.assertEqual(game.get_players_num(), expected_players)