예제 #1
0
    def refresh(self):
        """Reset the world to the beginning of a brand new match.

        Creates a fresh sub_watten game, resets turn order, match scores and
        the debugging history, deals a new hand and records a textual
        snapshot of the resulting state in ``self.starting_state``.
        """
        # nested sub_watten game
        self.sub_watten_game = WattenSubGame()

        # players are encoded as 1 (player A) and -1 (player B)
        self.current_player = 1

        # player who distributes the cards when the game starts
        self.distributing_cards_player = -1

        # overall match scores and the points needed to win the match
        self.player_A_score = self.player_B_score = 0
        self.win_threshold = 15

        # deal the first hand (reads the scores and the threshold set above)
        self._refresh_state_single_hand()

        # nobody has won yet
        self.winning_player = None

        self.moves = moves

        # actions taken so far, kept for debugging purposes
        self.moves_series = []

        # textual snapshot of the state the match started from
        snapshot = (
            self.current_player,
            self.distributing_cards_player,
            self.player_A_score,
            self.player_B_score,
            self.player_A_hand,
            self.player_B_hand,
            self.played_cards,
            self.current_game_player_A_score,
            self.current_game_player_B_score,
            self.current_game_prize,
            self.is_last_move_raise,
            self.is_last_move_accepted_raise,
            self.is_last_hand_raise_valid,
            self.first_card_deck,
            self.last_card_deck,
            self.rank,
            self.suit,
        )
        self.starting_state = "\n" + ", ".join(str(field) for field in snapshot)
예제 #2
0
    def test_get_cur_player(self):
        """World players 1 / -1 are reported by the game as indices 0 / 1."""
        watten_game = WattenSubGame()

        for world_player, expected_index in ((1, 0), (-1, 1)):
            watten_game.trueboard.current_player = world_player
            self.assertEqual(watten_game.get_cur_player(), expected_index)
    def test_game_complete(self):
        """Check players and valid moves for the first move from a fixed state."""
        # move sequence kept for reference:
        # [36, 45, 14, 5, 25, 20, 46, 48, 1, 46, 48, 13, 35, 45, 15, 18, 7, 2,
        #  30, 46, 47, 36, 43, 27, 5, 46, 48, 6, 2, 22, 46, 47, 35, 44, 29, 8, 46,
        #  48, 18, 21, 27, 26, 14, 11, 9, 46, 48, 17, 34, 43, 10, 1, 46, 47, 41,
        #  42, 24, 17, 46, 48, 0, 14, 3, 29, 46, 48, 41, 45, 19, 8, 30, 24, 20,
        #  16, 34, 46, 47, 39, 44, 16, 30, 18, 46, 48, 21, 46, 47, 39, 43, 24,
        #  26, 14, 22, 29, 6, 5, 7]

        game = WattenSubGame()
        fixed_world = WorldSubWatten()
        fixed_world.init_world_to_state(1, -1, [25, 9, 1, 32, 14], [5, 13, 7, 10, 20], [], 0, 0, 16, 28, None, None)
        game.trueboard = fixed_world

        # before any move: player index 0 is to move and may play moves 33..41
        self.assertEqual(game.get_cur_player(), 0)
        self.assertEqual(game.get_valid_moves_no_zeros(), list(range(33, 42)))

        self.assertEqual(game.make_move(36), (0.0, 1))

        # after move 36: player index 1 is to move and may play moves 42..45
        self.assertEqual(game.get_cur_player(), 1)
        self.assertEqual(game.get_valid_moves_no_zeros(), list(range(42, 46)))
    def test_cloned_prediction(self):
        """A cloned game must yield the same prediction as the original game.

        The agent is obtained through the environment selector; both the
        policy values and the value estimate are compared.
        """
        # imported locally so the test does not rely on the module's import block
        import numpy as np

        env = es.EnvironmentSelector()
        # get agent
        agent = env.sub_watten_non_human_agent_for_total_watten()

        sub_watten_game = WattenSubGame()
        clone_sub_watten_game = sub_watten_game.clone()

        pi_values, v = agent.predict(sub_watten_game,
                                     sub_watten_game.get_cur_player())

        clone_pi_values, clone_v = agent.predict(
            clone_sub_watten_game, clone_sub_watten_game.get_cur_player())

        # `a.all() == b.all()` only compares two booleans ("are all entries
        # non-zero?"), so it passes for almost any pair of arrays; compare
        # the policy values element by element instead.
        np.testing.assert_array_equal(pi_values, clone_pi_values)
        self.assertEqual(v, clone_v)
    def sub_watten_non_human_agent_for_total_watten(self):
        """Build the neural-network sub_watten agent used by total_watten.

        The network is sized from a fresh ``WattenSubGame`` and its weights
        are loaded from the default model path. If that raises ``OSError``
        a second, relative path is tried; an error from the second attempt
        propagates to the caller.

        :return: the ``AgentNNet`` with the loaded weights
        """
        primary_model_path = "games/sub_watten/training/default_nn/best.h5"
        fallback_model_path = "../../sub_watten/training/default_nn/best.h5"

        game = WattenSubGame()

        x, y = game.get_observation_size()
        nnet = SubWattenNNet(x, y, 1, game.get_action_size())

        agent_nnet = AgentNNet(nnet)

        print('Building sub_watten non human agent for total_watten')

        # load here best sub_watten model
        try:
            agent_nnet.load(primary_model_path)
        except OSError:
            # report the path that was actually tried (the previous message
            # named a different file than the one passed to load())
            print(f"File not found with {primary_model_path}")
            print(
                "Maybe you are creating an agent for test purposes. I'll try to load the model from a different path"
            )
            agent_nnet.load(fallback_model_path)

        return agent_nnet
예제 #6
0
    def test_get_score(self):
        """Scores are 0.0 while nobody has won and +/-1.0 once a winner is set."""
        scenarios = (
            # (world player, hand score field, expected get_score(1), expected get_score(0))
            (1, "current_game_player_A_score", -1.0, 1.0),
            (-1, "current_game_player_B_score", 1.0, -1.0),
        )

        for world_player, score_field, expected_for_1, expected_for_0 in scenarios:
            watten_game = WattenSubGame()
            watten_game.trueboard.current_player = world_player

            # no winner yet: both players score 0.0
            self.assertEqual(watten_game.get_score(1), 0.0)
            self.assertEqual(watten_game.get_score(0), 0.0)

            # mark the world player as winner with a hand score of 3
            watten_game.trueboard.winning_player = world_player
            setattr(watten_game.trueboard, score_field, 3)
            self.assertEqual(watten_game.get_score(1), expected_for_1)
            self.assertEqual(watten_game.get_score(0), expected_for_0)
예제 #7
0
    def test_make_move(self):
        """make_move returns (score, next player index) for the played move."""
        # move 40 on a fresh game gives no score and hands over to index 1
        watten_game = WattenSubGame()
        watten_game.trueboard.current_player = 1
        self.assertEqual(watten_game.make_move(40), (0.0, 1))

        # same board twice, only the dealer differs: the move scores 1.0 and
        # the reported next player depends on who distributed the cards
        for dealer, expected_next_player in ((-1, 1), (1, 0)):
            watten_game = WattenSubGame()
            watten_game.trueboard.distributing_cards_player = dealer
            watten_game.trueboard.suit = 1
            watten_game.trueboard.rank = 3
            watten_game.trueboard.current_player = 1
            watten_game.trueboard.current_game_player_A_score = 2
            watten_game.trueboard.played_cards = [0]
            watten_game.trueboard.player_A_hand = [1]
            self.assertEqual(watten_game.make_move(1), (1.0, expected_next_player))
    def test_nn_agent_prediction(self):
        """The network agent must predict the same for a game and its clone.

        The agent is built directly from ``SubWattenNNet`` and its weights
        are loaded from a path relative to the test's working directory.
        """
        # imported locally so the test does not rely on the module's import block
        import numpy as np

        sub_watten_game = WattenSubGame()
        clone_sub_watten_game = sub_watten_game.clone()

        x, y = sub_watten_game.get_observation_size()
        nnet = SubWattenNNet(x, y, 1, sub_watten_game.get_action_size())

        agent_nnet = AgentNNet(nnet)
        agent_nnet.load("../../sub_watten/training/best.h5")

        pi_values, v = agent_nnet.predict(sub_watten_game,
                                          sub_watten_game.get_cur_player())

        clone_pi_values, clone_v = agent_nnet.predict(
            clone_sub_watten_game, clone_sub_watten_game.get_cur_player())

        # `a.all() == b.all()` only compares two booleans ("are all entries
        # non-zero?"), so it passes for almost any pair of arrays; compare
        # the policy values element by element instead.
        np.testing.assert_array_equal(pi_values, clone_pi_values)
        self.assertEqual(v, clone_v)
    def sub_watten_human_agent_for_total_watten(self):
        """Return a human-driven sub_watten agent bound to a fresh game."""
        return SubWattenHumanAgent(WattenSubGame())
    def __init__(self):
        """Register the available games, agent builders and agent profiles.

        Three attributes are populated:

        - ``game_mapping``: game profile constant -> game instance
        - ``agent_builder_mapping``: agent profile constant -> builder method
        - ``agent_profiles``: explicit list of the agent profile constants
        """
        super().__init__()

        # game profile -> game instance; every game object is created right here
        # NOTE(review): GAME_ASYMMETRIC_SUB_WATTEN_EVALUATE maps to WattenSubGame(),
        # not AsymmetricSubWattenGame() - confirm this is intentional
        self.game_mapping = {
            # EnvironmentSelector.GAME_CHECKERS_DEFAULT: CheckersGame(8, history_n=7),
            EnvironmentSelector.GAME_TICTACTOE_DEFAULT:
            TicTacToeGame(),
            EnvironmentSelector.GAME_DURAK_DEFAULT:
            DurakGame(),
            EnvironmentSelector.GAME_WATTEN_DEFAULT:
            WattenGame(),
            EnvironmentSelector.GAME_SUB_WATTEN_DEFAULT:
            WattenSubGame(),
            EnvironmentSelector.GAME_ASYMMETRIC_SUB_WATTEN_DEFAULT:
            AsymmetricSubWattenGame(),
            EnvironmentSelector.GAME_ASYMMETRIC_SUB_WATTEN_EVALUATE:
            WattenSubGame(),
            #  EnvironmentSelector.GAME_TOTAL_WATTEN_DEFAULT: TotalWattenGame(
            #      self.sub_watten_non_human_agent_for_total_watten(),
            #      self.sub_watten_non_human_agent_for_total_watten()
            #  ),
            #  EnvironmentSelector.GAME_TOTAL_WATTEN_H_VS_H: TotalWattenGame(
            #      self.sub_watten_human_agent_for_total_watten(),
            #      self.sub_watten_human_agent_for_total_watten()
            #  ),
            #  EnvironmentSelector.GAME_TOTAL_WATTEN_H_VS_NH: TotalWattenGame(
            #      self.sub_watten_human_agent_for_total_watten(),
            #      self.sub_watten_non_human_agent_for_total_watten()
            #  ),
            #  EnvironmentSelector.GAME_TOTAL_WATTEN_NH_VS_H: TotalWattenGame(
            #      self.sub_watten_non_human_agent_for_total_watten(),
            #      self.sub_watten_human_agent_for_total_watten()
            #  ),
            EnvironmentSelector.GAME_HAND_WATTEN:
            HandWattenGame(),
            EnvironmentSelector.GAME_HAND_WATTEN_CNN:
            HandWattenGame(cnn=True)
        }

        # agent profile -> bound builder method; the builders are only
        # referenced here, not called, and several profiles share one builder
        self.agent_builder_mapping = {
            EnvironmentSelector.TICTACTOE_AGENT_TRAIN:
            self.build_tictactoe_train_agent,
            EnvironmentSelector.TICTACTOE_AGENT_RANDOM:
            self.build_tictactoe_agent,
            EnvironmentSelector.TICTACTOE_AGENT_HUMAN:
            self.build_tictactoe_agent,
            EnvironmentSelector.DURAK_AGENT_TRAIN:
            self.build_durak_train_agent,
            EnvironmentSelector.DURAK_AGENT_RANDOM: self.build_durak_agent,
            EnvironmentSelector.DURAK_AGENT_HUMAN: self.build_durak_agent,
            EnvironmentSelector.WATTEN_AGENT_TRAIN:
            self.build_watten_train_agent,
            EnvironmentSelector.WATTEN_AGENT_BIG_TRAIN:
            self.build_watten_train_big_agent,
            EnvironmentSelector.WATTEN_AGENT_4_512_TRAIN:
            self.build_watten_train_4_512_agent,
            EnvironmentSelector.WATTEN_AGENT_EVALUATE:
            self.build_watten_train_agent,
            EnvironmentSelector.WATTEN_AGENT_BIG_EVALUATE:
            self.build_watten_train_big_agent,
            EnvironmentSelector.WATTEN_AGENT_4_512_EVALUATE:
            self.build_watten_train_4_512_agent,
            EnvironmentSelector.WATTEN_AGENT_RANDOM: self.build_watten_agent,
            EnvironmentSelector.WATTEN_AGENT_HUMAN: self.build_watten_agent,
            EnvironmentSelector.WATTEN_AGENT_NNET:
            self.build_watten_train_4_512_agent,
            EnvironmentSelector.SUB_WATTEN_AGENT_TRAIN:
            self.build_sub_watten_train_agent,
            EnvironmentSelector.SUB_WATTEN_AGENT_TRAIN_SIMPLE:
            self.build_sub_watten_train_agent,
            EnvironmentSelector.SUB_WATTEN_AGENT_EVALUATE:
            self.build_sub_watten_evaluate_agent,
            EnvironmentSelector.SUB_WATTEN_AGENT_EVALUATE_SIMPLE:
            self.build_sub_watten_evaluate_agent,
            EnvironmentSelector.SUB_WATTEN_AGENT_BAGGING:
            self.build_sub_watten_agent,
            EnvironmentSelector.SUB_WATTEN_AGENT_RANDOM:
            self.build_sub_watten_agent,
            EnvironmentSelector.SUB_WATTEN_AGENT_HUMAN:
            self.build_sub_watten_agent,
            EnvironmentSelector.ASYMMETRIC_SUB_WATTEN_AGENT_TRAIN:
            self.build_asymmetric_sub_watten_train_agent,
            EnvironmentSelector.ASYMMETRIC_SUB_WATTEN_AGENT_EVALUATE:
            self.build_asymmetric_sub_watten_evaluate_agent,
            EnvironmentSelector.ASYMMETRIC_SUB_WATTEN_AGENT_RANDOM:
            self.build_asymmetric_sub_watten_agent,
            EnvironmentSelector.TOTAL_WATTEN_AGENT_TRAIN:
            self.build_total_watten_train_agent,
            EnvironmentSelector.TOTAL_WATTEN_AGENT_EVALUATE:
            self.build_total_watten_evaluate_agent,
            EnvironmentSelector.TOTAL_WATTEN_AGENT_RANDOM:
            self.build_total_watten_agent,
            EnvironmentSelector.TOTAL_WATTEN_AGENT_HUMAN:
            self.build_total_watten_agent,
            EnvironmentSelector.HAND_WATTEN_TRAIN:
            self.build_hand_watten_train_agent,
            EnvironmentSelector.HAND_WATTEN_TRAIN_S_S:
            self.build_hand_watten_train_agent,
            EnvironmentSelector.HAND_WATTEN_TRAIN_M_M:
            self.build_hand_watten_train_agent,
            EnvironmentSelector.HAND_WATTEN_TRAIN_CNN:
            self.build_hand_watten_train_agent,
            EnvironmentSelector.HAND_WATTEN_EVALUATE:
            self.build_hand_watten_evaluate_agent,
            EnvironmentSelector.HAND_WATTEN_EVALUATE_S_S:
            self.build_hand_watten_evaluate_agent,
            EnvironmentSelector.HAND_WATTEN_EVALUATE_M_M:
            self.build_hand_watten_evaluate_agent,
            EnvironmentSelector.HAND_WATTEN_EVALUATE_CNN:
            self.build_hand_watten_evaluate_agent,
            EnvironmentSelector.HAND_WATTEN_RANDOM:
            self.build_hand_watten_agent,
            EnvironmentSelector.HAND_WATTEN_HUMAN:
            self.build_hand_watten_agent,
        }

        # explicit list of the agent profiles; it is maintained by hand, so it
        # has to be kept in sync with the keys of agent_builder_mapping above.
        # The order is not identical to the mapping: here
        # HAND_WATTEN_EVALUATE_M_M comes before HAND_WATTEN_EVALUATE_S_S.
        self.agent_profiles = [
            EnvironmentSelector.TICTACTOE_AGENT_TRAIN,
            EnvironmentSelector.TICTACTOE_AGENT_RANDOM,
            EnvironmentSelector.TICTACTOE_AGENT_HUMAN,
            EnvironmentSelector.DURAK_AGENT_TRAIN,
            EnvironmentSelector.DURAK_AGENT_RANDOM,
            EnvironmentSelector.DURAK_AGENT_HUMAN,
            EnvironmentSelector.WATTEN_AGENT_TRAIN,
            EnvironmentSelector.WATTEN_AGENT_BIG_TRAIN,
            EnvironmentSelector.WATTEN_AGENT_4_512_TRAIN,
            EnvironmentSelector.WATTEN_AGENT_EVALUATE,
            EnvironmentSelector.WATTEN_AGENT_BIG_EVALUATE,
            EnvironmentSelector.WATTEN_AGENT_4_512_EVALUATE,
            EnvironmentSelector.WATTEN_AGENT_RANDOM,
            EnvironmentSelector.WATTEN_AGENT_HUMAN,
            EnvironmentSelector.WATTEN_AGENT_NNET,
            EnvironmentSelector.SUB_WATTEN_AGENT_TRAIN,
            EnvironmentSelector.SUB_WATTEN_AGENT_TRAIN_SIMPLE,
            EnvironmentSelector.SUB_WATTEN_AGENT_EVALUATE,
            EnvironmentSelector.SUB_WATTEN_AGENT_EVALUATE_SIMPLE,
            EnvironmentSelector.SUB_WATTEN_AGENT_BAGGING,
            EnvironmentSelector.SUB_WATTEN_AGENT_RANDOM,
            EnvironmentSelector.SUB_WATTEN_AGENT_HUMAN,
            EnvironmentSelector.ASYMMETRIC_SUB_WATTEN_AGENT_TRAIN,
            EnvironmentSelector.ASYMMETRIC_SUB_WATTEN_AGENT_EVALUATE,
            EnvironmentSelector.ASYMMETRIC_SUB_WATTEN_AGENT_RANDOM,
            EnvironmentSelector.TOTAL_WATTEN_AGENT_TRAIN,
            EnvironmentSelector.TOTAL_WATTEN_AGENT_EVALUATE,
            EnvironmentSelector.TOTAL_WATTEN_AGENT_RANDOM,
            EnvironmentSelector.TOTAL_WATTEN_AGENT_HUMAN,
            EnvironmentSelector.HAND_WATTEN_TRAIN,
            EnvironmentSelector.HAND_WATTEN_TRAIN_S_S,
            EnvironmentSelector.HAND_WATTEN_TRAIN_M_M,
            EnvironmentSelector.HAND_WATTEN_TRAIN_CNN,
            EnvironmentSelector.HAND_WATTEN_EVALUATE,
            EnvironmentSelector.HAND_WATTEN_EVALUATE_M_M,
            EnvironmentSelector.HAND_WATTEN_EVALUATE_S_S,
            EnvironmentSelector.HAND_WATTEN_EVALUATE_CNN,
            EnvironmentSelector.HAND_WATTEN_RANDOM,
            EnvironmentSelector.HAND_WATTEN_HUMAN,
        ]
예제 #11
0
class WorldTotalWatten(object):

    def __init__(self, logger=stdout_logger):
        """Create the world and immediately reset it to a fresh match.

        :param logger: logger stored as ``self.LOG``; the methods of this
            class call its ``debug`` method
        """
        self.LOG = logger
        # refresh() initialises every other attribute of the world
        self.refresh()

    def refresh(self):
        """Reset the world to the beginning of a brand new match.

        Creates a fresh sub_watten game, resets turn order, match scores and
        the debugging history, deals a new hand and records a textual
        snapshot of the resulting state in ``self.starting_state``.
        """
        # nested sub_watten game
        self.sub_watten_game = WattenSubGame()

        # players are encoded as 1 (player A) and -1 (player B)
        self.current_player = 1

        # player who distributes the cards when the game starts
        self.distributing_cards_player = -1

        # overall match scores and the points needed to win the match
        self.player_A_score = self.player_B_score = 0
        self.win_threshold = 15

        # deal the first hand (reads the scores and the threshold set above)
        self._refresh_state_single_hand()

        # nobody has won yet
        self.winning_player = None

        self.moves = moves

        # actions taken so far, kept for debugging purposes
        self.moves_series = []

        # textual snapshot of the state the match started from
        snapshot = (
            self.current_player,
            self.distributing_cards_player,
            self.player_A_score,
            self.player_B_score,
            self.player_A_hand,
            self.player_B_hand,
            self.played_cards,
            self.current_game_player_A_score,
            self.current_game_player_B_score,
            self.current_game_prize,
            self.is_last_move_raise,
            self.is_last_move_accepted_raise,
            self.is_last_hand_raise_valid,
            self.first_card_deck,
            self.last_card_deck,
            self.rank,
            self.suit,
        )
        self.starting_state = "\n" + ", ".join(str(field) for field in snapshot)

    def _refresh_state_single_hand(self):
        # init deck
        self.deck = list(range(33))
        np.random.shuffle(self.deck)

        # init starting hands
        self.player_A_hand = []
        self.player_B_hand = []

        # give cards to players
        self.player_A_hand += self.deck[-5:]
        self.deck = self.deck[:-5]
        self.player_B_hand += self.deck[-5:]
        self.deck = self.deck[:-5]

        # init board
        self.played_cards = []

        # init player scores, needs 3 for winning the hand
        # do not confuse those two fields with the total score achieved
        self.current_game_player_A_score = 0
        self.current_game_player_B_score = 0

        # is True only if the last move was a raise
        self.is_last_move_raise = False
        self.is_last_move_accepted_raise = False

        # raise in last hand implies some specific rules. see act method
        self.is_last_hand_raise_valid = None

        # first and last card in deck (doesn't really matter where those cards are taken :D )
        self.first_card_deck = self.deck[-1:][0]
        self.deck = self.deck[:-1]
        self.last_card_deck = self.deck[-1:][0]
        self.deck = self.deck[:-1]

        self.rank = None  # schlag
        self.suit = None  # farb

        self._set_initial_game_prize()

        self.sub_watten_moves_series = []

        for card in self.player_A_hand:
            if card in self.deck:
                raise InconsistentStateError("Card %d cannot be in deck." % card)
        for card in self.player_B_hand:
            if card in self.deck:
                raise InconsistentStateError("Card %d cannot be in deck." % card)

    def _set_initial_game_prize(self):
        if (self.win_threshold - self.player_A_score) <= 2:
            if self.player_B_score < 10:
                self.current_game_prize = 4
            else:
                self.current_game_prize = 3
            return
        if (self.win_threshold - self.player_B_score) <= 2:
            if self.player_A_score < 10:
                self.current_game_prize = 4
            else:
                self.current_game_prize = 3
            return

        self.current_game_prize = 2

    def get_valid_moves_zeros(self):
        valid_moves = self.get_valid_moves()
        if len(valid_moves) == 0:
            self.display()
            raise ValidMovesError("Valid moves cannot be 0!")
        valid_moves_zeros = [0] * 5  # number of possible moves
        for valid_move in valid_moves:
            valid_moves_zeros[valid_move] = 1
        return valid_moves_zeros

    def get_valid_moves(self):
        """
        Return the moves the current player may choose from.

        While a raise is pending (made but not yet accepted) the player can
        only fold or accept it. Otherwise the player can make the best
        sub_watten move and, when raising is currently allowed, raise.

        :rtype: list
        """
        raise_pending = self.is_last_move_raise and not self.is_last_move_accepted_raise

        if raise_pending:
            # answer to a raise: fold or accept
            allowed = [moves["fold_hand"], moves["accept_raise"]]
            if self.is_last_hand_raise_valid is not None:
                allowed.append(moves["fold_hand_and_show_valid_raise"])
        else:
            # regular turn: play the best sub_watten move, optionally raise
            allowed = [moves["make_best_move"]]
            raise_allowed = (
                not (self.is_last_move_raise or self.is_last_move_accepted_raise)
                and self.is_last_hand_raise_valid is None
                and self.check_allowed_raise_situation()
            )
            if raise_allowed:
                allowed.append(self.moves["raise_points"])

        self.LOG.debug(f"Valid moves for player [{self.current_player}] are {allowed}")
        return allowed

    def check_allowed_raise_situation(self):
        # it makes sense to raise only if a player can't win the game with the current game prize
        if self.current_player == 1 and (self.player_A_score + self.current_game_prize) < self.win_threshold:
            return True
        if self.current_player == -1 and (self.player_B_score + self.current_game_prize) < self.win_threshold:
            return True
        return False

    # Make a single move and apply the changes to the inner state of the world.
    # The method returns 2 values: the outcome of the move and the next player.
    # The outcomes returned by the code below are:
    # - "end": the hand is over (a player reached a hand score of 3, a player folded, or a card was
    #   played after a last-hand raise that was not valid); a new hand is dealt before returning
    # - "continue": the move did not bring the current hand to an end
    # NOTE(review): earlier notes also listed a `current_player_won` outcome, but no branch returns it.
    # The next player can be either 1 or -1.
    def act(self, action, agent):
        """Apply ``action`` for the current player and return ``(outcome, next_player)``.

        :param action: one of the values of the module-level ``moves`` mapping
            (raise, accept raise, fold, fold and show valid raise, make best move)
        :param agent: object whose ``predict(game, player)`` returns a pair
            ``(move scores, value)``; only used for the "make_best_move" action
        :return: tuple ``("continue" | "end", next player)``
        :raises InvalidActionError: if the action is not among the valid moves,
            is greater than 4, or contradicts the raise flags
        :raises InconsistentStateError: if a hand score exceeds 3, the chosen
            card is not in the player's hand, or the move cannot be interpreted
        """
        # number of cards on the table BEFORE this move; its parity tells
        # whether the card played now opens or closes a trick
        num_played_cards = len(self.played_cards)
        if action not in self.get_valid_moves():
            raise InvalidActionError("Action %d cannot be played" % action)

        if action > 4:
            raise InvalidActionError("Action %d is not valid" % action)
        if self.current_game_player_A_score > 3 or self.current_game_player_B_score > 3:
            raise InconsistentStateError("Current game score cannot exceed 3. Player 1 [%d] and player -1 [%d]"
                                         % (self.current_game_player_A_score, self.current_game_player_B_score))

        # keep the history of actions for debugging
        self.moves_series.append(action)

        if action == moves["raise_points"]:
            if self.is_last_move_raise or self.is_last_move_accepted_raise or self.is_last_hand_raise_valid is not None:
                raise InvalidActionError("Cannot raise if the previous move was a raise")
            self.LOG.debug(f"{self.current_player} raised points")
            self.is_last_move_raise = True
            # with 8 or more cards played the raise happens in the last hand:
            # remember whether the raising player was entitled to it
            if num_played_cards >= 8:
                self.is_last_hand_raise_valid = self._last_hand_raise_valid()
            self.current_game_prize += 1
            return self._act_continue_move()

        if action == moves["accept_raise"]:
            if self.is_last_move_raise is False or self.is_last_move_accepted_raise:
                raise InvalidActionError("Cannot accept raise if the previous move was not a raise")
            self.LOG.debug(f"{self.current_player} accepted raise")
            self.is_last_move_accepted_raise = True
            self.is_last_move_raise = False
            return self._act_continue_move()

        # if a player folds, then the prize is given to the opponent
        if action == moves["fold_hand"] or action == moves["fold_hand_and_show_valid_raise"]:
            if self.is_last_move_raise is False or self.is_last_move_accepted_raise:
                raise InvalidActionError("Cannot fold hand if the previous move was not a raise")
            self.LOG.debug(f"{self.current_player} folds hand")
            self._assign_points_fold()
            self._assign_winning_player()
            # the hand is over: the dealer becomes the current player, the
            # deal passes to the other player and a new hand is dealt
            self.current_player = self.distributing_cards_player
            self.distributing_cards_player = self.distributing_cards_player * -1
            self._refresh_state_single_hand()
            return "end", self.current_player

        # if an action is not a raise, an accept raise or a fold, then the next move is definitely going to
        # reset the chance for raising
        self.is_last_move_accepted_raise = False
        self.is_last_move_raise = False

        # if an action is make best move, then the sub_watten agent will predict the move
        if action == moves["make_best_move"]:
            # the flag was reset to False just above, so this check cannot trigger here
            if self.is_last_move_raise:
                raise InvalidActionError("Cannot play a card if the previous move was a raise")

            self.LOG.debug(f"{self.current_player} made best sub_watten move")

            # set sub_watten game to represent the current state
            self.sub_watten_game.trueboard.init_world_to_state(self.current_player, self.distributing_cards_player,
                                                               self.player_A_hand, self.player_B_hand, self.played_cards,
                                                               self.current_game_player_A_score,
                                                               self.current_game_player_B_score, self.first_card_deck,
                                                               self.last_card_deck, self.rank, self.suit)

            # get predictions from nnet or human
            best_move_array, v = agent.predict(self.sub_watten_game, self.sub_watten_game.get_cur_player())

            # a plain list is converted so the multiplication below is element-wise
            if type(best_move_array) == list:
                best_move_array = np.array(best_move_array, dtype=float)

            # mask invalid moves
            # presumably valid_moves is a 0/1 vector over all sub_watten moves - TODO confirm
            valid_moves = self.sub_watten_game.get_valid_moves(self.sub_watten_game.get_cur_player())
            best_move_array = best_move_array*valid_moves

            # index of the move in sub_watten
            move = np.argmax(best_move_array)

            self.sub_watten_moves_series.append(move)

            # moves 33..41 pick the rank; the stored rank is the offset from 33
            if 33 <= move < 42:
                self.rank = move % 33
                self.LOG.debug(f"{self.current_player} picked rank [{self.rank}]")
                return self._act_continue_move()

            # moves 42..45 pick the suit; the stored suit is the offset from 42
            if 42 <= move < 46:
                self.suit = move % 42
                self.LOG.debug(f"{self.current_player} picked suit [{self.suit}]")
                return self._act_continue_move()

            # moves 0..32 play the card with that id
            if 0 <= move < 33:
                hand = self._get_current_player_hand()
                if move not in hand:
                    self.display()
                    raise InconsistentStateError(
                        'Played card [%d] not in %s of player %d' % (move, hand, self.current_player))

                self.LOG.debug(f"{self.current_player} played card {move}")

                self._remove_card_from_hand(move, self.current_player)

                # even number of cards on the table: this card opens a trick
                if num_played_cards % 2 == 0:
                    if self.is_last_hand_raise_valid is not None and not self.is_last_hand_raise_valid:
                        # played cards are 8 and current player also raised without respecting the conditions
                        # the opponent of the current player receives the prize and the hand ends
                        if self.current_player == 1:
                            self.player_B_score += self.current_game_prize
                        else:
                            self.player_A_score += self.current_game_prize
                        return self._hand_is_done_after_card_is_played_common()
                    self.played_cards.append(move)
                    return self._act_continue_move()
                # odd number of cards on the table: this card answers the previous one
                else:
                    if self.is_last_hand_raise_valid is not None and not self.is_last_hand_raise_valid:
                        # played cards are 9 and current player also raised without respecting the conditions
                        # the opponent of the current player receives the prize and the hand ends
                        if self.current_player == 1:
                            self.player_B_score += self.current_game_prize
                        else:
                            self.player_A_score += self.current_game_prize
                        return self._hand_is_done_after_card_is_played_common()

                    last_played_card = self._get_last_played_card()
                    self.played_cards.append(move)
                    current_played_card = move
                    # compare_cards is called with the earlier card first; the
                    # current player wins when it does not return True
                    current_player_wins = not self.compare_cards(last_played_card, current_played_card)
                    next_player_move = self._assign_points_move(current_player_wins)

                    # a hand score of 3 ends the hand and pays the prize to that player
                    if self.current_game_player_A_score == 3 or self.current_game_player_B_score == 3:
                        if self.current_game_player_A_score == 3:
                            self.player_A_score += self.current_game_prize
                        else:
                            self.player_B_score += self.current_game_prize
                        return self._hand_is_done_after_card_is_played_common()

                    # the player returned by _assign_points_move plays next
                    self.current_player = next_player_move
                    return "continue", next_player_move

            # the predicted move is outside every known range
            raise InconsistentStateError("Best_move %d is not allowed." % move)

        # the action passed validation but matched none of the branches above
        self.display()
        raise InconsistentStateError("Action %d is not allowed." % action)

    def _hand_is_done_after_card_is_played_common(self):
        self._assign_winning_player()
        self.current_player = self.distributing_cards_player
        self.distributing_cards_player = self.distributing_cards_player * -1
        self._refresh_state_single_hand()
        return "end", self.current_player

    def _act_continue_move(self):
        self.current_player = self.current_player * -1
        return "continue", self.current_player

    def _assign_winning_player(self):
        if self.player_A_score >= self.win_threshold:
            self.winning_player = 1
        elif self.player_B_score >= self.win_threshold:
            self.winning_player = -1

    def _remove_card_from_hand(self, action, player):
        if player == 1:
            self.player_A_hand.remove(action)
            return
        if player == -1:
            self.player_B_hand.remove(action)
            return
        raise InvalidActionError("Player should be either 1 or -1. Got %d" % player)

    # if a player folds, then the prize is given to the opponent except when the raise was done in a not valid situation
    def _assign_points_fold(self):
        fold_points = self.current_game_prize - 1
        if self.is_last_hand_raise_valid is None or self.is_last_hand_raise_valid:
            if self.current_player == 1:
                self.player_B_score += fold_points
            if self.current_player == -1:
                self.player_A_score += fold_points
        else:
            if self.current_player == 1:
                self.player_A_score += fold_points
            if self.current_player == -1:
                self.player_B_score += fold_points

    # Returns True if the raise made by the current player in the last hand is legitimate.
    # The first card of the current player's hand is accepted when:
    # - it is a trumpf, or
    # - 9 cards were already played and it has the same suit as the last played card, or
    # - 9 cards were already played and compare_cards reports that the last played card
    #   does not win against it
    def _last_hand_raise_valid(self):
        cards_on_table = len(self.played_cards)

        if cards_on_table != 8 and cards_on_table != 9:
            raise InconsistentStateError("Num played cards when fold occurs in last hand can be either 8 or 9. Got %d." % cards_on_table)

        held_card = self._get_current_player_hand()[0]
        held_rank, held_suit = get_rs(held_card)

        if self.is_trumpf(held_rank, held_suit):
            return True

        # with only 8 cards played there is no opponent card to compare with
        if cards_on_table != 9:
            return False

        previous_card = self._get_last_played_card()
        _, previous_suit = get_rs(previous_card)

        if held_suit == previous_suit:
            return True
        if not self.compare_cards(previous_card, held_card):
            return True
        return False

    # after two cards have been compared, assign points and returns the player that should play the next move
    def _assign_points_move(self, current_player_wins):
        """Give one current-hand point to the winner of the comparison.

        Args:
            current_player_wins: truthy when the current player won.

        Returns:
            The player who scored and moves next: 1 (player A) or -1 (player B).
        """
        # A scores when A is the current player and wins, or when B is the
        # current player and loses; in every other case B scores.
        current_is_a = bool(self.current_player == 1)
        if bool(current_player_wins) == current_is_a:
            self.current_game_player_A_score += 1
            return 1

        self.current_game_player_B_score += 1
        return -1

    # routine for deciding whether a card (card1) wins over another card (card2)
    # returns true if the first card wins, false otherwise
    # the first card is expected to be played before the second one
    #
    # ORDER OF IMPORTANCE:
    # - Rechte (card with the same suit and rank chosen when the game started)
    # - Blinden (cards with the same rank of the chosen rank)
    # - Trümpfe (cards with the same suit of the chosen suit)
    # - Other cards (importance given by the rank)
    def compare_cards(self, card1, card2):
        """Decide whether ``card1`` beats ``card2``.

        ``card1`` is expected to have been played before ``card2``. Cards are
        ranked by tier: Rechte, then Blinden, then Trümpfe, then all other
        cards (ordered by rank within the same suit).

        Returns:
            True if the first card wins, False (or a falsy value) otherwise.
        """
        first_rank, first_suit = get_rs(card1)
        second_rank, second_suit = get_rs(card2)

        # --- Rechte: the strongest card, whoever holds it wins ---
        if self.is_rechte(first_rank, first_suit):
            return True
        if self.is_rechte(second_rank, second_suit):
            return False

        # --- Blinden: second strongest tier, the first played one wins ---
        if self.is_blinde(first_rank):
            return True
        if self.is_blinde(second_rank):
            return False

        # --- Trümpfe ---
        if self.is_trumpf(first_rank, first_suit):
            # a trumpf beats any card that is not a trumpf
            if not self.is_trumpf(second_rank, second_suit):
                return True
            # trumpf against trumpf: the higher rank wins
            return self.is_rank_higher(first_rank, second_rank)

        # the first card is not a trumpf; a second card of the chosen suit wins
        if second_suit == self.suit:
            return False

        # --- Other cards ---
        # a second card that does not follow suit loses; otherwise the rank decides
        return True if first_suit != second_suit else self.is_rank_higher(first_rank, second_rank)

    def is_rechte(self, card_rank, card_suit):
        """Return True when the card is the Rechte.

        A card qualifies when it matches both the chosen rank and the chosen
        suit. When the chosen rank is 8, every card of rank 8 qualifies
        regardless of its suit.
        """
        if self.rank == 8 and card_rank == 8:
            return True
        return bool(card_rank == self.rank and card_suit == self.suit)

    def is_blinde(self, card_rank):
        """Return True when the card has the chosen rank."""
        return bool(card_rank == self.rank)

    def is_trumpf(self, card_rank, card_suit):
        """Return True when the card is a Trumpf.

        A Trumpf is a card of the chosen suit that is not the Rechte.

        Args:
            card_rank: rank of the card to test.
            card_suit: suit of the card to test.

        Returns:
            bool: always an explicit True/False (never an implicit None), in
            line with ``is_rechte`` and ``is_blinde``.
        """
        # the Rechte belongs to its own, stronger tier
        if self.is_rechte(card_rank, card_suit):
            return False
        return bool(self.suit == card_suit)

    def is_rank_higher(self, card1_rank, card2_rank):
        """Return True when the first rank is strictly higher than the second.

        Rank 8 (the weli) is treated as the lowest rank: it never beats
        anything, and every other rank beats it.
        """
        weli_rank = 8
        if weli_rank in (card1_rank, card2_rank):
            # the first card wins only if it is not the weli itself
            return not (card1_rank == weli_rank)
        return card1_rank > card2_rank

    def is_game_end(self):
        """Return True once either player's overall score reaches the win threshold."""
        target = self.win_threshold
        return bool(self.player_A_score >= target or self.player_B_score >= target)

    # this is called after act, player is the next player
    def is_won(self, player):
        """Tell whether the opponent of ``player`` has reached the win threshold.

        Args:
            player: 1 or -1. Per the comment on this method it is the *next*
                player, so the score that gets checked is the other player's.

        Returns:
            True if ``player`` is -1 and player A reached the threshold, or if
            ``player`` is 1 and player B reached it; False otherwise.

        Raises:
            InvalidInputError: if ``player`` is neither 1 nor -1.
            InconsistentStateError: if both players reached the threshold.
        """
        if player not in (1, -1):
            raise InvalidInputError("Player should be either 1 or -1. Input is %d." % player)

        a_reached = self.player_A_score >= self.win_threshold
        b_reached = self.player_B_score >= self.win_threshold

        if a_reached and b_reached:
            raise InconsistentStateError("Both player cannot exceed score threshold. Only one winner is allowed.")

        # look at the score of the player that is NOT ``player``
        opponent_reached = a_reached if player == -1 else b_reached
        return bool(opponent_reached)

    def get_player(self):
        """Return the player whose turn it is (1 for player A, -1 for player B)."""
        return self.current_player

    # should return a unique id with the state of the game
    # the needed info are:
    # - observation value of current sub_watten state (32)
    # - points current hand current player (max 2)
    # - points current hand opponent player (max 2)
    # - points game current player (max 14)
    # - points game opponent player (max 14)
    # - last move raise (1)
    # - last move accepted raise (1)
    # - last hand raise valid (1)
    # - current prize (13)
    # - isRankNone (1)
    # - isSuitNone (1)
    def observe(self, player, agent):
        """Build the 82-entry observation vector of the game for ``player``.

        Layout of the vector (offset: content):
            0-31:  the 32 characters produced by ``int_to_binary`` for the
                   sub-game value ``v``
            32-33: one-hot of ``player``'s points in the current hand
            34-35: one-hot of the opponent's points in the current hand
            36-49: one-hot of ``player``'s overall game points
            50-63: one-hot of the opponent's overall game points
            64:    1 if the last move was a raise
            65:    1 if the last move was an accepted raise
            66:    1 if ``is_last_hand_raise_valid`` is not None
            67-79: one-hot of the current prize (a prize of 3 maps to offset 67)
            80:    1 if the rank has been chosen
            81:    1 if the suit has been chosen

        In the one-hot score fields a value of 0 leaves the field empty and a
        value of k sets slot k - 1 of the field.

        Unless ``agent`` is a ``SubWattenHumanAgent``, this method overwrites
        the state of ``self.sub_watten_game.trueboard`` with the current state
        before asking the agent for a prediction.

        Args:
            player: 1 (player A) or -1 (player B); decides which scores are
                written as "current" and which as "opponent".
            agent: for a ``SubWattenHumanAgent`` the value ``v`` is fixed to 0;
                for any other agent ``agent.predict`` is called on the sub-game
                and its second return value is used as ``v``.

        Returns:
            numpy array of shape (82, 1).

        Raises:
            InvalidInputError: if ``player`` is neither 1 nor -1.
        """
        if player not in [1, -1]:
            raise InvalidInputError("Player should be either 1 or -1. Input is %d." % player)

        observation = np.zeros((82,))

        # check if agent is human or not
        if isinstance(agent, SubWattenHumanAgent):
            # no sub-game evaluation is requested for a human agent
            v = 0
        else:
            # When last move is a raise, in the sub_game the player moving should
            # be the one that played
            if self.is_last_move_raise:
                sub_game_current_player = self.current_player*-1
            else:
                sub_game_current_player = self.current_player
            # set sub_watten game to current state
            self.sub_watten_game.trueboard.init_world_to_state(sub_game_current_player, self.distributing_cards_player,
                                                               self.player_A_hand, self.player_B_hand, self.played_cards,
                                                               self.current_game_player_A_score,
                                                               self.current_game_player_B_score, self.first_card_deck,
                                                               self.last_card_deck, self.rank, self.suit)

            # NOTE(review): players is indexed with self.current_player (1 or -1),
            # not with the ``player`` argument nor with sub_game_current_player - confirm intended
            observing_player = self.sub_watten_game.players[self.current_player]

            # observation value of sub_watten state
            best_move_array, v = agent.predict(self.sub_watten_game, observing_player)

        # presumably the 32-bit float representation of v as a string of '0'/'1' - verify against helpers
        v_bin_string = int_to_binary(float32_bit_pattern(v), 32)

        for i in range(32):
            observation[i] = int(v_bin_string[i])

        # points current hand current player
        index = 32  # 32
        points_current_hand_current = self.current_game_player_A_score if player == 1 else self.current_game_player_B_score
        if points_current_hand_current != 0:
            observation[index + points_current_hand_current - 1] = 1

        # points current hand opponent player
        index += 2  # 34
        points_current_hand_opponent = self.current_game_player_B_score if player == 1 else self.current_game_player_A_score
        if points_current_hand_opponent != 0:
            observation[index + points_current_hand_opponent - 1] = 1

        # points game current player
        # NOTE(review): the field is 14 slots wide; a score above 14 would land in the next field - confirm callers never observe such a state
        index += 2  # 36
        points_game_current = self.player_A_score if player == 1 else self.player_B_score
        if points_game_current != 0:
            observation[index + points_game_current - 1] = 1

        # points game opponent player
        index += 14  # 50
        points_game_opponent = self.player_B_score if player == 1 else self.player_A_score
        if points_game_opponent != 0:
            observation[index + points_game_opponent - 1] = 1

        # last move raise
        index += 14  # 64
        if self.is_last_move_raise:
            observation[index] = 1

        # last move accepted raise
        index += 1  # 65
        if self.is_last_move_accepted_raise:
            observation[index] = 1

        # last hand raise valid: the slot only records whether the flag is set
        # (not None), not whether its value is True or False
        index += 1  # 66
        if self.is_last_hand_raise_valid is None:
            observation[index] = 0
        else:
            observation[index] = 1

        # current prize: prizes below 3 leave the field empty
        index += 1  # 67
        if self.current_game_prize - 3 >= 0:
            observation[index + self.current_game_prize - 3] = 1

        # rank chosen
        index += 13  # 80
        if self.rank is not None:
            observation[index] = 1

        # suit chosen
        index += 1  # 81
        if self.suit is not None:
            observation[index] = 1

        # total size = 81 + 1 = 82

        observation = observation.reshape((82, 1))
        return observation

    # def observation_str_raw(self, observe):
    #     new_observe = np.concatenate((observe, np.array([[1 if self.current_player == 1 else 0]])))
    #     print(new_observe)

    def _get_last_played_card(self):
        """Return the most recently played card, or None when nothing has been played."""
        return self.played_cards[-1] if len(self.played_cards) else None

    def _get_opponent_hand(self):
        """Return the hand of the player who is not the current player."""
        if self.current_player == 1:
            return self.player_B_hand
        return self.player_A_hand

    def _get_current_player_hand(self):
        """Return the hand of the current player (A when current_player is 1, B otherwise)."""
        if self.current_player == 1:
            return self.player_A_hand
        return self.player_B_hand

    def display(self):
        """Log a human-readable dump of the game state and of the recorded moves.

        Four INFO records are written to ``self.LOG``: the state summary, the
        starting state, the series of moves and the series of sub-game moves.
        """
        # an accepted raise takes precedence over a plain raise in the label
        raise_label = "- RAISE" if self.is_last_move_raise else ""
        if self.is_last_move_accepted_raise:
            raise_label = "- ACCEPTED RAISE"

        state_summary = (
            f"--- State of the game ---\nCurrent player: |{self.current_player}| "
            f"and current game prize |{self.current_game_prize}| {raise_label}"
            f"\nPlayer 1 points: |{self.player_A_score}| - Player -1 points: |{self.player_B_score}|"
            f"\nPlayer 1 current: |{self.current_game_player_A_score}| - "
            f"Player  -1 current: |{self.current_game_player_B_score}|"
            f"\nPlayer  1 hand: {self._str_cards(self.player_A_hand)} - {self.player_A_hand}"
            f"\nPlayer -1 hand: {self._str_cards(self.player_B_hand)} - {self.player_B_hand}"
            f"\nRank: |{self.rank} - {rank_names[self.rank]}|, Suit: |{self.suit} - {suit_names[self.suit]}|"
            f"\nPlayed cards: {self._str_cards(self.played_cards)}"
            f"\nDist: {self.distributing_cards_player}, lhrv: {self.is_last_hand_raise_valid}, first card: {self.first_card_deck}, last card: {self.last_card_deck}"
        )
        log = self.LOG
        log.info(state_summary)

        # debugging history
        log.info(f"Starting state: {self.starting_state}")
        log.info(f"Moves series: {self.moves_series}")
        log.info(f"SubWatten moves series: {self.sub_watten_moves_series}")

    def _str_cards(self, cards):
        """Format cards as 'name (id)' entries separated by ', '.

        Args:
            cards: sequence of card ids.

        Returns:
            The formatted string; empty when ``cards`` is empty.
        """
        return ", ".join(
            human_readable_card(card) + ' ({})'.format(card)
            for card in cards
        )

    def deepcopy(self):
        """Create an independent copy of this world.

        Scalar-like attributes and the logger are assigned as they are,
        container attributes are duplicated with their ``copy()`` method and
        the sub-game is duplicated with its ``clone()`` method.

        Returns:
            A new ``WorldTotalWatten`` carrying the same state.
        """
        # attributes handed over unchanged
        shared_attrs = (
            "LOG",
            "current_player",
            "distributing_cards_player",
            "player_A_score",
            "player_B_score",
            "current_game_player_A_score",
            "current_game_player_B_score",
            "current_game_prize",
            "is_last_move_raise",
            "is_last_move_accepted_raise",
            "win_threshold",
            "first_card_deck",
            "last_card_deck",
            "rank",
            "suit",
            "is_last_hand_raise_valid",
            "winning_player",
            "starting_state",
        )
        # containers that must not be shared between the two worlds
        copied_attrs = (
            "deck",
            "player_A_hand",
            "player_B_hand",
            "played_cards",
            "moves",
            "moves_series",
            "sub_watten_moves_series",
        )

        clone = WorldTotalWatten()
        for attr in shared_attrs:
            setattr(clone, attr, getattr(self, attr))
        for attr in copied_attrs:
            setattr(clone, attr, getattr(self, attr).copy())
        clone.sub_watten_game = self.sub_watten_game.clone()
        return clone

    def init_world_to_state(self, current_player, distributing_cards_player, player_A_score, player_B_score,
                            player_A_hand, player_B_hand, played_cards, current_game_player_A_score,
                            current_game_player_B_score, current_game_prize, is_last_move_raise,
                            is_last_move_accepted_raise, is_last_hand_raise_valid, first_card_deck, last_card_deck, rank, suit):
        """Overwrite the state of this world with the given values.

        Every argument is stored on the instance under the attribute of the
        same name. The values are stored as passed (containers are not copied).
        """
        new_state = {
            "current_player": current_player,
            "distributing_cards_player": distributing_cards_player,
            "player_A_score": player_A_score,
            "player_B_score": player_B_score,
            "player_A_hand": player_A_hand,
            "player_B_hand": player_B_hand,
            "played_cards": played_cards,
            "current_game_player_A_score": current_game_player_A_score,
            "current_game_player_B_score": current_game_player_B_score,
            "current_game_prize": current_game_prize,
            "is_last_move_raise": is_last_move_raise,
            "is_last_move_accepted_raise": is_last_move_accepted_raise,
            "is_last_hand_raise_valid": is_last_hand_raise_valid,
            "first_card_deck": first_card_deck,
            "last_card_deck": last_card_deck,
            "rank": rank,
            "suit": suit,
        }
        for attr_name, attr_value in new_state.items():
            setattr(self, attr_name, attr_value)
예제 #12
0
 def test_get_observation_size(self):
     """A fresh sub-game reports an observation size of (221, 1)."""
     expected_size = (221, 1)
     self.assertEqual(WattenSubGame().get_observation_size(), expected_size)
예제 #13
0
 def test_get_action_size(self):
     """A fresh sub-game reports an action size of 46."""
     expected_size = 46
     self.assertEqual(WattenSubGame().get_action_size(), expected_size)
예제 #14
0
 def test_get_player_num(self):
     """A fresh sub-game reports two players."""
     expected_players = 2
     self.assertEqual(WattenSubGame().get_players_num(), expected_players)