コード例 #1
0
    def __str__(self):
        '''
        Return a multi-line summary of the player: name, prestige, tokens and bonuses, one item per line
        '''
        header = "-- Player : " + self.name + " --"
        prestige_line = "Prestige : " + str(self.prestige)
        tokens_line = "Tokens : " + game.tokens_to_str(self.tokens)
        bonus_line = "Bonus  : " + game.tokens_to_str(self.bonuses)
        return "\n".join((header, prestige_line, tokens_line, bonus_line))
コード例 #2
0
    def convert_state(self, state, to='id'):
        '''
        Convert a state between its 'id', 'dic' and 'list' representations.

        The current representation is inferred from the type of <state>:
            - int (or a str, which is first parsed with int()) : 'id'
            - dict : 'dic'
            - list (or a tuple, which is first turned into a list) : 'list'

        <to> is the target representation, one of 'id', 'dic' or 'list'.

        Returns the converted state, or the (normalized) state itself when it
        already is in the target representation. Returns None, after logging a
        warning, when the type of <state> or the value of <to> is not supported.
        '''
        # Normalize equivalent input types first
        if isinstance(state, tuple):
            state = list(state)
        if isinstance(state, str):
            state = int(state)

        if isinstance(state, int):
            cur = 'id'
        elif isinstance(state, dict):
            cur = 'dic'
        elif isinstance(state, list):
            cur = 'list'
        else:
            # The backslash is escaped: "\ " is an invalid escape sequence
            game.warning("/!\\ Invalid state format", state)
            return None
        if cur == to:
            return state

        if to not in ('id', 'dic', 'list'):
            # Previously this case fell through and returned None silently
            game.warning("/!\\ Invalid target format", to)
            return None

        # Converters follow the naming scheme state_<from>_to_<to>
        converter = getattr(self, 'state_' + cur + '_to_' + to)
        return converter(state)
コード例 #3
0
    def observe(self, state, reward, done, actions):
        '''
        Decide the next action from the observed <state> and the available <actions>.
        <reward> and <done> are accepted but not used here.
        The result of <find_next_action> is indexed: item 0 is stored in <self.next_action>, item 1 is logged as the monte carlo score
        '''
        decision = self.find_next_action(state, actions)
        self.next_action = decision[0]
        game.out(
            "Next action will be",
            self.next_action,
            "; monte carlo gives a score of",
            decision[1],
        )
コード例 #4
0
    def act(self):
        '''
        Choose an action type with the softmax policy applied to the Q-values of the current state, then choose a concrete action of that type.
        The chosen type is stored in <self.last_action_id> and the chosen action in <self.next_action>, which is also returned
        '''
        # Group the available actions by action-type id
        buckets = {type_id: [] for type_id in range(len(self.action_space))}
        for candidate in self.actions:
            type_id = self.classify_action(candidate, to='id')
            buckets[type_id].append(candidate)

        state_id = self.get_current_state_id()
        # Work on a copy so that the stored Q-values are left untouched
        scores = self.Q[state_id, :].copy()

        chosen_type = self.softmax_policy(scores)
        while len(buckets[chosen_type]) == 0:
            # No action of that type is available: zero its score and draw again
            scores[chosen_type] = 0
            chosen_type = self.softmax_policy(scores)
        action = self.choose(buckets[chosen_type], chosen_type)

        self.last_action_id = chosen_type
        self.next_action = action

        game.out("Deciding to do", action)
        return action
コード例 #5
0
    def act_old(self):
        '''
        Epsilon-greedy action selection.
        With probability <self.epsilon>, return a random action among <self.actions>.
        Otherwise walk through the action types by decreasing Q-value for the current state until a type with at least one available action is found, and choose an action of that type
        '''
        explore = random.random() < self.epsilon
        if explore:
            return random.choice(self.actions)

        # Group the available actions by action-type id
        buckets = {type_id: [] for type_id in range(len(self.action_space))}
        for candidate in self.actions:
            type_id = self.classify_action(candidate, to='id')
            buckets[type_id].append(candidate)

        state_id = self.get_current_state_id()
        # Action-type ids sorted by decreasing Q-value
        ranking = np.flip(np.argsort(self.Q[state_id, :]), axis=0)
        position = 0
        chosen_type = ranking[position]
        candidates = buckets[chosen_type]
        while len(candidates) == 0:
            position += 1
            if position == len(ranking):
                # Every type has been tried: fall back on doing nothing
                candidates = [{'type': 'do_nothing', 'params': None}]
            else:
                chosen_type = ranking[position]
                candidates = buckets[chosen_type]

        action = self.choose(candidates, chosen_type)
        self.next_action = action
        game.out("Deciding to do", action)
        return action
コード例 #6
0
    def __init__(self, adversarial=True, state=None):
        '''
        Create a new game.
        <state> is used as the game state when provided; otherwise a fresh State is built with the given <adversarial> flag
        '''
        # Identity test: a custom __eq__/__ne__ on the state object must not
        # decide whether a state was provided
        self.state = state if state is not None else State(adversarial=adversarial)
        self.step = 0
        self.position = 0
        self.GAME_ENDED = False

        game.out("New game", verbose=-1)
コード例 #7
0
 def reserve_card(self, state, card):
     '''
     Reserve <card>: add it to the player's hand, then take one joker token from <state>.
     The hand must hold fewer than 3 cards, otherwise the assertion fails
     '''
     hand_has_room = len(self.hand) < 3
     assert hand_has_room, ("Too many cards in hand for " + self.name)
     self.hand.append(card)
     game.out(self.name, "reserved the following card :", card)
     joker = (game.JOKER_COLOR, 1)
     self.take_tokens(state, [joker])
コード例 #8
0
    def reset(self):
        '''
        Reset the underlying state, the step counter and the end-of-game flag, then return the result of <self.get_player()>
        '''
        self.state.reset()
        self.step, self.GAME_ENDED = 0, False
        game.out("Environment reset")

        player = self.get_player()
        return player
コード例 #9
0
 def get_noble_from_id(self, state, noble_id):
     '''
     Remove the noble tile at index <noble_id> from <state.tiles>, add it to the player's nobles and grant 3 prestige points
     '''
     visiting_noble = state.tiles.pop(noble_id)
     self.nobles.append(visiting_noble)
     self.prestige += 3
     game.out(
         self.name, "has gained a noble", visiting_noble, "and now have",
         self.prestige, "prestige points")
コード例 #10
0
 def choose_noble(self, state, nobles_id):
     '''
     Choose which noble visits the player at the end of his/her turn.
     If several nobles can visit, one of them is chosen randomly. If there is only one, then there's no choice.
     The chosen noble is then given to the player through <get_noble_from_id>.
     NOTE: <nobles_id> is a list of nobles' ids, i.e. their indices in <state.tiles>
     '''
     if len(nobles_id) == 1:
         noble_id = nobles_id[0]
     else:
         game.out(self.name, "can be visited by", len(nobles_id), "nobles")
         noble_id = random.choice(nobles_id)
     self.get_noble_from_id(state, noble_id)
コード例 #11
0
 def pay(self, state, amount, color):
     '''
     Move <amount> tokens of <color> from the player to <state>.
     If the player owns fewer tokens than requested, a warning is logged and only the owned tokens are paid
     '''
     owned = self.tokens[color]
     if owned < amount:
         game.out("WARNING : the player", self.name, "tried to pay", amount,
                  " ", color, "tokens but had only", owned)
         game.out(self)
         # Never pay more than what the player actually holds
         amount = owned
     self.tokens[color] -= amount
     state.tokens[color] += amount
コード例 #12
0
 def possible_transition(self, state, action):
     '''
     Tell whether <action> can be performed in <state>.
     <action> is either an action type (string) or an action object holding a 'type' key.
     'do_nothing' is always possible; for the other known types the answer is read from <state>['can_' + type].
     Unknown types are reported with a warning and considered impossible
     '''
     # Convert action object to string. Strings are excluded from the
     # membership test, which would otherwise be a substring search
     if not isinstance(action, str) and 'type' in action:
         action = action['type']

     if action == 'do_nothing':
         return True
     if action in ('buy_card', 'buy_prestige', 'reserve', 'take_3', 'take_2'):
         return state['can_' + action]

     # The backslash is escaped: "\ " is an invalid escape sequence
     game.warning("/!\\ Unknown action type :", action)
     return False
コード例 #13
0
ファイル: State.py プロジェクト: felix-martel/splendor-ai
    def reset(self, seed=None):
        '''
        Reset the state: reseed the random generator with <seed>, clear the turn counter, the current player and the end-of-game flags, then set up a new deck
        '''
        random.seed(seed)

        # State. These attributes are cleared BEFORE the deck is set up because
        # _init_deck logs the turn number and the current player: resetting
        # them afterwards made it report the values of the previous game
        self.turn = 0
        self.current_player = 0
        self.TARGET_REACHED = False
        self.GAME_ENDED = False
        self.winner = "(none)"

        self._init_deck()

        game.out("State reset")
コード例 #14
0
    def take_tokens(self, state, tokens):
        '''
        Move tokens from the table (<state>) to the player and log what was taken.
        <tokens> should be a list of tuples (<token_color>, <token_quantity>)
        '''
        taken = [
            str(quantity) + " " + shade + " token(s)"
            for shade, quantity in tokens
        ]
        message = self.name + " took " + ", ".join(taken)

        for shade, quantity in tokens:
            state.tokens[shade] -= quantity
            self.tokens[shade] += quantity
            # Keep the player's total number of tokens in sync
            self.n_tokens += quantity

        game.out(message)
コード例 #15
0
 def heuristic(self, action, action_type):
     '''
     Score <action> with the heuristic dedicated to its <action_type>.
     Each known type is handled by the method named _heuristic_<action_type>.
     Unknown types are reported with a warning and scored 0
     '''
     known_types = ('take_3', 'take_2', 'reserve', 'buy_card',
                    'buy_prestige', 'do_nothing')
     if action_type in known_types:
         scorer = getattr(self, '_heuristic_' + action_type)
         return scorer(action)

     # The backslash is escaped: "\ " is an invalid escape sequence
     game.warning("/!\\ Unknown action type in heuristic filtering", action, action_type)
     return 0
コード例 #16
0
ファイル: Utils.py プロジェクト: felix-martel/splendor-ai
def subtract_tokens(tokens_a, tokens_b):
    '''
    Return a new token bag holding, for every color of an empty bag, tokens_a[color] - tokens_b[color]
    '''
    difference = game.get_empty_token_bag()
    for shade in difference:
        left, right = tokens_a[shade], tokens_b[shade]
        difference[shade] = left - right
    return difference
コード例 #17
0
ファイル: State.py プロジェクト: felix-martel/splendor-ai
 def get_cards(self, nb_players=4):
     '''
     Build every development card described by <game.get_cards()> and return them grouped by level, as a tuple (level 0, level 1, level 2).
     <nb_players> is accepted but not used
     '''
     all_cards = []
     for level, price, prestige, bonus in game.get_cards():
         all_cards.append(Card(level, price, prestige, bonus))
     return tuple(
         [card for card in all_cards if card.level == wanted]
         for wanted in (0, 1, 2)
     )
コード例 #18
0
    def convert_action(self, action, to='id'):
        '''
        Convert an action type between its name (str) and its id (int).
        <to> is the target representation, 'id' or 'name'.

        Returns the converted action type, or <action> itself when it already is in the target representation.
        Returns None, after logging a warning, when <action> is neither a str nor an int or when <to> is not supported
        '''
        if isinstance(action, str):
            cur = 'name'
        elif isinstance(action, int):
            cur = 'id'
        else:
            # The backslash is escaped: "\ " is an invalid escape sequence
            game.warning('/!\\ Unknown action type')
            return None

        if cur == to:
            return action

        if cur == 'id' and to == 'name':
            return self.action_space[action]
        if cur == 'name' and to == 'id':
            return self.action_space_inverted_index[action]

        game.warning('/!\\ Unknown action type')
        return None
コード例 #19
0
    def buy_card(self, state, card):
        '''
        Buy the card <card>: pay its discounted price to <state>, then receive its prestige and its bonus.
        The player must be able to buy the card, otherwise the assertion fails
        '''
        assert self.can_buy(card)

        # First pay for the mine...
        wildcard = game.JOKER_COLOR
        for color, due in self.compute_discounted_price(card).items():
            owned = self.tokens[color]
            if owned >= due:
                # Enough tokens of that color to pay directly
                self.pay(state, due, color)
                continue
            # Otherwise spend every token of that color and cover the rest with jokers
            self.pay(state, owned, color)
            self.pay(state, due - owned, wildcard)

        # ...then receive bonuses and extra prestige
        self.prestige += card.prestige
        self.bonuses[card.bonus] += 1
        game.out(self.name, "bought the following card :", card)
コード例 #20
0
ファイル: State.py プロジェクト: felix-martel/splendor-ai
 def player_has_reached_target(self, player):
     '''
     Called when <player> has reached the prestige target: announce it and raise the TARGET_REACHED flag
     '''
     game.out("\n\nCONGRATS !!!!")
     announcement = (
         player.name, "has reached", player.prestige,
         "points. The game will end after the current turn is complete",
     )
     game.out(*announcement)
     game.out("!!!!!!!!!!!!!\n\n")
     self.TARGET_REACHED = True
コード例 #21
0
    def compute_discounted_price(self, card):
        '''
        If you hold bonuses, you have a discount on development cards. This function computes and returns such discounted price.
        The result is a token bag holding, for each color in the card's price, the positive part of (price - bonuses of that color)
        '''
        discounted_price = game.get_empty_token_bag()
        for color, price in card.price.items():
            # A discount cannot make a price negative (presumably what
            # positive_part enforces; the unreachable alternative implementation
            # that used to follow the return clamped negative prices to 0)
            discounted_price[color] = positive_part(price -
                                                    self.bonuses[color])
        return discounted_price
コード例 #22
0
 def observe(self, state, reward, done, actions):
     '''
     Pick the next action at random among <actions> and store it in <self.next_action>.
     <state>, <reward> and <done> are accepted but not used
     '''
     picked = random.choice(actions)
     self.next_action = picked
     game.out("Next action will be", self.next_action)
コード例 #23
0
ファイル: State.py プロジェクト: felix-martel/splendor-ai
 def get_tokens(self):
     '''
     Return whatever <game.get_tokens()> provides
     '''
     tokens = game.get_tokens()
     return tokens
コード例 #24
0
ファイル: State.py プロジェクト: felix-martel/splendor-ai
 def get_tiles(self):
     '''
     Build one Tile per (bonus, prestige) pair provided by <game.get_tiles()> and return them as a list
     '''
     tiles = []
     for bonus, prestige in game.get_tiles():
         tiles.append(Tile(bonus, prestige))
     return tiles
コード例 #25
0
 def act(self):
     '''
     Log and return the action currently stored in <self.next_action>
     '''
     chosen = self.next_action
     game.out("Deciding to do", chosen)
     return chosen
コード例 #26
0
ファイル: State.py プロジェクト: felix-martel/splendor-ai
    def _init_deck(self, nb_players=4):
        '''
        Set up the board: shuffle the development cards, reveal some of them, pick the noble tiles, fetch the tokens and create the players.

        <nb_players> only determines how many noble tiles are drawn (nb_players + 1); the number of players created comes from <game.NB_PLAYERS>.
        Sets the attributes cards, tiles, tokens, deck and players.
        Reads <self.turn> for the final log messages, so it must already be set when this method is called.
        '''
        # Retrieve development cards
        l1, l2, l3 = self.get_cards()
        game.out("-- Initializing the game --")
        game.out("Nb of level-1 cards :", len(l1))
        game.out("Nb of level-2 cards :", len(l2))
        game.out("Nb of level-3 cards :", len(l3))

        # NOTE: the order of the random calls below (three shuffles, then two
        # samples) determines the outcome for a given seed
        random.shuffle(l1)
        random.shuffle(l2)
        random.shuffle(l3)
        # Set up the deck: for each level, the last <nb_reveal> cards are
        # revealed and the remaining ones form the deck
        nb_reveal = game.BOARD_Y
        deck_1, column_1 = l1[:-nb_reveal], l1[-nb_reveal:]
        deck_2, column_2 = l2[:-nb_reveal], l2[-nb_reveal:]
        deck_3, column_3 = l3[:-nb_reveal], l3[-nb_reveal:]

        visible_cards = [column_1, column_2, column_3]

        # Tokens
        tokens = self.get_tokens()

        # Tiles: draw nb_players + 1 of them, without replacement
        nb_tiles = nb_players + 1
        tiles = self.get_tiles()
        tiles = random.sample(tiles, nb_tiles)

        # Declare attributes
        self.cards = visible_cards
        self.tiles = tiles
        self.tokens = tokens
        self.deck = [deck_1, deck_2, deck_3]
        self.players = [PlayerData(i) for i in range(game.NB_PLAYERS)]
        # The first player is always named "You"; the others get distinct
        # names sampled from game.PLAYER_NAMES
        players_names = ["You"] + random.sample(game.PLAYER_NAMES,
                                                game.NB_PLAYERS - 1)
        for i, name in enumerate(players_names):
            self.players[i].rename(name)

        self.print_deck()

        game.out("-- Starting turn", (self.turn + 1), "-- ")
        game.out("Now playing :", self.get_current_player().name)
コード例 #27
0
ファイル: State.py プロジェクト: felix-martel/splendor-ai
    def step(self, action, player):
        '''
        Main function. Given an action <action> and a player <player>, it updates the state
        accordingly, then handles noble visits, extra tokens, victory and the move to the
        next player (and to the next turn once every player has played).
        Does nothing when the game has already ended. Always returns None.

        Input :
        <action> is a dict with two keys :
            - type : a string among <game.POSSIBLE_ACTIONS>
            - params : the parameters of the action. Its format depends on the type.
                - take_3 : list/iterable of three color names : [<color_1>, <color_2>, <color_3>]
                - take_2 : string, color name
                - reserve : origin and coordinate of the card
                    - ['from_table', (i, j)]
                    - ['from_deck', i]
                - purchase : origin and coordinate of the card
                    - ['from_table', (i, j)]
                    - ['from_hand', i]
        <player> is the player performing the action
        '''
        if self.GAME_ENDED:
            return

        # game.POSSIBLE_ACTIONS must hold exactly five action types, in this order
        [TAKE_3, TAKE_2, RESERVE, PURCHASE, DO_NOTHING] = game.POSSIBLE_ACTIONS
        action_type = action['type']
        params = action['params']

        if action_type == TAKE_3:
            # One token of each requested color
            tokens = [(color, 1) for color in params]
            player.take_tokens(self, tokens)

        elif action_type == TAKE_2:
            # Two tokens of the requested color
            tokens = [(params, 2)]
            player.take_tokens(self, tokens)

        elif action_type == RESERVE:
            # <params> is rebound to the coordinate part of the action parameters
            [origin, params] = params
            if origin == 'from_table':
                i, j = params
                card = self.get_card_from_table(i, j)
                player.reserve_card(self, card)

            elif origin == 'from_deck':
                i = params
                card = self.get_card_from_deck(i)
                player.reserve_card(self, card)

        elif action_type == PURCHASE:
            # <params> is rebound to the coordinate part of the action parameters
            [origin, params] = params
            if origin == 'from_table':
                i, j = params
                card = self.get_card_from_table(i, j)
                player.buy_card(self, card)
            elif origin == 'from_hand':
                i = params
                card = player.pop_card_from_hand(i)
                player.buy_card(self, card)
        elif action_type == DO_NOTHING:
            game.out(self.get_current_player().name,
                     "doesn't do anything this turn")

        # CHECK WHOSE NOBLES ARE VISITING
        # Collect the indices (in self.tiles) of the nobles that can visit the player
        visiting_nobles = []
        for noble_id in range(len(self.tiles)):
            noble = self.tiles[noble_id]
            if noble.can_visit(player):
                visiting_nobles.append(noble_id)
        if len(visiting_nobles) > 0:
            player.choose_noble(self, visiting_nobles)

        # CHECK IF PLAYER HAS THE RIGHT AMOUNT OF TOKENS
        player.remove_extra_tokens(self)

        # CHECK IF PLAYER HAS WON
        if player.has_won(self):
            # The game ends immediately, without moving on to the next player,
            # when the state is adversarial or when the current player is player 0
            if self.adversarial or self.current_player == 0:
                self.player_has_reached_target(player)
                self.GAME_ENDED = True
                self.TARGET_REACHED = True
                game.out("-- END OF THE GAME --")
                game.out(self.get_results())
                return

        # Move on to the next player
        self.current_player += 1
        if self.current_player == game.NB_PLAYERS:
            # Every player has played: the turn is over
            game.out("End of turn", self.turn, "\n")
            if game.INCREMENTAL:
                # Ask for a confirmation between two turns. Declining returns
                # right away, leaving the turn counter and the current player
                # as they are
                s = input("Continue ?")
                if s in ["quit", "cancel", "no", "No", "N", "n"]:
                    return
                else:
                    game.out("Continuing...\n\n")
            if self.TARGET_REACHED:
                # The target was reached earlier: the game ends now that the
                # turn is complete
                self.GAME_ENDED = True
                game.out("-- END OF THE GAME --")
                game.out(self.get_results())
                return

            self.turn += 1
            self.current_player = 0
            game.out("-- Starting turn", (self.turn + 1), "-- ")
        game.out("Now playing :", self.get_current_player().name, verbose=3)
コード例 #28
0
def compare(a, b, n_games=100, max_step=100, display_results=True):
    '''
    Make the agents <a> and <b> play <n_games> games against each other and collect statistics.

    Input :
    <a>, <b> : the two agents. They must provide new_game(), observe(), act() and an <identity> attribute
    <n_games> : number of games to play
    <max_step> : maximum number of steps per game; a game that has not ended by then is ignored in the statistics
    <display_results> : whether a summary is printed at the end

    Output : a tuple (victories_a, victories_b, diff_a, diff_b, average_diff)
        - victories_a, victories_b : number of games won by each agent
        - diff_a, diff_b : prestige margins of the games won by A and by B respectively
        - average_diff : mean of (A's prestige - B's prestige) over the finished games, 0.0 when no game finished
    '''
    t_start = time()
    board = Environment()
    players = ['Player A', 'Player B']
    # Results
    victories_a = 0
    victories_b = 0
    diff_a = []
    diff_b = []
    diff = []
    rew_a = 0
    rew_b = 0

    for i in range(n_games):
        board.reset()

        # Start new game
        player_a = board.get_player(0)
        player_b = board.get_player(1)

        initial_state = board.state.visible()
        actions_a = board.get_possible_actions(player_a)
        actions_b = board.get_possible_actions(player_b)

        a.new_game(player_a, initial_state, actions_a)
        b.new_game(player_b, initial_state, actions_b)

        # Start playing !
        t = 0
        reward_a = 0
        reward_b = 0
        game_ended = False

        while not game_ended and t < max_step:
            # -- Beginning of A's turn --
            # Observe current state
            state = board.get_visible_state(a.identity)
            actions = board.get_possible_actions(a.identity)

            a.observe(state, reward_a, game_ended, actions)
            action = a.act()

            _, reward_a, game_ended, _ = board.take_action(
                action, a.identity)
            rew_a += reward_a
            # -- End of turn --

            # -- Beginning of B's turn --
            # NOTE(review): B still observes and acts when A's action has just
            # ended the game - confirm that board.take_action tolerates it
            # Observe current state
            state = board.get_visible_state(b.identity)
            actions = board.get_possible_actions(b.identity)

            b.observe(state, reward_b, game_ended, actions)
            action = b.act()

            _, reward_b, game_ended, _ = board.take_action(
                action, b.identity)
            rew_b += reward_b
            # -- End of turn --

            # Other players' turn
            board.autoplay()

            t += 1

        if game_ended:
            winner_id = board.winner('pos')
            diff.append(a.identity.prestige - b.identity.prestige)
            if winner_id == 0:
                game.out("Player A won in",
                         t,
                         "steps. A scored",
                         a.identity.prestige,
                         "points, B scored",
                         b.identity.prestige,
                         "points.",
                         verbose=1)
                victories_a += 1
                diff_a.append(a.identity.prestige - b.identity.prestige)
            elif winner_id == 1:
                game.out("Player B won in",
                         t,
                         "steps. A scored",
                         a.identity.prestige,
                         "points, B scored",
                         b.identity.prestige,
                         "points.",
                         verbose=1)
                victories_b += 1
                diff_b.append(b.identity.prestige - a.identity.prestige)
            # Progress reports (only issued after a finished game)
            if i % 100 == 0 and i > 0:
                game.out("game",
                         i,
                         "out of",
                         n_games,
                         "score is",
                         victories_a,
                         "-",
                         victories_b,
                         verbose=0)
            if i % 1000 == 0 and i > 0:
                game.out(i,
                         "games played,", (n_games - i),
                         "to go. Elapsed time :", (time() - t_start),
                         "seconds. ETA :",
                         (n_games - i) * (time() - t_start) / i,
                         verbose=0)

    t_end = time()
    duration = t_end - t_start
    # Results :

    # A tie used to be reported as a victory of Player B
    if victories_a == victories_b:
        wname = "Draw"
    else:
        wname = players[0 if victories_a > victories_b else 1]
    # <diff> is empty when no game ended within <max_step> steps (or when
    # n_games is 0): avoid dividing by zero in that case
    average_diff = sum(diff) / len(diff) if diff else 0.0
    if display_results:
        share_a = 100 * (victories_a / (n_games)) if n_games else 0.0
        share_b = 100 * (victories_b / (n_games)) if n_games else 0.0
        print(n_games, "iterations finished after", duration, "seconds.\n -")
        print("Winner :", wname)
        print(" -")
        print("A wins :", victories_a)
        print("B wins :", victories_b)
        print(" -")
        print("% A :", share_a)
        print("% B :", share_b)
        print("Average score dist between A and B:", average_diff)

    return victories_a, victories_b, diff_a, diff_b, average_diff
コード例 #29
0
ファイル: State.py プロジェクト: felix-martel/splendor-ai
 def print_deck(self):
     '''
     Log the board: current turn and player, noble tiles, visible cards (one line per board row), tokens and players' names
     '''
     separator = "---------------------------------------------------------------------------"
     game.out("splendor - turn", self.turn, "- now playing : player",
              self.current_player)
     game.out(separator)
     game.out("".join(map(str, self.tiles)))
     for row in range(game.BOARD_X):
         game.out("\t".join(map(str, self.cards[row])))
     token_summary = " - ".join(
         str(quantity) + " " + color
         for color, quantity in self.tokens.items())
     game.out(token_summary)
     game.out(separator)
     names = " - ".join(player.name for player in self.players)
     game.out("Players :", names)