def observe(self, state, reward, done, actions):
    """Plan the next move from the latest observation.

    Delegates to ``self.find_next_action(state, actions)``. The first
    element of its result is stored in ``self.next_action``; the second
    element is only logged, labelled as the Monte Carlo score.
    ``reward`` and ``done`` are accepted but not used here.
    """
    planned = self.find_next_action(state, actions)
    self.next_action = planned[0]
    game.out(
        "Next action will be",
        self.next_action,
        "; monte carlo gives a score of",
        planned[1],
    )
def act(self):
    """Pick the next action by sampling an action type from the Q-values.

    The actions in ``self.actions`` are grouped by the type id returned by
    ``self.classify_action``. A type is drawn with ``self.softmax_policy``
    from a copy of the Q-row of the current state; while the drawn type has
    no available action, its score is set to 0 and a new type is drawn.
    If no type at all has an available action, a ``do_nothing`` action is
    used instead of re-drawing forever (the same fallback as ``act_old``).

    Side effects: stores the drawn type in ``self.last_action_id`` and the
    chosen action in ``self.next_action``.

    Returns:
        The action returned by ``self.choose``.
    """
    actions_by_type = {
        action_id: []
        for action_id in range(len(self.action_space))
    }
    for action in self.actions:
        a = self.classify_action(action, to='id')
        actions_by_type[a].append(action)

    s = self.get_current_state_id()
    # Work on a copy so that masking below never alters the Q-table.
    actions = self.Q[s, :].copy()

    next_action_type = self.softmax_policy(actions)
    if any(actions_by_type.values()):
        # At least one type is playable: re-draw until we hit one.
        # NOTE(review): a score of 0 does not necessarily give the type a
        # zero probability - depends on softmax_policy; confirm.
        while not actions_by_type[next_action_type]:
            actions[next_action_type] = 0
            next_action_type = self.softmax_policy(actions)
        possible_actions = actions_by_type[next_action_type]
    else:
        # Nothing is playable: the re-draw loop could never terminate.
        possible_actions = [{'type': 'do_nothing', 'params': None}]

    action = self.choose(possible_actions, next_action_type)
    self.last_action_id = next_action_type
    self.next_action = action
    game.out("Deciding to do", action)
    return action
def __init__(self, adversarial=True, state=None):
    """Create an environment around a game state.

    Args:
        adversarial: forwarded to ``State`` when no state is supplied.
        state: an existing state to wrap; when ``None`` a new
            ``State(adversarial=adversarial)`` is created.
    """
    # Identity test: `!= None` would go through any custom comparison
    # operator defined on the state object.
    self.state = state if state is not None else State(adversarial=adversarial)
    self.step = 0
    self.position = 0
    self.GAME_ENDED = False
    game.out("New game", verbose=-1)
def act_old(self):
    """Return an action using an epsilon-greedy rule over action types.

    With probability ``self.epsilon`` a random element of ``self.actions``
    is returned immediately (nothing is stored or logged in that case).
    Otherwise action types are tried in decreasing order of their Q-value
    for the current state until one has an available action; if none has,
    a ``do_nothing`` action is used. The chosen action is stored in
    ``self.next_action``.
    """
    if random.random() < self.epsilon:
        return random.choice(self.actions)

    # Group the available actions by action-type id.
    buckets = {type_id: [] for type_id in range(len(self.action_space))}
    for candidate in self.actions:
        buckets[self.classify_action(candidate, to='id')].append(candidate)

    state_id = self.get_current_state_id()
    # Action types sorted from highest to lowest Q-value.
    ranking = np.flip(np.argsort(self.Q[state_id, :]), axis=0)

    next_action_type = ranking[0]
    possible_actions = buckets[next_action_type]
    for lower_ranked in ranking[1:]:
        if possible_actions:
            break
        next_action_type = lower_ranked
        possible_actions = buckets[lower_ranked]
    if not possible_actions:
        # Every type is empty; the last ranked type id is kept for choose().
        possible_actions = [{'type': 'do_nothing', 'params': None}]

    chosen = self.choose(possible_actions, next_action_type)
    self.next_action = chosen
    game.out("Deciding to do", chosen)
    return chosen
def reserve_card(self, state, card):
    '''
    Take a card <card> and put it in the player's hand to reserve it.
    Only him/her can buy it from now on.

    The player also receives one joker token, but only if the table still
    has one: taking it unconditionally would push the table's joker count
    below zero.

    Raises AssertionError if the hand already holds 3 cards.
    '''
    assert len(self.hand) < 3, ("Too many cards in hand for " + self.name)
    self.hand.append(card)
    game.out(self.name, "reserved the following card :", card)

    joker_color = game.JOKER_COLOR
    if state.tokens[joker_color] > 0:
        self.take_tokens(state, [(joker_color, 1)])
def reset(self):
    """Reset the wrapped state and the environment's own counters.

    Returns:
        Whatever ``self.get_player()`` returns when called without arguments.
    """
    self.state.reset()
    # Environment-level bookkeeping
    self.step, self.GAME_ENDED = 0, False
    game.out("Environment reset")
    player = self.get_player()
    return player
def get_noble_from_id(self, state, noble_id):
    '''
    Remove the noble tile at index <noble_id> from <state.tiles>, add it to
    the player's nobles and grant 3 prestige points.
    '''
    tile = state.tiles.pop(noble_id)
    self.nobles.append(tile)
    self.prestige = self.prestige + 3
    game.out(
        self.name,
        "has gained a noble",
        tile,
        "and now have",
        self.prestige,
        "prestige points",
    )
def pay(self, state, amount, color):
    '''
    Move <amount> tokens of <color> from the player back to the table.

    If the player owns fewer than <amount>, a warning is logged and only
    the tokens actually owned are paid.
    '''
    owned = self.tokens[color]
    if owned < amount:
        game.out("WARNING : the player", self.name, "tried to pay", amount,
                 " ", color, "tokens but had only", owned)
        game.out(self)
        # Clamp the payment to what the player really has.
        amount = owned

    # NOTE(review): self.n_tokens is not adjusted here although
    # take_tokens() increments it - confirm it is recomputed elsewhere.
    self.tokens[color] -= amount
    state.tokens[color] += amount
def choose_noble(self, state, nobles_id):
    '''
    If several nobles can visit a player at the end of its turn, this
    function chooses one randomly. If there is only one, then there's no
    choice. The selected noble is then acquired through
    <self.get_noble_from_id>.

    WARNING: <nobles_id> is a list of nobles' ids, i.e. their indices in
    <state.tiles>, not the noble objects themselves.
    '''
    if len(nobles_id) == 1:
        noble_id = nobles_id[0]
    else:
        game.out(self.name, "can be visited by", len(nobles_id), "nobles")
        noble_id = random.choice(nobles_id)
    self.get_noble_from_id(state, noble_id)
def reset(self, seed=None):
    '''
    Reset the state: reseed the random generator, clear the turn/player
    counters and end-of-game flags, then rebuild the deck.

    <seed> is passed to random.seed(); None keeps the default seeding.
    '''
    random.seed(seed)

    # State. These are reset BEFORE _init_deck() because that method logs
    # the turn number and the current player: doing it afterwards would
    # report the previous game's values (or fail if they were never set).
    self.turn = 0
    self.current_player = 0
    self.TARGET_REACHED = False
    self.GAME_ENDED = False
    self.winner = "(none)"

    self._init_deck()
    game.out("State reset")
def take_tokens(self, state, tokens):
    '''
    Move tokens from the table to the player.
    <tokens> should be a list of tuples (<token_color>, <token_quantity>).
    The player's total token count <n_tokens> is updated as well.
    '''
    # Build the log line first, it is emitted once the transfer is done.
    descriptions = []
    for color, amount in tokens:
        descriptions.append(str(amount) + " " + color + " token(s)")
    message = self.name + " took " + ", ".join(descriptions)

    for color, amount in tokens:
        state.tokens[color] -= amount
        self.tokens[color] += amount
        # Keep the running total in sync
        self.n_tokens += amount

    game.out(message)
def buy_card(self, state, card):
    '''
    Buy the card <card>: pay its discounted price (using jokers for any
    color the player is short of), then collect its prestige and bonus.

    Raises AssertionError if <self.can_buy(card)> is false.
    '''
    assert self.can_buy(card)

    # Payment
    joker_color = game.JOKER_COLOR
    price = self.compute_discounted_price(card)
    for color, amount in price.items():
        owned = self.tokens[color]
        # Pay as much as possible in the requested color...
        self.pay(state, min(amount, owned), color)
        if owned < amount:
            # ...and cover the remainder with jokers
            self.pay(state, amount - owned, joker_color)

    # Rewards
    self.prestige += card.prestige
    self.bonuses[card.bonus] += 1
    game.out(self.name, "bought the following card :", card)
def player_has_reached_target(self, player):
    '''
    Announce that <player> has reached the prestige target and raise the
    TARGET_REACHED flag.
    '''
    announcement = (
        player.name,
        "has reached",
        player.prestige,
        "points. The game will end after the current turn is complete",
    )
    game.out("\n\nCONGRATS !!!!")
    game.out(*announcement)
    game.out("!!!!!!!!!!!!!\n\n")
    self.TARGET_REACHED = True
def compare(a, b, n_games=100, max_step=100, display_results=True):
    """Play two agents against each other and collect win statistics.

    Args:
        a, b: agents exposing ``new_game``, ``observe``, ``act`` and an
            ``identity`` attribute (the player they control).
        n_games: number of games to play.
        max_step: maximum number of rounds per game; a game that has not
            ended by then is not counted in any statistic.
        display_results: when true, print a summary to stdout.

    Returns:
        ``(victories_a, victories_b, diff_a, diff_b, average_diff)`` where
        ``diff_a`` / ``diff_b`` hold the winner's prestige margin for each
        game won by A / B, and ``average_diff`` is the mean of
        (A prestige - B prestige) over finished games (0.0 if none
        finished).
    """
    t_start = time()
    board = Environment()
    players = ['Player A', 'Player B']

    # Results
    victories_a = 0
    victories_b = 0
    diff_a = []
    diff_b = []
    diff = []

    for i in range(n_games):
        board.reset()

        # Start new game
        player_a = board.get_player(0)
        player_b = board.get_player(1)
        initial_state = board.state.visible()
        actions_a = board.get_possible_actions(player_a)
        actions_b = board.get_possible_actions(player_b)
        a.new_game(player_a, initial_state, actions_a)
        b.new_game(player_b, initial_state, actions_b)

        # Start playing !
        t = 0
        reward_a = 0
        reward_b = 0
        game_ended = False
        while not game_ended and t < max_step:
            # -- Beginning of A's turn --
            # Observe current state
            state = board.get_visible_state(a.identity)
            actions = board.get_possible_actions(a.identity)
            a.observe(state, reward_a, game_ended, actions)
            action = a.act()
            _, reward_a, game_ended, _ = board.take_action(
                action, a.identity)
            # -- End of turn --

            # -- Beginning of B's turn --
            # NOTE(review): B still plays even if A's move just ended the
            # game; kept as in the original.
            state = board.get_visible_state(b.identity)
            actions = board.get_possible_actions(b.identity)
            b.observe(state, reward_b, game_ended, actions)
            action = b.act()
            _, reward_b, game_ended, _ = board.take_action(
                action, b.identity)
            # -- End of turn --

            # Other players' turn
            board.autoplay()
            t += 1

        if game_ended:
            winner_id = board.winner('pos')
            diff.append(a.identity.prestige - b.identity.prestige)
            if winner_id == 0:
                game.out("Player A won in", t, "steps. A scored",
                         a.identity.prestige, "points, B scored",
                         b.identity.prestige, "points.", verbose=1)
                victories_a += 1
                diff_a.append(a.identity.prestige - b.identity.prestige)
            elif winner_id == 1:
                game.out("Player B won in", t, "steps. A scored",
                         a.identity.prestige, "points, B scored",
                         b.identity.prestige, "points.", verbose=1)
                victories_b += 1
                diff_b.append(b.identity.prestige - a.identity.prestige)

        if i % 100 == 0 and i > 0:
            game.out("game", i, "out of", n_games, "score is",
                     victories_a, "-", victories_b, verbose=0)
        if i % 1000 == 0 and i > 0:
            game.out(i, "games played,", (n_games - i),
                     "to go. Elapsed time :", (time() - t_start),
                     "seconds. ETA :",
                     (n_games - i) * (time() - t_start) / i, verbose=0)

    t_end = time()
    duration = t_end - t_start

    # Results :
    # A tie is reported as a win for B, as in the original.
    wid = 0 if victories_a > victories_b else 1
    wname = players[wid]
    # No game may have finished within max_step: avoid dividing by zero.
    average_diff = sum(diff) / len(diff) if diff else 0.0

    if display_results:
        # Same guard for the degenerate n_games == 0 case.
        pct_a = 100 * (victories_a / n_games) if n_games else 0.0
        pct_b = 100 * (victories_b / n_games) if n_games else 0.0
        print(n_games, "iterations finished after", duration, "seconds.\n -")
        print("Winner :", wname)
        print(" -")
        print("A wins :", victories_a)
        print("B wins :", victories_b)
        print(" -")
        print("% A :", pct_a)
        print("% B :", pct_b)
        print("Average score dist between A and B:", average_diff)

    return victories_a, victories_b, diff_a, diff_b, average_diff
def act(self):
    """Return the action previously stored in ``self.next_action``."""
    chosen = self.next_action
    game.out("Deciding to do", chosen)
    return chosen
def observe(self, state, reward, done, actions):
    """Pick the next action uniformly at random among ``actions``.

    ``state``, ``reward`` and ``done`` are accepted but not used. The pick
    is stored in ``self.next_action``.
    """
    pick = random.choice(actions)
    self.next_action = pick
    game.out("Next action will be", self.next_action)
def print_deck(self):
    """Log the table: header, tiles, card rows, tokens and player names."""
    separator = (
        "---------------------------------------------------------------------------"
    )

    game.out("splendor - turn", self.turn, "- now playing : player",
             self.current_player)
    game.out(separator)

    # Tiles on a single line, then one line per card row
    game.out("".join(map(str, self.tiles)))
    for row in range(game.BOARD_X):
        game.out("\t".join(str(card) for card in self.cards[row]))

    token_summary = [
        str(count) + " " + color for color, count in self.tokens.items()
    ]
    game.out(" - ".join(token_summary))
    game.out(separator)

    game.out("Players :", " - ".join(p.name for p in self.players))
def _init_deck(self, nb_players=4):
    """Build a fresh table: cards, decks, tokens, tiles and players.

    Shuffles the three card levels, reveals the last ``game.BOARD_Y`` cards
    of each, samples ``nb_players + 1`` tiles, creates ``game.NB_PLAYERS``
    players (the first one named "You") and logs the resulting table.
    """
    # Retrieve development cards
    l1, l2, l3 = self.get_cards()
    levels = (l1, l2, l3)

    game.out("-- Initializing the game --")
    for level, cards in enumerate(levels, start=1):
        game.out("Nb of level-" + str(level) + " cards :", len(cards))
    for cards in levels:
        random.shuffle(cards)

    # Split each level into a hidden deck and the revealed cards
    nb_reveal = game.BOARD_Y
    hidden_decks = [cards[:-nb_reveal] for cards in levels]
    revealed = [cards[-nb_reveal:] for cards in levels]

    # Tokens
    token_pool = self.get_tokens()

    # Tiles
    # NOTE(review): the tile count follows <nb_players> while the player
    # count follows game.NB_PLAYERS - confirm this is intended.
    picked_tiles = random.sample(self.get_tiles(), nb_players + 1)

    # Declare attributes
    self.cards = revealed
    self.tiles = picked_tiles
    self.tokens = token_pool
    self.deck = hidden_decks

    self.players = [PlayerData(i) for i in range(game.NB_PLAYERS)]
    opponents = random.sample(game.PLAYER_NAMES, game.NB_PLAYERS - 1)
    for seat, name in enumerate(["You"] + opponents):
        self.players[seat].rename(name)

    self.print_deck()
    game.out("-- Starting turn", (self.turn + 1), "-- ")
    game.out("Now playing :", self.get_current_player().name)
def step(self, action, player):
    '''
    Main function. Given an action <action> and a player <player>,
    it updates the state accordingly. Does nothing if the game has
    already ended.

    Input :
    <action> is a dict with two keys :
     - type : a string among <game.POSSIBLE_ACTIONS>
     - params : the parameters of the action. Its format depends on the type.
        - take_3 : list/iterable of three color names :
                   [<color_1>, <color_2>, <color_3>]
        - take_2 : string, color name
        - reserve : origin and coordinate of the card
            - ['from_table', (i, j)]
            - ['from_deck', i]
        - purchase : origin and coordinate of the card
            - ['from_table', (i, j)]
            - ['from_hand', i]

    After the action, the method handles noble visits, trims the player's
    extra tokens, checks for victory and then moves on to the next player
    (and to the next turn once every player has played).

    NOTE(review): the nesting of the victory and end-of-turn branches
    below was reconstructed from a source without indentation - confirm
    against the original file.
    '''
    if self.GAME_ENDED:
        return

    # Unpack the five action-type names in their declared order
    [TAKE_3, TAKE_2, RESERVE, PURCHASE, DO_NOTHING] = game.POSSIBLE_ACTIONS
    action_type = action['type']
    params = action['params']

    if action_type == TAKE_3:
        # One token of each requested color
        tokens = [(color, 1) for color in params]
        player.take_tokens(self, tokens)
    elif action_type == TAKE_2:
        # Two tokens of a single color
        tokens = [(params, 2)]
        player.take_tokens(self, tokens)
    elif action_type == RESERVE:
        # <params> is rebound to the card coordinate after unpacking
        [origin, params] = params
        if origin == 'from_table':
            i, j = params
            card = self.get_card_from_table(i, j)
            player.reserve_card(self, card)
        elif origin == 'from_deck':
            i = params
            card = self.get_card_from_deck(i)
            player.reserve_card(self, card)
    elif action_type == PURCHASE:
        # <params> is rebound to the card coordinate after unpacking
        [origin, params] = params
        if origin == 'from_table':
            i, j = params
            card = self.get_card_from_table(i, j)
            player.buy_card(self, card)
        elif origin == 'from_hand':
            i = params
            card = player.pop_card_from_hand(i)
            player.buy_card(self, card)
    elif action_type == DO_NOTHING:
        game.out(self.get_current_player().name, "doesn't do anything this turn")

    # CHECK WHOSE NOBLES ARE VISITING
    # Collect indices (not objects): choose_noble expects positions
    # in <self.tiles>.
    visiting_nobles = []
    for noble_id in range(len(self.tiles)):
        noble = self.tiles[noble_id]
        if noble.can_visit(player):
            visiting_nobles.append(noble_id)
    if len(visiting_nobles) > 0:
        player.choose_noble(self, visiting_nobles)

    # CHECK IF PLAYER HAS THE RIGHT AMOUNT OF TOKENS
    player.remove_extra_tokens(self)

    # CHECK IF PLAYER HAS WON
    if player.has_won(self):
        # NOTE(review): presumably only the first player's victory counts
        # when the state is not adversarial - confirm the intended nesting.
        if self.adversarial or self.current_player == 0:
            self.player_has_reached_target(player)
            self.GAME_ENDED = True
            self.TARGET_REACHED = True
            game.out("-- END OF THE GAME --")
            game.out(self.get_results())
            return

    # Move on to the next player
    self.current_player += 1
    if self.current_player == game.NB_PLAYERS:
        # Every player has played: the turn is over
        game.out("End of turn", self.turn, "\n")
        if game.INCREMENTAL:
            # Interactive mode: ask before starting the next turn
            s = input("Continue ?")
            if s in ["quit", "cancel", "no", "No", "N", "n"]:
                return
            else:
                game.out("Continuing...\n\n")
        if self.TARGET_REACHED:
            self.GAME_ENDED = True
            game.out("-- END OF THE GAME --")
            game.out(self.get_results())
            return
        self.turn += 1
        self.current_player = 0
        game.out("-- Starting turn", (self.turn + 1), "-- ")
    game.out("Now playing :", self.get_current_player().name, verbose=3)