def crm(history: History, player, pis):
    """One counterfactual-regret-minimization (CFR) traversal of the game tree.

    Recursively walks `history` and returns the expected value of the game
    for `player` under the current (module-level) `strategy`.

    Side effects: mutates the module-level tables `regrets_info_sets`,
    `strategies_info_sets` and `strategy` at the information sets owned by
    `player`.

    :param history: current game state; must support is_terminal(),
        is_chance(), chance_sample(), get_information_set(), update(a),
        utility(player) and expose `active_player`
    :param player: index (0 or 1) of the player being updated
    :param pis: two-element list [pi1, pi2] of reach probabilities for
        players 0 and 1 respectively
    :return: expected utility of `history` for `player`
    """
    pi1, pi2 = pis
    if history.is_terminal():
        return history.utility(player)
    if history.is_chance():
        # Chance nodes are sampled in place (Monte-Carlo chance sampling),
        # then the traversal continues with unchanged reach probabilities.
        history.chance_sample()
        return crm(history, player, [pi1, pi2])
    information_set = history.get_information_set()
    info_set_index = hash_to_index[hash_dict(information_set)]
    value = 0
    # Counterfactual value of each of the two available actions.
    value_to_action = [0.0, 0.0]
    for a in range(2):
        # Copy before updating so the two action branches are independent.
        new_hist = deepcopy(history)
        new_hist.update(a)
        # The acting player's reach probability is multiplied by the
        # probability of taking action `a` under the current strategy.
        if history.active_player == 0:
            value_to_action[a] = crm(new_hist, player,
                                     [strategy[info_set_index, a]*pi1, pi2])
        else:
            value_to_action[a] = crm(new_hist, player,
                                     [pi1, strategy[info_set_index, a]*pi2])
        value += strategy[info_set_index, a]*value_to_action[a]
    if history.active_player == player:
        for a in range(2):
            # pis[player-1] selects the OPPONENT's reach probability
            # (for player == 0 this is pis[-1], i.e. pis[1]), matching the
            # CFR regret update; pis[player] is the player's own reach,
            # used to accumulate the average strategy.
            regrets_info_sets[info_set_index, a] += pis[player-1]*(value_to_action[a] - value)
            strategies_info_sets[info_set_index, a] += pis[player]*strategy[info_set_index, a]
        # Regret matching: refresh the current strategy at this infoset.
        strategy[info_set_index, :] = strategy_update(regrets_info_sets[info_set_index, :])
    return value
def initialize_nodes(self):
    """Create every node (chance, decision, terminal) of the game tree.

    Populates:
      - self.nodes / self.info_sets: all nodes, non-terminal ones sorted
        topologically by their `line` attribute
      - self.reward_to_node: reward value -> shared terminal node
      - self.hash_to_node: hash of an information set -> node
    """
    nodes_li = []
    cards = [i for i in range(1, self.nb_cards + 1)]
    # Step 1: creating all nodes and information sets.
    # One (chance, P0, P1) node triple per round, per prize ordering and
    # per combination of cards already played by each player.
    for nb_rounds in range(self.nb_cards):
        for all_cards in permutations(cards):
            for previous_p0 in permutations(cards, nb_rounds):
                for previous_p1 in permutations(cards, nb_rounds):
                    all_cards = list(all_cards)
                    previous_p0 = list(previous_p0)
                    previous_p1 = list(previous_p1)
                    p1 = goof.Player('P1', nb_cards=self.nb_cards,
                                     previous=previous_p0)
                    p2 = goof.Player('P2', nb_cards=self.nb_cards,
                                     previous=previous_p1)
                    prizes = goof.Prize(nb_cards=self.nb_cards,
                                        all_cards=all_cards,
                                        current_round=nb_rounds)
                    engine = goof.Engine(p1=p1, p2=p2, prize=prizes)
                    info_chance = engine.get_infoset(is_chance=True)
                    info_p0 = engine.get_infoset(active_player=0)
                    info_p1 = engine.get_infoset(active_player=1)
                    # OLIVIER: the line below did not work because the
                    # possible actions attached to chance nodes were wrong:
                    # chance_actions = [card for card in cards
                    #                   if card not in prizes.showing]
                    # OLIVIER: added a sort here instead
                    chance_actions = np.sort(prizes.all_cards[nb_rounds:])
                    act_p0 = [
                        card for card in cards if card not in previous_p0
                    ]
                    act_p1 = [
                        card for card in cards if card not in previous_p1
                    ]
                    # `line` encodes a topological order: three nodes per
                    # round -- chance first, then P0, then P1.
                    dec_chance = Node(actions=chance_actions,
                                      available_information=info_chance,
                                      is_chance=True,
                                      is_initial=(nb_rounds == 0),
                                      line=nb_rounds * 3)
                    dec_p0 = Node(actions=act_p0,
                                  available_information=info_p0,
                                  is_decision=True,
                                  player=0,
                                  line=nb_rounds * 3 + 1)
                    dec_p1 = Node(actions=act_p1,
                                  available_information=info_p1,
                                  is_decision=True,
                                  player=1,
                                  line=nb_rounds * 3 + 2)
                    nodes_li.append(dec_chance)
                    nodes_li.append(dec_p0)
                    nodes_li.append(dec_p1)
    # Step 2: removing duplicates (appearing for chance nodes)
    nodes_li = list(set(nodes_li))
    # Step 3: sort by line
    nodes_li = sorted(nodes_li, key=lambda node: node.line)
    # Step 4: create terminal nodes, one per reachable score difference
    # (maximum total reward = 1 + 2 + ... + nb_cards).
    max_reward = int(self.nb_cards * (self.nb_cards + 1) / 2)
    reward_to_node = {}
    for reward in range(-max_reward,
                        max_reward + 1):
        term_node = Node(is_terminal=True, utility=reward, player=0)
        nodes_li.append(term_node)
        reward_to_node[reward] = term_node
    # Step 5: initialize topological index
    for index_node, node in enumerate(nodes_li):
        node.topological_idx = index_node
    # Step 6: creating hash --> node dictionary
    self.hash_to_node = {}
    for node in nodes_li:
        node_hash = hash_dict(node.available_information)
        self.hash_to_node[node_hash] = node
    self.info_sets = nodes_li
    self.reward_to_node = reward_to_node
    self.nodes = nodes_li
def get_child(self, starting_node: Node, action, history): """ :param starting_node: :param action: :param history: last action of player 0 (int) :return: """ # First case: starting_node has a terminal child # print("node:", starting_node.available_information) # print("action:", action) # print("history:", history) if starting_node.line == 3 * (self.nb_cards - 1) + 2: actions0 = deepcopy( starting_node.available_information["actions_P0"]) if history is not None: actions0.append(history) reward = self.get_reward( actions0=actions0, actions1=starting_node.available_information["actions_P1"] + [action], prizes=starting_node.available_information["prizes"]) return self.reward_to_node[reward] # Second case: child node is not terminal # We will construct the information set of the child node to find it infoset = {} if starting_node.player == 0: infoset['active_player'] = 1 infoset['current_round'] = starting_node.available_information[ "current_round"] infoset['prizes'] = starting_node.available_information["prizes"] infoset['actions_P0'] = starting_node.available_information[ "actions_P0"] # P1 does not know what action P0 did infoset['actions_P1'] = starting_node.available_information[ "actions_P1"] elif starting_node.player == 1: actions0 = deepcopy( starting_node.available_information["actions_P0"]) if history is not None: actions0.append(history) infoset['is_chance'] = True infoset['current_round'] = starting_node.available_information[ "current_round"] + 1 infoset['prizes'] = starting_node.available_information["prizes"] infoset['actions_P0'] = actions0 infoset['actions_P1'] = starting_node.available_information[ "actions_P1"] + [action] elif starting_node.is_chance: infoset['active_player'] = 0 infoset['current_round'] = starting_node.available_information[ "current_round"] infoset['prizes'] = starting_node.available_information[ "prizes"] + [action] infoset['actions_P0'] = starting_node.available_information[ "actions_P0"] infoset['actions_P1'] = 
starting_node.available_information[ "actions_P1"] # Converting infoset to hash try: child = self.hash_to_node[hash_dict(infoset)] except KeyError: raise KeyError('following infoset unknown : {}'.format(infoset)) return child
self.active_player = 1 - self.active_player elif len(self.traj) == 1: previous_val = self.traj[0] candidates = [i for i in range(3) if i != previous_val] self.traj += [np.random.choice(candidates)] self.active_player = 1 - self.active_player else: raise PermissionError # We consider below more information sets that appear in reality all_info_sets = [{'card_info': card, 'active_player': player, 'trajectory': traj} for card in range(3) for player in range(2) for traj in [[], [0], [1], [0, 1]]] all_hashs = [hash_dict(dico) for dico in all_info_sets] hash_to_index = {hashc: index for (index, hashc) in enumerate(all_hashs)} n_info_sets = len(all_info_sets) regrets_info_sets = np.zeros((n_info_sets, 2)) strategies_info_sets = np.zeros((n_info_sets, 2)) strategy = 0.5 * np.ones((n_info_sets, 2)) def strategy_update(regrets): new_strat = np.maximum(regrets, 0.0) if np.max(new_strat) > 1e-8: new_strat = new_strat / np.sum(new_strat) return new_strat else:
def __hash__(self):
    """Hash a node through the hash of its information set.

    Keeps hashing consistent with ``__eq__``: nodes whose information
    sets hash identically compare (and hash) the same.
    """
    info_hash = info_set.hash_dict(self.available_information)
    return info_hash
def __eq__(self, other):
    """Two nodes are equal iff their information sets hash identically.

    Kept consistent with ``__hash__``, which also hashes
    ``available_information``.

    :param other: object to compare against
    :return: True/False for comparable objects, NotImplemented otherwise
    """
    # Fix: comparing against an unrelated object (no
    # `available_information` attribute) used to raise AttributeError.
    # Returning NotImplemented lets Python fall back to its default
    # handling (e.g. `node == 3` is False instead of crashing).
    if not hasattr(other, 'available_information'):
        return NotImplemented
    return info_set.hash_dict(self.available_information) == \
        info_set.hash_dict(other.available_information)