def train_normal(iters: int, graph: bool=False) -> list:
    """Calculate the Nash equilibrium for a normal-form game via regret matching.

    Args:
        iters: number of self-play iterations to run.
        graph: when True, record player 1's average strategy after every
            iteration (useful for plotting convergence).

    Returns:
        A list of player 1's per-iteration average strategies when graph
        is True; otherwise a tuple of both players' average strategies.
    """
    if graph:
        rtn = []
    p1, p2 = Regret(), Regret()
    for _ in range(iters):
        a1 = get_action(p1.get_strategy())
        a2 = get_action(p2.get_strategy())
        # Hoist the utility lookups: game.util() was previously called twice
        # per player per iteration with identical arguments.
        util_vs_a2 = game.util(a2)
        util_vs_a1 = game.util(a1)
        p1.regret(util_vs_a2, util_vs_a2[a1])
        p2.regret(util_vs_a1, util_vs_a1[a2])
        if graph:
            rtn.append(p1.get_average_strategy())
    return rtn if graph else (p1.get_average_strategy(), p2.get_average_strategy())
def dudo_cfr(info: list, history: list=None, p0: float=1, p1: float=1) -> float:
    """One counterfactual regret minimization (CFR) iteration for Dudo.

    Args:
        info: each player's private die roll.
        history: boolean claim vector with one flag per action; a fresh
            all-False vector is created when omitted.
        p0: reach probability for player 0.
        p1: reach probability for player 1.

    Returns:
        The expected utility of this node from the acting player's perspective.
    """
    # Bug fix: the old default was a shared mutable list ([]), which would
    # also crash on history[a]; start from an all-False claim vector instead.
    if history is None:
        history = [False]*ACTIONS
    player = game.get_player(history)
    # Return payoff for terminal states.
    util = game.util(info, history)
    if util is not None:
        return util
    # Get the information-set node, creating it if nonexistent.
    # (Renamed from `repr`, which shadowed the builtin.)
    key = game.hash_info_set(info[player], history)
    if key not in nodes:
        nodes[key] = DudoNode(f"{info[player]} {game.format_history(history)}",
                              game.last(history) + 1)
    node = nodes[key]
    # For each available action, recurse with extended history and the acting
    # player's reach probability scaled by the strategy weight.
    strategy = node.get_strategy(p0 if player == 0 else p1)
    util = [0]*ACTIONS
    node_util = 0
    for a in range(game.last(history) + 1, ACTIONS if sum(history) > 0 else ACTIONS - 1):
        if not history[a]:
            next_history = list(history)
            next_history[a] = True
            # Negative because the next call's value is from the opponent's perspective.
            util[a] = -(dudo_cfr(info, next_history, p0*strategy[a], p1) if player == 0
                        else dudo_cfr(info, next_history, p0, p1*strategy[a]))
            node_util += strategy[a]*util[a]
    # Accumulate counterfactual regret, weighted by the opponent's reach probability.
    node.regret(util, node_util, p1 if player == 0 else p0)
    return node_util
def kuhn_cfr(info: list, history: list=None, p0: float=1, p1: float=1) -> float:
    """One counterfactual regret minimization (CFR) iteration for Kuhn poker.

    Args:
        info: each player's private card.
        history: list of action strings played so far; an empty history
            (start of the hand) is used when omitted.
        p0: reach probability for player 0.
        p1: reach probability for player 1.

    Returns:
        The expected utility of this node from the acting player's perspective.
    """
    # Bug fix: avoid a shared mutable default argument; an empty history
    # means the start of the hand.
    if history is None:
        history = []
    player = len(history) % 2
    # Return payoff for terminal states.
    util = game.util(info, history)
    if util is not None:
        return util
    info_set = [str(info[player])] + history
    # Get the information-set node, creating it if nonexistent.
    # (Renamed from `repr`, which shadowed the builtin.)
    key = " ".join(info_set)
    if key not in nodes:
        nodes[key] = Node(key)
    node = nodes[key]
    # For each action, recurse with extended history and the acting player's
    # reach probability scaled by the strategy weight.
    strategy = node.get_strategy(p0 if player == 0 else p1)
    util = [0]*ACTIONS
    node_util = 0
    for a in range(ACTIONS):
        next_history = history + [game.actions[a]]
        # Negative because the next call's value is from the opponent's perspective.
        util[a] = -(kuhn_cfr(info, next_history, p0*strategy[a], p1) if player == 0
                    else kuhn_cfr(info, next_history, p0, p1*strategy[a]))
        node_util += strategy[a]*util[a]
    # Accumulate counterfactual regret, weighted by the opponent's reach probability.
    node.regret(util, node_util, p1 if player == 0 else p0)
    return node_util
def kuhn_play(first: int=None) -> float:
    """Play a hand of Kuhn poker, human vs. computer.

    Args:
        first: index (0 or 1) of the player who acts first; drawn at
            random per call when omitted.

    Returns:
        The terminal utility, sign-adjusted to the first player's perspective.
    """
    # Bug fix: the old default random.randint(0, 1) was evaluated once at
    # function-definition time, so every call shared the same "random"
    # first player. Draw it per call instead.
    if first is None:
        first = random.randint(0, 1)
    cards = list(range(1, 4))
    random.shuffle(cards)
    print(f"Your card is {cards[first]}")
    # Seat 0 is the human, seat 1 the trained average strategy; swap if
    # the human moves second.
    p = [lambda i: play.get_move(),
         lambda i: get_action(nodes[i].get_average_strategy())]
    if first == 1:
        p = p[::-1]
    history = ""
    turn = 0
    while game.util(cards, history) is None:
        move = game.actions[p[turn](str(cards[turn]) + history)]
        if turn != first:
            print(f"Computer plays {move}")
        history += " " + move
        turn ^= 1
    print(f"Computer had card {cards[first ^ 1]}")
    return (1 if len(history) % 2 == first else -1)*game.util(cards, history)
def train(self, iters: int, graph: bool=False) -> list:
    """Train the CFR minimizer against a known opponent strategy.

    Args:
        iters: number of training iterations.
        graph: when True, record the average strategy after every
            iteration and return that history instead.

    Returns:
        A list of per-iteration average strategies when graph is True,
        otherwise the final average strategy.
    """
    snapshots = [] if graph else None
    for _ in range(iters):
        # Sample the opponent's fixed mixed strategy and fetch the
        # utilities of each of our actions against it.
        opp_util = game.util(get_action(game.OPP_STRATEGY))
        # Accumulate regret relative to our own sampled action.
        self.regret(opp_util, opp_util[get_action(self.get_strategy())])
        if graph:
            snapshots.append(self.get_average_strategy())
    return snapshots if graph else self.get_average_strategy()
def dudo_play(first: int=None) -> float:
    """Play a game of Dudo against the computer (human seat currently disabled).

    Args:
        first: index (0 or 1) of the player who acts first; drawn at
            random per call when omitted.

    Returns:
        The terminal utility, sign-adjusted to the first player's perspective.
    """
    # Bug fix: the old default random.randint(0, 1) was evaluated once at
    # function-definition time, so every call shared the same "random"
    # first player. Draw it per call instead.
    if first is None:
        first = random.randint(0, 1)
    rolls = [random.randrange(1, 7), random.randrange(1, 7)]
    print(f"Your roll is {rolls[first]}")
    # Human play is disabled; both seats use the trained average strategy.
    # p = [lambda i: play.get_move(), lambda i: get_action(nodes[i].get_average_strategy())]
    p = [lambda i: get_action(nodes[i].get_average_strategy())]*2
    if first == 1:
        p = p[::-1]
    history = [False]*ACTIONS
    turn = 0
    while game.util(rolls, history) is None:
        move = p[turn](game.hash_info_set(rolls[turn], history))
        # Debug output: current highest claim and the acting info-set node.
        print(game.last(history), "|", nodes[game.hash_info_set(rolls[turn], history)])
        if turn != first:
            print(f"Computer plays {game.actions[move]}")
        history[move] = True
        turn ^= 1
    print(f"Computer had roll {rolls[first ^ 1]}")
    return (1 if turn == first else -1)*game.util(rolls, history)