Code Example #1
def train_normal(iters: int, graph: bool=False) -> list:
    """ Calculates the Nash equilibrium for a normal form game. """
    if graph: rtn = []
    p1, p2 = Regret(), Regret()

    for i in range(iters):
        a1 = get_action(p1.get_strategy())
        a2 = get_action(p2.get_strategy())

        p1.regret(game.util(a2), game.util(a2)[a1])
        p2.regret(game.util(a1), game.util(a1)[a2])

        if graph: rtn.append(p1.get_average_strategy())

    return rtn if graph else (p1.get_average_strategy(), p2.get_average_strategy())
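Code Example #1 relies on a Regret helper class and a get_action sampler that are not shown here. A minimal sketch of what they might look like, based on standard regret matching (the actual implementations may differ), is:

import random

class Regret:
    """ Assumed regret-matching helper for a normal form game (sketch, not the original). """
    def __init__(self, n_actions: int = 3):
        self.n = n_actions
        self.regret_sum = [0.0]*n_actions
        self.strategy_sum = [0.0]*n_actions

    def get_strategy(self) -> list:
        # Regret matching: mix in proportion to positive regrets, uniform if none yet
        strategy = [max(r, 0.0) for r in self.regret_sum]
        total = sum(strategy)
        strategy = [s/total if total > 0 else 1.0/self.n for s in strategy]
        for a in range(self.n):
            self.strategy_sum[a] += strategy[a]
        return strategy

    def regret(self, action_util: list, util: float) -> None:
        # Accumulate how much better each action would have done than the chosen one
        for a in range(self.n):
            self.regret_sum[a] += action_util[a] - util

    def get_average_strategy(self) -> list:
        # The average strategy over all iterations is what converges toward equilibrium
        total = sum(self.strategy_sum)
        return [s/total if total > 0 else 1.0/self.n for s in self.strategy_sum]

def get_action(strategy: list) -> int:
    # Sample an action index from a mixed strategy
    return random.choices(range(len(strategy)), weights=strategy)[0]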
Code Example #2
def dudo_cfr(info: list, history: list=[], p0: float=1, p1: float=1) -> float:
    """ Counterfactual regret minimzation iteration. """
    player = game.get_player(history)

    # Return payoff for terminal states
    util = game.util(info, history)
    if util is not None:
        return util

    # Get information set node or create it if nonexistent
    repr = game.hash_info_set(info[player], history)
    if repr not in nodes:
        nodes[repr] = DudoNode(f"{info[player]} {game.format_history(history)}", game.last(history) + 1)

    node = nodes[repr]

    # For each action, recursively call cfr with additional history and probability
    strategy = node.get_strategy(p0 if player == 0 else p1)
    util = [0]*ACTIONS
    node_util = 0

    for a in range(game.last(history) + 1, ACTIONS if sum(history) > 0 else ACTIONS - 1):
        if not history[a]:
            next_history = list(history)
            next_history[a] = True
            # negative because next call's value is from the opponent's perspective
            util[a] = -(dudo_cfr(info, next_history, p0*strategy[a], p1) if player == 0 else \
                        dudo_cfr(info, next_history, p0, p1*strategy[a]))
            node_util += strategy[a]*util[a]

    # For each action, compute and accumulate counterfactual regret
    node.regret(util, node_util, p1 if player == 0 else p0)

    return node_util
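dudo_cfr performs a single traversal for one pair of rolls; the outer training loop is not shown. A minimal sketch of such a driver, assuming the ACTIONS constant, the nodes dictionary, and the game module used above:

import random

def train_dudo(iters: int) -> float:
    """ Hypothetical driver: roll both dice, then run one CFR traversal per iteration. """
    total = 0
    for _ in range(iters):
        rolls = [random.randrange(1, 7), random.randrange(1, 7)]
        total += dudo_cfr(rolls, [False]*ACTIONS)   # start from an empty claim history
    return total/iters   # approximate game value for the first player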
Code Example #3
def kuhn_cfr(info: list, history: list=[], p0: float=1, p1: float=1) -> float:
    """ Counterfactual regret minimzation iteration. """
    player = len(history) % 2

    # Return payoff for terminal states
    util = game.util(info, history)
    if util is not None:
        return util

    info_set = [str(info[player])] + history

    # Get information set node or create it if nonexistent
    repr = " ".join(info_set)
    if repr not in nodes:
        nodes[repr] = Node(repr)
    node = nodes[repr]

    # For each action, recursively call cfr with additional history and probability
    strategy = node.get_strategy(p0 if player == 0 else p1)
    util = [0]*ACTIONS
    node_util = 0

    for a in range(ACTIONS):
        next_history = history + [game.actions[a]]
        # negative because next call's value is from the opponent's perspective
        util[a] = -(kuhn_cfr(info, next_history, p0*strategy[a], p1) if player == 0 else \
                    kuhn_cfr(info, next_history, p0, p1*strategy[a]))
        node_util += strategy[a]*util[a]

    # For each action, compute and accumulate counterfactual regret
    node.regret(util, node_util, p1 if player == 0 else p0)

    return node_util
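kuhn_cfr stores one Node per information set in the nodes dictionary, but the class itself is not shown. A minimal sketch of a vanilla-CFR node with the interface used above (the real Node may differ), assuming the ACTIONS constant from the example:

class Node:
    """ Assumed information set node for vanilla CFR (sketch, not the original). """
    def __init__(self, repr_: str, n_actions: int = ACTIONS):
        self.repr = repr_
        self.n = n_actions
        self.regret_sum = [0.0]*n_actions
        self.strategy_sum = [0.0]*n_actions

    def get_strategy(self, realization_weight: float) -> list:
        # Regret matching over positive regrets; uniform if no positive regret yet
        strategy = [max(r, 0.0) for r in self.regret_sum]
        total = sum(strategy)
        strategy = [s/total if total > 0 else 1.0/self.n for s in strategy]
        # Weight the running average by how often the acting player reaches this node
        for a in range(self.n):
            self.strategy_sum[a] += realization_weight*strategy[a]
        return strategy

    def regret(self, util: list, node_util: float, cf_reach: float) -> None:
        # Counterfactual regret, weighted by the opponent's reach probability
        for a in range(self.n):
            self.regret_sum[a] += cf_reach*(util[a] - node_util)

    def get_average_strategy(self) -> list:
        total = sum(self.strategy_sum)
        return [s/total if total > 0 else 1.0/self.n for s in self.strategy_sum]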
Code Example #4
def kuhn_play(first: int = None) -> float:
    """ Has a human play against the computer. """
    # Choose the starting player per call; a random default argument is only evaluated once
    if first is None:
        first = random.randint(0, 1)
    cards = list(range(1, 4))
    random.shuffle(cards)

    print(f"Your card is {cards[first]}")
    p = [lambda i: play.get_move(), lambda i: get_action(nodes[i].get_average_strategy())]
    if first == 1:
        p = p[::-1]

    history = ""
    turn = 0
    while game.util(cards, history) is None:
        move = game.actions[p[turn](str(cards[turn]) + history)]
        if turn != first:
            print(f"Computer plays {move}")
        history += " " + move
        turn ^= 1

    print(f"Computer had card {cards[first ^ 1]}")
    return (1 if len(history) % 2 == first else -1)*game.util(cards, history)
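The play.get_move helper that prompts the human is not shown in these examples. A hypothetical stand-in (the real helper may read input differently):

class play:
    """ Hypothetical stand-in for the human input helper referenced above. """
    @staticmethod
    def get_move() -> int:
        # Ask the human for an action index; 0 = pass and 1 = bet is assumed for Kuhn
        while True:
            raw = input("Your move (0 = pass, 1 = bet): ").strip()
            if raw in ("0", "1"):
                return int(raw)
            print("Please enter 0 or 1.")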
Code Example #5
    def train(self, iters: int, graph: bool=False) -> list:
        """ Trains the CFR minimization again a known opponenet strategy. """
        if graph: rtn = []

        for i in range(iters):
            # Compute action utilities
            action_util = game.util(get_action(game.OPP_STRATEGY))
            # Get regret-matched mixed-strategy actions
            self.regret(action_util, action_util[get_action(self.get_strategy())])

            if graph: rtn.append(self.get_average_strategy())

        return rtn if graph else self.get_average_strategy()
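This train method presumably sits on a regret-matching class that also defines regret, get_strategy, and get_average_strategy (like the Regret sketch after Code Example #1), with game.OPP_STRATEGY a fixed opponent mixed strategy. A hypothetical invocation:

# Hypothetical usage; the trainer class name is an assumption
trainer = Regret()
avg = trainer.train(100000)   # average strategy converges to a best response to game.OPP_STRATEGY
print(avg)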
Code Example #6
def dudo_play(first: int = None) -> float:
    """ Has a human play against the computer. """
    # Choose the starting player per call; a random default argument is only evaluated once
    if first is None:
        first = random.randint(0, 1)
    rolls = [random.randrange(1, 7), random.randrange(1, 7)]

    print(f"Your roll is {rolls[first]}")
    # Human plays index 0, computer plays index 1; use two computer lambdas instead for self-play
    p = [lambda i: play.get_move(), lambda i: get_action(nodes[i].get_average_strategy())]
    if first == 1:
        p = p[::-1]

    history = [False]*ACTIONS
    turn = 0
    while game.util(rolls, history) is None:
        move = p[turn](game.hash_info_set(rolls[turn], history))
        # print(game.last(history), "|", nodes[game.hash_info_set(rolls[turn], history)])  # debug output
        if turn != first:
            print(f"Computer plays {game.actions[move]}")
        history[move] = True
        turn ^= 1

    print(f"Computer had roll {rolls[first ^ 1]}")
    return (1 if turn == first else -1)*game.util(rolls, history)