Ejemplo n.º 1
0
def self_play(agent):
    """Play one full game in which ``agent`` picks every move.

    Starting from a fresh ``State()``, the loop runs while the state's
    ``winner`` attribute equals 0. On each turn the state is encoded with
    ``state_tensor``, ``agent.act(batch, valid)`` returns a flat index, and
    that index is split into ``(index // N, index % N)`` to build the
    ``Action`` that is then applied.

    Returns:
        A ``(states, actions)`` pair of equal-length lists. ``states[i]`` is
        a copy of the game taken just before ``actions[i]`` was applied.
        Every recorded copy has its ``winner`` attribute overwritten with
        the final winner of the game.
    """
    game = State()
    snapshots = []
    moves = []

    while game.winner == 0:
        batch, valid, _ = state_tensor([game])
        flat_index = agent.act(batch, valid)
        # The agent answers with a single flat index; Action takes the
        # quotient and remainder with respect to N.
        move = Action(flat_index // N, flat_index % N)

        # Record the position as it was *before* the move is made.
        snapshots.append(game.copy())
        moves.append(move)

        game.make_move(move)

    # Label every recorded position with the outcome of the finished game.
    outcome = game.winner
    for snapshot in snapshots:
        snapshot.winner = outcome

    return snapshots, moves
Ejemplo n.º 2
0
def duel(*players, show=False):
    """Let the given players take turns on a fresh game until it is decided.

    Each player is a callable invoked as ``player(batch, valid)`` with the
    first two values returned by ``state_tensor([state])``; it must return a
    flat index, which is split into ``(index // N, index % N)`` to build an
    ``Action``. The player whose turn it is gets asked again, with the same
    inputs, until ``state.legal_move`` accepts the proposed action, so a
    player that never produces a legal move keeps this function looping
    forever.

    Args:
        *players: Callables that take turns in the order given, wrapping
            around after the last one.
        show: When true, ``show_board()`` is called on the state before each
            accepted move is applied.

    Returns:
        The ``winner`` attribute of the state once it is no longer 0.
    """
    game = State()
    turn = 0
    player_count = len(players)

    while game.winner == 0:
        batch, valid, _ = state_tensor([game])
        current = players[turn]

        # Keep asking the same player until the move is accepted as legal.
        accepted = False
        while not accepted:
            flat_index = current(batch, valid)
            move = Action(flat_index // N, flat_index % N)
            accepted = game.legal_move(move)

        if show:
            # The board is displayed as it stands before the move is applied.
            game.show_board()

        game.make_move(move)
        turn = (turn + 1) % player_count

    return game.winner