def self_play(agent):
    """Play one game in which ``agent`` picks every move.

    Starting from a fresh ``State()``, the loop runs until ``winner`` is
    non-zero. Before each move is applied, a copy of the current state and
    the chosen action are recorded.

    Args:
        agent: Object exposing ``act(batch, valid)`` that returns a flat
            move index. The index is decoded as
            ``Action(idx // N, idx % N)``.

    Returns:
        A ``(states, actions)`` tuple of equal-length lists. Every recorded
        state has its ``winner`` attribute overwritten with the final
        game's winner.
    """
    game = State()
    history = []
    moves = []

    while game.winner == 0:
        batch, valid, _ = state_tensor([game])
        flat_index = agent.act(batch, valid)
        first = flat_index // N
        second = flat_index % N
        move = Action(first, second)

        # Snapshot the position *before* the move is applied.
        history.append(game.copy())
        moves.append(move)
        game.make_move(move)

    # Label every recorded position with the eventual outcome.
    for snapshot in history:
        snapshot.winner = game.winner

    return history, moves
def duel(*players, show=False):
    """Play one game between ``players``, who take turns in order.

    Each player is called as ``player(batch, valid)`` and must return a flat
    move index, decoded as ``Action(idx // N, idx % N)``. The same player is
    asked again until ``State.legal_move`` accepts the proposed action.

    Args:
        *players: Callables taking ``(batch, valid)`` and returning a flat
            move index. Turns rotate through them cyclically.
        show: When true, ``show_board()`` is called on the state each turn
            before the selected move is applied.

    Returns:
        The final ``winner`` value of the game state (non-zero).
    """
    # NOTE(review): block nesting was reconstructed from a source with
    # collapsed whitespace; confirm that show_board() belongs once per turn,
    # after move selection and before make_move().
    board = State()
    turn = 0

    while board.winner == 0:
        batch, valid, _ = state_tensor([board])
        mover = players[turn]

        # Keep querying the current player until a legal move is proposed.
        move = None
        while move is None or not board.legal_move(move):
            flat_index = mover(batch, valid)
            move = Action(flat_index // N, flat_index % N)

        if show:
            board.show_board()

        board.make_move(move)
        turn = (turn + 1) % len(players)

    return board.winner