def run_agents(agent1, agent2, episodes=100, verbose=False, train=True): agent1wins = 0 agent2wins = 0 draws = 0 for episode in range(episodes): game = TicTacToe(**game_settings) res = STEP_RESULT.NONE agent1_team = X agent2_team = O if episode % 2 == 1: agent1_team = X agent2_team = O for i in range(game_settings["size"]**2): team_this_turn = i % 2 state = game.state(team_this_turn) action = None if (i % 2 == agent1_team): action = agent1.step(state, train) else: action = agent2.step(state, train) _, _, res, _ = game.apply(action) if (res == STEP_RESULT.DRAW): draws += 1 break elif res == STEP_RESULT.X_WIN: if agent1_team == X: agent1wins += 1 else: agent2wins += 1 break elif res == STEP_RESULT.O_WIN: if agent1_team == O: agent1wins += 1 else: agent2wins += 1 break state = game.state(agent1_team) agent1.step(state, train) state = game.state(agent2_team) agent2.step(state, train) if verbose: print( "episode {} - Score: Agent 1 {} - Agent 2 {} - Draws {} - Agent2 Pts - {}" .format(episode + 1, agent1wins, agent2wins, draws, agent2wins - agent1wins - draws))