# Start Play state if b_: gamestate = PLAY_STATE ttt = Tictactoe() time.sleep(0.1) # Play state elif gamestate == PLAY_STATE: # Mouse click if event.type == pygame.MOUSEBUTTONDOWN: point = event.pos # If user's trun if not ai_turn: # If game is not over if not ttt.gameover(): # Cells b_ = True for i in range(3): for j in range(3): if cells[i][j].collidepoint(point): if ttt.is_valid_action((i, j)): ttt.action((i, j)) ai_turn = True break # Menu button if menu_button.collidepoint(point): gamestate = MENU_STATE
def train(self, num_games):
    """
    Train the agent through self-play.

    Every game runs on a fresh ``Tictactoe`` instance. Moves are picked
    with ``self.best_action(state, epsilon_true=True)`` and Q-values are
    adjusted via ``self.update_q_value`` after each move.

    Args:
        num_games (int): how many self-play games to run
    """
    for n in range(num_games):
        # Progress report on every 1000th game (1, 1001, 2001, ...)
        if n % 1000 == 0:
            print(f'Game #{n + 1}')

        ttt = Tictactoe()

        # Most recent (state, action) pair recorded for each player
        last = {mark: {'state': None, 'action': None} for mark in ('X', 'O')}

        while True:
            # Choose a move for the player whose turn it is
            state = ttt.get_board()
            action = self.best_action(state, epsilon_true=True)

            # Remember it as that player's latest move
            mover = last[ttt.get_player()]
            mover['state'] = state
            mover['action'] = action

            # Play the move and observe the resulting board
            ttt.action(action)
            new_state = ttt.get_board()

            if not ttt.gameover():
                # Game goes on: the player now to move gets a neutral
                # update for their previous move, if they have made one
                pending = last[ttt.get_player()]
                if pending['state'] is not None:
                    self.update_q_value(pending['state'], pending['action'],
                                        new_state, 0)
                continue

            if ttt.get_winner() is None:
                # Draw: neutral reward for the final move
                self.update_q_value(state, action, new_state, 0)
            else:
                # Final move won the game: reward it ...
                self.update_q_value(state, action, new_state, 1)
                # ... and penalise the latest move of the player now to move
                # (the loser, assuming ``action`` hands the turn over)
                beaten = last[ttt.get_player()]
                self.update_q_value(beaten['state'], beaten['action'],
                                    new_state, -1)
            break

    print('Training done')