def evaluate(self, board, piece_type):  # method name assumed; body from source
    """Heuristic score: stone-count difference from the given side's view."""
    count_black = 0
    count_white = 2.5  # white starts with komi (2.5 points) as compensation for moving second
    for i in range(self.size):
        for j in range(self.size):
            if board[i][j] == 1:    # 1 = black stone
                count_black += 1
            elif board[i][j] == 2:  # 2 = white stone
                count_white += 1
    if piece_type == 1:
        diff = count_black - count_white
    else:
        diff = count_white - count_black
    return diff


if __name__ == "__main__":
    N = 5
    go_game = Game(N)
    game_piece_type, previous_board, current_board, go_game.n_move = go_game.read_input()
    go_game.set_board(game_piece_type, previous_board, current_board)
    player = Minimax()
    if go_game.new_game:
        # Fresh game: clear both the in-memory and on-disk transposition caches.
        player.cache = {}
        open("cache.txt", "w").close()
    player.side = game_piece_type
    next_action = player.get_input(go_game, game_piece_type)
    go_game.n_move += 2  # advance past this move and the opponent's reply
    go_game.write_output(next_action)
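# For orientation, a minimal sketch of the I/O convention the driver above
# appears to rely on: a grader-written input.txt holding the piece type
# (1 = black, 2 = white) followed by the previous and current 5x5 boards,
# and the chosen move written back to output.txt as "i,j" or "PASS".
# File names, layout, and signatures here are assumptions, not taken from
# this code (the Minimax driver's read_input also returns a persisted move
# counter, omitted in this sketch).
def read_input_sketch(path="input.txt"):
    with open(path) as f:
        lines = f.read().splitlines()
    piece_type = int(lines[0])
    previous_board = [[int(c) for c in line] for line in lines[1:6]]
    current_board = [[int(c) for c in line] for line in lines[6:11]]
    return piece_type, previous_board, current_board


def write_output_sketch(action, path="output.txt"):
    with open(path, "w") as f:
        f.write("PASS" if action == "PASS" else f"{action[0]},{action[1]}")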
def update_opponent_q_values(self, go):  # method name assumed; body from source
    """Propagate a LOSS reward backward through the opponent's move history."""
    reward = LOSS
    max_q_value = -1.0
    first_iteration = True
    self.states_to_update_opponent.reverse()  # iterate from last move back to first
    for state, move in self.states_to_update_opponent:
        # Look up the Q-values for this state's canonical orientation,
        # using the table for whichever side the opponent plays.
        if self.opponent == 1:
            base_state_action_q, orientation, base_state = self.state_q_values_X(go, state)
        else:
            base_state_action_q, orientation, base_state = self.state_q_values_O(go, state)
        if first_iteration:
            base_state_action_q[move] = reward  # terminal state takes the raw reward
            first_iteration = False
        else:
            # Q(s, a) <- Q(s, a) + alpha * (gamma * max_a' Q(s', a') - Q(s, a))
            base_state_action_q[move] = base_state_action_q[move] \
                + self.alpha * (self.gamma * max_q_value - base_state_action_q[move])
        max_q_value = max(base_state_action_q.values())
    self.opponent_actions = []
    self.states_to_update_opponent = []


if __name__ == "__main__":
    N = 5
    go_game = Game(N)
    game_piece_type, previous_board, board = go_game.read_input()
    go_game.set_board(game_piece_type, previous_board, board)
    player = Q_learning_agent()
    Q_learning_agent.identity = game_piece_type  # record which side this agent plays
    player.fight(0)
    next_action = player.get_input(go_game, game_piece_type)
    go_game.write_output(next_action)
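# The loop above is a standard backward Q-learning sweep: the terminal state
# takes the raw reward, and each earlier state is pulled toward the discounted
# best value of its successor. A self-contained toy version of that backup is
# sketched below; the Q-table shape, reward constant, and hyperparameter
# values are assumptions for illustration, not the agent's actual settings.
def backup_sketch(q_table, history, reward=-1.0, alpha=0.7, gamma=0.9):
    """history: [(state_key, move), ...] ordered from first move to last."""
    max_q = -1.0
    first = True
    for state_key, move in reversed(history):
        q = q_table.setdefault(state_key, {})
        if first:
            q[move] = reward  # terminal state takes the raw reward
            first = False
        else:
            # Q(s,a) <- Q(s,a) + alpha * (gamma * max_a' Q(s',a') - Q(s,a))
            q.setdefault(move, 0.0)
            q[move] += alpha * (gamma * max_q - q[move])
        max_q = max(q.values())
    return q_table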