def get_input(self, board: Game, piece_type):
    self.load_dict()
    print(board.n_move)
    if board.count_player_stones(piece_type) <= 0:
        self.side = piece_type
        self.opponent = 1 if self.side == 2 else 2
    if board.is_position_valid(2, 2, self.side, True):
        copy_board = copy.deepcopy(board)
        copy_board.next_board(2, 2, self.side, True)
        print("Minimax: piece_type = {}".format(self.side),
              "current board value = {}".format(self.total_score(copy_board, self.side)))
        return 2, 2
    if board.is_game_finished():
        return
    else:
        # score, action = self._max(board)
        DEPTH = 3
        if board.n_move > 16:
            DEPTH = 24 - board.n_move
        action = self.alpha_beta_cutoff_search(board, DEPTH)
        copy_board = copy.deepcopy(board)
        if action != "PASS":
            print(action)
            copy_board.next_board(action[0], action[1], self.side, True)
            print("Minimax: piece_type = {}".format(self.side),
                  "current board value = {}".format(self.total_score(copy_board, self.side)))
        self.save_dict()
        return action
        # board.move(action[0], action[1], self.side)
def battle(player1, player2, total_games, show_result=False):
    p1_stats = [0, 0, 0]  # draw, win, lose
    p2_stats = [0, 0, 0]
    timer = time.time()
    game_number = 0
    batch = 100
    for i in range(total_games + 1):
        go = Game(GAME_SIZE)
        go.verbose = show_result
        go.new_board()
        if game_number % int(total_games / 100) == 0:
            print('number of iterations = {}'.format(i))
            print('time = {}'.format(time.time() - timer))
            timer = time.time()
        p1_stats, p2_stats = play_learn_track(go, game_number, player1, player2,
                                              p1_stats, p2_stats, batch)
        game_number += 1
        go = Game(GAME_SIZE)
        go.verbose = show_result
        go.new_board()
        p1_stats, p2_stats = play_learn_track(go, game_number, player2, player1,
                                              p1_stats, p2_stats, batch)
        game_number += 1
    return
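# battle() above delegates each game to play_learn_track(), which is not part of
# this snippet. The sketch below is only a guess at its shape: go.play(p1, p2, False)
# is taken from the test harnesses in this repo, while the learn() hook and the
# result-to-stats mapping are assumptions, not the project's actual API.
def play_learn_track(go, game_number, player1, player2, p1_stats, p2_stats, batch):
    # Play one game; result is assumed to be 0 (draw), 1 (black wins) or 2 (white wins).
    result = go.play(player1, player2, False)
    p1_stats[result] += 1

    # Give learning agents a chance to update from the finished game.
    for player in (player1, player2):
        if hasattr(player, "learn"):
            player.learn(go)

    # Print a progress line once per batch of games.
    if batch and game_number % batch == 0:
        print("game {} finished, stats so far: {} {}".format(game_number, p1_stats, p2_stats))
    return p1_stats, p2_stats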
def get_input(self, board: Game, piece_type):
    if board.count_player_stones(piece_type) <= 0:
        self.side = piece_type
        self.opponent = 1 if self.side == 2 else 2
    if board.is_position_valid(2, 2, self.side, True):
        return 2, 2
    if board.is_game_finished():
        return
    else:
        # score, action = self._max(board)
        action = self.alpha_beta_cutoff_search(board, 3)
        return action
        # board.move(action[0], action[1], self.side)
def get_input(self, board: Game, piece_type):
    if board.count_player_stones(piece_type) <= 0:
        self.side = piece_type
        self.opponent = 1 if self.side == 2 else 2
    if board.is_position_valid(2, 2, self.side, True):
        copy_board = copy.deepcopy(board)
        copy_board.place_chess(2, 2, self.side, True)
        # print("Minimax_old: piece_type = {}".format(self.side),
        #       "current board value = {}".format(self.total_score(copy_board, self.side)))
        return 2, 2
    if board.is_game_finished():
        return
    else:
        # score, action = self._max(board)
        action = self.alpha_beta_cutoff_search(board, DEPTH)
        if action != "PASS":
            copy_board = copy.deepcopy(board)
            copy_board.place_chess(action[0], action[1], self.side, True)
            # print("Minimax_old: piece_type = {}".format(self.side),
            #       "current board value = {}".format(self.total_score(copy_board, self.side)))
        return action
        # board.move(action[0], action[1], self.side)
def get_input(self, go: Game, piece_type):
    if self.identity is None:
        self.identity = piece_type
    elif self.identity != piece_type:
        self.identity = piece_type
    else:
        self.__init__(piece_type)
    self.load_dict()
    # print(board.n_move)
    go.visualize_board()
    if go.count_player_stones(piece_type) <= 0:
        self.identity = piece_type
        self.opponent = 1 if self.identity == 2 else 2
        self.cache = {}
        open("cache.txt", "w").close()
    if go.is_position_valid(2, 2, self.identity, True):
        copy_board = go.make_copy()
        copy_board.next_board(2, 2, self.identity, True)
        # print("Minimax: piece_type = {}".format(self.side),
        #       "current board value = {}".format(self.total_score(copy_board, self.side)))
        return 2, 2
    if go.is_game_finished():
        return
    else:
        # score, action = self._max(board)
        depth = DEPTH
        action = self.alpha_beta_adaptive_agent(go, depth)
        copy_board = go.make_copy()
        if action != "PASS":
            # print(action)
            copy_board.next_board(action[0], action[1], self.identity, True)
            # print("Minimax: piece_type = {}".format(self.side),
            #       "current board value = {}".format(self.total_score(copy_board, self.side)))
        self.save_dict()
        return action
        # board.move(action[0], action[1], self.side)
from mygame import Game

game = Game()
game.start()
# Tail of the board-evaluation helper: stone-count difference with a 2.5-point
# komi credited to white. The enclosing method signature is not part of this
# fragment; count_black is initialised here so the loop below is well defined.
count_black = 0
count_white = 2.5
for i in range(self.size):
    for j in range(self.size):
        if board[i][j] == 1:
            count_black += 1
        elif board[i][j] == 2:
            count_white += 1
if piece_type == 1:
    diff = count_black - count_white
else:
    diff = count_white - count_black
return diff


if __name__ == "__main__":
    N = 5
    go_game = Game(N)
    game_piece_type, previous_board, current_board, go_game.n_move = go_game.read_input()
    go_game.set_board(game_piece_type, previous_board, current_board)
    player = Minimax()
    if go_game.new_game:
        player.cache = {}
        open("cache.txt", "w").close()
    player.side = game_piece_type
    next_action = player.get_input(go_game, game_piece_type)
    go_game.n_move += 2
    go_game.write_output(next_action)
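# Standalone sanity check of the komi scoring above. The stone_diff() helper and
# the literal 3x3 demo board are illustrative only, not part of the original
# project; they just restate the same arithmetic in a self-contained form.
def stone_diff(board, piece_type, komi=2.5):
    black = sum(row.count(1) for row in board)
    white = sum(row.count(2) for row in board) + komi
    return black - white if piece_type == 1 else white - black

demo = [[1, 2, 0],
        [1, 0, 2],
        [1, 0, 0]]
assert stone_diff(demo, 1) == 3 - (2 + 2.5)   # black is behind by 1.5 after komi
assert stone_diff(demo, 2) == (2 + 2.5) - 3   # white is ahead by 1.5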
def testMinimax():
    # qlearner = Q_learning_agent()
    random_player = RandomPlayer()
    minimax = Minimax()
    # qlearner.fight()
    # player1: Player instance, always X
    # player2: Player instance, always O
    p1_stats = [0, 0, 0]
    p2_stats = [0, 0, 0]
    player1 = minimax
    player2 = random_player
    for i in range(int(TEST_GAMES)):
        go = Game(GAME_SIZE)
        go.verbose = False
        go.new_board()
        result = go.play(player1, player2, False)
        p1_stats[result] += 1
    for i in range(int(TEST_GAMES)):
        go = Game(GAME_SIZE)
        go.verbose = False
        go.new_board()
        result = go.play(player2, player1, False)
        p2_stats[result] += 1
    print(p1_stats, p2_stats)
    p1_stats = [round(x / TEST_GAMES * 100.0, 1) for x in p1_stats]
    sys.stdout = open("Minimax_results.txt", "a")
    print('_' * 60)
    print('{:>15}(X) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player1.__class__.__name__, p1_stats[1], p1_stats[0], p1_stats[2]).center(50))
    print('{:>15}(O) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player2.__class__.__name__, p1_stats[2], p1_stats[0], p1_stats[1]).center(50))
    print('_' * 60)
    print()
    p2_stats = [round(x / TEST_GAMES * 100.0, 1) for x in p2_stats]
    print('_' * 60)
    print('{:>15}(X) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player2.__class__.__name__, p2_stats[1], p2_stats[0], p2_stats[2]).center(50))
    print('{:>15}(O) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player1.__class__.__name__, p2_stats[2], p2_stats[0], p2_stats[1]).center(50))
    print('_' * 60)
    print()
def testQlearner(dict_num):
    qlearner = Q_learning_agent()
    random_player = RandomPlayer()
    qlearner.fight(dict_num)
    if dict_num > 0:
        qlearner.load_dict(dict_num)
    # player1: Player instance, always X
    # player2: Player instance, always O
    p1_stats = [0, 0, 0]
    p2_stats = [0, 0, 0]
    player1 = qlearner
    player2 = random_player
    for i in range(int(TEST_GAMES)):
        go = Game(GAME_SIZE)
        go.verbose = False
        go.new_board()
        result = go.play(player1, player2, False)
        p1_stats[result] += 1
    for i in range(int(TEST_GAMES)):
        go = Game(GAME_SIZE)
        go.verbose = False
        go.new_board()
        result = go.play(player2, player1, False)
        p2_stats[result] += 1
    print(p1_stats, p2_stats)
    p1_stats = [round(x / TEST_GAMES * 100.0, 1) for x in p1_stats]
    print('_' * 60)
    print('{:>15}(X) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player1.__class__.__name__, p1_stats[1], p1_stats[0], p1_stats[2]).center(50))
    print('{:>15}(O) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player2.__class__.__name__, p1_stats[2], p1_stats[0], p1_stats[1]).center(50))
    print('_' * 60)
    print()
    p2_stats = [round(x / TEST_GAMES * 100.0, 1) for x in p2_stats]
    print('_' * 60)
    print('{:>15}(X) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player2.__class__.__name__, p2_stats[1], p2_stats[0], p2_stats[2]).center(50))
    print('{:>15}(O) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player1.__class__.__name__, p2_stats[2], p2_stats[0], p2_stats[1]).center(50))
    print('_' * 60)
    print()
def alpha_beta_adaptive_agent(self, go: Game, depth=4):
    def max_value(board, alpha, beta, depth):
        if depth == 0 or board.is_game_finished():
            state = board.state_string()
            if state in self.cache:
                return self.cache[state]
            return self.total_score(board, self.identity)
        v_max = -numpy.inf
        candidates = []
        for i in range(board.size):
            for j in range(board.size):
                if board.is_position_valid(i, j, self.identity, test_check=True):
                    candidates.append((i, j))
        random.shuffle(candidates)
        if not candidates:
            # No legal move: the maximizer passes.
            v_max = max(v_max, min_value(board, alpha, beta, depth - 1))
            if v_max >= beta:
                return v_max
            alpha = max(alpha, v_max)
        else:
            for i, j in candidates:
                poss_max_board = board.make_copy()
                poss_max_board.next_board(i, j, self.identity, False)
                poss_max_board.n_move += 1
                v_max = max(v_max, min_value(poss_max_board, alpha, beta, depth - 1))
                state = board.state_string()
                self.cache[state] = v_max
                if v_max >= beta:
                    return v_max
                alpha = max(alpha, v_max)
        return v_max

    def min_value(board, alpha, beta, depth):
        if depth == 0 or board.is_game_finished():
            state = board.state_string()
            if state in self.cache:
                return self.cache[state]
            return self.total_score(board, self.identity)
        v_min = numpy.inf
        candidates = []
        for i in range(board.size):
            for j in range(board.size):
                if board.is_position_valid(i, j, self.opponent, test_check=True):
                    candidates.append((i, j))
        random.shuffle(candidates)
        if not candidates:
            # No legal move: the minimizer passes.
            v_min = min(v_min, max_value(board, alpha, beta, depth - 1))
            if v_min <= alpha:
                return v_min
            beta = min(beta, v_min)
        else:
            for i, j in candidates:
                poss_min_board = board.make_copy()
                valid = poss_min_board.next_board(i, j, self.opponent, True)
                poss_min_board.n_move += 1
                if not valid:
                    raise ValueError("in min invalid move")
                v_min = min(v_min, max_value(poss_min_board, alpha, beta, depth - 1))
                state = board.state_string()
                self.cache[state] = v_min
                if v_min <= alpha:
                    return v_min
                beta = min(beta, v_min)
        return v_min

    best_score = -numpy.inf
    beta = numpy.inf
    best_action = None
    candidates = []
    for i in range(go.size):
        for j in range(go.size):
            if go.is_position_valid(i, j, self.identity, test_check=True):
                candidates.append((i, j))
    random.shuffle(candidates)
    # Adapt the search depth to the stage of the game.
    if go.n_move < 6:
        depth = 0
    elif go.n_move < 10:
        depth = 2
    elif len(candidates) < 24 - 18:
        depth = len(candidates)
    if not candidates:
        best_action = "PASS"
    else:
        for i, j in candidates:
            possible_board = go.make_copy()
            possible_board.next_board(i, j, self.identity, True)
            possible_board.n_move += 1
            value = min_value(possible_board, best_score, beta, depth)
            if value > best_score:
                best_score = value
                best_action = (i, j)
    return best_action
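# Illustrative only: driving the adaptive agent above directly from a small
# script. Game comes from mygame (see the import elsewhere in this repo); the
# Minimax import location, and setting identity/opponent/cache by hand, are
# assumptions made so the sketch is self-contained.
from mygame import Game

go = Game(5)
go.new_board()
go.n_move = 0

agent = Minimax()          # assumed to be importable from the agent's module
agent.identity = 1         # play black
agent.opponent = 2
agent.cache = {}

move = agent.alpha_beta_adaptive_agent(go, depth=4)
print("chosen move:", move)  # (row, col) tuple or "PASS"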
            else:
                # self.q_values[state][move] = self.q_values[state][move] * (1 - self.alpha) \
                #     + self.alpha * self.gamma * max_q_value
                # base_state_action_q[move] = base_state_action_q[move] * (1 - self.alpha) \
                #     + self.alpha * self.gamma * max_q_value
                base_state_action_q[move] = base_state_action_q[move] \
                    + self.alpha * (self.gamma * max_q_value - base_state_action_q[move])
            max_q_value = max(base_state_action_q.values())
        if num_game % int(self.LEARN_GAMES / 100) == 0:
            self.update_epsilon()
            self.update_alpha()
        if num_game % int(self.LEARN_GAMES / 100) == 0:
            if self.file_count == 5:
                self.file_count = 0
            self.save_dict(self.file_count)
            self.save_policy(self.file_count)
            self.file_count += 1
        self.states_to_update = []


if __name__ == "__main__":
    N = 5
    game_piece_type, previous_board, board = readInput(N)
    go_game = Game(N)
    go_game.set_board(game_piece_type, previous_board, board)
    player = Q_learning_agent()
    Q_learning_agent.identity = game_piece_type
    player.fight()
    next_action = player.get_input(go_game, game_piece_type)
    writeOutput(next_action)
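# update_epsilon() and update_alpha() are called in the fragment above but not
# defined there. A plausible minimal sketch, assuming simple multiplicative
# decay toward a floor; the decay constants and the schedule itself are
# illustrative, not taken from the original agent. Both are meant to live as
# methods on Q_learning_agent.
def update_epsilon(self, decay=0.95, floor=0.01):
    # Reduce exploration as more training games are played.
    self.epsilon = max(floor, self.epsilon * decay)

def update_alpha(self, decay=0.95, floor=0.05):
    # Reduce the learning rate so late updates do not overwrite earlier learning.
    self.alpha = max(floor, self.alpha * decay)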