def play_game(players, board):
    """Play one game between two players and report who won.

    Args:
        players: Sequence indexed at 0 and 1. Each entry provides ``name``,
            ``no`` (the piece number handed to ``Board``) and a
            ``selector(board, alpha, beta, maximising)`` callable that
            returns the chosen column.
        board: Board state; it is modified in place through ``Board.play``.

    Returns:
        The ``name`` of the winning player, or ``'DRAW'`` when the loop ends
        without the last mover having won.
    """
    # Stay quiet when this module is imported, e.g. while running reports.
    verbose = __name__ == '__main__'
    if verbose:
        print(board)
        print("============\n")

    turn = 0
    while True:
        # Stop once the player who moved last has won ...
        if Board.so_won(board, players[turn ^ 1].no):
            break
        # ... or once there is no column left to play.
        if not len(Board.valid_locations(board)) > 0:
            break

        mover = players[turn]
        if verbose:
            print("{}:".format(mover.name))

        selected_col = mover.selector(board, -math.inf, math.inf, True)
        # NOTE(review): an illegal selection is skipped, yet the turn still
        # passes to the other player below - confirm this is intended.
        if Board.legal_check(board, selected_col):
            landing_row = Board.where_it_lands(board, selected_col)
            Board.play(board, landing_row, selected_col, mover.no)

        if verbose:
            Board.print_right_way(board)
            print("============\n")

        turn ^= 1

    # After the final toggle, ``turn ^ 1`` indexes the player who moved last.
    last_mover = players[turn ^ 1]
    if Board.so_won(board, last_mover.no):
        print("VICTORY FOR " + last_mover.name)
        return last_mover.name

    print("DRAW")
    return 'DRAW'
def minimax(self, board, depth, alpha, beta, maximising_Player):
    """Search the game tree with minimax and alpha-beta pruning.

    Args:
        board: Position to evaluate; it is copied before every trial move.
        depth: Remaining search depth; 0 triggers the heuristic evaluation.
        alpha: Best score the maximising side is already assured of.
        beta: Best score the minimising side is already assured of.
        maximising_Player: Truthy when it is this agent's move, falsy when
            it is the opponent's (piece ``3 - self.no``).

    Returns:
        A ``(column, score)`` pair. ``column`` is ``None`` at leaves, because
        callers always expect two values even when no move is chosen.
    """
    candidates = Board.valid_locations(board)
    terminal = Board.terminal_node(board)

    # Leaf handling: a finished game gets a fixed score, a depth cut-off
    # gets the heuristic value.
    if terminal:
        if Board.so_won(board, self.no):
            return None, 100000
        if Board.so_won(board, 3 - self.no):
            return None, -100000
        return None, 0
    if depth == 0:
        return None, self.value_function(board, self.no)

    if maximising_Player:
        piece, best_score = self.no, -math.inf
    else:
        piece, best_score = 3 - self.no, math.inf

    # Random fallback in case no candidate ever improves on the initial bound.
    best_col = random.choice(candidates)

    for col in candidates:
        row = Board.where_it_lands(board, col)
        trial = board.copy()
        Board.play(trial, row, col, piece)
        _, child_score = self.minimax(trial, depth - 1, alpha, beta,
                                      not maximising_Player)

        if maximising_Player:
            if child_score > best_score:
                best_score, best_col = child_score, col
            alpha = max(alpha, best_score)
        else:
            if child_score < best_score:
                best_score, best_col = child_score, col
            beta = min(beta, best_score)

        # Cut-off: the other side already has a better option elsewhere.
        if alpha >= beta:
            break

    return best_col, best_score
def expansion(self, child):
    """Second step of MCTS: expand the tree from the selected node if possible.

    The move ``child`` is played for this agent on a copy of ``self.board``.
    If that ends the game, the outcome (1 win, 0 loss, 0.5 draw) is folded
    into ``self.values`` for that cell as an incremental mean over
    ``self.N``. Otherwise the move is handed on to ``self.simulation``.

    Args:
        child: Indexable whose first two items are the ``(row, column)`` of
            the move to expand.

    Returns:
        ``1`` when the move wins the game immediately, otherwise ``None``.
    """
    row, col = child[0], child[1]
    next_board = self.board.copy()
    Board.play(next_board, row, col, self.no)

    if not Board.terminal_node(next_board):
        self.simulation(child)
        return None

    self.N += 1
    weighted_previous = self.values[row][col] * (self.N - 1)

    # The winner must be read from the board *after* the move. Checking
    # ``self.board`` (the pre-move position) can never see the win this move
    # produced and would score it as a draw.
    if Board.so_won(next_board, self.no):  # MCTS won
        self.values[row][col] = (weighted_previous + 1) / self.N
        return 1
    if Board.so_won(next_board, 3 - self.no):  # Opponent won
        self.values[row][col] = weighted_previous / self.N
    else:  # Draw
        self.values[row][col] = (weighted_previous + 0.5) / self.N
    return None
def simulation(self, child):
    """Third step of MCTS: simulate a random game until it ends.

    The move ``child`` is played for this agent on a copy of ``self.board``,
    then both sides alternate uniformly random legal moves, opponent first.
    The outcome is passed to ``self.backpropagation``: 1 if this agent wins,
    0 if the opponent wins, 0.5 if no moves remain without a winner.

    Visit counters (``self.cur_visits``, ``self.N``) and
    ``self.involved_nodes`` are updated for ``child`` and for every move this
    agent makes during the playout; opponent moves are not recorded.

    Args:
        child: Indexable whose first two items are the ``(row, column)`` of
            the move that starts the playout.
    """
    sim_board = self.board.copy()
    Board.play(sim_board, child[0], child[1], self.no)
    self.cur_visits[child[0]][child[1]] += 1
    self.N += 1
    self.involved_nodes.append((child[0], child[1]))

    # Pieces are derived from self.no rather than hard-coded as ``turn + 1``,
    # so the playout stays correct whichever player number this agent holds.
    opponent_no = 3 - self.no

    # turn: 1 = this agent, 0 = opponent. The agent has just moved, so the
    # opponent goes first.
    turn = 0
    while True:
        moves = Board.valid_locations(sim_board)
        if len(moves) == 0:
            # No legal move left and nobody has won => draw.
            self.backpropagation(result=0.5)
            return

        selected_col = random.choice(moves)
        row = Board.where_it_lands(sim_board, selected_col)

        if turn == 1:
            self.cur_visits[row][selected_col] += 1  # update n of simulated nodes
            self.N += 1  # update N of child_node
            self.involved_nodes.append((row, selected_col))
            piece = self.no
        else:
            piece = opponent_no

        Board.play(sim_board, row, selected_col, piece)
        if Board.so_won(sim_board, piece):
            # Result from the agent's perspective: 1 if it won, 0 if it lost.
            self.backpropagation(result=turn)
            return

        turn ^= 1