def general_test():
    """Smoke-test MultiThreadSearch on the initial position.

    Loads the neural (tflite) heuristic when available, falling back to the
    old-school heuristic otherwise, then runs one iterative-deepening search
    and prints the chosen move with its search statistics.
    """
    ray.init()

    # Network loading
    config = TablutConfig()

    # Test network loading: prefer the neural heuristic only if the tflite
    # model initializes successfully.
    heuristic_name = "oldschool"
    heuristic_test = NeuralHeuristicFunction(config)

    if heuristic_test.init_tflite():
        # Typo fixed: "Netowrk" -> "Network"
        print("Network loaded successfully")
        heuristic_name = "neural"
    else:
        print("Network loading error")

    search = MultiThreadSearch(config, heuristic_name)
    state = AshtonTablut.get_initial()

    best_next_state, best_action, best_score, max_depth, nodes_explored, search_time = search.iterative_deepening_search(
        state=state, initial_cutoff_depth=2, cutoff_time=10.0)

    # best_action is a packed move number; decode to (y0, x0, y1, x1) coords.
    best_action = AshtonTablut.num_to_coords(best_action)
    print(
        "Game move ({0}): {1} -> {2}, Search time: {3}, Max Depth: {4}, Nodes explored: {5}, Score: {6}"
        .format(state.to_move(), (best_action[0], best_action[1]),
                (best_action[2], best_action[3]), search_time, max_depth,
                nodes_explored, best_score))
def min_branch_worker(self, a, board, to_move, turn, player, alpha, beta,
                      depth, current_cutoff_depth, start_time, cutoff_time):
    """Evaluate one MIN branch rooted at action `a` of an alpha-beta search.

    Rebuilds the state from the raw `board`, then runs a MIN step over the
    state's actions, delegating child evaluation to `self.max_value`.

    Returns:
        BranchResult(metrics, action, value, elapsed_seconds).
    """
    state = AshtonTablut.parse_board(board.copy(), to_move, turn)

    # Worker-local search bookkeeping for this branch.
    self.metric = Metrics()
    self.current_cutoff_depth = current_cutoff_depth
    self.start_time = start_time
    self.cutoff_time = cutoff_time

    v = np.inf
    terminal = state.terminal_test()
    local_start_time = ptime()

    # Cutoff test: depth limit reached, terminal position, or the global
    # time budget (measured from the shared start_time) is exhausted.
    if depth > self.current_cutoff_depth or terminal or (
            ptime() - self.start_time) > self.cutoff_time:
        if terminal:
            return BranchResult(self.metric, a, state.utility(player),
                                ptime() - local_start_time)
        return BranchResult(self.metric, a,
                            self.heuristic.evalutate(state, player),
                            ptime() - local_start_time)

    actions = state.actions()

    for action in actions:
        next_state = state.result(action)
        v = min(self.max_value(next_state, player, alpha, beta, depth + 1), v)
        if v <= alpha:
            # BUG FIX: previously returned BranchResult(self.metric, a,
            # state, v) — the state object was passed where the value
            # belongs and the value where the elapsed time belongs,
            # unlike every other return in this method.
            return BranchResult(self.metric, a, v, ptime() - local_start_time)
        beta = min(beta, v)

    return BranchResult(self.metric, a, v, ptime() - local_start_time)
def menu_selfplay(tablut):
    """Interactive menu entry: AlphaTablut plays itself until a terminal
    state is reached or the user-supplied move limit runs out."""
    seconds = int(input("Insert AlphaTablut Search time in seconds: "))
    move_limit = int(input("Insert max moves: "))

    # Prefer the neural heuristic; fall back to the old-school one whenever
    # the tflite model is missing or fails to initialize.
    heuristic = tablut.get_neural_heuristic()
    if heuristic is None:
        print("Tflite model not found... Using OldSchoolHeuristic")
        heuristic = OldSchoolHeuristicFunction()
    else:
        heuristic.init_tflite()
        if heuristic.initialized():
            print("Tflite model loaded")
        else:
            print("Model loading failed... Using OldSchoolHeuristic")
            heuristic = OldSchoolHeuristicFunction()

    search = Search(heuristic)
    current_state = AshtonTablut.get_initial()

    # Start the game loop.
    move_count = 0
    while not current_state.terminal_test() and move_count < move_limit:
        current_player = current_state.to_move()

        print("Turn {0}".format(move_count + 1))
        print("Current Player: {0}".format(current_player))
        print(current_state.display())

        best_next_state, best_action, best_score, max_depth, nodes_explored, search_time = search.iterative_deepening_search(
            state=current_state, initial_cutoff_depth=2, cutoff_time=seconds)

        best_action = AshtonTablut.num_to_coords(best_action)
        print(
            "AlphaTablut has chosen {0} -> {1}, Max depth: {2}, Nodes Explored: {3}, Best Score: {4}, Search Time: {5}"
            .format(best_action[:2], best_action[2:4], max_depth,
                    nodes_explored, best_score, search_time))

        current_state = best_next_state
        move_count += 1

    print("Done.")
def JSON_to_local_state(data, turn):
    """Translate the server's JSON game state into a local AshtonTablut state.

    Returns (state, player) where `player` is the raw value of data['turn'].
    """
    logging.debug("Received Data JSON: {0}".format(data))

    raw_board = data['board']
    player = data['turn']

    board = AshtonTablut.get_initial_board()
    # Wipe all three piece planes (white, black, king) before re-populating.
    for plane in range(3):
        board[0, :, :, plane] = np.zeros((9, 9), dtype=np.int8)

    # First letter of each server cell code selects the one-hot plane:
    # 'W' -> white (0), 'B' -> black (1), 'K' -> king (2); anything else
    # (e.g. empty cells) is ignored.
    plane_of = {'W': 0, 'B': 1, 'K': 2}
    for row in range(9):
        for col in range(9):
            code = raw_board[row][col][0]
            if code in plane_of:
                board[0, row, col, plane_of[code]] = 1

    return AshtonTablut.parse_board(board, player, turn), player
def send_move(connHandle, action, player):
    """Encode the packed move number as the server's JSON move and send it."""
    y0, x0, y1, x1 = AshtonTablut.num_to_coords(action)

    # Server notation: columns a-i, rows 1-9.
    cols = "abcdefghi"
    rows = "123456789"

    payload = json.dumps({
        "from": cols[x0] + rows[y0],
        "to": cols[x1] + rows[y1],
        "turn": "WHITE" if player == 'W' else 'BLACK'
    })

    connHandle.send(payload)
    logging.debug("Sent Data JSON: {0}".format(payload))
def cython_min_branch_worker(self, a, board, to_move, turn, player, alpha,
                             beta, depth, current_cutoff_depth, start_time,
                             cutoff_time):
    """Evaluate one MIN branch by delegating to a fresh Search instance.

    Returns BranchResult(metrics, action, value, elapsed_seconds).
    """
    state = AshtonTablut.parse_board(board.copy(), to_move, turn)
    search = Search(self.heuristic)

    # Mirror the branch parameters on the worker for bookkeeping.
    self.current_cutoff_depth = current_cutoff_depth
    self.start_time = start_time
    self.cutoff_time = cutoff_time

    branch_start = ptime()
    value = search.min_branch(state, player, alpha, beta, depth,
                              current_cutoff_depth, start_time, cutoff_time)
    # Capture the delegate's exploration counters as this branch's metrics.
    self.metric = Metrics(search.nodes_explored, search.max_depth)

    return BranchResult(self.metric, a, value, ptime() - branch_start)
def test_h():
    """Smoke-test the remote BrachActor heuristic evaluation through ray."""
    ray.init()

    # Network loading
    config = TablutConfig()

    # Test network loading: prefer the neural heuristic only if the tflite
    # model initializes successfully.
    heuristic_name = "oldschool"
    heuristic_test = NeuralHeuristicFunction(config)

    if heuristic_test.init_tflite():
        # Typo fixed: "Netowrk" -> "Network"
        print("Network loaded successfully")
        heuristic_name = "neural"
    else:
        print("Network loading error")

    actor = BrachActor.remote(config, heuristic_name)

    print(ray.get(actor.heuristic_initialized.remote()))

    state = AshtonTablut.get_initial()

    print(
        ray.get(
            actor.evalutate.remote(state.board(), state.to_move(),
                                   state.turn())))
def play(self, random=False):
    """Play one complete self-play game and record it.

    When `random` is True, a shallow cutoff search is tried first and a
    random move is substituted unless the search already found a winning
    (>= 1.0) score; otherwise a full iterative-deepening search with the
    configured per-move time budget is used for every move.

    Side effects: resets/appends self.game_history, and updates
    self.steps_without_capturing / self.draw_queue used for draw detection.

    Returns:
        (winner, utility, game_history) where winner is 'W', 'B' or "D".
    """
    current_state = AshtonTablut.get_initial()
    player = current_state.to_move()
    max_moves = self.config.max_moves
    self.game_history = [current_state.board()]

    logging.info(
        "Start new game. Player: {0}, Time per move: {1} s, Priority: {2}, Max Moves: {3}"
        .format(player, self.time_per_move, self.priority, max_moves))

    start = time.time()
    have_draw = False

    search = Search(self.heuristic)

    # Game loop: stop on terminal position, detected draw, or move limit.
    while not current_state.terminal_test(
    ) and not have_draw and current_state.turn() < max_moves:
        if random:
            # Shallow 1-ply search; keep its move only if it is already
            # winning, otherwise play a random legal move instead.
            best_next_state, best_action, best_score, max_depth, nodes_explored, search_time = search.cutoff_search(
                state=current_state, cutoff_depth=1)
            if best_score < 1.0:
                best_action = random_player(current_state)
                best_next_state = current_state.result(best_action)
                best_score, max_depth, nodes_explored, search_time = 0, 0, 0, 0
        else:
            best_next_state, best_action, best_score, max_depth, nodes_explored, search_time = search.iterative_deepening_search(
                state=current_state,
                initial_cutoff_depth=2,
                cutoff_time=self.time_per_move)

        # Track capture-free streaks: positions reached without a capture
        # feed the draw queue used by self.have_draw below.
        captured = self.have_captured(current_state, best_next_state)
        if captured == 0:
            self.steps_without_capturing += 1
            self.draw_queue.append(current_state.board())
        else:
            self.steps_without_capturing = 0
            self.draw_queue = []

    # Decode the packed move number for logging.
        best_action = AshtonTablut.num_to_coords(best_action)
        logging.debug(
            "Game move ({0}): {1} -> {2}, Search time: {3}, Max Depth: {4}, Nodes explored: {5}, Score: {6}, Captured: {7}"
            .format(current_state.to_move(),
                    (best_action[0], best_action[1]),
                    (best_action[2], best_action[3]), search_time, max_depth,
                    nodes_explored, best_score, captured))

        current_state = best_next_state
        self.game_history.append(current_state.board())

        logging.debug("\n" + current_state.display())

        have_draw = self.have_draw(current_state.board())

    end = time.time()

    # Classify the outcome from the perspective of the starting player.
    winner = "D"
    result = "DRAW"
    if not have_draw:
        if current_state.utility(player) == 1:
            winner = player
            result = "WON"
        elif current_state.utility(player) == -1:
            winner = 'W' if player == 'B' else 'B'
            result = "LOST"

    logging.info(
        "Game ended: Player {0} {1}, Moves: {2}, Time: {3} s, Utility: {4}"
        .format(player, result, current_state.turn(), end - start,
                current_state.utility(player)))

    return winner, current_state.utility(player), self.game_history
def game_loop(args):
    """Main client loop against the Tablut server.

    Connects on the color-specific port, keeps the local state in sync with
    the server's JSON messages, and when it is our turn runs an
    iterative-deepening search and sends the chosen move back. Exits on a
    WHITEWIN/BLACKWIN/DRAW message or a lost connection.
    """
    # Args
    player = args.player.upper()[0]
    playing_player = 'W'
    timeout = args.timeout
    host = args.host
    cores = args.cores
    port = 5800 if player == 'W' else 5801  # server convention: white 5800, black 5801

    ray.init()

    # Network loading
    config = TablutConfig()

    # Test network loading: use the neural heuristic only when the tflite
    # model initializes; otherwise fall back to the old-school heuristic.
    heuristic_name = "oldschool"
    heuristic_test = NeuralHeuristicFunction(config)

    if heuristic_test.init_tflite():
        # Typo fixed: "Netowrk" -> "Network"
        logging.info("Network loaded successfully")
        heuristic_name = "neural"
    else:
        logging.info("Network loading error")

    search = MultiThreadSearch(config, heuristic_name)
    turn = 0

    # Start connection: announce ourselves by name.
    connHandle = ConnectionHandler(host, port)
    connHandle.send('AlphaTablut')

    # Game loop
    while True:
        try:
            length, message = connHandle.recv()
            logging.debug("Received message of length {}".format(length))

            if message:
                # Sync local state with server state
                data = json.loads(message)
                state, playing_player = JSON_to_local_state(data, turn)

                logging.info("Turn {0}: {1} is playing.".format(
                    turn, playing_player))
                logging.info("\n" + state.display())

                # End-of-game markers arrive in the 'turn' field.
                if playing_player == 'WHITEWIN':
                    logging.info("We {} GG WP!".format(
                        'WON' if playing_player[0] == player else 'LOST'))
                    break
                elif playing_player == 'BLACKWIN':
                    logging.info("We {} GG WP!".format(
                        'WON' if playing_player[0] == player else 'LOST'))
                    break
                elif playing_player == 'DRAW':
                    logging.info("We {} GG WP!".format('DREW'))
                    break
                elif playing_player[0] == player:
                    # Our move: search within the server timeout and reply.
                    logging.info("Computing and sending action.")
                    best_next_state, best_action, best_score, max_depth, nodes_explored, search_time = search.iterative_deepening_search(
                        state=state,
                        initial_cutoff_depth=2,
                        cutoff_time=timeout)

                    send_move(connHandle, best_action, player)
                    logging.debug("Action sent!")

                    best_action = AshtonTablut.num_to_coords(best_action)
                    logging.info(
                        "Game move ({0}): {1} -> {2}, Search time: {3}, Max Depth: {4}, Nodes explored: {5}, Score: {6}"
                        .format(player, (best_action[0], best_action[1]),
                                (best_action[2], best_action[3]), search_time,
                                max_depth, nodes_explored, best_score))
                else:
                    logging.info("Waiting...")

                turn += 1
                #heuristic.set_alpha(2 / turn)
        except ConnectionException as e:
            logging.debug(e)
            # Typo fixed: "Coonection" -> "Connection"
            logging.info("Connection lost: {}".format(playing_player))
            break

    connHandle.close()
def evalutate(self, board, player, turn):
    """Rebuild a state from the raw board and score it for `player`."""
    parsed_state = AshtonTablut.parse_board(board.copy(), player, turn)
    # NOTE(review): this print looks like leftover debugging output —
    # confirm it is intentional before removing it.
    print(parsed_state.display())
    return self.heuristic.evalutate(parsed_state, player)
def menu_play(tablut):
    """Interactive menu entry: human vs AlphaTablut.

    Asks for the per-move search time and the human's color, then alternates
    human input (coordinates typed as "y0,x0,y1,x1") with engine searches
    until the game reaches a terminal state. Prints the outcome from the
    human player's perspective.
    """
    # NOTE(review): local name `time` shadows the `time` module inside this
    # function — harmless here, but confirm nothing below needs the module.
    time = input("Insert AlphaTablut Search time in seconds: ")
    time = int(time)

    player = input("Choose a player: W or B ").upper()[0]
    while player not in ('W', 'B'):
        player = input("Invalid input. Choose a player: W or B").upper()[0]

    # Initialize
    # NOTE(review): alpha_player is computed but never used below.
    alpha_player = 'W' if player == 'B' else 'B'

    # Prefer the neural heuristic; fall back to the old-school heuristic when
    # the tflite model is missing or fails to initialize.
    heuristic = tablut.get_neural_heuristic()
    if heuristic is None:
        print("Tflite model not found... Using OldSchoolHeuristic")
        heuristic = OldSchoolHeuristicFunction()
    else:
        heuristic.init_tflite()
        if heuristic.initialized():
            print("Tflite model loaded")
        else:
            print("Model loading failed... Using OldSchoolHeuristic")
            heuristic = OldSchoolHeuristicFunction()

    search = Search(heuristic)
    current_state = AshtonTablut.get_initial()

    # Start the game loop
    i = 0
    while not current_state.terminal_test():
        current_player = current_state.to_move()

        print("Turn {0}".format(i + 1))
        print("Current Player: {0}".format(current_player))
        print(current_state.display())

        if current_player == player:
            # Human turn: keep prompting until the typed coordinates parse
            # and match one of the legal actions.
            input_valid = False
            while not input_valid:
                actions = [
                    AshtonTablut.num_to_coords(x)
                    for x in current_state.actions()
                ]

                action = input("Choose an action from {0}:".format(actions))

                # Strip every character except digits and commas before
                # parsing, so inputs like "(1, 2, 3, 4)" are accepted.
                filtered_action = action
                for x in action:
                    if x not in "1234567890,":
                        filtered_action = filtered_action.replace(x, '')

                try:
                    action = tuple(int(x) for x in filtered_action.split(","))
                except ValueError as a:
                    print(a)
                    continue

                if action in actions:
                    input_valid = True

            print("You have chosen {0} -> {1}".format(action[:2],
                                                      action[2:4]))
            # Re-pack the coordinate tuple into the engine's move number.
            action = AshtonTablut.coords_to_num(action[0], action[1],
                                                action[2], action[3])
            current_state = current_state.result(action)
        else:
            # Engine turn: iterative-deepening search within the time budget.
            best_next_state, best_action, best_score, max_depth, nodes_explored, search_time = search.iterative_deepening_search(
                state=current_state, initial_cutoff_depth=2, cutoff_time=time)

            best_action = AshtonTablut.num_to_coords(best_action)
            print(
                "AlphaTablut has chosen {0} -> {1}, Max depth: {2}, Nodes Explored: {3}, Best Score: {4}, Search Time: {5}"
                .format(best_action[:2], best_action[2:4], max_depth,
                        nodes_explored, best_score, search_time))

            current_state = best_next_state

        i += 1

    # Report the result from the human player's point of view.
    utility = current_state.utility(player)
    if utility >= 1:
        print("You Won!")
    elif utility <= -1:
        print("You Lost!")

    print("Done.")