def get_sample_from_entries(entries):
    """Draw one random training sample from a collection of recorded games.

    A random entry and a random ply are chosen; when the entry carries a
    "random_ply" key, the ply right after it is used instead. Positions whose
    recorded move is "pass" are rejected and the draw is repeated.

    Args:
        entries: Sequence of game records. Each record is read through the
            keys "boards", "moves" and "result", and optionally through
            "random_ply" and "dists".

    Returns:
        A tuple ``(features, desired_policy, desired_value)``: the encoded
        board after a random symmetry (index drawn from 0..7), a float32
        array of shape ``(BOARD_SIZE, BOARD_SIZE, MOVE_TYPES)`` holding the
        target move distribution under the same symmetry, and a one-element
        list containing 1 when the recorded result equals the side to move
        and -1 otherwise.
    """
    while True:
        record = random.choice(entries)
        ply = random.randrange(len(record["boards"]))
        if "random_ply" in record:
            # Train on the board state just AFTER the random move was
            # performed, hence the +1.
            ply = record["random_ply"] + 1

        # Even plies belong to player 1, odd plies to player 2.
        to_move = 2 if ply % 2 else 1
        state = ataxx_rules.AtaxxState(
            record["boards"][ply],
            to_move=to_move,
        )
        board = state.copy()

        played_move = record["moves"][ply]
        if played_move == "pass":
            # Passes are not used as training targets; draw again.
            continue

        # Convert the board into encoded features.
        features = engine.board_to_features(board)
        outcome = 1 if record["result"] == to_move else -1
        desired_value = [outcome]

        # Apply a dihedral symmetry.
        symmetry_index = random.randrange(8)
        features = apply_symmetry(symmetry_index, features)

        # Build up a map of the desired result.
        policy_shape = (model.BOARD_SIZE, model.BOARD_SIZE, model.MOVE_TYPES)
        desired_policy = np.zeros(policy_shape, dtype=np.float32)

        if "dists" in record:
            distribution = record["dists"][ply]
        else:
            # Without a stored distribution, put all mass on the played move.
            distribution = {uai_interface.uai_encode_move(played_move): 1}

        for candidate, probability in distribution.items():
            if isinstance(candidate, str):
                candidate = uai_interface.uai_decode_move(candidate)
            candidate = apply_symmetry_to_move(symmetry_index, candidate)
            engine.add_move_to_heatmap(desired_policy, candidate, probability)

        # The accumulated heatmap is expected to sum to one.
        assert abs(1 - desired_policy.sum()) < 1e-3
        return features, desired_policy, desired_value
def print_principal_variation(self):
    """Log the current principal variation at DEBUG level.

    The variation is obtained from ``self.mcts.select_principal_variation``
    with ``best=True``. Moves are written space-separated; every second move
    (odd index) is wrapped in ``RED`` ... ``ENDC``.
    """
    _, _, pv = self.mcts.select_principal_variation(best=True)
    length = len(pv)

    rendered_moves = []
    for index, edge in enumerate(pv):
        # Alternate plain and highlighted moves so the two sides are easy to
        # tell apart in the log output.
        template = RED + "%s" + ENDC if index % 2 else "%s"
        encoded = uai_interface.uai_encode_move(edge.move)
        rendered_moves.append(template % (encoded, ))

    logging.debug("PV [%2i]: %s" % (length, " ".join(rendered_moves)))
def __str__(self):
    """Return a one-line summary of this edge.

    The summary lists the encoded move, the parent's posterior for the move
    as a percentage, the edge visit count, the edge score and the number of
    outgoing edges of the child node.
    """
    encoded_move = uai_interface.uai_encode_move(self.move)
    posterior = self.parent_node.board.evaluations.posterior[self.move]
    posterior_percent = 100.0 * posterior
    visits = self.edge_visits
    score = self.get_edge_score()
    child_edge_count = len(self.child_node.outgoing_edges)
    fields = (encoded_move, posterior_percent, visits, score, child_edge_count)
    return "<%s %4.1f%% v=%i s=%.5f c=%i>" % fields
def play_one_game(args, engine1, engine2, opening_moves):
    """Play one game between two UAI engines and return its record.

    Args:
        args: Options object. This function reads ``show_games`` (verbose
            board printing), ``tc`` (multiplied by 1000 and passed to
            ``genmove``; presumably seconds per move -- confirm with caller)
            and ``max_plies`` (optional ply limit, ``None`` for no limit).
        engine1: Sequence of strings describing the first engine; recorded
            under "white" and used to start a ``UAIPlayer``.
        engine2: Same as ``engine1`` for the second engine ("black").
        opening_moves: Moves that are forced, in order, for the first plies.

    Returns:
        A dict with the keys "moves", "opening", "start_time", "white",
        "black", "result", "end_time" and "final_score". "result" is
        ``board.result()`` or the string "invalid" when the game was cut off
        before a result existed.

    Raises:
        Exception: Whatever ``board.move`` raises for a rejected move is
            re-raised after diagnostics have been printed.
    """
    print('Game: "%s" vs "%s" with opening: [%s]' % (
        " ".join(engine1),
        " ".join(engine2),
        ", ".join(
            uai_interface.uai_encode_move(move) for move in opening_moves),
    ))
    game = {
        "moves": [],
        "opening": opening_moves,
        "start_time": time.time(),
        "white": engine1,
        "black": engine2,
    }
    players = [UAIPlayer(engine1), UAIPlayer(engine2)]
    board = ataxx_rules.AtaxxState.initial()
    ply_number = 0

    def colorize(enabled, text):
        # Highlight the name of the engine whose turn it is.
        if enabled:
            return ataxx_rules.RED + text + ataxx_rules.ENDC
        return text

    def print_state():
        # Reads ply_number and board from the enclosing scope at call time.
        if args.show_games:
            player_to_move = ply_number % 2 + 1
            engine_name1 = colorize(player_to_move == 1, engine1[-1])
            engine_name2 = colorize(player_to_move == 2, engine2[-1])
            print()
            print("======= Player %i move. %s - %s" %
                  (player_to_move, engine_name1, engine_name2))
            print("[%3i plies] Score: %2i - %2i" %
                  (ply_number, board.board.count(1), board.board.count(2)))
            print(board.fen())
            print(board)
        else:
            # Compact mode: keep rewriting a single status line with "\r".
            print("\r[%3i plies] Score: %2i - %2i " %
                  (ply_number, board.board.count(1), board.board.count(2)),
                  end=' ')
            sys.stdout.flush()

    while board.result() is None:
        print_state()
        if ply_number < len(opening_moves):
            print("Opening move.")
            move = opening_moves[ply_number]
        else:
            # Compute the legal moves once per ply instead of twice.
            legal_moves = board.legal_moves()
            if len(legal_moves) == 1:
                # If there is only one legal move then force it.
                print("Forced, only one legal move.")
                move, = legal_moves
            else:
                ms = int(args.tc * 1000)
                move = players[ply_number % 2].genmove(ms)
        if args.show_games:
            print("Move:", uai_interface.uai_encode_move(move))
        try:
            board.move(move)
        except Exception as e:
            # Dump everything needed to reproduce the failure, then let the
            # original exception propagate unchanged.
            print("Exception:", e)
            print(move)
            print(board)
            print(game)
            print(board.fen())
            print(uai_interface.uai_encode_move(move))
            raise
        game["moves"].append(move)
        # Keep both engines in sync with the move that was just played.
        for player in players:
            player.move(move)
        ply_number += 1
        if args.max_plies is not None and ply_number > args.max_plies:
            break

    for player in players:
        player.quit()
    # HACK: force-kill any engine processes that are still running.
    print("Killing all tiktaxx processes.")
    os.system("killall -9 tiktaxx")

    result = board.result()
    if result is None:
        # The ply limit was hit before the game produced a result.
        result = "invalid"
    game["result"] = result
    game["end_time"] = time.time()
    game["final_score"] = board.board.count(1), board.board.count(2)
    print_state()
    # Print a final newline to finish the line we're "\r"ing over and over.
    if not args.show_games:
        print()
    return game
def move(self, move):
    """Send the given move to the engine as a ``moves`` command line."""
    encoded_move = uai_interface.uai_encode_move(move)
    command = "moves %s\n" % encoded_move
    self.send(command)
def play_one_game(args, engine1, engine2, opening_moves):
    """Play one game between two UAI engines and return its record.

    All output uses the single-argument ``print(...)`` form, which behaves
    the same as the print statement on Python 2 and is also valid Python 3.

    Args:
        args: Options object. This function reads ``show_games`` (verbose
            board printing), ``tc`` (multiplied by 1000 and passed to
            ``genmove``; presumably seconds per move -- confirm with caller)
            and ``max_plies`` (optional ply limit, ``None`` for no limit).
        engine1: Sequence of strings describing the first engine; recorded
            under "white" and used to start a ``UAIPlayer``.
        engine2: Same as ``engine1`` for the second engine ("black").
        opening_moves: Moves that are forced, in order, for the first plies.

    Returns:
        A dict with the keys "moves", "opening", "start_time", "white",
        "black", "result" and "end_time". "result" is ``board.winner`` or
        the string "invalid" when the game was cut off without a winner.

    Raises:
        Exception: Whatever ``board.apply_move`` raises for a rejected move
            is re-raised after diagnostics have been printed.
    """
    print('Game: "%s" vs "%s" with opening: [%s]' % (
        " ".join(engine1),
        " ".join(engine2),
        ", ".join(
            uai_interface.uai_encode_move(move) for move in opening_moves),
    ))
    game = {
        "moves": [],
        "opening": opening_moves,
        "start_time": time.time(),
        "white": engine1,
        "black": engine2,
    }
    players = [UAIPlayer(engine1), UAIPlayer(engine2)]
    board = make_initial()
    ply_number = 0

    def print_state():
        # Reads ply_number and board from the enclosing scope at call time.
        if args.show_games:
            print("")
            print("===================== Player %i move." %
                  (ply_number % 2 + 1, ))
            print("[%3i plies] Open: %2i " % (ply_number, sum(board.limits)))
            print(board)
        else:
            # Compact mode: keep rewriting a single status line with "\r".
            # The trailing space stands in for the separator the old
            # trailing-comma print statement emitted before the next print.
            sys.stdout.write("\r[%3i plies] Open: %2i " %
                             (ply_number, sum(board.limits)) + " ")
            sys.stdout.flush()

    while board.winner is None:
        print_state()
        if ply_number < len(opening_moves):
            move = opening_moves[ply_number]
        else:
            # Materialize the legal moves once per ply instead of twice.
            legal_moves = list(board.legal_moves())
            if len(legal_moves) == 1:
                # If there is only one legal move then force it.
                move, = legal_moves
            else:
                ms = int(args.tc * 1000)
                move = players[ply_number % 2].genmove(ms)
        if args.show_games:
            print("Move: %s" % (uai_interface.uai_encode_move(move), ))
        try:
            board.apply_move(move)
        except Exception as e:
            # Dump everything needed to reproduce the failure.
            print("Exception: %s" % (e, ))
            print(move)
            print(board)
            print(game)
            print(uai_interface.uai_encode_move(move))
            # A bare raise keeps the original traceback; "raise e" would
            # replace it with this line on Python 2.
            raise
        game["moves"].append(move)
        # Keep both engines in sync with the move that was just played.
        for player in players:
            player.move(move)
        ply_number += 1
        if args.max_plies is not None and ply_number > args.max_plies:
            break

    for player in players:
        player.quit()

    result = board.winner
    if result is None:
        # The ply limit was hit before the game produced a winner.
        result = "invalid"
    game["result"] = result
    game["end_time"] = time.time()
    print_state()
    # Print a final newline to finish the line we're "\r"ing over and over.
    if not args.show_games:
        print("")
    return game