Beispiel #1
0
def get_sample_from_entries(entries):
	"""Draw one random training sample from a collection of recorded games.

	A game is picked uniformly from ``entries`` and a ply is picked within it.
	If the game carries a "random_ply" key, the ply right after it is used
	instead. Positions whose recorded move is "pass" are rejected and a new
	sample is drawn.

	Returns a tuple ``(features, desired_policy, desired_value)``:
	  * ``features`` -- ``engine.board_to_features`` output for the position,
	    transformed by a randomly chosen symmetry index in ``range(8)``.
	  * ``desired_policy`` -- float32 array of shape
	    ``(model.BOARD_SIZE, model.BOARD_SIZE, model.MOVE_TYPES)`` filled in
	    via ``engine.add_move_to_heatmap`` under the same symmetry.
	  * ``desired_value`` -- one-element list: 1 if ``entry["result"]`` equals
	    the side to move, otherwise -1.
	"""
	while True:
		game = random.choice(entries)
		# The random ply is always drawn, even when "random_ply" overrides it.
		ply = random.randrange(len(game["boards"]))
		if "random_ply" in game:
			# +1 because we train on the board state just AFTER the random
			# move was performed.
			ply = game["random_ply"] + 1
		# Player 1 is to move on even plies, player 2 on odd plies.
		to_move = 2 if ply % 2 else 1
		board = ataxx_rules.AtaxxState(game["boards"][ply], to_move=to_move).copy()
		played_move = game["moves"][ply]
		if played_move == "pass":
			# Nothing to learn from a pass; try another sample.
			continue

		# Encode the position and the value target from the mover's viewpoint.
		features = engine.board_to_features(board)
		outcome = 1 if game["result"] == to_move else -1
		desired_value = [outcome]

		# Apply one dihedral symmetry to the features; the same index is
		# applied to every policy move below.
		symmetry_index = random.randrange(8)
		features = apply_symmetry(symmetry_index, features)

		# Build the policy target heatmap.
		policy_shape = (model.BOARD_SIZE, model.BOARD_SIZE, model.MOVE_TYPES)
		desired_policy = np.zeros(policy_shape, dtype=np.float32)
		if "dists" in game:
			distribution = game["dists"][ply]
		else:
			# No recorded distribution: put all the mass on the played move.
			distribution = {uai_interface.uai_encode_move(played_move): 1}
		for candidate, probability in distribution.items():
			# Keys may be encoded move strings; decode those before use.
			if isinstance(candidate, str):
				candidate = uai_interface.uai_decode_move(candidate)
			transformed = apply_symmetry_to_move(symmetry_index, candidate)
			engine.add_move_to_heatmap(desired_policy, transformed, probability)
		# The policy target is expected to sum to (approximately) one.
		assert abs(1 - desired_policy.sum()) < 1e-3
		return features, desired_policy, desired_value
Beispiel #2
0
 def print_principal_variation(self):
     """Log the principal variation selected by ``self.mcts`` at DEBUG level.

     The message contains the variation length followed by the encoded
     moves separated by spaces; moves at odd positions are wrapped in
     ``RED`` ... ``ENDC`` so alternating plies stand out.
     """
     _, _, pv = self.mcts.select_principal_variation(best=True)
     rendered_moves = []
     for index, edge in enumerate(pv):
         encoded = uai_interface.uai_encode_move(edge.move)
         # Even positions are printed plain, odd positions highlighted.
         template = RED + "%s" + ENDC if index % 2 else "%s"
         rendered_moves.append(template % (encoded, ))
     summary = "PV [%2i]: %s" % (len(pv), " ".join(rendered_moves))
     logging.debug(summary)
Beispiel #3
0
 def __str__(self):
     """Return a compact one-line summary of this edge.

     Format: ``<move posterior% v=visits s=score c=children>`` where the
     posterior is looked up for ``self.move`` on the parent node's board
     evaluations and ``children`` is the number of outgoing edges of the
     child node.
     """
     move_text = uai_interface.uai_encode_move(self.move)
     posterior = self.parent_node.board.evaluations.posterior[self.move]
     posterior_percent = 100.0 * posterior
     visits = self.edge_visits
     score = self.get_edge_score()
     child_edge_count = len(self.child_node.outgoing_edges)
     fields = (move_text, posterior_percent, visits, score, child_edge_count)
     return "<%s %4.1f%% v=%i s=%.5f c=%i>" % fields
def play_one_game(args, engine1, engine2, opening_moves):
    """Play one Ataxx game between two UAI engines and return its record.

    Args:
        args: Options object. Reads ``show_games`` (verbose per-ply output),
            ``tc`` (multiplied by 1000 and passed to ``genmove`` as
            milliseconds) and ``max_plies`` (``None`` disables the cap).
        engine1: Sequence of strings describing the first engine; it is
            recorded as "white" and asked for moves on even plies.
        engine2: Same for the second engine; recorded as "black" and asked
            for moves on odd plies.
        opening_moves: Moves that are played verbatim for the first
            ``len(opening_moves)`` plies instead of asking the engines.

    Returns:
        A dict with keys "moves", "opening", "start_time", "white",
        "black", "result" (``board.result()``, or "invalid" if the game
        stopped without a result), "end_time" and "final_score".

    Raises:
        Exception: Whatever ``board.move`` raises for a rejected move is
            re-raised after debugging state has been printed. Both engine
            players are still told to quit in that case.
    """
    print('Game: "%s" vs "%s" with opening: [%s]' % (
        " ".join(engine1),
        " ".join(engine2),
        ", ".join(
            uai_interface.uai_encode_move(move) for move in opening_moves),
    ))
    game = {
        "moves": [],
        "opening": opening_moves,
        "start_time": time.time(),
        "white": engine1,
        "black": engine2,
    }

    players = [UAIPlayer(engine1), UAIPlayer(engine2)]
    board = ataxx_rules.AtaxxState.initial()
    ply_number = 0

    def score():
        # Piece counts for player 1 and player 2.
        return board.board.count(1), board.board.count(2)

    def print_state():
        # Reads ply_number and board from the enclosing scope at call time.
        if args.show_games:
            colorize = lambda do, s: ataxx_rules.RED + s + ataxx_rules.ENDC if do else s
            player_to_move = ply_number % 2 + 1
            engine_name1 = colorize(player_to_move == 1, engine1[-1])
            engine_name2 = colorize(player_to_move == 2, engine2[-1])
            print()
            print("======= Player %i move. %s - %s" %
                  (player_to_move, engine_name1, engine_name2))
            print("[%3i plies] Score: %2i - %2i" % ((ply_number, ) + score()))
            print(board.fen())
            print(board)
        else:
            # "\r" rewrites the same terminal line on every ply.
            print("\r[%3i plies] Score: %2i - %2i " %
                  ((ply_number, ) + score()),
                  end=' ')
            sys.stdout.flush()

    # The finally clause makes sure the engine players are told to quit even
    # when a move is rejected or an engine call raises mid-game.
    try:
        while board.result() is None:
            print_state()
            if ply_number < len(opening_moves):
                print("Opening move.")
                move = opening_moves[ply_number]
            else:
                legal_moves = board.legal_moves()
                if len(legal_moves) == 1:
                    # If there is only one legal move then force it.
                    print("Forced, only one legal move.")
                    move, = legal_moves
                else:
                    ms = int(args.tc * 1000)
                    move = players[ply_number % 2].genmove(ms)
            if args.show_games:
                print("Move:", uai_interface.uai_encode_move(move))
            try:
                board.move(move)
            except Exception as e:
                # Dump everything useful for debugging, then propagate the
                # original exception with its traceback intact.
                print("Exception:", e)
                print(move)
                print(board)
                print(game)
                print(board.fen())
                print(uai_interface.uai_encode_move(move))
                raise
            game["moves"].append(move)
            # Keep both engines in sync with the move that was just played.
            for player in players:
                player.move(move)
            ply_number += 1
            if args.max_plies is not None and ply_number > args.max_plies:
                break
    finally:
        for player in players:
            player.quit()

    # HACK: forcibly kill any leftover tiktaxx processes.
    print("Killing all tiktaxx processes.")
    os.system("killall -9 tiktaxx")

    result = board.result()
    if result is None:
        # The game was cut short (ply cap) before reaching a result.
        result = "invalid"
    game["result"] = result
    game["end_time"] = time.time()
    game["final_score"] = score()

    print_state()
    # Print a final newline to finish the line we're "\r"ing over and over.
    if not args.show_games:
        print()

    return game
 def move(self, move):
     """Pass ``move`` to ``self.send`` as a newline-terminated ``moves`` command."""
     encoded_move = uai_interface.uai_encode_move(move)
     command = "moves %s\n" % encoded_move
     self.send(command)
Beispiel #6
0
def play_one_game(args, engine1, engine2, opening_moves):
    """Play one game between two UAI engines and return its record.

    All output uses single-argument ``print(...)`` calls and
    ``sys.stdout.write`` so the function behaves the same under the
    Python 2 print statement and the Python 3 print function.

    Args:
        args: Options object. Reads ``show_games`` (verbose per-ply output),
            ``tc`` (multiplied by 1000 and passed to ``genmove`` as
            milliseconds) and ``max_plies`` (``None`` disables the cap).
        engine1: Sequence of strings describing the first engine; it is
            recorded as "white" and asked for moves on even plies.
        engine2: Same for the second engine; recorded as "black" and asked
            for moves on odd plies.
        opening_moves: Moves that are played verbatim for the first
            ``len(opening_moves)`` plies instead of asking the engines.

    Returns:
        A dict with keys "moves", "opening", "start_time", "white",
        "black", "result" (``board.winner``, or "invalid" if the game
        stopped without a winner) and "end_time".

    Raises:
        Exception: Whatever ``board.apply_move`` raises for a rejected move
            is re-raised (original traceback preserved) after debugging
            state has been printed. Both engine players are still told to
            quit in that case.
    """
    print('Game: "%s" vs "%s" with opening: [%s]' % (
        " ".join(engine1),
        " ".join(engine2),
        ", ".join(
            uai_interface.uai_encode_move(move) for move in opening_moves),
    ))
    game = {
        "moves": [],
        "opening": opening_moves,
        "start_time": time.time(),
        "white": engine1,
        "black": engine2,
    }

    players = [UAIPlayer(engine1), UAIPlayer(engine2)]
    board = make_initial()
    ply_number = 0

    def print_state():
        # Reads ply_number and board from the enclosing scope at call time.
        if args.show_games:
            print("")
            print("===================== Player %i move." %
                  (ply_number % 2 + 1, ))
            print("[%3i plies] Open: %2i  " % (ply_number, sum(board.limits)))
            print(board)
        else:
            # "\r" rewrites the same terminal line on every ply; no newline.
            progress = "\r[%3i plies] Open: %2i  " % (ply_number,
                                                      sum(board.limits))
            sys.stdout.write(progress + " ")
            sys.stdout.flush()

    # The finally clause makes sure the engine players are told to quit even
    # when a move is rejected or an engine call raises mid-game.
    try:
        while board.winner is None:
            print_state()
            if ply_number < len(opening_moves):
                move = opening_moves[ply_number]
            else:
                # Materialise once: legal_moves() may be a generator.
                legal_moves = list(board.legal_moves())
                if len(legal_moves) == 1:
                    # If there is only one legal move then force it.
                    move, = legal_moves
                else:
                    ms = int(args.tc * 1000)
                    move = players[ply_number % 2].genmove(ms)
            if args.show_games:
                print("Move: %s" % (uai_interface.uai_encode_move(move), ))
            try:
                board.apply_move(move)
            except Exception as e:
                # Dump everything useful for debugging, then propagate the
                # original exception. A bare raise keeps the traceback;
                # "raise e" would reset it under Python 2.
                print("Exception: %s" % (e, ))
                print(move)
                print(board)
                print(game)
                print(uai_interface.uai_encode_move(move))
                raise
            game["moves"].append(move)
            # Keep both engines in sync with the move that was just played.
            for player in players:
                player.move(move)
            ply_number += 1
            if args.max_plies is not None and ply_number > args.max_plies:
                break
    finally:
        for player in players:
            player.quit()

    result = board.winner
    if result is None:
        # The game was cut short (ply cap) before a winner was decided.
        result = "invalid"
    game["result"] = result
    game["end_time"] = time.time()

    print_state()
    # Print a final newline to finish the line we're "\r"ing over and over.
    if not args.show_games:
        print("")

    return game