import glob
import os
import time

import chess
import numpy as np

# Project-level imports (ChessEnv, Config, Node, MCTS, Stockfish, softmax,
# board_to_feature, human_play, load_model, PolicyValNetwork_Giraffe) are
# assumed to be available from the surrounding package.


def value_policy(board: chess.Board):
    """Evaluate a position with Stockfish and build a policy vector over legal moves."""
    env = ChessEnv(board)
    game_over, score = env.is_game_over()
    if game_over:
        return score, []

    stockfish = Stockfish()
    value = stockfish.stockfish_eval(env.board, timeout=100)

    # Evaluate every successor position reachable in one legal move.
    next_states = []
    for move in env.board.legal_moves:
        board_copy = env.board.copy()
        board_copy.push(move)
        next_states.append(board_copy)

    actions_value = [evaluate_state(state) for state in next_states]
    policy = softmax(actions_value)

    # Scatter the move probabilities into the fixed 5120-dim policy vector.
    index_list = [Config.MOVETOINDEX[move.uci()] for move in env.board.legal_moves]
    policy_map = np.zeros((5120,))  # renamed from `map`, which shadows the builtin
    for index, pi in zip(index_list, policy):
        policy_map[index] = pi
    assert policy.sum() > 0.999

    return value, policy_map
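# `softmax` is used in value_policy above but is not defined in this section.
# A minimal, numerically stable sketch of what it is assumed to do (map raw
# action values to a probability distribution); the project's real helper may
# differ:
def softmax_sketch(values):
    values = np.asarray(values, dtype=np.float64)
    exps = np.exp(values - values.max())  # shift by the max for numerical stability
    return exps / exps.sum()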
def evaluate_state(board):
    """Score a single position with Stockfish, or return the result if the game is over."""
    env = ChessEnv(board=board)
    game_over, score = env.is_game_over()
    if game_over:
        return score
    return env.stockfish.stockfish_eval(env.board, timeout=100)
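# A small usage sketch (an illustration, assuming a Stockfish binary is
# available to the Stockfish wrapper): score the initial position and
# sanity-check that the policy mass lands on the 20 legal opening moves.
def demo_value_policy():
    start_board = chess.Board()
    value, policy_map = value_policy(start_board)
    print("Stockfish value of the start position:", value)
    print("Probability mass over legal moves:", policy_map.sum())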
def generate_game(self, model: PolicyValNetwork_Giraffe):
    """Play a game between the network (via MCTS) and a human, alternating moves."""
    np.random.seed()
    triplets = []  # intended container for (feature, pi, z) training examples
    step_game = 0
    temperature = 1
    moves = 0
    game_over = False

    env = ChessEnv()
    env.reset()
    root_node = Node(env, Config.EXPLORE_FACTOR)

    while not game_over:
        moves += 1
        step_game += 1
        if step_game == 50:
            # After 50 plies, make move selection near-greedy.
            temperature = 10e-6

        # Network move: advance to the MCTS successor, then check for game end
        # before asking the human to move in a finished game.
        _, successor, root_node = MCTS(temp=temperature, network=model, root=root_node)
        root_node = successor
        game_over, z = root_node.env.is_game_over(moves, res_check=True)
        if game_over:
            break

        # Human move.
        successor, root_node = human_play(root=root_node,
                                          explore_factor=Config.EXPLORE_FACTOR)
        # Encode the position reached; this is the `feature` half of a training triplet.
        feature = board_to_feature(root_node.env.board)
        root_node = successor
        game_over, z = root_node.env.is_game_over(moves, res_check=True)
    return
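# Sketch (an assumption, not part of the original code): the `triplets` list
# in generate_game would typically hold (feature, pi, z) training examples,
# with the final result z back-filled once the game ends. A helper for that
# back-fill step might look like:
def backfill_outcomes(triplets, z):
    """Label every recorded [feature, pi, None] example with the game result z."""
    for triplet in triplets:
        triplet[2] = z
    return triplets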
def run_tournament(self, candidate, candidate_alpha_scores, incumbent_alpha_scores, _):
    """Play one evaluation game between the candidate and the current incumbent."""
    moves = 0
    temperature = 10e-6  # near-greedy move selection for evaluation games
    # Randomly assign colors so neither network always plays white.
    p = np.random.binomial(1, 0.5) == 1
    white, black = (self.current_policy, candidate) if p else (candidate, self.current_policy)

    env = ChessEnv()
    env.reset()
    root_node = Node(env, Config.EXPLORE_FACTOR)
    game_over = False

    while not game_over:
        player = white if root_node.env.white_to_move else black
        pi, successor, root_node = MCTS(temp=temperature, network=player, root=root_node)
        root_node = successor
        moves += 1
        game_over, z = root_node.env.is_game_over(moves, res_check=True)

    # z is the result from white's perspective; credit each side accordingly.
    if white is candidate:
        candidate_alpha_scores.append(+z)
        incumbent_alpha_scores.append(-z)
    else:
        candidate_alpha_scores.append(-z)
        incumbent_alpha_scores.append(+z)
    # Report the actual result rather than assuming a decisive game.
    if candidate_alpha_scores[-1] > 0:
        print("Candidate won!")
    elif candidate_alpha_scores[-1] < 0:
        print("Incumbent won!")
    else:
        print("Draw.")
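# Sketch (an assumption, not part of the original code): after a batch of
# tournament games, AlphaZero-style pipelines usually promote the candidate
# only if its score clears a threshold. With per-game scores in [-1, +1], a
# promotion check might look like:
def should_promote(candidate_alpha_scores, win_rate_threshold=0.55):
    if not candidate_alpha_scores:
        return False
    mean_score = sum(candidate_alpha_scores) / len(candidate_alpha_scores)
    return (mean_score + 1) / 2 >= win_rate_threshold  # rescale [-1, 1] -> [0, 1]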
def main():
    network = load_model(args.newnetwork)
    score_net = 0
    score_random = 0

    for game in range(args.numgames):
        moves = 0
        temperature = 10e-6  # near-greedy move selection
        # Randomly assign the network to white or black; the other side plays
        # uniformly random legal moves.
        black = None
        p = np.random.binomial(1, 0.5) == 1
        white = network if p else None
        if white is None:
            black = network

        env = ChessEnv()
        env.reset()
        root_node = Node(env, Config.EXPLORE_FACTOR)
        game_over = False

        while not game_over:
            player = white if root_node.env.white_to_move else black
            start = time.time()
            if player is network:
                pi, successor, root_node = MCTS(temp=temperature, network=player,
                                                root=root_node)
                print("MCTS completed move {} in: {}".format(moves, time.time() - start))
                root_node = successor
            else:
                # Random player: pick a uniformly random legal move, expanding
                # the child node lazily if it has not been visited yet.
                if root_node.children is None:
                    root_node.children = [None] * len(root_node.legal_moves)
                move = np.random.randint(0, len(root_node.legal_moves))
                if root_node.children[move] is None:
                    next_env = root_node.env.copy()
                    next_env.step(root_node.legal_moves[move])
                    root_node.children[move] = Node(next_env, temperature,
                                                    parent=root_node, child_id=move)
                root_node = root_node.children[move]
            moves += 1
            game_over, z = root_node.env.is_game_over(moves, res_check=True)

        # z is from white's perspective; draws are credited to the random player.
        if white is network:
            if z >= 1:
                score_net += 1
            else:
                score_random += 1
        else:
            if z <= -1:
                score_net += 1
            else:
                score_random += 1
        print("Game {} complete. Net: {} Random: {}".format(game, score_net, score_random))

    print("New network score total wins: {} Average Score: {}".format(
        score_net, score_net / args.numgames))
    print("Random play score total wins: {} Average Score: {}".format(
        score_random, score_random / args.numgames))
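# The two mains above and below read flags from a module-level `args` that is
# not defined in this section. A minimal sketch of the parser they appear to
# expect (flag names inferred from usage, so treat them as assumptions):
import argparse

_parser = argparse.ArgumentParser(description="Evaluate trained networks.")
_parser.add_argument("--newnetwork", type=str, default=None,
                     help="path to the candidate network checkpoint (.pt)")
_parser.add_argument("--oldnetwork", type=str, default=None,
                     help="path to the incumbent network checkpoint (.pt)")
_parser.add_argument("--numgames", type=int, default=10,
                     help="number of evaluation games to play")
args = _parser.parse_args()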
def main():
    old_network = PolicyValNetwork_Giraffe(pretrain=False)
    new_network, _ = load_model(args.newnetwork)
    if args.oldnetwork is None:
        # With no explicit old network, test the most recent checkpoint on disk.
        list_of_files = glob.glob('./*.pt')
        if len(list_of_files) > 0:
            newest_file = max(list_of_files, key=os.path.getctime)
            new_network, _ = load_model(newest_file)
            print('New network will be: {}'.format(newest_file))
        else:
            print("No new network to test.")
            quit()

    score1 = 0
    score2 = 0
    for game in range(args.numgames):
        moves = 0
        temperature = 10e-6  # near-greedy move selection
        # Randomly swap colors between games so neither network always plays white.
        p = np.random.binomial(1, 0.5) == 1
        white, black = (new_network, old_network) if p else (old_network, new_network)

        env = ChessEnv()
        env.reset()
        root_node = Node(env, Config.EXPLORE_FACTOR)
        game_over = False

        while not game_over:
            player = white if root_node.env.white_to_move else black
            start = time.time()
            pi, successor, root_node = MCTS(temp=temperature, network=player,
                                            root=root_node)
            print("MCTS completed move {} in: {}".format(moves, time.time() - start))
            root_node = successor
            moves += 1
            game_over, z = root_node.env.is_game_over(moves, res_check=True)

        # z is from white's perspective; draws are credited to the old network.
        if white is new_network:
            if z >= 1:
                score1 += 1
            else:
                score2 += 1
        else:
            if z <= -1:
                score1 += 1
            else:
                score2 += 1
        print("Game {} complete. New: {} Old: {}".format(game, score1, score2))

    print("New network score total wins: {} Average Score: {}".format(
        score1, score1 / args.numgames))
    print("Old network score total wins: {} Average Score: {}".format(
        score2, score2 / args.numgames))