import chess
import numpy as np

# ChessEnv, Stockfish, and Config are assumed to be provided elsewhere in the project.


def value_policy(board: chess.Board):
    """Return Stockfish's evaluation of `board` and a softmax policy over its legal moves."""
    env = ChessEnv(board)
    game_over, score = env.is_game_over()
    if game_over:
        # Terminal position: the game outcome is the value and there are no moves to rank.
        return score, []

    # Value of the current position, as judged by Stockfish.
    stockfish = Stockfish()
    value = stockfish.stockfish_eval(env.board, timeout=100)

    # Build every successor position reachable in one legal move and evaluate it.
    next_states = []
    for move in env.board.legal_moves:
        board_copy = env.board.copy()
        board_copy.push(move)
        next_states.append(board_copy)
    actions_value = [evaluate_state(state) for state in next_states]

    # Turn the per-move evaluations into a probability distribution and scatter it
    # into the fixed-size move-index vector.
    policy = softmax(actions_value)
    index_list = [Config.MOVETOINDEX[move.uci()] for move in env.board.legal_moves]
    policy_map = np.zeros((5120,))
    for index, pi in zip(index_list, policy):
        policy_map[index] = pi
    assert policy.sum() > 0.999
    return value, policy_map
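# `softmax` is not defined in this snippet. A minimal sketch of what it is assumed
# to do (a numerically stable softmax over the list of move evaluations):
def softmax(values):
    values = np.asarray(values, dtype=np.float64)
    exps = np.exp(values - values.max())  # subtract the max for numerical stability
    return exps / exps.sum()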
def evaluate_state(board):
    """Return the game outcome if `board` is terminal, otherwise Stockfish's evaluation."""
    env = ChessEnv(board=board)
    game_over, score = env.is_game_over()
    if game_over:
        return score
    return env.stockfish.stockfish_eval(env.board, timeout=100)
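# Minimal usage sketch, assuming a Stockfish binary is available and the project's
# ChessEnv/Config are importable: score the starting position and its move policy.
if __name__ == "__main__":
    start = chess.Board()
    value, policy = value_policy(start)
    print("value:", value)
    print("non-zero policy entries:", np.count_nonzero(policy))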