import chess
import numpy as np
import torch
from random import shuffle

# ChessEnv, Stockfish, Config, board_to_feature, do_backprop,
# get_board_position, evaluate_state, and softmax are defined elsewhere
# in this repo.


def value_policy(board: chess.Board):
    """Return Stockfish's evaluation of `board` together with a policy
    vector over the 5120 move indices, built by softmaxing the
    evaluations of all legal successor positions."""
    env = ChessEnv(board)
    game_over, score = env.is_game_over()
    if game_over:
        # Terminal position: the game result is the value, no moves remain.
        return score, []
    stockfish = Stockfish()
    value = stockfish.stockfish_eval(env.board, timeout=100)
    # Generate every legal successor position.
    next_states = []
    for move in env.board.legal_moves:
        board_copy = env.board.copy()
        board_copy.push(move)
        next_states.append(board_copy)
    # Score each successor and normalise the scores into a distribution.
    actions_value = [evaluate_state(state) for state in next_states]
    policy = softmax(actions_value)
    # Scatter the move probabilities into the fixed-size policy vector.
    # (`policy_map` rather than `map`, to avoid shadowing the builtin.)
    index_list = [Config.MOVETOINDEX[move.uci()] for move in env.board.legal_moves]
    policy_map = np.zeros((5120,))
    for index, pi in zip(index_list, policy):
        policy_map[index] = pi
    # Sanity check: the probabilities should sum to 1 up to float error.
    assert policy.sum() > 0.999
    return value, policy_map
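
# `value_policy` assumes a `softmax` helper that turns the raw state
# evaluations into a probability distribution. A minimal, numerically
# stable sketch of such a helper (hypothetical -- the repo's own version
# may differ):

def softmax(values):
    # Subtract the max before exponentiating to avoid overflow on
    # large evaluation scores.
    values = np.asarray(values, dtype=np.float64)
    exps = np.exp(values - values.max())
    return exps / exps.sum()
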
def pretrain(model):
    """Pretrain `model` to regress Stockfish evaluations over a corpus
    of board positions, one minibatch at a time."""
    feature_batch = []
    targets_batch = []
    board_positions = get_board_position()
    shuffle(board_positions)
    print("Pretraining on {} board positions...".format(len(board_positions)))
    stockfish = Stockfish()
    for epoch in range(Config.PRETRAIN_EPOCHS):
        for index, board_position in enumerate(board_positions):
            # Accumulate (features, Stockfish score) pairs. The original
            # version skipped the position that landed on the minibatch
            # boundary; appending unconditionally keeps every sample.
            feature_batch.append(board_to_feature(board_position))
            targets_batch.append(stockfish.stockfish_eval(board_position, 10))
            # Run one backprop step per full minibatch, then reset.
            if (index + 1) % Config.minibatch_size == 0:
                features = torch.FloatTensor(feature_batch)
                targets = torch.FloatTensor(targets_batch)
                do_backprop(features, targets, model)
                feature_batch = []
                targets_batch = []
        print("Completed epoch {} of {}".format(epoch + 1, Config.PRETRAIN_EPOCHS))
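
# `pretrain` delegates the gradient step to `do_backprop`. A minimal
# sketch of what such a step could look like, assuming the model emits
# one scalar evaluation per position and an MSE regression loss against
# the Stockfish targets. The name `do_backprop_sketch`, the optimizer
# argument, and the loss choice are assumptions for illustration, not
# the repo's confirmed implementation:

import torch.nn.functional as F

def do_backprop_sketch(features, targets, model, optimizer):
    optimizer.zero_grad()
    predictions = model(features).squeeze(-1)  # one scalar per position
    loss = F.mse_loss(predictions, targets)    # regress Stockfish scores
    loss.backward()
    optimizer.step()
    return loss.item()

# Example wiring (hypothetical). The optimizer must persist across
# calls so its momentum statistics accumulate:
#   optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
#   loss = do_backprop_sketch(features, targets, model, optimizer)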