def ab_min(board: chess.Board, alpha: int, beta: int, depth: int, tt: {}) -> int:
    """
    Min node of an alpha/beta tree; it recurses into ab_max until depth == 0.

    :param board: the chess board (moves are pushed and popped in place)
    :param alpha: lower bound of the search window
    :param beta: upper bound of the search window
    :param depth: remaining depth
    :param tt: transposition table (Zobrist hash -> HashItem)
    :return: evaluation
    """
    if depth == 0 or board.is_game_over():
        return ev.evaluate(board)

    lowest = math.inf
    for candidate in board.legal_moves:
        board.push(candidate)
        key = zb.zobrist_hash(board)
        # A stored entry is reused only when its recorded depth does not
        # exceed the current one; otherwise the child is searched.
        if key in tt and tt[key].depth <= depth:
            score = tt[key].evaluation
        else:
            score = ab_max(board, alpha, beta, depth - 1, tt)
        board.pop()

        if score < lowest:
            lowest = score
        if lowest <= alpha:
            # Alpha cutoff: stop looking at further replies.
            break
        beta = min(beta, lowest)

    # NOTE(review): the entry is written under the hash of the last child
    # position examined, not under the hash of this node - confirm intent.
    tt[key] = HashItem(key, depth, lowest, (alpha, beta))
    return lowest
def alphabeta(board, depth, maximizingPlayer, evaluator, transition_table=None, alpha=-999999, beta=999999):
    """Minimax search with alpha-beta pruning and an optional transposition table.

    :param board: position to search; moves are pushed and popped in place
    :param depth: remaining depth; at 0 the result of ``quiesce`` is returned
        (negated when ``maximizingPlayer`` is false)
    :param maximizingPlayer: True when the side to move maximises the score
    :param evaluator: evaluation callable forwarded to ``quiesce``
    :param transition_table: optional mapping of Zobrist hash -> entry with
        ``score`` and ``depth`` attributes; ``None`` disables caching
    :param alpha: lower bound of the search window
    :param beta: upper bound of the search window
    :return: the score of the position
    """
    if depth == 0:
        v = quiesce(board, evaluator, maximizingPlayer, alpha, beta)
        return v if maximizingPlayer else -v

    hash_key = None
    if transition_table is not None:
        hash_key = zobrist_hash(board)
        entry = transition_table.get(hash_key)
        # Reuse a stored score only if it came from an equal or deeper search.
        if entry is not None and entry.depth >= depth:
            return entry.score

    # Generated only now, so the depth-0 and table-hit paths skip the work.
    moves = list(board.legal_moves)

    if maximizingPlayer:
        value = -99999
        for child in moves:
            board.push(child)
            value = max(
                value,
                alphabeta(board, depth - 1, False, evaluator,
                          transition_table, alpha, beta))
            alpha = max(alpha, value)
            board.pop()
            if alpha >= beta:
                # Beta cutoff
                break
    else:
        value = 99999
        for child in moves:
            board.push(child)
            value = min(
                value,
                alphabeta(board, depth - 1, True, evaluator,
                          transition_table, alpha, beta))
            beta = min(beta, value)
            board.pop()
            if beta <= alpha:
                # Alpha cutoff
                break

    if hash_key is not None:
        stored = transition_table.get(hash_key)
        # The recursion may have stored an equal-or-deeper entry for this
        # position in the meantime; never overwrite it with a shallower one.
        if stored is None or stored.depth < depth:
            transition_table[hash_key] = Entry(score=value, depth=depth)
    return value
def create_node_info_from_python_chess_board(board, depth=255, separator=0):
    """Pack a python-chess board into a one-element ``numpy_node_info_dtype`` array.

    :param board: the python-chess board to convert
    :param depth: value stored in the record's depth field
    :param separator: value stored in the record's separator field
    :return: ndarray of length 1 with dtype ``numpy_node_info_dtype``
    """
    # A missing en-passant square is encoded as 0.
    ep_square = 0 if board.ep_square is None else board.ep_square

    unexplored_moves = np.full([MAX_MOVES_LOOKED_AT, 3], 255, dtype=np.uint8)
    unexplored_move_scores = np.full([MAX_MOVES_LOOKED_AT],
                                     MIN_FLOAT32_VAL,
                                     dtype=np.float32)
    # The move made to reach the position this board represents.
    previous_move = np.full([3], 255, dtype=np.uint8)

    record = (
        board.pawns,
        board.knights,
        board.bishops,
        board.rooks,
        board.queens,
        board.kings,
        board.occupied_co[chess.WHITE],
        board.occupied_co[chess.BLACK],
        board.occupied,
        board.turn,
        board.castling_rights,
        ep_square,
        board.halfmove_clock,
        zobrist_hash(board),
        False,  # terminated
        separator,
        depth,
        MIN_FLOAT32_VAL,  # best_value
        unexplored_moves,
        unexplored_move_scores,
        previous_move,
        0,  # next_move_index: index in the stored moves of the next move to make
        0,  # children_left: children which have yet to return a value or be created
    )
    return np.array([record], dtype=numpy_node_info_dtype)
def findboardorbdd(board: chess.Board, tt: {}, depth, alphabeta, ev=None):
    """Look the board up in the transposition table, inserting it when absent.

    :param board: position to look up
    :param tt: transposition table (Zobrist hash -> HashItem)
    :param depth: depth recorded in a newly created entry
    :param alphabeta: alpha/beta value recorded in a newly created entry
    :param ev: evaluation recorded in a newly created entry
    :return: ``(True, existing_entry)`` on a hit, ``(False, None)`` after
        inserting a new entry
    """
    key = zb.zobrist_hash(board)
    if key not in tt:
        tt[key] = HashItem(key, depth, ev, alphabeta)
        return False, None
    return True, tt[key]
def zobrist_hash_test(hash_getter, fen_to_start=None, num_sequences_to_test=1000, max_moves_per_test=20):
    """
    This function tests the engine's ability to incrementally maintain a board's Zobrist hash while pushing moves.
    It tests this by generating a set of random move sequences from a given board, and compares the computed
    results with the values computed by the functions within the python-chess package.

    :param hash_getter: The hashing function to test, it must accept 3 parameters, the fen to start the move
     sequences from, the list of lists of python-chess Moves to make from the starting board, and the maximum number
     of moves per test. The function must return an ndarray of the board hashes at each position in the random move
     sequences (zero if the sequence terminates early), its shape will be [num_sequences_to_test, max_moves_per_test]
    :param fen_to_start: The fen (as a string) representing the board to start making random moves from. If None is
     given, the fen for the start of a normal game is used
    :param num_sequences_to_test: The number of random move sequences (each starting from the given initial fen) to test
    :param max_moves_per_test: The maximum number of random moves to be made for each testing sequence. This is a
     maximum because some random move sequences result in a premature win/loss/draw
    :return: True if all tests were passed, False if not
    """
    if fen_to_start is None:
        fen_to_start = DEFAULT_TESTING_FENS[0]

    correct_hashes = np.zeros((num_sequences_to_test, max_moves_per_test),
                              dtype=np.uint64)
    move_lists = [[] for _ in range(num_sequences_to_test)]

    # Build the random move sequences together with their reference hashes.
    for j in range(num_sequences_to_test):
        cur_board = chess.Board(fen_to_start)
        for i in range(max_moves_per_test):
            possible_next_moves = list(cur_board.generate_legal_moves())
            if not possible_next_moves:
                # Game ended early; the remaining reference hashes stay zero.
                break
            move = possible_next_moves[random.randrange(
                len(possible_next_moves))]
            move_lists[j].append(move)
            cur_board.push(move)
            correct_hashes[j, i] = zobrist_hash(cur_board)

    calculated_hashes = hash_getter(fen_to_start, move_lists,
                                    max_moves_per_test)
    same_hashes = calculated_hashes == correct_hashes
    all_same = np.all(same_hashes)

    # Go through incorrect hashes and print relevant information about them for use during debugging
    if not all_same:
        for j, sequence_matches in enumerate(same_hashes):
            # Only sequences containing at least one mismatch are of interest
            # (this also covers sequences in which every hash is wrong).
            if np.all(sequence_matches):
                continue
            cur_board = chess.Board(fen_to_start)
            for i, move in enumerate(move_lists[j]):
                if not same_hashes[j, i]:
                    print(
                        "Board and move being made which caused the first incorrect hash in sequence %d:\n%s\n%s\n%s\nDifference in hash values:%d\n"
                        % (j, cur_board, cur_board.fen(), move,
                           correct_hashes[j, i] ^ calculated_hashes[j, i]))
                    break
                cur_board.push(move)

    return all_same
def _minimax(self, board, depth, alpha, beta, maximize):
    """Depth-limited alpha-beta search with a hash-keyed score cache.

    :param board: position to search; moves are pushed and popped in place
    :param depth: current ply, counted upwards until ``self.max_depth``
    :param alpha: lower bound of the search window
    :param beta: upper bound of the search window
    :param maximize: branch selector, also forwarded to ``self.umka.evaluate``
    :return: the score of the position
    """
    self.nodes += 1

    # Hash the position once; it serves both the lookup and the store.
    key = zobrist_hash(board)
    if key in self.cache:
        return self.cache[key]

    if depth >= self.max_depth:
        score = self.umka.evaluate(board, depth, maximize)
        self.print_info(depth, board)
        self.cache[key] = score
        return score

    if maximize:
        # NOTE(review): despite the flag's name this branch takes the minimum
        # of the children (and the other branch the maximum) - confirm that
        # this matches the sign convention of ``umka.evaluate``.
        value = INF
        for move in board.legal_moves:
            board.push(move)
            value = min(
                value, self._minimax(board, depth + 1, alpha, beta, False))
            board.pop()
            if value <= alpha:
                break
            beta = min(beta, value)
    else:
        value = -INF
        for move in board.legal_moves:
            board.push(move)
            value = max(value,
                        self._minimax(board, depth + 1, alpha, beta, True))
            board.pop()
            if value >= beta:
                break
            alpha = max(alpha, value)
    return value
def best_play(board: chess.Board, player: bool, depth: int = 5) -> chess.Move:
    """
    Finds the best play for a board: a Max at the root for white, a Min for black.

    :param board: the board to analyse
    :param player: the player who plays (truthy selects the Max/white branch)
    :param depth: the wanted search depth
    :return: the move chosen by ``util.funcs.creative_move`` from the best candidates
    """
    tt = {}
    # Bounded deque: keeps the two most recent improvements, newest first.
    best_moves = collections.deque([(0, 0)], 2)
    alpha, beta = -math.inf, math.inf

    if player:
        # Max for White
        best_val = -math.inf
        for coup in board.legal_moves:
            board.push(coup)
            key = zb.zobrist_hash(board)
            score = (tt[key].evaluation if key in tt else ab_min(
                board, alpha, beta, depth - 1, tt))
            if score > best_val:
                best_val = score
                best_moves.appendleft((coup, score))
            if score > alpha:
                alpha = score
            print(len(tt))
            board.pop()
    else:
        # Min for Black
        best_val = math.inf
        for coup in board.legal_moves:
            board.push(coup)
            key = zb.zobrist_hash(board)
            score = (tt[key].evaluation if key in tt else ab_max(
                board, alpha, beta, depth - 1, tt))
            if score < best_val:
                best_val = score
                best_moves.appendleft((coup, score))
            if score < beta:
                beta = score
            board.pop()

    return util.funcs.creative_move(best_moves)
def move_comparator(self, board, move, depth):
    """Return an ordering value for ``move``, preferring transposition-table data.

    The move is pushed, the resulting position is scored, and the move is
    popped again before returning.

    :param board: position the move is played from
    :param move: the move to score
    :param depth: depth at which the score is requested
    :return: the stored value for the resulting position if a suitable one
        exists, otherwise ``self.simple_eval(board, depth)``
    """
    board.push(move)

    # Hash once; the original hashed the position twice on a table hit.
    key = zobrist_hash(board)
    if key in self.transposition_table:
        board_depths = self.transposition_table[key]
        max_depth = max(board_depths)
        if max_depth > depth and (max_depth - depth) % 2 == 0:
            # A value stored at a larger depth key whose difference from
            # ``depth`` is even is preferred.
            value = board_depths[max_depth]
        elif depth in board_depths:
            value = board_depths[depth]
        else:
            value = self.simple_eval(board, depth)
    else:
        value = self.simple_eval(board, depth)

    board.pop()
    return value
def alpha_beta_with_memory(self, board, depth, alpha, beta, maximize):
    """Alpha-beta search that memoises leaf evaluations by Zobrist hash.

    :param board: position to search; moves are pushed and popped in place
    :param depth: remaining depth; evaluation happens at ``depth <= 0``
    :param alpha: lower bound of the search window
    :param beta: upper bound of the search window
    :param maximize: True at a maximising node, False at a minimising node
    :return: the score of the position
    """
    self.nodes += 1

    # Hash once; the original recomputed it for the lookup, the read and the store.
    key = zobrist_hash(board)
    # NOTE(review): the cache is consulted at every depth but only filled at
    # leaves, so an interior node may return a leaf evaluation - confirm.
    if key in self.cache:
        return self.cache[key]

    if board.is_game_over() or depth <= 0:
        val = self.umka.evaluate(board)
        self.cache[key] = val
        return val

    if maximize:
        value = float('-inf')
        for move in board.legal_moves:
            board.push(move)
            value = max(
                value,
                self.alpha_beta_with_memory(board, depth - 1, alpha, beta,
                                            False))
            board.pop()
            if value > beta:
                # Fail-high: remember the refuting move and stop.
                self.main_line[depth] = move
                self.print_info(depth, move, value)
                return value
            alpha = max(alpha, value)
        return value

    value = float('inf')
    for move in board.legal_moves:
        board.push(move)
        value = min(
            value,
            self.alpha_beta_with_memory(board, depth - 1, alpha, beta, True))
        board.pop()
        if value < alpha:
            # Fail-low: remember the refuting move and stop.
            self.main_line[depth] = move
            return value
        beta = min(beta, value)
    return value
def score_move(move):
    """Return a move-ordering score for ``move``.

    Uses ``board`` and ``transition_table`` from the enclosing scope. When the
    table is non-empty and holds a truthy entry for the position reached by
    the move, that entry's score is returned; otherwise captures are worth
    9999 and checks 1000.
    """
    if transition_table:
        board.push(move)
        child_key = zobrist_hash(board)
        board.pop()
        cached = transition_table.get(child_key)
        if cached:
            return cached.score

    capture_bonus = int(board.is_capture(move)) * 9999
    check_bonus = int(board.gives_check(move)) * 1000
    return capture_bonus + check_bonus
def move_comparator(self, board, move, depth, side):
    """Return an ordering value for ``move`` using the table selected by ``side``.

    The move is pushed, the resulting position is scored, and the move is
    popped again before returning.

    :param board: position the move is played from
    :param move: the move to score
    :param depth: depth at which the score is requested
    :param side: truthy selects ``self.transposition_max``, falsy
        ``self.transposition_min``
    :return: the stored value for the resulting position if a suitable one
        exists, otherwise ``self.simple_eval(board, depth, side)``
    """
    board.push(move)
    transposition_table = self.transposition_max if side else self.transposition_min

    # Hash once; the original hashed the position twice on a table hit.
    key = zobrist_hash(board)
    if key in transposition_table:
        board_depths = transposition_table[key]
        max_depth = max(board_depths)
        if max_depth > depth and (max_depth - depth) % 2 == 0:
            # A value stored at a larger depth key whose difference from
            # ``depth`` is even is preferred.
            value = board_depths[max_depth]
        elif depth in board_depths:
            value = board_depths[depth]
        else:
            value = self.simple_eval(board, depth, side)
    else:
        # Position not in the table: fall back to the simple evaluation.
        value = self.simple_eval(board, depth, side)

    board.pop()
    return value
def simple_eval(self, board, depth, side):
    """Evaluate ``board`` from the engine's point of view.

    A transposition-table value is returned when one exists at the largest
    stored depth key and that key differs from ``depth`` by an even number.
    Otherwise the score is a checkmate bonus/penalty plus weighted material.

    :param board: position to evaluate
    :param depth: depth used for the table parity test and the mate bonus
    :param side: truthy selects ``self.transposition_max``, falsy
        ``self.transposition_min``
    :return: the evaluation (0 for stalemate)
    """
    # If the board is in the transposition table,
    # then a more accurate evaluation is possible
    transposition_table = self.transposition_max if side else self.transposition_min
    key = zobrist_hash(board)  # hashed once instead of twice
    if key in transposition_table:
        board_depths = transposition_table[key]
        max_depth = max(board_depths)
        if (max_depth - depth) % 2 == 0:
            return board_depths[max_depth]

    evaluation = 0

    # Check end game conditions
    if board.is_game_over():
        if board.is_stalemate():
            return evaluation
        if board.is_checkmate():
            # The side to move is the one that has been mated.
            mate_score = 200 + (50 * depth)
            if board.turn != self.is_white:
                evaluation += mate_score
            else:
                evaluation -= mate_score

    # Tally difference in pieces between each side
    board_status = [
        len(board.pieces(i, True)) - len(board.pieces(i, False))
        for i in range(1, 7)
    ]

    # Account if player is black or white
    player_coef = 1 if self.is_white else -1

    # Multiply each tally by respective material value. Then sum
    return evaluation + player_coef * sum(
        np.multiply(board_status, [1, 3, 3, 5, 9, 200]))
def select_move(self, board: chess.Board, n=5, t=1):
    """Run 200 tree searches from ``board`` and pick a move from the visit counts.

    :param board: the position to move from
    :param n: not used by the current implementation
    :param t: temperature; visit counts are raised to ``1 / t`` while sampling
    :return: ``(move, adapter.move_to_policy(move))``
    """
    board_hash = polyglot.zobrist_hash(board)
    moves = list(board.legal_moves)

    progress = tqdm(range(200), desc="Search")
    for _ in progress:
        self.search(board)
    progress.close()

    # Visit count of every root edge; edges never visited count as 0.
    counts = [self.N_vals_edges.get((board_hash, mv), 0) for mv in moves]

    if len(board.move_stack) < 10:
        # Fewer than 10 plies played: sample a move in proportion to the
        # temperature-scaled visit counts.
        weights = [c**(1.0 / t) for c in counts]
        total = float(sum(weights))
        probs = [w / total for w in weights]
        index = np.random.choice(list(range(len(weights))), p=probs)
        move = moves[index]
    else:
        # Afterwards: take a most-visited move, breaking ties at random.
        most_visited = np.flatnonzero(np.asarray(counts) == np.max(counts))
        move = moves[np.random.choice(most_visited)]

    print('-------------------------------------')
    print(f'Next move Q Value: {self.Q_vals[(board_hash, move)]}')
    print(
        f'Next move Pi Value: {adapter.get_move_prob(self.Policy_vectors[board_hash], move)}'
    )
    print(f'Move: {move}')
    return move, adapter.move_to_policy(move)
def get_moves_pred(board, moves, h=None):
    """Score every move in ``moves`` with the move model, cached per position.

    :param board: the position the moves are played from
    :param moves: iterable of moves to score
    :param h: hash of ``board``; computed with ``zobrist_hash`` when falsy
    :return: list of ``(score, move)`` tuples in the order of ``moves``; the
        same list object is stored in ``movecache`` and returned on later
        calls with the same hash
    """
    global gmovemodel

    h = h or zobrist_hash(board)
    if h in movecache:
        return movecache[h]

    # Load the model lazily on first use.
    if not gmovemodel:
        if movequant:
            gmovemodel = tf.lite.Interpreter(model_path=MOVE_MODEL_PATH)
            gmovemodel.allocate_tensors()
        else:
            gmovemodel = tf.keras.models.load_model(MOVE_MODEL_PATH)

    gf, pf, mf, sf = get_pos_rep(board)
    if movequant:
        # Position inputs are identical for every move, so set them once.
        for tensor_index, feature in ((0, gf), (3, pf), (1, mf), (4, sf)):
            gmovemodel.set_tensor(tensor_index, [feature])

    predictions = []
    for move in moves:
        move_rep = get_move_rep(board, move)
        if movequant:
            gmovemodel.set_tensor(2, [move_rep])
            gmovemodel.invoke()
            raw = gmovemodel.get_tensor(51)[0][0]
        else:
            raw = gmovemodel([
                np.array([gf]),
                np.array([pf]),
                np.array([mf]),
                np.array([sf]),
                np.array([move_rep])
            ])
        predictions.append((float(raw), move))

    movecache[h] = predictions
    return predictions
def net_evaluator(board):
    """Evaluate ``board`` with the value network, caching results by Zobrist hash.

    Checkmate and stalemate are scored directly without consulting the model:
    a checkmate is worth ``9000 - len(board.move_stack)``, negative when
    ``board.turn`` is truthy, and a stalemate is worth 0.

    :param board: the position to evaluate
    :return: the score as described above, otherwise the model output as float
    """
    global gmodel

    if board.is_checkmate():
        mate_score = 9000 - len(board.move_stack)
        return -mate_score if board.turn else mate_score
    if board.is_stalemate():
        return 0

    # Load the model lazily on first use.
    if not gmodel:
        if quant:
            gmodel = tf.lite.Interpreter(model_path=WEIGHTS_PATH)
            gmodel.allocate_tensors()
        else:
            gmodel = tf.keras.models.load_model(WEIGHTS_PATH)

    key = zobrist_hash(board)
    if key in cache:
        return cache[key]

    gf, pf, mf, sf = get_pos_rep(board)
    if quant:
        for tensor_index, feature in ((0, gf), (2, pf), (1, mf), (3, sf)):
            gmodel.set_tensor(tensor_index, [feature])
        gmodel.invoke()
        raw = gmodel.get_tensor(70)[0][0]
    else:
        raw = gmodel(
            [np.array([gf]),
             np.array([pf]),
             np.array([mf]),
             np.array([sf])])

    score = float(raw)
    cache[key] = score
    return score
bestscore = score if score > alpha: alpha = score flag = '' if bestscore <= alphaorig: flag = 'upper' elif bestscore >= beta: flag = 'lower' else: flag = 'exact' ttable.insert(keyorig, depth, bestscore, flag) return bestscore ttable = TTable() b = chess.Board() while True: key = zobrist_hash(b) print(b) qmove = input('move input:') move = chess.Move.from_uci(qmove) b.push(move) cmove = root(b, key=key) b.push(cmove) ''' for k, v in ttable.table.items(): print(f'{k},{v.evaluation}') '''
def negamax(engine, depth, alpha, beta, color, root=False, prob=1.0, curr_depth=0):
    """Negamax search with alpha-beta pruning and a flagged transposition table.

    :param engine: object providing ``board``, ``evaluator``,
        ``transition_table`` and ``prob_threshold``
    :param depth: remaining depth; at 0 the result of ``quiesce`` is used
    :param alpha: lower bound of the search window
    :param beta: upper bound of the search window
    :param color: sign forwarded to ``quiesce`` and the move-sort helper;
        negated on every recursive call
    :param root: when True, moves that end the game without checkmate are
        filtered out first (see below)
    :param prob: accumulated move probability along the current line; it is
        only passed on to the recursion
    :param curr_depth: number of plies from the root
    :return: a 3-item sequence ``(score, move, depth)``; a tuple when answered
        from the transposition table, a list otherwise
    """
    # NOTE(review): prob_threshold is read but only referenced by the
    # commented-out conditions further down.
    prob_threshold = engine.prob_threshold
    alphaorig = alpha
    board = engine.board
    evaluator = engine.evaluator
    transition_table = engine.transition_table
    moves = list(board.legal_moves)
    skip_cache = False
    if root:
        # Keep only moves that deliver checkmate or leave the game running.
        # The filter is applied only if it removes some, but not all, moves;
        # in that case the table probe below is skipped.
        tmp = []
        for move in moves:
            board.push(move)
            if board.is_checkmate() or not board.is_game_over():
                tmp.append(move)
            board.pop()
        if len(tmp) < len(moves) and len(tmp) > 0:
            moves = tmp
            skip_cache = True
    h = zobrist_hash(board)
    # ttEntry lookup
    ttEntry = transition_table.get(h)
    if not skip_cache and ttEntry and ttEntry.depth >= depth:
        if ttEntry.flag == EntryFlag.EXACT:
            return (ttEntry.value, ttEntry.move, ttEntry.depth)
        elif ttEntry.flag == EntryFlag.LOWERBOUND:
            alpha = max(alpha, ttEntry.value)
        elif ttEntry.flag == EntryFlag.UPPERBOUND:
            beta = min(beta, ttEntry.value)
        # The stored bound may have closed the window entirely.
        if alpha >= beta:
            return (ttEntry.value, ttEntry.move, ttEntry.depth)
    #if prob < prob_threshold:
    if depth == 0 or board.is_checkmate() or len(moves) == 0:
        return [
            quiesce(board, evaluator, alpha, beta, color), None, curr_depth
        ]
    _max = [-99999, None, curr_depth]
    #if True and prob > (prob_threshold):
    if depth > 1:
        # Pred sort
        # NOTE(review): the returned list is sorted in place; check whether
        # get_moves_pred hands out a shared/cached list.
        moves = get_moves_pred(board, moves, h=h)
        moves.sort(key=lambda x: x[0], reverse=True)
    else:
        moves = sorted(moves,
                       key=lambda x: get_move_sort_score(board, x, color),
                       reverse=True)
        # Give every move the same probability so both branches yield
        # (probability, move) pairs.
        moves = [(1.0 / len(moves), x) for x in moves]
    for move in moves:
        (cprob, move) = move
        board.push(move)
        # Negamax: search the child with the negated, swapped window and
        # negate the score it returns.
        (score, ch, node_depth) = negamax(engine,
                                          depth - 1,
                                          -beta,
                                          -alpha,
                                          -color,
                                          prob=prob * cprob,
                                          curr_depth=curr_depth + 1)
        score = -score
        board.pop()
        if score > _max[0]:
            _max = [score, move, node_depth]
        alpha = max(alpha, _max[0])
        if alpha >= beta:
            break
    # Store ttEntry
    value = _max[0]
    if value <= alphaorig:
        flag = EntryFlag.UPPERBOUND
    elif value >= beta:
        flag = EntryFlag.LOWERBOUND
    else:
        flag = EntryFlag.EXACT
    ttEntry = Entry(value=value, depth=depth, flag=flag, move=_max[1])
    transition_table[h] = ttEntry
    return _max
def hash_(board: Board) -> Hash:
    """Return the value of ``polyglot.zobrist_hash`` for ``board``."""
    zobrist_key = polyglot.zobrist_hash(board)
    return zobrist_key
def _abNegamax(self, board, maxDepth, depth, alpha, beta):
    """Negamax search with alpha-beta pruning backed by the transposition table.

    :param board: position to search; moves are pushed and popped in place
    :param maxDepth: ply at which the search stops and evaluates
    :param depth: current ply, counted upwards from the root
    :param alpha: lower bound of the search window
    :param beta: upper bound of the search window
    :return: ``(move, score, finalBoard)``; ``move`` is ``''`` when the node
        was evaluated directly
    """
    table = self._transTable
    alphaOriginal = alpha
    remaining = maxDepth - depth
    zhash = zobrist_hash(board)

    entry = table.table.get(zhash)
    if entry and entry.depth >= remaining:
        if entry.scoreType == table.EXACT_SCORE:
            table.hits = table.hits + 1
            return (entry.move, entry.score, entry.finalBoard)
        elif entry.scoreType == table.LOWER_BOUND_SCORE:
            alpha = max(alpha, entry.score)
        else:
            beta = min(beta, entry.score)
        # The stored bound may have closed the window entirely.
        if alpha >= beta:
            return (entry.move, entry.score, entry.finalBoard)

    if not entry:
        entry = transTable.transTableEntry()
        entry.zobristHash = zhash
    entry.result = board.result()
    entry.depth = remaining
    entry.move = None

    if depth == maxDepth or entry.result != "*":
        entry.score = evaluator(board, entry.result)
        # A direct evaluation is an exact score; without this a recycled
        # entry would keep the bound type of an earlier search.
        entry.scoreType = table.EXACT_SCORE
        # NOTE(review): this stores the live board object, which the callers
        # keep mutating via push/pop - confirm a snapshot is not needed.
        entry.finalBoard = board
        # Evict only when a genuinely new key would overflow the table, and
        # keep ``size`` equal to the real entry count. The old code bumped
        # ``size`` on every store, even when overwriting an existing key.
        # dict.popitem() drops the most recently inserted entry.
        if (zhash not in table.table and table.table
                and len(table.table) >= table.maxSize):
            table.table.popitem()
        table.table[zhash] = entry
        table.size = len(table.table)
        return ('', entry.score, board)

    maxScore = -(1 << 64)
    bestMove = None
    finalBoard = None
    for move in board.legal_moves:
        board.push(move)
        # Negamax: search the child with the negated, swapped window and
        # negate the score it returns.
        _, score, finalBoard = self._abNegamax(board, maxDepth, depth + 1,
                                               -beta, -alpha)
        score = -score
        board.pop()
        if score > maxScore:
            maxScore = score
            bestMove = move
        alpha = max(alpha, score)
        if alpha >= beta:
            break

    entry.score = maxScore
    entry.move = bestMove
    entry.finalBoard = finalBoard
    if maxScore <= alphaOriginal:
        entry.scoreType = table.UPPER_BOUND_SCORE
    elif maxScore >= beta:
        entry.scoreType = table.LOWER_BOUND_SCORE
    else:
        entry.scoreType = table.EXACT_SCORE

    # Same bounded-store rule as above; membership is checked at store time
    # because the recursion may have evicted or inserted this key meanwhile.
    if (zhash not in table.table and table.table
            and len(table.table) >= table.maxSize):
        table.table.popitem()
    table.table[zhash] = entry
    table.size = len(table.table)
    return (bestMove, maxScore, finalBoard)
def search(self, board: chess.Board):
    """
    Recursively search the tree, using UCT and the network's policy to choose
    which branch to explore next.

    :param board: the position to search from; the selected move is pushed
        and popped in place
    :return: the negated value of the position: ``-end_states[result]`` for a
        finished game, the negated network value for a newly expanded
        position, otherwise the negated value returned by the recursion
    """
    if board.is_game_over():
        return -end_states[board.result()]

    board_hash = polyglot.zobrist_hash(board)

    if board_hash not in self.Policy_vectors:
        # First visit: ask the network for a policy and a value.
        # The network always sees the current player as white, so with black
        # to play a flipped and mirrored copy is used. Working on a copy
        # preserves the caller's move stack.
        if not board.turn:
            oriented = board.transform(chess.flip_vertical)
            oriented.apply_mirror()
        else:
            oriented = board.copy()

        cannonical = adapter.get_cannonical(oriented)
        policy_vector, nnet_value = self.network.predict(cannonical)

        # Mask out invalid moves
        valids = adapter.moves_to_policy_mask(list(board.legal_moves))
        policy_vector *= valids

        if not np.sum(policy_vector > 0):
            # Nothing survived the mask: fall back to the legal-move mask.
            print(
                "All valid moves were masked. Adding valids. Warning if lots of these messages."
            )
            policy_vector += valids
        policy_vector /= np.linalg.norm(policy_vector)

        # Store the policy in both cases. The fallback policy used to be
        # discarded, so such a position was re-predicted on every visit and
        # never expanded.
        self.Policy_vectors[board_hash] = policy_vector
        self.N_vals[board_hash] = 0

        # Return the estimate until we actually reach the end.
        return -nnet_value

    # Score every legal move with its upper confidence bound, based on the
    # probability the network assigned to it.
    action_heuristic_dict = {}
    curr_move_policy = self.Policy_vectors[board_hash]
    for move in list(board.legal_moves):
        move_prob = adapter.get_move_prob(curr_move_policy, move)
        action_heuristic_dict[move] = self.get_ucb(board_hash, move,
                                                   move_prob * 2.0)

    # Follow the move with the highest bound.
    move = max(action_heuristic_dict, key=action_heuristic_dict.get)

    board.push(move)
    value = self.search(board)
    board.pop()

    # Back-propagate the result for use by later searches.
    self.update_values(board_hash, move, value)
    self.N_vals[board_hash] += 1
    return -value
def _abNegamax(self, board, maxDepth, depth, alpha, beta):
    """Negamax search with alpha-beta pruning backed by the transposition table.

    :param board: position to search; moves are pushed and popped in place
    :param maxDepth: ply at which the search stops and evaluates
    :param depth: current ply, counted upwards from the root
    :param alpha: lower bound of the search window
    :param beta: upper bound of the search window
    :return: ``(move, score, finalBoard)``; ``move`` is ``''`` when the node
        was evaluated directly
    """
    table = self._transTable
    alphaOriginal = alpha
    remaining = maxDepth - depth
    zhash = zobrist_hash(board)

    entry = table.table.get(zhash)
    if entry and entry.depth >= remaining:
        if entry.scoreType == table.EXACT_SCORE:
            table.hits = table.hits + 1
            return (entry.move, entry.score, entry.finalBoard)
        elif entry.scoreType == table.LOWER_BOUND_SCORE:
            alpha = max(alpha, entry.score)
        else:
            beta = min(beta, entry.score)
        # The stored bound may have closed the window entirely.
        if alpha >= beta:
            return (entry.move, entry.score, entry.finalBoard)

    if not entry:
        entry = transTable.transTableEntry()
        entry.zobristHash = zhash
    entry.result = board.result()
    entry.depth = remaining
    entry.move = None

    if depth == maxDepth or entry.result != "*":
        entry.score = evaluator(board, entry.result)
        # A direct evaluation is an exact score; without this a recycled
        # entry would keep the bound type of an earlier search.
        entry.scoreType = table.EXACT_SCORE
        # NOTE(review): this stores the live board object, which the callers
        # keep mutating via push/pop - confirm a snapshot is not needed.
        entry.finalBoard = board
        # Evict only when a genuinely new key would overflow the table, and
        # keep ``size`` equal to the real entry count. The old code bumped
        # ``size`` on every store, even when overwriting an existing key.
        # dict.popitem() drops the most recently inserted entry.
        if (zhash not in table.table and table.table
                and len(table.table) >= table.maxSize):
            table.table.popitem()
        table.table[zhash] = entry
        table.size = len(table.table)
        return ('', entry.score, board)

    maxScore = -(1 << 64)
    bestMove = None
    finalBoard = None
    for move in board.legal_moves:
        board.push(move)
        # Negamax: search the child with the negated, swapped window and
        # negate the score it returns.
        _, score, finalBoard = self._abNegamax(board, maxDepth, depth + 1,
                                               -beta, -alpha)
        score = -score
        board.pop()
        if score > maxScore:
            maxScore = score
            bestMove = move
        alpha = max(alpha, score)
        if alpha >= beta:
            break

    entry.score = maxScore
    entry.move = bestMove
    entry.finalBoard = finalBoard
    if maxScore <= alphaOriginal:
        entry.scoreType = table.UPPER_BOUND_SCORE
    elif maxScore >= beta:
        entry.scoreType = table.LOWER_BOUND_SCORE
    else:
        entry.scoreType = table.EXACT_SCORE

    # Same bounded-store rule as above; membership is checked at store time
    # because the recursion may have evicted or inserted this key meanwhile.
    if (zhash not in table.table and table.table
            and len(table.table) >= table.maxSize):
        table.table.popitem()
    table.table[zhash] = entry
    table.size = len(table.table)
    return (bestMove, maxScore, finalBoard)
def idFromBoard(board: Board) -> AnalysedPositionID:
    """Return the board's ``polyglot.zobrist_hash`` value converted to ``str``."""
    zobrist_key = polyglot.zobrist_hash(board)
    return str(zobrist_key)
# Python 2 script section: replay the main line of `game` and collect one
# "normalized FEN {evaluation}" line for every position whose Zobrist hash is
# not yet in `already_have`.
cnt = 0
# The first output line is a '#'-prefixed header with the game's moves in SAN.
fen_data = [ '# ' + chess.Board().variation_san( game.main_line() ) ]
board = chess.Board()
for m in game.main_line() :
    print m,
    board.push(m)
    # Stop at the first position in which the game is over.
    if board.is_game_over() :
        print 'game over'
        break
    #cnt += 1
    #if cnt < 50 : continue
    #print '\n', board
    zobrist = zobrist_hash(board)
    if zobrist not in already_have :
        already_have.add( zobrist )
        # b_eval is joined with ', ' below, so it is an iterable of strings.
        fen, b_eval = normalize( board.fen(), generate_evaluation( board ) )
        print fen, '{' + ', '.join( b_eval ) + '}'
        fen_data.append( fen + ' {' + ', '.join( b_eval ) + '}' )
    else :
        print board.fen(), 'already have'
    #break
# Write only if at least one position was collected besides the header. The
# output file is named after the base name of the first command-line argument
# and is opened in append mode.
if len(fen_data) > 1 :
    with open( os.path.basename(sys.argv[1]) + '.txt', 'a' ) as fout :
        fout.write( '\n'.join( fen_data ) )