def play_chessgame(best_estimator: tf.keras.Model, mutated_estimator: tf.keras.Model, training_side):
    """
    Play one chess game between two estimator models and print every position.

    The side equal to training_side is played by mutated_estimator, the other
    side by best_estimator. For each turn, all positions reachable by one draw
    are rated by the model and the draw with the highest prediction is applied.

    :param best_estimator: The model playing the side that is not training_side.
    :param mutated_estimator: The model playing training_side.
    :param training_side: The side (0 or 1) played by mutated_estimator; side 0 draws first.
    :returns: The first state other than 'n' reported by get_game_state,
              or 't' if has_loops reports a loop in the draw history.
    """

    # reset the chess game variables
    board = chesslib.ChessBoard_StartFormation()
    drawing_side = 0
    draw_history = []
    print(chesslib.VisualizeBoard(board))

    # play until the game is over
    while True:

        # compute all possible draws
        last_draw = draw_history[-1] if draw_history else chesslib.ChessDraw_Null
        draws = chesslib.GenerateDraws(board, drawing_side, last_draw, True)

        # build the model input: one row per draw, consisting of the
        # resulting board with the draw itself appended as last column
        possible_boards = np.array(
            [chesslib.ApplyDraw(board, draw) for draw in draws])
        fill_column = np.expand_dims(draws, axis=1)
        vector = np.append(possible_boards, fill_column, axis=1)

        # determine the best of those draws using the estimator
        model = mutated_estimator if drawing_side == training_side else best_estimator
        predictions = model.predict(vector)
        best_draw = draws[np.argmax(predictions)]

        # apply the draw to the chessboard and update the draw history
        board = chesslib.ApplyDraw(board, best_draw)
        draw_history.append(best_draw)

        # print the draw and the resulting board
        print(chesslib.VisualizeDraw(best_draw))
        print(chesslib.VisualizeBoard(board))

        # exit if the game is over
        game_state = get_game_state(board, draw_history, drawing_side)
        if game_state != 'n':
            break

        # exit with a tie if the draw history contains a loop
        if has_loops(draw_history):
            game_state = 't'
            break

        # hand over to the other side
        drawing_side = (drawing_side + 1) % 2

    return game_state
def generate_game(self, value_est: tf.keras.Model, epsilon: float=0.1):
    """
    Play one game with an epsilon-greedy policy and record it as SARS data.

    :param value_est: Callable rating a converted board (value_est(conv_board(board))).
    :param epsilon: Probability of applying a random draw instead of the greedy one.
    :returns: A tuple (states, actions, rewards, next_states) where states[i] is the
              position before actions[i] and next_states[i] is the position after it.
              rewards is always an empty list; no rewards are computed here.
    """

    # load the start formation
    # NOTE(review): chesslib is called with the same argument lists as the other
    # call sites in this file (GameState(board, last_draw), ApplyDraw(board, draw),
    # GenerateDraws(board, side, last_draw, True)) -- confirm against the chesslib version in use
    drawing_side = chesslib.ChessColor_White
    last_draw = chesslib.ChessDraw_Null
    board = chesslib.ChessBoard_StartFormation()

    # initialize states and actions cache (plain lists keep one entry per
    # board; np.append() without axis would flatten the boards into one vector)
    visited_boards = [board]
    applied_draws = []

    # perform actions until the game is over
    while True:
        state = chesslib.GameState(board, last_draw)
        if state == chesslib.GameState_Checkmate or state == chesslib.GameState_Tie:
            break

        # get all possible actions
        poss_actions = chesslib.GenerateDraws(board, drawing_side, last_draw, True)
        if len(poss_actions) == 0:
            # nothing to choose from -> treat the game as finished
            break

        # compute the resulting board for each possible action
        next_boards = [chesslib.ApplyDraw(board, draw) for draw in poss_actions]

        # choose the action to be applied (epsilon-greedy)
        if np.random.uniform() <= epsilon:
            selected = np.random.randint(len(poss_actions))
        else:
            # note: argmin() is required because otherwise the AI would learn draws
            #       that are beneficial for the opponent (argmax() -> maximize opp. reward?!)
            action_ratings = [value_est(conv_board(next_board)) for next_board in next_boards]
            selected = int(np.argmin(action_ratings))

        # write the (state, action) tuple to cache and update the board with the state
        last_draw = int(poss_actions[selected])
        board = next_boards[selected]
        visited_boards.append(board)
        applied_draws.append(last_draw)

        # hand over to the other side
        # NOTE(review): assumes the side constants are 0 and 1, as the other game loops here do
        drawing_side = (drawing_side + 1) % 2

    # now that the game is over, bring the game data into SARS format
    # (n applied draws -> n states before and n states after the draw)
    states = np.array(visited_boards[:-1])
    next_states = np.array(visited_boards[1:])
    actions = np.array(applied_draws)
    rewards = []

    return (states, actions, rewards, next_states)
def prepare_pandas_data(self, pd_winrates: pd.DataFrame):
    """
    Convert a win rates dataframe into SARS data (state-action-reward-nextstate).

    :param pd_winrates: Dataframe with the columns 'BoardBeforeHash',
                        'DrawHashNumeric' and 'WinRate'.
    :returns: A new dataframe with the columns 'states', 'actions', 'rewards'
              and 'next_states', one row per input row (in input order,
              indexed 0..n-1).
    """

    # convert the dataframe into SARS data slices
    # plain lists are used on purpose: assigning the original Series would align
    # them by index label and yield NaN for any input with a non-default index
    states = [self.bitboards_from_hash(x) for x in pd_winrates['BoardBeforeHash']]
    actions = list(pd_winrates['DrawHashNumeric'])
    rewards = list(pd_winrates['WinRate'])

    # apply each draw to its board to get the resulting position
    next_states = [chesslib.ApplyDraw(board, draw)
                   for board, draw in zip(states, actions)]

    # combine the SARS data slices to a dataframe
    pd_winrates_sars = pd.DataFrame()
    pd_winrates_sars['states'] = states
    pd_winrates_sars['actions'] = actions
    pd_winrates_sars['rewards'] = rewards
    pd_winrates_sars['next_states'] = next_states

    return pd_winrates_sars
def __call__(self, chessboard: np.ndarray, valid_draws: np.ndarray, last_draw: int, comp_time: int):
    """
    Estimate a score for each draw from the list for the given chess position.
    Therefore use the negamax algorithm with alpha-beta prune and iterative deepening.

    :param chessboard: The chess board representing the current position (bitboard format).
    :param valid_draws: The valid draws to be evaluated.
    :param last_draw: The draw preceding the given position (passed on to negamax).
    :param comp_time: The max. amount of time to compute the result (in seconds).
                      The limit is checked after each search depth, so a running
                      depth is always finished and depth 1 is always searched.
                      None disables the time limit.
    :returns: A list of (draw, estimated score) tuples, one for each of the valid_draws
              (same order as valid_draws).
    """

    start_timestamp = dt.datetime.now(dt.timezone.utc)
    max_depth = 10

    # represent the draws with the resulting chess position
    next_boards = np.array(
        [chesslib.ApplyDraw(chessboard, draw) for draw in valid_draws])
    next_board_hashes = np.array(
        [self.board_to_str(board) for board in next_boards])

    def lookup_scores():
        # positions that are not cached (yet) get a neutral score
        return np.array([
            self.cache[key] if key in self.cache else 0.0
            for key in next_board_hashes
        ])

    # initialize all draws with the scores known so far
    est_scores = lookup_scores()
    print("est. scores:", est_scores)

    # run iterative deepening as deep as possible within the given time limit
    for depth in range(1, max_depth + 1):

        # the return value is not needed, negamax writes the scores to the cache
        self.negamax(chessboard, valid_draws, int(last_draw),
                     depth, float('-inf'), float('inf'))

        # update the scores (will be used when the computation time is up)
        est_scores = lookup_scores()
        print("depth", depth, "est. scores:", est_scores)

        # stop deepening as soon as the computation time is used up
        elapsed = (dt.datetime.now(dt.timezone.utc) - start_timestamp).total_seconds()
        if comp_time is not None and elapsed >= comp_time:
            break

    # pair each draw with its estimated score
    return list(zip(valid_draws, est_scores))
def negamax(self, bitboards: np.ndarray, valid_draws: np.ndarray, last_draw,
            depth: int, alpha: float, beta: float) -> float:
    """
    Estimate the value of a chess position using negamax with alpha-beta prune.

    Each tried draw's score is written to self.cache (keyed by the string of the
    resulting board) from the perspective of the side making the draw,
    i.e. a higher cached score means a better draw.

    :param bitboards: The chess position to be evaluated (bitboard format).
    :param valid_draws: The valid draws of the side to move in the given position.
    :param last_draw: The draw preceding the given position.
    :param depth: The remaining search depth (0 = evaluate the position directly).
    :param alpha: The lower bound of the search window.
    :param beta: The upper bound of the search window.
    :returns: The estimated value of the position for the side to move.
    """

    # determine the drawing side (read from bit 23 of the first draw)
    # NOTE(review): falls back to side 0 when there are no valid draws
    drawing_side = int(valid_draws[0] >> 23 & 1) if len(valid_draws) > 0 else 0
    opponent_side = 1 - drawing_side

    # handle recursion termination cases (max. search depth or terminal state reached)
    state = chesslib.GameState(bitboards, last_draw)
    if state == chesslib.GameState_Checkmate:
        return float('-inf')
    # NOTE(review): the tie case (return 0.0) was disabled in the original code;
    #               a position without valid draws is therefore rated as -inf below
    if depth == 0:
        return self.eval_chessboard(bitboards, drawing_side)

    # determine the estimated scores for each draw
    # then, order the draws by their estimated score descendingly
    # this ensures trying out promising draws first and achieving more cut-offs
    next_boards = np.array(
        [chesslib.ApplyDraw(bitboards, draw) for draw in valid_draws])
    next_board_hashes = np.array(
        [self.board_to_str(board) for board in next_boards])
    est_scores = np.array([
        self.cache[key] if key in self.cache else 0.0
        for key in next_board_hashes
    ])
    sort_perm = np.argsort(est_scores * -1.0)  # sort by descending scores

    # initialize the estimated value as negative infinity
    value = float('-inf')

    # try out the draws one-by-one and estimate their outcomes recursively
    # therefore process the most promising draws first according to the score permutation
    for i in sort_perm:
        draw = int(valid_draws[i])
        next_board = next_boards[i]

        # compute the valid draws of the opponent, who is to move after this draw
        next_valid_draws = chesslib.GenerateDraws(
            next_board, opponent_side, draw, True)

        # estimate the goodness of the draw recursively; the opponent searches
        # the negated and swapped window and returns the value from his own
        # perspective, so the result needs to be inverted
        est_draw_score = self.negamax(
            next_board, next_valid_draws, draw, depth - 1, -beta, -alpha) * -1.0

        # update the cache with the new estimated score
        self.cache[next_board_hashes[i]] = est_draw_score

        # update the value and the lower bound of the search window
        value = max(value, est_draw_score)
        alpha = max(alpha, value)

        # perform cut-off (there is a good enemy reply -> stop computing!)
        if alpha >= beta:
            break

    # return the estimated value of the given chess position considering only best draws
    return value
# Play a single game from the start formation: the model rates all valid draws,
# then the best draw is applied with probability (1 - epsilon) and the second
# best draw otherwise, until checkmate or tie.
# NOTE(review): model, comp_time and epsilon are defined outside of this section
drawing_side = chesslib.ChessColor_White
last_draw = chesslib.ChessDraw_Null
board = chesslib.ChessBoard_StartFormation()
print(chesslib.VisualizeBoard(board))

while True:

    # determine the estimated draw scores
    draws = chesslib.GenerateDraws(board, drawing_side, int(last_draw), True)
    if len(draws) == 0:
        # no draw left to apply -> the game cannot be continued
        break
    draws_x_scores = model(board, draws, last_draw, comp_time)

    # determine the best and the second best draw
    # (entries are (draw, score) tuples -> sort by score, highest first)
    draws_desc_by_score = sorted(
        draws_x_scores, key=lambda draw_x_score: draw_x_score[1], reverse=True)
    best_draw = draws_desc_by_score[0][0]
    second_best_draw = draws_desc_by_score[1][0] if len(
        draws_desc_by_score) > 1 else best_draw

    # sometimes, use only the second best draw for a bit of variety
    # (uniform() samples from [0, 1), so this happens with probability epsilon)
    draw_to_apply = best_draw if epsilon < np.random.uniform() \
        else second_best_draw
    board = chesslib.ApplyDraw(board, draw_to_apply)
    print(chesslib.VisualizeBoard(board))

    # check whether the game is over (finalize the game in case)
    last_draw = draw_to_apply
    state = chesslib.GameState(board, last_draw)
    if state == chesslib.GameState_Checkmate or state == chesslib.GameState_Tie:
        break

    # hand over to the other side
    # NOTE(review): assumes the side constants are 0 and 1 -- confirm against chesslib
    drawing_side = (drawing_side + 1) % 2