def main():
    # Create a 9x9 board
    game = GobangGame(n=9, nir=9)
    network = NNetWrapper(game)
    if args.load_model:
        network.load_checkpoint(args.checkpoint)
    coach = Coach(game, network, args)
    coach.learn()
    # SET beta = 1 DURING TESTING SINCE x SHOULD BE UNKNOWN DURING TESTING.
    'tempThreshold': 10,  # dictates when MCTS starts returning deterministic policies (vectors of 0's and 1's). See Coach.py for more details.
    'alpha': 1e-5,  # the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller alpha is, the more weight the algorithm gives to selecting a sparse solution.
    'gamma': 1,  # the smaller gamma is, the more likely the algorithm is to choose the stopping action early (when ||x||_0 is small); gamma controls how strongly we require A*x to be close to y.
    # The choice of gamma depends heavily on the distribution of the signal and of the entries of A. gamma should be
    # approximately larger than m / ||A_S*x^* - y||_2^2, where y = A*x and x^* is the solution to the l2 regression problem.
    'epsilon': 1e-5,  # if x is the optimal l2 solution and the l2 regression residual ||A_S*x - y||_2^2 is less than epsilon, then the state corresponding to the index set S is a terminal state in MCTS.
}

# START ALPHAZERO TRAINING:
# Initialize Game_args, nnet, Game, and the AlphaZero coach
game_rules = Game_args()
game = CSGame()
nnet = NNetWrapper(args)

if args['load_nn_model']:
    filename = 'best'
    nnet.load_checkpoint(args['network_checkpoint'], filename)

alphazero_train = Coach(game, nnet, args, game_rules)

if args['load_training']:
    print('Loading trainExamples from file')
    alphazero_train.loadTrainExamples()

# Start training AlphaZero
alphazero_train.learn()
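# A minimal sketch of the terminal reward and epsilon stopping rule described in
# the comments above, assuming numpy arrays A (m x n), observation y, and a chosen
# column index set S. The helper name and signature are illustrative, not part of
# this repo; the suggested gamma at the end mirrors the heuristic in the comments.
import numpy as np

def terminal_reward(A, S, y, alpha=1e-5, gamma=1, epsilon=1e-5):
    A_S = A[:, S]
    x, *_ = np.linalg.lstsq(A_S, y, rcond=None)     # solves min_z ||A_S*z - y||_2^2
    residual_sq = np.linalg.norm(A_S @ x - y) ** 2
    is_terminal = residual_sq < epsilon             # epsilon termination rule
    reward = -alpha * np.count_nonzero(x) - gamma * residual_sq
    # heuristic from the comments: gamma should be roughly larger than m / residual_sq
    suggested_gamma = A.shape[0] / residual_sq if residual_sq > 0 else None
    return reward, is_terminal, suggested_gamma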
}

# Initialize Algorithms object to compare algorithms
Algorithms = CSAlgorithms()

# INITIALIZE ALPHAZERO FOR PREDICTION
# --------------------------------------------------------------
# Initialize Game_args and load the sensing matrix into it
game_args = Game_args()
matrix_filename = 'sensing_matrix.npy'
A = np.load(matrix_filename)
game_args.sensing_matrix = A

# Initialize the neural network wrapper object and load the weights/model
# we wish to predict with using nnet.load_checkpoint
nnet = NNetWrapper(args)
model_filename = 'best'
nnet.load_checkpoint(os.getcwd(), model_filename)

# Initialize a new game object
new_game = CSGame()

# Initialize skip_nnet if the option is turned on
if args['skip_rule'] == 'bootstrap':
    skip_nnet = NNetWrapper(args)
    skip_nnet.load_checkpoint(args['skip_nnet_folder'], args['skip_nnet_filename'])
elif args['skip_rule'] is None:
    skip_nnet = nnet
else:
    skip_nnet = None
    'gamma': 1,  # the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller gamma is, the more likely the algorithm is to choose the stopping action early (when ||x||_0 is small); gamma controls how strongly we require A*x to be close to y.
    # The choice of gamma depends heavily on the distribution of the signal and of the entries of A. gamma should be
    # approximately larger than m / ||A_S*x^* - y||_2^2, where y = A*x and x^* is the solution to the l2 regression problem.
    'alpha': 1e-5,  # the smaller alpha is, the more weight the algorithm gives to selecting a sparse solution.
    'epsilon': 1e-5,  # if x is the optimal l2 solution and the residual ||A_S*x - y||_2^2 is less than epsilon, then the state corresponding to the index set S is a terminal state in MCTS.
}

# Test the search capabilities of multiple MCTS objects using Threading_MCTS:
# a global Game_args object, a global CSGame (for the game rules), and a global policy/value net.
game_args = Game_args()
game_args.generateSensingMatrix(args['m'], args['n'], args['matrix_type'])
Game = CSGame()
nnet = NNetWrapper(args)
# ---------------------------------------------------
# Initialize MCTS_States_list
for i in range(args['num_batches']):
    MCTS_States_list = []
    batchTrainExamples = []
    # In the loop below, we create pairs of the form (MCTS_object, [list of States])
    for ep in range(args['eps_per_batch']):
        # Initialize Game_args() for this MCTS instance
        temp_game_args = Game_args()
        temp_game_args.sensing_matrix = game_args.sensing_matrix
        temp_game_args.generateNewObsVec(args['x_type'], args['sparsity'])
        # Initialize MCTS object
        temp_MCTS = MCTS(Game,
for x in range(7):
    for y in range(7):
        action2move.append((None, None, x, y))
for x in range(7):
    for y in range(7):
        for dx, dy in _directions_2:
            if {x, y, x + dx, y + dy} <= set(range(7)):
                action2move.append((x, y, x + dx, y + dy))
action2move.append((None, None, None, None))

g = AtaxxGame()
args1 = dotdict({'numMCTSSims': 2000, 'cpuct': 1.0})
n1 = NNet(g)
n1.load_checkpoint('.', 'best.pth.tar')
mcts1 = MCTS(g, n1, args1)
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

player = __file__[2]
board = []
actions = {}  # {key: piece (start position), value: list of positions (destination positions)}

# make board
input_lines = input_str.split("\n")
for i in range(7):
    line = input_lines[i + 1].split(" ")
    dic = {1: -1, 2: 1, 0: 0}
    # tokens from split() are strings, so convert to int before remapping
    line = [dic.get(int(n), int(n)) for n in line]
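# A minimal usage sketch (not from this script): decoding a policy index back
# into a board move via action2move. `board` here stands for a canonical-form
# Ataxx board; the final (None, None, None, None) entry encodes "pass", and the
# (None, None, x, y) entries are likely clones onto the target square.
action = n1p(board)
src_x, src_y, dst_x, dst_y = action2move[action]
if src_x is None and dst_x is None:
    print("pass")
elif src_x is None:
    print("clone a piece onto ({}, {})".format(dst_x, dst_y))
else:
    print("jump from ({}, {}) to ({}, {})".format(src_x, src_y, dst_x, dst_y))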
# Set number of threads for OpenMP
os.environ["OMP_NUM_THREADS"] = "1"

g = Game(is_basic=True)

# Suppress logging from fireplace
logger = logging.getLogger("fireplace")
logger.setLevel(logging.WARNING)
# logging.disable(logging.WARNING)  # disable logging
# logging.disable(logging.NOTSET)   # re-enable logging

# all players
hp = HumanPlayer(g).play
rp = RandomPlayer(g).play

# nnet players
n1 = NNet()
# n1.nnet.cuda()
# n1.load_checkpoint('./temp/', '0.pth.tar')
# n1.load_checkpoint('./temp/', 'best18-287k-75i.pth.tar')
# newest network
n1.load_checkpoint('../remote/models/', '0-32.pth.tar')
argsNN = dotdict({'numMCTSSims': 25, 'cpuct': 1.0})  # minimum numMCTSSims = 2 to always find a valid action (at least "end turn")
mcts1 = MCTS(g, n1, argsNN)
# a1p = lambda x: mcts1.getActionProb(x, temp=0)
a1p = functools.partial(mcts1.getActionProb, temp=0)
# temp=1 means we pick an action by probability; temp=0 always takes the best action
# (most-visited edge; random tie-breaking if more than one best action is available)
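# A small illustration of the temperature comment above (assumption: `canonical`
# is a canonical-form board): temp=1 samples an action from the visit-count
# distribution, while temp=0 deterministically plays the most-visited action.
pi = mcts1.getActionProb(canonical, temp=1)
sampled_action = np.random.choice(len(pi), p=pi)
greedy_action = np.argmax(mcts1.getActionProb(canonical, temp=0))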
    'x_l2': True,    # solution to min_z ||A_S*z - y||_2^2, where A_S is the submatrix of the columns we have currently chosen
    'lambda': True,  # the vector lambda = A^T(A_S*x - y), where x is the optimal solution to min_z ||A_S*z - y||_2^2
    # ---------------------------------------------------------------
    # MCTS parameters
    'cpuct': 1,          # controls the amount of exploration at each depth of the MCTS tree.
    'numMCTSSims': 500,  # the number of MCTS simulations run to find each move during self-play.
    'tempThreshold': 0,  # dictates when MCTS starts returning deterministic policies (vectors of 0's and 1's). See Coach.py for more details.
    'gamma': 1,          # the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller gamma is, the more likely the algorithm is to choose the stopping action early (when ||x||_0 is small); gamma controls how strongly we require A*x to be close to y. We need gamma large enough!
    'alpha': 1e-5,       # the smaller alpha is, the more weight the algorithm gives to selecting a sparse solution.
    'epsilon': 1e-5,     # if x is the optimal l2 solution and the residual ||A_S*x - y||_2^2 is less than epsilon, then the state corresponding to the index set S is a terminal state in MCTS.
}

test_loss = np.zeros(alphazero_iterations)
for i in range(alphazero_iterations):
    # -------------------------------------------------------------
    game_args = Game_args()
    matrix_filename = 'sdnormal0.npy'
    A = np.load(matrix_filename)
    game_args.sensing_matrix = A
    nnet = NNetWrapper(args)
    model_filepath = os.getcwd() + '/network_checkpoint'
    model_filename = 'nnet_checkpoint' + str(i)
    nnet.load_checkpoint(model_filepath, model_filename)
    new_game = CSGame()
    Alphazero = Coach(new_game, nnet, args, game_args)
    # --------------------------------------------------------------
    # Alphazero is now ready for prediction
    avgloss_for_NN = 0
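# A minimal numpy sketch (illustrative helper, not part of this repo) of the two
# state features flagged above: the restricted l2 solution 'x_l2' and the
# residual vector 'lambda', given A (m x n), a chosen index set S, and y.
import numpy as np

def state_features(A, S, y):
    A_S = A[:, S]
    x_S, *_ = np.linalg.lstsq(A_S, y, rcond=None)  # min_z ||A_S*z - y||_2^2
    lam = A.T @ (A_S @ x_S - y)                    # the 'lambda' feature
    return x_S, lam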
""" use this script to play any two agents against each other, or play manually with any agent. """ human_vs_cpu = True g = HalfchessGame() # all players rp = RandomPlayer(g).play # gp = GreedyOthelloPlayer(g).play hp = HumanPlayer(g).play # nnet player nn = NNet(g) #nn.load_checkpoint('./temp/','best.pth.tar') nn.load_checkpoint('./pretrained_models/halfchess', '43it.pth.tar') args = dotdict({'numMCTSSims': 50, 'cpuct': 1.0}) mcts = MCTS(g, nn, args) nnp = lambda x: np.argmax(mcts.getActionProb(x, temp=1)) if human_vs_cpu: player1 = hp else: n2 = NNet(g) n2.load_checkpoint('./pretrained_models/halfchess/', '26it_fixed_logic.pth.tar') args2 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0}) mcts2 = MCTS(g, n2, args2) n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))
args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts1 = MCTS(g, n1, args1)
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

n2 = NNet(g, argsNN)
n2.load_checkpoint('/content/drive/My Drive/temp/', 'checkpoint_1.pth.tar')
args2 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts2 = MCTS(g, n2, args2)
n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))

"""
for p in range(1, 5):
    print("iter:%d" % p)
    for i in range(70, 1, -1):
        n1 = NNet(g, argsNN)
        try:
            n1.load_checkpoint('/content/drive/My Drive/model/', 'checkpoint_%d.pth.tar' % i)
        except Exception:
            print("no model:%d" % i)
            continue
        args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
        mcts1 = MCTS(g, n1, args1)
        n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))  # exploitation
        for j in range(70, 1, -1):
            if i <= j:
                continue
            n2 = NNet(g, argsNN)
            try:
                n2.load_checkpoint('/content/drive/My Drive/model/',
def PlayGame():
    menu_def = [
        ['&File', ['&Do nothing', 'E&xit']],
        ['&Help', '&About...'],
    ]

    # sg.SetOptions(margins=(0,0))
    sg.ChangeLookAndFeel('GreenTan')

    # create initial board setup
    psg_board = copy.deepcopy(initial_board)

    # the main board display layout
    board_layout = [[sg.T(' ')] + [
        sg.T('{}'.format(a), pad=((23, 27), 0), font='Any 13') for a in 'efgh'
    ]]
    # loop through the board and create buttons with images
    for i in range(8):
        row = [sg.T(str(8 - i) + ' ', font='Any 13')]
        for j in range(4):
            piece_image = images[psg_board[i][j]]
            row.append(render_square(piece_image, key=(i, j), location=(i, j)))
        row.append(sg.T(str(8 - i) + ' ', font='Any 13'))
        board_layout.append(row)
    # add the labels across the bottom of the board
    board_layout.append([sg.T(' ')] + [
        sg.T('{}'.format(a), pad=((23, 27), 0), font='Any 13') for a in 'efgh'
    ])

    # set up the controls on the right side of the screen
    openings = ('Any', 'Defense', 'Attack', 'Trap', 'Gambit', 'Counter',
                'Sicilian', 'English', 'French', 'Queen\'s openings',
                'King\'s Openings', 'Indian Openings')
    board_controls = [
        [sg.RButton('New Game', key='New Game'), sg.RButton('Draw')],
        [sg.RButton('Resign Game'), sg.RButton('Set FEN')],
        [sg.RButton('Player Odds'), sg.RButton('Training')],
        [sg.Drop(openings), sg.Text('Opening/Style')],
        [sg.CBox('Play As White', key='_white_')],
        [sg.Text('Move List')],
        [
            sg.Multiline([], do_not_clear=True, autoscroll=True,
                         size=(15, 10), key='_movelist_')
        ],
    ]

    # layouts for the tabs
    controls_layout = [[sg.Text('Performance Parameters', font='_ 20')],
                       [sg.T('Put stuff like AI engine tuning parms on this tab')]]
    statistics_layout = [[sg.Text('Statistics', font=('_ 20'))],
                         [sg.T('Game statistics go here?')]]
    board_tab = [[sg.Column(board_layout)]]

    # the main window layout
    layout = [[sg.Menu(menu_def, tearoff=False)],
              [
                  sg.TabGroup([[
                      sg.Tab('Board', board_tab),
                      sg.Tab('Controls', controls_layout),
                      sg.Tab('Statistics', statistics_layout)
                  ]], title_color='red'),
                  sg.Column(board_controls)
              ],
              [sg.Text('Click anywhere on board for next move', font='_ 14')]]

    window = sg.Window('Chess',
                       default_button_element_size=(12, 1),
                       auto_size_buttons=False,
                       icon='kingb.ico').Layout(layout)

    g = HalfchessGame.HalfchessGame()
    nn = NNet(g)
    nn.load_checkpoint(nn_filepath, nn_filename)
    args = dotdict({'numMCTSSims': numMCTSSims, 'cpuct': cpuct})
    mcts = MCTS(g, nn, args)
    nnp = lambda x: np.argmax(mcts.getActionProb(x, temp=temp))

    board = g.getInitBoard()
    move_count = curPlayer = 1
    move_state = move_from = move_to = 0

    # ---===--- Loop taking in user input --- #
    while g.getGameEnded(board, curPlayer) == 0:
        canonicalBoard = g.getCanonicalForm(board, curPlayer)
        if curPlayer == human:
            # human_player(board)
            move_state = 0
            while True:
                button, value = window.Read()
                if button in (None, 'Exit'):
                    exit()
                if button == 'New Game':
                    sg.Popup('You have to restart the program to start a new game... sorry....')
                    break
                    # NOTE: the reset logic below is unreachable after the break above
                    psg_board = copy.deepcopy(initial_board)
                    redraw_board(window, psg_board)
                    move_state = 0
                    break
                if type(button) is tuple:
                    if move_state == 0:
                        move_from = button
                        row, col = move_from
                        piece = psg_board[row][col]  # get the move-from piece
                        button_square = window.FindElement(key=(row, col))
                        button_square.Update(button_color=('white', 'red'))
                        move_state = 1
                    elif move_state == 1:
                        move_to = button
                        row, col = move_to
                        if move_to == move_from:  # cancelled move
                            color = '#B58863' if (row + col) % 2 else '#F0D9B5'
                            button_square.Update(button_color=('white', color))
                            move_state = 0
                            continue
                        picked_move = '{}{}{}{}'.format('efgh'[move_from[1]], 8 - move_from[0],
                                                        'efgh'[move_to[1]], 8 - move_to[0])
                        action = moveset[picked_move]
                        valids = g.getValidMoves(canonicalBoard, 1)
                        if valids[action] != 0:
                            board, curPlayer = g.getNextState(board, curPlayer, action)
                        else:
                            print('Illegal move')
                            move_state = 0
                            color = '#B58863' if (move_from[0] + move_from[1]) % 2 else '#F0D9B5'
                            button_square.Update(button_color=('white', color))
                            continue
                        psg_board[move_from[0]][move_from[1]] = BLANK  # place blank where piece was
                        psg_board[row][col] = piece  # place piece in the move-to square
                        redraw_board(window, psg_board)
                        move_count += 1
                        window.FindElement('_movelist_').Update(picked_move + '\n', append=True)
                        break
        else:
            best_move = nnp(canonicalBoard)
            move_str = moveset[best_move]
            if curPlayer == -1:
                move_str = HalfchessGame.mirrored_move(move_str)
            from_col = ord(move_str[0]) - ord('e')
            from_row = 8 - int(move_str[1])
            to_col = ord(move_str[2]) - ord('e')
            to_row = 8 - int(move_str[3])
            window.FindElement('_movelist_').Update(move_str + '\n', append=True)
            piece = psg_board[from_row][from_col]
            psg_board[from_row][from_col] = BLANK
            psg_board[to_row][to_col] = piece
            redraw_board(window, psg_board)
            board, curPlayer = g.getNextState(board, curPlayer, best_move)
            move_count += 1

    sg.Popup('Game over!', 'Thank you for playing')
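# A small self-check (not part of the script) of the square <-> string mapping
# used above: columns 'e'..'h' map to indices 0..3 and board rows are stored
# top-down, so display row r corresponds to rank 8 - r.
def square_to_str(row, col):
    return '{}{}'.format('efgh'[col], 8 - row)

def str_to_square(s):
    return 8 - int(s[1]), ord(s[0]) - ord('e')

assert square_to_str(*str_to_square('e8')) == 'e8'
assert str_to_square('h1') == (7, 3)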
    'lr': 0.001,
    'dropout': 0.3,
    'epochs': 10,
    'batch_size': 64,
    'cuda': torch.cuda.is_available(),
    'num_channels': 512,
})

# all players
rp = RandomPlayer(g).play
gp = GreedyOthelloPlayer(g).play
hp = HumanOthelloPlayer(g).play
mmp = MinMaxOthelloPlayer(g).play

# nnet players
n1 = NNet(g, argsNN)
n1.load_checkpoint('./model/', 'best.pth.tar')
args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts1 = MCTS(g, n1, args1)
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

n2 = NNet(g, argsNN)
n2.load_checkpoint('./model/', 'checkpoint_5.pth.tar')
args2 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts2 = MCTS(g, n2, args2)
n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))

arena = Arena.Arena(n1p, mmp, g, [0, 0], display=display)
print(arena.playGames(6, verbose=True))
# arena = Arena.Arena(n2p, mmp, g, [0,0], display=display)
    'skip_nnet_filename': 'skip_nnet',
    'beta': 1,  # recall the augmented probability aug_prob = beta * probs + (1 - beta) * (1/len(x)) * x_I, where x_I is the indicator vector of the support of the true sparse solution x. Higher beta values therefore push the probabilities toward the correct column choices.
    # SET beta = 1 DURING TESTING SINCE x SHOULD BE UNKNOWN DURING TESTING.
    'tempThreshold': 25,  # dictates when MCTS starts returning deterministic policies (vectors of 0's and 1's). See Coach.py for more details.
    'gamma': 1,  # the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller gamma is, the more likely the algorithm is to choose the stopping action early (when ||x||_0 is small); gamma controls how strongly we require A*x to be close to y.
    # The choice of gamma depends heavily on the distribution of the signal and of the entries of A. gamma should be
    # approximately larger than m / ||A_S*x^* - y||_2^2, where y = A*x and x^* is the solution to the l2 regression problem.
    'alpha': 1e-5,  # the smaller alpha is, the more weight the algorithm gives to selecting a sparse solution.
    'epsilon': 1e-5,  # if x is the optimal l2 solution and the residual ||A_S*x - y||_2^2 is less than epsilon, then the state corresponding to the index set S is a terminal state in MCTS.
}

# START ALPHAZERO TRAINING:
# Initialize Game_args, nnet, Game, and AlphaZero
game_args = Game_args()  # renamed from 'Game_args': assigning to the class name would shadow it
Game = CSGame()
nnet = NNetWrapper(args)

if args['load_nn_model']:
    filename = 'best'
    nnet.load_checkpoint(args['network_checkpoint'], filename)

if args['skip_rule'] == 'bootstrap':
    skip_nnet = NNetWrapper(args)
    skip_nnet.load_checkpoint(args['skip_nnet_folder'], args['skip_nnet_filename'])
elif args['skip_rule'] is None:
    skip_nnet = nnet
else:
    skip_nnet = None
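# A minimal sketch (illustrative helper, not part of this repo) of the 'beta'
# augmentation described above: probs is the network policy over columns and x
# is the true sparse solution, so this is only usable during training. The
# renormalization at the end is an assumption, since the two terms need not sum
# to one on their own.
import numpy as np

def augment_probs(probs, x, beta):
    x_I = (x != 0).astype(float)                   # indicator vector of the support of x
    aug = beta * probs + (1 - beta) * x_I / len(x)
    return aug / aug.sum()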
    'tempThreshold': 40,
    'updateThreshold': 0.6,   # during arena playoff, the new neural net is accepted if it wins at least this fraction of games.
    'maxlenOfQueue': 300000,  # max number of game examples kept to train the neural network.
    'numMCTSSims': 25,        # number of MCTS simulations per game move.
    'arenaCompare': 40,       # number of arena games played to decide whether the new net is accepted.
    'cpuct': 1,

    'checkpoint': './temp/',
    'load_model': False,
    # 'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
    'load_folder_file': ('temp/', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,
    'dirichlet': 0.03,
})

if __name__ == "__main__":
    g = Game()
    nnet = HalfchessNNet(g)

    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])

    c = Coach(g, nnet, args)
    if args.load_model:
        print("Loading trainExamples from file")
        c.loadTrainExamples()
    c.learn()
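# A sketch of the acceptance rule 'updateThreshold' encodes, following the
# convention in alpha-zero-general's Coach.learn (the variable names here are
# assumptions): arena.playGames returns win counts for the previous and new nets.
pwins, nwins, draws = arena.playGames(args.arenaCompare)
accept = (pwins + nwins > 0) and (nwins / float(pwins + nwins) >= args.updateThreshold)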
num_games = 2
nets = [1, 3, 14, 29, 48, 70, 98]
# nets = [1, 14, 98]
victories = {}
runner_up = {}

rp = RandomPlayer(g).play
victories[rp] = [0, 0]
runner_up[rp] = [0, 0]
playerpool = [rp]

args = dotdict({'numMCTSSims': 20, 'cpuct': 1.0, 'dirichlet': 0.05})
for i in nets:
    nn = NNet(g)
    nn.load_checkpoint(filepath, 'checkpoint_%d.pth.tar' % i)
    mcts = MCTS(g, nn, args)

    # bind the current mcts via a default argument; otherwise every player in
    # the pool would close over the last MCTS instance created by this loop
    def stochastic(x, mcts=mcts):
        pi = mcts.getActionProb(x, temp=1)
        return np.random.choice(len(pi), p=pi)

    def deterministic(x, mcts=mcts):
        return np.argmax(mcts.getActionProb(x, temp=0))

    nnp = deterministic if det_pol else stochastic
    victories[nnp] = [i, 0]
    runner_up[nnp] = [i, 0]
    playerpool.append(nnp)
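# A hypothetical round-robin over playerpool (the actual tournament loop is not
# shown in this snippet); assumes the standard alpha-zero-general Arena, whose
# playGames(num) returns (oneWon, twoWon, draws), and uses the second slot of
# each victories entry as a win counter.
import itertools

for p1, p2 in itertools.combinations(playerpool, 2):
    arena = Arena.Arena(p1, p2, g)
    one_won, two_won, draws = arena.playGames(num_games)
    victories[p1][1] += one_won
    victories[p2][1] += two_won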
def PlayGame():
    # menu_def = [['&File', ['&Open PGN File', 'E&xit']],
    #             ['&Help', '&About...'], ]
    # sg.SetOptions(margins=(0,0))
    sg.ChangeLookAndFeel('GreenTan')

    # create initial board setup
    psg_board = copy.deepcopy(initial_board)

    # the main board display layout
    board_layout = [[sg.T(' ')] + [
        sg.T('{}'.format(a), pad=((23, 27), 0), font='Any 13') for a in 'efgh'
    ]]
    # loop through the board and create buttons with images
    for i in range(8):
        row = [sg.T(str(8 - i) + ' ', font='Any 13')]
        for j in range(4):
            piece_image = images[psg_board[i][j]]
            row.append(render_square(piece_image, key=(i, j), location=(i, j)))
        row.append(sg.T(str(8 - i) + ' ', font='Any 13'))
        board_layout.append(row)
    # add the labels across the bottom of the board
    board_layout.append([sg.T(' ')] + [
        sg.T('{}'.format(a), pad=((23, 27), 0), font='Any 13') for a in 'efgh'
    ])

    # set up the controls on the right side of the screen
    # openings = (
    #     'Any', 'Defense', 'Attack', 'Trap', 'Gambit', 'Counter', 'Sicilian', 'English', 'French',
    #     'Queen\'s openings', 'King\'s Openings', 'Indian Openings')
    # board_controls = [[sg.RButton('New Game', key='New Game'), sg.RButton('Draw')],
    #                   [sg.RButton('Resign Game'), sg.RButton('Set FEN')],
    #                   [sg.RButton('Player Odds'), sg.RButton('Training')],
    #                   [sg.Drop(openings), sg.Text('Opening/Style')],
    #                   [sg.CBox('Play As White', key='_white_')],
    #                   [sg.Text('Move List')],
    #                   [sg.Multiline([], do_not_clear=True, autoscroll=True, size=(15, 10), key='_movelist_')],
    #                   ]
    # # layouts for the tabs
    # controls_layout = [[sg.Text('Performance Parameters', font='_ 20')],
    #                    [sg.T('Put stuff like AI engine tuning parms on this tab')]]
    # statistics_layout = [[sg.Text('Statistics', font=('_ 20'))],
    #                      [sg.T('Game statistics go here?')]]
    board_tab = [[sg.Column(board_layout)]]

    # the main window layout
    layout = [
        # [sg.Menu(menu_def, tearoff=False)],
        [sg.TabGroup([[sg.Tab('Board', board_tab)]])]
    ]
    # sg.Tab('Controls', controls_layout),
    # sg.Tab('Statistics', statistics_layout)]], title_color='red'),
    # sg.Column(board_controls)],
    # [sg.Text('Click anywhere on board for next move', font='_ 14')]]

    window = sg.Window('Chess',
                       default_button_element_size=(12, 1),
                       auto_size_buttons=False,
                       icon='kingb.ico').Layout(layout)

    g = HalfchessGame.HalfchessGame()
    nn = NNet(g)
    nn.load_checkpoint(nn_filepath, nn_filename)
    args = dotdict({
        'numMCTSSims': numMCTSSims,
        'cpuct': cpuct,
        'dirichlet': 0.5
    })
    mcts = MCTS(g, nn, args)

    def deterministic(x):
        return np.argmax(mcts.getActionProb(x, temp=0))

    def stochastic(x):
        pi = mcts.getActionProb(x, temp=temp)
        return np.random.choice(len(pi), p=pi)

    nnp = stochastic

    nn2 = NNet(g)
    nn2.load_checkpoint(nn2_filepath, nn2_filename)
    mcts2 = MCTS(g, nn2, args)

    # distinct names for the second net's policies; redefining deterministic/
    # stochastic here would silently shadow the first pair
    def deterministic2(x):
        return np.argmax(mcts2.getActionProb(x, temp=0))

    def stochastic2(x):
        pi = mcts2.getActionProb(x, temp=temp)
        return np.random.choice(len(pi), p=pi)

    nnp2 = stochastic2

    rp = RandomPlayer(g).play

    board = g.getInitBoard()
    move_count = curPlayer = 1
    move_state = move_from = move_to = 0

    # ---===--- Loop taking in user input --- #
    while g.getGameEnded(board, curPlayer) == 0:
        window.Read()
        canonicalBoard = g.getCanonicalForm(board, curPlayer)
        if curPlayer == p2:
            if twonets:
                best_move = nnp2(canonicalBoard)
            else:
                best_move = rp(canonicalBoard)
            move_str = moveset[best_move]
            if curPlayer == -1:
                move_str = HalfchessGame.mirrored_move(move_str)
            from_col = ord(move_str[0]) - ord('e')
            from_row = 8 - int(move_str[1])
            to_col = ord(move_str[2]) - ord('e')
            to_row = 8 - int(move_str[3])
            # window.FindElement('_movelist_').Update(move_str + '\n', append=True)
            piece = psg_board[from_row][from_col]
            psg_board[from_row][from_col] = BLANK
            psg_board[to_row][to_col] = piece
            redraw_board(window, psg_board)
            board, curPlayer = g.getNextState(board, curPlayer, best_move)
            move_state = 0
        else:
            best_move = nnp(canonicalBoard)
            move_str = moveset[best_move]
            if curPlayer == -1:
                move_str = HalfchessGame.mirrored_move(move_str)
            from_col = ord(move_str[0]) - ord('e')
            from_row = 8 - int(move_str[1])
            to_col = ord(move_str[2]) - ord('e')
            to_row = 8 - int(move_str[3])
            # window.FindElement('_movelist_').Update(move_str + '\n', append=True)
            piece = psg_board[from_row][from_col]
            psg_board[from_row][from_col] = BLANK
            psg_board[to_row][to_col] = piece
            redraw_board(window, psg_board)
            board, curPlayer = g.getNextState(board, curPlayer, best_move)
            move_count += 1

    sg.Popup('Game over!', 'Thank you for playing')