def AsyncSelfPlay(nnet, game, args, iter_num, ns):
    """Play one self-play episode with MCTS and collect its training samples.

    Args:
        nnet: Network object handed to ``MCTS``.
        game: Game object; must provide ``getInitBoard``, ``getCanonicalForm``,
            ``getValidMoves``, ``getNextState``, ``getGameEnded`` and a
            ``gameState`` exposing ``turn`` and ``stale``.
        args: Settings passed to ``MCTS``; ``args.setGPU`` is written to the
            ``CUDA_VISIBLE_DEVICES`` environment variable.
        iter_num: Index of this game, used in the debug log and the report.
        ns: Shared object with a ``leak`` flag (presumably a multiprocessing
            manager namespace -- confirm against the caller).

    Returns:
        ``None`` when the memory guard trips. Otherwise a 3-tuple
        ``(examples, r, report)`` where ``examples`` is a list of
        ``(canonical_board, pi, r * player, turn, stale, valids)`` tuples,
        ``r`` is the non-zero value returned by ``game.getGameEnded`` and
        ``report`` is ``[iter_num, start_time, end_time, duration, cpu_num,
        rss_in_GiB, per_move_durations]``.
    """
    mcts = MCTS(game, nnet, args)

    # ---- Memory freeze protection -------------------------------------
    # Bail out if another worker already raised the flag, or raise it
    # ourselves once system memory usage exceeds 97 percent.
    if ns.leak:
        print('memory leak already leak')
        return None
    if psutil.virtual_memory().percent > 97:
        print('memory leak')
        ns.leak = True
        return None
    # -------------------------------------------------------------------

    logging.debug(f"self playing game{iter_num}")
    os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU

    start_game_time = time.time()
    samples = []
    move_durations = []
    board = game.getInitBoard()
    player = 1
    step_count = 0
    outcome = 0

    # Keep playing until getGameEnded reports a non-zero result.
    while outcome == 0:
        move_started = time.time()
        step_count += 1

        canonical = game.getCanonicalForm(board, player)
        pi = mcts.getActionProb(canonical, temp=1)
        valids = game.getValidMoves(canonical, 1)
        samples.append((
            canonical,
            player,
            pi,
            game.gameState.turn,
            game.gameState.stale,
            valids,
        ))

        # Sample the next action index in proportion to the MCTS policy.
        candidates = np.arange(len(pi))
        action = random.choices(candidates, weights=pi)[0]
        board, player = game.getNextState(board, player, action)

        outcome = game.getGameEnded(board, player)  # winner
        move_durations.append(time.time() - move_started)

    end_game_time = time.time()
    game_duration = end_game_time - start_game_time

    proc = psutil.Process()
    report = [
        iter_num,
        start_game_time,
        end_game_time,
        game_duration,
        proc.cpu_num(),
        proc.memory_info().rss / (1024 ** 3),  # resident set size in GiB
        move_durations,
    ]

    # Scale the final result by the player recorded for each position.
    examples = [
        (position, policy, outcome * mover, turn, stale, legal)
        for position, mover, policy, turn, stale, legal in samples
    ]
    return examples, outcome, report
def AsyncAgainst(nnet, game, args, iter_num):
    """Pit the network (via MCTS) against a depth-7 minimax player.

    Args:
        nnet: Network object handed to ``MCTS``.
        game: Game object shared by both players and the arena.
        args: Not used by this function.
        iter_num: Not used by this function.

    Returns:
        Tuple ``(net_win, minimax_win, draws)`` as unpacked from
        ``Arena.playGames(2)``.
    """
    # This worker is pinned to GPU index '3'.
    os.environ["CUDA_VISIBLE_DEVICES"] = '3'

    opponent = minimaxAI(game, depth=7)
    search_settings = dotdict({'numMCTSSims': 200, 'cpuct': 1.0})
    mcts = MCTS(game, nnet, search_settings, eval=True)

    def network_move(x):
        # Choose the highest-probability action from the MCTS policy.
        return np.argmax(mcts.getActionProb(x, temp=0))

    arena = Arena(network_move, opponent.get_move, game)
    arena.displayBar = False

    wins, losses, ties = arena.playGames(2)
    return wins, losses, ties
def AsyncAgainst(nnet, game, args, gameth):
    """Pit the network (via MCTS) against a default-depth minimax player.

    Args:
        nnet: Network object handed to ``MCTS``.
        game: Game object shared by both players and the arena.
        args: Settings object; ``args.setGPU`` is written to the
            ``CUDA_VISIBLE_DEVICES`` environment variable.
        gameth: Index of this test game, used only in the debug log.

    Returns:
        Tuple ``(net_win, minimax_win, draws)`` as unpacked from
        ``Arena.playGames(2)``.
    """
    logging.debug(f"play self test game {gameth}")
    os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU

    opponent = minimaxAI(game)

    # The evaluation search uses its own fixed settings, not ``args``.
    search_settings = dotdict({'numMCTSSims': 100, 'cpuct': 1.0})
    mcts = MCTS(game, nnet, search_settings, eval=True)

    def network_move(x):
        # Choose the highest-probability action from the MCTS policy.
        return np.argmax(mcts.getActionProb(x, temp=0))

    arena = Arena(network_move, opponent.get_move, game)
    arena.displayBar = False

    wins, losses, ties = arena.playGames(2)
    return wins, losses, ties
def reset():
    """Restart the engine-vs-engine match from a fresh game.

    Rebinds the module-level game, board, history, move counter and state,
    redraws the canvas through ``update``, and rebuilds the MCTS engine for
    every player that is not configured as ``'minimax'``.
    """
    global checkers, main_canvas, state, board
    global board_history, move_num, AI_1, AI_2

    turn_label['text'] = 'ENGINES ARE PLAYING!'

    # Fresh game; the history starts with a snapshot of it.
    checkers = Game()
    board = checkers.getInitBoard()
    board_history = [copy.deepcopy(checkers)]
    move_num = 0
    state = ENGINE_1

    update(main_canvas)

    # Only MCTS engines are recreated; minimax engines are left as they are.
    if args.player1 != 'minimax':
        AI_1 = MCTS(checkers, nnet, args1, eval=True, verbose=True)
    if args.player2 != 'minimax':
        AI_2 = MCTS(checkers, nnet2, args2, eval=True, verbose=True)
def _try_load_checkpoint(model, folder, filename, label):
    """Best-effort checkpoint load: report a failure and carry on."""
    try:
        model.load_checkpoint(folder=folder, filename=filename)
    except Exception as exc:
        # Deliberately non-fatal: the model keeps whatever weights it had.
        # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate so the worker can still be stopped.
        print(f"load {label} fail: {exc!r}")


def Async_Play(game, args, iter_num, bar):
    """Play two arena games between two checkpointed networks.

    Args:
        game: Game object used to build both networks, both searches and
            the arena.
        args: Settings passed to ``MCTS``; also supplies ``model1Folder``,
            ``model1FileName``, ``model2Folder`` and ``model2FileName``.
        iter_num: Not used by this function.
        bar: Not used by this function.

    Returns:
        Tuple ``(oneWon, twoWon, draws)`` as unpacked from
        ``Arena.playGames(2)``.

    Note:
        This function does not set ``CUDA_VISIBLE_DEVICES``; device
        visibility is whatever the calling process provides.
    """
    # Create the two networks.
    model1 = NNet(game)
    model2 = NNet(game)

    # Try to load weights; a failed load is reported but does not abort.
    _try_load_checkpoint(model1, args.model1Folder, args.model1FileName, "model1")
    _try_load_checkpoint(model2, args.model2Folder, args.model2FileName, "model2")

    # One search tree per model.
    mcts1 = MCTS(game, model1, args)
    mcts2 = MCTS(game, model2, args)

    # Each process plays 2 games; both players pick the argmax MCTS action.
    arena = Arena(
        lambda x: np.argmax(mcts1.getActionProb(x, temp=0)),
        lambda x: np.argmax(mcts2.getActionProb(x, temp=0)),
        game,
    )
    arena.displayBar = False

    oneWon, twoWon, draws = arena.playGames(2)
    return oneWon, twoWon, draws
# total num should x2, because each process play 2 games. 'numPlayGames': 10, 'numPlayPool': 5, # num of processes pool. 'model1Folder': '/workspace/CU_Makhos/models/', 'model1FileName': 'best.pth.tar', 'model2Folder': '/workspace/CU_Makhos/models/', 'model2FileName': 'best.pth.tar', }) g = ThaiCheckersGame() minimax = minimaxAI(game=g, depth=7).get_move # nnet players n1 = NNet(g, gpu_num=0) n1.load_checkpoint('models_minimax/', 'train_iter_268.pth.tar') args1 = dotdict({'numMCTSSims': 100, 'cpuct': 1.0}) mcts1 = MCTS(g, n1, args1, eval=True, verbose=True) def n1p(x): return np.random.choice(32 * 32, p=mcts1.getActionProb(x, temp=0)) n2 = NNet(g, gpu_num=0) n2.load_checkpoint('models_minimax/', 'train_iter_140.pth.tar') args2 = dotdict({'numMCTSSims': 100, 'cpuct': 1.0}) mcts2 = MCTS(g, n2, args2, eval=True) def n2p(x): return np.random.choice(32 * 32, p=mcts2.getActionProb(x, temp=0)) arena = Arena(n1p, n2p, g, display=display) print(arena.playGames(2, verbose=True))
# Parse the command line and create the game plus its starting board.
args = parser.parse_args()
checkers = Game()
board = checkers.getInitBoard()

# Build the single AI opponent: minimax at the requested depth, or an MCTS
# search driven by a CPU-only network loaded from a fixed checkpoint.
if args.type == 'minimax':
    AI = minimaxAI(checkers, depth=args.depth,verbose=True)
    print("minimax")
else:
    print('Neural network model')
    nnet = nn(checkers, gpu_num=0,use_gpu = False)
    nnet.load_checkpoint(folder='models_minimax', filename='train_iter_303.pth.tar')
    args1 = dotdict({'numMCTSSims':args.mcts, 'cpuct': 1.0})
    AI = MCTS(checkers, nnet, args1, eval=True, verbose=True)

# Choose the AI's move for player -1 on ``board_input``.
# NOTE(review): the remainder of this function lies outside the visible source.
def move_ai(board_input):
    print('Calculating...')
    # Valid moves are computed on the canonical form of the board for player -1.
    valid_moves = checkers.getValidMoves(checkers.getCanonicalForm(board_input, -1), 1)
    # Shortcut for minimax: when exactly one move is valid, apply it directly.
    if np.sum(valid_moves)==1 and args.type=='minimax':
        # NOTE(review): no `global board` precedes this assignment, so `board`
        # is a local here and is discarded by the bare return below; the
        # module-level board is not updated -- confirm this is intended.
        board, _ = checkers.getNextState(board_input, -1 , np.argmax(valid_moves))
        return
    if args.type == 'minimax':
        action = AI.get_move(checkers.getCanonicalForm(board_input, -1))
    else:
        start = time.time()
        # Draw an action index in [0, 32*32) using the MCTS policy as weights.
        action = np.random.choice(32*32, p=AI.getActionProb((checkers.getCanonicalForm(board_input, -1)), temp=0))
g = ThaiCheckersGame() # parallel version # ParallelPlay(g) # single process version # all players rp = RandomPlayer(g).play # gp = GreedyOthelloPlayer(g).play # hp = HumanOthelloPlayer(g).play minimax = minimaxAI(game=g, depth=7).get_move # nnet players n1 = NNet(g, gpu_num=0) n1.load_checkpoint('/workspace/CU_Makhos/models_minimax/', 'train_iter_140.pth.tar') args1 = dotdict({'numMCTSSims': 100, 'cpuct': 1.0}) mcts1 = MCTS(g, n1, args1, eval=True) def n1p(x): return np.random.choice(32 * 32, p=mcts1.getActionProb(x, temp=0)) # n2 = NNet(g, gpu_num=0) # n2.load_checkpoint('/workspace/CU_Makhos/models_minimax/', # 'train_iter_69.pth.tar') # args2 = dotdict({'numMCTSSims': 100, 'cpuct': 8.0}) # mcts2 = MCTS(g, n1, args2, eval=True) # def n2p(x): return np.random.choice( # 32*32, p=mcts2.getActionProb(x, temp=0)) # player1 = {'func': n1p, 'name': 'NNet'} # player2 = {'func': minimax, 'name': 'minimax'}
parser.add_argument('--depth', nargs='?', dest='depth', type=int, default=7)
args = parser.parse_args()

# Game object and starting position shared by both engines.
checkers = Game()
board = checkers.getInitBoard()

# Search depth used by whichever engines run minimax.
DEPTH = args.depth

# Engine 1: MCTS over a loaded network unless minimax was requested.
if args.player1 != 'minimax':
    nnet = nn(checkers, gpu_num=0)
    nnet.load_checkpoint(folder='models', filename='train_iter_268.pth.tar')
    args1 = dotdict({'numMCTSSims': args.mcts1, 'cpuct': 1.0})
    AI_1 = MCTS(checkers, nnet, args1, eval=True, verbose=True)
else:
    AI_1 = minimaxAI(checkers, depth=DEPTH, verbose=True)

# Engine 2: same selection rule, with its own network and search settings.
if args.player2 != 'minimax':
    nnet2 = nn(checkers, gpu_num=0)
    nnet2.load_checkpoint(folder='models', filename='train_iter_268.pth.tar')
    args2 = dotdict({'numMCTSSims': args.mcts2, 'cpuct': 1.0})
    AI_2 = MCTS(checkers, nnet2, args2, eval=True, verbose=True)
else:
    AI_2 = minimaxAI(checkers, depth=DEPTH, verbose=True)
    print("minimax")

# Engine 1 is the initial state.
state = ENGINE_1

# Tk window with a 480x480 drawing canvas.
root = tk.Tk()
main_canvas = tk.Canvas(root, width=480, height=480)
click_value = None