def player_vs_nnet(results_folder, player, games, chunk):
    g = Connect4Game()
    results = []
    chunk_number, files = chunk
    for file in files:
        nn = NNet(g)
        nn.load_checkpoint(results_folder, file)
        args = dotdict({'numMCTSSims': 25, 'cpuct': 5.0})
        mcts1 = MCTS(g, nn, args)
        n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))
        arena = Arena.Arena(n1p, player, g, display=display)
        result = arena.playGames(games, verbose=False)
        results.append(list(result))
    return chunk_number, results
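# Usage sketch (not from the source): evaluate a folder of checkpoints in
# parallel by splitting the file list into chunks, following the joblib
# pattern used in the experiment scripts below. `checkpoint_files`,
# `opponent_play` and `n_chunks` are hypothetical names.
from joblib import Parallel, delayed

n_chunks = 4
chunks = [(i, checkpoint_files[i::n_chunks]) for i in range(n_chunks)]
all_results = Parallel(n_jobs=n_chunks)(
    delayed(player_vs_nnet)("./results/", opponent_play, 20, chunk)
    for chunk in chunks)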
def errorsTillOptimalPlay(player1, nGames=10):
    """
    :param player1: function that takes a board state as an input and returns a move
                    (see Players in Connect4Players)
    :param nGames: number of games to play
    :return: list of move histories for all nGames and the corresponding mistakes for each
    """
    game = Connect4Game()
    player2 = EngineConnect4Player(game).play
    players = [player2, None, player1]
    moveHistoryList = []
    mistakesList = []
    for i in range(nGames):
        board = game.getInitBoard()
        moveHistory = []
        mistakes = {}
        curPlayer = 1
        move_number = 0
        while game.getGameEnded(board, curPlayer) == 0:
            action = players[curPlayer + 1](game.getCanonicalForm(board, curPlayer))
            valids = game.getValidMoves(game.getCanonicalForm(board, curPlayer), 1)
            if valids[action] == 0:
                print(action)
                assert valids[action] > 0
            moveHistory.append(action)
            if curPlayer == 1 and getBoardScoreTheoretical(
                    "".join([str(x + 1) for x in moveHistory])) != -1:
                # player1 deviated from optimal play: record the mistake, then
                # substitute the engine's move so the game continues along an
                # optimal line
                mistakes[move_number] = action + 1
                action = players[0](game.getCanonicalForm(board, curPlayer))
                moveHistory[-1] = action
            board, curPlayer = game.getNextState(board, curPlayer, action)
            move_number += 1
        moveHistoryList.append(moveHistory)
        mistakesList.append(mistakes)
    return moveHistoryList, mistakesList
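# Usage sketch (checkpoint folder and file name are assumptions): build an
# MCTS-guided network player the same way generateMovesTestFile does, then
# count its mistakes against the perfect engine.
g = Connect4Game()
nn = NNet(g)
nn.load_checkpoint('./temp/', 'best.pth.tar')
mcts = MCTS(g, nn, dotdict({'numMCTSSims': 25, 'cpuct': 1}))
azPlayer = lambda x: np.argmax(mcts.getActionProb(x, temp=0))
histories, mistakes = errorsTillOptimalPlay(azPlayer, nGames=10)
print(sum(len(m) for m in mistakes), "mistakes over", len(histories), "games")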
def generateMovesTestFile(folder, model):
    test_files = [
        "Test_L1_R1.txt", "Test_L1_R2.txt", "Test_L1_R3.txt",
        "Test_L2_R1.txt", "Test_L2_R2.txt", "Test_L3_R1.txt"
    ]
    g = Connect4Game()
    nn = NNet(g)
    nn.load_checkpoint(folder, model)
    args = dotdict({'numMCTSSims': 25, 'cpuct': 1})
    mcts1 = MCTS(g, nn, args)
    n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))
    for test_file in test_files:
        moves = preform_moves(n1p, test_file)
        print(folder + model + "_" + test_file)
        # model[:-8] strips the 8-character ".pth.tar" extension
        with open(folder + model[:-8] + "_" + test_file, "w") as moves_file:
            moves_file.write(str(moves))
def main():
    log.info('Loading %s...', Connect4Game.__name__)
    g = Connect4Game()

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        # the format string takes two arguments, so unpack the (folder, file) tuple
        log.info('Loading checkpoint "%s/%s"...', args.load_folder_file[0],
                 args.load_folder_file[1])
        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
def getBoardScore(moves):
    """
    :param moves: moves that have been performed, given either as a string with
                  chars 1-7 or as a list with values 0-6
    :return: negamax score of the board
    """
    if os.name == 'nt':
        path = "C:\\Magistrsko_delo\\connect4\\bin\\board_evaluate.exe"
    else:
        path = "/home/dlusina/connect4/bin/board_evaluate"
    if not isinstance(moves, str):
        moves = "".join([str(move + 1) for move in moves])
    process = run(path, stdout=PIPE, input=(moves + "\n").encode())
    score = process.returncode
    if score > 42:
        # return codes are unsigned; reinterpret as a signed 32-bit value
        score = np.uint32(score).view('int32')
    board = get_board(moves)
    if Connect4Game().getGameEnded(board, -1):
        score *= -1  # we have to negate the score if the last move was a winning move
    return score
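# Interpretation sketch, under the usual negamax convention (an assumption:
# the exact magnitude encoding depends on the board_evaluate binary):
# positive = the player to move can force a win, 0 = draw, negative = the
# player to move loses against perfect play. "44444" is just a sample line.
score = getBoardScore("44444")
if score > 0:
    print("side to move wins with perfect play, score", score)
elif score == 0:
    print("draw with perfect play")
else:
    print("side to move loses with perfect play, score", score)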
def test_connect4_tensorflow(self):
    self.execute_game_test(Connect4Game(), Connect4TensorflowNNet)
import Arena
from MCTS import MCTS
from connect4.Connect4Game import Connect4Game
from connect4.Connect4Players import *
from connect4.pytorch.NNet import NNetWrapper as NNet

import numpy as np
from utils import *

"""
use this script to play any two agents against each other, or play manually
with any agent.
"""

human_vs_cpu = True  # False

g = Connect4Game(6, 7, 4)
# g = Connect4Game(5, 5, 3)  # mini

# all players
rp = RandomPlayer(g).play
op = OneStepLookaheadConnect4Player(g).play
hp = HumanConnect4Player(g).play

if 1:
    # nnet players
    n1 = NNet(g)
    n1.load_checkpoint('./temp/', 'best.pth.tar')
    args1 = dotdict({'numMCTSSims': 300, 'cpuct': 1.0})
    mcts1 = MCTS(g, n1, args1)
    n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))
    p1_name = 'az-1'
def main(argv):
    game_type = ''
    player_types = ['random', 'heuristic', 'minimax', 'alphazero', 'human']
    p1 = ''
    p2 = ''
    usage = ('pit.py -p <player type> -o <opponent type> '
             '(random, heuristic, minimax, alphazero, human)')
    try:
        opts, args = getopt.getopt(argv, "hp:o:", ["help", "player=", "opponent="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    if len(opts) != 2:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ('-p', '--player'):
            p1 = arg
        elif opt in ('-o', '--opponent'):
            p2 = arg
    if (p1 not in player_types) or (p2 not in player_types):
        print('Invalid player types. Valid player types are:')
        for player_type in player_types:
            print(player_type)
        sys.exit(2)

    args = dotdict({
        'checkpoint': '.connect4/temp/',
        'load_folder_file': ('connect4/dev/models/8x100x50', 'connect4/best.pth.tar'),
    })

    g = Connect4Game(6)
    p1_ind = pselect(p1)
    p2_ind = pselect(p2)
    print('playing ' + player_types[p1_ind] + ' against ' + player_types[p2_ind] + '...')

    # all players
    rp = RandomPlayer(g).play
    gp = OneStepLookaheadConnect4Player(g).play
    hp = HumanConnect4Player(g).play
    mp = MiniMaxConnect4Player(g).play

    # nnet players
    n1 = NNet(g)
    n1.load_checkpoint(folder=args.checkpoint, filename='best.pth.tar')
    args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
    mcts1 = MCTS(g, n1, args1)
    n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

    # order matches player_types, so the last entry must be the human player
    player_list = [rp, gp, mp, n1p, hp]
    arena = Arena.Arena(player_list[p1_ind], player_list[p2_ind], g, display=display)
    print(arena.playGames(10, verbose=True))
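# Example invocations, following the usage string above:
#   python pit.py -p alphazero -o human
#   python pit.py --player minimax --opponent random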
def test_connect4_keras(self):
    self.execute_game_test(Connect4Game(5), Connect4KerasNNet)
def executeEpisode(self):
    """
    This function executes one episode of self-play, starting with player 1.
    As the game is played, each turn is added as a training example to
    trainExamples. The game is played until the game ends. After the game
    ends, the outcome of the game is used to assign values to each example
    in trainExamples.

    It uses temp=1 if episodeStep < tempThreshold, and thereafter uses temp=0.

    Returns:
        trainExamples: a list of examples of the form (canonicalBoard, pi, v)
                       pi is the MCTS informed policy vector, v is +1 if
                       the player eventually won the game, else -1.
    """
    trainExamples = []
    board = self.game.getInitBoard()
    self.curPlayer = 1
    episodeStep = 0
    moveHistory = []
    if "openings_prob" in self.args:
        use_opening = random.random() < self.args.openings_prob
    else:
        use_opening = False
    if use_opening:
        opening = opening_tree()

    while True:
        episodeStep += 1
        canonicalBoard = self.game.getCanonicalForm(board, self.curPlayer)
        temp = int(episodeStep < self.args.tempThreshold)

        pi = self.mcts.getActionProb(canonicalBoard, temp=temp)
        valids = self.game.getValidMoves(canonicalBoard, self.curPlayer)
        pi = pi * valids
        pi = pi / sum(pi)

        if not use_opening or episodeStep >= len(opening):
            if self.args.heuristic_type == 'combined':
                # mix the MCTS policy with the heuristic distribution
                fraction = self.args.heuristic_probability
                h_prob = self.args.heuristic_function(canonicalBoard)
                new_pi = np.array(pi) * (1 - fraction) + h_prob * fraction
                if self.args.change_probabilities:
                    pi = new_pi
                action = np.random.choice(len(new_pi), p=new_pi)
            elif self.args.heuristic_type in ('normal', 'cooling'):
                if self.args.heuristic_type == 'cooling':
                    # linearly decrease the heuristic probability over the
                    # (at most 42) moves of a game
                    prob = self.args.heuristic_probability - (
                        episodeStep - 1) * self.args.heuristic_probability / 42
                else:
                    prob = self.args.heuristic_probability
                if np.random.ranf(1)[0] > prob:
                    action = np.random.choice(len(pi), p=pi)
                else:
                    new_pi = self.args.heuristic_function(canonicalBoard)
                    if self.args.change_probabilities:
                        pi = new_pi
                    action = np.random.choice(len(new_pi), p=new_pi)
            elif self.args.heuristic_type == 'cooling_iter':
                # heuristic influence decays to zero over the first 50 iterations
                fraction = max(0, (50 - (self.args.curIter - 1)) / 50)
                h_prob = heuristic2_prob(canonicalBoard)
                new_pi = np.array(pi) * (1 - fraction) + h_prob * fraction
                if self.args.change_probabilities:
                    pi = new_pi
                action = np.random.choice(len(new_pi), p=new_pi)
            elif self.args.heuristic_type == 'custom':
                prob = self.args.probability_function(episodeStep)
                if np.random.ranf(1)[0] > prob:
                    action = np.random.choice(len(pi), p=pi)
                else:
                    action = self.args.heuristic_function(canonicalBoard)
            elif self.args.heuristic_type == 'perfect':
                action = EngineConnect4Player(Connect4Game()).play(canonicalBoard)
            elif self.args.heuristic_type == 'default':
                action = np.random.choice(len(pi), p=pi)
            else:
                raise NameError("Wrong heuristic type '" +
                                self.args.heuristic_type + "'")
        else:
            action = opening[episodeStep - 1]
            # pi = np.array(7)
            # pi[action] = 1

        sym = self.game.getSymmetries(canonicalBoard, pi)
        for b, p in sym:
            if np.all(b == canonicalBoard):
                trainExamples.append([b, self.curPlayer, p, list(moveHistory), None])
            else:
                # mirrored board: mirror the move history as well
                trainExamples.append(
                    [b, self.curPlayer, p, [6 - x for x in moveHistory], None])

        board, self.curPlayer = self.game.getNextState(board, self.curPlayer, action)
        moveHistory.append(action)

        r = self.game.getGameEnded(board, self.curPlayer)
        if r != 0:
            if self.args.supervised:
                result = []
                for x in trainExamples:
                    r = getBoardScoreTheoretical(x[3])
                    result.append((x[0], x[2], r))
                    print(x[0], "Moves", "".join([str(i + 1) for i in x[3]]),
                          "Theoretical value", r)
                return result
            else:
                if self.args.value_game_length:
                    # scale the value by game length: ~1.0 for the fastest
                    # possible win (7 plies) down to ~0.01 at 42 plies
                    r = 1.198 - 99 / 3500 * episodeStep
                res = [[x[0], x[2], r * ((-1) ** (x[1] != self.curPlayer))]
                       for x in trainExamples]
                if use_opening:
                    # book moves get alternating +1/-1 labels
                    score = 1
                    for i in range(len(opening)):
                        res[i][2] = score
                        score *= -1
                return res
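# Sanity check of the value_game_length constants (my arithmetic, not from
# the source): the fastest possible Connect 4 win takes 7 plies and a full
# board takes 42, so the reward decays linearly from 1.0 down to 0.01.
for plies in (7, 42):
    print(plies, round(1.198 - 99 / 3500 * plies, 3))  # -> 7 1.0, 42 0.01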
if choice == "gobang":
    g = GobangGame(5, 4)
    n1 = NNet1(g)
    n1.load_checkpoint('./temp/', 'curent13temp_iter75_eps350_dim5.pth.tar')
    gamename = "gobang"
    display = display1
    hp = HumanGobangPlayer(g).play
if choice == "othello":
    g = OthelloGame(8)
    n1 = NNet2(g)
    n1.load_checkpoint('./temp/', 'curent14temp:iter14:eps200:dim8.pth.tar')
    gamename = "othello"
    display = display2
    hp = MinMaxOthelloPlayer(g, 3).play
if choice == "connect4":
    g = Connect4Game(5, 6)
    n1 = NNet3(g)
    n1.load_checkpoint('./temp/', 'best75_eps300_dim5.pth.tar')
    gamename = "connect4"
    display = display3
    hp = HumanConnect4Player(g).play

# all players
# rp = RandomPlayer(g).play
# gp = GreedyOthelloPlayer(g).play

# nnet players
args1 = dotdict({
    'numMCTSSims': 400,
    'cpuct': 1.5,
    'epsilon': 0,
args = dotdict({
    'numEps': 100,            # Number of complete self-play games to simulate during a new iteration.
    'tempThreshold': 15,
    'updateThreshold': 0.6,   # During arena playoff, new neural net will be accepted if threshold or more of games are won.
    'maxlenOfQueue': 200000,  # Number of game examples to train the neural networks.
    'numMCTSSims': 25,        # Number of moves for MCTS to simulate.
    'arenaCompare': 40,       # Number of games to play during arena play to determine if new net will be accepted.
    'cpuct': 1,

    'checkpoint': './temp/',
    'load_model': False,
    'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,
})

if __name__ == "__main__":
    g = Connect4Game(6, 7, 4, None)
    nnet = nn(g)

    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])

    c = Coach(g, nnet, args)

    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()

    c.learn()
def experiment(game):
    np.random.seed(556)
    height, width, win_streak = game
    g = Connect4Game(height, width, win_streak)

    # episode budget and arena interval for each board size
    if game == (4, 5, 3):
        total_episodes = n_episodes[0]
        ep_step = 10000
    elif game == (5, 6, 4):
        total_episodes = n_episodes[1]
        ep_step = 20000
    else:
        total_episodes = n_episodes[2]
        ep_step = 28000
    ep_range = np.arange(0, total_episodes + ep_step, ep_step) + 1
    ep_range[0] = 0
    ep_range = ep_range.astype(int)

    for lr in lrs:
        for i in epsilon_config:
            print('Config: Game', game, 'lr', lr, 'epsilon', i)
            test_wr_list = []
            test_wr_list_op = []
            test_wr = []
            test_wr_op = []
            if i == 'f':
                # fixed exploration rate
                q_agent = QAgent(g, episodes=total_episodes, lr=lr, epsilon=0.2,
                                 dc=1, e_min=0.001, ep_arena=ep_step)
            else:
                # decaying exploration rate
                q_agent = QAgent(g, episodes=total_episodes, lr=lr, epsilon=1,
                                 dc=0.99, e_min=0.001, ep_arena=ep_step)
            rp = RandomPlayer(g).play
            op = OneStepLookaheadConnect4Player(g, verbose=False).play
            q_agent_play = q_agent.play
            start = time()
            for idx, episode in enumerate(ep_range):
                if episode == ep_range[-1]:
                    break
                if episode == 0:
                    print('Training for Episodes ', 0, ' to ',
                          ep_range[idx + 1] - 1, '...', sep='')
                elif episode == ep_range[-2]:
                    print('Training for Episodes ', episode - 1, ' to ',
                          total_episodes, '...', sep='')
                else:
                    print('Training for Episodes ', episode - 1, ' to ',
                          ep_range[idx + 1] - 1, '...', sep='')
                q_agent.train(cur_episode=episode)
                print('Training Finished.')
                print('Playing in Arena...')

                # evaluate against the random player
                wins = 0
                temp = []
                for repet in range(reps):
                    arena_rp_op = Arena.Arena(q_agent_play, rp, g, display=display)
                    w, _, _ = arena_rp_op.playGames(n_games, verbose=False)
                    temp.append(w / n_games)
                    wins += w
                test_wr_list.append(temp)
                test_wr.append(wins / (reps * n_games))
                print('\n')

                # evaluate against the one-step lookahead player
                wins_op = 0
                temp = []
                for repet in range(reps):
                    arena_rp_op = Arena.Arena(q_agent_play, op, g, display=display)
                    w_op, _, _ = arena_rp_op.playGames(n_games, verbose=False)
                    temp.append(w_op / n_games)
                    wins_op += w_op
                test_wr_list_op.append(temp)
                test_wr_op.append(wins_op / (reps * n_games))
                print('\n')
            end = time()
            training_time = np.array([end - start])

            suffix = str(game) + '_' + str(lr) + '_' + str(i)
            np.save('train_wr_connect4_' + suffix + '_rp', q_agent.total_wins)
            np.save('train_ep_connect4_' + suffix + '_rp', q_agent.total_eps)
            np.save('test_wr_connect4_' + suffix + '_rp', test_wr)
            np.save('test_wr_list_connect4_' + suffix + '_rp', test_wr_list)
            np.save('training_time_' + suffix + '_rp', training_time)
            np.save('test_wr_connect4_' + suffix + '_op', test_wr_op)
            np.save('test_wr_list_connect4_' + suffix + '_op', test_wr_list_op)
            print('\n')
    return data


print('Start Parallel')
global_start = time()

microsecs = np.array([
    10000, 50000, 100000, 250000, 500000, 750000, 1000000, 1500000, 2000000,
    3000000
])
games = [(4, 5, 3), (5, 6, 4), (6, 7, 4)]
players = ['rp', 'op']

for player in players:
    # best exploration constant c per game size for each opponent
    if player == 'rp':
        cs = [1, 4, 3]
    else:
        cs = [5, 2, 5]
    for i, c in zip(games, cs):
        c_best = c
        height, width, win_streak = i
        global_start = time()
        g = Connect4Game(height, width, win_streak)
        data = Parallel(n_jobs=10)(delayed(experiment)(m) for m in microsecs)
        np.save('mcts_best_connect4_results_' + player + '_' + str(height) +
                str(width) + str(win_streak), data)
        print('Game: ' + str(i) + ' against ' + player + ', Time: ' +
              str(time() - global_start))
from MCTS import MCTS
from connect4.Connect4Game import Connect4Game, display
from connect4.Connect4Players import HumanConnect4Player
from connect4.tensorflows.NNet import NNetWrapper as NNet
from utils import dotdict
import numpy as np

if __name__ == '__main__':
    goingFirst = True
    folder = "H:\\alpha-zero-trained\\final\\h2\\mcts_visits_tanh\\default\\1\\"
    game = Connect4Game()
    nn = NNet(game)
    nn.load_checkpoint(folder, 'best.pth.tar')
    args = dotdict({'numMCTSSims': 25, 'cpuct': 1})
    mcts1 = MCTS(game, nn, args)
    AI = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))
    human = HumanConnect4Player(game).play
    if goingFirst:
        players = [AI, None, human]
    else:
        players = [human, None, AI]
    curPlayer = 1
    board = game.getInitBoard()
    while game.getGameEnded(board, curPlayer) == 0:
        display(board, symbols=True)
        # curPlayer is 1 or -1, so curPlayer + 1 indexes into players
        action = players[curPlayer + 1](game.getCanonicalForm(board, curPlayer))
from MCTS import MCTS
from connect4.Connect4Players import *
from Coach import Coach
from connect4.Connect4Game import Connect4Game
from connect4.tensorflow.NNet import NNetWrapper as NNet
from utils import dotdict
import numpy as np

"""
use this script to play any two agents against each other, or play manually
with any agent.
"""

human_vs_cpu = True

g = Connect4Game()

# all players
rp = RandomPlayer(g).play
gp = OneStepLookaheadConnect4Player(g).play
hp = HumanConnect4Player(g).play

# nnet players
n1 = NNet(g)
n1.load_checkpoint('./temp/', 'best.pth.tar')
args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts1 = MCTS(g, n1, args1)
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

if human_vs_cpu:
    player2 = hp
    for c in range(6):
        mcts = MCTSAgent(g, iters=100000000, c=c, rollout_iter=1, time=m).play
        if player == 'rp':
            opponent = RandomPlayer(g).play
        else:
            opponent = OneStepLookaheadConnect4Player(g, verbose=False).play
        arena_rp_hp = Arena.Arena(mcts, opponent, g, display=display)
        wins, loss, draw = arena_rp_hp.playGames(100, verbose=False)
        data.append([m, c, wins, loss, draw])
    return data


print('Start Parallel Simulation for Connect4: (4,5,3) (5,6,4) (6,7,4)')
global_start = time()

microsecs = np.array([
    10000, 50000, 100000, 250000, 500000, 750000, 1000000, 1500000, 2000000,
    3000000
])
games = [(4, 5, 3), (5, 6, 4), (6, 7, 4)]
players = ['rp', 'op']

for player in players:
    for i in games:
        global_start = time()
        g = Connect4Game(i[0], i[1], i[2])
        data = Parallel(n_jobs=10)(delayed(experiment)(m) for m in microsecs)
        np.save('connect4_results_' + player + '_' + str(i), data)
        print('Game: ' + str(i) + ', Opponent: ' + player + ', Time: ' +
              str(time() - global_start))