def expand_all(self, leaf):
    """Attach one child node to `leaf` for every legal move from its state."""
    game = Game(leaf.state)
    for mv in game.allowed_moves():
        leaf.children.append(self.createNode(game.nextstate(mv), mv, parent=leaf))
def back_prop_terminal(self, leaf_terminal):
    """Back-propagate the result of a terminal leaf up to the root.

    A terminal leaf is always either a draw (reward 0) or a win with
    reward 1 for the player that played the last move.  The reward sign
    alternates at each ply on the way up because players alternate.

    Fix: the winner == 0 branch duplicated the W/N/Q update with a no-op
    `W += 0`; both cases now share one update path.
    """
    game = Game(leaf_terminal.state)
    gameover, winner = game.gameover()
    # reward from the point of view of the player who made the final move
    new_reward = 0 if winner == 0 else 1
    leaf_terminal.W += new_reward
    leaf_terminal.N += 1
    leaf_terminal.Q = leaf_terminal.W / leaf_terminal.N
    # then init recursion: walk up, flipping the reward sign each level
    current = leaf_terminal
    count = 1
    while current.parent is not None:
        current.parent.N += 1
        current.parent.W += ((-1) ** count) * new_reward
        current.parent.Q = current.parent.W / current.parent.N
        # move up
        current = current.parent
        count += 1
def getcountermove(currentnode, tree):
    """Take a forced move when the position is critical.

    If the side to move can win immediately, play that winning move; else if
    the opponent threatens an immediate win, play the blocking move.

    Returns:
        (currentnode, existcounter): the (possibly advanced) node and a flag
        telling whether a forced move was found and taken.

    Fix: the win/counter branches were copy-paste duplicates of the same
    expand-and-find-child loop; merged into one path with an early exit.
    """
    existcounter = False
    game = Game(currentnode.state)
    can_win, where_win, can_be_lost, where_lose = game.iscritical()
    if can_win == 1:
        # then take the win
        move = where_win[int(random.random() * len(where_win))]
    elif can_be_lost == 1:
        # then counter the threat
        move = where_lose[int(random.random() * len(where_lose))]
    else:
        return currentnode, existcounter
    # must expand since it is not done in the MCTS sims in that case
    tree.expand_all(currentnode)
    col = game.convert_move_to_col_index(move)
    for child in currentnode.children:
        if game.convert_move_to_col_index(child.move) == col:
            currentnode = child
            existcounter = True
            break  # one child per column, so the first match is the only one
    return currentnode, existcounter
def eval_leaf(self, leaf):
    """Evaluate a leaf with the neural network and update its W/N/Q stats.

    For a non-terminal leaf: queries the network for (value, move priors),
    optionally mixes Dirichlet noise into the root priors, and stores the
    (optionally masked) prior distribution on `leaf.proba_children`.
    For a terminal leaf: uses the true game outcome instead of the NN value.
    """
    self.player.eval()  # switch the network to inference mode
    np.random.seed()
    if leaf.isterminal() == 0:
        game = Game(leaf.state)
        flat = game.state_flattener(leaf.state)
        # NN call: value head `reward`, policy head `P`
        reward, P = self.player.forward(flat)
        proba_children = P.detach().numpy()[0]
        NN_q_value = reward.detach().numpy()[0][0]
        # Dirichlet exploration noise is applied only at the root
        # (leaf.parent is None), as in the AlphaZero self-play setup.
        if self.use_dirichlet and leaf.parent is None:
            probs = np.copy(proba_children)
            alpha = config.alpha_dir
            epsilon = config.epsilon_dir
            dirichlet_input = [alpha for _ in range(config.L)]
            dirichlet_list = np.random.dirichlet(dirichlet_input)
            proba_children = (1 - epsilon) * probs + epsilon * dirichlet_list
        # NOTE(review): the NN value is SUBTRACTED here — presumably the value
        # is expressed from the opponent's point of view; confirm against the
        # back-propagation convention.
        leaf.W = leaf.W - NN_q_value
        leaf.N += 1
        leaf.Q = leaf.W / leaf.N
        if config.maskinmcts:
            # mask the priors so only columns with an actual child keep mass
            mask = np.zeros(config.L)
            for child in leaf.children:
                child_col = game.convert_move_to_col_index(child.move)
                mask[child_col] = 1
            maskit = np.multiply(proba_children, mask)
            # for possible bug (when proba given by NN is strictly one for a
            # full column): smooth the distribution and re-mask
            if np.sum(maskit) == 0:
                print('happening')  # actually never happens -> no overflow in softmax -> good
                epsilon = 0.01
                proba_children = (proba_children + epsilon)
                proba_children = proba_children / np.sum(proba_children)
                maskit = np.multiply(proba_children, mask)
            leaf.proba_children = maskit / np.sum(maskit)
        else:
            leaf.proba_children = proba_children
    else:
        # terminal leaf: seems reasonable to use the true value, not NN value
        game = Game(leaf.state)
        _, winner = game.gameover()
        truereward = np.abs(winner)
        # to be fair it should include the long_game_factor if used,
        # but it doesn't change much
        leaf.W = leaf.W + truereward
        leaf.N += +1
        leaf.Q = leaf.W / leaf.N
def printstates(player):
    """Print the NN player's priors and Q-values on reference positions and
    return 1 when the model meets the quality criteria (else 0).

    Knowledge based on http://connect4.gamesolver.org
    (for instance for turn 5 : http://connect4.gamesolver.org/?pos=44444).

    Fix: the 6-term chained comparison `Qchilds[3] > Qchilds[0] or ...` is
    replaced by the equivalent `Qchilds[3] > min(Qchilds)` (true exactly when
    some other child has a strictly smaller Q).
    """
    part_states = config.particular_states()
    print('')
    print(
        '(probs should be max for optimal play, ie [0,0,0,1,0,0,0] from turn 0 to 4 included ; turn 5 flat prob, turn 6, [0,0,.5,0,0.5,0,0]'
    )
    print(
        'Q-values of the board should be minimal for the corresponding optimal move)'
    )
    print('')
    getbreak = 1
    for i in range(len(part_states)):
        state = config.getstate(i)
        game = Game(state)
        dirichletforprinting = False
        tree = MCTS_NN(player, dirichletforprinting)
        rootnode = tree.createNode(game.state)
        tree.expand_all(rootnode)
        tree.eval_leaf(rootnode)
        pchild = rootnode.proba_children
        pchild = [int(1000 * x) / 10 for x in pchild]
        for child in rootnode.children:
            tree.expand_all(child)
            tree.eval_leaf(child)
        Qs = [-int(100 * child.Q) / 100 for child in rootnode.children]
        Qchilds = [-child.Q for child in rootnode.children]
        # turn = number of stones on the board (popcount of both bitboards)
        turn = str(bin(state[0])).count('1') + str(bin(state[1])).count('1')
        print('turn', int(turn), 'Qval of this board',
              -int(1000 * rootnode.Q) / 1000)
        print('children probs', pchild, 'and of corresponding Q-val', Qs)
        time.sleep(0.01)
        # for automatic break of the main loop when the model is good enough:
        # we require probability for the central column to be at least 92%...
        if int(turn) <= 4 and pchild[3] < 92:
            getbreak = 0
        # ...and the lowest Q-value for the optimal (central) move
        if int(turn) <= 4 and Qchilds[3] > min(Qchilds):
            getbreak = 0
    # and, in the main program, an ELO of at least 1800 (see main)
    return getbreak
def superselect(self, current, cpuct):
    """Super-selection rule: take an immediate win if available, otherwise
    block an immediate loss, otherwise descend to the PUCT-maximising child
    (ties broken uniformly at random)."""
    game = Game(current.state)
    can_win, wherewin, can_lose, wherelose = game.iscritical()
    if can_win:
        target_col = wherewin[int(random.random() * len(wherewin))]
        # find the child whose move lands in that column
        for child in current.children:
            if game.convert_move_to_col_index(child.move) == target_col:
                current = child
    elif can_lose:
        target_col = wherelose[int(random.random() * len(wherelose))]
        for child in current.children:
            if game.convert_move_to_col_index(child.move) == target_col:
                current = child
    else:
        scores = [self.PUCT(child, cpuct) for child in current.children]
        best = max(scores)
        candidates = [i for i, s in enumerate(scores) if s == best]
        if len(candidates) == 1:
            chosen = candidates[0]
        else:
            chosen = candidates[int(random.random() * len(candidates))]
        current = current.children[chosen]
    return current
def onevsonegame(budget1, random1, counter1, usecounter_in_rollout_1, budget2,
                 random2, counter2, usecounter_in_rollout_2, whostarts, index):
    """Play one game between two pure-MCTS (or random) players and pickle the
    result ('budget1' / 'budget2' / 'draw') to ./data/game<index>.txt.

    Each player is configured by: a simulation budget, a pure-random flag, a
    "take the win / counter the loss" flag, and a rollout-counter flag.

    Fix: the four rd/counter combinations duplicated the random-move and
    simulate-and-pick code; they now share one move-selection path.  The
    redundant file.close() inside the `with` block was removed.
    """
    import random
    random.seed()
    np.random.seed()
    if whostarts == 'budget1':
        modulo = 1
    elif whostarts == 'budget2':
        modulo = 0
    # init tree, root, game
    tree = MCTS()
    c_uct = 1
    game = Game()
    turn = 0
    gameover = 0
    rootnode = tree.createNode(game.state)
    currentnode = rootnode
    # main loop
    while gameover == 0:
        turn = turn + 1
        if turn % 2 == modulo:
            # player1
            sim_number = budget1
            usecounterinrollout = usecounter_in_rollout_1
            counter = counter1
            rd = random1
        else:
            # player2
            sim_number = budget2
            usecounterinrollout = usecounter_in_rollout_2
            counter = counter2
            rd = random2
        # optionally take a forced move (win / counter the loss) first
        existscounter = False
        if counter:
            currentnode, existscounter = getcountermove(currentnode, tree)
        if existscounter == False:
            if rd:
                # completely random play
                if len(currentnode.children) == 0:
                    tree.expand_all(currentnode)
                randindex = int(random.random() * (len(currentnode.children)))
                currentnode = currentnode.children[randindex]
            else:
                # MCTS play: simulate, then pick the most-visited child
                # (ties broken at random)
                for sims in range(0, sim_number):
                    tree.simulate(currentnode, UCT_simu, c_uct,
                                  usecounterinrollout)
                visits = np.array([child.N for child in currentnode.children])
                max_visits = np.where(visits == np.max(visits))[0]
                imax = max_visits[int(random.random() * len(max_visits))]
                currentnode = currentnode.children[imax]
        # then reinit tree
        game = Game(currentnode.state)
        tree = MCTS()
        rootnode = tree.createNode(game.state)
        currentnode = rootnode
        gameover, winner = game.gameover()
    # end of game: translate the winner (+1 = first player) into a label
    if winner == 0:
        toreturn = 'draw'
    elif winner == 1:
        toreturn = 'budget1' if whostarts == 'budget1' else 'budget2'
    elif winner == -1:
        toreturn = 'budget2' if whostarts == 'budget1' else 'budget1'
    monresult = {'result': toreturn}
    filename = './data/game' + str(index) + '.txt'
    with open(filename, 'wb') as file:
        pickle.dump(monresult, file)
def default_rollout_policy(self, node, usecounter):
    """Play the position out to the end and return the winner (+1/-1/0).

    The rollout is completely random, unless `usecounter` is set, in which
    case an immediate win is always taken and an immediate loss is always
    countered before falling back to a random move.

    Fix: the take-step / refresh-moves / check-gameover sequence was
    duplicated in three branches; move choice and stepping are now separate.
    """
    gameloc = Game(node.state)
    if node.isterminal() == 0:
        # init
        allowedmoves = gameloc.allowed_moves()
        gameover = 0
        while gameover == 0:
            if usecounter:
                can_win, where_win, can_lose, where_lose = gameloc.iscritical()
                if can_win:
                    move = where_win[int(random.random() * len(where_win))]
                elif can_lose:
                    move = where_lose[int(random.random() * len(where_lose))]
                else:
                    move = allowedmoves[int(random.random() *
                                            len(allowedmoves))]
            else:
                move = allowedmoves[int(random.random() * len(allowedmoves))]
            gameloc.takestep(move)
            allowedmoves = gameloc.allowed_moves()
            gameover, _ = gameloc.gameover()
    _, winner = gameloc.gameover()
    return winner
def expand_all(self, node):
    """Expand `node` fully: one child per legal move from its state."""
    game = Game(node.state)
    legal = game.allowed_moves()
    for candidate in legal:
        new_child = self.createNode(game.nextstate(candidate),
                                    candidate,
                                    parent=node)
        node.children = node.children + [new_child]
def isterminal(self):
    """Return the game-over flag (0 or 1) for this node's state."""
    over, _ = Game(self.state).gameover()
    return over
def onevsonehuman(budget, whostarts):
    """Interactive game: the best saved ResNet model (with MCTS_NN search)
    against a human typing column choices on stdin.

    Args:
        budget: number of MCTS_NN simulations per computer move.
        whostarts: 'computer' to let the model move first; anything else
            lets the human start.

    Returns:
        'draw', 'budget1' (computer wins) or 'budget2' (human wins).
    """
    if whostarts == 'computer':
        modulo = 1
    else:
        modulo = 0
    file_path_resnet = './best_model_resnet.pth'
    best_player_so_far = ResNet.resnet18()
    best_player_so_far.load_state_dict(torch.load(file_path_resnet))
    game = Game()
    tree = MCTS_NN(best_player_so_far, use_dirichlet=False)
    rootnode = tree.createNode(game.state)
    currentnode = rootnode
    turn = 0
    isterminal = 0
    while isterminal == 0:
        turn = turn + 1
        if turn % 2 == modulo:
            player = 'computer'
            sim_number = budget
        else:
            player = 'human'
        if player == 'computer':
            print('===============IA playing================')
            for sims in range(0, sim_number):
                tree.simulate(currentnode, cpuct=1)
            # side display tree: show the raw NN priors/Q-values for this
            # position (does not influence the move actually played)
            treefordisplay = MCTS_NN(best_player_so_far, False)
            rootnodedisplay = treefordisplay.createNode(game.state)
            treefordisplay.expand_all(rootnodedisplay)
            # NOTE(review): `tree.eval_leaf` is called here while the children
            # below use `treefordisplay.eval_leaf`; both trees wrap the same
            # network so the result should match — confirm intent.
            tree.eval_leaf(rootnodedisplay)
            pchild = rootnodedisplay.proba_children
            pchild = [int(1000 * x) / 10 for x in pchild]
            for child in rootnodedisplay.children:
                treefordisplay.eval_leaf(child)
            Qs = [
                int(100 * child.Q) / 100
                for child in rootnodedisplay.children
            ]
            print('NN thoughts', pchild, Qs)
            # the actual move: most-visited child of the search tree
            visits_after_all_simulations = []
            for child in currentnode.children:
                visits_after_all_simulations.append(child.N)
            print('result visits', visits_after_all_simulations)
            values = np.asarray(visits_after_all_simulations)
            imax = np.random.choice(np.where(values == np.max(values))[0])
            print('choice made', imax)
            currentnode = currentnode.children[imax]
        else:  # human player
            print('=============== your turn =====================')
            game = Game(currentnode.state)
            game.display_it()
            moves = game.allowed_moves()
            print(
                'chose a move from 0 to 6 -- beware of full columns! (not taken into account : e.g. if column three is full, enter 5 instead of 6 to play in the last column)'
            )
            # the human picks an index into the *legal* move list
            human_choice = int(input())
            game.takestep(moves[human_choice])
            currentnode = Node(game.state, moves[human_choice])
        # reinit tree for the next move
        game = Game(currentnode.state)
        tree = MCTS_NN(best_player_so_far, use_dirichlet=False)
        rootnode = tree.createNode(game.state)
        currentnode = rootnode
        isterminal = currentnode.isterminal()
    # game over: display the final board and report the result
    game = Game(currentnode.state)
    game.display_it()
    gameover, winner = game.gameover()
    #print('end of game')
    if winner == 0:
        toreturn = 'draw'
        print('draw')
    elif winner == 1:
        # winner == 1 means the player who moved first won
        if whostarts == 'computer':
            print('computer wins')
            toreturn = 'budget1'
        else:
            print('you win')
            toreturn = 'budget2'
    elif winner == -1:
        if whostarts == 'computer':
            print(' you win')
            toreturn = 'budget2'
        else:
            print('computer wins')
            toreturn = 'budget1'
    return toreturn
def NN_against_mcts(player_NN, budget_NN, budget_MCTS, whostarts, c_uct, cpuct,
                    tau, tau_zero, use_dirichlet, index):
    """Play one evaluation game: neural-net MCTS vs pure MCTS.

    Saves a dict {'data': [wp1, wp2, draw, w_nn_start, w_nn_second]} to
    ./data/nn_against_mcts<index>.txt (wp1 refers to whoever started).

    Fixes: local variable `max` shadowed the `max` builtin (renamed to
    `imax_nn`); removed the redundant file.close() inside the `with` block.
    """
    random.seed()
    np.random.seed()
    if whostarts == 'player_nn':
        modulo = 1
    elif whostarts == 'player_mcts':
        modulo = 0
    w_nn_start = 0
    w_nn_second = 0
    gameover = 0
    turn = 0
    while gameover == 0:
        turn = turn + 1
        if turn % 2 == modulo:
            player = 'player_nn'
            sim_number = budget_NN
        else:
            player = 'player_mcts'
            sim_number = budget_MCTS
        # init tree for NN or MCTS on the very first move
        if turn == 1:
            if player == 'player_nn':
                game = Game()
                tree = MCTS_NN(player_NN, use_dirichlet)
                rootnode = tree.createNode(game.state)
                currentnode = rootnode
            else:
                game = Game()
                tree = MCTS()
                rootnode = tree.createNode(game.state)
                currentnode = rootnode
        if player == 'player_nn':
            for sims in range(0, sim_number):
                tree.simulate(currentnode, cpuct)
            # temperature-weighted visit counts
            visits_after_all_simulations = []
            for child in currentnode.children:
                visits_after_all_simulations.append(child.N**(1 / tau))
            all_visits = np.asarray(visits_after_all_simulations)
            probvisit = all_visits / np.sum(all_visits)
            # take a step: sample while turn < tau_zero, then play greedily
            if turn < tau_zero:
                currentnode = np.random.choice(currentnode.children,
                                               p=probvisit)
            else:
                imax_nn = np.random.choice(
                    np.where(all_visits == np.max(all_visits))[0])
                currentnode = currentnode.children[imax_nn]
            # reinit tree for next player : mcts
            game = Game(currentnode.state)
            tree = MCTS()
            rootnode = tree.createNode(game.state)
            currentnode = rootnode
            gameover = currentnode.isterminal()
        if player == 'player_mcts':
            for sims in range(0, sim_number):
                tree.simulate(currentnode, UCT_simu, c_uct,
                              config.use_counter_in_pure_mcts)
            visits_after_all_simulations = []
            for child in currentnode.children:
                visits_after_all_simulations.append(child.N)
            values = np.asarray(visits_after_all_simulations)
            imax = np.random.choice(np.where(values == np.max(values))[0])
            currentnode = currentnode.children[imax]
            # reinit tree for next player : neural net
            game = Game(currentnode.state)
            tree = MCTS_NN(player_NN, use_dirichlet)
            rootnode = tree.createNode(game.state)
            currentnode = rootnode
            gameover = currentnode.isterminal()
    # game over: collect stats (winner == 1 means the starting side won)
    game = Game(currentnode.state)
    gameover, winner = game.gameover()
    wp1 = 0
    wp2 = 0
    draw = 0
    if winner == 0:
        draw = 1
    elif winner == 1:
        if whostarts == 'player_nn':
            wp1 = 1
            w_nn_start = 1
        else:
            wp2 = 1
    elif winner == -1:
        if whostarts == 'player_nn':
            wp2 = 1
        else:
            wp1 = 1
            w_nn_second = 1
    save_dic = {}
    save_dic['data'] = np.asarray([wp1, wp2, draw, w_nn_start, w_nn_second])
    filename = './data/nn_against_mcts' + str(index) + '.txt'
    with open(filename, 'wb') as file:
        pickle.dump(save_dic, file)
def onevsonegame(player1, budget1, player2, budget2, whostarts, cpuct, tau,
                 tau_zero, use_dirichlet, index):
    """Self-play one game between two MCTS_NN players and save the training
    data (flattened states, visit-count policies and backfilled rewards z)
    to ./data/createdata<index>.txt.

    Fixes: local variable `max` shadowed the builtin (renamed `best_idx`);
    removed the redundant file.close() inside the `with` block.

    NOTE(review): the budget1/budget2 arguments are immediately overwritten
    from config.SIM_NUMBER / config.sim_number_defense — confirm the
    parameters are intentionally ignored.
    """
    # not sure if required but safety first!
    random.seed()
    np.random.seed()
    # one row per move: [3 board planes, pi over columns, z]
    new_data_for_the_game = np.zeros((3 * config.L * config.H + config.L + 1))
    if whostarts == 'player1':
        modulo = 1
        budget1 = config.SIM_NUMBER
        budget2 = config.sim_number_defense
    elif whostarts == 'player2':
        modulo = 0
        budget2 = config.SIM_NUMBER
        budget1 = config.sim_number_defense
    gameover = 0
    turn = 0
    while gameover == 0:
        turn = turn + 1
        if turn % 2 == modulo:
            player = 'player1'
            sim_number = budget1
            who_plays = player1
        else:
            player = 'player2'
            sim_number = budget2
            who_plays = player2
        # init tree on the first move only
        if turn == 1:
            game = Game()
            tree = MCTS_NN(who_plays, use_dirichlet)
            rootnode = tree.createNode(game.state)
            currentnode = rootnode
        for sims in range(0, sim_number):
            tree.simulate(currentnode, cpuct)
        # temperature-weighted visit counts -> policy target pi
        visits_after_all_simulations = []
        childmoves = []
        for child in currentnode.children:
            visits_after_all_simulations.append(child.N**(1 / tau))
            childmoves.append(child.move)
        all_visits = np.asarray(visits_after_all_simulations)
        probvisit = all_visits / np.sum(all_visits)
        child_col = [
            game.convert_move_to_col_index(move) for move in childmoves
        ]
        # store the data created (pi scattered into the full column vector)
        child_col = np.asarray(child_col, dtype=int)
        unmask_pi = np.zeros(config.L)
        unmask_pi[child_col] = probvisit
        flatten_state = game.state_flattener(currentnode.state)
        # init z to zero ; z is the actual reward from the current player's
        # point of view, backfilled below once the winner is known
        this_turn_data = np.hstack((flatten_state, unmask_pi, 0))
        new_data_for_the_game = np.vstack(
            (new_data_for_the_game, this_turn_data))
        # then take a step: sample while turn < tau_zero, then play greedily
        if turn < tau_zero:
            currentnode = np.random.choice(currentnode.children, p=probvisit)
        else:
            best_idx = np.random.choice(
                np.where(all_visits == np.max(all_visits))[0])
            currentnode = currentnode.children[best_idx]
        # reinit tree for next turn (the other network searches next)
        game = Game(currentnode.state)
        if player == 'player1':
            tree = MCTS_NN(player2, use_dirichlet)
        else:
            tree = MCTS_NN(player1, use_dirichlet)
        rootnode = tree.createNode(game.state)
        currentnode = rootnode
        gameover = currentnode.isterminal()
    # game has terminated. Then, exit while, and :
    # drop the all-zero seed row
    new_data_for_the_game = np.delete(new_data_for_the_game, 0, 0)
    game = Game(currentnode.state)
    gameover, winner = game.gameover()
    if config.use_z_last:
        # include last winning move? unclear because there we don't have
        # probabilities => put uniform prob.  default : don't use z_last
        flatten_state = game.state_flattener(currentnode.state)
        unmask_pi = np.ones(config.L) / config.L
        this_turn_data = np.hstack((flatten_state, unmask_pi, 0))
        new_data_for_the_game = np.vstack(
            (new_data_for_the_game, this_turn_data))
    # update the z's and winner stats
    wp1 = 0  # win player 1, etc
    wp2 = 0
    winstart = 0
    winsecond = 0
    draw = 0
    # backfill the z such as it becomes the actual reward from the current
    # player's point of view:
    history_size = new_data_for_the_game.shape[0]
    if winner == 0:
        z = 0
        draw = 1
    elif winner == 1:
        if config.favorlonggames:
            # the reward is bigger for shorter games
            z = 1 - config.long_game_factor * history_size / 42
        else:
            z = 1
        winstart += 1
        if whostarts == 'player1':
            wp1 = 1
        else:
            wp2 = 1
    elif winner == -1:
        winsecond += 1
        if config.favorlonggames:
            # the reward is less negative for long games
            z = -1 + config.long_game_factor * history_size / 42
        else:
            z = -1
        if whostarts == 'player1':
            wp2 = 1
        else:
            wp1 = 1
    # alternate the sign of z along the move history
    z_vec = np.zeros(history_size)
    for i in range(history_size):
        z_vec[i] = ((-1)**i) * z
    new_data_for_the_game[:, -1] = z_vec
    # data extension using parity (mirror) along the x axis
    board_size = config.L * config.H
    if config.data_extension:
        extend_data = np.zeros(
            (new_data_for_the_game.shape[0], new_data_for_the_game.shape[1]))
        for i in range(extend_data.shape[0]):
            board = np.copy(new_data_for_the_game[i, 0:3 * board_size]).reshape(
                (3, config.H, config.L))
            yellowboard = board[0]
            redboard = board[1]
            player_turn = board[2]
            # parity operation on array for both yellow and red boards
            flip_yellow = np.fliplr(yellowboard)
            flip_red = np.fliplr(redboard)
            extend_data[i, 0:board_size] = flip_yellow.flatten()
            extend_data[i, board_size:2 * board_size] = flip_red.flatten()
            extend_data[i,
                        2 * board_size:3 * board_size] = player_turn.flatten()
            # parity operation on the Pi's
            pi_s = np.copy(new_data_for_the_game[i, 3 * board_size:3 *
                                                 board_size + config.L])
            flip_pi = np.flip(pi_s, axis=0)
            extend_data[i, 3 * board_size:3 * board_size + config.L] = flip_pi
            extend_data[i, -1] = np.copy(new_data_for_the_game[i, -1])
        # stack the mirrored rows under the originals
        new_data_for_the_game = np.vstack(
            (new_data_for_the_game, extend_data))
    # save data of self play in a file indexed by the CPU used.
    mydata = {
        'data': [
            new_data_for_the_game, wp1, wp2, draw, winstart, winsecond,
            history_size
        ]
    }
    filename = './data/createdata' + str(index) + '.txt'
    with open(filename, 'wb') as file:
        pickle.dump(mydata, file)
def PUCT(self, child, cpuct):
    """PUCT score of `child`: its Q-value plus the prior-weighted
    exploration bonus cpuct * P(col) * sqrt(N_parent) / (1 + N_child)."""
    game = Game()
    col_of_child = game.convert_move_to_col_index(child.move)
    parent = child.parent
    prior = parent.proba_children[col_of_child]
    exploration = cpuct * prior * np.sqrt(parent.N) / (1 + child.N)
    return child.Q + exploration