def nnet_learning(self):
    memory_size = 2000  # replay-buffer size (unused in this routine)
    self.network.eval()  # inference mode for self-play
    for e in range(self.epochs):
        print("Epoch : " + str(e))
        # Ramp up the simulation count over the epochs, capped at 300
        self.num_sim = self.num_sim + e * 5
        if self.num_sim > 300:
            self.num_sim = 300
        for s in range(self.num_selfplay):
            print(" Game number : " + str(s))
            # Collect training examples from one self-play game and save them
            self.training_examples_all.extend(self.selfplay())
            self.save_examples(self.example_num)
            self.example_num += 1
            self.training_examples_all = []
            # Fresh search tree and turn counter for the next game
            self.mcts = mcts(self.c4_utils, self.network, 1.5, self.num_sim)
            self.c4_utils.turn = 0
        print(" End of self play")

        # Snapshot the current weights so the old network can act as opponent
        self.network.train()
        self.network.saveweights(filename='wt_temp.pth.tar')
        self.network_old.loadweights(filename='wt_temp.pth.tar')
        net_old_mcts = mcts(self.c4_utils, self.network_old, 1.5, self.num_sim)

        # Train the current network on the examples saved so far
        training_examples = self.load_examples(self.example_num)
        self.network.train_net(training_examples)
        training_examples = []
        net_mcts = mcts(self.c4_utils, self.network, 1.5, self.num_sim)

        # Pit the updated network against the snapshot
        self.network.eval()
        self.network_old.eval()
        c = Compete(net_old_mcts, net_mcts, self.c4_utils)
        print(" Competition")
        net_old_win, net_win, draw = c.play(self.num_competitions)
        print(" New network wins : %d, Old network wins : %d, Draw : %d "
              % (net_win, net_old_win, draw))
        if net_old_win > net_win:
            # New weights lost the match: roll back to the snapshot
            print(" Rejecting new model")
            self.network.loadweights(filename='wt_temp.pth.tar')
        else:
            self.network.saveweights(filename='wt_best.pth.tar')
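# The save_examples/load_examples helpers used above are not shown. A minimal
# sketch of what they might look like, assuming one pickle file per self-play
# game and that load_examples(n) aggregates files 0..n-1; the directory layout
# and function names here are assumptions, not the repo's actual API:

import pickle
from pathlib import Path

EXAMPLE_DIR = Path("examples")  # hypothetical storage directory

def save_examples_sketch(examples, index):
    # Persist one game's (board, policy, value) tuples to a numbered file.
    EXAMPLE_DIR.mkdir(exist_ok=True)
    with open(EXAMPLE_DIR / "examples_{}.pkl".format(index), "wb") as f:
        pickle.dump(examples, f)

def load_examples_sketch(count):
    # Re-assemble every saved game into one flat training list.
    examples = []
    for i in range(count):
        with open(EXAMPLE_DIR / "examples_{}.pkl".format(i), "rb") as f:
            examples.extend(pickle.load(f))
    return examples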
def self_play(model: keras.layers.Layer, config: Config) -> Game:
    game = Game()
    while not game.terminal():
        action, root = mcts(game, model, config)
        game.apply(action)
        game.store_search_statistics(root)
    return game
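# store_search_statistics is assumed to record the root's visit-count
# distribution as the policy training target, in the style of the published
# AlphaZero pseudocode; the child_visits and num_actions attributes of Game
# are assumptions for this sketch:

def store_search_statistics_sketch(game, root):
    # Normalise the root children's visit counts into a probability
    # distribution over all actions; unvisited actions get probability 0.
    sum_visits = sum(child.visit_count for child in root.children.values())
    game.child_visits.append([
        root.children[a].visit_count / sum_visits if a in root.children else 0.0
        for a in range(game.num_actions)
    ])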
def __init__(self, network, num_selfplay, epochs, chess_utils,
             num_competitions, num_sim_):
    self.num_sim = num_sim_
    self.network = network
    self.chess_utils = chess_utils
    # A second network of the same class, used as the frozen opponent
    self.network_old = self.network.__class__(epochs)
    self.mcts = mcts(self.chess_utils, self.network, 1.5, self.num_sim)
    self.num_selfplay = num_selfplay
    self.epochs = epochs
    self.num_competitions = num_competitions
    self.training_examples_all = []
    self.update_threshold = 0.5  # acceptance threshold for the new network
    self.example_num = 0
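# A hypothetical driver showing how this trainer might be wired up. The
# Trainer, ConnectFourNet, and C4Utils names below are assumptions for
# illustration only; they are not defined in the snippets above:
#
# net = ConnectFourNet(epochs=10)
# trainer = Trainer(network=net, num_selfplay=100, epochs=10,
#                   chess_utils=C4Utils(), num_competitions=20, num_sim_=25)
# trainer.nnet_learning()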
def run_once(debug=True):
    # Randomly assign actors, places, and items for the story
    root_state = random_state(4, 4)
    # Initialize the root node - possible_methods MUST be True
    root_node = TreeNode(root_state, parent_edge=None, possible_methods=True)
    # Total number of methods available in the story
    num_methods = len(root_node.possible_methods)
    """
    Total simulation budget: max_numsim = max_expansion * thres
    max_iter      : number of sentences in the story
                    = number of story nodes - 1 = number of story edges
    max_expansion : number of expansions in the search
    max_simlength : maximum length of a rollout
    C             : exploration constant for selection
    thres         : minimum number of MCTS visits before a node is expanded
    """
    # Perform Monte Carlo search - returns the final node and the whole story
    max_expansion = 250
    if max_expansion < len(root_node.possible_methods):
        raise ValueError(
            "max_expansion ({}) should be at least the number of methods ({})".format(
                max_expansion, len(root_node.possible_methods)))
    max_iter = 15
    max_simlength = 20
    C = 1
    thres = 40
    minlambda = 0.95

    s = Story(root_node)
    print(s.create_expository())
    # print("Max iteration: {}\nMax Expansion: {}\nMax simulation length: {}\nC: {}\nThreshold: {}".format(max_iter, max_expansion, max_simlength, C, thres))

    n, s = mcts(root_node, max_iter, max_expansion, max_simlength, C, thres,
                minlambda, debug=False)

    # Print out results
    # if debug:
    #     print(s)
    #     print(n.believability)
    #     print(n.value)
    #     print(percent_goals_satisfied(n, GOALS))
    return (n, s)
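# The exploration constant C above trades off exploiting high-value story
# continuations against exploring rarely visited ones during selection. A
# minimal sketch of the standard UCT score such a constant usually feeds;
# the argument names are assumptions, not the repo's actual node API:

import math

def uct_score_sketch(node_value, node_visits, parent_visits, C):
    # Unvisited nodes are selected first; otherwise average value plus an
    # exploration bonus that shrinks as the node accumulates visits.
    if node_visits == 0:
        return float("inf")
    return node_value / node_visits + C * math.sqrt(
        math.log(parent_visits) / node_visits)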
def __init__(self, c4_utils, network, num_sim, player_x):
    self.round = 1
    self.finished = False
    self.winner = None
    self.mcts = mcts(c4_utils, network, 1.5, num_sim)

    # players/colors are assumed to be declared on the class elsewhere;
    # initialised here so the snippet stands alone ('x' moves first)
    self.players = [None, None]
    self.colors = ['x', 'o']

    name_mmx = "Minimax"
    name_mc = "Monte-Carlo Sim - RL"
    if player_x == "m":
        self.players[0] = Player(name_mc, self.colors[0], self.mcts)
        diff = int(input("Enter difficulty for the Minimax AI (1 - 4) "))
        self.players[1] = AIPlayer(name_mmx, self.colors[1], diff + 1)
    else:
        diff = int(input("Enter difficulty for the Minimax AI (1 - 4) "))
        self.players[0] = AIPlayer(name_mmx, self.colors[0], diff + 1)
        self.players[1] = Player(name_mc, self.colors[1], self.mcts)

    # # do cross-platform clear screen
    # os.system(['clear', 'cls'][os.name == 'nt'])
    # print(u"Welcome to {0}!".format(self.game_name))
    # print("Should Player 1 be a Minimax (benchmark) or Monte-Carlo (our program)?")
    # while self.players[0] is None:
    #     choice = str(input("Type 'MINIMAX (x)' or 'MONTE-CARLO (m)': "))
    #     if choice.lower() == "m":
    #         diff = int(input("Enter difficulty for the Minimax AI (1 - 4) "))
    #         self.players[0] = Player(name_mc, self.colors[0], self.mcts)
    #         self.players[1] = AIPlayer(name_mmx, self.colors[1], diff + 1)
    #     elif choice.lower() == "x":
    #         diff = int(input("Enter difficulty for the Minimax AI (1 - 4) "))
    #         self.players[0] = AIPlayer(name_mmx, self.colors[0], diff + 1)
    #         self.players[1] = Player(name_mc, self.colors[1], self.mcts)
    #     else:
    #         print("Invalid choice, please try again")

    print("{0} will be {1}".format(self.players[0].name, self.colors[0]))
    # x always goes first (arbitrary choice on my part)
    self.turn = self.players[0]

    # Empty 6-row by 7-column Connect Four board
    self.board = []
    for i in range(6):
        self.board.append([])
        for j in range(7):
            self.board[i].append(' ')
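# The board built above is a 6x7 grid of one-character cells. A small
# hypothetical helper (not part of the original class) that renders it,
# with 1-based column numbers matching the 7 board columns:

def print_board_sketch(board):
    # Print rows top to bottom, then the column indices underneath.
    for row in board:
        print("| " + " | ".join(row) + " |")
    print("  " + "   ".join(str(c + 1) for c in range(7)))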