def find_move(self, node, simulations, batch_player):
    # node is the root node
    move_node = node
    # print(move_node.state.get_player())
    for i in range(0, simulations):
        # move_node.state.player = 3 - move_node.state.player
        # searches through the tree based on UCT value
        best_node = mcts.MCTS().search(move_node, batch_player)
        # expands the node with children if there are possible states
        mcts.MCTS().expand(best_node)
        # if the node was expanded, choose a random child to evaluate
        if len(best_node.get_child_nodes()) > 0:
            best_node = random.choice(best_node.get_child_nodes())
        # simulates the winner (rollout)
        winner = mcts.MCTS().ANET_evaluate(ANET=self.ANET, node=best_node)
        # traverses up the tree with the winner
        mcts.MCTS().backpropogate(best_node, winner, batch_player)
    return move_node
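# A minimal, self-contained sketch of the same four MCTS phases the method
# above walks through (selection by UCT, expansion, random rollout, and
# backpropagation). Everything here is hypothetical and independent of the
# mcts module used above; the game is a toy Nim variant chosen only so the
# sketch runs on its own.
import math
import random


class NimState:
    """Take 1-3 stones per turn; whoever takes the last stone wins."""

    def __init__(self, stones=10, player=1):
        self.stones = stones
        self.player = player  # player to move: 1 or 2

    def moves(self):
        return [n for n in (1, 2, 3) if n <= self.stones]

    def apply(self, move):
        return NimState(self.stones - move, 3 - self.player)

    def is_terminal(self):
        return self.stones == 0

    def winner(self):
        return 3 - self.player  # the player who just moved took the last stone


class Node:
    def __init__(self, state, parent=None, move=None):
        self.state, self.parent, self.move = state, parent, move
        self.children, self.visits, self.wins = [], 0, 0.0


def uct_select(node, c=1.4):
    # Visit every child once before trusting the UCT formula.
    unvisited = [ch for ch in node.children if ch.visits == 0]
    if unvisited:
        return random.choice(unvisited)
    return max(node.children,
               key=lambda ch: ch.wins / ch.visits
               + c * math.sqrt(math.log(node.visits) / ch.visits))


def toy_find_move(root_state, simulations=1000):
    root = Node(root_state)
    for _ in range(simulations):
        node = root
        # 1. Selection: descend by UCT while the node already has children.
        while node.children:
            node = uct_select(node)
        # 2. Expansion: add one child per legal move, then pick one at random.
        if not node.state.is_terminal():
            node.children = [Node(node.state.apply(m), node, m)
                             for m in node.state.moves()]
            node = random.choice(node.children)
        # 3. Rollout: play uniformly random moves to the end of the game.
        state = node.state
        while not state.is_terminal():
            state = state.apply(random.choice(state.moves()))
        winner = state.winner()
        # 4. Backpropagation: a node is credited when the player who chose it
        # (its parent's player-to-move) turned out to be the winner.
        while node is not None:
            node.visits += 1
            if node.parent is not None and winner == node.parent.state.player:
                node.wins += 1
            node = node.parent
    return max(root.children, key=lambda ch: ch.visits).move


print(toy_find_move(NimState(stones=10)))  # 10 % 4 == 2, so 2 is the optimal take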
def __init__(self, player_id, game_id, server_address=('localhost', 4242),
             time_allowed=5, n_threads=3, tree_file='trees/tree_9h.p'):
    """
    Args:
        time_allowed (int): time allowed for thinking before a response is required
        n_threads (int): number of worker MCTS threads to spin up
    """
    self.event = multiprocessing.Event()
    self.event.clear()
    self.time_allowed = time_allowed
    self.n_threads = n_threads
    self.tree_keeper = mcts.MCTS(checkers.Board())  # holds the merged tree
    if tree_file is not None:
        self.tree_keeper.load_tree(tree_file)
    self.mcts_threads = [None] * self.n_threads  # holds the pointers to worker threads
    self.thread_pipes = [None for _ in range(self.n_threads)]  # holds the pipes to workers
    self._start_threads()
    super().__init__(player_id, game_id, server_address=server_address)
def generate_small_maze25(num_games=25, num_rollout=50):
    # board_state_node = mcts.pacmanNode(game_board, 0)
    data = []
    for game in range(num_games):
        # initialization for the next round of AI search
        L, ghosts = pp.smallMaze(2, 5)
        pos_i, pos_j = 5, 10
        init_board = pp.MazeGameBoard(L, ghosts, pos_i, pos_j, 0)
        tree = mcts.MCTS()
        board_state_node = mcts.pacmanNode(init_board, 0)
        while True:
            L0, pos_i0, pos_j0, score0 = pp.retriveInfoFromGameBoard(
                board_state_node.board)
            for num in range(num_rollout):
                tree.do_rollout(board_state_node)
            board_state_node.board.one_step_more()
            # print(board_state_node.board.current_steps)
            board_state_node, max_score = tree.choose(board_state_node)
            if board_state_node.is_terminal():
                data = get_data(tree, data)
                break
            maze, pos_i, pos_j, score = pp.retriveInfoFromGameBoard(
                board_state_node.board)
            if maze[pos_i][pos_j] != 3:
                maze[pos_i][pos_j] = " "
            if board_state_node.is_terminal():
                data = get_data(tree, data)
                break
            ghosts = board_state_node.board.ghosts
            for ghost in ghosts:
                if ghosts.index(ghost) % 3 == 0:
                    bestAction = pp.eclideanGhostAction(maze, ghost, pos_i, pos_j)
                    ghost.move(bestAction, maze)
                elif ghosts.index(ghost) % 3 == 1:
                    bestAction = pp.manhanttanGhostAction(maze, ghost, pos_i, pos_j)
                    ghost.move(bestAction, maze)
                elif ghosts.index(ghost) % 3 == 2:
                    bestAction = pp.randomGhostAction(maze, ghost)
                    ghost.move(bestAction, maze)
            if board_state_node.is_terminal():
                data = get_data(tree, data)
                break
    # for child in max_node.children():
    #     data.append((child, mcts.get_score_estimates(child)))
    return data
def test_mcts(self):
    storage_threshold = 12
    index1 = iaw.IndexItem('public.a', 'col1', index_type='global')
    index2 = iaw.IndexItem('public.b', 'col1', index_type='global')
    index3 = iaw.IndexItem('public.c', 'col1', index_type='global')
    index4 = iaw.IndexItem('public.d', 'col1', index_type='global')
    atomic_index1 = iaw.IndexItem('public.a', 'col1', index_type='global')
    atomic_index2 = iaw.IndexItem('public.b', 'col1', index_type='global')
    atomic_index3 = iaw.IndexItem('public.c', 'col1', index_type='global')
    atomic_index4 = iaw.IndexItem('public.d', 'col1', index_type='global')
    atomic_index1.storage = 10
    atomic_index2.storage = 4
    atomic_index3.storage = 7
    available_choices = [index1, index2, index3, index4]
    atomic_choices = [[], [atomic_index2], [atomic_index1], [atomic_index3],
                      [atomic_index2, atomic_index3], [atomic_index4]]
    query = iaw.QueryItem('select * from gia_01', 1)
    query.cost_list = [10, 7, 5, 9, 4, 11]
    workload_info = [query]
    results = mcts.MCTS(workload_info, atomic_choices, available_choices,
                        storage_threshold, 2)
    self.assertLessEqual(
        [index1.atomic_pos, index2.atomic_pos, index3.atomic_pos], [2, 1, 3])
    self.assertSetEqual({results[0].table, results[1].table},
                        {'public.b', 'public.c'})
def initializeVersusAIVariables():
    global playerOne
    global playerTwo
    global current
    global nextMove
    global board
    global agent
    P1 = 0
    P2 = 1
    # multiprocess computation
    print("board", agent)
    board = mancala_board.Board()
    playerOne = Human(P1)
    # AI player
    if agent == "Minimax":
        playerTwo = minimax.AI(P2, 8)
    elif agent == "Genesis":
        playerTwo = genesis.genesis(P2, 3)
    elif agent == "MonteCarlo":
        playerTwo = mcts.MCTS(P2, mancala_board.Board())
    print("Versus AI Agent:", agent)
    # starting player is random
    # current = random.randint(0, 1)  # TODO: this line is a problem
    current = None
    nextMove = random.randint(0, 1)
    return jsonify({'initialized': True})
def play_minimax_games(net, game_count, mcts_sim_count, network_color):
    """
    Returns the error percentage of the optimal move prediction by the network.
    The network and the MCTS are used to predict the move to play.
    :param net: the network
    :param game_count: the number of games to play
    :param mcts_sim_count: the number of Monte Carlo simulations
    :param network_color: the color of the network
    :return: the score of the network vs the minimax player
    """
    mcts_list = [mcts.MCTS(tic_tac_toe.TicTacToeBoard()) for _ in range(game_count)]
    player = CONST.WHITE
    all_terminated = False
    while not all_terminated:
        # make a move with the az agent
        if player == network_color:
            # run all mcts simulations
            mcts.run_simulations(mcts_list, mcts_sim_count, net, 0)
            # play the best move suggested by the mcts policy
            for i_mcts_ctx, mcts_ctx in enumerate(mcts_list):
                # skip terminated games
                if mcts_ctx.board.is_terminal():
                    continue
                policy = mcts_list[i_mcts_ctx].policy_from_state(mcts_ctx.board.state_id(), 0)
                move = np.where(policy == 1)[0][0]
                mcts_ctx.board.execute_action(move)
        # make an optimal minimax move
        else:
            for mcts_ctx in mcts_list:
                # skip terminated games
                if mcts_ctx.board.is_terminal():
                    continue
                move = mcts_ctx.board.minimax_move()
                mcts_ctx.board.execute_action(move)
        # swap the player
        player = CONST.WHITE if player == CONST.BLACK else CONST.BLACK
        # check if all games are terminated
        all_terminated = True
        for mcts_ctx in mcts_list:
            if not mcts_ctx.board.is_terminal():
                all_terminated = False
                break
    # extract the score from all boards
    tot_score = 0
    for mcts_ctx in mcts_list:
        score = mcts_ctx.board.white_score() if network_color == CONST.WHITE else mcts_ctx.board.black_score()
        tot_score += score
    tot_score /= game_count
    return tot_score
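# The loop above advances all boards in lock step: one player's turn applies a
# move to every live board, and the outer loop stops once every board is
# terminal. A toy standalone version of that control flow (ToyBoard is a
# made-up stand-in; real boards would come from the game engine):
class ToyBoard:
    def __init__(self, moves_left):
        self.moves_left = moves_left

    def is_terminal(self):
        return self.moves_left == 0

    def play(self):
        self.moves_left -= 1


boards = [ToyBoard(n) for n in (1, 2, 3)]
while not all(b.is_terminal() for b in boards):
    for b in boards:
        if not b.is_terminal():  # skip games that already finished
            b.play()
print("all games terminated")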
def main():
    neural_network = network.Network()
    config = mcts.Config()
    config.simulation_num = 800
    search = mcts.MCTS(config)
    selfplay.run(neural_network, search, True)
def __init__(self, role, max_simulate_count=500):
    BasePlayer.__init__(self, role)
    self.max_simulate_count = max_simulate_count  # was hard-coded to 500, ignoring the argument
    self.mcts = mcts.MCTS(
        c=5,
        max_simulate_count=max_simulate_count,
        policy_function=mcts.policy_function,
        rollout_policy_function=mcts.rollout_policy_function)
def run_hourly():
    board = checkers.Board()
    MC = mcts.MCTS(board)
    MC.log.setLevel(mcts.logging.INFO)
    i = 0
    while True:
        print('starting hour', i)
        MC.run(t=60 * 60)
        MC.save_tree('trees/tree_{}h.p'.format(i))
        i += 1
def play(self):
    pot = self.pick_random()
    if self.player_type == "minimax":
        pot = self.pick_minimax()
    elif self.player_type == "alphabeta":
        pot = self.pick_minimax_alpha_beta()
    elif self.player_type == "rightpot":
        pot = self.pick_right_pot()
    elif self.player_type == "leftpot":
        pot = self.pick_left_pot()
    elif self.player_type == "potwithfewest":
        pot = self.pick_pot_with_fewest_stones()
    elif self.player_type == "potwithmost":
        pot = self.pick_pot_with_most_stones()
    elif self.player_type == "takeanotherturn":
        pot = self.pick_pot_with_extra_turn(True)
    elif self.player_type == "avoidanotherturn":
        pot = self.pick_pot_with_extra_turn(False)
    elif self.player_type == "mcts":
        monte_carlo = mcts.MCTS(self.mancala, self.player_number,
                                self.maximum_time_secs, self.maximum_depth)
        pot = monte_carlo.pick_pot()
    elif self.player_type == "mcts-expansion-apriori":
        monte_carlo = MCTSModifiedFromApriori(self.mancala, self.player_number,
                                              self.maximum_time_secs, self.maximum_depth)
        pot = monte_carlo.pick_pot()
    elif self.player_type == "mcts-expansion-gsp":
        monte_carlo = MCTSModifiedFromGsp(self.mancala, self.player_number,
                                          self.maximum_time_secs, self.maximum_depth)
        pot = monte_carlo.pick_pot()
    elif self.player_type == "mcts_simulation_minimax":
        monte_carlo = MCTSSimulationMiniMax(self.mancala, self.player_number,
                                            self.maximum_time_secs, self.maximum_depth)
        pot = monte_carlo.pick_pot()
    print(f"Pot chosen for play: {pot}")
    return self.mancala.play(self.player_number, pot)
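# Design note: the elif chain above maps a player_type string to a picker; the
# same dispatch can be written as a table of zero-argument callables, which
# keeps play() short as strategies are added. A standalone sketch with
# hypothetical pickers (the real pickers live as methods on the player object):
PICKERS = {
    "rightpot": lambda: 6,
    "leftpot": lambda: 0,
}


def choose_pot(player_type, default=lambda: 3):
    return PICKERS.get(player_type, default)()


print(choose_pot("rightpot"))  # 6
print(choose_pot("unknown"))   # 3 (falls back to the default picker)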
def evaluate(net1, net2, rounds, device="cpu"):
    n1_win, n2_win = 0, 0
    mcts_stores = [mcts.MCTS(), mcts.MCTS()]
    for r_idx in range(rounds):
        r, _ = model.play_game(mcts_stores=mcts_stores, replay_buffer=None,
                               net1=net1, net2=net2, steps_before_tau_0=0,
                               mcts_searches=20, mcts_batch_size=16,
                               device=device)
        if r < -0.5:
            n2_win += 1
        elif r > 0.5:
            n1_win += 1
    if (n1_win + n2_win) == 0:
        return 0
    return n1_win / (n1_win + n2_win)
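# evaluate() above counts only decisive games: draws (|r| <= 0.5) are excluded
# from both the numerator and the denominator. A minimal standalone check of
# that accounting, with made-up result values:
results = [1.0, -1.0, 0.0, 1.0, 0.0]  # +1 net1 win, -1 net2 win, 0 draw
n1 = sum(1 for r in results if r > 0.5)
n2 = sum(1 for r in results if r < -0.5)
win_rate = n1 / (n1 + n2) if (n1 + n2) else 0
print(win_rate)  # 0.666..., i.e. net1's share of the decisive games only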
def test_play():
    ttt = CheckersGame(8, 8)
    s1 = mcts.MCTS(ttt, n_plays=20, max_depth=500, player=1)
    s2 = gaming.RandomStrategy(ttt, player=2)
    state, rewards, turn, log = gaming.play_game(ttt, [s1, s2], max_turns=100)
    print()
    print(f'the winner is the player {[p for p, r in rewards.items() if r == 1]}, turn: {turn}')
    print(state)
    print(log)
def test1(self):
    """Run MCTS with each parameter pair and record path lengths and timings."""
    params = [(.98, .2), (.98, .1), (.99, .2), (.99, .1)]
    for param in params:
        path_lengths = []
        timings = []
        for _ in range(self.runs):
            mcts_obj = mcts.MCTS(self.env, *param)
            path, timing = mcts_obj.run(self.n_iter)
            path_lengths.append(len(path))
            timings.append(timing)
        self.report.append((path_lengths, timings, param))
def generate_game(args):
    game_engine = game.build_demo_game_engine()
    entry = {"samples": [], "outcome": None}
    m = mcts.MCTS(game_engine.initial_state)
    all_steps = 0
    collapse = 0
    while True:
        if m.root_node.state.is_game_over():
            break
        most_visits = 0
        while most_visits < args.visits and m.root_node.all_edge_visits < args.visits * MAX_STEP_RATIO:
            all_steps += 1
            edge = m.step()
            most_visits = max(most_visits, edge.edge_visits)
        # Compute the proportion of visits each move received.
        total_visits = float(m.root_node.all_edge_visits)
        weighted_moves = {
            move: (m.root_node.outgoing_edges[move].edge_visits / total_visits
                   if move in m.root_node.outgoing_edges else 0)
            for move in m.root_node.state.moves
        }
        if weighted_moves["m0"] == 1.0:
            collapse += 1
        # Mix the policies by their visit counts to get a training policy.
        training_policy = np.sum([
            weight * m.root_node.state.moves[move].policy
            for move, weight in weighted_moves.items()
        ], axis=0)
        # Store the training sample.
        entry["samples"].append((
            [float(x) for x in m.root_node.state.sensor_data],
            [float(x) for x in training_policy],
            float(m.root_node.visit_weighted_edge_score()),
        ))
        # Step using the most visited move (with no noise).
        selected_move = mcts.sample_by_weight(weighted_moves)
        m.play(selected_move)
    entry["outcome"] = m.root_node.state.compute_utility()
    if collapse:
        print("!" * 100, "Collapses:", collapse)
    return entry
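# Sketch of the policy mixing used above: each move's prior policy vector is
# weighted by its share of root visits and summed into a single training
# policy. All numbers here are made up for illustration.
import numpy as np

move_policies = {"m0": np.array([0.7, 0.2, 0.1]),
                 "m1": np.array([0.1, 0.8, 0.1])}
visit_share = {"m0": 0.75, "m1": 0.25}  # fraction of root visits per move
training_policy = np.sum([w * move_policies[m] for m, w in visit_share.items()], axis=0)
print(training_policy)  # [0.55 0.35 0.1]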
def __init__(self, whiteNN, blackNN):
    self.pgn = chess.pgn.Game()
    self.board = chess.Board()
    self.currPlayer = chess.WHITE
    self.whiteNN = whiteNN
    self.blackNN = blackNN
    self.gameTree = mcts.MCTS(self.board)
    # Training examples
    self.moves = {chess.WHITE: [], chess.BLACK: []}
def test1(self):
    """Sweep a grid of MCTS parameter pairs and record path lengths and timings."""
    params = [(.95, .4), (.95, .3), (.95, .2), (.97, .4), (.97, .3),
              (.8, .5), (.8, .4), (.8, .3), (.83, .5), (.83, .4),
              (.83, .3), (.9, .5)]
    for param in params:
        print('running', param)
        path_lengths = []
        timings = []
        for _ in range(self.runs):
            mcts_obj = mcts.MCTS(self.env, *param)
            path, timing = mcts_obj.run(self.n_iter)
            path_lengths.append(len(path))
            timings.append(timing)
        self.report.append((path_lengths, timings, param))
def find_one_proof(args, model, env, file):
    env.set_source(file)
    env.args.curriculum_allowed = False
    env.args.max_exploration = None
    env.args.can_replace_proof = False
    env.args.use_replay = False
    env.args.use_action_shuffle = False
    success = 0
    prooflen = 0
    if args.evaltype == "mcts":
        evaltype = args.evaltype
        my_mcts = mcts.MCTS(model, args.n_action_slots)
        t0 = time.time()
        success, prooflen, attempts = my_mcts.build_tree(env, args.evaltime, args.evalcount)
        if success == 1:
            print("Proof found: {}, len {}, time: {} sec,\n{}".format(
                file, prooflen, time.time() - t0, env.current_steps))
    else:
        for attempts in range(1, 1 + args.evalcount):
            obs = env.reset()
            t0 = time.time()
            if args.evaltype == "backtrack":
                status, prooflen = prove_nonrecursive(args, model, env, t0)
                evaltype = args.evaltype
            else:
                if attempts == 1:
                    evaltype = "det"
                    evaltime = 10000
                else:
                    evaltype = args.evaltype
                    evaltime = args.evaltime
                status, prooflen = prove_nobacktrack(args, model, env, obs, t0,
                                                     evaltype, evaltime)
            if status == "success":
                print("Proof found: {}, len {}, time: {} sec,\n{}".format(
                    file, prooflen, time.time() - t0, env.current_steps))
                success = 1
                break
    if success == 0:
        print("Failure: {}".format(file))
    return success, prooflen, attempts, evaltype
def play_game(inference):
    # Initialize memory
    actions = []
    policies = []
    indices = []
    moves = []
    # Set up the search tree
    state = game_state.GameState()
    tree = mcts.MCTS(inference, state, num_threads=8)
    # Play the game
    while not tree.state.done():
        print(tree.state.state.unicode())
        # Perform search
        node = tree.search(128)
        # Calculate move probabilities and get the action index
        probs = mcts.policy(node, T=1.0)
        index = np.random.choice(len(node.actions), p=probs)
        # Get the action and update the tree
        action = node.actions[index]
        value = node.Q[index]
        move = tree.state.parse_action(action)
        print(tree.state.state.san(move), value)
        tree.act(index)
        # Store stats
        actions.append(action)
        policies.append(probs)
        indices.append(node.actions)
        moves.append(move)
    # Get the game outcome and the last player to move
    outcome = -tree.state.reward()
    winner = not tree.state.turn()
    print(tree.state.state.unicode())
    print(' '.join([chess.Board().variation_san(moves), state.state.result()]))
    return actions, policies, indices, outcome, winner
def run(self):
    mcts_config = mcts.Config()
    mcts_config.batch_size = 16
    mcts_config.simulation_num = 800
    mcts_config.forced_playouts = False
    mcts_config.use_dirichlet = True
    mcts_config.reuse_tree = True
    mcts_config.target_pruning = False
    mcts_config.immediate = False
    search = mcts.MCTS(mcts_config)
    device = 'cpu' if self.cpu_only else 'gpu'
    self.nn = network.Network(device)
    iter = 0
    while True:
        # Ask the server for the current neural network parameters.
        if self.update and iter % self.update_iter == 0:
            url = 'http://{}:{}/weight'.format(self.host, self.port)
            req = urllib.request.Request(url)
            with urllib.request.urlopen(req) as res:
                weights = _pickle.loads(res.read())
            self.nn.model.set_weights(weights)
        # Conduct selfplay.
        if self.random_play:
            game_record = selfplay.random_play(stop_with_checkmate=False,
                                               trim_checkmate=False)
        else:
            search.clear()
            game_record = selfplay.run(self.nn, search,
                                       search_checkmate=self.search_checkmate,
                                       stop_with_checkmate=False,
                                       trim_checkmate=False)
        # Send the result.
        url = 'http://{}:{}/record'.format(self.host, self.port)
        data = _pickle.dumps(game_record, protocol=4)
        req = urllib.request.Request(url, data)
        with urllib.request.urlopen(req) as res:
            pass
        iter += 1
def isready(self):
    if self.nn is None:
        self.nn = network.Network()
        if self.weight_file is not None:
            self.nn.load(self.weight_file)
    self.config = mcts.Config()
    self.config.simulation_num = int(1e9)
    self.config.reuse_tree = True
    if self.search is None:
        self.search = mcts.MCTS(self.config)
    self.search.clear()
    self.position = minishogilib.Position()
    # ponder
    self.ponder_thread = None
def train(_, network, lock, loss_record):
    game_index = network.count()
    print('{} game start'.format(game_index))
    tree = mcts.MCTS(network, game_index)
    game_begin_time = int(time.time())
    tree.game()
    learn_begin_time = int(time.time())
    lock.acquire()
    print('{} learn start'.format(game_index))
    mse_total, cross_entropy_total = 0, 0
    np.random.seed(int.from_bytes(os.urandom(4), byteorder='little'))
    for _ in range(TRAINING_STEP):
        mse, cross_entropy = network.learn()
        mse_total += mse
        cross_entropy_total += cross_entropy
    loss_record.add([mse_total / TRAINING_STEP, cross_entropy_total / TRAINING_STEP])
    print([mse_total / TRAINING_STEP, cross_entropy_total / TRAINING_STEP])
    if loss_record.size() % SAVE_INTERVAL == 0:
        print('save')
        network.save_state("{}state_{}.pkl".format(STATE_SAVE_FOLDER, loss_record.size()))
        network.save_memory("memory.npy")
        loss_record.save("loss_record.pkl")
    print('{} learn end'.format(game_index))
    lock.release()
    print('{} game end'.format(game_index))
    learn_end_time = int(time.time())
    learn_min, learn_sec = utils.compute_time(learn_begin_time, learn_end_time)
    print('learning cost {} mins {} seconds'.format(learn_min, learn_sec))
    game_end_time = int(time.time())
    game_min, game_sec = utils.compute_time(game_begin_time, game_end_time)
    print('{} game cost {} mins {} seconds'.format(game_index, game_min, game_sec))
def test_play():
    ttt = TicTacToeGame(size_x=4, size_y=4, len_to_win=3, n_players=2)
    s1 = mcts.MCTS(ttt, n_plays=50, max_depth=500, player=1)
    s2 = gaming.RandomStrategy(ttt, player=2)
    state, rewards, turn, log = gaming.play_game(ttt, [s1, s2], max_turns=50)
    print()
    print(f'the winner is the player {[p for p, r in rewards.items() if r == 1]}, turn: {turn}')
    print(state)
    print(log)
    state, rewards, turn, log = gaming.play_game(ttt, [s1, s2], max_turns=50)
    print()
    print(f'the winner is the player {[p for p, r in rewards.items() if r == 1]}, turn: {turn}')
    print(state)
    print(log)
def tst_sequence_children():
    Cons = np.array([[1, 0, 0, 0, 0],
                     [0, 0, 0, 0, 0],
                     [0, 0, 0, 0, 0],
                     [0, 0, 0, 0, 0],
                     [0, 0, 0, 0, 0]])
    roomids = [1, 2, 3, 4, 5]
    design = mcts.MCTS(Cons, 2000)
    design.play()
    states = []
    rootnode = design.real_path[0]
    queue = [rootnode]
    while len(queue) > 0:
        node = queue.pop(0)
        if node.expanded:
            if node.terminal is False:
                for child in node.children:
                    queue.append(child)
                    if child.type == 'R':
                        states.append(child.state)
    states_path = states[0:63]
    vis = mcts.Visualisation(roomids, states_path, Cons, 'unknown')
    vis.vis_static()
def act(self, obs, action_space):
    state = self._create_simulation_state(obs)
    if self._process_count:
        # Multiprocessing
        def _mcts_search(_state, _agent_id, _simulation_env, _iteration_limit, _shared_list):
            _env_state = _EnvState(_state, _agent_id, _simulation_env)
            _searcher = mcts.MCTS(_env_state, iteration_limit=_iteration_limit)
            _shared_list.append(_searcher.search())

        def get_most_frequent(l):
            count = Counter(l)
            return count.most_common(1)[0][0]

        with Manager() as manager:
            shared_list = manager.list()
            processes = []
            for _ in range(self._process_count):
                env_copy = self._make_env_copy()
                processes.append(Process(
                    target=_mcts_search,
                    args=(state, self._agent_id, env_copy, self._iteration_limit, shared_list)
                ))
            for p in processes:
                p.start()
            for p in processes:
                p.join()
            action = get_most_frequent(shared_list)
    else:
        env_state = _EnvState(state, self._agent_id, self._simulation_env)
        searcher = mcts.MCTS(env_state, iteration_limit=self._iteration_limit)
        action = searcher.search()
    return action
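# The multiprocessing branch above runs one independent MCTS per process and
# takes the action chosen most often across workers. A minimal standalone
# sketch of that voting step (the vote values are made up; the real searches
# are omitted):
from collections import Counter

votes = [2, 4, 2, 2, 1]  # one action per worker process
action = Counter(votes).most_common(1)[0][0]
print(action)  # 2, the majority action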
def __init__(self, human):
    self.MCTS = mcts.MCTS()
    # White goes first (0 is white and player, 1 is black and computer)
    self.human = human
    self.player = 1
    self.passed = False
    self.won = False
    # Initializing an empty board
    self.array = np.zeros([8, 8], dtype=int)  # np.int is removed in recent NumPy
    # Initializing center values
    self.array[3][3] = 1
    self.array[3][4] = -1
    self.array[4][3] = -1
    self.array[4][4] = 1
    self.oldarray = self.array
    global BIT
    global LSB_TABLE
    global bitmap
    for i in range(64):
        LSB_TABLE[(((bitmap & (~bitmap + 1)) * LSB_HASH) & FULL_MASK) >> 58] = i
        bitmap <<= 1
def main():
    # Configure argparser
    argparser = argparse.ArgumentParser(prog="do_mcmc_draft_search")
    configure_argparser(argparser)

    # Parse the arguments
    args = argparser.parse_args()

    # Configure logging
    utils.configure_logging(args.verbosity_level)

    # Get names of input/output files
    draftboard_file = args.db_file
    league_config_file = args.league_config
    time_to_run = args.time
    exploration_const = args.exp_constant
    bench_weight = args.bench_weight
    n_rollouts = args.n_rollouts
    sim_injury = args.sim_injury

    # Read the config file
    with open(league_config_file, "r") as stream:
        league_config = yaml.safe_load(stream)

    # Read the draft sheet
    draft_df = pd.read_excel(draftboard_file)

    # Initialize and validate the draft board
    db = draftboard.DraftBoard(draft_df, league_config)

    # Get my potential picks
    my_players = db.potential_picks[cols.NAME_FIELD].tolist()
    if not my_players:
        my_players = db.get_auto_draft_selections()
    logging.info("Players to compare: {0}".format(", ".join(my_players)))

    injury_risk_model = mcts_draft.EmpiricalInjuryModel(league_config) if sim_injury else None
    draft_tree_helper = mcts_draft.DraftTreeHelper(my_players, db,
                                                   min_adp_prior=0.01,
                                                   max_draft_node_size=25,
                                                   injury_model=injury_risk_model,
                                                   bench_weight=bench_weight)

    # Initialize MCTS for the Monte Carlo tree search
    mcmc_tree = mcts.MCTS(root_state=draft_tree_helper.get_root(),
                          tree_helper=draft_tree_helper,
                          time_limit=time_to_run * 1000 * 60,
                          num_rollouts=n_rollouts,
                          exploration_constant=exploration_const)

    # Do the MCTS search and output the best player
    best_action = mcmc_tree.search()
    logging.info("THIS IS THE BEST PLAYER:\n"
                 "**********************************************\n\n{0}\n\n"
                 "**********************************************".format(best_action.upper()))

    # Also output the best players for the next round, given the best pick
    best_node = mcmc_tree.root.children[best_action]
    logging.info("Next round best players: ")
    for child in best_node.children:
        logging.info(best_node.children[child])
def run(self): print("Starting up.. Playing " + str(self.numGames) + " games:") # set save interval for actor network parameters # clear the replayBuffer # randomly init weights and biases for Actor network self.Anet.setupSession() self.Anet.error_history = [] self.Anet.validation_history = [] startNode = Node.Node( state=State.State(player=self.player, hexSize=self.hexSize)) mcts = MCTS.MCTS(numberOfSimulationsPerMove=self.numSimulations, hexTrainer=self, Anet=self.Anet) player = startNode.getState().getPlayer() startNodeCopy = startNode player1Wins = 0 player2Wins = 0 player1Starts = 0 player2Starts = 0 gc = 1 #for a game in numberOfGames for game in range(0, self.numGames): #Start of a game #clear replay buffer self.replayBuffer = [] #initialize gameboard to empty board startNode = startNodeCopy startingPlayer = startNode.getState().getPlayer() if startingPlayer == 1: player1Starts += 1 else: player2Starts += 1 print("\n\n\n --- Game number " + str(gc)) #print starting state startNode.getState().getBoard().printBoard() while not startNode.getState().gameIsOver(): player = startNode.getState().getPlayer() #use tree policy to search from root to leaf #use ANET to choose rollout actions from L to final state #perform mcts-backpropogation #next gamestate print("Player " + str(player) + "'s turn") print("legal moves:") print(startNode.getState().getBoard().getLegalMoves()) nextNode = mcts.findNextMove(startNode, player, startingPlayer) # D = distribution of visitCounts alogn all arcs emanating from root # add case (root, D) to replayBuffer #choose actual move (action*) based on D #perform action* on root to produce successor state s* #update currentstate to s* # in mcts - retain subtree rooted at s*, discard everything else # rootnode = s* #TODO change this ? if self.verbose: nextNode.getState().getBoard().printBoard() if nextNode.getState().gameIsOver(): if self.verbose: print("\nPlayer " + str(player) + " won! \n") if player == 1: player1Wins += 1 else: player2Wins += 1 startNode = nextNode if nextNode.getState().gameIsOver(): break gc += 1 # train ANET on random minibatch of cases from replayBuffer np.random.shuffle(self.replayBuffer) #TODO write a custom do_training method inputs = [case[0] for case in self.replayBuffer] targets = [case[1] for case in self.replayBuffer] print("inputs:") print(inputs) print("targets") print(targets) feeder = {self.Anet.input: inputs, self.Anet.target: targets} gvars = [self.Anet.error] + self.Anet.grabvars _, grabvals, _ = self.Anet.run_one_step( [self.Anet.trainer], gvars, session=self.Anet.current_session, feed_dict=feeder) error = grabvals[0] self.Anet.error_history.append((gc, error)) # if gameNum %modulo saveinterval: save ANET parameters for later use in TOPP #next game #print result of all games print("\nPlayer 1 started " + str(player1Starts) + " games and won " + str(player1Wins) + " of " + str(self.numGames) + " games! " + str((player1Wins / self.numGames * 100)) + " % ") print("Player 2 started " + str(player2Starts) + " games and won " + str(player2Wins) + " of " + str(self.numGames) + " games! " + str((player2Wins / self.numGames * 100)) + " % ") print("\n") TFT.plot_training_history(self.Anet.error_history, self.Anet.validation_history, xtitle="Game", ytitle="Error", title="", fig=True) self.Anet.close_current_session(view=False) #loop to keep program from closing at the end so we can view the graph x = "" while x == "": x = str(input("enter any key to quit"))
import mcts
import same_game_env

env = same_game_env.Env()
mcts = mcts.MCTS(env)
print(mcts.search(100))
import numpy as np

import mcts


def read_file(file, h):
    with open(file) as f:
        _, _ = [int(x) for x in next(f).split()]  # read first line
        stacks = []
        for line in f:  # read rest of lines
            stack = [int(x) for x in line.split()[1::]]
            # if stack[0] == 0: stack.pop()
            stacks.append(stack)
    S = len(stacks)
    cells = np.zeros((S, h), dtype=int)
    for stack in range(S):
        for tier in range(len(stacks[stack])):
            cells[stack][tier] = stacks[stack][tier]
    return (cells, S)


H = 5
cells, stacks = read_file("instancias\\BF\\BF1\\cpmp_16_5_48_10_29_1.bay", H)
state = MarshallingState(cells, stacks, H)
agent = mcts.MCTS()
agent.search(state)
print(agent.best_state.get_reward())
print(agent.best_state.cells)
BATCH_SIZE = 256
TRAIN_ROUNDS = 10
MIN_REPLAY_TO_TRAIN = 2000  # 10000
BEST_NET_WIN_RATIO = 0.60
EVALUATE_EVERY_STEP = 100
EVALUATION_ROUNDS = 20
STEPS_BEFORE_TAU_0 = 10

device = torch.device("cpu")
path = os.getcwd()
model_path = os.path.join(path, "model4")
net = torch.load(model_path)
mcts = mcts.MCTS()

while True:
    won = None
    cur_player = s.get_random_player()
    cur_state = s.init()
    print(s.decode(cur_state))
    while won is None:
        print(f"Player: {cur_player}")
        if cur_player == 1:
            mcts.search_batch(20,