def action(self, board, trainer=None):
    """
    Select and move a worker, then place a building.
    If a trainer is specified, builds the corresponding search tree and
    updates its weights; otherwise uses the trained weights and searches
    with a minimax tree with alpha-beta pruning.
    """
    board_levels, all_worker_coords = FastBoard.convert_board_to_array(board)
    fast_board = FastBoard()
    if trainer is not None:
        if isinstance(trainer, RootStrapAB):
            minimax_tree = MinimaxWithPruning(board_levels, all_worker_coords,
                                              self.name, self.search_depth,
                                              fast_board, trainer.weights, 'V2')
        elif isinstance(trainer, TreeStrapMinimax):
            # TreeStrap updates on interior nodes, so it needs the full
            # (unpruned) minimax tree.
            minimax_tree = Minimax(board_levels, all_worker_coords, self.name,
                                   self.search_depth, fast_board,
                                   trainer.weights, 'V2')
        new_board_levels, new_worker_coords = minimax_tree.get_best_node()
        new_board = FastBoard.convert_array_to_board(board, new_board_levels,
                                                     new_worker_coords)
        # Update weights while in training mode.
        trainer.update_weights(minimax_tree)
    else:
        minimax_tree = MinimaxWithPruning(board_levels, all_worker_coords,
                                          self.name, self.search_depth,
                                          fast_board, self.trained_weights, 'V2')
        new_board_levels, new_worker_coords = minimax_tree.get_best_node()
        new_board = FastBoard.convert_array_to_board(board, new_board_levels,
                                                     new_worker_coords)
    return new_board
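
# Hypothetical self-play training driver for the method above. `action`'s
# signature, RootStrapAB, Board, and place_workers are all taken from this
# section, but the terminal check (`is_winner`) is an assumed helper and the
# real game loop may differ.
trainer = RootStrapAB()                        # shared linear weights, see __init__ below
agent_a = LinearRlAgentV2("A", 3)
agent_b = LinearRlAgentV2("B", 3)
board = Board(agent_a, agent_b)
board = agent_a.place_workers(board)
board = agent_b.place_workers(board)
current = agent_a
while not board.is_winner():                   # assumption: Board exposes a terminal test
    board = current.action(board, trainer)     # search + weight update each move
    current = agent_b if current is agent_a else agent_a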
def __init__(self, root, model, args):
    self.root = root
    self.model = model
    self.args = args
    self.depth = args["Tree_depth"]
    self.fast_board = FastBoard()
    self.A = LinearRlAgentV2("A", 3)
    self.B = LinearRlAgentV2("B", 3)
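
# A plausible `args` configuration, inferred from the keys this section
# actually reads ("Tree_depth", "Num_Simulations", "epochs"); the values
# are illustrative assumptions, not the project's tuned settings.
args = {
    "Tree_depth": 3,        # depth limit for each breadth-limited MCTS run
    "Num_Simulations": 50,  # breadth_run passes per data-generation round
    "epochs": 10,           # outer loop iterations in Trainer_CNN.train
}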
def __init__(self, weights=None, learning_rate=10**-5):
    self.NUM_WEIGHTS = 22
    self.learning_rate = learning_rate
    self.fast_board = FastBoard()
    if weights is None:
        # Randomly initialize weights between -1 and 1.
        self.weights = np.array(
            [random.uniform(-1, 1) for i in range(self.NUM_WEIGHTS)])
    else:
        self.weights = np.array(weights)
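
# Minimal sketch of the RootStrap-style update these weights feed, assuming
# a linear value function v(s) = w . features(s). `features` and
# `search_value` are hypothetical stand-ins for whatever update_weights
# actually reads off the minimax tree.
import numpy as np

def rootstrap_update(weights, features, search_value, learning_rate):
    value_estimate = np.dot(weights, features)   # current linear estimate at the root
    delta = search_value - value_estimate        # error vs. the deep search result
    return weights + learning_rate * delta * features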
class Node():
    """
    A node in the MCTS tree.
    """
    def __init__(self, state, parent=None):
        self.state = state
        self.children = {}
        self.parent = parent
        self.visit_count = 0
        self.value_sum = 0
        self.to_play = state.Player_turn()
        self.fast_board = FastBoard()

    def add_children(self, children):
        # Bug fix: self.children is a dict, so dict.add does not exist;
        # mirror the Node -> state mapping used in expand().
        for child in children:
            self.children[child] = child.state

    def value(self):
        """
        Mean value of the node (0 if unvisited).
        """
        if self.visit_count == 0:
            return 0
        return self.value_sum / self.visit_count

    def is_expanded(self):
        return len(self.children) > 0

    def select_action(self, temperature):
        """
        Select an action based on visit counts and temperature: 0 is greedy,
        inf is uniform random, values in between soften the distribution.
        """
        visit_counts = np.array(
            [child.visit_count for child in self.children.keys()])
        actions = list(self.children.keys())
        if temperature == 0:
            new_state = actions[np.argmax(visit_counts)]
        elif temperature == np.inf:
            new_state = np.random.choice(actions)
        else:
            visit_count_distribution = visit_counts**(1 / temperature)
            visit_count_distribution = visit_count_distribution / sum(
                visit_count_distribution)
            new_state = np.random.choice(actions, p=visit_count_distribution)
        return new_state

    def select_child(self):
        """
        Select the child with the highest UCB score.
        """
        children_nodes = list(self.children.keys())
        UCB_score = list(map(upper_confidence_bound, children_nodes))
        return children_nodes[np.argmax(UCB_score)]

    def expand(self):
        """
        Expand this node with every legal successor state.
        """
        build, worker = self.fast_board.convert_board_to_array(self.state)
        children = self.fast_board.all_possible_next_states(
            build, worker, self.state.Player_turn())
        b_children = [
            self.fast_board.convert_array_to_board(self.state, i, j)
            for i, j in children
        ]
        for child in b_children:
            self.children[Node(child, parent=self)] = child
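
# `upper_confidence_bound` is called in select_child but never defined in
# this section; a standard UCT form would look roughly like this sketch
# (the exploration constant c is an assumption).
import math

def upper_confidence_bound(node, c=1.4):
    if node.visit_count == 0:
        return float("inf")   # visit unexplored children first
    exploit = node.value()
    explore = c * math.sqrt(
        math.log(node.parent.visit_count) / node.visit_count)
    return exploit + explore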
class Trainer_CNN(Trainer):
    def __init__(self, player, args, NN=None):
        self.args = args
        self.state = Board(LinearRlAgentV2("A", 3), LinearRlAgentV2("B", 3))
        self.training_examples = []
        self.mcts = None
        self.nn = NN if NN is not None else ValueFunc()
        self.loss_array = []
        # (building_level, worker_owner) -> channel index.
        self.mappings = {
            (0, None): 0, (1, None): 1, (2, None): 2, (3, None): 3, (4, None): 4,
            (0, 'A'): 5, (1, 'A'): 6, (2, 'A'): 7, (3, 'A'): 8,
            (0, 'B'): 9, (1, 'B'): 10, (2, 'B'): 11, (3, 'B'): 12,
        }
        self.nn.to(self.nn.device)
        self.name = player
        self.workers = [Worker([], "A1"), Worker([], "A2")]
        self.fast_board = FastBoard()

    def convertTo2D(self, board):
        """
        Convert a board into a 2D tensor of shape (2, 5, 5): channel 0 holds
        building levels normalized to [0, 1], channel 1 holds worker
        ownership (1 for player A, -1 for player B, 0 for empty).
        """
        buildings = []
        players = []
        for squares in board.board:
            level_row = []
            owner_row = []
            for square in squares:
                level_row.append(square.building_level / 4)
                if square.worker is None:
                    owner_row.append(0)
                elif square.worker.name[0] == "A":
                    owner_row.append(1)
                else:
                    owner_row.append(-1)
            buildings.append(level_row)
            players.append(owner_row)
        return torch.as_tensor([buildings, players])

    def convert_nodes_to_training_data(self, set_of_nodes):
        training_data = [(i.state, i.value()) for i in set_of_nodes]
        shuffle(training_data)
        return training_data

    def generate_training_data(self):
        """
        Perform iterations of MCTS and return the collapsed tree as training data.
        """
        print("\nGenerating Data")
        temp_MCTS = self.mcts
        node = self.mcts.root
        training_data = []
        # Earlier full-run variant, kept for reference:
        # for i in tqdm(range(self.args['Iterations'])):
        #     temp_MCTS.run(node.state.Player_turn())
        #     training_data = temp_MCTS.collapse()
        for i in tqdm(range(self.args["Num_Simulations"])):
            root = temp_MCTS.breadth_run(node)
            training_data += list(temp_MCTS.collapse(root))
            node = root.select_child()
        return training_data

    def save_checkpoint(self, folder):
        """
        Save the neural network's weights.
        """
        if not os.path.exists(folder):
            os.mkdir(folder)
        filepath = os.path.join(folder, "MCTS_AI_CNN")
        torch.save(self.nn.state_dict(), filepath)

    def learn(self, train_examples):
        """
        Fit the network to the (state, value) pairs from one MCTS tree.
        """
        print("\nLearning from Data")
        for state, value in train_examples:
            target = torch.tensor(value, dtype=torch.float32).to(self.nn.device)
            target = target.view(1)
            converted_state = self.convertTo2D(state)
            # Bug fix: the original called torch.nn.forward(...) and read
            # t.nn.device; the prediction must come from this trainer's network.
            pred = self.nn.forward(converted_state).to(self.nn.device)
            loss = self.nn.loss(pred, target)
            self.nn.optimizer.zero_grad()
            loss.backward()
            self.nn.optimizer.step()
            self.loss_array.append(loss.item())
        self.plot_loss()

    def train(self):
        self.loss_array = []
        for i in tqdm(range(self.args["epochs"])):
            training_examples = self.generate_training_data()
            training_examples = self.convert_nodes_to_training_data(
                training_examples)
            self.learn(training_examples)
        self.save_checkpoint(r'C:\Users\sarya\Desktop\Semester 4\ISM\Game')

    def action(self, board):
        build, worker = self.fast_board.convert_board_to_array(board)
        pos_states = self.fast_board.all_possible_next_states(
            build, worker, board.Player_turn())
        b_pos_states = [
            self.fast_board.convert_array_to_board(board, i, j)
            for i, j in pos_states
        ]
        values = []
        for state in b_pos_states:
            converted_state = self.convertTo2D(state)
            values.append(
                torch.flatten(
                    self.nn.forward(converted_state).to(self.nn.device)))
        # Player A maximizes the value estimate, player B minimizes it.
        if board.Player_turn() == "A":
            return b_pos_states[torch.argmax(torch.cat(values)).item()]
        return b_pos_states[torch.argmin(torch.cat(values)).item()]
    def place_workers(self, board):
        """
        Randomly place the agent's two workers on the board.
        """
        place_count = 0
        while place_count < 2:
            try:
                coords = [np.random.randint(0, 5), np.random.randint(0, 5)]
                # Update both the worker and the square; an invalid or
                # occupied square raises, and the attempt is simply retried.
                self.workers[place_count].update_location(coords)
                board.board[coords[0]][coords[1]].update_worker(
                    self.workers[place_count])
                place_count += 1
            except Exception:
                continue
        return board
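
# Hypothetical end-to-end usage of Trainer_CNN: the constructor and train()
# come from this section, but the MCTS class name and its wiring through
# trainer.mcts are assumptions based on the (root, model, args) __init__ above.
args = {"Tree_depth": 3, "Num_Simulations": 50, "epochs": 10}
trainer = Trainer_CNN("A", args)
trainer.mcts = MCTS(Node(trainer.state), trainer.nn, args)  # assumed wiring
trainer.train()                             # generate MCTS data, fit the CNN
next_board = trainer.action(trainer.state)  # greedy play with the trained net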
def action(self, board, trainer=None):
    """
    Select and move a worker, then place a building.
    If a trainer is specified, builds the corresponding search tree and
    updates its weights; otherwise uses the trained weights and searches
    with a minimax tree with alpha-beta pruning, deepening adaptively.
    """
    board_levels, all_worker_coords = FastBoard.convert_board_to_array(board)
    fast_board = FastBoard()
    if trainer is not None:
        if isinstance(trainer, RootStrapAB):
            minimax_tree = MinimaxWithPruning(board_levels, all_worker_coords,
                                              self.name, self.search_depth,
                                              fast_board, trainer.weights, 'V1')
        elif isinstance(trainer, TreeStrapMinimax):
            minimax_tree = Minimax(board_levels, all_worker_coords, self.name,
                                   self.search_depth, fast_board,
                                   trainer.weights, 'V1')
        new_board_levels, new_worker_coords = minimax_tree.get_best_node()
        new_board = FastBoard.convert_array_to_board(board, new_board_levels,
                                                     new_worker_coords)
        # Update weights while in training mode.
        trainer.update_weights(minimax_tree)
    else:
        search_depth = self.search_depth
        # Adaptive depth when not in training mode: search deeper when the
        # combined mobility of both players is low.
        if self.adaptive_search:
            my_num_moves = len(
                fast_board.all_possible_next_states(
                    board_levels, all_worker_coords, self.name))
            opponent = 'B' if self.name == 'A' else 'A'
            opp_num_moves = len(
                fast_board.all_possible_next_states(
                    board_levels, all_worker_coords, opponent))
            # Depth parity decides whose turn falls on the last searched ply.
            if self.search_depth % 2 == 0:
                next_search = self.name
            else:
                next_search = opponent
            if my_num_moves + opp_num_moves < 20:
                search_depth = self.search_depth + 3
            elif my_num_moves + opp_num_moves < 30:
                search_depth = self.search_depth + 2
            elif my_num_moves + opp_num_moves < 40:
                search_depth = self.search_depth + 1
            elif (my_num_moves < 20 and next_search == self.name) or (
                    opp_num_moves < 20 and next_search == opponent):
                search_depth = self.search_depth + 1
            print(f'Search Depth is {search_depth}, '
                  f'my moves = {my_num_moves}, opp moves = {opp_num_moves}')
        minimax_tree = MinimaxWithPruning(board_levels, all_worker_coords,
                                          self.name, search_depth, fast_board,
                                          self.trained_weights, 'V1')
        new_board_levels, new_worker_coords = minimax_tree.get_best_node()
        new_board = FastBoard.convert_array_to_board(board, new_board_levels,
                                                     new_worker_coords)
    return new_board
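
# The adaptive-depth rule above, factored into a pure helper for readability;
# the thresholds are copied from the branches in `action`, and `next_is_me`
# mirrors the depth-parity check on next_search.
def adaptive_depth(base_depth, my_moves, opp_moves, next_is_me):
    total = my_moves + opp_moves
    if total < 20:
        return base_depth + 3
    if total < 30:
        return base_depth + 2
    if total < 40:
        return base_depth + 1
    if (my_moves < 20 and next_is_me) or (opp_moves < 20 and not next_is_me):
        return base_depth + 1
    return base_depth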