def move(self, game: Game, possible_steps=None):
    """Search from ``game`` and return the step of the preferred root child.

    A brand-new ``MCSTTreeNode`` root is created on every call. The search
    then performs ``self.trials_num`` rounds of
    ``select`` -> ``simulate`` -> ``backpropogate`` and finally asks
    ``choose_best_child`` for the answer.

    Args:
        game: Position to search from; it becomes the game of the root node.
        possible_steps: Passed through untouched to ``self.select`` on every
            trial.

    Returns:
        The ``step`` attribute of the node returned by ``choose_best_child``.
    """
    self.tree = MCSTTreeNode(game)

    for trial in range(self.trials_num):
        leaf = self.select(possible_steps)
        self.backpropogate(leaf, self.simulate(leaf))
        if self.debug:
            # Debug aid: render the tree after each completed trial.
            self.tree.draw(f"{self.label} - {game.moves_num} - {trial}")

    best = self.choose_best_child()
    if self.debug:
        print(self.tree.leafs_counter())
    return best.step
def reset_tree(self, game, possible_states=None):
    """Make the node that represents ``game`` the new root of ``self.tree``.

    If a previous ``self.chosen_node`` exists, it is asked (via
    ``find_game_node``) for a node matching ``game``; when none is found, or
    when there is no ``chosen_node`` at all, a fresh ``MCSTTreeNode`` is built
    around a deep copy of ``game``.

    NOTE(review): this block was reconstructed from a whitespace-collapsed
    source, so the nesting below is a best guess. In particular, confirm
    whether ``prune_tree_with_node`` / ``draw`` are really meant to run only
    in debug mode, and whether ``update_node_children_with_states`` should run
    on every path.

    Args:
        game: Current game position to re-root the tree on.
        possible_states: Forwarded to ``update_node_children_with_states``.
    """
    node = None
    if hasattr(self, 'chosen_node'):
        # Try to reuse the part of the old tree that corresponds to `game`.
        node = self.chosen_node.find_game_node(game)
        if node is None:
            # Nothing reusable: start over from a private copy of the game.
            node = MCSTTreeNode(copy.deepcopy(game))
            # Disabled: self.chosen_node.append(node)
        elif self.debug:
            print('found!!!!!!', len(node.children))
            self.tree.prune_tree_with_node(node)
            self.tree.draw('prunned')
    else:
        # First call: no previous choice exists, so build a new root.
        node = MCSTTreeNode(copy.deepcopy(game))
    self.update_node_children_with_states(node, possible_states)
    self.tree = node
    # Explicitly collect the parts of the old tree that are no longer referenced.
    gc.collect()
def select(self):
    """Descend from the root and return the node that should be simulated.

    Walk rules, applied repeatedly starting at ``self.tree``:

    * Interior node with ``n == 0`` (children attached but no statistics
      yet): return its first child.
    * Interior node with statistics: continue with the child that has the
      highest ``self.ucb1`` value (the first such child on ties).
    * Leaf that is unvisited and not the root, or whose game is in a final
      state: return it as is.
    * Any other leaf: expand it with one child per step from
      ``get_possible_next_steps(self.states_limit)`` and return the first
      child. If the game offers no steps at all, the leaf itself is returned
      instead of failing with ``IndexError`` on an empty child list.

    Returns:
        The tree node selected for simulation.
    """
    node = self.tree
    while True:
        if not node.is_leaf():
            if node.n == 0:
                # No visit counts to feed UCB1 yet; just take the first child.
                return node.children[0]
            node = max(node.children, key=self.ucb1)
            continue

        if (node.n == 0 and node.id != self.tree.id) or node.game.is_final_state():
            # Fresh (non-root) node or terminal position: nothing to expand.
            return node

        for step in node.game.get_possible_next_steps(self.states_limit):
            node.append(MCSTTreeNode(node.game.copy_and_move(step), step))

        if not node.children:
            # Dead end: the game reported no next steps, so treat the leaf
            # like a terminal node rather than indexing an empty list.
            return node
        return node.children[0]
class DepthSensitiveMCSTAgentWithEvaluation(Agent):
    """Monte-Carlo tree search agent that scores nodes with an evaluator.

    Instead of playing a game out, ``simulate`` asks ``self.evaluator`` to
    evaluate the game of the selected node.

    NOTE(review): ``self.debug`` and ``self.label`` are read but never
    assigned in this class; presumably the ``Agent`` base class provides
    them -- confirm.
    """

    def __init__(self, label, UCB1_const=1.41, trials_num=100, steps_limit=3,
                 evaluator=None):
        """Store the search parameters.

        Args:
            label: Passed to ``Agent.__init__``.
            UCB1_const: Weight of the exploration term in ``ucb1``.
            trials_num: Number of select/simulate/backpropagate rounds per
                ``move`` call.
            steps_limit: Argument handed to ``get_possible_next_steps`` when
                a node is expanded.
            evaluator: Object whose ``evaluate(game)`` result is used as the
                simulation score. ``simulate`` fails if it is left as
                ``None``.
        """
        super().__init__(label)
        self.trials_num = trials_num
        self.steps_limit = steps_limit
        self.UCB_C = UCB1_const
        # Small epsilon that keeps divisions by the visit count finite when
        # a node has n == 0.
        self.e = 0.000001
        self.evaluator = evaluator

    def move(self, game, possible_steps=None):
        """Search from ``game`` and return the step of the best root child.

        Args:
            game: Position to search from; becomes the root node's game.
            possible_steps: Optional steps used to expand the root instead of
                asking the game for them.

        Returns:
            The ``step`` of the node picked by ``choose_best_child``.

        Raises:
            ValueError: If the root has no children after the search.
        """
        if hasattr(self, 'tree'):
            # Release the tree of the previous call before building a new one.
            del self.tree
            gc.collect()
        self.tree = MCSTTreeNode(game)
        for trial in range(self.trials_num):
            node = self.select(possible_steps)
            result = self.simulate(node)
            self.backpropogate(node, result)
            if self.debug:
                self.tree.draw(f"{self.label} - {game.moves_num} - {trial}")
        chosen_node = self.choose_best_child()
        if self.debug:
            print(self.tree.leafs_counter())
        return chosen_node.step

    def select(self, possible_steps=None):
        """Descend from the root and return the node to simulate next.

        Interior nodes are traversed by always following the child with the
        highest ``ucb1`` value (first child on ties). A leaf that is
        unvisited and not the root, or whose game is final, is returned
        directly. Any other leaf is expanded and its first child returned;
        if the expansion yields no children, the leaf itself is returned
        instead of failing with ``IndexError``.

        Args:
            possible_steps: When given, used as the steps for expanding the
                root node; all other nodes ask their game for steps.
        """
        node = self.tree
        while True:
            if not node.is_leaf():
                node = max(node.children, key=self.ucb1)
                continue

            if (node.n == 0 and node.id != self.tree.id) or node.game.is_final_state():
                # Fresh (non-root) node or terminal position.
                return node

            if node.id == self.tree.id and possible_steps is not None:
                new_steps = possible_steps
            else:
                new_steps = node.game.get_possible_next_steps(self.steps_limit)
            for step in new_steps:
                node.append(MCSTTreeNode(node.game.copy_and_move(step), step))

            if not node.children:
                # Nothing to expand: treat the leaf like a terminal node
                # rather than indexing an empty child list.
                return node
            return node.children[0]

    def ucb1(self, node: MCSTTreeNode):
        """Return the selection score of ``node`` relative to its parent.

        NOTE(review): the first term is the cumulative score ``node.s``,
        whereas ``choose_best_child`` ranks by ``s / n``; textbook UCB1 uses
        the mean reward here -- confirm this is intended.
        """
        return node.s + self.UCB_C * math.sqrt(
            math.log(node.parent.n) / (node.n + self.e))

    def simulate(self, node: MCSTTreeNode):
        """Return ``self.evaluator.evaluate`` applied to the node's game."""
        return self.evaluator.evaluate(node.game)

    def winner2score(self, winner, moves_num):
        """Map a winner label to a score that shrinks with ``moves_num``.

        Returns ``10 - moves_num`` when ``winner`` equals ``self.label``,
        ``0`` for ``'draw'`` and ``-(10 - moves_num)`` otherwise.
        """
        if winner == self.label:
            return 10 - moves_num
        elif winner == 'draw':
            return 0
        else:
            return -(10 - moves_num)

    def backpropogate(self, node, score):
        """Add ``score`` to ``node`` and every ancestor up to the root.

        NOTE(review): the visit counter grows by 10 per update here (the
        sibling ``MCSTAgent`` uses 1) -- confirm the weighting is deliberate.
        """
        current_node = node
        while current_node is not None:
            current_node.n += 10
            current_node.s += score
            current_node = current_node.parent

    def choose_best_child(self):
        """Return the root child with the highest average score ``s / n``.

        Raises:
            ValueError: If the root has no children (for example the search
                started from a final position).
        """
        children = self.tree.children
        if not children:
            raise ValueError(
                f"{self.label}: search root has no children to choose from")
        return max(children, key=lambda child: child.s / (child.n + self.e))
def add_children_for_states(self, node, states):
    """Append one child to ``node`` for every entry in ``states``.

    Each state is converted into a game through
    ``node.game.next_state_clone`` and wrapped in a new ``MCSTTreeNode``
    (constructed without a step) before being appended.

    Args:
        node: Tree node that receives the new children.
        states: Iterable of states to create children for.
    """
    for next_state in states:
        child_game = node.game.next_state_clone(next_state)
        child = MCSTTreeNode(child_game)
        node.append(child)
class MCSTAgent(Agent):
    """Monte-Carlo tree search agent that scores nodes by random playouts.

    NOTE(review): ``self.debug`` and ``self.label`` are read but never
    assigned in this class; presumably the ``Agent`` base class provides
    them -- confirm.
    """

    def __init__(self, label, UCB1_const=1.41, trials_num=100, steps_limit=3):
        """Store the search parameters.

        Args:
            label: Passed to ``Agent.__init__``.
            UCB1_const: Weight of the exploration term in ``ucb1``.
            trials_num: Number of select/simulate/backpropagate rounds per
                ``move`` call.
            steps_limit: Argument handed to ``get_possible_next_steps`` when
                a node is expanded.
        """
        super().__init__(label)
        self.trials_num = trials_num
        self.steps_limit = steps_limit
        self.UCB_C = UCB1_const
        # Small epsilon that keeps divisions by the visit count finite when
        # a node has n == 0.
        self.e = 0.000001

    def move(self, game: Game, possible_steps=None):
        """Search from ``game`` and return the step of the best root child.

        Args:
            game: Position to search from; becomes the root node's game.
            possible_steps: Optional steps used to expand the root instead of
                asking the game for them.

        Returns:
            The ``step`` of the node picked by ``choose_best_child``.

        Raises:
            ValueError: If the root has no children after the search.
        """
        self.tree = MCSTTreeNode(game)
        for trial in range(self.trials_num):
            node = self.select(possible_steps)
            result = self.simulate(node)
            self.backpropogate(node, result)
            if self.debug:
                self.tree.draw(f"{self.label} - {game.moves_num} - {trial}")
        chosen_node = self.choose_best_child()
        if self.debug:
            print(self.tree.leafs_counter())
        return chosen_node.step

    def select(self, possible_steps=None):
        """Descend from the root and return the node to simulate next.

        Interior nodes are traversed by always following the child with the
        highest ``ucb1`` value (first child on ties). A leaf that is
        unvisited and not the root, or whose game is final, is returned
        directly. Any other leaf is expanded and its first child returned;
        if the expansion yields no children, the leaf itself is returned
        instead of failing with ``IndexError``.

        Args:
            possible_steps: When given, used as the steps for expanding the
                root node; all other nodes ask their game for steps.
        """
        node = self.tree
        while True:
            if not node.is_leaf():
                node = max(node.children, key=self.ucb1)
                continue

            if (node.n == 0 and node.id != self.tree.id) or node.game.is_final_state():
                # Fresh (non-root) node or terminal position.
                return node

            if possible_steps is not None and node.id == self.tree.id:
                new_steps = possible_steps
            else:
                new_steps = node.game.get_possible_next_steps(self.steps_limit)
            for step in new_steps:
                node.append(MCSTTreeNode(node.game.copy_and_move(step), step))

            if not node.children:
                # Nothing to expand: treat the leaf like a terminal node
                # rather than indexing an empty child list.
                return node
            return node.children[0]

    def ucb1(self, node: MCSTTreeNode):
        """Return the selection score of ``node`` relative to its parent.

        NOTE(review): the first term is the cumulative score ``node.s``,
        whereas ``choose_best_child`` ranks by ``s / n``; textbook UCB1 uses
        the mean reward here -- confirm this is intended.
        """
        return node.s + self.UCB_C * math.sqrt(
            math.log(node.parent.n) / (node.n + self.e))

    def simulate(self, node: MCSTTreeNode):
        """Play a deep copy of the node's game with random agents and score it.

        The copy has ``debug`` switched off, is played by one ``RandomAgent``
        per entry of ``labels``, and its ``evaluate()`` result is converted
        with ``winner2score``. The node's own game is left untouched.
        """
        tmp_game = copy.deepcopy(node.game)
        tmp_game.debug = False
        tmp_agents = [RandomAgent(label) for label in tmp_game.labels]
        tmp_game.play(tmp_agents)
        winner = tmp_game.evaluate()
        return self.winner2score(winner)

    def winner2score(self, winner):
        """Return 10 if ``winner`` is this agent, 0 for ``'draw'``, else -10."""
        if winner == self.label:
            return 10
        elif winner == 'draw':
            return 0
        else:
            return -10

    def backpropogate(self, node, score):
        """Count one visit and add ``score`` on ``node`` and all its ancestors."""
        current_node = node
        while current_node is not None:
            current_node.n += 1
            current_node.s += score
            current_node = current_node.parent

    def choose_best_child(self):
        """Return the root child with the highest average score ``s / n``.

        Raises:
            ValueError: If the root has no children (for example the search
                started from a final position).
        """
        children = self.tree.children
        if not children:
            raise ValueError(
                f"{self.label}: search root has no children to choose from")
        return max(children, key=lambda child: child.s / (child.n + self.e))