def backup(self, score=0):
    """Propagate `score` from this node up to the root.

    Each node on the path increments its visit count and folds `score`
    into its running average reward.  When the edge to the parent
    crosses a player boundary (the parent is not a decision node and a
    different player acts there), the score is negated so every node
    accumulates rewards from its own acting player's point of view.
    """
    self.num_visits += 1
    # Incremental (running) mean: avg += (x - avg) / n
    self._avg_reward += (score - self._avg_reward) / self.num_visits
    debug.log(
        {
            'num_visits': self.num_visits,
            'score': score,
            'avg_reward': self._avg_reward,
            'is_root': self.is_root()
        },
        level=logging.INFO)
    if self.is_root():
        return
    parent = self.parent
    # Zero-sum convention: flip the sign when the parent evaluates
    # outcomes for the opposing player.
    if (not parent.is_decision_node()
            and self.acting_player != parent.acting_player):
        score = -score
    debug.log(
        {
            'parent.is_decision_node': parent.is_decision_node(),
            'parent.acting_player': parent.acting_player,
            'acting_player': self.acting_player,
            'parent.score': score
        },
        level=logging.INFO)
    parent.backup(score=score)
def backup(self, score=0):
    """Propagate `score` from this node up to the root.

    Each node on the path increments its visit count and folds `score`
    into its running average reward.  When the edge to the parent
    crosses a player boundary (the parent is not a decision node and a
    different player acts there), the score is negated so every node
    accumulates rewards from its own acting player's point of view.

    Log payloads are wrapped in lambdas so the dicts are only built
    when the logger actually emits them.
    """
    self.num_visits += 1
    # Incremental (running) mean: avg += (x - avg) / n
    self._avg_reward += (score - self._avg_reward) / self.num_visits
    debug.log(
        lambda: {
            'num_visits': self.num_visits,
            'score': score,
            'avg_reward': self._avg_reward,
            'is_root': self.is_root()
        },
        level=logging.INFO)
    if self.is_root():
        return
    parent = self.parent
    # Zero-sum convention: flip the sign when the parent evaluates
    # outcomes for the opposing player.
    if (not parent.is_decision_node()
            and self.acting_player != parent.acting_player):
        score = -score
    debug.log(
        lambda: {
            'parent.is_decision_node': parent.is_decision_node(),
            'parent.acting_player': parent.acting_player,
            'acting_player': self.acting_player,
            'parent.score': score
        },
        level=logging.INFO)
    parent.backup(score=score)
def select_action(self, game_state, time_allowed_s=-1, **_):
    '''Select an action in game state, `game_state`.

    Return an optimal action in game state, `game_state`, if the time
    allowed, `time_allowed_s` is sufficient. Otherwise, return the best
    action found after `time_allowed_s` seconds. A non-positive
    `time_allowed_s` implies no time limit.
    '''
    # Fix: time.clock() was removed in Python 3.8 and, on Unix,
    # measured CPU time rather than wall time.  A real-time search
    # budget needs a monotonic wall clock: time.perf_counter().
    start_time = time.perf_counter() if time_allowed_s > 0 else 0
    best_action = None
    action_value = -INF
    debug.log({'Time available in seconds': time_allowed_s},
              level=logging.INFO)
    debug.log(str(game_state), level=logging.INFO, raw=True)
    self._tree = {'children': [], 'value': action_value}
    for action in game_state.legal_actions():
        self._tree['children'].append({'action': action})
        with game_state.play(action):
            try:
                # Negamax: the child value is from the opponent's
                # perspective, so negate it.
                action_value = -self.value(
                    game_state,
                    time_allowed_s=time_allowed_s,
                    time_used=time.perf_counter() - start_time,
                    tree=self._tree['children'][-1])
            except self.TimeIsUp:
                if best_action is None:
                    # Out of time before any action finished evaluating;
                    # fall back to the one we were examining.
                    best_action = action
                    self._tree['value'] = action_value
                debug.log({'Time is up': True,
                           'Best action so far:': best_action,
                           'Value': self._tree['value']},
                          level=logging.INFO)
                break
            else:
                debug.log({'Time remaining in seconds': (
                    time_allowed_s - (time.perf_counter() - start_time)),
                    'Best action so far:': best_action,
                    'New value': action_value,
                    'Value': self._tree['value'],
                    'Tree': self.to_dict()},
                    level=logging.INFO)
                if action_value > self._tree['value']:
                    best_action = action
                    self._tree['value'] = action_value
                    debug.log({'Time remaining in seconds': (
                        time_allowed_s
                        - (time.perf_counter() - start_time)),
                        'Best action so far:': best_action,
                        'Value': self._tree['value']},
                        level=logging.INFO)
    debug.log({'Time remaining in seconds': (
        time_allowed_s - (time.perf_counter() - start_time)),
        'Best action so far:': best_action,
        'Value': self._tree['value'],
        'Tree': self.to_dict()},
        level=logging.INFO)
    return best_action
def select_action(self, game_state, time_allowed_s=-1, max_depth=-1):
    """Select an action in game state, `game_state`.

    Return an optimal action in game state, `game_state`, if the time
    allowed, `time_allowed_s` is sufficient. Otherwise, return the best
    action found after `time_allowed_s` seconds. A non-positive
    `time_allowed_s` implies no time limit.

    If max_depth is positive the search only goes to max_depth below
    the root node before applying a heuristic.
    """
    self.pruned_nodes = 0
    # Fix: time.clock() was removed in Python 3.8 and, on Unix,
    # measured CPU time rather than wall time.  A real-time search
    # budget needs a monotonic wall clock: time.perf_counter().
    start_time = time.perf_counter() if time_allowed_s > 0 else 0
    best_action = None
    action_value = -INF
    debug.log({'Time available in seconds': time_allowed_s},
              level=logging.INFO)
    debug.log(str(game_state), level=logging.INFO, raw=True)
    self._tree = {'children': [], 'value': action_value}
    for action in game_state.legal_actions():
        self._tree['children'].append({'action': action})
        with game_state.play(action):
            try:
                # Negamax alpha-beta: root window is (-INF, INF); the
                # child value is from the opponent's perspective.
                action_value = -self.value(
                    game_state, -INF, INF, time_allowed_s,
                    time.perf_counter() - start_time, max_depth - 1,
                    self._tree['children'][-1])
            except self.TimeIsUp:
                if best_action is None:
                    # Out of time before any action finished evaluating;
                    # fall back to the one we were examining.
                    best_action = action
                    self._tree['value'] = action_value
                debug.log(
                    {
                        'Time is up': True,
                        'Best action so far:': best_action,
                        'Value': self._tree['value']
                    },
                    level=logging.INFO)
                break
            else:
                log_time = time_allowed_s - (time.perf_counter()
                                             - start_time)
                debug.log(
                    {
                        'Time remaining in seconds': log_time,
                        'Best action so far:': best_action,
                        'New value': action_value,
                        'Value': self._tree['value'],
                        'Tree': self.to_dict()
                    },
                    level=logging.INFO)
                if action_value > self._tree['value']:
                    best_action = action
                    self._tree['value'] = action_value
                    debug.log(
                        {
                            'Time remaining in seconds': log_time,
                            'Best action so far:': best_action,
                            'Value': self._tree['value']
                        },
                        level=logging.INFO)
    log_time = time_allowed_s - (time.perf_counter() - start_time)
    debug.log(
        {
            'Time remaining in seconds': log_time,
            'Best action so far:': best_action,
            'Value': self._tree['value'],
            'Tree': self.to_dict(),
            'Pruned nodes': self.pruned_nodes
        },
        level=logging.INFO)
    return best_action
def search(self, root_state, time_allowed_s=-1, num_iterations=-1):
    """Execute MCTS from `root_state`.

    Parameters:
    `root_state`: The state of the game from which to search.
        Must adhere to the generic game state interface described by
        `games_puzzles_algorithms.games.fake_game_state`.
    `time_allowed_s`: The time allotted to search for a good action.
        Negative values imply that there is no time limit.
        Setting this to zero will ensure that no search is done.
    `num_iterations`: The number of search iterations (rollouts) to
        complete. Negative values imply that there is no iteration
        limit. Setting this to zero will ensure that no search is done.

    If `time_allowed_s` and `num_iterations` are both negative,
    `num_iterations` will be set to 1.

    Returns a stats dict, or None if `root_state` is already terminal.
    """
    if time_allowed_s < 0 and num_iterations < 0:
        num_iterations = 1
    if root_state.is_terminal():
        return None
    # Fix: time.clock() was removed in Python 3.8 and, on Unix,
    # measured CPU time rather than wall time.  A real-time search
    # budget needs a monotonic wall clock: time.perf_counter().
    start_time = time.perf_counter()
    self._root.expand(root_state)
    debug.log(
        {
            'Initial search tree': self._root.info_strings_to_dict(),
            'Time available in seconds': time_allowed_s,
            '# iterations': num_iterations
        },
        level=logging.INFO)
    debug.log(str(root_state), level=logging.INFO, raw=True)
    num_iterations_completed = 0
    time_used_s = 0

    def time_is_available():
        # Also refreshes `time_used_s` for the stats returned below.
        nonlocal time_used_s
        time_used_s = time.perf_counter() - start_time
        return (time_allowed_s < 0 or time_used_s < time_allowed_s)

    while (num_iterations < 1
           or num_iterations_completed < num_iterations):
        try:
            node, game_state, num_actions = self.select_node(
                self._root, root_state,
                time_is_available=time_is_available)
        except TimeIsUp:
            break
        debug.log("Executing roll-out from (player {} is acting):".format(
            game_state.player_to_act()), level=logging.INFO)
        debug.log(str(game_state), level=logging.INFO, raw=True)
        rollout_results = self.roll_out(game_state, node.acting_player)
        debug.log({'Roll-out results': rollout_results})
        node.backup(**rollout_results)
        debug.log(
            {
                'Updated search tree': self._root.info_strings_to_dict(),
                'Seconds used': time_used_s,
                '# iterations completed': num_iterations_completed + 1
            },
            level=logging.INFO)
        # Rewind the moves applied during selection so `game_state`
        # (an alias of `root_state`) is back at the root.
        for _ in range(num_actions):
            game_state.undo()
        num_iterations_completed += 1
    return {
        'num_iterations_completed': num_iterations_completed,
        'time_used_s': time_used_s,
        'num_nodes_expanded': self._root.num_nodes()
    }
def search(self, root_state, time_allowed_s=-1, num_iterations=-1):
    """Execute MCTS from `root_state`.

    Parameters:
    `root_state`: The state of the game from which to search.
        Must adhere to the generic game state interface described by
        `games_puzzles_algorithms.games.fake_game_state`.
    `time_allowed_s`: The time allotted to search for a good action.
        Negative values imply that there is no time limit.
        Setting this to zero will ensure that no search is done.
    `num_iterations`: The number of search iterations (rollouts) to
        complete. Negative values imply that there is no iteration
        limit. Setting this to zero will ensure that no search is done.

    If `time_allowed_s` and `num_iterations` are both negative,
    `num_iterations` will be set to 1.

    Returns a stats dict, or None if `root_state` is already terminal.
    Log payloads are wrapped in lambdas so they are only built when
    the logger actually emits them.
    """
    if time_allowed_s < 0 and num_iterations < 0:
        num_iterations = 1
    if root_state.is_terminal():
        return None
    # Fix: time.clock() was removed in Python 3.8 and, on Unix,
    # measured CPU time rather than wall time.  A real-time search
    # budget needs a monotonic wall clock: time.perf_counter().
    start_time = time.perf_counter()
    self._root.expand(root_state)
    debug.log(lambda: {
        'Initial search tree': self._root.info_strings_to_dict(),
        'Time available in seconds': time_allowed_s,
        '# iterations': num_iterations
    }, level=logging.INFO)
    debug.log(lambda: str(root_state), level=logging.INFO, raw=True)
    num_iterations_completed = 0
    time_used_s = 0

    def time_is_available():
        # Also refreshes `time_used_s` for the stats returned below.
        nonlocal time_used_s
        time_used_s = time.perf_counter() - start_time
        return (time_allowed_s < 0 or time_used_s < time_allowed_s)

    while (num_iterations < 1
           or num_iterations_completed < num_iterations):
        try:
            node, game_state, num_actions = self.select_node(
                self._root, root_state,
                time_is_available=time_is_available)
        except TimeIsUp:
            break
        debug.log(lambda: "Executing roll-out from (player {} is acting):"
                  .format(game_state.player_to_act()), level=logging.INFO)
        debug.log(lambda: str(game_state), level=logging.INFO, raw=True)
        rollout_results = self.roll_out(game_state, node.acting_player)
        debug.log(lambda: {'Roll-out results': rollout_results})
        node.backup(**rollout_results)
        debug.log(lambda: {
            'Updated search tree': self._root.info_strings_to_dict(),
            'Seconds used': time_used_s,
            '# iterations completed': num_iterations_completed + 1
        }, level=logging.INFO)
        # Rewind the moves applied during selection so `game_state`
        # (an alias of `root_state`) is back at the root.
        for _ in range(num_actions):
            game_state.undo()
        num_iterations_completed += 1
    return {'num_iterations_completed': num_iterations_completed,
            'time_used_s': time_used_s,
            'num_nodes_expanded': self._root.num_nodes()}
def select_action(self, game_state, time_allowed_s=-1, max_depth=-1):
    """Select an action in game state, `game_state`.

    Return an optimal action in game state, `game_state`, if the time
    allowed, `time_allowed_s` is sufficient. Otherwise, return the best
    action found after `time_allowed_s` seconds. A non-positive
    `time_allowed_s` implies no time limit.

    If max_depth is positive the search only goes to max_depth below
    the root node before applying a heuristic.

    Log payloads are wrapped in lambdas so they are only built when
    the logger actually emits them.
    """
    self.pruned_nodes = 0
    # Fix: time.clock() was removed in Python 3.8 and, on Unix,
    # measured CPU time rather than wall time.  A real-time search
    # budget needs a monotonic wall clock: time.perf_counter().
    start_time = time.perf_counter() if time_allowed_s > 0 else 0
    best_action = None
    action_value = -INF
    debug.log(lambda: {'Time available in seconds': time_allowed_s},
              level=logging.INFO)
    debug.log(lambda: str(game_state), level=logging.INFO, raw=True)
    self._tree = {'children': [], 'value': action_value}
    for action in game_state.legal_actions():
        self._tree['children'].append({'action': action})
        with game_state.play(action):
            try:
                # Negamax alpha-beta: root window is (-INF, INF); the
                # child value is from the opponent's perspective.
                action_value = -self.value(
                    game_state, -INF, INF, time_allowed_s,
                    time.perf_counter() - start_time, max_depth - 1,
                    self._tree['children'][-1])
            except self.TimeIsUp:
                if best_action is None:
                    # Out of time before any action finished evaluating;
                    # fall back to the one we were examining.
                    best_action = action
                    self._tree['value'] = action_value
                debug.log(lambda: {'Time is up': True,
                                   'Best action so far:': best_action,
                                   'Value': self._tree['value']},
                          level=logging.INFO)
                break
            else:
                log_time = time_allowed_s - (time.perf_counter()
                                             - start_time)
                debug.log(lambda: {'Time remaining in seconds': log_time,
                                   'Best action so far:': best_action,
                                   'New value': action_value,
                                   'Value': self._tree['value'],
                                   'Tree': self.to_dict()},
                          level=logging.INFO)
                if action_value > self._tree['value']:
                    best_action = action
                    self._tree['value'] = action_value
                    debug.log(lambda: {
                        'Time remaining in seconds': log_time,
                        'Best action so far:': best_action,
                        'Value': self._tree['value']},
                        level=logging.INFO)
    log_time = time_allowed_s - (time.perf_counter() - start_time)
    debug.log(lambda: {'Time remaining in seconds': log_time,
                       'Best action so far:': best_action,
                       'Value': self._tree['value'],
                       'Tree': self.to_dict(),
                       'Pruned nodes': self.pruned_nodes},
              level=logging.INFO)
    debug.log_t(lambda: {'Time remaining in seconds': log_time,
                         'Best action so far:': best_action,
                         'Value': self._tree['value'],
                         'Pruned nodes': self.pruned_nodes},
                level=logging.INFO)
    return best_action