def solve(self, solving_cube, time_limit=None, iter_limit=None):
    """Roll out the policy on a copy of ``solving_cube`` until it is done.

    Each iteration reads the current state, returns if ``cube.is_done``
    reports completion, and otherwise applies the action picked by
    ``self.policy.select_action`` for the current observation.

    Args:
        solving_cube: Cube to work on. The rollout operates on
            ``solving_cube.copy()``, not on the object passed in.
        time_limit: Optional time budget in seconds; ``None`` disables the
            time check.
        iter_limit: Optional maximum number of iterations. Falls back to
            ``ITER_LIMIT`` when only ``time_limit`` is supplied.

    Returns:
        A 4-tuple ``(solved, actions, iterations, None)``. ``actions`` is the
        list of applied actions on success and ``None`` on failure;
        ``iterations`` is the loop index at which the method returned, or
        ``iter_limit`` when the iteration budget ran out. The fourth element
        is always ``None``.

    Raises:
        ValueError: If neither ``time_limit`` nor ``iter_limit`` is given.
    """
    if time_limit is None and iter_limit is None:
        raise ValueError('infinite loop maybe')
    if iter_limit is None:
        iter_limit = ITER_LIMIT

    solving_cube = solving_cube.copy()
    # A monotonic clock cannot jump backwards or forwards when the system
    # time is adjusted, so the elapsed-time check stays meaningful.
    start_time = time.monotonic()
    actions = []

    for i_current in range(iter_limit):
        state = cube.get_state(solving_cube)
        observation = cube.get_observation(state)
        if cube.is_done(state):
            return True, actions, i_current, None

        action = cube.ACTIONS[self.policy.select_action(observation)]
        actions.append(action)
        solving_cube.perform_step(action)

        # Compare against None (not truthiness) so that an explicit budget
        # of 0 seconds is honoured instead of disabling the time check.
        if (time_limit is not None
                and time.monotonic() - start_time > time_limit):
            return False, None, i_current, None

    # NOTE(review): the state produced by the last permitted step is never
    # tested, so a solve completed exactly on the final step is reported as
    # a failure. Left unchanged to keep the existing return contract.
    return False, None, iter_limit, None
def update(self):
    """Run one update pass starting at this node.

    A leaf that is already done reports success immediately. Any other leaf
    is expanded: its action probabilities are requested from the policy and
    the node stops being a leaf. A non-leaf samples an action, creates the
    matching child on first use, and recurses into it.

    Returns:
        A 3-tuple ``(is_done, actions, depth)``. ``actions`` is the list of
        actions leading from this node to a done node when ``is_done`` is
        true, otherwise ``None``. ``depth`` is the depth of the node at
        which the pass stopped.
    """
    if not self.is_leaf:
        chosen = self.probabilities.sample()
        if DEBUG:
            print('depth = {}\tnext action = {}'.format(
                self.depth, cube.ACTIONS[chosen]))

        if self.children[chosen] is None:
            # NOTE(review): this relies on perform_step returning the cube
            # it acted on -- confirm against the cube implementation.
            self.children[chosen] = SimpleNode(
                self.cube.copy().perform_step(cube.ACTIONS[chosen]),
                self.mcts,
                self.depth + 1)

        solved, tail, reached_depth = self.children[chosen].update()
        if not solved:
            return False, None, reached_depth
        return True, [cube.ACTIONS[chosen]] + tail, reached_depth

    # Leaf handling from here on.
    if self.is_done:
        return True, [], self.depth

    current_observation = cube.get_observation(cube.get_state(self.cube))
    self.probabilities = self.mcts.policy.get_action_probabilities(
        current_observation, self.mcts.temperature)
    if DEBUG:
        print('node is leaf, init all children')
        rounded = ' '.join(
            str(round(p, 2)) for p in self.probabilities.probs.numpy())
        print('probs: {}'.format(rounded))
    self.is_leaf = False
    return False, None, self.depth
def __init__(self, node_cube, mcts, depth):
    """Create an unexpanded node for ``node_cube``.

    Args:
        node_cube: Cube held by this node.
        mcts: Owning search object, stored for later use by the node.
        depth: Depth of this node.
    """
    # Whether the node is done is evaluated once, at construction time.
    node_state = cube.get_state(node_cube)
    self.is_done = cube.is_done(node_state)

    # One child slot per action, filled in on demand.
    self.children = [None] * cube.N_ACTION
    self.probabilities = None
    self.is_leaf = True

    self.depth = depth
    self.mcts = mcts
    self.cube = node_cube
def __init__(self, node_cube, mcts, depth, c_puct):
    """Create an unexpanded node carrying value statistics.

    Args:
        node_cube: Cube held by this node.
        mcts: Owning search object; its ``value_f`` supplies the initial
            value for this node's state.
        depth: Depth of this node.
        c_puct: Constant stored on the node for later use.
    """
    self.cube, self.mcts = node_cube, mcts
    self.depth, self.c_puct = depth, c_puct

    # The state is computed once and reused for both the done check and
    # the initial value.
    node_state = cube.get_state(node_cube)
    self.state = node_state
    self.is_done = cube.is_done(node_state)

    # The value sum starts at the value function's output for this state,
    # counted as one sample; the visit counter starts at zero.
    initial_value = mcts.value_f.get_value(node_state)
    self.sum_value = float(initial_value)
    self.count_value = 1
    self.visit_count = 0

    # One child slot per action, filled in on demand.
    self.is_leaf = True
    self.probabilities = None
    self.children = [None] * cube.N_ACTION