Beispiel #1
0
    def solve(self, solving_cube, time_limit=None, iter_limit=None):
        """Roll out ``self.policy`` on a copy of ``solving_cube``.

        Before every move the current state is tested with ``cube.is_done``;
        if it is not done, the action picked by ``self.policy.select_action``
        is applied. The state reached by the last permitted move is tested
        as well.

        Args:
            solving_cube: Cube to start from. It is copied first, so the
                caller's object is not modified by this method.
            time_limit: Time budget in seconds. A falsy value (``None`` or
                ``0``) disables the time check.
            iter_limit: Maximum number of moves to apply. ``ITER_LIMIT`` is
                used when this is ``None``.

        Returns:
            A 4-tuple ``(solved, actions, iterations, None)``:

            * solved: ``(True, actions, n, None)`` where ``actions`` is the
              list of applied moves and ``n`` is how many were applied;
            * timed out: ``(False, None, i, None)`` where ``i`` is the index
              of the iteration during which the budget ran out;
            * move budget exhausted: ``(False, None, iter_limit, None)``.

        Raises:
            ValueError: If both ``time_limit`` and ``iter_limit`` are
                ``None``.
        """
        if time_limit is None and iter_limit is None:
            raise ValueError('infinite loop maybe')

        if iter_limit is None:
            iter_limit = ITER_LIMIT

        solving_cube = solving_cube.copy()
        # Monotonic clock: an elapsed-time measurement must not be affected
        # by adjustments of the system clock.
        start_time = time.monotonic()
        actions = []

        for i_current in range(iter_limit):
            state = cube.get_state(solving_cube)
            if cube.is_done(state):
                return True, actions, i_current, None

            # Build the observation only once we know a move is needed.
            observation = cube.get_observation(state)
            action = cube.ACTIONS[self.policy.select_action(observation)]
            actions.append(action)
            solving_cube.perform_step(action)

            if time_limit and time.monotonic() - start_time > time_limit:
                return False, None, i_current, None

        # The loop only tests the state *before* each move, so the state
        # produced by the final move still has to be checked here.
        if cube.is_done(cube.get_state(solving_cube)):
            return True, actions, iter_limit, None
        return False, None, iter_limit, None
Beispiel #2
0
    def update(self):
        """Run one search pass from this node down to a leaf.

        * A leaf that is done ends the pass successfully.
        * Any other leaf is expanded: its action distribution is requested
          from ``self.mcts.policy`` and the node stops being a leaf.
        * A non-leaf samples an action from its stored distribution, creates
          the corresponding child on first use and recurses into it.

        Returns:
            A 3-tuple ``(is_done, actions, depth)``. When a done leaf was
            reached, ``is_done`` is ``True`` and ``actions`` lists the moves
            leading from this node to that leaf. Otherwise ``is_done`` is
            ``False`` and ``actions`` is ``None``. ``depth`` is the depth of
            the leaf at which the pass stopped.
        """
        if self.is_leaf:
            if self.is_done:
                return True, [], self.depth

            observation = cube.get_observation(cube.get_state(self.cube))
            self.probabilities = self.mcts.policy.get_action_probabilities(
                observation, self.mcts.temperature)
            if DEBUG:
                print('node is leaf, init all children')
                print('probs: {}'.format(' '.join(
                    str(round(x, 2))
                    for x in self.probabilities.probs.numpy())))

            self.is_leaf = False
            return False, None, self.depth

        action = self.probabilities.sample()
        if DEBUG:
            print('depth = {}\tnext action = {}'.format(
                self.depth, cube.ACTIONS[action]))

        if self.children[action] is None:
            # Mutate a private copy and pass the copy itself, rather than
            # relying on perform_step() returning the cube it modified.
            child_cube = self.cube.copy()
            child_cube.perform_step(cube.ACTIONS[action])
            self.children[action] = SimpleNode(
                child_cube, self.mcts, self.depth + 1)
        is_done, actions, max_depth = self.children[action].update()

        if is_done:
            # Prepend this node's move to the path found further down.
            return True, [cube.ACTIONS[action]] + actions, max_depth
        return False, None, max_depth
Beispiel #3
0
    def __init__(self, node_cube, mcts, depth):
        """Set up a fresh, unexpanded node for ``node_cube``.

        Args:
            node_cube: Cube held by this node; stored as ``self.cube``.
            mcts: Stored as ``self.mcts``.
            depth: Stored as ``self.depth``.
        """
        self.cube, self.mcts, self.depth = node_cube, mcts, depth

        # New nodes start as leaves; whether the cube is already done is
        # determined once, here, from its state.
        self.is_leaf = True
        node_state = cube.get_state(node_cube)
        self.is_done = cube.is_done(node_state)

        # No action distribution yet, and one empty child slot per action.
        self.probabilities = None
        self.children = [None for _ in range(cube.N_ACTION)]
Beispiel #4
0
    def __init__(self, node_cube, mcts, depth, c_puct):
        """Set up a fresh, unexpanded node with value statistics.

        Args:
            node_cube: Cube held by this node; stored as ``self.cube``.
            mcts: Stored as ``self.mcts``; its ``value_f`` supplies the
                node's initial value.
            depth: Stored as ``self.depth``.
            c_puct: Stored as ``self.c_puct``.
        """
        self.cube, self.mcts = node_cube, mcts
        self.depth, self.c_puct = depth, c_puct

        # Compute the state once and reuse it for the done-check and the
        # value lookup below.
        node_state = cube.get_state(node_cube)
        self.state = node_state
        self.is_done = cube.is_done(node_state)

        # The value function's result for this state is recorded as the
        # first (and so far only) value sample.
        initial_value = mcts.value_f.get_value(node_state)
        self.sum_value = float(initial_value)
        self.count_value = 1

        # Unvisited leaf: no distribution yet, one empty slot per action.
        self.visit_count = 0
        self.is_leaf = True
        self.probabilities = None
        self.children = [None for _ in range(cube.N_ACTION)]