Ejemplo n.º 1
0
    def rollout(self, lookahead, env, n: OR_Node):
        """Descend from ``n`` through randomly picked unsolved children.

        Every iteration expands the current node and moves to the child
        returned by ``lookahead.pick_random_unsolved_child``.  What happens
        next depends on that child:

        * terminal: it is marked visited, labels are propagated, and
          ``lookahead.worst_terminal_accumulated_reward`` is lowered when this
          terminal's accumulated reward is smaller (or none was recorded yet).
          The rollout stops.
        * novel according to ``lookahead.root.feature_table.is_novel``: it is
          marked visited, recorded in the feature table, and the rollout
          continues from it.
        * not novel and not yet visited: it is pruned -- ``randomV`` is set
          from ``lookahead.cost_to_go_est`` and labels are propagated.  The
          rollout stops.
        * not novel but already visited: the rollout continues through it.

        The loop also stops once the current node is ``SOLVED`` or
        ``lookahead.sim_calls - lookahead.init_sim_calls`` reaches
        ``lookahead.sim_budget``.  If the node the rollout stopped on is still
        unsolved and the pruned-state strategy is ``"heuristic"``, its
        ``randomV`` is set from ``lookahead.cost_to_go_est``.

        Time spent picking the child and checking novelty is accumulated in
        ``lookahead.rollout_runtime_pick_random_unsolved`` and
        ``lookahead.rollout_runtime_is_novel``.

        Args:
            lookahead: Search object providing the tree operations, counters
                and feature table used above.
            env: Environment handle, forwarded unchanged to the lookahead
                helpers.
            n: Node the rollout starts from.

        Returns:
            None.  All results are written onto ``lookahead`` and the nodes.
        """
        while not n.SOLVED and lookahead.sim_calls - lookahead.init_sim_calls < lookahead.sim_budget:
            lookahead.rollout_depth += 1
            lookahead.expand(env, n)

            # Pick random unsolved child of n
            t0 = time.perf_counter()
            n = lookahead.pick_random_unsolved_child(env, n)
            tf = time.perf_counter()
            lookahead.rollout_runtime_pick_random_unsolved += tf - t0

            if n.terminal:
                n.visited = True
                lookahead.num_visited += 1
                lookahead.solve_and_propagate_labels(n)
                # Keep track of the smallest accumulated reward seen at a terminal.
                if lookahead.worst_terminal_accumulated_reward is None or lookahead.worst_terminal_accumulated_reward > n.accumulated_reward:
                    lookahead.worst_terminal_accumulated_reward = n.accumulated_reward
                break

            t0 = time.perf_counter()
            is_novel = lookahead.root.feature_table.is_novel(n.state[0])
            tf = time.perf_counter()
            lookahead.rollout_runtime_is_novel += tf - t0

            if is_novel:
                n.visited = True
                lookahead.num_visited += 1
                lookahead.root.feature_table.update_feature_table(n.state[0])
            elif not n.visited:
                # Pruned because it is not novel: estimate its value and stop here.
                n.randomV = lookahead.cost_to_go_est(env, n)
                lookahead.solve_and_propagate_labels(n)
                break

        if not n.SOLVED and lookahead._pruned_state_strategy == "heuristic":
            # The rollout did not finish (e.g. the computational budget ran out):
            # apply the heuristic value to the node it stopped on.
            n.randomV = lookahead.cost_to_go_est(env, n)
Ejemplo n.º 2
0
    def rollout(self, lookahead, env, n: OR_Node):
        """Run one rollout below ``n`` with depth-aware novelty pruning.

        While the current node is not ``SOLVED`` and the simulator budget
        (``lookahead.sim_calls - lookahead.init_sim_calls`` below
        ``lookahead.sim_budget``) is not used up, the current node is expanded
        and replaced by the child chosen by
        ``lookahead.pick_random_unsolved_child``.

        A terminal child is marked visited, has its labels propagated, may
        lower ``lookahead.worst_terminal_accumulated_reward``, and ends the
        rollout.  Otherwise the child's depth ``d`` is compared with the depth
        returned (as the fourth value) by
        ``lookahead.root.feature_table.get_novel_feature`` -- presumably the
        depth at which the feature was last recorded; confirm against the
        feature table implementation:

        * ``d`` smaller: the child is marked visited, the table is updated and
          the rollout continues.
        * unvisited child with ``d`` greater or equal: the child is marked
          visited, gets ``randomV`` from ``lookahead.cost_to_go_est``, labels
          are propagated and the rollout ends.
        * visited child with ``d`` strictly greater: same pruning, and its
          ``_children`` are cleared; it is not counted as visited again.
        * anything else: the rollout simply continues through the child.

        If the node the rollout stopped on is still unsolved and the
        pruned-state strategy is ``"heuristic"``, it receives ``randomV`` from
        ``lookahead.cost_to_go_est``.

        Args:
            lookahead: Search object supplying tree operations, counters and
                the feature table.
            env: Environment handle passed through to the lookahead helpers.
            n: Node the rollout starts from.

        Returns:
            None.  Results are stored on ``lookahead`` and on the nodes.
        """

        def within_budget():
            # True while fewer simulator calls than the budget have been used.
            return lookahead.sim_calls - lookahead.init_sim_calls < lookahead.sim_budget

        node = n
        while not node.SOLVED and within_budget():
            lookahead.rollout_depth += 1
            lookahead.expand(env, node)
            # Move down to a randomly chosen unsolved child.
            node = lookahead.pick_random_unsolved_child(env, node)

            if node.terminal:
                node.visited = True
                lookahead.num_visited += 1
                lookahead.solve_and_propagate_labels(node)
                worst = lookahead.worst_terminal_accumulated_reward
                if worst is None or worst > node.accumulated_reward:
                    lookahead.worst_terminal_accumulated_reward = node.accumulated_reward
                break

            table = lookahead.root.feature_table
            key = (node.state[0], node.d)
            # Only the last of the four returned values is used here.
            _, _, _, known_depth = table.get_novel_feature(key)

            if node.d < known_depth:
                # Reached at a smaller depth than before: record it and go on.
                node.visited = True
                lookahead.num_visited += 1
                table.update_feature_table(key)
                continue

            if not node.visited:
                if node.d >= known_depth:
                    node.visited = True
                    lookahead.num_visited += 1
                    # Pruned because it is not novel.
                    node.randomV = lookahead.cost_to_go_est(env, node)
                    lookahead.solve_and_propagate_labels(node)
                    break
            elif known_depth < node.d:
                # Already visited and now not novel: prune and drop its subtree.
                node.randomV = lookahead.cost_to_go_est(env, node)
                node._children = {}
                lookahead.solve_and_propagate_labels(node)
                break

        if node.SOLVED:
            return
        if lookahead._pruned_state_strategy == "heuristic":
            # The rollout did not finish (e.g. budget exhausted): fall back to
            # the heuristic value for the node it stopped on.
            node.randomV = lookahead.cost_to_go_est(env, node)