Exemple #1
0
def value_iteration(nnet, device, env: Environment,
                    states: List[State]) -> List[float]:
    """Compute one-step value-iteration targets for ``states``.

    Each state is expanded with ``env.expand``; every child is scored by
    ``nnet`` and the target of a state is the minimum, over its children, of
    (transition cost + network output). States reported as solved by
    ``env.is_solved`` get a target of 0.

    Args:
        nnet: Callable network. It is applied to a float tensor built from
            ``env.state_to_nnet_input`` and only column 0 of its output is
            used.
        device: Not used by this function; kept so callers need not change.
        env: Environment providing ``expand``, ``state_to_nnet_input`` and
            ``is_solved``.
        states: States to compute targets for.

    Returns:
        A NumPy array with one target per input state (the ``List[float]``
        annotation is looser than what is actually returned).
    """
    expanded = env.expand(states)
    children = expanded[0]
    transition_costs = expanded[1]

    flat_children, index_children = flatten(children)

    inputs_tensor = torch.from_numpy(
        env.state_to_nnet_input(flat_children)).float()
    output_tensor = nnet(inputs_tensor).data.numpy()[:, 0]

    # NOTE: this must be an annotation (":"), not a chained assignment
    # ("= np.ndarray ="), which would overwrite the ``np.ndarray`` attribute
    # of the numpy module with this array.
    # The axis=1 reduction below assumes every state has the same number of
    # children, so the per-state rows stack into a 2-D array - TODO confirm.
    targets_list: np.ndarray = np.array(transition_costs) + np.array(
        unflatten(list(output_tensor), index_children))
    targets = np.min(targets_list, axis=1)

    is_solved: np.ndarray = np.array(env.is_solved(states))

    # Multiplying by the negated mask zeroes the target of solved states.
    targets = targets * np.logical_not(is_solved)

    return targets
Exemple #2
0
def generate_plot(nnet: nn.Module, device, env: Environment, states: List[State], outputs: np.array):
    """Plot network outputs against expected values for a random sample.

    Two scatter plots are shown one after the other with ``plt.show()``:
    the raw network output versus the expected value, and the output of
    ``approx_admissible_conv`` versus the expected value. Each plot also
    draws a green reference line through (0, 0) and (30, 30).

    Args:
        nnet: Network to evaluate; it is switched to eval mode.
        device: Passed to ``states_nnet_to_pytorch_input``.
        env: Environment used to convert ``states`` to network input.
        states: States whose network output is plotted.
        outputs: Expected values for ``states``; flattened before use.

    Returns:
        None. The function only produces plots.
    """
    nnet.eval()

    states_targ_nnet: np.ndarray = env.state_to_nnet_input(states)
    out_nnet = nnet(states_nnet_to_pytorch_input(states_targ_nnet, device).float()).cpu().data.numpy()
    out_nnet, _ = flatten(out_nnet)
    outputs, _ = flatten(outputs)

    out_nnet_array = np.array(out_nnet)
    outputs_array = np.array(outputs)

    random_indexs = list(range(len(out_nnet_array)))
    random.shuffle(random_indexs)

    # Take at most 100 shuffled indices; slicing (rather than a fixed
    # range(100) loop) avoids an IndexError when fewer points are available.
    sample_indexs = random_indexs[:100]

    random_states: List[State] = [states[idx] for idx in sample_indexs]
    sample_expected: List = [outputs_array[idx] for idx in sample_indexs]
    sample_outputs: List = [out_nnet_array[idx] for idx in sample_indexs]

    h_new: np.ndarray = approx_admissible_conv(env, nnet, out_nnet_array, outputs_array, states, random_states, sample_outputs, sample_expected)

    # First figure: raw network output vs expected value.
    plt.scatter(sample_expected, sample_outputs, c = '000000', linewidths = 0.1)
    plt.axline([0,0],[30,30], linewidth =3, c = 'g')
    plt.ylabel('NNet output')
    plt.xlabel('Expected value')
    plt.title("Output vs Expected")
    plt.show()

    # Second figure: converted output vs expected value.
    plt.scatter(sample_expected, h_new, c = '000000', linewidths = 0.1)
    plt.axline([0,0],[30,30], linewidth =3, c = 'g')
    plt.ylabel('Converted output')
    plt.xlabel('Expected value')
    plt.title("Converted Output vs Expected")
    plt.show()
def bellman(
    states: List, heuristic_fn, env: Environment
) -> Tuple[np.ndarray, List[np.ndarray], List[List[State]]]:
    """Perform a one-step Bellman backup for ``states``.

    Args:
        states: States to back up.
        heuristic_fn: Callable applied to the flattened list of child states;
            its result is added element-wise to the concatenated transition
            costs.
        env: Environment providing ``expand`` and ``is_solved``.

    Returns:
        A tuple ``(backup, child_values, children)`` where ``backup`` holds,
        per state, the minimum of (transition cost + heuristic) over its
        children (0 for solved states), ``child_values`` is the list of
        per-state arrays those minima were taken over, and ``children`` is
        the nested list of child states returned by ``env.expand``.
    """
    children_by_state, step_costs_by_state = env.expand(states)
    flat_step_costs = np.concatenate(step_costs_by_state, axis=0)

    # Score every child in a single heuristic call.
    flat_children, boundaries = misc_utils.flatten(children_by_state)
    child_heuristics: np.ndarray = heuristic_fn(flat_children)

    # Regroup (cost + heuristic) by parent state.
    child_values_flat = flat_step_costs + child_heuristics
    child_values_by_state = np.split(child_values_flat, boundaries)

    solved = env.is_solved(states)
    best_child_value = np.array(
        [np.min(values) for values in child_values_by_state])
    # Solved states are masked to a cost-to-go of zero.
    backup = best_child_value * np.logical_not(solved)

    return backup, child_values_by_state, children_by_state
Exemple #4
0
    def patchIt(self, testInst, config=False):
        """Return a patched copy of ``testInst`` as a flat list of values.

        The instance is located in ``self.tree``; among all leaves under the
        tree root, the one closest to the instance's leaf (per
        ``self.howfar``) that satisfies the score criterion is chosen, and
        every branch entry of that leaf not shared with the current leaf is
        written into the instance as a string. The last column is then set
        to 1.

        Args:
            testInst: One row of data; wrapped in a ``pd.DataFrame`` and
                transposed before use.
            config: Ignored. The method reads ``self.config`` instead; the
                parameter is kept for interface compatibility.

        Returns:
            The (possibly modified) row as a list. If no suitable leaf can
            be selected, the unmodified row is returned.
        """
        testInst = pd.DataFrame(testInst).transpose()
        current = self.find(testInst, self.tree)
        node = current
        while node.lvl > -1:
            node = node.up  # Move to tree root

        leaves = flatten([self.leaves(_k) for _k in node.kids])
        try:
            if self.config:
                # Leaves scoring at most 90% of the current leaf's score.
                candidates = [l for l in leaves
                              if l.score <= 0.9 * current.score]
            else:
                candidates = [l for l in leaves if l.score == 0]
            # Closest candidate first; [0] raises IndexError when none match.
            best = sorted(candidates,
                          key=lambda F: self.howfar(current, F))[0]
        except Exception:
            # Best effort: fall back to the unpatched instance, but do not
            # swallow KeyboardInterrupt / SystemExit as a bare except would.
            return testInst.values.tolist()[0]

        def new(old, bounds):
            # The old value does not influence the result; the patched value
            # is the pair of absolute bounds.
            return abs(bounds[0]), abs(bounds[1])

        for ii in best.branch:
            if ii not in current.branch:
                feature = ii[0]
                old_value = testInst[feature].values[0]
                now = ii[1] if self.config else new(old_value, ii[1])
                testInst[feature] = str(now)

        testInst[testInst.columns[-1]] = 1
        return testInst.values.tolist()[0]
Exemple #5
0
def astar_update(states: List[State], env: Environment, num_steps: int,
                 heuristic_fn):
    """Run A* from ``states`` and collect Bellman values of popped nodes.

    Each start state gets a random weight drawn from ``np.random.rand``.
    The search is stepped ``num_steps`` times with a batch size of 1, then
    ``compute_bellman`` is called on every popped node.

    Args:
        states: Start states for the search.
        env: Environment handed to ``AStar``.
        num_steps: Number of ``AStar.step`` calls to make.
        heuristic_fn: Heuristic passed to ``AStar`` and to each step.

    Returns:
        A tuple ``(states_update, cost_to_go_update, is_solved)``: the states
        of all popped nodes, an array of their ``bellman`` attributes, and an
        array built from ``astar.has_found_goal()``.
    """
    random_weights: List[float] = list(np.random.rand(len(states)))
    search = AStar(states, env, heuristic_fn, random_weights)
    for _ in range(num_steps):
        search.step(heuristic_fn, 1, verbose=False)

    popped_by_instance: List[List[Node]] = search.get_popped_nodes()
    popped_flat, _split_idxs = misc_utils.flatten(popped_by_instance)

    # Compute every backup before reading any result.
    for popped in popped_flat:
        popped.compute_bellman()

    states_update: List[State] = []
    bellman_values: List = []
    for popped in popped_flat:
        states_update.append(popped.state)
        bellman_values.append(popped.bellman)

    cost_to_go_update = np.array(bellman_values)
    is_solved = np.array(search.has_found_goal())

    return states_update, cost_to_go_update, is_solved
Exemple #6
0
def gbfs_update(states: List[State], env: Environment, num_steps: int,
                heuristic_fn, eps_max: float):
    """Run GBFS from ``states`` and collect (state, value) training pairs.

    Each start state gets its own epsilon, drawn from ``np.random.rand`` and
    scaled by ``eps_max``. The search is stepped ``num_steps`` times and its
    trajectories are flattened into one list of pairs.

    Args:
        states: Start states for the search.
        env: Environment handed to ``GBFS``.
        num_steps: Number of ``GBFS.step`` calls to make.
        heuristic_fn: Heuristic passed to each step.
        eps_max: Upper scale for the per-state random epsilon.

    Returns:
        A tuple ``(states_update, cost_to_go_update, is_solved)``: the first
        element of every trajectory entry, an array of the second elements,
        and an array built from ``gbfs.get_is_solved()``.
    """
    eps_per_state: List[float] = list(np.random.rand(len(states)) * eps_max)

    search = GBFS(states, env, eps=eps_per_state)
    for _ in range(num_steps):
        search.step(heuristic_fn)

    trajs_by_instance: List[List[Tuple[State, float]]] = search.get_trajs()
    traj_pairs, _split_idxs = misc_utils.flatten(trajs_by_instance)

    is_solved: np.ndarray = np.array(search.get_is_solved())

    # Split the (state, value) pairs into two parallel sequences.
    states_update: List = [pair[0] for pair in traj_pairs]
    cost_to_go_update = np.array([pair[1] for pair in traj_pairs])

    return states_update, cost_to_go_update, is_solved
Exemple #7
0
    def step(self,
             heuristic_fn: Callable,
             batch_size: int,
             include_solved: bool = False,
             verbose: bool = False):
        """Run one search iteration and update timings and the step counter.

        In order, the method calls ``pop_from_open``, ``expand_nodes``,
        ``add_heuristic_and_cost``, ``remove_in_closed`` and ``add_to_open``
        on the selected instances, timing each phase. The elapsed times are
        added to ``self.timings`` and ``self.step_num`` is incremented.

        Args:
            heuristic_fn: Heuristic passed to ``add_heuristic_and_cost``.
            batch_size: Passed to ``pop_from_open``.
            include_solved: If False, instances that already have goal nodes
                are skipped for this iteration.
            verbose: If True, print heuristic statistics and phase timings.
        """
        start_time_itr = time.time()
        instances: List[Instance]
        inst_weights: List[float]
        if include_solved:
            instances = self.instances
            inst_weights = self.weights
        else:
            # Filter instances and their weights together. Zipping the
            # unfiltered self.weights against the filtered instances would
            # pair children with the weights of other instances as soon as
            # one instance is solved.
            # Assumes self.weights[i] belongs to self.instances[i].
            active_idxs = [
                idx for idx, instance in enumerate(self.instances)
                if len(instance.goal_nodes) == 0
            ]
            instances = [self.instances[idx] for idx in active_idxs]
            inst_weights = [self.weights[idx] for idx in active_idxs]

        # Pop from open
        start_time = time.time()
        popped_nodes_all: List[List[Node]] = pop_from_open(
            instances, batch_size)
        pop_time = time.time() - start_time

        # Expand nodes
        start_time = time.time()
        nodes_c_all: List[List[Node]] = expand_nodes(instances,
                                                     popped_nodes_all,
                                                     self.env)
        expand_time = time.time() - start_time

        # Get heuristic of children, do heur before check so we can do backup
        start_time = time.time()
        nodes_c_all_flat, _ = misc_utils.flatten(nodes_c_all)
        # Repeat each instance's weight once per child so the flat weight
        # list lines up with the flat child list.
        weights, _ = misc_utils.flatten(
            [[weight] * len(nodes_c)
             for weight, nodes_c in zip(inst_weights, nodes_c_all)])
        path_costs, heuristics = add_heuristic_and_cost(
            nodes_c_all_flat, heuristic_fn, weights)
        heur_time = time.time() - start_time

        # Check if children are in closed
        start_time = time.time()
        nodes_c_all = remove_in_closed(instances, nodes_c_all)
        check_time = time.time() - start_time

        # Add to open
        start_time = time.time()
        add_to_open(instances, nodes_c_all)
        add_time = time.time() - start_time

        itr_time = time.time() - start_time_itr

        # Print to screen
        if verbose:
            if heuristics.shape[0] > 0:
                min_heur = np.min(heuristics)
                min_heur_pc = path_costs[np.argmin(heuristics)]
                max_heur = np.max(heuristics)
                max_heur_pc = path_costs[np.argmax(heuristics)]

                print("Itr: %i, Added to OPEN - Min/Max Heur(PathCost): "
                      "%.2f(%.2f)/%.2f(%.2f) " %
                      (self.step_num, min_heur, min_heur_pc, max_heur,
                       max_heur_pc))

            print("Times - pop: %.2f, expand: %.2f, check: %.2f, heur: %.2f, "
                  "add: %.2f, itr: %.2f" % (pop_time, expand_time, check_time,
                                            heur_time, add_time, itr_time))

            print("")

        # Update timings
        self.timings['pop'] += pop_time
        self.timings['expand'] += expand_time
        self.timings['check'] += check_time
        self.timings['heur'] += heur_time
        self.timings['add'] += add_time
        self.timings['itr'] += itr_time

        self.step_num += 1
Exemple #8
0
def expand_nodes(instances: List[Instance], popped_nodes_all: List[List[Node]],
                 env: Environment):
    """Create child nodes for every popped node, grouped by instance.

    All popped nodes are expanded in a single ``env.expand`` call and all
    children are checked with a single ``env.is_solved`` call. Each child
    ``Node`` is appended to its parent's ``children``; the parent's
    ``transition_costs`` is extended with the costs of its moves; and each
    instance's ``num_nodes_generated`` grows by its number of new children.

    Args:
        instances: Search instances, index-aligned with ``popped_nodes_all``.
        popped_nodes_all: Popped nodes per instance.
        env: Environment providing ``expand`` and ``is_solved``.

    Returns:
        One list of child nodes per instance, or ``[[]]`` when no node was
        popped at all.
    """
    # Work on flat lists so the environment is queried once per call.
    parents_flat, inst_splits = misc_utils.flatten(popped_nodes_all)

    if len(parents_flat) == 0:
        return [[]]

    parent_states: List[State] = [parent.state for parent in parents_flat]
    children_per_parent, tcs_np = env.expand(parent_states)
    tcs_per_parent: List[List[float]] = [list(costs) for costs in tcs_np]

    # Solved flags for all children at once, regrouped per parent.
    children_flat, child_splits = misc_utils.flatten(children_per_parent)
    solved_flat: List[bool] = list(env.is_solved(children_flat))
    solved_per_parent = misc_utils.unflatten(solved_flat, child_splits)

    # Child path cost = parent path cost + transition cost. The column vector
    # of parent costs broadcasts across each parent's row of transition costs
    # (presumably every parent has the same number of moves - TODO confirm).
    parent_costs_col = np.expand_dims(
        np.array([parent.path_cost for parent in parents_flat]), 1)
    child_costs_flat: List[float] = (
        parent_costs_col + np.array(tcs_per_parent)).flatten().tolist()
    child_costs_per_parent = misc_utils.unflatten(child_costs_flat,
                                                  child_splits)

    # Regroup the per-parent lists by instance.
    tcs_by_inst = misc_utils.unflatten(tcs_per_parent, inst_splits)
    child_costs_by_inst = misc_utils.unflatten(child_costs_per_parent,
                                               inst_splits)
    children_by_inst = misc_utils.unflatten(children_per_parent, inst_splits)
    solved_by_inst = misc_utils.unflatten(solved_per_parent, inst_splits)

    nodes_c_by_inst: List[List[Node]] = []
    for inst_idx, instance in enumerate(instances):
        new_nodes: List[Node] = []
        nodes_c_by_inst.append(new_nodes)

        per_parent = zip(popped_nodes_all[inst_idx],
                         tcs_by_inst[inst_idx],
                         child_costs_by_inst[inst_idx],
                         children_by_inst[inst_idx],
                         solved_by_inst[inst_idx])
        for parent, parent_tcs, costs, child_states, solved in per_parent:
            for move_idx, child_state in enumerate(child_states):
                child: Node = Node(child_state, costs[move_idx],
                                   solved[move_idx], move_idx, parent)
                new_nodes.append(child)
                parent.children.append(child)

            parent.transition_costs.extend(parent_tcs)

        instance.num_nodes_generated += len(new_nodes)

    return nodes_c_by_inst