Example #1
0
    def post_milp(self, x, x_label, nn, output_flag, t, template) -> List[Experiment.SuccessorInfo]:
        """Build one successor record of ``x`` for each of the two actions.

        For every action a fresh Gurobi model is created, the input region of
        ``x`` is encoded in it, ``apply_dynamic`` is applied for that action and
        the result is passed to ``optimise`` against ``template``.

        Args:
            x: parent element; stored as ``parent`` of every successor.
            x_label: label of the parent; stored as ``parent_lbl``.
            nn: network forwarded to ``create_range_bounds_model``.
            output_flag: value for Gurobi's ``OutputFlag`` parameter.
            t: timestep of the parent; successors are stamped ``t + 1``.
            template: template forwarded to the region/optimisation helpers.

        Returns:
            A list with one ``Experiment.SuccessorInfo`` per action whose
            ``lb``/``ub`` come from ``create_range_bounds_model``.
        """
        action_bounds = self.create_range_bounds_model(template, x, self.env_input_size, nn)
        successors = []
        for action_idx in (0, 1):
            model = grb.Model()
            model.setParam('OutputFlag', output_flag)
            model.setParam('DualReductions', 0)
            region = generate_input_region(model, template, x, self.env_input_size)
            next_state = StoppingCarExperimentProbabilistic.apply_dynamic(
                region, model, action=action_idx, env_input_size=self.env_input_size)
            model.update()
            model.optimize()
            bounds = optimise(template, model, next_state)
            # A missing result is treated as a programming error, not as "no successor".
            assert bounds is not None

            prob_range = action_bounds[action_idx]
            info = Experiment.SuccessorInfo()
            info.successor = tuple(bounds)
            info.parent = x
            info.parent_lbl = x_label
            info.t = t + 1
            info.action = "policy"  # the concrete action index is not recorded
            info.lb = prob_range[0]
            info.ub = prob_range[1]
            successors.append(info)

        return successors
Example #2
0
 def post_milp(self, x, x_label, nn, output_flag, t, template):
     """Collect the successors of ``x`` for every action the guard accepts.

     Per action, a new Gurobi model receives the input region of ``x`` and a
     two-component ``observation`` variable that is tied to the input through
     four inequalities, each with a slack of ``self.input_epsilon / 2``.
     ``Experiment.generate_nn_guard`` then decides whether the action is
     feasible; only feasible actions are propagated through
     ``StoppingCarExperiment.apply_dynamic``.

     Returns:
         A list of ``Experiment.SuccessorInfo`` (no ``lb``/``ub`` are set
         here), one for each action for which ``h_repr_to_plot`` reports a
         successor.
     """
     successors = []
     for action_idx in (0, 1):
         model = grb.Model()
         model.setParam('OutputFlag', output_flag)
         region = generate_input_region(model, template, x, self.env_input_size)
         obs = model.addMVar(shape=(2, ), lb=float("-inf"), ub=float("inf"), name="observation")

         half_eps = self.input_epsilon / 2
         # observation[1] lies within half_eps of input[0] - input[1]
         model.addConstr(obs[1] <= region[0] - region[1] + half_eps, name="obs_constr21")
         model.addConstr(obs[1] >= region[0] - region[1] - half_eps, name="obs_constr22")
         # observation[0] lies within half_eps of v_lead - input[2]
         model.addConstr(obs[0] <= self.v_lead - region[2] + half_eps, name="obs_constr11")
         model.addConstr(obs[0] >= self.v_lead - region[2] - half_eps, name="obs_constr12")

         # The guard is evaluated on the observation, not on the raw input.
         if not Experiment.generate_nn_guard(model, obs, nn, action_ego=action_idx):
             continue

         next_state = StoppingCarExperiment.apply_dynamic(
             region, model, action=action_idx, env_input_size=self.env_input_size)
         model.update()
         model.optimize()
         has_successor, bounds = self.h_repr_to_plot(model, template, next_state)
         if not has_successor:
             continue

         info = Experiment.SuccessorInfo()
         info.successor = tuple(bounds)
         info.parent = x
         info.parent_lbl = x_label
         info.t = t + 1
         info.action = "policy"  # the concrete action index is not recorded
         successors.append(info)
     return successors
Example #3
0
 def post_milp(self, x, x_label, nn, output_flag, t, template):
     """Compute the successors of ``x`` for each of the ``self.n_actions`` actions.

     Each action gets its own Gurobi model (limited to 2 threads). The theta
     bounds obtained from ``get_theta_bounds`` feed ``get_sin_cos_table``,
     whose table is used by ``PendulumExperiment.generate_angle_milp`` to
     produce the new angle terms that ``apply_dynamic`` consumes.

     Returns:
         A list of ``Experiment.SuccessorInfo``; an action contributes an
         entry only when the model status is 2 after optimisation and
         ``h_repr_to_plot`` reports a successor. ``lb``/``ub`` are taken from
         ``create_range_bounds_model``.
     """
     action_bounds = self.create_range_bounds_model(template, x, self.env_input_size, nn)
     successors = []
     for action_idx in range(self.n_actions):
         model = grb.Model()
         model.setParam('OutputFlag', output_flag)
         model.setParam('Threads', 2)
         region = generate_input_region(model, template, x, self.env_input_size)

         theta_hi, theta_lo, theta_dot_hi, theta_dot_lo = self.get_theta_bounds(model, region)
         trig_table = self.get_sin_cos_table(theta_hi, theta_lo, theta_dot_hi, theta_dot_lo,
                                             action=action_idx)
         next_theta_dot, next_theta = PendulumExperiment.generate_angle_milp(
             model, region, trig_table)

         next_state = self.apply_dynamic(region,
                                         model,
                                         newthdot=next_theta_dot,
                                         newtheta=next_theta,
                                         env_input_size=self.env_input_size,
                                         action=action_idx)
         model.update()
         model.optimize()
         # Status 2 is Gurobi's OPTIMAL code; any other outcome yields no successor.
         if model.status != 2:
             continue

         has_successor, bounds = self.h_repr_to_plot(model, template, next_state)
         if not has_successor:
             continue

         prob_range = action_bounds[action_idx]
         info = Experiment.SuccessorInfo()
         info.successor = tuple(bounds)
         info.parent = x
         info.parent_lbl = x_label
         info.t = t + 1
         info.action = "policy"  # the concrete action index is not recorded
         info.lb = prob_range[0]
         info.ub = prob_range[1]
         successors.append(info)
     return successors
Example #4
0
 def post_milp(self, x, x_label, nn, output_flag, t,
               template) -> List[Experiment.SuccessorInfo]:
     """Collect probability-annotated successors of ``x`` for feasible actions.

     Per action, a new Gurobi model receives the input region of ``x`` and a
     two-component observation variable tied to the input through four
     inequalities, each with a slack of ``self.input_epsilon / 2``. Actions
     rejected by ``Experiment.generate_nn_guard`` are skipped; the others are
     propagated through ``StoppingCarExperiment.apply_dynamic``.

     Returns:
         A list of ``Experiment.SuccessorInfo``, one per action for which
         ``h_repr_to_plot`` reports a successor, with ``lb``/``ub`` taken
         from ``create_range_bounds_model``.
     """
     action_bounds = self.create_range_bounds_model(template, x, self.env_input_size, nn)
     successors = []
     for action_idx in (0, 1):
         model = grb.Model()
         model.setParam('OutputFlag', output_flag)
         model.setParam('DualReductions', 0)
         region = generate_input_region(model, template, x, self.env_input_size)
         # NOTE(review): this observation variable is registered under the
         # name "input"; confirm that is intended.
         obs = model.addMVar(shape=(2, ), lb=float("-inf"), ub=float("inf"), name="input")

         half_eps = self.input_epsilon / 2
         # observation[1] lies within half_eps of input[0] - input[1]
         model.addConstr(obs[1] <= region[0] - region[1] + half_eps, name="obs_constr21")
         model.addConstr(obs[1] >= region[0] - region[1] - half_eps, name="obs_constr22")
         # observation[0] lies within half_eps of v_lead - input[2]
         model.addConstr(obs[0] <= self.v_lead - region[2] + half_eps, name="obs_constr11")
         model.addConstr(obs[0] >= self.v_lead - region[2] - half_eps, name="obs_constr12")

         if not Experiment.generate_nn_guard(model, obs, nn, action_ego=action_idx):
             continue

         next_state = StoppingCarExperiment.apply_dynamic(
             region, model, action=action_idx, env_input_size=self.env_input_size)
         model.update()
         model.optimize()
         has_successor, bounds = self.h_repr_to_plot(model, template, next_state)
         if not has_successor:
             continue

         prob_range = action_bounds[action_idx]
         info = Experiment.SuccessorInfo()
         info.successor = tuple(bounds)
         info.parent = x
         info.parent_lbl = x_label
         info.t = t + 1
         info.action = "policy"  # the concrete action index is not recorded
         info.lb = prob_range[0]
         info.ub = prob_range[1]
         successors.append(info)
     return successors
Example #5
0
    def post_milp(self, x, x_label, nn, output_flag, t,
                  template) -> List[Experiment.SuccessorInfo]:
        """Enumerate the successors of ``x`` over all guard cases and actions.

        Every branch starts from a fresh Gurobi model containing the input
        region of ``x`` followed by ``apply_dynamic``. A branch is then
        restricted by ``generate_guard`` (for a guard case) and, when the
        guard is feasible and the branch is selected by the current action,
        continued with ``apply_dynamic2`` (for a dynamic case).

        Branches, in the order they are evaluated:

        * guard 0 -> dynamic 0, action-independent, probability [1, 1];
        * for each action whose upper probability bound exceeds 1e-6:
          guard 1 -> dynamic 1 and guard 2 -> dynamic 2 when the action is 1,
          guard 1 -> dynamic 3 and guard 2 -> dynamic 3 when the action is 0;
        * guard 3 -> dynamic 3, action-independent, probability [1, 1].

        Args:
            x: parent element; stored as ``parent`` of every successor.
            x_label: label of the parent; stored as ``parent_lbl``.
            nn: network forwarded to ``create_range_bounds_model``.
            output_flag: value for Gurobi's ``OutputFlag`` parameter.
            t: timestep of the parent; successors are stamped ``t + 1``.
            template: template forwarded to the region/plot helpers.

        Returns:
            The list of ``Experiment.SuccessorInfo`` produced by the branches
            for which ``h_repr_to_plot`` reports a successor.
        """
        ranges_probs = self.create_range_bounds_model(template, x,
                                                      self.env_input_size, nn)
        post = []

        def standard_op():
            """Return a fresh model and the state after the first dynamic step."""
            gurobi_model = grb.Model()
            gurobi_model.setParam('OutputFlag', output_flag)
            region = self.generate_input_region(gurobi_model, template, x,
                                                self.env_input_size)
            z = self.apply_dynamic(region, gurobi_model, self.env_input_size)
            return gurobi_model, z

        def expand(guard_case, dynamic_case, action_name, prob_range, selected=True):
            """Evaluate one branch and append its successor to ``post`` if any."""
            gurobi_model, z = standard_op()
            feasible = self.generate_guard(gurobi_model, z, case=guard_case)
            # NOTE(review): the model and guard are built even when the branch
            # is not selected by the action, matching the previous call
            # sequence; skipping them is only safe if generate_guard has no
            # side effects beyond the throw-away model.
            if not (feasible and selected):
                return
            x_prime = self.apply_dynamic2(z,
                                          gurobi_model,
                                          case=dynamic_case,
                                          env_input_size=self.env_input_size)
            found_successor, x_prime_results = self.h_repr_to_plot(
                gurobi_model, template, x_prime)
            if not found_successor:
                return
            successor_info = Experiment.SuccessorInfo()
            successor_info.successor = tuple(x_prime_results)
            successor_info.parent = x
            successor_info.parent_lbl = x_label
            successor_info.t = t + 1
            successor_info.action = action_name
            successor_info.lb = prob_range[0]
            successor_info.ub = prob_range[1]
            post.append(successor_info)

        # (guard case, action that selects the branch, dynamic case)
        action_branches = (
            (1, 1, 1),  # ball going down and hit
            (2, 1, 2),  # ball going up and hit
            (1, 0, 3),  # ball going down and NO hit -> normal dynamic
            (2, 0, 3),  # ball going up and NO hit -> normal dynamic
        )

        # case 0: bounce; the action is irrelevant in this case
        expand(0, 0, "case0", (1.0, 1.0))

        for chosen_action in range(2):
            prob_range = ranges_probs[chosen_action]
            if prob_range[1] <= 1e-6:  # ignore very small probabilities of happening
                continue
            for guard_case, required_action, dynamic_case in action_branches:
                # The action check is made on the action index itself, not on z.
                expand(guard_case, dynamic_case, "policy", prob_range,
                       selected=chosen_action == required_action)

        # case 3: ball out of reach and not bounce; the action is irrelevant
        expand(3, 3, "case3", (1.0, 1.0))

        return post
    def inner_loop_step(self, stats: Experiment.LoopStats, template_2d, template, nn, bar_main):
        """Run one scheduling/collection step of the exploration loop.

        The step has two phases:

        1. While fewer than ``self.n_workers`` results are pending in
           ``stats.proc_ids`` and the frontier is not empty, pop an element
           and either discard it (time horizon or wall-clock budget reached),
           drop it because it is contained in an already seen element, split
           it into fragments, or submit it to ``self.post_fn_remote``.
        2. Plot/report progress if due, gather finished results through
           ``collect_results``, merge them into the frontier and optionally
           persist the graph and ``stats`` under ``self.save_dir``.

        Args:
            stats: mutable loop state (frontier, seen list, pending ids, ...).
            template_2d: template forwarded to plotting and ``check_split``.
            template: template forwarded to the split and successor helpers.
            nn: network forwarded to the split and successor helpers.
            bar_main: progress bar, updated when ``self.show_progressbar``.
        """

        def enqueue_fragments(parent, parent_label, timestep, polytopes):
            """Swap ``parent`` for its fragments and queue them as a pending result."""
            # Remove the parent node from seen if it has been split, to prevent
            # unnecessary loops when we check for containment.
            stats.seen.remove((parent, parent_label))
            new_fragments = []
            for i, splitted_polytope in enumerate(polytopes):
                successor_info = Experiment.SuccessorInfo()
                successor_info.successor = tuple(splitted_polytope)
                successor_info.parent = parent
                successor_info.parent_lbl = parent_label
                successor_info.t = timestep  # a split does not advance time
                successor_info.action = f"split{i}"
                new_fragments.append(successor_info)
            stats.proc_ids.append(ray.put(new_fragments))

        # fills up the worker threads
        while len(stats.proc_ids) < self.n_workers and stats.frontier:
            t, (x, x_label) = heapq.heappop(stats.frontier) if self.use_bfs else stats.frontier.pop()
            # total_seconds() is required here: timedelta.seconds is only the
            # seconds component and wraps around every 24 hours.
            if t >= self.time_horizon or (datetime.datetime.now() - stats.start_time).total_seconds() > self.max_elapsed_time:
                print(f"Discard timestep t={t}")
                stats.discarded.append((x, x_label))
                continue
            stats.max_t = max(stats.max_t, t)

            if self.use_contained:
                contained_flag = False
                to_remove = []
                for (s, s_label) in stats.seen:
                    if s_label == x_label:
                        if contained(x, s):
                            if not self.graph.has_predecessor((x, x_label), (s, x_label)):  # ensures that if there was a split it doesn't count as contained
                                self.graph.add_edge((x, x_label), (s, x_label), action="contained", lb=1.0, ub=1.0)
                                contained_flag = True
                                break
                        if contained(s, x):
                            to_remove.append((s, s_label))
                # Seen elements that are contained in x are superseded by it.
                for rem in to_remove:
                    stats.num_already_visited += 1
                    stats.seen.remove(rem)
                if contained_flag:
                    stats.num_already_visited += 1
                    continue
            stats.seen.append((x, x_label))
            if self.show_progressbar:
                bar_main.update(value=bar_main.value + 1, n_workers=len(stats.proc_ids), seen=len(stats.seen), frontier=len(stats.frontier), num_already_visited=stats.num_already_visited,
                                max_t=stats.max_t)

            # A negative max_t_split disables the limit on the splitting timestep.
            if self.use_split and (self.max_t_split < 0 or t < self.max_t_split):
                if self.use_abstract_mapping:
                    splitted_elements = self.split_item_abstract_mapping(x, [m[0] for m in self.abstract_mapping])
                    if len(splitted_elements) > 1:
                        enqueue_fragments(x, x_label, t, splitted_elements)
                        continue
                else:  # split on the go
                    in_edges = list(self.graph.in_edges((x, x_label)))
                    # Do not split again an element that was itself reached through a split edge.
                    if len(in_edges) == 0 or "split" not in self.graph.edges[in_edges[0]].get("action"):
                        if self.can_be_splitted(template, x):
                            splitted_elements = self.check_split(t, x, x_label, nn, bar_main, stats, template, template_2d)
                            if len(splitted_elements) > 1:
                                # check_split yields (polytope, probs_range) pairs; only the polytope is kept.
                                enqueue_fragments(x, x_label, t, [polytope for polytope, _ in splitted_elements])
                                continue

            if self.use_split_with_seen:  # split according to the seen elements
                splitted_elements2 = self.split_item_abstract_mapping(x, [m[0] for m in stats.seen])
                if len(splitted_elements2) > 1:
                    enqueue_fragments(x, x_label, t, splitted_elements2)
                    continue
            # if nothing else applies, compute the successor
            stats.proc_ids.append(self.post_fn_remote.remote(self, x, x_label, nn, self.output_flag, t, template))

        # The first call only starts the plotting timer; later calls plot once the interval has elapsed.
        if stats.last_time_plot is None or time.time() - stats.last_time_plot >= self.plotting_time_interval:
            if stats.last_time_plot is not None:
                self.plot_fn(stats.vertices_list, template, template_2d)
            stats.last_time_plot = time.time()
        if self.update_progress_fn is not None:
            self.update_progress_fn(n_workers=len(stats.proc_ids), seen=len(stats.seen), frontier=len(stats.frontier), num_already_visited=stats.num_already_visited, max_t=stats.max_t)

        # process the results (if any)
        new_frontier = self.collect_results(stats, template)

        if self.avoid_irrelevants:
            # update prism
            self.update_prism_step(stats.frontier, new_frontier, stats.root, stats)
        elif self.use_bfs:
            for element in new_frontier:
                heapq.heappush(stats.frontier, element)
        else:
            stats.frontier.extend(new_frontier)
        # todo go through the tree and decide if we want to split already visited nodes based on the max and min probability of encountering a terminal state
        stats.new_frontier = []  # resets the new_frontier
        if self.save_graph:
            stats.last_time_save = datetime.datetime.now()
            networkx.write_gpickle(self.graph, os.path.join(self.save_dir, "graph.p"))
            # Close the file deterministically so the pickle is flushed to disk.
            with open(os.path.join(self.save_dir, "stats.p"), "wb") as stats_file:
                pickle.dump(stats, stats_file)