Python Experiment Examples, polyhedra.experiments_nn_analysis.Experiment Python Examples

Example #1

0

Show file

 def __init__(self):
     """Configure the experiment.

     Registers the callbacks, builds the input and analysis templates through
     ``get_template``, defines the unsafe zone and selects the neural-network
     checkpoint to load.
     """
     env_input_size: int = 3
     super().__init__(env_input_size)
     # callbacks stored on the instance (presumably consumed by the base class; it is not shown here)
     self.post_fn_remote = self.post_milp_invariant_net
     self.get_nn_fn = self.get_nn
     self.get_invariant_nn_fn = self.get_invariant_nn
     self.plot_fn = self.plot
     # two directions over the 3 state variables: x[0] and x[0] - x[1]
     self.template_2d: np.ndarray = np.array([[1, 0, 0], [1, -1, 0]])
     # mode 0 of get_template supplies both the initial boundaries and the input template
     input_boundaries, input_template = self.get_template(0)
     self.input_boundaries: List = input_boundaries
     self.input_template: np.ndarray = input_template
     self.assign_lbl_fn = 1  # just to stop error
     # mode 1 of get_template supplies the template used during the analysis (its boundaries are discarded)
     _, template = self.get_template(1)
     self.analysis_template: np.ndarray = template
     collision_distance = 0
     # single direction e0 - e1; paired with collision_distance below to form the unsafe zone
     # NOTE(review): presumably encodes x[0] - x[1] <= collision_distance — confirm against generate_region_constraints
     distance = [Experiment.e(self.env_input_size, 0) - Experiment.e(self.env_input_size, 1)]
     # self.use_bfs = True
     # self.n_workers = 1
     self.rounding_value = 2 ** 10
     self.use_rounding = False
     self.time_horizon = 400
     # list of (directions, bounds) pairs
     self.unsafe_zone: List[Tuple] = [(distance, np.array([collision_distance]))]
     self.input_epsilon = 0
     self.v_lead = 28
     self.max_speed = 36
     # alternative checkpoints, with the outcome recorded by the original author:
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_11-56-58/checkpoint_31/checkpoint-31"  # safe both with and without epsilon of 0.1.
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_11-56-58/checkpoint_37/checkpoint-37" #not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_12-37-27/checkpoint_24/checkpoint-24"  # safe at t=216
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_12-37-27/checkpoint_36/checkpoint-36"  # not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00002_2_cost_fn=0,epsilon_input=0_2021-01-17_12-38-53/checkpoint_40/checkpoint-40"  # not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00006_6_cost_fn=0,epsilon_input=0_2021-01-17_12-44-54/checkpoint_41/checkpoint-41" #safe
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00000_0_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_39/checkpoint-39" #unsafe
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00005_5_cost_fn=0,epsilon_input=0.1_2021-01-17_12-41-27/checkpoint_10/checkpoint-10"  # unsafe

Example #2

0

Show file

File: total_split.py Project: phate09/SafeDRL

    def find_direction_split(self, template, x, nn, pre_nn):
        """Fit a linear model to the network's output over samples of a region.

        Samples the region described by ``template`` and ``x``, evaluates
        ``nn`` on the preprocessed samples, regresses the log-odds of output
        column 0 on the raw samples and plots both the fitted and the actual
        values.

        NOTE(review): the visible excerpt ends without a ``return`` and leaves
        ``classif_line1`` and ``new_coeff`` unused, so the method is most
        likely truncated here.
        """
        # 10000 is the first argument of polytope.sample — presumably the number of samples
        samples = polytope.sample(10000, template, np.array(x))
        preprocessed = pre_nn(torch.tensor(samples).float())
        preprocessed_np = preprocessed.detach().numpy()  # not used in the visible part
        samples_ontput = torch.softmax(nn(preprocessed), 1)  # softmax over dimension 1
        predicted_label = samples_ontput.detach().numpy()[:, 0]  # column 0 of the softmax output
        # clip away exact 0/1 so that the log-odds below stay finite
        y = np.clip(predicted_label, 1e-7, 1 - 1e-7)
        inv_sig_y = np.log(y / (1 - y))  # transform to log-odds-ratio space
        from sklearn.linear_model import LinearRegression
        lr = LinearRegression()
        lr.fit(samples, inv_sig_y)
        # 2-D projection used only for plotting: (x[2], x[0] - x[1]); hard-coded for 3 state variables
        template_2d: np.ndarray = np.array([Experiment.e(3, 2), Experiment.e(3, 0) - Experiment.e(3, 1)])

        def sigmoid(x):
            # maps log-odds back to probabilities
            ex = np.exp(x)
            return ex / (1 + ex)

        preds = sigmoid(lr.predict(samples))
        # first plot: linear-model predictions; second plot: the network's actual output
        plot_points_and_prediction(samples @ template_2d.T, preds)
        plot_points_and_prediction(samples @ template_2d.T, predicted_label)
        coeff = lr.coef_
        intercept = lr.intercept_
        a = sympy.symbols('x')
        b = sympy.symbols('y')
        # only the first two regression coefficients are used for the line
        classif_line1 = Line(coeff[0].item() * a + coeff[1].item() * b + intercept)
        # NOTE(review): divides by coeff[1]; a zero coefficient would raise ZeroDivisionError
        new_coeff = -coeff[0].item() / coeff[1].item()

Example #3

0

Show file

 def post_milp(self, x, x_label, nn, output_flag, t, template):
     """Compute the successors of region ``x`` with one MILP per action.

     For each of the two actions a fresh Gurobi model is built: the state is
     constrained to the region, an observation is tied to the state (within
     ``input_epsilon / 2``), the network guard for the action is added and,
     if the guard is feasible, the dynamics are applied and the successor
     bounds are extracted along ``template``.

     Returns a list of ``Experiment.SuccessorInfo`` objects, one per action
     for which a successor was found.
     """
     successors = []
     for action in (0, 1):
         model = grb.Model()
         model.setParam('OutputFlag', output_flag)
         state = generate_input_region(model, template, x, self.env_input_size)
         obs = model.addMVar(shape=(2, ), lb=float("-inf"), ub=float("inf"), name="observation")
         half_eps = self.input_epsilon / 2
         # observation[1] lies within half_eps of state[0] - state[1]
         model.addConstr(obs[1] <= state[0] - state[1] + half_eps, name="obs_constr21")
         model.addConstr(obs[1] >= state[0] - state[1] - half_eps, name="obs_constr22")
         # observation[0] lies within half_eps of v_lead - state[2]
         model.addConstr(obs[0] <= self.v_lead - state[2] + half_eps, name="obs_constr11")
         model.addConstr(obs[0] >= self.v_lead - state[2] - half_eps, name="obs_constr12")
         if not Experiment.generate_nn_guard(model, obs, nn, action_ego=action):
             continue  # the network cannot pick this action inside the region
         next_state = StoppingCarExperiment.apply_dynamic(state,
                                                          model,
                                                          action=action,
                                                          env_input_size=self.env_input_size)
         model.update()
         model.optimize()
         found, next_bounds = self.h_repr_to_plot(model, template, next_state)
         if not found:
             continue
         info = Experiment.SuccessorInfo()
         info.successor = tuple(next_bounds)
         info.parent = x
         info.parent_lbl = x_label
         info.t = t + 1
         info.action = "policy"  # the concrete action index is not recorded
         successors.append(info)
     return successors

Example #4

0

Show file

File: total_split.py Project: phate09/SafeDRL

 def check_contained(self, poly1, poly2):
     """Return True when ``optimise`` finds a result for ``poly1`` restricted by ``poly2``.

     A model is built over the region ``poly1`` (along ``analysis_template``),
     the bounds of ``poly2`` are added as extra constraints, and the outcome
     of ``self.optimise`` decides the answer: ``None`` means False.
     """
     model = grb.Model()
     model.setParam('OutputFlag', self.output_flag)
     region = Experiment.generate_input_region(model, self.analysis_template, poly1, self.env_input_size)
     # use epsilon to prevent single points
     Experiment.generate_region_constraints(model, self.analysis_template, region, poly2, self.env_input_size, eps=1e-5)
     return self.optimise(self.analysis_template, model, region) is not None

Example #5

0

Show file

File: total_split.py Project: phate09/SafeDRL

 def __init__(self):
     """Configure a 2-variable experiment that uses box templates for input and analysis."""
     super().__init__()

     # splitting / probability options
     self.max_probability_split = 0.3

     # state space and plotting projection (the two axes swapped)
     self.env_input_size: int = 2
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # initial region: one bound per row of the box template
     self.input_boundaries = [5, 5, 5, 5]
     self.use_softmax = False
     self.use_milp_range_prob = False

     # the input and analysis templates are separate box arrays
     self.input_template = Experiment.box(self.env_input_size)
     self.analysis_template: np.ndarray = Experiment.box(self.env_input_size)

     # checkpoint of the network to analyse
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"

Example #6

0

Show file

File: continuous_experiments_nn_analysis.py Project: phate09/SafeDRL

 def check_unsafe(self, template, bnds):
     """Return True if the region ``(template, bnds)`` can satisfy any unsafe-zone constraint set.

     For every ``(directions, bounds)`` pair in ``self.unsafe_zone`` a model
     is built with both the region constraints and the unsafe constraints;
     the first model that finishes with status 2 makes the region unsafe.
     """
     for unsafe_directions, unsafe_bounds in self.unsafe_zone:
         model = grb.Model()
         model.setParam('OutputFlag', False)
         state = model.addMVar(shape=(self.env_input_size,), lb=float("-inf"), name="input")
         Experiment.generate_region_constraints(model, template, state, bnds, self.env_input_size)
         Experiment.generate_region_constraints(model, unsafe_directions, state, unsafe_bounds, self.env_input_size)
         model.update()
         model.optimize()
         # status 2 is Gurobi's OPTIMAL code, i.e. the combined constraints are feasible
         intersects_unsafe = model.status == 2
         if intersects_unsafe:
             return True
     return False

Example #7

0

Show file

File: total_split.py Project: phate09/SafeDRL

 def __init__(self):
     """Configure a 2-variable experiment that uses box templates for input and analysis."""
     super().__init__()
     self.max_probability_split = 0.5

     # state space and plotting projection (the two axes swapped)
     self.env_input_size: int = 2
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # initial region: one bound per row of the box template
     self.input_boundaries = [np.pi / 5, np.pi / 5, 1 / 5, 1 / 5]
     self.input_template = Experiment.box(self.env_input_size)

     # the analysis uses its own box array rather than sharing the input template
     self.analysis_template: np.ndarray = Experiment.box(self.env_input_size)

     # checkpoint of the network to analyse
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"

Example #8

0

Show file

File: probabilistic_experiments_nn_analysis.py Project: phate09/SafeDRL

 def octagon(n):
     """Build the octagon template over ``n`` variables.

     For each index ``i`` the rows ``e_i`` and ``-e_i`` are emitted, followed
     by the four sign combinations of ``e_i`` with every earlier ``e_j``
     (``j < i``). The rows are stacked with ``np.stack``.
     """
     rows = []
     for i in range(n):
         e_i = Experiment.e(n, i)
         rows.extend((e_i, -e_i))
         for j in range(i):
             e_j = Experiment.e(n, j)
             rows.extend((e_i + e_j, e_i - e_j, e_j - e_i, -e_j - e_i))
     return np.stack(rows)

Example #9

0

Show file

 def post_milp(self, x, x_label, nn, output_flag, t,
               template) -> List[Experiment.SuccessorInfo]:
     """Compute the successors of region ``x``, annotated with per-action bounds.

     The per-action bounds come from ``create_range_bounds_model``. For each
     of the two actions a fresh Gurobi model constrains the state to the
     region, ties an observation to the state (within ``input_epsilon / 2``),
     adds the network guard and, if it is feasible, applies the dynamics and
     extracts the successor bounds along ``template``.
     """
     ranges_probs = self.create_range_bounds_model(template, x,
                                                   self.env_input_size, nn)
     successors = []
     for action in (0, 1):
         model = grb.Model()
         model.setParam('OutputFlag', output_flag)
         model.setParam('DualReductions', 0)
         state = generate_input_region(model, template, x, self.env_input_size)
         obs = model.addMVar(shape=(2, ), lb=float("-inf"), ub=float("inf"), name="input")
         half_eps = self.input_epsilon / 2
         # observation[1] lies within half_eps of state[0] - state[1]
         model.addConstr(obs[1] <= state[0] - state[1] + half_eps, name="obs_constr21")
         model.addConstr(obs[1] >= state[0] - state[1] - half_eps, name="obs_constr22")
         # observation[0] lies within half_eps of v_lead - state[2]
         model.addConstr(obs[0] <= self.v_lead - state[2] + half_eps, name="obs_constr11")
         model.addConstr(obs[0] >= self.v_lead - state[2] - half_eps, name="obs_constr12")
         if not Experiment.generate_nn_guard(model, obs, nn, action_ego=action):
             continue  # the network cannot pick this action inside the region
         next_state = StoppingCarExperiment.apply_dynamic(state,
                                                          model,
                                                          action=action,
                                                          env_input_size=self.env_input_size)
         model.update()
         model.optimize()
         found, next_bounds = self.h_repr_to_plot(model, template, next_state)
         if not found:
             continue
         info = Experiment.SuccessorInfo()
         info.successor = tuple(next_bounds)
         info.parent = x
         info.parent_lbl = x_label
         info.t = t + 1
         info.action = "policy"  # the concrete action index is not recorded
         info.lb = ranges_probs[action][0]
         info.ub = ranges_probs[action][1]
         successors.append(info)
     return successors

Example #10

0

Show file

File: run_experiment_pendulum.py Project: phate09/SafeDRL

 def __init__(self):
     """Configure the experiment.

     Registers the callbacks, builds the input template through
     ``get_template`` and an octagon analysis template, defines the unsafe
     zone and the angle split around ``safe_angle``, and selects the
     neural-network checkpoint to load.
     """
     env_input_size: int = 2
     super().__init__(env_input_size)
     # callbacks stored on the instance (presumably consumed by the base class; it is not shown here)
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot
     self.assign_lbl_fn = self.assign_label
     self.additional_seen_fn = self.additional_seen
     # identity projection over the two state variables
     self.template_2d: np.ndarray = np.array([[1, 0], [0, 1]])
     # mode 0 of get_template supplies both the initial boundaries and the input template
     input_boundaries, input_template = self.get_template(0)
     self.input_boundaries: List = input_boundaries
     self.input_template: np.ndarray = input_template
     # _, template = self.get_template(1)
     template = Experiment.octagon(env_input_size)
     self.plotting_time_interval = 60 * 5
     self.analysis_template: np.ndarray = template
     # 30 * 2 * pi / 360, i.e. 30 degrees expressed in radians
     self.safe_angle = 30 * 2 * math.pi / 360
     # single-direction templates along +/- the first state variable
     theta = [self.e(env_input_size, 0)]
     neg_theta = [-self.e(env_input_size, 0)]
     # battery = [self.e(env_input_size, 4)]
     # list of (directions, bounds) pairs
     # NOTE(review): presumably encodes theta <= -safe_angle or -theta <= -safe_angle — confirm against generate_region_constraints
     self.unsafe_zone: List[Tuple] = [(theta, np.array([-self.safe_angle])),
                                      (neg_theta,
                                       np.array([-self.safe_angle]))]
     epsilon = 1e-4
     # same two directions, bounded by safe_angle - epsilon
     self.angle_split: List[Tuple] = [
         (theta, np.array([self.safe_angle - epsilon])),
         (neg_theta, np.array([self.safe_angle - epsilon]))
     ]
     self.use_rounding = False
     self.rounding_value = 1024
     self.time_horizon = 300
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
     self.tau = 0.02
     self.n_actions = 3

Example #11

0

Show file

File: run_experiment_stopping_car_continuous.py Project: phate09/SafeDRL

 def post_milp(self, x, nn, output_flag, t, template):
     """milp method

     Extends the persistent model ``self.internal_model`` by one step:
     adds an observation tied to ``self.last_input``, the continuous network
     guard and the dynamics, then optimises and extracts the successor
     bounds along ``template``. ``self.last_input`` is replaced by the new
     state variables so that the next call continues from them.

     Returns a list holding the successor bounds as a tuple, or an empty
     list when no successor was found. ``x`` and ``output_flag`` are not
     used in the body.
     """
     post = []
     # the model and state variables are reused across calls instead of being rebuilt
     gurobi_model = self.internal_model  # grb.Model()
     input = self.last_input  # Experiment.generate_input_region(gurobi_model, template, x, self.env_input_size)
     observation = gurobi_model.addMVar(shape=(2,), lb=float("-inf"), ub=float("inf"), name="observation")
     # observation[1] lies within input_epsilon / 2 of input[0] - input[1]
     gurobi_model.addConstr(observation[1] <= input[0] - input[1] + self.input_epsilon / 2, name=f"obs_constr21")
     gurobi_model.addConstr(observation[1] >= input[0] - input[1] - self.input_epsilon / 2, name=f"obs_constr22")
     # NOTE(review): `v_lead` is a bare name here, not `self.v_lead` — it must exist at module level; verify
     gurobi_model.addConstr(observation[0] <= v_lead - input[2] + self.input_epsilon / 2, name=f"obs_constr11")
     gurobi_model.addConstr(observation[0] >= v_lead - input[2] - self.input_epsilon / 2, name=f"obs_constr12")
     nn_output, max_val, min_val = Experiment.generate_nn_guard_continuous(gurobi_model, observation, nn)
     # sanity check: the encoded network output must match the real network on the solved observation
     # NOTE(review): reads `.X` before the optimize() call below — presumably generate_nn_guard_continuous solves the model; confirm
     is_equal = torch.isclose(nn(torch.from_numpy(observation.X).float()), torch.from_numpy(nn_output.X).float(), rtol=1e-3).all().item()
     assert is_equal
     # clipped_nn_output = gurobi_model.addMVar(lb=float("-inf"), shape=(len(nn_output)), name=f"clipped_nn_output")
     # gurobi_model.addConstr(nn_output[0] >= -12, name=f"clipped_out_constr1")
     # gurobi_model.addConstr(nn_output[0] <= 12, name=f"clipped_out_constr2")
     # feasible_action = Experiment.generate_nn_guard(gurobi_model, input, nn, action_ego=chosen_action)
     # apply dynamic
     x_prime = StoppingCarContinuousExperiment.apply_dynamic(input, gurobi_model, action=nn_output, env_input_size=self.env_input_size)
     gurobi_model.update()
     gurobi_model.optimize()
     found_successor, x_prime_results = self.h_repr_to_plot(gurobi_model, template, x_prime)
     # the next call continues from the successor variables
     self.last_input = x_prime
     # x_prime_results = x_prime_results.round(4)  # correct for rounding errors introduced by the conversion to h-repr
     if found_successor:
         post.append(tuple(x_prime_results))
     return post

Example #12

0

Show file

    def post_milp(self, x, x_label, nn, output_flag, t, template) -> List[Experiment.SuccessorInfo]:
        """Compute one successor of region ``x`` per action, annotated with bounds.

        The per-action bounds come from ``create_range_bounds_model``. For
        each of the two actions a fresh Gurobi model constrains the state to
        the region, applies the dynamics and extracts the successor bounds
        along ``template`` with ``optimise``.

        Returns:
            A list with one ``Experiment.SuccessorInfo`` per action.

        Raises:
            AssertionError: if ``optimise`` returns ``None`` for an action.
                It is raised explicitly so the check survives ``python -O``,
                where a plain ``assert`` is stripped and ``tuple(None)``
                would fail with an unrelated ``TypeError``.
        """
        ranges_probs = self.create_range_bounds_model(template, x, self.env_input_size, nn)
        post = []
        for chosen_action in range(2):
            gurobi_model = grb.Model()
            gurobi_model.setParam('OutputFlag', output_flag)
            gurobi_model.setParam('DualReductions', 0)
            state = generate_input_region(gurobi_model, template, x, self.env_input_size)
            x_prime = StoppingCarExperimentProbabilistic.apply_dynamic(state, gurobi_model, action=chosen_action, env_input_size=self.env_input_size)
            gurobi_model.update()
            gurobi_model.optimize()
            x_prime_results = optimise(template, gurobi_model, x_prime)
            if x_prime_results is None:
                # same exception type as the original assert, but not removable by -O
                raise AssertionError(f"optimise returned no successor bounds for action {chosen_action} at t={t}")
            successor_info = Experiment.SuccessorInfo()
            successor_info.successor = tuple(x_prime_results)
            successor_info.parent = x
            successor_info.parent_lbl = x_label
            successor_info.t = t + 1
            successor_info.action = "policy"  # the concrete action index is not recorded
            successor_info.lb = ranges_probs[chosen_action][0]
            successor_info.ub = ranges_probs[chosen_action][1]
            post.append(successor_info)

        return post

Example #13

0

Show file

File: probabilistic_experiments_nn_analysis.py Project: phate09/SafeDRL

 def box(n):
     """Build the box template over ``n`` variables: rows ``e_i`` and ``-e_i`` for each index, stacked in order."""
     rows = []
     for i in range(n):
         axis = Experiment.e(n, i)
         rows.extend((axis, -axis))
     return np.stack(rows)

Example #14

0

Show file

File: probabilistic_experiments_nn_analysis.py Project: phate09/SafeDRL

 def round_tuple(x, rounding_value):
     """Round every element of ``x`` with ``Experiment.round_single`` and return the results as a tuple.

     To be used only for template values, this is not a good rounding in other cases.
     """
     return tuple(Experiment.round_single(val, rounding_value) for val in x)

Example #15

0

Show file

File: total_split.py Project: phate09/SafeDRL

 def __init__(self):
     """Configure a 2-variable experiment with a box input template and a 6-direction analysis template."""
     super().__init__()
     self.v_lead = 28
     self.max_probability_split = 0.15

     # state space and plotting projection (the two axes swapped)
     self.env_input_size: int = 2
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # initial region: one bound per row of the box template
     self.input_boundaries = (9, 0, 3, 3)
     self.input_template = Experiment.box(self.env_input_size)

     # analysis template: the box directions plus +/- 1/3 * (first - second variable)
     first = Experiment.e(self.env_input_size, 0)
     second = Experiment.e(self.env_input_size, 1)
     self.analysis_template: np.ndarray = np.array([
         first,
         -first,
         second,
         -second,
         1 / 3 * (first - second),
         -1 / 3 * (first - second),
     ])

     # checkpoint of the network to analyse
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_71684_00004_4_2021-01-18_23-48-21/checkpoint_10/checkpoint-10"

Example #16

0

Show file

File: total_split.py Project: phate09/SafeDRL

 def find_important_dimensions(self, poly1, poly2):
     """Find the half-spaces of ``poly2`` that actually cut ``poly1``.

     Assuming ``check_contained(poly1, poly2)`` returns true, we are
     interested in the halfspaces that matter (poly1 = root, poly2 =
     candidate). Each direction of ``analysis_template`` is tried on its own:
     the region of ``poly1`` is restricted by that single bound of ``poly2``
     and re-optimised. If the resulting bounds differ from ``poly1``, the
     direction is recorded together with an estimated volume of the
     restricted region.

     Returns:
         A list of ``(direction_index, estimated_volume)`` tuples.

     NOTE(review): ``self.optimise`` may return ``None`` (``check_contained``
     tests for it); that case is not handled here and would make
     ``np.allclose`` fail.
     """
     from scipy.spatial import ConvexHull  # kept local as in the original, but imported once instead of per iteration

     # Binary Space Partitioning
     gurobi_model = grb.Model()
     gurobi_model.setParam('OutputFlag', self.output_flag)
     input1 = Experiment.generate_input_region(gurobi_model, self.analysis_template, poly1, self.env_input_size)
     root_bounds = np.array(poly1)
     relevant_directions = []
     for j, direction in enumerate(self.analysis_template):
         projection = 0
         for i in range(self.env_input_size):
             projection += direction[i] * input1[i]
         # only one candidate half-space is active at a time: drop the previous one first
         previous_constraint = gurobi_model.getConstrByName("check_contained_constraint")
         if previous_constraint is not None:
             gurobi_model.remove(previous_constraint)
             gurobi_model.update()
         gurobi_model.addConstr(projection <= poly2[j], name="check_contained_constraint")
         gurobi_model.update()
         x_results = self.optimise(self.analysis_template, gurobi_model, input1)
         if not np.allclose(root_bounds, x_results):
             # the volume is estimated from the convex hull of random samples of the restricted region
             samples = polytope.sample(1000, self.analysis_template, x_results)
             volume = ConvexHull(samples).volume  # estimated volume
             relevant_directions.append((j, volume))
     return relevant_directions

Example #17

0

Show file

File: total_split.py Project: phate09/SafeDRL

 def __init__(self):
     """Configure a 2-variable experiment with a box input template and an 8-direction analysis template."""
     super().__init__()
     self.v_lead = 28

     # state space and plotting projection (the two axes swapped)
     self.env_input_size: int = 2
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # initial region: one bound per row of the box template
     self.input_boundaries = (10, -3, 32, -26)
     self.input_template = Experiment.box(self.env_input_size)

     # analysis template: the box directions plus the four sign combinations of
     # (1 / 4.5 * first variable) with the second variable
     delta_x = Experiment.e(self.env_input_size, 0)
     v_ego = Experiment.e(self.env_input_size, 1)
     directions = np.array([
         delta_x,
         -delta_x,
         v_ego,
         -v_ego,
         1 / 4.5 * delta_x + v_ego,
         1 / 4.5 * delta_x - v_ego,
         -1 / 4.5 * delta_x + v_ego,
         -1 / 4.5 * delta_x - v_ego,
     ])
     self.max_probability_split = 0.33
     self.analysis_template: np.ndarray = directions

     # checkpoint of the network to analyse
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe

Example #18

0

Show file

def main():
    """Demo: merge a fixed selection of generated intervals and plot each stage.

    Three figures are shown: the generated intervals, the intervals after the
    selected subset has been replaced by its merge, and the final set after
    dropping every interval for which ``contained(x, interval)`` holds with
    respect to another interval.
    """
    box_template = Experiment.box(2)
    intervals = generate_intervals(200)
    all_items = np.array(intervals)
    plot_intervals(intervals, box_template).show()

    # hard-coded selection mask (a previous result of run_genetic_merge(items_array, template))
    selection_mask = np.array([0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1,
                               1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0,
                               0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1,
                               1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
                               1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
                               0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1,
                               0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1,
                               0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
                               0, 0, 1, 1, 0, 1, 1, 1, ], dtype="bool")

    # merge the selected intervals into one and put it next to the unselected ones
    merged, volume_new, volume_old = merge_with_volume_analysis(box_template, all_items[selection_mask])
    unselected: np.ndarray = all_items[np.invert(selection_mask)]
    with_merged = np.append(unselected, np.expand_dims(merged, 0), 0)
    plot_intervals(with_merged, box_template).show()

    # keep an interval only if it is the reference itself or is not contained in the reference
    survivors = with_merged.copy()
    for reference in with_merged:
        survivors = [candidate for candidate in survivors
                     if np.equal(reference, candidate).all() or not contained(candidate, reference)]
    plot_intervals(survivors, box_template).show()
    print("done")

Example #19

0

Show file

 def get_template(self, mode=0):
     """Return ``(input_boundaries, template)`` for the requested mode.

     mode 0: box directions over all state variables, with fixed boundaries.
     mode 1: three directions built from the first two variables, with no
     boundaries (``None``).
     Any other mode returns ``None``.
     """
     n_vars = self.env_input_size
     p = Experiment.e(n_vars, 0)
     v = Experiment.e(n_vars, 1)
     if mode == 0:  # box directions with intervals
         directions = []
         for dimension in range(self.env_input_size):
             directions.append(Experiment.e(self.env_input_size, dimension))
             directions.append(-Experiment.e(self.env_input_size, dimension))
         return [9, -8, 0, 0.1], np.array(directions)
     elif mode == 1:  # directions to easily find fixed point
         return None, np.array([v + p, -v - p, -p])
     return None

Example #20

0

Show file

File: total_split.py Project: phate09/SafeDRL

 def create_range_bounds_model(self, template, x, env_input_size, nn, round=-1):
     """Return the output ranges of ``nn`` over the region ``(template, x)``.

     A model restricted to the region is solved first (it must finish with
     status 2), then the observation variable is derived from the state and
     ``Experiment.get_range_bounds`` computes the ranges. When
     ``self.use_softmax`` is set the ranges are passed through
     ``unroll_methods.softmax_interval``. ``round`` is accepted but rounding
     is not implemented yet.
     """
     model = grb.Model()
     model.setParam('OutputFlag', False)
     region = Experiment.generate_input_region(model, template, x, env_input_size)
     model.update()
     model.optimize()
     assert model.status == 2, "LP wasn't optimally solved"
     # get the observation from the input
     observation = self.get_observation_variable(region, model)
     ranges = Experiment.get_range_bounds(observation, nn, model)
     ranges_probs = unroll_methods.softmax_interval(ranges) if self.use_softmax else ranges
     if round >= 0:
         # todo round the probabilities
         pass
     return ranges_probs

Example #21

0

Show file

def main():
    """Exercise the region-merging helpers on two hand-written regions.

    Builds the template returned by ``Experiment.box(2)``, merges the two
    regions with ``merge_regions``, measures the merged result with
    ``compute_volume`` and finally runs ``merge_with_volume_analysis`` on the
    same inputs. Nothing is returned or printed; the results are only bound
    to local names.
    """
    box_template = Experiment.box(2)
    first_region = [1, 1, 1, 1]
    second_region = [2, 0, 2, 0]
    # Each helper receives its own freshly built array of the two regions.
    merged1 = merge_regions(box_template,
                            np.array([first_region, second_region]))
    volume = compute_volume(box_template, merged1)
    merged, volume_new, volume_old = merge_with_volume_analysis(
        box_template, np.array([first_region, second_region]))

Example #22

0

Show file

 def get_template(self, mode=0):
     """Return ``(input_boundaries, template)`` for one of six presets.

     The template rows are combinations of ``theta`` and ``theta_dot``, taken
     from ``Experiment.e(self.env_input_size, 0)`` and
     ``Experiment.e(self.env_input_size, 1)``. ``input_boundaries`` is a list
     with one value per template row, or ``None`` for modes 1-3.
     An unrecognised ``mode`` yields ``None``.
     """
     theta = Experiment.e(self.env_input_size, 0)
     theta_dot = Experiment.e(self.env_input_size, 1)
     # battery = Experiment.e(self.env_input_size, 4)
     diag_sum = theta + theta_dot
     diag_diff = theta - theta_dot
     if mode == 0:  # box directions with intervals
         # Alternative boundaries used in earlier runs:
         #   [0.20, 0.20, 1 / 5, 1 / 5]
         #   [3.13, 3.15, -0.08193365, 0.08193365]
         #   [1, 1, 1, 1]
         #   [0.04373426, -0.04373426, -0.04980056, 0.04980056, 0.045, -0.045, -0.51, 0.51]
         input_boundaries = [0.05, 0.05, 0.05, 0.05]
         directions = [theta, -theta, theta_dot, -theta_dot]
     elif mode == 1:  # directions to easily find fixed point
         input_boundaries = None
         directions = [
             theta, -theta, theta_dot, -theta_dot,
             diag_sum, -diag_sum, diag_diff, -diag_diff,
         ]  # x_dot, -x_dot,theta_dot - theta
     elif mode == 2:
         input_boundaries = None
         directions = [theta, -theta, theta_dot, -theta_dot]
     elif mode == 3:
         input_boundaries = None
         directions = [theta, theta_dot, -theta_dot]
     elif mode == 4:
         # alternative: [0.09375, 0.5, 0.5, 0.0625, 0.09375]
         input_boundaries = [0.09375, 0.625, 0.625, 0.0625, 0.1875]
         directions = [theta, theta_dot, -theta_dot, diag_sum, diag_diff]
     elif mode == 5:
         input_boundaries = [0.125, 0.0625, 0.1875]
         directions = [theta, diag_sum, diag_diff]
     else:
         return None
     return input_boundaries, np.array(directions)

Example #23

0

Show file

File: run_ora_bouncing_ball.py Project: phate09/SafeDRL

 def generate_nn_polyhedral_guard(self, nn, chosen_action, output_flag):
     """Bound the observations for which ``nn`` selects ``chosen_action``.

     A fresh Gurobi model gets an unbounded two-element ``observation``
     variable, ``Experiment.generate_nn_guard`` adds the guard for
     ``chosen_action``, and the module-level ``optimise`` is run over the
     template from ``Experiment.octagon(2)``.

     Returns:
         ``(template, result)``: the octagon template and whatever
         ``optimise`` returned for it.
     """
     model = grb.Model()
     for param_name, param_value in (('OutputFlag', output_flag), ('Threads', 2)):
         model.setParam(param_name, param_value)
     infinity = float("inf")
     observation = model.addMVar(shape=(2, ), lb=-infinity, ub=infinity, name="observation")
     Experiment.generate_nn_guard(model, observation, nn, action_ego=chosen_action)
     octagon_template = Experiment.octagon(2)
     # self.env_input_size is deliberately left untouched here (the original
     # toggled it between 2 and 6 around this call at some point).
     octagon_bounds = optimise(octagon_template, model, observation)
     return octagon_template, octagon_bounds

Example #24

0

Show file

 def __init__(self):
     """Configure the experiment for a two-dimensional input space.

     Wires the post/NN/plot callbacks, takes both the input template and the
     analysis template from ``get_template(0)``, and sets the time horizon,
     rounding value, unsafe zone and checkpoint path.
     """
     env_input_size: int = 2
     super().__init__(env_input_size)

     # Callbacks used by the base class.
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot

     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # Input region and analysis template come from the same preset; the
     # preset is fetched twice so the two templates are separate objects.
     self.input_boundaries, self.input_template = self.get_template(0)
     _, self.analysis_template = self.get_template(0)

     self.time_horizon = 500
     self.rounding_value = 2 ** 8

     # Unsafe zone: a list of (directions, offsets) pairs.
     position = Experiment.e(self.env_input_size, 0)
     velocity = Experiment.e(self.env_input_size, 1)
     unsafe_directions = [position, -velocity, velocity]
     unsafe_offsets = np.array([0, 1, 0])
     self.unsafe_zone: List[Tuple] = [(unsafe_directions, unsafe_offsets)]

     self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_c7326_00000_0_2021-01-16_05-43-36/checkpoint_36/checkpoint-36"

Example #25

0

Show file

File: total_split.py Project: phate09/SafeDRL

 def generate_root_polytope(self, input_boundaries):
     """Build the root polytope for ``input_boundaries``.

     The input region (``self.input_template`` with ``input_boundaries``) is
     encoded in a fresh Gurobi model and optimised along
     ``self.analysis_template`` via ``self.optimise``.

     Returns:
         The optimisation results as a tuple, or ``None`` (after printing
         "Model unsatisfiable") when ``self.optimise`` returns ``None``.
     """
     model = grb.Model()
     model.setParam('OutputFlag', self.output_flag)
     region = Experiment.generate_input_region(
         model, self.input_template, input_boundaries, self.env_input_size)
     x_results = self.optimise(self.analysis_template, model, region)
     if x_results is not None:
         return tuple(x_results)
     print("Model unsatisfiable")
     return None

Example #26

0

Show file

 def get_template(self, mode=0):
     """Return the input boundaries preset selected by ``mode``.

     Unlike what the name suggests, only the boundaries are returned (no
     template). The previous version also built two ``Experiment.e`` vectors
     that were never used; they have been removed, so the lookup no longer
     touches instance state.

     Args:
         mode: integer preset selector.
             0 -- large s0, 1 -- small s0, 2-4 -- further presets.

     Returns:
         A new list of four boundary values, or ``None`` when ``mode`` is
         not one of the known presets.
     """
     presets = {
         0: (6, -3, 1, 1),  # large s0 (earlier alternative: [0, 0, 10, 10])
         1: (6, -3, 0, 0.1),  # small s0
         2: (9, -3, 0, 0.1),
         3: (9, -5, 0, 0.1),
         4: (9, -5, 1, 1),
     }
     input_boundaries = presets.get(mode)
     if input_boundaries is None:
         # Unknown modes yielded None before (implicit fall-through); keep that.
         return None
     # Hand out a fresh list each call so callers may mutate it freely.
     return list(input_boundaries)

Example #27

0

Show file

 def __init__(self):
     """Configure the experiment for a two-dimensional input space.

     Sets the callbacks, a fixed box input region, an eight-direction
     analysis template (axis directions plus diagonals with slope 1/4.5),
     the unsafe zone, run parameters and the checkpoint to load.
     """
     env_input_size: int = 2
     super().__init__(env_input_size)

     # Callbacks used by the base class.
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot

     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # Fixed input region over a box template (get_template is not used here).
     self.input_boundaries = (10, -3, 32, -26)
     self.input_template = Experiment.box(env_input_size)

     delta_x = Experiment.e(env_input_size, 0)
     v_ego = Experiment.e(env_input_size, 1)
     # Earlier variants of the analysis template:
     #   Experiment.combinations([delta_x, - v_ego])
     #   np.stack([x_lead - x_ego, -(x_lead - x_ego), - v_ego, v_ego])
     scaled_gap = 1 / 4.5 * delta_x
     self.analysis_template: np.ndarray = np.array([
         delta_x,
         -delta_x,
         v_ego,
         -v_ego,
         scaled_gap + v_ego,
         scaled_gap - v_ego,
         -scaled_gap + v_ego,
         -scaled_gap - v_ego,
     ])

     # Unsafe zone: a list of (directions, offsets) pairs.
     collision_distance = 0
     distance = [Experiment.e(self.env_input_size, 0)]
     self.unsafe_zone: List[Tuple] = [(distance, np.array([collision_distance]))]

     # Run parameters.
     # self.use_bfs = True
     # self.n_workers = 1
     self.rounding_value = 2 ** 10
     self.use_rounding = False
     self.time_horizon = 20
     self.input_epsilon = 0
     self.v_lead = 28
     self.max_speed = 36

     # Checkpoints tried so far, with the analysis outcome noted after each.
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_11-56-58/checkpoint_31/checkpoint-31"  # safe both with and without epsilon of 0.1.
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_11-56-58/checkpoint_37/checkpoint-37" #not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_12-37-27/checkpoint_24/checkpoint-24"  # safe at t=216
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_12-37-27/checkpoint_36/checkpoint-36"  # not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00002_2_cost_fn=0,epsilon_input=0_2021-01-17_12-38-53/checkpoint_40/checkpoint-40"  # not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00006_6_cost_fn=0,epsilon_input=0_2021-01-17_12-44-54/checkpoint_41/checkpoint-41" #safe
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00000_0_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_39/checkpoint-39" #unsafe
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe

Example #28

0

Show file

File: total_split.py Project: phate09/SafeDRL

 def __init__(self):
     """Set the splitting options, input region, analysis template and checkpoint.

     The input space has two dimensions (``delta_x`` and ``v_ego``). The
     analysis template holds the four axis directions plus four diagonals
     with slope 1/4.5.
     """
     self.env_input_size: int = 2

     # Splitting behaviour.
     self.use_split = True
     self.use_entropy_split = True
     self.max_probability_split = 0.33
     self.minimum_length = 0.2

     # Solver / probability options.
     self.output_flag = False
     self.input_epsilon = 0
     self.use_softmax = True
     self.use_milp_range_prob = True

     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # Input region over a box template.
     self.input_boundaries = (50, 0, 36, 36)
     self.input_template = Experiment.box(self.env_input_size)

     delta_x = Experiment.e(self.env_input_size, 0)
     # x_ego = Experiment.e(self.env_input_size, 1)
     v_ego = Experiment.e(self.env_input_size, 1)
     # Earlier variant: Experiment.combinations([1 / 4.5*(x_lead - x_ego), - v_ego])
     scaled_gap = 1 / 4.5 * delta_x
     self.analysis_template: np.ndarray = np.array([
         delta_x,
         -delta_x,
         v_ego,
         -v_ego,
         scaled_gap + v_ego,
         scaled_gap - v_ego,
         -scaled_gap + v_ego,
         -scaled_gap - v_ego,
     ])

     self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe

Example #29

0

Show file

 def post_milp(self, x, x_label, nn, output_flag, t, template):
     """Compute the successors of region ``x`` with one MILP per action.

     The per-action probability bounds are obtained once from
     ``create_range_bounds_model``. Then, for every action index in
     ``range(self.n_actions)``, a fresh Gurobi model is built from the input
     region, the angle update and the dynamics; actions whose model does not
     reach status 2 (Gurobi's optimal code) are skipped.

     Returns:
         A list of ``Experiment.SuccessorInfo`` objects, one per action for
         which ``h_repr_to_plot`` reported a successor. Each carries the
         successor tuple, the parent region and label, ``t + 1``, the action
         tag ``"policy"`` and the lower/upper bounds for that action.
     """
     ranges_probs = self.create_range_bounds_model(template, x, self.env_input_size, nn)
     successors = []
     # (an earlier variant also split successors on theta being within a safe angle)
     for chosen_action in range(self.n_actions):
         # if (chosen_action == 2 or chosen_action == 1) and x_label == 1:  # skip actions when battery is dead
         #     continue
         model = grb.Model()
         model.setParam('OutputFlag', output_flag)
         model.setParam('Threads', 2)
         input_region = generate_input_region(model, template, x, self.env_input_size)
         max_theta, min_theta, max_theta_dot, min_theta_dot = self.get_theta_bounds(model, input_region)
         # The NN guard (PendulumExperiment.generate_nn_guard) is currently
         # not applied here; every action is explored.
         sin_cos_table = self.get_sin_cos_table(
             max_theta, min_theta, max_theta_dot, min_theta_dot, action=chosen_action)
         newthdot, newtheta = PendulumExperiment.generate_angle_milp(model, input_region, sin_cos_table)
         # apply dynamic
         x_prime = self.apply_dynamic(
             input_region,
             model,
             newthdot=newthdot,
             newtheta=newtheta,
             env_input_size=self.env_input_size,
             action=chosen_action,
         )
         model.update()
         model.optimize()
         if model.status != 2:
             # not solved to optimality: no successor for this action
             continue
         found_successor, x_prime_results = self.h_repr_to_plot(model, template, x_prime)
         if not found_successor:
             continue
         info = Experiment.SuccessorInfo()
         info.successor = tuple(x_prime_results)
         info.parent = x
         info.parent_lbl = x_label
         info.t = t + 1
         info.action = "policy"  # chosen_action
         info.lb = ranges_probs[chosen_action][0]
         info.ub = ranges_probs[chosen_action][1]
         successors.append(info)
     return successors

Example #30

0

Show file

File: run_experiment_stopping_car_simple.py Project: phate09/SafeDRL

 def __init__(self):
     env_input_size: int = 3
     super().__init__(env_input_size)
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot
     self.template_2d: np.ndarray = np.array([[1, 0, 0], [1, -1, 0]])
     input_boundaries = [30, -25, 0, 0, 36, -28]
     input_template = Experiment.box(self.env_input_size)
     self.input_boundaries: List = input_boundaries
     self.input_template: np.ndarray = input_template
     x_lead = Experiment.e(self.env_input_size, 0)
     x_ego = Experiment.e(self.env_input_size, 1)
     v_ego = Experiment.e(self.env_input_size, 2)
     template = np.array(
         [-(x_lead - x_ego), (x_lead - x_ego), v_ego, -v_ego])
     self.analysis_template: np.ndarray = template
     collision_distance = 0
     distance = [
         Experiment.e(self.env_input_size, 0) -
         Experiment.e(self.env_input_size, 1)
     ]
     self.rounding_value = 2**10
     self.use_rounding = False
     self.time_horizon = 400
     self.unsafe_zone: List[Tuple] = [(distance,
                                       np.array([collision_distance]))]
     self.input_epsilon = 0
     self.v_lead = 28
     self.max_speed = 36