def __init__(self):
    """Configure the three-variable stopping-car experiment that uses the invariant network.

    Registers the callbacks under the generic ``*_fn`` attribute names, obtains
    the input and analysis templates from ``get_template``, declares the unsafe
    region and selects the trained checkpoint stored in ``nn_path``.
    """
    state_dim: int = 3
    super().__init__(state_dim)

    # Callbacks exposed under the generic names.
    self.post_fn_remote = self.post_milp_invariant_net
    self.get_nn_fn = self.get_nn
    self.get_invariant_nn_fn = self.get_invariant_nn
    self.plot_fn = self.plot

    # Two projection rows: variable 0, and (variable 0 - variable 1).
    self.template_2d: np.ndarray = np.array([[1, 0, 0], [1, -1, 0]])

    boundaries, in_template = self.get_template(0)
    self.input_boundaries: List = boundaries
    self.input_template: np.ndarray = in_template
    self.assign_lbl_fn = 1  # just to stop error
    _, analysis_template = self.get_template(1)
    self.analysis_template: np.ndarray = analysis_template

    # Unsafe region as an (A, b) pair; A is the single direction (variable 0 - variable 1).
    # NOTE(review): presumably read as A @ x <= b by generate_region_constraints - confirm.
    collision_distance = 0
    gap = Experiment.e(self.env_input_size, 0) - Experiment.e(self.env_input_size, 1)
    self.unsafe_zone: List[Tuple] = [([gap], np.array([collision_distance]))]

    # Analysis settings.
    # self.use_bfs = True
    # self.n_workers = 1
    self.rounding_value = 2 ** 10
    self.use_rounding = False
    self.time_horizon = 400
    self.input_epsilon = 0
    self.v_lead = 28
    self.max_speed = 36

    # Checkpoints tried so far (outcome noted by the original author after each path).
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_11-56-58/checkpoint_31/checkpoint-31"  # safe both with and without epsilon of 0.1.
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_11-56-58/checkpoint_37/checkpoint-37"  # not determined
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_12-37-27/checkpoint_24/checkpoint-24"  # safe at t=216
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_12-37-27/checkpoint_36/checkpoint-36"  # not determined
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00002_2_cost_fn=0,epsilon_input=0_2021-01-17_12-38-53/checkpoint_40/checkpoint-40"  # not determined
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00006_6_cost_fn=0,epsilon_input=0_2021-01-17_12-44-54/checkpoint_41/checkpoint-41"  # safe
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00000_0_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_39/checkpoint-39"  # unsafe
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00005_5_cost_fn=0,epsilon_input=0.1_2021-01-17_12-41-27/checkpoint_10/checkpoint-10"  # unsafe
def find_direction_split(self, template, x, nn, pre_nn):
    """Fit a linear model to the network's action-0 probability over a sampled region.

    Draws 10000 points from the polytope ``(template, x)``, evaluates
    ``softmax(nn(pre_nn(points)))`` and regresses the log-odds of the first
    output column on the raw samples. Both the fitted and the true
    probabilities are plotted.

    NOTE(review): the visible body ends after computing the slope of the fitted
    line and returns nothing - confirm whether the remainder of the function
    is missing from this view.
    """
    points = polytope.sample(10000, template, np.array(x))
    preprocessed = pre_nn(torch.tensor(points).float())
    preprocessed_np = preprocessed.detach().numpy()
    probabilities = torch.softmax(nn(preprocessed), 1)
    predicted_label = probabilities.detach().numpy()[:, 0]

    # Clip away from 0 and 1 so the logit below stays finite.
    clipped = np.clip(predicted_label, 1e-7, 1 - 1e-7)
    log_odds = np.log(clipped / (1 - clipped))  # transform to log-odds-ratio space

    from sklearn.linear_model import LinearRegression
    regression = LinearRegression()
    regression.fit(points, log_odds)

    # Plotting projection: variable 2 against (variable 0 - variable 1).
    template_2d: np.ndarray = np.array([Experiment.e(3, 2), Experiment.e(3, 0) - Experiment.e(3, 1)])

    def sigmoid(x):
        ex = np.exp(x)
        return ex / (1 + ex)

    projected = points @ template_2d.T
    preds = sigmoid(regression.predict(points))
    plot_points_and_prediction(projected, preds)
    plot_points_and_prediction(points @ template_2d.T, predicted_label)

    coeff = regression.coef_
    intercept = regression.intercept_
    a = sympy.symbols('x')
    b = sympy.symbols('y')
    classif_line1 = Line(coeff[0].item() * a + coeff[1].item() * b + intercept)
    new_coeff = -coeff[0].item() / coeff[1].item()
def post_milp(self, x, x_label, nn, output_flag, t, template):
    """Compute the successors of region ``x`` with one MILP per discrete action.

    For each of the two actions a fresh Gurobi model is built: the input region
    ``(template, x)``, an observation tied to the input within
    ``self.input_epsilon / 2``, the network guard for that action and the
    dynamics. Each feasible result is wrapped in an ``Experiment.SuccessorInfo``.

    Returns:
        list of ``Experiment.SuccessorInfo`` (possibly empty).
    """
    half_eps = self.input_epsilon / 2
    successors = []
    for chosen_action in range(2):
        model = grb.Model()
        model.setParam('OutputFlag', output_flag)
        state = generate_input_region(model, template, x, self.env_input_size)
        # model.addConstr(state[0] >= 0, name="input_base_constr1")
        # model.addConstr(state[1] >= 0, name="input_base_constr2")
        # model.addConstr(state[2] >= 20, name="input_base_constr3")
        observation = model.addMVar(shape=(2, ), lb=float("-inf"), ub=float("inf"), name="observation")

        # observation[1] tracks state[0] - state[1]; observation[0] tracks v_lead - state[2].
        model.addConstr(observation[1] <= state[0] - state[1] + half_eps, name="obs_constr21")
        model.addConstr(observation[1] >= state[0] - state[1] - half_eps, name="obs_constr22")
        model.addConstr(observation[0] <= self.v_lead - state[2] + half_eps, name="obs_constr11")
        model.addConstr(observation[0] >= self.v_lead - state[2] - half_eps, name="obs_constr12")
        # model.addConstr(state[3] <= self.max_speed, name="v_constr_input")
        # model.addConstr(state[3] >= -self.max_speed, name="v_constr_input")

        feasible_action = Experiment.generate_nn_guard(model, observation, nn, action_ego=chosen_action)
        # feasible_action = Experiment.generate_nn_guard(model, state, nn, action_ego=chosen_action)
        if not feasible_action:
            continue

        # apply dynamic
        x_prime = StoppingCarExperiment.apply_dynamic(
            state, model, action=chosen_action, env_input_size=self.env_input_size)
        model.update()
        model.optimize()
        found_successor, x_prime_results = self.h_repr_to_plot(model, template, x_prime)
        if not found_successor:
            continue

        # successors.append((tuple(x_prime_results), (x, x_label)))
        info = Experiment.SuccessorInfo()
        info.successor = tuple(x_prime_results)
        info.parent = x
        info.parent_lbl = x_label
        info.t = t + 1
        info.action = "policy"  # chosen_action
        # info.lb = ranges_probs[chosen_action][0]
        # info.ub = ranges_probs[chosen_action][1]
        successors.append(info)
    return successors
def check_contained(self, poly1, poly2):
    """Report whether the joint constraints of ``poly1`` and ``poly2`` admit a solution.

    Both polytopes are expressed over ``self.analysis_template``. The second
    one is added with ``eps=1e-5`` (use epsilon to prevent single points).

    Returns:
        ``True`` when ``self.optimise`` yields a result, ``False`` when it
        returns ``None``.
    """
    model = grb.Model()
    model.setParam('OutputFlag', self.output_flag)
    region = Experiment.generate_input_region(model, self.analysis_template, poly1, self.env_input_size)
    Experiment.generate_region_constraints(
        model, self.analysis_template, region, poly2, self.env_input_size, eps=1e-5)
    outcome = self.optimise(self.analysis_template, model, region)
    return outcome is not None
def __init__(self):
    """Override the parent's defaults for a two-variable pendulum run with a wide input box.

    Softmax and MILP range probabilities are both switched off, and the input
    and analysis templates are plain boxes.
    """
    super().__init__()

    # Splitting and probability options.
    self.max_probability_split = 0.3
    self.use_softmax = False
    self.use_milp_range_prob = False

    # State space and initial region.
    self.env_input_size: int = 2
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
    self.input_boundaries = [5, 5, 5, 5]
    self.input_template = Experiment.box(self.env_input_size)

    # A separate box instance, so the two templates do not share one array.
    analysis_box = Experiment.box(self.env_input_size)
    self.analysis_template: np.ndarray = analysis_box

    self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
def check_unsafe(self, template, bnds):
    """Return ``True`` if the region ``(template, bnds)`` intersects any unsafe zone.

    One Gurobi model is solved per ``(A, b)`` entry of ``self.unsafe_zone``,
    constrained by both the region and the zone. A model status of 2 counts as
    an intersection.
    NOTE(review): status 2 is presumably Gurobi's OPTIMAL code - confirm.
    """
    for zone_template, zone_bounds in self.unsafe_zone:
        model = grb.Model()
        model.setParam('OutputFlag', False)
        state = model.addMVar(shape=(self.env_input_size,), lb=float("-inf"), name="input")
        Experiment.generate_region_constraints(model, template, state, bnds, self.env_input_size)
        Experiment.generate_region_constraints(model, zone_template, state, zone_bounds, self.env_input_size)
        model.update()
        model.optimize()
        intersects = model.status == 2
        if intersects:
            return True
    return False
def __init__(self):
    """Override the parent's defaults for a two-variable pendulum run with a small input box.

    The input boundaries are ``pi/5`` for the first variable and ``1/5`` for
    the second; both templates are plain boxes.
    """
    super().__init__()
    self.max_probability_split = 0.5

    self.env_input_size: int = 2
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

    angle_bound = np.pi / 5
    rate_bound = 1 / 5
    self.input_boundaries = [angle_bound, angle_bound, rate_bound, rate_bound]
    self.input_template = Experiment.box(self.env_input_size)

    # theta = Experiment.e(self.env_input_size, 0)
    # theta_dot = Experiment.e(self.env_input_size, 1)
    # template = np.array([theta, -theta, theta_dot, -theta_dot])
    analysis_box = Experiment.box(self.env_input_size)
    self.analysis_template: np.ndarray = analysis_box

    self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
def octagon(n):
    """Stack the octagon template directions for ``n`` variables.

    For every index ``i`` the rows ``e_i`` and ``-e_i`` are emitted, followed,
    for each ``j < i``, by ``e_i + e_j``, ``e_i - e_j``, ``e_j - e_i`` and
    ``-e_j - e_i``, where ``e_k`` is ``Experiment.e(n, k)``.

    Returns:
        The rows stacked with ``np.stack``.
    """
    rows = []
    for i in range(n):
        e_i = Experiment.e(n, i)
        rows += [e_i, -e_i]
        for j in range(i):
            e_j = Experiment.e(n, j)
            rows += [e_i + e_j, e_i - e_j, e_j - e_i, -e_j - e_i]
    return np.stack(rows)
def post_milp(self, x, x_label, nn, output_flag, t, template) -> List[Experiment.SuccessorInfo]:
    """Compute the successors of region ``x`` and attach probability bounds to each.

    ``create_range_bounds_model`` supplies one ``(lb, ub)`` pair per action.
    For each of the two actions a fresh MILP is built from the input region,
    the epsilon-relaxed observation, the network guard and the dynamics.

    Returns:
        list of ``Experiment.SuccessorInfo`` with ``lb``/``ub`` filled in.
    """
    ranges_probs = self.create_range_bounds_model(template, x, self.env_input_size, nn)
    half_eps = self.input_epsilon / 2
    successors = []
    for chosen_action in range(2):
        model = grb.Model()
        model.setParam('OutputFlag', output_flag)
        model.setParam('DualReductions', 0)
        state = generate_input_region(model, template, x, self.env_input_size)
        # The observation variable is registered under the name "input" in this variant.
        observation = model.addMVar(shape=(2, ), lb=float("-inf"), ub=float("inf"), name="input")

        # observation[1] tracks state[0] - state[1]; observation[0] tracks v_lead - state[2].
        model.addConstr(observation[1] <= state[0] - state[1] + half_eps, name="obs_constr21")
        model.addConstr(observation[1] >= state[0] - state[1] - half_eps, name="obs_constr22")
        model.addConstr(observation[0] <= self.v_lead - state[2] + half_eps, name="obs_constr11")
        model.addConstr(observation[0] >= self.v_lead - state[2] - half_eps, name="obs_constr12")

        feasible_action = Experiment.generate_nn_guard(model, observation, nn, action_ego=chosen_action)
        if not feasible_action:
            continue

        x_prime = StoppingCarExperiment.apply_dynamic(
            state, model, action=chosen_action, env_input_size=self.env_input_size)
        model.update()
        model.optimize()
        found_successor, x_prime_results = self.h_repr_to_plot(model, template, x_prime)
        if not found_successor:
            continue

        info = Experiment.SuccessorInfo()
        info.successor = tuple(x_prime_results)
        info.parent = x
        info.parent_lbl = x_label
        info.t = t + 1
        info.action = "policy"  # chosen_action
        info.lb = ranges_probs[chosen_action][0]
        info.ub = ranges_probs[chosen_action][1]
        successors.append(info)
    return successors
def __init__(self):
    """Configure the two-variable pendulum experiment.

    Registers the callbacks, takes the input region from ``get_template(0)``,
    uses an octagon analysis template and declares the unsafe zone and the
    angle split around a safe angle of 30 degrees (stored in radians).
    """
    state_dim: int = 2
    super().__init__(state_dim)

    # Callbacks exposed under the generic names.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.assign_lbl_fn = self.assign_label
    self.additional_seen_fn = self.additional_seen

    self.template_2d: np.ndarray = np.array([[1, 0], [0, 1]])
    boundaries, in_template = self.get_template(0)
    self.input_boundaries: List = boundaries
    self.input_template: np.ndarray = in_template

    # _, template = self.get_template(1)
    octagon_template = Experiment.octagon(state_dim)
    self.plotting_time_interval = 60 * 5
    self.analysis_template: np.ndarray = octagon_template

    self.safe_angle = 30 * 2 * math.pi / 360
    theta = [self.e(state_dim, 0)]
    neg_theta = [-self.e(state_dim, 0)]
    # battery = [self.e(state_dim, 4)]

    # Two (A, b) pairs, one per sign of the first variable.
    # NOTE(review): presumably each is read as A @ x <= b - confirm.
    unsafe_bound = -self.safe_angle
    self.unsafe_zone: List[Tuple] = [
        (theta, np.array([unsafe_bound])),
        (neg_theta, np.array([-self.safe_angle])),
    ]

    epsilon = 1e-4
    self.angle_split: List[Tuple] = [
        (theta, np.array([self.safe_angle - epsilon])),
        (neg_theta, np.array([self.safe_angle - epsilon])),
    ]

    self.use_rounding = False
    self.rounding_value = 1024
    self.time_horizon = 300
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
    self.tau = 0.02
    self.n_actions = 3
def post_milp(self, x, nn, output_flag, t, template):
    """Extend the persistent MILP by one step of the continuous-action stopping car.

    Unlike the discrete variants, this method keeps adding variables and
    constraints to ``self.internal_model`` and chains from ``self.last_input``,
    which is overwritten with the new state variables before returning.
    ``x``, ``output_flag`` and ``t`` are not read here.

    Returns:
        A list holding the successor bounds as a tuple, or an empty list when
        ``h_repr_to_plot`` finds no successor.
    """
    successors = []
    model = self.internal_model  # grb.Model()
    state = self.last_input  # Experiment.generate_input_region(model, template, x, self.env_input_size)
    half_eps = self.input_epsilon / 2

    observation = model.addMVar(shape=(2,), lb=float("-inf"), ub=float("inf"), name="observation")
    # NOTE(review): ``v_lead`` is a bare name here (not ``self.v_lead``); it must
    # resolve to a module-level value - confirm.
    model.addConstr(observation[1] <= state[0] - state[1] + half_eps, name="obs_constr21")
    model.addConstr(observation[1] >= state[0] - state[1] - half_eps, name="obs_constr22")
    model.addConstr(observation[0] <= v_lead - state[2] + half_eps, name="obs_constr11")
    model.addConstr(observation[0] >= v_lead - state[2] - half_eps, name="obs_constr12")

    nn_output, max_val, min_val = Experiment.generate_nn_guard_continuous(model, observation, nn)

    # Sanity check: the torch network on the solved observation must agree with
    # the value of the MILP encoding.
    torch_output = nn(torch.from_numpy(observation.X).float())
    milp_output = torch.from_numpy(nn_output.X).float()
    is_equal = torch.isclose(torch_output, milp_output, rtol=1e-3).all().item()
    assert is_equal
    # clipped_nn_output = model.addMVar(lb=float("-inf"), shape=(len(nn_output)), name="clipped_nn_output")
    # model.addConstr(nn_output[0] >= -12, name="clipped_out_constr1")
    # model.addConstr(nn_output[0] <= 12, name="clipped_out_constr2")
    # feasible_action = Experiment.generate_nn_guard(model, state, nn, action_ego=chosen_action)

    # apply dynamic
    x_prime = StoppingCarContinuousExperiment.apply_dynamic(
        state, model, action=nn_output, env_input_size=self.env_input_size)
    model.update()
    model.optimize()
    found_successor, x_prime_results = self.h_repr_to_plot(model, template, x_prime)
    self.last_input = x_prime
    # x_prime_results = x_prime_results.round(4)  # correct for rounding errors introduced by the conversion to h-repr
    if found_successor:
        successors.append(tuple(x_prime_results))
    return successors
def post_milp(self, x, x_label, nn, output_flag, t, template) -> List[Experiment.SuccessorInfo]:
    """Compute one successor per action, weighted by the network's probability bounds.

    No network guard is added to the model in this variant: each of the two
    actions is applied to the whole region ``(template, x)``, and the network
    only contributes the ``(lb, ub)`` pair obtained from
    ``create_range_bounds_model``.

    Args:
        x: bounds of the current region over ``template``.
        x_label: label of the current region, copied to ``parent_lbl``.
        nn: the policy network.
        output_flag: value for Gurobi's ``OutputFlag`` parameter.
        t: current time step; successors are stamped ``t + 1``.
        template: template directions of the region.

    Returns:
        list with one ``Experiment.SuccessorInfo`` per action.

    Raises:
        AssertionError: if ``optimise`` returns ``None`` for an action.
    """
    ranges_probs = self.create_range_bounds_model(template, x, self.env_input_size, nn)
    post = []
    for chosen_action in range(2):
        gurobi_model = grb.Model()
        gurobi_model.setParam('OutputFlag', output_flag)
        gurobi_model.setParam('DualReductions', 0)
        state = generate_input_region(gurobi_model, template, x, self.env_input_size)
        x_prime = StoppingCarExperimentProbabilistic.apply_dynamic(
            state, gurobi_model, action=chosen_action, env_input_size=self.env_input_size)
        gurobi_model.update()
        gurobi_model.optimize()
        x_prime_results = optimise(template, gurobi_model, x_prime)
        if x_prime_results is None:
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O`` instead of degrading into a TypeError from tuple(None).
            raise AssertionError(
                f"no successor bounds could be computed for action {chosen_action}")
        successor_info = Experiment.SuccessorInfo()
        successor_info.successor = tuple(x_prime_results)
        successor_info.parent = x
        successor_info.parent_lbl = x_label
        successor_info.t = t + 1
        successor_info.action = "policy"  # chosen_action
        successor_info.lb = ranges_probs[chosen_action][0]
        successor_info.ub = ranges_probs[chosen_action][1]
        post.append(successor_info)
    return post
def box(n):
    """Stack the box template for ``n`` variables.

    For every index ``i`` the rows ``Experiment.e(n, i)`` and its negation are
    emitted, in that order.

    Returns:
        The ``2 * n`` rows stacked with ``np.stack``.
    """
    rows = []
    for axis in range(n):
        unit = Experiment.e(n, axis)
        rows.extend((unit, -unit))
    return np.stack(rows)
def round_tuple(x, rounding_value):
    """Round every entry of ``x`` with ``Experiment.round_single`` and return a tuple.

    To be used only for template values, this is not a good rounding in other cases.
    """
    return tuple(Experiment.round_single(entry, rounding_value) for entry in x)
def __init__(self):
    """Override the parent's defaults for a two-variable run on a bouncing-ball checkpoint.

    The analysis template is the box over ``p`` and ``v`` plus the two
    directions ``+-(p - v) / 3``.
    """
    super().__init__()
    self.v_lead = 28
    self.max_probability_split = 0.15

    self.env_input_size: int = 2
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
    self.input_boundaries = (9, 0, 3, 3)
    self.input_template = Experiment.box(self.env_input_size)

    p = Experiment.e(self.env_input_size, 0)
    # x_ego = Experiment.e(self.env_input_size, 1)
    v = Experiment.e(self.env_input_size, 1)
    directions = [
        p, -p,
        v, -v,
        1 / 3 * (p - v),
        -1 / 3 * (p - v),
    ]  # , 1 / 6 * p - v, -1 / 6 * p - v
    # self.analysis_template: np.ndarray = Experiment.box(self.env_input_size)
    self.analysis_template: np.ndarray = np.array(directions)

    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_c7326_00000_0_2021-01-16_05-43-36/checkpoint_36/checkpoint-36"
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_71684_00004_4_2021-01-18_23-48-21/checkpoint_10/checkpoint-10"
def find_important_dimensions(self, poly1, poly2):
    """Find which half-spaces of ``poly2`` actually cut ``poly1``.

    Assumes ``check_contained(poly1, poly2)`` is true (poly1 = root,
    poly2 = candidate). Each analysis direction ``j`` is tried in isolation:
    the single constraint ``template_j . x <= poly2[j]`` is added to the
    ``poly1`` region and the region is re-optimised. When the result differs
    from ``poly1`` the direction is recorded together with the volume of the
    convex hull of 1000 points sampled from the cut region.

    Returns:
        list of ``(j, volume)`` tuples.
    """
    # Binary Space Partitioning
    model = grb.Model()
    model.setParam('OutputFlag', self.output_flag)
    region = Experiment.generate_input_region(model, self.analysis_template, poly1, self.env_input_size)
    original_bounds = np.array(poly1)

    relevant_directions = []
    for j, direction in enumerate(self.analysis_template):
        projection = sum(direction[i] * region[i] for i in range(self.env_input_size))

        # Drop the constraint left over from the previous direction, if any.
        stale = model.getConstrByName("check_contained_constraint")
        if stale is not None:
            model.remove(stale)
            model.update()
        model.addConstr(projection <= poly2[j], name="check_contained_constraint")
        model.update()

        x_results = self.optimise(self.analysis_template, model, region)
        if np.allclose(original_bounds, x_results):
            continue

        vertices = np.stack(self.pypoman_compute_polytope_vertices(self.analysis_template, np.array(x_results)))
        samples = polytope.sample(1000, self.analysis_template, x_results)
        from scipy.spatial import ConvexHull
        volume = ConvexHull(samples).volume  # estimated volume
        relevant_directions.append((j, volume))
    return relevant_directions
def __init__(self):
    """Override the parent's defaults for a two-variable stopping-car run.

    The analysis template is the box over ``delta_x`` and ``v_ego`` plus the
    four sign combinations of ``delta_x / 4.5`` and ``v_ego``.
    """
    super().__init__()
    self.v_lead = 28

    self.env_input_size: int = 2
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
    self.input_boundaries = (10, -3, 32, -26)
    self.input_template = Experiment.box(self.env_input_size)

    delta_x = Experiment.e(self.env_input_size, 0)
    # x_ego = Experiment.e(self.env_input_size, 1)
    v_ego = Experiment.e(self.env_input_size, 1)
    # template = Experiment.combinations([1 / 4.5*(x_lead - x_ego), - v_ego])
    scaled_gap = 1 / 4.5 * delta_x
    neg_scaled_gap = -1 / 4.5 * delta_x
    directions = [
        delta_x, -delta_x,
        v_ego, -v_ego,
        scaled_gap + v_ego,
        scaled_gap - v_ego,
        neg_scaled_gap + v_ego,
        neg_scaled_gap - v_ego,
    ]

    self.max_probability_split = 0.33
    self.analysis_template: np.ndarray = np.array(directions)
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
def main():
    """Demonstrate merging a pre-selected subset of 200 generated intervals.

    The intervals are plotted three times: as generated, after replacing the
    selected subset by its merged region, and after discarding every interval
    for which ``contained(x, interval)`` holds against a different interval.
    """
    template = Experiment.box(2)
    intervals = generate_intervals(200)
    items_array = np.array(intervals)
    plot_intervals(intervals, template).show()

    # best_variable = run_genetic_merge(items_array, template)
    # Hard-coded selection mask, one flag per generated interval.
    selection_flags = [
        0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
        1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,
        1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0,
        0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
        0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,
        1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0,
        1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0,
        0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0,
        1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1,
    ]
    best_variable = np.array(selection_flags, dtype="bool")

    selected = items_array[best_variable]
    merged, volume_new, volume_old = merge_with_volume_analysis(template, selected)
    remaining_intervals: np.ndarray = items_array[np.invert(best_variable)]
    merged_intermediate = np.append(remaining_intervals, np.expand_dims(merged, 0), 0)
    plot_intervals(merged_intermediate, template).show()

    # Keep an entry if it is the current interval itself or is not contained in it.
    merged_final = merged_intermediate.copy()
    interval: np.ndarray
    for interval in merged_intermediate:
        survivors = []
        for candidate in merged_final:
            if np.equal(interval, candidate).all() or not contained(candidate, interval):
                survivors.append(candidate)
        merged_final = survivors
    plot_intervals(merged_final, template).show()
    print("done")
def get_template(self, mode=0):
    """Return ``(input_boundaries, template)`` for the requested mode.

    Mode 0 yields the box template (``+e_i``, ``-e_i`` per variable) with the
    boundaries ``[9, -8, 0, 0.1]``. Mode 1 yields the three directions
    ``v + p``, ``-v - p`` and ``-p`` with no boundaries (directions to easily
    find a fixed point). Any other mode returns ``None``.
    """
    p = Experiment.e(self.env_input_size, 0)
    v = Experiment.e(self.env_input_size, 1)
    if mode == 0:  # box directions with intervals
        # input_boundaries = [0, 0, 10, 10]
        input_boundaries = [9, -8, 0, 0.1]
        # optimise in a direction
        rows = []
        for dimension in range(self.env_input_size):
            rows.append(Experiment.e(self.env_input_size, dimension))
            rows.append(-Experiment.e(self.env_input_size, dimension))
        return input_boundaries, np.array(rows)
    if mode == 1:  # directions to easily find fixed point
        return None, np.array([v + p, -v - p, -p])
    return None
def create_range_bounds_model(self, template, x, env_input_size, nn, round=-1):
    """Bound the network outputs over the region ``(template, x)``.

    Builds the region in a fresh Gurobi model and solves it once to check
    that it is solvable. The observation variable is then derived from the
    region and passed to ``Experiment.get_range_bounds``. When
    ``self.use_softmax`` is set the ranges are passed through
    ``unroll_methods.softmax_interval``.

    Args:
        round: accepted but currently without effect (rounding is still a todo).

    Returns:
        The (possibly softmax-transformed) ranges.
    """
    model = grb.Model()
    model.setParam('OutputFlag', False)
    region = Experiment.generate_input_region(model, template, x, env_input_size)
    model.update()
    model.optimize()
    assert model.status == 2, "LP wasn't optimally solved"

    observation = self.get_observation_variable(region, model)  # get the observation from the input
    ranges = Experiment.get_range_bounds(observation, nn, model)
    ranges_probs = unroll_methods.softmax_interval(ranges) if self.use_softmax else ranges
    if round >= 0:
        pass  # todo round the probabilities
    return ranges_probs
def main():
    """Exercise the merge helpers on two hand-written 2D box regions.

    The results are computed and discarded; nothing is printed or returned.
    """
    template = Experiment.box(2)
    first_region = [1, 1, 1, 1]
    second_region = [2, 0, 2, 0]

    merged1 = merge_regions(template, np.array([first_region, second_region]))
    volume = compute_volume(template, merged1)
    merged, volume_new, volume_old = merge_with_volume_analysis(
        template, np.array([first_region, second_region]))
def get_template(self, mode=0):
    """Return ``(input_boundaries, template)`` for one of six pendulum template modes.

    Modes 0 and 2 use the box over ``theta`` and ``theta_dot``; mode 1 adds the
    four diagonal directions; modes 3, 4 and 5 are one-sided subsets. Only
    modes 0, 4 and 5 carry input boundaries; the others return ``None`` in
    their place. An unknown mode returns ``None``.
    """
    theta = Experiment.e(self.env_input_size, 0)
    theta_dot = Experiment.e(self.env_input_size, 1)
    # battery = Experiment.e(self.env_input_size, 4)
    diag_sum = theta + theta_dot
    diag_diff = theta - theta_dot

    if mode == 0:  # box directions with intervals
        # Other boundaries tried here:
        #   [0.20, 0.20, 1 / 5, 1 / 5]
        #   [3.13, 3.15, -0.08193365, 0.08193365]
        #   [1, 1, 1, 1]
        #   [0.04373426, -0.04373426, -0.04980056, 0.04980056, 0.045, -0.045, -0.51, 0.51]
        input_boundaries = [0.05, 0.05, 0.05, 0.05]
        rows = [theta, -theta, theta_dot, -theta_dot]
    elif mode == 1:  # directions to easily find fixed point
        input_boundaries = None
        rows = [
            theta, -theta, theta_dot, -theta_dot,
            diag_sum, -diag_sum, diag_diff, -diag_diff,
        ]  # x_dot, -x_dot, theta_dot - theta
    elif mode == 2:
        input_boundaries = None
        rows = [theta, -theta, theta_dot, -theta_dot]
    elif mode == 3:
        input_boundaries = None
        rows = [theta, theta_dot, -theta_dot]
    elif mode == 4:
        input_boundaries = [0.09375, 0.625, 0.625, 0.0625, 0.1875]
        # input_boundaries = [0.09375, 0.5, 0.5, 0.0625, 0.09375]
        rows = [theta, theta_dot, -theta_dot, diag_sum, diag_diff]
    elif mode == 5:
        input_boundaries = [0.125, 0.0625, 0.1875]
        rows = [theta, diag_sum, diag_diff]
    else:
        return None
    return input_boundaries, np.array(rows)
def generate_nn_polyhedral_guard(self, nn, chosen_action, output_flag):
    """Over-approximate, with a 2D octagon, the observations for which ``chosen_action`` is selected.

    A free two-dimensional observation variable is constrained only by the
    network guard for ``chosen_action`` and then optimised along the octagon
    directions.

    Returns:
        ``(observable_template, observable_result)``: the octagon template and
        the bounds returned by ``optimise``.
    """
    model = grb.Model()
    model.setParam('OutputFlag', output_flag)
    model.setParam('Threads', 2)
    unbounded = float("inf")
    observation = model.addMVar(shape=(2, ), lb=-unbounded, ub=unbounded, name="observation")
    Experiment.generate_nn_guard(model, observation, nn, action_ego=chosen_action)

    observable_template = Experiment.octagon(2)
    # self.env_input_size = 2
    observable_result = optimise(observable_template, model, observation)
    # self.env_input_size = 6
    return observable_template, observable_result
def __init__(self):
    """Configure the two-variable bouncing-ball experiment.

    Both the input template and the analysis template come from
    ``get_template(0)`` (fetched by two separate calls). The unsafe zone is
    given by three directions over ``p`` and ``v``.
    """
    state_dim: int = 2
    super().__init__(state_dim)

    # Callbacks exposed under the generic names.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot

    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
    boundaries, in_template = self.get_template(0)
    self.input_boundaries: List = boundaries
    self.input_template: np.ndarray = in_template
    _, analysis_template = self.get_template(0)
    self.analysis_template: np.ndarray = analysis_template

    self.time_horizon = 500
    self.rounding_value = 2**8

    p = Experiment.e(self.env_input_size, 0)
    v = Experiment.e(self.env_input_size, 1)
    # Directions p, -v, v paired with the bounds 0, 1, 0.
    # NOTE(review): presumably read as A @ x <= b - confirm.
    unsafe_directions = [p, -v, v]
    unsafe_bounds = np.array([0, 1, 0])
    self.unsafe_zone: List[Tuple] = [(unsafe_directions, unsafe_bounds)]

    self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_c7326_00000_0_2021-01-16_05-43-36/checkpoint_36/checkpoint-36"
def generate_root_polytope(self, input_boundaries):
    """Re-express the input region over the analysis template.

    The region ``(self.input_template, input_boundaries)`` is built in a fresh
    Gurobi model and optimised along ``self.analysis_template``.

    Returns:
        The resulting bounds as a tuple, or ``None`` (after printing a
        message) when ``self.optimise`` returns ``None``.
    """
    model = grb.Model()
    model.setParam('OutputFlag', self.output_flag)
    region = Experiment.generate_input_region(
        model, self.input_template, input_boundaries, self.env_input_size)
    bounds = self.optimise(self.analysis_template, model, region)
    if bounds is not None:
        return tuple(bounds)
    print("Model unsatisfiable")
    return None
def get_template(self, mode=0):
    """Return the input boundaries for the requested initial-region mode.

    Despite its name this variant returns only the boundary list, not a
    ``(boundaries, template)`` pair.

    Args:
        mode: integer selector, 0 to 4. Mode 0 is the large initial set and
            mode 1 the small one.

    Returns:
        A freshly built list of four boundary values, or ``None`` for an
        unknown mode.
    """
    # The table is rebuilt on every call so that callers receive a list they
    # can mutate without affecting later calls.
    boundaries_by_mode = {
        0: [6, -3, 1, 1],  # large s0 (previously tried: [0, 0, 10, 10])
        1: [6, -3, 0, 0.1],  # small s0
        2: [9, -3, 0, 0.1],
        3: [9, -5, 0, 0.1],
        4: [9, -5, 1, 1],
    }
    return boundaries_by_mode.get(mode)
def __init__(self):
    """Configure the two-variable (``delta_x``, ``v_ego``) stopping-car experiment.

    The input region is a box, the analysis template adds the four sign
    combinations of ``delta_x / 4.5`` and ``v_ego``, and the unsafe zone is
    defined on the ``delta_x`` direction alone.
    """
    state_dim: int = 2
    super().__init__(state_dim)

    # Callbacks exposed under the generic names.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot

    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
    self.input_boundaries = (10, -3, 32, -26)
    self.input_template = Experiment.box(state_dim)
    # self.input_boundaries: List = input_boundaries
    # self.input_template: np.ndarray = input_template
    # _, template = self.get_template(1)

    delta_x = Experiment.e(state_dim, 0)
    v_ego = Experiment.e(state_dim, 1)
    # template = Experiment.combinations([delta_x, - v_ego])
    scaled_gap = 1 / 4.5 * delta_x
    neg_scaled_gap = -1 / 4.5 * delta_x
    directions = [
        delta_x, -delta_x,
        v_ego, -v_ego,
        scaled_gap + v_ego,
        scaled_gap - v_ego,
        neg_scaled_gap + v_ego,
        neg_scaled_gap - v_ego,
    ]
    # template = np.stack([x_lead - x_ego, -(x_lead - x_ego), - v_ego, v_ego])
    self.analysis_template: np.ndarray = np.array(directions)

    # Unsafe region as an (A, b) pair on the first variable.
    # NOTE(review): presumably read as A @ x <= b - confirm.
    collision_distance = 0
    distance = [Experiment.e(self.env_input_size, 0)]
    self.unsafe_zone: List[Tuple] = [(distance, np.array([collision_distance]))]

    # Analysis settings.
    # self.use_bfs = True
    # self.n_workers = 1
    self.rounding_value = 2**10
    self.use_rounding = False
    self.time_horizon = 20
    self.input_epsilon = 0
    self.v_lead = 28
    self.max_speed = 36

    # Checkpoints tried so far (outcome noted by the original author after each path).
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_11-56-58/checkpoint_31/checkpoint-31"  # safe both with and without epsilon of 0.1.
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_11-56-58/checkpoint_37/checkpoint-37"  # not determined
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_12-37-27/checkpoint_24/checkpoint-24"  # safe at t=216
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_12-37-27/checkpoint_36/checkpoint-36"  # not determined
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00002_2_cost_fn=0,epsilon_input=0_2021-01-17_12-38-53/checkpoint_40/checkpoint-40"  # not determined
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00006_6_cost_fn=0,epsilon_input=0_2021-01-17_12-44-54/checkpoint_41/checkpoint-41"  # safe
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00000_0_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_39/checkpoint-39"  # unsafe
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
def __init__(self):
    """Set the defaults of the probabilistic two-variable stopping-car analysis.

    No parent initialiser is called here; every attribute is assigned
    directly. Splitting, entropy splitting, softmax and MILP range
    probabilities are all enabled.
    """
    # Splitting options.
    self.use_split = True
    self.use_entropy_split = True
    self.max_probability_split = 0.33
    self.minimum_length = 0.2

    # Probability options.
    self.use_softmax = True
    self.use_milp_range_prob = True

    # Solver and observation settings.
    self.input_epsilon = 0
    self.output_flag = False

    # State space and initial region.
    self.env_input_size: int = 2
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
    self.input_boundaries = (50, 0, 36, 36)
    self.input_template = Experiment.box(self.env_input_size)

    delta_x = Experiment.e(self.env_input_size, 0)
    # x_ego = Experiment.e(self.env_input_size, 1)
    v_ego = Experiment.e(self.env_input_size, 1)
    # template = Experiment.combinations([1 / 4.5*(x_lead - x_ego), - v_ego])
    scaled_gap = 1 / 4.5 * delta_x
    neg_scaled_gap = -1 / 4.5 * delta_x
    directions = [
        delta_x, -delta_x,
        v_ego, -v_ego,
        scaled_gap + v_ego,
        scaled_gap - v_ego,
        neg_scaled_gap + v_ego,
        neg_scaled_gap - v_ego,
    ]
    self.analysis_template: np.ndarray = np.array(directions)

    self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
def post_milp(self, x, x_label, nn, output_flag, t, template):
    """Compute the pendulum successors of region ``x``, one MILP per action.

    No network guard is added to the models: every one of the
    ``self.n_actions`` actions is applied to the whole region, and the network
    contributes only the ``(lb, ub)`` pair from ``create_range_bounds_model``.
    Actions whose model does not finish with status 2 are skipped.

    Returns:
        list of ``Experiment.SuccessorInfo`` (possibly empty).
    """
    ranges_probs = self.create_range_bounds_model(template, x, self.env_input_size, nn)
    successors = []
    # for split_angle in itertools.product([True, False], repeat=2):  # split successor if theta is within safe_angle
    for chosen_action in range(self.n_actions):
        # if (chosen_action == 2 or chosen_action == 1) and x_label == 1:  # skip actions when battery is dead
        #     continue
        model = grb.Model()
        model.setParam('OutputFlag', output_flag)
        model.setParam('Threads', 2)
        state = generate_input_region(model, template, x, self.env_input_size)

        # Bounds of the region feed the sine/cosine lookup table used by the angle MILP.
        theta_hi, theta_lo, theta_dot_hi, theta_dot_lo = self.get_theta_bounds(model, state)
        # feasible_action = PendulumExperiment.generate_nn_guard(model, state, nn, action_ego=chosen_action, M=1e03)
        # if feasible_action:  # performs action 2 automatically when battery is dead
        sin_cos_table = self.get_sin_cos_table(
            theta_hi, theta_lo, theta_dot_hi, theta_dot_lo, action=chosen_action)
        # for normalisation_split in [True, False]:
        newthdot, newtheta = PendulumExperiment.generate_angle_milp(model, state, sin_cos_table)
        # model.addConstr(newtheta >)

        # apply dynamic
        x_prime = self.apply_dynamic(
            state, model,
            newthdot=newthdot, newtheta=newtheta,
            env_input_size=self.env_input_size, action=chosen_action)
        # for i, (A, b) in enumerate(self.angle_split):
        #     Experiment.generate_region_constraints(model, A, x_prime, b, self.env_input_size, invert=not split_angle[i])
        model.update()
        model.optimize()
        if model.status != 2:
            continue

        found_successor, x_prime_results = self.h_repr_to_plot(model, template, x_prime)
        if not found_successor:
            continue

        info = Experiment.SuccessorInfo()
        info.successor = tuple(x_prime_results)
        info.parent = x
        info.parent_lbl = x_label
        info.t = t + 1
        info.action = "policy"  # chosen_action
        info.lb = ranges_probs[chosen_action][0]
        info.ub = ranges_probs[chosen_action][1]
        successors.append(info)
    return successors
def __init__(self):
    """Configure the three-variable (``x_lead``, ``x_ego``, ``v_ego``) stopping-car experiment.

    The input region is a box over all three variables. The analysis template
    tracks only the gap ``x_lead - x_ego`` and ``v_ego``, each in both
    directions. The unsafe zone is defined on the gap direction.
    """
    state_dim: int = 3
    super().__init__(state_dim)

    # Callbacks exposed under the generic names.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot

    self.template_2d: np.ndarray = np.array([[1, 0, 0], [1, -1, 0]])
    self.input_boundaries: List = [30, -25, 0, 0, 36, -28]
    self.input_template: np.ndarray = Experiment.box(self.env_input_size)

    x_lead = Experiment.e(self.env_input_size, 0)
    x_ego = Experiment.e(self.env_input_size, 1)
    v_ego = Experiment.e(self.env_input_size, 2)
    directions = [-(x_lead - x_ego), (x_lead - x_ego), v_ego, -v_ego]
    self.analysis_template: np.ndarray = np.array(directions)

    # Unsafe region as an (A, b) pair on the gap direction.
    # NOTE(review): presumably read as A @ x <= b - confirm.
    collision_distance = 0
    gap = Experiment.e(self.env_input_size, 0) - Experiment.e(self.env_input_size, 1)
    self.unsafe_zone: List[Tuple] = [([gap], np.array([collision_distance]))]

    # Analysis settings.
    self.rounding_value = 2**10
    self.use_rounding = False
    self.time_horizon = 400
    self.input_epsilon = 0
    self.v_lead = 28
    self.max_speed = 36