def post_milp(self, x, nn, output_flag, t, template):
    """Collect the successor regions of ``x`` for both actions with Gurobi.

    For every action in ``range(2)`` a fresh model is built. The state region
    described by ``template``/``x`` is encoded, a two-component observation is
    tied to differences of state components (each within
    ``self.input_epsilon / 2``), and the observation is restricted to the
    observable region stored for that action. If that model ends with status
    2, the dynamics for the action are applied on the same model and the
    resulting region is read back through ``self.h_repr_to_plot``.

    Args:
        x: Region description handed to ``Experiment.generate_input_region``.
        nn: Not used by this method (the nn-based guard is disabled here).
        output_flag: Value for Gurobi's ``OutputFlag`` parameter.
        t: Not used by this method.
        template: Template forwarded to the region and plotting helpers.

    Returns:
        list: One tuple per action for which a successor was found.
    """
    successors = []
    for action in range(2):
        action_template = self.observable_templates[action]
        action_result = self.observable_results[action]

        model = grb.Model()
        model.setParam('OutputFlag', output_flag)
        model.setParam('Threads', 2)
        state = Experiment.generate_input_region(
            model, template, x, self.env_input_size)

        # NOTE: the observation variable is registered under the name "input".
        observed = model.addMVar(
            shape=(2, ), lb=float("-inf"), ub=float("inf"), name="input")
        half_eps = self.input_epsilon / 2
        # observed[1] tracks state[0] - state[1], observed[0] tracks
        # state[2] - state[3], each up to +/- half_eps.
        model.addConstr(
            observed[1] <= state[0] - state[1] + half_eps, name="obs_constr21")
        model.addConstr(
            observed[1] >= state[0] - state[1] - half_eps, name="obs_constr22")
        model.addConstr(
            observed[0] <= state[2] - state[3] + half_eps, name="obs_constr11")
        model.addConstr(
            observed[0] >= state[2] - state[3] - half_eps, name="obs_constr12")

        # The action is considered feasible when the observation can fall
        # inside the observable region recorded for it.
        Experiment.generate_region_constraints(
            model, action_template, observed, action_result, 2)
        model.optimize()
        if model.status != 2:  # 2 is OPTIMAL in Gurobi's status codes
            continue

        # Apply the dynamics directly on the constrained state variables.
        next_state = StoppingCarExperiment.apply_dynamic(
            state, model, action=action, env_input_size=self.env_input_size)
        model.update()
        model.optimize()
        has_successor, next_results = self.h_repr_to_plot(
            model, template, next_state)
        if has_successor:
            successors.append(tuple(next_results))
    return successors
def post_milp(self, x, nn, output_flag, t, template):
    """Compute the successor regions of ``x`` over all guard/action scenarios.

    Six scenarios are tried in a fixed order and each contributes at most one
    successor to the result:

    1. guard 0 (bounce), action irrelevant, ``apply_dynamic2`` case 0
    2. guard 1 (ball going down) with action 1 (hit), case 1
    3. guard 2 (ball going up) with action 1 (hit), case 2
    4. guard 1 (ball going down) with action 0 (no hit), case 3
    5. guard 2 (ball going up) with action 0 (no hit), case 3
    6. guard 3 (ball out of reach, no bounce), action irrelevant, case 3

    Every scenario starts from a fresh Gurobi model holding the input region
    and the result of ``self.apply_dynamic``. When an action is involved, the
    *input* variables (not the post-dynamic ones) are additionally restricted
    to the observable region stored for that action, and the model must end
    with status 2.

    Args:
        x: Region description handed to ``self.generate_input_region``.
        nn: Not used by this method (the nn-based guard is disabled here).
        output_flag: Value for Gurobi's ``OutputFlag`` parameter.
        t: Not used by this method.
        template: Template forwarded to the region, optimise and plot helpers.

    Returns:
        list: Tuples of successor results, in scenario order.
    """
    post = []

    # Looked up eagerly, action 1 first, so a missing entry fails before any
    # model is built.
    observable_regions = {
        1: (self.observable_templates[1], self.observable_results[1]),
        0: (self.observable_templates[0], self.observable_results[0]),
    }

    # (guard case, action to check or None when irrelevant, apply_dynamic2 case)
    scenarios = (
        (0, None, 0),  # bounce
        (1, 1, 1),     # ball going down and hit
        (2, 1, 2),     # ball going up and hit
        (1, 0, 3),     # ball going down and NO hit
        (2, 0, 3),     # ball going up and NO hit
        (3, None, 3),  # ball out of reach and not bounce
    )

    def new_model():
        """Create a Gurobi model with the requested output verbosity."""
        model = grb.Model()
        model.setParam('OutputFlag', output_flag)
        return model

    def successor_for(guard_case, action, dynamic_case):
        """Return the successor tuple for one scenario, or None if there is none."""
        model = new_model()
        start_region = self.generate_input_region(
            model, template, x, self.env_input_size)
        z = self.apply_dynamic(start_region, model, self.env_input_size)
        if not self.generate_guard(model, z, case=guard_case):
            return None

        if action is not None:
            # The action check is made over the input region, not over z.
            obs_template, obs_result = observable_regions[action]
            Experiment.generate_region_constraints(
                model, obs_template, start_region, obs_result, 2)
            model.optimize()
            if model.status != 2:  # 2 is OPTIMAL in Gurobi's status codes
                return None

        # Bound z with the template, then restart from those bounds in a
        # clean model for the second dynamic step.
        x_prime_results = self.optimise(template, model, z)
        model = new_model()
        second_region = self.generate_input_region(
            model, template, x_prime_results, self.env_input_size)
        x_second = self.apply_dynamic2(
            second_region, model, case=dynamic_case,
            env_input_size=self.env_input_size)
        found_successor, x_second_results = self.h_repr_to_plot(
            model, template, x_second)
        if not found_successor:
            return None
        return tuple(x_second_results)

    for guard_case, action, dynamic_case in scenarios:
        successor = successor_for(guard_case, action, dynamic_case)
        if successor is not None:
            post.append(successor)
    return post
def post_milp(self, x, nn, output_flag, t, template):
    """Collect the successor regions of ``x`` for both actions.

    The backend is selected by the module-level ``USE_GUROBI`` flag.

    * Gurobi: the input region is restricted to the observable region stored
      for the action; if the model ends with status 2, theta bounds and a
      sin/cos table are derived, the region is re-encoded from the optimised
      template bounds, the angle MILP and dynamics are added to the same
      model, and the result is read back via ``self.h_repr_to_plot``.
    * Pyomo: feasibility of the action is decided by
      ``ORACartpoleExperiment.generate_nn_guard_pyo`` on ``nn``; the same
      steps are then carried out with the ``*_pyo`` helpers, including an
      intermediate solve that maximises ``thetaacc``.

    Args:
        x: Region description handed to the input-region helpers.
        nn: Network passed to the Pyomo guard; unused on the Gurobi path.
        output_flag: Value for Gurobi's ``OutputFlag`` parameter.
        t: Not used by this method.
        template: Template forwarded to the region, optimise and plot helpers.

    Returns:
        list: One tuple per action for which a successor was found.

    Raises:
        AssertionError: On the Pyomo path, if the intermediate solve does not
            report an ok status with an optimal termination condition.
    """
    post = []
    for chosen_action in range(2):
        # Read for both backends, although only the Gurobi path uses them.
        observable_template = self.observable_templates[chosen_action]
        observable_result = self.observable_results[chosen_action]
        if USE_GUROBI:
            gurobi_model = grb.Model()
            gurobi_model.setParam('OutputFlag', output_flag)
            gurobi_model.setParam('Threads', 2)
            region = Experiment.generate_input_region(
                gurobi_model, template, x, self.env_input_size)
            Experiment.generate_region_constraints(
                gurobi_model, observable_template, region, observable_result,
                env_input_size=self.env_input_size)
            gurobi_model.optimize()
            if gurobi_model.status != 2:  # 2 is OPTIMAL in Gurobi's status codes
                continue

            max_theta, min_theta, max_theta_dot, min_theta_dot = self.get_theta_bounds(
                gurobi_model, region)
            sin_cos_table = self.get_sin_cos_table(
                max_theta, min_theta, max_theta_dot, min_theta_dot,
                action=chosen_action, step_thetaacc=100)
            # Template bounds (h representation) of the feasible region.
            x_prime_results = self.optimise(template, gurobi_model, region)
            x_prime = Experiment.generate_input_region(
                gurobi_model, template, x_prime_results, self.env_input_size)
            thetaacc, xacc = CartpoleExperiment.generate_angle_milp(
                gurobi_model, x_prime, sin_cos_table)
            # apply dynamic
            x_second = self.apply_dynamic(
                x_prime, gurobi_model, thetaacc=thetaacc, xacc=xacc,
                env_input_size=self.env_input_size)
            gurobi_model.update()
            gurobi_model.optimize()
            found_successor, x_second_results = self.h_repr_to_plot(
                gurobi_model, template, x_second)
            if found_successor:
                post.append(tuple(x_second_results))
        else:
            model = pyo.ConcreteModel()
            region = Experiment.generate_input_region_pyo(
                model, template, x, self.env_input_size)
            feasible_action = ORACartpoleExperiment.generate_nn_guard_pyo(
                model, region, nn, action_ego=chosen_action, M=1e04)
            if not feasible_action:
                continue

            max_theta, min_theta, max_theta_dot, min_theta_dot = self.get_theta_bounds_pyo(
                model, region)
            sin_cos_table = self.get_sin_cos_table(
                max_theta, min_theta, max_theta_dot, min_theta_dot,
                action=chosen_action, step_thetaacc=100)
            x_prime_results = self.optimise_pyo(template, model, region)
            x_prime = Experiment.generate_input_region_pyo(
                model, template, x_prime_results, self.env_input_size,
                name="x_prime_input")
            thetaacc, xacc = ORACartpoleExperiment.generate_angle_milp_pyo(
                model, x_prime, sin_cos_table)
            # Replace the current objective and solve once for thetaacc.
            model.del_component(model.obj)
            model.obj = pyo.Objective(expr=thetaacc, sense=pyo.maximize)
            result = Experiment.solve(model, solver=Experiment.use_solver)
            assert (result.solver.status == SolverStatus.ok) and (
                result.solver.termination_condition == TerminationCondition.optimal
            ), f"LP wasn't optimally solved {x}"
            # apply dynamic
            x_second = self.apply_dynamic_pyo(
                x_prime, model, thetaacc=thetaacc, xacc=xacc,
                env_input_size=self.env_input_size, action=chosen_action)
            x_second_results = self.optimise_pyo(template, model, x_second)
            # Previously only x_prime_results was tested here, so a missing
            # second-stage result reached tuple() and raised TypeError.
            # Presumably optimise_pyo returns None when it has no result -
            # confirm against its implementation.
            found_successor = (
                x_prime_results is not None and x_second_results is not None)
            if found_successor:
                post.append(tuple(x_second_results))
    return post