def generate_nn_polyhedral_guard(self, nn, chosen_action, output_flag):
    """Optimise an octagon template over the observations constrained by the NN guard.

    A fresh Gurobi model is created with an unbounded 2-element
    ``observation`` variable, ``Experiment.generate_nn_guard`` adds its
    constraints for ``chosen_action``, and ``self.optimise`` is then run with a
    2-dimensional octagon template.

    Args:
        nn: Network forwarded unchanged to ``Experiment.generate_nn_guard``.
        chosen_action: Passed to ``Experiment.generate_nn_guard`` as
            ``action_ego``.
        output_flag: Value for Gurobi's ``OutputFlag`` parameter.

    Returns:
        Tuple ``(observable_template, observable_result)``: the octagon
        template and whatever ``self.optimise`` returned for it.
    """
    observation_size = 2  # size of the observation variable and of the octagon template

    gurobi_model = grb.Model()
    gurobi_model.setParam('OutputFlag', output_flag)
    gurobi_model.setParam('Threads', 2)
    observation = gurobi_model.addMVar(
        shape=(observation_size,),
        lb=float("-inf"),
        ub=float("inf"),
        name="observation",
    )
    Experiment.generate_nn_guard(gurobi_model, observation, nn, action_ego=chosen_action)
    observable_template = Experiment.octagon(observation_size)

    # Presumably self.optimise reads self.env_input_size, hence the temporary
    # override -- TODO confirm against optimise().
    self.env_input_size = observation_size
    try:
        observable_result = self.optimise(observable_template, gurobi_model, observation)
    finally:
        # Reset even when optimise() raises, so the instance is never left with
        # the temporary size. 6 is the value the original code reinstated.
        self.env_input_size = 6
    return observable_template, observable_result
def run_parameterised_experiment(config):
    """Run one ``CartpoleExperiment`` trial described by ``config`` and report it.

    Args:
        config: Mapping with a ``"main_params"`` triple, of which only the
            third element (a settings mapping with ``"folder"``, ``"tau"`` and
            ``"template"``) is used, and an ``"n_workers"`` entry.

    The outcome is sent through ``tune.report`` with ``safe`` encoded as
    ``0`` (``None``), ``1`` (truthy) or ``-1`` (falsy).
    """
    save_dir = tune.get_trial_dir()
    # The first two entries of "main_params" are not needed in this variant.
    _problem, _method, settings = config["main_params"]
    worker_count = config["n_workers"]

    experiment = CartpoleExperiment()
    experiment.nn_path = settings["folder"]
    experiment.tau = settings["tau"]

    template_kind = settings["template"]
    if template_kind == 2:  # octagon
        analysis_template = Experiment.octagon(experiment.env_input_size)
    elif template_kind == 0:  # box
        analysis_template = Experiment.box(experiment.env_input_size)
    else:  # standard
        _, analysis_template = experiment.get_template(1)
    experiment.analysis_template = analysis_template

    experiment.n_workers = worker_count
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = save_dir
    experiment.update_progress_fn = update_progress

    elapsed_seconds, safe, max_t = experiment.run_experiment()

    if safe is None:
        safe_value = 0
    else:
        safe_value = 1 if safe else -1
    tune.report(elapsed_seconds=elapsed_seconds, safe=safe_value, max_t=max_t, done=True)
def _select_analysis_template(experiment, template_id, allow_standard=True):
    """Return the analysis template selected by ``template_id``.

    ``2`` yields ``Experiment.octagon`` and ``0`` yields ``Experiment.box``,
    both sized by ``experiment.env_input_size``. Any other value yields the
    second element of ``experiment.get_template(1)``, unless
    ``allow_standard`` is false, in which case ``NotImplementedError`` is
    raised.
    """
    if template_id == 2:  # octagon
        return Experiment.octagon(experiment.env_input_size)
    if template_id == 0:  # box
        return Experiment.box(experiment.env_input_size)
    if not allow_standard:
        raise NotImplementedError()
    _, template = experiment.get_template(1)  # standard
    return template


def _safe_to_report_value(safe):
    """Encode the safety verdict as an int: ``None`` -> 0, truthy -> 1, falsy -> -1."""
    if safe is None:
        return 0
    return 1 if safe else -1


def run_parameterised_experiment(config):
    """Build, configure and run the experiment described by ``config``, then report it.

    Args:
        config: Mapping with
            ``"main_params"``: a ``(problem, method, other_config)`` triple.
            ``problem`` is ``"bouncing_ball"``, ``"stopping_car"`` or anything
            else (treated as cartpole); ``method == "ora"`` selects the ORA
            experiment class; ``other_config`` supplies ``"nn_path"``,
            ``"template"`` and either ``"tau"`` or, for the stopping car,
            ``"epsilon_input"``.
            ``"n_workers"``: value assigned to ``experiment.n_workers``.

    Raises:
        NotImplementedError: for ``"bouncing_ball"`` when
            ``other_config["template"]`` is neither ``0`` nor ``2``.

    The result is sent through ``tune.report`` with ``safe`` encoded by
    ``_safe_to_report_value``.
    """
    # Hyperparameters
    trial_dir = tune.get_trial_dir()
    problem, method, other_config = config["main_params"]
    n_workers = config["n_workers"]
    use_ora = method == "ora"

    # Only these per-problem details differ; everything after this is shared.
    allow_standard_template = True
    if problem == "bouncing_ball":
        experiment = ORABouncingBallExperiment() if use_ora else BouncingBallExperiment()
        nn_paths = nn_paths_bouncing_ball
        extra_attr, extra_key = "tau", "tau"
        allow_standard_template = False  # only octagon and box are supported here
    elif problem == "stopping_car":
        experiment = ORAStoppingCarExperiment() if use_ora else StoppingCarExperiment()
        nn_paths = nn_paths_stopping_car
        extra_attr, extra_key = "input_epsilon", "epsilon_input"
    else:
        experiment = ORACartpoleExperiment() if use_ora else CartpoleExperiment()
        nn_paths = nn_paths_cartpole
        extra_attr, extra_key = "tau", "tau"

    # Keep the original assignment order (nn_path, problem parameter, template)
    # in case get_template() depends on the earlier attributes.
    experiment.nn_path = os.path.join(utils.get_save_dir(), nn_paths[other_config["nn_path"]])
    setattr(experiment, extra_attr, other_config[extra_key])
    experiment.analysis_template = _select_analysis_template(
        experiment, other_config["template"], allow_standard=allow_standard_template
    )

    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = trial_dir
    experiment.update_progress_fn = update_progress
    elapsed_seconds, safe, max_t = experiment.run_experiment()

    tune.report(
        elapsed_seconds=elapsed_seconds,
        safe=_safe_to_report_value(safe),
        max_t=max_t,
        done=True,
    )
def get_nn(self):
    """Restore the PPO checkpoint at ``self.nn_path`` and return its policy network.

    A ``ppo.PPOTrainer`` is built from ``get_PPO_config(1234)`` and restored
    from ``self.nn_path``. Its policy is converted with
    ``convert_ray_policy_to_sequential`` and ``.cpu()`` is called on the
    result, which is returned as-is.
    """
    ppo_config = get_PPO_config(1234)
    trainer = ppo.PPOTrainer(config=ppo_config)
    trainer.restore(self.nn_path)
    return convert_ray_policy_to_sequential(trainer.get_policy()).cpu()


if __name__ == '__main__':
    # Script entry point: run a StoppingCarExperiment with an octagon template.
    ray.init(log_to_driver=False, local_mode=False)

    experiment = StoppingCarExperiment()
    experiment.plotting_time_interval = 60 * 2
    experiment.show_progressbar = True
    experiment.show_progress_plot = False
    experiment.analysis_template = Experiment.octagon(experiment.env_input_size)  # standard
    experiment.input_boundaries = [40, -30, 10, -0, 28, -28, 36, -36, 0, -0, 0, -0, 0]
    experiment.time_horizon = 150
    experiment.run_experiment()