def __init__(self):
    """Set up the default configuration of this two-variable experiment.

    Binds the hook attributes to this instance's methods, picks the input and
    analysis templates, defines the unsafe zone and the angle split around
    ``safe_angle``, and sets the remaining analysis parameters (rounding,
    time horizon, network checkpoint path, ``tau`` and number of actions).
    """
    state_dim: int = 2
    super().__init__(state_dim)

    # Hooks exposed as attributes, each bound to a method of this instance.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.assign_lbl_fn = self.assign_label
    self.additional_seen_fn = self.additional_seen

    # 2x2 identity matrix.
    self.template_2d: np.ndarray = np.array([[1, 0], [0, 1]])

    boundaries, boundaries_template = self.get_template(0)
    self.input_boundaries: List = boundaries
    self.input_template: np.ndarray = boundaries_template

    # The analysis uses the octagon template rather than ``get_template(1)``.
    octagon_template = Experiment.octagon(state_dim)
    self.plotting_time_interval = 60 * 5
    self.analysis_template: np.ndarray = octagon_template

    # 30 degrees expressed in radians.
    self.safe_angle = 30 * 2 * math.pi / 360

    along_axis0 = [self.e(state_dim, 0)]
    against_axis0 = [-self.e(state_dim, 0)]

    # (directions, bound) pairs along +/- index 0; presumably they describe
    # the states whose angle magnitude exceeds ``safe_angle`` -- confirm
    # against the code that consumes ``unsafe_zone``.
    self.unsafe_zone: List[Tuple] = [
        (along_axis0, np.array([-self.safe_angle])),
        (against_axis0, np.array([-self.safe_angle])),
    ]

    # Same directions, bounded slightly (1e-4) below the safe angle.
    margin = 1e-4
    self.angle_split: List[Tuple] = [
        (along_axis0, np.array([self.safe_angle - margin])),
        (against_axis0, np.array([self.safe_angle - margin])),
    ]

    self.use_rounding = False
    self.rounding_value = 1024
    self.time_horizon = 300
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
    self.tau = 0.02
    self.n_actions = 3
def generate_nn_polyhedral_guard(self, nn, chosen_action, output_flag):
    """Optimise an octagon template over the guard built for ``chosen_action``.

    A fresh Gurobi model with an unbounded two-element ``observation``
    variable is created, ``Experiment.generate_nn_guard`` adds the guard for
    ``nn`` with ``action_ego=chosen_action``, and the octagon template is
    then optimised over the observation.

    Args:
        nn: Network passed through to ``Experiment.generate_nn_guard``.
        chosen_action: Action forwarded as ``action_ego``.
        output_flag: Value for Gurobi's ``OutputFlag`` parameter.

    Returns:
        ``(template, result)``: the octagon template and whatever
        ``optimise`` returns for it.
    """
    n_observed = 2
    unbounded = float("inf")

    model = grb.Model()
    model.setParam('OutputFlag', output_flag)
    model.setParam('Threads', 2)

    observation = model.addMVar(
        shape=(n_observed,),
        lb=-unbounded,
        ub=unbounded,
        name="observation",
    )
    Experiment.generate_nn_guard(model, observation, nn, action_ego=chosen_action)

    octagon = Experiment.octagon(n_observed)
    bounds = optimise(octagon, model, observation)
    return octagon, bounds
def additional_seen(self):
    """Build the extra element that is added to the set of seen states.

    An input region is generated from the octagon template, with the first
    two bounds set to ``safe_angle`` and the remaining six left unbounded
    (``inf``); the analysis template is then optimised over that region.

    Returns:
        A one-element list ``[(bounds, 0)]`` where ``bounds`` is the tuple of
        optimisation results, or ``None`` when ``optimise`` returns ``None``.
    """
    model = grb.Model()
    model.setParam('OutputFlag', self.output_flag)

    unbounded = float("inf")
    region_bounds = [self.safe_angle] * 2 + [unbounded] * 6

    region = generate_input_region(
        model,
        Experiment.octagon(self.env_input_size),
        region_bounds,
        self.env_input_size,
    )
    x_results = optimise(self.analysis_template, model, region)
    if x_results is None:
        print("Model unsatisfiable")
        return None
    return [(tuple(x_results), 0)]
def run_parameterised_experiment(config):
    """Run one cartpole experiment described by ``config`` and report it.

    Args:
        config: Mapping with ``"main_params"`` (a 3-tuple whose last item is
            the per-run settings dict with keys ``"folder"``, ``"tau"`` and
            ``"template"``) and ``"n_workers"``.

    The outcome is published through ``tune.report`` with ``safe`` encoded as
    1 (safe), -1 (unsafe) or 0 (``None``, i.e. undetermined).
    """
    trial_dir = tune.get_trial_dir()
    # Only the settings dict is used; the first two entries are ignored.
    _problem, _method, settings = config["main_params"]
    n_workers = config["n_workers"]

    experiment = CartpoleExperiment()
    # The network path is taken verbatim from the settings (no lookup table).
    experiment.nn_path = settings["folder"]
    experiment.tau = settings["tau"]

    template_id = settings["template"]
    if template_id == 2:
        # octagon
        experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
    elif template_id == 0:
        # box
        experiment.analysis_template = Experiment.box(experiment.env_input_size)
    else:
        # standard template provided by the experiment itself
        _, standard_template = experiment.get_template(1)
        experiment.analysis_template = standard_template

    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = trial_dir
    experiment.update_progress_fn = update_progress

    elapsed_seconds, safe, max_t = experiment.run_experiment()

    if safe is None:
        safe_value = 0
    else:
        safe_value = 1 if safe else -1
    tune.report(elapsed_seconds=elapsed_seconds, safe=safe_value, max_t=max_t, done=True)
policy = trainer.get_policy() # sequential_nn = convert_ray_simple_policy_to_sequential(policy).cpu() sequential_nn = convert_ray_policy_to_sequential(policy).cpu() # l0 = torch.nn.Linear(5, 3, bias=False) # l0.weight = torch.nn.Parameter(torch.tensor([[0, 0, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1]], dtype=torch.float32)) layers = [] for l in sequential_nn: layers.append(l) nn = torch.nn.Sequential(*layers) torch.save(nn, pickled_path) # ray.shutdown() return nn if __name__ == '__main__': os.environ["CUDA_VISIBLE_DEVICES"] = "0" ray.init(log_to_driver=False, local_mode=False) experiment = PendulumExperimentProbabilistic() experiment.save_dir = "/home/edoardo/ray_results/tune_PPO_pendulum/octagon_psi05_h7_nocontain2" experiment.plotting_time_interval = 60 experiment.show_progressbar = True experiment.show_progress_plot = False experiment.analysis_template = Experiment.octagon(2) experiment.use_contained = False experiment.use_split_with_seen = False # experiment.max_probability_split = 0.5 experiment.time_horizon = 6 # experiment.load_graph = True experiment.run_experiment()
def get_experiment_instance(config, trial_dir):
    """Create and configure the probabilistic experiment selected by ``config``.

    Args:
        config: Mapping with ``"main_params"`` (a ``(problem, settings)``
            pair) and ``"n_workers"``. ``settings`` provides ``"nn_path"``,
            ``"template"`` and ``"phi"``; the bouncing-ball problem also
            reads ``"use_contain"`` and ``"initial_state"``.
        trial_dir: Directory stored as the experiment's ``save_dir``.

    Returns:
        The configured experiment: bouncing ball or stopping car when
        ``problem`` names them, the pendulum experiment otherwise.

    Raises:
        NotImplementedError: For an unsupported ``"template"`` or
            ``"initial_state"`` id.
        AssertionError: If the bouncing-ball network path does not exist.
    """
    problem, settings = config["main_params"]
    n_workers = config["n_workers"]

    if problem == "bouncing_ball":
        experiment = BouncingBallExperimentProbabilistic()
        experiment.nn_path = os.path.join(
            utils.get_agents_dir(), nn_paths_bouncing_ball[settings["nn_path"]]
        )
        assert os.path.exists(experiment.nn_path)
        experiment.use_contained = settings["use_contain"]

        initial_state = settings["initial_state"]
        if initial_state == 0:
            experiment.input_boundaries = [9, -5, 0, 0.1]
        elif initial_state == 1:
            experiment.input_boundaries = [9, -5, 1, 1]
        else:
            raise NotImplementedError()

        template_id = settings["template"]
        if template_id == 1:
            # octagon
            experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
        elif template_id == 0:
            # box
            experiment.analysis_template = Experiment.box(experiment.env_input_size)
        else:
            raise NotImplementedError()
        experiment.max_probability_split = settings["phi"]

    elif problem == "stopping_car":
        experiment = StoppingCarExperimentProbabilistic()
        experiment.nn_path = os.path.join(
            utils.get_agents_dir(), nn_paths_stopping_car[settings["nn_path"]]
        )
        experiment.max_probability_split = settings["phi"]

        # Here the octagon is id 2, because id 1 is the hand-built template.
        template_id = settings["template"]
        if template_id == 2:
            # octagon
            experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
        elif template_id == 1:
            delta_x = Experiment.e(experiment.env_input_size, 0)
            v_ego = Experiment.e(experiment.env_input_size, 1)
            slope = 1 / 4.5
            # Axis directions plus the four combinations of
            # +/- slope * delta_x with +/- v_ego.
            experiment.analysis_template = np.array([
                delta_x,
                -delta_x,
                v_ego,
                -v_ego,
                slope * delta_x + v_ego,
                slope * delta_x - v_ego,
                -slope * delta_x + v_ego,
                -slope * delta_x - v_ego,
            ])
        elif template_id == 0:
            # box
            experiment.analysis_template = Experiment.box(experiment.env_input_size)
        else:
            raise NotImplementedError()

    else:
        experiment = PendulumExperimentProbabilistic()
        # NOTE(review): the pendulum network is looked up in
        # ``nn_paths_cartpole`` -- confirm this table is the intended one.
        experiment.nn_path = os.path.join(
            utils.get_agents_dir(), nn_paths_cartpole[settings["nn_path"]]
        )

        template_id = settings["template"]
        if template_id == 1:
            # octagon
            experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
        elif template_id == 0:
            # box
            experiment.analysis_template = Experiment.box(experiment.env_input_size)
        else:
            raise NotImplementedError()
        experiment.max_probability_split = settings["phi"]

    # Settings shared by every problem.
    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = trial_dir
    return experiment
def run_parameterised_experiment(config):
    """Run the experiment selected by ``config`` and report its outcome.

    Args:
        config: Mapping with ``"main_params"`` (a
            ``(problem, method, settings)`` triple) and ``"n_workers"``.
            ``settings`` provides ``"nn_path"`` and ``"template"``, plus
            ``"tau"`` (bouncing ball, cartpole) or ``"epsilon_input"``
            (stopping car).

    ``problem`` selects bouncing ball or stopping car by name (for the
    latter, ``method == "ora"`` picks the ORA variant); any other value runs
    the cartpole experiment. The outcome is published through
    ``tune.report`` with ``safe`` encoded as 1 (safe), -1 (unsafe) or 0
    (``None``, i.e. undetermined).

    Raises:
        NotImplementedError: For the bouncing ball when ``"template"`` is
            neither 0 (box) nor 2 (octagon).
    """
    trial_dir = tune.get_trial_dir()
    problem, method, settings = config["main_params"]
    n_workers = config["n_workers"]

    # Problem-specific part: experiment class, network path and tuned value.
    if problem == "bouncing_ball":
        experiment = BouncingBallExperiment()
        experiment.nn_path = os.path.join(
            utils.get_save_dir(), nn_paths_bouncing_ball[settings["nn_path"]]
        )
        experiment.tau = settings["tau"]
        has_standard_template = False
    elif problem == "stopping_car":
        experiment_cls = ORAStoppingCarExperiment if method == "ora" else StoppingCarExperiment
        experiment = experiment_cls()
        experiment.nn_path = os.path.join(
            utils.get_save_dir(), nn_paths_stopping_car[settings["nn_path"]]
        )
        experiment.input_epsilon = settings["epsilon_input"]
        has_standard_template = True
    else:
        experiment = CartpoleExperiment()
        experiment.nn_path = os.path.join(
            utils.get_save_dir(), nn_paths_cartpole[settings["nn_path"]]
        )
        experiment.tau = settings["tau"]
        has_standard_template = True

    # Template selection: 2 = octagon, 0 = box, anything else falls back to
    # the experiment's own template where one is available.
    template_id = settings["template"]
    if template_id == 2:
        experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
    elif template_id == 0:
        experiment.analysis_template = Experiment.box(experiment.env_input_size)
    elif has_standard_template:
        _, standard_template = experiment.get_template(1)
        experiment.analysis_template = standard_template
    else:
        raise NotImplementedError()

    # Settings shared by every problem.
    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = trial_dir
    experiment.update_progress_fn = update_progress

    elapsed_seconds, safe, max_t = experiment.run_experiment()

    if safe is None:
        safe_value = 0
    elif safe:
        safe_value = 1
    else:
        safe_value = -1
    tune.report(elapsed_seconds=elapsed_seconds, safe=safe_value, max_t=max_t, done=True)