def __init__(self):
     """Set up callbacks, templates, safety constraints and default settings.

     The environment input size is fixed to 2. Both the unsafe zone and the
     angle split are lists of ``(directions, bound)`` pairs built from
     ``self.e(n_inputs, 0)`` and its negation (presumably the unit vector of
     coordinate 0 -- confirm against ``e``), with ``safe_angle`` being
     30 degrees expressed in radians.
     """
     n_inputs: int = 2
     super().__init__(n_inputs)

     # Hooks bound to this class's own implementations.
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot
     self.assign_lbl_fn = self.assign_label
     self.additional_seen_fn = self.additional_seen

     # template_2d is assigned before get_template(0) is invoked, as in the
     # original ordering.
     self.template_2d: np.ndarray = np.array([[1, 0], [0, 1]])
     self.input_boundaries, self.input_template = self.get_template(0)

     # The analysis runs on an octagon template over the input dimensions.
     self.analysis_template: np.ndarray = Experiment.octagon(n_inputs)
     self.plotting_time_interval = 60 * 5

     # 30 degrees, converted to radians.
     self.safe_angle = 30 * 2 * math.pi / 360
     along_angle = [self.e(n_inputs, 0)]
     against_angle = [-self.e(n_inputs, 0)]

     # Each pair gets its own bound array so the tuples never share storage.
     self.unsafe_zone: List[Tuple] = [
         (direction, np.array([-self.safe_angle]))
         for direction in (along_angle, against_angle)
     ]
     margin = 1e-4
     self.angle_split: List[Tuple] = [
         (direction, np.array([self.safe_angle - margin]))
         for direction in (along_angle, against_angle)
     ]

     # Default run settings.
     self.use_rounding = False
     self.rounding_value = 1024
     self.time_horizon = 300
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
     self.tau = 0.02
     self.n_actions = 3
 def generate_nn_polyhedral_guard(self, nn, chosen_action, output_flag):
     """Optimise an octagon template over the guard of ``nn`` for one action.

     A fresh Gurobi model is created with an unbounded 2-element
     ``observation`` variable, ``Experiment.generate_nn_guard`` adds its
     constraints for ``chosen_action`` (passed as ``action_ego``), and the
     model is then optimised along ``Experiment.octagon(2)``.

     Args:
         nn: network handed to ``Experiment.generate_nn_guard``.
         chosen_action: action forwarded as ``action_ego``.
         output_flag: value for Gurobi's ``OutputFlag`` parameter.

     Returns:
         Tuple ``(template, result)`` where ``result`` is whatever
         ``optimise`` returns for that template.
     """
     model = grb.Model()
     model.setParam('OutputFlag', output_flag)
     model.setParam('Threads', 2)

     unbounded = float("inf")
     obs = model.addMVar(shape=(2, ),
                         lb=-unbounded,
                         ub=unbounded,
                         name="observation")
     Experiment.generate_nn_guard(model, obs, nn, action_ego=chosen_action)

     template = Experiment.octagon(2)
     return template, optimise(template, model, obs)
 def additional_seen(self):
     """Compute one extra element bounded by ``safe_angle``.

     An input region is generated over the octagon template of the
     environment, bounded by ``safe_angle`` in its first two directions
     and unbounded (``inf``) in the remaining six. The region is then
     optimised along ``self.analysis_template``.

     Returns:
         ``[(bounds, 0)]`` with ``bounds`` the optimisation result as a
         tuple, or ``None`` when ``optimise`` returns ``None``.
     """
     model = grb.Model()
     model.setParam('OutputFlag', self.output_flag)

     unbounded = float("inf")
     region_bounds = [self.safe_angle, self.safe_angle] + [unbounded] * 6
     region = generate_input_region(model,
                                    Experiment.octagon(self.env_input_size),
                                    region_bounds, self.env_input_size)

     x_results = optimise(self.analysis_template, model, region)
     if x_results is None:
         print("Model unsatisfiable")
         return None
     return [(tuple(x_results), 0)]
Exemple #4
0
def run_parameterised_experiment(config):
    """Run one ``CartpoleExperiment`` trial and report the result to tune.

    Args:
        config: mapping with ``"main_params"`` (a 3-tuple whose last item is
            a dict providing ``"folder"``, ``"tau"`` and ``"template"``) and
            ``"n_workers"``.

    The ``"template"`` value selects the analysis template: ``2`` for an
    octagon, ``0`` for a box, anything else for ``get_template(1)``.
    The reported ``safe`` value is ``1`` (safe), ``-1`` (not safe) or ``0``
    (``run_experiment`` returned ``None`` for it).
    """
    trial_dir = tune.get_trial_dir()
    _problem, _method, other_config = config["main_params"]
    n_workers = config["n_workers"]

    experiment = CartpoleExperiment()
    experiment.nn_path = other_config["folder"]
    experiment.tau = other_config["tau"]

    template_id = other_config["template"]
    if template_id == 2:  # octagon
        chosen_template = Experiment.octagon(experiment.env_input_size)
    elif template_id == 0:  # box
        chosen_template = Experiment.box(experiment.env_input_size)
    else:  # standard
        _, chosen_template = experiment.get_template(1)
    experiment.analysis_template = chosen_template

    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = trial_dir
    experiment.update_progress_fn = update_progress
    elapsed_seconds, safe, max_t = experiment.run_experiment()

    # Encode the tri-state verdict as an integer for the tune report.
    if safe is None:
        safe_value = 0
    else:
        safe_value = 1 if safe else -1
    tune.report(elapsed_seconds=elapsed_seconds,
                safe=safe_value,
                max_t=max_t,
                done=True)
Exemple #5
0
        policy = trainer.get_policy()
        # sequential_nn = convert_ray_simple_policy_to_sequential(policy).cpu()
        sequential_nn = convert_ray_policy_to_sequential(policy).cpu()
        # l0 = torch.nn.Linear(5, 3, bias=False)
        # l0.weight = torch.nn.Parameter(torch.tensor([[0, 0, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1]], dtype=torch.float32))
        layers = []
        for l in sequential_nn:
            layers.append(l)
        nn = torch.nn.Sequential(*layers)
        torch.save(nn, pickled_path)
        # ray.shutdown()
        return nn


if __name__ == "__main__":
    # CUDA device visibility is set before Ray is initialised.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    ray.init(local_mode=False, log_to_driver=False)

    experiment = PendulumExperimentProbabilistic()

    # Analysis settings.
    experiment.analysis_template = Experiment.octagon(2)
    experiment.time_horizon = 6
    experiment.use_contained = False
    experiment.use_split_with_seen = False

    # Output location and progress reporting.
    experiment.save_dir = "/home/edoardo/ray_results/tune_PPO_pendulum/octagon_psi05_h7_nocontain2"
    experiment.plotting_time_interval = 60
    experiment.show_progressbar = True
    experiment.show_progress_plot = False

    experiment.run_experiment()
Exemple #6
0
def get_experiment_instance(config, trial_dir):
    """Build and configure the probabilistic experiment described by ``config``.

    Args:
        config: mapping with ``"main_params"`` (a ``(problem, other_config)``
            pair) and ``"n_workers"``.
        trial_dir: directory assigned to the experiment's ``save_dir``.

    ``problem`` selects the experiment class: ``"bouncing_ball"``,
    ``"stopping_car"``, or anything else for the pendulum. ``other_config``
    supplies ``"nn_path"``, ``"template"`` and ``"phi"``, plus
    ``"use_contain"`` and ``"initial_state"`` for the bouncing ball.

    Returns:
        The configured experiment instance.

    Raises:
        NotImplementedError: on an unsupported ``"template"`` or
            ``"initial_state"`` value.
    """
    problem, other_config = config["main_params"]
    n_workers = config["n_workers"]

    if problem == "bouncing_ball":
        experiment = BouncingBallExperimentProbabilistic()
        experiment.nn_path = os.path.join(utils.get_agents_dir(), nn_paths_bouncing_ball[other_config["nn_path"]])
        assert os.path.exists(experiment.nn_path)
        experiment.use_contained = other_config["use_contain"]

        initial_state = other_config["initial_state"]
        if initial_state == 0:
            experiment.input_boundaries = [9, -5, 0, 0.1]
        elif initial_state == 1:
            experiment.input_boundaries = [9, -5, 1, 1]
        else:
            raise NotImplementedError()

        template_id = other_config["template"]
        if template_id == 1:  # octagon
            experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
        elif template_id == 0:  # box
            experiment.analysis_template = Experiment.box(experiment.env_input_size)
        else:
            raise NotImplementedError()
        experiment.max_probability_split = other_config["phi"]
    elif problem == "stopping_car":
        experiment = StoppingCarExperimentProbabilistic()
        experiment.nn_path = os.path.join(utils.get_agents_dir(), nn_paths_stopping_car[other_config["nn_path"]])
        experiment.max_probability_split = other_config["phi"]

        template_id = other_config["template"]
        if template_id == 2:  # octagon
            experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
        elif template_id == 1:
            # Custom template: axis directions plus four combinations of
            # delta_x scaled by 1 / 4.5 with v_ego.
            delta_x = Experiment.e(experiment.env_input_size, 0)
            v_ego = Experiment.e(experiment.env_input_size, 1)
            slope = 1 / 4.5
            directions = [
                delta_x,
                -delta_x,
                v_ego,
                -v_ego,
                slope * delta_x + v_ego,
                slope * delta_x - v_ego,
                -slope * delta_x + v_ego,
                -slope * delta_x - v_ego,
            ]
            experiment.analysis_template = np.array(directions)
        elif template_id == 0:  # box
            experiment.analysis_template = Experiment.box(experiment.env_input_size)
        else:
            raise NotImplementedError()
    else:
        experiment = PendulumExperimentProbabilistic()
        # NOTE(review): the pendulum agent path is looked up in
        # nn_paths_cartpole -- confirm this is intended.
        experiment.nn_path = os.path.join(utils.get_agents_dir(), nn_paths_cartpole[other_config["nn_path"]])

        template_id = other_config["template"]
        if template_id == 1:  # octagon
            experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
        elif template_id == 0:  # box
            experiment.analysis_template = Experiment.box(experiment.env_input_size)
        else:
            raise NotImplementedError()
        experiment.max_probability_split = other_config["phi"]

    # Settings shared by every problem type.
    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = trial_dir
    return experiment
def run_parameterised_experiment(config):
    """Run the experiment selected by ``config`` and report the result to tune.

    Args:
        config: mapping with ``"main_params"`` (a
            ``(problem, method, other_config)`` triple) and ``"n_workers"``.

    ``problem`` picks the experiment: ``"bouncing_ball"``, ``"stopping_car"``
    (where ``method == "ora"`` selects ``ORAStoppingCarExperiment``), or
    anything else for ``CartpoleExperiment``. ``other_config["template"]``
    selects an octagon (``2``) or a box (``0``); any other value falls back
    to ``get_template(1)``, except for the bouncing ball where it raises.
    The reported ``safe`` value is ``1`` (safe), ``-1`` (not safe) or ``0``
    (``run_experiment`` returned ``None`` for it).

    Raises:
        NotImplementedError: for the bouncing ball with a template id other
            than ``0`` or ``2``.
    """
    trial_dir = tune.get_trial_dir()
    problem, method, other_config = config["main_params"]
    n_workers = config["n_workers"]

    # Problem-specific construction; the rest of the setup is shared.
    if problem == "bouncing_ball":
        experiment = BouncingBallExperiment()
        experiment.nn_path = os.path.join(utils.get_save_dir(), nn_paths_bouncing_ball[other_config["nn_path"]])
        experiment.tau = other_config["tau"]
        has_standard_template = False
    elif problem == "stopping_car":
        if method == "ora":
            experiment = ORAStoppingCarExperiment()
        else:
            experiment = StoppingCarExperiment()
        experiment.nn_path = os.path.join(utils.get_save_dir(), nn_paths_stopping_car[other_config["nn_path"]])
        experiment.input_epsilon = other_config["epsilon_input"]
        has_standard_template = True
    else:
        experiment = CartpoleExperiment()
        experiment.nn_path = os.path.join(utils.get_save_dir(), nn_paths_cartpole[other_config["nn_path"]])
        experiment.tau = other_config["tau"]
        has_standard_template = True

    template_id = other_config["template"]
    if template_id == 2:  # octagon
        experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
    elif template_id == 0:  # box
        experiment.analysis_template = Experiment.box(experiment.env_input_size)
    elif has_standard_template:  # standard
        _, standard_template = experiment.get_template(1)
        experiment.analysis_template = standard_template
    else:
        raise NotImplementedError()

    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = trial_dir
    experiment.update_progress_fn = update_progress
    elapsed_seconds, safe, max_t = experiment.run_experiment()

    # Encode the tri-state verdict as an integer for the tune report.
    if safe is None:
        safe_value = 0
    else:
        safe_value = 1 if safe else -1
    tune.report(elapsed_seconds=elapsed_seconds, safe=safe_value, max_t=max_t, done=True)