예제 #1
0
 def __init__(self):
     """Set the analysis switches, templates, input bounds and checkpoint path."""
     super().__init__()

     # Analysis switches.
     self.max_probability_split = 0.3
     self.use_softmax = False
     self.use_milp_range_prob = False

     # Two state variables; template_2d lists the two projection directions.
     self.env_input_size: int = 2
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # Input region: a box template with one bound per template row
     # (presumably upper bounds along each direction - TODO confirm).
     self.input_boundaries = [5, 5, 5, 5]
     self.input_template = Experiment.box(self.env_input_size)

     # The analysis template is a second, independent box instance.
     self.analysis_template: np.ndarray = Experiment.box(self.env_input_size)

     # Checkpoint of the trained network to analyse.
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
예제 #2
0
 def __init__(self):
     """Set the split threshold, templates, input bounds and checkpoint path."""
     super().__init__()
     self.max_probability_split = 0.5

     # Two state variables; template_2d lists the two projection directions.
     self.env_input_size: int = 2
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # Bounds of the input box: pi/5 for the first pair of template rows and
     # 1/5 for the second pair.
     first_bound = np.pi / 5
     second_bound = 1 / 5
     self.input_boundaries = [first_bound, first_bound, second_bound, second_bound]
     self.input_template = Experiment.box(self.env_input_size)

     # Alternative analysis template built from the unit vectors (disabled):
     # theta = Experiment.e(self.env_input_size, 0)
     # theta_dot = Experiment.e(self.env_input_size, 1)
     # template = np.array([theta, -theta, theta_dot, -theta_dot])
     self.analysis_template: np.ndarray = Experiment.box(self.env_input_size)

     # Checkpoint of the trained network to analyse.
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
 def __init__(self):
     """Configure a three-variable experiment: hooks, templates, unsafe zone and limits."""
     env_input_size: int = 3
     super().__init__(env_input_size)

     # Callbacks handed to the base experiment.
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot

     self.template_2d: np.ndarray = np.array([[1, 0, 0], [1, -1, 0]])

     # Input region: box template plus one bound per template row.
     self.input_boundaries: List = [30, -25, 0, 0, 36, -28]
     self.input_template: np.ndarray = Experiment.box(self.env_input_size)

     # Analysis template: +/- (x_lead - x_ego) and +/- v_ego.
     lead_pos = Experiment.e(self.env_input_size, 0)
     ego_pos = Experiment.e(self.env_input_size, 1)
     ego_speed = Experiment.e(self.env_input_size, 2)
     gap = lead_pos - ego_pos
     self.analysis_template: np.ndarray = np.array(
         [-gap, gap, ego_speed, -ego_speed])

     # Unsafe zone: the direction (x_lead - x_ego) paired with the bound 0.
     collision_distance = 0
     gap_direction = (Experiment.e(self.env_input_size, 0) -
                      Experiment.e(self.env_input_size, 1))
     self.unsafe_zone: List[Tuple] = [
         ([gap_direction], np.array([collision_distance]))
     ]

     # Numerical and run settings.
     self.rounding_value = 2**10
     self.use_rounding = False
     self.time_horizon = 400
     self.input_epsilon = 0
     self.v_lead = 28
     self.max_speed = 36
예제 #4
0
def main():
    """Plot generated intervals, merge a fixed subset, then prune and re-plot.

    Three figures are shown: the 200 generated intervals, the intervals after
    the masked subset has been replaced by its merged region, and the result
    after filtering with ``contained``.
    """
    template = Experiment.box(2)
    intervals = generate_intervals(200)
    items_array = np.array(intervals)
    plot_intervals(intervals, template).show()

    # Hard-coded selection mask (one flag per interval); presumably a saved
    # result of: run_genetic_merge(items_array, template)
    selection_mask = np.array([
        0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1,
        1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0,
        0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1,
        1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
        1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1,
        0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        0, 0, 1, 1, 0, 1, 1, 1,
    ], dtype="bool")

    # Replace the selected intervals with their single merged region.
    selected = items_array[selection_mask]
    merged, _, _ = merge_with_volume_analysis(template, selected)
    unselected: np.ndarray = items_array[np.invert(selection_mask)]
    merged_intermediate = np.append(unselected, np.expand_dims(merged, 0), 0)
    plot_intervals(merged_intermediate, template).show()

    # For every interval, keep a candidate only if it equals that interval or
    # `contained(candidate, interval)` is false.
    merged_final = merged_intermediate.copy()
    for interval in merged_intermediate:
        survivors = []
        for candidate in merged_final:
            if np.equal(interval, candidate).all() or not contained(candidate, interval):
                survivors.append(candidate)
        merged_final = survivors
    plot_intervals(merged_final, template).show()
    print("done")
 def __init__(self):
     """Configure the three-variable continuous experiment.

     Wires the post/NN/plot hooks, sets the analysis template to the single
     direction ``-(x_lead - x_ego)``, uses a box input template, defines the
     unsafe zone on ``x_lead - x_ego`` with bound 0 and selects the network
     checkpoint.
     """
     env_input_size: int = 3
     super().__init__(env_input_size)

     # Both the continuous and the remote post operators use the MILP version.
     self.post_fn_continuous = self.post_milp
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot
     self.template_2d: np.ndarray = np.array([[0, 0, 1], [1, -1, 0]])
     # self.input_boundaries: List = input_boundaries

     # Unit vectors of the two position variables (the previously computed
     # v_ego vector was never used and has been dropped).
     x_lead = Experiment.e(self.env_input_size, 0)
     x_ego = Experiment.e(self.env_input_size, 1)
     self.analysis_template = np.array([-(x_lead - x_ego)])  # standard
     self.input_template = Experiment.box(self.env_input_size)

     # Unsafe zone: direction (x_lead - x_ego) paired with the bound 0.
     collision_distance = 0
     distance = [Experiment.e(env_input_size, 0) - Experiment.e(env_input_size, 1)]
     # self.use_bfs = True
     # self.n_workers = 1
     self.rounding_value = 2 ** 6
     self.use_rounding = False
     self.time_horizon = 400
     self.unsafe_zone: List[Tuple] = [(distance, np.array([collision_distance]))]
     self.input_epsilon = 0

     # Previously tried checkpoints, kept for reference:
     # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/TD3_StoppingCar_0a03b_00000_0_cost_fn=2,epsilon_input=0_2021-02-27_17-12-58/checkpoint_680/checkpoint-680"
     # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/TD3_StoppingCar_62310_00000_0_cost_fn=2,epsilon_input=0_2021-03-04_13-34-45/checkpoint_780/checkpoint-780"
     # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/TD3_StoppingCar_ 3665a_00000_0_cost_fn=3,epsilon_input=0_2021-03-04_14-37-57/checkpoint_114/checkpoint-114"
     # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/TD3_StoppingCar_47b16_00000_0_cost_fn=3,epsilon_input=0_2021-03-04_17-08-46/checkpoint_600/checkpoint-600"
     # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/PPO_StoppingCar_28110_00000_0_cost_fn=0,epsilon_input=0_2021-03-07_17-40-07/checkpoint_1250/checkpoint-1250"
     self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/PPO_StoppingCar_7bdde_00000_0_cost_fn=0,epsilon_input=0_2021-03-09_11-49-20/checkpoint_1460/checkpoint-1460"
예제 #6
0
def main():
    """Run the merge helpers on two hand-written regions of a 2-D box template.

    The results are only bound to local names; nothing is printed or returned.
    """
    box_template = Experiment.box(2)
    first_region = [1, 1, 1, 1]
    second_region = [2, 0, 2, 0]

    def stacked_regions():
        # A fresh array per call, so the helpers never share an input buffer.
        return np.array([first_region, second_region])

    merged1 = merge_regions(box_template, stacked_regions())
    volume = compute_volume(box_template, merged1)
    merged, volume_new, volume_old = merge_with_volume_analysis(
        box_template, stacked_regions())
예제 #7
0
    def __init__(self):
        """Configure the two-variable experiment: hooks, templates, limits and unsafe zone."""
        env_input_size: int = 2
        super().__init__(env_input_size)

        # Callbacks handed to the base experiment.
        self.post_fn_remote = self.post_milp
        self.get_nn_fn = self.get_nn
        self.plot_fn = self.plot
        self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

        # NOTE(review): the whole return value of get_template() is stored as
        # the boundaries - confirm it does not return a
        # (boundaries, template) pair for this class.
        self.template_index = 1
        self.input_boundaries: List = self.get_template(self.template_index)

        # Box templates for both the input region and the analysis.
        self.input_template: np.ndarray = Experiment.box(self.env_input_size)
        self.analysis_template: np.ndarray = Experiment.box(
            self.env_input_size)

        # Run settings.
        self.time_horizon = 20
        self.rounding_value = 2**8
        self.load_graph = False
        self.minimum_length = 0.2
        self.max_probability_split = 0.20

        # Unsafe zone: directions (p, -v, v) paired with the bounds (1, 7, 7).
        p_axis = Experiment.e(self.env_input_size, 0)
        v_axis = Experiment.e(self.env_input_size, 1)
        self.unsafe_zone: List[Tuple] = [
            ([p_axis, -v_axis, v_axis], np.array([1, 7, 7]))
        ]

        # Earlier checkpoint, kept for reference:
        # self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_c7326_00000_0_2021-01-16_05-43-36/checkpoint_36/checkpoint-36"
        self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_71684_00004_4_2021-01-18_23-48-21/checkpoint_10/checkpoint-10"
예제 #8
0
 def __init__(self):
     """Set the constants, input box, six-direction analysis template and checkpoint."""
     super().__init__()
     self.v_lead = 28
     self.max_probability_split = 0.15

     # Two state variables; template_2d lists the two projection directions.
     self.env_input_size: int = 2
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # Input region: box template with one bound per template row.
     self.input_boundaries = (9, 0, 3, 3)
     self.input_template = Experiment.box(self.env_input_size)

     # Analysis template: the axis directions plus +/- (p - v) / 3.
     # (A plain box and the pair 1/6 * p - v, -1/6 * p - v were other
     # candidates that are currently disabled.)
     p_axis = Experiment.e(self.env_input_size, 0)
     v_axis = Experiment.e(self.env_input_size, 1)
     directions = [
         p_axis,
         -p_axis,
         v_axis,
         -v_axis,
         1 / 3 * (p_axis - v_axis),
         -1 / 3 * (p_axis - v_axis),
     ]
     self.analysis_template: np.ndarray = np.array(directions)

     # Earlier checkpoint, kept for reference:
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_c7326_00000_0_2021-01-16_05-43-36/checkpoint_36/checkpoint-36"
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_71684_00004_4_2021-01-18_23-48-21/checkpoint_10/checkpoint-10"
예제 #9
0
 def __init__(self):
     """Set the constants, input box, eight-direction analysis template and checkpoint."""
     super().__init__()
     self.v_lead = 28
     self.max_probability_split = 0.33

     # Two state variables: index 0 is delta_x, index 1 is v_ego.
     self.env_input_size: int = 2
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # Input region: box template with one bound per template row.
     self.input_boundaries = (10, -3, 32, -26)
     self.input_template = Experiment.box(self.env_input_size)

     # Analysis template: the axis directions plus the four combinations
     # +/- delta_x / 4.5 +/- v_ego.
     # (Disabled alternative:
     #  Experiment.combinations([1 / 4.5*(x_lead - x_ego), - v_ego]))
     gap_axis = Experiment.e(self.env_input_size, 0)
     speed_axis = Experiment.e(self.env_input_size, 1)
     directions = [
         gap_axis,
         -gap_axis,
         speed_axis,
         -speed_axis,
         1 / 4.5 * gap_axis + speed_axis,
         1 / 4.5 * gap_axis - speed_axis,
         -1 / 4.5 * gap_axis + speed_axis,
         -1 / 4.5 * gap_axis - speed_axis,
     ]
     self.analysis_template: np.ndarray = np.array(directions)

     self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
예제 #10
0
 def __init__(self):
     """Configure the two-variable experiment: hooks, templates, unsafe zone and checkpoint."""
     env_input_size: int = 2
     super().__init__(env_input_size)

     # Callbacks handed to the base experiment.
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # Input region: box template with one bound per template row.
     self.input_boundaries = (10, -3, 32, -26)
     self.input_template = Experiment.box(env_input_size)
     # self.input_boundaries: List = input_boundaries
     # self.input_template: np.ndarray = input_template
     # _, template = self.get_template(1)

     # Analysis template: the axis directions plus the four combinations
     # +/- delta_x / 4.5 +/- v_ego.
     # Disabled alternatives:
     # template = Experiment.combinations([delta_x, - v_ego])
     # template = np.stack([x_lead - x_ego, -(x_lead - x_ego), - v_ego, v_ego])
     gap_axis = Experiment.e(env_input_size, 0)
     speed_axis = Experiment.e(env_input_size, 1)
     directions = [
         gap_axis,
         -gap_axis,
         speed_axis,
         -speed_axis,
         1 / 4.5 * gap_axis + speed_axis,
         1 / 4.5 * gap_axis - speed_axis,
         -1 / 4.5 * gap_axis + speed_axis,
         -1 / 4.5 * gap_axis - speed_axis,
     ]
     self.analysis_template: np.ndarray = np.array(directions)

     # Unsafe zone: the first state variable paired with the bound 0.
     collision_distance = 0
     unsafe_direction = [Experiment.e(self.env_input_size, 0)]
     self.unsafe_zone: List[Tuple] = [
         (unsafe_direction, np.array([collision_distance]))
     ]

     # Run settings.
     # self.use_bfs = True
     # self.n_workers = 1
     self.rounding_value = 2**10
     self.use_rounding = False
     self.time_horizon = 20
     self.input_epsilon = 0
     self.v_lead = 28
     self.max_speed = 36

     # Checkpoints tried so far, with the verdict noted by the author:
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_11-56-58/checkpoint_31/checkpoint-31"  # safe both with and without epsilon of 0.1.
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_11-56-58/checkpoint_37/checkpoint-37" #not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_12-37-27/checkpoint_24/checkpoint-24"  # safe at t=216
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_12-37-27/checkpoint_36/checkpoint-36"  # not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00002_2_cost_fn=0,epsilon_input=0_2021-01-17_12-38-53/checkpoint_40/checkpoint-40"  # not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00006_6_cost_fn=0,epsilon_input=0_2021-01-17_12-44-54/checkpoint_41/checkpoint-41" #safe
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00000_0_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_39/checkpoint-39" #unsafe
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
예제 #11
0
 def __init__(self):
     """Set the splitting options, input box, eight-direction template and checkpoint."""
     # NOTE(review): unlike the sibling initialisers, no super().__init__()
     # is called here - confirm that this is intentional.

     # Splitting behaviour.
     self.use_split = True
     self.use_entropy_split = True
     self.max_probability_split = 0.33
     self.minimum_length = 0.2

     # Miscellaneous switches.
     self.input_epsilon = 0
     self.output_flag = False
     self.use_softmax = True
     self.use_milp_range_prob = True

     # Two state variables: index 0 is delta_x, index 1 is v_ego.
     self.env_input_size: int = 2
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # Input region: box template with one bound per template row.
     self.input_boundaries = (50, 0, 36, 36)
     self.input_template = Experiment.box(self.env_input_size)

     # Analysis template: the axis directions plus the four combinations
     # +/- delta_x / 4.5 +/- v_ego.
     # (Disabled alternative:
     #  Experiment.combinations([1 / 4.5*(x_lead - x_ego), - v_ego]))
     gap_axis = Experiment.e(self.env_input_size, 0)
     speed_axis = Experiment.e(self.env_input_size, 1)
     directions = [
         gap_axis,
         -gap_axis,
         speed_axis,
         -speed_axis,
         1 / 4.5 * gap_axis + speed_axis,
         1 / 4.5 * gap_axis - speed_axis,
         -1 / 4.5 * gap_axis + speed_axis,
         -1 / 4.5 * gap_axis - speed_axis,
     ]
     self.analysis_template: np.ndarray = np.array(directions)

     self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
예제 #12
0
 def __init__(self):
     """Configure the three-variable experiment: hooks, templates, unsafe zone and checkpoint."""
     env_input_size: int = 3
     super().__init__(env_input_size)

     # Callbacks handed to the base experiment.
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot
     self.assign_lbl_fn = self.assign_label

     self.template_2d: np.ndarray = np.array([[1]])
     self.input_boundaries: List = [55, -45, 6, 0, 1, 0]

     # One box template object is shared by the input and the analysis.
     shared_box = Experiment.box(3)
     self.input_template: np.ndarray = shared_box
     self.analysis_template: np.ndarray = shared_box

     # Run settings.
     self.time_horizon = 500
     self.use_rounding = False
     self.rounding_value = 2**4
     # self.plotting_time_interval = 10
     self.minimum_percentage_population = -0.55
     self.n_actions = 2

     # Unsafe zone: direction +p paired with 100 and direction -p paired with 0,
     # where p is the unit vector of the first state variable.
     p_axis = Experiment.e(self.env_input_size, 0)
     self.unsafe_zone: List[Tuple] = [
         ([p_axis], np.array([100])),
         ([-p_axis], np.array([0])),
     ]  # -0.55

     self.nn_path = "/home/edoardo/ray_results/tune_PPO_watertank/PPO_MonitoredWaterTank_e219e_00000_0_2021-05-19_20-39-10/checkpoint_3334/checkpoint-3334"
예제 #13
0
def run_parameterised_experiment(config):
    """Run one cartpole experiment described by ``config`` and report it to Tune.

    ``config["main_params"]`` is unpacked as ``(problem, method, other_config)``;
    only ``other_config`` (keys ``folder``, ``tau``, ``template``) is used.
    ``config["n_workers"]`` sets the worker count. The outcome is reported
    through ``tune.report`` with ``safe`` encoded as 0 (None), 1 (truthy) or
    -1 (falsy).
    """
    trial_dir = tune.get_trial_dir()
    _problem, _method, other_config = config["main_params"]
    n_workers = config["n_workers"]

    experiment = CartpoleExperiment()
    # Alternative lookup: nn_paths_cartpole[other_config["nn_path"]]
    experiment.nn_path = other_config["folder"]
    experiment.tau = other_config["tau"]

    # Template id: 2 selects the octagon, 0 the box, anything else the
    # experiment's own template number 1.
    template_id = other_config["template"]
    if template_id == 2:
        chosen_template = Experiment.octagon(experiment.env_input_size)
    elif template_id == 0:
        chosen_template = Experiment.box(experiment.env_input_size)
    else:
        _, chosen_template = experiment.get_template(1)
    experiment.analysis_template = chosen_template

    # Quiet run, storing results in the trial directory.
    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    # experiment.use_rounding = False
    experiment.save_dir = trial_dir
    experiment.update_progress_fn = update_progress
    elapsed_seconds, safe, max_t = experiment.run_experiment()

    if safe is None:
        safe_value = 0
    else:
        safe_value = 1 if safe else -1
    tune.report(elapsed_seconds=elapsed_seconds,
                safe=safe_value,
                max_t=max_t,
                done=True)
예제 #14
0
 def __init__(self):
     """Configure the two-variable experiment: hooks, templates, angle split and unsafe zone.

     The input boundaries and input template come from ``get_template(0)``;
     the analysis template is a box. ``angle_split`` and ``unsafe_zone`` are
     both expressed on the first state variable (theta) and its negation.
     """
     env_input_size: int = 2
     super().__init__(env_input_size)

     # Callbacks handed to the base experiment.
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

     # A single get_template(0) call supplies both values; the second call
     # whose result was discarded has been removed.
     input_boundaries, input_template = self.get_template(0)
     self.input_boundaries: List = input_boundaries
     self.input_template: np.ndarray = input_template
     self.analysis_template: np.ndarray = Experiment.box(env_input_size)

     # Run settings.
     self.time_horizon = 21
     self.rounding_value = 2**8
     self.load_graph = False
     self.minimum_length = 0.2
     # self.use_split_with_seen = True
     self.n_actions = 3
     self.max_probability_split = 0.5
     self.tau = 0.02

     # 12 degrees expressed in radians.
     self.safe_angle = 12 * 2 * math.pi / 360
     epsilon = 1e-4

     # Directions +theta and -theta; the same list objects are shared by
     # angle_split and unsafe_zone, as before.
     theta = [self.e(env_input_size, 0)]
     neg_theta = [-self.e(env_input_size, 0)]
     self.angle_split: List[Tuple] = [
         (theta, np.array([self.safe_angle - epsilon])),
         (neg_theta, np.array([self.safe_angle - epsilon]))
     ]
     self.unsafe_zone: List[Tuple] = [
         (theta, np.array([-self.safe_angle])),
         (neg_theta, np.array([-self.safe_angle]))
     ]
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
예제 #15
0
        # nn = torch.nn.Sequential(torch.nn.Linear(2, 50), torch.nn.ReLU(), torch.nn.Linear(50, 1), torch.nn.Tanh())
        # checkpoint = torch.load("/home/edoardo/Development/SafeDRL/runnables/invariant/invariant_checkpoint.pt", map_location=torch.device("cpu"))
        # nn.load_state_dict(checkpoint)
        state_size = 2
        action_size = 2
        ALPHA = 0.6  # the higher the more aggressive the sampling towards high TD transitions
        agent = InvariantAgent(state_size=state_size, action_size=action_size, alpha=ALPHA)
        agent.load("/home/edoardo/Development/SafeDRL/runs/Aug05_14-55-31_alpha=0.6, min_eps=0.01, eps_decay=0.2/checkpoint_1100.pth")
        nn = agent.inetwork_local
        nn.cpu()
        return nn


# Script entry point: run a single StoppingCarExperiment with a hand-picked
# analysis template, input region and time horizon.
if __name__ == '__main__':
    # Start Ray in local mode, without forwarding worker logs to the driver.
    ray.init(log_to_driver=False, local_mode=True)
    experiment = StoppingCarExperiment()
    experiment.save_dir = "/home/edoardo/ray_results/tune_PPO_stopping_car/test"
    experiment.plotting_time_interval = 60
    experiment.show_progressbar = True
    experiment.show_progress_plot = False
    # Unit vectors of the three state variables.
    x_lead = Experiment.e(experiment.env_input_size, 0)
    x_ego = Experiment.e(experiment.env_input_size, 1)
    v_ego = Experiment.e(experiment.env_input_size, 2)
    # Analysis template: -(x_lead - x_ego) plus both signs of v_ego.
    template = np.array([-(x_lead - x_ego), v_ego, -v_ego])
    experiment.analysis_template = template  # standard
    # Input region: 3-D box template with one bound per template row.
    experiment.input_template = Experiment.box(3)
    input_boundaries = [40, -30, 10, -0, 36, -28]
    experiment.input_boundaries = input_boundaries
    experiment.time_horizon = 150
    experiment.run_experiment()
예제 #16
0
def run_parameterised_experiment(config):
    """Build, configure and run the experiment described by ``config``, then report it.

    ``config["main_params"]`` is unpacked as ``(problem, method, other_config)``:

    * ``problem`` selects the experiment: ``"bouncing_ball"``,
      ``"stopping_car"`` (``method == "ora"`` picks the ORA variant) or
      anything else for cartpole.
    * ``other_config["nn_path"]`` is a key into the problem's path table;
      ``other_config["template"]`` selects the analysis template (2 = octagon,
      0 = box, otherwise the experiment's template number 1);
      ``tau`` / ``epsilon_input`` are read for the problems that use them.

    ``config["n_workers"]`` sets the worker count. The outcome is sent through
    ``tune.report`` with ``safe`` encoded as 0 (None), 1 (truthy) or -1 (falsy).

    Raises:
        NotImplementedError: for ``bouncing_ball`` with a template id other
            than 0 or 2.
    """
    trial_dir = tune.get_trial_dir()
    problem, method, other_config = config["main_params"]
    n_workers = config["n_workers"]

    # Problem-specific construction; everything below is shared.
    if problem == "bouncing_ball":
        experiment = BouncingBallExperiment()
        experiment.nn_path = os.path.join(utils.get_save_dir(), nn_paths_bouncing_ball[other_config["nn_path"]])
        experiment.tau = other_config["tau"]
        # The bouncing ball has no fallback template.
        has_standard_template = False
    elif problem == "stopping_car":
        experiment = ORAStoppingCarExperiment() if method == "ora" else StoppingCarExperiment()
        experiment.nn_path = os.path.join(utils.get_save_dir(), nn_paths_stopping_car[other_config["nn_path"]])
        experiment.input_epsilon = other_config["epsilon_input"]
        has_standard_template = True
    else:
        experiment = CartpoleExperiment()
        experiment.nn_path = os.path.join(utils.get_save_dir(), nn_paths_cartpole[other_config["nn_path"]])
        experiment.tau = other_config["tau"]
        has_standard_template = True

    # Analysis template selection.
    template_id = other_config["template"]
    if template_id == 2:  # octagon
        experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
    elif template_id == 0:  # box
        experiment.analysis_template = Experiment.box(experiment.env_input_size)
    elif has_standard_template:
        _, template = experiment.get_template(1)
        experiment.analysis_template = template  # standard
    else:
        raise NotImplementedError(f"template {template_id!r} is not supported for problem {problem!r}")

    # Quiet run, storing results in the trial directory.
    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    # experiment.use_rounding = False
    experiment.save_dir = trial_dir
    experiment.update_progress_fn = update_progress
    elapsed_seconds, safe, max_t = experiment.run_experiment()

    # Encode the tri-state verdict as an integer for Tune.
    if safe is None:
        safe_value = 0
    else:
        safe_value = 1 if safe else -1
    tune.report(elapsed_seconds=elapsed_seconds, safe=safe_value, max_t=max_t, done=True)
예제 #17
0
def get_experiment_instance(config, trial_dir):
    """Build and configure the probabilistic experiment described by ``config``.

    ``config["main_params"]`` is unpacked as ``(problem, other_config)``:

    * ``problem`` selects the experiment: ``"bouncing_ball"``,
      ``"stopping_car"`` or anything else for the pendulum.
    * ``other_config["nn_path"]`` is a key into the problem's path table,
      ``other_config["phi"]`` becomes ``max_probability_split`` and
      ``other_config["template"]`` selects the analysis template. Template 0
      is always the box; the octagon is id 2 for the stopping car and id 1
      otherwise; id 1 for the stopping car is a hand-written
      eight-direction template.
    * the bouncing ball additionally reads ``use_contain`` and
      ``initial_state`` (0 or 1).

    Args:
        config: Trial configuration with ``main_params`` and ``n_workers``.
        trial_dir: Directory assigned to ``experiment.save_dir``.

    Returns:
        The configured experiment; it is not run here.

    Raises:
        NotImplementedError: for an unsupported ``template`` or
            ``initial_state`` value.
        AssertionError: if the bouncing-ball network path does not exist.
    """
    problem, other_config = config["main_params"]
    n_workers = config["n_workers"]

    # Problem-specific construction; template selection and the common
    # settings are shared below.
    if problem == "bouncing_ball":
        experiment = BouncingBallExperimentProbabilistic()
        experiment.nn_path = os.path.join(utils.get_agents_dir(), nn_paths_bouncing_ball[other_config["nn_path"]])
        # NOTE(review): this check disappears under `python -O`, and only this
        # branch validates the path - consider an explicit check for all.
        assert os.path.exists(experiment.nn_path)
        experiment.use_contained = other_config["use_contain"]
        initial_state = other_config["initial_state"]
        if initial_state == 0:
            experiment.input_boundaries = [9, -5, 0, 0.1]
        elif initial_state == 1:
            experiment.input_boundaries = [9, -5, 1, 1]
        else:
            raise NotImplementedError()
        octagon_id = 1
    elif problem == "stopping_car":
        experiment = StoppingCarExperimentProbabilistic()
        experiment.nn_path = os.path.join(utils.get_agents_dir(), nn_paths_stopping_car[other_config["nn_path"]])
        octagon_id = 2
    else:
        experiment = PendulumExperimentProbabilistic()
        # NOTE(review): the pendulum reads its path from the cartpole table -
        # confirm this is intended.
        experiment.nn_path = os.path.join(utils.get_agents_dir(), nn_paths_cartpole[other_config["nn_path"]])
        octagon_id = 1

    # Analysis template selection.
    template_id = other_config["template"]
    if template_id == octagon_id:  # octagon
        experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
    elif template_id == 0:  # box
        experiment.analysis_template = Experiment.box(experiment.env_input_size)
    elif problem == "stopping_car" and template_id == 1:
        # Axis directions plus the four combinations +/- delta_x / 4.5 +/- v_ego.
        delta_x = Experiment.e(experiment.env_input_size, 0)
        v_ego = Experiment.e(experiment.env_input_size, 1)
        experiment.analysis_template = np.array([
            delta_x, -delta_x, v_ego, -v_ego,
            1 / 4.5 * delta_x + v_ego, 1 / 4.5 * delta_x - v_ego,
            -1 / 4.5 * delta_x + v_ego, -1 / 4.5 * delta_x - v_ego,
        ])
    else:
        raise NotImplementedError()

    # Settings common to every problem.
    experiment.max_probability_split = other_config["phi"]
    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    # experiment.use_rounding = False
    experiment.save_dir = trial_dir
    # experiment.update_progress_fn = update_progress
    return experiment
예제 #18
0
# Script setup: restore a trained PPO policy, convert it to a torch
# Sequential network, and sample start points for the analysis below.
ray.init()
nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_71684_00004_4_2021-01-18_23-48-21/checkpoint_10/checkpoint-10"
config = get_PPO_config(1234, use_gpu=0)
trainer = ppo.PPOTrainer(config=config)
# Load the trained weights from the checkpoint.
trainer.restore(nn_path)
policy = trainer.get_policy()
# Convert the policy to a sequential network and move it to the CPU.
sequential_nn = convert_ray_policy_to_sequential(policy).cpu()
# Rebuild the network as a plain torch.nn.Sequential from its layers.
layers = []
for l in sequential_nn:
    layers.append(l)
nn = torch.nn.Sequential(*layers)
horizon = 10

# Gateway to a JVM (presumably py4j - TODO confirm) used to create the model checker.
gateway = JavaGateway(auto_field=True)
mc = gateway.jvm.explicit.MDPModelChecker(None)
# Sample 2000 points from the 2-D box template with the given boundaries.
analysis_template = Experiment.box(2)
boundaries = [6, -5, 1, 1]
samples = polytope.sample(2000, analysis_template,
                          np.array(boundaries, dtype=float))
# Filled in by the loop that follows.
point_probabilities = []
for i, point in enumerate(samples):
    # generate prism graph

    frontier = [(0, point)]
    root = point
    graph = networkx.DiGraph()
    widgets = [
        progressbar.Variable('frontier'), ", ",
        progressbar.Variable('max_t'), ", ",
        progressbar.widgets.Timer()
    ]