def __init__(self):
    """Set up the experiment with its hard-coded defaults.

    Runs the parent initialiser, then fixes a 2-input problem, box templates
    for both the input region and the analysis, the analysis switches and
    the checkpoint path stored in ``nn_path``.
    """
    super().__init__()

    # Problem size and initial region.
    self.env_input_size: int = 2
    self.input_boundaries = [5, 5, 5, 5]
    self.input_template = Experiment.box(self.env_input_size)

    # Obtained from its own Experiment.box call, not shared with
    # input_template.
    self.analysis_template: np.ndarray = Experiment.box(self.env_input_size)
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

    # Analysis switches.
    self.max_probability_split = 0.3
    self.use_softmax = False
    self.use_milp_range_prob = False

    # Absolute path of the agent checkpoint (a tune_PPO_pendulum run).
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
def __init__(self):
    """Set up the experiment with its hard-coded defaults.

    Runs the parent initialiser, then fixes a 2-input problem whose
    boundaries are ``pi / 5`` (twice) and ``1 / 5`` (twice), box templates
    for the input region and the analysis, and the checkpoint path.
    """
    super().__init__()

    self.env_input_size: int = 2
    self.max_probability_split = 0.5
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

    # Same four values as writing them out one by one.
    self.input_boundaries = [np.pi / 5] * 2 + [1 / 5] * 2
    self.input_template = Experiment.box(self.env_input_size)

    # Disabled alternative: an explicit template
    #   np.array([theta, -theta, theta_dot, -theta_dot])
    # with theta / theta_dot = Experiment.e(self.env_input_size, 0 / 1).
    # The plain box (built by a separate call) is used instead.
    self.analysis_template: np.ndarray = Experiment.box(self.env_input_size)

    # Absolute path of the agent checkpoint (a tune_PPO_pendulum run).
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
def __init__(self):
    """Set up the 3-input experiment with its hard-coded defaults.

    Passes the input size to the parent initialiser, wires the post /
    network / plot callbacks to this instance's own methods, and fills in
    the input region, analysis template, unsafe zone and numeric settings.
    """
    env_input_size: int = 3
    super().__init__(env_input_size)

    # Callbacks all point at methods of this instance.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.template_2d: np.ndarray = np.array([[1, 0, 0], [1, -1, 0]])

    # Initial region.
    self.input_boundaries: List = [30, -25, 0, 0, 36, -28]
    self.input_template: np.ndarray = Experiment.box(self.env_input_size)

    # Analysis template: +/- (x_lead - x_ego) and +/- v_ego.
    # presumably Experiment.e(n, i) is the i-th unit direction -- confirm.
    x_lead, x_ego, v_ego = (
        Experiment.e(self.env_input_size, axis) for axis in range(3)
    )
    gap = x_lead - x_ego
    self.analysis_template: np.ndarray = np.array([-gap, gap, v_ego, -v_ego])

    # Unsafe zone: direction (x_lead - x_ego) paired with bound 0.
    collision_distance = 0
    distance = [
        Experiment.e(self.env_input_size, 0)
        - Experiment.e(self.env_input_size, 1)
    ]
    self.unsafe_zone: List[Tuple] = [
        (distance, np.array([collision_distance])),
    ]

    # Numeric settings.
    self.rounding_value = 2 ** 10
    self.use_rounding = False
    self.time_horizon = 400
    self.input_epsilon = 0
    self.v_lead = 28
    self.max_speed = 36
def main():
    """Demo: merge a fixed selection of generated intervals, plotting each stage.

    Generates 200 intervals, merges the ones picked by a hard-coded boolean
    mask into one region, appends that region to the unselected intervals,
    and finally drops every entry ``x`` for which ``contained(x, interval)``
    holds against some other entry. A figure is shown before merging, after
    merging and after pruning; ``"done"`` is printed at the end.
    """
    template = Experiment.box(2)
    intervals = generate_intervals(200)
    items_array = np.array(intervals)
    plot_intervals(intervals, template).show()

    # Hard-coded selection mask, one flag per interval. The call that could
    # compute one is disabled:
    # best_variable = run_genetic_merge(items_array, template)
    best_variable = np.array(
        [
            0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
            1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,
            1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0,
            0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
            0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,
            1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
            0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0,
            1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0,
            0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0,
            1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1,
        ],
        dtype="bool",
    )

    # Merge the selected intervals; the two volume figures are not used.
    selected = items_array[best_variable]
    merged, _volume_new, _volume_old = merge_with_volume_analysis(
        template, selected
    )

    # Unselected intervals followed by the single merged region.
    untouched: np.ndarray = items_array[np.invert(best_variable)]
    merged_intermediate = np.append(untouched, np.expand_dims(merged, 0), 0)
    plot_intervals(merged_intermediate, template).show()

    # Prune: an entry survives a pass when it equals the reference interval
    # or contained(entry, interval) is false.
    survivors = merged_intermediate.copy()
    for interval in merged_intermediate:
        kept = []
        for candidate in survivors:
            is_same = np.equal(interval, candidate).all()
            if not is_same and contained(candidate, interval):
                continue
            kept.append(candidate)
        survivors = kept
    plot_intervals(survivors, template).show()
    print("done")
def __init__(self):
    """Set up the 3-input experiment with its hard-coded defaults.

    Passes the input size to the parent initialiser, wires the continuous /
    remote post functions, network loader and plot callback to this
    instance's own methods, and fills in the templates, unsafe zone, numeric
    settings and checkpoint path.

    ``input_boundaries`` is not assigned by this initialiser; presumably the
    caller (or the parent class) provides it -- confirm.
    """
    env_input_size: int = 3
    super().__init__(env_input_size)

    # Both post-function hooks use the same MILP implementation.
    self.post_fn_continuous = self.post_milp
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.template_2d: np.ndarray = np.array([[0, 0, 1], [1, -1, 0]])

    # Standard analysis template: the single direction -(x_lead - x_ego).
    # The previously computed v_ego direction was never used and is gone.
    x_lead = Experiment.e(self.env_input_size, 0)
    x_ego = Experiment.e(self.env_input_size, 1)
    self.analysis_template = np.array([-(x_lead - x_ego)])
    self.input_template = Experiment.box(self.env_input_size)

    # Unsafe zone: direction (x_lead - x_ego) paired with bound 0.
    collision_distance = 0
    distance = [
        Experiment.e(env_input_size, 0) - Experiment.e(env_input_size, 1)
    ]
    self.rounding_value = 2 ** 6
    self.use_rounding = False
    self.time_horizon = 400
    self.unsafe_zone: List[Tuple] = [
        (distance, np.array([collision_distance])),
    ]
    self.input_epsilon = 0

    # Other checkpoints kept for reference (all disabled):
    # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/TD3_StoppingCar_0a03b_00000_0_cost_fn=2,epsilon_input=0_2021-02-27_17-12-58/checkpoint_680/checkpoint-680"
    # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/TD3_StoppingCar_62310_00000_0_cost_fn=2,epsilon_input=0_2021-03-04_13-34-45/checkpoint_780/checkpoint-780"
    # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/TD3_StoppingCar_ 3665a_00000_0_cost_fn=3,epsilon_input=0_2021-03-04_14-37-57/checkpoint_114/checkpoint-114"
    # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/TD3_StoppingCar_47b16_00000_0_cost_fn=3,epsilon_input=0_2021-03-04_17-08-46/checkpoint_600/checkpoint-600"
    # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/PPO_StoppingCar_28110_00000_0_cost_fn=0,epsilon_input=0_2021-03-07_17-40-07/checkpoint_1250/checkpoint-1250"
    self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/PPO_StoppingCar_7bdde_00000_0_cost_fn=0,epsilon_input=0_2021-03-09_11-49-20/checkpoint_1460/checkpoint-1460"
def main():
    """Exercise the region-merging helpers on two hand-written regions.

    Calls ``merge_regions``, ``compute_volume`` and
    ``merge_with_volume_analysis`` on the same pair of regions under a 2-D
    box template. Every result is discarded; nothing is returned or printed.
    """
    template = Experiment.box(2)
    first_region = [1, 1, 1, 1]
    second_region = [2, 0, 2, 0]

    # Plain merge followed by the volume of the merged region.
    merged_only = merge_regions(
        template, np.array([first_region, second_region])
    )
    compute_volume(template, merged_only)

    # Combined helper, fed a freshly built array of the same two regions.
    merge_with_volume_analysis(
        template, np.array([first_region, second_region])
    )
def __init__(self):
    """Set up the 2-input experiment with its hard-coded defaults.

    Passes the input size to the parent initialiser, wires the callbacks to
    this instance's own methods, and fills in the input region, box
    templates, numeric settings, unsafe zone and checkpoint path.
    """
    env_input_size: int = 2
    super().__init__(env_input_size)

    # Callbacks all point at methods of this instance.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

    # The whole return value of get_template becomes the boundaries.
    # NOTE(review): confirm get_template returns only boundaries here.
    self.template_index = 1
    self.input_boundaries: List = self.get_template(self.template_index)

    # Two independent box templates.
    self.input_template: np.ndarray = Experiment.box(self.env_input_size)
    self.analysis_template: np.ndarray = Experiment.box(self.env_input_size)

    # Numeric settings.
    self.time_horizon = 20
    self.rounding_value = 2 ** 8
    self.load_graph = False
    self.minimum_length = 0.2
    self.max_probability_split = 0.20

    # Unsafe zone: directions [p, -v, v] paired with bounds [1, 7, 7].
    p = Experiment.e(self.env_input_size, 0)
    v = Experiment.e(self.env_input_size, 1)
    self.unsafe_zone: List[Tuple] = [([p, -v, v], np.array([1, 7, 7]))]

    # Disabled alternative checkpoint:
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_c7326_00000_0_2021-01-16_05-43-36/checkpoint_36/checkpoint-36"
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_71684_00004_4_2021-01-18_23-48-21/checkpoint_10/checkpoint-10"
def __init__(self):
    """Set up the 2-input experiment with its hard-coded defaults.

    Runs the parent initialiser, then fixes the input region, a six-row
    analysis template built from the two input directions, and the
    checkpoint path.
    """
    super().__init__()

    self.env_input_size: int = 2
    self.v_lead = 28
    self.max_probability_split = 0.15
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

    # Initial region.
    self.input_boundaries = (9, 0, 3, 3)
    self.input_template = Experiment.box(self.env_input_size)

    # Analysis template: +/-p, +/-v and +/- (p - v) / 3.
    # The plain box template is the disabled alternative:
    # self.analysis_template = Experiment.box(self.env_input_size)
    p = Experiment.e(self.env_input_size, 0)
    v = Experiment.e(self.env_input_size, 1)
    difference = p - v
    self.analysis_template: np.ndarray = np.array(
        [p, -p, v, -v, 1 / 3 * difference, -1 / 3 * difference]
    )

    # Disabled alternative checkpoint:
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_c7326_00000_0_2021-01-16_05-43-36/checkpoint_36/checkpoint-36"
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_71684_00004_4_2021-01-18_23-48-21/checkpoint_10/checkpoint-10"
def __init__(self):
    """Set up the 2-input experiment with its hard-coded defaults.

    Runs the parent initialiser, then fixes the input region, an eight-row
    analysis template combining ``delta_x`` and ``v_ego``, and the
    checkpoint path.
    """
    super().__init__()

    self.env_input_size: int = 2
    self.v_lead = 28
    self.max_probability_split = 0.33
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

    # Initial region.
    self.input_boundaries = (10, -3, 32, -26)
    self.input_template = Experiment.box(self.env_input_size)

    # Analysis template: the four axis directions plus the four sign
    # combinations of (delta_x / 4.5) and v_ego.
    delta_x = Experiment.e(self.env_input_size, 0)
    v_ego = Experiment.e(self.env_input_size, 1)
    slope = 1 / 4.5
    self.analysis_template: np.ndarray = np.array(
        [
            delta_x,
            -delta_x,
            v_ego,
            -v_ego,
            slope * delta_x + v_ego,
            slope * delta_x - v_ego,
            -slope * delta_x + v_ego,
            -slope * delta_x - v_ego,
        ]
    )

    # Checkpoint annotated "safe" by the author.
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"
def __init__(self):
    """Set up the 2-input experiment with its hard-coded defaults.

    Passes the input size to the parent initialiser, wires the callbacks to
    this instance's own methods, and fills in the input region, an
    eight-row analysis template, the unsafe zone, numeric settings and the
    checkpoint path.
    """
    env_input_size: int = 2
    super().__init__(env_input_size)

    # Callbacks all point at methods of this instance.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

    # Initial region.
    self.input_boundaries = (10, -3, 32, -26)
    self.input_template = Experiment.box(env_input_size)

    # Analysis template: the four axis directions plus the four sign
    # combinations of (delta_x / 4.5) and v_ego.
    delta_x = Experiment.e(env_input_size, 0)
    v_ego = Experiment.e(env_input_size, 1)
    slope = 1 / 4.5
    self.analysis_template: np.ndarray = np.array(
        [
            delta_x,
            -delta_x,
            v_ego,
            -v_ego,
            slope * delta_x + v_ego,
            slope * delta_x - v_ego,
            -slope * delta_x + v_ego,
            -slope * delta_x - v_ego,
        ]
    )

    # Unsafe zone: the first input direction paired with bound 0.
    collision_distance = 0
    distance = [Experiment.e(self.env_input_size, 0)]
    self.unsafe_zone: List[Tuple] = [
        (distance, np.array([collision_distance])),
    ]

    # Numeric settings.
    self.rounding_value = 2 ** 10
    self.use_rounding = False
    self.time_horizon = 20
    self.input_epsilon = 0
    self.v_lead = 28
    self.max_speed = 36

    # Other checkpoints kept for reference, with the author's result notes:
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_11-56-58/checkpoint_31/checkpoint-31"  # safe both with and without epsilon of 0.1.
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_11-56-58/checkpoint_37/checkpoint-37"  # not determined
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_12-37-27/checkpoint_24/checkpoint-24"  # safe at t=216
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_12-37-27/checkpoint_36/checkpoint-36"  # not determined
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00002_2_cost_fn=0,epsilon_input=0_2021-01-17_12-38-53/checkpoint_40/checkpoint-40"  # not determined
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00006_6_cost_fn=0,epsilon_input=0_2021-01-17_12-44-54/checkpoint_41/checkpoint-41"  # safe
    # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00000_0_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_39/checkpoint-39"  # unsafe
    # Active checkpoint, annotated "safe" by the author.
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"
def __init__(self):
    """Set up the 2-input experiment with its hard-coded defaults.

    Assigns the splitting / probability switches, the input region, an
    eight-row analysis template combining ``delta_x`` and ``v_ego``, and the
    checkpoint path.

    NOTE(review): unlike the sibling initialisers, no parent ``__init__`` is
    called here -- confirm whether this class has a base that needs it.
    """
    # Splitting and probability switches.
    self.use_split = True
    self.use_entropy_split = True
    self.use_softmax = True
    self.use_milp_range_prob = True
    self.output_flag = False
    self.max_probability_split = 0.33
    self.minimum_length = 0.2
    self.input_epsilon = 0

    # Problem size and initial region.
    self.env_input_size: int = 2
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
    self.input_boundaries = (50, 0, 36, 36)
    self.input_template = Experiment.box(self.env_input_size)

    # Analysis template: the four axis directions plus the four sign
    # combinations of (delta_x / 4.5) and v_ego.
    delta_x = Experiment.e(self.env_input_size, 0)
    v_ego = Experiment.e(self.env_input_size, 1)
    slope = 1 / 4.5
    self.analysis_template: np.ndarray = np.array(
        [
            delta_x,
            -delta_x,
            v_ego,
            -v_ego,
            slope * delta_x + v_ego,
            slope * delta_x - v_ego,
            -slope * delta_x + v_ego,
            -slope * delta_x - v_ego,
        ]
    )

    # Checkpoint annotated "safe" by the author.
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"
def __init__(self):
    """Set up the 3-input experiment with its hard-coded defaults.

    Passes the input size to the parent initialiser, wires the post /
    network / plot / label callbacks to this instance's own methods, and
    fills in the input region, a shared box template, the two-part unsafe
    zone, numeric settings and the checkpoint path.
    """
    env_input_size: int = 3
    super().__init__(env_input_size)

    # Callbacks all point at methods of this instance.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.assign_lbl_fn = self.assign_label
    self.template_2d: np.ndarray = np.array([[1]])

    # Initial region. Input and analysis templates are the SAME object.
    self.input_boundaries: List = [55, -45, 6, 0, 1, 0]
    shared_box = Experiment.box(3)
    self.input_template: np.ndarray = shared_box
    self.analysis_template: np.ndarray = shared_box

    # Numeric settings.
    self.time_horizon = 500
    self.use_rounding = False
    self.rounding_value = 2 ** 4
    self.minimum_percentage_population = -0.55
    self.n_actions = 2

    # Unsafe zone: direction p with bound 100, and direction -p with bound 0.
    p = Experiment.e(self.env_input_size, 0)
    self.unsafe_zone: List[Tuple] = [
        ([p], np.array([100])),
        ([-p], np.array([0])),
    ]

    # Absolute path of the agent checkpoint (a tune_PPO_watertank run).
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_watertank/PPO_MonitoredWaterTank_e219e_00000_0_2021-05-19_20-39-10/checkpoint_3334/checkpoint-3334"
def run_parameterised_experiment(config):
    """Run one cartpole experiment for a tune trial and report the outcome.

    ``config["main_params"]`` must unpack into three items, of which only
    the last (``other_config``) is used: its ``"folder"``, ``"tau"`` and
    ``"template"`` entries configure the experiment. ``config["n_workers"]``
    gives the worker count. The result goes to ``tune.report`` with ``safe``
    encoded as 1 (truthy), -1 (falsy) or 0 (``None``).
    """
    trial_dir = tune.get_trial_dir()
    _problem, _method, other_config = config["main_params"]
    n_workers = config["n_workers"]

    experiment = CartpoleExperiment()
    # Checkpoint is given directly as a folder (not looked up in a table).
    experiment.nn_path = other_config["folder"]
    experiment.tau = other_config["tau"]

    # Template codes: 2 = octagon, 0 = box, anything else = get_template(1).
    template_id = other_config["template"]
    if template_id == 2:
        experiment.analysis_template = Experiment.octagon(
            experiment.env_input_size
        )
    elif template_id == 0:
        experiment.analysis_template = Experiment.box(
            experiment.env_input_size
        )
    else:
        _, standard_template = experiment.get_template(1)
        experiment.analysis_template = standard_template

    # Run without progress output; results go to the trial directory.
    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = trial_dir
    experiment.update_progress_fn = update_progress
    elapsed_seconds, safe, max_t = experiment.run_experiment()

    # Encode the tri-state verdict as an integer.
    if safe is None:
        safe_value = 0
    else:
        safe_value = 1 if safe else -1
    tune.report(
        elapsed_seconds=elapsed_seconds,
        safe=safe_value,
        max_t=max_t,
        done=True,
    )
def __init__(self):
    """Set up the 2-input experiment with its hard-coded defaults.

    Passes the input size to the parent initialiser, wires the callbacks to
    this instance's own methods, takes the input region from
    ``get_template(0)``, and defines the angle split and the unsafe zone
    around a safe angle of 12 degrees (stored in radians).
    """
    env_input_size: int = 2
    super().__init__(env_input_size)

    # Callbacks all point at methods of this instance.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

    # Initial region; a second, discarded get_template(0) call was removed.
    input_boundaries, input_template = self.get_template(0)
    self.input_boundaries: List = input_boundaries
    self.input_template: np.ndarray = input_template
    self.analysis_template: np.ndarray = Experiment.box(env_input_size)

    # Numeric settings.
    self.time_horizon = 21
    self.rounding_value = 2 ** 8
    self.load_graph = False
    self.minimum_length = 0.2
    self.n_actions = 3
    self.max_probability_split = 0.5
    self.tau = 0.02

    # 12 degrees in radians, computed once and reused below.
    safe_angle = 12 * 2 * math.pi / 360
    self.safe_angle = safe_angle
    epsilon = 1e-4

    # The same two direction lists are shared by both attributes below.
    theta = [self.e(env_input_size, 0)]
    neg_theta = [-self.e(env_input_size, 0)]

    # Split bound sits epsilon inside the safe angle.
    self.angle_split: List[Tuple] = [
        (theta, np.array([safe_angle - epsilon])),
        (neg_theta, np.array([safe_angle - epsilon])),
    ]
    self.unsafe_zone: List[Tuple] = [
        (theta, np.array([-safe_angle])),
        (neg_theta, np.array([-safe_angle])),
    ]

    # Absolute path of the agent checkpoint (a tune_PPO_pendulum run).
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
# nn = torch.nn.Sequential(torch.nn.Linear(2, 50), torch.nn.ReLU(), torch.nn.Linear(50, 1), torch.nn.Tanh()) # checkpoint = torch.load("/home/edoardo/Development/SafeDRL/runnables/invariant/invariant_checkpoint.pt", map_location=torch.device("cpu")) # nn.load_state_dict(checkpoint) state_size = 2 action_size = 2 ALPHA = 0.6 # the higher the more aggressive the sampling towards high TD transitions agent = InvariantAgent(state_size=state_size, action_size=action_size, alpha=ALPHA) agent.load("/home/edoardo/Development/SafeDRL/runs/Aug05_14-55-31_alpha=0.6, min_eps=0.01, eps_decay=0.2/checkpoint_1100.pth") nn = agent.inetwork_local nn.cpu() return nn if __name__ == '__main__': ray.init(log_to_driver=False, local_mode=True) experiment = StoppingCarExperiment() experiment.save_dir = "/home/edoardo/ray_results/tune_PPO_stopping_car/test" experiment.plotting_time_interval = 60 experiment.show_progressbar = True experiment.show_progress_plot = False x_lead = Experiment.e(experiment.env_input_size, 0) x_ego = Experiment.e(experiment.env_input_size, 1) v_ego = Experiment.e(experiment.env_input_size, 2) template = np.array([-(x_lead - x_ego), v_ego, -v_ego]) experiment.analysis_template = template # standard experiment.input_template = Experiment.box(3) input_boundaries = [40, -30, 10, -0, 36, -28] experiment.input_boundaries = input_boundaries experiment.time_horizon = 150 experiment.run_experiment()
def run_parameterised_experiment(config):
    """Build, run and report one verification experiment for a tune trial.

    ``config["main_params"]`` unpacks into ``(problem, method,
    other_config)`` and ``config["n_workers"]`` gives the worker count.
    ``problem`` selects the experiment: ``"bouncing_ball"``,
    ``"stopping_car"`` (``method == "ora"`` picks the ORA variant), and any
    other value falls back to cartpole. The result goes to ``tune.report``
    with ``safe`` encoded as 1 (truthy), -1 (falsy) or 0 (``None``).

    Raises:
        NotImplementedError: for ``"bouncing_ball"`` when
            ``other_config["template"]`` is neither 0 nor 2.
    """
    trial_dir = tune.get_trial_dir()
    problem, method, other_config = config["main_params"]
    n_workers = config["n_workers"]

    # Problem-specific part: experiment class, checkpoint, dynamics setting.
    # has_standard_template records whether get_template(1) is the fallback.
    if problem == "bouncing_ball":
        experiment = BouncingBallExperiment()
        experiment.nn_path = os.path.join(
            utils.get_save_dir(),
            nn_paths_bouncing_ball[other_config["nn_path"]],
        )
        experiment.tau = other_config["tau"]
        has_standard_template = False
    elif problem == "stopping_car":
        if method == "ora":
            experiment = ORAStoppingCarExperiment()
        else:
            experiment = StoppingCarExperiment()
        experiment.nn_path = os.path.join(
            utils.get_save_dir(),
            nn_paths_stopping_car[other_config["nn_path"]],
        )
        experiment.input_epsilon = other_config["epsilon_input"]
        has_standard_template = True
    else:
        experiment = CartpoleExperiment()
        experiment.nn_path = os.path.join(
            utils.get_save_dir(),
            nn_paths_cartpole[other_config["nn_path"]],
        )
        experiment.tau = other_config["tau"]
        has_standard_template = True

    # Template codes: 2 = octagon, 0 = box, otherwise the standard template
    # where the problem has one.
    template_id = other_config["template"]
    if template_id == 2:
        experiment.analysis_template = Experiment.octagon(
            experiment.env_input_size
        )
    elif template_id == 0:
        experiment.analysis_template = Experiment.box(
            experiment.env_input_size
        )
    elif has_standard_template:
        _, standard_template = experiment.get_template(1)
        experiment.analysis_template = standard_template
    else:
        raise NotImplementedError()

    # Common tail: run without progress output, save to the trial directory.
    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = trial_dir
    experiment.update_progress_fn = update_progress
    elapsed_seconds, safe, max_t = experiment.run_experiment()

    # Encode the tri-state verdict as an integer.
    if safe is None:
        safe_value = 0
    else:
        safe_value = 1 if safe else -1
    tune.report(
        elapsed_seconds=elapsed_seconds,
        safe=safe_value,
        max_t=max_t,
        done=True,
    )
def get_experiment_instance(config, trial_dir):
    """Create and configure a probabilistic experiment from a tune config.

    ``config["main_params"]`` unpacks into ``(problem, other_config)`` and
    ``config["n_workers"]`` gives the worker count. ``problem`` selects the
    experiment: ``"bouncing_ball"``, ``"stopping_car"``, and any other value
    falls back to the pendulum experiment. The experiment is returned
    without being run; its results directory is set to ``trial_dir``.

    Raises:
        NotImplementedError: when ``other_config["template"]`` (or, for
            bouncing ball, ``other_config["initial_state"]``) holds a code
            the selected problem does not handle.
        AssertionError: for bouncing ball when the checkpoint path does not
            exist.
    """
    problem, other_config = config["main_params"]
    n_workers = config["n_workers"]

    if problem == "bouncing_ball":
        experiment = BouncingBallExperimentProbabilistic()
        experiment.nn_path = os.path.join(
            utils.get_agents_dir(),
            nn_paths_bouncing_ball[other_config["nn_path"]],
        )
        assert os.path.exists(experiment.nn_path)
        experiment.use_contained = other_config["use_contain"]

        # Two predefined initial regions.
        initial_state = other_config["initial_state"]
        if initial_state == 0:
            experiment.input_boundaries = [9, -5, 0, 0.1]
        elif initial_state == 1:
            experiment.input_boundaries = [9, -5, 1, 1]
        else:
            raise NotImplementedError()

        # Template codes: 1 = octagon, 0 = box.
        template_id = other_config["template"]
        if template_id == 1:
            experiment.analysis_template = Experiment.octagon(
                experiment.env_input_size
            )
        elif template_id == 0:
            experiment.analysis_template = Experiment.box(
                experiment.env_input_size
            )
        else:
            raise NotImplementedError()
        experiment.max_probability_split = other_config["phi"]
    elif problem == "stopping_car":
        experiment = StoppingCarExperimentProbabilistic()
        experiment.nn_path = os.path.join(
            utils.get_agents_dir(),
            nn_paths_stopping_car[other_config["nn_path"]],
        )
        # phi is read before the template here, unlike the other problems.
        experiment.max_probability_split = other_config["phi"]

        # Template codes: 2 = octagon, 1 = hand-built template, 0 = box.
        template_id = other_config["template"]
        if template_id == 2:
            experiment.analysis_template = Experiment.octagon(
                experiment.env_input_size
            )
        elif template_id == 1:
            delta_x = Experiment.e(experiment.env_input_size, 0)
            v_ego = Experiment.e(experiment.env_input_size, 1)
            slope = 1 / 4.5
            experiment.analysis_template = np.array(
                [
                    delta_x,
                    -delta_x,
                    v_ego,
                    -v_ego,
                    slope * delta_x + v_ego,
                    slope * delta_x - v_ego,
                    -slope * delta_x + v_ego,
                    -slope * delta_x - v_ego,
                ]
            )
        elif template_id == 0:
            experiment.analysis_template = Experiment.box(
                experiment.env_input_size
            )
        else:
            raise NotImplementedError()
    else:
        experiment = PendulumExperimentProbabilistic()
        # NOTE(review): the pendulum checkpoint is looked up in
        # nn_paths_cartpole -- confirm this table is the intended one.
        experiment.nn_path = os.path.join(
            utils.get_agents_dir(),
            nn_paths_cartpole[other_config["nn_path"]],
        )

        # Template codes: 1 = octagon, 0 = box.
        template_id = other_config["template"]
        if template_id == 1:
            experiment.analysis_template = Experiment.octagon(
                experiment.env_input_size
            )
        elif template_id == 0:
            experiment.analysis_template = Experiment.box(
                experiment.env_input_size
            )
        else:
            raise NotImplementedError()
        experiment.max_probability_split = other_config["phi"]

    # Common tail: no progress output, results go to the trial directory.
    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = trial_dir
    return experiment
ray.init() nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_71684_00004_4_2021-01-18_23-48-21/checkpoint_10/checkpoint-10" config = get_PPO_config(1234, use_gpu=0) trainer = ppo.PPOTrainer(config=config) trainer.restore(nn_path) policy = trainer.get_policy() sequential_nn = convert_ray_policy_to_sequential(policy).cpu() layers = [] for l in sequential_nn: layers.append(l) nn = torch.nn.Sequential(*layers) horizon = 10 gateway = JavaGateway(auto_field=True) mc = gateway.jvm.explicit.MDPModelChecker(None) analysis_template = Experiment.box(2) boundaries = [6, -5, 1, 1] samples = polytope.sample(2000, analysis_template, np.array(boundaries, dtype=float)) point_probabilities = [] for i, point in enumerate(samples): # generate prism graph frontier = [(0, point)] root = point graph = networkx.DiGraph() widgets = [ progressbar.Variable('frontier'), ", ", progressbar.Variable('max_t'), ", ", progressbar.widgets.Timer() ]