def __init__(self):
    """Configure the three-variable stopping-car run that uses the invariant net.

    Registers the post/NN/plot callbacks on the instance, obtains the input
    bounds and templates from ``self.get_template`` (mode 0 for the input,
    mode 1 for the analysis), and sets the unsafe zone, the horizon and the
    checkpoint path.
    """
    n_state_vars = 3
    super().__init__(n_state_vars)

    # Callbacks exposed as attributes.
    self.post_fn_remote = self.post_milp_invariant_net
    self.get_nn_fn = self.get_nn
    self.get_invariant_nn_fn = self.get_invariant_nn
    self.plot_fn = self.plot
    self.template_2d: np.ndarray = np.array([[1, 0, 0], [1, -1, 0]])

    # Initial region: bounds and directions both come from mode 0.
    start_bounds, start_template = self.get_template(0)
    self.input_boundaries: List = start_bounds
    self.input_template: np.ndarray = start_template
    self.assign_lbl_fn = 1  # just to stop error

    # Only the directions of mode 1 are used for the analysis.
    _unused_bounds, analysis_directions = self.get_template(1)
    self.analysis_template: np.ndarray = analysis_directions

    # Unsafe zone: the single direction e0 - e1 paired with the value 0.
    collision_distance = 0
    gap_direction = Experiment.e(self.env_input_size, 0) - Experiment.e(self.env_input_size, 1)

    self.rounding_value = 2 ** 10
    self.use_rounding = False
    self.time_horizon = 400
    self.unsafe_zone: List[Tuple] = [([gap_direction], np.array([collision_distance]))]
    self.input_epsilon = 0
    self.v_lead = 28
    self.max_speed = 36

    # Other checkpoints tried (all under /home/edoardo/ray_results/tune_PPO_stopping_car/)
    # with the verdict noted for each:
    #   PPO_StoppingCar_14b68_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_11-56-58/checkpoint_31/checkpoint-31      safe both with and without epsilon of 0.1
    #   PPO_StoppingCar_14b68_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_11-56-58/checkpoint_37/checkpoint-37    not determined
    #   PPO_StoppingCar_c1c7e_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_12-37-27/checkpoint_24/checkpoint-24      safe at t=216
    #   PPO_StoppingCar_c1c7e_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_12-37-27/checkpoint_36/checkpoint-36    not determined
    #   PPO_StoppingCar_c1c7e_00002_2_cost_fn=0,epsilon_input=0_2021-01-17_12-38-53/checkpoint_40/checkpoint-40      not determined
    #   PPO_StoppingCar_c1c7e_00006_6_cost_fn=0,epsilon_input=0_2021-01-17_12-44-54/checkpoint_41/checkpoint-41      safe
    #   PPO_StoppingCar_acc24_00000_0_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_39/checkpoint-39      unsafe
    #   PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58      safe
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00005_5_cost_fn=0,epsilon_input=0.1_2021-01-17_12-41-27/checkpoint_10/checkpoint-10"  # unsafe
def __init__(self):
    """Configure the three-variable stopping-car run with the continuous post.

    ``post_milp`` is registered both as the continuous and as the remote
    post function. The analysis template is the single direction
    ``-(e0 - e1)`` and the input template is the box over all variables.
    """
    n_state_vars = 3
    super().__init__(n_state_vars)

    # Callbacks exposed as attributes.
    self.post_fn_continuous = self.post_milp
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.template_2d: np.ndarray = np.array([[0, 0, 1], [1, -1, 0]])

    x_lead = Experiment.e(self.env_input_size, 0)
    x_ego = Experiment.e(self.env_input_size, 1)
    v_ego = Experiment.e(self.env_input_size, 2)  # built but not part of the template below
    self.analysis_template = np.array([-(x_lead - x_ego)])  # standard
    self.input_template = Experiment.box(self.env_input_size)

    # Unsafe zone: the direction e0 - e1 paired with the value 0.
    collision_distance = 0
    gap_direction = Experiment.e(n_state_vars, 0) - Experiment.e(n_state_vars, 1)

    self.rounding_value = 2 ** 6
    self.use_rounding = False
    self.time_horizon = 400
    self.unsafe_zone: List[Tuple] = [([gap_direction], np.array([collision_distance]))]
    self.input_epsilon = 0

    # Other checkpoints tried (all under
    # /home/edoardo/ray_results/tune_TD3_stopping_car_continuous/):
    #   TD3_StoppingCar_0a03b_00000_0_cost_fn=2,epsilon_input=0_2021-02-27_17-12-58/checkpoint_680/checkpoint-680
    #   TD3_StoppingCar_62310_00000_0_cost_fn=2,epsilon_input=0_2021-03-04_13-34-45/checkpoint_780/checkpoint-780
    #   TD3_StoppingCar_ 3665a_00000_0_cost_fn=3,epsilon_input=0_2021-03-04_14-37-57/checkpoint_114/checkpoint-114
    #   TD3_StoppingCar_47b16_00000_0_cost_fn=3,epsilon_input=0_2021-03-04_17-08-46/checkpoint_600/checkpoint-600
    #   PPO_StoppingCar_28110_00000_0_cost_fn=0,epsilon_input=0_2021-03-07_17-40-07/checkpoint_1250/checkpoint-1250
    self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/PPO_StoppingCar_7bdde_00000_0_cost_fn=0,epsilon_input=0_2021-03-09_11-49-20/checkpoint_1460/checkpoint-1460"
def __init__(self):
    """Configure the three-variable stopping-car run with fixed input bounds.

    The input template is the box over all variables; the analysis template
    holds the directions ``-(e0 - e1)``, ``e0 - e1``, ``e2`` and ``-e2``.
    """
    n_state_vars = 3
    super().__init__(n_state_vars)

    # Callbacks exposed as attributes.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.template_2d: np.ndarray = np.array([[1, 0, 0], [1, -1, 0]])

    # One bound per row of the box template (two rows per variable).
    self.input_boundaries: List = [30, -25, 0, 0, 36, -28]
    self.input_template: np.ndarray = Experiment.box(self.env_input_size)

    x_lead = Experiment.e(self.env_input_size, 0)
    x_ego = Experiment.e(self.env_input_size, 1)
    v_ego = Experiment.e(self.env_input_size, 2)
    gap = x_lead - x_ego
    self.analysis_template: np.ndarray = np.array([-gap, gap, v_ego, -v_ego])

    # Unsafe zone: the direction e0 - e1 paired with the value 0.
    collision_distance = 0
    gap_direction = Experiment.e(self.env_input_size, 0) - Experiment.e(self.env_input_size, 1)
    self.unsafe_zone: List[Tuple] = [([gap_direction], np.array([collision_distance]))]

    self.rounding_value = 2 ** 10
    self.use_rounding = False
    self.time_horizon = 400
    self.input_epsilon = 0
    self.v_lead = 28
    self.max_speed = 36
def find_direction_split(self, template, x, nn, pre_nn):
    """Fit a linear model to the network's first softmax output and plot it.

    Draws 10000 samples with ``polytope.sample(10000, template, np.array(x))``,
    feeds them through ``pre_nn`` and then ``nn``, takes column 0 of the
    softmax output, and regresses its log-odds on the raw samples with
    ``LinearRegression``. Both the fitted and the actual values are plotted
    with ``plot_points_and_prediction``.

    Args:
        template: forwarded to ``polytope.sample``.
        x: forwarded to ``polytope.sample`` after conversion with ``np.array``.
        nn: callable applied to the preprocessed samples; its output is
            soft-maxed along dimension 1.
        pre_nn: callable applied to the float tensor of samples before ``nn``.

    NOTE(review): the visible body ends without a ``return`` and leaves
    ``preprocessed_np``, ``classif_line1`` and ``new_coeff`` unused — the
    function may continue beyond what is shown here; confirm.
    """
    samples = polytope.sample(10000, template, np.array(x))
    preprocessed = pre_nn(torch.tensor(samples).float())
    preprocessed_np = preprocessed.detach().numpy()
    samples_ontput = torch.softmax(nn(preprocessed), 1)
    predicted_label = samples_ontput.detach().numpy()[:, 0]
    # Clip away exact 0 and 1 so the log-odds below stay finite.
    y = np.clip(predicted_label, 1e-7, 1 - 1e-7)
    inv_sig_y = np.log(y / (1 - y))  # transform to log-odds-ratio space
    from sklearn.linear_model import LinearRegression
    lr = LinearRegression()
    lr.fit(samples, inv_sig_y)
    # Projection used only for plotting: rows are e2 and e0 - e1 in 3 variables.
    template_2d: np.ndarray = np.array([Experiment.e(3, 2), Experiment.e(3, 0) - Experiment.e(3, 1)])

    def sigmoid(x):
        # Maps the regression output back from log-odds to a value in (0, 1).
        ex = np.exp(x)
        return ex / (1 + ex)

    preds = sigmoid(lr.predict(samples))
    plot_points_and_prediction(samples @ template_2d.T, preds)
    plot_points_and_prediction(samples @ template_2d.T, predicted_label)
    coeff = lr.coef_
    intercept = lr.intercept_
    a = sympy.symbols('x')
    b = sympy.symbols('y')
    # NOTE(review): only the first two regression coefficients are used here,
    # while the projection above assumes 3-variable samples — confirm intent.
    classif_line1 = Line(coeff[0].item() * a + coeff[1].item() * b + intercept)
    new_coeff = -coeff[0].item() / coeff[1].item()
def octagon(n):
    """Return the octagon template over ``n`` variables as a stacked array.

    For every variable ``i`` the rows ``e_i`` and ``-e_i`` are emitted,
    followed, for each earlier variable ``j < i``, by ``e_i + e_j``,
    ``e_i - e_j``, ``e_j - e_i`` and ``-e_j - e_i`` (in that order), where
    ``e_k`` is ``Experiment.e(n, k)``.
    """
    rows = []
    for i in range(n):
        e_i = Experiment.e(n, i)
        rows += [e_i, -e_i]
        for j in range(i):
            e_j = Experiment.e(n, j)
            rows += [e_i + e_j, e_i - e_j, e_j - e_i, -e_j - e_i]
    return np.stack(rows)
def __init__(self):
    """Configure a two-variable probabilistic run with input bounds (9, 0, 3, 3).

    The input template is the box over both variables; the analysis template
    adds the scaled diagonal ``(e0 - e1) / 3`` and its negation to the box
    directions.
    """
    super().__init__()
    self.v_lead = 28
    self.max_probability_split = 0.15
    self.env_input_size: int = 2
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
    self.input_boundaries = (9, 0, 3, 3)
    self.input_template = Experiment.box(self.env_input_size)

    p = Experiment.e(self.env_input_size, 0)
    v = Experiment.e(self.env_input_size, 1)
    third = 1 / 3
    # Previously considered extra directions: 1 / 6 * p - v, -1 / 6 * p - v
    self.analysis_template: np.ndarray = np.array(
        [p, -p, v, -v, third * (p - v), -third * (p - v)]
    )

    # Alternative checkpoint:
    #   /home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_c7326_00000_0_2021-01-16_05-43-36/checkpoint_36/checkpoint-36
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_71684_00004_4_2021-01-18_23-48-21/checkpoint_10/checkpoint-10"
def box(n):
    """Return the box template over ``n`` variables as a stacked array.

    The rows are ``e_0, -e_0, e_1, -e_1, ...`` where ``e_i`` is
    ``Experiment.e(n, i)``.
    """
    axes = (Experiment.e(n, i) for i in range(n))
    return np.stack([row for axis in axes for row in (axis, -axis)])
def __init__(self):
    """Configure a two-variable probabilistic stopping-car run.

    The input template is the box over both variables; the analysis template
    holds the box directions plus the four sign combinations of
    ``delta_x / 4.5`` and ``v_ego``.
    """
    super().__init__()
    self.v_lead = 28
    self.env_input_size: int = 2
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
    self.input_boundaries = (10, -3, 32, -26)
    self.input_template = Experiment.box(self.env_input_size)

    delta_x = Experiment.e(self.env_input_size, 0)
    v_ego = Experiment.e(self.env_input_size, 1)
    slope = 1 / 4.5
    analysis_rows = [
        delta_x,
        -delta_x,
        v_ego,
        -v_ego,
        slope * delta_x + v_ego,
        slope * delta_x - v_ego,
        -slope * delta_x + v_ego,
        -slope * delta_x - v_ego,
    ]
    self.max_probability_split = 0.33
    self.analysis_template: np.ndarray = np.array(analysis_rows)
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
def get_template(self, mode=0):
    """Return ``(input_boundaries, template)`` for the requested mode.

    * ``mode == 0``: the bounds ``[9, -8, 0, 0.1]`` with the box directions
      (``e_d`` followed by ``-e_d`` for every variable ``d``).
    * ``mode == 1``: ``None`` bounds with the directions ``v + p``,
      ``-v - p`` and ``-p`` (directions to easily find a fixed point).

    Any other mode falls through and the method returns ``None``.
    """
    size = self.env_input_size
    p = Experiment.e(size, 0)
    v = Experiment.e(size, 1)

    if mode == 0:
        # Two rows per variable: the unit direction and its negation.
        box_rows = []
        for d in range(size):
            axis = Experiment.e(size, d)
            box_rows.extend((axis, -axis))
        return [9, -8, 0, 0.1], np.array(box_rows)

    if mode == 1:
        return None, np.array([v + p, -v - p, -p])
def get_template(self, mode=0):
    """Return ``(input_boundaries, template)`` for the requested mode.

    The templates are built from ``theta = e0`` and ``theta_dot = e1``:

    * 0: bounds ``[0.05] * 4`` with the box ``±theta, ±theta_dot``.
    * 1: no bounds; the box plus ``±(theta + theta_dot)`` and
      ``±(theta - theta_dot)``.
    * 2: no bounds; the box only.
    * 3: no bounds; ``theta, theta_dot, -theta_dot``.
    * 4: five bounds with ``theta, theta_dot, -theta_dot`` and both diagonals.
    * 5: three bounds with ``theta`` and both diagonals.

    Any other mode falls through and the method returns ``None``.
    """
    theta = Experiment.e(self.env_input_size, 0)
    theta_dot = Experiment.e(self.env_input_size, 1)
    diag_sum = theta + theta_dot
    diag_diff = theta - theta_dot

    if mode == 0:
        # Other bounds tried here:
        #   [0.20, 0.20, 1 / 5, 1 / 5]
        #   [3.13, 3.15, -0.08193365, 0.08193365]
        #   [1, 1, 1, 1]
        #   [0.04373426, -0.04373426, -0.04980056, 0.04980056, 0.045, -0.045, -0.51, 0.51]
        bounds = [0.05, 0.05, 0.05, 0.05]
        return bounds, np.array([theta, -theta, theta_dot, -theta_dot])
    if mode == 1:
        rows = [
            theta, -theta, theta_dot, -theta_dot,
            diag_sum, -diag_sum, diag_diff, -diag_diff,
        ]
        return None, np.array(rows)
    if mode == 2:
        return None, np.array([theta, -theta, theta_dot, -theta_dot])
    if mode == 3:
        return None, np.array([theta, theta_dot, -theta_dot])
    if mode == 4:
        # Alternative bounds: [0.09375, 0.5, 0.5, 0.0625, 0.09375]
        bounds = [0.09375, 0.625, 0.625, 0.0625, 0.1875]
        return bounds, np.array([theta, theta_dot, -theta_dot, diag_sum, diag_diff])
    if mode == 5:
        bounds = [0.125, 0.0625, 0.1875]
        return bounds, np.array([theta, diag_sum, diag_diff])
def __init__(self):
    """Configure the two-variable bouncing-ball run.

    Input bounds, input template and analysis template all come from
    ``self.get_template(0)``; the analysis template is obtained from a
    separate call so it is its own array object.
    """
    n_state_vars = 2
    super().__init__(n_state_vars)

    # Callbacks exposed as attributes.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

    start_bounds, start_template = self.get_template(0)
    self.input_boundaries: List = start_bounds
    self.input_template: np.ndarray = start_template
    _unused_bounds, analysis_directions = self.get_template(0)
    self.analysis_template: np.ndarray = analysis_directions

    self.time_horizon = 500
    self.rounding_value = 2 ** 8

    # Unsafe zone: directions p, -v, v paired with the values 0, 1, 0.
    p = Experiment.e(self.env_input_size, 0)
    v = Experiment.e(self.env_input_size, 1)
    self.unsafe_zone: List[Tuple] = [([p, -v, v], np.array([0, 1, 0]))]
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_c7326_00000_0_2021-01-16_05-43-36/checkpoint_36/checkpoint-36"
def get_template(self, mode=0):
    """Return the input bounds preset selected by ``mode``.

    Presets (index == mode):

    * 0: ``[6, -3, 1, 1]``   (large s0)
    * 1: ``[6, -3, 0, 0.1]`` (small s0)
    * 2: ``[9, -3, 0, 0.1]``
    * 3: ``[9, -5, 0, 0.1]``
    * 4: ``[9, -5, 1, 1]``

    Args:
        mode: index of the preset; compared with ``==``.

    Returns:
        A fresh list with the four bounds, or ``None`` when ``mode`` matches
        no preset.
    """
    # The basis vectors that used to be built here were never used, so the
    # method no longer touches ``self`` or ``Experiment`` at all.
    presets = (
        (6, -3, 1, 1),
        (6, -3, 0, 0.1),
        (9, -3, 0, 0.1),
        (9, -5, 0, 0.1),
        (9, -5, 1, 1),
    )
    for index, bounds in enumerate(presets):
        if mode == index:
            # New list per call so callers may mutate the result freely.
            return list(bounds)
    return None
def __init__(self):
    """Configure the two-variable (delta_x, v_ego) stopping-car run.

    The input template is the box over both variables; the analysis template
    holds the box directions plus the four sign combinations of
    ``delta_x / 4.5`` and ``v_ego``. The unsafe zone pairs the direction
    ``e0`` with the value 0.
    """
    n_state_vars = 2
    super().__init__(n_state_vars)

    # Callbacks exposed as attributes.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
    self.input_boundaries = (10, -3, 32, -26)
    self.input_template = Experiment.box(n_state_vars)

    delta_x = Experiment.e(n_state_vars, 0)
    v_ego = Experiment.e(n_state_vars, 1)
    slope = 1 / 4.5
    analysis_rows = [
        delta_x,
        -delta_x,
        v_ego,
        -v_ego,
        slope * delta_x + v_ego,
        slope * delta_x - v_ego,
        -slope * delta_x + v_ego,
        -slope * delta_x - v_ego,
    ]
    self.analysis_template: np.ndarray = np.array(analysis_rows)

    collision_distance = 0
    distance_direction = Experiment.e(self.env_input_size, 0)

    self.rounding_value = 2 ** 10
    self.use_rounding = False
    self.time_horizon = 20
    self.unsafe_zone: List[Tuple] = [([distance_direction], np.array([collision_distance]))]
    self.input_epsilon = 0
    self.v_lead = 28
    self.max_speed = 36

    # Other checkpoints tried (all under /home/edoardo/ray_results/tune_PPO_stopping_car/)
    # with the verdict noted for each:
    #   PPO_StoppingCar_14b68_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_11-56-58/checkpoint_31/checkpoint-31      safe both with and without epsilon of 0.1
    #   PPO_StoppingCar_14b68_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_11-56-58/checkpoint_37/checkpoint-37    not determined
    #   PPO_StoppingCar_c1c7e_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_12-37-27/checkpoint_24/checkpoint-24      safe at t=216
    #   PPO_StoppingCar_c1c7e_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_12-37-27/checkpoint_36/checkpoint-36    not determined
    #   PPO_StoppingCar_c1c7e_00002_2_cost_fn=0,epsilon_input=0_2021-01-17_12-38-53/checkpoint_40/checkpoint-40      not determined
    #   PPO_StoppingCar_c1c7e_00006_6_cost_fn=0,epsilon_input=0_2021-01-17_12-44-54/checkpoint_41/checkpoint-41      safe
    #   PPO_StoppingCar_acc24_00000_0_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_39/checkpoint-39      unsafe
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
def __init__(self):
    """Set the default options for the two-variable stopping-car analysis.

    Only plain attributes are assigned: splitting/softmax switches, the
    input bounds ``(50, 0, 36, 36)`` over the box template, the analysis
    template (box directions plus the four sign combinations of
    ``delta_x / 4.5`` and ``v_ego``) and the checkpoint path.
    """
    # Switches and thresholds.
    self.use_split = True
    self.env_input_size: int = 2
    self.max_probability_split = 0.33
    self.input_epsilon = 0
    self.use_entropy_split = True
    self.output_flag = False
    self.minimum_length = 0.2

    # Input region.
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
    self.input_boundaries = (50, 0, 36, 36)
    self.input_template = Experiment.box(self.env_input_size)

    delta_x = Experiment.e(self.env_input_size, 0)
    v_ego = Experiment.e(self.env_input_size, 1)
    self.use_softmax = True
    self.use_milp_range_prob = True

    slope = 1 / 4.5
    analysis_rows = [
        delta_x,
        -delta_x,
        v_ego,
        -v_ego,
        slope * delta_x + v_ego,
        slope * delta_x - v_ego,
        -slope * delta_x + v_ego,
        -slope * delta_x - v_ego,
    ]
    self.analysis_template: np.ndarray = np.array(analysis_rows)
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
def __init__(self):
    """Configure the two-variable bouncing-ball run with preset input bounds.

    The input bounds are whatever ``self.get_template`` returns for
    ``self.template_index`` (1); both the input and the analysis template
    are box templates, each built by its own ``Experiment.box`` call.
    """
    n_state_vars = 2
    super().__init__(n_state_vars)

    # Callbacks exposed as attributes.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

    self.template_index = 1
    self.input_boundaries: List = self.get_template(self.template_index)
    self.input_template: np.ndarray = Experiment.box(self.env_input_size)
    self.analysis_template: np.ndarray = Experiment.box(self.env_input_size)

    self.time_horizon = 20
    self.rounding_value = 2 ** 8
    self.load_graph = False
    self.minimum_length = 0.2
    self.max_probability_split = 0.20

    # Unsafe zone: directions p, -v, v paired with the values 1, 7, 7.
    p = Experiment.e(self.env_input_size, 0)
    v = Experiment.e(self.env_input_size, 1)
    self.unsafe_zone: List[Tuple] = [([p, -v, v], np.array([1, 7, 7]))]

    # Alternative checkpoint:
    #   /home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_c7326_00000_0_2021-01-16_05-43-36/checkpoint_36/checkpoint-36
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_71684_00004_4_2021-01-18_23-48-21/checkpoint_10/checkpoint-10"
def __init__(self):
    """Configure the two-variable pendulum run.

    Input bounds and input template come from ``self.get_template(0)``; the
    analysis template is the box over both variables. ``angle_split`` and
    ``unsafe_zone`` are both expressed over the directions ``e0`` and
    ``-e0`` and share the same direction lists.

    Compared with the previous revision this drops dead code only: a second
    ``self.get_template(0)`` call whose result was discarded, four basis
    vectors that were never read, and a duplicate computation of the safe
    angle. All attributes end up with the same values.
    """
    env_input_size: int = 2
    super().__init__(env_input_size)

    # Callbacks exposed as attributes.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])

    input_boundaries, input_template = self.get_template(0)
    self.input_boundaries: List = input_boundaries
    self.input_template: np.ndarray = input_template
    self.analysis_template: np.ndarray = Experiment.box(env_input_size)

    self.time_horizon = 21
    self.rounding_value = 2 ** 8
    self.load_graph = False
    self.minimum_length = 0.2
    # self.use_split_with_seen = True
    self.n_actions = 3
    self.max_probability_split = 0.5
    self.tau = 0.02

    # 12 degrees expressed in radians; computed once and reused below.
    safe_angle = 12 * 2 * math.pi / 360
    self.safe_angle = safe_angle
    epsilon = 1e-4

    theta_axis = self.e(env_input_size, 0)
    theta = [theta_axis]
    neg_theta = [-theta_axis]
    # Split thresholds sit just inside the safe angle (by ``epsilon``).
    self.angle_split: List[Tuple] = [
        (theta, np.array([safe_angle - epsilon])),
        (neg_theta, np.array([safe_angle - epsilon])),
    ]
    self.unsafe_zone: List[Tuple] = [
        (theta, np.array([-safe_angle])),
        (neg_theta, np.array([-safe_angle])),
    ]
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
def __init__(self):
    """Configure the three-variable water-tank run.

    A single box template (``Experiment.box(3)``) is shared by the input
    and the analysis template. The unsafe zone pairs ``e0`` with 100 and
    ``-e0`` with 0.
    """
    n_state_vars = 3
    super().__init__(n_state_vars)

    # Callbacks exposed as attributes.
    self.post_fn_remote = self.post_milp
    self.get_nn_fn = self.get_nn
    self.plot_fn = self.plot
    self.assign_lbl_fn = self.assign_label
    self.template_2d: np.ndarray = np.array([[1]])

    self.input_boundaries: List = [55, -45, 6, 0, 1, 0]
    # The very same array object serves as input and analysis template.
    shared_box = Experiment.box(3)
    self.input_template: np.ndarray = shared_box
    self.analysis_template: np.ndarray = shared_box

    self.time_horizon = 500
    self.use_rounding = False
    self.rounding_value = 2 ** 4
    # self.plotting_time_interval = 10
    level = Experiment.e(self.env_input_size, 0)
    self.minimum_percentage_population = -0.55
    self.n_actions = 2
    self.unsafe_zone: List[Tuple] = [
        ([level], np.array([100])),
        ([-level], np.array([0])),
    ]  # -0.55
    self.nn_path = "/home/edoardo/ray_results/tune_PPO_watertank/PPO_MonitoredWaterTank_e219e_00000_0_2021-05-19_20-39-10/checkpoint_3334/checkpoint-3334"
def get_template(mode=0):
    """Return ``(input_boundaries, template)`` over six variables for ``mode``.

    The variables are named, in index order, ``x_lead, x_ego, v_lead, v_ego,
    a_lead, a_ego``.

    * 0: box directions built from ``Experiment.e`` with a 13-entry bounds list.
    * 1: ``[20]`` with ±a_lead, ±a_ego, ∓v_lead, ∓(v_lead - v_ego),
      ∓(x_lead - x_ego) (directions to easily find a fixed point).
    * 2: box rows (plain integer lists) plus the row ``e0 - e1``.
    * 3: box rows (plain integer lists) plus the row ``e1 - e0``.
    * 4: ``[20]`` with the octagon over every pair of variables.
    * 5: as mode 1 without the ±a_ego rows.

    Any other mode falls through and the function returns ``None``.

    NOTE(review): mode 0 returns 13 bounds for a 12-row template — confirm
    how callers pair the two.
    """
    n_vars = 6
    basis = [Experiment.e(n_vars, k) for k in range(n_vars)]
    x_lead, x_ego, v_lead, v_ego, a_lead, a_ego = basis

    def sparse_row(entries):
        """Integer row of length six with the given ``{index: value}`` entries."""
        row = [0] * n_vars
        for index, value in entries.items():
            row[index] = value
        return row

    def axis_rows():
        """+unit and -unit integer rows for every variable, in index order."""
        return [sparse_row({d: sign}) for d in range(n_vars) for sign in (1, -1)]

    if mode == 0:  # box directions with intervals
        input_boundaries = [50, -40, 10, -0, 28, -28, 36, -36, 0, -0, 0, -0, 0]
        template = np.array([row for axis in basis for row in (axis, -axis)])
        return input_boundaries, template

    if mode == 1:  # directions to easily find fixed point
        rows = [
            a_lead, -a_lead, a_ego, -a_ego, -v_lead, v_lead,
            -(v_lead - v_ego), (v_lead - v_ego),
            -(x_lead - x_ego), (x_lead - x_ego),
        ]
        return [20], np.array(rows)

    if mode == 2:
        input_boundaries = [0, -100, 30, -31, 20, -30, 0, -35, 0, -0, -10, -10, 20]
        template = np.vstack([np.array(axis_rows()), sparse_row({0: 1, 1: -1})])
        return input_boundaries, template

    if mode == 3:  # single point box directions +diagonal
        input_boundaries = [30, -30, 0, -0, 28, -28, 36, -36, 0, -0, 0, -0, 0]
        template = np.vstack([np.array(axis_rows()), sparse_row({0: -1, 1: 1})])
        return input_boundaries, template

    if mode == 4:  # octagon, every pair of variables
        rows = []
        for d in range(n_vars):
            rows.append(sparse_row({d: 1}))
            rows.append(sparse_row({d: -1}))
            for other in range(d + 1, n_vars):
                for sign_d, sign_other in ((1, -1), (-1, 1), (1, 1), (-1, -1)):
                    rows.append(sparse_row({d: sign_d, other: sign_other}))
        return [20], np.array(rows)

    if mode == 5:
        rows = [
            a_lead, -a_lead, -v_lead, v_lead,
            -(v_lead - v_ego), (v_lead - v_ego),
            -(x_lead - x_ego), (x_lead - x_ego),
        ]
        return [20], np.array(rows)
# nn = torch.nn.Sequential(torch.nn.Linear(2, 50), torch.nn.ReLU(), torch.nn.Linear(50, 1), torch.nn.Tanh()) # checkpoint = torch.load("/home/edoardo/Development/SafeDRL/runnables/invariant/invariant_checkpoint.pt", map_location=torch.device("cpu")) # nn.load_state_dict(checkpoint) state_size = 2 action_size = 2 ALPHA = 0.6 # the higher the more aggressive the sampling towards high TD transitions agent = InvariantAgent(state_size=state_size, action_size=action_size, alpha=ALPHA) agent.load("/home/edoardo/Development/SafeDRL/runs/Aug05_14-55-31_alpha=0.6, min_eps=0.01, eps_decay=0.2/checkpoint_1100.pth") nn = agent.inetwork_local nn.cpu() return nn if __name__ == '__main__': ray.init(log_to_driver=False, local_mode=True) experiment = StoppingCarExperiment() experiment.save_dir = "/home/edoardo/ray_results/tune_PPO_stopping_car/test" experiment.plotting_time_interval = 60 experiment.show_progressbar = True experiment.show_progress_plot = False x_lead = Experiment.e(experiment.env_input_size, 0) x_ego = Experiment.e(experiment.env_input_size, 1) v_ego = Experiment.e(experiment.env_input_size, 2) template = np.array([-(x_lead - x_ego), v_ego, -v_ego]) experiment.analysis_template = template # standard experiment.input_template = Experiment.box(3) input_boundaries = [40, -30, 10, -0, 36, -28] experiment.input_boundaries = input_boundaries experiment.time_horizon = 150 experiment.run_experiment()
def get_experiment_instance(config, trial_dir):
    """Build and configure the experiment described by ``config``.

    Args:
        config: mapping with ``"main_params"`` — a ``(problem, other_config)``
            pair — and ``"n_workers"``. ``other_config`` supplies
            ``"nn_path"``, ``"template"`` and ``"phi"`` for every problem,
            plus ``"use_contain"`` and ``"initial_state"`` for
            ``"bouncing_ball"``.
        trial_dir: stored on the experiment as ``save_dir``.

    Returns:
        The configured experiment: bouncing ball or stopping car when
        ``problem`` names them, the pendulum experiment for any other value.

    Raises:
        NotImplementedError: for an unknown ``"initial_state"`` or
            ``"template"`` code.
        AssertionError: when the bouncing-ball checkpoint path does not exist.
    """
    # Hyperparameters
    problem, other_config = config["main_params"]
    n_workers = config["n_workers"]

    def box_or_octagon(target, choice, octagon_code):
        """Box for ``choice == 0``, octagon for ``choice == octagon_code``."""
        if choice == octagon_code:
            return Experiment.octagon(target.env_input_size)
        if choice == 0:
            return Experiment.box(target.env_input_size)
        raise NotImplementedError()

    if problem == "bouncing_ball":
        experiment = BouncingBallExperimentProbabilistic()
        experiment.nn_path = os.path.join(
            utils.get_agents_dir(), nn_paths_bouncing_ball[other_config["nn_path"]]
        )
        assert os.path.exists(experiment.nn_path)
        experiment.use_contained = other_config["use_contain"]

        start_code = other_config["initial_state"]
        if start_code == 0:
            experiment.input_boundaries = [9, -5, 0, 0.1]
        elif start_code == 1:
            experiment.input_boundaries = [9, -5, 1, 1]
        else:
            raise NotImplementedError()

        # Template codes: 1 = octagon, 0 = box.
        experiment.analysis_template = box_or_octagon(experiment, other_config["template"], 1)
        experiment.max_probability_split = other_config["phi"]
    elif problem == "stopping_car":
        experiment = StoppingCarExperimentProbabilistic()
        experiment.nn_path = os.path.join(
            utils.get_agents_dir(), nn_paths_stopping_car[other_config["nn_path"]]
        )
        experiment.max_probability_split = other_config["phi"]

        # Template codes: 2 = octagon, 1 = hand-written directions, 0 = box.
        template_code = other_config["template"]
        if template_code != 2 and template_code == 1:
            delta_x = Experiment.e(experiment.env_input_size, 0)
            v_ego = Experiment.e(experiment.env_input_size, 1)
            slope = 1 / 4.5
            experiment.analysis_template = np.array([
                delta_x,
                -delta_x,
                v_ego,
                -v_ego,
                slope * delta_x + v_ego,
                slope * delta_x - v_ego,
                -slope * delta_x + v_ego,
                -slope * delta_x - v_ego,
            ])
        else:
            experiment.analysis_template = box_or_octagon(experiment, template_code, 2)
    else:
        experiment = PendulumExperimentProbabilistic()
        experiment.nn_path = os.path.join(
            utils.get_agents_dir(), nn_paths_cartpole[other_config["nn_path"]]
        )
        # Template codes: 1 = octagon, 0 = box.
        experiment.analysis_template = box_or_octagon(experiment, other_config["template"], 1)
        experiment.max_probability_split = other_config["phi"]

    # Settings shared by every problem.
    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = trial_dir
    return experiment