コード例 #1
0
 def __init__(self):
     """Configure the experiment for a 3-dimensional environment input.

     Binds the post/NN/plot callbacks, takes the input boundaries and input
     template from ``get_template(0)`` and the analysis template from
     ``get_template(1)``, and declares the unsafe zone as the direction
     ``e0 - e1`` paired with the bound 0.
     """
     super().__init__(3)
     # Callback hooks used by the analysis.
     self.post_fn_remote = self.post_milp_invariant_net
     self.get_nn_fn = self.get_nn
     self.get_invariant_nn_fn = self.get_invariant_nn
     self.plot_fn = self.plot
     self.template_2d: np.ndarray = np.array([[1, 0, 0], [1, -1, 0]])
     start_boundaries, start_template = self.get_template(0)
     self.input_boundaries: List = start_boundaries
     self.input_template: np.ndarray = start_template
     self.assign_lbl_fn = 1  # placeholder value, only present to stop an error
     _, fixed_point_template = self.get_template(1)
     self.analysis_template: np.ndarray = fixed_point_template
     # Alternative settings left disabled by the author:
     # self.use_bfs = True
     # self.n_workers = 1
     self.rounding_value = 2 ** 10
     self.use_rounding = False
     self.time_horizon = 400
     # Unsafe zone: direction (coordinate 0 minus coordinate 1) with bound 0.
     gap_direction = Experiment.e(self.env_input_size, 0) - Experiment.e(self.env_input_size, 1)
     self.unsafe_zone: List[Tuple] = [([gap_direction], np.array([0]))]
     self.input_epsilon = 0
     self.v_lead = 28
     self.max_speed = 36
     # Other checkpoints, with the author's notes on each:
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_11-56-58/checkpoint_31/checkpoint-31"  # safe both with and without epsilon of 0.1.
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_11-56-58/checkpoint_37/checkpoint-37" #not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_12-37-27/checkpoint_24/checkpoint-24"  # safe at t=216
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_12-37-27/checkpoint_36/checkpoint-36"  # not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00002_2_cost_fn=0,epsilon_input=0_2021-01-17_12-38-53/checkpoint_40/checkpoint-40"  # not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00006_6_cost_fn=0,epsilon_input=0_2021-01-17_12-44-54/checkpoint_41/checkpoint-41" #safe
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00000_0_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_39/checkpoint-39" #unsafe
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00005_5_cost_fn=0,epsilon_input=0.1_2021-01-17_12-41-27/checkpoint_10/checkpoint-10"  # unsafe
コード例 #2
0
 def __init__(self):
     """Configure the experiment for a 3-dimensional environment input.

     ``post_fn_continuous`` and ``post_fn_remote`` are both bound to
     ``post_milp``.  The analysis template is the single direction
     ``-(x_lead - x_ego)`` (coordinates 0 and 1), the input template is the
     box template, and the unsafe zone pairs the direction ``e0 - e1`` with
     the bound 0.  ``input_boundaries`` is not assigned here.
     """
     env_input_size: int = 3
     super().__init__(env_input_size)
     self.post_fn_continuous = self.post_milp
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot
     self.template_2d: np.ndarray = np.array([[0, 0, 1], [1, -1, 0]])
     # self.input_boundaries: List = input_boundaries
     x_lead = Experiment.e(self.env_input_size, 0)
     x_ego = Experiment.e(self.env_input_size, 1)
     # The unit direction of coordinate 2 was built here before but never
     # used, so that dead assignment has been dropped.
     self.analysis_template = np.array([-(x_lead - x_ego)])  # standard
     self.input_template = Experiment.box(self.env_input_size)
     collision_distance = 0
     distance = [Experiment.e(env_input_size, 0) - Experiment.e(env_input_size, 1)]
     # self.use_bfs = True
     # self.n_workers = 1
     self.rounding_value = 2 ** 6
     self.use_rounding = False
     self.time_horizon = 400
     self.unsafe_zone: List[Tuple] = [(distance, np.array([collision_distance]))]
     self.input_epsilon = 0
     # Other checkpoints left disabled by the author:
     # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/TD3_StoppingCar_0a03b_00000_0_cost_fn=2,epsilon_input=0_2021-02-27_17-12-58/checkpoint_680/checkpoint-680"
     # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/TD3_StoppingCar_62310_00000_0_cost_fn=2,epsilon_input=0_2021-03-04_13-34-45/checkpoint_780/checkpoint-780"
     # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/TD3_StoppingCar_ 3665a_00000_0_cost_fn=3,epsilon_input=0_2021-03-04_14-37-57/checkpoint_114/checkpoint-114"
     # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/TD3_StoppingCar_47b16_00000_0_cost_fn=3,epsilon_input=0_2021-03-04_17-08-46/checkpoint_600/checkpoint-600"
     # self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/PPO_StoppingCar_28110_00000_0_cost_fn=0,epsilon_input=0_2021-03-07_17-40-07/checkpoint_1250/checkpoint-1250"
     self.nn_path = "/home/edoardo/ray_results/tune_TD3_stopping_car_continuous/PPO_StoppingCar_7bdde_00000_0_cost_fn=0,epsilon_input=0_2021-03-09_11-49-20/checkpoint_1460/checkpoint-1460"
コード例 #3
0
 def __init__(self):
     """Configure the experiment for a 3-dimensional environment input.

     Uses fixed input boundaries with the box input template, an analysis
     template made of ``+/-(x_lead - x_ego)`` and ``+/-v_ego`` (coordinates
     0, 1 and 2), and an unsafe zone pairing the direction ``e0 - e1`` with
     the bound 0.
     """
     super().__init__(3)
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot
     self.template_2d: np.ndarray = np.array([[1, 0, 0], [1, -1, 0]])
     start_boundaries = [30, -25, 0, 0, 36, -28]
     start_template = Experiment.box(self.env_input_size)
     self.input_boundaries: List = start_boundaries
     self.input_template: np.ndarray = start_template
     lead_pos = Experiment.e(self.env_input_size, 0)
     ego_pos = Experiment.e(self.env_input_size, 1)
     ego_speed = Experiment.e(self.env_input_size, 2)
     directions = [
         -(lead_pos - ego_pos),
         (lead_pos - ego_pos),
         ego_speed,
         -ego_speed,
     ]
     self.analysis_template: np.ndarray = np.array(directions)
     # Unsafe zone: direction (coordinate 0 minus coordinate 1) with bound 0.
     gap_direction = (Experiment.e(self.env_input_size, 0)
                      - Experiment.e(self.env_input_size, 1))
     self.rounding_value = 2**10
     self.use_rounding = False
     self.time_horizon = 400
     self.unsafe_zone: List[Tuple] = [([gap_direction], np.array([0]))]
     self.input_epsilon = 0
     self.v_lead = 28
     self.max_speed = 36
コード例 #4
0
ファイル: total_split.py プロジェクト: phate09/SafeDRL
    def find_direction_split(self, template, x, nn, pre_nn):
        """Fit a linear model to the network's first-class probability.

        Draws 10000 samples with ``polytope.sample(10000, template, np.array(x))``,
        runs them through ``pre_nn`` and then ``nn``, applies a softmax over
        dimension 1 and keeps column 0.  Those probabilities are clipped,
        mapped to log-odds and regressed linearly on the raw samples.  Both the
        fitted and the actual probabilities are plotted on the 2D projection
        given by ``template_2d``.

        NOTE(review): the visible snippet ends after computing ``new_coeff``;
        no return statement is visible, so the rest of the function is
        presumably cut off - confirm against the full file.
        """
        samples = polytope.sample(10000, template, np.array(x))
        preprocessed = pre_nn(torch.tensor(samples).float())
        # NOTE(review): preprocessed_np is not used in the visible part of the function.
        preprocessed_np = preprocessed.detach().numpy()
        samples_ontput = torch.softmax(nn(preprocessed), 1)
        predicted_label = samples_ontput.detach().numpy()[:, 0]
        # Keep probabilities strictly inside (0, 1) so the log-odds below stay finite.
        y = np.clip(predicted_label, 1e-7, 1 - 1e-7)
        inv_sig_y = np.log(y / (1 - y))  # transform to log-odds-ratio space
        from sklearn.linear_model import LinearRegression
        lr = LinearRegression()
        lr.fit(samples, inv_sig_y)
        # Projection used only for plotting: coordinate 2, and coordinate 0 minus coordinate 1.
        template_2d: np.ndarray = np.array([Experiment.e(3, 2), Experiment.e(3, 0) - Experiment.e(3, 1)])

        def sigmoid(x):
            """Map log-odds back to a probability."""
            ex = np.exp(x)
            return ex / (1 + ex)

        preds = sigmoid(lr.predict(samples))
        # First plot: the linear fit; second plot: the network's own probabilities.
        plot_points_and_prediction(samples @ template_2d.T, preds)
        plot_points_and_prediction(samples @ template_2d.T, predicted_label)
        coeff = lr.coef_
        intercept = lr.intercept_
        a = sympy.symbols('x')
        b = sympy.symbols('y')
        # Only the first two regression coefficients are used for the line.
        # NOTE(review): template_2d above assumes 3-dimensional samples - confirm a third coefficient is not being dropped unintentionally.
        classif_line1 = Line(coeff[0].item() * a + coeff[1].item() * b + intercept)
        new_coeff = -coeff[0].item() / coeff[1].item()
コード例 #5
0
 def octagon(n):
     """Build the octagon template over ``n`` variables.

     For every axis ``i`` the rows ``+e_i`` and ``-e_i`` are emitted,
     followed, for each earlier axis ``j < i``, by ``e_i + e_j``,
     ``e_i - e_j``, ``e_j - e_i`` and ``-e_j - e_i``.  The rows are stacked
     with ``np.stack`` in that order.
     """
     rows = []
     for i in range(n):
         axis_i = Experiment.e(n, i)
         rows += [axis_i, -axis_i]
         for j in range(i):
             axis_j = Experiment.e(n, j)
             rows += [
                 axis_i + axis_j,
                 axis_i - axis_j,
                 axis_j - axis_i,
                 -axis_j - axis_i,
             ]
     return np.stack(rows)
コード例 #6
0
ファイル: total_split.py プロジェクト: phate09/SafeDRL
 def __init__(self):
     """Configure the experiment for a 2-dimensional input.

     The analysis template holds ``+/-p``, ``+/-v`` and ``+/-(1/3)(p - v)``,
     where ``p`` and ``v`` are the unit directions of coordinates 0 and 1.
     """
     super().__init__()
     self.v_lead = 28
     self.max_probability_split = 0.15
     self.env_input_size: int = 2
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
     self.input_boundaries = (9, 0, 3, 3)
     self.input_template = Experiment.box(self.env_input_size)
     p = Experiment.e(self.env_input_size, 0)
     # x_ego = Experiment.e(self.env_input_size, 1)
     v = Experiment.e(self.env_input_size, 1)
     third = 1 / 3
     directions = [p, -p, v, -v, third * (p - v), -third * (p - v)]
     # Disabled extra directions from the author: 1 / 6 * p - v, -1 / 6 * p - v
     # Disabled alternative: self.analysis_template = Experiment.box(self.env_input_size)
     self.analysis_template: np.ndarray = np.array(directions)
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_c7326_00000_0_2021-01-16_05-43-36/checkpoint_36/checkpoint-36"
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_71684_00004_4_2021-01-18_23-48-21/checkpoint_10/checkpoint-10"
コード例 #7
0
 def box(n):
     """Build the box template over ``n`` variables.

     Rows alternate ``+e_i`` and ``-e_i`` for ``i = 0 .. n-1`` and are
     stacked with ``np.stack``.
     """
     directions = []
     for axis in (Experiment.e(n, i) for i in range(n)):
         directions.extend((axis, -axis))
     return np.stack(directions)
コード例 #8
0
ファイル: total_split.py プロジェクト: phate09/SafeDRL
 def __init__(self):
     """Configure the experiment for a 2-dimensional input.

     The analysis template holds ``+/-delta_x``, ``+/-v_ego`` and the four
     sign combinations of ``(1/4.5) * delta_x`` with ``v_ego``, where
     ``delta_x`` and ``v_ego`` are the unit directions of coordinates 0 and 1.
     """
     super().__init__()
     self.v_lead = 28
     self.env_input_size: int = 2
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
     self.input_boundaries = (10, -3, 32, -26)
     self.input_template = Experiment.box(self.env_input_size)
     delta_x = Experiment.e(self.env_input_size, 0)
     # x_ego = Experiment.e(self.env_input_size, 1)
     v_ego = Experiment.e(self.env_input_size, 1)
     # template = Experiment.combinations([1 / 4.5*(x_lead - x_ego), - v_ego])
     slope = 1 / 4.5
     directions = [
         delta_x,
         -delta_x,
         v_ego,
         -v_ego,
         slope * delta_x + v_ego,
         slope * delta_x - v_ego,
         -slope * delta_x + v_ego,
         -slope * delta_x - v_ego,
     ]
     self.max_probability_split = 0.33
     self.analysis_template: np.ndarray = np.array(directions)
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
コード例 #9
0
 def get_template(self, mode=0):
     """Return ``(input_boundaries, template)`` for the requested ``mode``.

     Mode 0 yields fixed boundaries together with the ``+e_i`` / ``-e_i``
     rows for every input dimension.  Mode 1 yields ``None`` boundaries and
     the rows ``v + p``, ``-v - p`` and ``-p``.  Any other mode falls through
     and returns ``None``.
     """
     dims = self.env_input_size
     p = Experiment.e(dims, 0)
     v = Experiment.e(dims, 1)
     if mode == 1:  # directions to easily find fixed point
         return None, np.array([v + p, -v - p, -p])
     if mode == 0:  # box directions with intervals
         # Disabled alternative from the author: [0, 0, 10, 10]
         boundaries = [9, -8, 0, 0.1]
         rows = []
         for axis in range(dims):
             rows.append(Experiment.e(dims, axis))
             rows.append(-Experiment.e(dims, axis))
         return boundaries, np.array(rows)
コード例 #10
0
 def get_template(self, mode=0):
     """Return ``(input_boundaries, template)`` for the requested ``mode``.

     Templates are combinations of the unit directions of coordinates 0 and
     1 (called ``theta`` and ``theta_dot`` here).  Modes 1, 2 and 3 return
     ``None`` as boundaries.  An unrecognised mode returns ``None`` rather
     than a pair.
     """
     theta = Experiment.e(self.env_input_size, 0)
     theta_dot = Experiment.e(self.env_input_size, 1)
     # battery = Experiment.e(self.env_input_size, 4)
     if mode == 0:  # box directions with intervals
         # Alternative boundaries left disabled by the author:
         #   [0.20, 0.20, 1 / 5, 1 / 5]
         #   [3.13, 3.15, -0.08193365, 0.08193365]
         #   [1, 1, 1, 1]
         #   [0.04373426, -0.04373426, -0.04980056, 0.04980056, 0.045, -0.045, -0.51, 0.51]
         boundaries = [0.05] * 4
         template = np.array([theta, -theta, theta_dot, -theta_dot])
     elif mode == 1:  # directions to easily find fixed point
         boundaries = None
         template = np.array([
             theta,
             -theta,
             theta_dot,
             -theta_dot,
             theta + theta_dot,
             -(theta + theta_dot),
             (theta - theta_dot),
             -(theta - theta_dot),
         ])  # x_dot, -x_dot,theta_dot - theta
     elif mode == 2:
         boundaries = None
         template = np.array([theta, -theta, theta_dot, -theta_dot])
     elif mode == 3:
         boundaries = None
         template = np.array([theta, theta_dot, -theta_dot])
     elif mode == 4:
         # Disabled alternative: [0.09375, 0.5, 0.5, 0.0625, 0.09375]
         boundaries = [0.09375, 0.625, 0.625, 0.0625, 0.1875]
         template = np.array([
             theta,
             theta_dot,
             -theta_dot,
             theta + theta_dot,
             (theta - theta_dot),
         ])
     elif mode == 5:
         boundaries = [0.125, 0.0625, 0.1875]
         template = np.array([theta, theta + theta_dot, (theta - theta_dot)])
     else:
         return None
     return boundaries, template
コード例 #11
0
 def __init__(self):
     """Configure the experiment for a 2-dimensional input.

     Both the input template and the analysis template come from
     ``get_template(0)``, obtained through two separate calls.  The unsafe
     zone pairs the directions ``p``, ``-v``, ``v`` (coordinates 0 and 1)
     with the bounds 0, 1, 0.
     """
     super().__init__(2)
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
     start_boundaries, start_template = self.get_template(0)
     self.input_boundaries: List = start_boundaries
     self.input_template: np.ndarray = start_template
     # Second call on purpose: the analysis template is a separate object.
     _, analysis_template = self.get_template(0)
     self.analysis_template: np.ndarray = analysis_template
     self.time_horizon = 500
     self.rounding_value = 2**8
     p = Experiment.e(self.env_input_size, 0)
     v = Experiment.e(self.env_input_size, 1)
     unsafe_directions = [p, -v, v]
     unsafe_bounds = np.array([0, 1, 0])
     self.unsafe_zone: List[Tuple] = [(unsafe_directions, unsafe_bounds)]
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_c7326_00000_0_2021-01-16_05-43-36/checkpoint_36/checkpoint-36"
コード例 #12
0
 def get_template(self, mode=0):
     """Return the input boundaries selected by ``mode``.

     Args:
         mode: Integer selector in ``0..4``.  Mode 0 is labelled "large s0"
             and mode 1 "small s0" by the original author.

     Returns:
         A fresh list of four boundary values, or ``None`` when ``mode`` is
         not one of the known selectors.

     The previous version also built two unit directions via
     ``Experiment.e`` that were never used; they have been removed, so the
     method no longer touches ``self.env_input_size``.
     """
     # The table is rebuilt on every call so callers always get their own list.
     boundaries_by_mode = {
         0: [6, -3, 1, 1],  # large s0 (disabled alternative: [0, 0, 10, 10])
         1: [6, -3, 0, 0.1],  # small s0
         2: [9, -3, 0, 0.1],
         3: [9, -5, 0, 0.1],
         4: [9, -5, 1, 1],
     }
     return boundaries_by_mode.get(mode)
コード例 #13
0
 def __init__(self):
     """Configure the experiment for a 2-dimensional input.

     The analysis template holds ``+/-delta_x``, ``+/-v_ego`` and the four
     sign combinations of ``(1/4.5) * delta_x`` with ``v_ego`` (coordinates
     0 and 1).  The unsafe zone pairs the unit direction of coordinate 0
     with the bound 0.
     """
     env_input_size: int = 2
     super().__init__(env_input_size)
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
     self.input_boundaries = (10, -3, 32, -26)
     self.input_template = Experiment.box(env_input_size)
     # Disabled alternatives from the author:
     # self.input_boundaries: List = input_boundaries
     # self.input_template: np.ndarray = input_template
     # _, template = self.get_template(1)
     delta_x = Experiment.e(env_input_size, 0)
     v_ego = Experiment.e(env_input_size, 1)
     # template = Experiment.combinations([delta_x, - v_ego])
     slope = 1 / 4.5
     directions = [
         delta_x,
         -delta_x,
         v_ego,
         -v_ego,
         slope * delta_x + v_ego,
         slope * delta_x - v_ego,
         -slope * delta_x + v_ego,
         -slope * delta_x - v_ego,
     ]
     # template = np.stack([x_lead - x_ego, -(x_lead - x_ego), - v_ego, v_ego])
     self.analysis_template: np.ndarray = np.array(directions)
     unsafe_direction = [Experiment.e(self.env_input_size, 0)]
     # self.use_bfs = True
     # self.n_workers = 1
     self.rounding_value = 2**10
     self.use_rounding = False
     self.time_horizon = 20
     self.unsafe_zone: List[Tuple] = [(unsafe_direction, np.array([0]))]
     self.input_epsilon = 0
     self.v_lead = 28
     self.max_speed = 36
     # Other checkpoints, with the author's notes on each:
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_11-56-58/checkpoint_31/checkpoint-31"  # safe both with and without epsilon of 0.1.
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_14b68_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_11-56-58/checkpoint_37/checkpoint-37" #not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00000_0_cost_fn=0,epsilon_input=0_2021-01-17_12-37-27/checkpoint_24/checkpoint-24"  # safe at t=216
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00001_1_cost_fn=0,epsilon_input=0.1_2021-01-17_12-37-27/checkpoint_36/checkpoint-36"  # not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00002_2_cost_fn=0,epsilon_input=0_2021-01-17_12-38-53/checkpoint_40/checkpoint-40"  # not determined
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_c1c7e_00006_6_cost_fn=0,epsilon_input=0_2021-01-17_12-44-54/checkpoint_41/checkpoint-41" #safe
     # self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00000_0_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_39/checkpoint-39" #unsafe
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
コード例 #14
0
ファイル: total_split.py プロジェクト: phate09/SafeDRL
 def __init__(self):
     """Set the default split/analysis options for a 2-dimensional input.

     The analysis template holds ``+/-delta_x``, ``+/-v_ego`` and the four
     sign combinations of ``(1/4.5) * delta_x`` with ``v_ego``, where
     ``delta_x`` and ``v_ego`` are the unit directions of coordinates 0 and 1.
     """
     self.use_split = True
     self.env_input_size: int = 2
     self.max_probability_split = 0.33
     self.input_epsilon = 0
     self.use_entropy_split = True
     self.output_flag = False
     self.minimum_length = 0.2
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
     self.input_boundaries = (50, 0, 36, 36)
     self.input_template = Experiment.box(self.env_input_size)
     delta_x = Experiment.e(self.env_input_size, 0)
     # x_ego = Experiment.e(self.env_input_size, 1)
     v_ego = Experiment.e(self.env_input_size, 1)
     self.use_softmax = True
     self.use_milp_range_prob = True
     # template = Experiment.combinations([1 / 4.5*(x_lead - x_ego), - v_ego])
     slope = 1 / 4.5
     directions = [
         delta_x,
         -delta_x,
         v_ego,
         -v_ego,
         slope * delta_x + v_ego,
         slope * delta_x - v_ego,
         -slope * delta_x + v_ego,
         -slope * delta_x - v_ego,
     ]
     self.analysis_template: np.ndarray = np.array(directions)
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_stopping_car/PPO_StoppingCar_acc24_00001_1_cost_fn=0,epsilon_input=0_2021-01-21_02-30-49/checkpoint_58/checkpoint-58"  # safe
コード例 #15
0
    def __init__(self):
        """Configure the experiment for a 2-dimensional input.

        Input boundaries come from ``get_template(self.template_index)``.
        The input and analysis templates are two separately built box
        templates.  The unsafe zone pairs the directions ``p``, ``-v``,
        ``v`` (coordinates 0 and 1) with the bounds 1, 7, 7.
        """
        super().__init__(2)
        self.post_fn_remote = self.post_milp
        self.get_nn_fn = self.get_nn
        self.plot_fn = self.plot
        self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
        self.template_index = 1
        start_boundaries = self.get_template(self.template_index)
        self.input_boundaries: List = start_boundaries

        # Two separate box() calls, so the two templates never share an array.
        self.input_template: np.ndarray = Experiment.box(self.env_input_size)
        self.analysis_template: np.ndarray = Experiment.box(self.env_input_size)
        self.time_horizon = 20
        self.rounding_value = 2**8
        self.load_graph = False
        self.minimum_length = 0.2
        self.max_probability_split = 0.20
        p = Experiment.e(self.env_input_size, 0)
        v = Experiment.e(self.env_input_size, 1)
        unsafe_directions = [p, -v, v]
        unsafe_bounds = np.array([1, 7, 7])
        self.unsafe_zone: List[Tuple] = [(unsafe_directions, unsafe_bounds)]
        # self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_c7326_00000_0_2021-01-16_05-43-36/checkpoint_36/checkpoint-36"
        self.nn_path = "/home/edoardo/ray_results/tune_PPO_bouncing_ball/PPO_BouncingBall_71684_00004_4_2021-01-18_23-48-21/checkpoint_10/checkpoint-10"
コード例 #16
0
 def __init__(self):
     """Configure the experiment for a 2-dimensional input.

     Input boundaries and input template come from ``get_template(0)``; the
     analysis template is the box template.  ``angle_split`` and
     ``unsafe_zone`` are both expressed on the unit direction of coordinate
     0 and its negation, using ``self.safe_angle`` (12 degrees in radians).

     Dead code from the previous version has been removed: a second
     ``get_template(0)`` call whose result was discarded, ``theta`` /
     ``theta_dot`` / ``p`` / ``v`` unit directions that were overwritten or
     never read, and a local ``safe_angle`` duplicating ``self.safe_angle``.
     """
     env_input_size: int = 2
     super().__init__(env_input_size)
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot
     self.template_2d: np.ndarray = np.array([[0, 1], [1, 0]])
     input_boundaries, input_template = self.get_template(0)
     self.input_boundaries: List = input_boundaries
     self.input_template: np.ndarray = input_template
     self.analysis_template: np.ndarray = Experiment.box(env_input_size)
     self.time_horizon = 21
     self.rounding_value = 2**8
     self.load_graph = False
     self.minimum_length = 0.2
     # self.use_split_with_seen = True
     self.n_actions = 3
     self.max_probability_split = 0.5
     self.tau = 0.02
     self.safe_angle = 12 * 2 * math.pi / 360
     epsilon = 1e-4
     # The same two direction lists are shared by angle_split and unsafe_zone,
     # as in the previous version.
     theta = [self.e(env_input_size, 0)]
     neg_theta = [-self.e(env_input_size, 0)]
     self.angle_split: List[Tuple] = [
         (theta, np.array([self.safe_angle - epsilon])),
         (neg_theta, np.array([self.safe_angle - epsilon]))
     ]
     self.unsafe_zone: List[Tuple] = [
         (theta, np.array([-self.safe_angle])),
         (neg_theta, np.array([-self.safe_angle]))
     ]
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_pendulum/PPO_MonitoredPendulum_035b5_00000_0_2021-05-11_11-59-52/checkpoint_3333/checkpoint-3333"
コード例 #17
0
 def __init__(self):
     """Configure the experiment for a 3-dimensional input.

     One box template object serves as both the input template and the
     analysis template.  The unsafe zone pairs the unit direction of
     coordinate 0 with the bound 100 and its negation with the bound 0.
     """
     super().__init__(3)
     self.post_fn_remote = self.post_milp
     self.get_nn_fn = self.get_nn
     self.plot_fn = self.plot
     self.assign_lbl_fn = self.assign_label
     self.template_2d: np.ndarray = np.array([[1]])
     self.input_boundaries: List = [55, -45, 6, 0, 1, 0]
     # Both attributes refer to the same array object, as before.
     self.input_template = self.analysis_template = Experiment.box(3)
     self.time_horizon = 500
     self.use_rounding = False
     self.rounding_value = 2**4
     # self.plotting_time_interval = 10
     p = Experiment.e(self.env_input_size, 0)
     self.minimum_percentage_population = -0.55
     self.n_actions = 2
     upper_violation = ([p], np.array([100]))
     lower_violation = ([-p], np.array([0]))  # -0.55
     self.unsafe_zone: List[Tuple] = [upper_violation, lower_violation]
     self.nn_path = "/home/edoardo/ray_results/tune_PPO_watertank/PPO_MonitoredWaterTank_e219e_00000_0_2021-05-19_20-39-10/checkpoint_3334/checkpoint-3334"
コード例 #18
0
    def get_template(mode=0):
        """Return ``(input_boundaries, template)`` for a 6-variable system.

        The six coordinates are named, in order, ``x_lead``, ``x_ego``,
        ``v_lead``, ``v_ego``, ``a_lead`` and ``a_ego``.  Modes 0, 1 and 5
        build their rows from ``Experiment.e``; modes 2, 3 and 4 build them
        from plain integer lists.  An unrecognised mode returns ``None``.
        """
        x_lead = Experiment.e(6, 0)
        x_ego = Experiment.e(6, 1)
        v_lead = Experiment.e(6, 2)
        v_ego = Experiment.e(6, 3)
        a_lead = Experiment.e(6, 4)
        a_ego = Experiment.e(6, 5)

        def sparse_row(entries):
            """Integer row of length 6 with the given ``(index, value)`` pairs set."""
            row = [0] * 6
            for index, value in entries:
                row[index] = value
            return row

        def signed_axis_rows():
            """Rows +axis, -axis for each of the six axes, as integer lists."""
            rows = []
            for axis in range(6):
                rows.append(sparse_row([(axis, 1)]))
                rows.append(sparse_row([(axis, -1)]))
            return rows

        if mode == 0:  # box directions with intervals
            boundaries = [50, -40, 10, -0, 28, -28, 36, -36, 0, -0, 0, -0, 0]
            rows = []
            for axis in range(6):
                rows.append(Experiment.e(6, axis))
                rows.append(-Experiment.e(6, axis))
            # Disabled extra row from the author: -1 at index 0, +1 at index 1.
            return boundaries, np.array(rows)
        if mode == 1:  # directions to easily find fixed point
            rows = [
                a_lead, -a_lead, a_ego, -a_ego, -v_lead, v_lead,
                -(v_lead - v_ego), (v_lead - v_ego),
                -(x_lead - x_ego), (x_lead - x_ego),
            ]
            return [20], np.array(rows)
        if mode == 2:
            boundaries = [0, -100, 30, -31, 20, -30, 0, -35, 0, -0, -10, -10, 20]
            axis_rows = np.array(signed_axis_rows())
            # Extra diagonal row: +1 at index 0, -1 at index 1.
            diagonal = sparse_row([(0, 1), (1, -1)])
            return boundaries, np.vstack([axis_rows, diagonal])
        if mode == 3:  # single point box directions +diagonal
            boundaries = [30, -30, 0, -0, 28, -28, 36, -36, 0, -0, 0, -0, 0]
            axis_rows = np.array(signed_axis_rows())
            # Extra diagonal row: -1 at index 0, +1 at index 1.
            diagonal = sparse_row([(0, -1), (1, 1)])
            return boundaries, np.vstack([axis_rows, diagonal])
        if mode == 4:  # octagon, every pair of variables
            rows = []
            for axis in range(6):
                rows.append(sparse_row([(axis, 1)]))
                rows.append(sparse_row([(axis, -1)]))
                for other in range(axis + 1, 6):
                    # Same order as before: (+,-), (-,+), (+,+), (-,-).
                    for sign_axis, sign_other in ((1, -1), (-1, 1), (1, 1), (-1, -1)):
                        rows.append(sparse_row([(axis, sign_axis), (other, sign_other)]))
            return [20], np.array(rows)
        if mode == 5:
            rows = [
                a_lead, -a_lead, -v_lead, v_lead,
                -(v_lead - v_ego), (v_lead - v_ego),
                -(x_lead - x_ego), (x_lead - x_ego),
            ]
            return [20], np.array(rows)
コード例 #19
0
        # nn = torch.nn.Sequential(torch.nn.Linear(2, 50), torch.nn.ReLU(), torch.nn.Linear(50, 1), torch.nn.Tanh())
        # checkpoint = torch.load("/home/edoardo/Development/SafeDRL/runnables/invariant/invariant_checkpoint.pt", map_location=torch.device("cpu"))
        # nn.load_state_dict(checkpoint)
        state_size = 2
        action_size = 2
        ALPHA = 0.6  # the higher the more aggressive the sampling towards high TD transitions
        agent = InvariantAgent(state_size=state_size, action_size=action_size, alpha=ALPHA)
        agent.load("/home/edoardo/Development/SafeDRL/runs/Aug05_14-55-31_alpha=0.6, min_eps=0.01, eps_decay=0.2/checkpoint_1100.pth")
        nn = agent.inetwork_local
        nn.cpu()
        return nn


# Script entry point: builds a StoppingCarExperiment, overrides its templates,
# input boundaries and time horizon, then runs it.
if __name__ == '__main__':
    # Start Ray with log_to_driver=False and local_mode=True.
    ray.init(log_to_driver=False, local_mode=True)
    experiment = StoppingCarExperiment()
    experiment.save_dir = "/home/edoardo/ray_results/tune_PPO_stopping_car/test"
    experiment.plotting_time_interval = 60
    experiment.show_progressbar = True
    experiment.show_progress_plot = False
    # Unit directions of coordinates 0, 1 and 2 of the experiment's input.
    x_lead = Experiment.e(experiment.env_input_size, 0)
    x_ego = Experiment.e(experiment.env_input_size, 1)
    v_ego = Experiment.e(experiment.env_input_size, 2)
    # Analysis directions: -(x_lead - x_ego), v_ego and -v_ego.
    template = np.array([-(x_lead - x_ego), v_ego, -v_ego])
    experiment.analysis_template = template  # standard
    # NOTE(review): the box size is hard-coded to 3 rather than taken from experiment.env_input_size - confirm they agree.
    experiment.input_template = Experiment.box(3)
    input_boundaries = [40, -30, 10, -0, 36, -28]
    experiment.input_boundaries = input_boundaries
    experiment.time_horizon = 150
    experiment.run_experiment()
コード例 #20
0
def get_experiment_instance(config, trial_dir):
    """Build and configure the experiment described by ``config``.

    Args:
        config: Mapping with ``"main_params"`` (a ``(problem, options)``
            pair) and ``"n_workers"``.
        trial_dir: Directory assigned to ``experiment.save_dir``.

    Returns:
        The configured experiment: bouncing ball or stopping car when
        ``problem`` names them, and the pendulum experiment for any other
        value of ``problem``.

    Raises:
        NotImplementedError: for an unsupported ``"template"`` or
            ``"initial_state"`` option.
    """
    # Hyperparameters
    problem, options = config["main_params"]
    worker_count = config["n_workers"]
    if problem == "bouncing_ball":
        experiment = BouncingBallExperimentProbabilistic()
        experiment.nn_path = os.path.join(utils.get_agents_dir(), nn_paths_bouncing_ball[options["nn_path"]])
        assert os.path.exists(experiment.nn_path)
        experiment.use_contained = options["use_contain"]
        initial_state = options["initial_state"]
        if initial_state == 0:
            experiment.input_boundaries = [9, -5, 0, 0.1]
        elif initial_state == 1:
            experiment.input_boundaries = [9, -5, 1, 1]
        else:
            raise NotImplementedError()
        template_kind = options["template"]
        if template_kind == 1:  # octagon
            experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
        elif template_kind == 0:  # box
            experiment.analysis_template = Experiment.box(experiment.env_input_size)
        else:
            raise NotImplementedError()
        experiment.max_probability_split = options["phi"]
    elif problem == "stopping_car":
        experiment = StoppingCarExperimentProbabilistic()
        experiment.nn_path = os.path.join(utils.get_agents_dir(), nn_paths_stopping_car[options["nn_path"]])
        experiment.max_probability_split = options["phi"]
        template_kind = options["template"]
        if template_kind == 2:  # octagon
            experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
        elif template_kind == 1:
            delta_x = Experiment.e(experiment.env_input_size, 0)
            v_ego = Experiment.e(experiment.env_input_size, 1)
            slope = 1 / 4.5
            experiment.analysis_template = np.array([
                delta_x,
                -delta_x,
                v_ego,
                -v_ego,
                slope * delta_x + v_ego,
                slope * delta_x - v_ego,
                -slope * delta_x + v_ego,
                -slope * delta_x - v_ego,
            ])
        elif template_kind == 0:  # box
            experiment.analysis_template = Experiment.box(experiment.env_input_size)
        else:
            raise NotImplementedError()
    else:
        experiment = PendulumExperimentProbabilistic()
        experiment.nn_path = os.path.join(utils.get_agents_dir(), nn_paths_cartpole[options["nn_path"]])
        template_kind = options["template"]
        if template_kind == 1:  # octagon
            experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
        elif template_kind == 0:  # box
            experiment.analysis_template = Experiment.box(experiment.env_input_size)
        else:
            raise NotImplementedError()
        experiment.max_probability_split = options["phi"]
        # experiment.use_rounding = False
    # Settings applied last in every branch of the original chain.
    experiment.n_workers = worker_count
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = trial_dir
    # experiment.update_progress_fn = update_progress
    return experiment