def __init__(self, input_pars):
    # Cart-pole domain configuration: state is (x, theta, xdot, thetadot).
    self.input_pars = input_pars
    self.N_MC_eval_samples = 250
    self.episode_length = 500
    self.data_columns = (
        "x",
        "theta",
        "xdot",
        "thetadot",
        "u",
        "r",
    )  # assume the 2nd-to-last column is the action u and the last is the reward r
    self.n_dim = 4
    # State bounds stored as a (2, n_dim) array: row 0 holds lower bounds, row 1 upper bounds.
    self.bounds = np.array(
        [[-XBOUND, XBOUND], [-THETABOUND, THETABOUND], [-XDOTBOUND, XDOTBOUND], [-THETADOTBOUND, THETADOTBOUND]]
    ).transpose()
    # Goal specification: only the pole angle theta is constrained; the other dimensions are unbounded.
    self.goal = np.array(
        [[-np.inf, np.inf], [-THETABOUND, THETABOUND], [-np.inf, np.inf], [-np.inf, np.inf]]
    ).transpose()
    self.initstate = INITSTATE
    self.action_centers = np.array([-5, 5])  # the two available control actions (force -5 or +5)
    # Number of discretization centers per state dimension.
    self.n_x_centers = 1
    self.n_theta_centers = 50
    self.n_xdot_centers = 30
    self.n_thetadot_centers = 30
    self.true_pars = (1, 1, 1)
    self.optimization_pars = {
        "initial step size": np.array([5, 5, 5]),
        "start": np.array([1, 10, 1]),
        "maximum evaluations": 50,
        "only positive": True,
    }
    # self.initial_par_search_space = [[p1, p2] for p1 in np.linspace(-0.003, -.002, 5) for p2 in np.linspace(2, 4, 5)]  # TODO
    self.noise = input_pars
    self.value_iteration_threshold = 1e-5
    self.state_centers = self.construct_discrete_policy_centers()
    self.dim_centers = rl_tools.split_states_on_dim(self.state_centers)
    # Initial policy chosen from the sign of theta (column 1): action index 1 (+5)
    # when theta <= 0, action index 0 (-5) when theta > 0.
    self.pi_init = 1 - np.int8((np.sign(self.state_centers[:, 1]) + 1) / 2)
    self.training_data_random_start = rnd_start  # rnd_start is expected to be defined at module level
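# --- Illustrative sketch (not the repository's implementation) ----------------
# construct_discrete_policy_centers() is called above but not shown in this excerpt.
# One plausible way to build such centers is the Cartesian product of evenly spaced
# points per dimension, using self.bounds and the n_*_centers counts. The helper
# name and the even spacing are assumptions made for illustration only.
def _example_grid_centers(bounds, counts):
    import numpy as np  # np is already imported at module level in the original file
    # bounds has shape (2, n_dim) after the transpose above; counts is one int per dimension
    axes = [np.linspace(lo, hi, n) for (lo, hi), n in zip(bounds.T, counts)]
    mesh = np.meshgrid(*axes, indexing="ij")
    return np.stack([m.ravel() for m in mesh], axis=1)  # shape (prod(counts), n_dim)
# e.g. _example_grid_centers(self.bounds, (self.n_x_centers, self.n_theta_centers,
#      self.n_xdot_centers, self.n_thetadot_centers)) would yield 1*50*30*30 centers.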
def __init__(self, input_pars):
    # Mountain-car domain configuration: state is (x, xdot).
    self.input_pars = input_pars
    self.N_MC_eval_samples = 250
    self.episode_length = 500
    self.data_columns = ("x", "xdot", "u", "r")  # assume the 2nd-to-last column is the action u and the last is the reward r
    self.n_dim = 2
    self.bounds = np.array([[XMIN, XMAX], [XDOTMIN, XDOTMAX]]).transpose()
    # Goal specification: only the position x is bounded (above by XMAX); velocity is unconstrained.
    self.goal = np.array([[-np.inf, XMAX], [-np.inf, np.inf]]).transpose()
    self.initstate = INITSTATE
    self.action_centers = np.array([-1, 1])
    # Number of discretization centers per state dimension.
    self.n_x_centers = 150
    self.n_xdot_centers = 150
    # These values match the gravity coefficient and the cos(3x) frequency of the
    # standard mountain-car dynamics.
    self.true_pars = (-0.0025, 3)
    self.initial_par_search_space = [[p1, p2] for p1 in np.linspace(-0.003, -0.002, 5) for p2 in np.linspace(2, 4, 5)]
    self.noise = input_pars
    self.value_iteration_threshold = 1e-5
    self.optimization_pars = {
        "initial step size": np.array([0.0024, 1]),
        "start": np.array([-0.0025, 3]),
        "maximum evaluations": 75,
        "only positive": False,
    }
    self.state_centers = self.construct_discrete_policy_centers()
    self.dim_centers = rl_tools.split_states_on_dim(self.state_centers)
    self.pi_init = None  # no hand-coded initial policy for this domain
    self.training_data_random_start = rnd_start  # rnd_start is expected to be defined at module level
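# --- Illustrative sketch (an assumption, not code from this file) --------------
# true_pars = (-0.0025, 3) match the constants of the classic mountain-car update,
# so the simulated one-step dynamics presumably look roughly like the sketch below.
# The function name, the 0.001 force scale, and the absence of clipping to the
# state bounds are assumptions made purely for illustration.
def _example_mountain_car_step(x, xdot, u, pars=(-0.0025, 3)):
    import numpy as np  # np is already imported at module level in the original file
    g, freq = pars
    xdot_new = xdot + 0.001 * u + g * np.cos(freq * x)  # u in {-1, +1}, cf. action_centers
    x_new = x + xdot_new
    return x_new, xdot_new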
def __init__(self, state_centers, action_centers, states_to_actions):
    # Tabular policy over a discretized state space: states_to_actions gives the
    # action (or action index) associated with each state center.
    self.state_centers = state_centers
    self.action_centers = action_centers
    self.states_to_actions = states_to_actions
    self.dim_centers = rl_tools.split_states_on_dim(state_centers)
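# --- Illustrative sketch (an assumption, not this class's actual interface) ----
# A natural way to query a tabular policy like the one above: find the nearest
# state center to a continuous state, then look up its action. That states_to_actions
# stores an action index per state center is assumed here for illustration.
def _example_policy_action(state, state_centers, action_centers, states_to_actions):
    import numpy as np  # np is already imported at module level in the original file
    idx = np.argmin(np.sum((state_centers - np.asarray(state)) ** 2, axis=1))
    return action_centers[states_to_actions[idx]]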