def __init__(self, input_pars):
    self.input_pars = input_pars
    self.N_MC_eval_samples = 250
    self.episode_length = 500
    # assume the 2nd-to-last column is u and the last is r
    self.data_columns = ("x", "theta", "xdot", "thetadot", "u", "r")
    self.n_dim = 4
    self.bounds = np.array([[-XBOUND, XBOUND],
                            [-THETABOUND, THETABOUND],
                            [-XDOTBOUND, XDOTBOUND],
                            [-THETADOTBOUND, THETADOTBOUND]]).transpose()
    self.goal = np.array([[-np.inf, np.inf],
                          [-THETABOUND, THETABOUND],
                          [-np.inf, np.inf],
                          [-np.inf, np.inf]]).transpose()
    self.initstate = INITSTATE
    self.action_centers = np.array([-5, 5])
    self.n_x_centers = 1
    self.n_theta_centers = 50
    self.n_xdot_centers = 30
    self.n_thetadot_centers = 30
    self.true_pars = (1, 1, 1)
    self.optimization_pars = {
        "initial step size": np.array([5, 5, 5]),
        "start": np.array([1, 10, 1]),
        "maximum evaluations": 50,
        "only positive": True,
    }
    # self.initial_par_search_space = [[p1, p2] for p1 in np.linspace(-0.003, -.002, 5) for p2 in np.linspace(2, 4, 5)]  # TODO
    self.noise = input_pars
    self.value_iteration_threshold = 1e-5
    self.state_centers = self.construct_discrete_policy_centers()
    self.dim_centers = rl_tools.split_states_on_dim(self.state_centers)
    # initial policy: action index 0 when theta > 0, action index 1 otherwise
    self.pi_init = 1 - np.int8((np.sign(self.state_centers[:, 1]) + 1) / 2)
    self.training_data_random_start = rnd_start
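# A minimal sketch, not the implementation used in this module, of how a method
# like construct_discrete_policy_centers() could lay out the discrete state
# centers implied by the attributes above: an evenly spaced grid with
# n_*_centers points per dimension inside self.bounds. The helper name
# make_grid_centers and the exact spacing are assumptions for illustration;
# numpy is assumed imported as np, as elsewhere in this file.
def make_grid_centers(bounds, counts):
    # bounds: 2 x n_dim array of [low, high] per dimension (same layout as self.bounds)
    # counts: number of centers per dimension, e.g. (1, 50, 30, 30)
    axes = [np.linspace(lo, hi, n) for (lo, hi), n in zip(bounds.T, counts)]
    mesh = np.meshgrid(*axes, indexing="ij")
    # stack the grid into an (n_centers, n_dim) array of center points
    return np.column_stack([m.ravel() for m in mesh])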
def __init__(self, input_pars):
    self.input_pars = input_pars
    self.N_MC_eval_samples = 250
    self.episode_length = 500
    # assume the 2nd-to-last column is u and the last is r
    self.data_columns = ('x', 'xdot', 'u', 'r')
    self.n_dim = 2
    self.bounds = np.array([[XMIN, XMAX], [XDOTMIN, XDOTMAX]]).transpose()
    self.goal = np.array([[-np.inf, XMAX], [-np.inf, np.inf]]).transpose()
    self.initstate = INITSTATE
    self.action_centers = np.array([-1, 1])
    self.n_x_centers = 150
    self.n_xdot_centers = 150
    self.true_pars = (-0.0025, 3)
    self.initial_par_search_space = [[p1, p2]
                                     for p1 in np.linspace(-0.003, -0.002, 5)
                                     for p2 in np.linspace(2, 4, 5)]
    self.noise = input_pars
    self.value_iteration_threshold = 1e-5
    self.optimization_pars = {'initial step size': np.array([0.0024, 1]),
                              'start': np.array([-0.0025, 3]),
                              'maximum evaluations': 75,
                              'only positive': False}
    self.state_centers = self.construct_discrete_policy_centers()
    self.dim_centers = rl_tools.split_states_on_dim(self.state_centers)
    self.pi_init = None
    self.training_data_random_start = rnd_start
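# The values in true_pars and optimization_pars above match the classic
# mountain-car update (Sutton & Barto), where the hill term is p1 * cos(p2 * x)
# with p1 = -0.0025 and p2 = 3. Below is a hedged sketch of that one-step
# dynamics; mountain_car_step is a hypothetical helper, not a function defined
# in this module, and it omits whatever boundary handling the domain applies
# at self.bounds.
def mountain_car_step(x, xdot, u, pars=(-0.0025, 3)):
    p1, p2 = pars
    # velocity update: throttle term plus parameterized hill term
    xdot_next = xdot + 0.001 * u + p1 * np.cos(p2 * x)
    # position update uses the new velocity
    x_next = x + xdot_next
    return x_next, xdot_next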
def __init__(self, state_centers, action_centers, states_to_actions):
    self.state_centers = state_centers
    self.action_centers = action_centers
    self.states_to_actions = states_to_actions
    self.dim_centers = rl_tools.split_states_on_dim(state_centers)
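# A minimal usage sketch, under the assumption that states_to_actions holds one
# action index per state center: pick the nearest discrete state center for a
# continuous query state and return the corresponding action. The function name
# nearest_center_action is hypothetical; the repo's own lookup may instead use
# dim_centers for a per-dimension search.
def nearest_center_action(policy, state):
    # squared Euclidean distance from the query state to every state center
    dists = np.sum((policy.state_centers - np.asarray(state)) ** 2, axis=1)
    idx = np.argmin(dists)
    return policy.action_centers[policy.states_to_actions[idx]]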