def __init__(self, state_space=BoundedSpace(array([-1, -0.07]),
                                            array([1, 0.07])),
             action_space=BoundedSpace(-1, 1, shape=(1, )),
             state=np.array([0, 0]), contour=None, gravitation=0.0025,
             power=0.0015, goal=0.6, horizon=100):
    """Initialize GeneralMountainCar Environment.

    Parameters
    ----------
    state_space : BoundedSpace
        Space object describing the state space.
    action_space : BoundedSpace
        Space object describing the action space.
    state : array-like
        Initial state of the car.
    contour : tuple of callables
        If contour is None, a default shape will be generated. A valid
        tuple needs to contain a function for the height at a position
        in the first element and a function for the gradient at a
        position in the second argument.
    gravitation : double
    power : double
    goal : double
        Goal along x-coordinate.
    horizon : int
        Passed to the environment base class.
    """
    # Initialize Environment Base Parameters
    super(GeneralMountainCar, self).__init__(state_space, action_space,
                                             horizon)

    # setup environment parameters
    self.goal = goal
    self.power = power
    self.gravitation = gravitation

    # setup contour: default is the classic cosine-valley landscape.
    if contour is None:
        def _hx(x):
            return -cos(pi * x)
        self._hx = _hx

        def _dydx(x):
            return pi * sin(pi * x)
        self._dydx = _dydx
    else:
        self._hx = contour[0]
        self._dydx = contour[1]

    # init state
    # NOTE: copy BOTH. `state` may be the shared mutable default array
    # (or a caller-owned array); storing it directly would alias
    # `initial_state` across instances / with the caller, so a later
    # in-place mutation of `self.state`'s source would corrupt resets.
    self.state = copy(state)
    self.initial_state = copy(state)
def __init__(self, environment, parameter_space=BoundedSpace(0, 1, (3, )),
             max_it=200, eps=0.001, lam=0.5):
    """Set up a GPOMDP gradient estimator.

    Delegates the shared configuration (environment, parameter space,
    iteration limit, convergence margin) to the base estimator and keeps
    the estimator-specific decay factor `lam`.
    """
    super(GPOMDPEstimator, self).__init__(environment, parameter_space,
                                          max_it, eps)
    # GPOMDP-specific decay/trace parameter.
    self.lam = lam
def __init__(self, environment, parameter_space=BoundedSpace(0, 1, (3, )),
             max_it=200, eps=0.001, var=1):
    """Set up a central finite-difference gradient estimator.

    Delegates the shared configuration (environment, parameter space,
    iteration limit, convergence margin) to the base estimator and keeps
    the estimator-specific perturbation size `var`.
    """
    super(CentralFDEstimator, self).__init__(environment, parameter_space,
                                             max_it, eps)
    # Grid/perturbation size used by the central differences.
    self.var = var
def __init__(self, environment, policy, estimator='reinforce',
             max_it=1000, eps=0.0001, est_eps=0.001,
             parameter_space=BoundedSpace(0, 1, (3, )), rate=1, var=0.5):
    """Initialize PolicyGradient.

    Parameters
    ----------
    environment :
        Environment we want to optimize the policy on. This should be a
        subclass of `EnvironmentBase`.
    policy :
        Policy we want to find parameters for. This should be a subclass
        of `Policy`.
    estimator : str or subclass of PolicyGradientEstimator
        Either an estimator class, that is a subclass of
        `PolicyGradientEstimator`, or a string key into the `estimators`
        registry. By default 'reinforce' will be used.
    max_it : int
        Maximum number of optimization iterations.
    eps : float
        The optimizer will stop optimization once the norm of the
        gradient is smaller than `eps`.
    est_eps : float
        In case an estimator needs to converge, this is the margin it
        will use to stop.
    parameter_space :
        NOTE(review): currently unused — the parameter space is always
        taken from `policy.parameter_space` below; kept for interface
        compatibility. Verify intent.
    rate : float
        This is the rate we use for the updates in each step.
    var : float
        This parameter will be used depending on the estimator type,
        e.g. for central differences this value corresponds to the grid
        size that is used.

    Raises
    ------
    ImportError
        If `estimator` is neither a known estimator name nor a subclass
        of `PolicyGradientEstimator`.
    """
    super(PolicyGradient, self).__init__(environment, policy, max_it)

    # The policy dictates the parameter space (the `parameter_space`
    # argument is ignored, see note above).
    self.parameter_space = policy.parameter_space
    self.eps = eps
    self.rate = rate

    # Resolve the estimator class. Previously an unknown string raised
    # a bare KeyError and a non-class value raised TypeError from
    # issubclass(); both now raise the documented ImportError (the
    # exception type is kept for backward compatibility).
    if isinstance(estimator, str):
        if estimator not in estimators:
            raise ImportError('Invalid Estimator')
        estimator = estimators[estimator]
    elif (isinstance(estimator, type)
            and issubclass(estimator, PolicyGradientEstimator)):
        pass
    else:
        raise ImportError('Invalid Estimator')

    self.estimator = estimator(environment, self.parameter_space, max_it,
                               est_eps, var)
def parameter_space(self):
    """Property storing the parameter space.

    By default the parameter space will be assigned to be a BoundedSpace
    between [0,1]^d. However it might be necessary to change this. A
    user may thus assign a new parameter space.

    WARNING: Currently there is no sanity check for manually assigned
    parameter spaces.
    """
    # Lazily construct the default space on first access only.
    if self._par_space is None:
        # A biased policy carries one extra parameter dimension.
        dim = self.par_dim + 1 if self.biased else self.par_dim
        self._par_space = BoundedSpace(0, 1, (dim, ))
    return self._par_space
def __init__(self, state=array([[0.], [0.]]), goal=array([[1.], [0.]]),
             step=0.01, eps=0, horizon=100):
    """Initialize LinearCar.

    Parameters
    ----------
    state : ndarray
        Initial state of the LinearCar. The state and action space will
        be deduced from this. The shape needs to be (2, d) for d > 0.
    goal : ndarray
        Goal state of the LinearCar. The shape should comply to the
        shape of the initial state. In case the velocity is non-zero,
        eps should be strictly greater than zero, since there is no way
        for the system to stabilize in the goal state anyway.
    step : float
        Update step.
    eps : float
        Reward at which we want to abort. If zero we do not abort at
        all.
    horizon : int
        Number of steps per rollout.
    """
    assert state.shape[0] == 2, 'Invalid shape of the initial state.'
    assert state.shape == goal.shape, 'State and goal shape have to agree.'

    # Initialize EnvironmentBase attributes
    self.horizon = horizon
    self.state_space = RdSpace(state.shape)
    self.action_space = BoundedSpace(-1, 1, shape=(state.shape[1], ))

    # Initialize State
    # NOTE: copy BOTH. `state` may be the shared mutable default array
    # (or a caller-owned array); storing it directly would alias
    # `initial_state` across instances / with the caller.
    self.state = copy(state)
    self.initial_state = copy(state)

    # Initialize Environment Parameters
    self.goal = goal
    self.eps = eps
    self.step = step