def __init__(self,
                 state_space=BoundedSpace(array([-1, -0.07]),
                                          array([1, 0.07])),
                 action_space=BoundedSpace(-1, 1, shape=(1, )),
                 state=np.array([0, 0]),
                 contour=None,
                 gravitation=0.0025,
                 power=0.0015,
                 goal=0.6,
                 horizon=100):
        """Initialize GeneralMountainCar Environment.

        Parameters
        ----------
        state_space : BoundedSpace
            Space object describing the state space.
        action_space : BoundedSpace
            Space object describing the action space.
        state : array-like
            Initial state of the car.
        contour : tuple of callables
            If contour is None, a default shape will be generated. A valid
            tuple contains a function returning the height at a position as
            its first element and a function returning the gradient at a
            position as its second element.
        gravitation : float
            Gravitational constant of the environment.
        power : float
            Power (action scaling) of the car.
        goal : float
            Goal position along the x-coordinate.
        horizon : int
            Maximum number of steps per rollout.
        """
        # Initialize Environment Base Parameters
        super(GeneralMountainCar, self).__init__(state_space, action_space,
                                                 horizon)

        # setup environment parameters
        self.goal = goal
        self.power = power
        self.gravitation = gravitation

        # setup contour
        if contour is None:

            def _hx(x):
                return -cos(pi * x)

            self._hx = _hx

            def _dydx(x):
                return pi * sin(pi * x)

            self._dydx = _dydx
        else:
            self._hx = contour[0]
            self._dydx = contour[1]

        # init state
        self.state = copy(state)
        self.initial_state = state
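
The contour argument takes a (height, gradient) tuple of callables, as documented above. A minimal sketch of a custom landscape follows; the import path for GeneralMountainCar is an assumption and is therefore left commented out:

from numpy import cos, sin, pi

# Import path is an assumption; adjust to the actual package layout.
# from SafeRLBench.envs import GeneralMountainCar


def height(x):
    # Height of the custom landscape at position x: a flattened cosine valley.
    return -0.5 * cos(pi * x)


def gradient(x):
    # Analytic derivative of `height` with respect to x.
    return 0.5 * pi * sin(pi * x)


# env = GeneralMountainCar(contour=(height, gradient), goal=0.6)
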
Example #2
    def __init__(self,
                 environment,
                 parameter_space=BoundedSpace(0, 1, (3, )),
                 max_it=200,
                 eps=0.001,
                 lam=0.5):
        """Initialize."""
        super(GPOMDPEstimator, self).__init__(environment, parameter_space,
                                              max_it, eps)
        self.lam = lam
Example #3
    def __init__(self,
                 environment,
                 parameter_space=BoundedSpace(0, 1, (3, )),
                 max_it=200,
                 eps=0.001,
                 var=1):
        """Initialize."""
        super(CentralFDEstimator, self).__init__(environment, parameter_space,
                                                 max_it, eps)
        self.var = var
Example #4
    def __init__(self,
                 environment,
                 policy,
                 estimator='reinforce',
                 max_it=1000,
                 eps=0.0001,
                 est_eps=0.001,
                 parameter_space=BoundedSpace(0, 1, (3, )),
                 rate=1,
                 var=0.5):
        """Initialize PolicyGradient.

        Parameters
        ----------
        environment :
            Environment we want to optimize the policy on. This should be a
            subclass of `EnvironmentBase`.
        policy :
            Policy we want to find parameters for. This should be a subclass of
            `Policy`.
        estimator :
            Either an estimator class, that is, a subclass of
            `PolicyGradientEstimator`, or a string. A list of possible
            estimator strings can be found in the Notes section. By default
            'reinforce' will be used.
        max_it : int
            Maximum number of optimization iterations.
        eps : float
            The optimizer will stop once the norm of the gradient is smaller
            than `eps`.
        est_eps : float
            In case an estimator needs to converge, this is the margin it
            will use to stop.
        parameter_space : BoundedSpace
            Parameter space for the policy parameters. Note that the
            constructor currently reads `policy.parameter_space` rather than
            this argument.
        rate : float
            Learning rate used for the parameter update in each step.
        var : float
            This parameter is used depending on the estimator type; for
            central differences, for example, it corresponds to the grid
            size that is used.
        """
        super(PolicyGradient, self).__init__(environment, policy, max_it)

        self.parameter_space = policy.parameter_space

        self.eps = eps
        self.rate = rate

        if isinstance(estimator, str):
            estimator = estimators[estimator]
        elif issubclass(estimator, PolicyGradientEstimator):
            pass
        else:
            raise ImportError('Invalid Estimator')

        self.estimator = estimator(environment, self.parameter_space, max_it,
                                   est_eps, var)
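
The constructor above resolves the `estimator` argument through a name-to-class lookup. A self-contained sketch of that dispatch pattern follows; the stand-in classes and the `resolve_estimator` helper are illustrative, not the library's actual table:

class PolicyGradientEstimator(object):
    """Stand-in base class, used only in this sketch."""


class ReinforceEstimator(PolicyGradientEstimator):
    """Stand-in estimator registered under the 'reinforce' key."""


# Name-to-class table, analogous to the `estimators` dict used above.
estimators = {'reinforce': ReinforceEstimator}


def resolve_estimator(estimator):
    # Accept either a registered name or a PolicyGradientEstimator subclass.
    if isinstance(estimator, str):
        return estimators[estimator]
    if isinstance(estimator, type) and issubclass(estimator,
                                                  PolicyGradientEstimator):
        return estimator
    raise ValueError('Invalid Estimator')


print(resolve_estimator('reinforce'))         # ReinforceEstimator
print(resolve_estimator(ReinforceEstimator))  # same class passed through
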
Example #5
    @property
    def parameter_space(self):
        """Property storing the parameter space.

        By default the parameter space will be assigned to be a BoundedSpace
        between [0,1]^d. However it might be necessary to change this. A user
        may thus assign a new parameter space.

        WARNING: Currently there is no sanity check for manually assigned
        parameter spaces.
        """
        if self._par_space is None:
            if self.biased:
                shape = (self.par_dim + 1, )
            else:
                shape = (self.par_dim, )
            self._par_space = BoundedSpace(0, 1, shape)

        return self._par_space
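
The property lazily builds the default [0, 1]^d box on first access and caches it in `_par_space`. The sketch below illustrates that pattern with a minimal stand-in for `BoundedSpace` and a setter that, as warned above, performs no sanity check; all names here are illustrative rather than the library's own:

class BoundedSpaceStub(object):
    # Minimal stand-in for BoundedSpace, used only in this sketch.
    def __init__(self, low, high, shape):
        self.low, self.high, self.shape = low, high, shape


class LazyParameterSpaceMixin(object):
    def __init__(self, par_dim, biased=False):
        self.par_dim = par_dim
        self.biased = biased
        self._par_space = None

    @property
    def parameter_space(self):
        # Build the default [0, 1]^d box only on first access, then cache it.
        if self._par_space is None:
            shape = (self.par_dim + 1,) if self.biased else (self.par_dim,)
            self._par_space = BoundedSpaceStub(0, 1, shape)
        return self._par_space

    @parameter_space.setter
    def parameter_space(self, space):
        # No sanity check, mirroring the warning in the docstring above.
        self._par_space = space


policy = LazyParameterSpaceMixin(par_dim=3)
print(policy.parameter_space.shape)               # (3,)
policy.parameter_space = BoundedSpaceStub(-5, 5, (3,))
print(policy.parameter_space.low)                 # -5
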
Example #6
    def __init__(self,
                 state=array([[0.], [0.]]),
                 goal=array([[1.], [0.]]),
                 step=0.01,
                 eps=0,
                 horizon=100):
        """
        Initialize LinearCar.

        Parameters
        ----------
        state : ndarray
            Initial state of the LinearCar. The state and action space will be
            deduced from this. The shape needs to be (2, d) for d > 0.
        goal : ndarray
            Goal state of the LinearCar. The shape should comply to the shape
            of the initial state.
            In case the velocity is non-zero, eps should be strictly greater
            than zero, since there is no way for the system to stabilize in
            the goal state anyway.
        eps : float
            Reward at which we want to abort. If zero we do not abort at all.
        step : float
            Update step.
        """
        assert state.shape[0] == 2, 'Invalid shape of the initial state.'
        assert state.shape == goal.shape, 'State and goal shape have to agree.'

        # Initialize EnvironmentBase attributes
        self.horizon = horizon
        self.state_space = RdSpace(state.shape)
        self.action_space = BoundedSpace(-1, 1, shape=(state.shape[1], ))

        # Initialize State
        self.initial_state = state
        self.state = copy(state)

        # Initialize Environment Parameters
        self.goal = goal
        self.eps = eps
        self.step = step
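
A hedged usage sketch for a two-dimensional LinearCar; the import path is an assumption and is therefore commented out, and the row layout (positions, then velocities) is inferred from the goal-velocity note in the docstring above:

from numpy import array

# Import path is an assumption; adjust to the actual package layout.
# from SafeRLBench.envs import LinearCar

# State shape (2, d): the first row presumably holds positions and the
# second velocities, so this is a d = 2 instance.
initial_state = array([[0., 0.],
                       [0., 0.]])
goal_state = array([[1., -1.],
                    [0., 0.]])

# env = LinearCar(state=initial_state, goal=goal_state, step=0.01,
#                 eps=0, horizon=100)
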