Example #1
    def __init__(self, state, action, next_state=None, hidden_units=(), activation='linear', last_activation=None,
                 dropout=None, distributions=None, preprocessors=None, postprocessors=None):
        """
        Initialize the multi-layer perceptron model.

        Args:
            state (State): state inputs.
            action (Action): action inputs.
            next_state (State, None): state outputs. If None, it will take the state inputs as the outputs.
            hidden_units (tuple, list of int): number of hidden units in each layer
            activation (str): activation function to apply on each layer
            last_activation (str, None): activation function to apply on the last layer
            dropout (None, float): dropout probability
            distributions (torch.distributions.Distribution): distribution to use to sample the next state. If None,
                it will be deterministic.
            preprocessors (Processor, list of Processor, None): pre-processors to be applied to the given input
            postprocessors (Processor, list of Processor, None): post-processors to be applied to the output
        """
        if next_state is None:
            next_state = state
        model = MLPApproximator(inputs=[state, action], outputs=next_state, hidden_units=hidden_units,
                                activation=activation, last_activation=last_activation,
                                dropout=dropout)
        super(MLPDynamicModel, self).__init__(state, action, model=model, next_state=next_state,
                                              distributions=distributions, preprocessors=preprocessors,
                                              postprocessors=postprocessors)
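
The wrapper above boils down to a forward-dynamics regressor: the MLP takes the state and action as inputs and predicts the next state. A minimal plain-PyTorch sketch of that pattern (the dimensions, hidden sizes, input concatenation, and ReLU activations below are illustrative assumptions, not the library's actual internals):

import torch
import torch.nn as nn

# Illustrative forward-dynamics MLP: concatenate (state, action) and predict the next state.
# state_dim, action_dim and the hidden sizes are made-up values for the sketch.
state_dim, action_dim, hidden_units = 4, 2, (64, 64)

layers, in_dim = [], state_dim + action_dim
for units in hidden_units:
    layers += [nn.Linear(in_dim, units), nn.ReLU()]
    in_dim = units
layers.append(nn.Linear(in_dim, state_dim))  # linear last layer: predicted next state
dynamics = nn.Sequential(*layers)

state, action = torch.randn(1, state_dim), torch.randn(1, action_dim)
next_state_pred = dynamics(torch.cat([state, action], dim=-1))
print(next_state_pred.shape)  # torch.Size([1, 4])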
Example #2
    def __init__(self,
                 state,
                 hidden_units=(),
                 activation='linear',
                 last_activation=None,
                 dropout=None,
                 preprocessors=None):
        """Initialize the Value MLP approximator.

        Args:
            state (State): 1D states that are fed to the value function approximator (the input dimensions will be
                inferred from the states)
            hidden_units (list/tuple of int): number of hidden units in the corresponding layer
            activation (None, str, or list/tuple of str/None): activation function to be applied after each layer.
                If a list/tuple, its length has to match the number of hidden layers.
            last_activation (None or str): last activation function to be applied. If not specified, it will check
                whether one was provided in the list/tuple of activation functions given for the previous argument.
            dropout (None, float, or list/tuple of float/None): dropout probability.
            preprocessors ((list of) Processor): pre-processors to be applied on the input state before being fed to
                the inner model / function approximator.
        """
        output = torch.Tensor([1.])  # dummy tensor defining the scalar output shape of the value function (alternative: torch.Tensor([[1.]]))
        model = MLPApproximator(state,
                                output,
                                hidden_units=hidden_units,
                                activation=activation,
                                last_activation=last_activation,
                                dropout=dropout,
                                preprocessors=preprocessors)
        super(MLPValue, self).__init__(state, model)
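
The dummy scalar tensor above only fixes the approximator's output shape, so the value function maps a state to a single number V(s). A rough sketch of the equivalent network built directly with torch.nn (the hidden sizes, tanh activation, and dropout value are made up for illustration):

import torch
import torch.nn as nn

# Illustrative state-value MLP: state -> scalar V(s).
# Hidden sizes, the tanh activation and the dropout value are assumptions for the sketch.
state_dim, hidden_units, dropout = 4, (32, 32), 0.1

layers, in_dim = [], state_dim
for units in hidden_units:
    layers += [nn.Linear(in_dim, units), nn.Tanh(), nn.Dropout(dropout)]
    in_dim = units
layers.append(nn.Linear(in_dim, 1))  # single scalar output, no last activation
value_net = nn.Sequential(*layers)

print(value_net(torch.randn(8, state_dim)).shape)  # torch.Size([8, 1])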
Example #3
    def __init__(self,
                 state,
                 action,
                 hidden_units=(),
                 activation='linear',
                 last_activation=None,
                 dropout=None,
                 preprocessors=None):
        """
        Initialize the MLP state-action value function approximator.

        Args:
            state (State): input state.
            action (Action): output action.
            hidden_units (list/tuple of int): number of hidden units in the corresponding layer
            activation (None, str, or list/tuple of str/None): activation function to be applied after each layer.
                If a list/tuple, its length has to match the number of hidden layers.
            last_activation (None or str): last activation function to be applied. If not specified, it will check
                whether one was provided in the list/tuple of activation functions given for the previous argument.
            dropout (None, float, or list/tuple of float/None): dropout probability.
            preprocessors ((list of) Processor): pre-processors to be applied on the input state before being fed to
                the inner model / function approximator.
        """
        model = MLPApproximator(inputs=state,
                                outputs=action,
                                hidden_units=hidden_units,
                                activation=activation,
                                last_activation=last_activation,
                                dropout=dropout,
                                preprocessors=preprocessors)
        super(MLPQValueOutput, self).__init__(state, action, model=model)
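
Here the action is passed as the output specification, so the approximator presumably emits one Q-value per action. A hand-rolled sketch of that shape for a small discrete action space (state_dim = 4 and num_actions = 3 are made-up values, not taken from the library):

import torch
import torch.nn as nn

# Illustrative Q-network: state in, one Q-value per discrete action out.
state_dim, num_actions = 4, 3
q_net = nn.Sequential(
    nn.Linear(state_dim, 64), nn.ReLU(),
    nn.Linear(64, num_actions),  # Q(s, a) for each of the 3 actions
)

q_values = q_net(torch.randn(1, state_dim))
print(q_values.shape, q_values.argmax(dim=-1).item())  # greedy action index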
Example #4
    def __init__(self,
                 state,
                 action,
                 hidden_units=(),
                 activation='linear',
                 last_activation=None,
                 dropout=None,
                 rate=1,
                 preprocessors=None,
                 postprocessors=None):
        """Initialize MLP policy.

        Args:
            state (State): 1D states that are fed to the policy (the input dimensions will be inferred from the
                states)
            action (Action): 1D actions output by the policy, to be applied in the simulator (the output dimensions
                will be inferred from the actions)
            hidden_units (list/tuple of int): number of hidden units in the corresponding layer
            activation (None, str, or list/tuple of str/None): activation function to be applied after each layer.
                If a list/tuple, its length has to match the number of hidden layers.
            last_activation (None or str): last activation function to be applied. If not specified, it will check
                whether one was provided in the list/tuple of activation functions given for the previous argument.
            dropout (None, float, or list/tuple of float/None): dropout probability.
            rate (int, float): rate (float) at which the policy operates if we are operating in real-time. If we are
                stepping deterministically in the simulator, it represents the number of ticks (int) to sleep before
                executing the model.
            preprocessors (Processor, list of Processor, None): pre-processors to be applied to the given input
            postprocessors (Processor, list of Processor, None): post-processors to be applied to the output
        """
        model = MLPApproximator(state,
                                action,
                                hidden_units=hidden_units,
                                activation=activation,
                                last_activation=last_activation,
                                dropout=dropout,
                                preprocessors=preprocessors,
                                postprocessors=postprocessors)
        super(MLPPolicy, self).__init__(state, action, model, rate=rate)
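
Conceptually, the policy wraps an MLP that maps the state straight to an action. A bare-bones torch.nn version of that mapping (the hidden sizes and the tanh squashing used here as the last activation are assumptions for the sketch):

import torch
import torch.nn as nn

# Illustrative policy MLP: state -> bounded continuous action.
# Dimensions and the tanh output squashing are made up for the sketch.
state_dim, action_dim = 4, 2
policy_net = nn.Sequential(
    nn.Linear(state_dim, 64), nn.ReLU(),
    nn.Linear(64, 64), nn.ReLU(),
    nn.Linear(64, action_dim), nn.Tanh(),
)

action = policy_net(torch.randn(1, state_dim))
print(action)  # each component lies in (-1, 1)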