def __init__(self, state, action, next_state=None, hidden_units=(), activation='linear', last_activation=None,
             dropout=None, distributions=None, preprocessors=None, postprocessors=None):
    """Initialize the MLP-based dynamic (transition) model.

    Args:
        state (State): state inputs.
        action (Action): action inputs.
        next_state (State, None): state outputs. If None, the state inputs are reused as outputs.
        hidden_units (tuple, list of int): number of hidden units in each layer.
        activation (str): activation function to apply on each layer.
        last_activation (str, None): activation function to apply on the last layer.
        dropout (None, float): dropout probability.
        distributions (torch.distributions.Distribution): distribution used to sample the next state.
            If None, the model is deterministic.
        preprocessors (Processor, list of Processor, None): pre-processors applied to the given input.
        postprocessors (Processor, list of Processor, None): post-processors applied to the output.
    """
    # By default, the model predicts over the same state space as its input.
    if next_state is None:
        next_state = state

    # Build the inner MLP mapping (state, action) -> next_state.
    inner_model = MLPApproximator(inputs=[state, action], outputs=next_state, hidden_units=hidden_units,
                                  activation=activation, last_activation=last_activation, dropout=dropout)

    super(MLPDynamicModel, self).__init__(state, action, model=inner_model, next_state=next_state,
                                          distributions=distributions, preprocessors=preprocessors,
                                          postprocessors=postprocessors)
def __init__(self, state, hidden_units=(), activation='linear', last_activation=None, dropout=None,
             preprocessors=None):
    """Initialize the MLP state-value function approximator.

    Args:
        state (State): 1D-states fed to the value function (input dimensions are inferred from the states).
        hidden_units (list/tuple of int): number of hidden units in the corresponding layer.
        activation (None, str, or list/tuple of str/None): activation function applied after each layer.
            If a list/tuple, it has to match the number of layers.
        last_activation (None or str): last activation function to apply. If not specified, it will check
            if it is in the list/tuple of activation functions provided for the previous argument.
        dropout (None, float, or list/tuple of float/None): dropout probability.
        preprocessors ((list of) Processor): pre-processors applied on the input state before it is fed
            to the inner model / function approximator.
    """
    # A value function produces a single scalar; this placeholder fixes the output dimension to 1.
    scalar_output = torch.Tensor([1.])

    inner_model = MLPApproximator(state, scalar_output, hidden_units=hidden_units, activation=activation,
                                  last_activation=last_activation, dropout=dropout,
                                  preprocessors=preprocessors)

    super(MLPValue, self).__init__(state, inner_model)
def __init__(self, state, action, hidden_units=(), activation='linear', last_activation=None, dropout=None,
             preprocessors=None):
    """Initialize the MLP state-action value function approximator.

    Args:
        state (State): input state.
        action (Action): output action.
        hidden_units (list/tuple of int): number of hidden units in the corresponding layer.
        activation (None, str, or list/tuple of str/None): activation function applied after each layer.
            If a list/tuple, it has to match the number of layers.
        last_activation (None or str): last activation function to apply. If not specified, it will check
            if it is in the list/tuple of activation functions provided for the previous argument.
        dropout (None, float, or list/tuple of float/None): dropout probability.
        preprocessors ((list of) Processor): pre-processors applied on the input state before it is fed
            to the inner model / function approximator.
    """
    # Inner MLP maps states to one Q-value per action (actions are the output of the network).
    inner_model = MLPApproximator(inputs=state, outputs=action, hidden_units=hidden_units,
                                  activation=activation, last_activation=last_activation,
                                  dropout=dropout, preprocessors=preprocessors)

    super(MLPQValueOutput, self).__init__(state, action, model=inner_model)
def __init__(self, state, action, hidden_units=(), activation='linear', last_activation=None, dropout=None,
             rate=1, preprocessors=None, postprocessors=None):
    """Initialize the MLP policy.

    Args:
        state (State): 1D-states fed to the policy (input dimensions are inferred from the states).
        action (Action): 1D-actions outputted by the policy and applied in the simulator
            (output dimensions are inferred from the actions).
        hidden_units (list/tuple of int): number of hidden units in the corresponding layer.
        activation (None, str, or list/tuple of str/None): activation function applied after each layer.
            If a list/tuple, it has to match the number of layers.
        last_activation (None or str): last activation function to apply. If not specified, it will check
            if it is in the list/tuple of activation functions provided for the previous argument.
        dropout (None, float, or list/tuple of float/None): dropout probability.
        rate (int, float): rate (float) at which the policy operates if we are operating in real-time.
            If we are stepping deterministically in the simulator, it represents the number of ticks (int)
            to sleep before executing the model.
        preprocessors (Processor, list of Processor, None): pre-processors applied to the given input.
        postprocessors (Processor, list of Processor, None): post-processors applied to the output.
    """
    # Inner MLP maps states directly to actions.
    inner_model = MLPApproximator(state, action, hidden_units=hidden_units, activation=activation,
                                  last_activation=last_activation, dropout=dropout,
                                  preprocessors=preprocessors, postprocessors=postprocessors)

    super(MLPPolicy, self).__init__(state, action, inner_model, rate=rate)