def _value_net(self, input_shapes, output_size):
    """Construct the value-net MLP from this instance's layer settings.

    Args:
        input_shapes: Passed through unchanged as ``MLP``'s ``input_shapes``.
        output_size: Passed through unchanged as ``MLP``'s ``output_size``.

    Returns:
        The object produced by the ``MLP`` call.
    """
    # Hidden layer sizes and both activations are read from the instance;
    # unlike the sibling builders, no ``name`` is supplied here.
    return MLP(
        input_shapes=input_shapes,
        hidden_layer_sizes=self._hidden_layer_sizes,
        output_size=output_size,
        activation=self._activation,
        output_activation=self._output_activation,
    )
def _policy_net(self, input_shapes, output_size):
    """Construct the policy MLP, named under this instance's name prefix.

    Args:
        input_shapes: Passed through unchanged as ``MLP``'s ``input_shapes``.
        output_size: Passed through unchanged as ``MLP``'s ``output_size``.

    Returns:
        The object produced by the ``MLP`` call.
    """
    return MLP(
        input_shapes=input_shapes,
        hidden_layer_sizes=self._hidden_layer_sizes,
        output_size=output_size,
        activation=self._activation,
        output_activation=self._output_activation,
        # The network name is "<self._name>/GaussianMLPPolicy".
        name='{}/GaussianMLPPolicy'.format(self._name),
    )
def _shift_and_log_scale_diag_net(self, input_shapes, output_size):
    """Construct the MLP returned as the shift / log-scale-diag network.

    NOTE(review): another definition with this same name appears right after
    this one in the visible source; if both live in the same scope, the later
    one replaces this one -- confirm which is intended.

    Args:
        input_shapes: Passed through unchanged as ``MLP``'s ``input_shapes``.
        output_size: Passed through unchanged as ``MLP``'s ``output_size``.

    Returns:
        The object produced by the ``MLP`` call.
    """
    # Collect the constructor arguments first, then build the network.
    mlp_kwargs = {
        'input_shapes': input_shapes,
        'hidden_layer_sizes': self._hidden_layer_sizes,
        'output_size': output_size,
        'activation': self._activation,
        'output_activation': self._output_activation,
        'name': '{}/GaussianMLPPolicy'.format(self._name),
    }
    return MLP(**mlp_kwargs)
def _shift_and_log_scale_diag_net(self, input_shapes, output_size):
    """Construct the shift / log-scale-diag MLP with L2 regularization.

    Args:
        input_shapes: Passed through unchanged as ``MLP``'s ``input_shapes``.
        output_size: Passed through unchanged as ``MLP``'s ``output_size``.

    Returns:
        The object produced by the ``MLP`` call.
    """
    mlp_kwargs = {
        "input_shapes": input_shapes,
        "hidden_layer_sizes": self._hidden_layer_sizes,
        "output_size": output_size,
        "activation": self._activation,
        "output_activation": self._output_activation,
        "name": "{}/GaussianMLPPolicy".format(self._name),
        # Kernel and bias each get their own L2 regularizer instance,
        # both created with a factor of 0.001.
        "kernel_regularizer": tf.keras.regularizers.l2(0.001),
        "bias_regularizer": tf.keras.regularizers.l2(0.001),
    }
    return MLP(**mlp_kwargs)