Example 1
        def build_q_net(name_):
            activation = get_activation_fn(critic_hidden_activation,
                                           framework="torch")
            # For continuous actions: Feed obs and actions (concatenated)
            # through the NN. For discrete actions, only obs.
            q_net = nn.Sequential()
            ins = self.obs_ins + self.action_dim
            for i, n in enumerate(critic_hiddens):
                q_net.add_module(
                    "{}_hidden_{}".format(name_, i),
                    SlimFC(
                        ins,
                        n,
                        initializer=torch.nn.init.xavier_uniform_,
                        activation_fn=activation,
                    ),
                )
                ins = n

            q_net.add_module(
                "{}_out".format(name_),
                SlimFC(
                    ins,
                    1,
                    initializer=torch.nn.init.xavier_uniform_,
                    activation_fn=None,
                ),
            )
            return q_net
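Usage sketch (an illustration, not part of the snippet): the critic built by build_q_net expects the observation and action concatenated along the last axis. With hypothetical sizes obs_ins=17, action_dim=6 and a single hidden layer of 256 units, an equivalent standalone net behaves like this:

    import torch
    from ray.rllib.models.torch.misc import SlimFC

    # Same structure as build_q_net("q"): input width = obs_ins + action_dim.
    q_net = torch.nn.Sequential(
        SlimFC(17 + 6, 256, activation_fn="relu"),
        SlimFC(256, 1, activation_fn=None),
    )
    obs, actions = torch.randn(32, 17), torch.randn(32, 6)
    q_values = q_net(torch.cat([obs, actions], dim=-1))  # shape: (32, 1)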
Example 2
    def _build_q_net(self, name_):
        # actions are concatenated with flattened obs
        critic_hidden_activation = self.model_config[
            "critic_hidden_activation"]
        critic_hiddens = self.model_config["critic_hiddens"]

        activation = get_activation_fn(critic_hidden_activation,
                                       framework="torch")
        q_net = nn.Sequential()
        ins = (self.obs_ins if self._is_action_discrete else self.obs_ins +
               self.action_dim)
        for i, n in enumerate(critic_hiddens):
            q_net.add_module(
                f"{name_}_hidden_{i}",
                SlimFC(
                    ins,
                    n,
                    initializer=torch.nn.init.xavier_uniform_,
                    activation_fn=activation,
                ),
            )
            ins = n

        q_net.add_module(
            f"{name_}_out",
            SlimFC(
                ins,
                self.action_space.n if self._is_action_discrete else 1,
                initializer=torch.nn.init.xavier_uniform_,
                activation_fn=None,
            ),
        )
        return q_net
Example 3
    def __init__(self,
                 in_size: int,
                 out_size: int,
                 initializer: Any = None,
                 activation_fn: Any = None,
                 use_bias: bool = True,
                 bias_init: float = 0.0):
        """Creates a standard FC layer, similar to torch.nn.Linear

        Args:
            in_size (int): Input size for FC Layer
            out_size (int): Output size for FC Layer
            initializer (Any): Initializer function for FC layer weights
            activation_fn (Any): Activation function at the end of layer
            use_bias (bool): Whether to add bias weights or not
            bias_init (float): Initialize bias weights to bias_init const
        """
        super(SlimFC, self).__init__()
        layers = []
        # Actual nn.Linear layer (including correct initialization logic).
        linear = nn.Linear(in_size, out_size, bias=use_bias)
        if initializer is None:
            initializer = nn.init.xavier_uniform_
        initializer(linear.weight)
        if use_bias is True:
            nn.init.constant_(linear.bias, bias_init)
        layers.append(linear)
        # Activation function (if any; default=None (linear)).
        if isinstance(activation_fn, str):
            activation_fn = get_activation_fn(activation_fn, "torch")
        if activation_fn is not None:
            layers.append(activation_fn())
        # Put everything in sequence.
        self._model = nn.Sequential(*layers)
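A minimal usage sketch, assuming (as in RLlib's misc.py) that SlimFC.forward simply delegates to self._model:

    import torch
    from ray.rllib.models.torch.misc import SlimFC

    layer = SlimFC(in_size=4, out_size=8, activation_fn="relu")
    out = layer(torch.randn(32, 4))  # shape: (32, 8)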
Example 4
    def __init__(
        self,
        *,
        input_size: int,
        filters: Tuple[Tuple[int]] = (
            (1024, 5, 2),
            (128, 5, 2),
            (64, 6, 2),
            (32, 6, 2),
        ),
        initializer="default",
        bias_init=0,
        activation_fn: str = "relu",
        output_shape: Tuple[int] = (3, 64, 64)
    ):
        """Initializes a TransposedConv2DStack instance.

        Args:
            input_size: The size of the 1D input vector, from which to
                generate the image distribution.
            filters (Tuple[Tuple[int]]): Tuple of filter setups (1 for each
                ConvTranspose2D layer): [in_channels, kernel, stride].
            initializer (str): Initializer specifier for the
                ConvTranspose2d weights (e.g. "default").
            bias_init: The initial bias values to use.
            activation_fn: Activation function descriptor (str).
            output_shape (Tuple[int]): Shape of the final output image.
        """
        super().__init__()
        self.activation = get_activation_fn(activation_fn, framework="torch")
        self.output_shape = output_shape
        initializer = get_initializer(initializer, framework="torch")

        in_channels = filters[0][0]
        self.layers = [
            # Map from 1D-input vector to correct initial size for the
            # Conv2DTransposed stack.
            nn.Linear(input_size, in_channels),
            # Reshape from the incoming 1D vector (input_size) to 1x1 image
            # format (channels first).
            Reshape([-1, in_channels, 1, 1]),
        ]
        for i, (_, kernel, stride) in enumerate(filters):
            out_channels = (
                filters[i + 1][0] if i < len(filters) - 1 else output_shape[0]
            )
            conv_transp = nn.ConvTranspose2d(in_channels, out_channels, kernel, stride)
            # Apply initializer.
            initializer(conv_transp.weight)
            nn.init.constant_(conv_transp.bias, bias_init)
            self.layers.append(conv_transp)
            # Apply activation function, if provided and if not last layer.
            if self.activation is not None and i < len(filters) - 1:
                self.layers.append(self.activation())

            # num-outputs == num-inputs for next layer.
            in_channels = out_channels

        self._model = nn.Sequential(*self.layers)
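A quick shape check for the default filters (a sketch using the standard ConvTranspose2d size formula for unpadded layers, out = (in - 1) * stride + kernel): the stack upsamples the reshaped 1x1 input back to the declared 64x64 output image.

    # Kernel/stride pairs taken from the default `filters` above.
    size = 1
    for kernel, stride in [(5, 2), (5, 2), (6, 2), (6, 2)]:
        size = (size - 1) * stride + kernel
    print(size)  # 64: spatial dims grow 1 -> 5 -> 13 -> 30 -> 64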
Example 5
File: misc.py Project: tchordia/ray
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel: Union[int, Tuple[int, int]],
        stride: Union[int, Tuple[int, int]],
        padding: Union[int, Tuple[int, int]],
        # Defaulting these to nn.[..] will break soft torch import.
        initializer: Any = "default",
        activation_fn: Any = "default",
        bias_init: float = 0,
    ):
        """Creates a standard Conv2d layer, similar to torch.nn.Conv2d

        Args:
            in_channels (int): Number of input channels
            out_channels (int): Number of output channels
            kernel (Union[int, Tuple[int, int]]): If int, the kernel is
                a tuple (x, x). Otherwise, the tuple can be specified
                directly
            stride (Union[int, Tuple[int, int]]): Controls the stride
                for the cross-correlation. If int, the stride is a
                tuple (x, x). Otherwise, the tuple can be specified
                directly
            padding (Union[int, Tuple[int, int]]): Controls the amount
                of implicit zero-padding during the conv operation
            initializer (Any): Initializer function for kernel weights
            activation_fn (Any): Activation function at the end of layer
            bias_init (float): Initialize bias weights to bias_init const
        """
        super(SlimConv2d, self).__init__()
        layers = []
        # Padding layer.
        if padding:
            layers.append(nn.ZeroPad2d(padding))
        # Actual Conv2D layer (including correct initialization logic).
        conv = nn.Conv2d(in_channels, out_channels, kernel, stride)
        if initializer:
            if initializer == "default":
                initializer = nn.init.xavier_uniform_
            initializer(conv.weight)
        nn.init.constant_(conv.bias, bias_init)
        layers.append(conv)
        # Activation function (if any; default=ReLU).
        if isinstance(activation_fn, str):
            if activation_fn == "default":
                activation_fn = nn.ReLU
            else:
                activation_fn = get_activation_fn(activation_fn, "torch")
        if activation_fn is not None:
            layers.append(activation_fn())
        # Put everything in sequence.
        self._model = nn.Sequential(*layers)
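Usage sketch (hypothetical sizes, and assuming forward() delegates to self._model as in SlimFC): note that padding is applied by a separate ZeroPad2d layer, so the Conv2d itself runs unpadded on the already-padded input.

    import torch
    from ray.rllib.models.torch.misc import SlimConv2d

    conv = SlimConv2d(3, 16, kernel=4, stride=2, padding=1)
    out = conv(torch.randn(8, 3, 64, 64))  # shape: (8, 16, 32, 32)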
Example 6
    def feed_forward(self, obs, policy_vars, policy_config):
        # Hacky for now, reconstruct FC network with adapted weights
        # @mluo: TODO for any network
        def fc_network(
            inp, network_vars, hidden_nonlinearity, output_nonlinearity, policy_config
        ):
            bias_added = False
            x = inp
            for name, param in network_vars.items():
                if "kernel" in name:
                    x = tf.matmul(x, param)
                elif "bias" in name:
                    x = tf.add(x, param)
                    bias_added = True
                else:
                    raise NameError

                if bias_added:
                    if "out" in name:
                        x = output_nonlinearity(x)
                    else:
                        x = hidden_nonlinearity(x)
                    bias_added = False
            return x

        policyn_vars = {}
        valuen_vars = {}
        log_std = None
        for name, param in policy_vars.items():
            if "value" in name:
                valuen_vars[name] = param
            elif "log_std" in name:
                log_std = param
            else:
                policyn_vars[name] = param

        output_nonlinearity = tf.identity
        hidden_nonlinearity = get_activation_fn(policy_config["fcnet_activation"])

        pi_new_logits = fc_network(
            obs, policyn_vars, hidden_nonlinearity, output_nonlinearity, policy_config
        )
        if log_std is not None:
            pi_new_logits = tf.concat([pi_new_logits, 0.0 * pi_new_logits + log_std], 1)
        value_fn = fc_network(
            obs, valuen_vars, hidden_nonlinearity, output_nonlinearity, policy_config
        )

        return pi_new_logits, tf.reshape(value_fn, [-1])
Example 7
    def __init__(self,
                 in_size: int,
                 out_size: int,
                 sigma0: float,
                 activation: str = "relu"):
        """Initializes a NoisyLayer object.

        Args:
            in_size: Input size for Noisy Layer
            out_size: Output size for Noisy Layer
            sigma0: Initialization value for sigma_b (bias noise)
            activation: Non-linear activation for Noisy Layer
        """
        super().__init__()

        self.in_size = in_size
        self.out_size = out_size
        self.sigma0 = sigma0
        self.activation = get_activation_fn(activation, framework="torch")
        if self.activation is not None:
            self.activation = self.activation()

        sigma_w = nn.Parameter(
            torch.from_numpy(
                np.random.uniform(
                    low=-1.0 / np.sqrt(float(self.in_size)),
                    high=1.0 / np.sqrt(float(self.in_size)),
                    size=[self.in_size, out_size],
                )).float())
        self.register_parameter("sigma_w", sigma_w)
        sigma_b = nn.Parameter(
            torch.from_numpy(
                np.full(shape=[out_size],
                        fill_value=sigma0 /
                        np.sqrt(float(self.in_size)))).float())
        self.register_parameter("sigma_b", sigma_b)

        w = nn.Parameter(
            torch.from_numpy(
                np.full(
                    shape=[self.in_size, self.out_size],
                    fill_value=6 / np.sqrt(float(in_size) + float(out_size)),
                )).float())
        self.register_parameter("w", w)
        b = nn.Parameter(torch.from_numpy(np.zeros([out_size])).float())
        self.register_parameter("b", b)
Example 8
    def call(self, inputs: TensorType) -> TensorType:
        in_size = int(inputs.shape[1])
        epsilon_in = tf.random.normal(shape=[in_size])
        epsilon_out = tf.random.normal(shape=[self.out_size])
        epsilon_in = self._f_epsilon(epsilon_in)
        epsilon_out = self._f_epsilon(epsilon_out)
        epsilon_w = tf.matmul(a=tf.expand_dims(epsilon_in, -1),
                              b=tf.expand_dims(epsilon_out, 0))
        epsilon_b = epsilon_out

        action_activation = (
            tf.matmul(inputs, self.w + self.sigma_w * epsilon_w) + self.b +
            self.sigma_b * epsilon_b)

        fn = get_activation_fn(self.activation, framework="tf")
        if fn is not None:
            action_activation = fn(action_activation)
        return action_activation
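For context: in factorized NoisyNets, _f_epsilon is typically the transform f(x) = sign(x) * sqrt(|x|) from Fortunato et al. (2017). A sketch under that assumption (the method itself is not shown in this snippet):

    import tensorflow as tf

    def f_epsilon(x):
        # Factorized-noise transform: f(x) = sign(x) * sqrt(|x|).
        return tf.math.sign(x) * tf.math.sqrt(tf.math.abs(x))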
Example 9
    def _create_fc_net(self, layer_dims, activation, name=None):
        """Given a list of layer dimensions (incl. input-dim), creates FC-net.

        Args:
            layer_dims (Tuple[int]): Tuple of layer dims, including the input
                dimension.
            activation (str): An activation specifier string (e.g. "relu").
            name (Optional[str]): Name prefix for the generated layers.

        Examples:
            If layer_dims is [4,8,6] we'll have a two layer net: 4->8 (8 nodes)
            and 8->6 (6 nodes), where the second layer (6 nodes) does not have
            an activation anymore. 4 is the input dimension.
        """
        layers = (
            [tf.keras.layers.Input(shape=(layer_dims[0],), name="{}_in".format(name))]
            if self.framework != "torch"
            else []
        )

        for i in range(len(layer_dims) - 1):
            act = activation if i < len(layer_dims) - 2 else None
            if self.framework == "torch":
                layers.append(
                    SlimFC(
                        in_size=layer_dims[i],
                        out_size=layer_dims[i + 1],
                        initializer=torch.nn.init.xavier_uniform_,
                        activation_fn=act,
                    )
                )
            else:
                layers.append(
                    tf.keras.layers.Dense(
                        units=layer_dims[i + 1],
                        activation=get_activation_fn(act),
                        name="{}_{}".format(name, i),
                    )
                )

        if self.framework == "torch":
            return nn.Sequential(*layers)
        else:
            return tf.keras.Sequential(layers)
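Following the docstring's example, a torch-side sketch of what _create_fc_net([4, 8, 6], "relu") assembles (dims are hypothetical):

    import torch
    from ray.rllib.models.torch.misc import SlimFC

    net = torch.nn.Sequential(
        SlimFC(in_size=4, out_size=8,
               initializer=torch.nn.init.xavier_uniform_,
               activation_fn="relu"),
        SlimFC(in_size=8, out_size=6,
               initializer=torch.nn.init.xavier_uniform_,
               activation_fn=None),
    )
    out = net(torch.randn(2, 4))  # shape: (2, 6)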
Example 10
    def _build_actor_net(self, name_):
        actor_hidden_activation = self.model_config["actor_hidden_activation"]
        actor_hiddens = self.model_config["actor_hiddens"]

        # Build the policy network.
        actor_net = nn.Sequential()

        activation = get_activation_fn(actor_hidden_activation,
                                       framework="torch")
        ins = self.obs_ins
        for i, n in enumerate(actor_hiddens):
            actor_net.add_module(
                f"{name_}_hidden_{i}",
                SlimFC(
                    ins,
                    n,
                    initializer=torch.nn.init.xavier_uniform_,
                    activation_fn=activation,
                ),
            )
            ins = n

        # also includes log_std in continuous case
        n_act_out = (self.action_space.n if self._is_action_discrete else 2 *
                     self.action_dim)
        actor_net.add_module(
            f"{name_}_out",
            SlimFC(
                ins,
                n_act_out,
                initializer=torch.nn.init.xavier_uniform_,
                activation_fn=None,
            ),
        )

        return actor_net
Example 11
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        if not model_config.get("conv_filters"):
            model_config["conv_filters"] = get_filter_config(obs_space.shape)

        super(CustomVisionNetwork, self).__init__(
            obs_space, action_space, num_outputs, model_config, name)

        activation = get_activation_fn(
            self.model_config.get("conv_activation"), framework="tf")
        filters = self.model_config["conv_filters"]
        assert len(filters) > 0,\
            "Must provide at least 1 entry in `conv_filters`!"

        input_shape = obs_space.shape
        self.data_format = "channels_last"

        inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
        #is_training = tf.keras.layers.Input(
        #    shape=(), dtype=tf.bool, batch_size=1, name="is_training")
        last_layer = inputs
        # Whether the last layer is the output of a Flattened (rather than
        # a n x (1,1) Conv2D).
        self.last_layer_is_flattened = False

        # Build the action layers
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                padding="same",
                activation=activation,
                data_format="channels_last",
                name="conv{}".format(i))(last_layer)

        out_size, kernel, stride = filters[-1]

        p_layer = tf.keras.layers.Conv2D(
            filters=out_size,
            kernel_size=kernel,
            strides=(stride, stride),
            padding="valid",
            data_format="channels_last",
            name="conv{}".format(len(filters)))(last_layer)
        p_layer = tf.keras.layers.ReLU()(p_layer)

        v_layer = tf.keras.layers.Conv2D(
            filters=out_size,
            kernel_size=kernel,
            strides=(stride, stride),
            padding="valid",
            data_format="channels_last",
            name="conv{}".format(len(filters) + 1))(last_layer)
        v_layer = tf.keras.layers.ReLU()(v_layer)

        # last_layer = tf1.layers.AveragePooling2D((2,2),(2,2))(last_layer)
        p_layer = tf.keras.layers.Flatten(data_format="channels_last")(p_layer)
        v_layer = tf.keras.layers.Flatten(data_format="channels_last")(v_layer)
        self.last_layer_is_flattened = True

        self.num_outputs_p = p_layer.shape[1]
        self.num_outputs_v = v_layer.shape[1]
        self._value_out = v_layer

        self.base_model = tf.keras.Model(inputs, [p_layer, self._value_out])
        self.base_model.summary()
Example 12
    def __init__(self,
                 obs_space: Space,
                 action_space: Space,
                 num_outputs: int,
                 model_config: Dict[str, Any],
                 name: str,
                 num_frames: int = 1) -> None:
        # Call base initializer first
        super().__init__(obs_space, action_space, None, model_config, name)

        # Backup some user arguments
        self.num_frames = num_frames
        self.num_outputs = num_outputs

        # Define some proxies for convenience
        sensor_space_start = 0
        for field, space in obs_space.original_space.spaces.items():
            if field != "sensors":
                sensor_space_start += flatdim(space)
            else:
                sensor_space_size = flatdim(space)
                sensor_space_end = sensor_space_start + sensor_space_size
                break
        self.sensor_space_range = [sensor_space_start, sensor_space_end]

        # Extract some user arguments
        activation = get_activation_fn(model_config.get("fcnet_activation"))
        no_final_linear = model_config.get("no_final_linear")
        hiddens = model_config.get("fcnet_hiddens", [])
        vf_share_layers = model_config.get("vf_share_layers")

        # Specify the inputs
        if self.num_frames > 1:
            self.view_requirements["prev_n_obs"] = ViewRequirement(
                data_col=SampleBatch.OBS,
                shift="-{}:-1".format(num_frames),
                space=obs_space)
            self.view_requirements["prev_n_act"] = ViewRequirement(
                data_col=SampleBatch.ACTIONS,
                shift="-{}:-1".format(num_frames),
                space=action_space)
            self.view_requirements["prev_n_rew"] = ViewRequirement(
                data_col=SampleBatch.REWARDS,
                shift="-{}:-1".format(num_frames))

        # Buffer to store last computed value
        self._last_value = None

        # Define the input layer of the model
        stack_size = sensor_space_size + action_space.shape[0] + 1
        obs = tf.keras.layers.Input(shape=obs_space.shape, name="obs")
        if self.num_frames > 1:
            stack = tf.keras.layers.Input(shape=(self.num_frames, stack_size),
                                          name="stack")
            inputs = [obs, stack]
        else:
            inputs = obs

        # Build features extraction network
        # In:  (batch_size, n_features, n_timesteps)
        # Out: (batch_size, n_filters, n_timesteps - (kernel_size - 1))
        if self.num_frames >= 16:
            conv_1 = tf.keras.layers.Conv1D(filters=4,
                                            kernel_size=5,
                                            strides=1,
                                            activation="tanh",
                                            padding="valid",
                                            name="conv_1")(stack)

            pool_1 = tf.keras.layers.AveragePooling1D(pool_size=2,
                                                      strides=2,
                                                      padding="valid",
                                                      name="pool_1")(conv_1)

            conv_2 = tf.keras.layers.Conv1D(filters=8,
                                            kernel_size=5,
                                            strides=1,
                                            activation="tanh",
                                            padding="valid",
                                            name="conv_2")(pool_1)

            pool_2 = tf.keras.layers.AveragePooling1D(pool_size=2,
                                                      strides=2,
                                                      padding="valid",
                                                      name="pool_2")(conv_2)

            # Gather observation and extracted features as input
            flatten = tf.keras.layers.Flatten(name="flatten")(pool_2)
            features = tf.keras.layers.Dense(
                units=8,
                name="fc_features",
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(flatten)
            concat = tf.keras.layers.Concatenate(
                axis=-1, name="concat")([obs, features])
        elif self.num_frames > 1:
            # Gather current observation and previous stack as input
            features = tf.keras.layers.Flatten(name="flatten")(stack)
            concat = tf.keras.layers.Concatenate(
                axis=-1, name="concat")([obs, features])
        else:
            # Current observation is the only input
            concat = obs

        # concat = tf.keras.layers.GaussianNoise(0.1)(concat)

        # Create policy layers 0 to second-last.
        i = 1
        last_layer = concat
        for size in hiddens[:-1]:
            last_layer = tf.keras.layers.Dense(
                units=size,
                name="fc_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
            i += 1

        # The last layer is adjusted to be of size num_outputs, but it is a
        # layer with activation.
        if no_final_linear:
            logits_out = tf.keras.layers.Dense(
                units=num_outputs,
                name="fc_out",
                activation=activation,
                kernel_initializer=normc_initializer(0.01))(last_layer)
        # Finish the layers with the provided sizes (`hiddens`), plus a last
        # linear layer of size num_outputs.
        else:
            last_layer = tf.keras.layers.Dense(
                units=hiddens[-1],
                name="fc_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
            logits_out = tf.keras.layers.Dense(
                units=num_outputs,
                name="fc_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)

        last_vf_layer = None
        if not vf_share_layers:
            # Build a dedicated hidden layers for the value net if requested
            i = 1
            last_vf_layer = concat
            for size in hiddens:
                last_vf_layer = tf.keras.layers.Dense(
                    units=size,
                    name="fc_value_{}".format(i),
                    activation=activation,
                    kernel_initializer=normc_initializer(1.0))(last_vf_layer)
                i += 1

        value_out = tf.keras.layers.Dense(
            units=1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(1.0))(last_vf_layer
                                                       or last_layer)

        # Finish definition of the model
        self.base_model = tf.keras.Model(inputs, [logits_out, value_out])
Example 13
    def __init__(
        self,
        input_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        num_outputs: Optional[int] = None,
        *,
        name: str = "",
        conv_filters: Optional[Sequence[Sequence[int]]] = None,
        conv_activation: Optional[str] = None,
        post_fcnet_hiddens: Optional[Sequence[int]] = (),
        post_fcnet_activation: Optional[str] = None,
        no_final_linear: bool = False,
        vf_share_layers: bool = False,
        free_log_std: bool = False,
        **kwargs,
    ):

        super().__init__(name=name)

        if not conv_filters:
            conv_filters = get_filter_config(input_space.shape)
        assert len(conv_filters) > 0,\
            "Must provide at least 1 entry in `conv_filters`!"

        conv_activation = get_activation_fn(conv_activation, framework="tf")
        post_fcnet_activation = get_activation_fn(post_fcnet_activation,
                                                  framework="tf")

        input_shape = input_space.shape
        self.data_format = "channels_last"

        inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
        last_layer = inputs
        # Whether the last layer is the output of a Flattened (rather than
        # a n x (1,1) Conv2D).
        self.last_layer_is_flattened = False

        # Build the action layers
        for i, (out_size, kernel, stride) in enumerate(conv_filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=stride if isinstance(stride, (list, tuple)) else
                (stride, stride),
                activation=conv_activation,
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i))(last_layer)

        out_size, kernel, stride = conv_filters[-1]

        # No final linear: Last layer has activation function and exits with
        # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
        # on `post_fcnet_...` settings).
        if no_final_linear and num_outputs:
            last_layer = tf.keras.layers.Conv2D(
                out_size if post_fcnet_hiddens else num_outputs,
                kernel,
                strides=stride if isinstance(stride, (list, tuple)) else
                (stride, stride),
                activation=conv_activation,
                padding="valid",
                data_format="channels_last",
                name="conv_out")(last_layer)
            # Add (optional) post-fc-stack after last Conv2D layer.
            layer_sizes = post_fcnet_hiddens[:-1] + (
                [num_outputs] if post_fcnet_hiddens else [])
            for i, out_size in enumerate(layer_sizes):
                last_layer = tf.keras.layers.Dense(
                    out_size,
                    name="post_fcnet_{}".format(i),
                    activation=post_fcnet_activation,
                    kernel_initializer=normc_initializer(1.0))(last_layer)

        # Finish network normally (w/o overriding last layer size with
        # `num_outputs`), then add another linear one of size `num_outputs`.
        else:
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=stride if isinstance(stride, (list, tuple)) else
                (stride, stride),
                activation=conv_activation,
                padding="valid",
                data_format="channels_last",
                name="conv{}".format(len(conv_filters)))(last_layer)

            # num_outputs defined. Use that to create an exact
            # `num_output`-sized (1,1)-Conv2D.
            if num_outputs:
                if post_fcnet_hiddens:
                    last_cnn = last_layer = tf.keras.layers.Conv2D(
                        post_fcnet_hiddens[0], [1, 1],
                        activation=post_fcnet_activation,
                        padding="same",
                        data_format="channels_last",
                        name="conv_out")(last_layer)
                    # Add (optional) post-fc-stack after last Conv2D layer.
                    for i, out_size in enumerate(post_fcnet_hiddens[1:] +
                                                 [num_outputs]):
                        last_layer = tf.keras.layers.Dense(
                            out_size,
                            name="post_fcnet_{}".format(i + 1),
                            activation=post_fcnet_activation
                            if i < len(post_fcnet_hiddens) - 1 else None,
                            kernel_initializer=normc_initializer(1.0))(
                                last_layer)
                else:
                    last_cnn = last_layer = tf.keras.layers.Conv2D(
                        num_outputs, [1, 1],
                        activation=None,
                        padding="same",
                        data_format="channels_last",
                        name="conv_out")(last_layer)

                if last_cnn.shape[1] != 1 or last_cnn.shape[2] != 1:
                    raise ValueError(
                        "Given `conv_filters` ({}) do not result in a [B, 1, "
                        "1, {} (`num_outputs`)] shape (but in {})! Please "
                        "adjust your Conv2D stack such that the dims 1 and 2 "
                        "are both 1.".format(self.model_config["conv_filters"],
                                             num_outputs,
                                             list(last_cnn.shape)))

            # num_outputs not known -> Flatten.
            else:
                self.last_layer_is_flattened = True
                last_layer = tf.keras.layers.Flatten(
                    data_format="channels_last")(last_layer)

                # Add (optional) post-fc-stack after last Conv2D layer.
                for i, out_size in enumerate(post_fcnet_hiddens):
                    last_layer = tf.keras.layers.Dense(
                        out_size,
                        name="post_fcnet_{}".format(i),
                        activation=post_fcnet_activation,
                        kernel_initializer=normc_initializer(1.0))(last_layer)
        logits_out = last_layer

        # Build the value layers
        if vf_share_layers:
            if not self.last_layer_is_flattened:
                last_layer = tf.keras.layers.Lambda(
                    lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
            value_out = tf.keras.layers.Dense(
                1,
                name="value_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)
        else:
            # build a parallel set of hidden layers for the value net
            last_layer = inputs
            for i, (out_size, kernel,
                    stride) in enumerate(conv_filters[:-1], 1):
                last_layer = tf.keras.layers.Conv2D(
                    out_size,
                    kernel,
                    strides=stride if isinstance(stride, (list, tuple)) else
                    (stride, stride),
                    activation=conv_activation,
                    padding="same",
                    data_format="channels_last",
                    name="conv_value_{}".format(i))(last_layer)
            out_size, kernel, stride = conv_filters[-1]
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=stride if isinstance(stride, (list, tuple)) else
                (stride, stride),
                activation=conv_activation,
                padding="valid",
                data_format="channels_last",
                name="conv_value_{}".format(len(conv_filters)))(last_layer)
            last_layer = tf.keras.layers.Conv2D(
                1, [1, 1],
                activation=None,
                padding="same",
                data_format="channels_last",
                name="conv_value_out")(last_layer)
            value_out = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

        self.base_model = tf.keras.Model(inputs, [logits_out, value_out])
Example 14
    def __init__(
            self,
            obs_space: gym.spaces.Space,
            action_space: gym.spaces.Space,
            num_outputs: int,
            model_config: ModelConfigDict,
            name: str,
            # Extra DDPGActionModel args:
            actor_hiddens: List[int] = [256, 256],
            actor_hidden_activation: str = "relu",
            critic_hiddens: List[int] = [256, 256],
            critic_hidden_activation: str = "relu",
            twin_q: bool = False,
            add_layer_norm: bool = False):
        """Initialize variables of this model.

        Extra model kwargs:
            actor_hidden_activation (str): activation for actor network
            actor_hiddens (list): hidden layers sizes for actor network
            critic_hidden_activation (str): activation for critic network
            critic_hiddens (list): hidden layers sizes for critic network
            twin_q (bool): build twin Q networks.
            add_layer_norm (bool): Enable layer norm (for param noise).

        Note that the core layers for forward() are not defined here, this
        only defines the layers for the output heads. Those layers for
        forward() should be defined in subclasses of DDPGTorchModel.
        """
        nn.Module.__init__(self)
        super(DDPGTorchModel, self).__init__(obs_space, action_space,
                                             num_outputs, model_config, name)

        self.bounded = np.logical_and(self.action_space.bounded_above,
                                      self.action_space.bounded_below).any()
        self.action_dim = np.product(self.action_space.shape)

        # Build the policy network.
        self.policy_model = nn.Sequential()
        ins = num_outputs
        self.obs_ins = ins
        activation = get_activation_fn(actor_hidden_activation,
                                       framework="torch")
        for i, n in enumerate(actor_hiddens):
            self.policy_model.add_module(
                "action_{}".format(i),
                SlimFC(ins,
                       n,
                       initializer=torch.nn.init.xavier_uniform_,
                       activation_fn=activation))
            # Add LayerNorm after each Dense.
            if add_layer_norm:
                self.policy_model.add_module("LayerNorm_A_{}".format(i),
                                             nn.LayerNorm(n))
            ins = n

        self.policy_model.add_module(
            "action_out",
            SlimFC(ins,
                   self.action_dim,
                   initializer=torch.nn.init.xavier_uniform_,
                   activation_fn=None))

        # Use sigmoid to scale to [0,1], but also double magnitude of input to
        # emulate behaviour of tanh activation used in DDPG and TD3 papers.
        # After sigmoid squashing, re-scale to env action space bounds.
        class _Lambda(nn.Module):
            def __init__(self_):
                super().__init__()
                low_action = nn.Parameter(
                    torch.from_numpy(self.action_space.low).float())
                low_action.requires_grad = False
                self_.register_parameter("low_action", low_action)
                action_range = nn.Parameter(
                    torch.from_numpy(self.action_space.high -
                                     self.action_space.low).float())
                action_range.requires_grad = False
                self_.register_parameter("action_range", action_range)

            def forward(self_, x):
                sigmoid_out = nn.Sigmoid()(2.0 * x)
                squashed = self_.action_range * sigmoid_out + self_.low_action
                return squashed

        # Only squash if we have bounded actions.
        if self.bounded:
            self.policy_model.add_module("action_out_squashed", _Lambda())

        # Build the Q-net(s), including target Q-net(s).
        def build_q_net(name_):
            activation = get_activation_fn(critic_hidden_activation,
                                           framework="torch")
            # For continuous actions: Feed obs and actions (concatenated)
            # through the NN. For discrete actions, only obs.
            q_net = nn.Sequential()
            ins = self.obs_ins + self.action_dim
            for i, n in enumerate(critic_hiddens):
                q_net.add_module(
                    "{}_hidden_{}".format(name_, i),
                    SlimFC(ins,
                           n,
                           initializer=torch.nn.init.xavier_uniform_,
                           activation_fn=activation))
                ins = n

            q_net.add_module(
                "{}_out".format(name_),
                SlimFC(ins,
                       1,
                       initializer=torch.nn.init.xavier_uniform_,
                       activation_fn=None))
            return q_net

        self.q_model = build_q_net("q")
        if twin_q:
            self.twin_q_model = build_q_net("twin_q")
        else:
            self.twin_q_model = None
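A standalone sketch of the _Lambda squashing above (bounds are hypothetical): sigmoid(2x) maps the raw policy output into (0, 1), which is then rescaled to the action bounds [low, high].

    import torch

    low, high = torch.tensor([-2.0]), torch.tensor([2.0])
    x = torch.tensor([0.0])  # raw policy output
    squashed = (high - low) * torch.sigmoid(2.0 * x) + low
    print(squashed)  # tensor([0.]): sigmoid(0) = 0.5, the midpoint of [-2, 2]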
Example 15
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        if not model_config.get("conv_filters"):
            model_config["conv_filters"] = get_filter_config(obs_space.shape)

        super(VisionNetwork, self).__init__(obs_space, action_space,
                                            num_outputs, model_config, name)

        activation = get_activation_fn(
            self.model_config.get("conv_activation"), framework="tf")
        filters = self.model_config["conv_filters"]
        assert len(filters) > 0,\
            "Must provide at least 1 entry in `conv_filters`!"
        no_final_linear = self.model_config.get("no_final_linear")
        vf_share_layers = self.model_config.get("vf_share_layers")

        inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                       name="observations")
        last_layer = inputs
        # Whether the last layer is the output of a Flattened (rather than
        # a n x (1,1) Conv2D).
        self.last_layer_is_flattened = False

        # Build the action layers
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i))(last_layer)

        out_size, kernel, stride = filters[-1]

        # No final linear: Last layer is a Conv2D and uses num_outputs.
        if no_final_linear and num_outputs:
            last_layer = tf.keras.layers.Conv2D(num_outputs,
                                                kernel,
                                                strides=(stride, stride),
                                                activation=activation,
                                                padding="valid",
                                                data_format="channels_last",
                                                name="conv_out")(last_layer)
            conv_out = last_layer
        # Finish network normally (w/o overriding last layer size with
        # `num_outputs`), then add another linear one of size `num_outputs`.
        else:
            last_layer = tf.keras.layers.Conv2D(out_size,
                                                kernel,
                                                strides=(stride, stride),
                                                activation=activation,
                                                padding="valid",
                                                data_format="channels_last",
                                                name="conv{}".format(
                                                    len(filters)))(last_layer)

            # num_outputs defined. Use that to create an exact
            # `num_output`-sized (1,1)-Conv2D.
            if num_outputs:
                conv_out = tf.keras.layers.Conv2D(num_outputs, [1, 1],
                                                  activation=None,
                                                  padding="same",
                                                  data_format="channels_last",
                                                  name="conv_out")(last_layer)

                if conv_out.shape[1] != 1 or conv_out.shape[2] != 1:
                    raise ValueError(
                        "Given `conv_filters` ({}) do not result in a [B, 1, "
                        "1, {} (`num_outputs`)] shape (but in {})! Please "
                        "adjust your Conv2D stack such that the dims 1 and 2 "
                        "are both 1.".format(self.model_config["conv_filters"],
                                             self.num_outputs,
                                             list(conv_out.shape)))

            # num_outputs not known -> Flatten, then set self.num_outputs
            # to the resulting number of nodes.
            else:
                self.last_layer_is_flattened = True
                conv_out = tf.keras.layers.Flatten(
                    data_format="channels_last")(last_layer)
                self.num_outputs = conv_out.shape[1]

        # Build the value layers
        if vf_share_layers:
            last_layer = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
            value_out = tf.keras.layers.Dense(
                1,
                name="value_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)
        else:
            # build a parallel set of hidden layers for the value net
            last_layer = inputs
            for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
                last_layer = tf.keras.layers.Conv2D(
                    out_size,
                    kernel,
                    strides=(stride, stride),
                    activation=activation,
                    padding="same",
                    data_format="channels_last",
                    name="conv_value_{}".format(i))(last_layer)
            out_size, kernel, stride = filters[-1]
            last_layer = tf.keras.layers.Conv2D(out_size,
                                                kernel,
                                                strides=(stride, stride),
                                                activation=activation,
                                                padding="valid",
                                                data_format="channels_last",
                                                name="conv_value_{}".format(
                                                    len(filters)))(last_layer)
            last_layer = tf.keras.layers.Conv2D(
                1, [1, 1],
                activation=None,
                padding="same",
                data_format="channels_last",
                name="conv_value_out")(last_layer)
            value_out = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

        self.base_model = tf.keras.Model(inputs, [conv_out, value_out])
        self.register_variables(self.base_model.variables)
Example 16
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        super(FullyConnectedNetwork,
              self).__init__(obs_space, action_space, num_outputs,
                             model_config, name)

        hiddens = model_config.get("fcnet_hiddens", []) + \
            model_config.get("post_fcnet_hiddens", [])
        activation = model_config.get("fcnet_activation")
        if not model_config.get("fcnet_hiddens", []):
            activation = model_config.get("post_fcnet_activation")
        activation = get_activation_fn(activation)
        no_final_linear = model_config.get("no_final_linear")
        vf_share_layers = model_config.get("vf_share_layers")
        free_log_std = model_config.get("free_log_std")

        # Generate free-floating bias variables for the second half of
        # the outputs.
        if free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two", num_outputs)
            num_outputs = num_outputs // 2
            self.log_std_var = tf.Variable([0.0] * num_outputs,
                                           dtype=tf.float32,
                                           name="log_std")

        # We are using obs_flat, so take the flattened shape as input.
        inputs = tf.keras.layers.Input(shape=(int(np.product(
            obs_space.shape)), ),
                                       name="observations")
        # Last hidden layer output (before logits outputs).
        last_layer = inputs
        # The action distribution outputs.
        logits_out = None
        i = 1

        # Create layers 0 to second-last.
        for size in hiddens[:-1]:
            last_layer = tf.keras.layers.Dense(
                size,
                name="fc_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
            i += 1

        # The last layer is adjusted to be of size num_outputs, but it's a
        # layer with activation.
        if no_final_linear and num_outputs:
            logits_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
        # Finish the layers with the provided sizes (`hiddens`), plus -
        # iff num_outputs > 0 - a last linear layer of size num_outputs.
        else:
            if len(hiddens) > 0:
                last_layer = tf.keras.layers.Dense(
                    hiddens[-1],
                    name="fc_{}".format(i),
                    activation=activation,
                    kernel_initializer=normc_initializer(1.0))(last_layer)
            if num_outputs:
                logits_out = tf.keras.layers.Dense(
                    num_outputs,
                    name="fc_out",
                    activation=None,
                    kernel_initializer=normc_initializer(0.01))(last_layer)
            # Adjust num_outputs to be the number of nodes in the last layer.
            else:
                self.num_outputs = ([int(np.product(obs_space.shape))] +
                                    hiddens[-1:])[-1]

        # Concat the log std vars to the end of the state-dependent means.
        if free_log_std and logits_out is not None:

            def tiled_log_std(x):
                return tf.tile(tf.expand_dims(self.log_std_var, 0),
                               [tf.shape(x)[0], 1])

            log_std_out = tf.keras.layers.Lambda(tiled_log_std)(inputs)
            logits_out = tf.keras.layers.Concatenate(axis=1)(
                [logits_out, log_std_out])

        last_vf_layer = None
        if not vf_share_layers:
            # Build a parallel set of hidden layers for the value net.
            last_vf_layer = inputs
            i = 1
            for size in hiddens:
                last_vf_layer = tf.keras.layers.Dense(
                    size,
                    name="fc_value_{}".format(i),
                    activation=activation,
                    kernel_initializer=normc_initializer(1.0))(last_vf_layer)
                i += 1

        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(
                last_vf_layer if last_vf_layer is not None else last_layer)

        self.base_model = tf.keras.Model(inputs, [
            (logits_out if logits_out is not None else last_layer), value_out
        ])
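A shape sketch of the free_log_std path (numbers are hypothetical): if 8 outputs are requested, the Dense head emits 4 state-dependent means and the tiled log-std variable supplies the remaining 4 columns.

    import tensorflow as tf

    log_std_var = tf.Variable([0.0] * 4)                # shape: (4,)
    means = tf.zeros([32, 4])                           # batch of 32 means
    log_std = tf.tile(tf.expand_dims(log_std_var, 0),
                      [tf.shape(means)[0], 1])          # shape: (32, 4)
    logits = tf.concat([means, log_std], axis=1)        # shape: (32, 8)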
Example 17
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        if not model_config.get("conv_filters"):
            model_config["conv_filters"] = get_filter_config(obs_space.shape)

        super(VisionNetwork, self).__init__(obs_space, action_space,
                                            num_outputs, model_config, name)

        activation = get_activation_fn(
            self.model_config.get("conv_activation"), framework="tf")
        filters = self.model_config["conv_filters"]
        assert len(filters) > 0,\
            "Must provide at least 1 entry in `conv_filters`!"

        # Post FC net config.
        post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
        post_fcnet_activation = get_activation_fn(
            model_config.get("post_fcnet_activation"), framework="tf")

        no_final_linear = self.model_config.get("no_final_linear")
        vf_share_layers = self.model_config.get("vf_share_layers")
        self.traj_view_framestacking = False

        # Perform Atari framestacking via traj. view API.
        if model_config.get("num_framestacks") != "auto" and \
                model_config.get("num_framestacks", 0) > 1:
            input_shape = obs_space.shape + (model_config["num_framestacks"], )
            self.data_format = "channels_first"
            self.traj_view_framestacking = True
        else:
            input_shape = obs_space.shape
            self.data_format = "channels_last"

        inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
        last_layer = inputs
        # Whether the last layer is the output of a Flattened (rather than
        # a n x (1,1) Conv2D).
        self.last_layer_is_flattened = False

        # Build the action layers
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=stride if isinstance(stride, (list, tuple)) else
                (stride, stride),
                activation=activation,
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i))(last_layer)

        out_size, kernel, stride = filters[-1]

        # No final linear: Last layer has activation function and exits with
        # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
        # on `post_fcnet_...` settings).
        if no_final_linear and num_outputs:
            last_layer = tf.keras.layers.Conv2D(
                out_size if post_fcnet_hiddens else num_outputs,
                kernel,
                strides=stride if isinstance(stride, (list, tuple)) else
                (stride, stride),
                activation=activation,
                padding="valid",
                data_format="channels_last",
                name="conv_out")(last_layer)
            # Add (optional) post-fc-stack after last Conv2D layer.
            layer_sizes = post_fcnet_hiddens[:-1] + (
                [num_outputs] if post_fcnet_hiddens else [])
            for i, out_size in enumerate(layer_sizes):
                last_layer = tf.keras.layers.Dense(
                    out_size,
                    name="post_fcnet_{}".format(i),
                    activation=post_fcnet_activation,
                    kernel_initializer=normc_initializer(1.0))(last_layer)

        # Finish network normally (w/o overriding last layer size with
        # `num_outputs`), then add another linear one of size `num_outputs`.
        else:
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=stride if isinstance(stride, (list, tuple)) else
                (stride, stride),
                activation=activation,
                padding="valid",
                data_format="channels_last",
                name="conv{}".format(len(filters)))(last_layer)

            # num_outputs defined. Use that to create an exact
            # `num_output`-sized (1,1)-Conv2D.
            if num_outputs:
                if post_fcnet_hiddens:
                    last_cnn = last_layer = tf.keras.layers.Conv2D(
                        post_fcnet_hiddens[0], [1, 1],
                        activation=post_fcnet_activation,
                        padding="same",
                        data_format="channels_last",
                        name="conv_out")(last_layer)
                    # Add (optional) post-fc-stack after last Conv2D layer.
                    for i, out_size in enumerate(post_fcnet_hiddens[1:] +
                                                 [num_outputs]):
                        last_layer = tf.keras.layers.Dense(
                            out_size,
                            name="post_fcnet_{}".format(i + 1),
                            activation=post_fcnet_activation
                            if i < len(post_fcnet_hiddens) - 1 else None,
                            kernel_initializer=normc_initializer(1.0))(
                                last_layer)
                else:
                    last_cnn = last_layer = tf.keras.layers.Conv2D(
                        num_outputs, [1, 1],
                        activation=None,
                        padding="same",
                        data_format="channels_last",
                        name="conv_out")(last_layer)

                if last_cnn.shape[1] != 1 or last_cnn.shape[2] != 1:
                    raise ValueError(
                        "Given `conv_filters` ({}) do not result in a [B, 1, "
                        "1, {} (`num_outputs`)] shape (but in {})! Please "
                        "adjust your Conv2D stack such that the dims 1 and 2 "
                        "are both 1.".format(self.model_config["conv_filters"],
                                             self.num_outputs,
                                             list(last_cnn.shape)))

            # num_outputs not known -> Flatten, then set self.num_outputs
            # to the resulting number of nodes.
            else:
                self.last_layer_is_flattened = True
                last_layer = tf.keras.layers.Flatten(
                    data_format="channels_last")(last_layer)

                # Add (optional) post-fc-stack after last Conv2D layer.
                for i, out_size in enumerate(post_fcnet_hiddens):
                    last_layer = tf.keras.layers.Dense(
                        out_size,
                        name="post_fcnet_{}".format(i),
                        activation=post_fcnet_activation,
                        kernel_initializer=normc_initializer(1.0))(last_layer)
                self.num_outputs = last_layer.shape[1]
        logits_out = last_layer

        # Build the value layers
        if vf_share_layers:
            if not self.last_layer_is_flattened:
                last_layer = tf.keras.layers.Lambda(
                    lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
            value_out = tf.keras.layers.Dense(
                1,
                name="value_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)
        else:
            # build a parallel set of hidden layers for the value net
            last_layer = inputs
            for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
                last_layer = tf.keras.layers.Conv2D(
                    out_size,
                    kernel,
                    strides=stride if isinstance(stride, (list, tuple)) else
                    (stride, stride),
                    activation=activation,
                    padding="same",
                    data_format="channels_last",
                    name="conv_value_{}".format(i))(last_layer)
            out_size, kernel, stride = filters[-1]
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=stride if isinstance(stride, (list, tuple)) else
                (stride, stride),
                activation=activation,
                padding="valid",
                data_format="channels_last",
                name="conv_value_{}".format(len(filters)))(last_layer)
            last_layer = tf.keras.layers.Conv2D(
                1, [1, 1],
                activation=None,
                padding="same",
                data_format="channels_last",
                name="conv_value_out")(last_layer)
            value_out = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

        self.base_model = tf.keras.Model(inputs, [logits_out, value_out])

        # Optional: framestacking obs/new_obs for Atari.
        if self.traj_view_framestacking:
            from_ = model_config["num_framestacks"] - 1
            self.view_requirements[SampleBatch.OBS].shift = \
                "-{}:0".format(from_)
            self.view_requirements[SampleBatch.OBS].shift_from = -from_
            self.view_requirements[SampleBatch.OBS].shift_to = 0
            self.view_requirements[SampleBatch.NEXT_OBS] = ViewRequirement(
                data_col=SampleBatch.OBS,
                shift="-{}:1".format(from_ - 1),
                space=self.view_requirements[SampleBatch.OBS].space,
                used_for_compute_actions=False,
            )
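The framestacking block above rewrites the view requirements so that OBS covers the last `num_framestacks` frames and NEXT_OBS the same window shifted by one. A minimal sketch (pure Python, no RLlib required) of the shift strings it computes, assuming num_framestacks=4 as an illustrative value:

    num_framestacks = 4
    from_ = num_framestacks - 1

    obs_shift = "-{}:0".format(from_)           # OBS covers frames t-3 .. t
    next_obs_shift = "-{}:1".format(from_ - 1)  # NEXT_OBS covers t-2 .. t+1

    print(obs_shift)       # -> "-3:0"
    print(next_obs_shift)  # -> "-2:1"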
Example n. 18
    def __init__(
        self,
        input_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        num_outputs: Optional[int] = None,
        *,
        name: str = "",
        fcnet_hiddens: Optional[Sequence[int]] = (),
        fcnet_activation: Optional[str] = None,
        post_fcnet_hiddens: Optional[Sequence[int]] = (),
        post_fcnet_activation: Optional[str] = None,
        no_final_linear: bool = False,
        vf_share_layers: bool = False,
        free_log_std: bool = False,
        **kwargs,
    ):
        super().__init__(name=name)

        hiddens = list(fcnet_hiddens or ()) + list(post_fcnet_hiddens or ())
        activation = fcnet_activation
        if not fcnet_hiddens:
            activation = post_fcnet_activation
        activation = get_activation_fn(activation)

        # Generate free-floating bias variables for the second half of
        # the outputs.
        if free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two",
                num_outputs,
            )
            num_outputs = num_outputs // 2
            self.log_std_var = tf.Variable([0.0] * num_outputs,
                                           dtype=tf.float32,
                                           name="log_std")

        # We are using obs_flat, so take the flattened shape as input.
        inputs = tf.keras.layers.Input(shape=(int(np.product(
            input_space.shape)), ),
                                       name="observations")
        # Last hidden layer output (before logits outputs).
        last_layer = inputs
        # The action distribution outputs.
        logits_out = None
        i = 1

        # Create layers 0 to second-last.
        for size in hiddens[:-1]:
            last_layer = tf.keras.layers.Dense(
                size,
                name="fc_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0),
            )(last_layer)
            i += 1

        # The last layer is adjusted to be of size num_outputs, but keeps
        # the activation function (i.e., no final linear layer).
        if no_final_linear and num_outputs:
            logits_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=activation,
                kernel_initializer=normc_initializer(1.0),
            )(last_layer)
        # Finish the layers with the provided sizes (`hiddens`), plus -
        # iff num_outputs > 0 - a last linear layer of size num_outputs.
        else:
            if len(hiddens) > 0:
                last_layer = tf.keras.layers.Dense(
                    hiddens[-1],
                    name="fc_{}".format(i),
                    activation=activation,
                    kernel_initializer=normc_initializer(1.0),
                )(last_layer)
            if num_outputs:
                logits_out = tf.keras.layers.Dense(
                    num_outputs,
                    name="fc_out",
                    activation=None,
                    kernel_initializer=normc_initializer(0.01),
                )(last_layer)

        # Concat the log std vars to the end of the state-dependent means.
        if free_log_std and logits_out is not None:

            def tiled_log_std(x):
                return tf.tile(tf.expand_dims(self.log_std_var, 0),
                               [tf.shape(x)[0], 1])

            log_std_out = tf.keras.layers.Lambda(tiled_log_std)(inputs)
            logits_out = tf.keras.layers.Concatenate(axis=1)(
                [logits_out, log_std_out])

        last_vf_layer = None
        if not vf_share_layers:
            # Build a parallel set of hidden layers for the value net.
            last_vf_layer = inputs
            i = 1
            for size in hiddens:
                last_vf_layer = tf.keras.layers.Dense(
                    size,
                    name="fc_value_{}".format(i),
                    activation=activation,
                    kernel_initializer=normc_initializer(1.0),
                )(last_vf_layer)
                i += 1

        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01),
        )(last_vf_layer if last_vf_layer is not None else last_layer)

        self.base_model = tf.keras.Model(inputs, [
            (logits_out if logits_out is not None else last_layer), value_out
        ])
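The `free_log_std` path above appends one state-independent log-std vector to every batch row. Below is a self-contained sketch of that mechanic; the sizes (4-dim obs, 3 action means) are illustrative assumptions, not taken from the original:

    import numpy as np
    import tensorflow as tf

    num_outputs = 3  # means for a hypothetical 3-dim continuous action space
    log_std_var = tf.Variable([0.0] * num_outputs, dtype=tf.float32,
                              name="log_std")

    inputs = tf.keras.layers.Input(shape=(4,), name="observations")
    means = tf.keras.layers.Dense(num_outputs, activation=None)(inputs)

    def tiled_log_std(x):
        # Broadcast the single log-std vector across the batch dimension.
        return tf.tile(tf.expand_dims(log_std_var, 0), [tf.shape(x)[0], 1])

    log_std_out = tf.keras.layers.Lambda(tiled_log_std)(inputs)
    logits_out = tf.keras.layers.Concatenate(axis=1)([means, log_std_out])

    model = tf.keras.Model(inputs, logits_out)
    out = model(np.zeros((2, 4), np.float32))
    print(out.shape)  # (2, 6): 3 state-dependent means + 3 shared log-stds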
Example n. 19
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        if not model_config.get("conv_filters"):
            model_config["conv_filters"] = get_filter_config(obs_space.shape)

        super(CustomVisionNetwork,
              self).__init__(obs_space, action_space, num_outputs,
                             model_config, name)

        activation = get_activation_fn(
            self.model_config.get("conv_activation"), framework="tf")
        filters = self.model_config["conv_filters"]
        assert len(filters) > 0,\
            "Must provide at least 1 entry in `conv_filters`!"

        # Post FC net config.
        post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
        post_fcnet_activation = get_activation_fn(
            model_config.get("post_fcnet_activation"), framework="tf")

        no_final_linear = self.model_config.get("no_final_linear")
        vf_share_layers = self.model_config.get("vf_share_layers")
        self.traj_view_framestacking = False

        # Perform Atari framestacking via traj. view API.
        if model_config.get("num_framestacks") != "auto" and \
                model_config.get("num_framestacks", 0) > 1:
            input_shape = obs_space.shape + (model_config["num_framestacks"], )
            self.data_format = "channels_first"
            self.traj_view_framestacking = True
        else:
            input_shape = obs_space.shape
            self.data_format = "channels_last"

        inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
        #is_training = tf.keras.layers.Input(
        #    shape=(), dtype=tf.bool, batch_size=1, name="is_training")
        last_layer = inputs
        # Whether the last layer is the output of a Flatten layer (rather
        # than an n x (1,1) Conv2D).
        self.last_layer_is_flattened = False

        # Build the action layers
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            if i == 1:
                last_layer = tf.keras.layers.Conv2D(
                    out_size,
                    kernel,
                    strides=(stride, stride),
                    padding="same",
                    data_format="channels_last",
                    name="conv{}".format(i))(last_layer)
                #last_layer = tf.keras.layers.BatchNormalization()(last_layer, training=is_training[0])
                last_layer = tf.keras.layers.ReLU()(last_layer)
            else:
                input_layer = last_layer
                last_layer = tf.keras.layers.Conv2D(
                    out_size,
                    kernel,
                    strides=(stride, stride),
                    padding="same",
                    data_format="channels_last",
                    name="conv{}".format(i * 2 - 2))(last_layer)
                #last_layer = tf.keras.layers.BatchNormalization()(last_layer, training=is_training[0])
                last_layer = tf.keras.layers.ReLU()(last_layer)
                last_layer = tf.keras.layers.Conv2D(
                    out_size,
                    kernel,
                    strides=(stride, stride),
                    padding="same",
                    data_format="channels_last",
                    name="conv{}".format(i * 2 - 1))(last_layer)
                #last_layer = tf.keras.layers.BatchNormalization()(last_layer, training=is_training[0])
                last_layer = tf.keras.layers.Add()([input_layer, last_layer])
                last_layer = tf.keras.layers.ReLU()(last_layer)

        out_size, kernel, stride = filters[-1]

        p_layer = tf.keras.layers.Conv2D(filters=out_size,
                                         kernel_size=kernel,
                                         strides=(stride, stride),
                                         padding="valid",
                                         data_format="channels_last",
                                         name="conv{}".format(
                                             2 * len(filters)))(last_layer)
        p_layer = tf.keras.layers.ReLU()(p_layer)

        # last_layer = tf1.layers.AveragePooling2D((2,2),(2,2))(last_layer)
        #p_layer = tf.keras.layers.Flatten(data_format="channels_last")(p_layer)

        v_layer = tf.keras.layers.Conv2D(
            filters=1,
            kernel_size=kernel,
            strides=(stride, stride),
            padding="valid",
            data_format="channels_last",
            name="conv{}".format(2 * len(filters) + 1))(last_layer)
        v_layer = tf.keras.layers.ReLU()(v_layer)

        # last_layer = tf1.layers.AveragePooling2D((2,2),(2,2))(last_layer)
        p_layer = tf.keras.layers.Flatten(data_format="channels_last")(p_layer)
        v_layer = tf.keras.layers.Flatten(data_format="channels_last")(v_layer)
        self.last_layer_is_flattened = True
        '''
        # Add (optional) post-fc-stack after last Conv2D layer.
        for i, out_size in enumerate(post_fcnet_hiddens):
            last_layer = tf.keras.layers.Dense(
                out_size,
                name="post_fcnet_{}".format(i),
                activation=post_fcnet_activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
        '''
        self.num_outputs_p = p_layer.shape[1]
        self.num_outputs_v = v_layer.shape[1]
        logits_out = p_layer
        self._value_out = v_layer
        '''
        # Build the value layers
        if vf_share_layers:
            last_layer = tf.keras.layers.Flatten(
                data_format="channels_last")(last_layer)
            #last_layer = tf.keras.layers.Lambda(
            #    lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
            value_out = tf.keras.layers.Dense(
                1,
                name="value_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)
        else:
            # build a parallel set of hidden layers for the value net
            last_layer = inputs
            for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
                last_layer = tf.keras.layers.Conv2D(
                    out_size,
                    kernel,
                    strides=(stride, stride),
                    activation=activation,
                    padding="same",
                    data_format="channels_last",
                    name="conv_value_{}".format(i))(last_layer)
            out_size, kernel, stride = filters[-1]
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="valid",
                data_format="channels_last",
                name="conv_value_{}".format(len(filters)))(last_layer)
            last_layer = tf.keras.layers.Conv2D(
                1, [1, 1],
                activation=None,
                padding="same",
                data_format="channels_last",
                name="conv_value_out")(last_layer)
            value_out = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
        '''
        self.base_model = tf.keras.Model(inputs, [p_layer, self._value_out])
        self.base_model.summary()
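The action stack above arranges the inner conv layers as residual blocks (Conv2D -> ReLU -> Conv2D -> Add -> ReLU). A self-contained sketch of one such block follows; note that the Add() only matches shapes when the block preserves them, i.e. stride 1 and an input that already has out_size channels, which the loop above does not guarantee for stride > 1:

    import tensorflow as tf

    def residual_block(x, out_size, kernel, name):
        # Skip connection: requires x to already have out_size channels and
        # stride 1 in both convs, otherwise the Add() shapes mismatch.
        shortcut = x
        y = tf.keras.layers.Conv2D(out_size, kernel, strides=(1, 1),
                                   padding="same", name=name + "_a")(x)
        y = tf.keras.layers.ReLU()(y)
        y = tf.keras.layers.Conv2D(out_size, kernel, strides=(1, 1),
                                   padding="same", name=name + "_b")(y)
        y = tf.keras.layers.Add()([shortcut, y])
        return tf.keras.layers.ReLU()(y)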
Example n. 20
    def __init__(
        self,
        obs_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        num_outputs: int,
        model_config: ModelConfigDict,
        name: str,
    ):

        if not model_config.get("conv_filters"):
            model_config["conv_filters"] = get_filter_config(obs_space.shape)

        TorchModelV2.__init__(
            self, obs_space, action_space, num_outputs, model_config, name
        )
        nn.Module.__init__(self)

        activation = self.model_config.get("conv_activation")
        filters = self.model_config["conv_filters"]
        assert len(filters) > 0, "Must provide at least 1 entry in `conv_filters`!"

        # Post FC net config.
        post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
        post_fcnet_activation = get_activation_fn(
            model_config.get("post_fcnet_activation"), framework="torch"
        )

        no_final_linear = self.model_config.get("no_final_linear")
        vf_share_layers = self.model_config.get("vf_share_layers")

        # Whether the last layer is the output of a Flatten layer (rather
        # than an n x (1,1) Conv2D).
        self.last_layer_is_flattened = False
        self._logits = None

        layers = []
        (w, h, in_channels) = obs_space.shape

        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel, stride)
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    padding,
                    activation_fn=activation,
                )
            )
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]

        # No final linear: Last layer has activation function and exits with
        # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
        # on `post_fcnet_...` settings).
        if no_final_linear and num_outputs:
            out_channels = out_channels if post_fcnet_hiddens else num_outputs
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation,
                )
            )

            # Add (optional) post-fc-stack after last Conv2D layer.
            layer_sizes = post_fcnet_hiddens[:-1] + (
                [num_outputs] if post_fcnet_hiddens else []
            )
            for i, out_size in enumerate(layer_sizes):
                layers.append(
                    SlimFC(
                        in_size=out_channels,
                        out_size=out_size,
                        activation_fn=post_fcnet_activation,
                        initializer=normc_initializer(1.0),
                    )
                )
                out_channels = out_size

        # Finish network normally (w/o overriding last layer size with
        # `num_outputs`), then add another linear one of size `num_outputs`.
        else:
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation,
                )
            )

            # num_outputs defined. Use that to create an exact
            # `num_outputs`-sized (1,1)-Conv2D.
            if num_outputs:
                in_size = [
                    np.ceil((in_size[0] - kernel[0]) / stride),
                    np.ceil((in_size[1] - kernel[1]) / stride),
                ]
                padding, _ = same_padding(in_size, [1, 1], [1, 1])
                if post_fcnet_hiddens:
                    layers.append(nn.Flatten())
                    in_size = out_channels
                    # Add (optional) post-fc-stack after last Conv2D layer.
                    for i, out_size in enumerate(post_fcnet_hiddens + [num_outputs]):
                        layers.append(
                            SlimFC(
                                in_size=in_size,
                                out_size=out_size,
                                activation_fn=post_fcnet_activation
                                if i < len(post_fcnet_hiddens) - 1
                                else None,
                                initializer=normc_initializer(1.0),
                            )
                        )
                        in_size = out_size
                    # Last layer is logits layer.
                    self._logits = layers.pop()

                else:
                    self._logits = SlimConv2d(
                        out_channels,
                        num_outputs,
                        [1, 1],
                        1,
                        padding,
                        activation_fn=None,
                    )

            # num_outputs not known -> Flatten, then set self.num_outputs
            # to the resulting number of nodes.
            else:
                self.last_layer_is_flattened = True
                layers.append(nn.Flatten())

        self._convs = nn.Sequential(*layers)

        # If our num_outputs is still unknown, we need to do a test pass to
        # figure out the output dimensions. This is the case when we have
        # the Flatten layer at the end.
        if self.num_outputs is None:
            # Create a B=1 dummy sample and push it through our conv-net.
            dummy_in = (
                torch.from_numpy(self.obs_space.sample())
                .permute(2, 0, 1)
                .unsqueeze(0)
                .float()
            )
            dummy_out = self._convs(dummy_in)
            self.num_outputs = dummy_out.shape[1]

        # Build the value layers
        self._value_branch_separate = self._value_branch = None
        if vf_share_layers:
            self._value_branch = SlimFC(
                out_channels, 1, initializer=normc_initializer(0.01), activation_fn=None
            )
        else:
            vf_layers = []
            (w, h, in_channels) = obs_space.shape
            in_size = [w, h]
            for out_channels, kernel, stride in filters[:-1]:
                padding, out_size = same_padding(in_size, kernel, stride)
                vf_layers.append(
                    SlimConv2d(
                        in_channels,
                        out_channels,
                        kernel,
                        stride,
                        padding,
                        activation_fn=activation,
                    )
                )
                in_channels = out_channels
                in_size = out_size

            out_channels, kernel, stride = filters[-1]
            vf_layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,
                    activation_fn=activation,
                )
            )

            vf_layers.append(
                SlimConv2d(
                    in_channels=out_channels,
                    out_channels=1,
                    kernel=1,
                    stride=1,
                    padding=None,
                    activation_fn=None,
                )
            )
            self._value_branch_separate = nn.Sequential(*vf_layers)

        # Holds the current "base" output (before logits layer).
        self._features = None
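When the conv stack ends in a Flatten layer, the constructor above infers `num_outputs` by pushing a dummy observation through the network. A self-contained sketch of that trick, using an illustrative 84x84x3 observation and a made-up two-layer conv stack:

    import torch
    import torch.nn as nn

    convs = nn.Sequential(
        nn.Conv2d(3, 16, kernel_size=8, stride=4),
        nn.ReLU(),
        nn.Conv2d(16, 32, kernel_size=4, stride=2),
        nn.ReLU(),
        nn.Flatten(),
    )

    # [B=1, C, H, W], i.e. the same layout as permute(2, 0, 1).unsqueeze(0).
    dummy_in = torch.zeros(1, 3, 84, 84)
    num_outputs = convs(dummy_in).shape[1]
    print(num_outputs)  # 9 * 9 * 32 = 2592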
Example n. 21
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):

        if not model_config.get("conv_filters"):
            model_config["conv_filters"] = get_filter_config(obs_space.shape)

        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = self.model_config.get("conv_activation")
        filters = self.model_config["conv_filters"]
        assert len(filters) > 0,\
            "Must provide at least 1 entry in `conv_filters`!"

        # Post FC net config.
        post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
        post_fcnet_activation = get_activation_fn(
            model_config.get("post_fcnet_activation"), framework="torch")

        no_final_linear = self.model_config.get("no_final_linear")
        vf_share_layers = self.model_config.get("vf_share_layers")

        # Whether the last layer is the output of a Flatten layer (rather
        # than an n x (1,1) Conv2D).
        self.last_layer_is_flattened = False
        self._logits = None
        self.traj_view_framestacking = False

        layers = []
        # Perform Atari framestacking via traj. view API.
        if model_config.get("num_framestacks") != "auto" and \
                model_config.get("num_framestacks", 0) > 1:
            (w, h) = obs_space.shape
            in_channels = model_config["num_framestacks"]
            self.traj_view_framestacking = True
        else:
            (w, h, in_channels) = obs_space.shape

        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel, [stride, stride])
            layers.append(
                SlimConv2d(in_channels,
                           out_channels,
                           kernel,
                           stride,
                           padding,
                           activation_fn=activation))
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]

        # No final linear: Last layer has activation function and exits with
        # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
        # on `post_fcnet_...` settings).
        if no_final_linear and num_outputs:
            out_channels = out_channels if post_fcnet_hiddens else num_outputs
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation))

            # Add (optional) post-fc-stack after last Conv2D layer.
            layer_sizes = post_fcnet_hiddens[:-1] + (
                [num_outputs] if post_fcnet_hiddens else [])
            for i, out_size in enumerate(layer_sizes):
                layers.append(
                    SlimFC(in_size=out_channels,
                           out_size=out_size,
                           activation_fn=post_fcnet_activation,
                           initializer=normc_initializer(1.0)))
                out_channels = out_size

        # Finish network normally (w/o overriding last layer size with
        # `num_outputs`), then add another linear one of size `num_outputs`.
        else:
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation))

            # num_outputs defined. Use that to create an exact
            # `num_outputs`-sized (1,1)-Conv2D.
            if num_outputs:
                in_size = [
                    np.ceil((in_size[0] - kernel[0]) / stride),
                    np.ceil((in_size[1] - kernel[1]) / stride)
                ]
                padding, _ = same_padding(in_size, [1, 1], [1, 1])
                if post_fcnet_hiddens:
                    layers.append(nn.Flatten())
                    in_size = out_channels
                    # Add (optional) post-fc-stack after last Conv2D layer.
                    for i, out_size in enumerate(post_fcnet_hiddens +
                                                 [num_outputs]):
                        layers.append(
                            SlimFC(in_size=in_size,
                                   out_size=out_size,
                                   activation_fn=post_fcnet_activation if
                                   i < len(post_fcnet_hiddens) - 1 else None,
                                   initializer=normc_initializer(1.0)))
                        in_size = out_size
                    # Last layer is logits layer.
                    self._logits = layers.pop()

                else:
                    self._logits = SlimConv2d(out_channels,
                                              num_outputs, [1, 1],
                                              1,
                                              padding,
                                              activation_fn=None)

            # num_outputs not known -> Flatten, then set self.num_outputs
            # to the resulting number of nodes.
            else:
                self.last_layer_is_flattened = True
                layers.append(nn.Flatten())
                self.num_outputs = out_channels

        self._convs = nn.Sequential(*layers)

        # Build the value layers
        self._value_branch_separate = self._value_branch = None
        if vf_share_layers:
            self._value_branch = SlimFC(out_channels,
                                        1,
                                        initializer=normc_initializer(0.01),
                                        activation_fn=None)
        else:
            vf_layers = []
            if self.traj_view_framestacking:
                (w, h) = obs_space.shape
                in_channels = model_config["num_framestacks"]
            else:
                (w, h, in_channels) = obs_space.shape
            in_size = [w, h]
            for out_channels, kernel, stride in filters[:-1]:
                padding, out_size = same_padding(in_size, kernel,
                                                 [stride, stride])
                vf_layers.append(
                    SlimConv2d(in_channels,
                               out_channels,
                               kernel,
                               stride,
                               padding,
                               activation_fn=activation))
                in_channels = out_channels
                in_size = out_size

            out_channels, kernel, stride = filters[-1]
            vf_layers.append(
                SlimConv2d(in_channels,
                           out_channels,
                           kernel,
                           stride,
                           None,
                           activation_fn=activation))

            vf_layers.append(
                SlimConv2d(in_channels=out_channels,
                           out_channels=1,
                           kernel=1,
                           stride=1,
                           padding=None,
                           activation_fn=None))
            self._value_branch_separate = nn.Sequential(*vf_layers)

        # Holds the current "base" output (before logits layer).
        self._features = None

        # Optional: framestacking obs/new_obs for Atari.
        if self.traj_view_framestacking:
            from_ = model_config["num_framestacks"] - 1
            self.view_requirements[SampleBatch.OBS].shift = \
                "-{}:0".format(from_)
            self.view_requirements[SampleBatch.OBS].shift_from = -from_
            self.view_requirements[SampleBatch.OBS].shift_to = 0
            self.view_requirements[SampleBatch.NEXT_OBS] = ViewRequirement(
                data_col=SampleBatch.OBS,
                shift="-{}:1".format(from_ - 1),
                space=self.view_requirements[SampleBatch.OBS].space,
            )
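For reference, these are the two input layouts this constructor distinguishes, with illustrative 84x84 frames:

    # Without framestacking: channels come from the obs space itself.
    w, h, in_channels = (84, 84, 3)   # RGB frame -> in_channels = 3

    # With traj-view framestacking: the obs space is a single 2D frame and
    # the stacked frames become the channel dimension.
    num_framestacks = 4
    w, h = (84, 84)
    in_channels = num_framestacks     # 4 stacked frames -> in_channels = 4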
Example n. 22
    def __init__(self,
                 obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 num_outputs: Optional[int],
                 model_config: ModelConfigDict,
                 name: str,
                 actor_hidden_activation: str = "relu",
                 actor_hiddens: Tuple[int, ...] = (256, 256),
                 critic_hidden_activation: str = "relu",
                 critic_hiddens: Tuple[int, ...] = (256, 256),
                 twin_q: bool = False,
                 initial_alpha: float = 1.0,
                 target_entropy: Optional[float] = None):
        """Initializes a SACTorchModel instance.
7
        Args:
            actor_hidden_activation (str): Activation for the actor network.
            actor_hiddens (list): Hidden layers sizes for the actor network.
            critic_hidden_activation (str): Activation for the critic network.
            critic_hiddens (list): Hidden layers sizes for the critic network.
            twin_q (bool): Build twin Q networks (Q-net and target) for more
                stable Q-learning.
            initial_alpha (float): The initial value for the to-be-optimized
                alpha parameter (default: 1.0).
            target_entropy (Optional[float]): A target entropy value for
                the to-be-optimized alpha parameter. If None, will use the
                defaults described in the papers for SAC (and discrete SAC).

        Note that the core layers for forward() are not defined here; this
        only defines the layers for the output heads. The layers for
        forward() should be defined in subclasses of SACModel.
        """
        nn.Module.__init__(self)
        super(SACTorchModel, self).__init__(obs_space, action_space,
                                            num_outputs, model_config, name)

        if isinstance(action_space, Discrete):
            self.action_dim = action_space.n
            self.discrete = True
            action_outs = q_outs = self.action_dim
            action_ins = None  # No action inputs for the discrete case.
        elif isinstance(action_space, Box):
            self.action_dim = np.product(action_space.shape)
            self.discrete = False
            action_outs = 2 * self.action_dim
            action_ins = self.action_dim
            q_outs = 1
        else:
            assert isinstance(action_space, Simplex)
            self.action_dim = np.product(action_space.shape)
            self.discrete = False
            action_outs = self.action_dim
            action_ins = self.action_dim
            q_outs = 1

        # Build the policy network.
        self.action_model = nn.Sequential()
        ins = self.num_outputs
        self.obs_ins = ins
        activation = get_activation_fn(actor_hidden_activation,
                                       framework="torch")
        for i, n in enumerate(actor_hiddens):
            self.action_model.add_module(
                "action_{}".format(i),
                SlimFC(ins,
                       n,
                       initializer=torch.nn.init.xavier_uniform_,
                       activation_fn=activation))
            ins = n
        self.action_model.add_module(
            "action_out",
            SlimFC(ins,
                   action_outs,
                   initializer=torch.nn.init.xavier_uniform_,
                   activation_fn=None))

        # Build the Q-net(s), including target Q-net(s).
        def build_q_net(name_):
            activation = get_activation_fn(critic_hidden_activation,
                                           framework="torch")
            # For continuous actions: Feed obs and actions (concatenated)
            # through the NN. For discrete actions, only obs.
            q_net = nn.Sequential()
            ins = self.obs_ins + (0 if self.discrete else action_ins)
            for i, n in enumerate(critic_hiddens):
                q_net.add_module(
                    "{}_hidden_{}".format(name_, i),
                    SlimFC(ins,
                           n,
                           initializer=torch.nn.init.xavier_uniform_,
                           activation_fn=activation))
                ins = n

            q_net.add_module(
                "{}_out".format(name_),
                SlimFC(ins,
                       q_outs,
                       initializer=torch.nn.init.xavier_uniform_,
                       activation_fn=None))
            return q_net

        self.q_net = build_q_net("q")
        if twin_q:
            self.twin_q_net = build_q_net("twin_q")
        else:
            self.twin_q_net = None

        log_alpha = nn.Parameter(
            torch.from_numpy(np.array([np.log(initial_alpha)])).float())
        self.register_parameter("log_alpha", log_alpha)

        # Auto-calculate the target entropy.
        if target_entropy is None or target_entropy == "auto":
            # See hyperparams in [2] (README.md).
            if self.discrete:
                target_entropy = 0.98 * np.array(-np.log(1.0 / action_space.n),
                                                 dtype=np.float32)
            # See [1] (README.md).
            else:
                target_entropy = -np.prod(action_space.shape)

        self.target_entropy = torch.tensor(data=[target_entropy],
                                           dtype=torch.float32,
                                           requires_grad=False)
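The auto target-entropy branch above reduces to two closed-form defaults. A quick numeric sketch, using an illustrative Discrete(4) and a Box of shape (3,):

    import numpy as np

    n = 4                                       # Discrete(4)
    discrete_target = 0.98 * -np.log(1.0 / n)   # 0.98 * ln(4) ~= 1.359

    box_shape = (3,)                            # Box(..., shape=(3,))
    continuous_target = -np.prod(box_shape)     # -3

    print(float(discrete_target), float(continuous_target))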
Example n. 23
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, last_layer_activation):
        super(FullyConnectedNetworkLastLayerActivation,
              self).__init__(obs_space, action_space, num_outputs,
                             model_config, name)

        activation = get_activation_fn(model_config.get("fcnet_activation"))
        if last_layer_activation is not None:
            last_layer_activation = get_activation_fn(last_layer_activation)
        hiddens = model_config.get("fcnet_hiddens")
        no_final_linear = model_config.get("no_final_linear")
        vf_share_layers = model_config.get("vf_share_layers")

        # we are using obs_flat, so take the flattened shape as input
        inputs = tf.keras.layers.Input(shape=(np.product(obs_space.shape), ),
                                       name="observations")
        last_layer = inputs
        i = 1

        if no_final_linear:
            # the last layer is adjusted to be of size num_outputs
            for size in hiddens[:-1]:
                last_layer = tf.keras.layers.Dense(
                    size,
                    name="fc_{}".format(i),
                    activation=activation,
                    kernel_initializer=normc_initializer(1.0))(last_layer)
                i += 1
            layer_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
        else:
            # The last layer is linear if last_layer_activation is None;
            # otherwise it uses last_layer_activation.
            for size in hiddens:
                last_layer = tf.keras.layers.Dense(
                    size,
                    name="fc_{}".format(i),
                    activation=activation,
                    kernel_initializer=normc_initializer(1.0))(last_layer)
                i += 1
            layer_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=last_layer_activation,
                kernel_initializer=normc_initializer(0.01))(last_layer)

        if not vf_share_layers:
            # build a parallel set of hidden layers for the value net
            last_layer = inputs
            i = 1
            for size in hiddens:
                last_layer = tf.keras.layers.Dense(
                    size,
                    name="fc_value_{}".format(i),
                    activation=activation,
                    kernel_initializer=normc_initializer(1.0))(last_layer)
                i += 1

        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)

        self.base_model = tf.keras.Model(inputs, [layer_out, value_out])
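Because this constructor takes an extra `last_layer_activation` argument, it does not match the standard ModelV2 signature that RLlib instantiates models with. A minimal sketch of how it might be specialized (the subclass name and the "tanh" choice are hypothetical):

    class TanhOutFCNetwork(FullyConnectedNetworkLastLayerActivation):
        def __init__(self, obs_space, action_space, num_outputs,
                     model_config, name):
            super().__init__(obs_space, action_space, num_outputs,
                             model_config, name, last_layer_activation="tanh")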