Exemple #1
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Option-critic style model: a shared FC body feeding per-option
        Q-value, action-policy and termination heads.

        num_outputs is multiplied by the number of options because the
        policy head emits one action distribution per option.
        """
        num_options = model_config.get('oc_num_options')
        TorchModelV2.__init__(self, obs_space, action_space,
                              num_outputs * num_options, model_config, name)
        nn.Module.__init__(self)

        activation = get_activation_fn(model_config.get("fcnet_activation"),
                                       framework="torch")
        hiddens = model_config.get("fcnet_hiddens")
        self.option_epsilon = model_config.get('oc_option_epsilon')

        layers = []
        prev_layer_size = int(np.product(obs_space.shape))

        # Create layers (shared body).
        for size in hiddens:
            layers.append(nn.Linear(prev_layer_size, size))
            # BUG FIX: get_activation_fn(..., framework="torch") returns a
            # module *class*; it must be instantiated before being placed in
            # an nn.Sequential (matches SlimFC's `activation_fn()` usage).
            layers.append(activation())
            prev_layer_size = size
        self._body = nn.Sequential(*layers)
        self.q = nn.Linear(prev_layer_size,
                           num_options)  # Value for each option
        self.pi = nn.Sequential(
            nn.Linear(prev_layer_size, num_options * num_outputs),
            View((num_options, num_outputs)),
            nn.Softmax(dim=-1))  # Action probabilities for each option
        # BUG FIX: nn.Sigmoid must be instantiated; passing the class itself
        # to nn.Sequential raises a TypeError at construction time.
        self.beta = nn.Sequential(nn.Linear(prev_layer_size, num_options),
                                  nn.Sigmoid())  # Termination probabilities
        # Holds the current "base" output (before logits layer).
        self._features = self._q = self._v = self._pi = self._beta = None
        # Holds the last input, in case value branch is separate.
        self._last_flat_in = None
Exemple #2
0
        def build_q_net(name_):
            """Construct one Q-network head as an nn.Sequential.

            Input is the concatenation of observation and action features
            (continuous actions); output is a single scalar Q-value.
            """
            act_fn = get_activation_fn(
                critic_hidden_activation, framework="torch")
            # For continuous actions: Feed obs and actions (concatenated)
            # through the NN. For discrete actions, only obs.
            net = nn.Sequential()
            last_size = self.obs_ins + self.action_dim
            # Hidden stack: one SlimFC per configured layer width.
            for idx, width in enumerate(critic_hiddens):
                hidden = SlimFC(
                    last_size,
                    width,
                    initializer=torch.nn.init.xavier_uniform_,
                    activation_fn=act_fn)
                net.add_module("{}_hidden_{}".format(name_, idx), hidden)
                last_size = width

            # Final linear projection to the Q-value (no activation).
            net.add_module(
                "{}_out".format(name_),
                SlimFC(
                    last_size,
                    1,
                    initializer=torch.nn.init.xavier_uniform_,
                    activation_fn=None))
            return net
Exemple #3
0
    def _build_value_model(self, model_config: ModelConfigDict):
        """Build value model with given model configuration.

        model_config = {'activation': str, 'hiddens': Sequence}
        """
        act = get_activation_fn(model_config.get("activation"))
        layer_sizes = model_config.get("hiddens", [])
        inputs = tf.keras.layers.Input(
            shape=(np.product(self.critic_preprocessor.shape),),
            name="value-inputs",
        )

        # Hidden FC stack.
        x = inputs
        for idx, width in enumerate(layer_sizes):
            dense = tf.keras.layers.Dense(
                width,
                name="fc_{}".format(idx),
                activation=act,
                kernel_initializer=normc_initializer(1.0),
            )
            x = dense(x)

        # Scalar value head; small init keeps initial values near zero.
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01),
        )(x)

        return tf.keras.Model(inputs, [value_out])
Exemple #4
0
    def __init__(self,
                 in_size: int,
                 out_size: int,
                 initializer: Any = None,
                 activation_fn: Any = None,
                 use_bias: bool = True,
                 bias_init: float = 0.0):
        """Creates a standard FC layer, similar to torch.nn.Linear

        Args:
            in_size(int): Input size for FC Layer
            out_size (int): Output size for FC Layer
            initializer (Any): Initializer function for FC layer weights
            activation_fn (Any): Activation function at the end of layer
            use_bias (bool): Whether to add bias weights or not
            bias_init (float): Initialize bias weights to bias_init const
        """
        super(SlimFC, self).__init__()
        layers = []
        # Actual nn.Linear layer (including correct initialization logic).
        # (Comment previously said "Conv2D" — this is a Linear layer.)
        linear = nn.Linear(in_size, out_size, bias=use_bias)
        if initializer:
            initializer(linear.weight)
        # Only initialize the bias if one exists (idiomatic truthiness
        # check instead of `is True`).
        if use_bias:
            nn.init.constant_(linear.bias, bias_init)
        layers.append(linear)
        # Activation function (if any; default=None (linear)).
        # A string specifier (e.g. "relu") is resolved to the torch class.
        if isinstance(activation_fn, str):
            activation_fn = get_activation_fn(activation_fn, "torch")
        if activation_fn is not None:
            layers.append(activation_fn())
        # Put everything in sequence.
        self._model = nn.Sequential(*layers)
Exemple #5
0
    def feed_forward(self, obs, policy_vars, policy_config):
        """Run `obs` through policy and value FC nets rebuilt functionally
        from `policy_vars` (a name->tensor mapping of adapted weights).

        Returns (pi_new_logits, value_fn), with value_fn squeezed to drop
        the trailing singleton dimension.
        """
        # Hacky for now, reconstruct FC network with adapted weights
        # @mluo: TODO for any network
        def fc_network(inp, network_vars, hidden_nonlinearity,
                       output_nonlinearity, policy_config, hiddens_name,
                       logits_name):
            # Split the flat var dict into hidden-layer weights and logits
            # weights by substring match on the var names; any var that
            # matches neither is an error.
            x = inp

            hidden_w = []
            logits_w = []

            for name, w in network_vars.items():
                if hiddens_name in name:
                    hidden_w.append(w)
                elif logits_name in name:
                    logits_w.append(w)
                else:
                    raise NameError

            # Each linear layer contributes a (weight, bias) pair, so the
            # hidden list must be even; logits is exactly one pair.
            # NOTE(review): assumes dict iteration yields weight before
            # bias for each layer — confirm against how policy_vars is
            # constructed by the caller.
            assert len(hidden_w) % 2 == 0 and len(logits_w) == 2

            # Consume (weight, bias) pairs front-to-back. Python evaluates
            # call arguments left-to-right, so pop(0) order is weight, bias.
            while len(hidden_w) != 0:
                x = nn.functional.linear(x, hidden_w.pop(0), hidden_w.pop(0))
                x = hidden_nonlinearity()(x)

            # Output layer with its own (typically identity) nonlinearity.
            x = nn.functional.linear(x, logits_w.pop(0), logits_w.pop(0))
            x = output_nonlinearity()(x)

            return x

        # Partition the incoming vars into value-net vars, policy-net vars
        # and an optional log_std parameter (continuous-action case).
        policyn_vars = {}
        valuen_vars = {}
        log_std = None
        for name, param in policy_vars.items():
            if "value" in name:
                valuen_vars[name] = param
            elif "log_std" in name:
                log_std = param
            else:
                policyn_vars[name] = param

        output_nonlinearity = nn.Identity
        hidden_nonlinearity = get_activation_fn(
            policy_config["fcnet_activation"], framework="torch")

        pi_new_logits = fc_network(obs, policyn_vars, hidden_nonlinearity,
                                   output_nonlinearity, policy_config,
                                   "hidden_layers", "logits")
        # Append the state-independent log_std row to every batch entry.
        if log_std is not None:
            pi_new_logits = torch.cat([
                pi_new_logits,
                log_std.unsqueeze(0).repeat([len(pi_new_logits), 1])
            ],
                                      axis=1)

        value_fn = fc_network(obs, valuen_vars, hidden_nonlinearity,
                              output_nonlinearity, policy_config,
                              "value_branch_separate", "value_branch")

        return pi_new_logits, torch.squeeze(value_fn)
Exemple #6
0
        def build_q_net(name_):
            """Build one Q-head: embed -> encoder feature dim -> hiddens -> out."""
            activation = get_activation_fn(critic_hidden_activation,
                                           framework="torch")
            initializer = nn.init.xavier_uniform_
            # For discrete actions, only obs.
            net = nn.Sequential()
            # First layer maps the embedding to the encoder's feature size.
            in_size = embed_dim
            out_size = self.critic_encoder.feature_dim
            net.add_module(
                "{}_hidden_{}".format(name_, "e"),
                SlimFC(in_size, out_size, initializer=initializer,
                       activation_fn=activation))
            in_size = out_size

            # Configurable hidden stack.
            for idx, width in enumerate(critic_hiddens):
                net.add_module(
                    "{}_hidden_{}".format(name_, idx),
                    SlimFC(in_size, width, initializer=initializer,
                           activation_fn=activation))
                in_size = width

            # Output projection (no activation).
            net.add_module(
                "{}_out".format(name_),
                SlimFC(in_size, q_outs, initializer=initializer,
                       activation_fn=None))
            return net
Exemple #7
0
    def create_inverse_model(self, model_config, encoder):
        """
        Create the inverse submodel of the SCM.
        Inputs:[Encoded state at t,
                Encoded state at t - 1,
                Actions at t - 1,
                MOA LSTM output at t - 1]
        Output: Predicted social influence reward at t - 1
        :param model_config: The model config dict.
        :param encoder: The SCM encoder submodel.
        :return: A new inverse model.
        """
        feature_size = encoder.output_shape[-1]
        input_layers = [
            self.create_encoded_input_layer(feature_size, "encoded_input_now"),
            self.create_encoded_input_layer(feature_size, "encoded_input_next"),
            self.create_action_input_layer(self.action_space.n, self.num_other_agents + 1),
            self.create_lstm_input_layer(model_config),
        ]
        merged = tf.keras.layers.concatenate(input_layers)
        act = get_activation_fn(model_config.get("fcnet_activation"))

        # Single hidden FC layer over the concatenated inputs.
        hidden = tf.keras.layers.Dense(
            32, name="fc_forward", activation=act, kernel_initializer=normc_initializer(1.0),
        )(merged)

        # Scalar output head; relu keeps the prediction non-negative.
        out = tf.keras.layers.Dense(
            1, activation="relu", kernel_initializer=normc_initializer(1.0),
        )(hidden)

        return tf.keras.Model(input_layers, out, name="SCM_Inverse_Model")
Exemple #8
0
    def create_scm_encoder_model(obs_space, model_config):
        """
        Create the encoder submodel, which is part of the SCM.
        :param obs_space: A single agent's observation space.
        :param model_config: The model config dict.
        :return: A new encoder model.
        """
        obs_dims = obs_space.original_space.spaces["curr_obs"].shape
        input_layer = tf.keras.layers.Input(obs_dims, name="observations", dtype=tf.uint8)

        # Divide by 255 to transform [0,255] uint8 rgb pixel values to [0,1] float32.
        normalized = tf.math.divide(
            tf.keras.backend.cast(input_layer, tf.float32), 255.0)

        act = get_activation_fn(model_config.get("conv_activation"))
        # Only the *last* configured conv filter spec is used here.
        out_size, kernel, stride = model_config.get("conv_filters")[-1]
        conv_out = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=act,
            padding="valid",
            name="conv_scm_encoder",
        )(normalized)
        flat = tf.keras.layers.Flatten()(conv_out)

        return tf.keras.Model(input_layer, flat, name="SCM_Encoder_Model")
    def __init__(self,
                 size_in,
                 size_out,
                 hiddens,
                 activations,
                 init_weights,
                 append_log_std=False,
                 log_std_type='constant',
                 sample_std=1.0):
        """Stack of SlimFC layers with per-layer activation/init settings,
        optionally followed by an AppendLogStd layer.

        Args:
            size_in: Input feature size.
            size_out: Output feature size.
            hiddens: Hidden layer sizes (the output layer is appended).
            activations: One activation name per layer (hiddens + output).
            init_weights: One normc-init std per layer (hiddens + output).
            append_log_std: If True, append an AppendLogStd layer.
            log_std_type: Passed through to AppendLogStd as `type`.
            sample_std: Std whose log is AppendLogStd's initial value.
        """
        super().__init__()
        modules = []
        last_size = size_in
        # One SlimFC per hidden width, plus the final output layer.
        for idx, width in enumerate(hiddens + [size_out]):
            modules.append(
                SlimFC(in_size=last_size,
                       out_size=width,
                       initializer=normc_initializer(init_weights[idx]),
                       activation_fn=get_activation_fn(activations[idx],
                                                       framework="torch")))
            last_size = width

        if append_log_std:
            modules.append(
                AppendLogStd(type=log_std_type,
                             init_val=np.log(sample_std),
                             dim=size_out))

        self._model = nn.Sequential(*modules)
Exemple #10
0
 def __init__(
         self,
         in_channels,
         out_channels,
         kernel,
         stride,
         padding,
         # Defaulting these to nn.[..] will break soft torch import.
         initializer="default",
         activation_fn="default",
         bias_init=0):
     """Creates a Conv2d layer with optional zero-padding, weight init and
     trailing activation, wrapped in an nn.Sequential.

     Args:
         in_channels: Number of input channels.
         out_channels: Number of output channels.
         kernel: Kernel size passed to nn.Conv2d.
         stride: Stride passed to nn.Conv2d.
         padding: If truthy, an nn.ZeroPad2d(padding) layer is prepended.
         initializer: Weight initializer; "default" -> xavier_uniform_.
         activation_fn: Activation; "default" -> nn.ReLU, other strings are
             resolved via get_activation_fn, None -> no activation.
         bias_init: Constant used to initialize the conv bias.
     """
     super(SlimConv2d, self).__init__()
     layers = []
     # Padding layer.
     if padding:
         layers.append(nn.ZeroPad2d(padding))
     # Actual Conv2D layer (including correct initialization logic).
     conv = nn.Conv2d(in_channels, out_channels, kernel, stride)
     if initializer:
         if initializer == "default":
             initializer = nn.init.xavier_uniform_
         initializer(conv.weight)
     # Bias is always constant-initialized (nn.Conv2d has bias by default).
     nn.init.constant_(conv.bias, bias_init)
     layers.append(conv)
     # Activation function (if any; default=ReLu).
     if isinstance(activation_fn, str):
         if activation_fn == "default":
             activation_fn = nn.ReLU
         else:
             activation_fn = get_activation_fn(activation_fn, "torch")
     if activation_fn is not None:
         layers.append(activation_fn())
     # Put everything in sequence.
     self._model = nn.Sequential(*layers)
Exemple #11
0
    def __init__(self,
                 *,
                 input_size: int,
                 filters: Tuple[Tuple[int]] = ((1024, 5, 2), (128, 5, 2),
                                               (64, 6, 2), (32, 6, 2)),
                 initializer="default",
                 bias_init=0,
                 activation_fn: str = "relu",
                 output_shape: Tuple[int] = (3, 64, 64)):
        """Initializes a TransposedConv2DStack instance.

        Args:
            input_size (int): The size of the 1D input vector, from which to
                generate the image distribution.
            filters (Tuple[Tuple[int]]): Tuple of filter setups (1 for each
                ConvTranspose2D layer): [in_channels, kernel, stride].
            initializer (Union[str]): Initializer specifier, resolved via
                get_initializer.
            bias_init (float): The initial bias values to use.
            activation_fn (str): Activation function descriptor (str).
            output_shape (Tuple[int]): Shape of the final output image.
        """
        super().__init__()
        self.activation = get_activation_fn(activation_fn, framework="torch")
        self.output_shape = output_shape
        initializer = get_initializer(initializer, framework="torch")

        num_layers = len(filters)
        channels = filters[0][0]
        # Map the 1D input vector to a 1x1 "image" with `channels` channels
        # (channels-first), ready for the ConvTranspose2d stack.
        self.layers = [
            nn.Linear(input_size, channels),
            Reshape([-1, channels, 1, 1]),
        ]
        for idx, (_, kernel, stride) in enumerate(filters):
            is_last = idx == num_layers - 1
            # Last layer emits the image channels; earlier layers feed the
            # next filter spec's in_channels.
            next_channels = output_shape[0] if is_last else filters[idx + 1][0]
            deconv = nn.ConvTranspose2d(channels, next_channels, kernel,
                                        stride)
            # Apply initializer to weights; constant-init the bias.
            initializer(deconv.weight)
            nn.init.constant_(deconv.bias, bias_init)
            self.layers.append(deconv)
            # Activation on all but the last layer (if one is configured).
            if self.activation is not None and not is_last:
                self.layers.append(self.activation())
            channels = next_channels

        self._model = nn.Sequential(*self.layers)
    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Deprecated ModelV1 vision network.

        Returns (flattened output tensor, flattened feature tensor).
        """
        # Hard deprecate this class. All Models should use the ModelV2
        # API from here on.
        deprecation_warning(
            "Model->VisionNetwork", "ModelV2->VisionNetwork", error=False)
        last_layer = input_dict["obs"]
        filters = options.get("conv_filters")
        if not filters:
            filters = _get_filter_config(last_layer.shape.as_list()[1:])

        activation = get_activation_fn(options.get("conv_activation"))

        with tf.name_scope("vision_net"):
            # SAME-padded conv layers for all but the last filter spec.
            for idx, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
                last_layer = tf.layers.conv2d(
                    last_layer,
                    out_size,
                    kernel,
                    stride,
                    activation=activation,
                    padding="same",
                    name="conv{}".format(idx))
            out_size, kernel, stride = filters[-1]

            # skip final linear layer
            if options.get("no_final_linear"):
                fc_out = tf.layers.conv2d(
                    last_layer,
                    num_outputs,
                    kernel,
                    stride,
                    activation=activation,
                    padding="valid",
                    name="fc_out")
                return flatten(fc_out), flatten(fc_out)

            # Last conv acts as "fc1"; a 1x1 conv projects to num_outputs.
            fc1 = tf.layers.conv2d(
                last_layer,
                out_size,
                kernel,
                stride,
                activation=activation,
                padding="valid",
                name="fc1")
            fc2 = tf.layers.conv2d(
                fc1,
                num_outputs, [1, 1],
                activation=None,
                padding="same",
                name="fc2")
            return flatten(fc2), flatten(fc1)
    def feed_forward(self, obs, policy_vars, policy_config):
        """TF variant: rebuild policy/value FC nets functionally from
        `policy_vars` (name->tensor) and run `obs` through both.

        Returns (pi_new_logits, value_fn), with value_fn flattened to 1D.
        """
        # Hacky for now, reconstruct FC network with adapted weights
        # @mluo: TODO for any network
        def fc_network(inp, network_vars, hidden_nonlinearity,
                       output_nonlinearity, policy_config):
            # Applies matmul for "kernel" vars and add for "bias" vars in
            # dict order; after each bias, the appropriate nonlinearity
            # fires (output nonlinearity for vars named "out").
            bias_added = False
            x = inp
            for name, param in network_vars.items():
                if "kernel" in name:
                    x = tf.matmul(x, param)
                elif "bias" in name:
                    x = tf.add(x, param)
                    bias_added = True
                else:
                    raise NameError

                if bias_added:
                    if "out" not in name:
                        x = hidden_nonlinearity(x)
                    elif "out" in name:
                        x = output_nonlinearity(x)
                    else:
                        # NOTE(review): unreachable — the two branches above
                        # are logically exhaustive; kept as-is.
                        raise NameError
                    bias_added = False
            return x

        # Partition vars into value-net vars, policy-net vars, and an
        # optional log_std parameter (continuous-action case).
        policyn_vars = {}
        valuen_vars = {}
        log_std = None
        for name, param in policy_vars.items():
            if "value" in name:
                valuen_vars[name] = param
            elif "log_std" in name:
                log_std = param
            else:
                policyn_vars[name] = param

        output_nonlinearity = tf.identity
        hidden_nonlinearity = get_activation_fn(
            policy_config["fcnet_activation"])

        pi_new_logits = fc_network(obs, policyn_vars, hidden_nonlinearity,
                                   output_nonlinearity, policy_config)
        # Broadcast the state-independent log_std across the batch
        # (0.0 * logits + log_std) and append it to the logits.
        if log_std is not None:
            pi_new_logits = tf.concat(
                [pi_new_logits, 0.0 * pi_new_logits + log_std], 1)
        value_fn = fc_network(obs, valuen_vars, hidden_nonlinearity,
                              output_nonlinearity, policy_config)

        return pi_new_logits, tf.reshape(value_fn, [-1])
Exemple #14
0
    def __init__(
            self,
            in_channels: int,
            out_channels: int,
            kernel: Union[int, Tuple[int, int]],
            stride: Union[int, Tuple[int, int]],
            padding: Union[int, Tuple[int, int]],
            # Defaulting these to nn.[..] will break soft torch import.
            initializer: Any = "default",
            activation_fn: Any = "default",
            bias_init: float = 0):
        """Creates a standard Conv2d layer, similar to torch.nn.Conv2d

            Args:
                in_channels(int): Number of input channels
                out_channels (int): Number of output channels
                kernel (Union[int, Tuple[int, int]]): If int, the kernel is
                    a tuple(x,x). Elsewise, the tuple can be specified
                stride (Union[int, Tuple[int, int]]): Controls the stride
                    for the cross-correlation. If int, the stride is a
                    tuple(x,x). Elsewise, the tuple can be specified
                padding (Union[int, Tuple[int, int]]): Controls the amount
                    of implicit zero-paddings during the conv operation
                initializer (Any): Initializer function for kernel weights
                activation_fn (Any): Activation function at the end of layer
                bias_init (float): Initialize bias weights to bias_init const
        """
        super(SlimConv2d, self).__init__()
        modules = []
        # Optional explicit zero-padding in front of the conv.
        if padding:
            modules.append(nn.ZeroPad2d(padding))
        # The conv itself, with weight initialization applied.
        conv2d = nn.Conv2d(in_channels, out_channels, kernel, stride)
        if initializer:
            weight_init = (nn.init.xavier_uniform_
                           if initializer == "default" else initializer)
            weight_init(conv2d.weight)
        # Constant-initialize the bias.
        nn.init.constant_(conv2d.bias, bias_init)
        modules.append(conv2d)
        # Resolve string activation specifiers ("default" -> ReLU).
        if isinstance(activation_fn, str):
            activation_fn = (nn.ReLU if activation_fn == "default" else
                             get_activation_fn(activation_fn, "torch"))
        # Append the (instantiated) activation, if any.
        if activation_fn is not None:
            modules.append(activation_fn())
        # Put everything in sequence.
        self._model = nn.Sequential(*modules)
Exemple #15
0
    def __init__(self, in_size, out_size, sigma0, activation="relu"):
        """Initializes a NoisyLayer object.

        Args:
            in_size: Input size for the noisy layer.
            out_size: Output size for the noisy layer.
            sigma0: Initialization scale for the bias-noise sigma_b.
            activation: Activation descriptor (str), resolved via
                get_activation_fn and instantiated if not None.
        """
        super().__init__()

        self.in_size = in_size
        self.out_size = out_size
        self.sigma0 = sigma0
        self.activation = get_activation_fn(activation, framework="torch")
        if self.activation is not None:
            self.activation = self.activation()

        # Learnable noise scale for the weights, uniformly initialized in
        # [-1/sqrt(in_size), 1/sqrt(in_size)].
        self.sigma_w = get_variable(
            np.random.uniform(
                low=-1.0 / np.sqrt(float(self.in_size)),
                high=1.0 / np.sqrt(float(self.in_size)),
                size=[self.in_size, out_size]),
            framework="torch",
            dtype=torch.float32,
            torch_tensor=True,
            trainable=True)
        # Learnable noise scale for the bias, constant sigma0/sqrt(in_size).
        self.sigma_b = get_variable(
            np.full(
                shape=[out_size],
                fill_value=sigma0 / np.sqrt(float(self.in_size))),
            framework="torch",
            dtype=torch.float32,
            torch_tensor=True,
            trainable=True)
        # Mean weights, constant-initialized to 6/sqrt(in+out).
        # NOTE(review): presumably related to a Glorot-style bound —
        # confirm against the NoisyNet reference implementation.
        self.w = get_variable(
            np.full(
                shape=[self.in_size, self.out_size],
                fill_value=6 / np.sqrt(float(in_size) + float(out_size))),
            framework="torch",
            dtype=torch.float32,
            torch_tensor=True,
            trainable=True)
        # Mean bias, zero-initialized.
        self.b = get_variable(
            np.zeros([out_size]),
            framework="torch",
            dtype=torch.float32,
            torch_tensor=True,
            trainable=True)
    def _build_layers(self, inputs, num_outputs, options):
        """Process the flattened inputs.

        Note that dict inputs will be flattened into a vector. To define a
        model that processes the components separately, use _build_layers_v2().
        """
        # Soft deprecate this class. All Models should use the ModelV2
        # API from here on.
        deprecation_warning("Model->FullyConnectedNetwork",
                            "ModelV2->FullyConnectedNetwork",
                            error=False)

        hiddens = options.get("fcnet_hiddens")
        activation = get_activation_fn(options.get("fcnet_activation"))

        # Flatten any higher-rank input into a (batch, features) tensor.
        if len(inputs.shape) > 2:
            inputs = tf.layers.flatten(inputs)

        with tf.name_scope("fc_net"):
            last_layer = inputs
            for idx, size in enumerate(hiddens, 1):
                # skip final linear layer
                if options.get("no_final_linear") and idx == len(hiddens):
                    output = tf.layers.dense(
                        last_layer,
                        num_outputs,
                        kernel_initializer=normc_initializer(1.0),
                        activation=activation,
                        name="fc_out")
                    return output, output

                last_layer = tf.layers.dense(
                    last_layer,
                    size,
                    kernel_initializer=normc_initializer(1.0),
                    activation=activation,
                    name="fc{}".format(idx))

            # Final linear head; small init keeps initial outputs near zero.
            output = tf.layers.dense(
                last_layer,
                num_outputs,
                kernel_initializer=normc_initializer(0.01),
                activation=None,
                name="fc_out")
            return output, last_layer
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Torch vision network: conv stack plus logits and value heads."""
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = get_activation_fn(
            model_config.get("conv_activation"), framework="torch")
        filters = model_config.get("conv_filters")
        if not filters:
            filters = _get_filter_config(obs_space.shape)
        # no_final_linear = model_config.get("no_final_linear")
        # vf_share_layers = model_config.get("vf_share_layers")

        conv_layers = []
        (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        # All but the last filter spec use computed "valid" padding.
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = valid_padding(in_size, kernel,
                                              [stride, stride])
            conv_layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    padding,
                    activation_fn=activation))
            in_channels = out_channels
            in_size = out_size

        # Last conv spec gets no explicit padding.
        out_channels, kernel, stride = filters[-1]
        conv_layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,
                activation_fn=activation))
        self._convs = nn.Sequential(*conv_layers)

        # Heads mapping the final conv channels to logits and a value.
        self._logits = SlimFC(
            out_channels, num_outputs, initializer=nn.init.xavier_uniform_)
        self._value_branch = SlimFC(
            out_channels, 1, initializer=normc_initializer())
        # Holds the current "base" output (before logits layer).
        self._features = None
Exemple #18
0
    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Conv tower over the observation.

        Returns (flattened output tensor, flattened feature tensor).
        """
        layer = input_dict["obs"]
        conv_specs = options.get("conv_filters")
        if not conv_specs:
            conv_specs = _get_filter_config(layer.shape.as_list()[1:])

        act = get_activation_fn(options.get("conv_activation"))

        with tf.name_scope("vision_net"):
            # SAME-padded convs for all but the final filter spec.
            for idx, (out_size, kernel, stride) in enumerate(
                    conv_specs[:-1], 1):
                layer = tf.layers.conv2d(
                    layer,
                    out_size,
                    kernel,
                    stride,
                    activation=act,
                    padding="same",
                    name="conv{}".format(idx))
            out_size, kernel, stride = conv_specs[-1]

            # skip final linear layer
            if options.get("no_final_linear"):
                fc_out = tf.layers.conv2d(
                    layer,
                    num_outputs,
                    kernel,
                    stride,
                    activation=act,
                    padding="valid",
                    name="fc_out")
                return flatten(fc_out), flatten(fc_out)

            # Final conv acts as "fc1"; a 1x1 conv ("fc2") projects to
            # num_outputs.
            fc1 = tf.layers.conv2d(
                layer,
                out_size,
                kernel,
                stride,
                activation=act,
                padding="valid",
                name="fc1")
            fc2 = tf.layers.conv2d(
                fc1,
                num_outputs, [1, 1],
                activation=None,
                padding="same",
                name="fc2")
            return flatten(fc2), flatten(fc1)
Exemple #19
0
    def __init__(
        self,
        in_size,
        out_size,
        initializer: Optional[Callable] = None,
        activation_fn: Optional[str] = None,
        use_bias: bool = True,
        prng_key: Optional[jax.random.PRNGKey] = None,
        name: Optional[str] = None,
    ):
        """Initializes a SlimFC instance (flax/jax dense-layer wrapper).

        Args:
            in_size (int): The input size of the input data that will be passed
                into this layer.
            out_size (int): The number of nodes in this FC layer.
            initializer (Optional[Callable]): A flax kernel initializer;
                defaults to Glorot (xavier) uniform when None.
            activation_fn (str): An activation string specifier, e.g. "relu".
            use_bias (bool): Whether to add biases to the dot product or not.
            prng_key (Optional[jax.random.PRNGKey]): An optional PRNG key to
                use for initialization. If None, create a new random one.
            name (Optional[str]): An optional name for this layer.
        """

        # By default, use Glorot uniform initializer.
        if initializer is None:
            initializer = nn.initializers.xavier_uniform()

        # Seed from wall-clock time when no key is given, then split so the
        # stored key differs from the seed key.
        self.prng_key = prng_key or jax.random.PRNGKey(int(time.time()))
        _, self.prng_key = jax.random.split(self.prng_key)
        # Create the flax dense layer.
        self._dense = nn.Dense(
            out_size,
            use_bias=use_bias,
            kernel_init=initializer,
            name=name,
        )
        # Initialize it by tracing a dummy input of the right shape.
        dummy_in = jax.random.normal(self.prng_key, (in_size, ),
                                     dtype=np.float32)
        _, self.prng_key = jax.random.split(self.prng_key)
        self._params = self._dense.init(self.prng_key, dummy_in)

        # Activation function (if any; default=None (linear)).
        self.activation_fn = get_activation_fn(activation_fn, "jax")
def build_fc_layers(model_config, last_layer, name):
    """
    Stack the configured fully-connected (dense) layers onto `last_layer`.
    :param model_config: The config dict; reads "fcnet_hiddens" (layer sizes)
    and "fcnet_activation" (activation specifier).
    :param last_layer: The layer that feeds into the fully connected layer(s) constructed here.
    :param name: The FC layer name (used as a suffix in each layer's name).
    :return: The last constructed FC layer.
    """
    activation = get_activation_fn(model_config.get("fcnet_activation"))
    for idx, num_units in enumerate(model_config.get("fcnet_hiddens"), 1):
        dense = tf.keras.layers.Dense(
            num_units,
            name="fc_{}_{}".format(idx, name),
            activation=activation,
            kernel_initializer=normc_initializer(1.0),
        )
        last_layer = dense(last_layer)
    return last_layer
Exemple #21
0
    def call(self, inputs):
        """Forward pass with factorized Gaussian noise (NoisyNet, tf)."""
        input_dim = int(inputs.shape[1])
        # Sample per-input and per-output noise vectors and squash them
        # through f(x) = sign(x) * sqrt(|x|) (self._f_epsilon).
        eps_in = self._f_epsilon(tf.random.normal(shape=[input_dim]))
        eps_out = self._f_epsilon(tf.random.normal(shape=[self.out_size]))
        # Factorized noise: outer product for the weight matrix, the
        # per-output vector for the bias.
        eps_w = tf.matmul(
            a=tf.expand_dims(eps_in, -1), b=tf.expand_dims(eps_out, 0))
        eps_b = eps_out

        noisy_w = self.w + self.sigma_w * eps_w
        noisy_b = self.b + self.sigma_b * eps_b
        action_activation = tf.matmul(inputs, noisy_w) + noisy_b

        activation = get_activation_fn(self.activation, framework="tf")
        return action_activation if activation is None \
            else activation(action_activation)
Exemple #22
0
    def __init__(self,
                 in_size: int,
                 out_size: int,
                 sigma0: float,
                 activation: str = "relu"):
        """Initializes a NoisyLayer object.

        Args:
            in_size: Input size for Noisy Layer
            out_size: Output size for Noisy Layer
            sigma0: Initialization value for sigma_b (bias noise)
            activation: Non-linear activation for Noisy Layer
        """
        super().__init__()

        self.in_size = in_size
        self.out_size = out_size
        self.sigma0 = sigma0
        # Resolve the activation specifier into a module class, then
        # instantiate it (None means a purely linear layer).
        self.activation = get_activation_fn(activation, framework="torch")
        if self.activation is not None:
            self.activation = self.activation()

        # Noise-scale parameters: sigma_w uniform in +/- 1/sqrt(in_size),
        # sigma_b constant at sigma0/sqrt(in_size).
        self.register_parameter(
            "sigma_w",
            nn.Parameter(
                torch.from_numpy(
                    np.random.uniform(low=-1.0 / np.sqrt(float(self.in_size)),
                                      high=1.0 / np.sqrt(float(self.in_size)),
                                      size=[self.in_size, out_size])).float()))
        self.register_parameter(
            "sigma_b",
            nn.Parameter(
                torch.from_numpy(
                    np.full(shape=[out_size],
                            fill_value=sigma0 /
                            np.sqrt(float(self.in_size)))).float()))

        # Mean parameters: constant Glorot-style weights, zero bias.
        self.register_parameter(
            "w",
            nn.Parameter(
                torch.from_numpy(
                    np.full(shape=[self.in_size, self.out_size],
                            fill_value=6 /
                            np.sqrt(float(in_size) +
                                    float(out_size)))).float()))
        self.register_parameter(
            "b", nn.Parameter(torch.from_numpy(np.zeros([out_size])).float()))
Exemple #23
0
    def _build_layers(self, inputs, num_outputs, options):
        """Process the flattened inputs through an FC stack.

        Note that dict inputs will be flattened into a vector. To define a
        model that processes the components separately, use _build_layers_v2().
        """
        hiddens = options.get("fcnet_hiddens")
        activation = get_activation_fn(options.get("fcnet_activation"))

        # Flatten any extra dims into a single feature vector.
        if len(inputs.shape) > 2:
            inputs = tf.layers.flatten(inputs)

        with tf.name_scope("fc_net"):
            last_layer = inputs
            for idx, size in enumerate(hiddens, 1):
                # With `no_final_linear`, the last hidden layer doubles as
                # the (activated) output layer of width `num_outputs`.
                if options.get("no_final_linear") and idx == len(hiddens):
                    output = tf.layers.dense(
                        last_layer,
                        num_outputs,
                        kernel_initializer=normc_initializer(1.0),
                        activation=activation,
                        name="fc_out")
                    return output, output

                last_layer = tf.layers.dense(
                    last_layer,
                    size,
                    kernel_initializer=normc_initializer(1.0),
                    activation=activation,
                    name="fc{}".format(idx))

            # Final linear (non-activated) output layer.
            output = tf.layers.dense(
                last_layer,
                num_outputs,
                kernel_initializer=normc_initializer(0.01),
                activation=None,
                name="fc_out")
            return output, last_layer
Exemple #24
0
 def __init__(self,
              in_size,
              out_size,
              initializer=None,
              activation_fn=None,
              use_bias=True,
              bias_init=0.0):
     super(SlimFC, self).__init__()
     layers = []
     linear = nn.Linear(in_size, out_size, bias=use_bias)
     if initializer:
         initializer(linear.weight)
     if use_bias is True:
         nn.init.constant_(linear.bias, bias_init)
     layers.append(linear)
     if isinstance(activation_fn, str):
         activation_fn = get_activation_fn(activation_fn, "torch")
     if activation_fn is not None:
         layers.append(activation_fn())
     self._model = nn.Sequential(*layers)
Exemple #25
0
 def __init__(self,
              in_size,
              out_size,
              initializer=None,
              activation_fn=None,
              use_bias=True,
              bias_init=0.0):
     super(SlimFC, self).__init__()
     layers = []
     # Actual Conv2D layer (including correct initialization logic).
     linear = nn.Linear(in_size, out_size, bias=use_bias)
     if initializer:
         initializer(linear.weight)
     if use_bias is True:
         nn.init.constant_(linear.bias, bias_init)
     layers.append(linear)
     # Activation function (if any; default=None (linear)).
     if isinstance(activation_fn, str):
         activation_fn = get_activation_fn(activation_fn, "torch")
     if activation_fn is not None:
         layers.append(activation_fn())
     # Put everything in sequence.
     self._model = nn.Sequential(*layers)
def build_conv_layers(model_config, last_layer):
    """
    Create a sequence of convolutional layers.
    :param model_config: The config dict containing information on what convolutional layers to
    create ("conv_activation" and "conv_filters").
    :param last_layer: The layer that feeds into the convolutional layer(s) constructed here.
    :return: The flattened output of the last constructed convolutional layer.
    """
    activation = get_activation_fn(model_config.get("conv_activation"))
    filters = model_config.get("conv_filters")
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="same",
            # Fixed: `channels_last=True` is not a valid Conv2D kwarg (it
            # raised TypeError); the Keras API spells this `data_format`.
            data_format="channels_last",
            name="conv{}".format(i),
        )(last_layer)
    out_size, kernel, stride = filters[-1]
    # With a single filter spec, the loop above never ran and `i` would be
    # unbound; -1 makes the final layer come out named "conv0".
    if len(filters) == 1:
        i = -1

    # Final conv layer uses "valid" padding.
    conv_out = tf.keras.layers.Conv2D(
        out_size,
        kernel,
        strides=(stride, stride),
        activation=activation,
        padding="valid",
        name="conv{}".format(i + 1),
    )(last_layer)

    flattened_conv_out = tf.keras.layers.Flatten()(conv_out)

    return flattened_conv_out
    def _create_fc_net(self, layer_dims, activation, name=None):
        """Given a list of layer dimensions (incl. input-dim), creates FC-net.

        Args:
            layer_dims (Tuple[int]): Tuple of layer dims, including the input
                dimension.
            activation (str): An activation specifier string (e.g. "relu").
            name (Optional[str]): Base name used for the tf layer names.

        Examples:
            If layer_dims is [4,8,6] we'll have a two layer net: 4->8 (8 nodes)
            and 8->6 (6 nodes), where the second layer (6 nodes) does not have
            an activation anymore. 4 is the input dimension.
        """
        is_torch = self.framework == "torch"

        # tf needs an explicit Input layer; torch infers sizes from SlimFC.
        layers = []
        if not is_torch:
            layers.append(
                tf.keras.layers.Input(shape=(layer_dims[0], ),
                                      name="{}_in".format(name)))

        num_transforms = len(layer_dims) - 1
        for i in range(num_transforms):
            # The final (output) layer gets no activation.
            act = activation if i < num_transforms - 1 else None
            if is_torch:
                layers.append(
                    SlimFC(in_size=layer_dims[i],
                           out_size=layer_dims[i + 1],
                           initializer=torch.nn.init.xavier_uniform_,
                           activation_fn=act))
            else:
                layers.append(
                    tf.keras.layers.Dense(units=layer_dims[i + 1],
                                          activation=get_activation_fn(act),
                                          name="{}_{}".format(name, i)))

        if is_torch:
            return nn.Sequential(*layers)
        return tf.keras.Sequential(layers)
Exemple #28
0
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 actor_hidden_activation="relu",
                 actor_hiddens=(256, 256),
                 critic_hidden_activation="relu",
                 critic_hiddens=(256, 256),
                 twin_q=False,
                 add_layer_norm=False):
        """Initialize variables of this model.

        Extra model kwargs:
            actor_hidden_activation (str): activation for actor network
            actor_hiddens (list): hidden layers sizes for actor network
            critic_hidden_activation (str): activation for critic network
            critic_hiddens (list): hidden layers sizes for critic network
            twin_q (bool): build twin Q networks.
            add_layer_norm (bool): Enable layer norm (for param noise).

        Note that the core layers for forward() are not defined here, this
        only defines the layers for the output heads. Those layers for
        forward() should be defined in subclasses of DDPGTorchModel.
        """
        nn.Module.__init__(self)
        super(DDPGTorchModel, self).__init__(obs_space, action_space,
                                             num_outputs, model_config, name)

        # Squash outputs iff any action dim is bounded both above and below.
        self.bounded = np.logical_and(action_space.bounded_above,
                                      action_space.bounded_below).any()
        self.low_action = torch.tensor(action_space.low, dtype=torch.float32)
        self.action_range = torch.tensor(action_space.high - action_space.low,
                                         dtype=torch.float32)
        # Fixed: np.prod (np.product is deprecated, removed in NumPy 2.0).
        self.action_dim = np.prod(action_space.shape)

        # Build the policy network.
        self.policy_model = nn.Sequential()
        ins = num_outputs
        self.obs_ins = ins
        activation = get_activation_fn(actor_hidden_activation,
                                       framework="torch")
        for i, n in enumerate(actor_hiddens):
            self.policy_model.add_module(
                "action_{}".format(i),
                SlimFC(ins,
                       n,
                       initializer=torch.nn.init.xavier_uniform_,
                       activation_fn=activation))
            # Add LayerNorm after each Dense.
            if add_layer_norm:
                self.policy_model.add_module("LayerNorm_A_{}".format(i),
                                             nn.LayerNorm(n))
            ins = n

        # Final (linear) action-output layer.
        self.policy_model.add_module(
            "action_out",
            SlimFC(ins,
                   self.action_dim,
                   initializer=torch.nn.init.xavier_uniform_,
                   activation_fn=None))

        # Use sigmoid to scale to [0,1], but also double magnitude of input to
        # emulate behaviour of tanh activation used in DDPG and TD3 papers.
        # After sigmoid squashing, re-scale to env action space bounds.
        # NOTE: closes over `self.action_range`/`self.low_action` above.
        class _Lambda(nn.Module):
            def forward(self_, x):
                sigmoid_out = nn.Sigmoid()(2.0 * x)
                squashed = self.action_range * sigmoid_out + self.low_action
                return squashed

        # Only squash if we have bounded actions.
        if self.bounded:
            self.policy_model.add_module("action_out_squashed", _Lambda())

        # Build the Q-net(s), including target Q-net(s).
        def build_q_net(name_):
            activation = get_activation_fn(critic_hidden_activation,
                                           framework="torch")
            # For continuous actions: Feed obs and actions (concatenated)
            # through the NN. For discrete actions, only obs.
            q_net = nn.Sequential()
            ins = self.obs_ins + self.action_dim
            for i, n in enumerate(critic_hiddens):
                q_net.add_module(
                    "{}_hidden_{}".format(name_, i),
                    SlimFC(ins,
                           n,
                           initializer=torch.nn.init.xavier_uniform_,
                           activation_fn=activation))
                ins = n

            # Single scalar Q-value output, no activation.
            q_net.add_module(
                "{}_out".format(name_),
                SlimFC(ins,
                       1,
                       initializer=torch.nn.init.xavier_uniform_,
                       activation_fn=None))
            return q_net

        self.q_model = build_q_net("q")
        if twin_q:
            self.twin_q_model = build_q_net("twin_q")
        else:
            self.twin_q_model = None
Exemple #29
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Fully-connected torch model whose logits and value outputs are
        each produced by an ensemble of 5 independent MLP heads
        (Linear(prev, 256) -> ReLU -> Linear(256, out)), replacing the
        single SlimFC heads of the standard FC net.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = get_activation_fn(model_config.get("fcnet_activation"),
                                       framework="torch")
        hiddens = model_config.get("fcnet_hiddens")
        no_final_linear = model_config.get("no_final_linear")
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.free_log_std = model_config.get("free_log_std")

        # Generate free-floating bias variables for the second half of
        # the outputs.
        if self.free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two", num_outputs)
            num_outputs = num_outputs // 2

        # Layer to add the log std vars to the state-dependent means.
        if self.free_log_std:
            self._append_free_log_std = AppendBiasLayer(num_outputs)

        layers = []
        # Fixed: np.prod (np.product is deprecated, removed in NumPy 2.0).
        prev_layer_size = int(np.prod(obs_space.shape))
        self._logits = None

        # Create layers 0 to second-last.
        for size in hiddens[:-1]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = size

        # The last layer is adjusted to be of size num_outputs, but it's a
        # layer with activation.
        if no_final_linear and num_outputs:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=num_outputs,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = num_outputs
        # Finish the layers with the provided sizes (`hiddens`), plus -
        # iff num_outputs > 0 - a last linear layer of size num_outputs.
        else:
            if len(hiddens) > 0:
                layers.append(
                    SlimFC(in_size=prev_layer_size,
                           out_size=hiddens[-1],
                           initializer=normc_initializer(1.0),
                           activation_fn=activation))
                prev_layer_size = hiddens[-1]
            if num_outputs:
                # Ensemble of 5 logits heads (replaces a single
                # SlimFC(prev_layer_size, num_outputs) logits layer).
                self._logits = torch.nn.ModuleList([
                    torch.nn.Sequential(
                        torch.nn.Linear(prev_layer_size, 256),
                        torch.nn.ReLU(),
                        torch.nn.Linear(256, num_outputs),
                    ) for _ in range(5)
                ])
            else:
                self.num_outputs = ([np.prod(obs_space.shape)] +
                                    hiddens[-1:])[-1]

        self._hidden_layers = nn.Sequential(*layers)

        self._value_branch_separate = None
        if not self.vf_share_layers:
            # Build a parallel set of hidden layers for the value net.
            prev_vf_layer_size = int(np.prod(obs_space.shape))
            vf_layers = []
            for size in hiddens:
                vf_layers.append(
                    SlimFC(in_size=prev_vf_layer_size,
                           out_size=size,
                           activation_fn=activation,
                           initializer=normc_initializer(1.0)))
                prev_vf_layer_size = size
            self._value_branch_separate = nn.Sequential(*vf_layers)

        # Ensemble of 5 value heads (replaces a single SlimFC value layer).
        self._value_branch = torch.nn.ModuleList([
            torch.nn.Sequential(
                torch.nn.Linear(prev_layer_size, 256),
                torch.nn.ReLU(),
                torch.nn.Linear(256, 1),
            ) for _ in range(5)
        ])

        # Holds the current "base" output (before logits layer).
        self._features = None
        # Holds the last input, in case value branch is separate.
        self._last_flat_in = None

        # Holds the most recent value-head output.
        self.value = None
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Builds a Conv2D ("vision") tf Keras model with policy (conv_out)
        and value (value_out) output heads.

        Reads "conv_activation", "conv_filters", "no_final_linear" and
        "vf_share_layers" from `model_config`.
        """
        super(VisionNetwork, self).__init__(obs_space, action_space,
                                            num_outputs, model_config, name)

        activation = get_activation_fn(model_config.get("conv_activation"))
        filters = model_config.get("conv_filters")
        if not filters:
            filters = _get_filter_config(obs_space.shape)
        no_final_linear = model_config.get("no_final_linear")
        vf_share_layers = model_config.get("vf_share_layers")

        inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                       name="observations")
        last_layer = inputs

        # Build the action layers.
        # Fixed: pre-set `i` so the `conv{i + 1}` name below does not raise
        # NameError when `filters` holds a single spec (loop never runs).
        i = 0
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i))(last_layer)
        out_size, kernel, stride = filters[-1]

        # No final linear: Last layer is a Conv2D and uses num_outputs.
        if no_final_linear:
            last_layer = tf.keras.layers.Conv2D(num_outputs,
                                                kernel,
                                                strides=(stride, stride),
                                                activation=activation,
                                                padding="valid",
                                                data_format="channels_last",
                                                name="conv_out")(last_layer)
            conv_out = last_layer
        # Finish network normally (w/o overriding last layer size with
        # `num_outputs`), then add another linear one of size `num_outputs`.
        else:
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="valid",
                data_format="channels_last",
                name="conv{}".format(i + 1))(last_layer)
            # 1x1 conv acts as the final linear layer over channels.
            conv_out = tf.keras.layers.Conv2D(num_outputs, [1, 1],
                                              activation=None,
                                              padding="same",
                                              data_format="channels_last",
                                              name="conv_out")(last_layer)

        # Build the value layers.
        if vf_share_layers:
            # Drop the (assumed size-1) spatial dims, then a linear head.
            last_layer = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
            value_out = tf.keras.layers.Dense(
                1,
                name="value_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)
        else:
            # Build a parallel set of hidden layers for the value net.
            last_layer = inputs
            # Fixed: same NameError guard as for the action stack above.
            i = 0
            for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
                last_layer = tf.keras.layers.Conv2D(
                    out_size,
                    kernel,
                    strides=(stride, stride),
                    activation=activation,
                    padding="same",
                    data_format="channels_last",
                    name="conv_value_{}".format(i))(last_layer)
            out_size, kernel, stride = filters[-1]
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="valid",
                data_format="channels_last",
                name="conv_value_{}".format(i + 1))(last_layer)
            last_layer = tf.keras.layers.Conv2D(
                1, [1, 1],
                activation=None,
                padding="same",
                data_format="channels_last",
                name="conv_value_out")(last_layer)
            value_out = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

        self.base_model = tf.keras.Model(inputs, [conv_out, value_out])
        self.register_variables(self.base_model.variables)