Example #1
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build the conv tower plus the logits and value heads.

        Filter specs come from ``model_config["conv_filters"]`` (defaulted
        via ``_get_filter_config``); every conv but the last uses padding
        computed by ``valid_padding``, the final one gets padding=None.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        conv_specs = model_config.get("conv_filters")
        if not conv_specs:
            conv_specs = _get_filter_config(obs_space.shape)

        width, height, channels = obs_space.shape
        spatial = [width, height]
        conv_stack = []
        for n_filters, kernel, stride in conv_specs[:-1]:
            pad, next_spatial = valid_padding(spatial, kernel,
                                              [stride, stride])
            conv_stack.append(
                SlimConv2d(channels, n_filters, kernel, stride, pad))
            channels = n_filters
            spatial = next_spatial

        # Final conv layer gets no explicit padding.
        n_filters, kernel, stride = conv_specs[-1]
        conv_stack.append(
            SlimConv2d(channels, n_filters, kernel, stride, None))
        self._convs = nn.Sequential(*conv_stack)

        # Policy logits and value heads sit on top of the conv output.
        self._logits = SlimFC(n_filters,
                              num_outputs,
                              initializer=nn.init.xavier_uniform_)
        self._value_branch = SlimFC(n_filters,
                                    1,
                                    initializer=normc_initializer())
        self._cur_value = None
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build the activated conv tower and the logits/value heads.

        Uses ``conv_filters`` and ``conv_activation`` from ``model_config``;
        all conv layers but the last use padding from ``valid_padding``,
        the final one is built with padding=None.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        act_fn = get_activation_fn(
            model_config.get("conv_activation"), framework="torch")
        conv_specs = model_config.get("conv_filters")
        if not conv_specs:
            conv_specs = _get_filter_config(obs_space.shape)

        width, height, channels = obs_space.shape
        spatial = [width, height]
        stack = []
        for n_filters, kernel, stride in conv_specs[:-1]:
            pad, next_spatial = valid_padding(spatial, kernel,
                                              [stride, stride])
            stack.append(
                SlimConv2d(
                    channels,
                    n_filters,
                    kernel,
                    stride,
                    pad,
                    activation_fn=act_fn))
            channels = n_filters
            spatial = next_spatial

        # Last conv layer: no explicit padding.
        n_filters, kernel, stride = conv_specs[-1]
        stack.append(
            SlimConv2d(
                channels,
                n_filters,
                kernel,
                stride,
                None,
                activation_fn=act_fn))
        self._convs = nn.Sequential(*stack)

        self._logits = SlimFC(
            n_filters, num_outputs, initializer=nn.init.xavier_uniform_)
        self._value_branch = SlimFC(
            n_filters, 1, initializer=normc_initializer())
        # Holds the current "base" output (before logits layer).
        self._features = None
Example #3
0
def conv_layers(x, model_config, obs_space, prefix=""):
    """Stack "same"-padded, channels-last Conv2D layers onto `x`.

    Filter specs come from ``model_config["conv_filters"]`` (or the default
    config for ``obs_space.shape``); layer names are ``{prefix}conv1``,
    ``{prefix}conv2``, ... Returns the resulting tensor.
    """
    specs = model_config.get("conv_filters")
    if not specs:
        specs = _get_filter_config(obs_space.shape)

    act = get_conv_activation(model_config)

    out = x
    for idx, (n_filters, kernel, stride) in enumerate(specs, start=1):
        conv = tf.keras.layers.Conv2D(
            n_filters,
            kernel,
            strides=(stride, stride),
            activation=act,
            padding="same",
            data_format="channels_last",
            name=f"{prefix}conv{idx}",
        )
        out = conv(out)
    return out
Example #4
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build the policy conv stack (and optionally a separate value conv
        stack) from ``model_config``.

        Config keys read: ``conv_filters`` (list of
        (out_channels, kernel, stride) specs; defaulted via
        ``_get_filter_config``), ``conv_activation``, ``no_final_linear``
        and ``vf_share_layers``.

        Args:
            obs_space: Observation space; ``obs_space.shape`` is unpacked as
                (w, h, in_channels), i.e. a channels-last image.
            action_space: Action space (forwarded to the base class).
            num_outputs: Desired logits size; when falsy, the conv output is
                flattened and ``self.num_outputs`` is set instead.
            model_config: The model config dict (may be mutated to fill in
                default ``conv_filters``).
            name: Model name (forwarded to the base class).
        """
        # Default the filter config in-place BEFORE the base-class ctor
        # stores `model_config` on `self`.
        if not model_config.get("conv_filters"):
            model_config["conv_filters"] = _get_filter_config(obs_space.shape)

        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = self.model_config.get("conv_activation")
        filters = self.model_config["conv_filters"]
        no_final_linear = self.model_config.get("no_final_linear")
        vf_share_layers = self.model_config.get("vf_share_layers")

        # Whether the last layer is the output of a Flattened (rather than
        # a n x (1,1) Conv2D).
        self.last_layer_is_flattened = False
        self._logits = None

        layers = []
        (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        # All filter specs except the last use computed "same" padding.
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel, [stride, stride])
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    padding,
                    activation_fn=activation))
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]

        # No final linear: Last layer is a Conv2D and uses num_outputs.
        if no_final_linear and num_outputs:
            layers.append(
                SlimConv2d(
                    in_channels,
                    num_outputs,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation))
            out_channels = num_outputs
        # Finish network normally (w/o overriding last layer size with
        # `num_outputs`), then add another linear one of size `num_outputs`.
        else:
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation))

            # num_outputs defined. Use that to create an exact
            # `num_output`-sized (1,1)-Conv2D.
            if num_outputs:
                # Spatial size after the final "valid" conv.
                # NOTE(review): `kernel[0]`/`kernel[1]` indexing assumes the
                # kernel spec is an (h, w) pair, not a plain int — confirm
                # against the filter-config format.
                in_size = [
                    np.ceil((in_size[0] - kernel[0]) / stride),
                    np.ceil((in_size[1] - kernel[1]) / stride)
                ]
                padding, _ = same_padding(in_size, [1, 1], [1, 1])
                self._logits = SlimConv2d(
                    out_channels,
                    num_outputs, [1, 1],
                    1,
                    padding,
                    activation_fn=None)
            # num_outputs not known -> Flatten, then set self.num_outputs
            # to the resulting number of nodes.
            else:
                self.last_layer_is_flattened = True
                layers.append(nn.Flatten())
                # NOTE(review): assumes the last conv output is 1x1
                # spatially, so the flattened size equals `out_channels` —
                # confirm for the configured filters.
                self.num_outputs = out_channels

        self._convs = nn.Sequential(*layers)

        # Build the value layers
        self._value_branch_separate = self._value_branch = None
        if vf_share_layers:
            # Shared trunk: a single linear value head on the conv output.
            self._value_branch = SlimFC(
                out_channels,
                1,
                initializer=normc_initializer(0.01),
                activation_fn=None)
        else:
            # Separate value conv stack mirroring the policy stack, ending
            # in a (1,1)-conv that produces one value channel.
            vf_layers = []
            (w, h, in_channels) = obs_space.shape
            in_size = [w, h]
            for out_channels, kernel, stride in filters[:-1]:
                padding, out_size = same_padding(in_size, kernel,
                                                 [stride, stride])
                vf_layers.append(
                    SlimConv2d(
                        in_channels,
                        out_channels,
                        kernel,
                        stride,
                        padding,
                        activation_fn=activation))
                in_channels = out_channels
                in_size = out_size

            out_channels, kernel, stride = filters[-1]
            vf_layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,
                    activation_fn=activation))

            vf_layers.append(
                SlimConv2d(
                    in_channels=out_channels,
                    out_channels=1,
                    kernel=1,
                    stride=1,
                    padding=None,
                    activation_fn=None))
            self._value_branch_separate = nn.Sequential(*vf_layers)

        # Holds the current "base" output (before logits layer).
        self._features = None
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build the Keras vision model: a conv action branch plus a value
        branch (shared trunk or a parallel conv stack).

        Reads ``conv_filters`` (defaulted via ``_get_filter_config``),
        ``conv_activation``, ``no_final_linear`` and ``vf_share_layers``
        from ``model_config``. The resulting ``self.base_model`` maps
        observations to ``[conv_out, value_out]``.
        """
        super(VisionNetwork, self).__init__(obs_space, action_space,
                                            num_outputs, model_config, name)

        activation = get_activation_fn(model_config.get("conv_activation"))
        filters = model_config.get("conv_filters")
        if not filters:
            filters = _get_filter_config(obs_space.shape)
        no_final_linear = model_config.get("no_final_linear")
        vf_share_layers = model_config.get("vf_share_layers")

        inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                       name="observations")
        last_layer = inputs

        # Build the action layers
        # All filter specs but the last use "same" padding.
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="same",
                name="conv{}".format(i))(last_layer)
        out_size, kernel, stride = filters[-1]
        if no_final_linear:
            # the last layer is adjusted to be of size num_outputs
            last_layer = tf.keras.layers.Conv2D(num_outputs,
                                                kernel,
                                                strides=(stride, stride),
                                                activation=activation,
                                                padding="valid",
                                                name="conv_out")(last_layer)
            conv_out = last_layer
        else:
            # NOTE(review): `i` is only bound by the loop above; this raises
            # NameError when `filters` has a single entry — confirm configs
            # always carry >= 2 filter specs.
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="valid",
                name="conv{}".format(i + 1))(last_layer)
            # (1,1)-conv producing exactly `num_outputs` channels.
            conv_out = tf.keras.layers.Conv2D(num_outputs, [1, 1],
                                              activation=None,
                                              padding="same",
                                              name="conv_out")(last_layer)

        # Build the value layers
        if vf_share_layers:
            # Shared trunk: squeeze the spatial dims (assumed 1x1 after the
            # final "valid" conv — TODO confirm), then a linear value head.
            last_layer = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
            value_out = tf.keras.layers.Dense(
                1,
                name="value_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)
        else:
            # build a parallel set of hidden layers for the value net
            last_layer = inputs
            for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
                last_layer = tf.keras.layers.Conv2D(
                    out_size,
                    kernel,
                    strides=(stride, stride),
                    activation=activation,
                    padding="same",
                    name="conv_value_{}".format(i))(last_layer)
            out_size, kernel, stride = filters[-1]
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="valid",
                name="conv_value_{}".format(i + 1))(last_layer)
            # Final (1,1)-conv collapses channels to one value output.
            last_layer = tf.keras.layers.Conv2D(
                1, [1, 1],
                activation=None,
                padding="same",
                name="conv_value_out")(last_layer)
            value_out = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

        self.base_model = tf.keras.Model(inputs, [conv_out, value_out])
        self.register_variables(self.base_model.variables)
Example #6
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build the Keras vision model with optional flattened output.

        Reads ``conv_filters`` (defaulted via ``_get_filter_config``),
        ``conv_activation``, ``no_final_linear`` and ``vf_share_layers``
        from ``model_config``. When ``num_outputs`` is falsy the conv output
        is flattened and ``self.num_outputs`` is set from its width. The
        resulting ``self.base_model`` maps observations to
        ``[conv_out, value_out]``.
        """
        super(VisionNetwork, self).__init__(obs_space, action_space,
                                            num_outputs, model_config, name)

        activation = get_activation_fn(model_config.get("conv_activation"))
        filters = model_config.get("conv_filters")
        if not filters:
            filters = _get_filter_config(obs_space.shape)
        no_final_linear = model_config.get("no_final_linear")
        vf_share_layers = model_config.get("vf_share_layers")

        inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                       name="observations")
        last_layer = inputs
        # Whether the last layer is the output of a Flattened (rather than
        # a n x (1,1) Conv2D).
        self.last_layer_is_flattened = False

        # Build the action layers
        # All filter specs but the last use "same" padding.
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i))(last_layer)

        out_size, kernel, stride = filters[-1]

        # No final linear: Last layer is a Conv2D and uses num_outputs.
        if no_final_linear and num_outputs:
            last_layer = tf.keras.layers.Conv2D(num_outputs,
                                                kernel,
                                                strides=(stride, stride),
                                                activation=activation,
                                                padding="valid",
                                                data_format="channels_last",
                                                name="conv_out")(last_layer)
            conv_out = last_layer
        # Finish network normally (w/o overriding last layer size with
        # `num_outputs`), then add another linear one of size `num_outputs`.
        else:
            # NOTE(review): `i` is only bound by the loop above; this raises
            # NameError when `filters` has a single entry — confirm configs
            # always carry >= 2 filter specs.
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="valid",
                data_format="channels_last",
                name="conv{}".format(i + 1))(last_layer)

            # num_outputs defined. Use that to create an exact
            # `num_output`-sized (1,1)-Conv2D.
            if num_outputs:
                conv_out = tf.keras.layers.Conv2D(num_outputs, [1, 1],
                                                  activation=None,
                                                  padding="same",
                                                  data_format="channels_last",
                                                  name="conv_out")(last_layer)
            # num_outputs not known -> Flatten, then set self.num_outputs
            # to the resulting number of nodes.
            else:
                self.last_layer_is_flattened = True
                conv_out = tf.keras.layers.Flatten(
                    data_format="channels_last")(last_layer)
                self.num_outputs = conv_out.shape[1]

        # Build the value layers
        if vf_share_layers:
            # Shared trunk: squeeze the spatial dims (assumed 1x1 after the
            # final "valid" conv — TODO confirm), then a linear value head.
            last_layer = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
            value_out = tf.keras.layers.Dense(
                1,
                name="value_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)
        else:
            # build a parallel set of hidden layers for the value net
            last_layer = inputs
            for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
                last_layer = tf.keras.layers.Conv2D(
                    out_size,
                    kernel,
                    strides=(stride, stride),
                    activation=activation,
                    padding="same",
                    data_format="channels_last",
                    name="conv_value_{}".format(i))(last_layer)
            out_size, kernel, stride = filters[-1]
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="valid",
                data_format="channels_last",
                name="conv_value_{}".format(i + 1))(last_layer)
            # Final (1,1)-conv collapses channels to one value output.
            last_layer = tf.keras.layers.Conv2D(
                1, [1, 1],
                activation=None,
                padding="same",
                data_format="channels_last",
                name="conv_value_out")(last_layer)
            value_out = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

        self.base_model = tf.keras.Model(inputs, [conv_out, value_out])
        self.register_variables(self.base_model.variables)
Example #7
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build a conv + FC + LSTM communication model with optional CPC
        and inference-policy heads, all inside a reuse-able variable scope.

        Most options come from ``model_config["custom_options"]``:
        ``use_comm``, ``message_entropy_coeff``, ``obs_shape``, ``use_cpc``
        (+ ``cpc_opts``), ``use_inference_policy``
        (+ ``inference_policy_opts``) and ``use_receiver_bias``.

        NOTE(review): the ``action_space[0].n`` / ``action_space[1].n``
        indexing assumes a tuple action space of discrete (action, message)
        components — confirm against the calling config.
        """
        with tf.variable_scope(f"{name}_model", reuse=tf.AUTO_REUSE):
            super().__init__(obs_space, action_space, num_outputs,
                             model_config, name)
            custom_opts = model_config.get("custom_options", {})
            self.use_comm = custom_opts.get("use_comm", True)

            self.message_coeff = custom_opts.get("message_entropy_coeff", 0.0)
            # Flat (1-D) observations may carry extra features appended
            # after the image; everything beyond the image size is treated
            # as `n_extra_obs` and fed to the FC trunk directly.
            obs_space_shape = custom_opts.get("obs_shape", obs_space.shape)
            if len(obs_space.shape) == 1:
                n_extra_obs = obs_space.shape[0] - np.prod(obs_space_shape)
            else:
                n_extra_obs = 0

            # Conv
            activation = get_activation_fn(model_config.get("conv_activation"))
            filters = model_config.get("conv_filters")
            if filters is None:
                filters = _get_filter_config(obs_space_shape)

            # Inputs carry a leading (unknown-length) time dimension; it is
            # folded into the batch dim before the CNN.
            inputs = tf.keras.layers.Input(shape=(None, *obs_space_shape),
                                           name=f"{name}_observations_time")
            model_inputs = [inputs]

            cnn_in = tf.reshape(inputs, [-1, *obs_space_shape])
            conv_out = build_cnn(cnn_in,
                                 filters,
                                 activation,
                                 name=f"{name}_conv")

            # FC
            activation = get_activation_fn(
                model_config.get("fcnet_activation"))
            hiddens = model_config.get("fcnet_hiddens")

            if n_extra_obs > 0:
                extra_inputs = tf.keras.layers.Input(
                    shape=(n_extra_obs, ), name=f"{name}_extra_observations")
                model_inputs.append(extra_inputs)
                fc_in = tf.keras.layers.Concatenate(name=f"{name}_fc_in")(
                    [tf.keras.layers.Flatten()(conv_out), extra_inputs])
            else:
                fc_in = tf.keras.layers.Flatten(name=f"{name}_fc_in")(conv_out)
            fc_out = build_fc(fc_in, hiddens, activation, name=f"{name}_fc")

            # LSTM
            self.cell_size = model_config.get("lstm_cell_size", 256)

            state_in_h = tf.keras.layers.Input(shape=(self.cell_size, ),
                                               name=f"{name}_h")
            state_in_c = tf.keras.layers.Input(shape=(self.cell_size, ),
                                               name=f"{name}_c")
            seq_in = tf.keras.layers.Input(shape=(),
                                           name=f"{name}_seq_in",
                                           dtype=tf.int32)

            prev_actions = tf.keras.layers.Input(shape=(),
                                                 name=f"{name}_prev_actions",
                                                 dtype=tf.int32)
            prev_rewards = tf.keras.layers.Input(shape=(),
                                                 name=f"{name}_prev_rewards")

            model_inputs.extend(
                [prev_actions, prev_rewards, seq_in, state_in_h, state_in_c])

            # Optionally append one-hot prev-action and prev-reward to the
            # LSTM input.
            if model_config.get("lstm_use_prev_action_reward"):
                prev_actions_onehot = tf.one_hot(prev_actions,
                                                 action_space[0].n)
                in_tensors = [fc_out, prev_actions_onehot, prev_rewards]
            else:
                in_tensors = [fc_out]

            # CPC objective
            self.use_cpc = custom_opts.get("use_cpc", False)
            if self.use_cpc:
                cpc_params = custom_opts["cpc_opts"]
                self.cpc_in_shape = [cpc_params["cpc_code_size"]]
                self.cpc_out_shape = [
                    cpc_params["cpc_len"], cpc_params["cpc_code_size"]
                ]
                cpc_params["name"] = f"{name}_cpc"
                # The actual CPC encodings
                self._cpc_ins = None
                self._cpc_preds = None
            else:
                cpc_params = {}

            lstm_out, model_outputs = build_lstm(
                in_tensors,
                state_in_h=state_in_h,
                state_in_c=state_in_c,
                seq_in=seq_in,
                cell_size=self.cell_size,
                add_cpc=self.use_cpc,
                cpc_params=cpc_params,
                name=f"{name}_lstm",
            )

            # Final layer, logits has both actions and messages
            self.use_inference_policy = custom_opts.get(
                "use_inference_policy", False)
            if self.use_inference_policy:
                inference_policy_opts = custom_opts["inference_policy_opts"]
                self.pm_type = inference_policy_opts["type"]
                self.ewma_momentum = inference_policy_opts.get("ewma_momentum")
                self.pm_hidden = inference_policy_opts.get(
                    "pm_hidden", [64, 64])
                self.message_size = action_space[1].n

                # Separate heads for action logits and (unscaled) message
                # logits; the softmaxed message distribution is exposed as
                # an extra model output.
                action_logits = tf.keras.layers.Dense(
                    action_space[0].n,
                    activation=tf.keras.activations.linear,
                    name=f"{name}_action_logits",
                )(lstm_out)
                unscaled_message_logits = tf.keras.layers.Dense(
                    self.message_size,
                    activation=tf.keras.activations.linear,
                    name=f"{name}_unscaled_message_logits",
                )(lstm_out)
                unscaled_message_p = tf.nn.softmax(unscaled_message_logits)
                model_outputs.append(unscaled_message_p)

                if self.pm_type == "moving_avg":
                    # Non-trainable running average of the message
                    # distribution (uniform-initialized); when no EWMA
                    # momentum is given, a step counter is kept as well.
                    self._avg_message_p = tf.Variable(
                        name=f"{name}_avg",
                        initial_value=tf.ones(
                            (self.message_size, )) / self.message_size,
                        trainable=False,
                    )
                    avg_message_vars = [self._avg_message_p]
                    if self.ewma_momentum is None:
                        self._avg_message_t = tf.Variable(
                            name=f"{name}_t",
                            initial_value=tf.zeros(()),
                            trainable=False,
                        )
                        avg_message_vars.append(self._avg_message_t)

                    self.register_variables(avg_message_vars)

                logits = tf.keras.layers.Concatenate(name=f"{name}_logits")(
                    [action_logits, unscaled_message_logits])
            else:
                logits = tf.keras.layers.Dense(
                    num_outputs,
                    activation=tf.keras.activations.linear,
                    name=f"{name}_logits",
                )(lstm_out)

            values = tf.keras.layers.Dense(1,
                                           activation=None,
                                           name=f"{name}_values")(lstm_out)
            self._value_out = None  # The actual value
            model_outputs = [logits, values] + model_outputs

            # Create the RNN model
            self.rnn_model = tf.keras.Model(inputs=model_inputs,
                                            outputs=model_outputs)
            self.register_variables(self.rnn_model.variables)
            self._model_out = None  # Actual logits
            self.rnn_model.summary()

            # "hyper_nn" inference policy: an FC net over the (stop-gradient,
            # flattened) weights of the message sub-model predicts message
            # logits of size `message_size`.
            if self.use_inference_policy and self.pm_type == "hyper_nn":
                flattened_vars = []
                message_model = tf.keras.Model(inputs=model_inputs,
                                               outputs=unscaled_message_logits)
                for e in message_model.variables:
                    flattened_vars.append(
                        tf.reshape(tf.stop_gradient(e), shape=[1, -1]))

                concat_vars = tf.keras.layers.Concatenate()(flattened_vars)
                pm_fc_out, pm_fc_vars = build_fc(concat_vars,
                                                 self.pm_hidden,
                                                 "relu",
                                                 name="pm_fc",
                                                 return_vars=True)
                pm_logits_layer = tf.keras.layers.Dense(
                    self.message_size,
                    activation=tf.keras.activations.linear,
                    name=f"{name}_pm_logits",
                )
                self._pm_logits = pm_logits_layer(pm_fc_out)
                self.register_variables(pm_fc_vars)
                self.register_variables(pm_logits_layer.variables)

            # Extra variable definitions
            self.use_receiver_bias = custom_opts.get("use_receiver_bias",
                                                     False)
            self.no_message_outputs = None
            self._unscaled_message_p = None