Beispiel #1
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build a fully connected torso with a logits head and a value head.

        Args:
            obs_space: Observation space; the product of its shape is the
                input width of the first hidden layer.
            action_space: Action space, forwarded to ``TorchModelV2``.
            num_outputs: Output width of the logits layer.
            model_config: Model config; ``fcnet_hiddens`` and
                ``fcnet_activation`` are read from it.
            name: Model name, forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        # Dump the config through the module logger instead of stdout so it
        # can be silenced like every other debug message.
        logger.debug("Model config: {}".format(model_config))
        hiddens = model_config.get("fcnet_hiddens")
        activation = _get_activation_fn(model_config.get("fcnet_activation"))
        logger.debug("Constructing fcnet {} {}".format(hiddens, activation))
        layers = []
        # `np.product` was removed in NumPy 2.0; `np.prod` is the supported
        # spelling. Cast so the layer width is a plain Python int.
        last_layer_size = int(np.prod(obs_space.shape))
        for size in hiddens:
            layers.append(
                SlimFC(in_size=last_layer_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            last_layer_size = size

        self._hidden_layers = nn.Sequential(*layers)

        # Both heads read the output of the last hidden layer (or the
        # flattened observation when `hiddens` is empty).
        self._logits = SlimFC(in_size=last_layer_size,
                              out_size=num_outputs,
                              initializer=normc_initializer(0.01),
                              activation_fn=None)
        self._value_branch = SlimFC(in_size=last_layer_size,
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        # Initialised to None here; not written anywhere else in this method.
        self._cur_value = None
Beispiel #2
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Wrap a ``TorchFC`` sub-model and add a separate scalar branch.

        NOTE(review): `custom_input_space` is not a parameter of this method;
        it is presumably defined at module level - confirm. The `obs_space`
        argument is not used here.

        Args:
            obs_space: Observation space (unused in this method).
            action_space: Action space, forwarded to ``TorchModelV2`` and
                ``TorchFC``.
            num_outputs: Output width, forwarded to ``TorchModelV2`` and
                ``TorchFC``.
            model_config: Model config; ``fcnet_activation`` is read from it.
            name: Model name.
        """
        TorchModelV2.__init__(self, custom_input_space, action_space,
                              num_outputs, model_config, name)
        nn.Module.__init__(self)

        self.torch_sub_model = TorchFC(custom_input_space, action_space,
                                       num_outputs, model_config, name)
        # `np.product` was removed in NumPy 2.0; `np.prod` is equivalent.
        prev_safe_layer_size = int(np.prod(custom_input_space.shape))
        vf_layers = []
        activation = model_config.get("fcnet_activation")
        # The separate branch uses one fixed hidden layer of width 32.
        hiddens = [32]
        for size in hiddens:
            vf_layers.append(
                SlimFC(in_size=prev_safe_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_safe_layer_size = size
        # Final linear layer producing a single value, without activation.
        vf_layers.append(
            SlimFC(in_size=prev_safe_layer_size,
                   out_size=1,
                   initializer=normc_initializer(0.01),
                   activation_fn=None))
        self.safe_branch_separate = nn.Sequential(*vf_layers)
        # Initialised to None here; not written anywhere else in this method.
        self.last_in = None
Beispiel #3
0
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        """Build a transformer encoder followed by two FC layers and two heads.

        Args:
            obs_space: Observation space, forwarded to ``TorchModelV2``.
            action_space: Action space, forwarded to ``TorchModelV2``.
            num_outputs: Output width of the action head.
            model_config: Model config; ``custom_model_config`` (optional
                ``sensor_seq_len``, default 10) and ``fcnet_activation``
                (default ``"tanh"``) are read from it.
            name: Model name, forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        # `custom_model_config` may be absent or explicitly None; fall back to
        # an empty dict so the default sequence length still applies.
        custom_configs = model_config.get("custom_model_config") or {}
        self._sensor_seq_len = custom_configs.get("sensor_seq_len", 10)

        activation = model_config.get("fcnet_activation", "tanh")

        # Two stacked encoder layers over 3-dimensional tokens with 3 heads.
        encoder_layer = nn.TransformerEncoderLayer(d_model=3, nhead=3, batch_first=True, dim_feedforward=128)
        self._transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)

        self._all_fc1 = SlimFC(in_size=3,
                               out_size=64,
                               initializer=normc_initializer(1.0),
                               activation_fn=activation)

        self._all_fc2 = SlimFC(in_size=64,
                               out_size=16,
                               initializer=normc_initializer(1.0),
                               activation_fn=activation)

        # Both heads are linear (no activation) on the 16-wide features.
        self._action_layer = SlimFC(in_size=16,
                                    out_size=num_outputs,
                                    initializer=normc_initializer(0.01),
                                    activation_fn=None)
        self._value_layer = SlimFC(in_size=16,
                                   out_size=1,
                                   initializer=normc_initializer(0.01),
                                   activation_fn=None)

        # Initialised to None here; not written anywhere else in this method.
        self._features = None
Beispiel #4
0
    def __init__(self,
                 input_size,
                 fe_hidden_sizes=[128],
                 cls_hidden_sizes=[128, 64]):
        """Build a ReLU feature extractor and a classifier over paired features.

        Args:
            input_size: Input width of the feature extractor.
            fe_hidden_sizes: Non-empty widths of the feature-extractor layers.
            cls_hidden_sizes: Non-empty widths of the classifier's hidden
                layers; a final single-unit layer is appended after them.
        """
        super().__init__()
        assert len(fe_hidden_sizes) > 0
        assert len(cls_hidden_sizes) > 0

        def relu_stack(width_in, widths):
            # Chain ReLU-activated SlimFC layers; return them together with
            # the width of the last layer.
            stack = []
            for width_out in widths:
                stack.append(
                    SlimFC(in_size=width_in,
                           out_size=width_out,
                           initializer=normc_initializer(1.0),
                           activation_fn=nn.ReLU))
                width_in = width_out
            return stack, width_in

        extractor_layers, _ = relu_stack(input_size, fe_hidden_sizes)
        self.feature_extractor = nn.Sequential(*extractor_layers)

        # The classifier consumes the concatenated features of two samples,
        # hence twice the extractor's output width.
        paired_width = fe_hidden_sizes[-1] * 2
        classifier_layers, head_width = relu_stack(paired_width,
                                                   cls_hidden_sizes)
        classifier_layers.append(
            SlimFC(in_size=head_width,
                   out_size=1,
                   initializer=normc_initializer(1.0)))
        self.classifier = nn.Sequential(*classifier_layers)
Beispiel #5
0
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        """Build an embedding-based FC network with an optional separate value net.

        Args:
            obs_space: Observation space; ``obs_space.high[0][-1] + 1`` is
                used as the number of embedding entries.
            action_space: Action space, forwarded to ``TorchModelV2``.
            num_outputs: Output width of the logits layer.
            model_config: Model config; ``fcnet_activation`` and
                ``vf_share_layers`` are read from it.
            name: Model name, forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        # Activation for the hidden layers, taken from the config as-is.
        act_name = model_config.get("fcnet_activation")
        # Hidden widths are fixed here rather than read from the config.
        # A final linear layer is always added (no "no_final_linear" mode).
        layer_widths = [256, 256]

        self.vf_share_layers = model_config.get("vf_share_layers")
        self.free_log_std = False

        # One embedding row per value up to and including the upper bound of
        # the last observation component.
        vocab_size = int(obs_space.high[0][-1]) + 1
        self._embedd = nn.Embedding(vocab_size, CARD_EMBEDD_SIZE)

        # NOTE(review): FIRST_LAYER_SIZE presumably covers the one-hot
        # player encoding plus the card embeddings for the four cards of a
        # trick - confirm against its definition.
        self._hidden_layers = self._build_hidden_layers(
            first_layer_size=FIRST_LAYER_SIZE,
            hiddens=layer_widths,
            activation=act_name)

        self._value_branch_separate = None
        self._value_embedding = None
        if not self.vf_share_layers:
            # The value function gets its own embedding and hidden stack.
            self._value_embedding = nn.Embedding(vocab_size,
                                                 CARD_EMBEDD_SIZE)
            self._value_branch_separate = self._build_hidden_layers(
                first_layer_size=FIRST_LAYER_SIZE,
                hiddens=layer_widths,
                activation=act_name)

        feature_width = layer_widths[-1]
        self._logits = SlimFC(in_size=feature_width,
                              out_size=num_outputs,
                              initializer=normc_initializer(0.01),
                              activation_fn=None)

        self._value_branch = SlimFC(in_size=feature_width,
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        # Slot for the current "base" output (before the logits layer).
        self._features = None
        # Slots for the last inputs, needed when the value branch is separate.
        self._cards_in = None
        self._players_in = None
    def __init__(self,
                 size_in,
                 size_out,
                 hiddens,
                 activations,
                 init_weights,
                 append_log_std=False,
                 log_std_type='constant',
                 sample_std=1.0):
        """Build an MLP whose layers each have their own activation and init scale.

        Args:
            size_in: Input width of the first layer.
            size_out: Width of the final layer.
            hiddens: List of hidden-layer widths.
            activations: Activation name per layer, indexed over
                ``hiddens + [size_out]``.
            init_weights: ``normc_initializer`` argument per layer, indexed
                the same way as ``activations``.
            append_log_std: If true, an ``AppendLogStd`` layer is added last.
            log_std_type: Forwarded to ``AppendLogStd`` as ``type``.
            sample_std: Its natural log is forwarded to ``AppendLogStd`` as
                ``init_val``.
        """
        super().__init__()
        widths = hiddens + [size_out]
        # Each layer's input width is the previous layer's output width.
        fan_ins = [size_in] + widths[:-1]
        stack = [
            SlimFC(in_size=fan_in,
                   out_size=fan_out,
                   initializer=normc_initializer(init_weights[idx]),
                   activation_fn=get_activation_fn(activations[idx],
                                                   framework="torch"))
            for idx, (fan_in, fan_out) in enumerate(zip(fan_ins, widths))
        ]

        if append_log_std:
            log_std_layer = AppendLogStd(type=log_std_type,
                                         init_val=np.log(sample_std),
                                         dim=size_out)
            stack.append(log_std_layer)

        self._model = nn.Sequential(*stack)
Beispiel #7
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build a stack of 2D convolutions with a logits head and a value head.

        Args:
            obs_space: Observation space with a 3-element shape, unpacked as
                (w, h, channels).
            action_space: Action space, forwarded to ``TorchModelV2``.
            num_outputs: Output width of the logits layer.
            model_config: Model config; ``conv_filters`` is read from it and
                replaced by ``_get_filter_config`` when empty or missing.
            name: Model name, forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        conv_specs = (model_config.get("conv_filters")
                      or _get_filter_config(obs_space.shape))

        width, height, channels = obs_space.shape
        spatial = [width, height]
        conv_stack = []
        # Every filter except the last gets padding computed by
        # `valid_padding` from the running spatial size.
        for n_filters, kernel, stride in conv_specs[:-1]:
            pad, spatial_out = valid_padding(spatial, kernel,
                                             [stride, stride])
            conv_stack.append(
                SlimConv2d(channels, n_filters, kernel, stride, pad))
            channels, spatial = n_filters, spatial_out

        # The last filter is applied with padding=None.
        out_channels, kernel, stride = conv_specs[-1]
        conv_stack.append(
            SlimConv2d(channels, out_channels, kernel, stride, None))
        self._convs = nn.Sequential(*conv_stack)

        # Both heads take the channel count of the final convolution.
        self._logits = SlimFC(out_channels,
                              num_outputs,
                              initializer=nn.init.xavier_uniform_)
        self._value_branch = SlimFC(out_channels,
                                    1,
                                    initializer=normc_initializer())
        self._cur_value = None
Beispiel #8
0
    def create_actor(self):
        """Create the actor as three modules: FC stage, GRU, FC stage.

        Reads the ``*_stage1`` / ``*_stage2`` activation and hidden-size
        entries and ``gru_cell_size`` from ``self.model_config``, and stores
        the latter on ``self.gru_cell_size``.

        Returns:
            Tuple ``(stage1, gru, stage2)`` where the stages are
            ``nn.Sequential`` modules and ``gru`` is an ``nn.GRU``.
        """
        cfg = self.model_config
        act_pre = cfg.get("fcnet_activation_stage1")
        widths_pre = cfg.get("fcnet_hiddens_stage1")

        self.gru_cell_size = cfg.get("gru_cell_size")
        act_post = cfg.get("fcnet_activation_stage2")
        widths_post = cfg.get("fcnet_hiddens_stage2")

        def dense_stack(width_in, widths, act):
            # Chain SlimFC layers; return them with the final output width.
            stack = []
            for width_out in widths:
                stack.append(
                    SlimFC(in_size=width_in,
                           out_size=width_out,
                           initializer=normc_initializer(1.0),
                           activation_fn=act))
                width_in = width_out
            return stack, width_in

        # Input is the observation plus a one-hot encoding of the agent id.
        actor_input = self.true_obs_space.shape[1]
        actor_input += self.nbr_agents

        pre_layers, gru_input = dense_stack(actor_input, widths_pre, act_pre)
        stage1 = nn.Sequential(*pre_layers)

        gru = nn.GRU(input_size=gru_input,
                     hidden_size=self.gru_cell_size,
                     num_layers=1,
                     batch_first=not self.is_time_major())

        post_layers, head_input = dense_stack(self.gru_cell_size,
                                              widths_post, act_post)
        # Output layer with one unit per action.
        post_layers.append(
            SlimFC(in_size=head_input,
                   out_size=self.nbr_actions,
                   initializer=normc_initializer(1.0)))

        stage2 = nn.Sequential(*post_layers)
        return stage1, gru, stage2
Beispiel #9
0
    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build an LSTM over the 'data' input followed by FC layers and two heads.

        Args:
            obs_space: Observation space whose ``original_space`` has the
                sub-spaces ``'data'`` (3-element shape) and ``'privates'``.
            action_space: Action space, forwarded to ``TorchModelV2``.
            num_outputs: Output width of the policy head.
            model_config: Model config; ``fcnet_activation``,
                ``fcnet_hiddens`` (default ``[100, 100]``) and
                ``lstm_cell_size`` (default 128) are read from it.
            name: Model name, forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        obs_space_ = obs_space.original_space
        data, privates = obs_space_.spaces['data'], obs_space_.spaces['privates']

        # The LSTM input width is the product of the first and last data
        # dimensions; the middle dimension is kept as its own axis.
        N, T, L = data.shape
        adjusted_data_shape = (T, N*L)

        activation = model_config.get("fcnet_activation")
        hiddens = model_config.get("fcnet_hiddens", [100, 100])
        lstm_dim = model_config.get("lstm_cell_size", 128)

        self.lstm_net = LSTM(input_dim=adjusted_data_shape[-1], hidden_dim=lstm_dim, num_layers=2)

        # The FC stack sees the LSTM output concatenated with the flattened
        # 'privates' input. `np.product` was removed in NumPy 2.0, so use
        # the equivalent `np.prod`.
        prev_layer_size = lstm_dim + int(np.prod(privates.shape))

        layers = []
        for size in hiddens:
            layers.append(
                SlimFC(
                    in_size=prev_layer_size,
                    out_size=size,
                    initializer=normc_initializer(1.0),
                    activation_fn=activation))
            prev_layer_size = size

        self._hidden_layers = nn.Sequential(*layers)
        self._features = None

        # NOTE(review): both heads apply `activation` to their output, unlike
        # the usual linear heads - confirm this is intended.
        self._policy_net = SlimFC(
                    in_size=prev_layer_size,
                    out_size=num_outputs,
                    initializer=normc_initializer(1.0),
                    activation_fn=activation)

        self._value_net = SlimFC(
                    in_size=prev_layer_size,
                    out_size=1,
                    initializer=normc_initializer(1.0),
                    activation_fn=activation)
Beispiel #10
0
 def __init__(self,
              in_size,
              out_size,
              out_lens,
              at_hiddens,
              ap_hiddens,
              initializer=None,
              activation=None,
              use_bias=True,
              bias_init=0.0):
     """Build two parallel FC branches that share the same input.

     Args:
         in_size: Input width of both branches.
         out_size: Total output width; must equal ``sum(out_lens)``.
         out_lens: Per-part output lengths, only checked against
             ``out_size`` here.
         at_hiddens: Layer widths of the action-type branch.
         ap_hiddens: Layer widths of the action-parameter branch; the last
             entry is the width of its final, activation-free layer.
         initializer: Not used in this method.
         activation: Activation for every layer except the last
             action-parameter layer.
         use_bias: Not used in this method.
         bias_init: Not used in this method.
     """
     super(MultiActionFC, self).__init__()
     assert sum(out_lens) == out_size

     def fc(width_in, width_out, act):
         # Every layer in both branches uses the same initializer scale.
         return SlimFC(in_size=width_in,
                       out_size=width_out,
                       activation_fn=act,
                       initializer=normc_initializer(0.5))

     # Action type: every layer may have an activation function.
     type_stack, width = [], in_size
     for next_width in at_hiddens:
         type_stack.append(fc(width, next_width, activation))
         width = next_width
     self._at_branch_separate = nn.Sequential(*type_stack)

     # Action parameters: the last layer has no activation function
     # (because the action parameters are comparatively large).
     param_stack, width = [], in_size
     for next_width in ap_hiddens[:-1]:
         param_stack.append(fc(width, next_width, activation))
         width = next_width
     param_stack.append(fc(width, ap_hiddens[-1], None))
     self._ap_branch_separate = nn.Sequential(*param_stack)
Beispiel #11
0
    def create_critic(self):
        """Create the critic as a single ``nn.Sequential`` of FC layers.

        The input width is the flattened observation size, plus the flattened
        state size when ``self.has_real_state`` is set, plus ``nbr_agents``,
        plus ``2 * nbr_agents * nbr_actions``. Hidden sizes and activation
        come from the ``fcnet_hiddens_critic`` / ``fcnet_activation_critic``
        config entries.

        Returns:
            ``nn.Sequential`` ending in a layer with ``self.nbr_actions``
            outputs.
        """
        critic_input = np.prod(self.true_obs_space.shape)
        if self.has_real_state:
            critic_input += np.prod(self.state_space.shape)
        critic_input += self.nbr_agents
        critic_input += 2 * self.nbr_agents * self.nbr_actions

        act = self.model_config['fcnet_activation_critic']
        stack = []
        width_in = critic_input
        for width_out in self.model_config['fcnet_hiddens_critic']:
            hidden = SlimFC(in_size=width_in,
                            out_size=width_out,
                            activation_fn=act,
                            initializer=normc_initializer(1.0))
            stack.append(hidden)
            width_in = width_out

        # Output layer: one value per action.
        head = SlimFC(in_size=width_in,
                      out_size=self.nbr_actions,
                      initializer=normc_initializer(1.0))
        stack.append(head)
        return nn.Sequential(*stack)
Beispiel #12
0
    def _create_model(self):
        """Build one 1D-conv branch per observation sub-space plus output heads.

        For every entry of ``self.obs_space.original_space.spaces`` a
        ``nn.Sequential`` of ``SlimConv1d`` layers is built from
        ``self.filters`` and stored in ``self._convs`` (a ``nn.ModuleDict``).
        Then either a 1x1 ``SlimConv1d`` logits layer is created (when
        ``self.num_outputs`` is truthy) or ``self.num_outputs`` is set to the
        combined channel count. A ``SlimFC`` value branch is always created.
        """
        filters = self.filters
        activation = self.activation
        branches = {}
        for obs_name, space in self.obs_space.original_space.spaces.items():
            layers = []
            # Each sub-space shape is unpacked as (width, channels).
            w, in_channels = space.shape
            in_size = w
            for i, (out_channels, kernel, stride) in enumerate(filters):
                padding, out_size = same_padding_1d(in_size, kernel, stride)
                layers.append(
                    SlimConv1d(in_channels,
                               out_channels,
                               kernel,
                               stride,
                               # The last filter is applied without padding.
                               None if i == (len(filters) - 1) else padding,
                               activation_fn=activation))
                in_channels = out_channels
                in_size = out_size

            branches[obs_name] = nn.Sequential(*layers)

        self._convs = nn.ModuleDict(branches)

        # `out_channels`, `kernel`, `stride`, `in_size` and `layers` below are
        # the values left over from the last iteration of the loops above.
        # Multiplying by the branch count presumably accounts for the branch
        # outputs being concatenated along channels - confirm in forward().
        out_channels *= len(self._convs)
        # num_outputs defined. Use that to create an exact
        # `num_outputs`-sized Conv1d with kernel 1 and stride 1.
        if self.num_outputs:
            in_size = np.ceil((in_size - kernel) / stride)

            padding, _ = same_padding_1d(in_size, 1, 1)
            self._logits = SlimConv1d(out_channels,
                                      self.num_outputs,
                                      1,
                                      1,
                                      padding,
                                      activation_fn=None)
        # num_outputs not known -> mark the output as flattened and set
        # self.num_outputs to the combined channel count.
        else:
            self.last_layer_is_flattened = True
            # NOTE(review): this appends to the last branch's plain Python
            # list after its nn.Sequential was already built, so the Flatten
            # module is not part of any registered branch - confirm intent.
            layers.append(nn.Flatten())
            self.num_outputs = out_channels

        # Build the value layers
        self._value_branch = SlimFC(out_channels,
                                    1,
                                    initializer=normc_initializer(0.01),
                                    activation_fn=None)
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build an activated 2D-conv stack with a logits head and a value head.

        Args:
            obs_space: Observation space with a 3-element shape, unpacked as
                (w, h, channels).
            action_space: Action space, forwarded to ``TorchModelV2``.
            num_outputs: Output width of the logits layer.
            model_config: Model config; ``conv_activation`` and
                ``conv_filters`` are read from it. Empty or missing filters
                are replaced by ``_get_filter_config``.
            name: Model name, forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        conv_act = get_activation_fn(
            model_config.get("conv_activation"), framework="torch")
        conv_specs = (model_config.get("conv_filters")
                      or _get_filter_config(obs_space.shape))

        width, height, channels = obs_space.shape
        spatial = [width, height]
        conv_stack = []
        # All filters but the last use padding computed by `valid_padding`.
        for n_filters, kernel, stride in conv_specs[:-1]:
            pad, spatial_out = valid_padding(spatial, kernel,
                                             [stride, stride])
            conv_stack.append(
                SlimConv2d(
                    channels,
                    n_filters,
                    kernel,
                    stride,
                    pad,
                    activation_fn=conv_act))
            channels, spatial = n_filters, spatial_out

        # The final filter is applied with padding=None.
        out_channels, kernel, stride = conv_specs[-1]
        conv_stack.append(
            SlimConv2d(
                channels,
                out_channels,
                kernel,
                stride,
                None,
                activation_fn=conv_act))
        self._convs = nn.Sequential(*conv_stack)

        self._logits = SlimFC(
            out_channels, num_outputs, initializer=nn.init.xavier_uniform_)
        self._value_branch = SlimFC(
            out_channels, 1, initializer=normc_initializer())
        # Slot for the current "base" output (before the logits layer).
        self._features = None
Beispiel #14
0
    def _build_hidden_layers(self, first_layer_size: int, hiddens: list,
                             activation: str):

        layers = []

        prev_layer_size = first_layer_size

        # Create layers. Assumes no_final_linear = False
        for size in hiddens:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = size

        return nn.Sequential(*layers)
Beispiel #15
0
    def __init__(self, activation, action_size, channels=256):
        """Build a 1x1-conv dynamics stack and an FC reward head.

        Args:
            activation: Activation used by all layers except the two output
                layers (``dynamic_head`` and the last reward layer).
            action_size: Number of extra input channels added to the first
                dynamics layer for the encoded action.
            channels: Channel width of the dynamics layers.
        """
        nn.Module.__init__(self)

        self.activation = activation
        self.channels = channels
        self.action_size = action_size

        dynamics_stack = []
        for depth in range(10):
            # Only the first layer also receives the encoded action channels.
            fan_in = self.channels
            if depth == 0:
                fan_in = self.channels + self.action_size
            dynamics_stack.append(
                SlimConv2d(
                    fan_in,
                    self.channels,
                    kernel=1,
                    stride=1,
                    padding=None,
                    activation_fn=self.activation))
        self.dynamic_layers = dynamics_stack

        self.dynamic_head = SlimConv2d(
            self.channels,
            self.channels,
            kernel=1,
            stride=1,
            padding=None,
            activation_fn=None
        )

        self.dynamic = nn.Sequential(*self.dynamic_layers)

        self.flatten = nn.Flatten()

        # Five FC layers of width 256; the fifth maps to a single output and
        # has no activation.
        last_index = 4
        reward_stack = []
        for depth in range(5):
            is_output = depth == last_index
            reward_stack.append(
                SlimFC(
                    256,
                    1 if is_output else 256,
                    initializer=normc_initializer(0.01),
                    activation_fn=None if is_output else self.activation))
        self.reward_layers = reward_stack

        self.reward_head = nn.Sequential(*self.reward_layers)
Beispiel #16
0
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        """Build a 1D-convolutional network with a shared or separate value net.

        Args:
            obs_space: Observation space with a 2-element shape, unpacked as
                (width, channels).
            action_space: Action space, forwarded to ``TorchModelV2``.
            num_outputs: Desired output width; when falsy, the output is
                flattened and ``self.num_outputs`` is set to the channel
                count of the last filter.
            model_config: Model config; ``conv_filters`` (required),
                ``conv_activation``, ``no_final_linear`` and
                ``vf_share_layers`` are read from it.
            name: Model name, forwarded to ``TorchModelV2``.

        Raises:
            ValueError: If ``conv_filters`` is missing or empty.
        """
        if not model_config.get("conv_filters"):
            raise ValueError("Config for conv_filters is required")
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = self.model_config.get("conv_activation")
        filters = self.model_config["conv_filters"]
        assert len(filters) > 0,\
            "Must provide at least 1 entry in `conv_filters`!"
        no_final_linear = self.model_config.get("no_final_linear")
        vf_share_layers = self.model_config.get("vf_share_layers")

        # Whether the last layer is the output of a Flatten (rather than
        # a kernel-1 Conv1d of size `num_outputs`).
        self.last_layer_is_flattened = False
        self._logits = None

        layers = []
        # FIXME add stacking here
        (w, in_channels) = obs_space.shape
        in_size = w
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding_1d(in_size, kernel, stride)
            layers.append(
                SlimConv1d(in_channels,
                           out_channels,
                           kernel,
                           stride,
                           padding,
                           activation_fn=activation))
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]

        # No final linear: the last conv layer directly uses num_outputs as
        # its channel count.
        if no_final_linear and num_outputs:
            layers.append(
                SlimConv1d(
                    in_channels,
                    num_outputs,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation))
            out_channels = num_outputs
        # Finish network normally (w/o overriding last layer size with
        # `num_outputs`), then add another linear one of size `num_outputs`.
        else:
            layers.append(
                SlimConv1d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation))

            # num_outputs defined. Use that to create an exact
            # `num_outputs`-sized Conv1d with kernel 1 and stride 1.
            if num_outputs:
                in_size = np.ceil((in_size - kernel) / stride)

                padding, _ = same_padding_1d(in_size, 1, 1)
                self._logits = SlimConv1d(out_channels,
                                          num_outputs,
                                          1,
                                          1,
                                          padding,
                                          activation_fn=None)
            # num_outputs not known -> Flatten, then set self.num_outputs
            # to the resulting number of nodes.
            else:
                self.last_layer_is_flattened = True
                layers.append(nn.Flatten())
                self.num_outputs = out_channels

        self._convs = nn.Sequential(*layers)

        # Build the value layers
        self._value_branch_separate = self._value_branch = None
        if vf_share_layers:
            self._value_branch = SlimFC(out_channels,
                                        1,
                                        initializer=normc_initializer(0.01),
                                        activation_fn=None)
        else:
            vf_layers = []
            # Re-read the input channels from the observation shape. It is
            # the same (width, channels) pair the policy trunk unpacked
            # above; a 3-element unpack here could never succeed on a shape
            # that passed the 2-element unpack.
            (w, in_channels) = obs_space.shape
            in_size = w
            for out_channels, kernel, stride in filters[:-1]:
                padding, out_size = same_padding_1d(in_size, kernel, stride)
                vf_layers.append(
                    SlimConv1d(in_channels,
                               out_channels,
                               kernel,
                               stride,
                               padding,
                               activation_fn=activation))
                in_channels = out_channels
                in_size = out_size

            out_channels, kernel, stride = filters[-1]
            vf_layers.append(
                SlimConv1d(in_channels,
                           out_channels,
                           kernel,
                           stride,
                           None,
                           activation_fn=activation))

            # Final kernel-1 convolution reducing to a single value channel.
            vf_layers.append(
                SlimConv1d(in_channels=out_channels,
                           out_channels=1,
                           kernel=1,
                           stride=1,
                           padding=None,
                           activation_fn=None))
            self._value_branch_separate = nn.Sequential(*vf_layers)

        # Holds the current "base" output (before logits layer).
        self._features = None
Beispiel #17
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build a fully connected network with an optional final linear layer.

        Args:
            obs_space: Observation space; the product of its shape is the
                input width of the first layer.
            action_space: Action space, forwarded to ``TorchModelV2``.
            num_outputs: Desired output width; when falsy,
                ``self.num_outputs`` is set to the width of the last layer
                that was actually built.
            model_config: Model config; ``fcnet_activation``,
                ``fcnet_hiddens`` and ``no_final_linear`` are read from it.
            name: Model name, forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = get_activation_fn(model_config.get("fcnet_activation"),
                                       framework="torch")
        hiddens = model_config.get("fcnet_hiddens")
        no_final_linear = model_config.get("no_final_linear")

        # TODO(sven): implement case: vf_shared_layers = False.
        # vf_share_layers = model_config.get("vf_share_layers")

        logger.debug("Constructing fcnet {} {}".format(hiddens, activation))
        layers = []
        # `np.product` was removed in NumPy 2.0; `np.prod` is equivalent.
        prev_layer_size = int(np.prod(obs_space.shape))
        self._logits = None

        # Create layers 0 to second-last.
        for size in hiddens[:-1]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = size

        # The last layer is adjusted to be of size num_outputs, but it's a
        # layer with activation.
        if no_final_linear and self.num_outputs:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=self.num_outputs,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = self.num_outputs
        # Finish the layers with the provided sizes (`hiddens`), plus -
        # iff num_outputs > 0 - a last linear layer of size num_outputs.
        else:
            if len(hiddens) > 0:
                layers.append(
                    SlimFC(in_size=prev_layer_size,
                           out_size=hiddens[-1],
                           initializer=normc_initializer(1.0),
                           activation_fn=activation))
                prev_layer_size = hiddens[-1]
            if self.num_outputs:
                # Use the tracked width rather than `hiddens[-1]` so an empty
                # `hiddens` list connects the logits to the flat observation.
                self._logits = SlimFC(in_size=prev_layer_size,
                                      out_size=self.num_outputs,
                                      initializer=normc_initializer(0.01),
                                      activation_fn=None)
            else:
                # Output width is the last hidden layer's width, or the flat
                # observation size when there are no hidden layers.
                self.num_outputs = prev_layer_size

        self._hidden_layers = nn.Sequential(*layers)

        # TODO(sven): Implement non-shared value branch.
        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        # Holds the current value output.
        self._cur_value = None
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, control_input_size, control_hidden_size,
                 interaction_hidden_size):
        """Build the control / interaction policy heads and the value network.

        The flattened observation is expected to consist of
        ``control_input_size`` entries followed by (or combined with) two
        "interaction" entries; how the observation is actually split is
        decided outside of this constructor.

        Args:
            obs_space: Observation space; its flattened size must equal
                ``control_input_size + 2``.
            action_space: Action space; its flattened size is compared with
                ``num_outputs / 2`` to decide whether std outputs are needed.
            num_outputs: Number of outputs requested from the model.
            model_config: Model config dict. Keys read here:
                ``no_final_linear``, ``vf_share_layers``, ``vf_hiddens``
                (default ``[10, 10]``), ``free_log_std`` and
                ``fcnet_activation``.
            name: Model name, forwarded to ``TorchModelV2``.
            control_input_size: Input width of the control sub-network.
            control_hidden_size: Hidden width of the control sub-network.
            interaction_hidden_size: Hidden width of the interaction
                sub-network.

        Raises:
            AssertionError: If ``no_final_linear`` is set, if the observation
                size does not match, or if ``free_log_std`` is set and
                ``num_outputs`` is odd.
            NotImplementedError: If ``vf_share_layers`` is truthy.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        # TODO Handle no_final_linear
        no_final_linear = model_config.get("no_final_linear")
        # NOTE: the condition and message must NOT be wrapped in one pair of
        # parentheses; that builds a non-empty tuple which is always truthy.
        assert not no_final_linear, "Not Implemented yet bro"

        self.vf_share_layers = model_config.get("vf_share_layers")
        self.vf_hiddens = model_config.get("vf_hiddens", [10, 10])
        self.free_log_std = model_config.get("free_log_std")
        self.control_input_size = control_input_size
        self.interaction_input_size = 2

        obs_size = int(np.prod(obs_space.shape))
        expected_obs_size = (self.control_input_size +
                             self.interaction_input_size)
        assert obs_size == expected_obs_size, "Wrong size of obs space"

        activation = get_activation_fn(model_config.get("fcnet_activation"),
                                       framework="torch")

        # Are the std required as output for the action
        self.std = ((num_outputs / 2) == np.prod(action_space.shape))

        # Does the log_std vary with the state or not
        if self.free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two", num_outputs)
            num_outputs = num_outputs // 2

        # Output of the network, called logits for consistency with the rest
        # of RLlib.
        self._logits = None

        # Build the Negotiate model
        self.linear_1 = SlimFC(self.control_input_size,
                               control_hidden_size,
                               initializer=normc_initializer(1.0),
                               activation_fn=activation)
        self.linear_2_mean = SlimFC(control_hidden_size,
                                    2,
                                    initializer=normc_initializer(0.01),
                                    activation_fn=None)
        self.linear_accept_1 = SlimFC(self.interaction_input_size,
                                      interaction_hidden_size,
                                      initializer=normc_initializer(1.0),
                                      activation_fn=activation)
        self.linear_accept_2_mean = SlimFC(interaction_hidden_size,
                                           1,
                                           initializer=normc_initializer(0.01),
                                           activation_fn=None)
        self.control = nn.Sequential(self.linear_1, self.linear_2_mean)
        self.interaction = nn.Sequential(self.linear_accept_1,
                                         self.linear_accept_2_mean)
        self.linear_coop_mean = AppendBiasLayer(1)

        if self.std:
            if not self.free_log_std:
                # State-dependent std heads; they share the first layer of
                # the corresponding mean head.
                self.linear_2_std = SlimFC(control_hidden_size,
                                           2,
                                           initializer=normc_initializer(0.01),
                                           activation_fn=None)
                self.linear_accept_2_std = SlimFC(
                    interaction_hidden_size,
                    1,
                    initializer=normc_initializer(0.01),
                    activation_fn=None)
                self.linear_coop_std = AppendBiasLayer(1)
                self.control_std = nn.Sequential(self.linear_1,
                                                 self.linear_2_std)
                self.interaction_std = nn.Sequential(self.linear_accept_1,
                                                     self.linear_accept_2_std)
                # NOTE(review): `coop_std` duplicates `linear_coop_std`; both
                # are kept so existing state dicts keep loading.
                self.coop_std = AppendBiasLayer(1)
            else:
                self._append_free_log_std = AppendBiasLayer(num_outputs)

        # value function
        if self.vf_share_layers:
            raise NotImplementedError(
                "vf_share_layers=True is not supported by this model")

        # Build a parallel set of hidden layers for the value net.
        prev_vf_layer_size = obs_size
        vf_layers = []
        for size in self.vf_hiddens:
            vf_layers.append(
                SlimFC(in_size=prev_vf_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_vf_layer_size = size
        self._value_branch_separate = nn.Sequential(*vf_layers)

        # Using the running size (instead of a variable only bound inside the
        # loop) keeps an empty `vf_hiddens` list working.
        self._value_branch = SlimFC(in_size=prev_vf_layer_size,
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        self._value_module = nn.Sequential(self._value_branch_separate,
                                           self._value_branch)
        # Holds the current "base" output (before logits layer).
        self._features = None
        # Holds the last input, in case value branch is separate.
        self._last_flat_in = None
Beispiel #19
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Assemble the LSTM, ResNet stack, FC trunk and policy/value heads.

        The original (dict) observation space must expose the sub-spaces
        ``'data'``, ``'images'`` and ``'privates'``. Config keys read:
        ``conv_filters`` (default ``[16, 32, 32]``), ``fcnet_activation``,
        ``fcnet_hiddens`` (default ``[100, 100]``) and ``lstm_cell_size``
        (default 128).
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        sub_spaces = obs_space.original_space.spaces
        data = sub_spaces['data']
        images = sub_spaces['images']
        privates = sub_spaces['privates']

        # `data` is 3-D; its first and last axes are merged into the LSTM
        # feature axis.
        n_first, _, n_last = data.shape
        lstm_input_dim = n_first * n_last

        # `images` is 4-D; the first `data` axis is folded into the channels.
        _, img_w, img_h, img_c = images.shape
        feat_shape = (img_c * n_first, img_w, img_h)
        self.img_shape = feat_shape

        conv_filters = model_config.get('conv_filters') or [16, 32, 32]
        activation = model_config.get("fcnet_activation")
        hiddens = model_config.get("fcnet_hiddens", [100, 100])
        lstm_dim = model_config.get("lstm_cell_size", 128)

        self.lstm_net = LSTM(input_dim=lstm_input_dim,
                             hidden_dim=lstm_dim,
                             num_layers=2)

        # Every ResNet block uses the same third argument (3) and feeds its
        # output shape into the next block.
        conv_blocks = []
        for n_channels in conv_filters:
            block = ResNet(feat_shape, n_channels, 3)
            feat_shape = block.get_output_shape()
            conv_blocks.append(block)
        self.conv_seqs = nn.ModuleList(conv_blocks + [nn.Flatten()])

        # Width of the vector fed into the FC trunk: LSTM size plus the
        # flattened `privates` and conv-output sizes.
        trunk_in = (lstm_dim + int(np.product(privates.shape)) +
                    int(np.product(feat_shape)))

        fc_in_sizes = [trunk_in] + list(hiddens[:-1])
        self._hidden_layers = nn.Sequential(*[
            SlimFC(in_size=n_in,
                   out_size=n_out,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation)
            for n_in, n_out in zip(fc_in_sizes, hiddens)
        ])
        self._features = None

        head_in = hiddens[-1] if hiddens else trunk_in

        self._policy_net = SlimFC(in_size=head_in,
                                  out_size=num_outputs,
                                  initializer=normc_initializer(1.0),
                                  activation_fn=activation)

        self._value_net = SlimFC(in_size=head_in,
                                 out_size=1,
                                 initializer=normc_initializer(1.0),
                                 activation_fn=activation)
Beispiel #20
0
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        """Build the conv stack, logits layer and value branch.

        Config keys read: ``conv_filters``, ``conv_activation``,
        ``post_fcnet_hiddens``, ``post_fcnet_activation``,
        ``no_final_linear``, ``vf_share_layers`` and ``num_framestacks``.

        Args:
            obs_space: Observation space. Its shape is unpacked as ``(w, h)``
                when framestacking is active, else as ``(w, h, in_channels)``.
            action_space: Action space, forwarded to ``TorchModelV2``.
            num_outputs: Requested output size. If falsy, a Flatten layer is
                appended and ``self.num_outputs`` is overwritten.
            model_config: Model config dict. NOTE: it is mutated in place
                when ``conv_filters`` is missing or empty.
            name: Model name, forwarded to ``TorchModelV2``.

        Raises:
            AssertionError: If ``conv_filters`` ends up empty.
        """

        # Fall back to a filter config derived from the obs shape. This
        # writes into the caller's `model_config` dict.
        if not model_config.get("conv_filters"):
            model_config["conv_filters"] = get_filter_config(obs_space.shape)

        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = self.model_config.get("conv_activation")
        # Each entry is unpacked below as (out_channels, kernel, stride).
        filters = self.model_config["conv_filters"]
        assert len(filters) > 0,\
            "Must provide at least 1 entry in `conv_filters`!"

        # Post FC net config.
        post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
        post_fcnet_activation = get_activation_fn(
            model_config.get("post_fcnet_activation"), framework="torch")

        no_final_linear = self.model_config.get("no_final_linear")
        vf_share_layers = self.model_config.get("vf_share_layers")

        # Whether the last layer is the output of a Flattened (rather than
        # a n x (1,1) Conv2D).
        self.last_layer_is_flattened = False
        self._logits = None
        self.traj_view_framestacking = False

        layers = []
        # Perform Atari framestacking via traj. view API.
        # NOTE(review): if "num_framestacks" is present but None, the `> 1`
        # comparison below raises TypeError — confirm callers never pass None.
        if model_config.get("num_framestacks") != "auto" and \
                model_config.get("num_framestacks", 0) > 1:
            # Obs space is 2-D here; the stacked frames act as channels.
            (w, h) = obs_space.shape
            in_channels = model_config["num_framestacks"]
            self.traj_view_framestacking = True
        else:
            (w, h, in_channels) = obs_space.shape

        in_size = [w, h]
        # All filters except the last one use "same" padding.
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel, [stride, stride])
            layers.append(
                SlimConv2d(in_channels,
                           out_channels,
                           kernel,
                           stride,
                           padding,
                           activation_fn=activation))
            in_channels = out_channels
            in_size = out_size

        # The last filter is handled separately in the branches below.
        out_channels, kernel, stride = filters[-1]

        # No final linear: Last layer has activation function and exits with
        # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
        # on `post_fcnet_...` settings).
        if no_final_linear and num_outputs:
            out_channels = out_channels if post_fcnet_hiddens else num_outputs
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation))

            # Add (optional) post-fc-stack after last Conv2D layer.
            # The last configured hidden size is replaced by `num_outputs`.
            # NOTE(review): no Flatten layer is inserted before these FC
            # layers in this branch — confirm this is intended.
            layer_sizes = post_fcnet_hiddens[:-1] + (
                [num_outputs] if post_fcnet_hiddens else [])
            for i, out_size in enumerate(layer_sizes):
                layers.append(
                    SlimFC(in_size=out_channels,
                           out_size=out_size,
                           activation_fn=post_fcnet_activation,
                           initializer=normc_initializer(1.0)))
                # `out_channels` now tracks the FC width; it is read again
                # when the shared value branch is built.
                out_channels = out_size

        # Finish network normally (w/o overriding last layer size with
        # `num_outputs`), then add another linear one of size `num_outputs`.
        else:
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation))

            # num_outputs defined. Use that to create an exact
            # `num_output`-sized (1,1)-Conv2D.
            if num_outputs:
                # Spatial size after the last (unpadded) conv; `kernel` must
                # be indexable and `stride` a scalar for this to work.
                in_size = [
                    np.ceil((in_size[0] - kernel[0]) / stride),
                    np.ceil((in_size[1] - kernel[1]) / stride)
                ]
                padding, _ = same_padding(in_size, [1, 1], [1, 1])
                if post_fcnet_hiddens:
                    layers.append(nn.Flatten())
                    # From here on `in_size` is a scalar FC width, not [w, h].
                    in_size = out_channels
                    # Add (optional) post-fc-stack after last Conv2D layer.
                    for i, out_size in enumerate(post_fcnet_hiddens +
                                                 [num_outputs]):
                        layers.append(
                            SlimFC(in_size=in_size,
                                   out_size=out_size,
                                   activation_fn=post_fcnet_activation if
                                   i < len(post_fcnet_hiddens) - 1 else None,
                                   initializer=normc_initializer(1.0)))
                        in_size = out_size
                    # Last layer is logits layer.
                    self._logits = layers.pop()

                else:
                    self._logits = SlimConv2d(out_channels,
                                              num_outputs, [1, 1],
                                              1,
                                              padding,
                                              activation_fn=None)

            # num_outputs not known -> Flatten, then set self.num_outputs
            # to the resulting number of nodes.
            else:
                self.last_layer_is_flattened = True
                layers.append(nn.Flatten())
                # NOTE(review): this equals the flattened width only if the
                # last conv output is 1x1 spatially — confirm.
                self.num_outputs = out_channels

        self._convs = nn.Sequential(*layers)

        # Build the value layers
        self._value_branch_separate = self._value_branch = None
        if vf_share_layers:
            # Single linear value head on top of the shared features.
            self._value_branch = SlimFC(out_channels,
                                        1,
                                        initializer=normc_initializer(0.01),
                                        activation_fn=None)
        else:
            # Separate conv stack for the value function, built from the
            # same filter config as the policy stack.
            vf_layers = []
            if self.traj_view_framestacking:
                (w, h) = obs_space.shape
                in_channels = model_config["num_framestacks"]
            else:
                (w, h, in_channels) = obs_space.shape
            in_size = [w, h]
            for out_channels, kernel, stride in filters[:-1]:
                padding, out_size = same_padding(in_size, kernel,
                                                 [stride, stride])
                vf_layers.append(
                    SlimConv2d(in_channels,
                               out_channels,
                               kernel,
                               stride,
                               padding,
                               activation_fn=activation))
                in_channels = out_channels
                in_size = out_size

            out_channels, kernel, stride = filters[-1]
            vf_layers.append(
                SlimConv2d(in_channels,
                           out_channels,
                           kernel,
                           stride,
                           None,
                           activation_fn=activation))

            # Final 1x1 conv reducing to a single value channel.
            vf_layers.append(
                SlimConv2d(in_channels=out_channels,
                           out_channels=1,
                           kernel=1,
                           stride=1,
                           padding=None,
                           activation_fn=None))
            self._value_branch_separate = nn.Sequential(*vf_layers)

        # Holds the current "base" output (before logits layer).
        self._features = None

        # Optional: framestacking obs/new_obs for Atari.
        if self.traj_view_framestacking:
            # Number of past frames requested in addition to the current one.
            from_ = model_config["num_framestacks"] - 1
            self.view_requirements[SampleBatch.OBS].shift = \
                "-{}:0".format(from_)
            self.view_requirements[SampleBatch.OBS].shift_from = -from_
            self.view_requirements[SampleBatch.OBS].shift_to = 0
            # NEXT_OBS is served from the OBS column with a shifted window.
            self.view_requirements[SampleBatch.NEXT_OBS] = ViewRequirement(
                data_col=SampleBatch.OBS,
                shift="-{}:1".format(from_ - 1),
                space=self.view_requirements[SampleBatch.OBS].space,
            )
Beispiel #21
0
    def __init__(
        self,
        obs_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        num_outputs: int,
        model_config: ModelConfigDict,
        name: str,
    ):
        """Build the conv stack, logits layer and value branch.

        Config keys read: ``conv_filters``, ``conv_activation``,
        ``post_fcnet_hiddens``, ``post_fcnet_activation``,
        ``no_final_linear`` and ``vf_share_layers``.

        Args:
            obs_space: Observation space; its shape is unpacked as
                ``(w, h, in_channels)``.
            action_space: Action space, forwarded to ``TorchModelV2``.
            num_outputs: Requested output size. If falsy, a Flatten layer is
                appended; if ``self.num_outputs`` is then ``None`` it is
                determined by a dummy forward pass.
            model_config: Model config dict. NOTE: it is mutated in place
                when ``conv_filters`` is missing or empty.
            name: Model name, forwarded to ``TorchModelV2``.

        Raises:
            AssertionError: If ``conv_filters`` ends up empty.
        """

        # Fall back to a filter config derived from the obs shape. This
        # writes into the caller's `model_config` dict.
        if not model_config.get("conv_filters"):
            model_config["conv_filters"] = get_filter_config(obs_space.shape)

        TorchModelV2.__init__(
            self, obs_space, action_space, num_outputs, model_config, name
        )
        nn.Module.__init__(self)

        activation = self.model_config.get("conv_activation")
        # Each entry is unpacked below as (out_channels, kernel, stride).
        filters = self.model_config["conv_filters"]
        assert len(filters) > 0, "Must provide at least 1 entry in `conv_filters`!"

        # Post FC net config.
        post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
        post_fcnet_activation = get_activation_fn(
            model_config.get("post_fcnet_activation"), framework="torch"
        )

        no_final_linear = self.model_config.get("no_final_linear")
        vf_share_layers = self.model_config.get("vf_share_layers")

        # Whether the last layer is the output of a Flattened (rather than
        # a n x (1,1) Conv2D).
        self.last_layer_is_flattened = False
        self._logits = None

        layers = []
        (w, h, in_channels) = obs_space.shape

        in_size = [w, h]
        # All filters except the last one use "same" padding.
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel, stride)
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    padding,
                    activation_fn=activation,
                )
            )
            in_channels = out_channels
            in_size = out_size

        # The last filter is handled separately in the branches below.
        out_channels, kernel, stride = filters[-1]

        # No final linear: Last layer has activation function and exits with
        # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
        # on `post_fcnet_...` settings).
        if no_final_linear and num_outputs:
            out_channels = out_channels if post_fcnet_hiddens else num_outputs
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation,
                )
            )

            # Add (optional) post-fc-stack after last Conv2D layer.
            # The last configured hidden size is replaced by `num_outputs`.
            # NOTE(review): no Flatten layer is inserted before these FC
            # layers in this branch — confirm this is intended.
            layer_sizes = post_fcnet_hiddens[:-1] + (
                [num_outputs] if post_fcnet_hiddens else []
            )
            for i, out_size in enumerate(layer_sizes):
                layers.append(
                    SlimFC(
                        in_size=out_channels,
                        out_size=out_size,
                        activation_fn=post_fcnet_activation,
                        initializer=normc_initializer(1.0),
                    )
                )
                # `out_channels` now tracks the FC width; it is read again
                # when the shared value branch is built.
                out_channels = out_size

        # Finish network normally (w/o overriding last layer size with
        # `num_outputs`), then add another linear one of size `num_outputs`.
        else:
            layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,  # padding=valid
                    activation_fn=activation,
                )
            )

            # num_outputs defined. Use that to create an exact
            # `num_output`-sized (1,1)-Conv2D.
            if num_outputs:
                # Spatial size after the last (unpadded) conv; `kernel` must
                # be indexable and `stride` a scalar for this to work.
                in_size = [
                    np.ceil((in_size[0] - kernel[0]) / stride),
                    np.ceil((in_size[1] - kernel[1]) / stride),
                ]
                padding, _ = same_padding(in_size, [1, 1], [1, 1])
                if post_fcnet_hiddens:
                    layers.append(nn.Flatten())
                    # From here on `in_size` is a scalar FC width, not [w, h].
                    in_size = out_channels
                    # Add (optional) post-fc-stack after last Conv2D layer.
                    for i, out_size in enumerate(post_fcnet_hiddens + [num_outputs]):
                        layers.append(
                            SlimFC(
                                in_size=in_size,
                                out_size=out_size,
                                activation_fn=post_fcnet_activation
                                if i < len(post_fcnet_hiddens) - 1
                                else None,
                                initializer=normc_initializer(1.0),
                            )
                        )
                        in_size = out_size
                    # Last layer is logits layer.
                    self._logits = layers.pop()

                else:
                    self._logits = SlimConv2d(
                        out_channels,
                        num_outputs,
                        [1, 1],
                        1,
                        padding,
                        activation_fn=None,
                    )

            # num_outputs not known -> Flatten, then set self.num_outputs
            # to the resulting number of nodes.
            else:
                self.last_layer_is_flattened = True
                layers.append(nn.Flatten())

        self._convs = nn.Sequential(*layers)

        # If our num_outputs still unknown, we need to do a test pass to
        # figure out the output dimensions. This could be the case, if we have
        # the Flatten layer at the end.
        # NOTE(review): the Flatten branch above is taken for any falsy
        # `num_outputs` (e.g. 0), but this probe only runs for None — confirm
        # that 0 is never passed.
        if self.num_outputs is None:
            # Create a B=1 dummy sample and push it through out conv-net.
            # The permute moves the last obs axis to the front (presumably
            # HWC -> CHW) before the batch axis is added.
            dummy_in = (
                torch.from_numpy(self.obs_space.sample())
                .permute(2, 0, 1)
                .unsqueeze(0)
                .float()
            )
            dummy_out = self._convs(dummy_in)
            self.num_outputs = dummy_out.shape[1]

        # Build the value layers
        self._value_branch_separate = self._value_branch = None
        if vf_share_layers:
            # Single linear value head on top of the shared features.
            self._value_branch = SlimFC(
                out_channels, 1, initializer=normc_initializer(0.01), activation_fn=None
            )
        else:
            # Separate conv stack for the value function, built from the
            # same filter config as the policy stack.
            vf_layers = []
            (w, h, in_channels) = obs_space.shape
            in_size = [w, h]
            for out_channels, kernel, stride in filters[:-1]:
                padding, out_size = same_padding(in_size, kernel, stride)
                vf_layers.append(
                    SlimConv2d(
                        in_channels,
                        out_channels,
                        kernel,
                        stride,
                        padding,
                        activation_fn=activation,
                    )
                )
                in_channels = out_channels
                in_size = out_size

            out_channels, kernel, stride = filters[-1]
            vf_layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    None,
                    activation_fn=activation,
                )
            )

            # Final 1x1 conv reducing to a single value channel.
            vf_layers.append(
                SlimConv2d(
                    in_channels=out_channels,
                    out_channels=1,
                    kernel=1,
                    stride=1,
                    padding=None,
                    activation_fn=None,
                )
            )
            self._value_branch_separate = nn.Sequential(*vf_layers)

        # Holds the current "base" output (before logits layer).
        self._features = None
Beispiel #22
0
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 num_decompose=2):
        """Build an FC trunk whose logits and value heads are decomposed.

        The logits layer emits ``num_outputs * num_decompose`` values and the
        value head emits ``num_decompose`` values. Config keys read:
        ``fcnet_activation``, ``fcnet_hiddens``, ``no_final_linear``,
        ``vf_share_layers`` and ``free_log_std``.

        Raises:
            AssertionError: If ``free_log_std`` is set and ``num_outputs``
                is odd.
            ValueError: If ``num_outputs`` is falsy and the final-linear
                path is taken.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = get_activation_fn(model_config.get("fcnet_activation"),
                                       framework="torch")
        hiddens = model_config.get("fcnet_hiddens")
        no_final_linear = model_config.get("no_final_linear")
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.free_log_std = model_config.get("free_log_std")

        self.num_decompose = num_decompose

        def hidden_fc(n_in, n_out):
            # Activated dense layer shared by the trunk and the value net.
            return SlimFC(in_size=n_in,
                          out_size=n_out,
                          initializer=normc_initializer(1.0),
                          activation_fn=activation)

        # With free-floating log-stds only the first half of the outputs
        # comes out of the network.
        if self.free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two", num_outputs)
            num_outputs = num_outputs // 2

        obs_size = int(np.product(obs_space.shape))
        self._logits = None

        # Trunk layers 0 .. second-last.
        trunk = []
        width = obs_size
        for n_units in hiddens[:-1]:
            trunk.append(hidden_fc(width, n_units))
            width = n_units

        if no_final_linear and num_outputs:
            # Last trunk layer is resized to `num_outputs` and keeps its
            # activation; no separate logits layer is created.
            trunk.append(hidden_fc(width, num_outputs))
            width = num_outputs
        else:
            # Regular last hidden layer, followed by a linear logits layer.
            if len(hiddens) > 0:
                trunk.append(hidden_fc(width, hiddens[-1]))
                width = hiddens[-1]
            if not num_outputs:
                raise ValueError("No num_outputs")
            self._logits = SlimFC(in_size=width,
                                  out_size=num_outputs * self.num_decompose,
                                  initializer=normc_initializer(0.01),
                                  activation_fn=None)

        # Layer to add the log std vars to the state-dependent means.
        if self.free_log_std and self._logits:
            self._append_free_log_std = AppendBiasLayer(num_outputs)

        self._hidden_layers = nn.Sequential(*trunk)

        self._value_branch_separate = None
        if not self.vf_share_layers:
            # Parallel stack of hidden layers for the value net.
            vf_in_sizes = [obs_size] + list(hiddens[:-1])
            self._value_branch_separate = nn.Sequential(*[
                hidden_fc(n_in, n_out)
                for n_in, n_out in zip(vf_in_sizes, hiddens)
            ])

        self._value_branch = SlimFC(in_size=width,
                                    out_size=self.num_decompose,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        # Holds the current "base" output (before logits layer).
        self._features = None
        # Holds the last input, in case value branch is separate.
        self._last_flat_in = None
Beispiel #23
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build a fully connected net with a shared value branch.

        Config keys read: ``fcnet_activation``, ``fcnet_hiddens``,
        ``no_final_linear`` and ``free_log_std``.

        Args:
            obs_space: Observation space; its flattened size is the input
                width of the first layer.
            action_space: Action space, forwarded to ``TorchModelV2``.
            num_outputs: Requested output size. If falsy, no logits layer is
                created and ``self.num_outputs`` is set to the width of the
                last hidden layer (or the flattened obs size when there are
                no hidden layers).
            model_config: Model config dict.
            name: Model name, forwarded to ``TorchModelV2``.

        Raises:
            AssertionError: If ``free_log_std`` is set and ``num_outputs``
                is odd.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = get_activation_fn(model_config.get("fcnet_activation"),
                                       framework="torch")
        hiddens = model_config.get("fcnet_hiddens")
        no_final_linear = model_config.get("no_final_linear")
        self.free_log_std = model_config.get("free_log_std")

        # TODO(sven): implement case: vf_shared_layers = False.
        # vf_share_layers = model_config.get("vf_share_layers")

        logger.debug("Constructing fcnet {} {}".format(hiddens, activation))
        layers = []
        prev_layer_size = int(np.prod(obs_space.shape))
        self._logits = None

        # Maybe generate free-floating bias variables for the second half of
        # the outputs.
        if self.free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two", num_outputs)
            num_outputs = num_outputs // 2

        # Create layers 0 to second-last.
        for size in hiddens[:-1]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = size

        # The last layer is adjusted to be of size num_outputs, but it's a
        # layer with activation.
        if no_final_linear and num_outputs:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=num_outputs,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = num_outputs
        # Finish the layers with the provided sizes (`hiddens`), plus -
        # iff num_outputs > 0 - a last linear layer of size num_outputs.
        else:
            if len(hiddens) > 0:
                layers.append(
                    SlimFC(in_size=prev_layer_size,
                           out_size=hiddens[-1],
                           initializer=normc_initializer(1.0),
                           activation_fn=activation))
                prev_layer_size = hiddens[-1]
            if num_outputs:
                self._logits = SlimFC(in_size=prev_layer_size,
                                      out_size=num_outputs,
                                      initializer=normc_initializer(0.01),
                                      activation_fn=None)
            else:
                # No logits layer: the model output is the last hidden
                # layer, so report its width. `prev_layer_size` already
                # holds hiddens[-1], or the flattened obs size when
                # `hiddens` is empty. (The former `hiddens[-1:-1]` slice was
                # always empty and therefore always reported the obs size.)
                self.num_outputs = prev_layer_size

        # Layer to add the log std vars to the state-dependent means.
        if self.free_log_std:
            self._append_free_log_std = AppendBiasLayer(num_outputs)

        self._hidden_layers = nn.Sequential(*layers)

        # TODO(sven): Implement non-shared value branch.
        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        # Holds the current "base" output (before logits layer).
        self._features = None
Beispiel #24
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Assemble the conv policy trunk, optional logits layer and value net.

        Args:
            obs_space: Observation space; its ``shape`` is unpacked as
                ``(w, h, in_channels)``.
            action_space: Forwarded unchanged to ``TorchModelV2.__init__``.
            num_outputs: Size of the policy output. When falsy, the trunk is
                flattened and ``self.num_outputs`` is set to the channel
                count of the last filter.
            model_config: Model options. Reads ``conv_filters`` (filled in
                place via ``get_filter_config`` when missing or empty),
                ``conv_activation``, ``no_final_linear`` and
                ``vf_share_layers``. Every filter entry is unpacked as
                ``(out_channels, kernel, stride)``.
            name: Forwarded unchanged to ``TorchModelV2.__init__``.
        """
        # Fill in default filters on the caller's dict before the base class
        # stores it.
        if not model_config.get("conv_filters"):
            model_config["conv_filters"] = get_filter_config(obs_space.shape)

        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        cfg = self.model_config
        activation = cfg.get("conv_activation")
        filters = cfg["conv_filters"]
        no_final_linear = cfg.get("no_final_linear")
        vf_share_layers = cfg.get("vf_share_layers")

        def build_padded_stack():
            """Create same-padded convs for every filter except the last.

            Returns the layer list plus the channel count and spatial size
            that feed the last filter.
            """
            stack = []
            (width, height, channels) = obs_space.shape
            size = [width, height]
            for n_out, k, s in filters[:-1]:
                pad, size_after = same_padding(size, k, [s, s])
                stack.append(
                    SlimConv2d(channels,
                               n_out,
                               k,
                               s,
                               pad,
                               activation_fn=activation))
                channels, size = n_out, size_after
            return stack, channels, size

        def valid_conv(n_in, n_out, k, s):
            """Create an activated conv layer with padding=None ("valid")."""
            return SlimConv2d(n_in, n_out, k, s, None,
                              activation_fn=activation)

        # True iff the trunk ends in a Flatten (rather than in an
        # n x (1,1) Conv2D).
        self.last_layer_is_flattened = False
        self._logits = None

        layers, in_channels, in_size = build_padded_stack()
        out_channels, kernel, stride = filters[-1]

        if not num_outputs:
            # Output size unknown: finish with the configured last filter,
            # flatten, and report its channel count as the output size.
            layers.append(valid_conv(in_channels, out_channels, kernel,
                                     stride))
            self.last_layer_is_flattened = True
            layers.append(nn.Flatten())
            self.num_outputs = out_channels
        elif no_final_linear:
            # The last conv itself emits `num_outputs` channels.
            layers.append(valid_conv(in_channels, num_outputs, kernel,
                                     stride))
            out_channels = num_outputs
        else:
            # Regular last conv, followed by a separate (1,1)-conv that maps
            # to exactly `num_outputs` channels.
            layers.append(valid_conv(in_channels, out_channels, kernel,
                                     stride))
            shrunk = [
                np.ceil((in_size[0] - kernel[0]) / stride),
                np.ceil((in_size[1] - kernel[1]) / stride)
            ]
            logits_padding, _ = same_padding(shrunk, [1, 1], [1, 1])
            self._logits = SlimConv2d(out_channels,
                                      num_outputs, [1, 1],
                                      1,
                                      logits_padding,
                                      activation_fn=None)

        self._convs = nn.Sequential(*layers)

        # Value function: either one linear layer (shared setting) or a
        # full second conv stack ending in a single-channel (1,1)-conv.
        self._value_branch_separate = None
        self._value_branch = None
        if vf_share_layers:
            self._value_branch = SlimFC(out_channels,
                                        1,
                                        initializer=normc_initializer(0.01),
                                        activation_fn=None)
        else:
            vf_layers, in_channels, in_size = build_padded_stack()
            out_channels, kernel, stride = filters[-1]
            vf_layers.append(
                valid_conv(in_channels, out_channels, kernel, stride))
            vf_layers.append(
                SlimConv2d(in_channels=out_channels,
                           out_channels=1,
                           kernel=1,
                           stride=1,
                           padding=None,
                           activation_fn=None))
            self._value_branch_separate = nn.Sequential(*vf_layers)

        # Placeholder for the current "base" output (before logits layer).
        self._features = None
Beispiel #25
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build the hidden stack, the multi-action logits head and value net.

        Args:
            obs_space: Observation space; the product of its ``shape`` is
                the input width of the first layer.
            action_space: Forwarded unchanged to ``TorchModelV2.__init__``.
            num_outputs: Size of the policy output. Must be even when
                ``free_log_std`` is set (it is then halved). When falsy, no
                logits head is built and ``self.num_outputs`` becomes the
                width of the last hidden layer (or the flattened
                observation size if there are no hidden layers).
            model_config: Model options. Reads ``fcnet_activation``,
                ``fcnet_hiddens`` (missing/None means no hidden layers),
                ``vf_share_layers`` and ``free_log_std``.
                ``no_final_linear`` is not consulted by this constructor.
            name: Forwarded unchanged to ``TorchModelV2.__init__``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = model_config.get("fcnet_activation")
        # A missing or None entry is treated as "no hidden layers" instead
        # of crashing in the loops below.
        hiddens = list(model_config.get("fcnet_hiddens") or [])
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.free_log_std = model_config.get("free_log_std")

        # Generate free-floating bias variables for the second half of
        # the outputs.
        if self.free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two", num_outputs)
            num_outputs = num_outputs // 2

        # `np.prod` instead of the `np.product` alias, which NumPy 2.0
        # removed. Computed once and reused below.
        obs_size = int(np.prod(obs_space.shape))

        layers = []
        prev_layer_size = obs_size
        self._logits = None

        # One activated layer per entry in `hiddens`.
        for size in hiddens:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = size

        if num_outputs:
            # The head's split sizes and per-head hidden sizes are fixed
            # here, not taken from the config.
            self._logits = MultiActionFC(in_size=prev_layer_size,
                                         out_size=num_outputs,
                                         out_lens=[3, 10],
                                         at_hiddens=[32, 3],
                                         ap_hiddens=[32, 10],
                                         initializer=normc_initializer(0.01),
                                         activation=activation)
        else:
            self.num_outputs = hiddens[-1] if hiddens else obs_size

        # Layer to add the log std vars to the state-dependent means.
        if self.free_log_std and self._logits:
            self._append_free_log_std = AppendBiasLayer(num_outputs)

        self._hidden_layers = nn.Sequential(*layers)

        self._value_branch_separate = None
        if not self.vf_share_layers:
            # Build a parallel set of hidden layers for the value net.
            prev_vf_layer_size = obs_size
            vf_layers = []
            for size in hiddens:
                vf_layers.append(
                    SlimFC(in_size=prev_vf_layer_size,
                           out_size=size,
                           activation_fn=activation,
                           initializer=normc_initializer(1.0)))
                prev_vf_layer_size = size
            self._value_branch_separate = nn.Sequential(*vf_layers)

        # Both stacks are built from the same `hiddens`, so
        # `prev_layer_size` is the right input width in either setting.
        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        # Holds the current "base" output (before logits layer).
        self._features = None
        # Holds the last input, in case value branch is separate.
        self._last_flat_in = None
Beispiel #26
0
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        """Build the fully connected policy stack, logits layer and value net.

        Args:
            obs_space: Observation space; the product of its ``shape`` is
                the input width of the first layer.
            action_space: Forwarded unchanged to ``TorchModelV2.__init__``.
            num_outputs: Size of the policy output. Must be even when
                ``free_log_std`` is set (it is then halved). When falsy, no
                logits layer is built and ``self.num_outputs`` becomes the
                width of the last hidden layer (or the flattened
                observation size if there are no hidden layers).
            model_config: Model options. Reads ``fcnet_activation``,
                ``fcnet_hiddens`` (missing/None means no hidden layers),
                ``no_final_linear``, ``vf_share_layers`` and
                ``free_log_std``.
            name: Forwarded unchanged to ``TorchModelV2.__init__``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = model_config.get("fcnet_activation")
        # Normalise to a list: an explicit None no longer crashes and a
        # tuple of sizes behaves like a list.
        hiddens = list(model_config.get("fcnet_hiddens") or [])
        no_final_linear = model_config.get("no_final_linear")
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.free_log_std = model_config.get("free_log_std")

        # Generate free-floating bias variables for the second half of
        # the outputs.
        if self.free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two", num_outputs)
            num_outputs = num_outputs // 2

        # `np.prod` instead of the `np.product` alias, which NumPy 2.0
        # removed. Computed once and reused below.
        obs_size = int(np.prod(obs_space.shape))

        layers = []
        prev_layer_size = obs_size
        self._logits = None

        # Create layers 0 to second-last.
        for size in hiddens[:-1]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = size

        # The last layer is adjusted to be of size num_outputs, but it's a
        # layer with activation.
        if no_final_linear and num_outputs:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=num_outputs,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = num_outputs
        # Finish the layers with the provided sizes (`hiddens`), plus -
        # iff num_outputs > 0 - a last linear layer of size num_outputs.
        else:
            if hiddens:
                layers.append(
                    SlimFC(in_size=prev_layer_size,
                           out_size=hiddens[-1],
                           initializer=normc_initializer(1.0),
                           activation_fn=activation))
                prev_layer_size = hiddens[-1]
            if num_outputs:
                self._logits = SlimFC(in_size=prev_layer_size,
                                      out_size=num_outputs,
                                      initializer=normc_initializer(0.01),
                                      activation_fn=None)
            else:
                self.num_outputs = hiddens[-1] if hiddens else obs_size

        # Layer to add the log std vars to the state-dependent means.
        if self.free_log_std and self._logits:
            self._append_free_log_std = AppendBiasLayer(num_outputs)

        self._hidden_layers = nn.Sequential(*layers)

        self._value_branch_separate = None
        if not self.vf_share_layers:
            # Build a parallel set of hidden layers for the value net.
            prev_vf_layer_size = obs_size
            vf_layers = []
            for size in hiddens:
                vf_layers.append(
                    SlimFC(in_size=prev_vf_layer_size,
                           out_size=size,
                           activation_fn=activation,
                           initializer=normc_initializer(1.0)))
                prev_vf_layer_size = size
            self._value_branch_separate = nn.Sequential(*vf_layers)

        # NOTE(review): with `no_final_linear` and a non-empty separate
        # value stack, `prev_layer_size` is `num_outputs` while that stack
        # ends at `hiddens[-1]`. If value_function() feeds the separate
        # stack into `_value_branch`, these widths only agree when
        # num_outputs == hiddens[-1] - confirm against value_function().
        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        # Holds the current "base" output (before logits layer).
        self._features = None
        # Holds the last input, in case value branch is separate.
        self._last_flat_in = None
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):  #,
        #graph_layers, graph_features, graph_tabs, graph_edge_features, cnn_filters, value_cnn_filters, value_cnn_compression, cnn_compression, relative, activation):
        """Build paired "coop"/"greedy" policy and value sub-networks.

        Every sub-network is built once for the "coop" side and deep-copied
        for the "greedy" side, so both start from identical weights.

        Args:
            obs_space: Observation space. ``original_space['agents']`` is
                used for the agent count and the per-agent ``'map'`` shape;
                ``original_space['state']`` gives the value-CNN input
                shape. Both shapes are unpacked as ``(w, h, in_channels)``.
            action_space: Forwarded unchanged to ``TorchModelV2.__init__``.
            num_outputs: Forwarded to ``TorchModelV2.__init__``; the logits
                heads built here always end in 5 units.
            model_config: ``model_config['custom_model_config']`` overrides
                a deep copy of ``DEFAULT_OPTIONS``.
            name: Forwarded unchanged to ``TorchModelV2.__init__``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        # Start from the defaults and overlay the user-supplied options.
        self.cfg = copy.deepcopy(DEFAULT_OPTIONS)
        self.cfg.update(model_config['custom_model_config'])

        #self.cfg = model_config['custom_options']
        self.n_agents = len(obs_space.original_space['agents'])
        self.graph_features = self.cfg['graph_features']
        self.cnn_compression = self.cfg['cnn_compression']
        # Map the configured name to the activation *class*; it is
        # instantiated (`self.activation()`) wherever a layer is needed.
        self.activation = {
            'relu': nn.ReLU,
            'leakyrelu': nn.LeakyReLU
        }[self.cfg['activation']]

        # --- Per-agent observation CNN (sized from agent 0's 'map'). ---
        layers = []
        input_shape = obs_space.original_space['agents'][0]['map'].shape
        (w, h, in_channels) = input_shape

        in_size = [w, h]
        # All filters but the last use the padding from `same_padding`.
        for out_channels, kernel, stride in self.cfg['cnn_filters'][:-1]:
            padding, out_size = same_padding(in_size, kernel, [stride, stride])
            layers.append(
                SlimConv2d(in_channels,
                           out_channels,
                           kernel,
                           stride,
                           padding,
                           activation_fn=self.activation))
            in_channels = out_channels
            in_size = out_size

        # Last filter: padding=None and SlimConv2d's default activation,
        # then flatten everything after the batch dimension.
        out_channels, kernel, stride = self.cfg['cnn_filters'][-1]
        layers.append(
            SlimConv2d(in_channels, out_channels, kernel, stride, None))
        layers.append(nn.Flatten(1, -1))
        #if isinstance(cnn_compression, int):
        #    layers.append(nn.Linear(cnn_compression, self.cfg['graph_features']-2)) # reserve 2 for pos
        #    layers.append(self.activation{))
        self.coop_convs = nn.Sequential(*layers)
        self.greedy_convs = copy.deepcopy(self.coop_convs)

        # Further copies of the same CNN for the value side.
        self.coop_value_obs_convs = copy.deepcopy(self.coop_convs)
        self.greedy_value_obs_convs = copy.deepcopy(self.coop_convs)

        # Presumably prints a per-layer overview (torchsummary-style) -
        # TODO confirm. The input size is given channels-first.
        summary(self.coop_convs,
                device="cpu",
                input_size=(input_shape[2], input_shape[0], input_shape[1]))

        # --- Graph filter stack: `graph_layers` x (filter + activation). ---
        gfl = []
        for i in range(self.cfg['graph_layers']):
            gfl.append(
                gml_adv.GraphFilterBatchGSOA(self.graph_features,
                                             self.graph_features,
                                             self.cfg['graph_tabs'],
                                             self.cfg['agent_split'],
                                             self.cfg['graph_edge_features'],
                                             False))
            #gfl.append(gml.GraphFilterBatchGSO(self.graph_features, self.graph_features, self.cfg['graph_tabs'], self.cfg['graph_edge_features'], False))
            gfl.append(self.activation())

        self.GFL = nn.Sequential(*gfl)

        #gso_sum = torch.zeros(2, 1, 8, 8)
        #self.GFL[0].addGSO(gso_sum)
        #summary(self.GFL, device="cuda" if torch.cuda.is_available() else "cpu", input_size=(self.graph_features, 8))

        # --- Logits head: graph features, plus `cnn_compression` extra
        # inputs when 'cnn_residual' is enabled. ---
        logits_inp_features = self.graph_features
        if self.cfg['cnn_residual']:
            logits_inp_features += self.cnn_compression

        post_logits = [
            nn.Linear(logits_inp_features, 64),
            self.activation(),
            nn.Linear(64, 32),
            self.activation()
        ]
        # Final layer has a fixed width of 5, Xavier-uniform weights and a
        # zero bias.
        logit_linear = nn.Linear(32, 5)
        nn.init.xavier_uniform_(logit_linear.weight)
        nn.init.constant_(logit_linear.bias, 0)
        post_logits.append(logit_linear)
        self.coop_logits = nn.Sequential(*post_logits)
        self.greedy_logits = copy.deepcopy(self.coop_logits)
        summary(self.coop_logits,
                device="cpu",
                input_size=(logits_inp_features, ))

        ##############################

        # --- Value CNN over the 'state' observation; same construction
        # scheme as above but driven by 'value_cnn_filters'. ---
        layers = []
        input_shape = np.array(obs_space.original_space['state'].shape)
        (w, h, in_channels) = input_shape

        in_size = [w, h]
        for out_channels, kernel, stride in self.cfg['value_cnn_filters'][:-1]:
            padding, out_size = same_padding(in_size, kernel, [stride, stride])
            layers.append(
                SlimConv2d(in_channels,
                           out_channels,
                           kernel,
                           stride,
                           padding,
                           activation_fn=self.activation))
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = self.cfg['value_cnn_filters'][-1]
        layers.append(
            SlimConv2d(in_channels, out_channels, kernel, stride, None))
        layers.append(nn.Flatten(1, -1))

        self.coop_value_cnn = nn.Sequential(*layers)
        self.greedy_value_cnn = copy.deepcopy(self.coop_value_cnn)
        summary(self.greedy_value_cnn,
                device="cpu",
                input_size=(input_shape[2], input_shape[0], input_shape[1]))

        # --- Value head: its input width is `cnn_compression` plus
        # `value_cnn_compression`; it ends in a single output unit. ---
        layers = [
            nn.Linear(self.cnn_compression + self.cfg['value_cnn_compression'],
                      64),
            self.activation(),
            nn.Linear(64, 32),
            self.activation()
        ]
        values_linear = nn.Linear(32, 1)
        # Presumably initialises the weight in place using the
        # initializer's default argument - TODO confirm.
        normc_initializer()(values_linear.weight)
        nn.init.constant_(values_linear.bias, 0)
        layers.append(values_linear)

        self.coop_value_branch = nn.Sequential(*layers)
        self.greedy_value_branch = copy.deepcopy(self.coop_value_branch)
        summary(self.coop_value_branch,
                device="cpu",
                input_size=(self.cnn_compression +
                            self.cfg['value_cnn_compression'], ))

        self._cur_value = None

        # Pass the configured freeze flags to the freeze_* methods (defined
        # outside this block).
        self.freeze_coop_value(self.cfg['freeze_coop_value'])
        self.freeze_greedy_value(self.cfg['freeze_greedy_value'])
        self.freeze_coop(self.cfg['freeze_coop'])
        self.freeze_greedy(self.cfg['freeze_greedy'])