Example #1
    def _build_q_net(self, name_):
        # actions are concatenated with flattened obs
        critic_hidden_activation = self.model_config[
            "critic_hidden_activation"]
        critic_hiddens = self.model_config["critic_hiddens"]

        activation = get_activation_fn(critic_hidden_activation,
                                       framework="torch")
        q_net = nn.Sequential()
        ins = (self.obs_ins if self._is_action_discrete else self.obs_ins +
               self.action_dim)
        for i, n in enumerate(critic_hiddens):
            q_net.add_module(
                f"{name_}_hidden_{i}",
                SlimFC(
                    ins,
                    n,
                    initializer=torch.nn.init.xavier_uniform_,
                    activation_fn=activation,
                ),
            )
            ins = n

        q_net.add_module(
            f"{name_}_out",
            SlimFC(
                ins,
                self.action_space.n if self._is_action_discrete else 1,
                initializer=torch.nn.init.xavier_uniform_,
                activation_fn=None,
            ),
        )
        return q_net
Example #2
File: ma_dqn.py  Project: songCNMS/vrp
    def __init__(self, num_states, num_actions, num_nodes=100):
        """
        Initialize a deep Q-learning network for testing the algorithm.
            num_states: number of input features.
            num_actions: number of action values to output, one per action in the game.
        """
        super(DQNModule, self).__init__()
        layers = []
        prev_layer_size = num_states
        self.num_actions = num_actions
        self.num_nodes = num_nodes

        # Create layers 0 to second-last.
        self.hidden_out_size = 32
        for i, size in enumerate([512, 128, self.hidden_out_size]):
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            prev_layer_size = size
            # if i == 0:
            #     layers.append(nn.Dropout(p=0.3))
            # Add a batch norm layer.
            # layers.append(nn.BatchNorm1d(prev_layer_size))

        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=num_actions,
                                    initializer=torch_normc_initializer(1.0),
                                    activation_fn=None)

        self._hidden_layers = nn.Sequential(*layers)
Example #3
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        print(model_config)
        hiddens = model_config.get("fcnet_hiddens")
        activation = _get_activation_fn(model_config.get("fcnet_activation"))
        logger.debug("Constructing fcnet {} {}".format(hiddens, activation))
        layers = []
        last_layer_size = np.product(obs_space.shape)
        for size in hiddens:
            layers.append(
                SlimFC(in_size=last_layer_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            last_layer_size = size

        self._hidden_layers = nn.Sequential(*layers)

        self._logits = SlimFC(in_size=last_layer_size,
                              out_size=num_outputs,
                              initializer=normc_initializer(0.01),
                              activation_fn=None)
        self._value_branch = SlimFC(in_size=last_layer_size,
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        self._cur_value = None
Example #4
    def __init__(self, num_states=4, num_actions=18):
        """
        Initialize a deep Q-learning network for testing the algorithm.
            num_states: number of input features.
            num_actions: number of action values to output, one per action in the game.
        """
        super(DQNModule, self).__init__()
        self.device = torch.device(f"cuda:{dqn_config['cuda_id']}" if torch.
                                   cuda.is_available() else "cpu")
        layers = []
        prev_layer_size = num_states

        self.num_actions = num_actions

        # Create layers 0 to second-last.
        self.hidden_out_size = 64
        for size in [512, 256, 128, self.hidden_out_size]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            prev_layer_size = size
            # Add a batch norm layer.
            # layers.append(nn.BatchNorm1d(prev_layer_size))

        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=num_actions,
                                    initializer=torch_normc_initializer(1.0),
                                    activation_fn=None)

        self._hidden_layers = nn.Sequential(*layers)
Example #5
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, **kwargs):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        layers = []
        prev_layer_size = int(np.product(obs_space.shape))
        self._logits = None

        # Create layers 0 to second-last.
        for size in [256, 256]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            prev_layer_size = size
            # Add a batch norm layer.
            layers.append(nn.BatchNorm1d(prev_layer_size))

        self._logits = SlimFC(in_size=prev_layer_size,
                              out_size=self.num_outputs,
                              initializer=torch_normc_initializer(0.01),
                              activation_fn=None)

        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=1,
                                    initializer=torch_normc_initializer(1.0),
                                    activation_fn=None)

        self._hidden_layers = nn.Sequential(*layers)
        self._hidden_out = None
Example #6
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 num_frames=3):
        nn.Module.__init__(self)
        super(TorchFrameStackingCartPoleModel,
              self).__init__(obs_space, action_space, None, model_config, name)

        self.num_frames = num_frames
        self.num_outputs = num_outputs

        # Construct actual (very simple) FC model.
        assert len(obs_space.shape) == 1
        self.layer1 = SlimFC(in_size=obs_space.shape[0] * self.num_frames,
                             out_size=64,
                             activation_fn="relu")
        self.out = SlimFC(in_size=64,
                          out_size=self.num_outputs,
                          activation_fn="linear")
        self.values = SlimFC(in_size=64, out_size=1, activation_fn="linear")

        self._last_value = None

        self.view_requirements["prev_n_obs"] = ViewRequirement(
            data_col="obs",
            shift="-{}:0".format(num_frames - 1),
            space=obs_space)
        self.view_requirements["prev_rewards"] = ViewRequirement(
            data_col="rewards", shift=-1)
Example #7
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        custom_configs = model_config.get("custom_model_config")
        self._sensor_seq_len = custom_configs.get("sensor_seq_len", 10)

        activation = model_config.get("fcnet_activation", "tanh")

        encoder_layer = nn.TransformerEncoderLayer(d_model=3, nhead=3, batch_first=True, dim_feedforward=128)
        self._transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)

        self._all_fc1 = SlimFC(in_size=3,
                               out_size=64,
                               initializer=normc_initializer(1.0),
                               activation_fn=activation)

        self._all_fc2 = SlimFC(in_size=64,
                               out_size=16,
                               initializer=normc_initializer(1.0),
                               activation_fn=activation)

        self._action_layer = SlimFC(in_size=16,
                                    out_size=num_outputs,
                                    initializer=normc_initializer(0.01),
                                    activation_fn=None)
        self._value_layer = SlimFC(in_size=16,
                                   out_size=1,
                                   initializer=normc_initializer(0.01),
                                   activation_fn=None)

        self._features = None
Example #8
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        filters = model_config.get("conv_filters")
        if not filters:
            filters = _get_filter_config(obs_space.shape)
        layers = []

        (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = valid_padding(in_size, kernel,
                                              [stride, stride])
            layers.append(
                SlimConv2d(in_channels, out_channels, kernel, stride, padding))
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]
        layers.append(
            SlimConv2d(in_channels, out_channels, kernel, stride, None))
        self._convs = nn.Sequential(*layers)

        self._logits = SlimFC(out_channels,
                              num_outputs,
                              initializer=nn.init.xavier_uniform_)
        self._value_branch = SlimFC(out_channels,
                                    1,
                                    initializer=normc_initializer())
        self._cur_value = None
Example #9
        def build_q_net(name_):

            act = get_activation_fn(critic_hidden_activation,
                                    framework="torch")
            init = nn.init.xavier_uniform_
            # For discrete actions, only obs.
            q_net = nn.Sequential()
            ins = embed_dim
            # embed to encoder embed
            outs = self.critic_encoder.feature_dim
            q_net.add_module(
                "{}_hidden_{}".format(name_, "e"),
                SlimFC(ins, outs, initializer=init, activation_fn=act))
            ins = outs

            for i, n in enumerate(critic_hiddens):
                q_net.add_module(
                    "{}_hidden_{}".format(name_, i),
                    SlimFC(ins, n, initializer=init, activation_fn=act))
                ins = n

            q_net.add_module(
                "{}_out".format(name_),
                SlimFC(ins, q_outs, initializer=init, activation_fn=None))
            return q_net
Example #10
        def build_q_net(name_):
            activation = get_activation_fn(critic_hidden_activation,
                                           framework="torch")
            # For continuous actions: Feed obs and actions (concatenated)
            # through the NN. For discrete actions, only obs.
            q_net = nn.Sequential()
            ins = self.obs_ins + self.action_dim
            for i, n in enumerate(critic_hiddens):
                q_net.add_module(
                    "{}_hidden_{}".format(name_, i),
                    SlimFC(
                        ins,
                        n,
                        initializer=torch.nn.init.xavier_uniform_,
                        activation_fn=activation,
                    ),
                )
                ins = n

            q_net.add_module(
                "{}_out".format(name_),
                SlimFC(
                    ins,
                    1,
                    initializer=torch.nn.init.xavier_uniform_,
                    activation_fn=None,
                ),
            )
            return q_net
Example #11
    def __init__(
            self,
            obs_space: gym.spaces.Space,
            fcnet_hiddens_per_candidate=(256, 32),
    ):
        """Initializes a QValueModel instance.

        Each document candidate receives one full Q-value stack, defined by
        `fcnet_hiddens_per_candidate`. The input to each of these Q-value stacks
        is always {[user] concat [document[i]] for i in document_candidates}.

        Extra model kwargs:
            fcnet_hiddens_per_candidate: List of layer-sizes for each(!) of the
                candidate documents.
        """
        super().__init__()

        self.orig_obs_space = obs_space
        self.embedding_size = self.orig_obs_space["doc"]["0"].shape[0]
        self.num_candidates = len(self.orig_obs_space["doc"])
        assert self.orig_obs_space["user"].shape[0] == self.embedding_size

        self.q_nets = nn.ModuleList()
        for i in range(self.num_candidates):
            layers = nn.Sequential()
            ins = 2 * self.embedding_size
            for j, h in enumerate(fcnet_hiddens_per_candidate):
                layers.add_module(
                    f"q_layer_{i}_{j}",
                    SlimFC(in_size=ins, out_size=h, activation_fn="relu"),
                )
                ins = h
            layers.add_module(f"q_out_{i}", SlimFC(ins, 1, activation_fn=None))

            self.q_nets.append(layers)
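The docstring above describes one Q-value stack per candidate document, each fed the concatenation of the user embedding and that document's embedding. A minimal sketch of how the stacks could be evaluated (not part of the original snippet; the method and argument names are illustrative):

    def q_values(self, user, docs):
        # user: [B, embedding_size]; docs: list of num_candidates tensors,
        # each of shape [B, embedding_size].
        outs = []
        for i, q_net in enumerate(self.q_nets):
            x = torch.cat([user, docs[i]], dim=1)  # [B, 2 * embedding_size]
            outs.append(q_net(x))                  # [B, 1]
        return torch.cat(outs, dim=1)              # [B, num_candidates]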
Example #12
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):

        nn.Module.__init__(self)
        super().__init__(obs_space, action_space, None, model_config, name)

        self.cell_size = model_config["lstm_cell_size"]
        self.use_prev_action_reward = model_config[
            "lstm_use_prev_action_reward"]
        self.action_dim = int(np.product(action_space.shape))
        # Add prev-action/reward nodes to input to LSTM.
        if self.use_prev_action_reward:
            self.num_outputs += 1 + self.action_dim
        self.lstm = nn.LSTM(self.num_outputs, self.cell_size, batch_first=True)

        self.num_outputs = num_outputs

        # Postprocess LSTM output with another hidden layer and compute values.
        self._logits_branch = SlimFC(
            in_size=self.cell_size,
            out_size=self.num_outputs,
            activation_fn=None,
            initializer=torch.nn.init.xavier_uniform_)
        self._value_branch = SlimFC(
            in_size=self.cell_size,
            out_size=1,
            activation_fn=None,
            initializer=torch.nn.init.xavier_uniform_)
Example #13
    def __init__(
        self,
        in_dim: int,
        out_dim: int,
        num_heads: int,
        head_dim: int,
        input_layernorm: bool = False,
        output_activation: Union[str, callable] = None,
        **kwargs
    ):
        """Initializes a RelativeMultiHeadAttention nn.Module object.

        Args:
            in_dim: The input dimension of this module.
            out_dim: The output dimension of this module. Also known as
                "attention dim".
            num_heads: The number of attention heads to use.
                Denoted `H` in [2].
            head_dim: The dimension of a single(!) attention head
                Denoted `D` in [2].
            input_layernorm: Whether to prepend a LayerNorm before
                everything else. Should be True for building a GTrXL.
            output_activation (Union[str, callable]): Optional activation
                function or activation function specifier (str).
                Should be "relu" for GTrXL.
            **kwargs:
        """
        super().__init__(**kwargs)

        # No bias or non-linearity.
        self._num_heads = num_heads
        self._head_dim = head_dim

        # 3=Query, key, and value inputs.
        self._qkv_layer = SlimFC(
            in_size=in_dim, out_size=3 * num_heads * head_dim, use_bias=False
        )

        self._linear_layer = SlimFC(
            in_size=num_heads * head_dim,
            out_size=out_dim,
            use_bias=False,
            activation_fn=output_activation,
        )

        self._uvar = nn.Parameter(torch.zeros(num_heads, head_dim))
        self._vvar = nn.Parameter(torch.zeros(num_heads, head_dim))
        nn.init.xavier_uniform_(self._uvar)
        nn.init.xavier_uniform_(self._vvar)
        self.register_parameter("_uvar", self._uvar)
        self.register_parameter("_vvar", self._vvar)

        self._pos_proj = SlimFC(
            in_size=in_dim, out_size=num_heads * head_dim, use_bias=False
        )
        self._rel_pos_embedding = RelativePositionEmbedding(out_dim)

        self._input_layernorm = None
        if input_layernorm:
            self._input_layernorm = torch.nn.LayerNorm(in_dim)
Example #14
    def __init__(self,
                 input_size,
                 fe_hidden_sizes=[128],
                 cls_hidden_sizes=[128, 64]):
        super().__init__()
        assert len(fe_hidden_sizes) > 0
        assert len(cls_hidden_sizes) > 0
        layers = []
        for size in fe_hidden_sizes:
            layers.append(
                SlimFC(in_size=input_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            input_size = size
        self.feature_extractor = nn.Sequential(*layers)

        input_size = fe_hidden_sizes[
            -1] * 2  # Concatenate the features from the two samples.
        layers = []
        for size in cls_hidden_sizes:
            layers.append(
                SlimFC(in_size=input_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            input_size = size
        layers.append(
            SlimFC(in_size=input_size,
                   out_size=1,
                   initializer=normc_initializer(1.0)))
        self.classifier = nn.Sequential(*layers)
Example #15
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, cnn_shape):

        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name)

        self.lstm_state_size = 16
        self.cnn_shape = list(cnn_shape)
        self.visual_size_in = cnn_shape[0] * cnn_shape[1] * cnn_shape[2]
        # MobileNetV2 has a flat output of (1000,).
        self.visual_size_out = 1000

        # Load the MobileNetV2 from torch.hub.
        self.cnn_model = torch.hub.load("pytorch/vision:v0.6.0",
                                        "mobilenet_v2",
                                        pretrained=True)

        self.lstm = nn.LSTM(self.visual_size_out,
                            self.lstm_state_size,
                            batch_first=True)

        # Postprocess LSTM output with another hidden layer and compute values.
        self.logits = SlimFC(self.lstm_state_size, self.num_outputs)
        self.value_branch = SlimFC(self.lstm_state_size, 1)
        # Holds the current "base" output (before logits layer).
        self._features = None
Example #16
    def __init__(self, observation_space, action_space, num_outputs,
                 model_config, name):
        TorchModelV2.__init__(self, observation_space, action_space,
                              num_outputs, model_config, name)
        nn.Module.__init__(self)

        # Non-shared initial layer.
        self.first_layer = SlimFC(
            int(np.product(observation_space.shape)),
            64,
            activation_fn=nn.ReLU,
            initializer=torch.nn.init.xavier_uniform_)

        # Non-shared final layer.
        self.last_layer = SlimFC(
            64,
            self.num_outputs,
            activation_fn=None,
            initializer=torch.nn.init.xavier_uniform_)
        self.vf = SlimFC(
            64,
            1,
            activation_fn=None,
            initializer=torch.nn.init.xavier_uniform_,
        )
        self._global_shared_layer = TORCH_GLOBAL_SHARED_LAYER
        self._output = None
Example #17
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
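        # NOTE: `custom_input_space` below is not defined in this snippet; it
        # is assumed to be provided by the surrounding module (e.g. a
        # module-level preprocessed observation space).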
        TorchModelV2.__init__(self, custom_input_space, action_space,
                              num_outputs, model_config, name)
        nn.Module.__init__(self)

        self.torch_sub_model = TorchFC(custom_input_space, action_space,
                                       num_outputs, model_config, name)
        prev_safe_layer_size = int(np.product(custom_input_space.shape))
        vf_layers = []
        activation = model_config.get("fcnet_activation")
        hiddens = [32]
        for size in hiddens:
            vf_layers.append(
                SlimFC(in_size=prev_safe_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_safe_layer_size = size
        vf_layers.append(
            SlimFC(in_size=prev_safe_layer_size,
                   out_size=1,
                   initializer=normc_initializer(0.01),
                   activation_fn=None))
        self.safe_branch_separate = nn.Sequential(*vf_layers)
        self.last_in = None
Example #18
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        if action_space != Tuple([Discrete(2), Discrete(2)]):
            raise ValueError(
                "This model only supports the [2, 2] action space")

        # Output of the model (normally 'logits', but for an autoregressive
        # dist this is more like a context/feature layer encoding the obs)
        self.context_layer = SlimFC(
            in_size=obs_space.shape[0],
            out_size=num_outputs,
            initializer=normc_init_torch(1.0),
            activation_fn=nn.Tanh,
        )

        # V(s)
        self.value_branch = SlimFC(
            in_size=num_outputs,
            out_size=1,
            initializer=normc_init_torch(0.01),
            activation_fn=None,
        )

        # P(a1 | obs)
        self.a1_logits = SlimFC(in_size=num_outputs,
                                out_size=2,
                                activation_fn=None,
                                initializer=normc_init_torch(0.01))

        class _ActionModel(nn.Module):
            def __init__(self):
                nn.Module.__init__(self)
                self.a2_hidden = SlimFC(in_size=1,
                                        out_size=16,
                                        activation_fn=nn.Tanh,
                                        initializer=normc_init_torch(1.0))
                self.a2_logits = SlimFC(in_size=16,
                                        out_size=2,
                                        activation_fn=None,
                                        initializer=normc_init_torch(0.01))

            def forward(self_, ctx_input, a1_input):
                a1_logits = self.a1_logits(ctx_input)
                a2_logits = self_.a2_logits(self_.a2_hidden(a1_input))
                return a1_logits, a2_logits

        # P(a2 | a1)
        # --note: typically you'd want to implement P(a2 | a1, obs) as follows:
        # a2_context = tf.keras.layers.Concatenate(axis=1)(
        #     [ctx_input, a1_input])
        self.action_module = _ActionModel()

        self._context = None
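Following the comment above about typically implementing P(a2 | a1, obs), here is a hedged sketch of an action module that concatenates the context features with a1 before the a2 branch. The layer sizes mirror the model above; everything else (class name, `ctx_size` argument) is illustrative, not part of the original example.

    class _ActionModelWithObs(nn.Module):
        def __init__(self, ctx_size):
            nn.Module.__init__(self)
            self.a2_hidden = SlimFC(in_size=ctx_size + 1,
                                    out_size=16,
                                    activation_fn=nn.Tanh,
                                    initializer=normc_init_torch(1.0))
            self.a2_logits = SlimFC(in_size=16,
                                    out_size=2,
                                    activation_fn=None,
                                    initializer=normc_init_torch(0.01))

        def forward(self, ctx_input, a1_input):
            # Condition a2 on both the obs context and the sampled a1.
            a2_vec = torch.cat([ctx_input, a1_input], dim=1)
            return self.a2_logits(self.a2_hidden(a2_vec))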
Example #19
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):

        nn.Module.__init__(self)
        super().__init__(obs_space, action_space, None, model_config, name)

        if isinstance(action_space, Discrete):
            self.action_dim = action_space.n
        elif isinstance(action_space, MultiDiscrete):
            self.action_dim = np.product(action_space.nvec)
        elif action_space.shape is not None:
            self.action_dim = int(np.product(action_space.shape))
        else:
            self.action_dim = int(len(action_space))

        cfg = model_config

        self.attention_dim = cfg["attention_dim"]

        # Construct GTrXL sub-module w/ num_outputs=None (so it does not
        # create a logits/value output; we'll do this ourselves in this wrapper
        # here).
        self.gtrxl = GTrXLNet(
            obs_space,
            action_space,
            None,
            model_config,
            "gtrxl",
            num_transformer_units=cfg["attention_num_transformer_units"],
            attention_dim=self.attention_dim,
            num_heads=cfg["attention_num_heads"],
            head_dim=cfg["attention_head_dim"],
            memory_inference=cfg["attention_memory_inference"],
            memory_training=cfg["attention_memory_training"],
            position_wise_mlp_dim=cfg["attention_position_wise_mlp_dim"],
            init_gru_gate_bias=cfg["attention_init_gru_gate_bias"],
        )

        # Set final num_outputs to correct value (depending on action space).
        self.num_outputs = num_outputs

        # Postprocess GTrXL output with another hidden layer and compute
        # values.
        self._logits_branch = SlimFC(
            in_size=self.attention_dim,
            out_size=self.num_outputs,
            activation_fn=None,
            initializer=torch.nn.init.xavier_uniform_)
        self._value_branch = SlimFC(
            in_size=self.attention_dim,
            out_size=1,
            activation_fn=None,
            initializer=torch.nn.init.xavier_uniform_)

        self.view_requirements = self.gtrxl.view_requirements
Example #20
    def __init__(self):
        nn.Module.__init__(self)
        self.a2_hidden = SlimFC(in_size=1,
                                out_size=16,
                                activation_fn=nn.Tanh,
                                initializer=normc_init_torch(1.0))
        self.a2_logits = SlimFC(in_size=16,
                                out_size=2,
                                activation_fn=None,
                                initializer=normc_init_torch(0.01))
Example #21
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        # Nonlinearity for fully connected net (tanh, relu). Default: "tanh"
        activation = model_config.get("fcnet_activation")
        # Number of hidden layers for fully connected net. Default: [256, 256]
        hiddens = [256, 256]  # model_config.get("fcnet_hiddens", [])
        # Whether to skip the final linear layer used to resize the hidden layer
        # outputs to size `num_outputs`. If True, then the last hidden layer
        # should already match num_outputs.
        # no_final_linear = False

        self.vf_share_layers = model_config.get("vf_share_layers")
        self.free_log_std = False

        self._embedd = nn.Embedding(
            int(obs_space.high[0][-1]) + 1, CARD_EMBEDD_SIZE)

        # Player Hot Encoded = 3 * Number of Cards Played per trick = 4
        # CARD_EMBEDD_SIZE * Number of Cards Played per trick = 4

        self._hidden_layers = self._build_hidden_layers(
            first_layer_size=FIRST_LAYER_SIZE,
            hiddens=hiddens,
            activation=activation)

        self._value_branch_separate = None
        self._value_embedding = None
        if not self.vf_share_layers:
            # Build a parallel set of hidden layers for the value net.
            self._value_embedding = nn.Embedding(
                int(obs_space.high[0][-1]) + 1, CARD_EMBEDD_SIZE)
            self._value_branch_separate = self._build_hidden_layers(
                first_layer_size=FIRST_LAYER_SIZE,
                hiddens=hiddens,
                activation=activation)
        self._logits = SlimFC(in_size=hiddens[-1],
                              out_size=num_outputs,
                              initializer=normc_initializer(0.01),
                              activation_fn=None)

        self._value_branch = SlimFC(in_size=hiddens[-1],
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        # Holds the current "base" output (before logits layer).
        self._features = None
        # Holds the last input, in case value branch is separate.
        self._cards_in = None
        self._players_in = None
Example #22
    def __init__(self,
                 in_dim,
                 out_dim,
                 num_heads,
                 head_dim,
                 rel_pos_encoder,
                 input_layernorm=False,
                 output_activation=None,
                 **kwargs):
        """Initializes a RelativeMultiHeadAttention nn.Module object.

        Args:
            in_dim (int): The input dimension of this module.
            out_dim (int): The output dimension of this module. Also known as
                "attention dim".
            num_heads (int): The number of attention heads to use.
                Denoted `H` in [2].
            head_dim (int): The dimension of a single(!) attention head
                Denoted `D` in [2].
            rel_pos_encoder: The relative positional encoder to use.
            input_layernorm (bool): Whether to prepend a LayerNorm before
                everything else. Should be True for building a GTrXL.
            output_activation (Optional[callable]): Optional output activation
                function. Should be relu for GTrXL.
            **kwargs:
        """
        super().__init__(**kwargs)

        # No bias or non-linearity.
        self._num_heads = num_heads
        self._head_dim = head_dim

        # 3=Query, key, and value inputs.
        self._qkv_layer = SlimFC(
            in_size=in_dim, out_size=3 * num_heads * head_dim, use_bias=False)

        self._linear_layer = SlimFC(
            in_size=num_heads * head_dim,
            out_size=out_dim,
            use_bias=False,
            activation_fn=output_activation)

        self._pos_proj = SlimFC(
            in_size=in_dim, out_size=num_heads * head_dim, use_bias=False)

        # Wrap u/v in nn.Parameter so they are registered with (and trained
        # as part of) the module; plain tensors would be left untrained.
        self._uvar = nn.Parameter(torch.zeros(num_heads, head_dim))
        self._vvar = nn.Parameter(torch.zeros(num_heads, head_dim))
        nn.init.xavier_uniform_(self._uvar)
        nn.init.xavier_uniform_(self._vvar)

        self._rel_pos_encoder = rel_pos_encoder
        self._input_layernorm = None

        if input_layernorm:
            self._input_layernorm = torch.nn.LayerNorm(in_dim)
Example #23
    def __init__(self, device, num_states=4, num_actions=18):
        """
        Initialize a deep Q-learning network for testing the algorithm.
            num_states: number of input features.
            num_actions: number of action values to output, one per action in the game.
        """
        super(DQNActionModule, self).__init__()
        self.device = device
        state_layers = []
        action_layers = []

        self.num_states = num_states
        self.num_actions = num_actions

        # Create layers 0 to second-last.
        state_prev_layer_size = num_states
        self.state_hidden_out_size = 32
        for size in [256, 128, self.state_hidden_out_size]:
            state_layers.append(
                SlimFC(in_size=state_prev_layer_size,
                       out_size=size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            state_prev_layer_size = size

        action_prev_layer_size = num_actions
        self.action_hidden_out_size = 32
        for size in [64, self.action_hidden_out_size]:
            action_layers.append(
                SlimFC(in_size=action_prev_layer_size,
                       out_size=size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            action_prev_layer_size = size

        layers = []
        prev_layer_size = self.state_hidden_out_size + self.action_hidden_out_size
        self.hidden_out_size = 32
        for size in [64, self.hidden_out_size]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            prev_layer_size = size

        self._value_branch = SlimFC(in_size=self.hidden_out_size,
                                    out_size=1,
                                    initializer=torch_normc_initializer(1.0),
                                    activation_fn=None)

        self._state_hidden_layers = nn.Sequential(*state_layers)
        self._action_hidden_layers = nn.Sequential(*action_layers)
        self._hidden_layers = nn.Sequential(*layers)
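A hedged sketch (not part of the snippet above) of how the three towers built here would typically be combined: encode the state and the action separately, concatenate the two encodings, then run the joint layers and the value head. The method name and argument names are illustrative.

    def forward(self, states, actions):
        s = self._state_hidden_layers(states)              # [B, 32]
        a = self._action_hidden_layers(actions)            # [B, 32]
        x = self._hidden_layers(torch.cat([s, a], dim=1))  # [B, 32]
        return self._value_branch(x)                       # [B, 1]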
Example #24
File: ma_dp_dqn.py  Project: songCNMS/vrp
    def __init__(self, num_states, num_actions, dqn_config):
        """
        Initialize a deep Q-learning network for testing the algorithm.
            num_states: number of input features.
            num_actions: number of action values to output, one per action in the game.
        """
        super(DQNTransitionModule, self).__init__()
        
        self.num_states = num_states
        self.num_actions = num_actions
        self.device = torch.device(f"cuda:{dqn_config['cuda_id']}" if torch.cuda.is_available() else "cpu")

        self.state_emb_model = None
        layers = []
        prev_layer_size = num_states
        self.state_emb_size = 32
        for i, size in enumerate([128, 64, self.state_emb_size]):
            layers.append(
                SlimFC(
                    in_size=prev_layer_size,
                    out_size=size,
                    initializer=torch_normc_initializer(1.0),
                    activation_fn=nn.ReLU))
            prev_layer_size = size
        self.state_emb_model = nn.Sequential(*layers).to(self.device)

        self.action_emb_model = None
        layers = []
        prev_layer_size = num_actions
        self.action_emb_size = 16
        for i, size in enumerate([64, self.action_emb_size]):
            layers.append(
                SlimFC(
                    in_size=prev_layer_size,
                    out_size=size,
                    initializer=torch_normc_initializer(1.0),
                    activation_fn=nn.ReLU))
            prev_layer_size = size
        self.action_emb_model = nn.Sequential(*layers).to(self.device)


        self.transition_emb_model = None
        layers = []
        prev_layer_size = self.state_emb_size + self.action_emb_size
        self.transition_emb_size = self.state_emb_size
        for i, size in enumerate([128, self.transition_emb_size]):
            layers.append(
                SlimFC(
                    in_size=prev_layer_size,
                    out_size=size,
                    initializer=torch_normc_initializer(1.0),
                    activation_fn=nn.ReLU))
            prev_layer_size = size
        self.transition_emb_model = nn.Sequential(*layers).to(self.device)
Example #25
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        self.conv1 = SlimConv2d(obs_space.shape[0], 32, 3, 2, 1)
        self.conv2 = SlimConv2d(32, 32, 3, 2, 1)
        self.conv3 = SlimConv2d(32, 32, 3, 2, 1)
        self.conv4 = SlimConv2d(32, 32, 3, 2, 1)
        self.fc1 = SlimFC(32 * 6 * 6, 512)
        self.fc_out = SlimFC(512, num_outputs)
Example #26
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):

        nn.Module.__init__(self)
        super().__init__(obs_space, action_space, None, model_config, name)

        self.cell_size = model_config["lstm_cell_size"]
        self.time_major = model_config.get("_time_major", False)
        self.use_prev_action_reward = model_config[
            "lstm_use_prev_action_reward"]
        self.action_dim = int(np.product(action_space.shape))
        # Add prev-action/reward nodes to input to LSTM.
        if self.use_prev_action_reward:
            self.num_outputs += 1 + self.action_dim
        self.lstm = nn.LSTM(self.num_outputs,
                            self.cell_size,
                            batch_first=not self.time_major)

        self.num_outputs = num_outputs

        # Postprocess LSTM output with another hidden layer and compute values.
        self._logits_branch = SlimFC(in_size=self.cell_size,
                                     out_size=self.num_outputs,
                                     activation_fn=None,
                                     initializer=torch.nn.init.xavier_uniform_)
        self._value_branch = SlimFC(in_size=self.cell_size,
                                    out_size=1,
                                    activation_fn=None,
                                    initializer=torch.nn.init.xavier_uniform_)

        self.inference_view_requirements.update(
            dict(
                **{
                    SampleBatch.OBS:
                    ViewRequirement(shift=0),
                    SampleBatch.PREV_REWARDS:
                    ViewRequirement(SampleBatch.REWARDS, shift=-1),
                    SampleBatch.PREV_ACTIONS:
                    ViewRequirement(SampleBatch.ACTIONS,
                                    space=self.action_space,
                                    shift=-1),
                }))
        for i in range(2):
            self.inference_view_requirements["state_in_{}".format(i)] = \
                ViewRequirement(
                    "state_out_{}".format(i),
                    shift=-1,
                    space=Box(-1.0, 1.0, shape=(self.cell_size,)))
            self.inference_view_requirements["state_out_{}".format(i)] = \
                ViewRequirement(
                    space=Box(-1.0, 1.0, shape=(self.cell_size,)))
Example #27
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):

        nn.Module.__init__(self)
        super().__init__(obs_space, action_space, None, model_config, name)

        self.cell_size = model_config["lstm_cell_size"]
        self.time_major = model_config.get("_time_major", False)
        self.use_prev_action = model_config["lstm_use_prev_action"]
        self.use_prev_reward = model_config["lstm_use_prev_reward"]

        if isinstance(action_space, Discrete):
            self.action_dim = action_space.n
        elif isinstance(action_space, MultiDiscrete):
            self.action_dim = np.product(action_space.nvec)
        elif action_space.shape is not None:
            self.action_dim = int(np.product(action_space.shape))
        else:
            self.action_dim = int(len(action_space))

        # Add prev-action/reward nodes to input to LSTM.
        if self.use_prev_action:
            self.num_outputs += self.action_dim
        if self.use_prev_reward:
            self.num_outputs += 1

        self.lstm = nn.LSTM(self.num_outputs,
                            self.cell_size,
                            batch_first=not self.time_major)

        self.num_outputs = num_outputs

        # Postprocess LSTM output with another hidden layer and compute values.
        self._logits_branch = SlimFC(in_size=self.cell_size,
                                     out_size=self.num_outputs,
                                     activation_fn=None,
                                     initializer=torch.nn.init.xavier_uniform_)
        self._value_branch = SlimFC(in_size=self.cell_size,
                                    out_size=1,
                                    activation_fn=None,
                                    initializer=torch.nn.init.xavier_uniform_)

        # Add prev-a/r to this model's view, if required.
        if model_config["lstm_use_prev_action"]:
            self.inference_view_requirements[SampleBatch.PREV_ACTIONS] = \
                ViewRequirement(SampleBatch.ACTIONS, space=self.action_space,
                                data_rel_pos=-1)
        if model_config["lstm_use_prev_reward"]:
            self.inference_view_requirements[SampleBatch.PREV_REWARDS] = \
                ViewRequirement(SampleBatch.REWARDS, data_rel_pos=-1)
Example #28
    def __init__(self, observation_space, action_space, num_outputs,
                 model_config, name):
        TorchModelV2.__init__(self, observation_space, action_space,
                              num_outputs, model_config, name)
        nn.Module.__init__(self)

        # Non-shared initial layer.
        self.first_layer = SlimFC(int(np.product(observation_space.shape)),
                                  32,
                                  activation_fn=nn.ReLU)

        # Non-shared final layer.
        self.last_layer = SlimFC(32, self.num_outputs, activation_fn=nn.ReLU)
        self.vf = SlimFC(32, 1, activation_fn=None)
Example #29
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        # Base of the model
        self.model = TorchFC(obs_space, action_space, num_outputs,
                             model_config, name)

        # Central VF maps (obs, opp_obs, opp_act) -> vf_pred
        input_size = 6 + 6 + 2  # obs + opp_obs + opp_act
        self.central_vf_dense = SlimFC(input_size, 16, activation_fn=nn.Tanh)
        self.central_vf_out = SlimFC(16, 1)
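A hedged sketch of the centralized value function described by the comment above (obs, opp_obs, opp_act concatenated and scored by the two SlimFC layers); the method name and exact call pattern are assumptions, not part of the snippet.

    def central_value_function(self, obs, opponent_obs, opponent_actions):
        x = torch.cat([obs, opponent_obs, opponent_actions], dim=1)  # [B, 6 + 6 + 2]
        return torch.reshape(self.central_vf_out(self.central_vf_dense(x)), [-1])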
Example #30
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        self.size_in = 3
        self.lstm_state_size = 64
        self.grab_location = 1
        self.lstm = nn.LSTM(self.size_in, self.lstm_state_size, batch_first=True)
        self.num_outputs = self.grab_location + 7  ## one grab location + seven (position + translation)

        # Postprocess LSTM output with another hidden layer and compute values.
        self.linear = SlimFC(self.lstm_state_size, self.num_outputs, activation_fn="tanh")
        self.value_branch = SlimFC(self.lstm_state_size, 1, activation_fn=None)
        self._features = None