Example no. 1
    def __init__(self, n_agents, state_feat_dim, a_dim, num_kernel,
                 adv_hidden_units):
        super().__init__()

        self.key_extractors = nn.ModuleList()
        self.agents_extractors = nn.ModuleList()
        self.action_extractors = nn.ModuleList()
        for i in range(num_kernel):  # multi-head attention
            self.key_extractors.append(
                MLP(input_dim=state_feat_dim,
                    hidden_units=adv_hidden_units,
                    layer='linear',
                    act_fn='relu',
                    output_shape=1))  # key
            self.agents_extractors.append(
                MLP(input_dim=state_feat_dim,
                    hidden_units=adv_hidden_units,
                    layer='linear',
                    act_fn='relu',
                    output_shape=n_agents))  # agent
            self.action_extractors.append(
                MLP(input_dim=state_feat_dim + n_agents * a_dim,
                    hidden_units=adv_hidden_units,
                    layer='linear',
                    act_fn='relu',
                    output_shape=n_agents))  # action
Example no. 2
 def __init__(self,
              obs_spec,
              rep_net_params,
              action_dim,
              options_num,
              network_settings,
              is_continuous=True):
     super().__init__(obs_spec, rep_net_params)
     self.actions_num = action_dim
     self.options_num = options_num
     self.share = MLP(self.rep_net.h_dim, network_settings['share'])
     if network_settings['share']:
         ins = network_settings['share'][-1]
     else:
         ins = self.rep_net.h_dim
     self.q = MLP(ins, network_settings['q'], output_shape=options_num)
     self.pi = MLP(ins,
                   network_settings['intra_option'],
                   output_shape=options_num * action_dim,
                   out_act='tanh' if is_continuous else None)
     self.beta = MLP(ins,
                     network_settings['termination'],
                     output_shape=options_num,
                     out_act='sigmoid')
     self.o = MLP(ins,
                  network_settings['o'],
                  output_shape=options_num,
                  out_act='log_softmax')
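
The four heads above follow the option-critic layout: q scores the options, pi parameterizes one intra-option policy per option, beta gives per-option termination probabilities, and o is a log-softmax over options. The sketch below shows, in plain PyTorch and with purely illustrative tensors (not the MLP/rep_net objects above), how such head outputs are typically reshaped and consumed.

import torch

# Hypothetical head outputs for a batch of B encoded observations.
B, options_num, action_dim = 4, 3, 2
q = torch.randn(B, options_num)                    # q(s, w): value of each option
pi = torch.randn(B, options_num * action_dim)      # flat intra-option policy output
beta = torch.sigmoid(torch.randn(B, options_num))  # termination probability per option

pi = pi.reshape(B, options_num, action_dim)        # one policy head per option

# Pick the active option greedily and read out its policy parameters.
options = q.argmax(dim=-1)                                   # [B]
active_pi = pi[torch.arange(B), options]                     # [B, action_dim]
terminate = torch.bernoulli(beta[torch.arange(B), options])  # sampled termination flags
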
Example no. 3
    def __init__(self,
                 n_agents,
                 state_spec,
                 rep_net_params,
                 mixing_embed_dim=8,
                 hidden_units=[8],
                 **kwargs):
        super().__init__()

        self.rep_net = RepresentationNetwork(obs_spec=state_spec,
                                             rep_net_params=rep_net_params)
        self.embed_dim = mixing_embed_dim
        self.hyper_w_1 = MLP(input_dim=self.rep_net.h_dim,
                             hidden_units=hidden_units,
                             layer='linear',
                             act_fn='relu',
                             output_shape=self.embed_dim * n_agents)
        self.hyper_w_final = MLP(input_dim=self.rep_net.h_dim,
                                 hidden_units=hidden_units,
                                 layer='linear',
                                 act_fn='relu',
                                 output_shape=self.embed_dim)
        # State dependent bias for hidden layer
        self.hyper_b_1 = nn.Linear(self.rep_net.h_dim, self.embed_dim)
        # V(s) instead of a bias for the last layer
        self.V = MLP(input_dim=self.rep_net.h_dim,
                     hidden_units=[self.embed_dim],
                     layer='linear',
                     act_fn='relu',
                     output_shape=1)
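
These hypernetworks (hyper_w_1, hyper_b_1, hyper_w_final, V) match the standard QMIX mixing scheme, in which per-agent Q-values are combined monotonically under state-conditioned weights. Below is a minimal sketch of that mixing step in plain PyTorch, assuming the usual QMIX forward pass; the tensors stand in for the hypernetwork outputs and are not taken from the code above.

import torch
import torch.nn.functional as F

B, n_agents, embed_dim, h_dim = 4, 3, 8, 16
agent_qs = torch.randn(B, 1, n_agents)        # per-agent Q-values
state_feat = torch.randn(B, h_dim)            # output of the representation network

# Stand-ins for the hypernetwork outputs defined above.
w1 = torch.abs(torch.randn(B, n_agents * embed_dim)).view(B, n_agents, embed_dim)
b1 = torch.randn(B, 1, embed_dim)
w_final = torch.abs(torch.randn(B, embed_dim)).view(B, embed_dim, 1)
v = torch.randn(B, 1, 1)                      # V(s), replacing the last-layer bias

# Monotonic mixing: abs() on the weights keeps dQ_tot/dQ_i >= 0.
hidden = F.elu(torch.bmm(agent_qs, w1) + b1)  # [B, 1, embed_dim]
q_tot = torch.bmm(hidden, w_final) + v        # [B, 1, 1]
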
Example no. 4
    def __init__(self, n_agents, a_dim, state_spec, rep_net_params,
                 hidden_units, is_minus_one, weighted_head, num_kernel,
                 adv_hidden_units):
        super().__init__()

        self.rep_net = RepresentationNetwork(obs_spec=state_spec,
                                             rep_net_params=rep_net_params)

        self.is_minus_one = is_minus_one
        self.weighted_head = weighted_head

        self.hyper_w_final = MLP(input_dim=self.rep_net.h_dim,
                                 hidden_units=hidden_units,
                                 layer='linear',
                                 act_fn='relu',
                                 output_shape=n_agents)
        self.V = MLP(input_dim=self.rep_net.h_dim,
                     hidden_units=hidden_units,
                     layer='linear',
                     act_fn='relu',
                     output_shape=n_agents)

        self.si_weight = SI_Weight(n_agents=n_agents,
                                   state_feat_dim=self.rep_net.h_dim,
                                   a_dim=a_dim,
                                   num_kernel=num_kernel,
                                   adv_hidden_units=adv_hidden_units)
Example no. 5
 def __init__(self, obs_spec, rep_net_params, action_dim, network_settings):
     super().__init__(obs_spec, rep_net_params)
     self.q1 = MLP(self.rep_net.h_dim + action_dim,
                   network_settings,
                   output_shape=1)
     self.q2 = MLP(self.rep_net.h_dim + action_dim,
                   network_settings,
                   output_shape=1)
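
Two independent Q-heads over the same state-action input is the clipped double-Q pattern used by TD3/SAC-style critics. The sketch below shows how such a pair is typically queried; q1 and q2 here are plain linear stand-ins, not the MLP instances above.

import torch
import torch.nn as nn

h_dim, action_dim = 16, 4
q1 = nn.Linear(h_dim + action_dim, 1)    # stand-ins for the two critics above
q2 = nn.Linear(h_dim + action_dim, 1)

feat = torch.randn(8, h_dim)             # encoded observation, [B, h_dim]
action = torch.randn(8, action_dim)
x = torch.cat([feat, action], dim=-1)    # both heads see the same (s, a) input

# Clipped double-Q: take the element-wise minimum to curb overestimation.
q_target = torch.min(q1(x), q2(x))
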
Example no. 6
 def __init__(self, obs_spec, rep_net_params, action_dim, network_settings):
     assert len(network_settings) > 1, \
         "This critic architecture requires more than one layer in network_settings."
     super().__init__(obs_spec, rep_net_params)
     self.feature_net = MLP(self.rep_net.h_dim + action_dim,
                            network_settings[0:1])
     # feature_net outputs network_settings[0] features, which are concatenated with the action again
     ins = network_settings[0] + action_dim
     self.net = MLP(ins, network_settings[1:], output_shape=1)
Example no. 7
 def __init__(self, obs_spec, rep_net_params, output_shape,
              network_settings):
     super().__init__(obs_spec, rep_net_params)
     self.share = MLP(self.rep_net.h_dim, network_settings['share'])
     if network_settings['share']:
         ins = network_settings['share'][-1]
     else:
         ins = self.rep_net.h_dim
     self.v = MLP(ins, network_settings['v'], output_shape=1)
     self.adv = MLP(ins, network_settings['adv'], output_shape=output_shape)
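
The v and adv heads form a dueling decomposition. A minimal sketch of the usual aggregation, assuming the standard mean-subtracted form:

import torch

B, n_actions = 4, 6
v = torch.randn(B, 1)            # state-value stream
adv = torch.randn(B, n_actions)  # advantage stream

# Subtracting the mean advantage keeps the decomposition identifiable.
q = v + adv - adv.mean(dim=-1, keepdim=True)  # [B, n_actions]
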
Example no. 8
 def __init__(self, obs_spec, rep_net_params, output_shape,
              network_settings):
     super().__init__(obs_spec, rep_net_params)
     self.soft_clip = network_settings['soft_clip']
     self.log_std_min, self.log_std_max = network_settings['log_std_bound']
     self.share = MLP(self.rep_net.h_dim, network_settings['share'])
     if network_settings['share']:
         ins = network_settings['share'][-1]
     else:
         ins = self.rep_net.h_dim
     self.mu = MLP(ins, network_settings['mu'], output_shape=output_shape)
     self.log_std = MLP(ins,
                        network_settings['log_std'],
                        output_shape=output_shape)
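
A mu head, a log_std head, and a log_std_bound are the usual ingredients of a squashed Gaussian policy. The sketch below assumes the simple hard-clamp variant (the soft_clip flag above presumably selects a smoother alternative); all tensors are illustrative.

import torch
from torch.distributions import Normal

log_std_min, log_std_max = -20.0, 2.0     # illustrative bounds
mu = torch.randn(4, 2)                    # output of the mu head
log_std = torch.randn(4, 2)               # output of the log_std head

log_std = log_std.clamp(log_std_min, log_std_max)  # keep the std in a sane range
dist = Normal(mu, log_std.exp())
action = torch.tanh(dist.rsample())       # reparameterized sample, squashed to [-1, 1]
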
Example no. 9
 def __init__(self,
              obs_spec,
              rep_net_params,
              output_shape,
              network_settings,
              out_act=None):
     super().__init__(obs_spec, rep_net_params)
     self.net_q = MLP(self.rep_net.h_dim,
                      network_settings,
                      output_shape=output_shape,
                      out_act=out_act)
     self.net_i = MLP(self.rep_net.h_dim,
                      network_settings,
                      output_shape=output_shape,
                      out_act=out_act)
Example no. 10
 def __init__(self, obs_spec, rep_net_params, action_dim, quantiles_idx,
              network_settings):
     super().__init__(obs_spec, rep_net_params)
     self.action_dim = action_dim
     # [B, self.rep_net.h_dim]
     self.q_net_head = MLP(self.rep_net.h_dim, network_settings['q_net'])
     # [N*B, quantiles_idx]
     self.quantile_net = MLP(quantiles_idx, network_settings['quantile'])
     if network_settings['quantile']:  # TODO
         ins = network_settings['quantile'][-1]
     else:
         ins = quantiles_idx
     # [N*B, network_settings['quantile'][-1]]
     self.q_net_tile = MLP(ins,
                           network_settings['tile'],
                           output_shape=action_dim)
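
A quantile_net over quantiles_idx inputs is characteristic of IQN-style implicit quantile networks, where each sampled quantile fraction tau is expanded through a cosine basis before being mixed with the state features. The sketch below assumes the standard IQN cosine embedding; shapes and names are illustrative only.

import math

import torch

B, N, quantiles_idx = 4, 8, 64
tau = torch.rand(B * N, 1)                        # sampled quantile fractions, [N*B, 1]
i = torch.arange(1, quantiles_idx + 1).float()    # cosine basis indices 1..quantiles_idx

# Cosine embedding of tau, shape [N*B, quantiles_idx]: the kind of input
# the quantile_net above would consume.
tau_embed = torch.cos(math.pi * i * tau)

# Downstream, the embedded quantiles are typically merged multiplicatively with the
# state features, e.g. q_net_tile(q_net_head(obs_feat) * quantile_net(tau_embed)),
# which requires the two branches to share the same width.
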
Example no. 11
    def __init__(self, obs_spec, rep_net_params, output_shape,
                 network_settings):
        super().__init__(obs_spec, rep_net_params)
        self.condition_sigma = network_settings['condition_sigma']
        self.log_std_min, self.log_std_max = network_settings['log_std_bound']

        self.share = MLP(self.rep_net.h_dim, network_settings['hidden_units'])
        if network_settings['hidden_units']:
            ins = network_settings['hidden_units'][-1]
        else:
            ins = self.rep_net.h_dim
        self.mu = MLP(ins, [], output_shape=output_shape, out_act='tanh')
        if self.condition_sigma:
            self.log_std = MLP(ins, [], output_shape=output_shape)
        else:
            self.log_std = nn.Parameter(-0.5 * th.ones(output_shape))
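
The condition_sigma flag switches between a state-dependent log-std head and a single learned log-std parameter shared across all states. The sketch below shows how the two cases are usually made interchangeable downstream; shapes are illustrative.

import torch
import torch.nn as nn

B, output_shape = 4, 2
mu = torch.randn(B, output_shape)                  # output of the mu head

condition_sigma = False
if condition_sigma:
    log_std = torch.randn(B, output_shape)         # per-state head output
else:
    log_std = nn.Parameter(-0.5 * torch.ones(output_shape))  # shared learnable parameter

# Broadcasting lets both cases feed the same Gaussian construction.
std = log_std.exp().expand_as(mu)
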
Example no. 12
 def __init__(self, obs_spec, rep_net_params, output_shape, head_num,
              network_settings):
     super().__init__(obs_spec, rep_net_params)
     self.nets = nn.ModuleList([
         MLP(self.rep_net.h_dim,
             network_settings,
             output_shape=output_shape) for _ in range(head_num)
     ])
Example no. 13
 def __init__(self, obs_spec, rep_net_params, output_shape, options_num,
              network_settings):
     super().__init__(obs_spec, rep_net_params)
     self.actions_num = output_shape
     self.options_num = options_num
     self.pi = MLP(self.rep_net.h_dim,
                   network_settings,
                   output_shape=options_num * output_shape)
Example no. 14
 def __init__(self, obs_spec, rep_net_params, action_dim, phi,
              network_settings):
     super().__init__(obs_spec, rep_net_params)
     self._phi = phi
     self.net = MLP(self.rep_net.h_dim + action_dim,
                    network_settings,
                    output_shape=action_dim,
                    out_act='tanh')
Example no. 15
 def __init__(self, obs_spec, rep_net_params, action_dim, nums,
              network_settings):
     super().__init__(obs_spec, rep_net_params)
     self.action_dim = action_dim
     self.nums = nums
     self.net = MLP(self.rep_net.h_dim,
                    network_settings,
                    output_shape=nums * action_dim)
Example no. 16
 def __init__(self, obs_specs, rep_net_params, action_dim,
              network_settings):
     super().__init__()
     self.rep_nets = nn.ModuleList()
     for obs_spec in obs_specs:
         self.rep_nets.append(
             RepresentationNetwork(obs_spec, rep_net_params))
     h_dim = sum([rep_net.h_dim for rep_net in self.rep_nets])
     self.net = MLP(h_dim + action_dim, network_settings, output_shape=1)
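
One representation network per observation space, with the concatenated features and the action feeding a single Q-head, is a typical centralized-critic layout. The sketch below illustrates only the concatenation, with linear stand-ins instead of the actual RepresentationNetwork.

import torch
import torch.nn as nn

h_dims, action_dim = [8, 16], 4
rep_nets = nn.ModuleList([nn.Linear(d, d) for d in h_dims])  # stand-in encoders

obs_list = [torch.randn(2, d) for d in h_dims]   # one tensor per observation space
feats = [net(o) for net, o in zip(rep_nets, obs_list)]
action = torch.randn(2, action_dim)

# Every encoded observation is concatenated with the action, as in the example above.
x = torch.cat(feats + [action], dim=-1)          # [B, sum(h_dims) + action_dim]
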
Example no. 17
    def __init__(self, obs_spec, rep_net_params, a_dim, z_dim, hiddens=dict()):
        super().__init__()

        self.z_dim = z_dim

        self.rep_net = RepresentationNetwork(obs_spec=obs_spec,
                                             rep_net_params=rep_net_params)

        self._encoder = MLP(input_dim=self.rep_net.h_dim + a_dim,
                            hidden_units=hiddens['encoder'],
                            act_fn='relu',
                            output_shape=z_dim * 2)

        self._decoder = MLP(input_dim=self.rep_net.h_dim + z_dim,
                            hidden_units=hiddens['decoder'],
                            act_fn='relu',
                            output_shape=a_dim,
                            out_act='tanh')
Example no. 18
 def __init__(self, obs_spec, rep_net_params, action_dim, atoms,
              network_settings):
     super().__init__(obs_spec, rep_net_params)
     self.action_dim = action_dim
     self._atoms = atoms
     self.share = MLP(self.rep_net.h_dim,
                      network_settings['share'],
                      layer='noisy')
     if network_settings['share']:
         ins = network_settings['share'][-1]
     else:
         ins = self.rep_net.h_dim
     self.v = MLP(ins,
                  network_settings['v'],
                  layer='noisy',
                  output_shape=atoms)
     self.adv = MLP(ins,
                    network_settings['adv'],
                    layer='noisy',
                    output_shape=action_dim * atoms)
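
With atoms per action and separate noisy v/adv streams, this is a distributional (C51-style) dueling head. The sketch below shows the usual recombination into per-action atom probabilities, assuming the standard formulation; tensors are illustrative.

import torch
import torch.nn.functional as F

B, action_dim, atoms = 4, 3, 51
v = torch.randn(B, atoms)                  # value-stream logits, [B, atoms]
adv = torch.randn(B, action_dim * atoms)   # advantage-stream logits

adv = adv.view(B, action_dim, atoms)
q_logits = v.unsqueeze(1) + adv - adv.mean(dim=1, keepdim=True)  # dueling combine
probs = F.softmax(q_logits, dim=-1)        # per-action distribution over atoms
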
Example no. 19
    def __init__(self,
                 n_agents: int,
                 state_spec,
                 rep_net_params,

                 agent_own_state_size: bool,
                 query_hidden_units: int,
                 query_embed_dim: int,
                 key_embed_dim: int,
                 head_hidden_units: int,
                 n_attention_head: int,
                 constrant_hidden_units: int,
                 is_weighted: bool = True):
        super().__init__()

        self.n_agents = n_agents
        self.rep_net = RepresentationNetwork(obs_spec=state_spec,
                                             rep_net_params=rep_net_params)
        self.u_dim = agent_own_state_size  # TODO: implement this

        self.query_embed_dim = query_embed_dim
        self.key_embed_dim = key_embed_dim
        self.n_attention_head = n_attention_head
        self.is_weighted = is_weighted

        self.query_embedding_layers = nn.ModuleList()
        self.key_embedding_layers = nn.ModuleList()
        for i in range(self.n_attention_head):
            self.query_embedding_layers.append(
                MLP(input_dim=self.rep_net.h_dim,
                    hidden_units=query_hidden_units,
                    layer='linear',
                    act_fn='relu',
                    output_shape=query_embed_dim))
            self.key_embedding_layers.append(
                nn.Linear(self.u_dim, self.key_embed_dim))

        self.scaled_product_value = np.sqrt(self.query_embed_dim)

        self.head_embedding_layer = MLP(input_dim=self.rep_net.h_dim,
                                        hidden_units=head_hidden_units,
                                        layer='linear',
                                        act_fn='relu',
                                        output_shape=n_attention_head)

        self.constrant_value_layer = MLP(input_dim=self.rep_net.h_dim,
                                         hidden_units=constrant_hidden_units,
                                         layer='linear',
                                         act_fn='relu',
                                         output_shape=1)
Example no. 20
    def __init__(self, s_dim, a_dim, hidden_units):
        super().__init__()

        self._s_dim = s_dim
        self._a_dim = a_dim
        self._hidden_units = hidden_units

        self._net = MLP(input_dim=s_dim + a_dim,
                        hidden_units=hidden_units,
                        layer='linear',
                        act_fn='tanh',
                        output_shape=1,
                        out_act=None)
Example no. 21
    def __init__(self,
                 n_agents,
                 state_spec,
                 rep_net_params,
                 a_dim,

                 qtran_arch,
                 hidden_units):
        super().__init__()

        self.rep_net = RepresentationNetwork(obs_spec=state_spec,
                                             rep_net_params=rep_net_params)
        self.qtran_arch = qtran_arch  # QTran architecture

        self.h_nums = 2 if self.rep_net.memory_net.network_type == 'lstm' else 1

        # Q takes [state, agent_action_observation_encodings]

        # Q(s,u)
        if self.qtran_arch == "coma_critic":
            # Q takes [state, u] as input
            q_input_size = self.rep_net.h_dim + (n_agents * a_dim)
        elif self.qtran_arch == "qtran_paper":
            # Q takes [state, agent_action_observation_encodings]
            ae_input = self.h_nums * self.rep_net.h_dim + a_dim
            self.action_encoding = MLP(input_dim=ae_input,
                                       hidden_units=[ae_input],
                                       layer='linear',
                                       act_fn='relu',
                                       output_shape=ae_input)
            q_input_size = self.rep_net.h_dim + ae_input
        else:
            raise Exception(
                "{} is not a valid QTran architecture".format(self.qtran_arch))

        self.Q = MLP(input_dim=q_input_size, hidden_units=hidden_units,
                     layer='linear', act_fn='relu', output_shape=1)
        # V(s)
        self.V = MLP(input_dim=self.rep_net.h_dim, hidden_units=hidden_units,
                     layer='linear', act_fn='relu', output_shape=1)
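
In the qtran_paper branch, each agent's hidden state and action are encoded and then summed across agents before the joint Q is evaluated, which is what the ae_input sizing above reflects. The sketch below assumes the usual QTRAN aggregation (with h_nums = 1) and uses plain PyTorch stand-ins rather than the modules above.

import torch
import torch.nn as nn

B, n_agents, h_dim, a_dim = 4, 3, 16, 5
ae_input = h_dim + a_dim
action_encoding = nn.Linear(ae_input, ae_input)   # stand-in for the MLP above

hidden = torch.randn(B, n_agents, h_dim)          # per-agent hidden states
actions = torch.randn(B, n_agents, a_dim)         # per-agent (one-hot) actions

# Encode each (hidden, action) pair, then sum over agents: permutation-invariant.
encoded = action_encoding(torch.cat([hidden, actions], dim=-1))  # [B, n, ae_input]
agent_state_action = encoded.sum(dim=1)           # [B, ae_input]

state_feat = torch.randn(B, h_dim)
q_input = torch.cat([state_feat, agent_state_action], dim=-1)    # input to Q(s, u)
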
Example no. 22
 def __init__(self, obs_spec, rep_net_params, output_shape,
              network_settings):
     super().__init__(obs_spec, rep_net_params)
     self.logits = MLP(self.rep_net.h_dim,
                       network_settings,
                       output_shape=output_shape)