예제 #1
0
    def __init__(self, n_agents, state_feat_dim, a_dim, num_kernel,
                 adv_hidden_units):
        """Build the per-head key/agent/action extractor networks.

        Args:
            n_agents: number of agents in the team.
            state_feat_dim: dimensionality of the encoded state feature.
            a_dim: per-agent action dimensionality.
            num_kernel: number of attention heads (one extractor triple each).
            adv_hidden_units: hidden sizes shared by every extractor MLP.
        """
        super().__init__()

        def _extractor(in_dim, out_dim):
            # Every extractor uses the same linear/relu MLP recipe.
            return MLP(input_dim=in_dim,
                       hidden_units=adv_hidden_units,
                       layer='linear',
                       act_fn='relu',
                       output_shape=out_dim)

        # One (key, agent, action) extractor triple per attention head.
        self.key_extractors = nn.ModuleList(
            [_extractor(state_feat_dim, 1) for _ in range(num_kernel)])
        self.agents_extractors = nn.ModuleList(
            [_extractor(state_feat_dim, n_agents) for _ in range(num_kernel)])
        self.action_extractors = nn.ModuleList(
            [_extractor(state_feat_dim + n_agents * a_dim, n_agents)
             for _ in range(num_kernel)])
예제 #2
0
파일: qplex.py 프로젝트: StepNeverStop/RLs
    def __init__(self, n_agents, a_dim, state_spec, rep_net_params,
                 hidden_units, is_minus_one, weighted_head, num_kernel,
                 adv_hidden_units):
        """Duplex-dueling (QPLEX-style) mixing network.

        Args:
            n_agents: number of agent utilities to mix.
            a_dim: per-agent action dimensionality.
            state_spec / rep_net_params: configuration of the state encoder.
            hidden_units: hidden sizes for the weight/value heads.
            is_minus_one: flag stored for the mixing formula (used elsewhere).
            weighted_head: whether head outputs are weighted (used elsewhere).
            num_kernel / adv_hidden_units: SI_Weight attention configuration.
        """
        super().__init__()

        # Encodes the global state into a feature vector of width h_dim.
        self.rep_net = RepresentationNetwork(obs_spec=state_spec,
                                             rep_net_params=rep_net_params)

        self.is_minus_one = is_minus_one
        self.weighted_head = weighted_head

        state_dim = self.rep_net.h_dim
        # Per-agent weights w(s) and per-agent values V(s).
        self.hyper_w_final = MLP(input_dim=state_dim,
                                 hidden_units=hidden_units,
                                 layer='linear',
                                 act_fn='relu',
                                 output_shape=n_agents)
        self.V = MLP(input_dim=state_dim,
                     hidden_units=hidden_units,
                     layer='linear',
                     act_fn='relu',
                     output_shape=n_agents)

        # Multi-head attention module producing advantage weights.
        self.si_weight = SI_Weight(n_agents=n_agents,
                                   state_feat_dim=state_dim,
                                   a_dim=a_dim,
                                   num_kernel=num_kernel,
                                   adv_hidden_units=adv_hidden_units)
예제 #3
0
    def __init__(self,
                 n_agents,
                 state_spec,
                 rep_net_params,
                 mixing_embed_dim=8,
                 hidden_units=None,
                 **kwargs):
        """QMIX-style monotonic mixing network over agent utilities.

        Args:
            n_agents: number of agent utilities to mix.
            state_spec / rep_net_params: configuration of the state encoder.
            mixing_embed_dim: width of the mixing hidden layer.
            hidden_units: hidden sizes of the hypernetwork MLPs; defaults to
                [8].  (Fix: the previous signature used the mutable default
                argument `[8]`, shared across all instances; a None sentinel
                keeps the same effective default safely.)
        """
        super().__init__()

        if hidden_units is None:
            hidden_units = [8]

        self.rep_net = RepresentationNetwork(obs_spec=state_spec,
                                             rep_net_params=rep_net_params)
        self.embed_dim = mixing_embed_dim
        # Hypernetworks generate the state-conditioned mixing weights.
        self.hyper_w_1 = MLP(input_dim=self.rep_net.h_dim,
                             hidden_units=hidden_units,
                             layer='linear',
                             act_fn='relu',
                             output_shape=self.embed_dim * n_agents)
        self.hyper_w_final = MLP(input_dim=self.rep_net.h_dim,
                                 hidden_units=hidden_units,
                                 layer='linear',
                                 act_fn='relu',
                                 output_shape=self.embed_dim)
        # State dependent bias for hidden layer
        self.hyper_b_1 = nn.Linear(self.rep_net.h_dim, self.embed_dim)
        # V(s) instead of a bias for the last layers
        self.V = MLP(input_dim=self.rep_net.h_dim,
                     hidden_units=[self.embed_dim],
                     layer='linear',
                     act_fn='relu',
                     output_shape=1)
예제 #4
0
 def __init__(self,
              obs_spec,
              rep_net_params,
              action_dim,
              options_num,
              network_settings,
              is_continuous=True):
     """Option-critic style heads over a shared trunk.

     Builds Q-over-options, intra-option policy, termination and option
     heads, each fed by the optional shared MLP trunk.
     """
     super().__init__(obs_spec, rep_net_params)
     self.actions_num = action_dim
     self.options_num = options_num
     share_units = network_settings['share']
     self.share = MLP(self.rep_net.h_dim, share_units)
     # Trunk output width: last shared layer if present, else encoder width.
     ins = share_units[-1] if share_units else self.rep_net.h_dim
     self.q = MLP(ins, network_settings['q'], output_shape=options_num)
     # Intra-option policy; tanh-squashed only for continuous actions.
     self.pi = MLP(ins,
                   network_settings['intra_option'],
                   output_shape=options_num * action_dim,
                   out_act='tanh' if is_continuous else None)
     # Per-option termination probabilities.
     self.beta = MLP(ins,
                     network_settings['termination'],
                     output_shape=options_num,
                     out_act='sigmoid')
     # Log-probabilities over options.
     self.o = MLP(ins,
                  network_settings['o'],
                  output_shape=options_num,
                  out_act='log_softmax')
예제 #5
0
 def __init__(self, obs_spec, rep_net_params, action_dim, network_settings):
     """Twin state-action value heads (double-Q) over the shared encoding."""
     super().__init__(obs_spec, rep_net_params)
     q_in = self.rep_net.h_dim + action_dim
     # Two independent critics with identical architecture.
     self.q1 = MLP(q_in, network_settings, output_shape=1)
     self.q2 = MLP(q_in, network_settings, output_shape=1)
예제 #6
0
 def __init__(self, obs_spec, rep_net_params, action_dim, network_settings):
     """Critic that re-injects the action after the first feature layer.

     Args:
         network_settings: list of hidden-layer sizes; must contain at least
             two entries because the first forms ``feature_net`` and the
             remainder form ``net``.

     Raises:
         ValueError: if ``network_settings`` has one layer or fewer.
             (Fix: was an ``assert``, which is silently stripped under -O.)
     """
     if len(network_settings) <= 1:
         raise ValueError(
             "if you want to use this architecture of critic network, "
             "the number of layers must be greater than 1")
     super().__init__(obs_spec, rep_net_params)
     # First layer embeds [state_feature, action].
     self.feature_net = MLP(self.rep_net.h_dim + action_dim,
                            network_settings[0:1])
     # The action is concatenated again before the remaining layers.
     # NOTE(review): this uses network_settings[-1] while feature_net's
     # output width is network_settings[0]; suspicious if they differ —
     # verify against forward() before changing.
     ins = network_settings[-1] + action_dim
     self.net = MLP(ins, network_settings[1:], output_shape=1)
예제 #7
0
 def __init__(self, obs_spec, rep_net_params, output_shape,
              network_settings):
     """Dueling heads: shared trunk feeding scalar V and per-action advantages."""
     super().__init__(obs_spec, rep_net_params)
     share_units = network_settings['share']
     self.share = MLP(self.rep_net.h_dim, share_units)
     # Trunk output width: last shared layer if present, else encoder width.
     ins = share_units[-1] if share_units else self.rep_net.h_dim
     self.v = MLP(ins, network_settings['v'], output_shape=1)
     self.adv = MLP(ins, network_settings['adv'], output_shape=output_shape)
예제 #8
0
 def __init__(self, obs_spec, rep_net_params, output_shape,
              network_settings):
     """Gaussian actor with separate mu and log_std heads over a shared trunk."""
     super().__init__(obs_spec, rep_net_params)
     self.soft_clip = network_settings['soft_clip']
     # Clamp bounds applied to log_std elsewhere.
     self.log_std_min, self.log_std_max = network_settings['log_std_bound']
     share_units = network_settings['share']
     self.share = MLP(self.rep_net.h_dim, share_units)
     # Trunk output width: last shared layer if present, else encoder width.
     ins = share_units[-1] if share_units else self.rep_net.h_dim
     self.mu = MLP(ins, network_settings['mu'], output_shape=output_shape)
     self.log_std = MLP(ins,
                        network_settings['log_std'],
                        output_shape=output_shape)
예제 #9
0
 def __init__(self,
              obs_spec,
              rep_net_params,
              output_shape,
              network_settings,
              out_act=None):
     """Two parallel, identically configured heads (`net_q` and `net_i`)."""
     super().__init__(obs_spec, rep_net_params)

     def _head():
         # Both heads share every hyperparameter; only their weights differ.
         return MLP(self.rep_net.h_dim,
                    network_settings,
                    output_shape=output_shape,
                    out_act=out_act)

     self.net_q = _head()
     self.net_i = _head()
예제 #10
0
    def __init__(self, obs_spec, rep_net_params, output_shape,
                 network_settings):
        """Gaussian policy head: tanh mean, optionally state-conditioned log-std."""
        super().__init__(obs_spec, rep_net_params)
        self.condition_sigma = network_settings['condition_sigma']
        # Clamp bounds applied to log_std elsewhere.
        self.log_std_min, self.log_std_max = network_settings['log_std_bound']

        hidden = network_settings['hidden_units']
        self.share = MLP(self.rep_net.h_dim, hidden)
        # Trunk output width: last hidden layer if present, else encoder width.
        ins = hidden[-1] if hidden else self.rep_net.h_dim
        self.mu = MLP(ins, [], output_shape=output_shape, out_act='tanh')
        if self.condition_sigma:
            self.log_std = MLP(ins, [], output_shape=output_shape)
        else:
            # State-independent learnable log-std, initialised to -0.5.
            self.log_std = nn.Parameter(-0.5 * th.ones(output_shape))
예제 #11
0
 def __init__(self, obs_spec, rep_net_params, action_dim, quantiles_idx,
              network_settings):
     """IQN-style net: state head, quantile embedding and merged tile net."""
     super().__init__(obs_spec, rep_net_params)
     self.action_dim = action_dim
     # State pathway: [B, h_dim] features.
     self.q_net_head = MLP(self.rep_net.h_dim, network_settings['q_net'])
     # Quantile pathway: [N*B, quantiles_idx] embeddings.
     quantile_units = network_settings['quantile']
     self.quantile_net = MLP(quantiles_idx, quantile_units)
     # Merged pathway input width.  # TODO (kept from original)
     ins = quantile_units[-1] if quantile_units else quantiles_idx
     self.q_net_tile = MLP(ins,
                           network_settings['tile'],
                           output_shape=action_dim)
예제 #12
0
class ActorMuLogstd(BaseModel):
    """Actor for PPO/PG algorithms over continuous actions.

    Input: vector of state.
    Output: (mu, log_std) of a stochastic (Gaussian) action distribution.
    """
    def __init__(self, obs_spec, rep_net_params, output_shape,
                 network_settings):
        super().__init__(obs_spec, rep_net_params)
        self.condition_sigma = network_settings['condition_sigma']
        # Clamp bounds for log_std, applied in forward().
        self.log_std_min, self.log_std_max = network_settings['log_std_bound']

        hidden = network_settings['hidden_units']
        self.share = MLP(self.rep_net.h_dim, hidden)
        # Trunk output width: last hidden layer if present, else encoder width.
        ins = hidden[-1] if hidden else self.rep_net.h_dim
        # Mean head is tanh-squashed into [-1, 1].
        self.mu = MLP(ins, [], output_shape=output_shape, out_act='tanh')
        if self.condition_sigma:
            # State-dependent log-std head.
            self.log_std = MLP(ins, [], output_shape=output_shape)
        else:
            # Single learnable, state-independent log-std vector.
            self.log_std = nn.Parameter(-0.5 * th.ones(output_shape))

    def forward(self, x, **kwargs):
        feat = self.share(self.repre(x, **kwargs))
        mu = self.mu(feat)
        if self.condition_sigma:
            log_std = self.log_std(feat)  # [T, B, *] or [B, *]
        else:
            # Broadcast the shared log-std to mu's leading dims.
            log_std = self.log_std.repeat(mu.shape[:-1] +
                                          (1, ))  # [T, B, *] or [B, *]
        return mu, log_std.clamp(self.log_std_min, self.log_std_max)
예제 #13
0
 def __init__(self, obs_spec, rep_net_params, action_dim, phi,
              network_settings):
     """Maps [state_feature, action] to a tanh-bounded action-shaped output.

     NOTE(review): `phi` is only stored here; presumably it scales the
     output in forward() — verify against the caller.
     """
     super().__init__(obs_spec, rep_net_params)
     self._phi = phi
     in_dim = self.rep_net.h_dim + action_dim
     self.net = MLP(in_dim,
                    network_settings,
                    output_shape=action_dim,
                    out_act='tanh')
예제 #14
0
 def __init__(self, obs_spec, rep_net_params, action_dim, nums,
              network_settings):
     """Single trunk producing `nums` stacked outputs of width `action_dim`."""
     super().__init__(obs_spec, rep_net_params)
     self.action_dim = action_dim
     self.nums = nums
     out_dim = nums * action_dim
     self.net = MLP(self.rep_net.h_dim, network_settings,
                    output_shape=out_dim)
예제 #15
0
 def __init__(self, obs_spec, rep_net_params, output_shape, options_num,
              network_settings):
     """Per-option policy logits: one `output_shape`-sized head per option."""
     super().__init__(obs_spec, rep_net_params)
     self.actions_num = output_shape
     self.options_num = options_num
     self.pi = MLP(self.rep_net.h_dim, network_settings,
                   output_shape=options_num * output_shape)
예제 #16
0
 def __init__(self, obs_spec, rep_net_params, output_shape, head_num,
              network_settings):
     """Ensemble of `head_num` identically configured output heads."""
     super().__init__(obs_spec, rep_net_params)
     heads = []
     for _ in range(head_num):
         heads.append(MLP(self.rep_net.h_dim,
                          network_settings,
                          output_shape=output_shape))
     self.nets = nn.ModuleList(heads)
예제 #17
0
 def __init__(self, obs_specs, rep_net_params, action_dim,
              network_settings):
     """Critic over several observation sources: one encoder each, then joint Q."""
     super().__init__()
     self.rep_nets = nn.ModuleList([
         RepresentationNetwork(obs_spec, rep_net_params)
         for obs_spec in obs_specs
     ])
     # The joint critic consumes all encodings plus the action.
     h_dim = sum(rep_net.h_dim for rep_net in self.rep_nets)
     self.net = MLP(h_dim + action_dim, network_settings, output_shape=1)
예제 #18
0
    def __init__(self, obs_spec, rep_net_params, a_dim, z_dim, hiddens=None):
        """Conditional encoder/decoder (VAE-style) over actions.

        Args:
            obs_spec / rep_net_params: configuration of the observation encoder.
            a_dim: action dimensionality (decoder output, tanh-squashed).
            z_dim: latent dimensionality; the encoder emits 2*z_dim values
                (presumably mean and log-variance — verify against sampling).
            hiddens: dict with 'encoder' and 'decoder' hidden-unit lists.
                (Fix: was the mutable default `dict()`; a None sentinel avoids
                a shared default object.  An empty dict still raises KeyError
                exactly as before, so callers must supply both keys.)
        """
        super().__init__()

        hiddens = {} if hiddens is None else hiddens

        self.z_dim = z_dim

        self.rep_net = RepresentationNetwork(obs_spec=obs_spec,
                                             rep_net_params=rep_net_params)

        # Encoder: [state_feature, action] -> 2*z_dim latent parameters.
        self._encoder = MLP(input_dim=self.rep_net.h_dim + a_dim,
                            hidden_units=hiddens['encoder'],
                            act_fn='relu',
                            output_shape=z_dim * 2)

        # Decoder: [state_feature, latent] -> action in [-1, 1].
        self._decoder = MLP(input_dim=self.rep_net.h_dim + z_dim,
                            hidden_units=hiddens['decoder'],
                            act_fn='relu',
                            output_shape=a_dim,
                            out_act='tanh')
예제 #19
0
 def __init__(self, obs_spec, rep_net_params, action_dim, atoms,
              network_settings):
     """Distributional dueling net built from noisy layers (Rainbow-style)."""
     super().__init__(obs_spec, rep_net_params)
     self.action_dim = action_dim
     self._atoms = atoms
     share_units = network_settings['share']
     # 'noisy' layers replace plain linear layers throughout.
     self.share = MLP(self.rep_net.h_dim, share_units, layer='noisy')
     # Trunk output width: last shared layer if present, else encoder width.
     ins = share_units[-1] if share_units else self.rep_net.h_dim
     # Value stream: one distribution over `atoms`.
     self.v = MLP(ins, network_settings['v'], layer='noisy',
                  output_shape=atoms)
     # Advantage stream: one `atoms`-sized distribution per action.
     self.adv = MLP(ins, network_settings['adv'], layer='noisy',
                    output_shape=action_dim * atoms)
예제 #20
0
    def __init__(self,
                 n_agents: int,
                 state_spec,
                 rep_net_params,
                 agent_own_state_size: bool,
                 query_hidden_units: int,
                 query_embed_dim: int,
                 key_embed_dim: int,
                 head_hidden_units: int,
                 n_attention_head: int,
                 constrant_hidden_units: int,
                 is_weighted: bool = True):
        """Attention-based mixer: per-head query/key embeddings over the state.

        Each of the `n_attention_head` heads gets a query MLP (from the
        encoded global state) and a key linear layer (from each agent's own
        state of width `u_dim`).
        """
        super().__init__()

        self.n_agents = n_agents
        self.rep_net = RepresentationNetwork(obs_spec=state_spec,
                                             rep_net_params=rep_net_params)
        self.u_dim = agent_own_state_size  # TODO: implement this

        self.query_embed_dim = query_embed_dim
        self.key_embed_dim = key_embed_dim
        self.n_attention_head = n_attention_head
        self.is_weighted = is_weighted

        state_dim = self.rep_net.h_dim
        self.query_embedding_layers = nn.ModuleList()
        self.key_embedding_layers = nn.ModuleList()
        for _ in range(self.n_attention_head):
            self.query_embedding_layers.append(
                MLP(input_dim=state_dim,
                    hidden_units=query_hidden_units,
                    layer='linear',
                    act_fn='relu',
                    output_shape=query_embed_dim))
            self.key_embedding_layers.append(
                nn.Linear(self.u_dim, self.key_embed_dim))

        # Scaled dot-product normaliser: sqrt of the query width.
        self.scaled_product_value = np.sqrt(self.query_embed_dim)

        # Per-head mixing weights derived from the state.
        self.head_embedding_layer = MLP(input_dim=state_dim,
                                        hidden_units=head_hidden_units,
                                        layer='linear',
                                        act_fn='relu',
                                        output_shape=n_attention_head)

        # State-dependent scalar constant term.
        self.constrant_value_layer = MLP(input_dim=state_dim,
                                         hidden_units=constrant_hidden_units,
                                         layer='linear',
                                         act_fn='relu',
                                         output_shape=1)
예제 #21
0
    def __init__(self, s_dim, a_dim, hidden_units):
        """Scalar network over the concatenated [state, action] vector."""
        super().__init__()

        self._s_dim = s_dim
        self._a_dim = a_dim
        self._hidden_units = hidden_units

        in_dim = s_dim + a_dim
        # tanh hidden activations; the scalar output stays unsquashed.
        self._net = MLP(input_dim=in_dim,
                        hidden_units=hidden_units,
                        layer='linear',
                        act_fn='tanh',
                        output_shape=1,
                        out_act=None)
예제 #22
0
    def __init__(self,
                 n_agents,
                 state_spec,
                 rep_net_params,
                 a_dim,
                 qtran_arch,
                 hidden_units):
        """QTran joint value network: Q(s, u) plus a state-value head V(s).

        The Q input layout depends on `qtran_arch`: 'coma_critic' feeds
        [state, joint action]; 'qtran_paper' feeds [state, encoded
        agent action/observation pairs].
        """
        super().__init__()

        self.rep_net = RepresentationNetwork(obs_spec=state_spec,
                                             rep_net_params=rep_net_params)
        self.qtran_arch = qtran_arch  # QTran architecture

        # lstm memories expose two hidden tensors (presumably h and c),
        # other memory types one.
        self.h_nums = 2 if self.rep_net.memory_net.network_type == 'lstm' else 1

        state_dim = self.rep_net.h_dim
        # Q(s,u)
        if self.qtran_arch == "coma_critic":
            # Q takes [state, u] as input.
            q_input_size = state_dim + (n_agents * a_dim)
        elif self.qtran_arch == "qtran_paper":
            # Q takes [state, agent_action_observation_encodings].
            ae_input = self.h_nums * state_dim + a_dim
            self.action_encoding = MLP(input_dim=ae_input,
                                       hidden_units=[ae_input],
                                       layer='linear',
                                       act_fn='relu',
                                       output_shape=ae_input)
            q_input_size = state_dim + ae_input
        else:
            raise Exception(
                "{} is not a valid QTran architecture".format(self.qtran_arch))

        self.Q = MLP(input_dim=q_input_size,
                     hidden_units=hidden_units,
                     layer='linear',
                     act_fn='relu',
                     output_shape=1)
        # V(s)
        self.V = MLP(input_dim=state_dim,
                     hidden_units=hidden_units,
                     layer='linear',
                     act_fn='relu',
                     output_shape=1)
예제 #23
0
class ActorCriticValueCts(BaseModel):
    """Actor-critic with shared layers for continuous action spaces.

    Input: vector of state.
    Output: mean (mu) and log-std of a Gaussian action distribution, plus v(s).
    """
    def __init__(self, obs_spec, rep_net_params, output_shape,
                 network_settings):
        super().__init__(obs_spec, rep_net_params)
        self.condition_sigma = network_settings['condition_sigma']
        # Clamp bounds for log_std, applied in forward().
        self.log_std_min, self.log_std_max = network_settings['log_std_bound']

        share_units = network_settings['share']
        self.share = MLP(self.rep_net.h_dim, share_units)
        # Trunk output width: last shared layer if present, else encoder width.
        trunk_dim = share_units[-1] if share_units else self.rep_net.h_dim
        mu_units = network_settings['mu']
        self.mu_logstd_share = MLP(trunk_dim, mu_units)
        self.v = MLP(trunk_dim, network_settings['v'], output_shape=1)
        # Policy heads read the (possibly deeper) mu-branch output.
        head_dim = mu_units[-1] if mu_units else trunk_dim
        self.mu = MLP(head_dim, [], output_shape=output_shape, out_act='tanh')
        if self.condition_sigma:
            self.log_std = MLP(head_dim, [], output_shape=output_shape)
        else:
            # Single learnable, state-independent log-std vector.
            self.log_std = nn.Parameter(-0.5 * th.ones(output_shape))

    def forward(self, x, **kwargs):
        feat = self.share(self.repre(x, **kwargs))
        v = self.v(feat)
        branch = self.mu_logstd_share(feat)
        mu = self.mu(branch)
        if self.condition_sigma:
            log_std = self.log_std(branch)  # [T, B, *] or [B, *]
        else:
            # Broadcast the shared log-std to mu's leading dims.
            log_std = self.log_std.repeat(mu.shape[:-1] +
                                          (1, ))  # [T, B, *] or [B, *]
        return mu, log_std.clamp(self.log_std_min, self.log_std_max), v
예제 #24
0
 def __init__(self, obs_spec, rep_net_params, output_shape,
              network_settings):
     """Discrete-action head producing raw logits over `output_shape` actions."""
     super().__init__(obs_spec, rep_net_params)
     feat_dim = self.rep_net.h_dim
     self.logits = MLP(feat_dim, network_settings,
                       output_shape=output_shape)