def __init__(self, n_agents, state_feat_dim, a_dim, num_kernel, adv_hidden_units):
    """Build `num_kernel` parallel key/agent/action extractor heads (multi-head attention).

    Args:
        n_agents: number of agents; output width of the agent/action heads.
        state_feat_dim: dimensionality of the state feature vector fed to each head.
        a_dim: per-agent action dimensionality; the action head also consumes the
            joint action (n_agents * a_dim) concatenated to the state features.
        num_kernel: number of attention heads / kernels.
        adv_hidden_units: hidden layer sizes shared by all three MLP head types.
    """
    super().__init__()
    self.key_extractors = nn.ModuleList()
    self.agents_extractors = nn.ModuleList()
    self.action_extractors = nn.ModuleList()
    # Loop index is never used — one identical head per kernel.
    for _ in range(num_kernel):  # multi-head attention
        self.key_extractors.append(
            MLP(input_dim=state_feat_dim, hidden_units=adv_hidden_units,
                layer='linear', act_fn='relu', output_shape=1))  # key
        self.agents_extractors.append(
            MLP(input_dim=state_feat_dim, hidden_units=adv_hidden_units,
                layer='linear', act_fn='relu', output_shape=n_agents))  # agent
        self.action_extractors.append(
            MLP(input_dim=state_feat_dim + n_agents * a_dim, hidden_units=adv_hidden_units,
                layer='linear', act_fn='relu', output_shape=n_agents))  # action
def __init__(self, n_agents, a_dim, state_spec, rep_net_params, hidden_units,
             is_minus_one, weighted_head, num_kernel, adv_hidden_units):
    """Mixer head combining a state-conditioned weight net, a V(s) net and
    an SI_Weight attention module (one entry per agent)."""
    super().__init__()
    self.rep_net = RepresentationNetwork(obs_spec=state_spec,
                                         rep_net_params=rep_net_params)
    self.is_minus_one = is_minus_one
    self.weighted_head = weighted_head
    state_dim = self.rep_net.h_dim
    # Per-agent mixing weights conditioned on the encoded state.
    self.hyper_w_final = MLP(input_dim=state_dim, hidden_units=hidden_units,
                             layer='linear', act_fn='relu', output_shape=n_agents)
    # Per-agent state value estimates.
    self.V = MLP(input_dim=state_dim, hidden_units=hidden_units,
                 layer='linear', act_fn='relu', output_shape=n_agents)
    self.si_weight = SI_Weight(n_agents=n_agents,
                               state_feat_dim=state_dim,
                               a_dim=a_dim,
                               num_kernel=num_kernel,
                               adv_hidden_units=adv_hidden_units)
def __init__(self, n_agents, state_spec, rep_net_params, mixing_embed_dim=8,
             hidden_units=None, **kwargs):
    """QMIX-style mixer: hypernetworks produce the mixing weights from the state.

    Args:
        n_agents: number of agents whose Q-values are mixed.
        state_spec: observation spec forwarded to RepresentationNetwork.
        rep_net_params: parameters for the representation network.
        mixing_embed_dim: width of the mixing hidden layer.
        hidden_units: hypernetwork hidden sizes; defaults to [8].
            (Was a mutable default argument `[8]`; None-sentinel keeps the
            same default while avoiding the shared-list pitfall.)
    """
    super().__init__()
    if hidden_units is None:
        hidden_units = [8]
    self.rep_net = RepresentationNetwork(obs_spec=state_spec,
                                         rep_net_params=rep_net_params)
    self.embed_dim = mixing_embed_dim
    # Hypernetworks: map state features to first-layer and final-layer weights.
    self.hyper_w_1 = MLP(input_dim=self.rep_net.h_dim, hidden_units=hidden_units,
                         layer='linear', act_fn='relu',
                         output_shape=self.embed_dim * n_agents)
    self.hyper_w_final = MLP(input_dim=self.rep_net.h_dim, hidden_units=hidden_units,
                             layer='linear', act_fn='relu',
                             output_shape=self.embed_dim)
    # State dependent bias for hidden layer
    self.hyper_b_1 = nn.Linear(self.rep_net.h_dim, self.embed_dim)
    # V(s) instead of a bias for the last layers
    self.V = MLP(input_dim=self.rep_net.h_dim, hidden_units=[self.embed_dim],
                 layer='linear', act_fn='relu', output_shape=1)
def __init__(self, obs_spec, rep_net_params, action_dim, options_num,
             network_settings, is_continuous=True):
    """Option-critic heads: shared trunk feeding q(o), intra-option pi,
    termination beta and option logits o."""
    super().__init__(obs_spec, rep_net_params)
    self.actions_num = action_dim
    self.options_num = options_num
    share_units = network_settings['share']
    self.share = MLP(self.rep_net.h_dim, share_units)
    # Trunk output width: last shared layer, or raw features if no trunk.
    ins = share_units[-1] if share_units else self.rep_net.h_dim
    self.q = MLP(ins, network_settings['q'], output_shape=options_num)
    # One action vector per option; squash only for continuous actions.
    self.pi = MLP(ins, network_settings['intra_option'],
                  output_shape=options_num * action_dim,
                  out_act='tanh' if is_continuous else None)
    self.beta = MLP(ins, network_settings['termination'],
                    output_shape=options_num, out_act='sigmoid')
    self.o = MLP(ins, network_settings['o'],
                 output_shape=options_num, out_act='log_softmax')
def __init__(self, obs_spec, rep_net_params, action_dim, network_settings):
    """Twin Q-networks over concatenated [state features, action]."""
    super().__init__(obs_spec, rep_net_params)
    in_dim = self.rep_net.h_dim + action_dim
    self.q1 = MLP(in_dim, network_settings, output_shape=1)
    self.q2 = MLP(in_dim, network_settings, output_shape=1)
def __init__(self, obs_spec, rep_net_params, action_dim, network_settings):
    """Critic that re-injects the action after a first feature layer.

    The first entry of `network_settings` sizes the feature layer; the
    remaining entries size the Q head, so at least two layers are required.

    Raises:
        ValueError: if `network_settings` has fewer than two layers.
            (Was an `assert`, which is silently stripped under `python -O`.)
    """
    if len(network_settings) <= 1:
        raise ValueError(
            "if you want to use this architecture of critic network, the number of layers must greater than 1"
        )
    super().__init__(obs_spec, rep_net_params)
    self.feature_net = MLP(self.rep_net.h_dim + action_dim, network_settings[0:1])
    # Action is concatenated again to the feature-layer output.
    # NOTE(review): `ins` uses network_settings[-1], not network_settings[0] —
    # presumably all layers share a width; confirm against callers.
    ins = network_settings[-1] + action_dim
    self.net = MLP(ins, network_settings[1:], output_shape=1)
def __init__(self, obs_spec, rep_net_params, output_shape, network_settings):
    """Dueling architecture: shared trunk with separate value and advantage heads."""
    super().__init__(obs_spec, rep_net_params)
    share_units = network_settings['share']
    self.share = MLP(self.rep_net.h_dim, share_units)
    ins = share_units[-1] if share_units else self.rep_net.h_dim
    self.v = MLP(ins, network_settings['v'], output_shape=1)
    self.adv = MLP(ins, network_settings['adv'], output_shape=output_shape)
def __init__(self, obs_spec, rep_net_params, output_shape, network_settings):
    """Gaussian actor with separate mu and log_std heads over a shared trunk."""
    super().__init__(obs_spec, rep_net_params)
    self.soft_clip = network_settings['soft_clip']
    self.log_std_min, self.log_std_max = network_settings['log_std_bound']
    share_units = network_settings['share']
    self.share = MLP(self.rep_net.h_dim, share_units)
    ins = share_units[-1] if share_units else self.rep_net.h_dim
    self.mu = MLP(ins, network_settings['mu'], output_shape=output_shape)
    self.log_std = MLP(ins, network_settings['log_std'], output_shape=output_shape)
def __init__(self, obs_spec, rep_net_params, output_shape, network_settings, out_act=None):
    """Two identically-shaped heads (net_q and net_i) over the same features."""
    super().__init__(obs_spec, rep_net_params)
    feat_dim = self.rep_net.h_dim
    self.net_q = MLP(feat_dim, network_settings,
                     output_shape=output_shape, out_act=out_act)
    self.net_i = MLP(feat_dim, network_settings,
                     output_shape=output_shape, out_act=out_act)
def __init__(self, obs_spec, rep_net_params, output_shape, network_settings):
    """Actor producing (mu, log_std); log_std is either state-conditioned
    or a learned free parameter initialised at -0.5."""
    super().__init__(obs_spec, rep_net_params)
    self.condition_sigma = network_settings['condition_sigma']
    self.log_std_min, self.log_std_max = network_settings['log_std_bound']
    hidden = network_settings['hidden_units']
    self.share = MLP(self.rep_net.h_dim, hidden)
    ins = hidden[-1] if hidden else self.rep_net.h_dim
    self.mu = MLP(ins, [], output_shape=output_shape, out_act='tanh')
    if self.condition_sigma:
        self.log_std = MLP(ins, [], output_shape=output_shape)
    else:
        self.log_std = nn.Parameter(-0.5 * th.ones(output_shape))
def __init__(self, obs_spec, rep_net_params, action_dim, quantiles_idx, network_settings):
    """IQN-style network: state head, quantile-embedding head, and a tile
    head that merges them into per-action quantile values."""
    super().__init__(obs_spec, rep_net_params)
    self.action_dim = action_dim
    # State branch: [B, self.rep_net.h_dim]
    self.q_net_head = MLP(self.rep_net.h_dim, network_settings['q_net'])
    # Quantile-embedding branch: [N*B, quantiles_idx]
    self.quantile_net = MLP(quantiles_idx, network_settings['quantile'])
    quantile_units = network_settings['quantile']
    # TODO: width of the quantile branch output (raw embedding if no layers).
    ins = quantile_units[-1] if quantile_units else quantiles_idx
    # Merge branch: [N*B, network_settings['quantile'][-1]]
    self.q_net_tile = MLP(ins, network_settings['tile'], output_shape=action_dim)
class ActorMuLogstd(BaseModel):
    """Actor network for PPO/PG algorithms.

    Input: vector of state.
    Output: (mu, log_std) of a Gaussian action distribution; log_std is
    clamped to the configured bound and is either state-conditioned or a
    learned free parameter initialised at -0.5.
    """

    def __init__(self, obs_spec, rep_net_params, output_shape, network_settings):
        super().__init__(obs_spec, rep_net_params)
        self.condition_sigma = network_settings['condition_sigma']
        self.log_std_min, self.log_std_max = network_settings['log_std_bound']
        hidden = network_settings['hidden_units']
        self.share = MLP(self.rep_net.h_dim, hidden)
        ins = hidden[-1] if hidden else self.rep_net.h_dim
        self.mu = MLP(ins, [], output_shape=output_shape, out_act='tanh')
        if self.condition_sigma:
            self.log_std = MLP(ins, [], output_shape=output_shape)
        else:
            self.log_std = nn.Parameter(-0.5 * th.ones(output_shape))

    def forward(self, x, **kwargs):
        feat = self.share(self.repre(x, **kwargs))
        mu = self.mu(feat)
        if self.condition_sigma:
            log_std = self.log_std(feat)  # [T, B, *] or [B, *]
        else:
            # TODO: broadcast the free parameter to mu's leading dims.
            log_std = self.log_std.repeat(mu.shape[:-1] + (1,))  # [T, B, *] or [B, *]
        return mu, log_std.clamp(self.log_std_min, self.log_std_max)
def __init__(self, obs_spec, rep_net_params, action_dim, phi, network_settings):
    """Tanh-bounded head over [state features, action]; `phi` scales its
    output elsewhere (stored, not used here)."""
    super().__init__(obs_spec, rep_net_params)
    self._phi = phi
    in_dim = self.rep_net.h_dim + action_dim
    self.net = MLP(in_dim, network_settings,
                   output_shape=action_dim, out_act='tanh')
def __init__(self, obs_spec, rep_net_params, action_dim, nums, network_settings):
    """Single head emitting `nums` values per action dimension."""
    super().__init__(obs_spec, rep_net_params)
    self.action_dim = action_dim
    self.nums = nums
    out_dim = nums * action_dim
    self.net = MLP(self.rep_net.h_dim, network_settings, output_shape=out_dim)
def __init__(self, obs_spec, rep_net_params, output_shape, options_num, network_settings):
    """Intra-option policy head: one action-logit block per option."""
    super().__init__(obs_spec, rep_net_params)
    self.actions_num = output_shape
    self.options_num = options_num
    out_dim = options_num * output_shape
    self.pi = MLP(self.rep_net.h_dim, network_settings, output_shape=out_dim)
def __init__(self, obs_spec, rep_net_params, output_shape, head_num, network_settings):
    """Bootstrapped ensemble: `head_num` independent identical heads."""
    super().__init__(obs_spec, rep_net_params)
    heads = [
        MLP(self.rep_net.h_dim, network_settings, output_shape=output_shape)
        for _ in range(head_num)
    ]
    self.nets = nn.ModuleList(heads)
def __init__(self, obs_specs, rep_net_params, action_dim, network_settings):
    """Critic over multiple observation streams: one representation network
    per obs spec, features concatenated with the action into a scalar head.

    Args:
        obs_specs: iterable of observation specs, one per input stream.
        rep_net_params: shared parameters for every representation network.
        action_dim: action dimensionality appended to the concatenated features.
        network_settings: hidden sizes of the Q head.
    """
    super().__init__()
    self.rep_nets = nn.ModuleList()
    for obs_spec in obs_specs:
        self.rep_nets.append(
            RepresentationNetwork(obs_spec, rep_net_params))
    # Generator instead of a throwaway list inside sum().
    h_dim = sum(rep_net.h_dim for rep_net in self.rep_nets)
    self.net = MLP(h_dim + action_dim, network_settings, output_shape=1)
def __init__(self, obs_spec, rep_net_params, a_dim, z_dim, hiddens=None):
    """Conditional VAE over actions: encoder maps [features, action] to a
    2*z_dim output (presumably mean and log-variance — confirm in forward),
    decoder maps [features, latent] back to a tanh-bounded action.

    Args:
        obs_spec: observation spec for the representation network.
        rep_net_params: parameters for the representation network.
        a_dim: action dimensionality.
        z_dim: latent dimensionality.
        hiddens: dict with 'encoder' and 'decoder' hidden sizes.
            (Was a mutable default argument `dict()`; None-sentinel keeps the
            same effective behavior — missing keys still raise KeyError.)
    """
    super().__init__()
    if hiddens is None:
        hiddens = {}
    self.z_dim = z_dim
    self.rep_net = RepresentationNetwork(obs_spec=obs_spec,
                                         rep_net_params=rep_net_params)
    self._encoder = MLP(input_dim=self.rep_net.h_dim + a_dim,
                        hidden_units=hiddens['encoder'],
                        act_fn='relu',
                        output_shape=z_dim * 2)
    self._decoder = MLP(input_dim=self.rep_net.h_dim + z_dim,
                        hidden_units=hiddens['decoder'],
                        act_fn='relu',
                        output_shape=a_dim,
                        out_act='tanh')
def __init__(self, obs_spec, rep_net_params, action_dim, atoms, network_settings):
    """Distributional dueling network with 'noisy' layers: value head emits
    `atoms` logits, advantage head emits `atoms` per action."""
    super().__init__(obs_spec, rep_net_params)
    self.action_dim = action_dim
    self._atoms = atoms
    share_units = network_settings['share']
    self.share = MLP(self.rep_net.h_dim, share_units, layer='noisy')
    ins = share_units[-1] if share_units else self.rep_net.h_dim
    self.v = MLP(ins, network_settings['v'], layer='noisy',
                 output_shape=atoms)
    self.adv = MLP(ins, network_settings['adv'], layer='noisy',
                   output_shape=action_dim * atoms)
def __init__(self, n_agents: int, state_spec, rep_net_params,
             agent_own_state_size: bool, query_hidden_units: int,
             query_embed_dim: int, key_embed_dim: int,
             head_hidden_units: int, n_attention_head: int,
             constrant_hidden_units: int, is_weighted: bool = True):
    """Attention-based mixer: per-head query embeddings from the encoded
    state and key embeddings from per-agent features, plus a head-weight
    layer and a scalar constraint-value layer.

    NOTE(review): 'constrant' looks like a typo for 'constraint'; the
    parameter and attribute names are kept for interface compatibility.
    """
    super().__init__()
    self.n_agents = n_agents
    self.rep_net = RepresentationNetwork(obs_spec=state_spec,
                                         rep_net_params=rep_net_params)
    self.u_dim = agent_own_state_size  # TODO: implement this
    self.query_embed_dim = query_embed_dim
    self.key_embed_dim = key_embed_dim
    self.n_attention_head = n_attention_head
    self.is_weighted = is_weighted
    self.query_embedding_layers = nn.ModuleList()
    self.key_embedding_layers = nn.ModuleList()
    # Loop index is never used — one identical query/key pair per head.
    for _ in range(self.n_attention_head):
        self.query_embedding_layers.append(
            MLP(input_dim=self.rep_net.h_dim, hidden_units=query_hidden_units,
                layer='linear', act_fn='relu', output_shape=query_embed_dim))
        self.key_embedding_layers.append(
            nn.Linear(self.u_dim, self.key_embed_dim))
    # Scaling factor for scaled dot-product attention.
    self.scaled_product_value = np.sqrt(self.query_embed_dim)
    self.head_embedding_layer = MLP(input_dim=self.rep_net.h_dim,
                                    hidden_units=head_hidden_units,
                                    layer='linear', act_fn='relu',
                                    output_shape=n_attention_head)
    self.constrant_value_layer = MLP(input_dim=self.rep_net.h_dim,
                                     hidden_units=constrant_hidden_units,
                                     layer='linear', act_fn='relu',
                                     output_shape=1)
def __init__(self, s_dim, a_dim, hidden_units):
    """Scalar scorer over concatenated [state, action] with tanh activations."""
    super().__init__()
    self._s_dim = s_dim
    self._a_dim = a_dim
    self._hidden_units = hidden_units
    in_dim = s_dim + a_dim
    self._net = MLP(input_dim=in_dim,
                    hidden_units=hidden_units,
                    layer='linear',
                    act_fn='tanh',
                    output_shape=1,
                    out_act=None)
def __init__(self, n_agents, state_spec, rep_net_params, a_dim, qtran_arch, hidden_units):
    """QTran joint networks: Q(s, u) with an architecture-dependent input,
    plus V(s)."""
    super().__init__()
    self.rep_net = RepresentationNetwork(obs_spec=state_spec,
                                         rep_net_params=rep_net_params)
    self.qtran_arch = qtran_arch  # QTran architecture
    # LSTM representations carry two hidden tensors; everything else carries one.
    self.h_nums = 2 if self.rep_net.memory_net.network_type == 'lstm' else 1
    h_dim = self.rep_net.h_dim
    if self.qtran_arch == "coma_critic":
        # Q takes [state, u] as input
        q_input_size = h_dim + (n_agents * a_dim)
    elif self.qtran_arch == "qtran_paper":
        # Q takes [state, agent_action_observation_encodings]
        ae_input = self.h_nums * h_dim + a_dim
        self.action_encoding = MLP(input_dim=ae_input, hidden_units=[ae_input],
                                   layer='linear', act_fn='relu',
                                   output_shape=ae_input)
        q_input_size = h_dim + ae_input
    else:
        raise Exception(
            "{} is not a valid QTran architecture".format(self.qtran_arch))
    self.Q = MLP(input_dim=q_input_size, hidden_units=hidden_units,
                 layer='linear', act_fn='relu', output_shape=1)
    # V(s)
    self.V = MLP(input_dim=h_dim, hidden_units=hidden_units,
                 layer='linear', act_fn='relu', output_shape=1)
class ActorCriticValueCts(BaseModel):
    """Shared actor-critic network for continuous action spaces.

    Input: vector of state.
    Output: (mu, log_std, v) — mean and clamped log-std of the Gaussian
    action distribution, plus the state value v(s). The actor and critic
    share the trunk layers.
    """

    def __init__(self, obs_spec, rep_net_params, output_shape, network_settings):
        super().__init__(obs_spec, rep_net_params)
        self.condition_sigma = network_settings['condition_sigma']
        self.log_std_min, self.log_std_max = network_settings['log_std_bound']
        share_units = network_settings['share']
        self.share = MLP(self.rep_net.h_dim, share_units)
        ins = share_units[-1] if share_units else self.rep_net.h_dim
        self.mu_logstd_share = MLP(ins, network_settings['mu'])
        self.v = MLP(ins, network_settings['v'], output_shape=1)
        if network_settings['mu']:
            ins = network_settings['mu'][-1]
        self.mu = MLP(ins, [], output_shape=output_shape, out_act='tanh')
        if self.condition_sigma:
            self.log_std = MLP(ins, [], output_shape=output_shape)
        else:
            self.log_std = nn.Parameter(-0.5 * th.ones(output_shape))

    def forward(self, x, **kwargs):
        feat = self.share(self.repre(x, **kwargs))
        v = self.v(feat)
        hidden = self.mu_logstd_share(feat)
        mu = self.mu(hidden)
        if self.condition_sigma:
            log_std = self.log_std(hidden)  # [T, B, *] or [B, *]
        else:
            log_std = self.log_std.repeat(mu.shape[:-1] + (1,))  # [T, B, *] or [B, *]
        return mu, log_std.clamp(self.log_std_min, self.log_std_max), v
def __init__(self, obs_spec, rep_net_params, output_shape, network_settings):
    """Plain categorical head emitting raw logits over the feature vector."""
    super().__init__(obs_spec, rep_net_params)
    self.logits = MLP(self.rep_net.h_dim, network_settings,
                      output_shape=output_shape)