def __init__(self, args, obs_dim, act_dim, discrete_action, device, take_prev_action=False):
    """Recurrent actor: (feature norm) -> MLP encoder -> GRU -> action head(s).

    :param args: namespace with network hyperparameters (hidden_size,
        layer_N, use_orthogonal, use_ReLU, use_feature_normlization, gain).
    :param obs_dim: (int) dimension of a single agent's observation.
    :param act_dim: (int or np.ndarray) action dimension; an ndarray marks a
        MultiDiscrete space, with one output head per sub-action space.
    :param discrete_action: (bool) whether the action space is discrete.
    :param device: (torch.device) device on which submodules are placed.
    :param take_prev_action: (bool) if True, the previous action is
        concatenated with the observation as the network input.
    """
    super(R_Actor, self).__init__()
    self._use_feature_normlization = args.use_feature_normlization
    self._use_ReLU = args.use_ReLU
    self._layer_N = args.layer_N
    self._use_orthogonal = args.use_orthogonal
    self._gain = args.gain
    self.obs_dim = obs_dim
    self.act_dim = act_dim
    self.hidden_size = args.hidden_size
    self.discrete = discrete_action
    self.device = device
    self.take_prev_act = take_prev_action

    if take_prev_action:
        input_dim = obs_dim + act_dim
    else:
        input_dim = obs_dim

    # map observation input into input for rnn
    if self._use_feature_normlization:
        self.feature_norm = nn.LayerNorm(input_dim).to(self.device)
    self.mlp = MLPLayer(input_dim, self.hidden_size, self._layer_N,
                        self._use_orthogonal, self._use_ReLU).to(self.device)

    self.rnn = nn.GRU(self.hidden_size, self.hidden_size).to(self.device)
    # zero GRU biases; orthogonal or xavier init on the weight matrices
    for name, param in self.rnn.named_parameters():
        if 'bias' in name:
            nn.init.constant_(param, 0)
        elif 'weight' in name:
            if self._use_orthogonal:
                nn.init.orthogonal_(param)
            else:
                nn.init.xavier_uniform_(param)
    self.norm = nn.LayerNorm(self.hidden_size).to(self.device)

    # get action from rnn hidden state
    if self._use_orthogonal:
        init_ = lambda m: init(m, nn.init.orthogonal_,
                               lambda x: nn.init.constant_(x, 0), self._gain)
    else:
        init_ = lambda m: init(m, nn.init.xavier_uniform_,
                               lambda x: nn.init.constant_(x, 0), self._gain)

    if isinstance(act_dim, np.ndarray):
        # MultiDiscrete setting: have n Linear layers, one per sub-action space.
        # BUG FIX: these heads were stored in a plain Python list, leaving their
        # parameters invisible to .parameters(), .to() and state_dict();
        # nn.ModuleList registers them while still supporting indexing/iteration.
        # (NOTE: checkpoints saved before this fix lack these keys.)
        self.multidiscrete = True
        self.action_outs = nn.ModuleList([
            init_(nn.Linear(self.hidden_size, a_dim)).to(self.device)
            for a_dim in act_dim
        ])
    else:
        self.multidiscrete = False
        self.action_out = init_(nn.Linear(self.hidden_size, act_dim)).to(self.device)
def __init__(self, args, device, multidiscrete_list=None):
    """Init the QMix mixing network.

    Hypernetworks conditioned on the centralized state produce the weights
    and biases of a 2-layer MLP that mixes per-agent Qs into Q_tot.

    :param args: namespace with n_agents, cent_obs_dim, use_orthogonal,
        mixer_hidden_dim, hypernet_hidden_dim, hypernet_layers.
    :param device: (torch.device) device on which submodules are placed.
    :param multidiscrete_list: (list or None) for MultiDiscrete action
        spaces, the number of Q inputs contributed by each agent.
    """
    super(QMixer, self).__init__()
    self.device = device
    self.n_agents = args.n_agents
    self.cent_obs_dim = args.cent_obs_dim
    self.use_orthogonal = args.use_orthogonal
    self.hidden_layer_dim = args.mixer_hidden_dim  # dimension of the hidden layer of the mixing net
    self.hypernet_hidden_dim = args.hypernet_hidden_dim  # dimension of the hidden layer of each hypernet

    if multidiscrete_list:
        self.num_mixer_q_inps = sum(multidiscrete_list)
    else:
        self.num_mixer_q_inps = self.n_agents

    if self.use_orthogonal:
        init_ = lambda m: init(m, nn.init.orthogonal_,
                               lambda x: nn.init.constant_(x, 0))
    else:
        init_ = lambda m: init(m, nn.init.xavier_uniform_,
                               lambda x: nn.init.constant_(x, 0))

    # hypernets output the weight and bias for the 2 layer MLP which takes in
    # the state and agent Qs and outputs Q_tot
    if args.hypernet_layers == 1:
        # each hypernet only has 1 layer to output the weights
        # hyper_w1 outputs weight matrix which is of dimension (hidden_layer_dim x N)
        self.hyper_w1 = init_(
            nn.Linear(self.cent_obs_dim,
                      self.num_mixer_q_inps * self.hidden_layer_dim)).to(self.device)
        # hyper_w2 outputs weight matrix which is of dimension (1 x hidden_layer_dim)
        self.hyper_w2 = init_(
            nn.Linear(self.cent_obs_dim, self.hidden_layer_dim)).to(self.device)
    elif args.hypernet_layers == 2:
        # 2 layer hypernets: output dimensions are same as above case
        self.hyper_w1 = nn.Sequential(
            init_(nn.Linear(self.cent_obs_dim, self.hypernet_hidden_dim)),
            nn.ReLU(),
            init_(nn.Linear(self.hypernet_hidden_dim,
                            self.num_mixer_q_inps * self.hidden_layer_dim))).to(self.device)
        self.hyper_w2 = nn.Sequential(
            init_(nn.Linear(self.cent_obs_dim, self.hypernet_hidden_dim)),
            nn.ReLU(),
            init_(nn.Linear(self.hypernet_hidden_dim,
                            self.hidden_layer_dim))).to(self.device)
    else:
        # BUG FIX: any other value previously left hyper_w1/hyper_w2 undefined,
        # failing much later with an AttributeError in forward(); fail fast here.
        raise ValueError(
            "hypernet_layers must be 1 or 2, got {}".format(args.hypernet_layers))

    # hyper_b1 outputs bias vector of dimension (1 x hidden_layer_dim)
    self.hyper_b1 = init_(
        nn.Linear(self.cent_obs_dim, self.hidden_layer_dim)).to(self.device)
    # hyper_b2 outputs bias vector of dimension (1 x 1)
    self.hyper_b2 = nn.Sequential(
        init_(nn.Linear(self.cent_obs_dim, self.hypernet_hidden_dim)),
        nn.ReLU(),
        init_(nn.Linear(self.hypernet_hidden_dim, 1))).to(self.device)
def __init__(self, input_dim, act_dim, args, device):
    """Recurrent per-agent Q-network: (feature norm) -> MLP -> GRU -> Q head(s).

    :param input_dim: (int) network input dimension
        (agent obs dim + agent act dim).
    :param act_dim: (int or np.ndarray) action dimension; an ndarray marks a
        MultiDiscrete space, with one Q head per sub-action space.
    :param args: namespace with network hyperparameters (hidden_size,
        layer_N, use_orthogonal, use_ReLU, use_feature_normlization, gain).
    :param device: (torch.device) device on which submodules are placed.
    """
    super(AgentQFunction, self).__init__()
    self._use_feature_normlization = args.use_feature_normlization
    self._layer_N = args.layer_N
    self._use_orthogonal = args.use_orthogonal
    self._use_ReLU = args.use_ReLU
    self._gain = args.gain
    self.hidden_size = args.hidden_size
    self.device = device

    # maps input to RNN input dimension
    if self._use_feature_normlization:
        self.feature_norm = nn.LayerNorm(input_dim).to(self.device)
    self.mlp = MLPLayer(input_dim, self.hidden_size, self._layer_N,
                        self._use_orthogonal, self._use_ReLU).to(self.device)

    self.rnn = nn.GRU(self.hidden_size, self.hidden_size).to(self.device)
    # zero GRU biases; orthogonal or xavier init on the weight matrices
    for name, param in self.rnn.named_parameters():
        if 'bias' in name:
            nn.init.constant_(param, 0)
        elif 'weight' in name:
            if self._use_orthogonal:
                nn.init.orthogonal_(param)
            else:
                nn.init.xavier_uniform_(param)

    # get action from rnn hidden state
    if self._use_orthogonal:
        init_ = lambda m: init(m, nn.init.orthogonal_,
                               lambda x: nn.init.constant_(x, 0), self._gain)
    else:
        init_ = lambda m: init(m, nn.init.xavier_uniform_,
                               lambda x: nn.init.constant_(x, 0), self._gain)

    if isinstance(act_dim, np.ndarray):
        # MultiDiscrete setting: have n Linear layers, one per sub-action space.
        # BUG FIX: these heads were stored in a plain Python list, leaving their
        # parameters invisible to .parameters(), .to() and state_dict();
        # nn.ModuleList registers them while still supporting indexing/iteration.
        # (NOTE: checkpoints saved before this fix lack these keys.)
        self.multidiscrete = True
        self.q_outs = nn.ModuleList([
            init_(nn.Linear(self.hidden_size, a_dim)).to(self.device)
            for a_dim in act_dim
        ])
    else:
        self.multidiscrete = False
        self.q_out = init_(nn.Linear(self.hidden_size, act_dim)).to(self.device)
def __init__(self, args, central_obs_dim, central_act_dim, device):
    """Recurrent twin-Q critic over centralized observations and actions.

    Builds (optional feature norm) -> MLP encoder -> GRU, followed by two
    scalar value heads (q1_out, q2_out) that share the recurrent features.

    :param args: namespace with network hyperparameters (hidden_size,
        layer_N, use_orthogonal, use_ReLU, use_feature_normlization).
    :param central_obs_dim: (int) dimension of the centralized observation.
    :param central_act_dim: (int) dimension of the centralized action.
    :param device: (torch.device) device on which submodules are placed.
    """
    super(R_Critic, self).__init__()
    self._use_ReLU = args.use_ReLU
    self._layer_N = args.layer_N
    self._use_orthogonal = args.use_orthogonal
    self._use_feature_normlization = args.use_feature_normlization
    self.central_obs_dim = central_obs_dim
    self.central_act_dim = central_act_dim
    self.hidden_size = args.hidden_size
    self.device = device

    input_dim = central_obs_dim + central_act_dim
    if self._use_feature_normlization:
        self.feature_norm = nn.LayerNorm(input_dim).to(self.device)

    # encoder mapping (cent_obs, cent_act) into the GRU input space
    self.mlp = MLPLayer(input_dim, self.hidden_size, self._layer_N,
                        self._use_orthogonal, self._use_ReLU).to(self.device)
    self.rnn = nn.GRU(self.hidden_size, self.hidden_size).to(self.device)

    # zero the GRU biases; weight matrices get orthogonal or xavier init
    weight_fn = nn.init.orthogonal_ if self._use_orthogonal else nn.init.xavier_uniform_
    for pname, pvalue in self.rnn.named_parameters():
        if 'bias' in pname:
            nn.init.constant_(pvalue, 0)
        elif 'weight' in pname:
            weight_fn(pvalue)

    self.norm = nn.LayerNorm(self.hidden_size).to(self.device)

    def init_(module):
        return init(module, weight_fn, lambda x: nn.init.constant_(x, 0))

    # twin Q heads, each producing a single scalar value
    self.q1_out = init_(nn.Linear(self.hidden_size, 1)).to(self.device)
    self.q2_out = init_(nn.Linear(self.hidden_size, 1)).to(self.device)
def __init__(self, input_dim, hidden_size, layer_N, use_orthogonal, use_ReLU):
    """Fully-connected stack: an input layer plus layer_N cloned hidden layers.

    Each layer is Linear -> activation -> LayerNorm. Weights use orthogonal
    or xavier-uniform init with the gain matching the chosen activation.

    :param input_dim: (int) dimension of the network input.
    :param hidden_size: (int) width of every hidden layer.
    :param layer_N: (int) number of cloned hidden layers after the first.
    :param use_orthogonal: (bool) orthogonal init if True, else xavier uniform.
    :param use_ReLU: (bool) ReLU activation if True, else Tanh.
    """
    super(MLPLayer, self).__init__()
    self._layer_N = layer_N

    # pick activation, matching init gain, and weight-init scheme independently
    # (equivalent to the original four-way branch, with less duplication)
    active_func = nn.ReLU() if use_ReLU else nn.Tanh()
    gain = nn.init.calculate_gain('relu' if use_ReLU else 'tanh')
    weight_fn = nn.init.orthogonal_ if use_orthogonal else nn.init.xavier_uniform_

    def init_(module):
        return init(module, weight_fn, lambda x: nn.init.constant_(x, 0), gain=gain)

    self.fc1 = nn.Sequential(init_(nn.Linear(input_dim, hidden_size)),
                             active_func, nn.LayerNorm(hidden_size))
    self.fc_h = nn.Sequential(init_(nn.Linear(hidden_size, hidden_size)),
                              active_func, nn.LayerNorm(hidden_size))
    self.fc2 = get_clones(self.fc_h, self._layer_N)
def __init__(self, args, obs_dim, act_dim, action_space, device, take_prev_action=False):
    """Recurrent Gaussian (SAC-style) actor producing mean and log-std heads.

    Network: (feature norm) -> MLP encoder -> GRU -> (mean_layer, log_std_layer).
    action_scale/action_bias rescale a squashed sample into the bounds of
    `action_space`; identity scaling is used when action_space is None.
    NOTE(review): action_scale/action_bias are plain tensors, not registered
    buffers, so .to(device) / state_dict() will not include them — confirm
    callers handle this.

    :param args: namespace with network hyperparameters (hidden_size,
        layer_N, use_orthogonal, use_ReLU, use_feature_normlization).
    :param obs_dim: (int) dimension of a single agent's observation.
    :param act_dim: (int) dimension of the continuous action.
    :param action_space: gym-style Box with .high/.low arrays, or None.
    :param device: (torch.device) device on which submodules are placed.
    :param take_prev_action: (bool) if True, the previous action is
        concatenated with the observation as the network input.
    """
    super(R_GaussianActor, self).__init__()
    self._use_feature_normlization = args.use_feature_normlization
    self._use_ReLU = args.use_ReLU
    self._layer_N = args.layer_N
    self._use_orthogonal = args.use_orthogonal
    self.obs_dim = obs_dim
    self.act_dim = act_dim
    self.hidden_size = args.hidden_size
    self.device = device
    self.take_prev_act = take_prev_action

    # optionally condition on the previous action as well
    input_dim = obs_dim + act_dim if take_prev_action else obs_dim

    if self._use_feature_normlization:
        self.feature_norm = nn.LayerNorm(input_dim).to(self.device)

    # observation encoder feeding the recurrent core
    self.mlp = MLPLayer(input_dim, self.hidden_size, self._layer_N,
                        self._use_orthogonal, self._use_ReLU).to(self.device)
    self.rnn = nn.GRU(self.hidden_size, self.hidden_size).to(self.device)

    # zero GRU biases; weight matrices get orthogonal or xavier init
    weight_fn = nn.init.orthogonal_ if self._use_orthogonal else nn.init.xavier_uniform_
    for pname, pvalue in self.rnn.named_parameters():
        if 'bias' in pname:
            nn.init.constant_(pvalue, 0)
        elif 'weight' in pname:
            weight_fn(pvalue)

    self.norm = nn.LayerNorm(self.hidden_size).to(self.device)

    def init_(module):
        return init(module, weight_fn, lambda x: nn.init.constant_(x, 0))

    # Gaussian distribution parameters derived from the GRU hidden state
    self.mean_layer = init_(nn.Linear(self.hidden_size, self.act_dim)).to(self.device)
    self.log_std_layer = init_(nn.Linear(self.hidden_size, self.act_dim)).to(self.device)

    # SAC rescaling to respect action bounds (see paper)
    if action_space is None:
        self.action_scale = torch.tensor(1.)
        self.action_bias = torch.tensor(0.)
    else:
        self.action_scale = torch.tensor(
            (action_space.high - action_space.low) / 2.).float()
        self.action_bias = torch.tensor(
            (action_space.high + action_space.low) / 2.).float()