Example #1
    def __init__(self,
                 args,
                 obs_dim,
                 act_dim,
                 discrete_action,
                 device,
                 take_prev_action=False):
        super(R_Actor, self).__init__()
        self._use_feature_normlization = args.use_feature_normlization
        self._use_ReLU = args.use_ReLU
        self._layer_N = args.layer_N
        self._use_orthogonal = args.use_orthogonal
        self._gain = args.gain
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.hidden_size = args.hidden_size
        self.discrete = discrete_action
        self.device = device
        self.take_prev_act = take_prev_action

        if take_prev_action:
            input_dim = obs_dim + act_dim
        else:
            input_dim = obs_dim

        # map observation input into input for rnn
        if self._use_feature_normlization:
            self.feature_norm = nn.LayerNorm(input_dim).to(self.device)
        self.mlp = MLPLayer(input_dim, self.hidden_size, self._layer_N,
                            self._use_orthogonal,
                            self._use_ReLU).to(self.device)
        self.rnn = nn.GRU(self.hidden_size, self.hidden_size).to(self.device)
        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0)
            elif 'weight' in name:
                if self._use_orthogonal:
                    nn.init.orthogonal_(param)
                else:
                    nn.init.xavier_uniform_(param)
        self.norm = nn.LayerNorm(self.hidden_size).to(self.device)
        # get action from rnn hidden state
        if self._use_orthogonal:
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                   constant_(x, 0), self._gain)
        else:
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.
                                   init.constant_(x, 0), self._gain)

        if isinstance(act_dim, np.ndarray):
            # MultiDiscrete setting: one Linear output head per action dimension
            self.multidiscrete = True
            self.action_outs = [
                init_(nn.Linear(self.hidden_size, a_dim)).to(self.device)
                for a_dim in act_dim
            ]
        else:
            self.multidiscrete = False
            self.action_out = init_(nn.Linear(self.hidden_size,
                                              act_dim)).to(self.device)
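
Every snippet on this page relies on an init helper to apply the chosen weight and bias initializers to a layer (see the init_ lambdas above). Its definition is not shown here; the following is a minimal sketch, assuming the common signature init(module, weight_init, bias_init, gain=1) used in many PyTorch RL codebases.

def init(module, weight_init, bias_init, gain=1):
    # apply the weight initializer (with the given gain) and the bias initializer, then return the module
    weight_init(module.weight.data, gain=gain)
    bias_init(module.bias.data)
    return module
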
Example #2
    def __init__(self, args, device, multidiscrete_list=None):
        """
        init mixer class
        """
        super(QMixer, self).__init__()
        self.device = device
        self.n_agents = args.n_agents
        self.cent_obs_dim = args.cent_obs_dim
        self.use_orthogonal = args.use_orthogonal

        self.hidden_layer_dim = args.mixer_hidden_dim  # dimension of the hidden layer of the mixing net
        self.hypernet_hidden_dim = args.hypernet_hidden_dim  # dimension of the hidden layer of each hypernet

        if multidiscrete_list:
            self.num_mixer_q_inps = sum(multidiscrete_list)
        else:
            self.num_mixer_q_inps = self.n_agents

        if self.use_orthogonal:
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                   constant_(x, 0))
        else:
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.
                                   init.constant_(x, 0))

        # hypernets output the weights and biases of the 2-layer mixing MLP, which takes the agent Qs as input (with parameters conditioned on the state) and outputs Q_tot
        if args.hypernet_layers == 1:
            # each hypernet only has 1 layer to output the weights
            # hyper_w1 outputs the first-layer weight matrix of dimension (num_mixer_q_inps x hidden_layer_dim)
            self.hyper_w1 = init_(
                nn.Linear(self.cent_obs_dim, self.num_mixer_q_inps *
                          self.hidden_layer_dim)).to(self.device)
            # hyper_w2 outputs the second-layer weight matrix of dimension (hidden_layer_dim x 1)
            self.hyper_w2 = init_(
                nn.Linear(self.cent_obs_dim,
                          self.hidden_layer_dim)).to(self.device)
        elif args.hypernet_layers == 2:
            # 2-layer hypernets: output dimensions are the same as in the 1-layer case
            self.hyper_w1 = nn.Sequential(
                init_(nn.Linear(self.cent_obs_dim, self.hypernet_hidden_dim)),
                nn.ReLU(),
                init_(
                    nn.Linear(self.hypernet_hidden_dim, self.num_mixer_q_inps *
                              self.hidden_layer_dim))).to(self.device)
            self.hyper_w2 = nn.Sequential(
                init_(nn.Linear(self.cent_obs_dim, self.hypernet_hidden_dim)),
                nn.ReLU(),
                init_(
                    nn.Linear(self.hypernet_hidden_dim,
                              self.hidden_layer_dim))).to(self.device)

        # hyper_b1 outputs bias vector of dimension (1 x hidden_layer_dim)
        self.hyper_b1 = init_(
            nn.Linear(self.cent_obs_dim,
                      self.hidden_layer_dim)).to(self.device)
        # hyper_b2 outputs bias vector of dimension (1 x 1)
        self.hyper_b2 = nn.Sequential(
            init_(nn.Linear(self.cent_obs_dim, self.hypernet_hidden_dim)),
            nn.ReLU(), init_(nn.Linear(self.hypernet_hidden_dim,
                                       1))).to(self.device)
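
The __init__ above only builds the hypernetworks; the mixing computation itself is not part of this snippet. The sketch below shows how a QMIX-style forward pass typically combines them: the hypernet outputs are reshaped into per-sample weight matrices, their absolute values keep Q_tot monotonic in every agent Q, and an ELU sits between the two mixing layers. The method name mix and the tensor shapes are assumptions for illustration, not the class's actual API, and torch is assumed to be imported as in the rest of the file.

    def mix(self, agent_qs, states):
        # agent_qs: (batch, num_mixer_q_inps); states: (batch, cent_obs_dim)
        batch = agent_qs.size(0)
        agent_qs = agent_qs.view(batch, 1, self.num_mixer_q_inps)
        # absolute weights enforce monotonicity of Q_tot in each agent Q
        w1 = torch.abs(self.hyper_w1(states)).view(batch, self.num_mixer_q_inps, self.hidden_layer_dim)
        b1 = self.hyper_b1(states).view(batch, 1, self.hidden_layer_dim)
        hidden = torch.nn.functional.elu(torch.bmm(agent_qs, w1) + b1)
        w2 = torch.abs(self.hyper_w2(states)).view(batch, self.hidden_layer_dim, 1)
        b2 = self.hyper_b2(states).view(batch, 1, 1)
        q_tot = torch.bmm(hidden, w2) + b2
        return q_tot.view(batch, 1)
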
Example #3
    def __init__(self, input_dim, act_dim, args, device):
        # input dim is agent obs dim + agent act dim
        # output dim is act dim
        super(AgentQFunction, self).__init__()
        self._use_feature_normlization = args.use_feature_normlization
        self._layer_N = args.layer_N
        self._use_orthogonal = args.use_orthogonal
        self._use_ReLU = args.use_ReLU
        self._gain = args.gain
        self.hidden_size = args.hidden_size
        self.device = device

        # maps input to RNN input dimension
        if self._use_feature_normlization:
            self.feature_norm = nn.LayerNorm(input_dim).to(self.device)
        self.mlp = MLPLayer(input_dim, self.hidden_size, self._layer_N,
                            self._use_orthogonal,
                            self._use_ReLU).to(self.device)
        self.rnn = nn.GRU(self.hidden_size, self.hidden_size).to(self.device)
        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0)
            elif 'weight' in name:
                if self._use_orthogonal:
                    nn.init.orthogonal_(param)
                else:
                    nn.init.xavier_uniform_(param)

        # get Q-values from rnn hidden state
        if self._use_orthogonal:
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                   constant_(x, 0), self._gain)
        else:
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.
                                   init.constant_(x, 0), self._gain)

        if isinstance(act_dim, np.ndarray):
            # MultiDiscrete setting: one Linear Q-value head per action dimension
            self.multidiscrete = True
            self.q_outs = [
                init_(nn.Linear(self.hidden_size, a_dim)).to(self.device)
                for a_dim in act_dim
            ]
        else:
            self.multidiscrete = False
            self.q_out = init_(nn.Linear(self.hidden_size,
                                         act_dim)).to(self.device)
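
The snippet only defines the layers; a recurrent Q-function forward pass that matches this layout might look like the sketch below (method name, tensor shapes, and the returned tuple are assumptions for illustration).

    def forward(self, x, rnn_hidden_states):
        # x: (seq_len, batch, input_dim); rnn_hidden_states: (1, batch, hidden_size)
        if self._use_feature_normlization:
            x = self.feature_norm(x)
        x = self.mlp(x)
        x, h = self.rnn(x, rnn_hidden_states)
        if self.multidiscrete:
            q = [q_out(x) for q_out in self.q_outs]  # one Q head per action dimension
        else:
            q = self.q_out(x)
        return q, h
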
Example #4
    def __init__(self, args, central_obs_dim, central_act_dim, device):
        super(R_Critic, self).__init__()
        self._use_ReLU = args.use_ReLU
        self._layer_N = args.layer_N
        self._use_orthogonal = args.use_orthogonal
        self._use_feature_normlization = args.use_feature_normlization
        self.central_obs_dim = central_obs_dim
        self.central_act_dim = central_act_dim
        self.hidden_size = args.hidden_size
        self.device = device

        input_dim = central_obs_dim + central_act_dim
        if self._use_feature_normlization:
            self.feature_norm = nn.LayerNorm(input_dim).to(self.device)

        # map observation input into input for rnn
        self.mlp = MLPLayer(input_dim, self.hidden_size, self._layer_N,
                            self._use_orthogonal,
                            self._use_ReLU).to(self.device)
        self.rnn = nn.GRU(self.hidden_size, self.hidden_size).to(self.device)
        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0)
            elif 'weight' in name:
                if self._use_orthogonal:
                    nn.init.orthogonal_(param)
                else:
                    nn.init.xavier_uniform_(param)
        self.norm = nn.LayerNorm(self.hidden_size).to(self.device)

        if self._use_orthogonal:
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                   constant_(x, 0))
        else:
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.
                                   init.constant_(x, 0))

        self.q1_out = init_(nn.Linear(self.hidden_size, 1)).to(self.device)
        self.q2_out = init_(nn.Linear(self.hidden_size, 1)).to(self.device)
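
R_Critic keeps two independent Q heads (q1_out, q2_out), the clipped double-Q trick from TD3/SAC. A forward pass consistent with the layers above could look like this sketch (method name and tensor shapes are assumptions):

    def forward(self, central_obs, central_acts, rnn_hidden_states):
        # concatenate centralized observations and joint actions, matching input_dim above
        x = torch.cat([central_obs, central_acts], dim=-1)
        if self._use_feature_normlization:
            x = self.feature_norm(x)
        x = self.mlp(x)
        x, h = self.rnn(x, rnn_hidden_states)
        x = self.norm(x)
        # two independent Q estimates for clipped double-Q learning
        return self.q1_out(x), self.q2_out(x), h
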
Example #5
    def __init__(self, input_dim, hidden_size, layer_N, use_orthogonal,
                 use_ReLU):
        super(MLPLayer, self).__init__()
        self._layer_N = layer_N

        if use_orthogonal:
            if use_ReLU:
                active_func = nn.ReLU()
                init_ = lambda m: init(m,
                                       nn.init.orthogonal_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('relu'))
            else:
                active_func = nn.Tanh()
                init_ = lambda m: init(m,
                                       nn.init.orthogonal_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('tanh'))
        else:
            if use_ReLU:
                active_func = nn.ReLU()
                init_ = lambda m: init(m,
                                       nn.init.xavier_uniform_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('relu'))
            else:
                active_func = nn.Tanh()
                init_ = lambda m: init(m,
                                       nn.init.xavier_uniform_,
                                       lambda x: nn.init.constant_(x, 0),
                                       gain=nn.init.calculate_gain('tanh'))

        self.fc1 = nn.Sequential(init_(nn.Linear(input_dim, hidden_size)),
                                 active_func, nn.LayerNorm(hidden_size))
        self.fc_h = nn.Sequential(init_(nn.Linear(hidden_size, hidden_size)),
                                  active_func, nn.LayerNorm(hidden_size))
        self.fc2 = get_clones(self.fc_h, self._layer_N)
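
MLPLayer stacks one input block (fc1) and layer_N identical hidden blocks obtained by cloning fc_h via get_clones. That helper is not shown here; a minimal sketch, assuming it simply deep-copies the module into an nn.ModuleList, is:

import copy
import torch.nn as nn

def get_clones(module, N):
    # deep-copy the module N times so every clone has its own parameters
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

The matching forward pass would then apply fc1 followed by each cloned block in order, e.g.:

    def forward(self, x):
        x = self.fc1(x)
        for i in range(self._layer_N):
            x = self.fc2[i](x)
        return x

The __init__ that follows belongs to a different class, R_GaussianActor, a recurrent Gaussian actor for continuous action spaces.
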
    def __init__(self,
                 args,
                 obs_dim,
                 act_dim,
                 action_space,
                 device,
                 take_prev_action=False):
        super(R_GaussianActor, self).__init__()
        self._use_feature_normlization = args.use_feature_normlization
        self._use_ReLU = args.use_ReLU
        self._layer_N = args.layer_N
        self._use_orthogonal = args.use_orthogonal
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.hidden_size = args.hidden_size
        self.device = device
        self.take_prev_act = take_prev_action

        if take_prev_action:
            input_dim = obs_dim + act_dim
        else:
            input_dim = obs_dim

        if self._use_feature_normlization:
            self.feature_norm = nn.LayerNorm(input_dim).to(self.device)

        # map observation input into input for rnn
        self.mlp = MLPLayer(input_dim, self.hidden_size, self._layer_N,
                            self._use_orthogonal,
                            self._use_ReLU).to(self.device)
        self.rnn = nn.GRU(self.hidden_size, self.hidden_size).to(self.device)
        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0)
            elif 'weight' in name:
                if self._use_orthogonal:
                    nn.init.orthogonal_(param)
                else:
                    nn.init.xavier_uniform_(param)
        self.norm = nn.LayerNorm(self.hidden_size).to(self.device)

        # get Gaussian mean and log-std from rnn hidden state
        if self._use_orthogonal:
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                   constant_(x, 0))
        else:
            init_ = lambda m: init(m, nn.init.xavier_uniform_, lambda x: nn.
                                   init.constant_(x, 0))

        self.mean_layer = init_(nn.Linear(self.hidden_size,
                                          self.act_dim)).to(self.device)
        self.log_std_layer = init_(nn.Linear(self.hidden_size,
                                             self.act_dim)).to(self.device)

        # SAC rescaling to respect action bounds (see paper)
        if action_space is None:
            self.action_scale = torch.tensor(1.)
            self.action_bias = torch.tensor(0.)
        else:
            self.action_scale = torch.tensor(
                (action_space.high - action_space.low) / 2.).float()

            self.action_bias = torch.tensor(
                (action_space.high + action_space.low) / 2.).float()
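
With the mean and log-std heads plus the action_scale/action_bias rescaling, this actor is set up for SAC-style sampling. The sketch below shows the usual reparameterized tanh-squashed sampling; the method name, the log-std clamp range (-20, 2), and the tensor shapes are assumptions for illustration.

    def sample(self, x, rnn_hidden_states):
        if self._use_feature_normlization:
            x = self.feature_norm(x)
        x = self.mlp(x)
        x, h = self.rnn(x, rnn_hidden_states)
        x = self.norm(x)
        mean = self.mean_layer(x)
        log_std = torch.clamp(self.log_std_layer(x), min=-20, max=2)
        dist = torch.distributions.Normal(mean, log_std.exp())
        x_t = dist.rsample()                 # reparameterization trick
        y_t = torch.tanh(x_t)
        action = y_t * self.action_scale + self.action_bias
        # change-of-variables correction for the tanh squashing
        log_prob = (dist.log_prob(x_t)
                    - torch.log(self.action_scale * (1 - y_t.pow(2)) + 1e-6)).sum(-1, keepdim=True)
        return action, log_prob, h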