예제 #1
0
파일: NAF.py 프로젝트: phymucs/HTRPO
 def forward(self, x):
     """Run the MLP and split its output into ``V``, ``mu`` and ``L``.

     The network output is sliced along its last axis: the first
     ``n_actions`` entries become ``mu``, the entry at index
     ``n_actions`` becomes ``V``, and the trailing ``n_actions ** 2``
     entries are reshaped into a square matrix from which ``L`` is built.

     A 2-d (batched) or a 1-d (single sample) network output is accepted;
     any other rank raises ``RuntimeError``.

     Returns:
         Tuple ``(V, mu, L)``.
     """
     out = MLP.forward(self, x)
     n = self.n_actions
     rank = out.dim()
     if rank not in (1, 2):
         raise RuntimeError("dimenssion not matched")

     if rank == 2:
         # Batched output: every slice keeps the leading batch axis and the
         # masks get a broadcastable leading axis.
         mu = out[:, :n]
         V = out[:, n:n + 1]
         raw = out[:, -n ** 2:].clone().view(-1, n, n)
         tril_mask = self.tril_mask.unsqueeze(0)
         diag_mask = self.diag_mask.unsqueeze(0)
     else:
         # Single sample: V is the scalar element at index n.
         mu = out[:n]
         V = out[n]
         raw = out[-n ** 2:].clone().view(n, n)
         tril_mask = self.tril_mask
         diag_mask = self.diag_mask

     # Optional output activation on the mean, optionally rescaled.
     if self.action_active is not None:
         mu = self.action_active(mu)
         if self.action_scaler is not None:
             mu = self.action_scaler * mu

     # Entries selected by tril_mask are used as-is; entries selected by
     # diag_mask are exponentiated first (hence strictly positive).
     masked_raw = torch.mul(raw, tril_mask)
     masked_exp = torch.mul(torch.exp(raw), diag_mask)
     return V, mu, masked_raw + masked_exp
예제 #2
0
파일: DQN.py 프로젝트: phymucs/HTRPO
 def __init__(self,
              n_inputfeats,
              n_actions,
              n_hiddens=[30],
              nonlinear=F.tanh,
              usebn=False):
     """Build the shared trunk and the two linear heads ``V`` and ``A``.

     Args:
         n_inputfeats: forwarded to ``MLP`` as its first argument.
         n_actions: output width of the ``A`` head.
         n_hiddens: width list; the last entry is the trunk output width,
             the preceding entries are forwarded to ``MLP``.
         nonlinear: activation forwarded to ``MLP`` (also as ``outactive``).
         usebn: when true, an extra ``BatchNorm1d`` over the trunk output
             is created; the flag is also forwarded to ``MLP``.
     """
     super(FCDuelingDQN, self).__init__()
     feat_dim = n_hiddens[-1]
     # The MLP serves as the hidden layers; its output feeds both heads.
     self.hidden_layers = MLP(n_inputfeats, feat_dim, n_hiddens[:-1],
                              nonlinear, outactive=nonlinear, usebn=usebn)
     self.usebn = usebn
     if usebn:
         self.bn = nn.BatchNorm1d(feat_dim)
     self.V = nn.Linear(feat_dim, 1)
     self.A = nn.Linear(feat_dim, n_actions)
     # Re-draw both head weights from N(0, 0.1); biases keep their defaults.
     for head in (self.V, self.A):
         head.weight.data.normal_(0, 0.1)
예제 #3
0
파일: DQN.py 프로젝트: phymucs/HTRPO
 def __init__(
     self,
     n_inputfeats,  # input dim
     n_actions,  # action dim
     n_hiddens=[30],  # hidden unit number list
     nonlinear=F.tanh,
     usebn=False,
 ):
     """Create the network as a single ``MLP`` stored in ``self.net``.

     All arguments are forwarded unchanged to ``MLP``: the two sizes
     positionally, the remaining options by keyword.
     """
     super(FCDQN, self).__init__()
     mlp_options = {
         "n_hiddens": n_hiddens,
         "nonlinear": nonlinear,
         "usebn": usebn,
     }
     self.net = MLP(n_inputfeats, n_actions, **mlp_options)
예제 #4
0
파일: DDPG.py 프로젝트: phymucs/HTRPO
    def __init__(
            self,
            n_inputfeats,  # input dim
            n_actions,  # action dim
            n_hiddens=[64, 64],  # hidden unit number list
            nonlinear=F.relu,
            usebn=False,
            initializer="uniform",
            initializer_param={
                "last_upper": 3e-3,
                "last_lower": 3e-3
            }):
        """Build the critic: one linear layer followed by an ``MLP``.

        The first linear layer maps ``n_inputfeats`` to ``n_hiddens[0]``.
        The ``MLP`` is built for an input of width
        ``n_hiddens[0] + n_actions`` and receives ``n_hiddens[1:]`` plus
        the initializer settings.

        Args:
            n_inputfeats: input width of the first linear layer.
            n_actions: stored on the instance and added to the MLP input
                width.
            n_hiddens: must have at least two entries.
            nonlinear: stored on the instance and forwarded to ``MLP``.
            usebn: forwarded to ``MLP``.
            initializer: forwarded to ``MLP``.
            initializer_param: forwarded to ``MLP``; optional ``'lower'``
                and ``'upper'`` keys bound the uniform init of the first
                layer's weights.
        """
        hidden_count = len(n_hiddens)
        assert hidden_count >= 2, \
            "The critic has to contain at least one hidden layer."
        super(FCDDPG_C, self).__init__()
        self.n_actions = n_actions

        first_width = n_hiddens[0]
        self.first_layer = nn.Linear(n_inputfeats, first_width)
        # TODO: first layer initialization should be modified according to initializer.
        # Uniform bounds come from initializer_param when given, otherwise
        # they default to -/+ 1 / sqrt(n_inputfeats).
        if 'lower' in initializer_param:
            lower = initializer_param['lower']
        else:
            lower = -1. / np.sqrt(n_inputfeats)
        if 'upper' in initializer_param:
            upper = initializer_param['upper']
        else:
            upper = 1. / np.sqrt(n_inputfeats)
        nn.init.uniform_(self.first_layer.weight, lower, upper)

        self.nonlinear = nonlinear
        mlp_in_width = first_width + self.n_actions
        self.net = MLP(mlp_in_width,
                       1,
                       n_hiddens[1:],
                       nonlinear,
                       usebn,
                       initializer=initializer,
                       initializer_param=initializer_param)
예제 #5
0
파일: DDPG.py 프로젝트: phymucs/HTRPO
class FCDDPG_C(nn.Module):
    """Fully connected critic taking a state-like input ``x`` and an input ``a``.

    ``x`` goes through a first linear layer and the stored nonlinearity;
    the result is concatenated with ``a`` along the last axis and fed to
    an ``MLP`` (constructed with ``1`` as its second argument).
    """

    def __init__(
            self,
            n_inputfeats,  # input dim
            n_actions,  # action dim
            n_hiddens=[64, 64],  # hidden unit number list
            nonlinear=F.relu,
            usebn=False,
            initializer="uniform",
            initializer_param={
                "last_upper": 3e-3,
                "last_lower": 3e-3
            }):
        """Build the first linear layer and the trailing ``MLP``.

        Args:
            n_inputfeats: input width of the first linear layer.
            n_actions: stored on the instance and added to the MLP input
                width.
            n_hiddens: must have at least two entries; ``n_hiddens[0]`` is
                the first layer's width, ``n_hiddens[1:]`` goes to ``MLP``.
            nonlinear: applied after the first layer and forwarded to
                ``MLP``.
            usebn: forwarded to ``MLP``.
            initializer: forwarded to ``MLP``.
            initializer_param: forwarded to ``MLP``; optional ``'lower'``
                and ``'upper'`` keys bound the uniform init of the first
                layer's weights.

        NOTE(review): the default ``"last_lower"`` is +3e-3, equal to
        ``"last_upper"``. How ``MLP`` consumes these keys is not visible
        here -- confirm whether a negative lower bound was intended.
        """
        assert len(
            n_hiddens
        ) >= 2, "The critic has to contain at least one hidden layer."
        super(FCDDPG_C, self).__init__()
        self.n_actions = n_actions

        self.first_layer = nn.Linear(n_inputfeats, n_hiddens[0])
        # TODO: first layer initialization should be modified according to initializer.
        # Uniform bounds come from initializer_param when given, otherwise
        # they default to -/+ 1 / sqrt(n_inputfeats).
        if 'lower' in initializer_param:
            lower = initializer_param['lower']
        else:
            lower = -1. / np.sqrt(n_inputfeats)
        if 'upper' in initializer_param:
            upper = initializer_param['upper']
        else:
            upper = 1. / np.sqrt(n_inputfeats)
        nn.init.uniform_(self.first_layer.weight, lower, upper)

        self.nonlinear = nonlinear
        self.net = MLP(n_hiddens[0] + self.n_actions,
                       1,
                       n_hiddens[1:],
                       nonlinear,
                       usebn,
                       initializer=initializer,
                       initializer_param=initializer_param)

    def cuda(self, device=None):
        """Move every tensor reached by ``_apply`` to CUDA and return the result."""
        return self._apply(lambda t: t.cuda(device))

    def forward(self, x, a):
        """Evaluate the critic on ``x`` and ``a``.

        Args:
            x: input to the first linear layer.
            a: tensor concatenated with the first-layer features along the
                last axis.

        Returns:
            The output of ``self.net`` on the concatenated features.
        """
        # TODO: support 1-d input.
        # critic only deals with mini-batch.
        # Sub-modules are invoked via __call__ rather than .forward() so
        # that hooks registered on them are not silently skipped.
        feats = self.nonlinear(self.first_layer(x))
        return self.net(torch.cat((feats, a), dim=-1))
예제 #6
0
파일: DQN.py 프로젝트: phymucs/HTRPO
class FCDuelingDQN(nn.Module):
    """Fully connected dueling network with a shared trunk and two heads.

    The trunk output feeds a one-unit head ``V`` and an ``n_actions``-unit
    head ``A``; ``forward`` returns ``V + (A - mean(A))``.
    """

    def __init__(self,
                 n_inputfeats,
                 n_actions,
                 n_hiddens=[30],
                 nonlinear=F.tanh,
                 usebn=False):
        """Build the shared trunk and the two linear heads.

        Args:
            n_inputfeats: forwarded to ``MLP`` as its first argument.
            n_actions: output width of the ``A`` head.
            n_hiddens: width list; the last entry is the trunk output
                width, the preceding entries are forwarded to ``MLP``.
            nonlinear: activation forwarded to ``MLP`` (also as
                ``outactive``).
            usebn: when true, an extra ``BatchNorm1d`` over the trunk
                output is created; the flag is also forwarded to ``MLP``.
        """
        super(FCDuelingDQN, self).__init__()
        # using MLP as hidden layers
        self.hidden_layers = MLP(n_inputfeats,
                                 n_hiddens[-1],
                                 n_hiddens[:-1],
                                 nonlinear,
                                 outactive=nonlinear,
                                 usebn=usebn)
        self.usebn = usebn
        if self.usebn:
            self.bn = nn.BatchNorm1d(n_hiddens[-1])
        self.V = nn.Linear(n_hiddens[-1], 1)
        self.A = nn.Linear(n_hiddens[-1], n_actions)
        # Re-draw both head weights from N(0, 0.1); biases keep their defaults.
        self.V.weight.data.normal_(0, 0.1)
        self.A.weight.data.normal_(0, 0.1)

    def forward(self, x):
        """Return ``V + (A - mean(A))`` for a batch or a single sample.

        The advantage mean is taken over the last axis, so a 1-d trunk
        output works whether or not batch norm is enabled. For 2-d input
        this is the same axis as before.

        Args:
            x: input forwarded to the trunk.

        Returns:
            Tensor with ``n_actions`` entries along its last axis.
        """
        # Sub-modules are invoked via __call__ so registered hooks run.
        x = self.hidden_layers(x)
        input_dim = x.dim()
        # BatchNorm1d is fed a temporary batch axis for 1-d features; the
        # axis is removed again before returning.
        added_batch_axis = self.usebn and input_dim == 1
        if self.usebn:
            if added_batch_axis:
                x = x.unsqueeze(0)
            x = self.bn(x)
        advantage = self.A(x)  # evaluated once and reused for the mean
        A = advantage - torch.mean(advantage, -1, keepdim=True)
        V = self.V(x)
        if added_batch_axis:
            A = A.squeeze(0)
            V = V.squeeze(0)
        return A + V
 def forward(self, x, other_data=None):
     """Return ``(mean, logstd, std)`` with the stored log-std broadcast to the mean.

     ``mean`` is the ``MLP`` output; ``logstd`` and ``std`` are
     ``self.logstd`` and its exponential, each expanded to ``mean``'s shape.
     """
     mean = MLP.forward(self, x, other_data)
     logstd = self.logstd.expand_as(mean)
     std = torch.exp(self.logstd).expand_as(mean)
     return mean, logstd, std
예제 #8
0
파일: PG.py 프로젝트: edmundwsy/HTRPO-v2
 def forward(self, x, other_data=None):
     """Return the ``MLP`` output, offset by a small constant and renormalized.

     ``0.01 / n_actions`` is added to every entry, then each entry is
     divided by the sum over the last axis. The sum is detached, so no
     gradient flows through the normalizer.
     """
     probs = MLP.forward(self, x, other_data)
     # for exploration, and similar to e-greedy
     smoothed = probs + 0.01 / self.n_actions
     normalizer = torch.sum(smoothed, dim=-1, keepdim=True).detach()
     return smoothed / normalizer
예제 #9
0
파일: PG.py 프로젝트: edmundwsy/HTRPO-v2
 def forward(self, x, other_data=None):
     """Return ``(mean, logstd, std)`` with the log-std clamped from below.

     ``self.logstd`` is clamped to at least ``log(0.1)`` before being
     expanded to the shape of the ``MLP`` output, so ``std`` is never
     below 0.1.
     """
     mean = MLP.forward(self, x, other_data)
     # for exploration, we need to make sure that the std is not too low.
     min_logstd = np.log(0.1)
     clamped = torch.clamp(self.logstd, min=min_logstd)
     std = torch.exp(clamped)
     return mean, clamped.expand_as(mean), std.expand_as(mean)
예제 #10
0
파일: PG.py 프로젝트: phymucs/HTRPO
 def forward(self, x):
     """Return ``(mean, logstd, std)`` with the stored log-std broadcast to the mean.

     ``mean`` is the ``MLP`` output; ``logstd`` and ``std`` are
     ``self.logstd`` and its exponential, each expanded to ``mean``'s shape.
     """
     mean = MLP.forward(self, x)
     logstd = self.logstd.expand_as(mean)
     std = torch.exp(self.logstd).expand_as(mean)
     return mean, logstd, std