def forward(self, x):
    x = MLP.forward(self, x)
    # x is a batch
    if x.dim() == 2:
        mu = x[:, :self.n_actions]
        if self.action_active is not None:
            if self.action_scaler is not None:
                mu = self.action_scaler * self.action_active(mu)
            else:
                mu = self.action_active(mu)
        V = x[:, self.n_actions:self.n_actions + 1]
        Lunmasked_ = x[:, -self.n_actions ** 2:].clone()
        Lunmasked = Lunmasked_.view(-1, self.n_actions, self.n_actions)
        # keep the strictly lower-triangular entries as-is and exponentiate the
        # diagonal so that L is a valid Cholesky factor
        L = torch.mul(Lunmasked, self.tril_mask.unsqueeze(0)) + \
            torch.mul(torch.exp(Lunmasked), self.diag_mask.unsqueeze(0))
    elif x.dim() == 1:
        mu = x[:self.n_actions]
        if self.action_active is not None:
            if self.action_scaler is not None:
                mu = self.action_scaler * self.action_active(mu)
            else:
                mu = self.action_active(mu)
        V = x[self.n_actions]
        Lunmasked_ = x[-self.n_actions ** 2:].clone()
        Lunmasked = Lunmasked_.view(self.n_actions, self.n_actions)
        L = torch.mul(Lunmasked, self.tril_mask) + \
            torch.mul(torch.exp(Lunmasked), self.diag_mask)
    else:
        raise RuntimeError("dimension not matched")
    return V, mu, L
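# A minimal sketch (not part of the original module) of how the (V, mu, L) returned
# above are typically combined in NAF: Q(s, a) = V(s) - 0.5 * (a - mu)^T L L^T (a - mu).
# The helper name and shapes below are assumptions for the batched (2-d input) case.
import torch

def naf_q_value(V, mu, L, a):
    # P = L L^T is positive semi-definite, so the advantage term is always <= 0
    P = torch.bmm(L, L.transpose(1, 2))
    diff = (a - mu).unsqueeze(-1)                                    # (batch, n_actions, 1)
    A = -0.5 * torch.bmm(torch.bmm(diff.transpose(1, 2), P), diff).squeeze(-1)
    return V + A                                                     # (batch, 1)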
def __init__(
        self,
        n_inputfeats,       # input dim
        n_actions,          # action dim
        n_hiddens=[30],     # hidden unit number list
        nonlinear=F.tanh,
        usebn=False,
):
    super(FCDQN, self).__init__()
    self.net = MLP(n_inputfeats,
                   n_actions,
                   n_hiddens=n_hiddens,
                   nonlinear=nonlinear,
                   usebn=usebn)
class FCDDPG_C(nn.Module):
    def __init__(
            self,
            n_inputfeats,           # input dim
            n_actions,              # action dim
            n_hiddens=[64, 64],     # hidden unit number list
            nonlinear=F.relu,
            usebn=False,
            initializer="uniform",
            initializer_param={
                # symmetric uniform init for the last layer
                "last_upper": 3e-3,
                "last_lower": -3e-3
            }):
        assert len(n_hiddens) >= 2, \
            "The critic has to contain at least one hidden layer."
        super(FCDDPG_C, self).__init__()
        self.n_actions = n_actions
        self.first_layer = nn.Linear(n_inputfeats, n_hiddens[0])
        # TODO: first layer initialization should be modified according to initializer.
        # initialize the first layer
        lower = initializer_param['lower'] if 'lower' in initializer_param \
            else -1. / np.sqrt(n_inputfeats)
        upper = initializer_param['upper'] if 'upper' in initializer_param \
            else 1. / np.sqrt(n_inputfeats)
        nn.init.uniform_(self.first_layer.weight, lower, upper)
        self.nonlinear = nonlinear
        # the action is concatenated with the first hidden layer's output,
        # so the remaining MLP sees n_hiddens[0] + n_actions input features
        self.net = MLP(n_hiddens[0] + self.n_actions,
                       1,
                       n_hiddens[1:],
                       nonlinear,
                       usebn=usebn,
                       initializer=initializer,
                       initializer_param=initializer_param)

    def cuda(self, device=None):
        return self._apply(lambda t: t.cuda(device))

    def forward(self, x, a):
        # TODO: support 1-d input.
        # critic only deals with mini-batch.
        x = self.nonlinear(self.first_layer(x))
        x = self.net(torch.cat((x, a), dim=-1))
        return x
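# Usage sketch (hypothetical sizes, not from the original repo): the critic maps a
# batch of states and a batch of actions to a batch of Q-values.
import torch

critic = FCDDPG_C(n_inputfeats=8, n_actions=2)
states = torch.randn(32, 8)
actions = torch.randn(32, 2)
q_values = critic(states, actions)    # shape: (32, 1)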
class FCDuelingDQN(nn.Module):
    def __init__(self,
                 n_inputfeats,
                 n_actions,
                 n_hiddens=[30],
                 nonlinear=F.tanh,
                 usebn=False):
        super(FCDuelingDQN, self).__init__()
        # using MLP as hidden layers
        self.hidden_layers = MLP(n_inputfeats,
                                 n_hiddens[-1],
                                 n_hiddens[:-1],
                                 nonlinear,
                                 outactive=nonlinear,
                                 usebn=usebn)
        self.usebn = usebn
        if self.usebn:
            self.bn = nn.BatchNorm1d(n_hiddens[-1])
        self.V = nn.Linear(n_hiddens[-1], 1)
        self.A = nn.Linear(n_hiddens[-1], n_actions)
        self.V.weight.data.normal_(0, 0.1)
        self.A.weight.data.normal_(0, 0.1)

    def forward(self, x):
        x = self.hidden_layers(x)
        input_dim = x.dim()
        if self.usebn:
            # BatchNorm1d expects a 2-d input, so lift single samples to a batch of one
            if input_dim == 1:
                x = x.unsqueeze(0)
            x = self.bn(x)
        # dueling aggregation: Q = V + (A - mean(A))
        A = self.A(x)
        A = A - torch.mean(A, 1, keepdim=True)
        V = self.V(x)
        if self.usebn and input_dim == 1:
            A = A.squeeze(0)
            V = V.squeeze(0)
        return A + V
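# Usage sketch (hypothetical sizes): greedy action selection from the dueling head,
# which returns one Q-value per action.
import torch

qnet = FCDuelingDQN(n_inputfeats=4, n_actions=3)
q = qnet(torch.randn(16, 4))          # (16, 3) Q-values
greedy_actions = q.argmax(dim=1)      # (16,) index of the best action per state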
def forward(self, x, other_data=None):
    x = MLP.forward(self, x, other_data)
    return x, self.logstd.expand_as(x), torch.exp(self.logstd).expand_as(x)
def forward(self, x, other_data=None):
    x = MLP.forward(self, x, other_data)
    # for exploration, similar to epsilon-greedy: add a small constant probability
    # mass to every action, then renormalize
    x = x + 0.01 / self.n_actions
    x = x / torch.sum(x, dim=-1, keepdim=True).detach()
    return x
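# Usage sketch (names are hypothetical, not from the original repo): the smoothed
# probabilities returned above can be wrapped in a Categorical distribution to
# sample discrete actions.
from torch.distributions import Categorical

probs = policy(obs)                            # (batch, n_actions), rows sum to 1
actions = Categorical(probs=probs).sample()    # (batch,) sampled action indices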
def forward(self, x, other_data=None):
    x = MLP.forward(self, x, other_data)
    # for exploration, we need to make sure that the std is not too low.
    logstd = torch.clamp(self.logstd, min=np.log(0.1))
    return x, logstd.expand_as(x), torch.exp(logstd).expand_as(x)
def forward(self, x):
    x = MLP.forward(self, x)
    return x, self.logstd.expand_as(x), torch.exp(self.logstd).expand_as(x)
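# Usage sketch (names are hypothetical): the Gaussian heads above return the action
# mean, log-std and std of a diagonal Gaussian; an action can be sampled like this.
from torch.distributions import Normal

mean, logstd, std = policy(obs)                 # `policy` / `obs` are placeholders
dist = Normal(mean, std)
action = dist.sample()
log_prob = dist.log_prob(action).sum(dim=-1)    # joint log-prob over action dims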