class ANN(nn.Module):
    """Network combining an ``Env`` encoder, a ``ValNet`` head and action heads.

    The action heads live in ``self.actionNets`` (an ``nn.ModuleDict``); only
    the ``'action'`` head is created here.
    """

    def __init__(self, config, args):
        """Build the value head, the action head(s) and the encoder.

        Args:
            config: configuration object; ``N_QUANT``, ``NOISE`` and
                ``VAL_FEATURE`` are read from it here.
            args: passed through to ``ActionNet`` and kept on the instance.
        """
        super().__init__()
        self.config = config
        self.args = args
        noisy = self.config.NOISE
        # Construction order is kept as-is: it fixes parameter registration
        # order and the order in which random initialisation is consumed.
        self.valNet = ValNet(config, n_quant=config.N_QUANT, noise=noisy)
        self.actionNets = nn.ModuleDict()
        self.actionNets['action'] = ActionNet(
            config,
            args,
            outDim=8,
            n_quant=config.N_QUANT,
            add_features=int(self.config.VAL_FEATURE),
            noise=noisy,
        )
        self.envNet = Env(config, noisy)

    def forward(self, env, eps=0, punishmentsLm=None, v=None):
        """Encode ``env`` and evaluate the value head and every action head.

        Args:
            env: input forwarded to ``self.envNet``.
            eps: forwarded unchanged to each action head.
            punishmentsLm: optional mapping from head name to a punishment
                value; when ``None`` every head receives ``None``.
            v: optional precomputed value feature; when ``None`` it is
                derived from the value head output (detached, mean over
                dimension 2).

        Returns:
            ``(outputs, val)`` where ``outputs[name]`` is
            ``(pi moved to CPU, actionIdx)`` and ``val`` is the value head
            output moved to CPU.
        """
        state = self.envNet(env)
        val = self.valNet(state)
        if v is None:
            v = val.detach().mean(2)

        outputs = {}
        for name, head in self.actionNets.items():
            punish = None if punishmentsLm is None else punishmentsLm[name]
            pi, actionIdx = head(state, eps, punish, v)
            outputs[name] = (pi.to('cpu'), actionIdx)
        return outputs, val.to('cpu')

    def reset_noise(self):
        """Call ``reset_noise`` on the encoder, the value head and all action heads."""
        self.envNet.reset_noise()
        self.valNet.reset_noise()
        for head in self.actionNets.values():
            head.reset_noise()
class ActionNet(nn.Module):
    """Action head: ``Env`` encoder -> one hidden layer -> ``ConstDiscrete``.

    The hidden layer is a ``NoisyLinear`` when ``noise`` is truthy and a plain
    ``nn.Linear`` otherwise. The choice is remembered in ``self.noise`` so that
    ``forward`` and ``reset_noise`` always agree with the layer that was
    actually built.
    """

    def __init__(self, config, entDim=19, outDim=2, n_quant=1, add_features=0,
                 noise=False):
        """Build the encoder, the hidden layer and the output head.

        Args:
            config: configuration object; ``HIDDEN`` is read here, and
                ``VAL_FEATURE`` is read later in ``forward``.
            entDim: stored on the instance; not otherwise used in this class.
            outDim: forwarded to ``ConstDiscrete``.
            n_quant: forwarded to ``ConstDiscrete``.
            add_features: number of extra input features for the hidden layer
                (added to ``config.HIDDEN``).
            noise: selects ``NoisyLinear`` (truthy) or ``nn.Linear`` (falsy)
                for the hidden layer and is forwarded to ``Env``.
        """
        super().__init__()
        self.config, self.h = config, config.HIDDEN
        self.entDim = entDim
        # Remember which kind of hidden layer was built; forward() and
        # reset_noise() must not guess this from config.NOISE.
        self.noise = noise
        self.envNet = Env(config, noise=noise)
        in_features = self.h + add_features
        if noise:
            self.fc = NoisyLinear(in_features, self.h)
        else:
            self.fc = nn.Linear(in_features, self.h)
        self.actionNet = ConstDiscrete(config, self.h, outDim, n_quant)

    def forward(self, flat, ents, eps=0, punish=None, val=None, device='cpu'):
        """Run the head on one observation.

        Args:
            flat, ents: inputs moved to ``device`` and passed to the encoder.
            eps: forwarded to the output head.
            punish: forwarded to the output head.
            val: optional tensor concatenated to the encoding along dim 1
                when ``config.VAL_FEATURE`` is truthy.
            device: device every intermediate tensor is moved to.

        Returns:
            The ``(outs, idx)`` pair produced by the ``ConstDiscrete`` head.
        """
        stim = self.envNet(flat.to(device), ents.to(device), device=device)
        stim = stim.to(device)
        if self.config.VAL_FEATURE and val is not None:
            stim = torch.cat([stim, val.to(device)], dim=1)
        # Only the NoisyLinear layer is called with a ``device`` keyword;
        # nn.Linear.forward does not accept one.
        if self.noise:
            hidden = self.fc(stim, device=device)
        else:
            hidden = self.fc(stim)
        x = F.relu(hidden)
        outs, idx = self.actionNet(x.to(device), eps, punish)
        return outs, idx

    def reset_noise(self):
        """Reset noise in the encoder and, if it is noisy, the hidden layer."""
        self.envNet.reset_noise()
        # A plain nn.Linear has no reset_noise(); calling it unconditionally
        # raised AttributeError for heads built with noise=False.
        if self.noise:
            self.fc.reset_noise()
class ActionNet(nn.Module):
    """Action head: ``Env`` encoder -> ``nn.Linear`` + ReLU -> ``ConstDiscrete``."""

    def __init__(self, config, args, entDim=11, outDim=2, device='cpu',
                 batch_size=1):
        """Build the hidden layer, the output head and the encoder.

        Args:
            config: configuration object; ``HIDDEN`` and ``LSTM`` are read.
            args: kept on the instance; not otherwise used in this class.
            entDim: stored on the instance; not otherwise used in this class.
            outDim: output size forwarded to ``ConstDiscrete``.
            device: forwarded to ``Env``.
            batch_size: forwarded to ``Env``.
        """
        super().__init__()
        self.config, self.args, self.h = config, args, config.HIDDEN
        self.entDim, self.outDim = entDim, outDim
        self.fc = nn.Linear(self.h, self.h)
        self.actionNet = ConstDiscrete(config, self.h, self.outDim)
        self.envNet = Env(config, config.LSTM, device=device,
                          batch_size=batch_size)

    def forward(self, s, outLm, done=False):
        """Encode ``s`` and return the ``(pi, actionIdx)`` pair from the head.

        Args:
            s: input forwarded to the encoder.
            outLm: forwarded as the second argument of the output head.
            done: forwarded to the encoder as ``is_done``.
        """
        s = self.envNet(s, is_done=done)
        x = F.relu(self.fc(s))
        pi, actionIdx = self.actionNet(x, outLm)
        return pi, actionIdx

    def reset_noise(self):
        """Reset noise in the encoder.

        ``self.fc`` is always a plain ``nn.Linear`` in this class and has no
        ``reset_noise`` method, so it is deliberately not touched here; the
        previous unconditional ``self.fc.reset_noise()`` call always raised
        ``AttributeError``.
        """
        self.envNet.reset_noise()
def __init__(self, config, args, entDim=11, outDim=2, device='cpu',
             batch_size=1):
    """Build the hidden layer, the ``ConstDiscrete`` head and the ``Env`` encoder.

    Args:
        config: configuration object; ``HIDDEN`` and ``LSTM`` are read.
        args: kept on the instance.
        entDim: stored on the instance.
        outDim: stored on the instance and forwarded to ``ConstDiscrete``.
        device: forwarded to ``Env``.
        batch_size: forwarded to ``Env``.
    """
    super().__init__()
    hidden = config.HIDDEN
    self.config = config
    self.args = args
    self.h = hidden
    self.entDim = entDim
    self.outDim = outDim
    # Submodules are created in the original order so that parameter
    # registration and random initialisation are unchanged.
    self.fc = nn.Linear(hidden, hidden)
    self.actionNet = ConstDiscrete(config, hidden, outDim)
    self.envNet = Env(
        config,
        config.LSTM,
        device=device,
        batch_size=batch_size,
    )
class Lawmaker(LawmakerAbstract):
    """Lawmaker network: an ``Env`` encoder feeding a dict of action heads."""

    def __init__(self, args, config):
        """Build the ``'action'`` head and the encoder.

        Args:
            args: forwarded to the base class and to ``ActionNet``.
            config: forwarded to the base class; ``N_QUANT_LM`` is read here.
                ``self.config`` is expected to be available after the base
                initialiser runs (its ``NOISE`` attribute is read).
        """
        super().__init__(args, config)
        noisy = self.config.NOISE
        self.actionNets = nn.ModuleDict()
        self.actionNets['action'] = ActionNet(
            config,
            args,
            outDim=8,
            n_quant=config.N_QUANT_LM,
            noise=noisy,
        )
        self.envNet = Env(config, noisy)

    def forward(self, env):
        """Evaluate every head on the encoded ``env``.

        Returns:
            ``(outputs, punishments)``: ``outputs[name]`` is the first element
            of the head's result moved to CPU; ``punishments[name]`` is that
            output averaged over dimension 2, detached and scaled by
            ``config.PUNISHMENT``.
        """
        state = self.envNet(env)
        outputs = {}
        punishments = {}
        for name, head in self.actionNets.items():
            out = head(state, 0)[0].to('cpu')
            outputs[name] = out
            punishments[name] = out.mean(2).detach() * self.config.PUNISHMENT
        return outputs, punishments

    def reset_noise(self):
        """Call ``reset_noise`` on the encoder and on every action head."""
        self.envNet.reset_noise()
        for head in self.actionNets.values():
            head.reset_noise()
def __init__(self, config, args):
    """Build the ``ValNet`` head, the ``'action'`` head and the ``Env`` encoder.

    Args:
        config: configuration object; ``N_QUANT``, ``NOISE`` and
            ``VAL_FEATURE`` are read from it.
        args: kept on the instance and forwarded to ``ActionNet``.
    """
    super().__init__()
    self.config = config
    self.args = args
    noisy = self.config.NOISE
    # Keep the original construction order (value head, action heads,
    # encoder) so parameter registration is unchanged.
    self.valNet = ValNet(config, n_quant=config.N_QUANT, noise=noisy)
    self.actionNets = nn.ModuleDict()
    self.actionNets['action'] = ActionNet(
        config,
        args,
        outDim=8,
        n_quant=config.N_QUANT,
        add_features=int(self.config.VAL_FEATURE),
        noise=noisy,
    )
    self.envNet = Env(config, noisy)
def __init__(self, args, config):
    """Build the ``'action'`` head and the ``Env`` encoder.

    Args:
        args: forwarded to the base initialiser and to ``ActionNet``.
        config: forwarded to the base initialiser; ``N_QUANT_LM`` is read
            here. ``self.config.NOISE`` is read after the base initialiser
            has run.
    """
    super().__init__(args, config)
    noisy = self.config.NOISE
    self.actionNets = nn.ModuleDict()
    self.actionNets['action'] = ActionNet(
        config,
        args,
        outDim=8,
        n_quant=config.N_QUANT_LM,
        noise=noisy,
    )
    self.envNet = Env(config, noisy)
def __init__(self, config, entDim=19, outDim=2, n_quant=1, add_features=0,
             noise=False):
    """Build the ``Env`` encoder, the hidden layer and the ``ConstDiscrete`` head.

    Args:
        config: configuration object; ``HIDDEN`` is read from it.
        entDim: stored on the instance.
        outDim: forwarded to ``ConstDiscrete``.
        n_quant: forwarded to ``ConstDiscrete``.
        add_features: extra input features for the hidden layer, added to
            ``config.HIDDEN``.
        noise: when truthy the hidden layer is a ``NoisyLinear``, otherwise a
            plain ``nn.Linear``; also forwarded to ``Env``.
    """
    super().__init__()
    hidden = config.HIDDEN
    self.config = config
    self.h = hidden
    self.entDim = entDim
    self.envNet = Env(config, noise=noise)
    in_features = hidden + add_features
    if noise:
        self.fc = NoisyLinear(in_features, hidden)
    else:
        self.fc = nn.Linear(in_features, hidden)
    self.actionNet = ConstDiscrete(config, hidden, outDim, n_quant)
def __init__(self, config, outDim=2, device='cpu', batch_size=1):
    """Build the linear output layer, the ``Env`` encoder, a hidden layer and a ``QNet``.

    Args:
        config: configuration object; ``HIDDEN`` and ``LSTM`` are read.
        outDim: output size of ``self.actionNet``; also forwarded to ``QNet``.
        device: forwarded to ``Env`` and ``QNet``.
        batch_size: forwarded to ``Env`` and ``QNet``.
    """
    super().__init__()
    hidden = config.HIDDEN
    self.config = config
    self.h = hidden
    self.outDim = outDim
    # Submodules are created in the original order so that parameter
    # registration and random initialisation are unchanged.
    self.actionNet = nn.Linear(hidden, outDim)
    self.envNet = Env(
        config,
        self.config.LSTM,
        True,
        device=device,
        batch_size=batch_size,
    )
    self.fc = nn.Linear(hidden, hidden)
    self.qNet = QNet(config, outDim, device, batch_size)