def train_evalnet(self, evalnet, X, Y):
    """Fit *evalnet* to regress Y from X with SGD plus a gradient penalty.

    Args:
        evalnet: the evaluation network to be trained in place.
        X: input tensor; first dimension is the sample count.
        Y: target tensor, aligned with X.

    Side effects: updates evalnet's parameters; prints the dataset size and
    shows a tqdm progress bar with a smoothed loss readout.
    """
    print("Evaluation network data size: %d" % X.size(0))
    dataset = torch.utils.data.TensorDataset(X, Y)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=True)
    epochs = 200
    lossf = torch.nn.MSELoss()
    optim = torch.optim.SGD(evalnet.parameters(), lr=0.01, momentum=0.9)
    # Milestones are epoch counts (80 and 160 out of 200 epochs), so the
    # scheduler must step once per EPOCH. Stepping it per batch (as the
    # original flattened code did) would finish both LR decays within the
    # first 160 mini-batches.
    sched = torch.optim.lr_scheduler.MultiStepLR(optim, milestones=[80, 160])
    bar = tqdm.tqdm(range(epochs), ncols=80)
    avg = MovingAverage(momentum=0.95)
    for epoch in bar:
        for Xb, Yb in dataloader:
            # create_normal presumably moves/normalizes the batch — defined
            # on the enclosing class, outside this view.
            Xb = self.create_normal(Xb)
            Yb = self.create_normal(Yb)
            # squeeze(-1) drops only the trailing output dimension; a bare
            # squeeze() would collapse a size-1 batch to a scalar and make
            # MSELoss broadcast against Yb.
            Yh = evalnet(Xb).squeeze(-1)
            loss = lossf(Yh, Yb)
            # Penalty regularizes evalnet's gradients w.r.t. the inputs;
            # only the plain MSE feeds the progress readout.
            full_loss = loss + self.grad_penalty(evalnet, X, Xb)
            optim.zero_grad()
            full_loss.backward()
            optim.step()
            avg.update(loss.item())
        sched.step()
        bar.set_description("Fitting evalnet: %.3f" % avg.peek())
class EvolutionaryUnit(Model):
    """One candidate model in an evolutionary search.

    Supports fitness evaluation (`fit` / `get_score`), point mutation
    (`mutate`: randomly re-initialize some weights, zero others) and
    uniform crossover with another unit (`share_abilities`). Units are
    orderable by score so populations can be sorted/heapified directly.
    """

    def __init__(self, D):
        super().__init__(D)
        # Only these layer types participate in mutation / crossover.
        self.target_modules = [torch.nn.Linear]
        self.gain_rate = 0.01   # fraction of weights re-initialized by gain_ability
        self.loss_rate = 0.05   # fraction of weights zeroed by lose_ability
        self.epochs = 100
        self.batch = 8
        self.score = MovingAverage(momentum=0.90)        # smoothed test score
        self.train_score = MovingAverage(momentum=0.9)   # smoothed train score

    def set_device(self, device):
        """Remember the device and move the model's parameters onto it."""
        self.device = device
        self.to(device)

    def __lt__(self, other):
        # Lower score is better, so "less than" means "fitter".
        return self.get_score() < other.get_score()

    def fit(self, X, Y, X_test, Y_test):
        """Train on (X, Y) and record a smoothed test score on (X_test, Y_test)."""
        dataset = torch.utils.data.TensorDataset(X, Y)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=self.batch, shuffle=True)
        lossf = torch.nn.MSELoss()
        optim = torch.optim.Adam(self.parameters())
        sched = torch.optim.lr_scheduler.MultiStepLR(optim, milestones=[60])
        for epoch in range(self.epochs):
            self.train()
            for x, y in dataloader:
                x = x.to(self.device)
                y = y.to(self.device)
                yh = self(x)
                loss = lossf(yh, y)
                optim.zero_grad()
                loss.backward()
                optim.step()
                self.train_score.update(self.calc_score(yh, y))
            sched.step()
        with torch.no_grad():
            self.eval()
            yh_test = self(X_test.to(self.device))
            self.score.update(self.calc_score(yh_test, Y_test.to(self.device)))

    def get_score(self):
        return self.score.peek()  # lower the better

    def get_train_score(self):
        return self.train_score.peek()

    def mutate(self):
        """Return a mutated deep copy: gain some fresh weights, lose others."""
        out = copy.deepcopy(self)
        out.apply(self.recurse_apply(self.gain_ability))
        out.apply(self.recurse_apply(self.lose_ability))
        return out

    def share_abilities(self, other):
        """Uniform crossover: return two children mixing self's and other's weights.

        Neither parent is modified; the exchange happens on deep copies.
        NOTE(review): assumes the base Model exposes its layers as `self.net`
        in matching order for both units — confirm against Model's definition.
        """
        n1 = copy.deepcopy(self)
        n2 = copy.deepcopy(other)
        for m1, m2 in zip(n1.net, n2.net):
            self.exchange(m1, m2)
        return n1, n2

    # === PRIVATE ===
    def exchange(self, m1, m2):
        """Element-wise uniform crossover between two matching Linear layers."""
        if type(m1) in self.target_modules:
            p = 0.5
            i = torch.rand_like(m1.weight.data) < p
            # m1 takes m2's values at masked positions; m2 takes m1's
            # (still-original) values at the complementary positions.
            m1.weight.data[i] = m2.weight.data[i]
            # BUG FIX: the original used `1 - i`, but `i` is a torch.bool
            # mask and `-` on bool tensors raises in modern PyTorch. The
            # logical complement is `~i`.
            m2.weight.data[~i] = m1.weight.data[~i]

    def calc_score(self, yh, y):
        # Sum (not mean) of absolute errors, as a plain Python float.
        return (yh - y).abs().sum().item()

    def recurse_apply(self, f):
        # Wrap f so Module.apply only touches the targeted layer types.
        return lambda m: f(m) if type(m) in self.target_modules else None

    def gain_ability(self, m):
        # Re-initialize a random gain_rate fraction of m's weights with
        # fresh Kaiming-uniform values (a=sqrt(5) matches nn.Linear's own
        # default initialization).
        i, v = self.get_new_weights(m, self.gain_rate)
        torch.nn.init.kaiming_uniform_(v, a=math.sqrt(5))
        m.weight.data[i] = v[i]

    def lose_ability(self, m):
        # Zero out a random loss_rate fraction of m's weights.
        i, v = self.get_new_weights(m, self.loss_rate)
        m.weight.data[i] = v[i]

    def get_new_weights(self, m, rate):
        """Return (mask, zeros) shaped like m.weight: mask selects ~rate of entries."""
        i = (torch.rand_like(m.weight.data) < rate)
        v = torch.zeros_like(m.weight.data)
        return i, v