def train(self, epoch):
    # Chainer training loop: plain softmax cross-entropy with a manually scheduled LR.
    self.optimizer.lr = self.lr_schedule(epoch)
    train_loss = 0
    train_acc = 0
    for i, batch in enumerate(self.train_iter):
        x, t = chainer.dataset.concat_examples(batch, device=self.opt.gpu)
        self.model.cleargrads()
        y = self.model(x)
        loss = F.softmax_cross_entropy(y, t)
        acc = F.accuracy(y, t)
        loss.backward()
        self.optimizer.update()
        train_loss += float(loss.data) * len(t)
        train_acc += float(acc.data) * len(t)

        elapsed_time = time.time() - self.start_time
        progress = (self.n_batches * (epoch - 1) + i + 1) * 1.0 / (self.n_batches * self.opt.nEpochs)
        eta = elapsed_time / progress - elapsed_time

        line = '* Epoch: {}/{} ({}/{}) | Train: LR {} | Time: {} (ETA: {})'.format(
            epoch, self.opt.nEpochs, i + 1, self.n_batches,
            self.optimizer.lr, to_hms(elapsed_time), to_hms(eta))
        sys.stderr.write('\r\033[K' + line)
        sys.stderr.flush()

    self.train_iter.reset()
    train_loss /= len(self.train_iter.dataset)
    train_top1 = 100 * (train_acc / len(self.train_iter.dataset))  # top-1 accuracy in percent

    return train_loss, train_top1
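# A minimal sketch of the lr_schedule referenced above. The real schedule lives
# elsewhere in the trainer and its decay points are not shown here, so the
# attribute name self.opt.LR and the 0.5/0.75 decay epochs are assumptions.
def lr_schedule(self, epoch):
    # Step decay: divide the base learning rate by 10 at fixed fractions of training.
    divide_epochs = [int(self.opt.nEpochs * r) for r in (0.5, 0.75)]
    decay = sum(epoch > e for e in divide_epochs)
    return self.opt.LR * (0.1 ** decay)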
def train(self, epoch):
    # Chainer training loop with optional BC (between-class) learning: when opt.BC is set,
    # the targets are soft mixed labels trained with a KL-divergence loss instead of
    # plain softmax cross-entropy.
    self.optimizer.lr = self.lr_schedule(epoch)
    train_loss = 0
    train_acc = 0
    for i, batch in enumerate(self.train_iter):
        x_array, t_array = chainer.dataset.concat_examples(batch)
        x = chainer.Variable(cuda.to_gpu(x_array))
        t = chainer.Variable(cuda.to_gpu(t_array))
        self.model.cleargrads()
        y = self.model(x)
        if self.opt.BC:
            loss = utils.kl_divergence(y, t)
            acc = F.accuracy(y, F.argmax(t, axis=1))
        else:
            loss = F.softmax_cross_entropy(y, t)
            acc = F.accuracy(y, t)
        loss.backward()
        self.optimizer.update()
        train_loss += float(loss.data) * len(t.data)
        train_acc += float(acc.data) * len(t.data)

        elapsed_time = time.time() - self.start_time
        progress = (self.n_batches * (epoch - 1) + i + 1) * 1.0 / (self.n_batches * self.opt.nEpochs)
        eta = elapsed_time / progress - elapsed_time

        line = '* Epoch: {}/{} ({}/{}) | Train: LR {} | Time: {} (ETA: {})'.format(
            epoch, self.opt.nEpochs, i + 1, self.n_batches,
            self.optimizer.lr, utils.to_hms(elapsed_time), utils.to_hms(eta))
        sys.stderr.write('\r\033[K' + line)
        sys.stderr.flush()

    self.train_iter.reset()
    train_loss /= len(self.train_iter.dataset)
    train_top1 = 100 * (1 - train_acc / len(self.train_iter.dataset))  # top-1 error in percent

    return train_loss, train_top1
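# Minimal sketches of the two utils helpers used above (utils.kl_divergence and
# utils.to_hms). The real utils module is not shown in this file, so these are
# assumed implementations: KL(t || softmax(y)) averaged over the batch for the
# soft BC targets, and a seconds-to-H:MM:SS formatter.
import chainer.functions as F

def kl_divergence(y, t, eps=1e-7):
    # Cross-entropy against the soft targets minus the targets' own entropy,
    # averaged over the batch.
    log_p = F.log_softmax(y)
    cross_entropy = -F.sum(t * log_p) / y.shape[0]
    entropy = -F.sum(t * F.log(t + eps)) / y.shape[0]
    return cross_entropy - entropy

def to_hms(seconds):
    # Format elapsed seconds as H:MM:SS for the progress line.
    h, rem = divmod(int(seconds), 3600)
    m, s = divmod(rem, 60)
    return '{}:{:02d}:{:02d}'.format(h, m, s)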
def train(self, epoch): """ run one train epoch """ train_loss = 0 train_acc = 0 for i, (x_array, t_array) in enumerate(self.train_iter): device = torch.device("cuda" if cuda.is_available() else "cpu") self.optimizer.zero_grad() x = x_array.to(device) t = t_array.to(device) y = self.model(x) if self.opt.BC: t = t.to(device, dtype=torch.float32) y = y.to(device, dtype=torch.float32) loss = utils.kl_divergence(y, t) t_indices = torch.argmax(t, dim=1) acc = accuracy(y.data, t_indices) else: """ F.cross_entropy already combines log_softmax and NLLLoss """ t = t.to(device, dtype=torch.int64) loss = F.cross_entropy(y, t) acc = accuracy(y.data, t) loss.backward() self.optimizer.step() train_loss += float(loss.item()) * len(t.data) train_acc += float(acc.item()) * len(t.data) elapsed_time = time.time() - self.start_time progress = (self.n_batches * (epoch - 1) + i + 1) * 1.0 / (self.n_batches * self.opt.nEpochs) eta = elapsed_time / progress - elapsed_time line = '* Epoch: {}/{} ({}/{}) | Train: LR {} | Time: {} (ETA: {})'.format( epoch, self.opt.nEpochs, i + 1, self.n_batches, self.scheduler.get_last_lr(), utils.to_hms(elapsed_time), utils.to_hms(eta)) sys.stderr.write('\r\033[K' + line) sys.stderr.flush() train_loss /= len(self.train_iter.dataset) train_top1 = 100 * (1 - train_acc / len(self.train_iter.dataset)) return train_loss, train_top1
def train(self, epoch):
    # Chainer training loop where mixing happens inside the model: each batch is
    # reshaped to 2 * batchSize rows so the model can mix example pairs
    # (mixup_type / eligible select the mixing strategy).
    self.optimizer.lr = self.lr_schedule(epoch)
    train_loss = 0
    train_acc = 0
    for i, batch in enumerate(self.train_iter):
        x_array, t_array = chainer.dataset.concat_examples(batch)
        x_array = np.reshape(x_array, (self.opt.batchSize * 2, -1)).astype('float32')
        t_array = np.reshape(t_array, (self.opt.batchSize * 2, -1)).astype('float32')
        x = chainer.Variable(cuda.to_gpu(x_array[:, None, None, :]))
        t = chainer.Variable(cuda.to_gpu(t_array))
        self.model.cleargrads()
        y, t = self.model(x, t, self.opt.mixup_type, self.opt.eligible, self.opt.batchSize)
        if self.opt.BC:
            loss = utils.kl_divergence(y, t)
            acc = F.accuracy(y, F.argmax(t, axis=1))
        else:
            loss = F.softmax_cross_entropy(y, t)
            acc = F.accuracy(y, t)
        loss.backward()
        self.optimizer.update()
        train_loss += float(loss.data) * len(t.data)
        train_acc += float(acc.data) * len(t.data)

        elapsed_time = time.time() - self.start_time
        progress = (self.n_batches * (epoch - 1) + i + 1) * 1.0 / (self.n_batches * self.opt.nEpochs)
        if progress != 0:
            eta = elapsed_time / progress - elapsed_time
        else:
            eta = 0

        line = '* Epoch: {}/{} ({}/{}) | Train: LR {} | Time: {} (ETA: {})'.format(
            epoch, self.opt.nEpochs, i + 1, self.n_batches,
            self.optimizer.lr, utils.to_hms(elapsed_time), utils.to_hms(eta))
        sys.stderr.write('\r\033[K' + line)
        sys.stderr.flush()

    self.train_iter.reset()
    train_loss /= len(self.train_iter.dataset) * 2
    train_top1 = 100 * (1 - train_acc / (len(self.train_iter.dataset) * 2))  # top-1 error in percent

    return train_loss, train_top1
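# The model call above mixes example pairs internally (mixup_type and eligible are
# model-side arguments not shown in this file). For illustration only, a plain
# input-space mixup over the two halves of the doubled batch could look like the
# sketch below; the actual mixing logic inside the model may differ.
import numpy as np

def mixup_pairs(x, t, batch_size, alpha=1.0):
    # Draw one Beta(alpha, alpha) coefficient and mix the first half of the batch
    # with the second half, for both inputs and (soft) labels.
    lam = np.random.beta(alpha, alpha)
    x_mix = lam * x[:batch_size] + (1 - lam) * x[batch_size:]
    t_mix = lam * t[:batch_size] + (1 - lam) * t[batch_size:]
    return x_mix, t_mix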