import sys
import json
import torch
import torch.nn.functional as F
from torch.autograd import Variable

def train_epoch(model, opt, lr_scheduler, epoch, dataloader, gpu_id=0, verbose=True):
    # Note: written against the legacy (pre-0.4) PyTorch API, hence `Variable(...)`
    # and `loss.data[0]` (the modern spelling is `loss.item()`).
    _ = model.train()
    batches_per_epoch = len(dataloader)
    train_loss, correct, total = 0, 0, 0
    for batch_idx, (data, targets) in enumerate(dataloader):
        data, targets = Variable(data.cuda(gpu_id)), Variable(targets.cuda(gpu_id))

        # Set LR from fractional-epoch progress (epoch + fraction of the current epoch)
        LRSchedule.set_lr(opt, lr_scheduler(epoch + batch_idx / batches_per_epoch))

        opt.zero_grad()
        outputs = model(data)
        loss = F.cross_entropy(outputs, targets)
        loss.backward()
        opt.step()

        train_loss += loss.data[0]
        predicted = torch.max(outputs.data, 1)[1]
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        if verbose:
            progress_bar(batch_idx, batches_per_epoch, 'Loss: %.3f | Acc: %.3f%%' %
                         (train_loss / (batch_idx + 1), 100. * correct / total))

    return float(correct) / total
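The `LRSchedule.set_lr` helper used throughout these snippets is not shown anywhere in the excerpt. A minimal sketch, assuming it simply writes the scheduled value into every parameter group of the optimizer (the staticmethod layout is inferred from the call sites, not taken from the source):

class LRSchedule:
    @staticmethod
    def set_lr(optimizer, lr):
        # Overwrite the learning rate of every parameter group in place.
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr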
def train_step(self, data, targets, progress):
    self.lr = self.lr_scheduler(progress)
    LRSchedule.set_lr(self.opt, self.lr)

    self.opt.zero_grad()
    outputs = self(data)
    loss = F.cross_entropy(outputs, targets)
    loss.backward()
    self.opt.step()

    return outputs, loss.data[0]
def train(model, dataloaders, epochs=1, gpu_id=0, verbose=True, **kwargs):
    model = model.cuda(gpu_id)
    lr_scheduler = LRSchedule.sgdr(period_length=epochs, **kwargs)
    opt = torch.optim.SGD(model.parameters(), lr=lr_scheduler(0), momentum=0.9, weight_decay=5e-4)

    performance = []
    for epoch in range(epochs):
        train_acc = train_epoch(model, opt, lr_scheduler, epoch, dataloaders['train'],
                                gpu_id=gpu_id, verbose=verbose)
        test_acc = eval_epoch(model, dataloaders['test'], gpu_id=gpu_id, verbose=verbose)
        if dataloaders['val']:
            val_acc = eval_epoch(model, dataloaders['val'], gpu_id=gpu_id, verbose=verbose)
        else:
            val_acc = None

        perf = {
            "epoch": epoch,
            "train": train_acc,
            "test": test_acc,
            "val": val_acc,
        }
        print(json.dumps(perf), file=sys.stderr)
        performance.append(perf)

        # Bail out early if training has collapsed
        if train_acc < 0.2:
            print('*' * 50 + ' vv break vv ' + '*' * 50, file=sys.stderr)
            print(model, file=sys.stderr)
            print('*' * 50 + ' ^^ break ^^ ' + '*' * 50, file=sys.stderr)
            break

    return model.cpu(), performance
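`LRSchedule.sgdr(period_length=epochs, **kwargs)` above is expected to return a callable that maps fractional-epoch progress to a learning rate. A minimal sketch of such a factory, assuming SGDR-style cosine annealing (Loshchilov & Hutter) with warm restarts; the parameter names `lr_init`, `lr_min`, and `t_mult` are assumptions, only `period_length` appears in the source:

import numpy as np

class LRSchedule:
    @staticmethod
    def sgdr(period_length=10, lr_init=0.1, lr_min=0.0, t_mult=1, **kwargs):
        # Cosine annealing from lr_init to lr_min over `period_length` epochs,
        # restarting with the period scaled by `t_mult` after each cycle.
        def f(progress):
            period = period_length
            p = progress
            while p >= period:
                p -= period
                period *= t_mult
            return lr_min + 0.5 * (lr_init - lr_min) * (1 + np.cos(np.pi * p / period))
        return f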
def set_progress(optimizer, lr_scheduler, progress):
    lr = lr_scheduler(progress)
    LRSchedule.set_lr(optimizer, lr)
mult = 2
Tcur = 0

for epoch in range(args.epochs):
    # early stopping
    if tolerance > args.tolerance:
        break

    train_loss = 0

    # Train
    _ = model.train()
    for ids, targets, epoch_progress in problem.iterate(mode='train', shuffle=True,
                                                        batch_size=args.batch_size):
        if args.lr_schedule == 'cosine':
            lr = lr_scheduler(Tcur + epoch_progress, epochs=Ti)
            LRSchedule.set_lr(optimizer, lr)
            print('learning rate:{}'.format(lr))
        else:
            # set_progress(optimizer, lr_scheduler, (epoch + epoch_progress) / args.epochs)
            pass

        loss, preds = train_step(
            model=model,
            optimizer=optimizer,
            ids=ids,
            targets=targets,
            loss_fn=problem.loss_fn,
        )
        train_loss += loss.item()

    train_metric = problem.metric_fn(to_numpy(targets), to_numpy(preds))
    #print(json.dumps({
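The loop above initializes `mult = 2` and `Tcur = 0` and queries the scheduler with `Tcur + epoch_progress` against a cycle length `Ti`, but the end-of-epoch update of `Tcur` and `Ti` falls outside the excerpt. A minimal sketch of the usual SGDR warm-restart bookkeeping, under the assumption that the cycle length doubles after each completed cycle (the initial value of `Ti` below is illustrative):

# Hypothetical end-of-epoch bookkeeping for SGDR warm restarts.
Ti, Tcur, mult = 10, 0, 2              # assumed initial cycle of 10 epochs

for epoch in range(70):
    # ... one epoch of training, querying lr_scheduler(Tcur + epoch_progress, epochs=Ti) ...
    Tcur += 1
    if Tcur >= Ti:                     # cycle finished: restart with a longer period
        Tcur = 0
        Ti *= mult                     # cycles of 10, 20, 40, ... epochs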
def set_progress(self, progress):
    self.lr = self.lr_scheduler(progress)
    LRSchedule.set_lr(self.optimizer, self.lr)
def set_progress(self, progress):
    self.progress = progress
    self.epoch = np.floor(progress)

    if self.lr_scheduler is not None:
        self.lr = self.lr_scheduler(progress)
        LRSchedule.set_lr(self.opt, self.lr)
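To show how a `set_progress` method like the ones above is typically driven, here is a hypothetical, self-contained example. `TinyModel`, the placeholder step schedule, and the random data are illustrative assumptions, not part of the original code; only the fractional-epoch `progress` convention is carried over from `train_epoch`:

import numpy as np
import torch
import torch.nn.functional as F

class TinyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(8, 2)
        self.opt = torch.optim.SGD(self.parameters(), lr=0.1)
        # Placeholder schedule (assumption): halve the learning rate each epoch.
        self.lr_scheduler = lambda progress: 0.1 * (0.5 ** np.floor(progress))
        self.lr = 0.1

    def forward(self, x):
        return self.fc(x)

    def set_progress(self, progress):
        self.progress = progress
        self.epoch = np.floor(progress)
        if self.lr_scheduler is not None:
            self.lr = float(self.lr_scheduler(progress))
            for param_group in self.opt.param_groups:
                param_group['lr'] = self.lr

model = TinyModel()
data, targets = torch.randn(32, 8), torch.randint(0, 2, (32,))
batches_per_epoch = 4

for epoch in range(2):
    for batch_idx in range(batches_per_epoch):
        # progress is measured in fractional epochs, as in train_epoch above.
        model.set_progress(epoch + batch_idx / batches_per_epoch)
        model.opt.zero_grad()
        loss = F.cross_entropy(model(data), targets)
        loss.backward()
        model.opt.step()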