def train(train_loader, net, criterion, optimizer, epoch, device):
    global writer
    start = time.time()
    net.train()

    train_loss = 0
    correct = 0
    total = 0

    logger.info(" === Epoch: [{}/{}] === ".format(epoch + 1, config.epochs))

    for batch_index, (inputs, targets) in enumerate(train_loader):
        # move tensors to the GPU
        inputs, targets = inputs.to(device), targets.to(device)
        if config.mixup:
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, targets, config.mixup_alpha, device)
            outputs = net(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        else:
            outputs = net(inputs)
            loss = criterion(outputs, targets)

        # zero the gradient buffers
        optimizer.zero_grad()
        # backward
        loss.backward()
        # update weights
        optimizer.step()

        # accumulate the loss and accuracy
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        if config.mixup:
            correct += (lam * predicted.eq(targets_a).sum().item()
                        + (1 - lam) * predicted.eq(targets_b).sum().item())
        else:
            correct += predicted.eq(targets).sum().item()

        if (batch_index + 1) % 100 == 0:
            logger.info(" == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}".format(
                batch_index + 1, len(train_loader), train_loss / (batch_index + 1),
                100.0 * correct / total, get_current_lr(optimizer)))

    logger.info(" == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}".format(
        batch_index + 1, len(train_loader), train_loss / (batch_index + 1),
        100.0 * correct / total, get_current_lr(optimizer)))

    end = time.time()
    logger.info(" == cost time: {:.4f}s".format(end - start))
    train_loss = train_loss / (batch_index + 1)
    train_acc = correct / total

    writer.add_scalar('train_loss', train_loss, global_step=epoch)
    writer.add_scalar('train_acc', train_acc, global_step=epoch)

    return train_loss, train_acc
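The snippets in this section all rely on two helpers, `mixup_data` and `mixup_criterion`, whose definitions are not included. As a reference point, here is a minimal sketch of the functional convention the loop above uses (pass the criterion, the predictions, both target sets, and `lam` in one call); the exact signatures and argument order are assumptions and vary slightly between the snippets below.

```python
import numpy as np
import torch


def mixup_data(x, y, alpha=1.0, device='cuda'):
    """Mix a batch with a shuffled copy of itself; return the mixed inputs,
    both sets of targets, and the mixing coefficient lam ~ Beta(alpha, alpha)."""
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=device)
    mixed_x = lam * x + (1 - lam) * x[index]
    return mixed_x, y, y[index], lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Convex combination of the loss against both sets of targets."""
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)
```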
def train(
    model,
    device,
    train_loader,
    optimizer,
    criterion,
    epoch,
    mixup=False,
    avg_meter=None,
):
    model.train()
    batch_loss = list()
    alpha = 0.2 if mixup else 0
    lam = None  # required when doing mixup training

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        # the two targets correspond to the pair of examples used to create the mix
        data, target_a, target_b, lam = mixup_data(data, target, device, alpha)

        optimizer.zero_grad()
        output = model(data)
        loss = mixup_criterion(criterion, output, target_a, target_b, lam)
        loss.backward()
        optimizer.step()

        batch_loss.append(loss.item())
        if avg_meter is not None:
            avg_meter.update(batch_loss[-1], n=len(data))

    return batch_loss
def train(): params = split_weights(model) if opt.no_wd else model.parameters() optimizer = optim.SGD(params, lr=base_lr, momentum=0.9, nesterov=True, weight_decay=0.0001) Loss = nn.CrossEntropyLoss() metric_loss = mloss() alpha = 1. if mixup else 0. iterations = 0 for epoch in range(epochs): model.train() metric_loss.reset() st_time = time.time() if mixup and epoch > epochs - 20: alpha = 0. for i, (trans, labels) in enumerate(train_data): trans, targets_a, targets_b, lam = mixup_data(trans.cuda(), labels.cuda(), alpha=alpha) trans, targets_a, targets_b = map(Variable, (trans, targets_a, targets_b)) optimizer.zero_grad() outputs = model(trans) loss = mixup_criterion(Loss, outputs, targets_a, targets_b, lam) loss.backward() optimizer.step() metric_loss.update(loss) iterations += 1 lr_scheduler.update(optimizer, iterations) learning_rate = lr_scheduler.get() met_name, metric = metric_loss.get() epoch_time = time.time() - st_time epoch_str = 'Epoch {}. Train {}: {:.5f}. {} samples/s. lr {:.5}'. \ format(epoch, met_name, metric, int(num_train_samples // epoch_time), learning_rate) logger.info(epoch_str) test(epoch, True)
def _training_step_mixup(self, imgs, targets, data_provider):
    imgs, targets_a, targets_b, lam = mixup_data_same_provider(
        imgs, targets, data_provider
    )
    logits = self.forward(imgs)
    loss = mixup_criterion(self.loss, logits, targets_a, targets_b, lam)
    return loss
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        # generate mixed inputs, two one-hot label vectors and mixing coefficient
        inputs, targets_a, targets_b, lam = mixup_data(inputs, targets,
                                                       args.alpha, use_cuda)
        optimizer.zero_grad()
        inputs, targets_a, targets_b = Variable(inputs), Variable(
            targets_a), Variable(targets_b)
        outputs = net(inputs)

        loss_func = mixup_criterion(targets_a, targets_b, lam)
        loss = loss_func(criterion, outputs)
        loss.backward()
        optimizer.step()

        train_loss += loss.data[0]
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += lam * predicted.eq(targets_a.data).cpu().sum() + (
            1 - lam) * predicted.eq(targets_b.data).cpu().sum()

        progress_bar(
            batch_idx, len(trainloader),
            'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (batch_idx + 1), 100. * correct / total, correct,
             total))
    return (train_loss / batch_idx, 100. * correct / total)
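The loop above calls `mixup_criterion(targets_a, targets_b, lam)` and then applies the result to `(criterion, outputs)`, i.e. it assumes a closure-returning variant rather than the functional one sketched earlier. A minimal sketch of that convention, with the signature inferred from the call sites:

```python
def mixup_criterion(y_a, y_b, lam):
    """Closure-style variant: returns loss_func(criterion, pred)."""
    return lambda criterion, pred: (lam * criterion(pred, y_a)
                                    + (1 - lam) * criterion(pred, y_b))
```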
def training_step(self, batch, batch_idx):
    x, y, idx = batch
    x, y_a, y_b, lam = mixup_data(x, y)
    y_hat = self.forward(x)
    loss = mixup_criterion(self.crit, y_hat, y_a.float(), y_b.float(), lam)
    self.log('trn/_loss', loss)
    return loss
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    total_gnorm = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        if args.train_loss == 'mixup':
            # generate mixed inputs, two one-hot label vectors and mixing coefficient
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, targets, args.alpha, use_cuda)
            outputs = net(inputs)
            loss_func = mixup_criterion(targets_a, targets_b, lam)
            loss = loss_func(criterion, outputs)
        else:
            outputs = net(inputs)
            loss = cel(outputs, targets)
        loss.backward()
        if args.train_clip > 0:
            gnorm = torch.nn.utils.clip_grad_norm_(net.parameters(),
                                                   args.train_clip)
        else:
            gnorm = -1
        total_gnorm += gnorm
        optimizer.step()
        sgdr.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        acc = 100. * float(correct) / float(total)
        if batch_idx % 50 == 0 or batch_idx == len(trainloader) - 1:
            wnorms = [
                w.norm().item() for n, w in net.named_parameters()
                if 'weight' in n
            ]
            print(
                batch_idx, len(trainloader),
                'Loss: %.3f | Acc: %.3f%% (%d/%d) | WNorm: %.3e (min: %.3e, max: %.3e) | GNorm: %.3e (%.3e)'
                % (train_loss / (batch_idx + 1), acc, correct, total,
                   sum(wnorms), min(wnorms), max(wnorms), gnorm,
                   total_gnorm / (batch_idx + 1)))
    return train_loss / batch_idx, acc
def mixup_train(loader, model, criterion, optimizer, epoch, use_cuda): global BEST_ACC, LR_STATE # switch to train mode if not cfg.CLS.fix_bn: model.train() else: model.eval() batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() for batch_idx, (inputs, targets) in enumerate(loader): # adjust learning rate adjust_learning_rate(optimizer, epoch, batch=batch_idx, batch_per_epoch=len(loader)) # mixup inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, ALPHA) if use_cuda: inputs, targets_a, targets_b = inputs.cuda(), targets_a.cuda(), targets_b.cuda() inputs, targets_a, targets_b = torch.autograd.Variable(inputs), torch.autograd.Variable(targets_a), \ torch.autograd.Variable(targets_b) # measure data loading time data_time.update(time.time() - end) # forward pass: compute output outputs = model(inputs) # forward pass: compute gradient and do SGD step optimizer.zero_grad() loss_func = mixup_criterion(targets_a, targets_b, lam) loss = loss_func(criterion, outputs) # backward loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() # measure accuracy and record loss prec1, prec5 = [0.0], [0.0] losses.update(loss.data[0], inputs.size(0)) top1.update(prec1[0], inputs.size(0)) top5.update(prec5[0], inputs.size(0)) if (batch_idx + 1) % cfg.CLS.disp_iter == 0: print('Training: [{}/{}][{}/{}] | Best_Acc: {:4.2f}% | Time: {:.2f} | Data: {:.2f} | ' 'LR: {:.8f} | Top1: {:.4f}% | Top5: {:.4f}% | Loss: {:.4f} | Total: {:.2f}' .format(epoch + 1, cfg.CLS.epochs, batch_idx + 1, len(loader), BEST_ACC, batch_time.average(), data_time.average(), LR_STATE, top1.avg, top5.avg, losses.avg, batch_time.sum + data_time.sum)) return (losses.avg, top1.avg)
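Several of these loops track timing and metrics through an `AverageMeter`. The variants are not identical (the snippet above calls `.average()` and reads `.sum`, while others read an `.avg` attribute), so the sketch below is only the common pattern, not the exact class any one of these repos uses.

```python
class AverageMeter:
    """Running value / sum / count / average for a scalar metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count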
def train(epoch): print('\nEpoch: %d' % epoch) global Train_acc net.train() train_loss = 0 correct = 0 total = 0 if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0: frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every decay_factor = learning_rate_decay_rate ** frac current_lr = opt.lr * decay_factor utils.set_lr(optimizer, current_lr) # set the decayed rate else: current_lr = opt.lr print('learning_rate: %s' % str(current_lr)) for batch_idx, (inputs, targets) in enumerate(trainloader): if use_cuda: inputs, targets = inputs.cuda(), targets.cuda() optimizer.zero_grad() if opt.mixup: inputs, targets_a, targets_b, lam = utils.mixup_data(inputs, targets, 0.6, True) inputs, targets_a, targets_b = map(Variable, (inputs, targets_a, targets_b)) else: inputs, targets = Variable(inputs), Variable(targets) outputs = net(inputs) if opt.mixup: loss = utils.mixup_criterion(criterion, outputs, targets_a, targets_b, lam) else: loss = criterion(outputs, targets) loss.backward() utils.clip_gradient(optimizer, 0.1) optimizer.step() train_loss += loss.item() _, predicted = torch.max(outputs.data, 1) total += targets.size(0) if opt.mixup: correct += (lam * predicted.eq(targets_a.data).cpu().sum().float() + (1 - lam) * predicted.eq(targets_b.data).cpu().sum().float()) else: correct += predicted.eq(targets.data).cpu().sum() utils.progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' % (train_loss/(batch_idx+1), 100.*float(correct)/float(total), correct, total)) Train_acc = 100.*float(correct)/float(total) return train_loss/(batch_idx+1), Train_acc
def _training_step_cutmix(self, imgs, targets):
    # skip cutmix in the final epochs, and on roughly half of the batches otherwise
    if self.current_epoch > self.cutmix_epoch or np.random.rand() < 0.5:
        return self._training_step_normal(imgs, targets)

    imgs, targets_a, targets_b, lam = cutmix_tile(
        imgs, targets, self.img_size, self.tile_size, beta=1.0
    )
    logits = self.forward(imgs)
    loss = mixup_criterion(self.loss, logits, targets_a, targets_b, lam)
    return loss
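`cutmix_tile` above appears to be a project-specific, tile-aware CutMix helper; its output is still scored with the same `mixup_criterion`. For reference, a sketch of the generic CutMix recipe under an assumed helper name `cutmix_data`: cut a random box out of each image, paste in the corresponding region from a shuffled copy of the batch, and rescale `lam` by the actual box area.

```python
import numpy as np
import torch


def cutmix_data(x, y, beta=1.0):
    """Generic CutMix: returns patched inputs, both target sets, and lam."""
    lam = np.random.beta(beta, beta)
    index = torch.randperm(x.size(0), device=x.device)
    h, w = x.size(2), x.size(3)
    cut_h, cut_w = int(h * np.sqrt(1.0 - lam)), int(w * np.sqrt(1.0 - lam))
    cy, cx = np.random.randint(h), np.random.randint(w)
    y1, y2 = np.clip(cy - cut_h // 2, 0, h), np.clip(cy + cut_h // 2, 0, h)
    x1, x2 = np.clip(cx - cut_w // 2, 0, w), np.clip(cx + cut_w // 2, 0, w)
    x[:, :, y1:y2, x1:x2] = x[index, :, y1:y2, x1:x2]
    # adjust lam to the exact fraction of the image that was kept
    lam = 1.0 - (y2 - y1) * (x2 - x1) / float(h * w)
    return x, y, y[index], lam
```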
def train(epoch): print('\nEpoch: %d' % epoch) net.train() train_loss = 0.0 correct = 0.0 total = 0 for batch_idx, (inputs, targets) in enumerate(trainloader): if use_cuda: inputs, targets = inputs.cuda(), targets.cuda() mask = random.random() if epoch >= 90: # threshold = math.cos( math.pi * (epoch - 150) / ((200 - 150) * 2)) threshold = (100 - epoch) / (100 - 90) # threshold = 1.0 - math.cos( math.pi * (200 - epoch) / ((200 - 150) * 2)) if mask < threshold: inputs, targets_a, targets_b, lam = mixup_data( inputs, targets, args.alpha, use_cuda) else: targets_a, targets_b = targets, targets lam = 1.0 elif epoch >= 60: if epoch % 2 == 0: inputs, targets_a, targets_b, lam = mixup_data( inputs, targets, args.alpha, use_cuda) else: targets_a, targets_b = targets, targets lam = 1.0 else: inputs, targets_a, targets_b, lam = mixup_data( inputs, targets, args.alpha, use_cuda) optimizer.zero_grad() inputs, targets_a, targets_b = Variable(inputs), Variable( targets_a), Variable(targets_b) outputs = net(inputs) loss_func = mixup_criterion(targets_a, targets_b, lam) loss = loss_func(criterion, outputs) loss.backward() optimizer.step() train_loss += loss.item() _, predicted = torch.max(outputs.data, 1) total += targets.size(0) correct += lam * predicted.eq(targets_a.data).cpu().sum().item() + ( 1.0 - lam) * predicted.eq(targets_b.data).cpu().sum().item() progress_bar( batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' % (train_loss / (batch_idx + 1), (100. * correct) / total, correct, total)) return (train_loss / batch_idx, 100. * correct / total)
def train(train_loader, net, criterion, optimizer, epoch, device):
    global writer
    start = time.time()
    # switch to train mode (only affects layers such as dropout and batch norm)
    net.train()

    train_loss = 0
    correct = 0
    total = 0
    logger.info("====Epoch:[{}/{}]====".format(epoch + 1, config.epochs))

    for batch_index, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        if config.mixup:
            inputs, targets_a, targets_b, lam = utils.mixup_data(
                inputs, targets, config.mixup_alpha, device)
            outputs = net(inputs)
            loss = utils.mixup_criterion(criterion, outputs, targets_a,
                                         targets_b, lam)
        else:
            outputs = net(inputs)
            loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += inputs.size()[0]
        if config.mixup:
            correct += (lam * predicted.eq(targets_a).sum().item()
                        + (1 - lam) * predicted.eq(targets_b).sum().item())
        else:
            correct += predicted.eq(targets).sum().item()

        if batch_index % 100 == 99:
            logger.info(
                " == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}"
                .format(batch_index + 1, len(train_loader),
                        train_loss / (batch_index + 1),
                        100.0 * correct / total,
                        utils.get_current_lr(optimizer)))

    end = time.time()
    logger.info(" == cost time: {:.4f}s".format(end - start))
    train_loss = train_loss / (batch_index + 1)
    train_acc = correct / total

    # these are training metrics, so log them under the train tags
    writer.add_scalar('train_loss', train_loss, global_step=epoch)
    writer.add_scalar('train_acc', train_acc, global_step=epoch)
    return train_loss, train_acc
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        if args.mixup:
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, targets, 1.0, use_cuda)
            inputs, targets_a, targets_b = map(Variable,
                                               (inputs, targets_a, targets_b))
            outputs = net(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
            _, predicted = torch.max(outputs.data, 1)
            correct += lam * predicted.eq(targets_a.data).cpu().sum().float()
            correct += (1 - lam) * predicted.eq(
                targets_b.data).cpu().sum().float()
        else:
            inputs, targets = Variable(inputs), Variable(targets)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            _, predicted = torch.max(outputs.data, 1)
            correct += predicted.eq(targets.data).cpu().sum()
        total += targets.size(0)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(
            batch_idx, len(trainloader),
            'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (batch_idx + 1), 100. * correct / total, correct,
             total))
        if args.transfer_learning:
            if batch_idx >= len(trainloader) - 2:
                break
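Most of the loops in this section repeat the same weighted-accuracy bookkeeping under mixup: a prediction counts as `lam` correct against the first target and `1 - lam` correct against the second. If you want to factor that out, a small helper could look like the sketch below (the name `mixup_correct` is made up here, not taken from any of the snippets).

```python
def mixup_correct(predicted, targets_a, targets_b, lam):
    """Weighted number of correct predictions against both mixed targets."""
    return (lam * predicted.eq(targets_a).sum().item()
            + (1 - lam) * predicted.eq(targets_b).sum().item())
```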
def train(self, loader): self.model.train() # Set model to training mode running_loss = 0.0 running_corrects = 0 num_inst = 0 if self.config.model.use_center_loss: running_centloss = 0. for batch_idx, batch_samples in enumerate(loader): batch_data = batch_samples['image'].cuda() if self.config.model.use_relabel: batch_label = batch_samples['class'].cuda().float().unsqueeze( 1) else: batch_label = batch_samples['class'].cuda() if self.config.model.use_mixup: # generate mixed inputs, two one-hot label vectors # and mixing coefficient batch_data, batch_label_A, batch_label_B, lam = \ mixup_data(batch_data, batch_label, self.config.model.mixup_alpha, True) # zero the parameter gradients self.optimizer.zero_grad() if self.config.model.use_center_loss: self.optimizer_centloss.zero_grad() # forward # track history if only in train with torch.set_grad_enabled(True): features, outputs = self.model(batch_data) if self.config.model.use_relabel: pred_labels = torch.where(outputs > 0.5, torch.ones(1).cuda(), torch.zeros(1).cuda()) else: _, pred_labels = torch.max(outputs, 1) if self.config.model.use_mixup: loss_func = mixup_criterion(batch_label_A, batch_label_B, lam) loss = loss_func(self.config.train.criterion.ce, outputs) else: if self.config.model.use_relabel: loss = self.config.train.criterion.sl1( outputs, batch_label) else: loss = self.config.train.criterion.ce( outputs, batch_label) if self.config.model.use_center_loss: loss_cent = self.config.train.center_loss_weight * \ self.config.train.criterion.cent(features, batch_label) loss += loss_cent loss.backward() self.optimizer.step() if self.config.model.use_center_loss: for param in self.config.train.criterion.cent.parameters(): scale = 1. / self.config.train.center_loss_weight param.grad.data *= scale self.optimizer_centloss.step() # statistics running_loss += loss.item() * batch_label.size(0) if self.config.model.use_mixup: tmp_a = lam * \ pred_labels.eq(batch_label_A.data).cpu().sum() tmp_b = (1 - lam) * \ pred_labels.eq(batch_label_B.data).cpu().sum() running_corrects += (tmp_a + tmp_b) else: running_corrects += torch.sum(pred_labels == batch_label.data) num_inst += batch_label.size(0) if self.config.model.use_center_loss: running_centloss += loss_cent.item() * batch_label.size(0) batch_loss = running_loss / num_inst batch_acc = running_corrects.double() / num_inst if ((batch_idx + 1) % 20) == 0: if self.config.model.use_center_loss: cent_loss_ = running_centloss / num_inst self.disp_batch(batch_idx, batch_loss, batch_acc, cent_loss_) else: self.disp_batch(batch_idx, batch_loss, batch_acc) return running_loss, running_corrects, num_inst
def train(args, model: nn.Module, criterion, *, params, train_loader, valid_loader, init_optimizer, use_cuda, n_epochs=None, patience=2, max_lr_changes=3) -> bool: lr = args.lr n_epochs = n_epochs or args.n_epochs params = list(params) optimizer = init_optimizer(params, lr) run_root = Path(args.run_root) model_path = Path(str(run_root) + '/' + 'model.pt') if model_path.exists(): state = load_model(model, model_path) epoch = state['epoch'] step = state['step'] best_valid_loss = state['best_valid_loss'] best_f2 = state['best_f2'] else: epoch = 1 step = 0 best_valid_loss = float('inf') best_f2 = 0 lr_changes = 0 save = lambda ep: torch.save( { 'model': model.state_dict(), 'epoch': ep, 'step': step, 'best_valid_loss': best_valid_loss, 'best_f2': best_f2 }, str(model_path)) report_each = 100 log = run_root.joinpath('train.log').open('at', encoding='utf8') valid_losses = [] valid_f2s = [] lr_reset_epoch = epoch for epoch in range(epoch, n_epochs + 1): model.train() tq = tqdm.tqdm( total=(args.epoch_size or len(train_loader) * args.batch_size)) tq.set_description(f'Epoch {epoch}, lr {lr}') losses = [] tl = train_loader if args.epoch_size: tl = islice(tl, args.epoch_size // args.batch_size) try: mean_loss = 0 for i, (inputs, targets) in enumerate(tl): if use_cuda: inputs, targets = inputs.cuda(), targets.cuda() inputs, targets_a, targets_b, lam = mixup_data( inputs, targets, 1, use_cuda) inputs, targets_a, targets_b = Variable(inputs), Variable( targets_a), Variable(targets_b) outputs = model(inputs) loss_func = mixup_criterion(targets_a, targets_b, lam) loss = loss_func(criterion, outputs) loss = _reduce_loss(loss) batch_size = inputs.size(0) (batch_size * loss).backward() if (i + 1) % args.step == 0: optimizer.step() optimizer.zero_grad() step += 1 tq.update(batch_size) losses.append(loss.item()) mean_loss = np.mean(losses[-report_each:]) tq.set_postfix(loss=f'{mean_loss:.3f}') # if i and i % report_each == 0: # write_event(log, step, loss=mean_loss) write_event(log, step, loss=mean_loss) tq.close() save(epoch + 1) valid_metrics = validation(model, criterion, valid_loader, use_cuda) write_event(log, step, **valid_metrics) valid_loss = valid_metrics['valid_loss'] valid_f2 = valid_metrics['valid_f2_th_0.10'] valid_f2s.append(valid_f2) valid_losses.append(valid_loss) if valid_loss < best_valid_loss: best_valid_loss = valid_loss #shutil.copy(str(model_path), str(run_root) + '/model_loss_' + f'{valid_loss:.4f}' + '.pt') if valid_f2 > best_f2: best_f2 = valid_f2 shutil.copy( str(model_path), str(run_root) + '/model_f2_' + f'{valid_f2:.4f}' + '.pt') # if epoch == 7: # lr = 1e-4 # print(f'lr updated to {lr}') # optimizer = init_optimizer(params, lr) # if epoch == 8: # lr = 1e-5 # optimizer = init_optimizer(params, lr) # print(f'lr updated to {lr}') except KeyboardInterrupt: tq.close() # print('Ctrl+C, saving snapshot') # save(epoch) # print('done.') return False return True
def train(train_loader, net, criterion, optimizer, epoch, device,\ layer_inputs, layer_outputs, grad_inputs, grad_outputs, layers, crit, groups): global writer start = time.time() net.train() train_loss = 0 correct = 0 total = 0 eps = 0.001 logger.info(" === Epoch: [{}/{}] === ".format(epoch + 1, config.epochs)) for batch_index, (inputs, targets) in enumerate(train_loader): # move tensor to GPU inputs, targets = inputs.to(device), targets.to(device) inputs.requires_grad = True layer_inputs.clear() layer_outputs.clear() grad_inputs.clear() grad_outputs.clear() if config.mixup: inputs, targets_a, targets_b, lam = mixup_data( inputs, targets, config.mixup_alpha, device) outputs = net(inputs) loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam) else: outputs = net(inputs) loss = criterion(outputs, targets) # zero the gradient buffers optimizer.zero_grad() # backward loss.backward() #fgsm # for p in net.parameters(): # p.grad *= args.alpha # adv_input = inputs + eps * inputs.grad.sign() # # outputs = net(adv_input) # # loss_2 = (1-args.alpha) * criterion(outputs, targets) # loss_2.backward() # layer_loss = update_grad(net, layer_inputs, layer_outputs, grad_inputs, grad_outputs, layers, crit, args.alpha) layer_loss = group_noise(net, groups, crit, args.alpha) optimizer.step() # count the loss and acc train_loss += args.alpha * loss.item() + (1 - args.alpha) * layer_loss _, predicted = outputs.max(1) total += targets.size(0) if config.mixup: correct += (lam * predicted.eq(targets_a).sum().item() + (1 - lam) * predicted.eq(targets_b).sum().item()) else: correct += predicted.eq(targets).sum().item() if (batch_index + 1) % 100 == 0: logger.info( " == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}" .format(batch_index + 1, len(train_loader), train_loss / (batch_index + 1), 100.0 * correct / total, get_current_lr(optimizer))) logger.info( " == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}" .format(batch_index + 1, len(train_loader), train_loss / (batch_index + 1), 100.0 * correct / total, get_current_lr(optimizer))) end = time.time() logger.info(" == cost time: {:.4f}s".format(end - start)) train_loss = train_loss / (batch_index + 1) train_acc = correct / total writer.add_scalar('train_loss', train_loss, global_step=epoch) writer.add_scalar('train_acc', train_acc, global_step=epoch) return train_loss, train_acc
def train(self, epoch): batch_time = AverageMeter() losses = AverageMeter() acc = AverageMeter() self.scheduler.step() self.model.train() end = time.time() lr = self.scheduler.get_lr()[0] # for batch, (softmax_data, triplet_data) in enumerate(itertools.zip_longest(self.softmax_train_loader, self.triplet_train_loader)): for batch, (softmax_data, triplet_data) in enumerate( zip(self.softmax_train_loader, self.triplet_train_loader)): loss = 0 softmax_inputs, softmax_labels = softmax_data # 转cuda softmax_inputs = softmax_inputs.to( self.device ) if torch.cuda.device_count() >= 1 else softmax_inputs softmax_labels = softmax_labels.to( self.device ) if torch.cuda.device_count() >= 1 else softmax_labels # softmax_score, softmax_outputs = self.model(softmax_inputs) # traditional_loss = self.softmax_loss(softmax_score, softmax_outputs, softmax_labels) # loss += traditional_loss inputs, targets_a, targets_b, lam = mixup_data(softmax_inputs, softmax_labels, alpha=opt.alpha) # inputs, targets_a, targets_b = Variable(inputs), Variable(targets_a), Variable(targets_b) softmax_score, softmax_outputs = self.model(softmax_inputs) loss_func = mixup_criterion(targets_a, targets_b, lam) mixup_loss = loss_func(criterion, softmax_score) loss += mixup_loss losses.update(loss.item(), softmax_inputs.size(0)) prec = (softmax_score.max(1)[1] == softmax_labels).float().mean() acc.update(prec, softmax_inputs.size(0)) triplet_inputs, triplet_labels = triplet_data # 转cuda triplet_inputs = triplet_inputs.to( self.device ) if torch.cuda.device_count() >= 1 else triplet_inputs triplet_labels = triplet_labels.to( self.device ) if torch.cuda.device_count() >= 1 else triplet_labels triplet_score, triplet_outputs = self.model(triplet_inputs) triplet_loss = self.triplet_loss(triplet_score, triplet_outputs, triplet_labels) loss += triplet_loss self.optimizer.zero_grad() if opt.fp16: # we use optimier to backward loss with amp.scale_loss(loss, self.optimizer) as scaled_loss: scaled_loss.backward() else: loss.backward() self.optimizer.step() # 评估训练耗时 batch_time.update(time.time() - end) end = time.time() # 打印耗时与结果 if (batch + 1) % 10 == 0: logger.debug( 'Epoch: [{}][{}/{}]\t' 'Base_lr: [{:.2e}]\t' 'Time: ({batch_time.avg:.3f})\t' 'Loss_val: {loss.val:.4f} (Loss_avg: {loss.avg:.4f})\t' 'Accuray_val: {acc.val:.4f} (Accuray_avg: {acc.avg:.4f})'. format(epoch, batch + 1, len(self.softmax_train_loader), lr, batch_time=batch_time, loss=losses, acc=acc)) # 每个epoch的结果 log_text = 'Epoch[{}]\tBase_lr {:.2e}\tAccuray {acc.avg:.4f}\tLoss {loss.avg:.4f}'.format( epoch, lr, acc=acc, loss=losses) logger.info(log_text) with open(log_file, 'a') as f: f.write(log_text + '\n') f.flush()
def valid(loader, model, criterion_cls, criterion_ranking, optimizer, epoch, history, logger, args): batch_time = utils.AverageMeter() data_time = utils.AverageMeter() total_losses = utils.AverageMeter() top1 = utils.AverageMeter() cls_losses = utils.AverageMeter() ## cross entropy loss ranking_losses = utils.AverageMeter() ## marginranking loss end = time.time() print("*** Valid ***") model.eval() all_idx = [] all_iscorrect = [] all_confidence = [] all_target = [] ## 원본 이미지, 라벨 저장 for i, (input, target, idx) in enumerate(loader): ## batchsize = 128 # for i, (input, target) in enumerate(loader): ## batchsize = 128 with torch.no_grad(): data_time.update(time.time() - end) input, target = input.cuda(), target.cuda() confidence = [] all_idx.extend(idx.tolist()) all_target.extend(target.tolist()) ##mixup if args.mixup is not None: input, target_a, target_b, lam = utils.mixup_data(input, target, args.mixup, True) input, target_a, target_b = map(Variable, (input, target_a, target_b)) output = model(input) # NaN alert assert torch.all(output == output) # compute ranking target value normalize (0 ~ 1) range # max(softmax) if args.rank_target == 'softmax': conf = F.softmax(output, dim=1) confidence, prediction = conf.max(dim=1) ## predictin : 예측 class, confidence : 그때의 confidence # entropy elif args.rank_target == 'entropy': if args.data == 'cifar100': value_for_normalizing = 4.605170 else: value_for_normalizing = 2.302585 confidence = crl_utils.negative_entropy(output, normalize=True, max_value=value_for_normalizing) # margin elif args.rank_target == 'margin': conf, _ = torch.topk(F.softmax(output), 2, dim=1) conf[:,0] = conf[:,0] - conf[:,1] confidence = conf[:,0] # make input pair rank_input1 = confidence rank_input2 = torch.roll(confidence, -1) idx2 = torch.roll(idx, -1) # calc target, margin rank_target, rank_margin, norm_cor = history.get_target_margin(idx, idx2) ## rank_target : 누가 더 크냐 1, 0, -1 / rank_margin : 옳게 맞춘 횟수의 차이 rank_target_nonzero = rank_target.clone() rank_target_nonzero[rank_target_nonzero == 0] = 1 ## rank_target 에서 0을 다 1로 바꿈 rank_input2 = rank_input2 + rank_margin / rank_target_nonzero ranking_loss = criterion_ranking(rank_input1, rank_input2, rank_target) # total loss if args.mixup is not None: cls_loss = utils.mixup_criterion(criterion_cls, output, target_a, target_b, lam) else: cls_loss = criterion_cls(output, target) ranking_loss = args.rank_weight * ranking_loss loss = cls_loss + ranking_loss # record loss and accuracy prec, correct = utils.accuracy(output, target) all_iscorrect.extend(map(int, correct)) all_confidence.extend(confidence.tolist()) total_losses.update(loss.item(), input.size(0)) cls_losses.update(cls_loss.item(), input.size(0)) ranking_losses.update(ranking_loss.item(), input.size(0)) top1.update(prec.item(), input.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('[{0}][{1}/{2}] ' 'Time {batch_time.val:.3f}({batch_time.avg:.3f}) ' 'Data {data_time.val:.3f}({data_time.avg:.3f}) ' 'Loss {loss.val:.4f}({loss.avg:.4f}) ' 'CLS Loss {cls_loss.val:.4f}({cls_loss.avg:.4f}) ' 'Rank Loss {rank_loss.val:.4f}({rank_loss.avg:.4f}) ' 'Prec {top1.val:.2f}%({top1.avg:.2f}%)'.format( epoch, i, len(loader), batch_time=batch_time, data_time=data_time, loss=total_losses, cls_loss=cls_losses, rank_loss=ranking_losses,top1=top1)) # history.confidence_update(idx, correct, output) # max correctness update # history.max_correctness_update(epoch) logger.write([epoch, total_losses.avg, cls_losses.avg, 
ranking_losses.avg, top1.avg]) return all_idx, all_iscorrect, all_confidence, all_target, total_losses, prec.item()
def train(matrix_idx_confidence, matrix_idx_iscorrect, loader, model, wr, criterion_cls, criterion_ranking, optimizer, epoch, history, logger, args): batch_time = utils.AverageMeter() data_time = utils.AverageMeter() total_losses = utils.AverageMeter() top1 = utils.AverageMeter() cls_losses = utils.AverageMeter() ## cross entropy loss ranking_losses = utils.AverageMeter() ## marginranking loss ji_wj_losses = utils.AverageMeter() ## JI_WJ end = time.time() print("*** Training ***") model.train() all_idx = [] all_iscorrect = [] all_confidence = [] all_target = [] ## 원본 이미지, 라벨 저장 for i, (input, target, idx) in enumerate(loader): ## batchsize = 128 # for i, (input, target) in enumerate(loader): ## batchsize = 128 data_time.update(time.time() - end) input, target = input.cuda(), target.cuda() confidence = [] all_idx.extend(idx.tolist()) all_target.extend(target.tolist()) ##mixup if args.mixup is not None: input, target_a, target_b, lam = utils.mixup_data(input, target, args.mixup, True) input, target_a, target_b = map(Variable, (input, target_a, target_b)) output = model(input) if args.ts is not None: temp = torch.nn.Parameter(torch.ones(1) * args.ts) ts = temp.unsqueeze(1).expand(output.size(0), output.size(1)).cuda() output = output / ts # NaN alert assert torch.all(output == output) for a in range(len(input)): wr.writerow([str(idx[a].item()), str(target[a].item())]) # record loss and accuracy prec, correct = utils.accuracy(output, target) # # compute ranking target value normalize (0 ~ 1) range # max(softmax) if args.rank_target == 'softmax': conf = F.softmax(output, dim=1) confidence, prediction = conf.max(dim=1) ## predictin : 예측 class, confidence : 그때의 confidence # entropy elif args.rank_target == 'entropy': if args.data == 'cifar100': value_for_normalizing = 4.605170 else: value_for_normalizing = 2.302585 confidence = crl_utils.negative_entropy(output, normalize=True, max_value=value_for_normalizing) # margin elif args.rank_target == 'margin': conf, _ = torch.topk(F.softmax(output), 2, dim=1) conf[:,0] = conf[:,0] - conf[:,1] confidence = conf[:, 0] # correctness count update if args.loss == "CRL" or args.cal == "Cor": history.correctness_update(idx, correct, output) # Avg confidence update if args.cal == "Conf": history.confidence_update(idx, confidence, output) for a in range(len(input)): matrix_idx_confidence[idx[a]].append(confidence[a].item()) all_iscorrect.extend(map(int, correct)) all_confidence.extend(confidence.tolist()) # make input pair rank_input1 = confidence rank_input2 = torch.roll(confidence, -1) idx2 = torch.roll(idx, -1) # calc target, margin rank_target, rank_margin, acc, correctness= history.get_target_margin(idx, idx2) ## rank_target : 누가 더 크냐 1, 0, -1 / rank_margin : 옳게 맞춘 횟수의 차이 # print(rank_target, rank_margin) rank_target_nonzero = rank_target.clone() # print("rank_target_nonzero", rank_target_nonzero) rank_target_nonzero[rank_target_nonzero == 0] = 1 ## rank_target 에서 0을 다 1로 바꿈 # print("rank_target_nonzero", rank_target_nonzero) rank_input2 = rank_input2 + rank_margin / rank_target_nonzero # print(rank_input2) # ranking loss // margin rankingloss ranking_loss = criterion_ranking(rank_input1, rank_input2, rank_target) # total loss ji_loss = 0 if args.mixup is not None: cls_loss = utils.mixup_criterion(criterion_cls, output, target_a, target_b, lam) else: cls_loss = criterion_cls(output, target) # (128, 1) if args.b != None: # print("******************************") # print("Conf = ", confidence.sum().item()/len(confidence)) if args.mode == 0: ## batch-wised if 
args.ji_conf == True and cls_loss <= args.b: print("*** Adjusting b(1.5-conf) ***") print("[Before]", cls_loss.item()) cls_loss = abs(cls_loss - args.b * (1.5 - confidence.mean())) + args.b * (1.5 - confidence.mean()) print("[After]", cls_loss.item()) elif args.minus_1_conf == True and cls_loss <= args.b: print("*** Adjusting b(1/conf) ***") print("[Before]", cls_loss.item()) cls_loss = abs(cls_loss - args.b * (1 / confidence.mean())) + args.b * (1 / confidence.mean()) print("[After]", cls_loss.item()) elif args.ji_acc_conf == True and cls_loss <= args.b: print("*** Adjusting b(acc/conf) ***") print("[Before]", cls_loss.item()) acc_conf = (torch.from_numpy(correctness).to(torch.device("cuda")) / confidence).mean() cls_loss = abs(cls_loss - args.b * acc_conf) + args.b * acc_conf print("[After]", cls_loss.item()) print("--------------------------------------------") elif args.ji_wj != 0 and cls_loss <= args.b: print("*** Adjusting wj") l1loss = nn.L1Loss(reduction="mean").cuda() ji_wj_loss = l1loss(confidence, torch.from_numpy(correctness / epoch).to(torch.device("cuda"))) else: if cls_loss.item() <= args.b: print("*** Adjusting b(Flood) ***") cls_loss = abs(cls_loss - args.b) + args.b else: ## sample-wised if args.ji_conf == True and cls_loss.mean().item() <= args.b: print("*** Adjusting b(1.5-conf) ***") print("[Before]", cls_loss.mean().item()) cls_loss = abs(cls_loss - args.b * (1.5 - confidence)) + args.b * (1.5 - confidence) print("[After]", cls_loss.mean().item()) cls_loss = cls_loss.mean() elif args.minus_1_conf == True and cls_loss.mean().item() <= args.b: print("*** Adjusting b(1/conf) ***") print("[Before]", cls_loss.mean().item()) cls_loss = abs(cls_loss - args.b * (1 / confidence)) + args.b * (1 / confidence) print("[After]", cls_loss.mean().item()) cls_loss = cls_loss.mean() elif args.ji_acc_conf == True and cls_loss.mean().item() <= args.b: print("*** Adjusting b(acc/conf) ***") print("[Before]", cls_loss.mean().item()) acc_conf = torch.from_numpy(correctness).to(torch.device("cuda")) / confidence cls_loss = abs(cls_loss - args.b * acc_conf) + args.b * acc_conf cls_loss = cls_loss.mean() print("[After]", cls_loss.mean().item()) print("--------------------------------------------") elif args.ji_wj != 0 and cls_loss <= args.b: print("*** Adjusting wj") l1loss = nn.L1Loss(reduction="mean").cuda() ji_wj_loss = l1loss(confidence, torch.from_numpy(correctness / epoch).to(torch.device("cuda"))) else: if cls_loss.mean().item() <= args.b: print("*** Adjusting b(Flood) ***") cls_loss = abs(cls_loss - args.b) + args.b cls_loss = cls_loss.mean() ranking_loss = args.rank_weight * ranking_loss if args.loss == "Margin": loss = ranking_loss elif args.ji_wj != 0: if cls_loss <= args.b: loss = cls_loss + args.ji_wj * ji_wj_loss else: loss = cls_loss else: loss = cls_loss + ranking_loss # compute gradient and do optimizer step optimizer.zero_grad() loss.backward() optimizer.step() # print("prec", prec) # print("correct", correct) for a in range(len(idx)): if correct[a].item() == False: matrix_idx_iscorrect[idx[a]].append(0) else: matrix_idx_iscorrect[idx[a]].append(1) total_losses.update(loss.item(), input.size(0)) cls_losses.update(cls_loss.mean().item(), input.size(0)) # cls_losses.update(cls_loss.item(), input.size(0)) if args.ji_wj != 0 and cls_loss <= args.b: ji_wj_losses.update(ji_wj_loss.item(), input.size(0)) ranking_losses.update(ranking_loss.item(), input.size(0)) top1.update(prec.item(), input.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i 
% args.print_freq == 0: print('[{0}][{1}/{2}] ' 'Time {batch_time.val:.3f}({batch_time.avg:.3f}) ' 'Data {data_time.val:.3f}({data_time.avg:.3f}) ' 'Loss {loss.val:.4f}({loss.avg:.4f}) ' 'CLS Loss {cls_loss.val:.4f}({cls_loss.avg:.4f}) ' 'Rank Loss {rank_loss.val:.4f}({rank_loss.avg:.4f}) ' 'JI_WJ Loss {ji_wj_loss.val:.4f}({ji_wj_loss.avg:.4f}) ' 'Prec {top1.val:.2f}%({top1.avg:.2f}%)'.format( epoch, i, len(loader), batch_time=batch_time, data_time=data_time, loss=total_losses, cls_loss=cls_losses, rank_loss=ranking_losses, ji_wj_loss = ji_wj_losses, top1=top1)) # max correctness update history.max_correctness_update(epoch) logger.write([epoch, total_losses.avg, cls_losses.avg, ranking_losses.avg, top1.avg]) cur_confidence = history.get_confidence() cur_correctness = history.get_correctness() if args.rank_weight != 0.0: return matrix_idx_confidence, matrix_idx_iscorrect, all_idx, all_iscorrect, all_confidence, all_target, cls_losses.avg, ranking_losses.avg, correctness, cur_confidence, cur_correctness else: return matrix_idx_confidence, matrix_idx_iscorrect, all_idx, all_iscorrect, all_confidence, all_target, total_losses.avg, 0, correctness, cur_confidence, cur_correctness
def train(args): train_dataset = FurnitureDataset('train', transform=preprocess_with_augmentation) val_dataset = FurnitureDataset('val', transform=preprocess) training_data_loader = DataLoader(dataset=train_dataset, num_workers=8, batch_size=BATCH_SIZE, shuffle=True) validation_data_loader = DataLoader(dataset=val_dataset, num_workers=1, batch_size=BATCH_SIZE, shuffle=False) model = get_model(args.name) class_weight = np.load('./class_weight.npy') #criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor(class_weight)).cuda() criterion = nn.CrossEntropyLoss().cuda() #criterion = FocalLoss(alpha=alpha, gamma=0).cuda() nb_learnable_params = sum(p.numel() for p in model.fresh_params()) print(f'[+] nb learnable params {nb_learnable_params}') min_loss = float("inf") lr = 0 patience = 0 for epoch in range(30): print(f'epoch {epoch}') if epoch == 1: lr = 0.00003 print(f'[+] set lr={lr}') if patience == 2: patience = 0 model.load_state_dict( torch.load( 'models_trained/{}_{}_{}/best_val_weight_{}.pth'.format( args.name, args.aug, args.alpha, args.name))) lr = lr / 10 if lr < 3e-6: lr = 3e-6 print(f'[+] set lr={lr}') if epoch == 0: lr = 0.001 print(f'[+] set lr={lr}') optimizer = torch.optim.Adam(model.fresh_params(), lr=lr) else: optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0.0001) running_loss = RunningMean() running_score = RunningMean() model.train() pbar = tqdm(training_data_loader, total=len(training_data_loader)) for inputs, labels in pbar: batch_size = inputs.size(0) inputs = Variable(inputs) labels = Variable(labels) if use_gpu: inputs = inputs.cuda() labels = labels.cuda() optimizer.zero_grad() if args.aug: inputs, targets_a, targets_b, lam = mixup_data( inputs, labels, args.alpha, use_gpu) outputs = model(inputs) if args.aug: loss_func = mixup_criterion(targets_a, targets_b, lam) loss = loss_func(criterion, outputs) else: loss = criterion(outputs, labels) _, preds = torch.max(outputs.data, dim=1) running_loss.update(loss.data[0], 1) if args.aug: running_score.update( batch_size - lam * preds.eq(targets_a.data).cpu().sum() - (1 - lam) * preds.eq(targets_b.data).cpu().sum(), batch_size) else: running_score.update(torch.sum(preds != labels.data), batch_size) loss.backward() optimizer.step() pbar.set_description( f'{running_loss.value:.5f} {running_score.value:.3f}') print( f'[+] epoch {epoch} {running_loss.value:.5f} {running_score.value:.3f}' ) lx, px = utils.predict(model, validation_data_loader) log_loss = criterion(Variable(px), Variable(lx)) log_loss = log_loss.data[0] _, preds = torch.max(px, dim=1) accuracy = torch.mean((preds != lx).float()) print(f'[+] val {log_loss:.5f} {accuracy:.3f}') if log_loss < min_loss: torch.save( model.state_dict(), 'models_trained/{}_{}_{}/best_val_weight_{}.pth'.format( args.name, args.aug, args.alpha, args.name)) print( f'[+] val score improved from {min_loss:.5f} to {log_loss:.5f}. Saved!' ) min_loss = log_loss patience = 0 else: patience += 1
def train(cfg, train_loader, model, criterion, kd_criterion, optimizer, scheduler, epoch): """ Helper function to train. """ losses = AverageMeter() model.train() tbar = tqdm(train_loader) for i, (image, target) in enumerate(tbar): image = image.cuda() target = target.cuda() bsize, seq_len, c, h, w = image.size() # image = image.view(bsize * seq_len, c, h, w) # target = target.view(-1, target.size(-1)) data_aug = cfg["CUTMIX"] or cfg["MIXUP"] if np.random.uniform() < cfg["P_AUGMENT"] and data_aug: # if cfg["CUTMIX"]: # mixed_x, y_a, y_b, lam = cutmix_data(image, target) # elif cfg["MIXUP"]: # mixed_x, y_a, y_b, lam = mixup_data(image, target) mixed_x = [] y_a = [] y_b = [] lam = [] for st_image, st_target in zip(image, target): mixed_st_image, st_y_a, st_y_b, st_lam = cutmix_data( st_image, st_target) mixed_x.append(mixed_st_image) y_a.append(st_y_a) y_b.append(st_y_b) lam.append(torch.FloatTensor([st_lam] * seq_len)) mixed_x = torch.stack(mixed_x) y_a = torch.stack(y_a) y_b = torch.stack(y_b) lam = torch.cat(lam, 0).unsqueeze(1).cuda() mixed_x = mixed_x.view(bsize * seq_len, c, h, w) y_a = y_a.view(-1, target.size(-1)) y_b = y_b.view(-1, target.size(-1)) output, aux_output0, aux_output1 = model(mixed_x, seq_len) main_loss = mixup_criterion(criterion, output, y_a, y_b, lam) if cfg["USE_KD"]: aux_loss = cfg["ALPHA"] * ( mixup_criterion(criterion, aux_output0, y_a, y_b, lam) + mixup_criterion(criterion, aux_output1, y_a, y_b, lam) ) + (1. - cfg["ALPHA"]) * (kd_criterion(aux_output0, output) + kd_criterion(aux_output1, output)) else: aux_loss = mixup_criterion( criterion, aux_output0, y_a, y_b, lam) + mixup_criterion( criterion, aux_output1, y_a, y_b, lam) else: image = image.view(bsize * seq_len, c, h, w) target = target.view(-1, target.size(-1)) output, aux_output0, aux_output1 = model(image, seq_len) main_loss = criterion(output, target) if cfg["USE_KD"]: aux_loss = cfg["ALPHA"] * ( criterion(aux_output0, target) + criterion(aux_output1, target)) + (1. - cfg["ALPHA"]) * ( kd_criterion(aux_output0, output) + kd_criterion(aux_output1, output)) else: aux_loss = criterion(aux_output0, target) + criterion( aux_output1, target) loss = main_loss + cfg["AUX_W"] * aux_loss loss = loss.mean() # gradient accumulation loss = loss / cfg['GD_STEPS'] with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() if (i + 1) % cfg['GD_STEPS'] == 0: scheduler(optimizer, i, epoch) optimizer.step() optimizer.zero_grad() # record loss losses.update(loss.item() * cfg['GD_STEPS'], image.size(0)) tbar.set_description("Train loss: %.5f, learning rate: %.6f" % (losses.avg, optimizer.param_groups[-1]['lr']))
def train( self, data_loader: torch.utils.data.DataLoader, epoch: int, scheduler: Union[Callable, None] = None, print_every: int = 100, ) -> float: batch_time = utils.AverageMeter() data_time = utils.AverageMeter() losses = utils.AverageMeter() # put the model to train mode self.model.train() start = end = time.time() for batch_idx, (images, labels) in enumerate(data_loader): # measure the data loading time data_time.update(time.time() - end) # zero out the accumulated gradients self.optimizer.zero_grad() # send the data to device images = images.to(self.device) labels = labels.to(self.device) batch_size = images.size(0) if self.use_mixup: mixed_x, y_a, y_b, lam = utils.mixup_data( images, labels, alpha=config.MIXUP_ALPHA, use_cuda=True, ) # forward pass y_preds = self.model(mixed_x) y_preds = y_preds.squeeze(1) y_a = y_a.type_as(y_preds) y_b = y_b.type_as(y_preds) loss = utils.mixup_criterion( self.criterion, y_preds, y_a, y_b, lam ) else: y_preds = self.model(images) y_preds = y_preds.squeeze(1) loss = self.criterion(y_preds, labels.type_as(y_preds)) # record loss losses.update(loss.item(), batch_size) # backpropagate loss.backward() # step the optimizer self.optimizer.step() # measure the elapsed time batch_time.update(time.time() - end) end = time.time() # step the scheduler if provided if scheduler is not None: scheduler.step() # display results if (batch_idx + 1) % print_every == 0: print( f"Epoch: [{epoch+1}][{batch_idx+1}/{len(data_loader)}] " # f"Data loading time: {data_time.val:.3f} ({data_time.avg:.3f}) " f"Batch time: {batch_time.val:.3f} ({batch_time.avg:.3f}) " f"Elapsed {utils.time_since(start, float(batch_idx+1)/len(data_loader))} " f"Loss: {losses.val:.4f} ({losses.avg:.4f}) " ) return losses.avg
train_loss = []
model.train()
loop = tqdm(train_loader)
for inputs, labels in loop:
    inputs = inputs.cuda()
    labels = labels.cuda()
    inputs, labels_a, labels_b, lam = mixup_data(inputs, labels, 0.5, True)
    inputs, labels_a, labels_b = map(Variable, (inputs, labels_a, labels_b))

    with torch.set_grad_enabled(True):
        outputs = model(inputs)
        loss = mixup_criterion(train_criterion, outputs, labels_a, labels_b, lam)

    optimizer.zero_grad()
    if args.fp16:
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
    else:
        loss.backward()
    optimizer.step()
    scheduler.batch_step()

    train_loss.append(loss.item())
    loop.set_description('Epoch {:2d}/{:2d}'.format(epoch, args.epochs - 1))
    loop.set_postfix(loss=np.mean(train_loss))
def train(loader, model, criterion, optimizer, args, scheduler, epoch, lr): batch_time = utils.AverageMeter('Time', ':6.3f') data_time = utils.AverageMeter('Data', ':6.3f') losses = utils.AverageMeter() if isinstance(loader, torch.utils.data.dataloader.DataLoader): length = len(loader) else: length = getattr(loader, '_size', 0) / getattr(loader, 'batch_size', 1) model.train() if 'less_bn' in args.keyword: utils.custom_state(model) end = time.time() for i, data in enumerate(loader): if isinstance(data, list) and isinstance(data[0], dict): input = data[0]['data'] target = data[0]['label'].squeeze() else: input, target = data data_time.update(time.time() - end) if args.device_ids is not None: input = input.cuda(non_blocking=True) target = target.cuda(non_blocking=True).long() if args.mixup_enable: input, target_a, target_b, lam = utils.mixup_data( input, target, args.mixup_alpha, use_cuda=(args.device_ids is not None)) if 'sgdr' in args.lr_policy and scheduler is not None and torch.__version__ < "1.0.4" and epoch < args.epochs: scheduler.step() for group in optimizer.param_groups: if 'lr_constant' in group: group['lr'] = group['lr_constant'] lr_list = scheduler.get_lr() if isinstance(lr_list, list): lr = lr_list[0] outputs = model(input) if isinstance(outputs, dict) and hasattr(model, '_out_features'): outputs = outputs[model._out_features[0]] if args.mixup_enable: mixup_criterion = lambda pred, target, \ lam: (-F.log_softmax(pred, dim=1) * torch.zeros(pred.size()).cuda().scatter_(1, target.data.view(-1, 1), lam.view(-1, 1))) \ .sum(dim=1).mean() loss = utils.mixup_criterion(target_a, target_b, lam)(mixup_criterion, outputs) else: loss = criterion(outputs, target) if 'quant_loss' in args.global_buffer: loss += args.global_buffer['quant_loss'] args.global_buffer.pop('quant_loss') if i % args.iter_size == 0: optimizer.zero_grad() if args.fp16: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() else: loss.backward() if i % args.iter_size == (args.iter_size - 1): if args.grad_clip is not None: nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip) iterations = epoch * length + i if args.wakeup > iterations: for param_group in optimizer.param_groups: if param_group.get('lr_constant', None) is not None: continue param_group['lr'] = param_group['lr'] * ( 1.0 / args.wakeup) * iterations logging.info( 'train {}/{}, change learning rate to lr * {}'.format( i, length, iterations / args.wakeup)) if iterations >= args.warmup: optimizer.step() if 'sgdr' in args.lr_policy and scheduler is not None and torch.__version__ > "1.0.4" and epoch < args.epochs: scheduler.step() for group in optimizer.param_groups: if 'lr_constant' in group: group['lr'] = group['lr_constant'] lr_list = scheduler.get_lr() if isinstance(lr_list, list): lr = lr_list[0] losses.update(loss.item(), input.size(0)) batch_time.update(time.time() - end) end = time.time() if i % args.report_freq == 0: logging.info( 'train %d/%d, loss:%.3f(%.3f), batch time:%.2f(%.2f), data load time: %.2f(%.2f)' % (i, length, losses.val, losses.avg, batch_time.val, batch_time.avg, data_time.val, data_time.avg)) if epoch == 0 and i == 10: logging.info(utils.gpu_info()) if args.delay > 0: time.sleep(args.delay) input = None target = None data = None if 'dali' in args.dataset: loader.reset() return losses.avg
def train(epoch): print('\nEpoch: %d' % epoch) global Train_acc net.train() train_loss = 0 conf_mat = np.zeros((NUM_CLASSES, NUM_CLASSES)) conf_mat_a = np.zeros((NUM_CLASSES, NUM_CLASSES)) conf_mat_b = np.zeros((NUM_CLASSES, NUM_CLASSES)) if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0: frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every decay_factor = learning_rate_decay_rate**frac current_lr = args.lr * decay_factor utils.set_lr(optimizer, current_lr) # set the decayed rate else: current_lr = args.lr print('learning_rate: %s' % str(current_lr)) for batch_idx, (inputs, targets) in enumerate(trainloader): if use_cuda: inputs, targets = inputs.cuda(), targets.cuda() optimizer.zero_grad() if args.augmentation: inputs, targets_a, targets_b, lam = utils.mixup_data( inputs, targets, 0.6, True) inputs, targets_a, targets_b = map(Variable, (inputs, targets_a, targets_b)) else: inputs, targets = Variable(inputs), Variable(targets) _, _, _, _, outputs = net(inputs) if args.augmentation: loss = utils.mixup_criterion(criterion, outputs, targets_a, targets_b, lam) else: loss = criterion(outputs, targets) loss.backward() utils.clip_gradient(optimizer, 0.1) optimizer.step() train_loss += loss.item() if args.augmentation: conf_mat_a += losses.confusion_matrix(outputs, targets_a, NUM_CLASSES) acc_a = sum([conf_mat_a[i, i] for i in range(conf_mat_a.shape[0]) ]) / conf_mat_a.sum() precision_a = np.array([ conf_mat_a[i, i] / (conf_mat_a[i].sum() + 1e-10) for i in range(conf_mat_a.shape[0]) ]) recall_a = np.array([ conf_mat_a[i, i] / (conf_mat_a[:, i].sum() + 1e-10) for i in range(conf_mat_a.shape[0]) ]) mAP_a = sum(precision_a) / len(precision_a) F1_score_a = (2 * precision_a * recall_a / (precision_a + recall_a + 1e-10)).mean() conf_mat_b += losses.confusion_matrix(outputs, targets_b, NUM_CLASSES) acc_b = sum([conf_mat_b[i, i] for i in range(conf_mat_b.shape[0]) ]) / conf_mat_b.sum() precision_b = np.array([ conf_mat_b[i, i] / (conf_mat_b[i].sum() + 1e-10) for i in range(conf_mat_b.shape[0]) ]) recall_b = np.array([ conf_mat_b[i, i] / (conf_mat_b[:, i].sum() + 1e-10) for i in range(conf_mat_b.shape[0]) ]) mAP_b = sum(precision_b) / len(precision_b) F1_score_b = (2 * precision_b * recall_b / (precision_b + recall_b + 1e-10)).mean() acc = lam * acc_a + (1 - lam) * acc_b mAP = lam * mAP_a + (1 - lam) * mAP_b F1_score = lam * F1_score_a + (1 - lam) * F1_score_b else: conf_mat += losses.confusion_matrix(outputs, targets, NUM_CLASSES) acc = sum([conf_mat[i, i] for i in range(conf_mat.shape[0])]) / conf_mat.sum() precision = [ conf_mat[i, i] / (conf_mat[i].sum() + 1e-10) for i in range(conf_mat.shape[0]) ] mAP = sum(precision) / len(precision) recall = [ conf_mat[i, i] / (conf_mat[:, i].sum() + 1e-10) for i in range(conf_mat.shape[0]) ] precision = np.array(precision) recall = np.array(recall) f1 = 2 * precision * recall / (precision + recall + 1e-10) F1_score = f1.mean() #utils.progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% | mAP: %.3f%% | F1: %.3f%%' #% (train_loss/(batch_idx+1), 100.*acc, 100.* mAP, 100.* F1_score)) return train_loss / (batch_idx + 1), 100. * acc, 100. * mAP, 100 * F1_score
def train(train_loader, model, criterion, optimizer, epoch): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() l2_losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() # switch to train mode model.train() end = time.time() for i, data in enumerate(train_loader): # measure data loading time data_time.update(time.time() - end) input = data[0] target = data[-1] if args.l2_loss: dual_input = data[1] dual_input_var = torch.autograd.Variable(dual_input) if CUDA: input = input.cuda(async=True) target = target.cuda(async=True) if args.mixup: input, y_a, y_b, lam = utils.mixup_data(input, target, alpha=1.0) y_a = torch.autograd.Variable(y_a) y_b = torch.autograd.Variable(y_b) input_var = torch.autograd.Variable(input) target_var = torch.autograd.Variable(target) # compute output if args.l2_loss: f1, f2, y1, y2 = model(input_var, dual_input_var) l2_loss = l2_loss_w * mse_loss(f1, f2) output = torch.cat([y1, y2]) target = torch.cat([target, target]) target_var = torch.cat([target_var, target_var]) loss = criterion(output, target_var) loss = loss + l2_loss l2_losses.update(l2_loss.data[0], input.size(0)) else: output = model(input_var) if args.mixup: loss_fun = utils.mixup_criterion(y_a, y_b, lam) loss = loss_fun(criterion, output) else: loss = criterion(output, target_var) # measure accuracy and record loss if args.mixup: _, predicted = torch.max(output.data, 1) prec1 = lam*predicted.eq(y_a.data).cpu().sum() + (1-lam)*predicted.eq(y_b.data).cpu().sum() top1.update(prec1, input.size(0)) else: prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) top1.update(prec1[0], input.size(0)) #top5.update(prec5[0], input.size(0)) losses.update(loss.data[0], input.size(0)) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % 5 == 0: print('Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'L2Loss {l2_loss.val:.4f} ({l2_loss.avg:.4f})\t' 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'.format( epoch, i, len(train_loader), batch_time=batch_time, data_time=data_time, loss=losses, l2_loss=l2_losses, top1=top1)) step = epoch * len(train_loader) + i #print(type(step)) writer.add_scalar('train/acc', prec1[0], step) writer.add_scalar('train/loss', loss.data[0], step) if args.l2_loss: writer.add_scalar('train/l2_loss', l2_loss.data[0], step) for name, param in model.named_parameters(): #print(name, param.data.cpu().numpy().dtype) if name.find('batchnorm')==-1: writer.add_histogram(name, param.data.cpu().numpy(), step)
def train(fold, train_dataset,path, test_dataset): batch_size = 64 train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True) test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False) start_step = 0 end_step = 150 lr = 0.0001 disp_interval = 10 device = torch.device("cuda:0,1,2,3" if torch.cuda.is_available() else "cpu") net = GLENet(n_classes) for param in net.parameters(): param.requires_grad = True if torch.cuda.device_count() > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") net = nn.DataParallel(net) net.to(device) net = net.cuda() #net.apply(init_params) optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=lr) loss_fun = nn.CrossEntropyLoss() best_acc = 0 best_epoch = 0 train_loss_graph = [] test_loss_graph = [] for epoch in range(start_step, end_step+1): net.train() step = 0 train_loss = 0 test_loss = 0 running_acc = 0 total = 0 if epoch == 80: lr = lr*0.1 print('learning rate, ',lr) for g in optimizer.param_groups: g['lr'] = lr for index, data in enumerate(train_loader): #print(index) step = step + 1 label, x1, x2, x3, e1, e2, e3, n1, n2, n3, m1, m2, m3 = data label = label.type(torch.LongTensor) label = network.tensor_to_variable(label, is_cuda=True, is_training=True) x1 = x1.type(torch.FloatTensor) x1 = network.tensor_to_variable(x1, is_cuda=True, is_training=True) x2 = x2.type(torch.FloatTensor) x2 = network.tensor_to_variable(x2, is_cuda=True, is_training=True) x3 = x3.type(torch.FloatTensor) x3 = network.tensor_to_variable(x3, is_cuda=True, is_training=True) e1 = e1.type(torch.FloatTensor) e1 = network.tensor_to_variable(e1, is_cuda=True, is_training=True) e2 = e2.type(torch.FloatTensor) e2 = network.tensor_to_variable(e2, is_cuda=True, is_training=True) e3 = e3.type(torch.FloatTensor) e3 = network.tensor_to_variable(e3, is_cuda=True, is_training=True) n1 = n1.type(torch.FloatTensor) n1 = network.tensor_to_variable(n1, is_cuda=True, is_training=True) n2 = n2.type(torch.FloatTensor) n2 = network.tensor_to_variable(n2, is_cuda=True, is_training=True) n3 = n3.type(torch.FloatTensor) n3 = network.tensor_to_variable(n3, is_cuda=True, is_training=True) m1 = m1.type(torch.FloatTensor) m1 = network.tensor_to_variable(m1, is_cuda=True, is_training=True) m2 = m2.type(torch.FloatTensor) m2 = network.tensor_to_variable(m2, is_cuda=True, is_training=True) m3 = m3.type(torch.FloatTensor) m3 = network.tensor_to_variable(m3, is_cuda=True, is_training=True) mixup, targets_a, targets_b, lam = mixup_data(x3,label,1) out1, out2 = net(x1,x2,mixup,e1,e2,e3,n1,n2,n3,m1,m2,m3) #print(out.shape, label.shape) loss_f = mixup_criterion(targets_a.reshape(-1),targets_b.reshape(-1),lam) loss1 = loss_f(loss_fun, out1) loss2 = loss_fun(out2, label.reshape(-1)) loss = loss1 + loss2 #loss = loss_fun(out, label.reshape(-1)) train_loss += loss out = out1 + out2 _,pred = torch.max(out,1) num_correct = 0 for j in range(label.shape[0]): if pred[i] == label[i]: num_correct += 1 #num_correct = torch.sum(pred == label.data) running_acc += num_correct total += label.size(0) optimizer.zero_grad() loss.backward() optimizer.step() """ if step % disp_interval == 0: ave_loss = train_loss / (batch_size*(index+1)) acc = running_acc / (batch_size*(index+1)) log_text = 'epoch: %4d, step: %4d, loss: %4.6f, Acc: %4.6f' % (epoch+1, step, ave_loss, acc) print(log_text) """ acc = float(running_acc) / total print('epoch: {}, Loss: {}, Acc:{} , acc:{}'.format(epoch+1, train_loss/(index+1), running_acc, acc)) 
        train_loss_graph.append(train_loss / (index + 1))
        net.eval()
        eval_acc = 0
        for tindex, test_data in enumerate(test_loader):
            tlabel, tx1, tx2, tx3, te1, te2, te3, tn1, tn2, tn3, tm1, tm2, tm3 = test_data
            tlabel = network.tensor_to_variable(tlabel.type(torch.LongTensor),
                                                is_cuda=True, is_training=False)
            # cast every test tensor to float and move it to the GPU
            tx1, tx2, tx3, te1, te2, te3, tn1, tn2, tn3, tm1, tm2, tm3 = [
                network.tensor_to_variable(t.type(torch.FloatTensor),
                                           is_cuda=True, is_training=False)
                for t in (tx1, tx2, tx3, te1, te2, te3, tn1, tn2, tn3, tm1, tm2, tm3)]
            tout1, tout2 = net(tx3, tx3, tx3, te3, te3, te3, tn3, tn3, tn3, tm3, tm3, tm3)
            tout = tout1 + tout2
            tloss = loss_fun(tout, tlabel.reshape(-1))
            test_loss += float(tloss)
            _, tpred = torch.max(tout, 1)
            correct = (tpred == tlabel).sum()
            eval_acc += correct
        # parentheses added: the original divided before adding 1
        print('test_acc : {} test_loss: {}'.format(eval_acc, test_loss / (tindex + 1)))
        test_loss_graph.append(test_loss / (tindex + 1))
        if eval_acc > best_acc:
            best_acc = eval_acc
            best_epoch = epoch
            torch.save(net.state_dict(), path + '{}_best.pth'.format(best_epoch))
    return best_epoch, best_acc
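# The per-sample correctness loop in the training snippet above can be
# collapsed into a single tensor comparison.  A small, behavior-equivalent
# sketch (pred and label are assumed to be LongTensors of the same length
# after label.reshape(-1)); the helper name is illustrative:
import torch


def count_correct(pred, label):
    """Vectorized replacement for the element-wise correctness loop."""
    return (pred == label.reshape(-1)).sum().item()

# e.g. inside the batch loop:  num_correct = count_correct(pred, label)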
def main_worker(args, logger):
    try:
        writer = SummaryWriter(logdir=args.sub_tensorboard_dir)

        train_set = RSDataset(rootpth=args.data_dir, mode='train')
        train_loader = DataLoader(train_set,
                                  batch_size=args.batch_size,
                                  drop_last=True,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=args.num_workers)

        # Weight list: per-sample selection probability. Resampling did not
        # work well, so it is not used, but the code is kept as an example
        # for future reference.
        # sampler_weight = train_set.get_sampler_weight()
        #
        # train_sampler = WeightedRandomSampler(sampler_weight,
        #                                       num_samples=100000,  # number of samples drawn per pass
        #                                       replacement=True)
        #
        # train_loader = DataLoader(train_set,
        #                           batch_size=args.batch_size,
        #                           pin_memory=True,
        #                           num_workers=args.num_workers,
        #                           sampler=train_sampler)

        val_set = RSDataset(rootpth=args.data_dir, mode='val')
        val_loader = DataLoader(val_set,
                                batch_size=args.test_batch_size,
                                drop_last=False,
                                shuffle=False,
                                pin_memory=True,
                                num_workers=args.num_workers)

        net = Dense201()
        logger.info('net name: {}'.format(net.__class__.__name__))
        net.train()
        input_ = torch.randn((1, 3, 224, 224))
        writer.add_graph(net, input_)
        net = net.cuda()

        criterion = nn.CrossEntropyLoss().cuda()

        if args.pre_epoch:
            # Pre-training: freeze the earlier layers and only train the newly
            # added fully connected (classifier) layer.
            for name, param in net.named_parameters():
                if 'classifier' not in name:
                    param.requires_grad = False

        optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                              lr=args.base_lr,
                              momentum=0.9,
                              nesterov=args.sgdn,
                              weight_decay=args.weight_decay)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=args.pre_epoch * len(train_loader),
            eta_min=args.min_lr)

        loss_record = []
        iter_step = 0
        running_loss = []
        st = glob_st = time.time()
        total_epoch = args.pre_epoch + args.warmup_epoch + args.normal_epoch
        total_iter_step = len(train_loader) * total_epoch

        logger.info('len(train_set): {}'.format(len(train_set)))
        logger.info('len(train_loader): {}'.format(len(train_loader)))
        logger.info('len(val_set): {}'.format(len(val_set)))
        logger.info('len(val_loader): {}'.format(len(val_loader)))
        logger.info('total_epoch: {}'.format(total_epoch))
        logger.info('total_iter_step: {}'.format(total_iter_step))

        if args.pre_epoch:
            logger.info('----- start pre train ------')

        for epoch in range(total_epoch):
            # Evaluate
            # if epoch % args.eval_fre == 0 and epoch != 0:
            if epoch % args.eval_fre == 0:
                evalute(net, val_loader, writer, epoch, logger)

            # Save
            if epoch % args.save_fre == 0 and epoch > args.save_after:
                model_out_name = osp.join(args.sub_model_out_dir, 'out_{}.pth'.format(epoch))
                # use net.module (not net.modules) when the model is wrapped, e.g. by DataParallel
                state_dict = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
                torch.save(state_dict, model_out_name)

            # After pre-training, unfreeze all parameters and rebuild the optimizer,
            # applying weight decay only to the weights of Linear and Conv2d layers.
            if epoch == args.pre_epoch:
                for param in net.parameters():
                    param.requires_grad = True
                wd_params, nowd_params = [], []
                for name, module in net.named_modules():
                    if isinstance(module, (nn.Linear, nn.Conv2d)):
                        wd_params.append(module.weight)
                        if not module.bias is None:
                            nowd_params.append(module.bias)
                    # TODO: could this kind of param list miss some parameters?
                    elif isinstance(module, nn.BatchNorm2d):
                        nowd_params += list(module.parameters())
                    # else:
                    #     nowd_params += list(module.parameters())
                param_list = [{'params': wd_params},
                              {'params': nowd_params, 'weight_decay': 0}]
                optimizer = optim.SGD(param_list,
                                      lr=args.base_lr,
                                      momentum=0.9,
                                      nesterov=args.sgdn,
                                      weight_decay=args.weight_decay)
                # Rebuild the learning rate scheduler
                if args.warmup_epoch:
                    scheduler = LinearScheduler(optimizer,
                                                start_lr=args.min_lr,
                                                end_lr=args.base_lr,
                                                all_steps=args.warmup_epoch * len(train_loader))
                    logger.info('-------- start warmup for {} epochs -------'.format(args.warmup_epoch))

            # When the normal training phase starts, build a new scheduler
            if epoch == args.pre_epoch + args.warmup_epoch:
                scheduler = optim.lr_scheduler.CosineAnnealingLR(
                    optimizer,
                    T_max=args.normal_epoch * len(train_loader),
                    eta_min=args.min_lr)
                logger.info('---- start normal train for {} epoch ----'.format(args.normal_epoch))

            for img, lb in train_loader:
                iter_step += 1
                img = img.cuda()
                lb = lb.cuda()

                optimizer.zero_grad()

                inputs, targets_a, targets_b, lam = mixup_data(img, lb, args.mixup_alpha)
                outputs = net(inputs)
                loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
                # outputs = net(img)
                # loss = criterion(outputs, lb)

                loss.backward()
                optimizer.step()
                scheduler.step()

                running_loss.append(loss.item())

                if iter_step % args.msg_fre == 0:
                    ed = time.time()
                    spend = ed - st
                    global_spend = ed - glob_st
                    st = ed

                    eta = int((total_iter_step - iter_step) * (global_spend / iter_step))
                    eta = str(datetime.timedelta(seconds=eta))
                    global_spend = str(datetime.timedelta(seconds=(int(global_spend))))

                    avg_loss = np.mean(running_loss)
                    loss_record.append(avg_loss)
                    running_loss = []

                    lr = optimizer.param_groups[0]['lr']

                    msg = '. '.join([
                        'epoch:{epoch}',
                        'iter/total_iter:{iter}/{total_iter}',
                        'lr:{lr:.7f}',
                        'loss:{loss:.4f}',
                        'spend/global_spend:{spend:.4f}/{global_spend}',
                        'eta:{eta}'
                    ]).format(epoch=epoch,
                              iter=iter_step,
                              total_iter=total_iter_step,
                              lr=lr,
                              loss=avg_loss,
                              spend=spend,
                              global_spend=global_spend,
                              eta=eta)
                    logger.info(msg)
                    writer.add_scalar('loss', avg_loss, iter_step)
                    writer.add_scalar('lr', lr, iter_step)

        # Final evaluation after training
        evalute(net, val_loader, writer, args.pre_epoch + args.normal_epoch, logger)

        out_name = osp.join(args.sub_model_out_dir, args.model_out_name)
        torch.save(net.cpu().state_dict(), out_name)
        logger.info('-----------Done!!!----------')
    except:
        logger.exception('Exception logged')
    finally:
        writer.close()
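# main_worker above warms up with a custom LinearScheduler(optimizer, start_lr,
# end_lr, all_steps) whose implementation is not shown in this document.  A
# minimal sketch matching that constructor and the per-iteration step() call
# (illustrative only, not the original class):
class LinearScheduler:
    """Linearly ramps the learning rate from start_lr to end_lr over all_steps calls to step()."""

    def __init__(self, optimizer, start_lr, end_lr, all_steps):
        self.optimizer = optimizer
        self.start_lr = start_lr
        self.end_lr = end_lr
        self.all_steps = all_steps
        self.cur_step = 0

    def step(self):
        self.cur_step = min(self.cur_step + 1, self.all_steps)
        ratio = self.cur_step / float(self.all_steps)
        lr = self.start_lr + (self.end_lr - self.start_lr) * ratio
        for group in self.optimizer.param_groups:
            group['lr'] = lr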
def train_with_exhaustive_testing(epoch):
    """Find the best mixup pair for each picture, then train on the chosen mixes.
    :param epoch: epoch to train
    :return: accuracy
    """
    print('\nEpoch: %d' % epoch)
    train_loss = 0
    correct = 0
    total = 0
    # et_criterion = CustomLoss()
    et_criterion = nn.CrossEntropyLoss(reduction='none')
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        net.eval()
        best_pair_list = None
        lam_list = None
        with torch.no_grad():
            # starttime = datetime.datetime.now()
            for index in range(inputs.size()[0]):
                inputs_next, targets_a, targets_b, lam = exhaustive_mix_data_pre(
                    inputs, targets, index, use_cuda=use_cuda)
                inputs_next, targets_a, targets_b = (
                    Variable(inputs_next), Variable(targets_a), Variable(targets_b))
                outputs = net(inputs_next)
                loss_func = mixup_criterion(targets_a, targets_b, lam)
                best_pair = loss_func(et_criterion, outputs)
                best_pair = best_pair.argmax()
                if best_pair_list is None:
                    best_pair_list = best_pair.unsqueeze(0)
                    lam_list = torch.tensor([lam], dtype=torch.float)
                else:
                    best_pair_list = torch.cat((best_pair_list, best_pair.unsqueeze(0)), 0)
                    lam = torch.tensor([lam], dtype=torch.float)
                    lam_list = torch.cat((lam_list, lam), 0)
            # find_pair_time = datetime.datetime.now()
            # print(str((find_pair_time - starttime).microseconds) + "======")
        net.train()
        with torch.enable_grad():
            optimizer.zero_grad()
            lam_list = lam_list.cuda()
            inputs, targets_a, targets_b, lam = exhausitive_mix_data(
                inputs, best_pair_list, targets, lam_list)
            loss_func = mixup_criterion(targets_a, targets_b, lam)
            outputs = net(inputs)
            loss = loss_func(criterion, outputs)
            loss.backward()
            optimizer.step()
        # train_time = datetime.datetime.now()
        # print(str((train_time - find_pair_time).microseconds) + "--------")
        train_loss += loss.data.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        # correct += lam * predicted.eq(targets_a.data).cpu().sum() + (1 - lam) * predicted.eq(targets_b.data).cpu().sum()
        # correct = correct.item()
        correct = 0
        print(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
              % (train_loss / (batch_idx + 1), 100. * correct / total, correct, total))
    # average over the number of batches (dividing by batch_idx alone is off by one)
    return (train_loss / (batch_idx + 1), 100. * correct / total)
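# train_with_exhaustive_testing relies on nn.CrossEntropyLoss(reduction='none')
# so that the closure returned by mixup_criterion yields one loss value per
# candidate mix, and argmax over that vector selects a pair.  A small sketch of
# that idea in isolation (shapes and values are illustrative):
import torch
import torch.nn as nn

et_criterion = nn.CrossEntropyLoss(reduction='none')
logits = torch.randn(8, 10)            # 8 candidate mixes, 10 classes
y_a = torch.randint(0, 10, (8,))
y_b = torch.randint(0, 10, (8,))
lam = 0.4
# one blended loss per candidate, exactly what the closure form produces here
per_sample = lam * et_criterion(logits, y_a) + (1 - lam) * et_criterion(logits, y_b)
best_pair = per_sample.argmax()        # index of the candidate kept by the snippet above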
def train(args, i):
    '''Training. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      holdout_fold: '1' | 'none', set 1 for development and none for training on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    audio_num = config.audio_num
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = None  # Number of mini-batches to evaluate on training data
    reduce_lr = True
    in_domain_classes_num = len(config.labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_csv = os.path.join(sys.path[0], 'fold' + str(i) + '_train.csv')
    validate_csv = os.path.join(sys.path[0], 'fold' + str(i) + '_test.csv')

    feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins.h5'.format(prefix, frames_per_second, mel_bins))

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins.h5'.format(prefix, frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')
    # Model
    Model = eval(model_type)
    model = Model(in_domain_classes_num, activation='logsoftmax')
    loss_func = nll_loss

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0., amsgrad=True)
    # optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-5)

    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        train_csv=train_csv,
        validate_csv=validate_csv,
        holdout_fold=holdout_fold,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 100 == 0 and iteration >= 1500:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            train_statistics = evaluator.evaluate(
                data_type='train', iteration=iteration,
                max_iteration=None, verbose=False)

            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate', iteration=iteration,
                    max_iteration=None, verbose=False)
                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 100 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 100 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'target']:
                batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)

        # Train on each audio channel separately
        # (loop variable renamed from i so it does not shadow the fold index argument)
        for audio_idx in range(audio_num):
            model.train()
            data, target_a, target_b, lam = mixup_data(
                x=batch_data_dict['feature'][:, audio_idx, :, :],
                y=batch_data_dict['target'],
                alpha=0.2)
            batch_output = model(data)
            # batch_output = model(batch_data_dict['feature'])

            # loss
            # loss = loss_func(batch_output, batch_data_dict['target'])  # immediately overwritten; kept for reference
            loss = mixup_criterion(loss_func, batch_output, target_a, target_b, lam)

            # Backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Stop learning
        if iteration == 4000:
            break

        iteration += 1
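# The audio training loop above uses the functional form
# mixup_criterion(criterion, output, target_a, target_b, lam) together with an
# nll_loss criterion (the model emits log-softmax).  A minimal sketch of
# helpers with those assumed signatures (illustrative, not the project's own code):
import numpy as np
import torch
import torch.nn.functional as F


def mixup_data(x, y, alpha=0.2):
    """Mix a batch with a random permutation of itself and return both target sets."""
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    return lam * x + (1 - lam) * x[index], y, y[index], lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Blend the criterion over the two target sets with weight lam."""
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)


def nll_loss(output, target):
    """Negative log-likelihood on log-probabilities, matching activation='logsoftmax'."""
    return F.nll_loss(output, target)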
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    p1 = 0
    p2 = 0
    p3 = 0
    p4 = 0
    correct = 0
    total = 0
    for batch_idx, ((m_in, m_tar), (f_in, f_tar), (c_in, c_tar), (C_in, C_tar)) in enumerate(
            zip(m_train, f_train, c_train, C_train)):
        # generate mixed inputs, two one-hot label vectors and mixing coefficient
        lam = 0
        inputs = torch.cat((m_in, f_in, c_in, C_in), 1)
        # targets are built for both the CPU and GPU paths (the original only
        # defined them inside the use_cuda branch)
        targets_a = m_tar.type(torch.LongTensor)
        targets_b = f_tar.type(torch.LongTensor)
        targets_c = c_tar.type(torch.LongTensor)
        targets_d = C_tar.type(torch.LongTensor)
        if use_cuda:
            inputs = inputs.cuda()
            targets_a, targets_b, targets_c, targets_d = (
                targets_a.cuda(), targets_b.cuda(), targets_c.cuda(), targets_d.cuda())
        optimizer.zero_grad()
        inputs, targets_a, targets_b, targets_c, targets_d = (
            Variable(inputs), Variable(targets_a), Variable(targets_b),
            Variable(targets_c), Variable(targets_d))
        outputs, outputs2, outputs3, outputs4 = net(inputs, targets_a, targets_b, targets_c, targets_d)
        # rand = np.random.uniform(1, 1)
        loss_func = mixup_criterion(targets_a, targets_b, targets_c, targets_d, lam)
        loss = loss_func(criterion, outputs, outputs2, outputs3, outputs4)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()  # .item() replaces the deprecated loss.data[0]
        _, predicted = torch.max(outputs.data, 1)
        _, predicted2 = torch.max(outputs2.data, 1)
        _, predicted3 = torch.max(outputs3.data, 1)
        _, predicted4 = torch.max(outputs4.data, 1)
        total += targets_a.size(0)
        # --------------
        prec1 = predicted.eq(targets_a.data).cpu().sum()
        prec2 = predicted2.eq(targets_b.data).cpu().sum()
        prec3 = predicted3.eq(targets_c.data).cpu().sum()
        prec4 = predicted4.eq(targets_d.data).cpu().sum()
        p1 += prec1
        p2 += prec2
        p3 += prec3
        p4 += prec4
        # average over the four heads (the original divided by 3)
        correct += (prec1 + prec2 + prec3 + prec4) / 4
        progress_bar(
            batch_idx, len(m_train),
            'Loss: %.3f |P1: %.3f |P2: %.3f |P3: %.3f| P4: %.3f| Acc: %.3f%% (%d/%d)'
            % (train_loss / (batch_idx + 1), 100. * p1 / total, 100. * p2 / total,
               100. * p3 / total, 100. * p4 / total, 100. * correct / total, correct, total))
    # average over the number of batches (dividing by batch_idx alone is off by one)
    return (train_loss / (batch_idx + 1), 100. * correct / total, 100. * p1 / total,
            100. * p2 / total, 100. * p3 / total, 100. * p4 / total, lam)
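# The four-branch train() above builds its loss with
# mixup_criterion(targets_a, targets_b, targets_c, targets_d, lam) and calls the
# result on four outputs.  That helper is not shown in this document; one
# plausible shape, given that each head appears to be scored against its own
# target, is sketched below (purely a guess, not the original implementation):
def mixup_criterion(targets_a, targets_b, targets_c, targets_d, lam):
    """Return a closure that sums the criterion over the four (output, target) pairs."""
    def loss_func(criterion, out1, out2, out3, out4):
        return (criterion(out1, targets_a) + criterion(out2, targets_b) +
                criterion(out3, targets_c) + criterion(out4, targets_d))
    return loss_func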