def train(train_loader, net, criterion, optimizer, epoch, device):
    global writer

    start = time.time()
    net.train()

    train_loss = 0
    correct = 0
    total = 0

    logger.info(" === Epoch: [{}/{}] === ".format(epoch + 1, config.epochs))

    for batch_index, (inputs, targets) in enumerate(train_loader):
        # move tensor to GPU
        inputs, targets = inputs.to(device), targets.to(device)
        if config.mixup:
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, targets, config.mixup_alpha, device)

            outputs = net(inputs)
            loss = mixup_criterion(
                criterion, outputs, targets_a, targets_b, lam)
        else:
            outputs = net(inputs)
            loss = criterion(outputs, targets)

        # zero the gradient buffers
        optimizer.zero_grad()
        # backward
        loss.backward()
        # update weight
        optimizer.step()

        # count the loss and acc
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        if config.mixup:
            correct += (lam * predicted.eq(targets_a).sum().item()
                        + (1 - lam) * predicted.eq(targets_b).sum().item())
        else:
            correct += predicted.eq(targets).sum().item()

        if (batch_index + 1) % 100 == 0:
            logger.info(" == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}".format(
                batch_index + 1, len(train_loader),
                train_loss / (batch_index + 1), 100.0 * correct / total,
                get_current_lr(optimizer)))

    # log the final step of the epoch as well
    logger.info(" == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}".format(
        batch_index + 1, len(train_loader),
        train_loss / (batch_index + 1), 100.0 * correct / total,
        get_current_lr(optimizer)))

    end = time.time()
    logger.info(" == cost time: {:.4f}s".format(end - start))
    train_loss = train_loss / (batch_index + 1)
    train_acc = correct / total

    writer.add_scalar('train_loss', train_loss, global_step=epoch)
    writer.add_scalar('train_acc', train_acc, global_step=epoch)

    return train_loss, train_acc
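# For reference, a minimal sketch of the mixup helpers called above (mixup_data /
# mixup_criterion). This assumes the standard mixup formulation (Zhang et al.,
# "mixup: Beyond Empirical Risk Minimization"); the project's own helpers may
# differ in details such as the alpha == 0 fallback.
import numpy as np
import torch


def mixup_data(x, y, alpha=1.0, device='cuda'):
    """Return mixed inputs, the pair of targets, and the mixing coefficient lam."""
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0)).to(device)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    return mixed_x, y, y[index], lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Convex combination of the losses against both sets of targets."""
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)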
def train(train_loader, net, criterion, optimizer, epoch, device):
    global writer

    start = time.time()
    # switch to train mode; this only matters for layers such as dropout and batchnorm
    net.train()

    train_loss = 0
    correct = 0
    total = 0

    logger.info("====Epoch:[{}/{}]====".format(epoch + 1, config.epochs))

    for batch_index, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        if config.mixup:
            inputs, targets_a, targets_b, lam = utils.mixup_data(
                inputs, targets, config.mixup_alpha, device)
            outputs = net(inputs)
            loss = utils.mixup_criterion(criterion, outputs, targets_a,
                                         targets_b, lam)
        else:
            outputs = net(inputs)
            loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += inputs.size(0)
        if config.mixup:
            correct += (lam * predicted.eq(targets_a).sum().item()
                        + (1 - lam) * predicted.eq(targets_b).sum().item())
        else:
            correct += predicted.eq(targets).sum().item()

        if batch_index % 100 == 99:
            logger.info(
                " == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}"
                .format(batch_index + 1, len(train_loader),
                        train_loss / (batch_index + 1),
                        100.0 * correct / total,
                        utils.get_current_lr(optimizer)))

    end = time.time()
    logger.info(" == cost time: {:.4f}s".format(end - start))
    train_loss = train_loss / (batch_index + 1)
    train_acc = correct / total

    # these are training metrics, so log them under the train_* tags
    writer.add_scalar('train_loss', train_loss, global_step=epoch)
    writer.add_scalar('train_acc', train_acc, global_step=epoch)

    return train_loss, train_acc
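# get_current_lr (called directly above and via utils.get_current_lr) is not
# defined in this section. A plausible minimal implementation, given as an
# assumption rather than the project's exact helper, just reads the learning
# rate of the first parameter group:
def get_current_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']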
                                 threshold=0.005, verbose=True)

n_epochs = 200

if args.resume:
    start_epoch, model, optimizer, scheduler = load_checkpoint(
        model_path=args.model_path, ckpt_name=args.ckpt, device=device,
        model=model, optimizer=optimizer, scheduler=scheduler)
    start_epoch -= 1
    print('Resumed checkpoint {} from {}. Starting at epoch {}.'.format(
        args.ckpt, args.model_path, start_epoch + 1))
    print('Current learning rate: {}'.format(get_current_lr(optimizer)))
    print('*' * 30)
else:
    start_epoch = 0
    # model = init_weights(model)

for epoch in range(start_epoch, n_epochs):
    print('Epoch: %d/%d' % (epoch + 1, n_epochs))
    train_loss = train_model(model, train_loader, criterion, optimizer, device,
                             measure_accuracy=True, opti_batch=args.opti_batch)
    val_loss, val_acc = val_model(model, test_loader, criterion, device)
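# load_checkpoint is used in the resume branch above but not shown in this
# section. The sketch below illustrates what such a helper typically does with
# torch.load; the argument names follow the call site, while the checkpoint keys
# ('epoch', 'model_state_dict', ...) are assumptions about the saved format.
import os
import torch


def load_checkpoint(model_path, ckpt_name, device, model, optimizer, scheduler):
    checkpoint = torch.load(os.path.join(model_path, ckpt_name),
                            map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    if scheduler is not None and checkpoint.get('scheduler_state_dict') is not None:
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    start_epoch = checkpoint['epoch']
    return start_epoch, model, optimizer, scheduler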
def train(train_loader, net, criterion, optimizer, epoch, device,
          layer_inputs, layer_outputs, grad_inputs, grad_outputs, layers, crit, groups):
    global writer

    start = time.time()
    net.train()

    train_loss = 0
    correct = 0
    total = 0
    eps = 0.001

    logger.info(" === Epoch: [{}/{}] === ".format(epoch + 1, config.epochs))

    for batch_index, (inputs, targets) in enumerate(train_loader):
        # move tensor to GPU
        inputs, targets = inputs.to(device), targets.to(device)
        inputs.requires_grad = True

        # reset the hook buffers for this batch
        layer_inputs.clear()
        layer_outputs.clear()
        grad_inputs.clear()
        grad_outputs.clear()

        if config.mixup:
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, targets, config.mixup_alpha, device)

            outputs = net(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        else:
            outputs = net(inputs)
            loss = criterion(outputs, targets)

        # zero the gradient buffers
        optimizer.zero_grad()
        # backward
        loss.backward()

        # fgsm
        # for p in net.parameters():
        #     p.grad *= args.alpha
        # adv_input = inputs + eps * inputs.grad.sign()
        #
        # outputs = net(adv_input)
        #
        # loss_2 = (1 - args.alpha) * criterion(outputs, targets)
        # loss_2.backward()

        # layer_loss = update_grad(net, layer_inputs, layer_outputs,
        #                          grad_inputs, grad_outputs, layers, crit, args.alpha)
        layer_loss = group_noise(net, groups, crit, args.alpha)

        optimizer.step()

        # count the loss and acc
        train_loss += args.alpha * loss.item() + (1 - args.alpha) * layer_loss
        _, predicted = outputs.max(1)
        total += targets.size(0)
        if config.mixup:
            correct += (lam * predicted.eq(targets_a).sum().item()
                        + (1 - lam) * predicted.eq(targets_b).sum().item())
        else:
            correct += predicted.eq(targets).sum().item()

        if (batch_index + 1) % 100 == 0:
            logger.info(
                " == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}"
                .format(batch_index + 1, len(train_loader),
                        train_loss / (batch_index + 1),
                        100.0 * correct / total, get_current_lr(optimizer)))

    # log the final step of the epoch as well
    logger.info(
        " == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}"
        .format(batch_index + 1, len(train_loader),
                train_loss / (batch_index + 1),
                100.0 * correct / total, get_current_lr(optimizer)))

    end = time.time()
    logger.info(" == cost time: {:.4f}s".format(end - start))
    train_loss = train_loss / (batch_index + 1)
    train_acc = correct / total

    writer.add_scalar('train_loss', train_loss, global_step=epoch)
    writer.add_scalar('train_acc', train_acc, global_step=epoch)

    return train_loss, train_acc
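# The commented-out "fgsm" branch above follows the standard fast gradient sign
# method: perturb the input along the sign of its gradient and weight a second
# loss on the adversarial example by (1 - alpha). A standalone sketch of that
# idea (the function name and default eps/alpha values are illustrative
# assumptions, not part of this project):
def fgsm_adversarial_loss(net, criterion, inputs, targets, eps=0.001, alpha=0.5):
    """Build an FGSM example from inputs whose .grad was populated by a prior
    loss.backward() call, and return the weighted adversarial loss."""
    adv_inputs = (inputs + eps * inputs.grad.sign()).detach()
    adv_outputs = net(adv_inputs)
    return (1 - alpha) * criterion(adv_outputs, targets)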