def train(train_loader, epoch, model, optimizer, criterion, device):
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(len(train_loader), top1,
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    for i, (inputs, target) in enumerate(train_loader):
        inputs = inputs.to(device)
        target = target.to(device)

        # compute output
        output = model(inputs)
        loss = criterion(output, target)

        # measure and record accuracy
        acc1 = accuracy(output, target, topk=(1,))
        top1.update(acc1[0].item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % PRINTFREQ == 0:
            progress.print(i)

    print('=> Acc@1 {top1.avg:.3f}'.format(top1=top1))
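# The helpers used throughout these routines (AverageMeter, ProgressMeter,
# accuracy) are not defined in this file. A minimal sketch, assuming they
# follow the PyTorch ImageNet example (pytorch/examples); the variants below
# that pass logger= would need a small ProgressMeter extension:

import torch


class AverageMeter:
    """Tracks the latest value, running sum, count, and average of a metric."""

    def __init__(self, name, fmt=':f'):
        self.name, self.fmt = name, fmt
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter:
    """Prints the batch index followed by every registered meter."""

    def __init__(self, num_batches, *meters, prefix=""):
        self.num_batches = num_batches
        self.meters = meters
        self.prefix = prefix

    def print(self, batch):
        entries = ['{}[{}/{}]'.format(self.prefix, batch, self.num_batches)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))


def accuracy(output, target, topk=(1,)):
    """Computes top-k accuracy (in percent) over a batch of logits.

    Returns one 1-element tensor per k, so callers can write acc1[0].item().
    """
    with torch.no_grad():
        maxk = max(topk)
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / target.size(0)))
        return res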
def train(train_loader, model, criteria, optimizer, device, batch_size):
    model.train()

    evaluator = Evaluator(21)  # 21 classes (e.g. PASCAL VOC: 20 + background)
    evaluator.reset()
    train_loss = AverageMeter("Loss", ":.4")
    progress = ProgressMeter(len(train_loader), train_loss)

    for i, (image, mask) in enumerate(train_loader):
        image = image.to(device)
        mask = mask.to(device)

        output = model(image)
        loss = criteria(output, mask)

        # accumulate the confusion matrix for pixel-accuracy/mIoU metrics
        # (once per batch; a second add_batch after the loop would double-count
        # the final batch)
        predict = np.argmax(output.data.cpu().numpy(), axis=1)
        target = mask.cpu().numpy()
        evaluator.add_batch(target, predict)
        train_loss.update(loss.item(), batch_size)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            progress.print(i)

    # report the epoch-average loss rather than the last batch's value
    info = {"loss": train_loss.avg,
            "pixel acc": evaluator.Pixel_Accuracy(),
            "mean acc": evaluator.Pixel_Accuracy_Class(),
            "miou": evaluator.Mean_Intersection_over_Union()}
    return info
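# Evaluator is assumed to be the confusion-matrix segmentation evaluator
# popularized by pytorch-deeplab-xception; a minimal sketch consistent with
# the metric names used above:

import numpy as np


class Evaluator:
    """Accumulates a confusion matrix and derives pixel/IoU metrics from it."""

    def __init__(self, num_class):
        self.num_class = num_class
        self.confusion_matrix = np.zeros((num_class, num_class))

    def reset(self):
        self.confusion_matrix = np.zeros((self.num_class, self.num_class))

    def _generate_matrix(self, gt_image, pre_image):
        # ignore labels outside [0, num_class), e.g. a void/ignore label
        mask = (gt_image >= 0) & (gt_image < self.num_class)
        label = self.num_class * gt_image[mask].astype('int') + pre_image[mask]
        count = np.bincount(label, minlength=self.num_class ** 2)
        return count.reshape(self.num_class, self.num_class)

    def add_batch(self, gt_image, pre_image):
        self.confusion_matrix += self._generate_matrix(gt_image, pre_image)

    def Pixel_Accuracy(self):
        return np.diag(self.confusion_matrix).sum() / self.confusion_matrix.sum()

    def Pixel_Accuracy_Class(self):
        acc = np.diag(self.confusion_matrix) / self.confusion_matrix.sum(axis=1)
        return np.nanmean(acc)

    def Mean_Intersection_over_Union(self):
        iou = np.diag(self.confusion_matrix) / (
            self.confusion_matrix.sum(axis=1) + self.confusion_matrix.sum(axis=0)
            - np.diag(self.confusion_matrix))
        return np.nanmean(iou)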
def train(train_loader, epoch, model, optimizer, criterion):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, data_time, losses,
                             top1, top5, prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        input = input.cuda()
        target = target.cuda()

        r = np.random.rand(1)
        if args.beta > 0 and r < args.cutmix_prob:
            # generate a CutMix sample: paste a random patch from a shuffled
            # copy of the batch and mix the two labels by patch area
            lam = np.random.beta(args.beta, args.beta)
            rand_index = torch.randperm(input.size(0)).cuda()
            target_a = target
            target_b = target[rand_index]
            bbx1, bby1, bbx2, bby2 = rand_bbox(input.size(), lam)
            input[:, :, bbx1:bbx2, bby1:bby2] = \
                input[rand_index, :, bbx1:bbx2, bby1:bby2]
            # adjust lambda to exactly match the pixel ratio
            lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) /
                       (input.size(-1) * input.size(-2)))
            # compute output
            output = model(input)
            loss = (criterion(output, target_a) * lam +
                    criterion(output, target_b) * (1. - lam))
        else:
            # compute output
            output = model(input)
            loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(acc1[0].item(), input.size(0))
        top5.update(acc5[0].item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.print(i)

    print('=> Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))
    return top1.avg
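# rand_bbox is not defined here. A sketch matching the official CutMix
# reference implementation (clovaai/CutMix-PyTorch): it samples a patch whose
# area fraction is roughly (1 - lam), centered uniformly at random:

import numpy as np


def rand_bbox(size, lam):
    """Returns (bbx1, bby1, bbx2, bby2) for an NCHW input size."""
    W, H = size[2], size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # uniformly sampled patch center
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    # clip the patch to the image boundary; this clipping is why the caller
    # re-adjusts lam to the exact pixel ratio after the cut
    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)
    return bbx1, bby1, bbx2, bby2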
def validate_identification(cfg, model, test_loader, criterion):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(test_loader), batch_time, losses, top1, top5,
                             prefix='Test: ', logger=logger)

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(test_loader):
            input = input.cuda(non_blocking=True)  # no .squeeze(0) here
            target = target.cuda(non_blocking=True)

            # modified evaluation: score the batch directly instead of
            # averaging segment outputs through model.forward_classifier
            # (cf. the segment-averaging variant of this function below)
            output = model(input)

            acc1 = accuracy(output, target, topk=(1,))
            top1.update(acc1[0], input.size(0))
            # top5 is registered for display parity but not updated here

            loss = criterion(output, target)
            losses.update(loss.item(), 1)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % 2000 == 0:
                progress.print(i)

        # logger.info formatting is problematic here; printing the top1
        # meter object directly is fine
        print(top1)

    return top1.avg
def train_epoch(train_loader, model, criterion, optimizer, epoch, args):
    """
    Train the model for one epoch.
    :param train_loader: training set loader
    :param model: the model
    :param criterion: loss function
    :param optimizer: optimizer
    :param epoch: current epoch index
    :param args: training hyperparameters
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, data_time, losses,
                             top1, prefix=f"Epoch: [{epoch}]")

    # switch to train mode
    model.train()

    end_time = time.time()
    for i, (images, scores, _) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end_time)

        if args.cuda:
            images = images.cuda(args.gpu, non_blocking=True)
            scores = scores.cuda(args.gpu, non_blocking=True)

        # forward pass
        outputs = model(images)
        loss = criterion(outputs, scores)

        # compute gradients and update (mixed precision on GPU via apex amp)
        optimizer.zero_grad()
        if args.cuda:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()

        # update metrics; a ranking criterion consumes image pairs,
        # so the effective batch size is halved
        acc1, _, _ = criterion.accuracy(outputs, scores)
        batch_size = images.size(0) // 2 if args.criterion == 'rank' else images.size(0)
        losses.update(loss.detach().cpu().item(), batch_size)
        top1.update(acc1.item(), batch_size)

        # measure elapsed time per batch
        batch_time.update(time.time() - end_time)
        end_time = time.time()

        if i % args.print_freq == 0:
            progress.print(i)

    return losses.avg, top1.avg
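# The amp.scale_loss context above is NVIDIA apex mixed precision, which only
# works if the model and optimizer were wrapped once before the training loop.
# A minimal setup sketch (the 'O1' opt_level is an illustrative choice, not
# taken from this code):

def setup_amp(model, optimizer):
    """Wrap model/optimizer for apex mixed-precision training (done once)."""
    from apex import amp
    return amp.initialize(model, optimizer, opt_level='O1')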
def validate(val_loader, model, criterion):
    batch_time = AverageMeter("Time", ":6.3f")
    losses = AverageMeter("Loss", ":.4e")
    top1 = AverageMeter("Acc@1", ":6.2f")
    top5 = AverageMeter("Acc@5", ":6.2f")
    progress = ProgressMeter(len(val_loader), batch_time, losses, top1, top5,
                             prefix="Test: ")

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            input = input.cuda()
            target = target.cuda()

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss (the losses meter already
            # tracks the running average, so no separate accumulator is kept)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))
            top5.update(acc5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)

            if i % args.print_freq == 0:
                progress.print(i)
            end = time.time()

    print("====> Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}".format(
        top1=top1, top5=top5))
    return top1.avg
def validate_identification(cfg, model, test_loader, criterion):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(test_loader), batch_time, losses, top1, top5,
                             prefix='Test: ', logger=logger)

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(test_loader):
            # each item is a batch of segments from a single utterance
            input = input.cuda(non_blocking=True).squeeze(0)
            target = target.cuda(non_blocking=True)

            # average the segment embeddings into one utterance-level
            # embedding, then classify
            output = model(input)
            output = torch.mean(output, dim=0, keepdim=True)
            output = model.forward_classifier(output)

            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            top1.update(acc1[0], input.size(0))
            top5.update(acc5[0], input.size(0))

            loss = criterion(output, target)
            losses.update(loss.item(), 1)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % 2000 == 0:
                progress.print(i)

        logger.info('Test Acc@1: {:.8f} Acc@5: {:.8f}'.format(
            top1.avg, top5.avg))

    return top1.avg
def validate_verification(cfg, model, test_loader):
    batch_time = AverageMeter('Time', ':6.3f')
    progress = ProgressMeter(len(test_loader), batch_time, prefix='Test: ',
                             logger=logger)

    # switch to evaluate mode
    model.eval()
    labels, distances = [], []

    with torch.no_grad():
        end = time.time()
        for i, (input1, input2, label) in enumerate(test_loader):
            input1 = input1.cuda(non_blocking=True).squeeze(0)
            input2 = input2.cuda(non_blocking=True).squeeze(0)
            label = label.cuda(non_blocking=True)

            # compute output
            outputs1 = model(input1).mean(dim=0).unsqueeze(0)
            outputs2 = model(input2).mean(dim=0).unsqueeze(0)
            dists = F.cosine_similarity(outputs1, outputs2)
            dists = dists.data.cpu().numpy()
            distances.append(dists)
            labels.append(label.data.cpu().numpy())

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % 2000 == 0:
                progress.print(i)

    labels = np.array([sublabel for label in labels for sublabel in label])
    distances = np.array(
        [subdist for dist in distances for subdist in dist])

    eer = compute_eer(distances, labels)
    logger.info('Test EER: {:.8f}'.format(np.mean(eer)))

    return eer
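# compute_eer is not defined here. A minimal sketch: the equal error rate is
# the ROC operating point where the false-acceptance and false-rejection
# rates coincide, approximated below at the nearest available threshold:

import numpy as np
from sklearn.metrics import roc_curve


def compute_eer(scores, labels):
    """scores: similarity per pair (higher = more likely the same identity);
    labels: 1 for a genuine pair, 0 for an impostor pair."""
    fpr, tpr, _ = roc_curve(labels, scores, pos_label=1)
    fnr = 1.0 - tpr
    idx = np.nanargmin(np.abs(fnr - fpr))
    return (fpr[idx] + fnr[idx]) / 2.0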
def train(cfg, model, optimizer, train_loader, val_loader, criterion,
          architect, epoch, writer_dict, lr_scheduler=None):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    alpha_entropies = AverageMeter('Entropy', ':.4e')
    progress = ProgressMeter(len(train_loader), batch_time, data_time, losses,
                             top1, top5, alpha_entropies,
                             prefix="Epoch: [{}]".format(epoch), logger=logger)
    writer = writer_dict['writer']

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        global_steps = writer_dict['train_global_steps']
        if lr_scheduler:
            current_lr = lr_scheduler.set_lr(optimizer, global_steps, epoch)
        else:
            current_lr = cfg.TRAIN.LR

        # measure data loading time
        data_time.update(time.time() - end)

        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # draw a validation batch for the architecture step; note that
        # next(iter(val_loader)) rebuilds the iterator every step, which is
        # slow -- a persistent iterator would be cheaper
        input_search, target_search = next(iter(val_loader))
        input_search = input_search.cuda(non_blocking=True)
        target_search = target_search.cuda(non_blocking=True)

        # update the architecture parameters and record their entropy
        architect.step(input_search, target_search)
        alpha_entropy = architect.model.compute_arch_entropy()
        alpha_entropies.update(alpha_entropy.mean().item(), input.size(0))

        # compute output
        output = model(input)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        top1.update(acc1[0], input.size(0))
        top5.update(acc5[0], input.size(0))

        loss = criterion(output, target)
        losses.update(loss.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # write to logger
        writer.add_scalar('lr', current_lr, global_steps)
        writer.add_scalar('train_loss', losses.val, global_steps)
        writer.add_scalar('arch_entropy', alpha_entropies.val, global_steps)
        writer_dict['train_global_steps'] = global_steps + 1

        # log accuracy for cross-entropy loss
        writer.add_scalar('train_acc1', top1.val, global_steps)
        writer.add_scalar('train_acc5', top5.val, global_steps)

        if i % cfg.PRINT_FREQ == 0:
            progress.print(i)
def train_epoch(train_loader, model, criterion, optimizer, epoch, args):
    """
    Train the model for one epoch.
    :param train_loader: training set loader
    :param model: the model
    :param criterion: loss function
    :param optimizer: optimizer
    :param epoch: current epoch index
    :param args: training hyperparameters
    """
    global mix_up, multi_scale, bn_gammas, net_weights

    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, data_time, losses,
                             top1, prefix=f"Epoch: [{epoch}]")

    # switch to train mode
    model.train()

    end_time = time.time()
    for i, (images, targets, _, weights) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end_time)

        # mixup and multi-scale augmentation apply to the training set only
        images, targets1, targets2, mix_rate = mix_up(images, targets)
        if args.multi_scale:
            images = multi_scale(images)
        images = DataLoaderX.normalize(images, args)

        if args.cuda:
            images = images.cuda(args.gpu, non_blocking=True)
            targets1 = targets1.cuda(args.gpu, non_blocking=True)
            weights = weights.cuda(args.gpu, non_blocking=True)
            if targets2 is not None:
                targets2 = targets2.cuda(args.gpu, non_blocking=True)
                mix_rate = mix_rate.cuda(args.gpu, non_blocking=True)

        output = model(images)
        loss = criterion(output, targets1, weights)
        if targets2 is not None:
            loss = mix_rate * loss + (1.0 - mix_rate) * criterion(
                output, targets2, weights)
            # report accuracy against the dominant mixup target
            if mix_rate < 0.5:
                targets1 = targets2

        optimizer.zero_grad()
        if args.cuda:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        # network slimming: subgradient of an L1 penalty on BN scale factors
        if args.sparsity:
            for gamma in bn_gammas:
                gamma.data.add_(-torch.sign(gamma.data),
                                alpha=args.slim * optimizer.param_groups[0]['lr'])

        # weight decay, applied manually to the network weights
        for param in net_weights:
            param.data.add_(-param.data,
                            alpha=args.weight_decay * optimizer.param_groups[0]['lr'])

        optimizer.step()

        # update metrics
        acc1 = accuracy(output, targets1)
        losses.update(loss.detach().cpu().item(), images.size(0))
        top1.update(acc1.item(), images.size(0))

        # measure elapsed time per batch
        batch_time.update(time.time() - end_time)
        end_time = time.time()

        if i % args.print_freq == 0:
            progress.print(i)

    return losses.avg, top1.avg
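# mix_up above is a module-level callable defined elsewhere. A sketch
# consistent with how train_epoch consumes it (returns the mixed batch, both
# label sets, and the mix rate, with targets2=None when no mixing happens);
# the alpha and prob parameters are assumptions, not taken from this code:

import numpy as np
import torch


def make_mix_up(alpha=0.2, prob=0.5):
    """Builds a mixup function: convexly blend each image with a randomly
    permuted partner and keep both label sets plus the blend coefficient."""
    def mix_up(images, targets):
        if np.random.rand() >= prob:
            return images, targets, None, None
        lam = np.random.beta(alpha, alpha)
        index = torch.randperm(images.size(0))
        mixed = lam * images + (1.0 - lam) * images[index]
        return mixed, targets, targets[index], torch.tensor(lam)
    return mix_up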
def train(train_loader, epoch, model, optimizer, criterion):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, losses, top1, top5,
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        input = input.cuda()
        target = target.cuda()

        # RICAP-style crop-and-patch augmentation: get the image size
        I_x, I_y = input.size()[2:]

        # draw a boundary position
        w = int(np.round(I_x * np.random.beta(0.3, 0.3)))
        h = int(np.round(I_y * np.random.beta(0.3, 0.3)))
        w_ = [w, I_x - w, w, I_x - w]
        h_ = [h, h, I_y - h, I_y - h]

        # select and crop four images
        cropped_images = {}
        c_ = {}
        W_ = {}
        for k in range(4):
            index = torch.randperm(input.size(0)).cuda()
            x_k = np.random.randint(0, I_x - w_[k] + 1)
            y_k = np.random.randint(0, I_y - h_[k] + 1)
            cropped_images[k] = input[index][:, :, x_k:x_k + w_[k],
                                             y_k:y_k + h_[k]]
            c_[k] = target[index]
            W_[k] = (w_[k] * h_[k]) / (I_x * I_y)

        # patch the four crops back into full-size images
        patched_images = torch.cat(
            (torch.cat((cropped_images[0], cropped_images[1]), 2),
             torch.cat((cropped_images[2], cropped_images[3]), 2)), 3)

        # compute output; the loss is the area-weighted sum over the
        # four patch labels
        output = model(patched_images)
        loss = sum([W_[k] * criterion(output, c_[k]) for k in range(4)])

        # accuracy is measured against the original (unpatched) targets
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(acc1[0].item(), input.size(0))
        top5.update(acc5[0].item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            progress.print(i)

    print('==> Train Accuracy: Acc@1 {top1.avg:.3f} || Acc@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))
    return top1.avg