def test(val_loader, model, criterion, epoch, use_cuda):
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode and disable gradients
    model.eval()
    torch.set_grad_enabled(False)

    end = time.time()
    bar = Bar('Processing', max=len(val_loader))
    for batch_idx, (inputs, targets) in enumerate(val_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = ('({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | '
                      'Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | '
                      'top1: {top1: .4f} | top5: {top5: .4f}').format(
                          batch=batch_idx + 1,
                          size=len(val_loader),
                          data=data_time.avg,
                          bt=batch_time.avg,
                          total=bar.elapsed_td,
                          eta=bar.eta_td,
                          loss=losses.avg,
                          top1=top1.avg,
                          top5=top5.avg,
                      )
        bar.next()
    print(bar.suffix)
    bar.finish()
    return (losses.avg, top1.avg)
def train(train_loader, model, criterion, optimizer, epoch, use_cuda):
    # switch to train mode and enable gradients
    model.train()
    torch.set_grad_enabled(True)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=len(train_loader))
    show_step = len(train_loader) // 10
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        # skip incomplete batches (e.g. the last batch of the epoch)
        batch_size = inputs.size(0)
        if batch_size < args.train_batch:
            continue

        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda(non_blocking=True)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = ('({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | '
                      'Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | '
                      'top1: {top1: .4f} | top5: {top5: .4f}').format(
                          batch=batch_idx + 1,
                          size=len(train_loader),
                          data=data_time.val,
                          bt=batch_time.val,
                          total=bar.elapsed_td,
                          eta=bar.eta_td,
                          loss=losses.avg,
                          top1=top1.avg,
                          top5=top5.avg,
                      )
        if batch_idx % show_step == 0:
            print(bar.suffix)
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
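# The loops above rely on an `AverageMeter` helper, an `accuracy` function, and
# the `Bar` progress bar (from the `progress` package), none of which are shown
# in this section. Below is a minimal sketch of the first two, assuming the
# common ImageNet-example versions of these utilities (the constructor also
# accepts an optional name/format, as used by the quantization snippets later
# on); the actual implementations in this repo may differ.
import torch


class AverageMeter(object):
    """Tracks the most recent value, running sum, count and average."""

    def __init__(self, name='', fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes the top-k accuracy (in percent) for the given logits."""
    maxk = max(topk)
    batch_size = target.size(0)

    # indices of the top-k predictions, shape (maxk, batch_size)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res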
def validate(test_loader, model, args, config):
    """Test the model on the validation set.

    We follow "fully convolutional" testing:
      * Scale the video so that its shortest side is 256
      * Uniformly sample 10 clips within a video
      * For each clip, crop K=3 regions of 256x256 along the longest side

    This is equivalent to 30-crop testing.
    """
    # set up meters
    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    cm_meter = AverageMeter()
    model.eval()

    # data prefetcher with normalization
    test_loader = ClipPrefetcherJoint(test_loader,
                                      config['input']['mean'],
                                      config['input']['std'])

    # loop over validation set
    end = time.time()
    input, target = test_loader.next()
    i = 0

    # for large models: find the largest divisor of the clip dimension
    # so that each forward pass fits in memory
    if args.slice:
        batch_size = input.size(1)
        max_split_size = 1
        for split_size in range(2, batch_size):
            if (batch_size % split_size) == 0 and split_size > max_split_size:
                max_split_size = split_size
        num_batch_splits = batch_size // max_split_size
        print("Split the input by size: {:d}x{:d}".format(
            max_split_size, num_batch_splits))

    while input is not None:
        i += 1
        # disable gradients during evaluation
        with torch.no_grad():
            if args.slice:
                # slice the inputs for testing
                split_inputs = torch.split(input, max_split_size, dim=1)
                split_outputs = []
                for idx in range(num_batch_splits):
                    split_output = model(split_inputs[idx])
                    # test-time augmentation (minor performance boost):
                    # always flip the last dim (width)
                    flipped_split_input = torch.flip(split_inputs[idx], (-1, ))
                    flipped_split_output = model(flipped_split_input)
                    split_output = 0.5 * (split_output + flipped_split_output)
                    split_outputs.append(split_output)
                output = torch.mean(torch.stack(split_outputs), dim=0)
            else:
                # forward all inputs at once
                output, _, _ = model(input)
                # test-time augmentation (minor performance boost):
                # always flip the last dim (width)
                flipped_input = torch.flip(input, (-1, ))
                flipped_output, _, _ = model(flipped_input)
                output = 0.5 * (output + flipped_output)

        # measure accuracy and record the confusion matrix
        acc1, acc5 = accuracy(output.data, target[0], topk=(1, 5))
        top1.update(acc1.item(), input.size(0))
        top5.update(acc5.item(), input.size(0))
        batch_cm = confusion_matrix(output.data, target[0])
        cm_meter.update(batch_cm.data.cpu().double())

        # prefetch next batch
        input, target = test_loader.next()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # printing
        if i % (args.print_freq * 2) == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Acc@1 {top1.val:.2f} ({top1.avg:.2f})\t'
                  'Acc@5 {top5.val:.2f} ({top5.avg:.2f})'.format(
                      i, len(test_loader), batch_time=batch_time,
                      top1=top1, top5=top5))

    cls_acc = mean_class_accuracy(cm_meter.sum)
    print('***Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f} '
          'Mean Cls Acc {cls_acc:.3f}'.format(
              top1=top1, top5=top5, cls_acc=100 * cls_acc))
    return top1.avg, top5.avg
def validate(val_loader, model, epoch, args, config):
    """Test the model on the validation set."""
    # set up meters
    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    cm_meter = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # data prefetcher with normalization
    val_loader = ClipPrefetcherJoint(val_loader,
                                     config['input']['mean'],
                                     config['input']['std'])

    # loop over validation set
    end = time.time()
    input, target = val_loader.next()
    i = 0
    while input is not None:
        i += 1
        with torch.no_grad():
            # forward the model (without gradients)
            output = model(input)

        # measure accuracy and record the confusion matrix
        acc1, acc5 = accuracy(output[0].data, target[0], topk=(1, 5))
        batch_cm = confusion_matrix(output[0].data, target[0])
        if args.distributed:
            # average the metrics across all processes (sum for the confusion matrix)
            reduced_acc1 = reduce_tensor(acc1, args.world_size)
            reduced_acc5 = reduce_tensor(acc5, args.world_size)
            reduced_cm = reduce_tensor(batch_cm.data, args.world_size, avg=False)
        else:
            reduced_acc1 = acc1
            reduced_acc5 = acc5
            reduced_cm = batch_cm.data

        top1.update(reduced_acc1.item(), input.size(0))
        top5.update(reduced_acc5.item(), input.size(0))
        cm_meter.update(reduced_cm.cpu().clone())

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # printing (only on the first GPU)
        if i % (args.print_freq * 2) == 0 and (args.local_rank == 0):
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Acc@1 {top1.val:.2f} ({top1.avg:.2f})\t'
                  'Acc@5 {top5.val:.2f} ({top5.avg:.2f})'.format(
                      i, len(val_loader), batch_time=batch_time,
                      top1=top1, top5=top5))

        # prefetch next batch
        input, target = val_loader.next()

    # finish up
    if args.local_rank == 0:
        cls_acc = 100 * mean_class_accuracy(cm_meter.sum)
        print('******Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f} '
              'Cls Acc {cls_acc:.3f}'.format(
                  top1=top1, top5=top5, cls_acc=cls_acc))
        # log top-1/5 acc
        writer.add_scalars('data/top1_accuracy', {"val": top1.avg}, epoch + 1)
        writer.add_scalars('data/top5_accuracy', {"val": top5.avg}, epoch + 1)
        writer.add_scalars('data/mean_cls_acc', {"val": cls_acc}, epoch + 1)

    return top1.avg, top5.avg
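# `confusion_matrix` and `mean_class_accuracy` are assumed to build a
# (num_classes x num_classes) count matrix from the predictions and to average
# the per-class recalls, respectively. A minimal sketch of that kind of helper
# follows; the repo's own helpers may differ, e.g. in how the number of classes
# is inferred or how ties and empty classes are handled.
import torch


def confusion_matrix(output, target):
    """Returns a C x C matrix where entry (i, j) counts samples of true class i
    that were predicted as class j."""
    num_classes = output.size(1)
    pred = output.argmax(dim=1)
    cm = torch.zeros(num_classes, num_classes, device=output.device)
    for t, p in zip(target.view(-1), pred.view(-1)):
        cm[t.long(), p.long()] += 1
    return cm


def mean_class_accuracy(cm):
    """Mean of per-class recalls (diagonal over row sums); empty classes
    contribute zero accuracy."""
    per_class = cm.diag() / cm.sum(dim=1).clamp(min=1)
    return per_class.mean().item()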
def train(train_loader, model, optimizer, scheduler, epoch, args, config):
    """Train the model for one epoch."""
    # set up meters
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    cm_meter = AverageMeter()
    # number of iterations per epoch
    num_iters = len(train_loader)

    # switch to train mode
    model.train()

    # data prefetcher with normalization
    train_loader = ClipPrefetcherJoint(train_loader,
                                       config['input']['mean'],
                                       config['input']['std'])

    # main loop
    end = time.time()
    input, target = train_loader.next()
    i = 0
    while input is not None:
        # input & target are pre-fetched
        i += 1

        # compute output and loss
        output, loss = model(input, targets=target)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # printing (on the first GPU)
        if (i % args.print_freq) == 0:
            # only compute the stats when necessary
            # to avoid additional cost at every iteration
            acc1, acc5 = accuracy(output.data, target[0], topk=(1, 5))
            batch_cm = confusion_matrix(output.data, target[0])

            # measure accuracy and record loss
            if args.distributed:
                reduced_loss = reduce_tensor(loss.data, args.world_size)
                reduced_acc1 = reduce_tensor(acc1, args.world_size)
                reduced_acc5 = reduce_tensor(acc5, args.world_size)
                reduced_cm = reduce_tensor(batch_cm.data, args.world_size, avg=False)
            else:
                reduced_loss = loss.mean().data
                reduced_acc1 = acc1
                reduced_acc5 = acc5
                reduced_cm = batch_cm.data

            losses.update(reduced_loss.item(), input.size(0))
            top1.update(reduced_acc1.item(), input.size(0))
            top5.update(reduced_acc5.item(), input.size(0))
            cm_meter.update(reduced_cm.cpu().clone())

            # measure elapsed time (averaged over print_freq iterations)
            torch.cuda.synchronize()
            batch_time.update((time.time() - end) / args.print_freq)
            end = time.time()

            if args.local_rank == 0:
                lr = scheduler.get_lr()[0]
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.3f} ({loss.avg:.3f})\t'
                      'Acc@1 {top1.val:.2f} ({top1.avg:.2f})\t'
                      'Acc@5 {top5.val:.2f} ({top5.avg:.2f})'.format(
                          epoch + 1, i, num_iters, batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5))
                # log loss / lr
                writer.add_scalar('data/training_loss',
                                  losses.val, epoch * num_iters + i)
                writer.add_scalar('data/learning_rate',
                                  lr, epoch * num_iters + i)

        # step the lr scheduler after each iteration
        scheduler.step()

        # prefetch next batch
        input, target = train_loader.next()

    # finish up
    if args.local_rank == 0:
        # print the final learning rate
        lr = scheduler.get_lr()[0]
        cls_acc = 100 * mean_class_accuracy(cm_meter.sum)
        print("[Train]: Epoch {:d} finished with lr={:f}".format(epoch + 1, lr))
        # log top-1/5 acc
        writer.add_scalars('data/top1_accuracy', {"train": top1.avg}, epoch + 1)
        writer.add_scalars('data/top5_accuracy', {"train": top5.avg}, epoch + 1)
        writer.add_scalars('data/mean_cls_acc', {"train": cls_acc}, epoch + 1)
    return
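# `reduce_tensor`, used above when args.distributed is set, gathers a metric
# tensor across all processes so that every rank logs the same value. A minimal
# sketch using torch.distributed is shown below; the `avg` flag mirrors how it
# is called above, but the real helper may be implemented differently (the
# single-argument variant used in the mixed-precision loops later in this
# section presumably infers the world size internally).
import torch
import torch.distributed as dist


def reduce_tensor(tensor, world_size, avg=True):
    """All-reduce a tensor across processes; average it unless avg=False."""
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    if avg:
        rt /= world_size
    return rt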
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')
cnt = 0
for image, target in data_loader_test:
    # resize each image to the input resolution expected by the model
    image_resized = []
    for im in image:
        image_resized += [cv2.resize(im, (width, height))]
    image = np.asarray(image_resized)

    # normalize for float models, keep uint8 for quantized models
    if floating_model:
        input_data = (np.float32(image) - input_mean) / input_std
    else:
        input_data = image.astype(np.uint8)

    # run inference with the TFLite interpreter
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])

    cnt += 1
    # offset labels by 1 (the model appears to reserve index 0 for a background class)
    acc1, acc5 = accuracy(torch.from_numpy(output_data),
                          torch.from_numpy(target + 1), topk=(1, 5))
    print('.', end='')
    top1.update(acc1[0], image.shape[0])
    top5.update(acc5[0], image.shape[0])
    if cnt >= 1000:
        break

print('\nEvaluation accuracy on %d images, %2.3f %2.3f'
      % (len(data_loader_test), top1.avg, top5.avg))
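# The loop above assumes a TFLite interpreter that has already been created and
# queried for its input/output details. A minimal setup sketch is shown below;
# the model path and the normalization constants are assumptions for
# illustration, and `floating_model` is derived from the input tensor's dtype.
import numpy as np
import tensorflow as tf

# hypothetical path to the converted TFLite model
interpreter = tf.lite.Interpreter(model_path='data/mobilenet_v2.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# input resolution expected by the model: (batch, height, width, channels)
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]

# float models expect normalized float32 inputs; quantized models take raw uint8
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean, input_std = 127.5, 127.5  # assumed MobileNet-style normalization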
def test(val_loader, model, criterion, epoch, use_cuda):
    global best_acc

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    if args.local_rank == 0:
        bar = Bar('Processing', max=len(val_loader))

    prefetcher = data_prefetcher(val_loader)
    inputs, targets = prefetcher.next()
    batch_idx = -1
    while inputs is not None:
        batch_idx += 1

        # compute output (without gradients)
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))

        reduced_loss = reduce_tensor(loss.data)
        prec1 = reduce_tensor(prec1)
        prec5 = reduce_tensor(prec5)

        # to_python_float incurs a host<->device sync
        losses.update(to_python_float(reduced_loss), inputs.size(0))
        top1.update(to_python_float(prec1), inputs.size(0))
        top5.update(to_python_float(prec5), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        if args.local_rank == 0:
            bar.suffix = ('Valid({batch}/{size}) | Batch: {bt:.3f}s | '
                          'Total: {total:} | Loss: {loss:.4f} | '
                          'top1: {top1: .4f} | top5: {top5: .4f}').format(
                              batch=batch_idx + 1,
                              size=len(val_loader),
                              bt=batch_time.avg,
                              total=bar.elapsed_td,
                              loss=losses.avg,
                              top1=top1.avg,
                              top5=top5.avg,
                          )
            bar.next()

        inputs, targets = prefetcher.next()

    if args.local_rank == 0:
        print(bar.suffix)
        bar.finish()
    return (losses.avg, top1.avg)
def train(train_loader, model, criterion, optimizer, epoch, use_cuda):
    # switch to train mode and enable gradients
    model.train()
    torch.set_grad_enabled(True)

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    if args.local_rank == 0:
        bar = Bar('Processing', max=len(train_loader))
    show_step = len(train_loader) // 10

    prefetcher = data_prefetcher(train_loader)
    inputs, targets = prefetcher.next()
    batch_idx = -1
    while inputs is not None:
        batch_idx += 1

        # stop at the (incomplete) last batch
        batch_size = inputs.size(0)
        if batch_size < args.train_batch:
            break

        if args.mixup:
            # mixup augmentation: train on convex combinations of inputs/labels
            inputs, targets_a, targets_b, lam = mixup_data(inputs, targets,
                                                           args.alpha, use_cuda)
            outputs = model(inputs)
            loss_func = mixup_criterion(targets_a, targets_b, lam)
            loss = loss_func(criterion, outputs)
        else:
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        # compute gradient and do SGD step
        # (amp scales the loss for mixed-precision training)
        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        optimizer.step()

        if batch_idx % args.print_freq == 0:
            # measure accuracy and record the (unscaled) loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))

            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)

            # to_python_float incurs a host<->device sync
            losses.update(to_python_float(reduced_loss), inputs.size(0))
            top1.update(to_python_float(prec1), inputs.size(0))
            top5.update(to_python_float(prec5), inputs.size(0))

            torch.cuda.synchronize()
            # measure elapsed time (averaged over print_freq iterations)
            batch_time.update((time.time() - end) / args.print_freq)
            end = time.time()

            if args.local_rank == 0:
                # plot progress
                bar.suffix = ('({batch}/{size}) | Batch: {bt:.3f}s | '
                              'Total: {total:} | Loss: {loss:.4f} | '
                              'top1: {top1: .4f} | top5: {top5: .4f}').format(
                                  batch=batch_idx + 1,
                                  size=len(train_loader),
                                  bt=batch_time.val,
                                  total=bar.elapsed_td,
                                  loss=losses.avg,
                                  top1=top1.avg,
                                  top5=top5.avg,
                              )
                bar.next()

        if batch_idx % show_step == 0 and args.local_rank == 0:
            print('E%d' % (epoch) + bar.suffix)

        inputs, targets = prefetcher.next()

    if args.local_rank == 0:
        bar.finish()
    return (losses.avg, top1.avg)
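# `mixup_data` and `mixup_criterion` implement mixup augmentation: each batch
# is replaced by a convex combination of itself and a shuffled copy, and the
# loss is mixed with the same coefficient. A minimal sketch of the standard
# formulation, matching the calling convention used above, is shown below; the
# repo's own helpers may differ in details.
import numpy as np
import torch


def mixup_data(x, y, alpha=1.0, use_cuda=True):
    """Returns mixed inputs, the two label sets, and the mixing coefficient."""
    # sample the mixing coefficient from Beta(alpha, alpha)
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    batch_size = x.size(0)
    # use_cuda kept for signature compatibility; the device is inferred from x
    index = torch.randperm(batch_size, device=x.device)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(y_a, y_b, lam):
    """Returns a function that mixes the criterion over both label sets."""
    return lambda criterion, pred: (lam * criterion(pred, y_a) +
                                    (1 - lam) * criterion(pred, y_b))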
if __name__ == '__main__':
    train_batch_size = 1
    eval_batch_size = 1
    data_path = 'data/imagenet_1k'
    # modelpath = 'data/mobilenet_v2_fp32_scripted.pth'
    modelpath = 'data/mobilenet_v2_int8_static_qnnpack.pth'

    # load the TorchScript model and the evaluation data
    scripted_model = torch.jit.load(modelpath)
    data_loader, data_loader_test = prepare_data_loaders(data_path)

    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    for cnt, (image, target) in enumerate(data_loader_test):
        output = scripted_model(image)
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        print('.', end='')
        top1.update(acc1[0], image.shape[0])
        top5.update(acc5[0], image.shape[0])
        if cnt >= 1000:
            break

    print('\nEvaluation accuracy on %d images, %2.3f %2.3f'
          % (len(data_loader_test), top1.avg, top5.avg))

    # testIm = cv2.imread('fox.jpg')
    # testIm = testIm[:, :, ::-1]
    # output = scripted_model(testImTensor)
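# Note: judging by its filename, the int8 model above was statically quantized
# for the QNNPACK backend. On hosts where another backend (e.g. FBGEMM) is the
# default, the quantized engine may need to be selected before running
# inference; this is an assumption about the deployment environment, not a
# documented requirement of this script.
import torch
torch.backends.quantized.engine = 'qnnpack'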