def main(args):
    """Quantize / evaluate a segmentation model, driven entirely by ``args``.

    Depending on ``args.quant_mode`` ('float' / 'calib' / 'test') the model is
    used as-is or wrapped by the vai_q_pytorch quantizer; results are either
    evaluated (mIoU) or visualized, and quantization artifacts are exported.
    """
    # Dumping an xmodel requires CPU execution with batch size 1.
    if args.dump_xmodel:
        args.device = 'cpu'
        args.batch_size = 1

    device = torch.device("cpu") if args.device == 'cpu' else torch.device("cuda")

    # model
    model = build_model(args, device)
    model.eval()

    # Dummy input for tracing: width is twice the crop height.
    H, W = args.crop_size, 2 * args.crop_size
    dummy_input = torch.randn([1, 3, H, W])

    if args.quant_mode == 'float':
        quant_model = model
    else:
        # vai_q_pytorch quantizer wraps the float model.
        quantizer = torch_quantizer(args.quant_mode,
                                    model,
                                    (dummy_input),
                                    output_dir=args.quant_dir,
                                    device=device)
        quant_model = quantizer.quant_model
    quant_model.to(device)

    criterion = Criterion(ignore_index=255, weight=None, use_weight=False, reduce=True)
    loss_fn = criterion.to(device)

    # Fast finetune before calibration, or reload its parameters in test mode.
    if args.fast_finetune == True:
        ft_data = build_data(args, subset_len=None, sample_method=None)
        if args.quant_mode == 'calib':
            quantizer.fast_finetune(eval_miou, (ft_data, quant_model, device))
        elif args.quant_mode == 'test':
            quantizer.load_ft_param()

    if args.eval:
        print('===> Evaluation mIoU: ')
        test_data = build_data(args)
        eval_miou(test_data, quant_model, device)
    else:
        print('===> Visualization: ')
        visulization(args, quant_model, device)

    # handle quantization result
    if args.quant_mode == 'calib':
        quantizer.export_quant_config()
    if args.quant_mode == 'test' and args.dump_xmodel:
        dump_xmodel(args.quant_dir, deploy_check=True)
def quantization(title='optimize', model_name='', file_path='', quant_mode=1):
    """Quantize resnet18 and report accuracy.

    Numeric modes: quant_mode < 1 means float (no quantizer), 1 means
    calibration, 2 means test/deploy (exports xmodel).
    """
    batch_size = 32
    model = resnet18().cpu()
    model.load_state_dict(torch.load(file_path))

    dummy_input = torch.randn([batch_size, 3, 224, 224])
    if quant_mode < 1:
        quant_model = model
    else:
        ## new api
        ####################################################################################
        # NOTE(review): the quantizer is always created in 'calib' mode even when
        # quant_mode == 2 (test) -- confirm this is intentional for this flow.
        quantizer = torch_quantizer('calib', model, (dummy_input), output_dir='resnet18')
        quant_model = quantizer.quant_model
        #####################################################################################

    # to get loss value after evaluation
    loss_fn = torch.nn.CrossEntropyLoss().cuda()

    # NOTE(review): data_dir comes from a module-level ``args`` -- confirm it exists.
    val_loader, _ = load_data(subset_len=100,
                              train=False,
                              batch_size=batch_size,
                              sample_method='random',
                              data_dir=args.data_dir,
                              model_name=model_name)

    # Placeholders for the float model's reference accuracy.
    acc_org1 = 0.0
    acc_org5 = 0.0
    loss_org = 0.0

    acc1_gen, acc5_gen, loss_gen = evaluate(quant_model, val_loader, loss_fn)

    # handle quantization result
    if quant_mode > 0:
        quantizer.export_quant_config()
    if quant_mode == 2:
        dump_xmodel('resnet18', True)

    # logging accuracy
    # Fix: branch on the local ``quant_mode`` parameter, which governs every
    # other decision in this function, instead of the unrelated global
    # ``args.quant_mode`` the original used (inconsistent and crashes when
    # ``args`` lacks the attribute).
    if quant_mode == 2:
        basic_info(loss_gen, 'quantized model loss')
        basic_info(acc1_gen, 'quantized model top-1 accuracy')
        basic_info(acc5_gen, 'quantized model top-5 accuracy')
    elif quant_mode == 1:
        basic_info(loss_gen, 'calibration model loss')
        basic_info(acc1_gen, 'calibration model top-1 accuracy')
        basic_info(acc5_gen, 'calibration model top-5 accuracy')
    elif quant_mode == 0:
        basic_info(loss_gen, 'float model loss')
        basic_info(acc1_gen, 'float model top-1 accuracy')
        basic_info(acc5_gen, 'float model top-5 accuracy')
def quantize(build_dir, quant_mode, batchsize):
    """Quantize the MNIST CNN and export the quant config or xmodel.

    Reads the float model from ``build_dir/float_model`` and writes quantizer
    output under ``build_dir/quant_model``.
    """
    dset_dir = build_dir + '/dataset'
    float_model = build_dir + '/float_model'
    quant_model = build_dir + '/quant_model'

    # use GPU if available
    gpu_count = torch.cuda.device_count()
    if gpu_count > 0:
        print('You have', gpu_count, 'CUDA devices available')
        for idx in range(gpu_count):
            print(' Device', str(idx), ': ', torch.cuda.get_device_name(idx))
        print('Selecting device 0..')
        device = torch.device('cuda:0')
    else:
        print('No CUDA devices available..selecting CPU')
        device = torch.device('cpu')

    # load trained model
    model = CNN().to(device)
    model.load_state_dict(torch.load(os.path.join(float_model, 'f_model.pth')))

    # force to merge BN with CONV for better quantization accuracy
    optimize = 1

    # override batchsize if in test mode
    if quant_mode == 'test':
        batchsize = 1

    rand_in = torch.randn([batchsize, 1, 28, 28])
    quantizer = torch_quantizer(quant_mode, model, (rand_in), output_dir=quant_model)
    quantized_model = quantizer.quant_model

    # data loader
    test_dataset = torchvision.datasets.MNIST(dset_dir,
                                              train=False,
                                              download=True,
                                              transform=test_transform)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batchsize,
                                              shuffle=False)

    # evaluate
    test(quantized_model, device, test_loader)

    # export config
    if quant_mode == 'calib':
        quantizer.export_quant_config()
    if quant_mode == 'test':
        quantizer.export_xmodel(deploy_check=False, output_dir=quant_model)
    return
def quant_mode_1(args, device, file_path=''):
    """Calibration pass for resnet18: quantize, evaluate, export quant config."""
    model = resnet18().cpu()
    model.load_state_dict(torch.load(file_path))
    model.name = args.model_name

    # Dummy batch used to trace the model for quantization.
    dummy_input = torch.randn([args.batch_size, 3, 224, 224])
    quantizer = torch_quantizer(args.quant_mode, model, (dummy_input), device=device)

    val_loader = load_data(args)
    acc1_gen, acc5_gen, loss_gen = evaluate(device, quantizer.quant_model, val_loader)

    quantizer.export_quant_config()
def quantization(title='optimize', model_name='', file_path='', quant_mode=1):
    """Quantize resnet18 and print loss / top-1 / top-5 accuracy.

    Numeric modes: quant_mode < 1 means float (no quantizer), 1 means
    calibration, 2 means test/deploy (dumps xmodel).
    """
    batch_size = 32
    model = resnet18().cpu()
    model.load_state_dict(torch.load(file_path))

    dummy_input = torch.randn([batch_size, 3, 224, 224])
    if quant_mode < 1:
        quant_model = model
    else:
        # vai_q_pytorch quantizer wraps the float model.
        quantizer = torch_quantizer(quant_mode, model, (dummy_input))
        quant_model = quantizer.quant_model

    # to get loss value after evaluation
    loss_fn = torch.nn.CrossEntropyLoss().cuda()

    # NOTE(review): subset_len / data_dir come from a module-level ``args`` --
    # confirm it exists at call time.
    val_loader, _ = load_data(subset_len=args.subset_len,
                              train=False,
                              batch_size=batch_size,
                              sample_method='random',
                              data_dir=args.data_dir,
                              model_name=model_name)

    # Placeholders for the float model's reference accuracy.
    acc_org1 = 0.0
    acc_org5 = 0.0
    loss_org = 0.0

    acc1_gen, acc5_gen, loss_gen = evaluate(quant_model, val_loader, loss_fn)

    # logging accuracy
    print('loss: %g' % (loss_gen))
    print('top-1 / top-5 accuracy: %g / %g' % (acc1_gen, acc5_gen))

    # handle quantization result
    if quant_mode > 0:
        quantizer.export_quant_config()
    if quant_mode == 2:
        dump_xmodel()
def quantize(model_name, quant_mode, batchsize, quant_model):
    """Quantize the CIFAR CNN and export the quant config or xmodel."""
    # use GPU if available
    gpu_count = torch.cuda.device_count()
    if gpu_count > 0:
        print('You have', gpu_count, 'CUDA devices available')
        for idx in range(gpu_count):
            print(' Device', str(idx), ': ', torch.cuda.get_device_name(idx))
        print('Selecting device 0..')
        device = torch.device('cuda:0')
    else:
        print('No CUDA devices available...selecting CPU')
        device = torch.device('cpu')

    # load trained model
    # NOTE(review): ``data_path``, ``cifar2_train`` and ``cifar2_val`` are not
    # defined here -- presumably module-level globals; confirm.
    model = CNN().to(device)
    model.load_state_dict(torch.load(os.path.join(data_path, model_name)))

    # force to merge BN with CONV for better quantization accuracy
    optimize = 1

    # override batchsize if in test mode
    if quant_mode == 'test':
        batchsize = 1

    rand_in = torch.randn([batchsize, 3, 32, 32])
    quantizer = torch_quantizer(quant_mode, model, (rand_in), output_dir=quant_model)
    quantized_model = quantizer.quant_model

    # data loaders
    train_loader = training_loader(train_data=cifar2_train,
                                   batch_size=batchsize,
                                   shuffle=True)
    val_loader = validation_loader(val_data=cifar2_val,
                                   batch_size=batchsize,
                                   shuffle=False)

    # evaluate
    validate(quantized_model, train_loader, val_loader, device)

    # export config
    if quant_mode == 'calib':
        quantizer.export_quant_config()
    if quant_mode == 'test':
        quantizer.export_xmodel(deploy_check=False, output_dir=quant_model)
    return
def quantization(title='optimize', model_name='', file_path=''):
    """Quantize mobilenet_v2, fast-finetune, evaluate, and export results.

    Mode and data locations come from a module-level ``args``; xmodel export
    ('deploy') is only performed in test mode with batch_size == subset_len == 1.
    """
    data_dir = args.data_dir
    quant_mode = args.quant_mode
    finetune = True
    deploy = args.deploy
    batch_size = args.batch_size
    subset_len = args.subset_len

    # xmodel export is only valid in test mode, with a single 1-sample batch.
    if quant_mode != 'test' and deploy:
        deploy = False
        print(
            r'Warning: Exporting xmodel needs to be done in quantization test mode, turn off it in this running!'
        )
    if deploy and (batch_size != 1 or subset_len != 1):
        print(
            r'Warning: Exporting xmodel needs batch size to be 1 and only 1 iteration of inference, change them automatically!'
        )
        batch_size = 1
        subset_len = 1

    model = mobilenet_v2().cpu()
    model.load_state_dict(torch.load(file_path))

    dummy_input = torch.randn([batch_size, 3, 224, 224])
    if quant_mode == 'float':
        quant_model = model
    else:
        # NOTE(review): ``device`` is not defined in this function --
        # presumably a module-level global; confirm.
        quantizer = torch_quantizer(quant_mode, model, (dummy_input), device=device)
        quant_model = quantizer.quant_model

    # to get loss value after evaluation
    loss_fn = torch.nn.CrossEntropyLoss().to(device)

    val_loader, _ = load_data(subset_len=subset_len,
                              train=False,
                              batch_size=batch_size,
                              sample_method='random',
                              data_dir=data_dir,
                              model_name=model_name)

    # fast finetune model or load finetuned parameter before test
    if finetune == True:
        ft_loader, _ = load_data(subset_len=1024,
                                 train=False,
                                 batch_size=batch_size,
                                 sample_method=None,
                                 data_dir=data_dir,
                                 model_name=model_name)
        if quant_mode == 'calib':
            quantizer.fast_finetune(evaluate, (quant_model, ft_loader, loss_fn))
        elif quant_mode == 'test':
            quantizer.load_ft_param()

    # Placeholders for the float model's reference accuracy.
    acc_org1 = 0.0
    acc_org5 = 0.0
    loss_org = 0.0

    acc1_gen, acc5_gen, loss_gen = evaluate(quant_model, val_loader, loss_fn)

    # logging accuracy
    print('loss: %g' % (loss_gen))
    print('top-1 / top-5 accuracy: %g / %g' % (acc1_gen, acc5_gen))

    # handle quantization result
    if quant_mode == 'calib':
        quantizer.export_quant_config()
    if deploy:
        quantizer.export_xmodel(deploy_check=False)
def main():
    """Quantization-aware training (QAT) on ImageNet with resnet18.

    Trains the quantized model for two epochs, converts it to a deployable
    model, cross-checks accuracy, and exports an xmodel.
    """
    data = '/proj/rdi/staff/niuxj/imagenet'
    workers = 4
    gpu = 0
    batch_size = 128
    lr = 1e-5
    momentum = 0.9
    weight_decay = 1e-4

    traindir = os.path.join(data, 'train')
    valdir = os.path.join(data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=workers,
                                               pin_memory=True)

    val_dataset = datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=workers,
                                             pin_memory=True)

    model = resnet18(pretrained=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(gpu)

    # Create a QAT-capable quantizer from a dummy traced input.
    dummy_input = torch.randn([batch_size, 3, 224, 224], dtype=torch.float32)
    quantizer = torch_quantizer(quant_mode='calib',
                                module=model,
                                input_args=dummy_input,
                                bitwidth=8,
                                mix_bit=True,
                                qat_proc=True)
    quantized_model = quantizer.quant_model

    optimizer = torch.optim.Adam(quantized_model.parameters(),
                                 lr,
                                 weight_decay=weight_decay)
    print(quantized_model)

    best_acc1 = 0
    epochs = 2
    for epoch in range(epochs):
        adjust_learning_rate(optimizer, epoch, lr)

        # train for one epoch
        train(train_loader, quantized_model, criterion, optimizer, epoch, gpu)

        # evaluate on validation set
        acc1 = validate(val_loader, quantized_model, criterion, gpu)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': quantized_model.state_dict(),
                'best_acc1': best_acc1
            }, is_best)
        print('Saving ckpt with best_acc1:', best_acc1)

    # Convert the trained quantized model into a deployable one and
    # sanity-check that its accuracy matches.
    quantizer.deploy(quantized_model, mix_bit=True)
    deployable_model = quantizer.deploy_model
    deployed_acc1 = validate(val_loader, deployable_model, criterion, gpu)

    quantized_model.freeze_bn()
    quantized_acc1 = validate(val_loader, quantized_model, criterion, gpu)
    if quantized_acc1 != deployed_acc1:
        warnings.warn(
            'The accuracy of deployed model is not equal to the accuracy of quantized model.'
        )

    # One inference with batch_size=1 is required before exporting the xmodel.
    val_dataset2 = torch.utils.data.Subset(val_dataset, list(range(1)))
    val_loader2 = torch.utils.data.DataLoader(val_dataset2,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=workers,
                                              pin_memory=True)
    validate(val_loader2, deployable_model, criterion, gpu)
    quantizer.export_xmodel()
def main():
    """QAT flow driven by a module-level ``args``: train a quantized resnet18
    on ImageNet, deploy the trained model, and export an xmodel."""
    gpu = 0
    traindir = os.path.join(args.data_dir, 'train')
    valdir = os.path.join(args.data_dir, 'validation')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_dataset = datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    model = resnet18(pretrained=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(gpu)

    # vai_q_pytorch interface function: create quantizer can do QAT
    dummy_input = torch.randn([args.batch_size, 3, 224, 224], dtype=torch.float32)
    quantizer = torch_quantizer(quant_mode='calib',
                                module=model,
                                input_args=dummy_input,
                                bitwidth=8,
                                qat_proc=True)
    quantized_model = quantizer.quant_model

    optimizer = torch.optim.Adam(quantized_model.parameters(),
                                 args.lr,
                                 weight_decay=args.weight_decay)

    best_acc1 = 0
    epochs = 2
    for epoch in range(epochs):
        adjust_learning_rate(optimizer, epoch, args.lr)

        # train for one epoch
        train(train_loader, quantized_model, criterion, optimizer, epoch, gpu)

        # evaluate on validation set
        acc1 = validate(val_loader, quantized_model, criterion, gpu)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': quantized_model.state_dict(),
                'best_acc1': best_acc1
            }, is_best)
        print('Saving ckpt with best_acc1:', best_acc1)

    # vai_q_pytorch interface function: deploy the trained model and convert xmodel
    # need at least 1 iteration of inference with batch_size=1
    val_subset = torch.utils.data.Subset(val_dataset, list(range(1)))
    subset_loader = torch.utils.data.DataLoader(val_subset,
                                                batch_size=1,
                                                shuffle=False,
                                                num_workers=args.workers,
                                                pin_memory=True)
    quantizer.deploy(quantized_model)
    deployable_model = quantizer.deploy_model
    validate(subset_loader, deployable_model, criterion, gpu)
    quantizer.export_xmodel()
def quantization(title='optimize',
                 model_name='',
                 file_path='',
                 quant_mode='calib',
                 finetune=False):
    """Quantize resnet50, optionally fast-finetune, evaluate, and export.

    String modes: 'float' (no quantizer), 'calib' (export quant config),
    'test' (export xmodel).
    """
    batch_size = 4
    # NOTE(review): ``device`` and ``args`` are not defined here -- presumably
    # module-level globals; confirm.
    model = resnet50().to(device)
    model.load_state_dict(torch.load(file_path))

    dummy_input = torch.randn([batch_size, 3, 224, 224])
    if quant_mode == 'float':
        quant_model = model
    else:
        # vai_q_pytorch quantizer wraps the float model.
        quantizer = torch_quantizer(quant_mode,
                                    model,
                                    (dummy_input),
                                    output_dir="pt_resnet50/vai_q_output")
        quant_model = quantizer.quant_model

    # to get loss value after evaluation
    loss_fn = torch.nn.CrossEntropyLoss().to(device)

    val_loader, _ = load_data(subset_len=args.subset_len,
                              train=False,
                              batch_size=batch_size,
                              sample_method='random',
                              data_dir=args.data_dir,
                              model_name=model_name)

    # finetune before calibration or load finetuned parameter before test
    if finetune == True:
        ft_loader, _ = load_data(subset_len=1024,
                                 train=False,
                                 batch_size=batch_size,
                                 sample_method=None,
                                 data_dir=args.data_dir,
                                 model_name=model_name)
        if quant_mode == 'calib':
            quantizer.fast_finetune(evaluate, (quant_model, ft_loader, loss_fn))
        elif quant_mode == 'test':
            quantizer.load_ft_param()

    # Placeholders for the float model's reference accuracy.
    acc_org1 = 0.0
    acc_org5 = 0.0
    loss_org = 0.0

    acc1_gen, acc5_gen, loss_gen = evaluate(quant_model, val_loader, loss_fn)

    # logging accuracy
    print('loss: %g' % (loss_gen))
    print('top-1 / top-5 accuracy: %g / %g' % (acc1_gen, acc5_gen))

    # handle quantization result
    if quant_mode == 'calib':
        quantizer.export_quant_config()
    if quant_mode == 'test':
        quantizer.export_xmodel(deploy_check=False,
                                output_dir="pt_resnet50/vai_q_output")
def quantization():
    """Quantize the FFN model per the module-level ``args``; evaluate both
    float and quantized models and export the quant config or xmodel."""
    # xmodel export is only valid in test mode with a single 1-sample batch.
    if args.quant_mode != 'test' and args.deploy:
        args.deploy = False
        warnings.warn(
            'Exporting xmodel needs to be done in quantization test mode, turn off it in this running!',
            UserWarning)
    if args.quant_mode == 'test' and (args.batch_size != 1 or args.subset_len != 1):
        warnings.warn(
            'Exporting xmodel needs batch size to be 1 and only 1 iteration of inference, they\'ll be changed automatically!',
            UserWarning)
        args.batch_size = 1
        args.subset_len = 1

    ckpt_path = Path(args.checkpoint_dir) / args.model_name
    model = FFN(args.input_size)
    model = preprocessors.load_from_state_dict(model, ckpt_path)

    if args.quant_mode == 'float':
        quant_model = deepcopy(model)
    else:
        rand_input = torch.randn([args.batch_size, args.input_size])
        quantizer = torch_quantizer(args.quant_mode,
                                    module=deepcopy(model),
                                    input_args=rand_input,
                                    bitwidth=8,
                                    mix_bit=False,
                                    qat_proc=False,
                                    device=set_seed.DEVICE)
        quant_model = quantizer.quant_model

    # Fast finetune before calibration, or reload its parameters in test mode.
    if args.fast_finetune:
        ft_loader = preprocessors.make_dataloader(data_dir=args.data_dir,
                                                  data_file=args.calib_data,
                                                  subset_len=args.subset_len)
        if args.quant_mode == 'calib':
            loss_fn = MSE().to(set_seed.DEVICE)
            quantizer.fast_finetune(eval_loss, (quant_model, ft_loader, loss_fn))
        elif args.quant_mode == 'test':
            quantizer.load_ft_param()

    if args.evaluate:
        valid_loader = preprocessors.make_dataloader(data_dir=args.data_dir,
                                                     data_file=args.calib_data,
                                                     batch_size=args.batch_size)
        cr1 = CustomRunner(model=model,
                           device=set_seed.DEVICE,
                           input_key='features',
                           input_target_key='targets',
                           evaluate=True,
                           loaders={'test': valid_loader})
        print('Evaluation completed!')
        print('Initial model results:')
        pprint.pprint(cr1.logs, width=5)
        if args.quant_mode != 'float':
            cr2 = CustomRunner(model=quant_model,
                               device=set_seed.DEVICE,
                               input_key='features',
                               input_target_key='targets',
                               evaluate=True,
                               loaders={'test': valid_loader})
            print('Quantized model results:')
            pprint.pprint(cr2.logs, width=5)

    if args.quant_mode == 'calib':
        quantizer.export_quant_config()
    if args.deploy:
        quantizer.export_xmodel(deploy_check=True)
test_loss /= len(test_loader.dataset) print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format( test_loss, acc, len(test_loader.dataset), 100. * acc / len(test_loader.dataset))) return acc / len(test_loader.dataset) embedding_vector_length = 32 hidden_vector_length = 100 model = Model(top_words, embedding_vector_length, hidden_vector_length).cpu() model.load_state_dict(torch.load("./pretrained.pth")) # nndct quantization if args.quant_mode == 'calib' or args.quant_mode == 'test': netbak = model quantizer = torch_quantizer(quant_mode = args.quant_mode, module = model, bitwidth = 16, lstm = True) model = quantizer.quant_model # nndct quantization forwarding acc = test(model, DEVICE, test_loader) print("acc is: {:.4f}\n".format(acc)) # handle quantization result if args.quant_mode == 'calib': quantizer.export_quant_config() if args.quant_mode == 'test': quantizer.export_xmodel(deploy_check=True)