Example 1
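All of these snippets exercise the torch_quantizer API from Vitis AI's vai_q_pytorch package (plus dump_xmodel in the older ones), and they omit their imports. A minimal import block along these lines is assumed throughout (a sketch; exact module paths can vary across Vitis AI releases):

import os
import torch
from pytorch_nndct.apis import torch_quantizer, dump_xmodel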
def main(args):
    if args.dump_xmodel:
        args.device = 'cpu'
        args.batch_size = 1

    if args.device == 'cpu':
        device = torch.device("cpu")
    else:
        device = torch.device("cuda")
    # model
    model = build_model(args, device)
    model.eval()

    H, W = args.crop_size, 2 * args.crop_size
    input = torch.randn([1, 3, H, W])

    if args.quant_mode == 'float':
        quant_model = model
    else:
        # wrap the float model with the vai_q_pytorch quantizer
        quantizer = torch_quantizer(args.quant_mode,
                                    model, (input,),
                                    output_dir=args.quant_dir,
                                    device=device)

        quant_model = quantizer.quant_model
    quant_model.to(device)
    criterion = Criterion(ignore_index=255,
                          weight=None,
                          use_weight=False,
                          reduce=True)
    loss_fn = criterion.to(device)

    if args.fast_finetune:
        ft_data = build_data(args, subset_len=None, sample_method=None)
        if args.quant_mode == 'calib':
            quantizer.fast_finetune(eval_miou, (ft_data, quant_model, device))
        elif args.quant_mode == 'test':
            quantizer.load_ft_param()
    if args.eval:
        print('===> Evaluation mIoU: ')
        test_data = build_data(args)
        eval_miou(test_data, quant_model, device)
    else:
        print('===> Visualization: ')
        visulization(args, quant_model, device)

    # handle quantization result
    if args.quant_mode == 'calib':
        quantizer.export_quant_config()
    if args.quant_mode == 'test' and args.dump_xmodel:
        dump_xmodel(args.quant_dir, deploy_check=True)
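The fast_finetune call above takes an evaluation function plus an argument tuple that vai_q_pytorch unpacks into it. A sketch of the callback contract this implies; the body of eval_miou and the compute_miou helper are assumptions, not code from the source:

def eval_miou(data_loader, model, device):
    # fast_finetune invokes this as eval_fn(*args), so the parameter order
    # must match the tuple (ft_data, quant_model, device) passed above
    model.eval()
    total_miou = 0.0
    with torch.no_grad():
        for images, labels in data_loader:
            preds = model(images.to(device))
            total_miou += compute_miou(preds, labels.to(device))  # hypothetical metric helper
    return total_miou / len(data_loader)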
Example 2
def quantization(title='optimize', model_name='', file_path='', quant_mode=1):

    batch_size = 32

    model = resnet18().cpu()
    model.load_state_dict(torch.load(file_path))

    input = torch.randn([batch_size, 3, 224, 224])
    if quant_mode < 1:
        quant_model = model
    else:
        # wrap the float model with the vai_q_pytorch quantizer; pass the
        # numeric mode through rather than hard-coding 'calib', so that
        # quant_mode == 2 actually runs in test mode
        quantizer = torch_quantizer(quant_mode,
                                    model, (input,),
                                    output_dir='resnet18')

        quant_model = quantizer.quant_model

    # to get loss value after evaluation
    loss_fn = torch.nn.CrossEntropyLoss().cuda()

    val_loader, _ = load_data(subset_len=100,
                              train=False,
                              batch_size=batch_size,
                              sample_method='random',
                              data_dir=args.data_dir,
                              model_name=model_name)

    # placeholders for recording the float model's reference accuracy
    acc_org1 = 0.0
    acc_org5 = 0.0
    loss_org = 0.0

    acc1_gen, acc5_gen, loss_gen = evaluate(quant_model, val_loader, loss_fn)

    # handle quantization result
    if quant_mode > 0:
        quantizer.export_quant_config()
        if quant_mode == 2:
            dump_xmodel('resnet18', True)

    # logging accuracy
    if quant_mode == 2:
        basic_info(loss_gen, 'quantized model loss')
        basic_info(acc1_gen, 'quantized model top-1 accuracy')
        basic_info(acc5_gen, 'quantized model top-5 accuracy')
    elif quant_mode == 1:
        basic_info(loss_gen, 'calibration model loss')
        basic_info(acc1_gen, 'calibration model top-1 accuracy')
        basic_info(acc5_gen, 'calibration model top-5 accuracy')
    elif quant_mode == 0:
        basic_info(loss_gen, 'float model loss')
        basic_info(acc1_gen, 'float model top-1 accuracy')
        basic_info(acc5_gen, 'float model top-5 accuracy')
Example 3
def quantize(build_dir, quant_mode, batchsize):

    dset_dir = build_dir + '/dataset'
    float_model = build_dir + '/float_model'
    quant_model = build_dir + '/quant_model'

    # use GPU if available
    if (torch.cuda.device_count() > 0):
        print('You have', torch.cuda.device_count(), 'CUDA devices available')
        for i in range(torch.cuda.device_count()):
            print(' Device', str(i), ': ', torch.cuda.get_device_name(i))
        print('Selecting device 0..')
        device = torch.device('cuda:0')
    else:
        print('No CUDA devices available, selecting CPU')
        device = torch.device('cpu')

    # load trained model
    model = CNN().to(device)
    model.load_state_dict(torch.load(os.path.join(float_model, 'f_model.pth')))

    # override batchsize if in test mode
    if (quant_mode == 'test'):
        batchsize = 1

    rand_in = torch.randn([batchsize, 1, 28, 28])
    quantizer = torch_quantizer(quant_mode,
                                model, (rand_in,),
                                output_dir=quant_model)
    quantized_model = quantizer.quant_model

    # data loader
    test_dataset = torchvision.datasets.MNIST(dset_dir,
                                              train=False,
                                              download=True,
                                              transform=test_transform)

    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batchsize,
                                              shuffle=False)

    # evaluate
    test(quantized_model, device, test_loader)

    # export config
    if quant_mode == 'calib':
        quantizer.export_quant_config()
    if quant_mode == 'test':
        quantizer.export_xmodel(deploy_check=False, output_dir=quant_model)

    return
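The calib/test branches above imply a two-pass workflow: run once in 'calib' mode to collect activation statistics and export the quantization config, then rerun in 'test' mode to evaluate the quantized model and export the xmodel. A minimal driver sketch under that assumption (the path and batch size are placeholders):

if __name__ == '__main__':
    build_dir = './build'  # hypothetical build directory
    # pass 1: collect activation statistics and export the quant config
    quantize(build_dir, 'calib', batchsize=100)
    # pass 2: evaluate the quantized model and export the xmodel
    # (quantize() itself forces batchsize to 1 in test mode)
    quantize(build_dir, 'test', batchsize=100)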
Example 4
def quant_mode_1(args, device, file_path=''):
    model = resnet18().cpu()
    model.load_state_dict(torch.load(file_path))
    model.name = args.model_name

    input = torch.randn([args.batch_size, 3, 224, 224])
    quantizer = torch_quantizer(args.quant_mode, model, (input,), device=device)
    val_loader = load_data(args)

    acc1_gen, acc5_gen, loss_gen = evaluate(device, quantizer.quant_model,
                                            val_loader)

    quantizer.export_quant_config()
Example 5
def quantization(title='optimize', model_name='', file_path='', quant_mode=1):
    batch_size = 32

    model = resnet18().cpu()
    model.load_state_dict(torch.load(file_path))

    input = torch.randn([batch_size, 3, 224, 224])
    if quant_mode < 1:
        quant_model = model
    else:
        # wrap the float model with the vai_q_pytorch quantizer
        quantizer = torch_quantizer(quant_mode, model, (input,))

        quant_model = quantizer.quant_model

    # to get loss value after evaluation
    loss_fn = torch.nn.CrossEntropyLoss().cuda()

    val_loader, _ = load_data(subset_len=args.subset_len,
                              train=False,
                              batch_size=batch_size,
                              sample_method='random',
                              data_dir=args.data_dir,
                              model_name=model_name)

    # placeholders for recording the float model's reference accuracy
    acc_org1 = 0.0
    acc_org5 = 0.0
    loss_org = 0.0

    acc1_gen, acc5_gen, loss_gen = evaluate(quant_model, val_loader, loss_fn)

    # logging accuracy
    print('loss: %g' % (loss_gen))
    print('top-1 / top-5 accuracy: %g / %g' % (acc1_gen, acc5_gen))

    # handle quantization result
    if quant_mode > 0:
        quantizer.export_quant_config()
        if quant_mode == 2:
            dump_xmodel()
Example 6
def quantize(model_name, quant_mode, batchsize, quant_model):

  # use GPU if available
  if torch.cuda.device_count() > 0:
    print('You have', torch.cuda.device_count(), 'CUDA devices available')
    for i in range(torch.cuda.device_count()):
      print(' Device', str(i), ': ', torch.cuda.get_device_name(i))
    print('Selecting device 0..')
    device = torch.device('cuda:0')
  else:
    print('No CUDA devices available...selecting CPU')
    device = torch.device('cpu')

  # load trained model
  model = CNN().to(device)
  model.load_state_dict(torch.load(os.path.join(data_path, model_name)))

  # override batchsize if in test mode
  if quant_mode == 'test':
    batchsize = 1

  rand_in = torch.randn([batchsize, 3, 32, 32])
  quantizer = torch_quantizer(quant_mode, model, (rand_in,), output_dir=quant_model)
  quantized_model = quantizer.quant_model

  # data loader
  train_loader = training_loader(train_data=cifar2_train, batch_size=batchsize, shuffle=True)
  val_loader = validation_loader(val_data=cifar2_val, batch_size=batchsize, shuffle=False)

  # evaluate
  validate(quantized_model, train_loader, val_loader, device)

  # export config
  if quant_mode == 'calib':
    quantizer.export_quant_config()
  if quant_mode == 'test':
    quantizer.export_xmodel(deploy_check=False, output_dir=quant_model)

  return
Example 7
def quantization(title='optimize', model_name='', file_path=''):

    data_dir = args.data_dir
    quant_mode = args.quant_mode
    finetune = True
    deploy = args.deploy
    batch_size = args.batch_size
    subset_len = args.subset_len
    if quant_mode != 'test' and deploy:
        deploy = False
        print('Warning: exporting an xmodel requires quantization test mode; '
              'deployment is disabled for this run.')
    if deploy and (batch_size != 1 or subset_len != 1):
        print('Warning: exporting an xmodel requires batch size 1 and a single '
              'inference iteration; adjusting both automatically.')
        batch_size = 1
        subset_len = 1

    model = mobilenet_v2().cpu()
    model.load_state_dict(torch.load(file_path))

    input = torch.randn([batch_size, 3, 224, 224])
    if quant_mode == 'float':
        quant_model = model
    else:
        # wrap the float model with the vai_q_pytorch quantizer
        quantizer = torch_quantizer(quant_mode, model, (input,), device=device)

        quant_model = quantizer.quant_model

    # to get loss value after evaluation
    loss_fn = torch.nn.CrossEntropyLoss().to(device)

    val_loader, _ = load_data(subset_len=subset_len,
                              train=False,
                              batch_size=batch_size,
                              sample_method='random',
                              data_dir=data_dir,
                              model_name=model_name)

    # fast finetune model or load finetuned parameter before test
    if finetune:
        ft_loader, _ = load_data(subset_len=1024,
                                 train=False,
                                 batch_size=batch_size,
                                 sample_method=None,
                                 data_dir=data_dir,
                                 model_name=model_name)
        if quant_mode == 'calib':
            quantizer.fast_finetune(evaluate,
                                    (quant_model, ft_loader, loss_fn))
        elif quant_mode == 'test':
            quantizer.load_ft_param()

    # placeholders for recording the float model's reference accuracy
    acc_org1 = 0.0
    acc_org5 = 0.0
    loss_org = 0.0

    acc1_gen, acc5_gen, loss_gen = evaluate(quant_model, val_loader, loss_fn)

    # logging accuracy
    print('loss: %g' % (loss_gen))
    print('top-1 / top-5 accuracy: %g / %g' % (acc1_gen, acc5_gen))

    # handle quantization result
    if quant_mode == 'calib':
        quantizer.export_quant_config()
    if deploy:
        quantizer.export_xmodel(deploy_check=False)
Example 8
def main():
    data = '/proj/rdi/staff/niuxj/imagenet'
    workers = 4
    gpu = 0

    batch_size = 128
    lr = 1e-5
    momentum = 0.9
    weight_decay = 1e-4

    traindir = os.path.join(data, 'train')
    valdir = os.path.join(data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=workers,
                                               pin_memory=True)

    val_dataset = datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=workers,
                                             pin_memory=True)

    model = resnet18(pretrained=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(gpu)

    input = torch.randn([batch_size, 3, 224, 224], dtype=torch.float32)
    quantizer = torch_quantizer(quant_mode='calib',
                                module=model,
                                input_args=input,
                                bitwidth=8,
                                mix_bit=True,
                                qat_proc=True)
    quantized_model = quantizer.quant_model
    optimizer = torch.optim.Adam(quantized_model.parameters(),
                                 lr,
                                 weight_decay=weight_decay)
    print(quantized_model)

    # The test accuracy should be equal to the float model:
    # quantized_model.disable_quant()
    # validate(val_loader, quantized_model, criterion, gpu)
    # quantized_model.enable_quant()

    #quantized_model = torch.nn.DataParallel(quantized_model.cuda())
    best_acc1 = 0
    epochs = 2
    for epoch in range(epochs):
        adjust_learning_rate(optimizer, epoch, lr)

        # train for one epoch
        train(train_loader, quantized_model, criterion, optimizer, epoch, gpu)

        # evaluate on validation set
        acc1 = validate(val_loader, quantized_model, criterion, gpu)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': quantized_model.state_dict(),
                'best_acc1': best_acc1
            }, is_best)
        print('Saving ckpt with best_acc1:', best_acc1)

    quantizer.deploy(quantized_model, mix_bit=True)
    deployable_model = quantizer.deploy_model
    deployed_acc1 = validate(val_loader, deployable_model, criterion, gpu)

    quantized_model.freeze_bn()
    quantized_acc1 = validate(val_loader, quantized_model, criterion, gpu)

    if abs(quantized_acc1 - deployed_acc1) > 1e-4:  # avoid exact float comparison
        warnings.warn(
            'The accuracy of the deployed model does not match the accuracy of the quantized model.'
        )

    val_dataset2 = torch.utils.data.Subset(val_dataset, list(range(1)))
    val_loader2 = torch.utils.data.DataLoader(val_dataset2,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=workers,
                                              pin_memory=True)
    validate(val_loader2, deployable_model, criterion, gpu)
    quantizer.export_xmodel()
Example 9
def main():
    gpu = 0

    traindir = os.path.join(args.data_dir, 'train')
    valdir = os.path.join(args.data_dir, 'validation')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_dataset = datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    model = resnet18(pretrained=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(gpu)

    # vai_q_pytorch interface function: create quantizer can do QAT
    input = torch.randn([args.batch_size, 3, 224, 224], dtype=torch.float32)
    quantizer = torch_quantizer(quant_mode='calib',
                                module=model,
                                input_args=input,
                                bitwidth=8,
                                qat_proc=True)
    quantized_model = quantizer.quant_model

    optimizer = torch.optim.Adam(quantized_model.parameters(),
                                 args.lr,
                                 weight_decay=args.weight_decay)

    best_acc1 = 0
    epochs = 2
    for epoch in range(epochs):
        adjust_learning_rate(optimizer, epoch, args.lr)

        # train for one epoch
        train(train_loader, quantized_model, criterion, optimizer, epoch, gpu)

        # evaluate on validation set
        acc1 = validate(val_loader, quantized_model, criterion, gpu)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': quantized_model.state_dict(),
                'best_acc1': best_acc1
            }, is_best)
        print('Saving ckpt with best_acc1:', best_acc1)

    # vai_q_pytorch interface function: deploy the trained model and convert xmodel
    # need at least 1 iteration of inference with batch_size=1
    val_subset = torch.utils.data.Subset(val_dataset, list(range(1)))
    subset_loader = torch.utils.data.DataLoader(val_subset,
                                                batch_size=1,
                                                shuffle=False,
                                                num_workers=args.workers,
                                                pin_memory=True)
    quantizer.deploy(quantized_model)
    deployable_model = quantizer.deploy_model
    validate(subset_loader, deployable_model, criterion, gpu)
    quantizer.export_xmodel()
Example 10
def quantization(title='optimize',
                 model_name='',
                 file_path='',
                 quant_mode='calib',
                 finetune=False):
    batch_size = 4

    model = resnet50().to(device)
    model.load_state_dict(torch.load(file_path))

    input = torch.randn([batch_size, 3, 224, 224])
    if quant_mode == 'float':
        quant_model = model
    else:
        # wrap the float model with the vai_q_pytorch quantizer
        quantizer = torch_quantizer(quant_mode,
                                    model, (input,),
                                    output_dir="pt_resnet50/vai_q_output")

        quant_model = quantizer.quant_model

    # to get loss value after evaluation
    loss_fn = torch.nn.CrossEntropyLoss().to(device)

    val_loader, _ = load_data(subset_len=args.subset_len,
                              train=False,
                              batch_size=batch_size,
                              sample_method='random',
                              data_dir=args.data_dir,
                              model_name=model_name)

    # finetune before calibration or load finetuned parameter before test
    if finetune:
        ft_loader, _ = load_data(subset_len=1024,
                                 train=False,
                                 batch_size=batch_size,
                                 sample_method=None,
                                 data_dir=args.data_dir,
                                 model_name=model_name)
        if quant_mode == 'calib':
            quantizer.fast_finetune(evaluate,
                                    (quant_model, ft_loader, loss_fn))
        elif quant_mode == 'test':
            quantizer.load_ft_param()

    # placeholders for recording the float model's reference accuracy
    acc_org1 = 0.0
    acc_org5 = 0.0
    loss_org = 0.0

    acc1_gen, acc5_gen, loss_gen = evaluate(quant_model, val_loader, loss_fn)

    # logging accuracy
    print('loss: %g' % (loss_gen))
    print('top-1 / top-5 accuracy: %g / %g' % (acc1_gen, acc5_gen))

    # handle quantization result
    if quant_mode == 'calib':
        quantizer.export_quant_config()
    if quant_mode == 'test':
        quantizer.export_xmodel(deploy_check=False,
                                output_dir="pt_resnet50/vai_q_output")
Example 11
def quantization():
    if args.quant_mode != 'test' and args.deploy:
        args.deploy = False
        warnings.warn(
            'Exporting an xmodel requires quantization test mode; deployment is disabled for this run.',
            UserWarning)

    if args.quant_mode == 'test' and (args.batch_size != 1
                                      or args.subset_len != 1):
        warnings.warn(
            'Exporting an xmodel requires batch size 1 and a single inference iteration; both will be adjusted automatically.',
            UserWarning)
        args.batch_size = 1
        args.subset_len = 1

    p = Path(args.checkpoint_dir) / args.model_name
    model = FFN(args.input_size)
    model = preprocessors.load_from_state_dict(model, p)

    if args.quant_mode == 'float':
        quant_model = deepcopy(model)
    else:
        rand_input = torch.randn([args.batch_size, args.input_size])
        quantizer = torch_quantizer(args.quant_mode,
                                    module=deepcopy(model),
                                    input_args=rand_input,
                                    bitwidth=8,
                                    mix_bit=False,
                                    qat_proc=False,
                                    device=set_seed.DEVICE)

        quant_model = quantizer.quant_model

    if args.fast_finetune:
        ft_loader = preprocessors.make_dataloader(data_dir=args.data_dir,
                                                  data_file=args.calib_data,
                                                  subset_len=args.subset_len)
        if args.quant_mode == 'calib':
            loss_fn = MSE().to(set_seed.DEVICE)
            quantizer.fast_finetune(eval_loss,
                                    (quant_model, ft_loader, loss_fn))
        elif args.quant_mode == 'test':
            quantizer.load_ft_param()

    if args.evaluate:
        valid_loader = preprocessors.make_dataloader(
            data_dir=args.data_dir,
            data_file=args.calib_data,
            batch_size=args.batch_size)
        cr1 = CustomRunner(model=model,
                           device=set_seed.DEVICE,
                           input_key='features',
                           input_target_key='targets',
                           evaluate=True,
                           loaders={'test': valid_loader})
        print('Evaluation completed!')
        print('Initial model results:')
        pprint.pprint(cr1.logs, width=5)

        if args.quant_mode != 'float':
            cr2 = CustomRunner(model=quant_model,
                               device=set_seed.DEVICE,
                               input_key='features',
                               input_target_key='targets',
                               evaluate=True,
                               loaders={'test': valid_loader})
            print('Quantized model results:')
            pprint.pprint(cr2.logs, width=5)

    if args.quant_mode == 'calib':
        quantizer.export_quant_config()
    if args.deploy:
        quantizer.export_xmodel(deploy_check=True)
Example 12
def test(model, device, test_loader):
    # ... (evaluation loop truncated in the original snippet: it accumulates
    # test_loss and the correct-prediction count acc) ...
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, acc, len(test_loader.dataset),
        100. * acc / len(test_loader.dataset)))
    return acc / len(test_loader.dataset)

embedding_vector_length = 32
hidden_vector_length = 100
model = Model(top_words, embedding_vector_length, hidden_vector_length).cpu()
model.load_state_dict(torch.load("./pretrained.pth"))

# nndct quantization
if args.quant_mode in ('calib', 'test'):
    netbak = model  # keep a handle on the float model
    quantizer = torch_quantizer(quant_mode=args.quant_mode,
                                module=model,
                                bitwidth=16,
                                lstm=True)
    model = quantizer.quant_model

# forward the (possibly quantized) model: calibration stats in 'calib' mode, evaluation in 'test'
acc = test(model, DEVICE, test_loader)
print("acc is: {:.4f}\n".format(acc))

# handle quantization result
if args.quant_mode == 'calib':
    quantizer.export_quant_config()
if args.quant_mode == 'test':
    quantizer.export_xmodel(deploy_check=True)
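As with the earlier examples, this script is meant to be run twice. Assuming it exposes quant_mode as a --quant_mode argparse flag (the args.quant_mode references suggest so) and lives in a file such as quantize_lstm.py (a hypothetical name), the usage would be roughly:

# pass 1: calibrate the 16-bit LSTM quantizer and export the quant config
#   python quantize_lstm.py --quant_mode calib
# pass 2: evaluate the quantized model and export the xmodel
#   python quantize_lstm.py --quant_mode test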