Example #1
    def test_manual_mode(self):

        torch.cuda.empty_cache()

        net = mnist_model.Net()

        model = net.to(torch.device('cuda'))
        # Add a quantization wrapper around the first convolution layer
        for module_name, module_ref in model.named_children():
            if module_name == 'conv1':
                quantized_module = QcPostTrainingWrapper(
                    module_ref,
                    weight_bw=8,
                    activation_bw=8,
                    round_mode='nearest',
                    quant_scheme=QuantScheme.post_training_tf)
                setattr(model, module_name, quantized_module)

        sim = QuantizationSimModel(model,
                                   dummy_input=torch.rand(1, 1, 28, 28).cuda())

        # Quantize the untrained MNIST model
        sim.compute_encodings(self.forward_pass, forward_pass_callback_args=5)

        # Run some inferences
        mnist_torch_model.evaluate(model=sim.model,
                                   iterations=100,
                                   use_cuda=True)

        # train the model again
        mnist_model.train(model=sim.model,
                          epochs=1,
                          num_batches=3,
                          batch_callback=check_if_layer_weights_are_updating,
                          use_cuda=True)
Example #2
    def test_collect_inp_out_data_quantsim_model_gpu(self):
        """ test collect input output data from module """

        device_list = [torch.device('cuda:0')]

        for device in device_list:
            model = TinyModel().to(device=device)
            model_input = torch.randn(1, 3, 32, 32).to(device=device)
            sim = QuantizationSimModel(model,
                                       dummy_input=torch.rand(
                                           1, 3, 32, 32).to(device=device))

            module_data = utils.ModuleData(model, model.fc)
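            # Collect only the output of model.fc; the FP32 output should differ from the quantized sim model's output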
            inp, out = module_data.collect_inp_out_data(model_input,
                                                        collect_input=False,
                                                        collect_output=True)
            fc_out = sim.model(model_input)
            self.assertFalse(
                np.array_equal(utils.to_numpy(out), utils.to_numpy(fc_out)))

            module_data = utils.ModuleData(model, model.conv1)
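            # Collect only the input to model.conv1; it should match the original model input exactly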
            inp, out = module_data.collect_inp_out_data(model_input,
                                                        collect_input=True,
                                                        collect_output=False)
            self.assertTrue(
                np.array_equal(utils.to_numpy(inp),
                               utils.to_numpy(model_input)))
Example #3
def main():
    args = arguments()
    seed(args)

    model = DeepLab(backbone='mobilenet',
                    output_stride=16,
                    num_classes=21,
                    sync_bn=False)
    model.eval()

    from aimet_torch import batch_norm_fold
    from aimet_torch import utils
    args.input_shape = (1, 3, 513, 513)
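    # Fold batch norms into adjacent convolutions and replace ReLU6 with ReLU before quantization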
    batch_norm_fold.fold_all_batch_norms(model, args.input_shape)
    utils.replace_modules_of_type1_with_type2(model, torch.nn.ReLU6,
                                              torch.nn.ReLU)

    if args.checkpoint_path:
        model.load_state_dict(torch.load(args.checkpoint_path))
    else:
        raise ValueError('checkpoint path {} must be specified'.format(
            args.checkpoint_path))

    data_loader_kwargs = {'worker_init_fn': work_init, 'num_workers': 0}
    train_loader, val_loader, test_loader, num_class = make_data_loader(
        args, **data_loader_kwargs)
    eval_func_quant = model_eval(args, val_loader)
    eval_func = model_eval(args, val_loader)

    from aimet_common.defs import QuantScheme
    from aimet_torch.quantsim import QuantizationSimModel
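    # Map the quant_scheme argument string to the corresponding AIMET QuantScheme enum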
    if hasattr(args, 'quant_scheme'):
        if args.quant_scheme == 'range_learning_tf':
            quant_scheme = QuantScheme.training_range_learning_with_tf_init
        elif args.quant_scheme == 'range_learning_tfe':
            quant_scheme = QuantScheme.training_range_learning_with_tf_enhanced_init
        elif args.quant_scheme == 'tf':
            quant_scheme = QuantScheme.post_training_tf
        elif args.quant_scheme == 'tf_enhanced':
            quant_scheme = QuantScheme.post_training_tf_enhanced
        else:
            raise ValueError("Got unrecognized quant_scheme: " +
                             args.quant_scheme)
        kwargs = {
            'quant_scheme': quant_scheme,
            'default_param_bw': args.default_param_bw,
            'default_output_bw': args.default_output_bw,
            'config_file': args.config_file
        }
    print(kwargs)
    sim = QuantizationSimModel(model.cpu(),
                               input_shapes=args.input_shape,
                               **kwargs)
    sim.compute_encodings(eval_func_quant, (1024, True))
    post_quant_top1 = eval_func(sim.model.cuda(), (99999999, True))
    print("Post Quant mIoU :", post_quant_top1)
Example #4
    def test_memory_leak_during_quantization_train(self):

        # First get baseline numbers
        base_pre_model_load_mark = torch.cuda.memory_allocated()
        model = models.vgg16(pretrained=True)
        model = model.to(torch.device('cuda'))
        base_model_loaded_mark = torch.cuda.memory_allocated()

        _ = model_train(model=model, epochs=2)
        base_model_train_mark = torch.cuda.memory_allocated()
        base_model_train_delta = base_model_train_mark - base_model_loaded_mark

        print("Usage Report ------")
        print("Model pre-load = {}".format(base_pre_model_load_mark))
        print("Model load = {}".format(base_model_loaded_mark))
        print("Model train delta = {}".format(base_model_train_delta))

        del model
        baseline_leaked_mem = torch.cuda.memory_allocated() - base_pre_model_load_mark
        print("Leaked during train = {}".format(baseline_leaked_mem))

        model = models.vgg16(pretrained=True)
        model = model.to(torch.device('cuda'))
        base_model_loaded_mark = torch.cuda.memory_allocated()
        # Now use AIMET
        sim = QuantizationSimModel(model, quant_scheme=QuantScheme.post_training_tf_enhanced,
                                   default_param_bw=8, default_output_bw=4,
                                   dummy_input=torch.rand(1, 3, 224, 224).cuda())
        sim.compute_encodings(model_eval, forward_pass_callback_args=1)

        print(sim.model)
        aimet_model_quantize_mark = torch.cuda.memory_allocated()
        aimet_model_quantize_delta = aimet_model_quantize_mark - base_model_loaded_mark

        _ = model_train(model=sim.model, epochs=2,
                        callback=check_if_layer_weights_are_updating)

        aimet_model_train_mark = torch.cuda.memory_allocated()
        aimet_model_train_delta = aimet_model_train_mark - aimet_model_quantize_mark
        leaked_memory = aimet_model_train_delta - base_model_train_delta + baseline_leaked_mem

        print("")
        print("Usage Report ------")
        print("Model load = {}".format(base_model_loaded_mark))
        print("AIMET quantize delta = {}".format(aimet_model_quantize_delta))
        print("AIMET train delta = {}".format(aimet_model_train_delta))
        print("Leaked memory = {}".format(leaked_memory))

        # PyTorch holds on to memory for a longer duration during training, so the
        # assert below often fails. The tolerance is bumped up to cover the case where
        # all tests are run together; the test may still fail when run individually.
        self.assertLessEqual(leaked_memory, 2000000)
Example #5
def main():
    args = arguments()
    seed(args)

    if args.model_path:
        model = torch.load(args.model_path)
    else:
        raise ValueError('Model path {} must be specified'.format(
            args.model_path))

    model.eval()
    input_shape = (1, 3, 224, 224)
    image_size = input_shape[-1]
    eval_func_quant = model_eval(args.images_dir + '/val/',
                                 image_size,
                                 batch_size=args.batch_size,
                                 num_workers=0,
                                 quant=True)
    eval_func = model_eval(args.images_dir + '/val/',
                           image_size,
                           batch_size=args.batch_size,
                           num_workers=16)

    from aimet_common.defs import QuantScheme
    from aimet_torch.quantsim import QuantizationSimModel
    if hasattr(args, 'quant_scheme'):
        if args.quant_scheme == 'range_learning_tf':
            quant_scheme = QuantScheme.training_range_learning_with_tf_init
        elif args.quant_scheme == 'range_learning_tfe':
            quant_scheme = QuantScheme.training_range_learning_with_tf_enhanced_init
        elif args.quant_scheme == 'tf':
            quant_scheme = QuantScheme.post_training_tf
        elif args.quant_scheme == 'tf_enhanced':
            quant_scheme = QuantScheme.post_training_tf_enhanced
        else:
            raise ValueError("Got unrecognized quant_scheme: " +
                             args.quant_scheme)
        kwargs = {
            'quant_scheme': quant_scheme,
            'default_param_bw': args.default_param_bw,
            'default_output_bw': args.default_output_bw,
            'config_file': args.config_file
        }
    print(kwargs)
    sim = QuantizationSimModel(model.cpu(), input_shapes=input_shape, **kwargs)
    sim.compute_encodings(eval_func_quant, (32, True))
    post_quant_top1 = eval_func(sim.model.cuda(), (0, True))
    print("Post Quant Top1 :", post_quant_top1)
Example #6
    def test_parse_config_file_model_outputs(self):
        """ Test that model output quantization parameters are set correctly when using json config file """
        model = SingleResidual()
        model.eval()

        quantsim_config = {
            "defaults": {
                "ops": {},
                "params": {}
            },
            "params": {},
            "op_type": {},
            "supergroups": [],
            "model_input": {},
            "model_output": {
                "is_output_quantized": "True"
            }
        }
        with open('./data/quantsim_config.json', 'w') as f:
            json.dump(quantsim_config, f)
        sim = QuantizationSimModel(model, quant_scheme=QuantScheme.post_training_tf_enhanced, config_file='./data/quantsim_config.json',
                                   dummy_input=torch.rand(1, 3, 32, 32))
        for name, module in sim.model.named_modules():
            if isinstance(module, QcQuantizeWrapper):
                if name == 'fc':
                    # fc produces the model output, so its output quantizer should be enabled
                    assert module.output_quantizers[0].enabled
                else:
                    assert not module.output_quantizers[0].enabled
                assert not module.input_quantizer.enabled
        if os.path.exists('./data/quantsim_config.json'):
            os.remove('./data/quantsim_config.json')
Example #7
def get_simulations(model, args):
    from aimet_common.defs import QuantScheme
    from aimet_torch.quantsim import QuantizationSimModel
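    # Map the quant_scheme argument string to the corresponding AIMET QuantScheme enum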
    if hasattr(args, 'quant_scheme'):
        if args.quant_scheme == 'range_learning_tf':
            quant_scheme = QuantScheme.training_range_learning_with_tf_init
        elif args.quant_scheme == 'range_learning_tfe':
            quant_scheme = QuantScheme.training_range_learning_with_tf_enhanced_init
        elif args.quant_scheme == 'tf':
            quant_scheme = QuantScheme.post_training_tf
        elif args.quant_scheme == 'tf_enhanced':
            quant_scheme = QuantScheme.post_training_tf_enhanced
        else:
            raise ValueError("Got unrecognized quant_scheme: " +
                             args.quant_scheme)
    kwargs = {
        'quant_scheme': quant_scheme,
        'default_param_bw': args.default_param_bw,
        'default_output_bw': args.default_output_bw,
        'config_file': args.config_file
    }
    print(kwargs)
    sim = QuantizationSimModel(model.cpu(),
                               input_shapes=args.input_shape,
                               **kwargs)
    return sim
Example #8
    def test_memory_leak_during_quantization_eval(self):

        # First get baseline numbers
        base_pre_model_load_mark = torch.cuda.memory_allocated()
        model = models.vgg16(pretrained=True)
        model = model.to(torch.device('cuda'))
        base_model_loaded_mark = torch.cuda.memory_allocated()

        _ = model_eval(model=model, early_stopping_iterations=10)
        base_model_eval_mark = torch.cuda.memory_allocated()
        base_model_eval_delta = base_model_eval_mark - base_model_loaded_mark

        print("Usage Report ------")
        print("Model pre-load = {}".format(base_pre_model_load_mark))
        print("Model load = {}".format(base_model_loaded_mark))
        print("Model eval delta = {}".format(base_model_eval_delta))

        del model
        print("Leaked during eval = {}".format(torch.cuda.memory_allocated() - base_pre_model_load_mark))

        model = models.vgg16(pretrained=True)
        model = model.to(torch.device('cuda'))
        base_model_loaded_mark = torch.cuda.memory_allocated()

        # Now use AIMET
        sim = QuantizationSimModel(model, quant_scheme=QuantScheme.post_training_tf_enhanced,
                                   default_param_bw=8, default_output_bw=4,
                                   dummy_input=torch.rand(1, 3, 224, 224).cuda())
        sim.compute_encodings(model_eval, forward_pass_callback_args=1)

        aimet_model_quantize_mark = torch.cuda.memory_allocated()
        aimet_model_quantize_delta = aimet_model_quantize_mark - base_model_loaded_mark

        for i in range(1):
            _ = model_eval(model=sim.model, early_stopping_iterations=10)

        aimet_model_eval_mark = torch.cuda.memory_allocated()
        aimet_model_eval_delta = aimet_model_eval_mark - aimet_model_quantize_mark

        print("")
        print("Usage Report ------")
        print("Model load = {}".format(base_model_loaded_mark))
        print("AIMET quantize delta = {}".format(aimet_model_quantize_delta))
        print("AIMET eval delta = {}".format(aimet_model_eval_delta))

        self.assertEqual(0, aimet_model_eval_delta)
Example #9
    def test_parse_config_file_supergroups(self):
        """ Test that supergroup quantization parameters are set correctly when using json config file """
        model = TinyModel()
        model.eval()

        quantsim_config = {
            "defaults": {
                "ops": {
                    "is_output_quantized": "True",
                    "is_symmetric": "False"
                },
                "params": {
                    "is_quantized": "False",
                    "is_symmetric": "False"
                }
            },
            "params": {},
            "op_type": {},
            "supergroups": [
                {
                    "op_list": ["Conv", "BatchNormalization"]
                },
                {
                    "op_list": ["Relu", "MaxPool"]
                },
                {
                    "op_list": ["Conv", "Relu", "AveragePool"]
                }
            ],
            "model_input": {},
            "model_output": {}
        }
        with open('./data/quantsim_config.json', 'w') as f:
            json.dump(quantsim_config, f)
        # Use in_place=True here for easy access to modules through model instance variables
        sim = QuantizationSimModel(model, quant_scheme=QuantScheme.post_training_tf_enhanced,
                                   config_file='./data/quantsim_config.json',
                                   in_place=True, dummy_input=torch.rand(1, 3, 32, 32))
        for _, module in sim.model.named_modules():
            if isinstance(module, QcQuantizeWrapper):
                # Check configs for starts of supergroups
                if module in [model.conv1, model.relu1, model.conv2, model.conv3]:
                    assert not module.output_quantizers[0].enabled
                # Check configs for middle ops in supergroups
                elif module == model.relu3:
                    assert not module.input_quantizer.enabled
                    assert not module.output_quantizers[0].enabled
                # Check configs for ends of supergroups
                elif module in [model.bn1, model.maxpool, model.bn2, model.avgpool]:
                    assert not module.input_quantizer.enabled
                    assert module.output_quantizers[0].enabled
                else:
                    assert not module.input_quantizer.enabled
                    assert module.output_quantizers[0].enabled

        if os.path.exists('./data/quantsim_config.json'):
            os.remove('./data/quantsim_config.json')
Example #10
    def test_quantize_resnet18(self):

        torch.cuda.empty_cache()

        # Train the model using tiny imagenet data
        model = models.resnet18(pretrained=False)
        _ = model_train(model, epochs=2)
        model = model.to(torch.device('cuda'))

        # layers_to_ignore = [model.conv1]
        sim = QuantizationSimModel(model, quant_scheme=QuantScheme.post_training_tf, default_param_bw=8,
                                   default_output_bw=8, dummy_input=torch.rand(1, 3, 224, 224).cuda())

        print(sim.model)

        # If 'iterations' is set to None, iterate over all the validation data
        sim.compute_encodings(model_eval, forward_pass_callback_args=400)
        quantized_model_accuracy = model_eval(model=sim.model, early_stopping_iterations=None)

        print("Quantized model accuracy=", quantized_model_accuracy)
        self.assertGreaterEqual(quantized_model_accuracy, 0.5)
Example #11
    def test_quantsim_export(self):
        torch.manual_seed(10)
        model = Model2(Add())
        dummy_input = torch.randn(5, 10, 10, 20)
        sim = QuantizationSimModel(model, dummy_input)
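        # Manually construct a TfEncoding and assign it to the output and weight quantizers before export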
        encodings = libpymo.TfEncoding()
        encodings.bw = 8
        encodings.max = 5
        encodings.min = -5
        encodings.delta = 1
        encodings.offset = 0.2
        sim.model.op1.output_quantizer.encoding = encodings
        sim.model.conv1.output_quantizer.encoding = encodings
        sim.model.conv1.param_quantizers['weight'].encoding = encodings
        sim.export(path='./data', filename_prefix='quant_model', dummy_input=dummy_input)

        with open('./data/quant_model.encodings') as f:
            data = json.load(f)

        self.assertTrue(isinstance(data['activation_encodings']['3'], list))
        self.assertTrue(isinstance(data['activation_encodings']['4'], list))
Example #12
    def test_retraining_on_quantized_model_first_step(self):

        torch.cuda.empty_cache()

        model = mnist_model.Net().to(torch.device('cuda'))

        sim = QuantizationSimModel(model,
                                   default_output_bw=4,
                                   default_param_bw=4,
                                   dummy_input=torch.rand(1, 1, 28, 28).cuda())

        # Quantize the untrained MNIST model
        sim.compute_encodings(self.forward_pass, forward_pass_callback_args=5)

        # Train the model for one epoch
        mnist_model.train(model=sim.model,
                          epochs=1,
                          num_batches=3,
                          batch_callback=check_if_layer_weights_are_updating,
                          use_cuda=True)

        # Checkpoint the model
        save_checkpoint(sim, os.path.join(path, 'checkpoint.pt'))
Example #13
    def test_with_finetuning(self):

        torch.cuda.empty_cache()

        model = mnist_model.Net().to(torch.device('cuda'))
        mnist_torch_model.evaluate(model=model, iterations=None, use_cuda=True)

        sim = QuantizationSimModel(model,
                                   dummy_input=torch.rand(1, 1, 28, 28).cuda())

        # Quantize the untrained MNIST model
        sim.compute_encodings(self.forward_pass, forward_pass_callback_args=5)

        # Run some inferences
        mnist_torch_model.evaluate(model=sim.model,
                                   iterations=None,
                                   use_cuda=True)

        # train the model again
        mnist_model.train(sim.model,
                          epochs=1,
                          num_batches=3,
                          batch_callback=check_if_layer_weights_are_updating,
                          use_cuda=True)
Example #14
    def test_parse_config_file_defaults(self):
        """ Test that default quantization parameters are set correctly when using json config file """
        model = SingleResidual()
        model.eval()

        quantsim_config = {
            "defaults": {
                "ops": {
                    "is_output_quantized": "True",
                    "is_symmetric": "False"
                },
                "params": {
                    "is_quantized": "False",
                    "is_symmetric": "True"
                }
            },
            "params": {},
            "op_type": {},
            "supergroups": [],
            "model_input": {},
            "model_output": {}
        }
        with open('./data/quantsim_config.json', 'w') as f:
            json.dump(quantsim_config, f)

        sim = QuantizationSimModel(model, quant_scheme=QuantScheme.post_training_tf_enhanced,
                                   config_file='./data/quantsim_config.json',
                                   dummy_input=torch.rand(1, 3, 32, 32), in_place=True)
        for name, module in sim.model.named_modules():
            if isinstance(module, QcQuantizeWrapper):
                # The output of the elementwise add is quantized via relu3's input quantizer
                if name == 'relu3':
                    assert module.input_quantizer.enabled
                else:
                    assert not module.input_quantizer.enabled
                assert module.output_quantizers[0].enabled
                assert not module.input_quantizer.use_symmetric_encodings
                assert not module.output_quantizers[0].use_symmetric_encodings
                if module.param_quantizers:
                    for _, param_quantizer in module.param_quantizers.items():
                        assert not param_quantizer.enabled
                        assert param_quantizer.use_symmetric_encodings

        if os.path.exists('./data/quantsim_config.json'):
            os.remove('./data/quantsim_config.json')
Example #15
    def test_supergroups_with_elementwise_add(self):
        """ Test that supergroup quantization parameters are set correctly when using json config file """
        model = SingleResidual()
        model.eval()

        quantsim_config = {
            "defaults": {
                "ops": {
                    "is_output_quantized": "True"
                },
                "params": {}
            },
            "params": {},
            "op_type": {},
            "supergroups": [
                {
                    "op_list": ["Add", "Relu"]
                }
            ],
            "model_input": {},
            "model_output": {}
        }
        with open('./data/quantsim_config.json', 'w') as f:
            json.dump(quantsim_config, f)
        # Use in_place=True here for easy access to modules through model instance variables
        sim = QuantizationSimModel(model, quant_scheme=QuantScheme.post_training_tf_enhanced, config_file='./data/quantsim_config.json',
                                   in_place=True, dummy_input=torch.rand(1, 3, 32, 32))
        for _, module in sim.model.named_modules():
            if isinstance(module, QcQuantizeWrapper):
                # Check configs for starts of supergroups
                if module == model.relu3:
                    # If add were not part of the supergroup, relu's input quantizer would be enabled
                    assert not module.input_quantizer.enabled

        if os.path.exists('./data/quantsim_config.json'):
            os.remove('./data/quantsim_config.json')
Example #16
def export_and_generate_encodings(model, params):
    os.makedirs(params.log_path)

    enc_ds = create_encoder_dataset(params, return_type='dataset')

    def evaluator_enc(model, iterations):
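        # Forward-pass callback: run every sample of the encoder dataset through the model so encodings can be computed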
        for query_id in tqdm(range(enc_ds.get_item_count())):
            query_ids = [query_id]
            enc_ds.load_query_samples(query_ids)
            img, label = enc_ds.get_samples(query_ids)
            with torch.no_grad():
                _ = model(img)
            enc_ds.unload_query_samples(query_ids)

    quantizer = QuantizationSimModel(model=model,
                                     input_shapes=params.input_shape_tuple,
                                     quant_scheme=params.quant_scheme,
                                     rounding_mode=params.rounding_mode,
                                     default_output_bw=params.default_bitwidth,
                                     default_param_bw=params.default_bitwidth,
                                     in_place=False,
                                     config_file=params.config_file)
    quantizer_modifications(quantizer)
    quantizer.compute_encodings(forward_pass_callback=evaluator_enc,
                                forward_pass_callback_args=1)

    quantizer.export(path=params.log_path,
                     filename_prefix=params.filename_prefix,
                     input_shape=params.input_shape_tuple)

    input_file = os.path.join(params.log_path,
                              '%s.encodings' % str(params.filename_prefix))

    remap_bitwidth_to_32(input_file)

    with open(os.path.join(params.log_path, params.my_filename), 'wb') as f:
        pickle.dump(params, f)

    return quantizer
Example #17
def quantize_model(trainer_function):

    model = mnist_torch_model.Net().to(torch.device('cuda'))

    sim = QuantizationSimModel(
        model,
        default_output_bw=8,
        default_param_bw=8,
        dummy_input=torch.rand(1, 1, 28, 28),
        config_file=
        '../../../TrainingExtensions/common/src/python/aimet_common/quantsim_config/'
        'default_config.json')

    # Quantize the untrained MNIST model
    sim.compute_encodings(forward_pass_callback=evaluate_model,
                          forward_pass_callback_args=5)

    # Fine-tune the model's parameters by training
    trainer_function(model=sim.model, epochs=1, num_batches=100, use_cuda=True)

    # Export the model
    sim.export(path='./',
               filename_prefix='quantized_mnist',
               dummy_input=torch.rand(1, 1, 28, 28))
Example #18
    def test_and_compare_quantizer_no_fine_tuning_CPU_and_GPU(self):

        torch.manual_seed(1)
        torch.backends.cudnn.deterministic = True
        dummy_input = torch.rand(1, 1, 28, 28)
        dummy_input_cuda = dummy_input.cuda()

        start_time = time.time()

        # create model on CPU
        model_cpu = mnist_model.Net().to('cpu')
        model_gpu = copy.deepcopy(model_cpu).to('cuda')
        cpu_sim_model = QuantizationSimModel(model_cpu,
                                             quant_scheme='tf',
                                             in_place=True,
                                             dummy_input=dummy_input)
        # Quantize
        cpu_sim_model.compute_encodings(forward_pass, None)

        print("Encodings for cpu model calculated")
        print("Took {} secs".format(time.time() - start_time))
        start_time = time.time()

        # create model on GPU
        gpu_sim_model = QuantizationSimModel(model_gpu,
                                             quant_scheme='tf',
                                             in_place=True,
                                             dummy_input=dummy_input_cuda)
        # Quantize
        gpu_sim_model.compute_encodings(forward_pass, None)

        print("Encodings for gpu model calculated")
        print("Took {} secs".format(time.time() - start_time))

        # Check only the min and max of the encodings.
        # assertAlmostEqual tests that two values are approximately equal by computing
        # the difference, rounding to the given number of decimal places (default 7),
        # and comparing to zero. Note that it rounds to decimal places (like the
        # round() function), not significant digits.
        # fc1 is excluded since it is part of a MatMul->Relu supergroup.
        # assertEqual cannot be used for fc2, so assertAlmostEqual is used instead.
        self.assertAlmostEqual(
            model_gpu.conv1.output_quantizers[0].encoding.min,
            model_cpu.conv1.output_quantizers[0].encoding.min,
            delta=0.001)
        self.assertAlmostEqual(
            model_gpu.conv1.output_quantizers[0].encoding.max,
            model_cpu.conv1.output_quantizers[0].encoding.max,
            delta=0.001)

        self.assertAlmostEqual(
            model_gpu.conv2.output_quantizers[0].encoding.min,
            model_cpu.conv2.output_quantizers[0].encoding.min,
            delta=0.001)
        self.assertAlmostEqual(
            model_gpu.conv2.output_quantizers[0].encoding.max,
            model_cpu.conv2.output_quantizers[0].encoding.max,
            delta=0.001)

        self.assertAlmostEqual(model_gpu.fc2.output_quantizers[0].encoding.min,
                               model_cpu.fc2.output_quantizers[0].encoding.min,
                               delta=0.001)
        self.assertAlmostEqual(model_gpu.fc2.output_quantizers[0].encoding.max,
                               model_cpu.fc2.output_quantizers[0].encoding.max,
                               delta=0.001)

        gpu_sim_model.export("./data/", "quantizer_no_fine_tuning__GPU",
                             dummy_input)
        cpu_sim_model.export("./data/", "quantizer_no_fine_tuning__CPU",
                             dummy_input)

        self.assertEqual(torch.device('cuda:0'),
                         next(model_gpu.parameters()).device)
        self.assertEqual(torch.device('cpu'),
                         next(model_cpu.parameters()).device)
Example #19
def quantize_model(model, bitwidth=8, layerwise_bitwidth=None, retrain=True, ref_model=None, flags=None, adaround=False, lr=0.00000001):
    res = check_metrics(dataloader, model, image_resolution)
    print(res)
    input_shape = coord_dataset.mgrid.shape
    dummy_in = ((torch.rand(input_shape).unsqueeze(0) * 2) - 1).cuda()
    aimet_dataloader = DataLoader(AimetDataset(coord_dataset), shuffle=True, batch_size=1, pin_memory=True,
                                  num_workers=0)
    # Create QuantSim for the model
    sim = QuantizationSimModel(model, default_param_bw=bitwidth,
                               default_output_bw=31, dummy_input=dummy_in)
    modules_to_exclude = (Sine, ImageDownsampling, PosEncodingNeRF,
                          FourierFeatureEncodingPositional, FourierFeatureEncodingGaussian)
    excl_layers = []
    for mod in sim.model.modules():
        if isinstance(mod, QcPostTrainingWrapper) and isinstance(mod._module_to_wrap, modules_to_exclude):
            excl_layers.append(mod)

    sim.exclude_layers_from_quantization(excl_layers)
    i = 0
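    # Quantize parameters only: disable activation quantizers and use symmetric encodings for weights and biases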
    for name, mod in sim.model.named_modules():
        if isinstance(mod, QcPostTrainingWrapper):
            mod.output_quantizer.enabled = False
            mod.input_quantizer.enabled = False
            weight_quantizer = mod.param_quantizers['weight']
            bias_quantizer = mod.param_quantizers['bias']

            weight_quantizer.use_symmetric_encodings = True
            bias_quantizer.use_symmetric_encodings = True
            if torch.count_nonzero(mod._module_to_wrap.bias.data):
                mod.param_quantizers['bias'].enabled = True
            if layerwise_bitwidth:
                mod.param_quantizers['bias'].bitwidth = layerwise_bitwidth[i]
                mod.param_quantizers['weight'].bitwidth = layerwise_bitwidth[i]
                i += 1
    res = check_metrics(dataloader, sim.model, image_resolution)
    print(res)
    if adaround:

        params = AdaroundParameters(data_loader=aimet_dataloader, num_batches=1, default_num_iterations=500,
                                    default_reg_param=0.001, default_beta_range=(20, 2))
        # adarounded_model_1 = Adaround.apply_adaround(model=model, dummy_input=dummy_in, params=params,path='', filename_prefix='adaround',
        #                               default_param_bw=bitwidth, ignore_quant_ops_list=excl_layers )
        # Compute only param encodings
        Adaround._compute_param_encodings(sim)

        # Get the module - activation function pair using ConnectedGraph
        module_act_func_pair = connectedgraph_utils.get_module_act_func_pair(model, dummy_in)

        Adaround._adaround_model(model, sim, module_act_func_pair, params, dummy_in)
        #res = check_metrics(dataloader, sim.model, image_resolution)
        #print('1st stage ada round ', res)
        # Update every module (AdaroundSupportedModules) weight with Adarounded weight (Soft rounding)
        Adaround._update_modules_with_adarounded_weights(sim)
        path = ''


        # from aimet_torch.cross_layer_equalization import equalize_model
        # equalize_model(model, input_shape)

        # params = QuantParams(weight_bw=4, act_bw=4, round_mode="nearest", quant_scheme='tf_enhanced')
        #
        # # Perform Bias Correction
        # bias_correction.correct_bias(model.to(device="cuda"), params, num_quant_samples=1,
        #                              data_loader=aimet_dataloader, num_bias_correct_samples=1)

        # torch.save(sim.model,
        #            os.path.join(
        #                os.path.join(exp_folder,
        #                             image_name + '/checkpoints/model_aimet_quantized.pth')))

        quantized_model = sim.model
        #res = check_metrics(dataloader, sim.model, image_resolution)
        #print('After Adaround ', res)
    #
    # if retrain:
    #     loss_fn = partial(loss_functions.image_mse, None)
    #     #quantized_model = retrain_model(sim.model, dataloader, 200, loss_fn, 0.0000005, flags['l1_reg'] if flags is not None else 0)
    #     quantized_model = retrain_model(sim.model, dataloader, 300, loss_fn, lr,
    #                                     flags['l1_reg'] if flags is not None else 0)
    #     # Fine-tune the model's parameter using training
    #     # torch.save(quantized_model,
    #     #            os.path.join(
    #     #                os.path.join(exp_folder,
    #     #                             image_name + '/checkpoints/model_aimet_quantized_retrained.pth')))
    #     res = check_metrics(dataloader, quantized_model, image_resolution)
    #     print('After retraining ',res)
    #     state_dict ={}
    #     quantized_dict = {}
    #     for name, module in sim.model.named_modules():
    #         if isinstance(module, QcPostTrainingWrapper) and isinstance(module._module_to_wrap, torch.nn.Linear):
    #             weight_quantizer = module.param_quantizers['weight']
    #             bias_quantizer = module.param_quantizers['bias']
    #             weight_quantizer.enabled = True
    #             bias_quantizer.enabled = True
    #             weight_quantizer.use_soft_rounding = False
    #             bias_quantizer.use_soft_rounding = False
    #             wrapped_linear = module._module_to_wrap
    #             weight = wrapped_linear.weight
    #             bias = wrapped_linear.bias
    #             if not (torch.all(weight < weight_quantizer.encoding.max) and torch.all(
    #                     weight > weight_quantizer.encoding.min)):
    #                 print("not within bounds")
    #
    #             weight_dequant = weight_quantizer.quantize_dequantize(weight,
    #                                                                                 weight_quantizer.round_mode).cpu().detach()
    #             state_dict[name + '.weight'] = weight_dequant
    #             # assert(len(torch.unique(state_dict[name + '.weight'])) <= 2**bitwidth)
    #             bias_dequant = bias_quantizer.quantize_dequantize(bias,
    #                                                                             bias_quantizer.round_mode).cpu().detach()
    #             state_dict[name + '.bias'] = bias_dequant
    #             # assert (len(torch.unique(state_dict[name + '.bias'])) <= 2 ** bitwidth)
    #             quantized_weight = weight_dequant / weight_quantizer.encoding.delta
    #             quantized_bias = bias_dequant / bias_quantizer.encoding.delta
    #             weights_csc = scipy.sparse.csc_matrix(quantized_weight + weight_quantizer.encoding.offset)
    #             quantized_dict[name] = {'weight': {'data': quantized_weight, 'encoding': weight_quantizer.encoding},
    #                                     'bias': {'data': quantized_bias, 'encoding': bias_quantizer.encoding}}
    #     res = check_metrics(dataloader, quantized_model, image_resolution)
    #     print('After hard rounding ', res)

    if adaround:


        filename_prefix = 'adaround'
        # Export quantization encodings to JSON-formatted file
        Adaround._export_encodings_to_json(path, filename_prefix, sim)
        #res = check_metrics(dataloader, sim.model, image_resolution)
        SaveUtils.remove_quantization_wrappers(sim.model)
        adarounded_model = sim.model

        #print('After Adaround ', res)



        sim = QuantizationSimModel(adarounded_model, default_param_bw=bitwidth,
                                   default_output_bw=31, dummy_input=dummy_in)

        for mod in sim.model.modules():
            if isinstance(mod, QcPostTrainingWrapper) and isinstance(mod._module_to_wrap, modules_to_exclude):
                excl_layers.append(mod)

        sim.exclude_layers_from_quantization(excl_layers)
        i = 0
        for name, mod in sim.model.named_modules():
            if isinstance(mod, QcPostTrainingWrapper):
                mod.output_quantizer.enabled = False
                mod.input_quantizer.enabled = False
                weight_quantizer = mod.param_quantizers['weight']
                bias_quantizer = mod.param_quantizers['bias']

                weight_quantizer.use_symmetric_encodings = True
                bias_quantizer.use_symmetric_encodings = True
                if torch.count_nonzero(mod._module_to_wrap.bias.data):
                    mod.param_quantizers['bias'].enabled = True
                if layerwise_bitwidth:
                    mod.param_quantizers['bias'].bitwidth = layerwise_bitwidth[i]
                    mod.param_quantizers['weight'].bitwidth = layerwise_bitwidth[i]
                    i += 1

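        # Load the AdaRound parameter encodings exported above and freeze them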
        sim.set_and_freeze_param_encodings(encoding_path='adaround.encodings')

        # Quantize the untrained MNIST model
        #sim.compute_encodings(forward_pass_callback=evaluate_model, forward_pass_callback_args=5)
        res = check_metrics(dataloader, sim.model, image_resolution)
        print(res)
    if retrain:
        loss_fn = partial(loss_functions.image_mse, None)
        #quantized_model = retrain_model(sim.model, dataloader, 200, loss_fn, 0.0000005, flags['l1_reg'] if flags is not None else 0)
        quantized_model = retrain_model(sim.model, dataloader, 1000, loss_fn, lr,
                                        flags['l1_reg'] if flags is not None else 0)
        #sim.compute_encodings(forward_pass_callback=evaluate_model, forward_pass_callback_args=5)
        # Fine-tune the model's parameter using training
        # torch.save(quantized_model,
        #            os.path.join(
        #                os.path.join(exp_folder,
        #                             image_name + '/checkpoints/model_aimet_quantized_retrained.pth')))
        res = check_metrics(dataloader, quantized_model, image_resolution)
        print('After retraining ',res)


    # w = sim.model.net.net[0][0]._module_to_wrap.weight
    # q = sim.model.net.net[0][0].param_quantizers['weight']
    # wq = q.quantize(w, q.round_mode)

    # Compute the difference for each parameter
    if ref_model is not None:
        new_state_dict = sim.model.state_dict()
        lis = [[i, j, a, b] for i, a in ref_model.named_parameters()
               for j, b in sim.model.named_parameters()
               if i == j.replace('._module_to_wrap', '')]
        for module in lis:
            new_state_dict[module[1]] = module[3] - module[2]
        sim.model.load_state_dict(new_state_dict)
        #sim.compute_encodings(forward_pass_callback=evaluate_model, forward_pass_callback_args=1)

    quantized_dict = {}
    state_dict = {}
    for name, module in sim.model.named_modules():
        if isinstance(module, QcPostTrainingWrapper) and isinstance(module._module_to_wrap, torch.nn.Linear):
            weight_quantizer = module.param_quantizers['weight']
            bias_quantizer = module.param_quantizers['bias']
            weight_quantizer.enabled = True
            bias_quantizer.enabled = True
            wrapped_linear = module._module_to_wrap
            weight = wrapped_linear.weight
            bias = wrapped_linear.bias
            if not (torch.all(weight < weight_quantizer.encoding.max) and torch.all(weight > weight_quantizer.encoding.min)):
                print("not within bounds")

            state_dict[name + '.weight'] = weight_quantizer.quantize_dequantize(weight,weight_quantizer.round_mode).cpu().detach()
            #assert(len(torch.unique(state_dict[name + '.weight'])) <= 2**bitwidth)
            state_dict[name + '.bias'] = bias_quantizer.quantize_dequantize(bias, bias_quantizer.round_mode).cpu().detach()
            #assert (len(torch.unique(state_dict[name + '.bias'])) <= 2 ** bitwidth)
            quantized_weight = weight_quantizer.quantize(weight, weight_quantizer.round_mode).cpu().detach().numpy() + weight_quantizer.encoding.offset
            quantized_bias = bias_quantizer.quantize(bias, bias_quantizer.round_mode).cpu().detach().numpy() + bias_quantizer.encoding.offset
            weights_csc = scipy.sparse.csc_matrix(quantized_weight + weight_quantizer.encoding.offset)
            quantized_dict[name] = {'weight': {'data': quantized_weight, 'encoding': weight_quantizer.encoding}, 'bias': {'data': quantized_bias, 'encoding': bias_quantizer.encoding}}

    weights_np = []
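    # Pack the quantized weights and biases into integer arrays and zlib-compress them to measure the compressed size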
    for l in quantized_dict.values():
        w = l['weight']['data']
        b = l['bias']['data']
        Q = l['weight']['encoding'].bw
        if Q < 9:
            tpe = 'int8'
        elif Q < 17:
            tpe = 'int16'
        else:
            tpe = 'int32'
        w = w.astype(tpe).flatten()
        weights_np.append(w)

        if l['bias']['encoding']:
            Q = l['bias']['encoding'].bw
            if Q < 9:
                tpe = 'int8'
            elif Q < 17:
                tpe = 'int16'
            else:
                tpe = 'int32'
            b = b.astype(tpe).flatten()
            weights_np.append(b)
    weights_np = np.concatenate(weights_np)
    comp = zlib.compress(weights_np, level=9)
    print(len(comp))
    # sim.export(path=os.path.join(
    #                os.path.join(exp_folder,
    #                             image_name, 'checkpoints')), filename_prefix='model_aimet_quantized_retrained', dummy_input=dummy_in, set_onnx_layer_names=False)

    print(res)
    return quantized_model, res, len(comp), state_dict
Example #20
def main():
    args = arguments()
    seed(args)
    if args.checkpoint:
        model = torch.load(args.checkpoint)
    else:
        model = load_model()
    model.eval()
    input_shape = (1, 3, 224, 224)
    args.input_shape = input_shape
    image_size = input_shape[-1]

    data_loader_kwargs = {
        'worker_init_fn': work_init,
        'num_workers': args.num_workers
    }
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    val_transforms = transforms.Compose([
        transforms.Resize(image_size + 24),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(), normalize
    ])
    val_data = datasets.ImageFolder(args.images_dir + '/val/', val_transforms)
    val_dataloader = DataLoader(val_data,
                                args.batch_size,
                                shuffle=False,
                                pin_memory=True,
                                **data_loader_kwargs)

    eval_func_quant = model_eval(val_dataloader,
                                 image_size,
                                 batch_size=args.batch_size,
                                 quant=True)
    eval_func = model_eval(val_dataloader,
                           image_size,
                           batch_size=args.batch_size)

    if 'BNfold' in args.quant_tricks:
        print("BN fold")
        model, conv_bn_pairs = run_pytorch_bn_fold(args, model)
    if 'CLE' in args.quant_tricks:
        print("CLE")
        model = run_pytorch_cross_layer_equalization(args, model)

    if hasattr(args, 'quant_scheme'):
        if args.quant_scheme == 'range_learning_tf':
            quant_scheme = QuantScheme.training_range_learning_with_tf_init
        elif args.quant_scheme == 'range_learning_tfe':
            quant_scheme = QuantScheme.training_range_learning_with_tf_enhanced_init
        elif args.quant_scheme == 'tf':
            quant_scheme = QuantScheme.post_training_tf
        elif args.quant_scheme == 'tf_enhanced':
            quant_scheme = QuantScheme.post_training_tf_enhanced
        else:
            raise ValueError("Got unrecognized quant_scheme: " +
                             args.quant_scheme)
        kwargs = {
            'quant_scheme': quant_scheme,
            'default_param_bw': args.default_param_bw,
            'default_output_bw': args.default_output_bw,
            'config_file': args.config_file
        }
    print(kwargs)
    sim = QuantizationSimModel(model.cpu(), input_shapes=input_shape, **kwargs)

    # Manually configure the supergroup; AIMET currently does not support [Conv, ReLU6] as a supergroup
    from aimet_torch.qc_quantize_op import QcPostTrainingWrapper
    for quant_wrapper in sim.model.modules():
        if isinstance(quant_wrapper, QcPostTrainingWrapper):
            if isinstance(quant_wrapper._module_to_wrap, torch.nn.Conv2d):
                quant_wrapper.output_quantizer.enabled = False

    sim.model.blocks[0][0].conv_pw.output_quantizer.enabled = True
    sim.model.blocks[1][0].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[1][1].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[2][0].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[2][1].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[3][0].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[3][1].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[3][2].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[4][0].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[4][1].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[4][2].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[5][0].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[5][1].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[5][2].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[5][3].conv_pwl.output_quantizer.enabled = True
    sim.model.blocks[6][0].conv_pwl.output_quantizer.enabled = True

    sim.compute_encodings(eval_func_quant, (32, True))
    print(sim)
    post_quant_top1 = eval_func(sim.model.cuda(), (0, True))
    print("Post Quant Top1 :", post_quant_top1)