Code Example #1
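A unit test, evidently from AIMET's PyTorch test suite, that hand-assigns a fixed libpymo.TfEncoding to a few quantizers of a QuantizationSimModel and then checks the structure of the exported encodings file. Model2 and Add are test fixtures defined elsewhere in that suite.
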
    def test_quantsim_export(self):
        torch.manual_seed(10)
        model = Model2(Add())
        dummy_input = torch.randn(5, 10, 10, 20)
        sim = QuantizationSimModel(model, dummy_input)
        # Hand-craft a single TfEncoding and reuse it for several quantizers
        encodings = libpymo.TfEncoding()
        encodings.bw = 8
        encodings.max = 5
        encodings.min = -5
        encodings.delta = 1
        encodings.offset = 0.2

        # Assign the encoding to the add op's output quantizer and to the
        # conv layer's output and weight quantizers
        sim.model.op1.output_quantizer.encoding = encodings
        sim.model.conv1.output_quantizer.encoding = encodings
        sim.model.conv1.param_quantizers['weight'].encoding = encodings

        # Export the simulated model together with its .encodings JSON
        sim.export(path='./data', filename_prefix='quant_model', dummy_input=dummy_input)

        # Read back the exported encodings JSON
        with open('./data/quant_model.encodings') as f:
            data = json.load(f)

        # Each activation encoding is exported as a list of encoding dicts,
        # keyed by tensor name
        self.assertTrue(isinstance(data['activation_encodings']['3'], list))
        self.assertTrue(isinstance(data['activation_encodings']['4'], list))
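
The exported .encodings file is plain JSON: as the assertions show, it carries a top-level activation_encodings map keyed by tensor name, each entry being a list of encoding dicts. AIMET's format also includes a param_encodings map for the weights, which this test does not check.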
Code Example #2
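A pipeline-style wrapper around the same API: calibrate a QuantizationSimModel over a full dataset, export the encodings, post-process them, and pickle the run parameters. create_encoder_dataset, quantizer_modifications, and remap_bitwidth_to_32 are project-specific helpers not shown here; this variant also uses the older input_shapes-based constructor.
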
def export_and_generate_encodings(model, params):
    os.makedirs(params.log_path)

    enc_ds = create_encoder_dataset(params, return_type='dataset')

    def evaluator_enc(model, iterations):
        # Forward-pass callback for compute_encodings(): runs every
        # calibration sample through the model once ('iterations' is unused)
        for query_id in tqdm(range(enc_ds.get_item_count())):
            query_ids = [query_id]
            enc_ds.load_query_samples(query_ids)
            img, label = enc_ds.get_samples(query_ids)
            with torch.no_grad():
                _ = model(img)
            enc_ds.unload_query_samples(query_ids)

    quantizer = QuantizationSimModel(model=model,
                                     input_shapes=params.input_shape_tuple,
                                     quant_scheme=params.quant_scheme,
                                     rounding_mode=params.rounding_mode,
                                     default_output_bw=params.default_bitwidth,
                                     default_param_bw=params.default_bitwidth,
                                     in_place=False,
                                     config_file=params.config_file)
    quantizer_modifications(quantizer)

    # Calibrate: run the forward-pass callback so AIMET can compute encodings
    quantizer.compute_encodings(forward_pass_callback=evaluator_enc,
                                forward_pass_callback_args=1)

    quantizer.export(path=params.log_path,
                     filename_prefix=params.filename_prefix,
                     input_shape=params.input_shape_tuple)

    # Post-process the exported encodings file (project-specific helper)
    input_file = os.path.join(params.log_path,
                              '%s.encodings' % params.filename_prefix)
    remap_bitwidth_to_32(input_file)

    # Persist the run parameters alongside the exported artifacts
    with open(os.path.join(params.log_path, params.my_filename), 'wb') as f:
        pickle.dump(params, f)

    return quantizer
Code Example #3
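An end-to-end quantization-aware training flow on MNIST: build the sim, compute encodings with an evaluation callback, fine-tune the wrapped model, and export. mnist_torch_model and trainer_function are supplied by the surrounding project.
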
def quantize_model(trainer_function):

    model = mnist_torch_model.Net().to(torch.device('cuda'))

    sim = QuantizationSimModel(
        model,
        default_output_bw=8,
        default_param_bw=8,
        dummy_input=torch.rand(1, 1, 28, 28),
        config_file='../../../TrainingExtensions/common/src/python/'
                    'aimet_common/quantsim_config/default_config.json')

    # Compute quantization encodings for the untrained MNIST model
    sim.compute_encodings(forward_pass_callback=evaluate_model,
                          forward_pass_callback_args=5)

    # Fine-tune the model's parameters with quantization-aware training
    trainer_function(model=sim.model, epochs=1, num_batches=100, use_cuda=True)

    # Export the model
    sim.export(path='./',
               filename_prefix='quantized_mnist',
               dummy_input=torch.rand(1, 1, 28, 28))
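
The evaluate_model callback used above is not shown in this example. Any callable that accepts the model and the forward_pass_callback_args value will do; the sketch below is a hypothetical stand-in (the random input data and batch size are assumptions, not part of the original), just to illustrate the expected signature.

import torch

def evaluate_model(model, iterations):
    # Hypothetical forward-pass callback: run a few batches through the
    # model so AIMET can observe activation ranges; outputs are discarded.
    model.eval()
    device = next(model.parameters()).device
    with torch.no_grad():
        for _ in range(iterations):
            # Random data stands in for a real calibration loader here;
            # in practice, feed representative samples.
            model(torch.rand(64, 1, 28, 28, device=device))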
Code Example #4
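A test that calibrates identical copies of the same seeded model on CPU and on GPU and expects the resulting encodings to agree to within 1e-3. forward_pass is a module-level calibration callback, similar in shape to the evaluate_model sketch above.
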
    def test_and_compare_quantizer_no_fine_tuning_CPU_and_GPU(self):

        torch.manual_seed(1)
        torch.backends.cudnn.deterministic = True
        dummy_input = torch.rand(1, 1, 28, 28)
        dummy_input_cuda = dummy_input.cuda()

        start_time = time.time()

        # Create the model on CPU and an identical deep copy on GPU
        model_cpu = mnist_model.Net().to('cpu')
        model_gpu = copy.deepcopy(model_cpu).to('cuda')
        cpu_sim_model = QuantizationSimModel(model_cpu,
                                             quant_scheme='tf',
                                             in_place=True,
                                             dummy_input=dummy_input)
        # Compute quantization encodings for the CPU model
        cpu_sim_model.compute_encodings(forward_pass, None)

        print("Encodings for cpu model calculated")
        print("Took {} secs".format(time.time() - start_time))
        start_time = time.time()

        # Create the sim model for the GPU copy
        gpu_sim_model = QuantizationSimModel(model_gpu,
                                             quant_scheme='tf',
                                             in_place=True,
                                             dummy_input=dummy_input_cuda)
        # Compute quantization encodings for the GPU model
        gpu_sim_model.compute_encodings(forward_pass, None)

        print("Encodings for gpu model calculated")
        print("Took {} secs".format(time.time() - start_time))

        # Compare only the min and max of each encoding. CPU and GPU
        # floating-point results need not match bit-for-bit, so
        # assertAlmostEqual with a small delta is used instead of
        # assertEqual. fc1 is excluded because it is part of a
        # MatMul->Relu supergroup.
        self.assertAlmostEqual(
            model_gpu.conv1.output_quantizers[0].encoding.min,
            model_cpu.conv1.output_quantizers[0].encoding.min,
            delta=0.001)
        self.assertAlmostEqual(
            model_gpu.conv1.output_quantizers[0].encoding.max,
            model_cpu.conv1.output_quantizers[0].encoding.max,
            delta=0.001)

        self.assertAlmostEqual(
            model_gpu.conv2.output_quantizers[0].encoding.min,
            model_cpu.conv2.output_quantizers[0].encoding.min,
            delta=0.001)
        self.assertAlmostEqual(
            model_gpu.conv2.output_quantizers[0].encoding.max,
            model_cpu.conv2.output_quantizers[0].encoding.max,
            delta=0.001)

        self.assertAlmostEqual(model_gpu.fc2.output_quantizers[0].encoding.min,
                               model_cpu.fc2.output_quantizers[0].encoding.min,
                               delta=0.001)
        self.assertAlmostEqual(model_gpu.fc2.output_quantizers[0].encoding.max,
                               model_cpu.fc2.output_quantizers[0].encoding.max,
                               delta=0.001)

        gpu_sim_model.export("./data/", "quantizer_no_fine_tuning__GPU",
                             dummy_input)
        cpu_sim_model.export("./data/", "quantizer_no_fine_tuning__CPU",
                             dummy_input)

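        # With in_place=True the sim wrapped the original models, so each
        # should still reside on the device it was created on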
        self.assertEqual(torch.device('cuda:0'),
                         next(model_gpu.parameters()).device)
        self.assertEqual(torch.device('cpu'),
                         next(model_cpu.parameters()).device)