def test_quantsim_export(self):
    """Export a quantsim model and verify activation encodings serialize as lists."""
    torch.manual_seed(10)
    dummy_input = torch.randn(5, 10, 10, 20)
    sim = QuantizationSimModel(Model2(Add()), dummy_input)

    # Build a single encoding object and attach it to every quantizer under test.
    enc = libpymo.TfEncoding()
    for attr, value in (('bw', 8), ('max', 5), ('min', -5), ('delta', 1), ('offset', 0.2)):
        setattr(enc, attr, value)
    sim.model.op1.output_quantizer.encoding = enc
    sim.model.conv1.output_quantizer.encoding = enc
    sim.model.conv1.param_quantizers['weight'].encoding = enc

    sim.export(path='./data', filename_prefix='quant_model', dummy_input=dummy_input)

    # The exported JSON should map each activation key to a list of encodings.
    with open('./data/quant_model.encodings') as f:
        data = json.load(f)
    self.assertIsInstance(data['activation_encodings']['3'], list)
    self.assertIsInstance(data['activation_encodings']['4'], list)
def export_and_generate_encodings(model, params):
    """Quantize *model*, compute encodings over the encoder dataset, and export.

    :param model: torch model to quantize; quantization happens out-of-place
        (``in_place=False``) so the caller's model is left untouched.
    :param params: run configuration; must provide ``log_path``,
        ``input_shape_tuple``, ``quant_scheme``, ``rounding_mode``,
        ``default_bitwidth``, ``config_file``, ``filename_prefix`` and
        ``my_filename``.
    :return: the configured :class:`QuantizationSimModel`.
    """
    # exist_ok=True: previously a re-run with an existing log directory raised
    # FileExistsError here and aborted the whole export.
    os.makedirs(params.log_path, exist_ok=True)

    enc_ds = create_encoder_dataset(params, return_type='dataset')

    def evaluator_enc(model, iterations):
        """Forward-pass callback: run every dataset item through the model once.

        ``iterations`` is part of the compute_encodings callback contract but
        is unused — the loop always covers the full dataset.
        """
        for query_id in tqdm(range(enc_ds.get_item_count())):
            query_ids = [query_id]
            enc_ds.load_query_samples(query_ids)
            img, label = enc_ds.get_samples(query_ids)
            with torch.no_grad():
                _ = model(img)
            enc_ds.unload_query_samples(query_ids)

    quantizer = QuantizationSimModel(model=model,
                                     input_shapes=params.input_shape_tuple,
                                     quant_scheme=params.quant_scheme,
                                     rounding_mode=params.rounding_mode,
                                     default_output_bw=params.default_bitwidth,
                                     default_param_bw=params.default_bitwidth,
                                     in_place=False,
                                     config_file=params.config_file)
    quantizer_modifications(quantizer)
    quantizer.compute_encodings(forward_pass_callback=evaluator_enc,
                                forward_pass_callback_args=1)
    quantizer.export(path=params.log_path,
                     filename_prefix=params.filename_prefix,
                     input_shape=params.input_shape_tuple)

    # Post-process the exported encodings file in place.
    input_file = os.path.join(params.log_path, '%s.encodings' % str(params.filename_prefix))
    remap_bitwidth_to_32(input_file)

    # Persist the run parameters next to the artifacts for reproducibility.
    with open(os.path.join(params.log_path, params.my_filename), 'wb') as f:
        pickle.dump(params, f)
    return quantizer
def quantize_model(trainer_function):
    """Wrap a fresh MNIST net in a quantsim, fine-tune it, and export the result."""
    device = torch.device('cuda')
    model = mnist_torch_model.Net().to(device)

    sim = QuantizationSimModel(
        model,
        default_output_bw=8,
        default_param_bw=8,
        dummy_input=torch.rand(1, 1, 28, 28),
        config_file=
        '../../../TrainingExtensions/common/src/python/aimet_common/quantsim_config/'
        'default_config.json')

    # Compute initial encodings for the untrained network
    sim.compute_encodings(forward_pass_callback=evaluate_model,
                          forward_pass_callback_args=5)

    # Quantization-aware fine-tuning of the wrapped model
    trainer_function(model=sim.model, epochs=1, num_batches=100, use_cuda=True)

    # Write out the quantized model together with its encodings
    sim.export(path='./', filename_prefix='quantized_mnist',
               dummy_input=torch.rand(1, 1, 28, 28))
def test_and_compare_quantizer_no_fine_tuning_CPU_and_GPU(self):
    """Quantize identical nets on CPU and GPU and check their encodings agree."""
    torch.manual_seed(1)
    torch.backends.cudnn.deterministic = True
    dummy_input = torch.rand(1, 1, 28, 28)
    dummy_input_cuda = dummy_input.cuda()

    start_time = time.time()
    # CPU copy of the network, then an identical GPU copy
    model_cpu = mnist_model.Net().to('cpu')
    model_gpu = copy.deepcopy(model_cpu).to('cuda')

    cpu_sim_model = QuantizationSimModel(model_cpu, quant_scheme='tf',
                                         in_place=True, dummy_input=dummy_input)
    cpu_sim_model.compute_encodings(forward_pass, None)
    print("Encodings for cpu model calculated")
    print("Took {} secs".format(time.time() - start_time))

    start_time = time.time()
    gpu_sim_model = QuantizationSimModel(model_gpu, quant_scheme='tf',
                                         in_place=True,
                                         dummy_input=dummy_input_cuda)
    gpu_sim_model.compute_encodings(forward_pass, None)
    print("Encodings for gpu model calculated")
    print("Took {} secs".format(time.time() - start_time))

    # Compare only the encoding min/max values. assertAlmostEqual rounds the
    # difference to the requested precision (like round(), not significant
    # digits), so a small delta tolerates float noise between devices.
    # fc1 is skipped because it belongs to the Matmul->Relu supergroup, and
    # fc2 cannot use assertEqual, hence the approximate comparison throughout.
    def check_encoding_match(layer_name):
        gpu_enc = getattr(model_gpu, layer_name).output_quantizers[0].encoding
        cpu_enc = getattr(model_cpu, layer_name).output_quantizers[0].encoding
        self.assertAlmostEqual(gpu_enc.min, cpu_enc.min, delta=0.001)
        self.assertAlmostEqual(gpu_enc.max, cpu_enc.max, delta=0.001)

    for layer in ('conv1', 'conv2', 'fc2'):
        check_encoding_match(layer)

    gpu_sim_model.export("./data/", "quantizer_no_fine_tuning__GPU", dummy_input)
    cpu_sim_model.export("./data/", "quantizer_no_fine_tuning__CPU", dummy_input)

    # Each model must still live on its original device after quantization
    self.assertEqual(torch.device('cuda:0'), next(model_gpu.parameters()).device)
    self.assertEqual(torch.device('cpu'), next(model_cpu.parameters()).device)