def quant(net_i, scheme, trainer, quant_params=None):
    """Quantize the network according to the selected scheme: post, dynamic or both."""
    if scheme == "post":
        net_i.to("cpu")
        net_i.eval()
        net_i.qconfig = get_default_qconfig("fbgemm")
        net_i.fuse_model()
        prepare(net_i, inplace=True)
        _, net_i = trainer.evaluate(net_i, quant_mode=True)
        convert(net_i, inplace=True)
    elif scheme == "dynamic":
        net_i.to("cpu")
        net_i = quantize_dynamic(net_i, quant_params, dtype=qint8)
    elif scheme == "both":
        net_i.to("cpu")
        net_i.eval()
        net_i = quantize_dynamic(net_i, quant_params, dtype=qint8)
        net_i.qconfig = get_default_qconfig("fbgemm")
        net_i.fuse_model()
        prepare(net_i, inplace=True)
        _, net_i = trainer.evaluate(net_i, quant_mode=True)
        convert(net_i, inplace=True)
    else:
        pass
    return net_i

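# A minimal, hedged usage sketch for the `quant` helper above. The "dynamic"
# branch needs neither a trainer nor calibration, so it can be exercised on its
# own; `TinyNet` is a stand-in model (not part of the original code), and the
# module-level imports the helper relies on (quantize_dynamic, qint8, ...) are
# assumed to be present.
import torch
import torch.nn as nn


class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(16, 4)

    def forward(self, x):
        return self.fc(x)


# Dynamically quantize the Linear layers: weights are stored as int8 and
# activations are quantized on the fly at inference time.
net_dyn = quant(TinyNet(), scheme="dynamic", trainer=None, quant_params={nn.Linear})
print(net_dyn(torch.randn(2, 16)))
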
def graph_mode_quantize(
    self,
    inputs,
    data_loader,
    calibration_num_batches=64,
    qconfig_dict=None,
    force_quantize=False,
):
    """Quantize the model during export with graph mode quantization."""
    if force_quantize:
        trace = self.trace(inputs)
        if not qconfig_dict:
            qconfig_dict = {"": get_default_qconfig("fbgemm")}
        prepare_m = prepare_jit(trace, qconfig_dict, inplace=False)
        prepare_m.eval()
        with torch.no_grad():
            for i, (_, batch) in enumerate(data_loader):
                print("Running calibration with batch {}".format(i))
                input_data = self.onnx_trace_input(batch)
                prepare_m(*input_data)
                if i == calibration_num_batches - 1:
                    break
        trace = convert_jit(prepare_m, inplace=True)
    else:
        super().quantize()
        trace = self.trace(inputs)
    return trace

def quantize(model, data_loader, config="fbgemm", name="lanes"):
    # Configuration
    prep_config_dict = {"non_traceable_module_name": ["base", "deconv"]}
    qconfig = get_default_qconfig(config)
    qconfig_dict = {"": qconfig}
    model.load()
    model.eval()

    # Prepare Model
    model_prepared = prepare_fx(model, qconfig_dict,
                                prepare_custom_config_dict=prep_config_dict)
    calibrate(model_prepared, data_loader)
    model_int_8 = convert_fx(model_prepared)

    # Model Description
    params = sum([np.prod(p.size()) for p in model.parameters()])
    print("ORIGINAL")
    print("Number of Parameters: {:.1f}M".format(params / 1e6))
    print(f"Number of Parameters: {params}")

    params = sum([np.prod(p.size()) for p in model_int_8.parameters()])
    print("QUANTIZED")
    print("Number of Parameters: {:.6f}M".format(params / 1e6))
    print(f"Number of Parameters: {params}")
    print_size_of_model(model_int_8)

    # Export TorchScript / mobile artifacts
    mobile_model = torch.jit.script(model_int_8)
    torchscript_mobile = optimize_for_mobile(mobile_model)
    torch.jit.save(torchscript_mobile, MODEL_MAIN_DIR + name + "_mobile.pt")
    torch.jit.save(torch.jit.script(model_int_8),
                   MODEL_MAIN_DIR + "quantized_" + name + "Net.pt")
    return model_int_8

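# The `calibrate` helper called above is not defined in this snippet. A minimal
# sketch of what it typically does (feed representative batches through the
# observer-instrumented model), assuming the loader yields (image, target)
# pairs, might be:
def calibrate(model, data_loader):
    model.eval()
    with torch.no_grad():
        for image, _ in data_loader:
            model(image)  # observers record activation statistics
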
def test_compare_model_stub_conv_static_fx(self):
    r"""Compare the output of static quantized conv layer and its float shadow module"""
    qengine = torch.backends.quantized.engine
    qconfig = get_default_qconfig(qengine)
    qconfig_dict = {"": qconfig}

    model_list = [ConvModel(), ConvBnReLUModel()]
    for float_model in model_list:
        float_model.eval()
        prepared_model = prepare_fx(float_model, qconfig_dict)
        prepared_float_model = copy.deepcopy(prepared_model)

        # Run calibration
        test_only_eval_fn(prepared_model, self.img_data_2d)
        q_model = convert_fx(prepared_model)

        module_swap_list = [nn.Conv2d, nni.modules.fused.ConvReLU2d]
        expected_ob_dict_keys = {"conv.stats"}
        self.compare_and_validate_model_stub_results_fx(
            prepared_float_model,
            q_model,
            module_swap_list,
            expected_ob_dict_keys,
            self.img_data_2d[0][0],
        )

def graph_mode_quantize(self, inputs, data_loader, calibration_num_batches=64):
    """Quantize the model during export with graph mode quantization for linformer encoder."""
    if (
        isinstance(self.right_encoder, RoBERTaEncoder)
        and self.right_encoder.use_linformer_encoder
        and isinstance(self.left_encoder, RoBERTaEncoder)
        and self.left_encoder.use_linformer_encoder
    ):
        trace = self.trace(inputs)
        qconfig = get_default_qconfig("fbgemm")
        qconfig_dict = {"": qconfig}
        prepare_m = prepare_jit(trace, qconfig_dict, inplace=False)
        prepare_m.eval()
        with torch.no_grad():
            for i, (_, batch) in enumerate(data_loader):
                print("Running calibration with batch {}".format(i))
                input_data = self.onnx_trace_input(batch)
                prepare_m(*input_data)
                if i == calibration_num_batches - 1:
                    break
        trace = convert_jit(prepare_m, inplace=True)
    else:
        super().quantize()
        trace = self.trace(inputs)
    return trace

def test_compare_model_stub_linear_static_fx(self):
    r"""Compare the output of static quantized linear layer and its float shadow module"""
    qengine = torch.backends.quantized.engine
    qconfig = get_default_qconfig(qengine)
    qconfig_dict = {"": qconfig}

    float_model = SingleLayerLinearModel()
    float_model.eval()
    prepared_model = prepare_fx(float_model, qconfig_dict)
    prepared_float_model = copy.deepcopy(prepared_model)

    # Run calibration
    test_only_eval_fn(prepared_model, self.calib_data)
    q_model = convert_fx(prepared_model)

    linear_data = self.calib_data[0][0]
    module_swap_list = [nn.Linear]
    expected_ob_dict_keys = {"fc1.stats"}
    self.compare_and_validate_model_stub_results_fx(
        prepared_float_model,
        q_model,
        module_swap_list,
        expected_ob_dict_keys,
        linear_data,
    )

def test_compare_model_outputs_conv_static_fx(self):
    r"""Compare the output of conv layer in static quantized model and corresponding
    output of conv layer in float model
    """
    qengine = torch.backends.quantized.engine
    qconfig = get_default_qconfig(qengine)
    qconfig_dict = {"": qconfig}

    model_list = [ConvModel(), ConvBnReLUModel()]
    for float_model in model_list:
        float_model.eval()
        prepared_model = prepare_fx(float_model, qconfig_dict)
        prepared_float_model = copy.deepcopy(prepared_model)

        # Run calibration
        test_only_eval_fn(prepared_model, self.img_data_2d)
        q_model = convert_fx(prepared_model)

        expected_act_compare_dict_keys = {"x.stats", "conv.stats"}
        self.compare_and_validate_model_outputs_results_fx(
            prepared_float_model,
            q_model,
            expected_act_compare_dict_keys,
            self.img_data_2d[0][0],
        )

def test_compare_weights_linear_static_fx(self):
    r"""Compare the weights of float and static quantized linear layer"""

    def calibrate(model, calib_data):
        model.eval()
        with torch.no_grad():
            for inp in calib_data:
                model(*inp)

    def compare_and_validate_results(float_model, q_model):
        weight_dict = compare_weights_fx(float_model.state_dict(), q_model.state_dict())
        self.assertEqual(len(weight_dict), 1)
        for k, v in weight_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    float_model = SingleLayerLinearModel()
    float_model.eval()

    qengine = torch.backends.quantized.engine
    qconfig = get_default_qconfig(qengine)
    qconfig_dict = {"": qconfig}
    prepared_model = prepare_fx(float_model, qconfig_dict)
    backup_prepared_model = copy.deepcopy(prepared_model)
    backup_prepared_model.eval()

    # Run calibration
    calibrate(prepared_model, self.calib_data)
    q_model = convert_fx(prepared_model)

    compare_and_validate_results(backup_prepared_model, q_model)

def test_compare_weights_conv_static_fx(self):
    r"""Compare the weights of float and static quantized conv layer"""

    def calibrate(model, calib_data):
        model.eval()
        with torch.no_grad():
            for inp in calib_data:
                model(*inp)

    def compare_and_validate_results(float_model, q_model):
        weight_dict = compare_weights_fx(float_model.state_dict(), q_model.state_dict())
        self.assertEqual(len(weight_dict), 1)
        for k, v in weight_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    qengine = torch.backends.quantized.engine
    qconfig = get_default_qconfig(qengine)
    qconfig_dict = {"": qconfig}

    model_list = [ConvModel(), ConvBnModel(), ConvBNReLU()]
    for float_model in model_list:
        float_model.eval()
        fused = fuse_fx(float_model)
        prepared_model = prepare_fx(float_model, qconfig_dict)

        # Run calibration
        calibrate(prepared_model, self.img_data_2d)
        q_model = convert_fx(prepared_model)

        compare_and_validate_results(fused, q_model)

def test_compare_model_outputs_linear_static_fx(self):
    r"""Compare the output of linear layer in static quantized model and corresponding
    output of linear layer in float model
    """
    qengine = torch.backends.quantized.engine
    qconfig = get_default_qconfig(qengine)
    qconfig_dict = {"": qconfig}

    float_model = SingleLayerLinearModel()
    float_model.eval()
    prepared_model = prepare_fx(float_model, qconfig_dict)
    prepared_float_model = copy.deepcopy(prepared_model)

    # Run calibration
    test_only_eval_fn(prepared_model, self.calib_data)
    q_model = convert_fx(prepared_model)

    linear_data = self.calib_data[0][0]
    expected_act_compare_dict_keys = {"x.stats", "fc1.stats"}
    self.compare_and_validate_model_outputs_results_fx(
        prepared_float_model, q_model, expected_act_compare_dict_keys, linear_data
    )

def test_post_training_static_quantization(self, root_dir):
    """
    Validate post-training static quantization.
    """
    seed_everything(100)
    model = TestModule()
    num_epochs = 4
    static_quantization = PostTrainingQuantization(
        qconfig_dicts={"": {"": get_default_qconfig()}}
    )
    trainer = Trainer(
        default_root_dir=os.path.join(root_dir, "quantized"),
        checkpoint_callback=False,
        callbacks=[static_quantization],
        max_epochs=num_epochs,
        logger=False,
    )
    # This will both train the model + quantize it.
    trainer.fit(model)

    self.assertIsNotNone(static_quantization.quantized)
    # Default qconfig requires calibration.
    self.assertTrue(static_quantization.should_calibrate)

    test_in = torch.randn(12, 32)
    with mode(model, training=False) as m:
        base_out = m(test_in)
    with mode(static_quantization.quantized, training=False) as q:
        test_out = q(test_in)

    # While quantized/original won't be exact, they should be close.
    self.assertLess(
        ((((test_out - base_out) ** 2).sum(axis=1)) ** (1 / 2)).mean(),
        0.015,
        "RMSE should be less than 0.015 between quantized and original.",
    )

def get_model(framework, model_variant):
    """
    Load the desired EfficientPose model variant using the requested deep learning framework.

    Args:
        framework: string
            Deep learning framework to use (Keras, TensorFlow, TensorFlow Lite or PyTorch)
        model_variant: string
            EfficientPose model to utilize (RT, I, II, III, IV, RT_Lite, I_Lite or II_Lite)

    Returns:
        Initialized EfficientPose model and corresponding resolution.
    """
    # Keras
    if framework in ['keras', 'k']:
        from tensorflow.keras.backend import set_learning_phase
        from tensorflow.keras.models import load_model
        set_learning_phase(0)
        model = load_model(
            join('models', 'keras', 'EfficientPose{0}.h5'.format(model_variant.upper())),
            custom_objects={'BilinearWeights': helpers.keras_BilinearWeights,
                            'Swish': helpers.Swish(helpers.eswish),
                            'eswish': helpers.eswish,
                            'swish1': helpers.swish1})

    # TensorFlow
    elif framework in ['tensorflow', 'tf']:
        from tensorflow.python.platform.gfile import FastGFile
        from tensorflow.compat.v1 import GraphDef
        from tensorflow.compat.v1.keras.backend import get_session
        from tensorflow import import_graph_def
        f = FastGFile(join('models', 'tensorflow', 'EfficientPose{0}.pb'.format(model_variant.upper())), 'rb')
        graph_def = GraphDef()
        graph_def.ParseFromString(f.read())
        f.close()
        model = get_session()
        model.graph.as_default()
        import_graph_def(graph_def)

    # TensorFlow Lite
    elif framework in ['tensorflowlite', 'tflite']:
        from tensorflow import lite
        model = lite.Interpreter(model_path=join('models', 'tflite', 'EfficientPose{0}.tflite'.format(model_variant.upper())))
        model.allocate_tensors()

    # PyTorch
    elif framework in ['pytorch', 'torch']:
        from imp import load_source
        from torch import load, quantization, backends
        try:
            MainModel = load_source('MainModel', join('models', 'pytorch', 'EfficientPose{0}.py'.format(model_variant.upper())))
        except:
            print('\n##########################################################################################################')
            print('Desired model "EfficientPose{0}Lite" not available in PyTorch. Please select among "RT", "I", "II", "III" or "IV".'.format(model_variant.split('lite')[0].upper()))
            print('##########################################################################################################\n')
            return False, False
        model = load(join('models', 'pytorch', 'EfficientPose{0}'.format(model_variant.upper())))
        model.eval()
        qconfig = quantization.get_default_qconfig('qnnpack')
        backends.quantized.engine = 'qnnpack'

    return model, {'rt': 224, 'i': 256, 'ii': 368, 'iii': 480, 'iv': 600,
                   'rt_lite': 224, 'i_lite': 256, 'ii_lite': 368}[model_variant]

def checkGraphModeOp(self, module, data, quantized_op, tracing=False, debug=False,
                     check=True, eval_mode=True, dynamic=False):
    if debug:
        print('Testing:', str(module))
    qconfig_dict = {'': get_default_qconfig(torch.backends.quantized.engine)}

    if eval_mode:
        module = module.eval()
    if dynamic:
        qconfig_dict = {'': default_dynamic_qconfig}
        inputs = data
    else:
        *inputs, target = data[0]
    model = get_script_module(module, tracing, inputs).eval()
    if debug:
        print('input graph:', model.graph)
    models = {}
    outputs = {}
    for d in [True, False]:
        # TODO: _test_only_eval_fn --> default_eval_fn
        if dynamic:
            models[d] = quantize_dynamic_jit(model, qconfig_dict, debug=d)
            # make sure it runs
            outputs[d] = models[d](inputs)
        else:
            # module under test can contain in-place ops, and we depend on
            # input data staying constant for comparisons
            data_copy = copy.deepcopy(data)
            models[d] = quantize_jit(
                model, qconfig_dict, test_only_eval_fn, [data_copy], inplace=False, debug=d)
            # make sure it runs
            outputs[d] = models[d](*inputs)

    if debug:
        print('debug graph:', models[True].graph)
        print('non debug graph:', models[False].graph)

    if check:
        # debug and non-debug option should have the same numerics
        self.assertEqual(outputs[True], outputs[False])

        # non debug graph should produce quantized op
        FileCheck().check(quantized_op) \
                   .run(models[False].graph)

    return models[False]

def __init__(self, input_net, output_file, backend='fbgemm'):
    self.input_net = copy.deepcopy(input_net)
    self.input_net.cpu().eval()
    self.output_file = output_file
    self.dq_output_file = '{}.dq'.format(output_file)
    self.sq_output_file = '{}.sq'.format(output_file)
    self.d_qconfig_dict = {'': per_channel_dynamic_qconfig}
    self.s_qconfig_dict = {'': get_default_qconfig(backend)}
    self.ts = None

def __init__(self, deepvac_core_config, output_file, backend='fbgemm'):
    self.deepvac_core_config = deepvac_core_config
    self.input_net = copy.deepcopy(self.deepvac_core_config.ema
                                   if self.deepvac_core_config.ema
                                   else self.deepvac_core_config.net)
    self.input_net.to(self.deepvac_core_config.sample.device)
    self.input_net.eval()
    self.output_file = output_file
    self.backend = backend
    self.dq_output_file = '{}.dq'.format(output_file)
    self.sq_output_file = '{}.sq'.format(output_file)
    self.d_qconfig_dict = {'': per_channel_dynamic_qconfig}
    self.s_qconfig_dict = {'': get_default_qconfig(self.backend)}

def __init__(
    self,
    qconfig_dicts: Optional[QConfigDicts] = None,
    preserved_attrs: Optional[List[str]] = None,
) -> None:
    """
    Initialize the callback.
    """
    self.qconfig_dicts = qconfig_dicts or {"": {"": get_default_qconfig()}}
    self.preserved_attrs = set([] if preserved_attrs is None else preserved_attrs)
    self.prepared: Optional[torch.nn.Module] = None
    self.quantized: Optional[torch.nn.Module] = None
    self.should_calibrate = _requires_calibration(self.qconfig_dicts)

def __init__(self, model, quant_method='dynamic', config='x86', calibration_loader=None):
    '''
    :param config: platform switch
    :type config: x86, pi, jetson
    '''
    self.model = model
    self.print_model_size(model, 'Original Model')
    self.quant_method = quant_method
    self.config = config
    self.qconfig = quant.get_default_qconfig('fbgemm') if config == 'x86' \
        else quant.get_default_qconfig('qnnpack')
    # For post training static quantization calibration, typically the training data loader
    self.calibration_loader = calibration_loader
    # Only static quantization needs a calibration loader; dynamic quantization does not.
    assert self.quant_method != 'static' or self.calibration_loader is not None, \
        'Post training static quantization requires calibration loader (training loader)!'

def test_compare_weights_conv_static_fx(self):
    r"""Compare the weights of float and static quantized conv layer"""
    qengine = torch.backends.quantized.engine
    qconfig = get_default_qconfig(qengine)
    qconfig_dict = {"": qconfig}

    model_list = [ConvModel(), ConvBnModel(), ConvBnReLUModel()]
    for float_model in model_list:
        float_model.eval()
        fused = fuse_fx(float_model)
        prepared_model = prepare_fx(float_model, qconfig_dict)

        # Run calibration
        test_only_eval_fn(prepared_model, self.img_data_2d)
        q_model = convert_fx(prepared_model)

        expected_weight_dict_keys = {"conv.weight"}
        self.compare_and_validate_model_weights_results_fx(
            fused, q_model, expected_weight_dict_keys
        )

def test_remove_qconfig_observer_fx(self):
    r"""Remove activation_post_process node from fx prepared model"""
    float_model = SingleLayerLinearModel()
    float_model.eval()

    qengine = torch.backends.quantized.engine
    qconfig = get_default_qconfig(qengine)
    qconfig_dict = {"": qconfig}

    prepared_model = prepare_fx(float_model, qconfig_dict)
    prepared_float_model = copy.deepcopy(prepared_model)
    prepared_float_model.eval()

    model = remove_qconfig_observer_fx(prepared_float_model)

    modules = dict(model.named_modules())
    for node in model.graph.nodes:
        if node.op == "call_module":
            self.assertFalse(is_activation_post_process(modules[node.target]))

def test_compare_weights_linear_static_fx(self):
    r"""Compare the weights of float and static quantized linear layer"""
    qengine = torch.backends.quantized.engine
    qconfig = get_default_qconfig(qengine)
    qconfig_dict = {"": qconfig}

    float_model = SingleLayerLinearModel()
    float_model.eval()
    prepared_model = prepare_fx(float_model, qconfig_dict)
    prepared_float_model = copy.deepcopy(prepared_model)
    prepared_float_model.eval()

    # Run calibration
    test_only_eval_fn(prepared_model, self.calib_data)
    q_model = convert_fx(prepared_model)

    expected_weight_dict_keys = {"fc1._packed_params._packed_params"}
    self.compare_and_validate_model_weights_results_fx(
        prepared_float_model, q_model, expected_weight_dict_keys
    )

#   # **api subject to change**
#   # optional: specify the path for standalone modules
#   # These modules are symbolically traced and quantized as one unit
#   # so that the call to the submodule appears as one call_module
#   # node in the forward graph of the GraphModule
#   "standalone_module_name": [
#       "submodule.standalone"
#   ],
#   "standalone_module_class": [
#       StandaloneModuleClass
#   ]
# }
#
# Utility functions related to ``qconfig`` can be found in the
# `qconfig <https://github.com/pytorch/pytorch/blob/master/torch/quantization/qconfig.py>`_ file.

qconfig = get_default_qconfig("fbgemm")
qconfig_dict = {"": qconfig}

######################################################################
# 5. Prepare the Model for Post Training Static Quantization
# ----------------------------------------------------------
#
# .. code:: python
#
#     prepared_model = prepare_fx(model_to_quantize, qconfig_dict)
#
# prepare_fx folds BatchNorm modules into preceding Conv2d modules and inserts
# observers at appropriate places in the model.

prepared_model = prepare_fx(model_to_quantize, qconfig_dict)

def quantize(model):
    qconfig = get_default_qconfig("fbgemm")
    qconfig_dict = {"": qconfig}
    return convert_fx(prepare_fx(model, qconfig_dict))

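# Note: the helper above converts immediately after prepare_fx, so the inserted
# observers only ever hold their default statistics. A hedged variant that first
# calibrates on a few representative batches (`data_loader` and its (input,
# target) structure are assumptions, not part of the original snippet) might be:
import torch


def quantize_with_calibration(model, data_loader, num_batches=32):
    qconfig_dict = {"": get_default_qconfig("fbgemm")}
    prepared = prepare_fx(model.eval(), qconfig_dict)
    with torch.no_grad():
        for i, (x, _) in enumerate(data_loader):
            prepared(x)  # let the observers record activation ranges
            if i + 1 >= num_batches:
                break
    return convert_fx(prepared)
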
def checkGraphModeFxOp(self, model, inputs, quant_type,
                       expected_node=None,
                       expected_node_occurrence=None,
                       expected_node_list=None,
                       debug=False,
                       print_debug_info=False):
    """ Quantizes model with graph mode quantization on fx and checks if the
        quantized model contains the quantized_node

    Args:
        model: floating point torch.nn.Module
        inputs: one positional sample input arguments for model
        expected_node: NodeSpec
            e.g. NodeSpec.call_function(torch.quantize_per_tensor)
        expected_node_occurrence: a dict from NodeSpec to
            expected number of occurrences (int)
            e.g. {NodeSpec.call_function(torch.quantize_per_tensor) : 1,
                  NodeSpec.call_method('dequantize'): 1}
        expected_node_list: a list of NodeSpec, used to check the order
            of the occurrence of Node
            e.g. [NodeSpec.call_function(torch.quantize_per_tensor),
                  NodeSpec.call_module(nnq.Conv2d),
                  NodeSpec.call_function(F.hardtanh_),
                  NodeSpec.call_method('dequantize')]
    """
    # TODO: make img_data a single example instead of a list
    if type(inputs) == list:
        inputs = inputs[0]
    if quant_type == QuantType.QAT:
        model.train()
    else:
        model.eval()
    original = symbolic_trace(model)
    fused = fuse_fx(original)

    qconfig_dict = {'': get_default_qconfig(torch.backends.quantized.engine)}
    if quant_type == QuantType.DYNAMIC:
        prepare = prepare_dynamic_fx
        convert = convert_dynamic_fx
    else:
        prepare = prepare_fx
        convert = convert_fx

    prepared = prepare(fused, qconfig_dict)
    prepared(*inputs)
    qgraph = convert(prepared)
    qgraph_debug = convert(prepared, debug=True)
    result = qgraph(*inputs)
    result_debug = qgraph_debug(*inputs)

    self.assertEqual((result - result_debug).abs().max(), 0,
                     'Expecting debug and non-debug option to produce identical result')

    if print_debug_info:
        print()
        print('quant type:', quant_type)
        print('original graph module:', type(model))
        self.printGraphModule(original)
        print()
        print('quantized graph module:', type(qgraph))
        self.printGraphModule(qgraph)
        print()
    qgraph_to_check = qgraph_debug if debug else qgraph
    self.checkGraphModuleNodes(
        qgraph_to_check, expected_node, expected_node_occurrence, expected_node_list)

def quantMain():
    # Choose quantization engine
    if 'qnnpack' in backquant.supported_engines:
        # This engine works ONLY on Linux; we will use it
        print("Using qnnpack backend engine")
        BACKEND_ENGINE = 'qnnpack'
    elif 'fbgemm' in backquant.supported_engines:
        # This engine works on Windows (and Linux?); we won't be using it
        BACKEND_ENGINE = 'fbgemm'
        print("FBGEMM Backend Engine is not supported - are you trying this on Windows?")
        exit(-2)
    else:
        BACKEND_ENGINE = 'none'
        print("No proper backend engine found")
        exit(-3)

    # Choose quantization device (cpu/gpu)
    # Static quantisation works only on cpu
    quantDevice = par.QUANT_DEVICE

    # Load data
    # TODO: transforms
    transform_for_quant = trans.TRANSFORM_QUANTIZE
    dataset_loader, valset_loader, _ = bank.loadData(arg_load_train=True,
                                                     arg_load_val=True,
                                                     arg_load_test=False,
                                                     arg_trans_train=transform_for_quant,
                                                     quantisation_mode=True)

    # Load our model
    quant_model = mod.UsedModel(par.MODEL_USED_MODEL_TYPE, arg_load=True,
                                arg_load_path=par.QUANT_MODEL_PATH,
                                arg_load_device=par.QUANT_DEVICE,
                                arg_load_raw=par.DATA_LOAD_RAW_MODEL_ENABLE)
    quant_model.optimizer = torch.optim.Adam(quant_model.model.parameters(),
                                             lr=par.TRAIN_INITIAl_LEARNING_RATE)  # only if raw load
    quant_model.model.to(par.QUANT_DEVICE)
    print('Loaded trained model')

    quant_model.model.eval()
    quant_model.addQuantStubs()  # needed???? for old 1.6 way
    quant_model.fuzeModel()

    # Evaluate our model
    if DO_EVALUATE:
        print("Started Evaluation")
        quant_model.model.eval()
        top1, _, _ = eva.evaluate(quant_model, valset_loader, par.QUANT_DEVICE)
        print('Evaluation accuracy on all val images, %2.2f' % (top1.avg))

    # Attach a qconfig to every module type on the propagation list except Linear
    propagation_list = quant.get_default_qconfig_propagation_list()
    propagation_list.remove(torch.nn.modules.linear.Linear)
    q_config_dict = dict()
    for e in propagation_list:
        q_config_dict[e] = quant.get_default_qconfig(BACKEND_ENGINE)
    quant.propagate_qconfig_(quant_model.model, q_config_dict)
    quant.prepare(quant_model.model, inplace=True)

    # Calibrate
    print("\nStarting quantizing inputs")
    quant_model.model.eval()
    with torch.no_grad():
        for i, data in enumerate(dataset_loader, 0):
            # if (i+1) % 2 == 0: break
            if i % 1000 == 0:
                print("Progress = ", i)
            inputs, labels = data['image'], data['class']
            quant_model.model(inputs)
    print("Inputs quantized")

    # Convert to quantized model
    torch.quantization.convert(quant_model.model, inplace=True)
    print("Model quantized")

    # Evaluate our model
    if DO_EVALUATE:
        print("Started Evaluation")
        quant_model.model.eval()
        top1, _, _ = eva.evaluate(quant_model, valset_loader, par.QUANT_DEVICE)
        print('Evaluation accuracy on all val images, %2.2f' % (top1.avg))

    # Save for mobile
    quant_model.saveQuantizedModel(par.QUANT_SAVE_MODEL_PATH, dataset_loader)
    print("Done")

def main(args):
    # data
    train_transform = tv.transforms.Compose([])
    if args.data_augmentation:
        train_transform.transforms.append(tv.transforms.RandomCrop(32, padding=4))
        train_transform.transforms.append(tv.transforms.RandomHorizontalFlip())
    train_transform.transforms.append(tv.transforms.ToTensor())
    normalize = tv.transforms.Normalize(mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
                                        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
    train_transform.transforms.append(normalize)

    test_transform = tv.transforms.Compose([tv.transforms.ToTensor(), normalize])

    train_dataset = tv.datasets.CIFAR10(root='data/', train=True,
                                        transform=train_transform, download=True)
    test_dataset = tv.datasets.CIFAR10(root='data/', train=False,
                                       transform=test_transform, download=True)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.bs,
                                               shuffle=True, pin_memory=True, num_workers=4)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.bs,
                                              shuffle=False, pin_memory=True, num_workers=4)

    # net
    net = tv.models.mobilenet_v2(num_classes=10)
    net.load_state_dict(torch.load('mobilenet_v2.pth', map_location='cpu'))
    net.dropout = torch.nn.Sequential()

    # quantization
    model = copy.deepcopy(net).cuda()
    del net
    model.eval()
    graph_module = torch.fx.symbolic_trace(model)
    qconfig = get_default_qconfig("fbgemm")
    qconfig_dict = {"": qconfig}
    model_prepared = prepare_fx(graph_module, qconfig_dict)
    calibrate(model_prepared, test_loader)  # this step performs the post-training calibration
    model_int8 = convert_fx(model_prepared)
    torch.jit.save(torch.jit.script(model_int8), 'int8-ptq.pth')

    # valid
    loaded_quantized_model = torch.jit.load('int8-ptq.pth')
    correct = 0.
    total = 0.
    with torch.no_grad():
        loaded_quantized_model.eval()
        for images, labels in tqdm(test_loader):
            pred = loaded_quantized_model(images)
            pred = torch.max(pred.data, 1)[1]
            total += labels.size(0)
            correct += (pred == labels).sum().item()
    val_acc = correct / total
    print(val_acc)

#
# Right now ``qconfig_dict`` is the only way to configure how the model is quantized,
# and it is done at module granularity, that is, we only support one type of
# ``qconfig`` for each ``torch.nn.Module``. For example, if we have:
#
# .. code:: python
#
#   qconfig = {
#       '' : qconfig_global,
#       'sub' : qconfig_sub,
#       'sub.fc' : qconfig_fc,
#       'sub.conv': None
#   }
#
# Module ``sub.fc`` will be configured with ``qconfig_fc``, all other child modules
# in ``sub`` will be configured with ``qconfig_sub``, and ``sub.conv`` will not be
# quantized. All other modules in the model will be quantized with ``qconfig_global``.
# Utility functions related to ``qconfig`` can be found in
# https://github.com/pytorch/pytorch/blob/master/torch/quantization/qconfig.py.

qconfig = get_default_qconfig('fbgemm')
qconfig_dict = {'': qconfig}

######################################################################
# 5. Define Calibration Function
# ------------------------------
#
# .. code:: python
#
#     def calibrate(model, sample_data, ...):
#         model(sample_data, ...)
#
# The calibration function is run after the observers are inserted in the model.
# The purpose of calibration is to run through some sample examples that are
# representative of the workload (for example a sample of the training data set)
# so that the observers in the model are able to observe
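
######################################################################
# A concrete sketch of such a calibration function (the ``data_loader``
# yielding ``(image, target)`` pairs and the batch limit are assumptions,
# not part of the original tutorial text):

def calibrate(model, data_loader, num_batches=32):
    model.eval()
    with torch.no_grad():
        for i, (image, _) in enumerate(data_loader):
            model(image)  # observers record activation statistics
            if i + 1 >= num_batches:
                break
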
def __init__(self, qconfig_dicts: Optional[QConfigDicts] = None) -> None:
    """
    Initialize the callback.
    """
    self.qconfig_dicts = qconfig_dicts or {"": {"": get_default_qconfig()}}
    self.prepared: Optional[torch.nn.Module] = None
    self.quantized: Optional[torch.nn.Module] = None
    self.should_calibrate = _requires_calibration(self.qconfig_dicts)

def test_sparse_qlinear_serdes(self):
    batch_size = 12
    input_channels = 4
    output_channels = 7
    model = self.SparseQuantizedModel(input_channels, output_channels)

    # For sparse kernels both the activation and weight ZP = 0
    X_scale = 0.2
    X_zp = 0
    W_scale = 1e-2
    W_zp = 0

    with override_cpu_allocator_for_qnnpack(qengine_is_qnnpack()):
        X_fp32 = torch.randn(batch_size, input_channels, dtype=torch.float32)
        float_bias = torch.randn(output_channels, dtype=torch.float32)

        X_q = torch.quantize_per_tensor(
            X_fp32, scale=X_scale, zero_point=X_zp, dtype=torch.quint8)
        X_fp32 = X_q.dequantize()

        W_fp32 = torch.randn(output_channels, input_channels, dtype=torch.float32)
        mask = torch.randint(0, 2, W_fp32.shape)
        W_fp32 *= mask
        W_q = torch.quantize_per_tensor(W_fp32, W_scale, W_zp, torch.qint8)

        model.weight = nn.Parameter(W_q.dequantize())
        model.eval()

        # Note: At the moment, for sparse kernels
        #   fbgemm supports only static quantized sparse linear
        #   qnnpack supports only dynamically quantized sparse linear
        # Hence we have two different tests.
        # fbgemm tests static flow, qnnpack tests dynamic.
        # Should be unified later on and tests should be fixed
        # appropriately.
        if qengine_is_fbgemm():
            model.qconfig = tq.get_default_qconfig('fbgemm')
            qmodel = copy.deepcopy(model)
            sqmodel = copy.deepcopy(model)

            tq.prepare(qmodel, inplace=True)
            tq.prepare(sqmodel, inplace=True)

            with torch.no_grad():
                qmodel(X_fp32)
                sqmodel(X_fp32)

            # Make sure the quantization parameters are computed the same way
            qparams = qmodel.linear.qconfig.weight().calculate_qparams()
            sqparams = sqmodel.linear.qconfig.weight().calculate_qparams()
            self.assertEqual(qparams, sqparams)

            # Make sure mapping of sparse kernels does not affect the non-sparse
            sparse_mapping = tq.get_default_static_quant_module_mappings()
            sparse_mapping[nn.Linear] = ao_nn_sq.Linear
            tq.convert(sqmodel, inplace=True, mapping=sparse_mapping)
            tq.convert(qmodel, inplace=True)

            assert isinstance(sqmodel.linear, ao_nn_sq.Linear), "Convert failed"
            assert isinstance(qmodel.linear, nn.quantized.Linear), "Mapping failed"

            scripted_sqmodel = torch.jit.script(sqmodel)
            scripted_sqmodel.eval()
            buffer = io.BytesIO()
            torch.jit.save(scripted_sqmodel, buffer)
            buffer.seek(0)
            sqmodel = torch.jit.load(buffer)

            # Make sure numerics are right
            Y_ref = qmodel(X_q)
            Y_hat = sqmodel(X_q)
            self.assertEqual(Y_ref.dequantize(), Y_hat.dequantize())

        if qengine_is_qnnpack():
            qconfig = {nn.Linear: tq.qconfig.default_dynamic_qconfig}
            dqmodel = copy.deepcopy(model)
            sdqmodel = copy.deepcopy(model)

            tq.propagate_qconfig_(dqmodel, qconfig)
            tq.propagate_qconfig_(sdqmodel, qconfig)

            # Make sure the quantization parameters are computed the same way
            qparams = dqmodel.linear.qconfig.weight().calculate_qparams()
            sqparams = sdqmodel.linear.qconfig.weight().calculate_qparams()
            self.assertEqual(qparams, sqparams)

            # Make sure mapping of sparse kernels does not affect the non-sparse
            sparse_mapping = copy.deepcopy(tq.get_default_dynamic_quant_module_mappings())
            sparse_mapping[nn.Linear] = ao_nn_sq.dynamic.Linear
            with LinearBlockSparsePattern(1, 4):
                tq.convert(sdqmodel, inplace=True, mapping=sparse_mapping)
            tq.convert(dqmodel, mapping=tq.get_default_dynamic_quant_module_mappings(),
                       inplace=True)

            assert isinstance(sdqmodel.linear, ao_nn_sq.dynamic.Linear), "Convert failed"
            assert isinstance(dqmodel.linear, nn.quantized.dynamic.Linear), "Mapping failed"

            scripted_sdqmodel = torch.jit.script(sdqmodel)
            scripted_sdqmodel.eval()
            buffer = io.BytesIO()
            torch.jit.save(scripted_sdqmodel, buffer)
            buffer.seek(0)
            sdqmodel = torch.jit.load(buffer)

            # Make sure numerics are right
            Y_ref = dqmodel(X_fp32)
            Y_hat = sdqmodel(X_fp32)
            self.assertEqual(Y_ref, Y_hat)