def get_qconf_from_hw_config_subdict(quantization_subdict: Dict, for_weights=False):
    bits = quantization_subdict["bits"]
    mode = HWConfig.get_quantization_mode_from_config_value(quantization_subdict["mode"])
    is_per_channel = HWConfig.get_is_per_channel_from_config_value(quantization_subdict["granularity"])
    signedness_to_force = None
    if 'level_low' in quantization_subdict and 'level_high' in quantization_subdict:
        signedness_to_force = False
        if mode == QuantizationMode.SYMMETRIC:
            if quantization_subdict['level_low'] < 0 < quantization_subdict['level_high']:
                signedness_to_force = True
            true_level_low, true_level_high, _ = SymmetricQuantizer.calculate_level_ranges(bits, True)
        else:
            signedness_to_force = True
            true_level_low, true_level_high, _ = AsymmetricQuantizer.calculate_level_ranges(bits)
        assert quantization_subdict['level_low'] == true_level_low, \
            "Invalid value of quantizer parameter `level_low`. " \
            "The parameter must be consistent with other parameters!"
        assert quantization_subdict['level_high'] == true_level_high, \
            "Invalid value of quantizer parameter `level_high`. " \
            "The parameter must be consistent with other parameters!"
    return QuantizerConfig(bits=bits,
                           mode=mode,
                           per_channel=is_per_channel,
                           signedness_to_force=signedness_to_force,
                           is_weights=for_weights)
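# Usage sketch for the helper above (not from the original file). The string values
# for "mode" and "granularity" are assumptions about the HW-config schema, and the
# level bounds assume a signed 8-bit symmetric range.
example_subdict = {
    "bits": 8,
    "mode": "symmetric",         # assumed config value for symmetric mode
    "granularity": "pertensor",  # assumed config value for per-tensor granularity
    "level_low": -128,
    "level_high": 127,
}
example_qconf = get_qconf_from_hw_config_subdict(example_subdict, for_weights=True)
# Level bounds spanning zero force signed quantization in the symmetric branch.
assert example_qconf.signedness_to_force is True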
print() print("Custom Symmetric Per Activation Channel (cuda 0 ) impl") print("input size: {0}".format(input_size)) run_profile( SymmetricQuantizer( QuantizerConfig(QuantizationParams(bits=NBITS), input_shape=input_size, per_channel=True, is_weights=False)).cuda(), input_size, 'cuda', gpu_runs) print() print("Custom Asymmetric (cuda 0 ) impl:") print("input size: {0}".format(input_size)) run_profile( AsymmetricQuantizer(QuantizerConfig( QuantizationParams(bits=NBITS))).cuda(), input_size, 'cuda', gpu_runs) print() print("Custom Asymmetric Per Weight Channel (cuda 0 ) impl") print("input size: {0}".format(input_size)) run_profile( AsymmetricQuantizer( QuantizerConfig(QuantizationParams(bits=NBITS), input_shape=input_size, per_channel=True, is_weights=True)).cuda(), input_size, 'cuda', gpu_runs) print() print("Custom Asymmetric Per Activation Channel (cuda 0 ) impl")
def test_onnx_export_to_quantize_dequantize_per_channel():
    # SYMMETRIC
    q_config = QuantizerConfig(input_shape=(2, 64, 15, 10),
                               bits=8,
                               mode=QuantizationMode.SYMMETRIC,
                               signedness_to_force=None,
                               per_channel=True)
    sym_quantizer = SymmetricQuantizer(q_config)
    # pylint: disable=protected-access
    sym_quantizer._export_mode = QuantizerExportMode.ONNX_QUANTIZE_DEQUANTIZE_PAIRS
    x = torch.rand((2, 64, 15, 10))
    sym_quantizer.run_export_quantization(x)

    q_config = QuantizerConfig(bits=8,
                               mode=QuantizationMode.SYMMETRIC,
                               signedness_to_force=None,
                               per_channel=False)
    sym_quantizer = SymmetricQuantizer(q_config)
    # pylint: disable=protected-access
    sym_quantizer._export_mode = QuantizerExportMode.ONNX_QUANTIZE_DEQUANTIZE_PAIRS
    x = torch.rand((2, 64, 15, 10))
    sym_quantizer.run_export_quantization(x)

    q_config = QuantizerConfig(input_shape=(2, 64, 15, 10),
                               bits=8,
                               mode=QuantizationMode.SYMMETRIC,
                               signedness_to_force=None,
                               per_channel=True)
    sym_quantizer = SymmetricQuantizer(q_config)
    # pylint: disable=protected-access
    sym_quantizer._export_mode = QuantizerExportMode.ONNX_QUANTIZE_DEQUANTIZE_PAIRS
    # A per-channel scale makes the QuantizeLinear-DequantizeLinear export path fail.
    sym_quantizer.scale = torch.nn.Parameter(torch.rand(1, 64, 1, 1))
    x = torch.rand((2, 64, 15, 10))
    try:
        sym_quantizer.run_export_quantization(x)
    except RuntimeError as e:
        assert str(e) == "PyTorch v1.5.0 export to ONNX using QuantizeLinear-DequantizeLinear " \
                         "doesn't support per channel quantization"

    # ASYMMETRIC
    q_config = QuantizerConfig(input_shape=(2, 64, 15, 10),
                               bits=8,
                               mode=QuantizationMode.ASYMMETRIC,
                               signedness_to_force=None,
                               per_channel=True)
    assym_quantizer = AsymmetricQuantizer(q_config)
    # pylint: disable=protected-access
    assym_quantizer._export_mode = QuantizerExportMode.ONNX_QUANTIZE_DEQUANTIZE_PAIRS
    x = torch.rand((2, 64, 15, 10))
    assym_quantizer.run_export_quantization(x)

    q_config = QuantizerConfig(bits=8,
                               mode=QuantizationMode.ASYMMETRIC,
                               signedness_to_force=None,
                               per_channel=False)
    assym_quantizer = AsymmetricQuantizer(q_config)
    # pylint: disable=protected-access
    assym_quantizer._export_mode = QuantizerExportMode.ONNX_QUANTIZE_DEQUANTIZE_PAIRS
    x = torch.rand((2, 64, 15, 10))
    assym_quantizer.run_export_quantization(x)

    q_config = QuantizerConfig(input_shape=(2, 64, 15, 10),
                               bits=8,
                               mode=QuantizationMode.ASYMMETRIC,
                               signedness_to_force=None,
                               per_channel=True)
    assym_quantizer = AsymmetricQuantizer(q_config)
    # pylint: disable=protected-access
    assym_quantizer._export_mode = QuantizerExportMode.ONNX_QUANTIZE_DEQUANTIZE_PAIRS
    # Per-channel range parameters on the asymmetric quantizer trigger the same failure.
    assym_quantizer.input_low = torch.nn.Parameter(torch.rand(1, 64, 1, 1))
    assym_quantizer.input_range = torch.nn.Parameter(torch.rand(1, 64, 1, 1))
    x = torch.rand((2, 64, 15, 10))
    try:
        assym_quantizer.run_export_quantization(x)
    except RuntimeError as e:
        assert str(e) == "PyTorch v1.5.0 export to ONNX using QuantizeLinear-DequantizeLinear" \
                         " doesn't support per channel quantization"
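# A tighter variant of the failure checks above (a sketch, not part of the original
# test): with a bare try/except, the test would still pass if no exception were raised
# at all. pytest.raises makes a missing RuntimeError fail the test; the message
# fragment matched here is taken from the asserts above.
import pytest

def check_per_channel_export_raises(quantizer, x):
    with pytest.raises(RuntimeError, match="doesn't support per channel quantization"):
        quantizer.run_export_quantization(x)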
for k, attrs in nx_graph.nodes.items():
    attrs = {k: str(v) for k, v in attrs.items()}
    load_attrs = {k: str(v).strip('"') for k, v in load_graph.nodes[k].items()}
    assert attrs == load_attrs

assert load_graph.nodes.keys() == nx_graph.nodes.keys()
assert nx.DiGraph(load_graph).edges == nx_graph.edges


QuantizeConfig = namedtuple('QuantizeConfig', ['quantizer', 'graph_dir'])

QUANTIZERS = [
    QuantizeConfig(lambda _, is_weights=False, input_shape=None: SymmetricQuantizer(
        QuantizerConfig(signedness_to_force=is_weights,
                        is_weights=is_weights,
                        input_shape=input_shape)), 'symmetric'),
    QuantizeConfig(lambda _, is_weights, input_shape=None: AsymmetricQuantizer(QuantizerConfig()),
                   'asymmetric')
]


@pytest.fixture(scope='function', params=QUANTIZERS,
                ids=[pair.graph_dir for pair in QUANTIZERS])
def _quantize_config(request):
    config = request.param
    graph_dir = os.path.join('quantized', config.graph_dir)
    return QuantizeConfig(config.quantizer, graph_dir)


def default_forward_fn(model, input_size_):
    device = next(model.parameters()).device
    return model(torch.zeros(input_size_).to(device))
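# Hypothetical usage sketch (not from the original file): the fixture's factory is
# called to build a quantizer, and quantization is checked to be shape-preserving.
def test_quantizer_forward_shape_sketch(_quantize_config):
    quantizer = _quantize_config.quantizer(None, is_weights=False)
    x = torch.zeros(1, 3, 4, 4)
    # Fake-quantization replaces values but keeps the tensor shape.
    assert quantizer(x).shape == x.shape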
TEST_MODELS = [
    ("alexnet.dot", test_models.AlexNet, (1, 3, 32, 32)),
    ("lenet.dot", test_models.LeNet, (1, 3, 32, 32)),
    ("resnet18.dot", test_models.ResNet18, (1, 3, 32, 32)),
    ("resnet50.dot", test_models.ResNet50, (1, 3, 32, 32)),