def __init__(self, input_width, weight_width, act_width):
    """Create the quantized layers of the network.

    Args:
        input_width: bit width of the input quantizer ``quant_inp``.
        weight_width: weight bit width shared by every conv/linear layer.
        act_width: bit width shared by the four ``QuantReLU`` activations.
    """
    super(QuantLeNet, self).__init__()

    # Keyword arguments that are identical across layers of the same kind.
    weight_cfg = {'weight_bit_width': weight_width}
    relu_cfg = {'bit_width': act_width, 'max_val': 6}

    # Input quantizer configured with the range [-1.0, 1.0].
    self.quant_inp = QuantIdentity(
        bit_width=input_width, min_val=-1.0, max_val=1.0)

    # Convolutional layers.
    self.conv1 = QuantConv2d(1, 6, 5, **weight_cfg)
    self.conv2 = QuantConv2d(6, 16, 5, **weight_cfg)

    # Fully connected layers; only the last one is created without bias.
    self.fc1 = QuantLinear(16 * 4 * 4, 120, bias=True, **weight_cfg)
    self.fc2 = QuantLinear(120, 84, bias=True, **weight_cfg)
    self.fc3 = QuantLinear(84, 10, bias=False, **weight_cfg)

    # Activations, all with max_val=6.
    self.relu1 = QuantReLU(**relu_cfg)
    self.relu2 = QuantReLU(**relu_cfg)
    self.relu3 = QuantReLU(**relu_cfg)
    self.relu4 = QuantReLU(**relu_cfg)
def make_qconv2d(in_planes, out_planes, kernel_size=1, padding=0, stride=1,
                 groups=1, bias=True, no_quant=True, **kwargs) -> QuantConv2d:
    """Build a ``QuantConv2d`` with or without weight quantization.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        kernel_size, padding, stride, groups, bias: forwarded to
            ``QuantConv2d`` unchanged.
        no_quant: when true, every quantizer argument is passed as ``None``;
            otherwise the weight bit width is taken from
            ``kwargs['bit_width']``.
        **kwargs: only ``bit_width`` is read, and only when ``no_quant`` is
            false.

    Raises:
        KeyError: if ``no_quant`` is false and ``bit_width`` was not given.
    """
    # Arguments shared by both construction paths.
    conv_args = dict(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        groups=groups,
        bias=bias,
    )

    if not no_quant:
        return QuantConv2d(**conv_args, weight_bit_width=kwargs['bit_width'])

    return QuantConv2d(
        **conv_args,
        weight_quant=None,
        input_quant=None,
        bias_quant=None,
        output_quant=None,
        update_wqi=None,
        update_bqi=None,
        update_iqi=None,
        update_oqi=None,
    )
def __init__(self):
    """Create three identically configured conv + ReLU stages and a linear head.

    All three convolutions share one configuration. Only the last
    activation is created with ``return_quant_tensor=False``; its output
    feeds the plain ``nn.Linear`` layer.
    """
    super().__init__()

    # Configuration shared by all three convolutions.
    conv_cfg = dict(
        kernel_size=KERNEL_SIZE,
        in_channels=CHANNELS,
        out_channels=CHANNELS,
        weight_quant=DPUv1WeightQuantInjector,
        bias_quant=None,
        output_quant=DPUv1OutputQuantInjector,
        bias=False,
        return_quant_tensor=True,
    )

    self.conv1 = QuantConv2d(**conv_cfg)
    self.act1 = QuantReLU(
        act_quant=DPUv1ActQuantInjector, return_quant_tensor=True)

    self.conv2 = QuantConv2d(**conv_cfg)
    self.act2 = QuantReLU(
        act_quant=DPUv1ActQuantInjector, return_quant_tensor=True)

    self.conv3 = QuantConv2d(**conv_cfg)
    self.act3 = QuantReLU(
        act_quant=DPUv1ActQuantInjector, return_quant_tensor=False)

    self.linear = nn.Linear(FC_IN_SIZE, CHANNELS)
def __init__(self, weight_bit_width=4, acti_bit_width=8):
    """Create the quantized conv and linear layers of the network.

    Args:
        weight_bit_width: weight bit width shared by all layers.
        acti_bit_width: currently unused. The ``QuantReLU`` layers that
            consumed it (relu1..relu4, ``max_val=6``) are disabled, so no
            activation modules are registered here.
    """
    super(QuantLeNet, self).__init__()

    weight_cfg = {'weight_bit_width': weight_bit_width}

    # Convolutional layers; only conv1 uses padding.
    self.conv1 = QuantConv2d(1, 6, 5, padding=2, **weight_cfg)
    self.conv2 = QuantConv2d(6, 16, 5, **weight_cfg)

    # Fully connected layers, all with bias.
    self.fc1 = QuantLinear(16 * 5 * 5, 120, bias=True, **weight_cfg)
    self.fc2 = QuantLinear(120, 84, bias=True, **weight_cfg)
    self.fc3 = QuantLinear(84, 10, bias=True, **weight_cfg)
def __init__(self, in_channels, out_channels, kernel_size, weight_bit_width,
             act_bit_width, stride=1, padding=0, groups=1, bn_eps=1e-5,
             activation_scaling_per_channel=False):
    """Create a bias-free quantized conv, a batch norm and a quantized ReLU.

    Args:
        in_channels, out_channels, kernel_size, stride, padding, groups:
            forwarded to ``QuantConv2d``.
        weight_bit_width: bit width of the conv weights.
        act_bit_width: bit width of the activation.
        bn_eps: ``eps`` of the batch-norm layer.
        activation_scaling_per_channel: forwarded to ``QuantReLU`` as
            ``scaling_per_channel``.
    """
    super(ConvBlock, self).__init__()

    conv_layer = QuantConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        groups=groups,
        bias=False,
        weight_quant=CommonIntWeightPerChannelQuant,
        weight_bit_width=weight_bit_width,
    )
    self.conv = conv_layer

    self.bn = nn.BatchNorm2d(num_features=out_channels, eps=bn_eps)

    # Broadcastable shape with one entry per output channel.
    channel_shape = (1, out_channels, 1, 1)
    self.activation = QuantReLU(
        act_quant=CommonUintActQuant,
        bit_width=act_bit_width,
        per_channel_broadcastable_shape=channel_shape,
        scaling_per_channel=activation_scaling_per_channel,
        return_quant_tensor=True,
    )
def test_quant_conv2d(dw, bias, bias_quant, in_features, in_channels,
                      out_channels, w_bits, channel_scaling, kernel_size,
                      padding, stride, i_bits):
    """Check that a FINN-exported ``QuantConv2d`` matches the Brevitas module.

    The conv is exported with ``bo.export_finn_onnx``, executed with
    ``oxe.execute_onnx`` on the integer form of a quantized random input,
    and compared against the module's own output with ``atol=1e-3``.
    When ``dw`` is truthy the conv is built with
    ``groups == out_channels == in_channels``.
    """
    # Required to generate quantized inputs; not part of the exported model.
    input_quantizer = QuantIdentity(bit_width=i_bits, return_quant_tensor=True)
    quant_input = input_quantizer(
        torch.randn(1, in_channels, in_features, in_features))

    if dw:
        conv_out_channels, conv_groups = in_channels, in_channels
    else:
        conv_out_channels, conv_groups = out_channels, 1

    conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=conv_out_channels,
        groups=conv_groups,
        kernel_size=kernel_size,
        padding=padding,
        stride=stride,
        bias=bias,
        bias_quant=bias_quant,
        weight_bit_width=w_bits,
        weight_scaling_per_output_channel=channel_scaling,
    )
    conv.eval()

    exported = bo.export_finn_onnx(conv, input_t=quant_input)
    model = ModelWrapper(exported).transform(InferShapes())

    # The quantized input tensor passed to FINN should be in integer form.
    int_input = quant_input.int(float_datatype=True).numpy()
    feed = {model.graph.input[0].name: int_input}
    outputs = oxe.execute_onnx(model, feed, True)
    produced = outputs[model.graph.output[0].name]

    expected = conv(quant_input).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
def prepare_for_export(self, module: QuantConv2d):
    """Collect the tensors and quantizer metadata needed to export ``module``.

    Stores the result in ``self.symbolic_kwargs``. The quantized weight and
    bias values are detached. ``bias`` is ``None`` when the module reports no
    quantized bias.
    """
    quant_bias = module.quant_bias()
    bias = None if quant_bias is None else quant_bias.value.detach()

    weight = module.quant_weight().value.detach()
    if weight.dim() == 4:
        # Move weights to NHWC already: dim 1 goes last.
        weight = weight.permute(0, 2, 3, 1)

    # Query the helpers in a fixed order before assembling the dictionary.
    input_quant = self.input_quant_symbolic_kwargs(module)
    weight_quant = self.weight_quant_symbolic_kwargs(module)
    bias_quant = self.bias_quant_symbolic_kwargs(module)
    output_quant = self.output_quant_symbolic_kwargs(module)
    op_kwargs = self.op_symbolic_kwargs(module)

    self.symbolic_kwargs = {
        'weight': weight,
        'bias': bias,
        'input_quant': input_quant,
        'weight_quant': weight_quant,
        'bias_quant': bias_quant,
        'output_quant': output_quant,
        'op': op_kwargs,
    }
def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
             quant_type, weight_bit_width, act_bit_width,
             act_scaling_per_channel, weight_scaling_impl_type, bias,
             compute_micronet_cost, dilation=1, groups=1, bn_eps=1e-5,
             shared_act=None):
    """Create a quantized conv, a batch norm and an activation.

    Args:
        in_channels, out_channels, kernel_size, stride, padding, dilation,
            groups, bias: forwarded to ``QuantConv2d``.
        quant_type: weight quantization type. It also selects the
            activation when ``shared_act`` is ``None``.
        weight_bit_width: bit width of the conv weights.
        act_bit_width: bit width of the ``QuantReLU`` activation.
        act_scaling_per_channel: forwarded to ``QuantReLU``.
        weight_scaling_impl_type: forwarded to ``QuantConv2d``.
        compute_micronet_cost: stored on the instance unchanged.
        bn_eps: ``eps`` of the batch-norm layer.
        shared_act: optional pre-built activation module; used as-is when
            given.

    Raises:
        Exception: if ``shared_act`` is ``None`` and ``quant_type`` is
            neither ``QuantType.FP`` nor ``QuantType.INT``.
    """
    super(ConvBlock, self).__init__()
    self.compute_micronet_cost = compute_micronet_cost

    self.conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
        bias=bias,
        weight_quant_type=quant_type,
        weight_bit_width=weight_bit_width,
        weight_scaling_impl_type=weight_scaling_impl_type,
        weight_restrict_scaling_type=RestrictValueType.LOG_FP,
        weight_narrow_range=True,
        weight_scaling_stats_op=StatsOp.MAX,
        weight_scaling_min_val=MIN_SCALING_VALUE,
        # Compute the number of bits in the output accumulator.
        compute_output_bit_width=True,
        # Return a quantized tensor that represents the quantized accumulator.
        return_quant_tensor=True,
        weight_scaling_per_output_channel=True,
    )

    self.bn = nn.BatchNorm2d(num_features=out_channels, eps=bn_eps)

    # A caller-supplied activation always wins; otherwise pick by quant_type.
    if shared_act is not None:
        self.activ = shared_act
    elif quant_type == QuantType.FP:
        self.activ = nn.ReLU6()
    elif quant_type == QuantType.INT:
        self.activ = QuantReLU(
            quant_type=quant_type,
            bit_width=act_bit_width,
            max_val=RELU_MAX_VAL,
            scaling_per_channel=act_scaling_per_channel,
            scaling_impl_type=ScalingImplType.PARAMETER,
            scaling_min_val=MIN_SCALING_VALUE,
            restrict_scaling_type=RestrictValueType.LOG_FP,
            per_channel_broadcastable_shape=(1, out_channels, 1, 1),
            return_quant_tensor=True,
        )
    else:
        raise Exception("Activ non recognized.")
def test_brevitas_QConv2d(dw, bias, in_channels, QONNX_export):
    """Check that an exported ``QuantConv2d`` matches the Brevitas module.

    Builds either a depthwise 3x3 conv (``dw is True``) or a 1x1 conv with
    64 output channels, overwrites its weights with a random INT4 tensor,
    exports it (QONNX path or FINN-ONNX path), executes the exported model
    and compares against the Brevitas forward pass with ``atol=1e-3``.

    Input and weight shapes are derived from ``in_channels`` so the test
    stays consistent for any channel count. The exported file is removed in
    a ``finally`` clause so a failing assertion or export error does not
    leave artefacts behind.
    """
    ishape = (1, in_channels, 111, 111)
    if dw is True:
        groups = in_channels
        out_channels = in_channels
        kernel_size = 3
        padding = 1
        stride = 1
        w_shape = (in_channels, 1, 3, 3)
    else:
        groups = 1
        out_channels = 64
        kernel_size = 1
        padding = 0
        stride = 1
        w_shape = (64, in_channels, 1, 1)

    b_conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        groups=groups,
        kernel_size=kernel_size,
        padding=padding,
        stride=stride,
        bias=bias,
        bias_quant_type=QuantType.FP,
        weight_bit_width=4,
        weight_quant_type=QuantType.INT,
        weight_scaling_impl_type=ScalingImplType.STATS,
        weight_scaling_stats_op=StatsOp.MAX,
        weight_scaling_per_output_channel=True,
        weight_restrict_scaling_type=RestrictValueType.LOG_FP,
        weight_narrow_range=True,
        weight_scaling_min_val=2e-16,
    )
    # Replace the initial weights with values drawn from the INT4 range.
    weight_tensor = gen_finn_dt_tensor(DataType["INT4"], w_shape)
    b_conv.weight = torch.nn.Parameter(torch.from_numpy(weight_tensor).float())
    b_conv.eval()

    try:
        if QONNX_export:
            m_path = export_onnx_path
            BrevitasONNXManager.export(b_conv, ishape, m_path)
            qonnx_cleanup(m_path, out_file=m_path)
            model = ModelWrapper(m_path)
            model = model.transform(ConvertQONNXtoFINN())
            model.save(m_path)
        else:
            bo.export_finn_onnx(b_conv, ishape, export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(InferShapes())

        inp_tensor = np.random.uniform(
            low=-1.0, high=1.0, size=ishape).astype(np.float32)
        idict = {model.graph.input[0].name: inp_tensor}
        odict = oxe.execute_onnx(model, idict, True)
        produced = odict[model.graph.output[0].name]

        inp_tensor = torch.from_numpy(inp_tensor).float()
        expected = b_conv.forward(inp_tensor).detach().numpy()
        assert np.isclose(produced, expected, atol=1e-3).all()
    finally:
        # Clean up even on failure; the file may not exist if export failed.
        if os.path.exists(export_onnx_path):
            os.remove(export_onnx_path)
def test_float_bias_zero_point():
    """Assert the output zero-point is non-zero everywhere.

    The conv has a bias and an ``Int8ActPerTensorFloat`` input quantizer,
    and returns a quant tensor.
    """
    layer = QuantConv2d(
        IN_CH,
        OUTPUT_CH,
        KERNEL_SIZE,
        bias=True,
        input_quant=Int8ActPerTensorFloat,
        return_quant_tensor=True,
    )
    sample = torch.randn(1, IN_CH, 10, 10)
    result = layer(sample)
    assert (result.zero_point != 0.).all()
def test_internally_scaled_int_bias(self):
    """Smoke test: a forward pass with an internally scaled int8 bias must not raise."""
    layer = QuantConv2d(
        out_channels=OUTPUT_CHANNELS,
        in_channels=INPUT_CHANNELS,
        kernel_size=KERNEL_SIZE,
        weight_quant_delay_steps=1,
        bias=True,
        bias_quant=Int8BiasPerTensorFloatInternalScaling,
    )
    sample = torch.randn(1, INPUT_CHANNELS, 20, 20)
    # No assertion on the result; only successful execution is checked.
    layer(sample)
def __init__(self, num_classes, weight_bit_width, act_bit_width,
             in_bit_width, in_ch):
    """Assemble the convolutional and fully connected stacks.

    Args:
        num_classes: number of outputs of the final linear layer.
        weight_bit_width: weight bit width shared by all conv/linear layers.
        act_bit_width: bit width of the hidden activation quantizers.
        in_bit_width: bit width of the input quantizer.
        in_ch: number of input channels of the first convolution.

    The conv stack follows ``CNV_OUT_CH_POOL``, the hidden linear layers
    follow ``INTERMEDIATE_FC_FEATURES``, and all conv/linear weights are
    re-initialised uniformly in [-1, 1].
    """
    super(CNV, self).__init__()

    self.conv_features = ModuleList()
    self.linear_features = ModuleList()
    conv_stack = self.conv_features
    linear_stack = self.linear_features

    # Input quantizer, for Q1.7 input format.
    conv_stack.append(
        QuantIdentity(
            act_quant=CommonActQuant,
            bit_width=in_bit_width,
            min_val=-1.0,
            max_val=1.0 - 2.0**(-7),
            narrow_range=False,
            restrict_scaling_type=RestrictValueType.POWER_OF_TWO))

    # Conv -> BN -> activation quantizer, optionally followed by pooling.
    for out_ch, is_pool_enabled in CNV_OUT_CH_POOL:
        conv_stack.append(
            QuantConv2d(
                kernel_size=KERNEL_SIZE,
                in_channels=in_ch,
                out_channels=out_ch,
                bias=False,
                weight_quant=CommonWeightQuant,
                weight_bit_width=weight_bit_width))
        # The next stage consumes what this one produced.
        in_ch = out_ch
        conv_stack.append(BatchNorm2d(in_ch, eps=1e-4))
        conv_stack.append(
            QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))
        if is_pool_enabled:
            conv_stack.append(MaxPool2d(kernel_size=2))

    # Hidden fully connected stages: linear -> BN -> activation quantizer.
    for in_features, out_features in INTERMEDIATE_FC_FEATURES:
        linear_stack.append(
            QuantLinear(
                in_features=in_features,
                out_features=out_features,
                bias=False,
                weight_quant=CommonWeightQuant,
                weight_bit_width=weight_bit_width))
        linear_stack.append(BatchNorm1d(out_features, eps=1e-4))
        linear_stack.append(
            QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))

    # Final linear layer followed by TensorNorm.
    linear_stack.append(
        QuantLinear(
            in_features=LAST_FC_IN_FEATURES,
            out_features=num_classes,
            bias=False,
            weight_quant=CommonWeightQuant,
            weight_bit_width=weight_bit_width))
    linear_stack.append(TensorNorm())

    # Re-initialise every quantized conv/linear weight uniformly in [-1, 1].
    for m in self.modules():
        if isinstance(m, (QuantConv2d, QuantLinear)):
            torch.nn.init.uniform_(m.weight.data, -1, 1)
def test_internally_scaled_int_bias_after_bn_merge(self):
    """Smoke test: forward pass after merging a batch norm into a bias-free conv.

    The conv is created with ``bias=False`` but with a bias quantizer. After
    ``merge_bn`` the forward pass must not raise.
    """
    layer = QuantConv2d(
        out_channels=OUTPUT_CHANNELS,
        in_channels=INPUT_CHANNELS,
        kernel_size=KERNEL_SIZE,
        weight_quant_delay_steps=1,
        bias=False,
        bias_quant=Int8BiasPerTensorFloatInternalScaling,
    )
    batch_norm = BatchNorm2d(OUTPUT_CHANNELS)
    merge_bn(layer, batch_norm)

    sample = torch.randn(1, INPUT_CHANNELS, 20, 20)
    # No assertion on the result; only successful execution is checked.
    layer(sample)
def __init__(self):
    """Create one biased 3x3 conv with int8 input/output and int16 bias quantizers.

    The conv returns a plain tensor. Its weights are re-initialised
    uniformly in [-0.01, 0.01].
    """
    super().__init__()
    conv_cfg = dict(
        out_channels=OUT_CH,
        in_channels=IN_CH,
        bias=True,
        kernel_size=3,
        input_quant=Int8ActPerTensorFloat,
        output_quant=Int8ActPerTensorFloat,
        bias_quant=Int16Bias,
        return_quant_tensor=False,
    )
    self.conv = QuantConv2d(**conv_cfg)
    # Small weights, set in place.
    self.conv.weight.data.uniform_(-0.01, 0.01)
def __init__(self):
    """Create an input quantizer and two fixed-point convs with caching enabled.

    Both convolutions share the same quantizer configuration and have
    ``cache_inference_quant_out`` and ``cache_inference_quant_bias`` set to
    true.
    """
    super().__init__()

    self.inp_quant = QuantIdentity(
        act_quant=Int8ActPerTensorFixedPoint, return_quant_tensor=True)

    # Quantizer configuration shared by both convolutions.
    quant_cfg = dict(
        weight_quant=Int8WeightPerTensorFixedPoint,
        bias_quant=Int8Bias,
        output_quant=Int8ActPerTensorFixedPoint,
        return_quant_tensor=True,
    )
    self.conv = QuantConv2d(5, 10, (3, 3), **quant_cfg)
    self.conv2 = QuantConv2d(10, 10, (3, 3), **quant_cfg)

    # Enable output and bias caching on each conv, first conv first.
    for layer in (self.conv, self.conv2):
        layer.cache_inference_quant_out = True
        layer.cache_inference_quant_bias = True
def __init__(self):
    """Create one bias-free conv with int8 fixed-point quantizers.

    Weight, input and output are all quantized, and the conv returns a
    quant tensor. Its weights are re-initialised uniformly in
    [-0.01, 0.01].
    """
    super().__init__()
    conv_cfg = dict(
        out_channels=OUT_CH,
        in_channels=IN_CH,
        kernel_size=KERNEL_SIZE,
        bias=False,
        weight_quant=Int8WeightPerTensorFixedPoint,
        input_quant=Int8ActPerTensorFixedPoint,
        output_quant=Int8ActPerTensorFixedPoint,
        return_quant_tensor=True,
    )
    self.conv1 = QuantConv2d(**conv_cfg)
    # Small weights, set in place.
    self.conv1.weight.data.uniform_(-0.01, 0.01)
def __init__(self):
    """Create one bias-free conv with shifted-uint8 quantizers.

    Weight, input and output are all quantized, and the conv returns a
    plain tensor. Its weights are re-initialised uniformly in [-1.0, 1.0].
    """
    super().__init__()
    conv_cfg = dict(
        out_channels=OUT_CH,
        in_channels=IN_CH,
        kernel_size=KERNEL_SIZE,
        bias=False,
        weight_quant=ShiftedUint8WeightPerTensorFloat,
        input_quant=ShiftedUint8ActPerTensorFloat,
        output_quant=ShiftedUint8ActPerTensorFloat,
        return_quant_tensor=False,
    )
    self.conv1 = QuantConv2d(**conv_cfg)
    # Overwrite the initial weights in place.
    self.conv1.weight.data.uniform_(-1.0, 1.0)
def get_quant_conv2d(in_ch, out_ch, bit_width, quant_type):
    """Return a ``QuantConv2d`` configured from the module-level constants.

    Args:
        in_ch: number of input channels.
        out_ch: number of output channels.
        bit_width: weight bit width.
        quant_type: weight quantization type.
    """
    # Everything not passed in comes from module-level configuration.
    weight_cfg = dict(
        weight_quant_type=quant_type,
        weight_bit_width=bit_width,
        weight_narrow_range=NARROW_RANGE_ENABLED,
        weight_scaling_impl_type=WEIGHT_SCALING_IMPL_TYPE,
        weight_scaling_const=WEIGHT_SCALING_CONST,
        weight_scaling_per_output_channel=CONV_PER_OUT_CH_SCALING,
        weight_restrict_scaling_type=SCALING_VALUE_TYPE,
        weight_bit_width_impl_type=BIT_WIDTH_IMPL_TYPE,
    )
    return QuantConv2d(
        in_channels=in_ch,
        kernel_size=KERNEL_SIZE,
        out_channels=out_ch,
        **weight_cfg,
        bias=BIAS_ENABLED,
    )
def __init__(self):
    """Create one bias-free conv with 7-bit shifted-uint input/output quantizers.

    Weights use ``Int8WeightPerTensorFloat`` and a bias quantizer
    (``Int16Bias``) is configured even though ``bias=False``. The conv
    returns a plain tensor. Its weights are re-initialised uniformly in
    [-0.01, 0.01].
    """
    super().__init__()
    conv_cfg = dict(
        out_channels=OUT_CH,
        in_channels=IN_CH,
        kernel_size=KERNEL_SIZE,
        bias=False,
        input_bit_width=7,
        output_bit_width=7,
        weight_quant=Int8WeightPerTensorFloat,
        bias_quant=Int16Bias,
        input_quant=ShiftedUint8ActPerTensorFloat,
        output_quant=ShiftedUint8ActPerTensorFloat,
        return_quant_tensor=False,
    )
    self.conv = QuantConv2d(**conv_cfg)
    # Small weights, set in place.
    self.conv.weight.data.uniform_(-0.01, 0.01)
def test_delayed_quant_module(self):
    """Assert a delayed-quant conv matches a float conv on the first call.

    The quantized conv is built with ``weight_quant_delay_steps=1`` and
    loaded with the float conv's state dict. Their outputs on the same
    input must be element-wise close.
    """
    shared_cfg = dict(
        out_channels=OUTPUT_CHANNELS,
        in_channels=INPUT_CHANNELS,
        kernel_size=KERNEL_SIZE,
    )
    reference = Conv2d(**shared_cfg, bias=False)
    delayed = QuantConv2d(**shared_cfg, weight_quant_delay_steps=1, bias=False)

    # Give both modules identical parameters.
    delayed.load_state_dict(reference.state_dict())

    sample = torch.randn(1, INPUT_CHANNELS, 20, 20)
    out_reference = reference(sample)
    out_delayed = delayed(sample)
    assert out_reference.isclose(out_delayed).all().item()
def make_layers(cfg, batch_norm, bit_width):
    """Build a sequential feature extractor from a layer configuration.

    Args:
        cfg: iterable whose entries are either ``'M'`` (insert a 2x2
            max-pool with stride 2) or an output channel count for a 3x3
            quantized conv stage.
        batch_norm: when true, a ``BatchNorm2d`` is inserted between each
            conv and its activation, and the conv is built without bias.
        bit_width: bit width used for both weights and activations.

    Returns:
        ``nn.Sequential`` containing the layers in order. The first conv
        takes 3 input channels.
    """
    modules = []
    current_channels = 3
    for entry in cfg:
        if entry == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        conv = QuantConv2d(
            current_channels,
            entry,
            kernel_size=3,
            stride=1,
            padding=1,
            groups=1,
            bias=not batch_norm,
            weight_bit_width=bit_width,
            weight_quant=CommonIntWeightPerChannelQuant,
        )
        activation = QuantReLU(
            act_quant=CommonUintActQuant,
            bit_width=bit_width,
            return_quant_tensor=True,
        )
        if batch_norm:
            stage = [conv, nn.BatchNorm2d(entry), activation]
        else:
            stage = [conv, activation]
        modules.extend(stage)
        current_channels = entry
    return nn.Sequential(*modules)
def PreQuantizedConv2d(in_channels, out_channels, kernel_size, config,
                       stride=1, padding=0, dilation=1, groups=1, bias=True):
    """Return a ``QuantConv2d`` with narrow-range, per-output-channel INT weights.

    Args:
        in_channels, out_channels, kernel_size, stride, padding, dilation,
            groups, bias: forwarded to ``QuantConv2d`` unchanged.
        config: object whose ``weight_bit_width`` attribute sets the weight
            bit width.
    """
    geometry = dict(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
        bias=bias,
    )
    weight_cfg = dict(
        weight_quant_type=QuantType.INT,
        weight_narrow_range=True,
        weight_bit_width=config.weight_bit_width,
        weight_scaling_per_output_channel=True,
    )
    return QuantConv2d(**geometry, **weight_cfg)
def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
             weight_bit_width, act_bit_width, act_scaling_per_channel, bias,
             groups=1, bn_eps=1e-5, shared_act=None,
             return_quant_tensor=False):
    """Create a quantized conv, a batch norm and an activation.

    Args:
        in_channels, out_channels, kernel_size, stride, padding, groups,
            bias: forwarded to ``QuantConv2d``.
        weight_bit_width: bit width of the conv weights.
        act_bit_width: bit width of the ``QuantReLU`` activation.
        act_scaling_per_channel: forwarded to ``QuantReLU``.
        bn_eps: ``eps`` of the batch-norm layer.
        shared_act: optional pre-built activation module. When given it is
            used as-is and the activation arguments are ignored.
        return_quant_tensor: forwarded to the ``QuantReLU`` created here.
    """
    super(ConvBlock, self).__init__()

    self.conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        groups=groups,
        bias=bias,
        weight_bit_width=weight_bit_width,
        weight_quant=CommonIntWeightPerChannelQuant,
    )
    self.bn = nn.BatchNorm2d(num_features=out_channels, eps=bn_eps)

    if shared_act is not None:
        # Reuse the activation supplied by the caller.
        self.activ = shared_act
    else:
        self.activ = QuantReLU(
            act_quant=CommonUintActQuant,
            bit_width=act_bit_width,
            scaling_per_channel=act_scaling_per_channel,
            per_channel_broadcastable_shape=(1, out_channels, 1, 1),
            return_quant_tensor=return_quant_tensor,
        )
def quant_weight_scale(module: QuantConv2d):
    """Return the module's weight scale converted by ``neg_scalar_exponent_from_scale``."""
    return DPUv1QuantConv2dHandler.neg_scalar_exponent_from_scale(
        module.quant_weight_scale())
def test_module_init(self):
    """Smoke test: constructing a bias-free ``QuantConv2d`` must not raise."""
    init_kwargs = dict(
        out_channels=OUTPUT_CHANNELS,
        in_channels=INPUT_CHANNELS,
        kernel_size=KERNEL_SIZE,
        bias=False,
    )
    module = QuantConv2d(**init_kwargs)
def __init__(self):
    """Create the network with every quantizer argument passed as ``None``.

    All conv, linear and activation layers are Brevitas quant modules, but
    each receives ``None`` for its quantizers and ``update_*`` arguments.
    """
    super(QuantLeNet, self).__init__()

    # Arguments shared by all conv/linear layers.
    layer_cfg = dict(
        weight_quant=None,
        input_quant=None,
        bias_quant=None,
        output_quant=None,
        update_wqi=None,
        update_bqi=None,
        update_iqi=None,
        update_oqi=None,
    )
    # Arguments shared by all activations.
    act_cfg = dict(
        input_quant=None,
        act_quant=None,
        output_quant=None,
        update_iqi=None,
        update_aqi=None,
    )

    self.conv1 = QuantConv2d(1, 6, 5, **layer_cfg)
    self.relu1 = QuantReLU(**act_cfg)
    self.conv2 = QuantConv2d(6, 16, 5, **layer_cfg)
    self.relu2 = QuantReLU(**act_cfg)
    self.fc1 = QuantLinear(16 * 5 * 5, 120, bias=True, **layer_cfg)
    self.relu3 = QuantReLU(**act_cfg)
    self.fc2 = QuantLinear(120, 84, bias=True, **layer_cfg)
    self.relu4 = QuantReLU(**act_cfg)
    # The last linear layer is the only one created without bias.
    self.fc3 = QuantLinear(84, 10, bias=False, **layer_cfg)
def get_8_bits_quantized_lenet():
    """Return a ``QuantLeNet`` whose layers are replaced by LSQ-quantized ones.

    Every conv/linear layer is rebuilt with ``LSQ_weight_quant_8bits`` as
    weight quantizer and every ReLU with ``LSQ_input_quant_8bits`` as
    activation quantizer. All other quantizer arguments are ``None``.
    """
    # Arguments shared by all conv/linear replacements.
    layer_cfg = dict(
        weight_quant=LSQ_weight_quant_8bits,
        bias_quant=None,
        input_quant=None,
        output_quant=None,
        update_wqi=None,
        update_bqi=None,
        update_iqi=None,
        update_oqi=None,
    )
    # Arguments shared by all activation replacements.
    act_cfg = dict(
        input_quant=None,
        act_quant=LSQ_input_quant_8bits,
        output_quant=None,
        update_iqi=None,
        update_aqi=None,
    )

    net = QuantLeNet()
    net.conv1 = QuantConv2d(1, 6, 5, **layer_cfg)
    net.relu1 = QuantReLU(**act_cfg)
    net.conv2 = QuantConv2d(6, 16, 5, **layer_cfg)
    net.relu2 = QuantReLU(**act_cfg)
    net.fc1 = QuantLinear(16 * 5 * 5, 120, bias=True, **layer_cfg)
    net.relu3 = QuantReLU(**act_cfg)
    net.fc2 = QuantLinear(120, 84, bias=True, **layer_cfg)
    net.relu4 = QuantReLU(**act_cfg)
    net.fc3 = QuantLinear(84, 10, bias=False, **layer_cfg)
    return net
def quant_weight_scale(module: QuantConv2d):
    """Return the module's weight scale as a detached ``torch.FloatTensor``.

    A 4-dimensional scale is reshaped to ``(1, -1, 1, 1)``. Any other shape
    is returned unchanged.
    """
    scale = module.quant_weight_scale().type(torch.FloatTensor).detach()
    if scale.dim() == 4:
        return scale.view(1, -1, 1, 1)
    return scale
def int_weight(module: QuantConv2d):
    """Return the module's integer weight (``float_datatype=False``), detached."""
    integer_weight = module.int_weight(float_datatype=False)
    return integer_weight.detach()
def quant_weight_bit_width(module: QuantConv2d):
    """Return the module's weight bit width passed through ``validate_8b_bit_width``."""
    return DPUv1QuantLayerHandler.validate_8b_bit_width(
        module.quant_weight_bit_width())