def __init__(self):
    """Two 7-bit shifted-uint8 identity quantizers; only act1 emits a QuantTensor."""
    super().__init__()
    # Both activations share the same quantizer config; they differ only in
    # whether the wrapped QuantTensor is returned to the caller.
    shared_cfg = dict(bit_width=7, act_quant=ShiftedUint8ActPerTensorFloat)
    self.act1 = QuantIdentity(return_quant_tensor=True, **shared_cfg)
    self.act2 = QuantIdentity(return_quant_tensor=False, **shared_cfg)
def __init__(self, num_classes, weight_bit_width, act_bit_width, in_bit_width, in_features=(1, 28, 28)):
    """Quantized MLP: input quantizer, three 64-unit hidden blocks, output head.

    Each hidden block is QuantLinear -> BatchNorm1d -> activation quantizer
    -> Dropout; the head is a bias-free QuantLinear followed by TensorNorm.
    """
    super(FC, self).__init__()
    self.features = ModuleList()
    # Quantize the raw input, then apply dropout to the flattened image.
    self.features.append(
        QuantIdentity(act_quant=CommonActQuant, bit_width=in_bit_width))
    self.features.append(Dropout(p=DROPOUT))
    in_features = reduce(mul, in_features)
    # Three structurally identical hidden blocks.
    for out_features in (64, 64, 64):
        self.features.append(
            QuantLinear(
                in_features=in_features,
                out_features=out_features,
                bias=False,
                weight_bit_width=weight_bit_width,
                weight_quant=CommonWeightQuant))
        self.features.append(BatchNorm1d(num_features=out_features))
        self.features.append(
            QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))
        self.features.append(Dropout(p=DROPOUT))
        in_features = out_features
    # Classifier head with a learned tensor-wide normalization on top.
    self.features.append(
        QuantLinear(
            in_features=in_features,
            out_features=num_classes,
            bias=False,
            weight_bit_width=weight_bit_width,
            weight_quant=CommonWeightQuant))
    self.features.append(TensorNorm())
    # Uniform [-1, 1] init for every quantized linear layer.
    for m in self.modules():
        if isinstance(m, QuantLinear):
            torch.nn.init.uniform_(m.weight.data, -1, 1)
def __init__(self, num_classes, weight_bit_width, act_bit_width, in_bit_width, in_ch):
    """Quantized CNV: conv backbone + FC head, both built from module lists."""
    super(CNV, self).__init__()
    self.conv_features = ModuleList()
    self.linear_features = ModuleList()
    # Input quantizer configured for the Q1.7 fixed-point input format.
    self.conv_features.append(
        QuantIdentity(
            act_quant=CommonActQuant,
            bit_width=in_bit_width,
            min_val=-1.0,
            max_val=1.0 - 2.0 ** (-7),
            narrow_range=False,
            restrict_scaling_type=RestrictValueType.POWER_OF_TWO))
    # Conv backbone: conv -> batchnorm -> act quantizer, with optional 2x2 max-pool.
    for out_ch, is_pool_enabled in CNV_OUT_CH_POOL:
        self.conv_features.append(
            QuantConv2d(
                kernel_size=KERNEL_SIZE,
                in_channels=in_ch,
                out_channels=out_ch,
                bias=False,
                weight_quant=CommonWeightQuant,
                weight_bit_width=weight_bit_width))
        in_ch = out_ch
        self.conv_features.append(BatchNorm2d(in_ch, eps=1e-4))
        self.conv_features.append(
            QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))
        if is_pool_enabled:
            self.conv_features.append(MaxPool2d(kernel_size=2))
    # Intermediate fully-connected blocks.
    for in_features, out_features in INTERMEDIATE_FC_FEATURES:
        self.linear_features.append(
            QuantLinear(
                in_features=in_features,
                out_features=out_features,
                bias=False,
                weight_quant=CommonWeightQuant,
                weight_bit_width=weight_bit_width))
        self.linear_features.append(BatchNorm1d(out_features, eps=1e-4))
        self.linear_features.append(
            QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))
    # Final classifier with a learned tensor-wide normalization on top.
    self.linear_features.append(
        QuantLinear(
            in_features=LAST_FC_IN_FEATURES,
            out_features=num_classes,
            bias=False,
            weight_quant=CommonWeightQuant,
            weight_bit_width=weight_bit_width))
    self.linear_features.append(TensorNorm())
    # Uniform [-1, 1] init for every quantized conv/linear layer.
    for m in self.modules():
        if isinstance(m, (QuantConv2d, QuantLinear)):
            torch.nn.init.uniform_(m.weight.data, -1, 1)
def __init__(self, my_pretrained_model):
    """Wrap a pretrained model and binarize its output via a 1-bit quantizer."""
    super().__init__()
    self.pretrained = my_pretrained_model
    # Binary output quantizer spanning [-1, 1].
    self.qnt_output = QuantIdentity(
        quant_type=QuantType.BINARY,
        bit_width=1,
        min_val=-1.0,
        max_val=1.0)
def __init__(self, input_width, weight_width, act_width):
    """Quantized LeNet-5 variant with configurable input/weight/activation widths."""
    super(QuantLeNet, self).__init__()
    # Input quantizer over [-1, 1].
    self.quant_inp = QuantIdentity(bit_width=input_width, min_val=-1.0, max_val=1.0)
    # Feature extractor: two quantized 5x5 conv layers.
    self.conv1 = QuantConv2d(1, 6, 5, weight_bit_width=weight_width)
    self.conv2 = QuantConv2d(6, 16, 5, weight_bit_width=weight_width)
    # Classifier: two biased hidden layers plus a bias-free output layer.
    self.fc1 = QuantLinear(16 * 4 * 4, 120, bias=True, weight_bit_width=weight_width)
    self.fc2 = QuantLinear(120, 84, bias=True, weight_bit_width=weight_width)
    self.fc3 = QuantLinear(84, 10, bias=False, weight_bit_width=weight_width)
    # One quantized ReLU (clipped at 6) per activation site, all configured alike.
    relu_cfg = dict(bit_width=act_width, max_val=6)
    self.relu1 = QuantReLU(**relu_cfg)
    self.relu2 = QuantReLU(**relu_cfg)
    self.relu3 = QuantReLU(**relu_cfg)
    self.relu4 = QuantReLU(**relu_cfg)
def test_quant_conv2d(dw, bias, bias_quant, in_features, in_channels, out_channels, w_bits, channel_scaling, kernel_size, padding, stride, i_bits):
    """Export a QuantConv2d to FINN-ONNX and check ONNX execution matches PyTorch."""
    # The input quantizer only generates quantized inputs; it is not part of
    # the exported model under test.
    input_quantizer = QuantIdentity(bit_width=i_bits, return_quant_tensor=True)
    quant_input = input_quantizer(
        torch.randn(1, in_channels, in_features, in_features))
    conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=in_channels if dw else out_channels,
        groups=in_channels if dw else 1,  # depthwise when dw is set
        kernel_size=kernel_size,
        padding=padding,
        stride=stride,
        bias=bias,
        bias_quant=bias_quant,
        weight_bit_width=w_bits,
        weight_scaling_per_output_channel=channel_scaling)
    conv.eval()
    onnx_model = ModelWrapper(bo.export_finn_onnx(conv, input_t=quant_input))
    onnx_model = onnx_model.transform(InferShapes())
    # The quantized input tensor passed to FINN should be in integer form.
    int_input = quant_input.int(float_datatype=True).numpy()
    input_dict = {onnx_model.graph.input[0].name: int_input}
    output_dict = oxe.execute_onnx(onnx_model, input_dict, True)
    produced = output_dict[onnx_model.graph.output[0].name]
    expected = conv(quant_input).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
def __init__(self):
    """7-bit shifted-uint8 activation quantizer feeding a quantized max-pool."""
    super().__init__()
    # The activation hands a QuantTensor to the pool; the pool returns a plain tensor.
    self.act = QuantIdentity(
        return_quant_tensor=True,
        act_quant=ShiftedUint8ActPerTensorFloat,
        bit_width=7)
    self.pool = QuantMaxPool2d(
        return_quant_tensor=False,
        kernel_size=KERNEL_SIZE,
        stride=KERNEL_SIZE)
def __init__(self):
    """Input quantizer plus a QuantLinear with int8 output and int16 bias quant."""
    super().__init__()
    self.quant_inp = QuantIdentity(return_quant_tensor=True)
    self.linear = QuantLinear(
        in_features=IN_CH,
        out_features=OUT_CH,
        bias=True,
        output_quant=Int8ActPerTensorFloat,
        bias_quant=Int16Bias,
        return_quant_tensor=False)
    # Initialize weights uniformly in [-0.01, 0.01].
    self.linear.weight.data.uniform_(-0.01, 0.01)
def test_generic_quant_avgpool_export_quant_input():
    """Export QuantAvgPool2d when it is fed an already-quantized input tensor."""
    input_shape = (2, OUT_CH, IN_CH, IN_CH)
    raw_input = torch.randn(input_shape)
    input_quant = QuantIdentity(return_quant_tensor=True)
    pool = QuantAvgPool2d(kernel_size=2, return_quant_tensor=False)
    # Run the quantizer once in training mode to collect scale factors.
    input_quant(raw_input)
    input_quant.eval()
    pool.eval()
    BrevitasONNXManager.export(
        pool,
        input_t=input_quant(raw_input),
        export_path='generic_quant_avgpool_quant_input.onnx')
def test_quant_identity_delay(self, bw_quant_type):
    """Quantization is bypassed for the first DELAY steps, then takes effect."""
    DELAY = 10
    bit_width, quant_type = bw_quant_type
    mod = QuantIdentity(
        min_val=-6.0,
        max_val=6.0,
        threshold=0.5,  # only relevant for ternary quant
        bit_width=bit_width,
        quant_type=quant_type,
        quant_delay_steps=DELAY)
    # While delayed, the module must behave as a pure identity.
    for _ in range(DELAY):
        t = torch.randn(1, 10, 5, 5)
        assert t.isclose(mod(t)).all().item()
    # Once the delay expires, outputs are quantized and no longer match the input.
    t = torch.randn(1, 10, 5, 5)
    assert not t.isclose(mod(t)).all().item()
def __init__(self):
    """Fixed-point input quantizer feeding two identically-configured quant convs."""
    super().__init__()
    self.inp_quant = QuantIdentity(
        act_quant=Int8ActPerTensorFixedPoint, return_quant_tensor=True)
    # Both convs share the same fixed-point weight/bias/output quantizers.
    conv_cfg = dict(
        weight_quant=Int8WeightPerTensorFixedPoint,
        bias_quant=Int8Bias,
        output_quant=Int8ActPerTensorFixedPoint,
        return_quant_tensor=True)
    self.conv = QuantConv2d(5, 10, (3, 3), **conv_cfg)
    self.conv2 = QuantConv2d(10, 10, (3, 3), **conv_cfg)
    # Enable caching of quantized outputs and biases on both layers —
    # presumably so export can retrieve them after a forward pass; confirm
    # against the exporter that consumes this model.
    for conv_layer in (self.conv, self.conv2):
        conv_layer.cache_inference_quant_out = True
        conv_layer.cache_inference_quant_bias = True
def test_quant_linear(bias, bias_quant, out_features, in_features, w_bits, channel_scaling, i_bits):
    """Export a QuantLinear to FINN-ONNX and check ONNX execution matches PyTorch."""
    # The input quantizer only generates quantized inputs; it is not part of
    # the exported model under test.
    input_quantizer = QuantIdentity(bit_width=i_bits, return_quant_tensor=True)
    quant_input = input_quantizer(torch.randn(1, in_features))
    linear = QuantLinear(
        out_features=out_features,
        in_features=in_features,
        bias=bias,
        bias_quant=bias_quant,
        weight_bit_width=w_bits,
        weight_scaling_per_output_channel=channel_scaling)
    linear.eval()
    onnx_model = ModelWrapper(
        bo.export_finn_onnx(linear, input_t=quant_input, export_path='linear.onnx'))
    onnx_model = onnx_model.transform(InferShapes())
    # The quantized input tensor passed to FINN should be in integer form.
    int_input = quant_input.int(float_datatype=True).numpy()
    input_dict = {onnx_model.graph.input[0].name: int_input}
    output_dict = oxe.execute_onnx(onnx_model, input_dict, True)
    produced = output_dict[onnx_model.graph.output[0].name]
    expected = linear(quant_input).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
def __init__(self):
    """Default identity quantizer (QuantTensor out) feeding a 2x2 quantized max-pool."""
    super().__init__()
    self.act = QuantIdentity(return_quant_tensor=True)
    self.pool = QuantMaxPool2d(kernel_size=2, return_quant_tensor=False)
def __init__(self):
    """Single default identity quantizer returning a plain tensor."""
    super().__init__()
    self.act = QuantIdentity(return_quant_tensor=False)
def test_act_bit_width_weighted_by_size():
    """Size-weighted activation bit-width loss should reduce to the configured width."""
    model = QuantIdentity(bit_width_impl_type='parameter', bit_width=3)
    loss = ActivationBitWidthWeightedBySize(model)
    # A forward pass is needed so the loss can observe the activation size.
    model(torch.randn(2, 5, 5))
    assert loss.tot_num_elements == 25
    assert loss.retrieve() == 3.0
def __init__(
        self,
        channels,
        init_block_channels,
        final_block_channels,
        residuals,
        shortcuts,
        kernel_sizes,
        expansions,
        bit_width,
        depthwise_bit_width,
        first_layer_weight_bit_width,
        hadamard_classifier,
        bn_eps=1e-3,
        in_channels=3,
        num_classes=1000):
    """Build a quantized ProxylessNAS network.

    The per-stage lists (``channels``, ``residuals``, ``shortcuts``,
    ``kernel_sizes``, ``expansions``) are indexed in lockstep: element ``i``
    describes stage ``i`` and element ``[i][j]`` describes unit ``j`` of that
    stage. ``residuals``/``shortcuts`` entries equal to 1 enable the
    corresponding connection in the unit.

    Args:
        channels: nested list of output channel counts per stage per unit.
        init_block_channels: output channels of the stem conv block.
        final_block_channels: output channels of the last 1x1 conv block.
        residuals, shortcuts: nested 0/1 flags per stage per unit.
        kernel_sizes, expansions: nested per-unit conv kernel sizes and
            expansion factors.
        bit_width: default activation/weight bit width.
        depthwise_bit_width: bit width for depthwise convolutions inside units.
        first_layer_weight_bit_width: weight bit width of the stem conv.
        hadamard_classifier: if True use HadamardClassifier as the output
            head, otherwise a quantized linear layer.
        bn_eps: batch-norm epsilon passed through to all blocks.
        in_channels: number of input image channels.
        num_classes: number of output classes.
    """
    super(ProxylessNAS, self).__init__()
    self.features = nn.Sequential()
    # Stem: stride-2 3x3 conv block with its own (typically wider) weight bit width.
    init_block = ConvBlock(
        in_channels=in_channels,
        out_channels=init_block_channels,
        kernel_size=3,
        stride=2,
        padding=1,
        groups=1,
        bn_eps=bn_eps,
        act_scaling_per_channel=False,
        bias=False,
        act_bit_width=bit_width,
        weight_bit_width=first_layer_weight_bit_width)
    self.features.add_module("init_block", init_block)
    in_channels = init_block_channels
    # Activation quantizer shared across consecutive units: a unit WITHOUT a
    # shortcut starts a fresh shared quantizer, which following shortcut units
    # reuse. NOTE(review): if the very first unit has a shortcut, shared_act
    # is still None when passed to ProxylessUnit — confirm that is intended.
    shared_act = None
    for i, channels_per_stage in enumerate(channels):
        stage = nn.Sequential()
        residuals_per_stage = residuals[i]
        shortcuts_per_stage = shortcuts[i]
        kernel_sizes_per_stage = kernel_sizes[i]
        expansions_per_stage = expansions[i]
        for j, out_channels in enumerate(channels_per_stage):
            residual = (residuals_per_stage[j] == 1)
            shortcut = (shortcuts_per_stage[j] == 1)
            kernel_size = kernel_sizes_per_stage[j]
            expansion = expansions_per_stage[j]
            # Downsample at the first unit of every stage except the first.
            stride = 2 if (j == 0) and (i != 0) else 1
            if not shortcut:
                shared_act = QuantIdentity(
                    bit_width=bit_width,
                    act_quant=CommonIntActQuant,
                    return_quant_tensor=True)
            unit = ProxylessUnit(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                bn_eps=bn_eps,
                expansion=expansion,
                residual=residual,
                shortcut=shortcut,
                bit_width=bit_width,
                depthwise_bit_width=depthwise_bit_width,
                shared_act=shared_act)
            stage.add_module("unit{}".format(j + 1), unit)
            in_channels = out_channels
        self.features.add_module("stage{}".format(i + 1), stage)
    # Final 1x1 conv block; returns a QuantTensor for the pooling stage below.
    final_block = ConvBlock(
        in_channels=in_channels,
        out_channels=final_block_channels,
        kernel_size=1,
        stride=1,
        padding=0,
        groups=1,
        bn_eps=bn_eps,
        act_scaling_per_channel=False,
        act_bit_width=bit_width,
        weight_bit_width=bit_width,
        bias=False,
        return_quant_tensor=True)
    self.features.add_module("final_block", final_block)
    in_channels = final_block_channels
    # Global 7x7 average pooling, quantized at the default bit width.
    self.final_pool = QuantAvgPool2d(kernel_size=7, stride=1, bit_width=bit_width)
    if hadamard_classifier:
        self.output = HadamardClassifier(
            in_channels=in_channels,
            out_channels=num_classes,
            fixed_scale=False)
    else:
        self.output = QuantLinear(
            in_features=in_channels,
            out_features=num_classes,
            bias=True,
            bias_quant=IntBias,
            weight_bit_width=bit_width,
            weight_quant=CommonIntWeightPerTensorQuant)
def __init__(self):
    """Default input quantizer feeding a 2x2 quantized average pool."""
    super().__init__()
    self.inp_quant = QuantIdentity(return_quant_tensor=True)
    self.pool = QuantAvgPool2d(kernel_size=2)