def test_scaling_parameter_from_stats(self):
    shape = [8, 3, 64, 64]
    collect_stats_steps = 100
    stats_act = QuantReLU(
        bit_width=BIT_WIDTH,
        quant_type=QuantType.INT,
        scaling_impl_type=ScalingImplType.PARAMETER_FROM_STATS,
        scaling_stats_permute_dims=None,
        scaling_stats_op=StatsOp.PERCENTILE,
        collect_stats_steps=collect_stats_steps,
        scaling_min_val=None,
        percentile_q=99.0)
    stats_act.train()
    tensor_quant = stats_act.act_quant.fused_activation_quant_proxy.tensor_quant
    scaling_value = tensor_quant.scaling_impl.value
    # While statistics are being collected, the learned scaling parameter is not part of
    # the autograd graph, so it must not accumulate gradients.
    for i in range(collect_stats_steps):
        inp = torch.randn(shape)
        out = stats_act(inp)
        out.requires_grad_(True)  # make sure backward() has something to propagate
        out.sum().backward()
        assert scaling_value.grad is None
    # Once stats collection is over, the scaling parameter takes over and receives gradients.
    inp = torch.randn(shape)
    out = stats_act(inp)
    out.sum().backward()
    assert scaling_value.grad is not None

def __init__(self, input_width, weight_width, act_width):
    super(QuantLeNet, self).__init__()
    self.quant_inp = QuantIdentity(bit_width=input_width, min_val=-1.0, max_val=1.0)
    self.conv1 = QuantConv2d(1, 6, 5, weight_bit_width=weight_width)
    self.conv2 = QuantConv2d(6, 16, 5, weight_bit_width=weight_width)
    self.fc1 = QuantLinear(16 * 4 * 4, 120, bias=True, weight_bit_width=weight_width)
    self.fc2 = QuantLinear(120, 84, bias=True, weight_bit_width=weight_width)
    self.fc3 = QuantLinear(84, 10, bias=False, weight_bit_width=weight_width)
    self.relu1 = QuantReLU(bit_width=act_width, max_val=6)
    self.relu2 = QuantReLU(bit_width=act_width, max_val=6)
    self.relu3 = QuantReLU(bit_width=act_width, max_val=6)
    self.relu4 = QuantReLU(bit_width=act_width, max_val=6)

def __init__(self, model_config):
    super(JetSubstructureNeqModel, self).__init__()
    self.model_config = model_config
    self.num_neurons = [model_config["input_length"]] + model_config["hidden_layers"] + [model_config["output_length"]]
    layer_list = []
    for i in range(1, len(self.num_neurons)):
        in_features = self.num_neurons[i - 1]
        out_features = self.num_neurons[i]
        bn = nn.BatchNorm1d(out_features)
        if i == 1:
            bn_in = nn.BatchNorm1d(in_features)
            input_bias = ScalarBiasScale(scale=False, bias_init=-0.25)
            input_quant = QuantBrevitasActivation(
                QuantHardTanh(model_config["input_bitwidth"], max_val=1., narrow_range=False,
                              quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER),
                pre_transforms=[bn_in, input_bias])
            output_quant = QuantBrevitasActivation(
                QuantReLU(bit_width=model_config["hidden_bitwidth"], max_val=1.61,
                          quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER),
                pre_transforms=[bn])
            mask = RandomFixedSparsityMask2D(in_features, out_features, fan_in=model_config["input_fanin"])
            layer = SparseLinearNeq(in_features, out_features, input_quant=input_quant,
                                    output_quant=output_quant, sparse_linear_kws={'mask': mask})
            layer_list.append(layer)
        elif i == len(self.num_neurons) - 1:
            output_bias_scale = ScalarBiasScale(bias_init=0.33)
            output_quant = QuantBrevitasActivation(
                QuantHardTanh(bit_width=model_config["output_bitwidth"], max_val=1.33, narrow_range=False,
                              quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER),
                pre_transforms=[bn],
                post_transforms=[output_bias_scale])
            mask = RandomFixedSparsityMask2D(in_features, out_features, fan_in=model_config["output_fanin"])
            layer = SparseLinearNeq(in_features, out_features, input_quant=layer_list[-1].output_quant,
                                    output_quant=output_quant, sparse_linear_kws={'mask': mask},
                                    apply_input_quant=False)
            layer_list.append(layer)
        else:
            output_quant = QuantBrevitasActivation(
                QuantReLU(bit_width=model_config["hidden_bitwidth"], max_val=1.61,
                          quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER),
                pre_transforms=[bn])
            mask = RandomFixedSparsityMask2D(in_features, out_features, fan_in=model_config["hidden_fanin"])
            layer = SparseLinearNeq(in_features, out_features, input_quant=layer_list[-1].output_quant,
                                    output_quant=output_quant, sparse_linear_kws={'mask': mask},
                                    apply_input_quant=False)
            layer_list.append(layer)
    self.module_list = nn.ModuleList(layer_list)
    self.is_verilog_inference = False
    self.latency = 1
    self.verilog_dir = None
    self.top_module_filename = None
    self.dut = None
    self.logfile = None

def __init__(self):
    super().__init__()
    self.conv1 = QuantConv2d(
        kernel_size=KERNEL_SIZE,
        in_channels=CHANNELS,
        out_channels=CHANNELS,
        weight_quant=DPUv1WeightQuantInjector,
        bias_quant=None,
        output_quant=DPUv1OutputQuantInjector,
        bias=False,
        return_quant_tensor=True)
    self.act1 = QuantReLU(act_quant=DPUv1ActQuantInjector, return_quant_tensor=True)
    self.conv2 = QuantConv2d(
        kernel_size=KERNEL_SIZE,
        in_channels=CHANNELS,
        out_channels=CHANNELS,
        weight_quant=DPUv1WeightQuantInjector,
        bias_quant=None,
        output_quant=DPUv1OutputQuantInjector,
        bias=False,
        return_quant_tensor=True)
    self.act2 = QuantReLU(act_quant=DPUv1ActQuantInjector, return_quant_tensor=True)
    self.conv3 = QuantConv2d(
        kernel_size=KERNEL_SIZE,
        in_channels=CHANNELS,
        out_channels=CHANNELS,
        weight_quant=DPUv1WeightQuantInjector,
        bias_quant=None,
        output_quant=DPUv1OutputQuantInjector,
        bias=False,
        return_quant_tensor=True)
    self.act3 = QuantReLU(act_quant=DPUv1ActQuantInjector, return_quant_tensor=False)
    self.linear = nn.Linear(FC_IN_SIZE, CHANNELS)

def make_qrelu(no_quant=True, **kwargs) -> QuantReLU:
    if no_quant:
        return QuantReLU(input_quant=None, act_quant=None, output_quant=None, update_iqi=None, update_aqi=None)
    else:
        return QuantReLU(bit_width=kwargs['bit_width'])

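# Usage sketch (illustrative, not from the original source): with the default
# no_quant=True the returned module behaves like an unquantized ReLU; with
# no_quant=False a 'bit_width' keyword argument must be supplied.
# relu_fp = make_qrelu()
# relu_q4 = make_qrelu(no_quant=False, bit_width=4)
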
def thresholds(module: QuantReLU):
    # An n-bit unsigned activation has 2**n distinct output levels and 2**n - 1 thresholds.
    num_distinct_values = 2 ** int(module.quant_act_bit_width().item())
    num_thresholds = num_distinct_values - 1
    flat_scale = module.quant_act_scale().view(-1)
    num_scale_channels = flat_scale.shape[0]
    step = torch.abs(flat_scale)
    min_threshold = step / 2
    # Thresholds are evenly spaced: the t-th threshold of channel c is step[c] / 2 + step[c] * t.
    thresholds = torch.empty(num_scale_channels, num_thresholds)
    for c in range(num_scale_channels):
        for t in range(num_thresholds):
            thresholds[c][t] = min_threshold[c] + step[c] * t
    return thresholds

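# A vectorized sketch of the same threshold computation (an illustration, not part of the
# original export code; assumes torch and QuantReLU are imported as in the snippets above).
# It reproduces thresholds[c][t] = step[c] / 2 + step[c] * t via broadcasting.
def thresholds_vectorized(module: QuantReLU):
    num_thresholds = 2 ** int(module.quant_act_bit_width().item()) - 1
    step = torch.abs(module.quant_act_scale().view(-1, 1))  # (num_scale_channels, 1)
    offsets = torch.arange(num_thresholds).unsqueeze(0)     # (1, num_thresholds)
    return step / 2 + step * offsets                        # (num_scale_channels, num_thresholds)
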
def test_scaling_parameter_grad(self):
    stats_act = QuantReLU(
        bit_width=BIT_WIDTH,
        max_val=MAX_VAL,
        quant_type=QuantType.INT,
        scaling_impl_type=ScalingImplType.PARAMETER)
    stats_act.train()
    for i in range(RANDOM_ITERS):
        inp = torch.randn([8, 3, 64, 64])
        stats_act(inp)
        out = stats_act(inp)
        out.sum().backward()
        tensor_quant = stats_act.act_quant.fused_activation_quant_proxy.tensor_quant
        scaling_value = tensor_quant.scaling_impl.value
        assert scaling_value.grad is not None

def __init__(self):
    super().__init__()
    self.act1 = QuantIdentity(
        bit_width=7,
        act_quant=ShiftedUint8ActPerTensorFloat,
        return_quant_tensor=True)
    self.act2 = QuantReLU(act_quant=Uint8ActPerTensorFloat)

def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             weight_bit_width,
             act_bit_width,
             stride=1,
             padding=0,
             groups=1,
             bn_eps=1e-5,
             activation_scaling_per_channel=False):
    super(ConvBlock, self).__init__()
    self.conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        groups=groups,
        bias=False,
        weight_quant=CommonIntWeightPerChannelQuant,
        weight_bit_width=weight_bit_width)
    self.bn = nn.BatchNorm2d(num_features=out_channels, eps=bn_eps)
    self.activation = QuantReLU(
        act_quant=CommonUintActQuant,
        bit_width=act_bit_width,
        per_channel_broadcastable_shape=(1, out_channels, 1, 1),
        scaling_per_channel=activation_scaling_per_channel,
        return_quant_tensor=True)

def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             stride,
             padding,
             quant_type,
             weight_bit_width,
             act_bit_width,
             act_scaling_per_channel,
             weight_scaling_impl_type,
             bias,
             compute_micronet_cost,
             dilation=1,
             groups=1,
             bn_eps=1e-5,
             shared_act=None):
    super(ConvBlock, self).__init__()
    self.compute_micronet_cost = compute_micronet_cost
    self.conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
        bias=bias,
        weight_quant_type=quant_type,
        weight_bit_width=weight_bit_width,
        weight_scaling_impl_type=weight_scaling_impl_type,
        weight_restrict_scaling_type=RestrictValueType.LOG_FP,
        weight_narrow_range=True,
        weight_scaling_stats_op=StatsOp.MAX,
        weight_scaling_min_val=MIN_SCALING_VALUE,
        compute_output_bit_width=True,  # compute the number of bits in the output accumulator
        return_quant_tensor=True,  # return a quantized tensor representing the quantized accumulator
        weight_scaling_per_output_channel=True)
    self.bn = nn.BatchNorm2d(num_features=out_channels, eps=bn_eps)
    if shared_act is None and quant_type == QuantType.FP:
        self.activ = nn.ReLU6()
    elif shared_act is None and quant_type == QuantType.INT:
        self.activ = QuantReLU(
            quant_type=quant_type,
            bit_width=act_bit_width,
            max_val=RELU_MAX_VAL,
            scaling_per_channel=act_scaling_per_channel,
            scaling_impl_type=ScalingImplType.PARAMETER,
            scaling_min_val=MIN_SCALING_VALUE,
            restrict_scaling_type=RestrictValueType.LOG_FP,
            per_channel_broadcastable_shape=(1, out_channels, 1, 1),
            return_quant_tensor=True)
    elif shared_act is not None:
        self.activ = shared_act
    else:
        raise Exception("Activation not recognized.")

def quant_type(
        module: QuantReLU,
        supported_bit_width: Tuple[int, ...] = (2, 4, 8, 16, 32)):
    bit_width = int(module.quant_act_bit_width().item())
    if bit_width in supported_bit_width:
        return f"UINT{bit_width}"
    else:
        raise RuntimeError(f"Unsupported input bit width {bit_width} for export")

def quant_type(
        module: QuantReLU,
        supported_int_bit_width_range: Tuple[int, ...] = (2, 33)):
    bit_width = int(module.quant_act_bit_width().item())
    if bit_width in range(*supported_int_bit_width_range):
        return f"UINT{bit_width}"
    else:
        raise RuntimeError(f"Unsupported input bit width {bit_width} for export")

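# The two quant_type variants above differ only in how supported widths are expressed:
# an explicit tuple of allowed values versus a half-open [2, 33) integer range.
# Illustrative behaviour (an assumption, not from the original source):
# quant_type(QuantReLU(bit_width=8))  ->  "UINT8"
# an out-of-range bit width raises RuntimeError("Unsupported input bit width ... for export")
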
def thresholds(module: QuantReLU, extend_tensor_to_channels=True):
    num_distinct_values = 2 ** int(module.quant_act_bit_width().item())
    num_thresholds = num_distinct_values - 1
    flat_scale = module.quant_act_scale().view(-1)
    num_scale_channels = flat_scale.shape[0]
    step = torch.abs(flat_scale)
    min_threshold = step / 2
    thresholds = torch.empty(num_scale_channels, num_thresholds)
    for c in range(num_scale_channels):
        for t in range(num_thresholds):
            thresholds[c][t] = min_threshold[c] + step[c] * t
    if extend_tensor_to_channels:
        # A per-tensor scale yields a single row of thresholds; broadcast it so that every
        # channel of the cached input gets its own threshold vector.
        output_channels = module._cached_inp.shape[1]
        final_shape = (output_channels, num_thresholds)
        if thresholds.shape != final_shape:
            thresholds = thresholds.expand(final_shape)
    return thresholds

def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type, QONNX_export):
    min_val = -1.0
    ishape = (1, 15)
    b_act = QuantReLU(
        bit_width=abits,
        max_val=max_val,
        scaling_impl_type=scaling_impl_type,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        quant_type=QuantType.INT,
    )
    if scaling_impl_type == ScalingImplType.PARAMETER:
        checkpoint = {
            "act_quant_proxy.fused_activation_quant_proxy.tensor_quant.\
scaling_impl.learned_value": torch.tensor(0.49).type(torch.FloatTensor)
        }
        b_act.load_state_dict(checkpoint)
    if QONNX_export:
        m_path = export_onnx_path
        BrevitasONNXManager.export(b_act, ishape, m_path)
        qonnx_cleanup(m_path, out_file=m_path)
        model = ModelWrapper(m_path)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(m_path)
    else:
        bo.export_finn_onnx(b_act, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(np.float32)
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    b_act.eval()
    expected = b_act.forward(inp_tensor).detach().numpy()
    if not np.isclose(produced, expected, atol=1e-3).all():
        print(abits, max_val, scaling_impl_type)
        print("scale: ", b_act.quant_act_scale().type(torch.FloatTensor).detach())
        if abits < 5:
            print(
                "thres:",
                ", ".join(["{:8.4f}".format(x) for x in b_act.export_thres[0]]),
            )
        print("input:", ", ".join(["{:8.4f}".format(x) for x in inp_tensor[0]]))
        print("prod :", ", ".join(["{:8.4f}".format(x) for x in produced[0]]))
        print("expec:", ", ".join(["{:8.4f}".format(x) for x in expected[0]]))
    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)

def test_brevitas_act_export_relu_imagenet(abits, max_val, scaling_per_channel):
    out_channels = 32
    ishape = (1, out_channels, 1, 1)
    min_val = -1.0
    b_act = QuantReLU(
        bit_width=abits,
        quant_type=QuantType.INT,
        scaling_impl_type=ScalingImplType.PARAMETER,
        scaling_per_channel=scaling_per_channel,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        scaling_min_val=2e-16,
        max_val=6.0,
        return_quant_tensor=True,
        per_channel_broadcastable_shape=(1, out_channels, 1, 1),
    )
    if scaling_per_channel is True:
        rand_tensor = 2 * torch.rand((1, out_channels, 1, 1))
    else:
        rand_tensor = torch.tensor(1.2398)
    checkpoint = {
        "act_quant_proxy.fused_activation_quant_proxy.tensor_quant.\
scaling_impl.learned_value": rand_tensor.type(torch.FloatTensor)
    }
    b_act.load_state_dict(checkpoint)
    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(np.float32)
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    b_act.eval()
    expected = b_act.forward(inp_tensor).tensor.detach().numpy()
    if not np.isclose(produced, expected, atol=1e-3).all():
        print(abits, max_val)
        print("scale: ", b_act.quant_act_scale().type(torch.FloatTensor).detach())
        if abits < 5:
            print(
                "thres:",
                ", ".join(["{:8.4f}".format(x) for x in b_act.export_thres[0]]),
            )
        print("input:", ", ".join(["{:8.4f}".format(x) for x in inp_tensor[0]]))
        print("prod :", ", ".join(["{:8.4f}".format(x) for x in produced[0]]))
        print("expec:", ", ".join(["{:8.4f}".format(x) for x in expected[0]]))
    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)

def __init__(self, cfg, batch_norm, bit_width=8, num_classes=1000):
    super(QuantVGG, self).__init__()
    self.features = make_layers(cfg, batch_norm, bit_width)
    self.avgpool = QuantAvgPool2d(kernel_size=(7, 7), stride=1, bit_width=bit_width)
    self.classifier = nn.Sequential(
        QuantLinear(
            512 * 7 * 7, 4096,
            bias=True,
            weight_quant=CommonIntWeightPerChannelQuant,
            weight_bit_width=bit_width),
        QuantReLU(act_quant=CommonUintActQuant, bit_width=bit_width),
        nn.Dropout(),
        QuantLinear(
            4096, 4096,
            bias=True,
            weight_quant=CommonIntWeightPerChannelQuant,
            weight_bit_width=bit_width),
        QuantReLU(act_quant=CommonUintActQuant, bit_width=bit_width),
        nn.Dropout(),
        QuantLinear(
            4096, num_classes,
            bias=False,
            weight_quant=CommonIntWeightPerTensorQuant,
            weight_bit_width=bit_width),
    )
    self._initialize_weights()

def __init__(self):
    super().__init__()
    self.conv1 = QuantConv2d(
        out_channels=OUT_CH,
        in_channels=IN_CH,
        kernel_size=KERNEL_SIZE,
        bias=False,
        weight_quant=Int8WeightPerTensorFixedPoint,
        input_quant=Int8ActPerTensorFixedPoint,
        output_quant=Int8ActPerTensorFixedPoint,
        return_quant_tensor=True)
    self.relu = QuantReLU(act_quant=None, return_quant_tensor=False)
    self.conv1.weight.data.uniform_(-0.01, 0.01)

def make_layers(cfg, batch_norm, bit_width):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = QuantConv2d(
                in_channels, v,
                kernel_size=3,
                stride=1,
                padding=1,
                groups=1,
                bias=not batch_norm,
                weight_bit_width=bit_width,
                weight_quant=CommonIntWeightPerChannelQuant)
            act = QuantReLU(
                act_quant=CommonUintActQuant,
                bit_width=bit_width,
                return_quant_tensor=True)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), act]
            else:
                layers += [conv2d, act]
            in_channels = v
    return nn.Sequential(*layers)

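# Usage sketch (illustrative cfg, not from the original source): every integer entry becomes
# a QuantConv2d -> (BatchNorm2d) -> QuantReLU block and 'M' inserts a 2x2 max pooling layer.
# features = make_layers([64, 64, 'M', 128, 'M'], batch_norm=True, bit_width=4)
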
def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding,
        weight_bit_width,
        act_bit_width,
        act_scaling_per_channel,
        bias,
        groups=1,
        bn_eps=1e-5,
        shared_act=None,
        return_quant_tensor=False):
    super(ConvBlock, self).__init__()
    self.conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        groups=groups,
        bias=bias,
        weight_bit_width=weight_bit_width,
        weight_quant=CommonIntWeightPerChannelQuant)
    self.bn = nn.BatchNorm2d(num_features=out_channels, eps=bn_eps)
    if shared_act is None:
        self.activ = QuantReLU(
            act_quant=CommonUintActQuant,
            bit_width=act_bit_width,
            scaling_per_channel=act_scaling_per_channel,
            per_channel_broadcastable_shape=(1, out_channels, 1, 1),
            return_quant_tensor=return_quant_tensor)
    else:
        self.activ = shared_act

def __init__(self):
    super(QuantLeNet, self).__init__()
    self.conv1 = QuantConv2d(
        1, 6, 5,
        weight_quant=None, input_quant=None, bias_quant=None, output_quant=None,
        update_wqi=None, update_bqi=None, update_iqi=None, update_oqi=None)
    self.relu1 = QuantReLU(
        input_quant=None, act_quant=None, output_quant=None,
        update_iqi=None, update_aqi=None)
    self.conv2 = QuantConv2d(
        6, 16, 5,
        weight_quant=None, input_quant=None, bias_quant=None, output_quant=None,
        update_wqi=None, update_bqi=None, update_iqi=None, update_oqi=None)
    self.relu2 = QuantReLU(
        input_quant=None, act_quant=None, output_quant=None,
        update_iqi=None, update_aqi=None)
    self.fc1 = QuantLinear(
        16 * 5 * 5, 120, bias=True,
        weight_quant=None, input_quant=None, bias_quant=None, output_quant=None,
        update_wqi=None, update_bqi=None, update_iqi=None, update_oqi=None)
    self.relu3 = QuantReLU(
        input_quant=None, act_quant=None, output_quant=None,
        update_iqi=None, update_aqi=None)
    self.fc2 = QuantLinear(
        120, 84, bias=True,
        weight_quant=None, input_quant=None, bias_quant=None, output_quant=None,
        update_wqi=None, update_bqi=None, update_iqi=None, update_oqi=None)
    self.relu4 = QuantReLU(
        input_quant=None, act_quant=None, output_quant=None,
        update_iqi=None, update_aqi=None)
    self.fc3 = QuantLinear(
        84, 10, bias=False,
        weight_quant=None, input_quant=None, bias_quant=None, output_quant=None,
        update_wqi=None, update_bqi=None, update_iqi=None, update_oqi=None)

def quant_act_scale(module: QuantReLU):
    quant_act_scale = module.quant_act_scale().type(torch.FloatTensor).detach()
    return quant_act_scale

def get_8_bits_quantized_lenet():
    model = QuantLeNet()
    model.conv1 = QuantConv2d(
        1, 6, 5,
        weight_quant=LSQ_weight_quant_8bits, bias_quant=None, input_quant=None, output_quant=None,
        update_wqi=None, update_bqi=None, update_iqi=None, update_oqi=None)
    model.relu1 = QuantReLU(
        input_quant=None, act_quant=LSQ_input_quant_8bits, output_quant=None,
        update_iqi=None, update_aqi=None)
    model.conv2 = QuantConv2d(
        6, 16, 5,
        weight_quant=LSQ_weight_quant_8bits, bias_quant=None, input_quant=None, output_quant=None,
        update_wqi=None, update_bqi=None, update_iqi=None, update_oqi=None)
    model.relu2 = QuantReLU(
        input_quant=None, act_quant=LSQ_input_quant_8bits, output_quant=None,
        update_iqi=None, update_aqi=None)
    model.fc1 = QuantLinear(
        16 * 5 * 5, 120, bias=True,
        weight_quant=LSQ_weight_quant_8bits, bias_quant=None, input_quant=None, output_quant=None,
        update_wqi=None, update_bqi=None, update_iqi=None, update_oqi=None)
    model.relu3 = QuantReLU(
        input_quant=None, act_quant=LSQ_input_quant_8bits, output_quant=None,
        update_iqi=None, update_aqi=None)
    model.fc2 = QuantLinear(
        120, 84, bias=True,
        weight_quant=LSQ_weight_quant_8bits, bias_quant=None, input_quant=None, output_quant=None,
        update_wqi=None, update_bqi=None, update_iqi=None, update_oqi=None)
    model.relu4 = QuantReLU(
        input_quant=None, act_quant=LSQ_input_quant_8bits, output_quant=None,
        update_iqi=None, update_aqi=None)
    model.fc3 = QuantLinear(
        84, 10, bias=False,
        weight_quant=LSQ_weight_quant_8bits, bias_quant=None, input_quant=None, output_quant=None,
        update_wqi=None, update_bqi=None, update_iqi=None, update_oqi=None)
    return model

def test_module_init_const_scaling(self):
    mod = QuantReLU(max_val=6, scaling_impl_type='CONST')

def test_module_init_default(self):
    mod = QuantReLU(max_val=6)

def test_end2end_cybsec_mlp_export(QONNX_export):
    assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
    # load up trained net in Brevitas
    input_size = 593
    hidden1 = 64
    hidden2 = 64
    hidden3 = 64
    weight_bit_width = 2
    act_bit_width = 2
    num_classes = 1
    model = nn.Sequential(
        QuantLinear(input_size, hidden1, bias=True, weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden1),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden1, hidden2, bias=True, weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden2),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden2, hidden3, bias=True, weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden3),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden3, num_classes, bias=True, weight_bit_width=weight_bit_width),
    )
    trained_state_dict = torch.load(assets_dir + "/state_dict.pth")["models_state_dict"][0]
    model.load_state_dict(trained_state_dict, strict=False)
    W_orig = model[0].weight.data.detach().numpy()
    # pad the second (593-sized) dimension with 7 zeroes at the end
    W_new = np.pad(W_orig, [(0, 0), (0, 7)])
    model[0].weight.data = torch.from_numpy(W_new)
    model_for_export = CybSecMLPForExport(model)
    export_onnx_path = get_checkpoint_name("export", QONNX_export)
    input_shape = (1, 600)
    # create a QuantTensor instance to mark the input as bipolar during export
    input_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)
    input_a = 2 * input_a - 1
    scale = 1.0
    input_t = torch.from_numpy(input_a * scale)
    input_qt = QuantTensor(
        input_t, scale=torch.tensor(scale), bit_width=torch.tensor(1.0), signed=True)
    if QONNX_export:
        # With the BrevitasONNXManager we need to manually set
        # the FINN DataType at the input
        BrevitasONNXManager.export(model_for_export, input_shape, export_path=export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model.set_tensor_datatype(model.graph.input[0].name, DataType["BIPOLAR"])
        model.save(export_onnx_path)
        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(export_onnx_path)
    else:
        bo.export_finn_onnx(model_for_export, export_path=export_onnx_path, input_t=input_qt)
    assert os.path.isfile(export_onnx_path)
    # fix input datatype
    finn_model = ModelWrapper(export_onnx_path)
    finnonnx_in_tensor_name = finn_model.graph.input[0].name
    assert tuple(finn_model.get_tensor_shape(finnonnx_in_tensor_name)) == (1, 600)
    # verify a few exported ops
    if QONNX_export:
        # The first "Mul" node doesn't exist in the QONNX export,
        # because the QuantTensor scale is not exported.
        # However, this node would have been unity scale anyway and
        # the models are still equivalent.
        assert finn_model.graph.node[0].op_type == "Add"
        assert finn_model.graph.node[1].op_type == "Div"
        assert finn_model.graph.node[2].op_type == "MatMul"
        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    else:
        assert finn_model.graph.node[0].op_type == "Mul"
        assert finn_model.get_initializer(finn_model.graph.node[0].input[1]) == 1.0
        assert finn_model.graph.node[1].op_type == "Add"
        assert finn_model.graph.node[2].op_type == "Div"
        assert finn_model.graph.node[3].op_type == "MatMul"
        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    # verify datatypes on some tensors
    assert finn_model.get_tensor_datatype(finnonnx_in_tensor_name) == DataType["BIPOLAR"]
    first_matmul_w_name = finn_model.get_nodes_by_op_type("MatMul")[0].input[1]
    assert finn_model.get_tensor_datatype(first_matmul_w_name) == DataType["INT2"]

def test_end2end_cybsec_mlp_export():
    assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
    # load up trained net in Brevitas
    input_size = 593
    hidden1 = 64
    hidden2 = 64
    hidden3 = 64
    weight_bit_width = 2
    act_bit_width = 2
    num_classes = 1
    model = nn.Sequential(
        QuantLinear(input_size, hidden1, bias=True, weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden1),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden1, hidden2, bias=True, weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden2),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden2, hidden3, bias=True, weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden3),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden3, num_classes, bias=True, weight_bit_width=weight_bit_width),
    )
    trained_state_dict = torch.load(assets_dir + "/state_dict.pth")["models_state_dict"][0]
    model.load_state_dict(trained_state_dict, strict=False)
    W_orig = model[0].weight.data.detach().numpy()
    # pad the second (593-sized) dimension with 7 zeroes at the end
    W_new = np.pad(W_orig, [(0, 0), (0, 7)])
    model[0].weight.data = torch.from_numpy(W_new)
    model_for_export = CybSecMLPForExport(model)
    export_onnx_path = get_checkpoint_name("export")
    input_shape = (1, 600)
    # create a QuantTensor instance to mark the input as bipolar during export
    input_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)
    input_a = 2 * input_a - 1
    scale = 1.0
    input_t = torch.from_numpy(input_a * scale)
    input_qt = QuantTensor(
        input_t, scale=torch.tensor(scale), bit_width=torch.tensor(1.0), signed=True)
    bo.export_finn_onnx(model_for_export, export_path=export_onnx_path, input_t=input_qt)
    assert os.path.isfile(export_onnx_path)
    # fix input datatype
    finn_model = ModelWrapper(export_onnx_path)
    finnonnx_in_tensor_name = finn_model.graph.input[0].name
    assert tuple(finn_model.get_tensor_shape(finnonnx_in_tensor_name)) == (1, 600)
    # verify a few exported ops
    assert finn_model.graph.node[1].op_type == "Add"
    assert finn_model.graph.node[2].op_type == "Div"
    assert finn_model.graph.node[3].op_type == "MatMul"
    assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    # verify datatypes on some tensors
    assert finn_model.get_tensor_datatype(finnonnx_in_tensor_name) == DataType.BIPOLAR
    first_matmul_w_name = finn_model.graph.node[3].input[1]
    assert finn_model.get_tensor_datatype(first_matmul_w_name) == DataType.INT2

def test_scaling_stats_to_parameter(self):
    stats_act = QuantReLU(
        bit_width=BIT_WIDTH,
        max_val=MAX_VAL,
        quant_type=QuantType.INT,
        scaling_impl_type=ScalingImplType.STATS)
    stats_act.train()
    for i in range(RANDOM_ITERS):
        inp = torch.randn([8, 3, 64, 64])
        stats_act(inp)
    stats_state_dict = stats_act.state_dict()
    param_act = QuantReLU(
        bit_width=BIT_WIDTH,
        max_val=MAX_VAL,
        quant_type=QuantType.INT,
        scaling_impl_type=ScalingImplType.PARAMETER)
    param_act.load_state_dict(stats_state_dict)
    stats_act.eval()
    param_act.eval()
    assert torch.allclose(stats_act.quant_act_scale(), param_act.quant_act_scale())

def quant_type(module: QuantReLU):
    bit_width = module.quant_act_bit_width()
    signed = module.is_quant_act_signed
    return finn_datatype(bit_width, signed)

def PreQuantizedReLU(config):
    return QuantReLU(
        bit_width=config.activation_bit_width,
        max_val=6.0,
        quant_type=QuantType.INT)

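# Minimal usage sketch (illustrative only): the config object just needs an
# activation_bit_width attribute; SimpleNamespace stands in for the real config class.
from types import SimpleNamespace

example_config = SimpleNamespace(activation_bit_width=4)
relu = PreQuantizedReLU(example_config)  # 4-bit QuantReLU clipped at max_val=6.0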