Example no. 1
 def test_scaling_parameter_from_stats(self):
     shape = [8, 3, 64, 64]
     collect_stats_steps = 100
     stats_act = QuantReLU(
         bit_width=BIT_WIDTH,
         quant_type=QuantType.INT,
         scaling_impl_type=ScalingImplType.PARAMETER_FROM_STATS,
         scaling_stats_permute_dims=None,
         scaling_stats_op=StatsOp.PERCENTILE,
         collect_stats_steps=collect_stats_steps,
         scaling_min_val=None,
         percentile_q=99.0)
     stats_act.train()
     tensor_quant = stats_act.act_quant.fused_activation_quant_proxy.tensor_quant
     scaling_value = tensor_quant.scaling_impl.value
     for i in range(collect_stats_steps):
         inp = torch.randn(shape)
         out = stats_act(inp)
         out.requires_grad_(True)  # we need an output that requires grad so backward() can run
         out.sum().backward()
         assert scaling_value.grad is None
     inp = torch.randn(shape)
     out = stats_act(inp)
     out.sum().backward()
     assert scaling_value.grad is not None
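Example no. 2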
    def __init__(self, input_width, weight_width, act_width):

        super(QuantLeNet, self).__init__()
        self.quant_inp = QuantIdentity(bit_width=input_width,
                                       min_val=-1.0,
                                       max_val=1.0)

        self.conv1 = QuantConv2d(1, 6, 5, weight_bit_width=weight_width)
        self.conv2 = QuantConv2d(6, 16, 5, weight_bit_width=weight_width)
        self.fc1 = QuantLinear(16 * 4 * 4,
                               120,
                               bias=True,
                               weight_bit_width=weight_width)
        self.fc2 = QuantLinear(120,
                               84,
                               bias=True,
                               weight_bit_width=weight_width)
        self.fc3 = QuantLinear(84,
                               10,
                               bias=False,
                               weight_bit_width=weight_width)

        self.relu1 = QuantReLU(bit_width=act_width, max_val=6)
        self.relu2 = QuantReLU(bit_width=act_width, max_val=6)
        self.relu3 = QuantReLU(bit_width=act_width, max_val=6)
        self.relu4 = QuantReLU(bit_width=act_width, max_val=6)
Example no. 3
 def __init__(self, model_config):
     super(JetSubstructureNeqModel, self).__init__()
     self.model_config = model_config
     self.num_neurons = [model_config["input_length"]] + model_config["hidden_layers"] + [model_config["output_length"]]
     layer_list = []
     for i in range(1, len(self.num_neurons)):
         in_features = self.num_neurons[i-1]
         out_features = self.num_neurons[i]
         bn = nn.BatchNorm1d(out_features)
         if i == 1:
             bn_in = nn.BatchNorm1d(in_features)
             input_bias = ScalarBiasScale(scale=False, bias_init=-0.25)
             input_quant = QuantBrevitasActivation(QuantHardTanh(model_config["input_bitwidth"], max_val=1., narrow_range=False, quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER), pre_transforms=[bn_in, input_bias])
             output_quant = QuantBrevitasActivation(QuantReLU(bit_width=model_config["hidden_bitwidth"], max_val=1.61, quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER), pre_transforms=[bn])
             mask = RandomFixedSparsityMask2D(in_features, out_features, fan_in=model_config["input_fanin"])
             layer = SparseLinearNeq(in_features, out_features, input_quant=input_quant, output_quant=output_quant, sparse_linear_kws={'mask': mask})
             layer_list.append(layer)
         elif i == len(self.num_neurons)-1:
             output_bias_scale = ScalarBiasScale(bias_init=0.33)
             output_quant = QuantBrevitasActivation(QuantHardTanh(bit_width=model_config["output_bitwidth"], max_val=1.33, narrow_range=False, quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER), pre_transforms=[bn], post_transforms=[output_bias_scale])
             mask = RandomFixedSparsityMask2D(in_features, out_features, fan_in=model_config["output_fanin"])
             layer = SparseLinearNeq(in_features, out_features, input_quant=layer_list[-1].output_quant, output_quant=output_quant, sparse_linear_kws={'mask': mask}, apply_input_quant=False)
             layer_list.append(layer)
         else:
             output_quant = QuantBrevitasActivation(QuantReLU(bit_width=model_config["hidden_bitwidth"], max_val=1.61, quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER), pre_transforms=[bn])
             mask = RandomFixedSparsityMask2D(in_features, out_features, fan_in=model_config["hidden_fanin"])
             layer = SparseLinearNeq(in_features, out_features, input_quant=layer_list[-1].output_quant, output_quant=output_quant, sparse_linear_kws={'mask': mask}, apply_input_quant=False)
             layer_list.append(layer)
     self.module_list = nn.ModuleList(layer_list)
     self.is_verilog_inference = False
     self.latency = 1
     self.verilog_dir = None
     self.top_module_filename = None
     self.dut = None
     self.logfile = None
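
Note how the hidden and final layers above reuse the previous layer's output_quant as their input_quant and pass apply_input_quant=False, so consecutive SparseLinearNeq layers share one quantizer instead of re-quantizing the same activation twice.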
Example no. 4
 def __init__(self):
     super().__init__()
     self.conv1 = QuantConv2d(kernel_size=KERNEL_SIZE,
                              in_channels=CHANNELS,
                              out_channels=CHANNELS,
                              weight_quant=DPUv1WeightQuantInjector,
                              bias_quant=None,
                              output_quant=DPUv1OutputQuantInjector,
                              bias=False,
                              return_quant_tensor=True)
     self.act1 = QuantReLU(act_quant=DPUv1ActQuantInjector,
                           return_quant_tensor=True)
     self.conv2 = QuantConv2d(kernel_size=KERNEL_SIZE,
                              in_channels=CHANNELS,
                              out_channels=CHANNELS,
                              weight_quant=DPUv1WeightQuantInjector,
                              bias_quant=None,
                              output_quant=DPUv1OutputQuantInjector,
                              bias=False,
                              return_quant_tensor=True)
     self.act2 = QuantReLU(act_quant=DPUv1ActQuantInjector,
                           return_quant_tensor=True)
     self.conv3 = QuantConv2d(kernel_size=KERNEL_SIZE,
                              in_channels=CHANNELS,
                              out_channels=CHANNELS,
                              weight_quant=DPUv1WeightQuantInjector,
                              bias_quant=None,
                              output_quant=DPUv1OutputQuantInjector,
                              bias=False,
                              return_quant_tensor=True)
     self.act3 = QuantReLU(act_quant=DPUv1ActQuantInjector,
                           return_quant_tensor=False)
     self.linear = nn.Linear(FC_IN_SIZE, CHANNELS)
Example no. 5
def make_qrelu(no_quant=True, **kwargs) -> QuantReLU:
    if no_quant:
        return QuantReLU(input_quant=None,
                         act_quant=None,
                         output_quant=None,
                         update_iqi=None,
                         update_aqi=None)
    else:
        return QuantReLU(bit_width=kwargs['bit_width'])
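
A quick usage sketch for the factory above (hypothetical; it assumes torch, QuantReLU and make_qrelu are in scope, and that with every quantizer set to None the module should degenerate to a plain ReLU):

import torch

relu_float = make_qrelu()                            # all quantizers disabled
relu_4bit = make_qrelu(no_quant=False, bit_width=4)  # 4-bit activation quantization

x = torch.randn(2, 8)
# With quantization disabled, the output should match an ordinary ReLU.
assert torch.equal(relu_float(x), torch.relu(x))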
Example no. 6
 def thresholds(module: QuantReLU):
     num_distinct_values = 2**int(module.quant_act_bit_width().item())
     num_thresholds = num_distinct_values - 1
     flat_scale = module.quant_act_scale().view(-1)
     num_scale_channels = flat_scale.shape[0]
     step = torch.abs(flat_scale)
     min_threshold = step / 2
     thresholds = torch.empty(num_scale_channels, num_thresholds)
     for c in range(num_scale_channels):
         for t in range(num_thresholds):
             thresholds[c][t] = min_threshold[c] + step[c] * t
     return thresholds
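
The helper above materializes the multi-threshold view of a uniform unsigned quantizer: the integer output for an input x equals the number of thresholds that x exceeds. A minimal pure-PyTorch sanity check of that equivalence, with made-up scale and bit width (illustrative only, independent of Brevitas):

import torch

scale, bits = 0.125, 3
num_t = 2 ** bits - 1                        # 7 thresholds for a 3-bit unsigned quantizer
thresholds = scale / 2 + scale * torch.arange(num_t)

# Deterministic inputs chosen to stay clear of the half-step tie points.
x = scale * (torch.arange(80, dtype=torch.float32) / 10.0 + 0.03)
q_round = torch.clamp(torch.round(x / scale), 0, num_t)
q_count = (x.unsqueeze(-1) > thresholds).sum(-1).to(q_round.dtype)
assert torch.equal(q_round, q_count)         # rounding == counting thresholds crossed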
Example no. 7
 def test_scaling_parameter_grad(self):
     stats_act = QuantReLU(bit_width=BIT_WIDTH,
                           max_val=MAX_VAL,
                           quant_type=QuantType.INT,
                           scaling_impl_type=ScalingImplType.PARAMETER)
     stats_act.train()
     for i in range(RANDOM_ITERS):
         inp = torch.randn([8, 3, 64, 64])
         stats_act(inp)
         out = stats_act(inp)
         out.sum().backward()
         tensor_quant = stats_act.act_quant.fused_activation_quant_proxy.tensor_quant
         scaling_value = tensor_quant.scaling_impl.value
         assert scaling_value.grad is not None
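
Contrast this with Example no. 1: under ScalingImplType.PARAMETER the learned scale receives a gradient on every backward pass, whereas under PARAMETER_FROM_STATS it only becomes a trainable parameter after collect_stats_steps forward passes have elapsed.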
Example no. 8
 def __init__(self):
     super().__init__()
     self.act1 = QuantIdentity(
         bit_width=7,
         act_quant=ShiftedUint8ActPerTensorFloat,
         return_quant_tensor=True)
     self.act2 = QuantReLU(act_quant=Uint8ActPerTensorFloat)
Example no. 9
 def __init__(self,
              in_channels,
              out_channels,
              kernel_size,
              weight_bit_width,
              act_bit_width,
              stride=1,
              padding=0,
              groups=1,
              bn_eps=1e-5,
              activation_scaling_per_channel=False):
     super(ConvBlock, self).__init__()
     self.conv = QuantConv2d(in_channels=in_channels,
                             out_channels=out_channels,
                             kernel_size=kernel_size,
                             stride=stride,
                             padding=padding,
                             groups=groups,
                             bias=False,
                             weight_quant=CommonIntWeightPerChannelQuant,
                             weight_bit_width=weight_bit_width)
     self.bn = nn.BatchNorm2d(num_features=out_channels, eps=bn_eps)
     self.activation = QuantReLU(
         act_quant=CommonUintActQuant,
         bit_width=act_bit_width,
         per_channel_broadcastable_shape=(1, out_channels, 1, 1),
         scaling_per_channel=activation_scaling_per_channel,
         return_quant_tensor=True)
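Example no. 10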
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 quant_type,
                 weight_bit_width,
                 act_bit_width,
                 act_scaling_per_channel,
                 weight_scaling_impl_type,
                 bias,
                 compute_micronet_cost,
                 dilation=1,
                 groups=1,
                 bn_eps=1e-5,
                 shared_act=None):
        super(ConvBlock, self).__init__()
        self.compute_micronet_cost = compute_micronet_cost

        self.conv = QuantConv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
            weight_quant_type=quant_type,
            weight_bit_width=weight_bit_width,
            weight_scaling_impl_type=weight_scaling_impl_type,
            weight_restrict_scaling_type=RestrictValueType.LOG_FP,
            weight_narrow_range=True,
            weight_scaling_stats_op=StatsOp.MAX,
            weight_scaling_min_val=MIN_SCALING_VALUE,
            compute_output_bit_width=True,  # compute the number of bits in the output accumulator
            return_quant_tensor=True,  # return a quantized tensor representing the quantized accumulator
            weight_scaling_per_output_channel=True)
        self.bn = nn.BatchNorm2d(num_features=out_channels, eps=bn_eps)
        if shared_act is None and quant_type == QuantType.FP:
            self.activ = nn.ReLU6()
        elif shared_act is None and quant_type == QuantType.INT:
            self.activ = QuantReLU(
                quant_type=quant_type,
                bit_width=act_bit_width,
                max_val=RELU_MAX_VAL,
                scaling_per_channel=act_scaling_per_channel,
                scaling_impl_type=ScalingImplType.PARAMETER,
                scaling_min_val=MIN_SCALING_VALUE,
                restrict_scaling_type=RestrictValueType.LOG_FP,
                per_channel_broadcastable_shape=(1, out_channels, 1, 1),
                return_quant_tensor=True)
        elif shared_act is not None:
            self.activ = shared_act
        else:
            raise Exception("Activation not recognized.")
Example no. 11
 def quant_type(
         module: QuantReLU,
         supported_bit_width: Tuple[int,...] = (2, 4, 8, 16, 32)):
     bit_width = int(module.quant_act_bit_width().item())
     if bit_width in list(supported_bit_width):
         return f"UINT{bit_width}"
     else:
         raise RuntimeError(f"Unsupported input bit width {bit_width} for export")
Example no. 12
 def quant_type(
         module: QuantReLU,
         supported_int_bit_width_range: Tuple[int,...] = (2, 33)):
     bit_width = int(module.quant_act_bit_width().item())
     if bit_width in range(*supported_int_bit_width_range):
         return f"UINT{bit_width}"
     else:
         raise RuntimeError(f"Unsupported input bit width {bit_width} for export")
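
Note the subtle difference from Example no. 11: there the tuple is a whitelist of exact bit widths (2, 4, 8, 16, 32), while here it is unpacked into range(2, 33), so every integer bit width from 2 to 32 inclusive is accepted.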
Example no. 13
 def thresholds(module: QuantReLU, extend_tensor_to_channels=True):
     num_distinct_values = 2 ** int(module.quant_act_bit_width().item())
     num_thresholds = num_distinct_values - 1
     flat_scale = module.quant_act_scale().view(-1)
     num_scale_channels = flat_scale.shape[0]
     step = torch.abs(flat_scale)
     min_threshold = step / 2
     thresholds = torch.empty(num_scale_channels, num_thresholds)
     for c in range(num_scale_channels):
         for t in range(num_thresholds):
             thresholds[c][t] = min_threshold[c] + step[c] * t
     if extend_tensor_to_channels:
         output_channels = module._cached_inp.shape[1]
         final_shape = (output_channels, num_thresholds)
         if thresholds.shape != final_shape:
             thresholds = thresholds.expand(final_shape)
     return thresholds
Example no. 14
def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type,
                                  QONNX_export):
    min_val = -1.0
    ishape = (1, 15)

    b_act = QuantReLU(
        bit_width=abits,
        max_val=max_val,
        scaling_impl_type=scaling_impl_type,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        quant_type=QuantType.INT,
    )
    if scaling_impl_type == ScalingImplType.PARAMETER:
        checkpoint = {
            "act_quant_proxy.fused_activation_quant_proxy.tensor_quant.\
scaling_impl.learned_value":
            torch.tensor(0.49).type(torch.FloatTensor)
        }
        b_act.load_state_dict(checkpoint)
    if QONNX_export:
        m_path = export_onnx_path
        BrevitasONNXManager.export(b_act, ishape, m_path)
        qonnx_cleanup(m_path, out_file=m_path)
        model = ModelWrapper(m_path)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(m_path)
    else:
        bo.export_finn_onnx(b_act, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = np.random.uniform(low=min_val, high=max_val,
                                   size=ishape).astype(np.float32)
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    b_act.eval()
    expected = b_act.forward(inp_tensor).detach().numpy()
    if not np.isclose(produced, expected, atol=1e-3).all():
        print(abits, max_val, scaling_impl_type)
        print("scale: ",
              b_act.quant_act_scale().type(torch.FloatTensor).detach())
        if abits < 5:
            print(
                "thres:",
                ", ".join(["{:8.4f}".format(x)
                           for x in b_act.export_thres[0]]),
            )
        print("input:",
              ", ".join(["{:8.4f}".format(x) for x in inp_tensor[0]]))
        print("prod :", ", ".join(["{:8.4f}".format(x) for x in produced[0]]))
        print("expec:", ", ".join(["{:8.4f}".format(x) for x in expected[0]]))

    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
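Example no. 15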
def test_brevitas_act_export_relu_imagenet(abits, max_val,
                                           scaling_per_channel):
    out_channels = 32
    ishape = (1, out_channels, 1, 1)
    min_val = -1.0
    b_act = QuantReLU(
        bit_width=abits,
        quant_type=QuantType.INT,
        scaling_impl_type=ScalingImplType.PARAMETER,
        scaling_per_channel=scaling_per_channel,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        scaling_min_val=2e-16,
        max_val=6.0,
        return_quant_tensor=True,
        per_channel_broadcastable_shape=(1, out_channels, 1, 1),
    )
    if scaling_per_channel is True:
        rand_tensor = 2 * torch.rand((1, out_channels, 1, 1))
    else:
        rand_tensor = torch.tensor(1.2398)
    checkpoint = {
        "act_quant_proxy.fused_activation_quant_proxy.tensor_quant.\
scaling_impl.learned_value":
        rand_tensor.type(torch.FloatTensor)
    }
    b_act.load_state_dict(checkpoint)
    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = np.random.uniform(low=min_val, high=max_val,
                                   size=ishape).astype(np.float32)
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    b_act.eval()
    expected = b_act.forward(inp_tensor).tensor.detach().numpy()
    if not np.isclose(produced, expected, atol=1e-3).all():
        print(abits, max_val)
        print("scale: ",
              b_act.quant_act_scale().type(torch.FloatTensor).detach())
        if abits < 5:
            print(
                "thres:",
                ", ".join(["{:8.4f}".format(x)
                           for x in b_act.export_thres[0]]),
            )
        print("input:",
              ", ".join(["{:8.4f}".format(x) for x in inp_tensor[0]]))
        print("prod :", ", ".join(["{:8.4f}".format(x) for x in produced[0]]))
        print("expec:", ", ".join(["{:8.4f}".format(x) for x in expected[0]]))

    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
Example no. 16
 def __init__(self, cfg, batch_norm, bit_width=8, num_classes=1000):
     super(QuantVGG, self).__init__()
     self.features = make_layers(cfg, batch_norm, bit_width)
     self.avgpool = QuantAvgPool2d(kernel_size=(7, 7), stride=1, bit_width=bit_width)
     self.classifier = nn.Sequential(
         QuantLinear(
             512 * 7 * 7, 4096, bias=True,
             weight_quant=CommonIntWeightPerChannelQuant, weight_bit_width=bit_width),
         QuantReLU(act_quant=CommonUintActQuant, bit_width=bit_width),
         nn.Dropout(),
         QuantLinear(
             4096, 4096, bias=True,
             weight_quant=CommonIntWeightPerChannelQuant, weight_bit_width=bit_width),
         QuantReLU(act_quant=CommonUintActQuant, bit_width=bit_width),
         nn.Dropout(),
         QuantLinear(
             4096, num_classes, bias=False,
             weight_quant=CommonIntWeightPerTensorQuant, weight_bit_width=bit_width),
     )
     self._initialize_weights()
Example no. 17
 def __init__(self):
     super().__init__()
     self.conv1 = QuantConv2d(
         out_channels=OUT_CH,
         in_channels=IN_CH,
         kernel_size=KERNEL_SIZE,
         bias=False,
         weight_quant=Int8WeightPerTensorFixedPoint,
         input_quant=Int8ActPerTensorFixedPoint,
         output_quant=Int8ActPerTensorFixedPoint,
         return_quant_tensor=True)
     self.relu = QuantReLU(act_quant=None, return_quant_tensor=False)
     self.conv1.weight.data.uniform_(-0.01, 0.01)
Example no. 18
def make_layers(cfg, batch_norm, bit_width):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = QuantConv2d(
                in_channels, v, kernel_size=3, stride=1, padding=1, groups=1, bias=not batch_norm,
                weight_bit_width=bit_width, weight_quant=CommonIntWeightPerChannelQuant)
            act = QuantReLU(
                act_quant=CommonUintActQuant, bit_width=bit_width, return_quant_tensor=True)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), act]
            else:
                layers += [conv2d, act]
            in_channels = v
    return nn.Sequential(*layers)
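
A construction sketch for the builder above (hypothetical configuration; the cfg format mirrors torchvision's VGG, where integers are output channel counts and 'M' inserts a max-pooling stage):

cfg_vgg11 = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
features = make_layers(cfg_vgg11, batch_norm=True, bit_width=4)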
Example no. 19
    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            weight_bit_width,
            act_bit_width,
            act_scaling_per_channel,
            bias,
            groups=1,
            bn_eps=1e-5,
            shared_act=None,
            return_quant_tensor=False):
        super(ConvBlock, self).__init__()

        self.conv = QuantConv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=bias,
            weight_bit_width=weight_bit_width,
            weight_quant=CommonIntWeightPerChannelQuant)
        self.bn = nn.BatchNorm2d(num_features=out_channels, eps=bn_eps)
        if shared_act is None:
            self.activ = QuantReLU(
                act_quant=CommonUintActQuant,
                bit_width=act_bit_width,
                scaling_per_channel=act_scaling_per_channel,
                per_channel_broadcastable_shape=(1, out_channels, 1, 1),
                return_quant_tensor=return_quant_tensor)
        else:
            self.activ = shared_act
Example no. 20
 def __init__(self):
     super(QuantLeNet, self).__init__()
     self.conv1 = QuantConv2d(1,
                              6,
                              5,
                              weight_quant=None,
                              input_quant=None,
                              bias_quant=None,
                              output_quant=None,
                              update_wqi=None,
                              update_bqi=None,
                              update_iqi=None,
                              update_oqi=None)
     self.relu1 = QuantReLU(input_quant=None,
                            act_quant=None,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)
     self.conv2 = QuantConv2d(6,
                              16,
                              5,
                              weight_quant=None,
                              input_quant=None,
                              bias_quant=None,
                              output_quant=None,
                              update_wqi=None,
                              update_bqi=None,
                              update_iqi=None,
                              update_oqi=None)
     self.relu2 = QuantReLU(input_quant=None,
                            act_quant=None,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)
     self.fc1 = QuantLinear(16 * 5 * 5,
                            120,
                            bias=True,
                            weight_quant=None,
                            input_quant=None,
                            bias_quant=None,
                            output_quant=None,
                            update_wqi=None,
                            update_bqi=None,
                            update_iqi=None,
                            update_oqi=None)
     self.relu3 = QuantReLU(input_quant=None,
                            act_quant=None,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)
     self.fc2 = QuantLinear(120,
                            84,
                            bias=True,
                            weight_quant=None,
                            input_quant=None,
                            bias_quant=None,
                            output_quant=None,
                            update_wqi=None,
                            update_bqi=None,
                            update_iqi=None,
                            update_oqi=None)
     self.relu4 = QuantReLU(input_quant=None,
                            act_quant=None,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)
     self.fc3 = QuantLinear(84,
                            10,
                            bias=False,
                            weight_quant=None,
                            input_quant=None,
                            bias_quant=None,
                            output_quant=None,
                            update_wqi=None,
                            update_bqi=None,
                            update_iqi=None,
                            update_oqi=None)
Example no. 21
 def quant_act_scale(module: QuantReLU):
     quant_act_scale = module.quant_act_scale().type(torch.FloatTensor).detach()
     return quant_act_scale
Example no. 22
def get_8_bits_quantized_lenet():
    model = QuantLeNet()
    model.conv1 = QuantConv2d(1,
                              6,
                              5,
                              weight_quant=LSQ_weight_quant_8bits,
                              bias_quant=None,
                              input_quant=None,
                              output_quant=None,
                              update_wqi=None,
                              update_bqi=None,
                              update_iqi=None,
                              update_oqi=None)
    model.relu1 = QuantReLU(input_quant=None,
                            act_quant=LSQ_input_quant_8bits,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)

    model.conv2 = QuantConv2d(6,
                              16,
                              5,
                              weight_quant=LSQ_weight_quant_8bits,
                              bias_quant=None,
                              input_quant=None,
                              output_quant=None,
                              update_wqi=None,
                              update_bqi=None,
                              update_iqi=None,
                              update_oqi=None)
    model.relu2 = QuantReLU(input_quant=None,
                            act_quant=LSQ_input_quant_8bits,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)

    model.fc1 = QuantLinear(16 * 5 * 5,
                            120,
                            bias=True,
                            weight_quant=LSQ_weight_quant_8bits,
                            bias_quant=None,
                            input_quant=None,
                            output_quant=None,
                            update_wqi=None,
                            update_bqi=None,
                            update_iqi=None,
                            update_oqi=None)

    model.relu3 = QuantReLU(input_quant=None,
                            act_quant=LSQ_input_quant_8bits,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)

    model.fc2 = QuantLinear(120,
                            84,
                            bias=True,
                            weight_quant=LSQ_weight_quant_8bits,
                            bias_quant=None,
                            input_quant=None,
                            output_quant=None,
                            update_wqi=None,
                            update_bqi=None,
                            update_iqi=None,
                            update_oqi=None)

    model.relu4 = QuantReLU(input_quant=None,
                            act_quant=LSQ_input_quant_8bits,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)

    model.fc3 = QuantLinear(84,
                            10,
                            bias=False,
                            weight_quant=LSQ_weight_quant_8bits,
                            bias_quant=None,
                            input_quant=None,
                            output_quant=None,
                            update_wqi=None,
                            update_bqi=None,
                            update_iqi=None,
                            update_oqi=None)

    return model
Example no. 23
 def test_module_init_const_scaling(self):
     mod = QuantReLU(max_val=6, scaling_impl_type='CONST')
Example no. 24
 def test_module_init_default(self):
     mod = QuantReLU(max_val=6)
Example no. 25
def test_end2end_cybsec_mlp_export(QONNX_export):
    assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
    # load up trained net in Brevitas
    input_size = 593
    hidden1 = 64
    hidden2 = 64
    hidden3 = 64
    weight_bit_width = 2
    act_bit_width = 2
    num_classes = 1
    model = nn.Sequential(
        QuantLinear(input_size,
                    hidden1,
                    bias=True,
                    weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden1),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden1,
                    hidden2,
                    bias=True,
                    weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden2),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden2,
                    hidden3,
                    bias=True,
                    weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden3),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden3,
                    num_classes,
                    bias=True,
                    weight_bit_width=weight_bit_width),
    )
    trained_state_dict = torch.load(assets_dir +
                                    "/state_dict.pth")["models_state_dict"][0]
    model.load_state_dict(trained_state_dict, strict=False)
    W_orig = model[0].weight.data.detach().numpy()
    # pad the second (593-sized) dimension with 7 zeros at the end
    W_new = np.pad(W_orig, [(0, 0), (0, 7)])
    model[0].weight.data = torch.from_numpy(W_new)
    model_for_export = CybSecMLPForExport(model)
    export_onnx_path = get_checkpoint_name("export", QONNX_export)
    input_shape = (1, 600)
    # create a QuantTensor instance to mark the input as bipolar during export
    input_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)
    input_a = 2 * input_a - 1
    scale = 1.0
    input_t = torch.from_numpy(input_a * scale)
    input_qt = QuantTensor(input_t,
                           scale=torch.tensor(scale),
                           bit_width=torch.tensor(1.0),
                           signed=True)

    if QONNX_export:
        # With the BrevitasONNXManager we need to manually set
        # the FINN DataType at the input
        BrevitasONNXManager.export(model_for_export,
                                   input_shape,
                                   export_path=export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model.set_tensor_datatype(model.graph.input[0].name,
                                  DataType["BIPOLAR"])
        model.save(export_onnx_path)
        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(export_onnx_path)
    else:
        bo.export_finn_onnx(model_for_export,
                            export_path=export_onnx_path,
                            input_t=input_qt)
    assert os.path.isfile(export_onnx_path)
    # fix input datatype
    finn_model = ModelWrapper(export_onnx_path)
    finnonnx_in_tensor_name = finn_model.graph.input[0].name
    assert tuple(finn_model.get_tensor_shape(finnonnx_in_tensor_name)) == (1,
                                                                           600)
    # verify a few exported ops
    if QONNX_export:
        # The first "Mul" node doesn't exist in the QONNX export,
        # because the QuantTensor scale is not exported.
        # However, this node would have been unity scale anyways and
        # the models are still equivalent.
        assert finn_model.graph.node[0].op_type == "Add"
        assert finn_model.graph.node[1].op_type == "Div"
        assert finn_model.graph.node[2].op_type == "MatMul"
        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    else:
        assert finn_model.graph.node[0].op_type == "Mul"
        assert finn_model.get_initializer(
            finn_model.graph.node[0].input[1]) == 1.0
        assert finn_model.graph.node[1].op_type == "Add"
        assert finn_model.graph.node[2].op_type == "Div"
        assert finn_model.graph.node[3].op_type == "MatMul"
        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    # verify datatypes on some tensors
    assert (finn_model.get_tensor_datatype(finnonnx_in_tensor_name) ==
            DataType["BIPOLAR"])
    first_matmul_w_name = finn_model.get_nodes_by_op_type("MatMul")[0].input[1]
    assert finn_model.get_tensor_datatype(
        first_matmul_w_name) == DataType["INT2"]
Example no. 26
def test_end2end_cybsec_mlp_export():
    assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
    # load up trained net in Brevitas
    input_size = 593
    hidden1 = 64
    hidden2 = 64
    hidden3 = 64
    weight_bit_width = 2
    act_bit_width = 2
    num_classes = 1
    model = nn.Sequential(
        QuantLinear(input_size,
                    hidden1,
                    bias=True,
                    weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden1),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden1,
                    hidden2,
                    bias=True,
                    weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden2),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden2,
                    hidden3,
                    bias=True,
                    weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden3),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden3,
                    num_classes,
                    bias=True,
                    weight_bit_width=weight_bit_width),
    )
    trained_state_dict = torch.load(assets_dir +
                                    "/state_dict.pth")["models_state_dict"][0]
    model.load_state_dict(trained_state_dict, strict=False)
    W_orig = model[0].weight.data.detach().numpy()
    # pad the second (593-sized) dimension with 7 zeros at the end
    W_new = np.pad(W_orig, [(0, 0), (0, 7)])
    model[0].weight.data = torch.from_numpy(W_new)
    model_for_export = CybSecMLPForExport(model)
    export_onnx_path = get_checkpoint_name("export")
    input_shape = (1, 600)
    # create a QuantTensor instance to mark the input as bipolar during export
    input_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)
    input_a = 2 * input_a - 1
    scale = 1.0
    input_t = torch.from_numpy(input_a * scale)
    input_qt = QuantTensor(input_t,
                           scale=torch.tensor(scale),
                           bit_width=torch.tensor(1.0),
                           signed=True)

    bo.export_finn_onnx(model_for_export,
                        export_path=export_onnx_path,
                        input_t=input_qt)
    assert os.path.isfile(export_onnx_path)
    # fix input datatype
    finn_model = ModelWrapper(export_onnx_path)
    finnonnx_in_tensor_name = finn_model.graph.input[0].name
    assert tuple(finn_model.get_tensor_shape(finnonnx_in_tensor_name)) == (1,
                                                                           600)
    # verify a few exported ops
    assert finn_model.graph.node[1].op_type == "Add"
    assert finn_model.graph.node[2].op_type == "Div"
    assert finn_model.graph.node[3].op_type == "MatMul"
    assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    # verify datatypes on some tensors
    assert finn_model.get_tensor_datatype(
        finnonnx_in_tensor_name) == DataType.BIPOLAR
    first_matmul_w_name = finn_model.graph.node[3].input[1]
    assert finn_model.get_tensor_datatype(first_matmul_w_name) == DataType.INT2
Example no. 27
    def test_scaling_stats_to_parameter(self):

        stats_act = QuantReLU(bit_width=BIT_WIDTH,
                              max_val=MAX_VAL,
                              quant_type=QuantType.INT,
                              scaling_impl_type=ScalingImplType.STATS)
        stats_act.train()
        for i in range(RANDOM_ITERS):
            inp = torch.randn([8, 3, 64, 64])
            stats_act(inp)

        stats_state_dict = stats_act.state_dict()

        param_act = QuantReLU(bit_width=BIT_WIDTH,
                              max_val=MAX_VAL,
                              quant_type=QuantType.INT,
                              scaling_impl_type=ScalingImplType.PARAMETER)
        param_act.load_state_dict(stats_state_dict)

        stats_act.eval()
        param_act.eval()

        assert(torch.allclose(stats_act.quant_act_scale(), param_act.quant_act_scale()))
Example no. 28
 def quant_type(module: QuantReLU):
     bit_width = module.quant_act_bit_width()
     signed = module.is_quant_act_signed
     return finn_datatype(bit_width, signed)
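Example no. 29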
def PreQuantizedReLU(config):
    return QuantReLU(bit_width=config.activation_bit_width,
                     max_val=6.0,
                     quant_type=QuantType.INT)
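
A minimal invocation sketch for the factory above (hypothetical; any config object exposing an activation_bit_width attribute works, and the legacy quant_type/max_val keyword API used throughout these examples is assumed):

from collections import namedtuple

Config = namedtuple("Config", ["activation_bit_width"])
act = PreQuantizedReLU(Config(activation_bit_width=4))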