Ejemplo n.º 1
0
def test_generic_quant_avgpool_export_quant_input():
    """Export a QuantAvgPool2d fed by a quantized input to generic ONNX."""
    # Quantizer producing a QuantTensor, and the pooling layer under test.
    input_quant = QuantIdentity(return_quant_tensor=True)
    pool = QuantAvgPool2d(kernel_size=2, return_quant_tensor=False)
    # Random activation batch; shape comes from module-level constants.
    activation = torch.randn((2, OUT_CH, IN_CH, IN_CH))
    # One training-mode pass so the quantizer collects scale factors.
    input_quant(activation)
    input_quant.eval()
    pool.eval()
    BrevitasONNXManager.export(
        pool,
        input_t=input_quant(activation),
        export_path='generic_quant_avgpool_quant_input.onnx')
Ejemplo n.º 2
0
def test_brevitas_avg_pool_export(kernel_size, stride, signed, bit_width,
                                  input_bit_width, channels, idim):
    """Round-trip a QuantAvgPool2d through FINN ONNX export and check the
    executed graph matches the Brevitas reference output."""
    layer = QuantAvgPool2d(kernel_size=kernel_size,
                           stride=stride,
                           bit_width=bit_width)
    layer.eval()

    # Build a quantized input: integer-valued data for the FINN datatype,
    # scaled by a random per-channel float factor.
    finn_dtype = DataType[('INT' if signed else 'UINT') + str(input_bit_width)]
    shape = (1, channels, idim, idim)
    int_values = gen_finn_dt_tensor(finn_dtype, shape)
    per_ch_scale = np.random.uniform(
        low=0, high=1, size=(1, channels, 1, 1)).astype(np.float32)
    quant_input = QuantTensor(
        torch.from_numpy(int_values * per_ch_scale).float(),
        torch.from_numpy(per_ch_scale).float(),
        torch.tensor(0.),  # zero point
        input_bit_width,
        signed,
        training=False)

    # Export to FINN ONNX and prepare the graph for execution.
    FINNManager.export(layer,
                       export_path=export_onnx_path,
                       input_t=quant_input)
    onnx_model = ModelWrapper(export_onnx_path)
    onnx_model = onnx_model.transform(InferShapes())
    onnx_model = onnx_model.transform(InferDataTypes())

    # Brevitas reference output.
    expected = layer(quant_input).tensor.detach().numpy()
    # FINN execution consumes the raw integer values (scale lives in the graph).
    inputs = {onnx_model.graph.input[0].name: int_values}
    outputs = oxe.execute_onnx(onnx_model, inputs, True)
    produced = outputs[onnx_model.graph.output[0].name]
    # Compare, then clean up the exported file.
    assert np.isclose(expected, produced).all()
    os.remove(export_onnx_path)
Ejemplo n.º 3
0
    def __init__(self,
                 channels,
                 first_stage_stride,
                 bit_width,
                 in_channels=3,
                 num_classes=1000):
        """Build a quantized MobileNet.

        Args:
            channels: nested list of output channel counts; ``channels[0][0]``
                sizes the stem block and ``channels[1:]`` define the stages.
            first_stage_stride: if true, the first unit of the first stage
                also downsamples (stride 2).
            bit_width: quantization bit width for activations and most weights.
            in_channels: number of input image channels.
            num_classes: size of the final linear classifier output.
        """
        super(MobileNet, self).__init__()
        init_block_channels = channels[0][0]

        self.features = Sequential()
        # Stem: 3x3 stride-2 conv whose weights use the dedicated
        # FIRST_LAYER_BIT_WIDTH module constant instead of `bit_width`.
        init_block = ConvBlock(in_channels=in_channels,
                               out_channels=init_block_channels,
                               kernel_size=3,
                               stride=2,
                               weight_bit_width=FIRST_LAYER_BIT_WIDTH,
                               activation_scaling_per_channel=True,
                               act_bit_width=bit_width)
        self.features.add_module('init_block', init_block)
        in_channels = init_block_channels
        for i, channels_per_stage in enumerate(channels[1:]):
            stage = Sequential()
            # Per-channel pointwise activation scaling everywhere except the
            # last stage — presumably because its output feeds the pooling /
            # classifier head; confirm against the ConvBlock definition.
            pw_activation_scaling_per_channel = i < len(channels[1:]) - 1
            for j, out_channels in enumerate(channels_per_stage):
                # Downsample on the first unit of every stage after the first;
                # the first stage downsamples only if first_stage_stride is set.
                stride = 2 if (j == 0) and (
                    (i != 0) or first_stage_stride) else 1
                mod = DwsConvBlock(in_channels=in_channels,
                                   out_channels=out_channels,
                                   stride=stride,
                                   bit_width=bit_width,
                                   pw_activation_scaling_per_channel=
                                   pw_activation_scaling_per_channel)
                stage.add_module('unit{}'.format(j + 1), mod)
                in_channels = out_channels
            self.features.add_module('stage{}'.format(i + 1), stage)
        # Head: 7x7 quantized average pool followed by a quantized linear
        # classifier with quantized bias.
        self.final_pool = QuantAvgPool2d(kernel_size=7,
                                         stride=1,
                                         bit_width=bit_width)
        self.output = QuantLinear(in_channels,
                                  num_classes,
                                  bias=True,
                                  bias_quant=IntBias,
                                  weight_quant=CommonIntWeightPerTensorQuant,
                                  weight_bit_width=bit_width)
Ejemplo n.º 4
0
 def __init__(self, cfg, batch_norm, bit_width=8, num_classes=1000):
     """Build a quantized VGG.

     Args:
         cfg: layer configuration list forwarded to ``make_layers``.
         batch_norm: whether the feature convolutions use batch norm.
         bit_width: quantization bit width for weights and activations.
         num_classes: size of the final classifier output.
     """
     super(QuantVGG, self).__init__()
     self.features = make_layers(cfg, batch_norm, bit_width)
     self.avgpool = QuantAvgPool2d(kernel_size=(7, 7), stride=1, bit_width=bit_width)
     self.classifier = nn.Sequential(
         QuantLinear(
             512 * 7 * 7, 4096, bias=True,
             weight_quant=CommonIntWeightPerChannelQuant, weight_bit_width=bit_width),
         QuantReLU(act_quant=CommonUintActQuant, bit_width=bit_width),
         nn.Dropout(),
         QuantLinear(
             4096, 4096, bias=True,
             weight_quant=CommonIntWeightPerChannelQuant, weight_bit_width=bit_width),
         # Fix: QuantReLU is an activation layer, so its precision is set via
         # ``bit_width`` (as in the first QuantReLU above), not
         # ``weight_bit_width`` — QuantReLU has no weights to quantize.
         QuantReLU(act_quant=CommonUintActQuant, bit_width=bit_width),
         nn.Dropout(),
         QuantLinear(
             4096, num_classes, bias=False,
             weight_quant=CommonIntWeightPerTensorQuant, weight_bit_width=bit_width),
     )
     self._initialize_weights()
Ejemplo n.º 5
0
    def __init__(self,
                 channels,
                 init_block_channels,
                 final_block_channels,
                 residuals,
                 shortcuts,
                 kernel_sizes,
                 expansions,
                 quant_type,
                 bit_width,
                 depthwise_bit_width,
                 first_layer_bit_width,
                 hard_tanh_threshold,
                 dropout_rate,
                 dropout_steps,
                 weight_scaling_impl_type,
                 compute_micronet_cost,
                 input_bit_width=8,
                 bn_eps=1e-3,
                 in_channels=3,
                 num_classes=1000):
        """Build a quantized ProxylessNAS network.

        Args:
            channels: nested list with per-stage output channel counts.
            init_block_channels: output channels of the stem conv block.
            final_block_channels: output channels of the final 1x1 conv block.
            residuals: nested 0/1 list; per unit, whether a residual branch is used.
            shortcuts: nested 0/1 list; per unit, whether an identity shortcut is used.
            kernel_sizes: nested per-unit kernel sizes.
            expansions: nested per-unit expansion factors.
            quant_type: quantization backend type (e.g. ``QuantType.INT``).
            bit_width: default bit width for weights and activations.
            depthwise_bit_width: bit width for depthwise convolutions.
            first_layer_bit_width: weight bit width for the stem conv.
            hard_tanh_threshold: +/- clipping range of the shared HardTanh acts.
            dropout_rate: stored on the instance for use elsewhere in the class.
            dropout_steps: stored on the instance for use elsewhere in the class.
            weight_scaling_impl_type: scaling implementation for weight quant.
            compute_micronet_cost: if true, sub-blocks track MicroNet cost.
            input_bit_width: declared bit width of the network input.
            bn_eps: batch-norm epsilon.
            in_channels: input image channels.
            num_classes: classifier output size.
        """
        super(ProxylessNAS, self).__init__()
        self.compute_micronet_cost = compute_micronet_cost
        # NOTE(review): hard-coded .cuda() makes construction fail on
        # CPU-only hosts — consider leaving device placement to the caller.
        self.input_bit_width = torch.tensor(input_bit_width).float().cuda()
        self.num_classes = num_classes
        self.dropout_rate = dropout_rate
        self.dropout_steps = dropout_steps

        self.features = nn.Sequential()
        # Stem: 3x3 stride-2 conv; weights use the dedicated first-layer bit width.
        self.features.add_module(
            "init_block",
            ConvBlock(in_channels=in_channels,
                      out_channels=init_block_channels,
                      kernel_size=3,
                      stride=2,
                      padding=1,
                      groups=1,
                      bn_eps=bn_eps,
                      act_scaling_per_channel=False,
                      weight_scaling_impl_type=weight_scaling_impl_type,
                      bias=False,
                      quant_type=quant_type,
                      act_bit_width=bit_width,
                      weight_bit_width=first_layer_bit_width,
                      compute_micronet_cost=compute_micronet_cost))
        in_channels = init_block_channels
        shared_act = None
        for i, channels_per_stage in enumerate(channels):
            stage = nn.Sequential()
            residuals_per_stage = residuals[i]
            shortcuts_per_stage = shortcuts[i]
            kernel_sizes_per_stage = kernel_sizes[i]
            expansions_per_stage = expansions[i]
            for j, out_channels in enumerate(channels_per_stage):
                residual = (residuals_per_stage[j] == 1)
                shortcut = (shortcuts_per_stage[j] == 1)
                kernel_size = kernel_sizes_per_stage[j]
                expansion = expansions_per_stage[j]
                # Downsample on the first unit of every stage except the first.
                stride = 2 if (j == 0) and (i != 0) else 1

                # A fresh shared activation is created for every unit WITHOUT a
                # shortcut; subsequent shortcut units reuse the most recent one,
                # so residual adds see a common quantization scale.
                if not shortcut:
                    shared_act = QuantHardTanh(
                        bit_width=bit_width,
                        quant_type=quant_type,
                        scaling_per_channel=False,
                        scaling_impl_type=ScalingImplType.PARAMETER,
                        scaling_min_val=MIN_SCALING_VALUE,
                        max_val=hard_tanh_threshold,
                        min_val=-hard_tanh_threshold,
                        restrict_scaling_type=RestrictValueType.LOG_FP,
                        return_quant_tensor=True)

                stage.add_module(
                    "unit{}".format(j + 1),
                    ProxylessUnit(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=kernel_size,
                        stride=stride,
                        bn_eps=bn_eps,
                        expansion=expansion,
                        residual=residual,
                        shortcut=shortcut,
                        bit_width=bit_width,
                        depthwise_bit_width=depthwise_bit_width,
                        quant_type=quant_type,
                        weight_scaling_impl_type=weight_scaling_impl_type,
                        shared_act=shared_act,
                        compute_micronet_cost=compute_micronet_cost))
                in_channels = out_channels

            self.features.add_module("stage{}".format(i + 1), stage)
        # Final 1x1 conv before the pooling/classifier head.
        self.features.add_module(
            "final_block",
            ConvBlock(in_channels=in_channels,
                      out_channels=final_block_channels,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      groups=1,
                      bn_eps=bn_eps,
                      act_scaling_per_channel=False,
                      quant_type=quant_type,
                      act_bit_width=bit_width,
                      weight_bit_width=bit_width,
                      weight_scaling_impl_type=weight_scaling_impl_type,
                      bias=False,
                      compute_micronet_cost=compute_micronet_cost))
        in_channels = final_block_channels
        # 7x7 quantized average pool; overall bit width pinned to `bit_width`
        # by setting min == max.
        self.final_pool = QuantAvgPool2d(kernel_size=7,
                                         stride=1,
                                         quant_type=quant_type,
                                         min_overall_bit_width=bit_width,
                                         max_overall_bit_width=bit_width)

        # Quantized classifier; output bit width/scale are only computed for
        # integer quantization.
        self.output = QuantLinear(
            in_features=in_channels,
            out_features=num_classes,
            bias=True,
            bias_quant_type=quant_type,
            compute_output_bit_width=quant_type == QuantType.INT,
            compute_output_scale=quant_type == QuantType.INT,
            weight_bit_width=bit_width,
            weight_quant_type=quant_type,
            weight_scaling_min_val=MIN_SCALING_VALUE,
            weight_scaling_per_output_channel=False,
            weight_scaling_stats_op=StatsOp.MAX,
            weight_narrow_range=True,
            weight_restrict_scaling_type=RestrictValueType.LOG_FP,
            weight_scaling_impl_type=weight_scaling_impl_type,
            return_quant_tensor=True)

        self._init_params()
Ejemplo n.º 6
0
def test_brevitas_avg_pool_export(kernel_size, stride, signed, bit_width,
                                  input_bit_width, channels, idim):
    """Export QuantAvgPool2d to FINN ONNX twice — first with a scale-1 integer
    input (exact match expected), then with a random per-channel scale
    (approximate match) — and compare against Brevitas execution."""
    ishape = (1, channels, idim, idim)
    ibw_tensor = torch.Tensor([input_bit_width])

    b_avgpool = QuantAvgPool2d(
        kernel_size=kernel_size,
        stride=stride,
        signed=signed,
        min_overall_bit_width=bit_width,
        max_overall_bit_width=bit_width,
        quant_type=QuantType.INT,
    )
    # call forward pass manually once to cache scale factor and bitwidth
    input_tensor = torch.from_numpy(np.zeros(ishape)).float()
    scale = np.ones((1, channels, 1, 1))
    output_scale = torch.from_numpy(scale).float()
    # Older 4-argument QuantTensor API: (tensor, scale, bit_width, signed).
    input_quant_tensor = QuantTensor(input_tensor, output_scale, ibw_tensor,
                                     signed)
    FINNManager.export_onnx(b_avgpool,
                            ishape,
                            export_onnx_path,
                            input_t=input_quant_tensor)
    model = ModelWrapper(export_onnx_path)

    # determine input FINN datatype
    if signed is True:
        prefix = "INT"
    else:
        prefix = "UINT"
    # NOTE(review): test data is generated at half the declared input bit
    # width — presumably headroom for the pool accumulator; confirm intent.
    dt_name = prefix + str(input_bit_width // 2)
    dtype = DataType[dt_name]
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())

    # execution with input tensor using integers and scale = 1
    # calculate golden output
    inp = gen_finn_dt_tensor(dtype, ishape)
    input_tensor = torch.from_numpy(inp).float()
    input_quant_tensor = QuantTensor(input_tensor, output_scale, ibw_tensor,
                                     signed)
    b_avgpool.eval()
    expected = b_avgpool.forward(input_quant_tensor).tensor.detach().numpy()

    # finn execution
    idict = {model.graph.input[0].name: inp}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    # scale == 1 so integer arithmetic must match exactly
    assert (expected == produced).all()

    # execution with input tensor using float and scale != 1
    scale = np.random.uniform(low=0, high=1,
                              size=(1, channels, 1, 1)).astype(np.float32)
    inp_tensor = inp * scale
    input_tensor = torch.from_numpy(inp_tensor).float()
    input_scale = torch.from_numpy(scale).float()
    input_quant_tensor = QuantTensor(input_tensor, input_scale, ibw_tensor,
                                     signed)
    # export again to set the scale values correctly
    bo.export_finn_onnx(b_avgpool,
                        ishape,
                        export_onnx_path,
                        input_t=input_quant_tensor)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    b_avgpool.eval()
    expected = b_avgpool.forward(input_quant_tensor).tensor.detach().numpy()
    # finn execution
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]

    # float path: tolerate rounding differences
    assert np.isclose(expected, produced).all()

    os.remove(export_onnx_path)
Ejemplo n.º 7
0
def test_brevitas_avg_pool_export(
    kernel_size,
    stride,
    signed,
    bit_width,
    input_bit_width,
    channels,
    idim,
    QONNX_export,
):
    """Export QuantAvgPool2d either through QONNX (then convert to FINN) or
    directly through FINN, execute the ONNX graph, and compare against the
    Brevitas reference output."""
    export_onnx_path = base_export_onnx_path.replace(
        ".onnx", f"test_QONNX-{QONNX_export}.onnx"
    )
    quant_avgpool = QuantAvgPool2d(
        kernel_size=kernel_size,
        stride=stride,
        bit_width=bit_width,
        return_quant_tensor=False,
    )
    quant_avgpool.eval()

    # determine input
    prefix = "INT" if signed else "UINT"
    dt_name = prefix + str(input_bit_width)
    dtype = DataType[dt_name]
    input_shape = (1, channels, idim, idim)
    input_array = gen_finn_dt_tensor(dtype, input_shape)
    # Brevitas QuantAvgPool layers need QuantTensors to export correctly
    # which requires setting up a QuantTensor instance with the scale
    # factor, zero point, bitwidth and signedness
    scale_array = np.ones((1, channels, 1, 1)).astype(np.float32)
    scale_array *= 0.5
    input_tensor = torch.from_numpy(input_array * scale_array).float()
    scale_tensor = torch.from_numpy(scale_array).float()
    zp = torch.tensor(0.0)
    input_quant_tensor = QuantTensor(
        input_tensor, scale_tensor, zp, input_bit_width, signed, training=False
    )

    # export
    if QONNX_export:
        BrevitasONNXManager.export(
            quant_avgpool,
            export_path=export_onnx_path,
            input_t=input_quant_tensor,
        )
        model = ModelWrapper(export_onnx_path)

        # Statically set the additional inputs generated by the BrevitasONNXManager:
        # drop the dynamic graph inputs for scale / zero point / bit width and
        # pin them as initializers named "1", "2", "3" instead.
        model.graph.input.remove(model.graph.input[3])
        model.graph.input.remove(model.graph.input[2])
        model.graph.input.remove(model.graph.input[1])
        model.set_initializer("1", scale_array)
        model.set_initializer("2", np.array(0.0).astype(np.float32))
        model.set_initializer("3", np.array(input_bit_width).astype(np.float32))
        model.save(export_onnx_path)

        # Clean up the QONNX graph, then lower it to FINN ops.
        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(export_onnx_path)
    else:
        FINNManager.export(
            quant_avgpool, export_path=export_onnx_path, input_t=input_quant_tensor
        )
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())

    # reference brevitas output (a plain tensor: return_quant_tensor=False)
    ref_output_array = quant_avgpool(input_quant_tensor).detach().numpy()
    # finn output
    if QONNX_export:
        # Manually apply the Quant tensor scaling for QONNX
        idict = {model.graph.input[0].name: input_array * scale_array}
    else:
        idict = {model.graph.input[0].name: input_array}
    odict = oxe.execute_onnx(model, idict, True)
    finn_output = odict[model.graph.output[0].name]
    # compare outputs
    assert np.isclose(ref_output_array, finn_output).all()
    # cleanup
    os.remove(export_onnx_path)
Ejemplo n.º 8
0
 def __init__(self):
     """Minimal module: quantized identity feeding a 2x2 quantized average pool."""
     super().__init__()
     # Returns a QuantTensor so the pool receives scale/bit-width metadata.
     self.inp_quant = QuantIdentity(return_quant_tensor=True)
     self.pool = QuantAvgPool2d(kernel_size=2)
Ejemplo n.º 9
0
    def __init__(
            self,
            channels,
            init_block_channels,
            final_block_channels,
            residuals,
            shortcuts,
            kernel_sizes,
            expansions,
            bit_width,
            depthwise_bit_width,
            first_layer_weight_bit_width,
            hadamard_classifier,
            bn_eps=1e-3,
            in_channels=3,
            num_classes=1000):
        """Build a quantized ProxylessNAS network.

        Args:
            channels: nested list with per-stage output channel counts.
            init_block_channels: output channels of the stem conv block.
            final_block_channels: output channels of the final 1x1 conv block.
            residuals: nested 0/1 list; per unit, whether a residual branch is used.
            shortcuts: nested 0/1 list; per unit, whether an identity shortcut is used.
            kernel_sizes: nested per-unit kernel sizes.
            expansions: nested per-unit expansion factors.
            bit_width: default bit width for weights and activations.
            depthwise_bit_width: bit width for depthwise convolutions.
            first_layer_weight_bit_width: weight bit width for the stem conv.
            hadamard_classifier: if true, use a HadamardClassifier head instead
                of a quantized linear layer.
            bn_eps: batch-norm epsilon.
            in_channels: input image channels.
            num_classes: classifier output size.
        """
        super(ProxylessNAS, self).__init__()
        self.features = nn.Sequential()

        # Stem: 3x3 stride-2 conv with the dedicated first-layer weight bit width.
        init_block = ConvBlock(
            in_channels=in_channels,
            out_channels=init_block_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            groups=1,
            bn_eps=bn_eps,
            act_scaling_per_channel=False,
            bias=False,
            act_bit_width=bit_width,
            weight_bit_width=first_layer_weight_bit_width)
        self.features.add_module("init_block", init_block)

        in_channels = init_block_channels
        shared_act = None

        for i, channels_per_stage in enumerate(channels):
            stage = nn.Sequential()
            residuals_per_stage = residuals[i]
            shortcuts_per_stage = shortcuts[i]
            kernel_sizes_per_stage = kernel_sizes[i]
            expansions_per_stage = expansions[i]

            for j, out_channels in enumerate(channels_per_stage):
                residual = (residuals_per_stage[j] == 1)
                shortcut = (shortcuts_per_stage[j] == 1)
                kernel_size = kernel_sizes_per_stage[j]
                expansion = expansions_per_stage[j]
                # Downsample on the first unit of every stage except the first.
                stride = 2 if (j == 0) and (i != 0) else 1

                # A fresh shared activation is created for every unit WITHOUT a
                # shortcut; subsequent shortcut units reuse the most recent one,
                # so residual adds see a common quantization scale.
                if not shortcut:
                    shared_act = QuantIdentity(
                        bit_width=bit_width, act_quant=CommonIntActQuant, return_quant_tensor=True)

                unit = ProxylessUnit(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    bn_eps=bn_eps,
                    expansion=expansion,
                    residual=residual,
                    shortcut=shortcut,
                    bit_width=bit_width,
                    depthwise_bit_width=depthwise_bit_width,
                    shared_act=shared_act)
                stage.add_module("unit{}".format(j + 1), unit)
                in_channels = out_channels

            self.features.add_module("stage{}".format(i + 1), stage)

        # Final 1x1 conv before the pooling/classifier head; returns a
        # QuantTensor so the quantized pool receives scale metadata.
        final_block = ConvBlock(
            in_channels=in_channels,
            out_channels=final_block_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            bn_eps=bn_eps,
            act_scaling_per_channel=False,
            act_bit_width=bit_width,
            weight_bit_width=bit_width,
            bias=False,
            return_quant_tensor=True)
        self.features.add_module("final_block", final_block)
        in_channels = final_block_channels
        self.final_pool = QuantAvgPool2d(kernel_size=7, stride=1, bit_width=bit_width)
        # Head: Hadamard-transform classifier or a quantized linear layer.
        if hadamard_classifier:
            self.output = HadamardClassifier(
                in_channels=in_channels,
                out_channels=num_classes,
                fixed_scale=False)
        else:
            self.output = QuantLinear(
                in_features=in_channels,
                out_features=num_classes,
                bias=True,
                bias_quant=IntBias,
                weight_bit_width=bit_width,
                weight_quant=CommonIntWeightPerTensorQuant)