예제 #1
0
    def __init__(self, config):
        """Create the quantized projections and quantizers used by self-attention.

        Args:
            config: Configuration object. This constructor reads its
                ``hidden_size``, ``num_attention_heads``,
                ``output_attentions`` and ``attention_probs_dropout_prob``
                attributes, and checks whether it has an ``embedding_size``
                attribute.

        Raises:
            ValueError: If ``hidden_size`` is not a multiple of
                ``num_attention_heads`` and ``config`` has no
                ``embedding_size`` attribute.
        """
        super().__init__()

        hidden_size = config.hidden_size
        head_count = config.num_attention_heads

        # The divisibility requirement is waived for configs that carry an
        # ``embedding_size`` attribute.
        divides_evenly = hidden_size % head_count == 0
        if not divides_evenly and not hasattr(config, "embedding_size"):
            raise ValueError(
                "The hidden size (%d) is not a multiple of the number of attention "
                "heads (%d)" % (hidden_size, head_count))

        self.output_attentions = config.output_attentions

        self.num_attention_heads = head_count
        self.attention_head_size = int(hidden_size / head_count)
        self.all_head_size = head_count * self.attention_head_size

        # Query/key/value projections built from the quantized counterpart of
        # torch.nn.Linear.
        projection_width = self.all_head_size
        self.query = quant_nn.QuantLinear(hidden_size, projection_width)
        self.key = quant_nn.QuantLinear(hidden_size, projection_width)
        self.value = quant_nn.QuantLinear(hidden_size, projection_width)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

        # torch.matmul() has no quantized version, so the forward method is
        # expected to quantize each matmul operand by hand. One quantizer is
        # created per operand, all configured with QuantLinear's default input
        # descriptor.
        input_desc = quant_nn.QuantLinear.default_quant_desc_input
        self.matmul_q_input_quantizer = TensorQuantizer(input_desc)
        self.matmul_k_input_quantizer = TensorQuantizer(input_desc)
        self.matmul_v_input_quantizer = TensorQuantizer(input_desc)
        self.matmul_a_input_quantizer = TensorQuantizer(input_desc)
예제 #2
0
    def __init__(
        self,
        channels: int,
        reduction_ratio: int,
        context_window: int = -1,
        interpolation_mode: str = 'nearest',
        activation: Optional[Callable] = None,
        quantize: bool = False,
    ):
        """
        Build a Squeeze-and-Excitation sub-module.

        Args:
            channels: Number of input channels.
            reduction_ratio: Factor by which ``channels`` is integer-divided
                to get the width of the bottleneck ("squeeze") layer.
            context_window: Integer number of timesteps that the context
                should be computed over, using stride 1 average pooling.
                If value < 1, then global context is computed.
            interpolation_mode: Interpolation mode of timestep dimension.
                Used only if context window is > 1.
                The modes available for resizing are: `nearest`, `linear` (3D-only),
                `bilinear`, `area`
            activation: Callable applied between the two linear layers.
                Defaults to ``nn.ReLU(inplace=True)`` when None.
            quantize: When truthy, the two linear layers are
                ``quant_nn.QuantLinear`` instead of ``nn.Linear``.

        Raises:
            ImportError: If ``quantize`` is truthy while
                ``PYTORCH_QUANTIZATION_AVAILABLE`` is falsy.
        """
        super(SqueezeExcite, self).__init__()
        self.interpolation_mode = interpolation_mode
        self._quantize = quantize

        # Placeholder; the actual value is assigned later.
        self.pool = None

        if activation is None:
            activation = nn.ReLU(inplace=True)

        # Choose the linear layer implementation once, then build the
        # bottleneck a single time.
        if quantize:
            if not PYTORCH_QUANTIZATION_AVAILABLE:
                raise ImportError(
                    "pytorch-quantization is not installed. Install from "
                    "https://github.com/NVIDIA/TensorRT/tree/master/tools/pytorch-quantization."
                )
            linear_cls = quant_nn.QuantLinear
        else:
            linear_cls = nn.Linear

        squeezed_channels = channels // reduction_ratio
        self.fc = nn.Sequential(
            linear_cls(channels, squeezed_channels, bias=False),
            activation,
            linear_cls(squeezed_channels, channels, bias=False),
        )
        self.gap = nn.AdaptiveAvgPool1d(1)

        # Apply the requested context window.
        self.change_context_window(context_window=context_window)

        # Default maximum sequence length.
        self.set_max_len(16)
예제 #3
0
 def __init__(self):
     """Create two 5x5 quantized conv layers and two quantized linear layers.

     Each layer is built from the ``qnn.Quant*`` class that stands in for the
     correspondingly named ``torch.nn`` layer (Conv2d / Linear).
     """
     super(Net, self).__init__()

     # Both convolutions share the same kernel size and padding.
     kernel_size = (5, 5)
     padding = (2, 2)

     # Quantized replacements for torch.nn.Conv2d.
     self.conv1 = qnn.QuantConv2d(1, 32, kernel_size, padding=padding, bias=True)
     self.conv2 = qnn.QuantConv2d(32, 64, kernel_size, padding=padding, bias=True)

     # Quantized replacements for torch.nn.Linear.
     self.fc1 = qnn.QuantLinear(64 * 7 * 7, 1024, bias=True)
     self.fc2 = qnn.QuantLinear(1024, 10, bias=True)
예제 #4
0
    def test_initialize_deactivate(self):
        """Check module replacement through ``quant_modules.initialize``.

        "Linear" is placed in the no-replace list but is also supplied as a
        custom mapping to ``quant_nn.QuantLinear``. The test asserts that,
        after ``initialize``, objects built via ``torch.nn.Linear`` and
        ``torch.nn.Conv2d`` have the same type as objects built via
        ``quant_nn.QuantLinear`` and ``quant_nn.QuantConv2d``.
        """
        no_replace_list = ["Linear"]
        custom_quant_modules = [(torch.nn, "Linear", quant_nn.QuantLinear)]

        quant_modules.initialize(no_replace_list, custom_quant_modules)
        try:
            assert type(quant_nn.QuantLinear(16, 256, 3)) is type(
                torch.nn.Linear(16, 256, 3))
            assert type(quant_nn.QuantConv2d(16, 256, 3)) is type(
                torch.nn.Conv2d(16, 256, 3))
        finally:
            # Always deactivate, even when an assertion fails, so the
            # replacement set up by initialize() does not remain active for
            # tests that run afterwards.
            quant_modules.deactivate()
예제 #5
0
    def test_simple_default_args(self):
        """Check module replacement when ``prepare_state`` gets no arguments.

        The test asserts that objects built via ``torch.nn.Linear`` and
        ``torch.nn.Conv2d`` have the same type as objects built via
        ``quant_nn.QuantLinear`` and ``quant_nn.QuantConv2d``.
        """
        replacement_helper = QuantModuleReplacementHelper()
        replacement_helper.prepare_state()
        replacement_helper.apply_quant_modules()

        try:
            # No exclusions were passed, so both Linear and Conv2d are
            # expected to resolve to their quantized versions.
            assert type(quant_nn.QuantLinear(16, 256, 3)) is type(
                torch.nn.Linear(16, 256, 3))
            assert type(quant_nn.QuantConv2d(16, 256, 3)) is type(
                torch.nn.Conv2d(16, 256, 3))
        finally:
            # Always restore, even when an assertion fails, so the
            # replacement does not remain active for tests that run
            # afterwards.
            replacement_helper.restore_float_modules()
예제 #6
0
    def test_with_no_replace_list(self):
        """Check that a module named in the no-replace list is left alone.

        "Linear" is passed in the no-replace list with no custom mappings.
        The test asserts that ``torch.nn.Linear`` objects do NOT have the type
        of ``quant_nn.QuantLinear`` objects, while ``torch.nn.Conv2d`` objects
        do have the type of ``quant_nn.QuantConv2d`` objects.
        """
        no_replace_list = ["Linear"]
        custom_quant_modules = None
        replacement_helper = QuantModuleReplacementHelper()
        replacement_helper.prepare_state(no_replace_list, custom_quant_modules)
        replacement_helper.apply_quant_modules()

        try:
            # Linear module should not be replaced with its quantized version.
            assert type(quant_nn.QuantLinear(16, 256, 3)) is not type(
                torch.nn.Linear(16, 256, 3))
            # Conv2d is not excluded, so it is expected to be replaced.
            assert type(quant_nn.QuantConv2d(16, 256, 3)) is type(
                torch.nn.Conv2d(16, 256, 3))
        finally:
            # Always restore, even when an assertion fails, so the
            # replacement does not remain active for tests that run
            # afterwards.
            replacement_helper.restore_float_modules()
예제 #7
0
    def test_with_custom_quant_modules(self):
        """Check that a custom mapping wins over the no-replace list.

        "Linear" is passed in the no-replace list and also as a custom mapping
        to ``quant_nn.QuantLinear``. The test asserts that objects built via
        ``torch.nn.Linear`` and ``torch.nn.Conv2d`` have the same type as
        objects built via ``quant_nn.QuantLinear`` and
        ``quant_nn.QuantConv2d``.
        """
        no_replace_list = ["Linear"]
        custom_quant_modules = [(torch.nn, "Linear", quant_nn.QuantLinear)]
        replacement_helper = QuantModuleReplacementHelper()
        replacement_helper.prepare_state(no_replace_list, custom_quant_modules)
        replacement_helper.apply_quant_modules()

        try:
            # Although the no-replace list says the Linear module should not
            # be replaced with its quantized version, custom_quant_modules
            # still contains a mapping for Linear, so it will be replaced.
            assert type(quant_nn.QuantLinear(16, 256, 3)) is type(
                torch.nn.Linear(16, 256, 3))
            assert type(quant_nn.QuantConv2d(16, 256, 3)) is type(
                torch.nn.Conv2d(16, 256, 3))
        finally:
            # Always restore, even when an assertion fails, so the
            # replacement does not remain active for tests that run
            # afterwards.
            replacement_helper.restore_float_modules()
예제 #8
0
    def __init__(
            self,
            block: Type[Union[BasicBlock, Bottleneck]],
            layers: List[int],
            quantize: bool = False,
            num_classes: int = 1000,
            zero_init_residual: bool = False,
            groups: int = 1,
            width_per_group: int = 64,
            replace_stride_with_dilation: Optional[List[bool]] = None,
            norm_layer: Optional[Callable[..., nn.Module]] = None) -> None:
        """Assemble the ResNet stem, four residual stages and classifier.

        Args:
            block: Residual block class handed to ``_make_layer``; its
                ``expansion`` attribute sizes the classifier input.
            layers: Per-stage block counts; indices 0-3 are used.
            quantize: When True, the stem convolution and the classifier are
                built from ``quant_nn.QuantConv2d`` / ``quant_nn.QuantLinear``
                and the flag is forwarded to every ``_make_layer`` call.
            num_classes: Output width of the final linear layer.
            zero_init_residual: When True, set the weight of the last norm
                layer of each ``Bottleneck`` (``bn3``) or ``BasicBlock``
                (``bn2``) to zero.
            groups: Stored as ``self.groups``.
            width_per_group: Stored as ``self.base_width``.
            replace_stride_with_dilation: Three flags, one per stage 2-4,
                passed as ``dilate`` to ``_make_layer``. Defaults to all
                False.
            norm_layer: Callable that builds a normalization layer from a
                channel count. Defaults to ``nn.BatchNorm2d``.

        Raises:
            ValueError: If ``replace_stride_with_dilation`` is given and does
                not have exactly three elements.
        """
        super(ResNet, self).__init__()
        self._quantize = quantize

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(
                                 replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group

        # Pick the layer implementations once. quant_nn is only referenced
        # when quantization was actually requested.
        conv_cls = quant_nn.QuantConv2d if quantize else nn.Conv2d
        linear_cls = quant_nn.QuantLinear if quantize else nn.Linear

        self.conv1 = conv_cls(3,
                              self.inplanes,
                              kernel_size=7,
                              stride=2,
                              padding=3,
                              bias=False)

        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0], quantize=quantize)
        self.layer2 = self._make_layer(block,
                                       128,
                                       layers[1],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[0],
                                       quantize=quantize)
        self.layer3 = self._make_layer(block,
                                       256,
                                       layers[2],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[1],
                                       quantize=quantize)
        self.layer4 = self._make_layer(block,
                                       512,
                                       layers[3],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[2],
                                       quantize=quantize)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        self.fc = linear_cls(512 * block.expansion, num_classes)

        # NOTE(review): quant_nn.QuantConv2d appears to derive from torch's
        # _ConvNd base rather than from nn.Conv2d - confirm against the
        # installed pytorch-quantization. If so, an isinstance check against
        # nn.Conv2d alone would skip every convolution when quantize=True.
        # Listing the quantized class explicitly applies the same init to
        # both variants and is a no-op if it already is a subclass.
        if quantize:
            conv_types = (nn.Conv2d, quant_nn.QuantConv2d)
        else:
            conv_types = (nn.Conv2d,)

        for m in self.modules():
            if isinstance(m, conv_types):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight,
                                      0)  # type: ignore[arg-type]
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight,
                                      0)  # type: ignore[arg-type]