Example #1
 def test_qtensor_copy(self):
     scale = 0.5
     zero_point = 10
     val = 100
     numel = 10
     # copy from same scale and zero_point
     q = torch._empty_affine_quantized([numel],
                                       scale=scale,
                                       zero_point=zero_point,
                                       dtype=torch.quint8)
     q2 = torch._empty_affine_quantized([numel],
                                        scale=scale,
                                        zero_point=zero_point,
                                        dtype=torch.quint8)
     q.copy_(q2)
     self.assertEqual(q.int_repr(), q2.int_repr())
     self.assertEqual(q.q_scale(), q2.q_scale())
     self.assertEqual(q.q_zero_point(), q2.q_zero_point())
     # copying from different scale and zero_point
     scale = 3.2
     zero_point = 5
     q = torch._empty_affine_quantized([numel],
                                       scale=scale,
                                       zero_point=zero_point,
                                       dtype=torch.quint8)
     # check original scale and zero_points are set correctly
     self.assertEqual(q.q_scale(), scale)
     self.assertEqual(q.q_zero_point(), zero_point)
     q.copy_(q2)
     # check scale and zero_point have been copied
     self.assertEqual(q, q2)
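
The test above exercises copy_ between quantized tensors. As background for the whole page: torch._empty_affine_quantized allocates an uninitialized per-tensor affine quantized tensor whose scale, zero_point and dtype are fixed at creation; only those quantization parameters are meaningful until the tensor is filled or copied into. A minimal standalone sketch with illustrative values:

import torch

# Uninitialized quantized tensor; only scale/zero_point/dtype are defined so far.
q = torch._empty_affine_quantized([4], scale=0.5, zero_point=10, dtype=torch.quint8)
print(q.q_scale(), q.q_zero_point(), q.dtype)   # 0.5 10 torch.quint8

# Fill it by copying from another quantized tensor.
src = torch.quantize_per_tensor(torch.tensor([0.0, 0.5, 1.0, 1.5]), 0.5, 10, torch.quint8)
q.copy_(src)
print(q.int_repr())    # 10, 11, 12, 13 (uint8 storage)
print(q.dequantize())  # 0.0, 0.5, 1.0, 1.5
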
Example #2
 def __init__(self, in_channels, out_channels, kernel_size, stride=1,
              padding=0, dilation=1, groups=1,
              bias=True, padding_mode='zeros'):
     super(Conv2d, self).__init__()
     if padding_mode != 'zeros':
         raise NotImplementedError(
             "Currently only zero-padding is supported by quantized conv")
     if in_channels % groups != 0:
         raise ValueError('in_channels must be divisible by groups')
     if out_channels % groups != 0:
         raise ValueError('out_channels must be divisible by groups')
     self.in_channels = in_channels
     self.out_channels = out_channels
     self.kernel_size = _pair(kernel_size)
     self.stride = _pair(stride)
     self.padding = _pair(padding)
     self.dilation = _pair(dilation)
     self.transposed = False
     self.output_padding = 0
     self.groups = groups
     self.padding_mode = padding_mode
     qweight = torch._empty_affine_quantized(
         [out_channels, self.kernel_size[0], self.kernel_size[1],
          in_channels // self.groups],
         scale=1, zero_point=0, dtype=torch.qint8)
     self.set_weight(qweight)
     self.bias = torch._empty_affine_quantized([out_channels],
                                               scale=1, zero_point=0,
                                               dtype=torch.qint32)
     self.scale = 1.0
     self.zero_point = 0
Example #3
    def __init__(self, in_features, out_features, bias_=True):
        super(Linear, self).__init__()
        # We don't muck around with buffers or attributes or anything here
        # to keep the module simple. *everything* is simply a Python attribute.
        # Serialization logic is explicitly handled in the below serialization and
        # deserialization modules
        self.in_features = in_features
        self.out_features = out_features
        if bias_:
            self.bias = torch._empty_affine_quantized([out_features],
                                                      scale=1,
                                                      zero_point=0,
                                                      dtype=torch.qint32)
        else:
            self.bias = None

        qweight = torch._empty_affine_quantized([out_features, in_features],
                                                scale=1,
                                                zero_point=0,
                                                dtype=torch.qint8)

        self.set_weight(qweight)
        self.weight_scale = 1.0
        self.scale = 1.0
        self.zero_point = 0
Example #4
 def test_qtensor_copy(self):
     scale = 0.5
     zero_point = 10
     numel = 10
     for device in get_supported_device_types():
         for dtype in [torch.qint8, torch.quint8, torch.qint32]:
             # copy from same scale and zero_point
             q = torch._empty_affine_quantized([numel], scale=scale,
                                               zero_point=zero_point, device=device, dtype=dtype)
             q2 = torch._empty_affine_quantized([numel], scale=scale,
                                                zero_point=zero_point, device=device, dtype=dtype)
             q.copy_(q2)
             self.assertEqual(q.int_repr(), q2.int_repr())
             self.assertEqual(q.q_scale(), q2.q_scale())
             self.assertEqual(q.q_zero_point(), q2.q_zero_point())
             # copying from different scale and zero_point
             scale = 3.2
             zero_point = 5
             q = torch._empty_affine_quantized([numel], scale=scale,
                                               zero_point=zero_point, device=device, dtype=dtype)
             # check original scale and zero_points are set correctly
             self.assertEqual(q.q_scale(), scale)
             self.assertEqual(q.q_zero_point(), zero_point)
             q.copy_(q2)
              # check scale and zero_point have been copied
             self.assertEqual(q, q2)
             # can't copy from quantized tensor to non-quantized tensor
             r = torch.empty([numel], dtype=torch.float)
             q = torch._empty_affine_quantized([numel], scale=scale, zero_point=zero_point, dtype=torch.quint8)
             with self.assertRaisesRegex(RuntimeError, "please use dequantize"):
                 r.copy_(q)
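
The assertRaisesRegex above relies on the error raised when copying a quantized tensor into a float tensor; the supported direction is to dequantize explicitly first. A short sketch of that path, with illustrative values:

import torch

q = torch.quantize_per_tensor(torch.randn(10), scale=0.5, zero_point=10, dtype=torch.quint8)
r = torch.empty(10, dtype=torch.float)
# r.copy_(q) would raise a RuntimeError ("please use dequantize"); do this instead:
r.copy_(q.dequantize())
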
Example #5
    def test_qtensor_copy(self):
        scale = 0.5
        zero_point = 10
        val = 100
        numel = 10
        # copy from same scale and zero_point
        q = torch._empty_affine_quantized([numel], scale=scale, zero_point=zero_point, dtype=torch.quint8)
        q2 = torch._empty_affine_quantized([numel], scale=scale, zero_point=zero_point, dtype=torch.quint8)
        q.copy_(q2)
        self.assertEqual(q.int_repr(), q2.int_repr())
        self.assertEqual(q.q_scale(), q2.q_scale())
        self.assertEqual(q.q_zero_point(), q2.q_zero_point())
        # copying from different scale and zero_point
        scale = 3.2
        zero_point = 5
        q = torch._empty_affine_quantized([numel], scale=scale, zero_point=zero_point, dtype=torch.quint8)
        # check original scale and zero_points are set correctly
        self.assertEqual(q.q_scale(), scale)
        self.assertEqual(q.q_zero_point(), zero_point)
        q.copy_(q2)
        # check scale and zero_point have been copied
        self.assertEqual(q, q2)
        # deep copy
        scale, zero_point, dtype = 1.0, 2, torch.uint8
        q_int = torch.randint(0, 100, [3, 5], dtype=dtype)
        scale, zero_point = 2.0, 3
        q = torch._make_per_tensor_quantized_tensor(q_int, scale=scale, zero_point=zero_point)
        qc = deepcopy(q)
        self.assertEqual(qc, q)

        # can't copy from quantized tensor to non-quantized tensor
        r = torch.empty([numel], dtype=torch.float)
        q = torch._empty_affine_quantized([numel], scale=scale, zero_point=zero_point, dtype=torch.quint8)
        with self.assertRaisesRegex(RuntimeError, "please use dequantize"):
            r.copy_(q)
Example #6
    def test_qtensor_creation(self):
        scale = 0.5
        zero_point = 10
        val = 100
        numel = 10
        q = torch._empty_affine_quantized([numel],
                                          scale=scale,
                                          zero_point=zero_point,
                                          dtype=torch.quint8)
        self.assertEqual(scale, q.q_scale())
        self.assertEqual(zero_point, q.q_zero_point())

        # create Tensor from uint8_t Tensor, scale and zero_point
        int_tensor = torch.randint(0, 100, size=(10, ), dtype=torch.uint8)
        q = torch._make_per_tensor_quantized_tensor(int_tensor, scale,
                                                    zero_point)
        self.assertEqual(int_tensor, q.int_repr())
        self.assertEqual(scale, q.q_scale())
        self.assertEqual(zero_point, q.q_zero_point())

        # create via empty_like
        q = torch._empty_affine_quantized([numel],
                                          scale=scale,
                                          zero_point=zero_point,
                                          dtype=torch.quint8)
        q_el = torch.empty_like(q)
        self.assertEqual(q.q_scale(), q_el.q_scale())
        self.assertEqual(q.q_zero_point(), q_el.q_zero_point())
        self.assertEqual(q.dtype, q_el.dtype)

        # create via empty_like but change the dtype (currently not supported)
        with self.assertRaises(RuntimeError):
            torch.empty_like(q, dtype=torch.qint8)
Example #7
    def test_qtensor_view(self):
        scale, zero_point, dtype = 1.0, 2, torch.quint8
        q = torch._empty_affine_quantized(1, 2, 3, scale=scale, zero_point=zero_point, dtype=dtype)
        q2 = q.view(1, 3, 2)
        self.assertEqual(q.numel(), q2.numel())
        # testing -1
        self.assertEqual(q, q2.view(1, -1, 3))

        a = torch._empty_affine_quantized([1, 2, 3, 4], scale=scale, zero_point=zero_point, dtype=dtype)
        b = a.transpose(1, 2)  # swaps 2nd and 3rd dimension
        c = a.view(1, 3, 2, 4)  # does not change tensor layout
        self.assertEqual(b.size(), c.size())
        self.assertEqual(b.q_scale(), c.q_scale())
        self.assertEqual(b.q_zero_point(), c.q_zero_point())
        self.assertNotEqual(b.int_repr(), c.int_repr())


        # a case where view can't be used on a non-contiguous Tensor
        a = torch._empty_affine_quantized([1, 2, 3, 4], scale=scale, zero_point=zero_point, dtype=dtype)
        b = a.transpose(1, 2)  # swaps 2nd and 3rd dimension
        err_str = "view size is not compatible with input tensor's size and stride*"
        with self.assertRaisesRegex(RuntimeError, err_str):
            b.view(1, 4, 2, 3)
        # view on contiguous tensor is fine
        b.contiguous().view(1, 4, 2, 3)
Example #8
    def test_qtensor_reshape(self):
        scale, zero_point, dtype = 1.0, 2, torch.quint8
        q = torch._empty_affine_quantized([3, 5],
                                          scale=scale,
                                          zero_point=zero_point,
                                          dtype=dtype)
        q2 = q.reshape([15])
        self.assertEqual(q.numel(), q2.numel())
        self.assertEqual(q2.size(), [15])
        # testing -1
        self.assertEqual(q, q2.reshape([3, -1]))

        a = torch._empty_affine_quantized([1, 2, 3, 4],
                                          scale=scale,
                                          zero_point=zero_point,
                                          dtype=dtype)
        b = a.transpose(1, 2)  # swaps 2nd and 3rd dimension
        c = a.reshape(1, 3, 2, 4)  # does not change tensor layout
        self.assertEqual(b.size(), c.size())
        self.assertEqual(b.q_scale(), c.q_scale())
        self.assertEqual(b.q_zero_point(), c.q_zero_point())
        # TODO: fix flaky test
        # self.assertNotEqual(b.int_repr(), c.int_repr())

        # we can use reshape for non-contiguous Tensor
        a = torch._empty_affine_quantized([1, 2, 3, 4],
                                          scale=scale,
                                          zero_point=zero_point,
                                          dtype=dtype)
        b = a.transpose(1, 2)  # swaps 2nd and 3rd dimension
        c = b.reshape(1, 4, 2, 3)
        self.assertEqual(b, c.reshape(1, 3, 2, 4))
Example #9
    def test_qadd_relu_different_qparams(self):
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add
        add_out = torch.ops.quantized.add_out
        add_relu_out = torch.ops.quantized.add_relu_out

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale_A = 3.0
        zero_point_A = 7
        scale_B = 5.0
        zero_point_B = 127

        scale_C = 0.5
        zero_point_C = 5

        qA = torch.quantize_linear(A,
                                   scale=scale_A,
                                   zero_point=zero_point_A,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B,
                                   scale=scale_B,
                                   zero_point=zero_point_B,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale_C, zero_point_C)
        qC_hat = add(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")
        qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                                   scale=scale_C,
                                                   zero_point=zero_point_C,
                                                   dtype=torch.quint8)
        add_out(qA, qB, out=qC_out_hat)
        self.assertEqual(qC_hat, qC_out_hat, message="Add.out failed")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale_C, zero_point_C)
        qCrelu_hat = add_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")
        qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                       scale=scale_C,
                                                       zero_point=zero_point_C,
                                                       dtype=torch.quint8)
        add_relu_out(qA, qB, out=qCrelu_out_hat)
        self.assertEqual(qCrelu_hat,
                         qCrelu_out_hat,
                         message="AddReLU.out failed")
Example #10
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 padding_mode='zeros'):
        if padding_mode != 'zeros':
            raise NotImplementedError(
                "Currently only zero-padding is supported!")
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        kernel_size = _pair(kernel_size)
        transposed = False
        output_padding = _pair(0)
        super(Conv2d, self).__init__(in_channels=in_channels,
                                     out_channels=out_channels,
                                     kernel_size=kernel_size,
                                     stride=stride,
                                     padding=padding,
                                     dilation=dilation,
                                     transposed=transposed,
                                     output_padding=output_padding,
                                     groups=groups,
                                     bias=True,
                                     padding_mode=padding_mode)
        del self.weight
        del self.bias

        qweight = torch._empty_affine_quantized([
            out_channels, kernel_size[0], kernel_size[1],
            in_channels // self.groups
        ],
                                                scale=1,
                                                zero_point=0,
                                                dtype=torch.qint8)
        qbias = torch._empty_affine_quantized([out_channels],
                                              scale=1,
                                              zero_point=0,
                                              dtype=torch.qint32)
        self.register_buffer(
            '_packed_weight',
            torch.ops.quantized.fbgemm_conv_prepack(
                qweight.permute([0, 2, 3, 1]), self.stride, self.padding,
                self.dilation, self.groups))
        self.register_buffer('bias', qbias)
        self.register_buffer('scale', torch.tensor([1.0], dtype=torch.double))
        self.register_buffer('zero_point', torch.tensor([0], dtype=torch.long))
Example #11
    def test_qmul_relu_same_qparams(self):
        mul_relu = torch.ops.quantized.mul_relu
        mul = torch.ops.quantized.mul
        mul_out = torch.ops.quantized.mul_out
        mul_relu_out = torch.ops.quantized.mul_relu_out

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale = 2.0
        zero_point = 127
        qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)

        # mul ReLU ground truth
        C = (qA.dequantize() * qB.dequantize()).numpy()
        qC = _quantize(C, scale, zero_point)
        qC_hat = mul(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized mulition failed.")
        qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                                   scale=scale,
                                                   zero_point=zero_point,
                                                   dtype=torch.quint8)
        mul_out(qA, qB, out=qC_out_hat)
        self.assertEqual(qC_hat, qC_out_hat, message="mul.out failed")

        # mul + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale, zero_point)
        qCrelu_hat = mul_relu(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized mulition with ReLU failed.")
        qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                       scale=scale,
                                                       zero_point=zero_point,
                                                       dtype=torch.quint8)
        mul_relu_out(qA, qB, out=qCrelu_out_hat)
        self.assertEqual(qCrelu_hat, qCrelu_out_hat,
                         message="mulReLU.out failed")

        # Scalar multiplication
        mul = torch.ops.quantized.mul_scalar
        for b in B:
            C_ref = qA.dequantize().numpy() * b.item()
            qC = _quantize(C_ref, scale, zero_point)
            dqC = _dequantize(qC, scale, zero_point)
            qC_hat = mul(qA, b.item(), scale, zero_point)
            dqC_hat = qC_hat.dequantize()
            self.assertEqual(dqC, dqC_hat)
Example #12
    def __init__(self,
                 in_features,
                 out_features,
                 bias_=True,
                 dtype=torch.qint8):
        super(Linear, self).__init__()
        # We don't muck around with buffers or attributes or anything here
        # to keep the module simple. *everything* is simply a Python attribute.
        # Serialization logic is explicitly handled in the below serialization and
        # deserialization modules
        self.in_features = in_features
        self.out_features = out_features
        bias = None
        if bias_:
            bias = torch.zeros(out_features, dtype=torch.float)

        if dtype == torch.qint8:
            qweight = torch._empty_affine_quantized(
                [out_features, in_features],
                scale=1,
                zero_point=0,
                dtype=torch.qint8)
        elif dtype == torch.float16:
            qweight = torch.zeros([out_features, in_features],
                                  dtype=torch.float)
        else:
            raise RuntimeError(
                'Unsupported dtype specified for quantized Linear!')

        self._packed_params = LinearPackedParams(dtype)
        self._packed_params.set_weight_bias(qweight, bias)
        self.scale = 1.0
        self.zero_point = 0
Example #13
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=True, padding_mode='zeros'):
        super(Conv2d, self).__init__()
        if padding_mode != 'zeros':
            raise NotImplementedError(
                "Currently only zero-padding is supported by quantized conv")
        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.transposed = False
        self.output_padding = 0
        self.groups = groups
        self.padding_mode = padding_mode
        # Initialize as NCHW. set_weight will internally transpose to
        # NHWC
        qweight = torch._empty_affine_quantized(
            [out_channels, in_channels // self.groups, self.kernel_size[0],
                self.kernel_size[1]],
            scale=1, zero_point=0, dtype=torch.qint8)
        bias_float = None
        if bias:
            bias_float = torch.zeros(out_channels, dtype=torch.float)

        self.set_weight_bias(qweight, bias_float)
        self.scale = 1.0
        self.zero_point = 0
Example #14
def elu(input, alpha=1., inplace=False, scale=None, zero_point=None):
    # type: (Tensor, Optional[float], bool, Optional[float], Optional[int]) -> Tensor
    r"""
    Applies the quantized ELU function element-wise:

    .. math::
        \text{ELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x) - 1))

    Args:
        input: quantized input
        alpha: the :math:`\alpha` value for the ELU formulation. Default: 1.0
        inplace: Inplace modification of the input tensor
        scale, zero_point: Scale and zero point of the output tensor.
    """
    if not input.is_quantized:
        raise ValueError("Input to 'quantized.elu' must be quantized!")
    if (scale is not None) != (zero_point is not None):
        raise ValueError(
            "Either both or none of (scale, zero_point) must be specified!")

    if scale is not None and zero_point is not None:
        assert not inplace, "Cannot rescale with `inplace`"
        output = torch._empty_affine_quantized(input.shape,
                                               scale=scale,
                                               zero_point=int(zero_point),
                                               dtype=input.dtype)
        torch._C._nn.elu(input, alpha, out=output)
        return output
    elif inplace:
        return torch._C._nn.elu_(input, alpha)
    else:
        return torch._C._nn.elu(input, alpha)
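
A usage sketch for the function above: the input must already be quantized, and passing scale/zero_point requantizes the result into a freshly allocated output. Values are illustrative, and this assumes a PyTorch build where the quantized ELU kernel backing the call above is available:

import torch

xq = torch.quantize_per_tensor(torch.randn(8), scale=0.1, zero_point=128, dtype=torch.quint8)
yq = elu(xq, alpha=1.0, scale=0.05, zero_point=64)   # requantized output
print(yq.q_scale(), yq.q_zero_point(), yq.dtype)     # 0.05 64 torch.quint8
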
Example #15
    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 padding, dilation,
                 transposed, output_padding,
                 groups, bias,
                 padding_mode='zeros'):
        super(_ConvNd, self).__init__()
        if padding_mode != 'zeros':
            raise NotImplementedError(
                "Currently only zero-padding is supported by quantized conv")
        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.transposed = transposed
        self.output_padding = output_padding
        self.groups = groups
        self.padding_mode = padding_mode
        # Initialize as NCHW. set_weight will internally transpose to NHWC.
        qweight = torch._empty_affine_quantized(
            [out_channels, in_channels // self.groups] + list(kernel_size),
            scale=1, zero_point=0, dtype=torch.qint8)
        bias_float = (
            torch.zeros(out_channels, dtype=torch.float) if bias else None)

        self.set_weight_bias(qweight, bias_float)
        self.scale = 1.0
        self.zero_point = 0
Example #16
def get_size_of_node(fx_module: GraphModule, node: Node) -> size_bytes:
    """Given a node with node.dtype and node.shape, return its total size and its output size.
    total_size = weights + bias + output_size
    """
    # Total num of elements
    total_num_of_elems = 0
    # For a module, consider all parameters
    if node.op == "call_module":
        submodule_dict = dict(fx_module.named_modules())
        submodule = submodule_dict[node.target]
        parameters = submodule.named_parameters()
        # Parameters are named tuples
        for name, p in parameters:
            total_num_of_elems += p.numel()
    # Don't forget the output size
    # node.shape is the shape of this node's output
    tensor_meta = get_tensor_meta(node)
    output_elem = tensor_meta.shape.numel()
    total_num_of_elems += output_elem
    # Assume for now if it's quantized then it's qint8 or quint8
    if tensor_meta.is_quantized:
        size_per_elem_bytes = torch._empty_affine_quantized(
            [], dtype=tensor_meta.dtype).element_size()
    else:
        size_per_elem_bytes = torch.tensor(
            [], dtype=tensor_meta.dtype).element_size()
    total_size = size_per_elem_bytes * total_num_of_elems
    output_size = size_per_elem_bytes * output_elem
    return size_bytes(output_size, total_size)
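
The element_size() trick above works because allocating an empty quantized tensor is the simplest way to ask PyTorch for the storage width of a quantized dtype; a quick illustration (the expected sizes assume the usual 1-byte quint8/qint8 and 4-byte qint32 representations):

import torch

for dt in (torch.quint8, torch.qint8, torch.qint32):
    nbytes = torch._empty_affine_quantized([], dtype=dt).element_size()
    print(dt, nbytes)   # quint8/qint8 -> 1, qint32 -> 4
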
Example #17
 def __init__(self):
     super(LinearPackedParams, self).__init__()
     wq = torch._empty_affine_quantized([1, 1],
                                        scale=1.0,
                                        zero_point=0,
                                        dtype=torch.qint8)
     self.set_weight_bias(wq, None)
Example #18
def _rebuild_qtensor(storage, storage_offset, size, stride, quantizer_params,
                     requires_grad, backward_hooks):
    qscheme = quantizer_params[0]
    if qscheme == torch.per_tensor_affine:
        _, scale, zero_point = quantizer_params
        tensor = torch._empty_affine_quantized(size,
                                               scale=scale,
                                               zero_point=zero_point,
                                               dtype=storage.dtype)
    elif qscheme == torch.per_channel_affine:
        _, scales, zero_points, axis = quantizer_params
        scales = torch.tensor(scales, dtype=torch.float64)
        zero_points = torch.tensor(zero_points, dtype=torch.int64)
        tensor = torch._empty_per_channel_affine_quantized(
            size,
            scales=scales,
            zero_points=zero_points,
            axis=axis,
            dtype=storage.dtype)
    else:
        raise RuntimeError(
            "Can't deserialize quantized tensor with qscheme {}".format(
                qscheme))
    tensor.set_(storage, storage_offset, size, stride)
    tensor.requires_grad = requires_grad
    # NB: This line exists only for backwards compatibility; the
    # general expectation is that backward_hooks is an empty
    # OrderedDict.  See Note [Don't serialize hooks]
    tensor._backward_hooks = backward_hooks
    return tensor
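
This rebuild hook is what torch.load ends up calling for quantized tensors; a round-trip sketch that exercises the per-tensor affine branch:

import io
import torch

q = torch.quantize_per_tensor(torch.randn(3, 5), scale=0.25, zero_point=8, dtype=torch.quint8)
buf = io.BytesIO()
torch.save(q, buf)
buf.seek(0)
q2 = torch.load(buf)
# scale, zero_point and the integer representation survive serialization
assert q2.q_scale() == q.q_scale() and q2.q_zero_point() == q.q_zero_point()
assert torch.equal(q2.int_repr(), q.int_repr())
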
Example #19
def hardswish(input, scale, zero_point):
    # type: (Tensor, float, int) -> Tensor
    r"""Applies the quantized version of the hardswish function, element-wise,
    as described in the paper:

    `Searching for MobileNetV3`_.

    .. math::
        \text{Hardswish}(x) = \begin{cases}
            0 & \text{if~} x \le -3, \\
            x & \text{if~} x \ge +3, \\
            x \cdot (x + 3) / 6 & \text{otherwise}
        \end{cases}

    Args:
        input: quantized input
        scale, zero_point: Scale and zero point of the output tensor.

    See :class:`~torch.nn.Hardswish` for more details.

    .. _`Searching for MobileNetV3`:
        https://arxiv.org/abs/1905.02244
    """
    if not input.is_quantized:
        raise ValueError("Input to 'quantized.hardswish' must be quantized!")
    output = torch._empty_affine_quantized(input.shape,
                                           scale=scale,
                                           zero_point=int(zero_point),
                                           dtype=input.dtype)
    torch._C._nn.hardswish(input, out=output)
    return output
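
Following the testing pattern used elsewhere on this page, the quantized call can be sanity-checked against the float reference; a sketch with illustrative scale/zero_point values (assumes torch.nn.functional.hardswish is available in the installed version):

import torch
import torch.nn.functional as F

xq = torch.quantize_per_tensor(torch.randn(16), scale=0.1, zero_point=128, dtype=torch.quint8)
yq = hardswish(xq, scale=0.1, zero_point=128)    # the function defined above
y_ref = F.hardswish(xq.dequantize())             # float reference
print((yq.dequantize() - y_ref).abs().max())     # bounded by the output quantization step
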
Example #20
    def test_qtensor_fill(self):
        numel = 10
        scale = 0.5
        zero_point = 10

        ones = torch.ones(numel).to(torch.float)

        types = [torch.qint8, torch.quint8, torch.qint32]
        fills = [-1, 1, 2**32]  # negative, positive, overflow

        # `fill_` uses `copy_(float)`, which doesn't support CUDA
        device = 'cpu'
        ones = ones.to(device)
        for qtype, fill_with in itertools.product(types, fills):
            q_filled = torch._empty_affine_quantized([numel],
                                                     scale=scale,
                                                     zero_point=zero_point,
                                                     device=device,
                                                     dtype=qtype)
            q_filled.fill_(fill_with)
            int_repr = torch.quantize_per_tensor(ones * fill_with, scale,
                                                 zero_point, qtype)
            fill_with = int_repr.dequantize()
            int_repr = int_repr.int_repr()

            self.assertEqual(q_filled.int_repr(), int_repr)
            self.assertEqual(q_filled.dequantize(), fill_with)
            # Make sure the scale and zero_point don't change
            self.assertEqual(q_filled.q_scale(), scale)
            self.assertEqual(q_filled.q_zero_point(), zero_point)
Example #21
def leaky_relu(input,
               negative_slope=0.01,
               inplace=False,
               scale=None,
               zero_point=None):
    # type: (Tensor, float, bool, float, int) -> Tensor
    r"""
    Quantized version of
    leaky_relu(input, negative_slope=0.01, inplace=False, scale, zero_point) -> Tensor

    Applies element-wise,
    :math:`\text{LeakyReLU}(x) = \max(0, x) + \text{negative\_slope} * \min(0, x)`

    Args:
        input: Quantized input
        negative_slope: The slope of the negative input
        inplace: Inplace modification of the input tensor
        scale, zero_point: Scale and zero point of the output tensor.

    See :class:`~torch.nn.LeakyReLU` for more details.
    """
    if scale is not None and zero_point is not None:
        assert not inplace, "Cannot rescale with `inplace`"
        output = torch._empty_affine_quantized(input.shape,
                                               scale=scale,
                                               zero_point=int(zero_point),
                                               dtype=input.dtype)
        torch._C._nn.leaky_relu(input, negative_slope, out=output)
        return output
    if inplace:
        result = torch._C._nn.leaky_relu_(input, negative_slope)
    else:
        result = torch._C._nn.leaky_relu(input, negative_slope)
    return result
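
The function above has two paths: with scale/zero_point it writes into a new _empty_affine_quantized output (requantizing), otherwise the result keeps the input's quantization parameters (optionally in place). A sketch of both calls with illustrative values:

import torch

xq = torch.quantize_per_tensor(torch.randn(8), scale=0.1, zero_point=128, dtype=torch.quint8)
y1 = leaky_relu(xq, negative_slope=0.01)                             # keeps xq's scale/zero_point
y2 = leaky_relu(xq, negative_slope=0.01, scale=0.05, zero_point=0)   # requantized output
print(y1.q_scale(), y2.q_scale())   # 0.1 vs 0.05
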
Example #22
    def test_qtensor_sub_byte(self):
        num_elements = 10
        scale = 1.0
        zero_point = 2
        for dtype in [torch.quint4x2]:
            r = torch.ones((5, 2), dtype=torch.float)
            qr = torch.quantize_per_tensor(r, scale, zero_point, dtype)
            self.assertEqual(qr.q_scale(), scale)
            self.assertEqual(qr.q_zero_point(), zero_point)
            self.assertTrue(qr.is_quantized)
            self.assertFalse(r.is_quantized)
            self.assertEqual(qr.storage().size(), 5)

            int_repr = qr.int_repr()
            for num in int_repr[0:5]:
                self.assertEqual(num, 51)  # Packed entries, each of value 3, i.e. 00110011

            # Test tensor creation
            q = torch._empty_affine_quantized([num_elements], scale=scale, zero_point=zero_point,
                                              dtype=torch.quint4x2)
            self.assertEqual(q.storage().size(), 5)

            # Test save/load
            with tempfile.NamedTemporaryFile() as f:
                torch.save(qr, f)
                f.seek(0)
                loaded_q = torch.load(f)
                loaded_int_repr = loaded_q.int_repr()[0:5]
                self.assertEqual(int_repr[0:5], loaded_int_repr)
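
The expected byte value 51 in the test above follows directly from the affine parameters: each element quantizes to round(1.0 / 1.0) + 2 = 3, and two 4-bit entries are packed per byte, so every stored byte is (3 << 4) | 3 = 0b00110011 = 51. As a quick check:

scale, zero_point = 1.0, 2
q_val = round(1.0 / scale) + zero_point   # 3 per element
packed = (q_val << 4) | q_val             # two 4-bit entries per byte
assert packed == 0b00110011 == 51
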
Example #23
    def __init__(self,
                 in_features,
                 out_features,
                 row_block_size,
                 col_block_size,
                 bias=True,
                 dtype=torch.qint8):
        super().__init__()

        if dtype != torch.qint8:
            raise NotImplementedError(
                "Only QINT8 is supported for Sparse Quantized Linear")

        self.in_features = in_features
        self.out_features = out_features

        if bias:
            bias = torch.zeros(self.out_features, dtype=torch.float)
        else:
            bias = None

        qweight = torch._empty_affine_quantized([out_features, in_features],
                                                scale=1,
                                                zero_point=0,
                                                dtype=torch.qint8)
        self._packed_params = LinearPackedParams(row_block_size=row_block_size,
                                                 col_block_size=col_block_size,
                                                 dtype=dtype)
        self._packed_params.set_weight_bias(qweight, bias, row_block_size,
                                            col_block_size)
        self.scale = 1.0
        self.zero_point = 0
Example #24
    def test_clone(self):
        numel = 10
        scale = 0.5
        zero_point = 10

        options = itertools.product(get_supported_device_types(),
                                    [torch.qint8, torch.quint8, torch.qint32])

        for device, dtype in options:
            per_tensor_quantized = torch._empty_affine_quantized(
                [numel],
                scale=scale,
                zero_point=zero_point,
                device=device,
                dtype=dtype)
            per_channel_quantized = torch._empty_per_channel_affine_quantized(
                [numel],
                scales=torch.tensor([scale]),
                zero_points=torch.tensor([zero_point]),
                axis=0,
                device=device,
                dtype=dtype)
            qtensors = [per_tensor_quantized, per_channel_quantized]

            for q in qtensors:
                q2 = q.clone()
                # Check to make sure the scale and zero_point have been copied.
                self.assertEqual(q, q2)
Example #25
 def test_qtensor_clone(self):
     numel = 10
     scale = 0.5
     zero_point = 10
     q2 = torch._empty_affine_quantized([numel], scale=scale, zero_point=zero_point, dtype=torch.quint8)
     q = q2.clone()
     # Check to make sure the scale and zero_point have been copied.
     self.assertEqual(q, q2)
Example #26
 def __init__(self):
     super(ConvPackedParams, self).__init__()
     wq = torch._empty_affine_quantized([1, 1, 1, 1], scale=1.0, zero_point=0, dtype=torch.qint8)
     self.stride = [1, 1]
     self.padding = [0, 0]
     self.dilation = [1, 1]
     self.groups = 1
     self.set_weight_bias(wq, None)
Example #27
 def __init__(self, dtype=torch.qint8):
     super().__init__()
     self.dtype = dtype
     if self.dtype == torch.qint8:
         wq = torch._empty_affine_quantized([1, 1], scale=1.0, zero_point=0, dtype=torch.qint8)
     elif self.dtype == torch.float16:
         wq = torch.zeros([1, 1], dtype=torch.float)
     self.set_weight_bias(wq, None)
Example #28
    def test_cat(self, X, num, dim, relu):
        tensors_q = []
        tensors_ref = []
        X, (scale, zero_point, torch_type) = X
        assume(dim < X.ndim)
        X = torch.from_numpy(X)
        new_shape = np.array(X.shape)
        new_shape[dim] = 0
        for idx in range(num):
            tensors_q.append(
                torch.quantize_linear(X, scale, zero_point, torch_type))
            tensors_ref.append(X)
            new_shape[dim] += tensors_ref[-1].shape[dim]

        cat_ref = torch.cat(tensors_ref, dim=dim)
        cat_ref = torch.quantize_linear(cat_ref, scale, zero_point, torch_type)
        cat_ref = cat_ref.dequantize()

        if relu:
            cat_ref = F.relu(cat_ref)
            q_cat_op = torch.ops.quantized.cat_relu
            q_cat_out_op = torch.ops.quantized.cat_relu_out
        else:
            q_cat_op = torch.ops.quantized.cat
            q_cat_out_op = torch.ops.quantized.cat_out

        cat_q = q_cat_op(tensors_q,
                         dim=dim,
                         scale=scale,
                         zero_point=zero_point)
        cat_q = cat_q.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

        cat_q_out = torch._empty_affine_quantized(list(new_shape),
                                                  scale=scale,
                                                  zero_point=zero_point,
                                                  dtype=torch_type)
        q_cat_out_op(tensors_q, dim=dim, out=cat_q_out)
        cat_q_out = cat_q_out.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q_out.numpy())

        # Test the cat on per-channel quantized tensor.
        ch_axis = 1
        scales = torch.from_numpy(np.array([1.0] * X.shape[ch_axis]))
        scales = scales.to(torch.float64)
        zero_points = torch.from_numpy(np.array([0] * X.shape[ch_axis]))
        zero_points = zero_points.to(torch.long)
        tensors_q[0] = torch.quantize_linear_per_channel(X,
                                                         scales,
                                                         zero_points,
                                                         axis=[ch_axis],
                                                         dtype=torch_type)
        with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
            cat_q = q_cat_op(tensors_q,
                             dim=ch_axis,
                             scale=scale,
                             zero_point=zero_point)
Example #29
 def __init__(self, in_features, out_features):
     super(Linear, self).__init__()
     qweight = torch._empty_affine_quantized(
         [out_features, in_features],
         scale=1,
         zero_point=0,
         dtype=torch.qint8)
     self._packed_weight = torch.ops.quantized.linear_prepack(
         qweight)
Example #30
    def _init(self,
              in_channels,
              out_channels,
              kernel_size,
              stride,
              padding,
              dilation,
              transposed,
              output_padding,
              groups,
              bias,
              padding_mode='zeros',
              device=None,
              dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super(_ConvNd, self).__init__()

        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.transposed = transposed
        self.output_padding = output_padding
        self.groups = groups
        if padding_mode not in _SUPPORTED_PADDING:
            raise ValueError(
                "'padding_mode' {} is not supported by quantized convolution".
                format(padding_mode))
        self.padding_mode = padding_mode
        # Initialize as NCHW. set_weight will internally transpose to NHWC.
        if self.transposed:
            weight_shape = [in_channels, out_channels // self.groups]
        else:
            weight_shape = [out_channels, in_channels // self.groups]
        qweight = torch._empty_affine_quantized(
            weight_shape + list(kernel_size),
            scale=1,
            zero_point=0,
            dtype=torch.qint8,
            **{k: v
               for k, v in factory_kwargs.items() if k != 'dtype'})
        bias_float = (torch.zeros(
            out_channels,
            dtype=torch.float,
            **{k: v
               for k, v in factory_kwargs.items()
               if k != 'dtype'}) if bias else None)

        self.set_weight_bias(qweight, bias_float)
        self.scale = 1.0
        self.zero_point = 0
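
The branch above only picks the leading two weight dimensions; the spatial kernel dimensions are appended afterwards. With hypothetical values in_channels=8, out_channels=16, groups=2 and kernel_size=(3, 3), the shape arithmetic gives:

# hypothetical values, only to illustrate the shape arithmetic above
in_channels, out_channels, groups, kernel_size = 8, 16, 2, (3, 3)
regular    = [out_channels, in_channels // groups] + list(kernel_size)   # [16, 4, 3, 3]
transposed = [in_channels, out_channels // groups] + list(kernel_size)   # [8, 8, 3, 3]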