Code example #1
    def test_qtensor_view(self):
        scale, zero_point, dtype = 1.0, 2, torch.uint8
        for device in get_supported_device_types():
            q_int = torch.randint(0,
                                  100, [1, 2, 3],
                                  device=device,
                                  dtype=dtype)
            q = torch._make_per_tensor_quantized_tensor(q_int,
                                                        scale=scale,
                                                        zero_point=zero_point)
            q2 = q.view(1, 3, 2)
            self.assertEqual(q.numel(), q2.numel())
            # testing -1
            self.assertEqual(q, q2.view(1, -1, 3))

            a_int = torch.randint(0,
                                  100, [1, 2, 3, 4],
                                  device=device,
                                  dtype=dtype)
            a = torch._make_per_tensor_quantized_tensor(a_int,
                                                        scale=scale,
                                                        zero_point=zero_point)
            b = a.transpose(1, 2)  # swaps 2nd and 3rd dimension
            c = a.view(1, 3, 2, 4)  # does not change tensor layout in memory
            self.assertEqual(b.size(), c.size())
            self.assertEqual(b.q_scale(), c.q_scale())
            self.assertEqual(b.q_zero_point(), c.q_zero_point())
            self.assertNotEqual(b.stride(), c.stride())
            # size is the same but the underlying data is different
            self.assertNotEqual(b.int_repr(), c.int_repr())
            # torch.equal is not supported for the cuda backend
            if device == 'cpu':
                self.assertFalse(torch.equal(b, c))
            else:
                self.assertRaises(RuntimeError, lambda: torch.equal(b, c))

            # a case where we can't view a non-contiguous Tensor
            a_int = torch.randint(0,
                                  100, [1, 2, 3, 4],
                                  device=device,
                                  dtype=dtype)
            a = torch._make_per_tensor_quantized_tensor(a_int,
                                                        scale=scale,
                                                        zero_point=zero_point)
            b = a.transpose(1, 2)  # swaps 2nd and 3rd dimension
            err_str = "view size is not compatible with input tensor's size and stride*"
            with self.assertRaisesRegex(RuntimeError, err_str):
                b.view(1, 4, 2, 3)
            # view on contiguous tensor is fine
            b.contiguous().view(1, 4, 2, 3)
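
A minimal standalone sketch (illustrative values, not part of the test above) of the property this test exercises: view() on a per-tensor quantized tensor keeps the scale and zero point and only changes the shape metadata over the same integer data.

import torch

q_int = torch.arange(6, dtype=torch.uint8).reshape(2, 3)
q = torch._make_per_tensor_quantized_tensor(q_int, scale=0.5, zero_point=1)
q2 = q.view(3, 2)
assert q2.q_scale() == 0.5 and q2.q_zero_point() == 1
# same integer payload, different shape metadata
assert torch.equal(q.int_repr().view(3, 2), q2.int_repr())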
Code example #2
    def test_qtensor_resize(self):
        scale, zero_point, dtype = 1.0, 2, torch.uint8
        sizes1 = [1, 2, 3, 4]
        sizes2 = [1 * 2, 3 * 4]
        sizes3 = [1, 2 * 3, 4]
        sizes4 = [1 * 2 * 3 * 4]
        sizes5 = [1, 2, 1, 3, 1, 4]

        q1_int = torch.randint(0, 100, sizes1, dtype=dtype)
        q1 = torch._make_per_tensor_quantized_tensor(q1_int,
                                                     scale=scale,
                                                     zero_point=zero_point)
        q2 = q1.resize(*sizes2)
        q3 = q2.resize(*sizes3)
        q4 = q3.resize(*sizes4)
        q5 = q4.resize(*sizes5)

        self.assertEqual(q1.numel(), q2.numel())
        self.assertEqual(q1.numel(), q3.numel())
        self.assertEqual(q1.numel(), q4.numel())
        self.assertEqual(q1.numel(), q5.numel())

        # Compare original and post-transpose
        a_int = torch.randint(0, 100, sizes1, dtype=dtype)
        a = torch._make_per_tensor_quantized_tensor(a_int,
                                                    scale=scale,
                                                    zero_point=zero_point)
        b = a.transpose(1, 2)  # swaps 2nd and 3rd dimension
        c = b.resize(*sizes1)  # Change the sizes back to the original

        self.assertEqual(a.size(), c.size())
        self.assertEqual(b.q_scale(), c.q_scale())
        self.assertEqual(b.q_zero_point(), c.q_zero_point())
        self.assertNotEqual(b.stride(), c.stride())
        # size is the same but the underlying data is different
        self.assertNotEqual(b.int_repr(), c.int_repr())
        self.assertFalse(torch.equal(b, c))

        # Throws an error if numel is wrong
        q1_int = torch.randint(0, 100, sizes1, dtype=dtype)
        q1 = torch._make_per_tensor_quantized_tensor(q1_int,
                                                     scale=scale,
                                                     zero_point=zero_point)
        err_str = "requested resize to*"
        with self.assertRaisesRegex(RuntimeError, err_str):
            q2 = q1.resize(*sizes1[:-1])
        # resize on both contiguous and non-contiguous tensor should be fine
        q3 = q1.resize(*sizes2)
        q4 = q1.contiguous().resize(*sizes2)
Code example #3
    def test_qtensor_creation(self):
        scale = 0.5
        zero_point = 10
        val = 100
        numel = 10
        q = torch._empty_affine_quantized([numel],
                                          scale=scale,
                                          zero_point=zero_point,
                                          dtype=torch.quint8)
        self.assertEqual(scale, q.q_scale())
        self.assertEqual(zero_point, q.q_zero_point())

        # create Tensor from uint8_t Tensor, scale and zero_point
        int_tensor = torch.randint(0, 100, size=(10, ), dtype=torch.uint8)
        q = torch._make_per_tensor_quantized_tensor(int_tensor, scale,
                                                    zero_point)
        self.assertEqual(int_tensor, q.int_repr())
        self.assertEqual(scale, q.q_scale())
        self.assertEqual(zero_point, q.q_zero_point())

        # create via empty_like
        q = torch._empty_affine_quantized([numel],
                                          scale=scale,
                                          zero_point=zero_point,
                                          dtype=torch.quint8)
        q_el = torch.empty_like(q)
        self.assertEqual(q.q_scale(), q_el.q_scale())
        self.assertEqual(q.q_zero_point(), q_el.q_zero_point())
        self.assertEqual(q.dtype, q_el.dtype)

        # create via empty_like but change the dtype (currently not supported)
        with self.assertRaises(RuntimeError):
            torch.empty_like(q, dtype=torch.qint8)
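
A minimal sketch (illustrative values) of the affine relationship that _make_per_tensor_quantized_tensor encodes: dequantize() should reproduce (int_repr() - zero_point) * scale.

import torch

scale, zero_point = 0.5, 10
int_tensor = torch.randint(0, 100, (10,), dtype=torch.uint8)
q = torch._make_per_tensor_quantized_tensor(int_tensor, scale, zero_point)
# dequantize() applies the affine mapping (x_int - zero_point) * scale
assert torch.allclose(q.dequantize(), (int_tensor.float() - zero_point) * scale)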
Code example #4
    def test_qtensor_copy(self):
        scale = 0.5
        zero_point = 10
        val = 100
        numel = 10
        # copy from same scale and zero_point
        q = torch._empty_affine_quantized([numel], scale=scale, zero_point=zero_point, dtype=torch.quint8)
        q2 = torch._empty_affine_quantized([numel], scale=scale, zero_point=zero_point, dtype=torch.quint8)
        q.copy_(q2)
        self.assertEqual(q.int_repr(), q2.int_repr())
        self.assertEqual(q.q_scale(), q2.q_scale())
        self.assertEqual(q.q_zero_point(), q2.q_zero_point())
        # copying from different scale and zero_point
        scale = 3.2
        zero_point = 5
        q = torch._empty_affine_quantized([numel], scale=scale, zero_point=zero_point, dtype=torch.quint8)
        # check original scale and zero_point are set correctly
        self.assertEqual(q.q_scale(), scale)
        self.assertEqual(q.q_zero_point(), zero_point)
        q.copy_(q2)
        # check scale and zero_point have been copied
        self.assertEqual(q, q2)
        # deep copy
        scale, zero_point, dtype = 1.0, 2, torch.uint8
        q_int = torch.randint(0, 100, [3, 5], dtype=dtype)
        scale, zero_point = 2.0, 3
        q = torch._make_per_tensor_quantized_tensor(q_int, scale=scale, zero_point=zero_point)
        qc = deepcopy(q)
        self.assertEqual(qc, q)

        # can't copy from quantized tensor to non-quantized tensor
        r = torch.empty([numel], dtype=torch.float)
        q = torch._empty_affine_quantized([numel], scale=scale, zero_point=zero_point, dtype=torch.quint8)
        with self.assertRaisesRegex(RuntimeError, "please use dequantize"):
            r.copy_(q)
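
A minimal sketch of the workaround suggested by the error message above: dequantize the quantized tensor before copying it into a float tensor.

import torch

q = torch._empty_affine_quantized([10], scale=0.5, zero_point=10, dtype=torch.quint8)
r = torch.empty([10], dtype=torch.float)
r.copy_(q.dequantize())  # allowed; r.copy_(q) would raise the RuntimeError above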
Code example #5
    def test_qtensor_dequantize_per_tensor(self):
        t = torch.arange(-10, 10, dtype=torch.int8)
        scale = 3
        zero_point = 2
        qt = torch._dequantize_per_tensor(t, scale, zero_point, torch.qint8)
        qt2 = torch._make_per_tensor_quantized_tensor(t, scale, zero_point)
        self.assertEqual(qt, qt2.dequantize())
Code example #6
    def test_torch_qtensor_deepcopy(self):
        # cuda is not supported yet
        device = "cpu"
        q_int = torch.randint(0, 100, [3, 5], device=device, dtype=torch.uint8)
        scale, zero_point = 2.0, 3
        q = torch._make_per_tensor_quantized_tensor(q_int, scale=scale, zero_point=zero_point)
        qc = deepcopy(q)
        self.assertEqual(qc, q)
Code example #7
    def test_qtensor_reshape(self):
        scale, zero_point, dtype = 1.0, 2, torch.uint8
        for device in get_supported_device_types():
            q_int = torch.randint(0, 100, [3, 5], dtype=dtype, device=device)
            q = torch._make_per_tensor_quantized_tensor(q_int,
                                                        scale=scale,
                                                        zero_point=zero_point)
            q2 = q.reshape([15])
            self.assertEqual(q.numel(), q2.numel())
            self.assertEqual(q2.size(), [15])
            # testing -1
            self.assertEqual(q, q2.reshape([3, -1]))

            a_int = torch.randint(0,
                                  100, [1, 2, 3, 4],
                                  dtype=dtype,
                                  device=device)
            a = torch._make_per_tensor_quantized_tensor(a_int,
                                                        scale=scale,
                                                        zero_point=zero_point)
            b = a.transpose(1, 2)  # swaps 2nd and 3rd dimension
            c = a.reshape(1, 3, 2, 4)  # does not change tensor layout
            self.assertEqual(b.size(), c.size())
            self.assertEqual(b.q_scale(), c.q_scale())
            self.assertEqual(b.q_zero_point(), c.q_zero_point())
            self.assertNotEqual(b.stride(), c.stride())
            self.assertNotEqual(b.int_repr(), c.int_repr())
            # torch.equal is not supported for the cuda backend
            if device == 'cpu':
                self.assertFalse(torch.equal(b, c))
            else:
                self.assertRaises(RuntimeError, lambda: torch.equal(b, c))

            # we can use reshape for non-contiguous Tensor
            a_int = torch.randint(0,
                                  100, [1, 2, 3, 4],
                                  dtype=dtype,
                                  device=device)
            a = torch._make_per_tensor_quantized_tensor(a_int,
                                                        scale=scale,
                                                        zero_point=zero_point)
            b = a.transpose(1, 2)  # swaps 2nd and 3rd dimension
            c = b.reshape(1, 4, 2, 3)
Code example #8
    def test_cuda_quantization_does_not_pin_memory(self):
        # Context - https://github.com/pytorch/pytorch/issues/41115
        x = torch.randn(3)
        self.assertEqual(x.is_pinned(), False)

        q_int = torch.randint(0, 100, [1, 2, 3], device="cuda", dtype=torch.uint8)
        q = torch._make_per_tensor_quantized_tensor(q_int, scale=0.1, zero_point=0)

        x = torch.randn(3)
        self.assertEqual(x.is_pinned(), False)
Code example #9
def quantized_tensor_to_pytorch(tensor: torch.Tensor,
                                scale,
                                zp,
                                num_bits,
                                mode,
                                dest_dtype,
                                per_channel=False,
                                channel_dim=0):
    """
    Convert a tensor quantized with quantization parameters calculated by CACP to a PyTorch "native" quantized
    tensor.

    We refer to quantization parameters calculated using either of:
      * quantization.symmetric_linear_quantization_params
      * quantization.asymmetric_linear_quantization_params

    And to tensors quantized using either of:
      * quantization.linear_quantize
      * quantization.linear_quantize_clamp

    Args:
        tensor (torch.Tensor): The tensor quantized in CACP
        scale (torch.Tensor): Scale factor calculated by CACP
        zp (torch.Tensor): Zero point calculated by CACP
        num_bits (int): Number of bits used for quantization in CACP
        mode (quantization.LinearQuantMode): The quantization mode used in CACP
        dest_dtype (torch.dtype): PyTorch quantized dtype to convert to. Must be one of: torch.quint8, torch.qint8
        per_channel (bool): Flag indicating whether the tensor was quantized per-channel
        channel_dim (int): If per_channel is set, this indicates the dimension of the channel in the tensor

    Returns:
        PyTorch quantized tensor (dtype one of torch.quint8 / torch.qint8 / torch.qint32)
    """
    assert (
        tensor == tensor.int()).all(), 'Tensor does not appear to be quantized'
    converted_scale, converted_zp = qparams_to_pytorch(scale,
                                                       zp,
                                                       num_bits,
                                                       mode,
                                                       dest_dtype,
                                                       reduce_range=False)
    zp_diff = -converted_zp.view(zp.shape) - zp

    if dest_dtype == torch.quint8:
        temp_dtype = torch.uint8
    elif dest_dtype == torch.qint8:
        temp_dtype = torch.int8
    else:  # dest_dtype == torch.qint32:
        temp_dtype = torch.int32
    tensor = (tensor - zp_diff).to(temp_dtype)
    if per_channel and scale.shape[channel_dim] > 1:
        return torch._make_per_channel_quantized_tensor(
            tensor, converted_scale, converted_zp, channel_dim)
    return torch._make_per_tensor_quantized_tensor(tensor, converted_scale,
                                                   converted_zp)
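
A hedged, CACP-independent sketch of the per-channel branch above: torch._make_per_channel_quantized_tensor wraps an integer tensor with one (scale, zero_point) pair per slice along channel_dim (values here are illustrative).

import torch

int_w = torch.randint(0, 255, (4, 3), dtype=torch.uint8)  # 4 output channels
scales = torch.tensor([0.1, 0.2, 0.3, 0.4])
zero_points = torch.zeros(4, dtype=torch.int64)
qw = torch._make_per_channel_quantized_tensor(int_w, scales, zero_points, axis=0)
# each channel dequantizes with its own scale (zero points are all zero here)
assert torch.allclose(qw.dequantize()[1], int_w[1].float() * 0.2)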
Code example #10
File: torch_serde.py Project: zeta1999/PySyft
def protobuf_tensor_deserializer(
        worker: AbstractWorker, protobuf_tensor: TensorDataPB) -> torch.Tensor:
    """"Strategy to deserialize a binary input using Protobuf"""
    size = tuple(protobuf_tensor.shape.dims)
    data = getattr(protobuf_tensor, "contents_" + protobuf_tensor.dtype)

    if protobuf_tensor.is_quantized:
        # Drop the 'q' from the beginning of the quantized dtype to get the int type
        dtype = TORCH_STR_DTYPE[protobuf_tensor.dtype[1:]]
        int_tensor = torch.tensor(data, dtype=dtype).reshape(size)
        # Automatically converts int types to quantized types
        return torch._make_per_tensor_quantized_tensor(
            int_tensor, protobuf_tensor.scale, protobuf_tensor.zero_point)
    else:
        dtype = TORCH_STR_DTYPE[protobuf_tensor.dtype]
        return torch.tensor(data, dtype=dtype).reshape(size)
Code example #11
def _clamp_weights(qweight, observer, scale, zp):
    if not _needs_weight_clamping(observer, qweight.dtype):
        return qweight

    observer = _get_weight_observer(observer)
    min_, max_ = observer.quant_min, observer.quant_max

    # Doing this because torch.ops.quantized.clamp() can't be used with a per_channel qscheme yet.
    qw_int_max = torch.clone(qweight.int_repr()).fill_(max_)
    qw_int_min = torch.clone(qweight.int_repr()).fill_(min_)
    qw_int = torch.minimum(torch.maximum(qweight.int_repr(), qw_int_min), qw_int_max)

    if observer.qscheme in [torch.per_tensor_symmetric,
                            torch.per_tensor_affine]:
        qweight = torch._make_per_tensor_quantized_tensor(qw_int, scale.item(), zp.item())
    elif observer.qscheme in [torch.per_channel_symmetric,
                              torch.per_channel_affine,
                              torch.per_channel_affine_float_qparams]:
        qweight = torch._make_per_channel_quantized_tensor(qw_int, scale, zp, axis=observer.ch_axis)
    else:
        raise ValueError(f"Unexpected qscheme {observer.qscheme}")
    return qweight
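
A minimal per-tensor sketch of the same idea, with assumed observer limits (quant_min/quant_max are illustrative, not taken from PyTorch's observer API): clamp the integer representation and re-wrap it with the unchanged scale and zero point.

import torch

qweight = torch.quantize_per_tensor(torch.randn(3, 4), scale=0.05, zero_point=0, dtype=torch.qint8)
quant_min, quant_max = -127, 127  # assumed observer limits
qw_int = qweight.int_repr().clamp(quant_min, quant_max)
clamped = torch._make_per_tensor_quantized_tensor(qw_int, qweight.q_scale(), qweight.q_zero_point())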
Code example #12
def deserialize_tensor(protobuf_tensor: TorchTensor_PB) -> th.Tensor:
    """
    This method converts a Protobuf torch tensor back into a
    Torch tensor.

    Args:
        protobuf_tensor (bin): Protobuf message of torch tensor.

    Returns:
        tensor (th.Tensor): a torch tensor converted from Protobuf
    """
    tensor_id = get_protobuf_id(protobuf_tensor.id)
    tags = protobuf_tensor.tags
    description = protobuf_tensor.description

    contents_type = protobuf_tensor.WhichOneof("contents")
    tensor_data_pb = getattr(protobuf_tensor, contents_type)

    size = tuple(tensor_data_pb.shape.dims)
    data = getattr(tensor_data_pb, "contents_" + tensor_data_pb.dtype)

    if tensor_data_pb.is_quantized:
        # Drop the 'q' from the beginning of the quantized dtype to get the int type
        dtype = TORCH_STR_DTYPE[tensor_data_pb.dtype[1:]]
        int_tensor = th.tensor(data, dtype=dtype).reshape(size)
        # Automatically converts int types to quantized types
        tensor = th._make_per_tensor_quantized_tensor(
            int_tensor, tensor_data_pb.scale, tensor_data_pb.zero_point)
    else:
        dtype = TORCH_STR_DTYPE[tensor_data_pb.dtype]
        tensor = th.tensor(data, dtype=dtype).reshape(size)

    tensor.id = tensor_id
    tensor.tags = set(tags)
    tensor.description = description

    return tensor
Code example #13
File: serializer.py Project: btxuyenHCMUS/MTCNN-GPU
    def add_qconv2d(self, node, fuse_code):
        assert node.inputsSize() == 4
        assert node.outputsSize() == 1

        (
            jit_image,
            jit_packed_weight,
            jit_scale,
            jit_zero_point,
        ) = node.inputs()

        _, out_scale = self.get_constant_value(jit_scale, "FloatType")
        _, out_zero_point = self.get_constant_value(jit_zero_point, "IntType")
        weight_ctype, packed_weight = self.get_constant_value(
            jit_packed_weight)
        assert weight_ctype.name() == "Conv2dPackedParamsBase"
        (
            pack_version,
            tensors,
            opt_tensors,
        ) = packed_weight.__getstate__()[0]
        assert pack_version == "2"
        packed_config, raw_weight = tensors
        raw_bias, = opt_tensors
        assert raw_bias is not None
        args = self.get_conv_pool_args_2d_from_pack(raw_weight.shape[2:4],
                                                    packed_config)

        assert raw_weight.qscheme() == torch.per_tensor_affine
        if raw_weight.dtype == torch.quint8:
            unsigned_weight = raw_weight
        else:
            assert raw_weight.dtype == torch.qint8
            unsigned_weight = torch._make_per_tensor_quantized_tensor(
                (raw_weight.int_repr().int() + 128).to(torch.uint8),
                scale=raw_weight.q_scale(),
                zero_point=raw_weight.q_zero_point() + 128)
        weight_scale = unsigned_weight.q_scale()
        _, image_oper = self.get_tensor_operand_by_jitval(jit_image)
        bias_scale = image_oper.scale * weight_scale
        int_bias = torch.quantize_per_tensor(raw_bias, bias_scale, 0,
                                             torch.qint32)
        bias_id = self.add_tensor_operand_for_weight(int_bias)

        multiplier = image_oper.scale * weight_scale / out_scale
        assert multiplier > 0
        if multiplier >= 1:
            raise Exception(
                "Quantized convolution multiplier is greater than 1.  "
                "This is supported by NNAPI, but not by most hardware backends.  "
                "Try training a model without quantization-aware training.  ")

        return self.add_conv2d_common(
            node.outputsAt(0),
            out_scale,
            out_zero_point,
            jit_image,
            unsigned_weight,
            bias_id,
            args,
            False,  # transpose
            fuse_code,
        )
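
A standalone sketch (illustrative weight, not NNAPI-specific) verifying the signed-to-unsigned rewrite used above: shifting int_repr() by +128 and the zero point by +128 leaves the dequantized values of a per-tensor affine qint8 weight unchanged.

import torch

w_q = torch.quantize_per_tensor(torch.randn(4, 3), scale=0.05, zero_point=0, dtype=torch.qint8)
w_u = torch._make_per_tensor_quantized_tensor(
    (w_q.int_repr().int() + 128).to(torch.uint8),
    scale=w_q.q_scale(),
    zero_point=w_q.q_zero_point() + 128)
# the quint8 view dequantizes to the same real values as the qint8 original
assert torch.allclose(w_q.dequantize(), w_u.dequantize())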
Code example #14
File: serializer.py Project: btxuyenHCMUS/MTCNN-GPU
    def add_qlinear(self, node):
        assert node.inputsSize() == 4
        assert node.outputsSize() == 1
        (
            jit_input,
            jit_packed_weight,
            jit_scale,
            jit_zero_point,
        ) = node.inputs()

        input_id, input_oper = self.get_tensor_operand_by_jitval(jit_input)
        # TODO: Support automatic reshape
        assert len(input_oper.shape) == 2

        _, out_scale = self.get_constant_value(jit_scale, "FloatType")
        _, out_zero_point = self.get_constant_value(jit_zero_point, "IntType")
        weight_ctype, packed_weight = self.get_constant_value(
            jit_packed_weight)
        assert weight_ctype.name() == "LinearPackedParamsBase"
        raw_weight, raw_bias = packed_weight.__getstate__()[0]
        assert raw_bias is not None

        assert len(raw_weight.shape) == 2
        assert len(raw_bias.shape) == 1
        assert raw_bias.shape[0] == raw_weight.shape[0]
        assert raw_weight.shape[1] == input_oper.shape[1]

        assert raw_weight.qscheme() == torch.per_tensor_affine
        if raw_weight.dtype == torch.quint8:
            unsigned_weight = raw_weight
        else:
            assert raw_weight.dtype == torch.qint8
            unsigned_weight = torch._make_per_tensor_quantized_tensor(
                (raw_weight.int_repr().int() + 128).to(torch.uint8),
                scale=raw_weight.q_scale(),
                zero_point=raw_weight.q_zero_point() + 128)
        weight_scale = unsigned_weight.q_scale()
        bias_scale = input_oper.scale * weight_scale
        int_bias = torch.quantize_per_tensor(raw_bias, bias_scale, 0,
                                             torch.qint32)
        bias_id = self.add_tensor_operand_for_weight(int_bias)

        multiplier = input_oper.scale * weight_scale / out_scale
        assert multiplier > 0
        if multiplier >= 1:
            raise Exception(
                "Quantized linear multiplier is greater than 1.  "
                "This is supported by NNAPI, but not by most hardware backends.  "
                "Try training a model without quantization-aware training.  ")

        # TODO: Transform at load time to share weights with CPU model.
        nnapi_weight_tensor = unsigned_weight.contiguous()
        weight_id = self.add_tensor_operand_for_weight(nnapi_weight_tensor)
        weight_oper = self.operands[weight_id]

        out_shape = (input_oper.shape[0], weight_oper.shape[0])
        out_oper = input_oper._replace(
            shape=out_shape,
            scale=out_scale,
            zero_point=out_zero_point,
        )

        inputs = [None] * 4
        inputs[0] = input_id
        inputs[1] = weight_id
        inputs[2] = bias_id
        inputs[3] = self.add_immediate_int_scalar(NNAPI_FuseCode.FUSED_NONE)

        outputs = [None] * 1
        outputs[0] = self.add_tensor_operand(node.outputsAt(0), out_oper)

        self.add_operation(NNAPI_OperationCode.FULLY_CONNECTED, inputs,
                           outputs)