Example #1
import math

import tensorflow as tf


def transform_fc_online(tensor, exponent, mantissa, chnl_group):
    # Online means the shared exponent is not fixed:
    #     it is determined during the inference
    # Quantize the activation tensor along the channel dimension
    # Here we require the input tensor to have the shape: [batch, channel]
    # chnl_group: the number of channels in one group, where one group shares the same exponent
    # bfp_quantize is assumed to be defined elsewhere in the surrounding module
    shp = tf.shape(tensor)

    if chnl_group == -1:
        chnl_group = shp[1]
    number_of_blocks = math.ceil(shp[1] / chnl_group)

    if shp[1] % chnl_group == 0:
        # shp[1] is divisible by the group size, so the channels split into
        # equal blocks and each block of chnl_group channels shares one exponent
        tensor = tf.reshape(tensor, (shp[0], number_of_blocks, chnl_group))
        tensor = bfp_quantize(tensor,
                              exponent,
                              mantissa,
                              quant_dim=len(tensor.shape) - 1)
        tensor = tf.reshape(tensor, (shp[0], shp[1]))
    else:
        raise ValueError(
            "Channel is not divisible by channel group while bfp quantizing the FC"
        )

    return tensor
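
None of the examples here include bfp_quantize itself, so the sketch below shows what such a block-floating-point kernel typically does, in the TensorFlow style of Examples #1 and #2. It is a minimal illustration under assumed semantics (shared exponent = floor(log2) of each block's max magnitude, sign-magnitude mantissas), not the listing's actual implementation:

import tensorflow as tf

def bfp_quantize(tensor, exponent, mantissa, quant_dim):
    # Illustrative sketch only; the real bfp_quantize in the source module may differ.
    # Shared exponent per block: floor(log2(max |x|)) along quant_dim,
    # clipped to the range representable with `exponent` bits.
    max_v = tf.reduce_max(tf.abs(tensor), axis=quant_dim, keepdims=True)
    shared_exp = tf.floor(tf.math.log(max_v + 1e-38) / tf.math.log(2.0))
    shared_exp = tf.clip_by_value(shared_exp,
                                  -2.0 ** (exponent - 1),
                                  2.0 ** (exponent - 1) - 1)
    # Round each element to `mantissa` sign-magnitude bits relative to the shared exponent
    scale = tf.pow(2.0, shared_exp - (mantissa - 1))
    mant = tf.clip_by_value(tf.round(tensor / scale),
                            -2.0 ** (mantissa - 1),
                            2.0 ** (mantissa - 1) - 1)
    return mant * scale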
Example #2
import math

import tensorflow as tf


def transform_weight(tensor, exponent, mantissa, filter_group):
    # Quantize the weight tensor along the filter dimension
    # Here we require the weight to have the shape: [filter, channel, k, k]
    # filter_group: the number of filters in one group, where one group shares the same exponent
    # bfp_quantize is assumed to be defined elsewhere in the surrounding module

    shp = tf.shape(tensor)
    number_of_blocks = math.ceil(shp[0] / filter_group)
    if shp[0] % filter_group == 0:
        # shp[0] is divisible by the group size
        # Therefore just one tensor will be created
        tensor = tf.reshape(
            tensor,
            (number_of_blocks, filter_group * shp[1] * shp[2] * shp[3]))
        tensor = bfp_quantize(tensor,
                              exponent,
                              mantissa,
                              quant_dim=len(tf.shape(tensor)) - 1)
        tensor = tf.reshape(tensor, (shp[0], shp[1], shp[2], shp[3]))
        return tensor

    else:
        # shp[0] is not divisible by filter_group
        # Therefore two tensors will be created
        print('Warning: filter count is not divisible by filter_group')

        if number_of_blocks == 1:
            # The filter count is smaller than the group size, so a single block covers everything
            tensor = tf.reshape(tensor, (1, shp[0] * shp[1] * shp[2] * shp[3]))
            tensor = bfp_quantize(tensor,
                                  exponent,
                                  mantissa,
                                  quant_dim=len(tf.shape(tensor)) - 1)
            tensor = tf.reshape(tensor, (shp[0], shp[1], shp[2], shp[3]))
            return tensor
        else:
            # Split into two parts: tensor1 holds the first (number_of_blocks - 1)
            # full groups, tensor2 holds the remaining filters
            first_filter = ((number_of_blocks - 1) * filter_group)
            tensor1 = tensor[0:first_filter, :, :, :]
            t1_shp = tf.shape(tensor1)
            tensor2 = tensor[first_filter:shp[0], :, :, :]
            t2_shp = tf.shape(tensor2)

            # Perform quantization
            tensor1 = tf.reshape(tensor1,
                                 (number_of_blocks - 1,
                                  filter_group * shp[1] * shp[2] * shp[3]))
            tensor2 = tf.reshape(
                tensor2,
                (1, (shp[0] - first_filter) * shp[1] * shp[2] * shp[3]))
            tensor1 = bfp_quantize(tensor1,
                                   exponent,
                                   mantissa,
                                   quant_dim=len(tensor1.shape) - 1)
            tensor2 = bfp_quantize(tensor2,
                                   exponent,
                                   mantissa,
                                   quant_dim=len(tensor2.shape) - 1)

            # Reshape and concatenate the two parts back together
            # (tf tensors do not support item assignment)
            tensor1 = tf.reshape(tensor1, t1_shp)
            tensor2 = tf.reshape(tensor2, t2_shp)
            tensor = tf.concat([tensor1, tensor2], axis=0)
            return tensor

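
A hedged usage sketch for the two TensorFlow helpers, assuming a bfp_quantize like the sketch after Example #1 is in scope; the shapes, group sizes, and bit-widths are illustrative values, not ones prescribed by the source:

import tensorflow as tf

# Quantize a conv weight of shape [filter, channel, k, k] in groups of 16 filters
weights = tf.random.normal((64, 32, 3, 3))
q_weights = transform_weight(weights, exponent=8, mantissa=8, filter_group=16)

# Quantize an FC activation of shape [batch, channel] in groups of 16 channels
fc_act = tf.random.normal((8, 128))
q_fc = transform_fc_online(fc_act, exponent=8, mantissa=8, chnl_group=16)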
Example #3
import math

import torch


def transform_activation_online(tensor,
                                exponent,
                                mantissa,
                                chnl_group,
                                is_3d=False):
    # Online means the shared exponent is not fixed:
    #     it is determined during the inference
    # Quantize the activation tensor along the channel dimension
    # Here we require the input tensor to have the shape: [batch, channel, height, width]
    # chnl_group: the number of channels in one group, where one group shares the same exponent
    # bfp_quantize is assumed to be defined elsewhere in the surrounding module
    if is_3d:
        # Fold the depth dimension into the channel dimension for 5-D inputs
        orig_shape = tensor.shape
        tensor = torch.reshape(tensor,
                               (orig_shape[0], orig_shape[1] * orig_shape[2],
                                orig_shape[3], orig_shape[4]))
    shp = tensor.shape
    if chnl_group == -1:
        chnl_group = shp[1]
    number_of_blocks = math.ceil(shp[1] / chnl_group)
    if shp[1] % chnl_group == 0:
        # shp[1] is divisible by block size
        # Therefore just one tensor will be created
        tensor = torch.reshape(
            tensor, (shp[0], number_of_blocks, chnl_group * shp[2] * shp[3]))
        tensor = bfp_quantize(tensor,
                              exponent,
                              mantissa,
                              quant_dim=len(tensor.shape) - 1)
        tensor = torch.reshape(tensor, (shp[0], shp[1], shp[2], shp[3]))
        if is_3d:
            # Restore the original 5-D shape
            tensor = torch.reshape(tensor, orig_shape)
        return tensor

    else:
        # shp[1] is not divisible by chnl_group
        # Therefore two tensors will be created
        print('Warning: channel count is not divisible by chnl_group')

        if number_of_blocks == 1:
            # The channel count is smaller than the group size, so a single block covers everything
            tensor = torch.reshape(tensor,
                                   (shp[0], 1, shp[1] * shp[2] * shp[3]))
            tensor = bfp_quantize(tensor,
                                  exponent,
                                  mantissa,
                                  quant_dim=len(tensor.shape) - 1)
            tensor = torch.reshape(tensor, (shp[0], shp[1], shp[2], shp[3]))
            if is_3d:
                tensor = torch.reshape(tensor, orig_shape)
            return tensor
        else:
            # Split into two parts: tensor1 holds the first (number_of_blocks - 1)
            # full groups, tensor2 holds the remaining channels
            first_chnl = ((number_of_blocks - 1) * chnl_group)
            tensor1 = tensor[:, 0:first_chnl, :, :]
            t1_shp = tensor1.shape
            tensor2 = tensor[:, first_chnl:shp[1], :, :]
            t2_shp = tensor2.shape

            # Perform quantization
            tensor1 = torch.reshape(
                tensor1,
                (shp[0], number_of_blocks - 1, chnl_group * shp[2] * shp[3]))
            tensor2 = torch.reshape(tensor2,
                                    (shp[0], 1,
                                     (shp[1] - first_chnl) * shp[2] * shp[3]))
            tensor1 = bfp_quantize(tensor1,
                                   exponent,
                                   mantissa,
                                   quant_dim=len(tensor1.shape) - 1)
            tensor2 = bfp_quantize(tensor2,
                                   exponent,
                                   mantissa,
                                   quant_dim=len(tensor2.shape) - 1)

            # Reshape and write both parts back into the original tensor
            tensor1 = torch.reshape(tensor1, t1_shp)
            tensor2 = torch.reshape(tensor2, t2_shp)
            tensor[:, 0:first_chnl, :, :] = tensor1
            tensor[:, first_chnl:shp[1], :, :] = tensor2
            if is_3d:
                tensor = torch.reshape(tensor, orig_shape)
            return tensor

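
A hedged usage sketch for the PyTorch helper, again assuming bfp_quantize is in scope (a torch version of the sketch after Example #1 would do); batch size, channel counts, and bit-widths are illustrative:

import torch

# [batch, channel, height, width] activations, groups of 16 channels
act = torch.randn(8, 64, 14, 14)
q_act = transform_activation_online(act, exponent=8, mantissa=8, chnl_group=16)

# 5-D activations [batch, channel, depth, height, width]: is_3d=True folds
# depth into the channel dimension before grouping; chnl_group=-1 puts all
# (channel * depth) channels in a single group
act_3d = torch.randn(8, 16, 4, 14, 14)
q_act_3d = transform_activation_online(
    act_3d, exponent=8, mantissa=8, chnl_group=-1, is_3d=True)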