コード例 #1
0
 def fp2fxp(fp_vars):
     """Apply the ``quantize_emu`` op to every entry of ``fp_vars``.

     Every call uses the same fixed settings (``data_type=9``, 8 bits of
     precision, 5 exponent bits, ``round_mode=1``) with ``allocate_copy=0``.
     The op outputs are discarded and the function returns ``None``.

     NOTE(review): presumably ``allocate_copy=0`` makes the op overwrite its
     input in place, which is why the result is not kept -- confirm against
     the quantemu op definition.

     Args:
       fp_vars: Iterable of values handed one at a time to ``quantize_emu``.
     """
     quant_settings = dict(
         allocate_copy=0,
         data_type=9,
         data_format='channels_first',
         precision=8,
         exponent_bits=5,
         channel_blocking_type=0,
         channels_per_block=0,
         round_mode=1,
     )
     for tensor in fp_vars:
         quantemu_ops.quantize_emu(tensor, **quant_settings)
コード例 #2
0
 def fp2fxp(fp_vars, is_vars=True):
   """Quantize every entry of ``fp_vars`` with ``quantize_emu``.

   All quantize ops are created first; the optional assign ops are created
   afterwards, one per (entry, quantized value) pair.

   Args:
     fp_vars: Sequence of values to quantize. It is iterated twice when
       ``is_vars`` is true, so it must be re-iterable.
     is_vars: When true, return ``tf.assign`` ops (each named ``'fp2fxp'``)
       that write the quantized value back into the matching entry of
       ``fp_vars``. When false, return the quantized outputs themselves.

   Returns:
     A list with one element per entry of ``fp_vars``: assign ops when
     ``is_vars`` is true, quantized outputs otherwise.
   """
   quantized = []
   for var in fp_vars:
     quantized.append(
         quantemu_ops.quantize_emu(var,
                                   data_type=9,
                                   data_format='channels_first',
                                   precision=8,
                                   exponent_bits=5,
                                   channel_blocking_type=0,
                                   channels_per_block=0,
                                   round_mode=1))

   if not is_vars:
     return quantized

   assign_ops = []
   for target, value in zip(fp_vars, quantized):
     assign_ops.append(tf.assign(target, value, name='fp2fxp'))
   return assign_ops
コード例 #3
0
 def testQuantEmuOp(self):
     """Run ``quantize_emu`` on a one-element float32 constant and print it.

     This is a smoke test only: the evaluated output is printed and nothing
     is asserted about its value.

     NOTE(review): ``quantemu_ops`` must already be available at module
     level; it was presumably loaded with
     ``tf.load_op_library('./quantemu.so')`` -- confirm.
     """
     with self.test_session(use_gpu=True, force_gpu=False):
         sample = tf.constant((10, ), dtype=tf.float32)
         quantized = quantemu_ops.quantize_emu(
             sample,
             data_type=9,
             data_format='channels_first',
             precision=8,
             exponent_bits=5,
             channel_blocking_type=0,
             channels_per_block=0,
             round_mode=0)
         print(quantized.eval())
コード例 #4
0
def _BaseFusedBatchNormGrad(op, version, *grad):
    """Return the gradients for the 3 inputs of BatchNorm.

    Before the fused gradient kernel is called, the incoming gradient and
    the op input can each be passed through ``quantemu_ops.quantize_emu``:

    * ``grad_y`` is quantized when ``ENABLE_QUANTOP_BNORM_GRAD`` is ``1``.
    * ``x`` is quantized when ``ENABLE_QUANTOP_BNORM`` is ``1``.

    The quantization settings (data type, precision, exponent bits, channel
    blocking and rounding mode) are read from ``QUANTEMU_*`` environment
    variables on every call.

    Args:
      op: The BatchNormOp for which we need to compute gradients.
      version: Integer indicating which version to use of the fused batch
        norm gradient (2 selects the v3 kernel, 1 the v2 kernel, anything
        else the original kernel).
      *grad: An argument list for tensors of gradients wrt the outputs
        with grad[0] as grad_y.

    Returns:
      In training mode, the result of the selected fused gradient kernel.
      In freeze mode, the tuple ``(grad_x, grad_scale, grad_offset, None,
      None)``.

      grad_x: gradient for x, which is scale * rsqrt(variance + epsilon) *
              [grad_y - mean(grad_y) - (x - mean(x)) *
              mean(grad_y * (x - mean(x))) / (variance + epsilon)]
              in training mode; grad_y * scale * rsqrt(pop_variance + epsilon)
              in freeze mode.

      grad_scale: gradient for scale, which is sum(grad_y * (x - mean(x)) *
                  rsqrt(variance + epsilon)) in training mode;
                  sum(grad_y * (x - pop_mean) * rsqrt(pop_variance + epsilon))
                  in freeze mode.

      grad_offset: gradient for offset, which is sum(grad_y) in training mode;
                   sum(grad_y) in freeze mode.
    """

    def _env_int(name, default):
        # Environment values arrive as strings; the default is used as-is.
        return int(os.getenv(name, default))

    x = op.inputs[0]
    grad_y = grad[0]

    quantize_grad = _env_int('ENABLE_QUANTOP_BNORM_GRAD', 0) == 1
    quantize_input = _env_int('ENABLE_QUANTOP_BNORM', 0) == 1

    # Translate the op's layout attribute into the name quantize_emu expects.
    data_format = op.get_attr("data_format")
    if data_format == b'NCHW':
        dformat = 'channels_first'
    elif data_format == b'None':
        dformat = 'unknown'
    else:
        dformat = 'channels_last'

    if quantize_grad:
        grad_y = quantemu_ops.quantize_emu(
            grad_y,
            data_format=dformat,
            data_type=_env_int('QUANTEMU_BNORM_DATA_TYPE', 0),
            precision=_env_int('QUANTEMU_PRECISION_BNORM_GRADS', 23),
            exponent_bits=_env_int('QUANTEMU_EXPBITS', 5),
            channel_blocking_type=_env_int(
                'QUANTEMU_CBLOCK_TYPE_BNORM_GRADS', 0),
            channels_per_block=_env_int('QUANTEMU_CBLOCK_SIZE_GRADS', 0),
            round_mode=_env_int('QUANTEMU_BNORM_RMODE_GRADS', 0))

    if quantize_input:
        x = quantemu_ops.quantize_emu(
            x,
            data_format=dformat,
            data_type=_env_int('QUANTEMU_BNORM_DATA_TYPE', 0),
            precision=_env_int('QUANTEMU_PRECISION_BNORM_INPUTS', 23),
            exponent_bits=_env_int('QUANTEMU_EXPBITS', 5),
            channel_blocking_type=_env_int(
                'QUANTEMU_CBLOCK_TYPE_BNORM_INPUTS', 0),
            channels_per_block=_env_int('QUANTEMU_CBLOCK_SIZE_INPUTS', 0),
            round_mode=_env_int('QUANTEMU_BNORM_RMODE_INPUTS', 0))

    scale = op.inputs[1]
    epsilon = op.get_attr("epsilon")
    is_training = op.get_attr("is_training")

    if version == 2:
        grad_fun = gen_nn_ops.fused_batch_norm_grad_v3
    elif version == 1:
        grad_fun = gen_nn_ops.fused_batch_norm_grad_v2
    else:
        grad_fun = gen_nn_ops.fused_batch_norm_grad

    if is_training:
        # Training mode: feed the statistics saved by the forward op.
        kwargs = dict(
            y_backprop=grad_y,
            x=x,
            scale=scale,
            reserve_space_1=op.outputs[3],
            reserve_space_2=op.outputs[4],
            epsilon=epsilon,
            data_format=data_format,
            is_training=is_training)
        if version == 2:
            kwargs["reserve_space_3"] = op.outputs[5]
        return grad_fun(**kwargs)

    # Freeze mode: use the population statistics and always run the kernel
    # in NHWC, transposing on the way in and out when the op is NCHW.
    pop_mean = op.inputs[3]
    pop_var = op.inputs[4]
    is_nchw = data_format == b"NCHW"
    if is_nchw:
        x = array_ops.transpose(x, [0, 2, 3, 1])
        grad_y = array_ops.transpose(grad_y, [0, 2, 3, 1])
    kwargs = dict(
        y_backprop=grad_y,
        x=x,
        scale=scale,
        reserve_space_1=pop_mean,
        reserve_space_2=pop_var,
        epsilon=epsilon,
        data_format="NHWC",
        is_training=is_training)
    if version == 2:
        kwargs["reserve_space_3"] = op.outputs[5]
    dx, dscale, doffset, _, _ = grad_fun(**kwargs)
    if is_nchw:
        dx = array_ops.transpose(dx, [0, 3, 1, 2])
    return dx, dscale, doffset, None, None
コード例 #5
0
def _Conv2DGrad(op, grad):
    """Gradient function for Conv2D with optional quantization emulation.

    Behaviour is driven by environment variables that are re-read on every
    call:

    * ``ENABLE_QUANTOP_CONV_GRAD == 1``: the incoming gradient is quantized
      before it is used in either backprop call.
    * ``ENABLE_QUANTOP_CONV == 1``: the forward input and/or filter are
      re-quantized for the backward pass, each only when its
      ``QUANTEMU_ALLOCATE_COPY_INPUTS`` / ``QUANTEMU_ALLOCATE_COPY_FILTERS``
      flag is ``1``. Otherwise the op's own inputs are used unchanged.
    * ``ENABLE_QUANTOP_CONV_WTGRAD == 1``: the filter gradient is quantized.
      This is honoured only when ``ENABLE_QUANTOP_CONV`` is also ``1``.

    When the input has exactly 3 channels, the gradient, input and filter
    precisions are replaced by ``QUANTEMU_FIRST_LAYER_PRECISION``; the
    weight-gradient precision is not overridden.

    Args:
      op: The Conv2D op being differentiated.
      grad: Gradient with respect to the op's output.

    Returns:
      A two-element list ``[input_gradient, filter_gradient]``.
    """

    def _env_int(name, default=0):
        # Environment values arrive as strings; the default is used as-is.
        return int(os.getenv(name, default))

    dilations = op.get_attr("dilations")
    strides = op.get_attr("strides")
    padding = op.get_attr("padding")
    use_cudnn_on_gpu = op.get_attr("use_cudnn_on_gpu")
    data_format = op.get_attr("data_format")
    # NOTE(review): the op's "explicit_paddings" attr is not forwarded to the
    # backprop calls below; confirm whether padding="EXPLICIT" has to be
    # supported on this path.
    shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]])

    quantize_grad = _env_int('ENABLE_QUANTOP_CONV_GRAD') == 1
    quantize_operands = _env_int('ENABLE_QUANTOP_CONV') == 1
    quantize_wtgrad = _env_int('ENABLE_QUANTOP_CONV_WTGRAD') == 1

    # Map the op layout to the name quantize_emu expects and pick the axis
    # that holds the input channel count.
    if data_format == b'NCHW':
        dformat = 'channels_first'
        channel_axis = 1
    elif data_format == b'None':
        dformat = 'unknown'
        channel_axis = 3
    else:
        dformat = 'channels_last'
        channel_axis = 3
    inp_channels = op.inputs[0].get_shape()[channel_axis].value

    # Only the value 1 enables re-quantization, so an unset flag means "off".
    input_copy = _env_int('QUANTEMU_ALLOCATE_COPY_INPUTS')
    filter_copy = _env_int('QUANTEMU_ALLOCATE_COPY_FILTERS')

    input_precision = _env_int('QUANTEMU_PRECISION_CONV_INPUTS', 23)
    filter_precision = _env_int('QUANTEMU_PRECISION_CONV_FILTERS', 23)
    grad_precision = _env_int('QUANTEMU_PRECISION_CONV_GRADS', 23)
    wtgrad_precision = _env_int('QUANTEMU_PRECISION_CONV_WTGRADS', 23)
    if inp_channels == 3:
        # A 3-channel input is treated as the first layer (presumably an RGB
        # image) and gets its own precision setting.
        first_layer_precision = _env_int('QUANTEMU_FIRST_LAYER_PRECISION', 23)
        grad_precision = first_layer_precision
        input_precision = first_layer_precision
        filter_precision = first_layer_precision

    if quantize_grad:
        grad = quantemu_ops.quantize_emu(
            grad,
            data_format=dformat,
            data_type=_env_int('QUANTEMU_GRAD_DATA_TYPE'),
            precision=grad_precision,
            exponent_bits=_env_int('QUANTEMU_EXPBITS', 5),
            channel_blocking_type=_env_int('QUANTEMU_CBLOCK_TYPE_CONV_GRADS'),
            channels_per_block=_env_int('QUANTEMU_CBLOCK_SIZE_GRADS'),
            round_mode=_env_int('QUANTEMU_RMODE_GRADS'))

    # Default to the op's own inputs; swap in quantized versions on request.
    acts = op.inputs[0]
    filters = op.inputs[1]
    if quantize_operands and input_copy == 1:
        acts = quantemu_ops.quantize_emu(
            acts,
            data_format=dformat,
            data_type=_env_int('QUANTEMU_INPUT_DATA_TYPE'),
            precision=input_precision,
            exponent_bits=_env_int('QUANTEMU_EXPBITS', 5),
            channel_blocking_type=_env_int('QUANTEMU_CBLOCK_TYPE_CONV_INPUTS'),
            channels_per_block=_env_int('QUANTEMU_CBLOCK_SIZE_INPUTS'),
            round_mode=_env_int('QUANTEMU_RMODE_INPUTS'))
    if quantize_operands and filter_copy == 1:
        filters = quantemu_ops.quantize_emu(
            filters,
            data_format=dformat,
            # filter_copy is known to be 1 here; pass it through unchanged.
            allocate_copy=filter_copy,
            data_type=_env_int('QUANTEMU_FILTER_DATA_TYPE'),
            precision=filter_precision,
            exponent_bits=_env_int('QUANTEMU_EXPBITS', 5),
            channel_blocking_type=_env_int(
                'QUANTEMU_CBLOCK_TYPE_CONV_FILTERS'),
            channels_per_block=_env_int('QUANTEMU_CBLOCK_SIZE_FILTERS'),
            round_mode=_env_int('QUANTEMU_RMODE_FILTERS'))

    outgrad = nn_ops.conv2d_backprop_input(
        shape_0,
        filters,
        grad,
        dilations=dilations,
        strides=strides,
        padding=padding,
        use_cudnn_on_gpu=use_cudnn_on_gpu,
        data_format=data_format)
    wtgrad = nn_ops.conv2d_backprop_filter(
        acts,
        shape_1,
        grad,
        dilations=dilations,
        strides=strides,
        padding=padding,
        use_cudnn_on_gpu=use_cudnn_on_gpu,
        data_format=data_format)

    # Weight-gradient quantization is only applied on the quantized path.
    if quantize_operands and quantize_wtgrad:
        wtgrad = quantemu_ops.quantize_emu(
            wtgrad,
            data_format=dformat,
            data_type=_env_int('QUANTEMU_WTGRAD_DATA_TYPE'),
            precision=wtgrad_precision,
            exponent_bits=_env_int('QUANTEMU_EXPBITS', 5),
            channel_blocking_type=_env_int(
                'QUANTEMU_CBLOCK_TYPE_CONV_WTGRADS'),
            channels_per_block=_env_int('QUANTEMU_CBLOCK_SIZE_WTGRADS'),
            round_mode=_env_int('QUANTEMU_RMODE_WTGRADS'))

    return [outgrad, wtgrad]
コード例 #6
0
    def call(self, inputs):
        """Compute ``activation(inputs . kernel + bias)`` for this layer.

        When the environment variable ``ENABLE_QUANTOP_DENSE`` is ``1``, both
        the inputs and the kernel are passed through
        ``quantemu_ops.quantize_emu`` before the matrix product. The settings
        come from ``QUANTEMU_*`` environment variables, re-read on every
        call. The bias and the activation are never quantized here.

        Args:
          inputs: Value convertible to a tensor of dtype ``self.dtype``.

        Returns:
          The layer output, after ``self.activation`` when one is set.
        """
        inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)

        # Operands of the product; replaced by quantized versions on request.
        lhs = inputs
        kernel = self.kernel
        if int(os.getenv('ENABLE_QUANTOP_DENSE', 0)) == 1:
            lhs = quantemu_ops.quantize_emu(
                inputs,
                data_format='unknown',
                allocate_copy=int(os.getenv('QUANTEMU_ALLOCATE_COPY_INPUTS',
                                            0)),
                data_type=int(os.getenv('QUANTEMU_DENSE_DATA_TYPE', 0)),
                precision=int(os.getenv('QUANTEMU_PRECISION_DENSE_INPUTS',
                                        23)),
                exponent_bits=int(os.getenv('QUANTEMU_EXPBITS', 5)),
                round_mode=int(os.getenv('QUANTEMU_RMODE_INPUTS', 0)))
            kernel = quantemu_ops.quantize_emu(
                kernel,
                data_format='unknown',
                allocate_copy=int(
                    os.getenv('QUANTEMU_ALLOCATE_COPY_FILTERS', 0)),
                data_type=int(os.getenv('QUANTEMU_DENSE_DATA_TYPE', 0)),
                precision=int(os.getenv('QUANTEMU_PRECISION_DENSE_FILTERS',
                                        23)),
                exponent_bits=int(os.getenv('QUANTEMU_EXPBITS', 5)),
                round_mode=int(os.getenv('QUANTEMU_RMODE_FILTERS', 0)))

        # Rank and static shape are taken from the unquantized inputs.
        rank = common_shapes.rank(inputs)
        if rank > 2:
            # Broadcasting is required for the inputs.
            outputs = standard_ops.tensordot(lhs, kernel, [[rank - 1], [0]])
            # Reshape the output back to the original ndim of the input.
            if not context.executing_eagerly():
                shape = inputs.get_shape().as_list()
                outputs.set_shape(shape[:-1] + [self.units])
        else:
            outputs = gen_math_ops.mat_mul(lhs, kernel)

        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            return self.activation(outputs)  # pylint: disable=not-callable
        return outputs