def fp2fxp(fp_vars):
    """Apply the quantemu quantization op to every element of `fp_vars`.

    Each element is handed to `quantemu_ops.quantize_emu` with
    `allocate_copy=0` and a fixed configuration (data_type=9,
    'channels_first', precision=8, exponent_bits=5, no channel blocking,
    round_mode=1).

    NOTE(review): `allocate_copy=0` presumably asks the op to quantize the
    tensor in place instead of writing to a fresh buffer -- confirm against
    the op kernel.

    Args:
      fp_vars: Iterable of tensors/variables accepted by `quantize_emu`.

    Returns:
      A list holding the `quantize_emu` result for each element, in input
      order. The results are returned (rather than discarded) so that a
      caller that is building a graph has handles it can fetch to actually
      run the created ops; callers that ignore the return value are
      unaffected.
    """
    return [
        quantemu_ops.quantize_emu(
            v,
            allocate_copy=0,
            data_type=9,
            data_format='channels_first',
            precision=8,
            exponent_bits=5,
            channel_blocking_type=0,
            channels_per_block=0,
            round_mode=1)
        for v in fp_vars
    ]
def fp2fxp(fp_vars, is_vars=True):
    """Quantize every element of `fp_vars` with the quantemu op.

    All elements are quantized first (data_type=9, 'channels_first',
    precision=8, exponent_bits=5, no channel blocking, round_mode=1).

    Args:
      fp_vars: Sequence of tensors/variables to quantize. It is traversed
        twice when `is_vars` is true, so it should be re-iterable.
      is_vars: When true, each quantized value is assigned back to its
        source element via `tf.assign`; when false the quantized tensors
        are returned as they are.

    Returns:
      A list of `tf.assign` results (each named 'fp2fxp') if `is_vars` is
      true, otherwise the list of quantized tensors.
    """
    emu_config = dict(
        data_type=9,
        data_format='channels_first',
        precision=8,
        exponent_bits=5,
        channel_blocking_type=0,
        channels_per_block=0,
        round_mode=1)

    quantized = [
        quantemu_ops.quantize_emu(source, **emu_config) for source in fp_vars
    ]
    if not is_vars:
        return quantized

    # Write every quantized value back into the element it was derived from.
    return [
        tf.assign(target, value, name='fp2fxp')
        for target, value in zip(fp_vars, quantized)
    ]
def testQuantEmuOp(self):
    """Smoke-run `quantize_emu` on a one-element float32 constant.

    The op is built and evaluated inside a test session that may use a GPU
    but does not force one. The evaluated result is printed; nothing is
    asserted.
    """
    emu_config = dict(
        data_type=9,
        data_format='channels_first',
        precision=8,
        exponent_bits=5,
        channel_blocking_type=0,
        channels_per_block=0,
        round_mode=0)

    with self.test_session(use_gpu=True, force_gpu=False):
        sample = tf.constant((10, ), dtype=tf.float32)
        quantized = quantemu_ops.quantize_emu(sample, **emu_config)
        # eval() runs in the session installed as default by the `with`.
        print(quantized.eval())
def _BaseFusedBatchNormGrad(op, version, *grad):
    """Compute the gradients for the inputs of a fused BatchNorm op.

    Before the gradient kernel is invoked, `grad_y` and/or `x` can be passed
    through `quantemu_ops.quantize_emu`; this is switched on with the
    ENABLE_QUANTOP_BNORM_GRAD / ENABLE_QUANTOP_BNORM environment variables
    (value 1) and configured by the QUANTEMU_* variables read below.

    Args:
      op: The BatchNormOp for which we need to compute gradients.
      version: Integer selecting the gradient kernel: 2 uses
        fused_batch_norm_grad_v3, 1 uses fused_batch_norm_grad_v2, anything
        else uses fused_batch_norm_grad.
      *grad: Gradients wrt the op outputs; grad[0] is grad_y.

    Returns:
      In training mode, whatever the selected gradient kernel returns. In
      freeze mode, the tuple (grad_x, grad_scale, grad_offset, None, None).

      grad_x: in training mode,
        scale * rsqrt(variance + epsilon) *
        [grad_y - mean(grad_y) - (x - mean(x)) *
         mean(grad_y * (x - mean(x))) / (variance + epsilon)];
        in freeze mode, grad_y * scale * rsqrt(pop_variance + epsilon).
      grad_scale: in training mode,
        sum(grad_y * (x - mean(x)) * rsqrt(variance + epsilon));
        in freeze mode,
        sum(grad_y * (x - pop_mean) * rsqrt(pop_variance + epsilon)).
      grad_offset: sum(grad_y) in both modes.
    """

    def env_int(name, default):
        # Parsed lazily, so a knob is only read on the path that needs it.
        return int(os.getenv(name, default))

    x = op.inputs[0]
    grad_y = grad[0]
    quantize_grad = env_int('ENABLE_QUANTOP_BNORM_GRAD', 0) == 1
    quantize_input = env_int('ENABLE_QUANTOP_BNORM', 0) == 1

    data_format = op.get_attr("data_format")
    is_nchw = data_format == b"NCHW"

    # Layout label understood by the quantization op.
    if is_nchw:
        layout = 'channels_first'
    elif data_format == b'None':
        layout = 'unknown'
    else:
        layout = 'channels_last'

    if quantize_grad:
        grad_y = quantemu_ops.quantize_emu(
            grad_y,
            data_format=layout,
            data_type=env_int('QUANTEMU_BNORM_DATA_TYPE', 0),
            precision=env_int('QUANTEMU_PRECISION_BNORM_GRADS', 23),
            exponent_bits=env_int('QUANTEMU_EXPBITS', 5),
            channel_blocking_type=env_int(
                'QUANTEMU_CBLOCK_TYPE_BNORM_GRADS', 0),
            channels_per_block=env_int('QUANTEMU_CBLOCK_SIZE_GRADS', 0),
            round_mode=env_int('QUANTEMU_BNORM_RMODE_GRADS', 0))

    if quantize_input:
        x = quantemu_ops.quantize_emu(
            x,
            data_format=layout,
            data_type=env_int('QUANTEMU_BNORM_DATA_TYPE', 0),
            precision=env_int('QUANTEMU_PRECISION_BNORM_INPUTS', 23),
            exponent_bits=env_int('QUANTEMU_EXPBITS', 5),
            channel_blocking_type=env_int(
                'QUANTEMU_CBLOCK_TYPE_BNORM_INPUTS', 0),
            channels_per_block=env_int('QUANTEMU_CBLOCK_SIZE_INPUTS', 0),
            round_mode=env_int('QUANTEMU_BNORM_RMODE_INPUTS', 0))

    scale = op.inputs[1]
    epsilon = op.get_attr("epsilon")
    is_training = op.get_attr("is_training")

    if version == 2:
        grad_fun = gen_nn_ops.fused_batch_norm_grad_v3
    elif version == 1:
        grad_fun = gen_nn_ops.fused_batch_norm_grad_v2
    else:
        grad_fun = gen_nn_ops.fused_batch_norm_grad

    if is_training:
        # Training mode: reuse the statistics saved by the forward op.
        grad_args = dict(
            y_backprop=grad_y,
            x=x,
            scale=scale,
            reserve_space_1=op.outputs[3],
            reserve_space_2=op.outputs[4],
            epsilon=epsilon,
            data_format=data_format,
            is_training=is_training)
        if version == 2:
            grad_args["reserve_space_3"] = op.outputs[5]
        return grad_fun(**grad_args)

    # Freeze mode: use the population statistics fed to the forward op. The
    # kernel is always invoked as NHWC here, so NCHW tensors are transposed
    # on the way in and the input gradient is transposed back on the way out.
    pop_mean = op.inputs[3]
    pop_var = op.inputs[4]
    if is_nchw:
        x = array_ops.transpose(x, [0, 2, 3, 1])
        grad_y = array_ops.transpose(grad_y, [0, 2, 3, 1])

    grad_args = dict(
        y_backprop=grad_y,
        x=x,
        scale=scale,
        reserve_space_1=pop_mean,
        reserve_space_2=pop_var,
        epsilon=epsilon,
        data_format="NHWC",
        is_training=is_training)
    if version == 2:
        grad_args["reserve_space_3"] = op.outputs[5]

    dx, dscale, doffset, _, _ = grad_fun(**grad_args)
    if is_nchw:
        dx = array_ops.transpose(dx, [0, 3, 1, 2])
    return dx, dscale, doffset, None, None
def _Conv2DGrad(op, grad):
    """Gradient function for Conv2D with optional quantization emulation.

    The incoming gradient, the forward activations, the filters and the
    resulting weight gradient can each be passed through
    `quantemu_ops.quantize_emu`, controlled by environment variables:

      * ENABLE_QUANTOP_CONV_GRAD == 1: quantize `grad` before backprop.
      * ENABLE_QUANTOP_CONV == 1: enables the activation/filter handling
        below and is a precondition for weight-gradient quantization.
          - QUANTEMU_ALLOCATE_COPY_INPUTS == 1: quantize op.inputs[0].
          - QUANTEMU_ALLOCATE_COPY_FILTERS == 1: quantize op.inputs[1].
      * ENABLE_QUANTOP_CONV_WTGRAD == 1 (and ENABLE_QUANTOP_CONV == 1):
        quantize the computed weight gradient.

    When the input has exactly 3 channels, QUANTEMU_FIRST_LAYER_PRECISION
    replaces the gradient, input and filter precisions.

    Args:
      op: The Conv2D op whose gradients are requested.
      grad: Gradient wrt the output of `op`.

    Returns:
      A two-element list: [gradient wrt the input, gradient wrt the filter].
    """

    def env_int(name, default):
        # Parsed lazily, so a knob is only read on the path that needs it.
        return int(os.getenv(name, default))

    dilations = op.get_attr("dilations")
    strides = op.get_attr("strides")
    padding = op.get_attr("padding")
    explicit_paddings = op.get_attr("explicit_paddings")
    use_cudnn_on_gpu = op.get_attr("use_cudnn_on_gpu")
    data_format = op.get_attr("data_format")
    shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]])

    # The nn_ops backprop wrappers have no `explicit_paddings` argument; they
    # take explicit padding as a list of [before, after] pairs, one pair per
    # dimension, through `padding`. Forwarding the bare "EXPLICIT" attribute
    # would drop the padding amounts, so rebuild the pair list from the flat
    # attribute.
    if padding in (b"EXPLICIT", "EXPLICIT"):
        padding = [
            list(explicit_paddings[i:i + 2])
            for i in range(0, len(explicit_paddings), 2)
        ]

    enable_quantop_grad = env_int('ENABLE_QUANTOP_CONV_GRAD', 0)
    enable_quantop_input = env_int('ENABLE_QUANTOP_CONV', 0)
    enable_quantop_wtgrad = env_int('ENABLE_QUANTOP_CONV_WTGRAD', 0)

    # Layout label for the quantization op, and where the channel count lives.
    if data_format == b'NCHW':
        dformat = 'channels_first'
        channel_axis = 1
    elif data_format == b'None':
        dformat = 'unknown'
        channel_axis = 3
    else:
        dformat = 'channels_last'
        channel_axis = 3
    inp_channels = op.inputs[0].get_shape()[channel_axis].value

    # Only the value 1 enables re-quantization below; the default (23) is
    # therefore equivalent to "off" when the variable is unset.
    quant_input_copy = env_int('QUANTEMU_ALLOCATE_COPY_INPUTS', 23)
    quant_filter_copy = env_int('QUANTEMU_ALLOCATE_COPY_FILTERS', 23)

    quant_input_precision = env_int('QUANTEMU_PRECISION_CONV_INPUTS', 23)
    quant_filter_precision = env_int('QUANTEMU_PRECISION_CONV_FILTERS', 23)
    quant_grad_precision = env_int('QUANTEMU_PRECISION_CONV_GRADS', 23)
    quant_wtgrad_precision = env_int('QUANTEMU_PRECISION_CONV_WTGRADS', 23)

    if inp_channels == 3:
        # A 3-channel input gets its own precision setting for gradients,
        # inputs and filters (the weight-gradient precision is not touched).
        first_layer_precision = env_int('QUANTEMU_FIRST_LAYER_PRECISION', 23)
        quant_grad_precision = first_layer_precision
        quant_input_precision = first_layer_precision
        quant_filter_precision = first_layer_precision

    if enable_quantop_grad == 1:
        grad = quantemu_ops.quantize_emu(
            grad,
            data_format=dformat,
            data_type=env_int('QUANTEMU_GRAD_DATA_TYPE', 0),
            precision=quant_grad_precision,
            exponent_bits=env_int('QUANTEMU_EXPBITS', 5),
            channel_blocking_type=env_int(
                'QUANTEMU_CBLOCK_TYPE_CONV_GRADS', 0),
            channels_per_block=env_int('QUANTEMU_CBLOCK_SIZE_GRADS', 0),
            round_mode=env_int('QUANTEMU_RMODE_GRADS', 0))

    quantize_operands = enable_quantop_input == 1

    # Operands of the two backprop ops; replaced by quantized versions below
    # when requested.
    acts = op.inputs[0]
    filters = op.inputs[1]

    if quantize_operands and quant_input_copy == 1:
        acts = quantemu_ops.quantize_emu(
            op.inputs[0],
            data_format=dformat,
            data_type=env_int('QUANTEMU_INPUT_DATA_TYPE', 0),
            precision=quant_input_precision,
            exponent_bits=env_int('QUANTEMU_EXPBITS', 5),
            channel_blocking_type=env_int(
                'QUANTEMU_CBLOCK_TYPE_CONV_INPUTS', 0),
            channels_per_block=env_int('QUANTEMU_CBLOCK_SIZE_INPUTS', 0),
            round_mode=env_int('QUANTEMU_RMODE_INPUTS', 0))

    if quantize_operands and quant_filter_copy == 1:
        filters = quantemu_ops.quantize_emu(
            op.inputs[1],
            data_format=dformat,
            allocate_copy=env_int('QUANTEMU_ALLOCATE_COPY_FILTERS', 0),
            data_type=env_int('QUANTEMU_FILTER_DATA_TYPE', 0),
            precision=quant_filter_precision,
            exponent_bits=env_int('QUANTEMU_EXPBITS', 5),
            channel_blocking_type=env_int(
                'QUANTEMU_CBLOCK_TYPE_CONV_FILTERS', 0),
            channels_per_block=env_int('QUANTEMU_CBLOCK_SIZE_FILTERS', 0),
            round_mode=env_int('QUANTEMU_RMODE_FILTERS', 0))

    outgrad = nn_ops.conv2d_backprop_input(
        shape_0,
        filters,
        grad,
        dilations=dilations,
        strides=strides,
        padding=padding,
        use_cudnn_on_gpu=use_cudnn_on_gpu,
        data_format=data_format)
    wtgrad = nn_ops.conv2d_backprop_filter(
        acts,
        shape_1,
        grad,
        dilations=dilations,
        strides=strides,
        padding=padding,
        use_cudnn_on_gpu=use_cudnn_on_gpu,
        data_format=data_format)

    if quantize_operands and enable_quantop_wtgrad == 1:
        wtgrad = quantemu_ops.quantize_emu(
            wtgrad,
            data_format=dformat,
            data_type=env_int('QUANTEMU_WTGRAD_DATA_TYPE', 0),
            precision=quant_wtgrad_precision,
            exponent_bits=env_int('QUANTEMU_EXPBITS', 5),
            channel_blocking_type=env_int(
                'QUANTEMU_CBLOCK_TYPE_CONV_WTGRADS', 0),
            channels_per_block=env_int('QUANTEMU_CBLOCK_SIZE_WTGRADS', 0),
            round_mode=env_int('QUANTEMU_RMODE_WTGRADS', 0))

    return [outgrad, wtgrad]
def call(self, inputs):
    """Apply the dense transform, optionally on quantization-emulated operands.

    When the ENABLE_QUANTOP_DENSE environment variable is 1, both the inputs
    and the kernel are passed through `quantemu_ops.quantize_emu` (configured
    by the QUANTEMU_* variables read below) before being multiplied;
    otherwise they are multiplied as they are.

    Args:
      inputs: Value convertible to a tensor of dtype `self.dtype`.

    Returns:
      The product of inputs and kernel, plus the bias when `self.use_bias`
      is set, passed through `self.activation` when one is configured.
    """
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)

    if int(os.getenv('ENABLE_QUANTOP_DENSE', 0)) == 1:
        lhs = quantemu_ops.quantize_emu(
            inputs,
            data_format='unknown',
            allocate_copy=int(os.getenv('QUANTEMU_ALLOCATE_COPY_INPUTS', 0)),
            data_type=int(os.getenv('QUANTEMU_DENSE_DATA_TYPE', 0)),
            precision=int(os.getenv('QUANTEMU_PRECISION_DENSE_INPUTS', 23)),
            exponent_bits=int(os.getenv('QUANTEMU_EXPBITS', 5)),
            round_mode=int(os.getenv('QUANTEMU_RMODE_INPUTS', 0)))
        rhs = quantemu_ops.quantize_emu(
            self.kernel,
            data_format='unknown',
            allocate_copy=int(os.getenv('QUANTEMU_ALLOCATE_COPY_FILTERS', 0)),
            data_type=int(os.getenv('QUANTEMU_DENSE_DATA_TYPE', 0)),
            precision=int(os.getenv('QUANTEMU_PRECISION_DENSE_FILTERS', 23)),
            exponent_bits=int(os.getenv('QUANTEMU_EXPBITS', 5)),
            round_mode=int(os.getenv('QUANTEMU_RMODE_FILTERS', 0)))
    else:
        # No quantization: multiply the raw operands.
        lhs, rhs = inputs, self.kernel

    rank = common_shapes.rank(inputs)
    if rank > 2:
        # Contract the last input axis with the first kernel axis.
        outputs = standard_ops.tensordot(lhs, rhs, [[rank - 1], [0]])
        if not context.executing_eagerly():
            # Restore the static shape: input dims with the last replaced
            # by the number of units.
            leading_dims = inputs.get_shape().as_list()[:-1]
            outputs.set_shape(leading_dims + [self.units])
    else:
        outputs = gen_math_ops.mat_mul(lhs, rhs)

    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
    if self.activation is None:
        return outputs
    return self.activation(outputs)  # pylint: disable=not-callable