def compareToTranspose(self, batch_size, in_height, in_width, out_channels,
                       block_size, data_format, use_gpu):
    """Checks `array_ops.depth_to_space` against the transpose reference.

    The input has `out_channels * block_size * block_size` channels. Both
    `array_ops.depth_to_space` and `self.depthToSpaceUsingTranspose` are
    applied to it and the evaluated results must be element-wise equal.

    Args:
      batch_size: Batch dimension of the input.
      in_height: Height of the input.
      in_width: Width of the input.
      out_channels: Multiplied by `block_size**2` to get the input channels.
      block_size: Block size handed to both implementations.
      data_format: "NCHW_VECT_C" takes the quantized path and "NCHW" selects
        the NCHW input shape; any other value uses the NHWC input shape.
      use_gpu: Forwarded to `self.test_session`.
    """
    in_channels = out_channels * block_size * block_size
    nhwc_shape = [batch_size, in_height, in_width, in_channels]
    nchw_shape = [batch_size, in_channels, in_height, in_width]
    num_elements = np.prod(nhwc_shape)

    if data_format != "NCHW_VECT_C":
        # Ascending whole numbers, stored as floats.
        values = [float(i) for i in range(num_elements)]
        input_shape = nchw_shape if data_format == "NCHW" else nhwc_shape
        tensor = constant_op.constant(
            values, shape=input_shape, dtype=dtypes.float32)
        expected = self.depthToSpaceUsingTranspose(
            tensor, block_size, data_format)
        actual = array_ops.depth_to_space(
            tensor, block_size, data_format=data_format)
    else:
        # qint8-representable values cycling through -127..127.
        values = [(i + 128) % 255 - 127 for i in range(num_elements)]
        tensor = constant_op.constant(
            values, shape=nhwc_shape, dtype=dtypes.float32)
        # The reference is computed on the float NHWC tensor.
        expected = self.depthToSpaceUsingTranspose(tensor, block_size, "NHWC")
        # The op under test sees the quantized NCHW_VECT_C version; its
        # output is dequantized and converted back to NHWC for comparison.
        vect_c = test_util.NHWCToNCHW_VECT_C(tensor)
        vect_c, _, _ = gen_array_ops.quantize_v2(
            vect_c, -128.0, 127.0, dtypes.qint8)
        vect_c = array_ops.depth_to_space(
            vect_c, block_size, data_format="NCHW_VECT_C")
        vect_c = gen_array_ops.dequantize(vect_c, -128, 127)
        actual = test_util.NCHW_VECT_CToNHWC(vect_c)

    with self.test_session(use_gpu=use_gpu) as sess:
        actual_vals, expected_vals = sess.run([actual, expected])
        self.assertTrue(np.array_equal(actual_vals, expected_vals))
def compareToTranspose(self, batch_size, out_height, out_width, in_channels,
                       block_size, data_format, use_gpu):
    """Checks `array_ops.space_to_depth` against the transpose reference.

    The input's spatial size is the requested output size multiplied by
    `block_size`. Both `array_ops.space_to_depth` and
    `self.spaceToDepthUsingTranspose` are applied to it and the evaluated
    results must be element-wise equal.

    Args:
      batch_size: Batch dimension of the input.
      out_height: Multiplied by `block_size` to get the input height.
      out_width: Multiplied by `block_size` to get the input width.
      in_channels: Channel count of the input.
      block_size: Block size handed to both implementations.
      data_format: "NCHW_VECT_C" takes the quantized path and "NCHW" selects
        the NCHW input shape; any other value uses the NHWC input shape.
      use_gpu: Forwarded to `self.cached_session`.
    """
    in_height = out_height * block_size
    in_width = out_width * block_size
    nhwc_shape = [batch_size, in_height, in_width, in_channels]
    nchw_shape = [batch_size, in_channels, in_height, in_width]
    num_elements = np.prod(nhwc_shape)

    if data_format != "NCHW_VECT_C":
        # Ascending whole numbers, stored as floats.
        values = [float(i) for i in range(num_elements)]
        input_shape = nchw_shape if data_format == "NCHW" else nhwc_shape
        tensor = constant_op.constant(
            values, shape=input_shape, dtype=dtypes.float32)
        expected = self.spaceToDepthUsingTranspose(
            tensor, block_size, data_format)
        actual = array_ops.space_to_depth(
            tensor, block_size, data_format=data_format)
    else:
        # qint8-representable values cycling through -127..127.
        values = [(i + 128) % 255 - 127 for i in range(num_elements)]
        tensor = constant_op.constant(
            values, shape=nhwc_shape, dtype=dtypes.float32)
        # The reference is computed on the float NHWC tensor.
        expected = self.spaceToDepthUsingTranspose(tensor, block_size, "NHWC")
        # The op under test sees the quantized NCHW_VECT_C version; its
        # output is dequantized and converted back to NHWC for comparison.
        vect_c = test_util.NHWCToNCHW_VECT_C(tensor)
        vect_c, _, _ = gen_array_ops.quantize_v2(
            vect_c, -128.0, 127.0, dtypes.qint8)
        vect_c = array_ops.space_to_depth(
            vect_c, block_size, data_format="NCHW_VECT_C")
        vect_c = gen_array_ops.dequantize(vect_c, -128, 127)
        actual = test_util.NCHW_VECT_CToNHWC(vect_c)

    with self.cached_session(use_gpu=use_gpu) as sess:
        actual_vals, expected_vals = sess.run([actual, expected])
        self.assertTrue(np.array_equal(actual_vals, expected_vals))
def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
                                          padding, strides, side_input_scale,
                                          side_input, biases):
    """Reference for the int8 fused conv op, built from separate float ops.

    Arguments and the return value follow the same format, meaning and
    restrictions as the actual fused op.

    Args:
      conv_input_scale: A scalar 'float'.
      conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
      kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
      padding: A `string` from: `"SAME", "VALID"`.
      strides: A list of `ints`.
      side_input_scale: A scalar 'float'.
      side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
      biases: A `Tensor` of type `float32` in NCHW layout.

    Returns:
      A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    """
    # Dequantize the operands and move them to the layouts conv2d is given.
    float_input = NchwVectCToNchw(
        gen_array_ops.dequantize(conv_input, -128, 127))
    float_kernel = OihwVectIToHwio(gen_array_ops.dequantize(kernel, -128, 127))
    scaled_conv = nn_ops.conv2d(
        float_input,
        float_kernel,
        strides=strides,
        padding=padding,
        data_format="NCHW") * conv_input_scale

    float_side = NchwVectCToNchw(
        gen_array_ops.dequantize(side_input, -128, 127))
    summed = scaled_conv + side_input_scale * float_side

    logit = nn_ops.bias_add(summed, biases, data_format="NCHW")
    activated = NchwToNchwVectC(nn_ops.relu(logit))
    quantized, _, _ = gen_array_ops.quantize_v2(
        activated, -128, 127, dtypes.qint8)
    return quantized
def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
                                          padding, strides, side_input_scale,
                                          side_input, biases):
    """Reference for the int8 fused conv op, built from separate float ops.

    Arguments and the return value follow the same format, meaning and
    restrictions as the actual fused op.

    Args:
      conv_input_scale: A scalar 'float'.
      conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
      kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
      padding: A `string` from: `"SAME", "VALID"`.
      strides: A list of `ints`.
      side_input_scale: A scalar 'float'.
      side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
      biases: A `Tensor` of type `float32` in NCHW layout.

    Returns:
      A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    """
    # Dequantize the operands and move them to the layouts conv2d is given.
    float_input = NchwVectCToNchw(
        gen_array_ops.dequantize(conv_input, -128, 127))
    float_kernel = OihwVectIToHwio(gen_array_ops.dequantize(kernel, -128, 127))
    scaled_conv = nn_ops.conv2d(
        float_input,
        float_kernel,
        strides=strides,
        padding=padding,
        data_format="NCHW") * conv_input_scale

    float_side = NchwVectCToNchw(
        gen_array_ops.dequantize(side_input, -128, 127))
    summed = scaled_conv + side_input_scale * float_side

    logit = nn_ops.bias_add(summed, biases, data_format="NCHW")
    activated = NchwToNchwVectC(nn_ops.relu(logit))
    quantized, _, _ = gen_array_ops.quantize_v2(
        activated, -128, 127, dtypes.qint8)
    return quantized
def compareToTranspose(self, batch_size, out_height, out_width, in_channels,
                       block_size, data_format, data_type, use_gpu):
    """Checks `array_ops.space_to_depth` against the transpose reference.

    The input's spatial size is the requested output size multiplied by
    `block_size`. Both `array_ops.space_to_depth` and
    `self.spaceToDepthUsingTranspose` are applied to it and the evaluated
    results must be element-wise equal.

    Args:
      batch_size: Batch dimension of the input.
      out_height: Multiplied by `block_size` to get the input height.
      out_width: Multiplied by `block_size` to get the input width.
      in_channels: Channel count of the input.
      block_size: Block size handed to both implementations.
      data_format: "NCHW_VECT_C" takes the bitcast-based qint8 path; for
        float32 input, "NCHW" selects the NCHW input shape.
      data_type: `dtypes.qint8` or `dtypes.float32` (asserted).
      use_gpu: Forwarded to `test_util.device`.
    """
    in_height = out_height * block_size
    in_width = out_width * block_size
    nhwc_shape = [batch_size, in_height, in_width, in_channels]
    nchw_shape = [batch_size, in_channels, in_height, in_width]
    num_elements = np.prod(nhwc_shape)

    # The input is always built on CPU because quantize_v2 runs on only CPU.
    with test_util.force_cpu():
        if data_type == dtypes.qint8:
            # qint8-representable values cycling through -127..127, laid out
            # as NHWC and then quantized.
            values = [(i + 128) % 255 - 127 for i in range(num_elements)]
            t = constant_op.constant(
                values, shape=nhwc_shape, dtype=dtypes.float32)
            t, _, _ = gen_array_ops.quantize_v2(
                t, -128.0, 127.0, dtypes.qint8)
        else:
            assert data_type == dtypes.float32
            # Ascending whole numbers, stored as floats.
            values = [float(i) for i in range(num_elements)]
            input_shape = nchw_shape if data_format == "NCHW" else nhwc_shape
            t = constant_op.constant(
                values, shape=input_shape, dtype=dtypes.float32)

    with test_util.device(use_gpu):
        if data_format == "NCHW_VECT_C":
            assert data_type == dtypes.qint8

            # qint8 -> int8 -> NCHW_VECT_C -> qint8, run the op, then undo
            # the layout change the same way.
            raw_int8 = array_ops.bitcast(t, dtypes.int8)
            vect_c = array_ops.bitcast(
                test_util.NHWCToNCHW_VECT_C(raw_int8), dtypes.qint8)
            vect_c = array_ops.space_to_depth(
                vect_c, block_size, data_format=data_format)
            nhwc_int8 = test_util.NCHW_VECT_CToNHWC(
                array_ops.bitcast(vect_c, dtypes.int8))
            actual = array_ops.bitcast(nhwc_int8, dtypes.qint8)

            # The reference runs in float32 on the NHWC data and is then
            # converted back to qint8.
            reference = math_ops.cast(
                array_ops.bitcast(t, dtypes.int8), dtypes.float32)
            reference = self.spaceToDepthUsingTranspose(
                reference, block_size, "NHWC")
            expected = array_ops.bitcast(
                math_ops.cast(reference, dtypes.int8), dtypes.qint8)
        else:
            # The input tensor is used directly in its own layout.
            actual = array_ops.space_to_depth(
                t, block_size, data_format=data_format)
            expected = self.spaceToDepthUsingTranspose(
                t, block_size, data_format)

        actual_vals, expected_vals = self.evaluate([actual, expected])
        self.assertTrue(np.array_equal(actual_vals, expected_vals))
def _SimulateFusedConv2dBiasActivationInt8OnCpu(conv_input_scale, conv_input,
                                                kernel, padding, strides,
                                                side_input_scale, side_input,
                                                biases, apply_relu):
    """Reference for the int8 fused conv op, built from separate float ops.

    Arguments and the return value follow the same format, meaning and
    restrictions as the actual fused op.

    Args:
      conv_input_scale: A scalar 'float'.
      conv_input: A `Tensor` of type `qint8` in NHWC layout.
      kernel: A `Tensor` of type `qint8` in HWIO layout.
      padding: A `string` from: `"SAME", "VALID"`.
      strides: A list of `ints`.
      side_input_scale: A scalar 'float'.
      side_input: A `Tensor` of type `qint8` in NHWC layout.
      biases: A `Tensor` of type `float32` in NHWC layout.
      apply_relu: A boolean to specify whether to apply "Relu" activation
        function that clips outputs to the range [0, 127], or "None"
        activation that clips to the range [-128, 127].

    Returns:
      A `Tensor` of type `qint8` in NHWC layout.
    """
    float_input = math_ops.cast(conv_input, dtypes.float32)
    float_kernel = math_ops.cast(kernel, dtypes.float32)
    scaled_conv = nn_ops.conv2d(
        float_input,
        float_kernel,
        strides=strides,
        padding=padding,
        data_format="NHWC") * conv_input_scale

    float_side = math_ops.cast(side_input, dtypes.float32)
    summed = scaled_conv + side_input_scale * float_side

    output = nn_ops.bias_add(summed, biases, data_format="NHWC")
    if apply_relu:
        output = nn_ops.relu(output)

    # In this case quantization is identical to clipping and casting.
    quantized, _, _ = gen_array_ops.quantize_v2(
        output, -128, 127, dtypes.qint8)
    return quantized
def _SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input,
                                           kernel, padding, strides,
                                           side_input_scale, side_input,
                                           biases, apply_relu):
    """Reference for the int8 fused conv op, built from separate float ops.

    Arguments and the return value follow the same format, meaning and
    restrictions as the actual fused op.

    Args:
      conv_input_scale: A scalar 'float'.
      conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
      kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
      padding: A `string` from: `"SAME", "VALID"`.
      strides: A list of `ints`.
      side_input_scale: A scalar 'float'.
      side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
      biases: A `Tensor` of type `float32` in NCHW layout.
      apply_relu: A boolean to specify whether to apply "Relu" activation
        function that clips outputs to the range [0, 127], or "None"
        activation that clips to the range [-128, 127].

    Returns:
      A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    """
    # Dequantize the operands and move them to the layouts conv2d is given.
    float_input = _NchwVectCToNchw(
        gen_array_ops.dequantize(conv_input, -128, 127))
    float_kernel = _OihwVectIToHwio(
        gen_array_ops.dequantize(kernel, -128, 127))
    scaled_conv = nn_ops.conv2d(
        float_input,
        float_kernel,
        strides=strides,
        padding=padding,
        data_format="NCHW") * conv_input_scale

    float_side = _NchwVectCToNchw(
        gen_array_ops.dequantize(side_input, -128, 127))
    summed = scaled_conv + side_input_scale * float_side

    output = nn_ops.bias_add(summed, biases, data_format="NCHW")
    if apply_relu:
        output = nn_ops.relu(output)

    quantized, _, _ = gen_array_ops.quantize_v2(
        _NchwToNchwVectC(output), -128, 127, dtypes.qint8)
    return quantized
def _SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input,
                                           kernel, padding, strides,
                                           side_input_scale, side_input,
                                           biases, apply_relu):
    """Reference for the int8 fused conv op, built from separate float ops.

    Arguments and the return value follow the same format, meaning and
    restrictions as the actual fused op.

    Args:
      conv_input_scale: A scalar 'float'.
      conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
      kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
      padding: A `string` from: `"SAME", "VALID"`.
      strides: A list of `ints`.
      side_input_scale: A scalar 'float'.
      side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
      biases: A `Tensor` of type `float32` in NCHW layout.
      apply_relu: A boolean to specify whether to apply "Relu" activation
        function that clips outputs to the range [0, 127], or "None"
        activation that clips to the range [-128, 127].

    Returns:
      A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    """
    # Dequantize the operands and move them to the layouts conv2d is given.
    float_input = _NchwVectCToNchw(
        gen_array_ops.dequantize(conv_input, -128, 127))
    float_kernel = _OihwVectIToHwio(
        gen_array_ops.dequantize(kernel, -128, 127))
    scaled_conv = nn_ops.conv2d(
        float_input,
        float_kernel,
        strides=strides,
        padding=padding,
        data_format="NCHW") * conv_input_scale

    float_side = _NchwVectCToNchw(
        gen_array_ops.dequantize(side_input, -128, 127))
    summed = scaled_conv + side_input_scale * float_side

    output = nn_ops.bias_add(summed, biases, data_format="NCHW")
    if apply_relu:
        output = nn_ops.relu(output)

    quantized, _, _ = gen_array_ops.quantize_v2(
        _NchwToNchwVectC(output), -128, 127, dtypes.qint8)
    return quantized
def runTest(self, test_param, apply_relu):
    """Runs tests for dimensions configured in test_param.

    Random qint8 inputs in NCHW_VECT_C / OIHW_VECT_I layout are fed to the
    fused op twice: once on "/cpu:0" after conversion to NHWC / HWIO, and
    once on "/gpu:0" unchanged. The two results must match exactly.

    Args:
      test_param: Mapping holding the dimension, stride, scale and padding
        settings read below.
      apply_relu: Selects activation mode "Relu" when true, else "None".
    """
    batch = test_param["batch_size"]
    in_channels = test_param["input_channels"]
    out_channels = test_param["output_channels"]
    in_h = test_param["input_height"]
    in_w = test_param["input_width"]
    filt_h = test_param["filter_height"]
    filt_w = test_param["filter_width"]
    stride_h = test_param["vertical_stride"]
    stride_w = test_param["horizontal_stride"]
    conv_scale = test_param["conv_input_scale"]
    side_scale = test_param["side_input_scale"]
    bias_scale = test_param["bias_scale"]
    padding = test_param["padding_type"]
    activation = "Relu" if apply_relu else "None"

    def quantized_uniform(shape, low):
        # Uniform float samples with the given lower bound, quantized to
        # qint8 over the range [-1, 1].
        samples = random_ops.random_uniform(
            shape, minval=low, maxval=1.0, dtype=dtypes.float32)
        quantized, _, _ = gen_array_ops.quantize_v2(
            samples, -1.0, 1.0, dtypes.qint8)
        return quantized

    with self.cached_session(use_gpu=True) as sess, self.test_scope():
        conv_input = quantized_uniform(
            [batch, in_channels // 4, in_h, in_w, 4], -0.0)
        kernel = quantized_uniform(
            [out_channels, in_channels // 4, filt_h, filt_w, 4], -1.0)

        out_h = _CalculateConvolvedOutputDim(in_h, filt_h, stride_h, padding)
        out_w = _CalculateConvolvedOutputDim(in_w, filt_w, stride_w, padding)
        tf_logging.info("output_height=%s, output_width=%s", out_h, out_w)

        side_input = quantized_uniform(
            [batch, out_channels // 4, out_h, out_w, 4], 0.0)
        biases = random_ops.random_uniform(
            [out_channels],
            minval=-10 * bias_scale,
            maxval=20 * bias_scale,
            dtype=dtypes.float32)

        with ops.device("/cpu:0"):
            # The CPU call is given NHWC / HWIO operands; its output is
            # converted back to NCHW_VECT_C for comparison.
            cpu_out = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                _Int8Roundtrip(_NchwVectCToNhwc, conv_input),
                _Int8Roundtrip(_OihwVectIToHwio, kernel),
                biases,
                strides=[1, stride_h, stride_w, 1],
                padding=padding,
                conv_input_scale=conv_scale,
                side_input_scale=side_scale,
                side_input=_Int8Roundtrip(_NchwVectCToNhwc, side_input),
                activation_mode=activation,
                data_format="NHWC",
                filter_format="HWIO")
            cpu_result = _Int8Roundtrip(_NhwcToNchwVectC, cpu_out)

        with ops.device("/gpu:0"):
            gpu_result = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                conv_input,
                kernel,
                biases,
                strides=[1, 1, stride_h, stride_w],
                padding=padding,
                conv_input_scale=conv_scale,
                side_input_scale=side_scale,
                side_input=side_input,
                activation_mode=activation,
                data_format="NCHW_VECT_C",
                filter_format="OIHW_VECT_I")

        cpu_y, gpu_y = sess.run([cpu_result, gpu_result])
        self.assertAllClose(cpu_y, gpu_y, rtol=0, atol=0)
def runTest(self, test_param, apply_relu):
    """Runs tests for dimensions configured in test_param.

    Random qint8 inputs in NHWC / HWIO layout are fed to the fused op and to
    `_SimulateFusedConv2dBiasActivationInt8OnCpu`; the two results may
    differ by at most 1.

    Args:
      test_param: Mapping holding the dimension, stride, scale and padding
        settings read below.
      apply_relu: Selects activation mode "Relu" when true, else "None".
    """
    batch = test_param["batch_size"]
    in_channels = test_param["input_channels"]
    out_channels = test_param["output_channels"]
    in_h = test_param["input_height"]
    in_w = test_param["input_width"]
    filt_h = test_param["filter_height"]
    filt_w = test_param["filter_width"]
    stride_h = test_param["vertical_stride"]
    stride_w = test_param["horizontal_stride"]
    conv_scale = test_param["conv_input_scale"]
    side_scale = test_param["side_input_scale"]
    bias_scale = test_param["bias_scale"]
    padding = test_param["padding_type"]

    def quantized_uniform(shape, low):
        # Uniform float samples with the given lower bound, quantized to
        # qint8 over the range [-1, 1].
        samples = random_ops.random_uniform(
            shape, minval=low, maxval=1.0, dtype=dtypes.float32)
        quantized, _, _ = gen_array_ops.quantize_v2(
            samples, -1.0, 1.0, dtypes.qint8)
        return quantized

    with self.cached_session(use_gpu=False) as sess, self.test_scope():
        conv_input = quantized_uniform([batch, in_h, in_w, in_channels], -0.0)
        kernel = quantized_uniform(
            [filt_h, filt_w, in_channels, out_channels], -1.0)

        out_h = _CalculateConvolvedOutputDim(in_h, filt_h, stride_h, padding)
        out_w = _CalculateConvolvedOutputDim(in_w, filt_w, stride_w, padding)
        tf_logging.info("output_height=%s, output_width=%s", out_h, out_w)

        side_input = quantized_uniform(
            [batch, out_h, out_w, out_channels], 0.0)
        biases = random_ops.random_uniform(
            [out_channels],
            minval=-10 * bias_scale,
            maxval=20 * bias_scale,
            dtype=dtypes.float32)

        strides = [1, stride_h, stride_w, 1]
        actual = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
            conv_input,
            kernel,
            biases,
            strides=strides,
            padding=padding,
            conv_input_scale=conv_scale,
            side_input_scale=side_scale,
            side_input=side_input,
            activation_mode="Relu" if apply_relu else "None",
            data_format="NHWC",
            filter_format="HWIO")
        expected = _SimulateFusedConv2dBiasActivationInt8OnCpu(
            conv_scale, conv_input, kernel, padding, strides, side_scale,
            side_input, biases, apply_relu)

        actual_y, expected_y = sess.run([actual, expected])
        self.assertAllClose(actual_y, expected_y, rtol=0, atol=1)
def runTest(self, test_param):
    """Compares the fused int8 conv op with its float simulation.

    Random qint8 inputs in NCHW_VECT_C / OIHW_VECT_I layout are fed to the
    fused op and to `SimulateFusedConv2dBiasActivationInt8`; the evaluated
    results are printed and must be element-wise equal.

    Args:
      test_param: Mapping holding the dimension, stride, scale and padding
        settings read below.
    """
    batch = test_param["batch_size"]
    in_channels = test_param["input_channels"]
    out_channels = test_param["output_channels"]
    in_h = test_param["input_height"]
    in_w = test_param["input_width"]
    filt_h = test_param["filter_height"]
    filt_w = test_param["filter_width"]
    stride_h = test_param["vertical_stride"]
    stride_w = test_param["horizontal_stride"]
    conv_scale = test_param["conv_input_scale"]
    side_scale = test_param["side_input_scale"]
    bias_scale = test_param["bias_scale"]
    padding = test_param["padding_type"]

    def quantized_uniform(shape, low):
        # Uniform float samples with the given lower bound, quantized to
        # qint8 over the range [-1, 1].
        samples = random_ops.random_uniform(
            shape, minval=low, maxval=1.0, dtype=dtypes.float32)
        quantized, _, _ = gen_array_ops.quantize_v2(
            samples, -1.0, 1.0, dtypes.qint8)
        return quantized

    conv_input = quantized_uniform(
        [batch, in_channels // 4, in_h, in_w, 4], -0.0)
    kernel = quantized_uniform(
        [out_channels, in_channels // 4, filt_h, filt_w, 4], -1.0)

    out_h = CalculateConvolvedOutputDim(in_h, filt_h, stride_h, padding)
    out_w = CalculateConvolvedOutputDim(in_w, filt_w, stride_w, padding)
    print("output_height=", out_h, ", output_width=", out_w)

    side_input = quantized_uniform(
        [batch, out_channels // 4, out_h, out_w, 4], 0.0)
    biases = random_ops.random_uniform(
        [out_channels],
        minval=-10 * bias_scale,
        maxval=20 * bias_scale,
        dtype=dtypes.float32)

    strides = [1, 1, stride_h, stride_w]
    actual = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
        conv_input,
        kernel,
        biases,
        strides=strides,
        padding=padding,
        conv_input_scale=conv_scale,
        side_input_scale=side_scale,
        side_input=side_input,
        data_format="NCHW_VECT_C",
        filter_format="OIHW_VECT_I")
    expected = SimulateFusedConv2dBiasActivationInt8(
        conv_scale, conv_input, kernel, padding, strides, side_scale,
        side_input, biases)

    with self.test_session(use_gpu=True) as sess:
        actual_y, expected_y = sess.run([actual, expected])
        print("actual_y = ", actual_y)
        print("expected_y = ", expected_y)
        self.assertTrue(np.array_equal(actual_y, expected_y))
def runTest(self, test_param, apply_relu):
    """Compares the fused int8 conv op with its float simulation.

    Random qint8 inputs in NCHW_VECT_C / OIHW_VECT_I layout are fed to the
    fused op and to `SimulateFusedConv2dBiasActivationInt8`; the evaluated
    results are logged and must be element-wise equal.

    Args:
      test_param: Mapping holding the dimension, stride, scale and padding
        settings read below.
      apply_relu: Selects activation mode "Relu" when true, else "None".
    """
    batch_size = test_param["batch_size"]
    input_channels = test_param["input_channels"]
    output_channels = test_param["output_channels"]
    input_height = test_param["input_height"]
    input_width = test_param["input_width"]
    filter_height = test_param["filter_height"]
    filter_width = test_param["filter_width"]
    vertical_stride = test_param["vertical_stride"]
    horizontal_stride = test_param["horizontal_stride"]
    conv_input_scale = test_param["conv_input_scale"]
    side_input_scale = test_param["side_input_scale"]
    bias_scale = test_param["bias_scale"]
    padding_type = test_param["padding_type"]

    # Inputs are uniform float samples quantized to qint8 over [-1, 1].
    conv_input, _, _ = gen_array_ops.quantize_v2(
        random_ops.random_uniform(
            [batch_size, input_channels // 4, input_height, input_width, 4],
            minval=-0.0,
            maxval=1.0,
            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)

    kernel, _, _ = gen_array_ops.quantize_v2(
        random_ops.random_uniform(
            [
                output_channels, input_channels // 4, filter_height,
                filter_width, 4
            ],
            minval=-1.0,
            maxval=1.0,
            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)

    output_height = CalculateConvolvedOutputDim(input_height, filter_height,
                                                vertical_stride, padding_type)
    output_width = CalculateConvolvedOutputDim(input_width, filter_width,
                                               horizontal_stride, padding_type)
    # Logging applies `msg % args`, so every extra argument needs its own
    # placeholder; without them the record fails to format and is lost.
    tf_logging.info("output_height=%s, output_width=%s", output_height,
                    output_width)

    side_input, _, _ = gen_array_ops.quantize_v2(
        random_ops.random_uniform(
            [batch_size, output_channels // 4, output_height, output_width, 4],
            minval=0.0,
            maxval=1.0,
            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)

    biases = random_ops.random_uniform(
        [output_channels],
        minval=-10 * bias_scale,
        maxval=20 * bias_scale,
        dtype=dtypes.float32)

    strides = [1, 1, vertical_stride, horizontal_stride]

    actual = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
        conv_input,
        kernel,
        biases,
        strides=strides,
        padding=padding_type,
        conv_input_scale=conv_input_scale,
        side_input_scale=side_input_scale,
        side_input=side_input,
        activation_mode="Relu" if apply_relu else "None",
        data_format="NCHW_VECT_C",
        filter_format="OIHW_VECT_I")

    expected = SimulateFusedConv2dBiasActivationInt8(
        conv_input_scale, conv_input, kernel, padding_type, strides,
        side_input_scale, side_input, biases, apply_relu)

    with self.test_session(use_gpu=True) as sess:
        actual_y, expected_y = sess.run([actual, expected])
        tf_logging.info("actual_y = %s", actual_y)
        tf_logging.info("expected_y = %s", expected_y)
        self.assertTrue(np.array_equal(actual_y, expected_y))
from tensorflow.python.ops import gen_array_ops
# gen_nn_ops is needed for quantized_relu below; without this import the
# script stops with a NameError.
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import math_ops
import tensorflow as tf
import numpy as np

# Builds a handful of named reference ops ("ref_reshape", "ref_min",
# "ref_max", "ref_qRelu", "ref_add") on random inputs.
with tf.Session() as sess:
    # Reshape: a random 28x28 matrix flattened to 784x1.
    rand_input = tf.convert_to_tensor(np.random.rand(28, 28), dtype=tf.float32)
    new_dim = tf.convert_to_tensor(np.array([784, 1]), dtype=tf.int32)
    reshaped_tensor = tf.reshape(rand_input, new_dim, name="ref_reshape")

    # Min / max over a random 784x1 activation column.
    rand_act = tf.convert_to_tensor(
        np.random.rand(784, 1) * 10, dtype=tf.float32)
    act_min = tf.reduce_min(rand_act, name="ref_min")
    act_max = tf.reduce_max(rand_act, name="ref_max")

    # Quantize the activations to quint8 over their own min/max, then apply
    # the quantized ReLU to the quantized values and their range.
    [q_a, a_min, a_max] = gen_array_ops.quantize_v2(
        rand_act, act_min, act_max, tf.quint8, "MIN_FIRST")
    [out_a, out_min, out_max] = gen_nn_ops.quantized_relu(
        q_a, a_min, a_max, tf.quint8, name="ref_qRelu")

    # Element-wise add of two random 200x1 columns.
    rand_a = tf.convert_to_tensor(np.random.rand(200, 1) * 10, dtype=tf.float32)
    rand_b = tf.convert_to_tensor(np.random.rand(200, 1) * 10, dtype=tf.float32)
    ab_sum = tf.add(rand_a, rand_b, name="ref_add")
    #max_i = math_ops.argmax(rand_act, 1, output_type=tf.int32, name="ref_argmax")
    #the data for max and min functions will be extracted from deep_mlp directly
#file /usr/local/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py with tf.Session() as sess: a = tf.cast(tf.convert_to_tensor(np.random.rand(128, 128) * 10), tf.float32) b = tf.cast(tf.convert_to_tensor(np.random.rand(128, 1) * 10), tf.float32) # a = tf.cast(tf.convert_to_tensor(np.ones((1024,1024)) * 10), tf.float32) # b = tf.cast(tf.convert_to_tensor(np.ones((1024,1)) * 10), tf.float32) a_min = tf.reduce_min(a) a_max = tf.reduce_max(a) [q_a, a_min, a_max] = gen_array_ops.quantize_v2(a, a_min, a_max, tf.quint8, "MIN_FIRST", name="qA") b_min = tf.reduce_min(b) b_max = tf.reduce_max(b) [q_b, b_min, b_max] = gen_array_ops.quantize_v2(b, b_min, b_max, tf.quint8, "MIN_FIRST", name="qB") print("------- float32 input matrices ------") print("a min: ", a_min.eval(), " a max: ", a_max.eval()) print("b min: ", b_min.eval(), " b max: ", b_max.eval())
def runTest(self, test_param, apply_relu):
    """Compares the fused int8 conv op with its float simulation.

    Random qint8 inputs in NCHW_VECT_C / OIHW_VECT_I layout are fed to the
    fused op and to `_SimulateFusedConv2dBiasActivationInt8`; the two
    results may differ by at most 1.

    Args:
      test_param: Mapping holding the dimension, stride, scale and padding
        settings read below.
      apply_relu: Selects activation mode "Relu" when true, else "None".
    """
    batch = test_param["batch_size"]
    in_channels = test_param["input_channels"]
    out_channels = test_param["output_channels"]
    in_h = test_param["input_height"]
    in_w = test_param["input_width"]
    filt_h = test_param["filter_height"]
    filt_w = test_param["filter_width"]
    stride_h = test_param["vertical_stride"]
    stride_w = test_param["horizontal_stride"]
    conv_scale = test_param["conv_input_scale"]
    side_scale = test_param["side_input_scale"]
    bias_scale = test_param["bias_scale"]
    padding = test_param["padding_type"]

    def quantized_uniform(shape, low):
        # Uniform float samples with the given lower bound, quantized to
        # qint8 over the range [-1, 1].
        samples = random_ops.random_uniform(
            shape, minval=low, maxval=1.0, dtype=dtypes.float32)
        quantized, _, _ = gen_array_ops.quantize_v2(
            samples, -1.0, 1.0, dtypes.qint8)
        return quantized

    with self.cached_session(use_gpu=True) as sess, self.test_scope():
        conv_input = quantized_uniform(
            [batch, in_channels // 4, in_h, in_w, 4], -0.0)
        kernel = quantized_uniform(
            [out_channels, in_channels // 4, filt_h, filt_w, 4], -1.0)

        out_h = _CalculateConvolvedOutputDim(in_h, filt_h, stride_h, padding)
        out_w = _CalculateConvolvedOutputDim(in_w, filt_w, stride_w, padding)
        tf_logging.info("output_height=%s, output_width=%s", out_h, out_w)

        side_input = quantized_uniform(
            [batch, out_channels // 4, out_h, out_w, 4], 0.0)
        biases = random_ops.random_uniform(
            [out_channels],
            minval=-10 * bias_scale,
            maxval=20 * bias_scale,
            dtype=dtypes.float32)

        strides = [1, 1, stride_h, stride_w]
        actual = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
            conv_input,
            kernel,
            biases,
            strides=strides,
            padding=padding,
            conv_input_scale=conv_scale,
            side_input_scale=side_scale,
            side_input=side_input,
            activation_mode="Relu" if apply_relu else "None",
            data_format="NCHW_VECT_C",
            filter_format="OIHW_VECT_I")
        expected = _SimulateFusedConv2dBiasActivationInt8(
            conv_scale, conv_input, kernel, padding, strides, side_scale,
            side_input, biases, apply_relu)

        actual_y, expected_y = sess.run([actual, expected])
        self.assertAllClose(actual_y, expected_y, rtol=0, atol=1)