def testInvalidQuantizationTypes(self):
  """Rejects unsupported quantization targets and incompatible input dtypes."""
  # Invalid quantization type: boolean is not a supported target dtype.
  # NOTE: the original used `np.bool`, an alias removed in NumPy 1.24, which
  # now raises AttributeError before quantize_weights is even called; use the
  # real scalar type `np.bool_` so the intended ValueError path is exercised.
  with self.assertRaises(ValueError):
    quantization.quantize_weights(np.array([]), np.bool_)
  # Invalid data dtype for float16 quantization: integer input is rejected.
  with self.assertRaises(ValueError):
    d = np.ones(1, dtype=np.int32)
    quantization.quantize_weights(d, np.float16)
def test_quantize_group(self):
  """Writes one quantized group and checks the manifest and shard bytes."""
  groups = [[
      {'name': 'weight1', 'data': np.array([1, 2, 3], 'float32')},
      {'name': 'weight2', 'data': np.array([4, 5], 'int32')},
  ]]

  manifest_json = write_weights.write_weights(
      groups, TMP_DIR, shard_size_bytes=8 * 4, quantization_dtype=np.uint8)
  manifest = json.loads(manifest_json)

  self.assertTrue(
      os.path.isfile(os.path.join(TMP_DIR, 'weights_manifest.json')),
      'weights_manifest.json does not exist')

  # Re-quantize both weights independently to obtain the expected
  # (data, scale, min) triples, then transpose into parallel tuples.
  results = [
      quantization.quantize_weights(entry['data'], np.uint8)
      for entry in groups[0]
  ]
  q, s, m = zip(*results)

  self.assertEqual(manifest, [{
      'paths': ['group1-shard1of1'],
      'weights': [{
          'name': 'weight1',
          'shape': [3],
          'dtype': 'float32',
          'quantization': {'min': m[0], 'scale': s[0], 'dtype': 'uint8'}
      }, {
          'name': 'weight2',
          'shape': [2],
          'dtype': 'int32',
          'quantization': {'min': m[1], 'scale': s[1], 'dtype': 'uint8'}
      }]
  }])

  # The shard on disk must be the two quantized buffers back to back.
  weights_path = os.path.join(TMP_DIR, 'group1-shard1of1')
  weights = np.fromfile(weights_path, 'uint8')
  np.testing.assert_array_equal(weights, np.concatenate([q[0], q[1]]))
def testAllEqual(self):
  """A constant tensor quantizes with scale 1.0 and round-trips exactly."""
  data = np.ones(5, dtype=np.float32)
  quantized, scale, min_val = quantization.quantize_weights(data, np.uint8)
  self.assertEqual(scale, 1.0)
  self.assertEqual(quantized.dtype, np.uint8)
  restored = quantization.dequantize_weights(
      quantized, scale, min_val, np.float32)
  np.testing.assert_array_equal(restored, data)
def testFloatQuantizeAllEqual(self):
  """float16 quantization of a constant tensor round-trips losslessly."""
  data = np.ones(5, dtype=np.float32)
  quantized, metadata = quantization.quantize_weights(data, np.float16)
  # float16 quantization carries no affine parameters.
  self.assertDictEqual(metadata, {})
  self.assertEqual(quantized.dtype, np.float16)
  restored = quantization.dequantize_weights(quantized, metadata, np.float32)
  np.testing.assert_array_equal(restored, data)
def test_quantize_group(self):
  """Quantized group write: manifest metadata and concatenated shard bytes."""
  weight1 = {'name': 'weight1', 'data': np.array([1, 2, 3], 'float32')}
  weight2 = {'name': 'weight2', 'data': np.array([4, 5], 'int32')}
  groups = [[weight1, weight2]]

  manifest_json = write_weights.write_weights(
      groups, TMP_DIR, shard_size_bytes=8 * 4, quantization_dtype=np.uint8)
  manifest = json.loads(manifest_json)

  self.assertTrue(
      os.path.isfile(os.path.join(TMP_DIR, 'weights_manifest.json')),
      'weights_manifest.json does not exist')

  # Quantize each weight on its own; zip transposes the two (q, s, m)
  # triples into tuples of quantized data, scales, and minima.
  q, s, m = zip(
      quantization.quantize_weights(weight1['data'], np.uint8),
      quantization.quantize_weights(weight2['data'], np.uint8))

  expected_manifest = [{
      'paths': ['group1-shard1of1'],
      'weights': [{
          'name': 'weight1',
          'shape': [3],
          'dtype': 'float32',
          'quantization': {'min': m[0], 'scale': s[0], 'dtype': 'uint8'}
      }, {
          'name': 'weight2',
          'shape': [2],
          'dtype': 'int32',
          'quantization': {'min': m[1], 'scale': s[1], 'dtype': 'uint8'}
      }]
  }]
  self.assertEqual(manifest, expected_manifest)

  # Shard content should be the quantized buffers written back to back.
  weights_path = os.path.join(TMP_DIR, 'group1-shard1of1')
  weights = np.fromfile(weights_path, 'uint8')
  np.testing.assert_array_equal(weights, np.concatenate([q[0], q[1]]))
def testAffineQuantizeAllEqual(self):
  """Affine quantization of a constant tensor: scale 1.0, exact round trip."""
  d = np.ones(5, dtype=np.float32)
  q, metadata = quantization.quantize_weights(d, np.uint8)
  # Use unittest assertions instead of a bare `assert`: a bare assert is
  # stripped when Python runs with -O and produces poorer failure messages.
  self.assertIn('scale', metadata)
  self.assertIn('min', metadata)
  self.assertEqual(metadata['scale'], 1.0)
  self.assertEqual(q.dtype, np.uint8)
  de_q = quantization.dequantize_weights(q, metadata, np.float32)
  np.testing.assert_array_equal(de_q, d)
def _runQuantizeTest(self, range_min, range_max, data_dtype,
                     quantization_dtype, expected_scale):
  """Quantizes [range_min, range_max], checking scale, dtype, and round trip.

  Also verifies that zero is exactly representable whenever it lies inside
  the quantized range.
  """
  values = np.arange(range_min, range_max + 1, dtype=data_dtype)
  quantized, scale, min_val = quantization.quantize_weights(
      values, quantization_dtype)
  self.assertAlmostEqual(scale, expected_scale)
  self.assertEqual(quantized.dtype, quantization_dtype)

  restored = quantization.dequantize_weights(
      quantized, scale, min_val, data_dtype)
  np.testing.assert_allclose(restored, values)

  if range_min <= 0 <= range_max:
    # Zero must survive the quantize/dequantize round trip exactly.
    zero = np.zeros(1, data_dtype)
    quantized_zero = np.round((zero - min_val) / scale).astype(
        quantization_dtype)
    self.assertEqual(
        quantization.dequantize_weights(quantized_zero, scale, min_val,
                                        data_dtype),
        zero)
def _runQuantizeTest(
    self, range_min, range_max, data_dtype, quantization_dtype,
    expected_scale):
  """Checks scale, output dtype, and round-trip fidelity for a value range.

  When zero falls within [range_min, range_max], additionally verifies it is
  represented exactly by the quantization.
  """
  d = np.arange(range_min, range_max + 1, dtype=data_dtype)
  q, s, m = quantization.quantize_weights(d, quantization_dtype)

  self.assertAlmostEqual(s, expected_scale)
  self.assertEqual(q.dtype, quantization_dtype)
  np.testing.assert_allclose(
      quantization.dequantize_weights(q, s, m, data_dtype), d)

  zero_in_range = range_min <= 0 <= range_max
  if zero_in_range:
    d_0 = np.zeros(1, data_dtype)
    # Manually quantize zero and confirm it dequantizes back to zero.
    q_0 = np.round((d_0 - m) / s).astype(quantization_dtype)
    self.assertEqual(
        quantization.dequantize_weights(q_0, s, m, data_dtype), d_0)
def _quantize_entry(entry, quantization_dtype):
  """Quantizes the weights in the entry, returning a new entry.

  The weights are quantized by linearly re-scaling the values between the
  minimum and maximum value, and representing them with the number of bits
  provided by the `quantization_dtype`.

  In order to guarantee that 0 is perfectly represented by one of the
  quantized values, the range is "nudged" in the same manner as in TF-Lite.

  Args:
    entry: A weight entry to quantize.
    quantization_dtype: A numpy dtype to quantize weights to. Only np.uint8
      and np.uint16 are supported.

  Returns:
    A new entry containing the quantized data and additional quantization
    info, for example:
      original_entry = {
        'name': 'weight1',
        'data': np.array([0, -0.1, 1.2], 'float32')
      }
      quantized_entry = {
        'name': 'weight1',
        'data': np.array([20, 0, 255], 'uint8'),
        'quantization': {'min': -0.10196078817, 'scale': 0.00509803940852,
                         'original_dtype': 'float32'}
      }
  """
  data = entry['data']
  # Only float32 tensors are quantized.
  if data.dtype != 'float32':
    return entry
  quantized_data, scale, min_val = quantization.quantize_weights(
      data, quantization_dtype)
  quantized_entry = entry.copy()
  quantized_entry['data'] = quantized_data
  quantized_entry['quantization'] = {
      'min': min_val, 'scale': scale, 'original_dtype': data.dtype.name}
  return quantized_entry
def _runQuantizeTest(self, range_min, range_max, data_dtype,
                     quantization_dtype, expected_metadata):
  """Quantizes a value range and checks metadata, dtype, and round trip.

  For affine (uint8/uint16) quantization, additionally verifies that zero is
  exactly representable whenever it lies inside the range.
  """
  values = np.arange(range_min, range_max + 1, dtype=data_dtype)
  quantized, metadata = quantization.quantize_weights(
      values, quantization_dtype)
  self.assertDictContainsSubsetAlmostEqual(metadata, expected_metadata)
  self.assertEqual(quantized.dtype, quantization_dtype)

  restored = quantization.dequantize_weights(quantized, metadata, data_dtype)
  np.testing.assert_allclose(restored, values)

  # Affine quantization only: zero must round-trip exactly.
  if quantization_dtype in [np.uint8, np.uint16]:
    scale = metadata['scale']
    min_val = metadata['min']
    if range_min <= 0 <= range_max:
      zero = np.zeros(1, data_dtype)
      quantized_zero = np.round((zero - min_val) / scale).astype(
          quantization_dtype)
      self.assertEqual(
          quantization.dequantize_weights(quantized_zero, metadata,
                                          data_dtype),
          zero)
def testAffineQuantizeNormalizedFloats(self):
  """uint16 affine quantization of small floats round-trips to ~5 decimals."""
  data = np.array(
      [-0.29098126, -0.24776903, -0.27248842, 0.23848203], dtype=np.float32)
  quantized, metadata = quantization.quantize_weights(data, np.uint16)
  restored = quantization.dequantize_weights(quantized, metadata, data.dtype)
  np.testing.assert_array_almost_equal(restored, data, decimal=5)