Ejemplo n.º 1
0
 def conv(self, input_tensor, filter_tensor):
     """Build a fake-quantized conv2d, optionally with bias and activation."""
     # Fake-quantize both conv operands with fixed 8-bit ranges.
     quantized_input = array_ops.fake_quant_with_min_max_args(
         input_tensor, min=-0.1, max=0.2, num_bits=8, narrow_range=False)
     quantized_filters = array_ops.fake_quant_with_min_max_args(
         filter_tensor, min=-1.0, max=2.0, num_bits=8, narrow_range=False)
     # Bias constant is created unconditionally (same graph structure as
     # before), even when the bias-add branch below is skipped.
     bias = array_ops.constant([0, 0], dtype=dtypes.float32)
     result = nn_ops.conv2d(
         quantized_input,
         quantized_filters,
         strides=[1, 1, 2, 1],
         dilations=[1, 1, 1, 1],
         padding='SAME',
         data_format='NHWC')
     # NOTE(review): `has_bias` and `activation_fn` come from an enclosing
     # scope not visible here — confirm against the surrounding class.
     if has_bias:
         result = nn_ops.bias_add(result, bias, data_format='NHWC')
     if activation_fn is not None:
         result = activation_fn(result)
     quantized_out = array_ops.fake_quant_with_min_max_args(
         result, min=-0.3, max=0.4, num_bits=8, narrow_range=False)
     return {'output': quantized_out}
Ejemplo n.º 2
0
 def add(self, x, y):
     """Return both a float add and a fake-quantized add of x and y."""
     float_res = math_ops.add(x, y)
     # Quantize each operand with its own fixed 8-bit range before adding.
     x = array_ops.fake_quant_with_min_max_args(
         x, min=-0.1, max=0.2, num_bits=8, narrow_range=False)
     y = array_ops.fake_quant_with_min_max_args(
         y, min=-0.3, max=0.4, num_bits=8, narrow_range=False)
     res = math_ops.add(x, y)
     # Quantize the sum as well so the whole path is fake-quantized.
     res = array_ops.fake_quant_with_min_max_args(
         res, min=-0.4, max=0.6, num_bits=8, narrow_range=False)
     return {'output': res, 'float_output': float_res}
Ejemplo n.º 3
0
def quantize(graph, quantize_info):
    """Quantize the graph with quantize_info.

  Args:
    graph: Graph to be modified.
    quantize_info: Quantization info in dictionary format, mapping a tensor
      name to its (min, max) quantization range.
  Raises:
    ValueError: When quantization fails.
  """
    for tensor_name, min_max in quantize_info.items():
        tensor = graph.get_tensor_by_name(tensor_name)
        # Op name is the tensor name without the ':<output index>' suffix.
        name = tensor_name.split(':')[0]
        consumers = tensor.consumers()
        quant = array_ops.fake_quant_with_min_max_args(tensor,
                                                       min=min_max[0],
                                                       max=min_max[1],
                                                       name=name +
                                                       '/fakequant')

        if consumers:
            modified_count = common.RerouteTensor(quant,
                                                  tensor,
                                                  can_modify=consumers)
            # RerouteTensor reports how many consumer inputs were rewired to
            # the fake-quant output. If fewer than the number of consumers
            # were modified, at least one consumer still reads the
            # unquantized tensor, which we treat as a failure.
            if modified_count < len(consumers):
                raise ValueError(
                    'No inputs quantized for ops: [%s]' %
                    ', '.join([consumer.name for consumer in consumers]))
Ejemplo n.º 4
0
    def testQATFrozenGraphDefInt8(self):
        """INT8 inference type should accept a 16-bit fake-quant graph."""
        with ops.Graph().as_default():
            input_a = array_ops.placeholder(
                shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
            input_b = array_ops.placeholder(
                shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
            # INT8 inference type works for 16 bits fake quant.
            _ = array_ops.fake_quant_with_min_max_args(
                input_a + input_b, min=0., max=1., name='output', num_bits=16)
            sess = session.Session()

        # Write graph to file.
        graph_def_file = self._getFilepath('model.pb')
        write_graph(sess.graph_def, '', graph_def_file, False)
        sess.close()

        flags_str = ('--inference_type=INT8 --std_dev_values=128,128 '
                     '--mean_values=128,128 '
                     '--graph_def_file={0} --input_arrays={1},{2} '
                     '--output_arrays={3}'.format(graph_def_file, 'inputA',
                                                  'inputB', 'output'))
        self._run(flags_str, should_succeed=True)
        os.remove(graph_def_file)
Ejemplo n.º 5
0
    def testQATFrozenGraphDefUInt8(self):
        """UINT8 inference works with default 8-bit fake-quant outputs."""
        with ops.Graph().as_default():
            input_a = array_ops.placeholder(
                shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
            input_b = array_ops.placeholder(
                shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
            _ = array_ops.fake_quant_with_min_max_args(
                input_a + input_b, min=0., max=1., name='output')
            sess = session.Session()

        # Write graph to file.
        graph_def_file = self._getFilepath('model.pb')
        write_graph(sess.graph_def, '', graph_def_file, False)
        sess.close()

        # Define converter flags
        flags_str = ('--std_dev_values=128,128 --mean_values=128,128 '
                     '--graph_def_file={0} --input_arrays={1} '
                     '--output_arrays={2}'.format(graph_def_file,
                                                  'inputA,inputB', 'output'))

        # Set inference_type UINT8 and (default) inference_input_type UINT8
        flags_str_1 = flags_str + ' --inference_type=UINT8'
        self._run(flags_str_1, should_succeed=True)

        # Set inference_type UINT8 and inference_input_type FLOAT
        flags_str_2 = flags_str_1 + ' --inference_input_type=FLOAT'
        self._run(flags_str_2, should_succeed=True)

        os.remove(graph_def_file)
Ejemplo n.º 6
0
    def testQuantizationInvalid(self):
        """Missing stats for one input must raise a ValueError."""
        input_a = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
        input_b = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
        out_tensor = array_ops.fake_quant_with_min_max_args(
            input_a + input_b, min=0., max=1., name='output')
        sess = session.Session()

        # Convert model and ensure model is not None.
        converter = lite.TocoConverter.from_session(
            sess, [input_a, input_b], [out_tensor])
        converter.inference_type = lite_constants.QUANTIZED_UINT8
        # Only 'inputA' gets (mean, std_dev); 'inputB' is deliberately absent.
        converter.quantized_input_stats = {'inputA': (0., 1.)}
        with self.assertRaises(ValueError) as error:
            converter.convert()
        self.assertEqual(
            'Quantization input stats are not available for input tensors '
            '\'inputB\'.', str(error.exception))
    def testQuantizationInvalid(self):
        """toco_convert without input stats must raise for quantized types."""
        with ops.Graph().as_default():
            in_tensor = array_ops.placeholder(
                shape=[1, 16, 16, 3], dtype=dtypes.float32)
            out_tensor = array_ops.fake_quant_with_min_max_args(
                in_tensor + in_tensor, min=0., max=1.)
            sess = session.Session()

        # Quantized inference type with no stats at all.
        with self.assertRaises(ValueError) as error:
            convert.toco_convert(sess.graph_def, [in_tensor], [out_tensor],
                                 inference_type=lite_constants.QUANTIZED_UINT8)
        self.assertEqual(
            "std_dev and mean must be defined when inference_type or "
            "inference_input_type is QUANTIZED_UINT8 or INT8.",
            str(error.exception))

        # Same failure with an explicit float inference input type.
        with self.assertRaises(ValueError) as error:
            convert.toco_convert(sess.graph_def, [in_tensor], [out_tensor],
                                 inference_type=lite_constants.QUANTIZED_UINT8,
                                 inference_input_type=lite_constants.FLOAT)
        self.assertEqual(
            "std_dev and mean must be defined when inference_type or "
            "inference_input_type is QUANTIZED_UINT8 or INT8.",
            str(error.exception))
Ejemplo n.º 8
0
    def testGraphDefQuantizationInvalid(self):
        """Graph-def conversion without stats must raise a ValueError."""
        input_a = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputA")
        input_b = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputB")
        _ = array_ops.fake_quant_with_min_max_args(
            input_a + input_b, min=0., max=1., name="output")
        sess = session.Session()

        input_arrays_map = [("inputA", [1, 16, 16, 3]),
                            ("inputB", [1, 16, 16, 3])]
        with self.assertRaises(ValueError) as error:
            convert.toco_convert_graph_def(
                sess.graph_def,
                input_arrays_map,
                ["output"],
                inference_type=lite_constants.QUANTIZED_UINT8)
        self.assertEqual(
            "std_dev and mean must be defined when inference_input_type is "
            "QUANTIZED_UINT8.", str(error.exception))
Ejemplo n.º 9
0
  def testQuantization(self):
    """Quantized conversion produces a uint8 model with expected details."""
    in_tensor = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name='input')
    out_tensor = array_ops.fake_quant_with_min_max_args(
        in_tensor + in_tensor, min=0., max=1., name='output')
    sess = session.Session()

    # Convert model and ensure model is not None.
    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
    converter.inference_type = lite_constants.QUANTIZED_UINT8
    converter.quantized_input_stats = [(0., 1.)]  # mean, std_dev
    tflite_model = converter.convert()
    self.assertTrue(tflite_model)

    # Check values from converted model.
    interpreter = Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    in_details = interpreter.get_input_details()
    self.assertEqual(1, len(in_details))
    self.assertEqual('input', in_details[0]['name'])
    self.assertEqual(np.uint8, in_details[0]['dtype'])
    self.assertTrue(([1, 16, 16, 3] == in_details[0]['shape']).all())
    # (scale, zero_point)
    self.assertEqual((1., 0.), in_details[0]['quantization'])

    out_details = interpreter.get_output_details()
    self.assertEqual(1, len(out_details))
    self.assertEqual('output', out_details[0]['name'])
    self.assertEqual(np.uint8, out_details[0]['dtype'])
    self.assertTrue(([1, 16, 16, 3] == out_details[0]['shape']).all())
    self.assertTrue(out_details[0]['quantization'][0] > 0)  # scale
Ejemplo n.º 10
0
    def testGraphDefQuantizationInvalid(self):
        """MLIR-disabled graph-def conversion without stats must raise."""
        with ops.Graph().as_default():
            input_a = array_ops.placeholder(
                shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputA")
            input_b = array_ops.placeholder(
                shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputB")
            _ = array_ops.fake_quant_with_min_max_args(
                input_a + input_b, min=0., max=1., name="output")
            sess = session.Session()

        input_arrays_map = [("inputA", [1, 16, 16, 3]),
                            ("inputB", [1, 16, 16, 3])]
        with self.assertRaises(ValueError) as error:
            convert.toco_convert_graph_def(sess.graph_def,
                                           input_arrays_map,
                                           ["output"],
                                           enable_mlir_converter=False,
                                           inference_type=dtypes.uint8)
        self.assertEqual(
            "The `quantized_input_stats` flag must be defined when either "
            "`inference_type` flag or `inference_input_type` flag is set to "
            "tf.int8 or tf.uint8.", str(error.exception))
Ejemplo n.º 11
0
 def testQuantization(self):
   """toco_convert with quantized input stats should yield a model."""
   in_tensor = array_ops.placeholder(
       shape=[1, 16, 16, 3], dtype=dtypes.float32)
   out_tensor = array_ops.fake_quant_with_min_max_args(
       in_tensor + in_tensor, min=0., max=1.)
   sess = session.Session()
   result = lite.toco_convert(
       sess.graph_def, [in_tensor], [out_tensor],
       inference_type=lite.QUANTIZED_UINT8,
       quantized_input_stats=[(0., 1.)])  # (mean, std_dev)
   self.assertTrue(result)
Ejemplo n.º 12
0
  def testQuantization(self):
    """convert.toco_convert succeeds when quantized input stats are given."""
    in_tensor = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32)
    out_tensor = array_ops.fake_quant_with_min_max_args(
        in_tensor + in_tensor, min=0., max=1.)
    sess = session.Session()

    tflite_model = convert.toco_convert(
        sess.graph_def, [in_tensor], [out_tensor],
        inference_type=lite_constants.QUANTIZED_UINT8,
        quantized_input_stats=[(0., 1.)])  # (mean, std_dev)
    self.assertTrue(tflite_model)
Ejemplo n.º 13
0
    def testGraphDefQuantization(self):
        """Two-input quantized graph-def conversion yields a uint8 model."""
        with ops.Graph().as_default():
            input_a = array_ops.placeholder(
                shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputA")
            input_b = array_ops.placeholder(
                shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputB")
            _ = array_ops.fake_quant_with_min_max_args(
                input_a + input_b, min=0., max=1., name="output")
            sess = session.Session()

        input_arrays_map = [("inputA", [1, 16, 16, 3]),
                            ("inputB", [1, 16, 16, 3])]
        tflite_model = convert.toco_convert_graph_def(
            sess.graph_def,
            input_arrays_map,
            ["output"],
            enable_mlir_converter=False,
            control_output_arrays=None,
            inference_type=dtypes.uint8,
            quantized_input_stats=[(0., 1.), (0., 1.)])
        self.assertTrue(tflite_model)

        # Check values from converted model.
        interpreter = Interpreter(model_content=tflite_model)
        interpreter.allocate_tensors()

        in_details = interpreter.get_input_details()
        self.assertEqual(2, len(in_details))
        # Both inputs share shape, dtype, and quantization parameters.
        for index, expected_name in enumerate(["inputA", "inputB"]):
            detail = in_details[index]
            self.assertEqual(expected_name, detail["name"])
            self.assertEqual(np.uint8, detail["dtype"])
            self.assertTrue(([1, 16, 16, 3] == detail["shape"]).all())
            # (scale, zero_point)
            self.assertEqual((1., 0.), detail["quantization"])

        out_details = interpreter.get_output_details()
        self.assertEqual(1, len(out_details))
        self.assertEqual("output", out_details[0]["name"])
        self.assertEqual(np.uint8, out_details[0]["dtype"])
        self.assertTrue(([1, 16, 16, 3] == out_details[0]["shape"]).all())
        self.assertGreater(out_details[0]["quantization"][0], 0)  # scale
Ejemplo n.º 14
0
    def testQuantization(self):
        """MLIR quantized conversion produces uint8 inputs and output."""
        input_a = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
        input_b = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
        out_tensor = array_ops.fake_quant_with_min_max_args(
            input_a + input_b, min=0., max=1., name='output')
        sess = session.Session()

        # Convert model and ensure model is not None.
        converter = lite.TFLiteConverter.from_session(
            sess, [input_a, input_b], [out_tensor])
        converter.experimental_enable_mlir_converter = True
        converter.inference_type = lite_constants.QUANTIZED_UINT8
        # (mean, std_dev) per named input.
        converter.quantized_input_stats = {
            'inputA': (0., 1.),
            'inputB': (0., 1.)
        }
        tflite_model = mlir_convert_and_check_for_unsupported(self, converter)
        if tflite_model is None:
            return

        # Check values from converted model.
        interpreter = Interpreter(model_content=tflite_model)
        interpreter.allocate_tensors()

        in_details = interpreter.get_input_details()
        self.assertEqual(2, len(in_details))
        # Both inputs share shape, dtype, and quantization parameters.
        for index, expected_name in enumerate(['inputA', 'inputB']):
            detail = in_details[index]
            self.assertEqual(expected_name, detail['name'])
            self.assertEqual(np.uint8, detail['dtype'])
            self.assertTrue(([1, 16, 16, 3] == detail['shape']).all())
            # (scale, zero_point)
            self.assertEqual((1., 0.), detail['quantization'])

        out_details = interpreter.get_output_details()
        self.assertEqual(1, len(out_details))
        self.assertEqual('add', out_details[0]['name'])
        self.assertEqual(np.uint8, out_details[0]['dtype'])
        self.assertTrue(([1, 16, 16, 3] == out_details[0]['shape']).all())
        self.assertGreater(out_details[0]['quantization'][0], 0)  # scale
Ejemplo n.º 15
0
def FixedQuantize(inputs, init_min=-6.0, init_max=6.0, scope=None):
  """Adds a fake quantize layer with a fixed quantization interval.

  Args:
    inputs: a tensor containing values to be quantized.
    init_min: the lower end of quantization interval.
    init_max: the upper end of quantization interval.
    scope: Optional scope for name_scope.

  Returns:
    a tensor containing quantized values.
  """
  with ops.name_scope(scope, 'FixedQuantize', values=[inputs]):
    quantized = array_ops.fake_quant_with_min_max_args(
        inputs, min=init_min, max=init_max)
  return quantized
Ejemplo n.º 16
0
def FixedQuantize(inputs, init_min=-6.0, init_max=6.0, scope=None):
  """Add a fake-quantize op clamping values to a fixed interval.

  Args:
    inputs: a tensor containing values to be quantized.
    init_min: the lower end of quantization interval.
    init_max: the upper end of quantization interval.
    scope: Optional scope for name_scope.

  Returns:
    a tensor containing quantized values.
  """
  with ops.name_scope(scope, 'FixedQuantize', values=[inputs]):
    result = array_ops.fake_quant_with_min_max_args(
        inputs, min=init_min, max=init_max)
    return result
Ejemplo n.º 17
0
  def testQuantizationInvalid(self):
    """Quantized inference without std_dev/mean must raise."""
    in_tensor = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32)
    out_tensor = array_ops.fake_quant_with_min_max_args(
        in_tensor + in_tensor, min=0., max=1.)
    sess = session.Session()

    # No quantized_input_stats are supplied, so conversion must fail.
    with self.assertRaises(ValueError) as error:
      convert.toco_convert(sess.graph_def, [in_tensor], [out_tensor],
                           inference_type=lite_constants.QUANTIZED_UINT8)
    self.assertEqual(
        "std_dev and mean must be defined when inference_input_type is "
        "QUANTIZED_UINT8.", str(error.exception))
Ejemplo n.º 18
0
    def testQuantization(self):
        """convert_graphdef succeeds with uint8 inference and input stats."""
        with ops.Graph().as_default():
            in_tensor = array_ops.placeholder(
                shape=[1, 16, 16, 3], dtype=dtypes.float32)
            out_tensor = array_ops.fake_quant_with_min_max_args(
                in_tensor + in_tensor, min=0., max=1.)
            sess = session.Session()

        tflite_model = convert.convert_graphdef(
            sess.graph_def,
            input_tensors=[in_tensor],
            output_tensors=[out_tensor],
            inference_type=dtypes.uint8,
            quantized_input_stats=[(0., 1.)])  # (mean, std_dev)
        self.assertTrue(tflite_model)
Ejemplo n.º 19
0
  def testGraphDefQuantization(self):
    """Quantized graph-def conversion yields a uint8 model with two inputs."""
    input_a = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputA")
    input_b = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputB")
    _ = array_ops.fake_quant_with_min_max_args(
        input_a + input_b, min=0., max=1., name="output")
    sess = session.Session()

    input_arrays_map = [("inputA", [1, 16, 16, 3]), ("inputB", [1, 16, 16, 3])]
    tflite_model = convert.toco_convert_graph_def(
        sess.graph_def,
        input_arrays_map,
        ["output"],
        inference_type=lite_constants.QUANTIZED_UINT8,
        quantized_input_stats=[(0., 1.), (0., 1.)])
    self.assertTrue(tflite_model)

    # Check values from converted model.
    interpreter = Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    in_details = interpreter.get_input_details()
    self.assertEqual(2, len(in_details))
    # Both inputs share shape, dtype, and quantization parameters.
    for index, expected_name in enumerate(["inputA", "inputB"]):
      detail = in_details[index]
      self.assertEqual(expected_name, detail["name"])
      self.assertEqual(np.uint8, detail["dtype"])
      self.assertTrue(([1, 16, 16, 3] == detail["shape"]).all())
      # (scale, zero_point)
      self.assertEqual((1., 0.), detail["quantization"])

    out_details = interpreter.get_output_details()
    self.assertEqual(1, len(out_details))
    self.assertEqual("output", out_details[0]["name"])
    self.assertEqual(np.uint8, out_details[0]["dtype"])
    self.assertTrue(([1, 16, 16, 3] == out_details[0]["shape"]).all())
    self.assertTrue(out_details[0]["quantization"][0] > 0)  # scale
Ejemplo n.º 20
0
  def testQuantizationInvalid(self):
    """Missing stats for 'inputB' must raise during conversion."""
    input_a = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
    input_b = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
    out_tensor = array_ops.fake_quant_with_min_max_args(
        input_a + input_b, min=0., max=1., name='output')
    sess = session.Session()

    # Convert model and ensure model is not None.
    converter = lite.TFLiteConverter.from_session(
        sess, [input_a, input_b], [out_tensor])
    converter.inference_type = lite_constants.QUANTIZED_UINT8
    # (mean, std_dev) only for 'inputA'; 'inputB' is deliberately missing.
    converter.quantized_input_stats = {'inputA': (0., 1.)}
    with self.assertRaises(ValueError) as error:
      converter.convert()
    self.assertEqual(
        'Quantization input stats are not available for input tensors '
        '\'inputB\'.', str(error.exception))
    def _TestOp(self, input_min, input_max, num_bits, narrow_range,
                expected_nudged_input_min, expected_nudged_input_max,
                expected_step):
        """Run fake_quant_with_min_max_args and compare with nudged values."""
        nudged_min = expected_nudged_input_min
        nudged_max = expected_nudged_input_max
        step = expected_step
        # Probe points just below, at, and just above the nudged boundaries.
        inputs = np.array([
            nudged_min - step, nudged_min - 0.01, nudged_min,
            nudged_min + 0.01, nudged_min + step - 0.01, nudged_min + step,
            nudged_min + step + 0.01, nudged_max - 0.01, nudged_max,
            nudged_max + 0.01, nudged_max + step
        ], dtype=np.float32)
        # Each probe snaps to the nearest quantization level.
        expected = np.array([
            nudged_min, nudged_min, nudged_min, nudged_min,
            nudged_min + step, nudged_min + step, nudged_min + step,
            nudged_max, nudged_max, nudged_max, nudged_max
        ], dtype=np.float32)

        with self.test_session() as session:
            with self.test_scope():
                input_placeholder = array_ops.placeholder(
                    dtypes.float32, inputs.shape, name="inputs")
                outputs = array_ops.fake_quant_with_min_max_args(
                    input_placeholder,
                    min=input_min,
                    max=input_max,
                    num_bits=num_bits,
                    narrow_range=narrow_range)
            result = session.run(outputs, {input_placeholder: inputs})
            self.assertAllCloseAccordingToType(
                result, expected, rtol=1e-3, atol=1e-5, bfloat16_rtol=0.03)
Ejemplo n.º 22
0
  def testGraphDefQuantizationInvalid(self):
    """Graph-def conversion without input stats must raise a ValueError."""
    input_a = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputA")
    input_b = array_ops.placeholder(
        shape=[1, 16, 16, 3], dtype=dtypes.float32, name="inputB")
    _ = array_ops.fake_quant_with_min_max_args(
        input_a + input_b, min=0., max=1., name="output")
    sess = session.Session()

    input_arrays_map = [("inputA", [1, 16, 16, 3]), ("inputB", [1, 16, 16, 3])]
    with self.assertRaises(ValueError) as error:
      convert.toco_convert_graph_def(
          sess.graph_def,
          input_arrays_map,
          ["output"],
          inference_type=lite_constants.QUANTIZED_UINT8)
    self.assertEqual(
        "std_dev and mean must be defined when inference_input_type is "
        "QUANTIZED_UINT8.", str(error.exception))
Ejemplo n.º 23
0
  def _TestOp(self, input_min, input_max, num_bits, narrow_range,
              expected_nudged_input_min, expected_nudged_input_max,
              expected_step):
    """Run fake_quant_with_min_max_args and compare with nudged values."""
    nudged_min = expected_nudged_input_min
    nudged_max = expected_nudged_input_max
    step = expected_step
    # Probe points just below, at, and just above the nudged boundaries.
    inputs = np.array([
        nudged_min - step, nudged_min - 0.01, nudged_min,
        nudged_min + 0.01, nudged_min + step - 0.01, nudged_min + step,
        nudged_min + step + 0.01, nudged_max - 0.01, nudged_max,
        nudged_max + 0.01, nudged_max + step
    ], dtype=np.float32)
    # Each probe snaps to the nearest quantization level.
    expected = np.array([
        nudged_min, nudged_min, nudged_min, nudged_min,
        nudged_min + step, nudged_min + step, nudged_min + step,
        nudged_max, nudged_max, nudged_max, nudged_max
    ], dtype=np.float32)

    with self.cached_session() as session:
      with self.test_scope():
        input_placeholder = array_ops.placeholder(
            dtypes.float32, inputs.shape, name="inputs")
        outputs = array_ops.fake_quant_with_min_max_args(
            input_placeholder,
            min=input_min,
            max=input_max,
            num_bits=num_bits,
            narrow_range=narrow_range)
      result = session.run(outputs, {input_placeholder: inputs})
      self.assertAllCloseAccordingToType(
          result, expected, rtol=1e-3, atol=1e-5, bfloat16_rtol=0.03)
Ejemplo n.º 24
0
    def testQuantization(self):
        """Quantized conversion produces a uint8 model with expected details."""
        in_tensor = array_ops.placeholder(
            shape=[1, 16, 16, 3], dtype=dtypes.float32, name='input')
        out_tensor = array_ops.fake_quant_with_min_max_args(
            in_tensor + in_tensor, min=0., max=1., name='output')
        sess = session.Session()

        # Convert model and ensure model is not None.
        converter = lite.TocoConverter.from_session(
            sess, [in_tensor], [out_tensor])
        converter.inference_type = lite_constants.QUANTIZED_UINT8
        converter.quantized_input_stats = [(0., 1.)]  # mean, std_dev
        tflite_model = converter.convert()
        self.assertTrue(tflite_model)

        # Check values from converted model.
        interpreter = Interpreter(model_content=tflite_model)
        interpreter.allocate_tensors()

        in_details = interpreter.get_input_details()
        self.assertEqual(1, len(in_details))
        self.assertEqual('input', in_details[0]['name'])
        self.assertEqual(np.uint8, in_details[0]['dtype'])
        self.assertTrue(([1, 16, 16, 3] == in_details[0]['shape']).all())
        # (scale, zero_point)
        self.assertEqual((1., 0.), in_details[0]['quantization'])

        out_details = interpreter.get_output_details()
        self.assertEqual(1, len(out_details))
        self.assertEqual('output', out_details[0]['name'])
        self.assertEqual(np.uint8, out_details[0]['dtype'])
        self.assertTrue(([1, 16, 16, 3] == out_details[0]['shape']).all())
        self.assertTrue(out_details[0]['quantization'][0] > 0)  # scale