Esempio n. 1
0
def LastValueLogThQuantize(inputs,
                           log_th_var,
                           min_var,
                           max_var,
                           bit_width,
                           is_training,
                           mode,
                           name_scope="LastValueLogThQuantize"):
    """Last-value power-of-2 quantize op driven by a log threshold.

    In 'ANALYSE' mode, and whenever `is_training` is truthy or `mode` is
    'QCB', the batch range reported by `get_min_max` is assigned to
    `min_var` / `max_var`. 'ANALYSE' then returns `inputs` through an
    identity op; every other path quantizes `inputs` with `log_th_var`.

    Args:
      inputs: Values to quantize.
      log_th_var: Variable holding the log threshold.
      min_var: Variable that receives the batch minimum.
      max_var: Variable that receives the batch maximum.
      bit_width: Bit width handed to `fake_quantize_with_log_th`.
      is_training: Whether the op is built for training.
      mode: Quantization mode string; 'ANALYSE' and 'QCB' are special-cased.
      name_scope: Name scope under which the ops are created.

    Returns:
      The identity of `inputs` in 'ANALYSE' mode, otherwise the result of
      `fake_quantize_with_log_th`.
    """
    with tf.name_scope(name_scope):
        analysing = mode == 'ANALYSE'

        # ANALYSE, training and calibration all record the latest batch range.
        if analysing or is_training or mode == 'QCB':
            observed_min, observed_max = get_min_max(inputs)
            tf_compat.assign(min_var, observed_min, name='assign_min')
            tf_compat.assign(max_var, observed_max, name='assign_max')
            if analysing:
                return tf.identity(inputs, name='identity')

        # Training, calibration and evaluation quantize the same way.
        return fake_quantize_with_log_th(inputs, log_th_var, bit_width)
Esempio n. 2
0
def LastValueQuantPosQuantize(inputs,
                              quant_pos_var,
                              min_var,
                              max_var,
                              bit_width,
                              method,
                              is_training,
                              mode,
                              round_mode,
                              name_scope="LastValueQuantPosQuantize"):
    """Last-value power-of-2 quantize op driven by a quantize position.

    In 'ANALYSE' mode, and whenever `is_training` is truthy or `mode` is
    'QCB', the batch range reported by `get_min_max` is assigned to
    `min_var` / `max_var`. 'ANALYSE' then returns `inputs` through an
    identity op. Training/calibration additionally derives a quantize
    position with `get_quantize_pos`, assigns it to `quant_pos_var` and
    quantizes with the assigned value; evaluation quantizes with
    `quant_pos_var` directly.

    Args:
      inputs: Values to quantize.
      quant_pos_var: Variable holding the quantize position.
      min_var: Variable that receives the batch minimum.
      max_var: Variable that receives the batch maximum.
      bit_width: Bit width handed to the helpers.
      method: Forwarded unchanged to `get_quantize_pos`.
      is_training: Whether the op is built for training.
      mode: Quantization mode string; 'ANALYSE' and 'QCB' are special-cased.
      round_mode: 0 selects `fake_quantize_with_quantize_pos_std`, 1 selects
        `fake_quantize_with_quantize_pos_dpu`.
      name_scope: Name scope under which the ops are created.

    Returns:
      The identity of `inputs` in 'ANALYSE' mode, otherwise the quantized
      inputs.

    Raises:
      ValueError: If `round_mode` is neither 0 nor 1 (not checked in
        'ANALYSE' mode).
    """
    with tf.name_scope(name_scope):
        analysing = mode == 'ANALYSE'
        # Evaluation quantizes with the stored position; the training and
        # calibration path below swaps in the freshly assigned one.
        position = quant_pos_var

        if analysing or is_training or mode == 'QCB':
            observed_min, observed_max = get_min_max(inputs)
            assign_min = tf_compat.assign(min_var,
                                          observed_min,
                                          name='assign_min')
            assign_max = tf_compat.assign(max_var,
                                          observed_max,
                                          name='assign_max')
            if analysing:
                return tf.identity(inputs, name='identity')

            observed_pos = get_quantize_pos(inputs, assign_min, assign_max,
                                            bit_width, method)
            position = tf_compat.assign(quant_pos_var,
                                        observed_pos,
                                        name="assign_quantize_pos")

        if round_mode == 0:
            return fake_quantize_with_quantize_pos_std(inputs, position,
                                                       bit_width)
        if round_mode == 1:
            return fake_quantize_with_quantize_pos_dpu(inputs, position,
                                                       bit_width)
        raise ValueError('Invalid round mode: {}'.format(round_mode))
      def update():
        """Build the grouped threshold and mask assignments for all pruning vars."""
        pending = []

        for weight, mask, threshold in self._pruning_vars:
          fresh_threshold, fresh_mask = self._maybe_update_block_mask(weight)
          # Threshold first, then mask, for each pruned weight.
          pending.extend((
              tf_compat.assign(threshold, fresh_threshold),
              tf_compat.assign(mask, fresh_mask),
          ))

        return tf.group(pending)
Esempio n. 4
0
def LastValueMinMaxQuantize(inputs,
                            min_var,
                            max_var,
                            bit_width,
                            is_training,
                            mode,
                            name_scope="LastValueMinMaxQuantize"):
    """Last-value float-scale quantize op.

    Evaluation quantizes `inputs` with the stored `min_var` / `max_var`.
    Every other path first assigns the batch range reported by
    `get_min_max` to those variables; 'ANALYSE' then returns `inputs`
    through an identity op, while training/calibration quantizes with the
    freshly assigned range.

    Args:
      inputs: Values to quantize.
      min_var: Variable holding the range minimum.
      max_var: Variable holding the range maximum.
      bit_width: Bit width handed to `fake_quantize_with_min_max`.
      is_training: Whether the op is built for training.
      mode: Quantization mode string; 'ANALYSE' and 'QCB' are special-cased.
      name_scope: Name scope under which the ops are created.

    Returns:
      The identity of `inputs` in 'ANALYSE' mode, otherwise the result of
      `fake_quantize_with_min_max`.
    """
    with tf.name_scope(name_scope):
        analysing = mode == 'ANALYSE'

        # Evaluation branch: rely on the range already stored in the variables.
        if not analysing and not (is_training or mode == 'QCB'):
            return fake_quantize_with_min_max(inputs, min_var, max_var,
                                              bit_width)

        observed_min, observed_max = get_min_max(inputs)
        assign_min = tf_compat.assign(min_var, observed_min, name='assign_min')
        assign_max = tf_compat.assign(max_var, observed_max, name='assign_max')

        if analysing:
            return tf.identity(inputs, name='identity')

        # Training and calibration branch: quantize with the assigned range.
        return fake_quantize_with_min_max(inputs, assign_min, assign_max,
                                          bit_width)
Esempio n. 5
0
    def _weight_assign_objs(self):
        """Collect the objects that perform weights <= weights * mask.

        Outside a replica context, one `tf_compat.assign` is built per entry
        of `self._pruning_vars`. Inside a replica context, the masked weights
        are handed to a single `merge_call`, which reduces them with
        `ReduceOp.MEAN` and updates each weight variable.

        Returns:
          A list of assignment objects. It is empty when there is nothing to
          prune, and holds a single merged entry in replica context.
        """
        def update_fn(distribution, values_and_vars):
            # TODO(yunluli): Need this ReduceOp because the weight is created by the
            # layer wrapped, so we don't have control of its aggregation policy. May
            # be able to optimize this when distribution strategy supports easier
            # update to mirrored variables in replica context.
            reduced = distribution.extended.batch_reduce_to(
                tf.distribute.ReduceOp.MEAN, values_and_vars)
            targets = [target for _, target in values_and_vars]

            def update_var(variable, reduced_value):
                return tf_compat.assign(variable, reduced_value)

            return tf.group([
                distribution.extended.update(target,
                                             update_var,
                                             args=(value, ))
                for value, target in zip(reduced, targets)
            ])

        replica_context = tf.distribute.get_replica_context()

        if not replica_context:
            return [
                tf_compat.assign(weight, tf.math.multiply(weight, mask))
                for weight, mask, _ in self._pruning_vars
            ]

        masked_pairs = [(tf.math.multiply(weight, mask), weight)
                        for weight, mask, _ in self._pruning_vars]
        if not masked_pairs:
            return []
        return [replica_context.merge_call(update_fn, args=(masked_pairs, ))]
Esempio n. 6
0
 def increment_step():
     """Return a no-op named 'update' that depends on bumping `pruning_step` by one."""
     bump_step = tf_compat.assign(self.pruning_step, self.pruning_step + 1)
     with tf.control_dependencies([bump_step]):
         return tf.no_op('update')
Esempio n. 7
0
 def increment_step():
     """Assign `pruning_step + 1` to `self.pruning_step` and return the assignment."""
     next_step = self.pruning_step + 1
     return tf_compat.assign(self.pruning_step, next_step)
Esempio n. 8
0
 def update(var, value):
     """Assign `value` to `var` and return what `tf_compat.assign` returns."""
     assignment = tf_compat.assign(var, value)
     return assignment
Esempio n. 9
0
 def update_var(variable, reduced_value):
     """Assign `reduced_value` to `variable` and return what `tf_compat.assign` returns."""
     assignment = tf_compat.assign(variable, reduced_value)
     return assignment
Esempio n. 10
0
def TQTQuantize(inputs,
                log_th_var,
                min_var,
                max_var,
                bit_width,
                method,
                round_mode,
                mode,
                is_training,
                symmetry,
                per_channel,
                channel_axis,
                narrow_range=False,
                name_scope="TQTQuantize"):
  """Power-of-2 quantize op with log threshold.

  Evaluation quantizes with the stored `log_th_var`, `min_var` and `max_var`.
  'ANALYSE' mode, training and 'QCB' calibration first assign the batch range
  reported by `get_min_max` to `min_var` / `max_var`. 'ANALYSE' then returns
  the inputs unchanged; 'QCB' additionally derives and assigns a log
  threshold before quantizing.

  Args:
    inputs: Input values.
    log_th_var: Variable of log threshold.
    min_var: Variable of minimum value of inputs.
    max_var: Variable of maximum value of inputs.
    bit_width: Int, bit width of quantized values.
    method: Int Enum, method of how to get the initial log threshold, 0 for
      non_overflow.
    round_mode: Int, the mode of rounding function, 0 for HALF_TO_EVEN, 1 for
      HALF_UP, 2 for HALF_AWAY_FROM_ZERO.
    mode: String, the mode of quantization, available modes are
      ['ANALYSE', 'QCB', 'QCBEV', 'QAT'].
    is_training: Bool, whether in training phase.
    symmetry: Bool, whether to apply symmetry quantization.
    per_channel: Bool, whether to apply per_channel quantization.
    channel_axis: The axis of the channel, used with per_channel enabled. The
      last dimension is regarded as channel axis and other dimensions will be
      reduced by default.
    narrow_range: Bool, whether to use the narrow quantization range
      [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
    name_scope: Name scope under which the ops are created.

  Return:
    Quantized inputs, or the identity of `inputs` in 'ANALYSE' mode.
  """
  with tf.name_scope(name_scope):
    # Reduction axes are only needed for per-channel quantization.
    reduce_dims = (
        convert_channel_axis_to_reduce_dims(
            len(inputs.get_shape()), channel_axis) if per_channel else None)

    quantize_kernel = TQTFakeQuantize(
        bit_width=bit_width,
        method=method,
        round_mode=round_mode,
        symmetry=symmetry,
        per_channel=per_channel,
        narrow_range=narrow_range,
        reduce_dims=reduce_dims)

    analysing = mode == 'ANALYSE'

    # Evaluation branch: everything comes from the stored variables.
    if not analysing and not (is_training or mode == 'QCB'):
      return quantize_kernel.call(inputs, log_th_var, min_var, max_var)

    # ANALYSE, training and calibration all record the batch range.
    observed_min, observed_max = get_min_max(
        inputs,
        bit_width,
        symmetry=symmetry,
        per_channel=per_channel,
        narrow_range=narrow_range,
        reduce_dims=reduce_dims)
    assign_min = tf_compat.assign(min_var, observed_min, name='assign_min')
    assign_max = tf_compat.assign(max_var, observed_max, name='assign_max')

    if analysing:
      return tf.identity(inputs, name='identity')

    if mode != 'QCB':
      # Training outside calibration keeps the stored log threshold.
      return quantize_kernel.call(inputs, log_th_var, assign_min, assign_max)

    # Calibration: derive the log threshold from the assigned range.
    observed_log_th = quantize_kernel.get_log_th(inputs, assign_min,
                                                 assign_max)
    assign_log_th = tf_compat.assign(
        log_th_var, observed_log_th, name="assign_log_th")
    return quantize_kernel.call(inputs, assign_log_th, assign_min, assign_max)
Esempio n. 11
0
def FSQuantize(
    inputs,
    min_var,
    max_var,
    calib_hist,
    calib_bin_edges,
    bit_width,
    method,
    round_mode,
    mode,
    is_training,
    symmetry,
    per_channel,
    channel_axis,
    use_framework_quant=True,
    narrow_range=False,
    name_scope="FSQuantize",
):
  """Float scale quantize op.

  Evaluation quantizes with the stored `min_var` / `max_var`. 'ANALYSE' mode
  assigns the batch range to those variables and returns the inputs
  unchanged. Training and 'QCB' calibration depend on `method`:
  NON_OVERFLOW and MIN_MSE widen the stored range with the batch range and
  quantize with it, while MIN_KL and PERCENTILE only update the calibration
  histogram and return the inputs unchanged.

  Args:
    inputs: Input values.
    min_var: Variable of minimum value of inputs.
    max_var: Variable of maximum value of inputs.
    calib_hist: Variable of histogram of inputs.
    calib_bin_edges: Variable of linspace of inputs.
    bit_width: Int, bit width of quantized values.
    method: Method used to quantize the input values; converted with
      `QuantizeMethod(method)` on the training/calibration path.
    round_mode: Int, the mode of rounding function, 0 for HALF_TO_EVEN, 1 for
      HALF_UP, 2 for HALF_AWAY_FROM_ZERO.
    mode: String, the mode of quantization, available modes are
      ['ANALYSE', 'QCB', 'QCBEV', 'QAT'].
    is_training: Bool, whether in training phase.
    symmetry: Bool, whether to apply symmetry quantization.
    per_channel: Bool, whether to apply per_channel quantization.
    channel_axis: The axis of the channel, used with per_channel enabled. The
      last dimension is regarded as channel axis and other dimensions will be
      reduced by default.
    use_framework_quant: Bool, whether to use the tensorflow fake_quantize
      operations. If not, the custom quantize kernel will be used.
    narrow_range: Bool, whether to use the narrow quantization range
      [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
    name_scope: Name scope under which the ops are created.

  Return:
    Quantized inputs, or the identity of `inputs` on the paths that only
    collect statistics.
  """
  with tf.name_scope(name_scope):
    # Reduction axes are only needed for per-channel quantization.
    reduce_dims = (
        convert_channel_axis_to_reduce_dims(
            len(inputs.get_shape()), channel_axis) if per_channel else None)

    quantize_kernel = FSFakeQuantize(
        bit_width=bit_width,
        round_mode=round_mode,
        symmetry=symmetry,
        per_channel=per_channel,
        use_framework_quant=use_framework_quant,
        narrow_range=narrow_range,
        reduce_dims=reduce_dims)

    def observe_range(range_method):
      # Shared get_min_max call; only the method argument differs per branch.
      return get_min_max(
          inputs,
          bit_width,
          range_method,
          symmetry=symmetry,
          per_channel=per_channel,
          narrow_range=narrow_range,
          reduce_dims=reduce_dims)

    # ANALYSE branch: record the batch range, pass the inputs through.
    if mode == 'ANALYSE':
      observed_min, observed_max = observe_range(method)
      tf_compat.assign(min_var, observed_min, name='assign_min')
      tf_compat.assign(max_var, observed_max, name='assign_max')
      return tf.identity(inputs, name='identity')

    # Evaluation branch: quantize with the stored range.
    if not (is_training or mode == 'QCB'):
      return quantize_kernel.call(inputs, min_var, max_var)

    # Training and calibration branch.
    quant_method = QuantizeMethod(method)

    if (quant_method == QuantizeMethod.NON_OVERFLOW or
        quant_method == QuantizeMethod.MIN_MSE):
      observed_min, observed_max = observe_range(quant_method)
      # Widen the stored range so that it also covers the current batch.
      widened_min = tf.math.minimum(min_var, observed_min)
      widened_max = tf.math.maximum(max_var, observed_max)
      assign_min = tf_compat.assign(min_var, widened_min, name='assign_min')
      assign_max = tf_compat.assign(max_var, widened_max, name='assign_max')
      return quantize_kernel.call(inputs, assign_min, assign_max)

    if (quant_method == QuantizeMethod.MIN_KL or
        quant_method == QuantizeMethod.PERCENTILE):
      # Histogram-based methods only accumulate statistics at this stage.
      new_hist, new_bin_edges = calibrator_numpy.numpy_collect(
          inputs, calib_hist, calib_bin_edges)
      tf_compat.assign(calib_hist, new_hist, name='calib_hist')
      tf_compat.assign(
          calib_bin_edges, new_bin_edges, name='calib_bin_edges')
      return tf.identity(inputs, name='identity')

    logger.error('Invalid method: {}'.format(quant_method))
    return tf.identity(inputs, name='identity')
Esempio n. 12
0
def AllValuesQuantize(inputs,
                      min_var,
                      max_var,
                      name_prefix='AllValuesQuantize',
                      is_training=True,
                      num_bits=8,
                      narrow_range=False,
                      symmetric=False):
    """Fake-quantize `inputs` over the range of all values seen so far.

    When training, the range stored in `min_var` / `max_var` is widened to
    cover the current batch (and always 0.0), written back to the variables
    and used for quantization. When not training, the stored range is used
    unchanged.

    Args:
      inputs: a tensor containing values to be quantized.
      min_var: Variable which stores the min value of tensor.
      max_var: Variable which stores the max value of tensor.
      name_prefix: name scope for the created nodes.
      is_training: Whether the op is applied to a training or eval graph.
      num_bits: Number of bits to use for quantization; expected to be
        between 2 and 8 (not validated here).
      narrow_range: Whether to use the narrow quantization range
        [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
      symmetric: If true, use symmetric quantization limits instead of
        tracking the minimum and maximum of the range separately.

    Returns:
      a tensor containing quantized values.
    """
    with tf.name_scope(name_prefix):
        if not is_training:
            return _FakeQuantWithMinMaxVars(inputs,
                                            min_var,
                                            max_var,
                                            per_channel=False,
                                            num_bits=num_bits,
                                            narrow_range=narrow_range)

        lower = tf.math.reduce_min(inputs, name='BatchMin')
        upper = tf.math.reduce_max(inputs, name='BatchMax')

        if symmetric:
            # In two's complement notation, the negative range is slightly
            # larger than the positive range unless narrow_range is used.
            levels = 1 << num_bits
            ratio = -1 if narrow_range else -(levels - 2) / levels

            # TFLite requires that 0.0 is always in the [min; max] range.
            # Because lower <= upper, it follows that the symmetric bounds
            # satisfy range_min <= 0 <= range_max.
            mirrored_upper = upper / ratio
            lower = tf.math.minimum(lower, mirrored_upper)
            mirrored_lower = lower * ratio
            upper = tf.math.maximum(upper, mirrored_lower)

        # TFLite requires that 0.0 is always in the [min; max] range.
        running_min = tf.math.minimum(min_var, lower)
        range_min = tf.math.minimum(running_min, 0.0)
        running_max = tf.math.maximum(max_var, upper)
        range_max = tf.math.maximum(running_max, 0.0)

        assign_min = tf_compat.assign(min_var,
                                      range_min,
                                      name='AssignMinAllValue')
        assign_max = tf_compat.assign(max_var,
                                      range_max,
                                      name='AssignMaxAllValue')

        return _FakeQuantWithMinMaxVars(inputs,
                                        assign_min,
                                        assign_max,
                                        per_channel=False,
                                        num_bits=num_bits,
                                        narrow_range=narrow_range)
Esempio n. 13
0
def LastValueQuantize(inputs,
                      min_var,
                      max_var,
                      per_channel=False,
                      name_prefix='LastValueQuant',
                      is_training=True,
                      num_bits=8,
                      narrow_range=False,
                      symmetric=False):
    """Fake-quantize `inputs` using the range of the most recent batch.

    When training, the range of the current batch (extended to contain 0.0)
    is written to `min_var` / `max_var` and used for quantization. When not
    training, the range stored in the variables is used unchanged.

    Args:
      inputs: a tensor containing values to be quantized.
      min_var: Variable which stores the min value of the range.
      max_var: Variable which stores the max value of the range.
      per_channel: (Optional) a boolean specifying whether to use different
        quantization ranges per output channel. The last dimension is treated
        as the channel axis and all other dimensions are reduced. Inputs of
        rank below 2 are used directly as both min and max.
      name_prefix: name scope for the created nodes.
      is_training: Whether the op is applied to a training or eval graph.
      num_bits: Number of bits to use for quantization; expected to be
        between 2 and 8 (not validated here).
      narrow_range: Whether to use the narrow quantization range
        [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
      symmetric: If true, use symmetric quantization limits instead of
        tracking the minimum and maximum of the range separately.

    Returns:
      a tensor containing quantized values.
    """
    with tf.name_scope(name_prefix):
        input_shape = inputs.get_shape()
        input_dim = len(input_shape)

        if not is_training:
            return _FakeQuantWithMinMaxVars(inputs,
                                            min_var,
                                            max_var,
                                            per_channel=per_channel,
                                            num_bits=num_bits,
                                            narrow_range=narrow_range)

        if not per_channel:
            batch_min = tf.math.reduce_min(inputs, name='BatchMin')
            batch_max = tf.math.reduce_max(inputs, name='BatchMax')
        elif input_dim >= 2:
            # Reduce over every axis except the trailing channel axis. This
            # yields [0] for rank 2 and [0, 1, 2] for rank 4, and also covers
            # other ranks (e.g. 3 or 5), which previously left `reduce_dims`
            # unbound and failed with UnboundLocalError.
            reduce_dims = list(range(input_dim - 1))
            batch_min = tf.math.reduce_min(inputs,
                                           axis=reduce_dims,
                                           name='BatchMin')
            batch_max = tf.math.reduce_max(inputs,
                                           axis=reduce_dims,
                                           name='BatchMax')
        else:
            # Nothing to reduce: every element is its own channel.
            batch_min = inputs
            batch_max = inputs

        if symmetric:
            if narrow_range:
                min_max_ratio = -1
            else:
                # In two's complement notation, the negative range is slightly
                # larger than the positive range.
                min_max_ratio = -((1 << num_bits) - 2) / (1 << num_bits)

            # TFLite requires that 0.0 is always in the [min; max] range.
            # Because batch_min <= batch_max, it follows that
            # range_min <= 0 <= range_max.
            range_min = tf.math.minimum(batch_min, batch_max / min_max_ratio)
            range_max = tf.math.maximum(batch_max, batch_min * min_max_ratio)
        else:
            # TFLite requires that 0.0 is always in the [min; max] range.
            range_min = tf.math.minimum(batch_min, 0.0)
            range_max = tf.math.maximum(batch_max, 0.0)

        assign_min = tf_compat.assign(min_var, range_min, name='AssignMinLast')
        assign_max = tf_compat.assign(max_var, range_max, name='AssignMaxLast')

        return _FakeQuantWithMinMaxVars(inputs,
                                        assign_min,
                                        assign_max,
                                        per_channel=per_channel,
                                        num_bits=num_bits,
                                        narrow_range=narrow_range)