Example #1
def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay):
    """Finds unfused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, True if training.
    freeze_batch_norm_delay: How many steps to wait before freezing moving mean
      and variance and using them for batch normalization.

  Raises:
    ValueError: When batch norm folding fails.
  """
    input_to_ops_map = input_to_ops.InputToOps(graph)

    for bn in common.BatchNormGroups(graph):
        has_scaling = _HasScaling(graph, input_to_ops_map, bn)

        if not _IsValidUnfusedBatchNorm(graph, bn):
            continue

        # The mangling code intimately depends on BatchNorm node's internals.
        original_op, folded_op = _CreateFoldedOp(
            graph,
            bn,
            has_scaling=has_scaling,
            freeze_batch_norm_delay=freeze_batch_norm_delay,
            is_training=is_training)

        activation = common.GetEndpointActivationOp(graph, bn)
        if activation:
            nodes_modified_count = common.RerouteTensor(
                folded_op.outputs[0],
                original_op.outputs[0],
                can_modify=[activation])
            if nodes_modified_count != 1:
                raise ValueError('Unexpected inputs to op: %s' %
                                 activation.name)
            continue

        # Treat consumer ops in bypass modules differently since they have Add
        # operations instead of Relu* above.
        add_bypass_ctx = re.search(r'^(.*)/([^/]+)', bn).group(1)
        add_bypass = graph.get_operation_by_name(add_bypass_ctx + '/Add')
        nodes_modified_count = common.RerouteTensor(folded_op.outputs[0],
                                                    original_op.outputs[0],
                                                    can_modify=[add_bypass])
        if nodes_modified_count != 1:
            raise ValueError('Unexpected inputs to op: %s' % add_bypass.name)
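The bypass handling above derives the enclosing scope from the batch norm's name with a regular expression. A small illustration with a hypothetical scope name (not taken from the original code):

import re

bn = 'resnet/block1/BatchNorm'  # hypothetical batch norm scope name
add_bypass_ctx = re.search(r'^(.*)/([^/]+)', bn).group(1)
assert add_bypass_ctx == 'resnet/block1'  # the '/Add' op is looked up in this scope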
Example #2
def insert_quant_op(graph, node_name, is_train):
    """Insert quantization operations to the specified activation node.

  Args:
  * graph: TensorFlow graph
  * node_name: activation node's name
  * is_train: insert training-related operations or not
  """

    # locate the node & activation operation
    for op in graph.get_operations():
        if node_name in [node.name for node in op.outputs]:
            tf.logging.info('op: {} / inputs: {} / outputs: {}'.format(
                op.name, [node.name for node in op.inputs],
                [node.name for node in op.outputs]))
            node = op.outputs[0]
            activation_op = op
            break

    # re-route the graph to insert quantization operations
    input_to_ops_map = input_to_ops.InputToOps(graph)
    consumer_ops = input_to_ops_map.ConsumerOperations(activation_op)
    node_quant = quant_ops.MovingAvgQuantize(
        node, is_training=is_train, num_bits=FLAGS.uqtf_activation_bits)
    nb_update_inputs = common.RerouteTensor(node_quant, node, consumer_ops)
    tf.logging.info('nb_update_inputs = %d' % nb_update_inputs)
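The lookup loop at the top simply finds the op that produces the named tensor. A standalone sketch of that lookup on a tiny throwaway graph (the tensor names here are placeholders, not from the original project):

import tensorflow as tf

g = tf.Graph()
with g.as_default():
    x = tf.constant([1.0, -2.0, 3.0], name='x')
    act = tf.nn.relu(x, name='act')

node_name = 'act:0'
# Find the operation whose output list contains the requested tensor name.
activation_op = next(op for op in g.get_operations()
                     if node_name in [t.name for t in op.outputs])
print(activation_op.name)  # 'act'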
Example #3
File: calib.py  Project: pyjennings/tf_pg
def quantize(graph, quantize_info):
    """Quantize the graph with quantize_info.

  Args:
    graph: Graph to be modified.
    quantize_info: Quantization info in dictionary format.
  Raises:
    ValueError: When quantization fails.
  """
    for tensor_name, min_max in quantize_info.items():
        tensor = graph.get_tensor_by_name(tensor_name)
        name = tensor_name.split(':')[0]
        consumers = tensor.consumers()
        quant = array_ops.fake_quant_with_min_max_args(tensor,
                                                       min=min_max[0],
                                                       max=min_max[1],
                                                       name=name +
                                                       '/fakequant')

        if consumers:
            modified_count = common.RerouteTensor(quant,
                                                  tensor,
                                                  can_modify=consumers)
            # Some operations can have multiple output tensors going to the same
            # consumer. Since consumers is a set, we need to ensure that
            # modified_count is greater than or equal to the length of the set
            # of consumers.
            if modified_count < len(consumers):
                raise ValueError(
                    'No inputs quantized for ops: [%s]' %
                    ', '.join([consumer.name for consumer in consumers]))
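Here quantize_info maps tensor names to (min, max) pairs, as implied by the min_max[0] / min_max[1] indexing. For intuition, a simplified NumPy approximation of what fake_quant_with_min_max_args does with such a range (the real op additionally nudges the range so that zero is exactly representable):

import numpy as np

def fake_quant(x, min=-6.0, max=6.0, num_bits=8):
    # Clamp to [min, max], snap onto 2**num_bits - 1 uniform steps, dequantize.
    scale = (max - min) / (2 ** num_bits - 1)
    return np.round((np.clip(x, min, max) - min) / scale) * scale + min

x = np.linspace(-8.0, 8.0, 9)
print(fake_quant(x, min=-6.0, max=6.0))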
Example #4
def _insert_fixed_quant_op(context,
                           name,
                           producer,
                           consumers,
                           init_min=-6.0,
                           init_max=6.0,
                           quant_delay=None):
    """Adds a fake quant op with fixed ranges.

  Args:
    context: The parent scope of the op to be quantized.
    name: The name of the fake quant op.
    producer: The producer op to be quantized.
    consumers: The consumer ops to the producer op.
    init_min: The minimum range for the fake quant op.
    init_max: The maximum range for the fake quant op.
    quant_delay: Number of steps to wait before activating the fake quant op.

  Raises:
    ValueError: When producer operation is not directly connected to the
      consumer operation.
  """
    name_prefix = name if not context else context + '/' + name
    inputs = producer.outputs[0]
    quant = quant_ops.FixedQuantize(inputs,
                                    init_min=init_min,
                                    init_max=init_max,
                                    scope=name_prefix)

    if quant_delay and quant_delay > 0:
        activate_quant = math_ops.greater_equal(
            common.CreateOrGetQuantizationStep(),
            quant_delay,
            name=name_prefix + '/activate_quant')
        quant = control_flow_ops.cond(activate_quant,
                                      lambda: quant,
                                      lambda: inputs,
                                      name=name_prefix + '/delayed_quant')

    if consumers:
        tensors_modified_count = common.RerouteTensor(quant,
                                                      inputs,
                                                      can_modify=consumers)
        # Some operations can have multiple output tensors going to the same
        # consumer. Since consumers is a set, we need to ensure that
        # tensors_modified_count is greater than or equal to the length of the set
        # of consumers.
        if tensors_modified_count < len(consumers):
            raise ValueError(
                'No inputs quantized for ops: [%s]' %
                ', '.join([consumer.name for consumer in consumers]))
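The quant_delay branch above only activates the fake quant op once the global step reaches the delay. The decision it encodes, written as plain Python rather than graph ops (a sketch, not the TF code):

def delayed_quant(step, quantized, raw, quant_delay=None):
    # Without a positive quant_delay the quantized value is used right away;
    # otherwise it only takes effect once the global step reaches quant_delay.
    if quant_delay and quant_delay > 0:
        return quantized if step >= quant_delay else raw
    return quantized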
Example #5
def _RedoRestAvgPool(graph):
    """Finds fused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, true if training.

  Raises:
    ValueError: When batch norm folding fails.
  """
    matches = _FindRestAvgPool(graph)
    print("Replacing", len(matches), "AvgPool")
    for match in matches:
        scope, sep, _ = match['layer_op'].name.rpartition('/')
        # Make sure new ops are added to `graph` and put on the same device as
        # `bn_op`. The '/' (i.e. `sep`) ensures that we reuse the existing scope
        # named `scope`. Otherwise, TF creates a unique scope whose name starts with
        # `scope`.
        with graph.as_default(), graph.name_scope(scope + sep):
            # with graph.name_scope(scope + sep + '_psb' + sep):

            input_tensor = match['input_tensor']
            layer_op = match['layer_op']
            # output_tensor = match['output_tensor']

            # >>>>> CUSTOM >>>>>>>>>>>>>>
            avg_size = np.prod(layer_op.get_attr("ksize")).astype(np.float32)
            if avg_size == 2**np.log2(avg_size):
                continue
            output_tensor = nn_ops.avg_pool(
                input_tensor,
                ksize=layer_op.get_attr('ksize'),
                strides=layer_op.get_attr('strides'),
                padding=layer_op.get_attr('padding'),
                data_format=layer_op.get_attr('data_format'),
                name=layer_op.name.split('/')[-1] + '_psb')
            avg_size_new = variableFromSettings(
                [], hiddenVar=(1.0 / avg_size).astype(np.float32))[0]
            new_layer_tensor = output_tensor * avg_size * avg_size_new
            # <<<<<<<<<<<<<<<<<<<<<<<<<<<

            nodes_modified_count = common.RerouteTensor(
                new_layer_tensor, match['output_tensor'])
            if nodes_modified_count == 0:
                raise ValueError(
                    'Replacing AvgPool failed, %s had no outputs.' %
                    match['output_tensor'].name)
Example #6
def _RedoRestBias(graph):
    """Finds fused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, true if training.

  Raises:
    ValueError: When batch norm folding fails.
  """
    matches = _FindRestBias(graph)
    print("Replacing", len(matches), "BiasAdd")
    for match in matches:
        scope, sep, _ = match['layer_op'].name.rpartition('/')
        # Make sure new ops are added to `graph` and put on the same device as
        # `bn_op`. The '/' (i.e. `sep`) ensures that we reuse the existing scope
        # named `scope`. Otherwise, TF creates a unique scope whose name starts with
        # `scope`.
        with graph.as_default(), graph.name_scope(scope + sep):
            # with graph.name_scope(scope + sep + '_psb' + sep):

            bias = match['weight_tensor']

            # >>>>> CUSTOM >>>>>>>>>>>>>>
            # use hidden variable instead
            # bias = variableFromSettings([],hiddenVar=bias)[0]
            if S("util.variable.fixed_point.use"):
                bias = fixed_point(bias,
                                   S("util.variable.fixed_point.bits"),
                                   max=S("util.variable.fixed_point.max"),
                                   min=S("util.variable.fixed_point.min"))
            # <<<<<<<<<<<<<<<<<<<<<<<<<<<

            new_layer_tensor = match['input_tensor'] + bias
            if S("util.variable.fixed_point.use"):
                new_layer_tensor = fixed_point(
                    new_layer_tensor,
                    S("util.variable.fixed_point.bits"),
                    max=S("util.variable.fixed_point.max"),
                    min=S("util.variable.fixed_point.min"))

            nodes_modified_count = common.RerouteTensor(
                new_layer_tensor, match['output_tensor'])
            if nodes_modified_count == 0:
                raise ValueError(
                    'Replacing BiasAdd failed, %s had no outputs.' %
                    match['output_tensor'].name)
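fixed_point and S are project-specific helpers that are not shown on this page. A hypothetical NumPy stand-in with the same (x, bits, max=..., min=...) call shape, purely to illustrate what a fixed-point quantizer of this kind typically does; the real helper may differ:

import numpy as np

def fixed_point(x, bits, max=1.0, min=-1.0):
    # Clamp to [min, max] and round onto 2**bits uniformly spaced levels.
    x = np.clip(x, min, max)
    step = (max - min) / (2 ** bits - 1)
    return np.round((x - min) / step) * step + min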
Example #7
    def testRerouteTensor(self):
        a = constant_op.constant(1, name='a')
        b = constant_op.constant(2, name='b')
        c = constant_op.constant(3, name='c')
        d = constant_op.constant(4, name='d')

        add_ac = math_ops.add(a, c)
        add_ad = math_ops.add(a, d)

        # Ensure that before rerouting the inputs are what we think.
        self._CheckOpHasInputs(add_ac.op, [a, c])
        self._CheckOpHasInputs(add_ad.op, [a, d])

        # references to tensor a should be replaced with b for all ops in
        # can_modify. This means add_ac will be changed but add_ad will not.
        common.RerouteTensor(b, a, can_modify=[add_ac.op])
        self._CheckOpHasInputs(add_ac.op, [b, c])
        self._CheckOpHasInputs(add_ad.op, [a, d])
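This test exercises common.RerouteTensor, which every example on this page relies on. A minimal sketch of what it does, modeled on the tf.contrib.quantize implementation (it rewires consumer inputs via the private Operation._update_input API; details may differ between TF versions):

def RerouteTensor(t0, t1, can_modify=None):
    """Reroutes consumers of tensor t1 to tensor t0; returns the rewired count."""
    nb_update_inputs = 0
    consumers = t1.consumers()
    if can_modify is not None:
        consumers = [c for c in consumers if c in can_modify]
    consumers_indices = {}
    for consumer in consumers:
        consumers_indices[consumer] = [
            i for i, t in enumerate(consumer.inputs) if t is t1
        ]
    for consumer in consumers:
        for i in consumers_indices[consumer]:
            consumer._update_input(i, t0)  # private TF API
            nb_update_inputs += 1
    return nb_update_inputs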
Example #8
def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay):
    """Finds unfused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, True if training.
    freeze_batch_norm_delay: How many steps to wait before freezing moving mean
      and variance and using them for batch normalization.

  Raises:
    ValueError: When batch norm folding fails.
  """
    input_to_ops_map = input_to_ops.InputToOps(graph)

    for bn in common.BatchNormGroups(graph):
        has_scaling = _HasScaling(graph, input_to_ops_map, bn)

        if not _IsValidUnfusedBatchNorm(graph, bn):
            continue

        # The mangling code intimately depends on BatchNorm node's internals.
        original_op, folded_op = _CreateFoldedOp(
            graph,
            bn,
            has_scaling=has_scaling,
            freeze_batch_norm_delay=freeze_batch_norm_delay,
            is_training=is_training)

        # TODO: generalise
        activation = input_to_ops_map.ConsumerOperations(original_op).pop()
        # assert any(activation.type == o or
        #            o.lower() in activation.name.split("/")[-1].lower()
        #            for o in (common._ACTIVATION_OP_SUFFIXES + ["Add"]))

        nodes_modified_count = common.RerouteTensor(folded_op.outputs[0],
                                                    original_op.outputs[0],
                                                    can_modify=[activation])
        if nodes_modified_count != 1:
            raise ValueError('Unexpected inputs to op: %s' % activation.name)
Example #9
def _InsertQuantOp(context,
                   name,
                   producer,
                   consumers,
                   is_training,
                   moving_avg=True,
                   init_min=-6.0,
                   init_max=6.0,
                   bits=8,
                   symmetric=False,
                   ema_decay=0.999,
                   quant_delay=None,
                   vars_collection=ops.GraphKeys.GLOBAL_VARIABLES,
                   narrow_range=False,
                   producer_scope=None,
                   consumer_scope=None):
    """Inserts a quant op between a producer op and (multiple) consumer ops.

  Args:
    context: Context where producer and consumer operations are nested.
    name: Name for the new quantization op within the context.
    producer: Producer operation of the pairs where quantization will be
      inserted.
    consumers: Consumer operations of the pairs.
    is_training: Whether quantizing training graph or eval graph.
    moving_avg: Specifies whether to use exponential moving average or just
      the last value seen.
    init_min: Starting minimum value for the new quantization op.
    init_max: Starting maximum value for the new quantization op.
    bits: Number of bits to use for quantization, must be between 2 and 8.
    symmetric: (Optional) If true, use symmetric quantization limits instead of
      training the minimum and maximum of each quantization range separately.
    ema_decay: (Optional) Float, EMA decay parameter.  EMA is used to update
      quantization intervals for quantizing activations (see here about EMA:
      https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average).
    quant_delay: (Optional, default None) Int, count of global steps for which
      to delay quantization.  This helps weights stabilize at the start of
      training.
    vars_collection: (Optional) Collection where to store the variables for
      quantization interval ends.
    narrow_range: Whether to use the narrow quantization range
      [1; 2^bits - 1] or wide range [0; 2^bits - 1].
    producer_scope: The restriction of producer scope. If not None, the new op
      will be inserted only when the producer is in this scope.
    consumer_scope: The restriction of consumer scope. If not None, the new op
      will be inserted only when all the consumers are in this scope.
  Raises:
    ValueError: When producer operation is not directly connected to the
      consumer operation.
  """
    if producer_scope and not producer.name.startswith(producer_scope):
        logging.info(
            '_InsertQuantOp ignores context="%s" name="%s" '
            'because producer "%s" is not in scope "%s"', context, name,
            producer.name, producer_scope)
        return

    if consumer_scope:
        consumers_in_scope = []
        for consumer in consumers:
            if consumer.name.startswith(consumer_scope):
                consumers_in_scope.append(consumer)
            else:
                logging.info(
                    '_InsertQuantOp context="%s" name="%s" ignores '
                    'consumer "%s" because it is not in scope "%s"', context,
                    name, consumer.name, consumer_scope)
                return
        consumers = consumers_in_scope

    name_prefix = _AddContextToName(context, name)
    # This is needed on TPU where name_scope == 'TPUReplicate/loop', and
    # name_prefix starts with 'TPUReplicate/loop/'; without dropping it
    # variables are created as TPUReplicate/loop/TPUReplicate/loop/..., which
    # breaks things later.
    name_scope = ops.get_name_scope()
    if name_scope:
        name_prefix = common.DropStringPrefix(name_prefix, name_scope + '/')

    inputs = producer.outputs[0]
    # Prevent ops from being quantized multiple times. Bypass ops can sometimes
    # overlap between multiple matches, so we need to ensure that we don't
    # add duplicate FakeQuant operations.
    if _FollowedByFakeQuant(inputs):
        return

    if moving_avg:
        quant = (quant_ops.MovingAvgQuantize(inputs,
                                             init_min=init_min,
                                             init_max=init_max,
                                             ema_decay=ema_decay,
                                             is_training=is_training,
                                             num_bits=bits,
                                             symmetric=symmetric,
                                             narrow_range=narrow_range,
                                             vars_collection=vars_collection,
                                             name_prefix=name_prefix))
    else:
        quant = (quant_ops.LastValueQuantize(inputs,
                                             init_min=init_min,
                                             init_max=init_max,
                                             is_training=is_training,
                                             num_bits=bits,
                                             symmetric=symmetric,
                                             narrow_range=narrow_range,
                                             vars_collection=vars_collection,
                                             name_prefix=name_prefix))

    if quant_delay and quant_delay > 0:
        activate_quant = math_ops.greater_equal(
            common.CreateOrGetQuantizationStep(),
            quant_delay,
            name=name_prefix + '/activate_quant')
        quant = control_flow_ops.cond(activate_quant,
                                      lambda: quant,
                                      lambda: inputs,
                                      name=name_prefix + '/delayed_quant')

    if consumers:
        tensors_modified_count = common.RerouteTensor(quant,
                                                      inputs,
                                                      can_modify=consumers)
        # Some operations can have multiple output tensors going to the same
        # consumer. Since consumers is a set, we need to ensure that
        # tensors_modified_count is greater than or equal to the length of the set
        # of consumers.
        if tensors_modified_count < len(consumers):
            raise ValueError(
                'No inputs quantized for ops: [%s]' %
                ', '.join([consumer.name for consumer in consumers]))
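MovingAvgQuantize above tracks activation ranges with an exponential moving average controlled by ema_decay. An illustrative NumPy sketch of how such a range could be updated batch by batch (not the quant_ops implementation):

import numpy as np

def update_range(range_min, range_max, batch, ema_decay=0.999):
    # EMA update of the observed activation range used as the quantization interval.
    batch_min, batch_max = float(np.min(batch)), float(np.max(batch))
    range_min = ema_decay * range_min + (1.0 - ema_decay) * batch_min
    range_max = ema_decay * range_max + (1.0 - ema_decay) * batch_max
    return range_min, range_max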
Example #10
def _FoldFusedBatchNorms(graph, is_training, freeze_batch_norm_delay):
    """Finds fused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, true if training.
    freeze_batch_norm_delay: How many steps to wait before freezing moving mean
      and variance and using them for batch normalization.

  Raises:
    ValueError: When batch norm folding fails.
  """
    for match in _FindFusedBatchNorms(graph):
        scope, sep, _ = match.layer_op.name.rpartition('/')
        # Make sure new ops are added to `graph` and put on the same device as
        # `bn_op`. The '/' (i.e. `sep`) ensures that we reuse the existing scope
        # named `scope`. Otherwise, TF creates a unique scope whose name starts with
        # `scope`.
        with graph.as_default(), graph.name_scope(scope + sep):
            with graph.name_scope(scope + sep + 'BatchNorm_Fold' + sep):
                # new weights = old weights * gamma / sqrt(variance + epsilon)
                # new biases = -mean * gamma / sqrt(variance + epsilon) + beta
                multiplier_tensor = match.gamma_tensor * math_ops.rsqrt(
                    match.variance_tensor + match.bn_op.get_attr('epsilon'))
                bias_tensor = math_ops.subtract(match.beta_tensor,
                                                match.mean_tensor *
                                                multiplier_tensor,
                                                name='bias')

                correction_scale, correction_recip, correction_offset = None, None, None
                if is_training:
                    correction_scale, correction_recip, correction_offset = (
                        _ComputeBatchNormCorrections(
                            context='',
                            match=match,
                            freeze_batch_norm_delay=freeze_batch_norm_delay))
                # The shape of depthwise weights is different, so we need to reshape the
                # multiplier_tensor to ensure that the scaled_weight_tensor has the
                # expected shape.
                weights = match.weight_tensor
                if match.layer_op.type == 'DepthwiseConv2dNative':
                    new_shape = [
                        match.weight_tensor.get_shape().as_list()[2],
                        match.weight_tensor.get_shape().as_list()[3]
                    ]
                    multiplier_tensor = array_ops.reshape(multiplier_tensor,
                                                          new_shape,
                                                          name='scale_reshape')

                    if correction_scale is not None:
                        correction_scale = array_ops.reshape(
                            correction_scale,
                            new_shape,
                            name='correction_reshape')

            if correction_scale is not None:
                weights = math_ops.multiply(correction_scale,
                                            weights,
                                            name='correction_mult')

            scaled_weight_tensor = math_ops.multiply(weights,
                                                     multiplier_tensor,
                                                     name='mul_fold')

            new_layer_tensor = _CloneWithNewOperands(match.layer_op,
                                                     match.input_tensor,
                                                     scaled_weight_tensor,
                                                     match.batch_to_space_op)

            if correction_recip is not None:
                new_layer_tensor = math_ops.multiply(correction_recip,
                                                     new_layer_tensor,
                                                     name='post_conv_mul')
                new_layer_tensor = math_ops.add(new_layer_tensor,
                                                (correction_offset),
                                                'correction_add')

            bias_add_tensor = math_ops.add(new_layer_tensor,
                                           bias_tensor,
                                           name='add_fold')

            nodes_modified_count = common.RerouteTensor(
                bias_add_tensor, match.output_tensor)
            if nodes_modified_count == 0:
                raise ValueError(
                    'Folding batch norms failed, %s had no outputs.' %
                    match.output_tensor.name)
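The folding arithmetic in the BatchNorm_Fold scope can be checked numerically: scaling the weights by gamma / sqrt(variance + epsilon) and adding beta - mean * multiplier reproduces the layer followed by batch normalization. A NumPy check for a fully connected layer (illustrative values):

import numpy as np

np.random.seed(0)
x = np.random.randn(8, 4)                 # inputs
w = np.random.randn(4, 3)                 # fully connected weights
gamma = np.random.rand(3) + 0.5
beta = np.random.randn(3)
mean = np.random.randn(3)
variance = np.random.rand(3) + 0.1
eps = 1e-3

# Unfolded: layer output followed by batch normalization.
bn = gamma * (x @ w - mean) / np.sqrt(variance + eps) + beta

# Folded: new weights = w * multiplier, new bias = beta - mean * multiplier.
multiplier = gamma / np.sqrt(variance + eps)
folded = x @ (w * multiplier) + (beta - mean * multiplier)

assert np.allclose(bn, folded)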
Example #11
def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay):
    """Computes batch norm correction params.

     Before batch normalization is frozen:
     We use batch statistics for batch norm.
       correction_scale = sigma_b/sigma_mv
       correction_recip = 1/correction_scale
       correction_offset = 0

     After batch normalization is frozen:
      correction_scale = sigma_b/sigma_mv
      correction_recip = 1
      correction_offset =  gamma*(mu_b/sigma_b-mu_mv/sigma_mv).

     Batch norm is frozen if global_step > bn_freeze_delay.
     The corrections ensure that:
     a) The weights are quantized after scaling by gamma/sigma_mv. This enables
     smoother training as the scaling on the weights changes slowly, rather than
     jumping across mini-batches.
     b) Changing the values of the corrections allows one to switch from using
     batch statistics to using the moving mean and variance, without requiring
     changes to batch_norm.


  Args:
    context: The scope under which we look for batch norm params
    match: Object containing required batch norm tensors for correction
      computation.
    freeze_batch_norm_delay: Delay in steps at which computation switches
      from regular batch norm to frozen mean and variance.


  Returns:
    A tuple of correction_scale, correction_recip, correction_offset
  """

    g = ops.get_default_graph()
    prefix = '' if not context else context
    with g.name_scope(prefix + 'batch_norm_correction'):
        recip_sigma_mv = math_ops.rsqrt(match.moving_variance_tensor +
                                        match.batch_epsilon)
        recip_sigma = math_ops.rsqrt(match.variance_tensor +
                                     match.batch_epsilon)
        correction_scale = math_ops.divide(recip_sigma_mv,
                                           recip_sigma,
                                           name='scale_compute')
        correction_scale = array_ops.identity(correction_scale,
                                              name='correction_scale')
        correction_recip = math_ops.reciprocal(correction_scale,
                                               name='reciprocal_compute')
        mv = match.moving_mean_tensor  #if match.moving_mean_tensor is not None else 0
        correction_offset = math_ops.multiply(match.gamma_tensor,
                                              match.mean_tensor * recip_sigma -
                                              mv,
                                              name='offset_compute')

        if freeze_batch_norm_delay is not None:
            use_mv_avg = math_ops.greater_equal(
                common.CreateOrGetQuantizationStep(),
                freeze_batch_norm_delay,
                name='use_moving_average')
        else:
            use_mv_avg = False

        bn_decay_zero = 0.0
        bn_decay_mean_consumers = list(match.bn_decay_mean_tensor.consumers())
        bn_decay_var_consumers = list(match.bn_decay_var_tensor.consumers())

        bn_decay_mean_out = utils.smart_cond(
            use_mv_avg,
            lambda: bn_decay_zero,
            lambda: match.bn_decay_mean_tensor,
            name='freeze_moving_mean')

        common.RerouteTensor(bn_decay_mean_out,
                             match.bn_decay_mean_tensor,
                             can_modify=bn_decay_mean_consumers)

        bn_decay_var_consumers = list(match.bn_decay_var_tensor.consumers())
        bn_decay_var_out = utils.smart_cond(use_mv_avg,
                                            lambda: bn_decay_zero,
                                            lambda: match.bn_decay_var_tensor,
                                            name='freeze_moving_var')
        common.RerouteTensor(bn_decay_var_out,
                             match.bn_decay_var_tensor,
                             can_modify=bn_decay_var_consumers)

        correction_recip = utils.smart_cond(
            use_mv_avg,
            lambda: array_ops.ones(correction_scale.shape),
            lambda: correction_recip,
            name='correction_recip')

        correction_offset = utils.smart_cond(
            use_mv_avg,
            lambda: correction_offset,
            lambda: array_ops.zeros(correction_offset.shape),
            name='correction_offset')
    return correction_scale, correction_recip, correction_offset
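Before the moving averages are frozen, these corrections let the quantized weights carry the slowly changing factor gamma / sigma_mv while the forward pass still normalizes with batch statistics. A NumPy sketch of the two identities involved (offsets omitted; illustrative values):

import numpy as np

np.random.seed(1)
x = np.random.randn(8, 4)
w = np.random.randn(4, 3)
gamma = np.random.rand(3) + 0.5
sigma_b = np.sqrt(np.random.rand(3) + 0.1)    # batch std (incl. epsilon)
sigma_mv = np.sqrt(np.random.rand(3) + 0.1)   # moving-average std (incl. epsilon)

correction_scale = sigma_b / sigma_mv
correction_recip = 1.0 / correction_scale

# Weights folded with batch statistics and then scaled by correction_scale
# carry the stable factor gamma / sigma_mv, which is what gets quantized.
folded_weights = (w * correction_scale) * (gamma / sigma_b)
assert np.allclose(folded_weights, w * gamma / sigma_mv)

# Applying correction_recip after the layer restores the batch-statistic
# scaling gamma / sigma_b used by regular batch norm.
assert np.allclose((x @ folded_weights) * correction_recip,
                   x @ (w * gamma / sigma_b))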
Example #12
def _RedoRestBatchnorms(graph, is_training):
    """Finds fused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, true if training.

  Raises:
    ValueError: When batch norm folding fails.
  """
    matches = _FindRestBatchNorms(graph)
    print("Replacing", len(matches), "BatchNorms (without a preceding Conv2D)")
    for match in matches:
        scope, sep, _ = match.bn_op.name.rpartition('/')
        # Make sure new ops are added to `graph` and put on the same device as
        # `bn_op`. The '/' (i.e. `sep`) ensures that we reuse the existing scope
        # named `scope`. Otherwise, TF creates a unique scope whose name starts with
        # `scope`.
        with graph.as_default(), graph.name_scope(scope + sep):
            with graph.name_scope(scope + sep + '_psb' + sep):

                mean = match.mean_tensor
                variance = match.variance_tensor
                beta = match.beta_tensor
                gamma = match.gamma_tensor
                eps = match.batch_epsilon

                # new gamma = gamma / sqrt(variance + epsilon)
                # new biases = -mean * gamma / sqrt(variance + epsilon) + beta
                multfac = gamma / math_ops.sqrt(variance + eps)
                gamma = multfac
                beta = -multfac * mean + beta
                mean = array_ops.zeros_like(mean)
                variance = array_ops.ones_like(variance)
                eps = array_ops.zeros_like(eps)

                gamma = variableFromSettings([], hiddenVar=gamma)[0]
                # gamma = fixed_point(gamma,S("util.variable.fixed_point.bits"),max=S("util.variable.fixed_point.max"),min=S("util.variable.fixed_point.min"))
                # gamma = next_base2(gamma,strict_positive=False)
                # gamma = 1/variableFromSettings([],hiddenVar=1/gamma)[0]
                # variance = variableFromSettings([],hiddenVar=math_ops.sqrt(variance+eps))[0]**2
                # beta = variableFromSettings([],hiddenVar=beta)[0]
                if S("util.variable.fixed_point.use"):
                    beta = fixed_point(beta,
                                       S("util.variable.fixed_point.bits"),
                                       max=S("util.variable.fixed_point.max"),
                                       min=S("util.variable.fixed_point.min"))
                    # gamma = fixed_point(gamma,S("util.variable.fixed_point.bits"),max=S("util.variable.fixed_point.max"),min=S("util.variable.fixed_point.min"))
                    # mean = fixed_point(mean,S("util.variable.fixed_point.bits"),max=S("util.variable.fixed_point.max"),min=S("util.variable.fixed_point.min"))
                    # variance = fixed_point(variance,S("util.variable.fixed_point.bits"),max=S("util.variable.fixed_point.max"),min=S("util.variable.fixed_point.min"))

                # fixed_point division could be ok
                # silly silly_idiv(silly x, silly y) {
                #     uint64_t sign_bit = 1UL<<63;
                #     // unsetting the sign bit to ignore it
                #     silly res = ((x & ~sign_bit) / (y & sign_bit)) << 32;

                #     // setting the sign bit iff only one of sign bits is set
                #     res |= (x & sign_bit) ^ (y & sign_bit);
                #     return res;
                # }

            new_layer_tensor = nn.batch_normalization(
                match.input_tensor,
                mean,
                variance,
                beta,
                gamma,
                eps,
                name=match.bn_op.name.split("/")[-1] + "_psb")
            if S("util.variable.fixed_point.use"):
                new_layer_tensor = fixed_point(
                    new_layer_tensor,
                    S("util.variable.fixed_point.bits"),
                    max=S("util.variable.fixed_point.max"),
                    min=S("util.variable.fixed_point.min"))
            nodes_modified_count = common.RerouteTensor(
                new_layer_tensor, match.output_tensor)
            if nodes_modified_count == 0:
                raise ValueError(
                    'Replacing batch norm failed, %s had no outputs.' %
                    match.output_tensor.name)
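The re-parameterization in the _psb scope is an identity transformation of the batch norm: 1 / sqrt(variance + eps) is pushed into gamma and the mean shift into beta, after which the normalization runs with zero mean, unit variance and zero epsilon. A NumPy check (illustrative values):

import numpy as np

np.random.seed(2)
x = np.random.randn(8, 3)
gamma = np.random.rand(3) + 0.5
beta = np.random.randn(3)
mean = np.random.randn(3)
variance = np.random.rand(3) + 0.1
eps = 1e-3

# Original batch normalization with the trained statistics.
original = gamma * (x - mean) / np.sqrt(variance + eps) + beta

# Re-parameterized form: new gamma = gamma / sqrt(variance + eps),
# new beta = beta - new_gamma * mean, and mean=0, variance=1, eps=0.
multfac = gamma / np.sqrt(variance + eps)
reparam = multfac * x + (beta - multfac * mean)

assert np.allclose(original, reparam)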
Example #13
def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay):
    """Finds unfused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, True if training.
    freeze_batch_norm_delay: How many steps to wait before freezing moving mean
      and variance and using them for batch normalization.

  Raises:
    ValueError: When batch norm folding fails.
  """
    input_to_ops_map = input_to_ops.InputToOps(graph)

    for bn in common.BatchNormGroups(graph):
        has_scaling = _HasScaling(graph, input_to_ops_map, bn)

        if not _IsValidUnfusedBatchNorm(graph, bn):
            continue

        print("found unfused batchnarm")
        raise Exception("Not Implemented")

        # The mangling code intimately depends on BatchNorm node's internals.
        original_op, folded_op = _CreateFoldedOp(
            graph,
            bn,
            has_scaling=has_scaling,
            freeze_batch_norm_delay=freeze_batch_norm_delay,
            is_training=is_training)

        activation = common.GetEndpointActivationOp(graph, bn)
        if activation:
            nodes_modified_count = common.RerouteTensor(
                folded_op.outputs[0],
                original_op.outputs[0],
                can_modify=[activation])
            if nodes_modified_count != 1:
                raise ValueError('Unexpected inputs to op: %s' %
                                 activation.name)
            continue

        # Treat consumer ops in bypass modules differently since they have Add
        # operations instead of Relu* above.
        # Changes to make sure that the correct scope is selected for the bypass add
        # The rule here is that if the scope is of the form: str1/str2 for the
        # batch norm,
        # the bypass add is at scope str1. If bn is of scope just str1, then the
        # bypass add is at scope ''.
        # If there is no batch norm, then there is no bypass add.
        add_bypass_ctx = ''
        if bn:
            try:
                add_bypass_ctx = re.search(r'^(.*)/([^/]+)', bn).group(1)
            except AttributeError:
                add_bypass_ctx = ''

        if add_bypass_ctx:
            add_bypass_ctx = add_bypass_ctx + '/'

        add_bypass = graph.get_operation_by_name(add_bypass_ctx + 'Add')
        nodes_modified_count = common.RerouteTensor(folded_op.outputs[0],
                                                    original_op.outputs[0],
                                                    can_modify=[add_bypass])
        if nodes_modified_count != 1:
            raise ValueError('Unexpected inputs to op: %s' % add_bypass.name)
Example #14
def attention_predict(local):
    # get needed global variables
    hks, scaffold, test_size, print_orig, net_test, data, S, make_accuracy = local[
        "hks"], local["scaffold"], local["test_size"], local[
            "print_orig"], local["net_test"], local["data"], local["S"], local[
                "make_accuracy"]

    # convert last spatial layer to mask

    # resnet50_v2
    # last_spatial        = net_test.op.inputs[1].op.inputs[0].op.inputs[0].op.inputs[0].op.inputs[0]

    # resnet18_slim
    last_spatial = net_test.op.inputs[0].op.inputs[0].op.inputs[0].op.inputs[
        0].op.inputs[0].op.inputs[0]
    print(last_spatial)
    # fl_weight           = net_test.op.inputs[1].op.inputs[0].op.inputs[0].op.inputs[1]
    # fl_bias             = net_test.op.inputs[1].op.inputs[0].op.inputs[1]
    # with tf.variable_scope("attention_psb"):
    #     last_spatial        = tf.nn.conv2d(last_spatial,tf.reshape(fl_weight,[1,1]+fl_weight.shape.as_list()),strides=[1]*4,padding="SAME", name="additional_psb") + fl_bias
    fraction = S("attention.fraction")
    img_shape = data[0].shape.as_list()[1:3]
    mask_shape = last_spatial.shape.as_list()[1:3]

    if S("attention.mode") != "neuron":

        if S("attention.spatial_mode") == "random":
            mask_np = 1.0 * (np.random.random([1] + mask_shape + [1]) <
                             fraction)
            mask = tf.constant(mask_np, tf.float32)

        elif S("attention.spatial_mode") == "center":
            mask_np = np.zeros([1] + mask_shape + [1])
            mask_np[0, 3, 3, 0] = 1
            mask = tf.constant(mask_np, tf.float32)

        if S("attention.spatial_mode") == "max_activation":
            activation_per_pixel = tf.reduce_max(last_spatial,
                                                 axis=-1,
                                                 keepdims=True)
            image_max = tf.reduce_max(last_spatial,
                                      axis=[1, 2, 3],
                                      keepdims=True)
            mask = tf.cast(tf.equal(activation_per_pixel, image_max),
                           tf.float32)

        elif S("attention.spatial_mode") == "mean_activation":
            activation_per_pixel = tf.reduce_mean(last_spatial,
                                                  axis=-1,
                                                  keepdims=True)
            image_mean = tf.reduce_mean(last_spatial,
                                        axis=[1, 2, 3],
                                        keepdims=True)
            mask = tf.cast(activation_per_pixel > image_mean * fraction,
                           tf.float32)

        elif S("attention.spatial_mode") == "mean_entropy":
            pixelwise_ce = tf.losses.softmax_cross_entropy(
                last_spatial, last_spatial, reduction=tf.losses.Reduction.NONE)
            pixelwise_ce = tf.expand_dims(pixelwise_ce, axis=-1)
            mask = tf.cast(
                pixelwise_ce >
                tf.reduce_mean(pixelwise_ce, axis=[1, 2], keepdims=True) *
                fraction, tf.float32)

        elif S("attention.spatial_mode") == "max_entropy":
            pixelwise_ce = tf.losses.softmax_cross_entropy(
                last_spatial, last_spatial, reduction=tf.losses.Reduction.NONE)
            activation_per_pixel = pixelwise_ce
            image_max = tf.reduce_max(pixelwise_ce, axis=[1, 2], keepdims=True)
            pixelwise_ce = tf.expand_dims(pixelwise_ce, axis=-1)
            image_max = tf.expand_dims(image_max, axis=-1)
            mask = tf.cast(tf.equal(pixelwise_ce, image_max), tf.float32)

        if S("attention.spatial_surround") > 1:
            mask = tf.layers.max_pooling2d(
                mask,
                pool_size=S("attention.spatial_surround"),
                padding="same",
                strides=1)

        # top k patches
        # # k = 8
        # k = 15
        # # pixelwise_ce = tf.layers.average_pooling2d(pixelwise_ce,pool_size=3,padding="valid",strides=1)
        # tf.summary.image("mask",reduce_img(data[0]*mask_scaled+data[0]*(1-mask_scaled)*0.5))
        # tf.summary.image("entropy",reduce_img(pixelwise_ce))
        # ce_shape = pixelwise_ce.shape.as_list()[1:3]
        # pixelwise_ce = tf.layers.flatten(pixelwise_ce)
        # top_k_val, top_k_ind = tf.nn.top_k(pixelwise_ce,k)
        # mask = tf.reduce_sum([
        #     tf.one_hot(top_k_ind[:,i],depth=pixelwise_ce.shape.as_list()[-1])
        #     for i in range(k)
        # ], axis=0)
        # mask = tf.reshape(mask,[-1]+ce_shape+[1])

        # plot mask
        mask_scaled = tf.image.resize_images(
            mask, img_shape, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        tf.summary.image(
            "mask",
            reduce_img(data[0] * mask_scaled + data[0] *
                       (1 - mask_scaled) * 0.3))

    # initialize mask-counter
    if S("attention.mode") == "spatial" or S(
            "attention.mode") == "spatial_old":
        mask_sum = tf.reduce_sum(mask)
        mask_total = tf.reduce_sum(mask * 0 + 1)
    elif S("attention.mode") == "channels":
        mask_sum = 0
        mask_total = 0

    # fold batch norms, replace weights, ...
    if S("util.tfl") == "tf_mod":
        print("manipulating original graph")
        fold_batch_norms.FoldBatchNorms(tf.get_default_graph(),
                                        is_training=False)

    if S("attention.mode") == "neuron":
        mask_sum = GLOBAL["m_sum"]
        mask_total = GLOBAL["m_total"]
        accuracy_test_masked, correct_prediction_test = make_accuracy(
            net_test, data)

    else:
        # reuse model (tf_resnet_official)
        with tf.variable_scope(tf.get_variable_scope(),
                               reuse=True):  # for tf_resnet_official
            logits_masked = GLOBAL["keras_model"](
                GLOBAL["keras_model_preprocess"](data[0]))
        net_test_masked = logits_masked

        # reuse model (keras)
        # logits_masked = GLOBAL["keras_model"](GLOBAL["keras_model_preprocess"](data[0]))
        # net_test_masked = tf.concat([tf.expand_dims(logits_masked[:,0]*0,1),logits_masked],axis=-1)

        accuracy_test_masked, correct_prediction_test = make_accuracy(
            net_test_masked, data)

        # new settings
        transformation_template = S("attention.transform")
        if transformation_template == "psb":
            S("binom.sample_size", set=S("attention.sample_size"))
        S("util.variable.transformation",
          set=GLOBAL["transformation_templates"][transformation_template])
        S("util.variable.transformation.template_name",
          set=transformation_template)

        # fold batch norms, replace weights, ...
        if S("util.tfl") == "tf_mod":
            print("manipulating attention graph")
            fold_batch_norms.FoldBatchNorms(tf.get_default_graph(),
                                            is_training=False)

        print("decide which graph to use per layer")
        from util.fold_batch_norms import _FindRestFilters, _CloneWithNewOperands
        graph = tf.get_default_graph()
        matches = _FindRestFilters(graph, False)
        print(
            "Replacing", len(matches),
            "Conv|Mul|DepthwiseConv2dNative-Filters (without a suceeding BatchNorm)"
        )
        for match in matches:
            scope, sep, _ = match['layer_op'].name.rpartition('/')
            model_name = S("model.classification_models.model") + "/"
            if not scope.startswith(model_name):
                continue
            with graph.as_default(), graph.name_scope(scope + sep):
                with graph.name_scope(scope + sep + '_masked' + sep):
                    weight = match['weight_tensor']
                    input_tensor = match['input_tensor']
                    if not len(input_tensor.shape.as_list()) == 4:
                        continue
                    kernel_size = weight.shape.as_list()[0]

                    if not input_tensor.name.startswith(model_name):
                        input_tensor_orig = input_tensor
                    else:
                        input_tensor_orig = graph.get_tensor_by_name(
                            input_tensor.name[len(model_name):])
                    output_tensor = match['output_tensor']
                    output_tensor_orig = graph.get_tensor_by_name(
                        output_tensor.name[len(model_name):])

                    img_shape_in = input_tensor.shape.as_list()[1:3]
                    img_shape_out = output_tensor.shape.as_list()[1:3]

                    # add mask to input (and redefine borders)
                    if S("attention.mode") == "spatial_old":
                        mask_scaled2 = tf.image.resize_images(
                            mask,
                            img_shape_in,
                            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
                        new_input_tensor = input_tensor * mask_scaled2 + input_tensor_orig * (
                            1 - mask_scaled2)
                        new_layer_tensor = _CloneWithNewOperands(
                            match['layer_op'], new_input_tensor, weight, False)
                    elif S("attention.mode") == "spatial":
                        mask_scaled2 = tf.image.resize_images(
                            mask,
                            img_shape_out,
                            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
                        output_tensor_new = _CloneWithNewOperands(
                            match['layer_op'], input_tensor, weight,
                            False)  # just for rerouting
                        new_layer_tensor = output_tensor_new * mask_scaled2 + output_tensor_orig * (
                            1 - mask_scaled2)
                    elif S("attention.mode") == "channels":
                        if not weight.name.startswith(model_name):
                            weight_p = GLOBAL["weights_p"][(
                                "/".join(weight.name.split("/")[0:-1]) +
                                "/var/p_1:0").replace("kernel", "_psb")]
                        else:
                            weight_p = GLOBAL["weights_p"][
                                "/".join(weight.name.split("/")[1:-1]) +
                                "/var/p_1:0"]
                        weight_p_mean = tf.reduce_mean(weight_p,
                                                       axis=[0, 1, 2],
                                                       keepdims=True)
                        weight_p_mean_total = tf.reduce_mean(weight_p,
                                                             keepdims=True)
                        mask_channels = tf.cast(
                            weight_p_mean > weight_p_mean_total, tf.float32)
                        # mask_channels = tf.transpose(mask_channels,[2,0,1,3])
                        output_tensor_new = _CloneWithNewOperands(
                            match['layer_op'], input_tensor, weight,
                            False)  # just for rerouting
                        new_layer_tensor = output_tensor_new * mask_channels + output_tensor_orig * (
                            1 - mask_channels)
                        mask_sum += tf.reduce_sum(mask_channels)
                        mask_total += tf.reduce_sum(0 * mask_channels + 1)

                    # reroute tensor to output depending on sampling mode
                    nodes_modified_count = common.RerouteTensor(
                        new_layer_tensor, output_tensor)

                    if kernel_size > 1:
                        pass
                        # tf.summary.image("mask",reduce_img(input_tensor*mask_scaled2))
                        # tf.summary.image("img_masked",reduce_img(new_input_tensor))

                        # tf.summary.image("input_tensor_all",[
                        #     # tf.reduce_max((new_input_tensor[0]-input_tensor_orig[0])*mask_scaled2[0],axis=-1,keepdims=True),
                        #     # tf.reduce_max(input_tensor[0],axis=-1,keepdims=True),
                        #     tf.reduce_max(tf.abs(input_tensor[0]-input_tensor_orig[0]),axis=-1,keepdims=True),
                        #     tf.reduce_max(mask_scaled2[0],axis=-1,keepdims=True),
                        #     tf.reduce_max(input_tensor_orig[0],axis=-1,keepdims=True),
                        #     tf.reduce_max(input_tensor[0],axis=-1,keepdims=True),
                        #     tf.reduce_max(new_input_tensor[0],axis=-1,keepdims=True)
                        # ], max_outputs=4)

                    if nodes_modified_count == 0:
                        raise ValueError(
                            'Folding batch norms failed, %s had no outputs.' %
                            match['output_tensor'].name)

    # for new summaries
    hks.append(
        CustomSummarySaverHook(
            save_steps=1,
            # save_steps=1,
            summary_op=tf.summary.merge_all(),
            output_dir=S("log.dir") + "_test"
            # output_dir=S("log.dir")
        ))

    correct_prediction_test_mask = correct_prediction_test
    correct_prediction_test = local["correct_prediction_test"]

    accuracy_res = 0
    mask_sum_np, mask_total_np = 0, 0
    # steps = 0
    i = 0
    with tf.train.SingularMonitoredSession(
            scaffold=scaffold,
            hooks=hks,  # list of all hooks
            checkpoint_dir=None if S("log.optimistic_restore") else S(
                "log.dir")  # restores checkpoint
    ) as sess:
        print(80 * '#')
        print('#' + 34 * ' ' + ' TESTING ' + 35 * ' ' + '#')
        print(80 * '#')
        pbar = tqdm(total=test_size)
        while not sess.should_stop():
            print("run", i)
            correct, mask_sum_np_c, mask_total_np_c = sess.run(
                [correct_prediction_test_mask, mask_sum, mask_total])
            mask_sum_np += mask_sum_np_c
            mask_total_np += mask_total_np_c
            i += correct.shape[0]
            pbar.update(correct.shape[0])
            accuracy_current = np.sum(correct)
            accuracy_res += accuracy_current

            pbar.set_description(
                "∅-Acc %f, current Acc %f, mask-proportion %f" %
                ((accuracy_res / i), accuracy_current / correct.shape[0],
                 mask_sum_np /
                 mask_total_np if mask_total_np > 0 else "nothing masked"))
            # pbar.set_postfix("current Acc %f" % accuracy_current)
    # print("Total Accuracy:",accuracy_res / i, i)
    print("Total Proportion:", mask_sum_np / mask_total_np, mask_sum_np,
          mask_total_np)
    pbar.close()

    # for easier grepping using bash-scripts
    print_orig(mask_sum_np / mask_total_np)
    print_orig(accuracy_res / i)
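The spatial masking modes above all reduce to a thresholded comparison against an image-wide statistic. For instance, the "mean_activation" mode keeps the locations whose channel-mean activation exceeds fraction times the image mean; a NumPy sketch of that mask (illustrative shapes):

import numpy as np

def mean_activation_mask(feature_map, fraction):
    # feature_map: (batch, height, width, channels)
    per_pixel = feature_map.mean(axis=-1, keepdims=True)
    image_mean = feature_map.mean(axis=(1, 2, 3), keepdims=True)
    return (per_pixel > image_mean * fraction).astype(np.float32)

mask = mean_activation_mask(np.random.rand(1, 7, 7, 64), fraction=1.0)
print(mask.shape)  # (1, 7, 7, 1)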
Example #15
def get_accuracy_for_batches(local):
    # get needed global variables
    hks, scaffold, test_size, net_test, data, print_orig, S, GLOBAL = local[
        "hks"], local["scaffold"], local["test_size"], local[
            "net_test"], local["data"], local["print_orig"], local["S"], local[
                "GLOBAL"]

    def make_accuracy(net, data):
        with tf.name_scope('accuracy'):
            with tf.name_scope("output"):
                logits = tf.identity(net, name='logits')
                labels = tf.identity(data[1], name='labels')

            with tf.name_scope("metrics"):

                # accuracy
                with tf.name_scope('correct_prediction'):
                    correct_prediction = tf.equal(tf.argmax(net, 1),
                                                  tf.cast(labels, tf.int64))
                correct_prediction = tf.cast(correct_prediction, tf.float32)
                accuracy = tf.reduce_mean(correct_prediction)
                tf.summary.scalar("accuracy", accuracy)

        return correct_prediction

    num_patches = GLOBAL["patches"]
    data = data[0], tf.split(data[1], num_patches)[0]

    # get network result without softmax
    with tf.name_scope("patches_collect"):
        avg_pool = net_test.op.inputs[1].op.inputs[0].op.inputs[0].op.inputs[
            0].op
        last_spatial = net_test.op.inputs[1].op.inputs[0].op.inputs[
            0].op.inputs[0].op.inputs[0]
        patches_concat = tf.concat(tf.split(last_spatial, num_patches), axis=2)
        patches_concat_test = tf.concat(tf.split(data[0], num_patches), axis=2)
        tf.summary.image("patches_concat_in", patches_concat_test)
        tf.summary.image("patches_concat_out",
                         tf.reduce_max(patches_concat, axis=-1, keepdims=True))
        avg_new = tf.reduce_mean(patches_concat, axis=[1, 2], name="avg_new")
        # avg_new = tf.reduce_max(patches_concat, axis=[1,2],name="avg_new")
        avg_new = tf.concat([avg_new] * num_patches, axis=0)
        nodes_modified_count = common.RerouteTensor(avg_new,
                                                    avg_pool.outputs[0])
        if nodes_modified_count == 0:
            raise ValueError('Replacing failed.')

    net_test = tf.split(net_test, num_patches)[0]
    correct_prediction_test = make_accuracy(net_test, data)

    accuracy_res = 0
    # steps = 0
    i = 0

    # for new summaries
    hks.append(
        CustomSummarySaverHook(
            save_steps=1,
            # save_steps=1,
            summary_op=tf.summary.merge_all(),
            output_dir=S("log.dir") + "_test"
            # output_dir=S("log.dir")
        ))

    with tf.train.SingularMonitoredSession(
            scaffold=scaffold,
            hooks=hks,  # list of all hooks
            checkpoint_dir=None if S("log.optimistic_restore") else S(
                "log.dir")  # restores checkpoint
    ) as sess:
        print(80 * '#')
        print('#' + 34 * ' ' + ' TESTING ' + 35 * ' ' + '#')
        print(80 * '#')
        pbar = tqdm(total=test_size)
        while not sess.should_stop():
            # print(sess.run(data[1]))
            correct = sess.run(correct_prediction_test)
            i += correct.shape[0]
            pbar.update(correct.shape[0])
            accuracy_current = np.sum(correct)
            accuracy_res += accuracy_current

            pbar.set_description(
                "∅-Acc %f, current Acc %f" %
                ((accuracy_res / i), accuracy_current / correct.shape[0]))
            # pbar.set_postfix("current Acc %f" % accuracy_current)
    print("Total Accuracy:", accuracy_res / i, i)
    pbar.close()

    # for easier grepping using bash-scripts
    print_orig(accuracy_res / i)
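The patches_collect block in this example reassembles the per-patch feature maps side by side before pooling, so the classifier head sees one pooled vector per original image. The tf.split / tf.concat / reduce_mean sequence corresponds to this NumPy manipulation (illustrative shapes):

import numpy as np

num_patches = 4
images_per_batch = 2
x = np.random.randn(num_patches * images_per_batch, 3, 3, 8)  # patches stacked on the batch axis

# Place the patches of each image next to each other along the width axis,
# then average over all spatial positions, as in the graph rewrite above.
patches_concat = np.concatenate(np.split(x, num_patches, axis=0), axis=2)
avg_new = patches_concat.mean(axis=(1, 2))
print(avg_new.shape)  # (2, 8): one pooled vector per original image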