def rewrite_nn_resize_op(is_quantized=False):
  """Replaces a custom nearest-neighbor resize op with the Tensorflow version.

  Some graphs use this custom version for TPU-compatibility.

  Args:
    is_quantized: True if the default graph is quantized.
  """
  input_pattern = graph_matcher.OpTypePattern(
      'FakeQuantWithMinMaxVars' if is_quantized else '*')
  reshape_1_pattern = graph_matcher.OpTypePattern(
      'Reshape', inputs=[input_pattern, 'Const'], ordered_inputs=False)
  mul_pattern = graph_matcher.OpTypePattern(
      'Mul', inputs=[reshape_1_pattern, 'Const'], ordered_inputs=False)
  # The quantization script may or may not insert a fake quant op after the
  # Mul. In either case, these min/max vars are not needed once replaced with
  # the TF version of NN resize.
  fake_quant_pattern = graph_matcher.OpTypePattern(
      'FakeQuantWithMinMaxVars',
      inputs=[mul_pattern, 'Identity', 'Identity'],
      ordered_inputs=False)
  reshape_2_pattern = graph_matcher.OpTypePattern(
      'Reshape',
      inputs=[
          graph_matcher.OneofPattern([fake_quant_pattern, mul_pattern]),
          'Const'
      ],
      ordered_inputs=False)
  add_type_name = 'Add'
  if tf.compat.forward_compatible(2019, 6, 26):
    add_type_name = 'AddV2'
  add_pattern = graph_matcher.OpTypePattern(
      add_type_name, inputs=[reshape_2_pattern, '*'], ordered_inputs=False)

  matcher = graph_matcher.GraphMatcher(add_pattern)
  for match in matcher.match_graph(tf.get_default_graph()):
    projection_op = match.get_op(input_pattern)
    reshape_2_op = match.get_op(reshape_2_pattern)
    add_op = match.get_op(add_pattern)
    nn_resize = tf.image.resize_nearest_neighbor(
        projection_op.outputs[0],
        add_op.outputs[0].shape.dims[1:3],
        align_corners=False,
        name=os.path.split(reshape_2_op.name)[0] + '/resize_nearest_neighbor')

    for index, op_input in enumerate(add_op.inputs):
      if op_input == reshape_2_op.outputs[0]:
        add_op._update_input(index, nn_resize)  # pylint: disable=protected-access
        break
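# Hedged usage sketch (not from the source): rewrite_nn_resize_op mutates the
# default graph in TF1 graph mode, so a caller imports or builds the graph
# first and then runs the rewrite. The helper name and file path below are
# illustrative assumptions, not part of the original API.
def _example_rewrite_frozen_graph(frozen_graph_path):  # hypothetical helper
  graph_def = tf.GraphDef()
  with tf.gfile.GFile(frozen_graph_path, 'rb') as f:
    graph_def.ParseFromString(f.read())
  with tf.Graph().as_default() as g:
    tf.import_graph_def(graph_def, name='')
    # Replaces every matched Reshape -> Mul -> [FakeQuant] -> Reshape block
    # feeding an Add with a single tf.image.resize_nearest_neighbor op.
    rewrite_nn_resize_op(is_quantized=False)
  return g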
def test_oneof_pattern(self):
  reshape_pattern = graph_matcher.OpTypePattern('Reshape')
  transpose_pattern = graph_matcher.OneofPattern([
      graph_matcher.OpTypePattern(
          'Transpose',
          name='transpose',
          inputs=[
              graph_matcher.OpTypePattern(
                  'Slice', name='slice', inputs=[reshape_pattern, '*', '*']),
              '*'
          ]),
      graph_matcher.OpTypePattern(
          'Transpose', name='transpose', inputs=[reshape_pattern, '*'])
  ])

  matcher = graph_matcher.GraphMatcher(transpose_pattern)

  g = ops.Graph()
  with g.as_default():
    inputs = array_ops.placeholder(dtypes.float32, shape=[6])
    reshape = array_ops.reshape(inputs, [2, 3])
    transpose = array_ops.transpose(reshape)
    [match_result] = list(matcher.match_graph(g))
    self.assertEqual(match_result.get_tensor(reshape_pattern), reshape)
    self.assertEqual(match_result.get_tensor('slice'), None)
    self.assertEqual(match_result.get_op('transpose'), transpose.op)

  g = ops.Graph()
  with g.as_default():
    inputs = array_ops.placeholder(dtypes.float32, shape=[6])
    reshape = array_ops.reshape(inputs, [2, 3])
    slicing = array_ops.slice(reshape, [0, 0], [-1, -1])
    transpose = array_ops.transpose(slicing)
    [match_result] = list(matcher.match_graph(g))
    self.assertEqual(match_result.get_tensor(reshape_pattern), reshape)
    self.assertEqual(match_result.get_tensor('slice'), slicing)
    self.assertEqual(match_result.get_op('transpose'), transpose.op)
def _FindLayersToQuantize(graph):
  """Matches layers in graph to quantize.

  The following patterns get matched. Nodes surrounded by [] will be
  optionally matched:

          weight|folded_weight
                /
         conv|fc
            |
    [post_conv_correction]
            |
     biasadd|folded_bias
            |
         [bypass]
            |
        activation
            |
   [post_activation_bypass]

  Match replacements:
    If weight|folded_weight is found, FakeQuant is added afterwards.
    If bypass is found, FakeQuant is added before and after.
    If activation is found, FakeQuant is added afterwards.
    If post_activation_bypass is found, FakeQuant is added afterwards.

  Args:
    graph: Graph to perform match on.

  Yields:
    _LayerMatches.
  """
  input_pattern = graph_matcher.OpTypePattern('*')
  weight_var_pattern = graph_matcher.OpTypePattern('|'.join(_WEIGHT_TYPES))
  weight_pattern = graph_matcher.OpTypePattern(
      'Identity|ReadVariableOp', inputs=[weight_var_pattern])

  folded_weight_pattern = graph_matcher.OpTypePattern('Mul')

  # The weight inputs to the layer operation can come either from the
  # Variable or from the folded weight (Mul).
  layer_pattern = graph_matcher.OpTypePattern(
      '|'.join(_QUANTIZABLE_TYPES),
      inputs=[
          input_pattern,
          graph_matcher.OneofPattern([weight_pattern, folded_weight_pattern])
      ])

  folded_bias_mul_pattern = graph_matcher.OpTypePattern(
      'Mul', inputs=[graph_matcher.OpTypePattern('*'), layer_pattern])
  post_layer_op_correction_pattern = graph_matcher.OpTypePattern(
      'Add', inputs=[folded_bias_mul_pattern,
                     graph_matcher.OpTypePattern('*')])
  folded_bias_add_pattern = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          post_layer_op_correction_pattern,
          graph_matcher.OpTypePattern('*')
      ])

  bias_add_pattern = graph_matcher.OpTypePattern(
      'Add|BiasAdd', inputs=[layer_pattern, '*'])

  # The bias can come from the bias add or the folded bias add.
  bypass_pattern_a = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          graph_matcher.OneofPattern(
              [bias_add_pattern, folded_bias_add_pattern]), '*'
      ])
  bypass_pattern_b = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          '*',
          graph_matcher.OneofPattern(
              [bias_add_pattern, folded_bias_add_pattern])
      ])

  # The input to the activation can come from bias add, folded bias add, or
  # the bypasses.
  activation_pattern = graph_matcher.OpTypePattern(
      '|'.join(_ACTIVATION_TYPES),
      inputs=[
          graph_matcher.OneofPattern([
              bias_add_pattern, folded_bias_add_pattern, bypass_pattern_a,
              bypass_pattern_b
          ])
      ])

  post_activation_bypass_pattern_a = graph_matcher.OpTypePattern(
      'Add', inputs=['*', activation_pattern])
  post_activation_bypass_pattern_b = graph_matcher.OpTypePattern(
      'Add', inputs=[activation_pattern, '*'])

  layer_matcher = graph_matcher.GraphMatcher(
      graph_matcher.OneofPattern([
          post_activation_bypass_pattern_a, post_activation_bypass_pattern_b,
          activation_pattern
      ]))
  for match_result in layer_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(folded_weight_pattern)
    activation_op = match_result.get_op(activation_pattern)
    bias_add_op = match_result.get_op(bias_add_pattern)
    if bias_add_op is None:
      bias_add_op = match_result.get_op(folded_bias_add_pattern)
    bypass_op = match_result.get_op(bypass_pattern_a)
    if bypass_op is None:
      bypass_op = match_result.get_op(bypass_pattern_b)
    post_activation_bypass_op = match_result.get_op(
        post_activation_bypass_pattern_a)
    if post_activation_bypass_op is None:
      post_activation_bypass_op = match_result.get_op(
          post_activation_bypass_pattern_b)
    # If we don't find a post_activation_bypass_op but activation_op has a
    # bypass following it, then we need to skip this match, since there will
    # be another match that includes post_activation_bypass_op.
    if post_activation_bypass_op is None and _HasPostActivationBypass(
        activation_op):
      continue
    yield _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op,
                      post_activation_bypass_op, bias_add_op)

  # Match the final layer, where there will not be an activation and instead
  # the output of the final BiasAdd must be quantized, so we treat it as the
  # 'activation_op' in the _LayerMatch.
  # TODO(suharshs): Figure out how to quantize this final layer across many
  # models.
  final_layer_matcher = graph_matcher.GraphMatcher(bias_add_pattern)
  for match_result in final_layer_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(folded_weight_pattern)
    activation_op = match_result.get_op(bias_add_pattern)
    if activation_op is None:
      activation_op = match_result.get_op(folded_bias_add_pattern)
    yield _LayerMatch(layer_op, weight_tensor, activation_op, None, None,
                      None)
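# A minimal sketch (not from the source) of consuming the generator above.
# It assumes _LayerMatch exposes its constructor arguments as attributes
# (layer_op, weight_tensor, activation_op, bypass_op,
# post_activation_bypass_op, bias_add_op), which the yield calls above
# suggest; the helper name is hypothetical.
def _example_list_quantizable_layers(graph):  # hypothetical helper
  for match in _FindLayersToQuantize(graph):
    print('layer: %s (%s)' % (match.layer_op.name, match.layer_op.type))
    if match.bypass_op is not None:
      print('  pre-activation bypass: %s' % match.bypass_op.name)
    if match.post_activation_bypass_op is not None:
      print('  post-activation bypass: %s' %
            match.post_activation_bypass_op.name)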
def _FindLayersToQuantize(graph):
  """Matches layers in graph to quantize.

  The following patterns get matched. Nodes surrounded by [] will be
  optionally matched:

          weight|folded_weight
                /
         conv|fc
            |
      [batch_to_space_nd]
            |
    [post_conv_correction]
            |
     biasadd|folded_bias
            |
         [bypass]
            |
        activation
            |
   [post_activation_bypass]

  Match replacements:
    If weight|folded_weight is found, FakeQuant is added afterwards.
    If bypass is found, FakeQuant is added before and after.
    If activation is found, FakeQuant is added afterwards.
    If post_activation_bypass is found, FakeQuant is added afterwards.

  Args:
    graph: Graph to perform match on.

  Returns:
    list of _LayerMatches.
  """
  input_pattern = graph_matcher.OpTypePattern('*')
  weight_var_pattern = graph_matcher.OpTypePattern('Variable|VariableV2')
  weight_partition_identity_pattern = graph_matcher.OpTypePattern(
      'Identity', inputs=[weight_var_pattern])
  weight_partition_concat_pattern = graph_matcher.OpTypePattern(
      'ConcatV2', inputs=[weight_partition_identity_pattern, '*', '*'])
  weight_identity_pattern = graph_matcher.OpTypePattern(
      'Identity',
      inputs=[
          graph_matcher.OneofPattern([
              weight_partition_identity_pattern,
              weight_partition_concat_pattern,
              weight_var_pattern,
          ])
      ])
  weight_resource_var_pattern = graph_matcher.OpTypePattern('ReadVariableOp')
  folded_weight_pattern = graph_matcher.OpTypePattern('Mul')

  # The weight inputs to the layer operation can come either from the
  # Variable or from the folded weight (Mul).
  layer_pattern = graph_matcher.OpTypePattern(
      '|'.join(_QUANTIZABLE_TYPES),
      inputs=[
          input_pattern,
          graph_matcher.OneofPattern([
              weight_identity_pattern, weight_resource_var_pattern,
              folded_weight_pattern
          ])
      ],
      ordered_inputs=False)

  # For atrous convolutions a BatchToSpaceND will occur after the depthwise
  # convolution.
  batch_to_space_pattern = graph_matcher.OpTypePattern(
      'BatchToSpaceND',
      inputs=[
          layer_pattern,
          graph_matcher.OpTypePattern('*'),
          graph_matcher.OpTypePattern('*')
      ])

  layer_output_pattern = graph_matcher.OneofPattern(
      [batch_to_space_pattern, layer_pattern])

  # For separable convolutions, we are looking for a conv, followed by a conv
  # with no activations between the two.
  sep_conv_pattern = graph_matcher.OpTypePattern(
      '|'.join(_QUANTIZABLE_TYPES),
      inputs=[
          graph_matcher.OneofPattern([layer_output_pattern]),
          graph_matcher.OpTypePattern('*')
      ],
      ordered_inputs=False)
  folded_bias_mul_pattern = graph_matcher.OpTypePattern(
      'Mul',
      inputs=[graph_matcher.OpTypePattern('*'), layer_output_pattern],
      ordered_inputs=False)
  post_layer_op_correction_pattern = graph_matcher.OpTypePattern(
      'Add',
      inputs=[folded_bias_mul_pattern,
              graph_matcher.OpTypePattern('*')],
      ordered_inputs=False)
  folded_bias_add_pattern = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          post_layer_op_correction_pattern,
          graph_matcher.OpTypePattern('*')
      ],
      ordered_inputs=False)

  # batch_norms with forced updates have an Identity operation at the end.
  # TODO(suharshs): Find a way to easily skip extra Identity operations. The
  # current issue is that doing so can often match patterns across many layers
  # incorrectly.
  batch_norm_identity = graph_matcher.OpTypePattern(
      'Identity', inputs=[folded_bias_add_pattern])

  bias_add_pattern = graph_matcher.OpTypePattern(
      'Add|BiasAdd', inputs=[layer_output_pattern, '*'], ordered_inputs=False)

  # The bias can come from the bias add or the folded bias add.
  bypass_pattern = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          graph_matcher.OneofPattern([
              bias_add_pattern, folded_bias_add_pattern, batch_norm_identity
          ]), '*'
      ],
      ordered_inputs=False)

  # The input to the activation can come from bias add, folded bias add, or
  # the bypasses.
  # TODO(suharshs): We should ideally skip Identity operations instead of
  # treating them as activations.
  activation_pattern = graph_matcher.OpTypePattern(
      '|'.join(_ACTIVATION_TYPES) + '|Identity',
      inputs=[
          graph_matcher.OneofPattern([
              bias_add_pattern,
              folded_bias_add_pattern,
              batch_norm_identity,
              bypass_pattern,
          ])
      ])

  post_activation_bypass_pattern = graph_matcher.OpTypePattern(
      'Add', inputs=['*', activation_pattern], ordered_inputs=False)

  # The order of the following matching blocks is very important. Since
  # matches aren't guaranteed to be disjoint, we structure matches from
  # largest to smallest to guarantee that the largest match always wins.
  # Additionally, we ensure that we don't match layers multiple times.
  layer_matches = []
  # We use matched_layer_set to ensure that layers aren't matched multiple
  # times.
  matched_layer_set = set()

  # First, we match layers that have a post activation bypass. We do this
  # first to ensure we don't match only the first part of this layer, missing
  # the post activation bypass node.
  post_activation_bypass_layer_matcher = graph_matcher.GraphMatcher(
      post_activation_bypass_pattern)
  for match_result in post_activation_bypass_layer_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_identity_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(weight_resource_var_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(folded_weight_pattern)
    activation_op = match_result.get_op(activation_pattern)
    bias_add_op = match_result.get_op(bias_add_pattern)
    if bias_add_op is None:
      bias_add_op = match_result.get_op(folded_bias_add_pattern)
    bypass_op = match_result.get_op(bypass_pattern)
    post_activation_bypass_op = match_result.get_op(
        post_activation_bypass_pattern)
    if layer_op not in matched_layer_set:
      matched_layer_set.add(layer_op)
      layer_matches.append(
          _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op,
                      post_activation_bypass_op, bias_add_op))

  # Now, we match the basic layer ending at an activation. We may get
  # duplicate matches from above, but we don't add them to layer_matches.
  layer_matcher = graph_matcher.GraphMatcher(activation_pattern)
  for match_result in layer_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_identity_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(weight_resource_var_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(folded_weight_pattern)
    activation_op = match_result.get_op(activation_pattern)
    bias_add_op = match_result.get_op(bias_add_pattern)
    if bias_add_op is None:
      bias_add_op = match_result.get_op(folded_bias_add_pattern)
    bypass_op = match_result.get_op(bypass_pattern)
    if layer_op not in matched_layer_set:
      matched_layer_set.add(layer_op)
      layer_matches.append(
          _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op, None,
                      bias_add_op))

  # Match the final layer, where there may not be an activation and instead
  # the output of the final BiasAdd must be quantized. So we treat the BiasAdd
  # as the 'activation_op' in the _LayerMatch, to ensure that its output is
  # quantized.
  final_layer_matcher = graph_matcher.GraphMatcher(
      graph_matcher.OneofPattern([bias_add_pattern, folded_bias_add_pattern]))
  for match_result in final_layer_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_identity_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(weight_resource_var_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(folded_weight_pattern)
    activation_op = match_result.get_op(bias_add_pattern)
    if activation_op is None:
      activation_op = match_result.get_op(folded_bias_add_pattern)
    if layer_op not in matched_layer_set:
      matched_layer_set.add(layer_op)
      layer_matches.append(
          _LayerMatch(layer_op, weight_tensor, activation_op, None, None,
                      None))

  # Look for separable convolutions here
  sep_conv_matcher = graph_matcher.GraphMatcher(sep_conv_pattern)
  for match_result in sep_conv_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_identity_pattern)
    activation_op = match_result.get_op(layer_pattern)
    if layer_op not in matched_layer_set:
      matched_layer_set.add(layer_op)
      layer_matches.append(
          _LayerMatch(layer_op, weight_tensor, activation_op, None, None,
                      None))

  return layer_matches
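# Illustration (not from the source) of the ordered_inputs=False flag used
# throughout this revision: with unordered inputs, one pattern covers both
# operand orders of a commutative Add, replacing the bypass_pattern_a /
# bypass_pattern_b pair from earlier revisions. Assumes the same TF-internal
# imports as the test above plus math_ops and constant_op; 'Add|AddV2' hedges
# against the op type that addition lowers to in different TF releases.
def _example_unordered_add_match():  # hypothetical helper
  pattern = graph_matcher.OpTypePattern(
      'Add|AddV2', inputs=['Mul', 'Const'], ordered_inputs=False)
  matcher = graph_matcher.GraphMatcher(pattern)
  g = ops.Graph()
  with g.as_default():
    x = array_ops.placeholder(dtypes.float32, shape=[2])
    scaled = math_ops.multiply(x, 2.0)
    # The Const is the *first* input here; an ordered pattern with
    # inputs=['Mul', 'Const'] would not match this op.
    _ = math_ops.add(constant_op.constant([1.0, 1.0]), scaled)
  return list(matcher.match_graph(g))  # expected: one match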
def _FindFusedBatchNorms(graph):
  """Finds all ops and tensors related to found FusedBatchNorms.

  Args:
    graph: Graph to inspect.

  Returns:
    _FusedBatchNormMatches.
  """
  input_pattern = graph_matcher.OpTypePattern('*')
  # In practice, the weight pattern can match a Variable or a SpaceToBatchND
  # operation that follows a variable for atrous convolutions.
  weight_pattern = graph_matcher.OpTypePattern('*')
  gamma_pattern = graph_matcher.OpTypePattern('*')
  beta_pattern = graph_matcher.OpTypePattern('*')
  mean_pattern = graph_matcher.OpTypePattern('*')
  variance_pattern = graph_matcher.OpTypePattern('*')

  moving_average_pattern = graph_matcher.OpTypePattern('*')
  bn_decay_pattern = graph_matcher.OpTypePattern('*')
  layer_pattern = graph_matcher.OpTypePattern(
      'Conv2D|DepthwiseConv2dNative|MatMul',
      inputs=[input_pattern, weight_pattern])
  batch_to_space_pattern = graph_matcher.OpTypePattern(
      'BatchToSpaceND',
      inputs=[
          layer_pattern,
          graph_matcher.OpTypePattern('*'),
          graph_matcher.OpTypePattern('*')
      ])
  # Identity between conv/matmul and bn
  layer_pattern_with_identity = graph_matcher.OpTypePattern(
      'Identity',
      inputs=[
          graph_matcher.OneofPattern([batch_to_space_pattern, layer_pattern])
      ])
  layer_output_pattern = graph_matcher.OneofPattern(
      [layer_pattern_with_identity, layer_pattern, batch_to_space_pattern])

  # MatMul has a Reshape between it and FusedBatchNorm.
  matmul_reshape_pattern = graph_matcher.OpTypePattern(
      'Reshape',
      inputs=[layer_output_pattern,
              graph_matcher.OpTypePattern('*')])

  batch_norm_pattern = graph_matcher.OpTypePattern(
      'FusedBatchNorm',
      inputs=[
          graph_matcher.OneofPattern(
              [matmul_reshape_pattern, layer_output_pattern]), gamma_pattern,
          beta_pattern, mean_pattern, variance_pattern
      ])
  matmul_bn_output_reshape_pattern = graph_matcher.OpTypePattern(
      'Reshape', inputs=[batch_norm_pattern,
                         graph_matcher.OpTypePattern('*')])

  batch_norm_identity_pattern = graph_matcher.OpTypePattern(
      'Identity',
      inputs=[batch_norm_pattern, matmul_bn_output_reshape_pattern])

  bn_identity_matcher = graph_matcher.GraphMatcher(
      batch_norm_identity_pattern)

  bn_matcher = graph_matcher.GraphMatcher(
      graph_matcher.OneofPattern(
          [matmul_bn_output_reshape_pattern, batch_norm_pattern]))

  moving_average_sub_pattern = graph_matcher.OpTypePattern(
      'Sub', inputs=[moving_average_pattern, batch_norm_pattern])
  moving_average_mul_pattern = graph_matcher.OpTypePattern(
      'Mul', inputs=[moving_average_sub_pattern, bn_decay_pattern])

  moving_avg_mul_matcher = graph_matcher.GraphMatcher(
      moving_average_mul_pattern)

  def _GetLayerMatch(match_result):
    """Populates a layer match object containing ops/tensors for folding BNs.

    Args:
      match_result: Matched result from graph matcher

    Returns:
      layer_op: Matching conv/fc op prior to batch norm
      BatchNormMatch: _BatchNormMatch containing all required batch norm
      parameters.
    """
    moving_mean_tensor = None
    moving_variance_tensor = None
    bn_decay_mean_tensor = None
    bn_decay_var_tensor = None
    batch_to_space_op = None
    layer_op = match_result.get_op(layer_pattern)
    layer_tensor = match_result.get_tensor(layer_pattern)
    bn_id_op = match_result.get_op(batch_norm_identity_pattern)
    bn_op = match_result.get_op(batch_norm_pattern)
    if bn_id_op is None:
      bn_id_op = bn_op

    batch_epsilon = bn_op.get_attr('epsilon')

    # In the MatMul case, the output of batch norm is reshaped back into a
    # 2D tensor, so the output_tensor is the output of the Reshape op.
    output_tensor = bn_op.outputs[0]
    if layer_op.type == 'MatMul':
      output_reshape_op = match_result.get_op(
          matmul_bn_output_reshape_pattern)
      # If the matcher didn't match matmul_bn_output_reshape, there will be
      # another match for this 'MatMul' later, so we can skip this one.
      if output_reshape_op is None:
        return None, None
      output_tensor = output_reshape_op.outputs[0]

    # Ensure that the output tensor has consumers, otherwise this is a
    # dangling node and not a match.
    if not output_tensor.consumers():
      return None, None

    batch_to_space_op = match_result.get_op(batch_to_space_pattern)
    input_tensor = match_result.get_tensor(input_pattern)
    weight_tensor = match_result.get_tensor(weight_pattern)
    gamma_tensor = match_result.get_tensor(gamma_pattern)
    beta_tensor = match_result.get_tensor(beta_pattern)
    # FusedBatchNorm in training is different from that in inference. It takes
    # empty 'mean' and empty 'variance', and produces the mean and the
    # variance of the batch. Therefore, when is_training is true, mean_tensor
    # and variance_tensor point to 1st and 2nd (0-based) output of bn_op,
    # respectively; when is_training is false, they point to bn_op's inputs.
    is_training = bn_op.get_attr('is_training')
    if is_training:
      # FusedBatchNormGrad doesn't compute gradients of the batch_mean and
      # batch_variance outputs, so we need to substitute our own custom
      # gradient.
      # TODO(suharshs, raghuramank): Find a way to avoid needing this hack.
      # pylint: disable=protected-access
      bn_op._set_attr(
          '_gradient_op_type',
          attr_value_pb2.AttrValue(
              s=compat.as_bytes('FoldFusedBatchNormGrad')))
      # pylint: enable=protected-access
      mean_tensor = bn_op.outputs[1]
      # The batch variance used during forward and backward prop is biased,
      # i.e. it is calculated as: V = sum((x(k) - mu)^2) / N. For the moving
      # average calculation, the variance is corrected by the term N/(N-1)
      # (Bessel's correction). The variance tensor read from FusedBatchNorm
      # has Bessel's correction applied, so we undo it here.
      scope, sep, _ = bn_op.name.rpartition('/')
      g = ops.get_default_graph()
      with g.as_default(), g.name_scope(scope + sep):
        n = math_ops.cast(
            array_ops.size(layer_tensor) / array_ops.size(mean_tensor),
            dtypes.float32)
        variance_tensor = math_ops.multiply(
            bn_op.outputs[2], (n - 1) / n, name='Undo_Bessel_Correction')
      # TODO(suharshs): Find a way to get rid of this inner match.
      for mul_match_result in moving_avg_mul_matcher.match_graph(graph):
        sub_op = mul_match_result.get_op(moving_average_sub_pattern)
        if sub_op.inputs[1].name == bn_op.outputs[1].name:
          # During training: Batch Mean is bn_op.outputs[1]
          moving_mean_tensor = sub_op.inputs[0]
          bn_decay_mean_tensor = mul_match_result.get_tensor(bn_decay_pattern)
        if sub_op.inputs[1].name == bn_op.outputs[2].name:
          # During training: Batch Var is bn_op.outputs[2]
          moving_variance_tensor = sub_op.inputs[0]
          bn_decay_var_tensor = mul_match_result.get_tensor(bn_decay_pattern)
    else:
      mean_tensor = match_result.get_tensor(mean_pattern)
      variance_tensor = match_result.get_tensor(variance_pattern)

    return layer_op, _BatchNormMatch(
        layer_op=layer_op,
        bn_op=bn_op,
        output_tensor=output_tensor,
        input_tensor=input_tensor,
        weight_tensor=weight_tensor,
        gamma_tensor=gamma_tensor,
        beta_tensor=beta_tensor,
        mean_tensor=mean_tensor,
        variance_tensor=variance_tensor,
        moving_mean_tensor=moving_mean_tensor,
        moving_variance_tensor=moving_variance_tensor,
        bn_decay_mean_tensor=bn_decay_mean_tensor,
        bn_decay_var_tensor=bn_decay_var_tensor,
        batch_epsilon=batch_epsilon,
        batch_to_space_op=batch_to_space_op)

  layer_matches = []
  # We use matched_layer_set to ensure that layers aren't matched multiple
  # times.
  matched_layer_set = set()
  for match_result in bn_identity_matcher.match_graph(graph):
    layer_op, layer_match = _GetLayerMatch(match_result)
    if layer_op is not None:
      if layer_op not in matched_layer_set:
        matched_layer_set.add(layer_op)
        layer_matches.append(layer_match)

  for match_result in bn_matcher.match_graph(graph):
    layer_op, layer_match = _GetLayerMatch(match_result)
    if layer_op is not None:
      if layer_op not in matched_layer_set:
        matched_layer_set.add(layer_op)
        layer_matches.append(layer_match)

  return layer_matches
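# Hedged sketch (not from the source) of consuming the returned matches.
# It assumes _BatchNormMatch exposes its keyword constructor arguments as
# properties (bn_op, layer_op, batch_to_space_op, ...), as the construction
# above suggests; the helper name is hypothetical.
def _example_report_foldable_bns(graph):  # hypothetical helper
  for bn_match in _FindFusedBatchNorms(graph):
    print('fold %s into %s' % (bn_match.bn_op.name, bn_match.layer_op.name))
    if bn_match.batch_to_space_op is not None:
      # Atrous convolution: a BatchToSpaceND sits between conv and BN.
      print('  BatchToSpaceND: %s' % bn_match.batch_to_space_op.name)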
def _FindLayersToQuantize(graph):
  """Matches layers in graph to quantize.

  Args:
    graph: Graph to perform match on.

  Yields:
    _LayerMatches.
  """
  input_pattern = graph_matcher.OpTypePattern('*')
  weight_var_pattern = graph_matcher.OpTypePattern('|'.join(_WEIGHT_TYPES))
  weight_pattern = graph_matcher.OpTypePattern(
      'Identity', inputs=[weight_var_pattern])

  folded_weight_pattern = graph_matcher.OpTypePattern('Mul')

  # The weight inputs to the layer operation can come either from the
  # Variable or from the folded weight (Mul).
  layer_pattern = graph_matcher.OpTypePattern(
      '|'.join(_QUANTIZABLE_TYPES),
      inputs=[
          input_pattern,
          graph_matcher.OneofPattern([weight_pattern, folded_weight_pattern])
      ])

  folded_bias_mul_pattern = graph_matcher.OpTypePattern(
      'Mul', inputs=[graph_matcher.OpTypePattern('*'), layer_pattern])
  post_layer_op_correction_pattern = graph_matcher.OpTypePattern(
      'Add', inputs=[folded_bias_mul_pattern,
                     graph_matcher.OpTypePattern('*')])
  folded_bias_add_pattern = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          post_layer_op_correction_pattern,
          graph_matcher.OpTypePattern('*')
      ])

  bias_add_pattern = graph_matcher.OpTypePattern(
      'Add|BiasAdd', inputs=[layer_pattern, '*'])

  # The bias can come from the bias add or the folded bias add.
  bypass_pattern_a = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          graph_matcher.OneofPattern(
              [bias_add_pattern, folded_bias_add_pattern]), '*'
      ])
  bypass_pattern_b = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          '*',
          graph_matcher.OneofPattern(
              [bias_add_pattern, folded_bias_add_pattern])
      ])

  # The input to the activation can come from bias add, folded bias add, or
  # the bypasses.
  activation_pattern = graph_matcher.OpTypePattern(
      '|'.join(_ACTIVATION_TYPES),
      inputs=[
          graph_matcher.OneofPattern([
              bias_add_pattern, folded_bias_add_pattern, bypass_pattern_a,
              bypass_pattern_b
          ])
      ])

  layer_matcher = graph_matcher.GraphMatcher(activation_pattern)
  for match_result in layer_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(folded_weight_pattern)
    activation_op = match_result.get_op(activation_pattern)
    bias_add_op = match_result.get_op(bias_add_pattern)
    if bias_add_op is None:
      bias_add_op = match_result.get_op(folded_bias_add_pattern)
    bypass_op = match_result.get_op(bypass_pattern_a)
    if bypass_op is None:
      bypass_op = match_result.get_op(bypass_pattern_b)
    yield _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op,
                      bias_add_op)
def _FindLayersToQuantize(graph):
  """Matches layers in graph to quantize.

  The following patterns get matched. Nodes surrounded by [] will be
  optionally matched:

          weight|folded_weight
                /
         conv|fc
            |
    [post_conv_correction]
            |
     biasadd|folded_bias
            |
         [bypass]
            |
        activation
            |
   [post_activation_bypass]

  Match replacements:
    If weight|folded_weight is found, FakeQuant is added afterwards.
    If bypass is found, FakeQuant is added before and after.
    If activation is found, FakeQuant is added afterwards.
    If post_activation_bypass is found, FakeQuant is added afterwards.

  Args:
    graph: Graph to perform match on.

  Returns:
    list of _LayerMatches.
  """
  input_pattern = graph_matcher.OpTypePattern('*')
  weight_var_pattern = graph_matcher.OpTypePattern('Variable|VariableV2')
  weight_identity_pattern = graph_matcher.OpTypePattern(
      'Identity', inputs=[weight_var_pattern])
  weight_resource_var_pattern = graph_matcher.OpTypePattern('ReadVariableOp')
  folded_weight_pattern = graph_matcher.OpTypePattern('Mul')

  # The weight inputs to the layer operation can come either from the
  # Variable or from the folded weight (Mul).
  layer_pattern = graph_matcher.OpTypePattern(
      '|'.join(_QUANTIZABLE_TYPES),
      inputs=[
          input_pattern,
          graph_matcher.OneofPattern([
              weight_identity_pattern, weight_resource_var_pattern,
              folded_weight_pattern
          ])
      ])

  folded_bias_mul_pattern = graph_matcher.OpTypePattern(
      'Mul', inputs=[graph_matcher.OpTypePattern('*'), layer_pattern])
  post_layer_op_correction_pattern = graph_matcher.OpTypePattern(
      'Add', inputs=[folded_bias_mul_pattern,
                     graph_matcher.OpTypePattern('*')])
  folded_bias_add_pattern = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          post_layer_op_correction_pattern,
          graph_matcher.OpTypePattern('*')
      ])

  bias_add_pattern = graph_matcher.OpTypePattern(
      'Add|BiasAdd', inputs=[layer_pattern, '*'])

  # The bias can come from the bias add or the folded bias add.
  bypass_pattern_a = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          graph_matcher.OneofPattern(
              [bias_add_pattern, folded_bias_add_pattern]), '*'
      ])
  bypass_pattern_b = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          '*',
          graph_matcher.OneofPattern(
              [bias_add_pattern, folded_bias_add_pattern])
      ])

  # The input to the activation can come from bias add, folded bias add, or
  # the bypasses.
  activation_pattern = graph_matcher.OpTypePattern(
      '|'.join(_ACTIVATION_TYPES),
      inputs=[
          graph_matcher.OneofPattern([
              bias_add_pattern, folded_bias_add_pattern, bypass_pattern_a,
              bypass_pattern_b
          ])
      ])

  post_activation_bypass_pattern_a = graph_matcher.OpTypePattern(
      'Add', inputs=['*', activation_pattern])
  post_activation_bypass_pattern_b = graph_matcher.OpTypePattern(
      'Add', inputs=[activation_pattern, '*'])

  # The order of the following matching blocks is very important. Since
  # matches aren't guaranteed to be disjoint, we structure matches from
  # largest to smallest to guarantee that the largest match always wins.
  # Additionally, we ensure that we don't match layers multiple times.
  layer_matches = []
  # We use matched_layer_set to ensure that layers aren't matched multiple
  # times.
  matched_layer_set = set()

  # First, we match layers that have a post activation bypass. We do this
  # first to ensure we don't match only the first part of this layer, missing
  # the post activation bypass node.
  post_activation_bypass_layer_matcher = graph_matcher.GraphMatcher(
      graph_matcher.OneofPattern([
          post_activation_bypass_pattern_a,
          post_activation_bypass_pattern_b,
      ]))
  for match_result in post_activation_bypass_layer_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_identity_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(weight_resource_var_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(folded_weight_pattern)
    activation_op = match_result.get_op(activation_pattern)
    bias_add_op = match_result.get_op(bias_add_pattern)
    if bias_add_op is None:
      bias_add_op = match_result.get_op(folded_bias_add_pattern)
    bypass_op = match_result.get_op(bypass_pattern_a)
    if bypass_op is None:
      bypass_op = match_result.get_op(bypass_pattern_b)
    post_activation_bypass_op = match_result.get_op(
        post_activation_bypass_pattern_a)
    if post_activation_bypass_op is None:
      post_activation_bypass_op = match_result.get_op(
          post_activation_bypass_pattern_b)
    if layer_op not in matched_layer_set:
      matched_layer_set.add(layer_op)
      layer_matches.append(
          _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op,
                      post_activation_bypass_op, bias_add_op))

  # Now, we match the basic layer ending at an activation. We may get
  # duplicate matches from above, but we don't add them to layer_matches.
  layer_matcher = graph_matcher.GraphMatcher(activation_pattern)
  for match_result in layer_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_identity_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(weight_resource_var_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(folded_weight_pattern)
    activation_op = match_result.get_op(activation_pattern)
    bias_add_op = match_result.get_op(bias_add_pattern)
    if bias_add_op is None:
      bias_add_op = match_result.get_op(folded_bias_add_pattern)
    bypass_op = match_result.get_op(bypass_pattern_a)
    if bypass_op is None:
      bypass_op = match_result.get_op(bypass_pattern_b)
    if layer_op not in matched_layer_set:
      matched_layer_set.add(layer_op)
      layer_matches.append(
          _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op, None,
                      bias_add_op))

  # Match the final layer, where there may not be an activation and instead
  # the output of the final BiasAdd must be quantized. So we treat the BiasAdd
  # as the 'activation_op' in the _LayerMatch, to ensure that its output is
  # quantized.
  final_layer_matcher = graph_matcher.GraphMatcher(
      graph_matcher.OneofPattern([bias_add_pattern, folded_bias_add_pattern]))
  for match_result in final_layer_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_identity_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(weight_resource_var_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(folded_weight_pattern)
    activation_op = match_result.get_op(bias_add_pattern)
    if activation_op is None:
      activation_op = match_result.get_op(folded_bias_add_pattern)
    if layer_op not in matched_layer_set:
      matched_layer_set.add(layer_op)
      layer_matches.append(
          _LayerMatch(layer_op, weight_tensor, activation_op, None, None,
                      None))

  return layer_matches
def _FindLayersToQuantize(graph):
  """Matches layers in graph to quantize.

  Args:
    graph: Graph to perform match on.

  Yields:
    _LayerMatches.
  """
  input_pattern = graph_matcher.OpTypePattern('*')
  weight_var_pattern = graph_matcher.OpTypePattern('|'.join(_WEIGHT_TYPES))
  weight_pattern = graph_matcher.OpTypePattern(
      'Identity|ReadVariableOp', inputs=[weight_var_pattern])

  folded_weight_pattern = graph_matcher.OpTypePattern('Mul')

  # The weight inputs to the layer operation can come either from the
  # Variable or from the folded weight (Mul).
  layer_pattern = graph_matcher.OpTypePattern(
      '|'.join(_QUANTIZABLE_TYPES),
      inputs=[
          input_pattern,
          graph_matcher.OneofPattern([weight_pattern, folded_weight_pattern])
      ])

  folded_bias_mul_pattern = graph_matcher.OpTypePattern(
      'Mul', inputs=[graph_matcher.OpTypePattern('*'), layer_pattern])
  post_layer_op_correction_pattern = graph_matcher.OpTypePattern(
      'Add', inputs=[folded_bias_mul_pattern,
                     graph_matcher.OpTypePattern('*')])
  folded_bias_add_pattern = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          post_layer_op_correction_pattern,
          graph_matcher.OpTypePattern('*')
      ])

  bias_add_pattern = graph_matcher.OpTypePattern(
      'Add|BiasAdd', inputs=[layer_pattern, '*'])

  # The bias can come from the bias add or the folded bias add.
  bypass_pattern_a = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          graph_matcher.OneofPattern(
              [bias_add_pattern, folded_bias_add_pattern]), '*'
      ])
  bypass_pattern_b = graph_matcher.OpTypePattern(
      'Add',
      inputs=[
          '*',
          graph_matcher.OneofPattern(
              [bias_add_pattern, folded_bias_add_pattern])
      ])

  # The input to the activation can come from bias add, folded bias add, or
  # the bypasses.
  activation_pattern = graph_matcher.OpTypePattern(
      '|'.join(_ACTIVATION_TYPES),
      inputs=[
          graph_matcher.OneofPattern([
              bias_add_pattern, folded_bias_add_pattern, bypass_pattern_a,
              bypass_pattern_b
          ])
      ])

  layer_matcher = graph_matcher.GraphMatcher(activation_pattern)
  for match_result in layer_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_pattern)
    if weight_tensor is None:
      weight_tensor = match_result.get_tensor(folded_weight_pattern)
    activation_op = match_result.get_op(activation_pattern)
    bias_add_op = match_result.get_op(bias_add_pattern)
    if bias_add_op is None:
      bias_add_op = match_result.get_op(folded_bias_add_pattern)
    bypass_op = match_result.get_op(bypass_pattern_a)
    if bypass_op is None:
      bypass_op = match_result.get_op(bypass_pattern_b)
    yield _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op,
                      bias_add_op)

  # Match the final layer, where there will not be an activation and instead
  # the output of the final BiasAdd must be quantized, so we treat it as the
  # 'activation_op' in the _LayerMatch.
  # TODO(suharshs): Figure out how to quantize this final layer across many
  # models.
  final_layer_matcher = graph_matcher.GraphMatcher(bias_add_pattern)
  for match_result in final_layer_matcher.match_graph(graph):
    layer_op = match_result.get_op(layer_pattern)
    weight_tensor = match_result.get_tensor(weight_pattern)
    activation_op = match_result.get_op(bias_add_pattern)
    yield _LayerMatch(layer_op, weight_tensor, activation_op, None, None)
def _FindFusedBatchNorms(graph):
  """Finds all ops and tensors related to found FusedBatchNorms.

  Args:
    graph: Graph to inspect.

  Yields:
    _FusedBatchNormMatches.
  """
  input_pattern = graph_matcher.OpTypePattern('*')
  weight_pattern = graph_matcher.OpTypePattern('*')
  gamma_pattern = graph_matcher.OpTypePattern('*')
  beta_pattern = graph_matcher.OpTypePattern('*')
  mean_pattern = graph_matcher.OpTypePattern('*')
  variance_pattern = graph_matcher.OpTypePattern('*')

  moving_average_pattern = graph_matcher.OpTypePattern('*')
  bn_decay_pattern = graph_matcher.OpTypePattern('*')
  layer_pattern = graph_matcher.OpTypePattern(
      'Conv2D|DepthwiseConv2dNative|MatMul',
      inputs=[input_pattern, weight_pattern])
  # MatMul has a Reshape between it and FusedBatchNorm.
  matmul_reshape_pattern = graph_matcher.OpTypePattern(
      'Reshape', inputs=[layer_pattern,
                         graph_matcher.OpTypePattern('*')])

  batch_norm_pattern = graph_matcher.OpTypePattern(
      'FusedBatchNorm',
      inputs=[
          graph_matcher.OneofPattern([matmul_reshape_pattern, layer_pattern]),
          gamma_pattern, beta_pattern, mean_pattern, variance_pattern
      ])
  matmul_bn_output_reshape_pattern = graph_matcher.OpTypePattern(
      'Reshape', inputs=[batch_norm_pattern,
                         graph_matcher.OpTypePattern('*')])

  bn_matcher = graph_matcher.GraphMatcher(
      graph_matcher.OneofPattern(
          [matmul_bn_output_reshape_pattern, batch_norm_pattern]))

  moving_average_sub_pattern = graph_matcher.OpTypePattern(
      'Sub', inputs=[moving_average_pattern, batch_norm_pattern])
  moving_average_mul_pattern = graph_matcher.OpTypePattern(
      'Mul', inputs=[moving_average_sub_pattern, bn_decay_pattern])

  moving_avg_mul_matcher = graph_matcher.GraphMatcher(
      moving_average_mul_pattern)

  for match_result in bn_matcher.match_graph(graph):
    moving_mean_tensor = None
    moving_variance_tensor = None
    bn_decay_mean_tensor = None
    bn_decay_var_tensor = None
    layer_op = match_result.get_op(layer_pattern)
    layer_tensor = match_result.get_tensor(layer_pattern)
    bn_op = match_result.get_op(batch_norm_pattern)
    batch_epsilon_tensor = bn_op.get_attr('epsilon')

    # In the MatMul case, the output of batch norm is reshaped back into a
    # 2D tensor, so the output_tensor is the output of the Reshape op.
    output_tensor = bn_op.outputs[0]
    if layer_op.type == 'MatMul':
      output_reshape_op = match_result.get_op(
          matmul_bn_output_reshape_pattern)
      # If the matcher didn't match matmul_bn_output_reshape, there will be
      # another match for this 'MatMul' later, so we can skip this one.
      if output_reshape_op is None:
        continue
      output_tensor = output_reshape_op.outputs[0]

    input_tensor = match_result.get_tensor(input_pattern)
    weight_tensor = match_result.get_tensor(weight_pattern)
    gamma_tensor = match_result.get_tensor(gamma_pattern)
    beta_tensor = match_result.get_tensor(beta_pattern)
    # FusedBatchNorm in training is different from that in inference. It takes
    # empty 'mean' and empty 'variance', and produces the mean and the
    # variance of the batch. Therefore, when is_training is true, mean_tensor
    # and variance_tensor point to 1st and 2nd (0-based) output of bn_op,
    # respectively; when is_training is false, they point to bn_op's inputs.
    is_training = bn_op.get_attr('is_training')
    if is_training:
      # FusedBatchNormGrad doesn't compute gradients of the batch_mean and
      # batch_variance outputs, so we need to substitute our own custom
      # gradient.
      # TODO(suharshs, raghuramank): Find a way to avoid needing this hack.
      # pylint: disable=protected-access
      bn_op._set_attr(
          '_gradient_op_type',
          attr_value_pb2.AttrValue(
              s=compat.as_bytes('FoldFusedBatchNormGrad')))
      # pylint: enable=protected-access
      mean_tensor = bn_op.outputs[1]
      # The batch variance used during forward and backward prop is biased,
      # i.e. it is calculated as: V = sum((x(k) - mu)^2) / N. For the moving
      # average calculation, the variance is corrected by the term N/(N-1)
      # (Bessel's correction). The variance tensor read from FusedBatchNorm
      # has Bessel's correction applied, so we undo it here.
      scope, sep, _ = bn_op.name.rpartition('/')
      g = ops.get_default_graph()
      with g.as_default(), g.name_scope(scope + sep):
        n = math_ops.cast(
            array_ops.size(layer_tensor) / array_ops.size(mean_tensor),
            dtypes.float32)
        variance_tensor = math_ops.multiply(
            bn_op.outputs[2], (n - 1) / n, name='Undo_Bessel_Correction')
      # TODO(suharshs): Find a way to get rid of this inner match.
      for mul_match_result in moving_avg_mul_matcher.match_graph(graph):
        sub_op = mul_match_result.get_op(moving_average_sub_pattern)
        if sub_op.inputs[1].name == bn_op.outputs[1].name:
          # During training: Batch Mean is bn_op.outputs[1]
          moving_mean_tensor = sub_op.inputs[0]
          bn_decay_mean_tensor = mul_match_result.get_tensor(bn_decay_pattern)
        if sub_op.inputs[1].name == bn_op.outputs[2].name:
          # During training: Batch Var is bn_op.outputs[2]
          moving_variance_tensor = sub_op.inputs[0]
          bn_decay_var_tensor = mul_match_result.get_tensor(bn_decay_pattern)
    else:
      mean_tensor = match_result.get_tensor(mean_pattern)
      variance_tensor = match_result.get_tensor(variance_pattern)

    yield _BatchNormMatch(
        layer_op=layer_op,
        bn_op=bn_op,
        output_tensor=output_tensor,
        input_tensor=input_tensor,
        weight_tensor=weight_tensor,
        gamma_tensor=gamma_tensor,
        beta_tensor=beta_tensor,
        mean_tensor=mean_tensor,
        variance_tensor=variance_tensor,
        moving_mean_tensor=moving_mean_tensor,
        moving_variance_tensor=moving_variance_tensor,
        bn_decay_mean_tensor=bn_decay_mean_tensor,
        bn_decay_var_tensor=bn_decay_var_tensor,
        batch_epsilon_tensor=batch_epsilon_tensor)
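# A quick numeric check (illustrative, not from the source) of the
# Undo_Bessel_Correction step above: multiplying the Bessel-corrected
# variance sum((x_k - mu)^2) / (N - 1) by (N - 1) / N recovers the biased
# batch variance sum((x_k - mu)^2) / N used in forward/backward prop.
import numpy as np

_x = np.array([1.0, 2.0, 4.0, 8.0])
_n = _x.size
# np.var defaults to ddof=0 (biased); ddof=1 applies Bessel's correction.
assert np.isclose(_x.var(ddof=1) * (_n - 1) / _n, _x.var())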