예제 #1
0
    def _TestFailsWithFusedBatchNorm(self, relu, relu_op_name, with_bypass):
        """Checks that FoldBatchNorms rejects a graph using fused batch norm.

        Builds inputs -> conv2d with batch norm (`fused=True`) in a fresh
        graph and expects `fold_batch_norms.FoldBatchNorms` to raise
        ValueError.

        Args:
          relu: Callable that builds the Relu* op. Without bypass it is handed
            to conv2d as `activation_fn`; with bypass it is called explicitly
            on the output of the add.
          relu_op_name: String, name of the Relu* operation (placed under the
            'test/' prefix in the bypass case).
          with_bypass: Bool, when true the conv output is added to `inputs`
            just before Relu*.
        """
        # All bypass-dependent settings are chosen in one place.
        if with_bypass:
            out_depth, stride, activation_fn = 3, 1, None
            scope = 'test/test2'
        else:
            out_depth, stride, activation_fn = 32, 2, relu
            scope = 'test'

        graph = ops.Graph()
        with graph.as_default():
            inputs = array_ops.zeros((5, 128, 128, 3))
            fused_bn_params = _DEFAULT_BATCH_NORM_PARAMS.copy()
            fused_bn_params['fused'] = True
            conv_out = conv2d(
                inputs,
                out_depth, [5, 5],
                stride=stride,
                padding='SAME',
                weights_initializer=self._WeightInit(0.09),
                activation_fn=activation_fn,
                normalizer_fn=batch_norm,
                normalizer_params=fused_bn_params,
                scope=scope)
            if with_bypass:
                bypass_sum = math_ops.add(inputs, conv_out, name='test/Add')
                relu(bypass_sum, name='test/' + relu_op_name)

            # The rewrite is expected to refuse this graph.
            with self.assertRaises(ValueError):
                fold_batch_norms.FoldBatchNorms(graph)
예제 #2
0
  def _TestFoldConv2dUnknownShape(self, relu, relu_op_name, with_bypass,
                                  has_scaling, fused_batch_norm):
    """Tests folding: inputs -> Conv2d with batch norm -> Relu*.

    The input is a placeholder of shape (5, None, None, 3), so the rewrite is
    exercised with spatial dimensions that are not statically known.

    Args:
      relu: Callable that returns an Operation, a factory method for the Relu*.
      relu_op_name: String, name of the Relu* operation.
      with_bypass: Bool, when true there is an extra connection added from
        inputs to just before Relu*.
      has_scaling: Bool, when true the batch norm has scaling.
      fused_batch_norm: Bool, when true the batch norm is fused.
    """
    # All bypass-dependent settings are chosen in one place.
    if with_bypass:
      out_depth, stride, activation_fn = 3, 1, None
      scope = 'test/test2'
    else:
      out_depth, stride, activation_fn = 32, 2, relu
      scope = 'test'

    graph = ops.Graph()
    with graph.as_default():
      inputs = array_ops.placeholder(dtypes.float32, shape=(5, None, None, 3))
      conv_out = conv2d(
          inputs,
          out_depth, [5, 5],
          stride=stride,
          padding='SAME',
          weights_initializer=self._WeightInit(0.09),
          activation_fn=activation_fn,
          normalizer_fn=batch_norm,
          normalizer_params=self._BatchNormParams(
              scale=has_scaling, fused=fused_batch_norm),
          scope=scope)
      if with_bypass:
        bypass_sum = math_ops.add(inputs, conv_out, name='test/Add')
        relu(bypass_sum, name='test/' + relu_op_name)

      fold_batch_norms.FoldBatchNorms(graph)

    # Names of the three ops the rewrite is expected to have created.
    mul_name = scope + '/mul_fold'
    conv_name = scope + '/Conv2D_Fold'
    add_name = scope + '/add_fold'

    # weights * batch-norm multiplier -> folded conv.
    folded_mul = graph.get_operation_by_name(mul_name)
    self.assertEqual(folded_mul.type, 'Mul')
    expected_mul_inputs = [
        scope + '/weights/read',
        self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm)
    ]
    self._AssertInputOpsAre(folded_mul, expected_mul_inputs)
    self._AssertOutputGoesToOps(folded_mul, graph, [conv_name])

    # Folded conv consumes the scaled weights and the original inputs.
    folded_conv = graph.get_operation_by_name(conv_name)
    self.assertEqual(folded_conv.type, 'Conv2D')
    self._AssertInputOpsAre(folded_conv, [mul_name, inputs.op.name])
    self._AssertOutputGoesToOps(folded_conv, graph, [add_name])

    # Folded add applies the batch-norm bias and feeds the next op.
    folded_add = graph.get_operation_by_name(add_name)
    self.assertEqual(folded_add.type, 'Add')
    expected_add_inputs = [
        conv_name,
        self._BathNormBiasName(scope, fused_batch_norm)
    ]
    self._AssertInputOpsAre(folded_add, expected_add_inputs)
    if with_bypass:
      consumer_name = 'test/Add'
    else:
      consumer_name = 'test/' + relu_op_name
    self._AssertOutputGoesToOps(folded_add, graph, [consumer_name])
예제 #3
0
def _create_graph(input_graph=None,
                  is_training=True,
                  weight_bits=8,
                  activation_bits=8,
                  quant_delay=None,
                  freeze_bn_delay=None,
                  scope=None,
                  quant_type="affine"):
  """Rewrites an input_graph in place for simulated quantization.

  The graph has fake quantization ops inserted to simulate the error
  introduced by quantization. Since the graph is transformed in place,
  the expected behavior of previously held references to nodes and tensors may
  change.

  Args:
    input_graph: The tf.Graph to be transformed, if None then defaults to the
      default graph.
    is_training: Whether quantizing training or eval graph.
    weight_bits: Number of bits to use for quantizing weights.
    activation_bits: Number of bits to use for quantizing activations.
    quant_delay: Number of steps after which weights and activations are
      quantized during training.
    freeze_bn_delay: Number of steps after which moving mean and variance are
      frozen and used instead of batch statistics during training.
      freeze_bn_delay should be greater than quant_delay and should correspond
      to the number of steps when training has almost converged
    scope: The scope to be transformed. If it's not None, only the ops which
      are in this scope will be transformed.
    quant_type: String selecting the quantization scheme. Exactly "symmetric"
      makes the rewrite pass `symmetric=True` to `quantize.Quantize` (use
      symmetric quantization limits instead of training the minimum and
      maximum of each quantization range separately); any other value,
      including the default "affine", passes `symmetric=False`.

  Note:
    Nothing is caught here: whatever `_check_for_training_ops`,
    `FoldBatchNorms` or `Quantize` raise propagates to the caller.
  """
  print("Success: using a patched version of tf.contrib.quantize.")

  if input_graph is None:
    input_graph = ops.get_default_graph()

  # Only the exact string "symmetric" enables symmetric quantization.
  symmetric = quant_type == "symmetric"

  # Add check to see if graph has training ops, if so provide error message and
  # exit
  _check_for_training_ops(input_graph)
  with input_graph.as_default():
    # Batch norms are folded first, then fake-quant ops are inserted.
    fold_batch_norms.FoldBatchNorms(
        input_graph,
        freeze_batch_norm_delay=freeze_bn_delay,
        is_training=is_training)
    quantize.Quantize(
        input_graph,
        is_training,
        quant_delay=quant_delay,
        weight_bits=weight_bits,
        activation_bits=activation_bits,
        symmetric=symmetric,
        scope=scope)
예제 #4
0
    def _TestFoldFullyConnectedLayer(self, relu, relu_op_name, with_bypass,
                                     has_scaling, fused_batch_norm):
        """Tests folding: inputs -> fully connected with batch norm -> Relu*.

        After the rewrite the graph is expected to contain `mul_fold`,
        `MatMul_Fold` and `add_fold` ops under the layer scope, wired in that
        order.

        Args:
          relu: Callable that returns an Operation, a factory method for the
            Relu*.
          relu_op_name: String, name of the Relu* operation.
          with_bypass: Bool, when true there is an extra connection added from
            inputs to just before Relu*.
          has_scaling: Bool, when true the batch norm has scaling.
          fused_batch_norm: Bool, when true the batch norm is fused.
        """
        # All bypass-dependent settings are chosen in one place.
        if with_bypass:
            out_depth, activation_fn, scope = 256, None, 'test/test2'
        else:
            out_depth, activation_fn, scope = 128, relu, 'test'

        graph = ops.Graph()
        with graph.as_default():
            inputs = array_ops.zeros((5, 256))
            fc_out = fully_connected(
                inputs,
                out_depth,
                weights_initializer=self._WeightInit(0.03),
                activation_fn=activation_fn,
                normalizer_fn=batch_norm,
                normalizer_params=self._BatchNormParams(
                    scale=has_scaling, fused=fused_batch_norm),
                scope=scope)
            if with_bypass:
                bypass_sum = math_ops.add(inputs, fc_out, name='test/Add')
                relu(bypass_sum, name='test/' + relu_op_name)

            fold_batch_norms.FoldBatchNorms(graph)

        # Names of the three ops the rewrite is expected to have created.
        mul_name = scope + '/mul_fold'
        matmul_name = scope + '/MatMul_Fold'
        add_name = scope + '/add_fold'

        # weights * batch-norm multiplier -> folded matmul.
        folded_mul = graph.get_operation_by_name(mul_name)
        self.assertEqual(folded_mul.type, 'Mul')
        expected_mul_inputs = [
            scope + '/weights/read',
            self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm)
        ]
        self._AssertInputOpsAre(folded_mul, expected_mul_inputs)
        self._AssertOutputGoesToOps(folded_mul, graph, [matmul_name])

        # Folded matmul consumes the scaled weights and the original inputs.
        folded_matmul = graph.get_operation_by_name(matmul_name)
        self.assertEqual(folded_matmul.type, 'MatMul')
        self._AssertInputOpsAre(folded_matmul, [mul_name, inputs.op.name])
        self._AssertOutputGoesToOps(folded_matmul, graph, [add_name])

        # Folded add applies the batch-norm bias and feeds the next op.
        folded_add = graph.get_operation_by_name(add_name)
        self.assertEqual(folded_add.type, 'Add')
        expected_add_inputs = [
            matmul_name,
            self._BathNormBiasName(scope, fused_batch_norm)
        ]
        self._AssertInputOpsAre(folded_add, expected_add_inputs)
        if with_bypass:
            consumer_name = 'test/Add'
        else:
            consumer_name = 'test/' + relu_op_name
        self._AssertOutputGoesToOps(folded_add, graph, [consumer_name])
예제 #5
0
  def _TestCompareFoldAndUnfolded(self, relu, relu_op_name, with_bypass,
                                  has_scaling, fused_batch_norm):
    """Tests that folded and unfolded batch norm give matching results.

    A conv2d + batch norm graph is built, copied, and the copy is rewritten
    with FoldBatchNorms. The Relu* output and its gradient with respect to
    the inputs are then evaluated in both graphs and compared with
    atol=1e-3.

    Args:
      relu: Callable that returns an Operation, a factory method for the Relu*.
      relu_op_name: String, name of the Relu* operation.
      with_bypass: Bool, when true there is an extra connection added from
        inputs to just before Relu*.
      has_scaling: Bool, when true the batch norm has scaling.
      fused_batch_norm: Bool, when true the batch norm is fused.
    """
    random_seed.set_random_seed(1234)

    # All bypass-dependent settings are chosen in one place.
    if with_bypass:
      out_depth, stride, activation_fn = 3, 1, None
      scope = 'test/test2'
    else:
      out_depth, stride, activation_fn = 32, 2, relu
      scope = 'test'

    unfolded_g = ops.Graph()
    with unfolded_g.as_default():
      inputs = random_ops.random_uniform(
          (5, 128, 128, 3), dtype=dtypes.float32, seed=1234)
      conv_out = conv2d(
          inputs,
          out_depth, [5, 5],
          stride=stride,
          padding='SAME',
          weights_initializer=self._WeightInit(0.09),
          activation_fn=activation_fn,
          normalizer_fn=batch_norm,
          normalizer_params=self._BatchNormParams(
              scale=has_scaling, fused=fused_batch_norm),
          scope=scope)
      if with_bypass:
        conv_out = math_ops.add(inputs, conv_out, name='test/Add')
      # Unlike the bypass add, the explicit Relu* is always appended here.
      relu_node = relu(conv_out, name='test/' + relu_op_name)

    # Only the copy is rewritten; the original stays as the reference.
    folded_g = copy_graph.CopyGraph(unfolded_g)
    with folded_g.as_default():
      fold_batch_norms.FoldBatchNorms(folded_g)

    with session.Session(graph=unfolded_g) as sess:
      sess.run(variables.global_variables_initializer())
      unfolded_grads = gradients.gradients(relu_node, inputs)
      unfolded_forward, unfolded_backward = sess.run(
          [relu_node, unfolded_grads])

    with session.Session(graph=folded_g) as sess:
      sess.run(variables.global_variables_initializer())
      # Look up the counterparts of the output and input tensors by name.
      folded_relu = folded_g.get_tensor_by_name(relu_node.name)
      folded_inputs = folded_g.get_tensor_by_name(inputs.name)
      folded_grads = gradients.gradients(folded_relu, folded_inputs)
      folded_forward, folded_backward = sess.run([folded_relu, folded_grads])

    # Check that the folded and unfolded results match.
    self.assertAllClose(unfolded_forward, folded_forward, atol=1e-3)
    self.assertAllClose(unfolded_backward, folded_backward, atol=1e-3)
예제 #6
0
    def _TestFoldConv2dWithoutScale(self, relu, relu_op_name, with_bypass):
        """Tests folding: inputs -> Conv2d with unscaled batch norm -> Relu*.

        The batch norm is configured with `scale=False`, so the folded
        multiplier is expected to come straight from the batch norm `Rsqrt`
        op.

        Args:
          relu: Callable that returns an Operation, a factory method for the
            Relu*.
          relu_op_name: String, name of the Relu* operation.
          with_bypass: Bool, when true there is an extra connection added from
            inputs to just before Relu*.
        """
        # All bypass-dependent settings are chosen in one place.
        if with_bypass:
            out_depth, stride, activation_fn = 3, 1, None
            scope = 'test/test2'
        else:
            out_depth, stride, activation_fn = 32, 2, relu
            scope = 'test'

        graph = ops.Graph()
        with graph.as_default():
            inputs = array_ops.zeros((5, 128, 128, 3))
            unscaled_bn_params = copy.copy(_DEFAULT_BATCH_NORM_PARAMS)
            unscaled_bn_params['scale'] = False
            conv_out = conv2d(
                inputs,
                out_depth, [5, 5],
                stride=stride,
                padding='SAME',
                weights_initializer=self._WeightInit(0.09),
                activation_fn=activation_fn,
                normalizer_fn=batch_norm,
                normalizer_params=unscaled_bn_params,
                scope=scope)
            if with_bypass:
                bypass_sum = math_ops.add(inputs, conv_out, name='test/Add')
                relu(bypass_sum, name='test/' + relu_op_name)

            fold_batch_norms.FoldBatchNorms(graph)

        # Names of the three ops the rewrite is expected to have created.
        mul_name = scope + '/mul_fold'
        conv_name = scope + '/convolution_Fold'
        add_name = scope + '/add_fold'

        # weights * Rsqrt -> folded conv.
        folded_mul = graph.get_operation_by_name(mul_name)
        self.assertEqual(folded_mul.type, 'Mul')
        expected_mul_inputs = [
            scope + '/weights/read',
            scope + '/BatchNorm/batchnorm/Rsqrt',
        ]
        self._AssertInputOpsAre(folded_mul, expected_mul_inputs)
        self._AssertOutputGoesToOps(folded_mul, graph, [conv_name])

        # Folded conv consumes the scaled weights and the original inputs.
        folded_conv = graph.get_operation_by_name(conv_name)
        self.assertEqual(folded_conv.type, 'Conv2D')
        self._AssertInputOpsAre(folded_conv, [mul_name, inputs.op.name])
        self._AssertOutputGoesToOps(folded_conv, graph, [add_name])

        # Folded add takes the batch norm `sub` output as its second input.
        folded_add = graph.get_operation_by_name(add_name)
        self.assertEqual(folded_add.type, 'Add')
        expected_add_inputs = [conv_name, scope + '/BatchNorm/batchnorm/sub']
        self._AssertInputOpsAre(folded_add, expected_add_inputs)
        if with_bypass:
            consumer_name = 'test/Add'
        else:
            consumer_name = 'test/' + relu_op_name
        self._AssertOutputGoesToOps(folded_add, graph, [consumer_name])
 def _AssertIdempotent(self, graph):
     """Asserts that re-running the rewrites leaves `graph` unchanged.

     The string form of the GraphDef is captured, FoldBatchNorms and Quantize
     are applied again in training mode, and the new string form must equal
     the captured one.

     Args:
       graph: Graph to re-run the rewrites on.
     """
     snapshot = str(graph.as_graph_def())
     with graph.as_default():
         # A second pass must not add or alter any node.
         fold_batch_norms.FoldBatchNorms(graph, is_training=True)
         quantize.Quantize(graph, True)
     self.assertEqual(snapshot, str(graph.as_graph_def()))
    def _TestQuantize_AtrousConvWithBatchNorm(self, activation,
                                              activation_op_name, with_bypass,
                                              delay, fused_batch_norm,
                                              use_resource, scope):
        """Tests quantization: inputs -> atrous conv with batch norm -> Activation.

        The atrous convolution is a `separable_conv2d` with `rate=2` and no
        pointwise stage (`num_outputs=None`).

        Args:
          activation: Callable that returns an Operation, a factory method for
            the Activation.
          activation_op_name: String, name of the Activation operation.
          with_bypass: Bool, when true there is an extra connection added from
            inputs to just before Activation.
          delay: Int (optional), delay in number of steps until quantization
            starts.
          fused_batch_norm: Bool, when true use FusedBatchNorm.
          use_resource: Bool, when true uses resource variables.
          scope: String, specifies top level scope for the graph
        """
        graph = ops.Graph()
        with graph.as_default():
            variable_scope.get_variable_scope().set_use_resource(use_resource)
            inputs = array_ops.zeros((5, 128, 128, 3))
            # The conv scope is derived from the caller's scope before that
            # scope is normalised below.
            conv_scope = self._GetConvScope(scope, with_bypass)
            if scope is None:
                scope = ''
            # Prefix for the manually named ops; empty at top level.
            name_prefix = scope + '/' if scope else ''

            conv_out = separable_conv2d(
                inputs,
                None, [3, 3],
                rate=2,
                depth_multiplier=1.0,
                padding='SAME',
                weights_initializer=self._WeightInit(0.09),
                activation_fn=None,
                normalizer_fn=batch_norm,
                normalizer_params=self._BatchNormParams(fused_batch_norm),
                scope=conv_scope)

            # Manually add a bypass (optional) and an activation.
            if with_bypass:
                conv_out = math_ops.add(
                    inputs, conv_out, name=name_prefix + 'Add')
            activated = activation(
                conv_out, name=name_prefix + activation_op_name)

            barrier = control_flow_ops.no_op(name='update_barrier')
            with ops.control_dependencies([barrier]):
                array_ops.identity(activated, name='control_dependency')

            fold_batch_norms.FoldBatchNorms(graph, is_training=True)
            quantize.Quantize(graph, True, quant_delay=delay)

            self._AssertCorrectQuantizedGraphWithBatchNorm(
                graph, scope, 'DepthwiseConv2dNative', activation_op_name,
                with_bypass, delay, use_resource)
예제 #9
0
def _create_graph(input_graph,
                  is_training,
                  elements=None,
                  device_name_or_function=None):
    """Returns a transformed copy of input_graph for simulated quantization.

    The forward pass has fake quantization ops inserted to simulate the error
    introduced by quantization. `input_graph` itself is not rewritten; the
    rewrites run on a copy made with `copy_graph.CopyGraph`.

    Args:
      input_graph: The tf.Graph to be transformed.
      is_training: Whether quantizing training or eval graph.
      elements: (Optional) List of Tensors and Operations in input_graph whose
        corresponding elements in the new graph will be returned.
      device_name_or_function: (Optional) The device name or function to use.

    Returns:
      If `elements` is None, only `g`, the new tf.Graph rewritten for
      simulated quantization. Otherwise a tuple `(g, l)` where `l` is a list
      of the Tensors/Operations in `g` that have the same names as the
      provided input elements.

    Raises:
      ValueError: If elements contains an element that isn't a Tensor,
        Variable or Operation.
    """
    # TODO(suharshs): Describe the process in more detail in the doc string.
    g = copy_graph.CopyGraph(input_graph)
    if is_training:
        # TODO(raghuramank): Need to make freeze_batch_norm_delay
        # a function of the batch size. For now setting this to 250 epochs
        # This corresponds to 5 million steps at a batch size of 64.
        freeze_batch_norm_delay = 5000000
    else:
        freeze_batch_norm_delay = None
    with g.as_default():
        with ops.device(device_name_or_function):
            fold_batch_norms.FoldBatchNorms(
                g,
                freeze_batch_norm_delay=freeze_batch_norm_delay,
                is_training=is_training)
            quantize.Quantize(g, is_training=is_training)
    if elements is None:
        return g

    # Map each requested element to its same-named counterpart in the copy.
    return_elements = []
    for element in elements:
        if isinstance(element, (ops.Tensor, variables.Variable)):
            return_elements.append(g.get_tensor_by_name(element.name))
        elif isinstance(element, ops.Operation):
            return_elements.append(g.get_operation_by_name(element.name))
        else:
            # Build one message string; passing the element as a second
            # positional argument made the error render as a tuple.
            raise ValueError(
                'elements must consist of Tensor or Operation objects, got: '
                + str(element))
    return g, return_elements
예제 #10
0
    def _TestQuantize_Conv2dWithBatchNorm(self, activation, activation_op_name,
                                          with_bypass, delay, fused_batch_norm,
                                          use_resource):
        """Tests quantization: inputs -> Conv2d with batch norm -> Activation.

        Args:
          activation: Callable that returns an Operation, a factory method for
            the Activation.
          activation_op_name: String, name of the Activation operation.
          with_bypass: Bool, when true there is an extra connection added from
            inputs to just before Activation.
          delay: Int (optional), delay in number of steps until quantization
            starts.
          fused_batch_norm: Bool, when true use FusedBatchNorm.
          use_resource: Bool, when true uses resource variables.
        """
        # All bypass-dependent settings are chosen in one place.
        if with_bypass:
            stride, out_depth, scope = 1, 3, 'test/test2'
        else:
            stride, out_depth, scope = 2, 32, 'test'

        graph = ops.Graph()
        with graph.as_default():
            variable_scope.get_variable_scope().set_use_resource(use_resource)
            inputs = array_ops.zeros((5, 128, 128, 3))
            conv_out = conv2d(
                inputs,
                out_depth, [5, 5],
                stride=stride,
                padding='SAME',
                weights_initializer=self._WeightInit(0.09),
                activation_fn=None,
                normalizer_fn=batch_norm,
                normalizer_params=self._BatchNormParams(fused_batch_norm),
                scope=scope)

            # Manually add a bypass (optional) and an activation.
            if with_bypass:
                conv_out = math_ops.add(inputs, conv_out, name='test/Add')
            activated = activation(conv_out, name='test/' + activation_op_name)

            barrier = control_flow_ops.no_op(name='update_barrier')
            with ops.control_dependencies([barrier]):
                array_ops.identity(activated, name='control_dependency')

            fold_batch_norms.FoldBatchNorms(graph, is_training=True)
            quantize.Quantize(graph, True, quant_delay=delay)

            self._AssertCorrectQuantizedGraphWithBatchNorm(
                graph, scope, 'Conv2D', activation_op_name, with_bypass, delay,
                use_resource)
예제 #11
0
def _create_graph(input_graph=None,
                  is_training=True,
                  weight_bits=8,
                  activation_bits=8,
                  quant_delay=None,
                  freeze_bn_delay=None,
                  scope=None):
  """Rewrites a graph in place so that it simulates quantization.

  Batch norms are folded first and fake quantization ops are then inserted,
  both directly on the given graph. Because the graph is modified in place,
  references to nodes and tensors obtained before the call may no longer
  behave as they did.

  Args:
    input_graph: The tf.Graph to be transformed, if None then defaults to the
      default graph.
    is_training: Whether quantizing training or eval graph.
    weight_bits: Number of bits to use for quantizing weights.
    activation_bits: Number of bits to use for quantizing activations.
    quant_delay: Number of steps after which weights and activations are
      quantized during training.
    freeze_bn_delay: Number of steps after which moving mean and variance are
      frozen and used instead of batch statistics during training.
      freeze_bn_delay should be greater than quant_delay and should correspond
      to the number of steps when training has almost converged
    scope: The scope to be transformed. If it's not None, only the ops which
      are in this scope will be transformed.

  Note:
    Nothing is caught here: whatever `FoldBatchNorms` or `Quantize` raise
    propagates to the caller.
  """
  graph = ops.get_default_graph() if input_graph is None else input_graph

  with graph.as_default():
    fold_batch_norms.FoldBatchNorms(
        graph,
        is_training=is_training,
        freeze_batch_norm_delay=freeze_bn_delay)
    quantize.Quantize(
        graph,
        is_training,
        weight_bits=weight_bits,
        activation_bits=activation_bits,
        quant_delay=quant_delay,
        scope=scope)
 def build_eval_graph(self):
     """Loads the model into a new graph and applies the eval-mode rewrites.

     `load_graph` is called with a fresh `tf.Graph`, `self._graph` and
     `self._checkpoint`. Batch-norm folding runs when `self._fold_bn` is
     truthy and quantization runs when `self._quantize` is truthy; both use
     `is_training=False` and operate on `sess.graph`.

     Returns:
       The `(sess, saver)` pair returned by `load_graph`.
     """
     eval_graph = tf.Graph()
     with eval_graph.as_default():
         sess, saver = load_graph(eval_graph, self._graph, self._checkpoint)

         if self._fold_bn:
             fold_batch_norms.FoldBatchNorms(
                 graph=sess.graph,
                 is_training=False,
                 freeze_batch_norm_delay=None)

         if self._quantize:
             # 8-bit weights and activations, no delay, whole graph.
             quantize.Quantize(
                 graph=sess.graph,
                 is_training=False,
                 weight_bits=8,
                 activation_bits=8,
                 quant_delay=0,
                 scope=None)
     return sess, saver
    def _TestBatchNormForcedUpdates(self, activation, activation_op_name,
                                    fused_batch_norm, use_resource):
        """post_activation bypass quantization should happen with forced updates.

        Builds conv2d + batch norm -> add(bypass) -> conv2d + batch norm,
        runs FoldBatchNorms and Quantize in training mode, and checks that
        the add is both fed by and consumed by a FakeQuantWithMinMaxVars op.
        The resulting GraphDef is also dumped to /tmp/bn_quant_test.pbtxt.

        Args:
          activation: Callable passed to both conv2d layers as
            `activation_fn`.
          activation_op_name: String, name of the Activation operation (not
            used by this test body).
          fused_batch_norm: Bool, when true use FusedBatchNorm.
          use_resource: Bool, when true uses resource variables.
        """
        graph = ops.Graph()
        with graph.as_default():
            variable_scope.get_variable_scope().set_use_resource(use_resource)
            batch_size, height, width, depth = 5, 128, 128, 3
            input1 = array_ops.zeros((batch_size, height, width, depth))
            # Floor division keeps the shape integral; `/` would yield floats
            # (64.0) under true division. The stride-2 'SAME' conv below
            # halves the spatial size, so input2 matches its output.
            input2 = array_ops.zeros(
                (batch_size, height // 2, width // 2, 32))
            # force_updates=True is presumably turned into
            # updates_collections=None by _BatchNormParams, which forces
            # updates and adds an extra identity operation following batch
            # norms -- TODO confirm against the helper.
            bn_params = self._BatchNormParams(fused=fused_batch_norm,
                                              force_updates=True)
            conv = conv2d(input1,
                          32, [5, 5],
                          stride=2,
                          padding='SAME',
                          weights_initializer=self._WeightInit(0.09),
                          activation_fn=activation,
                          normalizer_fn=batch_norm,
                          normalizer_params=bn_params,
                          scope='test/test')
            bypass_tensor = math_ops.add(conv, input2, name='test/add')
            # The output of the post_activation bypass will be another layer.
            _ = conv2d(bypass_tensor,
                       32, [5, 5],
                       stride=2,
                       padding='SAME',
                       weights_initializer=self._WeightInit(0.09),
                       normalizer_fn=batch_norm,
                       normalizer_params=bn_params,
                       activation_fn=activation,
                       scope='test/unused')

            fold_batch_norms.FoldBatchNorms(graph, is_training=True)
            quantize.Quantize(graph, is_training=True)

            # Ensure that the bypass node is preceded by and followed by a
            # FakeQuantWithMinMaxVar operation, since the output of the Add isn't an
            # activation.
            self.assertIn('FakeQuantWithMinMaxVars',
                          [c.type for c in bypass_tensor.consumers()])
            self.assertIn('FakeQuantWithMinMaxVars',
                          [i.op.type for i in bypass_tensor.op.inputs])

        # Dump the rewritten graph for manual inspection.
        with open('/tmp/bn_quant_test.pbtxt', 'w') as f:
            f.write(str(graph.as_graph_def()))
예제 #14
0
def _create_graph(input_graph, is_training, elements=None):
    """Returns a transformed copy of input_graph for simulated quantization.

    The forward pass has fake quantization ops inserted to simulate the error
    introduced by quantization. `input_graph` itself is not rewritten; the
    rewrites run on a copy made with `copy_graph.CopyGraph`.

    Args:
      input_graph: The tf.Graph to be transformed.
      is_training: Whether quantizing training or eval graph.
      elements: (Optional) List of Tensors and Operations in input_graph whose
        corresponding elements in the new graph will be returned.

    Returns:
      Returns a tuple(g, l) where:
      g is new tf.Graph that is rewritten for simulated quantization.
      l is a list of Tensors/Operations in g corresponding to the provided
        input elements (empty when `elements` is None).

    Raises:
      ValueError: If elements contains an element that isn't a Tensor,
        Variable or Operation.
    """
    # TODO(suharshs): Describe the process in more detail in the doc string.
    g = copy_graph.CopyGraph(input_graph)
    fold_batch_norms.FoldBatchNorms(g)
    quantize.Quantize(g, is_training=is_training)

    # Map each requested element to its same-named counterpart in the copy.
    return_elements = []
    if elements is None:
        elements = []
    for element in elements:
        if isinstance(element, (ops.Tensor, variables.Variable)):
            return_elements.append(g.get_tensor_by_name(element.name))
        elif isinstance(element, ops.Operation):
            return_elements.append(g.get_operation_by_name(element.name))
        else:
            # Build one message string; passing the element as a second
            # positional argument made the error render as a tuple.
            raise ValueError(
                'elements must consist of Tensor or Operation objects, got: '
                + str(element))
    return g, return_elements
예제 #15
0
  def _TestFoldDepthwiseConv2d(self, relu, relu_op_name, with_bypass,
                               has_scaling, fused_batch_norm):
    """Tests folding: inputs -> DepthwiseConv2d with batch norm -> Relu*.

    Besides the `mul_fold`, `depthwise_Fold` and `add_fold` ops, this case
    also checks the `scale_reshape` op that sits between the batch norm
    multiplier and `mul_fold`.

    Args:
      relu: Callable that returns an Operation, a factory method for the Relu*.
      relu_op_name: String, name of the Relu* operation.
      with_bypass: Bool, when true there is an extra connection added from
        inputs to just before Relu*.
      has_scaling: Bool, when true the batch norm has scaling.
      fused_batch_norm: Bool, when true the batch norm is fused.
    """
    # All bypass-dependent settings are chosen in one place.
    if with_bypass:
      stride, activation_fn, scope = 1, None, 'test/test2'
    else:
      stride, activation_fn, scope = 2, relu, 'test'

    graph = ops.Graph()
    with graph.as_default():
      inputs = array_ops.zeros((5, 128, 128, 3))
      conv_out = separable_conv2d(
          inputs,
          None, [5, 5],
          stride=stride,
          depth_multiplier=1.0,
          padding='SAME',
          weights_initializer=self._WeightInit(0.09),
          activation_fn=activation_fn,
          normalizer_fn=batch_norm,
          normalizer_params=self._BatchNormParams(
              scale=has_scaling, fused=fused_batch_norm),
          scope=scope)
      if with_bypass:
        bypass_sum = math_ops.add(inputs, conv_out, name='test/Add')
        relu(bypass_sum, name='test/' + relu_op_name)

      fold_batch_norms.FoldBatchNorms(graph)

    # Names of the ops the rewrite is expected to have created.
    mul_name = scope + '/mul_fold'
    depthwise_name = scope + '/depthwise_Fold'
    add_name = scope + '/add_fold'

    folded_mul = graph.get_operation_by_name(mul_name)
    self.assertEqual(folded_mul.type, 'Mul')
    # The reshape op lives under a different sub-scope for fused batch norm.
    reshape_suffix = (
        '/BatchNorm_Fold/scale_reshape' if fused_batch_norm
        else '/scale_reshape')
    scale_reshape_op_name = scope + reshape_suffix
    self._AssertInputOpsAre(
        folded_mul,
        [scope + '/depthwise_weights/read', scale_reshape_op_name])
    self._AssertOutputGoesToOps(folded_mul, graph, [depthwise_name])

    # Reshape takes the batch norm multiplier and its target shape.
    scale_reshape = graph.get_operation_by_name(scale_reshape_op_name)
    self.assertEqual(scale_reshape.type, 'Reshape')
    expected_reshape_inputs = [
        self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm),
        scale_reshape_op_name + '/shape'
    ]
    self._AssertInputOpsAre(scale_reshape, expected_reshape_inputs)
    self._AssertOutputGoesToOps(scale_reshape, graph, [mul_name])

    # Folded depthwise conv consumes the scaled weights and the inputs.
    folded_conv = graph.get_operation_by_name(depthwise_name)
    self.assertEqual(folded_conv.type, 'DepthwiseConv2dNative')
    self._AssertInputOpsAre(folded_conv, [mul_name, inputs.op.name])
    self._AssertOutputGoesToOps(folded_conv, graph, [add_name])

    # Folded add applies the batch-norm bias and feeds the next op.
    folded_add = graph.get_operation_by_name(add_name)
    self.assertEqual(folded_add.type, 'Add')
    expected_add_inputs = [
        depthwise_name,
        self._BathNormBiasName(scope, fused_batch_norm)
    ]
    self._AssertInputOpsAre(folded_add, expected_add_inputs)
    if with_bypass:
      consumer_name = 'test/Add'
    else:
      consumer_name = 'test/' + relu_op_name
    self._AssertOutputGoesToOps(folded_add, graph, [consumer_name])
def main(_):
    """Evaluates a quantized classification model once on a dataset split.

    Builds the input pipeline and the network in inference mode, rewrites the
    graph (batch norm folding followed by fake quantization), restores the
    checkpoint found at --checkpoint_path and runs a single evaluation pass
    reporting streaming accuracy and recall@5.

    Args:
      _: Unused positional argument.

    Raises:
      ValueError: If --dataset_dir is not set.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label] = provider.get(['image', 'label'])
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        # Quantize the eval graph. The network above is built with
        # is_training=False, so the quantization rewrite is requested in
        # inference mode as well: evaluation should use the quantization
        # ranges restored from the checkpoint rather than re-estimate them
        # from the evaluation batches.
        g = tf.get_default_graph()
        # NOTE(review): FoldBatchNorms is still called with its default
        # arguments because its accepted keywords are not visible from here;
        # if the installed version takes is_training, it should presumably be
        # False too -- confirm.
        fold_batch_norms.FoldBatchNorms(g)
        quantize.Quantize(g, is_training=False)

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5':
            slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples /
                                    float(FLAGS.batch_size))

        # --checkpoint_path may name either a directory (use its latest
        # checkpoint) or a specific checkpoint file.
        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)

        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)
예제 #17
0
  def _TestFoldConv2d(self, relu, relu_op_name, with_bypass, has_scaling,
                      fused_batch_norm, freeze_batch_norm_delay):
    """Verifies folding for: inputs -> Conv2d with batch norm -> Relu*.

    Builds the layer, runs FoldBatchNorms in training mode and then checks the
    type and the wiring of the folded mul, conv and add operations.

    Args:
      relu: Callable that returns an Operation, a factory method for the Relu*.
      relu_op_name: String, name of the Relu* operation.
      with_bypass: Bool, when true there is an extra connection added from
        inputs to just before Relu*.
      has_scaling: Bool, when true the batch norm has scaling.
      fused_batch_norm: Bool, when true the batch norm is fused.
      freeze_batch_norm_delay: None or the number of steps after which training
        switches to using frozen mean and variance.
    """
    # The bypass variant keeps depth and spatial size unchanged so that the
    # conv output can be added back onto the inputs.
    if with_bypass:
      scope = 'test/test2'
      out_depth, stride, activation_fn = 3, 1, None
    else:
      scope = 'test'
      out_depth, stride, activation_fn = 32, 2, relu

    g = ops.Graph()
    with g.as_default():
      inputs = array_ops.zeros((5, 128, 128, 3))
      weights_init = self._WeightInit(0.09)
      bn_params = self._BatchNormParams(
          scale=has_scaling, fused=fused_batch_norm)
      conv_out = conv2d(
          inputs,
          out_depth, [5, 5],
          stride=stride,
          padding='SAME',
          weights_initializer=weights_init,
          activation_fn=activation_fn,
          normalizer_fn=batch_norm,
          normalizer_params=bn_params,
          scope=scope)
      if with_bypass:
        # With a bypass the Relu* is applied manually after the add.
        bypass_sum = math_ops.add(inputs, conv_out, name='test/Add')
        relu(bypass_sum, name='test/' + relu_op_name)

      fold_batch_norms.FoldBatchNorms(
          g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay)

    mul_fold_name = scope + '/mul_fold'
    conv_fold_name = scope + '/Conv2D_Fold'

    # Folded multiplier: correction factor times the batch norm multiplier.
    folded_mul = g.get_operation_by_name(mul_fold_name)
    self.assertEqual(folded_mul.type, 'Mul')
    expected_mul_inputs = [
        scope + '/correction_mult',
        self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm)
    ]
    self._AssertInputOpsAre(folded_mul, expected_mul_inputs)
    self._AssertOutputGoesToOps(folded_mul, g, [conv_fold_name])

    # Folded convolution consumes the folded weights and the raw inputs.
    folded_conv = g.get_operation_by_name(conv_fold_name)
    self.assertEqual(folded_conv.type, 'Conv2D')
    self._AssertInputOpsAre(folded_conv, [mul_fold_name, inputs.op.name])
    self._AssertOutputGoesToOps(folded_conv, g, [scope + '/post_conv_mul'])

    # Folded add feeds whatever originally consumed the batch norm output.
    folded_add = g.get_operation_by_name(scope + '/add_fold')
    self.assertEqual(folded_add.type, 'Add')
    expected_add_inputs = [
        scope + '/correction_add',
        self._BathNormBiasName(scope, fused_batch_norm)
    ]
    self._AssertInputOpsAre(folded_add, expected_add_inputs)
    if with_bypass:
      consumer_name = 'test/Add'
    else:
      consumer_name = 'test/' + relu_op_name
    self._AssertOutputGoesToOps(folded_add, g, [consumer_name])

    # No op name in the rewritten graph may contain an empty scope component.
    for graph_op in g.get_operations():
      self.assertFalse('//' in graph_op.name,
                       'Double slash in op %s' % graph_op.name)
예제 #18
0
def main(_):
    """Trains a classification model with quantization-aware graph rewriting.

    Builds the input pipeline and one or more model clones, rewrites the
    forward graph (batch norm folding followed by fake quantization),
    configures the optimizer and gradient updates, and finally runs the
    slim training loop.

    Args:
      _: Unused positional argument.

    Raises:
      ValueError: If --dataset_dir is not set.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            label -= FLAGS.labels_offset

            train_image_size = FLAGS.train_image_size or network_fn.default_image_size

            image = image_preprocessing_fn(image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()
            logits, end_points = network_fn(images)

            #############################
            # Specify the loss function #
            #############################
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'],
                    labels,
                    label_smoothing=FLAGS.label_smoothing,
                    weights=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits,
                labels,
                label_smoothing=FLAGS.label_smoothing,
                weights=1.0)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(
                tf.summary.scalar('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        ###############################
        # Quantize the training graph #
        ###############################
        # The rewrite has to happen after the forward pass (including the
        # losses) exists but BEFORE any gradient ops are created below, so
        # that the backward pass is derived from the folded and quantized
        # forward graph instead of from the original one.
        g = tf.get_default_graph()
        fold_batch_norms.FoldBatchNorms(g)
        quantize.Quantize(g, is_training=True)
        # Track the quantization range variables (named ...min / ...max).
        for var in g.get_collection('variables'):
            if var.name.endswith('min:0') or var.name.endswith('max:0'):
                summaries.add(tf.summary.scalar(var.name, var))

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                total_num_replicas=FLAGS.worker_replicas,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        # Compute the total loss and the per-variable gradients across clones.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        # Evaluating train_tensor runs every update op and yields the loss.
        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
예제 #19
0
  def testMultipleLayerConv2d(self,
                              relu=nn_ops.relu,
                              relu_op_name='Relu',
                              has_scaling=True,
                              fused_batch_norm=False,
                              freeze_batch_norm_delay=None):
    """Verifies folding on a two-layer network built inside nested scopes.

    Only the second conv layer carries its own batch norm (passed through
    normalizer_fn); the assertions below inspect the folded ops under that
    layer's scope.

    Args:
      relu: Callable that returns an Operation, a factory method for the Relu*.
      relu_op_name: String, name of the Relu* operation.
      has_scaling: Bool, when true the batch norm has scaling.
      fused_batch_norm: Bool, when true the batch norm is fused.
      freeze_batch_norm_delay: None or the number of steps after which training
        switches to using frozen mean and variance.
    """
    out_depth = 3
    stride = 1
    activation_fn = relu

    g = ops.Graph()
    with g.as_default():
      inputs = array_ops.zeros((5, 128, 128, 3))
      with variable_scope.variable_scope('topnet/testnet', [inputs]):
        first_conv = conv2d(
            inputs,
            out_depth, [5, 5],
            stride=stride,
            padding='SAME',
            weights_initializer=self._WeightInit(0.09),
            activation_fn=None,
            normalizer_fn=None,
            scope='testnet/layer1')
        # Batch norm and relu for layer 1 live in a different scope than the
        # convolution itself.
        first_bn = batch_norm(
            first_conv,
            scale=has_scaling,
            fused=fused_batch_norm,
            scope='layer1')
        layer1 = activation_fn(first_bn)

        second_weights_init = self._WeightInit(0.09)
        second_bn_params = self._BatchNormParams(
            scale=has_scaling, fused=fused_batch_norm)
        second_conv = conv2d(
            layer1,
            2 * out_depth, [5, 5],
            stride=stride,
            padding='SAME',
            weights_initializer=second_weights_init,
            activation_fn=activation_fn,
            normalizer_fn=batch_norm,
            normalizer_params=second_bn_params,
            scope='testnet/layer2')
        # A further batch norm and relu are stacked on top of layer 2, again
        # in a different scope.
        second_bn = batch_norm(
            second_conv,
            scale=has_scaling,
            fused=fused_batch_norm,
            scope='layer2')
        activation_fn(second_bn)

      fold_batch_norms.FoldBatchNorms(
          g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay)

    # Scope of the second conv layer, whose folded ops are inspected.
    scope = 'topnet/testnet/testnet/layer2'
    mul_fold_name = scope + '/mul_fold'
    conv_fold_name = scope + '/Conv2D_Fold'

    folded_mul = g.get_operation_by_name(mul_fold_name)
    self.assertEqual(folded_mul.type, 'Mul')
    expected_mul_inputs = [
        scope + '/correction_mult',
        self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm)
    ]
    self._AssertInputOpsAre(folded_mul, expected_mul_inputs)
    self._AssertOutputGoesToOps(folded_mul, g, [conv_fold_name])

    folded_conv = g.get_operation_by_name(conv_fold_name)
    self.assertEqual(folded_conv.type, 'Conv2D')
    # Drop the trailing ':0' of the tensor name to obtain the op name.
    layer1_op_name = layer1.name[:-len(':0')]
    self._AssertInputOpsAre(folded_conv, [mul_fold_name, layer1_op_name])
    self._AssertOutputGoesToOps(folded_conv, g, [scope + '/post_conv_mul'])

    folded_add = g.get_operation_by_name(scope + '/add_fold')
    self.assertEqual(folded_add.type, 'Add')
    expected_add_inputs = [
        scope + '/correction_add',
        self._BathNormBiasName(scope, fused_batch_norm)
    ]
    self._AssertInputOpsAre(folded_add, expected_add_inputs)
    self._AssertOutputGoesToOps(folded_add, g, [scope + '/' + relu_op_name])

    # No op name in the rewritten graph may contain an empty scope component.
    for graph_op in g.get_operations():
      self.assertFalse('//' in graph_op.name,
                       'Double slash in op %s' % graph_op.name)
  def _testQuantize_FCWithBatchNorm(self, activation, activation_op_name,
                                    with_bypass, delay, fused_batch_norm,
                                    use_ema):
    """Verifies quantization of: inputs -> FC with batch norm -> Activation.

    After folding and quantizing, checks the type and wiring of the fake
    quantization ops for the weights, for the layer output (bypass only) and
    for the activation.

    Args:
      activation: Callable that returns an Operation, a factory method for the
        Activation.
      activation_op_name: String, name of the Activation operation.
      with_bypass: Bool, when true there is an extra connection added from
        inputs to just before Activation.
      delay: Int (optional), delay in number of steps until quantization starts.
      fused_batch_norm: Bool, when true use FusedBatchNorm.
      use_ema: Bool, when true uses EMA quantization for BN folded weights.
    """
    if with_bypass:
      scope, out_depth = 'test/test2', 256
    else:
      scope, out_depth = 'test', 128

    graph = ops.Graph()
    with graph.as_default():
      training.create_global_step(graph)

      inputs = array_ops.zeros((5, 256))
      weights_init = self._WeightInit(0.03)
      bn_params = self._BatchNormParams(fused_batch_norm)
      node = fully_connected(
          inputs,
          out_depth,
          weights_initializer=weights_init,
          activation_fn=None,
          normalizer_fn=batch_norm,
          normalizer_params=bn_params,
          scope=scope)

      # Manually add a bypass (optionally) and an activation.
      if with_bypass:
        node = math_ops.add(inputs, node, name='test/Add')
      node = activation(node, name='test/' + activation_op_name)

      # Give the activation a consumer that sits behind a control dependency.
      update_barrier = control_flow_ops.no_op(name='update_barrier')
      with ops.control_dependencies([update_barrier]):
        array_ops.identity(node, name='control_dependency')

      fold_batch_norms.FoldBatchNorms(graph)

      quantize.Quantize(
          graph, quant_delay=delay, quantize_folded_weights_use_ema=use_ema)

    quantization_node_name = 'FakeQuantWithMinMaxVars'

    # --- Weights quantization ---
    weights_prefix = scope + '/weights_quant/'
    weights_quant = graph.get_operation_by_name(weights_prefix +
                                                quantization_node_name)
    self.assertEqual(weights_quant.type, quantization_node_name)
    if use_ema:
      assign_min, assign_max = 'AssignMinEma', 'AssignMaxEma'
    else:
      assign_min, assign_max = 'AssignMinLast', 'AssignMaxLast'
    self._AssertInputOpsAre(weights_quant, [
        weights_prefix + assign_min,
        weights_prefix + assign_max,
        scope + '/mul_fold'
    ])
    if delay and use_ema:
      weights_consumer = scope + '/weights_quant/delayed_quant/Switch_1'
    else:
      weights_consumer = scope + '/MatMul_Fold'
    self._AssertOutputGoesToOps(weights_quant, graph, [weights_consumer])

    # --- Layer output quantization (only present with a bypass) ---
    if with_bypass:
      conv_prefix = scope + '/conv_quant/'
      conv_quant = graph.get_operation_by_name(conv_prefix +
                                               quantization_node_name)
      self.assertEqual(conv_quant.type, quantization_node_name)
      self._AssertInputOpsAre(conv_quant, [
          conv_prefix + 'AssignMinEma',
          conv_prefix + 'AssignMaxEma',
          scope + '/add_fold'
      ])
      if delay:
        conv_consumer = scope + '/conv_quant/delayed_quant/Switch_1'
      else:
        conv_consumer = 'test/Add'
      self._AssertOutputGoesToOps(conv_quant, graph, [conv_consumer])

    # --- Activation quantization ---
    act_quant = graph.get_operation_by_name('test/act_quant/' +
                                            quantization_node_name)
    self.assertEqual(act_quant.type, quantization_node_name)
    self._AssertInputOpsAre(act_quant, [
        'test/act_quant/AssignMinEma',
        'test/act_quant/AssignMaxEma',
        'test/' + activation_op_name
    ])
    if delay:
      act_consumer = 'test/act_quant/delayed_quant/Switch_1'
    else:
      act_consumer = 'control_dependency'
    self._AssertOutputGoesToOps(act_quant, graph, [act_consumer])
  def _TestQuantize_DepthwiseConv2dWithBatchNorm(
      self, activation, activation_op_name, with_bypass, delay,
      fused_batch_norm):
    """Verifies quantization of: inputs -> DWConv2d with batch norm -> Activation.

    After folding and quantizing, checks the type and wiring of the fake
    quantization ops for the weights, for the layer output (bypass only) and
    for the activation.

    Args:
      activation: Callable that returns an Operation, a factory method for the
        Activation.
      activation_op_name: String, name of the Activation operation.
      with_bypass: Bool, when true there is an extra connection added from
        inputs to just before Activation.
      delay: Int (optional), delay in number of steps until quantization starts.
      fused_batch_norm: Bool, when true use FusedBatchNorm.
    """
    if with_bypass:
      scope, stride = 'test/test2', 1
    else:
      scope, stride = 'test', 2

    graph = ops.Graph()
    with graph.as_default():
      inputs = array_ops.zeros((5, 128, 128, 3))
      weights_init = self._WeightInit(0.09)
      bn_params = self._BatchNormParams(fused_batch_norm)
      node = separable_conv2d(
          inputs,
          None, [5, 5],
          stride=stride,
          depth_multiplier=1.0,
          padding='SAME',
          weights_initializer=weights_init,
          activation_fn=None,
          normalizer_fn=batch_norm,
          normalizer_params=bn_params,
          scope=scope)

      # Manually add a bypass (optionally) and an activation.
      if with_bypass:
        node = math_ops.add(inputs, node, name='test/Add')
      node = activation(node, name='test/' + activation_op_name)

      # Give the activation a consumer that sits behind a control dependency.
      update_barrier = control_flow_ops.no_op(name='update_barrier')
      with ops.control_dependencies([update_barrier]):
        array_ops.identity(node, name='control_dependency')

      fold_batch_norms.FoldBatchNorms(graph)

      quantize.Quantize(graph, quant_delay=delay)

    quantization_node_name = 'FakeQuantWithMinMaxVars'

    # --- Weights quantization ---
    weights_prefix = scope + '/weights_quant/'
    weights_quant = graph.get_operation_by_name(weights_prefix +
                                                quantization_node_name)
    self.assertEqual(weights_quant.type, quantization_node_name)
    self._AssertInputOpsAre(weights_quant, [
        weights_prefix + 'AssignMinLast',
        weights_prefix + 'AssignMaxLast',
        scope + '/mul_fold'
    ])
    if delay:
      weights_consumer = scope + '/weights_quant/delayed_quant/Switch_1'
    else:
      weights_consumer = scope + '/depthwise_Fold'
    self._AssertOutputGoesToOps(weights_quant, graph, [weights_consumer])

    # --- Layer output quantization (only present with a bypass) ---
    if with_bypass:
      conv_prefix = scope + '/conv_quant/'
      conv_quant = graph.get_operation_by_name(conv_prefix +
                                               quantization_node_name)
      self.assertEqual(conv_quant.type, quantization_node_name)
      self._AssertInputOpsAre(conv_quant, [
          conv_prefix + 'AssignMinEma',
          conv_prefix + 'AssignMaxEma',
          scope + '/add_fold'
      ])
      if delay:
        conv_consumer = scope + '/conv_quant/delayed_quant/Switch_1'
      else:
        conv_consumer = 'test/Add'
      self._AssertOutputGoesToOps(conv_quant, graph, [conv_consumer])

    # --- Activation quantization ---
    act_quant = graph.get_operation_by_name('test/act_quant/' +
                                            quantization_node_name)
    self.assertEqual(act_quant.type, quantization_node_name)
    self._AssertInputOpsAre(act_quant, [
        'test/act_quant/AssignMinEma',
        'test/act_quant/AssignMaxEma',
        'test/' + activation_op_name
    ])
    if delay:
      act_consumer = 'test/act_quant/delayed_quant/Switch_1'
    else:
      act_consumer = 'control_dependency'
    self._AssertOutputGoesToOps(act_quant, graph, [act_consumer])
예제 #22
0
  def _TestFoldAtrousConv2d(self, relu, relu_op_name, with_bypass, has_scaling,
                            fused_batch_norm, freeze_batch_norm_delay,
                            insert_identity_node):
    """Verifies folding for: inputs -> AtrousConv2d with batch norm -> Relu*.

    Builds a dilated separable convolution followed by batch norm (optionally
    with an identity op in between), runs FoldBatchNorms in training mode and
    then checks the type and wiring of the folded operations.

    Args:
      relu: Callable that returns an Operation, a factory method for the Relu*.
      relu_op_name: String, name of the Relu* operation.
      with_bypass: Bool, when true there is an extra connection added from
        inputs to just before Relu*.
      has_scaling: Bool, when true the batch norm has scaling.
      fused_batch_norm: Bool, when true the batch norm is fused.
      freeze_batch_norm_delay: None or the number of steps after which training
        switches to using frozen mean and variance.
      insert_identity_node: Bool, insert identity node between conv and batch
        norm.
    """
    dilation_rate = 2
    if with_bypass:
      name, activation_fn = 'test/test2', None
    else:
      name, activation_fn = 'test', relu

    g = ops.Graph()
    with g.as_default():
      inputs = array_ops.zeros((5, 128, 128, 3))
      if insert_identity_node:
        # Build conv, identity, batch norm and activation as separate ops
        # under a shared name scope.
        sep_conv_name = name + '/SeparableConv2d'
        with g.name_scope(name):
          conv_out = separable_conv2d(
              inputs,
              None, [3, 3],
              rate=dilation_rate,
              depth_multiplier=1.0,
              padding='SAME',
              weights_initializer=self._WeightInit(0.09),
              activation_fn=None,
              normalizer_fn=None,
              biases_initializer=None)
          passthrough = array_ops.identity(conv_out, name='sep_conv_out')

          node = batch_norm(
              passthrough,
              center=True,
              scale=has_scaling,
              decay=1.0 - 0.003,
              fused=fused_batch_norm)
          if activation_fn is not None:
            node = activation_fn(node)
      else:
        # Let the layer itself attach the batch norm and the activation.
        sep_conv_name = name
        weights_init = self._WeightInit(0.09)
        bn_params = self._BatchNormParams(
            scale=has_scaling, fused=fused_batch_norm)
        node = separable_conv2d(
            inputs,
            None, [3, 3],
            rate=dilation_rate,
            depth_multiplier=1.0,
            padding='SAME',
            weights_initializer=weights_init,
            activation_fn=activation_fn,
            normalizer_fn=batch_norm,
            normalizer_params=bn_params,
            scope=name)
      if with_bypass:
        # With a bypass the Relu* is applied manually after the add.
        bypass_sum = math_ops.add(inputs, node, name='test/AddV2')
        relu(bypass_sum, name='test/' + relu_op_name)

      fold_batch_norms.FoldBatchNorms(
          g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay)

    mul_fold_name = sep_conv_name + '/mul_fold'
    depthwise_fold_name = sep_conv_name + '/depthwise_Fold'

    # Folded multiplier: correction factor times the reshaped scale.
    folded_mul = g.get_operation_by_name(mul_fold_name)
    self.assertEqual(folded_mul.type, 'Mul')
    scale_reshape_op_name = sep_conv_name + (
        '/BatchNorm_Fold/scale_reshape' if fused_batch_norm else
        '/scale_reshape')
    self._AssertInputOpsAre(
        folded_mul, [sep_conv_name + '/correction_mult', scale_reshape_op_name])
    self._AssertOutputGoesToOps(folded_mul, g, [depthwise_fold_name])

    # The scale is reshaped before it is multiplied into the weights.
    scale_reshape = g.get_operation_by_name(scale_reshape_op_name)
    self.assertEqual(scale_reshape.type, 'Reshape')
    expected_reshape_inputs = [
        self._BatchNormMultiplierName(sep_conv_name, has_scaling,
                                      fused_batch_norm),
        scale_reshape_op_name + '/shape'
    ]
    self._AssertInputOpsAre(scale_reshape, expected_reshape_inputs)
    self._AssertOutputGoesToOps(scale_reshape, g, [mul_fold_name])

    # Folded depthwise conv reads the space-to-batch transformed input.
    folded_conv = g.get_operation_by_name(depthwise_fold_name)
    self.assertEqual(folded_conv.type, 'DepthwiseConv2dNative')
    self._AssertInputOpsAre(
        folded_conv,
        [mul_fold_name, sep_conv_name + '/depthwise/SpaceToBatchND'])
    if fused_batch_norm:
      batch_to_space_name = sep_conv_name + '/BatchToSpaceND_Fold'
    else:
      batch_to_space_name = sep_conv_name + '/depthwise/BatchToSpaceND_Fold'
    self._AssertOutputGoesToOps(folded_conv, g, [batch_to_space_name])

    # Folded add feeds whatever originally consumed the batch norm output.
    folded_add = g.get_operation_by_name(sep_conv_name + '/add_fold')
    self.assertEqual(folded_add.type, 'Add')
    expected_add_inputs = [
        sep_conv_name + '/correction_add',
        self._BathNormBiasName(sep_conv_name, fused_batch_norm)
    ]
    self._AssertInputOpsAre(folded_add, expected_add_inputs)
    if with_bypass:
      consumer_name = 'test/AddV2'
    else:
      consumer_name = 'test/' + relu_op_name
    self._AssertOutputGoesToOps(folded_add, g, [consumer_name])
    if freeze_batch_norm_delay is not None:
      self._AssertMovingAveragesAreFrozen(g, name)

    # No op name in the rewritten graph may contain an empty scope component.
    for graph_op in g.get_operations():
      self.assertFalse('//' in graph_op.name,
                       'Double slash in op %s' % graph_op.name)
예제 #23
0
  def _TestFoldFullyConnectedLayer(
      self, relu, relu_op_name, with_bypass, has_scaling, fused_batch_norm,
      freeze_batch_norm_delay, insert_identity_node):
    """Checks batch norm folding for: inputs -> FC with batch norm -> Relu*.

    Builds a small graph with a fully connected layer followed by batch norm,
    runs `fold_batch_norms.FoldBatchNorms` on it with `is_training=True`, and
    then verifies the type and the wiring of each folded op.

    Args:
      relu: Callable that returns an Operation, a factory method for the Relu*.
      relu_op_name: String, name of the Relu* operation.
      with_bypass: Bool, when true there is an extra connection added from
        inputs to just before Relu*.
      has_scaling: Bool, when true the batch norm has scaling.
      fused_batch_norm: Bool, when true the batch norm is fused.
      freeze_batch_norm_delay: None or the number of steps after which training
        switches to using frozen mean and variance.
      insert_identity_node: Bool, insert an identity node between the fully
        connected layer and batch norm. NOTE: the value passed in is currently
        ignored; the identity node is inserted exactly when `fused_batch_norm`
        is true.
    """
    # Layer configuration depends only on whether the bypass connection exists.
    if with_bypass:
      out_depth, activation_fn, name = 256, None, 'test/test2'
    else:
      out_depth, activation_fn, name = 128, relu, 'test'

    # The `insert_identity_node` argument is deliberately not consulted: the
    # identity variant is tied to the fused batch norm setting.
    use_identity = fused_batch_norm
    # With the identity variant the layer is created under a name scope and
    # receives its default name; otherwise it is created directly as `name`.
    fc_name = name + '/fully_connected' if use_identity else name

    graph = ops.Graph()
    with graph.as_default():
      batch_size, depth = 5, 256
      inputs = array_ops.zeros((batch_size, depth))

      if not use_identity:
        # Batch norm is attached through the layer's normalizer hook.
        net = fully_connected(
            inputs,
            out_depth,
            weights_initializer=self._WeightInit(0.03),
            activation_fn=activation_fn,
            normalizer_fn=batch_norm,
            normalizer_params=self._BatchNormParams(
                scale=has_scaling, fused=fused_batch_norm),
            scope=name)
      else:
        # Bare FC layer -> identity -> standalone batch norm -> activation.
        with graph.name_scope(name):
          fc_out = fully_connected(
              inputs,
              out_depth,
              weights_initializer=self._WeightInit(0.03),
              activation_fn=None,
              normalizer_fn=None,
              biases_initializer=None)
          fc_out = array_ops.identity(fc_out, name='fc_out')

          net = batch_norm(
              fc_out,
              center=True,
              scale=has_scaling,
              decay=1.0 - 0.003,
              fused=fused_batch_norm)
          if activation_fn is not None:
            net = activation_fn(net)

      if with_bypass:
        # The activation was withheld from the layer so that it can be applied
        # after the inputs are added back in.
        bypass_sum = math_ops.add(inputs, net, name='test/AddV2')
        relu(bypass_sum, name='test/' + relu_op_name)

      fold_batch_norms.FoldBatchNorms(
          graph,
          is_training=True,
          freeze_batch_norm_delay=freeze_batch_norm_delay)

    mul_fold_name = fc_name + '/mul_fold'
    matmul_fold_name = fc_name + '/MatMul_Fold'

    # Folded multiplier: correction factor times the batch norm multiplier,
    # feeding the folded MatMul.
    mul_fold_op = graph.get_operation_by_name(mul_fold_name)
    self.assertEqual(mul_fold_op.type, 'Mul')
    expected_mul_inputs = [
        fc_name + '/correction_mult',
        self._BatchNormMultiplierName(fc_name, has_scaling, fused_batch_norm),
    ]
    self._AssertInputOpsAre(mul_fold_op, expected_mul_inputs)
    self._AssertOutputGoesToOps(mul_fold_op, graph, [matmul_fold_name])

    # Folded MatMul: consumes the folded multiplier and the original inputs.
    matmul_fold_op = graph.get_operation_by_name(matmul_fold_name)
    self.assertEqual(matmul_fold_op.type, 'MatMul')
    self._AssertInputOpsAre(matmul_fold_op, [mul_fold_name, inputs.op.name])
    self._AssertOutputGoesToOps(matmul_fold_op, graph,
                                [fc_name + '/post_conv_mul'])

    # Folded bias add: output goes to the bypass add when present, otherwise
    # straight to the activation.
    add_fold_op = graph.get_operation_by_name(fc_name + '/add_fold')
    self.assertEqual(add_fold_op.type, 'Add')
    expected_add_inputs = [
        fc_name + '/correction_add',
        self._BathNormBiasName(fc_name, fused_batch_norm),
    ]
    self._AssertInputOpsAre(add_fold_op, expected_add_inputs)
    if with_bypass:
      consumer_name = 'test/AddV2'
    else:
      consumer_name = 'test/' + relu_op_name
    self._AssertOutputGoesToOps(add_fold_op, graph, [consumer_name])

    if freeze_batch_norm_delay is not None:
      self._AssertMovingAveragesAreFrozen(graph, name)

    # No op in the rewritten graph may contain an empty name scope component.
    for graph_op in graph.get_operations():
      self.assertFalse('//' in graph_op.name,
                       'Double slash in op %s' % graph_op.name)