Ejemplo n.º 1
0
    def testFuseDepthwiseConv2dNativeWithBiasAndActivation(self):
        """Depthwise conv + bias + ReLU must collapse into one fused node.

        A Sequential model made of a DepthwiseConv2D layer (kernel size 1,
        constant 0.25 bias initializer) followed by ReLU is traced, passed
        through `_freeze_saved_model_v2`, and handed to
        `fuse_depthwise_conv2d`. The resulting graph must contain no
        standalone BiasAdd / DepthwiseConv2dNative / Relu nodes and exactly
        one fused depthwise node carrying both fused ops.
        """
        model = tf.keras.Sequential([
            tf.keras.layers.DepthwiseConv2D(
                1, bias_initializer=tf.initializers.constant(0.25)),
            tf.keras.layers.ReLU(),
        ])
        tf.keras.backend.set_learning_phase(0)
        sample_input = tf.constant([1.0, 1.0], shape=[1, 1, 1, 2])

        @tf.function
        def execute_model(tensor):
            return model(tensor)

        concrete_fn = execute_model.get_concrete_function(sample_input)
        frozen_graph = tf_saved_model_conversion_v2._freeze_saved_model_v2(
            concrete_fn)
        fused_graph_def = fuse_depthwise_conv2d.fuse_depthwise_conv2d(
            frozen_graph.as_graph_def())

        # None of the unfused building blocks may survive the pass.
        unfused_ops = ('BiasAdd', 'DepthwiseConv2dNative', 'Relu')
        fused_nodes = []
        for node in fused_graph_def.node:
            for unfused_op in unfused_ops:
                self.assertNotEqual(unfused_op, node.op)
            if node.op == graph_rewrite_util.FUSED_DEPTHWISE_CONV2D:
                fused_nodes.append(node)

        self.assertEqual(len(fused_nodes), 1)
        fused_node = fused_nodes[-1]
        self.assertEqual(fused_node.attr['fused_ops'].list.s,
                         [b'BiasAdd', b'Relu'])
        # One extra argument is expected (presumably the bias tensor).
        self.assertEqual(fused_node.attr['num_args'].i, 1)
Ejemplo n.º 2
0
    def testFusePreluWithDepthwiseConv2d(self):
        """PReLU following a biased depthwise conv must be fused into it.

        A DepthwiseConv2D (kernel size 1, constant 0.25 bias initializer)
        + PReLU model is traced and passed through `_freeze_saved_model_v2`,
        run through grappler, and then through the three manual fusing
        passes. Afterwards no standalone Prelu node may remain, and a
        FusedDepthwiseConv2dNative node must list BiasAdd and Prelu as its
        fused ops with two extra arguments.
        """
        model = tf.keras.Sequential([
            tf.keras.layers.DepthwiseConv2D(
                1, bias_initializer=tf.initializers.constant(0.25)),
            tf.keras.layers.PReLU(),
        ])
        tf.keras.backend.set_learning_phase(0)
        sample_input = tf.constant([1.0, 1.0], shape=[1, 2, 1, 1])

        @tf.function
        def execute_model(tensor):
            return model(tensor)

        graph = tf_saved_model_conversion_v2._freeze_saved_model_v2(
            execute_model.get_concrete_function(sample_input))
        graph_def = graph.as_graph_def()

        # Pin any Conv2D nodes to the CPU device before running grappler.
        for conv_node in (n for n in graph_def.node if n.op == 'Conv2D'):
            conv_node.device = '/CPU:0'

        config = config_pb2.ConfigProto()
        config.graph_options.rewrite_options.optimizers[:] = [
            'pruning', 'constfold', 'arithmetic', 'dependency', 'pruning',
            'remap', 'constfold', 'arithmetic', 'dependency'
        ]

        # Register the output op in the 'train_op' collection for grappler.
        graph.add_to_collection('train_op',
                                graph.get_operation_by_name('Identity'))

        graph_def = tf_saved_model_conversion_v2._run_grappler(
            config, graph_def, graph, meta_graph_pb2.SignatureDef())

        # Manual fusing passes, applied in this exact order.
        fusing_passes = (
            fuse_prelu.fuse_ops_for_prelu,
            fuse_depthwise_conv2d.fuse_depthwise_conv2d,
            fuse_prelu.fuse_prelu_with_fused_conv2d_or_matmul,
        )
        for fusing_pass in fusing_passes:
            graph_def = fusing_pass(graph_def)

        fused_nodes = []
        for node in graph_def.node:
            self.assertNotEqual('Prelu', node.op)
            if node.op == 'FusedDepthwiseConv2dNative':
                fused_nodes.append(node)

        # Keep the last match, or None when the fused op is missing.
        conv2d_op = fused_nodes[-1] if fused_nodes else None
        self.assertIsNotNone(conv2d_op)
        self.assertEqual(conv2d_op.attr['fused_ops'].list.s,
                         [b'BiasAdd', b'Prelu'])
        self.assertEqual(conv2d_op.attr['num_args'].i, 2)
Ejemplo n.º 3
0
def optimize_graph(graph,
                   signature_def,
                   output_graph,
                   tf_version,
                   quantization_dtype=None,
                   skip_op_check=False,
                   strip_debug_ops=False,
                   weight_shard_size_bytes=1024 * 1024 * 4):
    """Takes a Python Graph object and optimizes the graph.

    The graph is validated, run through two rounds of grappler (with batch
    norm folding in between), passed through the manual prelu / depthwise
    conv2d fusing passes, validated again, and finally handed to
    `extract_weights` together with `output_graph`.

    Args:
      graph: The frozen graph to optimize.
      signature_def: the SignatureDef of the inference graph.
      output_graph: The location of the output graph.
      tf_version: Tensorflow version of the input graph.
      quantization_dtype: An optional numpy dtype to quantize weights to for
        compression. Only np.uint8 and np.uint16 are supported.
      skip_op_check: Bool whether to skip the op check.
      strip_debug_ops: Bool whether to strip debug ops.
      weight_shard_size_bytes: Shard size (in bytes) of the weight files.
        The size of each weight file will be <= this value.

    Returns:
      The optimized graph definition produced by the last fusing pass, i.e.
      the same object that was passed to `extract_weights`.

    Raises:
      ValueError: If `validate` reports unsupported ops either before or
        after optimization.
    """

    # Add a collection 'train_op' so that Grappler knows the outputs.
    for output in signature_def.outputs.values():
        name = output.name.split(':')[0]
        graph.add_to_collection('train_op', graph.get_operation_by_name(name))

    graph_def = graph.as_graph_def()

    unsupported = validate(graph_def.node, skip_op_check, strip_debug_ops)
    if unsupported:
        raise ValueError('Unsupported Ops in the model before optimization\n' +
                         ', '.join(unsupported))

    # First pass of grappler optimization; this is needed for batch norm
    # folding.
    config = config_pb2.ConfigProto()
    rewriter_config = config.graph_options.rewrite_options
    rewriter_config.optimizers[:] = [
        'pruning', 'constfold', 'arithmetic', 'dependency', 'pruning',
        'constfold', 'arithmetic', 'dependency'
    ]
    if strip_debug_ops:
        rewriter_config.optimizers.insert(0, 'debug_stripper')

    optimized_graph = _run_grappler(config, graph_def, graph, signature_def)

    # Batch norm folding.
    optimized_graph = fold_batch_norms.fold_batch_norms(optimized_graph)

    # Set the device to CPU for all Conv2D and MatMul nodes, since the
    # grappler remap optimizer only supports FusedConv2D and FusedMatMul
    # for CPU.
    for node in optimized_graph.node:
        if node.op in ('Conv2D', 'MatMul'):
            node.device = '/device:CPU:0'

    # Rerun grappler to fuse conv2d/matmul.
    config.graph_options.rewrite_options.optimizers[:] = [
        'remap', 'constfold', 'arithmetic', 'dependency'
    ]

    optimized_graph = _run_grappler(config, optimized_graph, graph,
                                    signature_def)
    optimized_graph = _remove_unused_control_flow_inputs(optimized_graph)

    # Because TF breaks the Prelu op into 6 ops, for performance we are
    # fusing those ops into a single prelu.
    optimized_graph = fuse_prelu.fuse_ops_for_prelu(optimized_graph)

    # Because grappler does not support DepthwiseConv2d fusing, we have
    # implemented it here.
    optimized_graph = fuse_depthwise_conv2d.fuse_depthwise_conv2d(
        optimized_graph)

    # Since the grappler remap optimizer does not support prelu as the
    # activation function for the _FusedConv2D op, we are doing it manually
    # here.
    optimized_graph = fuse_prelu.fuse_prelu_with_fused_conv2d_or_matmul(
        optimized_graph)

    unsupported = validate(optimized_graph.node, skip_op_check,
                           strip_debug_ops)
    if unsupported:
        raise ValueError('Unsupported Ops in the model after optimization\n' +
                         ', '.join(unsupported))

    extract_weights(optimized_graph, output_graph, tf_version, signature_def,
                    quantization_dtype, weight_shard_size_bytes)
    # Hand back the optimized graph itself, not this function object.
    return optimized_graph