Example #1
  def testFusePrelu(self):
    layers = [
        tf.keras.layers.PReLU(
            alpha_initializer=tf.initializers.constant(0.25)),
        tf.keras.layers.PReLU(
            alpha_initializer=tf.initializers.constant(0.25))
    ]
    model = tf.keras.Sequential(layers)
    tf.keras.backend.set_learning_phase(0)
    input_tensor = tf.constant([1.0, 1.0])

    @tf.function
    def execute_model(tensor):
      return model(tensor)

    graph_def = execute_model.get_concrete_function(
        input_tensor).graph.as_graph_def()
    optimized_graph_def = fuse_prelu.fuse_ops_for_prelu(graph_def)

    prelu_op_count = 0
    for node in optimized_graph_def.node:
      self.assertNotEqual("Relu", node.op)
      if node.op == 'Prelu':
        prelu_op_count += 1
    self.assertEqual(prelu_op_count, 2)
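For context, tf.keras.layers.PReLU computes max(0, x) + alpha * min(0, x), and TensorFlow lowers that expression into several primitive ops (Relu, Neg, Mul, Add). A rough sketch of the unfused pattern that fuse_ops_for_prelu collapses into a single Prelu node (illustrative only, not the exact subgraph Keras emits):

import tensorflow as tf

def unfused_prelu(x, alpha):
  # PReLU decomposed into primitives: the positive part plus the scaled
  # negative part. The fusion pass pattern-matches this subgraph and
  # replaces it with one custom 'Prelu' op.
  return tf.nn.relu(x) - alpha * tf.nn.relu(-x)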
Example #2
    def testFusePreluWithConv2d(self):
        layers = [
            tf.keras.layers.Conv2D(
                16, [3, 3],
                padding='same',
                use_bias=True,
                bias_initializer=tf.initializers.constant(0.25)),
            tf.keras.layers.PReLU()
        ]
        model = tf.keras.Sequential(layers)
        tf.keras.backend.set_learning_phase(0)
        input_tensor = tf.constant([1.0, 1.0], shape=[1, 2, 1, 1])

        @tf.function
        def execute_model(tensor):
            return model(tensor)

        graph = tf_saved_model_conversion_v2._freeze_saved_model_v2(
            execute_model.get_concrete_function(input_tensor))
        graph_def = graph.as_graph_def()

        for node in graph_def.node:
            if node.op == 'Conv2D':
                node.device = "/CPU:0"

        config = config_pb2.ConfigProto()
        rewriter_config = config.graph_options.rewrite_options
        rewriter_config.optimizers[:] = [
            'pruning', 'constfold', 'arithmetic', 'dependency', 'pruning',
            'remap', 'constfold', 'arithmetic', 'dependency'
        ]

        for output in ['Identity']:
            graph.add_to_collection('train_op',
                                    graph.get_operation_by_name(output))

        signature = meta_graph_pb2.SignatureDef()
        graph_def = tf_saved_model_conversion_v2._run_grappler(
            config, graph_def, graph, signature)
        graph_def = fuse_prelu.fuse_ops_for_prelu(graph_def)

        optimized_graph_def = fuse_prelu.fuse_prelu_with_fused_conv2d(
            graph_def)

        conv2d_op = None
        for node in optimized_graph_def.node:
            self.assertNotEqual("Prelu", node.op)
            if node.op == '_FusedConv2D':
                conv2d_op = node
        self.assertNotEqual(conv2d_op, None)
        self.assertEqual(conv2d_op.attr['fused_ops'].list.s,
                         [b'BiasAdd', b'Prelu'])
        self.assertEqual(conv2d_op.attr['num_args'].i, 2)
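When an assertion like the fused_ops check above fails, it helps to see which ops survived the rewrite. A small helper sketch for tallying op types in a GraphDef (the function name is ours, not part of the module under test):

from collections import Counter

def summarize_ops(graph_def):
    # Tally op types in a GraphDef so a failed fusion is easy to diagnose.
    counts = Counter(node.op for node in graph_def.node)
    for op, count in sorted(counts.items()):
        print('%s: %d' % (op, count))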
Example #3
    def testFusePrelu(self):
        layers = [
            tf.keras.layers.PReLU(
                alpha_initializer=tf.initializers.constant(0.25)),
            tf.keras.layers.PReLU(
                alpha_initializer=tf.initializers.constant(0.25))
        ]
        model = tf.keras.Sequential(layers)
        tf.keras.backend.set_learning_phase(0)
        input_tensor = tf.constant([1.0, 1.0])

        @tf.function
        def execute_model(tensor):
            return model(tensor)

        graph = tf_saved_model_conversion_v2._freeze_saved_model_v2(
            execute_model.get_concrete_function(input_tensor))
        graph_def = graph.as_graph_def()
        for node in graph_def.node:
            if node.op == 'Conv2D':
                node.device = "/CPU:0"

        config = config_pb2.ConfigProto()
        rewriter_config = config.graph_options.rewrite_options
        rewriter_config.optimizers[:] = [
            'pruning', 'constfold', 'arithmetic', 'dependency', 'pruning',
            'remap', 'constfold', 'arithmetic', 'dependency'
        ]

        for output in ['Identity']:
            graph.add_to_collection('train_op',
                                    graph.get_operation_by_name(output))

        signature = meta_graph_pb2.SignatureDef()
        graph_def = tf_saved_model_conversion_v2._run_grappler(
            config, graph_def, graph, signature)

        optimized_graph_def = fuse_prelu.fuse_ops_for_prelu(graph_def)

        prelu_op_count = 0
        value = None
        for node in optimized_graph_def.node:
            self.assertNotEqual("Relu", node.op)
            if node.op == 'Prelu':
                prelu_op_count += 1
            if node.op == 'Const':
                value = common.values_from_const(node)
        self.assertEqual(prelu_op_count, 2)
        self.assertEqual(value, [0.25])
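common.values_from_const decodes the TensorProto stored in a Const node, which is how the test checks that the fused Prelu kept its alpha of 0.25. A roughly equivalent sketch using TensorFlow's tensor_util (assuming, as for any Const node, that the literal lives in the 'value' attr):

from tensorflow.python.framework import tensor_util

def const_value(node):
    # Decode the TensorProto held in a Const node's 'value' attribute
    # into a numpy array.
    return tensor_util.MakeNdarray(node.attr['value'].tensor)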
def optimize_graph(graph,
                   signature_def,
                   output_graph,
                   tf_version,
                   quantization_dtype=None,
                   skip_op_check=False,
                   strip_debug_ops=False,
                   weight_shard_size_bytes=1024 * 1024 * 4):
    """Takes a Python Graph object and optimizes the graph.

    Args:
      graph: The frozen graph to optimize.
      signature_def: the SignatureDef of the inference graph.
      output_graph: The location of the output graph.
      tf_version: TensorFlow version of the input graph.
      quantization_dtype: An optional numpy dtype to quantize weights to for
        compression. Only np.uint8 and np.uint16 are supported.
      skip_op_check: Bool whether to skip the op check.
      strip_debug_ops: Bool whether to strip debug ops.
      weight_shard_size_bytes: Shard size (in bytes) of the weight files.
        The size of each weight file will be <= this value.
    """

    # Add a collection 'train_op' so that Grappler knows the outputs.
    for _, output in signature_def.outputs.items():
        name = output.name.split(':')[0]
        graph.add_to_collection('train_op', graph.get_operation_by_name(name))

    graph_def = graph.as_graph_def()

    unsupported = validate(graph_def.node, skip_op_check, strip_debug_ops)
    if unsupported:
        raise ValueError('Unsupported Ops in the model before optimization\n' +
                         ', '.join(unsupported))

    # First pass of Grappler optimization; this is needed for batch norm folding.
    config = config_pb2.ConfigProto()
    rewriter_config = config.graph_options.rewrite_options
    rewriter_config.optimizers[:] = [
        'pruning', 'constfold', 'arithmetic', 'dependency', 'pruning',
        'constfold', 'arithmetic', 'dependency'
    ]
    if strip_debug_ops:
        rewriter_config.optimizers.insert(0, 'debug_stripper')

    optimized_graph = _run_grappler(config, graph_def, graph, signature_def)

    # batch norm folding
    optimized_graph = fold_batch_norms.fold_batch_norms(optimized_graph)

    # Set the device to CPU for all Conv2D and MatMul nodes, since the Grappler
    # remap optimizer only supports FusedConv2D and FusedMatMul on CPU.
    for node in optimized_graph.node:
        if node.op == 'Conv2D' or node.op == 'MatMul':
            node.device = '/device:CPU:0'

    # Rerun Grappler to fuse Conv2D/MatMul.
    config.graph_options.rewrite_options.optimizers[:] = [
        'remap', 'constfold', 'arithmetic', 'dependency'
    ]

    optimized_graph = _run_grappler(config, optimized_graph, graph,
                                    signature_def)
    optimized_graph = _remove_unused_control_flow_inputs(optimized_graph)

    # Because TF breaks the Prelu op into 6 ops, we fuse those ops into a
    # single Prelu for performance.
    optimized_graph = fuse_prelu.fuse_ops_for_prelu(optimized_graph)

    # Because grappler does not support DepthwiseConv2d fusing, we have
    # implemented it here.
    optimized_graph = fuse_depthwise_conv2d.fuse_depthwise_conv2d(
        optimized_graph)

    # Since the Grappler remap optimizer does not support Prelu as the
    # activation function for the _FusedConv2D op, we do the fusion manually here.
    optimized_graph = fuse_prelu.fuse_prelu_with_fused_conv2d_or_matmul(
        optimized_graph)

    unsupported = validate(optimized_graph.node, skip_op_check,
                           strip_debug_ops)
    if unsupported:
        raise ValueError('Unsupported Ops in the model after optimization\n' +
                         ', '.join(unsupported))

    extract_weights(optimized_graph, output_graph, tf_version, signature_def,
                    quantization_dtype, weight_shard_size_bytes)
    return optimized_graph
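A usage sketch for this signature; load_and_freeze and the paths below are hypothetical stand-ins for however the caller obtains a frozen tf.Graph and its SignatureDef:

# Hypothetical call site: the load_and_freeze helper and the paths are
# placeholders, not part of this module.
graph, signature_def = load_and_freeze('/tmp/saved_model')
optimize_graph(graph,
               signature_def,
               output_graph='/tmp/web_model/model.json',
               tf_version=tf.__version__,
               strip_debug_ops=True)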
def optimize_graph(graph,
                   output_node_names,
                   output_graph,
                   tf_version,
                   quantization_dtype=None,
                   skip_op_check=False,
                   strip_debug_ops=False):
    """Takes a Python Graph object and optimizes the graph.

    Args:
      graph: The frozen graph to optimize.
      output_node_names: List of output node names.
      output_graph: The location of the output graph.
      tf_version: TensorFlow version of the input graph.
      quantization_dtype: An optional numpy dtype to quantize weights to for
        compression. Only np.uint8 and np.uint16 are supported.
      skip_op_check: Bool whether to skip the op check.
      strip_debug_ops: Bool whether to strip debug ops.
    """
    fuse_prelu.register_prelu_func(graph)

    # Add a collection 'train_op' so that Grappler knows the outputs.
    for output in output_node_names:
        graph.add_to_collection('train_op',
                                graph.get_operation_by_name(output))

    graph_def = graph.as_graph_def()

    unsupported = validate(graph_def.node, skip_op_check, strip_debug_ops)
    if unsupported:
        raise ValueError('Unsupported Ops in the model before optimization\n' +
                         ', '.join(unsupported))

    # Because TF breaks the Prelu op into 6 ops, we fuse those ops into a
    # single Prelu for performance.
    optimized_graph = fuse_prelu.fuse_ops_for_prelu(graph_def)

    # First pass of Grappler optimization; this is needed for batch norm folding.
    config = config_pb2.ConfigProto()
    rewriter_config = config.graph_options.rewrite_options
    rewriter_config.optimizers[:] = [
        'pruning', 'constfold', 'arithmetic', 'dependency', 'pruning',
        'constfold', 'arithmetic', 'dependency'
    ]
    if strip_debug_ops:
        rewriter_config.optimizers.insert(0, 'debug_stripper')

    optimized_graph = _run_grappler(config, optimized_graph, graph)

    # batch norm folding
    optimized_graph = fold_batch_norms.fold_batch_norms(optimized_graph)

    # Set the device to CPU for all Conv2D nodes, since the Grappler remap
    # optimizer only supports FusedConv2D on CPU.
    for node in optimized_graph.node:
        if node.op == 'Conv2D':
            node.device = '/device:CPU:0'

    # Rerun Grappler to fuse Conv2D.
    config.graph_options.rewrite_options.optimizers[:] = [
        'remap', 'constfold', 'arithmetic', 'dependency'
    ]

    optimized_graph = _run_grappler(config, optimized_graph, graph)

    # Since the Grappler remap optimizer does not support Prelu as the
    # activation function for the _FusedConv2D op, we do the fusion manually here.
    optimized_graph = fuse_prelu.fuse_prelu_with_fused_conv2d(optimized_graph)

    unsupported = validate(optimized_graph.node, skip_op_check,
                           strip_debug_ops)

    if unsupported:
        raise ValueError('Unsupported Ops in the model after optimization\n' +
                         ', '.join(unsupported))

    extract_weights(optimized_graph, output_graph, tf_version,
                    quantization_dtype)
    return optimized_graph
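This older variant takes explicit output node names instead of a SignatureDef; a sketch of the corresponding call, with the node name and paths illustrative:

# Hypothetical call site for the older signature.
optimize_graph(graph,
               output_node_names=['Identity'],
               output_graph='/tmp/web_model/model.json',
               tf_version=tf.__version__)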