def set_attr_dtype(self, node, key, value):
    node.attr[key].CopyFrom(tf.AttrValue(type=value.as_datatype_enum))

def set_attr_tensor(self, node, key, value, dtype, shape=None):
    node.attr[key].CopyFrom(
        tf.AttrValue(tensor=tensor_util.make_tensor_proto(
            value, dtype=dtype, shape=shape)))
Example #3
        op_inputs = [inp_tens]

        # Get filters for Conv but don't transpose
        if n_org.type == 'Conv2D':
            filter_tens = sess.graph.get_tensor_by_name(n_org.inputs[1].name)
            op_inputs.append(filter_tens)

        # Attributes without data_format; NHWC is the default
        atts = {
            key: n_org.node_def.attr[key]
            for key in list(n_org.node_def.attr.keys()) if key != 'data_format'
        }
        if 'ksize' in atts:
            kl = atts['ksize'].list.i
            ksl = [kl[0], kl[2], kl[3], kl[1]]
            atts['ksize'] = tf.AttrValue(list=tf.AttrValue.ListValue(i=ksl))
        if 'strides' in atts:
            st = atts['strides'].list.i
            stl = [st[0], st[2], st[3], st[1]]
            atts['strides'] = tf.AttrValue(list=tf.AttrValue.ListValue(i=stl))

        # Create new Operation
        op = sess.graph.create_op(op_type=n_org.type,
                                  inputs=op_inputs,
                                  name=n_org.name + '_new',
                                  attrs=atts)
        out_tens = sess.graph.get_tensor_by_name(n_org.name + '_new' + ':0')
        out_trans = tf.transpose(out_tens, [0, 3, 1, 2],
                                 name=n_org.name + '_transp_out')
        assert out_trans.shape == sess.graph.get_tensor_by_name(n_org.name +
                                                                ':0').shape
Example #4
def _apply_colocation_attr_map(colocation_attr_map, absolute_import_scope):
    """Rewrites colocation constraints in the current default graph.

  Nodes in `absolute_import_scope` get their "_class" attr lists rewritten
  according to `colocation_attr_map`: each entry that matches a key gets
  replaced by the associated values (with deduplication). The node's device
  is updated accordingly.

  Args:
    colocation_attr_map: as returned by _build_colocation_attr_map.
    absolute_import_scope: as for fix_colocation_after_import.

  Raises:
    ValueError: if rewriting runs into an inconsistent value in
      `colocation_attr_map`.
  """
    graph = tf.get_default_graph()
    for op in graph.get_operations():
        # Rewrite the values of the "_class" attr that store colocation constraints.
        # NOTE: The colocation_group loc:@X of a node with itself is not stored
        # explicitly as an attr, so rewrite errors for loc:@X are not triggered
        # by the mere existence of X.
        if not op.name.startswith(absolute_import_scope + "/"): continue
        try:
            class_values = op.get_attr("_class")
        except ValueError:
            continue  # No _class attr found; nothing to do.
        new_attr_value = tf.AttrValue()
        new_coloc_groups = []
        for class_value in class_values:
            if class_value.startswith(tf.compat.as_bytes("loc:@")):
                if class_value not in colocation_attr_map:
                    rewritten_class_value = [class_value]
                else:
                    rewritten_class_value = (
                        colocation_attr_map[class_value].
                        GetConsistentValueOrRaise(
                            "Failed to rewrite colocation constraints while applying "
                            "hub.Module:\n"
                            "The module graph contains a node {op!r} "
                            "that has a colocation constraint {class_value!r} "
                            "with ambiguous rewriting {old_value!r} vs {new_value!r} "
                            "because {old_reason} and {new_reason}, respectively.\n"
                            "To fix, avoid publishing a module with inputs comprising "
                            "multiple outputs of one op that is referenced in "
                            "tf.colocate_with(...) constraints on other ops.",
                            {
                                "op": op.name,
                                "class_value": class_value
                            }))
                new_coloc_groups.extend(rewritten_class_value)
            else:
                new_attr_value.list.s.append(class_value)
        new_coloc_groups = sorted(set(new_coloc_groups))
        new_attr_value.list.s.extend(new_coloc_groups)
        op._set_attr("_class", new_attr_value)  # pylint: disable=protected-access

        # Mimic the code of tf.import_graph_def(): If there are colocation
        # constraints, use any of them to set the device (overriding what the
        # device function stack would do), without attempting to merge or check for
        # equality. If they were inconsistent, TensorFlow's C++ runtime would fail
        # anyways due to conflicting colocation constraints.
        # Note that Hub imports GraphDefs with devices cleared, so this code deals
        # with the result of import_graph_def, not a setting saved in the module.
        if new_coloc_groups:
            new_coloc_device = ""
            for new_coloc_group in new_coloc_groups:
                assert new_coloc_group.startswith(tf.compat.as_bytes("loc:@"))
                new_coloc_target_op = graph.get_operation_by_name(
                    tf.compat.as_str_any(new_coloc_group[5:]))
                new_coloc_device = new_coloc_target_op.device
                if new_coloc_device: break
            # Set this, even if empty, to avoid retaining an outdated value.
            op._set_device(new_coloc_device)  # pylint: disable=protected-access
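
# As a minimal illustration (separate from the function above) of the "_class"
# attr format it rewrites: colocation constraints are stored as a list of
# b"loc:@<node>" byte strings. The node and anchor names here are hypothetical.
import tensorflow as tf

follower = tf.NodeDef(name="follower", op="Identity")
follower.attr["_class"].CopyFrom(
    tf.AttrValue(list=tf.AttrValue.ListValue(s=[b"loc:@anchor"])))
print(follower.attr["_class"].list.s)  # [b'loc:@anchor']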
Example #5
def set_attr_float(node, key, value):
    try:
        node.attr[key].CopyFrom(tf.AttrValue(f=value))
    except KeyError:
        pass
Example #6
def set_attr_dtype(node, key, value):
    try:
        node.attr[key].CopyFrom(tf.AttrValue(type=value.as_datatype_enum))
    except KeyError:
        pass
Example #7
def set_attr_string(node, key, value):
    try:
        node.attr[key].CopyFrom(tf.AttrValue(s=value))
    except KeyError:
        pass
Example #8
def set_attr_int_list(node, key, value):
    list_value = tf.AttrValue.ListValue(i=value)
    try:
        node.attr[key].CopyFrom(tf.AttrValue(list=list_value))
    except KeyError:
        pass
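
# A small usage sketch for the set_attr_* helpers above, assuming a TF 1.x
# environment where tf.NodeDef and tf.AttrValue are the GraphDef protos; the
# node below is hypothetical.
import tensorflow as tf

example_node = tf.NodeDef(name="conv_example", op="Conv2D")
set_attr_dtype(example_node, "T", tf.float32)
set_attr_int_list(example_node, "strides", [1, 1, 1, 1])
set_attr_string(example_node, "padding", b"SAME")
print(example_node.attr["strides"].list.i)  # [1, 1, 1, 1]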
Example #9
import sys

import tensorflow as tf

input_path = sys.argv[1]
output_path = sys.argv[2]

with open(input_path, "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

tf.import_graph_def(graph_def, name="")
graph = tf.get_default_graph()

output_graph_def = tf.GraphDef()
for node in graph_def.node:
    replace_node = tf.NodeDef()
    replace_node.CopyFrom(node)
    if node.name == "value":
        continue
    if node.name == "zero/value_head/Reshape_1":
        replace_node.name = "value"
    if node.name == "inputs":
        replace_node.attr["dtype"].CopyFrom(
            tf.AttrValue(type=tf.float32.as_datatype_enum))
    for i, inp in enumerate(node.input):
        if inp == "Cast":
            replace_node.input[i] = "inputs"
    output_graph_def.node.extend([replace_node])

with open(output_path, "wb") as f:
    f.write(output_graph_def.SerializeToString())
Example #10
def ssd_mobilenet_v1_unsupported_nodes_to_plugin_nodes(ssd_graph, input_shape):
    """Makes ssd_graph TensorRT comparible using graphsurgeon.

    This function takes ssd_graph, which contains graphsurgeon
    DynamicGraph data structure. This structure describes frozen Tensorflow
    graph, that can be modified using graphsurgeon (by deleting, adding,
    replacing certain nodes). The graph is modified by removing
    Tensorflow operations that are not supported by TensorRT's UffParser
    and replacing them with custom layer plugin nodes.

    Note: This specific implementation works only for
    ssd_mobilenet_v2_coco_2018_03_29 network.

    Args:
        ssd_graph (gs.DynamicGraph): graph to convert
        input_shape: input shape in CHW format
    Returns:
        gs.DynamicGraph: UffParser compatible SSD graph
    """
    # Create TRT plugin nodes to replace unsupported ops in Tensorflow graph
    # channels = ModelData.get_input_channels()
    # height = ModelData.get_input_height()
    # width = ModelData.get_input_width()

    channels, height, width = input_shape

    Input = gs.create_plugin_node(name="Input",
                                  op="Placeholder",
                                  dtype=tf.float32,
                                  shape=[1, channels, height, width])
    PriorBox = gs.create_plugin_node(name="GridAnchor", op="GridAnchor_TRT",
                                     minSize=0.2,
                                     maxSize=0.95,
                                     aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
                                     variance=[0.1, 0.1, 0.2, 0.2],
                                     featureMapShapes=[19, 10, 5, 3, 2, 1],
                                     numLayers=6
                                     )
    NMS = gs.create_plugin_node(
        name="NMS",
        op="NMS_TRT",
        shareLocation=1,
        varianceEncodedInTarget=0,
        backgroundLabelId=0,
        confidenceThreshold=1e-8,
        nmsThreshold=0.6,
        topK=100,
        keepTopK=100,
        numClasses=91,
        inputOrder=[0, 2, 1],
        confSigmoid=1,
        isNormalized=1
    )
    concat_priorbox = gs.create_node(
        "concat_priorbox",
        op="ConcatV2",
        dtype=tf.float32,
        axis=2
    )
    concat_box_loc = gs.create_plugin_node(
        "concat_box_loc",
        op="FlattenConcat_TRT",
        dtype=tf.float32,
        axis=1,
        ignoreBatch=0
    )
    concat_box_conf = gs.create_plugin_node(
        "concat_box_conf",
        op="FlattenConcat_TRT",
        dtype=tf.float32,
        axis=1,
        ignoreBatch=0
    )

    const = tf.NodeDef(name="Const", op="Const",
                       attr={"dtype": tf.AttrValue(type=1),
                             "value": tf.AttrValue(tensor=tf.make_tensor_proto([1, 1], dtype=tf.float32))})

    death_list = ["strided_slice_7"]
    ssd_graph.remove(ssd_graph.find_nodes_by_path(death_list))
    ssd_graph.find_nodes_by_path("Shape_6")[0].input.remove("Preprocessor/sub")

    # Create a mapping of namespace names -> plugin nodes.
    namespace_plugin_map = {
        "MultipleGridAnchorGenerator": concat_priorbox,
        "Postprocessor": NMS,
        "Preprocessor/map": Input,
        "ToFloat": Input,
        # "image_tensor": Input,
        "strided_slice_6": PriorBox,
        "Shape_6": const,
        "Concatenate": concat_priorbox,
        "concat": concat_box_loc,
        "concat_1": concat_box_conf
    }
    for node in ssd_graph.graph_inputs:
        namespace_plugin_map[node.name] = Input

    # Create a new graph by collapsing namespaces
    ssd_graph.collapse_namespaces(namespace_plugin_map)
    # Remove the outputs, so we just have a single output node (NMS).
    # If remove_exclusive_dependencies is True, the whole graph will be removed!
    ssd_graph.remove(ssd_graph.graph_outputs, remove_exclusive_dependencies=False)
    # Disconnect the Input node from NMS, as it expects to have only 3 inputs.

    to_remove_nodes = ["Input", "Preprocessor/stack_1"]
    for node in to_remove_nodes:
        if node in ssd_graph.find_nodes_by_op("NMS_TRT")[0].input:
            ssd_graph.find_nodes_by_op("NMS_TRT")[0].input.remove(node)
            if node == "Preprocessor/stack_1":
                ssd_graph.remove(node)

    ssd_graph.find_nodes_by_path("Input")[0].input.remove("image_tensor:0")

    return ssd_graph
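
# A sketch of how this conversion is typically driven with NVIDIA's
# graphsurgeon/uff tooling; the paths, input shape, and output node name below
# are assumptions rather than values taken from the function above.
def example_convert_ssd_to_uff():
    import graphsurgeon as gs
    import uff

    dynamic_graph = gs.DynamicGraph("frozen_inference_graph.pb")
    dynamic_graph = ssd_mobilenet_v1_unsupported_nodes_to_plugin_nodes(
        dynamic_graph, input_shape=(3, 300, 300))
    uff.from_tensorflow(dynamic_graph.as_graph_def(),
                        output_nodes=["NMS"],
                        output_filename="sample_ssd.uff")
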
def fuse_resize_and_conv(input_graph_def, output_node_names):
    """Merges preceding resize and mirror pad ops into a specialized convolution.

  There's a common pattern of enlarging the input to a convolution using a
  resize operation, and also using MirrorPad to extend the boundaries to that
  zero edge pixels don't bleed inwards when convolving. This routine looks for
  that pattern of operations, and fuses them together into a Conv2DWithResizeOp.

  Args:
    input_graph_def: A GraphDef containing a model.

  Returns:
    Modified graph with resize and pad ops merged.

  Raises:
    ValueError: If the graph is badly formed with duplicate node names.
  """

    input_node_map = {}
    for node in input_graph_def.node:
        if node.name not in input_node_map.keys():
            input_node_map[node.name] = node
        else:
            raise ValueError("Duplicate node names detected for ", node.name)

    node_reference_count = collections.defaultdict(int)
    for node in input_graph_def.node:
        for input_name in node.input:
            stripped_name = node_name_from_input(input_name)
            node_reference_count[stripped_name] += 1
    for output_name in output_node_names:
        node_reference_count[output_name] += 1

    new_ops = []
    for node in input_graph_def.node:

        if node.op != "Conv2D":
            continue
        conv_op = node

        input_op = node_from_map(input_node_map, conv_op.input[0])
        if input_op.op == "MirrorPad":
            mirror_pad_op = input_op
            resize_op = node_from_map(input_node_map, mirror_pad_op.input[0])
            if resize_op.op != "ResizeBilinear":
                resize_op = None
        else:
            mirror_pad_op = None
            if input_op.op == "ResizeBilinear":
                resize_op = input_op
            else:
                resize_op = None

        # There are no ops to be fused into the conv, so skip replacing this one.
        if not mirror_pad_op and not resize_op:
            continue

        # We're replacing this node, so make sure the old one is removed.
        node_reference_count[conv_op.name] = 0
        if mirror_pad_op:
            node_reference_count[mirror_pad_op.name] -= 1
        if resize_op:
            node_reference_count[resize_op.name] -= 1

        fused_conv_op = tf.NodeDef()
        if resize_op:
            fused_conv_op.op = "FusedResizeAndPadConv2D"
        else:
            fused_conv_op.op = "FusedPadConv2D"
        fused_conv_op.name = conv_op.name
        if mirror_pad_op:
            mirror_paddings_name = mirror_pad_op.input[1]
            mirror_paddings_mode = mirror_pad_op.attr["mode"]
        else:
            # If there was no MirrorPad op, then create settings that make the padding
            # stage of the fused operation a no-op.
            paddings_op = tf.NodeDef()
            paddings_op.op = "Const"
            paddings_op.name = conv_op.name + "_dummy_paddings"
            paddings_op.attr["dtype"].CopyFrom(
                tf.AttrValue(type=tf.int32.as_datatype_enum))
            paddings_op.attr["value"].CopyFrom(
                tf.AttrValue(tensor=tensor_util.make_tensor_proto(
                    [0, 0, 0, 0, 0, 0, 0, 0], tf.int32, [4, 2])))
            new_ops.extend([paddings_op])
            mirror_paddings_name = paddings_op.name
            mirror_paddings_mode = tf.AttrValue(s=b"REFLECT")
        if resize_op:
            fused_conv_op.input.extend([
                resize_op.input[0], resize_op.input[1], mirror_paddings_name,
                conv_op.input[1]
            ])
            fused_conv_op.attr["resize_align_corners"].CopyFrom(
                resize_op.attr["align_corners"])
        else:
            fused_conv_op.input.extend([
                mirror_pad_op.input[0], mirror_paddings_name, conv_op.input[1]
            ])
        fused_conv_op.attr["T"].CopyFrom(conv_op.attr["T"])
        fused_conv_op.attr["mode"].CopyFrom(mirror_paddings_mode)
        fused_conv_op.attr["strides"].CopyFrom(conv_op.attr["strides"])
        fused_conv_op.attr["padding"].CopyFrom(conv_op.attr["padding"])
        new_ops.extend([fused_conv_op])

    result_graph_def = tf.GraphDef()
    for node in input_graph_def.node:
        if node_reference_count[node.name] < 1:
            continue
        new_node = tf.NodeDef()
        new_node.CopyFrom(node)
        result_graph_def.node.extend([new_node])

    result_graph_def.node.extend(new_ops)
    return result_graph_def
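
# The helpers used above (node_from_map, node_name_from_input, tensor_util,
# collections) are defined alongside this function in TensorFlow's
# optimize_for_inference_lib and are assumed to be in scope here. A minimal
# usage sketch, with a hypothetical frozen model "model.pb" and output node
# "output":
def example_fuse_resize_and_conv():
    graph_def = tf.GraphDef()
    with tf.gfile.Open("model.pb", "rb") as f:
        graph_def.ParseFromString(f.read())
    fused = fuse_resize_and_conv(graph_def, ["output"])
    tf.io.write_graph(fused, "", "model_fused.pb", as_text=False)
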
def fold_batch_norms(input_graph_def):
    """Removes batch normalization ops by folding them into convolutions.

  Batch normalization during training has multiple dynamic parameters that are
  updated, but once the graph is finalized these become constants. That means
  there's an opportunity to reduce the computations down to a scale and
  addition, rather than the more expensive multiple ops, and even bake the
  scaling into the convolution weights. This function identifies the typical
  pattern of batch normalization subgraphs, and performs the transformation to
  fold the computations down into a simpler form. It currently only spots batch
  normalization that's performed by the BatchNormWithGlobalNormalization op, and
  will need to be extended in the future to handle the newer style.

  Args:
    input_graph_def: A GraphDef containing a model.

  Returns:
    Modified graph with BN ops removed, and modified weights.

  Raises:
    ValueError: If the graph is badly formed with duplicate node names.
  """

    input_node_map = {}
    for node in input_graph_def.node:
        if node.name not in input_node_map.keys():
            input_node_map[node.name] = node
        else:
            raise ValueError("Duplicate node names detected for ", node.name)

    nodes_to_skip = {}
    new_ops = []
    for node in input_graph_def.node:
        if node.op != "BatchNormWithGlobalNormalization":
            continue

        conv_op = node_from_map(input_node_map, node.input[0])
        if conv_op.op != "Conv2D":
            tf.logging.warning("Didn't find expected Conv2D input to '%s'" %
                               node.name)
            continue

        weights_op = node_from_map(input_node_map, conv_op.input[1])
        if weights_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected conv Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (conv_op.name, weights_op))
            continue
        weights = values_from_const(weights_op)
        channel_count = weights.shape[3]

        mean_op = node_from_map(input_node_map, node.input[1])
        if mean_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected mean Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, mean_op))
            continue
        mean_value = values_from_const(mean_op)
        if mean_value.shape != (channel_count, ):
            tf.logging.warning(
                "Incorrect shape for mean, found %s, expected %s,"
                " for node %s" % (str(mean_value.shape), str(
                    (channel_count, )), node.name))
            continue

        var_op = node_from_map(input_node_map, node.input[2])
        if var_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected var Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, var_op))
            continue
        var_value = values_from_const(var_op)
        if var_value.shape != (channel_count, ):
            tf.logging.warning(
                "Incorrect shape for var, found %s, expected %s,"
                " for node %s" % (str(var_value.shape), str(
                    (channel_count, )), node.name))
            continue

        beta_op = node_from_map(input_node_map, node.input[3])
        if beta_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected beta Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, beta_op))
            continue
        beta_value = values_from_const(beta_op)
        if beta_value.shape != (channel_count, ):
            tf.logging.warning(
                "Incorrect shape for beta, found %s, expected %s,"
                " for node %s" % (str(beta_value.shape), str(
                    (channel_count, )), node.name))
            continue

        gamma_op = node_from_map(input_node_map, node.input[4])
        if gamma_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected gamma Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, gamma_op))
            continue
        gamma_value = values_from_const(gamma_op)
        if gamma_value.shape != (channel_count, ):
            tf.logging.warning(
                "Incorrect shape for gamma, found %s, expected %s,"
                " for node %s" % (str(gamma_value.shape), str(
                    (channel_count, )), node.name))
            continue

        variance_epsilon_value = node.attr["variance_epsilon"].f
        scale_after_normalization = node.attr["scale_after_normalization"].b
        nodes_to_skip[node.name] = True
        nodes_to_skip[weights_op.name] = True
        nodes_to_skip[mean_op.name] = True
        nodes_to_skip[var_op.name] = True
        nodes_to_skip[beta_op.name] = True
        nodes_to_skip[gamma_op.name] = True
        nodes_to_skip[conv_op.name] = True

        if scale_after_normalization:
            scale_value = ((1.0 / np.vectorize(math.sqrt)
                            (var_value + variance_epsilon_value)) *
                           gamma_value)
        else:
            scale_value = (
                1.0 /
                np.vectorize(math.sqrt)(var_value + variance_epsilon_value))
        offset_value = (-mean_value * scale_value) + beta_value
        scaled_weights = np.copy(weights)
        it = np.nditer(scaled_weights,
                       flags=["multi_index"],
                       op_flags=["readwrite"])
        while not it.finished:
            current_scale = scale_value[it.multi_index[3]]
            it[0] *= current_scale
            it.iternext()
        scaled_weights_op = tf.NodeDef()
        scaled_weights_op.op = "Const"
        scaled_weights_op.name = weights_op.name
        scaled_weights_op.attr["dtype"].CopyFrom(weights_op.attr["dtype"])
        scaled_weights_op.attr["value"].CopyFrom(
            tf.AttrValue(tensor=tensor_util.make_tensor_proto(
                scaled_weights, weights.dtype.type, weights.shape)))
        new_conv_op = tf.NodeDef()
        new_conv_op.CopyFrom(conv_op)
        offset_op = tf.NodeDef()
        offset_op.op = "Const"
        offset_op.name = conv_op.name + "_bn_offset"
        offset_op.attr["dtype"].CopyFrom(mean_op.attr["dtype"])
        offset_op.attr["value"].CopyFrom(
            tf.AttrValue(tensor=tensor_util.make_tensor_proto(
                offset_value, mean_value.dtype.type, offset_value.shape)))
        bias_add_op = tf.NodeDef()
        bias_add_op.op = "BiasAdd"
        bias_add_op.name = node.name
        bias_add_op.attr["T"].CopyFrom(conv_op.attr["T"])
        bias_add_op.input.extend([new_conv_op.name, offset_op.name])
        new_ops.extend(
            [scaled_weights_op, new_conv_op, offset_op, bias_add_op])

    result_graph_def = tf.GraphDef()
    for node in input_graph_def.node:
        if node.name in nodes_to_skip:
            continue
        new_node = tf.NodeDef()
        new_node.CopyFrom(node)
        result_graph_def.node.extend([new_node])

    result_graph_def.node.extend(new_ops)
    return result_graph_def
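
# A toy numeric check (made-up values) of the folding identity fold_batch_norms
# relies on: with scale = gamma / sqrt(var + eps) and offset = beta - mean * scale,
# conv_output * scale + offset equals the original batch-norm output.
def example_check_bn_folding():
    import numpy as np
    var, gamma, beta, mean, eps = 4.0, 2.0, 0.5, 1.0, 0.001
    scale = gamma / np.sqrt(var + eps)   # scale_after_normalization=True case
    offset = -mean * scale + beta
    y = 3.0                              # one conv output value in one channel
    bn_out = (y - mean) / np.sqrt(var + eps) * gamma + beta
    folded_out = y * scale + offset
    print(bn_out, folded_out)            # the two agree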
Example #13
def PruneGraph(graph_def):
    # Prune graph. The same rewiring is applied to transform2_block and
    # transform2_block_1 ... transform2_block_5, so the per-block edits are
    # expressed as loops over the block prefixes.
    blocks = [
        "Tower_0/gpu/transform2_block" + ("" if i == 0 else "_%d" % i)
        for i in range(6)
    ]

    for block in blocks:
        # Rewire MatMul_1 to take the Softmax output directly.
        node = getNodeByName(graph_def,
                             block + "/multi_head_attention/MatMul_1")
        node.input[0] = block + "/multi_head_attention/Softmax"

        # Rewire the add nodes to take the BiasAdd outputs directly.
        node = getNodeByName(graph_def, block + "/add")
        node.input[1] = (
            block + "/multi_head_attention/conv_1d_1/conv_1d_operation/BiasAdd")
        node = getNodeByName(graph_def, block + "/add_1")
        node.input[1] = block + "/ffn_mlp/conv_1d_1/conv_1d_operation/BiasAdd"

    node = getNodeByName(graph_def, "SparseToDense")
    node.input[2] = "GatherNd"

    for block in blocks:
        for ln in ["ln_layer", "ln_layer_1"]:
            # Create a reduction_indices constant (in the current default graph)
            # and point the variance op at it.
            indices_name = (block + "/" + ln +
                            "/moments/variance/reduction_indices")
            tf.constant([1], name=indices_name)
            node = getNodeByName(graph_def,
                                 block + "/" + ln + "/moments/variance")
            node.input[1] = indices_name

    for block in blocks:
        for ln in ["ln_layer", "ln_layer_1"]:
            # Overwrite the reduction_indices constant value.
            node = getNodeByName(
                graph_def,
                block + "/" + ln + "/moments/variance/reduction_indices")
            node.attr["value"].CopyFrom(
                tf.AttrValue(tensor=tf.make_tensor_proto([1], tf.int32, [2])))

    return graph_def
Example #14
def convert_frozen_model_to_NWHC(path):
    graph_def_file = path

    tf.reset_default_graph()
    graph_def = tf.GraphDef()
    with tf.Session() as sess:
        # Read binary pb graph from file
        with tf.gfile.Open(graph_def_file, "rb") as f:
            data2read = f.read()
            graph_def.ParseFromString(data2read)
        tf.graph_util.import_graph_def(graph_def, name='')

        # Get Nodes
        conv_nodes = [
            n for n in sess.graph.get_operations()
            if n.type in ['Conv2D', 'MaxPool', 'AvgPool']
        ]
        for n_org in conv_nodes:
            # Transpose input
            assert len(n_org.inputs) == 1 or len(n_org.inputs) == 2
            org_inp_tens = sess.graph.get_tensor_by_name(n_org.inputs[0].name)
            inp_tens = tf.transpose(org_inp_tens, [0, 2, 3, 1],
                                    name=n_org.name + '_transp_input')
            op_inputs = [inp_tens]

            # Get filters for Conv but don't transpose
            if n_org.type == 'Conv2D':
                filter_tens = sess.graph.get_tensor_by_name(
                    n_org.inputs[1].name)
                op_inputs.append(filter_tens)

            # Attributes without data_format; NHWC is the default
            atts = {
                key: n_org.node_def.attr[key]
                for key in list(n_org.node_def.attr.keys())
                if key != 'data_format'
            }
            if n_org.type in ['MaxPool', 'AvgPool']:
                kl = atts['ksize'].list.i
                ksl = [kl[0], kl[2], kl[3], kl[1]]
                st = atts['strides'].list.i
                stl = [st[0], st[2], st[3], st[1]]
                atts['ksize'] = tf.AttrValue(list=tf.AttrValue.ListValue(
                    i=ksl))
                atts['strides'] = tf.AttrValue(list=tf.AttrValue.ListValue(
                    i=stl))

            if n_org.type == 'Conv2D':
                st = atts['strides'].list.i
                stl = [st[0], st[2], st[3], st[1]]
                atts['strides'] = tf.AttrValue(list=tf.AttrValue.ListValue(
                    i=stl))
            # Create new Operation
            # print(n_org.type, n_org.name, list(n_org.inputs), n_org.node_def.attr['data_format'])
            op = sess.graph.create_op(op_type=n_org.type,
                                      inputs=op_inputs,
                                      name=n_org.name + '_new',
                                      attrs=atts)
            out_tens = sess.graph.get_tensor_by_name(n_org.name + '_new' +
                                                     ':0')
            out_trans = tf.transpose(out_tens, [0, 3, 1, 2],
                                     name=n_org.name + '_transp_out')
            # assert out_trans.shape == sess.graph.get_tensor_by_name(n_org.name + ':0').shape
            print(out_trans.shape,
                  sess.graph.get_tensor_by_name(n_org.name + ':0').shape)
            # Update Connections
            out_nodes = [
                n for n in sess.graph.get_operations()
                if n_org.outputs[0] in n.inputs
            ]
            for out in out_nodes:
                for j, nam in enumerate(out.inputs):
                    if n_org.outputs[0] == nam:
                        out._update_input(j, out_trans)

        # Delete old nodes
        graph_def = sess.graph.as_graph_def()
        for on in conv_nodes:
            graph_def.node.remove(on.node_def)

        # Write graph
        tf.io.write_graph(graph_def,
                          "",
                          graph_def_file.rsplit('.', 1)[0] + '_toco.pb',
                          as_text=False)
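
# A sketch of invoking the conversion above; the path is hypothetical. Per the
# rsplit call, the rewritten graph is written next to the input with a
# "_toco.pb" suffix.
if __name__ == "__main__":
    convert_frozen_model_to_NWHC("model_nchw.pb")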
Example #15
    def tf_run_frozen_graph(self, file, xla, parallel, warmup, num_iter):
        print("run frozen graph----------------------------")
        graph_def, graph = self.import_graph(file)
        if (self.debug):
            print()
            print('Operations:')
        assert graph is not None
        ops = graph.get_operations()  # type: Iterable[tf.Operation]
        input_nodes = []
        variables_nodes = []
        last_nodes = []
        for op in ops:
            if (self.debug):
                print('- {0:20s} "{1}" ({2} outputs)'.format(
                    op.type, op.name, len(op.outputs)))
            last_nodes = op.outputs
            if op.type == 'Placeholder':
                for node in op.outputs:
                    input_nodes.append(node)
            if "Variable" in op.type:
                variables_nodes.append(op)

        if (self.debug):
            print()
            print('Sources (operations without inputs):')
            for op in ops:
                if len(op.inputs) > 0:
                    continue
                print('- {0}'.format(op.name))

            print()
            print('Operation inputs:')
            for op in ops:
                if len(op.inputs) == 0:
                    continue
                print('- {0:20}'.format(op.name))
                print('  {0}'.format(', '.join(i.name for i in op.inputs)))

            print()
            print('Tensors:')
            for op in ops:
                for out in op.outputs:
                    print('- {0:20} {1:10} "{2}"'.format(
                        str(out.shape), out.dtype.name, out.name))
        with tf.Session(graph=graph) as sess:
            var_inits = []
            g_def = graph.as_graph_def()
            for var in variables_nodes:
                vt = graph.get_tensor_by_name(var.outputs[0].name)
                # v = tf.get_variable(name = var.name, shape = vt.shape, initializer = tf.ones_initializer)
                # Ones initializer
                dt = tf.as_dtype(vt.dtype.base_dtype).as_datatype_enum
                dt_int32 = tf.as_dtype(tf.int32).as_datatype_enum

                init = tf.NodeDef(
                    name=var.name + "/ones",
                    op="Fill",
                    input=[var.name + "/ones/shape", var.name + "/ones/const"],
                    attr={
                        'T': tf.AttrValue(type=dt),
                        'index_type': tf.AttrValue(type=dt_int32)
                    })

                shape = tf.NodeDef(
                    name=var.name + "/ones/shape",
                    op="Const",
                    attr={
                        "dtype":
                        tf.AttrValue(type=dt_int32),
                        "value":
                        tf.AttrValue(tensor=tf.make_tensor_proto(
                            vt.get_shape().as_list()))
                    })

                const = tf.NodeDef(
                    name=var.name + "/ones/const",
                    op="Const",
                    #dtype =tf.AttrValue(type=dt),
                    attr={
                        "dtype": tf.AttrValue(type=dt),
                        "value":
                        tf.AttrValue(tensor=tf.make_tensor_proto(1.0, dt))
                    })

                node = tf.NodeDef(name=var.name + "/assign",
                                  op='Assign',
                                  input=[var.name, var.name + "/ones"],
                                  attr={
                                      'use_locking': tf.AttrValue(b=False),
                                      'validate_shape': tf.AttrValue(b=True),
                                      'T': tf.AttrValue(type=dt)
                                  })
                g_def.node.extend([shape, const, init, node])
                var_inits.append("^" + var.name + "/assign")

            noop_assign = tf.NodeDef(name="init_all_var",
                                     op="NoOp",
                                     input=var_inits)
            g_def.node.extend([noop_assign])

        tf.reset_default_graph()
        tf.import_graph_def(g_def)

        session_conf = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            graph_options=tf.GraphOptions(infer_shapes=True),
            inter_op_parallelism_threads=parallel)

        if xla:
            session_conf.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

        with tf.Session(config=session_conf) as sess:
            init = tf.get_default_graph().get_operation_by_name(
                "import/init_all_var")

            input_nodes = []
            varlist = []
            feed_dict = {}
            aps = []

            ops = tf.get_default_graph().get_operations()
            for op in ops:
                if op.type == 'Placeholder':
                    for node in op.outputs:
                        feed_dict[node] = np.ones(
                            node.shape, dtype=node.dtype.as_numpy_dtype())

            # Get result of applygradient
            for op in ops:
                if "ApplyGradient" in str(op.type):
                    aps.append(op)
                    varlist.append(op.inputs[0])

            last_outputs = []
            num_nodes = len(ops)
            name2nodeIdx_map = {}
            for i in range(num_nodes):
                name2nodeIdx_map[ops[i].name] = i
            node_outputs_ = [[] for i in range(num_nodes)]
            for n in range(num_nodes):
                op = ops[n]
                pending_count = len(op.inputs)
                for i in range(pending_count):
                    input_name_id = op.inputs[i].name.split(':')
                    node_outputs_[name2nodeIdx_map[input_name_id[0]]].append(n)
            for n in range(num_nodes):
                if len(node_outputs_[n]) == 0 and ops[n].type != 'NoOp':
                    print('- {0:20s} {1}'.format(ops[n].type, ops[n].name))
                    for m in range(len(ops[n].inputs)):
                        print('<-in-- {0:20s}'.format(ops[n].inputs[m].name))
                        last_outputs.append(ops[n].inputs[m])

            # Init as Ones
            sess.run(init)
            # Get vals before apply_gradients
            for i in range(warmup):
                ret = sess.run(last_outputs + varlist, feed_dict)
                for i in range(0, len(last_outputs)):
                    out_flat = ret[i].flat
                    if (len(out_flat) > 0):
                        max_len = min(10, len(out_flat))
                        print(last_outputs[i].name)
                        print(out_flat[:max_len], "...(size=", len(out_flat),
                              "end with", out_flat[-1], ")")
                # Do the apply_gradient
                sess.run(init)
                ret1 = sess.run(varlist + aps, feed_dict)
                print("Updated:")
                for i in range(0, len(varlist)):
                    print(varlist[i].name, ret1[i])

            iter_times = []
            for i in range(num_iter):
                start_time = time.time()
                ret = sess.run(last_outputs + varlist, feed_dict)
                ret1 = sess.run(varlist + aps, feed_dict)
                iter_time = (time.time() - start_time) * 1000
                iter_times.append(iter_time)
                print("Iteration time %f ms" % (iter_time))

            print("Summary: [min, max, mean] = [%f, %f, %f] ms" %
                  (min(iter_times), max(iter_times),
                   sum(iter_times) / len(iter_times)))