def set_attr_dtype(self, node, key, value):
    """Overwrite ``node.attr[key]`` with a ``type`` AttrValue.

    Args:
        node: Object whose ``attr`` mapping is updated in place.
        key: Name of the attribute to overwrite.
        value: Object exposing ``as_datatype_enum``; that enum is stored.
    """
    target = node.attr[key]
    target.CopyFrom(tf.AttrValue(type=value.as_datatype_enum))
def set_attr_tensor(self, node, key, value, dtype, shape=None):
    """Overwrite ``node.attr[key]`` with a ``tensor`` AttrValue.

    Args:
        node: Object whose ``attr`` mapping is updated in place.
        key: Name of the attribute to overwrite.
        value: Values forwarded to ``tensor_util.make_tensor_proto``.
        dtype: Forwarded as the ``dtype`` keyword of ``make_tensor_proto``.
        shape: Forwarded as the ``shape`` keyword; defaults to ``None``.
    """
    target = node.attr[key]
    tensor_proto = tensor_util.make_tensor_proto(
        value, dtype=dtype, shape=shape)
    target.CopyFrom(tf.AttrValue(tensor=tensor_proto))
op_inputs = [inp_tens]

# Conv2D also consumes a filter tensor; it is forwarded without a transpose.
if n_org.type == 'Conv2D':
    filter_tens = sess.graph.get_tensor_by_name(n_org.inputs[1].name)
    op_inputs.append(filter_tens)

# Copy every attribute except data_format, so the new op uses the default
# layout.
atts = {}
for key in list(n_org.node_def.attr.keys()):
    if key != 'data_format':
        atts[key] = n_org.node_def.attr[key]

# ksize / strides are 4-entry int lists; reorder them with the same
# (0, 2, 3, 1) permutation that is applied to the data.
for list_key in ('ksize', 'strides'):
    if list_key in atts:
        old = atts[list_key].list.i
        atts[list_key] = tf.AttrValue(
            list=tf.AttrValue.ListValue(i=[old[0], old[2], old[3], old[1]]))

# Create the replacement operation next to the original one.
new_name = n_org.name + '_new'
op = sess.graph.create_op(op_type=n_org.type,
                          inputs=op_inputs,
                          name=new_name,
                          attrs=atts)
out_tens = sess.graph.get_tensor_by_name(new_name + ':0')

# Transpose the result back so it can be compared with the original output.
out_trans = tf.transpose(out_tens, [0, 3, 1, 2],
                         name=n_org.name + '_transp_out')
assert out_trans.shape == sess.graph.get_tensor_by_name(n_org.name + ':0').shape
def _apply_colocation_attr_map(colocation_attr_map, absolute_import_scope):
    """Rewrites colocation constraints in the current default graph.

    For every op whose name starts with `absolute_import_scope + "/"` and that
    has a "_class" attr, each "loc:@..." entry that is a key of
    `colocation_attr_map` is replaced by the associated values; other
    "loc:@..." entries are kept. The resulting groups are deduplicated and
    sorted, and the op's device is then set from the colocation targets.

    Args:
      colocation_attr_map: as returned by _build_colocation_attr_map.
      absolute_import_scope: as for fix_colocation_after_import.

    Raises:
      ValueError: if rewriting runs into an inconsistent value in
        `colocation_attr_map`.
    """
    graph = tf.get_default_graph()
    loc_prefix = tf.compat.as_bytes("loc:@")

    for op in graph.get_operations():
        # NOTE: The colocation group loc:@X of a node with itself is not stored
        # explicitly as an attr, so rewrite errors for loc:@X are not triggered
        # by the mere existence of X.
        if not op.name.startswith(absolute_import_scope + "/"):
            continue
        try:
            class_values = op.get_attr("_class")
        except ValueError:
            continue  # No _class attr found; nothing to do.

        new_attr_value = tf.AttrValue()
        new_coloc_groups = []
        for class_value in class_values:
            if not class_value.startswith(loc_prefix):
                # Not a colocation entry: keep it untouched.
                new_attr_value.list.s.append(class_value)
                continue
            if class_value in colocation_attr_map:
                error_template = (
                    "Failed to rewrite colocation constraints while applying "
                    "hub.Module:\n"
                    "The module graph contains a node {op!r} "
                    "that has a colocation constraint {class_value!r} "
                    "with ambiguous rewriting {old_value!r} vs {new_value!r} "
                    "because {old_reason} and {new_reason}, respectively.\n"
                    "To fix, avoid publishing a module with inputs comprising "
                    "multiple outputs of one op that is referenced in "
                    "tf.colocate_with(...) constraints on other ops.")
                error_context = {"op": op.name, "class_value": class_value}
                rewritten = colocation_attr_map[
                    class_value].GetConsistentValueOrRaise(
                        error_template, error_context)
            else:
                rewritten = [class_value]
            new_coloc_groups.extend(rewritten)

        new_coloc_groups = sorted(set(new_coloc_groups))
        new_attr_value.list.s.extend(new_coloc_groups)
        op._set_attr("_class", new_attr_value)  # pylint: disable=protected-access

        # Mimic the code of tf.import_graph_def(): if there are colocation
        # constraints, use any of them to set the device (overriding what the
        # device function stack would do), without attempting to merge or
        # check for equality. If they were inconsistent, TensorFlow's C++
        # runtime would fail anyways due to conflicting colocation
        # constraints. Note that Hub imports GraphDefs with devices cleared,
        # so this deals with the result of import_graph_def, not a setting
        # saved in the module.
        if not new_coloc_groups:
            continue
        new_coloc_device = ""
        for group in new_coloc_groups:
            assert group.startswith(tf.compat.as_bytes("loc:@"))
            target_op = graph.get_operation_by_name(
                tf.compat.as_str_any(group[5:]))
            new_coloc_device = target_op.device
            if new_coloc_device:
                break
        # Set this, even if empty, to avoid retaining an outdated value.
        op._set_device(new_coloc_device)  # pylint: disable=protected-access
def set_attr_float(node, key, value):
    """Overwrite ``node.attr[key]`` with a float (``f``) AttrValue.

    A ``KeyError`` raised while updating the attribute is ignored.

    Args:
        node: Object whose ``attr`` mapping is updated in place.
        key: Name of the attribute to overwrite.
        value: Value passed as the ``f`` field of ``tf.AttrValue``.
    """
    try:
        target = node.attr[key]
        target.CopyFrom(tf.AttrValue(f=value))
    except KeyError:
        pass
def set_attr_dtype(node, key, value):
    """Overwrite ``node.attr[key]`` with a ``type`` AttrValue.

    A ``KeyError`` raised while updating the attribute is ignored.

    Args:
        node: Object whose ``attr`` mapping is updated in place.
        key: Name of the attribute to overwrite.
        value: Object exposing ``as_datatype_enum``; that enum is stored.
    """
    try:
        target = node.attr[key]
        target.CopyFrom(tf.AttrValue(type=value.as_datatype_enum))
    except KeyError:
        pass
def set_attr_string(node, key, value):
    """Overwrite ``node.attr[key]`` with a string (``s``) AttrValue.

    A ``KeyError`` raised while updating the attribute is ignored.

    Args:
        node: Object whose ``attr`` mapping is updated in place.
        key: Name of the attribute to overwrite.
        value: Value passed as the ``s`` field of ``tf.AttrValue``.
    """
    try:
        target = node.attr[key]
        target.CopyFrom(tf.AttrValue(s=value))
    except KeyError:
        pass
def set_attr_int_list(node, key, value):
    """Overwrite ``node.attr[key]`` with an integer-list AttrValue.

    The list value is built before the guarded update, so only a ``KeyError``
    raised while updating the attribute is ignored.

    Args:
        node: Object whose ``attr`` mapping is updated in place.
        key: Name of the attribute to overwrite.
        value: Values passed as the ``i`` field of ``tf.AttrValue.ListValue``.
    """
    int_list = tf.AttrValue.ListValue(i=value)
    try:
        target = node.attr[key]
        target.CopyFrom(tf.AttrValue(list=int_list))
    except KeyError:
        pass
import tensorflow as tf

# Usage: <script> INPUT_GRAPH_PB OUTPUT_GRAPH_PB
input_path = sys.argv[1]
output_path = sys.argv[2]

# Parse the serialized GraphDef from disk.
with open(input_path, "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

# Import the parsed graph into the default graph.
tf.import_graph_def(graph_def, name="")
graph = tf.get_default_graph()

# Copy the nodes one by one, patching a few of them on the way:
#   * the node named "value" is dropped,
#   * "zero/value_head/Reshape_1" is renamed to "value",
#   * the "inputs" node gets its dtype attr set to float32,
#   * every input equal to "Cast" is replaced by "inputs".
output_graph_def = tf.GraphDef()
for node in graph_def.node:
    replace_node = tf.NodeDef()
    replace_node.CopyFrom(node)

    if node.name == "value":
        continue
    elif node.name == "zero/value_head/Reshape_1":
        replace_node.name = "value"
    elif node.name == "inputs":
        float_type = tf.AttrValue(type=tf.float32.as_datatype_enum)
        replace_node.attr["dtype"].CopyFrom(float_type)

    cast_positions = [i for i, inp in enumerate(node.input) if inp == "Cast"]
    for i in cast_positions:
        replace_node.input[i] = "inputs"

    output_graph_def.node.extend([replace_node])

# Serialize the patched graph.
with open(output_path, "wb") as f:
    f.write(output_graph_def.SerializeToString())
def ssd_mobilenet_v1_unsupported_nodes_to_plugin_nodes(ssd_graph, input_shape):
    """Makes ssd_graph TensorRT compatible using graphsurgeon.

    The graph is modified in place: selected nodes are removed and whole
    namespaces are collapsed into plugin nodes (``GridAnchor_TRT``,
    ``NMS_TRT``, ``FlattenConcat_TRT``), a ``ConcatV2`` node, a ``Const`` node
    and a ``Placeholder`` input node created here.

    Note:
        The original note states this implementation works only for the
        ssd_mobilenet_v2_coco_2018_03_29 network.
        NOTE(review): the function name says "v1" -- confirm which model the
        hard-coded node names belong to.

    Args:
        ssd_graph (gs.DynamicGraph): graph to convert
        input_shape: input shape in CHW format (unpacked as
            channels, height, width)

    Returns:
        gs.DynamicGraph: the same ``ssd_graph`` object after modification
    """
    channels, height, width = input_shape

    # Plugin / replacement nodes.
    input_plugin = gs.create_plugin_node(
        name="Input",
        op="Placeholder",
        dtype=tf.float32,
        shape=[1, channels, height, width])
    prior_box_plugin = gs.create_plugin_node(
        name="GridAnchor",
        op="GridAnchor_TRT",
        minSize=0.2,
        maxSize=0.95,
        aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
        variance=[0.1, 0.1, 0.2, 0.2],
        featureMapShapes=[19, 10, 5, 3, 2, 1],
        numLayers=6)
    nms_plugin = gs.create_plugin_node(
        name="NMS",
        op="NMS_TRT",
        shareLocation=1,
        varianceEncodedInTarget=0,
        backgroundLabelId=0,
        confidenceThreshold=1e-8,
        nmsThreshold=0.6,
        topK=100,
        keepTopK=100,
        numClasses=91,
        inputOrder=[0, 2, 1],
        confSigmoid=1,
        isNormalized=1)
    concat_priorbox = gs.create_node(
        "concat_priorbox", op="ConcatV2", dtype=tf.float32, axis=2)
    concat_box_loc = gs.create_plugin_node(
        "concat_box_loc",
        op="FlattenConcat_TRT",
        dtype=tf.float32,
        axis=1,
        ignoreBatch=0)
    concat_box_conf = gs.create_plugin_node(
        "concat_box_conf",
        op="FlattenConcat_TRT",
        dtype=tf.float32,
        axis=1,
        ignoreBatch=0)
    const_value = tf.AttrValue(
        tensor=tf.make_tensor_proto([1, 1], dtype=tf.float32))
    const = tf.NodeDef(
        name="Const",
        op="Const",
        attr={"dtype": tf.AttrValue(type=1), "value": const_value})

    # Drop nodes / edges that must not survive the namespace collapse.
    death_list = ["strided_slice_7"]
    ssd_graph.remove(ssd_graph.find_nodes_by_path(death_list))
    shape_node = ssd_graph.find_nodes_by_path("Shape_6")[0]
    shape_node.input.remove("Preprocessor/sub")

    # Mapping of namespace names -> replacement nodes.
    namespace_plugin_map = {
        "MultipleGridAnchorGenerator": concat_priorbox,
        "Postprocessor": nms_plugin,
        "Preprocessor/map": input_plugin,
        "ToFloat": input_plugin,
        "strided_slice_6": prior_box_plugin,
        "Shape_6": const,
        "Concatenate": concat_priorbox,
        "concat": concat_box_loc,
        "concat_1": concat_box_conf,
    }
    # Every current graph input is replaced by the Input placeholder as well.
    namespace_plugin_map.update(
        {node.name: input_plugin for node in ssd_graph.graph_inputs})

    # Create a new graph by collapsing namespaces.
    ssd_graph.collapse_namespaces(namespace_plugin_map)

    # Remove the outputs, so we just have a single output node (NMS).
    # If remove_exclusive_dependencies is True, the whole graph will be removed!
    ssd_graph.remove(ssd_graph.graph_outputs,
                     remove_exclusive_dependencies=False)

    # Disconnect the Input node from NMS, as it expects to have only 3 inputs.
    for node_name in ["Input", "Preprocessor/stack_1"]:
        nms_inputs = ssd_graph.find_nodes_by_op("NMS_TRT")[0].input
        if node_name not in nms_inputs:
            continue
        ssd_graph.find_nodes_by_op("NMS_TRT")[0].input.remove(node_name)
        if node_name == "Preprocessor/stack_1":
            ssd_graph.remove(node_name)

    ssd_graph.find_nodes_by_path("Input")[0].input.remove("image_tensor:0")
    return ssd_graph
def fuse_resize_and_conv(input_graph_def, output_node_names):
    """Merges preceding resize and mirror pad ops into a fused convolution.

    For every Conv2D node whose first input is a MirrorPad (optionally fed by
    a ResizeBilinear) or a ResizeBilinear, a replacement node with the same
    name is emitted: "FusedResizeAndPadConv2D" when a resize is present,
    otherwise "FusedPadConv2D". When there is no MirrorPad, an all-zero
    [4, 2] int32 paddings Const named "<conv name>_dummy_paddings" is added so
    that the padding stage does nothing.

    Args:
      input_graph_def: A GraphDef containing a model.
      output_node_names: Names of nodes that count as referenced even if no
        other node consumes them.

    Returns:
      A new GraphDef holding copies of the nodes whose reference count is
      still at least one, followed by the newly created nodes.

    Raises:
      ValueError: If the graph is badly formed with duplicate node names.
    """
    nodes_by_name = {}
    for node in input_graph_def.node:
        if node.name in nodes_by_name.keys():
            raise ValueError("Duplicate node names detected for ", node.name)
        nodes_by_name[node.name] = node

    # Count how often each node is consumed; requested outputs count too.
    ref_counts = collections.defaultdict(int)
    for node in input_graph_def.node:
        for input_name in node.input:
            ref_counts[node_name_from_input(input_name)] += 1
    for output_name in output_node_names:
        ref_counts[output_name] += 1

    new_ops = []
    for conv_op in input_graph_def.node:
        if conv_op.op != "Conv2D":
            continue

        input_op = node_from_map(nodes_by_name, conv_op.input[0])
        mirror_pad_op = None
        resize_op = None
        if input_op.op == "MirrorPad":
            mirror_pad_op = input_op
            pad_source = node_from_map(nodes_by_name, mirror_pad_op.input[0])
            if pad_source.op == "ResizeBilinear":
                resize_op = pad_source
        elif input_op.op == "ResizeBilinear":
            resize_op = input_op

        # There are no ops to be fused into the conv, so skip this one.
        if not (mirror_pad_op or resize_op):
            continue

        # The conv is replaced, so the old one must be dropped; each fused
        # producer loses the reference that came from this chain.
        ref_counts[conv_op.name] = 0
        if mirror_pad_op:
            ref_counts[mirror_pad_op.name] -= 1
        if resize_op:
            ref_counts[resize_op.name] -= 1

        fused_conv_op = tf.NodeDef()
        fused_conv_op.op = (
            "FusedResizeAndPadConv2D" if resize_op else "FusedPadConv2D")
        fused_conv_op.name = conv_op.name

        if mirror_pad_op:
            mirror_paddings_name = mirror_pad_op.input[1]
            mirror_paddings_mode = mirror_pad_op.attr["mode"]
        else:
            # No MirrorPad op: create settings that make the padding stage of
            # the fused operation a no-op.
            paddings_op = tf.NodeDef()
            paddings_op.op = "Const"
            paddings_op.name = conv_op.name + "_dummy_paddings"
            paddings_op.attr["dtype"].CopyFrom(
                tf.AttrValue(type=tf.int32.as_datatype_enum))
            zero_paddings = tensor_util.make_tensor_proto(
                [0, 0, 0, 0, 0, 0, 0, 0], tf.int32, [4, 2])
            paddings_op.attr["value"].CopyFrom(
                tf.AttrValue(tensor=zero_paddings))
            new_ops.extend([paddings_op])
            mirror_paddings_name = paddings_op.name
            mirror_paddings_mode = tf.AttrValue(s=b"REFLECT")

        if resize_op:
            fused_inputs = [
                resize_op.input[0], resize_op.input[1], mirror_paddings_name,
                conv_op.input[1]
            ]
            fused_conv_op.input.extend(fused_inputs)
            fused_conv_op.attr["resize_align_corners"].CopyFrom(
                resize_op.attr["align_corners"])
        else:
            fused_inputs = [
                mirror_pad_op.input[0], mirror_paddings_name, conv_op.input[1]
            ]
            fused_conv_op.input.extend(fused_inputs)

        fused_conv_op.attr["T"].CopyFrom(conv_op.attr["T"])
        fused_conv_op.attr["mode"].CopyFrom(mirror_paddings_mode)
        fused_conv_op.attr["strides"].CopyFrom(conv_op.attr["strides"])
        fused_conv_op.attr["padding"].CopyFrom(conv_op.attr["padding"])
        new_ops.extend([fused_conv_op])

    # Keep only the nodes that are still referenced, then append the new ones.
    result_graph_def = tf.GraphDef()
    for node in input_graph_def.node:
        if node_reference_is_live(ref_counts, node.name):
            kept = tf.NodeDef()
            kept.CopyFrom(node)
            result_graph_def.node.extend([kept])
    result_graph_def.node.extend(new_ops)
    return result_graph_def


def node_reference_is_live(ref_counts, name):
    """Return True when ``ref_counts[name]`` is at least one."""
    return not ref_counts[name] < 1
def fold_batch_norms(input_graph_def):
    """Removes batch normalization ops by folding them into convolutions.

    Batch normalization during training has multiple dynamic parameters that are
    updated, but once the graph is finalized these become constants. That means
    there's an opportunity to reduce the computations down to a scale and
    addition, rather than the more expensive multiple ops, and even bake the
    scaling into the convolution weights. This function identifies the typical
    pattern of batch normalization subgraphs, and performs the transformation to
    fold the computations down into a simpler form. It currently only spots batch
    normalization that's performed by the BatchNormWithGlobalNormalization op, and
    will need to be extended in the future to handle the newer style.

    A batch-norm node is only folded when its first input is a Conv2D whose
    weights are a Const, and its mean, variance, beta and gamma inputs are all
    Const nodes of shape (channel_count,); otherwise a warning is logged and
    the node is left untouched.

    Args:
      input_graph_def: A GraphDef containing a model.

    Returns:
      Modified graph with BN ops removed, and modified weights.

    Raises:
      ValueError: If the graph is badly formed with duplicate node names.
    """
    # Index nodes by name, rejecting duplicates.
    input_node_map = {}
    for node in input_graph_def.node:
        if node.name not in input_node_map.keys():
            input_node_map[node.name] = node
        else:
            raise ValueError("Duplicate node names detected for ", node.name)

    # Names of nodes that are replaced (used as a set) and their replacements.
    nodes_to_skip = {}
    new_ops = []
    for node in input_graph_def.node:
        if node.op != "BatchNormWithGlobalNormalization":
            continue

        # Input 0 of the batch norm must be the convolution to fold into.
        conv_op = node_from_map(input_node_map, node.input[0])
        if conv_op.op != "Conv2D":
            tf.logging.warning("Didn't find expected Conv2D input to '%s'" %
                               node.name)
            continue

        # Input 1 of the convolution holds its weights.
        weights_op = node_from_map(input_node_map, conv_op.input[1])
        if weights_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected conv Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (conv_op.name, weights_op))
            continue
        weights = values_from_const(weights_op)
        # The size of axis 3 of the weights is what mean/var/beta/gamma are
        # validated against below.
        channel_count = weights.shape[3]

        # Input 1 of the batch norm: mean.
        mean_op = node_from_map(input_node_map, node.input[1])
        if mean_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected mean Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, mean_op))
            continue
        mean_value = values_from_const(mean_op)
        if mean_value.shape != (channel_count, ):
            tf.logging.warning(
                "Incorrect shape for mean, found %s, expected %s,"
                " for node %s" % (str(mean_value.shape), str(
                    (channel_count, )), node.name))
            continue

        # Input 2 of the batch norm: variance.
        var_op = node_from_map(input_node_map, node.input[2])
        if var_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected var Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, var_op))
            continue
        var_value = values_from_const(var_op)
        if var_value.shape != (channel_count, ):
            tf.logging.warning(
                "Incorrect shape for var, found %s, expected %s,"
                " for node %s" % (str(var_value.shape), str(
                    (channel_count, )), node.name))
            continue

        # Input 3 of the batch norm: beta.
        beta_op = node_from_map(input_node_map, node.input[3])
        if beta_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected beta Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, beta_op))
            continue
        beta_value = values_from_const(beta_op)
        if beta_value.shape != (channel_count, ):
            tf.logging.warning(
                "Incorrect shape for beta, found %s, expected %s,"
                " for node %s" % (str(beta_value.shape), str(
                    (channel_count, )), node.name))
            continue

        # Input 4 of the batch norm: gamma.
        gamma_op = node_from_map(input_node_map, node.input[4])
        if gamma_op.op != "Const":
            tf.logging.warning(
                "Didn't find expected gamma Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, gamma_op))
            continue
        gamma_value = values_from_const(gamma_op)
        if gamma_value.shape != (channel_count, ):
            tf.logging.warning(
                "Incorrect shape for gamma, found %s, expected %s,"
                " for node %s" % (str(gamma_value.shape), str(
                    (channel_count, )), node.name))
            continue

        variance_epsilon_value = node.attr["variance_epsilon"].f
        scale_after_normalization = node.attr["scale_after_normalization"].b

        # All seven nodes of the pattern are dropped from the output graph;
        # replacements for some of them are appended through new_ops.
        nodes_to_skip[node.name] = True
        nodes_to_skip[weights_op.name] = True
        nodes_to_skip[mean_op.name] = True
        nodes_to_skip[var_op.name] = True
        nodes_to_skip[beta_op.name] = True
        nodes_to_skip[gamma_op.name] = True
        nodes_to_skip[conv_op.name] = True

        # scale = gamma / sqrt(var + eps) when scale_after_normalization is
        # set, otherwise 1 / sqrt(var + eps).
        if scale_after_normalization:
            scale_value = ((1.0 / np.vectorize(math.sqrt)
                            (var_value + variance_epsilon_value)) * gamma_value)
        else:
            scale_value = (
                1.0 / np.vectorize(math.sqrt)(var_value + variance_epsilon_value))
        # offset = beta - mean * scale
        offset_value = (-mean_value * scale_value) + beta_value

        # Multiply every weight in place by the scale selected by its index
        # along axis 3. The in-place update keeps the dtype of the copy.
        scaled_weights = np.copy(weights)
        it = np.nditer(scaled_weights,
                       flags=["multi_index"],
                       op_flags=["readwrite"])
        while not it.finished:
            current_scale = scale_value[it.multi_index[3]]
            it[0] *= current_scale
            it.iternext()

        # Replacement weights Const; it reuses the original weights name, so
        # the copied convolution below still refers to it.
        scaled_weights_op = tf.NodeDef()
        scaled_weights_op.op = "Const"
        scaled_weights_op.name = weights_op.name
        scaled_weights_op.attr["dtype"].CopyFrom(weights_op.attr["dtype"])
        scaled_weights_op.attr["value"].CopyFrom(
            tf.AttrValue(tensor=tensor_util.make_tensor_proto(
                scaled_weights, weights.dtype.type, weights.shape)))

        # The convolution itself is copied unchanged.
        new_conv_op = tf.NodeDef()
        new_conv_op.CopyFrom(conv_op)

        # Const holding the per-channel offset.
        offset_op = tf.NodeDef()
        offset_op.op = "Const"
        offset_op.name = conv_op.name + "_bn_offset"
        offset_op.attr["dtype"].CopyFrom(mean_op.attr["dtype"])
        offset_op.attr["value"].CopyFrom(
            tf.AttrValue(tensor=tensor_util.make_tensor_proto(
                offset_value, mean_value.dtype.type, offset_value.shape)))

        # BiasAdd(conv, offset) takes over the batch-norm node's name.
        bias_add_op = tf.NodeDef()
        bias_add_op.op = "BiasAdd"
        bias_add_op.name = node.name
        bias_add_op.attr["T"].CopyFrom(conv_op.attr["T"])
        bias_add_op.input.extend([new_conv_op.name, offset_op.name])

        new_ops.extend(
            [scaled_weights_op, new_conv_op, offset_op, bias_add_op])

    # Copy every node that was not replaced, then append the new nodes.
    result_graph_def = tf.GraphDef()
    for node in input_graph_def.node:
        if node.name in nodes_to_skip:
            continue
        new_node = tf.NodeDef()
        new_node.CopyFrom(node)
        result_graph_def.node.extend([new_node])

    result_graph_def.node.extend(new_ops)
    return result_graph_def
def PruneGraph(graph_def):
    """Rewire a fixed set of nodes in ``graph_def`` in place and return it.

    The node names are hard-coded for six blocks under ``Tower_0/gpu/``:
    ``transform2_block`` and ``transform2_block_1`` .. ``transform2_block_5``.
    In order, the function:

    1. points input 0 of each ``multi_head_attention/MatMul_1`` at the block's
       ``multi_head_attention/Softmax``;
    2. points input 1 of each ``add`` at the block's
       ``multi_head_attention/conv_1d_1/conv_1d_operation/BiasAdd``;
    3. points input 1 of each ``add_1`` at the block's
       ``ffn_mlp/conv_1d_1/conv_1d_operation/BiasAdd``;
    4. points input 2 of ``SparseToDense`` at ``GatherNd``;
    5. for ``ln_layer`` and ``ln_layer_1`` of every block, calls
       ``tf.constant([1], name=".../moments/variance/reduction_indices")`` and
       points input 1 of ``.../moments/variance`` at that name;
    6. overwrites the ``value`` attr of each of those ``reduction_indices``
       nodes in ``graph_def`` with
       ``tf.make_tensor_proto([1], tf.int32, [2])``.

    The statements are the same as in the previous hand-unrolled version and
    run in the same order; only the repetition was replaced by loops.

    Args:
        graph_def: Graph definition searched with ``getNodeByName``; it is
            modified in place.

    Returns:
        The same ``graph_def`` object.
    """
    prefix = "Tower_0/gpu/"
    blocks = ["transform2_block"] + [
        "transform2_block_%d" % index for index in range(1, 6)
    ]

    def rewire(node_name, input_index, new_input):
        # Replace one input of the named node.
        node = getNodeByName(graph_def, node_name)
        node.input[input_index] = new_input

    for block in blocks:
        attention = prefix + block + "/multi_head_attention"
        rewire(attention + "/MatMul_1", 0, attention + "/Softmax")

    for block in blocks:
        rewire(
            prefix + block + "/add", 1, prefix + block +
            "/multi_head_attention/conv_1d_1/conv_1d_operation/BiasAdd")

    for block in blocks:
        rewire(prefix + block + "/add_1", 1,
               prefix + block + "/ffn_mlp/conv_1d_1/conv_1d_operation/BiasAdd")

    rewire("SparseToDense", 2, "GatherNd")

    # Variance nodes in the original order: for each block, ln_layer first and
    # ln_layer_1 second.
    variance_nodes = [
        prefix + block + "/" + layer + "/moments/variance"
        for block in blocks
        for layer in ("ln_layer", "ln_layer_1")
    ]

    for variance_name in variance_nodes:
        indices_name = variance_name + "/reduction_indices"
        # NOTE(review): this call does not receive graph_def; it is kept
        # because the original code made it -- confirm it is still needed.
        tf.constant([1], name=indices_name)
        rewire(variance_name, 1, indices_name)

    for variance_name in variance_nodes:
        node = getNodeByName(graph_def, variance_name + "/reduction_indices")
        # One value is supplied for a requested shape of [2], exactly as in
        # the original code.
        node.attr["value"].CopyFrom(
            tf.AttrValue(tensor=tf.make_tensor_proto([1], tf.int32, [2])))

    return graph_def
def convert_frozen_model_to_NWHC(path):
    """Rebuild Conv2D/MaxPool/AvgPool ops of a frozen graph in a new layout.

    For each such op the first input is transposed with [0, 2, 3, 1], a copy
    of the op named "<name>_new" is created without the data_format attribute
    (with ksize/strides reordered the same way), and its output is transposed
    back with [0, 3, 1, 2]. Consumers of the original output are rewired to
    the transposed-back tensor, the original ops are removed from the
    exported GraphDef, and the result is written next to the input file with
    the suffix "_toco.pb".

    Args:
        path: Path of the binary frozen GraphDef file to convert.
    """

    def _permuted(attr_value):
        # Reorder a 4-entry integer list attribute as (0, 2, 3, 1).
        vals = attr_value.list.i
        return tf.AttrValue(list=tf.AttrValue.ListValue(
            i=[vals[0], vals[2], vals[3], vals[1]]))

    pool_types = ['MaxPool', 'AvgPool']
    graph_def_file = path
    tf.reset_default_graph()
    graph_def = tf.GraphDef()

    with tf.Session() as sess:
        # Read binary pb graph from file
        with tf.gfile.Open(graph_def_file, "rb") as f:
            data2read = f.read()
            graph_def.ParseFromString(data2read)
        tf.graph_util.import_graph_def(graph_def, name='')

        # Ops that get rebuilt.
        conv_nodes = []
        for candidate in sess.graph.get_operations():
            if candidate.type in ['Conv2D', 'MaxPool', 'AvgPool']:
                conv_nodes.append(candidate)

        for n_org in conv_nodes:
            assert len(n_org.inputs) == 1 or len(n_org.inputs) == 2

            # Transpose the data input.
            org_inp_tens = sess.graph.get_tensor_by_name(n_org.inputs[0].name)
            inp_tens = tf.transpose(org_inp_tens, [0, 2, 3, 1],
                                    name=n_org.name + '_transp_input')
            op_inputs = [inp_tens]

            # Conv2D filters are passed through without a transpose.
            if n_org.type == 'Conv2D':
                op_inputs.append(
                    sess.graph.get_tensor_by_name(n_org.inputs[1].name))

            # Attributes without data_format, so the default layout applies.
            atts = {}
            for key in list(n_org.node_def.attr.keys()):
                if key != 'data_format':
                    atts[key] = n_org.node_def.attr[key]

            if n_org.type in pool_types:
                atts['ksize'] = _permuted(atts['ksize'])
            if n_org.type in pool_types or n_org.type == 'Conv2D':
                atts['strides'] = _permuted(atts['strides'])

            # Create the replacement operation and transpose its output back.
            new_name = n_org.name + '_new'
            op = sess.graph.create_op(op_type=n_org.type,
                                      inputs=op_inputs,
                                      name=new_name,
                                      attrs=atts)
            out_tens = sess.graph.get_tensor_by_name(new_name + ':0')
            out_trans = tf.transpose(out_tens, [0, 3, 1, 2],
                                     name=n_org.name + '_transp_out')
            print(out_trans.shape,
                  sess.graph.get_tensor_by_name(n_org.name + ':0').shape)

            # Rewire every consumer of the original output.
            out_nodes = [
                n for n in sess.graph.get_operations()
                if n_org.outputs[0] in n.inputs
            ]
            for consumer in out_nodes:
                for position, tensor in enumerate(consumer.inputs):
                    if n_org.outputs[0] == tensor:
                        consumer._update_input(position, out_trans)

        # Export and delete the old nodes.
        graph_def = sess.graph.as_graph_def()
        for old_op in conv_nodes:
            graph_def.node.remove(old_op.node_def)

        # Write graph
        output_name = graph_def_file.rsplit('.', 1)[0] + '_toco.pb'
        tf.io.write_graph(graph_def, "", output_name, as_text=False)
def tf_run_frozen_graph(self, file, xla, parallel, warmup, num_iter):
    """Load a frozen graph, fill its variables with ones and time its execution.

    Steps performed:

    1. ``self.import_graph(file)`` provides the graph; when ``self.debug`` is
       set, operations, sources, inputs and tensors are printed.
    2. For every op whose type contains "Variable", Fill/Const/Assign nodes
       are appended to a copy of the GraphDef so that the variable is assigned
       ones; a NoOp "init_all_var" depends on all assignments.
    3. The extended GraphDef is imported into a fresh default graph (under the
       default "import" name scope) and run in a new session. Placeholders are
       fed with ones. The inputs of every op that has no consumer (NoOp
       excluded) are used as the tensors to fetch.
    4. ``warmup`` untimed runs are done (printing a preview of each fetched
       tensor), the ops whose type contains "ApplyGradient" are run once, and
       then ``num_iter`` timed iterations are executed.

    Compared with the previous version, the summary line is skipped when no
    timed iteration ran (``num_iter == 0`` used to fail on ``min([])``).

    Args:
        file: Passed unchanged to ``self.import_graph``.
        xla: If truthy, the global JIT level is set to
            ``tf.OptimizerOptions.ON_1``.
        parallel: Value for ``inter_op_parallelism_threads``.
        warmup: Number of untimed warm-up runs.
        num_iter: Number of timed iterations.
    """
    print("run frozen graph----------------------------")
    _, graph = self.import_graph(file)
    if self.debug:
        print()
        print('Operations:')
    assert graph is not None
    ops = graph.get_operations()  # type: Iterable[tf.Operation]

    variables_nodes = []
    for op in ops:
        if self.debug:
            print('- {0:20s} "{1}" ({2} outputs)'.format(
                op.type, op.name, len(op.outputs)))
        if "Variable" in op.type:
            variables_nodes.append(op)

    if self.debug:
        print()
        print('Sources (operations without inputs):')
        for op in ops:
            if len(op.inputs) > 0:
                continue
            print('- {0}'.format(op.name))
        print()
        print('Operation inputs:')
        for op in ops:
            if len(op.inputs) == 0:
                continue
            print('- {0:20}'.format(op.name))
            print(' {0}'.format(', '.join(i.name for i in op.inputs)))
        print()
        print('Tensors:')
        for op in ops:
            for out in op.outputs:
                print('- {0:20} {1:10} "{2}"'.format(
                    str(out.shape), out.dtype.name, out.name))

    # The session is kept from the original code although only the GraphDef
    # is built inside it.
    with tf.Session(graph=graph) as sess:
        var_inits = []
        g_def = graph.as_graph_def()
        dt_int32 = tf.as_dtype(tf.int32).as_datatype_enum
        for var in variables_nodes:
            vt = graph.get_tensor_by_name(var.outputs[0].name)
            dt = tf.as_dtype(vt.dtype.base_dtype).as_datatype_enum
            ones_name = var.name + "/ones"

            # Fill(shape, 1.0) -> Assign(var, ones)
            init = tf.NodeDef(
                name=ones_name,
                op="Fill",
                input=[ones_name + "/shape", ones_name + "/const"],
                attr={
                    'T': tf.AttrValue(type=dt),
                    'index_type': tf.AttrValue(type=dt_int32)
                })
            shape = tf.NodeDef(
                name=ones_name + "/shape",
                op="Const",
                attr={
                    "dtype": tf.AttrValue(type=dt_int32),
                    "value": tf.AttrValue(tensor=tf.make_tensor_proto(
                        vt.get_shape().as_list()))
                })
            const = tf.NodeDef(
                name=ones_name + "/const",
                op="Const",
                attr={
                    "dtype": tf.AttrValue(type=dt),
                    "value": tf.AttrValue(
                        tensor=tf.make_tensor_proto(1.0, dt))
                })
            node = tf.NodeDef(
                name=var.name + "/assign",
                op='Assign',
                input=[var.name, ones_name],
                attr={
                    'use_locking': tf.AttrValue(b=False),
                    'validate_shape': tf.AttrValue(b=True),
                    'T': tf.AttrValue(type=dt)
                })
            g_def.node.extend([shape, const, init, node])
            # "^name" marks a control dependency on the assign node.
            var_inits.append("^" + var.name + "/assign")

        noop_assign = tf.NodeDef(name="init_all_var",
                                 op="NoOp",
                                 input=var_inits)
        g_def.node.extend([noop_assign])

    tf.reset_default_graph()
    tf.import_graph_def(g_def)

    session_conf = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        graph_options=tf.GraphOptions(infer_shapes=True),
        inter_op_parallelism_threads=parallel)
    if xla:
        session_conf.graph_options.optimizer_options.global_jit_level = (
            tf.OptimizerOptions.ON_1)

    with tf.Session(config=session_conf) as sess:
        default_graph = tf.get_default_graph()
        init = default_graph.get_operation_by_name("import/init_all_var")
        ops = default_graph.get_operations()

        # Feed every placeholder output with ones.
        feed_dict = {}
        for op in ops:
            if op.type == 'Placeholder':
                for node in op.outputs:
                    feed_dict[node] = np.ones(
                        node.shape, dtype=node.dtype.as_numpy_dtype())

        # Ops applying gradients and the tensor each of them takes as its
        # first input.
        aps = []
        varlist = []
        for op in ops:
            if "ApplyGradient" in str(op.type):
                aps.append(op)
                varlist.append(op.inputs[0])

        # For every op, collect the indices of the ops consuming it.
        index_by_name = {op.name: index for index, op in enumerate(ops)}
        consumers = [[] for _ in ops]
        for index, op in enumerate(ops):
            for tensor in op.inputs:
                producer_name = tensor.name.split(':')[0]
                consumers[index_by_name[producer_name]].append(index)

        # The inputs of ops without consumers (NoOp excluded) are fetched.
        last_outputs = []
        for index, op in enumerate(ops):
            if not consumers[index] and op.type != 'NoOp':
                print('- {0:20s} {1}'.format(op.type, op.name))
                for tensor in op.inputs:
                    print('<-in-- {0:20s}'.format(tensor.name))
                    last_outputs.append(tensor)

        # Init as Ones
        sess.run(init)

        # Get vals before apply_gradients
        for _ in range(warmup):
            ret = sess.run(last_outputs + varlist, feed_dict)
            for tensor, value in zip(last_outputs, ret):
                out_flat = value.flat
                if len(out_flat) > 0:
                    max_len = min(10, len(out_flat))
                    print(tensor.name)
                    print(out_flat[:max_len], "...(size=", len(out_flat),
                          "end with", out_flat[-1], ")")

        # Do the apply_gradient
        sess.run(init)
        ret1 = sess.run(varlist + aps, feed_dict)
        print("Updated:")
        for tensor, value in zip(varlist, ret1):
            print(tensor.name, value)

        iter_times = []
        for _ in range(num_iter):
            start_time = time.time()
            sess.run(last_outputs + varlist, feed_dict)
            sess.run(varlist + aps, feed_dict)
            iter_time = (time.time() - start_time) * 1000
            iter_times.append(iter_time)
            print("Iteration time %f ms" % (iter_time))

        # min()/max() and the mean are undefined for zero iterations.
        if iter_times:
            print("Summary: [min, max, mean] = [%f, %f, %f] ms" %
                  (min(iter_times), max(iter_times),
                   sum(iter_times) / len(iter_times)))
        else:
            print("Summary: no timed iterations were run")