def testQuantizedTypes(self):
    """Round-trip a quantized list through TensorProto for three dtypes.

    For qint32, quint8 and qint8 the serialized proto is compared against
    its expected text form, then converted back with ``MakeNdarray`` and
    checked for dtype and contents.

    NOTE: the expected qint32 ``tensor_content`` is the little-endian byte
    layout (least significant byte first).
    """
    # Test with array.
    data = [(21,), (22,), (23,)]

    t = tensor_util.make_tensor_proto(data, dtype=tf.qint32)
    self.assertProtoEquals("""
      dtype: DT_QINT32
      tensor_shape { dim { size: 3 } }
      tensor_content: "\025\000\000\000\026\000\000\000\027\000\000\000"
      """, t)
    a = tensor_util.MakeNdarray(t)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(tf.qint32.as_numpy_dtype, a.dtype)
    self.assertAllEqual(np.array(data, dtype=a.dtype), a)

    t = tensor_util.make_tensor_proto(data, dtype=tf.quint8)
    self.assertProtoEquals("""
      dtype: DT_QUINT8
      tensor_shape { dim { size: 3 } }
      tensor_content: "\025\026\027"
      """, t)
    a = tensor_util.MakeNdarray(t)
    self.assertEqual(tf.quint8.as_numpy_dtype, a.dtype)
    self.assertAllEqual(np.array(data, dtype=a.dtype), a)

    t = tensor_util.make_tensor_proto(data, dtype=tf.qint8)
    self.assertProtoEquals("""
      dtype: DT_QINT8
      tensor_shape { dim { size: 3 } }
      tensor_content: "\025\026\027"
      """, t)
    a = tensor_util.MakeNdarray(t)
    self.assertEqual(tf.qint8.as_numpy_dtype, a.dtype)
    self.assertAllEqual(np.array(data, dtype=a.dtype), a)
def main():
    """Send one image to a gRPC prediction service and print the outputs.

    The image is scaled by 1/255, reshaped to rows of 784 features and sent
    together with a single key; every returned output tensor is converted
    back to a numpy array and the resulting dict is printed.
    """
    # Connection settings for the prediction server.
    server_address = "127.0.0.1:50051"
    request_timeout = 5.0
    channel = grpc.insecure_channel(server_address)
    stub = predict_pb2.PredictionServiceStub(channel)

    # Build the feature matrix from the image file.
    image = Image.open('../mnist_jpgs/4/pic_test1010.png')
    scaled_pixels = np.array(image) / 255.0
    samples_features = scaled_pixels.reshape([-1, 784])
    samples_keys = np.array([1])

    # Pack both numpy arrays into the request as TensorProtos.
    request = predict_pb2.PredictRequest()
    features_proto = tensor_util.make_tensor_proto(samples_features)
    request.inputs["features"].CopyFrom(features_proto)
    keys_proto = tensor_util.make_tensor_proto(samples_keys)
    request.inputs["key"].CopyFrom(keys_proto)

    # Issue the RPC; the second positional argument is the timeout.
    response = stub.Predict(request, request_timeout)

    # Convert each returned TensorProto back to numpy.
    result = {
        name: tensor_util.MakeNdarray(proto)
        for name, proto in response.outputs.items()
    }
    print(result)
def testTensorShapeVerification(self):
    """With verify_shape=True a mismatching shape must raise TypeError."""
    values = np.array([[1], [2]])

    # The array really is (2, 1), so this call must succeed.
    tensor_util.make_tensor_proto(values, shape=(2, 1), verify_shape=True)

    # Same element count, different layout: rejected.
    with self.assertRaises(TypeError):
        tensor_util.make_tensor_proto(
            values, shape=(1, 2), verify_shape=True)
def testTransformGraph(self):
    """strip_unused_nodes must drop a Relu that is not needed by the output.

    Builds Const, Const -> Add -> Relu, requests only ``add_op`` as output
    and checks that no Relu node survives the transform.
    """
    float_attr = attr_value_pb2.AttrValue(
        type=dtypes.float32.as_datatype_enum)
    graph = graph_pb2.GraphDef()

    # Two float constants of shape [1, 2].
    for const_name, const_values in (("const_op1", [1, 2]),
                                     ("const_op2", [3, 4])):
        const_node = graph.node.add()
        const_node.op = "Const"
        const_node.name = const_name
        const_node.attr["dtype"].CopyFrom(float_attr)
        const_node.attr["value"].CopyFrom(
            attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
                const_values, dtypes.float32, [1, 2])))

    # An add over both constants, then a relu that reads from the add.
    for op_type, op_name, op_inputs in (
            ("Add", "add_op", ["const_op1", "const_op2"]),
            ("Relu", "relu_op", ["add_op"])):
        op_node = graph.node.add()
        op_node.op = op_type
        op_node.attr["T"].CopyFrom(float_attr)
        op_node.name = op_name
        op_node.input.extend(op_inputs)

    # add_op is the final output, so the relu isn't needed.
    transformed = TransformGraph(graph, [], ["add_op"],
                                 ["strip_unused_nodes"])

    # The relu must no longer be present after running the transform.
    for node in transformed.node:
        self.assertNotEqual("Relu", node.op)
def testFloatSizesLessValues(self):
    """A scalar given with shape [1, 3] is stored as a single float_val."""
    proto = tensor_util.make_tensor_proto(10.0, shape=[1, 3])
    expected = """
      dtype: DT_FLOAT
      tensor_shape { dim { size: 1 } dim { size: 3 } }
      float_val: 10.0
      """
    self.assertProtoEquals(expected, proto)
def convert_variables_to_constants(sess, input_graph_def, output_node_names):
    """Replace the variables of a graph with Const nodes holding their values.

    Variables are discovered through "Assign" nodes: the first input of each
    Assign is taken as a variable name and its ``:0`` output is fetched from
    the session. The graph is then reduced to what ``output_node_names``
    needs, and every remaining node whose name matches a fetched variable is
    rewritten as a Const.

    Args:
      sess: Session used to fetch the current variable values.
      input_graph_def: GraphDef to convert.
      output_node_names: Names of the nodes the returned graph must produce.

    Returns:
      A new GraphDef with the matched variables replaced by constants.
    """
    variable_names = []
    variable_dict_names = []
    for node in input_graph_def.node:
        if node.op == "Assign":
            variable_name = node.input[0]
            variable_dict_names.append(variable_name)
            variable_names.append(variable_name + ":0")

    # Skip the session call entirely when the graph has no variables.
    if variable_names:
        returned_variables = sess.run(variable_names)
    else:
        returned_variables = []
    found_variables = dict(zip(variable_dict_names, returned_variables))
    print("Frozen %d variables." % len(returned_variables))

    inference_graph = extract_sub_graph(input_graph_def, output_node_names)

    output_graph_def = graph_pb2.GraphDef()
    how_many_converted = 0
    for input_node in inference_graph.node:
        output_node = graph_pb2.NodeDef()
        if input_node.name in found_variables:
            # Variable node: emit a Const with the fetched value instead.
            output_node.op = "Const"
            output_node.name = input_node.name
            dtype = input_node.attr["dtype"]
            data = found_variables[input_node.name]
            output_node.attr["dtype"].CopyFrom(dtype)
            output_node.attr["value"].CopyFrom(attr_value_pb2.AttrValue(
                tensor=tensor_util.make_tensor_proto(
                    data, dtype=dtype.type, shape=data.shape)))
            how_many_converted += 1
        else:
            output_node.CopyFrom(input_node)
        output_graph_def.node.extend([output_node])

    print("Converted %d variables to const ops." % how_many_converted)
    return output_graph_def
def testStringWithImplicitRepeat(self):
    """Strings shorter than the shape are padded by repeating the last one."""
    t = tensor_util.make_tensor_proto(["f", "g"], shape=[3, 4])
    a = tensor_util.MakeNdarray(t)
    # Builtin `object` replaces the np.object alias removed from NumPy.
    self.assertAllEqual(
        np.array([[b"f", b"g", b"g", b"g"],
                  [b"g", b"g", b"g", b"g"],
                  [b"g", b"g", b"g", b"g"]],
                 dtype=object), a)
def testLowRankSupported(self):
    """A zero-dimensional numpy array is converted to a scalar proto."""
    scalar_array = np.array(7)
    proto = tensor_util.make_tensor_proto(scalar_array)
    expected = """
      dtype: DT_INT64
      tensor_shape {}
      int64_val: 7
      """
    self.assertProtoEquals(expected, proto)
def testNoOutputs(self):
    """Calling a compiled function whose only node is unused must not crash."""
    with session_lib.Session() as sess:
        # A single Const node whose output is ignored.
        ignored_const = node_def_pb2.NodeDef()
        ignored_const.op = "Const"
        ignored_const.name = "ignored"
        ignored_const.attr["dtype"].type = dtypes.int32.as_datatype_enum
        ignored_const.attr["value"].tensor.CopyFrom(
            tensor_util.make_tensor_proto([0], dtype=dtypes.int32, shape=[]))

        # The function definition that wraps that node.
        fdef = function_pb2.FunctionDef()
        fdef.signature.name = "KernelWithNoOutputs"
        fdef.node_def.extend([ignored_const])

        # Check that calling the result as a compiled kernel doesn't crash.
        @function.Defun(compiled=True)
        def KernelWithNoOutputs():
            return constant_op.constant(100)

        # Hack to override the definition. Accessing .definition forces the
        # _DefinedFunction to initialize internally; afterwards its internal
        # FunctionDef proto is swapped for the hand-built one. This is done
        # because one typically can't construct such a function through the
        # Defun decorator directly.
        _ = KernelWithNoOutputs.definition
        KernelWithNoOutputs._definition = fdef

        call = KernelWithNoOutputs()
        sess.run(call, {})
def testFloatTypesWithImplicitRepeat(self):
    """One float value is repeated to fill a [3, 4] tensor, per float dtype."""
    cases = [(tf.float32, np.float32), (tf.float64, np.float64)]
    for dtype, nptype in cases:
        proto = tensor_util.make_tensor_proto(
            [10.0], shape=[3, 4], dtype=dtype)
        actual = tensor_util.MakeNdarray(proto)
        # Every cell of the 3x4 result must hold the single input value.
        expected = np.full((3, 4), 10.0, dtype=nptype)
        self.assertAllClose(expected, actual)
def set_attr_tensor(node, key, value, dtype, shape=None):
    """Store `value` as a tensor attribute `key` on `node`.

    The value is converted with ``make_tensor_proto`` using the given dtype
    and optional shape. A KeyError raised anywhere during the conversion or
    assignment is deliberately ignored.
    """
    try:
        tensor_proto = tensor_util.make_tensor_proto(
            value, dtype=dtype, shape=shape)
        attr_value = tf.AttrValue(tensor=tensor_proto)
        node.attr[key].CopyFrom(attr_value)
    except KeyError:
        pass
def testShapeEquals(self):
    """ShapeEquals accepts lists, tuples and shape protos; rejects others."""
    proto = tensor_util.make_tensor_proto([10, 20, 30, 40], shape=[2, 2])

    # Three equivalent spellings of the real shape.
    matching_shapes = (
        [2, 2],
        (2, 2),
        tensor_util.MakeTensorShapeProto([2, 2]),
    )
    for shape in matching_shapes:
        self.assertTrue(tensor_util.ShapeEquals(proto, shape))

    # Different sizes, or the same element count with a different layout.
    for shape in ([5, 3], [1, 4], [4]):
        self.assertFalse(tensor_util.ShapeEquals(proto, shape))
def testComplexWithImplicitRepeat(self):
    """A single complex64 scalar is repeated to fill a [3, 4] tensor."""
    value = 1 + 1j
    proto = tensor_util.make_tensor_proto(
        value, shape=[3, 4], dtype=tf.complex64)
    actual = tensor_util.MakeNdarray(proto)
    expected = np.full((3, 4), value, dtype=np.complex64)
    self.assertAllClose(expected, actual)
def testLongNpArray(self):
    """A default-dtype integer numpy array is serialized as DT_INT64.

    NOTE(review): relies on numpy's default integer being 64-bit on the
    platform running the test.
    """
    t = tensor_util.make_tensor_proto(np.array([10, 20, 30]))
    self.assertProtoEquals("""
      dtype: DT_INT64
      tensor_shape { dim { size: 3 } }
      tensor_content: "\\n\000\000\000\000\000\000\000\024\000\000\000\000\000\000\000\036\000\000\000\000\000\000\000"
      """, t)
    a = tensor_util.MakeNdarray(t)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(np.int64, a.dtype)
def testFloatSizes(self):
    """A float list with an explicit [1, 3] shape round-trips as float32."""
    t = tensor_util.make_tensor_proto([10.0, 20.0, 30.0], shape=[1, 3])
    self.assertProtoEquals("""
      dtype: DT_FLOAT
      tensor_shape { dim { size: 1 } dim { size: 3 } }
      tensor_content: "\000\000 A\000\000\240A\000\000\360A"
      """, t)
    a = tensor_util.MakeNdarray(t)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(np.float32, a.dtype)
    self.assertAllClose(np.array([[10.0, 20.0, 30.0]], dtype=np.float32), a)
def testString(self):
    """A Python string becomes a scalar DT_STRING proto and a bytes array."""
    t = tensor_util.make_tensor_proto("foo")
    self.assertProtoEquals("""
      dtype: DT_STRING
      tensor_shape {}
      string_val: "foo"
      """, t)
    a = tensor_util.MakeNdarray(t)
    # Builtin `object` replaces the np.object alias removed from NumPy;
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(object, a.dtype)
    self.assertEqual([b"foo"], a)
def testUnsupportedDTypes(self):
    """Invalid dtype/value combinations must raise TypeError."""
    # (positional args, keyword args) for each call expected to fail.
    rejected_calls = [
        ((np.array([1]), 0), {}),
        ((3,), {"dtype": dtypes.qint8}),
        (([3],), {"dtype": dtypes.qint8}),
    ]
    for args, kwargs in rejected_calls:
        with self.assertRaises(TypeError):
            tensor_util.make_tensor_proto(*args, **kwargs)

    # Validate the helpful error message when trying to convert an
    # unconvertible list as strings.
    unconvertible = [tensor_shape.Dimension(1)]
    with self.assertRaisesRegexp(TypeError, "Failed to convert object"):
        tensor_util.make_tensor_proto(unconvertible)
def testLong(self):
    """A Python int with dtype int64 round-trips as a scalar DT_INT64."""
    t = tensor_util.make_tensor_proto(10, dtype=dtypes.int64)
    self.assertProtoEquals("""
      dtype: DT_INT64
      tensor_shape {}
      int64_val: 10
      """, t)
    a = tensor_util.MakeNdarray(t)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(np.int64, a.dtype)
    self.assertAllClose(np.array(10, dtype=np.int64), a)
def testComplexWithImplicitRepeat(self):
    """A complex scalar fills a [3, 4] tensor for complex64 and complex128."""
    value = 1 + 1j
    cases = [(tf.complex64, np.complex64), (tf.complex128, np.complex128)]
    for dtype, np_dtype in cases:
        proto = tensor_util.make_tensor_proto(
            value, shape=[3, 4], dtype=dtype)
        actual = tensor_util.MakeNdarray(proto)
        expected = np.full((3, 4), value, dtype=np_dtype)
        self.assertAllClose(expected, actual)
def testIntMixedWithDimension(self):
    """A list mixing ints and a Dimension converts to an int32 tensor."""
    # Github issue: 11974
    dtype = dtypes.int32
    nptype = np.int32
    t = tensor_util.make_tensor_proto(
        [10, tensor_shape.Dimension(20), 30], dtype=dtype)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(dtype, t.dtype)
    a = tensor_util.MakeNdarray(t)
    self.assertEqual(nptype, a.dtype)
    self.assertAllClose(np.array([10, 20, 30], dtype=nptype), a)
def testInt(self):
    """A plain Python int round-trips as a scalar DT_INT32."""
    t = tensor_util.make_tensor_proto(10)
    self.assertProtoEquals("""
      dtype: DT_INT32
      tensor_shape {}
      int_val: 10
      """, t)
    a = tensor_util.MakeNdarray(t)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(np.int32, a.dtype)
    self.assertAllClose(np.array(10, dtype=np.int32), a)
def testTypeMismatch(self):
    """parse_tensor must fail when the requested dtype differs from the data."""
    with self.test_session():
        # Serialize a uint8 tensor ...
        source_values = np.random.rand(3, 4, 5).astype(np.uint8)
        payload = tensor_util.make_tensor_proto(
            source_values).SerializeToString()

        # ... and try to parse it back as uint16.
        serialized = tf.placeholder(tf.string)
        parsed = tf.parse_tensor(serialized, tf.uint16)

        expected_error = (
            r"Type mismatch between parsed tensor \(uint8\) and dtype "
            r"\(uint16\)")
        with self.assertRaisesOpError(expected_error):
            parsed.eval(feed_dict={serialized: payload})
def testIntNDefaultType(self):
    """A Python int list with shape [2, 2] defaults to DT_INT32."""
    t = tensor_util.make_tensor_proto([10, 20, 30, 40], shape=[2, 2])
    self.assertProtoEquals("""
      dtype: DT_INT32
      tensor_shape { dim { size: 2 } dim { size: 2 } }
      tensor_content: "\\n\000\000\000\024\000\000\000\036\000\000\000(\000\000\000"
      """, t)
    a = tensor_util.MakeNdarray(t)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(np.int32, a.dtype)
    self.assertAllClose(np.array([[10, 20], [30, 40]], dtype=np.int32), a)
def testFloat(self):
    """A Python float round-trips as a scalar DT_FLOAT / float32."""
    t = tensor_util.make_tensor_proto(10.0)
    self.assertProtoEquals("""
      dtype: DT_FLOAT
      tensor_shape {}
      float_val: 10.0
      """, t)
    a = tensor_util.MakeNdarray(t)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(np.float32, a.dtype)
    self.assertAllClose(np.array(10.0, dtype=np.float32), a)
def convert_variables_to_constants(sess, input_graph_def, output_node_names):
    """Replaces all the variables in a graph with constants of the same values.

    If you have a trained graph containing Variable ops, it can be convenient
    to convert them all to Const ops holding the same values. This makes it
    possible to describe the network fully with a single GraphDef file, and
    allows the removal of a lot of ops related to loading and saving the
    variables.

    Variables are located through "Assign" nodes: the first input of each
    Assign is treated as a variable name and its ``:0`` output is fetched.

    Args:
      sess: Active TensorFlow session containing the variables.
      input_graph_def: GraphDef object holding the network.
      output_node_names: List of name strings for the result nodes of the
        graph.

    Returns:
      GraphDef containing a simplified version of the original.
    """
    variable_names = []
    variable_dict_names = []
    for node in input_graph_def.node:
        if node.op == "Assign":
            variable_name = node.input[0]
            variable_dict_names.append(variable_name)
            variable_names.append(variable_name + ":0")

    # Only touch the session when there is something to fetch.
    if variable_names:
        returned_variables = sess.run(variable_names)
    else:
        returned_variables = []
    found_variables = dict(zip(variable_dict_names, returned_variables))
    logging.info("Frozen %d variables.", len(returned_variables))

    # This graph only includes the nodes needed to evaluate the output nodes,
    # and removes unneeded nodes like those involved in saving and assignment.
    inference_graph = extract_sub_graph(input_graph_def, output_node_names)

    output_graph_def = graph_pb2.GraphDef()
    how_many_converted = 0
    for input_node in inference_graph.node:
        output_node = graph_pb2.NodeDef()
        if input_node.name in found_variables:
            # Variable node: emit a Const with the fetched value instead.
            output_node.op = "Const"
            output_node.name = input_node.name
            dtype = input_node.attr["dtype"]
            data = found_variables[input_node.name]
            output_node.attr["dtype"].CopyFrom(dtype)
            output_node.attr["value"].CopyFrom(attr_value_pb2.AttrValue(
                tensor=tensor_util.make_tensor_proto(
                    data, dtype=dtype.type, shape=data.shape)))
            how_many_converted += 1
        else:
            output_node.CopyFrom(input_node)
        output_graph_def.node.extend([output_node])

    # Reported through logging, like the "Frozen" message above.
    logging.info("Converted %d variables to const ops.", how_many_converted)
    return output_graph_def
def testIntTypesWithImplicitRepeat(self):
    """A short int list is padded with its last value, for each int dtype."""
    # [10, 11, 12] laid out in a 3x4 tensor; the tail repeats the final 12.
    expected_rows = [[10, 11, 12, 12],
                     [12, 12, 12, 12],
                     [12, 12, 12, 12]]
    cases = [
        (dtypes.int64, np.int64),
        (dtypes.int32, np.int32),
        (dtypes.uint8, np.uint8),
        (dtypes.uint16, np.uint16),
        (dtypes.int16, np.int16),
        (dtypes.int8, np.int8),
    ]
    for dtype, nptype in cases:
        proto = tensor_util.make_tensor_proto(
            [10, 11, 12], shape=[3, 4], dtype=dtype)
        actual = tensor_util.MakeNdarray(proto)
        self.assertAllEqual(np.array(expected_rows, dtype=nptype), actual)
def testComplex128(self):
    """A complex scalar with dtype complex128 is stored as two dcomplex_val."""
    t = tensor_util.make_tensor_proto((1 + 2j), dtype=dtypes.complex128)
    self.assertProtoEquals("""
      dtype: DT_COMPLEX128
      tensor_shape {}
      dcomplex_val: 1
      dcomplex_val: 2
      """, t)
    a = tensor_util.MakeNdarray(t)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(np.complex128, a.dtype)
    self.assertAllEqual(np.array(1 + 2j), a)
def testLargeInt(self):
    """The largest int64 value is stored as a scalar DT_INT64."""
    value = np.iinfo(np.int64).max
    t = tensor_util.make_tensor_proto(value)
    self.assertProtoEquals("""
      dtype: DT_INT64
      tensor_shape {}
      int64_val: %d
      """ % value, t)
    a = tensor_util.MakeNdarray(t)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(np.int64, a.dtype)
    self.assertAllClose(np.array(value, dtype=np.int64), a)
def testFloat(self):
    """A Python float round-trips as a scalar DT_FLOAT / float32."""
    value = 10.0
    t = tensor_util.make_tensor_proto(value)
    self.assertProtoEquals("""
      dtype: DT_FLOAT
      tensor_shape {}
      float_val: %.1f
      """ % value, t)
    a = tensor_util.MakeNdarray(t)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(np.float32, a.dtype)
    self.assertAllClose(np.array(value, dtype=np.float32), a)
def testIntTypes(self):
    """Integer dtypes round-trip from both Python lists and numpy arrays."""
    for dtype, nptype in [(dtypes.int32, np.int32),
                          (dtypes.uint8, np.uint8),
                          (dtypes.uint16, np.uint16),
                          (dtypes.int16, np.int16),
                          (dtypes.int8, np.int8)]:
        # Test with array. (assertEqual replaces the deprecated assertEquals.)
        t = tensor_util.make_tensor_proto([10, 20, 30], dtype=dtype)
        self.assertEqual(dtype, t.dtype)
        self.assertProtoEquals("dim { size: 3 }", t.tensor_shape)
        a = tensor_util.MakeNdarray(t)
        self.assertEqual(nptype, a.dtype)
        self.assertAllClose(np.array([10, 20, 30], dtype=nptype), a)

        # Test with ndarray: the dtype is inferred from the numpy array.
        t = tensor_util.make_tensor_proto(
            np.array([10, 20, 30], dtype=nptype))
        self.assertEqual(dtype, t.dtype)
        self.assertProtoEquals("dim { size: 3 }", t.tensor_shape)
        a = tensor_util.MakeNdarray(t)
        self.assertEqual(nptype, a.dtype)
        self.assertAllClose(np.array([10, 20, 30], dtype=nptype), a)
def testQuantizedTypes(self):
    """Round-trip a quantized list through TensorProto for five dtypes.

    Covers qint32, quint8, qint8, quint16 and qint16. For the multi-byte
    types the expected ``tensor_content`` depends on ``sys.byteorder``.
    """
    # Test with array.
    data = [(21,), (22,), (23,)]

    t = tensor_util.make_tensor_proto(data, dtype=dtypes.qint32)
    if sys.byteorder == "big":
        self.assertProtoEquals("""
          dtype: DT_QINT32
          tensor_shape { dim { size: 3 } }
          tensor_content: "\000\000\000\025\000\000\000\026\000\000\000\027"
          """, t)
    else:
        self.assertProtoEquals("""
          dtype: DT_QINT32
          tensor_shape { dim { size: 3 } }
          tensor_content: "\025\000\000\000\026\000\000\000\027\000\000\000"
          """, t)
    a = tensor_util.MakeNdarray(t)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(dtypes.qint32.as_numpy_dtype, a.dtype)
    self.assertAllEqual(np.array(data, dtype=a.dtype), a)

    t = tensor_util.make_tensor_proto(data, dtype=dtypes.quint8)
    self.assertProtoEquals("""
      dtype: DT_QUINT8
      tensor_shape { dim { size: 3 } }
      tensor_content: "\025\026\027"
      """, t)
    a = tensor_util.MakeNdarray(t)
    self.assertEqual(dtypes.quint8.as_numpy_dtype, a.dtype)
    self.assertAllEqual(np.array(data, dtype=a.dtype), a)

    t = tensor_util.make_tensor_proto(data, dtype=dtypes.qint8)
    self.assertProtoEquals("""
      dtype: DT_QINT8
      tensor_shape { dim { size: 3 } }
      tensor_content: "\025\026\027"
      """, t)
    a = tensor_util.MakeNdarray(t)
    self.assertEqual(dtypes.qint8.as_numpy_dtype, a.dtype)
    self.assertAllEqual(np.array(data, dtype=a.dtype), a)

    t = tensor_util.make_tensor_proto(data, dtype=dtypes.quint16)
    if sys.byteorder == "big":
        self.assertProtoEquals("""
          dtype: DT_QUINT16
          tensor_shape { dim { size: 3 } }
          tensor_content: "\000\025\000\026\000\027"
          """, t)
    else:
        self.assertProtoEquals("""
          dtype: DT_QUINT16
          tensor_shape { dim { size: 3 } }
          tensor_content: "\025\000\026\000\027\000"
          """, t)
    a = tensor_util.MakeNdarray(t)
    self.assertEqual(dtypes.quint16.as_numpy_dtype, a.dtype)
    self.assertAllEqual(np.array(data, dtype=a.dtype), a)

    t = tensor_util.make_tensor_proto(data, dtype=dtypes.qint16)
    if sys.byteorder == "big":
        self.assertProtoEquals("""
          dtype: DT_QINT16
          tensor_shape { dim { size: 3 } }
          tensor_content: "\000\025\000\026\000\027"
          """, t)
    else:
        self.assertProtoEquals("""
          dtype: DT_QINT16
          tensor_shape { dim { size: 3 } }
          tensor_content: "\025\000\026\000\027\000"
          """, t)
    a = tensor_util.MakeNdarray(t)
    self.assertEqual(dtypes.qint16.as_numpy_dtype, a.dtype)
    self.assertAllEqual(np.array(data, dtype=a.dtype), a)
def numpy_array_to_observation(array):
    """Wrap `array` in an Observation message.

    The array is converted with ``make_tensor_proto`` and copied into the
    message's ``observation`` field; the populated message is returned.
    """
    message = env_service_pb2.Observation()
    tensor_proto = tensor_util.make_tensor_proto(array)
    message.observation.CopyFrom(tensor_proto)
    return message
def testNestedNumpyArrayWithDType(self):
    """A list mixing floats and a 0-d numpy array converts to float32."""
    mixed_values = [10.0, 20.0, np.array(30.0)]
    proto = tensor_util.make_tensor_proto(mixed_values, dtype=dtypes.float32)
    actual = tensor_util.MakeNdarray(proto)

    self.assertEqual(np.float32, actual.dtype)
    expected = np.array([10.0, 20.0, 30.0], dtype=np.float32)
    self.assertAllClose(expected, actual)
def _update_bias(self):
    """Convert the bias of fused requantized ops from float to qint32.

    For every node in ``self.node_mapping`` whose op is listed in
    ``self.fused_requantized_bias_op`` and whose ``Tbias`` is float32, the
    chain of first inputs is followed back until a node whose op is in
    ``self.offset_map`` is found (passing through QuantizedConcatV2 and the
    quantized pooling ops). The float bias is then scaled by

        255 * 127 / (max|input range| * max|filter range|)

    truncated to int, and written back to the bias Const node as qint32.
    Nodes fed directly by a QuantizeV2, or for which no such upstream node
    is found, are left untouched.
    """
    for current_node in self.node_mapping.values():
        current_node_op = current_node.op
        if current_node_op not in self.fused_requantized_bias_op:
            continue

        bias_node = self.node_mapping[self.get_node_name_from_input(
            current_node.input[2])]
        # Only float biases need converting (qint32 ones already are).
        if current_node.attr['Tbias'].type != dtypes.float32:
            continue

        input_node_name = self.get_node_name_from_input(
            current_node.input[0])
        if self.node_mapping[input_node_name].op == "QuantizeV2":
            continue

        # Walk upstream along input[0] looking for the previous fused op.
        last_conv_node = None
        input_node = current_node
        while True:
            input_node = self.node_mapping[
                self.get_node_name_from_input(input_node.input[0])]
            if input_node.op in self.offset_map:
                last_conv_node = input_node
                break
            # Exact match: the original `in "QuantizedConcatV2"` was a
            # substring test that also matched unrelated op names.
            if input_node.op == "QuantizedConcatV2":
                continue
            if input_node.op not in ("QuantizedMaxPool",
                                     "QuantizedAvgPool"):
                # Any other op ends the search without a result.
                break
        if last_conv_node is None:
            continue

        min_filter_node = self.node_mapping[current_node.input[5]]
        max_filter_node = self.node_mapping[current_node.input[6]]
        min_filter = min_filter_node.attr['value'].tensor.float_val[0]
        max_filter = max_filter_node.attr['value'].tensor.float_val[0]

        # NOTE(review): the offset is looked up with the *current* node's
        # op but indexes the upstream node's inputs - confirm intended.
        offset_value = self.offset_map[current_node_op]
        min_freezed_output_node = self.node_mapping[
            last_conv_node.input[offset_value]]
        max_freezed_output_node = self.node_mapping[
            last_conv_node.input[offset_value + 1]]
        min_input = min_freezed_output_node.attr[
            'value'].tensor.float_val[0]
        max_input = max_freezed_output_node.attr[
            'value'].tensor.float_val[0]

        bias_scale = 255.0 * 127.0 / (
            max(abs(max_input), abs(min_input)) *
            max(abs(max_filter), abs(min_filter)))

        bias_tensor = tensor_util.MakeNdarray(
            bias_node.attr['value'].tensor)
        q_bias = [int(value * bias_scale) for value in bias_tensor]

        qint32_type = dtypes.qint32.as_datatype_enum
        current_node.attr['Tbias'].CopyFrom(
            attr_value_pb2.AttrValue(type=qint32_type))
        bias_node.attr['dtype'].CopyFrom(
            attr_value_pb2.AttrValue(type=qint32_type))
        # The proto is built as int32, then relabelled as qint32.
        bias_node.attr['value'].CopyFrom(
            attr_value_pb2.AttrValue(
                tensor=tensor_util.make_tensor_proto(
                    q_bias, dtypes.int32, bias_tensor.shape)))
        bias_node.attr['value'].tensor.dtype = qint32_type
def generate_output_graph(self, input_graph_def, input_node_map,
                          fuse_op_list):
    """Build a new GraphDef in which listed ops are fused with requantize.

    For each node whose index is in ``fuse_op_list`` and whose first input
    is a QuantizeV2 or Requantize node, the node and the three nodes that
    follow it (read here as frozen-min, frozen-max and requantize) are
    replaced by one ``<op>AndRequantize`` node that takes the requantize
    node's name. The float bias of the op is converted to qint32 in place:

      * fed by QuantizeV2: ``int(bias * scale + column_sum(weights) * QaAmin)``
        with ``QaAmin = 255 * min_input / (max_input - min_input)``;
      * fed by Requantize: ``int(bias * scale)``;

    where ``scale = 255 * 127 / (max|input range| * max|filter range|)``.
    All other nodes are copied unchanged.

    Args:
      input_graph_def: GraphDef to rewrite.
      input_node_map: Mapping from node name to NodeDef of that graph.
      fuse_op_list: Indices (into ``input_graph_def.node``) of ops to fuse.

    Returns:
      The rewritten GraphDef.
    """
    output_graph_def = graph_pb2.GraphDef()
    # Indices of nodes already absorbed into a fused node.
    skip_indices = set()
    uint8_type = dtypes.quint8.as_datatype_enum
    qint32_type = dtypes.qint32.as_datatype_enum

    for index, node in enumerate(input_graph_def.node):
        if index in fuse_op_list:
            input_node = input_node_map[node.input[0]]
            if input_node.op in ('QuantizeV2', 'Requantize'):
                fed_by_quantize = input_node.op == 'QuantizeV2'
                # The input range lives at different input slots depending
                # on the producer op.
                min_slot, max_slot = (1, 2) if fed_by_quantize else (3, 4)
                min_input_node = input_node_map[
                    self.get_node_name_from_input(
                        input_node.input[min_slot])]
                max_input_node = input_node_map[
                    self.get_node_name_from_input(
                        input_node.input[max_slot])]

                frozen_min_node = input_graph_def.node[index + 1]
                frozen_max_node = input_graph_def.node[index + 2]
                requantize_node = input_graph_def.node[index + 3]

                weights_node_name = node.input[1]
                bias_node_name = node.input[2]
                min_filter_node = input_node_map[node.input[5]]
                max_filter_node = input_node_map[node.input[6]]

                # The fused node keeps the original inputs and gains the
                # frozen min/max as two extra inputs.
                new_node = node_def_pb2.NodeDef()
                new_node.op = node.op + "AndRequantize"
                new_node.name = requantize_node.name
                new_node.input.extend(node.input)
                new_node.input.append(frozen_min_node.name)
                new_node.input.append(frozen_max_node.name)
                new_node.attr["T1"].CopyFrom(node.attr['T1'])
                new_node.attr["T2"].CopyFrom(node.attr['T2'])

                min_input_value = tensor_util.MakeNdarray(
                    min_input_node.attr['value'].tensor)
                max_input_value = tensor_util.MakeNdarray(
                    max_input_node.attr['value'].tensor)
                min_filter_value = tensor_util.MakeNdarray(
                    min_filter_node.attr['value'].tensor)
                max_filter_value = tensor_util.MakeNdarray(
                    max_filter_node.attr['value'].tensor)
                bias_tensor = tensor_util.MakeNdarray(
                    input_node_map[bias_node_name].attr['value'].tensor)

                bias_scale = 255.0 * 127.0 / (
                    max(abs(max_input_value), abs(min_input_value)) *
                    max(abs(max_filter_value), abs(min_filter_value)))

                if fed_by_quantize:
                    # Fold the input offset into the bias via the weights
                    # summed along their first axis.
                    weights_tensor = tensor_util.MakeNdarray(
                        input_node_map[weights_node_name]
                        .attr['value'].tensor)
                    QaAmin = 255 * min_input_value / (
                        max_input_value - min_input_value)
                    weight_sums = np.sum(
                        np.array(weights_tensor, dtype=np.int32),
                        axis=0, dtype=np.int32)
                    int32_bias = [
                        int(bias_tensor[bias_index] * bias_scale +
                            value * QaAmin)
                        for bias_index, value in enumerate(weight_sums)
                    ]
                else:
                    int32_bias = [int(value * bias_scale)
                                  for value in bias_tensor]

                # Prefer a bias node already emitted to the output graph;
                # otherwise the node from the input map is updated in place.
                bias_node = self.check_node_existence(
                    output_graph_def, bias_node_name)
                if not bias_node:
                    bias_node = input_node_map[bias_node_name]
                bias_node.attr['dtype'].CopyFrom(
                    attr_value_pb2.AttrValue(type=qint32_type))
                # The proto is built as int32, then relabelled as qint32.
                bias_node.attr['value'].CopyFrom(
                    attr_value_pb2.AttrValue(
                        tensor=tensor_util.make_tensor_proto(
                            int32_bias, dtypes.int32, bias_tensor.shape)))
                bias_node.attr['value'].tensor.dtype = qint32_type

                new_node.attr["Tbias"].CopyFrom(
                    attr_value_pb2.AttrValue(type=qint32_type))
                new_node.attr["Toutput"].CopyFrom(
                    attr_value_pb2.AttrValue(type=uint8_type))

                skip_indices.update((index + 1, index + 2, index + 3))
                output_graph_def.node.extend(
                    [new_node, frozen_max_node, frozen_min_node])
            else:
                # Listed for fusion but not fed by a supported op: copy.
                new_node = node_def_pb2.NodeDef()
                new_node.CopyFrom(node)
                output_graph_def.node.extend([new_node])
        elif index in skip_indices:
            continue
        else:
            new_node = node_def_pb2.NodeDef()
            new_node.CopyFrom(node)
            output_graph_def.node.extend([new_node])

    return output_graph_def
def convert_variables_to_constants_v2(func):
    """Replaces all the variables in a graph with constants of the same values.

    TensorFlow 2.0 function for converting all Variable ops into Const ops
    holding the same values. This makes it possible to describe the network
    fully with a single GraphDef file, and allows the removal of a lot of ops
    related to loading and saving the variables. This function runs Grappler's
    function inlining optimization in order to return a single subgraph.

    The current implementation only works for graphs that do not contain any
    control flow or embedding related ops.

    Args:
      func: ConcreteFunction.

    Returns:
      ConcreteFunction containing a simplified version of the original. Its
      inputs are the non-converted inputs of `func`, in their original order.

    Raises:
      ValueError: If a ReadVariableOp is not (possibly through Identity ops)
        fed by a Placeholder.
    """
    # TODO(nupurgarg): Replace ResourceGather with Gather.
    # TODO(nupurgarg): Change attr for Variables in control flow and functions.
    graph_def = _run_inline_graph_optimization(func)

    def get_name(name):
        """Strip the ":<index>" output suffix from a tensor name."""
        return name.split(":")[0]

    # Identify the ReadVariableOps.
    map_name_to_node = {get_name(node.name): node for node in graph_def.node}

    # TODO(b/125838789): Use `func.graph.captures`.
    # Get mapping from input name to variable value.
    tensor_data = {}
    map_name_to_handle = {}
    # Captured inputs are the trailing entries of func.inputs. Guard the
    # empty case explicitly: `inputs[-0:]` would return *all* inputs.
    num_captured = len(func.captured_inputs)
    input_tensors = func.inputs[-num_captured:] if num_captured else []
    for var in func.graph.variables:
        index = func.captured_inputs.index(var.handle)
        tensor_name = get_name(input_tensors[index].name)
        tensor_data[tensor_name] = var.numpy()
        map_name_to_handle[tensor_name] = var.handle

    # Get mapping from input name to value for non-variable placeholders.
    map_name_to_value = {}
    for name_tensor, value_tensor in zip(input_tensors, func.captured_inputs):
        tensor_name = get_name(name_tensor.name)
        if tensor_name not in map_name_to_handle:
            map_name_to_value[tensor_name] = value_tensor

    resource_identities = {}
    placeholders = {}
    converted_input_indices = set()
    for node in graph_def.node:
        if node.name in map_name_to_value:
            # Get the dtype and data for the Placeholders whose values are
            # stored as Tensors. This is the case for values that were
            # originally Const ops.
            tensor = map_name_to_value[node.name]
            placeholders[node.name] = {
                "dtype": node.attr["dtype"],
                "data": tensor.numpy(),
            }
            converted_input_indices.add(func.captured_inputs.index(tensor))
        if node.op == "ReadVariableOp":
            # Get name of Placeholder op associated with ReadVariableOp.
            # There can be an Identity in between the ReadVariableOp and
            # Placeholder. Store the Identity ops with the associated dtypes.
            input_name = get_name(node.input[0])
            while map_name_to_node[input_name].op == "Identity":
                resource_identities[input_name] = node.attr["dtype"]
                input_name = get_name(map_name_to_node[input_name].input[0])
            if map_name_to_node[input_name].op != "Placeholder":
                raise ValueError(
                    "Cannot find the Placeholder op that is an input "
                    "to the ReadVariableOp.")
            # Build a map of Placeholder ops that are inputs to
            # ReadVariableOps to the variable's dtype and data.
            placeholders[input_name] = {
                "dtype": node.attr["dtype"],
                "data": tensor_data[input_name],
            }
            converted_input_indices.add(
                func.captured_inputs.index(map_name_to_handle[input_name]))

    # Reconstruct the graph with constants in place of variables.
    output_graph_def = graph_pb2.GraphDef()
    how_many_converted = 0
    for input_node in graph_def.node:
        output_node = output_graph_def.node.add()
        # Convert Placeholder ops to Const ops.
        if input_node.name in placeholders:
            dtype = placeholders[input_node.name]["dtype"]
            data = placeholders[input_node.name]["data"]
            output_node.op = "Const"
            output_node.name = input_node.name
            output_node.attr["dtype"].CopyFrom(dtype)
            output_node.attr["value"].tensor.CopyFrom(
                tensor_util.make_tensor_proto(
                    data, dtype=dtype.type, shape=data.shape))
            how_many_converted += 1
        # Change the dtype for Identity ops that are inputs to
        # ReadVariableOps.
        elif input_node.name in resource_identities:
            output_node.CopyFrom(input_node)
            output_node.attr["T"].CopyFrom(
                resource_identities[input_node.name])
        # Convert ReadVariableOps into Identity ops.
        elif input_node.op == "ReadVariableOp":
            output_node.op = "Identity"
            output_node.name = input_node.name
            output_node.input.extend([input_node.input[0]])
            output_node.attr["T"].CopyFrom(input_node.attr["dtype"])
            if "_class" in input_node.attr:
                output_node.attr["_class"].CopyFrom(input_node.attr["_class"])
        else:
            output_node.CopyFrom(input_node)

    logging.info("Converted %d variables to const ops.", how_many_converted)

    # Create a ConcreteFunction from the new GraphDef.
    converted_inputs = {
        input_tensors[index] for index in converted_input_indices
    }
    # Keep the remaining inputs in their original order so the signature of
    # the new function is deterministic (a set difference is not ordered).
    not_converted_inputs = [
        tensor for tensor in func.inputs if tensor not in converted_inputs
    ]
    not_converted_inputs_map = {
        tensor.name: tensor for tensor in not_converted_inputs
    }

    new_input_names = [tensor.name for tensor in not_converted_inputs]
    new_output_names = [tensor.name for tensor in func.outputs]
    new_func = wrap_function.function_from_graph_def(
        output_graph_def, new_input_names, new_output_names)

    # Manually propagate shape for input tensors where the shape is not
    # correctly propagated. Scalars shapes are lost when wrapping the
    # function.
    for input_tensor in new_func.inputs:
        input_tensor.set_shape(
            not_converted_inputs_map[input_tensor.name].shape)
    return new_func
def convert_variables_to_constants(sess, input_graph_def, output_node_names,
                                   logger, variable_names_whitelist=None,
                                   variable_names_blacklist=None,
                                   use_fp16=False):
    """Freeze the variables of a graph into ``Const`` nodes, optionally as fp16.

    The sub-graph needed to compute `output_node_names` is extracted, the
    current value of every selected variable is fetched through `sess`, and a
    new GraphDef is built in which those variables are constants.

    Args:
      sess: Session used to fetch the current variable values (`sess.run`).
      input_graph_def: GraphDef holding the network.
      output_node_names: Names of the nodes the frozen graph must still compute.
      logger: Logger-like object; `warning` and `info` are called on it.
      variable_names_whitelist: Optional collection of variable names; when
        given, only these variables are converted.
      variable_names_blacklist: Optional collection of variable names that must
        not be converted.
      use_fp16: When true, DT_FLOAT variables, DT_FLOAT constant values and the
        DT_FLOAT type attributes `dtype`, `T`, `DstT`, `SrcT` and `Tparams` are
        rewritten to DT_HALF.

    Returns:
      A new GraphDef with the selected variables replaced by constants.
    """
    from tensorflow.python.framework.graph_util_impl import extract_sub_graph
    from tensorflow.core.framework import graph_pb2
    from tensorflow.core.framework import node_def_pb2
    from tensorflow.core.framework import attr_value_pb2
    from tensorflow.core.framework import types_pb2
    from tensorflow.python.framework import tensor_util

    def patch_dtype(input_node, field_name, output_node):
        # Rewrite a DT_FLOAT type attribute to DT_HALF (fp16 mode only).
        if use_fp16 and (field_name in input_node.attr) and (
                input_node.attr[field_name].type == types_pb2.DT_FLOAT):
            output_node.attr[field_name].CopyFrom(
                attr_value_pb2.AttrValue(type=types_pb2.DT_HALF))

    if use_fp16:
        logger.warning(
            'fp16 is turned on! '
            'Note that not all CPU and GPU support fast fp16 instructions, '
            'worst case you will have degraded performance!')

    inference_graph = extract_sub_graph(input_graph_def, output_node_names)

    # Collect the variables to freeze together with the tensor names that
    # yield their current values.
    variable_names = []
    variable_dict_names = []
    for node in inference_graph.node:
        if node.op in ["Variable", "VariableV2", "VarHandleOp"]:
            variable_name = node.name
            if ((variable_names_whitelist is not None and
                 variable_name not in variable_names_whitelist) or
                    (variable_names_blacklist is not None and
                     variable_name in variable_names_blacklist)):
                continue
            variable_dict_names.append(variable_name)
            if node.op == "VarHandleOp":
                # Resource variables are read through their ReadVariableOp.
                variable_names.append(variable_name + "/Read/ReadVariableOp:0")
            else:
                variable_names.append(variable_name + ":0")

    if variable_names:
        returned_variables = sess.run(variable_names)
    else:
        returned_variables = []
    found_variables = dict(zip(variable_dict_names, returned_variables))
    logger.info("freezing %d variables...", len(returned_variables))

    output_graph_def = graph_pb2.GraphDef()
    how_many_converted = 0
    for input_node in inference_graph.node:
        output_node = node_def_pb2.NodeDef()
        if input_node.name in found_variables:
            # Variable -> Const holding the fetched value.
            output_node.op = "Const"
            output_node.name = input_node.name
            dtype = input_node.attr["dtype"]
            data = found_variables[input_node.name]
            if use_fp16 and dtype.type == types_pb2.DT_FLOAT:
                # The `dtype` attribute itself is set by patch_dtype() below.
                output_node.attr["value"].CopyFrom(
                    attr_value_pb2.AttrValue(
                        tensor=tensor_util.make_tensor_proto(
                            data.astype('float16'),
                            dtype=types_pb2.DT_HALF,
                            shape=data.shape)))
            else:
                output_node.attr["dtype"].CopyFrom(dtype)
                output_node.attr["value"].CopyFrom(
                    attr_value_pb2.AttrValue(
                        tensor=tensor_util.make_tensor_proto(
                            data, dtype=dtype.type, shape=data.shape)))
            how_many_converted += 1
        elif input_node.op == "ReadVariableOp" and (
                input_node.input[0] in found_variables):
            # Reading a frozen variable becomes a plain Identity of the Const.
            output_node.op = "Identity"
            output_node.name = input_node.name
            output_node.input.extend([input_node.input[0]])
            output_node.attr["T"].CopyFrom(input_node.attr["dtype"])
            if "_class" in input_node.attr:
                output_node.attr["_class"].CopyFrom(input_node.attr["_class"])
        else:
            # Every other node is copied unchanged.
            output_node.CopyFrom(input_node)

        patch_dtype(input_node, 'dtype', output_node)
        patch_dtype(input_node, 'T', output_node)
        patch_dtype(input_node, 'DstT', output_node)
        patch_dtype(input_node, 'SrcT', output_node)
        patch_dtype(input_node, 'Tparams', output_node)

        if use_fp16 and ('value' in output_node.attr) and (
                output_node.attr['value'].tensor.dtype == types_pb2.DT_FLOAT):
            # Hard-coded float constants must be converted as well. Decode the
            # whole tensor instead of reading float_val[0]: float_val is empty
            # when the data lives in tensor_content, and taking only the first
            # element would drop the shape of non-scalar constants.
            fp32_value = tensor_util.MakeNdarray(
                output_node.attr['value'].tensor)
            output_node.attr['value'].CopyFrom(
                attr_value_pb2.AttrValue(
                    tensor=tensor_util.make_tensor_proto(
                        fp32_value.astype('float16'),
                        dtype=types_pb2.DT_HALF,
                        shape=fp32_value.shape)))

        output_graph_def.node.extend([output_node])

    output_graph_def.library.CopyFrom(inference_graph.library)
    logger.info("Converted %d variables to const ops.", how_many_converted)
    return output_graph_def
def create_test_graph():
    """Build a small float32 convolutional GraphDef for tests.

    The graph is a Placeholder named "input" followed by three Conv2D layers
    ("conv1", "conv2", "conv3"). The first two are each followed by a BiasAdd
    and a Relu. Weights and biases are Const nodes filled with the absolute
    value of normally distributed random numbers.

    Returns:
      The assembled GraphDef.
    """

    def float32_attr():
        # Type attribute shared by every node in this graph.
        return attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)

    def bytes_attr(raw):
        return attr_value_pb2.AttrValue(s=raw)

    def make_node(name, op, inputs=()):
        node = node_def_pb2.NodeDef()
        node.name = name
        node.op = op
        node.input.extend(inputs)
        return node

    def random_const(name, *dims):
        # Const node holding |N(0, 1)| samples of the requested shape.
        value = np.float32(np.abs(np.random.randn(*dims)))
        node = make_node(name, "Const")
        node.attr['dtype'].CopyFrom(float32_attr())
        node.attr['value'].CopyFrom(
            attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
                value, value.dtype.type, value.shape)))
        return node

    def conv2d(name, data, weights):
        node = make_node(name, "Conv2D", [data.name, weights.name])
        node.attr['T'].CopyFrom(float32_attr())
        for key in ('strides', 'dilations'):
            node.attr[key].CopyFrom(
                attr_value_pb2.AttrValue(
                    list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])))
        node.attr['padding'].CopyFrom(bytes_attr(b'SAME'))
        node.attr['data_format'].CopyFrom(bytes_attr(b'NHWC'))
        return node

    def bias_add(name, data, bias):
        node = make_node(name, "BiasAdd", [data.name, bias.name])
        node.attr['T'].CopyFrom(float32_attr())
        node.attr['data_format'].CopyFrom(bytes_attr(b'NHWC'))
        return node

    def relu(name, data):
        node = make_node(name, "Relu", [data.name])
        node.attr['T'].CopyFrom(float32_attr())
        return node

    placeholder = make_node("input", "Placeholder")
    placeholder.attr["dtype"].CopyFrom(float32_attr())

    # Nodes are created in the same order as they are listed in the graph so
    # that the sequence of random draws stays the same.
    layers = [placeholder]

    layers.append(random_const("conv1_weights", 3, 3, 3, 32))
    layers.append(conv2d("conv1", placeholder, layers[-1]))
    layers.append(random_const("conv1_bias", 32))
    layers.append(bias_add("conv1_bias_add", layers[-2], layers[-1]))
    layers.append(relu("relu", layers[-1]))
    first_activation = layers[-1]

    layers.append(random_const("conv2_weights", 3, 3, 32, 32))
    layers.append(conv2d("conv2", first_activation, layers[-1]))
    layers.append(random_const("conv2_bias", 32))
    layers.append(bias_add("conv2_bias_add", layers[-2], layers[-1]))
    layers.append(relu("relu2", layers[-1]))
    second_activation = layers[-1]

    layers.append(random_const("conv3_weights", 3, 3, 32, 32))
    layers.append(conv2d("conv3", second_activation, layers[-1]))

    test_graph = graph_pb2.GraphDef()
    test_graph.node.extend(layers)
    return test_graph
def constant(value, dtype=None, shape=None, name="Const", verify_shape=False):
  """Creates a constant tensor.

  The tensor is filled with values of type `dtype`, taken from `value` and,
  when given, laid out according to `shape`.

  `value` may be a single constant or a list of values of type `dtype`. A list
  must not be longer than the number of elements implied by `shape` (if
  specified). When the list is shorter than that, its last element is used to
  fill the remaining entries.

  `shape` is optional; without it the shape of `value` is used. When `dtype`
  is omitted the type is inferred from `value`.

  For example:

  ```python
  # Constant 1-D Tensor populated with value list.
  tensor = tf.constant([1, 2, 3, 4, 5, 6, 7]) => [1 2 3 4 5 6 7]

  # Constant 2-D tensor populated with scalar value -1.
  tensor = tf.constant(-1.0, shape=[2, 3]) => [[-1. -1. -1.]
                                               [-1. -1. -1.]]
  ```

  Args:
    value:          A constant value (or list) of output type `dtype`.
    dtype:          The type of the elements of the resulting tensor.
    shape:          Optional dimensions of resulting tensor.
    name:           Optional name for the tensor.
    verify_shape:   Boolean that enables verification of a shape of values.

  Returns:
    A Constant Tensor.

  Raises:
    TypeError if shape is incorrectly specified or unsupported.
  """
  if context.in_graph_mode():
    # Graph mode: emit a "Const" op carrying the value as a TensorProto.
    graph = ops.get_default_graph()
    value_attr = attr_value_pb2.AttrValue()
    value_attr.tensor.CopyFrom(
        tensor_util.make_tensor_proto(
            value, dtype=dtype, shape=shape, verify_shape=verify_shape))
    dtype_attr = attr_value_pb2.AttrValue(type=value_attr.tensor.dtype)
    const_op = graph.create_op(
        "Const", [], [dtype_attr.type],
        attrs={"value": value_attr, "dtype": dtype_attr},
        name=name)
    return const_op.outputs[0]

  # Eager mode: convert first, then reconcile with the requested shape.
  eager_tensor = ops.convert_to_eager_tensor(value, dtype)
  if shape is None:
    return eager_tensor

  requested = tensor_shape.as_shape(shape)
  if requested == eager_tensor.shape:
    return eager_tensor
  if verify_shape:
    raise TypeError("Expected Tensor's shape: %s, got %s." %
                    (tuple(requested), tuple(eager_tensor.shape)))

  element_count = eager_tensor.shape.num_elements()
  # TODO(josh11b): Implement shape -> eager tensor conversion.
  if element_count == requested.num_elements():
    # Same number of elements: only the layout differs.
    return _eager_reshape(eager_tensor, requested.as_list())
  if element_count == 1:
    # A single value is broadcast over the whole requested shape.
    return _eager_fill(requested.as_list(), eager_tensor)
  raise TypeError(
      "Eager execution of tf.constant with unsupported shape "
      "(value has %d elements, shape is %s with %d elements)." %
      (element_count, requested, requested.num_elements()))
def extract_example_parser_configuration(parse_example_op, sess):
  """Returns an ExampleParserConfig proto.

  The configuration is assembled from the attributes of `parse_example_op`
  and from the values of its key / default-value inputs, which are fetched
  with `sess`.

  Args:
    parse_example_op: A ParseExample `Operation`
    sess: A tf.Session needed to obtain some configuration values.

  Returns:
    A ExampleParserConfig proto.

  Raises:
    ValueError: If attributes are inconsistent.
  """
  config = example_parser_configuration_pb2.ExampleParserConfiguration()

  num_sparse = parse_example_op.get_attr("Nsparse")
  num_dense = parse_example_op.get_attr("Ndense")
  total_features = num_dense + num_sparse

  sparse_types = parse_example_op.get_attr("sparse_types")
  dense_types = parse_example_op.get_attr("Tdense")
  dense_shapes = parse_example_op.get_attr("dense_shapes")

  if len(sparse_types) != num_sparse:
    raise ValueError("len(sparse_types) attribute does not match "
                     "Nsparse attribute (%d vs %d)" %
                     (len(sparse_types), num_sparse))

  if len(dense_types) != num_dense:
    raise ValueError("len(dense_types) attribute does not match "
                     "Ndense attribute (%d vs %d)" %
                     (len(dense_types), num_dense))

  if len(dense_shapes) != num_dense:
    raise ValueError("len(dense_shapes) attribute does not match "
                     "Ndense attribute (%d vs %d)" %
                     (len(dense_shapes), num_dense))

  # Skip over the serialized input, and the names input.
  fetch_list = parse_example_op.inputs[2:]

  # Fetch total_features key names and num_dense default values.
  if len(fetch_list) != (total_features + num_dense):
    raise ValueError("len(fetch_list) does not match total features + "
                     "num_dense (%d vs %d)" %
                     (len(fetch_list), (total_features + num_dense)))

  fetched = sess.run(fetch_list)

  if len(fetched) != len(fetch_list):
    raise ValueError("len(fetched) does not match len(fetch_list) "
                     "(%d vs %d)" % (len(fetched), len(fetch_list)))

  # Fetch indices: sparse keys first, then dense keys, then dense defaults.
  sparse_keys_start = 0
  dense_keys_start = sparse_keys_start + num_sparse
  dense_def_start = dense_keys_start + num_dense

  # Output tensor indices: sparse indices, sparse values, sparse shapes and
  # finally the dense values.
  sparse_indices_start = 0
  sparse_values_start = num_sparse
  sparse_shapes_start = sparse_values_start + num_sparse
  dense_values_start = sparse_shapes_start + num_sparse

  # Dense features.
  for i in range(num_dense):
    key = fetched[dense_keys_start + i]
    feature_config = config.feature_map[key]
    # Convert the default value numpy array fetched from the session run
    # into a TensorProto.
    fixed_config = feature_config.fixed_len_feature

    fixed_config.default_value.CopyFrom(
        tensor_util.make_tensor_proto(fetched[dense_def_start + i]))
    # Convert the shape from the attributes
    # into a TensorShapeProto.
    fixed_config.shape.CopyFrom(
        tensor_shape.TensorShape(dense_shapes[i]).as_proto())

    fixed_config.dtype = dense_types[i]
    # Get the output tensor name.
    fixed_config.values_output_tensor_name = parse_example_op.outputs[
        dense_values_start + i].name

  # Sparse features.
  for i in range(num_sparse):
    key = fetched[sparse_keys_start + i]
    feature_config = config.feature_map[key]
    var_len_feature = feature_config.var_len_feature
    var_len_feature.dtype = sparse_types[i]
    var_len_feature.indices_output_tensor_name = parse_example_op.outputs[
        sparse_indices_start + i].name
    var_len_feature.values_output_tensor_name = parse_example_op.outputs[
        sparse_values_start + i].name
    var_len_feature.shapes_output_tensor_name = parse_example_op.outputs[
        sparse_shapes_start + i].name

  return config
def testUnsupportedDType(self):
  # Passing 0 as the dtype argument must be rejected with a TypeError.
  values = np.array([1])
  self.assertRaises(TypeError, tensor_util.make_tensor_proto, values, 0)
def testStringWithImplicitRepeat(self):
  # A single string combined with an explicit shape is expected to come back
  # from MakeNdarray repeated over the whole [3, 4] array.
  t = tensor_util.make_tensor_proto("f", shape=[3, 4])
  a = tensor_util.MakeNdarray(t)
  # Use the builtin `object`: the `np.object` alias was deprecated in
  # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
  self.assertAllEqual(np.array([[b"f"] * 4] * 3, dtype=object), a)
def convert_variables_to_constants(sess, input_graph_def, output_node_names,
                                   variable_names_whitelist=None,
                                   variable_names_blacklist=None,
                                   use_fp16=False):
    """Replace the variables of a graph with constants, optionally as fp16.

    The sub-graph needed to compute `output_node_names` is extracted, the
    current value of every selected variable is fetched through `sess`, and a
    new GraphDef is built in which those variables are ``Const`` nodes.

    Args:
      sess: Session used to fetch the current variable values (`sess.run`).
      input_graph_def: GraphDef holding the network.
      output_node_names: Names of the nodes the frozen graph must still compute.
      variable_names_whitelist: Optional collection of variable names; when
        given, only these variables are converted.
      variable_names_blacklist: Optional collection of variable names that must
        not be converted.
      use_fp16: When true, DT_FLOAT variables, DT_FLOAT constant values and the
        DT_FLOAT type attributes `dtype`, `T`, `DstT`, `SrcT` and `Tparams` are
        rewritten to DT_HALF.

    Returns:
      A new GraphDef with the selected variables replaced by constants.
    """
    from tensorflow.python.framework.graph_util_impl import extract_sub_graph
    from tensorflow.core.framework import graph_pb2
    from tensorflow.core.framework import node_def_pb2
    from tensorflow.core.framework import attr_value_pb2
    from tensorflow.core.framework import types_pb2
    from tensorflow.python.framework import tensor_util

    def patch_dtype(input_node, field_name, output_node):
        # Rewrite a DT_FLOAT type attribute to DT_HALF (fp16 mode only).
        if use_fp16 and (field_name in input_node.attr) and (
                input_node.attr[field_name].type == types_pb2.DT_FLOAT):
            output_node.attr[field_name].CopyFrom(
                attr_value_pb2.AttrValue(type=types_pb2.DT_HALF))

    inference_graph = extract_sub_graph(input_graph_def, output_node_names)

    # Collect the variables to freeze together with the tensor names that
    # yield their current values.
    variable_names = []
    variable_dict_names = []
    for node in inference_graph.node:
        if node.op in ["Variable", "VariableV2", "VarHandleOp"]:
            variable_name = node.name
            if ((variable_names_whitelist is not None and
                 variable_name not in variable_names_whitelist) or
                    (variable_names_blacklist is not None and
                     variable_name in variable_names_blacklist)):
                continue
            variable_dict_names.append(variable_name)
            if node.op == "VarHandleOp":
                # Resource variables are read through their ReadVariableOp.
                variable_names.append(variable_name + "/Read/ReadVariableOp:0")
            else:
                variable_names.append(variable_name + ":0")

    if variable_names:
        returned_variables = sess.run(variable_names)
    else:
        returned_variables = []
    found_variables = dict(zip(variable_dict_names, returned_variables))

    output_graph_def = graph_pb2.GraphDef()
    how_many_converted = 0
    for input_node in inference_graph.node:
        output_node = node_def_pb2.NodeDef()
        if input_node.name in found_variables:
            # Variable -> Const holding the fetched value.
            output_node.op = "Const"
            output_node.name = input_node.name
            dtype = input_node.attr["dtype"]
            data = found_variables[input_node.name]
            if use_fp16 and dtype.type == types_pb2.DT_FLOAT:
                # The `dtype` attribute itself is set by patch_dtype() below.
                output_node.attr["value"].CopyFrom(
                    attr_value_pb2.AttrValue(
                        tensor=tensor_util.make_tensor_proto(
                            data.astype("float16"),
                            dtype=types_pb2.DT_HALF,
                            shape=data.shape)))
            else:
                output_node.attr["dtype"].CopyFrom(dtype)
                output_node.attr["value"].CopyFrom(
                    attr_value_pb2.AttrValue(
                        tensor=tensor_util.make_tensor_proto(
                            data, dtype=dtype.type, shape=data.shape)))
            how_many_converted += 1
        elif input_node.op == "ReadVariableOp" and (
                input_node.input[0] in found_variables):
            # Reading a frozen variable becomes a plain Identity of the Const.
            output_node.op = "Identity"
            output_node.name = input_node.name
            output_node.input.extend([input_node.input[0]])
            output_node.attr["T"].CopyFrom(input_node.attr["dtype"])
            if "_class" in input_node.attr:
                output_node.attr["_class"].CopyFrom(input_node.attr["_class"])
        else:
            # Every other node is copied unchanged.
            output_node.CopyFrom(input_node)

        patch_dtype(input_node, "dtype", output_node)
        patch_dtype(input_node, "T", output_node)
        patch_dtype(input_node, "DstT", output_node)
        patch_dtype(input_node, "SrcT", output_node)
        patch_dtype(input_node, "Tparams", output_node)

        if use_fp16 and ("value" in output_node.attr) and (
                output_node.attr["value"].tensor.dtype == types_pb2.DT_FLOAT):
            # Hard-coded float constants must be converted as well. Decode the
            # whole tensor instead of reading float_val[0]: float_val is empty
            # when the data lives in tensor_content, and taking only the first
            # element would drop the shape of non-scalar constants.
            fp32_value = tensor_util.MakeNdarray(
                output_node.attr["value"].tensor)
            output_node.attr["value"].CopyFrom(
                attr_value_pb2.AttrValue(
                    tensor=tensor_util.make_tensor_proto(
                        fp32_value.astype("float16"),
                        dtype=types_pb2.DT_HALF,
                        shape=fp32_value.shape)))

        output_graph_def.node.extend([output_node])

    output_graph_def.library.CopyFrom(inference_graph.library)
    return output_graph_def
def set_attr_tensor(self, node, key, value, dtype, shape=None):
    """Store `value` as the tensor-valued attribute `key` of `node`.

    `value` is encoded with `tensor_util.make_tensor_proto` using the given
    `dtype` and optional `shape`, wrapped in an AttrValue and copied into
    `node.attr[key]`.
    """
    # The attribute slot is looked up before the value is encoded, matching
    # the evaluation order of a single chained expression.
    slot = node.attr[key]
    encoded = tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape)
    slot.CopyFrom(attr_value_pb2.AttrValue(tensor=encoded))
class TestGraph_util(unittest.TestCase):
    """Tests for GraphAnalyzer node replacement on a small hand-built graph.

    The graph is built once at class level. Node wiring (name <- inputs):
      add          <- input0, input1
      mul          <- add, input3
      rsqrt        <- mul
      sqrt1 (Relu) <- rsqrt
      block_output <- placeholder, sqrt1
      res_add      <- rsqrt, input2
      end          <- block_output, res_add
    """

    x_node = node_def_pb2.NodeDef()
    x_node.name = "placeholder"
    x_node.op = "Placeholder"

    input0_node = node_def_pb2.NodeDef()
    input0_node.name = "input0"
    input0_node.op = "Const"
    input0_value = np.float32(np.abs(np.random.randn(4, 3, 2)))
    input0_node.attr["value"].CopyFrom(
        attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
            input0_value, input0_value.dtype.type, input0_value.shape)))

    input1_node = node_def_pb2.NodeDef()
    input1_node.name = "input1"
    input1_node.op = "Const"
    input1_value = np.float32(np.abs(np.random.randn(4, 1, 1)))
    input1_node.attr["value"].CopyFrom(
        attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
            input1_value, input1_value.dtype.type, input1_value.shape)))

    add_node = node_def_pb2.NodeDef()
    add_node.op = "Add"
    add_node.name = "add"
    add_node.input.extend([input0_node.name, input1_node.name])

    input2_node = node_def_pb2.NodeDef()
    input2_node.name = "input2"
    input2_node.op = "Const"
    input2_value = np.float32(np.abs(np.random.randn(1)))
    input2_node.attr["value"].CopyFrom(
        attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
            input2_value, input2_value.dtype.type, input2_value.shape)))

    input3_node = node_def_pb2.NodeDef()
    input3_node.name = "input3"
    input3_node.op = "Const"
    input3_value = np.float32(np.abs(np.random.randn(1)))
    input3_node.attr["value"].CopyFrom(
        attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
            input3_value, input3_value.dtype.type, input3_value.shape)))

    mul_node = node_def_pb2.NodeDef()
    mul_node.op = "Mul"
    mul_node.name = "mul"
    mul_node.input.extend([add_node.name, input3_node.name])

    sqrt_node = node_def_pb2.NodeDef()
    sqrt_node.name = "rsqrt"
    sqrt_node.op = "Rsqrt"
    sqrt_node.input.extend([mul_node.name])

    # Despite its name this node is a Relu applied to the Rsqrt output.
    sqrt1_node = node_def_pb2.NodeDef()
    sqrt1_node.op = "Relu"
    sqrt1_node.name = "sqrt1"
    sqrt1_node.input.extend([sqrt_node.name])

    block_node = node_def_pb2.NodeDef()
    block_node.name = "block_output"
    block_node.op = "Add"
    block_node.input.extend([x_node.name, sqrt1_node.name])

    res_node = node_def_pb2.NodeDef()
    res_node.name = "res_add"
    res_node.op = "Add"
    res_node.input.extend([sqrt_node.name, input2_node.name])

    end_node = node_def_pb2.NodeDef()
    end_node.name = "end"
    end_node.op = "Add"
    end_node.input.extend([block_node.name, res_node.name])

    graph_def = graph_pb2.GraphDef()
    graph_def.node.extend([
        x_node, input0_node, input1_node, input2_node, input3_node, add_node,
        mul_node, sqrt_node, sqrt1_node, block_node, res_node, end_node
    ])

    def test_replace_constant_graph_with_constant_node(self):
        # unittest assertions are used instead of bare `assert` so the checks
        # still run under `python -O` and report the offending values.
        graph_analyzer = GraphAnalyzer()
        graph_analyzer.graph = copy.deepcopy(self.graph_def)
        graph_analyzer.parse_graph()

        def make_replacement(target_node):
            # Fresh random constant named after the node it is meant to replace.
            new_constant_value = np.random.random([4, 1])
            new_constant_type = tf.as_dtype(
                np.float32(new_constant_value).dtype)
            return GraphRewriterHelper.create_constant_node(
                target_node.name + "_const", new_constant_value,
                new_constant_type)

        # Each of these replacements is expected to succeed and leave the
        # listed number of nodes in the dumped graph.
        replacements = (
            (self.add_node, 10),
            (self.mul_node, 8),
            (self.sqrt_node, 7),
        )
        for target_node, expected_node_count in replacements:
            new_constant_node = make_replacement(target_node)
            self.assertTrue(
                graph_analyzer.replace_constant_graph_with_constant_node(
                    new_constant_node, target_node.name))
            result_graph = graph_analyzer.dump_graph()
            self.assertEqual(len(list(result_graph.node)),
                             expected_node_count)

        # block_output also takes the Placeholder as input; the replacement is
        # expected to be refused.
        new_constant_node = make_replacement(self.block_node)
        self.assertFalse(
            graph_analyzer.replace_constant_graph_with_constant_node(
                new_constant_node, self.block_node.name))

    def test_replace_node(self):
        graph_analyzer = GraphAnalyzer()
        graph_analyzer.graph = copy.deepcopy(self.graph_def)
        graph_analyzer.parse_graph()

        new_add_node = node_def_pb2.NodeDef()
        new_add_node.op = "Add"
        new_add_node.name = "add1"
        new_add_node.input.extend(
            [self.input0_node.name, self.input1_node.name])

        # Swap "add" for "add1"; "mul" is passed as the affected output node.
        graph_analyzer.replace_node(new_add_node, self.add_node.name,
                                    [self.mul_node.name])
        result_nodes = list(graph_analyzer.dump_graph().node)
        self.assertNotIn(self.add_node, result_nodes)
        self.assertIn(new_add_node, result_nodes)
def generate_output_graph(input_graph_def, input_node_map, output_node_map,
                          fuse_op_list, fuse_op_deq_list):
    """Build a GraphDef in which quantized convs are fused with Requantize.

    Nodes of `input_graph_def` are visited in order:
      * an index in `fuse_op_list` becomes `<op>AndRequantize`, named after
        the node three positions later, with the two nodes in between
        appended as extra inputs;
      * an index in `fuse_op_deq_list` is fused the same way, and additionally
        receives the three outputs of the summand's producer as inputs;
      * `Dequantize` nodes are copied with mode "SCALED" and a `T` chosen from
        the ops feeding them;
      * everything else is copied, unless it was consumed by a fusion.

    When the first input of a fused node comes from an op whose type contains
    "Requantize", its bias constant is re-quantised to qint32 in place and
    emitted just before the fused node.

    Args:
      input_graph_def: GraphDef to rewrite.
      input_node_map: Mapping from node name to node.
      output_node_map: Mapping from node name to the nodes consuming it
        (presumably; only `.op` of each element is read here).
      fuse_op_list: Indices of nodes to fuse with the following Requantize.
      fuse_op_deq_list: Indices of sum-style nodes to fuse.

    Returns:
      The rewritten GraphDef.
    """
    int8_type = dtypes.qint8.as_datatype_enum
    uint8_type = dtypes.quint8.as_datatype_enum
    float32_type = dtypes.float32.as_datatype_enum
    qint32_type = dtypes.qint32.as_datatype_enum

    output_graph_def = graph_pb2.GraphDef()
    skip_list = []
    skip_node_name = []

    def type_attr(type_enum):
        return attr_value_pb2.AttrValue(type=type_enum)

    def first_float(const_node):
        # First float stored in the node's `value` tensor.
        return (const_node.attr['value'].tensor.float_val)[0]

    def copy_required_attrs(source, target):
        for key in ('Tinput', 'Tfilter', 'strides', 'padding'):
            target.attr[key].CopyFrom(source.attr[key])

    def copy_optional_attrs(source, target):
        for key in ('padding_list', 'dilations'):
            if key in source.attr:
                target.attr[key].CopyFrom(source.attr[key])

    def requantize_bias(conv_node, fused_node, depthwise_is_per_channel):
        # Rescale the float bias feeding `fused_node` to qint32, emit it and
        # mark `fused_node` as taking a qint32 bias.
        bias_node = input_node_map[fused_node.input[2]]
        producer = input_node_map[fused_node.input[0]]
        # The min/max inputs carry a two-character suffix that is dropped
        # before the lookup.
        max_input_node = input_node_map[producer.input[4][:-2]]
        min_input_node = input_node_map[producer.input[3][:-2]]
        max_filter = input_node_map[fused_node.input[6]]
        min_filter = input_node_map[fused_node.input[5]]
        min_input = first_float(min_input_node)
        max_input = first_float(max_input_node)

        per_channel = (
            (depthwise_is_per_channel and 'Depthwise' in conv_node.op) or
            "RequantizePerChannel" in [
                consumer.op for consumer in output_node_map[conv_node.name]
            ])
        if per_channel:
            channel_size = max_filter.attr[
                'value'].tensor.tensor_shape.dim[0].size
            max_filter_tensor = tensor_util.MakeNdarray(
                max_filter.attr['value'].tensor)
            min_filter_tensor = tensor_util.MakeNdarray(
                min_filter.attr['value'].tensor)
        else:
            channel_size = 1
            max_filter_tensor = [first_float(max_filter)]
            min_filter_tensor = [first_float(min_filter)]

        bias_tensor = tensor_util.MakeNdarray(bias_node.attr['value'].tensor)
        bias_length = bias_tensor.shape[0]

        input_range = max(abs(max_input), abs(min_input))
        scales = [
            255.0 * 127.0 /
            (input_range *
             max(abs(max_filter_tensor[i]), abs(min_filter_tensor[i])))
            for i in range(channel_size)
        ]
        # One scale per bias element when there are several channels,
        # otherwise the first scale for every element.
        int32_bias = [
            int(bias_tensor[i] * scales[i if channel_size > 1 else 0])
            for i in range(bias_length)
        ]

        bias_node.attr['dtype'].CopyFrom(type_attr(qint32_type))
        bias_node.attr['value'].CopyFrom(
            attr_value_pb2.AttrValue(
                tensor=tensor_util.make_tensor_proto(
                    int32_bias, dtypes.int32, bias_tensor.shape)))
        # Encoded as int32, then relabelled as qint32.
        bias_node.attr['value'].tensor.dtype = qint32_type
        skip_node_name.append(bias_node.name)
        output_graph_def.node.extend([bias_node])
        fused_node.attr["Tbias"].CopyFrom(type_attr(qint32_type))

    def set_bias_type(conv_node, fused_node, depthwise_is_per_channel):
        if "Requantize" in input_node_map[fused_node.input[0]].op:
            requantize_bias(conv_node, fused_node, depthwise_is_per_channel)
        else:
            fused_node.attr["Tbias"].CopyFrom(type_attr(float32_type))

    for index, node in enumerate(input_graph_def.node):
        if index in fuse_op_list:
            const_node_1 = input_graph_def.node[index + 1]
            const_node_2 = input_graph_def.node[index + 2]
            requantize_node = input_graph_def.node[index + 3]

            new_node = node_def_pb2.NodeDef()
            new_node.op = node.op + "AndRequantize"
            new_node.name = requantize_node.name
            new_node.input.extend(node.input)
            new_node.input.append(const_node_1.name)
            new_node.input.append(const_node_2.name)
            copy_required_attrs(node, new_node)

            set_bias_type(node, new_node, True)
            copy_optional_attrs(node, new_node)

            if node.op in ("QuantizedConv2D", "QuantizedConv2DWithBias"):
                out_type = int8_type
            else:
                out_type = uint8_type
            new_node.attr["out_type"].CopyFrom(type_attr(out_type))

            # The two consts are re-emitted right after the fused node, so all
            # three following positions are skipped later on.
            skip_list.extend([index + 1, index + 2, index + 3])
            output_graph_def.node.extend(
                [new_node, const_node_1, const_node_2])
            continue

        if index in skip_list or node.name in skip_node_name:
            continue

        if node.op == "Dequantize":
            new_node = node_def_pb2.NodeDef()
            new_node.CopyFrom(node)
            new_node.attr["mode"].s = b"SCALED"
            p_node = input_node_map[new_node.input[0]]
            pp_node = input_node_map[p_node.name].input[0]
            feeds_unsigned = (
                "Relu" in input_node_map[pp_node].op or
                p_node.op in ("QuantizedAvgPool", "QuantizedMaxPool",
                              "QuantizedConcatV2"))
            new_node.attr["T"].CopyFrom(
                type_attr(uint8_type if feeds_unsigned else int8_type))
            output_graph_def.node.extend([new_node])
            continue

        if index in fuse_op_deq_list:
            original_summand_node = input_node_map[node.input[-1]]
            sum_const_node_1 = input_graph_def.node[index + 1]
            sum_const_node_2 = input_graph_def.node[index + 2]
            sum_requantize_node = input_graph_def.node[index + 3]

            new_node = node_def_pb2.NodeDef()
            new_node.op = node.op + "AndRequantize"
            new_node.name = sum_requantize_node.name
            # The last input (the summand) is replaced by the outputs of the
            # summand's own producer below.
            new_node.input.extend(node.input[:-1])
            new_node.input.append(sum_const_node_1.name)
            new_node.input.append(sum_const_node_2.name)
            summand_source = input_node_map[
                original_summand_node.name].input[0]
            new_node.input.append(summand_source)
            new_node.input.append(summand_source + ":1")
            new_node.input.append(summand_source + ":2")

            # Only the Requantize node is skipped here; the two const nodes
            # are left to be copied when the loop reaches them.
            skip_list.append(index + 3)
            copy_required_attrs(node, new_node)

            set_bias_type(node, new_node, False)
            copy_optional_attrs(node, new_node)
            new_node.attr["out_type"].CopyFrom(type_attr(uint8_type))

            summand_is_unsigned = dtypes.as_dtype(
                original_summand_node.attr["T"].type) == uint8_type
            summand_op_type = uint8_type if summand_is_unsigned else int8_type
            if summand_op_type == int8_type:
                new_node.op = (
                    "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize")
            new_node.attr["Tsummand"].CopyFrom(type_attr(summand_op_type))
            output_graph_def.node.extend([new_node])
            continue

        new_node = node_def_pb2.NodeDef()
        new_node.CopyFrom(node)
        output_graph_def.node.extend([new_node])

    return output_graph_def
def _bf16_convert(self, bf16_node_name):
    """Switch one node to bfloat16 and patch the edges around it.

    The node is appended to ``self.converted_ops`` first. If it carries a
    ``T`` attribute that is not float32 and it is not a ``Dequantize`` op,
    nothing else happens.

    Each input is then handled by the first matching rule:

    * ``Const`` with float32 dtype: the constant itself is rewritten as
      bfloat16, so no Cast is needed.
    * an input whose ``T`` is not float32 (and is not ``Dequantize``):
      left untouched.
    * ``Cast`` whose ``SrcT`` is bfloat16: the Cast is bypassed, and removed
      when this node was its only consumer.
    * a not-yet-handled op from the WHITE/GRAY/CLEAR lists with a single
      consumer: converted recursively.
    * an input that is already converted: left untouched.
    * anything else: an ``*_FP32toBF16`` Cast is inserted, or reused when
      one already exists for that input.

    After the node's ``T`` is set to bfloat16, each output is handled in a
    similar way: a Cast-to-bfloat16 consumer is removed, convertible
    consumers are converted recursively, and every other consumer is fed
    through a shared ``*_BF16toFP32`` Cast.

    Args:
        bf16_node_name: name of the node to convert; must be a key of
            ``self.cur_graph.node_name_details``.
    """
    self.converted_ops.append(bf16_node_name)
    bf16_node_detail = self.cur_graph.node_name_details[bf16_node_name]
    bf16_node = bf16_node_detail.node
    bf16_node_inputs = list(bf16_node.input)
    fp32_attr = attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)
    bf16_attr = attr_value_pb2.AttrValue(type=dtypes.bfloat16.as_datatype_enum)

    if ('T' in bf16_node.attr and bf16_node.attr['T'] != fp32_attr
            and bf16_node.op != 'Dequantize'):
        return

    for each_input in bf16_node_inputs:
        each_input_detail = self.cur_graph.node_name_details[
            Helper.node_name_from_input(each_input)]
        each_input_node = each_input_detail.node
        # Const + Cast => Const optimization
        if each_input_node.op == "Const":
            if each_input_node.attr["dtype"] == fp32_attr:
                fp32_value = tensor_util.MakeNdarray(
                    each_input_node.attr.get('value').tensor)
                Helper.set_attr_dtype(each_input_node, "dtype", dtypes.bfloat16)
                each_input_node.attr['value'].CopyFrom(
                    attr_value_pb2.AttrValue(
                        tensor=tensor_util.make_tensor_proto(
                            fp32_value, dtypes.bfloat16, fp32_value.shape)))
                self.converted_ops.append(each_input)
        elif ('T' in each_input_node.attr
              and each_input_node.attr['T'] != fp32_attr
              and each_input_node.op != 'Dequantize'):
            continue
        # Cast + Cast => O optimization
        elif (each_input_node.op == "Cast"
              and each_input_node.attr["SrcT"] == bf16_attr):
            cast_input_name = each_input_node.input[0]
            for index, input_name in enumerate(bf16_node.input):
                if input_name == each_input_node.name:
                    bf16_node.input[index] = cast_input_name
            self.cur_graph.node_name_details[
                cast_input_name].outputs.append(bf16_node_name)
            if len(each_input_detail.outputs) == 1:
                self.cur_graph.remove_node(each_input)
                self.cur_graph.node_name_details[
                    cast_input_name].outputs.remove(each_input)
        elif (each_input not in self.fp32_ops + self.converted_ops
              and each_input_node.op in BF16Convert.WHITE_LIST
              + BF16Convert.GRAY_LIST + BF16Convert.CLEAR_LIST
              and len(each_input_detail.outputs) == 1):
            self._bf16_convert(each_input)
            # TODO: Consider multi-output case
        elif each_input in self.converted_ops:
            pass
        else:
            # The Cast is registered under the sanitised name (':' is not
            # valid inside a node name), so the existence check and the
            # lookup must use that same name. Checking the raw input name
            # meant inputs such as "foo:1" never found their existing Cast.
            input_cast_name = each_input.replace(':', '__') + "_FP32toBF16"
            if input_cast_name not in self.cur_graph.node_name_details:
                input_cast_node = Helper.create_node(
                    "Cast", input_cast_name, [each_input])
                Helper.set_attr_dtype(input_cast_node, "DstT", dtypes.bfloat16)
                Helper.set_attr_dtype(input_cast_node, "SrcT", dtypes.float32)
                Helper.set_attr_bool(input_cast_node, "Truncate", False)
                self.cur_graph.add_node(input_cast_node, each_input,
                                        [bf16_node_name])
            else:
                input_cast_node = self.cur_graph.node_name_details[
                    input_cast_name].node
                for index, input_name in enumerate(bf16_node.input):
                    # The exact match covers ported inputs ("foo:1"), which
                    # the stripped-name comparison alone can never equal.
                    if (input_name == each_input
                            or Helper.node_name_from_input(input_name)
                            == each_input):
                        bf16_node.input[index] = input_cast_node.name
                self.cur_graph.node_name_details[
                    input_cast_node.name].outputs.append(bf16_node_name)

    # TODO: Need consider different op type
    Helper.set_attr_dtype(bf16_node, "T", dtypes.bfloat16)

    # Iterate over a copy: the live outputs list is edited inside the loop.
    bf16_node_outputs = copy.deepcopy(bf16_node_detail.outputs)
    for each_output in bf16_node_outputs:
        each_output_detail = self.cur_graph.node_name_details[each_output]
        each_output_node = each_output_detail.node
        # Need consider output node op type
        if (each_output_node.op == "Cast"
                and each_output_node.attr["DstT"] == bf16_attr):
            # The consumer only cast to bfloat16, which is now redundant:
            # connect its consumers straight to this node and drop it.
            for cast_output in each_output_detail.outputs:
                cast_output_node = self.cur_graph.node_name_details[
                    cast_output].node
                for index, input_name in enumerate(cast_output_node.input):
                    if each_output == input_name:
                        cast_output_node.input[index] = bf16_node.name
            bf16_node_detail.outputs.remove(each_output)
            bf16_node_detail.outputs.extend(each_output_detail.outputs)
            self.cur_graph.remove_node(each_output)
        elif (each_output not in self.fp32_ops + self.converted_ops
              and each_output_node.op in BF16Convert.WHITE_LIST
              + BF16Convert.GRAY_LIST + BF16Convert.CLEAR_LIST):
            # TODO: Consider multi node inputs case, check others inputs whether
            # converted to BF16
            self._bf16_convert(each_output)
        elif each_output in self.converted_ops:
            pass
        else:
            output_cast_name = bf16_node_name + "_BF16toFP32"
            if output_cast_name not in self.cur_graph.node_name_details:
                output_cast_node = Helper.create_node(
                    "Cast", output_cast_name, [bf16_node_name])
                Helper.set_attr_dtype(output_cast_node, "DstT", dtypes.float32)
                Helper.set_attr_dtype(output_cast_node, "SrcT", dtypes.bfloat16)
                Helper.set_attr_bool(output_cast_node, "Truncate", False)
                self.cur_graph.add_node(output_cast_node, bf16_node_name,
                                        [each_output])
            else:
                output_cast_node = self.cur_graph.node_name_details[
                    output_cast_name].node
                for index, input_name in enumerate(each_output_node.input):
                    if bf16_node_name == input_name:
                        each_output_node.input[index] = output_cast_node.name
                self.cur_graph.node_name_details[
                    output_cast_name].outputs.append(each_output)

    return
def testShapeTooLarge(self):
    """make_tensor_proto raises ValueError for two values and shape [1]."""
    two_values = np.array([1, 2])
    requested_shape = [1]
    with self.assertRaises(ValueError):
        tensor_util.make_tensor_proto(two_values, shape=requested_shape)
def convert_variables_to_constants(sess, input_graph_def, output_node_names):
    """Return a copy of the graph with its variables replaced by Const ops.

    The first input of every ``Assign`` node is treated as a variable name.
    All of those are evaluated in one ``sess.run`` call (as ``<name>:0``).
    The sub-graph needed for ``output_node_names`` is then extracted, and
    every node in it whose name is one of those variables is emitted as a
    ``Const`` holding the evaluated value; all other nodes are copied
    unchanged. Progress is written to stdout.

    Args:
      sess: Active TensorFlow session containing the variables.
      input_graph_def: GraphDef object holding the network.
      output_node_names: List of name strings for the result nodes of the graph.

    Returns:
      GraphDef containing a simplified version of the original.
    """
    print('call convert_variables')

    print('search nodes...')
    graph_nodes = input_graph_def.node
    variable_name_list = []
    for position, graph_node in enumerate(graph_nodes, start=1):
        if graph_node.op == "Assign":
            variable_name_list.append(graph_node.input[0])
        # Carriage return keeps the counter on a single console line.
        progress = "node: {0}/{1}".format(position, len(graph_nodes))
        sys.stdout.write("\r%s" % progress)
        sys.stdout.flush()
    print('')
    print('{0} nodes founded'.format(len(variable_name_list)))

    print('evaluate nodes..')
    fetch_names = [name + ":0" for name in variable_name_list]
    found_variables_list = sess.run(fetch_names)

    print('insert values..')
    found_variables = {}
    for position, name in enumerate(variable_name_list, start=1):
        found_variables[name] = found_variables_list[position - 1]
        progress = "node: {0}/{1}".format(position, len(variable_name_list))
        sys.stdout.write("\r%s" % progress)
        sys.stdout.flush()
    print('')

    # This graph only includes the nodes needed to evaluate the output nodes, and
    # removes unneeded nodes like those involved in saving and assignment.
    inference_graph = graph_util.extract_sub_graph(input_graph_def,
                                                   output_node_names)

    output_graph_def = graph_pb2.GraphDef()
    how_many_converted = 0
    for input_node in inference_graph.node:
        output_node = graph_pb2.NodeDef()
        if input_node.name not in found_variables:
            output_node.CopyFrom(input_node)
        else:
            dtype = input_node.attr["dtype"]
            data = found_variables[input_node.name]
            value_proto = tensor_util.make_tensor_proto(
                data, dtype=dtype.type, shape=data.shape)
            output_node.op = "Const"
            output_node.name = input_node.name
            output_node.attr["dtype"].CopyFrom(dtype)
            output_node.attr["value"].CopyFrom(
                attr_value_pb2.AttrValue(tensor=value_proto))
            how_many_converted += 1
        output_graph_def.node.extend([output_node])

    print("Converted %d variables to const ops." % how_many_converted)
    return output_graph_def
def do_transform(self):
    """Fold batch-normalization ops of ``self.input_graph`` into convolutions.

    For every ``BatchNormWithGlobalNormalization``, ``FusedBatchNorm`` or
    ``FusedBatchNormV3`` node whose data input is a ``Conv2D`` or
    ``DepthwiseConv2dNative`` with constant weights, and whose mean,
    variance, beta and gamma inputs are constants of shape
    ``(channel_count,)``, the normalization is replaced by:

    * the convolution with its weights multiplied per output channel by
      ``1 / sqrt(var + epsilon)`` (times gamma when
      ``self.scale_after_normalization(node)`` is true), and
    * a ``BiasAdd`` carrying the batch-norm node's name, which adds
      ``beta - mean * scale``.

    Candidates that fail any of those checks are logged with a warning and
    left in place.

    Returns:
      A new GraphDef with the folded batch-norm ops removed and the
      replacement ops appended after the surviving nodes.

    Raises:
      ValueError: If the graph is badly formed with duplicate node names.
    """
    input_node_map = {}
    for graph_node in self.input_graph.node:
        if graph_node.name in input_node_map:
            raise ValueError("Duplicate node names detected for ",
                             graph_node.name)
        input_node_map[graph_node.name] = graph_node

    batch_norm_ops = ("BatchNormWithGlobalNormalization", "FusedBatchNorm",
                      "FusedBatchNormV3")
    nodes_to_skip = {}
    new_ops = []
    for node in self.input_graph.node:
        if node.op not in batch_norm_ops:
            continue

        conv_input = node.input[self.INPUT_ORDER[node.op].index("conv_op")]
        conv_op = self.node_from_map(input_node_map, conv_input)
        if conv_op.op not in ("Conv2D", "DepthwiseConv2dNative"):
            tf_logging.warning(
                "Didn't find expected Conv2D or DepthwiseConv2dNative"
                " input to '%s'" % node.name)
            continue

        weights_op = self.node_from_map(input_node_map, conv_op.input[1])
        if weights_op.op != "Const":
            tf_logging.warning(
                "Didn't find expected conv Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (conv_op.name, weights_op))
            continue
        weights = self.values_from_const(weights_op)
        if conv_op.op == "Conv2D":
            channel_count = weights.shape[3]
        elif conv_op.op == "DepthwiseConv2dNative":
            channel_count = weights.shape[2] * weights.shape[3]
        expected_shape = (channel_count, )

        mean_input = node.input[self.INPUT_ORDER[node.op].index("mean_op")]
        mean_op = self.node_from_map(input_node_map, mean_input)
        if mean_op.op != "Const":
            tf_logging.warning(
                "Didn't find expected mean Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, mean_op))
            continue
        mean_value = self.values_from_const(mean_op)
        if mean_value.shape != expected_shape:
            tf_logging.warning(
                "Incorrect shape for mean, found %s, expected %s,"
                " for node %s" % (str(mean_value.shape),
                                  str(expected_shape), node.name))
            continue

        var_input = node.input[self.INPUT_ORDER[node.op].index("var_op")]
        var_op = self.node_from_map(input_node_map, var_input)
        if var_op.op != "Const":
            tf_logging.warning(
                "Didn't find expected var Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, var_op))
            continue
        var_value = self.values_from_const(var_op)
        if var_value.shape != expected_shape:
            tf_logging.warning(
                "Incorrect shape for var, found %s, expected %s,"
                " for node %s" % (str(var_value.shape),
                                  str(expected_shape), node.name))
            continue

        beta_input = node.input[self.INPUT_ORDER[node.op].index("beta_op")]
        beta_op = self.node_from_map(input_node_map, beta_input)
        if beta_op.op != "Const":
            tf_logging.warning(
                "Didn't find expected beta Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, beta_op))
            continue
        beta_value = self.values_from_const(beta_op)
        if beta_value.shape != expected_shape:
            tf_logging.warning(
                "Incorrect shape for beta, found %s, expected %s,"
                " for node %s" % (str(beta_value.shape),
                                  str(expected_shape), node.name))
            continue

        gamma_input = node.input[self.INPUT_ORDER[node.op].index("gamma_op")]
        gamma_op = self.node_from_map(input_node_map, gamma_input)
        if gamma_op.op != "Const":
            tf_logging.warning(
                "Didn't find expected gamma Constant input to '%s',"
                " found %s instead. Maybe because freeze_graph wasn't"
                " run first?" % (node.name, gamma_op))
            continue
        gamma_value = self.values_from_const(gamma_op)
        if gamma_value.shape != expected_shape:
            tf_logging.warning(
                "Incorrect shape for gamma, found %s, expected %s,"
                " for node %s" % (str(gamma_value.shape),
                                  str(expected_shape), node.name))
            continue

        variance_epsilon_value = node.attr[self.EPSILON_ATTR[node.op]].f

        # Everything checked out: these originals are superseded by new_ops.
        for replaced in (node, weights_op, mean_op, var_op, beta_op,
                         gamma_op, conv_op):
            nodes_to_skip[replaced.name] = True

        use_gamma = self.scale_after_normalization(node)
        scale_value = 1.0 / np.vectorize(math.sqrt)(
            var_value + variance_epsilon_value)
        if use_gamma:
            scale_value = scale_value * gamma_value
        offset_value = (-mean_value * scale_value) + beta_value

        # Scale a copy of the weights in place, one element at a time, using
        # the scale of the output channel each element contributes to.
        scaled_weights = np.copy(weights)
        cursor = np.nditer(scaled_weights,
                           flags=["multi_index"],
                           op_flags=["readwrite"])
        if conv_op.op == "Conv2D":
            while not cursor.finished:
                out_channel = cursor.multi_index[3]
                cursor[0] *= scale_value[out_channel]
                cursor.iternext()
        elif conv_op.op == "DepthwiseConv2dNative":
            channel_multiplier = weights.shape[3]
            while not cursor.finished:
                out_channel = (cursor.multi_index[2] * channel_multiplier
                               + cursor.multi_index[3])
                cursor[0] *= scale_value[out_channel]
                cursor.iternext()

        scaled_weights_op = node_def_pb2.NodeDef()
        scaled_weights_op.op = "Const"
        scaled_weights_op.name = weights_op.name
        scaled_weights_op.attr["dtype"].CopyFrom(weights_op.attr["dtype"])
        scaled_weights_proto = tensor_util.make_tensor_proto(
            scaled_weights, weights.dtype.type, weights.shape)
        scaled_weights_op.attr["value"].CopyFrom(
            attr_value_pb2.AttrValue(tensor=scaled_weights_proto))

        new_conv_op = node_def_pb2.NodeDef()
        new_conv_op.CopyFrom(conv_op)

        offset_op = node_def_pb2.NodeDef()
        offset_op.op = "Const"
        offset_op.name = conv_op.name + "_bn_offset"
        offset_op.attr["dtype"].CopyFrom(mean_op.attr["dtype"])
        offset_proto = tensor_util.make_tensor_proto(
            offset_value, mean_value.dtype.type, offset_value.shape)
        offset_op.attr["value"].CopyFrom(
            attr_value_pb2.AttrValue(tensor=offset_proto))

        # The BiasAdd takes over the batch-norm node's name, so nodes that
        # consumed the batch norm by name now consume the BiasAdd.
        bias_add_op = node_def_pb2.NodeDef()
        bias_add_op.op = "BiasAdd"
        bias_add_op.name = node.name
        bias_add_op.attr["T"].CopyFrom(conv_op.attr["T"])
        bias_add_op.attr["data_format"].CopyFrom(conv_op.attr["data_format"])
        bias_add_op.input.extend([new_conv_op.name, offset_op.name])

        new_ops.extend(
            [scaled_weights_op, new_conv_op, offset_op, bias_add_op])

    result_graph_def = graph_pb2.GraphDef()
    for original in self.input_graph.node:
        if original.name in nodes_to_skip:
            continue
        kept = node_def_pb2.NodeDef()
        kept.CopyFrom(original)
        result_graph_def.node.extend([kept])

    result_graph_def.node.extend(new_ops)
    return result_graph_def
def merge_BN_with_conv(node, value_consts, nodes_to_skip, new_ops, old_ops,
                       input_node_map):
    """Fold a batch-norm node into the preceding convolution and BiasAdd.

    With ``scale = 1 / sqrt(var + epsilon)`` (times gamma when
    ``scale_after_normalization(node)`` is true), the convolution weights are
    multiplied by ``scale`` along the channel axis and the bias becomes
    ``(bias - mean) * scale + beta``.

    Args:
      node: The batch-normalization NodeDef to fold.
      value_consts: Dict used as scratch output; its ``"channel"`` entry is
        set to the weight axis indexed by the scale (2 for
        ``DepthwiseConv2dNative``, otherwise 3).
      nodes_to_skip: Dict updated in place with the names of the nodes the
        new ops replace (value ``True``).
      new_ops: List extended in place with the scaled-weights Const, the
        copied convolution, the offset Const and the new BiasAdd.
      old_ops: Dict that must contain the convolution under ``"old_Conv"``
        and its bias add under ``"old_BiasAdd"``.
      input_node_map: Map from node name to NodeDef for the input graph.

    Returns:
      True when the fold was performed. False when a required op is missing,
      an expected input is not a Const, or a parameter does not have shape
      ``(channel_count,)``; in that case ``nodes_to_skip`` and ``new_ops``
      are left untouched.
    """
    # The result is unused, but the lookup is kept: presumably node_from_map
    # rejects an input that is missing from the map - TODO confirm.
    node_from_map(input_node_map,
                  node.input[YDWU_INPUT_ORDER[node.op].index("BN_input_op")])

    # `dict.has_key` was removed in Python 3; `in` works on both 2 and 3.
    if 'old_Conv' not in old_ops:
        tf_logging.warning("Didn't find the 'old_Conv' op needed to fold '%s'"
                           % node.name)
        return False
    if 'old_BiasAdd' not in old_ops:
        # Without this guard the function failed with a NameError on
        # `bias_op` after nodes_to_skip had already been partly updated.
        tf_logging.warning("Didn't find the 'old_BiasAdd' op needed to fold"
                           " '%s'" % node.name)
        return False

    # "old_Conv_op"
    old_Conv_op = old_ops["old_Conv"]
    weights_op = node_from_map(input_node_map, old_Conv_op.input[1])
    if weights_op.op != "Const":
        tf_logging.warning("Didn't find expected conv Constant input to '%s',"
                           " found %s instead. Maybe because freeze_graph wasn't"
                           " run first?" % (old_Conv_op.name, weights_op))
        return False
    weights = values_from_const(weights_op)
    if old_Conv_op.op == 'DepthwiseConv2dNative':
        # NOTE(review): the scale is indexed by weight axis 2 only, which
        # looks correct only for a channel multiplier of 1 - confirm.
        value_consts["channel"] = 2
        channel_count = weights.shape[2]
    else:
        value_consts["channel"] = 3
        channel_count = weights.shape[3]

    # "old_BiasAdd_op"
    old_BiasAdd_op = old_ops['old_BiasAdd']
    bias_op = node_from_map(input_node_map, old_BiasAdd_op.input[1])
    if bias_op.op != "Const":
        tf_logging.warning("Didn't find expected bias Constant input to '%s',"
                           " found %s instead. Maybe because freeze_graph wasn't"
                           " run first?" % (old_BiasAdd_op.name, bias_op))
        return False
    bias_value = values_from_const(bias_op)

    # "mean_op"
    mean_op = node_from_map(input_node_map,
                            node.input[YDWU_INPUT_ORDER[node.op].index("mean_op")])
    if mean_op.op != "Const":
        tf_logging.warning("Didn't find expected mean Constant input to '%s',"
                           " found %s instead. Maybe because freeze_graph wasn't"
                           " run first?" % (node.name, mean_op))
        return False
    mean_value = values_from_const(mean_op)
    if mean_value.shape != (channel_count,):
        tf_logging.warning("Incorrect shape for mean, found %s, expected %s,"
                           " for node %s" % (str(mean_value.shape), str(
                               (channel_count,)), node.name))
        return False

    # "var_op"
    var_op = node_from_map(input_node_map,
                           node.input[YDWU_INPUT_ORDER[node.op].index("var_op")])
    if var_op.op != "Const":
        tf_logging.warning("Didn't find expected var Constant input to '%s',"
                           " found %s instead. Maybe because freeze_graph wasn't"
                           " run first?" % (node.name, var_op))
        return False
    var_value = values_from_const(var_op)
    if var_value.shape != (channel_count,):
        tf_logging.warning("Incorrect shape for var, found %s, expected %s,"
                           " for node %s" % (str(var_value.shape), str(
                               (channel_count,)), node.name))
        return False

    # "beta_op"
    beta_op = node_from_map(input_node_map,
                            node.input[YDWU_INPUT_ORDER[node.op].index("beta_op")])
    if beta_op.op != "Const":
        tf_logging.warning("Didn't find expected beta Constant input to '%s',"
                           " found %s instead. Maybe because freeze_graph wasn't"
                           " run first?" % (node.name, beta_op))
        return False
    beta_value = values_from_const(beta_op)
    if beta_value.shape != (channel_count,):
        tf_logging.warning("Incorrect shape for beta, found %s, expected %s,"
                           " for node %s" % (str(beta_value.shape), str(
                               (channel_count,)), node.name))
        return False

    # "gamma_op"
    gamma_op = node_from_map(input_node_map,
                             node.input[YDWU_INPUT_ORDER[node.op].index("gamma_op")])
    if gamma_op.op != "Const":
        tf_logging.warning("Didn't find expected gamma Constant input to '%s',"
                           " found %s instead. Maybe because freeze_graph wasn't"
                           " run first?" % (node.name, gamma_op))
        return False
    gamma_value = values_from_const(gamma_op)
    if gamma_value.shape != (channel_count,):
        tf_logging.warning("Incorrect shape for gamma, found %s, expected %s,"
                           " for node %s" % (str(gamma_value.shape), str(
                               (channel_count,)), node.name))
        return False

    variance_epsilon_value = node.attr[YDWU_EPSILON_ATTR[node.op]].f

    # All checks passed; only now mark the originals as replaced.
    nodes_to_skip[node.name] = True
    nodes_to_skip[weights_op.name] = True
    nodes_to_skip[bias_op.name] = True
    nodes_to_skip[mean_op.name] = True
    nodes_to_skip[var_op.name] = True
    nodes_to_skip[beta_op.name] = True
    nodes_to_skip[gamma_op.name] = True

    if scale_after_normalization(node):
        # FusedBatchNorm
        scale_value = (
            (1.0 / np.vectorize(math.sqrt)(var_value + variance_epsilon_value)) *
            gamma_value)
    else:
        # BatchNormWithGlobalNormalization
        scale_value = (
            1.0 / np.vectorize(math.sqrt)(var_value + variance_epsilon_value))
    offset_value = (bias_value - mean_value) * scale_value + beta_value

    # Scale a copy of the weights in place, element by element, using the
    # scale selected by each element's index along the channel axis.
    scaled_weights = np.copy(weights)
    it = np.nditer(
        scaled_weights, flags=["multi_index"], op_flags=["readwrite"])
    channel_axis = value_consts["channel"]
    while not it.finished:
        current_scale = scale_value[it.multi_index[channel_axis]]
        it[0] *= current_scale
        it.iternext()

    # Add the replacement ops. They reuse the original node names, so the
    # existing graph connections remain valid.
    scaled_weights_op = node_def_pb2.NodeDef()
    scaled_weights_op.op = "Const"
    scaled_weights_op.name = weights_op.name
    scaled_weights_op.attr["dtype"].CopyFrom(weights_op.attr["dtype"])
    scaled_weights_op.attr["value"].CopyFrom(
        attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
            scaled_weights, weights.dtype.type, weights.shape)))

    new_conv_op = node_def_pb2.NodeDef()
    new_conv_op.CopyFrom(old_Conv_op)

    offset_op = node_def_pb2.NodeDef()
    offset_op.op = "Const"
    offset_op.name = bias_op.name
    offset_op.attr["dtype"].CopyFrom(bias_op.attr["dtype"])
    offset_op.attr["value"].CopyFrom(
        attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
            offset_value, mean_value.dtype.type, offset_value.shape)))

    new_bias_add_op = node_def_pb2.NodeDef()
    new_bias_add_op.op = "BiasAdd"
    new_bias_add_op.name = old_BiasAdd_op.name
    new_bias_add_op.attr["T"].CopyFrom(old_BiasAdd_op.attr["T"])
    new_bias_add_op.input.extend([new_conv_op.name, offset_op.name])

    new_ops.extend([scaled_weights_op, new_conv_op, offset_op, new_bias_add_op])
    return True
def fuse_resize_and_conv(input_graph_def, output_node_names):
    """Fuse ResizeBilinear and MirrorPad ops into the Conv2D they feed.

    A ``Conv2D`` whose data input is a ``MirrorPad`` (optionally fed by a
    ``ResizeBilinear``), or a ``ResizeBilinear`` directly, is replaced by a
    ``FusedResizeAndPadConv2D`` (when a resize is involved) or a
    ``FusedPadConv2D`` that keeps the convolution's name. When there is no
    MirrorPad, an all-zero ``[4, 2]`` paddings constant is added so the
    padding stage does nothing. Nodes left without any reference are
    dropped from the result.

    Args:
      input_graph_def: A GraphDef containing a model.
      output_node_names: A list of names of the nodes that produce the final
        results.

    Returns:
      Modified graph with resize and pad ops merged.

    Raises:
      ValueError: If the graph is badly formed with duplicate node names.
    """
    input_node_map = {}
    for graph_node in input_graph_def.node:
        if graph_node.name in input_node_map:
            raise ValueError("Duplicate node names detected for ",
                             graph_node.name)
        input_node_map[graph_node.name] = graph_node

    # Count how many times each node is consumed; requested outputs count
    # as one consumer each so they are never dropped.
    node_reference_count = collections.defaultdict(int)
    for graph_node in input_graph_def.node:
        for consumed in graph_node.input:
            node_reference_count[node_name_from_input(consumed)] += 1
    for requested in output_node_names:
        node_reference_count[requested] += 1

    new_ops = []
    for conv_op in input_graph_def.node:
        if conv_op.op != "Conv2D":
            continue

        input_op = node_from_map(input_node_map, conv_op.input[0])
        mirror_pad_op = None
        resize_op = None
        if input_op.op == "MirrorPad":
            mirror_pad_op = input_op
            pad_source = node_from_map(input_node_map, mirror_pad_op.input[0])
            if pad_source.op == "ResizeBilinear":
                resize_op = pad_source
        elif input_op.op == "ResizeBilinear":
            resize_op = input_op

        # There are no ops to be fused into the conv, so skip replacing this one.
        if not (mirror_pad_op or resize_op):
            continue

        # We're replacing this node, so make sure the old one is removed.
        node_reference_count[conv_op.name] = 0
        if mirror_pad_op:
            node_reference_count[mirror_pad_op.name] -= 1
        if resize_op:
            node_reference_count[resize_op.name] -= 1

        fused_conv_op = node_def_pb2.NodeDef()
        fused_conv_op.op = ("FusedResizeAndPadConv2D"
                            if resize_op else "FusedPadConv2D")
        fused_conv_op.name = conv_op.name

        if mirror_pad_op:
            mirror_paddings_name = mirror_pad_op.input[1]
            mirror_paddings_mode = mirror_pad_op.attr["mode"]
        else:
            # If there was no MirrorPad op, then create settings that make the
            # padding stage of the fused operation a no-op.
            paddings_op = node_def_pb2.NodeDef()
            paddings_op.op = "Const"
            paddings_op.name = conv_op.name + "_dummy_paddings"
            paddings_op.attr["dtype"].CopyFrom(
                attr_value_pb2.AttrValue(type=dtypes.int32.as_datatype_enum))
            zero_paddings = tensor_util.make_tensor_proto(
                [0, 0, 0, 0, 0, 0, 0, 0], dtypes.int32, [4, 2])
            paddings_op.attr["value"].CopyFrom(
                attr_value_pb2.AttrValue(tensor=zero_paddings))
            new_ops.extend([paddings_op])
            mirror_paddings_name = paddings_op.name
            mirror_paddings_mode = attr_value_pb2.AttrValue(s=b"REFLECT")

        if resize_op:
            fused_inputs = [
                resize_op.input[0], resize_op.input[1], mirror_paddings_name,
                conv_op.input[1]
            ]
            fused_conv_op.input.extend(fused_inputs)
            fused_conv_op.attr["resize_align_corners"].CopyFrom(
                resize_op.attr["align_corners"])
        else:
            fused_inputs = [
                mirror_pad_op.input[0], mirror_paddings_name, conv_op.input[1]
            ]
            fused_conv_op.input.extend(fused_inputs)

        fused_conv_op.attr["T"].CopyFrom(conv_op.attr["T"])
        fused_conv_op.attr["mode"].CopyFrom(mirror_paddings_mode)
        fused_conv_op.attr["strides"].CopyFrom(conv_op.attr["strides"])
        fused_conv_op.attr["padding"].CopyFrom(conv_op.attr["padding"])
        new_ops.extend([fused_conv_op])

    result_graph_def = graph_pb2.GraphDef()
    for graph_node in input_graph_def.node:
        if node_reference_count[graph_node.name] < 1:
            continue
        kept = node_def_pb2.NodeDef()
        kept.CopyFrom(graph_node)
        result_graph_def.node.extend([kept])

    result_graph_def.node.extend(new_ops)
    return result_graph_def
def generate_output_graph(self, input_graph_def, input_node_map,
                          fuse_op_name):
    """Build a graph in which the listed conv + batch-norm pairs are folded.

    For each node whose name is a key of ``fuse_op_name``, the weights
    constant is rescaled element-wise with the scales returned by
    ``self.get_scale_and_offset_values``. A ``<conv>_bn_offset`` Const with
    the offsets and a ``BiasAdd`` carrying the batch-norm node's name are
    emitted next to a copy of the convolution. The batch-norm node and its
    inputs are marked to be skipped; nodes reached after that marking are
    left out, and every other node is copied unchanged.

    Args:
      input_graph_def: GraphDef to rewrite.
      input_node_map: Map from node name to NodeDef for ``input_graph_def``.
      fuse_op_name: Map from a convolution node name to the name of the
        batch-norm node folded into it.

    Returns:
      The rewritten GraphDef.
    """
    output_graph_def = graph_pb2.GraphDef()
    skip_list = []
    skip_node_name = []
    for node_index, node in enumerate(input_graph_def.node):
        if node.name not in fuse_op_name:
            if node_index in skip_list or node.name in skip_node_name:
                continue
            plain_copy = node_def_pb2.NodeDef()
            plain_copy.CopyFrom(node)
            output_graph_def.node.extend([plain_copy])
            continue

        conv_node = input_node_map[node.name]
        bn_node = input_node_map[fuse_op_name[node.name]]
        scales, offsets = self.get_scale_and_offset_values(
            input_node_map, bn_node)
        weights_node = input_node_map[conv_node.input[1]]

        for bn_input in bn_node.input:
            skip_node_name.append(bn_input)
        skip_node_name.append(bn_node.name)

        new_node = node_def_pb2.NodeDef()
        new_node.op = conv_node.op
        new_node.name = conv_node.name
        for conv_input in node.input:
            new_node.input.append(conv_input)

        # Number of output columns; selects which scale applies to each
        # flattened weight element.
        weights_dims = weights_node.attr['value'].tensor.tensor_shape.dim
        if conv_node.op == 'Conv2D':
            weights_cols = weights_dims[3].size
        elif conv_node.op == "DepthwiseConv2dNative":
            weights_cols = weights_dims[2].size * weights_dims[3].size
        else:
            weights_cols = weights_dims[1].size

        weights_tensor = tensor_util.MakeNdarray(
            weights_node.attr['value'].tensor)
        new_weights = [
            weights_tensor.flat[flat_pos] * scales[flat_pos % weights_cols]
            for flat_pos, _ in enumerate(weights_tensor.flat)
        ]
        new_bn = [offsets[col] for col in range(weights_cols)]

        # The weights node is updated in place and then re-emitted.
        scaled_proto = tensor_util.make_tensor_proto(
            new_weights, dtypes.float32, weights_tensor.shape)
        weights_node.attr['value'].CopyFrom(
            attr_value_pb2.AttrValue(tensor=scaled_proto))

        bias_offset_node = node_def_pb2.NodeDef()
        bias_offset_node.op = "Const"
        bias_offset_node.name = conv_node.name + "_bn_offset"
        bias_offset_node.attr["dtype"].CopyFrom(
            attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum))
        offset_proto = tensor_util.make_tensor_proto(
            new_bn, dtypes.float32, [weights_cols])
        bias_offset_node.attr["value"].CopyFrom(
            attr_value_pb2.AttrValue(tensor=offset_proto))

        # The BiasAdd takes the batch-norm node's name, so nodes that
        # consumed the batch norm by name keep resolving.
        biasadd_node = node_def_pb2.NodeDef()
        biasadd_node.op = "BiasAdd"
        biasadd_node.name = bn_node.name
        if "data_format" in conv_node.attr:
            biasadd_node.attr["data_format"].CopyFrom(
                conv_node.attr['data_format'])
        biasadd_node.attr["T"].CopyFrom(conv_node.attr['T'])
        biasadd_node.input.append(conv_node.name)
        biasadd_node.input.append(bias_offset_node.name)

        for attr_name in conv_node.attr:
            new_node.attr[attr_name].CopyFrom(conv_node.attr[attr_name])

        output_graph_def.node.extend(
            [weights_node, bias_offset_node, biasadd_node, new_node])

    return output_graph_def
def convert_variables_to_constants_v2(func):
    """Return a ConcreteFunction with its variables frozen into constants.

    The function's graph is first inlined by
    ``_run_inline_graph_optimization``. Every ``ReadVariableOp`` is traced
    back, through any ``Identity`` ops, to the ``Placeholder`` that supplies
    the variable. That Placeholder becomes a ``Const`` holding the
    variable's value, the Identity ops in between take the read's dtype as
    their ``T``, and the ``ReadVariableOp`` itself becomes an ``Identity``.
    All other nodes are copied unchanged.

    The current implementation only works for graphs that do not contain any
    control flow or embedding related ops.

    Args:
      func: ConcreteFunction.

    Returns:
      ConcreteFunction containing a simplified version of the original.

    Raises:
      ValueError: If a ReadVariableOp's input chain does not end in a
        Placeholder.
    """
    # TODO(nupurgarg): Replace ResourceGather with Gather.
    # TODO(nupurgarg): Change attr for Variables in control flow and functions.
    graph_def = _run_inline_graph_optimization(func)

    def get_name(name):
        # Drop the ":<port>" suffix so tensor names and node names compare.
        return name.split(":")[0]

    # Identify the ReadVariableOps.
    map_name_to_node = {}
    for graph_node in graph_def.node:
        map_name_to_node[get_name(graph_node.name)] = graph_node

    # TODO(b/125838789): Use `func.graph.captures`.
    # Get mapping from input name to variable value.
    captured = func.captured_inputs
    input_tensors = func.inputs[-len(captured):]
    tensor_data = {}
    for variable in func.graph.variables:
        placeholder_tensor = input_tensors[captured.index(variable.handle)]
        tensor_data[get_name(placeholder_tensor.name)] = variable.numpy()

    resource_identities = {}
    resource_placeholders = {}
    for graph_node in graph_def.node:
        if graph_node.op != "ReadVariableOp":
            continue
        # Get name of Placeholder op associated with ReadVariableOp. There can
        # be an Identity in between the ReadVariableOp and Placeholder. Store
        # the Identity ops with the associated dtypes.
        read_dtype = graph_node.attr["dtype"]
        source_name = get_name(graph_node.input[0])
        while map_name_to_node[source_name].op == "Identity":
            resource_identities[source_name] = read_dtype
            source_name = get_name(map_name_to_node[source_name].input[0])
        if map_name_to_node[source_name].op != "Placeholder":
            raise ValueError("Cannot find the Placeholder op that is an input "
                             "to the ReadVariableOp.")
        # Build a map of Placeholder ops that are inputs to ReadVariableOps to
        # the variable's dtype and data.
        resource_placeholders[source_name] = {
            "dtype": read_dtype,
            "data": tensor_data[source_name],
        }

    # Reconstruct the graph with constants in place of variables.
    output_graph_def = graph_pb2.GraphDef()
    how_many_converted = 0
    for input_node in graph_def.node:
        output_node = output_graph_def.node.add()
        node_name = input_node.name
        if node_name in resource_placeholders:
            # Convert Placeholder ops that are inputs to ReadVariableOps into
            # Const ops.
            frozen = resource_placeholders[node_name]
            dtype = frozen["dtype"]
            data = frozen["data"]
            output_node.op = "Const"
            output_node.name = node_name
            output_node.attr["dtype"].CopyFrom(dtype)
            output_node.attr["value"].tensor.CopyFrom(
                tensor_util.make_tensor_proto(
                    data, dtype=dtype.type, shape=data.shape))
            how_many_converted += 1
        elif node_name in resource_identities:
            # Change the dtype for Identity ops that are inputs to
            # ReadVariableOps.
            output_node.CopyFrom(input_node)
            output_node.attr["T"].CopyFrom(resource_identities[node_name])
        elif input_node.op == "ReadVariableOp":
            # Convert ReadVariableOps into Identity ops.
            output_node.op = "Identity"
            output_node.name = node_name
            output_node.input.extend([input_node.input[0]])
            output_node.attr["T"].CopyFrom(input_node.attr["dtype"])
            if "_class" in input_node.attr:
                output_node.attr["_class"].CopyFrom(input_node.attr["_class"])
        else:
            output_node.CopyFrom(input_node)

    logging.info("Converted %d variables to const ops.", how_many_converted)
    # TODO(b/126613403): Use wrap_function.function_from_graph_def.
    return _construct_concrete_function(func, output_graph_def)
def do_transformation(self):
    """Fuse quantized-op / Requantize / Dequantize matches into single nodes.

    For every match reported by the graph analyzer (matches of length 5 are
    skipped), a ``<quantized op>AndDequantize`` node with a float32
    ``Toutput`` is built from the quantized node's inputs plus inputs 3 and 4
    of the Requantize node. The bias Const is rewritten in place: on 'gpu'
    the float bias is kept, otherwise the int32 bias computed by
    ``Helper.generate_int32_bias_for_matmul`` is stored and tagged as qint32.
    The matched nodes are then replaced/removed through the graph analyzer.

    Returns:
        Whatever ``self.graph_analyzer.dump_graph()`` returns after all
        matches have been processed.
    """

    def const_scalar(const_node):
        # First float held by the node's "value" tensor.
        return const_node.attr['value'].tensor.float_val[0]

    def const_array(input_name):
        # Decode the "value" tensor of the node feeding `input_name`.
        return tensor_util.MakeNdarray(
            self.graph_info[input_name].node.attr['value'].tensor)

    float32_type = dtypes.float32.as_datatype_enum
    qint32_type = dtypes.qint32.as_datatype_enum

    matched_patterns = self.graph_analyzer.query_fusion_pattern_nodes(
        self.fuse_patterns[self.version])
    for match in matched_patterns:
        # TODO Remove below checker once the TF's limitation removed.
        if len(match) == 5:
            continue

        quantized_node_name = match[0]
        quantized_node = self.graph_info[quantized_node_name].node
        requantize_node_name = match[1]
        requantize_node = self.graph_info[requantize_node_name].node
        requested_output_min_name = requantize_node.input[3]
        requested_output_max_name = requantize_node.input[4]
        deq_node_name = match[2]
        quantized_node_op = match[-1][0]

        # The fused node initially takes over the Requantize node's name.
        fused_node = node_def_pb2.NodeDef()
        fused_node.op = quantized_node_op + "AndDequantize"
        fused_node.name = requantize_node_name
        fused_node.input.extend(quantized_node.input)
        fused_node.input.append(requested_output_min_name)
        fused_node.input.append(requested_output_max_name)
        for type_attr in ('T1', 'T2'):
            if type_attr in quantized_node.attr:
                fused_node.attr[type_attr].CopyFrom(
                    quantized_node.attr[type_attr])

        top_node_name = Helper.node_name_from_input(quantized_node.input[0])

        max_filter_node = self.graph_info[fused_node.input[6]].node
        min_filter_node = self.graph_info[fused_node.input[5]].node
        producer_node = self.graph_info[fused_node.input[0]].node
        bias_node = self.graph_info[fused_node.input[2]].node
        # The input range is read from the last two inputs of the producer.
        max_input_node = self.graph_info[producer_node.input[-1]].node
        min_input_node = self.graph_info[producer_node.input[-2]].node

        min_input_value = const_scalar(min_input_node)
        max_input_value = const_scalar(max_input_node)
        max_filter_value = const_scalar(max_filter_node)
        min_filter_value = const_scalar(min_filter_node)

        weights_tensor = const_array(fused_node.input[1])
        bias_tensor = const_array(fused_node.input[2])

        if quantized_node.attr['input_quant_mode'].s == b'MIN_FIRST':
            input_range = max_input_value - min_input_value
        else:
            input_range = max(abs(max_input_value), abs(min_input_value))

        int32_bias = Helper.generate_int32_bias_for_matmul(
            bias_tensor, weights_tensor, input_range, max_input_value,
            min_input_value, max_filter_value, min_filter_value)

        if self.device == 'gpu':
            bias_type = float32_type
            bias_data = bias_tensor
            bias_proto_dtype = dtypes.float32
        else:
            bias_type = qint32_type
            bias_data = int32_bias
            bias_proto_dtype = dtypes.int32

        bias_node.attr['dtype'].CopyFrom(
            attr_value_pb2.AttrValue(type=bias_type))
        bias_node.attr['value'].CopyFrom(
            attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
                bias_data, bias_proto_dtype, bias_tensor.shape)))
        # The proto is built as float32/int32 and then re-tagged so that its
        # dtype field agrees with the node's "dtype" attribute.
        bias_node.attr['value'].tensor.dtype = bias_type
        fused_node.attr["Tbias"].CopyFrom(
            attr_value_pb2.AttrValue(type=bias_type))
        fused_node.attr["Toutput"].CopyFrom(
            attr_value_pb2.AttrValue(type=float32_type))

        self.graph_analyzer.remove_node(requantize_node_name)

        dequantize_outputs = self.graph_info[deq_node_name].outputs
        if dequantize_outputs:
            self.graph_analyzer.replace_single_node(
                fused_node, [top_node_name], quantized_node_name,
                dequantize_outputs, deq_node_name)
            self.graph_analyzer.remove_node(deq_node_name)
        else:
            # Dequantize has no consumers: the fused node takes its name.
            self.graph_analyzer.remove_node(deq_node_name)
            fused_node.name = deq_node_name
            self.graph_analyzer.replace_single_node(
                fused_node, [top_node_name], quantized_node_name,
                [], deq_node_name)

        self.graph_analyzer.remove_node(quantized_node_name)

    return self.graph_analyzer.dump_graph()
def do_transformation(self):
    """Fuse the quantized op with the following requantize op.

    The pattern is re-queried after every fusion and the first match is
    processed each time, until the analyzer reports no match. Each match
    becomes one ``<quantized op>AndRequantize`` node named after the
    Requantize node. When the node feeding input 0 is a Requantize or
    QuantizeV2 op, the bias Const is rewritten in place (float32 on 'gpu',
    otherwise an int32 bias tagged as qint32) and ``Toutput`` is set to
    quint8; otherwise only ``Tbias`` is set, to float32.

    Returns:
        [graphdef]: the optimized graphdef object
    """

    def const_scalar(const_node):
        # First float held by the node's "value" tensor.
        return const_node.attr['value'].tensor.float_val[0]

    def const_array(input_name):
        # Decode the "value" tensor of the node feeding `input_name`.
        return tensor_util.MakeNdarray(
            self.graph_info[input_name].node.attr['value'].tensor)

    uint8_type = dtypes.quint8.as_datatype_enum
    float32_type = dtypes.float32.as_datatype_enum
    qint32_type = dtypes.qint32.as_datatype_enum

    while True:
        matched_patterns = self.graph_analyzer.query_fusion_pattern_nodes(
            self.fuse_patterns['default'])
        if not matched_patterns:
            break

        match = matched_patterns[0]
        quantized_node_name = match[0]
        quantized_node = self.graph_info[quantized_node_name].node
        requantize_node_name = match[1]
        requantize_node = self.graph_info[requantize_node_name].node
        requested_output_min_name = requantize_node.input[3]
        requested_output_max_name = requantize_node.input[4]
        quantized_node_op = match[-1][0]

        fused_node = node_def_pb2.NodeDef()
        fused_node.op = quantized_node_op + "AndRequantize"
        fused_node.name = requantize_node_name
        fused_node.input.extend(quantized_node.input)
        fused_node.input.append(requested_output_min_name)
        fused_node.input.append(requested_output_max_name)
        for type_attr in ('T1', 'T2'):
            if type_attr in quantized_node.attr:
                fused_node.attr[type_attr].CopyFrom(
                    quantized_node.attr[type_attr])

        parent_node_name = Helper.node_name_from_input(
            quantized_node.input[0])
        max_filter_node = self.graph_info[fused_node.input[6]].node
        min_filter_node = self.graph_info[fused_node.input[5]].node
        producer_node = self.graph_info[fused_node.input[0]].node
        is_min_first = (
            quantized_node.attr['input_quant_mode'].s == b'MIN_FIRST')

        fed_by_quantize_op = ('Requantize' in producer_node.op
                              or 'QuantizeV2' in producer_node.op)
        if fed_by_quantize_op:
            bias_node = self.graph_info[fused_node.input[2]].node
            # The input range is read from the producer's last two inputs.
            max_input_node = self.graph_info[producer_node.input[-1]].node
            min_input_node = self.graph_info[producer_node.input[-2]].node
            min_input_value = const_scalar(min_input_node)
            max_input_value = const_scalar(max_input_node)
            max_filter_value = const_scalar(max_filter_node)
            min_filter_value = const_scalar(min_filter_node)

            weights_tensor = const_array(fused_node.input[1])
            bias_tensor = const_array(fused_node.input[2])

            if is_min_first:
                input_range = max_input_value - min_input_value
            else:
                input_range = max(abs(max_input_value),
                                  abs(min_input_value))

            int32_bias = Helper.generate_int32_bias_for_matmul(
                bias_tensor, weights_tensor, input_range, max_input_value,
                min_input_value, max_filter_value, min_filter_value)

            if self.device == 'gpu':
                bias_type = float32_type
                bias_data = bias_tensor
                bias_proto_dtype = dtypes.float32
            else:
                bias_type = qint32_type
                bias_data = int32_bias
                bias_proto_dtype = dtypes.int32

            bias_node.attr['dtype'].CopyFrom(
                attr_value_pb2.AttrValue(type=bias_type))
            bias_node.attr['value'].CopyFrom(
                attr_value_pb2.AttrValue(
                    tensor=tensor_util.make_tensor_proto(
                        bias_data, bias_proto_dtype, bias_tensor.shape)))
            # Re-tag the proto so its dtype field agrees with the node's
            # "dtype" attribute.
            bias_node.attr['value'].tensor.dtype = bias_type
            fused_node.attr["Tbias"].CopyFrom(
                attr_value_pb2.AttrValue(type=bias_type))

            fused_node.attr["Toutput"].CopyFrom(
                attr_value_pb2.AttrValue(type=uint8_type))
            # TODO enabled below commit once the graph refactor pre_optimize
            # committed.
            if 'Relu' not in quantized_node_op:
                # Without a fused Relu, the first consumer of the Requantize
                # node gets its "T" attribute set to quint8 as well.
                deq_node_name = self.graph_info[
                    requantize_node_name].outputs[0]
                deq_node = self.graph_info[deq_node_name].node
                deq_node.attr['T'].CopyFrom(
                    attr_value_pb2.AttrValue(type=uint8_type))
        else:
            fused_node.attr["Tbias"].CopyFrom(
                attr_value_pb2.AttrValue(type=float32_type))

        self.graph_analyzer.replace_single_node(
            fused_node, [parent_node_name], quantized_node_name,
            [self.graph_info[requantize_node_name].outputs[0]],
            requantize_node_name)
        self.graph_analyzer.remove_node(quantized_node_name)

    return self.graph_analyzer.dump_graph()
def generate_output_graph(self, input_graph_def, input_node_map,
                          fuse_op_name):
    """Build a new GraphDef with the listed Mul values folded into weights.

    Args:
        input_graph_def: GraphDef whose nodes are visited in order.
        input_node_map: mapping from node name to its NodeDef.
        fuse_op_name: mapping from the name of a node to fuse to the name of
            the Mul node that is folded into it.

    Returns:
        A new GraphDef. For every node named in `fuse_op_name` it contains
        the rescaled weights node followed by a node that has the original
        op and attributes but the Mul node's name; all other nodes are
        copied unless they were marked to be skipped.
    """
    fused_graph = graph_pb2.GraphDef()
    skipped_positions = set()
    skipped_names = set()

    for position, node in enumerate(input_graph_def.node):
        if node.name not in fuse_op_name:
            if position in skipped_positions or node.name in skipped_names:
                continue
            copied_node = node_def_pb2.NodeDef()
            copied_node.CopyFrom(node)
            fused_graph.node.extend([copied_node])
            continue

        # The entry right after the fused op is dropped from the output
        # (presumably the Mul node itself -- this relies on node order).
        skipped_positions.add(position + 1)

        original_node = input_node_map[node.name]
        mul_node = input_node_map[fuse_op_name[node.name]]
        weights_node = input_node_map[original_node.input[1]]
        mul_value_node = input_node_map[mul_node.input[1]]

        fused_node = node_def_pb2.NodeDef()
        fused_node.op = original_node.op
        fused_node.name = mul_node.name
        fused_node.input.extend(node.input)

        # Number of weight columns the Mul vector is expected to match.
        weights_dims = weights_node.attr['value'].tensor.tensor_shape.dim
        if original_node.op == "DepthwiseConv2dNative":
            weights_col = weights_dims[2].size * weights_dims[3].size
        elif original_node.op == "Conv2D":
            weights_col = weights_dims[3].size
        else:
            weights_col = weights_dims[1].size

        mul_value_tensor = mul_value_node.attr['value'].tensor
        weights_tensor = weights_node.attr['value'].tensor
        mul_dims = mul_value_tensor.tensor_shape.dim
        if len(mul_dims) != 1 or mul_dims[0].size != weights_col:
            # Only reported; the fusion below is still carried out.
            print("Invalid Mul OP fusion.")

        multipliers = list(tensor_util.MakeNdarray(mul_value_tensor).flat)
        weights_array = tensor_util.MakeNdarray(weights_tensor)
        # Walk the weights in flat order, cycling through the multipliers.
        scaled_weights = [
            weight * multipliers[offset % len(multipliers)]
            for offset, weight in enumerate(weights_array.flat)
        ]

        weights_node.attr['value'].CopyFrom(
            attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
                scaled_weights, dtypes.float32, weights_array.shape)))
        skipped_names.add(weights_node.name)
        fused_graph.node.extend([weights_node])

        for key in original_node.attr:
            fused_node.attr[key].CopyFrom(original_node.attr[key])
        fused_graph.node.extend([fused_node])

    return fused_graph
def convert_variables_to_constants(sess, input_graph_def, output_node_names,
                                   variable_names_whitelist=None,
                                   variable_names_blacklist=None):
    """Replaces all the variables in a graph with constants of the same values.

    If you have a trained graph containing Variable ops, it can be convenient
    to convert them all to Const ops holding the same values. This makes it
    possible to describe the network fully with a single GraphDef file, and
    allows the removal of a lot of ops related to loading and saving the
    variables.

    Args:
      sess: Active TensorFlow session containing the variables.
      input_graph_def: GraphDef object holding the network.
      output_node_names: List of name strings for the result nodes of the
        graph.
      variable_names_whitelist: The set of variable names to convert (by
        default, all variables are converted).
      variable_names_blacklist: The set of variable names to omit converting
        to constants.

    Returns:
      GraphDef containing a simplified version of the original.
    """
    # This graph only includes the nodes needed to evaluate the output nodes,
    # and removes unneeded nodes like those involved in saving and assignment.
    inference_graph = extract_sub_graph(input_graph_def, output_node_names)

    # Collect the variables to freeze: the bare node name is the lookup key,
    # and "<name>:0" is the tensor that gets fetched from the session.
    variable_names = []
    variable_dict_names = []
    for node in inference_graph.node:
        if node.op not in ("Variable", "VariableV2"):
            continue
        variable_name = node.name
        if (variable_names_whitelist is not None
                and variable_name not in variable_names_whitelist):
            continue
        if (variable_names_blacklist is not None
                and variable_name in variable_names_blacklist):
            continue
        variable_dict_names.append(variable_name)
        variable_names.append(variable_name + ":0")

    # Skip the session call entirely when there is nothing to fetch.
    returned_variables = sess.run(variable_names) if variable_names else []
    found_variables = dict(zip(variable_dict_names, returned_variables))
    logging.info("Froze %d variables.", len(returned_variables))

    output_graph_def = graph_pb2.GraphDef()
    how_many_converted = 0
    for input_node in inference_graph.node:
        output_node = node_def_pb2.NodeDef()
        if input_node.name in found_variables:
            # Replace the variable with a Const holding its fetched value.
            output_node.op = "Const"
            output_node.name = input_node.name
            dtype = input_node.attr["dtype"]
            data = found_variables[input_node.name]
            output_node.attr["dtype"].CopyFrom(dtype)
            output_node.attr["value"].CopyFrom(
                attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
                    data, dtype=dtype.type, shape=data.shape)))
            how_many_converted += 1
        else:
            output_node.CopyFrom(input_node)
        output_graph_def.node.extend([output_node])

    output_graph_def.library.CopyFrom(inference_graph.library)
    # Reported through logging, like the "Froze" message above, instead of
    # being written to stdout.
    logging.info("Converted %d variables to const ops.", how_many_converted)
    return output_graph_def
def _concrete_tensor_to_proto(tensor):
    """Return a TensorProto built from the value of `tensor.numpy()`."""
    concrete_value = tensor.numpy()
    return tensor_util.make_tensor_proto(concrete_value)
def zero_const(node):
    """Overwrite the node's "value" tensor with zeros, in place.

    The replacement tensor has the same shape and the same dtype as the
    current value.

    Args:
        node: node whose "value" attribute holds a tensor; it is mutated.
    """
    # Imported locally so the block does not depend on a file-level alias.
    import numpy as np

    val = tf.make_ndarray(node.attr.get("value").tensor)
    # zeros_like keeps the dtype and always yields exact zeros; multiplying
    # by 0.0 would promote integer/bool values to float64 and would turn
    # NaN/inf entries into NaN.
    new_val = np.zeros_like(val)
    new_tensor = tensor_util.make_tensor_proto(new_val, new_val.dtype,
                                               new_val.shape)
    node.attr["value"].CopyFrom(attr_value_pb2.AttrValue(tensor=new_tensor))