def test_map_fn(self):
    """Exercise ``tf.map_fn`` with a single-tensor input and with a tuple input.

    Two graphs are built one after the other; the default graph is reset
    between them so both can reuse the "input_0"/"output_0" names.
    """
    def square_plus_self(elem):
        return elem + elem * elem

    def product_plus_first(elem):
        return elem[0] * elem[1] + elem[0]

    sample_dims = [2, 10]
    # Only the shapes of these samples are used when building the placeholders.
    x_val = 100 * np.random.random_sample(sample_dims).astype(np.float32)
    y_val = 100 * np.random.random_sample(sample_dims).astype(np.float32)

    # Case 1: map over one tensor.
    first_in = tf_placeholder(tf.float32, shape=x_val.shape, name="input_0")
    first_id = tf.identity(first_in)
    mapped = tf.map_fn(square_plus_self, first_id, dtype=tf.float32)
    _ = tf.identity(mapped, name="output_0")
    self._run_test_case(["input_0:0"], ["output_0:0"])
    tf_reset_default_graph()

    # Case 2: map over a pair of tensors.
    first_in = tf_placeholder(tf.float32, shape=x_val.shape, name="input_0")
    second_in = tf_placeholder(tf.float32, shape=y_val.shape, name="input_1")
    first_id = tf.identity(first_in)
    second_id = tf.identity(second_in)
    mapped = tf.map_fn(product_plus_first, (first_id, second_id), dtype=tf.float32)
    _ = tf.identity(mapped, name="output_0")
    self._run_test_case(["input_0:0", "input_1:0"], ["output_0:0"])
def test_while_loop_with_ta_read_and_write(self):
    """Run a ``tf.while_loop`` that reads one TensorArray and writes another.

    Each iteration reads element ``i`` of the unstacked input, adds 3 and
    writes the result at index ``i`` of the output TensorArray.
    """
    start_index = tf_placeholder(tf.int32, (), name="input_1")
    raw_inputs = tf_placeholder(tf.float32, (10, ), name="input_2")
    inputs_copy = tf.identity(raw_inputs)
    source_ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True).unstack(inputs_copy)
    sink_ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)

    def keep_looping(step, *_):
        # Continue while 0 <= step < 10.
        below_limit = tf.less(step, 10)
        non_negative = step >= 0
        return tf.logical_and(below_limit, non_negative)

    def loop_body(step, out_ta):
        next_step = tf.add(step, 1)
        shifted = source_ta.read(step) + 3
        return next_step, out_ta.write(step, shifted)

    final_step, final_ta = tf.while_loop(keep_looping, loop_body, [start_index, sink_ta])
    _ = tf.identity(final_step, name="i")
    _ = tf.identity(final_ta.stack(), name="output_ta")
    self._run_test_case(["input_1:0", "input_2:0"], ["i:0", "output_ta:0"])
def get_output_shapes(node_def, input_dtypes, input_shapes, inp_consts):
    """Returns a list of the output shapes of an op. input_dtypes should be tf dtypes.

    "Prelu", "Enter", "Merge" and "Placeholder" nodes are answered directly from the
    arguments without touching TensorFlow. Any other op is imported on its own into a
    scratch TF graph (with placeholder or constant inputs) and the shapes TF infers for
    its outputs are returned.

    Note: on the TF path ``node_def`` is modified in place. Its inputs are replaced by
    "input0", "input1", ..., it is renamed to "node", and a "_class" attribute, if
    present, is deleted.

    Args:
        node_def: node definition of the op; ``op``, ``attr``, ``input`` and ``name`` are used.
        input_dtypes: one tf dtype per input.
        input_shapes: one shape per input; a shape may be None, and -1 dims are treated
            as unknown.
        inp_consts: one entry per input; None means "use a placeholder", anything else is
            used as the value of a constant input.

    Returns:
        A list with one shape per output of the op.
    """
    op_type = node_def.op

    if op_type in ("Prelu", "Enter"):
        return [input_shapes[0]]

    if op_type == "Merge":
        # Find the first non-None shape (if it exists) and return it
        first_known = next((s for s in input_shapes if s is not None), None)
        # The second output of merge is a scalar int indicating which input was selected
        return [first_known, []]

    if op_type == "Placeholder":
        shape = None
        if 'shape' in node_def.attr:
            shape = [None if d.size == -1 else d.size for d in node_def.attr['shape'].shape.dim]
            if not shape:
                # According to TF docs, "If the shape has 0 dimensions, the shape is unconstrained."
                shape = None
        return [shape]

    # Imported only here so the TF-free branches above do not depend on the loader.
    from tf2onnx.tf_loader import tf_session, tf_placeholder  # pylint: disable=import-outside-toplevel

    del node_def.input[:]
    node_def.name = "node"
    if "_class" in node_def.attr:
        # Remove colocation information (list of nodes tf wants computed on same device)
        del node_def.attr["_class"]

    g = tf.Graph()
    with g.as_default():
        for i, (dtype, in_shape, const) in enumerate(zip(input_dtypes, input_shapes, inp_consts)):
            inp = "input" + str(i)
            if const is None:
                if in_shape is not None and -1 in in_shape:
                    in_shape = [d if d != -1 else None for d in in_shape]
                tf_placeholder(dtype, name=inp, shape=in_shape)
            else:
                tf.constant(const, dtype, name=inp)
            node_def.input.append(inp)
        mini_graph_def = g.as_graph_def()
        mini_graph_def.node.append(node_def)

    g2 = tf.Graph()
    with g2.as_default():
        with tf_session() as sess:
            tf.import_graph_def(mini_graph_def, name='')
            node = sess.graph.get_operation_by_name("node")
            return [tf_utils.get_tf_tensor_shape(out) for out in node.outputs]
def test_while_loop_in_cond(self):
    """Run a ``tf.cond`` whose false branch contains a ``tf.while_loop``.

    The true branch returns ``x`` unchanged; the false branch keeps adding 1 to the
    loop variable (initialised from ``y``) while any of its elements is below 10.
    """
    x_val = np.array([1, 2, 3], dtype=np.float32)
    y_val = np.array([4, 5, 6], dtype=np.float32)
    x = tf_placeholder(tf.float32, x_val.shape, name="input_1")
    y = tf_placeholder(tf.float32, y_val.shape, name="input_2")

    def cond_graph():
        # while_loop
        keep_going = lambda loop_val: tf.reduce_any(tf.less(loop_val, 10))
        # The body has to advance the loop variable it receives. Recomputing the value
        # from the captured placeholder ``y`` would give the same result on every
        # iteration, so the condition could never become false once it was true.
        increment = lambda loop_val: tf.add(loop_val, 1)
        return tf.while_loop(keep_going, increment, [y])

    res = tf.cond(x[0] < y[0], lambda: x, cond_graph, name="test_cond")
    _ = tf.identity(res, name="output")
    input_names_with_port = ["input_1:0", "input_2:0"]
    output_names_with_port = ["output:0"]
    self._run_test_case(input_names_with_port, output_names_with_port)
def read_tf_node_def_attrs(node_def, input_dtypes, input_shapes):
    """Given a tf node def, returns a dict of attribute names to values.

    The node is imported on its own into a scratch TF graph, fed by one placeholder per
    (dtype, shape) pair, and the resulting operation is handed to ``read_tf_node_attrs``.

    Note: ``node_def`` is modified in place. Its inputs are replaced by "input0",
    "input1", ... and it is renamed to "node".
    """
    from tf2onnx.tf_loader import tf_session, tf_placeholder  # pylint: disable=import-outside-toplevel

    del node_def.input[:]
    node_def.name = "node"

    # read_tf_node_attrs needs a real TF operation, so wire the node into a tiny graph first.
    placeholder_graph = tf.Graph()
    with placeholder_graph.as_default():
        for idx, (dtype, shape) in enumerate(zip(input_dtypes, input_shapes)):
            input_name = "input" + str(idx)
            tf_placeholder(dtype, name=input_name, shape=shape)
            node_def.input.append(input_name)
        mini_graph_def = placeholder_graph.as_graph_def()
        mini_graph_def.node.append(node_def)

    import_graph = tf.Graph()
    with import_graph.as_default(), tf_session() as sess:
        tf.import_graph_def(mini_graph_def, name='')
        imported_node = sess.graph.get_operation_by_name("node")
        attrs = read_tf_node_attrs(imported_node)
    return attrs
def run_test_case(self, func, feed_dict, input_names_with_port, output_names_with_port, rtol=1e-07, atol=1e-5,
                  convert_var_to_const=True, constant_fold=True, check_value=True, check_shape=True,
                  check_dtype=True, process_args=None, onnx_feed_dict=None, graph_validator=None,
                  as_session=False, large_model=False):
    """Run ``func`` in TensorFlow, convert its graph, run the backend and compare outputs.

    Args:
        func: callable that builds (or eagerly computes) the TF model from its inputs.
        feed_dict: maps input tensor names to numpy values; its keys are the graph inputs.
        input_names_with_port: overwritten with the keys of ``feed_dict``.
        output_names_with_port: names of the output tensors to fetch and compare.
        rtol, atol: tolerances passed to ``assertAllClose``.
        convert_var_to_const: not read by this method.
        constant_fold: forwarded to ``tf_optimize`` as ``fold_constant`` (session path only).
        check_value, check_shape, check_dtype: select which comparisons are made.
        process_args: extra keyword arguments for ``process_tf_graph``.
        onnx_feed_dict: feed dict for the backend run; defaults to ``feed_dict``.
        graph_validator: optional callable; its result on the converted graph must be truthy.
        as_session: force the session-based path even when ``is_tf2()`` is true.
        large_model: forwarded to ``from_function`` / ``run_backend``; also enables
            ``compress_graph_def``.

    Returns:
        The converted and optimized graph.
    """
    # optional - passed to process_tf_graph
    process_args = {} if process_args is None else process_args
    # optional - pass distinct feed_dict to onnx runtime
    onnx_feed_dict = feed_dict if onnx_feed_dict is None else onnx_feed_dict
    input_names_with_port = list(feed_dict)

    tf_reset_default_graph()
    np.random.seed(1)  # Make it reproducible.
    clean_feed_dict = {utils.node_name(name): value for name, value in feed_dict.items()}

    if is_tf2() and not as_session:
        # Eager path: run the function directly, then trace it into a graph.
        # numpy doesn't work for all ops, so the inputs are converted to tf tensors.
        input_specs = [
            tf.TensorSpec(shape=value.shape, dtype=tf.as_dtype(value.dtype), name=utils.node_name(name))
            for name, value in feed_dict.items()
        ]
        eager_inputs = [
            tf.convert_to_tensor(value, dtype=tf.as_dtype(value.dtype), name=utils.node_name(name))
            for name, value in feed_dict.items()
        ]
        tf.random.set_seed(1)
        eager_result = func(*eager_inputs)
        if isinstance(eager_result, (list, tuple)):
            expected = [tensor.numpy() for tensor in eager_result]
        else:
            expected = [eager_result.numpy()]

        # now make the eager functions a graph
        concrete_func = tf.function(func, input_signature=tuple(input_specs)).get_concrete_function()
        graph_def = from_function(concrete_func,
                                  input_names=list(feed_dict.keys()),
                                  output_names=output_names_with_port,
                                  large_model=large_model)
    else:
        # Session path: build the graph from placeholders and run it in a session.
        with tf_session() as sess:
            tf_set_random_seed(1)
            placeholders = [
                tf_placeholder(name=name, shape=value.shape, dtype=tf.as_dtype(value.dtype))
                for name, value in clean_feed_dict.items()
            ]
            func(*placeholders)
            variables_lib.global_variables_initializer().run()
            tf_tables_initializer().run()

            fetches = [sess.graph.get_tensor_by_name(out_name) for out_name in output_names_with_port]
            expected = sess.run(fetches, feed_dict=feed_dict)
            graph_def = freeze_session(sess,
                                       input_names=list(feed_dict.keys()),
                                       output_names=output_names_with_port)

        tf_reset_default_graph()
        with tf_session() as sess:
            tf.import_graph_def(graph_def, name='')
            graph_def = tf_optimize(list(feed_dict.keys()), output_names_with_port,
                                    graph_def, fold_constant=constant_fold)

    tf_reset_default_graph()
    with tf_session() as sess:
        const_node_values = compress_graph_def(graph_def) if large_model else None
        tf.import_graph_def(graph_def, name='')

        if self.config.is_debug_mode:
            model_path = os.path.join(self.test_data_directory,
                                      self._testMethodName + "_after_tf_optimize.pb")
            utils.save_protobuf(model_path, graph_def)
            self.logger.debug("created file %s", model_path)

        g = process_tf_graph(sess.graph,
                             opset=self.config.opset,
                             input_names=list(feed_dict.keys()),
                             output_names=output_names_with_port,
                             target=self.config.target,
                             const_node_values=const_node_values,
                             **process_args)
        g = optimizer.optimize_graph(g)
        actual = self.run_backend(g, output_names_with_port, onnx_feed_dict, large_model)

    for expected_val, actual_val in zip(expected, actual):
        if check_value:
            self.assertAllClose(expected_val, actual_val, rtol=rtol, atol=atol)
        if check_dtype:
            self.assertEqual(expected_val.dtype, actual_val.dtype)
        # Shapes are compared separately because of an issue when comparing [] with a scalar:
        # https://github.com/numpy/numpy/issues/11071
        if check_shape:
            self.assertEqual(expected_val.shape, actual_val.shape)

    if graph_validator:
        self.assertTrue(graph_validator(g))

    return g
def freeze_and_run_tf(self, func, feed_dict, outputs, as_session, premade_placeholders, large_model,
                      constant_fold):
    """Run ``func`` in TensorFlow and return its results together with a frozen graph.

    Args:
        func: callable that builds (or eagerly computes) the TF model from its inputs.
        feed_dict: maps input tensor names to numpy values.
        outputs: names of the output tensors.
        as_session: force the session-based path even when ``is_tf2()`` is true.
        premade_placeholders: when true, no placeholders are created and ``func`` is
            called without arguments (session path only).
        large_model: forwarded to ``from_function`` (eager path only).
        constant_fold: forwarded to ``tf_optimize`` as ``fold_constant`` (session path only).

    Returns:
        ``(result, graph_def, initialized_tables)``. ``initialized_tables`` is None on the
        eager path; on the session path it maps each hash table name to the pair of
        exported key and value arrays.
    """
    np.random.seed(1)  # Make it reproducible.
    clean_feed_dict = {utils.node_name(name): value for name, value in feed_dict.items()}

    if is_tf2() and not as_session:
        # Eager path: run the function directly, then trace it into a graph.
        # numpy doesn't work for all ops, so the inputs are converted to tf tensors.
        input_specs = [
            tf.TensorSpec(shape=value.shape, dtype=tf.as_dtype(value.dtype), name=utils.node_name(name))
            for name, value in feed_dict.items()
        ]
        eager_inputs = [
            tf.convert_to_tensor(value, dtype=tf.as_dtype(value.dtype), name=utils.node_name(name))
            for name, value in feed_dict.items()
        ]
        tf.random.set_seed(1)
        eager_result = func(*eager_inputs)
        if isinstance(eager_result, (list, tuple)):
            result = [tensor.numpy() for tensor in eager_result]
        else:
            result = [eager_result.numpy()]

        # now make the eager functions a graph
        concrete_func = tf.function(func, input_signature=tuple(input_specs)).get_concrete_function()
        graph_def = from_function(concrete_func,
                                  input_names=list(feed_dict.keys()),
                                  output_names=outputs,
                                  large_model=large_model)
        initialized_tables = None
    else:
        # Session path: build the graph from placeholders and run it in a session.
        with tf_session() as sess:
            tf_set_random_seed(1)
            placeholders = []
            if not premade_placeholders:
                placeholders = [
                    tf_placeholder(name=name, shape=value.shape, dtype=tf.as_dtype(value.dtype))
                    for name, value in clean_feed_dict.items()
                ]
            func(*placeholders)
            variables_lib.global_variables_initializer().run()
            tf_tables_initializer().run()

            fetches = [sess.graph.get_tensor_by_name(out_name) for out_name in outputs]
            result = sess.run(fetches, feed_dict=feed_dict)
            graph_def = freeze_session(sess,
                                       input_names=list(feed_dict.keys()),
                                       output_names=outputs)

            # Export the contents of every hash table while the session is still open.
            table_names, key_dtypes, value_dtypes = get_hash_table_info(graph_def)
            initialized_tables = {}
            for table_name, key_dtype, value_dtype in zip(table_names, key_dtypes, value_dtypes):
                table = lookup_ops.hash_table_v2(key_dtype, value_dtype, shared_name=table_name)
                keys_t, values_t = lookup_ops.lookup_table_export_v2(table, key_dtype, value_dtype)
                initialized_tables[table_name] = (sess.run(keys_t), sess.run(values_t))

        tf_reset_default_graph()
        with tf_session() as sess:
            tf.import_graph_def(graph_def, name='')
            graph_def = tf_optimize(list(feed_dict.keys()), outputs, graph_def, fold_constant=constant_fold)

    model_path = os.path.join(self.test_data_directory, self._testMethodName + "_after_tf_optimize.pb")
    utils.save_protobuf(model_path, graph_def)
    self.logger.debug("created file %s", model_path)
    return result, graph_def, initialized_tables
def test_bidrectional_attention_wrapper_lstm_encoder(self):
    """Run a bidirectional dynamic RNN whose two cells are attention-wrapped LSTMs.

    An LSTM encoder produces the attention states; forward and backward
    ``AttentionWrapper`` cells (sharing one LSTM cell and one Bahdanau attention
    mechanism) then decode a time-major input with explicit sequence lengths.
    """
    size = 30
    time_step = 3
    input_size = 4
    attn_size = size
    batch_size = 9

    # Encoder input is built as [batch size, time step, input size].
    # attention_state: usually the output of an RNN encoder.
    # This tensor should be shaped `[batch_size, max_time, ...]`
    encoder_time_step = time_step
    single_sequence = np.random.randn(encoder_time_step, input_size).astype('f')
    encoder_x_val = np.stack([single_sequence] * batch_size)
    encoder_x = tf_placeholder(tf.float32, encoder_x_val.shape, name="input_1")
    encoder_cell = tf.nn.rnn_cell.LSTMCell(size)
    attention_states, _ = tf.nn.dynamic_rnn(encoder_cell, encoder_x, dtype=tf.float32)
    # original note on the dynamic_rnn results: [9, 3, 30], [9, 30]
    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(attn_size, attention_states)

    def match_input_fn(curr_input, state):
        return tf.concat([curr_input, state], axis=-1)

    cell = tf.nn.rnn_cell.LSTMCell(size)

    def wrap_with_attention():
        return tf.contrib.seq2seq.AttentionWrapper(cell,
                                                   attention_mechanism,
                                                   attention_layer_size=attn_size,
                                                   cell_input_fn=match_input_fn,
                                                   output_attention=False)

    match_cell_fw = wrap_with_attention()
    match_cell_bk = wrap_with_attention()

    decoder_time_step = 6
    decoder_x_val = np.random.randn(decoder_time_step, batch_size, input_size).astype('f')
    decoder_x = tf_placeholder(tf.float32, decoder_x_val.shape, name="input_2")
    seq_length = tf_placeholder(tf.int32, (batch_size), name="input_3")

    rnn_outputs, rnn_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=match_cell_fw,
                                                              cell_bw=match_cell_bk,
                                                              inputs=decoder_x,
                                                              sequence_length=tf.identity(seq_length),
                                                              dtype=tf.float32,
                                                              time_major=True)
    match_output_fw, match_output_bk = rnn_outputs
    match_state_fw, match_state_bk = rnn_states

    matched_output = tf.concat([match_output_fw, match_output_bk], axis=-1)
    matched_state = tf.concat([match_state_fw.cell_state, match_state_bk.cell_state], -1)

    _ = tf.identity(matched_output, name="output_0")
    _ = tf.identity(matched_state, name="final_state")

    self._run_test_case(["input_1:0", "input_2:0", "input_3:0"], ["output_0:0", "final_state:0"])
def compute_const_folding_using_tf(g, const_node_values, graph_outputs):
    """Find nodes with constant inputs and compute their values using TF

    Starting from the Const/ConstV2 nodes, every single-output node whose inputs are all
    known is run on its own in a scratch TF session and its result recorded. StridedSlice
    nodes that pick one statically known dimension out of a Shape node's output are
    resolved directly from the recorded shapes. The sweep repeats until a pass adds
    nothing new. Values that no remaining unfolded node consumes are dropped after each
    pass, and the original constants are dropped at the end.

    Args:
        g: graph providing ``get_operations()``.
        const_node_values: optional dict mapping a const node name to the raw
            ``tensor_content`` to use for it; None is treated as empty.
        graph_outputs: output names that must never be folded.

    Returns:
        ``(outputs_to_values, outputs_to_dtypes)``: dicts keyed by output tensor name
        holding the computed value and the tf dtype of each folded output.
    """
    if const_node_values is None:
        const_node_values = {}
    graph_outputs = set(graph_outputs)
    from tf2onnx.tf_loader import tf_session, tf_placeholder  # pylint: disable=import-outside-toplevel

    ops = g.get_operations()
    outputs_to_values = {}
    outputs_to_dtypes = {}
    outputs_to_shapes = {}
    shape_node_outputs = {}

    # np.prod replaces the deprecated np.product alias, which NumPy 2.0 removed.
    def is_small_shape(x):
        return np.prod(x) <= 1000

    def is_huge_shape(x):
        return np.prod(x) >= 1000000

    for node in ops:
        # Load values of constants. Use const_node_values if possible
        if node.type in ["Const", "ConstV2"]:
            tensor = node.node_def.attr["value"].tensor
            if node.name in const_node_values:
                tensor.tensor_content = const_node_values[node.name]
            outputs_to_values[node.outputs[0].name] = get_tf_tensor_data(tensor)
            outputs_to_dtypes[node.outputs[0].name] = node.outputs[0].dtype
        for out in node.outputs:
            outputs_to_shapes[out.name] = get_tf_tensor_shape(out)

    for node in ops:
        # Remember the static shape of whatever each Shape node inspects
        if node.type == "Shape":
            shape = outputs_to_shapes.get(node.inputs[0].name)
            if shape is not None:
                shape_node_outputs[node.outputs[0].name] = shape

    unneeded_outputs = set()
    progress = True
    while progress:
        progress = False
        for node in ops:
            # Find ops with constant inputs and compute their values
            input_names = [i.name for i in node.inputs]
            output_names = [i.name for i in node.outputs]
            if node.type == 'StridedSlice' and input_names[0] in shape_node_outputs \
                    and output_names[0] not in outputs_to_values:
                # A single known dim sliced out of a Shape output can be resolved without running TF
                shape = shape_node_outputs[input_names[0]]
                i = get_index_from_strided_slice_of_shape(node, outputs_to_values)
                if i is not None and 0 <= i < len(shape) and shape[i] is not None:
                    np_dtype = map_onnx_to_numpy_type(map_tf_dtype(node.outputs[0].dtype))
                    outputs_to_values[output_names[0]] = np.array(shape[i], dtype=np_dtype)
                    outputs_to_dtypes[node.outputs[0].name] = node.outputs[0].dtype
                    progress = True
            can_fold = node.type not in ['Enter', 'Placeholder', 'PlaceholderWithDefault']
            can_fold = can_fold and len(input_names) > 0 and all(inp in outputs_to_values for inp in input_names)
            # We can only fold nodes with a single output
            can_fold = can_fold and len(output_names) == 1 and output_names[0] not in outputs_to_values
            # Skip if value already computed, used, and discarded
            can_fold = can_fold and output_names[0] not in unneeded_outputs and output_names[0] not in graph_outputs
            if can_fold:
                # Make a mini graph containing just the node to fold
                g2 = tf.Graph()
                with g2.as_default():
                    for inp in input_names:
                        tf_placeholder(outputs_to_dtypes[inp], name=inp.split(':')[0])
                    mini_graph_def = g2.as_graph_def()
                    mini_graph_def.node.append(node.node_def)
                g3 = tf.Graph()
                with g3.as_default():
                    feed_dict = {}
                    inp_shapes = []
                    for inp in input_names:
                        inp_np = outputs_to_values[inp]
                        feed_dict[inp] = inp_np
                        inp_shapes.append(inp_np.shape)
                    try:
                        with tf_session() as sess:
                            tf.import_graph_def(mini_graph_def, name='')
                            results = sess.run(output_names, feed_dict=feed_dict)
                        if is_huge_shape(results[0].shape) and all(is_small_shape(inp) for inp in inp_shapes):
                            logger.debug("Skipping folding of node %s since result shape %s is much larger "
                                         "than input shapes %s", node.name, results[0].shape, inp_shapes)
                        else:
                            outputs_to_values[output_names[0]] = results[0]
                            outputs_to_dtypes[output_names[0]] = node.outputs[0].dtype
                            progress = True
                    except Exception:  # pylint: disable=broad-except
                        # Best effort: a node that cannot be evaluated is simply left unfolded
                        logger.debug("Could not fold node %s", node.name)
        unneeded_outputs.update(outputs_to_values.keys())
        for node in ops:
            # Mark values we need to keep
            input_names = [i.name for i in node.inputs]
            output_names = [i.name for i in node.outputs]
            if len(output_names) == 1 and output_names[0] in outputs_to_values:
                continue
            for i in input_names:
                if i in unneeded_outputs:
                    unneeded_outputs.remove(i)
        for node in unneeded_outputs:
            # Remove unneeded values to prevent memory usage explosion
            if node in outputs_to_values:
                del outputs_to_values[node]
                del outputs_to_dtypes[node]

    for node in ops:
        # We don't need the constants any more
        if node.type in ["Const", "ConstV2"] and node.outputs[0].name in outputs_to_values:
            del outputs_to_values[node.outputs[0].name]
            del outputs_to_dtypes[node.outputs[0].name]

    logger.info("Computed %d values for constant folding", len(outputs_to_values))
    return outputs_to_values, outputs_to_dtypes
def compute_const_folding_using_tf(g, const_node_values):
    """Find nodes with constant inputs and compute their values using TF

    Starting from the Const/ConstV2 nodes, every single-output node whose inputs are all
    known is run on its own in a scratch TF session and its result recorded. The sweep
    repeats until a pass adds nothing new. Values that no remaining unfolded node
    consumes are dropped after each pass, and the original constants are dropped at
    the end.

    Args:
        g: graph providing ``get_operations()``.
        const_node_values: optional dict mapping a const node name to the raw
            ``tensor_content`` to use for it; None is treated as empty.

    Returns:
        ``(outputs_to_values, outputs_to_dtypes)``: dicts keyed by output tensor name
        holding the computed value and the tf dtype of each folded output.
    """
    if const_node_values is None:
        const_node_values = {}
    from tf2onnx.tf_loader import tf_session, tf_placeholder  # pylint: disable=import-outside-toplevel

    ops = g.get_operations()
    outputs_to_values = {}
    outputs_to_dtypes = {}

    for node in ops:
        # Load values of constants. Use const_node_values if possible
        if node.type in ["Const", "ConstV2"]:
            tensor = node.node_def.attr["value"].tensor
            if node.name in const_node_values:
                tensor.tensor_content = const_node_values[node.name]
            outputs_to_values[node.outputs[0].name] = get_tf_tensor_data(tensor)
            outputs_to_dtypes[node.outputs[0].name] = node.outputs[0].dtype

    unneeded_outputs = set()
    progress = True
    while progress:
        progress = False
        for node in ops:
            # Find ops with constant inputs and compute their values
            input_names = [i.name for i in node.inputs]
            output_names = [i.name for i in node.outputs]
            # Placeholders are graph inputs. A PlaceholderWithDefault fed by a constant
            # default must stay in the graph, otherwise the default would be baked in.
            can_fold = node.type not in ['Enter', 'Placeholder', 'PlaceholderWithDefault']
            can_fold = can_fold and len(input_names) > 0 and all(inp in outputs_to_values for inp in input_names)
            # We can only fold nodes with a single output
            can_fold = can_fold and len(output_names) == 1 and output_names[0] not in outputs_to_values
            # Skip if value already computed, used, and discarded
            can_fold = can_fold and output_names[0] not in unneeded_outputs
            if can_fold:
                # Make a mini graph containing just the node to fold
                g2 = tf.Graph()
                with g2.as_default():
                    for inp in input_names:
                        tf_placeholder(outputs_to_dtypes[inp], name=inp.split(':')[0])
                    mini_graph_def = g2.as_graph_def()
                    mini_graph_def.node.append(node.node_def)
                g3 = tf.Graph()
                with g3.as_default():
                    feed_dict = {}
                    for inp in input_names:
                        feed_dict[inp] = outputs_to_values[inp]
                    try:
                        with tf_session() as sess:
                            tf.import_graph_def(mini_graph_def, name='')
                            results = sess.run(output_names, feed_dict=feed_dict)
                        outputs_to_values[output_names[0]] = results[0]
                        outputs_to_dtypes[output_names[0]] = node.outputs[0].dtype
                        progress = True
                    except Exception:  # pylint: disable=broad-except
                        # Best effort: a node that cannot be evaluated is simply left unfolded
                        logger.debug("Could not fold node %s", node.name)
        unneeded_outputs.update(outputs_to_values.keys())
        for node in ops:
            # Mark values we need to keep
            input_names = [i.name for i in node.inputs]
            output_names = [i.name for i in node.outputs]
            if len(output_names) == 1 and output_names[0] in outputs_to_values:
                continue
            for i in input_names:
                if i in unneeded_outputs:
                    unneeded_outputs.remove(i)
        for node in unneeded_outputs:
            # Remove unneeded values to prevent memory usage explosion
            if node in outputs_to_values:
                del outputs_to_values[node]
                del outputs_to_dtypes[node]

    for node in ops:
        # We don't need the constants any more
        if node.type in ["Const", "ConstV2"] and node.outputs[0].name in outputs_to_values:
            del outputs_to_values[node.outputs[0].name]
            del outputs_to_dtypes[node.outputs[0].name]

    logger.info("Computed %d values for constant folding", len(outputs_to_values))
    return outputs_to_values, outputs_to_dtypes
def freeze_and_run_tf(self, func, feed_dict, outputs, as_session, premade_placeholders, large_model):
    """Run ``func`` in TensorFlow and return its results together with a frozen graph.

    Args:
        func: callable that builds (or eagerly computes) the TF model from its inputs.
        feed_dict: maps input tensor names to numpy values.
        outputs: names of the output tensors.
        as_session: force the session-based path even when ``is_tf2()`` is true.
        premade_placeholders: when true, no placeholders are created and ``func`` is
            called without arguments (session path only).
        large_model: forwarded to ``from_function`` (eager path only).

    Returns:
        ``(result, graph_def, initialized_tables)``. ``initialized_tables`` is None on the
        eager path; on the session path it maps the shared name of each hash table that
        has one to the pair of exported key and value arrays.
    """
    np.random.seed(1)  # Make it reproducible.
    clean_feed_dict = {utils.node_name(name): value for name, value in feed_dict.items()}

    if is_tf2() and not as_session:
        # Eager path: run the function directly, then trace it into a graph.
        # numpy doesn't work for all ops, so the inputs are converted to tf tensors.
        input_specs = [
            tf.TensorSpec(shape=value.shape, dtype=tf.as_dtype(value.dtype), name=utils.node_name(name))
            for name, value in feed_dict.items()
        ]
        eager_inputs = [
            tf.convert_to_tensor(value, dtype=tf.as_dtype(value.dtype), name=utils.node_name(name))
            for name, value in feed_dict.items()
        ]
        tf.random.set_seed(1)
        eager_result = func(*eager_inputs)
        if isinstance(eager_result, (list, tuple)):
            result = [tensor.numpy() for tensor in eager_result]
        else:
            result = [eager_result.numpy()]

        # now make the eager functions a graph
        concrete_func = tf.function(func, input_signature=tuple(input_specs)).get_concrete_function()
        graph_def = from_function(concrete_func,
                                  input_names=list(feed_dict.keys()),
                                  output_names=outputs,
                                  large_model=large_model)
        initialized_tables = None
    else:
        # Session path: build the graph from placeholders and run it in a session.
        with tf_session() as sess:
            tf_set_random_seed(1)
            placeholders = []
            if not premade_placeholders:
                placeholders = [
                    tf_placeholder(name=name, shape=value.shape, dtype=tf.as_dtype(value.dtype))
                    for name, value in clean_feed_dict.items()
                ]
            func(*placeholders)
            variables_lib.global_variables_initializer().run()
            tf_tables_initializer().run()

            fetches = [sess.graph.get_tensor_by_name(out_name) for out_name in outputs]
            result = sess.run(fetches, feed_dict=feed_dict)
            graph_def = freeze_session(sess,
                                       input_names=list(feed_dict.keys()),
                                       output_names=outputs)

            # Export the contents of every table that has a shared name while the
            # session is still open; tables without one are skipped.
            initialized_tables = {}
            for info in get_hash_table_info(graph_def):
                if info.shared_name is not None:
                    table = lookup_ops.hash_table_v2(info.key_dtype, info.val_dtype,
                                                     shared_name=info.shared_name)
                    keys_t, values_t = lookup_ops.lookup_table_export_v2(table, info.key_dtype,
                                                                         info.val_dtype)
                    initialized_tables[info.shared_name] = (sess.run(keys_t), sess.run(values_t))

        tf_reset_default_graph()
        with tf_session() as sess:
            tf.import_graph_def(graph_def, name='')
            graph_def = tf_optimize(list(feed_dict.keys()), outputs, graph_def)

    return result, graph_def, initialized_tables