def test_apply_transform_missing_features(self):
    """Expect ValueError when the saved transform is applied to no features."""
    with self.assertRaises(ValueError):
        with tf.Graph().as_default(), tf.Session().as_default():
            # An empty dict supplies none of the inputs of the saved model.
            no_features = {}
            saved_transform_io.apply_saved_transform(
                self._test_saved_model, no_features)
def test_apply_transform_type_mismatch(self):
    """Expect ValueError when feature 'x' is fed a string tensor."""
    with self.assertRaises(ValueError):
        with tf.Graph().as_default(), tf.Session().as_default():
            # A tf.string constant stands in for the 'x' feature.
            string_tensor = tf.constant(['bogus'])
            saved_transform_io.apply_saved_transform(
                self._test_saved_model, {'x': string_tensor})
def test_apply_transform_shape_mismatch(self):
    """Expect ValueError when feature 'x' is fed a scalar float tensor."""
    with self.assertRaises(ValueError):
        with tf.Graph().as_default(), tf.Session().as_default():
            # tf.float32 scalar (rank 0) rather than a one-element vector.
            scalar_tensor = tf.constant(1234.0)
            saved_transform_io.apply_saved_transform(
                self._test_saved_model, {'x': scalar_tensor})
def test_sparse_roundtrip(self):
    """Write a sparse transform, re-apply it, and check the sparse result.

    The exported graph divides its sparse input by 5.0.  It is then applied
    to a SparseTensor that has been multiplied by 10, so every value is
    expected to come back doubled while indices and dense shape stay the
    same.
    """
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Build and export the transform graph.
    with tf.Graph().as_default():
        with tf.Session().as_default() as session:
            input_float = tf.sparse_placeholder(tf.float32)
            output = input_float / 5.0
            inputs = {'input': input_float}
            outputs = {'output': output}
            saved_transform_io.write_saved_transform_from_session(
                session, inputs, outputs, export_path)

    # Load the transform into a fresh graph and run it.
    with tf.Graph().as_default():
        with tf.Session().as_default() as session:
            indices = np.array([[3, 2, 0], [4, 5, 1]], dtype=np.int64)
            values = np.array([1.0, 2.0], dtype=np.float32)
            shape = np.array([7, 9, 2], dtype=np.int64)
            input_sparse = tf.SparseTensor(
                indices=indices, values=values, dense_shape=shape)

            # Using a computed input gives confidence that the graphs are
            # fused.
            inputs = {'input': input_sparse * 10}
            outputs = saved_transform_io.apply_saved_transform(
                export_path, inputs)
            output_sparse = outputs['output']
            self.assertIsInstance(output_sparse, tf.SparseTensor)
            result = session.run(output_sparse)

            # Indices and shape unchanged; values are (v * 10) / 5 == 2 * v.
            self.assertEqual(indices.tolist(), result.indices.tolist())
            self.assertEqual([2.0, 4.0], result.values.tolist())
            self.assertEqual(shape.tolist(), result.dense_shape.tolist())
def test_table_roundtrip(self):
    """Export a lookup-table transform, re-apply it, and check the index."""
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Build and export a graph that maps strings to vocabulary indices.
    with tf.Graph().as_default(), tf.Session().as_default() as session:
        string_placeholder = tf.placeholder(tf.string)
        # The table is backed by a constant tensor of three words.
        vocabulary = tf.constant(['cat', 'dog', 'giraffe'])
        table = lookup.index_table_from_tensor(vocabulary)
        index_tensor = table.lookup(string_placeholder)
        saved_transform_io.write_saved_transform_from_session(
            session,
            {'input': string_placeholder},
            {'output': index_tensor},
            export_path)

    # Load the transform into a fresh graph and look up a constant string.
    with tf.Graph().as_default(), tf.Session().as_default() as session:
        applied = saved_transform_io.apply_saved_transform(
            export_path, {'input': tf.constant('dog')})
        # Tables must be initialized before the lookup is evaluated.
        session.run(tf.tables_initializer())
        result = session.run(applied['output'])
        # 'dog' sits at position 1 of the vocabulary.
        self.assertEqual(1, result)
def raw_training_input_fn():
    """Read raw batches, apply the saved transform, split features/labels.

    All configuration (file pattern, batch size, feature spec, reader, key
    sets, flags) is taken from free variables of the enclosing scope, which
    is not visible in this block.

    Returns:
      A `(transformed_features, transformed_labels)` pair.  The labels are a
      dict unless exactly one label is present, in which case the single
      label value is returned directly.
    """
    if key_feature_name is None:
        raw_data = tf.contrib.learn.io.read_batch_features(
            raw_data_file_pattern, training_batch_size,
            raw_training_feature_spec, reader, **read_batch_features_args)
    else:
        keys, raw_data = tf.contrib.learn.io.read_keyed_batch_features(
            raw_data_file_pattern, training_batch_size,
            raw_training_feature_spec, reader, **read_batch_features_args)

    transformed_data = saved_transform_io.apply_saved_transform(
        transform_savedmodel_dir, raw_data)

    # Route every transformed column to the feature dict, the label dict,
    # both, or neither, depending on which key sets it belongs to.
    transformed_features = {}
    transformed_labels = {}
    for name, tensor in six.iteritems(transformed_data):
        if name in transformed_feature_keys:
            transformed_features[name] = tensor
        if name in transformed_label_keys:
            transformed_labels[name] = tensor

    if convert_scalars_to_vectors:
        transformed_features = _convert_scalars_to_vectors(
            transformed_features)
        transformed_labels = _convert_scalars_to_vectors(transformed_labels)

    # Expose the record keys as an extra feature when keyed reading is on.
    if key_feature_name is not None:
        transformed_features[key_feature_name] = keys

    # A lone label is unwrapped from its dict.
    if len(transformed_labels) == 1:
        ((_, sole_label),) = transformed_labels.items()
        transformed_labels = sole_label

    return transformed_features, transformed_labels
def test_apply_transform_extra_features_no_passthrough(self):
    """Expect ValueError when unknown extra features accompany 'x'."""
    with self.assertRaises(ValueError):
        with tf.Graph().as_default(), tf.Session().as_default():
            features_with_extras = {
                'x': tf.constant([1234.0]),  # tf.float32
                'extra_1': tf.constant('1'),
                'extra_2': tf.constant('2'),
            }
            _ = saved_transform_io.apply_saved_transform(
                self._test_saved_model, features_with_extras)
def test_apply_saved_transform_to_tensor_outside_scope(self):
    """Apply the transform inside a name scope to a tensor created outside it.

    The input constant is built before `my_scope` is entered; the transform
    is applied within the scope and its 'x_scaled' output is evaluated.
    """
    with tf.Graph().as_default():
        input_floats = tf.constant([1237.0])  # tf.float32
        with tf.name_scope('my_scope'):
            with tf.Session().as_default() as session:
                input_features = {'x': input_floats}
                transformed_features = (
                    saved_transform_io.apply_saved_transform(
                        self._test_saved_model, input_features))
                # keys() is a view on Python 3 and never equals a list, so
                # materialize it before comparing.
                self.assertEqual(
                    ['x_scaled'], list(transformed_features.keys()))
                result_tensor = transformed_features['x_scaled']
                self.assertAllEqual(session.run(result_tensor), [247.0])
def testImportAndExportWithTensorValueMapping(self):
    """Export a transform with an analyzer, pin it to a constant, re-apply.

    Exports the function "z = x * min(y) + x + min(y)", rewrites the saved
    model so that min(y) is replaced by the constant 6, and then checks that
    applying the rewritten model to x = 5 yields 41.
    """
    def preprocessing_fn(inputs):
        return {
            'z': api.map(lambda x, y: x * y + x + y,
                         inputs['x'], analyzers.min(inputs['y']))
        }

    input_schema = self.toSchema({
        'x': tf.FixedLenFeature((), tf.float32),
        'y': tf.FixedLenFeature((), tf.float32)
    })
    inputs, outputs = impl_helper.run_preprocessing_fn(
        preprocessing_fn, input_schema)

    saved_model_dir = os.path.join(self.get_temp_dir(), 'replace_original')
    input_columns_to_statistics = impl_helper.make_transform_fn_def(
        input_schema, inputs, outputs, saved_model_dir)

    # Materialize the keys once: on Python 3 keys() is a view that cannot
    # be indexed.
    statistic_input_names = list(input_columns_to_statistics.keys())
    self.assertEqual(len(statistic_input_names), 1)
    y_min_input_name = statistic_input_names[0]

    # NOTE(review): the placeholders built in this graph are never run or
    # referenced again below; looks like a removal candidate -- confirm.
    g = tf.Graph()
    with g.as_default():
        x = tf.placeholder(tf.float32, ())
        y = tf.placeholder(tf.float32, ())
        z = x * y + x + y

    new_saved_model_dir = os.path.join(self.get_temp_dir(), 'replace_new')
    impl_helper.replace_tensors_with_constant_values(
        saved_model_dir, new_saved_model_dir, {
            y_min_input_name:
                impl_helper.ConstantTensorValue(6, tf.float32, ())
        })

    # Import the function, applying it to constants for x and y.
    g = tf.Graph()
    with g.as_default():
        x = tf.constant(5, tf.float32, (1,))
        y = tf.constant(1000, tf.float32, (1,))  # Value is never used.
        outputs = saved_transform_io.apply_saved_transform(
            new_saved_model_dir, {'x': x, 'y': y})
        z = outputs['z']

        sess = tf.Session()
        with sess.as_default():
            # Check result is 5 * 6 + 5 + 6 = 41.
            self.assertEqual(41, z.eval())
def test_stale_asset_collections_are_cleaned(self):
    """Repeatedly load and re-save a model with an asset-backed table.

    After each load the graph must hold exactly one ASSET_FILEPATHS entry,
    no ASSETS_KEY entries, and every asset path must name a tensor that
    exists in the graph.
    """
    vocabulary_file = os.path.join(
        compat.as_bytes(test.get_temp_dir()), compat.as_bytes('asset'))
    file_io.write_string_to_file(vocabulary_file, 'foo bar baz')

    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Create a SavedModel including assets.
    with tf.Graph().as_default(), tf.Session().as_default() as session:
        input_string = tf.placeholder(tf.string)
        # Map string through a table loaded from an asset file.
        table = lookup.index_table_from_file(
            vocabulary_file, num_oov_buckets=12, default_value=12)
        output = table.lookup(input_string)
        inputs = {'input': input_string}
        outputs = {'output': output}
        saved_transform_io.write_saved_transform_from_session(
            session, inputs, outputs, export_path)

    # Three load/save rounds, verifying the asset collections each time.
    for _ in range(3):
        with tf.Graph().as_default() as g:
            with tf.Session().as_default() as session:
                input_string = tf.constant('dog')
                inputs = {'input': input_string}
                outputs = saved_transform_io.apply_saved_transform(
                    export_path, inputs)

                asset_filepaths = g.get_collection(
                    ops.GraphKeys.ASSET_FILEPATHS)
                legacy_assets = g.get_collection(
                    tf.saved_model.constants.ASSETS_KEY)
                self.assertEqual(1, len(asset_filepaths))
                self.assertEqual(0, len(legacy_assets))

                # get_tensor_by_name() raises KeyError if an asset path
                # does not refer to a tensor in this graph.
                for asset_path in asset_filepaths:
                    g.get_tensor_by_name(asset_path.name)

                # Re-export to a fresh directory for the next round.
                export_path = os.path.join(tempfile.mkdtemp(), 'export')
                saved_transform_io.write_saved_transform_from_session(
                    session, inputs, outputs, export_path)
def testImportAndExportDense(self):
    """Export "z = x * y + x + y" and check it both applied and loaded."""
    def preprocessing_fn(inputs):
        def combine(x, y):
            return x * y + x + y
        return {'z': api.map(combine, inputs['x'], inputs['y'])}

    input_schema = self.toSchema({
        'x': tf.FixedLenFeature((), tf.float32),
        'y': tf.FixedLenFeature((), tf.float32)
    })
    inputs, outputs = impl_helper.run_preprocessing_fn(
        preprocessing_fn, input_schema)

    saved_model_dir = os.path.join(self.get_temp_dir(), 'dense')
    _ = impl_helper.make_transform_fn_def(
        input_schema, inputs, outputs, saved_model_dir)

    # First pass: apply the saved function to constant tensors.
    with tf.Graph().as_default():
        x = tf.constant(5, tf.float32, (1,))
        y = tf.constant(6, tf.float32, (1,))
        applied = saved_transform_io.apply_saved_transform(
            saved_model_dir, {'x': x, 'y': y})
        z = applied['z']

        with tf.Session().as_default():
            # 5 * 6 + 5 + 6 = 41.
            self.assertEqual(41, z.eval())

    # Second pass: load the graph and feed values for x and y.
    with tf.Graph().as_default():
        loaded_inputs, loaded_outputs = impl_helper.load_transform_fn_def(
            saved_model_dir)
        x = loaded_inputs['x']
        y = loaded_inputs['y']
        z = loaded_outputs['z']

        sess = tf.Session()
        with sess.as_default():
            # 5 * 6 + 5 + 6 = 41.
            self.assertEqual(41, sess.run(z, {x: [5], y: [6]}))
def test_apply_saved_transform(self):
    """Apply the saved transform to a float constant and check graph wiring.

    Verifies that the only output key is 'x_scaled', that the output is a
    tf.Tensor, and that the input and output tensors are the ones the graph
    knows as 'Const:0' and 'transform/truediv:0' respectively.
    """
    with tf.Graph().as_default() as graph:
        with tf.Session().as_default():
            input_floats = tf.constant([1234.0])  # tf.float32
            input_features = {'x': input_floats}
            transformed_features = saved_transform_io.apply_saved_transform(
                self._test_saved_model, input_features)
            # keys() is a view on Python 3 and never equals a list, so
            # materialize it before comparing.
            self.assertEqual(
                ['x_scaled'], list(transformed_features.keys()))
            result_tensor = transformed_features['x_scaled']
            self.assertIsInstance(result_tensor, tf.Tensor)

            self.assertEqual(
                graph.get_tensor_by_name('Const:0'), input_floats)
            self.assertEqual(
                graph.get_tensor_by_name('transform/truediv:0'),
                result_tensor)
def testImportAndExportSparse(self):
    """Export the sparse function "z = x + y" and apply it to constants."""
    def preprocessing_fn(inputs):
        return {'z': api.map(tf.sparse_add, inputs['x'], inputs['y'])}

    input_schema = self.toSchema({
        'x': tf.VarLenFeature(tf.float32),
        'y': tf.VarLenFeature(tf.float32)
    })
    inputs, outputs = impl_helper.run_preprocessing_fn(
        preprocessing_fn, input_schema)

    saved_model_dir = os.path.join(self.get_temp_dir(), 'sparse')
    _ = impl_helper.make_transform_fn_def(
        input_schema, inputs, outputs, saved_model_dir)

    def single_entry_sparse(number):
        # One value at index 0 of a length-1 sparse vector.
        return tf.SparseTensor(
            indices=[[0]],
            values=tf.constant(number, shape=(1,), dtype=tf.float32),
            dense_shape=[1])

    # Import the function, applying it to constants for x and y.
    with tf.Graph().as_default():
        x = single_entry_sparse(5)
        y = single_entry_sparse(6)
        applied = saved_transform_io.apply_saved_transform(
            saved_model_dir, {'x': x, 'y': y})
        z = applied['z']

        with tf.Session().as_default():
            # 5 + 6 = 11.
            result = z.eval()
            self.assertEqual(result.indices, [[0]])
            self.assertEqual(result.values, [11])
            self.assertEqual(result.dense_shape, [1])
def test_dense_roundtrip(self):
    """Export a dense divide-by-5 transform, re-apply it, check the value."""
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Build and export the transform graph.
    with tf.Graph().as_default(), tf.Session().as_default() as session:
        float_placeholder = tf.placeholder(tf.float32)
        # An unrelated, unmapped placeholder must not interfere.
        tf.placeholder(tf.int64)
        quotient = float_placeholder / 5.0
        saved_transform_io.write_saved_transform_from_session(
            session,
            {'input': float_placeholder},
            {'output': quotient},
            export_path)

    # Load the transform into a fresh graph and run it.
    with tf.Graph().as_default(), tf.Session().as_default() as session:
        # Using a computed input gives confidence that the graphs are fused.
        computed_input = tf.constant(25.0) * 2
        applied = saved_transform_io.apply_saved_transform(
            export_path, {'input': computed_input})
        result = session.run(applied['output'])
        # (25 * 2) / 5 = 10
        self.assertEqual(10.0, result)
def raw_training_input_fn():
    """Read CSV lines, parse them, apply the saved transform, split outputs.

    All configuration (file pattern, batch size, thread count, schemas, key
    lists, flags) is taken from free variables of the enclosing scope, which
    is not visible in this block.

    Returns:
      A `(transformed_features, transformed_labels)` pair.  The labels are a
      dict unless exactly one label is present, in which case the single
      label value is returned directly.
    """
    # Accept either a single pattern string or a collection of patterns.
    if isinstance(raw_data_file_pattern, six.string_types):
        patterns = [raw_data_file_pattern]
    else:
        patterns = raw_data_file_pattern
    files = [matched
             for pattern in patterns
             for matched in file_io.get_matching_files(pattern)]

    filename_queue = tf.train.string_input_producer(
        files, num_epochs=num_epochs, shuffle=randomize_input)
    csv_id, csv_lines = tf.TextLineReader().read_up_to(
        filename_queue, training_batch_size)

    queue_capacity = (
        (reader_num_threads + 3) * training_batch_size + min_after_dequeue)

    # Arguments shared by the shuffled and the in-order batching calls.
    batching_args = dict(
        tensors=[csv_id, csv_lines],
        batch_size=training_batch_size,
        capacity=queue_capacity,
        enqueue_many=True,
        num_threads=reader_num_threads)
    if randomize_input:
        _, batch_csv_lines = tf.train.shuffle_batch(
            min_after_dequeue=min_after_dequeue, **batching_args)
    else:
        _, batch_csv_lines = tf.train.batch(**batching_args)

    # One default tensor per raw column, in raw_keys order.
    record_defaults = []
    for key in raw_keys:
        schema = column_schemas[key]
        default_value = schema.representation.default_value
        # Compare against None explicitly: the default may itself be a
        # 'false' value such as '' or 0.
        default_list = [] if default_value is None else [default_value]
        record_defaults.append(
            tf.constant(default_list, dtype=schema.domain.dtype))

    parsed_tensors = tf.decode_csv(
        batch_csv_lines, record_defaults, name='csv_to_tensors')
    raw_data = dict(zip(raw_keys, parsed_tensors))

    transformed_data = saved_transform_io.apply_saved_transform(
        transform_savedmodel_dir, raw_data)

    # Every transformed column is either a label or a feature.
    transformed_features = {}
    transformed_labels = {}
    for name, tensor in six.iteritems(transformed_data):
        if name in transformed_label_keys:
            transformed_labels[name] = tensor
        else:
            transformed_features[name] = tensor

    if convert_scalars_to_vectors:
        transformed_features = input_fn_maker._convert_scalars_to_vectors(
            transformed_features)
        transformed_labels = input_fn_maker._convert_scalars_to_vectors(
            transformed_labels)

    # TODO(b/35264116): remove this when all estimators accept label dict
    if len(transformed_labels) == 1:
        ((_, sole_label),) = transformed_labels.items()
        transformed_labels = sole_label

    return transformed_features, transformed_labels