def DefaultPrepro(batch_size, repeat_size):
    """Build the default preprocessing pipeline.

    The returned ``Serial`` prepro chains three steps, in order:

    1. ``TFRecordSequenceExample`` with two scalar float32 fields, "x" and "y".
    2. ``Batch`` with ``batch_size``.
    3. ``Repeat`` with ``repeat_size``, restricted to the TRAIN mode.

    Parameters
    ----------
    batch_size
        Forwarded to ``dpr.prepros.Batch``.
    repeat_size
        Forwarded to ``dpr.prepros.Repeat``.

    Returns
    -------
    The ``dpr.prepros.Serial`` instance wrapping the three steps.
    """
    # Every step is reached through ``dpr.prepros`` so the function depends on
    # a single module alias (the parser previously went through ``dprp``).
    return dpr.prepros.Serial(
        dpr.prepros.TFRecordSequenceExample(
            fields=[
                dpr.Field(name="x", shape=(), dtype=tf.float32),
                dpr.Field(name="y", shape=(), dtype=tf.float32),
            ]
        ),
        dpr.prepros.Batch(batch_size=batch_size),
        dpr.prepros.Repeat(repeat_size, modes=[tf.estimator.ModeKeys.TRAIN]),
    )
def InferencePrepro(batch_size, count, inputs: str = "inputs/x"):
    """Build the preprocessing pipeline used for inference.

    Steps, in order: parse scalar float32 fields "x" and "y" with
    ``TFRecordSequenceExample``, map a ``Select`` layer that reads "x" and
    writes it under ``inputs``, batch by ``batch_size``, then take ``count``.

    Parameters
    ----------
    batch_size
        Forwarded to ``dpr.prepros.Batch``.
    count
        Forwarded to ``dpr.prepros.Take``.
    inputs : str
        Output name given to the ``Select`` layer (default "inputs/x").

    Returns
    -------
    The ``dpr.prepros.Serial`` instance wrapping the four steps.
    """
    parse_step = dpr.prepros.TFRecordSequenceExample(
        fields=[
            dpr.Field(name="x", shape=(), dtype=tf.float32),
            dpr.Field(name="y", shape=(), dtype=tf.float32),
        ]
    )
    # NOTE(review): update=False presumably keeps only the selected output
    # instead of merging it into the parsed record -- confirm against Map.
    select_step = dpr.prepros.Map(
        dpr.layers.Select(inputs="x", outputs=inputs),
        update=False,
    )
    batch_step = dpr.prepros.Batch(batch_size=batch_size)
    take_step = dpr.prepros.Take(count=count)
    return dpr.prepros.Serial(parse_step, select_step, batch_step, take_step)
def test_prepro_record_sequence_example(dummy_tfrecord: str):
    """Test TFRecordSequenceExample on records read by TFRecordReader.

    The dataset built from ``dummy_tfrecord`` is expected to contain exactly
    one element, exposing the keys "a" and "b".
    """
    string_field = dpr.Field(name="a", shape=(), dtype=tf.string)
    sequence_field = dpr.Field(name="b", shape=(None, 2), dtype=tf.int64)

    reader = dpr.readers.TFRecordReader([dummy_tfrecord])
    prepro = dpr.prepros.TFRecordSequenceExample(
        fields=[string_field, sequence_field]
    )

    # Parse the raw records, then materialize the dataset to inspect it.
    parsed = prepro(reader.as_dataset())
    elements = list(dpr.readers.from_dataset(parsed))

    assert len(elements) == 1
    assert elements[0].keys() == {"a", "b"}
def test_prepros_padded_batch(dataset):
    """Test PaddedBatch with batch size 2 on field "a" (default -1).

    The single expected batch is ``[[0, -1], [0, 1]]``: the shorter row is
    filled with the field default.
    """
    padded_field = dpr.Field(name="a", shape=[None], dtype=tf.int32, default=-1)
    batch_fn = dpr.prepros.PaddedBatch(2, [padded_field])

    batches = list(dpr.readers.from_dataset(batch_fn(dataset)))

    np.testing.assert_equal(batches, [{"a": [[0, -1], [0, 1]]}])
def test_jobs_save_dataset(tmpdir):
    """Test SaveDataset by writing five examples and reading them back."""
    field = deepr.Field(name="x", shape=(2, 2), dtype=tf.int64)
    path = str(tmpdir.join("data"))

    def _examples():
        # Five (2, 2) arrays: [[0, 1], [2, 3]] scaled by 0..4.
        for factor in range(5):
            yield {"x": np.reshape(np.arange(4) * factor, (2, 2))}

    # Write the generated examples with the SaveDataset job.
    input_fn = deepr.readers.GeneratorReader(
        _examples,
        output_types={"x": field.dtype},
        output_shapes={"x": field.shape},
    )
    job = deepr.jobs.SaveDataset(
        input_fn=input_fn,
        path=path,
        fields=[field],
        chunk_size=2,
        secs=1,
    )
    job.run()

    # Read everything back, unshuffled, and compare element by element.
    reader = deepr.readers.TFRecordReader(
        path=path,
        shuffle=False,
        num_parallel_reads=None,
        num_parallel_calls=None,
    )
    from_example = deepr.prepros.FromExample([field])
    pairs = zip(deepr.readers.from_dataset(from_example(reader())), _examples())

    last_index = 0
    for last_index, (got, expected) in enumerate(pairs):
        np.testing.assert_equal(got, expected)

    # Index 4 means all five examples were compared.
    assert last_index == 4
def test_writers_record_simple(tmpdir, shape, dtype, chunk_size):
    """Simple round-trip test for the record writer.

    Five examples are written with ``TFRecordWriter`` and read back with
    ``TFRecordReader``; each one read must equal the one generated.
    """
    # A falsy chunk_size targets a single ".tfrecord.gz" file, otherwise the
    # writer is pointed at "data" (presumably a directory of chunks).
    if chunk_size:
        path = str(tmpdir.join("data"))
    else:
        path = str(tmpdir.join("data.tfrecord.gz"))

    # Unknown dimensions get a concrete size of 2 so data can be generated.
    static_shape = [2 if dim is None else dim for dim in shape]
    size = 1
    for dim in static_shape:
        size *= dim

    def _examples():
        for factor in range(5):
            yield {"x": np.reshape(np.arange(size) * factor, static_shape)}

    dataset = tf.data.Dataset.from_generator(
        _examples,
        output_types={"x": dtype},
        output_shapes={"x": shape},
    )

    # Serialize and write the dataset.
    field = dpr.Field(name="x", shape=shape, dtype=dtype)
    to_example = dpr.prepros.ToExample([field])
    writer = dpr.writers.TFRecordWriter(path=path, chunk_size=chunk_size)
    writer.write(to_example(dataset))

    # Read it back, unshuffled, and compare element by element.
    reader = dpr.readers.TFRecordReader(
        path=path,
        shuffle=False,
        num_parallel_reads=None,
        num_parallel_calls=None,
    )
    from_example = dpr.prepros.FromExample([field])
    pairs = zip(dpr.readers.from_dataset(from_example(reader())), _examples())

    last_index = 0
    for last_index, (got, expected) in enumerate(pairs):
        np.testing.assert_equal(got, expected)

    # Index 4 means all five examples were compared.
    assert last_index == 4
def test_prepros_to_example():
    """Test ToExample.

    ``map_func`` must return a scalar string tensor that can be evaluated
    in a session.
    """
    fields = [
        dpr.Field(name="x", shape=(2, 2), dtype=tf.int64),
        dpr.Field(name="y", shape=(None, None, None), dtype=tf.int64),
        dpr.Field(name="uid", shape=(), dtype=tf.string),
    ]
    tensors = {
        "x": np.array([[0, 1], [2, 3]]),
        "y": np.ones([2, 3, 4], dtype=np.int64),
        "uid": b"1234",
    }

    serialized = dpr.prepros.ToExample(fields=fields).map_func(tensors)

    assert isinstance(serialized, tf.Tensor)
    assert serialized.dtype == tf.string
    assert serialized.shape == ()

    # Evaluating the tensor checks that the graph actually runs.
    with tf.Session() as sess:
        sess.run(serialized)
def test_prepros_from_example():
    """Test FromExample.

    A serialized Example holding the flat int64 list [0, 1, 2, 3] under "x"
    must be parsed into a (2, 2) tensor.
    """
    int_values = tf.train.Int64List(value=[0, 1, 2, 3])
    features = tf.train.Features(
        feature={"x": tf.train.Feature(int64_list=int_values)}
    )
    serialized = tf.train.Example(features=features).SerializeToString()

    field = dpr.Field(name="x", shape=(2, 2), dtype=tf.int64)
    parsed = dpr.prepros.FromExample(fields=[field]).map_func(serialized)

    assert isinstance(parsed["x"], tf.Tensor)
    assert parsed["x"].shape == (2, 2)

    with tf.Session() as sess:
        np.testing.assert_equal(
            sess.run(parsed),
            {"x": np.array([[0, 1], [2, 3]])},
        )
def TARGET_POSITIVES_ONE_HOT(vocab_size):  # pylint: disable=invalid-name
    """Return the int64 field "targetPositivesOneHot" of shape (vocab_size,)."""
    one_hot_shape = (vocab_size,)
    return deepr.Field(
        name="targetPositivesOneHot",
        shape=one_hot_shape,
        dtype=tf.int64,
    )
"""Fields for MovieLens.""" import tensorflow as tf import deepr UID = deepr.Field(name="uid", shape=(), dtype=tf.int64) INPUT_POSITIVES = deepr.Field(name="inputPositives", shape=(None, ), dtype=tf.int64) TARGET_POSITIVES = deepr.Field(name="targetPositives", shape=(None, ), dtype=tf.int64) TARGET_NEGATIVES = deepr.Field(name="targetNegatives", shape=(None, None), dtype=tf.int64) INPUT_MASK = deepr.Field(name="inputMask", dtype=tf.bool, shape=(None, ), default=False) TARGET_MASK = deepr.Field(name="targetMask", dtype=tf.bool, shape=(None, ), default=False)
def test_startswith():
    """Test startswith method"""
    field = dpr.Field(name="inputPositive", shape=[None, None], dtype=tf.int32)
    # "inputPositive" begins with "input", so the check must be truthy.
    assert field.startswith("input")
def test_has_fixed_len(shape, expected):
    """Test has_fixed_len method"""
    field = dpr.Field(name="name", dtype=tf.int32, shape=shape)
    assert field.has_fixed_len() == expected
    # has_fixed_len and has_var_len must never agree.
    assert field.has_var_len() != field.has_fixed_len()
import deepr as dpr


@pytest.mark.parametrize(
    "shape, expected",
    [
        ([None, None], False),
        ([2], True),
        ([None, 2], False),
    ],
)
def test_has_fixed_len(shape, expected):
    """Test has_fixed_len method"""
    field = dpr.Field(name="name", dtype=tf.int32, shape=shape)
    assert field.has_fixed_len() == expected
    # has_fixed_len and has_var_len must never agree.
    assert field.has_var_len() != field.has_fixed_len()


@pytest.mark.parametrize(
    "field",
    [
        dpr.Field(name="name", shape=[None, None], dtype=tf.int32),
        dpr.Field(name="name", shape=[2], dtype=tf.int32),
        dpr.Field(name="name", shape=[None, 2], dtype=tf.int32),
    ],
)
def test_as_feature(field):
    """Test as_feature method"""
    # Smoke test: the call only has to complete without raising.
    field.as_feature()


def test_startswith():
    """Test startswith method"""
    field = dpr.Field(name="inputPositive", shape=[None, None], dtype=tf.int32)
    assert field.startswith("input")
"y": np.ones([2, 3, 4], dtype=np.int64), "uid": b"1234" } to_example = dpr.prepros.ToExample(fields=[x, y, uid]) example = to_example.map_func(tensor) assert isinstance(example, tf.Tensor) assert example.dtype == tf.string assert example.shape == () with tf.Session() as sess: sess.run(example) @pytest.mark.parametrize( "field, tensor", [ (dpr.Field(name="x", shape=(), dtype=tf.string), b"1234"), (dpr.Field(name="x", shape=(2, ), dtype=tf.int64), np.arange(2)), (dpr.Field(name="x", shape=(2, 2), dtype=tf.int64), np.reshape(np.arange(2 * 2), (2, 2))), (dpr.Field(name="x", shape=(None, 2), dtype=tf.int64), np.reshape(np.arange(2 * 2), (2, 2))), (dpr.Field(name="x", shape=(None, None), dtype=tf.int64), np.reshape(np.arange(2 * 2), (2, 2))), (dpr.Field(name="x", shape=(2, 3, 4), dtype=tf.int64), np.reshape(np.arange(2 * 3 * 4), (2, 3, 4))), (dpr.Field(name="x", shape=(None, 3, 4), dtype=tf.int64), np.reshape(np.arange(2 * 3 * 4), (2, 3, 4))), (dpr.Field(name="x", shape=(None, None, 4), dtype=tf.int64), np.reshape(np.arange(2 * 3 * 4),
def INPUT_POSITIVES_ONE_HOT(vocab_size):  # pylint: disable=invalid-name
    """Return the int64 field "inputPositivesOneHot" of shape (vocab_size,)."""
    one_hot_shape = (vocab_size,)
    return dpr.Field(
        name="inputPositivesOneHot",
        shape=one_hot_shape,
        dtype=tf.int64,
    )
"""Tests for common.field""" import pytest import tensorflow as tf import deepr @pytest.mark.parametrize( "field, expected", [ (deepr.Field(name="name", shape=[None, None], dtype=tf.int64), tf.io.VarLenFeature(dtype=tf.int64)), (deepr.Field(name="name", shape=[None], dtype=tf.int64), tf.io.VarLenFeature(dtype=tf.int64)), (deepr.Field(name="name", shape=[2], dtype=tf.int64), tf.io.FixedLenFeature(shape=(2, ), dtype=tf.int64)), ( deepr.Field(name="name", shape=[None, 2], dtype=tf.int64), tf.io.FixedLenSequenceFeature(shape=(2, ), dtype=tf.int64), ), ], ) def test_feature_specs(field, expected): """Test as_feature method""" assert field.feature_specs == expected def test_startswith(): """Test startswith method""" assert deepr.Field(name="inputPositive",