Exemple #1
0
def DefaultPrepro(batch_size, repeat_size):
    """Build the default preprocessing pipeline.

    The returned ``Serial`` chains three steps, in this order:

    1. ``TFRecordSequenceExample`` configured with two scalar ``tf.float32``
       fields named ``"x"`` and ``"y"``.
    2. ``Batch`` with the given ``batch_size``.
    3. ``Repeat`` restricted to ``tf.estimator.ModeKeys.TRAIN``.

    Args:
        batch_size: Forwarded to ``dpr.prepros.Batch`` as ``batch_size``.
        repeat_size: Forwarded to ``dpr.prepros.Repeat`` as its first
            positional argument.

    Returns:
        The ``dpr.prepros.Serial`` instance wrapping the three steps.
    """
    fields = [
        dpr.Field(name="x", shape=(), dtype=tf.float32),
        dpr.Field(name="y", shape=(), dtype=tf.float32),
    ]
    return dpr.prepros.Serial(
        # Reached through ``dpr.prepros`` like every other step here, so the
        # block does not rely on a separate ``dprp`` alias being in scope.
        dpr.prepros.TFRecordSequenceExample(fields=fields),
        dpr.prepros.Batch(batch_size=batch_size),
        dpr.prepros.Repeat(repeat_size, modes=[tf.estimator.ModeKeys.TRAIN]),
    )
Exemple #2
0
def InferencePrepro(batch_size, count, inputs: str = "inputs/x"):
    """Build the preprocessing pipeline used for inference.

    The steps are, in order: ``TFRecordSequenceExample`` with scalar
    ``tf.float32`` fields ``"x"`` and ``"y"``, a ``Map`` applying
    ``Select(inputs="x", outputs=inputs)`` with ``update=False``, ``Batch``
    and ``Take``.

    Args:
        batch_size: Forwarded to ``dpr.prepros.Batch``.
        count: Forwarded to ``dpr.prepros.Take``.
        inputs: Output name given to the ``Select`` layer.

    Returns:
        The ``dpr.prepros.Serial`` instance wrapping the four steps.
    """
    record_fields = [
        dpr.Field(name=field_name, shape=(), dtype=tf.float32)
        for field_name in ("x", "y")
    ]
    parse_step = dpr.prepros.TFRecordSequenceExample(fields=record_fields)
    select_layer = dpr.layers.Select(inputs="x", outputs=inputs)
    select_step = dpr.prepros.Map(select_layer, update=False)
    batch_step = dpr.prepros.Batch(batch_size=batch_size)
    take_step = dpr.prepros.Take(count=count)
    return dpr.prepros.Serial(parse_step, select_step, batch_step, take_step)
def test_prepro_record_sequence_example(dummy_tfrecord: str):
    """Check TFRecordSequenceExample on the ``dummy_tfrecord`` fixture.

    Expects exactly one parsed element whose keys are ``"a"`` and ``"b"``.
    """
    field_a = dpr.Field(name="a", shape=(), dtype=tf.string)
    field_b = dpr.Field(name="b", shape=(None, 2), dtype=tf.int64)
    record_reader = dpr.readers.TFRecordReader([dummy_tfrecord])
    parser = dpr.prepros.TFRecordSequenceExample(fields=[field_a, field_b])
    parsed_dataset = parser(record_reader.as_dataset())
    parsed = list(dpr.readers.from_dataset(parsed_dataset))
    assert len(parsed) == 1
    assert parsed[0].keys() == {"a", "b"}
Exemple #4
0
def test_prepros_padded_batch(dataset):
    """Run PaddedBatch(2, fields) on the ``dataset`` fixture.

    The single field ``"a"`` has ``default=-1``; the result is compared to
    one expected batch.
    """
    padded_field = dpr.Field(name="a",
                             shape=[None],
                             dtype=tf.int32,
                             default=-1)
    batcher = dpr.prepros.PaddedBatch(2, [padded_field])
    batches = dpr.readers.from_dataset(batcher(dataset))
    wanted = [{"a": [[0, -1], [0, 1]]}]
    np.testing.assert_equal(list(batches), wanted)
Exemple #5
0
def test_jobs_save_dataset(tmpdir):
    """Save five generated elements with SaveDataset, then read them back."""
    field = deepr.Field(name="x", shape=(2, 2), dtype=tf.int64)
    path = str(tmpdir.join("data"))

    def _gen():
        # Five (2, 2) arrays: arange(4) scaled by 0, 1, 2, 3, 4
        for scale in range(5):
            yield {"x": np.reshape(np.arange(4) * scale, (2, 2))}

    # Write the generated elements with the SaveDataset job
    source = deepr.readers.GeneratorReader(
        _gen,
        output_types={"x": field.dtype},
        output_shapes={"x": field.shape},
    )
    save_job = deepr.jobs.SaveDataset(
        input_fn=source,
        path=path,
        fields=[field],
        chunk_size=2,
        secs=1,
    )
    save_job.run()

    # Read the records back and compare them, in order, to a fresh generator
    record_reader = deepr.readers.TFRecordReader(
        path=path,
        shuffle=False,
        num_parallel_reads=None,
        num_parallel_calls=None,
    )
    decode = deepr.prepros.FromExample([field])
    pairs = zip(deepr.readers.from_dataset(decode(record_reader())), _gen())
    last_index = 0
    for last_index, (actual, wanted) in enumerate(pairs):
        np.testing.assert_equal(actual, wanted)
    # Index 4 is only reached if all five pairs were compared
    assert last_index == 4
Exemple #6
0
def test_writers_record_simple(tmpdir, shape, dtype, chunk_size):
    """Write five elements with TFRecordWriter and read them back."""
    # A falsy chunk_size targets "data.tfrecord.gz", otherwise "data"
    target_name = "data" if chunk_size else "data.tfrecord.gz"
    path = str(tmpdir.join(target_name))

    # Replace each None dimension by 2 to obtain a concrete shape
    static_shape = [2 if dim is None else dim for dim in shape]
    size = 1
    for dim in static_shape:
        size *= dim

    def _gen():
        # Five arrays of the concrete shape: arange(size) scaled by 0..4
        for scale in range(5):
            yield {"x": np.reshape(np.arange(size) * scale, static_shape)}

    dataset = tf.data.Dataset.from_generator(
        _gen,
        output_types={"x": dtype},
        output_shapes={"x": shape},
    )

    # Serialize with ToExample and write the result
    field = dpr.Field(name="x", shape=shape, dtype=dtype)
    encode = dpr.prepros.ToExample([field])
    record_writer = dpr.writers.TFRecordWriter(path=path,
                                               chunk_size=chunk_size)
    record_writer.write(encode(dataset))

    # Read the records back and compare them, in order, to a fresh generator
    record_reader = dpr.readers.TFRecordReader(
        path=path,
        shuffle=False,
        num_parallel_reads=None,
        num_parallel_calls=None,
    )
    decode = dpr.prepros.FromExample([field])
    pairs = zip(dpr.readers.from_dataset(decode(record_reader())), _gen())
    last_index = 0
    for last_index, (actual, wanted) in enumerate(pairs):
        np.testing.assert_equal(actual, wanted)
    # Index 4 is only reached if all five pairs were compared
    assert last_index == 4
Exemple #7
0
def test_prepros_to_example():
    """Check that ToExample.map_func yields a scalar string tensor.

    The resulting tensor is also evaluated in a session to make sure it can
    be run.
    """
    fields = [
        dpr.Field(name="x", shape=(2, 2), dtype=tf.int64),
        dpr.Field(name="y", shape=(None, None, None), dtype=tf.int64),
        dpr.Field(name="uid", shape=(), dtype=tf.string),
    ]
    element = {
        "x": np.array([[0, 1], [2, 3]]),
        "y": np.ones([2, 3, 4], dtype=np.int64),
        "uid": b"1234",
    }
    serialized = dpr.prepros.ToExample(fields=fields).map_func(element)
    assert isinstance(serialized, tf.Tensor)
    assert serialized.dtype == tf.string
    assert serialized.shape == ()
    with tf.Session() as sess:
        sess.run(serialized)
Exemple #8
0
def test_prepros_from_example():
    """Check that FromExample.map_func parses a serialized Example.

    Four int64 values stored under ``"x"`` must come back as a (2, 2) tensor.
    """
    int_values = tf.train.Int64List(value=[0, 1, 2, 3])
    features = tf.train.Features(
        feature={"x": tf.train.Feature(int64_list=int_values)})
    serialized = tf.train.Example(features=features).SerializeToString()

    field = dpr.Field(name="x", shape=(2, 2), dtype=tf.int64)
    parsed = dpr.prepros.FromExample(fields=[field]).map_func(serialized)

    assert isinstance(parsed["x"], tf.Tensor)
    assert parsed["x"].shape == (2, 2)
    with tf.Session() as sess:
        evaluated = sess.run(parsed)
        np.testing.assert_equal(evaluated, {"x": np.array([[0, 1], [2, 3]])})
Exemple #9
0
def TARGET_POSITIVES_ONE_HOT(vocab_size):
    # pylint: disable=invalid-name
    """Return the ``targetPositivesOneHot`` field.

    Args:
        vocab_size: Size of the field's single dimension.

    Returns:
        A ``deepr.Field`` of dtype ``tf.int64`` and shape ``(vocab_size,)``.
    """
    one_hot_shape = (vocab_size, )
    return deepr.Field(
        name="targetPositivesOneHot",
        shape=one_hot_shape,
        dtype=tf.int64,
    )
Exemple #10
0
"""Fields for MovieLens."""

import tensorflow as tf

import deepr

# Scalar int64 field.
UID = deepr.Field(name="uid", shape=(), dtype=tf.int64)

# int64 fields with one dimension of unspecified size.
INPUT_POSITIVES = deepr.Field(
    name="inputPositives",
    shape=(None, ),
    dtype=tf.int64,
)

TARGET_POSITIVES = deepr.Field(
    name="targetPositives",
    shape=(None, ),
    dtype=tf.int64,
)

# int64 field with two dimensions of unspecified size.
TARGET_NEGATIVES = deepr.Field(
    name="targetNegatives",
    shape=(None, None),
    dtype=tf.int64,
)

# Boolean fields with one dimension of unspecified size, defaulting to False.
INPUT_MASK = deepr.Field(
    name="inputMask",
    shape=(None, ),
    dtype=tf.bool,
    default=False,
)

TARGET_MASK = deepr.Field(
    name="targetMask",
    shape=(None, ),
    dtype=tf.bool,
    default=False,
)

Exemple #11
0
def test_startswith():
    """Check that a field named "inputPositive" startswith "input"."""
    field = dpr.Field(
        name="inputPositive",
        shape=[None, None],
        dtype=tf.int32,
    )
    assert field.startswith("input")
Exemple #12
0
def test_has_fixed_len(shape, expected):
    """Check has_fixed_len against ``expected`` and against has_var_len.

    The two predicates must never return the same value for a given field.
    """
    int_field = dpr.Field(
        name="name",
        shape=shape,
        dtype=tf.int32,
    )
    assert int_field.has_fixed_len() == expected
    assert int_field.has_fixed_len() != int_field.has_var_len()
Exemple #13
0
import deepr as dpr


@pytest.mark.parametrize(
    "shape, expected",
    [
        ([None, None], False),
        ([2], True),
        ([None, 2], False),
    ],
)
def test_has_fixed_len(shape, expected):
    """Check has_fixed_len against ``expected`` and against has_var_len.

    The two predicates must never return the same value for a given field.
    """
    int_field = dpr.Field(
        name="name",
        shape=shape,
        dtype=tf.int32,
    )
    assert int_field.has_fixed_len() == expected
    assert int_field.has_fixed_len() != int_field.has_var_len()


@pytest.mark.parametrize(
    "field",
    [
        dpr.Field(name="name", shape=field_shape, dtype=tf.int32)
        for field_shape in ([None, None], [2], [None, 2])
    ],
)
def test_as_feature(field):
    """Check that as_feature can be called for each parametrized field.

    Only the absence of an exception is verified; the return value is
    ignored.
    """
    field.as_feature()


def test_startswith():
    """Check that a field named "inputPositive" startswith "input"."""
    field = dpr.Field(
        name="inputPositive",
        shape=[None, None],
        dtype=tf.int32,
    )
    assert field.startswith("input")
Exemple #14
0
        "y": np.ones([2, 3, 4], dtype=np.int64),
        "uid": b"1234"
    }
    to_example = dpr.prepros.ToExample(fields=[x, y, uid])
    example = to_example.map_func(tensor)
    assert isinstance(example, tf.Tensor)
    assert example.dtype == tf.string
    assert example.shape == ()
    with tf.Session() as sess:
        sess.run(example)


@pytest.mark.parametrize(
    "field, tensor",
    [
        (dpr.Field(name="x", shape=(), dtype=tf.string), b"1234"),
        (dpr.Field(name="x", shape=(2, ), dtype=tf.int64), np.arange(2)),
        (dpr.Field(name="x", shape=(2, 2),
                   dtype=tf.int64), np.reshape(np.arange(2 * 2), (2, 2))),
        (dpr.Field(name="x", shape=(None, 2),
                   dtype=tf.int64), np.reshape(np.arange(2 * 2), (2, 2))),
        (dpr.Field(name="x", shape=(None, None),
                   dtype=tf.int64), np.reshape(np.arange(2 * 2), (2, 2))),
        (dpr.Field(name="x", shape=(2, 3, 4),
                   dtype=tf.int64), np.reshape(np.arange(2 * 3 * 4),
                                               (2, 3, 4))),
        (dpr.Field(name="x", shape=(None, 3, 4),
                   dtype=tf.int64), np.reshape(np.arange(2 * 3 * 4),
                                               (2, 3, 4))),
        (dpr.Field(name="x", shape=(None, None, 4),
                   dtype=tf.int64), np.reshape(np.arange(2 * 3 * 4),
Exemple #15
0
def INPUT_POSITIVES_ONE_HOT(vocab_size):
    # pylint: disable=invalid-name
    """Return the ``inputPositivesOneHot`` field.

    Args:
        vocab_size: Size of the field's single dimension.

    Returns:
        A ``dpr.Field`` of dtype ``tf.int64`` and shape ``(vocab_size,)``.
    """
    one_hot_shape = (vocab_size, )
    return dpr.Field(
        name="inputPositivesOneHot",
        shape=one_hot_shape,
        dtype=tf.int64,
    )
Exemple #16
0
"""Tests for common.field"""

import pytest
import tensorflow as tf

import deepr


@pytest.mark.parametrize(
    "field, expected",
    [
        (
            deepr.Field(name="name", shape=[None, None], dtype=tf.int64),
            tf.io.VarLenFeature(dtype=tf.int64),
        ),
        (
            deepr.Field(name="name", shape=[None], dtype=tf.int64),
            tf.io.VarLenFeature(dtype=tf.int64),
        ),
        (
            deepr.Field(name="name", shape=[2], dtype=tf.int64),
            tf.io.FixedLenFeature(shape=(2, ), dtype=tf.int64),
        ),
        (
            deepr.Field(name="name", shape=[None, 2], dtype=tf.int64),
            tf.io.FixedLenSequenceFeature(shape=(2, ), dtype=tf.int64),
        ),
    ],
)
def test_feature_specs(field, expected):
    """Check the ``feature_specs`` attribute for each parametrized field."""
    actual = field.feature_specs
    assert actual == expected


def test_startswith():
    """Test startswith method"""
    assert deepr.Field(name="inputPositive",