def testSerializedContainingSparseFeature(self):
  """Checks a batch of four examples against one sparse expectation.

  The batch holds a populated example, one with empty lists, one whose
  "val" feature is unset and whose "idx" is absent, and one whose indices
  are not sorted. The serialized batch, the feature spec and the expected
  values are handed to `self._test`.
  """
  populated = example(features=features({
      "val": float_feature([3, 4]),
      "idx": int64_feature([5, 10])
  }))
  empty_lists = example(features=features({
      "val": float_feature([]),  # empty float list
      "idx": int64_feature([])
  }))
  unset_value = example(features=features({
      "val": feature(),  # feature with nothing in it
      # "idx" is deliberately left out of this example
  }))
  unsorted = example(features=features({
      "val": float_feature([1, 2, -1]),
      "idx": int64_feature([0, 9, 3])  # unsorted
  }))
  serialized = [
      proto.SerializeToString()
      for proto in (populated, empty_lists, unset_value, unsorted)
  ]

  indices = np.array(
      [[0, 5], [0, 10], [3, 0], [3, 3], [3, 9]], dtype=np.int64)
  values = np.array([3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32)
  # batch == 4, max_elems == 13
  dense_shape = np.array([4, 13], dtype=np.int64)
  expected_output = {
      "sp": sparse_tensor.SparseTensorValue(indices, values, dense_shape),
  }

  feature_spec = {
      "sp": parsing_ops.SparseFeature(["idx"], "val", dtypes.float32, [13]),
  }
  self._test(
      ops.convert_to_tensor(serialized),
      feature_spec,
      expected_values=expected_output,
      create_iterator_twice=True)
def testSingleExampleWithSparseAndSparseFeatureAndDense(self):
  """Checks one serialized example against sparse and dense expectations.

  The example supplies "c", "val", "idx" and "st_a". The fixed-length
  features "a" and "b" are not present in it, and their expected values
  are built from the declared defaults.
  """
  a_default = [1, 2, 3]
  b_default = np.random.rand(3, 3).astype(bytes)

  proto = example(features=features({
      "c": float_feature([3, 4]),
      "val": bytes_feature([b"a", b"b"]),
      "idx": int64_feature([0, 3]),
      "st_a": float_feature([3.0, 4.0])
  }))
  serialized = proto.SerializeToString()

  expected_output = {
      "st_a": (
          np.array([[0], [1]], dtype=np.int64),  # indices
          np.array([3.0, 4.0], dtype=np.float32),  # values
          np.array([2], dtype=np.int64),  # shape: max_values == 2
      ),
      "sp": (
          np.array([[0], [3]], dtype=np.int64),  # indices
          np.array(["a", "b"], dtype="|S"),  # values
          np.array([13], dtype=np.int64),  # shape: max_values == 13
      ),
      "a": [a_default],
      "b": b_default,
      "c": np.array([3, 4], dtype=np.float32),
  }

  feature_spec = {
      "st_a": parsing_ops.VarLenFeature(dtypes.float32),
      "sp": parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
      "a": parsing_ops.FixedLenFeature(
          (1, 3), dtypes.int64, default_value=a_default),
      "b": parsing_ops.FixedLenFeature(
          (3, 3), dtypes.string, default_value=b_default),
      # "c" has no default_value, so the example has to provide it.
      "c": parsing_ops.FixedLenFeature((2,), dtypes.float32),
  }
  self._test(
      {
          "example_names": ops.convert_to_tensor("in1"),
          "serialized": ops.convert_to_tensor(serialized),
          "features": feature_spec,
      },
      expected_output)
def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self):
  """Checks a batch of two examples with sparse and dense feature specs.

  Neither example carries "st_a", "a" or "b": the expected "st_a" is an
  empty sparse triple and the expected "a"/"b" repeat the defaults once per
  example. "c" has no default and is present in both examples.
  """
  a_default = [1, 2, 3]
  b_default = np.random.rand(3, 3).astype(bytes)

  first = example(features=features({
      "c": float_feature([3, 4]),
      "val": bytes_feature([b"a", b"b"]),
      "idx": int64_feature([0, 3])
  }))
  second = example(features=features({
      "c": float_feature([1, 2]),
      "val": bytes_feature([b"c"]),
      "idx": int64_feature([7])
  }))
  serialized = [proto.SerializeToString() for proto in (first, second)]

  expected_output = {
      "st_a": (
          np.empty((0, 2), dtype=np.int64),  # indices
          np.empty((0,), dtype=np.int64),  # values: st_a is int64
          np.array([2, 0], dtype=np.int64),  # batch == 2, max_elems == 0
      ),
      "sp": (
          np.array([[0, 0], [0, 3], [1, 7]], dtype=np.int64),  # indices
          np.array(["a", "b", "c"], dtype="|S"),  # values
          np.array([2, 13], dtype=np.int64),  # batch == 2, max_elems == 13
      ),
      "a": np.array(2 * [[a_default]]),
      "b": np.array(2 * [b_default]),
      "c": np.array([[3, 4], [1, 2]], dtype=np.float32),
  }

  feature_spec = {
      "st_a": parsing_ops.VarLenFeature(dtypes.int64),
      "sp": parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
      "a": parsing_ops.FixedLenFeature(
          (1, 3), dtypes.int64, default_value=a_default),
      "b": parsing_ops.FixedLenFeature(
          (3, 3), dtypes.string, default_value=b_default),
      # "c" has no default_value, so every example has to provide it.
      "c": parsing_ops.FixedLenFeature((2,), dtypes.float32),
  }
  self._test(
      ops.convert_to_tensor(serialized),
      feature_spec,
      expected_values=expected_output)
def testSerializedContainingSparseFeatureReuse(self):
  """Checks single examples where two SparseFeatures share the "idx" key.

  Each example is serialized and passed to `self._test` on its own. The
  second example has empty "val1"/"idx" lists and no "val2" at all, and
  both of its expected outputs are empty sparse values.
  """
  populated = example(features=features({
      "val1": float_feature([3, 4]),
      "val2": float_feature([5, 6]),
      "idx": int64_feature([5, 10])
  }))
  empty_lists = example(features=features({
      "val1": float_feature([]),  # empty float list
      "idx": int64_feature([])
  }))

  # (proto, expected output) pairs; sparse triples are
  # (indices, values, shape).
  cases = [
      (populated, {
          "sp1": (np.array([[5], [10]], dtype=np.int64),
                  np.array([3.0, 4.0], dtype=np.float32),
                  np.array([13], dtype=np.int64)),
          "sp2": (np.array([[5], [10]], dtype=np.int64),
                  np.array([5.0, 6.0], dtype=np.float32),
                  np.array([7], dtype=np.int64)),
      }),
      (empty_lists, {
          "sp1": empty_sparse(np.float32, shape=[13]),
          "sp2": empty_sparse(np.float32, shape=[7]),
      }),
  ]

  for proto, expected_output in cases:
    serialized = ops.convert_to_tensor(proto.SerializeToString())
    feature_spec = {
        "sp1": parsing_ops.SparseFeature("idx", "val1", dtypes.float32, 13),
        "sp2": parsing_ops.SparseFeature(
            "idx", "val2", dtypes.float32, size=7, already_sorted=True),
    }
    self._test(
        {"serialized": serialized, "features": feature_spec},
        expected_output)
def testSerializedContainingSparseFeatureReuse(self):
  """Checks a batch of two examples where "idx" backs two SparseFeatures.

  Only the first example contributes values; the second has empty
  "val1"/"idx" lists and no "val2". The two features are declared with
  different sizes (13 and 7), which shows up in the expected shapes.
  """
  populated = example(features=features({
      "val1": float_feature([3, 4]),
      "val2": float_feature([5, 6]),
      "idx": int64_feature([5, 10])
  }))
  empty_lists = example(features=features({
      "val1": float_feature([]),  # empty float list
      "idx": int64_feature([])
  }))
  serialized = [
      proto.SerializeToString() for proto in (populated, empty_lists)
  ]

  expected_output = {
      "sp1": (
          np.array([[0, 5], [0, 10]], dtype=np.int64),  # indices
          np.array([3.0, 4.0], dtype=np.float32),  # values
          np.array([2, 13], dtype=np.int64),  # batch == 2, max_elems == 13
      ),
      "sp2": (
          np.array([[0, 5], [0, 10]], dtype=np.int64),  # indices
          np.array([5.0, 6.0], dtype=np.float32),  # values
          np.array([2, 7], dtype=np.int64),  # batch == 2, max_elems == 7
      ),
  }

  feature_spec = {
      "sp1": parsing_ops.SparseFeature("idx", "val1", dtypes.float32, 13),
      "sp2": parsing_ops.SparseFeature(
          "idx", "val2", dtypes.float32, size=7, already_sorted=True),
  }
  self._test(
      {
          "serialized": ops.convert_to_tensor(serialized),
          "features": feature_spec,
      },
      expected_output)
def testSerializedContaining3DSparseFeature(self):
  """Checks single examples parsed with a two-index-key SparseFeature.

  Each example is serialized and passed to `self._test` on its own with a
  SparseFeature keyed on "idx0" and "idx1" and sized [13, 3]. The cases
  are: populated, empty lists, unset value with missing indices, and
  unsorted indices.
  """
  populated = example(features=features({
      "val": float_feature([3, 4]),
      "idx0": int64_feature([5, 10]),
      "idx1": int64_feature([0, 2]),
  }))
  empty_lists = example(features=features({
      "val": float_feature([]),  # empty float list
      "idx0": int64_feature([]),
      "idx1": int64_feature([]),
  }))
  unset_value = example(features=features({
      "val": feature(),  # feature with nothing in it
      # both index features are deliberately left out of this example
  }))
  unsorted = example(features=features({
      "val": float_feature([1, 2, -1]),
      "idx0": int64_feature([0, 9, 3]),  # unsorted
      "idx1": int64_feature([1, 0, 2]),
  }))

  # (proto, expected output) pairs; sparse triples are
  # (indices, values, shape).
  cases = [
      (populated, {
          "sp": (np.array([[5, 0], [10, 2]], dtype=np.int64),
                 np.array([3.0, 4.0], dtype=np.float32),
                 np.array([13, 3], dtype=np.int64)),
      }),
      (empty_lists, {
          "sp": empty_sparse(np.float32, shape=[13, 3]),
      }),
      (unset_value, {
          "sp": empty_sparse(np.float32, shape=[13, 3]),
      }),
      (unsorted, {
          "sp": (np.array([[0, 1], [3, 2], [9, 0]], dtype=np.int64),
                 np.array([1.0, -1.0, 2.0], dtype=np.float32),
                 np.array([13, 3], dtype=np.int64)),
      }),
  ]

  for proto, expected_output in cases:
    serialized = ops.convert_to_tensor(proto.SerializeToString())
    feature_spec = {
        "sp": parsing_ops.SparseFeature(
            ["idx0", "idx1"], "val", dtypes.float32, [13, 3]),
    }
    self._test(
        {"serialized": serialized, "features": feature_spec},
        expected_output)
def testSerializedContaining3DSparseFeature(self):
  """Checks a batch of four examples with a two-index-key SparseFeature.

  The batch holds a populated example, one with empty lists, one with an
  unset value and missing indices, and one with unsorted indices. Only the
  first and last examples appear in the expected sparse triple.
  """
  populated = example(features=features({
      "val": float_feature([3, 4]),
      "idx0": int64_feature([5, 10]),
      "idx1": int64_feature([0, 2]),
  }))
  empty_lists = example(features=features({
      "val": float_feature([]),  # empty float list
      "idx0": int64_feature([]),
      "idx1": int64_feature([]),
  }))
  unset_value = example(features=features({
      "val": feature(),  # feature with nothing in it
      # both index features are deliberately left out of this example
  }))
  unsorted = example(features=features({
      "val": float_feature([1, 2, -1]),
      "idx0": int64_feature([0, 9, 3]),  # unsorted
      "idx1": int64_feature([1, 0, 2]),
  }))
  serialized = [
      proto.SerializeToString()
      for proto in (populated, empty_lists, unset_value, unsorted)
  ]

  indices = np.array(
      [[0, 5, 0], [0, 10, 2], [3, 0, 1], [3, 3, 2], [3, 9, 0]],
      dtype=np.int64)
  values = np.array([3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32)
  # batch == 4, followed by the declared feature size [13, 3]
  dense_shape = np.array([4, 13, 3], dtype=np.int64)
  expected_output = {"sp": (indices, values, dense_shape)}

  feature_spec = {
      "sp": parsing_ops.SparseFeature(
          ["idx0", "idx1"], "val", dtypes.float32, [13, 3]),
  }
  self._test(
      ops.convert_to_tensor(serialized),
      feature_spec,
      expected_values=expected_output)
def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
  """Checks single examples where "idx" feeds two output features.

  "idx" is requested both as a VarLenFeature and as the index key of the
  SparseFeature "sp". Each example is serialized and passed to
  `self._test` on its own. The second example has unsorted indices: the
  expected "idx" keeps the stored order while the expected "sp" is ordered
  by index.
  """
  sorted_proto = example(features=features({
      "val": bytes_feature([b"a", b"b"]),
      "idx": int64_feature([0, 3])
  }))
  unsorted_proto = example(features=features({
      "val": bytes_feature([b"c", b"d"]),
      "idx": int64_feature([7, 1])
  }))

  # (proto, expected output) pairs; sparse triples are
  # (indices, values, shape).
  cases = [
      (sorted_proto, {
          "idx": (np.array([[0], [1]], dtype=np.int64),
                  np.array([0, 3], dtype=np.int64),
                  np.array([2], dtype=np.int64)),
          "sp": (np.array([[0], [3]], dtype=np.int64),
                 np.array(["a", "b"], dtype=bytes),
                 np.array([13], dtype=np.int64)),
      }),
      (unsorted_proto, {
          "idx": (np.array([[0], [1]], dtype=np.int64),
                  np.array([7, 1], dtype=np.int64),
                  np.array([2], dtype=np.int64)),
          "sp": (np.array([[1], [7]], dtype=np.int64),
                 np.array(["d", "c"], dtype=bytes),
                 np.array([13], dtype=np.int64)),
      }),
  ]

  for proto, expected_output in cases:
    serialized = ops.convert_to_tensor(proto.SerializeToString())
    feature_spec = {
        "idx": parsing_ops.VarLenFeature(dtypes.int64),
        "sp": parsing_ops.SparseFeature(["idx"], "val", dtypes.string, [13]),
    }
    self._test(
        {"serialized": serialized, "features": feature_spec},
        expected_output)
def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
  """Checks a named batch of two examples where "idx" feeds two features.

  "idx" is requested both as a VarLenFeature and as the index key of the
  SparseFeature "sp". The second example has unsorted indices: the
  expected "idx" keeps the stored order while the expected "sp" is ordered
  by index.
  """
  original = [
      example(features=features({
          "val": bytes_feature([b"a", b"b"]),
          "idx": int64_feature([0, 3])
      })),
      example(features=features({
          "val": bytes_feature([b"c", b"d"]),
          "idx": int64_feature([7, 1])
      })),
  ]
  names = ["in1", "in2"]
  serialized = [m.SerializeToString() for m in original]

  expected_idx = (
      np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.int64),  # indices
      # Pinned to int64 to match VarLenFeature(dtypes.int64); without an
      # explicit dtype NumPy picks a platform-dependent integer width.
      np.array([0, 3, 7, 1], dtype=np.int64),  # values
      np.array([2, 2], dtype=np.int64),  # batch == 2, max_elems == 2
  )
  expected_sp = (
      np.array([[0, 0], [0, 3], [1, 1], [1, 7]], dtype=np.int64),  # indices
      np.array(["a", "b", "d", "c"], dtype="|S"),  # values
      np.array([2, 13], dtype=np.int64),  # batch == 2, max_elems == 13
  )
  expected_output = {
      "idx": expected_idx,
      "sp": expected_sp,
  }

  self._test(
      {
          "example_names": names,
          "serialized": ops.convert_to_tensor(serialized),
          "features": {
              "idx": parsing_ops.VarLenFeature(dtypes.int64),
              "sp": parsing_ops.SparseFeature(
                  "idx", "val", dtypes.string, 13),
          },
      },
      expected_output)
def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
  """Checks a batch of two examples where "idx" feeds two features.

  "idx" is requested both as a VarLenFeature and as the index key of the
  SparseFeature "sp". The second example has unsorted indices: the
  expected "idx" keeps the stored order while the expected "sp" is ordered
  by index. Expectations are expressed as SparseTensorValue instances.
  """
  original = [
      example(features=features({
          "val": bytes_feature([b"a", b"b"]),
          "idx": int64_feature([0, 3])
      })),
      example(features=features({
          "val": bytes_feature([b"c", b"d"]),
          "idx": int64_feature([7, 1])
      })),
  ]
  serialized = [m.SerializeToString() for m in original]

  expected_idx = sparse_tensor.SparseTensorValue(
      np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.int64),  # indices
      # Pinned to int64 to match VarLenFeature(dtypes.int64); without an
      # explicit dtype NumPy picks a platform-dependent integer width.
      np.array([0, 3, 7, 1], dtype=np.int64),  # values
      np.array([2, 2], dtype=np.int64))  # batch == 2, max_elems == 2
  expected_sp = sparse_tensor.SparseTensorValue(
      np.array([[0, 0], [0, 3], [1, 1], [1, 7]], dtype=np.int64),  # indices
      np.array(["a", "b", "d", "c"], dtype="|S"),  # values
      np.array([2, 13], dtype=np.int64))  # batch == 2, max_elems == 13
  expected_output = {
      "idx": expected_idx,
      "sp": expected_sp,
  }

  self._test(
      ops.convert_to_tensor(serialized),
      {
          "idx": parsing_ops.VarLenFeature(dtypes.int64),
          "sp": parsing_ops.SparseFeature(
              ["idx"], "val", dtypes.string, [13]),
      },
      expected_values=expected_output,
      create_iterator_twice=True)
def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(
    self):
  """Checks single examples with sparse, SparseFeature and dense specs.

  Each example is serialized and passed to `self._test` on its own.
  Neither example carries "st_a", "a" or "b": the expected "st_a" is an
  empty sparse value and the expected "a"/"b" are the declared defaults.
  "c" has no default and is present in both examples.
  """
  a_default = np.array([[1, 2, 3]], dtype=np.int64)
  b_default = np.random.rand(3, 3).astype(bytes)
  expected_st_a = empty_sparse(np.int64)

  two_values = example(features=features({
      "c": float_feature([3, 4]),
      "val": bytes_feature([b"a", b"b"]),
      "idx": int64_feature([0, 3])
  }))
  one_value = example(features=features({
      "c": float_feature([1, 2]),
      "val": bytes_feature([b"c"]),
      "idx": int64_feature([7])
  }))

  # (proto, expected output) pairs; sparse triples are
  # (indices, values, shape).
  cases = [
      (two_values, {
          "st_a": expected_st_a,
          "sp": (np.array([[0], [3]], dtype=np.int64),
                 np.array(["a", "b"], dtype=bytes),
                 np.array([13], dtype=np.int64)),
          "a": a_default,
          "b": b_default,
          "c": np.array([3, 4], dtype=np.float32),
      }),
      (one_value, {
          "st_a": expected_st_a,
          "sp": (np.array([[7]], dtype=np.int64),
                 np.array(["c"], dtype=bytes),
                 np.array([13], dtype=np.int64)),
          "a": a_default,
          "b": b_default,
          "c": np.array([1, 2], dtype=np.float32),
      }),
  ]

  for proto, expected_output in cases:
    serialized = ops.convert_to_tensor(proto.SerializeToString())
    feature_spec = {
        "st_a": parsing_ops.VarLenFeature(dtypes.int64),
        "sp": parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
        "a": parsing_ops.FixedLenFeature(
            (1, 3), dtypes.int64, default_value=a_default),
        "b": parsing_ops.FixedLenFeature(
            (3, 3), dtypes.string, default_value=b_default),
        # "c" has no default_value, so the example has to provide it.
        "c": parsing_ops.FixedLenFeature((2,), dtypes.float32),
    }
    self._test(
        {"serialized": serialized, "features": feature_spec},
        expected_output)