def testSerializedContainingSparseFeature(self):
    # Parses a batch of four serialized examples into one sparse feature
    # "sp" assembled from the "idx" (indices) and "val" (values) features,
    # and compares the result against a single batched SparseTensorValue.
    protos = [
        example(features=features({
            "val": float_feature([3, 4]),
            "idx": int64_feature([5, 10]),
        })),
        # Both lists are present but hold no elements.
        example(features=features({
            "val": float_feature([]),
            "idx": int64_feature([]),
        })),
        # "val" is a bare feature() and "idx" is absent altogether.
        example(features=features({
            "val": feature(),
        })),
        # Indices are deliberately given out of order.
        example(features=features({
            "val": float_feature([1, 2, -1]),
            "idx": int64_feature([0, 9, 3]),
        })),
    ]
    serialized_batch = [proto.SerializeToString() for proto in protos]

    # Rows 1 and 2 contribute no entries; row 3 is expected in index order.
    sp_indices = np.array(
        [[0, 5], [0, 10], [3, 0], [3, 3], [3, 9]], dtype=np.int64)
    sp_values = np.array([3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32)
    sp_shape = np.array([4, 13], dtype=np.int64)  # batch == 4, max_elems = 13
    expected_sp = sparse_tensor.SparseTensorValue(
        sp_indices, sp_values, sp_shape)

    serialized_tensor = ops.convert_to_tensor(serialized_batch)
    feature_spec = {
        "sp": parsing_ops.SparseFeature(["idx"], "val", dtypes.float32, [13]),
    }

    self._test(
        serialized_tensor,
        feature_spec,
        expected_values={"sp": expected_sp},
        create_iterator_twice=True)
Esempio n. 2
0
  def testSingleExampleWithSparseAndSparseFeatureAndDense(self):
    # Parses one serialized example through a spec that mixes a
    # VarLenFeature, a SparseFeature and three FixedLenFeatures. Features
    # "a" and "b" are not in the proto, so their expected values are the
    # supplied defaults.
    proto = example(features=features({
        "c": float_feature([3, 4]),
        "val": bytes_feature([b"a", b"b"]),
        "idx": int64_feature([0, 3]),
        "st_a": float_feature([3.0, 4.0]),
    }))
    serialized = proto.SerializeToString()

    # Sparse expectations are (indices, values, shape) triples.
    expected_st_a = (
        np.array([[0], [1]], dtype=np.int64),
        np.array([3.0, 4.0], dtype=np.float32),
        np.array([2], dtype=np.int64),  # max_values = 2
    )
    expected_sp = (
        np.array([[0], [3]], dtype=np.int64),
        np.array(["a", "b"], dtype="|S"),
        np.array([13], dtype=np.int64),  # max_values = 13
    )

    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)
    expected_output = {
        "st_a": expected_st_a,
        "sp": expected_sp,
        "a": [a_default],
        "b": b_default,
        "c": np.array([3, 4], dtype=np.float32),
    }

    names_tensor = ops.convert_to_tensor("in1")
    serialized_tensor = ops.convert_to_tensor(serialized)
    feature_spec = {
        "st_a": parsing_ops.VarLenFeature(dtypes.float32),
        "sp": parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
        "a": parsing_ops.FixedLenFeature(
            (1, 3), dtypes.int64, default_value=a_default),
        "b": parsing_ops.FixedLenFeature(
            (3, 3), dtypes.string, default_value=b_default),
        # Feature "c" must be provided, since it has no default_value.
        "c": parsing_ops.FixedLenFeature((2,), dtypes.float32),
    }

    parse_args = {
        "example_names": names_tensor,
        "serialized": serialized_tensor,
        "features": feature_spec,
    }
    self._test(parse_args, expected_output)
Esempio n. 3
0
  def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self):
    # Parses a batch of two examples through a spec that mixes a
    # VarLenFeature ("st_a", absent from both protos), a SparseFeature
    # ("sp"), two FixedLenFeatures with defaults ("a", "b") and one without
    # a default ("c", present in both protos).
    expected_st_a = (  # indices, values, shape
        np.empty(
            (0, 2), dtype=np.int64),  # indices
        np.empty(
            (0,), dtype=np.int64),  # st_a is DT_INT64
        np.array(
            [2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
    expected_sp = (  # indices, values, shape
        np.array(
            [[0, 0], [0, 3], [1, 7]], dtype=np.int64), np.array(
                ["a", "b", "c"], dtype="|S"), np.array(
                    [2, 13], dtype=np.int64))  # batch == 2, max_elems = 13

    original = [
        example(features=features({
            "c": float_feature([3, 4]),
            "val": bytes_feature([b"a", b"b"]),
            "idx": int64_feature([0, 3])
        })), example(features=features({
            "c": float_feature([1, 2]),
            "val": bytes_feature([b"c"]),
            "idx": int64_feature([7])
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)
    expected_output = {
        "st_a": expected_st_a,
        "sp": expected_sp,
        # "a" is declared as dtypes.int64 below; pin the expected dtype so it
        # does not fall back to a 32-bit default integer on some platforms.
        "a": np.array(2 * [[a_default]], dtype=np.int64),
        "b": np.array(2 * [b_default]),
        "c": np.array(
            [[3, 4], [1, 2]], dtype=np.float32),
    }

    self._test(
        ops.convert_to_tensor(serialized),
        {
            "st_a":
                parsing_ops.VarLenFeature(dtypes.int64),
            "sp":
                parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
            "a":
                parsing_ops.FixedLenFeature(
                    (1, 3), dtypes.int64, default_value=a_default),
            "b":
                parsing_ops.FixedLenFeature(
                    (3, 3), dtypes.string, default_value=b_default),
            # Feature "c" must be provided, since it has no default_value.
            "c":
                parsing_ops.FixedLenFeature((2,), dtypes.float32),
        },
        expected_values=expected_output)
    def testSerializedContainingSparseFeatureReuse(self):
        # Parses two examples one at a time with two SparseFeatures that
        # share the "idx" index feature but read different value features
        # ("val1" / "val2") and declare different sizes (13 / 7).
        populated = example(features=features({
            "val1": float_feature([3, 4]),
            "val2": float_feature([5, 6]),
            "idx": int64_feature([5, 10]),
        }))
        # No "val2" at all here, and the "val1"/"idx" lists are empty.
        unpopulated = example(features=features({
            "val1": float_feature([]),
            "idx": int64_feature([]),
        }))

        # Sparse expectations are (indices, values, shape) triples.
        expected_populated = {
            "sp1": (
                np.array([[5], [10]], dtype=np.int64),
                np.array([3.0, 4.0], dtype=np.float32),
                np.array([13], dtype=np.int64),
            ),
            "sp2": (
                np.array([[5], [10]], dtype=np.int64),
                np.array([5.0, 6.0], dtype=np.float32),
                np.array([7], dtype=np.int64),
            ),
        }
        expected_unpopulated = {
            "sp1": empty_sparse(np.float32, shape=[13]),
            "sp2": empty_sparse(np.float32, shape=[7]),
        }

        cases = (
            (populated, expected_populated),
            (unpopulated, expected_unpopulated),
        )
        for proto, expected_output in cases:
            serialized_tensor = ops.convert_to_tensor(
                proto.SerializeToString())
            feature_spec = {
                "sp1": parsing_ops.SparseFeature(
                    "idx", "val1", dtypes.float32, 13),
                "sp2": parsing_ops.SparseFeature(
                    "idx", "val2", dtypes.float32, size=7,
                    already_sorted=True),
            }
            self._test(
                {"serialized": serialized_tensor, "features": feature_spec},
                expected_output)
Esempio n. 5
0
  def testSerializedContainingSparseFeatureReuse(self):
    # Parses a batch of two examples with two SparseFeatures that share the
    # "idx" index feature but read different value features and declare
    # different sizes (13 for "sp1", 7 for "sp2").
    protos = [
        example(features=features({
            "val1": float_feature([3, 4]),
            "val2": float_feature([5, 6]),
            "idx": int64_feature([5, 10]),
        })),
        # No "val2" here, and the "val1"/"idx" lists are empty.
        example(features=features({
            "val1": float_feature([]),
            "idx": int64_feature([]),
        })),
    ]
    serialized_batch = [proto.SerializeToString() for proto in protos]

    # Sparse expectations are (indices, values, shape) triples; only the
    # first batch row contributes entries.
    expected_sp1 = (
        np.array([[0, 5], [0, 10]], dtype=np.int64),
        np.array([3.0, 4.0], dtype=np.float32),
        np.array([2, 13], dtype=np.int64),  # batch == 2, max_elems = 13
    )
    expected_sp2 = (
        np.array([[0, 5], [0, 10]], dtype=np.int64),
        np.array([5.0, 6.0], dtype=np.float32),
        np.array([2, 7], dtype=np.int64),  # batch == 2, max_elems = 7
    )
    expected_output = {
        "sp1": expected_sp1,
        "sp2": expected_sp2,
    }

    serialized_tensor = ops.convert_to_tensor(serialized_batch)
    feature_spec = {
        "sp1": parsing_ops.SparseFeature("idx", "val1", dtypes.float32, 13),
        "sp2": parsing_ops.SparseFeature(
            "idx", "val2", dtypes.float32, size=7, already_sorted=True),
    }

    self._test(
        {"serialized": serialized_tensor, "features": feature_spec},
        expected_output)
    def testSerializedContaining3DSparseFeature(self):
        # Parses four examples one at a time into a SparseFeature that is
        # indexed by two index features ("idx0", "idx1") and declared with
        # size [13, 3].
        protos = [
            example(features=features({
                "val": float_feature([3, 4]),
                "idx0": int64_feature([5, 10]),
                "idx1": int64_feature([0, 2]),
            })),
            # All three lists are present but hold no elements.
            example(features=features({
                "val": float_feature([]),
                "idx0": int64_feature([]),
                "idx1": int64_feature([]),
            })),
            # "val" is a bare feature(); neither index feature is present.
            example(features=features({
                "val": feature(),
            })),
            # idx0 is deliberately given out of order.
            example(features=features({
                "val": float_feature([1, 2, -1]),
                "idx0": int64_feature([0, 9, 3]),
                "idx1": int64_feature([1, 0, 2]),
            })),
        ]

        # Sparse expectations are (indices, values, shape) triples, one dict
        # per proto above. The last one is expected ordered by idx0.
        expected_outputs = [
            {
                "sp": (
                    np.array([[5, 0], [10, 2]], dtype=np.int64),
                    np.array([3.0, 4.0], dtype=np.float32),
                    np.array([13, 3], dtype=np.int64),
                ),
            },
            {"sp": empty_sparse(np.float32, shape=[13, 3])},
            {"sp": empty_sparse(np.float32, shape=[13, 3])},
            {
                "sp": (
                    np.array([[0, 1], [3, 2], [9, 0]], dtype=np.int64),
                    np.array([1.0, -1.0, 2.0], dtype=np.float32),
                    np.array([13, 3], dtype=np.int64),
                ),
            },
        ]

        for proto, expected in zip(protos, expected_outputs):
            serialized_tensor = ops.convert_to_tensor(
                proto.SerializeToString())
            feature_spec = {
                "sp": parsing_ops.SparseFeature(
                    ["idx0", "idx1"], "val", dtypes.float32, [13, 3]),
            }
            self._test(
                {"serialized": serialized_tensor, "features": feature_spec},
                expected)
Esempio n. 7
0
  def testSerializedContaining3DSparseFeature(self):
    # Parses a batch of four examples into one SparseFeature indexed by two
    # index features ("idx0", "idx1"); with the batch dimension the
    # expected sparse result has three index columns.
    protos = [
        example(features=features({
            "val": float_feature([3, 4]),
            "idx0": int64_feature([5, 10]),
            "idx1": int64_feature([0, 2]),
        })),
        # All three lists are present but hold no elements.
        example(features=features({
            "val": float_feature([]),
            "idx0": int64_feature([]),
            "idx1": int64_feature([]),
        })),
        # "val" is a bare feature(); neither index feature is present.
        example(features=features({
            "val": feature(),
        })),
        # idx0 is deliberately given out of order.
        example(features=features({
            "val": float_feature([1, 2, -1]),
            "idx0": int64_feature([0, 9, 3]),
            "idx1": int64_feature([1, 0, 2]),
        })),
    ]
    serialized_batch = [proto.SerializeToString() for proto in protos]

    # Rows 1 and 2 contribute no entries; row 3 is expected ordered by idx0.
    sp_indices = np.array(
        [[0, 5, 0], [0, 10, 2], [3, 0, 1], [3, 3, 2], [3, 9, 0]],
        dtype=np.int64)
    sp_values = np.array([3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32)
    sp_shape = np.array([4, 13, 3], dtype=np.int64)  # batch == 4, size [13, 3]
    expected_output = {"sp": (sp_indices, sp_values, sp_shape)}

    serialized_tensor = ops.convert_to_tensor(serialized_batch)
    feature_spec = {
        "sp": parsing_ops.SparseFeature(
            ["idx0", "idx1"], "val", dtypes.float32, [13, 3]),
    }

    self._test(
        serialized_tensor,
        feature_spec,
        expected_values=expected_output)
    def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
        # Parses two examples one at a time where the "idx" feature is
        # consumed twice: once directly as a VarLenFeature and once as the
        # index feature of SparseFeature "sp".
        protos = [
            example(features=features({
                "val": bytes_feature([b"a", b"b"]),
                "idx": int64_feature([0, 3]),
            })),
            example(features=features({
                "val": bytes_feature([b"c", b"d"]),
                "idx": int64_feature([7, 1]),
            })),
        ]

        # Sparse expectations are (indices, values, shape) triples.
        expected_outputs = [
            {
                "idx": (
                    np.array([[0], [1]], dtype=np.int64),
                    np.array([0, 3], dtype=np.int64),
                    np.array([2], dtype=np.int64),
                ),
                "sp": (
                    np.array([[0], [3]], dtype=np.int64),
                    np.array(["a", "b"], dtype=bytes),
                    np.array([13], dtype=np.int64),
                ),
            },
            {
                # "idx" keeps the proto order (7, 1), while "sp" is expected
                # ordered by index, so "d" (index 1) precedes "c" (index 7).
                "idx": (
                    np.array([[0], [1]], dtype=np.int64),
                    np.array([7, 1], dtype=np.int64),
                    np.array([2], dtype=np.int64),
                ),
                "sp": (
                    np.array([[1], [7]], dtype=np.int64),
                    np.array(["d", "c"], dtype=bytes),
                    np.array([13], dtype=np.int64),
                ),
            },
        ]

        for proto, expected in zip(protos, expected_outputs):
            serialized_tensor = ops.convert_to_tensor(
                proto.SerializeToString())
            feature_spec = {
                "idx": parsing_ops.VarLenFeature(dtypes.int64),
                "sp": parsing_ops.SparseFeature(
                    ["idx"], "val", dtypes.string, [13]),
            }
            self._test(
                {"serialized": serialized_tensor, "features": feature_spec},
                expected)
Esempio n. 9
0
  def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
    # Parses a batch of two named examples where the "idx" feature is
    # consumed twice: once directly as a VarLenFeature and once as the index
    # feature of SparseFeature "sp".
    expected_idx = (  # indices, values, shape
        np.array(
            [[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.int64),
        # "idx" is declared as dtypes.int64 below; pin the expected dtype so
        # it does not fall back to a 32-bit default integer on some platforms.
        np.array([0, 3, 7, 1], dtype=np.int64), np.array(
            [2, 2], dtype=np.int64))  # batch == 2, max_elems = 2

    # "sp" is expected ordered by index within each row, so in row 1 "d"
    # (index 1) precedes "c" (index 7).
    expected_sp = (  # indices, values, shape
        np.array(
            [[0, 0], [0, 3], [1, 1], [1, 7]], dtype=np.int64), np.array(
                ["a", "b", "d", "c"], dtype="|S"), np.array(
                    [2, 13], dtype=np.int64))  # batch == 2, max_elems = 13

    original = [
        example(features=features({
            "val": bytes_feature([b"a", b"b"]),
            "idx": int64_feature([0, 3])
        })), example(features=features({
            "val": bytes_feature([b"c", b"d"]),
            "idx": int64_feature([7, 1])
        }))
    ]

    names = ["in1", "in2"]
    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "idx": expected_idx,
        "sp": expected_sp,
    }

    self._test({
        "example_names": names,
        "serialized": ops.convert_to_tensor(serialized),
        "features": {
            "idx": parsing_ops.VarLenFeature(dtypes.int64),
            "sp": parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
        }
    }, expected_output)
  def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
    # Parses a batch of two examples where the "idx" feature is consumed
    # twice: once directly as a VarLenFeature and once as the index feature
    # of SparseFeature "sp". Expectations are SparseTensorValue instances.
    expected_idx = sparse_tensor.SparseTensorValue(  # indices, values, shape
        np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.int64),
        # "idx" is declared as dtypes.int64 below; pin the expected dtype so
        # it does not fall back to a 32-bit default integer on some platforms.
        np.array([0, 3, 7, 1], dtype=np.int64),
        np.array([2, 2], dtype=np.int64))  # batch == 2, max_elems = 2

    # "sp" is expected ordered by index within each row, so in row 1 "d"
    # (index 1) precedes "c" (index 7).
    expected_sp = sparse_tensor.SparseTensorValue(  # indices, values, shape
        np.array([[0, 0], [0, 3], [1, 1], [1, 7]], dtype=np.int64),
        np.array(["a", "b", "d", "c"], dtype="|S"),
        np.array([2, 13], dtype=np.int64))  # batch == 2, max_elems = 13

    original = [
        example(features=features({
            "val": bytes_feature([b"a", b"b"]),
            "idx": int64_feature([0, 3])
        })), example(features=features({
            "val": bytes_feature([b"c", b"d"]),
            "idx": int64_feature([7, 1])
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "idx": expected_idx,
        "sp": expected_sp,
    }

    self._test(
        ops.convert_to_tensor(serialized), {
            "idx":
                parsing_ops.VarLenFeature(dtypes.int64),
            "sp":
                parsing_ops.SparseFeature(["idx"], "val", dtypes.string, [13]),
        },
        expected_values=expected_output,
        create_iterator_twice=True)
    def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(
            self):
        # Parses two examples one at a time through a spec that mixes a
        # VarLenFeature ("st_a", absent from both protos), a SparseFeature
        # ("sp"), two FixedLenFeatures with defaults ("a", "b") and one
        # without a default ("c", present in both protos).
        protos = [
            example(features=features({
                "c": float_feature([3, 4]),
                "val": bytes_feature([b"a", b"b"]),
                "idx": int64_feature([0, 3]),
            })),
            example(features=features({
                "c": float_feature([1, 2]),
                "val": bytes_feature([b"c"]),
                "idx": int64_feature([7]),
            })),
        ]

        a_default = np.array([[1, 2, 3]], dtype=np.int64)
        b_default = np.random.rand(3, 3).astype(bytes)

        expected_st_a = empty_sparse(np.int64)

        # One expectation dict per proto; sparse entries are
        # (indices, values, shape) triples.
        expected_outputs = [
            {
                "st_a": expected_st_a,
                "sp": (
                    np.array([[0], [3]], dtype=np.int64),
                    np.array(["a", "b"], dtype=bytes),
                    np.array([13], dtype=np.int64),
                ),
                "a": a_default,
                "b": b_default,
                "c": np.array([3, 4], dtype=np.float32),
            },
            {
                "st_a": expected_st_a,
                "sp": (
                    np.array([[7]], dtype=np.int64),
                    np.array(["c"], dtype=bytes),
                    np.array([13], dtype=np.int64),
                ),
                "a": a_default,
                "b": b_default,
                "c": np.array([1, 2], dtype=np.float32),
            },
        ]

        for proto, expected in zip(protos, expected_outputs):
            serialized_tensor = ops.convert_to_tensor(
                proto.SerializeToString())
            feature_spec = {
                "st_a": parsing_ops.VarLenFeature(dtypes.int64),
                "sp": parsing_ops.SparseFeature(
                    "idx", "val", dtypes.string, 13),
                "a": parsing_ops.FixedLenFeature(
                    (1, 3), dtypes.int64, default_value=a_default),
                "b": parsing_ops.FixedLenFeature(
                    (3, 3), dtypes.string, default_value=b_default),
                # Feature "c" must be provided, since it has no default_value.
                "c": parsing_ops.FixedLenFeature((2,), dtypes.float32),
            }
            self._test(
                {"serialized": serialized_tensor, "features": feature_spec},
                expected)