def test_deterministic_dataset_mixture_distribution(self):
     """Equal seeds must give equal value counts; a different seed must not."""
     # An endless stream of the constant 8.
     eights = tf.data.Dataset.from_tensor_slices([8]).repeat(None)
     # 1000 normal samples (mean=5, stddev=2.0) cast down to int32.
     normal_draws = tf.random.normal(shape=[1000], mean=5, stddev=2.0)
     around_five = tf.data.Dataset.from_tensor_slices(
         tf.cast(normal_draws, tf.int32))
     # Build every mixture up front, before iterating over any of them. The
     # first two share seed 0 and the third uses seed 1. Per the original note,
     # the finite dataset bounds each mixture at 1000 integers because the
     # stream of eights is infinite.
     mixtures = [
         dataset_utils.build_dataset_mixture(eights,
                                             around_five,
                                             a_probability=0.8,
                                             seed=mixture_seed)
         for mixture_seed in (0, 0, 1)
     ]
     # Tally how often each integer value shows up in each mixture.
     same_seed_first, same_seed_second, other_seed = [
         collections.Counter(self.evaluate(x) for x in mixture)
         for mixture in mixtures
     ]
     self.assertEqual(same_seed_first, same_seed_second)
     self.assertNotEqual(same_seed_first, other_seed)
 def test_non_deterministic_dataset_mixture_different(self):
     """Two unseeded mixtures of the same inputs must yield different samples."""
     length = 100
     zeros = tf.data.Dataset.from_tensor_slices([0] * length)
     ones = tf.data.Dataset.from_tensor_slices([1] * length)
     # No seed is passed to either call.
     first_mixture = dataset_utils.build_dataset_mixture(
         zeros, ones, a_probability=0.5)
     second_mixture = dataset_utils.build_dataset_mixture(
         zeros, ones, a_probability=0.5)
     # Materialize both sequences, first mixture first, then compare them.
     first_samples = self.evaluate(list(iter(first_mixture)))
     second_samples = self.evaluate(list(iter(second_mixture)))
     self.assertNotEqual(first_samples, second_samples)
Esempio n. 3
0
 def test_non_deterministic_dataset_mixture_different(self):
   """Two unseeded mixtures of the same inputs must yield different samples."""
   # Clear the global seed so the unit test runs non-deterministically again.
   tf.random.set_seed(None)
   # Use 100 zeros and 100 ones: long enough that two independent random
   # interleavings are extremely unlikely to come out the same.
   length = 100
   zeros = tf.data.Dataset.from_tensor_slices([0] * length)
   ones = tf.data.Dataset.from_tensor_slices([1] * length)
   first_mixture = dataset_utils.build_dataset_mixture(
       zeros, ones, a_probability=0.5)
   second_mixture = dataset_utils.build_dataset_mixture(
       zeros, ones, a_probability=0.5)
   # Materialize both sequences, first mixture first, then compare them.
   first_samples = self.evaluate(list(iter(first_mixture)))
   second_samples = self.evaluate(list(iter(second_mixture)))
   self.assertNotEqual(first_samples, second_samples)
 def test_deterministic_dataset_mixture(self):
     """Two mixtures built with the same seed must yield identical examples."""
     low = tf.data.Dataset.range(5)
     # Shift the second range by 5 so its values are 5..9.
     high = tf.data.Dataset.range(5).map(lambda x: x + 5)
     # Build both mixtures with seed=0 before iterating over either of them.
     first_mixture, second_mixture = [
         dataset_utils.build_dataset_mixture(low,
                                             high,
                                             a_probability=0.5,
                                             seed=0)
         for _ in range(2)
     ]
     first_examples = [self.evaluate(x) for x in first_mixture]
     second_examples = [self.evaluate(x) for x in second_mixture]
     self.assertAllEqual(first_examples, second_examples)
Esempio n. 5
0
  def test_deterministic_dataset_mixture_distribution(self):
    """With fixed seeds the mixture must reproduce an exact value histogram."""
    tf.random.set_seed(0)
    # An endless stream of the constant 8.
    eights = tf.data.Dataset.from_tensor_slices([8]).repeat(None)
    # 1000 normal samples (mean=5, stddev=2.0) cast down to int32.
    normal_draws = tf.random.normal(shape=[1000], mean=5, stddev=2.0)
    around_five = tf.data.Dataset.from_tensor_slices(
        tf.cast(normal_draws, tf.int32))
    # Per the original note, the finite dataset bounds the mixture at 1000
    # integers because the stream of eights is infinite.
    mixture = dataset_utils.build_dataset_mixture(
        eights, around_five, a_probability=0.8, op_seed=0)

    # Tally every value. Roughly 800 of them should be 8, with the remaining
    # ~200 spread around 5.
    observed = collections.Counter(self.evaluate(x) for x in mixture)
    # Exact histogram expected under the seeds above (entries total 1000).
    expected = {
        -2: 1,
        0: 1,
        1: 7,
        2: 20,
        3: 35,
        4: 41,
        5: 35,
        6: 23,
        7: 21,
        8: 809,
        9: 4,
        10: 2,
        11: 1,
    }
    self.assertEqual(expected, observed, msg=str(observed))
Esempio n. 6
0
 def test_deterministic_dataset_mixture(self):
     """The mixture of 0..4 and 5..9 must yield one fixed example sequence."""
     low = tf.data.Dataset.range(5)
     # Shift the second range by 5 so its values are 5..9.
     high = tf.data.Dataset.range(5).map(lambda x: x + 5)
     blended = dataset_utils.build_dataset_mixture(
         low, high, a_probability=0.5)
     # NOTE(review): no seed is passed here, so the fixed expectation below
     # presumably relies on seeding done elsewhere (e.g. in test setup) --
     # confirm.
     self.assertAllEqual([0, 1, 2, 3, 9],
                         [self.evaluate(x) for x in blended])