Example #1
def run(paths, true_files, true_labels):
    # Nested helper inside the enclosing TestCase method; `self` and
    # `spn` (assumed `import libspn as spn`) come from the outer scope.
    with self.subTest(paths=paths):
        dataset = spn.FileDataset(
            self.data_path(paths),
            num_vars=1,
            num_vals=1,
            num_labels=1,  # Not relevant here
            num_epochs=1,
            batch_size=1,
            shuffle=False,
            shuffle_batch=False)
        fqueue = dataset._get_file_queue()
        ftensor, ltensor = dataset._get_file_label_tensors()
        files1 = []
        files2 = []
        labels = []
        # Drain the raw file queue
        with spn.session() as (sess, run):
            while run():
                f = sess.run(fqueue.dequeue())
                files1.append(str(f, 'utf-8'))
        # Read (file, label) pairs from the tensors
        with spn.session() as (sess, run):
            while run():
                f, l = sess.run([ftensor, ltensor])
                files2.append(str(f, 'utf-8'))
                labels.append(str(l, 'utf-8'))
        self.assertEqual(files1, self.data_path(true_files))
        self.assertEqual(files2, self.data_path(true_files))
        self.assertEqual(labels, true_labels)
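A hypothetical invocation of the helper above; the glob pattern, file
names, and labels are illustrative, not taken from the real test fixture:

# Sketch only: data_path() comes from the enclosing TestCase and
# prefixes names with the test-data directory.
run(paths="files_*.csv",
    true_files=["files_1.csv", "files_2.csv"],
    true_labels=["labelA", "labelB"])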
Example #2
def test_gaussian_mixture_dataset_without_final(self):
    """Batch generation (without smaller final batch) for
    GaussianMixtureDataset"""
    # Tests: - normalization of weights,
    #        - components with and without labels
    # Note: shuffling is NOT tested
    components = [
        spn.GaussianMixtureDataset.Component(0.301, [1, 1],
                                             [[1, 0], [0, 1]]),
        spn.GaussianMixtureDataset.Component(0.2, [2, 2], [[1, 0], [0, 1]],
                                             10),
        spn.GaussianMixtureDataset.Component(0.2, [1, 1], [[1, 0], [0, 1]])
    ]
    dataset = spn.GaussianMixtureDataset(components=components,
                                         num_samples=100,
                                         num_epochs=2,
                                         batch_size=90,
                                         shuffle=False,
                                         num_threads=1,
                                         allow_smaller_final_batch=False)
    # Get batches
    data = dataset.get_data()
    batches = []
    with spn.session() as (sess, run):
        while run():
            out = sess.run(data)
            batches.append(out)

    # Number of batches: 2 epochs x 100 samples = 200, batch size 90,
    # and the smaller final batch of 20 is dropped
    self.assertEqual(len(batches), 2)

    # Batch size = 90
    batch1 = batches[0]
    batch2 = batches[1]
    self.assertTupleEqual(batch1[0].shape, (90, 2))  # samples
    self.assertTupleEqual(batch2[0].shape, (90, 2))
    self.assertTupleEqual(batch1[1].shape, (90, 1))  # labels
    self.assertTupleEqual(batch2[1].shape, (90, 1))
    self.assertTupleEqual(batch1[2].shape, (90,))  # likelihoods
    self.assertTupleEqual(batch2[2].shape, (90,))

    # Data types
    self.assertTrue(np.issubdtype(batch1[0].dtype, np.floating))
    self.assertTrue(np.issubdtype(batch2[0].dtype, np.floating))
    self.assertTrue(np.issubdtype(batch1[1].dtype, np.integer))
    self.assertTrue(np.issubdtype(batch2[1].dtype, np.integer))
    self.assertTrue(np.issubdtype(batch1[2].dtype, np.floating))
    self.assertTrue(np.issubdtype(batch2[2].dtype, np.floating))

    # Batch 2 starts with the last 10 samples of epoch 1 and then repeats
    # the first 80 samples of epoch 2; without shuffling the epochs are
    # identical, so the overlapping parts of the two batches must match
    np.testing.assert_array_equal(batch1[0][:80], batch2[0][10:])
    np.testing.assert_array_equal(batch1[1][:80], batch2[1][10:])
    np.testing.assert_array_equal(batch1[2][:80], batch2[2][10:])

    # Number of samples per component (one full epoch of labels)
    epoch_labels = np.concatenate([batch1[1], batch2[1][:10]])
    self.assertEqual((epoch_labels == 0).sum(), 43)
    self.assertEqual((epoch_labels == 10).sum(), 28)
    self.assertEqual((epoch_labels == 2).sum(), 29)  # Roundoff error
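The per-component counts asserted above follow from normalizing the
mixture weights; a quick sketch of the arithmetic (the exact rounding
scheme is libspn's, so the split shown is simply the one consistent
with the assertions):

weights = [0.301, 0.2, 0.2]
total = sum(weights)                      # 0.701
raw = [100 * w / total for w in weights]  # approx. [42.94, 28.53, 28.53]
# The integer counts must sum to 100; 43 + 28 + 29 is the rounding
# the test expects, hence the "Roundoff error" comment above.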
Example #3
def generic_dataset_test(self, dataset):
    data = dataset.get_data()
    # Check that the sample size (second dimension) is statically known
    for d in data:
        self.assertIsNotNone(d.shape[1].value)
    # Check values
    with spn.session() as (sess, run):
        # while run():  # Getting one batch only
        out = sess.run(data)
        return out
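Unlike the variant in Example #4 below, this one fetches a single batch
and hands it back to the caller for inspection. A hypothetical use
inside the same TestCase (make_dataset() stands in for any spn dataset
factory defined by the suite):

out = self.generic_dataset_test(make_dataset())
# Assuming get_data() returned a list of tensors, `out` is a list of
# numpy arrays, one per output tensor:
print([o.shape for o in out])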
Example #4
def generic_dataset_test(self, dataset, correct_batches, tol=0.0):
    data = dataset.get_data()
    # Check that the sample size (second dimension) is statically known
    if isinstance(data, tf.Tensor):
        self.assertIsNotNone(data.shape[1].value)
    else:
        for d in data:
            self.assertIsNotNone(d.shape[1].value)
    # Collect all batches produced by the dataset
    batches = []
    with spn.session() as (sess, run):
        while run():
            out = sess.run(data)
            batches.append(out)
    self.assertEqual(len(batches), len(correct_batches))
    # Compare each batch against its expected counterpart; numeric data
    # is compared with tolerance `tol`, other dtypes exactly
    for b, cb in zip(batches, correct_batches):
        if isinstance(b, list):
            self.assertEqual(len(b), len(cb))
            for bb, cbcb in zip(b, cb):
                if cbcb is None:
                    self.assertIsNone(bb)
                else:
                    self.assertEqual(bb.dtype, cbcb.dtype)
                    if (np.issubdtype(bb.dtype, np.floating)
                            or np.issubdtype(bb.dtype, np.integer)):
                        np.testing.assert_allclose(bb, cbcb, atol=tol)
                    else:
                        np.testing.assert_equal(bb, cbcb)
        else:
            if cb is None:
                self.assertIsNone(b)
            else:
                self.assertEqual(b.dtype, cb.dtype)
                if (np.issubdtype(b.dtype, np.floating)
                        or np.issubdtype(b.dtype, np.integer)):
                    np.testing.assert_allclose(b, cb, atol=tol)
                else:
                    np.testing.assert_equal(b, cb)
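The correct_batches argument mirrors what get_data() yields: one entry
per batch, each either a single array or a list of per-tensor arrays,
with None standing in for outputs the dataset does not produce. A
hypothetical call (my_dataset and all array values are made up):

import numpy as np

correct_batches = [
    [np.array([[1.0, 2.0]], dtype=np.float32),   # samples, batch 1
     np.array([[0]], dtype=np.int64)],           # labels, batch 1
    [np.array([[3.0, 4.0]], dtype=np.float32),   # samples, batch 2
     np.array([[1]], dtype=np.int64)],           # labels, batch 2
]
self.generic_dataset_test(my_dataset, correct_batches, tol=1e-6)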