Example #1
0
    def test_cached_ttv_array_like_data_source(self):
        """Check that CachedTTVArrayLikeDataSource serves values through its cache.

        The test reads all values, compares them with the ``'dummy'`` dataset
        stored in ``test.cache.hdf5``, tampers with one cached value to show
        that reads come from the cache, and finally resets the cache to get
        the original values back.
        """
        dummy_data_source = DummyDataSource()
        subject_info_dir = os.path.join('test', 'dummy_data', 'metadata')
        ttv = yaml_to_dict(os.path.join(subject_info_dir, 'dummy_ttv.yaml'))

        array_ds = CachedTTVArrayLikeDataSource(dummy_data_source,
                                                ttv,
                                                data_name='dummy',
                                                cache_name='test')

        self.assertEqual(len(array_ds), 3)

        all_values = array_ds[:]

        self.assertTrue(np.all(np.in1d(all_values, np.array([1, 2, 3]))))

        # The context manager guarantees the HDF5 handle is closed even when
        # one of the assertions below fails.
        with h5py.File('test.cache.hdf5', 'a') as f:
            self.assertEqual(len(f['dummy']), len(array_ds))

            for in_cache, in_data_source in zip(f['dummy'], array_ds):
                self.assertTrue(np.all(in_cache == in_data_source))

            # changing a value in the cache now should alter the results
            # returned by the dataset.
            f['dummy'][0] = 322
            all_values = array_ds[:]
            self.assertTrue(
                np.all(np.in1d(all_values, np.array([322, 2, 3]))))

            # now resetting the cache, we should get the original results
            # NOTE(review): presumably a False bit marks an entry as
            # "not cached yet" -- confirm against the data source.
            f['dummy' +
              CachedTTVArrayLikeDataSource.CACHE_BITARRAY_SUFFIX][:] = False
            array_ds._CachedTTVArrayLikeDataSource__init_existence_cache()

            all_values = array_ds[:]
            self.assertTrue(np.all(np.in1d(all_values, np.array([1, 2, 3]))))
Example #2
0
    def test_shuffle_deterministic(self):
        """Test that the shuffles made in several lookup tables are the same.

        For every set, the shuffled table must hold the same URIs as the
        unshuffled one but in a different order, and two shuffled tables
        built from the same TTV must be identical.
        """

        ttv = yaml_to_dict(os.path.join(DUMMY_DATA_PATH, 'dummy_large_ttv.yaml'))

        lt_unshuffled = TTVLookupTable(ttv, shuffle_in_set=False)
        lt1 = TTVLookupTable(ttv, shuffle_in_set=True)
        lt2 = TTVLookupTable(ttv, shuffle_in_set=True)

        for set_name in ['test', 'train', 'validation']:
            # Query the bounds of the set being iterated; the bounds used to
            # be hard-coded to 'test', so the other two sets were never
            # checked.
            # NOTE(review): assumes every set in dummy_large_ttv.yaml is big
            # enough that a shuffle changes its order -- confirm.
            start_unshuf, end_unshuf = lt_unshuffled.get_set_bounds(set_name)
            uris_unshuf = lt_unshuffled[start_unshuf:end_unshuf]

            start_shuf, end_shuf = lt1.get_set_bounds(set_name)
            uris_shuf = lt1[start_shuf:end_shuf]

            self.assertEqual(set(uris_shuf), set(uris_unshuf))
            self.assertFalse(
                np.all(
                    uris_shuf == uris_unshuf
                )
            )

        np.testing.assert_equal(lt1[:], lt2[:])
Example #3
0
    def test_ttv_examples_generator(self):
        """Check that TTVExamplesDataSource pairs each datum with its target.

        The target of an example is the ``'legs'`` entry of the subject's
        metadata, looked up through ``make_target``.
        """
        source = DummyDataSource()

        def make_target(X, key, subjectID, subject_info_data_source):
            # The target is the 'legs' field of the subject's metadata.
            return yaml_to_dict(subject_info_data_source[subjectID])['legs']

        metadata_dir = os.path.join('test', 'dummy_data', 'metadata')
        ttv = yaml_to_dict(os.path.join(metadata_dir, 'dummy_ttv.yaml'))

        examples_ds = TTVExamplesDataSource(source, make_target, ttv, metadata_dir)

        # Single-key lookups: (key, expected target).
        single_lookups = (
            ('blorp_2', 1),
            ('blerp_1', 2),
            ('shlerp_322', 3),
        )
        for uri, legs in single_lookups:
            self.assertEqual(examples_ds[uri], (source.data[uri], legs))

        # Multi-key lookup returns a (data list, target list) pair.
        batch = ('shlerp_322', 'blerp_1', 'blorp_2')
        self.assertEqual(
            examples_ds[list(batch)],
            ([source.data[uri] for uri in batch], [3, 2, 1])
        )
Example #4
0
    def test_cached_ttv_array_like_data_source(self):
        """Check that CachedTTVArrayLikeDataSource serves values through its cache.

        The test reads all values, compares them with the ``'dummy'`` dataset
        stored in ``test.cache.hdf5``, tampers with one cached value to show
        that reads come from the cache, and finally resets the cache to get
        the original values back.
        """
        dummy_data_source = DummyDataSource()
        subject_info_dir = os.path.join('test', 'dummy_data', 'metadata')
        ttv = yaml_to_dict(os.path.join(subject_info_dir, 'dummy_ttv.yaml'))

        array_ds = CachedTTVArrayLikeDataSource(dummy_data_source, ttv, data_name='dummy', cache_name='test')

        self.assertEqual(len(array_ds), 3)

        all_values = array_ds[:]

        self.assertTrue(
            np.all(
                np.in1d(
                    all_values,
                    np.array([1, 2, 3])
                )
            )
        )

        # The context manager guarantees the HDF5 handle is closed even when
        # one of the assertions below fails.
        with h5py.File('test.cache.hdf5', 'a') as f:
            self.assertEqual(len(f['dummy']), len(array_ds))

            for in_cache, in_data_source in zip(f['dummy'], array_ds):
                self.assertTrue(
                    np.all(
                        in_cache == in_data_source
                    )
                )

            # changing a value in the cache now should alter the results
            # returned by the dataset.
            f['dummy'][0] = 322
            all_values = array_ds[:]
            self.assertTrue(
                np.all(
                    np.in1d(
                        all_values,
                        np.array([322, 2, 3])
                    )
                )
            )

            # now resetting the cache, we should get the original results
            # NOTE(review): presumably CACHE_MAGIC marks an entry as
            # "not cached yet" -- confirm against the data source.
            del f['dummy']
            f['dummy'] = np.repeat(CachedTTVArrayLikeDataSource.CACHE_MAGIC, 3)
            array_ds._CachedTTVArrayLikeDataSource__init_existence_cache()

            all_values = array_ds[:]
            self.assertTrue(
                np.all(
                    np.in1d(
                        all_values,
                        np.array([1, 2, 3])
                    )
                )
            )
Example #5
0
    def test_ttv_array_like_data_source(self):
        """Check the length and the values served by a TTVArrayLikeDataSource."""
        source = DummyDataSource()
        ttv_path = os.path.join('test', 'dummy_data', 'metadata', 'dummy_ttv.yaml')

        array_ds = TTVArrayLikeDataSource(source, yaml_to_dict(ttv_path))

        self.assertEqual(len(array_ds), 3)

        # Materialise the full slice as an int16 array, then make sure that
        # no value falls outside the expected set.
        served = np.fromiter(array_ds[:], dtype='int16')
        allowed = np.array([1, 2, 3])

        self.assertTrue(np.in1d(served, allowed).all())
Example #6
0
    def test_ttv_lookup_table(self):
        """Check that each set's bounds in the lookup table cover that set's URIs."""
        ttv = yaml_to_dict(os.path.join(DUMMY_DATA_PATH, 'dummy_ttv.yaml'))
        table = TTVLookupTable(ttv)

        self.assertEqual(len(table), 3)

        for set_name in ('test', 'train', 'validation'):
            first, last = table.get_set_bounds(set_name)

            # Concatenate the URI lists of every subject in the set.
            expected_uris = []
            for subject_uris in ttv[set_name].values():
                expected_uris = expected_uris + subject_uris

            self.assertEqual(set(table[first:last]), set(expected_uris))
Example #7
0
    def test_ttv_lookup_table_shuffled(self):
        """Check that shuffling keeps the train set's URIs but changes their order."""
        ttv = yaml_to_dict(os.path.join(DUMMY_DATA_PATH, 'dummy_ttv.yaml'))
        # Replace the train set with 100 single-URI subjects.
        ttv['train'] = {str(i): [str(i)] for i in range(100)}

        table = TTVLookupTable(ttv, shuffle_in_set=True)

        first, last = table.get_set_bounds('train')

        # Concatenate the URI lists of every subject, in TTV order.
        ordered_uris = []
        for subject_uris in ttv['train'].values():
            ordered_uris = ordered_uris + subject_uris

        # Same members as the TTV ...
        self.assertEqual(set(table[first:last]), set(ordered_uris))

        # ... but not in the TTV's own order.
        self.assertFalse(table[first:last] == ordered_uris)
Example #8
0
    def test_ttv_lookup_table(self):
        """Verify the lookup table size and the URIs found within each set's bounds."""
        ttv_file = os.path.join(DUMMY_DATA_PATH, 'dummy_ttv.yaml')
        ttv = yaml_to_dict(ttv_file)
        lookup = TTVLookupTable(ttv)

        self.assertEqual(len(lookup), 3)

        for set_name in ('test', 'train', 'validation'):
            bounds = lookup.get_set_bounds(set_name)
            lower, upper = bounds

            # Gather the URIs of all subjects belonging to this set.
            uris_in_set = []
            for uris in ttv[set_name].values():
                uris_in_set = uris_in_set + uris

            found = set(lookup[lower:upper])
            self.assertEqual(found, set(uris_in_set))
Example #9
0
    def test_ttv_array_like_data_source(self):
        """Verify that a TTVArrayLikeDataSource exposes three known values."""
        data_source = DummyDataSource()
        metadata_dir = os.path.join('test', 'dummy_data', 'metadata')
        ttv_file = os.path.join(metadata_dir, 'dummy_ttv.yaml')
        ttv = yaml_to_dict(ttv_file)

        array_ds = TTVArrayLikeDataSource(data_source, ttv)

        self.assertEqual(len(array_ds), 3)

        # Read everything through a full slice and convert to int16.
        everything = array_ds[:]
        as_array = np.fromiter(everything, dtype='int16')

        # Each value read must be one of the known values.
        is_known = np.in1d(as_array, np.array([1, 2, 3]))
        self.assertTrue(np.all(is_known))
Example #10
0
    def test_ttv_lookup_table_shuffled(self):
        """Verify that a shuffled lookup table reorders the train set without losing URIs."""
        ttv = yaml_to_dict(os.path.join(DUMMY_DATA_PATH, 'dummy_ttv.yaml'))

        # Swap in a train set made of 100 subjects with one URI each.
        large_train = {}
        for i in range(100):
            large_train[str(i)] = [str(i)]
        ttv['train'] = large_train

        lookup = TTVLookupTable(ttv, shuffle_in_set=True)

        lower, upper = lookup.get_set_bounds('train')

        # Flatten the per-subject URI lists, keeping the TTV order.
        uris_in_set = []
        for uris in ttv['train'].values():
            uris_in_set = uris_in_set + uris

        # Membership is preserved by the shuffle.
        self.assertEqual(set(lookup[lower:upper]), set(uris_in_set))

        # The order is not.
        self.assertFalse(lookup[lower:upper] == uris_in_set)
Example #11
0
    def test_shuffle_deterministic(self):
        """Test that the shuffles made in several lookup tables are the same.

        For every set, the shuffled table must hold the same URIs as the
        unshuffled one but in a different order, and two shuffled tables
        built from the same TTV must be identical.
        """

        ttv = yaml_to_dict(
            os.path.join(DUMMY_DATA_PATH, 'dummy_large_ttv.yaml'))

        lt_unshuffled = TTVLookupTable(ttv, shuffle_in_set=False)
        lt1 = TTVLookupTable(ttv, shuffle_in_set=True)
        lt2 = TTVLookupTable(ttv, shuffle_in_set=True)

        for set_name in ['test', 'train', 'validation']:
            # Query the bounds of the set being iterated; the bounds used to
            # be hard-coded to 'test', so the other two sets were never
            # checked.
            # NOTE(review): assumes every set in dummy_large_ttv.yaml is big
            # enough that a shuffle changes its order -- confirm.
            start_unshuf, end_unshuf = lt_unshuffled.get_set_bounds(set_name)
            uris_unshuf = lt_unshuffled[start_unshuf:end_unshuf]

            start_shuf, end_shuf = lt1.get_set_bounds(set_name)
            uris_shuf = lt1[start_shuf:end_shuf]

            self.assertEqual(set(uris_shuf), set(uris_unshuf))
            self.assertFalse(np.all(uris_shuf == uris_unshuf))

        np.testing.assert_equal(lt1[:], lt2[:])
Example #12
0
    def test_subarray_like_data_source(self):
        """Check that ``get_set`` exposes exactly the data of the requested set.

        For each of the test/train/validation sets, the sub data source must
        have as many items as the TTV lists URIs for that set, and must
        yield the data that the dummy data source holds for those URIs.
        """
        dummy_data_source = DummyDataSource()
        subject_info_dir = os.path.join('test', 'dummy_data', 'metadata')
        ttv = yaml_to_dict(os.path.join(subject_info_dir, 'dummy_ttv.yaml'))

        array_ds = TTVArrayLikeDataSource(dummy_data_source, ttv)

        def get_all_values_set(ttv, set_name):
            """Return the URIs of every subject in ``set_name`` as one list."""
            data_set = ttv[set_name]
            uris = []
            for subjectID in data_set:
                uris += data_set[subjectID]
            return uris

        for set_name in ['test', 'train', 'validation']:
            set_ds = array_ds.get_set(set_name)
            expected_uris = get_all_values_set(ttv, set_name)

            # This used to be assertTrue(len(set_ds), 1), which takes 1 as
            # the failure message and only checks that the length is truthy.
            # Compare the length with the number of URIs the TTV lists.
            self.assertEqual(len(set_ds), len(expected_uris))
            self.assertEqual(
                [x for x in set_ds[:]],
                [dummy_data_source[x] for x in expected_uris]
            )
Example #13
0
    def test_subarray_like_data_source(self):
        """Check that ``get_set`` exposes exactly the data of the requested set.

        For each of the test/train/validation sets, the sub data source must
        have as many items as the TTV lists URIs for that set, and must
        yield the data that the dummy data source holds for those URIs.
        """
        dummy_data_source = DummyDataSource()
        subject_info_dir = os.path.join('test', 'dummy_data', 'metadata')
        ttv = yaml_to_dict(os.path.join(subject_info_dir, 'dummy_ttv.yaml'))

        array_ds = TTVArrayLikeDataSource(dummy_data_source, ttv)

        def get_all_values_set(ttv, set_name):
            """Return the URIs of every subject in ``set_name`` as one list."""
            data_set = ttv[set_name]
            uris = []
            for subjectID in data_set:
                uris += data_set[subjectID]
            return uris

        for set_name in ['test', 'train', 'validation']:
            set_ds = array_ds.get_set(set_name)
            expected_uris = get_all_values_set(ttv, set_name)

            # This used to be assertTrue(len(set_ds), 1), which takes 1 as
            # the failure message and only checks that the length is truthy.
            # Compare the length with the number of URIs the TTV lists.
            self.assertEqual(len(set_ds), len(expected_uris))

            self.assertEqual([x for x in set_ds[:]], [
                dummy_data_source[x]
                for x in expected_uris
            ])
Example #14
0
 def make_target(X, key, subjectID, subject_info_data_source):
     """Return the ``'legs'`` entry of the metadata of ``subjectID``.

     ``X`` and ``key`` are accepted but not used.
     """
     subject_record = subject_info_data_source[subjectID]
     return yaml_to_dict(subject_record)['legs']