def test_cached_ttv_array_like_data_source(self):
    dummy_data_source = DummyDataSource()
    subject_info_dir = os.path.join('test', 'dummy_data', 'metadata')
    ttv = yaml_to_dict(os.path.join(subject_info_dir, 'dummy_ttv.yaml'))

    array_ds = CachedTTVArrayLikeDataSource(
        dummy_data_source, ttv, data_name='dummy', cache_name='test')
    self.assertEqual(len(array_ds), 3)

    all_values = array_ds[:]
    self.assertTrue(np.all(np.in1d(all_values, np.array([1, 2, 3]))))

    f = h5py.File('test.cache.hdf5', 'a')
    self.assertEqual(len(f['dummy']), len(array_ds))
    for in_cache, in_data_source in zip(f['dummy'], array_ds):
        self.assertTrue(np.all(in_cache == in_data_source))

    # Changing a value in the cache should now alter the results returned by the data source.
    f['dummy'][0] = 322
    all_values = array_ds[:]
    self.assertTrue(np.all(np.in1d(all_values, np.array([322, 2, 3]))))

    # After resetting the cache, we should get the original results back.
    f['dummy' + CachedTTVArrayLikeDataSource.CACHE_BITARRAY_SUFFIX][:] = False
    array_ds._CachedTTVArrayLikeDataSource__init_existence_cache()
    all_values = array_ds[:]
    self.assertTrue(np.all(np.in1d(all_values, np.array([1, 2, 3]))))
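# The tests in this module rely on a DummyDataSource fixture defined elsewhere. The class below
# is a hypothetical sketch of the interface the assertions appear to assume: a `data` dict of
# URI -> small integer with plain key lookup. The exact keys and the key-to-value mapping are
# assumptions inferred from the tests, not the real fixture.
class _SketchDummyDataSource(object):
    def __init__(self):
        # Assumed mapping; the real fixture may differ.
        self.data = {'blorp_2': 1, 'blerp_1': 2, 'shlerp_322': 3}

    def __getitem__(self, key):
        return self.data[key]

    def __len__(self):
        return len(self.data)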
def test_shuffle_deterministic(self):
    """Test that the shuffles made in several lookup tables are the same."""
    ttv = yaml_to_dict(os.path.join(DUMMY_DATA_PATH, 'dummy_large_ttv.yaml'))

    lt_unshuffled = TTVLookupTable(ttv, shuffle_in_set=False)
    lt1 = TTVLookupTable(ttv, shuffle_in_set=True)
    lt2 = TTVLookupTable(ttv, shuffle_in_set=True)

    for set_name in ['test', 'train', 'validation']:
        start_unshuf, end_unshuf = lt_unshuffled.get_set_bounds(set_name)
        uris_unshuf = lt_unshuffled[start_unshuf:end_unshuf]

        start_shuf, end_shuf = lt1.get_set_bounds(set_name)
        uris_shuf = lt1[start_shuf:end_shuf]

        # The shuffled set contains the same URIs, but not in the original order.
        self.assertEqual(set(uris_shuf), set(uris_unshuf))
        self.assertFalse(np.all(uris_shuf == uris_unshuf))

    # Two shuffled lookup tables built from the same TTV agree exactly.
    np.testing.assert_equal(lt1[:], lt2[:])
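# test_shuffle_deterministic expects two independently constructed, shuffled lookup tables to
# agree exactly. That property is typically obtained by seeding the shuffle with a fixed value.
# The helper below is a hypothetical sketch of that pattern; the seed value and the helper
# itself are illustrative, not part of TTVLookupTable's actual implementation.
def _sketch_deterministic_shuffle(uris, seed=42):
    rng = np.random.RandomState(seed)   # fixed seed -> same permutation on every call
    return [uris[i] for i in rng.permutation(len(uris))]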
def test_ttv_examples_generator(self):
    data_source = DummyDataSource()

    def make_target(X, key, subjectID, subject_info_data_source):
        metadata = yaml_to_dict(subject_info_data_source[subjectID])
        return metadata['legs']

    subject_info_dir = os.path.join('test', 'dummy_data', 'metadata')
    ttv = yaml_to_dict(os.path.join(subject_info_dir, 'dummy_ttv.yaml'))

    examples_ds = TTVExamplesDataSource(data_source, make_target, ttv, subject_info_dir)

    self.assertEqual(examples_ds['blorp_2'], (data_source.data['blorp_2'], 1))
    self.assertEqual(examples_ds['blerp_1'], (data_source.data['blerp_1'], 2))
    self.assertEqual(examples_ds['shlerp_322'], (data_source.data['shlerp_322'], 3))

    self.assertEqual(
        examples_ds[['shlerp_322', 'blerp_1', 'blorp_2']],
        ([data_source.data[x] for x in ['shlerp_322', 'blerp_1', 'blorp_2']], [3, 2, 1])
    )
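# The nested make_target above resolves a subject's metadata file via subject_info_data_source,
# parses it with yaml_to_dict, and returns the 'legs' field as the example's target (the values
# 1, 2 and 3 asserted above). The helper below is a hypothetical round-trip sketch of that
# assumed contract; the helper name, the temporary file name and its contents are invented for
# illustration, and only the 'legs' field is actually required by these tests.
def _sketch_subject_metadata_roundtrip(tmp_path):
    metadata_path = os.path.join(tmp_path, 'some_subject.yaml')
    with open(metadata_path, 'w') as fh:
        fh.write('legs: 2\n')                    # minimal metadata file of the assumed shape
    return yaml_to_dict(metadata_path)['legs']   # -> 2, as make_target would return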
def test_ttv_array_like_data_source(self):
    dummy_data_source = DummyDataSource()
    subject_info_dir = os.path.join('test', 'dummy_data', 'metadata')
    ttv = yaml_to_dict(os.path.join(subject_info_dir, 'dummy_ttv.yaml'))

    array_ds = TTVArrayLikeDataSource(dummy_data_source, ttv)

    self.assertEqual(len(array_ds), 3)
    all_values = np.fromiter((x for x in array_ds[:]), dtype='int16')
    self.assertTrue(np.all(np.in1d(all_values, np.array([1, 2, 3]))))
def test_ttv_lookup_table(self):
    ttv = yaml_to_dict(os.path.join(DUMMY_DATA_PATH, 'dummy_ttv.yaml'))
    lt = TTVLookupTable(ttv)

    self.assertEqual(len(lt), 3)
    for set_name in ['test', 'train', 'validation']:
        start, end = lt.get_set_bounds(set_name)
        uris_in_set = sum((x for x in ttv[set_name].values()), [])
        self.assertEqual(set(lt[start:end]), set(uris_in_set))
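# TTVLookupTable and the *ArrayLikeDataSource classes above all consume the same ttv mapping:
# each of 'test', 'train' and 'validation' maps subject IDs to lists of example URIs. The dict
# below is a hypothetical sketch of that structure with invented subject IDs and URIs; the real
# dummy_ttv.yaml lives under test/dummy_data/metadata and holds three URIs in total.
_SKETCH_TTV = {
    'test':       {'subject_a': ['example_uri_1']},
    'train':      {'subject_b': ['example_uri_2']},
    'validation': {'subject_c': ['example_uri_3']},
}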
def test_ttv_lookup_table_shuffled(self):
    ttv = yaml_to_dict(os.path.join(DUMMY_DATA_PATH, 'dummy_ttv.yaml'))
    # Inflate the training set so that a shuffle is overwhelmingly unlikely to be a no-op.
    ttv['train'] = dict((str(i), [str(i)]) for i in range(100))

    lt = TTVLookupTable(ttv, shuffle_in_set=True)
    start, end = lt.get_set_bounds('train')
    uris_in_set = sum((x for x in ttv['train'].values()), [])

    self.assertEqual(set(lt[start:end]), set(uris_in_set))
    self.assertFalse(lt[start:end] == uris_in_set)
def test_subarray_like_data_source(self):
    dummy_data_source = DummyDataSource()
    subject_info_dir = os.path.join('test', 'dummy_data', 'metadata')
    ttv = yaml_to_dict(os.path.join(subject_info_dir, 'dummy_ttv.yaml'))

    array_ds = TTVArrayLikeDataSource(dummy_data_source, ttv)

    def get_all_values_set(ttv, set_name):
        data_set = ttv[set_name]
        uris = []
        for subjectID in data_set:
            uris += data_set[subjectID]
        return uris

    for set_name in ['test', 'train', 'validation']:
        set_ds = array_ds.get_set(set_name)
        self.assertEqual(len(set_ds), 1)
        self.assertEqual(
            [x for x in set_ds[:]],
            [dummy_data_source[x] for x in get_all_values_set(ttv, set_name)]
        )