def test_group_names_DataGeneratorDisk():
    """Check the `group_names` parameter of DataGeneratorDisk.

    Covers one image group, two groups side by side, and randomized
    selection among nested group lists.
    """
    iu.resize_folder('images/', 'images1/', image_size_dst=(100, 100),
                     overwrite=True)
    gp = gen_params.copy()
    gp.inputs = ['filename']
    gp.group_names = ['images/']
    gp.data_path = ''

    # single group: one image input per batch
    g = gr.DataGeneratorDisk(ids, **gp)
    assert gen.get_sizes(g[0]) == '([array<2,224,224,3>], [array<2,1>])'

    # two groups: one image input per group, per batch
    gp.group_names = ['images/', 'images1/']
    g = gr.DataGeneratorDisk(ids, **gp)
    assert gen.get_sizes(
        g[0]) == '([array<2,224,224,3>, array<2,100,100,3>], [array<2,1>])'

    # nested groups: a group is picked at random per generator instance
    gp.group_names = [['images/'], ['images1/']]
    observed_widths = []
    for _ in range(100):
        g = gr.DataGeneratorDisk(ids, **gp)
        observed_widths.append(g[0][0][0].shape[1])
    # over 100 draws both image sizes should appear at least once
    assert np.unique(observed_widths).shape[0] > 1

    shutil.rmtree('images1/')
def test_DataGeneratorDisk():
    """Exercise inputs/inputs_df/outputs combinations of DataGeneratorDisk."""
    g = gr.DataGeneratorDisk(ids, **gen_params)

    # the same image input can be requested twice
    g.inputs = ['filename', 'filename']
    assert gen.get_sizes(
        g[0]) == '([array<2,224,224,3>, array<2,224,224,3>], [array<2,1>])'

    # flat inputs_df list -> a single concatenated array
    g.inputs_df = ['score', 'score']
    g.inputs = []
    g.outputs = []
    assert gen.get_sizes(g[0]) == '([array<2,2>], [])'

    # nested inputs_df lists -> one array per sub-list
    g.inputs_df = [['score'], ['score', 'score']]
    assert gen.get_sizes(g[0]) == '([array<2,1>, array<2,2>], [])'

    # outputs only, no inputs
    g.inputs_df = []
    g.outputs = ['score']
    assert gen.get_sizes(g[0]) == '([], [array<2,1>])'

    # mixing flat and nested output specs must raise
    g.outputs = ['score', ['score']]
    with pytest.raises(AssertionError):
        g[0]

    # fully nested output spec -> one array per sub-list
    g.outputs = [['score'], ['score']]
    assert gen.get_sizes(g[0]) == '([], [array<2,1>, array<2,1>])'
def test_DataGeneratorHDF5():
    """Exercise inputs/inputs_df/outputs combinations of DataGeneratorHDF5."""
    params = gen_params.copy()
    params.update(data_path='data.h5', inputs=['filename'])
    g = gr.DataGeneratorHDF5(ids, **params)
    assert gen.get_sizes(g[0]) == '([array<2,1>], [array<2,1>])'

    # flat inputs_df list -> a single concatenated array
    g.inputs_df = ['score', 'score']
    g.inputs = []
    g.outputs = []
    assert gen.get_sizes(g[0]) == '([array<2,2>], [])'

    # nested inputs_df lists -> one array per sub-list
    g.inputs_df = [['score'], ['score', 'score']]
    assert gen.get_sizes(g[0]) == '([array<2,1>, array<2,2>], [])'

    # outputs only, no inputs
    g.inputs_df = []
    g.outputs = ['score']
    assert gen.get_sizes(g[0]) == '([], [array<2,1>])'

    # mixing flat and nested output specs must raise
    g.outputs = ['score', ['score']]
    with pytest.raises(AssertionError):
        g[0]

    # fully nested output spec -> one array per sub-list
    g.outputs = [['score'], ['score']]
    assert gen.get_sizes(g[0]) == '([], [array<2,1>, array<2,1>])'
def test_read_fn_DataGeneratorDisk():
    """Check that a custom `read_fn` overrides the default image loading.

    The custom reader resizes every image to 100x100, so the generated
    batch shape must reflect the new size.
    """
    import os

    def read_fn(name, g):
        # g is the parent generator object
        # name is the image name read from the DataFrame
        image_path = os.path.join(g.data_path, name)
        return iu.resize_image(iu.read_image(image_path), (100, 100))

    g = gr.DataGeneratorDisk(ids, read_fn=read_fn, **gen_params)
    # BUG FIX: the original line lacked `assert`, so the comparison result
    # was silently discarded and the test could never fail.
    assert gen.get_sizes(g[0]) == '([array<2,100,100,3>], [array<2,1>])'
def test_init_DataGeneratorDisk():
    """Smoke-test construction and batch structure of DataGeneratorDisk."""
    g = gr.DataGeneratorDisk(ids, **gen_params)
    batch = g[0]
    # a batch is an (inputs, outputs) tuple of lists
    assert isinstance(batch, tuple)
    assert isinstance(batch[0], list)
    assert isinstance(batch[1], list)
    assert gen.get_sizes(batch) == '([array<2,224,224,3>], [array<2,1>])'
    assert np.all(batch[1][0] == np.array([[1], [2]]))
def test_accessor_function_numpy_array():
    """Check callable `inputs_df` accessors and ndarray-valued DataFrame cells."""
    ids = pd.DataFrame(
        dict(a=range(10), b=list(np.random.randint(0, 10, (10, 2, 2)))))
    gen_params = Munch(batch_size=4,
                       data_path=None,
                       input_shape=None,
                       inputs_df=lambda ids: [ids[['a']].values],
                       outputs=['b'],
                       shuffle=False,
                       fixed_batches=True)

    # test using a function to access data from ids
    # test if data in ids items can be ndarrays
    g = gr.DataGeneratorDisk(ids, **gen_params)
    assert gen.get_sizes(g[0]) == '([array<4,1>], [array<4,2,2>])'

    # test if double inputs works
    gen_params.outputs = ['a', 'a']
    g = gr.DataGeneratorDisk(ids, **gen_params)
    assert gen.get_sizes(g[0]) == '([array<4,1>], [array<4,2>])'
def test_callable_outputs_DataGeneratorHDF5():
    """Check that `outputs` can be a callable filtering the DataFrame."""
    df = pd.DataFrame(data={'features': [1, 2, 3, 4, 5],
                            'mask': [1, 0, 1, 1, 0]})

    def filter_features(df):
        # keep only the rows where mask == 1
        return np.array(df.loc[df['mask'] == 1, ['features']])

    params = gen_params.copy()
    params.update(data_path=None,
                  outputs=filter_features,
                  inputs=[],
                  inputs_df=['features'],
                  shuffle=False,
                  batch_size=5)
    g = gr.DataGeneratorHDF5(df, **params)
    batch = g[0]
    # callable outputs are returned as a bare array, not wrapped in a list
    assert gen.get_sizes(batch) == '([array<5,1>], array<3,1>)'
    assert all(np.squeeze(batch[0][0]) == np.arange(1, 6))
    assert all(np.squeeze(batch[1]) == [1, 3, 4])
def test_get_sizes():
    """Spot-check gen.get_sizes on nested tuples, lists and object arrays."""
    x = np.array([[1, 2, 3]])
    nested = ([x.T], 1, [4, 5])
    assert gen.get_sizes(nested) == '([array<3,1>], <1>, [<1>, <1>])'
    assert gen.get_sizes(np.array([[1, [1, 2]]])) == 'array<1,2>'
def test_basics_deterministic_shuffle_consistency_group_by():
    """Check fixed_batches, shuffle consistency, determinism and group_by.

    Uses a 10-row DataFrame where column `b` is the reverse of `a`, and
    `c` splits the rows into two groups of 5.
    """
    ids = pd.DataFrame(
        dict(a=range(10), b=list(range(9, -1, -1)), c=np.arange(10) < 5))
    gen_params = Munch(batch_size=4,
                       data_path=None,
                       input_shape=None,
                       inputs_df=['a'],
                       outputs=['b'],
                       shuffle=False,
                       fixed_batches=True)

    # check `fixed_batches` switch: only full batches are returned
    g = gr.DataGeneratorDisk(ids, **gen_params)
    assert np.array_equal(
        [gen.get_sizes(x) for x in g],
        ['([array<4,1>], [array<4,1>])', '([array<4,1>], [array<4,1>])'])
    assert np.array_equal(g[0][0][0].squeeze(), range(4))

    # with fixed_batches off, a final partial batch is included
    gen_params.fixed_batches = False
    g = gr.DataGeneratorDisk(ids, **gen_params)
    assert np.array_equal([gen.get_sizes(x) for x in g], [
        '([array<4,1>], [array<4,1>])', '([array<4,1>], [array<4,1>])',
        '([array<2,1>], [array<2,1>])'
    ])
    assert np.array_equal(g[2][0][0].squeeze(), [8, 9])

    # check randomized
    gen_params.shuffle = True
    gen_params.fixed_batches = False  # maintain

    g = gr.DataGeneratorDisk(ids, **gen_params)

    # check if it returns all items
    data = list(zip(*list(g)))
    data0 = np.concatenate([l[0] for l in data[0]], axis=0).squeeze()
    data1 = np.concatenate([l[0] for l in data[1]], axis=0).squeeze()
    assert np.array_equal(np.sort(data0), np.arange(10))
    assert np.array_equal(np.sort(data1), np.arange(10))

    # check if randomization is applied, consistently
    num_randoms0 = 0
    num_randoms1 = 0
    for i in range(100):
        g = gr.DataGeneratorDisk(ids, **gen_params)
        data = list(zip(*list(g)))
        data0 = np.concatenate([l[0] for l in data[0]], axis=0).squeeze()
        data1 = np.concatenate([l[0] for l in data[1]], axis=0).squeeze()

        # check consistency: inputs and outputs must stay paired rows
        ids_ = ids.copy()
        ids_.index = ids_.a
        # BUG FIX: the original discarded the np.array_equal result, so
        # the pairing consistency was never actually asserted.
        assert np.array_equal(ids_.loc[data0].b, data1)

        num_randoms0 += not np.array_equal(data0, np.arange(10))
        num_randoms1 += not np.array_equal(data1, np.arange(10))

    # check randomization happened at least once on each stream
    assert num_randoms0
    # BUG FIX: the original asserted num_randoms0 twice; the second
    # assertion is meant to cover the outputs stream.
    assert num_randoms1

    # check deterministic: same seed -> identical first batch
    gen_params.shuffle = True
    gen_params.deterministic = np.random.randint(100)
    assert np.array_equal(
        gr.DataGeneratorDisk(ids, **gen_params)[0],
        gr.DataGeneratorDisk(ids, **gen_params)[0])

    gen_params.update(fixed_batches=False,
                      shuffle=True,
                      group_by='c',
                      deterministic=False)
    g = gr.DataGeneratorDisk(ids, **gen_params)
    data = list(zip(*list(g)))
    data = [[l[0] for l in d] for d in data]
    data_conc = [np.concatenate(d, axis=0) for d in data]

    # returns all rows, with a/b pairing intact
    df = pd.DataFrame(np.concatenate(data_conc, axis=1), columns=('a', 'b'))
    x = df.merge(ids, on='a')
    assert np.all(x.b_x == x.b_y)

    # each batch returns a single group
    ids_ = ids.copy()
    ids_.index = ids_.a
    for i, d in enumerate(data[0]):
        assert ids_.loc[d[0]].c.unique().shape == (1, )