def lean_mnist():
    """
    tfaug classification example on MNIST.

    The MNIST arrays are loaded through Keras, written as tfrecords under
    ``DATADIR + 'mnist'`` and read back through ``DatasetCreator``.  Only the
    training dataset is created with augmentation options (zoom, rotation,
    shear and blur).  A small dense network is then fitted and evaluated.

    Returns
    -------
    None.

    """
    os.makedirs(DATADIR + 'mnist', exist_ok=True)

    # load mnist dataset and store both splits as tfrecord
    (x_train, y_train), (x_test, y_test) = \
        tf.keras.datasets.mnist.load_data()
    splits = (
        (x_train, y_train, DATADIR + 'mnist/train.tfrecord'),
        (x_test, y_test, DATADIR + 'mnist/test.tfrecord'),
    )
    for images, targets, path_record in splits:
        TfrecordConverter().from_ary_label(images, targets, path_record)

    batch_size, shuffle_buffer = 25, 25

    # training dataset: augmented
    train_creator = DatasetCreator(shuffle_buffer=shuffle_buffer,
                                   batch_size=batch_size,
                                   repeat=True,
                                   random_zoom=[0.1, 0.1],
                                   random_rotation=20,
                                   random_shear=[10, 10],
                                   random_blur=10,
                                   training=True)
    ds_train, train_cnt = train_creator.from_tfrecords(
        [DATADIR + 'mnist/train.tfrecord'])

    # validation dataset: no augmentation options
    valid_creator = DatasetCreator(shuffle_buffer=shuffle_buffer,
                                   batch_size=batch_size,
                                   repeat=True,
                                   training=False)
    ds_valid, valid_cnt = valid_creator.from_tfrecords(
        [DATADIR + 'mnist/test.tfrecord'])

    layers = [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10),
    ]
    model = tf.keras.models.Sequential(layers)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.002),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['sparse_categorical_accuracy'])

    # both datasets repeat, so the number of steps is given explicitly
    train_steps = train_cnt // batch_size
    valid_steps = valid_cnt // batch_size

    # learn model
    model.fit(ds_train,
              epochs=10,
              validation_data=ds_valid,
              steps_per_epoch=train_steps,
              validation_steps=valid_steps)

    # evaluation result
    model.evaluate(ds_valid, steps=valid_steps, verbose=2)
def test_from_path(self):
    """Check that data written by ``from_path_label`` is read back unchanged.

    Three cases are exercised, each in its own subtest:

    * image paths used as labels (segmentation),
    * multiple inputs and multiple labels per example,
    * integer class labels.
    """
    BATCH_SIZE = 2

    flist_imgs = [DATADIR + 'Lenna.png'] * 10
    flist_seglabels = flist_imgs.copy()
    img_org = np.array(Image.open(flist_imgs[0]))
    clslabels = list(range(10))

    path_tfrecord = DATADIR + 'test_from_path.tfrecord'
    TfrecordConverter().from_path_label(flist_imgs, flist_seglabels,
                                        path_tfrecord)

    with self.subTest('check segmentation label'):
        # check segmentation label
        dc = DatasetCreator(1, BATCH_SIZE, training=True)
        ds, imgcnt = dc.from_tfrecords([path_tfrecord])

        for img, label in ds:
            assert (img == img_org).numpy().all(), 'image is changed'
            assert (label == img_org).numpy().all(), 'labels is changed'

    with self.subTest('check multiple inputs and labels'):
        # check multiple inputs and labels: every example holds two images
        # and two image labels, returned as a dict of named features
        dc = DatasetCreator(1, BATCH_SIZE, training=True)
        path_tfrecord = DATADIR + 'test_from_path_multi.tfrecord'
        TfrecordConverter().from_path_label(
            list(zip(flist_imgs, flist_imgs)),
            list(zip(flist_imgs, flist_imgs)), path_tfrecord)
        ds, imgcnt = dc.from_tfrecords([path_tfrecord])

        for features in ds:
            assert (features['image_in0'] == img_org
                    ).numpy().all(), 'image is changed'
            assert (features['image_in1'] == img_org
                    ).numpy().all(), 'image1 is changed'
            assert (features['label_in0'] == img_org
                    ).numpy().all(), 'labels is changed'
            assert (features['label_in1'] == img_org
                    ).numpy().all(), 'labels is changed'

    with self.subTest('check class label'):
        # check class label
        path_tfrecord = DATADIR + 'test_from_path.tfrecord'
        TfrecordConverter().from_path_label(flist_imgs, clslabels,
                                            path_tfrecord)
        dc = DatasetCreator(False, BATCH_SIZE, training=True)
        ds, datacnt = dc.from_tfrecords([path_tfrecord])

        for i, (img, label) in enumerate(ds):
            assert (img == img_org).numpy().all(), 'image is changed'
            # the i-th batch is compared with the i-th slice of the labels
            # that were written
            expected = clslabels[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
            assert all(label.numpy() == expected), 'label is changed'
def test_concat_patch(self):
    """Split an image into patches and check concat_patch restores it."""
    height, width = 16, 20
    patch_size = (3, 5)

    img = np.arange(height * width).reshape(height, width, 1)
    patches = TfrecordConverter().split_to_patch(img, patch_size, 0)

    # number of patches along each axis (ceiling division)
    rows = -(-height // patch_size[0])
    cols = -(-width // patch_size[1])

    restored = TfrecordConverter().concat_patch(patches, rows, cols)

    # only the region covered by the source image is compared
    cropped = restored[:img.shape[0], :img.shape[1]]
    assert (img == cropped).all(), 'value is changed'
def test_sharded_from_path(self):
    """Check sharded output of ``from_path_label`` and reading it back.

    Ten examples are written with 3 images per shard (4 shards expected)
    using image paths as labels, then with 2 images per shard (5 shards
    expected) using integer class labels.
    """
    flist_imgs = [DATADIR + 'Lenna.png'] * 10
    flist_seglabels = flist_imgs.copy()
    clslabels = list(range(10))
    img_org = np.array(Image.open(flist_imgs[0]))

    # --- image labels, 3 images per shard ---
    path_tfrecord = DATADIR + 'test_shards_from_path.tfrecord'
    TfrecordConverter().from_path_label(flist_imgs,
                                        flist_seglabels,
                                        path_tfrecord,
                                        image_per_shard=3)
    path_tfrecords = glob(DATADIR + 'test_shards_from_path_?.tfrecord')
    assert len(path_tfrecords) == 4, 'num of shards is invalid'

    # check segmentation label
    creator = DatasetCreator(1, 1, training=True)
    ds, _ = creator.from_tfrecords(path_tfrecords)
    for img, label in ds:
        assert (img == img_org).numpy().all(), 'image is changed'
        assert (label == img_org).numpy().all(), 'labels is changed'

    # --- class labels, 2 images per shard ---
    path_tfrecord = DATADIR + 'test_shards_from_path_seg.tfrecord'
    TfrecordConverter().from_path_label(flist_imgs,
                                        clslabels,
                                        path_tfrecord,
                                        image_per_shard=2)
    path_tfrecords = glob(DATADIR + 'test_shards_from_path_seg_?.tfrecord')
    assert len(path_tfrecords) == 5, 'num of shards is invalid'

    # check class label
    creator = DatasetCreator(False, 1, training=True)
    ds, _ = creator.from_tfrecords(path_tfrecords)

    collected = []
    for img, label in ds:
        collected.append(label.numpy())
        assert (img == img_org).numpy().all(), 'image was changed'

    # labels are sorted before the comparison with the written labels
    label_all = np.concatenate(sorted(collected))
    assert all(label_all == clslabels), 'label was changed'
def toy_example():
    """Minimal tfaug example: write a tfrecord, augment it, fit MobileNetV2.

    NOTE(review): the tfrecord is written with one image path per example,
    while ``concat_inputs`` reads ``inputs['image_in1']``.  Confirm whether
    two inputs per example were intended here.
    """
    # prepare inputs and labels
    batch_size, shuffle_buffer = 2, 10
    filepaths = [DATADIR + 'Lenna.png'] * 10
    class_labels = np.random.randint(0, 10, 10)

    # generate the tfrecord in one call
    path_record = DATADIR + 'multi_input.tfrecord'
    TfrecordConverter().from_path_label(filepaths, class_labels, path_record)

    # augmentation parameters
    aug_parms = dict(
        random_rotation=5,
        random_flip_left_right=True,
        random_shear=[5, 5],
        random_brightness=0.2,
        random_crop=None,
        random_blur=[0.5, 1.5],
    )

    # dataset and the number of examples it holds
    creator = DatasetCreator(shuffle_buffer,
                             batch_size,
                             **aug_parms,
                             repeat=True,
                             training=True)
    ds, imgcnt = creator.from_tfrecords(path_record)

    # handling of multiple inputs => just resize and concat
    # multiple inputs are named {'image_in0', 'image_in1', ...} in the
    # inputs dictionary
    def concat_inputs(inputs, label):
        second = tf.image.resize(inputs['image_in1'], (512, 512))
        merged = tf.concat([inputs['image_in0'], second], axis=-1)
        return merged, label

    ds = ds.map(concat_inputs)

    # define the model
    mbnet = tf.keras.applications.MobileNetV2(input_shape=[512, 512, 6],
                                              include_top=True,
                                              weights=None)
    mbnet.compile(optimizer="adam", loss="mse", metrics=["mae"])

    # learn the model; the dataset repeats, so steps are given explicitly
    steps = imgcnt // batch_size
    mbnet.fit(ds, epochs=10, steps_per_epoch=steps)
def test_get_patch(self):
    """Check shape and corner values of patches from ``split_to_patch``."""
    im = np.arange(4 * 5 * 3, dtype=np.uint8).reshape(4, 5, 3)
    patches = TfrecordConverter().split_to_patch(im, [1, 2], [1, 1])

    assert patches.shape == (4 * 3, 3, 4, 3), "invalid patch shape"

    # first patch: top-left corner of the image, zeros on the top and left
    expected_first = np.zeros((3, 4, 3))
    expected_first[1:, 1:, :] = im[0:2, 0:3, :]
    first = patches[0, :, :, :]
    assert (first == expected_first).all(), "invalid patch values"

    # last patch: bottom-right corner of the image, zeros elsewhere
    expected_last = np.zeros((3, 4, 3))
    expected_last[0:2, 0:2, :] = im[-2:, -2:, :]
    last = patches[-1, :, :, :]
    assert (last == expected_last).all(), "invalid patch values"
def download_and_convert_ADE20k(input_size, overlap_buffer):
    """
    Download the ADE20k dataset and convert it into tfrecord format.

    The work is done in stages under ``DATADIR + 'ADE20k/'``; each stage is
    skipped when the glob/file check guarding it finds its output:

    1. download ``ADEChallengeData2016.zip``,
    2. extract the archive,
    3. rewrite images with fewer than 3 bands as RGB, in place,
    4. call ``check_ADE20k_label()``,
    5. split images and annotations into patches,
    6. write the patches as sharded tfrecords (1000 images per shard),
    7. read one validation shard back and show one image with matplotlib.

    Parameters
    ----------
    input_size :
        Passed to ``TfrecordConverter.split_to_patch`` as its second
        argument (the patch size).
    overlap_buffer :
        Duplicated into ``[overlap_buffer, overlap_buffer]`` and passed to
        ``split_to_patch`` as its third argument.

    Raises
    ------
    AssertionError
        If the number of downloaded bytes does not match the
        ``content-length`` header (or the header is missing/zero).
    requests.HTTPError
        If the server answers the download request with an error status.
    """

    def annotation_path(path_img, dirname):
        # <root>/images/<dirname>/<stem>.jpg -> <root>/annotations/<dirname>/<stem>.png
        return os.sep.join(
            Path(path_img).parts[:-3] +
            ('annotations', dirname, Path(path_img).stem + '.png'))

    link = r'http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip'
    dstdir = DATADIR + 'ADE20k/'
    os.makedirs(dstdir, exist_ok=True)

    path_zip = dstdir + 'ADEChallengeData2016.zip'
    if not os.path.isfile(path_zip):
        print('start donloading ADE20k...', flush=True)
        # Download into a temporary name so that an interrupted or failed
        # download is never mistaken for a complete archive on the next run.
        path_part = path_zip + '.part'
        with requests.get(link, stream=True) as response:
            response.raise_for_status()
            total_size_in_bytes = int(
                response.headers.get('content-length', 0))
            block_size = 1024  # 1 Kilobyte
            progress_bar = tqdm(total=total_size_in_bytes,
                                unit='iB',
                                unit_scale=True)
            try:
                with open(path_part, 'wb') as f:
                    for data in response.iter_content(block_size):
                        progress_bar.update(len(data))
                        f.write(data)
            finally:
                progress_bar.close()

        # explicit raise instead of ``assert`` so the check survives ``-O``
        if total_size_in_bytes == 0 or progress_bar.n != total_size_in_bytes:
            os.remove(path_part)
            raise AssertionError("download ADE20k failed")
        os.replace(path_part, path_zip)

    if len(glob(dstdir + 'ADEChallengeData2016/images/validation/ADE_*.jpg')
           ) != 2000:
        print('unzipping ADE20k...')
        from zipfile import ZipFile
        with ZipFile(path_zip, 'r') as zipObj:
            # Extract all the contents of the zip file into dstdir
            zipObj.extractall(dstdir)

    dstdir += 'ADEChallengeData2016/'

    print('convert grayscale images to RGB:', 'test')
    for dirname in ['training', 'validation']:
        for path_img in glob(f'{dstdir}images/{dirname}/ADE_*.jpg'):
            # the context manager releases the file handle before the
            # converted copy is written over the same path
            with Image.open(path_img) as im:
                if len(im.getbands()) >= 3:
                    continue
                rgb = im.convert('RGB')
            rgb.save(path_img)
            print('converted L to RGB:', path_img)

    # plot random label sample
    print('start check ADE20k_label', 'test')
    check_ADE20k_label()

    converter = TfrecordConverter()

    patchdir = dstdir + 'patch/'
    if len(glob(patchdir + 'images/*/ADE_*_no*.jpg')) < 6e4:
        print('splitting imgs to patch...', flush=True)

        # split images into patch
        buffers = [overlap_buffer, overlap_buffer]
        for dirname in ['training', 'validation']:
            print('convert', dirname, 'into patch')
            os.makedirs(f'{patchdir}images/{dirname}', exist_ok=True)
            os.makedirs(f'{patchdir}annotations/{dirname}', exist_ok=True)
            srcimgs = glob(f'{dstdir}/images/{dirname}/ADE_*.jpg')
            for path in tqdm(srcimgs):
                with Image.open(path) as pil_img:
                    im = np.array(pil_img)
                with Image.open(annotation_path(path, dirname)) as pil_lbl:
                    lb = np.array(pil_lbl)

                img_patches = converter.split_to_patch(im,
                                                       input_size,
                                                       buffers,
                                                       dtype=np.uint8)
                lbl_patches = converter.split_to_patch(lb,
                                                       input_size,
                                                       buffers,
                                                       dtype=np.uint8)

                basename = Path(path).stem
                for no, (img_patch, lbl_patch) in enumerate(
                        zip(img_patches, lbl_patches)):
                    Image.fromarray(img_patch).save(
                        f'{patchdir}images/{dirname}/{basename}_no{no}.jpg')
                    Image.fromarray(lbl_patch).save(
                        f'{patchdir}annotations/{dirname}/{basename}_no{no}.png'
                    )

    image_per_shards = 1000
    if len(glob(dstdir + 'tfrecord/*_*.tfrecords')) != 101:
        print('convert ADE20k to tfrecord', flush=True)
        os.makedirs(dstdir + 'tfrecord', exist_ok=True)

        for dirname in ['training', 'validation']:
            imgs = glob(f'{patchdir}/images/{dirname}/ADE_*.jpg')
            # shuffle image order
            random.shuffle(imgs)

            path_labels = [annotation_path(path, dirname) for path in imgs]
            converter.from_path_label(imgs, path_labels,
                                      dstdir + f'tfrecord/{dirname}.tfrecords',
                                      image_per_shards)

    path_tfrecord = DATADIR + 'ADE20k/ADEChallengeData2016/tfrecord/validation_1.tfrecords'

    # check converted tfrecord: read one batch back and display one image
    dc = DatasetCreator(False, 10, training=True)
    ds, datacnt = dc.from_tfrecords([path_tfrecord])
    sample = next(iter(ds.take(1)))
    plt.imshow(sample[0][5])
def test_from_ary_label(self):
    """Check ``from_ary_label`` records for three label variants.

    A 4-channel image array (RGB plus an all-zero channel) is written with
    class labels, with the array itself as label, and with no label.  Each
    dataset is read back with augmentation enabled and is expected to yield
    10 batches cropped to ``random_crop_size``.

    ``DATAGEN_CONF`` is shared by the subtests and its shape entries are
    overwritten before each ``DatasetCreator`` is built.
    """
    random_crop_size = [100, 254]
    # data augmentation configurations:
    DATAGEN_CONF = {
        'standardize': True,
        'resize': None,
        'random_rotation': 5,
        'random_flip_left_right': True,
        'random_flip_up_down': False,
        'random_shift': [.1, .1],
        'random_zoom': [0.2, 0.2],
        'random_shear': [5, 5],
        'random_brightness': 0.2,
        'random_contrast': [0.6, 1.4],
        'random_crop': random_crop_size,
        'random_noise': 100,
        'num_transforms': 10
    }

    BATCH_SIZE = 2
    # close the opened file itself, not only the converted copy
    with Image.open(DATADIR + 'Lenna.png') as src:
        image = np.asarray(src.convert('RGB'))
    image = np.tile(image, (10 * BATCH_SIZE, 1, 1, 1))
    # add channel 4
    image = np.concatenate([
        image,
        np.zeros(image.shape[:3], dtype=np.uint8)[:, :, :, np.newaxis]
    ], axis=3)

    labels = list(range(10)) * BATCH_SIZE

    with self.subTest('classification'):
        # test for classification
        path_tfrecord = DATADIR + 'ds_from_tfrecord.tfrecord'
        TfrecordConverter().from_ary_label(image, labels, path_tfrecord)

        # preproc drops the 4th channel, so the input shape is set by hand
        DATAGEN_CONF['input_shape'] = [BATCH_SIZE, *image.shape[1:3], 3]

        def preproc(img, lbl):
            return (img[:, :, :, :3], lbl)

        dc = DatasetCreator(BATCH_SIZE * 10,
                            BATCH_SIZE,
                            preproc=preproc,
                            **DATAGEN_CONF,
                            training=True)
        ds, cnt = dc.from_tfrecords([path_tfrecord])

        rep_cnt = 0
        for img, label in ds:
            rep_cnt += 1

        assert rep_cnt == 10, "repetition count is invalid"
        assert img.shape[1:3] == random_crop_size, "crop size is invalid"
        assert img.shape[3] == 3, "data shape is invalid"

    with self.subTest('segmentation'):
        # test for segmentation
        path_tfrecord = DATADIR + 'ds_from_tfrecord.tfrecord'
        TfrecordConverter().from_ary_label(image, image, path_tfrecord)

        # here preproc drops the 4th channel of the label only
        def preproc(img, lbl):
            return (img, lbl[:, :, :, :3])

        DATAGEN_CONF['input_shape'] = [BATCH_SIZE, *image.shape[1:]]
        DATAGEN_CONF['input_label_shape'] = [
            BATCH_SIZE, *image.shape[1:3], 3
        ]

        dc = DatasetCreator(BATCH_SIZE * 10,
                            BATCH_SIZE,
                            preproc=preproc,
                            **DATAGEN_CONF,
                            training=True)
        ds, cnt = dc.from_tfrecords([path_tfrecord])

        rep_cnt = 0
        for img, label in ds:
            rep_cnt += 1

        assert rep_cnt == 10, "repetition count is invalid"
        assert img.shape[1:3] == random_crop_size, "crop size is invalid"
        assert img.shape[3] == 4, "data shape is invalid"
        assert label.shape[3] == 3, "data shape is invalid"

    with self.subTest('no label'):
        # test for no labels
        path_tfrecord = DATADIR + 'ds_from_tfrecord.tfrecord'
        DATAGEN_CONF['input_shape'] = [BATCH_SIZE, *image.shape[1:]]
        TfrecordConverter().from_ary_label(image, None, path_tfrecord)

        dc = DatasetCreator(BATCH_SIZE * 10,
                            BATCH_SIZE,
                            **DATAGEN_CONF,
                            training=True)
        ds, cnt = dc.from_tfrecords([path_tfrecord])

        rep_cnt = 0
        for img in ds:
            rep_cnt += 1

        assert rep_cnt == 10, "repetition count is invalid"
        assert img.shape == [BATCH_SIZE, *random_crop_size,
                             4], "crop size is invalid"
def test_private_functions_in_DatasetCreator(self):
    """Run DatasetCreator's private pipeline steps by hand on 3-input records.

    A tfrecord with three images and one class label per example is written,
    then parsed, decoded, augmented and converted to a dict by calling the
    private helpers of ``DatasetCreator`` directly.  The first batch is
    checked for the expected feature shapes.
    """
    BATCH_SIZE = 2

    # single-channel float32 tiff derived from the sample image
    with Image.open(DATADIR + 'Lenna.png') as src:
        fp32 = np.array(src).astype(np.float32) // 256
    Image.fromarray(fp32[:, :, 0]).save(DATADIR + 'Lenna.tif')

    clslabels = list(range(10))

    flist_imgs = [(DATADIR + 'Lenna.png', DATADIR + 'Lenna.tif',
                   DATADIR + 'Lenna.png') for _ in range(10)]

    path_tfrecord = DATADIR + 'test_3_inimgs.tfrecord'
    TfrecordConverter().from_path_label(flist_imgs, clslabels, path_tfrecord)

    dc = DatasetCreator(1, BATCH_SIZE, training=True)

    # the same record file is listed twice
    path_tfrecords = [path_tfrecord, path_tfrecord]
    (ds, num_img, label_type, imgs_dtype, imgs_shape, labels_shape,
     labels_dtype) = dc._get_ds_tfrecord(1, path_tfrecords)

    # test _gen_example and _decoder_creator
    example_formats = dc._gen_example(label_type, labels_dtype, imgs_dtype,
                                      imgs_shape)
    decoders = dc._decoder_creator(label_type, labels_dtype, labels_shape,
                                   imgs_dtype, imgs_shape)

    assert example_formats['image_in0'].dtype == tf.string
    assert example_formats['image_in1'].dtype == tf.string
    assert example_formats['image_in2'].dtype == tf.string
    assert example_formats['label_in0'].dtype == tf.int64

    ds_decoded = (ds.batch(BATCH_SIZE).apply(
        tf.data.experimental.parse_example_dataset(example_formats)).map(
            decoders))

    # define augmentation
    datagen_confs = {'random_rotation': 5, 'num_transforms': 5}

    inputs_shape, input_label_shape = dc._get_inputs_shapes(
        ds_decoded, label_type, len(imgs_dtype))

    # every AugmentImg below receives the same seeds array
    seeds = np.random.uniform(0, 2**32, (int(1e6)))
    if len(imgs_dtype) > 1:
        # multiple input: one augmentation function per input image
        aug_funs = []
        for input_shape in inputs_shape:
            datagen_confs['input_shape'] = input_shape
            aug_funs.append(AugmentImg(**datagen_confs, seeds=seeds))
        if label_type == 'segmentation':
            datagen_confs['input_shape'] = input_label_shape
            aug_funs.append(AugmentImg(**datagen_confs, seeds=seeds))
        elif label_type == 'class':
            # class labels pass through unchanged
            aug_funs.append(lambda x: x)

        aug_fun = dc._apply_aug(aug_funs)

    ds_aug = ds_decoded.map(aug_fun)
    ds_out = ds_aug.map(dc._ds_to_dict(example_formats.keys()))
    test_ret = next(iter(ds_out))

    assert test_ret['image_in0'].shape == [BATCH_SIZE, *imgs_shape[0]
                                           ], "invalid image 0 size"
    assert test_ret['image_in1'].shape == [BATCH_SIZE, *imgs_shape[1]
                                           ], "invalid image 1 size"
    assert test_ret['image_in2'].shape == [BATCH_SIZE, *imgs_shape[2]
                                           ], "invalid image 2 size"
    assert test_ret['label_in0'].shape == BATCH_SIZE, "invalid label size"
def test_multi_inputs_labels(self):
    """Check records that hold several inputs and/or several labels.

    Every example has three input images (png, float32 tiff, png).  The
    subtests combine them with three class labels, one class label, one
    image label with augmentation, and three image labels.
    """
    BATCH_SIZE = 2
    NUM_DATA = 10
    img_org = np.array(Image.open(DATADIR + 'Lenna.png'))
    clslabels = list(range(NUM_DATA))

    # test uint8 and float32 tiff
    # save as float32 tiff
    # NOTE(review): for 8-bit pixel values ``// 256`` gives all zeros, so
    # the 'image_in1' comparisons below are against a constant image --
    # confirm whether ``/ 256`` was intended.
    fp32 = np.array(Image.open(DATADIR + 'Lenna.png')).astype(
        np.float32) // 256
    Image.fromarray(fp32[:, :, 0]).save(DATADIR + 'Lenna.tif')

    flist_imgs = [(DATADIR + 'Lenna.png', DATADIR + 'Lenna.tif',
                   DATADIR + 'Lenna.png') for _ in range(NUM_DATA)]

    with self.subTest('3 inputs 3 labels classification'):
        # test 3 images in tfrecord with 3 class labels (0, 1, 2) each
        path_tfrecord = DATADIR + 'test_3_inimgs_seg.tfrecord'
        TfrecordConverter().from_path_label(flist_imgs,
                                            [list(range(3))] * NUM_DATA,
                                            path_tfrecord)

        dc = DatasetCreator(False,
                            BATCH_SIZE,
                            num_transforms=20,
                            training=True)
        ds, datacnt = dc.from_tfrecords([path_tfrecord])

        for inputs in ds:
            assert (inputs['image_in0'] == img_org
                    ).numpy().all(), 'in_image0 is changed'
            assert (inputs['image_in1'] == fp32
                    ).numpy().all(), 'in_image1 is changed'
            assert (inputs['image_in2'] == img_org
                    ).numpy().all(), 'in_image2 is changed'
            assert (inputs['label_in0'] == [
                0, 0
            ]).numpy().all(), 'label_in0 is changed'
            assert (inputs['label_in1'] == [
                1, 1
            ]).numpy().all(), 'label_in1 is changed'
            assert (inputs['label_in2'] == [
                2, 2
            ]).numpy().all(), 'label_in2 is changed'

    with self.subTest('3 inputs classification'):
        # test 3 images in tfrecord and classification
        path_tfrecord = DATADIR + 'test_3_inimgs.tfrecord'
        TfrecordConverter().from_path_label(flist_imgs, clslabels,
                                            path_tfrecord)

        dc = DatasetCreator(False,
                            BATCH_SIZE,
                            num_transforms=20,
                            training=True)
        ds, datacnt = dc.from_tfrecords([path_tfrecord])

        for i, inputs in enumerate(ds):
            assert (inputs['image_in0'] == img_org
                    ).numpy().all(), 'in_image0 is changed'
            assert (inputs['image_in1'] == fp32
                    ).numpy().all(), 'in_image1 is changed'
            assert (inputs['image_in2'] == img_org
                    ).numpy().all(), 'in_image2 is changed'
            # the i-th batch is compared with the i-th slice of the labels
            expected = clslabels[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
            assert all(inputs['label_in0'].numpy() == expected
                       ), 'label is changed'

    with self.subTest('3 inputs segmentation and augmentation'):
        # test 3 images in tfrecord and segmentation
        path_tfrecord = DATADIR + 'test_3_inimgs_seg.tfrecord'
        TfrecordConverter().from_path_label(flist_imgs,
                                            [DATADIR + 'Lenna.png'] *
                                            NUM_DATA, path_tfrecord)

        dc = DatasetCreator(
            False,
            BATCH_SIZE,
            random_rotation=20,
            num_transforms=20,
            training=True)
        ds, datacnt = dc.from_tfrecords([path_tfrecord])

        for inputs in ds:
            # image_in0 is rotated here, so it is compared with the other
            # features built from the same png rather than with img_org
            assert (inputs['image_in1'] == fp32
                    ).numpy().all(), 'in_image1 is changed'
            assert (inputs['image_in2'] == inputs['image_in0']).numpy(
            ).all(
            ), 'in_image0 and in_image2 do not have same transformation'
            assert (inputs['label_in0'] == inputs['image_in0']).numpy(
            ).all(
            ), 'label_in0 and in_image0 do not have same transformation'

    with self.subTest('3 inputs 3 labels segmentation'):
        # test 3 images in tfrecord and segmentation
        path_tfrecord = DATADIR + 'test_3_inimgs_seg.tfrecord'
        TfrecordConverter().from_path_label(flist_imgs, flist_imgs,
                                            path_tfrecord)

        dc = DatasetCreator(False,
                            BATCH_SIZE,
                            num_transforms=20,
                            training=True)
        ds, datacnt = dc.from_tfrecords([path_tfrecord])

        for inputs in ds:
            assert (inputs['image_in0'] == img_org
                    ).numpy().all(), 'in_image0 is changed'
            assert (inputs['image_in1'] == fp32
                    ).numpy().all(), 'in_image1 is changed'
            assert (inputs['image_in2'] == img_org
                    ).numpy().all(), 'in_image2 is changed'
            assert (inputs['label_in0'] == img_org).numpy().all(), \
                'label_in0 is changed'
            assert (inputs['label_in1'] == fp32).numpy().all(), \
                'label_in1 is changed'
            assert (inputs['label_in2'] == img_org).numpy().all(), \
                'label_in2 is changed'
def test_from_tfrecord_sample_ratio(self):
    """Check ``ratio_samples`` when reading two groups of tfrecords.

    One record file holds only label 0 and the other only label 1.  Three
    ratios are tried: heavily skewed to label 1, even, and 1:10.  The
    assertions on sampled labels are statistical, as their messages say.
    """
    random_crop_size = [100, 254]
    # data augmentation configurations:
    DATAGEN_CONF = dict(
        standardize=True,
        resize=None,
        random_rotation=5,
        random_flip_left_right=True,
        random_flip_up_down=False,
        random_shift=[.1, .1],
        random_zoom=[0.2, 0.2],
        random_shear=[5, 5],
        random_brightness=0.2,
        random_hue=0.01,
        random_contrast=[0.6, 1.4],
        random_crop=random_crop_size,
        random_noise=100,
        random_saturation=[0.5, 2],
        num_transforms=10,
    )

    BATCH_SIZE = 5
    num_examples = 10 * BATCH_SIZE
    flist = [DATADIR + 'Lenna.png'] * num_examples

    # one record file per constant label value
    path_tfrecord_0 = DATADIR + 'ds_from_tfrecord_0.tfrecord'
    TfrecordConverter().from_path_label(flist, [0] * num_examples,
                                        path_tfrecord_0)
    path_tfrecord_1 = DATADIR + 'ds_from_tfrecord_1.tfrecord'
    TfrecordConverter().from_path_label(flist, [1] * num_examples,
                                        path_tfrecord_1)

    # ratio skewed to the second group: the first batch holds only label 1
    skewed_creator = DatasetCreator(5, 10, repeat=False, **DATAGEN_CONF,
                                    training=True)
    ds, cnt = skewed_creator.from_tfrecords(
        [[path_tfrecord_0], [path_tfrecord_1]],
        ratio_samples=np.array([0.1, 1000], dtype=np.float32))
    img, label = next(iter(ds.take(1)))
    assert img.shape[1:3] == random_crop_size, "crop size is invalid"
    assert all(
        label == 1), "sampled label is invalid this sometimes happen"

    # even ratio with batch size 50: two batches are expected
    even_creator = DatasetCreator(5, 50, repeat=False, **DATAGEN_CONF,
                                  training=True)
    ds, cnt = even_creator.from_tfrecords(
        [[path_tfrecord_0], [path_tfrecord_1]],
        ratio_samples=np.array([1, 1], dtype=np.float32))
    rep_cnt = 0
    for rep_cnt, (img, label) in enumerate(ds, start=1):
        pass
    assert rep_cnt == 2, "repetition count is invalid"
    # checked on the last batch only
    assert any(label == 1) and any(label == 0), "sampled label is invalid"

    # check for sampling ratio
    ratio_creator = DatasetCreator(5, 10, repeat=True, **DATAGEN_CONF,
                                   training=True)
    ds, cnt = ratio_creator.from_tfrecords(
        [[path_tfrecord_0], [path_tfrecord_1]],
        ratio_samples=np.array([1, 10], dtype=np.float32))
    ds = ds.take(200)
    cnt_1, cnt_0 = 0, 0
    for img, label in ds:
        values = label.numpy()
        cnt_0 += (values == 0).sum()
        cnt_1 += (values == 1).sum()

    lower = 1/10 - 1/100
    upper = 1/10 + 1/100
    assert lower < cnt_0 / cnt_1 < upper,\
        "sampling ratio is invalid. this happen randomely. please retry:"\
        + str(cnt_0/cnt_1)
def test_from_tfrecord(self):
    """Read augmented datasets from tfrecords written by ``from_path_label``.

    Two subtests are run, one with class labels and one with image paths as
    labels.  Each expects 10 batches cropped to ``random_crop_size`` and
    saves a plot of the result.
    """
    random_crop_size = [100, 254]
    # data augmentation configurations:
    DATAGEN_CONF = dict(
        standardize=True,
        resize=None,
        random_rotation=5,
        random_flip_left_right=True,
        random_flip_up_down=False,
        random_shift=[.1, .1],
        random_zoom=[0.2, 0.2],
        random_shear=[5, 5],
        random_brightness=0.2,
        random_hue=0.01,
        random_contrast=[0.6, 1.4],
        random_crop=random_crop_size,
        random_noise=100,
        random_saturation=[0.5, 2],
        num_transforms=10,
    )

    BATCH_SIZE = 2
    num_examples = 10 * BATCH_SIZE
    flist = [DATADIR + 'Lenna.png'] * num_examples
    labels = [0] * num_examples

    # test for classification
    with self.subTest('for classification'):
        path_tfrecord_0 = DATADIR + 'ds_from_tfrecord_0.tfrecord'
        TfrecordConverter().from_path_label(flist, labels, path_tfrecord_0)

        creator = DatasetCreator(BATCH_SIZE * 10,
                                 BATCH_SIZE,
                                 **DATAGEN_CONF,
                                 training=True)
        ds, cnt = creator.from_tfrecords([path_tfrecord_0])

        rep_cnt = 0
        for rep_cnt, (img, label) in enumerate(ds, start=1):
            pass

        assert rep_cnt == 10, "repetition count is invalid"
        assert img.shape[1:3] == random_crop_size, "crop size is invalid"

        tool.plot_dsresult(ds.take(10), BATCH_SIZE, 10,
                           DATADIR + 'test_ds_from_tfrecord.png')

    # test for segmentation
    with self.subTest('for segmentation'):
        path_tfrecord = DATADIR + 'ds_from_tfrecord.tfrecord'
        TfrecordConverter().from_path_label(flist, flist, path_tfrecord)

        creator = DatasetCreator(BATCH_SIZE * 10,
                                 BATCH_SIZE,
                                 **DATAGEN_CONF,
                                 training=True)
        ds, cnt = creator.from_tfrecords([path_tfrecord])

        rep_cnt = 0
        for rep_cnt, (img, label) in enumerate(ds, start=1):
            pass

        assert rep_cnt == 10, "repetition count is invalid"
        assert img.shape[1:3] == random_crop_size, "crop size is invalid"
        assert label.shape[1:3] == random_crop_size, "crop size is invalid"

        tool.plot_dsresult(ds.take(10), BATCH_SIZE, 10,
                           DATADIR + 'test_ds_from_tfrecord.png')