def duplicate_with_noise(emojis, repeat=1000):
    """Return a shuffled copy of ``emojis`` extended with augmented duplicates.

    Args:
        emojis: dict with an ``"images"`` list of equally sized 2D images and
            a parallel ``"labels"`` list. It is not modified.
        repeat: how many augmented copies of the whole image set are appended.

    Returns:
        A deep copy of ``emojis`` whose ``"images"`` / ``"labels"`` contain the
        originals plus ``repeat`` augmented copies, shuffled together, with
        every image resized to 28x28.
    """
    # numpy shapes are (rows, columns) == (height, width); ImageAugmenter
    # expects (width, height), so unpack in that order to keep non-square
    # images working.
    height, width = emojis["images"][0].shape
    augmenter = ImageAugmenter(
        width,
        height,  # must be the same for all images in the batch
        hflip=False,  # no horizontal flipping
        vflip=False,  # no vertical flipping
        scale_to_percent=(0.9, 1.1),  # scale to 90%-110% of the original size
        scale_axis_equally=False,  # allow x and y to be scaled differently
        rotation_deg=5,
        shear_deg=5,
        translation_x_px=2,
        translation_y_px=2,
    )

    ret = copy.deepcopy(emojis)
    for i in range(repeat):
        print("Iter {0}".format(i))
        augmented_emojis = augmenter.augment_batch(
            np.array(emojis["images"], dtype=np.uint8))
        ret["images"] += list(augmented_emojis)
        ret["labels"] += emojis["labels"]

    # Shuffle images and labels with the same permutation.
    images, labels = sklearn.utils.shuffle(ret["images"], ret["labels"])
    ret["labels"] = labels
    ret["images"] = [
        skimage.transform.resize(image, (28, 28), preserve_range=True)
        for image in images
    ]
    return ret
def augment_image(image):
    """Return a randomly augmented uint8 version of ``image``.

    The image is inverted, resized to the module-level ``img_size``, pushed
    through an ``ImageAugmenter`` as a one-element batch, scaled by 255 and
    inverted back.
    """
    target_w, target_h = img_size
    inverted = cv2.resize(255 - image, (target_w, target_h))

    settings = dict(
        hflip=False,
        vflip=False,
        scale_to_percent=(0.9, 1.05),
        scale_axis_equally=False,  # x and y may be scaled differently
        rotation_deg=2,
        shear_deg=5,
        translation_x_px=8,
        translation_y_px=2,
        blur_radius=0,
        noise_variance=0,
        motion_blur_radius=0,
        motion_blur_strength=0,
    )
    augmenter = ImageAugmenter(target_w, target_h, **settings)

    single_batch = np.array([inverted], dtype=np.uint8)
    result = augmenter.augment_batch(single_batch)[0]
    # Scale the augmenter output by 255, then undo the initial inversion.
    result *= 255
    return (255 - result).astype(np.uint8)
def __init__(self, logfile, data_3d=True):
    """Load the train and dev sets, log label statistics and build the augmenter.

    Args:
        logfile: path handed to ``print_all_label_statistics``.
        data_3d: forwarded to ``load_density_data`` as ``need_3d``.
    """
    train = load_density_data(
        '../birads_dataset/train-sq-512/', need_3d=data_3d)
    self.train_data, self.train_labels = train
    print(self.train_labels[0:])
    test = load_density_data(
        '../birads_dataset/dev-sq-512/', need_3d=data_3d)
    self.test_data, self.test_labels = test

    # Fixed image geometry; the channel count is 1 whatever data_3d is.
    self.h = self.w = 512
    self.c = 1
    self._idx = 1

    self.classes_map = CLASSES
    self.n_classes = len(set(self.classes_map.values()))
    self.should_enforce_class_balance = True

    # Report the label distribution before and after balancing.
    self.print_all_label_statistics(logfile)
    if self.should_enforce_class_balance:
        print("Enforcing Class balance")
        self.enforce_class_balance()
        self.print_all_label_statistics(logfile)

    self.trainnum = self.train_data.shape[0]
    self.testnum = self.test_data.shape[0]

    self.augment_training_data = False
    augmenter_settings = dict(
        hflip=True,
        vflip=True,
        scale_to_percent=1.2,
        scale_axis_equally=False,  # x and y may be scaled differently
        rotation_deg=10,
        shear_deg=5,
        translation_x_px=20,
        translation_y_px=20,
    )
    self.augmenter = ImageAugmenter(self.w, self.h, **augmenter_settings)
def augment_image(image):
    """Invert ``image``, resize it to ``img_size``, augment it and invert it back.

    Returns the augmented image as a ``uint8`` array.
    """
    negative = 255 - image
    out_w, out_h = img_size
    negative = cv2.resize(negative, (out_w, out_h))

    augmenter = ImageAugmenter(
        out_w,
        out_h,
        hflip=False,
        vflip=False,
        scale_to_percent=(0.9, 1.05),
        scale_axis_equally=False,  # x and y may be scaled differently
        rotation_deg=2,
        shear_deg=5,
        translation_x_px=8,
        translation_y_px=2,
        blur_radius=0,
        noise_variance=0,
        motion_blur_radius=0,
        motion_blur_strength=0,
    )

    # The augmenter works on batches, so wrap the image in a batch of one.
    augmented = augmenter.augment_batch(
        np.array([negative], dtype=np.uint8))[0]
    augmented *= 255
    positive = 255 - augmented
    return positive.astype(np.uint8)
def test_non_square_images(self):
    """Check that translation works on images whose height and width differ."""
    height, width = 11, 4
    source = np.zeros((height, width), dtype=np.uint8)
    expected = np.zeros((height, width), dtype=np.float32)

    # A white horizontal line near the vertical centre; a y-translation of
    # -2 is expected to move it two rows up.
    line_row = int(height / 2) + 1
    for column in range(width):
        source[line_row][column] = 255
        expected[line_row - 2][column] = 1.0

    augmenter = ImageAugmenter(width, height, translation_y_px=(-2, -2))

    total = 100
    batch = np.resize([source], (total, height, width))
    matching = sum(
        1 for result in augmenter.augment_batch(batch)
        if np.allclose(result, expected)
    )
    self.assertEqual(total, matching)
def main():
    """Plot example augmentations for Lena and an image loaded from a file."""
    # The same augmentation settings are used for every example below.
    settings = dict(
        hflip=True,
        vflip=True,
        scale_to_percent=1.3,
        scale_axis_equally=False,
        rotation_deg=25,
        shear_deg=10,
        translation_x_px=5,
        translation_y_px=5,
    )

    # try on a lena image
    # ImageAugmenter takes (width, height) while numpy shapes are
    # (height, width, ...), hence shape[1] before shape[0].
    image = data.lena()
    augmenter = ImageAugmenter(image.shape[1], image.shape[0], **settings)
    augmenter.plot_image(image, 100)

    # check loading of images from file and augmenting them
    image = misc.imread("chameleon.png")
    augmenter = ImageAugmenter(image.shape[1], image.shape[0], **settings)
    augmenter.plot_image(image, 50)

    # move the channel from index 2 (3rd position) to index 0 (1st position)
    # so (y, x, rgb) becomes (rgb, y, x); check that it still works
    image = np.rollaxis(image, 2, 0)
    augmenter = ImageAugmenter(image.shape[2], image.shape[1],
                               channel_is_first_axis=True, **settings)
    augmenter.plot_image(image, 50)
def augmentor(self, img):
    """Augment ``img`` using the augmentation settings stored on ``self``.

    Builds an ``ImageAugmenter`` sized to ``img`` and returns the result of
    its ``augment_ordered`` call.
    """
    height, width = img.shape[0], img.shape[1]
    settings = dict(
        scale_to_percent=self.scale_to_percent,
        rotation_deg=self.rotation_deg,
        shear_deg=self.shear_deg,
        translation_x_px=self.translation_x_px,
        translation_y_px=self.translation_y_px,
        transform_channels_equally=self.transform_channels_equally,
    )
    augmenter = ImageAugmenter(img_height_px=height, img_width_px=width,
                               **settings)
    return augmenter.augment_ordered(img)
def augmentor(self, img):
    """Return ``img`` passed through ``ImageAugmenter.augment_ordered``.

    The augmenter is created per call, sized from ``img.shape`` and configured
    from the attributes of ``self``.
    """
    rows = img.shape[0]
    cols = img.shape[1]
    return ImageAugmenter(
        img_height_px=rows,
        img_width_px=cols,
        scale_to_percent=self.scale_to_percent,
        rotation_deg=self.rotation_deg,
        shear_deg=self.shear_deg,
        translation_x_px=self.translation_x_px,
        translation_y_px=self.translation_y_px,
        transform_channels_equally=self.transform_channels_equally,
    ).augment_ordered(img)
def test_translation_y(self):
    """Check a fixed translation of -1px on the y-axis."""
    source = [[0, 0],
              [255, 255]]
    expected = [[1.0, 1.0],
                [0, 0]]
    batch = np.array([source]).astype(np.uint8)

    # (-1, -1) pins the translation to exactly -1px
    augmenter = ImageAugmenter(2, 2, translation_y_px=(-1, -1))

    # every single augmentation has to match the target
    for _ in range(100):
        shifted = augmenter.augment_batch(batch)[0]
        self.assertTrue(np.allclose(expected, shifted))
def test_rotation(self):
    """Check that a 90 degree rotation turns a vertical line into a horizontal one."""
    vertical_line = [[0, 255, 0],
                     [0, 255, 0],
                     [0, 255, 0]]
    horizontal_line = [[0, 0, 0],
                       [1.0, 1.0, 1.0],
                       [0, 0, 0]]
    batch = np.array([vertical_line]).astype(np.uint8)

    augmenter = ImageAugmenter(3, 3, rotation_deg=(90, 90))
    rotated = augmenter.augment_batch(batch)[0]

    self.assertTrue(np.allclose(horizontal_line, rotated))
def test_translation_x(self):
    """Check a fixed translation of +1px on the x-axis."""
    source = [[255, 0],
              [255, 0]]
    expected = [[0, 1.0],
                [0, 1.0]]
    batch = np.array([source]).astype(np.uint8)

    # (1, 1) pins the translation to exactly +1px
    augmenter = ImageAugmenter(2, 2, translation_x_px=(1, 1))

    # every single augmentation has to match the target
    for _ in range(100):
        shifted = augmenter.augment_batch(batch)[0]
        self.assertTrue(np.allclose(expected, shifted))
def test_no_information_leaking(self):
    """Check that augment_batch() does not modify the batch it is given.

    The augmented images must only be returned; changing the input array in
    place would be a hidden side effect.
    """
    image_before = [[255,   0, 255,   0, 255],
                    [  0, 255,   0, 255,   0],
                    [255, 255, 255, 255, 255],
                    [  0, 255,   0, 255,   0],
                    [255,   0, 255,   0, 255]]
    image_before = np.array(image_before, dtype=np.uint8)
    image_before_copy = np.copy(image_before)

    nb_augment = 100
    # np.resize returns a new array, so `images` (not `image_before`) is the
    # object actually handed to the augmenter and the one that must be
    # compared afterwards.
    images = np.resize([image_before], (nb_augment, 5, 5))
    images_copy = np.copy(images)

    augmenter = ImageAugmenter(5, 5,
                               hflip=True, vflip=True,
                               scale_to_percent=1.5,
                               rotation_deg=25, shear_deg=10,
                               translation_x_px=5, translation_y_px=5)
    augmenter.augment_batch(images)

    self.assertTrue(np.array_equal(image_before, image_before_copy))
    self.assertTrue(np.array_equal(images, images_copy))
def test_no_blacks(self):
    """Check that random augmentations never yield a completely black image."""
    source = data.camera()
    height, width = source.shape
    augmenter = ImageAugmenter(width, height,
                               scale_to_percent=1.5,
                               scale_axis_equally=False,
                               rotation_deg=90,
                               shear_deg=20,
                               translation_x_px=10,
                               translation_y_px=10)
    all_black = np.zeros(source.shape, dtype=np.float32)

    total = 100
    batch = np.resize([source], (total, height, width))

    # count the results that are indistinguishable from an all-zero image
    black_results = [
        result for result in augmenter.augment_batch(batch)
        if np.allclose(result, all_black)
    ]
    self.assertEqual(len(black_results), 0)
def test_shear(self):
    """Rough shear test: most sheared results must differ from the source image."""
    source = [[0, 255, 0],
              [0, 255, 0],
              [0, 255, 0]]
    unchanged = [[0, 1.0, 0],
                 [0, 1.0, 0],
                 [0, 1.0, 0]]
    batch = np.array([source]).astype(np.uint8)
    augmenter = ImageAugmenter(3, 3, shear_deg=50)

    # more than 90% of the runs have to change the image
    runs = 1000
    changed = 0
    for _ in range(runs):
        sheared = augmenter.augment_batch(batch)[0]
        if not np.allclose(unchanged, sheared):
            changed += 1
    self.assertTrue(changed > runs*0.9)
def test_rotation_invariant(self):
    """Test random rotations (rotation_deg=180) on a rotation invariant image."""
    image_before = [[0,   0, 0],
                    [0, 255, 0],
                    [0,   0, 0]]
    image_target = [[0,   0, 0],
                    [0, 1.0, 0],
                    [0,   0, 0]]
    images = np.array([image_before]).astype(np.uint8)

    # random rotation of up to 180 degrees
    augmenter = ImageAugmenter(3, 3, rotation_deg=180)

    # all must be similar to target
    nb_similar = 0
    for _ in range(100):
        image_after = augmenter.augment_batch(images)[0]
        # some tolerance here - interpolation problems can let the image
        # change a bit, even though it should be invariant to rotations
        if np.allclose(image_target, image_after, atol=0.1):
            nb_similar += 1
    # assertEquals is a deprecated alias that newer unittest versions no
    # longer provide; assertEqual is the supported spelling.
    self.assertEqual(nb_similar, 100)
def test_scaling(self):
    """Rough test for zooming in (scaling > 1.0).

    Only the total brightness is compared, because interpolation makes the
    exact result of scaling a tiny synthetic image hard to predict.
    """
    width = 4
    height = 4

    # a 4x4 black image whose centre 2x2 pixels are bright white
    source = np.zeros((height, width))
    source[1:height-1, 1:width-1] = 255
    batch = np.array([source]).astype(np.uint8)

    # about 200% zoom in, identical on both axes
    augmenter = ImageAugmenter(width, height,
                               scale_to_percent=(1.99, 1.99),
                               scale_axis_equally=True)
    zoomed = augmenter.augment_batch(batch)[0]

    # zooming in should grow the bright centre and so raise the total
    # brightness; the source sum is divided by 255 before the comparison
    brightness_before = np.sum(source)/255
    brightness_after = np.sum(zoomed)
    self.assertTrue(brightness_after > brightness_before)
def test_single_channel(self):
    """Check x-translation of images that carry an explicit single channel axis."""
    # --- channel is the last axis: shape (y, x, 1) ---
    source = np.zeros((2, 2, 1), dtype=np.uint8)
    source[0, 0, 0] = 255
    source[1, 0, 0] = 255

    expected = np.zeros((2, 2, 1), dtype=np.float32)
    expected[0, 1, 0] = 1.0
    expected[1, 1, 0] = 1.0

    batch = np.array([source]).astype(np.uint8)
    augmenter = ImageAugmenter(2, 2, translation_x_px=(1, 1))

    # every single augmentation has to match the target
    for _ in range(100):
        shifted = augmenter.augment_batch(batch)[0]
        self.assertTrue(np.allclose(expected, shifted))

    # --- channel is the first axis: shape (1, y, x) ---
    source = np.zeros((1, 2, 2), dtype=np.uint8)
    source[0] = [[255, 0],
                 [255, 0]]

    expected = np.zeros((1, 2, 2), dtype=np.float32)
    expected[0] = [[0, 1.0],
                   [0, 1.0]]

    batch = np.array([source]).astype(np.uint8)
    augmenter = ImageAugmenter(2, 2, translation_x_px=(1, 1),
                               channel_is_first_axis=True)

    # every single augmentation has to match the target
    for _ in range(100):
        shifted = augmenter.augment_batch(batch)[0]
        self.assertTrue(np.allclose(expected, shifted))
def _crop_to_aspect_ratio(image, target_ratio, epsilon):
    """Delete border rows/columns of `image` until width/height matches `target_ratio`.

    The loop stops once `target_ratio` lies within `epsilon` of the current
    width-to-height ratio. Returns the cropped view of `image`.
    """
    height = image.shape[0]
    width = image.shape[1]
    # float() keeps this a true division on Python 2 as well
    wh_ratio = width / float(height)
    removed = 0
    while not (wh_ratio - epsilon <= target_ratio <= wh_ratio + epsilon):
        if wh_ratio < target_ratio:
            # height value is too high
            # remove more from top than from bottom, because we have sky
            # images and hence much similar content at top and only a few
            # rows of pixels with different content at the bottom
            if removed % 4 != 0:
                # remove one row at the top
                image = image[1:height, :, ...]
            else:
                # remove one row at the bottom
                image = image[0:height-1, :, ...]
        else:
            # width value is too high
            if removed % 2 == 0:
                # remove one column at the left
                image = image[:, 1:width, ...]
            else:
                # remove one column at the right
                image = image[:, 0:width-1, ...]

        height = image.shape[0]
        width = image.shape[1]
        wh_ratio = width / float(height)
        removed += 1
    return image


def main():
    """Iterates over the images in each directory, shrinks and augments each one.

    For every (read-directory, write-directory) pair in DIRS, each image is
    padded, augmented AUGMENTATIONS times, cropped to RATIO_WIDTH_TO_HEIGHT,
    resized to (SCALE_HEIGHT, SCALE_WIDTH) and saved. Images that raise an
    IOError are counted as errors and skipped.
    """
    nb_processed = 0
    nb_errors = 0
    nb_total = len(get_all_filepaths([download_dir for download_dir, write_to_dir in DIRS]))

    # iterate over directories (read-directory and save-to-directory)
    for download_dir, write_to_dir in DIRS:
        print("Reading from '%s'" % (download_dir,))
        print("Writing to '%s'" % (write_to_dir,))

        # create directory if it doesnt exist
        if not os.path.exists(write_to_dir):
            os.makedirs(write_to_dir)

        # load filepaths of images in directory
        fps_img = get_all_filepaths([download_dir])

        # iterate over each image
        for fp_img in fps_img:
            print("Image %d of %d (%.2f%%) (%s)"
                  % (nb_processed+1, nb_total, 100.0*(nb_processed+1)/nb_total, fp_img))
            try:
                filename = os.path.basename(fp_img)

                # dont use misc.imload, fails for grayscale images
                image = ndimage.imread(fp_img, mode="RGB")
                height = image.shape[0]
                width = image.shape[1]

                # add padding at the borders of the image
                # then augment image
                batch = np.zeros((AUGMENTATIONS, height+(2*PADDING), width+(2*PADDING), 3), dtype=np.uint8)
                img_padded = np.pad(image, ((PADDING, PADDING), (PADDING, PADDING), (0, 0)), mode="median")
                for i in range(0, AUGMENTATIONS):
                    batch[i] = np.copy(img_padded)
                ia = ImageAugmenter(width+(2*PADDING), height+(2*PADDING), channel_is_first_axis=False,
                                    hflip=True, vflip=False,
                                    scale_to_percent=(1.05, 1.2), scale_axis_equally=True,
                                    rotation_deg=5, shear_deg=1,
                                    translation_x_px=15, translation_y_px=15)
                batch = ia.augment_batch(batch)

                for i in range(0, AUGMENTATIONS):
                    # remove padding (explicit end indices, so that a
                    # PADDING of 0 does not produce an empty slice)
                    image = batch[i, PADDING:PADDING+height, PADDING:PADDING+width, ...]

                    # shrink the image to desired height/width sizes
                    # first delete rows/columns until aspect ratio matches desired aspect ratio
                    # then resize
                    # doing this after the augmentation should decrease the likelihood of
                    # ending with badly looking black areas at the borders of the image
                    # The crop starts from this image's own size every time, so
                    # each augmentation is cropped, not only the first one.
                    image = _crop_to_aspect_ratio(image, RATIO_WIDTH_TO_HEIGHT, EPSILON)

                    image_resized = misc.imresize(image, (SCALE_HEIGHT, SCALE_WIDTH))

                    # save augmented image
                    filename_aug = filename.replace(".jp", "__%d.jp" % (i))
                    misc.imsave(os.path.join(write_to_dir, filename_aug), image_resized)
            except IOError as exc:
                # sometimes downloaded images cannot be read by imread()
                # this should catch these cases
                print("I/O error({0}): {1}".format(exc.errno, exc.strerror))
                nb_errors += 1

            nb_processed += 1

    print("Processed %d images" % (nb_processed,))
    print("Encountered %d errors" % (nb_errors,))
    print("Finished.")
def main():
    """Time how long augmenting batches of the lena image takes.

    Runs ``run_tests`` on 512x512 and 64x64 batches (with and without
    pregenerated matrices), then times one million 32x32 augmentations, once
    through ``ImageAugmenter`` and once by warping with a single matrix.
    """
    batch_size = 64
    nb_runs = 20

    # Measure time required to generate 100k augmentation matrices
    """
    print("Generating 100 times 1000 augmentation matrices of size 64x64...")
    start = time.time()
    for _ in range(100):
        create_aug_matrices(1000, 64, 64,
                            scale_to_percent=1.5, scale_axis_equally=False,
                            rotation_deg=20, shear_deg=20,
                            translation_x_px=5, translation_y_px=5)
    print("Done in %.8f" % (time.time() - start,))
    """

    # transformation settings shared by every benchmark below
    transform = dict(scale_to_percent=1.3, scale_axis_equally=False,
                     rotation_deg=25, shear_deg=10,
                     translation_x_px=5, translation_y_px=5)

    # Test Performance on 64 images of size 512x512 pixels
    image = data.lena()
    batch_shape = (batch_size, image.shape[0], image.shape[1], image.shape[2])
    images = np.resize(image, batch_shape)
    augmenter = ImageAugmenter(image.shape[0], image.shape[1],
                               hflip=True, vflip=True, **transform)
    print("Running tests on %d images of shape %s" % (batch_size, str(image.shape)))
    run_tests(augmenter, images, nb_runs)
    print("")

    print("Running tests on %d images of shape %s" % (batch_size, str(image.shape)))
    print("(With 1000 pregenerated matrices)")
    augmenter.pregenerate_matrices(1000)
    run_tests(augmenter, images, nb_runs)
    print("")

    # Test Performance on 64 images of size 64x64 pixels
    image = misc.imresize(data.lena(), (64, 64))
    batch_shape = (batch_size, image.shape[0], image.shape[1], image.shape[2])
    images = np.resize(image, batch_shape)
    augmenter = ImageAugmenter(image.shape[0], image.shape[1],
                               hflip=True, vflip=True, **transform)
    print("Running tests on %d images of shape %s" % (batch_size, str(image.shape)))
    run_tests(augmenter, images, nb_runs)

    print("Running tests on %d images of shape %s" % (batch_size, str(image.shape)))
    print("(With 1000 pregenerated matrices)")
    augmenter.pregenerate_matrices(1000)
    run_tests(augmenter, images, nb_runs)
    print("")

    # Time required to augment 1,000,000 images of size 32x32
    print("Augmenting 1000 batches of 1000 lena images (1 million total)"
          ", each of size 32x32...")
    image = misc.imresize(data.lena(), (32, 32))
    batch_size = 1000
    batch_shape = (batch_size, image.shape[0], image.shape[1], image.shape[2])
    images = np.resize(image, batch_shape)
    augmenter = ImageAugmenter(image.shape[1], image.shape[0],
                               hflip=True, vflip=True, **transform)
    augmenter.pregenerate_matrices(1000)
    started_at = time.time()
    for _ in range(1000):
        augmenter.augment_batch(images)
    print("Done in %.8fs" % (time.time() - started_at,))
    print("")

    # Time required to augment 1,000,000 images of size 32x32
    # but using only one matrix without the class (no library overhead from
    # ImageAugmenter)
    # Notice that this does not include horizontal and vertical flipping,
    # which is done via numpy in the ImageAugmenter class.
    print("Augmenting 1000 batches of 1000 lena images (1 million total)"
          ", each of size 32x32, using one matrix directly (no ImageAugmenter "
          "class)...")
    matrix = create_aug_matrices(1, image.shape[1], image.shape[0],
                                 **transform)[0]
    started_at = time.time()
    for _ in range(1000):
        for single_image in images:
            tf.warp(single_image, matrix)
    print("Done in %.8fs" % (time.time() - started_at,))
from scipy import misc
import numpy as np

# Plot 20 example augmentations for every jpg/png image found in the
# "training/positive_generated" directory below the current working directory.
directory = os.getcwd() + "/training/positive_generated/"
files = [f for f in listdir(directory) if isfile(join(directory, f))]
for file in files:
    # only names that contain "jpg" or "png" are processed
    if not ("jpg" in file or "png" in file):
        continue

    image = misc.imread("./training/positive_generated/" + file)
    height, width = image.shape[0], image.shape[1]

    # The augmenter is rebuilt per image because it is sized to the image.
    augmenter = ImageAugmenter(
        width,
        height,
        hflip=True,  # flip horizontally with 50% probability
        vflip=True,  # flip vertically with 50% probability
        scale_to_percent=1.3,  # scale the image to 70%-130% of its original size
        scale_axis_equally=False,  # x and y may be scaled differently
        rotation_deg=25,  # rotate between -25 and +25 degrees
        shear_deg=10,  # shear between -10 and +10 degrees
        translation_x_px=5,  # translate between -5 and +5 px on the x-axis
        translation_y_px=5,  # translate between -5 and +5 px on the y-axis
    )
    fig = augmenter.plot_image(image, name=file, nb_repeat=20)
def main():
    """Benchmark ``ImageAugmenter`` on batches of the lena image.

    Covers 64-image batches at 512x512 and 64x64 (each with and without 1000
    pregenerated matrices) and one million 32x32 images, the latter both via
    the class and via a single directly applied matrix.
    """
    batch_size = 64
    nb_runs = 20

    # Measure time required to generate 100k augmentation matrices
    """
    print("Generating 100 times 1000 augmentation matrices of size 64x64...")
    start = time.time()
    for _ in range(100):
        create_aug_matrices(1000, 64, 64,
                            scale_to_percent=1.5, scale_axis_equally=False,
                            rotation_deg=20, shear_deg=20,
                            translation_x_px=5, translation_y_px=5)
    print("Done in %.8f" % (time.time() - start,))
    """

    header = "Running tests on %d images of shape %s"
    pregenerated_note = "(With 1000 pregenerated matrices)"
    affine_kwargs = {
        "scale_to_percent": 1.3,
        "scale_axis_equally": False,
        "rotation_deg": 25,
        "shear_deg": 10,
        "translation_x_px": 5,
        "translation_y_px": 5,
    }

    # Test Performance on 64 images of size 512x512 pixels
    image = data.lena()
    images = np.resize(image, (batch_size,) + tuple(image.shape[0:3]))
    augmenter = ImageAugmenter(image.shape[0], image.shape[1],
                               hflip=True, vflip=True, **affine_kwargs)
    print(header % (batch_size, str(image.shape)))
    run_tests(augmenter, images, nb_runs)
    print("")

    print(header % (batch_size, str(image.shape)))
    print(pregenerated_note)
    augmenter.pregenerate_matrices(1000)
    run_tests(augmenter, images, nb_runs)
    print("")

    # Test Performance on 64 images of size 64x64 pixels
    image = data.lena()
    image = misc.imresize(image, (64, 64))
    images = np.resize(image, (batch_size,) + tuple(image.shape[0:3]))
    augmenter = ImageAugmenter(image.shape[0], image.shape[1],
                               hflip=True, vflip=True, **affine_kwargs)
    print(header % (batch_size, str(image.shape)))
    run_tests(augmenter, images, nb_runs)

    print(header % (batch_size, str(image.shape)))
    print(pregenerated_note)
    augmenter.pregenerate_matrices(1000)
    run_tests(augmenter, images, nb_runs)
    print("")

    # Time required to augment 1,000,000 images of size 32x32
    print("Augmenting 1000 batches of 1000 lena images (1 million total)"
          ", each of size 32x32...")
    image = data.lena()
    image = misc.imresize(image, (32, 32))
    batch_size = 1000
    images = np.resize(image, (batch_size,) + tuple(image.shape[0:3]))
    augmenter = ImageAugmenter(image.shape[1], image.shape[0],
                               hflip=True, vflip=True, **affine_kwargs)
    augmenter.pregenerate_matrices(1000)
    t0 = time.time()
    for _ in range(1000):
        augmenter.augment_batch(images)
    print("Done in %.8fs" % (time.time() - t0,))
    print("")

    # Time required to augment 1,000,000 images of size 32x32
    # but using only one matrix without the class (no library overhead from
    # ImageAugmenter)
    # Notice that this does not include horizontal and vertical flipping,
    # which is done via numpy in the ImageAugmenter class.
    print("Augmenting 1000 batches of 1000 lena images (1 million total)"
          ", each of size 32x32, using one matrix directly (no ImageAugmenter "
          "class)...")
    matrices = create_aug_matrices(1, image.shape[1], image.shape[0],
                                   **affine_kwargs)
    matrix = matrices[0]
    t0 = time.time()
    for _ in range(1000):
        for item in images:
            tf.warp(item, matrix)
    print("Done in %.8fs" % (time.time() - t0,))
class DensityLoader(object):
    """Loads the density train/dev image sets and serves training batches.

    The train set is read from '../birads_dataset/train-sq-512/' and the set
    exposed as "test" from '../birads_dataset/dev-sq-512/'. Labels are
    treated as one-hot style rows: the class of a row is its argmax.
    """

    def __init__(self, logfile, data_3d=True):
        """Load both data sets, log label statistics and build the augmenter.

        Args:
            logfile: path of the text file the label statistics are appended to.
            data_3d: forwarded to ``load_density_data`` as ``need_3d``.
        """
        self.train_data, self.train_labels = load_density_data(
            '../birads_dataset/train-sq-512/', need_3d=data_3d)
        print(self.train_labels[0:])
        self.test_data, self.test_labels = load_density_data(
            '../birads_dataset/dev-sq-512/', need_3d=data_3d)

        self.h = 512
        self.w = 512
        # The channel count is 1 regardless of data_3d.
        self.c = 1
        # NOTE(review): starting at 1 means sample 0 is skipped until the
        # first wrap-around in next_batch -- confirm this is intended.
        self._idx = 1
        self.classes_map = CLASSES
        self.n_classes = len(set(self.classes_map.values()))
        self.should_enforce_class_balance = True

        self.print_all_label_statistics(logfile)
        if self.should_enforce_class_balance:
            print("Enforcing Class balance")
            self.enforce_class_balance()
            self.print_all_label_statistics(logfile)

        self.trainnum = self.train_data.shape[0]
        self.testnum = self.test_data.shape[0]

        self.augment_training_data = False
        self.augmenter = ImageAugmenter(
            self.w,
            self.h,  # must be the same for all images in the batch
            hflip=True,  # flip horizontally with 50% probability
            vflip=True,
            scale_to_percent=1.2,
            scale_axis_equally=False,  # x and y may be scaled differently
            rotation_deg=10,
            shear_deg=5,
            translation_x_px=20,
            translation_y_px=20,
        )

    def print_label_statistics(self, labels, logfile, labels_label):
        """Count the labels per class, print the counts and append them to ``logfile``.

        Args:
            labels: iterable of label rows; the class of a row is its argmax.
            logfile: path of the file the summary line is appended to.
            labels_label: name of the data set used in the message.

        Returns:
            dict mapping each class id of ``classes_map`` to its count.
        """
        class_count = {key: 0 for key in set(self.classes_map.values())}
        for label in labels:
            class_count[np.argmax(label)] += 1
        message = "Class Balance for {}: {}. Total #: {}".format(
            labels_label, class_count, len(labels))
        print(message)
        # `with` closes the log file even if the write fails.
        with open(logfile, 'a+') as f:
            f.write(message + "\n ")
        return class_count

    def print_all_label_statistics(self, logfile):
        """Print and log the label statistics of the train and test sets."""
        self.print_label_statistics(self.train_labels, logfile, "Train")
        self.print_label_statistics(self.test_labels, logfile, "Test")

    def enforce_class_balance(self):
        """Balance the classes of the test set (the train set is left as is)."""
        self.test_data, self.test_labels = self.enforce_class_balance_helper(
            self.test_data, self.test_labels)

    def enforce_class_balance_helper(self, data, labels):
        """Keep the same number of examples for every class.

        Every class is cut down to the size of the rarest class by keeping its
        first examples in their original order.

        Args:
            data: indexable collection of examples.
            labels: label rows parallel to ``data``; the class of a row is
                its argmax.

        Returns:
            ``(data, labels)`` as numpy arrays, grouped by class.
        """
        # Determine the class of every row once; the same argmax rule is
        # used for counting and for selecting, so the two always agree.
        class_ids = np.array([np.argmax(row) for row in labels], dtype=int)

        class_count = {key: 0 for key in set(self.classes_map.values())}
        for class_id in class_ids:
            class_count[class_id] += 1
        min_class_count = min(class_count.values())

        image_data_new = []
        labels_new = []
        for cl in class_count:
            # Rows of this class only, in original order, capped at the
            # size of the rarest class.
            keep = np.flatnonzero(class_ids == cl)[:min_class_count]
            for index in keep:
                image_data_new.append(data[index][...])
                labels_new.append(labels[index][...])

        return np.array(image_data_new), np.array(labels_new)

    def augment_images(self, images):
        """Augment a batch of images.

        The batch is shifted by +0.5 and scaled by 255 into uint8 for the
        augmenter, and 0.5 is subtracted from the augmenter's output.
        """
        augmented_images = ((images + 0.5) * 255.0).astype('uint8')
        augmented_images = self.augmenter.augment_batch(augmented_images) - 0.5
        return augmented_images

    def next_batch(self, batch_size, data_group='train'):
        """Return the next ``batch_size`` training images and labels.

        Examples are always taken from the train set; ``data_group`` only
        decides whether augmentation may be applied. When the end of the
        train set is reached the position wraps to 0 and the set is shuffled.

        Returns:
            ``(images, labels)`` with shapes ``(batch_size, h, w, c)`` and
            ``(batch_size, n_classes)``.
        """
        images_batch = np.zeros((batch_size, self.h, self.w, self.c))
        labels_batch = np.zeros((batch_size, self.n_classes))
        for i in range(batch_size):
            images_batch[i, ...] = self.train_data[self._idx].reshape(
                (self.h, self.w, self.c))
            labels_batch[i, ...] = self.train_labels[self._idx]
            self._idx += 1
            if self._idx == self.trainnum:
                self._idx = 0
                # shuffle data and labels together so they stay aligned
                self.train_data, self.train_labels = shuffle(
                    self.train_data, self.train_labels, random_state=20)
        if data_group == 'train' and self.augment_training_data:
            images_batch = self.augment_images(images_batch)
        return images_batch, labels_batch

    def load_test(self):
        """Return the test images reshaped to ``(-1, h, w, c)`` and the test labels."""
        return self.test_data.reshape(
            (-1, self.h, self.w, self.c)), self.test_labels
continue bbox = [x1, y1, x2, y2] # resize marker bbox_shape = [x2 - x1, y2 - y1] tmp_marker = cv2.resize(marker, (bbox_shape[0], bbox_shape[1])) # Image augmentation height = tmp_marker.shape[0] width = tmp_marker.shape[1] augmenter = ImageAugmenter( width, height, # width and height of the image (must be the same for all images in the batch) hflip=True, # flip horizontally with 50% probability vflip=True, # flip vertically with 50% probability scale_to_percent= 1.5, # scale the image to 70%-130% of its original size scale_axis_equally= False, # allow the axis to be scaled unequally (e.g. x more than y) rotation_deg=45, # rotate between -25 and +25 degrees shear_deg=20, # shear between -10 and +10 degrees translation_x_px= 0, # translate between -5 and +5 px on the x-axis translation_y_px= 0 # translate between -5 and +5 px on the y-axis ) # augment a batch containing only this image # the input array must have dtype uint8 (ie. values 0-255), as is the case for scipy's imread() # the output array will have dtype float32 (0.0-1.0) and can be fed directly into a neural network tmp_marker = augmenter.augment_batch( np.array([tmp_marker], dtype=np.uint8)) # Convert tmp_marker back to uint8 format tmp_marker = tmp_marker[0] * 255 tmp_marker = tmp_marker.astype(np.uint8)
"""Returns X and Y examples to train/test on. Args: count: Maximum number of different images to return (this will be increased by the augmentation number, i.e. count=1 with augmentations=10 will return 10+1 examples). start_at: Start index of the first example to return. augmentations: How often each image will be augmented. Returns: (X, Y) with X being a tensor of images and Y being in array of rows [center x, center y, height/2, width/2] of each face rectangle. """ # low strength augmentation because we will not change the coordinates, so the image # should be kept mostly the same ia = ImageAugmenter(MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH, channel_is_first_axis=False, hflip=False, vflip=False, scale_to_percent=(0.95, 1.05), scale_axis_equally=True, rotation_deg=5, shear_deg=2, translation_x_px=1, translation_y_px=1) images_filepaths = get_all_filepaths(DIRS) images = [] labels = [] for image_filepath in images_filepaths[start_at:start_at+count]: coords_filepath = "%s.cat" % (image_filepath,) image, (center_y, center_x), (scale_y, scale_x) = get_image_with_rectangle(image_filepath, coords_filepath) # get_image_with_rectangle returns None if the coordinates file was not found, # which is the case for one image in 10k cats dataset if image is not None: images.append(image / 255) # project pixel values to 0-1 y = [center_y, center_x, scale_y, scale_x]
def test_two_channels(self):
    """Checks augmentation of two-channel images using a vertical shift.

    The same two channel patterns are augmented twice: once with the
    channel as the first axis of every image and once with the channel as
    the last axis. The augmenter is configured with
    ``translation_y_px=(0, 1)`` and the test expects roughly half of the
    augmented images (40%-60%) to equal the input moved down by one row.
    """
    nb_augment = 1000

    # Channel pattern used in both layouts:
    #   1st channel: top row white, bottom row black
    #   2nd channel: top right corner white, everything else black
    # Expected pattern after moving everything down by one row:
    #   1st channel: top row black, bottom row white
    #   2nd channel: bottom right corner white, everything else black

    # indexed as [channel][y (row)][x (column)]
    before_channel_first = np.array(
        [[[255, 255],
          [0, 0]],
         [[0, 255],
          [0, 0]]], dtype=np.uint8)
    target_channel_first = np.array(
        [[[0, 0],
          [1.0, 1.0]],
         [[0, 0],
          [0, 1.0]]], dtype=np.float32)

    # indexed as [y (row)][x (column)][channel]
    before_channel_last = np.array(
        [[[255, 0], [255, 255]],
         [[0, 0], [0, 0]]], dtype=np.uint8)
    target_channel_last = np.array(
        [[[0, 0], [0, 0]],
         [[1.0, 0], [1.0, 1.0]]], dtype=np.float32)

    layouts = [
        (True, before_channel_first, target_channel_first),
        (False, before_channel_last, target_channel_last),
    ]

    for channel_first, image_before, image_target in layouts:
        augmenter = ImageAugmenter(2, 2, translation_y_px=(0, 1),
                                   channel_is_first_axis=channel_first)

        # batch consisting of nb_augment copies of the same image
        single = np.array([image_before]).astype(np.uint8)
        batch = np.resize(single, (nb_augment, 2, 2, 2))
        augmented = augmenter.augment_batch(batch)

        nb_similar = sum(1 for image_after in augmented
                         if np.allclose(image_target, image_after))
        self.assertTrue(nb_augment * 0.4 < nb_similar < nb_augment * 0.6)
def test_vertical_flipping(self):
    """Checks vertical flipping (rows mirrored top to bottom).

    Covers ``vflip=False`` (no image may be flipped), ``vflip=True``
    (about half flipped), ``vflip=0.9`` (more than 80% flipped) and a
    two-channel, channel-first image with ``vflip=1.0`` (more than 90%
    flipped).
    """
    nb_augment = 1000

    def count_flipped(augmenter, batch, expected):
        """Augments ``batch`` and counts results equal to ``expected``."""
        augmented = augmenter.augment_batch(batch)
        return sum(1 for image_after in augmented
                   if np.allclose(image_after, expected))

    # ---- single channel ----
    image_before = np.array([[255, 0, 0],
                             [0, 255, 255],
                             [0, 0, 255]], dtype=np.uint8)
    # same image with the row order reversed, on a 0.0-1.0 scale
    image_target = np.array([[0, 0, 1.0],
                             [0, 1.0, 1.0],
                             [1.0, 0, 0]], dtype=np.float32)
    images = np.resize([image_before], (nb_augment, 3, 3))

    # vflip=False: exactly 0% may be flipped
    flipped = count_flipped(ImageAugmenter(3, 3, vflip=False),
                            images, image_target)
    self.assertEqual(flipped, 0)

    # vflip=True: ~50% should be flipped
    flipped = count_flipped(ImageAugmenter(3, 3, vflip=True),
                            images, image_target)
    self.assertTrue(nb_augment * 0.4 < flipped < nb_augment * 0.6)

    # vflip given as a probability (0.9): ~90% should be flipped
    flipped = count_flipped(ImageAugmenter(3, 3, vflip=0.9),
                            images, image_target)
    self.assertTrue(nb_augment * 0.8 < flipped <= nb_augment * 1.0)

    # ---- two channels, channel is the first axis ----
    image_before = np.array(
        [[[255, 255, 0],
          [255, 0, 0],
          [0, 0, 0]],
         [[0, 255, 0],
          [0, 255, 0],
          [0, 0, 255]]], dtype=np.uint8)
    image_target = np.array(
        [[[0, 0, 0],
          [1.0, 0, 0],
          [1.0, 1.0, 0]],
         [[0, 0, 1.0],
          [0, 1.0, 0],
          [0, 1.0, 0]]], dtype=np.float32)
    images = np.resize([image_before], (nb_augment, 2, 3, 3))

    flipped = count_flipped(
        ImageAugmenter(3, 3, vflip=1.0, channel_is_first_axis=True),
        images, image_target)
    self.assertTrue(nb_augment * 0.9 < flipped <= nb_augment * 1.0)
def test_transform_channels_unequally(self):
    """Checks that the channels of one image can be augmented differently.

    A two-channel, channel-first image is augmented with
    ``translation_x_px=(0, 1)`` and ``transform_channels_equally=False``.
    Each channel is compared on its own against a copy moved one column to
    the right. The test expects each channel to match in about half of the
    cases, and both channels to agree (both moved or both not moved) in
    about half of the cases as well.
    """
    nb_augment = 1000

    augmenter = ImageAugmenter(3, 3, translation_x_px=(0, 1),
                               transform_channels_equally=False,
                               channel_is_first_axis=True)

    # indexed as [channel][row][column]
    image_before = np.array(
        [[[255, 0, 0],
          [0, 0, 0],
          [0, 0, 0]],
         [[0, 0, 0],
          [0, 0, 0],
          [0, 255, 0]]], dtype=np.uint8)
    # both white pixels moved one column to the right, 0.0-1.0 scale
    image_target = np.array(
        [[[0, 1.0, 0],
          [0, 0, 0],
          [0, 0, 0]],
         [[0, 0, 0],
          [0, 0, 0],
          [0, 0, 1.0]]], dtype=np.float32)

    batch = np.resize(np.array([image_before]).astype(np.uint8),
                      (nb_augment, 2, 3, 3))
    augmented = augmenter.augment_batch(batch)

    # one (channel 0 matches, channel 1 matches) pair per augmented image
    matches = [(np.allclose(image_target[0], image_after[0]),
                np.allclose(image_target[1], image_after[1]))
               for image_after in augmented]

    nb_similar_channel_0 = sum(1 for first, _ in matches if first)
    nb_similar_channel_1 = sum(1 for _, second in matches if second)
    nb_equally_transformed = sum(1 for first, second in matches
                                 if first == second)

    # each one should be around 50%
    lower = 0.40 * nb_augment
    upper = 0.60 * nb_augment
    self.assertTrue(lower < nb_similar_channel_0 < upper)
    self.assertTrue(lower < nb_similar_channel_1 < upper)
    self.assertTrue(lower < nb_equally_transformed < upper)
def _crop_to_aspect_ratio(image, target_ratio, epsilon):
    """Removes single rows/columns until width/height is close to target_ratio.

    Args:
        image: Array whose first axis is the height and second the width.
        target_ratio: Desired width / height ratio.
        epsilon: Accepted absolute deviation from ``target_ratio``.
    Returns:
        A cropped view of ``image``.
    """
    height = image.shape[0]
    width = image.shape[1]
    wh_ratio = float(width) / height
    removed = 0
    while not (wh_ratio - epsilon <= target_ratio <= wh_ratio + epsilon):
        if wh_ratio < target_ratio:
            # height value is too high
            # remove more from top than from bottom, because we have sky
            # images and hence much similar content at top and only a few
            # rows of pixels with different content at the bottom
            if removed % 4 != 0:
                image = image[1:, :, ...]   # one row at the top
            else:
                image = image[:-1, :, ...]  # one row at the bottom
        else:
            # width value is too high, alternate between left and right
            if removed % 2 == 0:
                image = image[:, 1:, ...]   # one column at the left
            else:
                image = image[:, :-1, ...]  # one column at the right
        height = image.shape[0]
        width = image.shape[1]
        wh_ratio = float(width) / height
        removed += 1
    return image


def main():
    """Iterates over the images in each directory, shrinks and augments each one.

    For every (read directory, write directory) pair in DIRS, each image is
    loaded as RGB, padded by PADDING pixels on every side, copied
    AUGMENTATIONS times and augmented. The padding is then removed from
    every augmented copy, the copy is cropped to RATIO_WIDTH_TO_HEIGHT
    (within EPSILON), resized to (SCALE_HEIGHT, SCALE_WIDTH) and saved as
    "<name>__<i><extension>" in the write directory.

    Images that raise an IOError are counted as errors and skipped.
    """
    nb_processed = 0
    nb_errors = 0
    nb_total = len(
        get_all_filepaths(
            [download_dir for download_dir, write_to_dir in DIRS]))

    # iterate over directories (read-directory and save-to-directory)
    for download_dir, write_to_dir in DIRS:
        print("Reading from '%s'" % (download_dir, ))
        print("Writing to '%s'" % (write_to_dir, ))

        # create directory if it doesnt exist
        if not os.path.exists(write_to_dir):
            os.makedirs(write_to_dir)

        # load filepaths of images in directory
        fps_img = get_all_filepaths([download_dir])

        # iterate over each image
        for fp_img in fps_img:
            print("Image %d of %d (%.2f%%) (%s)" \
                  % (nb_processed+1, nb_total,
                     100*(nb_processed+1)/nb_total, fp_img))
            try:
                filename = os.path.basename(fp_img)
                name_root, name_ext = os.path.splitext(filename)

                # dont use misc.imload, fails for grayscale images
                image = ndimage.imread(fp_img, mode="RGB")
                height = image.shape[0]
                width = image.shape[1]

                # add padding at the borders of the image
                # then augment image
                img_padded = np.pad(image,
                                    ((PADDING, PADDING),
                                     (PADDING, PADDING),
                                     (0, 0)),
                                    mode="median")
                batch = np.zeros((AUGMENTATIONS,
                                  height + (2 * PADDING),
                                  width + (2 * PADDING),
                                  3), dtype=np.uint8)
                batch[:] = img_padded  # every entry is a copy of the padded image

                ia = ImageAugmenter(width + (2 * PADDING),
                                    height + (2 * PADDING),
                                    channel_is_first_axis=False,
                                    hflip=True, vflip=False,
                                    scale_to_percent=(1.05, 1.2),
                                    scale_axis_equally=True,
                                    rotation_deg=5, shear_deg=1,
                                    translation_x_px=15,
                                    translation_y_px=15)
                batch = ia.augment_batch(batch)

                for i in range(0, AUGMENTATIONS):
                    # remove padding; explicit end indices keep the slice
                    # non-empty even if PADDING is 0
                    image_aug = batch[i,
                                      PADDING:PADDING + height,
                                      PADDING:PADDING + width,
                                      ...]

                    # shrink the image to desired height/width sizes:
                    # first delete rows/columns until the aspect ratio
                    # matches the desired one, then resize.
                    # doing this after the augmentation should decrease the
                    # likelihood of ending with badly looking black areas
                    # at the borders of the image.
                    # The ratio is measured on every augmented copy itself,
                    # because each copy starts at the full original size.
                    image_aug = _crop_to_aspect_ratio(
                        image_aug, RATIO_WIDTH_TO_HEIGHT, EPSILON)
                    image_resized = misc.imresize(
                        image_aug, (SCALE_HEIGHT, SCALE_WIDTH))

                    # save augmented image; the index goes in front of the
                    # extension so that copies never overwrite each other
                    filename_aug = "%s__%d%s" % (name_root, i, name_ext)
                    misc.imsave(os.path.join(write_to_dir, filename_aug),
                                image_resized)
            except IOError as exc:
                # sometimes downloaded images cannot be read by imread()
                # this should catch these cases
                print("I/O error({0}): {1}".format(exc.errno, exc.strerror))
                nb_errors += 1

            nb_processed += 1

    print("Processed %d images" % (nb_processed, ))
    print("Encountered %d errors" % (nb_errors, ))
    print("Finished.")
def _clamp_span(start, end, size):
    """Clamps the pixel span start..end into 0..size and keeps it non-empty.

    Args:
        start: First index of the span (top or left coordinate).
        end: End index of the span (bottom or right coordinate).
        size: Extent of the image along that axis.
    Returns:
        Tuple (start, end) with 0 <= start < end.
    """
    start = min(max(start, 0), size)
    end = min(max(end, 0), size)
    if start >= end:
        if end > 0:
            start = end - 1
        else:
            # end was clamped to 0; "end - 1" would be -1, which wraps
            # around to the last row/column when used in a slice
            start, end = 0, 1
    return start, end


def main():
    """
    Main function.
    Does the following step by step:
    * Load images (from which to extract cat faces) from SOURCE_DIR
    * Initialize model (as trained via train_cat_face_locator.py)
    * Prepares images for the model (i.e. shrinks them, squares them)
    * Lets model locate cat faces in the images
    * Projects face coordinates onto original images
    * Squares the face rectangles (as we want to get square images at the end)
    * Extracts faces from images with some pixels of padding around them
    * Augments each face image several times
    * Removes the padding from each face image
    * Resizes each face image to OUT_SCALE (height, width)
    * Saves each face image (unaugmented + augmented images)

    Returns early, without creating the model, if no images were loaded.
    """
    # --------------
    # load images
    # --------------
    images, paths = get_images([SOURCE_DIR])
    if len(images) == 0:
        print("No images found in '%s', nothing to do." % (SOURCE_DIR,))
        return

    # we will use the image filenames when saving the images at the end
    images_filenames = [os.path.basename(path) for path in paths]

    # --------------
    # create model
    # --------------
    model = create_model(MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH, Adam())
    load_weights_seq(model, WEIGHTS_FILEPATH)

    # --------------
    # make all images square with required sizes
    # and roll color channel to dimension index 1 (required by theano)
    # --------------
    paddings = []
    images_padded = np.zeros((len(images), MODEL_IMAGE_HEIGHT,
                              MODEL_IMAGE_WIDTH, 3))
    for idx, image in enumerate(images):
        if idx == 0:
            print(idx, image.shape, paths[idx])
        image_padded, (pad_top, pad_right, pad_bottom, pad_left) = \
            square_image(image)
        images_padded[idx] = misc.imresize(
            image_padded, (MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH))
        paddings.append((pad_top, pad_right, pad_bottom, pad_left))

    # roll color channel
    images_padded = np.rollaxis(images_padded, 3, 1)

    # project to 0-1
    images_padded /= 255

    # --------------
    # predict positions of faces
    # --------------
    coordinates_predictions = predict_on_images(model, images_padded)
    print("[Predicted positions]", coordinates_predictions[0])

    # --------------
    # project coordinates from small padded images to full-sized original
    # images (without padding)
    # tl = top left, br = bottom right
    # --------------
    coordinates_orig = []
    for idx, (tl_y, tl_x, br_y, br_x) in enumerate(coordinates_predictions):
        pad_top, pad_right, pad_bottom, pad_left = paddings[idx]
        height_orig = images[idx].shape[0]
        width_orig = images[idx].shape[1]
        height_full = height_orig + pad_top + pad_bottom
        width_full = width_orig + pad_right + pad_left

        # positions relative to the size of the model's input image
        tl_y_perc = tl_y / MODEL_IMAGE_HEIGHT
        tl_x_perc = tl_x / MODEL_IMAGE_WIDTH
        br_y_perc = br_y / MODEL_IMAGE_HEIGHT
        br_x_perc = br_x / MODEL_IMAGE_WIDTH

        # coordinates on full sized squared image version
        tl_y_full = int(tl_y_perc * height_full)
        tl_x_full = int(tl_x_perc * width_full)
        br_y_full = int(br_y_perc * height_full)
        br_x_full = int(br_x_perc * width_full)

        # remove paddings to get coordinates on original images, then fix
        # broken coordinates: anything below 0, anything above image
        # height (y) or width (x), anything where top left >= bottom right
        tl_y_orig, br_y_orig = _clamp_span(tl_y_full - pad_top,
                                           br_y_full - pad_top,
                                           height_orig)
        tl_x_orig, br_x_orig = _clamp_span(tl_x_full - pad_left,
                                           br_x_full - pad_left,
                                           width_orig)

        coordinates_orig.append((tl_y_orig, tl_x_orig, br_y_orig, br_x_orig))

    print("[Removed padding from predicted coordinates]", coordinates_orig[0])

    # --------------
    # square faces
    # --------------
    coordinates_orig_square = []
    for idx, (tl_y, tl_x, br_y, br_x) in enumerate(coordinates_orig):
        height = br_y - tl_y
        width = br_x - tl_x
        i = 0
        # we remove here instead of adding rows/cols, because that way we
        # wont exceed the image maximum sizes
        while height > width:
            if i % 2 == 0:
                tl_y += 1
            else:
                br_y -= 1
            height -= 1
            i += 1
        while width > height:
            if i % 2 == 0:
                tl_x += 1
            else:
                br_x -= 1
            width -= 1
            i += 1
        print("New height:", (br_y-tl_y), "New width:", (br_x-tl_x))
        coordinates_orig_square.append((tl_y, tl_x, br_y, br_x))

    print("[Squared face coordinates]", coordinates_orig_square[0])

    # --------------
    # pad faces
    # --------------
    # extract "padded" faces, where the padding is part of the original image
    # (N pixels around the face)
    # After doing that, we can augment the "padded" faces, then remove the
    # padding and have less augmentation damage (i.e. areas that would
    # otherwise be black will now be filled with parts of the original image)
    faces_padded = []
    for idx, (tl_y, tl_x, br_y, br_x) in enumerate(coordinates_orig_square):
        image = images[idx]
        # we pad the whole image by N pixels so that we can savely extract an
        # area of N pixels around the face
        image_padded = np.pad(image,
                              ((AUGMENTATION_PADDING, AUGMENTATION_PADDING),
                               (AUGMENTATION_PADDING, AUGMENTATION_PADDING),
                               (0, 0)),
                              mode=str("median"))
        # on the padded image the face starts AUGMENTATION_PADDING pixels
        # later, so this slice is the face plus a border of that many pixels
        face_padded = image_padded[tl_y:br_y+2*AUGMENTATION_PADDING,
                                   tl_x:br_x+2*AUGMENTATION_PADDING,
                                   ...]
        faces_padded.append(face_padded)

    print("[Extracted face with padding]")
    misc.imshow(faces_padded[0])

    # --------------
    # augment and save images
    # --------------
    for idx, face_padded in enumerate(faces_padded):
        image_height = face_padded.shape[0]
        image_width = face_padded.shape[1]
        print("[specs of padded face] height", image_height,
              "width", image_width)

        # explicit end indices, so that removing the padding also works if
        # AUGMENTATION_PADDING is 0 (a "0:-0" slice would be empty)
        inner_bottom = image_height - AUGMENTATION_PADDING
        inner_right = image_width - AUGMENTATION_PADDING

        # augment the padded images
        ia = ImageAugmenter(image_width, image_height,
                            channel_is_first_axis=False,
                            hflip=True, vflip=False,
                            scale_to_percent=(0.90, 1.10),
                            scale_axis_equally=True,
                            rotation_deg=45, shear_deg=0,
                            translation_x_px=8, translation_y_px=8)
        images_aug = np.zeros((AUGMENTATION_ITERATIONS, image_height,
                               image_width, 3), dtype=np.uint8)
        images_aug[:] = face_padded  # every entry is a copy of the padded face
        print("images_aug.shape", images_aug.shape)
        images_aug = ia.augment_batch(images_aug)

        # randomly change brightness of whole images
        for idx_aug, image_aug in enumerate(images_aug):
            by_percent = random.uniform(0.90, 1.10)
            images_aug[idx_aug] = np.clip(image_aug * by_percent, 0.0, 1.0)
        print("images_aug.shape [0]:", images_aug.shape)

        # gaussian noise is not added here, because that could be added
        # easily in torch as a layer

        # remove the padding
        images_aug = images_aug[:,
                                AUGMENTATION_PADDING:inner_bottom,
                                AUGMENTATION_PADDING:inner_right,
                                ...]
        print("images_aug.shape [1]:", images_aug.shape)

        # add the unaugmented image
        # NOTE(review): this face still uses the value range of the source
        # image while the augmented ones were clipped to 0.0-1.0 above;
        # presumably misc.imresize rescales every image on its own - confirm.
        images_aug = np.vstack((images_aug,
                                [face_padded[AUGMENTATION_PADDING:inner_bottom,
                                             AUGMENTATION_PADDING:inner_right,
                                             ...]]))
        print("images_aug.shape [2]:", images_aug.shape)

        # save images
        for i, image_aug in enumerate(images_aug):
            if image_aug.shape[0] * image_aug.shape[1] < MINIMUM_AREA:
                print("Ignoring image %d / %d because it is too small (area of %d vs min. %d)" \
                      % (idx, i, image_aug.shape[0] * image_aug.shape[1],
                         MINIMUM_AREA))
            else:
                image_resized = misc.imresize(image_aug,
                                              (OUT_SCALE, OUT_SCALE))
                filename_aug = "%s_%d.jpg" \
                    % (images_filenames[idx].replace(".jpg", ""), i)
                misc.imsave(os.path.join(TARGET_DIR, filename_aug),
                            image_resized)
def main():
    """Iterates over the images in each directory, shrinks and augments each one.

    For every (read directory, write directory) pair in DIRS, each image is
    loaded as RGB, padded by PADDING pixels on every side, copied
    AUGMENTATIONS times and augmented. The padding is then removed from
    every augmented copy, the copy is resized to
    (SCALE_HEIGHT, SCALE_WIDTH) and saved as "<name>__<i><extension>" in
    the write directory.

    Images that raise an IOError are counted as errors and skipped.
    """
    nb_processed = 0
    nb_errors = 0
    nb_total = len(
        get_all_filepaths(
            [download_dir for download_dir, write_to_dir in DIRS]))

    # iterate over directories (read-directory and save-to-directory)
    for download_dir, write_to_dir in DIRS:
        print("Reading from '%s'" % (download_dir, ))
        print("Writing to '%s'" % (write_to_dir, ))

        # create directory if it doesnt exist
        if not os.path.exists(write_to_dir):
            os.makedirs(write_to_dir)

        # load filepaths of images in directory
        fps_img = get_all_filepaths([download_dir])

        # iterate over each image
        for fp_img in fps_img:
            print("Image %d of %d (%.2f%%) (%s)" \
                  % (nb_processed+1, nb_total,
                     100*(nb_processed+1)/nb_total, fp_img))
            try:
                filename = os.path.basename(fp_img)
                name_root, name_ext = os.path.splitext(filename)

                # dont use misc.imload, fails for grayscale images
                image = ndimage.imread(fp_img, mode="RGB")
                height = image.shape[0]
                width = image.shape[1]

                # add padding at the borders of the image
                # then augment image
                img_padded = np.pad(image,
                                    ((PADDING, PADDING),
                                     (PADDING, PADDING),
                                     (0, 0)),
                                    mode="median")
                batch = np.zeros((AUGMENTATIONS,
                                  height + (2 * PADDING),
                                  width + (2 * PADDING),
                                  3), dtype=np.uint8)
                batch[:] = img_padded  # every entry is a copy of the padded image

                ia = ImageAugmenter(width + (2 * PADDING),
                                    height + (2 * PADDING),
                                    channel_is_first_axis=False,
                                    hflip=True, vflip=False,
                                    scale_to_percent=(1.05, 1.2),
                                    scale_axis_equally=True,
                                    rotation_deg=5, shear_deg=1,
                                    translation_x_px=15,
                                    translation_y_px=15)
                batch = ia.augment_batch(batch)

                for i in range(0, AUGMENTATIONS):
                    # take the i-th augmented copy (not the unaugmented
                    # source image) and remove its padding; explicit end
                    # indices keep the slice non-empty even if PADDING is 0
                    image_aug = batch[i,
                                      PADDING:PADDING + height,
                                      PADDING:PADDING + width,
                                      ...]
                    image_resized = misc.imresize(
                        image_aug, (SCALE_HEIGHT, SCALE_WIDTH))

                    # save augmented image; the index goes in front of the
                    # extension so that copies never overwrite each other
                    filename_aug = "%s__%d%s" % (name_root, i, name_ext)
                    misc.imsave(os.path.join(write_to_dir, filename_aug),
                                image_resized)
            except IOError as exc:
                # sometimes downloaded images cannot be read by imread()
                # this should catch these cases
                print("I/O error({0}): {1}".format(exc.errno, exc.strerror))
                nb_errors += 1

            nb_processed += 1

    print("Processed %d images" % (nb_processed, ))
    print("Encountered %d errors" % (nb_errors, ))
    print("Finished.")