def test_time_augmentation(image):
    """Return the original image plus five augmented variants for
    test-time augmentation: three corner pads, a 15-degree rotation and a
    horizontal flip.

    :param image: input image array (H, W[, C])
    :return: list of six images [original, pad1, pad2, pad3, rotated, flipped]
    """
    aug1 = iaa.Pad(percent=(0., 0.2, 0.2, 0.), keep_size=True)
    aug2 = iaa.Pad(percent=(0.2, 0., 0.2, 0.), keep_size=True)
    aug3 = iaa.Pad(percent=(0.2, 0., 0., 0.2), keep_size=True)
    aug4 = iaa.Affine(rotate=15)
    # BUG FIX: iaa.Fliplr() defaults to p=0 and never flips, so this
    # variant was just a duplicate of the original image. Always flip.
    aug5 = iaa.Fliplr(1.0)
    return [image] + [
        i.augment_image(image) for i in [aug1, aug2, aug3, aug4, aug5]
    ]
def create_sequence():
    """Build three pad augmenters with increasing pad-percent ranges.

    Each augmenter pads all four sides by a percentage drawn from its
    range and fills with a random constant value in [0, 255].

    :return: tuple (aug_1, aug_2, aug_3) of ``iaa.Pad`` augmenters
    """
    def _padder(lo, hi):
        # Same (lo, hi) range applied to top/right/bottom/left.
        return iaa.Pad(percent=((lo, hi), (lo, hi), (lo, hi), (lo, hi)),
                       pad_mode="constant",
                       pad_cval=(0, 255))

    aug_1 = _padder(0.15, 0.3)
    aug_2 = _padder(0.45, 0.6)
    aug_3 = _padder(0.75, 0.9)
    return aug_1, aug_2, aug_3
def logic(self, image):
    """Apply every configured pad amount to *image* and record the results.

    For each entry in ``self.augmentation_params`` a deterministic
    ``iaa.Pad`` is applied and a ``[value-string, augmented-image, tag]``
    triple is appended to ``self.augmentation_data``.
    """
    for cfg in self.augmentation_params:
        amount = cfg.augmentation_value
        padder = iaa.Pad(px=amount).to_deterministic()
        record = [str(amount),
                  padder.augment_image(image),
                  cfg.detection_tag]
        self.augmentation_data.append(record)
def __init__(self):
    """Build the training augmentation pipeline.

    Every step is wrapped in ``iaa.Sometimes(0.5, ...)`` so each is
    applied independently with probability 0.5: a geometric pad/affine
    stage followed by several photometric perturbations.
    """
    st = lambda aug: iaa.Sometimes(0.5, aug)
    self.seq = iaa.Sequential([
        st(iaa.Pad(percent=((0, 0.2), (0, 0.2), (0, 0.2), (0, 0.2)),
                   keep_size=False)),
        # st(iaa.Crop(percent=([0.0, 0.1], [0.00, 0.1], [0.0, 0.1], [0.0, 0.1]), keep_size=False)),
        # fit_output=True grows the canvas so rotated content is not cut off.
        st(iaa.Affine(scale=(0.9, 1.0),
                      rotate=(-30, 30),
                      shear=(-5, 5),
                      translate_px={"x": (-30, 30), "y": (-10, 10)},
                      fit_output=True)),
        # st(iaa.PerspectiveTransform((0,0.1),fit_output=True)),
        # st(iaa.MultiplyAndAddToBrightness(mul=(0.6, 1.5), add=(0, 30))),
        st(iaa.ChangeColorTemperature(kelvin=(3000, 9100))),
        st(iaa.LinearContrast((0.75, 1.5))),
        st(iaa.GaussianBlur((0, 0.2))),
        # st(iaa.PerspectiveTransform(scale=0.05,)),
        st(iaa.AddToHueAndSaturation((-20, 20))),
        # st(iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 16), per_channel=True)),  # add gaussian noise to images
        # st(iaa.Dropout((0.0, 0.1), per_channel=0.5)),  # randomly remove up to 10% of the pixels
        # change brightness of images (by -40 to 40, per channel)
        st(iaa.Add((-40, 40), per_channel=True)),
        # change brightness of images (50-150% of original value)
        st(iaa.Multiply((0.5, 1.5), per_channel=True)),
    ])
def augment_batch_img_for_box(batch_img, batch_pts, plot=False):
    """
    Image augmentation, used when training
    :param batch_img: [B,H,W,C]
    :param batch_pts: [B,number,xy]
    :return: aug_b_img, aug_b_pts
    """
    maybe = lambda a: iaa.Sometimes(0.5, a)
    pipeline = iaa.Sequential([
        maybe(iaa.Pad(percent=(0, 0.8))),
        iaa.Affine(rotate=(-5, 5)),
        iaa.Multiply((0.7, 1.3)),  # change brightness
    ])
    out_imgs, out_pts = pipeline(images=batch_img, keypoints=batch_pts)
    if plot:
        import cv2
        # Convert grayscale frames to BGR so keypoints can be drawn in color.
        originals = [cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) for im in batch_img]
        augmented = [cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) for im in out_imgs]
        for idx, (before_img, after_img) in enumerate(zip(originals,
                                                          augmented)):
            print("[Image #%d]" % (idx, ))
            kps_before = KeypointsOnImage.from_xy_array(
                batch_pts[idx], shape=before_img.shape)
            kps_after = KeypointsOnImage.from_xy_array(
                out_pts[idx], shape=after_img.shape)
            side_by_side = np.hstack([kps_before.draw_on_image(before_img),
                                      kps_after.draw_on_image(after_img)])
            ia.imshow(side_by_side)
    return out_imgs, out_pts
def _pad_image(self, image):
    """Pad *image* by the per-side amounts computed from its size.

    The amounts come from ``self._get_pad_sequence``; ``self.pad_mode``
    selects the fill strategy and the result is NOT resized back.
    """
    h, w = image.shape[0], image.shape[1]
    pads = self._get_pad_sequence(h, w)
    padder = iaa.Pad(px=pads, keep_size=False, pad_mode=self.pad_mode)
    return padder.augment_image(image)
def resize_pad_seq(pad_size):
    """Build an affine -> scale -> random-pad augmentation sequence.

    NOTE(review): ``pad_size`` is unused here — the per-side pad amounts
    come from ``compute_random_pad()``; confirm whether the parameter can
    be dropped or should feed ``compute_random_pad``.
    """
    # (top, left, bottom, right)-style random offsets for padding.
    dy0, dx0, dy1, dx1 = compute_random_pad()
    seq = iaa.Sequential([
        affine_seq,
        iaa.Scale({'height': IMG_ORI_SIZE*SCALE, 'width': IMG_ORI_SIZE*SCALE}),
        iaa.Pad(px=(dy0, dx0, dy1, dx1), pad_mode='edge', keep_size=False),
    ], random_order=False)
    return seq
def epoch_augmentation(__data, __ground_truth, padding):
    """Randomly pad, flip and (sometimes) affine-transform an image and its
    ground-truth mask together, then apply image-only photometric
    augmentation, additionally boosting brightness/sharpness on road pixels.

    :param __data: input image (float, converted to uint8 internally)
    :param __ground_truth: label mask, same spatial size as ``__data``
    :param padding: half of the total padding added per axis
    :return: (augmented image scaled to [0, 1], augmented ground truth)
    """
    MAX = 2 * padding
    # BUG FIX: the original assert used `!=`, i.e. it passed only when the
    # shapes DIFFERED. Compare the spatial dimensions for equality instead
    # (channel counts may legitimately differ between image and mask).
    assert (__data.shape[:2] == __ground_truth.shape[:2]
            ), "Incorrect dimensions for data and labels"
    assert (
        MAX >= 0), "Augmentation would reduce images, is this really what you want?"
    # Split the total padding randomly between the two sides of each axis.
    offset_x, offset_y = np.random.randint(0, MAX + 1, 2)
    # Renamed from `padding` to stop shadowing the function parameter.
    pad_aug = iaa.Pad(px=(offset_y, offset_x, MAX - offset_y, MAX - offset_x),
                      pad_mode=["reflect"],
                      keep_size=False)
    affine = iaa.Affine(
        rotate=(-180, 180),
        # shear=(-5, 5),
        scale=(0.9, 1.1),
        mode=["reflect"])
    # Deterministic so image and ground truth receive identical transforms.
    augment_both = iaa.Sequential(
        [
            pad_aug,  # Pad the image to requested padding
            iaa.Fliplr(0.5),
            iaa.Flipud(0.5),
            iaa.Sometimes(
                0.5, affine)  # Apply sometimes more interesting augmentations
        ],
        random_order=False).to_deterministic()
    road_augment = iaa.Sequential(
        [
            iaa.Multiply((1.5, 1.7)),
            # iaa.ContrastNormalization((1.5, 1.8)),
            iaa.Sharpen(alpha=(0, 0.25), lightness=(0.75, 1.0)),
            iaa.Emboss(alpha=(0, 1.0), strength=(0, 0.5)),
        ],
        random_order=False).to_deterministic()
    probabilistic_road_augment = iaa.Sequential(
        [iaa.Sometimes(0.3, road_augment)]).to_deterministic()
    augment_image = iaa.Sequential(
        iaa.SomeOf(
            (0, None),
            [iaa.Multiply((0.8, 1.2)),
             iaa.ContrastNormalization((0.8, 1.2))],
            random_order=True)).to_deterministic()
    __data = img_float_to_uint8(__data)
    aug_image = augment_both.augment_image(__data)
    aug_ground_truth = augment_both.augment_image(__ground_truth)
    aug_image = augment_image.augment_image(aug_image)
    aug_road = probabilistic_road_augment.augment_image(aug_image)
    # Overwrite road pixels (mask > 0.5) with the road-boosted version.
    road_ids = aug_ground_truth > 0.5
    aug_image[road_ids] = aug_road[road_ids]
    aug_image = aug_image / 255.0
    return aug_image, aug_ground_truth
def main(image2bboxes, annot_file):
    """Augment a set of images together with their bounding boxes, save the
    augmented images as ``<name>_aug.<ext>`` and write a matching
    annotation file ``<annot_file>_aug.txt``.

    :param image2bboxes: mapping of image path -> iterable of
        'x1,y1,x2,y2,class' strings
    :param annot_file: path of the original annotation .txt file
    """
    bbs, images, classes, img_files = [], [], [], []
    for img_file in image2bboxes:
        bb = image2bboxes[img_file]  # this has class, too
        # NOTE(review): paths are resolved relative to '..' — confirm the
        # expected working directory.
        images.append(plt.imread(os.path.join('..', img_file)))
        img_files.append(img_file)
        many_boxes = []
        many_classes = []
        # Parse out bboxes and classes, adding bboxes as imgaug BoundingBox objects
        for box in bb:
            box = [int(x) for x in box.split(',')]
            many_boxes.append(ia.BoundingBox(x1=box[0], y1=box[1],
                                             x2=box[2], y2=box[3]))
            many_classes.append(box[4])
        bbs.append(many_boxes)
        classes.append(many_classes)
    # To sometimes apply aug
    sometimes = lambda aug: iaa.Sometimes(0.5, aug)
    # The transformations!!!
    seq = iaa.Sequential([
        iaa.AdditiveGaussianNoise(scale=0.05*255),
        # iaa.Affine(translate_px={"x": (1, 5)}),
        # sometimes(iaa.Fog()),
        sometimes(iaa.SigmoidContrast(cutoff=0.7)),
        # sometimes(iaa.Multiply(0.5)),
        sometimes(iaa.Add(-10)),
        sometimes(iaa.Pad(px=(256, 256, 0, 0)))
    ])
    # Augment images and boxes in one call so they stay in sync.
    images_aug, bbs_aug = seq(images=images, bounding_boxes=bbs)
    # Save images
    for i in range(len(images)):
        img_file = img_files[i]
        # Create new file name and save
        name_spl = os.path.basename(img_file).split('.')
        ending = name_spl[-1]
        new_file_name = '.'.join(name_spl[0:-1]) + '_aug' + '.' + ending
        plt.imsave(os.path.join('..', 'data', 'JPEGImages', new_file_name),
                   images_aug[i])
    # Save annotations to one file
    with open(annot_file.replace('.txt', '_aug.txt'), 'w') as f:
        for i in range(len(images)):
            items = []
            name_spl = os.path.basename(img_files[i]).split('.')
            ending = name_spl[-1]
            new_file_name = '.'.join(name_spl[0:-1]) + '_aug' + '.' + ending
            items.append('data/JPEGImages/' + new_file_name)
            many_boxes = bbs_aug[i]
            many_classes = classes[i]
            for j in range(len(many_boxes)):
                box = many_boxes[j]
                # One 'x1,y1,x2,y2,class' token per box.
                annot = ','.join([str(int(x)) for x in
                                  [box.x1, box.y1, box.x2, box.y2,
                                   many_classes[j]]])
                items.append(annot)
            f.write(' '.join(items) + '\n')
def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter:
    """Fit the augmenter: pad by ``self.percent``, then crop back to the
    dataset's fixed image size taken from ``X``.

    :param X: fit dictionary providing 'image_height' and 'image_width'
    :param y: unused, kept for API compatibility
    :return: self
    """
    self.check_requirements(X, y)
    pad = iaa.Pad(percent=self.percent, keep_size=False)
    crop = iaa.CropToFixedSize(height=X['image_height'],
                               width=X['image_width'])
    self.pad_augmenter = pad
    self.crop_augmenter = crop
    self.augmenter: Augmenter = iaa.Sequential(
        [pad, crop], name=self.get_properties()['name'])
    return self
def scale_crop_pad(img, factor):
    """Rescale a square PIL image by *factor* and restore the original
    size: symmetric top/left padding when shrunk, center crop otherwise.
    """
    original = img.size[0]
    target = int(original * factor)
    resized = img.resize((target, target), Image.BICUBIC)
    if target >= original:
        return transforms.CenterCrop(original)(resized)
    deficit = original - target
    padder = iaa.Pad(px=(deficit, deficit, 0, 0),
                     pad_mode='symmetric',
                     keep_size=False)
    padded = padder.augment_image(np.array(resized))
    return Image.fromarray(padded)
def __init__(self):
    """Pipeline: random right/left padding, resize to height 32 while
    keeping the aspect ratio, then a small random rotation."""
    pad = iaa.Pad(percent=(0, (0, 0.5), 0, (0, 0.5)))
    resize = iaa.Resize(size={
        "height": 32,
        "width": "keep-aspect-ratio"
    })
    rotate = iaa.Rotate(rotate=(-10, 10))
    self.aug = iaa.Sequential([pad, resize, rotate])
def crop_pad(dataset):
    """Apply 4px pad + 4px crop jitter to each flattened 32x32x3 image.

    :param dataset: iterable of [flat_image, label] pairs
    :return: list of [augmented flat float32 image, label] pairs
    """
    # Build the pipeline once instead of once per sample.
    seq = iaa.Sequential(
        [iaa.Pad(px=(4, 4, 4, 4)),
         iaa.Crop(px=(4, 4, 4, 4))])
    new_dataset = []
    for item in dataset:
        image = np.array(item[0][:])
        image = image.reshape((32, 32, 3)).astype(np.uint8)
        # BUG FIX: `seq(images=image)` treated the single HxWxC array as a
        # *batch* of 2-D images; pass it as one image instead.
        image = seq(image=image)
        noise_image = (image.reshape(-1)).astype(np.float32)
        new_dataset.append([noise_image, item[1]])
    return new_dataset
def iaa_letterbox(img, new_dim):
    """Build a letterboxing pipeline: scale to fit inside *new_dim*
    (via ``letterbox_transforms``) and pad the remainder with gray (128).

    :param img: image array (H, W, ...) or a (width, height) tuple
    :param new_dim: (width, height) of the letterboxed output
    :return: (iaa.Sequential pipeline, reverter array
             [orig_w, orig_h, padded_w, padded_h, x_pad, y_pad])
    """
    if isinstance(img, tuple):
        org_dim = img
    else:
        # numpy shape is (H, W, ...) -> reorder to (W, H).
        org_dim = img.shape[1], img.shape[0]
    # `ratio` is unused here; the reverter carries enough info to undo.
    padded_w, padded_h, x_pad, y_pad, ratio = letterbox_transforms(*org_dim,
                                                                   *new_dim)
    # Any leftover padding goes to the right/bottom sides.
    l_pad, r_pad = x_pad, new_dim[0] - padded_w - x_pad
    t_pad, b_pad = y_pad, new_dim[1] - padded_h - y_pad
    lb_reverter = np.array([org_dim[0], org_dim[1], padded_w, padded_h,
                            x_pad, y_pad])
    # iaa.Pad px order is (top, right, bottom, left).
    return iaa.Sequential([iaa.Scale({
        "width": padded_w,
        "height": padded_h
    }),
        iaa.Pad(px=(t_pad, r_pad, b_pad, l_pad),
                keep_size=False,
                pad_cval=128),
    ]), \
        lb_reverter
def __init__(self, seed=1, aug_type=None, probability=0.9):
    """Seed imgaug, prepare shape- and color-based augmenter pools, and
    build the final sequence for *aug_type* via ``self.aug_sequence``."""
    self.type = aug_type
    ia.seed(seed)
    # Geometry-altering augmenters.
    self.shape = [
        iaa.Crop(px=(32, 64)),
        iaa.Fliplr(1),
        iaa.Pad(px=(16, 32), pad_mode=ia.ALL, pad_cval=(0, 128)),
        iaa.CoarseDropout(0.1, size_percent=0.02),
    ]
    # Photometric augmenters.
    gamma = iaa.OneOf([
        iaa.GammaContrast((0.75, 1.25)),
        iaa.GammaContrast((0.95, 1.05), per_channel=True),
    ])
    # iaa.GaussianBlur(sigma=(1.0, 2.0)),
    self.color = [
        gamma,
        iaa.SaltAndPepper(0.005),
        iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.025*255)),
    ]
    self.seq = self.aug_sequence(aug_type, probability)
def __init__(self):
    """Build a heavy document/image noise pipeline.

    Most steps are wrapped in ``iaa.Sometimes(0.5, ...)`` and are applied
    independently: small crop/pad, dropout-style occlusion, blur variants,
    local geometric warps, additive noise and a 50% color inversion.
    """
    sometimes = lambda aug: iaa.Sometimes(0.5, aug)
    self.seq = iaa.Sequential([
        sometimes(iaa.Crop(px=(0, 0, 8, 0), keep_size=True)),
        sometimes(iaa.Pad(px=(0, 0, 0, 5), keep_size=False)),
        iaa.Multiply((0.8, 1.2), per_channel=0.5),
        sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.05))),
        # Occlusion-style noise: one of several dropout variants.
        sometimes(
            iaa.OneOf([
                iaa.CoarseDropout((0.01, 0.03), size_percent=(0.1, 0.3)),
                iaa.CoarseDropout((0.01, 0.03), size_percent=(0.1, 0.3),
                                  per_channel=1.0),
                iaa.Dropout((0.03,0.05)),
                iaa.Salt((0.03,0.05))
            ])
        ),
        iaa.Multiply((0.8, 1.2), per_channel=0.5),
        sometimes(iaa.FrequencyNoiseAlpha(
            exponent=(-4, 0),
            first=iaa.Multiply((0.8, 1.2), per_channel=0.5),
            second=iaa.ContrastNormalization((0.8, 1.5))
        )
        ),
        # One of several blur variants.
        sometimes(
            iaa.OneOf([
                iaa.MotionBlur(k=(3,4),angle=(0, 360)),
                iaa.GaussianBlur((0, 1.2)),
                iaa.AverageBlur(k=(2, 3)),
                iaa.MedianBlur(k=(3, 5))
            ])
        ),
        sometimes(
            iaa.CropAndPad(
                percent=(-0.05, 0.1),
                pad_mode='constant',
                pad_cval=(0, 255)
            ),
        ),
        sometimes(iaa.ElasticTransformation(alpha=(1.0, 2.0),
                                            sigma=(2.0, 3.0))),  # move pixels locally around (with random strengths)
        sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.02),
                                      mode='constant')),  # sometimes move parts of the image around
        sometimes(iaa.AdditiveGaussianNoise((0.02, 0.1))),
        sometimes(iaa.AdditivePoissonNoise((0.02,0.05))),
        iaa.Invert(p=0.5)
    ])
def generate_augmentation(aug_pad, aug_affine, aug_ch_suffle, aug_dropout,
                          aug_AGN, aug_fliplr, aug_flipud, aug_percent):
    '''
    This function creates an augment for dataset transform to use.
    Args determine which augments are used. The augments are predetermined
    and appliend in same order as args.

    Args:
        aug_pad (bool): Boolean value if pad filter will be used.
        aug_affine (bool): Boolean value if affine rotation filter will be used.
        aug_ch_suffle (bool): Boolean value if channel suffle filter will be used.
        aug_dropout (bool): Boolean value if dropout filter will be used.
        aug_AGN (bool): Boolean value if Additive Gaussian Noice filter will be used.
        aug_fliplr (bool): Boolean value if left-right flip filter will be used.
        aug_flipud (bool): Boolean value if up-down flip filter will be used.
        aug_percent (float): Float between 0 and 1. The determined augments
            are applied randomly based on aug_percent.

    Return:
        augment (imgaug augmenter): Imgaug augmenter that can be used to
            perform transformations on images.
    '''
    # Every image is resized; the optional augments come after.
    resize = iaa.Resize(224)
    # Pair each candidate augmenter with its enable flag, in fixed order.
    candidates = [
        (aug_pad, iaa.Pad(px=(0, 4))),
        (aug_affine, iaa.Affine(rotate=(-10, 10))),
        (aug_ch_suffle, iaa.ChannelShuffle(0.35)),
        (aug_dropout, iaa.Dropout(p=(0, 0.2))),
        (aug_AGN, iaa.AdditiveGaussianNoise(loc=0, scale=(0, 15))),
        (aug_fliplr, iaa.Fliplr(0.5)),
        (aug_flipud, iaa.Flipud(0.5)),
    ]
    chosen = [aug for enabled, aug in candidates if enabled]
    # The optional chain runs with probability aug_percent per image.
    augment = iaa.Sequential(
        [resize, iaa.Sometimes(aug_percent, iaa.Sequential(chosen))])
    return augment
def epoch_augmentation_old(__data, __ground_truth, padding):
    """Older augmentation variant: pad and (sometimes) affine-transform the
    image and ground truth together, then apply image-only photometric noise.

    :param __data: input image (float, converted to uint8 internally)
    :param __ground_truth: label mask, same spatial size as ``__data``
    :param padding: half of the total padding added per axis
    :return: (augmented image scaled to [0, 1], augmented ground truth)
    """
    MAX = 2 * padding
    # BUG FIX: the original assert used `!=` and therefore passed only when
    # the shapes DIFFERED; compare the spatial dimensions for equality.
    assert (__data.shape[:2] == __ground_truth.shape[:2]
            ), "Incorrect dimensions for data and labels"
    assert (
        MAX >= 0), "Augmentation would reduce images, is this really what you want?"
    # Split the total padding randomly between the two sides of each axis.
    offset_x, offset_y = np.random.randint(0, MAX + 1, 2)
    # Renamed from `padding` to stop shadowing the function parameter.
    pad_aug = iaa.Pad(px=(offset_y, offset_x, MAX - offset_y, MAX - offset_x),
                      pad_mode=["reflect"],
                      keep_size=False)
    affine = iaa.Affine(rotate=(-180, 180),
                        shear=(-5, 5),
                        scale=(0.9, 1.1),
                        mode=["reflect"])
    # Deterministic so image and ground truth receive identical transforms.
    augment_both = iaa.Sequential(
        [
            pad_aug,  # Pad the image to requested padding
            iaa.Sometimes(
                0.3, affine)  # Apply sometimes more interesting augmentations
        ],
        random_order=False).to_deterministic()
    augment_image = iaa.Sequential(
        iaa.SomeOf(
            (0, None),
            [
                iaa.Multiply((0.8, 1.2)),
                iaa.ContrastNormalization((0.8, 1.2)),
                iaa.Dropout(0.01),  # Drop out single pixels
                iaa.SaltAndPepper(0.01)  # Add salt-n-pepper noise
            ],
            random_order=True)).to_deterministic()
    __data = img_float_to_uint8(__data)
    aug_image = augment_both.augment_image(__data)
    aug_ground_truth = augment_both.augment_image(__ground_truth)
    aug_image = augment_image.augment_image(aug_image)
    aug_image = aug_image / 255.0
    return aug_image, aug_ground_truth
def main():
    """Demo: run a crop -> pad -> affine pipeline on a white image carrying
    one bounding box, print the augmented box and show before/after
    windows via OpenCV."""
    white = np.zeros((1, 100, 100, 3), dtype=np.uint8) + 255
    boxes = ia.BoundingBoxesOnImage(
        [ia.BoundingBox(x1=0, x2=50, y1=0, y2=50)],
        shape=white.shape[1:])
    pipeline = iaa.Sequential([
        iaa.Crop(px=10),
        iaa.Pad(px=10, pad_cval=128),
        iaa.Affine(scale=0.5, cval=0),
    ])
    # Freeze randomness so images and boxes get the same transform.
    frozen = pipeline.to_deterministic()
    out_imgs = frozen.augment_images(white)
    out_boxes = frozen.augment_bounding_boxes([boxes])
    print("bbs:")
    for box in out_boxes[0].bounding_boxes:
        print(box)
    cv2.imshow('orig', white)
    cv2.imshow('aug', out_boxes[0].draw_on_image(out_imgs[0]))
    cv2.waitKey()
def test_dtype_preservation():
    """Check that each augmenter returns output with the same dtype as its
    input, for every dtype that augmenter is declared to support."""
    reseed()
    size = (4, 16, 16, 3)
    # One batch per dtype, with values spread roughly across each dtype's range.
    images = [
        np.random.uniform(0, 255, size).astype(np.uint8),
        np.random.uniform(0, 65535, size).astype(np.uint16),
        np.random.uniform(0, 4294967295, size).astype(np.uint32),
        np.random.uniform(-128, 127, size).astype(np.int16),
        np.random.uniform(-32768, 32767, size).astype(np.int32),
        np.random.uniform(0.0, 1.0, size).astype(np.float32),
        np.random.uniform(-1000.0, 1000.0, size).astype(np.float16),
        np.random.uniform(-1000.0, 1000.0, size).astype(np.float32),
        np.random.uniform(-1000.0, 1000.0, size).astype(np.float64)
    ]

    default_dtypes = set([arr.dtype for arr in images])

    # Some dtypes are here removed per augmenter, because the respective
    # augmenter does not support them. This test currently only checks whether
    # dtypes are preserved from in- to output for all dtypes that are supported
    # per augmenter.
    # dtypes are here removed via list comprehension instead of
    # `default_dtypes - set([dtype])`, because the latter one simply never
    # removed the dtype(s) for some reason
    def _not_dts(dts):
        return [dt for dt in default_dtypes if dt not in dts]

    # (augmenter, dtypes it must preserve) pairs.
    augs = [
        (iaa.Add((-5, 5), name="Add"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.AddElementwise((-5, 5), name="AddElementwise"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.AdditiveGaussianNoise(0.01*255, name="AdditiveGaussianNoise"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.Multiply((0.95, 1.05), name="Multiply"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.Dropout(0.01, name="Dropout"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.CoarseDropout(0.01, size_px=6, name="CoarseDropout"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.Invert(0.01, per_channel=True, name="Invert"),
         default_dtypes),
        (iaa.GaussianBlur(sigma=(0.95, 1.05), name="GaussianBlur"),
         _not_dts([np.float16])),
        (iaa.AverageBlur((3, 5), name="AverageBlur"),
         _not_dts([np.uint32, np.int32, np.float16])),
        (iaa.MedianBlur((3, 5), name="MedianBlur"),
         _not_dts([np.uint32, np.int32, np.float16, np.float64])),
        (iaa.BilateralBlur((3, 5), name="BilateralBlur"),
         _not_dts([np.uint16, np.uint32, np.int16, np.int32, np.float16,
                   np.float64])),
        (iaa.Sharpen((0.0, 0.1), lightness=(1.0, 1.2), name="Sharpen"),
         _not_dts([np.uint32, np.int32, np.float16, np.uint32])),
        (iaa.Emboss(alpha=(0.0, 0.1), strength=(0.5, 1.5), name="Emboss"),
         _not_dts([np.uint32, np.int32, np.float16, np.uint32])),
        (iaa.EdgeDetect(alpha=(0.0, 0.1), name="EdgeDetect"),
         _not_dts([np.uint32, np.int32, np.float16, np.uint32])),
        (iaa.DirectedEdgeDetect(alpha=(0.0, 0.1), direction=0,
                                name="DirectedEdgeDetect"),
         _not_dts([np.uint32, np.int32, np.float16, np.uint32])),
        (iaa.Fliplr(0.5, name="Fliplr"), default_dtypes),
        (iaa.Flipud(0.5, name="Flipud"), default_dtypes),
        (iaa.Affine(translate_px=(-5, 5), name="Affine-translate-px"),
         _not_dts([np.uint32, np.int32])),
        (iaa.Affine(translate_percent=(-0.05, 0.05),
                    name="Affine-translate-percent"),
         _not_dts([np.uint32, np.int32])),
        (iaa.Affine(rotate=(-20, 20), name="Affine-rotate"),
         _not_dts([np.uint32, np.int32])),
        (iaa.Affine(shear=(-20, 20), name="Affine-shear"),
         _not_dts([np.uint32, np.int32])),
        (iaa.Affine(scale=(0.9, 1.1), name="Affine-scale"),
         _not_dts([np.uint32, np.int32])),
        (iaa.PiecewiseAffine(scale=(0.001, 0.005), name="PiecewiseAffine"),
         default_dtypes),
        (iaa.ElasticTransformation(alpha=(0.1, 0.2), sigma=(0.1, 0.2),
                                   name="ElasticTransformation"),
         _not_dts([np.float16])),
        (iaa.Sequential([iaa.Identity(), iaa.Identity()],
                        name="SequentialNoop"),
         default_dtypes),
        (iaa.SomeOf(1, [iaa.Identity(), iaa.Identity()], name="SomeOfNoop"),
         default_dtypes),
        (iaa.OneOf([iaa.Identity(), iaa.Identity()], name="OneOfNoop"),
         default_dtypes),
        (iaa.Sometimes(0.5, iaa.Identity(), name="SometimesNoop"),
         default_dtypes),
        (iaa.Sequential([iaa.Add((-5, 5)), iaa.AddElementwise((-5, 5))],
                        name="Sequential"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.SomeOf(1, [iaa.Add((-5, 5)), iaa.AddElementwise((-5, 5))],
                    name="SomeOf"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.OneOf([iaa.Add((-5, 5)), iaa.AddElementwise((-5, 5))],
                   name="OneOf"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.Sometimes(0.5, iaa.Add((-5, 5)), name="Sometimes"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.Identity(name="Identity"), default_dtypes),
        (iaa.BlendAlpha((0.0, 0.1), iaa.Identity(),
                        name="BlendAlphaIdentity"),
         _not_dts([np.float64])),  # float64 requires float128 support
        (iaa.BlendAlphaElementwise((0.0, 0.1), iaa.Identity(),
                                   name="BlendAlphaElementwiseIdentity"),
         _not_dts([np.float64])),  # float64 requires float128 support
        (iaa.BlendAlphaSimplexNoise(iaa.Identity(),
                                    name="BlendAlphaSimplexNoiseIdentity"),
         _not_dts([np.float64])),  # float64 requires float128 support
        (iaa.BlendAlphaFrequencyNoise(exponent=(-2, 2),
                                      foreground=iaa.Identity(),
                                      name="BlendAlphaFrequencyNoiseIdentity"),
         _not_dts([np.float64])),
        (iaa.BlendAlpha((0.0, 0.1), iaa.Add(10), name="BlendAlpha"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.BlendAlphaElementwise((0.0, 0.1), iaa.Add(10),
                                   name="BlendAlphaElementwise"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.BlendAlphaSimplexNoise(iaa.Add(10),
                                    name="BlendAlphaSimplexNoise"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.BlendAlphaFrequencyNoise(exponent=(-2, 2),
                                      foreground=iaa.Add(10),
                                      name="BlendAlphaFrequencyNoise"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.Superpixels(p_replace=0.01, n_segments=64),
         _not_dts([np.float16, np.float32, np.float64])),
        (iaa.Resize({"height": 4, "width": 4}, name="Resize"),
         _not_dts([np.uint16, np.uint32, np.int16, np.int32,
                   np.float32, np.float16, np.float64])),
        (iaa.CropAndPad(px=(-10, 10), name="CropAndPad"),
         _not_dts([np.uint16, np.uint32, np.int16, np.int32,
                   np.float32, np.float16, np.float64])),
        (iaa.Pad(px=(0, 10), name="Pad"),
         _not_dts([np.uint16, np.uint32, np.int16, np.int32,
                   np.float32, np.float16, np.float64])),
        (iaa.Crop(px=(0, 10), name="Crop"),
         _not_dts([np.uint16, np.uint32, np.int16, np.int32,
                   np.float32, np.float16, np.float64]))
    ]

    for (aug, allowed_dtypes) in augs:
        for images_i in images:
            if images_i.dtype in allowed_dtypes:
                images_aug = aug.augment_images(images_i)
                # Output dtype must match input dtype exactly.
                assert images_aug.dtype == images_i.dtype
def main():
    """Visual demo of CropAndPad/Crop/Pad: apply fixed and randomized
    crop/pad configurations to a quokka image with keypoints and display
    the results (single result per augmenter, then 64-sample grids)."""
    image = ia.quokka(size=0.5)
    kps = [ia.KeypointsOnImage(
        [ia.Keypoint(x=245, y=203, vis=None, label=None),
         ia.Keypoint(x=365, y=195, vis=None, label=None),
         ia.Keypoint(x=313, y=269, vis=None, label=None)],
        shape=(image.shape[0]*2, image.shape[1]*2)
    )]
    # Keypoints were defined on the double-sized image; project them onto
    # the actual image shape.
    kps[0] = kps[0].on(image.shape)
    print("image shape:", image.shape)

    # Deterministic configurations: same transform for every image.
    augs = [
        iaa.CropAndPad(px=50, name="pad-by-50px"),
        iaa.CropAndPad(px=(10, 20, 30, 40), name="pad-by-10-20-30-40px"),
        iaa.CropAndPad(percent=0.1, name="pad-by-01percent"),
        iaa.CropAndPad(percent=(0.01, 0.02, 0.03, 0.04),
                       name="pad-by-001-002-003-004percent"),
        iaa.CropAndPad(px=-20, name="crop-by-20px"),
        iaa.CropAndPad(px=(-10, -20, -30, -40), name="crop-by-10-20-30-40px"),
        iaa.CropAndPad(percent=-0.1, name="crop-by-01percent"),
        iaa.CropAndPad(percent=(-0.01, -0.02, -0.03, -0.04),
                       name="crop-by-001-002-003-004percent")
    ]

    # Randomized configurations: resampled per image.
    augs_many = [
        iaa.Crop(px=(0, 50), name="native-crop-0-to-50px"),
        iaa.Crop(px=iap.DiscreteUniform(0, 50),
                 name="native-crop-0-to-50px-iap"),
        iaa.Pad(px=(0, 50), pad_mode="linear_ramp", pad_cval=(0, 255),
                name="native-pad-0-to-50px-pad-modes"),
        iaa.CropAndPad(px=(0, 50), sample_independently=False,
                       name="pad-by-0-to-50px-same"),
        iaa.CropAndPad(px=(0, 50), name="pad-by-0-to-50px"),
        iaa.CropAndPad(px=(0, 50), pad_mode=ia.ALL, pad_cval=(0, 255),
                       name="pad-by-0-to-50px-random-pad-modes-cvals"),
        iaa.CropAndPad(px=((0, 50), (0, 50), (0, 50), (0, 50)),
                       name="pad-by-0-to-50px-each"),
        iaa.CropAndPad(percent=(0, 0.1), sample_independently=False,
                       name="pad-by-0-to-01percent-same"),
        iaa.CropAndPad(percent=(0, 0.1), name="pad-by-0-to-01percent"),
        iaa.CropAndPad(percent=(0, 0.1), pad_mode=ia.ALL, pad_cval=(0, 255),
                       name="pad-by-0-to-01percent-random-pad-modes-cvals"),
        iaa.CropAndPad(percent=((0, 0.1), (0, 0.1), (0, 0.1), (0, 0.1)),
                       name="pad-by-0-to-01percent-each"),
        iaa.CropAndPad(px=(-50, 0), name="crop-by-50-to-0px"),
        iaa.CropAndPad(px=((-50, 0), (-50, 0), (-50, 0), (-50, 0)),
                       name="crop-by-50-to-0px-each"),
        iaa.CropAndPad(percent=(-0.1, 0), name="crop-by-01-to-0percent"),
        iaa.CropAndPad(percent=((-0.1, 0), (-0.1, 0), (-0.1, 0), (-0.1, 0)),
                       name="crop-by-01-to-0percent-each"),
        iaa.CropAndPad(px=(-50, 50), name="pad-and-crop-by-50px")
    ]

    print("original", image.shape)
    ia.imshow(kps[0].draw_on_image(image))

    print("-----------------")
    print("Same aug per image")
    print("-----------------")
    for aug in augs:
        img_aug = aug.augment_image(image)
        kps_aug = aug.augment_keypoints(kps)[0]
        img_aug_kps = kps_aug.draw_on_image(img_aug)
        # Print name, shape and the resulting aspect ratio.
        print(aug.name, img_aug_kps.shape,
              img_aug_kps.shape[1]/img_aug_kps.shape[0])
        ia.imshow(img_aug_kps)

    print("-----------------")
    print("Random aug per image")
    print("-----------------")
    for aug in augs_many:
        images_aug = []
        for _ in range(64):
            # to_deterministic() keeps image and keypoints in sync.
            aug_det = aug.to_deterministic()
            img_aug = aug_det.augment_image(image)
            kps_aug = aug_det.augment_keypoints(kps)[0]
            img_aug_kps = kps_aug.draw_on_image(img_aug)
            # White 1px border so grid cells are visually separated.
            img_aug_kps = np.pad(img_aug_kps, ((1, 1), (1, 1), (0, 0)),
                                 mode="constant", constant_values=255)
            images_aug.append(img_aug_kps)
        print(aug.name)
        ia.imshow(ia.draw_grid(images_aug))
def test_unusual_channel_numbers():
    """Check that augmenters preserve input shape (or resize as expected for
    Resize) for images with 0 (no channel axis), 1, 2, 4, 5, 10 and 20
    channels."""
    reseed()
    # (channel count, batch of 4 random 16x16 images with that many channels)
    images = [
        (0, create_random_images((4, 16, 16))),
        (1, create_random_images((4, 16, 16, 1))),
        (2, create_random_images((4, 16, 16, 2))),
        (4, create_random_images((4, 16, 16, 4))),
        (5, create_random_images((4, 16, 16, 5))),
        (10, create_random_images((4, 16, 16, 10))),
        (20, create_random_images((4, 16, 16, 20)))
    ]

    augs = [
        iaa.Add((-5, 5), name="Add"),
        iaa.AddElementwise((-5, 5), name="AddElementwise"),
        iaa.AdditiveGaussianNoise(0.01*255, name="AdditiveGaussianNoise"),
        iaa.Multiply((0.95, 1.05), name="Multiply"),
        iaa.Dropout(0.01, name="Dropout"),
        iaa.CoarseDropout(0.01, size_px=6, name="CoarseDropout"),
        iaa.Invert(0.01, per_channel=True, name="Invert"),
        iaa.GaussianBlur(sigma=(0.95, 1.05), name="GaussianBlur"),
        iaa.AverageBlur((3, 5), name="AverageBlur"),
        iaa.MedianBlur((3, 5), name="MedianBlur"),
        iaa.Sharpen((0.0, 0.1), lightness=(1.0, 1.2), name="Sharpen"),
        iaa.Emboss(alpha=(0.0, 0.1), strength=(0.5, 1.5), name="Emboss"),
        iaa.EdgeDetect(alpha=(0.0, 0.1), name="EdgeDetect"),
        iaa.DirectedEdgeDetect(alpha=(0.0, 0.1), direction=0,
                               name="DirectedEdgeDetect"),
        iaa.Fliplr(0.5, name="Fliplr"),
        iaa.Flipud(0.5, name="Flipud"),
        iaa.Affine(translate_px=(-5, 5), name="Affine-translate-px"),
        iaa.Affine(translate_percent=(-0.05, 0.05),
                   name="Affine-translate-percent"),
        iaa.Affine(rotate=(-20, 20), name="Affine-rotate"),
        iaa.Affine(shear=(-20, 20), name="Affine-shear"),
        iaa.Affine(scale=(0.9, 1.1), name="Affine-scale"),
        iaa.PiecewiseAffine(scale=(0.001, 0.005), name="PiecewiseAffine"),
        iaa.PerspectiveTransform(scale=(0.01, 0.10),
                                 name="PerspectiveTransform"),
        iaa.ElasticTransformation(alpha=(0.1, 0.2), sigma=(0.1, 0.2),
                                  name="ElasticTransformation"),
        iaa.Sequential([iaa.Add((-5, 5)), iaa.AddElementwise((-5, 5))]),
        iaa.SomeOf(1, [iaa.Add((-5, 5)), iaa.AddElementwise((-5, 5))]),
        iaa.OneOf([iaa.Add((-5, 5)), iaa.AddElementwise((-5, 5))]),
        iaa.Sometimes(0.5, iaa.Add((-5, 5)), name="Sometimes"),
        iaa.Identity(name="Noop"),
        iaa.BlendAlpha((0.0, 0.1), iaa.Add(10), name="BlendAlpha"),
        iaa.BlendAlphaElementwise((0.0, 0.1), iaa.Add(10),
                                  name="BlendAlphaElementwise"),
        iaa.BlendAlphaSimplexNoise(iaa.Add(10),
                                   name="BlendAlphaSimplexNoise"),
        iaa.BlendAlphaFrequencyNoise(exponent=(-2, 2),
                                     foreground=iaa.Add(10),
                                     name="BlendAlphaSimplexNoise"),
        iaa.Superpixels(p_replace=0.01, n_segments=64),
        iaa.Resize({"height": 4, "width": 4}, name="Resize"),
        iaa.CropAndPad(px=(-10, 10), name="CropAndPad"),
        iaa.Pad(px=(0, 10), name="Pad"),
        iaa.Crop(px=(0, 10), name="Crop")
    ]

    for aug in augs:
        for (nb_channels, images_c) in images:
            if aug.name != "Resize":
                # Shape must be preserved exactly, batch-wise and per image.
                images_aug = aug.augment_images(images_c)
                assert images_aug.shape == images_c.shape
                image_aug = aug.augment_image(images_c[0])
                assert image_aug.shape == images_c[0].shape
            else:
                # Resize changes H/W to 4x4 but must keep the channel axis.
                images_aug = aug.augment_images(images_c)
                image_aug = aug.augment_image(images_c[0])
                if images_c.ndim == 3:
                    assert images_aug.shape == (4, 4, 4)
                    assert image_aug.shape == (4, 4)
                else:
                    assert images_aug.shape == (4, 4, 4, images_c.shape[3])
                    assert image_aug.shape == (4, 4, images_c.shape[3])
def test_determinism():
    """Check that ``to_deterministic()`` freezes an augmenter's randomness:
    repeated calls on the same deterministic instance give identical
    outputs, while a fresh deterministic instance gives different ones."""
    reseed()
    # A few differently sized quokkas plus many identical small ones.
    images = [
        ia.quokka(size=(128, 128)),
        ia.quokka(size=(64, 64)),
        ia.quokka((128, 256))
    ]
    images.extend([ia.quokka(size=(16, 16))] * 20)

    keypoints = [
        ia.KeypointsOnImage([
            ia.Keypoint(x=20, y=10), ia.Keypoint(x=5, y=5),
            ia.Keypoint(x=10, y=43)], shape=(50, 60, 3))
    ] * 20

    augs = [
        iaa.Sequential([iaa.Fliplr(0.5), iaa.Flipud(0.5)]),
        iaa.SomeOf(1, [iaa.Fliplr(0.5), iaa.Flipud(0.5)]),
        iaa.OneOf([iaa.Fliplr(0.5), iaa.Flipud(0.5)]),
        iaa.Sometimes(0.5, iaa.Fliplr(1.0)),
        iaa.WithColorspace("HSV", children=iaa.Add((-50, 50))),
        iaa.Resize((0.5, 0.9)),
        iaa.CropAndPad(px=(-50, 50)),
        iaa.Pad(px=(1, 50)),
        iaa.Crop(px=(1, 50)),
        iaa.Fliplr(0.5),
        iaa.Flipud(0.5),
        iaa.Superpixels(p_replace=(0.25, 1.0), n_segments=(16, 128)),
        iaa.Grayscale(alpha=(0.1, 1.0)),
        iaa.GaussianBlur((0.1, 3.0)),
        iaa.AverageBlur((3, 11)),
        iaa.MedianBlur((3, 11)),
        iaa.Sharpen(alpha=(0.1, 1.0), lightness=(0.8, 1.2)),
        iaa.Emboss(alpha=(0.1, 1.0), strength=(0.8, 1.2)),
        iaa.EdgeDetect(alpha=(0.1, 1.0)),
        iaa.DirectedEdgeDetect(alpha=(0.1, 1.0), direction=(0.0, 1.0)),
        iaa.Add((-50, 50)),
        iaa.AddElementwise((-50, 50)),
        iaa.AdditiveGaussianNoise(scale=(0.1, 1.0)),
        iaa.Multiply((0.6, 1.4)),
        iaa.MultiplyElementwise((0.6, 1.4)),
        iaa.Dropout((0.3, 0.5)),
        iaa.CoarseDropout((0.3, 0.5), size_percent=(0.05, 0.2)),
        iaa.Invert(0.5),
        iaa.Affine(scale=(0.7, 1.3), translate_percent=(-0.1, 0.1),
                   rotate=(-20, 20), shear=(-20, 20), order=ia.ALL,
                   mode=ia.ALL, cval=(0, 255)),
        iaa.PiecewiseAffine(scale=(0.1, 0.3)),
        iaa.ElasticTransformation(alpha=10.0)
    ]

    # Subset of augmenters that move pixels, used for the keypoint checks.
    augs_affect_geometry = [
        iaa.Sequential([iaa.Fliplr(0.5), iaa.Flipud(0.5)]),
        iaa.SomeOf(1, [iaa.Fliplr(0.5), iaa.Flipud(0.5)]),
        iaa.OneOf([iaa.Fliplr(0.5), iaa.Flipud(0.5)]),
        iaa.Sometimes(0.5, iaa.Fliplr(1.0)),
        iaa.Resize((0.5, 0.9)),
        iaa.CropAndPad(px=(-50, 50)),
        iaa.Pad(px=(1, 50)),
        iaa.Crop(px=(1, 50)),
        iaa.Fliplr(0.5),
        iaa.Flipud(0.5),
        iaa.Affine(scale=(0.7, 1.3), translate_percent=(-0.1, 0.1),
                   rotate=(-20, 20), shear=(-20, 20), order=ia.ALL,
                   mode=ia.ALL, cval=(0, 255)),
        iaa.PiecewiseAffine(scale=(0.1, 0.3)),
        iaa.ElasticTransformation(alpha=(5, 100), sigma=(3, 5))
    ]

    for aug in augs:
        # Same deterministic instance twice -> identical; new deterministic
        # instance -> (very likely) different.
        aug_det = aug.to_deterministic()
        images_aug1 = aug_det.augment_images(images)
        images_aug2 = aug_det.augment_images(images)

        aug_det = aug.to_deterministic()
        images_aug3 = aug_det.augment_images(images)
        images_aug4 = aug_det.augment_images(images)

        assert array_equal_lists(images_aug1, images_aug2), \
            "Images (1, 2) expected to be identical for %s" % (aug.name,)

        assert array_equal_lists(images_aug3, images_aug4), \
            "Images (3, 4) expected to be identical for %s" % (aug.name,)

        assert not array_equal_lists(images_aug1, images_aug3), \
            "Images (1, 3) expected to be different for %s" % (aug.name,)

    for aug in augs_affect_geometry:
        # Same determinism checks, but on keypoints.
        aug_det = aug.to_deterministic()
        kps_aug1 = aug_det.augment_keypoints(keypoints)
        kps_aug2 = aug_det.augment_keypoints(keypoints)

        aug_det = aug.to_deterministic()
        kps_aug3 = aug_det.augment_keypoints(keypoints)
        kps_aug4 = aug_det.augment_keypoints(keypoints)

        assert keypoints_equal(kps_aug1, kps_aug2), \
            "Keypoints (1, 2) expected to be identical for %s" % (aug.name,)

        assert keypoints_equal(kps_aug3, kps_aug4), \
            "Keypoints (3, 4) expected to be identical for %s" % (aug.name,)

        assert not keypoints_equal(kps_aug1, kps_aug3), \
            "Keypoints (1, 3) expected to be different for %s" % (aug.name,)
sometimes( iaa.Affine( scale={ "x": (0.6, 1), "y": (0.6, 1) }, rotate=(-15, 15), # rotate by -45 to +45 degrees shear=(-15, 15), # shear by -16 to +16 degrees order=[ 0, 1 ], # use nearest neighbour or bilinear interpolation (fast) cval=(0, 0), # if mode is constant, use a cval between 0 and 255 mode='constant')), sometimes( iaa.Pad(percent=(0, 0.4), pad_mode='constant', pad_cval=(0, 0))), ]), iaa.OneOf([ sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.1))), sometimes(iaa.PiecewiseAffine( scale=(0.01, 0.05))), # sometimes move parts of the image around ]) ]) aug = iaa.Sequential([ iaa.SomeOf( (0, 6), [ (iaa.Superpixels(p_replace=(0, 1.0), n_segments=(100, 200))), # convert images into their superpixel representation iaa.OneOf([ iaa.GaussianBlur(
def test_many_augmenters(self):
    """Check keypoint augmentation consistency across many augmenters.

    For each augmenter, keypoints are augmented twice per deterministic
    instance: once directly, and once by rendering them into a keypoint
    image, augmenting that image and recovering the keypoints from it.
    The two results must land close to each other (average distance
    below 5 pixels per augmenter).

    Fixes over the previous version:
      * ``iaa.BlendAlphaFrequencyNoise`` was mistakenly named
        ``"BlendAlphaSimplexNoise"`` (copy-paste from the line above),
        so failure messages misattributed failures; it is now named
        ``"BlendAlphaFrequencyNoise"``.
      * ``iaa.Superpixels`` was the only augmenter without an explicit
        ``name=``; one was added for consistent diagnostics.
    """
    # Build a regular 5px grid of keypoints on a 40x60 image.
    keypoints = []
    for y in sm.xrange(40 // 5):
        for x in sm.xrange(60 // 5):
            keypoints.append(ia.Keypoint(y=y * 5, x=x * 5))
    keypoints_oi = ia.KeypointsOnImage(keypoints, shape=(40, 60, 3))
    keypoints_oi_empty = ia.KeypointsOnImage([], shape=(40, 60, 3))

    augs = [
        iaa.Add((-5, 5), name="Add"),
        iaa.AddElementwise((-5, 5), name="AddElementwise"),
        iaa.AdditiveGaussianNoise(0.01 * 255, name="AdditiveGaussianNoise"),
        iaa.Multiply((0.95, 1.05), name="Multiply"),
        iaa.Dropout(0.01, name="Dropout"),
        iaa.CoarseDropout(0.01, size_px=6, name="CoarseDropout"),
        iaa.Invert(0.01, per_channel=True, name="Invert"),
        iaa.GaussianBlur(sigma=(0.95, 1.05), name="GaussianBlur"),
        iaa.AverageBlur((3, 5), name="AverageBlur"),
        iaa.MedianBlur((3, 5), name="MedianBlur"),
        iaa.Sharpen((0.0, 0.1), lightness=(1.0, 1.2), name="Sharpen"),
        iaa.Emboss(alpha=(0.0, 0.1), strength=(0.5, 1.5), name="Emboss"),
        iaa.EdgeDetect(alpha=(0.0, 0.1), name="EdgeDetect"),
        iaa.DirectedEdgeDetect(alpha=(0.0, 0.1), direction=0,
                               name="DirectedEdgeDetect"),
        iaa.Fliplr(0.5, name="Fliplr"),
        iaa.Flipud(0.5, name="Flipud"),
        iaa.Affine(translate_px=(-5, 5), name="Affine-translate-px"),
        iaa.Affine(translate_percent=(-0.05, 0.05),
                   name="Affine-translate-percent"),
        iaa.Affine(rotate=(-20, 20), name="Affine-rotate"),
        iaa.Affine(shear=(-20, 20), name="Affine-shear"),
        iaa.Affine(scale=(0.9, 1.1), name="Affine-scale"),
        iaa.PiecewiseAffine(scale=(0.001, 0.005), name="PiecewiseAffine"),
        iaa.ElasticTransformation(alpha=(0.1, 0.2), sigma=(0.1, 0.2),
                                  name="ElasticTransformation"),
        iaa.BlendAlpha((0.0, 0.1), iaa.Add(10), name="BlendAlpha"),
        iaa.BlendAlphaElementwise((0.0, 0.1), iaa.Add(10),
                                  name="BlendAlphaElementwise"),
        iaa.BlendAlphaSimplexNoise(iaa.Add(10),
                                   name="BlendAlphaSimplexNoise"),
        # FIX: was name="BlendAlphaSimplexNoise" (duplicate of the entry
        # above), which made assertion messages ambiguous.
        iaa.BlendAlphaFrequencyNoise(exponent=(-2, 2),
                                     foreground=iaa.Add(10),
                                     name="BlendAlphaFrequencyNoise"),
        # FIX: explicit name added for consistency with all siblings.
        iaa.Superpixels(p_replace=0.01, n_segments=64, name="Superpixels"),
        iaa.Resize(0.5, name="Resize"),
        iaa.CropAndPad(px=(-10, 10), name="CropAndPad"),
        iaa.Pad(px=(0, 10), name="Pad"),
        iaa.Crop(px=(0, 10), name="Crop")
    ]
    for aug in augs:
        dss = []
        for i in sm.xrange(10):
            aug_det = aug.to_deterministic()

            # Empty keypoint inputs must pass through unchanged.
            kp_fully_empty_aug = aug_det.augment_keypoints([])
            assert kp_fully_empty_aug == []
            kp_first_empty_aug = aug_det.augment_keypoints(
                keypoints_oi_empty)
            assert len(kp_first_empty_aug.keypoints) == 0

            # Path 1: render keypoints into an image, augment the image,
            # then recover keypoint coordinates from the augmented image.
            kp_image = keypoints_oi.to_keypoint_image(size=5)
            with assertWarns(self, iaa.SuspiciousSingleImageShapeWarning):
                kp_image_aug = aug_det.augment_image(kp_image)
            kp_image_aug_rev = ia.KeypointsOnImage.from_keypoint_image(
                kp_image_aug,
                if_not_found_coords={"x": -9999, "y": -9999},
                nb_channels=1
            )

            # Path 2: augment the keypoints directly with the same
            # deterministic augmenter state.
            kp_aug = aug_det.augment_keypoints([keypoints_oi])[0]
            ds = []
            assert len(kp_image_aug_rev.keypoints) == len(kp_aug.keypoints), (
                "Lost keypoints for '%s' (%d vs expected %d)" % (
                    aug.name,
                    len(kp_aug.keypoints),
                    len(kp_image_aug_rev.keypoints))
            )
            gen = zip(kp_aug.keypoints, kp_image_aug_rev.keypoints)
            for kp_pred, kp_pred_img in gen:
                # (-9999, -9999) marks keypoints that left the image.
                kp_pred_lost = (kp_pred.x == -9999 and kp_pred.y == -9999)
                kp_pred_img_lost = (kp_pred_img.x == -9999
                                    and kp_pred_img.y == -9999)
                # Only compare keypoints that survived both paths.
                if not kp_pred_lost and not kp_pred_img_lost:
                    d = np.sqrt((kp_pred.x - kp_pred_img.x) ** 2
                                + (kp_pred.y - kp_pred_img.y) ** 2)
                    ds.append(d)
            dss.extend(ds)
            if len(ds) == 0:
                print("[INFO] No valid keypoints found for '%s' "
                      "in test_keypoint_augmentation()" % (str(aug),))
        assert np.average(dss) < 5.0, \
            "Average distance too high (%.2f, with ds: %s)" \
            % (np.average(dss), str(dss))
"Add_To_Hue_And_Saturation": lambda lo, hi: iaa.AddToHueAndSaturation((lo, hi), per_channel=True), # Increase each pixel’s channel-value (redness/greenness/blueness) [0, 1, 2] by value in between lo and hi: "Increase_Channel": lambda channel, lo, hi: iaa.WithChannels(channel, iaa.Add((lo, hi))), # Rotate each image’s channel [R=0, G=1, B=2] by value in between lo and hi degrees: "Rotate_Channel": lambda channel, lo, hi: iaa.WithChannels(channel, iaa.Affine(rotate=(lo, hi))), # Augmenter that never changes input images (“no operation”). "No_Operation": iaa.Noop(), # Pads images, i.e. adds columns/rows to them. Pads image by value in between lo and hi # percent relative to its original size (only accepts positive values in range[0, 1]): # If s_i is false, The value will be sampled once per image and used for all sides # (i.e. all sides gain/lose the same number of rows/columns) # NOTE: automatically resizes images back to their original size after it has augmented them. "Pad_Percent": lambda lo, hi, s_i: iaa.Pad(percent=(lo, hi), keep_size=True, sample_independently=s_i), # Pads images by a number of pixels between lo and hi # If s_i is false, The value will be sampled once per image and used for all sides # (i.e. all sides gain/lose the same number of rows/columns) "Pad_Pixels": lambda lo, hi, s_i: iaa.Pad(px=(lo, hi), keep_size=True, sample_independently=s_i), # Crops/cuts away pixels at the sides of the image. # Crops images by value in between lo and hi (only accepts positive values in range[0, 1]): # If s_i is false, The value will be sampled once per image and used for all sides # (i.e. all sides gain/lose the same number of rows/columns) # NOTE: automatically resizes images back to their original size after it has augmented them. 
"Crop_Percent": lambda lo, hi, s_i: iaa.Crop(percent=(lo, hi), keep_size=True, sample_independently=s_i), # Crops images by a number of pixels between lo and hi # If s_i is false, The value will be sampled once per image and used for all sides
def draw_per_augmenter_images():
    """Generate one example image file per augmenter for the docs.

    Loads the quokka demo image, applies a curated grid of augmenters
    (five parameterizations per augmenter, one row each), draws the
    augmented keypoints onto every result, saves one ``examples_*.jpg``
    per row into ``IMAGES_DIR`` and prints the matching markdown image
    markup for the README.
    """
    print("[draw_per_augmenter_images] Loading image...")
    #image = misc.imresize(ndimage.imread("quokka.jpg")[0:643, 0:643], (128, 128))
    image = ia.quokka_square(size=(128, 128))
    # Three reference keypoints that are tracked through every augmentation.
    keypoints = [ia.Keypoint(x=34, y=15), ia.Keypoint(x=85, y=13),
                 ia.Keypoint(x=63, y=73)]  # left ear, right ear, mouth
    keypoints = [ia.KeypointsOnImage(keypoints, shape=image.shape)]

    print("[draw_per_augmenter_images] Initializing...")
    # Each entry: (per-row RNG seed, row title, list of (cell title, augmenter)).
    # The seeds were presumably hand-picked so the examples look good — do
    # not change them casually.
    rows_augmenters = [
        (0, "Noop", [("", iaa.Noop()) for _ in sm.xrange(5)]),
        (0, "Crop\n(top, right,\nbottom, left)", [(str(vals), iaa.Crop(px=vals)) for vals in [(2, 0, 0, 0), (0, 8, 8, 0), (4, 0, 16, 4), (8, 0, 0, 32), (32, 64, 0, 0)]]),
        (0, "Pad\n(top, right,\nbottom, left)", [(str(vals), iaa.Pad(px=vals)) for vals in [(2, 0, 0, 0), (0, 8, 8, 0), (4, 0, 16, 4), (8, 0, 0, 32), (32, 64, 0, 0)]]),
        (0, "Fliplr", [(str(p), iaa.Fliplr(p)) for p in [0, 0, 1, 1, 1]]),
        (0, "Flipud", [(str(p), iaa.Flipud(p)) for p in [0, 0, 1, 1, 1]]),
        (0, "Superpixels\np_replace=1", [("n_segments=%d" % (n_segments,), iaa.Superpixels(p_replace=1.0, n_segments=n_segments)) for n_segments in [25, 50, 75, 100, 125]]),
        (0, "Superpixels\nn_segments=100", [("p_replace=%.2f" % (p_replace,), iaa.Superpixels(p_replace=p_replace, n_segments=100)) for p_replace in [0, 0.25, 0.5, 0.75, 1.0]]),
        (0, "Invert", [("p=%d" % (p,), iaa.Invert(p=p)) for p in [0, 0, 1, 1, 1]]),
        (0, "Invert\n(per_channel)", [("p=%.2f" % (p,), iaa.Invert(p=p, per_channel=True)) for p in [0.5, 0.5, 0.5, 0.5, 0.5]]),
        (0, "Add", [("value=%d" % (val,), iaa.Add(val)) for val in [-45, -25, 0, 25, 45]]),
        (0, "Add\n(per channel)", [("value=(%d, %d)" % (vals[0], vals[1],), iaa.Add(vals, per_channel=True)) for vals in [(-55, -35), (-35, -15), (-10, 10), (15, 35), (35, 55)]]),
        (0, "AddToHueAndSaturation", [("value=%d" % (val,), iaa.AddToHueAndSaturation(val)) for val in [-45, -25, 0, 25, 45]]),
        (0, "Multiply", [("value=%.2f" % (val,), iaa.Multiply(val)) for val in [0.25, 0.5, 1.0, 1.25, 1.5]]),
        (1, "Multiply\n(per channel)", [("value=(%.2f, %.2f)" % (vals[0], vals[1],), iaa.Multiply(vals, per_channel=True)) for vals in [(0.15, 0.35), (0.4, 0.6), (0.9, 1.1), (1.15, 1.35), (1.4, 1.6)]]),
        (0, "GaussianBlur", [("sigma=%.2f" % (sigma,), iaa.GaussianBlur(sigma=sigma)) for sigma in [0.25, 0.50, 1.0, 2.0, 4.0]]),
        (0, "AverageBlur", [("k=%d" % (k,), iaa.AverageBlur(k=k)) for k in [1, 3, 5, 7, 9]]),
        (0, "MedianBlur", [("k=%d" % (k,), iaa.MedianBlur(k=k)) for k in [1, 3, 5, 7, 9]]),
        (0, "BilateralBlur\nsigma_color=250,\nsigma_space=250", [("d=%d" % (d,), iaa.BilateralBlur(d=d, sigma_color=250, sigma_space=250)) for d in [1, 3, 5, 7, 9]]),
        (0, "Sharpen\n(alpha=1)", [("lightness=%.2f" % (lightness,), iaa.Sharpen(alpha=1, lightness=lightness)) for lightness in [0, 0.5, 1.0, 1.5, 2.0]]),
        (0, "Emboss\n(alpha=1)", [("strength=%.2f" % (strength,), iaa.Emboss(alpha=1, strength=strength)) for strength in [0, 0.5, 1.0, 1.5, 2.0]]),
        (0, "EdgeDetect", [("alpha=%.2f" % (alpha,), iaa.EdgeDetect(alpha=alpha)) for alpha in [0.0, 0.25, 0.5, 0.75, 1.0]]),
        # Directions are the five equally spaced angles, normalized to [0, 1).
        (0, "DirectedEdgeDetect\n(alpha=1)", [("direction=%.2f" % (direction,), iaa.DirectedEdgeDetect(alpha=1, direction=direction)) for direction in [0.0, 1*(360/5)/360, 2*(360/5)/360, 3*(360/5)/360, 4*(360/5)/360]]),
        (0, "AdditiveGaussianNoise", [("scale=%.2f*255" % (scale,), iaa.AdditiveGaussianNoise(scale=scale * 255)) for scale in [0.025, 0.05, 0.1, 0.2, 0.3]]),
        (0, "AdditiveGaussianNoise\n(per channel)", [("scale=%.2f*255" % (scale,), iaa.AdditiveGaussianNoise(scale=scale * 255, per_channel=True)) for scale in [0.025, 0.05, 0.1, 0.2, 0.3]]),
        (0, "Dropout", [("p=%.2f" % (p,), iaa.Dropout(p=p)) for p in [0.025, 0.05, 0.1, 0.2, 0.4]]),
        (0, "Dropout\n(per channel)", [("p=%.2f" % (p,), iaa.Dropout(p=p, per_channel=True)) for p in [0.025, 0.05, 0.1, 0.2, 0.4]]),
        (3, "CoarseDropout\n(p=0.2)", [("size_percent=%.2f" % (size_percent,), iaa.CoarseDropout(p=0.2, size_percent=size_percent, min_size=2)) for size_percent in [0.3, 0.2, 0.1, 0.05, 0.02]]),
        (0, "CoarseDropout\n(p=0.2, per channel)", [("size_percent=%.2f" % (size_percent,), iaa.CoarseDropout(p=0.2, size_percent=size_percent, per_channel=True, min_size=2)) for size_percent in [0.3, 0.2, 0.1, 0.05, 0.02]]),
        (0, "SaltAndPepper", [("p=%.2f" % (p,), iaa.SaltAndPepper(p=p)) for p in [0.025, 0.05, 0.1, 0.2, 0.4]]),
        (0, "Salt", [("p=%.2f" % (p,), iaa.Salt(p=p)) for p in [0.025, 0.05, 0.1, 0.2, 0.4]]),
        (0, "Pepper", [("p=%.2f" % (p,), iaa.Pepper(p=p)) for p in [0.025, 0.05, 0.1, 0.2, 0.4]]),
        (0, "CoarseSaltAndPepper\n(p=0.2)", [("size_percent=%.2f" % (size_percent,), iaa.CoarseSaltAndPepper(p=0.2, size_percent=size_percent, min_size=2)) for size_percent in [0.3, 0.2, 0.1, 0.05, 0.02]]),
        (0, "CoarseSalt\n(p=0.2)", [("size_percent=%.2f" % (size_percent,), iaa.CoarseSalt(p=0.2, size_percent=size_percent, min_size=2)) for size_percent in [0.3, 0.2, 0.1, 0.05, 0.02]]),
        (0, "CoarsePepper\n(p=0.2)", [("size_percent=%.2f" % (size_percent,), iaa.CoarsePepper(p=0.2, size_percent=size_percent, min_size=2)) for size_percent in [0.3, 0.2, 0.1, 0.05, 0.02]]),
        (0, "ContrastNormalization", [("alpha=%.1f" % (alpha,), iaa.ContrastNormalization(alpha=alpha)) for alpha in [0.5, 0.75, 1.0, 1.25, 1.50]]),
        (0, "ContrastNormalization\n(per channel)", [("alpha=(%.2f, %.2f)" % (alphas[0], alphas[1],), iaa.ContrastNormalization(alpha=alphas, per_channel=True)) for alphas in [(0.4, 0.6), (0.65, 0.85), (0.9, 1.1), (1.15, 1.35), (1.4, 1.6)]]),
        (0, "Grayscale", [("alpha=%.1f" % (alpha,), iaa.Grayscale(alpha=alpha)) for alpha in [0.0, 0.25, 0.5, 0.75, 1.0]]),
        (6, "PerspectiveTransform", [("scale=%.3f" % (scale,), iaa.PerspectiveTransform(scale=scale)) for scale in [0.025, 0.05, 0.075, 0.10, 0.125]]),
        (0, "PiecewiseAffine", [("scale=%.3f" % (scale,), iaa.PiecewiseAffine(scale=scale)) for scale in [0.015, 0.03, 0.045, 0.06, 0.075]]),
        (0, "Affine: Scale", [("%.1fx" % (scale,), iaa.Affine(scale=scale)) for scale in [0.1, 0.5, 1.0, 1.5, 1.9]]),
        (0, "Affine: Translate", [("x=%d y=%d" % (x, y), iaa.Affine(translate_px={"x": x, "y": y})) for x, y in [(-32, -16), (-16, -32), (-16, -8), (16, 8), (16, 32)]]),
        (0, "Affine: Rotate", [("%d deg" % (rotate,), iaa.Affine(rotate=rotate)) for rotate in [-90, -45, 0, 45, 90]]),
        (0, "Affine: Shear", [("%d deg" % (shear,), iaa.Affine(shear=shear)) for shear in [-45, -25, 0, 25, 45]]),
        (0, "Affine: Modes", [(mode, iaa.Affine(translate_px=-32, mode=mode)) for mode in ["constant", "edge", "symmetric", "reflect", "wrap"]]),
        (0, "Affine: cval", [("%d" % (int(cval*255),), iaa.Affine(translate_px=-32, cval=int(cval*255), mode="constant")) for cval in [0.0, 0.25, 0.5, 0.75, 1.0]]),
        (
            2,
            "Affine: all",
            [
                (
                    "",
                    iaa.Affine(
                        scale={"x": (0.5, 1.5), "y": (0.5, 1.5)},
                        translate_px={"x": (-32, 32), "y": (-32, 32)},
                        rotate=(-45, 45),
                        shear=(-32, 32),
                        mode=ia.ALL,
                        cval=(0.0, 1.0)
                    )
                )
                for _ in sm.xrange(5)
            ]
        ),
        (1, "ElasticTransformation\n(sigma=0.2)", [("alpha=%.1f" % (alpha,), iaa.ElasticTransformation(alpha=alpha, sigma=0.2)) for alpha in [0.1, 0.5, 1.0, 3.0, 9.0]]),
        (0, "Alpha\nwith EdgeDetect(1.0)", [("factor=%.1f" % (factor,), iaa.Alpha(factor=factor, first=iaa.EdgeDetect(1.0))) for factor in [0.0, 0.25, 0.5, 0.75, 1.0]]),
        (4, "Alpha\nwith EdgeDetect(1.0)\n(per channel)", [("factor=(%.2f, %.2f)" % (factor[0], factor[1]), iaa.Alpha(factor=factor, first=iaa.EdgeDetect(1.0), per_channel=0.5)) for factor in [(0.0, 0.2), (0.15, 0.35), (0.4, 0.6), (0.65, 0.85), (0.8, 1.0)]]),
        # NOTE(review): the loop variable `alpha` below is never used — the
        # comprehension only repeats the same (unparameterized) augmenter
        # five times; presumably intentional, since the noise is random.
        (15, "SimplexNoiseAlpha\nwith EdgeDetect(1.0)", [("", iaa.SimplexNoiseAlpha(first=iaa.EdgeDetect(1.0))) for alpha in [0.0, 0.25, 0.5, 0.75, 1.0]]),
        (9, "FrequencyNoiseAlpha\nwith EdgeDetect(1.0)", [("exponent=%.1f" % (exponent,), iaa.FrequencyNoiseAlpha(exponent=exponent, first=iaa.EdgeDetect(1.0), size_px_max=16, upscale_method="linear", sigmoid=False)) for exponent in [-4, -2, 0, 2, 4]])
    ]

    print("[draw_per_augmenter_images] Augmenting...")
    # Apply every augmenter deterministically so image and keypoints are
    # transformed with the same sampled parameters.
    rows = []
    for (row_seed, row_name, augmenters) in rows_augmenters:
        ia.seed(row_seed)
        #for img_title, augmenter in augmenters:
        #    #aug.reseed(1000)
        #    pass
        row_images = []
        row_keypoints = []
        row_titles = []
        for img_title, augmenter in augmenters:
            aug_det = augmenter.to_deterministic()
            row_images.append(aug_det.augment_image(image))
            row_keypoints.append(aug_det.augment_keypoints(keypoints)[0])
            row_titles.append(img_title)
        rows.append((row_name, row_images, row_keypoints, row_titles))

    # matplotlib drawing routine (disabled; kept for reference)
    """
    print("[draw_per_augmenter_images] Plotting...")
    width = 8
    height = int(1.5 * len(rows_augmenters))
    fig = plt.figure(figsize=(width, height))
    grid_rows = len(rows)
    grid_cols = 1 + 5
    gs = gridspec.GridSpec(grid_rows, grid_cols, width_ratios=[2, 1, 1, 1, 1, 1])
    axes = []
    for i in sm.xrange(grid_rows):
        axes.append([plt.subplot(gs[i, col_idx]) for col_idx in sm.xrange(grid_cols)])
    fig.tight_layout()
    #fig.subplots_adjust(bottom=0.2 / grid_rows, hspace=0.22)
    #fig.subplots_adjust(wspace=0.005, hspace=0.425, bottom=0.02)
    fig.subplots_adjust(wspace=0.005, hspace=0.005, bottom=0.02)
    for row_idx, (row_name, row_images, row_keypoints, row_titles) in enumerate(rows):
        axes_row = axes[row_idx]
        for col_idx in sm.xrange(grid_cols):
            ax = axes_row[col_idx]
            ax.cla()
            ax.axis("off")
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            if col_idx == 0:
                ax.text(0, 0.5, row_name, color="black")
            else:
                cell_image = row_images[col_idx-1]
                cell_keypoints = row_keypoints[col_idx-1]
                cell_image_kp = cell_keypoints.draw_on_image(cell_image, size=5)
                ax.imshow(cell_image_kp)
                x = 0
                y = 145
                #ax.text(x, y, row_titles[col_idx-1], color="black", backgroundcolor="white", fontsize=6)
                ax.text(x, y, row_titles[col_idx-1], color="black", fontsize=7)
    fig.savefig("examples.jpg", bbox_inches="tight")
    #plt.show()
    """

    # simpler and faster drawing routine (disabled; kept for reference)
    """
    output_image = ExamplesImage(128, 128, 128+64, 32)
    for (row_name, row_images, row_keypoints, row_titles) in rows:
        row_images_kps = []
        for image, keypoints in zip(row_images, row_keypoints):
            row_images_kps.append(keypoints.draw_on_image(image, size=5))
        output_image.add_row(row_name, row_images_kps, row_titles)
    misc.imsave("examples.jpg", output_image.draw())
    """

    # routine to draw many single files — one jpg per augmenter row, plus
    # the matching markdown image markup printed at the end.
    seen = defaultdict(lambda: 0)
    markups = []
    for (row_name, row_images, row_keypoints, row_titles) in rows:
        output_image = ExamplesImage(128, 128, 128+64, 32)
        row_images_kps = []
        for image, keypoints in zip(row_images, row_keypoints):
            row_images_kps.append(keypoints.draw_on_image(image, size=5))
        output_image.add_row(row_name, row_images_kps, row_titles)
        # Derive a filesystem-safe name from the (possibly multi-line) title;
        # only the first title line is used.
        if "\n" in row_name:
            row_name_clean = row_name[0:row_name.find("\n")+1]
        else:
            row_name_clean = row_name
        row_name_clean = re.sub(r"[^a-z0-9]+", "_", row_name_clean.lower())
        row_name_clean = row_name_clean.strip("_")
        # Disambiguate repeated names (e.g. per-channel variants) with a suffix.
        if seen[row_name_clean] > 0:
            row_name_clean = "%s_%d" % (row_name_clean, seen[row_name_clean] + 1)
        fp = os.path.join(IMAGES_DIR, "examples_%s.jpg" % (row_name_clean,))
        #misc.imsave(fp, output_image.draw())
        save(fp, output_image.draw())
        seen[row_name_clean] += 1
        # Markdown image markup for the README; strip characters that would
        # break the alt-text/title syntax.
        markup_descr = row_name.replace('"', '') \
            .replace("\n", " ") \
            .replace("(", "") \
            .replace(")", "")
        markup = '![%s](%s?raw=true "%s")' % (markup_descr, fp, markup_descr)
        markups.append(markup)

    for markup in markups:
        print(markup)
def build_augmentators(self):
    """Build the two line-image augmentation pipelines.

    Populates two ``iaa.Sequential`` pipelines on the instance:

    * ``self.augment_lines_general`` -- full pipeline for regular line
      images: shear/rotation, mandatory padding, optional perspective and
      elastic distortion, rare skeletonization, and (grayscale-only)
      photometric noise picked via ``OneOf``.
    * ``self.augment_lines_short_image`` -- same overall structure with
      reduced padding, perspective and blur magnitudes; its noise stages
      run in sequence instead of ``OneOf``.

    Probabilities multiplied by ``self.grayscale_only`` /
    ``self.background_images_value`` act as on/off switches: a flag of 0
    disables the corresponding augmenter entirely.

    ``ItalicizeLine``, ``RotateLine``, ``PerspectiveTransform``,
    ``Skeletonize``, ``PencilStroke`` and ``BackgroundImageNoises`` are
    project-local augmenters -- see their own definitions for semantics.
    """
    # Debug output of the pipeline switches.
    print(self.grayscale_only, self.background_images_value)
    self.augment_lines_general = iaa.Sequential([
        # Geometric distortions; cval fills uncovered background.
        iaa.Sometimes(0.3, ItalicizeLine(shear=(-30, 31), cval=self.cval)),
        iaa.Sometimes(0.3, RotateLine(angle=(-5, 5), cval=self.cval)),
        # Always pad, either relative (percent) or absolute (pixels).
        iaa.OneOf([
            iaa.Pad(percent=((0.02, 0.1), (0.01, 0.1), (0.02, 0.1),
                             (0.02, 0.1)),
                    pad_mode='constant', pad_cval=self.cval),
            iaa.Pad(px=((2, 20), (2, 60), (2, 20), (2, 60)),
                    pad_mode='constant', pad_cval=self.cval),
        ]),
        iaa.Sometimes(
            0.3,
            PerspectiveTransform((0.05, 0.15), cval=self.cval,
                                 keep_size=False)),
        iaa.Sometimes(
            0.3,
            iaa.ElasticTransformation(alpha=(0, 1.0), sigma=(0.4, 0.6),
                                      cval=self.cval)),
        # Rarely thin strokes down to a one-pixel skeleton.
        iaa.Sometimes(0.02, Skeletonize(self.is_binary)),
        # Everything below is photometric and only active for grayscale.
        iaa.Sometimes(0.1 * self.grayscale_only,
                      iaa.ContrastNormalization((0.5, 1.5))),
        iaa.Sometimes(0.3 * self.grayscale_only, PencilStroke()),
        iaa.Sometimes(
            self.grayscale_only,
            iaa.OneOf([
                # Branch 1: optional blur.
                iaa.Sometimes(
                    0.5,
                    iaa.OneOf([
                        iaa.GaussianBlur((0.2, 1.0)),
                        iaa.AverageBlur(k=(1, 5)),
                        iaa.MedianBlur(k=(1, 3))
                    ])),
                # Branch 2: one intensity/dropout/noise perturbation.
                iaa.OneOf([
                    iaa.Add((-50, 30)),
                    iaa.Multiply((0.9, 1.1)),
                    iaa.OneOf([
                        iaa.Dropout(p=(0.01, 0.05)),
                        iaa.CoarseDropout((0.01, 0.02),
                                          size_percent=(0.1, 0.25))
                    ]),
                    iaa.Sometimes(
                        0.7,
                        iaa.OneOf([
                            # Per-pixel noise at four increasing magnitudes.
                            iaa.AddElementwise((-10 * n, 5 * n))
                            for n in range(1, 5)
                        ] + [
                            iaa.AdditiveGaussianNoise(
                                scale=(0.05 * 255, 0.1 * 255)),
                            iaa.MultiplyElementwise((0.95, 1.05))
                        ]))
                ]),
                # Branch 3: overlay background-image noise (only when both
                # flags are non-zero).
                iaa.Sometimes(
                    self.grayscale_only * self.background_images_value,
                    BackgroundImageNoises(self.background_images_path)),
            ]))
    ])
    # reduce absolute padding size, perspective transform value, gaussblur
    self.augment_lines_short_image = iaa.Sequential([
        iaa.Sometimes(0.4, ItalicizeLine(shear=(-25, 25), cval=self.cval)),
        iaa.Sometimes(0.4, RotateLine(angle=(-5, 5), cval=self.cval)),
        iaa.OneOf([
            iaa.Pad(percent=((0.01, 0.05), (0.01, 0.05), (0.01, 0.05),
                             (0.01, 0.05)),
                    pad_mode='constant', pad_cval=self.cval),
            iaa.Pad(px=((3, 10), (3, 30), (3, 10), (3, 30)),
                    pad_mode='constant', pad_cval=self.cval),
        ]),
        iaa.Sometimes(
            0.3,
            PerspectiveTransform((0.02, 0.05), cval=self.cval,
                                 keep_size=False)),
        iaa.Sometimes(
            0.3,
            iaa.ElasticTransformation(alpha=(0, 1.0), sigma=(0.4, 0.6),
                                      cval=self.cval)),
        iaa.Sometimes(0.02, Skeletonize(self.is_binary)),
        iaa.Sometimes(0.1 * self.grayscale_only,
                      iaa.ContrastNormalization((0.5, 1.5))),
        iaa.Sometimes(0.3 * self.grayscale_only, PencilStroke()),
        # Unlike the general pipeline, the noise stages here are chained in
        # a Sequential (each applied independently with its own chance).
        iaa.Sometimes(
            self.grayscale_only,
            iaa.Sequential([
                iaa.Sometimes(
                    0.5,
                    iaa.OneOf([
                        iaa.GaussianBlur((0.2, 0.5)),
                        iaa.AverageBlur(k=(1, 5)),
                        iaa.MedianBlur(k=(1, 3))
                    ])),
                iaa.Sequential([
                    iaa.Sometimes(
                        0.7,
                        iaa.OneOf([
                            iaa.Add((-60, 0)),
                            iaa.Multiply((0.6, 0.9)),
                        ])),
                    iaa.Sometimes(
                        0.7,
                        iaa.OneOf([
                            iaa.Dropout(p=(0.01, 0.05)),
                            iaa.CoarseDropout((0.01, 0.02),
                                              size_percent=(0.1, 0.25))
                        ])),
                    iaa.Sometimes(
                        0.7,
                        iaa.OneOf([
                            iaa.AddElementwise((-10 * n, 5 * n))
                            for n in range(1, 5)
                        ] + [
                            iaa.AdditiveGaussianNoise(
                                scale=(0.05 * 255, 0.1 * 255)),
                            iaa.MultiplyElementwise((0.95, 1.05))
                        ]))
                ]),
                # iaa.Sometimes(
                #     self.grayscale_only * self.background_images_value,
                #     BackgroundImageNoises(self.background_images_path)),
            ]))
    ])
class NeuralNet: # History of accuracies on train set accs = [] # History of accuracies on test set val_accs = [] # Image augmenters augmenters = [ ia.Noop(), ia.CoarseSaltAndPepper(p=0.2, size_percent=0.30), ia.CoarseSaltAndPepper(p=0.4, size_percent=0.30), ia.Pad(px=(3, 0, 0, 0)), ia.Pad(px=(0, 3, 0, 0)), ia.Pad(px=(0, 0, 3, 0)), ia.Pad(px=(0, 0, 0, 3)), ia.GaussianBlur(sigma=0.25), ia.GaussianBlur(sigma=0.5), ia.GaussianBlur(sigma=1), ia.GaussianBlur(sigma=2), ia.Affine(rotate=-2), ia.Affine(rotate=2), ia.PiecewiseAffine(scale=0.007) ] def __init__( self, experiment_name: str, # Input shape input_shape: Tuple[int, int, int], # Mini batch size mb_size: Optional = 32, # Number of filters in each convolutional layer filters_count: Optional[List[int]] = None, # Size of kernel, common for each convolutional layer kernel_size: Optional[List[int]] = None, # Neurons count in each dense layer dense_layers: Optional[List[int]] = None, # Learning rate learning_rate: float = 0.005, # Number of epochs nb_epochs: int = 50000, # Steps per epoch. Each |steps_per_epoch| epochs net is evaluated on val set. steps_per_epoch: int = 1000, # Dropout after each dense layer (excluding last) dropout_rate: float = 0.5, # Whether or not augmentation should be performed when choosing next # batch (as opposed to augmenting the entire augment_on_the_fly: bool = True, augmenters: Optional[List[ia.Augmenter]] = None, min_label: int = 0, max_label: int = NUM_CLASSES, # Whether or not classification should be in binary mode. If yes, # *please* provide the |positive_class| parameter. binary_classification: bool = False, # ID of the subject that is considered "positive" in case of # binary classification. 
positive_class: int = 0, # If provided, will store checkpoints to ckpt_file ckpt_file: Optional[str] = None, ): self.experiment_name = experiment_name self.input_shape = input_shape self.mb_size = mb_size self.learning_rate = learning_rate self.nb_epochs = nb_epochs self.steps_per_epoch = steps_per_epoch self.dropout = dropout_rate self.augment_on_the_fly = augment_on_the_fly self.ckpt_file = ckpt_file self.binary_classification = binary_classification self.positive_class = positive_class self.num_classes = NUM_CLASSES if not binary_classification else 1 if dense_layers is None: dense_layers = [32, self.num_classes] self.dense_layers = dense_layers if filters_count is None: filters_count = [32, 64] self.filters_count = filters_count if kernel_size is None: kernel_size = [5, 5] self.kernel_size = kernel_size if binary_classification: self._confusion_matrix = np.zeros((2, 2)) else: self._confusion_matrix = np.zeros( (self.num_classes, self.num_classes)) if augmenters is not None: self.augmenters = augmenters self._get_data(range_beg=min_label, range_end=max_label) # Initialize logging. self.logger = logging.Logger("main_logger", level=logging.INFO) log_file = 'log.txt' formatter = logging.Formatter(fmt='{levelname:<7} {message}', style='{') console_handler = logging.StreamHandler() console_handler.setFormatter(formatter) file_handler = logging.FileHandler(log_file) file_handler.setFormatter(formatter) self.logger.addHandler(console_handler) self.logger.addHandler(file_handler) def _augment_single_input(self, inp_x: np.ndarray): """ Augments single input with given augmenter. 
:param inp_x: single input :return: augmented input """ augmenter = choice(self.augmenters) inp_x = inp_x.reshape([1] + list(inp_x.shape)) augmented = np.ndarray.astype( augmenter.augment_images(np.ndarray.astype(inp_x * 256, np.uint8)), np.float32) augmented = augmented * (1 / 256) augmented = augmented.reshape(inp_x.shape[1:]) return augmented def _augment_train_set(self) -> None: """ Augments entire training set with all augmenters. :return: None, appends augmented images to the train set. """ train_augs = [] for augmenter in self.augmenters: cur_aug = np.ndarray.astype( augmenter.augment_images( np.ndarray.astype(self.x_train * 256, np.uint8)), np.float32) cur_aug = cur_aug * (1 / 256) # Display augmented input, if you want # show_image(cur_aug[0].reshape(NN_INPUT_SIZE)) train_augs.append(cur_aug) self.x_train = np.concatenate([self.x_train] + train_augs) self.y_train = np.concatenate([self.y_train] * (1 + len(train_augs))) def _get_data(self, range_beg: int = 0, range_end: int = 52) -> None: """ :param range_beg, range_end: only samples such that label \in [range_beg, range_end) will be used. Sensible values for (range_beg, range_end) would be: * 00, 52 -> to use eurecom only * 52, 78 -> to use ias_lab_rgbd_only * 78, 98 -> to use superface_dataset only :return: self.(x|y)_(train|test) are set as a result """ # Load stored numpy arrays from files. logging.info("Loading data..") self.x_train = np.load(DB_LOCATION + '/gen/' + self.experiment_name + '_X_train.npy') self.y_train = np.load(DB_LOCATION + '/gen/' + self.experiment_name + '_Y_train.npy') self.x_test = np.load(DB_LOCATION + '/gen/' + self.experiment_name + '_X_test.npy') self.y_test = np.load(DB_LOCATION + '/gen/' + self.experiment_name + '_Y_test.npy') train_indices = [] test_indices = [] # Filter out samples out of [range_beg, range_end). 
for i in range(len(self.y_train)): if range_end > np.argmax(self.y_train[i]) >= range_beg: train_indices.append(i) for i in range(len(self.y_test)): if range_end > np.argmax(self.y_test[i]) >= range_beg: test_indices.append(i) shuffle(train_indices) self.x_train = self.x_train[train_indices] self.y_train = self.y_train[train_indices] self.x_test = self.x_test[test_indices] self.y_test = self.y_test[test_indices] if self.binary_classification: def to_binary(row): return np.array([ 1. ]) if np.argmax(row) == self.positive_class else np.array([0.]) self.y_train = np.apply_along_axis(to_binary, 1, self.y_train) self.y_test = np.apply_along_axis(to_binary, 1, self.y_test) # Show first input if you want show_image(self.x_train[0].reshape( [self.input_shape[0], self.input_shape[1] * self.input_shape[2]])) # Image augmentation. if not self.augment_on_the_fly: self._augment_train_set() logging.info("Loaded data..") def _visualize_kernels(self): """ For each convolutional layer, visualizes filters and convolved images. 
""" for layer_no in range(len(self.conv_layers)): num_filters = self.filters_count[layer_no] kernels = [] applied_kernels = [] for filter_no in range(num_filters): inp_x = self.input_shape[0] // (2**layer_no) inp_y = self.input_shape[1] // (2**layer_no) if layer_no == 0: tmp_str = 'conv2d/kernel:0' else: tmp_str = 'conv2d_%d/kernel:0' % layer_no kernel = [ v for v in tf.global_variables() if v.name == tmp_str ][0] kernel = kernel[:, :, :, filter_no] cur_conv_layer = self.conv_layers[layer_no] if layer_no == 0: kernel = tf.reshape(kernel, [ 1, self.kernel_size[0] * self.input_shape[-1], self.kernel_size[1], 1 ]) else: kernel = tf.reshape(kernel, [1] +\ [self.kernel_size[0] * self.filters_count[layer_no - 1], self.kernel_size[1]] + [1]) kernels.append(kernel) applied = tf.reshape(cur_conv_layer[0, :, :, filter_no], [1, inp_x, inp_y, 1]) tf.summary.image('conv{0}_filter{1}_kernel'.format( layer_no, filter_no), kernel, family='kernels_layer{0}'.format(layer_no), max_outputs=1) tf.summary.image('conv{0}_filter{1}_applied'.format( layer_no, filter_no), applied, family='convolved_layer_{0}'.format(layer_no), max_outputs=1) applied_kernels.append(applied) # Write concatenated patches to summary. concatenated_kernels = tf.concat(kernels, axis=2) kernels_name = "kernels_layer{0}".format(layer_no) tf.summary.image(kernels_name, concatenated_kernels, family='kernels_all_layers') concatenated_applieds = tf.concat(applied_kernels, axis=2) applieds_name = "convolved_layer{0}".format(layer_no) tf.summary.image(applieds_name, concatenated_applieds, family='convolved_all_layers') if self.conv_layers: # Merge all visualizations of kernels. self.merged_summary = tf.summary.merge_all() def _visualize_exciting_patches(self): """ For each convolutional layer, visualizes patches that excite each filter the most. """ # Initialize fetch handles for exciting patches and their respective responses. 
self.exciting_patches = [[None] * k for k in self.filters_count] self.patches_responses = [[None] * k for k in self.filters_count] self.flattened_exciting_patches = [[None] * k for k in self.filters_count] self.all_exciting_patches_at_layer = [None for _ in self.filters_count] for layer_no in range(len(self.conv_layers)): num_filters = self.filters_count[layer_no] cur_conv_layer = self.conv_layers[layer_no] for filter_no in range(num_filters): # Find top 10 responses to current filter, in the current mini-batch. inp_x = self.input_shape[0] // (2**layer_no) inp_y = self.input_shape[1] // (2**layer_no) single_filtered_flattened = tf.reshape( cur_conv_layer[:, :, :, filter_no], [self.eff_mb_size * inp_x * inp_y]) top10_vals, top10_indices = tf.nn.top_k( single_filtered_flattened, k=10, sorted=True) top10_reshaped = tf.map_fn( lambda sxy: [ sxy // (inp_x * inp_y), (sxy // inp_y) % inp_x, sxy % inp_y ], top10_indices, dtype=[tf.int32, tf.int32, tf.int32]) def safe_cut_patch(sxy, size, img, layer_no): """ :param (sample_no, x, y)@sxy :param size: size of patch to cut out :param img: image to cut it from :param layer_no: current layer number :return: Cuts out a patch of size (|size|) located at (x, y) on input #sample_no in current batch. """ sample_no, x, y = sxy x *= 2**layer_no y *= 2**layer_no pad_marg_x = size[0] // 2 pad_marg_y = size[1] // 2 padding = [[0, 0], [pad_marg_x, pad_marg_x], [pad_marg_y, pad_marg_y], [0, 0]] padded = tf.pad(img, padding) return padded[sample_no, x:x + size[0], y:y + size[1], :] # Find patches corresponding to the top 10 responses. # Store patches and responses in class-visible array to be retrieved later. 
self.exciting_patches[layer_no][filter_no] = \ tf.map_fn(lambda sxy: safe_cut_patch(sxy, size=(self.kernel_size[0] * (2 ** layer_no), self.kernel_size[1] * (2 ** layer_no)), img=tf.expand_dims(self.x[:, :, :, 0], axis=-1), layer_no=layer_no), top10_reshaped, dtype=tf.float32) self.patches_responses[layer_no][filter_no] = top10_vals # Flatten and concatenate the 10 patches to 2 dimensions for visualization. flattened_patches_shape = [1] + \ [10 * self.kernel_size[0] * (2 ** layer_no), self.kernel_size[1] * (2 ** layer_no)] + \ [1] # Write patches to summary. patch_name = "exciting_patches_filter{0}".format(filter_no) flattened_exciting_patches = tf.reshape( self.exciting_patches[layer_no][filter_no], flattened_patches_shape, name=patch_name) self.flattened_exciting_patches[layer_no][ filter_no] = flattened_exciting_patches self.all_exciting_patches_at_layer[layer_no] = tf.concat( self.flattened_exciting_patches[layer_no], axis=2) # Write concatenated patches to summary. all_patches_name = "exciting_patches_layer{0}".format(layer_no) tf.summary.image(all_patches_name, self.all_exciting_patches_at_layer[layer_no], family='exciting_all_layers') # Merge all summaries. 
self.merged_summary = tf.summary.merge_all() def _visualize_incorrect_answer_images(self): correct = tf.boolean_mask(self.x, self.correct) correct = tf.transpose(correct, perm=[0, 1, 3, 2]) correct = tf.reshape( correct, shape=[1, -1, self.input_shape[1] * self.input_shape[2], 1]) correct = tf.concat([ correct, tf.zeros( shape=[1, 1, self.input_shape[1] * self.input_shape[2], 1]) ], axis=1) tf.summary.image('correct', correct) incorrect = tf.boolean_mask(self.x, tf.logical_not(self.correct)) incorrect = tf.transpose(incorrect, perm=[0, 1, 3, 2]) incorrect = tf.reshape( incorrect, shape=[1, -1, self.input_shape[1] * self.input_shape[2], 1]) incorrect = tf.concat([ incorrect, tf.zeros( shape=[1, 1, self.input_shape[1] * self.input_shape[2], 1]) ], axis=1) tf.summary.image('incorrect', incorrect) # Merge all summaries. self.merged_summary = tf.summary.merge_all() def _create_convolutional_layers(self) -> None: signal = self.x for layer_no in range(len(self.filters_count)): num_filters = self.filters_count[layer_no] signal = tf.layers.batch_normalization(signal) # Init weights with std.dev = sqrt(2 / N) # input_size = int(signal.get_shape()[1]) * int( signal.get_shape()[2]) * int(signal.get_shape()[3]) w_init = tf.initializers.random_normal(stddev=sqrt(2 / input_size)) # Convolutional layer cur_conv_layer = tf.layers.conv2d(inputs=signal, filters=num_filters, kernel_size=self.kernel_size, kernel_initializer=w_init, padding='same') # Reduce image dimensions in half. 
            # Halve the spatial dimensions with 2x2/stride-2 max-pooling.
            cur_pool_layer = tf.layers.max_pooling2d(inputs=cur_conv_layer,
                                                     pool_size=[2, 2],
                                                     strides=2,
                                                     padding='valid')
            self.conv_layers.append(cur_conv_layer)
            self.pool_layers.append(cur_pool_layer)
            # Set pooled image as current signal
            signal = cur_pool_layer
        # NOTE(review): method is annotated -> None; this return value is
        # unused by the visible caller (_create_model).
        return signal

    def _create_dense_layers(self) -> None:
        """Build the fully connected head of the network.

        Flattens the last pooled feature map (or |self.x| when no conv
        layers were built), then applies batch-norm -> dense(leaky_relu) ->
        dropout for every hidden size in |self.dense_layers[:-1]|; the final
        sigmoid layer (continued on the following lines) is stored in
        |self.output_layer|.
        """
        # Fall back to the raw input when there is no conv/pool stack.
        signal = self.x if not self.pool_layers else self.pool_layers[-1]
        input_size = int(signal.get_shape()[1]) * int(
            signal.get_shape()[2]) * int(signal.get_shape()[3])
        # Flatten to [batch, features].
        signal = tf.reshape(signal, [self.eff_mb_size, input_size])
        for num_neurons in self.dense_layers[:-1]:
            signal = tf.layers.batch_normalization(signal)
            # Init weights with std.dev = sqrt(2 / N) — He initialization:
            # https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf?spm=5176.100239.blogcont55892.28.pm8zm1&file=He_Delving_Deep_into_ICCV_2015_paper.pdf
            input_size = int(signal.get_shape()[1])
            w_init = tf.initializers.random_normal(stddev=sqrt(2 / input_size))
            cur_dense_layer = tf.layers.dense(inputs=signal,
                                              units=num_neurons,
                                              activation=tf.nn.leaky_relu,
                                              kernel_initializer=w_init)
            signal = cur_dense_layer
            # Apply dropout
            cur_dropout_layer = tf.layers.dropout(inputs=signal,
                                                  rate=self.dropout)
            signal = cur_dropout_layer
        # Init weights with std.dev = sqrt(2 / N)
        input_size = int(signal.get_shape()[1])
        w_init = tf.initializers.random_normal(
            stddev=tf.sqrt(tf.constant(2.)
                           / input_size))
        # Output layer: sigmoid activation, |self.dense_layers[-1]| units.
        cur_layer = tf.layers.dense(inputs=signal,
                                    activation=tf.nn.sigmoid,
                                    units=self.dense_layers[-1],
                                    kernel_initializer=w_init)
        self.output_layer = cur_layer

    def _create_training_objectives(self) -> None:
        """Define predictions, loss, accuracy and the SGD train op.

        In binary mode the sigmoid output is thresholded at 0.5 via
        tf.round; otherwise predictions/labels come from argmax over axis 1.
        The loss is log-loss of |self.output_layer| against the dense labels.
        """
        if self.binary_classification:
            self.preds = tf.cast(tf.round(self.output_layer), dtype=tf.int64)
            self.y_sparse = tf.cast(self.y, dtype=tf.int64)
        else:
            self.preds = tf.argmax(self.output_layer, axis=1)
            self.y_sparse = tf.argmax(self.y, axis=1)
        self.loss = tf.losses.log_loss(self.y, self.output_layer)
        self.correct = tf.reshape(tf.equal(self.y_sparse, self.preds),
                                  shape=[self.eff_mb_size])
        self.accuracy = tf.reduce_mean(tf.cast(self.correct, tf.float32))
        self.train_op = tf.train.GradientDescentOptimizer(
            self.learning_rate).minimize(self.loss)
        self.logger.info('list of variables {0}'.format(
            list(map(lambda x: x.name, tf.global_variables()))))

    def _create_model(self):
        """Assemble the whole graph: placeholders, conv stack, dense head
        and training objectives."""
        self.x = tf.placeholder(dtype=tf.float32,
                                shape=[None] + list(self.input_shape))
        self.y = tf.placeholder(dtype=tf.float32,
                                shape=[None, self.num_classes])
        self.eff_mb_size = tf.shape(self.x)[0]  # Effective batch size
        self.conv_layers = []
        self.pool_layers = []
        self._create_convolutional_layers()
        self._create_dense_layers()
        self._create_training_objectives()

    def train_on_batch(self, batch_x, batch_y):
        """Run one SGD step on the given batch.

        :return: [loss, accuracy]
        """
        results = self.sess.run([self.loss, self.accuracy, self.train_op],
                                feed_dict={
                                    self.x: batch_x,
                                    self.y: batch_y
                                })
        # Track training accuracy for the progress-bar running mean.
        self.accs.append(results[1])
        return results[:2]

    def test_on_batch(self, batch_x, batch_y,
                      global_step=1) -> Tuple[float, float, List[float]]:
        """
        Note that this function does not fetch |self.train_op|, so that
        the weights are not updated.
:param batch_x: :param batch_y: :param global_step: :return: (loss, accuracy, probs) """ if self.conv_layers: # Write summary results = self.sess.run([ self.loss, self.accuracy, self.output_layer, self.preds, self.merged_summary ], feed_dict={ self.x: batch_x, self.y: batch_y }) msum = results[4] self.writer.add_summary(msum, global_step=global_step) self.writer.flush() else: results = self.sess.run([self.loss, self.accuracy, self.preds], feed_dict={ self.x: batch_x, self.y: batch_y }) self.val_accs.append(results[1]) # Update confusion matrix preds = results[3] for i in range(len(batch_x)): self._confusion_matrix[np.argmax(batch_y[i]), preds[i]] += 1. return results[0], results[1], list(results[2]) def validate(self, global_step) -> ClassificationResults: """ :return: (loss, accuracy, auc_roc) Note that if self.binary_classification is False, auc_roc may be anything """ losses = [] accs = [] all_pred_probs = [] all_labels = [] for batch_no in range(self.x_test.shape[0] // self.mb_size + 1): inputs = self.x_test[batch_no * self.mb_size:(batch_no + 1) * self.mb_size] labels = self.y_test[batch_no * self.mb_size:(batch_no + 1) * self.mb_size] loss, acc, probs = self.test_on_batch(inputs, labels, global_step=global_step) losses.append(loss) accs.append(acc) all_pred_probs += probs all_labels += list(labels) all_pred_probs = np.array(all_pred_probs) all_labels = np.array(all_labels) all_labels = all_labels.astype(dtype=np.bool) loss = np.mean(losses) acc = np.mean(accs) return ClassificationResults(loss=loss, acc=acc, pred_probs=all_pred_probs, labels=all_labels, binary=self.binary_classification) def _next_training_batch(self) -> (np.ndarray, np.ndarray): batch = sample(list(range(self.x_train.shape[0])), self.mb_size) batch_x, batch_y = self.x_train[batch], self.y_train[batch] if self.augment_on_the_fly: for sample_no in range(self.mb_size): batch_x[sample_no] = self._augment_single_input( batch_x[sample_no]) return batch_x, batch_y def train_and_evaluate(self) -> 
            ClassificationResults:
        """ Train and evaluate model.

        Builds the graph inside a fresh session, optionally restores a
        checkpoint, then alternates |steps_per_epoch| training steps with a
        full validation pass for each of |nb_epochs| epochs.

        :return: ClassificationResults of the last validation pass.
        """
        with tf.Session() as self.sess:
            # Initialize computation graph.
            self._create_model()
            # Add visualizations to computation graph.
            self._visualize_kernels()
            self._visualize_exciting_patches()
            self._visualize_incorrect_answer_images()
            # Initialize variables.
            if self.ckpt_file:
                saver = tf.train.Saver()
                try:
                    saver.restore(self.sess, self.ckpt_file)
                except (tf.errors.InvalidArgumentError,
                        tf.errors.NotFoundError):
                    # Checkpoint missing or incompatible — start from
                    # freshly initialized variables instead.
                    tf.global_variables_initializer().run()
            else:
                tf.global_variables_initializer().run()
            # Initialize summary writer.
            self.writer = tf.summary.FileWriter(logdir='conv_vis')
            # Initialize progress bar.
            bar = Bar('',
                      max=self.steps_per_epoch,
                      suffix='%(index)d/%(max)d ETA: %(eta)ds')
            for epoch_no in range(self.nb_epochs):
                self.logger.info("Epoch {epoch_no}/{nb_epochs}".format(
                    epoch_no=epoch_no, nb_epochs=self.nb_epochs))
                for step_no in range(self.steps_per_epoch):
                    # Train model on next batch
                    batch_x, batch_y = self._next_training_batch()
                    results = self.train_on_batch(batch_x, batch_y)
                    # Update bar with current loss/acc and the running mean
                    # accuracy over the last 1000 batches.
                    bar.message = 'loss: {0[0]:.8f} acc: {0[1]:.3f} mean_acc: {1:.3f}'. \
                        format(results, np.mean(self.accs[-1000:]), )
                    bar.next()
                # Re-initialize progress bar
                bar.finish()
                bar = Bar('',
                          max=self.steps_per_epoch,
                          suffix='%(index)d/%(max)d ETA: %(eta)ds')
                # Store model
                if self.ckpt_file:
                    saver.save(self.sess, self.ckpt_file)
                # Validate
                val_results = self.validate(global_step=epoch_no)
                loss, acc, auc_roc = val_results.loss, val_results.acc, val_results.get_auc_roc(
                )
                if self.binary_classification:
                    self.logger.info(
                        "Validation results: Loss: {0}, accuracy: {1}, auc_roc: {2}"
                        .format(loss, acc, auc_roc))
                else:
                    self.logger.info(
                        "Validation results: Loss: {0}, accuracy: {1}".format(
                            loss, acc))
                # Display confusion matrix
                show_image(self._confusion_matrix)
            # NOTE(review): |val_results| is unbound if nb_epochs == 0 —
            # this would raise NameError; confirm nb_epochs >= 1 upstream.
            return val_results
            return None
    # NOTE(review): the lines above/below are the tail of a face-descriptor
    # function whose definition starts before this chunk; the indentation of
    # the first `return None` is inferred — verify against the full file.
    if np.isnan(gir_face).any() or np.isnan(depth_face).any():
        # Reject samples containing NaNs in either modality.
        return None
    try:
        face = normalized(face, rotate=False)
        face = hog_and_entropy(face)
    except ValueError:
        # Normalization / feature extraction failed for this face.
        return None
    return face.get_fd_desc()


# Fixed augmentation variants applied during preprocessing: identity,
# coarse salt-and-pepper noise (two strengths), 3px padding on each of the
# four sides, Gaussian blurs of increasing sigma, and small rotations.
# NOTE(review): other parts of this code base alias imgaug's augmenters as
# `iaa`; here the alias is `ia` — confirm the import at the top of the file.
augmenters = [
    ia.Noop(),
    ia.CoarseSaltAndPepper(p=0.2, size_percent=0.30),
    ia.CoarseSaltAndPepper(p=0.4, size_percent=0.30),
    ia.Pad(px=(3, 0, 0, 0)),
    ia.Pad(px=(0, 3, 0, 0)),
    ia.Pad(px=(0, 0, 3, 0)),
    ia.Pad(px=(0, 0, 0, 3)),
    ia.GaussianBlur(sigma=0.25),
    ia.GaussianBlur(sigma=0.5),
    ia.GaussianBlur(sigma=1),
    ia.GaussianBlur(sigma=2),
    ia.Affine(rotate=-2),
    ia.Affine(rotate=2)
]


def run_preprocess():
    # Definition continues beyond this chunk.
    preprocessor = InputPreprocessor(exp_name=EXP_NAME,
                                     nn_input_size=INPUT_SIZE,