Ejemplo n.º 1
0
def test_time_augmentation(image):
    """Return the original image plus five augmented variants (a TTA set).

    Variants: three asymmetric pads that keep the original size, a fixed
    15-degree rotation, and a horizontal flip.
    """
    augmenters = [
        iaa.Pad(percent=(0., 0.2, 0.2, 0.), keep_size=True),
        iaa.Pad(percent=(0.2, 0., 0.2, 0.), keep_size=True),
        iaa.Pad(percent=(0.2, 0., 0., 0.2), keep_size=True),
        iaa.Affine(rotate=15),
        # NOTE(review): no probability given to Fliplr — relies on imgaug's
        # default; confirm it actually flips as intended.
        iaa.Fliplr(),
    ]
    variants = [image]
    for augmenter in augmenters:
        variants.append(augmenter.augment_image(image))
    return variants
Ejemplo n.º 2
0
def create_sequence():
    """Build three constant-mode pad augmenters of increasing strength.

    Each augmenter pads every side by a percentage drawn independently from
    its range, filling with a random constant value in [0, 255].

    Returns:
        Tuple of three ``iaa.Pad`` augmenters (light, medium, heavy).
    """
    def _make_pad(lo, hi):
        # Same (lo, hi) range for all four sides: top, right, bottom, left.
        return iaa.Pad(percent=((lo, hi),) * 4,
                       pad_mode="constant",
                       pad_cval=(0, 255))

    return _make_pad(0.15, 0.3), _make_pad(0.45, 0.6), _make_pad(0.75, 0.9)
Ejemplo n.º 3
0
 def logic(self, image):
     """Record one augmentation result per configured pad parameter.

     Each appended record is ``[value_as_string, padded_image, tag]``.
     """
     for aug_param in self.augmentation_params:
         px_value = aug_param.augmentation_value
         # Deterministic so repeated calls with the same value pad identically.
         padded = iaa.Pad(px=px_value).to_deterministic().augment_image(image)
         self.augmentation_data.append(
             [str(px_value), padded, aug_param.detection_tag])
 def __init__(self):
     """Build the stochastic training-augmentation pipeline.

     Every augmenter is wrapped in ``iaa.Sometimes`` so it is applied to
     roughly half of the images; commented-out entries are disabled
     alternatives kept for reference.
     """
     st = lambda aug: iaa.Sometimes(0.5, aug)  # apply `aug` with p=0.5
     self.seq = iaa.Sequential([
         st(iaa.Pad(percent=((0, 0.2), (0, 0.2), (0, 0.2), (0, 0.2)), keep_size=False)),
         #
         #st(iaa.Crop(percent=([0.0, 0.1], [0.00, 0.1], [0.0, 0.1], [0.0, 0.1]), keep_size=False)),
         st(iaa.Affine(scale=(0.9, 1.0), rotate=(-30, 30), shear=(-5, 5),
                       translate_px={"x": (-30, 30), "y": (-10, 10)},
                       fit_output=True)),
         # st(iaa.PerspectiveTransform((0,0.1),fit_output=True)),
         # st(iaa.MultiplyAndAddToBrightness(mul=(0.6, 1.5), add=(0, 30))),
         st(iaa.ChangeColorTemperature(kelvin=(3000, 9100))),
         st(iaa.LinearContrast((0.75, 1.5))),
         st(iaa.GaussianBlur((0, 0.2))),
         # st(iaa.PerspectiveTransform(scale=0.05,)),
         st(iaa.AddToHueAndSaturation((-20, 20))),
         #
         st(iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 16),
                                      per_channel=True)),  # add gaussian noise to images
         # # # #st(iaa.Dropout((0.0, 0.1), per_channel=0.5)),  # randomly remove up to 10% of the pixels
         # # # change brightness of images (by -40 to 40 of original value)
         st(iaa.Add((-40, 40), per_channel=True)),
         # # change brightness of images (50-150% of original value)
         st(iaa.Multiply((0.5, 1.5), per_channel=True)),
     ])
Ejemplo n.º 5
0
def augment_batch_img_for_box(batch_img, batch_pts, plot=False):
    """
    Image augmentation, used when training.

    Applies (with 50% probability) random padding, then a small rotation and
    a brightness change, keeping keypoints in sync with the images.

    :param batch_img: [B,H,W,C] batch of images
    :param batch_pts: [B,number,xy] keypoints per image
    :param plot: if True, draw keypoints before/after and show a preview
    :return: aug_b_img, aug_b_pts
    """
    # Fix (PEP 8 E731): use a def instead of assigning a lambda to a name.
    def sometimes(aug):
        return iaa.Sometimes(0.5, aug)

    seq = iaa.Sequential([
        sometimes(iaa.Pad(percent=(0, 0.8))),
        iaa.Affine(rotate=(-5, 5)),
        iaa.Multiply((0.7, 1.3))  # change brightness
    ])
    aug_b_imgs, aug_b_pts = seq(images=batch_img, keypoints=batch_pts)

    if plot:
        import cv2
        # Inputs appear to be grayscale (GRAY2BGR conversion below) — the
        # conversion lets the keypoints draw in color.
        batch_img = [
            cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) for img in batch_img
        ]
        aug_b_imgs = [
            cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) for img in aug_b_imgs
        ]
        for i in range(len(batch_img)):
            print("[Image #%d]" % (i, ))
            keypoints_before = KeypointsOnImage.from_xy_array(
                batch_pts[i], shape=batch_img[i].shape)
            keypoints_after = KeypointsOnImage.from_xy_array(
                aug_b_pts[i], shape=aug_b_imgs[i].shape)
            image_before = keypoints_before.draw_on_image(batch_img[i])
            image_after = keypoints_after.draw_on_image(aug_b_imgs[i])
            # Side-by-side comparison: original | augmented.
            ia.imshow(np.hstack([image_before, image_after]))
    return aug_b_imgs, aug_b_pts
Ejemplo n.º 6
0
    def _pad_image(self, image):
        """Pad ``image`` using this instance's pad sequence and pad mode.

        Padding amounts come from ``self._get_pad_sequence``; the padded
        size is kept as-is (``keep_size=False``).
        """
        h, w = image.shape[0], image.shape[1]
        pads = self._get_pad_sequence(h, w)
        padder = iaa.Pad(px=pads, keep_size=False, pad_mode=self.pad_mode)
        return padder.augment_image(image)
def resize_pad_seq(pad_size):
    """Build an affine + scale + edge-pad augmentation sequence.

    NOTE(review): the ``pad_size`` argument is never used — the pad amounts
    come from ``compute_random_pad()`` instead; confirm this is intentional.
    Depends on module-level ``affine_seq``, ``IMG_ORI_SIZE`` and ``SCALE``.
    """
    dy0, dx0, dy1, dx1 = compute_random_pad()
    seq = iaa.Sequential([
        affine_seq,
        # Resize to the scaled working resolution before padding.
        iaa.Scale({'height': IMG_ORI_SIZE*SCALE, 'width': IMG_ORI_SIZE*SCALE}),
        # iaa.Pad px order is (top, right, bottom, left); keep padded size.
        iaa.Pad(px=(dy0, dx0, dy1, dx1), pad_mode='edge', keep_size=False),
    ], random_order=False)
    return seq
def epoch_augmentation(__data, __ground_truth, padding):
    """Randomly augment one (image, ground-truth) pair for an epoch.

    The same deterministic geometric pipeline (reflect-pad translation,
    flips, sometimes an affine transform) is applied to both arrays so they
    stay aligned; the image additionally receives photometric jitter, and
    pixels labelled as road get a stronger multiply/sharpen/emboss
    treatment 30% of the time.

    :param __data: input image, float-valued; converted to uint8 internally
    :param __ground_truth: label image augmented in lockstep with the data
    :param padding: per-side padding budget; each axis grows by 2 * padding
    :return: tuple ``(aug_image in [0, 1], aug_ground_truth)``
    """
    MAX = 2 * padding
    # NOTE(review): this asserts the shapes *differ* (e.g. HxWx3 image vs
    # HxW mask) while the message reads like an equality check — confirm
    # the intended condition.
    assert (__data.shape !=
            __ground_truth.shape), "Incorrect dimensions for data and labels"
    assert (
        MAX >=
        0), "Augmentation would reduce images, is this really what you want?"

    # Random translation via asymmetric reflect-padding: opposite sides
    # always sum to MAX, so the padded output size is deterministic.
    offset_x, offset_y = np.random.randint(0, MAX + 1, 2)
    # Fix: renamed from `padding`, which shadowed the function parameter.
    pad_aug = iaa.Pad(px=(offset_y, offset_x, MAX - offset_y, MAX - offset_x),
                      pad_mode=["reflect"],
                      keep_size=False)
    affine = iaa.Affine(
        rotate=(-180, 180),
        # shear=(-5, 5),
        scale=(0.9, 1.1),
        mode=["reflect"])
    augment_both = iaa.Sequential(
        [
            pad_aug,  # Pad the image to requested padding
            iaa.Fliplr(0.5),
            iaa.Flipud(0.5),
            iaa.Sometimes(
                0.5, affine)  # Apply sometimes more interesting augmentations
        ],
        random_order=False).to_deterministic()

    road_augment = iaa.Sequential(
        [
            iaa.Multiply((1.5, 1.7)),
            #   iaa.ContrastNormalization((1.5, 1.8)),
            iaa.Sharpen(alpha=(0, 0.25), lightness=(0.75, 1.0)),
            iaa.Emboss(alpha=(0, 1.0), strength=(0, 0.5)),
        ],
        random_order=False).to_deterministic()

    probabilistic_road_augment = iaa.Sequential(
        [iaa.Sometimes(0.3, road_augment)]).to_deterministic()

    augment_image = iaa.Sequential(
        iaa.SomeOf(
            (0, None),
            [iaa.Multiply((0.8, 1.2)),
             iaa.ContrastNormalization((0.8, 1.2))],
            random_order=True)).to_deterministic()

    __data = img_float_to_uint8(__data)
    aug_image = augment_both.augment_image(__data)
    aug_ground_truth = augment_both.augment_image(__ground_truth)
    aug_image = augment_image.augment_image(aug_image)

    # Blend the road-specific augmentation in only where the ground truth
    # marks road pixels.
    aug_road = probabilistic_road_augment.augment_image(aug_image)
    road_ids = aug_ground_truth > 0.5
    aug_image[road_ids] = aug_road[road_ids]

    aug_image = aug_image / 255.0

    return aug_image, aug_ground_truth
def main(image2bboxes, annot_file):
    """Augment every image once and write the augmented images plus a new
    annotation file.

    :param image2bboxes: mapping of image path -> iterable of
        "x1,y1,x2,y2,class" strings
    :param annot_file: path of the source annotation file; augmented
        annotations are written to the same path with an ``_aug`` suffix
    """
    bbs, images, classes, img_files = [], [], [], []
    for img_file in image2bboxes:
        bb = image2bboxes[img_file] # this has class, too
        images.append(plt.imread(os.path.join('..', img_file)))
        img_files.append(img_file)
        many_boxes = []
        many_classes = []
        # Parse out bboxes and classes, adding bboxes as imgaug BoundingBox objects
        for box in bb:
            box = [int(x) for x in box.split(',')]
            many_boxes.append(ia.BoundingBox(x1=box[0], y1=box[1], x2=box[2], y2=box[3]))
            many_classes.append(box[4])
        bbs.append(many_boxes)
        classes.append(many_classes)

    # To sometimes apply aug
    sometimes = lambda aug: iaa.Sometimes(0.5, aug)

    # The transformations!!!
    seq = iaa.Sequential([
        iaa.AdditiveGaussianNoise(scale=0.05*255),
        # iaa.Affine(translate_px={"x": (1, 5)}),
        # sometimes(iaa.Fog()),
        sometimes(iaa.SigmoidContrast(cutoff=0.7)),
        # sometimes(iaa.Multiply(0.5)),
        sometimes(iaa.Add(-10)),
        sometimes(iaa.Pad(px=(256, 256, 0, 0)))
    ])

    # Bounding boxes are transformed together with the images.
    images_aug, bbs_aug = seq(images=images, bounding_boxes=bbs)

    # Save images
    for i in range(len(images)):
        img_file = img_files[i]
        # Create new file name and save
        name_spl = os.path.basename(img_file).split('.')
        ending = name_spl[-1]
        new_file_name = '.'.join(name_spl[0:-1]) + '_aug' + '.' + ending
        plt.imsave(os.path.join('..', 'data', 'JPEGImages', new_file_name), images_aug[i])

    # Save annotations to one file
    with open(annot_file.replace('.txt', '_aug.txt'), 'w') as f:
        for i in range(len(images)):
            items = []
            name_spl = os.path.basename(img_files[i]).split('.')
            ending = name_spl[-1]
            new_file_name = '.'.join(name_spl[0:-1]) + '_aug' + '.' + ending
            items.append('data/JPEGImages/' + new_file_name)
            many_boxes = bbs_aug[i]
            many_classes = classes[i]
            # One line per image: path followed by comma-joined box+class.
            for j in range(len(many_boxes)):
                box = many_boxes[j]
                annot = ','.join([str(int(x)) for x in [box.x1, box.y1, box.x2, box.y2, many_classes[j]]])
                items.append(annot)
            f.write(' '.join(items) + '\n')
    def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter:
        """Build the pad-then-crop augmenter for this dataset.

        Pads by ``self.percent`` without keeping the original size and then
        crops back to the dataset's fixed height/width taken from
        ``X['image_height']`` / ``X['image_width']``.

        :param X: fit dictionary; must satisfy ``self.check_requirements``
        :param y: unused, kept for the fit() interface
        :return: self
        """
        self.check_requirements(X, y)
        self.pad_augmenter = iaa.Pad(percent=self.percent, keep_size=False)
        self.crop_augmenter = iaa.CropToFixedSize(height=X['image_height'], width=X['image_width'])
        self.augmenter: Augmenter = iaa.Sequential([
            self.pad_augmenter,
            self.crop_augmenter
        ], name=self.get_properties()['name'])

        return self
Ejemplo n.º 11
0
def scale_crop_pad(img, factor):
    """Rescale a square image by ``factor`` and restore its original size.

    Downscales are symmetric-padded back up (top and right sides only);
    upscales are center-cropped back down.
    """
    original = img.size[0]
    scaled = int(original * factor)
    img = img.resize((scaled, scaled), Image.BICUBIC)
    if scaled >= original:
        return transforms.CenterCrop(original)(img)
    diff = original - scaled
    padder = iaa.Pad(px=(diff, diff, 0, 0),
                     pad_mode='symmetric',
                     keep_size=False)
    return Image.fromarray(padder.augment_image(np.array(img)))
Ejemplo n.º 12
0
    def __init__(self):
        """Set up the augmentation pipeline: random right/left padding,
        resize to a height of 32 px (width keeps the aspect ratio), then a
        small random rotation.
        """
        pad = iaa.Pad(percent=(0, (0, 0.5), 0, (0, 0.5)))
        resize = iaa.Resize(size={"height": 32,
                                  "width": "keep-aspect-ratio"})
        rotate = iaa.Rotate(rotate=(-10, 10))
        self.aug = iaa.Sequential([pad, resize, rotate])
Ejemplo n.º 13
0
 def crop_pad(dataset):
     """Return a copy of ``dataset`` with a pad-then-crop augmentation
     applied to every image.

     Each item is expected to be ``[flat_image, label]`` where the image
     reshapes to 32x32x3 uint8; augmented images are returned flattened
     again as float32.
     """
     # The pipeline is identical for every item — build it once, not per
     # iteration.
     seq = iaa.Sequential(
         [iaa.Pad(px=(4, 4, 4, 4)),
          iaa.Crop(px=(4, 4, 4, 4))])
     new_dataset = []
     for iterm in dataset:
         image = np.array(iterm[0][:])
         image = image.reshape((32, 32, 3)).astype(np.uint8)
         # Bug fix: a single (H, W, C) array passed via `images=` is
         # interpreted by imgaug as a *batch* of 2-D images; use the
         # singular `image=` keyword for a single image.
         image = seq(image=image)
         noise_image = (image.reshape(-1)).astype(np.float32)
         new_dataset.append([noise_image, iterm[1]])
     return new_dataset
Ejemplo n.º 14
0
def iaa_letterbox(img, new_dim):
    """Build a letterbox (resize + pad) augmentation sequence.

    :param img: source image array, or a ``(w, h)`` tuple of its size
    :param new_dim: target ``(w, h)``
    :return: ``(sequence, reverter)`` where ``reverter`` holds the values
        needed to map coordinates back to the original frame
    """
    if isinstance(img, tuple):
        org_dim = img
    else:
        # numpy shape is (h, w, ...) -> reorder to (w, h).
        org_dim = img.shape[1], img.shape[0]
    
    padded_w, padded_h, x_pad, y_pad, ratio = letterbox_transforms(*org_dim, *new_dim)
    # Remaining pixels on each axis go to the opposite side of the pad.
    l_pad, r_pad = x_pad, new_dim[0] - padded_w - x_pad
    t_pad, b_pad = y_pad, new_dim[1] - padded_h - y_pad
    lb_reverter = np.array([org_dim[0], org_dim[1], padded_w, padded_h, x_pad, y_pad])

    # iaa.Pad px order is (top, right, bottom, left); fill with grey (128).
    return iaa.Sequential([iaa.Scale({ "width": padded_w, "height": padded_h }),
                           iaa.Pad(px=(t_pad, r_pad, b_pad, l_pad), keep_size=False, pad_cval=128),
                          ]), \
           lb_reverter
	def __init__(self, seed=1, aug_type=None, probability=0.9):
		"""Seed imgaug and prepare the shape/color augmenter pools.

		:param seed: RNG seed passed to ``ia.seed``
		:param aug_type: selector forwarded to ``self.aug_sequence``
		:param probability: probability forwarded to ``self.aug_sequence``
		"""
		self.type = aug_type
		ia.seed(seed)
		# Geometric / occlusion augmenters.
		self.shape = [
			iaa.Crop(px=(32, 64)),
			iaa.Fliplr(1),
			iaa.Pad(px=(16, 32), pad_mode=ia.ALL, pad_cval=(0, 128)),
			iaa.CoarseDropout(0.1, size_percent=0.02)
		]
		# Photometric / noise augmenters.
		self.color = [
			iaa.OneOf([
				iaa.GammaContrast((0.75,1.25)),
				iaa.GammaContrast((0.95, 1.05), per_channel=True)
			]),
			# iaa.GaussianBlur(sigma=(1.0, 2.0)),
			iaa.SaltAndPepper(0.005),
			iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.025*255))
		]
		self.seq = self.aug_sequence(aug_type, probability)
Ejemplo n.º 16
0
 def __init__(self):
     """Build a heavy stochastic augmentation pipeline.

     Most augmenters are wrapped in ``iaa.Sometimes`` (p=0.5); the pipeline
     mixes geometric changes (crop, pad, perspective, elastic, piecewise
     affine), dropout-style occlusion, blur variants, noise and a final
     50% color inversion.
     """
     sometimes = lambda aug: iaa.Sometimes(0.5, aug)  # apply with p=0.5
     self.seq = iaa.Sequential([
         sometimes(iaa.Crop(px=(0, 0, 8, 0), keep_size=True)),
         sometimes(iaa.Pad(px=(0, 0, 0, 5), keep_size=False)),
         iaa.Multiply((0.8, 1.2), per_channel=0.5),
         sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.05))),
         sometimes(
             iaa.OneOf([
                 iaa.CoarseDropout((0.01, 0.03), size_percent=(0.1, 0.3)),
                 iaa.CoarseDropout((0.01, 0.03), size_percent=(0.1, 0.3), per_channel=1.0),
                 iaa.Dropout((0.03,0.05)),
                 iaa.Salt((0.03,0.05))
             ])
         ),
         iaa.Multiply((0.8, 1.2), per_channel=0.5),
         sometimes(iaa.FrequencyNoiseAlpha(
                 exponent=(-4, 0),
                 first=iaa.Multiply((0.8, 1.2), per_channel=0.5),
                 second=iaa.ContrastNormalization((0.8, 1.5))
             )
         ),
         sometimes(
             iaa.OneOf([
                 iaa.MotionBlur(k=(3,4),angle=(0, 360)),
                 iaa.GaussianBlur((0, 1.2)),
                 iaa.AverageBlur(k=(2, 3)),
                 iaa.MedianBlur(k=(3, 5))
             ])
         ),
         sometimes(
             iaa.CropAndPad(
                 percent=(-0.05, 0.1),
                 pad_mode='constant',
                 pad_cval=(0, 255)
             ),
         ),
         sometimes(iaa.ElasticTransformation(alpha=(1.0, 2.0), sigma=(2.0, 3.0))), # move pixels locally around (with random strengths)
         sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.02), mode='constant')), # sometimes move parts of the image around
         sometimes(iaa.AdditiveGaussianNoise((0.02, 0.1))),
         sometimes(iaa.AdditivePoissonNoise((0.02,0.05))),
         iaa.Invert(p=0.5)
     ])
Ejemplo n.º 17
0
def generate_augmentation(aug_pad, aug_affine, aug_ch_suffle, aug_dropout,
                          aug_AGN, aug_fliplr, aug_flipud, aug_percent):
    '''
    Create an imgaug augmenter for a dataset transform to use.

    Every image is first resized to 224. The predetermined augmenters whose
    boolean flags are set are then applied, in the argument order, with
    probability ``aug_percent``.

    Args:
        aug_pad (bool): include the random-pad filter.
        aug_affine (bool): include the affine-rotation filter.
        aug_ch_suffle (bool): include the channel-shuffle filter.
        aug_dropout (bool): include the dropout filter.
        aug_AGN (bool): include the additive-Gaussian-noise filter.
        aug_fliplr (bool): include the left-right flip filter.
        aug_flipud (bool): include the up-down flip filter.
        aug_percent (float): probability in [0, 1] of applying the selected
                             augmenters to a given image.
    Return:
        augment (imgaug augmenter): Imgaug augmenter that can be used to
                                    perform transformations on images.
    '''
    # All candidate augmenters, in the order the flags are declared.
    candidates = [
        iaa.Pad(px=(0, 4)),
        iaa.Affine(rotate=(-10, 10)),
        iaa.ChannelShuffle(0.35),
        iaa.Dropout(p=(0, 0.2)),
        iaa.AdditiveGaussianNoise(loc=0, scale=(0, 15)),
        iaa.Fliplr(0.5),
        iaa.Flipud(0.5),
    ]
    enabled = [aug_pad, aug_affine, aug_ch_suffle, aug_dropout, aug_AGN,
               aug_fliplr, aug_flipud]
    # Keep only the augmenters whose flag is truthy.
    chosen = [aug for aug, use in zip(candidates, enabled) if use]
    # Resize always runs; the chosen augmenters run with p=aug_percent.
    return iaa.Sequential(
        [iaa.Resize(224), iaa.Sometimes(aug_percent, iaa.Sequential(chosen))])
Ejemplo n.º 18
0
def epoch_augmentation_old(__data, __ground_truth, padding):
    """Randomly augment one (image, ground-truth) pair (legacy variant).

    Both arrays get the same deterministic geometric pipeline (reflect-pad
    translation, sometimes an affine transform); the image additionally
    receives photometric jitter and pixel noise.

    :param __data: input image, float-valued; converted to uint8 internally
    :param __ground_truth: label image augmented in lockstep with the data
    :param padding: per-side padding budget; each axis grows by 2 * padding
    :return: tuple ``(aug_image in [0, 1], aug_ground_truth)``
    """
    MAX = 2 * padding
    # NOTE(review): this asserts the shapes *differ* (e.g. HxWx3 image vs
    # HxW mask) while the message reads like an equality check — confirm
    # the intended condition.
    assert (__data.shape !=
            __ground_truth.shape), "Incorrect dimensions for data and labels"
    assert (
        MAX >=
        0), "Augmentation would reduce images, is this really what you want?"

    # Random translation via asymmetric reflect-padding: opposite sides
    # always sum to MAX, so the padded output size is deterministic.
    offset_x, offset_y = np.random.randint(0, MAX + 1, 2)
    # Fix: renamed from `padding`, which shadowed the function parameter.
    pad_aug = iaa.Pad(px=(offset_y, offset_x, MAX - offset_y, MAX - offset_x),
                      pad_mode=["reflect"],
                      keep_size=False)
    affine = iaa.Affine(rotate=(-180, 180),
                        shear=(-5, 5),
                        scale=(0.9, 1.1),
                        mode=["reflect"])
    augment_both = iaa.Sequential(
        [
            pad_aug,  # Pad the image to requested padding
            iaa.Sometimes(
                0.3, affine)  # Apply sometimes more interesting augmentations
        ],
        random_order=False).to_deterministic()

    augment_image = iaa.Sequential(
        iaa.SomeOf(
            (0, None),
            [
                iaa.Multiply((0.8, 1.2)),
                iaa.ContrastNormalization((0.8, 1.2)),
                iaa.Dropout(0.01),  # Drop out single pixels
                iaa.SaltAndPepper(0.01)  # Add salt-n-pepper noise
            ],
            random_order=True)).to_deterministic()

    __data = img_float_to_uint8(__data)
    aug_image = augment_both.augment_image(__data)
    aug_ground_truth = augment_both.augment_image(__ground_truth)
    aug_image = augment_image.augment_image(aug_image)
    aug_image = aug_image / 255.0

    return aug_image, aug_ground_truth
def main():
    """Demo: crop, pad and scale a white 100x100 image and print/show how a
    bounding box is transformed along with it.
    """
    imgs = np.zeros((1, 100, 100, 3), dtype=np.uint8) + 255
    bbs = ia.BoundingBoxesOnImage([ia.BoundingBox(x1=0, x2=50, y1=0, y2=50)],
                                  shape=imgs.shape[1:])

    aug = iaa.Sequential([
        iaa.Crop(px=10),
        iaa.Pad(px=10, pad_cval=128),
        iaa.Affine(scale=0.5, cval=0)
    ])

    # Freeze the random state so images and boxes receive identical
    # transformations.
    aug_det = aug.to_deterministic()
    imgs_aug = aug_det.augment_images(imgs)
    bbs_aug = aug_det.augment_bounding_boxes([bbs])

    print("bbs:")
    for bbs_aug_i in bbs_aug[0].bounding_boxes:
        print(bbs_aug_i)

    # Bug fix: cv2.imshow cannot display a 4-D batch array; show the single
    # image, not the (1, H, W, C) batch.
    cv2.imshow('orig', imgs[0])
    cv2.imshow('aug', bbs_aug[0].draw_on_image(imgs_aug[0]))
    cv2.waitKey()
Ejemplo n.º 20
0
def test_dtype_preservation():
    """Check that each augmenter preserves the dtype of its input images
    for every dtype that augmenter supports.
    """
    reseed()

    size = (4, 16, 16, 3)
    images = [
        np.random.uniform(0, 255, size).astype(np.uint8),
        np.random.uniform(0, 65535, size).astype(np.uint16),
        np.random.uniform(0, 4294967295, size).astype(np.uint32),
        np.random.uniform(-128, 127, size).astype(np.int16),
        np.random.uniform(-32768, 32767, size).astype(np.int32),
        np.random.uniform(0.0, 1.0, size).astype(np.float32),
        np.random.uniform(-1000.0, 1000.0, size).astype(np.float16),
        np.random.uniform(-1000.0, 1000.0, size).astype(np.float32),
        np.random.uniform(-1000.0, 1000.0, size).astype(np.float64)
    ]

    default_dtypes = set([arr.dtype for arr in images])
    # Some dtypes are here removed per augmenter, because the respective
    # augmenter does not support them. This test currently only checks whether
    # dtypes are preserved from in- to output for all dtypes that are supported
    # per augmenter.
    # dtypes are here removed via list comprehension instead of
    # `default_dtypes - set([dtype])`, because the latter one simply never
    # removed the dtype(s) for some reason

    def _not_dts(dts):
        # Complement helper: all default dtypes except those listed in `dts`.
        return [dt for dt in default_dtypes if dt not in dts]

    # Pairs of (augmenter, dtypes it must preserve).
    # NOTE(review): several `_not_dts` lists below repeat np.uint32
    # (e.g. Sharpen/Emboss/EdgeDetect) — harmless, but likely a typo for
    # another dtype; confirm against the augmenters' dtype support tables.
    augs = [
        (iaa.Add((-5, 5), name="Add"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.AddElementwise((-5, 5), name="AddElementwise"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.AdditiveGaussianNoise(0.01*255, name="AdditiveGaussianNoise"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.Multiply((0.95, 1.05), name="Multiply"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.Dropout(0.01, name="Dropout"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.CoarseDropout(0.01, size_px=6, name="CoarseDropout"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.Invert(0.01, per_channel=True, name="Invert"),
         default_dtypes),
        (iaa.GaussianBlur(sigma=(0.95, 1.05), name="GaussianBlur"),
         _not_dts([np.float16])),
        (iaa.AverageBlur((3, 5), name="AverageBlur"),
         _not_dts([np.uint32, np.int32, np.float16])),
        (iaa.MedianBlur((3, 5), name="MedianBlur"),
         _not_dts([np.uint32, np.int32, np.float16, np.float64])),
        (iaa.BilateralBlur((3, 5), name="BilateralBlur"),
         _not_dts([np.uint16, np.uint32, np.int16, np.int32, np.float16,
                   np.float64])),
        (iaa.Sharpen((0.0, 0.1), lightness=(1.0, 1.2), name="Sharpen"),
         _not_dts([np.uint32, np.int32, np.float16, np.uint32])),
        (iaa.Emboss(alpha=(0.0, 0.1), strength=(0.5, 1.5), name="Emboss"),
         _not_dts([np.uint32, np.int32, np.float16, np.uint32])),
        (iaa.EdgeDetect(alpha=(0.0, 0.1), name="EdgeDetect"),
         _not_dts([np.uint32, np.int32, np.float16, np.uint32])),
        (iaa.DirectedEdgeDetect(alpha=(0.0, 0.1), direction=0,
                                name="DirectedEdgeDetect"),
         _not_dts([np.uint32, np.int32, np.float16, np.uint32])),
        (iaa.Fliplr(0.5, name="Fliplr"), default_dtypes),
        (iaa.Flipud(0.5, name="Flipud"), default_dtypes),
        (iaa.Affine(translate_px=(-5, 5), name="Affine-translate-px"),
         _not_dts([np.uint32, np.int32])),
        (iaa.Affine(translate_percent=(-0.05, 0.05),
                    name="Affine-translate-percent"),
         _not_dts([np.uint32, np.int32])),
        (iaa.Affine(rotate=(-20, 20), name="Affine-rotate"),
         _not_dts([np.uint32, np.int32])),
        (iaa.Affine(shear=(-20, 20), name="Affine-shear"),
         _not_dts([np.uint32, np.int32])),
        (iaa.Affine(scale=(0.9, 1.1), name="Affine-scale"),
         _not_dts([np.uint32, np.int32])),
        (iaa.PiecewiseAffine(scale=(0.001, 0.005), name="PiecewiseAffine"),
         default_dtypes),
        (iaa.ElasticTransformation(alpha=(0.1, 0.2), sigma=(0.1, 0.2),
                                   name="ElasticTransformation"),
         _not_dts([np.float16])),
        (iaa.Sequential([iaa.Identity(), iaa.Identity()],
                        name="SequentialNoop"),
         default_dtypes),
        (iaa.SomeOf(1, [iaa.Identity(), iaa.Identity()], name="SomeOfNoop"),
         default_dtypes),
        (iaa.OneOf([iaa.Identity(), iaa.Identity()], name="OneOfNoop"),
         default_dtypes),
        (iaa.Sometimes(0.5, iaa.Identity(), name="SometimesNoop"),
         default_dtypes),
        (iaa.Sequential([iaa.Add((-5, 5)), iaa.AddElementwise((-5, 5))],
                        name="Sequential"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.SomeOf(1,
                    [iaa.Add((-5, 5)), iaa.AddElementwise((-5, 5))],
                    name="SomeOf"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.OneOf([iaa.Add((-5, 5)), iaa.AddElementwise((-5, 5))],
                   name="OneOf"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.Sometimes(0.5, iaa.Add((-5, 5)), name="Sometimes"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.Identity(name="Identity"), default_dtypes),
        (iaa.BlendAlpha((0.0, 0.1), iaa.Identity(), name="BlendAlphaIdentity"),
         _not_dts([np.float64])),  # float64 requires float128 support
        (iaa.BlendAlphaElementwise((0.0, 0.1), iaa.Identity(),
                                   name="BlendAlphaElementwiseIdentity"),
         _not_dts([np.float64])),  # float64 requires float128 support
        (iaa.BlendAlphaSimplexNoise(iaa.Identity(),
                                    name="BlendAlphaSimplexNoiseIdentity"),
         _not_dts([np.float64])),  # float64 requires float128 support
        (iaa.BlendAlphaFrequencyNoise(exponent=(-2, 2),
                                      foreground=iaa.Identity(),
                                      name="BlendAlphaFrequencyNoiseIdentity"),
         _not_dts([np.float64])),
        (iaa.BlendAlpha((0.0, 0.1), iaa.Add(10), name="BlendAlpha"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.BlendAlphaElementwise((0.0, 0.1), iaa.Add(10),
                                   name="BlendAlphaElementwise"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.BlendAlphaSimplexNoise(iaa.Add(10), name="BlendAlphaSimplexNoise"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.BlendAlphaFrequencyNoise(exponent=(-2, 2),
                                      foreground=iaa.Add(10),
                                      name="BlendAlphaFrequencyNoise"),
         _not_dts([np.uint32, np.int32, np.float64])),
        (iaa.Superpixels(p_replace=0.01, n_segments=64),
         _not_dts([np.float16, np.float32, np.float64])),
        (iaa.Resize({"height": 4, "width": 4}, name="Resize"),
         _not_dts([np.uint16, np.uint32, np.int16, np.int32, np.float32,
                   np.float16, np.float64])),
        (iaa.CropAndPad(px=(-10, 10), name="CropAndPad"),
         _not_dts([np.uint16, np.uint32, np.int16, np.int32, np.float32,
                   np.float16, np.float64])),
        (iaa.Pad(px=(0, 10), name="Pad"),
         _not_dts([np.uint16, np.uint32, np.int16, np.int32, np.float32,
                   np.float16, np.float64])),
        (iaa.Crop(px=(0, 10), name="Crop"),
         _not_dts([np.uint16, np.uint32, np.int16, np.int32, np.float32,
                   np.float16, np.float64]))
    ]

    # Run each augmenter only on the dtypes it supports and require the
    # output dtype to match the input dtype exactly.
    for (aug, allowed_dtypes) in augs:
        for images_i in images:
            if images_i.dtype in allowed_dtypes:
                images_aug = aug.augment_images(images_i)
                assert images_aug.dtype == images_i.dtype
Ejemplo n.º 21
0
def main():
    """Visual demo of crop/pad augmenters: shows a quokka image with three
    keypoints drawn, first for fixed augmentations, then as grids of 64
    random samples per augmenter.
    """
    image = ia.quokka(size=0.5)
    # Keypoints are defined on a double-size canvas and then projected onto
    # the actual image shape via `.on(...)` below.
    kps = [ia.KeypointsOnImage(
        [ia.Keypoint(x=245, y=203, vis=None, label=None), ia.Keypoint(x=365, y=195, vis=None, label=None),
         ia.Keypoint(x=313, y=269, vis=None, label=None)],
        shape=(image.shape[0]*2, image.shape[1]*2)
    )]
    kps[0] = kps[0].on(image.shape)
    print("image shape:", image.shape)

    # Deterministic augmenters: same transformation for every image.
    augs = [
        iaa.CropAndPad(px=50, name="pad-by-50px"),
        iaa.CropAndPad(px=(10, 20, 30, 40), name="pad-by-10-20-30-40px"),
        iaa.CropAndPad(percent=0.1, name="pad-by-01percent"),
        iaa.CropAndPad(percent=(0.01, 0.02, 0.03, 0.04), name="pad-by-001-002-003-004percent"),
        iaa.CropAndPad(px=-20, name="crop-by-20px"),
        iaa.CropAndPad(px=(-10, -20, -30, -40), name="crop-by-10-20-30-40px"),
        iaa.CropAndPad(percent=-0.1, name="crop-by-01percent"),
        iaa.CropAndPad(percent=(-0.01, -0.02, -0.03, -0.04), name="crop-by-001-002-003-004percent")
    ]

    # Stochastic augmenters: a new random transformation per image.
    augs_many = [
        iaa.Crop(px=(0, 50), name="native-crop-0-to-50px"),
        iaa.Crop(px=iap.DiscreteUniform(0, 50), name="native-crop-0-to-50px-iap"),
        iaa.Pad(px=(0, 50), pad_mode="linear_ramp", pad_cval=(0, 255), name="native-pad-0-to-50px-pad-modes"),
        iaa.CropAndPad(px=(0, 50), sample_independently=False, name="pad-by-0-to-50px-same"),
        iaa.CropAndPad(px=(0, 50), name="pad-by-0-to-50px"),
        iaa.CropAndPad(px=(0, 50), pad_mode=ia.ALL, pad_cval=(0, 255), name="pad-by-0-to-50px-random-pad-modes-cvals"),
        iaa.CropAndPad(px=((0, 50), (0, 50), (0, 50), (0, 50)), name="pad-by-0-to-50px-each"),
        iaa.CropAndPad(percent=(0, 0.1), sample_independently=False, name="pad-by-0-to-01percent-same"),
        iaa.CropAndPad(percent=(0, 0.1), name="pad-by-0-to-01percent"),
        iaa.CropAndPad(percent=(0, 0.1), pad_mode=ia.ALL, pad_cval=(0, 255),
                       name="pad-by-0-to-01percent-random-pad-modes-cvals"),
        iaa.CropAndPad(percent=((0, 0.1), (0, 0.1), (0, 0.1), (0, 0.1)), name="pad-by-0-to-01percent-each"),
        iaa.CropAndPad(px=(-50, 0), name="crop-by-50-to-0px"),
        iaa.CropAndPad(px=((-50, 0), (-50, 0), (-50, 0), (-50, 0)), name="crop-by-50-to-0px-each"),
        iaa.CropAndPad(percent=(-0.1, 0), name="crop-by-01-to-0percent"),
        iaa.CropAndPad(percent=((-0.1, 0), (-0.1, 0), (-0.1, 0), (-0.1, 0)), name="crop-by-01-to-0percent-each"),
        iaa.CropAndPad(px=(-50, 50), name="pad-and-crop-by-50px")
    ]

    print("original", image.shape)
    ia.imshow(kps[0].draw_on_image(image))

    print("-----------------")
    print("Same aug per image")
    print("-----------------")
    for aug in augs:
        img_aug = aug.augment_image(image)
        kps_aug = aug.augment_keypoints(kps)[0]
        img_aug_kps = kps_aug.draw_on_image(img_aug)
        print(aug.name, img_aug_kps.shape, img_aug_kps.shape[1]/img_aug_kps.shape[0])
        ia.imshow(img_aug_kps)

    print("-----------------")
    print("Random aug per image")
    print("-----------------")
    for aug in augs_many:
        images_aug = []
        for _ in range(64):
            # Freeze randomness so image and keypoints get the same transform.
            aug_det = aug.to_deterministic()
            img_aug = aug_det.augment_image(image)
            kps_aug = aug_det.augment_keypoints(kps)[0]
            img_aug_kps = kps_aug.draw_on_image(img_aug)
            # White 1px border so samples are visually separated in the grid.
            img_aug_kps = np.pad(img_aug_kps, ((1, 1), (1, 1), (0, 0)), mode="constant", constant_values=255)
            images_aug.append(img_aug_kps)
        print(aug.name)
        ia.imshow(ia.draw_grid(images_aug))
Ejemplo n.º 22
0
def test_unusual_channel_numbers():
    """Check that augmenters preserve image shape for unusual channel
    counts (0 = no channel axis, 1, 2, 4, 5, 10, 20).

    ``Resize`` is special-cased because it intentionally changes the
    spatial dimensions; every other augmenter must return the input shape.
    """
    reseed()

    images = [
        (0, create_random_images((4, 16, 16))),
        (1, create_random_images((4, 16, 16, 1))),
        (2, create_random_images((4, 16, 16, 2))),
        (4, create_random_images((4, 16, 16, 4))),
        (5, create_random_images((4, 16, 16, 5))),
        (10, create_random_images((4, 16, 16, 10))),
        (20, create_random_images((4, 16, 16, 20)))
    ]

    augs = [
        iaa.Add((-5, 5), name="Add"),
        iaa.AddElementwise((-5, 5), name="AddElementwise"),
        iaa.AdditiveGaussianNoise(0.01*255, name="AdditiveGaussianNoise"),
        iaa.Multiply((0.95, 1.05), name="Multiply"),
        iaa.Dropout(0.01, name="Dropout"),
        iaa.CoarseDropout(0.01, size_px=6, name="CoarseDropout"),
        iaa.Invert(0.01, per_channel=True, name="Invert"),
        iaa.GaussianBlur(sigma=(0.95, 1.05), name="GaussianBlur"),
        iaa.AverageBlur((3, 5), name="AverageBlur"),
        iaa.MedianBlur((3, 5), name="MedianBlur"),
        iaa.Sharpen((0.0, 0.1), lightness=(1.0, 1.2), name="Sharpen"),
        iaa.Emboss(alpha=(0.0, 0.1), strength=(0.5, 1.5), name="Emboss"),
        iaa.EdgeDetect(alpha=(0.0, 0.1), name="EdgeDetect"),
        iaa.DirectedEdgeDetect(alpha=(0.0, 0.1), direction=0,
                               name="DirectedEdgeDetect"),
        iaa.Fliplr(0.5, name="Fliplr"),
        iaa.Flipud(0.5, name="Flipud"),
        iaa.Affine(translate_px=(-5, 5), name="Affine-translate-px"),
        iaa.Affine(translate_percent=(-0.05, 0.05),
                   name="Affine-translate-percent"),
        iaa.Affine(rotate=(-20, 20), name="Affine-rotate"),
        iaa.Affine(shear=(-20, 20), name="Affine-shear"),
        iaa.Affine(scale=(0.9, 1.1), name="Affine-scale"),
        iaa.PiecewiseAffine(scale=(0.001, 0.005), name="PiecewiseAffine"),
        iaa.PerspectiveTransform(scale=(0.01, 0.10),
                                 name="PerspectiveTransform"),
        iaa.ElasticTransformation(alpha=(0.1, 0.2), sigma=(0.1, 0.2),
                                  name="ElasticTransformation"),
        iaa.Sequential([iaa.Add((-5, 5)), iaa.AddElementwise((-5, 5))]),
        iaa.SomeOf(1, [iaa.Add((-5, 5)), iaa.AddElementwise((-5, 5))]),
        iaa.OneOf([iaa.Add((-5, 5)), iaa.AddElementwise((-5, 5))]),
        iaa.Sometimes(0.5, iaa.Add((-5, 5)), name="Sometimes"),
        iaa.Identity(name="Noop"),
        iaa.BlendAlpha((0.0, 0.1), iaa.Add(10), name="BlendAlpha"),
        iaa.BlendAlphaElementwise((0.0, 0.1), iaa.Add(10),
                                  name="BlendAlphaElementwise"),
        iaa.BlendAlphaSimplexNoise(iaa.Add(10), name="BlendAlphaSimplexNoise"),
        # Fix: this augmenter was mislabelled "BlendAlphaSimplexNoise"
        # (copy-paste from the entry above); the name now matches the class.
        iaa.BlendAlphaFrequencyNoise(exponent=(-2, 2),
                                     foreground=iaa.Add(10),
                                     name="BlendAlphaFrequencyNoise"),
        iaa.Superpixels(p_replace=0.01, n_segments=64),
        iaa.Resize({"height": 4, "width": 4}, name="Resize"),
        iaa.CropAndPad(px=(-10, 10), name="CropAndPad"),
        iaa.Pad(px=(0, 10), name="Pad"),
        iaa.Crop(px=(0, 10), name="Crop")
    ]

    for aug in augs:
        for (nb_channels, images_c) in images:
            if aug.name != "Resize":
                # Shape must be preserved, both batch-wise and per image.
                images_aug = aug.augment_images(images_c)
                assert images_aug.shape == images_c.shape
                image_aug = aug.augment_image(images_c[0])
                assert image_aug.shape == images_c[0].shape
            else:
                # Resize changes H/W to 4x4 but must keep the channel axis.
                images_aug = aug.augment_images(images_c)
                image_aug = aug.augment_image(images_c[0])
                if images_c.ndim == 3:
                    assert images_aug.shape == (4, 4, 4)
                    assert image_aug.shape == (4, 4)
                else:
                    assert images_aug.shape == (4, 4, 4, images_c.shape[3])
                    assert image_aug.shape == (4, 4, images_c.shape[3])
Ejemplo n.º 23
0
def test_determinism():
    """Test that ``to_deterministic()`` makes augmenter outputs repeatable.

    For every augmenter: two augmentation runs with the same deterministic
    copy must produce identical outputs, while a freshly created
    deterministic copy must produce outputs that differ from the first
    pair. Keypoint determinism is verified only for the subset of
    augmenters that modify geometry (``augs_affect_geometry``).
    """
    reseed()

    # Images of several sizes; the 20 repeated 16x16 quokkas make the
    # batch large enough that two independent runs are very unlikely to
    # be identical by chance.
    images = [
        ia.quokka(size=(128, 128)),
        ia.quokka(size=(64, 64)),
        ia.quokka((128, 256))
    ]
    images.extend([ia.quokka(size=(16, 16))] * 20)

    # 20 references to the same keypoint set; ``shape`` describes the
    # image the keypoints are placed on.
    keypoints = [
        ia.KeypointsOnImage([
            ia.Keypoint(x=20, y=10), ia.Keypoint(x=5, y=5),
            ia.Keypoint(x=10, y=43)], shape=(50, 60, 3))
    ] * 20

    # Augmenters checked for deterministic image augmentation. All use
    # stochastic parameter ranges so that non-frozen random state would
    # be detectable.
    augs = [
        iaa.Sequential([iaa.Fliplr(0.5), iaa.Flipud(0.5)]),
        iaa.SomeOf(1, [iaa.Fliplr(0.5), iaa.Flipud(0.5)]),
        iaa.OneOf([iaa.Fliplr(0.5), iaa.Flipud(0.5)]),
        iaa.Sometimes(0.5, iaa.Fliplr(1.0)),
        iaa.WithColorspace("HSV", children=iaa.Add((-50, 50))),
        iaa.Resize((0.5, 0.9)),
        iaa.CropAndPad(px=(-50, 50)),
        iaa.Pad(px=(1, 50)),
        iaa.Crop(px=(1, 50)),
        iaa.Fliplr(0.5),
        iaa.Flipud(0.5),
        iaa.Superpixels(p_replace=(0.25, 1.0), n_segments=(16, 128)),
        iaa.Grayscale(alpha=(0.1, 1.0)),
        iaa.GaussianBlur((0.1, 3.0)),
        iaa.AverageBlur((3, 11)),
        iaa.MedianBlur((3, 11)),
        iaa.Sharpen(alpha=(0.1, 1.0), lightness=(0.8, 1.2)),
        iaa.Emboss(alpha=(0.1, 1.0), strength=(0.8, 1.2)),
        iaa.EdgeDetect(alpha=(0.1, 1.0)),
        iaa.DirectedEdgeDetect(alpha=(0.1, 1.0), direction=(0.0, 1.0)),
        iaa.Add((-50, 50)),
        iaa.AddElementwise((-50, 50)),
        iaa.AdditiveGaussianNoise(scale=(0.1, 1.0)),
        iaa.Multiply((0.6, 1.4)),
        iaa.MultiplyElementwise((0.6, 1.4)),
        iaa.Dropout((0.3, 0.5)),
        iaa.CoarseDropout((0.3, 0.5), size_percent=(0.05, 0.2)),
        iaa.Invert(0.5),
        iaa.Affine(scale=(0.7, 1.3), translate_percent=(-0.1, 0.1),
                   rotate=(-20, 20), shear=(-20, 20), order=ia.ALL,
                   mode=ia.ALL, cval=(0, 255)),
        iaa.PiecewiseAffine(scale=(0.1, 0.3)),
        iaa.ElasticTransformation(alpha=10.0)
    ]

    # Subset of augmenters that change geometry and therefore must also
    # move keypoints deterministically.
    augs_affect_geometry = [
        iaa.Sequential([iaa.Fliplr(0.5), iaa.Flipud(0.5)]),
        iaa.SomeOf(1, [iaa.Fliplr(0.5), iaa.Flipud(0.5)]),
        iaa.OneOf([iaa.Fliplr(0.5), iaa.Flipud(0.5)]),
        iaa.Sometimes(0.5, iaa.Fliplr(1.0)),
        iaa.Resize((0.5, 0.9)),
        iaa.CropAndPad(px=(-50, 50)),
        iaa.Pad(px=(1, 50)),
        iaa.Crop(px=(1, 50)),
        iaa.Fliplr(0.5),
        iaa.Flipud(0.5),
        iaa.Affine(scale=(0.7, 1.3), translate_percent=(-0.1, 0.1),
                   rotate=(-20, 20), shear=(-20, 20), order=ia.ALL,
                   mode=ia.ALL, cval=(0, 255)),
        iaa.PiecewiseAffine(scale=(0.1, 0.3)),
        iaa.ElasticTransformation(alpha=(5, 100), sigma=(3, 5))
    ]

    for aug in augs:
        # Same deterministic instance -> runs 1 and 2 must match.
        aug_det = aug.to_deterministic()
        images_aug1 = aug_det.augment_images(images)
        images_aug2 = aug_det.augment_images(images)

        # Fresh deterministic instance -> runs 3 and 4 must match each
        # other but differ from runs 1/2 (new random state was sampled).
        aug_det = aug.to_deterministic()
        images_aug3 = aug_det.augment_images(images)
        images_aug4 = aug_det.augment_images(images)

        assert array_equal_lists(images_aug1, images_aug2), \
            "Images (1, 2) expected to be identical for %s" % (aug.name,)

        assert array_equal_lists(images_aug3, images_aug4), \
            "Images (3, 4) expected to be identical for %s" % (aug.name,)

        assert not array_equal_lists(images_aug1, images_aug3), \
            "Images (1, 3) expected to be different for %s" % (aug.name,)

    # Same two-pair scheme as above, applied to keypoints.
    for aug in augs_affect_geometry:
        aug_det = aug.to_deterministic()
        kps_aug1 = aug_det.augment_keypoints(keypoints)
        kps_aug2 = aug_det.augment_keypoints(keypoints)

        aug_det = aug.to_deterministic()
        kps_aug3 = aug_det.augment_keypoints(keypoints)
        kps_aug4 = aug_det.augment_keypoints(keypoints)

        assert keypoints_equal(kps_aug1, kps_aug2), \
            "Keypoints (1, 2) expected to be identical for %s" % (aug.name,)

        assert keypoints_equal(kps_aug3, kps_aug4), \
            "Keypoints (3, 4) expected to be identical for %s" % (aug.name,)

        assert not keypoints_equal(kps_aug1, kps_aug3), \
            "Keypoints (1, 3) expected to be different for %s" % (aug.name,)
Ejemplo n.º 24
0
        sometimes(
            iaa.Affine(
                scale={
                    "x": (0.6, 1),
                    "y": (0.6, 1)
                },
                rotate=(-15, 15),  # rotate by -45 to +45 degrees
                shear=(-15, 15),  # shear by -16 to +16 degrees
                order=[
                    0, 1
                ],  # use nearest neighbour or bilinear interpolation (fast)
                cval=(0,
                      0),  # if mode is constant, use a cval between 0 and 255
                mode='constant')),
        sometimes(
            iaa.Pad(percent=(0, 0.4), pad_mode='constant', pad_cval=(0, 0))),
    ]),
    iaa.OneOf([
        sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.1))),
        sometimes(iaa.PiecewiseAffine(
            scale=(0.01, 0.05))),  # sometimes move parts of the image around
    ])
])
aug = iaa.Sequential([
    iaa.SomeOf(
        (0, 6),
        [
            (iaa.Superpixels(p_replace=(0, 1.0), n_segments=(100, 200))),
            # convert images into their superpixel representation
            iaa.OneOf([
                iaa.GaussianBlur(
Ejemplo n.º 25
0
    def test_many_augmenters(self):
        """Cross-check keypoint augmentation against image-based tracking.

        A grid of keypoints is rendered into a keypoint image; that
        image is run through each augmenter and the keypoints recovered
        from the augmented image are compared against directly augmented
        keypoints. The average coordinate distance between both results
        must stay below 5 pixels. Also verifies that empty keypoint
        inputs pass through unchanged.
        """
        # Grid of keypoints every 5px over a 40x60 image.
        keypoints = []
        for y in sm.xrange(40//5):
            for x in sm.xrange(60//5):
                keypoints.append(ia.Keypoint(y=y*5, x=x*5))

        keypoints_oi = ia.KeypointsOnImage(keypoints, shape=(40, 60, 3))
        keypoints_oi_empty = ia.KeypointsOnImage([], shape=(40, 60, 3))

        augs = [
            iaa.Add((-5, 5), name="Add"),
            iaa.AddElementwise((-5, 5), name="AddElementwise"),
            iaa.AdditiveGaussianNoise(0.01*255, name="AdditiveGaussianNoise"),
            iaa.Multiply((0.95, 1.05), name="Multiply"),
            iaa.Dropout(0.01, name="Dropout"),
            iaa.CoarseDropout(0.01, size_px=6, name="CoarseDropout"),
            iaa.Invert(0.01, per_channel=True, name="Invert"),
            iaa.GaussianBlur(sigma=(0.95, 1.05), name="GaussianBlur"),
            iaa.AverageBlur((3, 5), name="AverageBlur"),
            iaa.MedianBlur((3, 5), name="MedianBlur"),
            iaa.Sharpen((0.0, 0.1), lightness=(1.0, 1.2), name="Sharpen"),
            iaa.Emboss(alpha=(0.0, 0.1), strength=(0.5, 1.5), name="Emboss"),
            iaa.EdgeDetect(alpha=(0.0, 0.1), name="EdgeDetect"),
            iaa.DirectedEdgeDetect(alpha=(0.0, 0.1), direction=0,
                                   name="DirectedEdgeDetect"),
            iaa.Fliplr(0.5, name="Fliplr"),
            iaa.Flipud(0.5, name="Flipud"),
            iaa.Affine(translate_px=(-5, 5), name="Affine-translate-px"),
            iaa.Affine(translate_percent=(-0.05, 0.05),
                       name="Affine-translate-percent"),
            iaa.Affine(rotate=(-20, 20), name="Affine-rotate"),
            iaa.Affine(shear=(-20, 20), name="Affine-shear"),
            iaa.Affine(scale=(0.9, 1.1), name="Affine-scale"),
            iaa.PiecewiseAffine(scale=(0.001, 0.005), name="PiecewiseAffine"),
            iaa.ElasticTransformation(alpha=(0.1, 0.2), sigma=(0.1, 0.2),
                                      name="ElasticTransformation"),
            iaa.BlendAlpha((0.0, 0.1), iaa.Add(10), name="BlendAlpha"),
            iaa.BlendAlphaElementwise((0.0, 0.1), iaa.Add(10),
                                      name="BlendAlphaElementwise"),
            iaa.BlendAlphaSimplexNoise(iaa.Add(10), name="BlendAlphaSimplexNoise"),
            # Fixed copy-pasted name (was "BlendAlphaSimplexNoise"), which
            # made failure messages misattribute this augmenter.
            iaa.BlendAlphaFrequencyNoise(exponent=(-2, 2), foreground=iaa.Add(10),
                                         name="BlendAlphaFrequencyNoise"),
            iaa.Superpixels(p_replace=0.01, n_segments=64),
            iaa.Resize(0.5, name="Resize"),
            iaa.CropAndPad(px=(-10, 10), name="CropAndPad"),
            iaa.Pad(px=(0, 10), name="Pad"),
            iaa.Crop(px=(0, 10), name="Crop")
        ]

        for aug in augs:
            dss = []
            # 10 deterministic runs per augmenter to average out noise.
            for i in sm.xrange(10):
                aug_det = aug.to_deterministic()

                # A fully empty keypoint list must pass through unchanged.
                kp_fully_empty_aug = aug_det.augment_keypoints([])
                assert kp_fully_empty_aug == []

                # A KeypointsOnImage without keypoints must stay empty.
                kp_first_empty_aug = aug_det.augment_keypoints(keypoints_oi_empty)
                assert len(kp_first_empty_aug.keypoints) == 0

                # Render keypoints into an image, augment that image and
                # recover keypoint positions from the augmented image.
                kp_image = keypoints_oi.to_keypoint_image(size=5)
                with assertWarns(self, iaa.SuspiciousSingleImageShapeWarning):
                    kp_image_aug = aug_det.augment_image(kp_image)
                kp_image_aug_rev = ia.KeypointsOnImage.from_keypoint_image(
                    kp_image_aug,
                    if_not_found_coords={"x": -9999, "y": -9999},
                    nb_channels=1
                )
                # Augment the keypoints directly with the same (frozen)
                # random state for comparison.
                kp_aug = aug_det.augment_keypoints([keypoints_oi])[0]
                ds = []
                assert len(kp_image_aug_rev.keypoints) == len(kp_aug.keypoints), (
                    "Lost keypoints for '%s' (%d vs expected %d)" % (
                        aug.name,
                        len(kp_aug.keypoints),
                        len(kp_image_aug_rev.keypoints))
                )

                # Compare positions pairwise; (-9999, -9999) marks
                # keypoints that left the image and are skipped.
                gen = zip(kp_aug.keypoints, kp_image_aug_rev.keypoints)
                for kp_pred, kp_pred_img in gen:
                    kp_pred_lost = (kp_pred.x == -9999 and kp_pred.y == -9999)
                    kp_pred_img_lost = (kp_pred_img.x == -9999
                                        and kp_pred_img.y == -9999)

                    if not kp_pred_lost and not kp_pred_img_lost:
                        d = np.sqrt((kp_pred.x - kp_pred_img.x) ** 2
                                    + (kp_pred.y - kp_pred_img.y) ** 2)
                        ds.append(d)
                dss.extend(ds)
                if len(ds) == 0:
                    # Fixed message: previously referenced the wrong
                    # test function name.
                    print("[INFO] No valid keypoints found for '%s' "
                          "in test_many_augmenters()" % (str(aug),))
            assert np.average(dss) < 5.0, \
                "Average distance too high (%.2f, with ds: %s)" \
                % (np.average(dss), str(dss))
Ejemplo n.º 26
0
    "Add_To_Hue_And_Saturation": lambda lo, hi: iaa.AddToHueAndSaturation((lo, hi), per_channel=True),

    # Increase each pixel’s channel-value (redness/greenness/blueness) [0, 1, 2] by value in between lo and hi:
    "Increase_Channel": lambda channel, lo, hi: iaa.WithChannels(channel, iaa.Add((lo, hi))),
    # Rotate each image’s channel [R=0, G=1, B=2] by value in between lo and hi degrees:
    "Rotate_Channel": lambda channel, lo, hi: iaa.WithChannels(channel, iaa.Affine(rotate=(lo, hi))),

    # Augmenter that never changes input images (“no operation”).
    "No_Operation": iaa.Noop(),

    # Pads images, i.e. adds columns/rows to them. Pads image by value in between lo and hi
    # percent relative to its original size (only accepts positive values in range[0, 1]):
    # If s_i is false, The value will be sampled once per image and used for all sides
    # (i.e. all sides gain/lose the same number of rows/columns)
    # NOTE: automatically resizes images back to their original size after it has augmented them.
    "Pad_Percent": lambda lo, hi, s_i: iaa.Pad(percent=(lo, hi), keep_size=True, sample_independently=s_i),

    # Pads images by a number of pixels between lo and hi
    # If s_i is false, The value will be sampled once per image and used for all sides
    # (i.e. all sides gain/lose the same number of rows/columns)
    "Pad_Pixels": lambda lo, hi, s_i: iaa.Pad(px=(lo, hi), keep_size=True, sample_independently=s_i),

    # Crops/cuts away pixels at the sides of the image.
    # Crops images by value in between lo and hi (only accepts positive values in range[0, 1]):
    # If s_i is false, The value will be sampled once per image and used for all sides
    # (i.e. all sides gain/lose the same number of rows/columns)
    # NOTE: automatically resizes images back to their original size after it has augmented them.
    "Crop_Percent": lambda lo, hi, s_i: iaa.Crop(percent=(lo, hi), keep_size=True, sample_independently=s_i),

    # Crops images by a number of pixels between lo and hi
    # If s_i is false, The value will be sampled once per image and used for all sides
Ejemplo n.º 27
0
def draw_per_augmenter_images():
    """Generate one example image file per augmenter row for the docs.

    Builds a table of (seed, row title, [(cell title, augmenter), ...])
    entries, applies each augmenter deterministically to a quokka image
    plus three keypoints, draws the results into per-row example images,
    saves them to IMAGES_DIR and prints the matching markdown image
    markup. Two older drawing routines are kept below as inactive
    triple-quoted strings for reference.
    """
    print("[draw_per_augmenter_images] Loading image...")
    #image = misc.imresize(ndimage.imread("quokka.jpg")[0:643, 0:643], (128, 128))
    image = ia.quokka_square(size=(128, 128))

    keypoints = [ia.Keypoint(x=34, y=15), ia.Keypoint(x=85, y=13), ia.Keypoint(x=63, y=73)] # left ear, right ear, mouth
    keypoints = [ia.KeypointsOnImage(keypoints, shape=image.shape)]

    print("[draw_per_augmenter_images] Initializing...")
    # Each entry: (row seed, row name shown on the left, list of five
    # (cell title, augmenter) pairs forming one row of examples).
    rows_augmenters = [
        (0, "Noop", [("", iaa.Noop()) for _ in sm.xrange(5)]),
        (0, "Crop\n(top, right,\nbottom, left)", [(str(vals), iaa.Crop(px=vals)) for vals in [(2, 0, 0, 0), (0, 8, 8, 0), (4, 0, 16, 4), (8, 0, 0, 32), (32, 64, 0, 0)]]),
        (0, "Pad\n(top, right,\nbottom, left)", [(str(vals), iaa.Pad(px=vals)) for vals in [(2, 0, 0, 0), (0, 8, 8, 0), (4, 0, 16, 4), (8, 0, 0, 32), (32, 64, 0, 0)]]),
        (0, "Fliplr", [(str(p), iaa.Fliplr(p)) for p in [0, 0, 1, 1, 1]]),
        (0, "Flipud", [(str(p), iaa.Flipud(p)) for p in [0, 0, 1, 1, 1]]),
        (0, "Superpixels\np_replace=1", [("n_segments=%d" % (n_segments,), iaa.Superpixels(p_replace=1.0, n_segments=n_segments)) for n_segments in [25, 50, 75, 100, 125]]),
        (0, "Superpixels\nn_segments=100", [("p_replace=%.2f" % (p_replace,), iaa.Superpixels(p_replace=p_replace, n_segments=100)) for p_replace in [0, 0.25, 0.5, 0.75, 1.0]]),
        (0, "Invert", [("p=%d" % (p,), iaa.Invert(p=p)) for p in [0, 0, 1, 1, 1]]),
        (0, "Invert\n(per_channel)", [("p=%.2f" % (p,), iaa.Invert(p=p, per_channel=True)) for p in [0.5, 0.5, 0.5, 0.5, 0.5]]),
        (0, "Add", [("value=%d" % (val,), iaa.Add(val)) for val in [-45, -25, 0, 25, 45]]),
        (0, "Add\n(per channel)", [("value=(%d, %d)" % (vals[0], vals[1],), iaa.Add(vals, per_channel=True)) for vals in [(-55, -35), (-35, -15), (-10, 10), (15, 35), (35, 55)]]),
        (0, "AddToHueAndSaturation", [("value=%d" % (val,), iaa.AddToHueAndSaturation(val)) for val in [-45, -25, 0, 25, 45]]),
        (0, "Multiply", [("value=%.2f" % (val,), iaa.Multiply(val)) for val in [0.25, 0.5, 1.0, 1.25, 1.5]]),
        (1, "Multiply\n(per channel)", [("value=(%.2f, %.2f)" % (vals[0], vals[1],), iaa.Multiply(vals, per_channel=True)) for vals in [(0.15, 0.35), (0.4, 0.6), (0.9, 1.1), (1.15, 1.35), (1.4, 1.6)]]),
        (0, "GaussianBlur", [("sigma=%.2f" % (sigma,), iaa.GaussianBlur(sigma=sigma)) for sigma in [0.25, 0.50, 1.0, 2.0, 4.0]]),
        (0, "AverageBlur", [("k=%d" % (k,), iaa.AverageBlur(k=k)) for k in [1, 3, 5, 7, 9]]),
        (0, "MedianBlur", [("k=%d" % (k,), iaa.MedianBlur(k=k)) for k in [1, 3, 5, 7, 9]]),
        (0, "BilateralBlur\nsigma_color=250,\nsigma_space=250", [("d=%d" % (d,), iaa.BilateralBlur(d=d, sigma_color=250, sigma_space=250)) for d in [1, 3, 5, 7, 9]]),
        (0, "Sharpen\n(alpha=1)", [("lightness=%.2f" % (lightness,), iaa.Sharpen(alpha=1, lightness=lightness)) for lightness in [0, 0.5, 1.0, 1.5, 2.0]]),
        (0, "Emboss\n(alpha=1)", [("strength=%.2f" % (strength,), iaa.Emboss(alpha=1, strength=strength)) for strength in [0, 0.5, 1.0, 1.5, 2.0]]),
        (0, "EdgeDetect", [("alpha=%.2f" % (alpha,), iaa.EdgeDetect(alpha=alpha)) for alpha in [0.0, 0.25, 0.5, 0.75, 1.0]]),
        (0, "DirectedEdgeDetect\n(alpha=1)", [("direction=%.2f" % (direction,), iaa.DirectedEdgeDetect(alpha=1, direction=direction)) for direction in [0.0, 1*(360/5)/360, 2*(360/5)/360, 3*(360/5)/360, 4*(360/5)/360]]),
        (0, "AdditiveGaussianNoise", [("scale=%.2f*255" % (scale,), iaa.AdditiveGaussianNoise(scale=scale * 255)) for scale in [0.025, 0.05, 0.1, 0.2, 0.3]]),
        (0, "AdditiveGaussianNoise\n(per channel)", [("scale=%.2f*255" % (scale,), iaa.AdditiveGaussianNoise(scale=scale * 255, per_channel=True)) for scale in [0.025, 0.05, 0.1, 0.2, 0.3]]),
        (0, "Dropout", [("p=%.2f" % (p,), iaa.Dropout(p=p)) for p in [0.025, 0.05, 0.1, 0.2, 0.4]]),
        (0, "Dropout\n(per channel)", [("p=%.2f" % (p,), iaa.Dropout(p=p, per_channel=True)) for p in [0.025, 0.05, 0.1, 0.2, 0.4]]),
        (3, "CoarseDropout\n(p=0.2)", [("size_percent=%.2f" % (size_percent,), iaa.CoarseDropout(p=0.2, size_percent=size_percent, min_size=2)) for size_percent in [0.3, 0.2, 0.1, 0.05, 0.02]]),
        (0, "CoarseDropout\n(p=0.2, per channel)", [("size_percent=%.2f" % (size_percent,), iaa.CoarseDropout(p=0.2, size_percent=size_percent, per_channel=True, min_size=2)) for size_percent in [0.3, 0.2, 0.1, 0.05, 0.02]]),
        (0, "SaltAndPepper", [("p=%.2f" % (p,), iaa.SaltAndPepper(p=p)) for p in [0.025, 0.05, 0.1, 0.2, 0.4]]),
        (0, "Salt", [("p=%.2f" % (p,), iaa.Salt(p=p)) for p in [0.025, 0.05, 0.1, 0.2, 0.4]]),
        (0, "Pepper", [("p=%.2f" % (p,), iaa.Pepper(p=p)) for p in [0.025, 0.05, 0.1, 0.2, 0.4]]),
        (0, "CoarseSaltAndPepper\n(p=0.2)", [("size_percent=%.2f" % (size_percent,), iaa.CoarseSaltAndPepper(p=0.2, size_percent=size_percent, min_size=2)) for size_percent in [0.3, 0.2, 0.1, 0.05, 0.02]]),
        (0, "CoarseSalt\n(p=0.2)", [("size_percent=%.2f" % (size_percent,), iaa.CoarseSalt(p=0.2, size_percent=size_percent, min_size=2)) for size_percent in [0.3, 0.2, 0.1, 0.05, 0.02]]),
        (0, "CoarsePepper\n(p=0.2)", [("size_percent=%.2f" % (size_percent,), iaa.CoarsePepper(p=0.2, size_percent=size_percent, min_size=2)) for size_percent in [0.3, 0.2, 0.1, 0.05, 0.02]]),
        (0, "ContrastNormalization", [("alpha=%.1f" % (alpha,), iaa.ContrastNormalization(alpha=alpha)) for alpha in [0.5, 0.75, 1.0, 1.25, 1.50]]),
        (0, "ContrastNormalization\n(per channel)", [("alpha=(%.2f, %.2f)" % (alphas[0], alphas[1],), iaa.ContrastNormalization(alpha=alphas, per_channel=True)) for alphas in [(0.4, 0.6), (0.65, 0.85), (0.9, 1.1), (1.15, 1.35), (1.4, 1.6)]]),
        (0, "Grayscale", [("alpha=%.1f" % (alpha,), iaa.Grayscale(alpha=alpha)) for alpha in [0.0, 0.25, 0.5, 0.75, 1.0]]),
        (6, "PerspectiveTransform", [("scale=%.3f" % (scale,), iaa.PerspectiveTransform(scale=scale)) for scale in [0.025, 0.05, 0.075, 0.10, 0.125]]),
        (0, "PiecewiseAffine", [("scale=%.3f" % (scale,), iaa.PiecewiseAffine(scale=scale)) for scale in [0.015, 0.03, 0.045, 0.06, 0.075]]),
        (0, "Affine: Scale", [("%.1fx" % (scale,), iaa.Affine(scale=scale)) for scale in [0.1, 0.5, 1.0, 1.5, 1.9]]),
        (0, "Affine: Translate", [("x=%d y=%d" % (x, y), iaa.Affine(translate_px={"x": x, "y": y})) for x, y in [(-32, -16), (-16, -32), (-16, -8), (16, 8), (16, 32)]]),
        (0, "Affine: Rotate", [("%d deg" % (rotate,), iaa.Affine(rotate=rotate)) for rotate in [-90, -45, 0, 45, 90]]),
        (0, "Affine: Shear", [("%d deg" % (shear,), iaa.Affine(shear=shear)) for shear in [-45, -25, 0, 25, 45]]),
        (0, "Affine: Modes", [(mode, iaa.Affine(translate_px=-32, mode=mode)) for mode in ["constant", "edge", "symmetric", "reflect", "wrap"]]),
        (0, "Affine: cval", [("%d" % (int(cval*255),), iaa.Affine(translate_px=-32, cval=int(cval*255), mode="constant")) for cval in [0.0, 0.25, 0.5, 0.75, 1.0]]),
        (
            2, "Affine: all", [
                (
                    "",
                    iaa.Affine(
                        scale={"x": (0.5, 1.5), "y": (0.5, 1.5)},
                        translate_px={"x": (-32, 32), "y": (-32, 32)},
                        rotate=(-45, 45),
                        shear=(-32, 32),
                        mode=ia.ALL,
                        cval=(0.0, 1.0)
                    )
                )
                for _ in sm.xrange(5)
            ]
        ),
        (1, "ElasticTransformation\n(sigma=0.2)", [("alpha=%.1f" % (alpha,), iaa.ElasticTransformation(alpha=alpha, sigma=0.2)) for alpha in [0.1, 0.5, 1.0, 3.0, 9.0]]),
        (0, "Alpha\nwith EdgeDetect(1.0)", [("factor=%.1f" % (factor,), iaa.Alpha(factor=factor, first=iaa.EdgeDetect(1.0))) for factor in [0.0, 0.25, 0.5, 0.75, 1.0]]),
        (4, "Alpha\nwith EdgeDetect(1.0)\n(per channel)", [("factor=(%.2f, %.2f)" % (factor[0], factor[1]), iaa.Alpha(factor=factor, first=iaa.EdgeDetect(1.0), per_channel=0.5)) for factor in [(0.0, 0.2), (0.15, 0.35), (0.4, 0.6), (0.65, 0.85), (0.8, 1.0)]]),
        (15, "SimplexNoiseAlpha\nwith EdgeDetect(1.0)", [("", iaa.SimplexNoiseAlpha(first=iaa.EdgeDetect(1.0))) for alpha in [0.0, 0.25, 0.5, 0.75, 1.0]]),
        (9, "FrequencyNoiseAlpha\nwith EdgeDetect(1.0)", [("exponent=%.1f" % (exponent,), iaa.FrequencyNoiseAlpha(exponent=exponent, first=iaa.EdgeDetect(1.0), size_px_max=16, upscale_method="linear", sigmoid=False)) for exponent in [-4, -2, 0, 2, 4]])
    ]

    print("[draw_per_augmenter_images] Augmenting...")
    rows = []
    for (row_seed, row_name, augmenters) in rows_augmenters:
        # Per-row seed so each row's examples are reproducible.
        ia.seed(row_seed)
        #for img_title, augmenter in augmenters:
        #    #aug.reseed(1000)
        #    pass

        row_images = []
        row_keypoints = []
        row_titles = []
        for img_title, augmenter in augmenters:
            # Deterministic copy so image and keypoints get the same
            # sampled transformation.
            aug_det = augmenter.to_deterministic()
            row_images.append(aug_det.augment_image(image))
            row_keypoints.append(aug_det.augment_keypoints(keypoints)[0])
            row_titles.append(img_title)
        rows.append((row_name, row_images, row_keypoints, row_titles))

    # matplotlib drawin routine
    """
    print("[draw_per_augmenter_images] Plotting...")
    width = 8
    height = int(1.5 * len(rows_augmenters))
    fig = plt.figure(figsize=(width, height))
    grid_rows = len(rows)
    grid_cols = 1 + 5
    gs = gridspec.GridSpec(grid_rows, grid_cols, width_ratios=[2, 1, 1, 1, 1, 1])
    axes = []
    for i in sm.xrange(grid_rows):
        axes.append([plt.subplot(gs[i, col_idx]) for col_idx in sm.xrange(grid_cols)])
    fig.tight_layout()
    #fig.subplots_adjust(bottom=0.2 / grid_rows, hspace=0.22)
    #fig.subplots_adjust(wspace=0.005, hspace=0.425, bottom=0.02)
    fig.subplots_adjust(wspace=0.005, hspace=0.005, bottom=0.02)

    for row_idx, (row_name, row_images, row_keypoints, row_titles) in enumerate(rows):
        axes_row = axes[row_idx]

        for col_idx in sm.xrange(grid_cols):
            ax = axes_row[col_idx]

            ax.cla()
            ax.axis("off")
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

            if col_idx == 0:
                ax.text(0, 0.5, row_name, color="black")
            else:
                cell_image = row_images[col_idx-1]
                cell_keypoints = row_keypoints[col_idx-1]
                cell_image_kp = cell_keypoints.draw_on_image(cell_image, size=5)
                ax.imshow(cell_image_kp)
                x = 0
                y = 145
                #ax.text(x, y, row_titles[col_idx-1], color="black", backgroundcolor="white", fontsize=6)
                ax.text(x, y, row_titles[col_idx-1], color="black", fontsize=7)


    fig.savefig("examples.jpg", bbox_inches="tight")
    #plt.show()
    """

    # simpler and faster drawing routine
    """
    output_image = ExamplesImage(128, 128, 128+64, 32)
    for (row_name, row_images, row_keypoints, row_titles) in rows:
        row_images_kps = []
        for image, keypoints in zip(row_images, row_keypoints):
            row_images_kps.append(keypoints.draw_on_image(image, size=5))
        output_image.add_row(row_name, row_images_kps, row_titles)
    misc.imsave("examples.jpg", output_image.draw())
    """

    # routine to draw many single files
    # One output file per row; duplicate sanitized names get a numeric
    # suffix via the ``seen`` counter.
    seen = defaultdict(lambda: 0)
    markups = []
    for (row_name, row_images, row_keypoints, row_titles) in rows:
        output_image = ExamplesImage(128, 128, 128+64, 32)
        row_images_kps = []
        for image, keypoints in zip(row_images, row_keypoints):
            row_images_kps.append(keypoints.draw_on_image(image, size=5))
        output_image.add_row(row_name, row_images_kps, row_titles)
        # Build a filesystem-safe name from the first line of the row name.
        if "\n" in row_name:
            row_name_clean = row_name[0:row_name.find("\n")+1]
        else:
            row_name_clean = row_name
        row_name_clean = re.sub(r"[^a-z0-9]+", "_", row_name_clean.lower())
        row_name_clean = row_name_clean.strip("_")
        if seen[row_name_clean] > 0:
            row_name_clean = "%s_%d" % (row_name_clean, seen[row_name_clean] + 1)
        fp = os.path.join(IMAGES_DIR, "examples_%s.jpg" % (row_name_clean,))
        #misc.imsave(fp, output_image.draw())
        save(fp, output_image.draw())
        seen[row_name_clean] += 1

        # Markdown image markup for embedding in the README/docs.
        markup_descr = row_name.replace('"', '') \
                               .replace("\n", " ") \
                               .replace("(", "") \
                               .replace(")", "")
        markup = '![%s](%s?raw=true "%s")' % (markup_descr, fp, markup_descr)
        markups.append(markup)

    for markup in markups:
        print(markup)
Ejemplo n.º 28
0
    def build_augmentators(self):
        """Build the augmentation pipelines for text-line images.

        Creates two ``iaa.Sequential`` pipelines and stores them on the
        instance:

        * ``self.augment_lines_general`` -- the default pipeline for
          line images (italicize/rotate, padding, perspective and
          elastic distortion, skeletonization, plus grayscale-only
          contrast/blur/noise stages).
        * ``self.augment_lines_short_image`` -- a variant for short
          images with smaller absolute padding, milder perspective
          transform and weaker Gaussian blur.

        Reads ``self.cval``, ``self.is_binary``, ``self.grayscale_only``
        (used as a probability multiplier: 0 disables the grayscale-only
        stages), ``self.background_images_value`` and
        ``self.background_images_path``.
        """
        print(self.grayscale_only, self.background_images_value)
        self.augment_lines_general = iaa.Sequential([
            iaa.Sometimes(0.3, ItalicizeLine(shear=(-30, 31), cval=self.cval)),
            iaa.Sometimes(0.3, RotateLine(angle=(-5, 5), cval=self.cval)),
            # Pad either by percentage or by absolute pixels (one of the two).
            iaa.OneOf([
                iaa.Pad(percent=((0.02, 0.1), (0.01, 0.1), (0.02, 0.1), (0.02,
                                                                         0.1)),
                        pad_mode='constant',
                        pad_cval=self.cval),
                iaa.Pad(px=((2, 20), (2, 60), (2, 20), (2, 60)),
                        pad_mode='constant',
                        pad_cval=self.cval),
            ]),
            iaa.Sometimes(
                0.3,
                PerspectiveTransform((0.05, 0.15),
                                     cval=self.cval,
                                     keep_size=False)),
            iaa.Sometimes(
                0.3,
                iaa.ElasticTransformation(alpha=(0, 1.0),
                                          sigma=(0.4, 0.6),
                                          cval=self.cval)),
            iaa.Sometimes(0.02, Skeletonize(self.is_binary)),
            # Stages below are only active for grayscale data
            # (probability is multiplied by self.grayscale_only).
            iaa.Sometimes(0.1 * self.grayscale_only,
                          iaa.ContrastNormalization((0.5, 1.5))),
            iaa.Sometimes(0.3 * self.grayscale_only, PencilStroke()),
            iaa.Sometimes(
                self.grayscale_only,
                iaa.OneOf([
                    iaa.Sometimes(
                        0.5,
                        iaa.OneOf([
                            iaa.GaussianBlur((0.2, 1.0)),
                            iaa.AverageBlur(k=(1, 5)),
                            iaa.MedianBlur(k=(1, 3))
                        ])),
                    iaa.OneOf([
                        iaa.Add((-50, 30)),
                        iaa.Multiply((0.9, 1.1)),
                        iaa.OneOf([
                            iaa.Dropout(p=(0.01, 0.05)),
                            iaa.CoarseDropout((0.01, 0.02),
                                              size_percent=(0.1, 0.25))
                        ]),
                        iaa.Sometimes(
                            0.7,
                            iaa.OneOf([
                                iaa.AddElementwise((-10 * n, 5 * n))
                                for n in range(1, 5)
                            ] + [
                                iaa.AdditiveGaussianNoise(scale=(0.05 * 255,
                                                                 0.1 * 255)),
                                iaa.MultiplyElementwise((0.95, 1.05))
                            ]))
                    ]),
                    iaa.Sometimes(
                        self.grayscale_only * self.background_images_value,
                        BackgroundImageNoises(self.background_images_path)),
                ]))
        ])

        # reduce absolute padding size, perspective transform value, gaussblur
        self.augment_lines_short_image = iaa.Sequential([
            iaa.Sometimes(0.4, ItalicizeLine(shear=(-25, 25), cval=self.cval)),
            iaa.Sometimes(0.4, RotateLine(angle=(-5, 5), cval=self.cval)),
            iaa.OneOf([
                iaa.Pad(percent=((0.01, 0.05), (0.01, 0.05), (0.01, 0.05),
                                 (0.01, 0.05)),
                        pad_mode='constant',
                        pad_cval=self.cval),
                iaa.Pad(px=((3, 10), (3, 30), (3, 10), (3, 30)),
                        pad_mode='constant',
                        pad_cval=self.cval),
            ]),
            iaa.Sometimes(
                0.3,
                PerspectiveTransform((0.02, 0.05),
                                     cval=self.cval,
                                     keep_size=False)),
            iaa.Sometimes(
                0.3,
                iaa.ElasticTransformation(alpha=(0, 1.0),
                                          sigma=(0.4, 0.6),
                                          cval=self.cval)),
            iaa.Sometimes(0.02, Skeletonize(self.is_binary)),
            iaa.Sometimes(0.1 * self.grayscale_only,
                          iaa.ContrastNormalization((0.5, 1.5))),
            iaa.Sometimes(0.3 * self.grayscale_only, PencilStroke()),
            # NOTE: unlike the general pipeline, the grayscale-only noise
            # stages here are a Sequential (all may apply), not a OneOf.
            iaa.Sometimes(
                self.grayscale_only,
                iaa.Sequential([
                    iaa.Sometimes(
                        0.5,
                        iaa.OneOf([
                            iaa.GaussianBlur((0.2, 0.5)),
                            iaa.AverageBlur(k=(1, 5)),
                            iaa.MedianBlur(k=(1, 3))
                        ])),
                    iaa.Sequential([
                        iaa.Sometimes(
                            0.7,
                            iaa.OneOf([
                                iaa.Add((-60, 0)),
                                iaa.Multiply((0.6, 0.9)),
                            ])),
                        iaa.Sometimes(
                            0.7,
                            iaa.OneOf([
                                iaa.Dropout(p=(0.01, 0.05)),
                                iaa.CoarseDropout((0.01, 0.02),
                                                  size_percent=(0.1, 0.25))
                            ])),
                        iaa.Sometimes(
                            0.7,
                            iaa.OneOf([
                                iaa.AddElementwise((-10 * n, 5 * n))
                                for n in range(1, 5)
                            ] + [
                                iaa.AdditiveGaussianNoise(scale=(0.05 * 255,
                                                                 0.1 * 255)),
                                iaa.MultiplyElementwise((0.95, 1.05))
                            ]))
                    ]),
                    # iaa.Sometimes(
                    #     self.grayscale_only * self.background_images_value,
                    #     BackgroundImageNoises(self.background_images_path)),
                ]))
        ])
Ejemplo n.º 29
0
class NeuralNet:
    """Convolutional classifier built on the TensorFlow 1.x ``tf.layers``
    API, with imgaug-based image augmentation and TensorBoard
    visualizations of kernels, most-exciting input patches and
    correctly/incorrectly classified inputs.

    NOTE(review): ``accs``, ``val_accs`` and ``augmenters`` are mutable
    class-level attributes, so by default they are shared across all
    instances of this class (``augmenters`` is only overridden per
    instance when the ``augmenters`` constructor argument is given).
    """

    # History of accuracies on train set
    accs = []

    # History of accuracies on test set
    val_accs = []

    # Image augmenters
    augmenters = [
        ia.Noop(),
        ia.CoarseSaltAndPepper(p=0.2, size_percent=0.30),
        ia.CoarseSaltAndPepper(p=0.4, size_percent=0.30),
        ia.Pad(px=(3, 0, 0, 0)),
        ia.Pad(px=(0, 3, 0, 0)),
        ia.Pad(px=(0, 0, 3, 0)),
        ia.Pad(px=(0, 0, 0, 3)),
        ia.GaussianBlur(sigma=0.25),
        ia.GaussianBlur(sigma=0.5),
        ia.GaussianBlur(sigma=1),
        ia.GaussianBlur(sigma=2),
        ia.Affine(rotate=-2),
        ia.Affine(rotate=2),
        ia.PiecewiseAffine(scale=0.007)
    ]

    def __init__(
        self,
        experiment_name: str,
        # Input shape
        input_shape: Tuple[int, int, int],
        # Mini batch size
        mb_size: int = 32,
        # Number of filters in each convolutional layer
        filters_count: Optional[List[int]] = None,
        # Size of kernel, common for each convolutional layer
        kernel_size: Optional[List[int]] = None,
        # Neurons count in each dense layer
        dense_layers: Optional[List[int]] = None,
        # Learning rate
        learning_rate: float = 0.005,
        # Number of epochs
        nb_epochs: int = 50000,
        # Steps per epoch. Each |steps_per_epoch| epochs net is evaluated on val set.
        steps_per_epoch: int = 1000,
        # Dropout after each dense layer (excluding last)
        dropout_rate: float = 0.5,
        # Whether or not augmentation should be performed when choosing next
        # batch (as opposed to augmenting the entire train set up front).
        augment_on_the_fly: bool = True,
        augmenters: Optional[List[ia.Augmenter]] = None,
        min_label: int = 0,
        max_label: int = NUM_CLASSES,
        # Whether or not classification should be in binary mode. If yes,
        # *please* provide the |positive_class| parameter.
        binary_classification: bool = False,
        # ID of the subject that is considered "positive" in case of
        # binary classification.
        positive_class: int = 0,
        # If provided, will store checkpoints to ckpt_file
        ckpt_file: Optional[str] = None,
    ):
        """Store configuration, load the dataset and set up logging."""
        self.experiment_name = experiment_name
        self.input_shape = input_shape
        self.mb_size = mb_size
        self.learning_rate = learning_rate
        self.nb_epochs = nb_epochs
        self.steps_per_epoch = steps_per_epoch
        self.dropout = dropout_rate
        self.augment_on_the_fly = augment_on_the_fly
        self.ckpt_file = ckpt_file
        self.binary_classification = binary_classification
        self.positive_class = positive_class
        # Binary mode uses a single sigmoid output unit.
        self.num_classes = NUM_CLASSES if not binary_classification else 1
        if dense_layers is None:
            dense_layers = [32, self.num_classes]
        self.dense_layers = dense_layers
        if filters_count is None:
            filters_count = [32, 64]
        self.filters_count = filters_count
        if kernel_size is None:
            kernel_size = [5, 5]
        self.kernel_size = kernel_size
        if binary_classification:
            self._confusion_matrix = np.zeros((2, 2))
        else:
            self._confusion_matrix = np.zeros(
                (self.num_classes, self.num_classes))
        # Instance-level override of the class-level default augmenters.
        if augmenters is not None:
            self.augmenters = augmenters

        # NOTE(review): _get_data runs before self.logger exists, which is
        # why it logs through the module-level `logging` functions instead.
        self._get_data(range_beg=min_label, range_end=max_label)

        # Initialize logging.
        self.logger = logging.Logger("main_logger", level=logging.INFO)
        log_file = 'log.txt'
        formatter = logging.Formatter(fmt='{levelname:<7} {message}',
                                      style='{')
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        file_handler = logging.FileHandler(log_file)
        file_handler.setFormatter(formatter)
        self.logger.addHandler(console_handler)
        self.logger.addHandler(file_handler)

    def _augment_single_input(self, inp_x: np.ndarray) -> np.ndarray:
        """
        Augments single input with given augmenter.
        :param inp_x: single input
        :return: augmented input
        """
        # One randomly chosen augmenter per call.
        augmenter = choice(self.augmenters)
        # Add a leading batch dimension of 1 for augment_images().
        inp_x = inp_x.reshape([1] + list(inp_x.shape))
        # Inputs are presumably floats in [0, 1): scaled by 256 to uint8
        # for imgaug, then scaled back down — TODO confirm input range.
        augmented = np.ndarray.astype(
            augmenter.augment_images(np.ndarray.astype(inp_x * 256, np.uint8)),
            np.float32)
        augmented = augmented * (1 / 256)
        # Drop the leading batch dimension added above.
        augmented = augmented.reshape(inp_x.shape[1:])
        return augmented

    def _augment_train_set(self) -> None:
        """
        Augments entire training set with all augmenters.
        :return: None, appends augmented images to the train set.
        """
        train_augs = []
        for augmenter in self.augmenters:
            cur_aug = np.ndarray.astype(
                augmenter.augment_images(
                    np.ndarray.astype(self.x_train * 256, np.uint8)),
                np.float32)
            cur_aug = cur_aug * (1 / 256)
            # Display augmented input, if you want
            # show_image(cur_aug[0].reshape(NN_INPUT_SIZE))
            train_augs.append(cur_aug)
        self.x_train = np.concatenate([self.x_train] + train_augs)
        # Labels are unchanged by augmentation: repeat them once per augmenter.
        self.y_train = np.concatenate([self.y_train] * (1 + len(train_augs)))

    def _get_data(self, range_beg: int = 0, range_end: int = 52) -> None:
        """
        :param range_beg, range_end: only samples such that label \in [range_beg, range_end) will be
            used. Sensible values for (range_beg, range_end) would be:
            * 00, 52 -> to use eurecom only
            * 52, 78 -> to use ias_lab_rgbd_only
            * 78, 98 -> to use superface_dataset only
        :return: self.(x|y)_(train|test) are set as a result
        """

        # Load stored numpy arrays from files.
        logging.info("Loading data..")
        self.x_train = np.load(DB_LOCATION + '/gen/' + self.experiment_name +
                               '_X_train.npy')
        self.y_train = np.load(DB_LOCATION + '/gen/' + self.experiment_name +
                               '_Y_train.npy')
        self.x_test = np.load(DB_LOCATION + '/gen/' + self.experiment_name +
                              '_X_test.npy')
        self.y_test = np.load(DB_LOCATION + '/gen/' + self.experiment_name +
                              '_Y_test.npy')
        train_indices = []
        test_indices = []

        # Filter out samples out of [range_beg, range_end).
        # Labels are one-hot rows; argmax recovers the class id.
        for i in range(len(self.y_train)):
            if range_end > np.argmax(self.y_train[i]) >= range_beg:
                train_indices.append(i)
        for i in range(len(self.y_test)):
            if range_end > np.argmax(self.y_test[i]) >= range_beg:
                test_indices.append(i)
        # Only the train set is shuffled; test order is preserved.
        shuffle(train_indices)
        self.x_train = self.x_train[train_indices]
        self.y_train = self.y_train[train_indices]
        self.x_test = self.x_test[test_indices]
        self.y_test = self.y_test[test_indices]

        if self.binary_classification:

            # Collapse one-hot labels to a single {0., 1.} column:
            # 1. iff the sample belongs to |positive_class|.
            def to_binary(row):
                return np.array([
                    1.
                ]) if np.argmax(row) == self.positive_class else np.array([0.])

            self.y_train = np.apply_along_axis(to_binary, 1, self.y_train)
            self.y_test = np.apply_along_axis(to_binary, 1, self.y_test)

        # Show first input if you want
        # NOTE(review): despite the comment, this call is unconditional.
        show_image(self.x_train[0].reshape(
            [self.input_shape[0], self.input_shape[1] * self.input_shape[2]]))

        # Image augmentation.
        if not self.augment_on_the_fly:
            self._augment_train_set()

        logging.info("Loaded data..")

    def _visualize_kernels(self):
        """
        For each convolutional layer, visualizes filters and convolved images.
        """
        for layer_no in range(len(self.conv_layers)):
            num_filters = self.filters_count[layer_no]
            kernels = []
            applied_kernels = []
            for filter_no in range(num_filters):
                # Each layer halves spatial dims via 2x2 max-pooling.
                inp_x = self.input_shape[0] // (2**layer_no)
                inp_y = self.input_shape[1] // (2**layer_no)
                # Variable names follow tf.layers' auto-naming scheme.
                if layer_no == 0:
                    tmp_str = 'conv2d/kernel:0'
                else:
                    tmp_str = 'conv2d_%d/kernel:0' % layer_no
                kernel = [
                    v for v in tf.global_variables() if v.name == tmp_str
                ][0]
                kernel = kernel[:, :, :, filter_no]
                cur_conv_layer = self.conv_layers[layer_no]
                # Flatten the kernel's input channels into one image row
                # so it can be written with tf.summary.image.
                if layer_no == 0:
                    kernel = tf.reshape(kernel, [
                        1, self.kernel_size[0] * self.input_shape[-1],
                        self.kernel_size[1], 1
                    ])
                else:
                    kernel = tf.reshape(kernel, [1] +\
                                        [self.kernel_size[0] * self.filters_count[layer_no - 1], self.kernel_size[1]] +
                                        [1])
                kernels.append(kernel)
                applied = tf.reshape(cur_conv_layer[0, :, :, filter_no],
                                     [1, inp_x, inp_y, 1])
                tf.summary.image('conv{0}_filter{1}_kernel'.format(
                    layer_no, filter_no),
                                 kernel,
                                 family='kernels_layer{0}'.format(layer_no),
                                 max_outputs=1)
                tf.summary.image('conv{0}_filter{1}_applied'.format(
                    layer_no, filter_no),
                                 applied,
                                 family='convolved_layer_{0}'.format(layer_no),
                                 max_outputs=1)
                applied_kernels.append(applied)
            # Write concatenated patches to summary.
            concatenated_kernels = tf.concat(kernels, axis=2)
            kernels_name = "kernels_layer{0}".format(layer_no)
            tf.summary.image(kernels_name,
                             concatenated_kernels,
                             family='kernels_all_layers')
            concatenated_applieds = tf.concat(applied_kernels, axis=2)
            applieds_name = "convolved_layer{0}".format(layer_no)
            tf.summary.image(applieds_name,
                             concatenated_applieds,
                             family='convolved_all_layers')

        if self.conv_layers:
            # Merge all visualizations of kernels.
            self.merged_summary = tf.summary.merge_all()

    def _visualize_exciting_patches(self):
        """
        For each convolutional layer, visualizes patches that excite each filter the most.
        """
        # Initialize fetch handles for exciting patches and their respective responses.
        self.exciting_patches = [[None] * k for k in self.filters_count]
        self.patches_responses = [[None] * k for k in self.filters_count]
        self.flattened_exciting_patches = [[None] * k
                                           for k in self.filters_count]
        self.all_exciting_patches_at_layer = [None for _ in self.filters_count]

        for layer_no in range(len(self.conv_layers)):
            num_filters = self.filters_count[layer_no]
            cur_conv_layer = self.conv_layers[layer_no]

            for filter_no in range(num_filters):
                # Find top 10 responses to current filter, in the current mini-batch.
                inp_x = self.input_shape[0] // (2**layer_no)
                inp_y = self.input_shape[1] // (2**layer_no)
                single_filtered_flattened = tf.reshape(
                    cur_conv_layer[:, :, :, filter_no],
                    [self.eff_mb_size * inp_x * inp_y])
                top10_vals, top10_indices = tf.nn.top_k(
                    single_filtered_flattened, k=10, sorted=True)
                # Convert flat indices back to (sample_no, x, y) triples.
                top10_reshaped = tf.map_fn(
                    lambda sxy: [
                        sxy // (inp_x * inp_y),
                        (sxy // inp_y) % inp_x, sxy % inp_y
                    ],
                    top10_indices,
                    dtype=[tf.int32, tf.int32, tf.int32])

                def safe_cut_patch(sxy, size, img, layer_no):
                    """
                    :param (sample_no, x, y)@sxy
                    :param size: size of patch to cut out
                    :param img: image to cut it from
                    :param layer_no: current layer number
                    :return: Cuts out a patch of size (|size|) located at (x, y) on
                        input #sample_no in current batch.
                    """
                    sample_no, x, y = sxy
                    # Map pooled-layer coordinates back to input coordinates.
                    x *= 2**layer_no
                    y *= 2**layer_no
                    # Zero-pad so patches near the border can be cut safely.
                    pad_marg_x = size[0] // 2
                    pad_marg_y = size[1] // 2
                    padding = [[0, 0], [pad_marg_x, pad_marg_x],
                               [pad_marg_y, pad_marg_y], [0, 0]]
                    padded = tf.pad(img, padding)
                    return padded[sample_no, x:x + size[0], y:y + size[1], :]

                # Find patches corresponding to the top 10 responses.
                # Store patches and responses in class-visible array to be retrieved later.
                self.exciting_patches[layer_no][filter_no] = \
                    tf.map_fn(lambda sxy: safe_cut_patch(sxy,
                                                         size=(self.kernel_size[0] * (2 ** layer_no),
                                                               self.kernel_size[1] * (2 ** layer_no)),
                                                         img=tf.expand_dims(self.x[:, :, :, 0], axis=-1),
                                                         layer_no=layer_no),
                              top10_reshaped,
                              dtype=tf.float32)
                self.patches_responses[layer_no][filter_no] = top10_vals

                # Flatten and concatenate the 10 patches to 2 dimensions for visualization.
                flattened_patches_shape = [1] + \
                                          [10 * self.kernel_size[0] * (2 ** layer_no),
                                           self.kernel_size[1] * (2 ** layer_no)] + \
                                          [1]
                # Write patches to summary.
                patch_name = "exciting_patches_filter{0}".format(filter_no)
                flattened_exciting_patches = tf.reshape(
                    self.exciting_patches[layer_no][filter_no],
                    flattened_patches_shape,
                    name=patch_name)
                self.flattened_exciting_patches[layer_no][
                    filter_no] = flattened_exciting_patches
            self.all_exciting_patches_at_layer[layer_no] = tf.concat(
                self.flattened_exciting_patches[layer_no], axis=2)
            # Write concatenated patches to summary.
            all_patches_name = "exciting_patches_layer{0}".format(layer_no)
            tf.summary.image(all_patches_name,
                             self.all_exciting_patches_at_layer[layer_no],
                             family='exciting_all_layers')

            # Merge all summaries.
            self.merged_summary = tf.summary.merge_all()

    def _visualize_incorrect_answer_images(self):
        """Write summaries of correctly and incorrectly classified inputs.

        Each group is stacked into one tall image (with a zero-row
        separator appended) and written via ``tf.summary.image``.
        """
        correct = tf.boolean_mask(self.x, self.correct)
        correct = tf.transpose(correct, perm=[0, 1, 3, 2])
        correct = tf.reshape(
            correct,
            shape=[1, -1, self.input_shape[1] * self.input_shape[2], 1])
        correct = tf.concat([
            correct,
            tf.zeros(
                shape=[1, 1, self.input_shape[1] * self.input_shape[2], 1])
        ],
                            axis=1)
        tf.summary.image('correct', correct)
        incorrect = tf.boolean_mask(self.x, tf.logical_not(self.correct))
        incorrect = tf.transpose(incorrect, perm=[0, 1, 3, 2])
        incorrect = tf.reshape(
            incorrect,
            shape=[1, -1, self.input_shape[1] * self.input_shape[2], 1])
        incorrect = tf.concat([
            incorrect,
            tf.zeros(
                shape=[1, 1, self.input_shape[1] * self.input_shape[2], 1])
        ],
                              axis=1)
        tf.summary.image('incorrect', incorrect)

        # Merge all summaries.
        self.merged_summary = tf.summary.merge_all()

    def _create_convolutional_layers(self) -> None:
        """Build batch-norm + conv2d + 2x2 max-pool stacks for each entry
        of ``self.filters_count``, appending layers to ``self.conv_layers``
        and ``self.pool_layers``.

        NOTE(review): despite the ``-> None`` annotation, this returns the
        final pooled tensor; the caller (``_create_model``) ignores it.
        """
        signal = self.x

        for layer_no in range(len(self.filters_count)):
            num_filters = self.filters_count[layer_no]
            signal = tf.layers.batch_normalization(signal)

            # Init weights with std.dev = sqrt(2 / N)
            #
            input_size = int(signal.get_shape()[1]) * int(
                signal.get_shape()[2]) * int(signal.get_shape()[3])
            w_init = tf.initializers.random_normal(stddev=sqrt(2 / input_size))

            # Convolutional layer
            cur_conv_layer = tf.layers.conv2d(inputs=signal,
                                              filters=num_filters,
                                              kernel_size=self.kernel_size,
                                              kernel_initializer=w_init,
                                              padding='same')

            # Reduce image dimensions in half.
            cur_pool_layer = tf.layers.max_pooling2d(inputs=cur_conv_layer,
                                                     pool_size=[2, 2],
                                                     strides=2,
                                                     padding='valid')

            self.conv_layers.append(cur_conv_layer)
            self.pool_layers.append(cur_pool_layer)

            # Set pooled image as current signal
            signal = cur_pool_layer

        return signal

    def _create_dense_layers(self) -> None:
        """Build the fully-connected head: batch-norm + dense(leaky_relu)
        + dropout for each hidden size, then a final sigmoid dense layer
        stored in ``self.output_layer``.
        """
        # Start from the last pooled layer, or directly from the input
        # when no convolutional layers were configured.
        signal = self.x if not self.pool_layers else self.pool_layers[-1]
        input_size = int(signal.get_shape()[1]) * int(
            signal.get_shape()[2]) * int(signal.get_shape()[3])
        signal = tf.reshape(signal, [self.eff_mb_size, input_size])

        for num_neurons in self.dense_layers[:-1]:
            signal = tf.layers.batch_normalization(signal)

            # Init weights with std.dev = sqrt(2 / N)
            # https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf?spm=5176.100239.blogcont55892.28.pm8zm1&file=He_Delving_Deep_into_ICCV_2015_paper.pdf
            input_size = int(signal.get_shape()[1])
            w_init = tf.initializers.random_normal(stddev=sqrt(2 / input_size))

            cur_dense_layer = tf.layers.dense(inputs=signal,
                                              units=num_neurons,
                                              activation=tf.nn.leaky_relu,
                                              kernel_initializer=w_init)

            signal = cur_dense_layer

            # Apply dropout
            cur_dropout_layer = tf.layers.dropout(inputs=signal,
                                                  rate=self.dropout)

            signal = cur_dropout_layer

        # Init weights with std.dev = sqrt(2 / N)
        input_size = int(signal.get_shape()[1])
        w_init = tf.initializers.random_normal(
            stddev=tf.sqrt(tf.constant(2.) / input_size))
        cur_layer = tf.layers.dense(inputs=signal,
                                    activation=tf.nn.sigmoid,
                                    units=self.dense_layers[-1],
                                    kernel_initializer=w_init)
        self.output_layer = cur_layer

    def _create_training_objectives(self) -> None:
        """Define predictions, log-loss, accuracy and the SGD train op."""
        if self.binary_classification:
            # Single sigmoid output: round to get the 0/1 prediction.
            self.preds = tf.cast(tf.round(self.output_layer), dtype=tf.int64)
            self.y_sparse = tf.cast(self.y, dtype=tf.int64)
        else:
            self.preds = tf.argmax(self.output_layer, axis=1)
            self.y_sparse = tf.argmax(self.y, axis=1)
        self.loss = tf.losses.log_loss(self.y, self.output_layer)
        self.correct = tf.reshape(tf.equal(self.y_sparse, self.preds),
                                  shape=[self.eff_mb_size])
        self.accuracy = tf.reduce_mean(tf.cast(self.correct, tf.float32))
        self.train_op = tf.train.GradientDescentOptimizer(
            self.learning_rate).minimize(self.loss)

        self.logger.info('list of variables {0}'.format(
            list(map(lambda x: x.name, tf.global_variables()))))

    def _create_model(self):
        """Create placeholders and assemble the full computation graph."""
        self.x = tf.placeholder(dtype=tf.float32,
                                shape=[None] + list(self.input_shape))
        self.y = tf.placeholder(dtype=tf.float32,
                                shape=[None, self.num_classes])
        self.eff_mb_size = tf.shape(self.x)[0]  # Effective batch size
        self.conv_layers = []
        self.pool_layers = []

        self._create_convolutional_layers()
        self._create_dense_layers()
        self._create_training_objectives()

    def train_on_batch(self, batch_x, batch_y):
        """
        :return: [loss, accuracy]
        """
        results = self.sess.run([self.loss, self.accuracy, self.train_op],
                                feed_dict={
                                    self.x: batch_x,
                                    self.y: batch_y
                                })
        self.accs.append(results[1])
        return results[:2]

    def test_on_batch(self,
                      batch_x,
                      batch_y,
                      global_step=1) -> Tuple[float, float, List[float]]:
        """
        Note that this function does not fetch |self.train_op|, so that the weights
        are not updated.
        :param batch_x:
        :param batch_y:
        :param global_step:
        :return: (loss, accuracy, probs)
        """
        if self.conv_layers:
            # Write summary
            results = self.sess.run([
                self.loss, self.accuracy, self.output_layer, self.preds,
                self.merged_summary
            ],
                                    feed_dict={
                                        self.x: batch_x,
                                        self.y: batch_y
                                    })
            msum = results[4]
            self.writer.add_summary(msum, global_step=global_step)
            self.writer.flush()
        else:
            # NOTE(review): this branch fetches only 3 tensors, so the
            # `results[3]` access below raises IndexError, and the returned
            # `results[2]` is `self.preds`, not output probabilities.
            results = self.sess.run([self.loss, self.accuracy, self.preds],
                                    feed_dict={
                                        self.x: batch_x,
                                        self.y: batch_y
                                    })
        self.val_accs.append(results[1])
        # Update confusion matrix
        preds = results[3]
        for i in range(len(batch_x)):
            self._confusion_matrix[np.argmax(batch_y[i]), preds[i]] += 1.

        return results[0], results[1], list(results[2])

    def validate(self, global_step) -> ClassificationResults:
        """
        :return: (loss, accuracy, auc_roc)
        Note that if self.binary_classification is False, auc_roc may be anything
        """
        losses = []
        accs = []
        all_pred_probs = []
        all_labels = []
        # NOTE(review): when len(x_test) is divisible by mb_size, the final
        # iteration produces an empty batch — confirm test_on_batch copes.
        for batch_no in range(self.x_test.shape[0] // self.mb_size + 1):
            inputs = self.x_test[batch_no * self.mb_size:(batch_no + 1) *
                                 self.mb_size]
            labels = self.y_test[batch_no * self.mb_size:(batch_no + 1) *
                                 self.mb_size]
            loss, acc, probs = self.test_on_batch(inputs,
                                                  labels,
                                                  global_step=global_step)
            losses.append(loss)
            accs.append(acc)
            all_pred_probs += probs
            all_labels += list(labels)
        all_pred_probs = np.array(all_pred_probs)
        all_labels = np.array(all_labels)
        # NOTE(review): np.bool is deprecated (removed in NumPy 1.24);
        # bool or np.bool_ would be needed on newer NumPy.
        all_labels = all_labels.astype(dtype=np.bool)
        loss = np.mean(losses)
        acc = np.mean(accs)
        return ClassificationResults(loss=loss,
                                     acc=acc,
                                     pred_probs=all_pred_probs,
                                     labels=all_labels,
                                     binary=self.binary_classification)

    def _next_training_batch(self) -> Tuple[np.ndarray, np.ndarray]:
        """Sample a random mini-batch from the train set, augmenting each
        input in place when on-the-fly augmentation is enabled.
        """
        batch = sample(list(range(self.x_train.shape[0])), self.mb_size)
        batch_x, batch_y = self.x_train[batch], self.y_train[batch]
        if self.augment_on_the_fly:
            for sample_no in range(self.mb_size):
                batch_x[sample_no] = self._augment_single_input(
                    batch_x[sample_no])
        return batch_x, batch_y

    def train_and_evaluate(self) -> ClassificationResults:
        """
        Train and evaluate model.
        """
        with tf.Session() as self.sess:
            # Initialize computation graph.
            self._create_model()
            # Add visualizations to computation graph.
            self._visualize_kernels()
            self._visualize_exciting_patches()
            self._visualize_incorrect_answer_images()

            # Initialize variables.
            # Restore from checkpoint if one exists; otherwise start fresh.
            if self.ckpt_file:
                saver = tf.train.Saver()
                try:
                    saver.restore(self.sess, self.ckpt_file)
                except (tf.errors.InvalidArgumentError,
                        tf.errors.NotFoundError):
                    tf.global_variables_initializer().run()
            else:
                tf.global_variables_initializer().run()

            # Initialize summary writer.
            self.writer = tf.summary.FileWriter(logdir='conv_vis')

            # Initialize progress bar.
            bar = Bar('',
                      max=self.steps_per_epoch,
                      suffix='%(index)d/%(max)d ETA: %(eta)ds')

            for epoch_no in range(self.nb_epochs):
                self.logger.info("Epoch {epoch_no}/{nb_epochs}".format(
                    epoch_no=epoch_no, nb_epochs=self.nb_epochs))
                for step_no in range(self.steps_per_epoch):
                    # Train model on next batch
                    batch_x, batch_y = self._next_training_batch()
                    results = self.train_on_batch(batch_x, batch_y)

                    # Update bar
                    bar.message = 'loss: {0[0]:.8f} acc: {0[1]:.3f} mean_acc: {1:.3f}'. \
                        format(results, np.mean(self.accs[-1000:]), )
                    bar.next()

                # Re-initialize progress bar
                bar.finish()
                bar = Bar('',
                          max=self.steps_per_epoch,
                          suffix='%(index)d/%(max)d ETA: %(eta)ds')

                # Store model
                if self.ckpt_file:
                    saver.save(self.sess, self.ckpt_file)

                # Validate
                val_results = self.validate(global_step=epoch_no)
                loss, acc, auc_roc = val_results.loss, val_results.acc, val_results.get_auc_roc(
                )
                if self.binary_classification:
                    self.logger.info(
                        "Validation results: Loss: {0}, accuracy: {1}, auc_roc: {2}"
                        .format(loss, acc, auc_roc))
                else:
                    self.logger.info(
                        "Validation results: Loss: {0}, accuracy: {1}".format(
                            loss, acc))
                # Display confusion matrix
                show_image(self._confusion_matrix)

            # NOTE(review): val_results is unbound if nb_epochs == 0.
            return val_results
Ejemplo n.º 30
0
        return None
    if np.isnan(gir_face).any() or np.isnan(depth_face).any():
        return None
    try:
        face = normalized(face, rotate=False)
        face = hog_and_entropy(face)
    except ValueError:
        return None
    return face.get_fd_desc()


# Training-time augmenters: identity, coarse salt-and-pepper noise at two
# densities, a 3-pixel pad on each single side, Gaussian blur at several
# strengths, and slight rotations in both directions.
augmenters = (
    [ia.Noop()]
    + [ia.CoarseSaltAndPepper(p=p, size_percent=0.30) for p in (0.2, 0.4)]
    + [ia.Pad(px=px) for px in ((3, 0, 0, 0), (0, 3, 0, 0),
                                (0, 0, 3, 0), (0, 0, 0, 3))]
    + [ia.GaussianBlur(sigma=s) for s in (0.25, 0.5, 1, 2)]
    + [ia.Affine(rotate=r) for r in (-2, 2)]
)


def run_preprocess():
    preprocessor = InputPreprocessor(exp_name=EXP_NAME,
                                     nn_input_size=INPUT_SIZE,