Esempio n. 1
0
    def augmentation(self, images, labels):
        """Augment images together with their segmentation maps.

        ``images`` and ``labels`` are sliced into
        ``self.number_batches_augmentation`` consecutive chunks, each chunk is
        wrapped in an ``UnnormalizedBatch`` and all chunks are passed to
        ``self.augmenting_pipeline.augment_batches`` with ``background=True``.

        Returns:
            A tuple ``(images_aug, labels_aug)`` of flat lists collected from
            the augmented batches, in batch order.
        """
        chunk_count = self.number_batches_augmentation
        chunk_len = math.ceil(self.batch_size / chunk_count)

        # Every chunk except the last spans ``chunk_len`` indices; the last
        # one ends at ``self.batch_size``.
        bounds = [(k * chunk_len, (k + 1) * chunk_len)
                  for k in range(chunk_count - 1)]
        bounds.append(((chunk_count - 1) * chunk_len, self.batch_size))

        batches = [
            UnnormalizedBatch(images=images[lo:hi],
                              segmentation_maps=labels[lo:hi])
            for lo, hi in bounds
        ]

        batches_aug = list(
            self.augmenting_pipeline.augment_batches(batches, background=True))

        images_aug = []
        labels_aug = []
        for batch in batches_aug:
            images_aug.extend(batch.images_aug)
            labels_aug.extend(batch.segmentation_maps_aug)
        return images_aug, labels_aug
Esempio n. 2
0
    def test_with_seed_start(self, mock_ia_seed):
        """Check the seeds ``_Pool_worker`` applies when a seed start is set.

        The expected global and local seeds are both derived from
        ``seed_start + batch_idx`` and wrapped into the ``iarandom`` seed
        range; the worker must also return the augmented batch.
        """
        seed_start, batch_idx = 10, 1
        augseq = mock.MagicMock()
        augseq.augment_batch_.return_value = "augmented_batch_"
        batch = UnnormalizedBatch(
            images=[np.zeros((1, 1, 3), dtype=np.uint8)])

        multicore.Pool._WORKER_AUGSEQ = augseq
        multicore.Pool._WORKER_SEED_START = seed_start
        result = multicore._Pool_worker(batch_idx, batch)

        # Expected seeds, wrapped into [SEED_MIN_VALUE, SEED_MAX_VALUE).
        seed_span = iarandom.SEED_MAX_VALUE - iarandom.SEED_MIN_VALUE
        base_seed = seed_start + batch_idx
        expected_global = (
            iarandom.SEED_MIN_VALUE + (base_seed - 10**9) % seed_span)
        expected_local = iarandom.SEED_MIN_VALUE + base_seed % seed_span

        assert result == "augmented_batch_"
        assert augseq.augment_batch_.call_count == 1
        augseq.augment_batch_.assert_called_once_with(batch)
        mock_ia_seed.assert_called_once_with(expected_global)
        augseq.seed_.assert_called_once_with(expected_local)
Esempio n. 3
0
def example_multicore_augmentation():
    """Demo: augment batches of one example image and display each result.

    Builds 10 batches, each holding 32 references to the same 64x64 astronaut
    image, runs them through a flip + coarse-dropout sequence with
    ``background=True`` and shows every augmented batch as an 8-column grid.
    """
    print("Example: Multicore Augmentation")
    import skimage.data
    import imgaug as ia
    import imgaug.augmenters as iaa
    from imgaug.augmentables.batches import UnnormalizedBatch

    nb_batches, batch_size = 10, 32

    # Augmentation sequence applied to every batch.
    pipeline = iaa.Sequential([
        iaa.Fliplr(0.5),
        iaa.CoarseDropout(p=0.1, size_percent=0.1),
    ])

    # One resized example image is reused for every slot of every batch.
    sample = ia.imresize_single_image(skimage.data.astronaut(), (64, 64))
    batches = [
        UnnormalizedBatch(images=[sample] * batch_size)
        for _ in range(nb_batches)
    ]

    # augment_batches() is consumed lazily, one augmented batch at a time.
    for augmented in pipeline.augment_batches(batches, background=True):
        ia.imshow(ia.draw_grid(augmented.images_aug, cols=8))
Esempio n. 4
0
def add_noise(seq, data, batched=True):
    """Run ``data`` through the augmenter ``seq`` and return the stacked result.

    Args:
        seq: augmenter providing ``augment_batches`` / ``augment_images``.
        data: array that is reshaped using the sizes found in ``config``.
        batched: when true, split the data into ``config.noise_batch`` batches
            and augment them with ``background=True``; otherwise augment all
            images in a single ``augment_images`` call.

    Returns:
        In batched mode an array reshaped to
        ``(config.minibatch_size, 1, config.height, config.width)``; otherwise
        a stack of per-image arrays reshaped to
        ``(-1, config.height, config.width)``.
    """
    if not batched:
        flat = data.reshape(-1, config.height, config.width)
        augmented = seq.augment_images(flat)
        return np.stack([
            single.reshape(-1, config.height, config.width)
            for single in augmented
        ])

    group_count = config.noise_batch
    per_group = config.minibatch_size // group_count
    grouped = data.reshape(group_count, per_group, config.height,
                           config.width)

    # One UnnormalizedBatch per group, each holding a plain list of images.
    img_batches = [
        UnnormalizedBatch(images=[grouped[g][k] for k in range(per_group)])
        for g in range(group_count)
    ]
    augmented_batches = list(
        seq.augment_batches(img_batches, background=True))

    stacked = np.stack([
        np.stack(augmented_batches[g].images_aug) for g in range(group_count)
    ])
    return stacked.reshape(config.minibatch_size, 1, config.height,
                           config.width)
Esempio n. 5
0
def gen_batches(files, scale_bs=10, aug_bs=5, crop_size=600, scale=4):
    """Yield one ``UnnormalizedBatch`` per annotation file that loads cleanly.

    For every XML file the referenced image is opened, EXIF-transposed and
    passed through ``resize_fix_shape``. ``scale_bs`` copies of it (with the
    file's bounding boxes) go through ``seq_scale``; each scaled result is
    then repeated ``aug_bs`` times inside the yielded batch.

    Files whose image processing raises are reported and skipped: nothing is
    yielded for them. (Previously the generator fell through to the ``yield``
    and re-emitted the preceding file's batch, or failed with
    ``UnboundLocalError`` when the very first file was bad.)

    Args:
        files: iterable of annotation XML paths.
        scale_bs: number of copies handed to ``seq_scale``.
        aug_bs: how many times each scaled image is repeated in the batch.
        crop_size: forwarded to ``resize_fix_shape``.
        scale: currently unused; kept for interface compatibility.

    Yields:
        ``UnnormalizedBatch`` with ``scale_bs * aug_bs`` images and matching
        bounding-box lists.
    """
    from imgaug.augmentables.batches import UnnormalizedBatch
    skip = 0
    for xml_file in files:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        img = root.find('path').text
        try:
            raw_img = Image.open(img)
            clean(raw_img)
            raw_img = ImageOps.exif_transpose(raw_img)
            img_array = resize_fix_shape(raw_img, root, crop_size)
            images = [img_array for _ in range(scale_bs)]
            # member[4] is presumably the <bndbox> element holding
            # xmin/ymin/xmax/ymax (Pascal VOC layout) -- TODO confirm.
            bbs = [
                ia.BoundingBox(int(member[4][0].text), int(member[4][1].text),
                               int(member[4][2].text), int(member[4][3].text))
                for member in root.findall('object')
            ]
            images_scale, bbs_scale = seq_scale(
                images=images, bounding_boxes=[bbs for _ in range(scale_bs)])
            imgs = [im for im in images_scale for _ in range(aug_bs)]
            batch = UnnormalizedBatch(images=imgs,
                                      bounding_boxes=[
                                          bbss for bbss in bbs_scale
                                          for _ in range(aug_bs)
                                      ])
        except Exception as e:
            # Best-effort: report the failure and move on to the next file
            # instead of yielding a stale or undefined batch.
            skip += 1
            print(repr(e), f" skip {skip}")
            continue
        yield batch
Esempio n. 6
0
    def test_simple_call(self, mock_worker):
        """``_Pool_starworker`` must unpack its tuple and forward it to the worker mock."""
        mock_worker.return_value = "returned_batch"
        batch_idx = 1
        batch = UnnormalizedBatch(
            images=[np.zeros((1, 1, 3), dtype=np.uint8)])

        job = (batch_idx, batch)
        result = multicore._Pool_starworker(job)

        mock_worker.assert_called_once_with(batch_idx, batch)
        assert result == "returned_batch"
Esempio n. 7
0
 def generate_data_I(self, ds, save_to_dir=None, prefix='test'):
     """
     Write ImgAug-augmented copies of every image batch in ``ds`` to disk.

     Each image batch is multiplied by 255, cast to uint8 and augmented five
     times with the externally defined ``seq``. Every augmented image is saved
     as a PNG under ``save_to_dir``; the file name combines ``prefix``, the
     augmentation round and a random integer in [0, 1000].
     """
     create_dir(save_to_dir)
     for imgs_batch, _ in ds.as_numpy_iterator():
         batch = UnnormalizedBatch(images=(imgs_batch * 255).astype(np.uint8))

         # All five augmentation rounds finish before any file is written.
         rounds = []
         for _round in range(5):
             augmented = next(seq.augment_batches(batch, background=True))
             rounds.append(augmented.images_aug)

         for round_idx, round_images in enumerate(rounds):
             for picture in round_images:
                 target = "%s/%s_%d_%d.png" % (
                     str(save_to_dir), prefix, round_idx,
                     random.randint(0, 1000),)
                 imageio.imwrite(target, picture)
Esempio n. 8
0
    def test_without_seed_start(self):
        """``_Pool_worker`` returns the result of a single ``augment_batch_`` call."""
        batch = UnnormalizedBatch(
            images=[np.zeros((1, 1, 3), dtype=np.uint8)])
        augseq = mock.MagicMock()
        augseq.augment_batch_.return_value = "augmented_batch_"
        multicore.Pool._WORKER_AUGSEQ = augseq

        result = multicore._Pool_worker(1, batch)

        augseq.augment_batch_.assert_called_once_with(batch)
        assert augseq.augment_batch_.call_count == 1
        assert result == "augmented_batch_"
Esempio n. 9
0
def gen_batches(files, bs=5, scale=4.5, must_rotate=True):
    """Build one ``UnnormalizedBatch`` per annotation file.

    For each XML file the referenced image is opened, EXIF-transposed,
    optionally shrunk via ``resize_image`` and optionally rotated via
    ``aug_by_value_list``. Every resulting image (and its bounding boxes) is
    repeated ``bs`` times inside the batch.

    Args:
        files: iterable of annotation XML paths.
        bs: number of repetitions of each image within its batch.
        scale: ``resize_image`` is called only when this is greater than 1.
        must_rotate: when true, apply ``aug_by_value_list`` with
            ``rotate=MUST_ROTATE`` and ``fit_output=True`` first.

    Returns:
        ``(batches, trees)``: the batches and the parsed ``ElementTree``
        objects, index-aligned with ``files``.
    """
    from imgaug.augmentables.batches import UnnormalizedBatch
    batches = []
    trees = []
    for xml_file in files:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        # The stored path may use Windows separators; translate them so the
        # image also opens on POSIX. (The earlier os.path.join(...) call
        # computed a normalised path but threw the result away.)
        img = root.find('path').text.replace("\\", os.sep)
        raw_img = Image.open(img)
        clean(raw_img)
        raw_img = ImageOps.exif_transpose(raw_img)
        # Reduce image size. The return value is not used, so resize_image
        # presumably updates raw_img/root in place -- TODO confirm.
        if scale > 1:
            resize_image(raw_img, root, scale)
        img_array = np.array(raw_img)

        # member[4] is presumably the <bndbox> element holding
        # xmin/ymin/xmax/ymax (Pascal VOC layout) -- TODO confirm.
        bbs = [
            ia.BoundingBox(int(member[4][0].text), int(member[4][1].text),
                           int(member[4][2].text), int(member[4][3].text))
            for member in root.findall('object')
        ]
        if must_rotate:
            img_aug, bbs_aug = aug_by_value_list([img_array], [bbs],
                                                 fit_output=True,
                                                 rotate=MUST_ROTATE)
        else:
            img_aug, bbs_aug = [img_array], [bbs]

        images = [
            img_aug_array for img_aug_array in img_aug for _ in range(bs)
        ]
        batches.append(
            UnnormalizedBatch(images=images,
                              bounding_boxes=[
                                  bbs_aug_array for bbs_aug_array in bbs_aug
                                  for _ in range(bs)
                              ]))
        trees.append(tree)
    return batches, trees
def dictCropMultiples(baseImageListFunc, baseMaskListFunc, fullImageListFunc, segmapListFunc):
    """Extend the running image/mask lists with ``CropToMultiplesOf`` results.

    The base images and masks are augmented ``2 * 2 = 4`` times, either as
    background batches (when ``PARALLEL_PROCESSING`` is set) or in one direct
    call on a list expanded by ``expandList``. Results are appended in place
    to ``fullImageListFunc`` and ``segmapListFunc``, which are also returned.
    """
    height_multiple = 2
    width_multiple = 2
    copies = height_multiple * width_multiple
    augmenter = iaa.CropToMultiplesOf(height_multiple=height_multiple,
                                      width_multiple=width_multiple)

    if not PARALLEL_PROCESSING:
        expanded_images, expanded_masks = expandList(baseImageListFunc, baseMaskListFunc,
                                                     copies)
        cropped_images, cropped_masks = augmenter(images=expanded_images,
                                                  segmentation_maps=expanded_masks)
        fullImageListFunc.extend(cropped_images)
        segmapListFunc.extend(cropped_masks)
        return fullImageListFunc, segmapListFunc

    # Every batch refers to the same base lists; one batch per requested copy.
    batches = []
    for _ in range(copies):
        batches.append(UnnormalizedBatch(images=baseImageListFunc,
                                         segmentation_maps=baseMaskListFunc))
    for augmented in list(augmenter.augment_batches(batches, background=True)):
        fullImageListFunc.extend(augmented.images_aug)
        segmapListFunc.extend(augmented.segmentation_maps_aug)
    return fullImageListFunc, segmapListFunc
def dictShotNoise(baseImageListFunc, baseMaskListFunc, fullImageListFunc, segmapListFunc):
    """Extend the running image/mask lists with shot-noise augmented copies.

    The base images and masks are augmented five times with
    ``iaa.imgcorruptlike.ShotNoise(severity=1)``, either as background batches
    (when ``PARALLEL_PROCESSING`` is set) or in one direct call on a list
    expanded by ``expandList``. Results are appended in place to
    ``fullImageListFunc`` and ``segmapListFunc``, which are also returned.
    """
    print('Shot noise, starting number of images:', len(segmapListFunc))
    copies = 5
    augmenter = iaa.imgcorruptlike.ShotNoise(severity=1)

    if not PARALLEL_PROCESSING:
        expanded_images, expanded_masks = expandList(baseImageListFunc, baseMaskListFunc,
                                                     copies)
        noisy_images, noisy_masks = augmenter(images=expanded_images,
                                              segmentation_maps=expanded_masks)
        fullImageListFunc.extend(noisy_images)
        segmapListFunc.extend(noisy_masks)
        return fullImageListFunc, segmapListFunc

    # Every batch refers to the same base lists; one batch per requested copy.
    batches = []
    for _ in range(copies):
        batches.append(UnnormalizedBatch(images=baseImageListFunc,
                                         segmentation_maps=baseMaskListFunc))
    for augmented in list(augmenter.augment_batches(batches, background=True)):
        fullImageListFunc.extend(augmented.images_aug)
        segmapListFunc.extend(augmented.segmentation_maps_aug)
    return fullImageListFunc, segmapListFunc
Esempio n. 12
0
def main():
    """Apply every augmentation from MyAugments.py to a YOLO dataset directory.

    Command-line flags:
        --input_directory / -i: folder with ``class.names`` and matching
            ``<name>.txt`` / ``<name>.jpg`` pairs.
        --output_directory / -o: destination for copies and augmented data.
        --single_threaded / -s: disable background augmentation.
        --preview_only / -p: show augmented batches in a window instead of
            writing anything to disk.
        --skip_originals: do not copy the untouched originals to the output.

    Augmented images AND their YOLO label files are written to the output
    directory under ``<image>_<op>[_rep<i>]``. The label file used to be
    written to the *input* label path, overwriting the source annotations.
    """
    # Configure imgaug
    ia.seed(1)

    #########################################################
    # Parse arguments
    #########################################################

    parser = argparse.ArgumentParser(
        description=
        'Applies a set of augmentations to every image in the input directory.'
    )
    parser.add_argument('--input_directory',
                        '-i',
                        type=str,
                        help='e.g. "./downloads/"')
    parser.add_argument('--output_directory',
                        '-o',
                        type=str,
                        help='e.g. "./downloads/"')
    parser.add_argument(
        '--single_threaded',
        '-s',
        action='store_true',
        help='Process images in one thread instead of multithreading')
    parser.add_argument('--preview_only',
                        '-p',
                        action='store_true',
                        help='Show previews instead of writing to disk')
    parser.add_argument(
        '--skip_originals',
        action='store_true',
        help=
        'Prevent original images from being copied into the destination folder'
    )

    args = parser.parse_args()

    input_directory = args.input_directory
    output_directory = args.output_directory
    user_requested_preview_only = args.preview_only
    single_threaded = args.single_threaded
    skip_originals = args.skip_originals

    #########################################################
    # Process all data files in the input directory, either
    # copying them over or queueing them to be augmented in
    # the next step.
    #########################################################

    # Load YOLO region class names from file (blank lines are ignored)
    with open(os.path.join(input_directory, "class.names")) as class_file:
        class_names = [
            line.rstrip() for line in class_file if line.rstrip() != ""
        ]

    if not user_requested_preview_only:
        # Copy MyAugments.py to the output directory
        copyfile(os.path.join(os.getcwd(), "MyAugments.py"),
                 os.path.join(output_directory, "MyAugments.py"))
        # Copy the YOLO region class names file to the output directory
        copyfile(os.path.join(input_directory, "class.names"),
                 os.path.join(output_directory, "class.names"))

    augment_files = []

    filenames = os.listdir(input_directory)
    filenames.sort()
    for filename in filenames:

        # Work only on YOLO .txt files.
        if not filename.endswith(".txt"):
            continue

        base_filename = os.path.splitext(filename)[0]
        data_path = os.path.join(input_directory, base_filename + ".txt")
        image_path = os.path.join(input_directory, base_filename + ".jpg")
        augment_files.append(DataPair(data_path, image_path))

        if not skip_originals and not user_requested_preview_only:
            # Copy the original data file to the output directory.
            copyfile(data_path,
                     os.path.join(output_directory, base_filename + ".txt"))
            # Copy the original image to the output directory.
            copyfile(image_path,
                     os.path.join(output_directory, base_filename + ".jpg"))

    #########################################################
    # From the list of data/image pairs to augment, create
    # batches for imgaug to process.
    #########################################################

    batch = []
    batches = []
    MAX_BATCH_SIZE = 10 if user_requested_preview_only else 16

    for i, item in enumerate(augment_files):
        # Load image into memory
        image = imageio.imread(item.image_path)
        # Get imgaug representation of bounding boxes
        image_data = ImageData.from_yolo_data(item.data_path, class_names)
        bbs = image_data.to_imgaug(image.shape)

        batch.append((image, bbs, item))

        # If we're at the max batch size or the end of the file list,
        # finalize the batch and add it to the batch list.
        if len(batch) == MAX_BATCH_SIZE or i == len(augment_files) - 1:
            images, bounding_boxes, data = list(zip(*batch))
            batches.append(
                UnnormalizedBatch(images=images,
                                  bounding_boxes=bounding_boxes,
                                  data=data))
            batch.clear()

    #########################################################
    # Apply each operation in MyAugments.py to each image
    #########################################################

    should_multithread = not single_threaded and not user_requested_preview_only

    ops = get_augmentation_operations()

    total_ops_per_image = sum([op.num_repetitions for op in ops])
    input_image_count = sum([len(b.data) for b in batches])
    generated_image_count = total_ops_per_image * input_image_count
    print(f"{generated_image_count} new images will be created.")
    progress_bar = tqdm(total=generated_image_count)

    for op in ops:
        for i in range(op.num_repetitions):
            # Produce augmentations
            for batches_aug in op.operation.augment_batches(
                    batches, background=should_multithread):
                if user_requested_preview_only:
                    # Preview output one batch at a time.
                    # Blocks execution until the window is closed.
                    # Closing a window will cause the next batch to appear.
                    # Close the Python instance in the dock to stop execution.
                    images_with_labels = [
                        bb.draw_on_image(image)
                        for image, bb in zip(batches_aug.images_aug,
                                             batches_aug.bounding_boxes_aug)
                    ]
                    grid_image = ia.draw_grid(images_with_labels,
                                              cols=None,
                                              rows=None)
                    title = f"{op.name}\nRep {i}\n"
                    grid_image = ia.draw_text(grid_image,
                                              8,
                                              8,
                                              title,
                                              color=(255, 0, 0),
                                              size=50)
                    ia.imshow(grid_image, backend='matplotlib')
                    continue

                for image, bbs, data in zip(batches_aug.images_aug,
                                            batches_aug.bounding_boxes_aug,
                                            batches_aug.data):
                    # Write image and matching data file to output folder

                    # Determine base name for image and matching data file
                    image_filename_no_extension, image_extension = os.path.splitext(
                        Path(data.image_path).name)
                    if op.num_repetitions == 1:
                        base_filename = f"{image_filename_no_extension}_{op.name}"
                    else:
                        base_filename = f"{image_filename_no_extension}_{op.name}_rep{i}"

                    # Write image to output folder
                    output_image_path = os.path.join(
                        output_directory, f"{base_filename}{image_extension}")
                    imageio.imwrite(output_image_path, image)

                    # Write modified imgaug bounding boxes as YOLO format in
                    # the output folder, next to the augmented image. Writing
                    # to data.data_path would clobber the input annotations.
                    image_height, image_width, _ = image.shape
                    output_data = ImageData.from_imagaug(
                        image_width, image_height, bbs)
                    output_data_path = os.path.join(output_directory,
                                                    f"{base_filename}.txt")
                    output_data.write_yolo(output_data_path, class_names)

                    # Update progress bar
                    progress_bar.update(1)
    progress_bar.close()
Esempio n. 13
0
def batch_op_sub_chip_augmentation(x_chips_total,
                                   y_chips_total,
                                   amt=1000,
                                   test=True,
                                   test_size=256,
                                   ISZ=256,
                                   random_seed=42):
    """Load (if needed), shuffle and augment image/mask chips.

    Args:
        x_chips_total: image chips; if fewer than 50 are passed, chips are
            (re)loaded from the datasets found under the global ``inDir``.
        y_chips_total: mask chips matching ``x_chips_total``.
        amt: requested number of augmented samples; rounded down to a
            multiple of 128. Ignored when ``test`` is true.
        test: when true, ``amt`` is replaced by ``test_size``, only the first
            dataset is loaded and the total run time is printed.
        test_size: sample count used in test mode.
        ISZ: side length used for windowing and for the final reshape.
        random_seed: seed for the per-dataset shuffles; ``random_seed * 2``
            seeds the final shuffle.

    Returns:
        ``(img, msk)`` reshaped to ``(amt, ISZ, ISZ, 3)`` and
        ``(amt, ISZ, ISZ, 1)``; ``msk`` is clipped to a maximum of 1.
    """
    if test:
        time_start = time.time()
        amt = test_size
        amt = int(amt // 128 * 128)
        print('test selected, amt:', amt)
    else:
        amt = int(amt // 128 * 128)
        print(amt, 'non-test amt here')

    dataset_table = find_datasets(inDir)

    # Too few chips supplied: rebuild both arrays from the datasets on disk.
    if len(x_chips_total) < 50:
        for index in range(len(dataset_table[0])):
            selected_x = dataset_table[0][index]
            selected_y = dataset_table[1][index]
            # seed = 15
            # seed = np.random.randint(1,5000)
            print(selected_x, selected_y)

            chips_x = np.load(inDir + '/x_set/' + selected_x)
            chips_y = np.load(inDir + '/y_set/' + selected_y)

            x_windows, y_windows, dim_Max = window_creation(
                chips_x, chips_y, ISZ)
            del chips_x, chips_y
            # print(dim_Max)
            x_chips_verified, y_chips_verified = chip_verification(
                x_windows, y_windows, dim_Max, ISZ)
            del x_windows, y_windows
            # if test:
            #  print(x_chips_verified.shape, y_chips_verified.shape)

            #        np.random.seed(42)
            #        np.random.shuffle(dataset_table[0])
            #        np.random.seed(42)
            #        np.random.shuffle(dataset_table[1])
            # Re-seeding before each shuffle -- presumably so images and masks
            # receive the same permutation (requires equal lengths; verify).
            np.random.seed(random_seed)
            np.random.shuffle(x_chips_verified)
            np.random.seed(random_seed)
            np.random.shuffle(y_chips_verified)
            print(x_chips_verified.shape, y_chips_verified.shape)
            if len(x_chips_verified) < 500:
                x_chips_verified, y_chips_verified = pad_valid(
                    x_chips_verified, y_chips_verified, 100)
            # x_chips_verified = x_chips_verified[:500]
            # y_chips_verified = y_chips_verified[:500]

            # The first dataset replaces the inputs; later ones are appended.
            if index == 0:
                x_chips_total = x_chips_verified
                y_chips_total = y_chips_verified

            if index > 0:
                x_chips_total = np.append(x_chips_total,
                                          x_chips_verified,
                                          axis=0)
                y_chips_total = np.append(y_chips_total,
                                          y_chips_verified,
                                          axis=0)
                del x_chips_verified, y_chips_verified
            print(x_chips_total.shape, y_chips_total.shape)
            # Test mode stops after the first dataset.
            if test:
                break

    # Final shuffle of the combined chips, again seeded identically for both.
    np.random.seed(random_seed * 2)
    np.random.shuffle(x_chips_total)
    np.random.seed(random_seed * 2)
    np.random.shuffle(y_chips_total)
    print(x_chips_total.shape, y_chips_total.shape)

    # x_chips_padded = x_chips_total#[:amt]
    # y_chips_padded = y_chips_total#[:amt]
    # del x_chips_total, y_chips_total
    # print(x_chips_padded.shape, y_chips_padded.shape)

    BATCH_SIZE = 128
    NB_BATCHES = int(amt / 128)

    # Only the first BATCH_SIZE chips are used; indexing fails if fewer exist.
    images_batch = [x_chips_total[_] for _ in range(BATCH_SIZE)]
    segmentation_maps_batch = [y_chips_total[_] for _ in range(BATCH_SIZE)]

    # images_batch = [x_chips_padded[_] for _ in range(BATCH_SIZE)]
    # segmentation_maps_batch = [y_chips_padded[_] for _ in range(BATCH_SIZE)]

    # del x_chips_padded, y_chips_padded

    del x_chips_total, y_chips_total

    print('images_batch & segmentation_maps_batch loaded')

    # Every batch references the same 128 images and masks.
    batches = [
        UnnormalizedBatch(images=images_batch,
                          segmentation_maps=segmentation_maps_batch)
        for _ in range(NB_BATCHES)
    ]

    # seq = simple_seq()
    seq = aug_seq()
    print('seq loaded')

    batches_aug = list(seq.augment_batches(batches, background=False))

    print('augmentation finished')
    if test:
        time_end = time.time()
        print("Complete load & augmentation pipeline done in %.2fs" %
              (time_end - time_start, ))
        # print("Resizing & returning augmented x_trn, y_trn datasets")

    print('resizing img & msk')

    # Stack the per-batch results and flatten the batch axis away.
    img = np.array([batches_aug[a].images_aug
                    for a in range(NB_BATCHES)]).reshape(amt, ISZ, ISZ, 3)
    msk = (np.array([
        batches_aug[a].segmentation_maps_aug for a in range(NB_BATCHES)
    ]).reshape(amt, ISZ, ISZ, 1))

    print(img.shape, msk.shape)

    # NOTE(review): img already has this shape, so the two reshapes below do
    # not change it.
    img = np.array(img).reshape(amt, ISZ, ISZ, 3)

    # img = [stretch_n(i) for i in img]

    # print(img.shape)

    img = np.array(img).reshape(amt, ISZ, ISZ, 3)
    print(img.dtype)
    print(img.shape)
    # img = img / 255.

    # img = img.astype('uint8')
    print(msk.dtype)
    print(msk.shape)
    # Cap mask values at 1.
    msk = msk.clip(max=1)
    print(msk.dtype)

    return img, msk
    def test_data_load2(self):
        """Smoke-test ``data_load.data_load`` plus multicore augmentation.

        Loads ``../data/train.csv``, displays one randomly chosen training
        image, augments the whole training set nine times in an imgaug worker
        pool, shows the same picture from every augmented batch and finally
        concatenates the originals with all augmented copies.

        The random index is drawn with ``random.randrange`` so it is always a
        valid position; ``random.randint(0, len(...))`` includes the upper
        bound and could index one past the end.
        """
        import random
        import time

        import numpy as np
        import imgaug.augmenters as iaa
        from imgaug.augmentables.batches import UnnormalizedBatch
        from matplotlib import pyplot as plt

        input_file = '../data/train.csv'
        image_size = 28
        training_dataset, testing_dataset, validating_dataset, training_labels, testing_labels, validating_labels = \
            data_load.data_load(input_file, image_size)

        random_picture = random.randrange(len(training_dataset))
        print("random int : ", random_picture)

        plt.imshow(training_dataset[random_picture].reshape(28, 28),
                   interpolation='nearest')
        plt.show()
        print(training_labels[random_picture])

        def sometimes(aug):
            # Apply the wrapped augmenter with 50% probability.
            return iaa.Sometimes(0.5, aug)

        nb_batches = 9

        # Each batch holds the complete training set with its labels as
        # payload.
        batches = [
            UnnormalizedBatch(images=training_dataset,
                              data=training_labels)
            for _ in range(nb_batches)
        ]

        aug = iaa.Sequential([
            sometimes(iaa.Affine(translate_px={
                "x": (-3, 3),
                "y": (-3, 3)
            })),
            sometimes(iaa.Affine(scale=(0.80, 1.20))),
            sometimes(iaa.Affine(rotate=(-35, 35))),
            sometimes(iaa.GaussianBlur(sigma=(0.1, 0.5)))
        ])

        time_start = time.time()

        with aug.pool(processes=32, maxtasksperchild=200, seed=1) as pool:
            batches_aug = pool.map_batches(batches)

        for i in range(nb_batches):
            plt.imshow(batches_aug[i].images_aug[random_picture].reshape(
                28, 28),
                       interpolation='nearest')
            plt.show()

            print(batches_aug[i].data[random_picture])

        time_end = time.time()
        print("Augmentation done in %.2fs" % (time_end - time_start, ))

        # Originals first, then every augmented batch in order.
        all_training_dataset = np.concatenate(
            [training_dataset] +
            [batches_aug[i].images_aug for i in range(nb_batches)])
        all_training_labels = np.concatenate(
            [training_labels] +
            [batches_aug[i].data for i in range(nb_batches)])

        print(" len of all_training_dataset : ", len(all_training_dataset))
        print(" len of all_training_labels  : ", len(all_training_labels))

        print(" end ")
def augment_data(images_dataset, labels_dataset, multiplier):
    """Return the dataset extended with ``multiplier`` augmented copies.

    The images are split into pieces of roughly 30 pictures; each piece is
    submitted ``multiplier`` times to an imgaug worker pool. Every augmenter
    (translation by +-3 px, scaling 80%-120%, rotation by +-35 degrees,
    Gaussian blur) is applied with a probability of 50%. Labels travel along
    as the batch ``data`` payload.

    Args:
        images_dataset: array of images.
        labels_dataset: labels aligned with ``images_dataset``.
        multiplier: number of augmented copies per image. Values <= 0 produce
            no augmentation and the inputs are returned as arrays (previously
            this failed inside ``np.concatenate`` on an empty list).

    Returns:
        ``(all_images, all_labels)``: the originals followed by all augmented
        samples.
    """
    # Nothing to generate: skip the pool and the concatenation entirely.
    if multiplier <= 0:
        return np.asarray(images_dataset), np.asarray(labels_dataset)

    # size (in pictures) of one job sent to a child process
    max_size_of_one_job = 30

    time_start = time.time()

    def sometimes(aug):
        # Apply the wrapped augmenter with 50% probability.
        return iaa.Sometimes(0.5, aug)

    aug = iaa.Sequential([
        sometimes(iaa.Affine(translate_px={
            "x": (-3, 3),
            "y": (-3, 3)
        })),
        sometimes(iaa.Affine(scale=(0.80, 1.20))),
        sometimes(iaa.Affine(rotate=(-35, 35))),
        sometimes(iaa.GaussianBlur(sigma=(0.1, 0.5)))
    ])

    # Number of pieces the data is split into; at least one, even for
    # datasets smaller than a single job.
    pieces = max(1, len(images_dataset) // max_size_of_one_job)

    split_training_dataset = np.array_split(images_dataset, pieces)
    split_training_labels = np.array_split(labels_dataset, pieces)

    # For each piece, ``multiplier`` batches referencing that piece.
    batches = [
        UnnormalizedBatch(images=piece_images, data=piece_labels)
        for piece_images, piece_labels in zip(split_training_dataset,
                                              split_training_labels)
        for _ in range(multiplier)
    ]

    # run jobs in 32 child processes
    with aug.pool(processes=32, maxtasksperchild=200, seed=1) as pool:
        print("sending for augmentation batches : ", len(batches))
        batches_aug = pool.map_batches(batches)

    # concatenate all data back together, originals first
    all_images_dataset = np.concatenate(
        [images_dataset] + [b.images_aug for b in batches_aug])
    all_labels_dataset = np.concatenate(
        [labels_dataset] + [b.data for b in batches_aug])

    time_end = time.time()
    print("Augmentation done in %.2fs" % (time_end - time_start, ))

    print(" len of all_images_dataset : ", len(all_images_dataset))
    print(" len of all_labels_dataset  : ", len(all_labels_dataset))

    print(" end ")
    return all_images_dataset, all_labels_dataset
Esempio n. 16
0
def get_image_batches(images, nb_batches=NB_BATCHES):
    """Return ``nb_batches`` batches, each built from the same ``images``.

    Every batch is a separate ``UnnormalizedBatch`` instance constructed with
    the very same ``images`` argument.
    """
    image_batches = []
    for _ in range(nb_batches):
        image_batches.append(UnnormalizedBatch(images=images))
    return image_batches
Esempio n. 17
0
    def generate_plate_multicore(self,
                                 plate_numbers,
                                 bg_color,
                                 is_double,
                                 enhance=False):
        """
        Generate plate images for the given numbers and background colour, then
        run them through an imgaug pipeline as a single batch.

        :param plate_numbers: list of plate numbers
        :param bg_color: background colour
        :param is_double: whether the plate is double-row
        :param enhance: image enhancement; when truthy each glyph is randomly
            eroded or dilated before it is placed on the plate
        :return: plate images -- whatever ``light_change_right_trap`` returns
            for the augmented batch
        """
        # Pick the plate size from the background name. ``width`` is only
        # assigned for motor/dishu/macau backgrounds and is only read in the
        # matching resize branch further down.
        if 'motor' in bg_color:
            height = 140
            width = 220
        elif 'dishu' in bg_color:
            height = 165
            width = 300
        elif 'macau' in bg_color:
            height = 120
            width = 520
        else:
            height = 220 if is_double else 140

        plate_images = list()
        for plate_number in plate_numbers:
            print('车牌号是:{}'.format(plate_number), "\n",
                  '车牌高度是:{}'.format(height), "\n",
                  '车牌底颜色是:{}'.format(bg_color), "\n",
                  '是否双行:{}'.format(is_double))
            # Per-character positions; "army" backgrounds use a dedicated
            # layout helper. Indexed below as number_xy[i, :].
            if "army" in bg_color:
                number_xy = self.get_location_multi_WJ(plate_number, height)
            else:
                number_xy = self.get_location_multi(plate_number, height)
            print(number_xy)
            # Load the blank plate template named "<bg_color>_<height>.PNG".
            # NOTE(review): cv2.imread presumably yields None for an unreadable
            # path, in which case the .shape access below would fail -- confirm.
            img_plate_model = cv2.imread(
                os.path.join(self.adr_plate_model,
                             '{}_{}.PNG'.format(bg_color, height)))
            print(img_plate_model.shape)
            # Special plate types use the explicit width chosen above; all
            # others are 480 px wide for "green" backgrounds and 440 px
            # otherwise.
            if "motor" in bg_color or 'dishu' in bg_color or 'macau' in bg_color:
                img_plate_model = cv2.resize(img_plate_model, (width, height))
            else:
                img_plate_model = cv2.resize(
                    img_plate_model,
                    (480 if 'green' in bg_color else 440, height))
            print(img_plate_model.shape)
            # Choose the glyph image for every character; the font_imgs key
            # prefix depends on the plate type and the character position.
            for i in range(len(plate_number)):
                if len(plate_number) == 8 and 'green' in bg_color:
                    font_img = self.font_imgs['green_{}'.format(
                        plate_number[i])]
                # Low-speed vehicle plates
                elif 'dishu' in bg_color:
                    if 'farm' in bg_color:
                        if i == 0:
                            font_img = self.font_imgs['motor_up_{}'.format(
                                plate_number[i])]
                        elif i == 1 or i == 2:
                            font_img = self.font_imgs['140_{}'.format(
                                plate_number[i])]
                            font_img = cv2.resize(font_img, (30, 45))
                        else:
                            font_img = self.font_imgs['140_{}'.format(
                                plate_number[i])]
                    else:
                        if i == 0:
                            font_img = self.font_imgs['220_{}'.format(
                                plate_number[i])]
                            font_img = cv2.resize(font_img, (55, 45))
                        elif i == 1:
                            font_img = self.font_imgs['220_up_{}'.format(
                                plate_number[i])]
                            font_img = cv2.resize(font_img, (55, 45))
                        else:
                            font_img = self.font_imgs['140_{}'.format(
                                plate_number[i])]
                # Macau plates
                elif 'macau' in bg_color:
                    font_img = self.font_imgs['140_{}'.format(plate_number[i])]
                    font_img = cv2.resize(font_img, (65, 100))
                # Armed-police single-row local plates
                elif len(plate_number) == 8 and ('army' in bg_color
                                                 and is_double == False):
                    if i == 1:
                        font_img = self.font_imgs['{}_{}'.format(
                            height, plate_number[i])]
                        font_img = cv2.resize(font_img, (30, 90))
                    elif i == 2:
                        font_img = self.font_imgs['{}_{}'.format(
                            height, plate_number[i])]
                        font_img = cv2.resize(font_img, (40, 90))
                    else:
                        font_img = self.font_imgs['{}_{}'.format(
                            height, plate_number[i])]
                # Armed-police double-row large-vehicle plates
                elif 'army' in bg_color and is_double:
                    if i < 3:
                        # The three checks are mutually exclusive on i, so at
                        # most one of them runs per character.
                        if i == 0:
                            font_img = self.font_imgs['220_up_{}'.format(
                                plate_number[i])]
                            font_img = cv2.resize(font_img, (55, 50))
                        if i == 1:
                            font_img = self.font_imgs['140_{}'.format(
                                plate_number[i])]
                            font_img = cv2.resize(font_img, (25, 50))
                        if i == 2:
                            font_img = self.font_imgs['220_{}'.format(
                                plate_number[i])]
                            font_img = cv2.resize(font_img, (80, 50))
                    else:
                        if plate_number[i] in digits:
                            font_img = self.font_imgs['220_{}'.format(
                                plate_number[i])]
                        else:
                            font_img = self.font_imgs['220_down_{}'.format(
                                plate_number[i])]
                else:
                    # Generic case: prefer a glyph keyed by plate height, then
                    # one keyed by background name, else the 220 up/down set.
                    if '{}_{}'.format(height,
                                      plate_number[i]) in self.font_imgs:
                        # Change the size of the "J" in "WJ"; armed-police
                        # small-car plates
                        if 'army' in bg_color and i == 1:
                            font_img = self.font_imgs['{}_{}'.format(
                                height, plate_number[i])]
                            font_img = cv2.resize(font_img, (30, 90))
                        else:
                            font_img = self.font_imgs['{}_{}'.format(
                                height, plate_number[i])]
                    # Motorcycle plates
                    elif '{}_{}'.format(bg_color,
                                        plate_number[i]) in self.font_imgs:
                        if len(plate_number) == 7:
                            if i < 2:
                                font_img = self.font_imgs['motor_up_{}'.format(
                                    plate_number[i])]
                            else:
                                font_img = self.font_imgs['140_{}'.format(
                                    plate_number[i])]
                        else:
                            if i < 1:
                                font_img = self.font_imgs['motor_up_{}'.format(
                                    plate_number[i])]
                                font_img = cv2.resize(font_img, (80, 50))
                            else:
                                font_img = self.font_imgs['140_{}'.format(
                                    plate_number[i])]

                    else:
                        if i < 2:
                            font_img = self.font_imgs['220_up_{}'.format(
                                plate_number[i])]
                        else:
                            font_img = self.font_imgs['220_down_{}'.format(
                                plate_number[i])]
                # Decide the is_red flag that is handed to copy_to_image_multi
                # for this character.
                if plate_number[i] in [
                        '警', '使', '领'
                ] or ('army' in bg_color and i == 0) or ('army' in bg_color
                                                         and i == 1):
                    is_red = True
                # Armed-police red-character rule
                elif plate_number[i] in provinces and 'army' in bg_color:
                    is_red = True
                # Military-plate red-character rule
                elif 'jun' in bg_color and (i == 0 or i == 1):
                    is_red = True
                elif plate_number[i] in letters and (
                        i == 6 or i == 7) and 'army' in bg_color:
                    is_red = True
                else:
                    is_red = False
                if enhance:
                    # Randomly erode or dilate the glyph with a k x k kernel of
                    # ones, k drawn from 1..5 (upper bound 6 is exclusive).
                    k = np.random.randint(1, 6)
                    kernel = np.ones((k, k), np.uint8)
                    if np.random.random(1) > 0.5:
                        font_img = np.copy(
                            cv2.erode(font_img, kernel, iterations=1))
                    else:
                        font_img = np.copy(
                            cv2.dilate(font_img, kernel, iterations=1))

                # NOTE(review): every iteration passes img_plate_model, not the
                # blurred img_plate from the previous character; presumably
                # copy_to_image_multi draws onto img_plate_model in place --
                # confirm against that helper.
                img_plate = copy_to_image_multi(img_plate_model, font_img,
                                                number_xy[i, :], bg_color,
                                                is_red)
                img_plate = cv2.blur(img_plate, (3, 3))
            # NOTE(review): for an empty plate_number the inner loop never
            # runs, so img_plate is either unbound or left over from the
            # previous plate here.
            plate_images.append(img_plate)
        # Multicore data augmentation
        import time
        time_start = time.time()
        augment = ImageAugmentation()
        # All generated plates are submitted as one batch.
        batches = [UnnormalizedBatch(images=plate_images)]
        # Define the augmentation methods yourself
        aug = iaa.Sequential([
            iaa.imgcorruptlike.Snow(severity=3),
            iaa.GaussianBlur(0.5),
            iaa.CropAndPad(px=(-10, 10)),
            #iaa.Lambda(func_images=light_change_right_trap_multi)
        ])

        batches_aug = list(aug.augment_batches(batches, background=True))
        # NOTE(review): the whole augmented batch object (not its images_aug)
        # is handed to light_change_right_trap -- confirm that is what the
        # helper expects.
        images = augment.light_change_right_trap(batches_aug[0], flag='x')
        time_end = time.time()
        print("Augmentation done in %.2fs" % (time_end - time_start, ))
        #img_plate = augment.gaussian_noise_iaa(img_plate)
        #img_plate = augment.add_smudge(img_plate)

        return images
    def test_data_load4(self):
        """Exercise ``data_load.data_load`` plus multicore augmentation.

        Loads ``../data/train.csv``, shows one random training picture,
        augments the training set in a pool of worker processes, appends the
        augmented copies to the original pictures and then shows several
        pictures of the combined set together with their labels.

        The method makes no assertions: it prints progress and timing
        information and opens matplotlib windows for manual inspection.
        """
        import random
        import time

        import imgaug.augmenters as iaa
        import numpy as np
        from imgaug.augmentables.batches import UnnormalizedBatch
        from matplotlib import pyplot as plt

        input_file = '../data/train.csv'
        image_size = 28
        training_dataset, testing_dataset, validating_dataset, training_labels, testing_labels, validating_labels = \
            data_load.data_load(input_file, image_size)

        def show_sample(pictures, labels, index):
            """Display picture ``index`` and print its label."""
            plt.imshow(pictures[index].reshape(image_size, image_size),
                       interpolation='nearest')
            plt.show()
            print(labels[index])

        def sometimes(augmenter):
            """Wrap ``augmenter`` so it is applied with probability 0.5."""
            return iaa.Sometimes(0.5, augmenter)

        # randrange excludes its upper bound; random.randint(0, len(x)) could
        # return len(x) itself and index one element past the end.
        random_picture = random.randrange(len(training_dataset))
        print("random int : ", random_picture)
        show_sample(training_dataset, training_labels, random_picture)

        # Every piece of the training data is submitted this many times.
        copies_per_piece = 200
        # Upper bound on the number of pictures handled by one job.
        max_size_of_one_job = 300

        aug = iaa.Sequential([
            sometimes(iaa.Affine(translate_px={
                "x": (-3, 3),
                "y": (-3, 3)
            })),
            sometimes(iaa.Affine(scale=(0.80, 1.20))),
            sometimes(iaa.Affine(rotate=(-35, 35))),
            sometimes(iaa.GaussianBlur(sigma=(0.1, 0.5)))
        ])

        time_start = time.time()

        with aug.pool(processes=32, maxtasksperchild=200, seed=1) as pool:

            # Number of pieces needed so that no piece exceeds
            # max_size_of_one_job pictures (ceiling division). divmod avoids
            # the modulo-by-zero the previous formula hit whenever the dataset
            # was smaller than a single job.
            pieces, remainder = divmod(len(training_dataset),
                                       max_size_of_one_job)
            print("before if check ", pieces)
            if remainder != 0 or pieces == 0:
                pieces = pieces + 1
                print("in if check ", pieces)

            # split training data into pieces
            split_training_dataset = np.array_split(training_dataset, pieces)
            split_training_labels = np.array_split(training_labels, pieces)

            # For each piece generate the batches that will be augmented; the
            # labels travel along in ``data`` so they stay aligned with their
            # pictures.
            batches = []
            for piece_images, piece_labels in zip(split_training_dataset,
                                                  split_training_labels):
                batches.extend(
                    UnnormalizedBatch(images=piece_images, data=piece_labels)
                    for _ in range(copies_per_piece))

            # call sending tasks to children
            print("sending for augmentation batches : ", len(batches))
            batches_aug = pool.map_batches(batches)

            # join together all augmented sets (including original pictures)
            print("joining all the batches with original pictures")
            all_training_dataset = np.concatenate(
                [training_dataset] + [b.images_aug for b in batches_aug])
            all_training_labels = np.concatenate(
                [training_labels] + [b.data for b in batches_aug])

        print("final size of the all_training_dataset : ",
              len(all_training_dataset))

        # Two fixed pictures first, then ten random ones.
        show_sample(all_training_dataset, all_training_labels, 79)
        show_sample(all_training_dataset, all_training_labels, 81)
        print("----------------")
        for _ in range(10):
            random_picture = random.randrange(len(all_training_dataset))
            show_sample(all_training_dataset, all_training_labels,
                        random_picture)

        time_end = time.time()
        print("Augmentation done in %.2fs" % (time_end - time_start, ))

        print(" len of all_training_dataset : ", len(all_training_dataset))
        print(" len of all_training_labels  : ", len(all_training_labels))

        print(" end ")