Exemple #1
0
    def __getitem__(self, index):
        """Load the sample at ``index`` and build its training targets.

        The image and its boxes are read through ``get_img``/``get_bboxes``;
        when ``self.is_transform`` is set the image is randomly rescaled and
        the image plus all label maps are flipped, rotated and cropped
        together before a random color adjustment is applied.

        Returns:
            tuple: ``(img, gt_text, gt_kernels, training_mask)`` where ``img``
            is the output of ``ToTensor`` followed by ``Normalize`` and the
            other three are float32 arrays.
        """
        image_file = self.all_img_paths[index]
        label_file = self.all_gt_paths[index]

        img = get_img(image_file)
        bboxes, tags = get_bboxes(img, label_file)

        # Multi-scale training: rescale before any label map is rasterized.
        if self.is_transform:
            img = random_scale(img, min_size=self.img_size[0])

        # Text-instance map and training mask share the image's spatial size.
        height, width = img.shape[0:2]
        gt_text = np.zeros((height, width), dtype=np.float32)
        training_mask = np.ones((height, width), dtype=np.float32)
        box_count = bboxes.shape[0]
        if box_count > 0:
            # Multiply the box coordinates by (w, h) pairs and regroup them
            # into integer (x, y) points per box.
            scale = [width, height] * 4
            bboxes = np.reshape(bboxes * scale, (box_count, -1, 2)).astype('int32')
            for box_idx, polygon in enumerate(bboxes):
                # Each box is filled with its own 1-based label.
                cv2.drawContours(gt_text, [polygon], 0, box_idx + 1, -1)
                if not tags[box_idx]:
                    # Boxes with a falsy tag are zeroed out of the mask.
                    cv2.drawContours(training_mask, [polygon], 0, 0, -1)

        # One shrunken-kernel map per scale step.
        gt_kernels = []
        for step in range(1, self.kernel_num):
            rate = 1.0 - (1.0 - self.min_scale) / (self.kernel_num - 1) * step
            kernel_map = np.zeros((height, width), dtype=np.float32)
            for shrunk_polygon in shrink(bboxes, rate):
                cv2.drawContours(kernel_map, [shrunk_polygon], 0, 1, -1)
            gt_kernels.append(kernel_map)

        # Geometric augmentation is applied jointly so image and labels stay aligned.
        if self.is_transform:
            stacked = [img, gt_text, training_mask, *gt_kernels]
            stacked = random_horizontal_flip(stacked)
            stacked = random_rotate(stacked)
            stacked = random_crop(stacked, self.img_size)
            img, gt_text, training_mask = stacked[0], stacked[1], stacked[2]
            gt_kernels = stacked[3:]

        # Collapse per-instance labels into a binary text map.
        gt_text[gt_text > 0] = 1
        gt_kernels = np.array(gt_kernels)

        img = Image.fromarray(img).convert('RGB')
        if self.is_transform:
            color_jitter = py_transforms.RandomColorAdjust(brightness=32.0 / 255, saturation=0.5)
            img = color_jitter(img)

        img = py_transforms.ToTensor()(img)
        img = py_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(img)

        return (
            img,
            gt_text.astype(np.float32),
            gt_kernels.astype(np.float32),
            training_mask.astype(np.float32),
        )
Exemple #2
0
def test_random_color_adjust_md5():
    """
    Test RandomColorAdjust with md5 check

    Builds one pipeline with the C++ ops and one with the Python ops over the
    same TFRecord data, then compares each against its stored golden file via
    save_and_check_md5. The seed and the number of parallel workers are pinned
    for the duration of the test and restored afterwards, even when a check
    fails, so a failure here cannot leak configuration into other tests.
    """
    logger.info("Test RandomColorAdjust with md5 check")
    # The return values are treated as the previous settings and restored below.
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        decode_op = c_vision.Decode()
        random_adjust_op = c_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1)
        data1 = data1.map(operations=decode_op, input_columns=["image"])
        data1 = data1.map(operations=random_adjust_op, input_columns=["image"])

        # Second dataset
        transforms = [
            py_vision.Decode(),
            py_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1),
            py_vision.ToTensor()
        ]
        transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data2 = data2.map(operations=transform, input_columns=["image"])
        # Compare with expected md5 from images
        filename = "random_color_adjust_01_c_result.npz"
        save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
        filename = "random_color_adjust_01_py_result.npz"
        save_and_check_md5(data2, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore configuration regardless of the test outcome
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
Exemple #3
0
def create_dataset(batch_size,
                   train_data_url='',
                   workers=8,
                   distributed=False,
                   input_size=224,
                   color_jitter=0.4):
    """Create ImageNet training dataset

    Args:
        batch_size: Number of samples per batch; incomplete batches are dropped.
        train_data_url: Directory passed to ``ds.ImageFolderDataset``.
        workers: Parallel worker count for reading and for the map operations.
        distributed: When true, shard by ``get_rank()`` / ``get_group_size()``;
            otherwise a single shard with id 0 is used.
        input_size: Side length of the square random-resized crop.
        color_jitter: Half-width of the brightness/contrast/saturation range
            around 1; the lower bound is clamped at 0.

    Returns:
        The batched training dataset.

    Raises:
        ValueError: If ``train_data_url`` does not exist.
    """
    if not os.path.exists(train_data_url):
        raise ValueError('Path not exists')

    # Symmetric jitter interval around 1, never below 0.
    jitter_low = max(0, 1 - color_jitter)
    jitter_high = 1 + color_jitter
    jitter_range = (jitter_low, jitter_high)

    # Image pipeline: decode -> crop -> flip -> color jitter -> tensor -> normalize.
    image_pipeline = py_transforms.Compose([
        py_vision.Decode(),
        py_vision.RandomResizedCrop(size=(input_size, input_size),
                                    scale=SCALE,
                                    ratio=RATIO,
                                    interpolation=Inter.BICUBIC),
        py_vision.RandomHorizontalFlip(0.5),
        py_vision.RandomColorAdjust(brightness=jitter_range,
                                    contrast=jitter_range,
                                    saturation=jitter_range),
        py_vision.ToTensor(),
        py_vision.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
    ])
    label_cast = c_transforms.TypeCast(mstype.int32)

    if distributed:
        shard_id = get_rank()
        shard_count = get_group_size()
    else:
        shard_id = 0
        shard_count = 1

    samples = ds.ImageFolderDataset(train_data_url,
                                    num_parallel_workers=workers,
                                    shuffle=True,
                                    num_shards=shard_count,
                                    shard_id=shard_id)
    samples = samples.map(input_columns=["image"],
                          operations=image_pipeline,
                          num_parallel_workers=workers)
    samples = samples.map(input_columns=["label"],
                          operations=label_cast,
                          num_parallel_workers=workers)

    # Batch with the per-batch mapper applied to both columns.
    batched = samples.batch(batch_size,
                            per_batch_map=split_imgs_and_labels,
                            input_columns=["image", "label"],
                            num_parallel_workers=2,
                            drop_remainder=True)

    return batched.repeat(1)
Exemple #4
0
    def __init__(self,
                 data_dir,
                 training=True,
                 use_third_trsfm=False,
                 use_auto_augment=False,
                 num_parallel_workers=8,
                 device_num=1,
                 device_id=0):
        """Select the transform pipeline and store the dataset settings.

        Pipeline choice:
            * ``training`` false: only ``ToTensor`` + ``Normalize``.
            * ``use_third_trsfm`` false: crop, color adjust, random grayscale
              and horizontal flip.
            * ``use_third_trsfm`` and ``use_auto_augment``: crop, flip and
              ``CIFAR10Policy``.
            * ``use_third_trsfm`` only: crop, flip and ``RandAugment(n=2, m=10)``.

        Every pipeline ends with ``ToTensor`` followed by ``Normalize``.
        """
        # Per-channel normalization constants shared by every pipeline
        # (presumably CIFAR-10 statistics -- confirm against the data source).
        channel_mean = (0.4914, 0.4822, 0.4465)
        channel_std = (0.2023, 0.1994, 0.2010)

        if not training:
            steps = []
        elif not use_third_trsfm:
            steps = [
                transforms.ToPIL(),
                transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
                transforms.RandomColorAdjust(0.4, 0.4, 0.4, 0.4),
                transforms.RandomGrayscale(prob=0.2),
                transforms.RandomHorizontalFlip(),
            ]
        elif use_auto_augment:
            steps = [
                transforms.ToPIL(),
                transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
                transforms.RandomHorizontalFlip(),
                CIFAR10Policy(),
            ]
        else:
            rand_augment = RandAugment(n=2, m=10)
            steps = [
                transforms.ToPIL(),
                transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
                transforms.RandomHorizontalFlip(),
                rand_augment,
            ]

        # Common tail: convert to tensor, then normalize.
        steps.append(transforms.ToTensor())
        steps.append(transforms.Normalize(channel_mean, channel_std))

        self.trsfm = Compose(steps)
        self.data_dir = data_dir
        self.num_parallel_workers = num_parallel_workers
        self.device_num = device_num
        self.device_id = device_id
Exemple #5
0
def util_test_random_color_adjust_op(brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0), plot=False):
    """
    Util function that tests RandomColorAdjust for a specific argument

    Runs the C++ and the Python RandomColorAdjust ops over the same TFRecord
    images with identical arguments and asserts that the per-image mean squared
    error between the two outputs is below 0.01.

    Args:
        brightness: Brightness argument forwarded to both ops.
        contrast: Contrast argument forwarded to both ops.
        saturation: Saturation argument forwarded to both ops.
        hue: Hue argument forwarded to both ops.
        plot: When true, each image pair is shown through visualize_image.
    """

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = c_vision.Decode()

    random_adjust_op = c_vision.RandomColorAdjust(brightness=brightness, contrast=contrast, saturation=saturation,
                                                  hue=hue)

    ctrans = [decode_op,
              random_adjust_op,
              ]

    data1 = data1.map(operations=ctrans, input_columns=["image"])

    # Second dataset
    transforms = [
        py_vision.Decode(),
        py_vision.RandomColorAdjust(brightness=brightness, contrast=contrast, saturation=saturation,
                                    hue=hue),
        py_vision.ToTensor()
    ]
    transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(operations=transform, input_columns=["image"])

    num_iter = 0
    for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
                            data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        num_iter += 1
        c_image = item1["image"]
        # Move axis 0 last and rescale by 255 so the Python output is comparable
        # to the C++ one (presumably ToTensor yields CHW floats in [0, 1] -- confirm).
        py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)

        logger.info("shape of c_image: {}".format(c_image.shape))
        logger.info("shape of py_image: {}".format(py_image.shape))

        logger.info("dtype of c_image: {}".format(c_image.dtype))
        logger.info("dtype of py_image: {}".format(py_image.dtype))

        mse = diff_mse(c_image, py_image)
        logger.info("mse is {}".format(mse))

        # num_iter is already the 1-based index of the current image.
        logger.info("random_color_adjust_op_{}, mse: {}".format(num_iter, mse))
        assert mse < 0.01

        if plot:
            visualize_image(c_image, py_image, mse)
Exemple #6
0
 def __init__(self,
              min_area_ratio=0.8,
              aspect_ratio_range=(0.8, 1.2),
              brightness=32. / 255.,
              contrast=0.5,
              saturation=0.5,
              hue=0.2,
              img_tile_shape=(150, 150)):
     """Store the geometry settings and build the color-adjust op.

     ``min_area_ratio``, ``aspect_ratio_range`` and ``img_tile_shape`` are
     kept as attributes unchanged; ``brightness``, ``contrast``,
     ``saturation`` and ``hue`` are forwarded to ``P.RandomColorAdjust``,
     whose instance is stored as ``random_image_distortion_ops``.
     """
     # Gather the color parameters once so the op construction is a single call.
     color_settings = dict(brightness=brightness,
                           contrast=contrast,
                           saturation=saturation,
                           hue=hue)

     self.min_area_ratio = min_area_ratio
     self.aspect_ratio_range = aspect_ratio_range
     self.img_tile_shape = img_tile_shape
     self.random_image_distortion_ops = P.RandomColorAdjust(**color_settings)