def __getitem__(self, index):
    """Load sample ``index`` and rasterise its training targets.

    Steps, in order:
      1. Read the image and its annotation (boxes plus per-box tags).
      2. When ``self.is_transform`` is set, randomly rescale the image.
      3. Draw a text map (one fill value per box) and a training mask that
         is zeroed inside every box whose tag is falsy.
      4. Draw ``self.kernel_num - 1`` kernel maps from progressively shrunk
         boxes, the shrink rate going linearly towards ``self.min_scale``.
      5. When ``self.is_transform`` is set, flip / rotate / crop the image
         and all maps together, then colour-jitter the image only.
      6. Convert the image to a normalised tensor.

    Args:
        index: Position of the sample in ``self.all_img_paths`` /
            ``self.all_gt_paths``.

    Returns:
        Tuple ``(img, gt_text, gt_kernels, training_mask)``; the last three
        are float32 numpy arrays and ``gt_text`` is binarised to 0/1.
    """
    img_path = self.all_img_paths[index]
    gt_path = self.all_gt_paths[index]

    image = get_img(img_path)
    bboxes, tags = get_bboxes(image, gt_path)

    # Multi-scale training: rescale before any target map is drawn so that
    # the maps are created at the final working resolution.
    if self.is_transform:
        image = random_scale(image, min_size=self.img_size[0])

    # Text map and training mask share the (possibly rescaled) image size.
    height, width = image.shape[0:2]
    text_map = np.zeros((height, width), dtype=np.float32)
    mask = np.ones((height, width), dtype=np.float32)

    box_count = bboxes.shape[0]
    if box_count > 0:
        # NOTE(review): multiplying by [w, h] * 4 implies each row holds four
        # (x, y) pairs normalised to [0, 1] -- confirm against get_bboxes.
        pixel_scale = [width, height] * 4
        bboxes = np.reshape(bboxes * pixel_scale, (box_count, -1, 2)).astype('int32')
        for box_idx in range(box_count):
            polygon = bboxes[box_idx]
            # Fill value is the 1-based box id; it is collapsed to 1 later.
            cv2.drawContours(text_map, [polygon], 0, box_idx + 1, -1)
            if not tags[box_idx]:
                cv2.drawContours(mask, [polygon], 0, 0, -1)

    # Kernel maps: one binary map per shrink level.
    kernel_maps = []
    for level in range(1, self.kernel_num):
        # Kept inside the loop so kernel_num == 1 never evaluates the division.
        rate = 1.0 - (1.0 - self.min_scale) / (self.kernel_num - 1) * level
        kernel_map = np.zeros(image.shape[0:2], dtype=np.float32)
        shrunk_boxes = shrink(bboxes, rate)
        for box_idx in range(shrunk_boxes.shape[0]):
            cv2.drawContours(kernel_map, [shrunk_boxes[box_idx]], 0, 1, -1)
        kernel_maps.append(kernel_map)

    # Geometric augmentation is applied to the image and every map together
    # so that they stay aligned.
    if self.is_transform:
        stacked = [image, text_map, mask, *kernel_maps]
        stacked = random_horizontal_flip(stacked)
        stacked = random_rotate(stacked)
        stacked = random_crop(stacked, self.img_size)
        image, text_map, mask = stacked[0], stacked[1], stacked[2]
        kernel_maps = stacked[3:]

    text_map[text_map > 0] = 1
    kernel_maps = np.array(kernel_maps)

    # Both modes go through PIL RGB; only training adds colour jitter.
    image = Image.fromarray(image).convert('RGB')
    if self.is_transform:
        jitter = py_transforms.RandomColorAdjust(brightness=32.0 / 255, saturation=0.5)
        image = jitter(image)

    image = py_transforms.ToTensor()(image)
    normalize = py_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    image = normalize(image)

    text_map = text_map.astype(np.float32)
    kernel_maps = kernel_maps.astype(np.float32)
    mask = mask.astype(np.float32)

    return image, text_map, kernel_maps, mask
def test_random_color_adjust_md5():
    """
    Test RandomColorAdjust with md5 check

    Runs the C and the Python implementation of RandomColorAdjust over the
    same TFRecord data with a fixed seed and a single worker, and compares
    each result against its stored golden file.

    The values returned by ``config_get_set_seed`` and
    ``config_get_set_num_parallel_workers`` are handed back to ``ds.config``
    in a ``finally`` clause, so a failing md5 check cannot leave the modified
    global configuration in place for tests that run afterwards.
    """
    logger.info("Test RandomColorAdjust with md5 check")
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # First dataset: C implementation
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        decode_op = c_vision.Decode()
        random_adjust_op = c_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1)
        data1 = data1.map(operations=decode_op, input_columns=["image"])
        data1 = data1.map(operations=random_adjust_op, input_columns=["image"])

        # Second dataset: Python implementation
        transforms = [
            py_vision.Decode(),
            py_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1),
            py_vision.ToTensor()
        ]
        transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data2 = data2.map(operations=transform, input_columns=["image"])

        # Compare with expected md5 from images
        filename = "random_color_adjust_01_c_result.npz"
        save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)

        filename = "random_color_adjust_01_py_result.npz"
        save_and_check_md5(data2, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore configuration even when one of the checks above fails.
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def create_dataset(batch_size, train_data_url='', workers=8, distributed=False,
                   input_size=224, color_jitter=0.4):
    """Create ImageNet training dataset

    Args:
        batch_size: Number of samples per batch; incomplete batches are dropped.
        train_data_url: Path handed to ``ds.ImageFolderDataset``; must exist.
        workers: Parallel workers for dataset reading and the two ``map`` calls.
        distributed: When truthy, shard the data using ``get_rank()`` and
            ``get_group_size()``; otherwise a single shard is used.
        input_size: Side length of the square random-resized crop.
        color_jitter: Half-width of the brightness / contrast / saturation
            range around 1 (lower bound clamped at 0).

    Returns:
        The batched training dataset.

    Raises:
        ValueError: If ``train_data_url`` does not exist.
    """
    if not os.path.exists(train_data_url):
        raise ValueError('Path not exists')

    # Image pipeline, listed in the order it is applied.
    jitter_range = (max(0, 1 - color_jitter), 1 + color_jitter)
    image_ops = py_transforms.Compose([
        py_vision.Decode(),
        py_vision.RandomResizedCrop(size=(input_size, input_size),
                                    scale=SCALE,
                                    ratio=RATIO,
                                    interpolation=Inter.BICUBIC),
        py_vision.RandomHorizontalFlip(0.5),
        py_vision.RandomColorAdjust(brightness=jitter_range,
                                    contrast=jitter_range,
                                    saturation=jitter_range),
        py_vision.ToTensor(),
        py_vision.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
    ])
    label_cast = c_transforms.TypeCast(mstype.int32)

    # One shard per device when distributed, a single shard otherwise.
    if distributed:
        shard_id = get_rank()
        shard_count = get_group_size()
    else:
        shard_id = 0
        shard_count = 1

    samples = ds.ImageFolderDataset(train_data_url,
                                    num_parallel_workers=workers,
                                    shuffle=True,
                                    num_shards=shard_count,
                                    shard_id=shard_id)
    samples = samples.map(input_columns=["image"],
                          operations=image_ops,
                          num_parallel_workers=workers)
    samples = samples.map(input_columns=["label"],
                          operations=label_cast,
                          num_parallel_workers=workers)

    # Batching; split_imgs_and_labels is run on every assembled batch.
    batched = samples.batch(batch_size,
                            per_batch_map=split_imgs_and_labels,
                            input_columns=["image", "label"],
                            num_parallel_workers=2,
                            drop_remainder=True)
    return batched.repeat(1)
def __init__(self, data_dir, training=True, use_third_trsfm=False, use_auto_augment=False,
             num_parallel_workers=8, device_num=1, device_id=0):
    """Store dataset settings and pick the transform pipeline.

    Pipeline selection:
      * ``training`` falsy: ``ToTensor`` + ``Normalize`` only.
      * training, ``use_third_trsfm`` falsy: crop, colour adjust, random
        grayscale, flip.
      * training, ``use_third_trsfm`` truthy, ``use_auto_augment`` truthy:
        crop, flip, ``CIFAR10Policy``.
      * training, ``use_third_trsfm`` truthy, ``use_auto_augment`` falsy:
        crop, flip, ``RandAugment(n=2, m=10)``.

    ``use_auto_augment`` is consulted only when ``use_third_trsfm`` is truthy.

    Args:
        data_dir: Stored as ``self.data_dir``.
        training: Selects the training pipelines over the evaluation one.
        use_third_trsfm: Selects the policy-based training pipelines.
        use_auto_augment: Chooses ``CIFAR10Policy`` over ``RandAugment``.
        num_parallel_workers: Stored as ``self.num_parallel_workers``.
        device_num: Stored as ``self.device_num``.
        device_id: Stored as ``self.device_id``.
    """
    # Per-channel normalisation constants shared by every pipeline.
    norm_mean = (0.4914, 0.4822, 0.4465)
    norm_std = (0.2023, 0.1994, 0.2010)

    if not training:
        steps = [
            transforms.ToTensor(),
            transforms.Normalize(norm_mean, norm_std),
        ]
    elif not use_third_trsfm:
        steps = [
            transforms.ToPIL(),
            transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
            transforms.RandomColorAdjust(0.4, 0.4, 0.4, 0.4),
            transforms.RandomGrayscale(prob=0.2),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(norm_mean, norm_std),
        ]
    elif use_auto_augment:
        steps = [
            transforms.ToPIL(),
            transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
            transforms.RandomHorizontalFlip(),
            CIFAR10Policy(),
            transforms.ToTensor(),
            transforms.Normalize(norm_mean, norm_std),
        ]
    else:
        rand_augment = RandAugment(n=2, m=10)
        steps = [
            transforms.ToPIL(),
            transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
            transforms.RandomHorizontalFlip(),
            rand_augment,
            transforms.ToTensor(),
            transforms.Normalize(norm_mean, norm_std),
        ]

    self.trsfm = Compose(steps)
    self.data_dir = data_dir
    self.num_parallel_workers = num_parallel_workers
    self.device_num = device_num
    self.device_id = device_id
def util_test_random_color_adjust_op(brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0), plot=False):
    """
    Util function that tests RandomColorAdjust for a specific argument

    Builds one pipeline with the C implementation and one with the Python
    implementation, both using the same arguments, then walks the two
    datasets in lockstep and asserts that the mean squared error between
    corresponding images stays below 0.01.

    Args:
        brightness: Forwarded to both RandomColorAdjust ops.
        contrast: Forwarded to both RandomColorAdjust ops.
        saturation: Forwarded to both RandomColorAdjust ops.
        hue: Forwarded to both RandomColorAdjust ops.
        plot: When truthy, call ``visualize_image`` for every image pair.

    Raises:
        AssertionError: If any image pair has ``mse >= 0.01``.
    """
    # First dataset: C implementation
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = c_vision.Decode()

    random_adjust_op = c_vision.RandomColorAdjust(brightness=brightness, contrast=contrast,
                                                  saturation=saturation, hue=hue)

    ctrans = [decode_op,
              random_adjust_op,
              ]

    data1 = data1.map(operations=ctrans, input_columns=["image"])

    # Second dataset: Python implementation
    transforms = [
        py_vision.Decode(),
        py_vision.RandomColorAdjust(brightness=brightness, contrast=contrast,
                                    saturation=saturation, hue=hue),
        py_vision.ToTensor()
    ]
    transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(operations=transform, input_columns=["image"])

    paired_items = zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
                       data2.create_dict_iterator(num_epochs=1, output_numpy=True))
    # 1-based counter so the log label matches the image's position.
    for num_iter, (item1, item2) in enumerate(paired_items, start=1):
        c_image = item1["image"]
        # Python output is CHW scaled by ToTensor; bring it back to HWC uint8
        # so it is comparable with the C output.
        py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)

        logger.info("shape of c_image: {}".format(c_image.shape))
        logger.info("shape of py_image: {}".format(py_image.shape))

        logger.info("dtype of c_image: {}".format(c_image.dtype))
        logger.info("dtype of py_image: {}".format(py_image.dtype))

        mse = diff_mse(c_image, py_image)
        logger.info("mse is {}".format(mse))

        # Label names the op under test and carries the real image index
        # (was "random_rotation_op_" with an index one too high).
        logger.info("random_color_adjust_op_{}, mse: {}".format(num_iter, mse))
        assert mse < 0.01

        if plot:
            visualize_image(c_image, py_image, mse)
def __init__(self, min_area_ratio=0.8, aspect_ratio_range=(0.8, 1.2), brightness=32. / 255.,
             contrast=0.5, saturation=0.5, hue=0.2, img_tile_shape=(150, 150)):
    """Store the crop / tiling settings and build the colour distortion op.

    Args:
        min_area_ratio: Stored as ``self.min_area_ratio``.
        aspect_ratio_range: Stored as ``self.aspect_ratio_range``.
        brightness: Forwarded to ``P.RandomColorAdjust``.
        contrast: Forwarded to ``P.RandomColorAdjust``.
        saturation: Forwarded to ``P.RandomColorAdjust``.
        hue: Forwarded to ``P.RandomColorAdjust``.
        img_tile_shape: Stored as ``self.img_tile_shape``.
    """
    self.min_area_ratio = min_area_ratio
    self.aspect_ratio_range = aspect_ratio_range
    self.img_tile_shape = img_tile_shape

    # The colour op is created once here and kept on the instance.
    color_settings = {
        "brightness": brightness,
        "contrast": contrast,
        "saturation": saturation,
        "hue": hue,
    }
    self.random_image_distortion_ops = P.RandomColorAdjust(**color_settings)