def get_data(name, batch):
    isTrain = name == 'train'
    if isTrain:
        augmentors = [
            GoogleNetResize(crop_area_fraction=0.49),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1]),
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return get_imagenet_dataflow(args.data, name, batch, augmentors)
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        augmentors = [
            GoogleNetResize(),
            # It's OK to remove the following augs if your CPU is not fast enough.
            # Removing brightness/contrast/saturation does not have a significant effect on accuracy.
            # Removing lighting leads to a tiny drop in accuracy.
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1]),
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
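# Minimal usage sketch for the augmentor list above, assuming tensorpack's
# dataflow.imgaug module is the `imgaug` in scope and the image is a BGR uint8
# array loaded with OpenCV; 'example.jpg' is a placeholder path.
import cv2
from tensorpack.dataflow import imgaug

aug = imgaug.AugmentorList(fbresnet_augmentor(isTrain=True))
img = cv2.imread('example.jpg', cv2.IMREAD_COLOR)  # BGR uint8, range [0, 255]
out = aug.augment(img)                             # one random 224x224 training crop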
def get_data(name, batch):
    isTrain = name == 'train'
    image_shape = 224
    if isTrain:
        augmentors = [
            # use lighter augs if model is too small
            GoogleNetResize(
                crop_area_fraction=0.49 if args.width_ratio < 1 else 0.08,
                target_shape=image_shape),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(int(image_shape * 256 / 224), cv2.INTER_CUBIC),
            imgaug.CenterCrop((image_shape, image_shape)),
        ]
    return get_imagenet_dataflow(args.data_dir, name, batch, augmentors,
                                 meta_dir=args.meta_dir)
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        augmentors = [
            GoogleNetResize(),
            imgaug.Flip(horiz=True),
            imgaug.ToFloat32(),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), rgb=False, clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1]),
            ]),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_LINEAR),
            imgaug.CenterCrop((224, 224)),
            imgaug.ToFloat32(),
        ]
    return augmentors
def get_moco_v1_augmentor():
    augmentors = [
        TorchvisionCropAndResize(crop_area_fraction=(0.2, 1.)),
        RandomGrayScale(0.2),
        imgaug.ToFloat32(),
        imgaug.RandomOrderAug([
            imgaug.BrightnessScale((0.6, 1.4)),
            imgaug.Contrast((0.6, 1.4), rgb=False),
            imgaug.Saturation(0.4, rgb=False),
            # 72 = 180*0.4
            imgaug.Hue(range=(-72, 72), rgb=False),
        ]),
        imgaug.ToUint8(),
        imgaug.Flip(horiz=True),
    ]
    return augmentors
def get_moco_v1_augmentor():
    augmentors = [
        imgaug.GoogleNetRandomCropAndResize(crop_area_fraction=(0.2, 1.)),
        imgaug.RandomApplyAug(imgaug.Grayscale(rgb=False, keepshape=True), 0.2),
        imgaug.ToFloat32(),
        imgaug.RandomOrderAug([
            imgaug.BrightnessScale((0.6, 1.4)),
            imgaug.Contrast((0.6, 1.4), rgb=False),
            imgaug.Saturation(0.4, rgb=False),
            # 72 = 180*0.4
            imgaug.Hue(range=(-72, 72), rgb=False),
        ]),
        imgaug.ToUint8(),
        imgaug.Flip(horiz=True),
    ]
    return augmentors
def get_resnet_augmentor():
    augmentors = [
        imgaug.RandomOrderAug([
            imgaug.BrightnessScale((0.6, 1.4), clip=False),
            imgaug.Contrast((0.6, 1.4), clip=False),
            imgaug.Saturation(0.4, rgb=False),
            # rgb-bgr conversion for the constants copied from fb.resnet.torch
            imgaug.Lighting(
                0.1,
                eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                 [-0.5808, -0.0045, -0.8140],
                                 [-0.5836, -0.6948, 0.4203]],
                                dtype='float32')[::-1, ::-1]),
        ]),
    ]
    return augmentors
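# Note that get_resnet_augmentor() above returns only the color/lighting
# augmentations. A sketch of composing it into a full training list, assuming
# GoogleNetResize (the crop-and-resize used by the neighboring snippets) is in
# scope:
train_augmentors = [GoogleNetResize()] + get_resnet_augmentor() + [imgaug.Flip(horiz=True)]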
def get_moco_v2_augmentor():
    augmentors = [
        TorchvisionCropAndResize(crop_area_fraction=(0.2, 1.)),
        imgaug.ToFloat32(),
        imgaug.RandomApplyAug(
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4)),
                imgaug.Contrast((0.6, 1.4), rgb=False),
                imgaug.Saturation(0.4, rgb=False),
                # 18 = 180*0.1
                imgaug.Hue(range=(-18, 18), rgb=False),
            ]), 0.8),
        RandomGrayScale(0.2),
        imgaug.RandomApplyAug(RandomGaussionBlur([0.1, 2.0], 0.1), 0.5),
        imgaug.ToUint8(),
        imgaug.Flip(horiz=True),
    ]
    return augmentors
def get_moco_v2_augmentor():
    augmentors = [
        imgaug.GoogleNetRandomCropAndResize(crop_area_fraction=(0.2, 1.)),
        imgaug.ToFloat32(),
        imgaug.RandomApplyAug(
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4)),
                imgaug.Contrast((0.6, 1.4), rgb=False),
                imgaug.Saturation(0.4, rgb=False),
                # 18 = 180*0.1
                imgaug.Hue(range=(-18, 18), rgb=False),
            ]), 0.8),
        imgaug.RandomApplyAug(imgaug.Grayscale(rgb=False, keepshape=True), 0.2),
        imgaug.RandomApplyAug(
            # 11 = 0.1*224//2
            imgaug.GaussianBlur(size_range=(11, 12), sigma_range=[0.1, 2.0]), 0.5),
        imgaug.ToUint8(),
        imgaug.Flip(horiz=True),
    ]
    return augmentors
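# MoCo trains on two independently augmented "views" (query and key) of the
# same image. A minimal sketch of producing them with the list above, assuming
# tensorpack's imgaug.AugmentorList; 'example.jpg' is a placeholder path.
import cv2
from tensorpack.dataflow import imgaug

aug = imgaug.AugmentorList(get_moco_v2_augmentor())
img = cv2.imread('example.jpg', cv2.IMREAD_COLOR)  # BGR uint8
view_q = aug.augment(img)  # query view
view_k = aug.augment(img)  # key view, drawn with independent randomness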
def fbresnet_augmentor():
    # assume BGR input
    augmentors = [
        imgaug.GoogleNetRandomCropAndResize(),
        imgaug.RandomOrderAug([
            imgaug.BrightnessScale((0.6, 1.4), clip=False),
            imgaug.Contrast((0.6, 1.4), clip=False),
            imgaug.Saturation(0.4, rgb=False),
            # rgb->bgr conversion for the constants copied from fb.resnet.torch
            imgaug.Lighting(
                0.1,
                eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                 [-0.5808, -0.0045, -0.8140],
                                 [-0.5836, -0.6948, 0.4203]],
                                dtype='float32')[::-1, ::-1]),
        ]),
        imgaug.Flip(horiz=True),
    ]
    return augmentors
def get_data(is_train, batch_size, data_dir_path, input_image_size=224,
             resize_inv_factor=0.875):
    assert (resize_inv_factor > 0.0)
    resize_value = int(math.ceil(float(input_image_size) / resize_inv_factor))
    if is_train:
        augmentors = [
            GoogleNetResize(crop_area_fraction=0.08,
                            target_shape=input_image_size),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1]),
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            # imgaug.ResizeShortestEdge(resize_value, cv2.INTER_CUBIC),
            imgaug.ResizeShortestEdge(resize_value, cv2.INTER_LINEAR),
            imgaug.CenterCrop((input_image_size, input_image_size)),
        ]
    return get_imagenet_dataflow(
        datadir=data_dir_path,
        is_train=is_train,
        batch_size=batch_size,
        augmentors=augmentors)
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    interpolation = cv2.INTER_CUBIC
    # linear seems to have more stable performance,
    # but we keep cubic for compatibility with old models
    if isTrain:
        augmentors = [
            imgaug.GoogleNetRandomCropAndResize(interp=interpolation),
            # avoid frequent casting in each color augmentation
            imgaug.ToFloat32(),
            # It's OK to remove the following augs if your CPU is not fast enough.
            # Removing brightness/contrast/saturation does not have a significant effect on accuracy.
            # Removing lighting leads to a tiny drop in accuracy.
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4)),
                imgaug.Contrast((0.6, 1.4), rgb=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1]),
            ]),
            imgaug.ToUint8(),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, interp=interpolation),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
def get_inat_augmented_data(subset, options, lmdb_dir=None, year='2018',
                            do_multiprocess=True, do_validation=False,
                            is_train=None, shuffle=None, n_allow=None):
    input_size = options.input_size if options.input_size else 224
    isTrain = is_train if is_train is not None else (subset == 'train' and do_multiprocess)
    shuffle = shuffle if shuffle is not None else isTrain
    postfix = "" if n_allow is None else "_allow_{}".format(n_allow)
    # TODO: Parameterize the cv split to be considered.
    # Currently hardcoded to 1.
    cv = 1
    # When do_validation is True it will expect *cv_train and *cv_val lmdbs.
    # Currently the cv_train split is always used.
    if isTrain:
        postfix += '_cv_train_{}'.format(cv)
    elif do_validation:
        subset = 'train'
        postfix += '_cv_val_{}'.format(cv)
    if lmdb_dir is None:
        lmdb_path = os.path.join(options.data_dir, 'inat_lmdb',
                                 'inat2018_{}{}.lmdb'.format(subset, postfix))
    else:
        lmdb_path = os.path.join(options.data_dir, lmdb_dir,
                                 'inat{}_{}{}.lmdb'.format(year, subset, postfix))
    ds = LMDBData(lmdb_path, shuffle=False)
    if shuffle:
        ds = LocallyShuffleData(ds, 1024 * 80)  # this is 64G~80G of images in memory
    ds = PrefetchData(ds, 1024 * 8, 1)  # prefetch around 8G
    ds = LMDBDataPoint(ds)
    ds = MapDataComponent(ds, lambda x: cv2.imdecode(x, cv2.IMREAD_COLOR), 0)  # BGR uint8 data
    if isTrain:
        class Resize(imgaug.ImageAugmentor):
            """
            Crop 8%~100% of the original image.
            See `Going Deeper with Convolutions` by Google.
            """
            def _augment(self, img, _):
                h, w = img.shape[:2]
                area = h * w
                for _ in range(10):
                    targetArea = self.rng.uniform(0.08, 1.0) * area
                    aspectR = self.rng.uniform(0.75, 1.333)
                    ww = int(np.sqrt(targetArea * aspectR))
                    hh = int(np.sqrt(targetArea / aspectR))
                    if self.rng.uniform() < 0.5:
                        ww, hh = hh, ww
                    if hh <= h and ww <= w:
                        x1 = 0 if w == ww else self.rng.randint(0, w - ww)
                        y1 = 0 if h == hh else self.rng.randint(0, h - hh)
                        out = img[y1:y1 + hh, x1:x1 + ww]
                        out = cv2.resize(out, (input_size, input_size),
                                         interpolation=cv2.INTER_CUBIC)
                        return out
                out = cv2.resize(img, (input_size, input_size),
                                 interpolation=cv2.INTER_CUBIC)
                return out

        augmentors = [
            Resize(),
            imgaug.RandomOrderAug([
                imgaug.Brightness(30, clip=False),
                imgaug.Contrast((0.8, 1.2), clip=False),
                imgaug.Saturation(0.4),
                # rgb-bgr conversion
                imgaug.Lighting(0.1,
                                eigval=[0.2175, 0.0188, 0.0045][::-1],
                                eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                                 [-0.5808, -0.0045, -0.8140],
                                                 [-0.5836, -0.6948, 0.4203]],
                                                dtype='float32')[::-1, ::-1]),
            ]),
            imgaug.Clip(),
            imgaug.Flip(horiz=True),
            imgaug.ToUint8(),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256),
            imgaug.CenterCrop((input_size, input_size)),
            imgaug.ToUint8(),
        ]
    ds = AugmentImageComponent(ds, augmentors, copy=False)
    if do_multiprocess:
        ds = PrefetchDataZMQ(ds, min(24, multiprocessing.cpu_count()))
    ds = BatchData(ds, options.batch_size // options.nr_gpu, remainder=not isTrain)
    return ds
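# Hypothetical invocation sketch. The `options` object and its field values
# are placeholders mirroring the attributes the function reads (data_dir,
# input_size, batch_size, nr_gpu); the LMDB files must already exist on disk.
from argparse import Namespace

options = Namespace(data_dir='/path/to/data', input_size=224,
                    batch_size=256, nr_gpu=8)
ds = get_inat_augmented_data('train', options)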
def get_tiny_imagenet_augmented_data(subset, options, do_multiprocess=True,
                                     is_train=None, shuffle=None):
    isTrain = is_train if is_train is not None else (subset == 'train' and do_multiprocess)
    shuffle = shuffle if shuffle is not None else isTrain
    lmdb_path = os.path.join(options.data_dir, 'tiny_imagenet_lmdb',
                             'tiny_imagenet_{}.lmdb'.format(subset))
    # Since tiny imagenet is small (200MB zipped) we can shuffle it all directly;
    # we skip the LocallyShuffleData and PrefetchData routine.
    ds = LMDBData(lmdb_path, shuffle=shuffle)
    ds = LMDBDataPoint(ds)
    ds = MapDataComponent(ds, lambda x: cv2.imdecode(x, cv2.IMREAD_COLOR), 0)
    img_size = 64
    if isTrain:
        class Resize(imgaug.ImageAugmentor):
            """
            Crop 30%~100% of the original image.
            See `Going Deeper with Convolutions` by Google.
            """
            def _augment(self, img, _):
                h, w = img.shape[:2]
                area = h * w
                for _ in range(10):
                    targetArea = self.rng.uniform(0.3, 1.0) * area
                    aspectR = self.rng.uniform(0.75, 1.333)
                    ww = int(np.sqrt(targetArea * aspectR))
                    hh = int(np.sqrt(targetArea / aspectR))
                    if self.rng.uniform() < 0.5:
                        ww, hh = hh, ww
                    if hh <= h and ww <= w:
                        x1 = 0 if w == ww else self.rng.randint(0, w - ww)
                        y1 = 0 if h == hh else self.rng.randint(0, h - hh)
                        out = img[y1:y1 + hh, x1:x1 + ww]
                        out = cv2.resize(out, (img_size, img_size),
                                         interpolation=cv2.INTER_CUBIC)
                        return out
                out = cv2.resize(img, (img_size, img_size),
                                 interpolation=cv2.INTER_CUBIC)
                return out

        augmentors = [
            Resize(),
            imgaug.RandomOrderAug([
                imgaug.Brightness(30, clip=False),
                imgaug.Contrast((0.8, 1.2), clip=False),
                imgaug.Saturation(0.4),
                # rgb-bgr conversion
                imgaug.Lighting(0.1,
                                eigval=[0.2175, 0.0188, 0.0045][::-1],
                                eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                                 [-0.5808, -0.0045, -0.8140],
                                                 [-0.5836, -0.6948, 0.4203]],
                                                dtype='float32')[::-1, ::-1]),
            ]),
            imgaug.Clip(),
            imgaug.Flip(horiz=True),
            imgaug.ToUint8(),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(72),
            imgaug.CenterCrop((img_size, img_size)),
            imgaug.ToUint8(),
        ]
    ds = AugmentImageComponent(ds, augmentors, copy=False)
    ds = BatchData(ds, options.batch_size // options.nr_gpu, remainder=not isTrain)
    if do_multiprocess:
        ds = PrefetchData(ds, nr_prefetch=4, nr_proc=4)
    return ds
def get_input_imagenet():
    train = dataset.ILSVRC12("/datasets/ImageNet/ILSVRC/Data/CLS-LOC", "train",
                             dir_structure="train", shuffle=True)
    test = dataset.ILSVRC12("/datasets/ImageNet/ILSVRC/Data/CLS-LOC", "val",
                            dir_structure="train", shuffle=False)

    # Copied from tensorpack examples:
    # https://github.com/tensorpack/tensorpack/blob/master/examples/ImageNetModels/imagenet_utils.py
    train_augmentors = imgaug.AugmentorList([
        imgaug.GoogleNetRandomCropAndResize(interp=cv2.INTER_CUBIC),
        # It's OK to remove the following augs if your CPU is not fast enough.
        # Removing brightness/contrast/saturation does not have a significant effect on accuracy.
        # Removing lighting leads to a tiny drop in accuracy.
        imgaug.RandomOrderAug([
            imgaug.BrightnessScale((0.6, 1.4), clip=False),
            imgaug.Contrast((0.6, 1.4), rgb=False, clip=False),
            imgaug.Saturation(0.4, rgb=False),
            # rgb-bgr conversion for the constants copied from fb.resnet.torch
            imgaug.Lighting(
                0.1,
                eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                 [-0.5808, -0.0045, -0.8140],
                                 [-0.5836, -0.6948, 0.4203]],
                                dtype='float32')[::-1, ::-1]),
        ]),
        imgaug.Flip(horiz=True),
    ])
    test_augmentors = imgaug.AugmentorList([
        imgaug.ResizeShortestEdge(256, interp=cv2.INTER_CUBIC),
        imgaug.CenterCrop((224, 224)),
    ])

    def preprocess(augmentors):
        def apply(x):
            image, label = x
            onehot = np.zeros(1000)
            onehot[label] = 1.0
            image = augmentors.augment(image)
            return image, onehot
        return apply

    parallel = min(40, multiprocessing.cpu_count() // 2)  # assuming hyperthreading
    train = MapData(train, preprocess(train_augmentors))
    train = PrefetchDataZMQ(train, parallel)
    test = MultiThreadMapData(test, parallel, preprocess(test_augmentors), strict=True)
    test = PrefetchDataZMQ(test, 1)
    return train, test, ((224, 224, 3), (1000,))
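# A short consumption sketch: tensorpack dataflows must be reset_state()-ed
# once before iteration, after which each datapoint is an (image, onehot) pair.
train, test, (image_shape, label_shape) = get_input_imagenet()
train.reset_state()
for image, onehot in train:
    # image: (224, 224, 3) augmented BGR array; onehot: (1000,) label vector
    break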