def get_data(datadir, size=IMAGE_SIZE, isTrain=True, zmin=-1, zmax=1,
             batch=BATCH, shuffle_read=False):
    """Build a batched dataflow joining random z-data with augmented JPEGs.

    Images are globbed from ``<datadir>/train`` when ``isTrain`` is truthy and
    from ``<datadir>/test`` otherwise.

    Args:
        datadir: directory holding the ``train`` / ``test`` sub-directories.
        size: crop size; the shortest edge is first resized to
            ``int(size * 1.143)``.
        isTrain: selects random crop + horizontal flip (truthy) or a center
            crop (falsy); also forwarded as ``shuffle`` to ``ImageFromFile``.
        zmin: forwarded to ``RandomZData``.
        zmax: forwarded to ``RandomZData``.
        batch: batch size handed to ``BatchData``.
        shuffle_read: when truthy the file list is shuffled with
            ``random.shuffle``; otherwise it is sorted.

    Returns:
        The ``BatchData`` dataflow wrapping ``[random z, augmented image]``.
    """
    resize = imgaug.ResizeShortestEdge(int(size * 1.143))
    if isTrain:
        augs = [resize, imgaug.RandomCrop(size), imgaug.Flip(horiz=True)]
        subset = 'train'
    else:
        augs = [resize, imgaug.CenterCrop(size)]
        subset = 'test'

    image_dir = os.path.join(datadir, subset)
    files = glob.glob(os.path.join(image_dir, "*.jpg"))
    if shuffle_read:
        import random
        random.shuffle(files)
    else:
        files.sort()

    images = ImageFromFile(files, channel=3, shuffle=isTrain)
    images = AugmentImageComponent(images, augs)
    noise = RandomZData([size, size, 3], zmin, zmax)
    joined = JoinData([noise, images])
    return BatchData(joined, batch)
def fbresnet_augmentor(isTrain, target_shape):
    """Return the fb.resnet.torch-style augmentor list.

    Intended for BGR images in range [0,255].

    Args:
        isTrain: truthy for the training pipeline, falsy for evaluation.
        target_shape: forwarded to ``GoogleNetResize`` when training; when
            evaluating, a value other than 224 appends an extra
            shortest-edge resize to ``target_shape``.

    Returns:
        A list of augmentor objects.
    """
    if not isTrain:
        eval_augs = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
        # NOTE(review): the original indentation was lost; this extra resize
        # is assumed to belong to the evaluation branch only -- confirm.
        if target_shape != 224:
            eval_augs.append(
                imgaug.ResizeShortestEdge(target_shape, cv2.INTER_CUBIC))
        return eval_augs

    resize = GoogleNetResize(target_shape=target_shape)
    # The constants are copied from fb.resnet.torch (RGB order); the [::-1]
    # reversals convert them to BGR order.
    eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]
    color_jitter = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
    ])
    flip = imgaug.Flip(horiz=True)
    return [resize, color_jitter, flip]
def _augment(self, img, _):
    """Crop a random region of ``img`` and resize it to a square.

    Up to ten candidate regions are drawn. Each covers a fraction of the
    image area sampled between ``self.crop_area_fraction`` and 1.0, with an
    aspect ratio sampled between ``self.aspect_ratio_low`` and
    ``self.aspect_ratio_high``; width and height are swapped with
    probability 0.5. The first candidate that fits inside the image is
    cropped at a random offset and resized to
    ``(self.target_shape, self.target_shape)`` with cubic interpolation.
    If no candidate fits, the image is resized along its shortest edge and
    center-cropped instead.

    Args:
        img: image array; only ``img.shape[:2]`` (rows, columns) is read.
        _: unused.

    Returns:
        The cropped and resized image.
    """
    h, w = img.shape[:2]  # number of rows and columns of the image
    area = h * w  # size of the whole region available for cropping
    side = self.target_shape
    for _attempt in range(10):
        # Sample the area and aspect ratio of the candidate region.
        target_area = self.rng.uniform(self.crop_area_fraction, 1.0) * area
        aspect = self.rng.uniform(
            self.aspect_ratio_low, self.aspect_ratio_high)
        # Derive the candidate width and height, rounded to nearest integer.
        ww = int(np.sqrt(target_area * aspect) + 0.5)
        hh = int(np.sqrt(target_area / aspect) + 0.5)
        if self.rng.uniform() < 0.5:
            ww, hh = hh, ww
        if hh > h or ww > w:
            # Candidate does not fit inside the image; draw another one.
            continue
        # (x1, y1) is the top-left corner of the crop. The upper bound of
        # randint is exclusive (assumes self.rng follows numpy RandomState
        # semantics), so +1 is required for the last valid offset
        # (w - ww / h - hh) to be reachable; without it the crop could never
        # touch the right or bottom border.
        x1 = self.rng.randint(0, w - ww + 1)
        y1 = self.rng.randint(0, h - hh + 1)
        crop = img[y1:y1 + hh, x1:x1 + ww]
        return cv2.resize(crop, (side, side), interpolation=cv2.INTER_CUBIC)

    # No candidate fitted: fall back to shortest-edge resize + center crop.
    out = imgaug.ResizeShortestEdge(
        side, interp=cv2.INTER_CUBIC).augment(img)
    return imgaug.CenterCrop(side).augment(out)
def fbresnet_augmentor(isTrain):
    """Return the fb.resnet.torch-style augmentor list.

    Intended for BGR images in range [0,255].

    Args:
        isTrain: truthy for the training pipeline, falsy for evaluation.

    Returns:
        A list of augmentor objects. The evaluation list resizes the
        shortest edge to ``IMAGE_SIZE`` rounded up to a power of two and
        then center-crops to ``IMAGE_SIZE``.
    """
    if not isTrain:
        # Smallest power of two that is >= IMAGE_SIZE (for positive ints).
        resize_edge = 2 ** (IMAGE_SIZE - 1).bit_length()
        return [
            imgaug.ResizeShortestEdge(resize_edge, cv2.INTER_CUBIC),
            imgaug.CenterCrop((IMAGE_SIZE, IMAGE_SIZE)),
        ]

    resize = GoogleNetResize()
    custom_aug = JohnAug()
    # The constants are copied from fb.resnet.torch (RGB order); the [::-1]
    # reversals convert them to BGR order.
    eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]
    color_jitter = imgaug.RandomOrderAug([
        custom_aug,
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
    ])
    flip = imgaug.Flip(horiz=True)
    return [resize, color_jitter, flip]
def get_tp_augmentor(isTrain):
    """Return the crop/flip augmentor list modelled on fb.resnet.torch.

    Intended for BGR images in range [0,255]. The colour augmentations are
    currently disabled (kept below as commented-out code).

    Args:
        isTrain: truthy for the training pipeline, falsy for evaluation.

    Returns:
        A list of augmentor objects.
    """
    # Linear seems to have more stable performance,
    # but cubic is kept for compatibility with old models.
    interp = cv2.INTER_CUBIC

    if not isTrain:
        return [
            imgaug.ResizeShortestEdge(256, interp=interp),
            imgaug.CenterCrop((224, 224)),
        ]

    random_crop = imgaug.GoogleNetRandomCropAndResize(interp=interp)
    # It's OK to remove the following augs if your CPU is not fast enough.
    # Removing brightness/contrast/saturation does not have a significant
    # effect on accuracy. Removing lighting leads to a tiny drop in accuracy.
    # imgaug.RandomOrderAug(
    #     [imgaug.BrightnessScale((0.6, 1.4), clip=False),
    #      imgaug.Contrast((0.6, 1.4), rgb=False, clip=False),
    #      imgaug.Saturation(0.4, rgb=False),
    #      # rgb-bgr conversion for the constants copied from fb.resnet.torch
    #      imgaug.Lighting(0.1,
    #                      eigval=np.asarray(
    #                          [0.2175, 0.0188, 0.0045][::-1]) * 255.0,
    #                      eigvec=np.array(
    #                          [[-0.5675, 0.7192, 0.4009],
    #                           [-0.5808, -0.0045, -0.8140],
    #                           [-0.5836, -0.6948, 0.4203]],
    #                          dtype='float32')[::-1, ::-1]
    #                      )]),
    flip = imgaug.Flip(horiz=True)
    return [random_crop, flip]
def normal_augmentor(isTrain):
    """Return a plain crop-based augmentor list.

    Intended for BGR images in range [0,255]. Training uses a random crop
    plus horizontal flip; evaluation uses a center crop only.

    Args:
        isTrain: truthy for the training pipeline, falsy for evaluation.

    Returns:
        A list of augmentor objects.
    """
    resize = imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC)
    crop_shape = (DEFAULT_IMAGE_SHAPE, DEFAULT_IMAGE_SHAPE)
    if isTrain:
        return [
            resize,
            imgaug.RandomCrop(crop_shape),
            imgaug.Flip(horiz=True),
        ]
    return [resize, imgaug.CenterCrop(crop_shape)]
def fbresnet_augmentor(isTrain):
    """Return the fb.resnet.torch-style augmentor list.

    Intended for BGR images in range [0,255].

    Args:
        isTrain: truthy for the training pipeline, falsy for evaluation.

    Returns:
        A list of augmentor objects.
    """
    if not isTrain:
        return [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]

    resize = GoogleNetResize()
    # The constants are copied from fb.resnet.torch (RGB order); the [::-1]
    # reversals convert them to BGR order.
    eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]
    # It's OK to remove the colour augs below if your CPU is not fast enough.
    # Removing brightness/contrast/saturation does not have a significant
    # effect on accuracy. Removing lighting leads to a tiny drop in accuracy.
    color_jitter = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
    ])
    flip = imgaug.Flip(horiz=True)
    return [resize, color_jitter, flip]
def get_augmentations(is_train):
    """Return the augmentor list for training or evaluation.

    Args:
        is_train: truthy for the training pipeline (random resize-crop,
            clipped colour jitter, horizontal flip); falsy for evaluation
            (shortest-edge resize to 256 and 224x224 center crop).

    Returns:
        A list of augmentor objects.
    """
    if not is_train:
        return [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]

    # TODO : 76% or 49%?
    resize = GoogleNetResize(crop_area_fraction=0.76, target_shape=224)
    # The constants are copied from fb.resnet.torch (RGB order); the [::-1]
    # reversals convert them to BGR order.
    eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]
    color_jitter = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4), clip=True),
        imgaug.Contrast((0.6, 1.4), clip=True),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
    ])
    flip = imgaug.Flip(horiz=True)
    return [resize, color_jitter, flip]
def fbresnet_augmentor(isTrain):
    """Return the fb.resnet.torch-style augmentor list.

    Intended for BGR images in range [0,255].

    Args:
        isTrain: truthy for the training pipeline, falsy for evaluation.

    Returns:
        A list of augmentor objects.
    """
    if not isTrain:
        return [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]

    # Sec 5.1:
    # We use scale and aspect ratio data augmentation [35] as in [12].
    # The network input image is a 224×224 pixel random crop from
    # an augmented image or its horizontal flip.
    resize = GoogleNetResize()
    # The constants are copied from fb.resnet.torch (RGB order); the [::-1]
    # reversals convert them to BGR order.
    eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]
    color_jitter = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
    ])
    flip = imgaug.Flip(horiz=True)
    return [resize, color_jitter, flip]
def fbresnet_augmentor(isTrain):
    """Return an fb.resnet.torch-style augmentor list with BGR->RGB conversion.

    Takes BGR images in range [0,255]; both pipelines include a
    ``cv2.COLOR_BGR2RGB`` colour-space conversion. The lighting augmentation
    is disabled (kept below as commented-out code).

    Args:
        isTrain: truthy for the training pipeline, falsy for evaluation.

    Returns:
        A list of augmentor objects.
    """
    if not isTrain:
        return [
            imgaug.ColorSpace(mode=cv2.COLOR_BGR2RGB),
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]

    resize = GoogleNetResize()
    # NOTE(review): the colour-space conversion sits inside the random-order
    # group, so its position relative to Saturation(rgb=False) presumably
    # varies from sample to sample -- confirm this is intended.
    color_ops = [
        imgaug.ColorSpace(mode=cv2.COLOR_BGR2RGB),
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), clip=False),
        imgaug.Saturation(0.4, rgb=False),
        # rgb-bgr conversion for the constants copied from fb.resnet.torch
        # imgaug.Lighting(0.1,
        #                 eigval=np.asarray(
        #                     [0.2175, 0.0188, 0.0045][::-1]) * 255.0,
        #                 eigvec=np.array(
        #                     [[-0.5675, 0.7192, 0.4009],
        #                      [-0.5808, -0.0045, -0.8140],
        #                      [-0.5836, -0.6948, 0.4203]],
        #                     dtype='float32')[::-1, ::-1]
        #                 )
    ]
    color_jitter = imgaug.RandomOrderAug(color_ops)
    flip = imgaug.Flip(horiz=True)
    return [resize, color_jitter, flip]
def fbresnet_augmentor(isTrain):
    """Return the fb.resnet.torch-style augmentor list (ResNet image augmentation).

    Intended for BGR images in range [0,255].

    Args:
        isTrain: truthy for the training pipeline, falsy for evaluation.

    Returns:
        A list of augmentor objects.
    """
    if not isTrain:
        # Evaluation data.
        return [
            # Resize the shortest edge to 256 while keeping the aspect ratio.
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            # Crop the center of the image.
            imgaug.CenterCrop((224, 224)),
        ]

    # Training data.
    # GoogleNetResize relies on its own defaults (crop_area_fraction etc.).
    resize = GoogleNetResize()
    # The constants are copied from fb.resnet.torch (RGB order); the [::-1]
    # reversals convert them to BGR order.
    eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]
    # Remove these augs if your CPU is not fast enough.
    # imgaug is an image augmentation library.
    color_jitter = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
    ])
    flip = imgaug.Flip(horiz=True)
    return [resize, color_jitter, flip]
def fbresnet_augmentor(isTrain):
    """Return the fb.resnet.torch-style augmentor list, for BGR images.

    The training pipeline applies additive brightness, contrast, saturation
    and lighting in random order, followed by an explicit ``imgaug.Clip()``
    and a horizontal flip.

    Args:
        isTrain: truthy for the training pipeline, falsy for evaluation.

    Returns:
        A list of augmentor objects.
    """
    if not isTrain:
        return [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]

    resize = GoogleNetResize()
    # rgb-bgr conversion: both constants are reversed with [::-1].
    eigval = [0.2175, 0.0188, 0.0045][::-1]
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]
    color_jitter = imgaug.RandomOrderAug([
        imgaug.Brightness(30, clip=False),
        imgaug.Contrast((0.8, 1.2), clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
    ])
    clip = imgaug.Clip()
    flip = imgaug.Flip(horiz=True)
    return [resize, color_jitter, clip, flip]
def get_ilsvrc_data_alexnet(is_train, image_size, batchsize, directory):
    """Build the ILSVRC dataflow for training or validation.

    A ``directory`` starting with ``'/'`` selects ``ILSVRC12``; any other
    value selects the "tenth" dataset classes.

    Args:
        is_train: truthy for the augmented training dataflow, falsy for the
            validation dataflow.
        image_size: not referenced by the body.
        batchsize: batch size handed to ``BatchData``.
        directory: dataset location (see above).

    Returns:
        The assembled dataflow.
    """
    is_rooted = directory.startswith('/')

    if not is_train:
        if is_rooted:
            ds = ILSVRC12(directory, 'val')
        else:
            ds = ILSVRCTenthValid(directory)
        eval_augs = [
            imgaug.ResizeShortestEdge(224, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
        ds = AugmentImageComponent(ds, eval_augs)
        ds = PrefetchData(ds, 100, multiprocessing.cpu_count())
        return BatchData(ds, batchsize)

    if is_rooted:
        ds = ILSVRC12(directory, 'train')
    else:
        # NOTE(review): spelled "TTenth" here but "Tenth" in the validation
        # counterpart -- confirm the class name.
        ds = ILSVRCTTenthTrain(directory)

    random_resize = imgaug.RandomApplyAug(
        imgaug.RandomResize((0.9, 1.2), (0.9, 1.2)), 0.7)
    random_rotate = imgaug.RandomApplyAug(
        imgaug.RotationAndCropValid(15), 0.7)
    noise_or_jitter = imgaug.RandomApplyAug(
        imgaug.RandomChooseAug([
            imgaug.SaltPepperNoise(white_prob=0.01, black_prob=0.01),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.8, 1.2), clip=False),
                imgaug.Contrast((0.8, 1.2), clip=False),
                # imgaug.Saturation(0.4, rgb=True),
            ]),
        ]),
        0.7)
    train_augs = [
        random_resize,
        random_rotate,
        noise_or_jitter,
        imgaug.Flip(horiz=True),
        imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
        imgaug.RandomCrop((224, 224)),
    ]
    ds = AugmentImageComponent(ds, train_augs)
    ds = PrefetchData(ds, 1000, multiprocessing.cpu_count())
    ds = BatchData(ds, batchsize)
    return PrefetchData(ds, 10, 4)
def get_data(datadir, size=IMAGESIZE, isTrain=True, zmin=-1, zmax=1):
    """Build a batched dataflow joining random z-data with augmented JPEGs.

    Images are globbed (sorted) from ``<datadir>/train`` when ``isTrain`` is
    truthy and from ``<datadir>/test`` otherwise.

    Args:
        datadir: directory holding the ``train`` / ``test`` sub-directories.
        size: crop size; the shortest edge is first resized to
            ``int(size * 1.143)``.
        isTrain: selects random crop + horizontal flip (truthy) or a center
            crop (falsy); also forwarded as ``shuffle`` to ``ImageFromFile``
            and used to pick ``BATCH`` versus ``TEST_BATCH``.
        zmin: forwarded to ``RandomZData``.
        zmax: forwarded to ``RandomZData``.

    Returns:
        The ``BatchData`` dataflow wrapping ``[random z, augmented image]``.
    """
    resize = imgaug.ResizeShortestEdge(int(size * 1.143))
    if isTrain:
        augs = [resize, imgaug.RandomCrop(size), imgaug.Flip(horiz=True)]
        subset = 'train'
    else:
        augs = [resize, imgaug.CenterCrop(size)]
        subset = 'test'

    pattern = os.path.join(os.path.join(datadir, subset), "*.jpg")
    files = glob.glob(pattern)
    files.sort()

    images = ImageFromFile(files, channel=3, shuffle=isTrain)
    noise = RandomZData([size, size, 3], zmin, zmax)
    joined = JoinData([noise, AugmentImageComponent(images, augs)])
    return BatchData(joined, BATCH if isTrain else TEST_BATCH)
def fbresnet_augmentor_fast(isTrain):
    """Return a reduced fb.resnet.torch-style augmentor list.

    Intended for BGR images in range [0,255]. The training pipeline consists
    of ``GoogleNetResize`` only.

    Args:
        isTrain: truthy for the training pipeline, falsy for evaluation.

    Returns:
        A list of augmentor objects.
    """
    if isTrain:
        return [GoogleNetResize()]
    return [
        imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
        imgaug.CenterCrop((224, 224)),
    ]
def _augment(self, img, _):
    """Crop a random region of ``img`` and resize it to a square.

    Up to ten candidate regions are drawn. Each covers a fraction of the
    image area sampled between ``self.crop_area_fraction`` and 1.0, with an
    aspect ratio sampled between ``self.aspect_ratio_low`` and
    ``self.aspect_ratio_high``; width and height are swapped with
    probability 0.5. The first candidate that fits inside the image is
    cropped at a random offset and resized to
    ``(self.target_shape, self.target_shape)`` with cubic interpolation.
    If no candidate fits, the image is resized along its shortest edge and
    center-cropped instead.

    Args:
        img: image array; only ``img.shape[:2]`` (rows, columns) is read.
        _: unused.

    Returns:
        The cropped and resized image.
    """
    h, w = img.shape[:2]
    area = h * w
    side = self.target_shape
    for _attempt in range(10):
        target_area = self.rng.uniform(self.crop_area_fraction, 1.0) * area
        aspect = self.rng.uniform(
            self.aspect_ratio_low, self.aspect_ratio_high)
        # Candidate width/height, rounded to the nearest integer.
        ww = int(np.sqrt(target_area * aspect) + 0.5)
        hh = int(np.sqrt(target_area / aspect) + 0.5)
        if self.rng.uniform() < 0.5:
            ww, hh = hh, ww
        if hh > h or ww > w:
            # Candidate does not fit inside the image; draw another one.
            continue
        # The upper bound of randint is exclusive (assumes self.rng follows
        # numpy RandomState semantics), so +1 is required for the last valid
        # offset to be reachable; without it the crop could never touch the
        # right or bottom border.
        x1 = self.rng.randint(0, w - ww + 1)
        y1 = self.rng.randint(0, h - hh + 1)
        crop = img[y1:y1 + hh, x1:x1 + ww]
        return cv2.resize(crop, (side, side), interpolation=cv2.INTER_CUBIC)

    # No candidate fitted: fall back to shortest-edge resize + center crop.
    out = imgaug.ResizeShortestEdge(
        side, interp=cv2.INTER_CUBIC).augment(img)
    return imgaug.CenterCrop(side).augment(out)
def _augment(self, img, _):
    """Crop a random region of ``img`` and resize it to 224x224.

    Up to ten candidate regions are drawn. Each covers a fraction of the
    image area sampled between 0.08 and 1.0, with an aspect ratio sampled
    between 0.75 and 1.333; width and height are swapped with probability
    0.5. The first candidate that fits inside the image is cropped at a
    random offset and resized to 224x224 with cubic interpolation. If no
    candidate fits, the image is resized along its shortest edge to 224 and
    center-cropped instead.

    Args:
        img: image array; only ``img.shape[:2]`` (rows, columns) is read.
        _: unused.

    Returns:
        The cropped and resized image.
    """
    h, w = img.shape[:2]
    area = h * w
    for _attempt in range(10):
        target_area = self.rng.uniform(0.08, 1.0) * area
        aspect = self.rng.uniform(0.75, 1.333)
        # Candidate width/height, truncated towards zero.
        ww = int(np.sqrt(target_area * aspect))
        hh = int(np.sqrt(target_area / aspect))
        if self.rng.uniform() < 0.5:
            ww, hh = hh, ww
        if hh > h or ww > w:
            # Candidate does not fit inside the image; draw another one.
            continue
        # The upper bound of randint is exclusive (assumes self.rng follows
        # numpy RandomState semantics), so +1 is required for the last valid
        # offset to be reachable; without it the crop could never touch the
        # right or bottom border.
        x1 = self.rng.randint(0, w - ww + 1)
        y1 = self.rng.randint(0, h - hh + 1)
        crop = img[y1:y1 + hh, x1:x1 + ww]
        return cv2.resize(crop, (224, 224), interpolation=cv2.INTER_CUBIC)

    # No candidate fitted: fall back to shortest-edge resize + center crop.
    out = imgaug.ResizeShortestEdge(224, interp=cv2.INTER_CUBIC).augment(img)
    return imgaug.CenterCrop(224).augment(out)
def fbresnet_augmentor(isTrain):
    """Return the fb.resnet.torch-style augmentor list.

    Intended for BGR images in range [0,255]. All resizing uses
    ``cv2.INTER_LINEAR``.

    Args:
        isTrain: truthy for the training pipeline, falsy for evaluation.

    Returns:
        A list of augmentor objects.
    """
    interp = cv2.INTER_LINEAR

    if not isTrain:
        return [
            imgaug.ResizeShortestEdge(256, interp=interp),
            imgaug.CenterCrop((224, 224)),
        ]

    # Sec 5.1:
    # We use scale and aspect ratio data augmentation [35] as in [12].
    # The network input image is a 224×224 pixel random crop from
    # an augmented image or its horizontal flip.
    random_crop = imgaug.GoogleNetRandomCropAndResize(interp=interp)
    # The constants are copied from fb.resnet.torch (RGB order); the [::-1]
    # reversals convert them to BGR order.
    eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]
    # It's OK to remove the colour augs below if your CPU is not fast enough.
    # Removing brightness/contrast/saturation does not have a significant
    # effect on accuracy. Removing lighting leads to a tiny drop in accuracy.
    color_jitter = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), rgb=False, clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
    ])
    flip = imgaug.Flip(horiz=True)
    return [random_crop, color_jitter, flip]
# NOTE(review): everything up to and including `return loss` is the tail of
# an enclosing function whose `def` line is not visible in this chunk; it is
# indented as a function body on that assumption.
    def prediction_incorrect(logits, label, topk=1, name='incorrect_vector'):
        """Return a float32 tensor flagging samples whose label is not in the top-k.

        The boolean result of ``tf.nn.in_top_k(logits, label, topk)`` is
        negated and cast to ``tf.float32``; the cast op is given ``name``.
        """
        with tf.name_scope('prediction_incorrect'):
            x = tf.logical_not(tf.nn.in_top_k(logits, label, topk))
        return tf.cast(x, tf.float32, name=name)

    # Track the mean top-1 error as a moving summary.
    wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
    add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))

    # Track the mean top-5 error as a moving summary.
    wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
    add_moving_summary(tf.reduce_mean(wrong, name='train-error-top5'))
    return loss


if __name__ == '__main__':
    # Script entry point: build a dataflow over the 'train' split and hand it
    # to TestDataSpeed.
    import argparse
    from tensorpack.dataflow import TestDataSpeed
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', required=True)
    parser.add_argument('--batch', type=int, default=32)
    args = parser.parse_args()
    augs = fbresnet_augmentor(False)
    # NOTE(review): this reassignment discards the list built on the previous
    # line, so only the two augmentors below are actually used.
    augs = [imgaug.ResizeShortestEdge(256), imgaug.CenterCrop(224)]
    df = get_imagenet_dataflow(args.data, 'train', args.batch, augs)
    TestDataSpeed(df).start()
def fbresnet_augmentor(isTrain, crop_method, color_augmentation):
    """Return an fb.resnet.torch-style augmentor list with a selectable crop.

    Intended for BGR images in range [0,255]. Every candidate augmentor is
    instantiated, then only those whose list index was selected by
    ``crop_method`` / ``color_augmentation`` are returned, in list order.

    Args:
        isTrain: truthy for the training pipeline, falsy for evaluation.
        crop_method: for training one of ``'GoogleNetResize'``,
            ``'ShortestEdgeResize'``, ``'GlobalWarp'`` or ``'CAMCrop'``; for
            evaluation ``'RandomCrop'`` or ``'CenterCrop'``. Any other value
            selects no crop augmentor.
        color_augmentation: when truthy (training only) the colour jitter
            group is included.

    Returns:
        A list with the selected augmentor objects.
    """
    selected = set()

    if isTrain:
        # The constants are copied from fb.resnet.torch (RGB order); the
        # [::-1] reversals convert them to BGR order.
        augmentors = [
            # 1. crop_method candidates
            GoogleNetResize(),               # 0: a) GoogleNetResize
            imgaug.ResizeShortestEdge(256),  # 1: b) ShortestEdgeResize
            imgaug.Resize(226),              # 2: c) GlobalWarp; NOTE: for CAM generation
            imgaug.RandomCrop((224, 224)),   # 3: crop used by b) and c)
            # d) CAMCrop adds nothing here: when CAMCrop is set, the output
            #    from the original DataFlow has already been cropped.
            # 2. color_augmentation
            imgaug.RandomOrderAug([          # 4
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array(
                        [[-0.5675, 0.7192, 0.4009],
                         [-0.5808, -0.0045, -0.8140],
                         [-0.5836, -0.6948, 0.4203]],
                        dtype='float32')[::-1, ::-1]),
            ]),
            imgaug.Flip(horiz=True),         # 5
        ]

        # (crop_method, message, indices into `augmentors`)
        train_crops = (
            ('GoogleNetResize',
             '--> perform GoogleNetResize cropping method during the training pipeline',
             (0,)),
            ('ShortestEdgeResize',
             '--> perform ShortestEdgeResize cropping method during the training pipeline',
             (1, 3)),
            ('GlobalWarp',
             '--> perform GlobalWarp cropping method during the training pipeline',
             (2, 3)),
            # enable CAMCrop @ 20171124
            ('CAMCrop',
             '*** Perform CAMCrop to better the training dynamics and the results ***',
             ()),
        )
        for method, message, indices in train_crops:
            if crop_method == method:
                print(message)
                selected.update(indices)
                break

        if color_augmentation:
            print(
                '--> perform color augmentation during the training pipeline')
            selected.add(4)
        else:
            print(
                '--> discard the color jittering process during the training pipeline'
            )

        # perform mirror reflection augmentation anyway
        selected.add(5)
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),  # 0
            imgaug.CenterCrop((224, 224)),                    # 1
            imgaug.RandomCrop((224, 224)),                    # 2
        ]
        eval_crops = (
            ('RandomCrop', (0, 2)),
            ('CenterCrop', (0, 1)),
        )
        for method, indices in eval_crops:
            if crop_method == method:
                selected.update(indices)
                break

    return [aug for idx, aug in enumerate(augmentors) if idx in selected]