Exemple #1
0
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        augmentors = [
            GoogleNetResize(),
            imgaug.RandomOrderAug(      # Remove these augs if your CPU is not fast enough
                [imgaug.BrightnessScale((0.6, 1.4), clip=False),
                 imgaug.Contrast((0.6, 1.4), clip=False),
                 imgaug.Saturation(0.4, rgb=False),
                 # rgb-bgr conversion for the constants copied from fb.resnet.torch
                 imgaug.Lighting(0.1,
                                 eigval=np.asarray(
                                     [0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                                 eigvec=np.array(
                                     [[-0.5675, 0.7192, 0.4009],
                                      [-0.5808, -0.0045, -0.8140],
                                      [-0.5836, -0.6948, 0.4203]],
                                     dtype='float32')[::-1, ::-1]
                                 )]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        augmentors = [
            GoogleNetResize(),
            imgaug.RandomOrderAug([
                JohnAug(),
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        round2pow2 = lambda x: 2**(x - 1).bit_length()

        augmentors = [
            imgaug.ResizeShortestEdge(round2pow2(IMAGE_SIZE), cv2.INTER_CUBIC),
            imgaug.CenterCrop((IMAGE_SIZE, IMAGE_SIZE)),
        ]
    return augmentors
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        """
        Sec 5.1:
        We use scale and aspect ratio data augmentation [35] as
        in [12]. The network input image is a 224×224 pixel random
        crop from an augmented image or its horizontal flip.
        """
        augmentors = [
            GoogleNetResize(),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
Exemple #4
0
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        augmentors = [
            GoogleNetResize(),
            # It's OK to remove the following augs if your CPU is not fast enough.
            # Removing brightness/contrast/saturation does not have a significant effect on accuracy.
            # Removing lighting leads to a tiny drop in accuracy.
            imgaug.RandomOrderAug([
                # imgaug.BrightnessScale((0.6, 1.4), clip=False),
                # imgaug.Contrast((0.6, 1.4), clip=False),
                # imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((DEFAULT_IMAGE_SHAPE, DEFAULT_IMAGE_SHAPE)),
        ]
    return augmentors
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images.
    """
    if isTrain:
        augmentors = [
            GoogleNetResize(),
            imgaug.RandomOrderAug([
                imgaug.Brightness(30, clip=False),
                imgaug.Contrast((0.8, 1.2), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion
                imgaug.Lighting(0.1,
                                eigval=[0.2175, 0.0188, 0.0045][::-1],
                                eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                                 [-0.5808, -0.0045, -0.8140],
                                                 [-0.5836, -0.6948, 0.4203]],
                                                dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Clip(),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255]. # 残差网络增强图像
    """
    if isTrain:  # 如果训练数据的话
        augmentors = [
            GoogleNetResize(),  # 定义好了crop_area_fraction等参数
            imgaug.
            RandomOrderAug(  # GPU不行的话就把这部分删除Remove these augs if your CPU is not fast enough #imgaug是一个图像增强库
                [
                    imgaug.BrightnessScale((0.6, 1.4), clip=False),
                    imgaug.Contrast((0.6, 1.4), clip=False),
                    imgaug.Saturation(0.4, rgb=False),
                    # rgb-bgr conversion for the constants copied from fb.resnet.torch
                    imgaug.Lighting(
                        0.1,
                        eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) *
                        255.0,
                        eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                         [-0.5808, -0.0045, -0.8140],
                                         [-0.5836, -0.6948, 0.4203]],
                                        dtype='float32')[::-1, ::-1])
                ]),
            imgaug.Flip(horiz=True),
        ]
    else:  # 如果不是训练数据的话
        augmentors = [
            imgaug.ResizeShortestEdge(
                256, cv2.INTER_CUBIC),  #  在保持纵横比的同时,将最短边的大小调整为某个数字。
            imgaug.CenterCrop((224, 224)),  # 在中间裁剪图像
        ]
    return augmentors
Exemple #7
0
def get_augmentations(is_train):
    if is_train:
        augmentors = [
            GoogleNetResize(crop_area_fraction=0.76,
                            target_shape=224),  # TODO : 76% or 49%?
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=True),
                imgaug.Contrast((0.6, 1.4), clip=True),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
def fbresnet_augmentor(isTrain, target_shape=224):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        augmentors = [
            GoogleNetResize(crop_area_fraction=0.32,
                            target_shape=target_shape),
            # GoogleNetResize(target_shape=target_shape),
            imgaug.
            RandomOrderAug([  # imgaug.BrightnessScale((0.6, 1.4), clip=False),
                # imgaug.Contrast((0.6, 1.4), clip=False),
                # imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(int(256 / 224 * target_shape),
                                      cv2.INTER_CUBIC),
            imgaug.CenterCrop((target_shape, target_shape)),
        ]
    return augmentors
def small_augmentor():
    augmentors = [
        GoogleNetResize(),
        imgaug.Lighting(
            0.1,
            eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
            eigvec=np.array(
                [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140],
                 [-0.5836, -0.6948, 0.4203]],
                dtype='float32')[::-1, ::-1]),
        imgaug.Flip(horiz=True),
    ]
    return augmentors
Exemple #10
0
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    interpolation = cv2.INTER_LINEAR
    if isTrain:
        """
        Sec 5.1:
        We use scale and aspect ratio data augmentation [35] as
        in [12]. The network input image is a 224×224 pixel random
        crop from an augmented image or its horizontal flip.
        """
        augmentors = [
            imgaug.GoogleNetRandomCropAndResize(interp=interpolation),
            # It's OK to remove the following augs if your CPU is not fast enough.
            # Removing brightness/contrast/saturation does not have a significant effect on accuracy.
            # Removing lighting leads to a tiny drop in accuracy.
            imgaug.RandomOrderAug(
                [imgaug.BrightnessScale((0.6, 1.4), clip=False),
                 imgaug.Contrast((0.6, 1.4), rgb=False, clip=False),
                 imgaug.Saturation(0.4, rgb=False),
                 # rgb-bgr conversion for the constants copied from fb.resnet.torch
                 imgaug.Lighting(0.1,
                                 eigval=np.asarray(
                                     [0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                                 eigvec=np.array(
                                     [[-0.5675, 0.7192, 0.4009],
                                      [-0.5808, -0.0045, -0.8140],
                                      [-0.5836, -0.6948, 0.4203]],
                                     dtype='float32')[::-1, ::-1]
                                 )]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, interp=interpolation),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
def prepare_video(args):
    data_root, video_path, video_width, video_height, video_length, video_downsample_ratio, video_index, batch_size, shared_mem_idx, is_training, is_ucf101, is_imagenet, is_zipped = args

    augs = [
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), clip=False),
        imgaug.Saturation(0.4, rgb=True),
        imgaug.Lighting(0.1,
                        eigval=np.asarray([0.2175, 0.0188, 0.0045]) * 255.0,
                        eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                         [-0.5808, -0.0045, -0.8140],
                                         [-0.5836, -0.6948, 0.4203]],
                                        dtype='float32')),
    ]
    random.shuffle(augs)

    video_mem = np.frombuffer(shared_mem[shared_mem_idx],
                              np.ctypeslib.ctypes.c_float)
    video_mem = video_mem.reshape(
        (batch_size, video_length, video_height, video_width, 3))

    pathgen = lambda x: os.path.join(data_root, str(video_path), x)

    frames = None
    if os.path.isdir(pathgen('')):
        frames = sorted(os.listdir(pathgen('')))
    else:
        frames = [os.path.join(data_root, str(video_path))]

    crop_frames = is_training
    flip_frames = bool(
        random.getrandbits(1)) and is_training and (is_ucf101 or is_imagenet)
    add_noise = bool(random.getrandbits(1)) and is_training and not is_training

    # choose a random time to start the video
    num_frames = len(frames) // video_downsample_ratio
    t_offset = 0
    stride_offset = 0
    if is_training and num_frames > video_length:
        t_offset = random.choice(range(num_frames - video_length))
        stride_offset = random.choice(range(video_downsample_ratio))

    num_frames = min(len(frames) // video_downsample_ratio, video_length)
    assert num_frames != 0, 'num frames in video cannot be 0: {}'.format(
        video_path)

    round2pow2 = lambda x: 2**(x - 1).bit_length()
    pow2_width = round2pow2(video_width)
    pow2_height = round2pow2(video_height)
    crop_margin_x = pow2_width - video_width
    crop_margin_y = pow2_height - video_height

    x1 = random.choice(list(range(crop_margin_x)))
    y1 = random.choice(list(range(crop_margin_y)))
    x2 = pow2_width - (crop_margin_x - x1)
    y2 = pow2_height - (crop_margin_y - y1)

    rotation_angle = random.choice(list(range(-10, 10,
                                              1))) if is_training else 0

    video_mem[video_index, :, :, :] = 0
    for i in range(num_frames):
        image_idx = video_downsample_ratio * (i + t_offset)
        image_idx = min(image_idx + stride_offset, len(frames))

        image = None
        if is_imagenet and is_zipped:
            fname = pathgen(frames[image_idx])

            jpeg_filename = os.path.basename(fname)
            jpeg_dirname = os.path.basename(os.path.dirname(fname))
            zip_filepath = os.path.dirname(fname) + '.zip'
            f = zipfile.ZipFile(zip_filepath, 'r')
            compress_jpeg = io.BytesIO(
                f.read(os.path.join(jpeg_dirname, jpeg_filename)))
            image = Image.open(compress_jpeg)

        else:
            image = Image.open(pathgen(
                frames[image_idx]))  # in RGB order by default

        image = image.convert('RGB')
        #image = image.convert('L') # convert to YUV and grab Y-component

        if crop_frames:
            image = image.resize((pow2_width, pow2_height), PIL.Image.BICUBIC)
            image = image.crop(box=(x1, y1, x2, y2))
        else:
            image = image.resize((video_width, video_height),
                                 PIL.Image.BICUBIC)

        if flip_frames:
            image = image.transpose(PIL.Image.FLIP_LEFT_RIGHT)

        if rotation_angle != 0:
            image = image.rotate(rotation_angle)

        image = np.asarray(image, dtype=np.uint8)
        assert image.shape == (
            video_width, video_height,
            3), 'cropped image must be {} but was {}'.format(
                (video_width, video_height, 3), image.shape)

        if is_imagenet:
            if is_training:
                for a in augs:
                    a.reset_state()
                    image = a.augment(image)

            image = np.asarray(image, dtype=np.float32)
            image = image * (1.0 / 255)
            mean = np.asarray([0.485, 0.456, 0.406])
            std = np.asarray([0.229, 0.224, 0.225])
            image = (image - mean) / std

        else:
            image = np.asarray(image, dtype=np.uint32)
            image = image - 116  # center on mean value of 116 (as computed in preprocessing step)
            image = np.clip(image, -128, 128)

        image = np.asarray(image, dtype=np.float32)

        if add_noise:
            noise = np.random.normal(loc=0,
                                     scale=5,
                                     size=(video_height, video_width, 3))
            image = image + noise
            image = np.clip(image, -128, 128)

        video_mem[video_index, i, :, :, :] = image

    return {'num_frames': num_frames, 'video_path': video_path}
Exemple #12
0
def fbresnet_augmentor(isTrain, crop_method, color_augmentation):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    execution_lst = []

    if isTrain:
        augmentors = [
            # 1. crop_method
            # a) GoogleNetResize
            GoogleNetResize(),
            # b) ShortestEdgeResize
            imgaug.ResizeShortestEdge(256),
            # c) GlobalWarp
            imgaug.Resize(226),  # NOTE: for CAM generation
            imgaug.RandomCrop((224, 224)),
            # d) CAMCrop
            # (when CAMCrop is set, the output from the original DataFlow has already been cropped)
            # 2. color_augmentation
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]

        #
        if crop_method == 'GoogleNetResize':
            print(
                '--> perform GoogleNetResize cropping method during the training pipeline'
            )
            execution_lst.extend([0])
        elif crop_method == 'ShortestEdgeResize':
            print(
                '--> perform ShortestEdgeResize cropping method during the training pipeline'
            )
            execution_lst.extend([1, 3])
        elif crop_method == 'GlobalWarp':
            print(
                '--> perform GlobalWarp cropping method during the training pipeline'
            )
            execution_lst.extend([2, 3])
        elif crop_method == 'CAMCrop':
            # enable CAMCrop @ 20171124
            print(
                '*** Perform CAMCrop to better the training dynamics and the results ***'
            )

        if color_augmentation:
            print(
                '--> perform color augmentation during the training pipeline')
            execution_lst.extend([4])
        else:
            print(
                '--> discard the color jittering process during the training pipeline'
            )

        # perform mirror reflection augmentation anyway
        execution_lst.extend([5])

    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
            imgaug.RandomCrop((224, 224)),
        ]

        if crop_method == 'RandomCrop':
            execution_lst.extend([0, 2])

        elif crop_method == 'CenterCrop':
            execution_lst.extend([0, 1])

    return [
        item_ for id_, item_ in enumerate(augmentors) if id_ in execution_lst
    ]