Code Example #1
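All of the snippets below build on tensorpack's DataFlow API. As a hedged sketch, the common imports they assume look like the following (module paths are tensorpack's; GoogleNetResize, JohnAug, CocoPoseLMDB, and the pose_* helpers are project-local names not shown here, and example #13 additionally uses io, os, random, zipfile, and PIL):

import multiprocessing

import cv2
import numpy as np
from tensorpack.dataflow import (AugmentImageComponent, BatchData, MapData,
                                 MapDataComponent, PrefetchData, imgaug)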
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        augmentors = [
            GoogleNetResize(),
            imgaug.RandomOrderAug(      # Remove these augs if your CPU is not fast enough
                [imgaug.BrightnessScale((0.6, 1.4), clip=False),
                 imgaug.Contrast((0.6, 1.4), clip=False),
                 imgaug.Saturation(0.4, rgb=False),
                 # rgb-bgr conversion for the constants copied from fb.resnet.torch
                 imgaug.Lighting(0.1,
                                 eigval=np.asarray(
                                     [0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                                 eigvec=np.array(
                                     [[-0.5675, 0.7192, 0.4009],
                                      [-0.5808, -0.0045, -0.8140],
                                      [-0.5836, -0.6948, 0.4203]],
                                     dtype='float32')[::-1, ::-1]
                                 )]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
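A minimal usage sketch (assuming a tensorpack DataFlow ds whose first component is a BGR image; AugmentImageComponent applies the returned list in order):

augs = fbresnet_augmentor(isTrain=True)
ds = AugmentImageComponent(ds, augs)  # augments datapoint component 0 with each augmentor in turn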
Code Example #2
File: pose_dataset.py  Project: ybCliff/gp
def get_dataflow(path, is_train):
    ds = CocoPoseLMDB(path, is_train)  # read data from lmdb
    if is_train:
        ds = MapDataComponent(ds, pose_random_scale)
        ds = MapDataComponent(ds, pose_rotation)
        ds = MapDataComponent(ds, pose_flip)
        ds = MapDataComponent(ds, pose_resize_shortestedge_random)
        ds = MapDataComponent(ds, pose_crop_random)
        ds = MapData(ds, pose_to_img)
        augs = [
            imgaug.RandomApplyAug(
                imgaug.RandomChooseAug([
                    imgaug.BrightnessScale((0.6, 1.4), clip=False),
                    imgaug.Contrast((0.7, 1.4), clip=False),
                    imgaug.GaussianBlur(max_size=3)
                ]), 0.7),
        ]
        ds = AugmentImageComponent(ds, augs)
    else:
        ds = MapDataComponent(ds, pose_resize_shortestedge_fixed)
        ds = MapDataComponent(ds, pose_crop_center)
        ds = MapData(ds, pose_to_img)

    ds = PrefetchData(ds, 1000, multiprocessing.cpu_count())

    return ds
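For reference, a hedged usage line (the LMDB path is a placeholder):

df = get_dataflow('/path/to/coco-pose-lmdb', is_train=True)  # hypothetical path
df.reset_state()  # tensorpack DataFlows must be reset before iteration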
Code Example #3
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        """
        Sec 5.1:
        We use scale and aspect ratio data augmentation [35] as
        in [12]. The network input image is a 224×224 pixel random
        crop from an augmented image or its horizontal flip.
        """
        augmentors = [
            GoogleNetResize(),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
Code Example #4
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].  # image augmentation for the residual network
    """
    if isTrain:  # if this is training data
        augmentors = [
            GoogleNetResize(),  # parameters such as crop_area_fraction are already set by default
            imgaug.RandomOrderAug(  # Remove these augs if your CPU is not fast enough (imgaug is an image-augmentation library)
                [
                    imgaug.BrightnessScale((0.6, 1.4), clip=False),
                    imgaug.Contrast((0.6, 1.4), clip=False),
                    imgaug.Saturation(0.4, rgb=False),
                    # rgb-bgr conversion for the constants copied from fb.resnet.torch
                    imgaug.Lighting(
                        0.1,
                        eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) *
                        255.0,
                        eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                         [-0.5808, -0.0045, -0.8140],
                                         [-0.5836, -0.6948, 0.4203]],
                                        dtype='float32')[::-1, ::-1])
                ]),
            imgaug.Flip(horiz=True),
        ]
    else:  # if this is not training data
        augmentors = [
            imgaug.ResizeShortestEdge(
                256, cv2.INTER_CUBIC),  # resize the shortest edge to a fixed size while keeping the aspect ratio
            imgaug.CenterCrop((224, 224)),  # crop a 224x224 patch from the center
        ]
    return augmentors
Code Example #5
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        augmentors = [
            GoogleNetResize(),
            imgaug.RandomOrderAug([
                JohnAug(),
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        round2pow2 = lambda x: 2**(x - 1).bit_length()

        augmentors = [
            imgaug.ResizeShortestEdge(round2pow2(IMAGE_SIZE), cv2.INTER_CUBIC),
            imgaug.CenterCrop((IMAGE_SIZE, IMAGE_SIZE)),
        ]
    return augmentors
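The round2pow2 lambda rounds its argument up to the next power of two, so the evaluation branch here resizes the shortest edge to a power-of-two size before the square center crop. A quick sanity check of the same expression:

round2pow2 = lambda x: 2 ** (x - 1).bit_length()
assert round2pow2(224) == 256  # a 224 crop is preceded by a 256 shortest-edge resize
assert round2pow2(256) == 256  # exact powers of two map to themselves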
Code Example #6
def sample_augmentations():
    ds = CocoPoseLMDB('/data/public/rw/coco-pose-estimation-lmdb/',
                      is_train=False,
                      only_idx=0)
    ds = MapDataComponent(ds, pose_random_scale)
    ds = MapDataComponent(ds, pose_rotation)
    ds = MapDataComponent(ds, pose_flip)
    ds = MapDataComponent(ds, pose_resize_shortestedge_random)
    ds = MapDataComponent(ds, pose_crop_random)
    ds = MapData(ds, pose_to_img)
    augs = [
        imgaug.RandomApplyAug(
            imgaug.RandomChooseAug([
                imgaug.GaussianBlur(3),
                imgaug.SaltPepperNoise(white_prob=0.01, black_prob=0.01),
                imgaug.RandomOrderAug([
                    imgaug.BrightnessScale((0.8, 1.2), clip=False),
                    imgaug.Contrast((0.8, 1.2), clip=False),
                    # imgaug.Saturation(0.4, rgb=True),
                ]),
            ]),
            0.7),
    ]
    ds = AugmentImageComponent(ds, augs)

    ds.reset_state()
    for l1, l2, l3 in ds.get_data():
        CocoPoseLMDB.display_image(l1, l2, l3)
Code Example #7
def fbresnet_augmentor(isTrain, input_size=224):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        augmentors = [
            imgaug.GoogleNetRandomCropAndResize(interp=cv2.INTER_CUBIC),
            # It's OK to remove the following augs if your CPU is not fast enough.
            # Removing brightness/contrast/saturation does not have a significant effect on accuracy.
            # Removing lighting leads to a tiny drop in accuracy.
            imgaug.RandomOrderAug(
                [imgaug.BrightnessScale((0.6, 1.4), clip=False),
                 imgaug.Contrast((0.6, 1.4), clip=False),
                 imgaug.Saturation(0.4, rgb=False),
                 # rgb-bgr conversion for the constants copied from fb.resnet.torch
                 imgaug.Lighting(0.1,
                                 eigval=np.asarray(
                                     [0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                                 eigvec=np.array(
                                     [[-0.5675, 0.7192, 0.4009],
                                      [-0.5808, -0.0045, -0.8140],
                                      [-0.5836, -0.6948, 0.4203]],
                                     dtype='float32')[::-1, ::-1]
                                 )]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(input_size + 32, cv2.INTER_CUBIC),
            imgaug.CenterCrop((input_size, input_size)),
        ]
    return augmentors
Code Example #8
def get_augmentations(is_train):
    if is_train:
        augmentors = [
            GoogleNetResize(crop_area_fraction=0.76,
                            target_shape=224),  # TODO : 76% or 49%?
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=True),
                imgaug.Contrast((0.6, 1.4), clip=True),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
Code Example #9
def get_dataflow(path, is_train):
    ds = CocoPoseLMDB(path, is_train)  # read data from lmdb
    if is_train:
        ds = MapDataComponent(ds, pose_random_scale)
        ds = MapDataComponent(ds, pose_rotation)
        ds = MapDataComponent(ds, pose_flip)
        ds = MapDataComponent(ds, pose_resize_shortestedge_random)
        ds = MapDataComponent(ds, pose_crop_random)
        ds = MapData(ds, pose_to_img)
        augs = [
            imgaug.RandomApplyAug(
                imgaug.RandomChooseAug([
                    imgaug.SaltPepperNoise(white_prob=0.01, black_prob=0.01),
                    imgaug.RandomOrderAug([
                        imgaug.BrightnessScale((0.8, 1.2), clip=False),
                        imgaug.Contrast((0.8, 1.2), clip=False),
                        # imgaug.Saturation(0.4, rgb=True),
                    ]),
                ]),
                0.7),
        ]
        ds = AugmentImageComponent(ds, augs)
    else:
        ds = MapDataComponent(ds, pose_resize_shortestedge_fixed)
        ds = MapDataComponent(ds, pose_crop_center)
        ds = MapData(ds, pose_to_img)

    ds = PrefetchData(ds, 1000, multiprocessing.cpu_count())

    return ds
Code Example #10
def get_dataflow(is_train):
    ds = CocoPoseLMDB('/data/public/rw/coco-pose-estimation-lmdb/', is_train)
    if is_train:
        ds = MapDataComponent(ds, pose_rotation)
        ds = MapDataComponent(ds, pose_flip)
        ds = MapDataComponent(ds, pose_resize_shortestedge_random)
        ds = MapDataComponent(ds, pose_crop_random)
        ds = MapData(ds, pose_to_img)
        augs = [
            imgaug.RandomApplyAug(imgaug.RandomChooseAug([
                imgaug.SaltPepperNoise(white_prob=0.01, black_prob=0.01),
                imgaug.RandomOrderAug([
                    imgaug.BrightnessScale((0.8, 1.2), clip=False),
                    imgaug.Contrast((0.8, 1.2), clip=False),
                    # imgaug.Saturation(0.4, rgb=True),
                ]),
            ]), 0.7),
        ]
        ds = AugmentImageComponent(ds, augs)
    else:
        ds = MapDataComponent(ds, pose_resize_shortestedge_fixed)
        ds = MapDataComponent(ds, pose_crop_center)
        ds = MapData(ds, pose_to_img)

    return ds
Code Example #11
def get_ilsvrc_data_alexnet(is_train, image_size, batchsize, directory):
    if is_train:
        if not directory.startswith('/'):
            ds = ILSVRCTenthTrain(directory)  # name fixed to match ILSVRCTenthValid below
        else:
            ds = ILSVRC12(directory, 'train')
        augs = [
            imgaug.RandomApplyAug(imgaug.RandomResize((0.9, 1.2), (0.9, 1.2)),
                                  0.7),
            imgaug.RandomApplyAug(imgaug.RotationAndCropValid(15), 0.7),
            imgaug.RandomApplyAug(
                imgaug.RandomChooseAug([
                    imgaug.SaltPepperNoise(white_prob=0.01, black_prob=0.01),
                    imgaug.RandomOrderAug([
                        imgaug.BrightnessScale((0.8, 1.2), clip=False),
                        imgaug.Contrast((0.8, 1.2), clip=False),
                        # imgaug.Saturation(0.4, rgb=True),
                    ]),
                ]),
                0.7),
            imgaug.Flip(horiz=True),
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.RandomCrop((224, 224)),
        ]
        ds = AugmentImageComponent(ds, augs)
        ds = PrefetchData(ds, 1000, multiprocessing.cpu_count())
        ds = BatchData(ds, batchsize)
        ds = PrefetchData(ds, 10, 4)
    else:
        if not directory.startswith('/'):
            ds = ILSVRCTenthValid(directory)
        else:
            ds = ILSVRC12(directory, 'val')
        ds = AugmentImageComponent(ds, [
            imgaug.ResizeShortestEdge(224, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ])
        ds = PrefetchData(ds, 100, multiprocessing.cpu_count())
        ds = BatchData(ds, batchsize)

    return ds
Code Example #12
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    interpolation = cv2.INTER_LINEAR
    if isTrain:
        """
        Sec 5.1:
        We use scale and aspect ratio data augmentation [35] as
        in [12]. The network input image is a 224×224 pixel random
        crop from an augmented image or its horizontal flip.
        """
        augmentors = [
            imgaug.GoogleNetRandomCropAndResize(interp=interpolation),
            # It's OK to remove the following augs if your CPU is not fast enough.
            # Removing brightness/contrast/saturation does not have a significant effect on accuracy.
            # Removing lighting leads to a tiny drop in accuracy.
            imgaug.RandomOrderAug(
                [imgaug.BrightnessScale((0.6, 1.4), clip=False),
                 imgaug.Contrast((0.6, 1.4), rgb=False, clip=False),
                 imgaug.Saturation(0.4, rgb=False),
                 # rgb-bgr conversion for the constants copied from fb.resnet.torch
                 imgaug.Lighting(0.1,
                                 eigval=np.asarray(
                                     [0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                                 eigvec=np.array(
                                     [[-0.5675, 0.7192, 0.4009],
                                      [-0.5808, -0.0045, -0.8140],
                                      [-0.5836, -0.6948, 0.4203]],
                                     dtype='float32')[::-1, ::-1]
                                 )]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, interp=interpolation),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
Code Example #13
def prepare_video(args):
    data_root, video_path, video_width, video_height, video_length, video_downsample_ratio, video_index, batch_size, shared_mem_idx, is_training, is_ucf101, is_imagenet, is_zipped = args

    augs = [
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), clip=False),
        imgaug.Saturation(0.4, rgb=True),
        imgaug.Lighting(0.1,
                        eigval=np.asarray([0.2175, 0.0188, 0.0045]) * 255.0,
                        eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                         [-0.5808, -0.0045, -0.8140],
                                         [-0.5836, -0.6948, 0.4203]],
                                        dtype='float32')),
    ]
    random.shuffle(augs)

    video_mem = np.frombuffer(shared_mem[shared_mem_idx],
                              np.ctypeslib.ctypes.c_float)
    video_mem = video_mem.reshape(
        (batch_size, video_length, video_height, video_width, 3))

    pathgen = lambda x: os.path.join(data_root, str(video_path), x)

    frames = None
    if os.path.isdir(pathgen('')):
        frames = sorted(os.listdir(pathgen('')))
    else:
        frames = [os.path.join(data_root, str(video_path))]

    crop_frames = is_training
    flip_frames = bool(
        random.getrandbits(1)) and is_training and (is_ucf101 or is_imagenet)
    add_noise = False  # disabled: the original condition 'is_training and not is_training' can never be True

    # choose a random time to start the video
    num_frames = len(frames) // video_downsample_ratio
    t_offset = 0
    stride_offset = 0
    if is_training and num_frames > video_length:
        t_offset = random.choice(range(num_frames - video_length))
        stride_offset = random.choice(range(video_downsample_ratio))

    num_frames = min(len(frames) // video_downsample_ratio, video_length)
    assert num_frames != 0, 'num frames in video cannot be 0: {}'.format(
        video_path)

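    # Upscale each frame to the next power-of-two size, then crop a random
    # (video_width, video_height) window back out of it: translation jitter.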
    round2pow2 = lambda x: 2**(x - 1).bit_length()
    pow2_width = round2pow2(video_width)
    pow2_height = round2pow2(video_height)
    crop_margin_x = pow2_width - video_width
    crop_margin_y = pow2_height - video_height

    # Guard against an empty range when the frame size is already a power of two.
    x1 = random.randrange(crop_margin_x) if crop_margin_x > 0 else 0
    y1 = random.randrange(crop_margin_y) if crop_margin_y > 0 else 0
    x2 = pow2_width - (crop_margin_x - x1)
    y2 = pow2_height - (crop_margin_y - y1)

    rotation_angle = random.choice(list(range(-10, 10,
                                              1))) if is_training else 0

    video_mem[video_index, :, :, :] = 0
    for i in range(num_frames):
        image_idx = video_downsample_ratio * (i + t_offset)
        image_idx = min(image_idx + stride_offset, len(frames) - 1)  # clamp to a valid index

        image = None
        if is_imagenet and is_zipped:
            fname = pathgen(frames[image_idx])

            jpeg_filename = os.path.basename(fname)
            jpeg_dirname = os.path.basename(os.path.dirname(fname))
            zip_filepath = os.path.dirname(fname) + '.zip'
            f = zipfile.ZipFile(zip_filepath, 'r')
            compress_jpeg = io.BytesIO(
                f.read(os.path.join(jpeg_dirname, jpeg_filename)))
            image = Image.open(compress_jpeg)

        else:
            image = Image.open(pathgen(
                frames[image_idx]))  # in RGB order by default

        image = image.convert('RGB')
        #image = image.convert('L') # convert to YUV and grab Y-component

        if crop_frames:
            image = image.resize((pow2_width, pow2_height), PIL.Image.BICUBIC)
            image = image.crop(box=(x1, y1, x2, y2))
        else:
            image = image.resize((video_width, video_height),
                                 PIL.Image.BICUBIC)

        if flip_frames:
            image = image.transpose(PIL.Image.FLIP_LEFT_RIGHT)

        if rotation_angle != 0:
            image = image.rotate(rotation_angle)

        image = np.asarray(image, dtype=np.uint8)
        # numpy arrays converted from PIL images are (height, width, channels).
        assert image.shape == (
            video_height, video_width,
            3), 'cropped image must be {} but was {}'.format(
                (video_height, video_width, 3), image.shape)

        if is_imagenet:
            if is_training:
                for a in augs:
                    a.reset_state()
                    image = a.augment(image)

            image = np.asarray(image, dtype=np.float32)
            image = image * (1.0 / 255)
            mean = np.asarray([0.485, 0.456, 0.406])
            std = np.asarray([0.229, 0.224, 0.225])
            image = (image - mean) / std

        else:
            image = np.asarray(image, dtype=np.uint32)
            image = image - 116  # center on mean value of 116 (as computed in preprocessing step)
            image = np.clip(image, -128, 128)

        image = np.asarray(image, dtype=np.float32)

        if add_noise:
            noise = np.random.normal(loc=0,
                                     scale=5,
                                     size=(video_height, video_width, 3))
            image = image + noise
            image = np.clip(image, -128, 128)

        video_mem[video_index, i, :, :, :] = image

    return {'num_frames': num_frames, 'video_path': video_path}
Code Example #14
def fbresnet_augmentor(isTrain, crop_method, color_augmentation):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    execution_lst = []

    if isTrain:
        augmentors = [
            # 1. crop_method
            # a) GoogleNetResize
            GoogleNetResize(),
            # b) ShortestEdgeResize
            imgaug.ResizeShortestEdge(256),
            # c) GlobalWarp
            imgaug.Resize(226),  # NOTE: for CAM generation
            imgaug.RandomCrop((224, 224)),
            # d) CAMCrop
            # (when CAMCrop is set, the output from the original DataFlow has already been cropped)
            # 2. color_augmentation
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]

        #
        if crop_method == 'GoogleNetResize':
            print(
                '--> perform GoogleNetResize cropping method during the training pipeline'
            )
            execution_lst.extend([0])
        elif crop_method == 'ShortestEdgeResize':
            print(
                '--> perform ShortestEdgeResize cropping method during the training pipeline'
            )
            execution_lst.extend([1, 3])
        elif crop_method == 'GlobalWarp':
            print(
                '--> perform GlobalWarp cropping method during the training pipeline'
            )
            execution_lst.extend([2, 3])
        elif crop_method == 'CAMCrop':
            # enable CAMCrop @ 20171124
            print(
                '*** Perform CAMCrop to better the training dynamics and the results ***'
            )

        if color_augmentation:
            print(
                '--> perform color augmentation during the training pipeline')
            execution_lst.extend([4])
        else:
            print(
                '--> discard the color jittering process during the training pipeline'
            )

        # perform mirror reflection augmentation anyway
        execution_lst.extend([5])

    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
            imgaug.RandomCrop((224, 224)),
        ]

        if crop_method == 'RandomCrop':
            execution_lst.extend([0, 2])

        elif crop_method == 'CenterCrop':
            execution_lst.extend([0, 1])

    return [
        item_ for id_, item_ in enumerate(augmentors) if id_ in execution_lst
    ]
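A hedged usage sketch for the selector above (names and indices come from the example itself: index 0 is GoogleNetResize, 4 is the color-jitter RandomOrderAug, 5 is the horizontal flip):

# Training pipeline: GoogleNetResize cropping plus color jittering.
train_augs = fbresnet_augmentor(True, crop_method='GoogleNetResize',
                                color_augmentation=True)
# -> [GoogleNetResize(), RandomOrderAug(...), Flip(horiz=True)]

# Evaluation pipeline: deterministic 256 shortest-edge resize plus a 224x224 center crop.
val_augs = fbresnet_augmentor(False, crop_method='CenterCrop',
                              color_augmentation=False)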