Example 1
 def __init__(self, cfg):
     self.cfg = cfg
     self.aug = imgaug.AugmentorList([
         # imgaug.RandomApplyAug(
         #     imgaug.RandomResize(xrange=(0.8, 1.5),
         #                         minimum=(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0],
         #                                  cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0]),
         #                         aspect_ratio_thres=0.0),
         #     prob=0.5),
         imgaug.Flip(horiz=True, prob=0.5),
         imgaug.Flip(vert=True, prob=0.5),
         imgaug.RandomApplyAug(imgaug.Rotation(max_deg=180.0,
                                               step_deg=30.0,
                                               center_range=(0.5, 0.5)),
                               prob=0.5),
         imgaug.RandomApplyAug(imgaug.Grayscale(keepshape=True), prob=0.5),
         CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                      cfg.PREPROC.MAX_SIZE),
     ])
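These AugmentorList pipelines are applied with augment_return_params so that the sampled transform can be replayed on box coordinates, as the detection dataflows later on this page do (e.g. Example 25). A minimal usage sketch, assuming prep is an instance of the class above and im/boxes come from a loader (box_to_point8/point8_to_box are the helpers used in Example 7):

im, params = prep.aug.augment_return_params(im)    # transform the image, keep the params
points = box_to_point8(boxes)                      # kx4 boxes -> (k*4)x2 corner points
points = prep.aug.augment_coords(points, params)   # replay the same transform on coords
boxes = point8_to_box(points)                      # back to axis-aligned kx4 boxes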
Example 2
def get_data(name, batch):
    isTrain = name == 'train'

    if isTrain:
        augmentors = [
            GoogleNetResize(crop_area_fraction=0.49),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return get_imagenet_dataflow(args.data, name, batch, augmentors)
Example 3
def get_downsampled_imagenet_augmented_data(subset, options,
        do_multiprocess=True, do_validation=False, shuffle=None):
    isTrain = subset == 'train' and do_multiprocess
    shuffle = shuffle if shuffle is not None else isTrain

    reret = re.search(r'^imagenet([0-9]*)$', options.ds_name)
    input_size = int(reret.group(1))

    ds = DownsampledImageNet(_data_batch_dir(options.data_dir, input_size),
                             subset, shuffle, input_size,
                             do_validation=do_validation)

    pp_mean = ds.mean_img
    paste_size = ds.input_size * 5 // 4
    crop_size = ds.input_size
    if isTrain:
        augmentors = [
            imgaug.CenterPaste((paste_size, paste_size)),
            imgaug.RandomCrop((crop_size, crop_size)),
            imgaug.Flip(horiz=True),
            imgaug.MapImage(lambda x: (x - pp_mean)/128.0),
        ]
    else:
        augmentors = [
            imgaug.MapImage(lambda x: (x - pp_mean)/128.0)
        ]
    ds = AugmentImageComponent(ds, augmentors)
    ds = BatchData(ds, options.batch_size // options.nr_gpu, remainder=not isTrain)
    if do_multiprocess:
        ds = PrefetchData(ds, 4, 2)
    return ds
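The CenterPaste/RandomCrop pair above implements pad-then-random-crop jitter. A worked instance of the size arithmetic, assuming input_size = 32 (CIFAR-sized downsampled ImageNet):

input_size = 32
paste_size = input_size * 5 // 4   # 40: the 32x32 image is centered on a 40x40 canvas
crop_size = input_size             # 32: RandomCrop back to 32x32, i.e. a random +/-4 px shift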
Example 4
 def __init__(self, cfg):
     self.cfg = cfg
     self.aug = imgaug.AugmentorList([
         CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                      cfg.PREPROC.MAX_SIZE),
         imgaug.Flip(horiz=True)
     ])
Example 5
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        augmentors = [
            GoogleNetResize(),
            # It's OK to remove the following augs if your CPU is not fast enough.
            # Removing brightness/contrast/saturation does not have a significant effect on accuracy.
            # Removing lighting leads to a tiny drop in accuracy.
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
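The returned list is meant to be wired into a dataflow. A sketch of typical consumption, assuming ds is a dataflow whose first component is a BGR image (the same pattern as Examples 3 and 18 on this page; the batch size and prefetch counts here are arbitrary):

augmentors = fbresnet_augmentor(isTrain=True)
ds = AugmentImageComponent(ds, augmentors)  # apply the augmentors to component 0
ds = BatchData(ds, 64, remainder=False)     # batch the augmented datapoints
ds = PrefetchData(ds, 4, 2)                 # overlap augmentation with training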
Example 6
def get_data(name, batch):
    isTrain = name == 'train'
    image_shape = 224

    if isTrain:
        augmentors = [
            # use lighter augs if model is too small
            GoogleNetResize(
                crop_area_fraction=0.49 if args.width_ratio < 1 else 0.08,
                target_shape=image_shape),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(int(image_shape * 256 / 224),
                                      cv2.INTER_CUBIC),
            imgaug.CenterCrop((image_shape, image_shape)),
        ]
    return get_imagenet_dataflow(args.data_dir,
                                 name,
                                 batch,
                                 augmentors,
                                 meta_dir=args.meta_dir)
Example 7
def read_and_augment_images(ds):
    def mapf(dp):
        fname = dp[0]
        im = cv2.imread(fname, cv2.IMREAD_COLOR).astype('float32')
        assert im is not None, dp[0]
        dp[0] = im

        # assume floatbox as input
        assert dp[1].dtype == np.float32
        dp[1] = box_to_point8(dp[1])

        dp.append(fname)
        return dp

    ds = MapData(ds, mapf)

    augs = [
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ]
    ds = AugmentImageComponents(ds, augs, index=(0, ), coords_index=(1, ))

    def unmapf(points):
        boxes = point8_to_box(points)
        return boxes

    ds = MapDataComponent(ds, unmapf, 1)
    return ds
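box_to_point8 and point8_to_box bridge box and coordinate augmentation: each box becomes its four corner points, the points are transformed, and the axis-aligned bounding box of the result is taken. Equivalent logic might look like the following sketch (not necessarily this project's exact implementation):

import numpy as np

def box_to_point8(boxes):
    # (k, 4) x1,y1,x2,y2 boxes -> (k*4, 2) corner points
    b = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]]  # corners: (x1,y1), (x2,y2), (x1,y2), (x2,y1)
    return b.reshape((-1, 2))

def point8_to_box(points):
    # (k*4, 2) corner points -> (k, 4) axis-aligned boxes
    p = points.reshape((-1, 4, 2))
    minxy = p.min(axis=1)  # (k, 2)
    maxxy = p.max(axis=1)  # (k, 2)
    return np.concatenate((minxy, maxxy), axis=1)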
Example 8
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    if isTrain:
        augmentors = [
            GoogleNetResize(),
            imgaug.Flip(horiz=True),
            imgaug.ToFloat32(),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), rgb=False, clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_LINEAR),
            imgaug.CenterCrop((224, 224)),
            imgaug.ToFloat32(),
        ]
    return augmentors
Example 9
 def __init__(self, cfg):
     self.cfg = cfg
     self.aug = imgaug.AugmentorList([
         imgaug.RandomApplyAug(SquareAspectRatioResize(), 0.075),
         # imgaug.RandomApplyAug(imgaug.RandomCropRandomShape(wmin=int(
         #     0.75*cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0]), hmin=int(0.75*cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0])), 0.25),
         CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
         imgaug.RandomApplyAug(imgaug.Flip(horiz=True), 0.5),
     ])
Example 10
 def __init__(self, cfg):
     self.cfg = cfg
     self.aug_weak = imgaug.AugmentorList([
         CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                      cfg.PREPROC.MAX_SIZE),
         imgaug.Flip(horiz=True)
     ])
     self.aug_type = cfg.TRAIN.AUGTYPE_LAB
     self.aug_strong = RandomAugmentBBox(aug_type=cfg.TRAIN.AUGTYPE_LAB)
     logger.info("Use affine-enabled TrainingDataPreprocessor_aug")
Example 11
def get_basic_augmentor(isTrain):
    interpolation = cv2.INTER_LINEAR
    if isTrain:
        augmentors = [
            TorchvisionCropAndResize(),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, interp=interpolation),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
Example 12
def resizeOnly_augmentor():
    # assume BGR input
    augmentors = [
        GoogleNetResize(),
        imgaug.Lighting(
            0.1,
            eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
            eigvec=np.array(
                [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140],
                 [-0.5836, -0.6948, 0.4203]],
                dtype='float32')[::-1, ::-1]),
        imgaug.Flip(horiz=True),
    ]
    return augmentors
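imgaug.Lighting is AlexNet-style PCA color jitter: a random weight is drawn per eigenvector, scaled by the eigenvalues, and the resulting color offset is added to every pixel. A stand-alone sketch of the effect (assuming a float32 image and the same BGR-ordered constants as above):

import numpy as np

def pca_lighting(img, std=0.1, eigval=None, eigvec=None, rng=np.random):
    # img: HxWx3 float32; eigval: (3,); eigvec: (3, 3)
    alpha = rng.randn(3) * std          # one random weight per principal component
    delta = eigvec.dot(alpha * eigval)  # (3,) additive shift, same for all pixels
    return img + delta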
Example 13
 def __init__(self, cfg, is_aws, is_gcs):
     self.cfg = cfg
     self.aug = imgaug.AugmentorList(
         [
             CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
             imgaug.Flip(horiz=True),
         ]
     )
     self.is_aws = is_aws
     self.is_gcs = is_gcs
     if self.is_aws:
         self.s3 = boto3.resource("s3")
     elif self.is_gcs:
         self.storage_client = storage.Client.create_anonymous_client()
         self.bucket = self.storage_client.get_bucket("determined-ai-coco-dataset")
Example 14
def get_moco_v1_augmentor():
    augmentors = [
        TorchvisionCropAndResize(crop_area_fraction=(0.2, 1.)),
        RandomGrayScale(0.2),
        imgaug.ToFloat32(),
        imgaug.RandomOrderAug([
            imgaug.BrightnessScale((0.6, 1.4)),
            imgaug.Contrast((0.6, 1.4), rgb=False),
            imgaug.Saturation(0.4, rgb=False),
            # 72 = 180*0.4
            imgaug.Hue(range=(-72, 72), rgb=False)
        ]),
        imgaug.ToUint8(),
        imgaug.Flip(horiz=True),
    ]
    return augmentors
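The "# 72 = 180*0.4" comment refers to OpenCV's hue scale: hue is stored in [0, 180) (half-degrees), so a jitter of 0.4 of the full color circle becomes +/-180*0.4 = +/-72 on that scale. Spelled out:

hue_fraction = 0.4                   # jitter as a fraction of the full color circle
hue_delta = int(180 * hue_fraction)  # 72, in OpenCV's half-degree hue units
hue_aug = imgaug.Hue(range=(-hue_delta, hue_delta), rgb=False)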
Example 15
def get_moco_v1_augmentor():
    augmentors = [
        imgaug.GoogleNetRandomCropAndResize(crop_area_fraction=(0.2, 1.)),
        imgaug.RandomApplyAug(imgaug.Grayscale(rgb=False, keepshape=True), 0.2),
        imgaug.ToFloat32(),
        imgaug.RandomOrderAug(
            [imgaug.BrightnessScale((0.6, 1.4)),
             imgaug.Contrast((0.6, 1.4), rgb=False),
             imgaug.Saturation(0.4, rgb=False),
             # 72 = 180*0.4
             imgaug.Hue(range=(-72, 72), rgb=False)
             ]),
        imgaug.ToUint8(),
        imgaug.Flip(horiz=True),
    ]
    return augmentors
Example 16
def get_cifar_augmented_data(subset,
                             options,
                             do_multiprocess=True,
                             do_validation=False,
                             shuffle=None):
    isTrain = subset == 'train' and do_multiprocess
    shuffle = shuffle if shuffle is not None else isTrain
    if options.num_classes == 10 and options.ds_name == 'cifar10':
        ds = dataset.Cifar10(subset,
                             shuffle=shuffle,
                             do_validation=do_validation)
        cutout_length = 16
        n_holes = 1
    elif options.num_classes == 100 and options.ds_name == 'cifar100':
        ds = dataset.Cifar100(subset,
                              shuffle=shuffle,
                              do_validation=do_validation)
        cutout_length = 8
        n_holes = 1
    else:
        raise ValueError(
            'Number of classes must be set to 10(default) or 100 for CIFAR')
    logger.info('{} set has n_samples: {}'.format(subset, len(ds.data)))
    pp_mean = ds.get_per_pixel_mean()
    if isTrain:
        logger.info('Will do cut-out with length={} n_holes={}'.format(
            cutout_length, n_holes))
        augmentors = [
            imgaug.CenterPaste((40, 40)),
            imgaug.RandomCrop((32, 32)),
            imgaug.Flip(horiz=True),
            imgaug.MapImage(lambda x: (x - pp_mean) / 128.0),
            Cutout(length=cutout_length, n_holes=n_holes),
        ]
    else:
        augmentors = [imgaug.MapImage(lambda x: (x - pp_mean) / 128.0)]
    ds = AugmentImageComponent(ds, augmentors)
    ds = BatchData(ds,
                   options.batch_size // options.nr_gpu,
                   remainder=not isTrain)
    if do_multiprocess:
        ds = PrefetchData(ds, 3, 2)
    return ds
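Cutout above is a project-specific augmentor; the underlying technique (DeVries & Taylor, 2017) zeroes n_holes square patches of side length, with patch centers sampled uniformly so patches may straddle the border. A minimal stand-alone sketch:

import numpy as np

def cutout(img, length=16, n_holes=1, rng=np.random):
    out = img.copy()
    h, w = out.shape[:2]
    for _ in range(n_holes):
        cy, cx = rng.randint(h), rng.randint(w)  # patch center
        y1, y2 = np.clip([cy - length // 2, cy + length // 2], 0, h)
        x1, x2 = np.clip([cx - length // 2, cx + length // 2], 0, w)
        out[y1:y2, x1:x2] = 0
    return out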
Example 17
def get_moco_v2_augmentor():
    augmentors = [
        TorchvisionCropAndResize(crop_area_fraction=(0.2, 1.)),
        imgaug.ToFloat32(),
        imgaug.RandomApplyAug(
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4)),
                imgaug.Contrast((0.6, 1.4), rgb=False),
                imgaug.Saturation(0.4, rgb=False),
                # 18 = 180*0.1
                imgaug.Hue(range=(-18, 18), rgb=False)
            ]),
            0.8),
        RandomGrayScale(0.2),
        imgaug.RandomApplyAug(RandomGaussionBlur([0.1, 2.0], 0.1), 0.5),
        imgaug.ToUint8(),
        imgaug.Flip(horiz=True),
    ]
    return augmentors
Example 18
 def get_data(self, train_or_test):
     isTrain = train_or_test == 'train'
     ds = dataset.Cifar10(train_or_test, dir='.')
     pp_mean = ds.get_per_pixel_mean()
     if isTrain:
         augmentors = [
             imgaug.CenterPaste((40, 40)),
             imgaug.RandomCrop((32, 32)),
             imgaug.Flip(horiz=True),
             # imgaug.Brightness(20),
             # imgaug.Contrast((0.6,1.4)),
             imgaug.MapImage(lambda x: x - pp_mean),
         ]
     else:
         augmentors = [imgaug.MapImage(lambda x: x - pp_mean)]
     ds = AugmentImageComponent(ds, augmentors)
     ds = BatchData(ds, self.batch_size, remainder=not isTrain)
     if isTrain:
         ds = PrefetchData(ds, 3, 2)
     return ds
Example 19
def fbresnet_augmentor():
    # assume BGR input
    augmentors = [
        GoogleNetResize(),
        imgaug.RandomOrderAug([
            imgaug.BrightnessScale((0.6, 1.4), clip=False),
            imgaug.Contrast((0.6, 1.4), clip=False),
            imgaug.Saturation(0.4, rgb=False),
            # rgb->bgr conversion for the constants copied from fb.resnet.torch
            imgaug.Lighting(
                0.1,
                eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                eigvec=np.array(
                    [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140],
                     [-0.5836, -0.6948, 0.4203]],
                    dtype='float32')[::-1, ::-1])
        ]),
        imgaug.Flip(horiz=True),
    ]
    return augmentors
Example 20
def get_data(name, batch):
    isTrain = name == 'train'
    if isTrain:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.RandomCrop(224),
            imgaug.Lighting(0.1,
                            eigval=np.asarray(
                                [0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                            eigvec=np.array(
                                [[-0.5675, 0.7192, 0.4009],
                                 [-0.5808, -0.0045, -0.8140],
                                 [-0.5836, -0.6948, 0.4203]],
                                dtype='float32')[::-1, ::-1]),
            imgaug.Flip(horiz=True)]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224))]
    return get_imagenet_dataflow(args.data, name, batch, augmentors)
Example 21
def get_moco_v2_augmentor():
    augmentors = [
        imgaug.GoogleNetRandomCropAndResize(crop_area_fraction=(0.2, 1.)),
        imgaug.ToFloat32(),
        imgaug.RandomApplyAug(
            imgaug.RandomOrderAug(
                [imgaug.BrightnessScale((0.6, 1.4)),
                 imgaug.Contrast((0.6, 1.4), rgb=False),
                 imgaug.Saturation(0.4, rgb=False),
                 # 18 = 180*0.1
                 imgaug.Hue(range=(-18, 18), rgb=False)
                 ]), 0.8),
        imgaug.RandomApplyAug(imgaug.Grayscale(rgb=False, keepshape=True), 0.2),
        imgaug.RandomApplyAug(
            # 11 = 0.1*224//2
            imgaug.GaussianBlur(size_range=(11, 12), sigma_range=[0.1, 2.0]), 0.5),
        imgaug.ToUint8(),
        imgaug.Flip(horiz=True),
    ]
    return augmentors
Example 22
def get_data(is_train,
             batch_size,
             data_dir_path,
             input_image_size=224,
             resize_inv_factor=0.875):
    assert (resize_inv_factor > 0.0)
    resize_value = int(math.ceil(float(input_image_size) / resize_inv_factor))

    if is_train:
        augmentors = [
            GoogleNetResize(crop_area_fraction=0.08,
                            target_shape=input_image_size),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                # rgb-bgr conversion for the constants copied from fb.resnet.torch
                imgaug.Lighting(
                    0.1,
                    eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                    eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                     [-0.5808, -0.0045, -0.8140],
                                     [-0.5836, -0.6948, 0.4203]],
                                    dtype='float32')[::-1, ::-1])
            ]),
            imgaug.Flip(horiz=True)
        ]
    else:
        augmentors = [
            # imgaug.ResizeShortestEdge(resize_value, cv2.INTER_CUBIC),
            imgaug.ResizeShortestEdge(resize_value, cv2.INTER_LINEAR),
            imgaug.CenterCrop((input_image_size, input_image_size))
        ]

    return get_imagenet_dataflow(datadir=data_dir_path,
                                 is_train=is_train,
                                 batch_size=batch_size,
                                 augmentors=augmentors)
Example 23
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
    """
    interpolation = cv2.INTER_CUBIC
    # linear seems to have more stable performance.
    # but we keep cubic for compatibility with old models
    if isTrain:
        augmentors = [
            imgaug.GoogleNetRandomCropAndResize(interp=interpolation),
            imgaug.ToFloat32(),  # avoid frequent casting in each color augmentation
            # It's OK to remove the following augs if your CPU is not fast enough.
            # Removing brightness/contrast/saturation does not have a significant effect on accuracy.
            # Removing lighting leads to a tiny drop in accuracy.
            imgaug.RandomOrderAug(
                [imgaug.BrightnessScale((0.6, 1.4)),
                 imgaug.Contrast((0.6, 1.4), rgb=False),
                 imgaug.Saturation(0.4, rgb=False),
                 # rgb-bgr conversion for the constants copied from fb.resnet.torch
                 imgaug.Lighting(0.1,
                                 eigval=np.asarray(
                                     [0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                                 eigvec=np.array(
                                     [[-0.5675, 0.7192, 0.4009],
                                      [-0.5808, -0.0045, -0.8140],
                                      [-0.5836, -0.6948, 0.4203]],
                                     dtype='float32')[::-1, ::-1]
                                 )]),
            imgaug.ToUint8(),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, interp=interpolation),
            imgaug.CenterCrop((224, 224)),
        ]
    return augmentors
Example 24
    def __init__(self, cfg, confidence, pseudo_targets):
        self.cfg = cfg
        self.aug = imgaug.AugmentorList([
            CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                         cfg.PREPROC.MAX_SIZE),
            imgaug.Flip(horiz=True)
        ])

        self.resize = imgaug.AugmentorList([
            CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                         cfg.PREPROC.MAX_SIZE),
        ])

        self.aug_strong = RandomAugmentBBox(aug_type=cfg.TRAIN.AUGTYPE)
        self.aug_strong_labeled = RandomAugmentBBox(
            aug_type=cfg.TRAIN.AUGTYPE_LAB)
        self.labeled_augment_type = cfg.TRAIN.AUGTYPE_LAB
        self.unlabeled_augment_type = cfg.TRAIN.AUGTYPE

        self.confidence = confidence
        logger.info(
            "Use TrainingDataPreprocessor6 (using offline generated pseudo labels)"
        )
        self.pseudo_targets = pseudo_targets
Example 25
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(
                    multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
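A dataflow built this way is consumed by plain iteration after reset_state(). A hypothetical smoke test, with the component layout from the docstring above and MODE_MASK off:

ds = get_train_dataflow()
ds.reset_state()  # required before iterating a tensorpack DataFlow
for dp in ds:     # on older tensorpack, iterate ds.get_data() instead
    im = dp[0]                             # (h, w, 3) float32 image
    gt_boxes, gt_labels = dp[-2], dp[-1]   # with MODE_MASK, masks would be dp[-1]
    break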
Example 26
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
            'boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret[
                    'anchor_boxes'] = get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
Example 27
def get_train_dataflow_coco(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """
    imgs = COCODetection.load_many(config.BASEDIR,
                                   config.TRAIN_DATASET,
                                   add_gt=True,
                                   add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0,
                       imgs))  # log invalid training

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        print("start preproc coco")
        start = time.time()
        if config.USE_SECOND_HEAD:
            fname, boxes, klass, second_klass, is_crowd = img['file_name'], img['boxes'], img['class'], \
                                                          img['second_class'], img['is_crowd']
        else:
            fname, boxes, klass, is_crowd = img['file_name'], img[
                'boxes'], img['class'], img['is_crowd']
            second_klass = None
        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("coco: preproc_img returned None on", fname)
            return None

        ret, params = res
        im = ret[0]
        boxes = ret[3]
        # masks
        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img.get('segmentation', None))
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes), (len(segmentation),
                                                     len(boxes))

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        end = time.time()
        elapsed = end - start
        print("coco example done, elapsed:", elapsed)
        return ret

    #ds = MapData(ds, preprocess)
    ds = MultiProcessMapData(ds,
                             nr_proc=4,
                             map_func=preprocess,
                             buffer_size=20)
    return ds
Example 28
def get_train_dataflow_mapillary(add_mask=False, map_to_coco=False):
    train_img_path = config.MAPILLARY_PATH + "training/images/"
    train_label_path = config.MAPILLARY_PATH + "training/instances/"
    imgs = glob.glob(train_img_path + "*.jpg")

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(fname):
        print("start preproc mapillary")
        start = time.time()

        label_fname = fname.replace(train_img_path,
                                    train_label_path).replace(".jpg", ".png")
        pil_label = Image.open(label_fname)
        label = np.array(pil_label)
        instances = np.unique(label)
        instance_classes = [x // 256 for x in instances]

        # filter by categories we use
        instances_valid = [
            cls in config.MAPILLARY_CAT_IDS_TO_USE for cls in instance_classes
        ]
        instances = [
            inst for inst, valid in zip(instances, instances_valid) if valid
        ]
        instance_classes = [
            cls for cls, valid in zip(instance_classes, instances_valid)
            if valid
        ]

        if len(instances) == 0:
            print("no instances")
            pil_label.close()
            return None

        if map_to_coco:
            instance_classes = [
                config.MAPILLARY_TO_COCO_MAP[cls] for cls in instance_classes
            ]
            instance_classes = [
                config.VOID_LABEL if cls == config.VOID_LABEL else
                COCOMeta.category_id_to_class_id[cls]
                for cls in instance_classes
            ]
        else:
            # remap to contiguous numbers starting with 1
            instance_classes = [
                config.MAPILLARY_CAT_IDS_TO_USE.index(cls) + 1
                for cls in instance_classes
            ]

        masks = np.array([label == inst for inst in instances], dtype=np.uint8)

        #import cProfile
        #start1 = time.time()
        boxes1 = np.array(
            [get_bbox_from_segmentation_mask(mask) for mask in masks],
            dtype=np.float32)
        #boxes1_time = time.time() - start1
        #pr = cProfile.Profile()
        #pr.enable()
        #start1 = time.time()
        #boxes2 = get_bboxes_from_segmentation_masks(masks)
        #print("boxes1", boxes1_time, "boxes2", time.time() - start1)
        #pr.disable()
        #pr.print_stats(sort="cumulative")
        #assert (boxes1 == boxes2).all(), (boxes1, boxes2)
        boxes = boxes1

        second_klass = np.array(instance_classes, dtype=np.int64)  # np.int was removed in NumPy >= 1.24
        klass = np.ones_like(second_klass)
        is_crowd = np.zeros_like(second_klass)

        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("mapillary: preproc_img returned None on", fname)
            pil_label.close()
            return None
        ret, params = res
        if add_mask:
            do_flip, h, w = params[1]
            assert do_flip in (True, False), do_flip
            # augment label
            label = np.array(pil_label.resize((w, h), Image.NEAREST))
            if do_flip:
                label = label[:, ::-1]
            # create augmented masks
            masks = np.array([label == inst for inst in instances],
                             dtype=np.uint8)
            ret.append(masks)

        end = time.time()
        elapsed = end - start
        print("mapillary example done, elapsed:", elapsed)

        VISUALIZE = False
        if VISUALIZE:
            from viz import draw_annotation, draw_mask
            config.CLASS_NAMES = [str(idx) for idx in range(81)]
            im = ret[0]
            boxes = ret[3]
            draw_klass = ret[-2]
            viz = draw_annotation(im, boxes, draw_klass)
            for mask in masks:
                viz = draw_mask(viz, mask)
            tpviz.interactive_imshow(viz)

        pil_label.close()
        return ret

    #ds = MapData(ds, preprocess)
    ds = MultiProcessMapData(ds,
                             nr_proc=8,
                             map_func=preprocess,
                             buffer_size=35)
    return ds
Example 29
def get_train_dataflow_davis(add_mask=False):
    # train_img_path = config.DAVIS_PATH + "train/"
    # train_label_path = config.DAVIS_PATH + "train-gt/"
    # imgs = glob.glob(train_img_path + "*/*.jpg")

    # train_img_path = "/home/luiten/vision/PReMVOS/data/first/bike-trial/lucid_data_dreaming/"
    # train_label_path = "/home/luiten/vision/PReMVOS/data/first/bike-trial/lucid_data_dreaming/"

    # train_img_path = "/home/luiten/vision/PReMVOS/data/"+config.DAVIS_NAME+"/lucid_data_dreaming/"
    # train_label_path = "/home/luiten/vision/PReMVOS/data/"+config.DAVIS_NAME+"/lucid_data_dreaming/"

    # train_img_path = "/home/luiten/vision/youtubevos/ytvos_data/together/generated/augment_images/"
    # train_label_path = "/home/luiten/vision/youtubevos/ytvos_data/together/generated/augment_gt/"

    train_img_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_images/"
    train_label_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_gt/"

    imgs = sorted(glob.glob(train_img_path + "*/*.jpg"))

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(fname):
        # print("start preproc mapillary")
        start = time.time()

        label_fname = fname.replace(train_img_path,
                                    train_label_path).replace(".jpg", ".png")
        pil_label = Image.open(label_fname)
        label = np.array(pil_label)
        instances = np.unique(label)
        instance_classes = [x // 256 for x in instances]

        if len(instances) == 0:
            print("no instances")
            pil_label.close()
            return None

        masks = np.array([label == inst for inst in instances], dtype=np.uint8)

        boxes1 = np.array(
            [get_bbox_from_segmentation_mask(mask) for mask in masks],
            dtype=np.float32)
        boxes = boxes1

        # second_klass = np.array(instance_classes, dtype=np.int)
        second_klass = np.zeros_like(instance_classes, dtype=np.int64)  # np.int was removed in NumPy >= 1.24
        klass = np.ones_like(second_klass)
        is_crowd = np.zeros_like(second_klass)

        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("davis: preproc_img returned None on", fname)
            pil_label.close()
            return None
        ret, params = res
        if add_mask:
            do_flip, h, w = params[1]
            assert do_flip in (True, False), do_flip
            # augment label
            label = np.array(pil_label.resize((w, h), Image.NEAREST))
            if do_flip:
                label = label[:, ::-1]
            # create augmented masks
            masks = np.array([label == inst for inst in instances],
                             dtype=np.uint8)
            ret.append(masks)

        end = time.time()
        elapsed = end - start
        # print("davis example done, elapsed:", elapsed)

        VISUALIZE = False
        if VISUALIZE:
            from viz import draw_annotation, draw_mask
            config.CLASS_NAMES = [str(idx) for idx in range(81)]
            im = ret[0]
            boxes = ret[3]
            draw_klass = ret[-2]
            viz = draw_annotation(im, boxes, draw_klass)
            for mask in masks:
                viz = draw_mask(viz, mask)
            tpviz.interactive_imshow(viz)

        pil_label.close()
        return ret

    ds = MapData(ds, preprocess)
    # ds = MultiProcessMapData(ds, nr_proc=8, map_func=preprocess, buffer_size=35)
    # ds = MultiProcessMapData(ds, nr_proc=8, map_func=preprocess)
    return ds
Example 30
def get_train_dataflow(src):
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    #imgs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)

    classes = (
        'BG',  # always index 0
        'bathtub',
        'bed',
        'bookshelf',
        'box',
        'chair',
        'counter',
        'desk',
        'door',
        'dresser',
        'garbage_bin',
        'lamp',
        'monitor',
        'night_stand',
        'pillow',
        'sink',
        'sofa',
        'table',
        'toilet',
        'tv')

    class_to_ind = dict(list(zip(classes, list(range(len(classes))))))
    #src = '/media/ayan/Drive/IMI-Research/Datasets/Datasets_OP_Train/'
    textfile_index = natsorted(
        [src + f for f in np.sort(os.listdir(src)) if f.endswith('.txt')])
    imgs = []
    count = 0
    for fn in textfile_index:
        each_file = {}
        count = count + 1
        print(str(count) + ':::', fn)
        F = open(fn, 'r')
        file_F = F.read()
        file_F = file_F.split('\n')
        each_file['file_name'] = file_F[0]
        im = cv2.imread(each_file['file_name'])
        each_file['height'] = im.shape[0]
        each_file['width'] = im.shape[1]
        objects = file_F[2:len(file_F) - 1]
        boxes = []
        class_ = []
        for obj in objects:
            objs_line = obj.split(' ')
            x1 = float(objs_line[1]) - 1.0
            y1 = float(objs_line[2]) - 1.0
            x2 = float(objs_line[3]) - 1.0
            y2 = float(objs_line[4]) - 1.0
            if x1 >= x2:
                x2 = x1 + 1
            boxes.append([x1, y1, x2, y2])
            cls = class_to_ind[objs_line[0]]
            class_.append(cls)
        each_file['boxes'] = np.array(boxes).astype(np.float32)
        each_file['class'] = np.array(class_).astype(np.int32)
        each_file['is_crowd'] = np.zeros_like(each_file['class']).astype(
            np.int8)
        imgs.append(each_file)
    """ 
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(
                    multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds