def __init__(self, cfg):
    """Keep the config and assemble the training augmentation list.

    The list holds, in order: a horizontal ``Flip`` and a vertical ``Flip``
    (both ``prob=0.5``), a ``Rotation`` and a ``Grayscale`` each wrapped in
    ``RandomApplyAug(prob=0.5)``, and finally ``CustomResize``.

    Args:
        cfg: config object; ``cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE`` and
            ``cfg.PREPROC.MAX_SIZE`` are read for the final resize.
    """
    self.cfg = cfg

    # NOTE: a RandomResize step (xrange=(0.8, 1.5), applied with prob=0.5)
    # used to open this list; it is currently disabled.
    flip_horizontal = imgaug.Flip(horiz=True, prob=0.5)
    flip_vertical = imgaug.Flip(vert=True, prob=0.5)
    maybe_rotate = imgaug.RandomApplyAug(
        imgaug.Rotation(max_deg=180.0, step_deg=30.0, center_range=(0.5, 0.5)),
        prob=0.5)
    maybe_gray = imgaug.RandomApplyAug(imgaug.Grayscale(keepshape=True), prob=0.5)
    final_resize = CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                                cfg.PREPROC.MAX_SIZE)

    self.aug = imgaug.AugmentorList([
        flip_horizontal,
        flip_vertical,
        maybe_rotate,
        maybe_gray,
        final_resize,
    ])
def get_data(name, batch):
    """Build the ImageNet dataflow for the split called ``name``.

    ``'train'`` uses ``GoogleNetResize(crop_area_fraction=0.49)``, a
    randomly ordered group of colour augmentors and a horizontal ``Flip``.
    Any other name uses a shortest-edge resize to 256 followed by a
    224x224 center crop.

    Args:
        name: split name; only ``'train'`` selects the training augmentors.
        batch: batch size forwarded to ``get_imagenet_dataflow``.

    Returns:
        The result of ``get_imagenet_dataflow`` for ``args.data``.
    """
    if name != 'train':
        eval_augs = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
        return get_imagenet_dataflow(args.data, name, batch, eval_augs)

    resize = GoogleNetResize(crop_area_fraction=0.49)

    # rgb-bgr conversion for the constants copied from fb.resnet.torch
    eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]

    colour_group = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
    ])
    train_augs = [resize, colour_group, imgaug.Flip(horiz=True)]
    return get_imagenet_dataflow(args.data, name, batch, train_augs)
def get_downsampled_imagenet_augmented_data(subset, options,
                                            do_multiprocess=True,
                                            do_validation=False,
                                            shuffle=None):
    """Build the augmented, batched dataflow for downsampled ImageNet.

    The image size is parsed from ``options.ds_name``, which must have the
    form ``imagenet<size>`` (for example ``imagenet32``).

    Args:
        subset: split name; training augmentation is used only when this is
            ``'train'`` and ``do_multiprocess`` is true.
        options: object providing ``ds_name``, ``data_dir``, ``batch_size``
            and ``nr_gpu``.
        do_multiprocess: when true, wrap the result in ``PrefetchData``.
        do_validation: forwarded to ``DownsampledImageNet``.
        shuffle: explicit shuffle flag; ``None`` means "shuffle only when
            training".

    Returns:
        The batched (and optionally prefetched) dataflow.

    Raises:
        ValueError: if ``options.ds_name`` is not ``imagenet<size>``.
    """
    # Validate the dataset name up front. Previously a non-matching name
    # failed with an opaque AttributeError on ``None.group``.
    match = re.search(r'^imagenet([0-9]*)$', options.ds_name)
    if match is None or not match.group(1):
        raise ValueError(
            "options.ds_name must look like 'imagenet<size>' "
            "(e.g. 'imagenet32'), got {!r}".format(options.ds_name))
    input_size = int(match.group(1))

    isTrain = subset == 'train' and do_multiprocess
    if shuffle is None:
        shuffle = isTrain

    ds = DownsampledImageNet(_data_batch_dir(options.data_dir, input_size),
                             subset, shuffle, input_size,
                             do_validation=do_validation)
    pp_mean = ds.mean_img
    paste_size = ds.input_size * 5 // 4
    crop_size = ds.input_size

    def normalize(x):
        # Subtract the dataset mean image and scale by 1/128.
        return (x - pp_mean) / 128.0

    if isTrain:
        # Paste into a larger canvas, take a random crop of the original
        # size, then flip.
        augmentors = [
            imgaug.CenterPaste((paste_size, paste_size)),
            imgaug.RandomCrop((crop_size, crop_size)),
            imgaug.Flip(horiz=True),
            imgaug.MapImage(normalize),
        ]
    else:
        augmentors = [imgaug.MapImage(normalize)]

    ds = AugmentImageComponent(ds, augmentors)
    # Per-GPU batch size; the last partial batch is kept only outside training.
    ds = BatchData(ds, options.batch_size // options.nr_gpu,
                   remainder=not isTrain)
    if do_multiprocess:
        ds = PrefetchData(ds, 4, 2)
    return ds
def __init__(self, cfg):
    """Keep the config and build the resize-then-flip augmentor list.

    Args:
        cfg: config object; ``cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE`` and
            ``cfg.PREPROC.MAX_SIZE`` are passed to ``CustomResize``.
    """
    self.cfg = cfg
    resize = CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                          cfg.PREPROC.MAX_SIZE)
    flip = imgaug.Flip(horiz=True)
    self.aug = imgaug.AugmentorList([resize, flip])
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].

    Args:
        isTrain: truthy selects the training list, falsy the evaluation list.

    Returns:
        A list of augmentors.
    """
    if not isTrain:
        return [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]

    resize = GoogleNetResize()

    # rgb-bgr conversion for the constants copied from fb.resnet.torch
    eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]

    # It's OK to remove the colour augs below if your CPU is not fast enough.
    # Removing brightness/contrast/saturation does not have a significant
    # effect on accuracy; removing lighting leads to a tiny drop in accuracy.
    colour_group = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
    ])
    return [resize, colour_group, imgaug.Flip(horiz=True)]
def get_data(name, batch):
    """Build the ImageNet dataflow for ``name`` at a 224-pixel image size.

    Args:
        name: split name; only ``'train'`` selects the training augmentors.
        batch: batch size forwarded to ``get_imagenet_dataflow``.

    Returns:
        The result of ``get_imagenet_dataflow`` for ``args.data_dir`` and
        ``args.meta_dir``.
    """
    image_shape = 224

    if name == 'train':
        # use lighter augs if model is too small
        area_fraction = 0.49 if args.width_ratio < 1 else 0.08
        augmentors = [
            GoogleNetResize(crop_area_fraction=area_fraction,
                            target_shape=image_shape),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        # Keep the usual 256:224 ratio between resize and crop sizes.
        short_edge = int(image_shape * 256 / 224)
        augmentors = [
            imgaug.ResizeShortestEdge(short_edge, cv2.INTER_CUBIC),
            imgaug.CenterCrop((image_shape, image_shape)),
        ]

    return get_imagenet_dataflow(args.data_dir, name, batch, augmentors,
                                 meta_dir=args.meta_dir)
def read_and_augment_images(ds):
    """Load each datapoint's image and augment image and boxes together.

    Each incoming datapoint is a list whose element 0 is an image file name
    and element 1 is a float32 box array. After this function element 0 is
    the augmented float32 image, element 1 is the boxes after going through
    ``box_to_point8`` -> augmentation -> ``point8_to_box``, and the original
    file name is appended as the last element.

    Args:
        ds: the input dataflow.

    Returns:
        The mapped and augmented dataflow.
    """
    def mapf(dp):
        fname = dp[0]
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        # Check for a failed read BEFORE touching the array: calling
        # ``.astype`` on None would raise AttributeError and hide the
        # file name from the error.
        assert im is not None, fname
        dp[0] = im.astype('float32')

        # assume floatbox as input
        assert dp[1].dtype == np.float32
        dp[1] = box_to_point8(dp[1])

        dp.append(fname)
        return dp

    ds = MapData(ds, mapf)

    augs = [
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True),
    ]
    # Component 0 is the image; component 1 holds coordinates that must
    # follow the same transform.
    ds = AugmentImageComponents(ds, augs, index=(0, ), coords_index=(1, ))

    def unmapf(points):
        return point8_to_box(points)

    ds = MapDataComponent(ds, unmapf, 1)
    return ds
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].

    Both lists contain an ``imgaug.ToFloat32()`` step.

    Args:
        isTrain: truthy selects the training list, falsy the evaluation list.

    Returns:
        A list of augmentors.
    """
    if not isTrain:
        return [
            imgaug.ResizeShortestEdge(256, cv2.INTER_LINEAR),
            imgaug.CenterCrop((224, 224)),
            imgaug.ToFloat32(),
        ]

    geometric = [GoogleNetResize(), imgaug.Flip(horiz=True)]
    to_float = imgaug.ToFloat32()

    # rgb-bgr conversion for the constants copied from fb.resnet.torch
    eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]

    colour_group = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), rgb=False, clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
    ])
    return geometric + [to_float, colour_group]
def __init__(self, cfg):
    """Keep the config and build the training augmentor list.

    The list holds a ``SquareAspectRatioResize`` wrapped in
    ``RandomApplyAug`` with 0.075, then ``CustomResize``, then a horizontal
    ``Flip`` wrapped in ``RandomApplyAug`` with 0.5.

    Args:
        cfg: config object; ``cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE`` and
            ``cfg.PREPROC.MAX_SIZE`` are passed to ``CustomResize``.
    """
    self.cfg = cfg

    maybe_square = imgaug.RandomApplyAug(SquareAspectRatioResize(), 0.075)
    # NOTE: a RandomCropRandomShape step (min size 0.75 * the first
    # TRAIN_SHORT_EDGE_SIZE entry, 0.25) used to sit here; it is disabled.
    resize = CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                          cfg.PREPROC.MAX_SIZE)
    maybe_flip = imgaug.RandomApplyAug(imgaug.Flip(horiz=True), 0.5)

    self.aug = imgaug.AugmentorList([maybe_square, resize, maybe_flip])
def __init__(self, cfg):
    """Keep the config and build the weak and strong augmentors.

    Args:
        cfg: config object; reads ``cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE``,
            ``cfg.PREPROC.MAX_SIZE`` and ``cfg.TRAIN.AUGTYPE_LAB``.
    """
    self.cfg = cfg

    # Weak augmentation: resize followed by a horizontal flip augmentor.
    weak_steps = [
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True),
    ]
    self.aug_weak = imgaug.AugmentorList(weak_steps)

    # Strong augmentation is configured by the labeled-data augment type.
    self.aug_type = cfg.TRAIN.AUGTYPE_LAB
    self.aug_strong = RandomAugmentBBox(aug_type=cfg.TRAIN.AUGTYPE_LAB)

    logger.info("Use affine-enabled TrainingDataPreprocessor_aug")
def get_basic_augmentor(isTrain):
    """Return the basic augmentor list.

    Args:
        isTrain: truthy gives ``TorchvisionCropAndResize`` plus a horizontal
            ``Flip``; falsy gives a shortest-edge resize to 256 (linear
            interpolation) plus a 224x224 center crop.

    Returns:
        A list of augmentors.
    """
    if isTrain:
        return [
            TorchvisionCropAndResize(),
            imgaug.Flip(horiz=True),
        ]
    return [
        imgaug.ResizeShortestEdge(256, interp=cv2.INTER_LINEAR),
        imgaug.CenterCrop((224, 224)),
    ]
def resizeOnly_augmentor():
    """Return the list ``GoogleNetResize``, ``Lighting``, horizontal ``Flip``.

    Assumes BGR input.
    """
    resize = GoogleNetResize()

    # Constants are index-reversed to match the assumed BGR channel order.
    eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]
    lighting = imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec)

    return [resize, lighting, imgaug.Flip(horiz=True)]
def __init__(self, cfg, is_aws, is_gcs):
    """Keep the config, build the augmentors and set up a storage client.

    Args:
        cfg: config object; ``cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE`` and
            ``cfg.PREPROC.MAX_SIZE`` are passed to ``CustomResize``.
        is_aws: when truthy, a boto3 S3 resource is stored in ``self.s3``.
        is_gcs: consulted only when ``is_aws`` is falsy; when truthy, an
            anonymous storage client and its bucket are stored.
    """
    self.cfg = cfg

    steps = [
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True),
    ]
    self.aug = imgaug.AugmentorList(steps)

    self.is_aws = is_aws
    self.is_gcs = is_gcs

    # AWS takes precedence when both flags are set.
    if self.is_aws:
        self.s3 = boto3.resource("s3")
        return
    if self.is_gcs:
        self.storage_client = storage.Client.create_anonymous_client()
        self.bucket = self.storage_client.get_bucket("determined-ai-coco-dataset")
def get_moco_v1_augmentor():
    """Return the MoCo v1 augmentor list.

    Order: crop-and-resize, ``RandomGrayScale(0.2)``, float conversion,
    randomly ordered colour augmentors, uint8 conversion, horizontal flip.
    """
    crop = TorchvisionCropAndResize(crop_area_fraction=(0.2, 1.))
    gray = RandomGrayScale(0.2)
    to_float = imgaug.ToFloat32()
    colour_group = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4)),
        imgaug.Contrast((0.6, 1.4), rgb=False),
        imgaug.Saturation(0.4, rgb=False),
        # 72 = 180*0.4
        imgaug.Hue(range=(-72, 72), rgb=False),
    ])
    to_uint8 = imgaug.ToUint8()
    flip = imgaug.Flip(horiz=True)
    return [crop, gray, to_float, colour_group, to_uint8, flip]
def get_moco_v1_augmentor():
    """Return the MoCo v1 augmentor list built only from ``imgaug`` classes.

    Order: random crop-and-resize, grayscale applied via
    ``RandomApplyAug(..., 0.2)``, float conversion, randomly ordered colour
    augmentors, uint8 conversion, horizontal flip.
    """
    crop = imgaug.GoogleNetRandomCropAndResize(crop_area_fraction=(0.2, 1.))
    maybe_gray = imgaug.RandomApplyAug(
        imgaug.Grayscale(rgb=False, keepshape=True), 0.2)
    to_float = imgaug.ToFloat32()
    colour_group = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4)),
        imgaug.Contrast((0.6, 1.4), rgb=False),
        imgaug.Saturation(0.4, rgb=False),
        # 72 = 180*0.4
        imgaug.Hue(range=(-72, 72), rgb=False),
    ])
    to_uint8 = imgaug.ToUint8()
    flip = imgaug.Flip(horiz=True)
    return [crop, maybe_gray, to_float, colour_group, to_uint8, flip]
def get_cifar_augmented_data(subset, options, do_multiprocess=True,
                             do_validation=False, shuffle=None):
    """Build the augmented, batched CIFAR-10 / CIFAR-100 dataflow.

    Args:
        subset: split name; training augmentation is used only when this is
            ``'train'`` and ``do_multiprocess`` is true.
        options: object providing ``num_classes``, ``ds_name``,
            ``batch_size`` and ``nr_gpu``.
        do_multiprocess: when true, wrap the result in ``PrefetchData``.
        do_validation: forwarded to the dataset constructor.
        shuffle: explicit shuffle flag; ``None`` means "shuffle only when
            training".

    Returns:
        The batched (and optionally prefetched) dataflow.

    Raises:
        ValueError: if ``num_classes`` / ``ds_name`` is neither the
            CIFAR-10 nor the CIFAR-100 combination.
    """
    isTrain = subset == 'train' and do_multiprocess
    if shuffle is None:
        shuffle = isTrain

    n_holes = 1
    if options.num_classes == 10 and options.ds_name == 'cifar10':
        ds = dataset.Cifar10(subset, shuffle=shuffle, do_validation=do_validation)
        cutout_length = 16
    elif options.num_classes == 100 and options.ds_name == 'cifar100':
        ds = dataset.Cifar100(subset, shuffle=shuffle, do_validation=do_validation)
        cutout_length = 8
    else:
        raise ValueError(
            'Number of classes must be set to 10(default) or 100 for CIFAR')

    logger.info('{} set has n_samples: {}'.format(subset, len(ds.data)))
    pp_mean = ds.get_per_pixel_mean()

    if not isTrain:
        augmentors = [imgaug.MapImage(lambda x: (x - pp_mean) / 128.0)]
    else:
        logger.info('Will do cut-out with length={} n_holes={}'.format(
            cutout_length, n_holes))
        # Paste into a 40x40 canvas, random-crop back to 32x32, flip,
        # normalise, then cut out.
        augmentors = [
            imgaug.CenterPaste((40, 40)),
            imgaug.RandomCrop((32, 32)),
            imgaug.Flip(horiz=True),
            imgaug.MapImage(lambda x: (x - pp_mean) / 128.0),
            Cutout(length=cutout_length, n_holes=n_holes),
        ]

    ds = AugmentImageComponent(ds, augmentors)
    per_gpu_batch = options.batch_size // options.nr_gpu
    ds = BatchData(ds, per_gpu_batch, remainder=not isTrain)
    if do_multiprocess:
        ds = PrefetchData(ds, 3, 2)
    return ds
def get_moco_v2_augmentor():
    """Return the MoCo v2 augmentor list.

    Order: crop-and-resize, float conversion, colour group applied via
    ``RandomApplyAug(..., 0.8)``, ``RandomGrayScale(0.2)``, blur applied via
    ``RandomApplyAug(..., 0.5)``, uint8 conversion, horizontal flip.
    """
    crop = TorchvisionCropAndResize(crop_area_fraction=(0.2, 1.))
    to_float = imgaug.ToFloat32()
    colour_group = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4)),
        imgaug.Contrast((0.6, 1.4), rgb=False),
        imgaug.Saturation(0.4, rgb=False),
        # 18 = 180*0.1
        imgaug.Hue(range=(-18, 18), rgb=False),
    ])
    maybe_colour = imgaug.RandomApplyAug(colour_group, 0.8)
    gray = RandomGrayScale(0.2)
    maybe_blur = imgaug.RandomApplyAug(RandomGaussionBlur([0.1, 2.0], 0.1), 0.5)
    to_uint8 = imgaug.ToUint8()
    flip = imgaug.Flip(horiz=True)
    return [crop, to_float, maybe_colour, gray, maybe_blur, to_uint8, flip]
def get_data(self, train_or_test):
    """Build the batched CIFAR-10 dataflow with per-pixel mean subtraction.

    Args:
        train_or_test: split name passed to ``dataset.Cifar10``; ``'train'``
            enables the crop/flip augmentation and prefetching.

    Returns:
        The batched dataflow, using ``self.batch_size``.
    """
    training = train_or_test == 'train'
    ds = dataset.Cifar10(train_or_test, dir='.')
    pp_mean = ds.get_per_pixel_mean()

    def subtract_mean():
        return imgaug.MapImage(lambda x: x - pp_mean)

    if training:
        # Brightness / Contrast augmentation is intentionally not used here.
        augmentors = [
            imgaug.CenterPaste((40, 40)),
            imgaug.RandomCrop((32, 32)),
            imgaug.Flip(horiz=True),
            subtract_mean(),
        ]
    else:
        augmentors = [subtract_mean()]

    ds = AugmentImageComponent(ds, augmentors)
    ds = BatchData(ds, self.batch_size, remainder=not training)
    return PrefetchData(ds, 3, 2) if training else ds
def fbresnet_augmentor():
    """Return ``GoogleNetResize``, a random-order colour group and a flip.

    Assumes BGR input.
    """
    resize = GoogleNetResize()

    # rgb->bgr conversion for the constants copied from fb.resnet.torch
    eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]

    colour_group = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
    ])
    return [resize, colour_group, imgaug.Flip(horiz=True)]
def get_data(name, batch):
    """Build the ImageNet dataflow for the split called ``name``.

    Both splits first resize the shortest edge to 256 with cubic
    interpolation. ``'train'`` then takes a random 224 crop, applies
    ``Lighting`` and a horizontal ``Flip``; any other name takes a 224x224
    center crop.

    Args:
        name: split name; only ``'train'`` selects the training augmentors.
        batch: batch size forwarded to ``get_imagenet_dataflow``.

    Returns:
        The result of ``get_imagenet_dataflow`` for ``args.data``.
    """
    augmentors = [imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC)]

    if name == 'train':
        augmentors.append(imgaug.RandomCrop(224))
        # Constants are index-reversed (channel order swap).
        eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
        eigvec = np.array(
            [[-0.5675, 0.7192, 0.4009],
             [-0.5808, -0.0045, -0.8140],
             [-0.5836, -0.6948, 0.4203]],
            dtype='float32')[::-1, ::-1]
        augmentors.append(imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec))
        augmentors.append(imgaug.Flip(horiz=True))
    else:
        augmentors.append(imgaug.CenterCrop((224, 224)))

    return get_imagenet_dataflow(args.data, name, batch, augmentors)
def get_moco_v2_augmentor():
    """Return the MoCo v2 augmentor list built only from ``imgaug`` classes.

    Order: random crop-and-resize, float conversion, colour group applied
    via ``RandomApplyAug(..., 0.8)``, grayscale via
    ``RandomApplyAug(..., 0.2)``, Gaussian blur via
    ``RandomApplyAug(..., 0.5)``, uint8 conversion, horizontal flip.
    """
    crop = imgaug.GoogleNetRandomCropAndResize(crop_area_fraction=(0.2, 1.))
    to_float = imgaug.ToFloat32()
    colour_group = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4)),
        imgaug.Contrast((0.6, 1.4), rgb=False),
        imgaug.Saturation(0.4, rgb=False),
        # 18 = 180*0.1
        imgaug.Hue(range=(-18, 18), rgb=False),
    ])
    maybe_colour = imgaug.RandomApplyAug(colour_group, 0.8)
    maybe_gray = imgaug.RandomApplyAug(
        imgaug.Grayscale(rgb=False, keepshape=True), 0.2)
    # 11 = 0.1*224//2
    blur = imgaug.GaussianBlur(size_range=(11, 12), sigma_range=[0.1, 2.0])
    maybe_blur = imgaug.RandomApplyAug(blur, 0.5)
    to_uint8 = imgaug.ToUint8()
    flip = imgaug.Flip(horiz=True)
    return [crop, to_float, maybe_colour, maybe_gray, maybe_blur, to_uint8, flip]
def get_data(is_train, batch_size, data_dir_path, input_image_size=224,
             resize_inv_factor=0.875):
    """Build the ImageNet dataflow for training or evaluation.

    Args:
        is_train: truthy selects the training augmentors.
        batch_size: forwarded to ``get_imagenet_dataflow``.
        data_dir_path: dataset directory, forwarded as ``datadir``.
        input_image_size: side length of the image fed to the network.
        resize_inv_factor: ratio between ``input_image_size`` and the
            shortest-edge size used before the evaluation center crop.
            Must be positive.

    Returns:
        The result of ``get_imagenet_dataflow``.

    Raises:
        ValueError: if ``resize_inv_factor`` is not positive.
    """
    # Explicit validation instead of ``assert``: asserts are stripped under
    # ``python -O``, which would turn a bad factor into a division by zero
    # or a negative resize value.
    if not resize_inv_factor > 0.0:
        raise ValueError(
            "resize_inv_factor must be positive, got {!r}".format(resize_inv_factor))
    resize_value = int(math.ceil(float(input_image_size) / resize_inv_factor))

    if is_train:
        # rgb-bgr conversion for the constants copied from fb.resnet.torch
        eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
        eigvec = np.array(
            [[-0.5675, 0.7192, 0.4009],
             [-0.5808, -0.0045, -0.8140],
             [-0.5836, -0.6948, 0.4203]],
            dtype='float32')[::-1, ::-1]
        augmentors = [
            GoogleNetResize(crop_area_fraction=0.08,
                            target_shape=input_image_size),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        # Linear interpolation is used here (cubic was used previously).
        augmentors = [
            imgaug.ResizeShortestEdge(resize_value, cv2.INTER_LINEAR),
            imgaug.CenterCrop((input_image_size, input_image_size)),
        ]

    return get_imagenet_dataflow(datadir=data_dir_path,
                                 is_train=is_train,
                                 batch_size=batch_size,
                                 augmentors=augmentors)
def fbresnet_augmentor(isTrain):
    """
    Augmentor used in fb.resnet.torch, for BGR images in range [0,255].

    Args:
        isTrain: truthy selects the training list, falsy the evaluation list.

    Returns:
        A list of augmentors.
    """
    # linear seems to have more stable performance.
    # but we keep cubic for compatibility with old models
    interpolation = cv2.INTER_CUBIC

    if not isTrain:
        return [
            imgaug.ResizeShortestEdge(256, interp=interpolation),
            imgaug.CenterCrop((224, 224)),
        ]

    crop = imgaug.GoogleNetRandomCropAndResize(interp=interpolation)
    # avoid frequent casting in each color augmentation
    to_float = imgaug.ToFloat32()

    # rgb-bgr conversion for the constants copied from fb.resnet.torch
    eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
    eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]

    # It's OK to remove the colour augs below if your CPU is not fast enough.
    # Removing brightness/contrast/saturation does not have a significant
    # effect on accuracy; removing lighting leads to a tiny drop in accuracy.
    colour_group = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4)),
        imgaug.Contrast((0.6, 1.4), rgb=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
    ])
    return [crop, to_float, colour_group, imgaug.ToUint8(), imgaug.Flip(horiz=True)]
def __init__(self, cfg, confidence, pseudo_targets):
    """Keep the config and build the weak, resize-only and strong augmentors.

    Args:
        cfg: config object; reads ``cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE``,
            ``cfg.PREPROC.MAX_SIZE``, ``cfg.TRAIN.AUGTYPE`` and
            ``cfg.TRAIN.AUGTYPE_LAB``.
        confidence: stored unchanged as ``self.confidence``.
        pseudo_targets: stored unchanged as ``self.pseudo_targets``.
    """
    self.cfg = cfg
    short_edge = cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE
    max_size = cfg.PREPROC.MAX_SIZE

    # Resize + flip, and a resize-only variant.
    self.aug = imgaug.AugmentorList([
        CustomResize(short_edge, max_size),
        imgaug.Flip(horiz=True),
    ])
    self.resize = imgaug.AugmentorList([CustomResize(short_edge, max_size)])

    # Separate strong augmentors, one per configured augment type.
    self.aug_strong = RandomAugmentBBox(aug_type=cfg.TRAIN.AUGTYPE)
    self.aug_strong_labeled = RandomAugmentBBox(aug_type=cfg.TRAIN.AUGTYPE_LAB)
    self.labeled_augment_type = cfg.TRAIN.AUGTYPE_LAB
    self.unlabeled_augment_type = cfg.TRAIN.AUGTYPE

    self.confidence = confidence
    logger.info(
        "Use TrainingDataPreprocessor6 (using offline generated pseudo labels)"
    )
    self.pseudo_targets = pseudo_targets
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    imgs = COCODetection.load_many(
        cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)
    # To train on your own data, change the line above to your loader.
    # Produce "imgs" as a list of dict; the following keys are needed:
    #   height, width: integer
    #   file_name: str, full path to the image
    #   boxes: numpy array of kx4 floats
    #   class: numpy array of k integers
    #   is_crowd: k booleans. Use k False if you don't know what it means.
    #   segmentation: k lists of numpy arrays (one for each box).
    #       Each list of numpy arrays corresponds to the mask for one
    #       instance. Each numpy array in the list is a polygon of shape
    #       Nx2, because one mask can be represented by N polygons.
    #       If your segmentation annotations are originally masks rather
    #       than polygons, either convert them, or change/skip the
    #       augmentation code below accordingly.

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    total = len(imgs)
    imgs = [img for img in imgs
            if len(img['boxes'][img['is_crowd'] == 0]) > 0]
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(total - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True),
    ])

    def preprocess(img):
        fname = img['file_name']
        boxes = np.copy(img['boxes'])
        klass = img['class']
        is_crowd = img['is_crowd']

        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation: the same sampled params drive image and boxes
        im, params = aug.augment_return_params(im)
        boxes = point8_to_box(aug.augment_coords(box_to_point8(boxes), params))
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                per_level = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(per_level)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            # skip crowd boxes in training target
            boxes = boxes[is_crowd == 0]
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            all_polys = copy.deepcopy(img['segmentation'])
            kept = [polys for k, polys in enumerate(all_polys)
                    if not is_crowd[k]]
            assert len(kept) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in kept:
                moved = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(moved, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        return MultiThreadMapData(ds, 5, preprocess)
    return MultiProcessMapDataZMQ(ds, 10, preprocess)
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    total = len(roidbs)
    roidbs = [r for r in roidbs
              if len(r['boxes'][r['is_crowd'] == 0]) > 0]
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(total - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True),
    ])

    def preprocess(roidb):
        fname = roidb['file_name']
        boxes = np.copy(roidb['boxes'])
        klass = roidb['class']
        is_crowd = roidb['is_crowd']

        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            # Scale x columns by width and y columns by height.
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation: the same sampled params drive image and boxes
        im, params = aug.augment_return_params(im)
        boxes = point8_to_box(aug.augment_coords(box_to_point8(boxes), params))
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                per_level = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                # Level keys start at 2 (lvl2, lvl3, ...).
                for lvl, (anchor_labels, anchor_boxes) in enumerate(per_level, 2):
                    ret['anchor_labels_lvl{}'.format(lvl)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(lvl)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret['anchor_boxes'] = \
                    get_rpn_anchor_input(im, boxes, is_crowd)

            # skip crowd boxes in training target
            boxes = boxes[is_crowd == 0]
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            all_polys = copy.deepcopy(roidb['segmentation'])
            kept = [polys for k, polys in enumerate(all_polys)
                    if not is_crowd[k]]
            assert len(kept) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in kept:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        return MultiThreadMapData(ds, 5, preprocess)
    return MultiProcessMapDataZMQ(ds, 10, preprocess)
def get_train_dataflow_coco(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]

    Args:
        add_mask: when true, load segmentation data and append one binary
            mask per kept box to each datapoint.
    """
    imgs = COCODetection.load_many(
        config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = [img for img in imgs if len(img['boxes']) > 0]  # log invalid training

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True),
    ])

    def preprocess(img):
        print("start preproc coco")
        t_begin = time.time()

        fname, boxes, klass = img['file_name'], img['boxes'], img['class']
        # The second class label exists only when the second head is enabled.
        second_klass = img['second_class'] if config.USE_SECOND_HEAD else None
        is_crowd = img['is_crowd']

        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("coco: preproc_img returned None on", fname)
            return None

        ret, params = res
        im = ret[0]
        boxes = ret[3]

        # masks
        if add_mask:
            # augmentation will modify the polys in-place
            all_polys = copy.deepcopy(img.get('segmentation', None))
            kept = [all_polys[k] for k in range(len(all_polys))
                    if not is_crowd[k]]
            assert len(kept) == len(boxes), (len(kept), len(boxes))

            # one image-sized binary mask per box
            masks = []
            for polys in kept:
                moved = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(moved, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)

        elapsed = time.time() - t_begin
        print("coco example done, elapsed:", elapsed)
        return ret

    # Single-process alternative: ds = MapData(ds, preprocess)
    return MultiProcessMapData(ds, nr_proc=4, map_func=preprocess, buffer_size=20)
def get_train_dataflow_mapillary(add_mask=False, map_to_coco=False):
    """Return a training dataflow over the Mapillary training images.

    Images are globbed from ``config.MAPILLARY_PATH + "training/images/"``.
    The matching instance label PNG is read from ``training/instances/``.
    Each label value encodes the category as ``value // 256``.

    Args:
        add_mask: when true, append per-instance binary masks (resized and
            flipped to match the augmented image) to each datapoint.
        map_to_coco: when true, map Mapillary categories through
            ``config.MAPILLARY_TO_COCO_MAP`` and then to COCO class ids;
            otherwise remap to contiguous ids starting at 1.

    Returns:
        A multi-process dataflow. ``preprocess`` returns ``None`` for images
        with no usable instance or for which ``preproc_img`` returns None.
    """
    train_img_path = config.MAPILLARY_PATH + "training/images/"
    train_label_path = config.MAPILLARY_PATH + "training/instances/"
    imgs = glob.glob(train_img_path + "*.jpg")

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True),
    ])

    def preprocess(fname):
        print("start preproc mapillary")
        start = time.time()

        label_fname = fname.replace(train_img_path, train_label_path).replace(".jpg", ".png")
        # The context manager closes the label file on every exit path,
        # including exceptions. The previous manual close() calls leaked
        # the handle whenever something raised.
        with Image.open(label_fname) as pil_label:
            label = np.array(pil_label)
            instances = np.unique(label)
            instance_classes = [x // 256 for x in instances]

            # filter by categories we use
            instances_valid = [cls in config.MAPILLARY_CAT_IDS_TO_USE
                               for cls in instance_classes]
            instances = [inst for inst, valid in zip(instances, instances_valid)
                         if valid]
            instance_classes = [cls for cls, valid in zip(instance_classes, instances_valid)
                                if valid]

            if len(instances) == 0:
                print("no instances")
                return None

            if map_to_coco:
                instance_classes = [config.MAPILLARY_TO_COCO_MAP[cls]
                                    for cls in instance_classes]
                # The void label is passed through unchanged.
                instance_classes = [
                    config.VOID_LABEL if cls == config.VOID_LABEL
                    else COCOMeta.category_id_to_class_id[cls]
                    for cls in instance_classes
                ]
            else:
                # remap to contiguous numbers starting with 1
                instance_classes = [config.MAPILLARY_CAT_IDS_TO_USE.index(cls) + 1
                                    for cls in instance_classes]

            masks = np.array([label == inst for inst in instances], dtype=np.uint8)
            boxes = np.array(
                [get_bbox_from_segmentation_mask(mask) for mask in masks],
                dtype=np.float32)

            # ``np.int`` was an alias of the builtin ``int`` and no longer
            # exists in current NumPy; ``int`` gives the same dtype.
            second_klass = np.array(instance_classes, dtype=int)
            klass = np.ones_like(second_klass)
            is_crowd = np.zeros_like(second_klass)

            res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
            if res is None:
                print("mapillary: preproc_img returned None on", fname)
                return None
            ret, params = res

            if add_mask:
                do_flip, h, w = params[1]
                assert do_flip in (True, False), do_flip
                # augment label
                label = np.array(pil_label.resize((w, h), Image.NEAREST))
                if do_flip:
                    label = label[:, ::-1]
                # create augmented masks
                masks = np.array([label == inst for inst in instances], dtype=np.uint8)
                ret.append(masks)

        elapsed = time.time() - start
        print("mapillary example done, elapsed:", elapsed)

        # Debug switch: flip to True to inspect boxes and masks interactively.
        VISUALIZE = False
        if VISUALIZE:
            from viz import draw_annotation, draw_mask
            config.CLASS_NAMES = [str(idx) for idx in range(81)]
            im = ret[0]
            boxes = ret[3]
            draw_klass = ret[-2]
            viz = draw_annotation(im, boxes, draw_klass)
            for mask in masks:
                viz = draw_mask(viz, mask)
            tpviz.interactive_imshow(viz)
        return ret

    # Single-process alternative: ds = MapData(ds, preprocess)
    ds = MultiProcessMapData(ds, nr_proc=8, map_func=preprocess, buffer_size=35)
    return ds
def get_train_dataflow_davis(add_mask=False):
    """Return a training dataflow over the DAVIS augmented images.

    Every distinct value in a label PNG is treated as one instance. All
    instances get class 1, second class 0 and ``is_crowd`` 0.

    Args:
        add_mask: when true, append per-instance binary masks (resized and
            flipped to match the augmented image) to each datapoint.

    Returns:
        A single-process ``MapData`` dataflow. ``preprocess`` returns
        ``None`` when the label has no instance or ``preproc_img`` returns
        None.
    """
    # NOTE(review): these absolute paths are machine-specific; consider
    # moving them into ``config``.
    train_img_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_images/"
    train_label_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_gt/"

    imgs = sorted(glob.glob(train_img_path + "*/*.jpg"))

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True),
    ])

    def preprocess(fname):
        label_fname = fname.replace(train_img_path, train_label_path).replace(".jpg", ".png")
        # The context manager closes the label file on every exit path,
        # including exceptions.
        with Image.open(label_fname) as pil_label:
            label = np.array(pil_label)
            instances = np.unique(label)
            if len(instances) == 0:
                print("no instances")
                return None

            masks = np.array([label == inst for inst in instances], dtype=np.uint8)
            boxes = np.array(
                [get_bbox_from_segmentation_mask(mask) for mask in masks],
                dtype=np.float32)

            # One entry per instance. ``np.int`` no longer exists in current
            # NumPy; the builtin ``int`` gives the same dtype.
            second_klass = np.zeros(len(instances), dtype=int)
            klass = np.ones_like(second_klass)
            is_crowd = np.zeros_like(second_klass)

            res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
            if res is None:
                print("davis: preproc_img returned None on", fname)
                return None
            ret, params = res

            if add_mask:
                do_flip, h, w = params[1]
                assert do_flip in (True, False), do_flip
                # augment label
                label = np.array(pil_label.resize((w, h), Image.NEAREST))
                if do_flip:
                    label = label[:, ::-1]
                # create augmented masks
                masks = np.array([label == inst for inst in instances], dtype=np.uint8)
                ret.append(masks)

        # Debug switch: flip to True to inspect boxes and masks interactively.
        VISUALIZE = False
        if VISUALIZE:
            from viz import draw_annotation, draw_mask
            config.CLASS_NAMES = [str(idx) for idx in range(81)]
            im = ret[0]
            boxes = ret[3]
            draw_klass = ret[-2]
            viz = draw_annotation(im, boxes, draw_klass)
            for mask in masks:
                viz = draw_mask(viz, mask)
            tpviz.interactive_imshow(viz)
        return ret

    # Multi-process alternative:
    # MultiProcessMapData(ds, nr_proc=8, map_func=preprocess, buffer_size=35)
    ds = MapData(ds, preprocess)
    return ds
def get_train_dataflow(src):
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)

    Args:
        src: directory holding one ``.txt`` annotation file per image.
            Line 0 of each file is the image path, line 1 is skipped, and
            every following line except the last split element is
            ``<class_name> x1 y1 x2 y2`` with 1-based coordinates.

    Raises:
        IOError: if an image named in an annotation file cannot be read.
    """
    classes = (
        'BG',  # always index 0
        'bathtub', 'bed', 'bookshelf', 'box', 'chair', 'counter', 'desk',
        'door', 'dresser', 'garbage_bin', 'lamp', 'monitor', 'night_stand',
        'pillow', 'sink', 'sofa', 'table', 'toilet', 'tv')
    class_to_ind = {name: idx for idx, name in enumerate(classes)}

    # os.path.join works whether or not ``src`` ends with a separator.
    textfile_index = natsorted(
        [os.path.join(src, f) for f in sorted(os.listdir(src))
         if f.endswith('.txt')])

    imgs = []
    for count, fn in enumerate(textfile_index, 1):
        print(str(count) + ':::', fn)
        # ``with`` closes the annotation file; it used to be left open.
        with open(fn, 'r') as ann_file:
            lines = ann_file.read().split('\n')

        each_file = {'file_name': lines[0]}
        im = cv2.imread(each_file['file_name'])
        if im is None:
            # Fail with the offending path instead of an AttributeError
            # on ``None.shape``.
            raise IOError("Cannot read image {!r} listed in {!r}".format(
                each_file['file_name'], fn))
        each_file['height'] = im.shape[0]
        each_file['width'] = im.shape[1]

        boxes = []
        class_ = []
        # Line 1 is skipped, as is the final split element (the empty
        # string after a trailing newline).
        # NOTE(review): if the file has no trailing newline, the last object
        # is dropped. Confirm the file format.
        for obj in lines[2:len(lines) - 1]:
            objs_line = obj.split(' ')
            # convert 1-based coordinates to 0-based
            x1 = float(objs_line[1]) - 1.0
            y1 = float(objs_line[2]) - 1.0
            x2 = float(objs_line[3]) - 1.0
            y2 = float(objs_line[4]) - 1.0
            if x1 >= x2:
                # Repair degenerate widths.
                # NOTE(review): degenerate heights are not repaired and will
                # trip the zero-area assert in preprocess. Confirm whether
                # that is intended.
                x2 = x1 + 1
            boxes.append([x1, y1, x2, y2])
            class_.append(class_to_ind[objs_line[0]])

        each_file['boxes'] = np.array(boxes).astype(np.float32)
        each_file['class'] = np.array(class_).astype(np.int32)
        each_file['is_crowd'] = np.zeros_like(each_file['class']).astype(np.int8)
        imgs.append(each_file)

    # To train on your own data, change the loader above.
    # Produce "imgs" as a list of dict; the following keys are needed:
    #   height, width: integer
    #   file_name: str, full path to the image
    #   boxes: numpy array of kx4 floats
    #   class: numpy array of k integers
    #   is_crowd: k booleans. Use k False if you don't know what it means.
    #   segmentation: k lists of numpy arrays (one for each box).
    #       Each list of numpy arrays corresponds to the mask for one
    #       instance. Each numpy array in the list is a polygon of shape
    #       Nx2, because one mask can be represented by N polygons.

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = [img for img in imgs
            if len(img['boxes'][img['is_crowd'] == 0]) > 0]
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True),
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = \
            img['file_name'], img['boxes'], img['class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation: the same sampled params drive image and boxes
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(
                    multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds