def get_imagenet_dataflow(datadir, name, batch_size, parallel=None):
    """
    Get a standard imagenet training/evaluation dataflow, for linear classifier tuning.
    """
    assert name in ['train', 'val']
    isTrain = name == 'train'
    assert datadir is not None
    augmentors = get_basic_augmentor(isTrain)
    augmentors = imgaug.AugmentorList(augmentors)
    if parallel is None:
        parallel = min(50, mp.cpu_count())

    def mapper(dp):
        fname, label = dp
        img = cv2.imread(fname)
        img = augmentors.augment(img)
        return img, label

    if isTrain:
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=True)
        ds = MultiProcessMapAndBatchDataZMQ(ds, parallel, mapper, batch_size, buffer_size=7000)
    else:
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=False)
        ds = MultiThreadMapData(ds, parallel, mapper, buffer_size=2000, strict=True)
        ds = BatchData(ds, batch_size, remainder=True)
        ds = MultiProcessRunnerZMQ(ds, 1)
    return ds
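# Usage sketch (not part of the original code): a minimal example of consuming
# the dataflow above, assuming tensorpack is installed and '/path/to/ilsvrc12'
# (a placeholder) follows the standard ILSVRC12 layout with train/ and val/.
ds = get_imagenet_dataflow('/path/to/ilsvrc12', 'val', batch_size=64)
ds.reset_state()  # required once before iterating any tensorpack DataFlow
for images, labels in ds:
    print(images.shape, labels.shape)  # e.g. (64, 224, 224, 3) (64,)
    break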
def __init__(self, cfg):
    self.cfg = cfg
    self.aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])
def get_train_aseval_dataflow():
    """
    Return an evaluation-style dataflow over the training images.
    """
    prw = PRWDataset(cfg.DATA.BASEDIR)
    imgs = prw.load()  # no filter for training
    # test if it can repeat keys
    ds = DataFromList(imgs, shuffle=False)
    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(img):
        fname = img['file_name']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname  # check the read succeeded before touching im.shape
        orig_shape = im.shape[:2]
        im = im.astype('float32')
        # augmentation:
        im, params = aug.augment_return_params(im)
        ret = [fname, im, orig_shape]
        return ret

    ds = MapData(ds, preprocess)
    return ds
def __init__(self, cfg):
    self.cfg = cfg
    self.aug = imgaug.AugmentorList([
        imgaug.RandomApplyAug(SquareAspectRatioResize(), 0.075),
        # imgaug.RandomApplyAug(imgaug.RandomCropRandomShape(
        #     wmin=int(0.75 * cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0]),
        #     hmin=int(0.75 * cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0])), 0.25),
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.RandomApplyAug(imgaug.Flip(horiz=True), 0.5),
    ])
def get_moco_dataflow(datadir, batch_size, augmentors):
    """
    Dataflow for training MoCo.
    """
    augmentors = imgaug.AugmentorList(augmentors)
    parallel = min(30, mp.cpu_count())  # tuned on a 40-CPU 80-core machine
    ds = dataset.ILSVRC12Files(datadir, 'train', shuffle=True)
    ds = MultiProcessMapAndBatchDataZMQ(ds, parallel, MoCoMapper(augmentors),
                                        batch_size, buffer_size=5000)
    return ds
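# For reference, a hypothetical sketch of what the MoCoMapper used above could
# look like -- the real class is defined elsewhere in this codebase. The point
# it illustrates: each datapoint yields two independently augmented views
# (query and key) of the same image, as MoCo requires. All names below are
# illustrative assumptions, not the repo's actual implementation.
import cv2

class MoCoMapperSketch:
    def __init__(self, augmentors):
        self.augmentors = augmentors  # an imgaug.AugmentorList

    def __call__(self, dp):
        fname, _ = dp  # the label is unused in self-supervised training
        img = cv2.imread(fname, cv2.IMREAD_COLOR)
        view_q = self.augmentors.augment(img)  # query view
        view_k = self.augmentors.augment(img)  # key view, augmented independently
        return view_q, view_k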
def __init__(self, cfg):
    self.cfg = cfg
    self.aug_weak = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])
    self.aug_type = cfg.TRAIN.AUGTYPE_LAB
    self.aug_strong = RandomAugmentBBox(aug_type=cfg.TRAIN.AUGTYPE_LAB)
    logger.info("Use affine-enabled TrainingDataPreprocessor_aug")
def get_imagenet_dataflow(datadir, name, batch_size, augmentors=None, parallel=None):
    """
    Args:
        augmentors (list[imgaug.Augmentor]): Defaults to `fbresnet_augmentor(isTrain)`

    Returns:
        A DataFlow which produces BGR images and labels.

    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/tutorial/efficient-dataflow.html
    """
    assert name in ['train', 'val', 'test']
    isTrain = name == 'train'
    assert datadir is not None
    if augmentors is None:
        augmentors = fbresnet_augmentor(isTrain)
    assert isinstance(augmentors, list)
    if parallel is None:
        parallel = min(40, multiprocessing.cpu_count() // 2)  # assuming hyperthreading

    if isTrain:
        ds = dataset.ILSVRC12(datadir, name, shuffle=True)
        ds = AugmentImageComponent(ds, augmentors, copy=False)
        if parallel < 16:
            logger.warn("DataFlow may become the bottleneck when too few processes are used.")
        ds = PrefetchDataZMQ(ds, parallel)
        ds = BatchData(ds, batch_size, remainder=False)
    else:
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=False)
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            fname, cls = dp
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            im = aug.augment(im)
            return im, cls

        ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True)
        ds = BatchData(ds, batch_size, remainder=True)
        ds = PrefetchDataZMQ(ds, 1)
    return ds
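# Before training, it can be worth measuring raw pipeline throughput with
# tensorpack's TestDataSpeed (part of tensorpack.dataflow), which iterates the
# dataflow and reports datapoints/sec; the directory path is a placeholder.
from tensorpack.dataflow import TestDataSpeed

ds = get_imagenet_dataflow('/path/to/ilsvrc12', 'train', batch_size=64)
TestDataSpeed(ds, size=1000).start()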
def get_data(name, meta_dir, gpu_nums):
    isTrain = 'train' in name
    m = np.array([104, 116, 122])
    const_arr = np.resize(m, (1, 1, 3))
    const_arr = np.zeros((args.crop_size[0], args.crop_size[1], 3)) + const_arr  # broadcast

    if isTrain:
        #ds = FakeData([[1024, 2048, 3], [1024, 2048]], 5000, random=False, dtype='uint8')
        #ds = FakeData([[CROP_HEIGHT, CROP_HEIGHT, 3], [CROP_HEIGHT, CROP_HEIGHT]], 5000, random=False, dtype='uint8')
        ds = CityscapesFiles(base_dir, meta_dir, name, shuffle=True)
        parallel = min(3, multiprocessing.cpu_count())
        augmentors = [
            RandomCropWithPadding(args.crop_size),
            Flip(horiz=True),
        ]
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            img, label = dp
            img = cv2.imread(img, cv2.IMREAD_COLOR)
            label = cv2.imread(label, cv2.IMREAD_GRAYSCALE)
            img, params = aug.augment_return_params(img)
            label = aug._augment(label, params)
            img = img - const_arr  # very time-consuming
            return img, label

        #ds = MapData(ds, mapf)
        ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=500, strict=True)
        #ds = MapData(ds, reduce_mean_rgb)
        ds = BatchData(ds, args.batch_size * gpu_nums)
        #ds = PrefetchDataZMQ(ds, 1)
    else:
        def imgread(dp):
            img, label = dp
            img = cv2.imread(img, cv2.IMREAD_COLOR)
            label = cv2.imread(label, cv2.IMREAD_GRAYSCALE)
            return [img, label]

        ds = CityscapesFiles(base_dir, meta_dir, name, shuffle=False)
        ds = MapData(ds, imgread)
        ds = BatchData(ds, 1)
    return ds
def __init__(self, cfg):
    self.cfg = cfg
    self.aug = imgaug.AugmentorList([
        # imgaug.RandomApplyAug(imgaug.RandomResize(
        #     xrange=(0.8, 1.5),
        #     minimum=(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0], cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0]),
        #     aspect_ratio_thres=0.0), prob=0.5),
        imgaug.Flip(horiz=True, prob=0.5),
        imgaug.Flip(vert=True, prob=0.5),
        imgaug.RandomApplyAug(imgaug.Rotation(max_deg=180.0, step_deg=30.0,
                                              center_range=(0.5, 0.5)), prob=0.5),
        imgaug.RandomApplyAug(imgaug.Grayscale(keepshape=True), prob=0.5),
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
    ])
def __init__(self, cfg, is_aws, is_gcs):
    self.cfg = cfg
    self.aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True),
    ])
    self.is_aws = is_aws
    self.is_gcs = is_gcs
    if self.is_aws:
        self.s3 = boto3.resource("s3")
    elif self.is_gcs:
        self.storage_client = storage.Client.create_anonymous_client()
        self.bucket = self.storage_client.get_bucket("determined-ai-coco-dataset")
def __init__(self, cfg, confidence, pseudo_targets):
    self.cfg = cfg
    self.aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])
    self.resize = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
    ])
    self.aug_strong = RandomAugmentBBox(aug_type=cfg.TRAIN.AUGTYPE)
    self.aug_strong_labeled = RandomAugmentBBox(aug_type=cfg.TRAIN.AUGTYPE_LAB)
    self.labeled_augment_type = cfg.TRAIN.AUGTYPE_LAB
    self.unlabeled_augment_type = cfg.TRAIN.AUGTYPE
    self.confidence = confidence
    logger.info("Use TrainingDataPreprocessor6 (using offline generated pseudo labels)")
    self.pseudo_targets = pseudo_targets
def build_dataflow(files):
    train_ds = DataFromList(files)
    aug = imgaug.AugmentorList(get_basic_augmentor(isTrain=False))

    def mapper(dp):
        idx, fname, label = dp
        img = cv2.imread(fname)
        img = aug.augment(img)
        return img, idx

    train_ds = MultiProcessMapDataZMQ(train_ds, num_proc=8, map_func=mapper, strict=True)
    train_ds = BatchData(train_ds, local_batch_size)
    train_ds.reset_state()
    return train_ds
def get_imagenet_dataflow(datadir, is_train, batch_size, augmentors, parallel=None):
    """
    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html
    """
    assert datadir is not None
    assert isinstance(augmentors, list)
    if parallel is None:
        parallel = min(40, multiprocessing.cpu_count() // 2)  # assuming hyperthreading

    if is_train:
        ds = dataset.ILSVRC12(datadir, "train", shuffle=True)
        ds = AugmentImageComponent(ds, augmentors, copy=False)
        if parallel < 16:
            logging.warning("DataFlow may become the bottleneck when too few processes are used.")
        ds = PrefetchDataZMQ(ds, parallel)
        ds = BatchData(ds, batch_size, remainder=False)
    else:
        ds = dataset.ILSVRC12Files(datadir, "val", shuffle=False)
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            fname, cls = dp
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            im = np.flip(im, axis=2)  # reverse the channel order (BGR -> RGB)
            # print("fname={}".format(fname))
            im = aug.augment(im)
            return im, cls

        ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True)
        # ds = MapData(ds, mapf)
        ds = BatchData(ds, batch_size, remainder=True)
        ds = PrefetchDataZMQ(ds, 1)
        # ds = PrefetchData(ds, 1)
    return ds
def get_data(self, name, num_gpu):
    gpu_batch = self.batch_size // num_gpu
    assert name in ['train', 'val', 'test']
    isTrain = name == 'train'

    augmentors = fbresnet_augmentor(isTrain)
    assert isinstance(augmentors, list)
    parallel = min(40, multiprocessing.cpu_count() // 2)  # assuming hyperthreading

    if isTrain:
        ds = dataset.ILSVRC12(self.datadir, name, shuffle=True, dir_structure='train')
        ds = AugmentImageComponent(ds, augmentors, copy=False)
        ds = MultiProcessRunnerZMQ(ds, parallel)
        ds = BatchData(ds, gpu_batch, remainder=False)
        #ds = QueueInput(ds)
    else:
        ds = dataset.ILSVRC12Files(self.datadir, name, shuffle=False, dir_structure='train')
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            fname, cls = dp
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            im = aug.augment(im)
            return im, cls

        ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True)
        ds = BatchData(ds, gpu_batch, remainder=True)
        ds = MultiProcessRunnerZMQ(ds, 1)
        if num_gpu == 1:
            ds = QueueInput(ds)
    return ds
def get_query_dataflow():
    """
    Return a dataflow over the PRW query images.
    """
    prw = PRWDataset(cfg.DATA.BASEDIR)
    imgs = prw.load_query()  # no filter for training
    # test if it can repeat keys
    ds = DataFromList(imgs, shuffle=False)
    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(img):
        fname, boxes, re_id_class = img['file_name'], img['boxes'], img['re_id_class']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = [im, boxes, re_id_class]
        return ret

    ds = MapData(ds, preprocess)
    return ds
def process_avatar_synth_data(df, batch_size, num_threads):
    """
    Perform preprocessing for the avatar synth data.

    :param df: An AvatarSynthDataFlow.
    :param batch_size: The minibatch size.
    :param num_threads: The number of threads to read and process data.
    :return: A dataflow with extra processing steps applied.
    """
    augmentor = imgaug.AugmentorList([imgaug.MinMaxNormalize(min=-1, max=1)])
    df = MultiThreadMapData(
        df,
        nr_thread=num_threads,
        map_func=lambda dp: [np.load(dp[0]), augmentor.augment(imread(dp[1]))])
    # df = MapData(df, lambda dp: [np.load(dp[0]), augmentor.augment(imread(dp[1]))])
    df = PrefetchDataZMQ(df, nr_proc=num_threads)
    df = BatchData(df, batch_size, remainder=True)
    return df
def get_imagenet_dataflow(datadir, name, batch_size, augmentors, parallel=None):
    """
    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html
    """
    assert name in ['train', 'val', 'test']
    assert datadir is not None
    assert isinstance(augmentors, list)
    isTrain = name == 'train'
    if parallel is None:
        parallel = min(40, 16)  # assuming hyperthreading

    if isTrain:
        ds1 = ilsvrcsemi.ILSVRC12(datadir, name, shuffle=True, labeled=True)
        ds2 = ilsvrcsemi.ILSVRC12(datadir, name, shuffle=True, labeled=False)
        ds1 = AugmentImageComponent(ds1, augmentors, copy=False)
        ds2 = AugmentImageComponent(ds2, augmentors, copy=False)
        ds = JoinData([ds1, ds2])
        if parallel < 16:
            logger.warn("DataFlow may become the bottleneck when too few processes are used.")
        ds = PrefetchDataZMQ(ds, parallel)
        ds = BatchData(ds, batch_size, remainder=False)
    else:
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=False)
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            fname, cls = dp
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            im = aug.augment(im)
            return im, cls, im, cls

        ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True)
        ds = BatchData(ds, batch_size, remainder=True)
        ds = PrefetchDataZMQ(ds, 1)
    return ds
def process_s2b_data(df, batch_size, num_threads):
    """
    Perform preprocessing for the s2b (face -> bitmoji) data.

    :param df: A dataflow of (path_to_face.jpg, path_to_bitmoji.jpg) pairs.
    :param batch_size: The minibatch size.
    :param num_threads: The number of threads to read and process data.
    :return: A dataflow with extra processing steps applied.
    """
    augmentor = imgaug.AugmentorList([imgaug.MinMaxNormalize(min=-1, max=1)])

    def get_imgs(dp):
        """
        :param dp: A datapoint tuple, (path_to_face.jpg, path_to_bitmoji.jpg)
        """
        face_img = augmentor.augment(imread(dp[0]))
        bitmoji_img = augmentor.augment(imread(dp[1]))
        # Promote grayscale images to 3 channels.
        if len(face_img.shape) == 2:
            face_img = np.stack([face_img] * 3, axis=-1)
        if len(bitmoji_img.shape) == 2:
            bitmoji_img = np.stack([bitmoji_img] * 3, axis=-1)
        return [face_img, bitmoji_img]

    df = MultiThreadMapData(df, nr_thread=num_threads, map_func=get_imgs,
                            buffer_size=min(df.size(), 200))
    df = PrefetchDataZMQ(df, nr_proc=num_threads)
    # TODO: switch back to remainder=True when s2b input batch size switched back to None
    df = BatchData(df, batch_size, remainder=False)
    # df = BatchData(df, batch_size, remainder=True)
    return df
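# What MinMaxNormalize(min=-1, max=1), used in the two functions above, does,
# shown on a toy array: it linearly rescales each input so its smallest value
# maps to -1 and its largest to +1. A small self-contained check (assumes a
# tensorpack version whose augmentors expose .augment(), as used above):
import numpy as np
from tensorpack.dataflow import imgaug

norm = imgaug.MinMaxNormalize(min=-1, max=1)
x = np.array([[0., 127.5, 255.]], dtype='float32')
print(norm.augment(x))  # [[-1.  0.  1.]]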
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)  # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
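# The preprocess function above returns None for malformed datapoints, relying
# on a contract of tensorpack's MapData family (shared by the multi-thread and
# multi-process variants used here, in non-strict mode): a None return silently
# drops the datapoint. A tiny self-contained demonstration of that contract:
from tensorpack.dataflow import DataFromList, MapData

demo = DataFromList([[1], [2], [3], [4]], shuffle=False)
demo = MapData(demo, lambda dp: dp if dp[0] % 2 == 1 else None)
demo.reset_state()
print(list(demo))  # [[1], [3]] -- the even datapoints were dropped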
def get_train_dataflow_mapillary(add_mask=False, map_to_coco=False):
    train_img_path = config.MAPILLARY_PATH + "training/images/"
    train_label_path = config.MAPILLARY_PATH + "training/instances/"
    imgs = glob.glob(train_img_path + "*.jpg")

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(fname):
        print("start preproc mapillary")
        start = time.time()

        label_fname = fname.replace(train_img_path, train_label_path).replace(".jpg", ".png")
        pil_label = Image.open(label_fname)
        label = np.array(pil_label)
        instances = np.unique(label)
        instance_classes = [x // 256 for x in instances]

        # filter by categories we use
        instances_valid = [cls in config.MAPILLARY_CAT_IDS_TO_USE for cls in instance_classes]
        instances = [inst for inst, valid in zip(instances, instances_valid) if valid]
        instance_classes = [cls for cls, valid in zip(instance_classes, instances_valid) if valid]

        if len(instances) == 0:
            print("no instances")
            pil_label.close()
            return None

        if map_to_coco:
            instance_classes = [config.MAPILLARY_TO_COCO_MAP[cls] for cls in instance_classes]
            instance_classes = [
                config.VOID_LABEL if cls == config.VOID_LABEL else COCOMeta.category_id_to_class_id[cls]
                for cls in instance_classes
            ]
        else:
            # remap to contiguous numbers starting with 1
            instance_classes = [config.MAPILLARY_CAT_IDS_TO_USE.index(cls) + 1 for cls in instance_classes]

        masks = np.array([label == inst for inst in instances], dtype=np.uint8)

        #import cProfile
        #start1 = time.time()
        boxes1 = np.array([get_bbox_from_segmentation_mask(mask) for mask in masks], dtype=np.float32)
        #boxes1_time = time.time() - start1
        #pr = cProfile.Profile()
        #pr.enable()
        #start1 = time.time()
        #boxes2 = get_bboxes_from_segmentation_masks(masks)
        #print("boxes1", boxes1_time, "boxes2", time.time() - start1)
        #pr.disable()
        #pr.print_stats(sort="cumulative")
        #assert (boxes1 == boxes2).all(), (boxes1, boxes2)
        boxes = boxes1

        second_klass = np.array(instance_classes, dtype=np.int)
        klass = np.ones_like(second_klass)
        is_crowd = np.zeros_like(second_klass)

        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("mapillary: preproc_img returned None on", fname)
            pil_label.close()
            return None
        ret, params = res

        if add_mask:
            do_flip, h, w = params[1]
            assert do_flip in (True, False), do_flip
            # augment label
            label = np.array(pil_label.resize((w, h), Image.NEAREST))
            if do_flip:
                label = label[:, ::-1]
            # create augmented masks
            masks = np.array([label == inst for inst in instances], dtype=np.uint8)
            ret.append(masks)

        end = time.time()
        elapsed = end - start
        print("mapillary example done, elapsed:", elapsed)

        VISUALIZE = False
        if VISUALIZE:
            from viz import draw_annotation, draw_mask
            config.CLASS_NAMES = [str(idx) for idx in range(81)]
            im = ret[0]
            boxes = ret[3]
            draw_klass = ret[-2]
            viz = draw_annotation(im, boxes, draw_klass)
            for mask in masks:
                viz = draw_mask(viz, mask)
            tpviz.interactive_imshow(viz)

        pil_label.close()
        return ret

    #ds = MapData(ds, preprocess)
    ds = MultiProcessMapData(ds, nr_proc=8, map_func=preprocess, buffer_size=35)
    return ds
def get_train_dataflow_coco(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """
    imgs = COCODetection.load_many(config.BASEDIR, config.TRAIN_DATASET,
                                   add_gt=True, add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs))  # log invalid training

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        print("start preproc coco")
        start = time.time()

        if config.USE_SECOND_HEAD:
            fname, boxes, klass, second_klass, is_crowd = img['file_name'], img['boxes'], \
                img['class'], img['second_class'], img['is_crowd']
        else:
            fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
            second_klass = None

        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("coco: preproc_img returned None on", fname)
            return None
        ret, params = res
        im = ret[0]
        boxes = ret[3]

        # masks
        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img.get('segmentation', None))
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes), (len(segmentation), len(boxes))

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

        # from viz import draw_annotation, draw_mask
        # viz = draw_annotation(im, boxes, klass)
        # for mask in masks:
        #     viz = draw_mask(viz, mask)
        # tpviz.interactive_imshow(viz)

        end = time.time()
        elapsed = end - start
        print("coco example done, elapsed:", elapsed)
        return ret

    #ds = MapData(ds, preprocess)
    ds = MultiProcessMapData(ds, nr_proc=4, map_func=preprocess, buffer_size=20)
    return ds
def get_train_dataflow(src):
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    #imgs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)
    classes = (
        'BG',  # always index 0
        'bathtub', 'bed', 'bookshelf', 'box', 'chair', 'counter', 'desk',
        'door', 'dresser', 'garbage_bin', 'lamp', 'monitor', 'night_stand',
        'pillow', 'sink', 'sofa', 'table', 'toilet', 'tv')
    class_to_ind = dict(list(zip(classes, list(range(len(classes))))))
    #src = '/media/ayan/Drive/IMI-Research/Datasets/Datasets_OP_Train/'
    textfile_index = natsorted([src + f for f in np.sort(os.listdir(src)) if f.endswith('.txt')])
    imgs = []
    count = 0
    for fn in textfile_index:
        each_file = {}
        count = count + 1
        print(str(count) + ':::', fn)
        with open(fn, 'r') as F:
            file_F = F.read()
        file_F = file_F.split('\n')
        each_file['file_name'] = file_F[0]
        im = cv2.imread(each_file['file_name'])
        each_file['height'] = im.shape[0]
        each_file['width'] = im.shape[1]
        objects = file_F[2:len(file_F) - 1]
        boxes = []
        class_ = []
        for obj in objects:
            objs_line = obj.split(' ')
            x1 = float(objs_line[1]) - 1.0
            y1 = float(objs_line[2]) - 1.0
            x2 = float(objs_line[3]) - 1.0
            y2 = float(objs_line[4]) - 1.0
            if x1 >= x2:
                x2 = x1 + 1
            boxes.append([x1, y1, x2, y2])
            cls = class_to_ind[objs_line[0]]
            class_.append(cls)
        each_file['boxes'] = np.array(boxes).astype(np.float32)
        each_file['class'] = np.array(class_).astype(np.int32)
        each_file['is_crowd'] = np.zeros_like(each_file['class']).astype(np.int8)
        imgs.append(each_file)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy arrays corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)
    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

        # from viz import draw_annotation, draw_mask
        # viz = draw_annotation(im, boxes, klass)
        # for mask in masks:
        #     viz = draw_mask(viz, mask)
        # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)  # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
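# Why boxes are routed through point8 in the preprocess functions above: a
# geometric augmentor transforms point coordinates, so each box is expanded to
# its 4 corner points, the points are transformed, and the tightest enclosing
# axis-aligned box is recovered. A sketch with a deterministic flip (assumes
# the box_to_point8/point8_to_box helpers from this codebase are importable):
import numpy as np
from tensorpack.dataflow import imgaug

aug_demo = imgaug.AugmentorList([imgaug.Flip(horiz=True, prob=1.0)])
im = np.zeros((100, 200, 3), dtype='float32')
boxes = np.array([[10., 20., 50., 60.]], dtype=np.float32)  # x1, y1, x2, y2

im, params = aug_demo.augment_return_params(im)
points = box_to_point8(boxes)              # (k, 4) -> (k*4, 2)
points = aug_demo.augment_coords(points, params)
print(point8_to_box(points))               # [[150. 20. 190. 60.]] after the flip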
def get_train_dataflow_davis(add_mask=False):
    # train_img_path = config.DAVIS_PATH + "train/"
    # train_label_path = config.DAVIS_PATH + "train-gt/"
    # imgs = glob.glob(train_img_path + "*/*.jpg")
    # train_img_path = "/home/luiten/vision/PReMVOS/data/first/bike-trial/lucid_data_dreaming/"
    # train_label_path = "/home/luiten/vision/PReMVOS/data/first/bike-trial/lucid_data_dreaming/"
    # train_img_path = "/home/luiten/vision/PReMVOS/data/"+config.DAVIS_NAME+"/lucid_data_dreaming/"
    # train_label_path = "/home/luiten/vision/PReMVOS/data/"+config.DAVIS_NAME+"/lucid_data_dreaming/"
    # train_img_path = "/home/luiten/vision/youtubevos/ytvos_data/together/generated/augment_images/"
    # train_label_path = "/home/luiten/vision/youtubevos/ytvos_data/together/generated/augment_gt/"
    train_img_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_images/"
    train_label_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_gt/"
    imgs = sorted(glob.glob(train_img_path + "*/*.jpg"))

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(fname):
        # print("start preproc mapillary")
        start = time.time()

        label_fname = fname.replace(train_img_path, train_label_path).replace(".jpg", ".png")
        pil_label = Image.open(label_fname)
        label = np.array(pil_label)
        instances = np.unique(label)
        instance_classes = [x // 256 for x in instances]

        if len(instances) == 0:
            print("no instances")
            pil_label.close()
            return None

        masks = np.array([label == inst for inst in instances], dtype=np.uint8)
        boxes1 = np.array([get_bbox_from_segmentation_mask(mask) for mask in masks], dtype=np.float32)
        boxes = boxes1

        # second_klass = np.array(instance_classes, dtype=np.int)
        second_klass = np.zeros_like(instance_classes, dtype=np.int)
        klass = np.ones_like(second_klass)
        is_crowd = np.zeros_like(second_klass)

        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("davis: preproc_img returned None on", fname)
            pil_label.close()
            return None
        ret, params = res

        if add_mask:
            do_flip, h, w = params[1]
            assert do_flip in (True, False), do_flip
            # augment label
            label = np.array(pil_label.resize((w, h), Image.NEAREST))
            if do_flip:
                label = label[:, ::-1]
            # create augmented masks
            masks = np.array([label == inst for inst in instances], dtype=np.uint8)
            ret.append(masks)

        end = time.time()
        elapsed = end - start
        # print("davis example done, elapsed:", elapsed)

        VISUALIZE = False
        if VISUALIZE:
            from viz import draw_annotation, draw_mask
            config.CLASS_NAMES = [str(idx) for idx in range(81)]
            im = ret[0]
            boxes = ret[3]
            draw_klass = ret[-2]
            viz = draw_annotation(im, boxes, draw_klass)
            for mask in masks:
                viz = draw_mask(viz, mask)
            tpviz.interactive_imshow(viz)

        pil_label.close()
        return ret

    ds = MapData(ds, preprocess)
    # ds = MultiProcessMapData(ds, nr_proc=8, map_func=preprocess, buffer_size=35)
    # ds = MultiProcessMapData(ds, nr_proc=8, map_func=preprocess)
    return ds
def get_train_dataflow(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """
    imgs = COCODetection.load_many(config.BASEDIR, config.TRAIN_DATASET,
                                   add_gt=True, add_mask=add_mask)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str
    boxes: kx4 floats
    class: k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k numpy arrays. Each array is a polygon of shape Nx2.
        If your segmentation annotations are masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs))  # log invalid training

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img.get('segmentation', None))
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

        # from viz import draw_annotation, draw_mask
        # viz = draw_annotation(im, boxes, klass)
        # for mask in masks:
        #     viz = draw_mask(viz, mask)
        # tpviz.interactive_imshow(viz)
        return ret

    ds = MultiProcessMapData(ds, 3, preprocess)
    return ds
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN,
                                     add_gt=True, add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "roidbs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy arrays corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(im, boxes, is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)  # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
def get_batch_train_dataflow(batch_size):
    """
    Return a training dataflow. Each datapoint consists of the following:

    A batch of images: (BS, h, w, 3),

    For each image, 1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (BS, h', w', maxNumAnchors)
        anchor_boxes: (BS, h', w', maxNumAnchors, 4)

    gt_boxes: (BS, maxNumAnchors, 4)
    gt_labels: (BS, maxNumAnchors)

    If MODE_MASK, gt_masks: (BS, maxNumAnchors, h, w)
    """
    print("In train dataflow")
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print("Done loading roidbs")
    # print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    roidbs = sorted(roidbs, key=lambda x: float(x['width']) / float(x['height']),
                    reverse=True)  # will shuffle it later at every rank

    print("Batching roidbs")
    batched_roidbs = []
    if cfg.PREPROC.PREDEFINED_PADDING:
        taken = [False for _ in roidbs]
        done = False
        for i, d in enumerate(roidbs):
            batch = []
            if not taken[i]:
                batch.append(d)
                padding_shape = get_padding_shape(d['height'], d['width'])
                while len(batch) < batch_size:
                    k = get_next_roidb(roidbs, i, padding_shape, taken)
                    if k is None:
                        done = True
                        break
                    batch.append(roidbs[k])
                    taken[i], taken[k] = True, True
                if not done:
                    batched_roidbs.append(batch)
    else:
        batch = []
        for i, d in enumerate(roidbs):
            if i % batch_size == 0:
                if len(batch) == batch_size:
                    batched_roidbs.append(batch)
                batch = []
            batch.append(d)
    #batched_roidbs = sort_by_aspect_ratio(roidbs, batch_size)
    #batched_roidbs = group_by_aspect_ratio(roidbs, batch_size)
    print("Done batching roidbs")

    # Notes:
    # - discard any leftover images
    # - The batches will be shuffled, but the contents of each batch will always be the same
    # - TODO: Fix lack of batch contents shuffling

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])
    # aug = imgaug.AugmentorList([CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(roidb_batch):
        datapoint_list = []
        for roidb in roidb_batch:
            fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
            boxes = np.copy(boxes)
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            assert im is not None, fname
            im = im.astype('float32')
            # assume floatbox as input
            assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

            # augmentation:
            im, params = aug.augment_return_params(im)
            points = box_to_point8(boxes)
            points = aug.augment_coords(points, params)
            boxes = point8_to_box(points)
            assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

            ret = {'images': im}
            # rpn anchor:
            try:
                if cfg.MODE_FPN:
                    multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                    for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                        ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                        ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
                else:
                    raise NotImplementedError("[armand] Batch mode only available for FPN")

                boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
                klass = klass[is_crowd == 0]
                ret['gt_boxes'] = boxes
                ret['gt_labels'] = klass
                ret['filename'] = fname
                if not len(boxes):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
                return None

            if cfg.MODE_MASK:
                # augmentation will modify the polys in-place
                segmentation = copy.deepcopy(roidb['segmentation'])
                segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
                assert len(segmentation) == len(boxes)

                # Apply augmentation on polygon coordinates.
                # And produce one image-sized binary mask per box.
                masks = []
                for polys in segmentation:
                    polys = [aug.augment_coords(p, params) for p in polys]
                    masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
                masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
                ret['gt_masks'] = masks

            datapoint_list.append(ret)

        ############################################################################################
        # Batchify the output
        ############################################################################################
        # Easily stackable:
        # - anchor_labels_lvl2 .. anchor_labels_lvl6
        # - anchor_boxes_lvl2 .. anchor_boxes_lvl6
        batched_datapoint = {}
        for stackable_field in ["anchor_labels_lvl2", "anchor_boxes_lvl2",
                                "anchor_labels_lvl3", "anchor_boxes_lvl3",
                                "anchor_labels_lvl4", "anchor_boxes_lvl4",
                                "anchor_labels_lvl5", "anchor_boxes_lvl5",
                                "anchor_labels_lvl6", "anchor_boxes_lvl6"]:
            batched_datapoint[stackable_field] = np.stack([d[stackable_field] for d in datapoint_list])

        # Require padding and original dimension storage:
        # - image (HxWx3)
        # - gt_boxes (?x4)
        # - gt_labels (?)
        # - gt_masks (?xHxW)

        # Find the minimum container size for images (maxW x maxH).
        # Find the maximum number of ground truth boxes.
        # For each image, save original dimension and pad.
        if cfg.PREPROC.PREDEFINED_PADDING:
            padding_shapes = [get_padding_shape(*(d["images"].shape[:2])) for d in datapoint_list]
            max_height = max([shp[0] for shp in padding_shapes])
            max_width = max([shp[1] for shp in padding_shapes])
        else:
            image_dims = [d["images"].shape for d in datapoint_list]
            heights = [dim[0] for dim in image_dims]
            widths = [dim[1] for dim in image_dims]
            max_height = max(heights)
            max_width = max(widths)

        # image
        padded_images = []
        original_image_dims = []
        for datapoint in datapoint_list:
            image = datapoint["images"]
            original_image_dims.append(image.shape)
            h_padding = max_height - image.shape[0]
            w_padding = max_width - image.shape[1]
            padded_image = np.pad(image, [[0, h_padding], [0, w_padding], [0, 0]], 'constant')
            padded_images.append(padded_image)

        batched_datapoint["images"] = np.stack(padded_images)
        #print(batched_datapoint["images"].shape)
        batched_datapoint["orig_image_dims"] = np.stack(original_image_dims)

        # gt_boxes and gt_labels
        max_num_gts = max([d["gt_labels"].size for d in datapoint_list])
        gt_counts = []
        padded_gt_labels = []
        padded_gt_boxes = []
        padded_gt_masks = []
        for datapoint in datapoint_list:
            gt_count_for_image = datapoint["gt_labels"].size
            gt_counts.append(gt_count_for_image)
            gt_padding = max_num_gts - gt_count_for_image

            padded_gt_labels_for_img = np.pad(datapoint["gt_labels"], [0, gt_padding],
                                              'constant', constant_values=-1)
            padded_gt_labels.append(padded_gt_labels_for_img)

            padded_gt_boxes_for_img = np.pad(datapoint["gt_boxes"], [[0, gt_padding], [0, 0]], 'constant')
            padded_gt_boxes.append(padded_gt_boxes_for_img)

            h_padding = max_height - datapoint["images"].shape[0]
            w_padding = max_width - datapoint["images"].shape[1]
            if cfg.MODE_MASK:
                padded_gt_masks_for_img = np.pad(datapoint["gt_masks"],
                                                 [[0, gt_padding], [0, h_padding], [0, w_padding]],
                                                 'constant')
                padded_gt_masks.append(padded_gt_masks_for_img)

        batched_datapoint["orig_gt_counts"] = np.stack(gt_counts)
        batched_datapoint["gt_labels"] = np.stack(padded_gt_labels)
        batched_datapoint["gt_boxes"] = np.stack(padded_gt_boxes)
        batched_datapoint["filenames"] = [d["filename"] for d in datapoint_list]
        if cfg.MODE_MASK:
            batched_datapoint["gt_masks"] = np.stack(padded_gt_masks)

        return batched_datapoint

    ds = DataFromList(batched_roidbs, shuffle=True)
    if cfg.TRAINER == 'horovod':
        # ds = MapData(ds, preprocess)
        ds = MultiThreadMapData(ds, 5, preprocess)  # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
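# Consumer-side sketch for the padded batch built above (field names follow
# the dict assembled in preprocess): orig_image_dims and orig_gt_counts record
# the true per-image sizes, so the padding can be undone downstream.
def unpad_datapoint(batched_datapoint, i):
    h, w, _ = batched_datapoint['orig_image_dims'][i]
    n = batched_datapoint['orig_gt_counts'][i]
    image = batched_datapoint['images'][i, :h, :w, :]
    gt_boxes = batched_datapoint['gt_boxes'][i, :n]
    gt_labels = batched_datapoint['gt_labels'][i, :n]  # padding slots hold -1
    return image, gt_boxes, gt_labels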
def get_train_dataflow(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """
    imgs = COCODetection.load_many(config.BASEDIR, config.TRAIN_DATASET,
                                   add_gt=True, add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs))  # log invalid training

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass, is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is invalid for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        # masks
        segmentation = img.get('segmentation', None)
        if segmentation is not None:
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

        # from viz import draw_annotation, draw_mask
        # viz = draw_annotation(im, boxes, klass)
        # for mask in masks:
        #     viz = draw_mask(viz, mask)
        # tpviz.interactive_imshow(viz)
        return ret

    ds = MapData(ds, preprocess)
    ds = PrefetchDataZMQ(ds, 1)
    return ds
def get_train_dataflow():
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    ds = DataFromList(roidbs, shuffle=True)
    # for now let's not do flipping to keep things simple
    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)
    ])  #, imgaug.Flip(horiz=True)])

    if cfg.MODE_HARD_MINING:
        from annoy import AnnoyIndex
        hard_mining_index = AnnoyIndex(128, 'euclidean')
        hard_mining_index.load(cfg.HARD_MINING_DATA_PATH + "/index_all/index.ann")
        names_path = cfg.HARD_MINING_DATA_PATH + "index_all/names.txt"
        hard_mining_names_all = []
        with open(names_path) as f:
            for l in f:
                hard_mining_names_all.append(l.strip())
        hard_example_names_got = [x[7:] for x in hard_mining_names_all if x.startswith("GOT10k/")]
        hard_example_names_vid = [x[12:] for x in hard_mining_names_all if x.startswith("ImageNetVID/")]
        hard_example_names_ytbvos = [x[11:] for x in hard_mining_names_all if x.startswith("YouTubeVOS/")]
        hard_example_names_lasot = [x[6:] for x in hard_mining_names_all if x.startswith("LaSOT/")]
        assert len(hard_example_names_got) > 0
        assert len(hard_example_names_vid) > 0
        assert len(hard_example_names_ytbvos) > 0
        assert len(hard_example_names_lasot) > 0
        hard_example_names_got.sort()
        hard_example_names_vid.sort()
        hard_example_names_ytbvos.sort()
        hard_example_names_lasot.sort()
        hard_mining_names = {
            "all": hard_mining_names_all,
            "GOT10k": hard_example_names_got,
            "ImageNetVID": hard_example_names_vid,
            "YouTubeVOS": hard_example_names_ytbvos,
            "LaSOT": hard_example_names_lasot
        }
    else:
        hard_mining_index = None
        hard_mining_names = None

    def preprocess(roidb):
        if roidb.startswith("VID/"):
            return _preprocess_imagenet_vid(roidb[4:], aug, hard_mining_index, hard_mining_names)
        elif roidb.startswith("DAVIS/"):
            return _preprocess_davis_like(
                roidb[6:], aug,
                os.path.join(cfg.DATA.DAVIS2017_ROOT, "Annotations", "480p"))
        elif roidb.startswith("YouTubeVOS/"):
            return _preprocess_davis_like(
                roidb[11:], aug,
                os.path.join(cfg.DATA.YOUTUBE_VOS_ROOT, "train", "Annotations"),
                "YouTubeVOS", hard_mining_index, hard_mining_names)
        elif roidb.startswith("GOT10K/"):
            return _preprocess_got10k(roidb[7:], aug, hard_mining_index, hard_mining_names)
        elif roidb.startswith("LaSOT/"):
            return _preprocess_lasot(roidb[6:], aug, hard_mining_index, hard_mining_names)
        elif roidb.startswith("YouTube-BB/"):
            return _preprocess_youtube_bb(roidb[11:], aug)
        elif roidb.startswith("TrackingNet/"):
            return _preprocess_trackingnet(roidb[12:], aug)
        else:
            assert False

    #ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    #ds = MapData(ds, preprocess)
    if cfg.DATA.DEBUG_VIS or not cfg.DATA.MULTITHREAD:
        ds = MapData(ds, preprocess)
    else:
        #ds = MultiThreadMapData(ds, 6, preprocess)
        ds = MultiThreadMapData(ds, 8, preprocess, buffer_size=80)
    return ds
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    imgs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN,
                                   add_gt=True, add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

        # from viz import draw_annotation, draw_mask
        # viz = draw_annotation(im, boxes, klass)
        # for mask in masks:
        #     viz = draw_mask(viz, mask)
        # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)  # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:
    input image: (h, w, 3),
    semantic label image: (h, w, 1)
    """
    # imgs is a list, where each element is a dict containing 'fn_img' and 'fn_label'
    imgs = load_many_from_db(cfg.DATA.NAME, add_gt=True, is_train=True)
    # imgs = COCODetection.load_many(
    #     cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str
    boxes: kx4 floats
    class: k integers
    difficult: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)  # log invalid training

    ds = DataFromList(imgs, shuffle=True)

    mean_bgr = np.array(cfg.PREPROC.PIXEL_MEAN[::-1])

    if cfg.DATA.NAME == 'cityscapes':
        aspect_exp = 1.1
    elif cfg.DATA.NAME == 'cocostuff':
        aspect_exp = 1.1  #2.0
    else:
        logger.warn('Dataset name not known.')
        assert False

    aug = imgaug.AugmentorList([
        SSDCropRandomShape(cfg.PREPROC.INPUT_SHAPE_TRAIN, aspect_exp=aspect_exp, mean_rgbgr=mean_bgr),
        SSDResize(cfg.PREPROC.INPUT_SHAPE_TRAIN),
        imgaug.Flip(horiz=True),
        SSDColorJitter(mean_rgbgr=mean_bgr)
    ])
    aug_label = imgaug.AugmentorList([
        SSDCropRandomShape(cfg.PREPROC.INPUT_SHAPE_TRAIN, aspect_exp=aspect_exp, mean_rgbgr=[255, ]),
        SSDResize(cfg.PREPROC.INPUT_SHAPE_TRAIN, interp=cv2.INTER_NEAREST),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fn_img, fn_label = img['fn_img'], img['fn_label']
        # load head (and landmark) data as well
        im = cv2.imread(fn_img, cv2.IMREAD_COLOR)
        if fn_label.endswith('.mat'):  # cocostuff
            label = loadmat(fn_label)['S'].astype(int)
            label = (label - 1).astype(np.uint8)  # -1 becomes 255
        else:
            label = cv2.imread(fn_label, cv2.IMREAD_GRAYSCALE)
        label = np.expand_dims(label, 2)
        assert (im is not None) and (label is not None), fn_img
        im = im.astype('float32')
        # label = label.astype('int32')

        # augmentation
        im, params = aug.augment_return_params(im)
        # TODO: better way to adjust label?
        params_label = deepcopy(params[:-1])
        params_label[0].mean_rgbgr = [255, ]
        params_label[1].interp = cv2.INTER_NEAREST
        label = aug_label.augment_with_params(label, params_label)
        label = label.astype('int32')

        ret = [im, label]
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)  # MPI does not like fork()
    else:
        # ds = MapData(ds, preprocess)  # for debugging
        ds = MultiProcessMapDataZMQ(ds, cfg.PREPROC.NUM_WORKERS, preprocess)
    ds = BatchData(ds, cfg.PREPROC.BATCH_SIZE)
    return ds
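# The key idiom above is replaying the image's augmentation parameters on the
# label map so the geometric transforms stay pixel-aligned. A toy check of that
# idiom with a deterministic flip (assumes a tensorpack version providing
# augment_return_params/augment_with_params, as used above):
import numpy as np
from tensorpack.dataflow import imgaug

pair_aug = imgaug.AugmentorList([imgaug.Flip(horiz=True, prob=1.0)])
im = np.arange(12, dtype='float32').reshape(2, 6, 1)
label = np.arange(12, dtype='int32').reshape(2, 6, 1)

im_aug, params = pair_aug.augment_return_params(im)
label_aug = pair_aug.augment_with_params(label, params)
assert (im_aug.astype('int32') == label_aug).all()  # geometry stayed aligned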