def get_pascal_voc_train_dataflow(batch_size=1):
    """Generate preprocessed Pascal VOC training batches without end.

    This function is a generator, so nothing below runs until the first
    ``next()`` call: dataset registration, roidb loading and filtering all
    happen lazily at that point.

    Args:
        batch_size: forwarded to ``AspectGroupingDataFlow`` as its batch size.

    Yields:
        The result of ``TrainingDataPreprocessor()`` applied to one batch of
        roidbs produced by ``AspectGroupingDataFlow``.
    """
    from dataset import register_pascal_voc

    register_pascal_voc(os.path.expanduser("/media/ubuntu/Working/voc2012/VOC2012/"))
    print("In train dataflow")

    roidbs = []
    for dataset_name in cfg.DATA.TRAIN:
        roidbs.extend(DatasetRegistry.get(dataset_name).training_roidbs())
    print_class_histogram(roidbs)
    print("Done loading roidbs")

    # Keep only images with at least one non-crowd box. This filter is for
    # training only and must not be applied for testing.
    total = len(roidbs)
    roidbs = [
        entry for entry in roidbs
        if len(entry["boxes"][entry["is_crowd"] == 0]) > 0
    ]
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
            total - len(roidbs), len(roidbs)
        )
    )

    # Bucket the images by height/width ratio around a single threshold of 1.
    aspect_grouping = [1]
    aspect_ratios = [float(entry["height"]) / float(entry["width"]) for entry in roidbs]
    group_ids = _quantize(aspect_ratios, aspect_grouping)

    index_flow = DataFromList(np.arange(len(roidbs)), shuffle=True)
    index_flow.reset_state()
    batches = iter(
        AspectGroupingDataFlow(
            roidbs, index_flow, group_ids, batch_size=batch_size, drop_uneven=True
        )
    )

    preprocess = TrainingDataPreprocessor()
    while True:
        yield preprocess(next(batches))
def get_train_aseval_dataflow():
    """Build a dataflow over the PRW images loaded by ``PRWDataset.load()``.

    The images are kept in their loaded order (no shuffling, no filtering).

    Returns:
        A ``MapData`` dataflow whose datapoints are
        ``[file_name, resized_float32_image, (orig_height, orig_width)]``.
    """
    prw = PRWDataset(cfg.DATA.BASEDIR)
    imgs = prw.load()
    ds = DataFromList(imgs, shuffle=False)
    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(img):
        fname = img['file_name']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None, so the check has to
        # come before the first attribute access on `im`; otherwise the
        # failure surfaces as an AttributeError without the file name.
        assert im is not None, fname
        orig_shape = im.shape[:2]
        im = im.astype('float32')
        # Only the resized image is needed; the augmentation params are unused.
        im, _ = aug.augment_return_params(im)
        return [fname, im, orig_shape]

    ds = MapData(ds, preprocess)
    return ds
def get_resnet_train_dataflow():
    """Build the shuffled, batched and prefetched ResNet training dataflow.

    Each record from ``ResnetDetection.load_many`` is read from disk, resized
    to ``config.RESNET_SIZE`` square, passed through ``strong_aug()`` and then
    through the augmentors returned by ``get_resnet_augmentor()``.

    Returns:
        A ``PrefetchDataZMQ`` dataflow of ``[image, label]`` batches of size
        ``config.RESNET_BATCH``.
    """
    records = list(
        ResnetDetection.load_many(config.BASEDIR, config.TRAIN_DATASET))
    flow = DataFromList(records, shuffle=True)
    augmentors = get_resnet_augmentor()

    def preprocess(record):
        image_path = record['image_data']
        record['id']  # looked up like the original, value not needed
        label = record['with_ship']

        pixels = cv2.imread(image_path)
        pixels = cv2.resize(pixels, (config.RESNET_SIZE, config.RESNET_SIZE))
        pixels = strong_aug()(image=pixels)['image']
        return [pixels, label]

    flow = MapData(flow, preprocess)
    flow = AugmentImageComponent(flow, augmentors, copy=False)
    flow = BatchData(flow, config.RESNET_BATCH)
    return PrefetchDataZMQ(flow, 6)
def get_debug_dataflow(add_mask=True, imageHW=768):
    """Build a debugging dataflow that yields only the ground-truth boxes.

    For every training image the annotation is fetched from
    ``train_ship_segmentations_v2.csv``, image and mask are resized with
    ``fix_resize_transform_range`` and the mask is converted to annotations;
    only the boxes are returned.

    Args:
        add_mask: forwarded to ``Detection.load_many``.
        imageHW: side length used for both dimensions of the resize target.

    Returns:
        A ``PrefetchDataZMQ`` dataflow whose datapoints are the ``boxes``
        value produced by ``multi_mask_to_annotation``.
    """
    records = list(
        Detection.load_many(
            config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=add_mask))

    import os
    import pandas as pd

    annotations = (
        pd.read_csv(
            os.path.join(config.BASEDIR, 'train_ship_segmentations_v2.csv'),
            engine="python")
        .dropna(axis=0)
        .set_index('ImageId')
    )

    flow = DataFromList(records, shuffle=True)

    def preprocess(record):
        image_path = record['image_data']
        image_id = record['id']
        multi_mask = getAnnotation(annotations, image_id)
        pixels = cv2.imread(image_path)
        pixels, multi_mask = fix_resize_transform_range(
            pixels, multi_mask, [imageHW, imageHW], 1.0)
        # Unpack all four values so a wrong-arity result still fails here.
        boxes, _klass, _masks, _is_crowd = multi_mask_to_annotation(multi_mask)
        return boxes

    flow = MapData(flow, preprocess)
    return PrefetchDataZMQ(flow, 6)
def get_val_dataflow(
        datadir, batch_size, augmentors,
        parallel=None, num_splits=None, split_index=None):
    """Build the ILSVRC12 validation dataflow, optionally one shard of it.

    Args:
        datadir: dataset root passed to ``dataset.ILSVRC12Files``; not None.
        batch_size: batch size for ``BatchData`` (a final partial batch is kept).
        augmentors: list of augmentors applied to every image.
        parallel: number of mapper threads; defaults to
            ``min(40, multiprocessing.cpu_count())``.
        num_splits: when given, the file list is cut into this many equally
            sized contiguous splits.
        split_index: which split to serve; must be ``< num_splits``.

    Returns:
        A ``BatchData`` dataflow of ``(image, class)`` batches.
    """
    assert datadir is not None
    assert isinstance(augmentors, list)
    if parallel is None:
        parallel = min(40, multiprocessing.cpu_count())

    if num_splits is not None:
        assert split_index < num_splits
        file_flow = dataset.ILSVRC12Files(datadir, 'val', shuffle=False)
        file_flow.reset_state()
        all_files = list(file_flow.get_data())
        logger.info("#ValidationData = {}".format(len(all_files)))

        per_split = len(all_files) // num_splits
        first = per_split * split_index
        last = min(per_split * (split_index + 1), len(all_files))
        logger.info("#ValidationSplit = {} - {}".format(first, last))
        ds = DataFromList(all_files[first:last], shuffle=False)
    else:
        ds = dataset.ILSVRC12Files(datadir, 'val', shuffle=False)

    aug = imgaug.AugmentorList(augmentors)

    def mapf(dp):
        path, label = dp
        pixels = cv2.imread(path, cv2.IMREAD_COLOR)
        return aug.augment(pixels), label

    ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True)
    # No PrefetchDataZMQ here: do not fork() under MPI.
    return BatchData(ds, batch_size, remainder=True)
def get_train_dataflow(add_mask=True):
    """Build the shuffled BRATS segmentation training dataflow.

    Records come from ``BRATS_SEG.load_from_file`` when
    ``config.CROSS_VALIDATION`` is set, otherwise from
    ``BRATS_SEG.load_many``. No filtering is applied.

    Args:
        add_mask: forwarded to ``BRATS_SEG.load_many`` (unused on the
            cross-validation path).

    Returns:
        A ``PrefetchDataZMQ`` dataflow of ``[images, weights, labels]``
        batches of size ``config.BATCH_SIZE``.
    """
    if config.CROSS_VALIDATION:
        records = BRATS_SEG.load_from_file(config.BASEDIR, config.TRAIN_DATASET)
    else:
        records = BRATS_SEG.load_many(
            config.BASEDIR, config.TRAIN_DATASET, add_gt=False, add_mask=add_mask)
    flow = DataFromList(list(records), shuffle=True)

    def preprocess(data):
        if config.NO_CACHE:
            # Nothing cached: crop the brain region from the raw volumes now.
            _fname, gt, im = data['file_name'], data['gt'], data['image_data']
            cropped = crop_brain_region(im, gt)
        else:
            cropped = data['preprocessed']
        volume_list, label, weight, _, _ = cropped
        sample = sampler3d(volume_list, label, weight)
        return [sample['images'], sample['weights'], sample['labels']]

    flow = MapData(flow, preprocess)
    flow = BatchData(flow, config.BATCH_SIZE)
    return PrefetchDataZMQ(flow, 6)
def get_batch_train_dataflow(roidbs, batch_size):
    """Tensorpack batch text dataflow.

    ``roidbs`` is cut into consecutive groups of exactly ``batch_size``
    entries; a trailing group with fewer entries is dropped so that every
    batch can be stacked into fixed-size arrays.

    Args:
        roidbs: iterable of dicts with the keys ``filename``, ``label``,
            ``mask`` and ``polygon``.
        batch_size: number of roidbs per batch.

    Returns:
        A ``MultiThreadMapData`` dataflow whose datapoints are dicts with the
        stacked fields ``image``, ``label``, ``mask`` and ``normalized_bbox``
        plus the constants ``is_training`` and ``dropout_keep_prob``.
    """
    # Flush a batch as soon as it is full. The previous implementation only
    # flushed when the *next* batch started, which silently discarded the
    # last full batch whenever len(roidbs) was a multiple of batch_size.
    batched_roidbs = []
    batch = []
    for d in roidbs:
        batch.append(d)
        if len(batch) == batch_size:
            batched_roidbs.append(batch)
            batch = []

    def preprocess(roidb_batch):
        """Tensorpack batch text data preprocess function."""
        datapoint_list = []
        for roidb in roidb_batch:
            filename = roidb['filename']
            label = roidb['label']
            mask = roidb['mask']
            polygon = roidb['polygon']

            img = cv2.imread(filename)
            # cv2.imread returns None for unreadable files; fail with the
            # file name instead of an opaque error inside cvtColor.
            assert img is not None, filename
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # Prefer the polygon-rectified crop, unless it is smaller than
            # half a stride in either dimension; then keep the full image.
            image = affine_transform(img, polygon)
            too_small = (image.shape[0] < cfg.stride / 2
                         or image.shape[1] < cfg.stride / 2)
            if not too_small:
                img = image

            largest_side = np.random.randint(cfg.crop_min_size, cfg.image_size)
            img = aspect_preserving_resize(img, largest_side)
            img, crop_bbox = padding_image(img, cfg.image_size)
            normalized_bbox = [coord / cfg.image_size for coord in crop_bbox]
            img = img.astype("float32") / 255.

            datapoint_list.append({
                "image": img,
                "label": label,
                "mask": mask,
                "normalized_bbox": normalized_bbox,
            })

        batched_datapoint = {"is_training": True, "dropout_keep_prob": 0.5}
        for stackable_field in ["image", "label", "mask", "normalized_bbox"]:
            batched_datapoint[stackable_field] = np.stack(
                [d[stackable_field] for d in datapoint_list])
        return batched_datapoint

    ds = DataFromList(batched_roidbs, shuffle=True)
    ds = MultiThreadMapData(ds, cfg.num_threads, preprocess)
    return ds
def build_iter(self, samples):
    """Return a data iterator over ``samples``.

    Samples are shuffled, mapped by ``self._map_func`` on
    ``self.thread_num`` threads, batched into
    ``self.num_gpu * self.batch_size`` items and prefetched by
    ``self.process_num`` processes.

    Args:
        samples: list of raw samples accepted by ``self._map_func``.

    Returns:
        The result of ``get_data()`` on the fully assembled dataflow.
    """
    mapper = partial(self._map_func, is_training=self.training_flag)

    flow = MultiThreadMapData(
        DataFromList(samples, shuffle=True),
        self.thread_num,
        mapper,
        buffer_size=self.buffer_size)
    flow = BatchData(flow, self.num_gpu * self.batch_size)
    flow = MultiProcessPrefetchData(flow, self.prefetch_size, self.process_num)

    flow.reset_state()
    return flow.get_data()
def get_train_dataflow():
    """Return the training dataflow over all datasets in ``cfg.DATA.TRAIN``.

    Every roidb with at least one non-crowd box is shuffled and mapped
    through ``TrainingDataPreprocessor(cfg)``. The mapper runs on threads
    under horovod, on processes otherwise, and inline when
    ``cfg.DATA.NUM_WORKERS`` is 0.

    Per the original documentation each datapoint consists of:
        an image: (h, w, 3),
        1 or more pairs of (anchor_labels, anchor_boxes):
            anchor_labels: (h', w', NA)
            anchor_boxes: (h', w', NA, 4)
        gt_boxes: (N, 4)
        gt_labels: (N,)
        and, if MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = []
    for dataset_name in cfg.DATA.TRAIN:
        roidbs.extend(DatasetRegistry.get(dataset_name).training_roidbs())
    print(
        "---------------------------------------------------------------- data.py:343"
    )
    print_class_histogram(roidbs)

    # Images without any non-crowd gt box are dropped. This filter is for
    # training only and must not be applied for testing.
    total = len(roidbs)
    roidbs = [
        entry for entry in roidbs
        if len(entry["boxes"][entry["is_crowd"] == 0]) > 0
    ]
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(total - len(roidbs), len(roidbs)))

    flow = DataFromList(roidbs, shuffle=True)
    preprocess = TrainingDataPreprocessor(cfg)

    if not cfg.DATA.NUM_WORKERS > 0:
        return MapData(flow, preprocess)

    if cfg.TRAINER == "horovod":
        # One dataflow per process, so a large buffer is unnecessary.
        # Threads are used because MPI does not like fork().
        return MultiThreadMapData(
            flow, cfg.DATA.NUM_WORKERS, preprocess,
            buffer_size=cfg.DATA.NUM_WORKERS * 10)

    return MultiProcessMapData(
        flow, cfg.DATA.NUM_WORKERS, preprocess,
        buffer_size=cfg.DATA.NUM_WORKERS * 20)
def create_dataflow(graphs: List[GraphAdjacencyTuple],
                    max_nodes: int,
                    metrics_fn: Callable[[np.ndarray, np.ndarray], float],
                    validator: Optional[GraphValidatorFn] = None,
                    shuffle: bool = False) -> SelectComponent:
    """Assemble the graph dataflow and keep components 0, 2 and 3.

    The pipeline is: ``DataFromList`` -> ``AppendNodeFeatures`` (data key
    ``'AtomCode'``) -> ``GraphConvEmbedding`` -> ``AppendMolMetrics`` ->
    ``SelectComponent``.

    Args:
        graphs: input graphs.
        max_nodes: forwarded to ``GraphConvEmbedding``.
        metrics_fn: forwarded to ``AppendMolMetrics``.
        validator: optional validator forwarded to ``GraphConvEmbedding``.
        shuffle: whether ``DataFromList`` shuffles the graphs.

    Returns:
        The final ``SelectComponent`` dataflow.
    """
    source = DataFromList(graphs, shuffle)
    with_features = AppendNodeFeatures(source, data_key='AtomCode')
    embedded = GraphConvEmbedding(with_features, max_nodes, validator)
    with_metrics = AppendMolMetrics(
        embedded, metrics_fn, index_edges=0, index_node=2)
    return SelectComponent(with_metrics, [0, 2, 3])
def get_plain_train_dataflow(batch_size=2):
    """Return an iterator over preprocessed training roidbs.

    No aspect-ratio grouping is done here. Roidbs from every dataset in
    ``cfg.DATA.TRAIN`` that have at least one non-crowd box are shuffled and
    mapped through ``TrainingDataPreprocessor()`` on
    ``cfg.DATA.NUM_WORKERS`` processes.

    Args:
        batch_size: accepted for interface compatibility; the body does not
            use it.

    Returns:
        The iterator of the ``MultiProcessMapData`` dataflow, after
        ``reset_state()`` has been called on it.
    """
    print("In train dataflow")
    roidbs = []
    for dataset_name in cfg.DATA.TRAIN:
        roidbs.extend(DatasetRegistry.get(dataset_name).training_roidbs())
    print_class_histogram(roidbs)
    print("Done loading roidbs")

    # Drop images without non-crowd gt boxes; for training only, this filter
    # must not be applied for testing.
    total = len(roidbs)
    roidbs = [
        entry for entry in roidbs
        if len(entry["boxes"][entry["is_crowd"] == 0]) > 0
    ]
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
            total - len(roidbs), len(roidbs)
        )
    )

    source = DataFromList(roidbs, shuffle=True)
    preprocess = TrainingDataPreprocessor()
    workers = cfg.DATA.NUM_WORKERS
    mapped = MultiProcessMapData(
        source, workers, preprocess, buffer_size=cfg.DATA.NUM_WORKERS * 20)
    mapped.reset_state()
    return iter(mapped)
def make_data():
    """Return an iterator over batched, preprocessed COCO joints training data.

    The training split comes from ``COCOJoints().load_data(1)``; every sample
    is mapped through ``Preprocessing`` and grouped into list-style batches
    of ``cfg.batch_size``.

    Returns:
        The result of ``get_data()`` on the batched dataflow, after
        ``reset_state()``.
    """
    from COCOAllJoints import COCOJoints
    from dataset import Preprocessing

    train_data, _ = COCOJoints().load_data(1)

    from tensorpack.dataflow import DataFromList, MapData, BatchData

    flow = BatchData(
        MapData(DataFromList(train_data), Preprocessing),
        cfg.batch_size,
        use_list=True)
    flow.reset_state()
    return flow.get_data()
def get_train_dataflow(roidb):
    """Tensorpack text dataflow.

    ``roidb`` is shuffled, mapped through ``TextDataPreprocessor(cfg)`` on
    ``cfg.num_threads`` threads and prefetched. No batching is done here.

    Args:
        roidb: list of records accepted by ``TextDataPreprocessor``.

    Returns:
        A ``PrefetchData`` dataflow with a queue of 100 datapoints served by
        ``multiprocessing.cpu_count() // 4`` processes.
    """
    preprocess = TextDataPreprocessor(cfg)
    threaded = MultiThreadMapData(
        DataFromList(roidb, shuffle=True),
        cfg.num_threads,
        preprocess,
        buffer_size=cfg.num_threads * 10)
    return PrefetchData(threaded, 100, multiprocessing.cpu_count() // 4)
def get_resnet_val_dataflow():
    """Build the unshuffled ResNet validation dataflow.

    Every record from ``ResnetDetection.load_many`` is read from disk and
    resized to ``config.RESNET_SIZE`` square; no augmentation is applied.

    Returns:
        A ``PrefetchDataZMQ`` dataflow (one process) of ``[image, label]``
        batches of size ``config.RESNET_BATCH``.
    """
    records = list(
        ResnetDetection.load_many(config.BASEDIR, config.VAL_DATASET))
    flow = DataFromList(records, shuffle=False)

    def f(record):
        image_path = record['image_data']
        label = record['with_ship']
        pixels = cv2.imread(image_path)
        pixels = cv2.resize(pixels, (config.RESNET_SIZE, config.RESNET_SIZE))
        return [pixels, label]

    flow = BatchData(MapData(flow, f), config.RESNET_BATCH)
    return PrefetchDataZMQ(flow, 1)
def get_val_dataflow(datadir,
                     batch_size,
                     augmentors=None,
                     parallel=None,
                     num_splits=None,
                     split_index=None,
                     dataname="val"):
    """Build an endlessly repeating, shuffled ILSVRC12 dataflow in RGB.

    Args:
        datadir: dataset root passed to ``dataset.ILSVRC12Files``; not None.
        batch_size: batch size; a final partial batch is dropped.
        augmentors: list of augmentors; defaults to
            ``fbresnet_augmentor(False)``.
        parallel: number of mapper threads; defaults to
            ``min(40, multiprocessing.cpu_count())``.
        num_splits: sharding is disabled; any non-None value hits
            ``assert False``.
        split_index: shard index, only meaningful with ``num_splits``.
        dataname: split name passed to ``dataset.ILSVRC12Files``.

    Returns:
        A ``RepeatedData`` dataflow (``num=-1``) of ``(image, class)`` batches.
    """
    if augmentors is None:
        augmentors = fbresnet_augmentor(False)
    assert datadir is not None
    assert isinstance(augmentors, list)
    if parallel is None:
        parallel = min(40, multiprocessing.cpu_count())

    if num_splits is not None:
        # Sharding validation data is deliberately disabled. The code after
        # the assert is only reachable when asserts are stripped (python -O).
        assert False
        assert split_index < num_splits
        file_flow = dataset.ILSVRC12Files(datadir, dataname, shuffle=True)
        file_flow.reset_state()
        all_files = list(file_flow.get_data())
        logger.info("Number of validation data = {}".format(len(all_files)))
        per_split = len(all_files) // num_splits
        first = per_split * split_index
        last = min(per_split * (split_index + 1), len(all_files))
        logger.info("Local validation split = {} - {}".format(first, last))
        ds = DataFromList(all_files[first:last], shuffle=True)
    else:
        ds = dataset.ILSVRC12Files(datadir, dataname, shuffle=True)

    aug = imgaug.AugmentorList(augmentors)

    def mapf(dp):
        path, label = dp
        bgr = cv2.imread(path, cv2.IMREAD_COLOR)
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        return aug.augment(rgb), label

    ds = MultiThreadMapData(
        ds, parallel, mapf, buffer_size=min(2000, ds.size()), strict=True)
    ds = BatchData(ds, batch_size, remainder=False)
    # Threads only: do not fork() under MPI.
    return RepeatedData(ds, num=-1)
def build_dataflow(files):
    """Build a batched dataflow of ``(augmented_image, index)`` pairs.

    Images are augmented with ``get_basic_augmentor(isTrain=False)`` on 8
    worker processes and batched with the module-level ``local_batch_size``.

    Args:
        files: list of ``(index, file_name, label)`` triples; the label is
            not part of the output.

    Returns:
        The ``BatchData`` dataflow, with ``reset_state()`` already called.
    """
    aug = imgaug.AugmentorList(get_basic_augmentor(isTrain=False))

    def mapper(dp):
        index, path, _label = dp
        return aug.augment(cv2.imread(path)), index

    flow = MultiProcessMapDataZMQ(
        DataFromList(files), num_proc=8, map_func=mapper, strict=True)
    flow = BatchData(flow, local_batch_size)
    flow.reset_state()
    return flow
def get_query_dataflow():
    """Build the dataflow over the PRW query images.

    The records from ``PRWDataset.load_query()`` are served in order, without
    filtering. Each image is resized with ``CustomResize`` and its boxes are
    transformed with the same augmentation parameters.

    Returns:
        A ``MapData`` dataflow of ``[image, boxes, re_id_class]`` datapoints.
    """
    query_records = PRWDataset(cfg.DATA.BASEDIR).load_query()
    flow = DataFromList(query_records, shuffle=False)
    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(record):
        path = record['file_name']
        # Work on a copy so the stored annotation is never modified.
        boxes = np.copy(record['boxes'])
        re_id_class = record['re_id_class']

        pixels = cv2.imread(path, cv2.IMREAD_COLOR)
        assert pixels is not None, path
        pixels = pixels.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # Resize the image, then push the boxes through the same transform
        # via their corner points.
        pixels, params = aug.augment_return_params(pixels)
        corners = aug.augment_coords(box_to_point8(boxes), params)
        boxes = point8_to_box(corners)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        return [pixels, boxes, re_id_class]

    return MapData(flow, preprocess)
def get_train_dataflow_coco(add_mask=False):
    """Return the COCO training dataflow.

    Per the original documentation each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]

    Args:
        add_mask: load segmentations and append one image-sized binary mask
            per non-crowd box to every datapoint.

    Returns:
        A ``MultiProcessMapData`` dataflow (4 processes, buffer of 20).
    """
    records = COCODetection.load_many(
        config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=add_mask)
    # Valid training images have at least one box. This filter is for
    # training only and must not be applied for testing.
    records = [rec for rec in records if len(rec['boxes']) > 0]

    flow = DataFromList(records, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True),
    ])

    def preprocess(img):
        print("start preproc coco")
        t0 = time.time()

        fname, boxes, klass = img['file_name'], img['boxes'], img['class']
        second_klass = img['second_class'] if config.USE_SECOND_HEAD else None
        is_crowd = img['is_crowd']

        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("coco: preproc_img returned None on", fname)
            return None

        ret, params = res
        im, boxes = ret[0], ret[3]

        if add_mask:
            # Augmentation modifies the polygons in place, so work on a copy.
            segmentation = copy.deepcopy(img.get('segmentation', None))
            segmentation = [
                polys for k, polys in enumerate(segmentation)
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes), (len(segmentation),
                                                     len(boxes))
            # One image-sized binary mask per box, values in {0, 1}.
            masks = np.asarray(
                [
                    segmentation_to_mask(
                        [aug.augment_coords(p, params) for p in polys],
                        im.shape[0], im.shape[1])
                    for polys in segmentation
                ],
                dtype='uint8')
            ret.append(masks)

        elapsed = time.time() - t0
        print("coco example done, elapsed:", elapsed)
        return ret

    return MultiProcessMapData(
        flow, nr_proc=4, map_func=preprocess, buffer_size=20)
def get_train_dataflow_mapillary(add_mask=False, map_to_coco=False):
    """Return the Mapillary training dataflow.

    Images are globbed from ``config.MAPILLARY_PATH + "training/images/"``;
    the instance label PNG of the same name is read from
    ``training/instances/``. Instance ids are decoded as ``class * 256 + n``
    and only classes in ``config.MAPILLARY_CAT_IDS_TO_USE`` are kept.

    Args:
        add_mask: append the augmented per-instance binary masks to every
            datapoint.
        map_to_coco: map classes through ``config.MAPILLARY_TO_COCO_MAP`` and
            ``COCOMeta.category_id_to_class_id`` instead of renumbering them
            contiguously from 1.

    Returns:
        A ``MultiProcessMapData`` dataflow (8 processes, buffer of 35). The
        mapper returns None for images without usable instances or when
        ``preproc_img`` returns None.
    """
    train_img_path = config.MAPILLARY_PATH + "training/images/"
    train_label_path = config.MAPILLARY_PATH + "training/instances/"
    imgs = glob.glob(train_img_path + "*.jpg")
    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(fname):
        print("start preproc mapillary")
        start = time.time()
        label_fname = fname.replace(train_img_path,
                                    train_label_path).replace(".jpg", ".png")
        # The context manager closes the label image on every exit path,
        # including exceptions raised by the lookups or preproc_img below.
        with Image.open(label_fname) as pil_label:
            label = np.array(pil_label)
            instances = np.unique(label)
            instance_classes = [x // 256 for x in instances]

            # filter by categories we use
            instances_valid = [
                cls in config.MAPILLARY_CAT_IDS_TO_USE
                for cls in instance_classes
            ]
            instances = [
                inst for inst, valid in zip(instances, instances_valid)
                if valid
            ]
            instance_classes = [
                cls for cls, valid in zip(instance_classes, instances_valid)
                if valid
            ]
            if len(instances) == 0:
                print("no instances")
                return None

            if map_to_coco:
                instance_classes = [
                    config.MAPILLARY_TO_COCO_MAP[cls]
                    for cls in instance_classes
                ]
                instance_classes = [
                    config.VOID_LABEL if cls == config.VOID_LABEL else
                    COCOMeta.category_id_to_class_id[cls]
                    for cls in instance_classes
                ]
            else:
                # remap to contiguous numbers starting with 1
                instance_classes = [
                    config.MAPILLARY_CAT_IDS_TO_USE.index(cls) + 1
                    for cls in instance_classes
                ]

            masks = np.array([label == inst for inst in instances],
                             dtype=np.uint8)
            boxes = np.array(
                [get_bbox_from_segmentation_mask(mask) for mask in masks],
                dtype=np.float32)

            # `np.int` was only an alias of the builtin `int` and has been
            # removed from NumPy; `int` selects the same dtype.
            second_klass = np.array(instance_classes, dtype=int)
            klass = np.ones_like(second_klass)
            is_crowd = np.zeros_like(second_klass)

            res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
            if res is None:
                print("mapillary: preproc_img returned None on", fname)
                return None
            ret, params = res

            if add_mask:
                do_flip, h, w = params[1]
                assert do_flip in (True, False), do_flip
                # Replay the resize and the flip on the label image, then
                # rebuild the masks from the augmented label.
                label = np.array(pil_label.resize((w, h), Image.NEAREST))
                if do_flip:
                    label = label[:, ::-1]
                masks = np.array([label == inst for inst in instances],
                                 dtype=np.uint8)
                ret.append(masks)

            end = time.time()
            elapsed = end - start
            print("mapillary example done, elapsed:", elapsed)

            # Debug switch: flip to True to inspect datapoints interactively.
            VISUALIZE = False
            if VISUALIZE:
                from viz import draw_annotation, draw_mask
                config.CLASS_NAMES = [str(idx) for idx in range(81)]
                im = ret[0]
                boxes = ret[3]
                draw_klass = ret[-2]
                viz = draw_annotation(im, boxes, draw_klass)
                for mask in masks:
                    viz = draw_mask(viz, mask)
                tpviz.interactive_imshow(viz)
            return ret

    ds = MultiProcessMapData(ds, nr_proc=8, map_func=preprocess, buffer_size=35)
    return ds
def get_train_dataflow_davis(add_mask=False):
    """Return the DAVIS training dataflow.

    Images are globbed (sorted) from a hard-coded ``augment_images``
    directory; the label PNG with the same relative name is read from the
    matching ``augment_gt`` directory. Every distinct label value is treated
    as one instance, with ``klass`` 1 and ``second_klass`` 0.

    Args:
        add_mask: append the augmented per-instance binary masks to every
            datapoint.

    Returns:
        A ``MapData`` dataflow. The mapper returns None when the label has no
        instances or when ``preproc_img`` returns None.
    """
    # NOTE(review): machine-specific absolute paths; they must exist on the
    # host running this code.
    train_img_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_images/"
    train_label_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_gt/"
    imgs = sorted(glob.glob(train_img_path + "*/*.jpg"))
    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(fname):
        label_fname = fname.replace(train_img_path,
                                    train_label_path).replace(".jpg", ".png")
        # The context manager closes the label image on every exit path,
        # including exceptions raised by preproc_img below.
        with Image.open(label_fname) as pil_label:
            label = np.array(pil_label)
            instances = np.unique(label)
            if len(instances) == 0:
                print("no instances")
                return None

            masks = np.array([label == inst for inst in instances],
                             dtype=np.uint8)
            boxes = np.array(
                [get_bbox_from_segmentation_mask(mask) for mask in masks],
                dtype=np.float32)

            # One zero per instance. `np.int` was only an alias of the
            # builtin `int` and has been removed from NumPy.
            second_klass = np.zeros(len(instances), dtype=int)
            klass = np.ones_like(second_klass)
            is_crowd = np.zeros_like(second_klass)

            res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
            if res is None:
                print("davis: preproc_img returned None on", fname)
                return None
            ret, params = res

            if add_mask:
                do_flip, h, w = params[1]
                assert do_flip in (True, False), do_flip
                # Replay the resize and the flip on the label image, then
                # rebuild the masks from the augmented label.
                label = np.array(pil_label.resize((w, h), Image.NEAREST))
                if do_flip:
                    label = label[:, ::-1]
                masks = np.array([label == inst for inst in instances],
                                 dtype=np.uint8)
                ret.append(masks)

            # Debug switch: flip to True to inspect datapoints interactively.
            VISUALIZE = False
            if VISUALIZE:
                from viz import draw_annotation, draw_mask
                config.CLASS_NAMES = [str(idx) for idx in range(81)]
                im = ret[0]
                boxes = ret[3]
                draw_klass = ret[-2]
                viz = draw_annotation(im, boxes, draw_klass)
                for mask in masks:
                    viz = draw_mask(viz, mask)
                tpviz.interactive_imshow(viz)
            return ret

    ds = MapData(ds, preprocess)
    return ds
def get_train_dataflow(src):
    """Return a training dataflow built from per-image annotation text files.

    Every ``*.txt`` file in ``src`` describes one image: the first line is
    the image path, the second line is skipped, and each following line is
    ``<class_name> <x1> <y1> <x2> <y2>`` with 1-based coordinates. The last
    element after splitting on newlines is ignored (the files are expected to
    end with a newline).

    Per the original documentation each datapoint consists of:
        an image: (h, w, 3),
        1 or more pairs of (anchor_labels, anchor_boxes):
            anchor_labels: (h', w', NA)
            anchor_boxes: (h', w', NA, 4)
        gt_boxes: (N, 4)
        gt_labels: (N,)
        and, if MODE_MASK, gt_masks: (N, h, w)

    Args:
        src: directory containing the annotation text files.

    Returns:
        A ``MultiThreadMapData`` dataflow under horovod, otherwise a
        ``MultiProcessMapDataZMQ`` dataflow. The mapper returns None for
        inputs rejected as ``MalformedData``.
    """
    classes = (
        'BG',  # always index 0
        'bathtub', 'bed', 'bookshelf', 'box', 'chair', 'counter', 'desk',
        'door', 'dresser', 'garbage_bin', 'lamp', 'monitor', 'night_stand',
        'pillow', 'sink', 'sofa', 'table', 'toilet', 'tv')
    class_to_ind = {name: index for index, name in enumerate(classes)}

    # os.path.join yields the same paths as `src + f` when src ends with a
    # separator and also works when it does not.
    textfile_index = natsorted([
        os.path.join(src, f) for f in np.sort(os.listdir(src))
        if f.endswith('.txt')
    ])

    imgs = []
    for count, fn in enumerate(textfile_index, 1):
        print(str(count) + ':::', fn)
        # Close every annotation file right after reading it; the handle was
        # previously leaked for each file.
        with open(fn, 'r') as annotation_file:
            file_F = annotation_file.read().split('\n')

        each_file = {}
        each_file['file_name'] = file_F[0]
        im = cv2.imread(each_file['file_name'])
        # cv2.imread returns None for unreadable files; fail with the path
        # rather than an AttributeError on `.shape`.
        assert im is not None, each_file['file_name']
        each_file['height'] = im.shape[0]
        each_file['width'] = im.shape[1]

        boxes = []
        class_ = []
        for obj in file_F[2:-1]:
            objs_line = obj.split(' ')
            # Convert the 1-based coordinates to 0-based.
            x1 = float(objs_line[1]) - 1.0
            y1 = float(objs_line[2]) - 1.0
            x2 = float(objs_line[3]) - 1.0
            y2 = float(objs_line[4]) - 1.0
            # Give degenerate boxes a minimum width of one pixel.
            if x1 >= x2:
                x2 = x1 + 1
            boxes.append([x1, y1, x2, y2])
            class_.append(class_to_ind[objs_line[0]])

        each_file['boxes'] = np.array(boxes).astype(np.float32)
        each_file['class'] = np.array(class_).astype(np.int32)
        each_file['is_crowd'] = np.zeros_like(each_file['class']).astype(
            np.int8)
        imgs.append(each_file)

    # To train on your own data, change the loader above.
    # Produce "imgs" as a list of dict; the following keys are needed for
    # training:
    #   height, width: integer
    #   file_name: str, full path to the image
    #   boxes: numpy array of kx4 floats
    #   class: numpy array of k integers
    #   is_crowd: k booleans. Use k False if you don't know what it means.
    #   segmentation: k lists of numpy arrays (one for each box).
    #       Each list of numpy arrays corresponds to the mask for one
    #       instance. Each numpy array in the list is a polygon of shape Nx2,
    #       because one mask can be represented by N polygons.
    #       If your segmentation annotations are originally masks rather than
    #       polygons, either convert them, or the augmentation code below
    #       will need to be changed or skipped accordingly.

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)
    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        # Work on a copy so the stored annotation is never modified.
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation: transform the image, then push the boxes through the
        # same transform via their corner points.
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(
                    multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
def get_sniper_train_dataflow():
    """
    Return a dataflow that cuts COCO training images into SNIPER chips.

    As a side effect of mapping, every chip with at least one box is written
    to the ``out`` directory and one annotation line per chip
    (``<name> <class> <x1> <y1> <x2> <y2> ...``) is appended to
    ``train_512_annotation.txt``.

    Each datapoint produced by the mapper is a list, one entry per kept chip,
    of ``[chip_image, boxes, classes]``.

    NOTE(review): the original docstring advertised anchor labels/boxes, a
    scale index and masks per datapoint; the code below returns none of
    those.
    """
    OUTPUT_FILE = 'train_512_annotation.txt'
    OUTPUT_IMG_DIR = 'out'
    # NOTE(review): this handle is captured by `preprocess` and is never
    # closed or flushed inside this function.
    out_file = open(OUTPUT_FILE, 'w')

    # Flattens a dataflow of per-image chip lists into a dataflow of chips.
    # It is defined but not used: the line that applies it at the end of this
    # function is commented out.
    class SniperDataFlow(ProxyDataFlow):
        def __init__(self, ds):
            super(SniperDataFlow, self).__init__(ds)
            # self.ds = ds

        def size(self):
            raise NotImplementedError()

        def get_data(self):
            for img in self.ds.get_data():
                for chip in img:
                    yield chip

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)

    # aug = imgaug.AugmentorList([
    #     CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
    #     imgaug.Flip(horiz=True)
    # ])

    # Precomputed proposals, indexed in parallel by 'ids' and 'boxes'.
    assert os.path.isfile(cfg.SNIPER.PRN_PRE)
    proposal_pickle = pandas.read_pickle(cfg.SNIPER.PRN_PRE)

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        img_name = fname.split('/')[-1]
        # The id is parsed from the file name by dropping its first three and
        # last four characters.
        img_id = int(img_name[3:-4])
        # pretrained rpn proposals for negative chip extraction
        proposals = proposal_pickle['boxes'][proposal_pickle['ids'].index(
            img_id)]
        # from [x,y,w,h] to [x1,y1,x2,y2]
        # NOTE(review): for a 2-D (N, 4) array this slices rows, not columns
        # (`[:, 2:4]`); it also modifies the loaded proposals in place.
        # Confirm the layout of `proposals`.
        proposals[2:4] += proposals[0:2]
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
        chip_generator = Im2Chip(im,
                                 boxes,
                                 klass,
                                 proposals,
                                 cfg.SNIPER.SCALES,
                                 cfg.SNIPER.VALID_RANGES,
                                 is_crowd=is_crowd,
                                 chip_size=cfg.SNIPER.CHIP_SIZE,
                                 chip_stride=cfg.SNIPER.CHIP_STRIDE)
        # From here on the names hold per-chip sequences, indexed by chip
        # below.
        im, boxes, klass, scale_indices, is_crowd = chip_generator.genChipMultiScale(
        )
        rets = []
        for i in range(len(im)):
            try:
                # Chips without boxes are skipped; the raise below can
                # therefore never trigger.
                if len(boxes[i]) == 0:
                    continue
                if not len(boxes[i]):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once(
                    "Input {} is filtered for training: {}".format(
                        fname, str(e)), 'warn')
                ret = None
                continue
            # ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]
            #                                        ] + [scale_indices[i]*len(boxes[i])]
            # NOTE(review): `img_name` still carries the original extension,
            # so the chip name ends in "_<i>" rather than an image extension;
            # confirm cv2.imwrite accepts such a name.
            new_name = '%s_%d' % (img_name, i)
            cv2.imwrite('%s/%s' % (OUTPUT_IMG_DIR, new_name), im[i])

            ret = [im[i]] + [boxes[i], klass[i]]
            # One annotation line per chip: the name, then class and box for
            # every object, terminated by a newline after the last object.
            for j in range(len(klass[i])):
                if j == 0:
                    out_file.write(new_name)
                out_file.write(' %d %f %f %f %f' %
                               (klass[i][j], boxes[i][j][0], boxes[i][j][1],
                                boxes[i][j][2], boxes[i][j][3]))
                if j == len(klass[i]) - 1:
                    out_file.write('\n')
            rets.append(ret)
        return rets

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        # NOTE(review): `preprocess` writes to `out_file` from whichever
        # worker runs it; confirm the output is complete with 10 processes.
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    # ds = SniperDataFlow(ds)
    return ds
def get_train_dataflow(add_mask=False):
    """
    Return a training dataflow.

    Each datapoint is:
        image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]

    ``masks`` is appended only when the loaded image dict carries a
    ``'segmentation'`` entry (requested through ``add_mask``).

    Args:
        add_mask: forwarded to ``COCODetection.load_many``.
    """
    import copy  # local import: only needed for the polygon copy below

    imgs = COCODetection.load_many(config.BASEDIR,
                                   config.TRAIN_DATASET,
                                   add_gt=True,
                                   add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs))

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation: the same sampled parameters are applied to the image
        # and to the box corners
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass,
                                                       is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is invalid for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        # masks
        segmentation = img.get('segmentation', None)
        if segmentation is not None:
            # augment_coords may modify the polygon arrays in place, and the
            # image dicts are reused on every epoch: work on a private copy
            # so the stored annotations are never altered.
            segmentation = copy.deepcopy(segmentation)
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    ds = MapData(ds, preprocess)
    ds = PrefetchDataZMQ(ds, 1)
    return ds
def get_train_dataflow():
    """
    Return a training dataflow over the roidbs named by ``cfg.DATA.TRAIN``.

    Every roidb is a string of the form ``"<DATASET>/<rest>"``; the dataset
    prefix selects the ``_preprocess_*`` helper that builds the datapoint.

    When ``cfg.MODE_HARD_MINING`` is set, an Annoy index and the list of
    names it was built from are loaded from
    ``cfg.HARD_MINING_DATA_PATH/index_all`` and handed to the helpers that
    accept them.

    Raises:
        AssertionError: if a hard-mining name list is empty, or (from inside
            the mapping function) if a roidb has an unknown dataset prefix.
    """
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    ds = DataFromList(roidbs, shuffle=True)

    # for now let's not do flipping to keep things simple
    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)
    ])  # ,
    # imgaug.Flip(horiz=True)])

    if cfg.MODE_HARD_MINING:
        from annoy import AnnoyIndex
        # Build both paths with os.path.join so they resolve to the same
        # directory whether or not HARD_MINING_DATA_PATH ends with a slash.
        index_dir = os.path.join(cfg.HARD_MINING_DATA_PATH, "index_all")
        hard_mining_index = AnnoyIndex(128, 'euclidean')
        hard_mining_index.load(os.path.join(index_dir, "index.ann"))
        with open(os.path.join(index_dir, "names.txt")) as f:
            hard_mining_names_all = [line.strip() for line in f]

        def names_of(dataset):
            # Sorted names of one dataset with the "<dataset>/" prefix removed.
            prefix = dataset + "/"
            names = sorted(x[len(prefix):] for x in hard_mining_names_all
                           if x.startswith(prefix))
            assert len(names) > 0, "no hard mining names for " + dataset
            return names

        hard_mining_names = {
            "all": hard_mining_names_all,
            "GOT10k": names_of("GOT10k"),
            "ImageNetVID": names_of("ImageNetVID"),
            "YouTubeVOS": names_of("YouTubeVOS"),
            "LaSOT": names_of("LaSOT"),
        }
    else:
        hard_mining_index = None
        hard_mining_names = None

    def preprocess(roidb):
        # The slice offsets below are the lengths of the matched prefixes.
        if roidb.startswith("VID/"):
            return _preprocess_imagenet_vid(roidb[4:], aug, hard_mining_index,
                                            hard_mining_names)
        elif roidb.startswith("DAVIS/"):
            return _preprocess_davis_like(
                roidb[6:], aug,
                os.path.join(cfg.DATA.DAVIS2017_ROOT, "Annotations", "480p"))
        elif roidb.startswith("YouTubeVOS/"):
            return _preprocess_davis_like(
                roidb[11:], aug,
                os.path.join(cfg.DATA.YOUTUBE_VOS_ROOT, "train",
                             "Annotations"), "YouTubeVOS", hard_mining_index,
                hard_mining_names)
        elif roidb.startswith("GOT10K/"):
            return _preprocess_got10k(roidb[7:], aug, hard_mining_index,
                                      hard_mining_names)
        elif roidb.startswith("LaSOT/"):
            return _preprocess_lasot(roidb[6:], aug, hard_mining_index,
                                     hard_mining_names)
        elif roidb.startswith("YouTube-BB/"):
            return _preprocess_youtube_bb(roidb[11:], aug)
        elif roidb.startswith("TrackingNet/"):
            return _preprocess_trackingnet(roidb[12:], aug)
        # Raised explicitly so the check survives "python -O".
        raise AssertionError(
            "Unknown dataset prefix in roidb: {}".format(roidb))

    # ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    # ds = MapData(ds, preprocess)
    if cfg.DATA.DEBUG_VIS or not cfg.DATA.MULTITHREAD:
        # single-threaded mapping
        ds = MapData(ds, preprocess)
    else:
        # ds = MultiThreadMapData(ds, 6, preprocess)
        ds = MultiThreadMapData(ds, 8, preprocess, buffer_size=80)
    return ds
def get_data_set(root_path, ana_path):
    """
    Wrap the training data list in a shuffled ``DataFromList``.

    Both arguments are passed through unchanged to ``get_train_data_list``.
    """
    return DataFromList(get_train_data_list(root_path, ana_path),
                        shuffle=True)
def get_dataflow(is_train=True):
    """
    Build the training dataflow, or return the validation samples.

    Steps:
      1. Read ``train.csv`` and ``HPAv18RGBY_WithoutUncertain_wodpl.csv`` from
         ``/data/kaggle/HPA`` and turn every space-separated ``Target``
         string into a multi-hot vector of length ``config.NUM_CLASS``.
      2. Split off 15% with a stratified shuffle split (that part is not
         used by this function), then split the rest into 5 stratified
         folds and select fold ``config.FOLD``.
      3. Oversample the training part with ``oversample_2`` and append the
         pseudo-labelled samples listed in ``LB623.csv``.

    Args:
        is_train: when False, return the validation samples instead of a
            dataflow.

    Returns:
        ``is_train=True``: a prefetching dataflow of batches of
        ``preprocess``-ed samples (batch size ``config.BATCH``).
        ``is_train=False``: a list of ``(file_name, multi_hot_label)`` tuples.

    Raises:
        ValueError: if ``config.FOLD`` does not name one of the 5 folds.
    """
    data_root = '/data/kaggle/HPA'

    def multi_hot(target_strings):
        # "3 7 12" -> vector with ones at 3, 7 and 12.
        # np.float64 replaces the np.float alias removed in NumPy 1.24;
        # dtype=int keeps an empty target string indexable (all zeros).
        eye = np.eye(config.NUM_CLASS, dtype=np.float64)
        return [
            eye[np.array([int(i) for i in s.split()], dtype=int)].sum(axis=0)
            for s in target_strings
        ]

    train_df = pd.read_csv(os.path.join(data_root, 'train.csv'))
    # train_df = oversample(train_df)
    fnames = [
        os.path.join(config.TRAIN_DATASET, f) for f in train_df['Id'].tolist()
    ]
    sprase_label = multi_hot(train_df['Target'])

    extra_df = pd.read_csv(
        os.path.join(data_root, 'HPAv18RGBY_WithoutUncertain_wodpl.csv'))
    # extra_df = oversample(extra_df)
    extra_fnames = [
        os.path.join(config.EXTRA_DATASET, f) for f in extra_df['Id'].tolist()
    ]
    extra_labels = multi_hot(extra_df['Target'])

    fnames = np.array(fnames + extra_fnames)
    sprase_label = np.array(sprase_label + extra_labels)

    # Hold out 15%; only the remaining 85% is used below.
    msss = MultilabelStratifiedShuffleSplit(n_splits=1,
                                            test_size=0.15,
                                            random_state=42)
    for train_index, test_index in msss.split(fnames, sprase_label):
        x_train = fnames[train_index]
        y_train = sprase_label[train_index]

    # 5 fold the rest
    mskf = MultilabelStratifiedKFold(n_splits=5, random_state=1)
    for fold_num, (train_index, test_index) in enumerate(
            mskf.split(x_train, y_train)):
        if fold_num == config.FOLD:
            foldx_train, foldx_test = x_train[train_index], x_train[test_index]
            foldy_train, foldy_test = y_train[train_index], y_train[test_index]
            break
    else:
        # Without this the code below would fail with a NameError.
        raise ValueError(
            "config.FOLD must be in [0, 5), got {!r}".format(config.FOLD))

    train_data = list(zip(foldx_train, foldy_train))
    val_data = list(zip(foldx_test, foldy_test))

    train_data = oversample_2(train_data)

    # Pseudo labels: file names and predicted classes listed in LB623.csv.
    pseudo_df = pd.read_csv(os.path.join(data_root, 'LB623.csv'))
    pseudo_fnames = [
        os.path.join(config.TEST_DATASET, f)
        for f in pseudo_df['Id'].tolist()
    ]
    # pseudo_labels = np.load("./SOTA.npy")
    # pseudo_labels = [np.array(_) for _ in pseudo_labels]
    pseudo_labels = multi_hot(pseudo_df['Predicted'])
    pseudo_data = list(zip(pseudo_fnames, pseudo_labels))
    train_data = train_data + pseudo_data

    print("train: ", len(train_data), len(val_data))

    if not is_train:
        return val_data

    ds = DataFromList(train_data, shuffle=True)
    ds = BatchData(MapData(ds, preprocess), config.BATCH)
    ds = PrefetchDataZMQ(ds, 6)
    return ds
def get_train_dataflow_YCBV():
    """
    Return a training dataflow over the YCB-Video image ids.

    Each datapoint is a dict with the keys:
        'image': the image read with cv2, as float32
        'anchor_labels', 'anchor_boxes': RPN anchor inputs, or
        'anchor_labels_lvl{k}', 'anchor_boxes_lvl{k}' (k = 2, 3, ...) when
            cfg.MODE_FPN is set
        'gt_boxes', 'gt_labels': boxes and classes of the non-crowd instances
        'gt_masks': segmentation entries of the non-crowd instances
            (only if cfg.MODE_MASK)

    Datapoints for which MalformedData is raised are logged and dropped.
    """
    image_ids = YCBVDetectionDataset().load_training_image_ids(cfg.DATA.TRAIN)
    # print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    # num = len(img_ids)
    # roidbs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    # logger.info("Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
    #     num - len(roidbs), len(roidbs)))

    ds = DataFromList(image_ids, shuffle=True)

    # aug = imgaug.AugmentorList(
    #     [CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(image_id):
        # The annotation record is loaded per image rather than up front.
        roidb = YCBVDetectionDataset().load_single_roidb(image_id)
        fname = roidb['file_name']
        klass = roidb['class']
        is_crowd = roidb['is_crowd']
        boxes = np.copy(roidb['boxes'])

        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]

        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            # scale the x columns by the width and the y columns by the height
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation (disabled; only the box <-> corner round trip remains):
        # im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        # points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                per_level = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for lvl, (lvl_labels, lvl_boxes) in enumerate(per_level,
                                                              start=2):
                    ret['anchor_labels_lvl{}'.format(lvl)] = lvl_labels
                    ret['anchor_boxes_lvl{}'.format(lvl)] = lvl_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret[
                    'anchor_boxes'] = get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            all_segs = copy.deepcopy(roidb['segmentation'])
            kept_segs = [
                seg for k, seg in enumerate(all_segs) if not is_crowd[k]
            ]
            assert len(kept_segs) == len(boxes)
            # The segmentation entries are passed on as they are.
            ret['gt_masks'] = kept_segs

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        return MultiThreadMapData(ds, 5, preprocess)
    return MultiProcessMapDataZMQ(ds, 10, preprocess)
def get_eval_dataflow(name, shard=0, num_shards=1):
    """
    Return an evaluation dataflow over DAVIS 2017 frames.

    Sequences are read from "davis2017_fast_val_ids.txt", one
    "<sequence>__<object id>" entry per line.  For every sequence the first
    and last frame (in sorted file order) are left out, and only frames in
    which the object id occurs in the annotation are kept.

    Args:
        name: not used by this function.
        shard, num_shards: select a contiguous subset of the kept frames;
            the last shard also takes the remainder.

    Each datapoint is
        (ref_img, ref_bbox, target_img, target_bbox, "<seq>__<obj>__<timestep>")
    where the reference is frame "00000" of the sequence.
    """
    with open("davis2017_fast_val_ids.txt") as f:
        seqs = [line.strip() for line in f]

    seqs_timesteps = []
    for seq in seqs:
        parts = seq.split("__")
        frame_files = sorted(
            glob.glob(cfg.DATA.DAVIS2017_ROOT + "/JPEGImages/480p/" +
                      parts[0] + "/*.jpg"))[1:-1]
        for frame_file in frame_files:
            timestep = frame_file.split('/')[-1].replace(".jpg", "")
            ann_fn = (cfg.DATA.DAVIS2017_ROOT + "/Annotations/480p/" +
                      parts[0] + '/' + timestep + ".png")
            ann = np.array(PIL.Image.open(ann_fn))
            # keep the frame only if the object is present in it
            if (ann == int(parts[1])).any():
                seqs_timesteps.append((parts[0], parts[1], timestep))

    # seqs_timesteps += [(seq.split('__')[0], seq.split('__')[1], timestep) for timestep in timesteps]

    total = len(seqs_timesteps)
    per_shard = total // num_shards
    begin = shard * per_shard
    if shard + 1 < num_shards:
        end = (shard + 1) * per_shard
    else:
        end = total
    ds = DataFromList(seqs_timesteps[begin:end])

    def preprocess(seq_timestep):
        seq, obj_id, timestep = seq_timestep
        ann_dir = cfg.DATA.DAVIS2017_ROOT + "/Annotations/480p/" + seq + '/'
        img_dir = cfg.DATA.DAVIS2017_ROOT + "/JPEGImages/480p/" + seq + "/"
        first_frame = str(0).zfill(5)

        def to_float_box(box):
            # exactly four values, stored as float32
            x1, y1, x2, y2 = [float(x) for x in box]
            return np.array([x1, y1, x2, y2], dtype=np.float32)

        ann = np.array(PIL.Image.open(ann_dir + timestep + ".png"))
        ann_mask = ann == int(obj_id)
        if not ann_mask.any():
            return None, None, None, None, None
            # ann_box = np.array([-1000000, -1000000, 100000, 100000])
        ann_box = get_bbox_from_segmentation_mask_np(ann_mask)

        ff = np.array(PIL.Image.open(ann_dir + first_frame + ".png"))
        ff_box = get_bbox_from_segmentation_mask_np(ff == int(obj_id))

        target_bbox = to_float_box(ann_box)
        ref_bbox = to_float_box(ff_box)

        target_img = cv2.imread(img_dir + timestep + ".jpg", cv2.IMREAD_COLOR)
        ref_img = cv2.imread(img_dir + first_frame + ".jpg", cv2.IMREAD_COLOR)
        return ref_img, ref_bbox, target_img, target_bbox, "__".join(
            seq_timestep)

    return MapData(ds, preprocess)
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint is a list made of:

    the image: (h, w, 3),
    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)
    gt_boxes: (N, 4)
    gt_labels: (N,)
    and, if MODE_MASK, gt_masks: (N, h, w)
    """
    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)
    # To train on your own data, change this to your loader.
    # Produce "imgs" as a list of dict, in the dict the following keys are
    # needed for training:
    #   height, width: integer
    #   file_name: str, full path to the image
    #   boxes: numpy array of kx4 floats
    #   class: numpy array of k integers
    #   is_crowd: k booleans. Use k False if you don't know what it means.
    #   segmentation: k lists of numpy arrays (one for each box).
    #       Each list of numpy array corresponds to the mask for one instance.
    #       Each numpy array in the list is a polygon of shape Nx2,
    #       because one mask can be represented by N polygons.
    #       If your segmentation annotations are originally masks rather than
    #       polygons, either convert it, or the augmentation code below will
    #       need to be changed or skipped accordingly.

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num_before = len(imgs)
    imgs = [
        img for img in imgs if len(img['boxes'][img['is_crowd'] == 0]) > 0
    ]
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num_before - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname = img['file_name']
        klass = img['class']
        is_crowd = img['is_crowd']
        boxes = np.copy(img['boxes'])

        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation: the parameters sampled for the image are re-applied
        # to the box corners (and to the polygons further down)
        im, params = aug.augment_return_params(im)
        corners = aug.augment_coords(box_to_point8(boxes), params)
        boxes = point8_to_box(corners)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                anchor_inputs = itertools.chain.from_iterable(
                    get_multilevel_rpn_anchor_input(im, boxes, is_crowd))
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            all_polys = copy.deepcopy(img['segmentation'])
            kept_polys = [
                inst for k, inst in enumerate(all_polys) if not is_crowd[k]
            ]
            assert len(kept_polys) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            def instance_mask(polys):
                moved = [aug.augment_coords(p, params) for p in polys]
                return segmentation_to_mask(moved, im.shape[0], im.shape[1])

            masks = np.asarray([instance_mask(polys) for polys in kept_polys],
                               dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        return MultiThreadMapData(ds, 5, preprocess)
    return MultiProcessMapDataZMQ(ds, 10, preprocess)
def get_train_dataflow():
    """
    Return a batched training dataflow for semantic segmentation.

    Each datapoint (before batching) consists of:
        input image: (h, w, 3), float32
        semantic label image: (h, w, 1), int32

    The image list comes from ``load_many_from_db``; every element is a dict
    containing 'fn_img' and 'fn_label'.  Labels stored in '.mat' files are
    read from the 'S' entry and shifted by -1 (so 0 becomes 255); all other
    labels are read as grayscale images.

    Raises:
        AssertionError: if ``cfg.DATA.NAME`` is neither 'cityscapes' nor
            'cocostuff', or (from inside the mapping function) if an image
            or label file cannot be read.
    """
    # imgs is a list, where each element is a dict containing 'fn_img', and 'fn_label'
    imgs = load_many_from_db(cfg.DATA.NAME, add_gt=True, is_train=True)
    # imgs = COCODetection.load_many(
    #     cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)

    ds = DataFromList(imgs, shuffle=True)

    mean_bgr = np.array(cfg.PREPROC.PIXEL_MEAN[::-1])

    if cfg.DATA.NAME == 'cityscapes':
        aspect_exp = 1.1
    elif cfg.DATA.NAME == 'cocostuff':
        aspect_exp = 1.1  # 2.0
    else:
        logger.warn('Dataset name not known.')
        # Raised explicitly so the check survives "python -O".
        raise AssertionError(
            'Dataset name not known: {}'.format(cfg.DATA.NAME))

    aug = imgaug.AugmentorList([
        SSDCropRandomShape(cfg.PREPROC.INPUT_SHAPE_TRAIN,
                           aspect_exp=aspect_exp,
                           mean_rgbgr=mean_bgr),
        SSDResize(cfg.PREPROC.INPUT_SHAPE_TRAIN),
        imgaug.Flip(horiz=True),
        SSDColorJitter(mean_rgbgr=mean_bgr)
    ])
    # Same geometry as `aug`, without the color jitter; the fill value and
    # interpolation are set for label maps.
    aug_label = imgaug.AugmentorList([
        SSDCropRandomShape(cfg.PREPROC.INPUT_SHAPE_TRAIN,
                           aspect_exp=aspect_exp,
                           mean_rgbgr=[255, ]),
        SSDResize(cfg.PREPROC.INPUT_SHAPE_TRAIN, interp=cv2.INTER_NEAREST),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fn_img, fn_label = img['fn_img'], img['fn_label']
        # load head (and landmark) data as well
        im = cv2.imread(fn_img, cv2.IMREAD_COLOR)
        assert im is not None, fn_img

        if fn_label.endswith('.mat'):  # cocostuff
            label = loadmat(fn_label)['S'].astype(int)
            label = (label - 1).astype(np.uint8)  # -1 becomes 255
        else:
            label = cv2.imread(fn_label, cv2.IMREAD_GRAYSCALE)
            # Check before np.expand_dims: afterwards the value is an array
            # and can no longer be None, so a later check would never fire.
            assert label is not None, fn_label
        label = np.expand_dims(label, 2)

        im = im.astype('float32')
        # label = label.astype('int32')

        # augmentation
        im, params = aug.augment_return_params(im)
        # TODO: better way to adjust label?
        # Reuse the parameters sampled for the image (all but the last
        # augmentor, the color jitter) with label-specific settings.
        params_label = deepcopy(params[:-1])
        params_label[0].mean_rgbgr = [255, ]
        params_label[1].interp = cv2.INTER_NEAREST
        label = aug_label.augment_with_params(label, params_label)
        label = label.astype('int32')

        ret = [im, label]
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        # ds = MapData(ds, preprocess) # for debugging
        ds = MultiProcessMapDataZMQ(ds, cfg.PREPROC.NUM_WORKERS, preprocess)
    ds = BatchData(ds, cfg.PREPROC.BATCH_SIZE)
    return ds