def get_dataflow(annot_path, img_dir, strict, x_size=224, y_size=28):
    """
    This function initializes the tensorpack dataflow and serves a generator
    for the training operation.
    :param annot_path: path to the annotation file
    :param img_dir: path to the images
    :param strict: passed through to MultiProcessMapDataZMQ
    :param x_size: target image size used by ResizeAug
    :param y_size: target label size passed to build_sample
    :return: dataflow object and the dataset size
    """
    coco_crop_size = 368

    # configure augmentors
    augmentors = [
        ScaleAug(scale_min=0.5,
                 scale_max=1.1,
                 target_dist=0.6,
                 interp=cv2.INTER_CUBIC),
        RotateAug(rotate_max_deg=40,
                  interp=cv2.INTER_CUBIC,
                  border=cv2.BORDER_CONSTANT,
                  border_value=(128, 128, 128),
                  mask_border_val=1),
        CropAug(coco_crop_size,
                coco_crop_size,
                center_perterb_max=40,
                border_value=128,
                mask_border_val=1),
        FlipAug(num_parts=18, prob=0.5),
        ResizeAug(x_size, x_size)
    ]

    # prepare augment function
    augment_func = functools.partial(augment, augmentors=augmentors)

    # prepare sample-building function
    build_sample_func = functools.partial(build_sample, y_size=y_size)

    # build the dataflow
    df = CocoDataFlow((coco_crop_size, coco_crop_size), annot_path, img_dir)
    df.prepare()
    size = df.size()
    df = MapData(df, read_img)
    df = MapData(df, augment_func)
    df = MultiProcessMapDataZMQ(df, num_proc=4, map_func=build_sample_func,
                                buffer_size=200, strict=strict)

    return df, size
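# --- Usage sketch (not from the original source; the paths below are
# hypothetical). A tensorpack dataflow must have reset_state() called once
# in the consuming process before iteration.
df, size = get_dataflow('annotations/train_annotations.json', 'train_images/',
                        strict=False)
df.reset_state()
for datapoint in df:
    break  # each datapoint is whatever build_sample() emits for one sample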
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)
    preprocess = TrainingDataPreprocessor(cfg)

    if cfg.DATA.NUM_WORKERS > 0:
        if cfg.TRAINER == 'horovod':
            # one dataflow for each process, therefore no need for a large buffer
            buffer_size = cfg.DATA.NUM_WORKERS * 10
            # MPI does not like fork()
            ds = MultiThreadMapData(ds, cfg.DATA.NUM_WORKERS, preprocess,
                                    buffer_size=buffer_size)
        else:
            buffer_size = cfg.DATA.NUM_WORKERS * 20
            ds = MultiProcessMapDataZMQ(ds, cfg.DATA.NUM_WORKERS, preprocess,
                                        buffer_size=buffer_size)
    else:
        ds = MapData(ds, preprocess)
    return ds
def build_dataflow(files, local_batch_size=32):
    # local_batch_size is an explicit parameter (it was an undefined free
    # variable in the original snippet)
    train_ds = DataFromList(files)
    aug = imgaug.AugmentorList(get_basic_augmentor(isTrain=False))

    def mapper(dp):
        idx, fname, label = dp
        img = cv2.imread(fname)
        img = aug.augment(img)
        return img, idx

    train_ds = MultiProcessMapDataZMQ(train_ds, num_proc=8, map_func=mapper,
                                      strict=True)
    train_ds = BatchData(train_ds, local_batch_size)
    train_ds.reset_state()
    return train_ds
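# --- Minimal usage sketch for build_dataflow (hypothetical file list; each
# datapoint is an (index, filename, label) tuple as unpacked by the mapper):
files = [(0, 'images/000001.jpg', 3), (1, 'images/000002.jpg', 7)]
ds = build_dataflow(files, local_batch_size=2)
for imgs, idxs in ds:  # BatchData groups mapper outputs into batches
    break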
def get_sequential_loader(ds, isTrain, batch_size, augmentors, parallel=None):
    """ Load a Single-File LMDB (Sequential Read)
    Args:
        augmentors (list[imgaug.Augmentor]): Defaults to `fbresnet_augmentor(isTrain)`

    Returns:
        A LMDBData which produces BGR images and labels.

    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/tutorial/efficient-dataflow.html
    """
    assert isinstance(augmentors, list)
    aug = imgaug.AugmentorList(augmentors)

    if parallel is None:
        # assuming hyperthreading
        parallel = min(40, multiprocessing.cpu_count() // 2)

    if isTrain:
        ds = LocallyShuffleData(ds, 50000)
        ds = MapDataComponent(ds, lambda x: cv2.imdecode(x, cv2.IMREAD_COLOR), 0)
        ds = AugmentImageComponent(ds, aug, copy=False)
        if parallel < 16:
            logger.warn("DataFlow may become the bottleneck when too few processes are used.")
        ds = BatchData(ds, batch_size, remainder=False, use_list=True)
        ds = MultiProcessRunnerZMQ(ds, parallel)
    else:
        def mapper(data):
            im, label = data
            im = cv2.imdecode(im, cv2.IMREAD_COLOR)
            im = aug.augment(im)
            return im, label

        ds = MultiProcessMapDataZMQ(ds, parallel, mapper,
                                    buffer_size=2000, strict=True)
        ds = BatchData(ds, batch_size, remainder=True, use_list=True)
    return ds
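# --- Hedged usage sketch: feed a single-file LMDB (as produced by tensorpack's
# LMDBSerializer) into the sequential loader above. The LMDB path is a
# placeholder; fbresnet_augmentor comes from the tensorpack ImageNet examples.
raw_ds = LMDBSerializer.load('/path/to/ILSVRC-train.lmdb', shuffle=False)
train_ds = get_sequential_loader(raw_ds, isTrain=True, batch_size=64,
                                 augmentors=fbresnet_augmentor(True))
train_ds.reset_state()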
def __init__(self, mode, batch_size=256, shuffle=False, num_workers=25,
             cache=50000, device='cuda'):
    # enumerate standard imagenet augmentors
    imagenet_augmentors = fbresnet_augmentor(mode == 'train')

    # load the lmdb if we can find it
    base_dir = '/userhome/cs/u3003679/'
    lmdb_loc = os.path.join(base_dir, 'ILSVRC-{}.lmdb'.format(mode))
    # lmdb_loc = os.path.join(os.environ['IMAGENET'], 'ILSVRC-%s.lmdb' % mode)
    ds = LMDBSerializer.load(lmdb_loc, shuffle=shuffle)
    ds = LocallyShuffleData(ds, cache)

    def f(dp):
        x, label = dp
        x = cv2.imdecode(x, cv2.IMREAD_COLOR)
        for aug in imagenet_augmentors:
            x = aug.augment(x)
        return x, label

    ds = MultiProcessMapDataZMQ(ds, num_proc=8, map_func=f)
    self.ds = BatchData(ds, batch_size)
    self.ds.reset_state()

    self.batch_size = batch_size
    self.num_workers = num_workers
    self.device = device
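# --- Usage sketch; the enclosing class name is not shown in the snippet, so
# `ImagenetLoader` below is a hypothetical stand-in for it.
loader = ImagenetLoader(mode='train', batch_size=256, shuffle=True)
for images, labels in loader.ds:  # one decoded, augmented BGR batch at a time
    break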
def get_train_dataflow(src):
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    # imgs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)
    classes = (
        'BG',  # always index 0
        'bathtub', 'bed', 'bookshelf', 'box', 'chair', 'counter', 'desk',
        'door', 'dresser', 'garbage_bin', 'lamp', 'monitor', 'night_stand',
        'pillow', 'sink', 'sofa', 'table', 'toilet', 'tv')
    class_to_ind = dict(list(zip(classes, list(range(len(classes))))))
    # src = '/media/ayan/Drive/IMI-Research/Datasets/Datasets_OP_Train/'
    textfile_index = natsorted(
        [src + f for f in np.sort(os.listdir(src)) if f.endswith('.txt')])

    imgs = []
    count = 0
    for fn in textfile_index:
        each_file = {}
        count = count + 1
        print(str(count) + ':::', fn)
        with open(fn, 'r') as F:
            file_F = F.read()
        file_F = file_F.split('\n')
        each_file['file_name'] = file_F[0]
        im = cv2.imread(each_file['file_name'])
        each_file['height'] = im.shape[0]
        each_file['width'] = im.shape[1]
        objects = file_F[2:len(file_F) - 1]
        boxes = []
        class_ = []
        for obj in objects:
            objs_line = obj.split(' ')
            x1 = float(objs_line[1]) - 1.0
            y1 = float(objs_line[2]) - 1.0
            x2 = float(objs_line[3]) - 1.0
            y2 = float(objs_line[4]) - 1.0
            if x1 >= x2:
                x2 = x1 + 1
            boxes.append([x1, y1, x2, y2])
            cls = class_to_ind[objs_line[0]]
            class_.append(cls)
        each_file['boxes'] = np.array(boxes).astype(np.float32)
        each_file['class'] = np.array(class_).astype(np.int32)
        each_file['is_crowd'] = np.zeros_like(each_file['class']).astype(np.int8)
        imgs.append(each_file)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        ds = MultiThreadMapData(ds, 5, preprocess)
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
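# --- Illustration of the annotation layout the loader above assumes
# (hypothetical file contents): line 0 is the image path, line 1 is skipped
# by the parser, and every following line is "<class_name> <x1> <y1> <x2> <y2>"
# in 1-indexed pixel coordinates.
example_annotation = """\
/data/images/scene_0001.jpg
3
chair 120 45 220 310
table 10 200 400 480
night_stand 405 220 470 330
"""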
def get_sniper_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)
    scale_index: i

    If MODE_MASK, gt_masks: (N, h, w)
    """
    OUTPUT_FILE = 'train_512_annotation.txt'
    OUTPUT_IMG_DIR = 'out'
    out_file = open(OUTPUT_FILE, 'w')

    class SniperDataFlow(ProxyDataFlow):
        def __init__(self, ds):
            super(SniperDataFlow, self).__init__(ds)

        def size(self):
            raise NotImplementedError()

        def get_data(self):
            for img in self.ds.get_data():
                for chip in img:
                    yield chip

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN,
                                   add_gt=True, add_mask=cfg.MODE_MASK)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)
    # aug = imgaug.AugmentorList([
    #     CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
    #     imgaug.Flip(horiz=True)
    # ])

    assert os.path.isfile(cfg.SNIPER.PRN_PRE)
    proposal_pickle = pandas.read_pickle(cfg.SNIPER.PRN_PRE)

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        img_name = fname.split('/')[-1]
        img_id = int(img_name[3:-4])

        # proposals from a pretrained rpn, used for negative chip extraction
        proposals = proposal_pickle['boxes'][proposal_pickle['ids'].index(img_id)]
        proposals[2:4] += proposals[0:2]  # from [x, y, w, h] to [x1, y1, x2, y2]

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        chip_generator = Im2Chip(im, boxes, klass, proposals,
                                 cfg.SNIPER.SCALES, cfg.SNIPER.VALID_RANGES,
                                 is_crowd=is_crowd,
                                 chip_size=cfg.SNIPER.CHIP_SIZE,
                                 chip_stride=cfg.SNIPER.CHIP_STRIDE)
        im, boxes, klass, scale_indices, is_crowd = chip_generator.genChipMultiScale()

        rets = []
        for i in range(len(im)):
            try:
                if len(boxes[i]) == 0:
                    continue
                if not len(boxes[i]):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
                ret = None
                continue

            # ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]] + [scale_indices[i] * len(boxes[i])]
            new_name = '%s_%d' % (img_name, i)
            cv2.imwrite('%s/%s' % (OUTPUT_IMG_DIR, new_name), im[i])
            ret = [im[i]] + [boxes[i], klass[i]]

            for j in range(len(klass[i])):
                if j == 0:
                    out_file.write(new_name)
                out_file.write(' %d %f %f %f %f' %
                               (klass[i][j], boxes[i][j][0], boxes[i][j][1],
                                boxes[i][j][2], boxes[i][j][3]))
                if j == len(klass[i]) - 1:
                    out_file.write('\n')
            rets.append(ret)
        return rets

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        ds = MultiThreadMapData(ds, 5, preprocess)
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    # ds = SniperDataFlow(ds)
    return ds
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    input image: (h, w, 3),
    semantic label image: (h, w, 1)
    """
    # imgs is a list, where each element is a dict containing 'fn_img' and 'fn_label'
    imgs = load_many_from_db(cfg.DATA.NAME, add_gt=True, is_train=True)
    # imgs = COCODetection.load_many(
    #     cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str
    boxes: kx4 floats
    class: k integers
    difficult: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)  # log invalid training

    ds = DataFromList(imgs, shuffle=True)

    mean_bgr = np.array(cfg.PREPROC.PIXEL_MEAN[::-1])

    if cfg.DATA.NAME == 'cityscapes':
        aspect_exp = 1.1
    elif cfg.DATA.NAME == 'cocostuff':
        aspect_exp = 1.1  # 2.0
    else:
        logger.warn('Dataset name not known.')
        assert False

    aug = imgaug.AugmentorList([
        SSDCropRandomShape(cfg.PREPROC.INPUT_SHAPE_TRAIN,
                           aspect_exp=aspect_exp, mean_rgbgr=mean_bgr),
        SSDResize(cfg.PREPROC.INPUT_SHAPE_TRAIN),
        imgaug.Flip(horiz=True),
        SSDColorJitter(mean_rgbgr=mean_bgr)
    ])
    aug_label = imgaug.AugmentorList([
        SSDCropRandomShape(cfg.PREPROC.INPUT_SHAPE_TRAIN,
                           aspect_exp=aspect_exp, mean_rgbgr=[255, ]),
        SSDResize(cfg.PREPROC.INPUT_SHAPE_TRAIN, interp=cv2.INTER_NEAREST),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fn_img, fn_label = img['fn_img'], img['fn_label']
        # load head (and landmark) data as well
        im = cv2.imread(fn_img, cv2.IMREAD_COLOR)
        if fn_label.endswith('.mat'):  # cocostuff
            label = loadmat(fn_label)['S'].astype(int)
            label = (label - 1).astype(np.uint8)  # -1 becomes 255
        else:
            label = cv2.imread(fn_label, cv2.IMREAD_GRAYSCALE)
        label = np.expand_dims(label, 2)
        assert (im is not None) and (label is not None), fn_img
        im = im.astype('float32')
        # label = label.astype('int32')

        # augmentation: apply the same geometric params to image and label
        im, params = aug.augment_return_params(im)
        # TODO: better way to adjust label?
        params_label = deepcopy(params[:-1])
        params_label[0].mean_rgbgr = [255, ]
        params_label[1].interp = cv2.INTER_NEAREST
        label = aug_label.augment_with_params(label, params_label)
        label = label.astype('int32')

        ret = [im, label]
        return ret

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        ds = MultiThreadMapData(ds, 5, preprocess)
    else:
        # ds = MapData(ds, preprocess)  # for debugging
        ds = MultiProcessMapDataZMQ(ds, cfg.PREPROC.NUM_WORKERS, preprocess)
    ds = BatchData(ds, cfg.PREPROC.BATCH_SIZE)
    return ds
def get_batch_train_dataflow(batch_size):
    """
    Return a training dataflow. Each datapoint consists of the following:

    A batch of images: (BS, h, w, 3),

    For each image, 1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (BS, h', w', maxNumAnchors)
        anchor_boxes: (BS, h', w', maxNumAnchors, 4)

    gt_boxes: (BS, maxNumAnchors, 4)
    gt_labels: (BS, maxNumAnchors)

    If MODE_MASK, gt_masks: (BS, maxNumAnchors, h, w)
    """
    print("In train dataflow")
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print("Done loading roidbs")
    # print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info("Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}".format(
        num - len(roidbs), len(roidbs)))

    # will shuffle it later at every rank
    roidbs = sorted(roidbs, key=lambda x: float(x['width']) / float(x['height']), reverse=True)

    print("Batching roidbs")
    batched_roidbs = []

    if cfg.PREPROC.PREDEFINED_PADDING:
        taken = [False for _ in roidbs]
        done = False

        for i, d in enumerate(roidbs):
            batch = []
            if not taken[i]:
                batch.append(d)
                padding_shape = get_padding_shape(d['height'], d['width'])
                while len(batch) < batch_size:
                    k = get_next_roidb(roidbs, i, padding_shape, taken)
                    if k is None:
                        done = True
                        break
                    batch.append(roidbs[k])
                    taken[i], taken[k] = True, True
                if not done:
                    batched_roidbs.append(batch)
    else:
        batch = []
        for i, d in enumerate(roidbs):
            if i % batch_size == 0:
                if len(batch) == batch_size:
                    batched_roidbs.append(batch)
                batch = []
            batch.append(d)

    # batched_roidbs = sort_by_aspect_ratio(roidbs, batch_size)
    # batched_roidbs = group_by_aspect_ratio(roidbs, batch_size)
    print("Done batching roidbs")

    # Notes:
    #   - discard any leftover images
    #   - the batches will be shuffled, but the contents of each batch will always be the same
    #   - TODO: fix lack of batch contents shuffling

    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
         imgaug.Flip(horiz=True)])
    # aug = imgaug.AugmentorList([CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(roidb_batch):
        datapoint_list = []
        for roidb in roidb_batch:
            fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
            boxes = np.copy(boxes)
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            assert im is not None, fname
            im = im.astype('float32')
            # assume floatbox as input
            assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

            # augmentation:
            im, params = aug.augment_return_params(im)
            points = box_to_point8(boxes)
            points = aug.augment_coords(points, params)
            boxes = point8_to_box(points)
            assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

            ret = {'images': im}
            # rpn anchor:
            try:
                if cfg.MODE_FPN:
                    multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                    for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                        ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                        ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
                else:
                    raise NotImplementedError("[armand] Batch mode only available for FPN")

                boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
                klass = klass[is_crowd == 0]
                ret['gt_boxes'] = boxes
                ret['gt_labels'] = klass
                ret['filename'] = fname
                if not len(boxes):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
                return None

            if cfg.MODE_MASK:
                # augmentation will modify the polys in-place
                segmentation = copy.deepcopy(roidb['segmentation'])
                segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
                assert len(segmentation) == len(boxes)

                # Apply augmentation on polygon coordinates.
                # And produce one image-sized binary mask per box.
                masks = []
                for polys in segmentation:
                    polys = [aug.augment_coords(p, params) for p in polys]
                    masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
                masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
                ret['gt_masks'] = masks

            datapoint_list.append(ret)

        ########################################################################
        # Batchify the output
        ########################################################################

        # Easily stackable:
        # - anchor_labels_lvl2 ... anchor_labels_lvl6
        # - anchor_boxes_lvl2 ... anchor_boxes_lvl6
        batched_datapoint = {}
        for stackable_field in ["anchor_labels_lvl2", "anchor_boxes_lvl2",
                                "anchor_labels_lvl3", "anchor_boxes_lvl3",
                                "anchor_labels_lvl4", "anchor_boxes_lvl4",
                                "anchor_labels_lvl5", "anchor_boxes_lvl5",
                                "anchor_labels_lvl6", "anchor_boxes_lvl6"]:
            batched_datapoint[stackable_field] = np.stack([d[stackable_field] for d in datapoint_list])

        # Require padding and original dimension storage:
        # - image (HxWx3)
        # - gt_boxes (?x4)
        # - gt_labels (?)
        # - gt_masks (?xHxW)

        # Find the minimum container size for images (maxW x maxH).
        # Find the maximum number of ground truth boxes.
        # For each image, save the original dimension and pad.
        if cfg.PREPROC.PREDEFINED_PADDING:
            padding_shapes = [get_padding_shape(*(d["images"].shape[:2])) for d in datapoint_list]
            max_height = max([shp[0] for shp in padding_shapes])
            max_width = max([shp[1] for shp in padding_shapes])
        else:
            image_dims = [d["images"].shape for d in datapoint_list]
            heights = [dim[0] for dim in image_dims]
            widths = [dim[1] for dim in image_dims]
            max_height = max(heights)
            max_width = max(widths)

        # image
        padded_images = []
        original_image_dims = []
        for datapoint in datapoint_list:
            image = datapoint["images"]
            original_image_dims.append(image.shape)

            h_padding = max_height - image.shape[0]
            w_padding = max_width - image.shape[1]
            padded_image = np.pad(image, [[0, h_padding], [0, w_padding], [0, 0]], 'constant')
            padded_images.append(padded_image)

        batched_datapoint["images"] = np.stack(padded_images)
        batched_datapoint["orig_image_dims"] = np.stack(original_image_dims)

        # gt_boxes and gt_labels
        max_num_gts = max([d["gt_labels"].size for d in datapoint_list])

        gt_counts = []
        padded_gt_labels = []
        padded_gt_boxes = []
        padded_gt_masks = []
        for datapoint in datapoint_list:
            gt_count_for_image = datapoint["gt_labels"].size
            gt_counts.append(gt_count_for_image)

            gt_padding = max_num_gts - gt_count_for_image

            padded_gt_labels_for_img = np.pad(datapoint["gt_labels"], [0, gt_padding],
                                              'constant', constant_values=-1)
            padded_gt_labels.append(padded_gt_labels_for_img)

            padded_gt_boxes_for_img = np.pad(datapoint["gt_boxes"], [[0, gt_padding], [0, 0]], 'constant')
            padded_gt_boxes.append(padded_gt_boxes_for_img)

            h_padding = max_height - datapoint["images"].shape[0]
            w_padding = max_width - datapoint["images"].shape[1]

            if cfg.MODE_MASK:
                padded_gt_masks_for_img = np.pad(datapoint["gt_masks"],
                                                 [[0, gt_padding], [0, h_padding], [0, w_padding]],
                                                 'constant')
                padded_gt_masks.append(padded_gt_masks_for_img)

        batched_datapoint["orig_gt_counts"] = np.stack(gt_counts)
        batched_datapoint["gt_labels"] = np.stack(padded_gt_labels)
        batched_datapoint["gt_boxes"] = np.stack(padded_gt_boxes)
        batched_datapoint["filenames"] = [d["filename"] for d in datapoint_list]

        if cfg.MODE_MASK:
            batched_datapoint["gt_masks"] = np.stack(padded_gt_masks)

        return batched_datapoint

    ds = DataFromList(batched_roidbs, shuffle=True)

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        # ds = MapData(ds, preprocess)
        ds = MultiThreadMapData(ds, 5, preprocess)
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
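# --- Standalone sketch of the batch-padding scheme used above: every image is
# zero-padded to the (max_height, max_width) of its batch, and the original
# dimensions are stored alongside so the model can mask out the padding.
import numpy as np

images = [np.ones((480, 640, 3), 'float32'), np.ones((512, 600, 3), 'float32')]
max_h = max(im.shape[0] for im in images)
max_w = max(im.shape[1] for im in images)
padded = np.stack([
    np.pad(im, [[0, max_h - im.shape[0]], [0, max_w - im.shape[1]], [0, 0]],
           'constant')
    for im in images])                              # -> (2, 512, 640, 3)
orig_dims = np.stack([im.shape for im in images])   # -> (2, 3)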
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # Add rpn data to dataflow:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.DATA.NUM_WORKERS > 0:
        if cfg.TRAINER == 'horovod':
            # one dataflow for each process, therefore no need for a large buffer
            buffer_size = cfg.DATA.NUM_WORKERS * 10
            # MPI does not like fork()
            ds = MultiThreadMapData(ds, cfg.DATA.NUM_WORKERS, preprocess,
                                    buffer_size=buffer_size)
        else:
            buffer_size = cfg.DATA.NUM_WORKERS * 20
            ds = MultiProcessMapDataZMQ(ds, cfg.DATA.NUM_WORKERS, preprocess,
                                        buffer_size=buffer_size)
    else:
        ds = MapData(ds, preprocess)
    return ds
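# --- Hedged consumption sketch for the dict-style datapoints produced above
# (key names taken from preprocess(); the per-level anchor fields such as
# 'anchor_labels_lvl2' are only present when cfg.MODE_FPN is on):
df = get_train_dataflow()
df.reset_state()
for dp in df:
    image, gt_boxes, gt_labels = dp['image'], dp['gt_boxes'], dp['gt_labels']
    break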
def get_sniper_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)
    scale_index: i

    If MODE_MASK, gt_masks: (N, h, w)
    """
    class SniperDataFlow(ProxyDataFlow):
        def __init__(self, ds):
            super(SniperDataFlow, self).__init__(ds)

        def size(self):
            raise NotImplementedError()

        def get_data(self):
            for img in self.ds.get_data():
                for chip in img:
                    yield chip

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN,
                                   add_gt=True, add_mask=cfg.MODE_MASK)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([imgaug.Flip(horiz=True)])

    assert os.path.isfile(cfg.SNIPER.PRN_PRE)
    proposal_pickle = pandas.read_pickle(cfg.SNIPER.PRN_PRE)

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        img_name = fname.split('/')[-1]
        img_id = int(img_name[3:-4])

        # proposals from a pretrained rpn, used for negative chip extraction
        proposals = proposal_pickle['boxes'][proposal_pickle['ids'].index(img_id)]
        proposals[2:4] += proposals[0:2]  # from [x, y, w, h] to [x1, y1, x2, y2]

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        chip_generator = Im2Chip(im, boxes, klass, proposals,
                                 cfg.SNIPER.SCALES, cfg.SNIPER.VALID_RANGES,
                                 is_crowd=is_crowd,
                                 chip_size=cfg.SNIPER.CHIP_SIZE,
                                 chip_stride=cfg.SNIPER.CHIP_STRIDE)
        im, boxes, klass, scale_indices, is_crowd = chip_generator.genChipMultiScale()

        rets = []
        for i in range(len(im)):
            try:
                if len(boxes[i]) == 0:
                    continue

                # anchor_labels, anchor_boxes
                gt_invalid = []
                maxbox = cfg.SNIPER.VALID_RANGES[scale_indices[i]][0]
                minbox = cfg.SNIPER.VALID_RANGES[scale_indices[i]][1]
                maxbox = sys.maxsize if maxbox == -1 else maxbox
                minbox = 0 if minbox == -1 else minbox
                for box in boxes[i]:
                    w = box[2] - box[0]
                    h = box[3] - box[1]
                    if w >= maxbox or h >= maxbox or (w < minbox and h < minbox):
                        gt_invalid.append(box)

                anchor_inputs = get_sniper_rpn_anchor_input(im[i], boxes[i], is_crowd[i], gt_invalid)
                assert len(anchor_inputs) == 2

                boxes[i] = boxes[i][is_crowd[i] == 0]  # skip crowd boxes in training target
                klass[i] = klass[i][is_crowd[i] == 0]

                if not len(boxes[i]):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
                ret = None
                continue

            # ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]] + [scale_indices[i] * len(boxes[i])]
            ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]]
            rets.append(ret)
        return rets

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        ds = MultiThreadMapData(ds, 5, preprocess)
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    ds = SniperDataFlow(ds)
    return ds
def get_wider_dataflow(augment=False):
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    logger.info("loading wider attributes dataset...")
    roidbs_train = load_many(cfg.WIDER.BASEDIR, 'train', augment)
    roidbs_test = load_many(cfg.WIDER.BASEDIR, 'test', augment)
    roidbs = roidbs_train + roidbs_test
    logger.info("load finished!")
    """
    To train on your own data, change this to your loader.
    Produce "roidbs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img['bbox']) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    attr_names = ['male', 'longhair', 'sunglass', 'hat', 'tshirt', 'longsleeve',
                  'formal', 'shorts', 'jeans', 'longpants', 'skirt', 'facemask',
                  'logo', 'stripe']

    def preprocess(roidb):
        fname = roidb['img']
        x1, y1, w, h = np.split(roidb['bbox'], 4, axis=1)
        boxes = np.concatenate([x1, y1, x1 + w, y1 + h], axis=1)
        klass = np.ones(len(roidb['bbox']), dtype=np.int32)
        attrs = {name: roidb[name] for name in attr_names}

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            # anchor_labels, anchor_boxes
            ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(
                im, boxes, np.zeros(len(boxes), dtype=int))
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            ret.update(attrs)
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None
        return ret

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        ds = MultiThreadMapData(ds, 5, preprocess)
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
def get_attributes_dataflow(augment=False, two_cls=False):
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    # roidbs = load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN)
    logger.info("loading wider attributes dataset...")
    roidbs_train = load_many('/root/datasets/WiderAttribute', 'train', augment, two_cls)
    roidbs_val = load_many('/root/datasets/WiderAttribute', 'val', augment, two_cls)
    roidbs_wider = roidbs_train + roidbs_val

    def attr_augment(names, multiple):
        # oversample images that contain a positive example of any given attribute
        datalist = []
        for name in names:
            datalist += [
                roidb for roidb in roidbs_wider if np.sum(roidb[name] == 1) > 0
            ]
        return datalist * multiple

    attr_names = ['sunglass', 'stripe', 'facemask', 'jeans']
    roidbs_wider += attr_augment(attr_names, 2)
    logger.info("load finished!")
    """
    To train on your own data, change this to your loader.
    Produce "roidbs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    attrs: numpy array of k integers, -1 -> negative, 0 -> ignore, 1 -> positive
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    # num = len(roidbs)
    # roidbs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    # logger.info("Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}".format(
    #     num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs_wider, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    attr_keys = ['male', 'longhair', 'sunglass', 'hat', 'tshirt', 'longsleeve',
                 'formal', 'shorts', 'jeans', 'longpants', 'skirt', 'facemask',
                 'logo', 'stripe']

    def preprocess(roidb):
        fname = roidb['img']
        boxes = np.copy(roidb['bbox'])
        attrs = {name: roidb[name] for name in attr_keys}

        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        # assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # compute the rpn anchor targets once and reuse both outputs
        # (the original snippet called get_rpn_anchor_input twice)
        anchor_labels, anchor_boxes = get_rpn_anchor_input(
            im, boxes, np.zeros(len(boxes), dtype=int))

        ret = {
            'image': im,
            'gt_boxes': boxes,
            'anchor_labels': anchor_labels,
            'anchor_boxes': anchor_boxes,
        }
        ret.update(attrs)
        return ret

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        ds = MultiThreadMapData(ds, 5, preprocess)
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)
    """
    prw = PRWDataset(cfg.DATA.BASEDIR)
    imgs = prw.load()
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    """
    ds = DataFromList(imgs, shuffle=cfg.DATA.TEST.SHUFFLE)

    # imgaug.Flip(horiz=True)
    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(img):
        fname, boxes, klass, is_crowd, re_id_class = img['file_name'], img['boxes'], \
            img['class'], img['is_crowd'], img['re_id_class']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname  # check before using im below
        orig_shape = im.shape[:2]
        orig_im = np.copy(im)
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            # anchor_labels, anchor_boxes
            anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
            assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im] + list(anchor_inputs) + [
            boxes, klass, re_id_class, orig_shape, orig_im
        ]
        return ret

    ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    # roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    # print_class_histogram(roidbs)
    roidbs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN,
                                     add_gt=True, add_mask=cfg.MODE_MASK)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        # single-channel input: replicate the grayscale image to 3 channels
        im = imread(fname)
        assert im is not None, fname
        im = np.expand_dims(im, axis=2)
        im = np.repeat(im, 3, axis=2)
        im = im.astype('float32')
        height, width = im.shape[:2]  # needed below for relative coordinates
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # Optional visualization of boxes before augmentation:
        # source_image = Image.fromarray(im.astype('uint8'))
        # imsave('./input_image1', im[:, :, 1].astype(np.float32), imagej=True)
        # draw = ImageDraw.Draw(source_image)
        # for i, bbox in enumerate(boxes):
        #     draw.rectangle((bbox[0], bbox[1], bbox[2], bbox[3]), outline='red')
        #     # draw.text((bbox[0] + 5, bbox[1] + 5), str(klass_tmp[i]))
        # source_image.save('./input_image1', "JPEG")

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        klass_tmp = np.copy(klass)
        # print(klass)
        # imsave('./input_image2', im[:, :, 1].astype(np.float32), imagej=True)
        # (the same optional visualization can be repeated here, after augmentation)

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        ds = MultiThreadMapData(ds, 5, preprocess)
    else:
        # ds = MultiProcessMapDataZMQ(ds, 10, preprocess, buffer_size=20)
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess, buffer_size=1)
    return ds
def get_train_dataflow_YCBV():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    img_ids = YCBVDetectionDataset().load_training_image_ids(cfg.DATA.TRAIN)
    # print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    # num = len(img_ids)
    # roidbs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    # logger.info("Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}".format(
    #     num - len(roidbs), len(roidbs)))

    ds = DataFromList(img_ids, shuffle=True)

    # aug = imgaug.AugmentorList(
    #     [CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(image_id):
        roidb = YCBVDetectionDataset().load_single_roidb(image_id)
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation (disabled; the box round-trip below is a no-op):
        # im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        # points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        if cfg.MODE_MASK:
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)
            # No geometric augmentation is applied here, so the polygons are
            # passed through unchanged instead of being rasterized to masks.
            ret['gt_masks'] = segmentation

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        ds = MultiThreadMapData(ds, 5, preprocess)
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    imgs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN,
                                   add_gt=True, add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        ds = MultiThreadMapData(ds, 5, preprocess)
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN,
                                     add_gt=True, add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "roidbs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy arrays corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None
        return ret

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        ds = MultiThreadMapData(ds, 5, preprocess)
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
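# --- Optional sanity/speed check with tensorpack's TestDataSpeed, a quick way
# to confirm that the parallel mapper (and not the network) sets the pace:
from tensorpack.dataflow import TestDataSpeed
TestDataSpeed(get_train_dataflow(), size=100).start()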