def draw_final_outputs(img, results):
    """Overlay masks, boxes and "class,score" tags of all detections on ``img``.

    Args:
        img: image to draw on; returned as-is when ``results`` is empty.
        results: [DetectionResult]

    Returns:
        The image returned by ``viz.draw_boxes`` after all masks were drawn.
    """
    if len(results) == 0:
        return img

    all_boxes = np.asarray([det.box for det in results])
    # Masks are painted from the largest box to the smallest one so that
    # small objects are not hidden behind big ones.
    draw_order = np.argsort(-np_area(all_boxes))

    canvas = img
    for idx in draw_order:
        det = results[idx]
        if det.mask is not None:
            canvas = draw_mask(canvas, det.mask)

    labels = [
        "{},{:.2f}".format(cfg.DATA.CLASS_NAMES[det.class_id], det.score)
        for det in results
    ]
    return viz.draw_boxes(canvas, all_boxes, labels)
def preprocess(img):
    """Split one annotated image into chips and dump chips plus labels to disk.

    The image is read from ``img['file_name']`` and handed to ``Im2Chip``
    together with its boxes, classes, crowd flags and the proposals looked up
    in ``proposal_pickle``.  Every generated chip that has at least one box is
    written below ``OUTPUT_IMG_DIR``.  If the chip has at least one class
    entry, one line is appended to ``out_file``: the chip name followed by
    ``" <class> <b0> <b1> <b2> <b3>"`` for each box.

    Args:
        img: dict with the keys 'file_name', 'boxes', 'class' and 'is_crowd'.

    Returns:
        A list with one ``[chip, boxes, classes]`` entry per written chip.
    """
    fname, boxes, klass, is_crowd = (
        img['file_name'], img['boxes'], img['class'], img['is_crowd'])
    img_name = fname.split('/')[-1]
    img_id = int(img_name[3:-4])

    # Proposals of the pretrained RPN, used for negative chip extraction.
    # Work on a copy: the in-place "+=" below would otherwise modify the entry
    # stored in proposal_pickle, and the conversion would be applied again the
    # next time the same image is processed.
    proposals = proposal_pickle['boxes'][
        proposal_pickle['ids'].index(img_id)].copy()
    # NOTE(review): this slices the FIRST axis; if `proposals` is an (N, 4)
    # array the intended conversion is probably `[:, 2:4] += [:, 0:2]` --
    # confirm the layout of proposal_pickle['boxes'].
    proposals[2:4] += proposals[0:2]  # from [x,y,w,h] to [x1,y1,x2,y2]

    boxes = np.copy(boxes)
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype('float32')
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    chip_generator = Im2Chip(
        im, boxes, klass, proposals,
        cfg.SNIPER.SCALES, cfg.SNIPER.VALID_RANGES,
        is_crowd=is_crowd,
        chip_size=cfg.SNIPER.CHIP_SIZE,
        chip_stride=cfg.SNIPER.CHIP_STRIDE)
    im, boxes, klass, scale_indices, is_crowd = \
        chip_generator.genChipMultiScale()

    rets = []
    for i in range(len(im)):
        # Chips without any ground-truth box are not exported.
        if len(boxes[i]) == 0:
            continue

        # NOTE(review): the chip name keeps the original file extension in the
        # middle (e.g. "x.jpg_0"); confirm cv2.imwrite accepts such a name.
        new_name = '%s_%d' % (img_name, i)
        cv2.imwrite('%s/%s' % (OUTPUT_IMG_DIR, new_name), im[i])

        # One annotation line per chip, emitted only when there is at least
        # one class entry (same output as writing the pieces one by one).
        if len(klass[i]):
            fields = ''.join(
                ' %d %f %f %f %f' % (klass[i][j],
                                     boxes[i][j][0], boxes[i][j][1],
                                     boxes[i][j][2], boxes[i][j][3])
                for j in range(len(klass[i])))
            out_file.write(new_name + fields + '\n')

        rets.append([im[i], boxes[i], klass[i]])
    return rets
def preprocess(roidb):
    """Turn one roidb entry into a training datapoint with RPN anchor targets.

    Args:
        roidb: dict with the keys 'file_name', 'boxes', 'class', 'is_crowd'.

    Returns:
        A dict with 'image', 'anchor_labels', 'anchor_boxes', 'gt_boxes' and
        'gt_labels', or None when ``MalformedData`` is raised (including the
        case where no non-crowd box is left).
    """
    fname = roidb['file_name']
    boxes = roidb['boxes']
    klass = roidb['class']
    is_crowd = roidb['is_crowd']
    boxes = np.copy(boxes)

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    # boxes are expected to be floating point already
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    # Apply the same augmentation to the image and to the boxes.
    image, aug_params = aug.augment_return_params(image)
    corners = aug.augment_coords(box_to_point8(boxes), aug_params)
    boxes = point8_to_box(corners)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    datapoint = {'image': image}
    try:
        anchor_labels, anchor_boxes = get_rpn_anchor_input(
            image, boxes, is_crowd)
        datapoint['anchor_labels'] = anchor_labels
        datapoint['anchor_boxes'] = anchor_boxes

        # Crowd boxes are used for the anchors but not as training targets.
        not_crowd = is_crowd == 0
        boxes = boxes[not_crowd]
        klass = klass[not_crowd]
        datapoint['gt_boxes'] = boxes
        datapoint['gt_labels'] = klass
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as err:
        log_once(
            "Input {} is filtered for training: {}".format(fname, str(err)),
            'warn')
        return None
    return datapoint
def preprocess(roidb):
    """Build one training datapoint (image, anchors, gt, optional masks).

    Args:
        roidb: dict with the keys 'file_name', 'boxes', 'class', 'is_crowd'
            and, when ``cfg.MODE_MASK`` is set, 'segmentation'.

    Returns:
        A dict holding 'image', the anchor targets (per level when
        ``cfg.MODE_FPN`` is set), 'gt_boxes', 'gt_labels' and optionally
        'gt_masks'; None when ``MalformedData`` is raised.
    """
    fname = roidb['file_name']
    boxes = roidb['boxes']
    klass = roidb['class']
    is_crowd = roidb['is_crowd']
    boxes = np.copy(boxes)

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    # boxes are expected to be floating point already
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    # Augment the image, then push the boxes through the same parameters.
    image, params = aug.augment_return_params(image)
    corners = aug.augment_coords(box_to_point8(boxes), params)
    boxes = point8_to_box(corners)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    datapoint = {'image': image}
    try:
        if cfg.MODE_FPN:
            per_level = get_multilevel_rpn_anchor_input(image, boxes, is_crowd)
            # Levels are numbered starting from 2.
            for level, (lvl_labels, lvl_boxes) in enumerate(per_level, 2):
                datapoint['anchor_labels_lvl{}'.format(level)] = lvl_labels
                datapoint['anchor_boxes_lvl{}'.format(level)] = lvl_boxes
        else:
            anchor_labels, anchor_boxes = get_rpn_anchor_input(
                image, boxes, is_crowd)
            datapoint['anchor_labels'] = anchor_labels
            datapoint['anchor_boxes'] = anchor_boxes

        # Crowd boxes are dropped from the training targets.
        not_crowd = is_crowd == 0
        boxes = boxes[not_crowd]
        klass = klass[not_crowd]
        datapoint['gt_boxes'] = boxes
        datapoint['gt_labels'] = klass
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as err:
        log_once("Input {} is filtered for training: {}".format(fname, str(err)), 'warn')
        return None

    if cfg.MODE_MASK:
        # Deep copy because the augmentation modifies the polygons in-place.
        all_polys = copy.deepcopy(roidb['segmentation'])
        kept_polys = [obj for k, obj in enumerate(all_polys) if not is_crowd[k]]
        assert len(kept_polys) == len(boxes)

        # One image-sized binary mask per remaining box.
        height, width = image.shape[0], image.shape[1]
        masks = [
            segmentation_to_mask(
                [aug.augment_coords(p, params) for p in polys], height, width)
            for polys in kept_polys
        ]
        datapoint['gt_masks'] = np.asarray(masks, dtype='uint8')  # values in {0, 1}
    return datapoint
def preprocess(roidb):
    """Convert a roidb entry into the dict consumed by training.

    Reads the image, augments image and boxes with ``aug``, computes the RPN
    anchor targets (single level, or one pair per level under
    ``cfg.MODE_FPN``), removes crowd boxes from the targets and, under
    ``cfg.MODE_MASK``, rasterizes the augmented polygons into binary masks.

    Args:
        roidb: dict with 'file_name', 'boxes', 'class', 'is_crowd' (and
            'segmentation' when masks are enabled).

    Returns:
        The datapoint dict, or None when ``MalformedData`` is raised.
    """
    fname = roidb['file_name']
    boxes = roidb['boxes']
    klass = roidb['class']
    is_crowd = roidb['is_crowd']
    boxes = np.copy(boxes)

    img = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert img is not None, fname
    img = img.astype('float32')
    # input boxes must already be float
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    # augmentation of image and boxes
    img, aug_params = aug.augment_return_params(img)
    pts = box_to_point8(boxes)
    pts = aug.augment_coords(pts, aug_params)
    boxes = point8_to_box(pts)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    out = {'image': img}
    try:
        if not cfg.MODE_FPN:
            labels, anchors = get_rpn_anchor_input(img, boxes, is_crowd)
            out['anchor_labels'] = labels
            out['anchor_boxes'] = anchors
        else:
            levels = get_multilevel_rpn_anchor_input(img, boxes, is_crowd)
            for offset, (labels, anchors) in enumerate(levels):
                lvl = offset + 2
                out['anchor_labels_lvl{}'.format(lvl)] = labels
                out['anchor_boxes_lvl{}'.format(lvl)] = anchors

        # skip crowd boxes in the training targets
        keep = is_crowd == 0
        boxes, klass = boxes[keep], klass[keep]
        out['gt_boxes'] = boxes
        out['gt_labels'] = klass
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as exc:
        log_once("Input {} is filtered for training: {}".format(fname, str(exc)), 'warn')
        return None

    if not cfg.MODE_MASK:
        return out

    # The augmentation modifies polygons in-place, hence the deep copy.
    segm = copy.deepcopy(roidb['segmentation'])
    segm = [s for idx, s in enumerate(segm) if not is_crowd[idx]]
    assert len(segm) == len(boxes)

    # Augment every polygon and draw one image-sized binary mask per box.
    gt_masks = []
    for polygons in segm:
        warped = [aug.augment_coords(poly, aug_params) for poly in polygons]
        gt_masks.append(segmentation_to_mask(warped, img.shape[0], img.shape[1]))
    out['gt_masks'] = np.asarray(gt_masks, dtype='uint8')  # values in {0, 1}
    return out
def _augment_boxes(boxes, aug, params):
    """Run ``boxes`` through ``aug`` and reject degenerate results.

    The boxes are converted with ``box_to_point8``, transformed by
    ``aug.augment_coords`` using ``params`` and converted back with
    ``point8_to_box``.

    Returns:
        The transformed boxes, or None when the smallest box area is <= 0.
    """
    corners = aug.augment_coords(box_to_point8(boxes), params)
    warped = point8_to_box(corners)
    smallest_area = np.min(np_area(warped))
    return None if smallest_area <= 0 else warped
def preprocess(img):
    """Build the list-style training datapoint for one image.

    Args:
        img: dict with 'file_name', 'boxes', 'class', 'is_crowd' (and
            'segmentation' when ``add_mask`` is set).

    Returns:
        ``[image, *anchor_inputs, gt_boxes, gt_labels]`` with the masks
        appended when ``add_mask`` is set, or None when ``MalformedData`` is
        raised.
    """
    fname = img['file_name']
    boxes = img['boxes']
    klass = img['class']
    is_crowd = img['is_crowd']

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    # boxes are expected to be floating point already
    assert boxes.dtype == np.float32

    # Augment the image and move the boxes along with it.
    image, params = aug.augment_return_params(image)
    corners = aug.augment_coords(box_to_point8(boxes), params)
    boxes = point8_to_box(corners)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    try:
        if config.MODE_FPN:
            # Flatten the per-level pairs into one sequence.
            anchor_inputs = itertools.chain.from_iterable(
                get_multilevel_rpn_anchor_input(image, boxes, is_crowd))
        else:
            # (anchor_labels, anchor_boxes)
            anchor_inputs = get_rpn_anchor_input(image, boxes, is_crowd)
            assert len(anchor_inputs) == 2

        # Crowd boxes are not used as training targets.
        not_crowd = is_crowd == 0
        boxes = boxes[not_crowd]
        klass = klass[not_crowd]
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as err:
        log_once("Input {} is filtered for training: {}".format(fname, str(err)), 'warn')
        return None

    datapoint = [image]
    datapoint.extend(anchor_inputs)
    datapoint.extend([boxes, klass])

    # TODO pad im when FPN

    if add_mask:
        # Deep copy because the augmentation modifies the polygons in-place.
        all_polys = copy.deepcopy(img['segmentation'])
        kept_polys = [obj for k, obj in enumerate(all_polys) if not is_crowd[k]]
        assert len(kept_polys) == len(boxes)

        # One image-sized binary mask per remaining box.
        masks = [
            segmentation_to_mask(
                [aug.augment_coords(p, params) for p in polys],
                image.shape[0], image.shape[1])
            for polys in kept_polys
        ]
        datapoint.append(np.asarray(masks, dtype='uint8'))  # values in {0, 1}
    return datapoint
def preprocess(img):
    """Build ``[image, fm_labels, fm_boxes, gt_boxes, gt_labels(, masks)]``.

    Args:
        img: dict with 'file_name', 'boxes', 'class', 'is_crowd' (and
            'segmentation' when ``add_mask`` is set).

    Returns:
        The datapoint list, or None when ``MalformedData`` is raised.
    """
    fname = img['file_name']
    boxes = img['boxes']
    klass = img['class']
    is_crowd = img['is_crowd']

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    # boxes are expected to be floating point already
    assert boxes.dtype == np.float32

    # Same augmentation for the image and for the boxes.
    image, params = aug.augment_return_params(image)
    pts = box_to_point8(boxes)
    pts = aug.augment_coords(pts, params)
    boxes = point8_to_box(pts)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    try:
        fm_labels, fm_boxes = get_rpn_anchor_input(image, boxes, is_crowd)

        # Crowd boxes are not used as training targets.
        keep = is_crowd == 0
        boxes, klass = boxes[keep], klass[keep]
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as err:
        log_once("Input {} is filtered for training: {}".format(fname, str(err)), 'warn')
        return None

    datapoint = [image, fm_labels, fm_boxes, boxes, klass]
    if not add_mask:
        return datapoint

    # Deep copy because the augmentation modifies the polygons in-place.
    segm = copy.deepcopy(img['segmentation'])
    segm = [s for idx, s in enumerate(segm) if not is_crowd[idx]]
    assert len(segm) == len(boxes)

    # Augment the polygon coordinates and draw one binary mask per box.
    gt_masks = []
    for polygons in segm:
        warped = [aug.augment_coords(poly, params) for poly in polygons]
        gt_masks.append(
            segmentation_to_mask(warped, image.shape[0], image.shape[1]))
    datapoint.append(np.asarray(gt_masks, dtype='uint8'))  # values in {0, 1}
    return datapoint
def preprocess(img):
    """Build one training datapoint from an image and its instance mask.

    The annotation is fetched with ``getAnnotation(df, id)``; image and mask
    are augmented together with ``strong_aug()``, resized with
    ``fix_resize_transform_range`` and padded with ``pad_to_factor`` before
    boxes, classes and per-instance masks are derived from the mask.

    Args:
        img: dict with 'image_data' (the path passed to ``cv2.imread``) and
            'id' (the key used to look up the annotation).

    Returns:
        ``[image, fm_labels, fm_boxes, boxes, klass, masks]``, or None when
        there is no annotation, when no box / a non-positive-area box remains
        after augmentation, or when ``MalformedData`` is raised.
    """
    image_path, fname = img['image_data'], img['id']
    multi_mask = getAnnotation(df, fname)
    if multi_mask is None:
        return None

    im = cv2.imread(image_path)
    # Fail with the offending path instead of an obscure error further down
    # (same check as the other preprocess variants perform).
    assert im is not None, image_path

    # Joint augmentation of image and mask.
    augmented = strong_aug()(image=im, mask=multi_mask)
    im, multi_mask = augmented['image'], augmented['mask']

    # Resize, then pad both to the factor required by pad_to_factor.
    im, multi_mask = fix_resize_transform_range(im, multi_mask, [768, 2000], 1.0)
    im = pad_to_factor(im)
    multi_mask = pad_to_factor(multi_mask)

    boxes, klass, masks, is_crowd = multi_mask_to_annotation(multi_mask)
    if len(boxes) == 0 or np.min(np_area(boxes)) <= 0:
        log_once("Input have zero area box: {}".format(fname), 'warn')
        return None

    # rpn anchor. `boxes` is known to be non-empty here, so only the anchor
    # computation itself can signal MalformedData.
    try:
        if config.FPN:
            fm_labels, fm_boxes = get_rpn_anchor_input_FPN(im, boxes, is_crowd)
        else:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, is_crowd)
    except MalformedData as e:
        log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
        return None

    return [im, fm_labels, fm_boxes, boxes, klass, masks]
def preprocess(img):
    """Load and augment one image together with its boxes.

    Args:
        img: dict with the keys 'file_name', 'boxes' and 're_id_class'.

    Returns:
        ``[image, boxes, re_id_class]`` with the augmented image and boxes.
    """
    fname = img['file_name']
    boxes = img['boxes']
    re_id_class = img['re_id_class']
    boxes = np.copy(boxes)

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    # boxes are expected to be floating point already
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    # Augment the image and move the boxes along with it.
    image, params = aug.augment_return_params(image)
    corners = aug.augment_coords(box_to_point8(boxes), params)
    boxes = point8_to_box(corners)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    return [image, boxes, re_id_class]
def preprocess(img):
    """Build a training datapoint that also carries the untouched image.

    Args:
        img: dict with the keys 'file_name', 'boxes', 'class', 'is_crowd'
            and 're_id_class'.

    Returns:
        ``[image, anchor_labels, anchor_boxes, gt_boxes, gt_labels,
        re_id_class, orig_shape, orig_im]`` where ``orig_shape`` / ``orig_im``
        are the (height, width) and a copy of the image as read from disk, or
        None when ``MalformedData`` is raised.
    """
    fname, boxes, klass, is_crowd, re_id_class = img['file_name'], img['boxes'], \
        img['class'], img['is_crowd'], img['re_id_class']
    boxes = np.copy(boxes)
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    # Check the read result BEFORE touching `im`; otherwise an unreadable file
    # fails with an AttributeError that does not name the file.
    assert im is not None, fname
    orig_shape = im.shape[:2]
    orig_im = np.copy(im)
    im = im.astype('float32')
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    # augmentation:
    im, params = aug.augment_return_params(im)
    points = box_to_point8(boxes)
    points = aug.augment_coords(points, params)
    boxes = point8_to_box(points)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    # rpn anchor:
    try:
        # anchor_labels, anchor_boxes
        anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
        assert len(anchor_inputs) == 2

        not_crowd = is_crowd == 0
        boxes = boxes[not_crowd]  # skip crowd boxes in training target
        klass = klass[not_crowd]
        if not len(boxes):
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once(
            "Input {} is filtered for training: {}".format(fname, str(e)),
            'warn')
        return None

    return [im] + list(anchor_inputs) + [
        boxes, klass, re_id_class, orig_shape, orig_im
    ]
def __call__(self, roidb):
    """Convert one roidb entry into a training datapoint dict.

    Args:
        roidb: dict with 'file_name', 'boxes', 'class', 'is_crowd' (and
            'segmentation' when ``self.cfg.MODE_MASK`` is set).

    Returns:
        A dict with 'image', the RPN anchor targets (per level under
        ``self.cfg.MODE_FPN``), 'gt_boxes', 'gt_labels' and, with masks
        enabled, 'gt_masks_packed' (bit-packed along the last axis); None
        when ``MalformedData`` is raised.
    """
    fname = roidb["file_name"]
    boxes = roidb["boxes"]
    klass = roidb["class"]
    is_crowd = roidb["is_crowd"]
    boxes = np.copy(boxes)

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype("float32")
    height, width = image.shape[:2]
    # boxes are expected to be floating point already
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    relative_coords = not self.cfg.DATA.ABSOLUTE_COORD
    if relative_coords:
        # Scale the boxes by the size of the image as read from disk.
        boxes[:, 0::2] *= width
        boxes[:, 1::2] *= height

    # Augment the image, then push the boxes through the same parameters.
    image, params = self.aug.augment_return_params(image)
    corners = self.aug.augment_coords(box_to_point8(boxes), params)
    boxes = point8_to_box(corners)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    datapoint = {"image": image}
    try:
        if self.cfg.MODE_FPN:
            per_level = self.get_multilevel_rpn_anchor_input(image, boxes, is_crowd)
            # Levels are numbered starting from 2.
            for level, (lvl_labels, lvl_boxes) in enumerate(per_level, 2):
                datapoint["anchor_labels_lvl{}".format(level)] = lvl_labels
                datapoint["anchor_boxes_lvl{}".format(level)] = lvl_boxes
        else:
            anchor_labels, anchor_boxes = self.get_rpn_anchor_input(image, boxes, is_crowd)
            datapoint["anchor_labels"] = anchor_labels
            datapoint["anchor_boxes"] = anchor_boxes

        # Crowd boxes are dropped from the training targets.
        not_crowd = is_crowd == 0
        boxes = boxes[not_crowd]
        klass = klass[not_crowd]
        datapoint["gt_boxes"] = boxes
        datapoint["gt_labels"] = klass
        if len(boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as err:
        log_once("Input {} is filtered for training: {}".format(fname, str(err)), "warn")
        return None

    if self.cfg.MODE_MASK:
        # Deep copy because the augmentation modifies the polygons in-place.
        all_polys = copy.deepcopy(roidb["segmentation"])
        kept_polys = [obj for k, obj in enumerate(all_polys) if not is_crowd[k]]
        assert len(kept_polys) == len(boxes)

        width_height = np.asarray([width, height], dtype=np.float32)
        # Round the mask width up to a multiple of 8 so it can be bit-packed.
        gt_mask_width = int(np.ceil(image.shape[1] / 8.0) * 8)

        masks = []
        for polys in kept_polys:
            if relative_coords:
                polys = [p * width_height for p in polys]
            polys = [self.aug.augment_coords(p, params) for p in polys]
            masks.append(segmentation_to_mask(polys, image.shape[0], gt_mask_width))
        masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
        datapoint['gt_masks_packed'] = np.packbits(masks, axis=-1)
    return datapoint
csv_path = os.path.join(config.BASEDIR, 'train_ship_segmentations_v2.csv') df = pd.read_csv(csv_path, engine="python") df = df.dropna(axis=0) df = df.set_index('ImageId') from tqdm import tqdm for img in tqdm(imgs, total=len(imgs)): im, fname = img['image_data'], img['id'] multi_mask = getAnnotation(df, fname) im = cv2.imread(im) #============================ # Resize augmented = strong_aug()(image=im, mask=multi_mask) im, multi_mask = augmented['image'], augmented['mask'] boxes, klass, masks, is_crowd = multi_mask_to_annotation(multi_mask) if len(boxes) == 0 or np.min(np_area(boxes)) <= 0: log_once("Input have zero area box: {}".format(fname), 'warn') print(boxes) exit() """ from viz import draw_annotation, draw_mask viz = draw_annotation(im, boxes, klass) for ind, mask in enumerate(masks): viz = draw_mask(viz, mask) cv2.imwrite("./eval_gt/{}.jpg".format(fname), viz) """ """ # for each gt, find all those anchors (including ties) that has the max ious with it ANCHOR_SIZES = (32,64,128,256,512) RAIOS = (0.5,1,2) #ANCHOR_SIZES = (16, 32, 64, 128, 256)
def draw_final_outputs(img, results):
    """Draw the detections that survive ``box_class_nms`` onto ``img``.

    Masks are painted from the largest box to the smallest one, each with a
    palette id chosen from the digit found in the first space-separated token
    of ``str(class_id)``; afterwards the kept boxes are drawn with
    "class, score" tags.

    Args:
        img: image to draw on; returned as-is when ``results`` is empty.
        results: [DetectionResult]

    Returns:
        The annotated image.
    """
    if len(results) == 0:
        return img

    # Display in largest to smallest order to reduce occlusion
    boxes = np.asarray([r.box for r in results])
    sorted_inds = np.argsort(-np_area(boxes))

    rm_lst = box_class_nms(results, sorted_inds)
    print("rm_lst = ", rm_lst)

    ret = img
    kept = []
    for result_id in sorted_inds:
        if result_id in rm_lst:
            continue
        r = results[result_id]
        kept.append(r)
        if r.mask is None:
            continue

        level = str(r.class_id).split(" ")[0]
        if "1" in level:
            color_id = 23
        elif "2" in level:
            color_id = 22
        elif "3" in level:
            color_id = 9
        else:
            # Previously `color_id` was left unset here (NameError on the
            # first mask, a stale id otherwise).  Fall back to id 23, which
            # the original comments pair with the same green as the intended
            # fallback colour.
            print("error level!")
            color_id = 23
        ret = draw_mask(ret, r.mask, color=None, color_id=color_id)

    if not kept:
        # Everything was suppressed: there is no box left to draw.
        return ret

    new_boxes = [r.box for r in kept]
    tags = [
        "{}, {:.2f}".format(cfg.DATA.CLASS_NAMES[r.class_id], r.score)
        for r in kept
    ]
    return viz.draw_boxes(ret, new_boxes, tags)
def __call__(self, roidb):
    """Convert one roidb entry with house and damage boxes into a datapoint.

    Args:
        roidb: dict with 'file_name', 'boxes_house', 'boxes_damage', 'class',
            'is_crowd' (and 'segmentation' when ``self.cfg.MODE_MASK`` is set).

    Returns:
        A dict with 'image', anchor targets for both box sets,
        'gt_boxes_house', 'gt_boxes_damage', 'gt_labels' and, with masks
        enabled, 'gt_masks_packed'; None when ``MalformedData`` is raised.
    """
    fname = roidb["file_name"]
    boxes_house = roidb["boxes_house"]
    boxes_damage = roidb["boxes_damage"]
    klass = roidb["class"]
    is_crowd = roidb["is_crowd"]

    assert boxes_damage.ndim == 2 and boxes_damage.shape[1] == 4, \
        boxes_damage.shape
    boxes_house = np.copy(boxes_house)
    boxes_damage = np.copy(boxes_damage)

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype("float32")
    height, width = image.shape[:2]
    # boxes are expected to be float32 already
    assert boxes_damage.dtype == np.float32, "Loader has to return float32 boxes!"

    relative_coords = not self.cfg.DATA.ABSOLUTE_COORD
    if relative_coords:
        # Scale both box sets by the size of the image as read from disk.
        for rel_boxes in (boxes_house, boxes_damage):
            rel_boxes[:, 0::2] *= width
            rel_boxes[:, 1::2] *= height

    # Augment the image, then move both box sets with the same transform.
    tfms = self.aug.get_transform(image)
    image = tfms.apply_image(image)

    def _transform_boxes(src_boxes):
        # Transform the boxes and validate labels/areas when any box exists.
        moved = point4_to_box(tfms.apply_coords(box_to_point4(src_boxes)))
        if len(moved):
            assert klass.max() <= self.cfg.DATA.NUM_CATEGORY, \
                "Invalid category {}!".format(klass.max())
            assert np.min(np_area(moved)) > 0, "Some boxes have zero area!"
        return moved

    boxes_house = _transform_boxes(boxes_house)
    boxes_damage = _transform_boxes(boxes_damage)

    datapoint = {"image": image}
    try:
        if self.cfg.MODE_FPN:
            # Two sets of RPN anchor targets, one per box set.
            for suffix, src_boxes in (("house", boxes_house),
                                      ("damage", boxes_damage)):
                per_level = self.get_multilevel_rpn_anchor_input(
                    image, src_boxes, is_crowd)
                for level, (lvl_labels, lvl_boxes) in enumerate(per_level, 2):
                    datapoint["anchor_labels_lvl{}_{}".format(level, suffix)] = lvl_labels
                    datapoint["anchor_boxes_lvl{}_{}".format(level, suffix)] = lvl_boxes
        else:
            # NOTE(review): both calls store their labels under the same
            # "anchor_labels" key, so the house labels are overwritten by the
            # damage labels -- confirm whether that is intended.
            labels, anchors_house = self.get_rpn_anchor_input(
                image, boxes_house, is_crowd)
            datapoint["anchor_labels"] = labels
            datapoint["anchor_boxes_house"] = anchors_house
            labels, anchors_damage = self.get_rpn_anchor_input(
                image, boxes_damage, is_crowd)
            datapoint["anchor_labels"] = labels
            datapoint["anchor_boxes_damage"] = anchors_damage

        # Crowd boxes are dropped from the training targets.
        boxes_house = boxes_house[is_crowd == 0]
        boxes_damage = boxes_damage[is_crowd == 0]
        klass = klass[is_crowd == 0]
        datapoint["gt_boxes_house"] = boxes_house
        datapoint["gt_boxes_damage"] = boxes_damage
        datapoint["gt_labels"] = klass
    except MalformedData as err:
        log_once(
            "Input {} is filtered for training: {}".format(fname, str(err)),
            "warn")
        return None

    if self.cfg.MODE_MASK:
        # Deep copy because the augmentation modifies the polygons in-place.
        all_polys = copy.deepcopy(roidb["segmentation"])
        kept_polys = [obj for k, obj in enumerate(all_polys) if not is_crowd[k]]
        assert len(kept_polys) == len(boxes_house)

        width_height = np.asarray([width, height], dtype=np.float32)
        # Round the mask width up to a multiple of 8 so it can be bit-packed.
        gt_mask_width = int(np.ceil(image.shape[1] / 8.0) * 8)

        masks = []
        for polys in kept_polys:
            if relative_coords:
                polys = [p * width_height for p in polys]
            polys = [tfms.apply_coords(p) for p in polys]
            masks.append(polygons_to_mask(polys, image.shape[0], gt_mask_width))

        if len(masks):
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            packed = np.packbits(masks, axis=-1)
        else:
            # no gt on the image
            packed = np.zeros((0, image.shape[0], gt_mask_width // 8), dtype='uint8')
        datapoint['gt_masks_packed'] = packed
    return datapoint
def preprocess(roidb_batch):
    """Preprocess a batch of roidb entries and stack them into one datapoint.

    Each entry is read, augmented and turned into FPN anchor targets and
    ground truth; images, boxes, labels (and masks under ``cfg.MODE_MASK``)
    are then zero-padded to a common size (labels are padded with -1) and
    stacked.

    Args:
        roidb_batch: iterable of dicts with 'file_name', 'boxes', 'class',
            'is_crowd' (and 'segmentation' when masks are enabled).

    Returns:
        The batched dict, or None as soon as one entry raises
        ``MalformedData``.

    Raises:
        NotImplementedError: when ``cfg.MODE_FPN`` is not set.
    """
    samples = []
    for roidb in roidb_batch:
        fname = roidb['file_name']
        boxes = roidb['boxes']
        klass = roidb['class']
        is_crowd = roidb['is_crowd']
        boxes = np.copy(boxes)

        image = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert image is not None, fname
        image = image.astype('float32')
        # boxes are expected to be floating point already
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # Augment the image, then push the boxes through the same parameters.
        image, params = aug.augment_return_params(image)
        corners = aug.augment_coords(box_to_point8(boxes), params)
        boxes = point8_to_box(corners)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        sample = {'images': image}
        try:
            if not cfg.MODE_FPN:
                raise NotImplementedError("[armand] Batch mode only available for FPN")
            per_level = get_multilevel_rpn_anchor_input(image, boxes, is_crowd)
            # Levels are numbered starting from 2.
            for level, (lvl_labels, lvl_boxes) in enumerate(per_level, 2):
                sample['anchor_labels_lvl{}'.format(level)] = lvl_labels
                sample['anchor_boxes_lvl{}'.format(level)] = lvl_boxes

            # Crowd boxes are dropped from the training targets.
            not_crowd = is_crowd == 0
            boxes = boxes[not_crowd]
            klass = klass[not_crowd]
            sample['gt_boxes'] = boxes
            sample['gt_labels'] = klass
            sample['filename'] = fname
            if len(boxes) == 0:
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as err:
            log_once("Input {} is filtered for training: {}".format(fname, str(err)), 'warn')
            return None

        if cfg.MODE_MASK:
            # Deep copy because the augmentation modifies the polygons in-place.
            all_polys = copy.deepcopy(roidb['segmentation'])
            kept_polys = [obj for k, obj in enumerate(all_polys) if not is_crowd[k]]
            assert len(kept_polys) == len(boxes)

            # One image-sized binary mask per remaining box.
            masks = [
                segmentation_to_mask(
                    [aug.augment_coords(p, params) for p in polys],
                    image.shape[0], image.shape[1])
                for polys in kept_polys
            ]
            sample['gt_masks'] = np.asarray(masks, dtype='uint8')  # values in {0, 1}

        samples.append(sample)

    # ------------------------------------------------------------------
    # Batchify the output
    # ------------------------------------------------------------------
    batch = {}

    # Anchor targets of levels 2..6 are stacked without any padding.
    for level in range(2, 7):
        for prefix in ("anchor_labels_lvl", "anchor_boxes_lvl"):
            field = "{}{}".format(prefix, level)
            batch[field] = np.stack([s[field] for s in samples])

    # Images, gt boxes, gt labels and gt masks differ in size per image:
    # pad them to a common container and remember the original sizes.
    if cfg.PREPROC.PREDEFINED_PADDING:
        containers = [get_padding_shape(*(s["images"].shape[:2])) for s in samples]
        max_height = max(shape[0] for shape in containers)
        max_width = max(shape[1] for shape in containers)
    else:
        shapes = [s["images"].shape for s in samples]
        max_height = max(shape[0] for shape in shapes)
        max_width = max(shape[1] for shape in shapes)

    # Images: pad at the bottom and at the right.
    padded_images = []
    orig_dims = []
    for s in samples:
        pixels = s["images"]
        orig_dims.append(pixels.shape)
        pad_h = max_height - pixels.shape[0]
        pad_w = max_width - pixels.shape[1]
        padded_images.append(
            np.pad(pixels, [[0, pad_h], [0, pad_w], [0, 0]], 'constant'))
    batch["images"] = np.stack(padded_images)
    batch["orig_image_dims"] = np.stack(orig_dims)

    # Ground truth: pad every image up to the largest gt count in the batch.
    max_num_gts = max(s["gt_labels"].size for s in samples)
    gt_counts = []
    labels_out = []
    boxes_out = []
    masks_out = []
    for s in samples:
        num_gt = s["gt_labels"].size
        gt_counts.append(num_gt)
        pad_gt = max_num_gts - num_gt

        labels_out.append(
            np.pad(s["gt_labels"], [0, pad_gt], 'constant', constant_values=-1))
        boxes_out.append(
            np.pad(s["gt_boxes"], [[0, pad_gt], [0, 0]], 'constant'))

        pad_h = max_height - s["images"].shape[0]
        pad_w = max_width - s["images"].shape[1]
        if cfg.MODE_MASK:
            masks_out.append(
                np.pad(s["gt_masks"],
                       [[0, pad_gt], [0, pad_h], [0, pad_w]], 'constant'))

    batch["orig_gt_counts"] = np.stack(gt_counts)
    batch["gt_labels"] = np.stack(labels_out)
    batch["gt_boxes"] = np.stack(boxes_out)
    batch["filenames"] = [s["filename"] for s in samples]
    if cfg.MODE_MASK:
        batch["gt_masks"] = np.stack(masks_out)

    return batch
def preprocess(roidb):
    """Build one training datapoint from an image read with ``imread``.

    The array returned by ``imread`` gets a new third axis that is repeated
    three times before the usual augmentation / anchor / mask pipeline runs.

    Args:
        roidb: dict with 'file_name', 'boxes', 'class', 'is_crowd' (and
            'segmentation' when ``cfg.MODE_MASK`` is set).

    Returns:
        A dict with 'image', the RPN anchor targets (per level under
        ``cfg.MODE_FPN``), 'gt_boxes', 'gt_labels' and optionally 'gt_masks';
        None when ``MalformedData`` is raised.
    """
    fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
        'boxes'], roidb['class'], roidb['is_crowd']
    boxes = np.copy(boxes)
    im = imread(fname)
    assert im is not None, fname
    # Add a third axis and repeat it three times.
    im = np.expand_dims(im, axis=2)
    im = np.repeat(im, 3, axis=2)
    im = im.astype('float32')
    # Needed below to scale relative boxes/polygons.  This line had been
    # commented out, leaving `width`/`height` undefined where they are used.
    height, width = im.shape[:2]
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    if not cfg.DATA.ABSOLUTE_COORD:
        boxes[:, 0::2] *= width
        boxes[:, 1::2] *= height

    # augmentation:
    im, params = aug.augment_return_params(im)
    points = box_to_point8(boxes)
    points = aug.augment_coords(points, params)
    boxes = point8_to_box(points)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    ret = {'image': im}
    # rpn anchor:
    try:
        if cfg.MODE_FPN:
            multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                im, boxes, is_crowd)
            for i, (anchor_labels,
                    anchor_boxes) in enumerate(multilevel_anchor_inputs):
                ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
        else:
            # anchor_labels, anchor_boxes
            ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(
                im, boxes, is_crowd)

        not_crowd = is_crowd == 0
        boxes = boxes[not_crowd]  # skip crowd boxes in training target
        klass = klass[not_crowd]
        ret['gt_boxes'] = boxes
        ret['gt_labels'] = klass
        if not len(boxes):
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once(
            "Input {} is filtered for training: {}".format(fname, str(e)),
            'warn')
        return None

    if cfg.MODE_MASK:
        # augmentation will modify the polys in-place
        segmentation = copy.deepcopy(roidb['segmentation'])
        segmentation = [
            segmentation[k] for k in range(len(segmentation))
            if not is_crowd[k]
        ]
        assert len(segmentation) == len(boxes)

        # Apply augmentation on polygon coordinates.
        # And produce one image-sized binary mask per box.
        masks = []
        width_height = np.asarray([width, height], dtype=np.float32)
        for polys in segmentation:
            if not cfg.DATA.ABSOLUTE_COORD:
                polys = [p * width_height for p in polys]
            polys = [aug.augment_coords(p, params) for p in polys]
            masks.append(
                segmentation_to_mask(polys, im.shape[0], im.shape[1]))
        masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
        ret['gt_masks'] = masks
    return ret
def preprocess(img):
    """Cut one augmented image into chips and build a datapoint per chip.

    The image is read, augmented together with its boxes and handed to
    ``Im2Chip`` along with the proposals looked up in ``proposal_pickle``.
    For every chip that has boxes, the boxes falling outside the size range
    of the chip's scale are passed to ``get_sniper_rpn_anchor_input`` as
    invalid ground truth, and crowd boxes are removed from the targets.

    Args:
        img: dict with the keys 'file_name', 'boxes', 'class' and 'is_crowd'.

    Returns:
        A list with one ``[chip, anchor_labels, anchor_boxes, gt_boxes,
        gt_labels]`` entry per usable chip; chips without boxes or raising
        ``MalformedData`` are left out.
    """
    fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
        'class'], img['is_crowd']
    img_name = fname.split('/')[-1]
    img_id = int(img_name[3:-4])

    # Proposals of the pretrained RPN, used for negative chip extraction.
    # Work on a copy: the in-place "+=" below would otherwise modify the entry
    # stored in proposal_pickle, and the conversion would be applied again the
    # next time the same image is processed.
    proposals = proposal_pickle['boxes'][
        proposal_pickle['ids'].index(img_id)].copy()
    # NOTE(review): this slices the FIRST axis; if `proposals` is an (N, 4)
    # array the intended conversion is probably `[:, 2:4] += [:, 0:2]` --
    # confirm the layout of proposal_pickle['boxes'].
    proposals[2:4] += proposals[0:2]  # from [x,y,w,h] to [x1,y1,x2,y2]

    boxes = np.copy(boxes)
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype('float32')
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    # augmentation:
    im, params = aug.augment_return_params(im)
    points = box_to_point8(boxes)
    points = aug.augment_coords(points, params)
    boxes = point8_to_box(points)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    chip_generator = Im2Chip(
        im, boxes, klass, proposals,
        cfg.SNIPER.SCALES, cfg.SNIPER.VALID_RANGES,
        is_crowd=is_crowd,
        chip_size=cfg.SNIPER.CHIP_SIZE,
        chip_stride=cfg.SNIPER.CHIP_STRIDE)
    im, boxes, klass, scale_indices, is_crowd = \
        chip_generator.genChipMultiScale()

    rets = []
    for i in range(len(im)):
        if len(boxes[i]) == 0:
            continue
        try:
            # NOTE(review): entry 0 of the range is used as the upper bound
            # and entry 1 as the lower bound -- confirm against the layout of
            # cfg.SNIPER.VALID_RANGES.
            valid_range = cfg.SNIPER.VALID_RANGES[scale_indices[i]]
            maxbox, minbox = valid_range[0], valid_range[1]
            # -1 means "unbounded" on that side.
            maxbox = sys.maxsize if maxbox == -1 else maxbox
            minbox = 0 if minbox == -1 else minbox

            # Boxes too large, or too small in both dimensions, for this scale.
            gt_invalid = []
            for box in boxes[i]:
                w = box[2] - box[0]
                h = box[3] - box[1]
                if w >= maxbox or h >= maxbox or (w < minbox and h < minbox):
                    gt_invalid.append(box)

            # anchor_labels, anchor_boxes
            anchor_inputs = get_sniper_rpn_anchor_input(
                im[i], boxes[i], is_crowd[i], gt_invalid)
            assert len(anchor_inputs) == 2

            not_crowd = is_crowd[i] == 0
            boxes[i] = boxes[i][not_crowd]  # skip crowd boxes in training target
            klass[i] = klass[i][not_crowd]
            if not len(boxes[i]):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(
                    fname, str(e)), 'warn')
            continue

        rets.append([im[i]] + list(anchor_inputs) + [boxes[i], klass[i]])
    return rets