def draw_final_outputs(img, results):
    """
    Args:
        results: [DetectionResult]
    """
    if len(results) == 0:
        return img

    # Display in largest to smallest order to reduce occlusion
    boxes = np.asarray([r.box for r in results])
    areas = np_area(boxes)
    sorted_inds = np.argsort(-areas)

    ret = img
    tags = []

    for result_id in sorted_inds:
        r = results[result_id]
        if r.mask is not None:
            ret = draw_mask(ret, r.mask)

    for r in results:
        tags.append("{},{:.2f}".format(cfg.DATA.CLASS_NAMES[r.class_id], r.score))
    ret = viz.draw_boxes(ret, boxes, tags)
    return ret
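A minimal usage sketch, assuming DetectionResult is the (box, score, class_id, mask) namedtuple used by the evaluation code and that cfg.DATA.CLASS_NAMES has already been populated; the file paths and the single hand-built result are hypothetical.

import cv2
import numpy as np

img = cv2.imread("input.jpg", cv2.IMREAD_COLOR)             # hypothetical input image
results = [
    DetectionResult(
        box=np.array([10, 20, 120, 200], dtype=np.float32),  # x1, y1, x2, y2
        score=0.92,
        class_id=1,
        mask=None),                                           # no instance mask for this box
]
viz_img = draw_final_outputs(img, results)
cv2.imwrite("output.jpg", viz_img)                            # hypothetical output path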
def training_roidbs(self):
    lines = self.read_lines()
    roidbs = []
    for line in lines:
        file_name = self.read_file_name(line["md5"])
        if os.path.isfile(file_name):
            boxes, classes = self.read_labels(line["labels"])
            # Remove boxes with empty area
            if boxes.size:
                non_zero_area = np_area(boxes) > 0
                boxes = boxes[non_zero_area, :]
                classes = classes[non_zero_area]
            boxes = np.float32(boxes)
            roidb = {
                "file_name": file_name,
                "boxes": boxes,
                "class": classes,
                "is_crowd": np.zeros((classes.shape[0]))
            }
            roidbs.append(roidb)
    return roidbs
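Each entry returned by training_roidbs() is a plain dict that the training dataflow consumes. A small sanity-check sketch over those fields, where `ds` is a hypothetical instance of the dataset class defining the method above:

import numpy as np

roidbs = ds.training_roidbs()
for roidb in roidbs:
    boxes = roidb["boxes"]
    assert boxes.dtype == np.float32             # the preprocessor below requires float32 boxes
    assert len(roidb["class"]) == len(boxes)     # one class label per box
    assert len(roidb["is_crowd"]) == len(boxes)  # one crowd flag per box (all zeros here)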
def __call__(self, roidb):
    fname, boxes, klass, is_crowd = roidb["file_name"], roidb["boxes"], roidb["class"], roidb["is_crowd"]
    assert boxes.ndim == 2 and boxes.shape[1] == 4, boxes.shape
    boxes = np.copy(boxes)
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype("float32")
    height, width = im.shape[:2]
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return float32 boxes!"

    if not self.cfg.DATA.ABSOLUTE_COORD:
        boxes[:, 0::2] *= width
        boxes[:, 1::2] *= height

    # augmentation:
    tfms = self.aug.get_transform(im)
    im = tfms.apply_image(im)
    points = box_to_point8(boxes)
    points = tfms.apply_coords(points)
    boxes = point8_to_box(points)
    if len(boxes):
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    ret = {"image": im}
    # Add rpn data to dataflow:
    try:
        if self.cfg.MODE_FPN:
            multilevel_anchor_inputs = self.get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
            for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                ret["anchor_labels_lvl{}".format(i + 2)] = anchor_labels
                ret["anchor_boxes_lvl{}".format(i + 2)] = anchor_boxes
        else:
            ret["anchor_labels"], ret["anchor_boxes"] = self.get_rpn_anchor_input(im, boxes, is_crowd)

        boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
        klass = klass[is_crowd == 0]
        ret["gt_boxes"] = boxes
        ret["gt_labels"] = klass
    except MalformedData as e:
        log_once("Input {} is filtered for training: {}".format(fname, str(e)), "warn")
        return None

    if self.cfg.MODE_MASK:
        # augmentation will modify the polys in-place
        segmentation = copy.deepcopy(roidb["segmentation"])
        segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
        assert len(segmentation) == len(boxes)

        # Apply augmentation on polygon coordinates.
        # And produce one image-sized binary mask per box.
        masks = []
        width_height = np.asarray([width, height], dtype=np.float32)
        gt_mask_width = int(np.ceil(im.shape[1] / 8.0) * 8)  # pad to 8 in order to pack mask into bits
        for polys in segmentation:
            if not self.cfg.DATA.ABSOLUTE_COORD:
                polys = [p * width_height for p in polys]
            polys = [tfms.apply_coords(p) for p in polys]
            masks.append(polygons_to_mask(polys, im.shape[0], gt_mask_width))

        if len(masks):
            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
            masks = np.packbits(masks, axis=-1)
        else:  # no gt on the image
            masks = np.zeros((0, im.shape[0], gt_mask_width // 8), dtype='uint8')

        ret['gt_masks_packed'] = masks

        # from viz import draw_annotation, draw_mask
        # viz = draw_annotation(im, boxes, klass)
        # for mask in masks:
        #     viz = draw_mask(viz, mask)
        # tpviz.interactive_imshow(viz)
    return ret
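A sketch of how a preprocessor like this is typically wired into a tensorpack dataflow. The TrainingDataPreprocessor class name, the `ds_source` dataset object, and the `cfg` object are assumptions modeled on the standard tensorpack FasterRCNN example, not confirmed by the snippet above.

from tensorpack.dataflow import DataFromList, MapData

roidbs = ds_source.training_roidbs()        # hypothetical dataset object (see training_roidbs above)
ds = DataFromList(roidbs, shuffle=True)     # yields one roidb dict per datapoint
preprocess = TrainingDataPreprocessor(cfg)  # assumed class that defines the __call__ above
ds = MapData(ds, preprocess)                # MapData drops datapoints for which __call__ returns None
ds.reset_state()
for datapoint in ds:                        # each datapoint is the `ret` dict built by __call__
    break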