def run_network(self, img: np.ndarray):
    """Runs an image through the network + postprocessing and returns the masks and bboxes.

    Args:
        img (np.ndarray): The image to process.

    Returns:
        (tuple): the masks and bboxes
    """
    # Run image through the network
    img_gpu = torch.from_numpy(img).cuda().float()
    batch = FastBaseTransform()(img_gpu.unsqueeze(0))
    preds = self.net(batch)
    h, w, _ = img.shape

    # Post process
    t = postprocess(preds, w, h, visualize_lincomb=True, crop_masks=True,
                    score_threshold=0.15)

    top_k = 15  # Further restrict the number of predictions to parse
    idx = t[1].argsort(0, descending=True)[:top_k]
    masks = t[3][idx].cpu().numpy()
    classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    return masks, boxes

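# --- Usage sketch (illustrative, not from the source) ---
# A minimal example of how run_network might be called, assuming the enclosing
# wrapper class (here called YolactWrapper, a hypothetical name) has already
# loaded a trained YOLACT model into self.net on the GPU.
def _example_run_network():
    import cv2

    wrapper = YolactWrapper()          # hypothetical wrapper holding self.net
    img = cv2.imread("example.jpg")    # BGR uint8 image, shape (H, W, 3)
    masks, boxes = wrapper.run_network(img)
    # masks: (top_k, H, W) array; boxes: (top_k, 4) array of (x1, y1, x2, y2)
    print(masks.shape, boxes.shape)
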
def prediction(self, img):
    self.net.detect.cross_class_nms = True
    cfg.mask_proto_debug = False

    with torch.no_grad():
        frame = torch.Tensor(img).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))

        time_start = time.perf_counter()  # time.clock() was removed in Python 3.8
        preds = self.net(batch)

        h, w, _ = img.shape
        t = postprocess(preds, w, h, visualize_lincomb=False, crop_masks=True,
                        score_threshold=self.threshold)
        torch.cuda.synchronize()

        masks = t[3][:self.top_k]
        classes, scores, bboxes = [x[:self.top_k].cpu().numpy() for x in t[:3]]
        time_elapsed = time.perf_counter() - time_start

        num_dets_to_consider = min(self.top_k, classes.shape[0])
        for i in range(num_dets_to_consider):
            if scores[i] < self.threshold:
                num_dets_to_consider = i
                break

        # Initialize the outputs so an empty result is returned when nothing is detected
        masks_msg = np.zeros((0, h, w, 1), dtype=np.uint8)
        scores_msg = np.zeros(num_dets_to_consider)
        class_label_msg = np.empty(num_dets_to_consider, dtype="S20")
        bboxes_msg = np.zeros([num_dets_to_consider, 4], dtype=int)

        if num_dets_to_consider >= 1:
            masks = masks[:num_dets_to_consider, :, :, None]
            masks_msg = masks.cpu().detach().numpy().astype(np.uint8)

            for i in reversed(range(num_dets_to_consider)):
                scores_msg[i] = scores[i]
                class_label_msg[i] = cfg.dataset.class_names[classes[i]]
                bboxes_msg[i] = bboxes[i]
                print(class_label_msg[i].decode(), "%.2f" % (scores_msg[i]))

        os.system('cls' if os.name == 'nt' else 'clear')
        print("%.2f" % (1 / time_elapsed), "hz")

        if self.display_cv:
            self.display(frame, masks, classes, scores, bboxes, num_dets_to_consider)

        return masks_msg, class_label_msg, scores_msg, bboxes_msg

def prep_benchmark(dets_out, h, w):
    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h, crop_masks=args.crop,
                        score_threshold=args.score_threshold)

    with timer.env('Copy'):
        classes, scores, boxes, masks = [x[:args.top_k] for x in t]
        if isinstance(scores, list):
            box_scores = scores[0].cpu().numpy()
            mask_scores = scores[1].cpu().numpy()
        else:
            scores = scores.cpu().numpy()
        classes = classes.cpu().numpy()
        boxes = boxes.cpu().numpy()
        masks = masks.cpu().numpy()

    with timer.env('Sync'):
        # Just in case
        torch.cuda.synchronize()

def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False,
                 mask_alpha=0.45):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop, score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][:args.top_k]
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    if num_dets_to_consider == 0:
        # No detections found so just output the original image
        return (img_gpu * 255).byte().cpu().numpy()

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if args.display_masks and cfg.eval_mask_branch:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                            for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_text or args.display_bboxes:
        str_ = ""
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                            text_color, font_thickness, cv2.LINE_AA)

                # pub = rospy.Publisher('chatter', String, queue_size=10)
                # rate = rospy.Rate(50)  # 10hz
                # str_ += text_str
                # rospy.loginfo(str_)
                # pub.publish(str_)
                # rate.sleep()

    return img_numpy

def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False,
                 mask_alpha=0.45, image_header=Header()):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    with torch.no_grad():
        detections = Detections()

        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop, score_threshold=args.score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:args.top_k]
            classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

        num_dets_to_consider = min(args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < args.score_threshold:
                num_dets_to_consider = j
                break

        if num_dets_to_consider == 0:
            # No detections found so just output the original image
            return (img_gpu * 255).byte().cpu().numpy()

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            global color_cache
            color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

            if on_gpu is not None and color_idx in color_cache[on_gpu]:
                return color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BRG, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    color_cache[on_gpu][color_idx] = color
                return color

        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        if args.display_masks and cfg.eval_mask_branch:
            # After this, mask is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]

            # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
            colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                                for j in range(num_dets_to_consider)], dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # I did the math for this on pen and paper. This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        print("Num dets: ", num_dets_to_consider)

        if args.display_text or args.display_bboxes:
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 2)

                if args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
                    text_pt = (x1, y1 - 10)
                    text_color = [255, 255, 255]

                    cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                                text_color, font_thickness, cv2.LINE_AA)

                    det = Detection()
                    det.box.x1 = x1
                    det.box.y1 = y1
                    det.box.x2 = x2
                    det.box.y2 = y2
                    det.class_name = _class
                    det.score = score

                    mask_shape = np.shape(masks[j])
                    # print("Shape: ", mask_shape)
                    # mask_bb = np.squeeze(masks[j].cpu().numpy(), axis=2)[y1:y2, x1:x2]  # Crop
                    mask_bb = np.squeeze(masks[j].cpu().numpy(), axis=2)[:, :]  # Every mask (1280 * 720)
                    # print("Box: x1:", x1, ", x2: ", x2, ", y1: ", y1, ", y2: ", y2)
                    # print("Mask in box shape: ", np.shape(mask_bb))
                    mask_rs = np.reshape(mask_bb, -1)
                    # print("New shape: ", np.shape(mask_rs))
                    # print("Mask:\n", mask_bb)
                    det.mask.height = y2 - y1
                    det.mask.width = x2 - x1
                    det.mask.mask = np.array(mask_rs, dtype=bool)

                    detections.detections.append(det)

        detections.header.stamp = image_header.stamp
        detections.header.frame_id = image_header.frame_id
        self.detections_pub.publish(detections)

        self.get_orientation_from_mask(num_dets_to_consider, img_numpy, detections, masks)

        try:
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(img_numpy, "bgr8"))
        except CvBridgeError as e:
            print(e)

def process(self, image: np.ndarray, pos: int):
    """:returns (classes, scores, boxes)

    where `boxes` is an array of bounding boxes of detected objects in
    (xleft, ytop, width, height) format.

    `classes` is the class ids of the corresponding objects.

    `scores` are the computed class scores corresponding to the detected objects.
    Roughly high score indicates strong belief that the object belongs to the
    identified class.
    """
    _ts = time.perf_counter()
    logging.debug(f'Received frame {pos}')
    if self.net is None:
        self.sigError.emit(YolactException('Network not initialized'))
        return

    # Partly follows yolact eval.py
    tic = time.perf_counter_ns()
    _ = qc.QMutexLocker(self.mutex)
    with torch.no_grad():
        if self.cuda:
            image = torch.from_numpy(image).cuda().float()
        else:
            image = torch.from_numpy(image).float()
        batch = FastBaseTransform()(image.unsqueeze(0))
        preds = self.net(batch)
        image_gpu = image / 255.0
        h, w, _ = image.shape
        save = self.config.rescore_bbox
        self.config.rescore_bbox = True
        classes, scores, boxes, masks = oututils.postprocess(
            preds, w, h,
            visualize_lincomb=False,
            crop_masks=True,
            score_threshold=self.score_threshold)
        idx = scores.argsort(0, descending=True)[:self.top_k]
        # if self.config.eval_mask_branch:
        #     masks = masks[idx]
        classes, scores, boxes = [x[idx].cpu().numpy()
                                  for x in (classes, scores, boxes)]
        # This is probably not required, `postprocess` uses `score_thresh` already
        num_dets_to_consider = min(self.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < self.score_threshold:
                num_dets_to_consider = j
                break
        # logging.debug('Bounding boxes: %r', boxes)
        # Convert from top-left bottom-right format to
        # top-left, width, height format
        if len(boxes) == 0:
            self.sigProcessed.emit(boxes, pos)
            return
        boxes[:, 2:] = boxes[:, 2:] - boxes[:, :2]
        boxes = np.asanyarray(boxes, dtype=np.int_)
        if self.overlap_thresh < 1:
            dist_matrix = pairwise_distance(new_bboxes=boxes, bboxes=boxes,
                                            boxtype=OutlineStyle.bbox,
                                            metric=DistanceMetric.ios)
            bad_idx = [jj for ii in range(dist_matrix.shape[0] - 1)
                       for jj in range(ii + 1, dist_matrix.shape[1])
                       if dist_matrix[ii, jj] < 1 - self.overlap_thresh]
            good_idx = list(set(range(boxes.shape[0])) - set(bad_idx))
            boxes = boxes[good_idx].copy()
    toc = time.perf_counter_ns()
    logging.debug('Time to process single _image: %f s', 1e-9 * (toc - tic))
    self.sigProcessed.emit(boxes, pos)
    logging.debug(f'Emitted bboxes for frame {pos}: {boxes}')
    _dt = time.perf_counter() - _ts
    logging.debug(
        f'{__name__}.{self.__class__.__name__}.process: Runtime: {_dt}s')

def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id,
                 detections: Detections = None):
    """ Returns a list of APs for this image, with each element being for a class """
    if not args.output_coco_json:
        with timer.env('Prepare gt'):
            gt_boxes = torch.Tensor(gt[:, :4])
            gt_boxes[:, [0, 2]] *= w
            gt_boxes[:, [1, 3]] *= h
            gt_classes = list(gt[:, 4].astype(int))
            gt_masks = torch.Tensor(gt_masks).view(-1, h * w)

            if num_crowd > 0:
                split = lambda x: (x[-num_crowd:], x[:-num_crowd])
                crowd_boxes, gt_boxes = split(gt_boxes)
                crowd_masks, gt_masks = split(gt_masks)
                crowd_classes, gt_classes = split(gt_classes)

    with timer.env('Postprocess'):
        classes, scores, boxes, masks = postprocess(dets, w, h, crop_masks=args.crop,
                                                    score_threshold=args.score_threshold)

        if classes.size(0) == 0:
            return

        classes = list(classes.cpu().numpy().astype(int))
        if isinstance(scores, list):
            box_scores = list(scores[0].cpu().numpy().astype(float))
            mask_scores = list(scores[1].cpu().numpy().astype(float))
        else:
            scores = list(scores.cpu().numpy().astype(float))
            box_scores = scores
            mask_scores = scores
        masks = masks.view(-1, h * w).cuda()
        boxes = boxes.cuda()

    if args.output_coco_json:
        with timer.env('JSON Output'):
            boxes = boxes.cpu().numpy()
            masks = masks.view(-1, h, w).cpu().numpy()

            for i in range(masks.shape[0]):
                # Make sure that the bounding box actually makes sense and a mask was produced
                if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0:
                    detections.add_bbox(image_id, classes[i], boxes[i, :], box_scores[i])
                    detections.add_mask(image_id, classes[i], masks[i, :, :], mask_scores[i])
            return

    with timer.env('Eval Setup'):
        num_pred = len(classes)
        num_gt = len(gt_classes)

        mask_iou_cache = _mask_iou(masks, gt_masks)
        bbox_iou_cache = _bbox_iou(boxes.float(), gt_boxes.float())

        if num_crowd > 0:
            crowd_mask_iou_cache = _mask_iou(masks, crowd_masks, iscrowd=True)
            crowd_bbox_iou_cache = _bbox_iou(boxes.float(), crowd_boxes.float(), iscrowd=True)
        else:
            crowd_mask_iou_cache = None
            crowd_bbox_iou_cache = None

        box_indices = sorted(range(num_pred), key=lambda i: -box_scores[i])
        mask_indices = sorted(box_indices, key=lambda i: -mask_scores[i])

        iou_types = [
            ('box', lambda i, j: bbox_iou_cache[i, j].item(),
             lambda i, j: crowd_bbox_iou_cache[i, j].item(),
             lambda i: box_scores[i], box_indices),
            ('mask', lambda i, j: mask_iou_cache[i, j].item(),
             lambda i, j: crowd_mask_iou_cache[i, j].item(),
             lambda i: mask_scores[i], mask_indices)
        ]

    timer.start('Main loop')
    for _class in set(classes + gt_classes):
        ap_per_iou = []
        num_gt_for_class = sum([1 for x in gt_classes if x == _class])

        for iouIdx in range(len(iou_thresholds)):
            iou_threshold = iou_thresholds[iouIdx]

            for iou_type, iou_func, crowd_func, score_func, indices in iou_types:
                gt_used = [False] * len(gt_classes)

                ap_obj = ap_data[iou_type][iouIdx][_class]
                ap_obj.add_gt_positives(num_gt_for_class)

                for i in indices:
                    if classes[i] != _class:
                        continue

                    max_iou_found = iou_threshold
                    max_match_idx = -1
                    for j in range(num_gt):
                        if gt_used[j] or gt_classes[j] != _class:
                            continue
                        iou = iou_func(i, j)

                        if iou > max_iou_found:
                            max_iou_found = iou
                            max_match_idx = j

                    if max_match_idx >= 0:
                        gt_used[max_match_idx] = True
                        ap_obj.push(score_func(i), True)
                    else:
                        # If the detection matches a crowd, we can just ignore it
                        matched_crowd = False

                        if num_crowd > 0:
                            for j in range(len(crowd_classes)):
                                if crowd_classes[j] != _class:
                                    continue

                                iou = crowd_func(i, j)

                                if iou > iou_threshold:
                                    matched_crowd = True
                                    break

                        # All this crowd code so that we can make sure that our eval code gives the
                        # same result as COCOEval. There aren't even that many crowd annotations to
                        # begin with, but accuracy is of the utmost importance.
                        if not matched_crowd:
                            ap_obj.push(score_func(i), False)
    timer.stop('Main loop')

def segment_yolact(frame, score_threshold, top_k, overlap_thresh, cfgfile, netfile, cuda):
    """Segment objects in frame using YOLACT.

    Parameters
    ----------
    frame: numpy.ndarray
        (WxHxC) integer array with the image content.
    score_threshold: float
        Minimum score to include object, should be in `(0, 1)`.
    top_k: int
        The number of segmented objects to keep.
    overlap_thresh: float
        Merge objects whose bounding boxes overlap (intersection over union)
        more than this amount.
    cfgfile: str
        Path to YOLACT configuration file.
    netfile: str
        Path to YOLACT network weights file.
    cuda: bool
        Whether to use CUDA.

    Returns
    -------
    numpy.ndarray
        An array of bounding boxes of detected objects in
        (xleft, ytop, width, height) format.
    """
    global ynet
    global config

    if ynet is None:
        init_yolact(cfgfile, netfile, cuda)
    # Partly follows yolact eval.py
    tic = time.perf_counter_ns()
    with torch.no_grad():
        if cuda:
            frame = torch.from_numpy(frame).cuda().float()
        else:
            frame = torch.from_numpy(frame).float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = ynet(batch)
        h, w, _ = frame.shape
        config.rescore_bbox = True
        classes, scores, boxes, masks = oututils.postprocess(
            preds, w, h,
            visualize_lincomb=False,
            crop_masks=True,
            score_threshold=score_threshold)
        idx = scores.argsort(0, descending=True)[:top_k]
        # if self.config.eval_mask_branch:
        #     masks = masks[idx]
        classes, scores, boxes = [x[idx].cpu().numpy()
                                  for x in (classes, scores, boxes)]
        # This is probably not required, `postprocess` uses `score_thresh` already
        # num_dets_to_consider = min(self.top_k, classes.shape[0])
        # for j in range(num_dets_to_consider):
        #     if scores[j] < self.score_threshold:
        #         num_dets_to_consider = j
        #         break
        # logging.debug('Bounding boxes: %r', boxes)
        # Convert from top-left bottom-right format to
        # top-left, width, height format
        if len(boxes) == 0:
            return np.empty(0)
        boxes[:, 2:] = boxes[:, 2:] - boxes[:, :2]
        boxes = np.asanyarray(np.rint(boxes), dtype=np.int_)
        if overlap_thresh < 1:
            dist_matrix = ut.pairwise_distance(new_bboxes=boxes, bboxes=boxes,
                                               boxtype=OutlineStyle.bbox,
                                               metric=DistanceMetric.iou)
            bad_boxes = []
            for ii in range(dist_matrix.shape[0] - 1):
                for jj in range(ii + 1, dist_matrix.shape[1]):
                    if dist_matrix[ii, jj] < 1 - overlap_thresh:
                        bad_boxes.append(jj)
            boxes = np.array([boxes[ii] for ii in range(boxes.shape[0])
                              if ii not in bad_boxes], dtype=np.int_)
    toc = time.perf_counter_ns()
    logging.debug('Time to process single image: %f s', 1e-9 * (toc - tic))
    return boxes

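# --- Usage sketch (illustrative, not from the source) ---
# A minimal example of how segment_yolact might be called. The config and
# weight file paths are placeholders for whatever YOLACT files are on disk.
def _example_segment_yolact():
    import cv2
    import logging

    logging.basicConfig(level=logging.DEBUG)
    frame = cv2.imread("example.jpg")
    bboxes = segment_yolact(frame,
                            score_threshold=0.15,
                            top_k=10,
                            overlap_thresh=0.9,
                            cfgfile="path/to/yolact_config.py",    # placeholder
                            netfile="path/to/yolact_weights.pth",  # placeholder
                            cuda=True)
    # Each row of bboxes is (xleft, ytop, width, height) for one detection.
    print(bboxes)
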
def prep_display(dets_out, img, h, w, cfg: YolactConfig, undo_transform=True,
                 class_color=False, mask_alpha=0.45, fps_str="", display_lincomb=False):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env("Postprocess"):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h)
        cfg.rescore_bbox = save

    with timer.env("Copy"):
        idx = t[1].argsort(0, descending=True)  # [:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = classes.shape[0]

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.0
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                            for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = "%s: %.2f" % (_class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                            text_color, font_thickness, cv2.LINE_AA)

    return img_numpy

def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False,
                 mask_alpha=0.45, fps_str=''):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop, score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    if args.only_person:
        for i, _class in enumerate(classes):
            if _class != 0:
                scores[i] = -1

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if (args.display_masks or args.identify_people) and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                            for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        if args.identify_people:
            # Key = original detection index. Value = person index.
            det_to_person_index = {}
            prep_silh_images = np.empty((0, 299, 299, 3))

            for i in range(num_dets_to_consider):
                _class = cfg.dataset.class_names[classes[i]]
                if _class == "person":
                    x1, y1, x2, y2 = boxes[i, :]
                    silh_image = (img_gpu * masks[i] * 255)[y1:(y2 + 1), x1:(x2 + 1), [2, 1, 0]]
                    numpy_silh_image = silh_image.byte().cpu().numpy()
                    prep_silh_image, _ = data.dataset.preprocess(numpy_silh_image, None, 299)
                    prep_silh_images = np.vstack((prep_silh_images,
                                                  np.expand_dims(prep_silh_image, axis=0)))
                    det_to_person_index[i] = prep_silh_images.shape[0] - 1
                    # cv2.imshow("mask", numpy_silh_image)
                    # while cv2.waitKey(1) != ord("q"):
                    #     pass

            # data.dataset.show_batch(prep_silh_images, [0, 1, 2], ["prova1", "prova2", "prova3"])
            # pickle.dump(prep_silh_images, open("prep_silh_images.pkl", "wb"))
            raw_person_preds = person_classifier.predict(prep_silh_images)
            person_preds = np.argmax(raw_person_preds, axis=1)
            person_scores = np.max(raw_person_preds, axis=1)
            print(person_preds, person_scores)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        if args.identify_people:
            with open("data/casia_gait/DatasetB_split_reduced/demo_class_names.txt", "r") as person_classes_file:
                person_classes = person_classes_file.read().splitlines()

        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                if args.identify_people and (j in det_to_person_index):
                    person_index = det_to_person_index[j]
                    person_pred = person_preds[person_index]
                    _class = person_classes[person_pred]
                    score = person_scores[person_index]
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                            text_color, font_thickness, cv2.LINE_AA)

    return img_numpy