def run_once(self, src):
    """Run the network on a single image and package the detections.

    Args:
        src: Image array indexed as (height, width, channels); it is
            converted with ``torch.Tensor(src)`` and moved to the GPU.

    Returns:
        dict: ``{"instances": ...}`` where the value is whatever
        ``self.build_up_result`` returns for the first ``self.output_num``
        detections.
    """
    self.net.detect.cross_class_nms = True
    self.net.detect.use_fast_nms = True
    cfg.mask_proto_debug = False

    with torch.no_grad():
        frame = torch.Tensor(src).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring short elapsed intervals.
        time_start = time.perf_counter()
        preds = self.net(batch)
        time_elapsed = time.perf_counter() - time_start

        h, w, _ = src.shape
        t = postprocess(
            preds,
            w,
            h,
            visualize_lincomb=False,
            crop_masks=True,
            score_threshold=0.)  # TODO: give a suitable threshold
        torch.cuda.synchronize()
        classes, scores, bboxes, masks = [
            x[:self.output_num].cpu().numpy() for x in t
        ]  # TODO: Only 5 objects for test

    print(time_elapsed)
    instances = self.build_up_result(masks.shape[0], classes, bboxes, masks, scores)
    return {"instances": instances}
def processing(dets_out, img):
    """Return boxes, boolean masks and scores of confident class-0 detections.

    The detections are post-processed at the size of ``img``, the best
    ``top_k`` by score are kept, and only entries with class id 0 and a
    score of at least 0.6 are returned. ``score_threshold`` and ``top_k``
    are read from the enclosing module.
    """
    boxes = np.array([])
    masks = np.array([])
    h, w, _ = img.shape

    cfg.rescore_bbox = True
    dets = postprocess(dets_out, w, h, score_threshold=score_threshold)
    order = dets[1].argsort(0, descending=True)[:top_k]

    if cfg.eval_mask_branch:
        # Masks stay on the GPU until the final conversion below.
        masks = dets[3][order]
    classes, scores, boxes = [field[order].cpu().numpy() for field in dets[:3]]

    # First keep class 0 only ...
    is_class_zero = classes == 0
    boxes = boxes[is_class_zero]
    masks = masks[is_class_zero]
    scores = scores[is_class_zero]

    # ... then drop what scores below 0.6.
    is_confident = scores >= 0.6
    boxes = boxes[is_confident]
    masks = masks[is_confident]
    scores = scores[is_confident]

    bool_masks = masks.to(torch.bool).detach().cpu().numpy()
    return boxes, bool_masks, scores
def evalimages(net: Yolact):
    """Evaluate every image listed in ``data/test.json`` and dump mask detections.

    For each image the network output is post-processed at the image's
    annotated width/height, and every instance whose box has positive area
    is added to a ``Detections`` collection, which is dumped at the end.
    The mean of the per-image times returned by ``evalimage`` is printed.

    Args:
        net: The network passed through to ``evalimage``.
    """
    total_time = 0
    num_images = 0
    cocoGt = COCO("data/test.json")
    detections = Detections()

    for imgid in cocoGt.imgs:
        # Look the image record up once instead of three times.
        info = cocoGt.loadImgs(ids=imgid)[0]
        name = info['file_name']
        h = info['height']
        w = info['width']
        path = "data/test_images/" + name

        predict, timee = evalimage(net, path)
        total_time += timee
        num_images += 1

        classes, scores, boxes, masks = postprocess(
            predict,
            w,
            h,
            crop_masks=args.crop,
            score_threshold=args.score_threshold)
        classes = list(classes.cpu().numpy().astype(int))
        scores = list(scores.cpu().numpy().astype(float))
        masks = masks.cpu().numpy()
        boxes = boxes.cpu().numpy()

        if len(classes) > 0:  # If any objects are detected in this image
            for i in range(masks.shape[0]):  # Loop all instances
                # Skip degenerate boxes (zero or negative area).
                if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0:
                    detections.add_mask(imgid, classes[i], masks[i, :, :], scores[i])

    detections.dump()
    # Average over the images actually processed; the previous hard-coded
    # divisor of 100 was only right for a 100-image test set.
    print(total_time / num_images if num_images else 0.0)
    print('Done.')
def prep_benchmark(dets_out, h, w):
    """Time post-processing, result fetching and synchronisation.

    The top ``args.top_k`` detections are handed to ``jt.fetch`` with
    callbacks that file each fetched value into a local dictionary; the
    dictionary is not returned.
    """
    with timer.env('Postprocess'):
        t = postprocess(dets_out,
                        w,
                        h,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)

    result = {}

    def store_as(key):
        # Callback that records the fetched value under `key`.
        return lambda value: result.update({key: value})

    with timer.env('Copy'):
        classes, scores, boxes, masks = [x[:args.top_k] for x in t]

        if isinstance(scores, list):
            # A list carries separate box and mask scores.
            jt.fetch(scores[0], store_as('box_scores'))
            jt.fetch(scores[1], store_as('mask_scores'))
        else:
            jt.fetch(scores, store_as('scores'))

        jt.fetch(classes, store_as('classes'))
        jt.fetch(boxes, store_as('boxes'))
        jt.fetch(masks, store_as('masks'))

    with timer.env('Sync'):
        # Just in case
        jt.sync_all()
def post_process(dets_out, img, h, w, top_k=1, score_threshold=0.6, undo_transform=True):
    """
    Build an RGBA image whose alpha channel holds the best detection's mask.

    Note: If undo_transform=False, h and w are taken from img.shape and the
    passed-in values are ignored. When no detection reaches score_threshold
    the RGBA conversion of the image is returned unchanged.
    """
    if undo_transform:
        img_gpu = torch.Tensor(undo_image_transformation(img, w, h)).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        previous_rescore = cfg.rescore_bbox
        cfg.rescore_bbox = True
        dets = postprocess(dets_out, w, h,
                           visualize_lincomb=False,
                           crop_masks=False,
                           score_threshold=score_threshold)
        cfg.rescore_bbox = previous_rescore

    with timer.env('Copy'):
        order = dets[1].argsort(0, descending=True)[:top_k]
        if cfg.eval_mask_branch:
            # Masks are processed on the GPU, so they are not copied here.
            masks = dets[3][order]
        classes, scores, boxes = [field[order].cpu().numpy() for field in dets[:3]]

    # Scores are sorted descending: stop at the first one under the threshold.
    limit = min(top_k, classes.shape[0])
    num_dets_to_consider = next(
        (j for j in range(limit) if scores[j] < score_threshold), limit)

    rgb = (img_gpu * 255).byte().cpu().numpy()
    final_res = cv2.cvtColor(rgb, cv2.COLOR_RGB2RGBA)
    if num_dets_to_consider == 0:
        return final_res

    # Add a trailing axis to the kept masks, scale to 0..255, take the first.
    kept = masks[:num_dets_to_consider, :, :, None]
    top_mask = (kept * 255).byte().cpu().numpy()[0]

    # The mask becomes the alpha channel of the output image.
    final_res[:, :, 3] = top_mask.squeeze()
    return final_res
def prep_benchmark(dets_out, h, w):
    """Time post-processing, the host copy of the top-k results, and a CUDA sync.

    Nothing is returned; the function exists for the ``timer`` measurements.
    """
    with timer.env('Postprocess'):
        dets = postprocess(dets_out,
                           w,
                           h,
                           crop_masks=args.crop,
                           score_threshold=args.score_threshold)

    with timer.env('Copy'):
        limit = args.top_k
        classes, scores, boxes, masks = [
            field[:limit].cpu().numpy() for field in dets
        ]

    with timer.env('Sync'):
        # Just in case
        torch.cuda.synchronize()
def get_mask_bbox_and_score(yolact_net: Yolact, img, threshold=0.0, max_predictions=1):
    """
    Segment ``img`` with ``yolact_net`` and return masks, boxes and scores.

    :param yolact_net: Yolact net initialized
    :param img: ndarray img to segment
    :param threshold: detections scoring below this value are discarded
    :param max_predictions: maximum number of predictions
    :returns:
        - masks_to_return - mask(s) as ndarray; a single value when exactly
          one detection is kept, otherwise a list
        - boxes_to_return - bbox(es), single value or list as above
        - scores_to_return - confidence(s), single value or list as above

        All three are None when no detection is kept.
    """
    with torch.no_grad():
        gpu_frame = torch.from_numpy(img).cuda().float()
        preds = yolact_net(FastBaseTransform()(gpu_frame.unsqueeze(0)))

        h, w, _ = img.shape

        previous_rescore = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(preds, w, h, visualize_lincomb=False, crop_masks=True)
        cfg.rescore_bbox = previous_rescore

        order = t[1].argsort(0, descending=True)[:max_predictions]
        classes, scores, boxes, masks = [x[order].cpu().numpy() for x in t[:]]

    # Scores are sorted descending: cut at the first one below the threshold.
    limit = min(max_predictions, classes.shape[0])
    num_dets_to_consider = next(
        (j for j in range(limit) if scores[j] < threshold), limit)

    if not num_dets_to_consider > 0:
        return None, None, None

    kept_masks = masks[:num_dets_to_consider, :, :, None]
    # zip() stops at the shortest input, i.e. at the number of kept masks.
    triples = list(zip(kept_masks, boxes, scores))
    masks_to_return = [m for m, _, _ in triples]
    boxes_to_return = [b for _, b, _ in triples]
    scores_to_return = [s for _, _, s in triples]

    def unwrap_single(items):
        # A one-element list is returned as the bare element.
        return items[0] if len(items) == 1 else items

    return (unwrap_single(masks_to_return),
            unwrap_single(boxes_to_return),
            unwrap_single(scores_to_return))
def detection(image_frame):
    """Detect 'person' and 'car' targets in one frame.

    Returns the GPU frame tensor, the number of retained detections, and
    the masks and class ids of those detections. The per-class limits
    (``top_k_person``, ``top_k_vehicle``) and score thresholds
    (``score_threshold_person``, ``score_threshold_vehicle``) are read from
    the enclosing module.
    """
    with torch.no_grad():
        frame = torch.from_numpy(image_frame).cuda().float()
        preds = net(FastBaseTransform()(frame.unsqueeze(0)))

        # Collect masks, classes, scores and boxes for every target.
        h, w, _ = frame.shape

        with timer.env('Postprocess'):
            previous_rescore = cfg.rescore_bbox
            cfg.rescore_bbox = True
            # Detection results
            t = postprocess(preds, w, h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
            cfg.rescore_bbox = previous_rescore

        with timer.env('Copy'):
            order = t[1].argsort(0, descending=True)[:args.top_k]
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][order]
            classes, scores, boxes = [x[order].cpu().numpy() for x in t[:3]]

        # Keep only 'person' and 'car' targets, each with its own cap and
        # minimum score.
        person_names = ['person']
        vehicle_names = ['car']
        person_count = 0
        vehicle_count = 0
        remain_list = []
        for j in range(classes.shape[0]):
            label = cfg.dataset.class_names[classes[j]]
            if label in person_names:
                accepted = (person_count < top_k_person
                            and scores[j] > score_threshold_person)
                if accepted:
                    remain_list.append(j)
                    person_count += 1
            elif label in vehicle_names:
                accepted = (vehicle_count < top_k_vehicle
                            and scores[j] > score_threshold_vehicle)
                if accepted:
                    remain_list.append(j)
                    vehicle_count += 1

        num_dets_to_consider = len(remain_list)
        target_masks = masks[remain_list]
        target_classes = classes[remain_list]

    return frame, num_dets_to_consider, target_masks, target_classes
def raw_inference(self, img, preds=None, frame=None, batch_idx=0):
    """
    Run (or reuse) inference and return the top-k detections with centroids.

    optional arg preds, frame: when both are given, process_batch() is not
    called, which speeds up cached inferences.

    Returns (classes, class_names, scores, boxes, masks, centroids), limited
    to self.top_k entries.
    """
    if preds is None or frame is None:
        preds, frame = self.process_batch(img)

    if frame.ndim == 4:
        n, h, w, _ = frame.shape
    elif frame.ndim == 3:
        h, w, _ = frame.shape
    else:
        assert False, "Oops, the frame has unexpected number of dimensions: {}".format(
            frame.shape)

    if self.batchsize > 1:
        assert batch_idx is not None, "In batch mode, you must provide batch_idx - meaning which row of batch is used as the results, [0, {}-1]".format(
            n)

    dets = postprocess(preds,
                       w=w,
                       h=h,
                       batch_idx=batch_idx,
                       interpolation_mode='bilinear',
                       visualize_lincomb=False,
                       crop_masks=True,
                       score_threshold=self.score_threshold)

    # Honor the top_k limit: rank by score (column 1) and keep the best.
    score_column = 1
    order = dets[score_column].argsort(0, descending=True)[:self.top_k]
    classes, scores, boxes, masks = [
        field[order].cpu().numpy() for field in dets[:4]
    ]
    assert len(classes) == len(scores) == len(masks)

    # One centroid lookup per mask (in pixel space).
    centroids = [
        self.find_centroids_(masks[i], 1) for i in range(len(masks))
    ]
    class_names = [self.class_names_tuple[class_id] for class_id in classes]

    # TODO do we want to keep tensor, or convert to py list[]?
    assert len(classes) <= self.top_k
    return classes, class_names, scores, boxes, masks, centroids
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Return the boxes and boolean masks of the class-0 detections.

    Note: h and w are always re-read from img.shape, so the passed-in values
    are ignored. undo_transform, class_color, mask_alpha and fps_str are
    accepted for signature compatibility and are not used.

    Returns:
        (boxes, masks): boxes as a numpy array filtered to class id 0, and
        the matching masks converted to a boolean numpy array with
        length-one axes squeezed out.
    """
    masks = np.array([])
    h, w, _ = img.shape

    save = cfg.rescore_bbox
    cfg.rescore_bbox = True
    t = postprocess(dets_out, w, h,
                    visualize_lincomb=args.display_lincomb,
                    crop_masks=args.crop,
                    score_threshold=args.score_threshold)
    cfg.rescore_bbox = save

    idx = t[1].argsort(0, descending=True)[:args.top_k]
    if cfg.eval_mask_branch:
        # Masks are converted on the GPU side first, so don't copy yet.
        masks = t[3][idx]
    classes, _, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    # The former threshold scan and the filtered scores were never used by
    # the returned values, so they are no longer computed.
    is_class_zero = classes == 0
    boxes = boxes[is_class_zero]
    masks = masks[is_class_zero]
    return boxes, np.squeeze(masks.to(torch.bool).detach().cpu().numpy())
def prep_benchmark(dets_out, h, w):
    """Time post-processing, the host copy of the top-k results, and a CUDA sync.

    Scores may arrive as a list of two tensors (box scores and mask scores),
    in which case both are copied separately. Nothing is returned.
    """
    with timer.env('Postprocess'):
        dets = postprocess(dets_out,
                           w,
                           h,
                           crop_masks=args.crop,
                           score_threshold=args.score_threshold)

    def to_host(tensor):
        # Move one result to the CPU as a numpy array.
        return tensor.cpu().numpy()

    with timer.env('Copy'):
        classes, scores, boxes, masks = [field[:args.top_k] for field in dets]
        if not isinstance(scores, list):
            scores = to_host(scores)
        else:
            box_scores = to_host(scores[0])
            mask_scores = to_host(scores[1])
        classes = to_host(classes)
        boxes = to_host(boxes)
        masks = to_host(masks)

    with timer.env('Sync'):
        # Just in case
        torch.cuda.synchronize()
def display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, top_k = 100, score_threshold = 0.3):
    """Return the image multiplied by the first detection mask, as uint8.

    h and w are re-read from img.shape; undo_transform, class_color and
    mask_alpha are accepted but not used here.
    """
    h, w, _ = img.shape
    img_gpu = img / 255.0

    with timer.env('Postprocess'):
        detections = postprocess(dets_out, w, h,
                                 visualize_lincomb=False,
                                 crop_masks=True,
                                 score_threshold=score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are applied on the GPU, so they are not copied here.
            masks = detections[3][:top_k]

    first_mask = masks[0]
    img_gpu = img_gpu * first_mask

    # Note, make sure this is a uint8 tensor or opencv will not anti alias
    # text for whatever reason.
    img_numpy = (img_gpu * 255).byte().cpu().numpy()
    return img_numpy
def run_once(self, src, image_name):
    """
    Run prediction on a single image and record every detection.

    Args:
        src: The image to predict on, indexed as (height, width, channels).
        image_name: Name passed to ``self.detections.add_instance`` for
            each detection (presumably the image's file name).
    """
    # Step 0: preparation.
    self.net.detect.cross_class_nms = True
    self.net.detect.use_fast_nms = True
    cfg.mask_proto_debug = False

    # Step 1: prediction.
    with torch.no_grad():
        frame = torch.Tensor(src).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring short elapsed intervals.
        time_start = time.perf_counter()
        preds = self.net(batch)
        time_elapsed = time.perf_counter() - time_start

        h, w, _ = src.shape

        # NOTICE: no minimum score threshold is applied here.
        t = postprocess(
            preds,
            w,
            h,
            visualize_lincomb=False,
            crop_masks=True,
            score_threshold=0.)  # TODO: give a suitable threshold
        torch.cuda.synchronize()
        classes, scores, boxes, masks = [
            x[:self.output_num].cpu().numpy() for x in t
        ]  # TODO: Only 5 objects for test

    print(time_elapsed)

    # Add every predicted instance to the detections object.
    for i in range(masks.shape[0]):
        self.detections.add_instance(image_name, i, classes[i], boxes[i, :],
                                     masks[i, :, :], scores[i])

    # Step 2: save all prediction results.
    self.detections.dump_all()
def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Post-process detections, blend their masks into the image and draw
    boxes/labels, returning the annotated image as a uint8 numpy array.

    Intermediate results are stored on the instance: classes, scores, boxes,
    masks, masks_color, masks_color_2, img_gpu, num_dets_to_consider and
    text_str.

    Note: If undo_transform=False then h and w are read from img.shape and
    the passed-in values may be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # Force rescoring for this call only; the previous value is restored.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, visualize_lincomb = self.args.display_lincomb,
                                        crop_masks        = self.args.crop,
                                        score_threshold   = self.args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        # Indices of the top_k detections, best score first.
        idx = t[1].argsort(0, descending=True)[:self.args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
        self.classes, self.scores, self.boxes = classes, scores, boxes

    # Scores are sorted descending, so stop at the first one below threshold.
    num_dets_to_consider = min(self.args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < self.args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
            return self.color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                # Cache the normalised color tensor per device index.
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                self.color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if self.args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # remove overlapped area of mask results
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            overlapped_list = []
            box_size = int((x2-x1)*(y2-y1))
            # NOTE(review): color and score are not used inside this loop.
            color = get_color(j)
            score = scores[j]
            for k in reversed(range(num_dets_to_consider)):
                if (k != j):
                    a1, b1, a2, b2 = boxes[k, :]
                    box_size_sub = int((a2-a1)*(b2-b1))
                    # Boxes j and k intersect when both overlaps are positive.
                    if ((min(a2, x2) - max(a1, x1) > 0) and (min(b2, y2) - max(b1, y1) > 0)):
                        # overlapped area
                        S_jk = (min(a2, x2) - max(a1, x1)) * (min(b2, y2) - max(b1, y1))
                        if (S_jk / box_size > 0.9):
                            # included other BBox
                            pass
                        elif (S_jk / box_size_sub > 0.3):
                            # Subtract overlapped area in current bounding box
                            # Find overlapped Bbox position
                            # (the two middle values of the sorted edges
                            # bound the intersection rectangle)
                            x_list = [x1, x2, a1, a2]
                            y_list = [y1, y2, b1, b2]
                            x_list.sort()
                            y_list.sort()
                            overlapped_list.append([int(x_list[1]), int(y_list[1]), int(x_list[2]), int(y_list[2])])
            # Zero mask j inside every recorded intersection rectangle.
            for ov_bbox in overlapped_list:
                masks[j][ov_bbox[1]: ov_bbox[3], ov_bbox[0]: ov_bbox[2]] = 0
        self.masks = masks

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        # Note the naming: masks_color holds the colors, masks_color_2 the
        # colored masks.
        self.masks_color = colors
        self.masks_color_2 = masks_color
        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
        self.img_gpu = img_gpu

    if self.args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h+8, 0:text_w+8] *= 0.6 # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if self.args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    self.num_dets_to_consider = num_dets_to_consider
    if num_dets_to_consider == 0:
        return img_numpy

    if self.args.display_text or self.args.display_bboxes:
        self.text_str = {}
        # NOTE(review): self.masks is only assigned in the mask-drawing
        # branch above; if that branch was skipped this reads a value from
        # an earlier call or raises AttributeError - confirm intent.
        # draw_masks, update_masks, overlapped_list and box_size are not
        # used further in this function.
        draw_masks = self.masks.squeeze(-1).to(torch.device("cpu")).detach().numpy().astype(np.float32)
        update_masks = self.masks.clone()
        overlapped_list = []
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            box_size = int((x2-x1)*(y2-y1))
            color = get_color(j)
            score = scores[j]

            if self.args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if self.args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s:%d_%.2f' % (_class, classes[j], score) if self.args.display_scores else _class
                # Keep the label text per detection index on the instance.
                self.text_str[j] = text_str

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 + 15)
                text_color = [255, 255, 255]

                # Filled rectangle as the label background, then the text.
                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 + text_h + 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
def detect():
    """Run pear detection over a folder of images and compute roundness.

    For every ``.jpg``/``.png`` file in the image folder the network is
    run, the first detection with class id 0 (the pear) is selected, its
    outline is extracted from the mask and the roundness of that outline is
    appended to the post-processed result. Images that cannot be read, or
    that contain no class-0 detection, are skipped with a message.
    """
    img_path = '/home/user/dataset/pear/train/JPEGImages'
    save_path = '/home/user/pear_output'
    weight_path = '/home/user/caoliwei/yolact/weights/20200901/yolact_darknet53_1176_20000.pth'

    set_cfg('pear_config')

    with torch.no_grad():
        torch.cuda.set_device(0)

        torch.set_default_tensor_type('torch.cuda.FloatTensor')

        net = Yolact()
        net.load_weights(weight_path)
        net.eval()
        net = net.cuda()
        print('model loaded...')

        net.detect.cross_class_nms = True
        net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False

        # makedirs also creates missing parents and tolerates an existing
        # directory, unlike the former exists()/mkdir() pair.
        os.makedirs(save_path, exist_ok=True)

        img_names = [
            name for name in os.listdir(img_path)
            if name.endswith(('.jpg', '.png'))
        ]

        for img_name in img_names:
            img = cv2.imread(os.path.join(img_path, img_name))
            if img is None:
                # cv2.imread returns None for unreadable files; skip them
                # instead of failing in torch.from_numpy.
                print('clw: image_name: %s could not be read, skipped' % img_name)
                continue

            img = torch.from_numpy(img).cuda().float()
            img = FastBaseTransform()(img.unsqueeze(0))

            start = time.time()
            preds = net(img)
            print('clw: image_name: %s, inference time use %.3fs' %
                  (img_name, time.time() - start))  # inference time use 0.023s, 550x550

            h, w = img.shape[2:]
            # classes, scores, boxes, masks (sorted by score)
            result = postprocess(preds, w, h, crop_masks=True, score_threshold=0.3)

            # Walk result[0] in order and take the first entry equal to 0,
            # i.e. the pear, to obtain the corresponding mask.
            pear_mask = None
            for i, cls_id in enumerate(result[0]):
                if cls_id == 0:
                    pear_mask = result[3][i].cpu().numpy()
                    break

            if pear_mask is None:
                # Previously this case used an undefined mask, or the mask
                # left over from an earlier image.
                print('clw: image_name: %s, no pear detected, skipped' % img_name)
                continue

            # Extract the outline from the pear mask.
            pear_outline = get_outline_from_mask(pear_mask, w, h)
            roundness = compute_roundness(pear_outline)

            result.append(roundness)
def prep_display_for_img(dets_out, img, h=None, w=None, undo_transform=True, class_color=False, mask_alpha=0.45):
    """
    Post-process detections for a single image, blend the masks into it and
    draw boxes and class/score labels; returns the annotated image as a
    uint8 numpy array.

    Note: If undo_transform=False then h and w are read from img.shape and
    may be left as None. Display options come from the module-level args.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # Force rescoring for this call only; the previous value is restored.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        # Indices of the top_k detections, best score first.
        idx = t[1].argsort(0, descending=True)[:args.top_k]
        if cfg.eval_mask_branch:
            masks = t[3][idx]
        # Unlike the masks-only variants, results are not converted to numpy
        # here; they stay whatever type postprocess returned.
        classes, scores, boxes = [x[idx] for x in t[:3]]

    # Scores are sorted descending, so stop at the first one below threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    def get_color(j, on_gpu=None):
        # Pick a color by class id or detection index; tensors are cached
        # per device index in the module-level color_cache.
        global color_cache
        color_idx = (classes[j] if class_color else j) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # Swap the first and third channels of the color.
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # Add a trailing axis so masks broadcast against RGB colors.
        masks = masks[:num_dets_to_consider, :, :, None]

        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # 1 everywhere except 1 - mask_alpha where the mask is set.
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Vectorised form of applying
        #     img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        # for each detection j in turn.
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if num_dets_to_consider == 0:
        return img_numpy

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]

                if args.display_scores:
                    # Class name on the first line, score on a second line
                    # placed text_h_class + 2 pixels lower.
                    text_str_class = f"{_class}"
                    text_str_score = f": {score:.2f}"

                    text_w_class, text_h_class = cv2.getTextSize(
                        text_str_class, font_face, font_scale, font_thickness)[0]

                    img_numpy = ps.putBText(img_numpy,
                                            text_str_class,
                                            text_offset_x=x1,
                                            text_offset_y=y1,
                                            vspace=0,
                                            hspace=0,
                                            font=font_face,
                                            font_scale=0.6,
                                            thickness=font_thickness,
                                            alpha=0.7,
                                            background_RGB=color,
                                            text_RGB=(255, 255, 255))
                    img_numpy = ps.putBText(img_numpy,
                                            text_str_score,
                                            text_offset_x=x1,
                                            text_offset_y=y1 + text_h_class + 2,
                                            vspace=0,
                                            hspace=0,
                                            font=font_face,
                                            font_scale=0.6,
                                            thickness=font_thickness,
                                            alpha=0.7,
                                            background_RGB=color,
                                            text_RGB=(255, 255, 255))
                else:
                    # Class name only.
                    text_str_class = '%s' % _class

                    img_numpy = ps.putBText(img_numpy,
                                            text_str_class,
                                            text_offset_x=x1,
                                            text_offset_y=y1,
                                            vspace=0,
                                            hspace=0,
                                            font=font_face,
                                            font_scale=0.6,
                                            thickness=font_thickness,
                                            alpha=0.7,
                                            background_RGB=color,
                                            text_RGB=(255, 255, 255))

    return img_numpy
def prep_display_for_video(dets_out, img, h=None, w=None, save_folder=None, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str='', override_args: Config = None):
    """
    Post-process detections for a video frame, blend masks, draw labels and
    optionally write the frame to disk.

    save_folder is indexed as a sequence: [0] is checked with os.path.isdir,
    [1] is the file-name prefix, [2] and [3] are the output directories for
    the original and annotated frame, and [4] is a number used both in the
    file name and in the ``% args.video_fps`` test (presumably the frame
    index - confirm against the caller).

    Returns [img_numpy, img_numpy_ori], or
    [img_numpy, img_numpy_ori, result_list] when the frame was drawn and
    written in this call. Reads and updates the module-level args,
    frame_compare, color_cache and result_list.
    """
    if undo_transform:
        assert w is not None and h is not None, "with undo_transform=True, w,h params must be specified!"
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    # Unannotated copy of the frame, taken before any drawing.
    img_numpy_ori = (img_gpu * 255).byte().cpu().numpy()

    # NOTE: override_args replaces the module-level args for later calls too.
    global args
    if override_args is not None:
        args = override_args

    with timer.env('Postprocess'):
        # Force rescoring for this call only; the previous value is restored.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        # Indices of the top_k detections, best score first.
        idx = t[1].argsort(0, descending=True)[:args.top_k]
        if cfg.eval_mask_branch:
            masks = t[3][idx]
        classes, scores, boxes = [x[idx] for x in t[:3]]

    # Scores are sorted descending, so stop at the first one below threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    def get_color(j, on_gpu=None):
        # Pick a color by class id or detection index; tensors are cached
        # per device index in the module-level color_cache.
        global color_cache
        color_idx = (classes[j] if class_color else j) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # Swap the first and third channels of the color.
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    global frame_compare
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # Only blend masks when save_folder[4] differs from the value
        # remembered in frame_compare.
        if frame_compare != save_folder[4]:
            masks = masks[:num_dets_to_consider, :, :, None]

            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # 1 everywhere except 1 - mask_alpha where the mask is set.
            inv_alph_masks = masks * (-mask_alpha) + 1

            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if num_dets_to_consider == 0:
        # Nothing detected: still write both frames when the periodic
        # condition holds, then return without a result list.
        if os.path.isdir(
                save_folder[0]) and save_folder[4] % args.video_fps == 0:
            file_name = save_folder[1] + "_%05d" % save_folder[4] + '.png'
            cv2.imwrite(os.path.join(save_folder[3], file_name), img_numpy)
            cv2.imwrite(os.path.join(save_folder[2], file_name), img_numpy_ori)
        return [img_numpy, img_numpy_ori]

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1

    if args.display_text or args.display_bboxes:
        if frame_compare != save_folder[4]:
            # Remember this value so the same frame is not drawn twice.
            frame_compare = save_folder[4]
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    # text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                    if args.display_scores:
                        # Class name on the first line, score on a second
                        # line placed text_h_class + 2 pixels lower.
                        text_str_class = f"{_class}"
                        text_str_score = f": {score:.2f}"

                        text_w_class, text_h_class = \
                            cv2.getTextSize(text_str_class, font_face, font_scale, font_thickness)[0]

                        img_numpy = ps.putBText(img_numpy,
                                                text_str_class,
                                                text_offset_x=x1,
                                                text_offset_y=y1,
                                                vspace=0,
                                                hspace=0,
                                                font=font_face,
                                                font_scale=0.6,
                                                thickness=font_thickness,
                                                alpha=0.7,
                                                background_RGB=color,
                                                text_RGB=(255, 255, 255))
                        img_numpy = ps.putBText(img_numpy,
                                                text_str_score,
                                                text_offset_x=x1,
                                                text_offset_y=y1 + text_h_class + 2,
                                                vspace=0,
                                                hspace=0,
                                                font=font_face,
                                                font_scale=0.6,
                                                thickness=font_thickness,
                                                alpha=0.7,
                                                background_RGB=color,
                                                text_RGB=(255, 255, 255))
                    else:
                        # Class name only.
                        text_str_class = '%s' % (_class)

                        img_numpy = ps.putBText(img_numpy,
                                                text_str_class,
                                                text_offset_x=x1,
                                                text_offset_y=y1,
                                                vspace=0,
                                                hspace=0,
                                                font=font_face,
                                                font_scale=0.6,
                                                thickness=font_thickness,
                                                alpha=0.7,
                                                background_RGB=color,
                                                text_RGB=(255, 255, 255))

                    if save_folder[4] % args.video_fps == 0:
                        # Record one tuple per detection in the module-level
                        # result_list; ocr() is run on the unannotated frame.
                        dist = ocr(img_numpy_ori)
                        result = save_folder[
                            4], f"{dist}", f"{_class}", f"{score:.2f}", f"{x1}", f"{y1}", f"{x2}", f"{y2}"
                        result_list.append(result)

            if os.path.isdir(
                    save_folder[0]) and save_folder[4] % args.video_fps == 0:
                file_name = save_folder[1] + "_%05d" % save_folder[4] + '.png'
                cv2.imwrite(os.path.join(save_folder[3], file_name), img_numpy)
                cv2.imwrite(os.path.join(save_folder[2], file_name), img_numpy_ori)
                return [img_numpy, img_numpy_ori, result_list]

    return [img_numpy, img_numpy_ori]
# Inclusive upper bounds of a region's summed rubbish weight for pollution
# grades 1..6; any weight above the last bound is grade 7.
_POLLUTION_GRADE_BOUNDS = (50, 100, 150, 200, 250, 300)

# Class names handled as rubbish, mapped to the coefficient that is multiplied
# with an object's projected ground area to obtain its weight.
_RUBBISH_WEIGHT_COEFFICIENTS = {
    'ads': 80,
    'cigarette': 200,
    'firecracker': 200,
    'glass bottle': 8000,
    'leaves': 80,
    'metal': 1050,
    'paper': 80,
    'peel': 6000,
    'plastic': 775,
    'solid clod': 15750,
    'solid crumb': 4000,
}

# Class names handled as vegetation, mapped to the type code written to
# region_output (0 means none). The largest code seen in a region wins.
_VEGETATION_TYPES = {'grass': 1, 'shrub': 2, 'flower': 3}


def _pollution_grade(weight):
    """Map a region's summed rubbish weight to a pollution grade in 0..7."""
    # `not weight > 0` (rather than `weight <= 0`) keeps NaN at grade 0.
    if not weight > 0:
        return 0
    for grade, upper_bound in enumerate(_POLLUTION_GRADE_BOUNDS, start=1):
        if weight <= upper_bound:
            return grade
    return len(_POLLUTION_GRADE_BOUNDS) + 1


def _project_boundary(boundary_pts):
    """Return parallel lists (x, z, region_id) for one object's valid boundary points."""
    xs, zs, area_ids = [], [], []
    for pt in range(boundary_pts.shape[0]):
        pt_u = boundary_pts[pt, 0, 0]
        pt_v = boundary_pts[pt, 0, 1]
        # Skip invalid pixel coordinates (u=0, v=0).
        if not (pt_u or pt_v):
            continue
        loc = p2d_table[pt_u, pt_v]
        # Skip invalid world coordinates (x=0, z=0).
        if loc[0] or loc[1]:
            xs.append(loc[0])
            zs.append(loc[1])
            area_ids.append(CameraT.whatArea(loc[0], loc[1]))
    return xs, zs, area_ids


def image_callback(image_data):
    """
    Process one incoming image: detect objects, grade each of the 8 regions
    and optionally publish, display and record the result.

    Steps, in order:
      1. Decode image_data into the global cv_image and refresh the global
         display/record switches from the ROS parameters "/display_mode" and
         "/record_mode"; open or release the global video writer accordingly.
      2. Run the network and postprocess its output, keeping at most
         args.top_k detections with score >= args.score_threshold.
      3. For rubbish detections, project the mask boundary through p2d_table,
         compute the polygon area and a weight, and accumulate per region.
      4. For vegetation detections, record the highest vegetation type code
         seen in each region.
      5. Publish an AreasInfo message when seucar_switch is set, then draw,
         show and/or write the annotated frame.

    Args:
        image_data: message exposing `data`, `height` and `width`.
    """
    global cv_image, display_switch, record_switch
    global record_initialized, video_out

    time_start = time.time()
    cv_image = np.frombuffer(image_data.data, dtype=np.uint8).reshape(
        image_data.height, image_data.width, -1)
    display_switch = rospy.get_param("/display_mode")
    record_switch = rospy.get_param("/record_mode")

    if record_switch and not record_initialized:
        out_path = 'video_out.mp4'
        target_fps = 10 if seucar_switch else 30
        # NOTE(review): the writer size is fixed at 640x480; presumably the
        # camera delivers frames of exactly this size -- confirm.
        frame_height = 480
        frame_width = 640
        video_out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"),
                                    target_fps, (frame_width, frame_height), True)
        record_initialized = True
        print("Start recording.")

    if not display_switch:
        cv2.destroyAllWindows()

    if not record_switch and record_initialized:
        video_out.release()
        record_initialized = False
        print("Save video.")

    # region_output is an 8x4 array; row i holds the information of region i.
    # Column 0: pollution grade (0..7); column 1: vegetation type (0 none,
    # 1 grass, 2 shrub, 3 flower); column 2: pedestrian flag (0 no, 1 yes);
    # column 3: region ID (1..8).
    region_output = np.zeros((8, 4))
    region_output[:, 3] = np.arange(1, 9)

    with torch.no_grad():
        # Object detection.
        frame = torch.from_numpy(cv_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = net(batch)

        # Build index-aligned masks, classes, scores and boxes per detection.
        h, w, _ = frame.shape
        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            t = postprocess(preds, w, h, visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
            cfg.rescore_bbox = save

        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:args.top_k]
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][idx]
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        # Scores are sorted descending, so stop at the first one below the
        # threshold.
        num_dets_to_consider = min(args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < args.score_threshold:
                num_dets_to_consider = j
                break

        if num_dets_to_consider > 0:
            target_masks = masks[:num_dets_to_consider, :, :]
            target_classes = classes[:num_dets_to_consider]
            target_scores = scores[:num_dets_to_consider]
            target_boxes = boxes[:num_dets_to_consider, :]

            # Render the detection result.
            if display_switch or record_switch:
                result_image = result_display(frame, target_masks, target_classes,
                                              target_scores, target_boxes,
                                              num_dets_to_consider)
            else:
                result_image = frame.byte().cpu().numpy()

            # Split the detections into rubbish and vegetation objects.
            target_names = [cfg.dataset.class_names[c] for c in target_classes]
            rubbish_remain_list = [
                k for k, name in enumerate(target_names)
                if name in _RUBBISH_WEIGHT_COEFFICIENTS
            ]
            vegetation_remain_list = [
                k for k, name in enumerate(target_names)
                if name in _VEGETATION_TYPES
            ]

            rubbish_masks = target_masks[rubbish_remain_list, :, :]
            rubbish_classes = target_classes[rubbish_remain_list]
            vegetation_masks = target_masks[vegetation_remain_list, :, :]
            vegetation_classes = target_classes[vegetation_remain_list]

            rubbish_num = len(rubbish_remain_list)
            vegetation_num = len(vegetation_remain_list)

            # Rubbish objects.
            if rubbish_num > 0:
                # Sample points on the mask boundaries.
                result_image, rubbish_boundary_pts = get_boundary(
                    result_image, rubbish_num, rubbish_masks, cpt_num=10)

                # s_polygon: area of each rubbish object projected onto the
                # ground in world coordinates.
                s_polygon = np.zeros((rubbish_num, 1))
                # region_s: largest single-object area per region.
                region_s = np.zeros((8, 1))
                # region_w: summed rubbish weight per region.
                region_w = np.zeros((8, 1))

                for i in range(rubbish_boundary_pts.shape[0]):
                    b_x, b_z, b_area_id = _project_boundary(rubbish_boundary_pts[i])
                    effective_pt_num = len(b_x)
                    # A polygon needs at least 3 valid points; otherwise the
                    # object contributes neither area nor weight.
                    if effective_pt_num < 3:
                        continue

                    # Shoelace formula over the valid boundary points.
                    s_sum = 0
                    for b_pt in range(effective_pt_num):
                        nxt = (b_pt + 1) % effective_pt_num
                        s_sum += b_x[b_pt] * b_z[nxt] - b_z[b_pt] * b_x[nxt]
                    s_polygon[i, 0] = abs(s_sum) / 2

                    class_name = cfg.dataset.class_names[rubbish_classes[i]]
                    rubbish_weight = s_polygon[i, 0] * _RUBBISH_WEIGHT_COEFFICIENTS[class_name]

                    for area_id in b_area_id:
                        # Skip points with an invalid region ID (ID=0).
                        if not area_id:
                            continue
                        if s_polygon[i, 0] > region_s[area_id - 1, 0]:
                            region_s[area_id - 1, 0] = s_polygon[i, 0]
                        # Each valid point carries an equal share of the
                        # object's weight into the region it falls in.
                        region_w[area_id - 1, 0] += rubbish_weight / effective_pt_num

                # Derive the pollution grade of each region.
                for region_i in range(8):
                    region_output[region_i, 0] = _pollution_grade(region_w[region_i, 0])

                if display_switch or record_switch:
                    result_image = s_display(result_image, region_s,
                                             font_face=cv2.FONT_HERSHEY_DUPLEX,
                                             font_scale=0.5, font_thickness=1)
                    result_image = w_display(result_image, region_w,
                                             font_face=cv2.FONT_HERSHEY_DUPLEX,
                                             font_scale=0.5, font_thickness=1)

            # Vegetation objects.
            if vegetation_num > 0:
                # Sample points on the mask boundaries.
                result_image, vegetation_boundary_pts = get_boundary(
                    result_image, vegetation_num, vegetation_masks, cpt_num=20)

                # region_vegetation_type: vegetation type code per region.
                region_vegetation_type = np.zeros((8, 1))

                for i in range(vegetation_boundary_pts.shape[0]):
                    _, _, b_area_id = _project_boundary(vegetation_boundary_pts[i])
                    v_type = _VEGETATION_TYPES[cfg.dataset.class_names[vegetation_classes[i]]]
                    for area_id in b_area_id:
                        # Skip points with an invalid region ID (ID=0).
                        if area_id and v_type > region_vegetation_type[area_id - 1, 0]:
                            region_vegetation_type[area_id - 1, 0] = v_type

                region_output[:, 1] = region_vegetation_type[:, 0]
        else:
            result_image = frame.byte().cpu().numpy()

    if seucar_switch:
        areasinfo_msg = AreasInfo()
        for region_i in range(8):
            region_output_msg = AreaInfo()
            region_output_msg.rubbish_grade = int(region_output[region_i, 0])
            region_output_msg.has_person = bool(region_output[region_i, 2])
            region_output_msg.vegetation_type = int(region_output[region_i, 1])
            region_output_msg.area_id = int(region_output[region_i, 3])
            areasinfo_msg.infos.append(region_output_msg)
        pub.publish(areasinfo_msg)

    if display_switch or record_switch:
        result_image = CameraT.drawLine(result_image, w=1)
        result_image = output_display(result_image, region_output,
                                      font_face=cv2.FONT_HERSHEY_DUPLEX,
                                      font_scale=0.5, font_thickness=1)
        cv2.putText(result_image, str(time.time()), (5, 20),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

    if display_switch:
        print('region_output')
        print(region_output)
        cv2.imshow("result_image", result_image)
        # Key code 27 is ESC: save the video, close windows, stop the node.
        if cv2.waitKey(1) == 27:
            if record_switch and record_initialized:
                video_out.release()
                print("Save video.")
            cv2.destroyAllWindows()
            # Stop the Python program.
            rospy.signal_shutdown("It's over.")

    if record_switch and record_initialized:
        video_out.write(result_image)

    time_end_all = time.time()
    print("totally time cost:", time_end_all - time_start)
def live_analysis(self):
    """
    Function for live stream video masking.

    Loops until ROS shuts down. For every available colour/depth frame pair it
    runs the configured network (self.nn), keeps at most
    self.max_number_observation detections scoring at least
    self.score_threshold, associates them with the tracked objects in
    self.objects_dict by nearest centroid of the same class, updates each
    object's Kalman filter and finally publishes the two masked depth images.

    Tracks and detections that stay unmatched are handled once per frame:
    an unmatched track gets one inactivity increment (and is deleted once it
    reaches self._max_inactive_frames), an unmatched detection creates one
    new track.
    """
    bar = [
        " Waiting for frame [= ] ",
        " Waiting for frame [ = ] ",
        " Waiting for frame [ = ] ",
        " Waiting for frame [ = ] ",
        " Waiting for frame [ = ] ",
        " Waiting for frame [ =] ",
        " Waiting for frame [ = ] ",
        " Waiting for frame [ = ] ",
        " Waiting for frame [ = ] ",
        " Waiting for frame [ = ] ",
    ]
    idx = 0
    while not rospy.is_shutdown():
        # The *_time stamps mark the pipeline stages; nothing in this method
        # reads them.
        start_time = time.time()
        self.masked_id = []
        current_frame = self.frame
        current_depth_frame = self.depth_frame

        if len(current_frame) == 0 or len(current_depth_frame) == 0:
            # No frame yet: animate the spinner and poll again.
            print(bar[idx % len(bar)], end="\r")
            idx += 1
            time.sleep(0.1)
            continue

        nn_start_time = time.time()
        if self.nn in ('yolact', 'yolact++', 'yolact_edge'):
            frame = torch.from_numpy(current_frame).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            if self.nn == 'yolact_edge':
                extras = {"backbone": "full", "interrupt": False,
                          "keep_statistics": False, "moving_statistics": None}
                preds = self.net(batch.cuda(), extras=extras)
                preds = preds["pred_outs"]
            else:
                preds = self.net(batch.cuda())
            nn_pred_time = time.time()
            h, w, _ = frame.shape
            class_ids, scores, rois, masks = postprocess(
                preds, w, h, score_threshold=self.score_threshold)
            r = {
                'class_ids': copy.deepcopy(class_ids.cpu().data.numpy()),
                'scores': copy.deepcopy(scores.cpu().data.numpy()),
                'rois': copy.deepcopy(rois.cpu().data.numpy()),
                'masks': copy.deepcopy(masks.cpu().data.numpy()),
            }
        elif self.nn == 'mrcnn':
            results = self.model.detect([current_frame], verbose=1)
            r = results[0]
            # Move the last mask axis to the front so masks are indexed per
            # detection, like the YOLACT output above.
            r['masks'] = np.swapaxes(r['masks'], 0, 2)
            r['masks'] = np.swapaxes(r['masks'], 1, 2)
            # Swap each box's coordinate pairs: (a, b, c, d) -> (b, a, d, c).
            for i in range(r['rois'].shape[0]):
                buff = r['rois'][i]
                r['rois'][i] = [buff[1], buff[0], buff[3], buff[2]]
            r['class_ids'] = r['class_ids'] - 1

        # Keep the leading detections up to the first score below threshold.
        self.number_observation = min(self.max_number_observation,
                                      r['class_ids'].shape[0])
        for j in range(self.number_observation):
            if r['scores'][j] < self.score_threshold:
                self.number_observation = j
                break
        for field in ('class_ids', 'scores', 'rois', 'masks'):
            r[field] = r[field][:self.number_observation]
        nn_time = time.time()

        mask_depth = self.get_masking_depth(current_depth_frame, r['masks'])

        # Read object tf pose
        self.read_objects_pose()

        # Read camera tf pose; fall back to the identity pose when the
        # transform is unavailable.
        try:
            (transc, rotc) = listener.lookupTransform(self.tf_camera, '/map',
                                                      rospy.Time(0))
        except (tf.LookupException, tf.ConnectivityException,
                tf.ExtrapolationException):
            transc = np.array([0., 0., 0.])
            rotc = np.array([0., 0., 0., 1.])
        euler = tf.transformations.euler_from_quaternion(rotc)
        rot = tf.transformations.euler_matrix(euler[0], euler[1], euler[2])
        h_mat = rot
        h_mat[0:3, 3:] = np.array([transc]).T

        # Main filter update and prediction step
        if len(r['rois']) == 0:
            # Nothing detected: every tracked object ages by one frame.
            objects_to_delete = []
            for key, tracked in self.objects_dict.items():
                tracked["inactiveNbFrame"] = tracked["inactiveNbFrame"] + 1
                if tracked["inactiveNbFrame"] > self._max_inactive_frames:
                    objects_to_delete.append(key)
            for key in objects_to_delete:
                self.delete_object(key)
        else:
            current_centroids, current_dimensions = self.mask_to_centroid(
                r['rois'], mask_depth)
            if not self.objects_dict:
                # No tracks yet: every detection starts a new one.
                for i in range(len(current_centroids)):
                    self.add_object(current_centroids[i], current_dimensions[i],
                                    i, r['class_ids'][i], r['masks'][i],
                                    r['rois'][i])
                for tracked in self.objects_dict.values():
                    kalman = tracked["kalmanFilter"]
                    kalman.prediction()
                    kalman.update(tracked["centroid"], h_mat)
                    tracked["estimatedPose"] = kalman.x[0:3]
                    tracked["estimatedVelocity"] = kalman.x[3:6]
            else:
                # Keep the real dictionary keys (not float copies) so lookups
                # and delete_object() receive exactly the key that was stored.
                objects_ids = list(self.objects_dict)
                objects_pose = np.zeros((len(objects_ids), 3))
                for row, key in enumerate(objects_ids):
                    objects_pose[row,] = self.objects_dict[key]["centroid"]
                centroids_pose = np.zeros((len(current_centroids), 3))
                for i in range(len(current_centroids)):
                    centroids_pose[i,] = current_centroids[i]

                # Row-major flattening of the (objects x centroids) distance
                # matrix: flat = object * n_centroids + centroid.
                n_centroids = len(centroids_pose)
                eucledian_dist_pairwise = np.array(
                    cdist(objects_pose, centroids_pose)).flatten()
                index_sorted = np.argsort(eucledian_dist_pairwise)
                used_objects = []
                used_centroids = []

                # Greedy association, nearest pairs first, same class only.
                for flat_index in index_sorted:
                    object_id = int(flat_index // n_centroids)
                    centroid_id = flat_index % n_centroids
                    if object_id in used_objects or centroid_id in used_centroids:
                        continue
                    tracked = self.objects_dict[objects_ids[object_id]]
                    if tracked["classID"] != r['class_ids'][centroid_id]:
                        continue

                    used_objects.append(object_id)
                    used_centroids.append(centroid_id)
                    kalman = tracked["kalmanFilter"]
                    kalman.prediction()
                    kalman.update(current_centroids[centroid_id], h_mat)
                    tracked["estimatedPose"] = kalman.x[0:3]
                    tracked["estimatedVelocity"] = kalman.x[3:6]

                    # Class 0 uses the human threshold, everything else the
                    # generic object threshold.
                    if tracked["classID"] == 0:
                        max_threshold = self.human_threshold
                    else:
                        max_threshold = self.object_threshold
                    velocity = tracked["estimatedVelocity"]
                    if (abs(velocity[0]) > max_threshold
                            or abs(velocity[1]) > max_threshold
                            or abs(velocity[2]) > max_threshold):
                        tracked["activeObject"] = 1
                    else:
                        tracked["activeObject"] = 0

                    # A slow class-0 object is still flagged active when its
                    # centred mask IoU against the previous frame is low.
                    if tracked["classID"] == 0 and tracked["activeObject"] == 0:
                        iou = self.iou_centered_centroid(
                            tracked["roisOld"], r['rois'][centroid_id],
                            tracked["maskOld"], r['masks'][centroid_id])
                        if iou < self.iou_threshold:
                            tracked["activeObject"] = 1

                    tracked["centroid"] = centroids_pose[centroid_id]
                    tracked["dimensions"] = current_dimensions[centroid_id]
                    tracked["inactiveNbFrame"] = 0
                    tracked["maskID"] = centroid_id
                    tracked["maskOld"] = r['masks'][centroid_id]
                    tracked["roisOld"] = r['rois'][centroid_id]

                # NOTE(review): when both counts are equal, pairs rejected by
                # the class check leave tracks/detections unhandled here, as
                # in the original code -- confirm whether that is intended.
                if len(centroids_pose) < len(objects_pose):
                    # Age every unmatched track exactly once per frame.
                    for object_id, key in enumerate(objects_ids):
                        if object_id in used_objects:
                            continue
                        tracked = self.objects_dict[key]
                        tracked["inactiveNbFrame"] += 1
                        tracked["activeObject"] = 0
                        if tracked["inactiveNbFrame"] >= self._max_inactive_frames:
                            self.delete_object(key)
                        else:
                            # No measurement: predict only and expose x_.
                            kalman = tracked["kalmanFilter"]
                            kalman.prediction()
                            tracked["estimatedPose"] = kalman.x_[0:3]
                            tracked["estimatedVelocity"] = kalman.x_[3:6]
                elif len(centroids_pose) > len(objects_pose):
                    # Start one new track per unmatched detection, nearest
                    # first; buff_id mirrors the key add_object() will use.
                    buff_id = self.next_object_id
                    for flat_index in index_sorted:
                        centroid_id = flat_index % n_centroids
                        if centroid_id in used_centroids:
                            continue
                        used_centroids.append(centroid_id)
                        self.add_object(current_centroids[centroid_id],
                                        current_dimensions[centroid_id],
                                        centroid_id,
                                        r['class_ids'][centroid_id],
                                        r['masks'][centroid_id],
                                        r['rois'][centroid_id])
                        tracked = self.objects_dict[buff_id]
                        kalman = tracked["kalmanFilter"]
                        kalman.prediction()
                        kalman.update(current_centroids[centroid_id], h_mat)
                        tracked["estimatedPose"] = kalman.x[0:3]
                        tracked["estimatedVelocity"] = kalman.x[3:6]
                        buff_id = buff_id + 1
        kalman_time = time.time()

        # Write objects filter pose to tf
        self.handle_objects_pose()

        result_dynamic_depth_image, result_depth_image = \
            self.apply_depth_image_masking(current_depth_frame, r['masks'])

        DDITS = self.bridge.cv2_to_imgmsg(result_dynamic_depth_image, '32FC1')
        DDITS.header = self.depth_msg_header
        self.dynamic_depth_image_pub.publish(DDITS)

        DITS = self.bridge.cv2_to_imgmsg(result_depth_image, '32FC1')
        DITS.header = self.depth_msg_header
        self.depth_image_pub.publish(DITS)
        print_time = time.time()
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45):
    """
    Draw the detections on the image and overlay an estimated patty count.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.

    The masks are blended on the GPU, boxes and labels are drawn on the CPU.
    A detection of class 6 (the tray) is cropped from the frame and its
    colour histogram is fed to a pickled regressor, chosen by the dominant
    patty class, to estimate the total count. The count line is drawn at the
    bottom-right of the image.

    Returns:
        The annotated image as a uint8 numpy array. When nothing is detected
        the plain image is returned without a count line.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][:args.top_k]
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    if num_dets_to_consider == 0:
        # No detections found so just output the original image
        return (img_gpu * 255).byte().cpu().numpy()

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color
        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    _color_conversions = {
        'HSV': cv2.COLOR_RGB2HSV,
        'LUV': cv2.COLOR_RGB2LUV,
        'HLS': cv2.COLOR_RGB2HLS,
        'YUV': cv2.COLOR_RGB2YUV,
        'YCrCb': cv2.COLOR_RGB2YCrCb,
    }

    def bin_spatial(img, color_space='RGB', size=(32, 32)):
        """Return the flattened pixels of img in the requested colour space.

        `size` is accepted but not used: the image is not resized.
        """
        if color_space == 'RGB':
            feature_image = np.copy(img)
        elif color_space in _color_conversions:
            feature_image = cv2.cvtColor(img, _color_conversions[color_space])
        else:
            raise ValueError('unsupported color_space: %r' % (color_space,))
        return feature_image.ravel()

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    # Unannotated copy (masks only) that tray crops are always taken from, so
    # a second tray detection never slices an already cropped/resized image.
    frame_for_crop = img_numpy.copy()
    crop_tray_img = frame_for_crop

    # Bound up front: the count overlay below needs them even when
    # args.display_text is off.
    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if classes[j] == 6:  # _class == "tray"
                tray = frame_for_crop[y1:y2, x1:x2]
                # An empty box cannot be resized; keep the previous crop.
                if tray.size > 0:
                    # Resize the crop to width 256, keeping its aspect ratio,
                    # as input for the regression model.
                    height, width = tray.shape[0], tray.shape[1]
                    aspect_ratio = height / width
                    height = max(1, int(aspect_ratio * 256))
                    tray = np.array(tray, dtype='uint8')
                    tray = Image.fromarray(tray).resize((256, height),
                                                        Image.BICUBIC)
                    crop_tray_img = np.asarray(tray, dtype=float)

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale, font_thickness)[0]
                text_pt = (x1, y1 - 3)

                cv2.rectangle(img_numpy, (x1, y1),
                              (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

    # Count the visible patties per kind (class 6 is the tray itself).
    n_patties = np.sum(classes != 6)
    chicken_count = np.sum(classes == 0) + np.sum(classes == 1)
    ham_quarter_count = np.sum(classes == 2) + np.sum(classes == 3)
    ham_1by10_count = np.sum(classes == 4) + np.sum(classes == 5)

    # Pick the regressor that belongs to the most frequent kind.
    labels = [chicken_count, ham_quarter_count, ham_1by10_count]
    max_index = labels.index(max(labels))
    if max_index == 0:
        model_path = "weights/regressor/cp_regressor_crop"
    elif max_index == 1:
        model_path = "weights/regressor/qp_regressor_crop"
    else:
        model_path = "weights/regressor/op_regressor_crop"

    # Load the pickled model, scale the input and feed it into the model.
    import pickle
    from sklearn.preprocessing import StandardScaler

    # NOTE(review): pickle.load executes arbitrary code from the file; the
    # regressor files must come from a trusted source.
    with open(model_path, 'rb') as model_file:
        loaded_model = pickle.load(model_file)

    n_bin = 32
    features = bin_spatial(crop_tray_img)
    features, _ = np.histogram(features, n_bin, density=True)
    features = np.array([features])
    scaler = StandardScaler()
    features = scaler.fit_transform(features.reshape(n_bin, 1)).reshape(1, n_bin)
    prediction = round(loaded_model.predict(features)[0])

    if max_index == 0 and chicken_count < 4:
        prediction = chicken_count

    count_text = "Calculated: {} Visible Patties: {} -> Chicken: {}, Ham Quarter: {}, HAM 1by10: {}".format(
        prediction, n_patties, chicken_count, ham_quarter_count,
        ham_1by10_count)
    count_text_w, count_text_h = cv2.getTextSize(count_text, font_face,
                                                 font_scale, font_thickness)[0]
    count_text_pt = (w - count_text_w, h - count_text_h)
    cv2.putText(img_numpy, count_text, count_text_pt, font_face, font_scale,
                text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45):
    """
    Render the detections in dets_out on top of img and return the result.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.

    Masks are alpha-blended on the GPU; boxes and labels are drawn afterwards
    on the CPU copy. The return value is a uint8 numpy image.
    """
    if not undo_transform:
        img_gpu = img / 255.0
        h, w, _ = img.shape
    else:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # The masks stay on the GPU because that is where they are drawn.
            masks = t[3][:args.top_k]
        classes, scores, boxes = (
            det[:args.top_k].cpu().numpy() for det in t[:3])

    # Number of leading detections before the first score under threshold.
    limit = min(args.top_k, classes.shape[0])
    num_dets_to_consider = next(
        (k for k in range(limit) if scores[k] < args.score_threshold), limit)

    if num_dets_to_consider == 0:
        # Nothing to draw: hand back the untouched image.
        return (img_gpu * 255).byte().cpu().numpy()

    def get_color(det_idx, on_gpu=None):
        """Pick the colour for a detection; GPU colours are cached per device."""
        global color_cache
        base = classes[det_idx] if class_color else det_idx
        color_idx = (base * 5) % len(COLORS)
        wants_tensor = on_gpu is not None

        if wants_tensor and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, so flip the channels.
            color = (color[2], color[1], color[0])
        if wants_tensor:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # Blend the masks on the GPU first.
    if args.display_masks and cfg.eval_mask_branch:
        # Shape becomes [num_dets, h, w, 1].
        masks = masks[:num_dets_to_consider, :, :, None]

        gpu_index = img_gpu.device.index
        per_det_colors = [
            get_color(k, on_gpu=gpu_index).view(1, 1, 1, 3)
            for k in range(num_dets_to_consider)
        ]
        colors = torch.cat(per_det_colors, dim=0)
        tinted = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # 1 outside a mask, (1 - mask_alpha) inside it.
        keep_frac = masks * (-mask_alpha) + 1

        # Closed form of applying, for each detection in turn,
        #     img_gpu = img_gpu * keep_frac[k] + tinted[k]
        overlay = tinted[0]
        if num_dets_to_consider > 1:
            attenuation = keep_frac[:(num_dets_to_consider - 1)].cumprod(dim=0)
            overlay += (tinted[1:] * attenuation).sum(dim=0)

        img_gpu = img_gpu * keep_frac.prod(dim=0) + overlay

    # CPU drawing needs a uint8 image, otherwise OpenCV does not anti-alias
    # the text.
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if not (args.display_text or args.display_bboxes):
        return img_numpy

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    # Draw from the last kept detection to the first so earlier ones end up
    # on top.
    for k in range(num_dets_to_consider - 1, -1, -1):
        x1, y1, x2, y2 = boxes[k, :]
        color = get_color(k)

        if args.display_bboxes:
            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        if args.display_text:
            _class = cfg.dataset.class_names[classes[k]]
            if args.display_scores:
                text_str = '%s: %.2f' % (_class, scores[k])
            else:
                text_str = _class

            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale,
                                             font_thickness)[0]
            # Filled label background just above the box corner.
            cv2.rectangle(img_numpy, (x1, y1),
                          (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, (x1, y1 - 3), font_face,
                        font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Render the detections on the image and collect them as prediction dicts.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.

    Returns:
        (img_numpy, all_pred): the annotated uint8 image and a list with one
        dict per detection holding 'category_id' (class index + 1),
        'segmentation' (the mask passed through binary_mask_to_rle) and
        'score'.
    """
    if not undo_transform:
        img_gpu = img / 255.0
        h, w, _ = img.shape
    else:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    # bbox
    print(t[2].cpu().numpy())

    # t holds classes, scores, boxes, masks; export every detection.
    all_categories = t[0].cpu().numpy()
    all_scores = t[1].cpu().numpy()
    all_masks = t[3].cpu().numpy()
    all_pred = [
        {
            'category_id': int(all_categories[i]) + 1,
            # Binary mask stored as RLE.
            'segmentation': binary_mask_to_rle(all_masks[i, :, :]),
            'score': float(all_scores[i]),
        }
        for i in range(len(all_scores))
    ]

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # The masks stay on the GPU because that is where they are drawn.
            masks = t[3][:args.top_k]
        classes, scores, boxes = (
            det[:args.top_k].cpu().numpy() for det in t[:3])

    # Number of leading detections before the first score under threshold.
    limit = min(args.top_k, classes.shape[0])
    num_dets_to_consider = next(
        (k for k in range(limit) if scores[k] < args.score_threshold), limit)

    def get_color(det_idx, on_gpu=None):
        """Pick the colour for a detection; GPU colours are cached per device."""
        global color_cache
        base = classes[det_idx] if class_color else det_idx
        color_idx = (base * 5) % len(COLORS)
        wants_tensor = on_gpu is not None

        if wants_tensor and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, so flip the channels.
            color = (color[2], color[1], color[0])
        if wants_tensor:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # Blend the masks on the GPU first.
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # Shape becomes [num_dets, h, w, 1].
        masks = masks[:num_dets_to_consider, :, :, None]

        gpu_index = img_gpu.device.index
        per_det_colors = [
            get_color(k, on_gpu=gpu_index).view(1, 1, 1, 3)
            for k in range(num_dets_to_consider)
        ]
        colors = torch.cat(per_det_colors, dim=0)
        tinted = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # 1 outside a mask, (1 - mask_alpha) inside it.
        keep_frac = masks * (-mask_alpha) + 1

        # Closed form of applying, for each detection in turn,
        #     img_gpu = img_gpu * keep_frac[k] + tinted[k]
        overlay = tinted[0]
        if num_dets_to_consider > 1:
            attenuation = keep_frac[:(num_dets_to_consider - 1)].cumprod(dim=0)
            overlay += (tinted[1:] * attenuation).sum(dim=0)

        img_gpu = img_gpu * keep_frac.prod(dim=0) + overlay

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    if args.display_fps:
        # Darken the box behind the FPS text while still on the GPU.
        fps_w, fps_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                       font_thickness)[0]
        img_gpu[0:fps_h + 8, 0:fps_w + 8] *= 0.6  # 1 - Box alpha

    # CPU drawing needs a uint8 image, otherwise OpenCV does not anti-alias
    # the text.
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # The FPS text itself is drawn on the CPU.
        cv2.putText(img_numpy, fps_str, (4, fps_h + 2), font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy, all_pred

    if not (args.display_text or args.display_bboxes):
        return img_numpy, all_pred

    # Draw from the last kept detection to the first so earlier ones end up
    # on top.
    for k in range(num_dets_to_consider - 1, -1, -1):
        x1, y1, x2, y2 = boxes[k, :]
        color = get_color(k)

        if args.display_bboxes:
            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        if args.display_text:
            _class = cfg.dataset.class_names[classes[k]]
            if args.display_scores:
                text_str = '%s: %.2f' % (_class, scores[k])
            else:
                text_str = _class

            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale,
                                             font_thickness)[0]
            # Filled label background just above the box corner.
            cv2.rectangle(img_numpy, (x1, y1),
                          (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, (x1, y1 - 3), font_face,
                        font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy, all_pred
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Draw masks, boxes and labels for the detections in dets_out and return
    the rendered image as a uint8 numpy array.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.

    Besides returning the image, this function rebinds the module-level
    names ``frame_count``, ``mask_numpy`` and ``mask_numpy1`` and resets
    entries of the module-level ``predict_pos``.
    """
    # Module-level names that are rebound below, declared once up front.
    global frame_count, mask_numpy, mask_numpy1

    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # Force box rescoring for this call only, then restore the setting.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        # NOTE(review): argsort-of-argsort yields each score's ascending rank,
        # not a descending sort order - confirm this ordering is intended.
        idx = t[1].argsort().argsort()
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
            mask_picture = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
        # NOTE(review): mask_picture only exists when cfg.eval_mask_branch is
        # set; otherwise this call raises NameError.
        obj_info, obj_num = data_save(mask_picture, classes, scores, boxes)

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (obj_info[j][0] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Grayscale copy of the image restricted to the union of all masks,
        # published through the module-level mask_numpy.
        mask_img = img_gpu * (masks.sum(dim=0) > 0.5).float()
        mask_numpy = (mask_img * 255).byte().cpu().numpy()
        mask_numpy = cv2.cvtColor(mask_numpy, cv2.COLOR_BGR2GRAY)

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        frame_count += 1
        for j in range(obj_num):
            # Grayscale copy of the rendered frame restricted to object j's
            # mask, published through the module-level mask_numpy1.
            mask_image = mask_picture[j:j + 1, :, :, None]
            mask_image = img_gpu * (mask_image.sum(dim=0) > 0.5).float()
            mask_numpy1 = (mask_image * 255).byte().cpu().numpy()
            mask_numpy1 = cv2.cvtColor(mask_numpy1, cv2.COLOR_BGR2GRAY)

            if obj_info[j][2] == 1:
                # NOTE(review): get_color receives obj_info[j][0] rather than j;
                # with class_color=True it is then used as an index into
                # obj_info - confirm against data_save's output layout.
                color = get_color(obj_info[j][0])

                # presumably obj_info[j][4] holds (.., .., x1, x2, y1, y2);
                # verify against data_save.
                geom = obj_info[j][4]
                left, right, top, bottom = geom[2], geom[3], geom[4], geom[5]

                if args.display_bboxes:
                    cv2.rectangle(img_numpy, (left, top), (right, bottom), color, 1)

                if args.display_text:
                    _class = obj_info[j][1]
                    text_str = '%s: %s' % (obj_info[j][0], _class) if args.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                    text_pt = (left, top - 3)
                    text_color = [255, 255, 255]

                    cv2.rectangle(img_numpy, (left, top), (left + text_w, top - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
            else:
                # Reset the stored position history for this object slot.
                for i in range(2):
                    predict_pos[j][i] = [0]
                predict_pos[j][2] = []

    return img_numpy
def prep_display(dets_out, img, h, w, args, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Draw masks, boxes and labels for the detections in dets_out and return
    the rendered image as a uint8 numpy array.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.

    Side effects: depending on the flags in ``args`` this writes
    'results/mask_car_image.jpg', 'results/mask_image<i>.jpg' and
    'results/crop_object.png', and prints a progress message for each.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # Force box rescoring for this call only, then restore the setting.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].detach().cpu().numpy() for x in t[:3]]

    # Scores are sorted descending, so stop at the first one below threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Equivalent to, for each j in order:
        #     img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

        img_numpy_mask = (masks_color_summand * 255).byte().cpu().numpy()
        cv2.imwrite('results/mask_car_image.jpg', img_numpy_mask)
        print("Mask for all visible car is generated")

    # Fix: this branch used to overwrite num_dets_to_consider with
    # min(top_k, n), discarding the score-threshold cut above. That could
    # index an empty mask tensor and let sub-threshold boxes be drawn below.
    # It also needs masks, which only exist when the mask branch is enabled.
    if cfg.eval_mask_branch and args.display_best_masks_only == True and args.top_k == 1:  # noqa: E712
        for i in range(num_dets_to_consider):
            # masks[i] is [h, w] or [h, w, 1] depending on whether the block
            # above ran; normalise to [h, w, 1] either way.
            mask = masks[i].reshape(masks.shape[1], masks.shape[2], 1)
            img_gpu_masked = img_gpu * (mask >= 1).float().expand(-1, -1, 3)
            img_numpy_masked = (img_gpu_masked * 255).byte().cpu().numpy()
            cv2.imwrite('results/mask_image' + str(i) + '.jpg', img_numpy_masked)
            print("Mask for the most visible car is generated")

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            # NOTE(review): this flag is compared with the string 'True' while
            # display_best_masks_only is compared with the bool True - confirm
            # how the argument parser delivers these values.
            if args.display_best_bboxes_only == 'True':
                crop = img_numpy[y1:y2, x1:x2]
                cv2.imwrite('results/crop_object.png', crop)
                print("crop for the most visible car is generated")

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
def prep_display(dets_out, img, gt, gt_masks, h, w, undo_transform=True, class_color=False):
    """
    Render the detections in dets_out onto the image and return the result
    as a uint8 numpy array.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    gt and gt_masks are also allowed to be none (until I reimplement that functionality).
    """
    if not undo_transform:
        img_gpu = img / 255.0
        h, w, _ = img.shape
    else:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Kept as a tensor; only used by the mask-drawing pass below.
            masks = t[3][:args.top_k]
        classes, scores, boxes = [part[:args.top_k].cpu().numpy() for part in t[:3]]

    # Nothing detected: hand back the untouched image.
    if classes.shape[0] == 0:
        return (img_gpu * 255).byte().cpu().numpy()

    def get_color(j):
        palette_slot = classes[j] * 5 if class_color else j * 5
        color = COLORS[palette_slot % len(COLORS)]
        if undo_transform:
            return color
        # Swap first and last channel when the image was not un-transformed.
        return (color[2], color[1], color[0])

    # Detections at or above the score threshold, last one first.
    shown = [
        j for j in reversed(range(min(args.top_k, classes.shape[0])))
        if scores[j] >= args.score_threshold
    ]

    # Pass 1: blend the masks into the image tensor.
    if args.display_masks and cfg.eval_mask_branch:
        mask_alpha = 0.45
        for j in shown:
            color = get_color(j)
            mask = masks[j, :, :, None]
            mask_color = mask @ (torch.Tensor(color).view(1, 3) / 255.0)

            # Alpha only the region of the image that contains the mask
            img_gpu = img_gpu * (1 - mask) \
                + img_gpu * mask * (1 - mask_alpha) + mask_color * mask_alpha

    # Pass 2: CPU-side drawing with OpenCV.
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_text or args.display_bboxes:
        for j in shown:
            score = scores[j]
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if not args.display_text:
                continue

            _class = COCO_CLASSES[classes[j]]
            if args.display_scores:
                text_str = '%s: %.2f' % (_class, score)
            else:
                text_str = _class

            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1
            text_color = [255, 255, 255]

            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
            text_pt = (x1, y1 - 3)

            # Filled label background above the box corner, then the label.
            cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
def image_callback(image_data):
    """
    Handle one incoming image message: run detection, derive per-region
    pollution grade and vegetation type, publish an AreasInfo message and,
    when display_switch is set, show (and optionally record) the annotated
    frame.
    """
    global cv_image
    time_start = time.time()
    cv_image = np.frombuffer(image_data.data, dtype=np.uint8).reshape(
        image_data.height, image_data.width, -1)

    # region_output is an 8x4 array, one row per region. Columns:
    #   0: pollution grade (0-4)
    #   1: vegetation type (0 none, 1 grass, 2 shrub, 3 flower)
    #   2: pedestrian flag (0 no, 1 yes)
    #   3: region ID (1-8)
    region_output = np.zeros((8, 4))
    region_output[:, 3] = np.arange(1, 9)

    ignored_items = ['road hole', 'water stain']
    rubbish_items = ['ads', 'cigarette', 'firecracker', 'glass bottle', 'leaves', 'metal',
                     'paper', 'peel', 'plastic', 'solid clod', 'solid crumb']
    # Per-class weight factor, keyed by class name (same order as rubbish_items).
    rubbish_weights = dict(zip(
        rubbish_items,
        [80, 200, 200, 8000, 80, 1050, 80, 6000, 775, 15750, 4000]))
    # Listed in priority order: a later entry overrides an earlier one.
    vegetation_items = ['grass', 'shrub', 'flower']

    with torch.no_grad():
        # Object detection
        frame = torch.from_numpy(cv_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = net(batch)

        # Pair up each target's mask, class, score and bounding box
        h, w, _ = frame.shape
        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            # Detection result
            t = postprocess(preds, w, h, visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
            cfg.rescore_bbox = save

        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:args.top_k]
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][idx]
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        num_dets_to_consider = min(args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < args.score_threshold:
                num_dets_to_consider = j
                break

        if num_dets_to_consider > 0:
            target_masks = masks[:num_dets_to_consider, :, :]
            target_classes = classes[:num_dets_to_consider]
            target_scores = scores[:num_dets_to_consider]
            target_boxes = boxes[:num_dets_to_consider, :]

            # Drop targets whose class is 'road hole' or 'water stain'
            remain_list = [
                k for k, cls in enumerate(target_classes)
                if cfg.dataset.class_names[cls] not in ignored_items
            ]
            target_masks = target_masks[remain_list, :, :]
            target_classes = target_classes[remain_list]
            target_scores = target_scores[remain_list]
            target_boxes = target_boxes[remain_list, :]

            # Show the detection result
            # NOTE(review): num_dets_to_consider is the count from before the
            # class filter above, so it can exceed the length of the filtered
            # arrays - confirm result_display tolerates that.
            if display_switch:
                result_image = result_display(frame, target_masks, target_classes,
                                              target_scores, target_boxes,
                                              num_dets_to_consider)
            else:
                result_image = frame

            # Split the remaining targets into rubbish and vegetation
            rubbish_remain_list = []
            vegetation_remain_list = []
            for k, cls in enumerate(target_classes):
                class_name = cfg.dataset.class_names[cls]
                if class_name in rubbish_items:
                    rubbish_remain_list.append(k)
                if class_name in vegetation_items:
                    vegetation_remain_list.append(k)

            rubbish_masks = target_masks[rubbish_remain_list, :, :]
            rubbish_classes = target_classes[rubbish_remain_list]
            rubbish_scores = target_scores[rubbish_remain_list]
            rubbish_boxes = target_boxes[rubbish_remain_list, :]
            vegetation_masks = target_masks[vegetation_remain_list, :, :]
            vegetation_classes = target_classes[vegetation_remain_list]
            vegetation_scores = target_scores[vegetation_remain_list]
            vegetation_boxes = target_boxes[vegetation_remain_list, :]
            rubbish_count = len(rubbish_remain_list)
            vegetation_count = len(vegetation_remain_list)

            # --- Rubbish targets ---
            if rubbish_count > 0:
                # Sample points along each mask boundary
                result_image, rubbish_boundary_pts = get_boundary(
                    result_image, rubbish_count, rubbish_masks, cpt_num=10)
                # s_polygon: ground-projected area of each rubbish target in world coordinates
                s_polygon = np.zeros((rubbish_count, 1))
                # region_w: accumulated rubbish weight per region
                region_w = np.zeros((8, 1))

                for i in range(rubbish_boundary_pts.shape[0]):
                    ground_x, ground_z, area_ids = [], [], []
                    for b_pt in range(rubbish_boundary_pts.shape[1]):
                        b_pt_u = rubbish_boundary_pts[i, b_pt, 0, 0]
                        b_pt_v = rubbish_boundary_pts[i, b_pt, 0, 1]
                        # Skip invalid pixel coordinates (u=0, v=0)
                        if not (b_pt_u or b_pt_v):
                            continue
                        loc_b_pt = p2d_table[b_pt_u, b_pt_v]
                        # Skip invalid world coordinates (x=0, z=0)
                        if not (loc_b_pt[0] or loc_b_pt[1]):
                            continue
                        ground_x.append(loc_b_pt[0])
                        ground_z.append(loc_b_pt[1])
                        area_ids.append(CameraT.whatArea(loc_b_pt[0], loc_b_pt[1]))
                    effective_pt_num = len(area_ids)

                    if effective_pt_num >= 3:
                        # Sum of |2 * triangle area| over a fan anchored at point 0
                        x0, z0 = ground_x[0], ground_z[0]
                        s_sum = 0
                        for k in range(1, effective_pt_num - 1):
                            s_sum += abs(x0*ground_z[k] - z0*ground_x[k]
                                         + ground_x[k]*ground_z[k + 1] - ground_z[k]*ground_x[k + 1]
                                         + ground_x[k + 1]*z0 - ground_z[k + 1]*x0)
                        s_polygon[i, 0] = s_sum / 2

                    for area_id in area_ids:
                        # Skip points with an invalid region ID (ID=0)
                        if area_id:
                            region_w[area_id - 1, 0] += s_polygon[i, 0] * rubbish_weights[cfg.dataset.class_names[rubbish_classes[i]]] / effective_pt_num

                # Map the accumulated weight to a pollution grade
                for region_i in range(8):
                    weight = region_w[region_i, 0]
                    if weight > 1000:
                        region_output[region_i, 0] = 4
                    elif weight > 500:
                        region_output[region_i, 0] = 3
                    elif weight > 250:
                        region_output[region_i, 0] = 2
                    elif weight > 0:
                        region_output[region_i, 0] = 1

                if display_switch:
                    print('region_w')
                    print(region_w)
                    result_image = w_display(result_image, region_w,
                                             font_face=cv2.FONT_HERSHEY_DUPLEX,
                                             font_scale=0.5, font_thickness=1)

            # --- Vegetation targets ---
            if vegetation_count > 0:
                # Sample points along each mask boundary
                result_image, vegetation_boundary_pts = get_boundary(
                    result_image, vegetation_count, vegetation_masks, cpt_num=20)
                # region_vegetation_type: vegetation type found in each region
                region_vegetation_type = np.zeros((8, 1))

                for i in range(vegetation_boundary_pts.shape[0]):
                    area_ids = []
                    for b_pt in range(vegetation_boundary_pts.shape[1]):
                        b_pt_u = vegetation_boundary_pts[i, b_pt, 0, 0]
                        b_pt_v = vegetation_boundary_pts[i, b_pt, 0, 1]
                        # Skip invalid pixel coordinates (u=0, v=0)
                        if not (b_pt_u or b_pt_v):
                            continue
                        loc_b_pt = p2d_table[b_pt_u, b_pt_v]
                        # Skip invalid world coordinates (x=0, z=0)
                        if not (loc_b_pt[0] or loc_b_pt[1]):
                            continue
                        area_ids.append(CameraT.whatArea(loc_b_pt[0], loc_b_pt[1]))

                    for area_id in area_ids:
                        # Skip points with an invalid region ID (ID=0)
                        if not area_id:
                            continue
                        v_type = vegetation_items.index(cfg.dataset.class_names[vegetation_classes[i]]) + 1
                        if v_type > region_vegetation_type[area_id - 1, 0]:
                            region_vegetation_type[area_id - 1, 0] = v_type

                region_output[:, 1] = region_vegetation_type[:, 0]
        else:
            result_image = frame.byte().cpu().numpy()

    # Publish one AreaInfo entry per region
    areasinfo_msg = AreasInfo()
    for region_i in range(8):
        region_output_msg = AreaInfo()
        region_output_msg.rubbish_grade = int(region_output[region_i, 0])
        region_output_msg.has_person = bool(region_output[region_i, 2])
        region_output_msg.vegetation_type = int(region_output[region_i, 1])
        region_output_msg.area_id = int(region_output[region_i, 3])
        areasinfo_msg.infos.append(region_output_msg)
    pub.publish(areasinfo_msg)

    if display_switch:
        print('region_output')
        print(region_output)
        result_image = CameraT.drawLine(result_image, w=1)
        result_image = output_display(result_image, region_output,
                                      font_face=cv2.FONT_HERSHEY_DUPLEX,
                                      font_scale=0.5, font_thickness=1)
        cv2.imshow("result_image", result_image)
        if record_switch:
            video_out.write(result_image)
        # Esc closes the window and shuts the node down
        if cv2.waitKey(1) == 27:
            if record_switch:
                video_out.release()
            cv2.destroyAllWindows()
            rospy.signal_shutdown("It's over.")

    time_end_all = time.time()
    print("totally time cost:", time_end_all - time_start)
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Draw masks, boxes and labels for the detections in dets_out (Jittor
    variant) and return the rendered image as a uint8 numpy array.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = jt.array(img_numpy)
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # Force box rescoring for this call only, then restore the setting.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        # Fix: argsort here returns an (index, values) pair. The old code
        # sliced that pair with [:top_k] instead of the index array, so top_k
        # was never applied and top_k == 1 failed at unpacking.
        idx, _ = t[1].argsort(0, descending=True)
        idx = idx[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].numpy() for x in t[:3]]

    # Scores are sorted descending, so stop at the first one below threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = jt.array(list(color)).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider].unsqueeze(3)

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = jt.contrib.concat([
            get_color(j, 0).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ], dim=0)
        # colors is tiled to the mask's spatial size explicitly before multiplying.
        masks_color = masks.repeat(1, 1, 1, 3) * colors.repeat(
            1, masks.shape[1], masks.shape[2], 1) * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).uint8().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, image_header=Header()):
    """
    Draw masks, boxes and labels for the detections in dets_out, publish
    them as a Detections message on self.detections_pub, and return the
    rendered image as a uint8 numpy array.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.

    image_header supplies the stamp and frame_id copied into the published
    message. When no detection reaches the score threshold the unannotated
    image is returned and nothing is published.
    """
    with torch.no_grad():
        dets = Detections()

        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h, visualize_lincomb=False,
                            crop_masks=True,
                            score_threshold=0.3)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:100]
            classes, scores, boxes = [x[:100].cpu().numpy() for x in t[:3]]

        # Stop at the first detection whose score is below the threshold.
        num_dets_to_consider = min(100, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < 0.3:
                num_dets_to_consider = j
                break

        if num_dets_to_consider == 0:
            # No detections found so just output the original image
            return (img_gpu * 255).byte().cpu().numpy()

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            global color_cache
            color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

            if on_gpu is not None and color_idx in color_cache[on_gpu]:
                return color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BRG, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    color_cache[on_gpu][color_idx] = color
                return color

        # First, draw the masks on the GPU where we can do it really fast
        # NOTE(review): masks is only bound when cfg.eval_mask_branch is set;
        # otherwise the next line raises NameError.
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                            for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        # Copy the [num_dets, h, w] masks to host memory once instead of once
        # per detection inside the loop.
        masks_cpu = masks[:, :, :, 0].cpu().numpy()

        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            _class = cfg.dataset.class_names[classes[j]]
            text_str = '%s: %.2f' % (_class, score)

            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1

            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

            text_pt = (x1, y1 - 3)
            text_color = [255, 255, 255]

            cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

            det = Detection()
            det.box.x1 = x1
            det.box.y1 = y1
            det.box.x2 = x2
            det.box.y2 = y2
            det.class_name = _class
            det.score = score

            # Crop the mask to its bounding box and flatten it row-major.
            mask_bb = masks_cpu[j][y1:y2, x1:x2]
            mask_rs = np.reshape(mask_bb, -1)
            det.mask.height = y2 - y1
            det.mask.width = x2 - x1
            det.mask.mask = np.array(mask_rs, dtype=bool)
            dets.detections.append(det)

        dets.header.stamp = image_header.stamp
        dets.header.frame_id = image_header.frame_id
        self.detections_pub.publish(dets)

    return img_numpy
def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, detections: Detections = None):
    """Fold one image's detections into the running AP statistics.

    Nothing is returned; all results are recorded as side effects.

    * If ``args.output_coco_json`` is set, every detection whose box has a
      positive area is appended to ``detections`` (box and mask) and the
      ground truth is not touched.
    * Otherwise each detection is matched against the ground truth and
      pushed into ``ap_data[iou_type][iou_idx][class]`` as a true or false
      positive, for both the ``'box'`` and ``'mask'`` IoU types and every
      entry of the module-level ``iou_thresholds``.

    Args:
        ap_data: nested container indexed as
            ``ap_data[iou_type][iou_idx][class]``; each leaf must provide
            ``add_gt_positives`` and ``push``.
        dets: raw detections, forwarded unchanged to ``postprocess``.
        img: not used by this function.
        gt: 2-D array; columns 0-3 hold the box (columns 0/2 are scaled by
            ``w`` and 1/3 by ``h`` here, so presumably normalised
            coordinates - confirm with the caller), column 4 the class id.
        gt_masks: ground-truth masks, reshaped here to ``(-1, h * w)``.
        h: image height used for scaling and mask reshaping.
        w: image width used for scaling and mask reshaping.
        num_crowd: number of crowd annotations; they are taken from the
            *end* of ``gt`` / ``gt_masks``.
        image_id: image identifier forwarded to ``detections``.
        detections: collector used only when ``args.output_coco_json`` is
            set; it must not be None in that mode.
    """
    if not args.output_coco_json:
        with timer.env('Prepare gt'):
            gt_boxes = jt.array(gt[:, :4])
            gt_boxes[:, [0, 2]] *= w
            gt_boxes[:, [1, 3]] *= h
            gt_classes = list(gt[:, 4].astype(int))
            gt_masks = jt.array(gt_masks).view(-1, h * w)

            if num_crowd > 0:
                # Crowd annotations are the trailing num_crowd entries.
                crowd_boxes, gt_boxes = gt_boxes[-num_crowd:], gt_boxes[:-num_crowd]
                crowd_masks, gt_masks = gt_masks[-num_crowd:], gt_masks[:-num_crowd]
                crowd_classes, gt_classes = gt_classes[-num_crowd:], gt_classes[:-num_crowd]

    with timer.env('Postprocess'):
        classes, scores, boxes, masks = postprocess(
            dets, w, h, crop_masks=args.crop, score_threshold=args.score_threshold)

        if classes.size(0) == 0:
            # No detections at all. The ground truth of this image must still
            # be counted, otherwise every object missed here silently vanishes
            # from the recall denominator and the AP is overestimated.
            if not args.output_coco_json:
                for _class in set(gt_classes):
                    num_gt_for_class = sum(1 for c in gt_classes if c == _class)
                    for iou_idx in range(len(iou_thresholds)):
                        for iou_type in ('box', 'mask'):
                            ap_data[iou_type][iou_idx][_class].add_gt_positives(num_gt_for_class)
            return

        classes = list(classes.numpy().astype(int))
        if isinstance(scores, list):
            # postprocess may hand back separate box and mask scores.
            box_scores = list(scores[0].numpy().astype(float))
            mask_scores = list(scores[1].numpy().astype(float))
        else:
            scores = list(scores.numpy().astype(float))
            box_scores = scores
            mask_scores = scores
        masks = masks.view(-1, h * w)

    if args.output_coco_json:
        with timer.env('JSON Output'):
            boxes = boxes.numpy()
            masks = masks.view(-1, h, w).numpy()
            for i in range(masks.shape[0]):
                # Make sure that the bounding box actually makes sense and a mask was produced
                if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0:
                    detections.add_bbox(image_id, classes[i], boxes[i, :], box_scores[i])
                    detections.add_mask(image_id, classes[i], masks[i, :, :], mask_scores[i])
            return

    with timer.env('Eval Setup'):
        num_pred = len(classes)
        num_gt = len(gt_classes)

        # Pairwise IoU tables, indexed [detection, ground truth].
        mask_iou_cache = _mask_iou(masks, gt_masks).numpy()
        bbox_iou_cache = _bbox_iou(boxes.float(), gt_boxes.float()).numpy()

        if num_crowd > 0:
            crowd_mask_iou_cache = _mask_iou(masks, crowd_masks, iscrowd=True).numpy()
            crowd_bbox_iou_cache = _bbox_iou(boxes.float(), crowd_boxes.float(), iscrowd=True).numpy()
        else:
            crowd_mask_iou_cache = None
            crowd_bbox_iou_cache = None

        # Detections are visited from the highest to the lowest score. The
        # mask order is a stable re-sort of the box order.
        box_indices = sorted(range(num_pred), key=lambda i: -box_scores[i])
        mask_indices = sorted(box_indices, key=lambda i: -mask_scores[i])

        # (name, IoU lookup, crowd IoU lookup, score lookup, visiting order)
        iou_types = [
            ('box',
             lambda i, j: bbox_iou_cache[i, j].item(),
             lambda i, j: crowd_bbox_iou_cache[i, j].item(),
             lambda i: box_scores[i],
             box_indices),
            ('mask',
             lambda i, j: mask_iou_cache[i, j].item(),
             lambda i, j: crowd_mask_iou_cache[i, j].item(),
             lambda i: mask_scores[i],
             mask_indices),
        ]

    timer.start('Main loop')
    for _class in set(classes + gt_classes):
        num_gt_for_class = sum(1 for c in gt_classes if c == _class)

        for iou_idx, iou_threshold in enumerate(iou_thresholds):
            for iou_type, iou_func, crowd_func, score_func, indices in iou_types:
                # Each ground-truth object may be claimed by one detection only.
                gt_used = [False] * len(gt_classes)

                ap_obj = ap_data[iou_type][iou_idx][_class]
                ap_obj.add_gt_positives(num_gt_for_class)

                for i in indices:
                    if classes[i] != _class:
                        continue

                    # Best still-unclaimed ground truth of this class whose
                    # IoU is strictly above the threshold.
                    max_iou_found = iou_threshold
                    max_match_idx = -1
                    for j in range(num_gt):
                        if gt_used[j] or gt_classes[j] != _class:
                            continue

                        iou = iou_func(i, j)
                        if iou > max_iou_found:
                            max_iou_found = iou
                            max_match_idx = j

                    if max_match_idx >= 0:
                        gt_used[max_match_idx] = True
                        ap_obj.push(score_func(i), True)
                        continue

                    # If the detection matches a crowd, we can just ignore it.
                    # All this crowd code so that we can make sure that our eval code gives the
                    # same result as COCOEval. There aren't even that many crowd annotations to
                    # begin with, but accuracy is of the utmost importance.
                    matched_crowd = num_crowd > 0 and any(
                        crowd_class == _class and crowd_func(i, j) > iou_threshold
                        for j, crowd_class in enumerate(crowd_classes))

                    if not matched_crowd:
                        ap_obj.push(score_func(i), False)
    timer.stop('Main loop')
def prep_display(net, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """Return the image with everything except the detected objects blacked out.

    The raw network outputs are run through ``net.detect`` and
    ``postprocess``; the pixels covered by the mask of every kept detection
    are then copied from the image onto an all-zero canvas.

    A detection is kept when it is among the ``args.top_k`` highest scores,
    its score is not below ``args.score_threshold`` and, if ``args.classes``
    is not None, its class name is listed there.

    Note: If undo_transform=False then h and w are allowed to be None; they
    are read from ``img.shape`` instead.

    Args:
        net: network object; only its ``detect`` attribute is used.
        dets_out: sequence whose items 0-4 are passed to ``net.detect`` as
            'loc', 'conf', 'mask', 'priors' and 'proto'.
        img: the input image. With ``undo_transform`` it is handed to
            ``undo_image_transformation``; otherwise it is divided by 255
            and used directly (assumed to be an (h, w, c) torch tensor -
            confirm with the caller).
        h: image height (see note above).
        w: image width (see note above).
        undo_transform: whether ``img`` still carries the network's input
            transformation.
        class_color: unused; kept for interface compatibility.
        mask_alpha: unused; kept for interface compatibility.
        fps_str: unused; kept for interface compatibility.

    Returns:
        A uint8 numpy array shaped like the image. It is all zeros when no
        detection is kept or when ``cfg.eval_mask_branch`` is disabled.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        saved_rescore = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            preds = net.detect(
                {
                    'loc': dets_out[0],
                    'conf': dets_out[1],
                    'mask': dets_out[2],
                    'priors': dets_out[3],
                    'proto': dets_out[4],
                },
                net)
            t = postprocess(preds, w, h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
        finally:
            # Put the global setting back even if detection fails, so later
            # callers do not inherit the temporary override.
            cfg.rescore_bbox = saved_rescore

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        # Masks are applied on the image's device, so don't copy them.
        # Without the mask branch there is nothing that could be cut out.
        masks = t[3][idx] if cfg.eval_mask_branch else None
        classes, scores = [x[idx].cpu().numpy() for x in t[:2]]

    # Scores are sorted in descending order: stop at the first one that falls
    # below the threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Allocate the canvas next to the image instead of relying on the
    # process-wide default tensor type to pick a matching device.
    img_tmp = torch.zeros_like(img_gpu)
    if masks is not None:
        for i in range(num_dets_to_consider):
            class_name = cfg.dataset.class_names[classes[i]]
            if args.classes is None or class_name in args.classes:
                keep = masks[i] == 1
                img_tmp[keep] = img_gpu[keep]

    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    return (img_tmp * 255).byte().cpu().numpy()