def draw_instance_predictions(self, frame, predictions):
    """
    Overlay the instance predictions of one frame on that frame.

    Args:
        frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
        predictions (Instances): the output of an instance
            detection/segmentation model. The fields read here are
            "pred_boxes", "pred_classes", "scores", "pred_keypoints" and
            "pred_masks"; each one is looked up only when present.

    Returns:
        output (VisImage): image object with visualizations.
    """
    frame_visualizer = Visualizer(frame, self.metadata)
    num_instances = len(predictions)
    if num_instances == 0:
        # Nothing to draw: hand back the untouched frame.
        return frame_visualizer.output

    boxes = None
    if predictions.has("pred_boxes"):
        boxes = predictions.pred_boxes.tensor.numpy()
    scores = None
    if predictions.has("scores"):
        scores = predictions.scores
    classes = None
    if predictions.has("pred_classes"):
        classes = predictions.pred_classes.numpy()
    keypoints = None
    if predictions.has("pred_keypoints"):
        keypoints = predictions.pred_keypoints
    masks = None
    if predictions.has("pred_masks"):
        masks = predictions.pred_masks

    detected = []
    for i in range(num_instances):
        detected.append(
            _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8)
        )
    colors = self._assign_colors(detected)

    class_names = self.metadata.get("thing_classes", None)
    labels = _create_text_labels(classes, scores, class_names)

    if self._instance_mode == ColorMode.IMAGE_BW:
        # Grey out everything that is not covered by some instance mask.
        foreground = (masks.any(dim=0) > 0).numpy() if masks is not None else None
        frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
            foreground
        )
        alpha = 0.3
    else:
        alpha = 0.5

    frame_visualizer.overlay_instances(
        boxes=boxes,
        masks=masks,
        labels=labels,
        keypoints=keypoints,
        assigned_colors=colors,
        alpha=alpha,
    )
    return frame_visualizer.output
def draw_instance_predictions(self, frame, predictions, effect_type):
    """
    Overlay pre-extracted predictions on a frame using one of two effects.

    Args:
        frame: image handed to ``Visualizer`` together with ``self.metadata``.
        predictions (dict): mapping read through the keys "current_frame",
            "num_instances", "boxes", "scores", "classes", "keypoints" and
            "masks".
        effect_type: ``0`` selects ``overlay_instances_scanning``; any other
            value selects ``overlay_instances_stop_motion``.

    Returns:
        The ``output`` of the frame visualizer.
    """
    frame_visualizer = Visualizer(frame, self.metadata)
    cnt = predictions["current_frame"]
    num_instances = predictions["num_instances"]
    if num_instances == 0:
        return frame_visualizer.output

    boxes = predictions["boxes"]
    scores = predictions["scores"]
    classes = predictions["classes"]
    keypoints = predictions["keypoints"]
    masks = predictions["masks"]

    detected = []
    for i in range(num_instances):
        detected.append(
            _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8)
        )
    colors = self._assign_colors(detected)
    labels = _create_text_labels(
        classes, scores, self.metadata.get("thing_classes", None)
    )

    alpha = 0.5
    if self._instance_mode == ColorMode.IMAGE_BW:
        foreground = (masks.any(dim=0) > 0).numpy() if masks is not None else None
        frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
            foreground
        )
        alpha = 0.3

    # Both effects take exactly the same arguments, so pick the method first.
    if effect_type == 0:
        overlay = frame_visualizer.overlay_instances_scanning
    else:
        overlay = frame_visualizer.overlay_instances_stop_motion

    overlay(
        # Boxes are only drawn when there are no masks to show.
        boxes=None if masks is not None else boxes,
        cnt=cnt,
        masks=masks,
        labels=labels,
        keypoints=keypoints,
        assigned_colors=colors,
        alpha=alpha,
    )
    return frame_visualizer.output
def draw_instance_predictions(self, predictions):
    """
    Draw instance masks and keypoints, labelled with a fixed class-name list.

    Boxes and scores are deliberately not drawn: both are passed on as None.

    Args:
        predictions (Instances): the output of an instance
            detection/segmentation model. The fields read here are
            "pred_classes", "pred_keypoints" and "pred_masks", each only
            when present.

    Returns:
        output (VisImage): image object with visualizations.
    """
    class_names = ["Hv", "Hp", "CLS", "BL", "PD", "PB", "CC", "LM", "D/P"]

    classes = None
    if predictions.has("pred_classes"):
        classes = predictions.pred_classes
    labels = _create_text_labels(classes, None, class_names)

    keypoints = None
    if predictions.has("pred_keypoints"):
        keypoints = predictions.pred_keypoints

    has_masks = predictions.has("pred_masks")
    masks = None
    if has_masks:
        masks = []
        for raw_mask in np.asarray(predictions.pred_masks):
            masks.append(
                GenericMask(raw_mask, self.output.height, self.output.width)
            )

    mode = self._instance_mode
    colors, alpha = None, 0
    if mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"):
        colors = []
        for c in classes:
            rgb = [x / 255 for x in self.metadata.thing_colors[c]]
            colors.append(self._jitter(rgb))
        alpha = 0.8

    if mode == ColorMode.IMAGE_BW:
        if has_masks:
            foreground = (predictions.pred_masks.any(dim=0) > 0).numpy()
        else:
            foreground = None
        self.output.img = self._create_grayscale_image(foreground)
        alpha = 0.3

    self.overlay_instances(
        masks=masks,
        boxes=None,
        labels=labels,
        keypoints=keypoints,
        assigned_colors=colors,
        alpha=alpha,
    )
    return self.output
def draw_instance_predictions(self, predictions, track_ids):
    """
    Draw instance-level prediction results, coloured and labelled per track.

    Args:
        predictions (Instances): the output of an instance
            detection/segmentation model. The fields read here are
            "pred_boxes", "pred_classes", "scores", "pred_keypoints" and
            "pred_masks", each only when present.
        track_ids (iterable): one track id per instance, in the same order as
            the instances in ``predictions``. Ids are converted with
            ``int()`` to pick a colour.

    Returns:
        output (VisImage): image object with visualizations.
    """
    boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None
    scores = predictions.scores if predictions.has("scores") else None
    classes = predictions.pred_classes if predictions.has("pred_classes") else None
    labels = _create_text_labels(
        classes, scores, self.metadata.get("thing_classes", None)
    )
    keypoints = (
        predictions.pred_keypoints if predictions.has("pred_keypoints") else None
    )

    if predictions.has("pred_masks"):
        masks = [
            GenericMask(x, self.output.height, self.output.width)
            for x in np.asarray(predictions.pred_masks)
        ]
    else:
        masks = None

    # Materialise once: the ids are consumed for both colours and labels.
    track_ids = list(track_ids)

    # Set the colour according to the track id. Wrap around the palette:
    # an integer index past the end of a colormap is clamped to its last
    # entry, which would give every track id >= 20 the same colour.
    palette_size = cm.tab20.N
    colors = [cm.tab20(int(id_) % palette_size) for id_ in track_ids]
    alpha = 0.6

    if labels is None:
        # No class or score information: label with the track id alone.
        labels = [f'Track {id_}' for id_ in track_ids]
    else:
        labels = [
            f'Track {id_} {label}' for label, id_ in zip(labels, track_ids)
        ]

    # Increase the font size. NOTE: this mutates the visualizer, so the size
    # grows on every call until it reaches 20.
    if self._default_font_size < 20:
        self._default_font_size *= 1.3

    if self._instance_mode == ColorMode.IMAGE_BW:
        assert predictions.has(
            "pred_masks"), "ColorMode.IMAGE_BW requires segmentations"
        self.output.img = self._create_grayscale_image(
            (predictions.pred_masks.any(dim=0) > 0).numpy())

    self.overlay_instances(
        masks=masks,
        boxes=boxes,
        labels=labels,
        keypoints=keypoints,
        assigned_colors=colors,
        alpha=alpha,
    )
    return self.output
def object_detection_obtain_label(predictor, cfg, img):
    """
    Return the distinct class labels detected in an image.

    Args:
        predictor: callable applied to ``img``; its result is indexed with
            "instances".
        cfg: config whose ``DATASETS.TRAIN[0]`` names the dataset whose
            "thing_classes" metadata is used to name the classes.
        img: image passed unchanged to ``predictor``.

    Returns:
        numpy array with the unique labels, as produced by ``np.unique``.
        It is empty when the predictions carry no "pred_classes" field.
    """
    predictions = predictor(img)["instances"]
    if not predictions.has("pred_classes"):
        # Without classes there is nothing to name; previously this produced
        # an object array holding a single None.
        return np.array([], dtype=str)

    class_names = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).get(
        "thing_classes", None)
    labels = _create_text_labels(
        predictions.pred_classes.tolist(), None, class_names)
    return np.unique(np.array(labels))
def get_labels(self, imgs):
    """
    Return the distinct predicted class labels for each image.

    Args:
        imgs: iterable of numpy arrays. Each one is converted with
            ``self.arr_to_rgb`` and then has axis 0 moved to position 2
            (presumably channel-first to channel-last - TODO confirm).

    Returns:
        list with one entry per image: the sorted list of unique labels,
        or an empty list when the model produced no class field.
    """
    predictor = DefaultPredictor(self.cfg)
    # The class names do not depend on the image, so look them up once.
    class_names = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]).get(
        "thing_classes", None)

    labels = []
    for img in imgs:
        img = np.moveaxis(self.arr_to_rgb(img), 0, 2)
        instances = predictor(img)["instances"]
        classes = (
            instances.pred_classes if instances.has("pred_classes") else None
        )
        names = _create_text_labels(classes, None, class_names)
        # sorted() makes the order reproducible; a bare set has none.
        labels.append(sorted(set(names)) if names is not None else [])
    return labels
def draw_instance_predictions(vis,
                              tubelet_ids,
                              tubelet_instances,
                              tubelet_instance_projections,
                              draw_projections=False):
    """
    Draw the boxes of the current tubelet instances, one colour per tubelet.

    Args:
        vis: visualizer providing ``overlay_instances``, ``metadata`` and
            ``output``.
        tubelet_ids: ids paired position by position with
            ``tubelet_instances`` and ``tubelet_instance_projections``.
        tubelet_instances: per-tubelet ``Instances`` or None for tubelets
            without a detection.
        tubelet_instance_projections: per-tubelet projected ``Instances`` or
            None.
        draw_projections (bool): also draw the projections, with labels of
            the form "Pred. #<id>".

    Returns:
        ``vis.output``.
    """
    palette = "bgrcmy"

    def get_color(i):
        # Cycle through the palette by tubelet id.
        return palette[i % len(palette)]

    if not any(tubelet_instances):
        return vis.output

    present = [(tubelet_id, inst)
               for tubelet_id, inst in zip(tubelet_ids, tubelet_instances)
               if inst is not None]
    tubelet_instance_ids = [tubelet_id for tubelet_id, _ in present]
    merged = Instances.cat([inst for _, inst in present])

    labels = visualizer._create_text_labels(
        merged.pred_classes, merged.scores,
        vis.metadata.get("thing_classes", None))
    for i, tubelet_id in enumerate(tubelet_instance_ids):
        labels[i] = f"{labels[i]} ({merged.generation_process[i]}, #{tubelet_id})"

    vis.overlay_instances(
        boxes=merged.pred_boxes,
        labels=labels,
        assigned_colors=[get_color(i) for i in tubelet_instance_ids],
        alpha=0.5,
    )

    if draw_projections:
        projected = [
            (tubelet_id, inst)
            for tubelet_id, inst in zip(tubelet_ids,
                                        tubelet_instance_projections)
            if inst is not None
        ]
        # Instances.cat rejects an empty list, so skip the overlay when no
        # tubelet has a projection.
        if projected:
            projection_ids = [tubelet_id for tubelet_id, _ in projected]
            merged_projections = Instances.cat(
                [inst for _, inst in projected])
            vis.overlay_instances(
                boxes=merged_projections.pred_boxes,
                labels=[f"Pred. #{i}" for i in projection_ids],
                assigned_colors=[get_color(i) for i in projection_ids],
                alpha=0.1,
            )

    return vis.output
def draw_instance_predictions(self, frame, predictions):
    """
    Overlay detection boxes and their labels on one frame.

    Args:
        frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
        predictions (Instances): the output of an instance detection model.
            The fields read here are "pred_boxes", "pred_classes" and
            "scores", each only when present.

    Returns:
        output (VisImage): image object with visualizations.
    """
    frame_visualizer = Visualizer(frame, self.metadata)
    num_instances = len(predictions)
    if num_instances == 0:
        return frame_visualizer.output

    boxes = None
    if predictions.has("pred_boxes"):
        boxes = predictions.pred_boxes.tensor.numpy()
    scores = None
    if predictions.has("scores"):
        scores = predictions.scores
    classes = None
    if predictions.has("pred_classes"):
        classes = predictions.pred_classes.numpy()

    detected = []
    for i in range(num_instances):
        detected.append(
            _DetectedInstance(classes[i], boxes[i], color=None, ttl=8))
    colors = self._assign_colors(detected)

    class_names = self.metadata.get("thing_classes", None)
    labels = _create_text_labels(classes, scores, class_names)

    alpha = 0.5
    if self._instance_mode == ColorMode.IMAGE_BW:
        # There are no masks in this variant, so the helper is called
        # without a foreground mask.
        grayscale = frame_visualizer._create_grayscale_image()
        frame_visualizer.output.img = grayscale
        alpha = 0.3

    frame_visualizer.overlay_instances(
        boxes=boxes,
        labels=labels,
        assigned_colors=colors,
        alpha=alpha,
    )
    return frame_visualizer.output
def draw_instance_predictions_with_filters(self, filters, predictions):
    """
    Draw instance predictions through ``overlay_instances_with_filters``.

    Args:
        filters: forwarded unchanged as the first argument of
            ``self.overlay_instances_with_filters``.
        predictions (Instances): model output. The fields read here are
            "pred_boxes", "scores", "pred_classes", "pred_keypoints" and
            "pred_masks", each only when present.

    Returns:
        tuple ``(self.output, detected_objects, objects_stats)`` where the
        last two items are the second and third values returned by
        ``overlay_instances_with_filters``.
    """
    print("draw_instance_predictions_with_filters")

    boxes = None
    if predictions.has("pred_boxes"):
        boxes = predictions.pred_boxes
    scores = None
    if predictions.has("scores"):
        scores = predictions.scores
    classes = None
    if predictions.has("pred_classes"):
        classes = predictions.pred_classes

    class_names = self.metadata.get("thing_classes", None)
    labels = vis._create_text_labels(classes, scores, class_names)

    keypoints = None
    if predictions.has("pred_keypoints"):
        keypoints = predictions.pred_keypoints

    masks = None
    if predictions.has("pred_masks"):
        masks = []
        for raw_mask in np.asarray(predictions.pred_masks):
            masks.append(
                vis.GenericMask(raw_mask, self.output.height,
                                self.output.width))

    mode = self._instance_mode
    colors, alpha = None, 0.5
    if mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"):
        colors = []
        for c in classes:
            rgb = [x / 255 for x in self.metadata.thing_colors[c]]
            colors.append(self._jitter(rgb))
        alpha = 0.8

    if mode == ColorMode.IMAGE_BW:
        foreground = (predictions.pred_masks.any(dim=0) > 0).numpy()
        self.output.img = self._create_grayscale_image(foreground)
        alpha = 0.3

    overlay_result = self.overlay_instances_with_filters(
        filters,
        masks=masks,
        boxes=boxes,
        labels=labels,
        keypoints=keypoints,
        assigned_colors=colors,
        alpha=alpha,
    )
    # The first returned item is not needed here.
    _, detected_objects, objects_stats = overlay_result
    return (self.output, detected_objects, objects_stats)
def get_video_labels(self, frame, predictions):
    """
    Return the text labels for the instances predicted on one frame.

    Args:
        frame: unused; kept so existing callers keep working.
        predictions (Instances): model output. The fields read here are
            "scores" and "pred_classes", each only when present.

    Returns:
        The empty string when there are no instances, otherwise whatever
        ``_create_text_labels`` returns for the classes and scores.
    """
    # No Visualizer is built here: only the label text is needed, and the
    # boxes/keypoints that used to be fetched were never used.
    if len(predictions) == 0:
        return ""

    scores = predictions.scores if predictions.has("scores") else None
    classes = (predictions.pred_classes.numpy()
               if predictions.has("pred_classes") else None)
    return _create_text_labels(classes, scores,
                               self.metadata.get("thing_classes", None))
def object_detection_obtain_label(predictor, cfg, img):
    """
    Return the distinct class labels detected in an image.

    Arguments:
        predictor: object predictor implementing COCO-Detection faster-rcnn
            backbone architecture; its result is indexed with "instances".
        cfg: object including parameters for the model like weights and
            threshold; ``cfg.DATASETS.TRAIN[0]`` names the dataset whose
            "thing_classes" metadata is used to name the classes.
        img: image numpy array

    Returns:
        label: one numpy array containing only the detected object names in
            the image (strings), de-duplicated with ``np.unique``. It is
            empty when the predictions carry no "pred_classes" field.
    """
    predictions = predictor(img)["instances"]
    if not predictions.has("pred_classes"):
        # Without classes there is nothing to name; previously this produced
        # an object array holding a single None.
        return np.array([], dtype=str)

    class_names = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).get(
        "thing_classes", None)
    labels = _create_text_labels(
        predictions.pred_classes.tolist(),
        None,
        class_names,
    )
    return np.unique(np.array(labels))
def draw_instance_bbox(self, predictions):
    """
    Track the detections of one frame and flag implausible or sitting poses.

    Despite the name, nothing is drawn: the method updates the tracked
    instances through ``self.tracking`` and returns them.

    Args:
        predictions (Instances): the output of a keypoint detection model.
            The fields read here are "pred_boxes", "pred_classes" and
            "pred_keypoints". Keypoint rows are indexed as (x, y, score) and
            the indices 5/6, 11/12, 13/14 and 15/16 are treated as
            shoulders, hips, knees and ankles (presumably the COCO
            17-keypoint layout - TODO confirm).

    Returns:
        ``self._old_instances``, the list of currently tracked instances.
        When the frame contains no instance at least as large as
        ``self.area_threshold``, the previously tracked instances are aged
        by one frame (``ttl``) and the expired ones are dropped.
    """

    def expire_old_instances():
        # Age every tracked instance by one frame and drop the expired ones.
        # The list is rebuilt in place: deleting while enumerating would
        # skip the element that follows each deleted one.
        for inst in self._old_instances:
            inst.ttl -= 1
        self._old_instances[:] = [
            inst for inst in self._old_instances if inst.ttl > 0
        ]
        return self._old_instances

    def is_high(*points):
        # True when the first point is not below any of the others
        # (image coordinates: a smaller y is higher up).
        reference_y = points[0][1]
        for point in points:
            if reference_y > point[1]:
                return False
        return True

    def cal_degree(point1, point2, point3):
        # Angle in degrees at point2 between the rays to point1 and point3.
        ba = point1[:2] - point2[:2]
        bc = point3[:2] - point2[:2]
        # The epsilon only guards the division for zero-length limbs. The
        # dot product must be divided by the norms; otherwise the clamp
        # below reduces every angle to 0 or 180 degrees.
        cosine_angle = np.dot(ba, bc) / (
            np.linalg.norm(ba) * np.linalg.norm(bc) + 1e-6)
        cosine_angle = np.clip(cosine_angle, -1.0, 1.0)
        return np.degrees(np.arccos(cosine_angle))

    # If there is no newly detected instance, return the old instances
    # which were detected at a previous frame.
    if len(predictions) == 0:
        return expire_old_instances()

    boxes = (predictions.pred_boxes.tensor.numpy()
             if predictions.has("pred_boxes") else None)
    classes = (predictions.pred_classes.numpy()
               if predictions.has("pred_classes") else None)
    keypoints = (predictions.pred_keypoints
                 if predictions.has("pred_keypoints") else None)

    # Ignore boxes whose area is smaller than the area threshold.
    keep = [
        idx for idx, box in enumerate(boxes)
        if not (box[2] - box[0]) * (box[3] - box[1]) < self.area_threshold
    ]
    # If every new instance is smaller than area_threshold, fall back to the
    # old instances as well.
    if not keep:
        return expire_old_instances()
    if len(keep) != len(boxes):
        boxes = boxes[keep]
        classes = classes[keep]
        keypoints = keypoints[keep]

    detected = [
        _DetectedInstance(
            classes[i],
            bbox=boxes[i],
            index=None,
            path=[boxes[i]],
            extra=False,
            hide=False,
            hide_time=1,
            overlap=False,
            keypoint=keypoints[i],
            sit=False,
            mask_rle=None,
            color=None,
            ttl=50,
        ) for i in range(len(keep))
    ]
    # The returned colours and indices are not needed here; the call is
    # kept because the code below reads self._old_instances right after it
    # (presumably refreshed by tracking - TODO confirm).
    self.tracking(detected)

    for inst in self._old_instances:
        keypoint = inst.keypoint
        lower_body = [keypoint[j] for j in range(11, 17)]

        # Zero the score of keypoints at implausible positions: a shoulder
        # must not be below any hip, knee or ankle ...
        for shoulder in (5, 6):
            if not is_high(keypoint[shoulder], *lower_body):
                keypoint[shoulder][2] = 0
        # ... and a hip must not be below either ankle.
        for hip in (11, 12):
            if not is_high(keypoint[hip], keypoint[15], keypoint[16]):
                keypoint[hip][2] = 0

        # Detect whether the instance sits: a sharp hip-knee-ankle angle on
        # either side counts as sitting.
        left_angle = cal_degree(keypoint[11], keypoint[13], keypoint[15])
        right_angle = cal_degree(keypoint[12], keypoint[14], keypoint[16])
        if (left_angle < self.sit_threshold
                or right_angle < self.sit_threshold):
            inst.sit = True

    return self._old_instances
def main(args):
    """
    Run the detector on the six camera images of each sample and log results.

    For every processed sample the detections of each of the six images are
    appended to ``nuscenes_rgb_detections.txt`` and to a per-image text file
    under ``<output_dir>/data``. The annotated images are tiled into one
    window; pressing ``s`` saves the tiles, ``Esc`` stops the run.

    Args:
        args: ignored; the command line is parsed again with
            ``get_parser().parse_args()``.
    """
    global bev_im
    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    logger = setup_logger()
    logger.info("Arguments: " + str(args))

    cfg = setup_cfg(args)
    view = True

    predictor = DefaultPredictor(cfg)
    metadata = MetadataCatalog.get(cfg.DATASETS.TEST)

    dataset = nuscenes_object.nuscenes_object(
        '/raid/datasets/extracted_nuscenes', split='val', velo_kind='lidar_top')

    # Creates both the output directory and its 'data' sub-directory.
    os.makedirs(os.path.join(args.output_dir, 'data'), exist_ok=True)

    # The palette does not depend on the image, so build it once.
    palette = colormap(rgb=True, maximum=1)

    current_scene = 0
    current_time = 0
    # The context manager guarantees the log is flushed and closed, also
    # when the loop is left early or an error occurs.
    with open('nuscenes_rgb_detections.txt', "a") as f_rgb_detections:
        for idx in range(0, len(dataset)):
            name = dataset.get_idx_name(idx)[1:]
            scene, timestamp = int(name[:4]), int(name[-4:])
            # Skip samples that do not advance the time within the scene.
            if current_scene == scene and current_time >= timestamp:
                continue
            current_scene, current_time = scene, timestamp
            print(name)

            ims = []
            for ii in range(0, 6):
                image_name = str(ii) + name
                im = dataset.get_image_by_name(image_name)
                im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
                predictions = predictor(im)[0]
                print(predictions)
                instances = predictions["instances"].to(torch.device("cpu"))

                # Draw the detections over the original images.
                visualizer = Visualizer(im, metadata)
                classes = instances.pred_classes.cpu().numpy()
                class_names = visualizer.metadata.get("thing_classes")
                labels = [class_names[i] for i in classes]
                vis_colors = [
                    palette[i] if i < 74 else (0, 0, 0) for i in classes
                ]

                if view:
                    visualizer.overlay_instances(
                        boxes=instances.pred_boxes,
                        labels=_create_text_labels(
                            instances.pred_classes,
                            instances.scores,
                            visualizer.metadata.get("thing_classes", None)),
                        assigned_colors=vis_colors,
                        alpha=0.5,
                    )

                image_path = os.path.join(dataset.image_dir,
                                          '%s.jpg' % image_name)
                rgb_lines = []
                eval_lines = []
                for jj in range(len(instances)):
                    bbox = instances.pred_boxes[jj].get_numpy()[0]
                    score = instances.scores[jj].cpu().numpy()
                    rgb_lines.append(
                        image_path + " %s %f %.2f %.2f %.2f %.2f\n" %
                        (labels[jj], score, bbox[0], bbox[1], bbox[2],
                         bbox[3]))
                    eval_lines.append(
                        '%s -1 -1 -10 %.3f %.3f %.3f %.3f -1 -1 -1 -1 -1 -1 -1 %.3f\n'
                        % (labels[jj], bbox[0], bbox[1], bbox[2], bbox[3],
                           score))
                f_rgb_detections.writelines(rgb_lines)

                # One append per image instead of one re-open per detection;
                # as before, no file is created for an image without
                # detections.
                if eval_lines:
                    det_filename = os.path.join(args.output_dir, 'data',
                                                '%s.txt' % image_name)
                    with open(det_filename, 'a+') as f:
                        f.writelines(eval_lines)

                if view:
                    ims.append(
                        np.array(visualizer.output.get_image()[:, :, ::-1]))

            if view:
                h1 = cv2.hconcat((ims[1], ims[0], ims[2]))
                h2 = cv2.hconcat((ims[5], ims[3], ims[4]))
                v1 = cv2.vconcat((h1, h2))
                cv2.namedWindow('6im', cv2.WINDOW_NORMAL)
                cv2.imshow('6im', v1)

                key = cv2.waitKey(0)
                if key == 115:  # 's': save the current views
                    cv2.imwrite('%s_6im.png' % name, v1)
                    # NOTE(review): bev_im is a module global that this
                    # function never assigns - confirm it is set elsewhere.
                    cv2.imwrite('%s_bev_im.png' % name, bev_im)
                    print('SAVING IMAGES')
                if key == 27:
                    break  # esc to quit
def draw_instance_predictions_custom(self,
                                     frame,
                                     predictions,
                                     incl_boxes=True,
                                     incl_labels=True,
                                     incl_scores=True,
                                     target_alpha=None):
    """
    Overlay instance predictions on a frame with optional parts switched off.

    Args:
        frame: image handed to ``Visualizer`` together with ``self.metadata``.
        predictions (Instances): model output. The fields read here are
            "pred_boxes", "scores", "pred_classes", "pred_keypoints" and
            "pred_masks", each only when present.
        incl_boxes (bool): pass the boxes on to the overlay (they are still
            dropped whenever masks are available).
        incl_labels (bool): pass the text labels on to the overlay.
        incl_scores (bool): include the scores when building the labels.
        target_alpha: overrides the mode-dependent alpha when not None.

    Returns:
        The ``output`` of the frame visualizer.
    """
    frame_visualizer = Visualizer(frame, self.metadata)
    num_instances = len(predictions)
    if num_instances == 0:
        return frame_visualizer.output

    boxes = None
    if predictions.has("pred_boxes"):
        boxes = predictions.pred_boxes.tensor.numpy()
    scores = None
    if predictions.has("scores"):
        scores = predictions.scores
    if not incl_scores:
        scores = None
    classes = None
    if predictions.has("pred_classes"):
        classes = predictions.pred_classes.numpy()
    keypoints = None
    if predictions.has("pred_keypoints"):
        keypoints = predictions.pred_keypoints
    masks = None
    if predictions.has("pred_masks"):
        masks = predictions.pred_masks

    detected = []
    for i in range(num_instances):
        detected.append(
            _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None,
                              ttl=8))
    colors = self._assign_colors(detected)

    class_names = self.metadata.get("thing_classes", None)
    labels = _create_text_labels(classes, scores, class_names)

    alpha = 0.5
    if self._instance_mode == ColorMode.IMAGE_BW:
        foreground = (masks.any(dim=0) > 0).numpy() if masks is not None else None
        frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
            foreground)
        alpha = 0.3

    # Apply the caller's switches only now: the colour assignment above
    # needs the boxes regardless of incl_boxes.
    if not incl_boxes:
        boxes = None
    if not incl_labels:
        labels = None
    if target_alpha is not None:
        alpha = target_alpha

    frame_visualizer.overlay_instances(
        boxes=boxes if masks is None else None,
        masks=masks,
        labels=labels,
        keypoints=keypoints,
        assigned_colors=colors,
        alpha=alpha,
    )
    return frame_visualizer.output
def draw_instance_predictions(self, predictions):
    """
    Draw instance predictions, including extreme points and paths.

    Masks come from "pred_masks" when present; otherwise they are built
    from "pred_polys" through ``get_polygon_rles``.

    :param predictions: Instances whose optional fields "ext_points",
        "pred_polys", "pred_boxes", "scores", "pred_classes",
        "pred_keypoints", "pred_masks" and "pred_path" are read here.
    :return: ``self.output`` with the visualizations drawn on it.
    """
    ext_points = predictions.ext_points if predictions.has(
        "ext_points") else None
    pred_polys = predictions.pred_polys if predictions.has(
        "pred_polys") else None

    boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None
    scores = predictions.scores if predictions.has("scores") else None
    classes = predictions.pred_classes if predictions.has(
        "pred_classes") else None
    labels = _create_text_labels(classes, scores,
                                 self.metadata.get("thing_classes", None))
    keypoints = predictions.pred_keypoints if predictions.has(
        "pred_keypoints") else None

    if predictions.has("pred_masks"):
        raw_masks = np.asarray(predictions.pred_masks)
    elif pred_polys is not None:
        # No masks predicted: build them from the predicted polygons.
        output_height = predictions.image_size[0]
        output_width = predictions.image_size[1]
        raw_masks = np.asarray(
            get_polygon_rles(pred_polys.flatten(),
                             (output_height, output_width)))
    else:
        raw_masks = None

    if raw_masks is None:
        masks = None
    else:
        masks = [
            GenericMask(x, self.output.height, self.output.width)
            for x in raw_masks
        ]

    path = predictions.pred_path.numpy() if predictions.has(
        "pred_path") else None

    if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
            "thing_colors"):
        colors = [
            self._jitter([x / 255 for x in self.metadata.thing_colors[c]])
            for c in classes
        ]
        alpha = 0.8
    else:
        colors = None
        alpha = 0.5

    if self._instance_mode == ColorMode.IMAGE_BW:
        assert predictions.has(
            "pred_masks"), "ColorMode.IMAGE_BW requires segmentations"
        self.output.img = self._create_grayscale_image(
            (predictions.pred_masks.any(dim=0) > 0).numpy())
        alpha = 0.3

    self.overlay_instances(
        masks=masks,
        boxes=boxes,
        labels=labels,
        ext_points=ext_points,
        path=path,
        keypoints=keypoints,
        assigned_colors=colors,
        alpha=alpha,
    )
    return self.output
def draw_dataset_dict(self, dic, given_colour=None):
    """
    Draw annotations/segmentations in Detectron2 Dataset format.

    Only masks and keypoints of the annotations are drawn: the overlay is
    called with no boxes, no labels and alpha 0.

    Args:
        dic (dict): annotation/segmentation data of one image, in Detectron2
            Dataset format. The keys read here are "annotations",
            "sem_seg" / "sem_seg_file_name" and "pan_seg" /
            "pan_seg_file_name" (plus "segments_info" for panoptic data).
        given_colour: forwarded unchanged to ``overlay_instances``.

    Returns:
        output (VisImage): image object with visualizations.
    """
    annos = dic.get("annotations", None)
    if annos:
        if "segmentation" in annos[0]:
            masks = [x["segmentation"] for x in annos]
        else:
            masks = None
        if "keypoints" in annos[0]:
            keypts = [x["keypoints"] for x in annos]
            keypts = np.array(keypts).reshape(len(annos), -1, 3)
        else:
            keypts = None

        colors = None
        if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
                "thing_colors"):
            colors = [
                self._jitter(
                    [x / 255 for x in self.metadata.thing_colors[c]])
                for c in (x["category_id"] for x in annos)
            ]

        # Boxes and text labels are intentionally not drawn. They used to
        # be computed and then discarded, which could fail for no benefit
        # (e.g. a category id outside the hard-coded name list).
        self.overlay_instances(
            labels=None,
            boxes=None,
            masks=masks,
            keypoints=keypts,
            assigned_colors=colors,
            alpha=0,
            given_colour=given_colour,
        )

    sem_seg = dic.get("sem_seg", None)
    if sem_seg is None and "sem_seg_file_name" in dic:
        with PathManager.open(dic["sem_seg_file_name"], "rb") as f:
            sem_seg = Image.open(f)
            sem_seg = np.asarray(sem_seg, dtype="uint8")
    if sem_seg is not None:
        self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)

    pan_seg = dic.get("pan_seg", None)
    if pan_seg is None and "pan_seg_file_name" in dic:
        with PathManager.open(dic["pan_seg_file_name"], "rb") as f:
            pan_seg = Image.open(f)
            pan_seg = np.asarray(pan_seg)
            # Imported lazily so panopticapi is only required for panoptic
            # data.
            from panopticapi.utils import rgb2id

            pan_seg = rgb2id(pan_seg)
    if pan_seg is not None:
        segments_info = dic["segments_info"]
        pan_seg = torch.Tensor(pan_seg)
        self.draw_panoptic_seg(pan_seg, segments_info, area_threshold=0,
                               alpha=0.5)
    return self.output
def sbd_pred(cv_img):
    """
    Classify the baggage situation in an image from Mask R-CNN detections.

    Detections with a score above 0.9 are counted by the first word of
    their label ("suitcase", "tray", "soft_bag", "extended_handle").

    Args:
        cv_img: image passed unchanged to ``demo.run_on_image``.

    Returns:
        AnalysisResponse whose ``result`` is 2 (TubDetected), 3
        (TubRequired), 1 (NoTubRequired) or 0, and whose ``flags`` gets a 0
        for every extended handle and a 1 when more than one bag is seen.
        The visualized image is also written to "image_sbd.jpg".
    """
    # mask rcnn
    obj_predictions, visualized_output = demo.run_on_image(cv_img)
    predictions = obj_predictions["instances"].to("cpu")
    img_result = visualized_output.get_image()[:, :, ::-1]

    scores = predictions.scores if predictions.has("scores") else None
    classes = predictions.pred_classes if predictions.has(
        "pred_classes") else None
    labels = _create_text_labels(classes, scores,
                                 melbourne_metadata.get("thing_classes", None))
    print("classes.shape, labels",
          classes.shape if classes is not None else None, labels)

    # Created before the loop: the extended-handle branch below appends to
    # response.flags and used to run before the response existed.
    response = AnalysisResponse()

    objects = []
    if classes is not None and scores is not None:
        for label_text, score in zip(labels, scores):
            if score > 0.9:
                label = label_text.split(" ")[0]
                objects.append(label)
                print(label)
                if label == "suitcase":
                    print("suitcase detected")
                elif label == "tray":
                    print("tray")
                elif label == "soft_bag":
                    print("soft bag")
                elif label == "extended_handle":
                    response.flags.append(0)

    # tricky: count "tray:xxxx", not just "tray", so a substring test is
    # used instead of objects.count("tray")
    num_tray = sum('tray' in x for x in objects)
    num_suitcase = sum('suitcase' in x for x in objects)
    num_soft_bag = sum('soft_bag' in x for x in objects)

    if (num_suitcase + num_soft_bag) > 1:  # multi bags
        response.flags.append(1)

    if num_tray > 1:
        response.result = 2  # TubDetected
    elif num_soft_bag > 1 and num_tray == 0:
        response.result = 3  # TubRequired
    elif num_suitcase > 0:
        response.result = 1  # NoTubRequired
    else:
        response.result = 0

    print(response.result, response.flags)
    cv2.imwrite("image_sbd.jpg", img_result)
    return response
def run_on_image(self, image, debug):
    """
    Run the predictor on an image and visualize or serialize the result.

    Args:
        image (np.ndarray): an image of shape (H, W, C) (in BGR order).
            This is the format used by OpenCV.
        debug (bool): when true, instance predictions are drawn and progress
            messages are printed; when false, instance predictions are
            converted to a plain dict instead of being drawn.

    Returns:
        predictions (dict): the output of the model.
        vis_output (VisImage): the visualized image output, or None when
            nothing was drawn.
        obj (dict): only for instance predictions with ``debug`` false,
            otherwise None. Maps the instance position to a dict with the
            keys 'class', 'score', 'box' ('left-up' and 'right-down'
            corners) and 'polygons' (segment index -> list of [x, y]).
    """
    vis_output = None
    obj = None
    predictions = self.predictor(image.astype(np.uint8))
    # Convert image from OpenCV BGR format to Matplotlib RGB format.
    image = image[:, :, ::-1]
    visualizer = Visualizer(image, self.metadata,
                            instance_mode=self.instance_mode)

    if "panoptic_seg" in predictions:
        panoptic_seg, segments_info = predictions["panoptic_seg"]
        vis_output = visualizer.draw_panoptic_seg_predictions(
            panoptic_seg.to(self.cpu_device), segments_info)
        if debug:
            print('in panoptic_seg')
        return predictions, vis_output, obj

    if "sem_seg" in predictions:
        vis_output = visualizer.draw_sem_seg(
            predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
        if debug:
            print("in sem_seg")

    if "instances" in predictions:
        instances = predictions["instances"].to(self.cpu_device)
        if debug:
            vis_output = visualizer.draw_instance_predictions(
                predictions=instances)
            print('in instances')
        else:
            # If the output is json, debug is false.
            boxes = instances.pred_boxes.tensor.numpy() if instances.has(
                "pred_boxes") else None
            scores = instances.scores if instances.has('scores') else None
            classes = instances.pred_classes if instances.has(
                "pred_classes") else None
            labels = _create_text_labels(
                classes, scores,
                visualizer.metadata.get("thing_classes", None))
            if instances.has("pred_masks"):
                masks = [
                    GenericMask(x, visualizer.output.height,
                                visualizer.output.width)
                    for x in np.asarray(instances.pred_masks)
                ]
            else:
                masks = None

            obj = {}
            for i, label in enumerate(labels or []):
                tmp = {}
                if scores is not None:
                    # Strip only the trailing score text so that multi-word
                    # class names stay intact (assumes the label is
                    # "<name> <score>" - TODO confirm against the helper).
                    tmp['class'] = label.rsplit(" ", 1)[0]
                    tmp['score'] = scores[i].item()
                else:
                    tmp['class'] = label
                    tmp['score'] = None
                tmp['box'] = {}
                if boxes is not None:
                    tmp['box']['left-up'] = [
                        boxes[i][0].item(), boxes[i][1].item()
                    ]
                    tmp['box']['right-down'] = [
                        boxes[i][2].item(), boxes[i][3].item()
                    ]
                tmp['polygons'] = {}
                if masks is not None:
                    for idx, segment in enumerate(masks[i].polygons):
                        tmp['polygons'][idx] = segment.reshape(-1,
                                                               2).tolist()
                obj[i] = tmp

    return predictions, vis_output, obj
def draw_instance_predictions(self, frame, predictions):
    """
    Draw only the "person" instances, as grey shapes on a blanked frame.

    The frame content is replaced by zeros before drawing. Each kept
    instance is drawn with alpha 1.0, an empty label, and a grey colour
    whose three channels all equal the instance's score.

    Args:
        frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
        predictions (Instances): the output of an instance detection/segmentation
            model. Fields read here: "pred_boxes", "pred_classes", "scores",
            "pred_masks" and "pred_keypoints". "pred_boxes", "pred_classes"
            and "scores" are indexed unconditionally, so they are required
            whenever there is at least one instance; masks and keypoints
            are optional.

    Returns:
        output (VisImage): image object with visualizations.
    """
    frame_visualizer = Visualizer(frame, self.metadata)
    # MOONLITE: zero out frame
    frame_visualizer.output.img = np.zeros(
        frame_visualizer.output.img.shape)

    num_instances = len(predictions)
    if num_instances == 0:
        return frame_visualizer.output

    boxes = (predictions.pred_boxes.tensor.numpy()
             if predictions.has("pred_boxes") else None)
    scores = predictions.scores if predictions.has("scores") else None
    classes = (predictions.pred_classes.numpy()
               if predictions.has("pred_classes") else None)
    keypoints = (predictions.pred_keypoints
                 if predictions.has("pred_keypoints") else None)
    masks = (predictions.pred_masks
             if predictions.has("pred_masks") else None)

    detected = [
        _DetectedInstance(classes[i],
                          boxes[i],
                          mask_rle=None,
                          color=None,
                          ttl=8) for i in range(num_instances)
    ]
    # The returned colours are replaced by score-based greys below.
    # NOTE(review): the call is kept because it presumably updates
    # per-frame tracking state - confirm before removing it.
    self._assign_colors(detected)

    if self._instance_mode == ColorMode.IMAGE_BW:
        # any() returns uint8 tensor
        frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
            (masks.any(dim=0) > 0).numpy() if masks is not None else None)

    # Indices of the instances belonging to the focused class.
    class_names = self.metadata.get("thing_classes", None)
    focused_class = "person"
    num_class_instances = [
        i for i in range(num_instances)
        if class_names[classes[i]] == focused_class
    ]

    # Strip every per-instance field down to the focused class. Optional
    # fields stay None instead of being indexed.
    boxes = [boxes[i] for i in num_class_instances]
    if masks is not None:
        masks = [masks[i] for i in num_class_instances]
    if keypoints is not None:
        keypoints = [keypoints[i] for i in num_class_instances]
    labels = ["" for _ in num_class_instances]
    # Grey level encodes confidence; plain floats are used so the drawing
    # backend never receives tensor scalars.
    colors = [(float(scores[i]), ) * 3 for i in num_class_instances]

    alpha = 1.0
    frame_visualizer.overlay_instances(
        boxes=None
        if masks is not None else boxes,  # boxes are a bit distracting
        masks=masks,
        labels=labels,
        keypoints=keypoints,
        assigned_colors=colors,
        alpha=alpha,
    )
    return frame_visualizer.output
def new_draw_instance_predictions(self, predictions):
    """
    Overlay boxes, labels and keypoints of instance predictions (no masks).

    Args:
        predictions (Instances): the output of an instance detection/segmentation
            model. Fields read here: "pred_boxes", "pred_classes", "scores"
            and "pred_keypoints". Masks are never drawn by this variant.

    Returns:
        output (VisImage): image object with visualizations.
    """

    def _field(name):
        # Return the named prediction field, or None when it is absent.
        return getattr(predictions, name) if predictions.has(name) else None

    boxes = _field("pred_boxes")
    scores = _field("scores")
    classes = _field("pred_classes")
    # Class names come from the module-level CLASS_NAMES rather than from
    # self.metadata.get("thing_classes", None).
    labels = _create_text_labels(classes, scores, CLASS_NAMES)
    keypoints = _field("pred_keypoints")

    # Mask drawing is switched off in this variant.
    masks = None

    per_class_colors = (self._instance_mode == ColorMode.SEGMENTATION
                        and self.metadata.get("thing_colors"))
    if per_class_colors:
        colors = []
        for cls in classes:
            rgb = [channel / 255 for channel in self.metadata.thing_colors[cls]]
            colors.append(self._jitter(rgb))
        alpha = 0.8
    else:
        colors = None
        alpha = 0.5

    if self._instance_mode == ColorMode.IMAGE_BW:
        # No mask is supplied to the grayscale conversion.
        self.output.img = self._create_grayscale_image(None)
        alpha = 0.3

    print(labels)
    self.overlay_instances(
        masks=masks,
        boxes=boxes,
        labels=labels,
        keypoints=keypoints,
        assigned_colors=colors,
        alpha=alpha,
    )
    return self.output


# args = parse_args()
# cfg = modify_cfg(args) #use the same config as training
# cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth") # path to the model we just trained
# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7 # set a custom testing threshold
# predictor = DefaultPredictor(cfg)
#
# facemask_1_metadata = MetadataCatalog.get("facemask_1_val")
# facemask_1_metadata, dataset_dicts = register_facemask_dataset(split='val')
#
# evaluator = COCOEvaluator("facemask_1_val", ("bbox", "segm"), False, output_dir="./output/")
# # val_loader = build_detection_test_loader(cfg, "facemask_1_val")
# # print(inference_on_dataset(predictor, val_loader, evaluator))
# # # another equivalent way to evaluate the model is to use `trainer.test`

# def get_iou(pred_box, gt_box):
#     """
#     pred_box : the coordinate for predict bounding box
#     gt_box : the coordinate for ground truth bounding box
#     return : the iou score
#     the left-down coordinate of pred_box:(pred_box[0], pred_box[1])
#     the right-up coordinate of pred_box:(pred_box[2], pred_box[3])
#     """
#     # 1.get the coordinate of inters
#     ixmin = max(pred_box[0], gt_box[0])
#     ixmax = min(pred_box[2], gt_box[2])
#     iymin = max(pred_box[1], gt_box[1])
#     iymax = min(pred_box[3], gt_box[3])
#     iw = np.maximum(ixmax-ixmin+1., 0.)
#     ih = np.maximum(iymax-iymin+1., 0.)
#     # 2. calculate the area of inters
#     inters = iw*ih
#     # 3. calculate the area of union
#     uni = ((pred_box[2]-pred_box[0]+1.) * (pred_box[3]-pred_box[1]+1.) +
#            (gt_box[2] - gt_box[0] + 1.) * (gt_box[3] - gt_box[1] + 1.) -
#            inters)
#     # 4. calculate the overlaps between pred_box and gt_box
#     iou = inters / uni
#     return iou

# correct = 0
# total = 0
# count = 0
# for d in dataset_dicts:
#     if count % 100 == 0:
#         print(count, '/', len(dataset_dicts))
#     count += 1
#     im = cv2.imread(d["file_name"])
#     outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
#     # print(outputs["instances"].pred_classes.detach().cpu().numpy())
#     # print([dict['category_id'] for dict in d["annotations"]])
#     pred = np.array(outputs["instances"].pred_classes.detach().cpu().numpy())
#     ground_truth = np.array([dict['category_id'] for dict in d["annotations"]])
#     ml = min(len(pred), len(ground_truth))
#     diff = pred[:ml] - ground_truth[:ml]
#     correct += len(ground_truth) - len(np.where(diff>0)[0])
#     total += len(ground_truth)
#     print(outputs["instances"].pred_boxes)
#     print(dict)
# print(correct, total)
# print('total class accuracy: ', correct/total)

# # #randomly select 5 images to visualize
# # for d in random.sample(dataset_dicts, 5):
# #     im = cv2.imread(d["file_name"])
# #     import time
# #     start = time.time()
# #     outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
# #     print('used', time.time() - start, 'sec')
# #     v = newVisualizer(im[:, :, ::-1],
# #                    metadata=facemask_1_metadata,
# #                    scale=0.5,
# #                    instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. This option is only available for segmentation models
# #     )
# #     out = v.new_draw_instance_predictions(outputs["instances"].to("cpu"))
# #     # cv2_imshow(out.get_image()[:, :, ::-1])
# #     print(out)
# #     plt.imshow(out.get_image()[:, :, ::-1])
# #     plt.show()
# Run detection on every input image and publish an MQTT message whenever
# at least one label containing "person" is produced.
ppl = VisualizationDemo(cfg)
if args.input:
    if len(args.input) == 1:
        # A single argument is treated as a glob pattern.
        args.input = glob.glob(os.path.expanduser(args.input[0]))
        assert args.input, "The input path(s) was not found"
    for path in tqdm.tqdm(args.input, disable=not args.output):
        # use PIL, to be consistent with evaluation
        img = read_image(path, format="BGR")
        start_time = time.time()
        predictions, visualized_output = ppl.run_on_image(img)
        num_instances = len(predictions["instances"])
        if num_instances > 0:
            classes = predictions["instances"].pred_classes
            labels = _create_text_labels(
                classes, predictions["instances"].scores,
                ppl.metadata.get("thing_classes", None))
            print(labels)
            matchers = ['person']
            matching = [
                s for s in labels if any(xs in s for xs in matchers)
            ]
            if matching:
                print("Person detected!")
                # Report the image being processed. The previous code always
                # named args.input[0], which is wrong for every file after
                # the first when several inputs are given.
                payload = ":".join(matching) + " From " + path
                publish.single("cameras/person",
                               payload,
                               hostname="192.168.1.20")
# Run inference on the current image and draw the predictions on it.
outputs = predictor(im)
v = Visualizer(
    im[:, :, ::-1],
    # Get class names from dataset train metadata to put on visualization.
    metadata=my_dataset_train_metadata,
    scale=1,
)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))

# As described in draw_instance_predictions function of Visualizer, we can get data about each image.
# We might need these, but currently only interested in labels as they contain
# predicted classes for each image and prediction score.
imagePredClasses = outputs["instances"].pred_classes
imagePredBoxes = outputs["instances"].pred_boxes
imagePredScores = outputs["instances"].scores

# Store the inference data (labels with class name and prediction score)
# for this image in a dictionary keyed by the image base name.
imageDataArray = {
    imageBaseName:
    _create_text_labels(outputs["instances"].pred_classes,
                        outputs["instances"].scores,
                        my_dataset_train_metadata.get("thing_classes", None))
}
print(imageDataArray)

# Show images with predictions in system window. If not using host, then:
#cv2.imshow('Inference Preview',out.get_image()[:, :, ::-1])
# If running inference locally, to view result with timer in system window, uncomment:
#cv2.waitKey(10000)

# Save images with predictions to savePath folder and imageName with path to image removed from name.
savePath = './inferenceContent/output'
# cv2.imwrite does not create missing directories; without this the save
# fails without raising (NOTE(review): it reports failure only through its
# return value - confirm against the OpenCV version in use).
os.makedirs(savePath, exist_ok=True)
cv2.imwrite(os.path.join(savePath, imageBaseName),
            out.get_image()[:, :, ::-1])
# Create a JSON object and append results for each image.
def draw_instance_predictions(self, frame, predictions):
    """
    Draw instance-level prediction results on an image.

    Instances whose "ID_duration" does not exceed the metadata value
    "duration_threshold" (default 0) are hidden. When the predictions carry
    no "ID_duration" field, every instance is drawn.

    Args:
        frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
        predictions (Instances): the output of an instance detection/segmentation
            model. Following fields will be used to draw:
            "pred_boxes", "pred_classes", "scores", "pred_masks",
            "pred_keypoints", plus the optional "COLOR" (per-instance colors,
            used instead of automatically assigned ones) and "ID_duration".

    Returns:
        output (VisImage): image object with visualizations.
    """
    frame_visualizer = Visualizer(frame, self.metadata)
    num_instances = len(predictions)
    if num_instances == 0:
        return frame_visualizer.output

    boxes = (predictions.pred_boxes.tensor.numpy()
             if predictions.has("pred_boxes") else None)
    scores = predictions.scores if predictions.has("scores") else None
    classes = (predictions.pred_classes.numpy()
               if predictions.has("pred_classes") else None)
    keypoints = (predictions.pred_keypoints
                 if predictions.has("pred_keypoints") else None)
    colors = (predictions.COLOR
              if predictions.has("COLOR") else [None] * len(predictions))
    durations = (predictions.ID_duration
                 if predictions.has("ID_duration") else None)
    duration_threshold = self.metadata.get("duration_threshold", 0)

    if durations is None:
        # No duration information: show everything. Previously this left
        # `visibilities` as None, which broke the filtering below.
        visibilities = [True] * num_instances
    else:
        # Plain bools so the list works as a boolean mask for indexing.
        visibilities = [bool(x > duration_threshold) for x in durations]

    if predictions.has("pred_masks"):
        masks = predictions.pred_masks
        # mask IOU is not yet enabled
        # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F"))
        # assert len(masks_rles) == num_instances
    else:
        masks = None

    detected = [
        _DetectedInstance(classes[i],
                          boxes[i],
                          mask_rle=None,
                          color=colors[i],
                          ttl=8) for i in range(num_instances)
    ]
    if not predictions.has("COLOR"):
        colors = self._assign_colors(detected)

    labels = _create_text_labels(classes, scores,
                                 self.metadata.get("thing_classes", None))

    if self._instance_mode == ColorMode.IMAGE_BW:
        # any() returns uint8 tensor
        frame_visualizer.output.reset_image(
            frame_visualizer._create_grayscale_image(
                (masks.any(dim=0) > 0).numpy() if masks is not None else None))
        alpha = 0.3
    else:
        alpha = 0.5

    # Keep only the entries of visible instances.
    if labels is not None:
        labels = [
            label for label, visible in zip(labels, visibilities) if visible
        ]
    assigned_colors = (None if colors is None else [
        color for color, visible in zip(colors, visibilities) if visible
    ])

    # Boxes are only drawn when there are no masks (boxes are a bit
    # distracting).
    if masks is not None or boxes is None:
        shown_boxes = None
    else:
        shown_boxes = boxes[visibilities]

    frame_visualizer.overlay_instances(
        boxes=shown_boxes,
        masks=None if masks is None else masks[visibilities],
        labels=labels,
        keypoints=None if keypoints is None else keypoints[visibilities],
        assigned_colors=assigned_colors,
        alpha=alpha,
    )
    return frame_visualizer.output
if not color_frame: continue # Convert images to numpy arrays img = np.asanyarray(color_frame.get_data()) start_time = time.time() obj_predictions, visualized_output = demo.run_on_image(img) predictions = obj_predictions["instances"].to("cpu") boxes = predictions.pred_boxes if predictions.has( "pred_boxes") else None scores = predictions.scores if predictions.has( "scores") else None classes = predictions.pred_classes if predictions.has( "pred_classes") else None labels = _create_text_labels( classes, scores, melbourne_metadata.get("thing_classes", None)) cv2.imshow("front camera", visualized_output.get_image()[:, :, ::-1]) if cv2.waitKey(1) == 27: break # esc to quit # TODO # event_list, pose = pose_dim_estimation(portrait,profile,aligned_depth_frame, color_frame, # depth_intrin,depth_to_color_extrin,width,height) event_list = ["Wheel at front", "The bag is upright"] from detectron2.event_output import output_json # mock image image_file = "/home/don/code/BagAnalysis/3DImaging/GPU_based_solution/Deeplearning/detectron2/detectron2/data/melbourne/train_melb_mask/img_860.jpg"