def predict(self, obj, mode: str = "image"):
    # Make prediction
    if mode == "image":
        image = obj[:, :, ::-1]
        image_visualizer = Visualizer(image,
                                      metadata=self.metadata,
                                      instance_mode=self.instance_mode,
                                      scale=1.2)
        outputs = self.predictor(obj)
        instances = outputs["instances"].to("cpu")
        instances.remove('pred_classes')
        vis_output = image_visualizer.draw_instance_predictions(instances)
    elif mode == "video":
        video_visualizer = VideoVisualizer(metadata=self.metadata,
                                           instance_mode=self.instance_mode)
        outputs, vis_output = [], []
        while obj.isOpened():
            success, frame = obj.read()
            if success:
                output = self.predictor(frame)
                outputs.append(output)
                instances = output["instances"].to("cpu")
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                vis_frame = video_visualizer.draw_instance_predictions(frame, instances)
                vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
                vis_output.append(vis_frame)
            else:
                break
    return outputs, vis_output
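# A minimal, hedged sketch of driving a DefaultPredictor the way predict()
# above does in "image" mode. Assumes detectron2 and OpenCV are installed;
# the model-zoo config and the "input.jpg"/"image_out.jpg" paths are
# placeholders, not part of the snippet above.
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
predictor = DefaultPredictor(cfg)

image = cv2.imread("input.jpg")  # OpenCV reads BGR
outputs = predictor(image)
viz = Visualizer(image[:, :, ::-1],  # Visualizer expects RGB
                 metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                 scale=1.2)
vis = viz.draw_instance_predictions(outputs["instances"].to("cpu"))
cv2.imwrite("image_out.jpg", vis.get_image()[:, :, ::-1])  # back to BGR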
class AnnotateVideo(Pipeline):
    """Pipeline task for video annotation."""

    def __init__(self, dst, metadata_name, instance_mode=ColorMode.IMAGE):
        self.dst = dst
        self.metadata_name = metadata_name
        self.metadata = MetadataCatalog.get(self.metadata_name)
        self.instance_mode = instance_mode
        self.cpu_device = torch.device("cpu")
        self.video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
        super().__init__()

    def map(self, data):
        dst_image = data["image"].copy()
        data[self.dst] = dst_image
        self.annotate_frame_num(data)
        self.annotate_predictions(data)
        return data

    def annotate_frame_num(self, data):
        dst_image = data[self.dst]
        frame_idx = data["frame_num"]
        put_text(dst_image, f"{frame_idx:04d}", (0, 0),
                 color=colors.get("white").to_bgr(),
                 bg_color=colors.get("black").to_bgr(),
                 org_pos="tl")

    def annotate_predictions(self, data):
        if "predictions" not in data:
            return
        dst_image = data[self.dst]
        dst_image = dst_image[:, :, ::-1]  # Convert OpenCV BGR to RGB format
        predictions = data["predictions"]
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_image = self.video_visualizer.draw_panoptic_seg_predictions(
                dst_image, panoptic_seg.to(self.cpu_device), segments_info)
        elif "sem_seg" in predictions:
            sem_seg = predictions["sem_seg"].argmax(dim=0)
            vis_image = self.video_visualizer.draw_sem_seg(
                dst_image, sem_seg.to(self.cpu_device))
        elif "instances" in predictions:
            instances = predictions["instances"]
            vis_image = self.video_visualizer.draw_instance_predictions(
                dst_image, instances.to(self.cpu_device))
        # Converts RGB format to OpenCV BGR format
        vis_image = cv2.cvtColor(vis_image.get_image(), cv2.COLOR_RGB2BGR)
        data[self.dst] = vis_image
def prediction_on_video(video):
    model = "modelsfiles/model_final.pth"
    config = "modelsfiles/config.yml"
    threshold = 0.5
    save_path = "output"
    predictor, cfg = get_model(model, config, threshold)

    parser = argparse.ArgumentParser(description='Detect objects from webcam images')
    parser.add_argument('-s', '--show', default=True, action="store_false",
                        help='Show output')
    parser.add_argument('-sp', '--save_path', type=str, default='',
                        help='Path to save the output. If None output won\'t be saved')
    args = parser.parse_args()

    print("Started")
    cap = cv2.VideoCapture(video)
    if not cap.isOpened():
        print("Error opening video stream or file")

    MetadataCatalog.get("customtrain").thing_classes = ['ear plugs', 'welding shield']
    metadata = MetadataCatalog.get("customtrain")
    # Create the visualizer once, outside the loop, so instance colors
    # stay consistent across frames
    video_visualizer = VideoVisualizer(metadata, ColorMode.IMAGE)

    while cap.isOpened():
        ret, image = cap.read()
        if not ret:
            break
        outputs = predictor(image)
        v = video_visualizer.draw_instance_predictions(
            image, outputs["instances"].to("cpu"))
        if args.show:
            ui_main_window = Ui_MainWindow()
            ui_main_window.displayImage(v.get_image()[:, :, ::-1])
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
def main():
    args = parse_args()
    with open(args.config, "r") as f:
        config = yaml.safe_load(f)
    if "classes" not in config:
        raise Exception("Could not find class names")
    classes = config["classes"]

    cfg = get_cfg()
    cfg.merge_from_file(args.model_config)
    cfg.DATASETS.TRAIN = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 50000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(classes)
    if args.model_weights is None:
        cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    else:
        cfg.MODEL.WEIGHTS = args.model_weights
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set the testing threshold for this model
    cfg.DATASETS.TEST = ("custom_test",)

    predictor = DefaultPredictor(cfg)
    DatasetCatalog.register("custom_test", lambda d="test": None)
    MetadataCatalog.get("custom_test").set(thing_classes=classes)
    custom_metadata = MetadataCatalog.get("custom_test")

    os.makedirs(args.output, exist_ok=True)
    cap = cv2.VideoCapture(args.video)
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    vis = VideoVisualizer(metadata=custom_metadata)
    for i in tqdm.tqdm(range(0, n_frames, args.skip_frames)):
        assert cap.isOpened()
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        success, image = cap.read()
        assert success
        outputs = predictor(image)
        v = vis.draw_instance_predictions(image[:, :, ::-1],
                                          outputs["instances"].to("cpu"))
        filename = os.path.join(args.output, "prediction_%09d.jpg" % i)
        cv2.imwrite(filename, v.get_image()[:, :, ::-1])
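# main() above calls a parse_args() helper that is not shown. A hedged
# reconstruction, inferred purely from how args is used there; the flag
# names and defaults are assumptions, not the original code.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description="Run a trained model on a video")
    parser.add_argument("--config", required=True,
                        help="YAML file listing the class names")
    parser.add_argument("--model-config", dest="model_config", required=True,
                        help="detectron2 model config to merge into get_cfg()")
    parser.add_argument("--model-weights", dest="model_weights", default=None,
                        help="checkpoint path; defaults to OUTPUT_DIR/model_final.pth")
    parser.add_argument("--video", required=True, help="input video file")
    parser.add_argument("--output", default="output", help="directory for frame JPEGs")
    parser.add_argument("--skip-frames", dest="skip_frames", type=int, default=1,
                        help="stride between predicted frames")
    return parser.parse_args()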
def run_on_video(video, cfg):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    predictor = DefaultPredictor(cfg)
    metadata = MetadataCatalog.get("__unused")
    video_visualizer = VideoVisualizer(metadata)

    def process_predictions(frame, predictions):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        predictions = predictions["instances"].to('cpu')
        vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
        # Converts Matplotlib RGB format to OpenCV BGR format
        vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
        return vis_frame

    def frame_from_video(video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    frame_gen = frame_from_video(video)
    for frame in frame_gen:
        frame = np.array(frame)
        yield process_predictions(frame, predictor(frame))
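# A hedged usage sketch for the generator above: stream its visualized
# frames into a cv2.VideoWriter. "input.mp4" and "output.mp4" are
# placeholder paths, and cfg is assumed to be an already-built detectron2
# CfgNode like the one constructed earlier in this section.
import cv2

cap = cv2.VideoCapture("input.mp4")
fps = cap.get(cv2.CAP_PROP_FPS)
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
writer = cv2.VideoWriter("output.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, size)
for vis_frame in run_on_video(cap, cfg):
    writer.write(vis_frame)  # frames are already BGR
cap.release()
writer.release()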
def run(self, video):
    video_visualizer = VideoVisualizer(self.metadata, ColorMode.IMAGE)

    def process_predictions(frame, predictions):
        predictions = predictions["instances"].to(self.cpu_device)
        vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
        vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
        return vis_frame, predictions

    frame_gen = self._frame_from_video(video)
    if self.parallel:
        buffer_size = self.predictor.default_buffer_size
        frame_data = deque()
        for cnt, (frame, frame_pos) in enumerate(frame_gen):
            frame_data.append([frame, frame_pos])
            self.predictor.put(frame)
            if cnt >= buffer_size:
                frame, frame_pos = frame_data.popleft()
                predictions = self.predictor.get()
                yield frame_pos, process_predictions(frame, predictions)
        while len(frame_data):
            frame, frame_pos = frame_data.popleft()
            predictions = self.predictor.get()
            yield frame_pos, process_predictions(frame, predictions)
    else:
        for frame, frame_pos in frame_gen:
            yield frame_pos, process_predictions(frame, self.predictor(frame))
def run_on_video(self, video):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

    def process_predictions(frame, predictions):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                frame, panoptic_seg.to(self.cpu_device), segments_info
            )
        elif "instances" in predictions:
            predictions = predictions["instances"].to(self.cpu_device)
            vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
        elif "sem_seg" in predictions:
            vis_frame = video_visualizer.draw_sem_seg(
                frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
            )
        elif "proposals" in predictions:
            predictions = predictions["proposals"].to(self.cpu_device)
            predictions.pred_boxes = predictions.proposal_boxes
            predictions.scores = predictions.objectness_logits
            predictions.pred_classes[:] = -1
            vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
        # Converts Matplotlib RGB format to OpenCV BGR format
        vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
        return vis_frame

    frame_gen = self._frame_from_video(video)
    if self.parallel:
        buffer_size = self.predictor.default_buffer_size
        frame_data = deque()
        for cnt, frame in enumerate(frame_gen):
            frame_data.append(frame)
            self.predictor.put(frame)
            if cnt >= buffer_size:
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        while len(frame_data):
            frame = frame_data.popleft()
            predictions = self.predictor.get()
            yield process_predictions(frame, predictions)
    else:
        for frame in frame_gen:
            yield process_predictions(frame, self.predictor(frame))
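# The parallel branch above keeps a deque of in-flight frames so that
# visualization lags the asynchronous predictor by default_buffer_size
# frames. A self-contained toy illustration of that producer/consumer
# pattern, with a thread pool standing in for detectron2's AsyncPredictor
# (fake_predict and the frame ids are placeholders):
from collections import deque
from concurrent.futures import ThreadPoolExecutor

def fake_predict(frame):
    return {"frame_id": frame}  # stand-in for the model call

pool = ThreadPoolExecutor(max_workers=2)
buffer_size = 3
pending = deque()
for cnt, frame in enumerate(range(10)):
    pending.append((frame, pool.submit(fake_predict, frame)))
    if cnt >= buffer_size:
        frame, fut = pending.popleft()
        print(frame, fut.result())  # consume in submission order
while pending:  # drain the tail once the input is exhausted
    frame, fut = pending.popleft()
    print(frame, fut.result())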
def run_on_video(video):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    # `metadata`, `instancemode`, `predictor` and `_frame_from_video` are
    # module-level globals in this script.
    video_visualizer = VideoVisualizer(metadata, instancemode)

    def process_predictions(frame, predictions):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        predictions = predictions["instances"].to("cpu")
        vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
        # Converts Matplotlib RGB format to OpenCV BGR format
        vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
        return vis_frame

    frame_gen = _frame_from_video(video)
    for frame in frame_gen:
        yield process_predictions(frame, predictor(frame))
def run_on_video(self, video):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

    def process_predictions(frame, predictions):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # convert from RGB to BGR
        # Choose mode. `predictions` is a dict with one key, "instances",
        # mapping to an `Instances` object with fields such as "pred_boxes",
        # "pred_classes", "scores", "pred_masks" and "pred_keypoints".
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                frame, panoptic_seg.to(self.cpu_device), segments_info
            )
        elif "instances" in predictions:  # instance segmentation mode
            predictions = predictions["instances"].to(self.cpu_device)
            vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
        elif "sem_seg" in predictions:
            vis_frame = video_visualizer.draw_sem_seg(
                frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
            )
        # Converts Matplotlib RGB format to OpenCV BGR format
        return cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)

    for frame in self._frame_from_video(video):
        yield process_predictions(frame, self.predictor(frame))
def __init__(self, cfg, parallel, instance_mode=ColorMode.IMAGE):
    """
    Args:
        cfg (CfgNode):
        instance_mode (ColorMode):
        parallel (bool): whether to run the model in different processes from
            visualization. Useful since the visualization logic can be slow.
    """
    self.metadata = MetadataCatalog.get(
        cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
    self.cpu_device = torch.device("cpu")
    self.instance_mode = instance_mode
    self.parallel = parallel
    if self.parallel == 1:
        num_gpu = torch.cuda.device_count()
        self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
    else:
        self.predictor = DefaultPredictor(cfg)
    self.video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
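# A hedged construction sketch for the class this __init__ belongs to; the
# VisualizationDemo variant near the end of this section has the same
# signature, so that name is assumed here. The model-zoo config is a
# placeholder choice.
import torch
from detectron2 import model_zoo
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
cfg.freeze()

demo = VisualizationDemo(cfg, parallel=0)  # parallel=1 spawns AsyncPredictor workers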
def run_on_video(self, video):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

    def process_predictions(frame, predictions):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if "instances" in predictions:
            predictions['instances'] = predictions['instances'].to('cpu')
            # Keep only detections with class id 1
            indices = predictions['instances'].pred_classes == 1
            predictions['instances'] = predictions['instances'][indices]
            if len(predictions['instances']) == 0:
                # Nothing to draw: return the frame converted back to BGR
                # (the original fell through to .get_image() on an ndarray here)
                return cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            vis_frame = video_visualizer.draw_instance_predictions(
                frame, predictions['instances'])
        # Converts Matplotlib RGB format to OpenCV BGR format
        vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
        return vis_frame

    frame_gen = self._frame_from_video(video)
    if self.parallel:
        buffer_size = self.predictor.default_buffer_size
        frame_data = deque()
        for cnt, frame in enumerate(frame_gen):
            frame_data.append(frame)
            self.predictor.put(frame)
            if cnt >= buffer_size:
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        while len(frame_data):
            frame = frame_data.popleft()
            predictions = self.predictor.get()
            yield process_predictions(frame, predictions)
    else:
        for frame in frame_gen:
            yield process_predictions(frame, self.predictor(frame))
def run_on_video(self, video):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

    def process_predictions(frame, predictions):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            # Composite replacement textures into the panoptic masks.
            # `ocean`, `sky` and `mask` come from the enclosing module.
            success, ocean_frame = ocean.read()
            res = mask(panoptic_seg, segments_info, frame, ocean_frame, 21, 0.8)
            res = mask(panoptic_seg, segments_info, res, sky, 40, 0.8)
            img = cv2.cvtColor(res, cv2.COLOR_BGRA2BGR)
            return np.array(img)

    frame_gen = self._frame_from_video(video)
    if self.parallel:
        buffer_size = self.predictor.default_buffer_size
        frame_data = deque()
        for cnt, frame in enumerate(frame_gen):
            frame_data.append(frame)
            self.predictor.put(frame)
            if cnt >= buffer_size:
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        while len(frame_data):
            frame = frame_data.popleft()
            predictions = self.predictor.get()
            yield process_predictions(frame, predictions)
    else:
        for frame in frame_gen:
            yield process_predictions(frame, self.predictor(frame))
def run_on_video(self, video, predictions, effect_type, current_frame):
    video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

    def process_predictions(cnt, frame, predictions):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if "instances" in predictions:
            print("video instances")
        elif "sem_seg" in predictions:
            print("sem_seg")
        # This project's VideoVisualizer exposes a custom
        # draw_instance_predictions(frame, predictions, effect_type) signature.
        vis_frame = video_visualizer.draw_instance_predictions(
            frame, predictions, effect_type)
        # Converts Matplotlib RGB format to OpenCV BGR format
        vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
        return vis_frame

    frame_gen = self._frame_from_video(video)
    if self.parallel:
        buffer_size = self.predictor.default_buffer_size
        frame_data = deque()
        for cnt, frame in enumerate(frame_gen):
            frame_data.append(frame)
            if cnt >= buffer_size:
                frame = frame_data.popleft()
                yield process_predictions(cnt, frame, predictions)
        while len(frame_data):
            frame = frame_data.popleft()
            yield process_predictions(cnt, frame, predictions)
    else:
        for cnt, frame in enumerate(frame_gen):
            # Only visualize a 30-frame window starting at current_frame
            if current_frame <= cnt < current_frame + 30:
                yield process_predictions(cnt, frame, predictions[cnt])
def run_on_video(self, video):
    video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

    def process_predictions(frame, predictions):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                frame, panoptic_seg.to(self.cpu_device), segments_info)
        elif "instances" in predictions:
            predictions = predictions["instances"].to(self.cpu_device)
            vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
        elif "sem_seg" in predictions:
            vis_frame = video_visualizer.draw_sem_seg(
                frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
        # Convert back to BGR for OpenCV
        vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
        return vis_frame

    frame_gen = self._frame_from_video(video)
    if self.parallel:
        # Multi-GPU rendering is enabled; not yet supported here
        buffer_size = self.predictor.default_buffer_size
        frame_data = deque()
        for cnt, frame in enumerate(frame_gen):
            frame_data.append(frame)
            self.predictor.put(frame)
            if cnt >= buffer_size:
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        while len(frame_data):
            frame = frame_data.popleft()
            predictions = self.predictor.get()
            yield process_predictions(frame, predictions)
    else:
        for frame in frame_gen:
            yield process_predictions(frame, self.predictor(frame))
def run_on_video(self, video):
    video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

    def process_predictions(cnt, frame, predictions):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            return cnt, panoptic_seg, segments_info
        elif "instances" in predictions:
            predictions = predictions["instances"].to(self.cpu_device)
            return cnt, predictions
        elif "sem_seg" in predictions:
            vis_frame = video_visualizer.draw_sem_seg(
                frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
            return cnt, vis_frame

    frame_gen = self._frame_from_video(video)
    if self.parallel:
        buffer_size = self.predictor.default_buffer_size
        frame_data = deque()
        for cnt, frame in enumerate(frame_gen):
            frame_data.append(frame)
            self.predictor.put(frame)
            if cnt >= buffer_size:
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                # The popped frame lags the loop counter by buffer_size
                yield process_predictions(cnt - buffer_size, frame, predictions)
        while len(frame_data):
            frame = frame_data.popleft()
            predictions = self.predictor.get()
            yield process_predictions(cnt, frame, predictions)
    else:
        for cnt, frame in enumerate(frame_gen):
            yield process_predictions(cnt, frame, self.predictor(frame))
def run_on_video(self, video, dictionary):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

    def process_predictions(frame, predictions, dictionary):
        resulte = 0
        max_inform_keypoint = None
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                frame, panoptic_seg.to(self.cpu_device), segments_info)
        elif "instances" in predictions:
            predictions = predictions["instances"].to(self.cpu_device)
            # Pick the box with the most information
            max_inform_keypoint = self.search_max_box_information(predictions)
            if max_inform_keypoint is not None:
                # Draw the box
                bbox = max_inform_keypoint[0]
                frame = cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                      (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
                # Draw the keypoints
                keypoint_list = max_inform_keypoint[1]
                for i, keypoint in enumerate(keypoint_list):
                    circle_coord = (int(keypoint[0]), int(keypoint[1]))
                    frame = cv2.putText(frame, str(i), circle_coord,
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
                # Draw the angles
                frame = self.write(frame, dictionary["angle"], keypoint_list)
                # Draw the distances
                frame = self.write_distance(frame, dictionary["distance"], keypoint_list)
                # Judge the sit-up
                resulte = self.poll_situp(keypoint_list, dictionary)
                vis_frame = frame[..., ::-1]
            else:
                vis_frame = frame[..., ::-1]
        elif "sem_seg" in predictions:
            vis_frame = video_visualizer.draw_sem_seg(
                frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
        return {
            "vis_frame": vis_frame,
            "resulte": resulte,
            "max_inform_keypoint": max_inform_keypoint
        }

    frame_gen = self._frame_from_video(video)
    if self.parallel:
        buffer_size = self.predictor.default_buffer_size
        frame_data = deque()
        for cnt, frame in enumerate(frame_gen):
            frame_data.append(frame)
            self.predictor.put(frame)
            if cnt >= buffer_size:
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions, dictionary)
        while len(frame_data):
            frame = frame_data.popleft()
            predictions = self.predictor.get()
            yield process_predictions(frame, predictions, dictionary)
    else:
        for frame in frame_gen:
            yield process_predictions(frame, self.predictor(frame), dictionary)
def run_on_video(self, video):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    video_visualizer_object = VideoVisualizer(self.metadata_object, self.instance_mode)
    video_visualizer_keypoint = VideoVisualizer(self.metadata_keypoint, self.instance_mode)

    def process_predictions(frame, predictions_object, predictions_keypoint):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        blank_image = np.zeros((frame.shape[0], frame.shape[1], 3), np.uint8)

        if "instances" in predictions_object:
            predictions_object = predictions_object["instances"].to(self.cpu_device)
            boxes_area = predictions_object.get('pred_boxes').area()
            if boxes_area.nelement() != 0:
                # Keep only the largest detection
                max_val, max_idx = torch.max(boxes_area, dim=0)
                pred_boxes_object = predictions_object.get('pred_boxes')[max_idx.item()]
                scores_object = predictions_object.get('scores')[max_idx.item()]
                pred_classes_object = predictions_object.get('pred_classes')[max_idx.item()]
                draw_instance_object = Instances([1280, 720])
                draw_instance_object.set('pred_boxes', pred_boxes_object)
                draw_instance_object.set('scores', torch.unsqueeze(scores_object, dim=0))
                draw_instance_object.set('pred_classes',
                                         torch.unsqueeze(pred_classes_object, dim=0))
                self.data_json['object_detection']['pred_boxes'] = \
                    predictions_object.get('pred_boxes')[max_idx.item()].tensor.numpy().tolist()
                self.data_json['object_detection']['scores'] = \
                    predictions_object.get('scores')[max_idx.item()].numpy().tolist()
                vis_frame = video_visualizer_object.draw_instance_predictions(
                    blank_image, draw_instance_object)
            else:
                self.data_json['object_detection']['pred_boxes'] = []
                self.data_json['object_detection']['scores'] = []
                vis_frame = video_visualizer_object.draw_instance_predictions(
                    blank_image, predictions_object)

        if "instances" in predictions_keypoint:
            predictions_keypoint = predictions_keypoint["instances"].to(self.cpu_device)
            boxes_area = predictions_keypoint.get('pred_boxes').area()
            if boxes_area.nelement() != 0:
                max_val, max_idx = torch.max(boxes_area, dim=0)
                pred_boxes_keypoint = predictions_keypoint.get('pred_boxes')[max_idx.item()]
                scores_keypoint = predictions_keypoint.get('scores')[max_idx.item()]
                pred_classes_keypoint = predictions_keypoint.get('pred_classes')[max_idx.item()]
                pred_keypoints_keypoint = predictions_keypoint.get('pred_keypoints')[max_idx.item()]
                draw_instance_keypoint = Instances([1280, 720])
                draw_instance_keypoint.set('pred_boxes', pred_boxes_keypoint)
                draw_instance_keypoint.set('scores', torch.unsqueeze(scores_keypoint, dim=0))
                draw_instance_keypoint.set('pred_classes',
                                           torch.unsqueeze(pred_classes_keypoint, dim=0))
                draw_instance_keypoint.set('pred_keypoints',
                                           torch.unsqueeze(pred_keypoints_keypoint, dim=0))
                self.data_json['keypoint_detection']['pred_boxes'] = \
                    predictions_keypoint.get('pred_boxes')[max_idx.item()].tensor.numpy().tolist()
                self.data_json['keypoint_detection']['scores'] = \
                    predictions_keypoint.get('scores')[max_idx.item()].numpy().tolist()
                self.data_json['keypoint_detection']['pred_keypoints'] = \
                    predictions_keypoint.get('pred_keypoints')[max_idx.item()].numpy().tolist()
                vis_frame = video_visualizer_keypoint.draw_instance_predictions(
                    vis_frame.get_image(), draw_instance_keypoint)
            else:
                self.data_json['keypoint_detection']['pred_boxes'] = []
                self.data_json['keypoint_detection']['scores'] = []
                self.data_json['keypoint_detection']['pred_keypoints'] = []
                vis_frame = video_visualizer_keypoint.draw_instance_predictions(
                    vis_frame.get_image(), predictions_keypoint)

        # Head pose estimation
        predictions, bounding_box, face_keypoints, w, face_area = head_pose_estimation(
            frame, self.mtcnn, self.head_pose_module, self.transformations,
            self.softmax, self.idx_tensor)
        # Converts Matplotlib RGB format to OpenCV BGR format
        vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
        if len(face_area) != 0:
            # Keep the largest detected face
            max_val, max_idx = torch.max(torch.Tensor(face_area), dim=0)
            self.data_json['head_pose_estimation']['predictions'] = predictions[max_idx.item()]
            self.data_json['head_pose_estimation']['pred_boxes'] = bounding_box[max_idx.item()]
            plot_pose_cube(
                vis_frame,
                predictions[max_idx.item()][0],
                predictions[max_idx.item()][1],
                predictions[max_idx.item()][2],
                tdx=(face_keypoints[max_idx.item()][0] + face_keypoints[max_idx.item()][2]) / 2,
                tdy=(face_keypoints[max_idx.item()][1] + face_keypoints[max_idx.item()][3]) / 2,
                size=w[max_idx.item()])
        data_json = self.data_json
        self.data_json['frame'] = self.frame_count
        self.frame_count += 1
        return vis_frame, data_json

    frame_gen = self._frame_from_video(video)
    for frame in frame_gen:
        yield process_predictions(frame, self.predictor_object(frame),
                                  self.predictor_keypoint(frame))
class AnnotateVideo(Pipeline):
    """Pipeline task for video annotation."""

    def __init__(self, dst, metadata_name, instance_mode=ColorMode.IMAGE,
                 frame_num=True, predictions=True, pose_flows=True):
        self.dst = dst
        self.metadata_name = metadata_name
        self.metadata = MetadataCatalog.get(self.metadata_name)
        self.instance_mode = instance_mode
        self.frame_num = frame_num
        self.predictions = predictions
        self.pose_flows = pose_flows
        self.cpu_device = torch.device("cpu")
        self.video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
        super().__init__()

    def map(self, data):
        dst_image = data["image"].copy()
        data[self.dst] = dst_image
        if self.frame_num:
            self.annotate_frame_num(data)
        if self.predictions:
            self.annotate_predictions(data)
        if self.pose_flows:
            self.annotate_pose_flows(data)
        return data

    def annotate_frame_num(self, data):
        dst_image = data[self.dst]
        frame_idx = data["frame_num"]
        put_text(dst_image, f"{frame_idx:04d}", (0, 0),
                 color=colors.get("white").to_bgr(),
                 bg_color=colors.get("black").to_bgr(),
                 org_pos="tl")

    def annotate_predictions(self, data):
        if "predictions" not in data:
            return
        dst_image = data[self.dst]
        dst_image = dst_image[:, :, ::-1]  # Convert OpenCV BGR to RGB format
        predictions = data["predictions"]
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_image = self.video_visualizer.draw_panoptic_seg_predictions(
                dst_image, panoptic_seg.to(self.cpu_device), segments_info)
        elif "sem_seg" in predictions:
            sem_seg = predictions["sem_seg"].argmax(dim=0)
            vis_image = self.video_visualizer.draw_sem_seg(
                dst_image, sem_seg.to(self.cpu_device))
        elif "instances" in predictions:
            instances = predictions["instances"]
            vis_image = self.video_visualizer.draw_instance_predictions(
                dst_image, instances.to(self.cpu_device))
        # Converts RGB format to OpenCV BGR format
        vis_image = cv2.cvtColor(vis_image.get_image(), cv2.COLOR_RGB2BGR)
        data[self.dst] = vis_image

    def annotate_pose_flows(self, data):
        if "pose_flows" not in data:
            return
        predictions = data["predictions"]
        instances = predictions["instances"]
        keypoints = instances.pred_keypoints.cpu().numpy()
        l_pairs = [
            (0, 1), (0, 2), (1, 3), (2, 4),  # Head
            (5, 6), (5, 7), (7, 9), (6, 8), (8, 10),
            (6, 12), (5, 11), (11, 12),  # Body
            (11, 13), (12, 14), (13, 15), (14, 16)
        ]
        dst_image = data[self.dst]
        height, width = dst_image.shape[:2]
        pose_flows = data["pose_flows"]
        pose_colors = list(colors.items())
        pose_colors_len = len(pose_colors)
        for idx, pose_flow in enumerate(pose_flows):
            pid = pose_flow["pid"]
            pose_color_idx = ((pid * 10) % pose_colors_len + pose_colors_len) % pose_colors_len
            pose_color_bgr = pose_colors[pose_color_idx][1].to_bgr()
            (start_x, start_y, end_x, end_y) = pose_flow["box"].astype("int")
            cv2.rectangle(dst_image, (start_x, start_y), (end_x, end_y),
                          pose_color_bgr, 2, cv2.LINE_AA)
            put_text(dst_image, f"{pid:d}", (start_x, start_y),
                     color=pose_color_bgr,
                     bg_color=colors.get("black").to_bgr(),
                     org_pos="tl")
            instance_keypoints = keypoints[idx]
            l_points = {}
            p_scores = {}
            # Draw keypoints
            for n in range(instance_keypoints.shape[0]):
                score = instance_keypoints[n, 2]
                if score <= 0.05:
                    continue
                cor_x = int(np.clip(instance_keypoints[n, 0], 0, width))
                cor_y = int(np.clip(instance_keypoints[n, 1], 0, height))
                l_points[n] = (cor_x, cor_y)
                p_scores[n] = score
                cv2.circle(dst_image, (cor_x, cor_y), 2, pose_color_bgr, -1)
            # Draw limbs
            for i, (start_p, end_p) in enumerate(l_pairs):
                if start_p in l_points and end_p in l_points:
                    start_xy = l_points[start_p]
                    end_xy = l_points[end_p]
                    start_score = p_scores[start_p]
                    end_score = p_scores[end_p]
                    cv2.line(dst_image, start_xy, end_xy, pose_color_bgr,
                             int(2 * (start_score + end_score) + 1))
def run_on_video(self, video):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

    def process_predictions(frame, predictions):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        k = 0
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                frame, panoptic_seg.to(self.cpu_device), segments_info)
        elif "instances" in predictions:
            predictions = predictions["instances"].to(self.cpu_device)
            try:
                # This project's visualizer also returns per-instance colors
                vis_frame, colors = video_visualizer.draw_instance_predictions(frame, predictions)
                k = 1
            except Exception:
                vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
            if k == 1:
                boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None
                classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None
                person_list = []
                person_track = []
                for box, class_label, color in zip(boxes, classes, colors):
                    if int(class_label) == 0:  # person class
                        pixel_width = box[2] - box[0]
                        box = np.asarray([[box[0], box[1]], [box[2], box[3]]])
                        distance_z = pixel_width  # D' = (W x F) / P
                        # Center of the box: average of top-left and bottom-right
                        cX = np.average(box[:, 0])
                        cY = np.average(box[:, 1])
                        person_list.append([cX, cY, distance_z])
                        person_track.append(color)
        elif "sem_seg" in predictions:
            vis_frame = video_visualizer.draw_sem_seg(
                frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))

        self.time_count += 1
        vis_frame = frame
        if k == 1:
            person = sorted(zip(person_list, person_track))
            hh, ww, c = (540, 960, 3)
            result = np.zeros((530, 600, 3))
            x_scale = 530 / vis_frame.shape[1]
            y_scale = 600 / vis_frame.shape[0]
            ht, wd, cc = result.shape
            xx = (ww - wd) // 2
            yy = (hh - ht) // 2
            color = (245, 245, 245)
            layer1 = np.full((hh, ww, cc), color, dtype=np.uint8)
            green_list = []
            yellow_list = []
            red_list = []
            for box_i, track_i in person:
                for box_j, track_j in person:
                    objectid = str(track_i) + str(track_j)
                    objectid = objectid.replace('[', '').replace(']', '') \
                                       .replace('.', '').replace(' ', '')
                    if self.time_count % 10:
                        self.time_count = 0
                        for indexs, l in enumerate(self.all_track_id):
                            if l != objectid:
                                self.disappear(l)
                                if self.maximum_wait[l] >= 10000:
                                    self.detrack(l, indexs)
                    if box_i != box_j:
                        xA, yA, zA = box_i
                        xB, yB, zB = box_j
                        z_check = abs(zA - zB)
                        D = dist.euclidean((xA, yA), (xB, yB))
                        division_index_A = yA / y_division
                        division_index_B = yB / y_division
                        A_div = division[int(division_index_A)]
                        B_div = division[int(division_index_B)]
                        yA = abs(yA + A_div)
                        yB = abs(yB + B_div)
                        xA = abs(xA + A_div)
                        xB = abs(xB + B_div)
                        if abs(division_index_A - division_index_B) < 1.0:
                            Main_threshold = min(A_div, B_div)
                        else:
                            Main_threshold = 0.4
                        if D < Main_threshold:
                            if objectid in self.objects:
                                self.update(id=objectid)
                            else:
                                self.all_track_id.append(objectid)
                                self.create_track(id=objectid)
                            if self.objects[objectid] <= 90:
                                xA, yA, zA = box_i
                                xB, yB, zB = box_j
                                cv2.circle(vis_frame, (int(xA), int(yA)), 3, (0, 255, 255), -1)
                                cv2.circle(vis_frame, (int(xB), int(yB)), 3, (0, 255, 255), -1)
                                cv2.line(vis_frame, (int(xA), int(yA)), (int(xB), int(yB)),
                                         (255, 255, 0), 2)
                                if box_i not in red_list and box_i not in yellow_list:
                                    yellow_list.append(box_i)
                                new_box_i_x = int(round(box_i[0] * x_scale))
                                new_box_i_y = int(round(box_i[1] * y_scale))
                                new_box_j_x = int(round(box_j[0] * x_scale))
                                new_box_j_y = int(round(box_j[1] * y_scale))
                                cv2.line(result, (new_box_i_x, new_box_i_y),
                                         (new_box_j_x, new_box_j_y), (255, 255, 0), 2)
                            else:
                                xA, yA, zA = box_i
                                xB, yB, zB = box_j
                                cv2.circle(vis_frame, (int(xA), int(yA)), 3, (0, 0, 255), -1)
                                cv2.circle(vis_frame, (int(xB), int(yB)), 3, (0, 0, 255), -1)
                                cv2.line(vis_frame, (int(xA), int(yA)), (int(xB), int(yB)),
                                         (255, 0, 0), 2)
                                if box_i not in red_list:
                                    red_list.append(box_i)
                                new_box_i_x = int(round(box_i[0] * x_scale))
                                new_box_i_y = int(round(box_i[1] * y_scale))
                                new_box_j_x = int(round(box_j[0] * x_scale))
                                new_box_j_y = int(round(box_j[1] * y_scale))
                                cv2.line(result, (new_box_i_x, new_box_i_y),
                                         (new_box_j_x, new_box_j_y), (0, 0, 255), 2)
                        else:
                            if box_i not in red_list and box_i not in yellow_list \
                                    and box_i not in green_list:
                                green_list.append(box_i)
                            if box_j not in red_list and box_j not in yellow_list \
                                    and box_j not in green_list:
                                green_list.append(box_j)
            for box_check, track_check in person:
                if box_check in red_list:
                    new_box_i_x = int(round(box_check[0] * x_scale))
                    new_box_i_y = int(round(box_check[1] * y_scale))
                    cv2.circle(result, (new_box_i_x, new_box_i_y), 5, (0, 0, 255), 5)
                elif box_check in yellow_list:
                    new_box_i_x = int(round(box_check[0] * x_scale))
                    new_box_i_y = int(round(box_check[1] * y_scale))
                    cv2.circle(result, (new_box_i_x, new_box_i_y), 5, (0, 255, 255), 5)
                elif box_check in green_list:
                    new_box_i_x = int(round(box_check[0] * x_scale))
                    new_box_i_y = int(round(box_check[1] * y_scale))
                    cv2.circle(result, (new_box_i_x, new_box_i_y), 5, (0, 128, 0), 5)
            cv2.putText(result, "{:.1f}".format(len(red_list)), (20, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 5)
            cv2.putText(result, "{:.1f}".format(len(yellow_list)), (20, 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 5)
            cv2.putText(result, "{:.1f}".format(len(green_list)), (20, 100),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 5)
            vis_frame = cv2.cvtColor(vis_frame, cv2.COLOR_RGB2BGR)
            layer1[yy:yy + ht, xx:xx + wd] = result
            vis_frame = np.concatenate((vis_frame, layer1), axis=1)
        else:
            vis_frame = cv2.resize(vis_frame, (960, 540), interpolation=cv2.INTER_CUBIC)
            hh, ww, c = vis_frame.shape
            result = np.zeros((530, 600, 3))
            x_scale = 530 / vis_frame.shape[1]
            y_scale = 600 / vis_frame.shape[0]
            ht, wd, cc = result.shape
            xx = (ww - wd) // 2
            yy = (hh - ht) // 2
            color = (245, 245, 245)
            layer1 = np.full((hh, ww, cc), color, dtype=np.uint8)
            layer1[yy:yy + ht, xx:xx + wd] = result
            vis_frame = cv2.resize(vis_frame, (960, 540), interpolation=cv2.INTER_CUBIC)
            vis_frame = np.concatenate((vis_frame, layer1), axis=1)
        return vis_frame

    frame_gen = self._frame_from_video(video)
    if self.parallel:
        buffer_size = self.predictor.default_buffer_size
        frame_data = deque()
        for cnt, frame in enumerate(frame_gen):
            frame_data.append(frame)
            self.predictor.put(frame)
            if cnt >= buffer_size:
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        while len(frame_data):
            frame = frame_data.popleft()
            predictions = self.predictor.get()
            yield process_predictions(frame, predictions)
    else:
        for frame in frame_gen:
            yield process_predictions(frame, self.predictor(frame))
# We can use `Visualizer` to draw the predictions on the image:
#   v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.0)
#   v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
#   cv2_imshow(v.get_image()[:, :, ::-1])
#   cv2.imwrite('output.png', v.get_image()[:, :, ::-1])

if args.images_input_dir:
    all_detection_outputs = {}
    jpgs = sorted(glob.glob(os.path.join(args.images_input_dir, "*.jpg")))
    num_frames = len(jpgs)
    predictor = DefaultPredictor(cfg)
    video_visualiser = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))
    os.makedirs(os.path.join(args.output, 'detection'), exist_ok=True)
    predictions_save_path = os.path.join(args.output, "all_detection_outputs.pkl")
    assert not os.path.isfile(predictions_save_path), predictions_save_path
    for jpg in tqdm.tqdm(jpgs):
        image_basename = os.path.basename(jpg)
        frame_num = int(os.path.splitext(image_basename)[0])
        frame = cv2.imread(jpg)
        visualised_jpg_path = os.path.join(args.output, 'detection', image_basename)
args = get_parser().parse_args()
logger = setup_logger()
logger.info("Arguments: " + str(args))
cfg = setup_cfg(args)
demo = VisualizationDemo(cfg)
output_file = None

if args.input:
    if len(args.input) == 1:
        args.input = glob.glob(os.path.expanduser(args.input[0]))
        files = os.listdir(args.input[0])
        args.input = [os.path.join(args.input[0], x) for x in files]
    assert args.input, "The input path(s) was not found"
    visualizer = VideoVisualizer(
        MetadataCatalog.get(cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"),
        instance_mode=ColorMode.IMAGE)
    for path in tqdm.tqdm(args.input, disable=not args.output):
        # use PIL, to be consistent with evaluation
        img = read_image(path, format="BGR")
        start_time = time.time()
        predictions, visualized_output = demo.run_on_image(img, visualizer=visualizer)
        if 'instances' in predictions:
            num_instances = len(predictions["instances"])
        else:
            num_instances = len(predictions["proposals"])
        logger.info("{}: detected {} instances in {:.2f}s".format(
            path, num_instances, time.time() - start_time))
class VisualizationDemo(object):
    def __init__(self, cfg, parallel, instance_mode=ColorMode.IMAGE):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from
                visualization. Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode
        self.parallel = parallel
        if self.parallel == 1:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)
        self.video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                instances = self.video_visualizer.draw_instance_bbox(predictions)
                return instances

        frame_gen = self._frame_from_video(video)
        if self.parallel == 1:
            buffer_size = self.predictor.default_buffer_size
            frame_data = deque()
            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)
                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)
            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
def run_on_video(self, video):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

    def process_predictions(frame, predictions, id_frame):
        vis_frame = frame
        if "sem_seg" in predictions:
            sem_seg = predictions["sem_seg"].argmax(dim=0).to(self.cpu_device).cpu().numpy()
            # Drawing: overlay the segmentation map on the frame
            mask_color = colormap[sem_seg].astype(dtype=np.uint8)
            vis_frame = cv2.addWeighted(frame, 0.3, mask_color, 0.7, 0)

        palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)

        # Stats: accumulate segmentation result counts
        labels, areas = np.unique(sem_seg, return_counts=True)
        if int(id_frame) % 300 == 0:
            for i in range(len(labels)):
                all_cnt[str(labels[i])] += 1  # overall count
                seg_cnt[str(labels[i])] += 1  # segmentation count

        # Drawing: draw the tracking boxes
        cateid = 0
        if self.tracks.get(id_frame):
            dects = self.tracks[id_frame]
            for dect in dects:
                x1 = int(dect['bbox'][0])
                x2 = int(dect['bbox'][0]) + int(dect['bbox'][2])
                y1 = int(dect['bbox'][1])
                y2 = int(dect['bbox'][1]) + int(dect['bbox'][3])
                category = categories[dect['category_id']]
                cateid = dect['cateid']
                catecnt = all_cnt[dect['category_id']]
                insid = int(dect['insid'])
                track_cnt[dect['category_id']] = cateid  # tracking count
                color = tuple(int((p * (insid ** 2 - insid + 1)) % 255) for p in palette)
                vis_frame = cv2.rectangle(vis_frame, (x1, y1), (x2, y2), color, 3)
                label = '{:s}:{:d}/{:d} '.format(category, cateid, catecnt)
                t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
                vis_frame = cv2.rectangle(
                    vis_frame, (x1, y1),
                    (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1)
                cv2.putText(vis_frame, label, (x1, y1 + t_size[1] + 4),
                            cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)

        # Drawing: write out the statistics
        for i in range(30):
            if i < 19:
                color = tuple(colormap.tolist()[i])
            else:
                color = tuple(colormap.tolist()[19])
            label = '{:s}:{:d}/{:d} '.format(
                categories[str(i)], seg_cnt[str(i)] + track_cnt[str(i)], all_cnt[str(i)])
            t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            # Rectangle specified by its diagonal corners
            vis_frame = cv2.rectangle(vis_frame, (0, (t_size[1] + 10) * i),
                                      (t_size[0] + 1, (t_size[1] + 10) * (i + 1)), color, -1)
            cv2.putText(vis_frame, label, (0, (t_size[1] + 10) * (i + 1) - 5),
                        cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)
        return vis_frame

    frame_gen = self._frame_from_video(video)
    if self.parallel:
        buffer_size = self.predictor.default_buffer_size
        frame_data = deque()
        for cnt, frame in enumerate(frame_gen):
            frame_data.append(frame)
            self.predictor.put(frame)
            if cnt >= buffer_size:
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions, str(cnt + 1 - buffer_size))
        while len(frame_data):
            frame = frame_data.popleft()
            predictions = self.predictor.get()
            yield process_predictions(frame, predictions, str(-len(frame_data)))
    else:
        for cnt, frame in enumerate(frame_gen):
            predictions = self.predictor(frame)
            yield process_predictions(frame, predictions, str(cnt))
#     fps=float(frames_per_second),
#     frameSize=(width, height),
#     isColor=True,
# )

while cap.isOpened():
    ret, frame = cap.read(0)
    frame = cv2.resize(frame, (224, 224))
    print(fps)
    print(num_frames)
    try:
        outputs = predictor(frame)
        # Note: the visualizer is recreated every frame here, which resets
        # instance colors across frames
        v = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                            instance_mode=ColorMode.IMAGE_BW)
        v = v.draw_instance_predictions(frame, outputs["instances"].to('cpu'))
        print(outputs["instances"].pred_classes)
        omt = str(outputs["instances"].pred_classes)
        outpclass = omt[8:9]
        print(outpclass)
        # If outpclass == '0', trigger the unlock action, e.g.:
        #     unlock(8)       # user-defined
        #     time.sleep(10)  # lock stays open for 10 seconds
        #     lock(8)
        #     GPIO.cleanup(8)
        # out.write(v.get_image())
        # cv2_imshow("Moda", v.get_image())
    except:
        pass  # (the snippet is truncated here)
def run_on_video(self, video):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: predictions,
        ndarray: BGR visualizations of each video frame.
    """
    video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

    def process_predictions(frame, predictions):
        # See https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if "instances" in predictions:
            predictions = predictions["instances"].to(self.cpu_device)
            vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
            retval = predictions
        # TODO: also handle "panoptic_seg" and "sem_seg" outputs
        # Converts Matplotlib RGB format to OpenCV BGR format
        vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
        return retval, vis_frame

    frame_gen = self._frame_from_video(video)
    if self.parallel:
        buffer_size = self.predictor.default_buffer_size
        frame_data = deque()
        for cnt, frame in enumerate(frame_gen):
            frame_data.append(frame)
            self.predictor.put(frame)
            if cnt >= buffer_size:
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        while len(frame_data):
            frame = frame_data.popleft()
            predictions = self.predictor.get()
            yield process_predictions(frame, predictions)
    else:
        for frame in frame_gen:
            yield process_predictions(frame, self.predictor(frame))
siammask = Custom(anchors=cfg_siammask['anchors'])
if args.siammask_resume:
    assert isfile(args.siammask_resume), \
        'Please download {} first.'.format(args.siammask_resume)
    siammask = load_pretrain(siammask, args.siammask_resume)
siammask.eval().to(device)

frame_gen = _frame_from_video(video)
metadata = MetadataCatalog.get(
    cfg_detectron.DATASETS.TEST[0] if len(cfg_detectron.DATASETS.TEST) else "__unused")
video_visualizer = VideoVisualizer(metadata, instance_mode=ColorMode.IMAGE)

if detectron_only:
    maxDissapear = 1
    objectTracker = ObjectTracker(maxDissapear)
    frame_idx = 0
    df = pd.DataFrame(columns=[
        'FrameId', 'Id', 'X', 'Y', 'Width', 'Height',
        'Confidence', 'ClassId', 'Visibility'
    ])
    for frame in frame_gen:
        vis_frame, predictions = process_frame(frame, detector)
def run_on_video(self, video):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    video_visualizer_object = VideoVisualizer(self.metadata_object, self.instance_mode)
    video_visualizer_keypoint = VideoVisualizer(self.metadata_keypoint, self.instance_mode)

    def get_parameters(annos):
        # Flatten detection, keypoint and head-pose annotations into one vector
        if annos["object_detection"]["pred_boxes"]:
            temp = annos["object_detection"]["pred_boxes"][0]
            obj_det = [1]
            temp = np.asarray(temp).flatten()
            key_det = annos["keypoint_detection"]["pred_keypoints"][0]
            key_det = np.asarray(key_det)[0:11, 0:2]
            # Express keypoints relative to the box's top-left corner
            key_det = np.subtract(key_det, temp[0:2]).flatten()
        else:
            obj_det = [-1]
            key_det = annos["keypoint_detection"]["pred_keypoints"][0]
            key_det = np.asarray(key_det)[0:11, 0:2].flatten()
        obj_det = np.asarray(obj_det)
        if annos["head_pose_estimation"]["predictions"]:
            hp_est = np.asarray(annos["head_pose_estimation"]["predictions"][0])
        else:
            hp_est = np.asarray([-100, -100, -100])
        anno_list = np.concatenate((obj_det, key_det, hp_est))
        return anno_list

    def process_predictions(frame, predictions_object, predictions_keypoint):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        blank_image = np.zeros((frame.shape[0], frame.shape[1], 3), np.uint8)

        if "instances" in predictions_object:
            predictions_object = predictions_object["instances"].to(self.cpu_device)
            self.data_json['object_detection']['pred_boxes'] = \
                predictions_object.get('pred_boxes').tensor.numpy().tolist()
            self.data_json['object_detection']['scores'] = \
                predictions_object.get('scores').numpy().tolist()
            vis_frame = video_visualizer_object.draw_instance_predictions(
                frame, predictions_object)

        if "instances" in predictions_keypoint:
            predictions_keypoint = predictions_keypoint["instances"].to(self.cpu_device)
            self.data_json['keypoint_detection']['pred_boxes'] = \
                predictions_keypoint.get('pred_boxes').tensor.numpy().tolist()
            self.data_json['keypoint_detection']['scores'] = \
                predictions_keypoint.get('scores').numpy().tolist()
            self.data_json['keypoint_detection']['pred_keypoints'] = \
                predictions_keypoint.get('pred_keypoints').numpy().tolist()
            vis_frame = video_visualizer_keypoint.draw_instance_predictions(
                vis_frame.get_image(), predictions_keypoint)

        # Head pose estimation
        predictions, bounding_box, face_keypoints, w, face_area = head_pose_estimation(
            frame, self.mtcnn, self.head_pose_module, self.transformations,
            self.softmax, self.idx_tensor)
        self.data_json['head_pose_estimation']['predictions'] = predictions
        self.data_json['head_pose_estimation']['pred_boxes'] = bounding_box

        # Converts Matplotlib RGB format to OpenCV BGR format
        vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
        for i in range(len(predictions)):
            plot_pose_cube(vis_frame,
                           predictions[i][0], predictions[i][1], predictions[i][2],
                           tdx=(face_keypoints[i][0] + face_keypoints[i][2]) / 2,
                           tdy=(face_keypoints[i][1] + face_keypoints[i][3]) / 2,
                           size=w[i])
        data_json = self.data_json
        self.data_json['frame'] = self.frame_count
        self.frame_count += 1

        # Classify the frame with the MLP head
        inputs_MLP = get_parameters(self.data_json)
        inputs_MLP = Variable(torch.from_numpy(inputs_MLP)).float().cuda()
        outputs_MLP = self.mlp_model(inputs_MLP)
        predicted_MLP = (outputs_MLP >= 0.5)
        cv2.putText(vis_frame, str(predicted_MLP.item()), (10, 700),
                    cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 0, 0), 10)
        return vis_frame, data_json

    frame_gen = self._frame_from_video(video)
    for frame in frame_gen:
        yield process_predictions(frame, self.predictor_object(frame),
                                  self.predictor_keypoint(frame))
def run_on_video(self, video):
    """
    Visualizes predictions on frames of the input video.

    Args:
        video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
            either a webcam or a video file.

    Yields:
        ndarray: BGR visualizations of each video frame.
    """
    video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

    def process_predictions(frame, predictions, tracker):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                frame, panoptic_seg.to(self.cpu_device), segments_info)
        elif "instances" in predictions:
            predictions = predictions["instances"].to(self.cpu_device)
            tracker.update(boxes=predictions.pred_boxes.tensor.numpy(),
                           labels=predictions.pred_classes.numpy())
            if SAVE_PREDICTIONS:
                SAVED_PREDICTIONS.append(predictions)
                if len(SAVED_PREDICTIONS) == 100:
                    with open('predictions.pkl', 'wb') as fp:
                        pickle.dump(SAVED_PREDICTIONS, fp)
                    print('Saving done!')
            vis_frame = draw_instance_predictions(video_visualizer, frame,
                                                  predictions, tracker)
        elif "sem_seg" in predictions:
            vis_frame = video_visualizer.draw_sem_seg(
                frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
        # Converts Matplotlib RGB format to OpenCV BGR format
        vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
        return vis_frame

    frame_gen = self._frame_from_video(video)
    if self.parallel:
        buffer_size = self.predictor.default_buffer_size
        frame_data = deque()
        for cnt, frame in enumerate(frame_gen):
            frame_data.append(frame)
            self.predictor.put(frame)
            if cnt >= buffer_size:
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions, self.tracker)
        while len(frame_data):
            frame = frame_data.popleft()
            predictions = self.predictor.get()
            yield process_predictions(frame, predictions, self.tracker)
    else:
        for frame in frame_gen:
            yield process_predictions(frame, self.predictor(frame), self.tracker)