def main(): """ Main function. """ log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout) args = build_argparser().parse_args() class_map = load_class_map(args.class_map) assert class_map is not None ie_core = load_ie_core(args.device, args.cpu_extension) person_detector = PersonDetector(args.detection_model, args.device, ie_core, num_requests=2, output_shape=DETECTOR_OUTPUT_SHAPE) action_recognizer = ActionRecognizer(args.action_model, args.device, ie_core, num_requests=2, img_scale=ACTION_IMAGE_SCALE, num_classes=len(class_map)) person_tracker = Tracker(person_detector, TRACKER_SCORE_THRESHOLD, TRACKER_IOU_THRESHOLD) video_stream = VideoStream(args.input, ACTION_NET_INPUT_FPS, action_recognizer.input_length) video_stream.start() visualizer = Visualizer(VISUALIZER_TRG_FPS) visualizer.register_window('Demo') presenter = monitors.Presenter(args.utilization_monitors) samples_library = None if args.samples_dir is not None and os.path.exists(args.samples_dir): visualizer.register_window('Gesture library') visualizer.start() library_queue = visualizer.get_queue('Gesture library') samples_library = VideoLibrary(args.samples_dir, SAMPLES_MAX_WINDOW_SIZE, list(class_map.values()), library_queue, SAMPLES_TRG_FPS) samples_library.start() else: visualizer.start() last_caption = None active_object_id = -1 tracker_labels_map = dict() tracker_labels = set() start_time = time.perf_counter() while True: frame = video_stream.get_live_frame() batch = video_stream.get_batch() if frame is None or batch is None: break detections, tracker_labels_map = person_tracker.add_frame( frame, len(OBJECT_IDS), tracker_labels_map) if detections is None: active_object_id = -1 last_caption = None if len(detections) == 1: active_object_id = 0 if active_object_id >= 0: cur_det = [det for det in detections if det.id == active_object_id] if len(cur_det) != 1: active_object_id = -1 last_caption = None continue recognizer_result = action_recognizer(batch, cur_det[0].roi.reshape(-1)) if recognizer_result is not None: action_class_id = np.argmax(recognizer_result) action_class_label = \ class_map[action_class_id] if class_map is not None else action_class_id action_class_score = np.max(recognizer_result) if action_class_score > args.action_threshold: last_caption = 'Last gesture: {} '.format( action_class_label) end_time = time.perf_counter() elapsed_time = end_time - start_time start_time = end_time presenter.drawGraphs(frame) if active_object_id >= 0: current_fps = 1.0 / elapsed_time cv2.putText(frame, 'FPS: {:.2f}'.format(current_fps), (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2) if detections is not None: tracker_labels = set(det.id for det in detections) for det in detections: roi_color = (0, 255, 0) if active_object_id == det.id else (128, 128, 128) border_width = 2 if active_object_id == det.id else 1 person_roi = det.roi[0] cv2.rectangle(frame, (person_roi[0], person_roi[1]), (person_roi[2], person_roi[3]), roi_color, border_width) cv2.putText(frame, str(det.id), (person_roi[0] + 10, person_roi[1] + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.8, roi_color, 2) if last_caption is not None: cv2.putText(frame, last_caption, (10, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) if args.no_show: continue visualizer.put_queue(frame, 'Demo') key = visualizer.get_key() if key == 27: # esc break elif key == ord(' '): # space active_object_id = -1 last_caption = None elif key == 13: # enter last_caption = None elif key in OBJECT_IDS: # 0-9 local_bbox_id = int(chr(key)) if 
local_bbox_id in tracker_labels: active_object_id = local_bbox_id else: presenter.handleKey(key) if samples_library is not None: if key == ord('f'): # forward samples_library.next() elif key == ord('b'): # backward samples_library.prev() if samples_library is not None: samples_library.release() visualizer.release() video_stream.release() print(presenter.reportMeans())
def main():
    args = build_argparser().parse_args()

    class_map = load_class_map(args.class_map)
    if class_map is None:
        raise RuntimeError("Can't read {}".format(args.class_map))

    core = load_core()

    # Pipeline components: person detector feeds the tracker, the recognizer classifies gestures.
    person_detector = PersonDetector(args.detection_model, args.device, core,
                                     num_requests=2, output_shape=DETECTOR_OUTPUT_SHAPE)
    action_recognizer = ActionRecognizer(args.action_model, args.device, core,
                                         num_requests=2, img_scale=ACTION_IMAGE_SCALE,
                                         num_classes=len(class_map))

    person_tracker = Tracker(person_detector, TRACKER_SCORE_THRESHOLD, TRACKER_IOU_THRESHOLD)

    video_stream = VideoStream(args.input, ACTION_NET_INPUT_FPS, action_recognizer.input_length)
    video_stream.start()

    metrics = PerformanceMetrics()

    visualizer = Visualizer(VISUALIZER_TRG_FPS)
    visualizer.register_window('Demo')

    presenter = monitors.Presenter(args.utilization_monitors)

    # Optional gesture sample library shown in a separate window.
    samples_library = None
    if args.samples_dir is not None and os.path.exists(args.samples_dir):
        visualizer.register_window('Gesture library')
        visualizer.start()

        library_queue = visualizer.get_queue('Gesture library')
        samples_library = VideoLibrary(args.samples_dir, SAMPLES_MAX_WINDOW_SIZE,
                                       list(class_map.values()), library_queue, SAMPLES_TRG_FPS)
        samples_library.start()
    else:
        visualizer.start()

    last_caption = None
    active_object_id = -1
    tracker_labels_map = {}
    tracker_labels = set()

    # Main capture and processing loop.
    frames_processed = 0
    while True:
        start_time = perf_counter()
        frame = video_stream.get_live_frame()
        batch = video_stream.get_batch()
        if frame is None or batch is None:
            break

        # Open the video writer lazily, once the first frame (and its size) is known.
        if frames_processed == 0:
            video_writer = cv2.VideoWriter()
            if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                                     video_stream.fps(),
                                                     (frame.shape[1], frame.shape[0])):
                raise RuntimeError("Can't open video writer")

        detections, tracker_labels_map = person_tracker.add_frame(
            frame, len(OBJECT_IDS), tracker_labels_map)
        if detections is None:
            active_object_id = -1
            last_caption = None

        if detections is not None and len(detections) == 1:
            active_object_id = 0

        if active_object_id >= 0:
            cur_det = [det for det in detections if det.id == active_object_id]
            if len(cur_det) != 1:
                active_object_id = -1
                last_caption = None
                continue

            recognizer_result = action_recognizer(batch, cur_det[0].roi.reshape(-1))
            if recognizer_result is not None:
                action_class_id = np.argmax(recognizer_result)
                action_class_label = \
                    class_map[action_class_id] if class_map is not None else action_class_id
                action_class_score = np.max(recognizer_result)
                if action_class_score > args.action_threshold:
                    last_caption = 'Last gesture: {} '.format(action_class_label)

        presenter.drawGraphs(frame)

        # Draw the tracked persons; the active one is highlighted in green.
        if detections is not None:
            tracker_labels = {det.id for det in detections}

            for det in detections:
                roi_color = (0, 255, 0) if active_object_id == det.id else (128, 128, 128)
                border_width = 2 if active_object_id == det.id else 1
                person_roi = det.roi[0]
                cv2.rectangle(frame, (person_roi[0], person_roi[1]),
                              (person_roi[2], person_roi[3]), roi_color, border_width)
                cv2.putText(frame, str(det.id), (person_roi[0] + 10, person_roi[1] + 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, roi_color, 2)

        if last_caption is not None:
            cv2.putText(frame, last_caption, (10, frame.shape[0] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        metrics.update(start_time, frame)

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or frames_processed <= args.output_limit):
            video_writer.write(frame)

        if args.no_show:
            continue

        visualizer.put_queue(frame, 'Demo')
        key = visualizer.get_key()

        # Keyboard controls.
        if key == 27:  # esc
            break
        elif key == ord(' '):  # space
            active_object_id = -1
            last_caption = None
        elif key == 13:  # enter
            last_caption = None
        elif key in OBJECT_IDS:  # 0-9
            local_bbox_id = int(chr(key))
            if local_bbox_id in tracker_labels:
                active_object_id = local_bbox_id
        else:
            presenter.handleKey(key)

        if samples_library is not None:
            if key == ord('f'):  # forward
                samples_library.next()
            elif key == ord('b'):  # backward
                samples_library.prev()

    if samples_library is not None:
        samples_library.release()
    visualizer.release()
    video_stream.release()
    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
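

# A minimal entry-point sketch (an assumption, not shown in the excerpt above): run main()
# when the script is executed directly and propagate its return value, if any, as the exit code.
if __name__ == '__main__':
    sys.exit(main() or 0)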