def run(params, config, capture, detector, reid):
    win_name = 'Multi camera tracking'
    frame_number = 0
    avg_latency = AverageEstimator()
    output_detections = [[] for _ in range(capture.get_num_sources())]
    key = -1

    if config['normalizer_config']['enabled']:
        capture.add_transform(
            NormalizerCLAHE(
                config['normalizer_config']['clip_limit'],
                config['normalizer_config']['tile_size'],
            )
        )

    tracker = MultiCameraTracker(capture.get_num_sources(), reid, config['sct_config'],
                                 **config['mct_config'], visual_analyze=config['analyzer'])

    thread_body = FramesThreadBody(capture, max_queue_length=len(capture.captures) * 2)
    frames_thread = Thread(target=thread_body)
    frames_thread.start()

    if len(params.output_video):
        frame_size, fps = capture.get_source_parameters()
        target_width, target_height = get_target_size(frame_size, None, **config['visualization_config'])
        video_output_size = (target_width, target_height)
        fourcc = cv.VideoWriter_fourcc(*'XVID')
        output_video = cv.VideoWriter(params.output_video, fourcc, min(fps), video_output_size)
    else:
        output_video = None

    prev_frames = thread_body.frames_queue.get()
    detector.run_async(prev_frames, frame_number)
    presenter = monitors.Presenter(params.utilization_monitors, 0)

    while thread_body.process:
        if not params.no_show:
            key = check_pressed_keys(key)
            if key == 27:
                break
            presenter.handleKey(key)
        start = time.perf_counter()
        try:
            frames = thread_body.frames_queue.get_nowait()
        except queue.Empty:
            frames = None

        if frames is None:
            continue

        all_detections = detector.wait_and_grab()
        if params.save_detections:
            update_detections(output_detections, all_detections, frame_number)
        frame_number += 1
        detector.run_async(frames, frame_number)

        all_masks = [[] for _ in range(len(all_detections))]
        for i, detections in enumerate(all_detections):
            all_detections[i] = [det[0] for det in detections]
            all_masks[i] = [det[2] for det in detections if len(det) == 3]

        tracker.process(prev_frames, all_detections, all_masks)
        tracked_objects = tracker.get_tracked_objects()

        latency = max(time.perf_counter() - start, sys.float_info.epsilon)
        avg_latency.update(latency)
        fps = round(1. / latency, 1)

        vis = visualize_multicam_detections(prev_frames, tracked_objects, fps,
                                            **config['visualization_config'])
        presenter.drawGraphs(vis)
        if not params.no_show:
            cv.imshow(win_name, vis)
        if output_video:
            output_video.write(cv.resize(vis, video_output_size))

        print('\rProcessing frame: {}, fps = {} (avg_fps = {:.3})'.format(
            frame_number, fps, 1. / avg_latency.get()), end="")
        prev_frames, frames = frames, prev_frames

    print(presenter.reportMeans())
    print('')
    thread_body.process = False
    frames_thread.join()

    if len(params.history_file):
        save_json_file(params.history_file, tracker.get_all_tracks_history(), description='History file')
    if len(params.save_detections):
        save_json_file(params.save_detections, output_detections, description='Detections')
    if len(config['embeddings']['save_path']):
        save_embeddings(tracker.scts, **config['embeddings'])
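
# Note: Thread(target=thread_body) above works because the frames-reader body is
# used as a callable object. A minimal sketch of such a body, assuming a
# MulticamCapture-like object with a get_frames() method (the demo ships its own
# FramesThreadBody; this illustrative version may differ from it):
import queue


class FramesThreadBodySketch:
    """Reads frame batches from the capture into a bounded queue until stopped."""

    def __init__(self, capture, max_queue_length):
        self.process = True
        self.frames_queue = queue.Queue()
        self.capture = capture
        self.max_queue_length = max_queue_length

    def __call__(self):
        while self.process:
            if self.frames_queue.qsize() > self.max_queue_length:
                continue  # back off until the consumer drains the queue
            has_frames, frames = self.capture.get_frames()
            if not has_frames and self.frames_queue.empty():
                self.process = False  # source exhausted and queue drained
                break
            if has_frames:
                self.frames_queue.put(frames)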

def main():
    """Visualize the results of the multi camera multi person tracker demo"""
    parser = argparse.ArgumentParser(description='Multi camera multi person '
                                                 'tracking visualization demo script')
    parser.add_argument('-i', type=str, nargs='+', help='Input videos')
    parser.add_argument('--history_file', type=str, default='', required=True,
                        help='File with tracker history')
    parser.add_argument('--output_video', type=str, default='', required=False,
                        help='Output video file')
    parser.add_argument('--gt_files', type=str, nargs='+', required=False,
                        help='Files with ground truth annotation')
    parser.add_argument('--timeline', type=str, default='',
                        help='Plot and save timeline')
    parser.add_argument('--match_gt_ids', default=False, action='store_true',
                        help='Match GT ids to ids from history')
    parser.add_argument('--merge_outputs', default=False, action='store_true',
                        help='Merge GT and history tracks into one frame')
    args = parser.parse_args()

    capture = MulticamCapture(args.i)
    with open(args.history_file) as hist_f:
        history = json.load(hist_f)

    assert len(history) == capture.get_num_sources()

    # Configure output video files
    output_video = None
    output_video_gt = None
    frame_size, fps_source = capture.get_source_parameters()
    if len(args.output_video):
        video_output_size, fps = calc_output_video_params(frame_size, fps_source,
                                                          args.gt_files, args.merge_outputs)
        fourcc = cv.VideoWriter_fourcc(*'XVID')
        output_video = cv.VideoWriter(args.output_video, fourcc, fps, video_output_size)
        if args.gt_files and not args.merge_outputs:
            ext = args.output_video.split('.')[-1]
            output_path = args.output_video[:len(args.output_video) - len(ext) - 1] + '_gt.' + ext
            output_video_gt = cv.VideoWriter(output_path, fourcc, fps, video_output_size)

    # Read GT tracks if necessary
    if args.gt_files:
        assert len(args.gt_files) == capture.get_num_sources()
        gt_tracks, _ = read_gt_tracks(args.gt_files)
        accs = [mm.MOTAccumulator(auto_id=True) for _ in args.gt_files]
    else:
        gt_tracks = None

    # If GT ids have to be matched to history ids, accumulate metrics up front
    if gt_tracks and args.match_gt_ids:
        accumulate_mot_metrics(accs, gt_tracks, history)
        match_gt_indices(gt_tracks, history, accs)
        metrics_accumulated = True
    else:
        metrics_accumulated = False

    # Process frames
    win_name = 'Multi camera tracking history visualizer'
    time = 0
    key = -1
    while True:
        print('\rVisualizing frame: {}'.format(time), end="")
        key = check_pressed_keys(key)
        if key == 27:
            break
        has_frames, frames = capture.get_frames()
        if not has_frames:
            break

        if gt_tracks:
            gt_detections = get_detections_from_tracks(gt_tracks, time)
            vis_gt = visualize_multicam_detections(copy.deepcopy(frames), gt_detections, fps='Ground truth')
        else:
            vis_gt = None

        active_detections = get_detections_from_tracks(history, time)
        vis = visualize_multicam_detections(frames, active_detections)

        if vis_gt is not None:
            if args.merge_outputs:
                vis = np.hstack([vis, vis_gt])
                cv.imshow(win_name, vis)
            else:
                cv.imshow('GT', vis_gt)
                cv.imshow(win_name, vis)
        else:
            cv.imshow(win_name, vis)
        time += 1

        if output_video:
            output_video.write(cv.resize(vis, video_output_size))
        if vis_gt is not None and output_video_gt is not None:
            output_video_gt.write(cv.resize(vis_gt, video_output_size))

    if len(args.timeline):
        for i in range(len(history)):
            log.info('Source_{}: drawing timeline...'.format(i))
            plot_timeline(i, time, history[i], save_path=args.timeline, name='SCT')
        if gt_tracks:
            for i in range(len(gt_tracks)):
                log.info('GT_{}: drawing timeline...'.format(i))
                plot_timeline(i, time, gt_tracks[i], save_path=args.timeline, name='GT')

    if gt_tracks:
        if not metrics_accumulated:
            accumulate_mot_metrics(accs, gt_tracks, history)
        mh = mm.metrics.create()
        summary = mh.compute_many(accs,
                                  metrics=mm.metrics.motchallenge_metrics,
                                  generate_overall=True,
                                  names=['video ' + str(i) for i in range(len(accs))])

        strsummary = mm.io.render_summary(summary,
                                          formatters=mh.formatters,
                                          namemap=mm.io.motchallenge_metric_names)
        print(strsummary)
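
# For reference: a MOTAccumulator is filled frame by frame with ground-truth ids,
# hypothesis ids, and a GT-to-hypothesis distance matrix. A minimal sketch of what
# accumulate_mot_metrics is assumed to do for a single camera (boxes in
# (x, y, w, h) format; the helper name and data layout are illustrative):
import motmetrics as mm


def accumulate_single_camera_sketch(acc, gt_frames, hyp_frames):
    """gt_frames/hyp_frames: per-frame lists of (track_id, box) pairs."""
    for gt_objs, hyp_objs in zip(gt_frames, hyp_frames):
        gt_ids = [track_id for track_id, _ in gt_objs]
        hyp_ids = [track_id for track_id, _ in hyp_objs]
        # Pairwise 1 - IoU distances; pairs with IoU under 0.5 count as unmatched
        dists = mm.distances.iou_matrix([box for _, box in gt_objs],
                                        [box for _, box in hyp_objs], max_iou=0.5)
        acc.update(gt_ids, hyp_ids, dists)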

def run(params, config, capture, detector, reid):
    win_name = 'Multi camera tracking'
    frame_number = 0
    output_detections = [[] for _ in range(capture.get_num_sources())]
    key = -1

    if config.normalizer_config.enabled:
        capture.add_transform(
            NormalizerCLAHE(
                config.normalizer_config.clip_limit,
                config.normalizer_config.tile_size,
            )
        )

    tracker = MultiCameraTracker(capture.get_num_sources(), reid, config.sct_config,
                                 **vars(config.mct_config), visual_analyze=config.analyzer)

    thread_body = FramesThreadBody(capture, max_queue_length=len(capture.captures) * 2)
    frames_thread = Thread(target=thread_body)
    frames_thread.start()

    frames_read = False
    set_output_params = False

    prev_frames = thread_body.frames_queue.get()
    detector.run_async(prev_frames, frame_number)
    metrics = PerformanceMetrics()
    presenter = monitors.Presenter(params.utilization_monitors, 0)

    while thread_body.process:
        if not params.no_show:
            key = check_pressed_keys(key)
            if key == 27:
                break
            presenter.handleKey(key)
        start_time = time.perf_counter()
        try:
            frames = thread_body.frames_queue.get_nowait()
            frames_read = True
        except queue.Empty:
            frames = None

        if frames is None:
            continue

        all_detections = detector.wait_and_grab()
        if params.save_detections:
            update_detections(output_detections, all_detections, frame_number)
        frame_number += 1
        detector.run_async(frames, frame_number)

        all_masks = [[] for _ in range(len(all_detections))]
        for i, detections in enumerate(all_detections):
            all_detections[i] = [det[0] for det in detections]
            all_masks[i] = [det[2] for det in detections if len(det) == 3]

        tracker.process(prev_frames, all_detections, all_masks)
        tracked_objects = tracker.get_tracked_objects()

        vis = visualize_multicam_detections(prev_frames, tracked_objects,
                                            **vars(config.visualization_config))
        metrics.update(start_time, vis)
        presenter.drawGraphs(vis)
        if not params.no_show:
            cv.imshow(win_name, vis)

        # Output parameters depend on the actual frame size, so they are set up
        # only once the first batch of frames has been read
        if frames_read and not set_output_params:
            set_output_params = True
            if len(params.output_video):
                frame_size = [frame.shape[::-1] for frame in frames]
                fps = capture.get_fps()
                target_width, target_height = get_target_size(frame_size, None, **vars(config.visualization_config))
                video_output_size = (target_width, target_height)
                fourcc = cv.VideoWriter_fourcc(*'XVID')
                output_video = cv.VideoWriter(params.output_video, fourcc, min(fps), video_output_size)
            else:
                output_video = None
        if set_output_params and output_video:
            output_video.write(cv.resize(vis, video_output_size))

        prev_frames, frames = frames, prev_frames

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
    thread_body.process = False
    frames_thread.join()

    if len(params.history_file):
        save_json_file(params.history_file, tracker.get_all_tracks_history(), description='History file')
    if len(params.save_detections):
        save_json_file(params.save_detections, output_detections, description='Detections')
    if len(config.embeddings.save_path):
        save_embeddings(tracker.scts, **vars(config.embeddings))

def main():
    parser = argparse.ArgumentParser(description='Whiteboard inpainting demo')
    parser.add_argument('-i', '--input', required=True,
                        help='Required. Path to a video file or a device node of a web-camera.')
    parser.add_argument('--loop', default=False, action='store_true',
                        help='Optional. Enable reading the input in a loop.')
    parser.add_argument('-o', '--output', required=False,
                        help='Optional. Name of the output file(s) to save.')
    parser.add_argument('-limit', '--output_limit', required=False, default=1000, type=int,
                        help='Optional. Number of frames to store in output. '
                             'If 0 is set, all frames are stored.')
    parser.add_argument('-m_i', '--m_instance_segmentation', type=str, required=False,
                        help='Required. Path to the instance segmentation model.')
    parser.add_argument('-m_s', '--m_semantic_segmentation', type=str, required=False,
                        help='Required. Path to the semantic segmentation model.')
    parser.add_argument('-t', '--threshold', type=float, default=0.6,
                        help='Optional. Threshold for person instance segmentation model.')
    parser.add_argument('--no_show', help="Optional. Don't show output.", action='store_true')
    parser.add_argument('-d', '--device', type=str, default='CPU',
                        help='Optional. Specify a target device to infer on. CPU, GPU, HDDL or MYRIAD is '
                             'acceptable. The demo will look for a suitable plugin for the device specified.')
    parser.add_argument('-l', '--cpu_extension', type=str, default=None,
                        help='MKLDNN (CPU)-targeted custom layers. Absolute '
                             'path to a shared library with the kernels impl.')
    parser.add_argument('-u', '--utilization_monitors', default='', type=str,
                        help='Optional. List of monitors to show initially.')
    args = parser.parse_args()

    cap = open_images_capture(args.input, args.loop)
    if cap.get_type() not in ('VIDEO', 'CAMERA'):
        raise RuntimeError("The input should be a video file or a numeric camera ID")

    if bool(args.m_instance_segmentation) == bool(args.m_semantic_segmentation):
        raise ValueError('Set up exactly one of segmentation models: '
                         '--m_instance_segmentation or --m_semantic_segmentation')

    labels_dir = Path(__file__).resolve().parents[3] / 'data/dataset_classes'
    mouse = MouseClick()
    if not args.no_show:
        cv2.namedWindow(WINNAME)
        cv2.setMouseCallback(WINNAME, mouse.get_points)

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    model_path = args.m_instance_segmentation if args.m_instance_segmentation else args.m_semantic_segmentation
    log.info('Reading model {}'.format(model_path))
    if args.m_instance_segmentation:
        labels_file = str(labels_dir / 'coco_80cl_bkgr.txt')
        segmentation = MaskRCNN(core, args.m_instance_segmentation, labels_file,
                                args.threshold, args.device, args.cpu_extension)
    elif args.m_semantic_segmentation:
        labels_file = str(labels_dir / 'cityscapes_19cl_bkgr.txt')
        segmentation = SemanticSegmentation(core, args.m_semantic_segmentation, labels_file,
                                            args.threshold, args.device, args.cpu_extension)
    log.info('The model {} is loaded to {}'.format(model_path, args.device))

    metrics = PerformanceMetrics()
    video_writer = cv2.VideoWriter()

    black_board = False
    frame_number = 0
    key = -1

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    out_frame_size = (frame.shape[1], frame.shape[0] * 2)
    output_frame = np.full((frame.shape[0], frame.shape[1], 3), 255, dtype='uint8')
    presenter = monitors.Presenter(args.utilization_monitors, 20,
                                   (out_frame_size[0] // 4, out_frame_size[1] // 16))
    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), out_frame_size):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        mask = None
        detections = segmentation.get_detections([frame])
        expand_mask(detections, frame.shape[1] // 27)
        if len(detections[0]) > 0:
            mask = detections[0][0][2]
            for i in range(1, len(detections[0])):
                mask = cv2.bitwise_or(mask, detections[0][i][2])

        if mask is not None:
            mask = np.stack([mask, mask, mask], axis=-1)
        else:
            mask = np.zeros(frame.shape, dtype='uint8')

        clear_frame = remove_background(frame, invert_colors=not black_board)

        output_frame = np.where(mask, output_frame, clear_frame)
        merged_frame = np.vstack([frame, output_frame])
        merged_frame = cv2.resize(merged_frame, out_frame_size)

        metrics.update(start_time, merged_frame)

        if video_writer.isOpened() and (args.output_limit <= 0 or frame_number <= args.output_limit - 1):
            video_writer.write(merged_frame)

        presenter.drawGraphs(merged_frame)
        if not args.no_show:
            cv2.imshow(WINNAME, merged_frame)
            key = check_pressed_keys(key)
            if key == 27:  # 'Esc'
                break
            if key == ord('i'):  # catch pressing of key 'i'
                black_board = not black_board
                output_frame = 255 - output_frame
            else:
                presenter.handleKey(key)

            if mouse.crop_available:
                x0, x1 = min(mouse.points[0][0], mouse.points[1][0]), \
                         max(mouse.points[0][0], mouse.points[1][0])
                y0, y1 = min(mouse.points[0][1], mouse.points[1][1]), \
                         max(mouse.points[0][1], mouse.points[1][1])
                x1, y1 = min(x1, output_frame.shape[1] - 1), min(y1, output_frame.shape[0] - 1)
                board = output_frame[y0: y1, x0: x1, :]
                if board.shape[0] > 0 and board.shape[1] > 0:
                    cv2.namedWindow('Board', cv2.WINDOW_KEEPRATIO)
                    cv2.imshow('Board', board)

        frame_number += 1
        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
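
# The Board crop above relies on a small MouseClick helper registered via
# cv2.setMouseCallback. A sketch of what such a helper could look like
# (hypothetical; the demo ships its own implementation, which may differ):
import cv2


class MouseClickSketch:
    """Records a drag rectangle and flags when a crop region is available."""

    def __init__(self):
        self.points = [(0, 0), (0, 0)]
        self.crop_available = False

    def get_points(self, event, x, y, flags, param):
        if event == cv2.EVENT_LBUTTONDOWN:
            self.points[0] = (x, y)
            self.crop_available = False
        elif event == cv2.EVENT_LBUTTONUP:
            self.points[1] = (x, y)
            self.crop_available = True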

def main():
    parser = argparse.ArgumentParser(description='Whiteboard inpainting demo')
    parser.add_argument('-i', type=str, required=True,
                        help='Input sources (index of camera or path to a video file)')
    parser.add_argument('-m_i', '--m_instance_segmentation', type=str, required=False,
                        help='Path to the instance segmentation model')
    parser.add_argument('-m_s', '--m_semantic_segmentation', type=str, required=False,
                        help='Path to the semantic segmentation model')
    parser.add_argument('-t', '--threshold', type=float, default=0.6,
                        help='Threshold for person instance segmentation model')
    parser.add_argument('--output_video', type=str, default='', required=False,
                        help='Optional. Path to output video')
    parser.add_argument("--no_show", help="Optional. Don't show output", action='store_true')
    parser.add_argument('-d', '--device', type=str, default='CPU',
                        help='Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
                             'acceptable. The demo will look for a suitable plugin for the device specified')
    parser.add_argument('-l', '--cpu_extension', type=str, default=None,
                        help='MKLDNN (CPU)-targeted custom layers. Absolute '
                             'path to a shared library with the kernels impl.')
    parser.add_argument('-u', '--utilization_monitors', default='', type=str,
                        help='Optional. List of monitors to show initially')
    args = parser.parse_args()

    capture = VideoCapture(args.i)

    if bool(args.m_instance_segmentation) == bool(args.m_semantic_segmentation):
        raise ValueError('Set up exactly one of segmentation models: '
                         '--m_instance_segmentation or --m_semantic_segmentation')

    frame_size, fps = capture.get_source_parameters()
    out_frame_size = (int(frame_size[0]), int(frame_size[1] * 2))
    presenter = monitors.Presenter(args.utilization_monitors, 20,
                                   (out_frame_size[0] // 4, out_frame_size[1] // 16))

    root_dir = osp.dirname(osp.abspath(__file__))

    mouse = MouseClick()
    if not args.no_show:
        cv2.namedWindow(WINNAME)
        cv2.setMouseCallback(WINNAME, mouse.get_points)

    if args.output_video:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        output_video = cv2.VideoWriter(args.output_video, fourcc, fps, out_frame_size)
    else:
        output_video = None

    log.info("Initializing Inference Engine")
    ie = IECore()
    if args.m_instance_segmentation:
        labels_file = osp.join(root_dir, 'coco_labels.txt')
        segmentation = MaskRCNN(ie, args.m_instance_segmentation, labels_file,
                                args.threshold, args.device, args.cpu_extension)
    elif args.m_semantic_segmentation:
        labels_file = osp.join(root_dir, 'cityscapes_labels.txt')
        segmentation = SemanticSegmentation(ie, args.m_semantic_segmentation, labels_file,
                                            args.threshold, args.device, args.cpu_extension)

    black_board = False
    output_frame = np.full((frame_size[1], frame_size[0], 3), 255, dtype='uint8')
    frame_number = 0
    key = -1

    while True:
        start = time.time()
        _, frame = capture.get_frame()

        mask = None
        if frame is not None:
            detections = segmentation.get_detections([frame])
            expand_mask(detections, frame_size[0] // 27)
            if len(detections[0]) > 0:
                mask = detections[0][0][2]
                for i in range(1, len(detections[0])):
                    mask = cv2.bitwise_or(mask, detections[0][i][2])
        else:
            break

        if mask is not None:
            mask = np.stack([mask, mask, mask], axis=-1)
        else:
            mask = np.zeros(frame.shape, dtype='uint8')

        clear_frame = remove_background(frame, invert_colors=not black_board)

        output_frame = np.where(mask, output_frame, clear_frame)
        merged_frame = np.vstack([frame, output_frame])
        merged_frame = cv2.resize(merged_frame, out_frame_size)

        if output_video is not None:
            output_video.write(merged_frame)

        presenter.drawGraphs(merged_frame)
        if not args.no_show:
            cv2.imshow(WINNAME, merged_frame)
            key = check_pressed_keys(key)
            if key == 27:  # 'Esc'
                break
            if key == ord('i'):  # catch pressing of key 'i'
                black_board = not black_board
                output_frame = 255 - output_frame
            else:
                presenter.handleKey(key)

            if mouse.crop_available:
                x0, x1 = min(mouse.points[0][0], mouse.points[1][0]), \
                         max(mouse.points[0][0], mouse.points[1][0])
                y0, y1 = min(mouse.points[0][1], mouse.points[1][1]), \
                         max(mouse.points[0][1], mouse.points[1][1])
                x1, y1 = min(x1, output_frame.shape[1] - 1), min(y1, output_frame.shape[0] - 1)
                board = output_frame[y0: y1, x0: x1, :]
                if board.shape[0] > 0 and board.shape[1] > 0:
                    cv2.namedWindow('Board', cv2.WINDOW_KEEPRATIO)
                    cv2.imshow('Board', board)

        end = time.time()
        print('\rProcessing frame: {}, fps = {:.3}'.format(frame_number, 1. / (end - start)), end="")
        frame_number += 1

    print('')
    log.info(presenter.reportMeans())

    if output_video is not None:
        output_video.release()

def run(params, config, capture, detector, reid):
    # getMousePointer() is expected to update the module-level ix, iy on clicks,
    # so declare them global here instead of shadowing them with locals
    global ix, iy
    ix, iy = -1, -1
    # Image-plane points and their correspondences on the top-down floor plane
    pts_src = np.array([[561, 1022], [990, 698], [486, 273], [95, 504]], dtype='float32')
    pts_dest = np.array([[0, 0], [0, 400], [400, 700], [0, 700]], dtype='float32')
    # calculate matrix H
    h, status = cv.findHomography(pts_src, pts_dest)

    win_name = 'Multi camera tracking'
    frame_number = 0
    avg_latency = AverageEstimator()
    output_detections = [[] for _ in range(capture.get_num_sources())]
    key = -1
    refObj = []

    if config['normalizer_config']['enabled']:
        capture.add_transform(
            NormalizerCLAHE(
                config['normalizer_config']['clip_limit'],
                config['normalizer_config']['tile_size'],
            )
        )

    tracker = MultiCameraTracker(capture.get_num_sources(), reid, config['sct_config'],
                                 **config['mct_config'], visual_analyze=config['analyzer'])

    thread_body = FramesThreadBody(capture, max_queue_length=len(capture.captures) * 2)
    frames_thread = Thread(target=thread_body)
    frames_thread.start()

    if len(params.output_video):
        frame_size, fps = capture.get_source_parameters()
        target_width, target_height = get_target_size(frame_size, None, **config['visualization_config'])
        video_output_size = (target_width, target_height)
        fourcc = cv.VideoWriter_fourcc(*'XVID')  # kept for the commented-out XVID writer below
        output_video = cv.VideoWriter(params.output_video,
                                      cv.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                                      min(fps), video_output_size)
        # output_video = cv.VideoWriter(params.output_video, fourcc, min(fps), video_output_size)
    else:
        output_video = None

    prev_frames = thread_body.frames_queue.get()
    detector.run_async(prev_frames, frame_number)

    if not params.no_show:
        # Create the window up front so the mouse callback can be registered once
        cv.namedWindow(win_name)
        cv.setMouseCallback(win_name, getMousePointer)

    while thread_body.process:
        if not params.no_show:
            key = check_pressed_keys(key)
            if key == 27:
                break
        start = time.time()
        try:
            frames = thread_body.frames_queue.get_nowait()
        except queue.Empty:
            frames = None

        if frames is None:
            continue

        all_detections = detector.wait_and_grab()
        for det in all_detections:
            for obj in det:
                print("Boxes:", obj[0])
                print("Confidence:", obj[1])
        if params.save_detections:
            update_detections(output_detections, all_detections, frame_number)
        frame_number += 1
        detector.run_async(frames, frame_number)

        all_masks = [[] for _ in range(len(all_detections))]
        for i, detections in enumerate(all_detections):
            all_detections[i] = [det[0] for det in detections]
            all_masks[i] = [det[2] for det in detections if len(det) == 3]

        tracker.process(prev_frames, all_detections, all_masks)
        tracked_objects = tracker.get_tracked_objects()

        latency = max(time.time() - start, sys.float_info.epsilon)
        avg_latency.update(latency)
        fps = round(1. / latency, 1)

        vis = visualize_multicam_detections(prev_frames, tracked_objects, fps,
                                            **config['visualization_config'], h=h)
        if not params.no_show:
            if ix != -1 and iy != -1:
                refObj.append((ix, iy))
                ix, iy = -1, -1
                print(len(refObj))
            if len(refObj) == 2:
                print("Len 2 Rectangle Drawn.")
                vis = cv.rectangle(vis, refObj[0], refObj[1], (255, 0, 0), 2)
                refObj.clear()
            cv.imshow(win_name, vis)
            # cv.imwrite("refPicture.png", vis)
        if output_video:
            output_video.write(cv.resize(vis, video_output_size))

        # print('\rProcessing frame: {}, fps = {} (avg_fps = {:.3})'.format(
        #     frame_number, fps, 1. / avg_latency.get()), end="")
        prev_frames, frames = frames, prev_frames

    # print('')
    thread_body.process = False
    frames_thread.join()

    if len(params.history_file):
        save_json_file(params.history_file, tracker.get_all_tracks_history(), description='History file')
    if len(params.save_detections):
        save_json_file(params.save_detections, output_detections, description='Detections')
    if len(config['embeddings']['save_path']):
        save_embeddings(tracker.scts, **config['embeddings'])
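
# The homography h computed at the top of run() maps image-plane points onto the
# top-down plane spanned by pts_dest. A sketch of how it could be applied to
# project a detection's foot point to floor coordinates; the (left, top, right,
# bottom) box layout is an assumption about the obj[0] boxes printed above:
import numpy as np
import cv2 as cv


def project_to_floor(box, h):
    left, top, right, bottom = box
    # Bottom-center of the box approximates where the person touches the ground
    foot = np.array([[[(left + right) / 2., float(bottom)]]], dtype='float32')
    floor_point = cv.perspectiveTransform(foot, h)
    return floor_point[0, 0]  # (x, y) in floor-plane coordinates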