def main():
    """Prepares data for the multi camera multi person tracking demo"""
    current_dir = os.path.dirname(os.path.abspath(__file__))

    parser = argparse.ArgumentParser(description='Multi camera multi person tracking live demo script')
    parser.add_argument('-i', type=str, nargs='+', required=True,
                        help='Input sources (indexes of cameras or paths to video files)')
    parser.add_argument('--config', type=str, default=os.path.join(current_dir, 'config.py'), required=False,
                        help='Configuration file')
    parser.add_argument('--detections', type=str,
                        help='JSON file with bounding boxes')
    parser.add_argument('-m', '--m_detector', type=str, required=False,
                        help='Path to the person detection model')
    parser.add_argument('--t_detector', type=float, default=0.6,
                        help='Threshold for the person detection model')
    parser.add_argument('--m_segmentation', type=str, required=False,
                        help='Path to the person instance segmentation model')
    parser.add_argument('--t_segmentation', type=float, default=0.6,
                        help='Threshold for person instance segmentation model')
    parser.add_argument('--m_reid', type=str, required=True,
                        help='Path to the person re-identification model')
    parser.add_argument('--output_video', type=str, default='', required=False,
                        help='Optional. Path to output video')
    parser.add_argument('--history_file', type=str, default='', required=False,
                        help='Optional. Path to file in JSON format to save results of the demo')
    parser.add_argument('--save_detections', type=str, default='', required=False,
                        help='Optional. Path to file in JSON format to save bounding boxes')
    parser.add_argument('--no_show', action='store_true',
                        help="Optional. Don't show output")
    parser.add_argument('-d', '--device', type=str, default='CPU')
    parser.add_argument('-l', '--cpu_extension', type=str, default=None,
                        help='MKLDNN (CPU)-targeted custom layers. Absolute path to a shared library '
                             'with the kernels impl.')
    parser.add_argument('-u', '--utilization_monitors', default='', type=str,
                        help='Optional. List of monitors to show initially.')
    args = parser.parse_args()

    if check_detectors(args) != 1:
        sys.exit(1)

    if len(args.config):
        log.info('Reading configuration file {}'.format(args.config))
        config = read_py_config(args.config)
    else:
        log.error('No configuration file specified. Please specify parameter \'--config\'')
        sys.exit(1)

    random.seed(config['random_seed'])
    capture = MulticamCapture(args.i)

    log.info('Creating Inference Engine')
    ie = IECore()

    # Detections come either from a file, an instance segmentation model, or a detection model
    if args.detections:
        person_detector = DetectionsFromFileReader(args.detections, args.t_detector)
    elif args.m_segmentation:
        person_detector = MaskRCNN(ie, args.m_segmentation, args.t_segmentation,
                                   args.device, args.cpu_extension,
                                   capture.get_num_sources())
    else:
        person_detector = Detector(ie, args.m_detector, args.t_detector,
                                   args.device, args.cpu_extension,
                                   capture.get_num_sources())

    if args.m_reid:
        person_recognizer = VectorCNN(ie, args.m_reid, args.device, args.cpu_extension)
    else:
        person_recognizer = None

    run(args, config, capture, person_detector, person_recognizer)
    log.info('Demo finished successfully')
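
# A minimal sketch of how this module is typically launched, assuming it is run as a
# script; the logging configuration below is an assumption, not taken from the demo itself.
if __name__ == '__main__':
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    main()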

def main():
    parser = argparse.ArgumentParser(description='Whiteboard inpainting demo')
    parser.add_argument('-i', '--input', required=True,
                        help='Required. Path to a video file or a device node of a web-camera.')
    parser.add_argument('--loop', default=False, action='store_true',
                        help='Optional. Enable reading the input in a loop.')
    parser.add_argument('-o', '--output', required=False,
                        help='Optional. Name of the output file(s) to save.')
    parser.add_argument('-limit', '--output_limit', required=False, default=1000, type=int,
                        help='Optional. Number of frames to store in output. '
                             'If 0 is set, all frames are stored.')
    parser.add_argument('-m_i', '--m_instance_segmentation', type=str, required=False,
                        help='Required. Path to the instance segmentation model.')
    parser.add_argument('-m_s', '--m_semantic_segmentation', type=str, required=False,
                        help='Required. Path to the semantic segmentation model.')
    parser.add_argument('-t', '--threshold', type=float, default=0.6,
                        help='Optional. Threshold for person instance segmentation model.')
    parser.add_argument('--no_show', action='store_true',
                        help="Optional. Don't show output.")
    parser.add_argument('-d', '--device', type=str, default='CPU',
                        help='Optional. Specify a target device to infer on. CPU, GPU, HDDL or MYRIAD is '
                             'acceptable. The demo will look for a suitable plugin for the device specified.')
    parser.add_argument('-l', '--cpu_extension', type=str, default=None,
                        help='MKLDNN (CPU)-targeted custom layers. Absolute path to a shared library '
                             'with the kernels impl.')
    parser.add_argument('-u', '--utilization_monitors', default='', type=str,
                        help='Optional. List of monitors to show initially.')
    args = parser.parse_args()

    cap = open_images_capture(args.input, args.loop)
    if cap.get_type() not in ('VIDEO', 'CAMERA'):
        raise RuntimeError("The input should be a video file or a numeric camera ID")

    if bool(args.m_instance_segmentation) == bool(args.m_semantic_segmentation):
        raise ValueError('Set up exactly one of segmentation models: '
                         '--m_instance_segmentation or --m_semantic_segmentation')

    labels_dir = Path(__file__).resolve().parents[3] / 'data/dataset_classes'
    mouse = MouseClick()
    if not args.no_show:
        cv2.namedWindow(WINNAME)
        cv2.setMouseCallback(WINNAME, mouse.get_points)

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    model_path = args.m_instance_segmentation if args.m_instance_segmentation else args.m_semantic_segmentation
    log.info('Reading model {}'.format(model_path))
    if args.m_instance_segmentation:
        labels_file = str(labels_dir / 'coco_80cl_bkgr.txt')
        segmentation = MaskRCNN(core, args.m_instance_segmentation, labels_file,
                                args.threshold, args.device, args.cpu_extension)
    elif args.m_semantic_segmentation:
        labels_file = str(labels_dir / 'cityscapes_19cl_bkgr.txt')
        segmentation = SemanticSegmentation(core, args.m_semantic_segmentation, labels_file,
                                            args.threshold, args.device, args.cpu_extension)
    log.info('The model {} is loaded to {}'.format(model_path, args.device))

    metrics = PerformanceMetrics()
    video_writer = cv2.VideoWriter()

    black_board = False
    frame_number = 0
    key = -1

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    out_frame_size = (frame.shape[1], frame.shape[0] * 2)
    output_frame = np.full((frame.shape[0], frame.shape[1], 3), 255, dtype='uint8')
    presenter = monitors.Presenter(args.utilization_monitors, 20,
                                   (out_frame_size[0] // 4, out_frame_size[1] // 16))
    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), out_frame_size):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        mask = None
        detections = segmentation.get_detections([frame])
        expand_mask(detections, frame.shape[1] // 27)
        if len(detections[0]) > 0:
            mask = detections[0][0][2]
            for i in range(1, len(detections[0])):
                mask = cv2.bitwise_or(mask, detections[0][i][2])

        if mask is not None:
            mask = np.stack([mask, mask, mask], axis=-1)
        else:
            mask = np.zeros(frame.shape, dtype='uint8')

        clear_frame = remove_background(frame, invert_colors=not black_board)

        output_frame = np.where(mask, output_frame, clear_frame)
        merged_frame = np.vstack([frame, output_frame])
        merged_frame = cv2.resize(merged_frame, out_frame_size)

        metrics.update(start_time, merged_frame)

        if video_writer.isOpened() and (args.output_limit <= 0 or frame_number <= args.output_limit - 1):
            video_writer.write(merged_frame)

        presenter.drawGraphs(merged_frame)
        if not args.no_show:
            cv2.imshow(WINNAME, merged_frame)
            key = check_pressed_keys(key)
            if key == 27:  # 'Esc'
                break
            if key == ord('i'):  # catch pressing of key 'i'
                black_board = not black_board
                output_frame = 255 - output_frame
            else:
                presenter.handleKey(key)
            if mouse.crop_available:
                x0, x1 = min(mouse.points[0][0], mouse.points[1][0]), \
                         max(mouse.points[0][0], mouse.points[1][0])
                y0, y1 = min(mouse.points[0][1], mouse.points[1][1]), \
                         max(mouse.points[0][1], mouse.points[1][1])
                x1, y1 = min(x1, output_frame.shape[1] - 1), min(y1, output_frame.shape[0] - 1)
                board = output_frame[y0: y1, x0: x1, :]
                if board.shape[0] > 0 and board.shape[1] > 0:
                    cv2.namedWindow('Board', cv2.WINDOW_KEEPRATIO)
                    cv2.imshow('Board', board)

        frame_number += 1
        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
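
# A tiny self-contained illustration (hypothetical helper, not part of the demo) of the
# compositing step used in the loop above: np.where(mask, output_frame, clear_frame) keeps
# the previously accumulated board pixels wherever the person mask is set and takes the
# freshly cleaned pixels everywhere else, so notes occluded by a person are not erased.
def _compositing_example():
    prev_board = np.full((2, 2, 3), 255, dtype=np.uint8)  # accumulated whiteboard content
    cleaned = np.zeros((2, 2, 3), dtype=np.uint8)         # current frame after background removal
    person_mask = np.zeros((2, 2, 3), dtype=np.uint8)
    person_mask[0, 0] = 1                                 # top-left pixel is covered by a person

    composited = np.where(person_mask, prev_board, cleaned)
    assert (composited[0, 0] == 255).all()                # occluded pixel keeps the old content
    assert (composited[1, 1] == 0).all()                  # visible pixel is refreshed
    return composited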

def main():
    """Prepares data for the object tracking demo"""
    current_dir = os.path.dirname(os.path.abspath(__file__))

    parser = argparse.ArgumentParser(description='Multi camera multi object tracking live demo script')
    parser.add_argument('-i', '--input', required=True, nargs='+',
                        help='Required. Input sources (indexes of cameras or paths to video files)')
    parser.add_argument('--loop', default=False, action='store_true',
                        help='Optional. Enable reading the input in a loop')
    parser.add_argument('--config', type=str, default=os.path.join(current_dir, 'configs/person.py'),
                        required=False, help='Configuration file')
    parser.add_argument('--detections', type=str,
                        help='JSON file with bounding boxes')
    parser.add_argument('-m', '--m_detector', type=str, required=False,
                        help='Path to the object detection model')
    parser.add_argument('--t_detector', type=float, default=0.6,
                        help='Threshold for the object detection model')
    parser.add_argument('--m_segmentation', type=str, required=False,
                        help='Path to the object instance segmentation model')
    parser.add_argument('--t_segmentation', type=float, default=0.6,
                        help='Threshold for object instance segmentation model')
    parser.add_argument('--m_reid', type=str, required=True,
                        help='Required. Path to the object re-identification model')
    parser.add_argument('--output_video', type=str, default='', required=False,
                        help='Optional. Path to output video')
    parser.add_argument('--history_file', type=str, default='', required=False,
                        help='Optional. Path to file in JSON format to save results of the demo')
    parser.add_argument('--save_detections', type=str, default='', required=False,
                        help='Optional. Path to file in JSON format to save bounding boxes')
    parser.add_argument('--no_show', action='store_true',
                        help="Optional. Don't show output")
    parser.add_argument('-d', '--device', type=str, default='CPU')
    parser.add_argument('-u', '--utilization_monitors', default='', type=str,
                        help='Optional. List of monitors to show initially.')
    args = parser.parse_args()

    if check_detectors(args) != 1:
        sys.exit(1)

    if len(args.config):
        log.debug('Reading config from {}'.format(args.config))
        config = read_py_config(args.config)
    else:
        log.error('No configuration file specified. Please specify parameter \'--config\'')
        sys.exit(1)

    random.seed(config.random_seed)
    capture = MulticamCapture(args.input, args.loop)

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    # Detections come either from a file, an instance segmentation model, or a detection model
    if args.detections:
        object_detector = DetectionsFromFileReader(args.detections, args.t_detector)
    elif args.m_segmentation:
        object_detector = MaskRCNN(core, args.m_segmentation,
                                   config.obj_segm.trg_classes, args.t_segmentation,
                                   args.device, capture.get_num_sources())
    else:
        object_detector = Detector(core, args.m_detector,
                                   config.obj_det.trg_classes, args.t_detector,
                                   args.device, capture.get_num_sources())

    if args.m_reid:
        object_recognizer = VectorCNN(core, args.m_reid, args.device)
    else:
        object_recognizer = None

    run(args, config, capture, object_detector, object_recognizer)
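
# A possible launch of this tracking demo; the script name and the model file names below
# are placeholders for illustration, not files shipped with the demo:
#
#   python multi_camera_multi_object_tracking.py \
#       -i 0 1 \
#       -m <object_detection_model>.xml \
#       --m_reid <object_reidentification_model>.xml \
#       --config configs/person.py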

def main():
    parser = argparse.ArgumentParser(description='Whiteboard inpainting demo')
    parser.add_argument('-i', type=str, required=True,
                        help='Input sources (index of camera or path to a video file)')
    parser.add_argument('-m_i', '--m_instance_segmentation', type=str, required=False,
                        help='Path to the instance segmentation model')
    parser.add_argument('-m_s', '--m_semantic_segmentation', type=str, required=False,
                        help='Path to the semantic segmentation model')
    parser.add_argument('-t', '--threshold', type=float, default=0.6,
                        help='Threshold for person instance segmentation model')
    parser.add_argument('--output_video', type=str, default='', required=False,
                        help='Optional. Path to output video')
    parser.add_argument('--no_show', action='store_true',
                        help="Optional. Don't show output")
    parser.add_argument('-d', '--device', type=str, default='CPU',
                        help='Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
                             'acceptable. The demo will look for a suitable plugin for the device specified')
    parser.add_argument('-l', '--cpu_extension', type=str, default=None,
                        help='MKLDNN (CPU)-targeted custom layers. Absolute path to a shared library '
                             'with the kernels impl.')
    parser.add_argument('-u', '--utilization_monitors', default='', type=str,
                        help='Optional. List of monitors to show initially')
    args = parser.parse_args()

    capture = VideoCapture(args.i)

    if bool(args.m_instance_segmentation) == bool(args.m_semantic_segmentation):
        raise ValueError('Set up exactly one of segmentation models: '
                         '--m_instance_segmentation or --m_semantic_segmentation')

    frame_size, fps = capture.get_source_parameters()
    out_frame_size = (int(frame_size[0]), int(frame_size[1] * 2))
    presenter = monitors.Presenter(args.utilization_monitors, 20,
                                   (out_frame_size[0] // 4, out_frame_size[1] // 16))

    root_dir = osp.dirname(osp.abspath(__file__))

    mouse = MouseClick()
    if not args.no_show:
        cv2.namedWindow(WINNAME)
        cv2.setMouseCallback(WINNAME, mouse.get_points)

    if args.output_video:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        output_video = cv2.VideoWriter(args.output_video, fourcc, fps, out_frame_size)
    else:
        output_video = None

    log.info('Initializing Inference Engine')
    ie = IECore()
    if args.m_instance_segmentation:
        labels_file = osp.join(root_dir, 'coco_labels.txt')
        segmentation = MaskRCNN(ie, args.m_instance_segmentation, labels_file,
                                args.threshold, args.device, args.cpu_extension)
    elif args.m_semantic_segmentation:
        labels_file = osp.join(root_dir, 'cityscapes_labels.txt')
        segmentation = SemanticSegmentation(ie, args.m_semantic_segmentation, labels_file,
                                            args.threshold, args.device, args.cpu_extension)

    black_board = False
    output_frame = np.full((frame_size[1], frame_size[0], 3), 255, dtype='uint8')
    frame_number = 0
    key = -1

    while True:
        start = time.time()
        _, frame = capture.get_frame()

        mask = None
        if frame is not None:
            detections = segmentation.get_detections([frame])
            expand_mask(detections, frame_size[0] // 27)
            if len(detections[0]) > 0:
                mask = detections[0][0][2]
                for i in range(1, len(detections[0])):
                    mask = cv2.bitwise_or(mask, detections[0][i][2])
        else:
            break

        if mask is not None:
            mask = np.stack([mask, mask, mask], axis=-1)
        else:
            mask = np.zeros(frame.shape, dtype='uint8')

        clear_frame = remove_background(frame, invert_colors=not black_board)

        output_frame = np.where(mask, output_frame, clear_frame)
        merged_frame = np.vstack([frame, output_frame])
        merged_frame = cv2.resize(merged_frame, out_frame_size)

        if output_video is not None:
            output_video.write(merged_frame)

        presenter.drawGraphs(merged_frame)
        if not args.no_show:
            cv2.imshow(WINNAME, merged_frame)
            key = check_pressed_keys(key)
            if key == 27:  # 'Esc'
                break
            if key == ord('i'):  # catch pressing of key 'i'
                black_board = not black_board
                output_frame = 255 - output_frame
            else:
                presenter.handleKey(key)
            if mouse.crop_available:
                x0, x1 = min(mouse.points[0][0], mouse.points[1][0]), \
                         max(mouse.points[0][0], mouse.points[1][0])
                y0, y1 = min(mouse.points[0][1], mouse.points[1][1]), \
                         max(mouse.points[0][1], mouse.points[1][1])
                x1, y1 = min(x1, output_frame.shape[1] - 1), min(y1, output_frame.shape[0] - 1)
                board = output_frame[y0: y1, x0: x1, :]
                if board.shape[0] > 0 and board.shape[1] > 0:
                    cv2.namedWindow('Board', cv2.WINDOW_KEEPRATIO)
                    cv2.imshow('Board', board)

        end = time.time()
        print('\rProcessing frame: {}, fps = {:.3}'.format(frame_number, 1. / (end - start)), end="")
        frame_number += 1
    print('')

    log.info(presenter.reportMeans())

    if output_video is not None:
        output_video.release()
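
# Exactly one of the two segmentation flags must be passed (enforced by the ValueError
# check above). A possible launch; the script name and model path below are placeholders
# for illustration, not files shipped with the demo:
#
#   python whiteboard_inpainting_demo.py \
#       -i 0 \
#       -m_i <instance_segmentation_model>.xml \
#       -d CPU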