def get_ie_version():
    try:
        from openvino.runtime import get_version  # pylint: disable=import-error,no-name-in-module,import-outside-toplevel
        return get_version()
    except ImportError:
        try:
            from openvino.inference_engine import get_version  # pylint: disable=import-error,no-name-in-module,import-outside-toplevel
            return get_version()
        except ImportError:
            return None
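# Usage sketch for the helper above (assumption: called from the same module).
# It returns the build string from whichever OpenVINO Python API flavor is
# importable, or None when neither openvino.runtime nor
# openvino.inference_engine is installed.
version = get_ie_version()
print(version if version is not None else 'OpenVINO Python API is not available')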
def __init__(self, args, interactive_mode):
    self.args = args
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    self.ie = Core()
    self.encoder = read_net(self.args.m_encoder, self.ie, 'Formula Recognition Encoder')
    self.decoder = read_net(self.args.m_decoder, self.ie, 'Formula Recognition Decoder')
    self.compiled_encoder = self.ie.compile_model(self.encoder, device_name=self.args.device)
    log.info('The Formula Recognition Encoder model {} is loaded to {}'.format(args.m_encoder, args.device))
    self.compiled_decoder = self.ie.compile_model(self.decoder, device_name=self.args.device)
    log.info('The Formula Recognition Decoder model {} is loaded to {}'.format(args.m_decoder, args.device))
    self.images_list = []
    self.vocab = Vocab(self.args.vocab_path)
    self.model_status = Model.Status.READY
    self.is_async = interactive_mode
    self.infer_request_encoder = self.compiled_encoder.create_infer_request()
    self.infer_request_decoder = self.compiled_decoder.create_infer_request()
    self.num_infers_decoder = 0
    self.check_model_dimensions()
    if not interactive_mode:
        self.preprocess_inputs()
def main():
    args = build_argparser().parse_args()

    start_time = perf_counter()
    with wave.open(args.input, 'rb') as wave_read:
        channel_num, sample_width, sampling_rate, pcm_length, compression_type, _ = wave_read.getparams()
        assert sample_width == 2, "Only 16-bit WAV PCM supported"
        assert compression_type == 'NONE', "Only linear PCM WAV files supported"
        assert channel_num == 1, "Only mono WAV PCM supported"
        assert sampling_rate == 16000, "Only 16 KHz audio supported"
        audio = np.frombuffer(wave_read.readframes(pcm_length * channel_num),
                              dtype=np.int16).reshape((1, pcm_length))
        audio = audio.astype(float) / np.iinfo(np.int16).max

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    model = Wav2Vec(core, args.model, audio.shape, args.device, args.vocab, args.dynamic_shape)
    normalized_audio = model.preprocess(audio)
    character_probs = model.infer(normalized_audio)
    transcription = model.decode(character_probs)
    total_latency = (perf_counter() - start_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))
    print(transcription)
def __init__(self, net_model_xml_path, device, stride):
    self.device = device
    self.stride = stride

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    self.core = Core()

    log.info('Reading model {}'.format(net_model_xml_path))
    self.model = self.core.read_model(net_model_xml_path)

    required_output_keys = {'features', 'heatmaps', 'pafs'}
    for output_tensor_name in required_output_keys:
        try:
            self.model.output(output_tensor_name)
        except RuntimeError:
            raise RuntimeError("The demo supports only topologies with the following output keys: {}".format(
                ', '.join(required_output_keys)))

    self.input_tensor_name = self.model.inputs[0].get_any_name()
    compiled_model = self.core.compile_model(self.model, self.device)
    self.infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(net_model_xml_path, self.device))
def main():
    args = build_argparser().parse_args()

    start_time = perf_counter()
    with wave.open(args.input, 'rb') as wave_read:
        channel_num, sample_width, sampling_rate, pcm_length, compression_type, _ = wave_read.getparams()
        assert sample_width == 2, "Only 16-bit WAV PCM supported"
        assert compression_type == 'NONE', "Only linear PCM WAV files supported"
        assert channel_num == 1, "Only mono WAV PCM supported"
        assert sampling_rate == 16000, "Only 16 KHz audio supported"
        audio = np.frombuffer(wave_read.readframes(pcm_length * channel_num),
                              dtype=np.int16).reshape((pcm_length, channel_num))

    log_melspectrum = QuartzNet.audio_to_melspectrum(audio.flatten(), sampling_rate)

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    quartz_net = QuartzNet(core, args.model, log_melspectrum.shape, args.device)
    character_probs = quartz_net.infer(log_melspectrum)
    transcription = QuartzNet.ctc_greedy_decode(character_probs)
    total_latency = (perf_counter() - start_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))
    print(transcription)
def __init__(self, args):
    self.gpu_ext = args.gpu_lib
    self.allow_grow = args.allow_grow and not args.no_show

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if args.cpu_lib and 'CPU' in {args.d_fd, args.d_lm, args.d_reid}:
        core.add_extension(args.cpu_lib, 'CPU')

    self.face_detector = FaceDetector(core, args.m_fd,
                                      args.fd_input_size,
                                      confidence_threshold=args.t_fd,
                                      roi_scale_factor=args.exp_r_fd)
    self.landmarks_detector = LandmarksDetector(core, args.m_lm)
    self.face_identifier = FaceIdentifier(core, args.m_reid,
                                          match_threshold=args.t_id,
                                          match_algo=args.match_algo)

    self.face_detector.deploy(args.d_fd, self.get_config(args.d_fd))
    self.landmarks_detector.deploy(args.d_lm, self.get_config(args.d_lm), self.QUEUE_SIZE)
    self.face_identifier.deploy(args.d_reid, self.get_config(args.d_reid), self.QUEUE_SIZE)

    log.debug('Building faces database using images from {}'.format(args.fg))
    self.faces_database = FacesDatabase(args.fg, self.face_identifier,
                                        self.landmarks_detector,
                                        self.face_detector if args.run_detector else None,
                                        args.no_show)
    self.face_identifier.set_faces_database(self.faces_database)
    log.info('Database is built, registered {} identities'.format(len(self.faces_database)))
def create_core():
    if openvino_absent:
        raise ImportError('The OpenVINO package is not installed')
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    return Core()
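# Hedged usage sketch for create_core(): the factory raises early when the
# package is missing, so callers may assume a working openvino.runtime.Core
# afterwards and use the standard API on it.
core = create_core()
print('Core created, devices: {}'.format(core.available_devices))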
def main():
    args = build_argparser().parse_args()

    # Plugin initialization
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if 'GPU' in args.device:
        core.set_property("GPU", {"GPU_ENABLE_LOOP_UNROLLING": "NO", "CACHE_DIR": "./"})

    # Read IR
    log.info('Reading model {}'.format(args.model))
    model = core.read_model(args.model)

    if len(model.inputs) != 1:
        raise RuntimeError("Demo supports only single input topologies")
    input_tensor_name = model.inputs[0].get_any_name()

    if args.output_blob is not None:
        output_tensor_name = args.output_blob
    else:
        if len(model.outputs) != 1:
            raise RuntimeError("Demo supports only single output topologies")
        output_tensor_name = model.outputs[0].get_any_name()

    characters = get_characters(args)
    codec = CTCCodec(characters, args.designated_characters, args.top_k)
    if len(codec.characters) != model.output(output_tensor_name).shape[2]:
        raise RuntimeError("The text recognition model does not correspond to decoding character list")

    input_batch_size, input_channel, input_height, input_width = model.inputs[0].shape

    # Read and pre-process input image (NOTE: one image only)
    preprocessing_start_time = perf_counter()
    input_image = preprocess_input(args.input, height=input_height, width=input_width)[None, :, :, :]
    preprocessing_total_time = perf_counter() - preprocessing_start_time
    if input_batch_size != input_image.shape[0]:
        raise RuntimeError("The model's input batch size should equal the input image's batch size")
    if input_channel != input_image.shape[1]:
        raise RuntimeError("The model's input channel should equal the input image's channel")

    # Loading model to the plugin
    compiled_model = core.compile_model(model, args.device)
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    # Start sync inference
    start_time = perf_counter()
    for _ in range(args.number_iter):
        infer_request.infer(inputs={input_tensor_name: input_image})
        preds = infer_request.get_tensor(output_tensor_name).data[:]
        result = codec.decode(preds)
        print(result)
    total_latency = ((perf_counter() - start_time) / args.number_iter + preprocessing_total_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))

    sys.exit()
def load_core(device, cpu_extension=None):
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if device == "CPU" and cpu_extension:
        core.add_extension(cpu_extension, "CPU")
    return core
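# Hedged example of the loader above; the shared-library path is hypothetical.
# The extension is registered only when the target device is CPU, so the same
# call with device='GPU' would simply ignore the library.
core = load_core('CPU', cpu_extension='/path/to/libcustom_cpu_extension.so')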
def _prepare_ie(self, log=True):
    if log:
        print_info('IE version: {}'.format(get_version()))
    if self._is_multi():
        self._prepare_multi_device(log)
    else:
        self.async_mode = self.get_value_from_config('async_mode')
        if log:
            self._log_versions()
    self._device_specific_configuration()
def __init__(self, model_path, device):
    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading model {}'.format(model_path))
    self.model = core.read_model(model_path)
    self.input_tensor_name = "Placeholder"
    compiled_model = core.compile_model(self.model, device)
    self.output_tensor = compiled_model.outputs[0]
    self.infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(model_path, device))
def __init__(self, model_path, device, cpu_extension):
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if cpu_extension and device == 'CPU':
        core.add_extension(cpu_extension, 'CPU')

    log.info('Reading model {}'.format(model_path))
    self.model = core.read_model(model_path)
    self.input_tensor_name = "Placeholder"
    compiled_model = core.compile_model(self.model, device)
    self.infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(model_path, device))
def __init__(self, model_xml, model_bin, device, output_name):
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading model {}'.format(model_xml))
    self.model = core.read_model(model_xml, model_bin)
    # Use the 'device' parameter; the original referenced an out-of-scope 'args'.
    compiled_model = core.compile_model(self.model, device)
    self.infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(model_xml, device))
    self.input_tensor_name = "tokens"
    self.output_tensor_name = output_name
    self.model.output(self.output_tensor_name)  # ensure a tensor with the name exists
def __init__(self, model_path, device):
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading model {}'.format(model_path))
    self.model = core.read_model(model_path)
    self.input_tensor_name = self.model.inputs[0].get_any_name()
    self.input_size = self.model.input(self.input_tensor_name).shape
    self.nchw_layout = self.input_size[1] == 3
    compiled_model = core.compile_model(self.model, device)
    self.output_tensor = compiled_model.outputs[0]
    self.infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(model_path, device))
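# The wrapper classes above all share one OpenVINO lifecycle:
# read_model -> compile_model -> create_infer_request -> infer.
# A self-contained sketch of that pattern, assuming 'model.xml' is a
# placeholder path and the model has a single static-shaped input:
import numpy as np
from openvino.runtime import Core

core = Core()
model = core.read_model('model.xml')  # placeholder path
compiled_model = core.compile_model(model, 'CPU')
infer_request = compiled_model.create_infer_request()

# Feed random data shaped like the first model input, fetch the first output.
input_node = model.inputs[0]
dummy_input = np.random.rand(*input_node.shape).astype(np.float32)
results = infer_request.infer({input_node.get_any_name(): dummy_input})
print(results[compiled_model.outputs[0]].shape)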
def __init__(self, model_path, input_name, output_name, quantiles):
    device = "CPU"
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading model {}'.format(model_path))
    model = core.read_model(model_path)
    compiled_model = core.compile_model(model, device)
    self.infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(model_path, device))
    self.input_tensor_name = input_name
    self.output_tensor_name = output_name
    self.quantiles = quantiles
    model.output(self.output_tensor_name)  # ensure a tensor with the name exists
def __init__(self, model_path, device, cpu_extension):
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if cpu_extension and device == 'CPU':
        core.add_extension(cpu_extension, 'CPU')

    log.info('Reading model {}'.format(model_path))
    self.model = core.read_model(model_path, model_path.with_suffix('.bin'))
    self.input_tensor_name = self.model.inputs[0].get_any_name()
    self.input_size = self.model.input(self.input_tensor_name).shape
    self.nchw_layout = self.input_size[1] == 3
    compiled_model = core.compile_model(self.model, device)
    self.infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(model_path, device))
def main():
    args = build_argparser().parse_args()

    if args.labels:
        with open(args.labels) as f:
            labels = [line.strip() for line in f]
    else:
        labels = None

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    if 'MYRIAD' in args.device:
        myriad_config = {'VPU_HW_STAGES_OPTIMIZATION': 'YES'}
        core.set_config(myriad_config, 'MYRIAD')

    decoder_target_device = 'CPU'
    if args.device != 'CPU':
        encoder_target_device = args.device
    else:
        encoder_target_device = decoder_target_device

    models = [IEModel(args.m_encoder, core, encoder_target_device, model_type='Action Recognition Encoder',
                      num_requests=(3 if args.device == 'MYRIAD' else 1))]

    if args.architecture_type == 'en-de':
        if args.m_decoder is None:
            raise RuntimeError('No decoder for encoder-decoder model type (-m_de) provided')
        models.append(IEModel(args.m_decoder, core, decoder_target_device,
                              model_type='Action Recognition Decoder', num_requests=2))
        seq_size = models[1].input_shape[1]
    elif args.architecture_type == 'en-mean':
        models.append(DummyDecoder(num_requests=2))
        seq_size = args.decoder_seq_size
    elif args.architecture_type == 'i3d-rgb':
        seq_size = models[0].input_shape[1]

    presenter = monitors.Presenter(args.utilization_monitors, 70)
    result_presenter = ResultRenderer(no_show=args.no_show, presenter=presenter, output=args.output,
                                      limit=args.output_limit, labels=labels,
                                      label_smoothing_window=args.label_smoothing)
    cap = open_images_capture(args.input, args.loop)
    run_pipeline(cap, args.architecture_type, models, result_presenter.render_frame,
                 args.raw_output_message, seq_size=seq_size, fps=cap.fps())

    for rep in presenter.reportMeans():
        log.info(rep)
def main():
    args = build_arg_parser().parse_args()

    # Loading source image
    img = cv2.imread(args.input, cv2.IMREAD_COLOR)
    if img is None:
        log.error("Cannot load image " + args.input)
        return -1
    if args.auto_mask_color and args.auto_mask_random:
        log.error("-ar and -ac options cannot be used together")
        return -1

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading model {}'.format(args.model))
    inpainting_processor = ImageInpainting(core, args.model, args.device)
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    if args.auto_mask_color or args.auto_mask_random:
        # Command-line inpainting for just one image
        concat_image, result = inpaint_auto(img, inpainting_processor, args)
        if args.output != "":
            cv2.imwrite(args.output, result)
        if not args.no_show:
            cv2.imshow('Image Inpainting Demo', concat_image)
            cv2.waitKey(0)
    else:
        # Inpainting with GUI
        if args.no_show:
            log.error("--no_show argument cannot be used in GUI mode")
            return -1
        InpaintingGUI(img, inpainting_processor).run()
    return 0
def main():
    # parse_args() was missing in the original; 'args' is used as a Namespace below.
    args = build_argparser().parse_args()

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    ie = Core()
    if args.device == "CPU" and args.cpu_extension:
        ie.add_extension(args.cpu_extension, 'CPU')

    log.info('Reading model {}'.format(args.model))
    model = ie.read_model(args.model, args.model[:-4] + ".bin")

    if len(model.inputs) != 1:
        log.error("Demo supports only models with 1 input layer")
        sys.exit(1)
    input_tensor_name = model.inputs[0].get_any_name()
    if len(model.outputs) != 1:
        log.error("Demo supports only models with 1 output layer")
        sys.exit(1)

    batch_size, channels, one, length = model.inputs[0].shape
    if one != 1:
        raise RuntimeError("Wrong third dimension size of model input shape - {} (expected 1)".format(one))

    hop = length - args.overlap if isinstance(args.overlap, int) else int(length * (1.0 - args.overlap))
    if hop < 0:
        log.error("Wrong value for '-ol/--overlap' argument - overlapping more than clip length")
        sys.exit(1)

    compiled_model = ie.compile_model(model, args.device)
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    labels = []
    if args.labels:
        with open(args.labels, "r") as file:
            labels = [line.rstrip() for line in file.readlines()]

    start_time = perf_counter()
    audio = AudioSource(args.input, channels=channels, samplerate=args.sample_rate)

    outputs = []
    clips = 0
    for idx, chunk in enumerate(audio.chunks(length, hop, num_chunks=batch_size)):
        chunk = np.reshape(chunk, model.inputs[0].shape)
        output = next(iter(infer_request.infer({input_tensor_name: chunk}).values()))
        clips += batch_size
        for batch, data in enumerate(output):
            chunk_start_time = (idx * batch_size + batch) * hop / audio.samplerate
            chunk_end_time = ((idx * batch_size + batch) * hop + length) / audio.samplerate
            outputs.append(data)
            label = np.argmax(data)
            if chunk_start_time < audio.duration():
                log.info("[{:.2f}-{:.2f}] - {:6.2%} {:s}".format(
                    chunk_start_time, chunk_end_time, data[label],
                    labels[label] if labels else "Class {}".format(label)))

    total_latency = (perf_counter() - start_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))
    sys.exit(0)
def import_core_modules(silent: bool, path_to_module: str):
    """
    This function checks that the OpenVINO Python API is available and the necessary Python modules exist.
    So the next list of imports must contain all IE/NG Python API imports that are used inside MO.
    :param silent: enables or disables logs printing to stdout
    :param path_to_module: path where python API modules were found
    :return: True if all imports were successful and False otherwise
    """
    try:
        from openvino.offline_transformations import apply_moc_transformations, apply_moc_legacy_transformations, \
            apply_low_latency_transformation  # pylint: disable=import-error,no-name-in-module
        from openvino.offline_transformations import apply_make_stateful_transformation, generate_mapping_file, \
            serialize  # pylint: disable=import-error,no-name-in-module

        from openvino.runtime import Model, get_version  # pylint: disable=import-error,no-name-in-module
        from openvino.runtime.op import Parameter  # pylint: disable=import-error,no-name-in-module
        from openvino.runtime import PartialShape, Dimension  # pylint: disable=import-error,no-name-in-module
        from openvino.frontend import FrontEndManager, FrontEnd  # pylint: disable=no-name-in-module,import-error
        import openvino.frontend  # pylint: disable=import-error,no-name-in-module

        if silent:
            return True

        ie_version = str(get_version())
        mo_version = str(v.get_version())  # pylint: disable=no-member,no-name-in-module

        print("{}: \t{}".format("OpenVINO runtime found in", os.path.dirname(openvino.__file__)))
        print("{}: \t{}".format("OpenVINO runtime version", ie_version))
        print("{}: \t{}".format("Model Optimizer version", mo_version))

        versions_mismatch = False

        mo_hash = v.extract_hash_from_version(mo_version)
        ie_hash = v.extract_hash_from_version(ie_version)

        if mo_hash is not None and ie_hash is not None:
            min_length = min(len(mo_hash), len(ie_hash))
            mo_hash = mo_hash[:min_length]
            ie_hash = ie_hash[:min_length]

        if mo_hash != ie_hash or mo_hash is None or ie_hash is None:
            versions_mismatch = True
            extracted_mo_release_version = v.extract_release_version(mo_version)
            mo_is_custom = extracted_mo_release_version == (None, None)

            print("[ WARNING ] Model Optimizer and OpenVINO runtime versions do not match.")
            print("[ WARNING ] Consider building the OpenVINO Python API from sources or reinstall OpenVINO "
                  "(TM) toolkit using", end=" ")
            if mo_is_custom:
                print("\"pip install openvino\" (may be incompatible with the current Model Optimizer version)")
            else:
                print("\"pip install openvino=={}.{}\"".format(*extracted_mo_release_version))

        simplified_mo_version = v.get_simplified_mo_version()
        message = str(dict({
            "platform": platform.system(),
            "mo_version": simplified_mo_version,
            "ie_version": v.get_simplified_ie_version(version=ie_version),
            "versions_mismatch": versions_mismatch,
        }))
        send_telemetry(simplified_mo_version, message, 'ie_version_check')

        return True
    except Exception as e:
        # Do not print a warning if module wasn't found or silent mode is on
        if "No module named 'openvino" not in str(e):
            print("[ WARNING ] Failed to import OpenVINO Python API in: {}".format(path_to_module))
            print("[ WARNING ] {}".format(e))

            # Send telemetry message about warning
            simplified_mo_version = v.get_simplified_mo_version()
            message = str(dict({
                "platform": platform.system(),
                "mo_version": simplified_mo_version,
                "ie_version": v.get_simplified_ie_version(env=os.environ),
                "python_version": sys.version,
                "error_type": classify_error_type(e),
            }))
            send_telemetry(simplified_mo_version, message, 'ie_import_failed')

        return False
def load_core():
    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    return Core()
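# Usage sketch (assumption: 'model.xml' is a placeholder model path). The
# factory only wires up logging around Core construction, so the returned
# object is used like any openvino.runtime.Core.
core = load_core()
compiled_model = core.compile_model(core.read_model('model.xml'), 'CPU')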
def run_demo(args):
    cap = open_images_capture(args.input, args.loop)

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading Object Detection model {}'.format(args.model_od))
    detector_person = Detector(core, args.model_od, device=args.device, label_class=args.person_label)
    log.info('The Object Detection model {} is loaded to {}'.format(args.model_od, args.device))

    log.info('Reading Human Pose Estimation model {}'.format(args.model_hpe))
    single_human_pose_estimator = HumanPoseEstimator(core, args.model_hpe, device=args.device)
    log.info('The Human Pose Estimation model {} is loaded to {}'.format(args.model_hpe, args.device))

    delay = int(cap.get_type() in ('VIDEO', 'CAMERA'))
    video_writer = cv2.VideoWriter()
    frames_processed = 0
    presenter = monitors.Presenter(args.utilization_monitors, 25)
    metrics = PerformanceMetrics()

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")
    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), (frame.shape[1], frame.shape[0])):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        bboxes = detector_person.detect(frame)
        human_poses = [single_human_pose_estimator.estimate(frame, bbox) for bbox in bboxes]
        presenter.drawGraphs(frame)
        colors = [(0, 0, 255),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0)]
        for pose, bbox in zip(human_poses, bboxes):
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (255, 0, 0), 2)
            for id_kpt, kpt in enumerate(pose):
                cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 3, colors[id_kpt], -1)
        metrics.update(start_time, frame)

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or frames_processed <= args.output_limit):
            video_writer.write(frame)

        if not args.no_show:
            cv2.imshow('Human Pose Estimation Demo', frame)
            key = cv2.waitKey(delay)
            if key == 27:
                break
            presenter.handleKey(key)
        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
def main():
    args = build_argparser().parse_args()

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading Translation model {}'.format(args.translation_model))
    gan_model = CocosnetModel(core, args.translation_model, args.device)
    log.info('The Translation model {} is loaded to {}'.format(args.translation_model, args.device))

    log.info('Reading Semantic Segmentation model {}'.format(args.segmentation_model))
    seg_model = SegmentationModel(core, args.segmentation_model, args.device) if args.segmentation_model else None
    log.info('The Semantic Segmentation model {} is loaded to {}'.format(args.segmentation_model, args.device))

    input_data = []
    use_seg = bool(args.input_images) and bool(args.segmentation_model)
    assert use_seg ^ (bool(args.input_semantics) and bool(args.reference_semantics)), "Don't know where to get data"

    input_images = get_files(args.input_images)
    input_semantics = get_files(args.input_semantics)
    reference_images = get_files(args.reference_images)
    reference_semantics = get_files(args.reference_semantics)

    number_of_objects = len(reference_images)
    if use_seg:
        samples = [input_images, number_of_objects * [''], reference_images, number_of_objects * ['']]
    else:
        samples = [number_of_objects * [''], input_semantics, reference_images, reference_semantics]

    for input_img, input_sem, ref_img, ref_sem in zip(*samples):
        if use_seg:
            in_img = cv2.imread(input_img)
            if in_img is None:
                raise IOError('Image {} cannot be read'.format(input_img))
            input_sem = get_mask_from_image(in_img, seg_model)
            r_img = cv2.imread(ref_img)
            if r_img is None:
                raise IOError('Image {} cannot be read'.format(ref_img))
            ref_sem = get_mask_from_image(r_img, seg_model)
        else:
            input_sem_file = input_sem
            input_sem = cv2.imread(input_sem_file, cv2.IMREAD_GRAYSCALE)
            if input_sem is None:
                raise IOError('Image {} cannot be read'.format(input_sem_file))
            ref_sem_file = ref_sem
            ref_sem = cv2.imread(ref_sem_file, cv2.IMREAD_GRAYSCALE)
            if ref_sem is None:
                raise IOError('Image {} cannot be read'.format(ref_sem_file))
        input_sem = preprocess_semantics(input_sem, input_size=gan_model.input_semantic_size)
        ref_img_file = ref_img
        ref_img = cv2.imread(ref_img_file)
        if ref_img is None:
            raise IOError('Image {} cannot be read'.format(ref_img_file))
        ref_img = preprocess_image(ref_img, input_size=gan_model.input_image_size)
        ref_sem = preprocess_semantics(ref_sem, input_size=gan_model.input_semantic_size)
        input_dict = {
            'input_semantics': input_sem,
            'reference_image': ref_img,
            'reference_semantics': ref_sem
        }
        input_data.append(input_dict)

    outs = [gan_model.infer(**data) for data in input_data]
    results = [postprocess(out) for out in outs]
    save_result(results, args.output_dir)
    log.info("Result image was saved to {}".format(args.output_dir))
def main():
    args = parse_arguments()

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    if 'CPU' in args.target_device:
        if args.number_threads is not None:
            core.set_property("CPU", {'CPU_THREADS_NUM': str(args.number_threads)})
    elif 'GPU' not in args.target_device:
        raise AttributeError("Device {} does not support 3D convolution. "
                             "Please use CPU, GPU or HETERO:*CPU*, HETERO:*GPU*".format(args.target_device))

    log.info('Reading model {}'.format(args.path_to_model))
    model = core.read_model(args.path_to_model)

    if len(model.inputs) != 1:
        raise RuntimeError("only 1 input layer model is supported")
    input_tensor_name = model.inputs[0].get_any_name()
    if args.shape:
        log.debug("Reshape model from {} to {}".format(model.inputs[0].shape, args.shape))
        model.reshape({input_tensor_name: PartialShape(args.shape)})

    if len(model.inputs[0].shape) != 5:
        raise RuntimeError("Incorrect shape {} for 3d convolution network".format(args.shape))

    n, c, d, h, w = model.inputs[0].shape

    compiled_model = core.compile_model(model, args.target_device)
    output_tensor = compiled_model.outputs[0]
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.path_to_model, args.target_device))

    start_time = perf_counter()
    if not os.path.exists(args.path_to_input_data):
        raise AttributeError("Path to input data: '{}' does not exist".format(args.path_to_input_data))

    input_type = get_input_type(args.path_to_input_data)
    is_nifti_data = (input_type == NIFTI_FILE or input_type == NIFTI_FOLDER)

    if input_type == NIFTI_FOLDER:
        series_name = find_series_name(args.path_to_input_data)
        original_data, data_crop, affine, original_size, bbox = \
            read_image(args.path_to_input_data, data_name=series_name, sizes=(d, h, w),
                       mri_sequence_order=args.mri_sequence, full_intensities_range=args.full_intensities_range)
    elif input_type == NIFTI_FILE:
        original_data, data_crop, affine, original_size, bbox = \
            read_image(args.path_to_input_data, data_name=args.path_to_input_data, sizes=(d, h, w),
                       is_series=False,
                       mri_sequence_order=args.mri_sequence, full_intensities_range=args.full_intensities_range)
    else:
        data_crop = np.zeros(shape=(n, c, d, h, w), dtype=float)  # np.float is deprecated; use the builtin
        im_seq = ImageSequence.Iterator(Image.open(args.path_to_input_data))
        for i, page in enumerate(im_seq):
            im = np.array(page).reshape(h, w, c)
            for channel in range(c):
                data_crop[:, channel, i, :, :] = im[:, :, channel]
        original_data = data_crop
        original_size = original_data.shape[-3:]

    input_data = {input_tensor_name: data_crop}
    result = infer_request.infer(input_data)[output_tensor]
    batch, channels, out_d, out_h, out_w = result.shape

    list_img = []
    list_seg_result = []

    for batch, data in enumerate(result):
        seg_result = np.zeros(shape=original_size, dtype=np.uint8)
        if data.shape[1:] != original_size:
            x = bbox[1] - bbox[0]
            y = bbox[3] - bbox[2]
            z = bbox[5] - bbox[4]
            out_result = np.zeros(shape=((channels, ) + original_size), dtype=float)
            out_result[:, bbox[0]:bbox[1], bbox[2]:bbox[3], bbox[4]:bbox[5]] = \
                resample_np(data, (channels, x, y, z), 1)
        else:
            out_result = data

        if channels == 1:
            reshaped_data = out_result.reshape(original_size[0], original_size[1], original_size[2])
            mask = reshaped_data[:, :, :] > 0.5
            reshaped_data[mask] = 1
            seg_result = reshaped_data.astype(int)
        elif channels == 4:
            seg_result = np.argmax(out_result, axis=0).astype(int)
        elif channels == 3:
            res = np.zeros(shape=out_result.shape, dtype=bool)
            res = out_result > 0.5
            wt = res[0]
            tc = res[1]
            et = res[2]
            seg_result[wt] = 2
            seg_result[tc] = 1
            seg_result[et] = 3

        im = np.stack([original_data[batch, 0, :, :, :],
                       original_data[batch, 0, :, :, :],
                       original_data[batch, 0, :, :, :]],
                      axis=3)

        im = 255 * (im - im.min()) / (im.max() - im.min())
        color_seg_frame = np.zeros(im.shape, dtype=np.uint8)
        for idx, c in enumerate(CLASSES_COLOR_MAP):
            color_seg_frame[seg_result[:, :, :] == idx, :] = np.array(c, dtype=np.uint8)
        mask = seg_result[:, :, :] > 0
        im[mask] = color_seg_frame[mask]

        for k in range(im.shape[2]):
            if is_nifti_data:
                list_img.append(Image.fromarray(im[:, :, k, :].astype('uint8'), 'RGB'))
            else:
                list_img.append(Image.fromarray(im[k, :, :, :].astype('uint8'), 'RGB'))

        if args.output_nifti and is_nifti_data:
            list_seg_result.append(seg_result)

    total_latency = (perf_counter() - start_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))

    tiff_output_name = os.path.join(args.path_to_output, 'output.tiff')
    Image.new('RGB', (original_data.shape[3], original_data.shape[2])).save(
        tiff_output_name, append_images=list_img, save_all=True)
    log.debug("Result tiff file was saved to {}".format(tiff_output_name))

    if args.output_nifti and is_nifti_data:
        for seg_res in list_seg_result:
            nii_filename = os.path.join(args.path_to_output,
                                        'output_{}.nii.gz'.format(list_seg_result.index(seg_res)))
            nib.save(nib.Nifti1Image(seg_res, affine=affine), nii_filename)
            log.debug("Result nifti file was saved to {}".format(nii_filename))
def main():
    args = build_argparser().parse_args()

    cap = open_images_capture(args.input, args.loop)

    with open(args.labels, 'rt') as labels_file:
        class_labels = labels_file.read().splitlines()
        assert len(class_labels), 'The file with class labels is empty'

    # Plugin initialization for specified device and load extensions library if specified.
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if args.cpu_extension and 'CPU' in args.device:
        core.add_extension(args.cpu_extension, 'CPU')

    # Read IR
    log.info('Reading model {}'.format(args.model))
    model = core.read_model(args.model)
    image_input, image_info_input, (n, c, h, w), model_type, output_names, postprocessor = check_model(model)
    args.no_keep_aspect_ratio = model_type == 'yolact' or args.no_keep_aspect_ratio

    compiled_model = core.compile_model(model, args.device)
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    if args.no_track:
        tracker = None
    else:
        tracker = StaticIOUTracker()

    if args.delay:
        delay = args.delay
    else:
        delay = int(cap.get_type() in ('VIDEO', 'CAMERA'))

    frames_processed = 0
    metrics = PerformanceMetrics()
    visualizer = Visualizer(class_labels, show_boxes=args.show_boxes, show_scores=args.show_scores)
    video_writer = cv2.VideoWriter()

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    out_frame_size = (frame.shape[1], frame.shape[0])
    presenter = monitors.Presenter(args.utilization_monitors, 45,
                                   (round(out_frame_size[0] / 4), round(out_frame_size[1] / 8)))
    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), out_frame_size):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        if args.no_keep_aspect_ratio:
            # Resize the image to a target size.
            scale_x = w / frame.shape[1]
            scale_y = h / frame.shape[0]
            input_image = cv2.resize(frame, (w, h))
        else:
            # Resize the image to keep the same aspect ratio and to fit it to a window of a target size.
            scale_x = scale_y = min(h / frame.shape[0], w / frame.shape[1])
            input_image = cv2.resize(frame, None, fx=scale_x, fy=scale_y)

        input_image_size = input_image.shape[:2]
        input_image = np.pad(input_image, ((0, h - input_image_size[0]),
                                           (0, w - input_image_size[1]),
                                           (0, 0)),
                             mode='constant', constant_values=0)
        # Change data layout from HWC to CHW.
        input_image = input_image.transpose((2, 0, 1))
        input_image = input_image.reshape((n, c, h, w)).astype(np.float32)
        input_image_info = np.asarray([[input_image_size[0], input_image_size[1], 1]], dtype=np.float32)

        # Run the model.
        feed_dict = {image_input: input_image}
        if image_info_input:
            feed_dict[image_info_input] = input_image_info
        infer_request.infer(feed_dict)
        outputs = {name: infer_request.get_tensor(name).data[:] for name in output_names}

        # Parse detection results of the current request
        scores, classes, boxes, masks = postprocessor(outputs, scale_x, scale_y, *frame.shape[:2], h, w,
                                                      args.prob_threshold)

        if len(boxes) and args.raw_output_message:
            log.debug(' -------------------------- Frame # {} -------------------------- '.format(frames_processed))
            log.debug(' Class ID | Confidence |     XMIN |     YMIN |     XMAX |     YMAX ')
            for box, cls, score in zip(boxes, classes, scores):
                log.debug('{:>10} | {:>10f} | {:>8.2f} | {:>8.2f} | {:>8.2f} | {:>8.2f} '.format(cls, score, *box))

        # Get instance track IDs.
        masks_tracks_ids = None
        if tracker is not None:
            masks_tracks_ids = tracker(masks, classes)

        # Visualize masks.
        frame = visualizer(frame, boxes, classes, scores, presenter, masks, masks_tracks_ids)

        metrics.update(start_time, frame)

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or frames_processed <= args.output_limit):
            video_writer.write(frame)

        # The original checked args.no_show twice in a row; a single block suffices.
        if not args.no_show:
            # Show resulting image.
            cv2.imshow('Results', frame)
            key = cv2.waitKey(delay)
            esc_code = 27
            if key == esc_code:
                break
            presenter.handleKey(key)
        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
def main():
    args = build_argparser().parse_args()

    profile = get_profile(args.profile)
    if args.block_size is None:
        sr = profile['model_sampling_rate']
        args.block_size = round(sr * 10) if not args.realtime else round(sr * profile['frame_stride_seconds'] * 16)

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    start_load_time = time.perf_counter()
    stt = DeepSpeechSeqPipeline(
        core=core,
        model=args.model,
        lm=args.lm,
        beam_width=args.beam_width,
        max_candidates=args.max_candidates,
        profile=profile,
        device=args.device,
        online_decoding=args.realtime,
    )
    log.debug("Loading, including network weights, OpenVINO Runtime initialization, LM, building LM vocabulary trie: "
              "{} s".format(time.perf_counter() - start_load_time))

    start_time = time.perf_counter()
    with wave.open(args.input, 'rb') as wave_read:
        channel_num, sample_width, sampling_rate, pcm_length, compression_type, _ = wave_read.getparams()
        assert sample_width == 2, "Only 16-bit WAV PCM supported"
        assert compression_type == 'NONE', "Only linear PCM WAV files supported"
        assert channel_num == 1, "Only mono WAV PCM supported"
        assert abs(sampling_rate / profile['model_sampling_rate'] - 1) < 0.1, \
            "Only {} kHz WAV PCM supported".format(profile['model_sampling_rate'] / 1e3)
        log.debug("Audio file length: {} s".format(pcm_length / sampling_rate))

        audio_pos = 0
        play_start_time = time.perf_counter()
        iter_wrapper = tqdm if not args.realtime else (lambda x: x)
        for audio_iter in iter_wrapper(range(0, pcm_length, args.block_size)):
            audio_block = np.frombuffer(wave_read.readframes(args.block_size * channel_num),
                                        dtype=np.int16).reshape((-1, channel_num))
            if audio_block.shape[0] == 0:
                break
            audio_pos += audio_block.shape[0]
            #
            # It is possible to call stt.recognize_audio(): 1) for either whole audio files or
            # by splitting files into blocks, and 2) to reuse stt object for multiple files like this:
            #   transcription1 = stt.recognize_audio(whole_audio1, sampling_rate)
            #   transcription2 = stt.recognize_audio(whole_audio2, sampling_rate)
            #   stt.recognize_audio(whole_audio3_block1, sampling_rate, finish=False)
            #   transcription3 = stt.recognize_audio(whole_audio3_block2, sampling_rate, finish=True)
            # If you need intermediate features, you can call pipeline stage by stage: see
            # the implementation of DeepSpeechSeqPipeline.recognize_audio() method.
            #
            partial_transcr = stt.recognize_audio(audio_block, sampling_rate, finish=False)
            if args.realtime:
                if partial_transcr is not None and len(partial_transcr) > 0:
                    print('\r' + partial_transcr[0].text[-args.realtime_window:], end='')
                to_wait = play_start_time + audio_pos / sampling_rate - time.perf_counter()
                if to_wait > 0:
                    time.sleep(to_wait)

    transcription = stt.recognize_audio(None, sampling_rate, finish=True)
    if args.realtime:
        # Replace the transcription with its finalized version for real-time mode
        if transcription is not None and len(transcription) > 0:
            print('\r' + transcription[0].text[-args.realtime_window:])
    else:  # not args.realtime
        # Only show processing time in offline mode because real-time mode is being slowed down by time.sleep()
        total_latency = (time.perf_counter() - start_time) * 1e3
        log.info("Metrics report:")
        log.info("\tLatency: {:.1f} ms".format(total_latency))

    print("\nTranscription(s) and confidence score(s):")
    for candidate in transcription:
        print("{}\t{}".format(candidate.conf, candidate.text))
def main():
    parser = argparse.ArgumentParser(description='Whiteboard inpainting demo')
    parser.add_argument('-i', '--input', required=True,
                        help='Required. Path to a video file or a device node of a web-camera.')
    parser.add_argument('--loop', default=False, action='store_true',
                        help='Optional. Enable reading the input in a loop.')
    parser.add_argument('-o', '--output', required=False,
                        help='Optional. Name of the output file(s) to save.')
    parser.add_argument('-limit', '--output_limit', required=False, default=1000, type=int,
                        help='Optional. Number of frames to store in output. '
                             'If 0 is set, all frames are stored.')
    parser.add_argument('-m_i', '--m_instance_segmentation', type=str, required=False,
                        help='Required. Path to the instance segmentation model.')
    parser.add_argument('-m_s', '--m_semantic_segmentation', type=str, required=False,
                        help='Required. Path to the semantic segmentation model.')
    parser.add_argument('-t', '--threshold', type=float, default=0.6,
                        help='Optional. Threshold for person instance segmentation model.')
    parser.add_argument('--no_show', help="Optional. Don't show output.", action='store_true')
    parser.add_argument('-d', '--device', type=str, default='CPU',
                        help='Optional. Specify a target device to infer on. CPU, GPU, HDDL or MYRIAD is '
                             'acceptable. The demo will look for a suitable plugin for the device specified.')
    parser.add_argument('-l', '--cpu_extension', type=str, default=None,
                        help='MKLDNN (CPU)-targeted custom layers. Absolute '
                             'path to a shared library with the kernels impl.')
    parser.add_argument('-u', '--utilization_monitors', default='', type=str,
                        help='Optional. List of monitors to show initially.')
    args = parser.parse_args()

    cap = open_images_capture(args.input, args.loop)
    if cap.get_type() not in ('VIDEO', 'CAMERA'):
        raise RuntimeError("The input should be a video file or a numeric camera ID")

    if bool(args.m_instance_segmentation) == bool(args.m_semantic_segmentation):
        raise ValueError('Set up exactly one of segmentation models: '
                         '--m_instance_segmentation or --m_semantic_segmentation')

    labels_dir = Path(__file__).resolve().parents[3] / 'data/dataset_classes'
    mouse = MouseClick()
    if not args.no_show:
        cv2.namedWindow(WINNAME)
        cv2.setMouseCallback(WINNAME, mouse.get_points)

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    model_path = args.m_instance_segmentation if args.m_instance_segmentation else args.m_semantic_segmentation
    log.info('Reading model {}'.format(model_path))
    if args.m_instance_segmentation:
        labels_file = str(labels_dir / 'coco_80cl_bkgr.txt')
        segmentation = MaskRCNN(core, args.m_instance_segmentation, labels_file,
                                args.threshold, args.device, args.cpu_extension)
    elif args.m_semantic_segmentation:
        labels_file = str(labels_dir / 'cityscapes_19cl_bkgr.txt')
        segmentation = SemanticSegmentation(core, args.m_semantic_segmentation, labels_file,
                                            args.threshold, args.device, args.cpu_extension)
    log.info('The model {} is loaded to {}'.format(model_path, args.device))

    metrics = PerformanceMetrics()
    video_writer = cv2.VideoWriter()

    black_board = False
    frame_number = 0
    key = -1

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    out_frame_size = (frame.shape[1], frame.shape[0] * 2)
    output_frame = np.full((frame.shape[0], frame.shape[1], 3), 255, dtype='uint8')
    presenter = monitors.Presenter(args.utilization_monitors, 20,
                                   (out_frame_size[0] // 4, out_frame_size[1] // 16))
    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), out_frame_size):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        mask = None
        detections = segmentation.get_detections([frame])
        expand_mask(detections, frame.shape[1] // 27)
        if len(detections[0]) > 0:
            mask = detections[0][0][2]
            for i in range(1, len(detections[0])):
                mask = cv2.bitwise_or(mask, detections[0][i][2])

        if mask is not None:
            mask = np.stack([mask, mask, mask], axis=-1)
        else:
            mask = np.zeros(frame.shape, dtype='uint8')

        clear_frame = remove_background(frame, invert_colors=not black_board)

        output_frame = np.where(mask, output_frame, clear_frame)
        merged_frame = np.vstack([frame, output_frame])
        merged_frame = cv2.resize(merged_frame, out_frame_size)

        metrics.update(start_time, merged_frame)

        if video_writer.isOpened() and (args.output_limit <= 0 or frame_number <= args.output_limit - 1):
            video_writer.write(merged_frame)

        presenter.drawGraphs(merged_frame)
        if not args.no_show:
            cv2.imshow(WINNAME, merged_frame)
            key = check_pressed_keys(key)
            if key == 27:  # 'Esc'
                break
            if key == ord('i'):  # catch pressing of key 'i'
                black_board = not black_board
                output_frame = 255 - output_frame
            else:
                presenter.handleKey(key)

        if mouse.crop_available:
            x0, x1 = min(mouse.points[0][0], mouse.points[1][0]), \
                     max(mouse.points[0][0], mouse.points[1][0])
            y0, y1 = min(mouse.points[0][1], mouse.points[1][1]), \
                     max(mouse.points[0][1], mouse.points[1][1])
            x1, y1 = min(x1, output_frame.shape[1] - 1), min(y1, output_frame.shape[0] - 1)
            board = output_frame[y0: y1, x0: x1, :]
            if board.shape[0] > 0 and board.shape[1] > 0:
                cv2.namedWindow('Board', cv2.WINDOW_KEEPRATIO)
                cv2.imshow('Board', board)

        frame_number += 1
        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
def main():
    """Prepares data for the object tracking demo"""
    current_dir = os.path.dirname(os.path.abspath(__file__))
    parser = argparse.ArgumentParser(description='Multi camera multi object '
                                                 'tracking live demo script')
    parser.add_argument('-i', '--input', required=True, nargs='+',
                        help='Required. Input sources (indexes of cameras or paths to video files)')
    parser.add_argument('--loop', default=False, action='store_true',
                        help='Optional. Enable reading the input in a loop')
    parser.add_argument('--config', type=str, default=os.path.join(current_dir, 'configs/person.py'),
                        required=False, help='Configuration file')
    parser.add_argument('--detections', type=str, help='JSON file with bounding boxes')
    parser.add_argument('-m', '--m_detector', type=str, required=False,
                        help='Path to the object detection model')
    parser.add_argument('--t_detector', type=float, default=0.6,
                        help='Threshold for the object detection model')
    parser.add_argument('--m_segmentation', type=str, required=False,
                        help='Path to the object instance segmentation model')
    parser.add_argument('--t_segmentation', type=float, default=0.6,
                        help='Threshold for object instance segmentation model')
    parser.add_argument('--m_reid', type=str, required=True,
                        help='Required. Path to the object re-identification model')
    parser.add_argument('--output_video', type=str, default='', required=False,
                        help='Optional. Path to output video')
    parser.add_argument('--history_file', type=str, default='', required=False,
                        help='Optional. Path to file in JSON format to save results of the demo')
    parser.add_argument('--save_detections', type=str, default='', required=False,
                        help='Optional. Path to file in JSON format to save bounding boxes')
    parser.add_argument("--no_show", help="Optional. Don't show output", action='store_true')
    parser.add_argument('-d', '--device', type=str, default='CPU')
    parser.add_argument('-u', '--utilization_monitors', default='', type=str,
                        help='Optional. List of monitors to show initially.')

    args = parser.parse_args()
    if check_detectors(args) != 1:
        sys.exit(1)

    if len(args.config):
        log.debug('Reading config from {}'.format(args.config))
        config = read_py_config(args.config)
    else:
        log.error('No configuration file specified. Please specify parameter \'--config\'')
        sys.exit(1)

    random.seed(config.random_seed)
    capture = MulticamCapture(args.input, args.loop)

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    if args.detections:
        object_detector = DetectionsFromFileReader(args.detections, args.t_detector)
    elif args.m_segmentation:
        object_detector = MaskRCNN(core, args.m_segmentation, config.obj_segm.trg_classes,
                                   args.t_segmentation, args.device, capture.get_num_sources())
    else:
        object_detector = Detector(core, args.m_detector, config.obj_det.trg_classes,
                                   args.t_detector, args.device, capture.get_num_sources())

    if args.m_reid:
        object_recognizer = VectorCNN(core, args.m_reid, args.device)
    else:
        object_recognizer = None

    run(args, config, capture, object_detector, object_recognizer)
def main(args):
    cap = open_images_capture(args.input, args.loop)

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading model {}'.format(args.model))
    model = core.read_model(args.model)

    input_tensor_name = 'data_l'
    input_shape = model.input(input_tensor_name).shape
    assert input_shape[1] == 1, "Expected model input shape with 1 channel"

    inputs = {}
    for input in model.inputs:
        inputs[input.get_any_name()] = np.zeros(input.shape)

    assert len(model.outputs) == 1, "Expected number of outputs is equal 1"

    compiled_model = core.compile_model(model, device_name=args.device)
    output_tensor = compiled_model.outputs[0]
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    _, _, h_in, w_in = input_shape

    frames_processed = 0
    imshow_size = (640, 480)
    graph_size = (imshow_size[0] // 2, imshow_size[1] // 4)
    presenter = monitors.Presenter(args.utilization_monitors, imshow_size[1] * 2 - graph_size[1], graph_size)
    metrics = PerformanceMetrics()

    video_writer = cv.VideoWriter()
    if args.output and not video_writer.open(args.output, cv.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), (imshow_size[0] * 2, imshow_size[1] * 2)):
        raise RuntimeError("Can't open video writer")

    start_time = perf_counter()
    original_frame = cap.read()
    if original_frame is None:
        raise RuntimeError("Can't read an image from the input")

    while original_frame is not None:
        (h_orig, w_orig) = original_frame.shape[:2]

        if original_frame.shape[2] > 1:
            frame = cv.cvtColor(cv.cvtColor(original_frame, cv.COLOR_BGR2GRAY), cv.COLOR_GRAY2RGB)
        else:
            frame = cv.cvtColor(original_frame, cv.COLOR_GRAY2RGB)

        img_rgb = frame.astype(np.float32) / 255
        img_lab = cv.cvtColor(img_rgb, cv.COLOR_RGB2Lab)
        img_l_rs = cv.resize(img_lab.copy(), (w_in, h_in))[:, :, 0]

        inputs[input_tensor_name] = np.expand_dims(img_l_rs, axis=[0, 1])

        res = infer_request.infer(inputs)[output_tensor]

        update_res = np.squeeze(res)

        out = update_res.transpose((1, 2, 0))
        out = cv.resize(out, (w_orig, h_orig))
        img_lab_out = np.concatenate((img_lab[:, :, 0][:, :, np.newaxis], out), axis=2)
        img_bgr_out = np.clip(cv.cvtColor(img_lab_out, cv.COLOR_Lab2BGR), 0, 1)

        original_image = cv.resize(original_frame, imshow_size)
        grayscale_image = cv.resize(frame, imshow_size)
        colorize_image = (cv.resize(img_bgr_out, imshow_size) * 255).astype(np.uint8)
        lab_image = cv.resize(img_lab_out, imshow_size).astype(np.uint8)

        original_image = cv.putText(original_image, 'Original', (25, 50),
                                    cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv.LINE_AA)
        grayscale_image = cv.putText(grayscale_image, 'Grayscale', (25, 50),
                                     cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv.LINE_AA)
        colorize_image = cv.putText(colorize_image, 'Colorize', (25, 50),
                                    cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv.LINE_AA)
        lab_image = cv.putText(lab_image, 'LAB interpretation', (25, 50),
                               cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv.LINE_AA)

        ir_image = [cv.hconcat([original_image, grayscale_image]),
                    cv.hconcat([lab_image, colorize_image])]
        final_image = cv.vconcat(ir_image)

        metrics.update(start_time, final_image)

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or frames_processed <= args.output_limit):
            video_writer.write(final_image)

        presenter.drawGraphs(final_image)
        if not args.no_show:
            cv.imshow('Colorization Demo', final_image)
            key = cv.waitKey(1)
            if key in {ord("q"), ord("Q"), 27}:
                break
            presenter.handleKey(key)
        start_time = perf_counter()
        original_frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
def main():
    args = build_argparser().parse_args()

    cap = open_images_capture(args.input, args.loop)

    # Plugin initialization for specified device and load extensions library if specified
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    # Read IR
    log.info('Reading Proposal model {}'.format(args.model_pnet))
    p_net = core.read_model(args.model_pnet)
    if len(p_net.inputs) != 1:
        raise RuntimeError("Pnet supports only single input topologies")
    if len(p_net.outputs) != 2:
        raise RuntimeError("Pnet supports two output topologies")

    log.info('Reading Refine model {}'.format(args.model_rnet))
    r_net = core.read_model(args.model_rnet)
    if len(r_net.inputs) != 1:
        raise RuntimeError("Rnet supports only single input topologies")
    if len(r_net.outputs) != 2:
        raise RuntimeError("Rnet supports two output topologies")

    log.info('Reading Output model {}'.format(args.model_onet))
    o_net = core.read_model(args.model_onet)
    if len(o_net.inputs) != 1:
        raise RuntimeError("Onet supports only single input topologies")
    if len(o_net.outputs) != 3:
        raise RuntimeError("Onet supports three output topologies")

    pnet_input_tensor_name = p_net.inputs[0].get_any_name()
    rnet_input_tensor_name = r_net.inputs[0].get_any_name()
    onet_input_tensor_name = o_net.inputs[0].get_any_name()

    for node in p_net.outputs:
        if node.shape[1] == 2:
            pnet_cls_name = node.get_any_name()
        elif node.shape[1] == 4:
            pnet_roi_name = node.get_any_name()
        else:
            raise RuntimeError("Unsupported output layer for Pnet")

    for node in r_net.outputs:
        if node.shape[1] == 2:
            rnet_cls_name = node.get_any_name()
        elif node.shape[1] == 4:
            rnet_roi_name = node.get_any_name()
        else:
            raise RuntimeError("Unsupported output layer for Rnet")

    for node in o_net.outputs:
        if node.shape[1] == 2:
            onet_cls_name = node.get_any_name()
        elif node.shape[1] == 4:
            onet_roi_name = node.get_any_name()
        elif node.shape[1] == 10:
            onet_pts_name = node.get_any_name()
        else:
            raise RuntimeError("Unsupported output layer for Onet")

    next_frame_id = 0

    metrics = PerformanceMetrics()
    presenter = None
    video_writer = cv2.VideoWriter()
    is_loaded_before = False

    while True:
        start_time = perf_counter()
        origin_image = cap.read()
        if origin_image is None:
            if next_frame_id == 0:
                raise ValueError("Can't read an image from the input")
            break
        if next_frame_id == 0:
            presenter = monitors.Presenter(args.utilization_monitors, 55,
                                           (round(origin_image.shape[1] / 4), round(origin_image.shape[0] / 8)))
            if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'), cap.fps(),
                                                     (origin_image.shape[1], origin_image.shape[0])):
                raise RuntimeError("Can't open video writer")
        next_frame_id += 1

        rgb_image = cv2.cvtColor(origin_image, cv2.COLOR_BGR2RGB)
        oh, ow, _ = rgb_image.shape

        scales = utils.calculate_scales(rgb_image)

        # *************************************
        # Pnet stage
        # *************************************
        pnet_res = []
        for i, scale in enumerate(scales):
            hs = int(oh * scale)
            ws = int(ow * scale)
            image = preprocess_image(rgb_image, ws, hs)

            p_net.reshape({pnet_input_tensor_name: PartialShape([1, 3, ws, hs])})  # Change width and height of input blob
            compiled_pnet = core.compile_model(p_net, args.device)
            infer_request_pnet = compiled_pnet.create_infer_request()
            if i == 0 and not is_loaded_before:
                log.info("The Proposal model {} is loaded to {}".format(args.model_pnet, args.device))

            infer_request_pnet.infer(inputs={pnet_input_tensor_name: image})
            p_res = {name: infer_request_pnet.get_tensor(name).data[:] for name in {pnet_roi_name, pnet_cls_name}}
            pnet_res.append(p_res)

        image_num = len(scales)
        rectangles = []
        for i in range(image_num):
            roi = pnet_res[i][pnet_roi_name]
            cls = pnet_res[i][pnet_cls_name]
            _, _, out_h, out_w = cls.shape
            out_side = max(out_h, out_w)
            rectangle = utils.detect_face_12net(cls[0][1], roi[0], out_side, 1 / scales[i], ow, oh,
                                                score_threshold[0], iou_threshold[0])
            rectangles.extend(rectangle)
        rectangles = utils.NMS(rectangles, iou_threshold[1], 'iou')

        # Rnet stage
        if len(rectangles) > 0:
            r_net.reshape({rnet_input_tensor_name: PartialShape([len(rectangles), 3, 24, 24])})  # Change batch size of input blob
            compiled_rnet = core.compile_model(r_net, args.device)
            infer_request_rnet = compiled_rnet.create_infer_request()
            if not is_loaded_before:
                log.info("The Refine model {} is loaded to {}".format(args.model_rnet, args.device))

            rnet_input = []
            for rectangle in rectangles:
                crop_img = rgb_image[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
                crop_img = preprocess_image(crop_img, 24, 24)
                rnet_input.extend(crop_img)

            infer_request_rnet.infer(inputs={rnet_input_tensor_name: rnet_input})
            rnet_res = {name: infer_request_rnet.get_tensor(name).data[:] for name in {rnet_roi_name, rnet_cls_name}}

            roi = rnet_res[rnet_roi_name]
            cls = rnet_res[rnet_cls_name]
            rectangles = utils.filter_face_24net(cls, roi, rectangles, ow, oh,
                                                 score_threshold[1], iou_threshold[2])

        # Onet stage
        if len(rectangles) > 0:
            o_net.reshape({onet_input_tensor_name: PartialShape([len(rectangles), 3, 48, 48])})  # Change batch size of input blob
            compiled_onet = core.compile_model(o_net, args.device)
            infer_request_onet = compiled_onet.create_infer_request()
            if not is_loaded_before:
                log.info("The Output model {} is loaded to {}".format(args.model_onet, args.device))
                is_loaded_before = True

            onet_input = []
            for rectangle in rectangles:
                crop_img = rgb_image[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
                crop_img = preprocess_image(crop_img, 48, 48)
                onet_input.extend(crop_img)

            infer_request_onet.infer(inputs={onet_input_tensor_name: onet_input})
            onet_res = {name: infer_request_onet.get_tensor(name).data[:]
                        for name in {onet_roi_name, onet_cls_name, onet_pts_name}}

            roi = onet_res[onet_roi_name]
            cls = onet_res[onet_cls_name]
            pts = onet_res[onet_pts_name]
            rectangles = utils.filter_face_48net(cls, roi, pts, rectangles, ow, oh,
                                                 score_threshold[2], iou_threshold[3])

        # display results
        for rectangle in rectangles:
            # Draw detected boxes
            cv2.putText(origin_image, 'confidence: {:.2f}'.format(rectangle[4]),
                        (int(rectangle[0]), int(rectangle[1])),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0))
            cv2.rectangle(origin_image, (int(rectangle[0]), int(rectangle[1])),
                          (int(rectangle[2]), int(rectangle[3])), (255, 0, 0), 1)
            # Draw landmarks
            for i in range(5, 15, 2):
                cv2.circle(origin_image, (int(rectangle[i + 0]), int(rectangle[i + 1])), 2, (0, 255, 0))

        metrics.update(start_time, origin_image)

        if video_writer.isOpened() and (args.output_limit <= 0 or next_frame_id <= args.output_limit):
            video_writer.write(origin_image)

        if not args.no_show:
            cv2.imshow('MTCNN Results', origin_image)
            key = cv2.waitKey(1)
            if key in {ord('q'), ord('Q'), 27}:
                break
            presenter.handleKey(key)

    metrics.log_total()