Example #1
def get_ie_version():
    try:
        from openvino.runtime import get_version  # pylint: disable=import-error,no-name-in-module,import-outside-toplevel
        return get_version()
    except ImportError:
        try:
            from openvino.inference_engine import get_version  # pylint: disable=import-error,no-name-in-module,import-outside-toplevel
            return get_version()
        except ImportError:
            return None
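A quick usage sketch for the helper above (a hypothetical caller, not part of the original example):

version = get_ie_version()
if version is None:
    print('OpenVINO is not installed')
else:
    print('OpenVINO version: {}'.format(version))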
Example #2
 def __init__(self, args, interactive_mode):
     self.args = args
     log.info('OpenVINO Inference Engine')
     log.info('\tbuild: {}'.format(get_version()))
     self.ie = Core()
     self.encoder = read_net(self.args.m_encoder, self.ie,
                             'Formula Recognition Encoder')
     self.decoder = read_net(self.args.m_decoder, self.ie,
                             'Formula Recognition Decoder')
     self.compiled_encoder = self.ie.compile_model(
         self.encoder, device_name=self.args.device)
     log.info(
         'The Formula Recognition Encoder model {} is loaded to {}'.format(
             args.m_encoder, args.device))
     self.compiled_decoder = self.ie.compile_model(
         self.decoder, device_name=self.args.device)
     log.info(
         'The Formula Recognition Decoder model {} is loaded to {}'.format(
             args.m_decoder, args.device))
     self.images_list = []
     self.vocab = Vocab(self.args.vocab_path)
     self.model_status = Model.Status.READY
     self.is_async = interactive_mode
     self.infer_request_encoder = self.compiled_encoder.create_infer_request()
     self.infer_request_decoder = self.compiled_decoder.create_infer_request()
     self.num_infers_decoder = 0
     self.check_model_dimensions()
     if not interactive_mode:
         self.preprocess_inputs()
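Note: read_net is a helper defined elsewhere in this demo; a plausible minimal sketch, assuming it only logs the action and reads the IR model:

def read_net(model_path, core, model_name):
    # Hypothetical helper: log which network is read, then load the IR model.
    log.info('Reading {} model {}'.format(model_name, model_path))
    return core.read_model(model_path)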
Example #3
def main():
    args = build_argparser().parse_args()

    start_time = perf_counter()
    with wave.open(args.input, 'rb') as wave_read:
        channel_num, sample_width, sampling_rate, pcm_length, compression_type, _ = wave_read.getparams()
        assert sample_width == 2, "Only 16-bit WAV PCM supported"
        assert compression_type == 'NONE', "Only linear PCM WAV files supported"
        assert channel_num == 1, "Only mono WAV PCM supported"
        assert sampling_rate == 16000, "Only 16 kHz audio supported"
        audio = np.frombuffer(wave_read.readframes(pcm_length * channel_num),
                              dtype=np.int16).reshape((1, pcm_length))
        audio = audio.astype(float) / np.iinfo(np.int16).max

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    model = Wav2Vec(core, args.model, audio.shape, args.device, args.vocab,
                    args.dynamic_shape)
    normalized_audio = model.preprocess(audio)
    character_probs = model.infer(normalized_audio)
    transcription = model.decode(character_probs)
    total_latency = (perf_counter() - start_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))
    print(transcription)
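Wav2Vec itself is not shown here, but the demos in this collection share one OpenVINO 2.0 flow; a minimal sketch of that pattern (the model path, device, and input name are placeholders):

from openvino.runtime import Core

core = Core()
model = core.read_model('model.xml')                   # read the IR
compiled_model = core.compile_model(model, 'CPU')      # load the model to a device
infer_request = compiled_model.create_infer_request()  # entry point for sync inference
# results = infer_request.infer({'input_name': input_data})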
Example #4
    def __init__(self, net_model_xml_path, device, stride):
        self.device = device
        self.stride = stride

        log.info('OpenVINO Inference Engine')
        log.info('\tbuild: {}'.format(get_version()))
        self.core = Core()

        log.info('Reading model {}'.format(net_model_xml_path))
        self.model = self.core.read_model(net_model_xml_path)

        required_output_keys = {'features', 'heatmaps', 'pafs'}
        for output_tensor_name in required_output_keys:
            try:
                self.model.output(output_tensor_name)
            except RuntimeError:
                raise RuntimeError(
                    "The demo supports only topologies with the following output keys: {}"
                    .format(', '.join(required_output_keys)))

        self.input_tensor_name = self.model.inputs[0].get_any_name()
        compiled_model = self.core.compile_model(self.model, self.device)
        self.infer_request = compiled_model.create_infer_request()
        log.info('The model {} is loaded to {}'.format(net_model_xml_path,
                                                       self.device))
Example #5
def main():
    args = build_argparser().parse_args()

    start_time = perf_counter()
    with wave.open(args.input, 'rb') as wave_read:
        channel_num, sample_width, sampling_rate, pcm_length, compression_type, _ = wave_read.getparams()
        assert sample_width == 2, "Only 16-bit WAV PCM supported"
        assert compression_type == 'NONE', "Only linear PCM WAV files supported"
        assert channel_num == 1, "Only mono WAV PCM supported"
        assert sampling_rate == 16000, "Only 16 kHz audio supported"
        audio = np.frombuffer(wave_read.readframes(pcm_length * channel_num),
                              dtype=np.int16).reshape(
                                  (pcm_length, channel_num))

    log_melspectrum = QuartzNet.audio_to_melspectrum(audio.flatten(),
                                                     sampling_rate)

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    quartz_net = QuartzNet(core, args.model, log_melspectrum.shape,
                           args.device)
    character_probs = quartz_net.infer(log_melspectrum)
    transcription = QuartzNet.ctc_greedy_decode(character_probs)
    total_latency = (perf_counter() - start_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))
    print(transcription)
Example #6
    def __init__(self, args):
        self.gpu_ext = args.gpu_lib
        self.allow_grow = args.allow_grow and not args.no_show

        log.info('OpenVINO Inference Engine')
        log.info('\tbuild: {}'.format(get_version()))
        core = Core()
        if args.cpu_lib and 'CPU' in {args.d_fd, args.d_lm, args.d_reid}:
            core.add_extension(args.cpu_lib, 'CPU')

        self.face_detector = FaceDetector(core,
                                          args.m_fd,
                                          args.fd_input_size,
                                          confidence_threshold=args.t_fd,
                                          roi_scale_factor=args.exp_r_fd)
        self.landmarks_detector = LandmarksDetector(core, args.m_lm)
        self.face_identifier = FaceIdentifier(core,
                                              args.m_reid,
                                              match_threshold=args.t_id,
                                              match_algo=args.match_algo)

        self.face_detector.deploy(args.d_fd, self.get_config(args.d_fd))
        self.landmarks_detector.deploy(args.d_lm, self.get_config(args.d_lm),
                                       self.QUEUE_SIZE)
        self.face_identifier.deploy(args.d_reid, self.get_config(args.d_reid),
                                    self.QUEUE_SIZE)

        log.debug('Building faces database using images from {}'.format(
            args.fg))
        self.faces_database = FacesDatabase(
            args.fg, self.face_identifier, self.landmarks_detector,
            self.face_detector if args.run_detector else None, args.no_show)
        self.face_identifier.set_faces_database(self.faces_database)
        log.info('Database is built, registered {} identities'.format(
            len(self.faces_database)))
Example #7
def create_core():
    if openvino_absent:
        raise ImportError('The OpenVINO package is not installed')

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    return Core()
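create_core checks an openvino_absent flag that is set elsewhere; a plausible guarded import producing it (an assumption, not shown in the original):

try:
    from openvino.runtime import Core, get_version
    openvino_absent = False
except ImportError:
    openvino_absent = True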
Example #8
def main():
    args = build_argparser().parse_args()

    # Plugin initialization
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    if 'GPU' in args.device:
        core.set_property("GPU", {"GPU_ENABLE_LOOP_UNROLLING": "NO", "CACHE_DIR": "./"})

    # Read IR
    log.info('Reading model {}'.format(args.model))
    model = core.read_model(args.model)

    if len(model.inputs) != 1:
        raise RuntimeError("Demo supports only single input topologies")
    input_tensor_name = model.inputs[0].get_any_name()

    if args.output_blob is not None:
        output_tensor_name = args.output_blob
    else:
        if len(model.outputs) != 1:
            raise RuntimeError("Demo supports only single output topologies")
        output_tensor_name = model.outputs[0].get_any_name()

    characters = get_characters(args)
    codec = CTCCodec(characters, args.designated_characters, args.top_k)
    if len(codec.characters) != model.output(output_tensor_name).shape[2]:
        raise RuntimeError("The text recognition model does not correspond to decoding character list")

    input_batch_size, input_channel, input_height, input_width = model.inputs[0].shape

    # Read and pre-process input image (NOTE: one image only)
    preprocessing_start_time = perf_counter()
    input_image = preprocess_input(args.input, height=input_height, width=input_width)[None, :, :, :]
    preprocessing_total_time = perf_counter() - preprocessing_start_time
    if input_batch_size != input_image.shape[0]:
        raise RuntimeError("The model's input batch size should equal the input image's batch size")
    if input_channel != input_image.shape[1]:
        raise RuntimeError("The model's input channel should equal the input image's channel")

    # Loading model to the plugin
    compiled_model = core.compile_model(model, args.device)
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    # Start sync inference
    start_time = perf_counter()
    for _ in range(args.number_iter):
        infer_request.infer(inputs={input_tensor_name: input_image})
        preds = infer_request.get_tensor(output_tensor_name).data[:]
        result = codec.decode(preds)
        print(result)
    total_latency = ((perf_counter() - start_time) / args.number_iter + preprocessing_total_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))

    sys.exit()
Example #9
def load_core(device, cpu_extension=None):
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if device == "CPU" and cpu_extension:
        core.add_extension(cpu_extension, "CPU")

    return core
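A usage sketch for the helper above (the extension path is a placeholder):

core = load_core('CPU', cpu_extension='/path/to/libcpu_extension.so')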
Example #10
 def _prepare_ie(self, log=True):
     if log:
         print_info('IE version: {}'.format(get_version()))
     if self._is_multi():
         self._prepare_multi_device(log)
     else:
         self.async_mode = self.get_value_from_config('async_mode')
         if log:
             self._log_versions()
     self._device_specific_configuration()
Example #11
    def __init__(self, model_path, device):
        log.info('OpenVINO Runtime')
        log.info('\tbuild: {}'.format(get_version()))
        core = Core()

        log.info('Reading model {}'.format(model_path))
        self.model = core.read_model(model_path)
        self.input_tensor_name = "Placeholder"
        compiled_model = core.compile_model(self.model, device)
        self.output_tensor = compiled_model.outputs[0]
        self.infer_request = compiled_model.create_infer_request()
        log.info('The model {} is loaded to {}'.format(model_path, device))
Example #12
    def __init__(self, model_path, device, cpu_extension):
        log.info('OpenVINO Inference Engine')
        log.info('\tbuild: {}'.format(get_version()))
        core = Core()
        if cpu_extension and device == 'CPU':
            core.add_extension(cpu_extension, 'CPU')

        log.info('Reading model {}'.format(model_path))
        self.model = core.read_model(model_path)
        self.input_tensor_name = "Placeholder"
        compiled_model = core.compile_model(self.model, device)
        self.infer_request = compiled_model.create_infer_request()
        log.info('The model {} is loaded to {}'.format(model_path, device))
Example #13
    def __init__(self, model_xml, model_bin, device, output_name):
        log.info('OpenVINO Inference Engine')
        log.info('\tbuild: {}'.format(get_version()))
        core = Core()

        log.info('Reading model {}'.format(model_xml))
        self.model = core.read_model(model_xml, model_bin)
        compiled_model = core.compile_model(self.model, device)
        self.infer_request = compiled_model.create_infer_request()
        log.info('The model {} is loaded to {}'.format(model_xml, device))
        self.input_tensor_name = "tokens"
        self.output_tensor_name = output_name
        self.model.output(self.output_tensor_name)  # ensure a tensor with the name exists
Example #14
    def __init__(self, model_path, device):
        log.info('OpenVINO Inference Engine')
        log.info('\tbuild: {}'.format(get_version()))
        core = Core()

        log.info('Reading model {}'.format(model_path))
        self.model = core.read_model(model_path)
        self.input_tensor_name = self.model.inputs[0].get_any_name()
        self.input_size = self.model.input(self.input_tensor_name).shape
        self.nchw_layout = self.input_size[1] == 3
        compiled_model = core.compile_model(self.model, device)
        self.output_tensor = compiled_model.outputs[0]
        self.infer_request = compiled_model.create_infer_request()
        log.info('The model {} is loaded to {}'.format(model_path, device))
Example #15
 def __init__(self, model_path, input_name, output_name, quantiles):
     device = "CPU"
     log.info('OpenVINO Inference Engine')
     log.info('\tbuild: {}'.format(get_version()))
     core = Core()
     log.info('Reading model {}'.format(model_path))
     model = core.read_model(model_path)
     compiled_model = core.compile_model(model, device)
     self.infer_request = compiled_model.create_infer_request()
     log.info('The model {} is loaded to {}'.format(model_path, device))
     self.input_tensor_name = input_name
     self.output_tensor_name = output_name
     self.quantiles = quantiles
     model.output(self.output_tensor_name)  # ensure a tensor with the name exists
Example #16
    def __init__(self, model_path, device, cpu_extension):
        log.info('OpenVINO Inference Engine')
        log.info('\tbuild: {}'.format(get_version()))
        core = Core()
        if cpu_extension and device == 'CPU':
            core.add_extension(cpu_extension, 'CPU')

        log.info('Reading model {}'.format(model_path))
        self.model = core.read_model(model_path,
                                     model_path.with_suffix('.bin'))
        self.input_tensor_name = self.model.inputs[0].get_any_name()
        self.input_size = self.model.input(self.input_tensor_name).shape
        self.nchw_layout = self.input_size[1] == 3
        compiled_model = core.compile_model(self.model, device)
        self.infer_request = compiled_model.create_infer_request()
        log.info('The model {} is loaded to {}'.format(model_path, device))
Example #17
def main():
    args = build_argparser().parse_args()

    if args.labels:
        with open(args.labels) as f:
            labels = [line.strip() for line in f]
    else:
        labels = None

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    if 'MYRIAD' in args.device:
        myriad_config = {'VPU_HW_STAGES_OPTIMIZATION': 'YES'}
        core.set_property('MYRIAD', myriad_config)

    decoder_target_device = 'CPU'
    if args.device != 'CPU':
        encoder_target_device = args.device
    else:
        encoder_target_device = decoder_target_device

    models = [IEModel(args.m_encoder, core, encoder_target_device, model_type='Action Recognition Encoder',
                      num_requests=(3 if args.device == 'MYRIAD' else 1))]

    if args.architecture_type == 'en-de':
        if args.m_decoder is None:
            raise RuntimeError('No decoder for encoder-decoder model type (-m_de) provided')
        models.append(IEModel(args.m_decoder, core, decoder_target_device, model_type='Action Recognition Decoder', num_requests=2))
        seq_size = models[1].input_shape[1]
    elif args.architecture_type == 'en-mean':
        models.append(DummyDecoder(num_requests=2))
        seq_size = args.decoder_seq_size
    elif args.architecture_type == 'i3d-rgb':
        seq_size = models[0].input_shape[1]

    presenter = monitors.Presenter(args.utilization_monitors, 70)
    result_presenter = ResultRenderer(no_show=args.no_show, presenter=presenter, output=args.output, limit=args.output_limit, labels=labels,
                                      label_smoothing_window=args.label_smoothing)
    cap = open_images_capture(args.input, args.loop)
    run_pipeline(cap, args.architecture_type, models, result_presenter.render_frame, args.raw_output_message,
                 seq_size=seq_size, fps=cap.fps())

    for rep in presenter.reportMeans():
        log.info(rep)
Example #18
def main():
    args = build_arg_parser().parse_args()

    # Loading source image
    img = cv2.imread(args.input, cv2.IMREAD_COLOR)
    if img is None:
        log.error("Cannot load image " + args.input)
        return -1

    if args.auto_mask_color and args.auto_mask_random:
        log.error("-ar and -ac options cannot be used together")
        return -1

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading model {}'.format(args.model))
    inpainting_processor = ImageInpainting(core, args.model, args.device)
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    if args.auto_mask_color or args.auto_mask_random:
        # Command-line inpainting for just one image
        concat_image, result = inpaint_auto(img, inpainting_processor, args)
        if args.output != "":
            cv2.imwrite(args.output, result)
        if not args.no_show:
            cv2.imshow('Image Inpainting Demo', concat_image)
            cv2.waitKey(0)
    else:
        # Inpainting with GUI
        if args.no_show:
            log.error("--no_show argument cannot be used in GUI mode")
            return -1
        InpaintingGUI(img, inpainting_processor).run()
    return 0
Example #19
def main():
    args = build_argparser()

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    ie = Core()
    if args.device == "CPU" and args.cpu_extension:
        ie.add_extension(args.cpu_extension, 'CPU')

    log.info('Reading model {}'.format(args.model))
    model = ie.read_model(args.model, args.model[:-4] + ".bin")

    if len(model.inputs) != 1:
        log.error("Demo supports only models with 1 input layer")
        sys.exit(1)
    input_tensor_name = model.inputs[0].get_any_name()
    if len(model.outputs) != 1:
        log.error("Demo supports only models with 1 output layer")
        sys.exit(1)

    batch_size, channels, one, length = model.inputs[0].shape
    if one != 1:
        raise RuntimeError(
            "Wrong third dimension size of model input shape - {} (expected 1)"
            .format(one))

    hop = length - args.overlap if isinstance(args.overlap, int) else int(
        length * (1.0 - args.overlap))
    if hop < 0:
        log.error(
            "Wrong value for '-ol/--overlap' argument - overlapping more than clip length"
        )
        sys.exit(1)

    compiled_model = ie.compile_model(model, args.device)
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    labels = []
    if args.labels:
        with open(args.labels, "r") as file:
            labels = [line.rstrip() for line in file.readlines()]

    start_time = perf_counter()
    audio = AudioSource(args.input,
                        channels=channels,
                        samplerate=args.sample_rate)

    outputs = []
    clips = 0
    for idx, chunk in enumerate(
            audio.chunks(length, hop, num_chunks=batch_size)):
        chunk = np.reshape(chunk, model.inputs[0].shape)
        output = next(
            iter(infer_request.infer({
                input_tensor_name: chunk
            }).values()))
        clips += batch_size
        for batch, data in enumerate(output):
            chunk_start_time = (idx * batch_size +
                                batch) * hop / audio.samplerate
            chunk_end_time = (
                (idx * batch_size + batch) * hop + length) / audio.samplerate
            outputs.append(data)
            label = np.argmax(data)
            if chunk_start_time < audio.duration():
                log.info("[{:.2f}-{:.2f}] - {:6.2%} {:s}".format(
                    chunk_start_time, chunk_end_time, data[label],
                    labels[label] if labels else "Class {}".format(label)))
    total_latency = (perf_counter() - start_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))
    sys.exit(0)
Example #20
def import_core_modules(silent: bool, path_to_module: str):
    """
        This function checks that the OpenVINO Python API is available
        and that the necessary Python modules exist. So the following list of
        imports must contain all IE/NG Python API imports that are used inside MO.

    :param silent: enables or disables printing logs to stdout
    :param path_to_module: path where the Python API modules were found
    :return: True if all imports were successful and False otherwise
    """
    try:
        from openvino.offline_transformations import apply_moc_transformations, apply_moc_legacy_transformations,\
            apply_low_latency_transformation  # pylint: disable=import-error,no-name-in-module
        from openvino.offline_transformations import apply_make_stateful_transformation, generate_mapping_file, serialize  # pylint: disable=import-error,no-name-in-module

        from openvino.runtime import Model, get_version  # pylint: disable=import-error,no-name-in-module
        from openvino.runtime.op import Parameter  # pylint: disable=import-error,no-name-in-module
        from openvino.runtime import PartialShape, Dimension  # pylint: disable=import-error,no-name-in-module
        from openvino.frontend import FrontEndManager, FrontEnd  # pylint: disable=no-name-in-module,import-error

        import openvino.frontend  # pylint: disable=import-error,no-name-in-module

        if silent:
            return True

        ie_version = str(get_version())
        mo_version = str(v.get_version())  # pylint: disable=no-member,no-name-in-module

        print("{}: \t{}".format("OpenVINO runtime found in",
                                os.path.dirname(openvino.__file__)))
        print("{}: \t{}".format("OpenVINO runtime version", ie_version))
        print("{}: \t{}".format("Model Optimizer version", mo_version))

        versions_mismatch = False

        mo_hash = v.extract_hash_from_version(mo_version)
        ie_hash = v.extract_hash_from_version(ie_version)

        if mo_hash is not None and ie_hash is not None:
            min_length = min(len(mo_hash), len(ie_hash))
            mo_hash = mo_hash[:min_length]
            ie_hash = ie_hash[:min_length]

        if mo_hash != ie_hash or mo_hash is None or ie_hash is None:
            versions_mismatch = True
            extracted_mo_release_version = v.extract_release_version(
                mo_version)
            mo_is_custom = extracted_mo_release_version == (None, None)

            print(
                "[ WARNING ] Model Optimizer and OpenVINO runtime versions do no match."
            )
            print(
                "[ WARNING ] Consider building the OpenVINO Python API from sources or reinstall OpenVINO "
                "(TM) toolkit using",
                end=" ")
            if mo_is_custom:
                print(
                    "\"pip install openvino\" (may be incompatible with the current Model Optimizer version)"
                )
            else:
                print("\"pip install openvino=={}.{}\"".format(
                    *extracted_mo_release_version))

        simplified_mo_version = v.get_simplified_mo_version()
        message = str(
            dict({
                "platform": platform.system(),
                "mo_version": simplified_mo_version,
                "ie_version": v.get_simplified_ie_version(version=ie_version),
                "versions_mismatch": versions_mismatch,
            }))
        send_telemetry(simplified_mo_version, message, 'ie_version_check')

        return True
    except Exception as e:
        # Do not print a warning if module wasn't found or silent mode is on
        if "No module named 'openvino" not in str(e):
            print("[ WARNING ] Failed to import OpenVINO Python API in: {}".
                  format(path_to_module))
            print("[ WARNING ] {}".format(e))

            # Send telemetry message about warning
            simplified_mo_version = v.get_simplified_mo_version()
            message = str(
                dict({
                    "platform": platform.system(),
                    "mo_version": simplified_mo_version,
                    "ie_version": v.get_simplified_ie_version(env=os.environ),
                    "python_version": sys.version,
                    "error_type": classify_error_type(e),
                }))
            send_telemetry(simplified_mo_version, message, 'ie_import_failed')

        return False
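A hedged usage sketch for the checker above (the module path is a placeholder):

ok = import_core_modules(silent=False, path_to_module='<openvino python modules dir>')
if not ok:
    print('[ WARNING ] OpenVINO Python API is not available')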
Example #21
def load_core():
    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    return Core()
Example #22
def run_demo(args):
    cap = open_images_capture(args.input, args.loop)

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading Object Detection model {}'.format(args.model_od))
    detector_person = Detector(core, args.model_od,
                               device=args.device,
                               label_class=args.person_label)
    log.info('The Object Detection model {} is loaded to {}'.format(args.model_od, args.device))

    log.info('Reading Human Pose Estimation model {}'.format(args.model_hpe))
    single_human_pose_estimator = HumanPoseEstimator(core, args.model_hpe,
                                                     device=args.device)
    log.info('The Human Pose Estimation model {} is loaded to {}'.format(args.model_hpe, args.device))

    delay = int(cap.get_type() in ('VIDEO', 'CAMERA'))
    video_writer = cv2.VideoWriter()

    frames_processed = 0
    presenter = monitors.Presenter(args.utilization_monitors, 25)
    metrics = PerformanceMetrics()

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), (frame.shape[1], frame.shape[0])):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        bboxes = detector_person.detect(frame)
        human_poses = [single_human_pose_estimator.estimate(frame, bbox) for bbox in bboxes]

        presenter.drawGraphs(frame)

        colors = [(0, 0, 255),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0)]

        for pose, bbox in zip(human_poses, bboxes):
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (255, 0, 0), 2)
            for id_kpt, kpt in enumerate(pose):
                cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 3, colors[id_kpt], -1)

        metrics.update(start_time, frame)

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or frames_processed <= args.output_limit):
            video_writer.write(frame)

        if not args.no_show:
            cv2.imshow('Human Pose Estimation Demo', frame)
            key = cv2.waitKey(delay)
            if key == 27:
                break
            presenter.handleKey(key)

        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
Example #23
def main():
    args = build_argparser().parse_args()

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading Translation model {}'.format(args.translation_model))
    gan_model = CocosnetModel(core, args.translation_model, args.device)
    log.info('The Translation model {} is loaded to {}'.format(
        args.translation_model, args.device))

    log.info('Reading Semantic Segmentation model {}'.format(
        args.segmentation_model))
    seg_model = SegmentationModel(
        core, args.segmentation_model,
        args.device) if args.segmentation_model else None
    log.info('The Semantic Segmentation model {} is loaded to {}'.format(
        args.segmentation_model, args.device))

    input_data = []
    use_seg = bool(args.input_images) and bool(args.segmentation_model)
    assert use_seg ^ (bool(args.input_semantics) and bool(
        args.reference_semantics)), "Don't know where to get data"
    input_images = get_files(args.input_images)
    input_semantics = get_files(args.input_semantics)
    reference_images = get_files(args.reference_images)
    reference_semantics = get_files(args.reference_semantics)
    number_of_objects = len(reference_images)

    if use_seg:
        samples = [
            input_images, number_of_objects * [''], reference_images,
            number_of_objects * ['']
        ]
    else:
        samples = [
            number_of_objects * [''], input_semantics, reference_images,
            reference_semantics
        ]
    for input_img, input_sem, ref_img, ref_sem in zip(*samples):
        if use_seg:
            in_img = cv2.imread(input_img)
            if in_img is None:
                raise IOError('Image {} cannot be read'.format(input_img))
            input_sem = get_mask_from_image(in_img, seg_model)
            r_img = cv2.imread(ref_img)
            if r_img is None:
                raise IOError('Image {} cannot be read'.format(ref_img))
            ref_sem = get_mask_from_image(r_img, seg_model)
        else:
            input_sem_file = input_sem
            input_sem = cv2.imread(input_sem_file, cv2.IMREAD_GRAYSCALE)
            if input_sem is None:
                raise IOError('Image {} cannot be read'.format(input_sem_file))
            ref_sem_file = ref_sem
            ref_sem = cv2.imread(ref_sem, cv2.IMREAD_GRAYSCALE)
            if ref_sem is None:
                raise IOError('Image {} cannot be read'.format(ref_sem_file))
        input_sem = preprocess_semantics(
            input_sem, input_size=gan_model.input_semantic_size)
        ref_img_file = ref_img
        ref_img = cv2.imread(ref_img_file)
        if ref_img is None:
            raise IOError('Image {} cannot be read'.format(ref_img_file))
        ref_img = preprocess_image(ref_img,
                                   input_size=gan_model.input_image_size)
        ref_sem = preprocess_semantics(
            ref_sem, input_size=gan_model.input_semantic_size)
        input_dict = {
            'input_semantics': input_sem,
            'reference_image': ref_img,
            'reference_semantics': ref_sem
        }
        input_data.append(input_dict)

    outs = [gan_model.infer(**data) for data in input_data]

    results = [postprocess(out) for out in outs]

    save_result(results, args.output_dir)
    log.info("Result image was saved to {}".format(args.output_dir))
Example #24
def main():
    args = parse_arguments()

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    if 'CPU' in args.target_device:
        if args.number_threads is not None:
            core.set_property("CPU",
                              {'CPU_THREADS_NUM': str(args.number_threads)})
    elif 'GPU' not in args.target_device:
        raise AttributeError(
            "Device {} does not support 3D convolution. "
            "Please use CPU, GPU or HETERO:*CPU*, HETERO:*GPU*".format(args.target_device))

    log.info('Reading model {}'.format(args.path_to_model))
    model = core.read_model(args.path_to_model)

    if len(model.inputs) != 1:
        raise RuntimeError("only 1 input layer model is supported")

    input_tensor_name = model.inputs[0].get_any_name()
    if args.shape:
        log.debug("Reshape model from {} to {}".format(model.inputs[0].shape,
                                                       args.shape))
        model.reshape({input_tensor_name: PartialShape(args.shape)})

    if len(model.inputs[0].shape) != 5:
        raise RuntimeError(
            "Incorrect shape {} for 3d convolution network".format(args.shape))

    n, c, d, h, w = model.inputs[0].shape

    compiled_model = core.compile_model(model, args.target_device)
    output_tensor = compiled_model.outputs[0]
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.path_to_model,
                                                   args.target_device))

    start_time = perf_counter()
    if not os.path.exists(args.path_to_input_data):
        raise AttributeError("Path to input data: '{}' does not exist".format(
            args.path_to_input_data))

    input_type = get_input_type(args.path_to_input_data)
    is_nifti_data = (input_type == NIFTI_FILE or input_type == NIFTI_FOLDER)

    if input_type == NIFTI_FOLDER:
        series_name = find_series_name(args.path_to_input_data)
        original_data, data_crop, affine, original_size, bbox = \
            read_image(args.path_to_input_data, data_name=series_name, sizes=(d, h, w),
                       mri_sequence_order=args.mri_sequence, full_intensities_range=args.full_intensities_range)

    elif input_type == NIFTI_FILE:
        original_data, data_crop, affine, original_size, bbox = \
            read_image(args.path_to_input_data, data_name=args.path_to_input_data, sizes=(d, h, w), is_series=False,
                       mri_sequence_order=args.mri_sequence, full_intensities_range=args.full_intensities_range)
    else:
        data_crop = np.zeros(shape=(n, c, d, h, w), dtype=float)
        im_seq = ImageSequence.Iterator(Image.open(args.path_to_input_data))
        for i, page in enumerate(im_seq):
            im = np.array(page).reshape(h, w, c)
            for channel in range(c):
                data_crop[:, channel, i, :, :] = im[:, :, channel]
        original_data = data_crop
        original_size = original_data.shape[-3:]

    input_data = {input_tensor_name: data_crop}
    result = infer_request.infer(input_data)[output_tensor]
    batch, channels, out_d, out_h, out_w = result.shape

    list_img = []
    list_seg_result = []

    for batch, data in enumerate(result):
        seg_result = np.zeros(shape=original_size, dtype=np.uint8)
        if data.shape[1:] != original_size:
            x = bbox[1] - bbox[0]
            y = bbox[3] - bbox[2]
            z = bbox[5] - bbox[4]
            out_result = np.zeros(shape=((channels, ) + original_size),
                                  dtype=float)
            out_result[:, bbox[0]:bbox[1], bbox[2]:bbox[3], bbox[4]:bbox[5]] = \
                resample_np(data, (channels, x, y, z), 1)
        else:
            out_result = data

        if channels == 1:
            reshaped_data = out_result.reshape(original_size[0],
                                               original_size[1],
                                               original_size[2])
            mask = reshaped_data[:, :, :] > 0.5
            reshaped_data[mask] = 1
            seg_result = reshaped_data.astype(int)
        elif channels == 4:
            seg_result = np.argmax(out_result, axis=0).astype(int)
        elif channels == 3:
            res = np.zeros(shape=out_result.shape, dtype=bool)
            res = out_result > 0.5
            wt = res[0]
            tc = res[1]
            et = res[2]

            seg_result[wt] = 2
            seg_result[tc] = 1
            seg_result[et] = 3

        im = np.stack([
            original_data[batch, 0, :, :, :], original_data[batch, 0, :, :, :],
            original_data[batch, 0, :, :, :]
        ],
                      axis=3)

        im = 255 * (im - im.min()) / (im.max() - im.min())
        color_seg_frame = np.zeros(im.shape, dtype=np.uint8)
        for idx, c in enumerate(CLASSES_COLOR_MAP):
            color_seg_frame[seg_result[:, :, :] == idx, :] = np.array(
                c, dtype=np.uint8)
        mask = seg_result[:, :, :] > 0
        im[mask] = color_seg_frame[mask]

        for k in range(im.shape[2]):
            if is_nifti_data:
                list_img.append(
                    Image.fromarray(im[:, :, k, :].astype('uint8'), 'RGB'))
            else:
                list_img.append(
                    Image.fromarray(im[k, :, :, :].astype('uint8'), 'RGB'))

        if args.output_nifti and is_nifti_data:
            list_seg_result.append(seg_result)

    total_latency = (perf_counter() - start_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))
    tiff_output_name = os.path.join(args.path_to_output, 'output.tiff')
    Image.new('RGB', (original_data.shape[3], original_data.shape[2])).save(
        tiff_output_name, append_images=list_img, save_all=True)
    log.debug("Result tiff file was saved to {}".format(tiff_output_name))

    if args.output_nifti and is_nifti_data:
        for seg_res in list_seg_result:
            nii_filename = os.path.join(
                args.path_to_output,
                'output_{}.nii.gz'.format(list_seg_result.index(seg_res)))
            nib.save(nib.Nifti1Image(seg_res, affine=affine), nii_filename)
            log.debug("Result nifti file was saved to {}".format(nii_filename))
Example #25
def main():
    args = build_argparser().parse_args()

    cap = open_images_capture(args.input, args.loop)

    with open(args.labels, 'rt') as labels_file:
        class_labels = labels_file.read().splitlines()
        assert len(class_labels), 'The file with class labels is empty'

    # Plugin initialization for specified device and load extensions library if specified.
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if args.cpu_extension and 'CPU' in args.device:
        core.add_extension(args.cpu_extension, 'CPU')

    # Read IR
    log.info('Reading model {}'.format(args.model))
    model = core.read_model(args.model)
    image_input, image_info_input, (n, c, h, w), model_type, output_names, postprocessor = check_model(model)
    args.no_keep_aspect_ratio = model_type == 'yolact' or args.no_keep_aspect_ratio

    compiled_model = core.compile_model(model, args.device)
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    if args.no_track:
        tracker = None
    else:
        tracker = StaticIOUTracker()

    if args.delay:
        delay = args.delay
    else:
        delay = int(cap.get_type() in ('VIDEO', 'CAMERA'))

    frames_processed = 0
    metrics = PerformanceMetrics()
    visualizer = Visualizer(class_labels,
                            show_boxes=args.show_boxes,
                            show_scores=args.show_scores)
    video_writer = cv2.VideoWriter()

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    out_frame_size = (frame.shape[1], frame.shape[0])
    presenter = monitors.Presenter(
        args.utilization_monitors, 45,
        (round(out_frame_size[0] / 4), round(out_frame_size[1] / 8)))
    if args.output and not video_writer.open(args.output,
                                             cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), out_frame_size):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        if args.no_keep_aspect_ratio:
            # Resize the image to a target size.
            scale_x = w / frame.shape[1]
            scale_y = h / frame.shape[0]
            input_image = cv2.resize(frame, (w, h))
        else:
            # Resize the image to keep the same aspect ratio and to fit it to a window of a target size.
            scale_x = scale_y = min(h / frame.shape[0], w / frame.shape[1])
            input_image = cv2.resize(frame, None, fx=scale_x, fy=scale_y)

        input_image_size = input_image.shape[:2]
        input_image = np.pad(input_image,
                             ((0, h - input_image_size[0]),
                              (0, w - input_image_size[1]), (0, 0)),
                             mode='constant',
                             constant_values=0)
        # Change data layout from HWC to CHW.
        input_image = input_image.transpose((2, 0, 1))
        input_image = input_image.reshape((n, c, h, w)).astype(np.float32)
        input_image_info = np.asarray(
            [[input_image_size[0], input_image_size[1], 1]], dtype=np.float32)

        # Run the model.
        feed_dict = {image_input: input_image}
        if image_info_input:
            feed_dict[image_info_input] = input_image_info

        infer_request.infer(feed_dict)
        outputs = {
            name: infer_request.get_tensor(name).data[:]
            for name in output_names
        }

        # Parse detection results of the current request
        scores, classes, boxes, masks = postprocessor(outputs, scale_x,
                                                      scale_y,
                                                      *frame.shape[:2], h, w,
                                                      args.prob_threshold)

        if len(boxes) and args.raw_output_message:
            log.debug(
                '  -------------------------- Frame # {} --------------------------  '
                .format(frames_processed))
            log.debug(
                '  Class ID | Confidence |     XMIN |     YMIN |     XMAX |     YMAX '
            )
            for box, cls, score in zip(boxes, classes, scores):
                log.debug(
                    '{:>10} | {:>10f} | {:>8.2f} | {:>8.2f} | {:>8.2f} | {:>8.2f} '
                    .format(cls, score, *box))

        # Get instance track IDs.
        masks_tracks_ids = None
        if tracker is not None:
            masks_tracks_ids = tracker(masks, classes)

        # Visualize masks.
        frame = visualizer(frame, boxes, classes, scores, presenter, masks,
                           masks_tracks_ids)

        metrics.update(start_time, frame)

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or
                                        frames_processed <= args.output_limit):
            video_writer.write(frame)

        if not args.no_show:
            # Show resulting image.
            cv2.imshow('Results', frame)
            key = cv2.waitKey(delay)
            esc_code = 27
            if key == esc_code:
                break
            presenter.handleKey(key)
        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
Example #26
def main():
    args = build_argparser().parse_args()
    profile = get_profile(args.profile)
    if args.block_size is None:
        sr = profile['model_sampling_rate']
        args.block_size = round(sr * 10) if not args.realtime else round(
            sr * profile['frame_stride_seconds'] * 16)

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    start_load_time = time.perf_counter()
    stt = DeepSpeechSeqPipeline(
        core=core,
        model=args.model,
        lm=args.lm,
        beam_width=args.beam_width,
        max_candidates=args.max_candidates,
        profile=profile,
        device=args.device,
        online_decoding=args.realtime,
    )
    log.debug(
        "Loading, including network weights, OpenVINO Runtime initialization, LM, building LM vocabulary trie: {} s"
        .format(time.perf_counter() - start_load_time))
    start_time = time.perf_counter()
    with wave.open(args.input, 'rb') as wave_read:
        channel_num, sample_width, sampling_rate, pcm_length, compression_type, _ = wave_read.getparams()
        assert sample_width == 2, "Only 16-bit WAV PCM supported"
        assert compression_type == 'NONE', "Only linear PCM WAV files supported"
        assert channel_num == 1, "Only mono WAV PCM supported"
        assert abs(sampling_rate / profile['model_sampling_rate'] -
                   1) < 0.1, "Only {} kHz WAV PCM supported".format(
                       profile['model_sampling_rate'] / 1e3)
        log.debug("Audio file length: {} s".format(pcm_length / sampling_rate))

        audio_pos = 0
        play_start_time = time.perf_counter()
        iter_wrapper = tqdm if not args.realtime else (lambda x: x)
        for audio_iter in iter_wrapper(range(0, pcm_length, args.block_size)):
            audio_block = np.frombuffer(
                wave_read.readframes(args.block_size * channel_num),
                dtype=np.int16).reshape((-1, channel_num))
            if audio_block.shape[0] == 0:
                break
            audio_pos += audio_block.shape[0]
            #
            # It is possible to call stt.recognize_audio(): 1) for either whole audio files or
            # by splitting files into blocks, and 2) to reuse stt object for multiple files like this:
            #   transcription1 = stt.recognize_audio(whole_audio1, sampling_rate)
            #   transcription2 = stt.recognize_audio(whole_audio2, sampling_rate)
            #   stt.recognize_audio(whole_audio3_block1, sampling_rate, finish=False)
            #   transcription3 = stt.recognize_audio(whole_audio3_block2, sampling_rate, finish=True)
            # If you need intermediate features, you can call pipeline stage by stage: see
            # the implementation of DeepSpeechSeqPipeline.recognize_audio() method.
            #
            partial_transcr = stt.recognize_audio(audio_block,
                                                  sampling_rate,
                                                  finish=False)
            if args.realtime:
                if partial_transcr is not None and len(partial_transcr) > 0:
                    print('\r' +
                          partial_transcr[0].text[-args.realtime_window:],
                          end='')
                to_wait = play_start_time + audio_pos / sampling_rate - time.perf_counter()
                if to_wait > 0:
                    time.sleep(to_wait)

    transcription = stt.recognize_audio(None, sampling_rate, finish=True)
    if args.realtime:
        # Replace the transcription with its finalized version for real-time mode
        if transcription is not None and len(transcription) > 0:
            print('\r' + transcription[0].text[-args.realtime_window:])
    else:  #  not args.realtime
        # Only show processing time in offline mode because real-time mode is being slowed down by time.sleep()

        total_latency = (time.perf_counter() - start_time) * 1e3
        log.info("Metrics report:")
        log.info("\tLatency: {:.1f} ms".format(total_latency))

    print("\nTranscription(s) and confidence score(s):")
    for candidate in transcription:
        print("{}\t{}".format(candidate.conf, candidate.text))
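The comment inside the loop above spells out the streaming contract of recognize_audio; a condensed sketch of both calling modes (the audio arrays are placeholders):

# Whole-file mode: one call per file; the stt object can be reused.
transcription1 = stt.recognize_audio(whole_audio1, sampling_rate)
transcription2 = stt.recognize_audio(whole_audio2, sampling_rate)
# Block-wise mode: pass finish=False for every block except the last one.
stt.recognize_audio(whole_audio3_block1, sampling_rate, finish=False)
transcription3 = stt.recognize_audio(whole_audio3_block2, sampling_rate, finish=True)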
Example #27
def main():
    parser = argparse.ArgumentParser(description='Whiteboard inpainting demo')
    parser.add_argument('-i', '--input', required=True,
                         help='Required. Path to a video file or a device node of a web-camera.')
    parser.add_argument('--loop', default=False, action='store_true',
                        help='Optional. Enable reading the input in a loop.')
    parser.add_argument('-o', '--output', required=False,
                        help='Optional. Name of the output file(s) to save.')
    parser.add_argument('-limit', '--output_limit', required=False, default=1000, type=int,
                        help='Optional. Number of frames to store in output. '
                             'If 0 is set, all frames are stored.')
    parser.add_argument('-m_i', '--m_instance_segmentation', type=str, required=False,
                        help='Required. Path to the instance segmentation model.')
    parser.add_argument('-m_s', '--m_semantic_segmentation', type=str, required=False,
                        help='Required. Path to the semantic segmentation model.')
    parser.add_argument('-t', '--threshold', type=float, default=0.6,
                        help='Optional. Threshold for person instance segmentation model.')
    parser.add_argument('--no_show', help="Optional. Don't show output.", action='store_true')
    parser.add_argument('-d', '--device', type=str, default='CPU',
                        help='Optional. Specify a target device to infer on. CPU, GPU, HDDL or MYRIAD is '
                             'acceptable. The demo will look for a suitable plugin for the device specified.')
    parser.add_argument('-l', '--cpu_extension', type=str, default=None,
                        help='MKLDNN (CPU)-targeted custom layers. Absolute \
                              path to a shared library with the kernels impl.')
    parser.add_argument('-u', '--utilization_monitors', default='', type=str,
                        help='Optional. List of monitors to show initially.')
    args = parser.parse_args()

    cap = open_images_capture(args.input, args.loop)
    if cap.get_type() not in ('VIDEO', 'CAMERA'):
        raise RuntimeError("The input should be a video file or a numeric camera ID")

    if bool(args.m_instance_segmentation) == bool(args.m_semantic_segmentation):
        raise ValueError('Set up exactly one of segmentation models: '
                         '--m_instance_segmentation or --m_semantic_segmentation')

    labels_dir = Path(__file__).resolve().parents[3] / 'data/dataset_classes'
    mouse = MouseClick()
    if not args.no_show:
        cv2.namedWindow(WINNAME)
        cv2.setMouseCallback(WINNAME, mouse.get_points)

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    model_path = args.m_instance_segmentation if args.m_instance_segmentation else args.m_semantic_segmentation
    log.info('Reading model {}'.format(model_path))
    if args.m_instance_segmentation:
        labels_file = str(labels_dir / 'coco_80cl_bkgr.txt')
        segmentation = MaskRCNN(core, args.m_instance_segmentation, labels_file,
                                args.threshold, args.device, args.cpu_extension)
    elif args.m_semantic_segmentation:
        labels_file = str(labels_dir / 'cityscapes_19cl_bkgr.txt')
        segmentation = SemanticSegmentation(core, args.m_semantic_segmentation, labels_file,
                                            args.threshold, args.device, args.cpu_extension)
    log.info('The model {} is loaded to {}'.format(model_path, args.device))

    metrics = PerformanceMetrics()
    video_writer = cv2.VideoWriter()
    black_board = False
    frame_number = 0
    key = -1

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    out_frame_size = (frame.shape[1], frame.shape[0] * 2)
    output_frame = np.full((frame.shape[0], frame.shape[1], 3), 255, dtype='uint8')
    presenter = monitors.Presenter(args.utilization_monitors, 20,
                                   (out_frame_size[0] // 4, out_frame_size[1] // 16))
    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), out_frame_size):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        mask = None
        detections = segmentation.get_detections([frame])
        expand_mask(detections, frame.shape[1] // 27)
        if len(detections[0]) > 0:
            mask = detections[0][0][2]
            for i in range(1, len(detections[0])):
                mask = cv2.bitwise_or(mask, detections[0][i][2])

        if mask is not None:
            mask = np.stack([mask, mask, mask], axis=-1)
        else:
            mask = np.zeros(frame.shape, dtype='uint8')

        clear_frame = remove_background(frame, invert_colors=not black_board)

        output_frame = np.where(mask, output_frame, clear_frame)
        merged_frame = np.vstack([frame, output_frame])
        merged_frame = cv2.resize(merged_frame, out_frame_size)

        metrics.update(start_time, merged_frame)

        if video_writer.isOpened() and (args.output_limit <= 0 or frame_number <= args.output_limit-1):
            video_writer.write(merged_frame)

        presenter.drawGraphs(merged_frame)
        if not args.no_show:
            cv2.imshow(WINNAME, merged_frame)
            key = check_pressed_keys(key)
            if key == 27:  # 'Esc'
                break
            if key == ord('i'):  # catch pressing of key 'i'
                black_board = not black_board
                output_frame = 255 - output_frame
            else:
                presenter.handleKey(key)

        if mouse.crop_available:
            x0, x1 = min(mouse.points[0][0], mouse.points[1][0]), \
                     max(mouse.points[0][0], mouse.points[1][0])
            y0, y1 = min(mouse.points[0][1], mouse.points[1][1]), \
                     max(mouse.points[0][1], mouse.points[1][1])
            x1, y1 = min(x1, output_frame.shape[1] - 1), min(y1, output_frame.shape[0] - 1)
            board = output_frame[y0: y1, x0: x1, :]
            if board.shape[0] > 0 and board.shape[1] > 0:
                cv2.namedWindow('Board', cv2.WINDOW_KEEPRATIO)
                cv2.imshow('Board', board)

        frame_number += 1
        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
Example #28
def main():
    """Prepares data for the object tracking demo"""
    current_dir = os.path.dirname(os.path.abspath(__file__))
    parser = argparse.ArgumentParser(description='Multi camera multi object \
                                                  tracking live demo script')
    parser.add_argument(
        '-i',
        '--input',
        required=True,
        nargs='+',
        help=
        'Required. Input sources (indexes of cameras or paths to video files)')
    parser.add_argument('--loop',
                        default=False,
                        action='store_true',
                        help='Optional. Enable reading the input in a loop')
    parser.add_argument('--config',
                        type=str,
                        default=os.path.join(current_dir, 'configs/person.py'),
                        required=False,
                        help='Configuration file')

    parser.add_argument('--detections',
                        type=str,
                        help='JSON file with bounding boxes')

    parser.add_argument('-m',
                        '--m_detector',
                        type=str,
                        required=False,
                        help='Path to the object detection model')
    parser.add_argument('--t_detector',
                        type=float,
                        default=0.6,
                        help='Threshold for the object detection model')

    parser.add_argument('--m_segmentation',
                        type=str,
                        required=False,
                        help='Path to the object instance segmentation model')
    parser.add_argument(
        '--t_segmentation',
        type=float,
        default=0.6,
        help='Threshold for object instance segmentation model')

    parser.add_argument(
        '--m_reid',
        type=str,
        required=True,
        help='Required. Path to the object re-identification model')

    parser.add_argument('--output_video',
                        type=str,
                        default='',
                        required=False,
                        help='Optional. Path to output video')
    parser.add_argument(
        '--history_file',
        type=str,
        default='',
        required=False,
        help='Optional. Path to file in JSON format to save results of the demo'
    )
    parser.add_argument(
        '--save_detections',
        type=str,
        default='',
        required=False,
        help='Optional. Path to file in JSON format to save bounding boxes')
    parser.add_argument("--no_show",
                        help="Optional. Don't show output",
                        action='store_true')

    parser.add_argument('-d', '--device', type=str, default='CPU')
    parser.add_argument('-u',
                        '--utilization_monitors',
                        default='',
                        type=str,
                        help='Optional. List of monitors to show initially.')

    args = parser.parse_args()
    if check_detectors(args) != 1:
        sys.exit(1)

    if args.config:
        log.debug('Reading config from {}'.format(args.config))
        config = read_py_config(args.config)
    else:
        log.error('No configuration file specified. Please specify parameter \'--config\'')
        sys.exit(1)

    random.seed(config.random_seed)
    capture = MulticamCapture(args.input, args.loop)

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    if args.detections:
        object_detector = DetectionsFromFileReader(args.detections,
                                                   args.t_detector)
    elif args.m_segmentation:
        object_detector = MaskRCNN(core, args.m_segmentation,
                                   config.obj_segm.trg_classes,
                                   args.t_segmentation, args.device,
                                   capture.get_num_sources())
    else:
        object_detector = Detector(core, args.m_detector,
                                   config.obj_det.trg_classes, args.t_detector,
                                   args.device, capture.get_num_sources())

    if args.m_reid:
        object_recognizer = VectorCNN(core, args.m_reid, args.device)
    else:
        object_recognizer = None

    run(args, config, capture, object_detector, object_recognizer)
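read_py_config is only imported by the demo, not shown here. As an illustration, a minimal Python-file config loader could look like the sketch below, assuming the config .py defines plain top-level names such as random_seed; the read_py_config_sketch name and the SimpleNamespace wrapping are our assumptions, not the demo's actual helper:

import importlib.util
from types import SimpleNamespace

def read_py_config_sketch(path):
    """Load a .py config file and expose its top-level names as attributes.

    A guess at what a helper like read_py_config might do; the real demo
    utility may differ (e.g. it may wrap nested sections for attribute access).
    """
    spec = importlib.util.spec_from_file_location('demo_config', path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    # Keep only public top-level names defined by the config file
    public = {k: v for k, v in vars(module).items() if not k.startswith('_')}
    return SimpleNamespace(**public)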
Example #29
0
def main(args):
    cap = open_images_capture(args.input, args.loop)

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading model {}'.format(args.model))
    model = core.read_model(args.model)

    input_tensor_name = 'data_l'
    input_shape = model.input(input_tensor_name).shape
    assert input_shape[1] == 1, "Expected model input shape with 1 channel"

    inputs = {}
    for model_input in model.inputs:
        inputs[model_input.get_any_name()] = np.zeros(model_input.shape)

    assert len(model.outputs) == 1, "Expected exactly one model output"

    compiled_model = core.compile_model(model, device_name=args.device)
    output_tensor = compiled_model.outputs[0]
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    _, _, h_in, w_in = input_shape

    frames_processed = 0
    imshow_size = (640, 480)
    graph_size = (imshow_size[0] // 2, imshow_size[1] // 4)
    presenter = monitors.Presenter(args.utilization_monitors,
                                   imshow_size[1] * 2 - graph_size[1],
                                   graph_size)
    metrics = PerformanceMetrics()

    video_writer = cv.VideoWriter()
    if args.output and not video_writer.open(
            args.output, cv.VideoWriter_fourcc(*'MJPG'), cap.fps(),
        (imshow_size[0] * 2, imshow_size[1] * 2)):
        raise RuntimeError("Can't open video writer")

    start_time = perf_counter()
    original_frame = cap.read()
    if original_frame is None:
        raise RuntimeError("Can't read an image from the input")

    while original_frame is not None:
        (h_orig, w_orig) = original_frame.shape[:2]

        if original_frame.shape[2] > 1:
            frame = cv.cvtColor(cv.cvtColor(original_frame, cv.COLOR_BGR2GRAY),
                                cv.COLOR_GRAY2RGB)
        else:
            frame = cv.cvtColor(original_frame, cv.COLOR_GRAY2RGB)

        img_rgb = frame.astype(np.float32) / 255
        img_lab = cv.cvtColor(img_rgb, cv.COLOR_RGB2Lab)
        img_l_rs = cv.resize(img_lab.copy(), (w_in, h_in))[:, :, 0]

        inputs[input_tensor_name] = np.expand_dims(img_l_rs, axis=[0, 1])

        res = infer_request.infer(inputs)[output_tensor]

        update_res = np.squeeze(res)

        out = update_res.transpose((1, 2, 0))
        out = cv.resize(out, (w_orig, h_orig))
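        # Stack the full-resolution L channel of the source image on top of the
        # predicted a/b chrominance planes, then convert the LAB result to BGR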
        img_lab_out = np.concatenate((img_lab[:, :, 0][:, :, np.newaxis], out),
                                     axis=2)
        img_bgr_out = np.clip(cv.cvtColor(img_lab_out, cv.COLOR_Lab2BGR), 0, 1)

        original_image = cv.resize(original_frame, imshow_size)
        grayscale_image = cv.resize(frame, imshow_size)
        colorize_image = (cv.resize(img_bgr_out, imshow_size) * 255).astype(
            np.uint8)
        lab_image = cv.resize(img_lab_out, imshow_size).astype(np.uint8)

        original_image = cv.putText(original_image, 'Original', (25, 50),
                                    cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2,
                                    cv.LINE_AA)
        grayscale_image = cv.putText(grayscale_image, 'Grayscale', (25, 50),
                                     cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255),
                                     2, cv.LINE_AA)
        colorize_image = cv.putText(colorize_image, 'Colorize', (25, 50),
                                    cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2,
                                    cv.LINE_AA)
        lab_image = cv.putText(lab_image, 'LAB interpretation', (25, 50),
                               cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2,
                               cv.LINE_AA)

        ir_image = [
            cv.hconcat([original_image, grayscale_image]),
            cv.hconcat([lab_image, colorize_image])
        ]
        final_image = cv.vconcat(ir_image)

        metrics.update(start_time, final_image)

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or
                                        frames_processed <= args.output_limit):
            video_writer.write(final_image)

        presenter.drawGraphs(final_image)
        if not args.no_show:
            cv.imshow('Colorization Demo', final_image)
            key = cv.waitKey(1)
            if key in {ord("q"), ord("Q"), 27}:
                break
            presenter.handleKey(key)
        start_time = perf_counter()
        original_frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
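The heart of the colorization post-processing above is the LAB recombination: the network predicts only the a/b chrominance planes, which are resized back to the source resolution and stacked under the original L (lightness) plane before the LAB-to-BGR conversion. A condensed sketch of just that step on synthetic data (all shapes and value ranges here are illustrative):

import cv2 as cv
import numpy as np

# Pretend inputs: the source image's L channel and the network's a/b output
h_orig, w_orig = 480, 640
img_l = np.random.rand(h_orig, w_orig).astype(np.float32) * 100      # L in [0, 100]
ab_pred = np.random.rand(2, 56, 56).astype(np.float32) * 254 - 127   # CHW network output

# CHW -> HWC, then resize the chrominance planes back to the source resolution
ab = cv.resize(ab_pred.transpose((1, 2, 0)), (w_orig, h_orig))
lab = np.concatenate((img_l[:, :, np.newaxis], ab), axis=2)
bgr = np.clip(cv.cvtColor(lab, cv.COLOR_Lab2BGR), 0, 1)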
Example #30
0
def main():
    args = build_argparser().parse_args()

    cap = open_images_capture(args.input, args.loop)

    # Plugin initialization for specified device and load extensions library if specified
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    # Read IR
    log.info('Reading Proposal model {}'.format(args.model_pnet))
    p_net = core.read_model(args.model_pnet)
    if len(p_net.inputs) != 1:
        raise RuntimeError("Pnet supports only single input topologies")
    if len(p_net.outputs) != 2:
        raise RuntimeError("Pnet supports only topologies with two outputs")

    log.info('Reading Refine model {}'.format(args.model_rnet))
    r_net = core.read_model(args.model_rnet)
    if len(r_net.inputs) != 1:
        raise RuntimeError("Rnet supports only single input topologies")
    if len(r_net.outputs) != 2:
        raise RuntimeError("Rnet supports only topologies with two outputs")

    log.info('Reading Output model {}'.format(args.model_onet))
    o_net = core.read_model(args.model_onet)
    if len(o_net.inputs) != 1:
        raise RuntimeError("Onet supports only single input topologies")
    if len(o_net.outputs) != 3:
        raise RuntimeError("Onet supports only topologies with three outputs")

    pnet_input_tensor_name = p_net.inputs[0].get_any_name()
    rnet_input_tensor_name = r_net.inputs[0].get_any_name()
    onet_input_tensor_name = o_net.inputs[0].get_any_name()

    for node in p_net.outputs:
        if node.shape[1] == 2:
            pnet_cls_name = node.get_any_name()
        elif node.shape[1] == 4:
            pnet_roi_name = node.get_any_name()
        else:
            raise RuntimeError("Unsupported output layer for Pnet")

    for node in r_net.outputs:
        if node.shape[1] == 2:
            rnet_cls_name = node.get_any_name()
        elif node.shape[1] == 4:
            rnet_roi_name = node.get_any_name()
        else:
            raise RuntimeError("Unsupported output layer for Rnet")

    for node in o_net.outputs:
        if node.shape[1] == 2:
            onet_cls_name = node.get_any_name()
        elif node.shape[1] == 4:
            onet_roi_name = node.get_any_name()
        elif node.shape[1] == 10:
            onet_pts_name = node.get_any_name()
        else:
            raise RuntimeError("Unsupported output layer for Onet")

    next_frame_id = 0

    metrics = PerformanceMetrics()
    presenter = None
    video_writer = cv2.VideoWriter()
    is_loaded_before = False

    while True:
        start_time = perf_counter()
        origin_image = cap.read()
        if origin_image is None:
            if next_frame_id == 0:
                raise ValueError("Can't read an image from the input")
            break
        if next_frame_id == 0:
            presenter = monitors.Presenter(args.utilization_monitors, 55,
                                           (round(origin_image.shape[1] / 4), round(origin_image.shape[0] / 8)))
            if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                                     cap.fps(), (origin_image.shape[1], origin_image.shape[0])):
                raise RuntimeError("Can't open video writer")
        next_frame_id += 1

        rgb_image = cv2.cvtColor(origin_image, cv2.COLOR_BGR2RGB)
        oh, ow, _ = rgb_image.shape

        scales = utils.calculate_scales(rgb_image)

        # *************************************
        # Pnet stage
        # *************************************
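        # Run P-net once per pyramid scale: the network input is reshaped to
        # each scaled size, so every pass yields face proposals at that scale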

        pnet_res = []
        for i, scale in enumerate(scales):
            hs = int(oh*scale)
            ws = int(ow*scale)
            image = preprocess_image(rgb_image, ws, hs)

            p_net.reshape({pnet_input_tensor_name: PartialShape([1, 3, ws, hs])})  # Change width and height of the input blob
            compiled_pnet = core.compile_model(p_net, args.device)
            infer_request_pnet = compiled_pnet.create_infer_request()
            if i == 0 and not is_loaded_before:
                log.info("The Proposal model {} is loaded to {}".format(args.model_pnet, args.device))

            infer_request_pnet.infer(inputs={pnet_input_tensor_name: image})
            p_res = {name: infer_request_pnet.get_tensor(name).data[:] for name in {pnet_roi_name, pnet_cls_name}}
            pnet_res.append(p_res)

        image_num = len(scales)
        rectangles = []
        for i in range(image_num):
            roi = pnet_res[i][pnet_roi_name]
            cls = pnet_res[i][pnet_cls_name]
            _, _, out_h, out_w = cls.shape
            out_side = max(out_h, out_w)
            rectangle = utils.detect_face_12net(cls[0][1], roi[0], out_side, 1/scales[i], ow, oh,
                                                score_threshold[0], iou_threshold[0])
            rectangles.extend(rectangle)
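        # utils.NMS below performs non-maximum suppression in 'iou' mode:
        # greedily keep the highest-scoring box, drop remaining boxes whose
        # overlap with it exceeds the threshold, and repeat on the survivors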
        rectangles = utils.NMS(rectangles, iou_threshold[1], 'iou')

        # Rnet stage
        if len(rectangles) > 0:

            r_net.reshape({rnet_input_tensor_name: PartialShape([len(rectangles), 3, 24, 24])})  # Change batch size of input blob
            compiled_rnet = core.compile_model(r_net, args.device)
            infer_request_rnet = compiled_rnet.create_infer_request()
            if not is_loaded_before:
                log.info("The Refine model {} is loaded to {}".format(args.model_rnet, args.device))

            rnet_input = []
            for rectangle in rectangles:
                crop_img = rgb_image[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
                crop_img = preprocess_image(crop_img, 24, 24)
                rnet_input.extend(crop_img)

            infer_request_rnet.infer(inputs={rnet_input_tensor_name: rnet_input})
            rnet_res = {name: infer_request_rnet.get_tensor(name).data[:] for name in {rnet_roi_name, rnet_cls_name}}

            roi = rnet_res[rnet_roi_name]
            cls = rnet_res[rnet_cls_name]
            rectangles = utils.filter_face_24net(cls, roi, rectangles, ow, oh, score_threshold[1], iou_threshold[2])

        # Onet stage
        if len(rectangles) > 0:

            o_net.reshape({onet_input_tensor_name: PartialShape([len(rectangles), 3, 48, 48])})  # Change batch size of input blob
            compiled_onet = core.compile_model(o_net, args.device)
            infer_request_onet = compiled_onet.create_infer_request()
            if not is_loaded_before:
                log.info("The Output model {} is loaded to {}".format(args.model_onet, args.device))
                is_loaded_before = True

            onet_input = []
            for rectangle in rectangles:
                crop_img = rgb_image[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
                crop_img = preprocess_image(crop_img, 48, 48)
                onet_input.extend(crop_img)

            infer_request_onet.infer(inputs={onet_input_tensor_name: onet_input})
            onet_res = {name: infer_request_onet.get_tensor(name).data[:] for name in {onet_roi_name, onet_cls_name, onet_pts_name}}

            roi = onet_res[onet_roi_name]
            cls = onet_res[onet_cls_name]
            pts = onet_res[onet_pts_name]
            rectangles = utils.filter_face_48net(cls, roi, pts, rectangles, ow, oh,
                                                 score_threshold[2], iou_threshold[3])

        # display results
        for rectangle in rectangles:
            # Draw detected boxes
            cv2.putText(origin_image, 'confidence: {:.2f}'.format(rectangle[4]),
                        (int(rectangle[0]), int(rectangle[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0))
            cv2.rectangle(origin_image, (int(rectangle[0]), int(rectangle[1])), (int(rectangle[2]), int(rectangle[3])),
                          (255, 0, 0), 1)
            # Draw landmarks
            for i in range(5, 15, 2):
                cv2.circle(origin_image, (int(rectangle[i+0]), int(rectangle[i+1])), 2, (0, 255, 0))

        metrics.update(start_time, origin_image)

        if video_writer.isOpened() and (args.output_limit <= 0 or next_frame_id <= args.output_limit):
            video_writer.write(origin_image)

        if not args.no_show:
            cv2.imshow('MTCNN Results', origin_image)
            key = cv2.waitKey(1)
            if key in {ord('q'), ord('Q'), 27}:
                break
            presenter.handleKey(key)

    metrics.log_total()