Example #1
    def load_model(self,
                   core,
                   model_xml,
                   device,
                   model_type,
                   num_reqs=1,
                   cpu_extension=''):
        """Loads a model in the Inference Engine format"""
        # Plugin initialization for specified device and load extensions library if specified
        if cpu_extension and 'CPU' in device:
            core.add_extension(cpu_extension, 'CPU')
        # Read IR
        log.info('Reading {} model {}'.format(model_type, model_xml))
        self.model = core.read_model(model_xml)

        if len(self.model.inputs) not in self.get_allowed_inputs_len():
            raise RuntimeError(
                "Supports topologies with only {} inputs, but got {}".format(
                    self.get_allowed_inputs_len(), len(self.model.inputs)))
        if len(self.model.outputs) not in self.get_allowed_outputs_len():
            raise RuntimeError(
                "Supports topologies with only {} outputs, but got {}".format(
                    self.get_allowed_outputs_len(), len(self.model.outputs)))

        self.input_tensor_name = self.model.inputs[0].get_any_name()
        self.output_tensor_name = self.model.outputs[0].get_any_name()
        # Loading model to the plugin
        compiled_model = core.compile_model(self.model, device)
        self.infer_queue = AsyncInferQueue(compiled_model, num_reqs)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(
            model_type, model_xml, device))
Example #2
class IEModel:
    def __init__(self, model_path, core, target_device, num_requests, model_type):
        log.info('Reading {} model {}'.format(model_type, model_path))
        self.model = core.read_model(model_path)
        if len(self.model.inputs) != 1:
            log.error("Demo supports only models with 1 input")
            sys.exit(1)

        if len(self.model.outputs) != 1:
            log.error("Demo supports only models with 1 output")
            sys.exit(1)

        self.outputs = {}
        compiled_model = core.compile_model(self.model, target_device)
        self.output_tensor = compiled_model.outputs[0]
        self.input_name = self.model.inputs[0].get_any_name()
        self.input_shape = self.model.inputs[0].shape

        self.num_requests = num_requests
        self.infer_queue = AsyncInferQueue(compiled_model, num_requests)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(model_type, model_path, target_device))

    def completion_callback(self, infer_request, id):
        self.outputs[id] = infer_request.results[self.output_tensor]

    def async_infer(self, frame, req_id):
        input_data = {self.input_name: frame}
        self.infer_queue.start_async(input_data, req_id)

    def wait_request(self, req_id):
        self.infer_queue[req_id].wait()
        return self.outputs.pop(req_id, None)
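A minimal usage sketch for the wrapper above; the model path, device, and zero-filled frame are placeholders, and the IEModel class with its imports from Example #2 is assumed to be in scope:

import numpy as np
from openvino.runtime import Core

core = Core()
wrapper = IEModel('model.xml', core, 'CPU', 2, 'classification')  # hypothetical path and type label
frame = np.zeros(list(wrapper.input_shape), dtype=np.float32)     # stand-in input matching the model shape
wrapper.async_infer(frame, req_id=0)
result = wrapper.wait_request(0)  # array stored by completion_callback, or None if the id was never enqueued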
Example #3
    def __init__(self, model_path, device, core, num_requests, model_type, output_shape=None):
        log.info('Reading {} model {}'.format(model_type, model_path))
        self.model = core.read_model(model_path)

        if len(self.model.inputs) != 1:
            raise RuntimeError("The {} wrapper supports only models with 1 input layer".format(model_type))

        self.outputs = {}
        compiled_model = core.compile_model(self.model, device)
        self.infer_queue = AsyncInferQueue(compiled_model, num_requests)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(model_type, model_path, device))

        self.input_tensor_name = self.model.inputs[0].get_any_name()

        if len(self.model.outputs) > 1:
            if output_shape is not None:
                candidates = []
                for output_tensor in self.model.outputs:
                    if len(output_tensor.partial_shape) != len(output_shape):
                        continue

                    if output_tensor.partial_shape[1] == output_shape[1]:
                        candidates.append(output_tensor.get_any_name())

                if len(candidates) != 1:
                    raise RuntimeError("One output is expected")
                self.output_tensor_name = candidates[0]
            else:
                raise RuntimeError("One output is expected")
        else:
            self.output_tensor_name = self.model.outputs[0].get_any_name()

        self.input_size = self.model.input(self.input_tensor_name).shape
Example #4
 def deploy(self, device, max_requests=1):
     self.max_requests = max_requests
     compiled_model = self.core.compile_model(self.model, device)
     self.output_tensor = compiled_model.outputs[0]
     self.infer_queue = AsyncInferQueue(compiled_model, self.max_requests)
     self.infer_queue.set_callback(self.completion_callback)
     log.info('The {} model {} is loaded to {}'.format(
         self.model_type, self.model_path, device))
Example #5
 def deploy(self, device, plugin_config, max_requests=1):
     self.max_requests = max_requests
     compiled_model = self.core.compile_model(self.model,
                                              device,
                                              config=plugin_config)
     self.infer_queue = AsyncInferQueue(compiled_model, self.max_requests)
     self.infer_queue.set_callback(self.completion_callback)
     log.info('The {} model {} is loaded to {}'.format(
         self.model_type, self.model_path, device))
Example #6
    def load_model(self):
        self.compiled_model = self.core.compile_model(self.model, self.device, self.plugin_config)
        self.async_queue = AsyncInferQueue(self.compiled_model, self.max_num_requests)
        if self.max_num_requests == 0:
            # +1 to use it as a buffer of the pipeline
            self.async_queue = AsyncInferQueue(self.compiled_model, len(self.async_queue) + 1)

        log.info('The model {} is loaded to {}'.format("from buffer" if self.model_from_buffer else self.model_path, self.device))
        self.log_runtime_settings()
Example #7
def test_results_async_infer(device):
    jobs = 8
    num_request = 4
    core = Core()
    func = core.read_model(test_net_xml, test_net_bin)
    exec_net = core.compile_model(func, device)
    infer_queue = AsyncInferQueue(exec_net, num_request)
    jobs_done = [{"finished": False, "latency": 0} for _ in range(jobs)]

    def callback(request, job_id):
        jobs_done[job_id]["finished"] = True
        jobs_done[job_id]["latency"] = request.latency

    img = read_image()
    infer_queue.set_callback(callback)
    assert infer_queue.is_ready()
    for i in range(jobs):
        infer_queue.start_async({"data": img}, i)
    infer_queue.wait_all()

    request = exec_net.create_infer_request()
    outputs = request.infer({0: img})

    for i in range(num_request):
        assert np.allclose(list(outputs.values()),
                           list(infer_queue[i].results.values()))
Example #8
def test_infer_queue_is_ready(device):
    core = Core()
    param = ops.parameter([10])
    model = Model(ops.relu(param), [param])
    compiled = core.compile_model(model, device)
    infer_queue = AsyncInferQueue(compiled, 1)

    def callback(request, _):
        time.sleep(0.001)
    infer_queue.set_callback(callback)
    assert infer_queue.is_ready()
    infer_queue.start_async()
    assert not infer_queue.is_ready()
    infer_queue.wait_all()
Example #9
 def create_infer_requests(self, exe_network):
     if self.api_type == 'sync':
         requests = [exe_network.create_infer_request()]
     else:
         requests = AsyncInferQueue(exe_network, self.nireq)
         self.nireq = len(requests)
     return requests
Example #10
 def create_infer_requests(self, compiled_model):
     if self.api_type == 'sync':
         requests = [compiled_model.create_infer_request()]
     else:
         requests = AsyncInferQueue(compiled_model, self.nireq)
         self.nireq = len(requests)
     return requests
Example #11
    def _process_dataset_async(self,
                               stats_layout,
                               sampler,
                               print_progress=False,
                               need_metrics_per_sample=False,
                               requests_num=0):
        """Performs model inference on specified dataset subset asynchronously
        :param stats_layout: dict of stats collection functions {node_name: [fn]}(optional)
        :param sampler: sampling dataset to make inference
        :param print_progress: whether to print inference progress
        :param need_metrics_per_sample: whether to collect metrics for each batch
        :param requests_num: number of infer requests
        """
        def completion_callback(request, user_data):
            start_time, batch_id = user_data
            predictions = request.results
            self._process_infer_output(stats_layout, predictions,
                                       batch_annotations, batch_meta,
                                       need_metrics_per_sample)

            # Print progress
            if self._print_inference_progress(progress_log_fn, batch_id,
                                              len(sampler), start_time,
                                              time()):
                start_time = time()

        progress_log_fn = logger.info if print_progress else logger.debug
        self._ie.set_config(
            {
                'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO',
                'CPU_BIND_THREAD': 'YES'
            }, self._device)

        # Load model to the plugin
        compiled_model = self._ie.compile_model(model=self._model,
                                                device_name=self._device)

        optimal_requests_num = compiled_model.get_metric(
            'OPTIMAL_NUMBER_OF_INFER_REQUESTS')
        requests_num = optimal_requests_num if requests_num == 0 else requests_num
        logger.debug('Async mode requests number: %d', requests_num)
        infer_queue = AsyncInferQueue(compiled_model, requests_num)

        progress_log_fn('Start inference of %d images', len(sampler))

        sampler_iter = iter(enumerate(sampler))
        # Start inference
        start_time = time()
        infer_queue.set_callback(completion_callback)
        for batch_id, data_batch in sampler_iter:
            batch_annotations, image_batch, batch_meta = self._process_batch(
                data_batch)
            infer_queue.start_async(
                self._fill_input(compiled_model, image_batch),
                (start_time, batch_id))
        infer_queue.wait_all()
        progress_log_fn('Inference finished')
Example #12
 def get_infer_queue(self, log=True):
     if self.config.get('num_requests', 'AUTO') == 'AUTO':
         num_requests = 0
     else:
         num_requests = self.num_requests
     queue = AsyncInferQueue(self.exec_network, num_requests)
     if log:
         print_info('Prepared async infer queue with {} requests'.format(
             len(queue)))
     return queue
Example #13
    def __init__(self, model_path, core, target_device, num_requests, model_type):
        log.info('Reading {} model {}'.format(model_type, model_path))
        self.model = core.read_model(model_path)
        if len(self.model.inputs) != 1:
            log.error("Demo supports only models with 1 input")
            sys.exit(1)

        if len(self.model.outputs) != 1:
            log.error("Demo supports only models with 1 output")
            sys.exit(1)

        self.outputs = {}
        compiled_model = core.compile_model(self.model, target_device)
        self.output_tensor = compiled_model.outputs[0]
        self.input_name = self.model.inputs[0].get_any_name()
        self.input_shape = self.model.inputs[0].shape

        self.num_requests = num_requests
        self.infer_queue = AsyncInferQueue(compiled_model, num_requests)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(model_type, model_path, target_device))
Example #14
    def _init_model(self, inp_h, inp_w):
        # For better efficiency, the model is initialized with batch size 1 and every sample is processed independently
        inp_shape = [1, inp_h, inp_w, 3]
        self.net.reshape({self.input_name: inp_shape})

        # Load network to device
        if "CPU" in self.device:
            self.core.set_property(
                "CPU", {
                    "CPU_THROUGHPUT_STREAMS": "CPU_THROUGHPUT_AUTO",
                    "CPU_BIND_THREAD": "YES"
                })
        if "GPU" in self.device:
            self.core.set_property(
                "GPU", {"GPU_THROUGHPUT_STREAMS": "GPU_THROUGHPUT_AUTO"})

        compiled_model = self.core.compile_model(self.net, self.device)
        num_requests = compiled_model.get_property(
            "OPTIMAL_NUMBER_OF_INFER_REQUESTS")
        print(f"OpenVINO uses {num_requests} inference requests")
        self.infer_queue = AsyncInferQueue(compiled_model, num_requests)
Example #15
class IEModel:  # pylint: disable=too-few-public-methods
    """ Class that allows worknig with OpenVINO Runtime model. """

    def __init__(self, model_path, device, core, num_requests, model_type, output_shape=None):
        log.info('Reading {} model {}'.format(model_type, model_path))
        self.model = core.read_model(model_path)

        if len(self.model.inputs) != 1:
            raise RuntimeError("The {} wrapper supports only models with 1 input layer".format(model_type))

        self.outputs = {}
        compiled_model = core.compile_model(self.model, device)
        self.infer_queue = AsyncInferQueue(compiled_model, num_requests)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(model_type, model_path, device))

        self.input_tensor_name = self.model.inputs[0].get_any_name()

        if len(self.model.outputs) > 1:
            if output_shape is not None:
                candidates = []
                for output_tensor in self.model.outputs:
                    if len(output_tensor.partial_shape) != len(output_shape):
                        continue

                    if output_tensor.partial_shape[1] == output_shape[1]:
                        candidates.append(output_tensor.get_any_name())

                if len(candidates) != 1:
                    raise RuntimeError("One output is expected")
                self.output_tensor_name = candidates[0]
            else:
                raise RuntimeError("One output is expected")
        else:
            self.output_tensor_name = self.model.outputs[0].get_any_name()

        self.input_size = self.model.input(self.input_tensor_name).shape

    def completion_callback(self, infer_request, id):
        self.outputs[id] = infer_request.get_tensor(self.output_tensor_name).data[:]

    def infer(self, data):
        """Runs model on the specified input"""

        self.async_infer(data, 0)
        return self.wait_request(0)

    def async_infer(self, data, req_id):
        """Requests model inference for the specified input"""

        input_data = {self.input_tensor_name: data}
        self.infer_queue.start_async(input_data, req_id)

    def wait_request(self, req_id):
        """Waits for the model output by the specified request ID"""
        self.infer_queue.wait_all()
        try:
            return self.outputs.pop(req_id)
        except KeyError:
            return None
Example #16
def infer_async(compiled_model, number_iter, num_request, get_slice):
    result = None
    infer_queue = AsyncInferQueue(compiled_model, num_request)
    iteration = 0
    inference_time = time()
    while iteration < max(number_iter, num_request):
        idle_id = infer_queue.get_idle_request_id()
        if idle_id < 0:
            infer_queue.wait(num_requests=1)
            idle_id = infer_queue.get_idle_request_id()
        utils.set_input_to_blobs(infer_queue[idle_id], get_slice(iteration))
        infer_queue.start_async()
        iteration += 1
    infer_queue.wait_all()
    inference_time = time() - inference_time
    if number_iter == 1:
        request_results = [utils.get_request_result(request) for request in infer_queue]
        output_names = request_results[0].keys()
        result = dict.fromkeys(output_names, None)
        for key in result:
            result[key] = np.concatenate([req_result[key] for req_result in request_results], axis=0)
    return result, inference_time
Example #17
    def load_model(self, core, model_path, device, model_type, num_reqs=1):
        """Loads a model in the Inference Engine format"""

        log.info('Reading {} model {}'.format(model_type, model_path))
        self.model = core.read_model(model_path)

        if len(self.model.inputs) not in self.get_allowed_inputs_len():
            raise RuntimeError(
                "Supports topologies with only {} inputs, but got {}".format(
                    self.get_allowed_inputs_len(), len(self.model.inputs)))
        if len(self.model.outputs) not in self.get_allowed_outputs_len():
            raise RuntimeError(
                "Supports topologies with only {} outputs, but got {}".format(
                    self.get_allowed_outputs_len(), len(self.model.outputs)))

        self.input_tensor_name = self.model.inputs[0].get_any_name()
        self.output_tensor_name = self.model.outputs[0].get_any_name()
        # Loading model to the plugin
        compiled_model = core.compile_model(self.model, device)
        self.infer_queue = AsyncInferQueue(compiled_model, num_reqs)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(
            model_type, model_path, device))
Example #18
def test_infer_queue_fail_on_py_model(device):
    jobs = 1
    num_request = 1
    core = Core()
    model = core.read_model(test_net_xml, test_net_bin)
    compiled = core.compile_model(model, device)
    infer_queue = AsyncInferQueue(compiled, num_request)

    def callback(request, _):
        request = request + 21

    img = read_image()
    infer_queue.set_callback(callback)

    with pytest.raises(TypeError) as e:
        for _ in range(jobs):
            infer_queue.start_async({"data": img})
        infer_queue.wait_all()

    assert "unsupported operand type(s) for +" in str(e.value)
Example #19
def test_infer_queue_fail_on_cpp_model(device):
    jobs = 6
    num_request = 4
    core = Core()
    model = core.read_model(test_net_xml, test_net_bin)
    compiled = core.compile_model(model, device)
    infer_queue = AsyncInferQueue(compiled, num_request)

    def callback(request, _):
        request.get_tensor("Unknown")

    img = read_image()
    infer_queue.set_callback(callback)

    with pytest.raises(RuntimeError) as e:
        for _ in range(jobs):
            infer_queue.start_async({"data": img})
        infer_queue.wait_all()

    assert "Port for tensor name Unknown was not found" in str(e.value)
Example #20
def test_infer_queue(device):
    jobs = 8
    num_request = 4
    core = Core()
    model = core.read_model(test_net_xml, test_net_bin)
    compiled = core.compile_model(model, device)
    infer_queue = AsyncInferQueue(compiled, num_request)
    jobs_done = [{"finished": False, "latency": 0} for _ in range(jobs)]

    def callback(request, job_id):
        jobs_done[job_id]["finished"] = True
        jobs_done[job_id]["latency"] = request.latency

    img = read_image()
    infer_queue.set_callback(callback)
    for i in range(jobs):
        infer_queue.start_async({"data": img}, i)
    infer_queue.wait_all()
    assert all(job["finished"] for job in jobs_done)
    assert all(job["latency"] > 0 for job in jobs_done)
Example #21
class Module:
    def __init__(self, core, model_path, model_type):
        self.core = core
        self.model_type = model_type
        log.info('Reading {} model {}'.format(model_type, model_path))
        self.model = core.read_model(model_path)
        self.model_path = model_path
        self.active_requests = 0
        self.clear()

    def deploy(self, device, plugin_config, max_requests=1):
        self.max_requests = max_requests
        compiled_model = self.core.compile_model(self.model,
                                                 device,
                                                 config=plugin_config)
        self.infer_queue = AsyncInferQueue(compiled_model, self.max_requests)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(
            self.model_type, self.model_path, device))

    def completion_callback(self, infer_request, id):
        self.outputs[id] = next(iter(infer_request.results.values()))

    def enqueue(self, input):
        self.clear()

        if self.max_requests <= self.active_requests:
            log.warning('Processing request rejected - too many requests')
            return False

        self.infer_queue.start_async(input, self.active_requests)
        self.active_requests += 1
        return True

    def wait(self):
        if self.active_requests <= 0:
            return
        self.infer_queue.wait_all()
        self.active_requests = 0

    def get_outputs(self):
        self.wait()
        return [v for _, v in sorted(self.outputs.items())]

    def clear(self):
        self.outputs = {}

    def infer(self, inputs):
        self.clear()
        self.start_async(*inputs)
        return self.postprocess()
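A short usage sketch for the Module wrapper above, under stated assumptions: the model path and type label are placeholders, plugin_config is an empty dict, and a zero-filled tensor stands in for real input data:

import numpy as np
from openvino.runtime import Core

core = Core()
module = Module(core, 'face-detection.xml', 'Face Detection')  # hypothetical model path and type label
module.deploy('CPU', plugin_config={}, max_requests=2)

# enqueue() submits one async request per call and rejects submissions beyond max_requests
input_name = module.model.inputs[0].get_any_name()
dummy = {input_name: np.zeros(list(module.model.inputs[0].shape), dtype=np.float32)}
module.enqueue(dummy)
outputs = module.get_outputs()  # waits for all in-flight requests, results sorted by request id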
Example #22
        gil_released = True
    thread = Thread(target=detect_gil)
    thread.start()
    func(*args)
    if not gil_released:
        pytest.xfail(reason="Depend on condition race")
    thread.join()


device = os.environ.get("TEST_DEVICE") if os.environ.get("TEST_DEVICE") else "CPU"
core = Core()
core.set_property({"PERF_COUNT": "YES"})
param = ops.parameter([224, 224])
model = Model(ops.relu(param), [param])
compiled = core.compile_model(model, device)
infer_queue = AsyncInferQueue(compiled, 1)
user_stream = io.BytesIO()


# AsyncInferQueue

def test_gil_released_async_infer_queue_start_async():
    infer_queue.start_async()
    check_gil_released_safe(infer_queue.start_async)


def test_gil_released_async_infer_queue_is_ready():
    infer_queue.start_async()
    check_gil_released_safe(infer_queue.is_ready)

Example #23
def test_infer_queue_get_idle_handle(device):
    param = ops.parameter([10])
    model = Model(ops.relu(param), [param])
    core = Core()
    compiled = core.compile_model(model, device)
    queue = AsyncInferQueue(compiled, 2)
    niter = 10

    for _ in range(len(queue)):
        queue.start_async()
    queue.wait_all()
    for request in queue:
        assert request.wait_for(0)

    for _ in range(niter):
        idle_id = queue.get_idle_request_id()
        assert queue[idle_id].wait_for(0)
        queue.start_async()
    queue.wait_all()
Example #24
class IEModel:
    """Class for inference of models in the Inference Engine format"""
    def __init__(self,
                 core,
                 model_path,
                 device,
                 model_type,
                 num_reqs=1,
                 cpu_extension=''):
        self.load_model(core, model_path, device, model_type, num_reqs,
                        cpu_extension)
        self.outputs = {}

    def _preprocess(self, img):
        _, _, h, w = self.get_input_shape()
        img = np.expand_dims(cv2.resize(img, (w, h)).transpose(2, 0, 1),
                             axis=0)
        return img

    def completion_callback(self, infer_request, id):
        self.outputs[id] = infer_request.get_tensor(
            self.output_tensor_name).data[:]

    def forward(self, img):
        """Performs forward pass of the wrapped IE model"""
        self.forward_async(img, 0)
        self.infer_queue.wait_all()
        return self.outputs.pop(0)

    def forward_async(self, img, req_id):
        input_data = {self.input_tensor_name: self._preprocess(img)}
        self.infer_queue.start_async(input_data, req_id)

    def grab_all_async(self):
        self.infer_queue.wait_all()
        return [self.outputs.pop(i) for i in range(len(self.outputs))]

    def get_allowed_inputs_len(self):
        return (1, 2)

    def get_allowed_outputs_len(self):
        return (1, 2, 3, 4, 5)

    def get_input_shape(self):
        """Returns an input shape of the wrapped IE model"""
        return self.model.inputs[0].shape

    def load_model(self,
                   core,
                   model_xml,
                   device,
                   model_type,
                   num_reqs=1,
                   cpu_extension=''):
        """Loads a model in the Inference Engine format"""
        # Plugin initialization for specified device and load extensions library if specified
        if cpu_extension and 'CPU' in device:
            core.add_extension(cpu_extension, 'CPU')
        # Read IR
        log.info('Reading {} model {}'.format(model_type, model_xml))
        self.model = core.read_model(model_xml)

        if len(self.model.inputs) not in self.get_allowed_inputs_len():
            raise RuntimeError(
                "Supports topologies with only {} inputs, but got {}".format(
                    self.get_allowed_inputs_len(), len(self.model.inputs)))
        if len(self.model.outputs) not in self.get_allowed_outputs_len():
            raise RuntimeError(
                "Supports topologies with only {} outputs, but got {}".format(
                    self.get_allowed_outputs_len(), len(self.model.outputs)))

        self.input_tensor_name = self.model.inputs[0].get_any_name()
        self.output_tensor_name = self.model.outputs[0].get_any_name()
        # Loading model to the plugin
        compiled_model = core.compile_model(self.model, device)
        self.infer_queue = AsyncInferQueue(compiled_model, num_reqs)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(
            model_type, model_xml, device))
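A hedged usage sketch for the class above; the model path, device, and the black test image are placeholders, and the class with its imports (cv2, numpy, openvino) from Example #24 is assumed to be in scope:

import numpy as np
from openvino.runtime import Core

core = Core()
ie_model = IEModel(core, 'person-reid.xml', 'CPU', 'reid', num_reqs=2)  # hypothetical path and type label
frame = np.zeros((480, 640, 3), dtype=np.uint8)                         # stand-in BGR image

out = ie_model.forward(frame)           # synchronous helper: one async request + wait_all()
ie_model.forward_async(frame, req_id=0)
ie_model.forward_async(frame, req_id=1)
results = ie_model.grab_all_async()     # outputs for request ids 0..N-1, in order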
Example #25
class OpenVINOSession:
    def __init__(self, cfg, device):
        self.core = Core()
        self.xml_path = cfg["init_weights"] + ".xml"
        self.device = device

        # Convert a frozen graph to OpenVINO IR format
        if not os.path.exists(self.xml_path):
            subprocess.run(
                [
                    "mo",
                    "--output_dir",
                    os.path.dirname(cfg["init_weights"]),
                    "--input_model",
                    cfg["init_weights"] + ".pb",
                    "--input_shape",
                    "[1, 747, 832, 3]",
                    "--extensions",
                    os.path.join(os.path.dirname(__file__), "mo_extensions"),
                    "--data_type",
                    "FP16",
                ],
                check=True,
            )

        # Read network into memory
        self.net = self.core.read_model(self.xml_path)
        self.input_name = self.net.inputs[0].get_any_name()
        self.output_name = self.net.outputs[0].get_any_name()
        self.infer_queue = None

    def _init_model(self, inp_h, inp_w):
        # For better efficiency, the model is initialized with batch size 1 and every sample is processed independently
        inp_shape = [1, inp_h, inp_w, 3]
        self.net.reshape({self.input_name: inp_shape})

        # Load network to device
        if "CPU" in self.device:
            self.core.set_property(
                "CPU",
                {
                    "CPU_THROUGHPUT_STREAMS": "CPU_THROUGHPUT_AUTO",
                    "CPU_BIND_THREAD": "YES",
                },
            )
        if "GPU" in self.device:
            self.core.set_property(
                "GPU", {"GPU_THROUGHPUT_STREAMS": "GPU_THROUGHPUT_AUTO"})

        compiled_model = self.core.compile_model(self.net, self.device)
        num_requests = compiled_model.get_property(
            "OPTIMAL_NUMBER_OF_INFER_REQUESTS")
        print(f"OpenVINO uses {num_requests} inference requests")
        self.infer_queue = AsyncInferQueue(compiled_model, num_requests)

    def run(self, out_name, feed_dict):
        inp_name, inp = next(iter(feed_dict.items()))

        if self.infer_queue is None:
            self._init_model(inp.shape[1], inp.shape[2])

        batch_size = inp.shape[0]
        batch_output = np.zeros([batch_size] +
                                self.net.outputs[out_name].shape,
                                dtype=np.float32)

        def completion_callback(request, inp_id):
            output = next(iter(request.results.values()))
            batch_output[inp_id] = output

        self.infer_queue.set_callback(completion_callback)

        for inp_id in range(batch_size):
            self.infer_queue.start_async({inp_name: inp[inp_id:inp_id + 1]},
                                         inp_id)

        self.infer_queue.wait_all()

        return batch_output.reshape(-1, 3)
Example #26
def main() -> int:
    log.basicConfig(format='[ %(levelname)s ] %(message)s',
                    level=log.INFO,
                    stream=sys.stdout)
    args = parse_args()

    # --------------------------- Step 1. Initialize OpenVINO Runtime Core ------------------------------------------------
    log.info('Creating OpenVINO Runtime Core')
    core = Core()

    # --------------------------- Step 2. Read a model --------------------------------------------------------------------
    log.info(f'Reading the model: {args.model}')
    # (.xml and .bin files) or (.onnx file)
    model = core.read_model(args.model)

    if len(model.inputs) != 1:
        log.error('Sample supports only single input topologies')
        return -1

    if len(model.outputs) != 1:
        log.error('Sample supports only single output topologies')
        return -1


    # --------------------------- Step 3. Set up input --------------------------------------------------------------------
    # Read input images
    images = [cv2.imread(image_path) for image_path in args.input]

    # Resize images to model input dims
    _, _, h, w = model.input().shape
    resized_images = [cv2.resize(image, (w, h)) for image in images]

    # Add N dimension
    input_tensors = [np.expand_dims(image, 0) for image in resized_images]

    # --------------------------- Step 4. Apply preprocessing -------------------------------------------------------------
    ppp = PrePostProcessor(model)

    # 1) Set input tensor information:
    # - input() provides information about a single model input
    # - precision of tensor is supposed to be 'u8'
    # - layout of data is 'NHWC'
    ppp.input().tensor() \
        .set_element_type(Type.u8) \
        .set_layout(Layout('NHWC'))  # noqa: N400

    # 2) Here we suppose model has 'NCHW' layout for input
    ppp.input().model().set_layout(Layout('NCHW'))

    # 3) Set output tensor information:
    # - precision of tensor is supposed to be 'f32'
    ppp.output().tensor().set_element_type(Type.f32)

    # 4) Apply preprocessing, modifying the original 'model'
    model = ppp.build()

    # --------------------------- Step 5. Loading model to the device -----------------------------------------------------
    log.info('Loading the model to the plugin')
    compiled_model = core.compile_model(model, args.device)

    # --------------------------- Step 6. Create infer request queue ------------------------------------------------------
    log.info('Starting inference in asynchronous mode')
    infer_queue = AsyncInferQueue(compiled_model, len(input_tensors))
    infer_queue.set_callback(completion_callback)

    # --------------------------- Step 7. Do inference --------------------------------------------------------------------
    for i, input_tensor in enumerate(input_tensors):
        infer_queue.start_async({0: input_tensor}, args.input[i])

    infer_queue.wait_all()
    # ----------------------------------------------------------------------------------------------------------------------
    log.info(
        'This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n'
    )
    return 0
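The completion_callback registered in Step 6 is defined elsewhere in the original sample. A minimal sketch consistent with how it is invoked above (the userdata is the image path passed to start_async; reporting the top 10 class probabilities is an assumption about the output format):

def completion_callback(infer_request, image_path):
    # The single output tensor is assumed to hold per-class probabilities
    predictions = next(iter(infer_request.results.values()))
    probs = predictions.reshape(-1)
    # Report the ten most probable class ids for this image
    top_10 = np.argsort(probs)[-10:][::-1]
    log.info(f'Image path: {image_path}')
    for class_id in top_10:
        log.info(f'class id {class_id}: probability {probs[class_id]:.7f}')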
Example #27
class OpenvinoAdapter(ModelAdapter):
    """
    Works with OpenVINO model
    """
    def __init__(self,
                 core,
                 model_path,
                 weights_path=None,
                 model_parameters={},
                 device='CPU',
                 plugin_config=None,
                 max_num_requests=0):
        self.core = core
        self.model_path = model_path
        self.device = device
        self.plugin_config = plugin_config
        self.max_num_requests = max_num_requests
        self.model_parameters = model_parameters
        self.model_parameters['input_layouts'] = Layout.parse_layouts(
            self.model_parameters.get('input_layouts', None))

        if isinstance(model_path, (str, Path)):
            if Path(model_path).suffix == ".onnx" and weights_path:
                log.warning(
                    'For a model in ONNX format, only the "model_path" parameter should be set. '
                    'The "weights_path" will be omitted')

        self.model_from_buffer = isinstance(model_path, bytes) and isinstance(
            weights_path, bytes)
        log.info('Reading model {}'.format(
            'from buffer' if self.model_from_buffer else model_path))
        weights = weights_path if self.model_from_buffer else ''
        self.model = core.read_model(model_path, weights)

    def load_model(self):
        self.compiled_model = self.core.compile_model(self.model, self.device,
                                                      self.plugin_config)
        self.async_queue = AsyncInferQueue(self.compiled_model,
                                           self.max_num_requests)
        if self.max_num_requests == 0:
            # +1 to use it as a buffer of the pipeline
            self.async_queue = AsyncInferQueue(self.compiled_model,
                                               len(self.async_queue) + 1)

        log.info('The model {} is loaded to {}'.format(
            "from buffer" if self.model_from_buffer else self.model_path,
            self.device))
        self.log_runtime_settings()

    def log_runtime_settings(self):
        devices = set(parse_devices(self.device))
        if 'AUTO' not in devices:
            for device in devices:
                try:
                    nstreams = self.compiled_model.get_property(
                        device + '_THROUGHPUT_STREAMS')
                    log.info('\tDevice: {}'.format(device))
                    log.info('\t\tNumber of streams: {}'.format(nstreams))
                    if device == 'CPU':
                        nthreads = self.compiled_model.get_property(
                            'CPU_THREADS_NUM')
                        log.info('\t\tNumber of threads: {}'.format(
                            nthreads if int(nthreads) else 'AUTO'))
                except RuntimeError:
                    pass
        log.info('\tNumber of model infer requests: {}'.format(
            len(self.async_queue)))

    def get_input_layers(self):
        inputs = {}
        for input in self.model.inputs:
            input_layout = self.get_layout_for_input(input)
            inputs[input.get_any_name()] = Metadata(
                input.get_names(), list(input.shape), input_layout,
                input.get_element_type().get_type_name())
        inputs = self._get_meta_from_ngraph(inputs)
        return inputs

    def get_layout_for_input(self, input) -> str:
        input_layout = ''
        if self.model_parameters['input_layouts']:
            input_layout = Layout.from_user_layouts(
                input.get_names(), self.model_parameters['input_layouts'])
        if not input_layout:
            if not layout_helpers.get_layout(input).empty:
                input_layout = Layout.from_openvino(input)
            else:
                input_layout = Layout.from_shape(input.shape)
        return input_layout

    def get_output_layers(self):
        outputs = {}
        for output in self.model.outputs:
            output_shape = output.partial_shape.get_min_shape(
            ) if self.model.is_dynamic() else output.shape
            outputs[output.get_any_name()] = Metadata(
                output.get_names(),
                list(output_shape),
                precision=output.get_element_type().get_type_name())
        outputs = self._get_meta_from_ngraph(outputs)
        return outputs

    def reshape_model(self, new_shape):
        new_shape = {k: PartialShape(v) for k, v in new_shape.items()}
        self.model.reshape(new_shape)

    def get_raw_result(self, request):
        raw_result = {
            key: request.get_tensor(key).data[:]
            for key in self.get_output_layers().keys()
        }
        return raw_result

    def infer_sync(self, dict_data):
        self.infer_request = self.async_queue[
            self.async_queue.get_idle_request_id()]
        self.infer_request.infer(dict_data)
        return self.get_raw_result(self.infer_request)

    def infer_async(self, dict_data, callback_data) -> None:
        self.async_queue.start_async(dict_data,
                                     (self.get_raw_result, callback_data))

    def set_callback(self, callback_fn):
        self.async_queue.set_callback(callback_fn)

    def is_ready(self) -> bool:
        return self.async_queue.is_ready()

    def await_all(self) -> None:
        self.async_queue.wait_all()

    def await_any(self) -> None:
        self.async_queue.get_idle_request_id()

    def _get_meta_from_ngraph(self, layers_info):
        for node in self.model.get_ordered_ops():
            layer_name = node.get_friendly_name()
            if layer_name not in layers_info.keys():
                continue
            layers_info[layer_name].meta = node.get_attributes()
            layers_info[layer_name].type = node.get_type_name()
        return layers_info

    def operations_by_type(self, operation_type):
        layers_info = {}
        for node in self.model.get_ordered_ops():
            if node.get_type_name() == operation_type:
                layer_name = node.get_friendly_name()
                layers_info[layer_name] = Metadata(type=node.get_type_name(),
                                                   meta=node.get_attributes())
        return layers_info
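A hedged end-to-end sketch of driving OpenvinoAdapter, assuming the helpers it relies on (Metadata, Layout, parse_devices, layout_helpers) are importable from the original module and using a placeholder model path and inputs. Note that infer_async() packs the user data as (get_raw_result, callback_data), so the callback unpacks it accordingly:

from openvino.runtime import Core

core = Core()
adapter = OpenvinoAdapter(core, 'model.xml', device='CPU', max_num_requests=2)  # hypothetical model path
adapter.load_model()

results = {}

def on_done(request, userdata):
    get_raw_result, frame_id = userdata  # tuple set by infer_async() above
    results[frame_id] = get_raw_result(request)

adapter.set_callback(on_done)
for frame_id, dict_data in enumerate(input_batches):  # 'input_batches' is a placeholder iterable of input dicts
    adapter.infer_async(dict_data, frame_id)
adapter.await_all()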