Example #1
    def load_model(self):
        self.compiled_model = self.core.compile_model(self.model, self.device, self.plugin_config)
        self.async_queue = AsyncInferQueue(self.compiled_model, self.max_num_requests)
        if self.max_num_requests == 0:
            # Add one extra request to act as a buffer for the pipeline
            self.async_queue = AsyncInferQueue(self.compiled_model, len(self.async_queue) + 1)

        log.info('The model {} is loaded to {}'.format("from buffer" if self.model_from_buffer else self.model_path, self.device))
        self.log_runtime_settings()
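Example #1 only builds the queue; how it is driven is not shown in that snippet. A minimal sketch of a hypothetical driver for the same class (the method names and the frame_id user data below are illustrative, not part of the original code) could look like this:

    def submit_data(self, inputs, frame_id):
        # start_async() blocks until a request slot in the queue is free, then runs
        # inference asynchronously; frame_id is forwarded to the completion callback.
        self.async_queue.start_async(inputs, frame_id)

    def await_all(self):
        # Block until every in-flight request has finished.
        self.async_queue.wait_all()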
Example #2
    def load_model(self,
                   core,
                   model_xml,
                   device,
                   model_type,
                   num_reqs=1,
                   cpu_extension=''):
        """Loads a model in the Inference Engine format"""
        # Plugin initialization for specified device and load extensions library if specified
        if cpu_extension and 'CPU' in device:
            core.add_extension(cpu_extension, 'CPU')
        # Read IR
        log.info('Reading {} model {}'.format(model_type, model_xml))
        self.model = core.read_model(model_xml)

        if len(self.model.inputs) not in self.get_allowed_inputs_len():
            raise RuntimeError(
                "Supports topologies with only {} inputs, but got {}".format(
                    self.get_allowed_inputs_len(), len(self.model.inputs)))
        if len(self.model.outputs) not in self.get_allowed_outputs_len():
            raise RuntimeError(
                "Supports topologies with only {} outputs, but got {}".format(
                    self.get_allowed_outputs_len(), len(self.model.outputs)))

        self.input_tensor_name = self.model.inputs[0].get_any_name()
        self.output_tensor_name = self.model.outputs[0].get_any_name()
        # Loading model to the plugin
        compiled_model = core.compile_model(self.model, device)
        self.infer_queue = AsyncInferQueue(compiled_model, num_reqs)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(
            model_type, model_xml, device))
Example #3
 def create_infer_requests(self, exe_network):
     if self.api_type == 'sync':
         requests = [exe_network.create_infer_request()]
     else:
         requests = AsyncInferQueue(exe_network, self.nireq)
         self.nireq = len(requests)
     return requests
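Depending on api_type, the caller gets back either a one-element list of plain infer requests or an AsyncInferQueue, so the inference loop has to branch the same way. A hypothetical caller sketch (the inputs argument is assumed, not part of the original snippet):

 def infer_once(self, requests, inputs):
     if self.api_type == 'sync':
         # Blocking inference on the single request created above
         return requests[0].infer(inputs)
     # Asynchronous path: the queue schedules the work on an idle request;
     # results are delivered to the callback registered on the queue, if any.
     requests.start_async(inputs)
     requests.wait_all()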
Example #4
    def __init__(self, model_path, device, core, num_requests, model_type, output_shape=None):
        log.info('Reading {} model {}'.format(model_type, model_path))
        self.model = core.read_model(model_path)

        if len(self.model.inputs) != 1:
            raise RuntimeError("The {} wrapper supports only models with 1 input layer".format(model_type))

        self.outputs = {}
        compiled_model = core.compile_model(self.model, device)
        self.infer_queue = AsyncInferQueue(compiled_model, num_requests)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(model_type, model_path, device))

        self.input_tensor_name = self.model.inputs[0].get_any_name()

        if len(self.model.outputs) > 1:
            if output_shape is not None:
                candidates = []
                for output_tensor in self.model.outputs:
                    if len(output_tensor.partial_shape) != len(output_shape):
                        continue

                    if output_tensor.partial_shape[1] == output_shape[1]:
                        candidates.append(output_tensor.get_any_name())

                if len(candidates) != 1:
                    raise RuntimeError("One output is expected")
                self.output_tensor_name = candidates[0]
            else:
                raise RuntimeError("One output is expected")
        else:
            self.output_tensor_name = self.model.outputs[0].get_any_name()

        self.input_size = self.model.input(self.input_tensor_name).shape
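The completion_callback registered above via set_callback() is not part of this snippet; given the self.outputs = {} dictionary, a plausible sketch (assuming the second callback argument is the id passed to start_async) is:

    def completion_callback(self, infer_request, id):
        # Copy the finished request's output so it survives reuse of the request,
        # keyed by the caller-supplied id.
        self.outputs[id] = infer_request.get_tensor(self.output_tensor_name).data.copy()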
Example #5
def test_results_async_infer(device):
    jobs = 8
    num_request = 4
    core = Core()
    func = core.read_model(test_net_xml, test_net_bin)
    exec_net = core.compile_model(func, device)
    infer_queue = AsyncInferQueue(exec_net, num_request)
    jobs_done = [{"finished": False, "latency": 0} for _ in range(jobs)]

    def callback(request, job_id):
        jobs_done[job_id]["finished"] = True
        jobs_done[job_id]["latency"] = request.latency

    img = read_image()
    infer_queue.set_callback(callback)
    assert infer_queue.is_ready()
    for i in range(jobs):
        infer_queue.start_async({"data": img}, i)
    infer_queue.wait_all()

    request = exec_net.create_infer_request()
    outputs = request.infer({0: img})

    for i in range(num_request):
        assert np.allclose(list(outputs.values()),
                           list(infer_queue[i].results.values()))
Example #6
 def create_infer_requests(self, compiled_model):
     if self.api_type == 'sync':
         requests = [compiled_model.create_infer_request()]
     else:
         requests = AsyncInferQueue(compiled_model, self.nireq)
         self.nireq = len(requests)
     return requests
Example #7
 def deploy(self, device, max_requests=1):
     self.max_requests = max_requests
     compiled_model = self.core.compile_model(self.model, device)
     self.output_tensor = compiled_model.outputs[0]
     self.infer_queue = AsyncInferQueue(compiled_model, self.max_requests)
     self.infer_queue.set_callback(self.completion_callback)
     log.info('The {} model {} is loaded to {}'.format(
         self.model_type, self.model_path, device))
Example #8
    def _process_dataset_async(self,
                               stats_layout,
                               sampler,
                               print_progress=False,
                               need_metrics_per_sample=False,
                               requests_num=0):
        """Performs model inference on specified dataset subset asynchronously
        :param stats_layout: dict of stats collection functions {node_name: [fn]} (optional)
        :param sampler: sampling dataset to make inference
        :param print_progress: whether to print inference progress
        :param need_metrics_per_sample: whether to collect metrics for each batch
        :param requests_num: number of infer requests
        """
        def completion_callback(request, user_data):
            start_time, batch_id = user_data
            predictions = request.results
            self._process_infer_output(stats_layout, predictions,
                                       batch_annotations, batch_meta,
                                       need_metrics_per_sample)

            # Print progress
            if self._print_inference_progress(progress_log_fn, batch_id,
                                              len(sampler), start_time,
                                              time()):
                start_time = time()

        progress_log_fn = logger.info if print_progress else logger.debug
        self._ie.set_config(
            {
                'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO',
                'CPU_BIND_THREAD': 'YES'
            }, self._device)

        # Load model to the plugin
        compiled_model = self._ie.compile_model(model=self._model,
                                                device_name=self._device)

        optimal_requests_num = compiled_model.get_metric(
            'OPTIMAL_NUMBER_OF_INFER_REQUESTS')
        requests_num = optimal_requests_num if requests_num == 0 else requests_num
        logger.debug('Async mode requests number: %d', requests_num)
        infer_queue = AsyncInferQueue(compiled_model, requests_num)

        progress_log_fn('Start inference of %d images', len(sampler))

        sampler_iter = iter(enumerate(sampler))
        # Start inference
        start_time = time()
        infer_queue.set_callback(completion_callback)
        for batch_id, data_batch in sampler_iter:
            batch_annotations, image_batch, batch_meta = self._process_batch(
                data_batch)
            infer_queue.start_async(
                self._fill_input(compiled_model, image_batch),
                (start_time, batch_id))
        infer_queue.wait_all()
        progress_log_fn('Inference finished')
Example #9
 def deploy(self, device, plugin_config, max_requests=1):
     self.max_requests = max_requests
     compiled_model = self.core.compile_model(self.model,
                                              device,
                                              config=plugin_config)
     self.infer_queue = AsyncInferQueue(compiled_model, self.max_requests)
     self.infer_queue.set_callback(self.completion_callback)
     log.info('The {} model {} is loaded to {}'.format(
         self.model_type, self.model_path, device))
Example #10
 def get_infer_queue(self, log=True):
     if self.config.get('num_requests', 'AUTO') == 'AUTO':
         num_requests = 0
     else:
         num_requests = self.num_requests
     queue = AsyncInferQueue(self.exec_network, num_requests)
     if log:
         print_info('Prepared async infer queue with {} requests'.format(
             len(queue)))
     return queue
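In the snippet above, 'AUTO' maps to 0 because AsyncInferQueue treats a request count of 0 as "choose an optimal number automatically"; len(queue) then reports how many requests were actually created. A quick sketch, assuming a compiled_model is already available:

    queue = AsyncInferQueue(compiled_model, 0)  # 0 -> runtime-selected number of requests
    print(len(queue))                           # number of infer requests actually created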
Example #11
def test_infer_queue_is_ready(device):
    core = Core()
    param = ops.parameter([10])
    model = Model(ops.relu(param), [param])
    compiled = core.compile_model(model, device)
    infer_queue = AsyncInferQueue(compiled, 1)

    def callback(request, _):
        time.sleep(0.001)
    infer_queue.set_callback(callback)
    assert infer_queue.is_ready()
    infer_queue.start_async()
    assert not infer_queue.is_ready()
    infer_queue.wait_all()
Example #12
def test_infer_queue_get_idle_handle(device):
    param = ops.parameter([10])
    model = Model(ops.relu(param), [param])
    core = Core()
    compiled = core.compile_model(model, device)
    queue = AsyncInferQueue(compiled, 2)
    niter = 10

    for _ in range(len(queue)):
        queue.start_async()
    queue.wait_all()
    for request in queue:
        assert request.wait_for(0)

    for _ in range(niter):
        idle_id = queue.get_idle_request_id()
        assert queue[idle_id].wait_for(0)
        queue.start_async()
    queue.wait_all()
Example #13
def test_infer_queue_fail_on_py_model(device):
    jobs = 1
    num_request = 1
    core = Core()
    model = core.read_model(test_net_xml, test_net_bin)
    compiled = core.compile_model(model, device)
    infer_queue = AsyncInferQueue(compiled, num_request)

    def callback(request, _):
        request = request + 21

    img = read_image()
    infer_queue.set_callback(callback)

    with pytest.raises(TypeError) as e:
        for _ in range(jobs):
            infer_queue.start_async({"data": img})
        infer_queue.wait_all()

    assert "unsupported operand type(s) for +" in str(e.value)
Example #14
def test_infer_queue_fail_on_cpp_model(device):
    jobs = 6
    num_request = 4
    core = Core()
    model = core.read_model(test_net_xml, test_net_bin)
    compiled = core.compile_model(model, device)
    infer_queue = AsyncInferQueue(compiled, num_request)

    def callback(request, _):
        request.get_tensor("Unknown")

    img = read_image()
    infer_queue.set_callback(callback)

    with pytest.raises(RuntimeError) as e:
        for _ in range(jobs):
            infer_queue.start_async({"data": img})
        infer_queue.wait_all()

    assert "Port for tensor name Unknown was not found" in str(e.value)
Example #15
def test_infer_queue(device):
    jobs = 8
    num_request = 4
    core = Core()
    model = core.read_model(test_net_xml, test_net_bin)
    compiled = core.compile_model(model, device)
    infer_queue = AsyncInferQueue(compiled, num_request)
    jobs_done = [{"finished": False, "latency": 0} for _ in range(jobs)]

    def callback(request, job_id):
        jobs_done[job_id]["finished"] = True
        jobs_done[job_id]["latency"] = request.latency

    img = read_image()
    infer_queue.set_callback(callback)
    for i in range(jobs):
        infer_queue.start_async({"data": img}, i)
    infer_queue.wait_all()
    assert all(job["finished"] for job in jobs_done)
    assert all(job["latency"] > 0 for job in jobs_done)
Example #16
    def __init__(self, model_path, core, target_device, num_requests, model_type):
        log.info('Reading {} model {}'.format(model_type, model_path))
        self.model = core.read_model(model_path)
        if len(self.model.inputs) != 1:
            log.error("Demo supports only models with 1 input")
            sys.exit(1)

        if len(self.model.outputs) != 1:
            log.error("Demo supports only models with 1 output")
            sys.exit(1)

        self.outputs = {}
        compiled_model = core.compile_model(self.model, target_device)
        self.output_tensor = compiled_model.outputs[0]
        self.input_name = self.model.inputs[0].get_any_name()
        self.input_shape = self.model.inputs[0].shape

        self.num_requests = num_requests
        self.infer_queue = AsyncInferQueue(compiled_model, num_requests)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(model_type, model_path, target_device))
Example #17
    def _init_model(self, inp_h, inp_w):
        # For better efficiency, model is initialized for batch_size 1 and every sample processed independently
        inp_shape = [1, inp_h, inp_w, 3]
        self.net.reshape({self.input_name: inp_shape})

        # Load network to device
        if "CPU" in self.device:
            self.core.set_property(
                "CPU", {
                    "CPU_THROUGHPUT_STREAMS": "CPU_THROUGHPUT_AUTO",
                    "CPU_BIND_THREAD": "YES"
                })
        if "GPU" in self.device:
            self.core.set_property(
                "GPU", {"GPU_THROUGHPUT_STREAMS": "GPU_THROUGHPUT_AUTO"})

        compiled_model = self.core.compile_model(self.net, self.device)
        num_requests = compiled_model.get_property(
            "OPTIMAL_NUMBER_OF_INFER_REQUESTS")
        print(f"OpenVINO uses {num_requests} inference requests")
        self.infer_queue = AsyncInferQueue(compiled_model, num_requests)
Example #18
def infer_async(compiled_model, number_iter, num_request, get_slice):
    result = None
    infer_queue = AsyncInferQueue(compiled_model, num_request)
    iteration = 0
    inference_time = time()
    while iteration < max(number_iter, num_request):
        # get_idle_request_id() blocks until at least one request slot is free,
        # so the returned id is always valid.
        idle_id = infer_queue.get_idle_request_id()
        utils.set_input_to_blobs(infer_queue[idle_id], get_slice(iteration))
        infer_queue.start_async()
        iteration += 1
    infer_queue.wait_all()
    inference_time = time() - inference_time
    if number_iter == 1:
        request_results = [utils.get_request_result(request) for request in infer_queue]
        output_names = request_results[0].keys()
        result = dict.fromkeys(output_names, None)
        for key in result:
            result[key] = np.concatenate([request_result[key] for request_result in request_results], axis=0)
    return result, inference_time
Example #19
    def load_model(self, core, model_path, device, model_type, num_reqs=1):
        """Loads a model in the Inference Engine format"""

        log.info('Reading {} model {}'.format(model_type, model_path))
        self.model = core.read_model(model_path)

        if len(self.model.inputs) not in self.get_allowed_inputs_len():
            raise RuntimeError(
                "Supports topologies with only {} inputs, but got {}".format(
                    self.get_allowed_inputs_len(), len(self.model.inputs)))
        if len(self.model.outputs) not in self.get_allowed_outputs_len():
            raise RuntimeError(
                "Supports topologies with only {} outputs, but got {}".format(
                    self.get_allowed_outputs_len(), len(self.model.outputs)))

        self.input_tensor_name = self.model.inputs[0].get_any_name()
        self.output_tensor_name = self.model.outputs[0].get_any_name()
        # Loading model to the plugin
        compiled_model = core.compile_model(self.model, device)
        self.infer_queue = AsyncInferQueue(compiled_model, num_reqs)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(
            model_type, model_path, device))
Example #20
def main() -> int:
    log.basicConfig(format='[ %(levelname)s ] %(message)s',
                    level=log.INFO,
                    stream=sys.stdout)
    args = parse_args()

    # --------------------------- Step 1. Initialize OpenVINO Runtime Core ------------------------------------------------
    log.info('Creating OpenVINO Runtime Core')
    core = Core()

    # --------------------------- Step 2. Read a model --------------------------------------------------------------------
    log.info(f'Reading the model: {args.model}')
    # (.xml and .bin files) or (.onnx file)
    model = core.read_model(args.model)

    if len(model.inputs) != 1:
        log.error('Sample supports only single input topologies')
        return -1

    if len(model.outputs) != 1:
        log.error('Sample supports only single output topologies')
        return -1


    # --------------------------- Step 3. Set up input --------------------------------------------------------------------
    # Read input images
    images = [cv2.imread(image_path) for image_path in args.input]

    # Resize images to model input dims
    _, _, h, w = model.input().shape
    resized_images = [cv2.resize(image, (w, h)) for image in images]

    # Add N dimension
    input_tensors = [np.expand_dims(image, 0) for image in resized_images]

    # --------------------------- Step 4. Apply preprocessing -------------------------------------------------------------
    ppp = PrePostProcessor(model)

    # 1) Set input tensor information:
    # - input() provides information about a single model input
    # - precision of tensor is supposed to be 'u8'
    # - layout of data is 'NHWC'
    ppp.input().tensor() \
        .set_element_type(Type.u8) \
        .set_layout(Layout('NHWC'))  # noqa: N400

    # 2) Here we suppose model has 'NCHW' layout for input
    ppp.input().model().set_layout(Layout('NCHW'))

    # 3) Set output tensor information:
    # - precision of tensor is supposed to be 'f32'
    ppp.output().tensor().set_element_type(Type.f32)

    # 4) Apply preprocessing, modifying the original 'model'
    model = ppp.build()

    # --------------------------- Step 5. Loading model to the device -----------------------------------------------------
    log.info('Loading the model to the plugin')
    compiled_model = core.compile_model(model, args.device)

    # --------------------------- Step 6. Create infer request queue ------------------------------------------------------
    log.info('Starting inference in asynchronous mode')
    infer_queue = AsyncInferQueue(compiled_model, len(input_tensors))
    infer_queue.set_callback(completion_callback)

    # --------------------------- Step 7. Do inference --------------------------------------------------------------------
    for i, input_tensor in enumerate(input_tensors):
        infer_queue.start_async({0: input_tensor}, args.input[i])

    infer_queue.wait_all()
    # ----------------------------------------------------------------------------------------------------------------------
    log.info(
        'This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n'
    )
    return 0
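Example #20 registers a module-level completion_callback that is not part of the excerpt. A minimal sketch, assuming the user data is the image path passed to start_async (the real sample goes on to post-process the classification results):

def completion_callback(infer_request, image_path):
    # The second argument is the user data passed to start_async(), here the image path.
    predictions = next(iter(infer_request.results.values()))
    log.info(f'Finished {image_path}, output shape: {predictions.shape}')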
Example #21
def check_gil_released_safe(func, args=()):
    # Signature reconstructed from the call sites below. detect_gil (not shown in this
    # excerpt) runs in a separate thread and sets the module-level gil_released flag to
    # True once it gets a chance to execute, i.e. once func releases the GIL.
    global gil_released
    gil_released = False
    thread = Thread(target=detect_gil)
    thread.start()
    func(*args)
    if not gil_released:
        pytest.xfail(reason="Depend on condition race")
    thread.join()


device = os.environ.get("TEST_DEVICE") if os.environ.get("TEST_DEVICE") else "CPU"
core = Core()
core.set_property({"PERF_COUNT": "YES"})
param = ops.parameter([224, 224])
model = Model(ops.relu(param), [param])
compiled = core.compile_model(model, device)
infer_queue = AsyncInferQueue(compiled, 1)
user_stream = io.BytesIO()


# AsyncInferQueue

def test_gil_released_async_infer_queue_start_async():
    infer_queue.start_async()
    check_gil_released_safe(infer_queue.start_async)


def test_gil_released_async_infer_queue_is_ready():
    infer_queue.start_async()
    check_gil_released_safe(infer_queue.is_ready)