# Imports assumed by the test snippets in this listing (the opset version is illustrative):
import numpy as np
import openvino.runtime.opset8 as ops
from openvino.runtime import Model, Layout, Shape, PartialShape, set_batch, get_batch

def test_set_batch_default_batch_size():
    param1 = ops.parameter(Shape([2, 1]), dtype=np.float32, name="data1")
    param2 = ops.parameter(Shape([2, 1]), dtype=np.float32, name="data2")
    add = ops.add(param1, param2)
    func = Model(add, [param1, param2], "TestFunction")
    func_param1 = func.get_parameters()[0]
    func_param1.set_layout(Layout("NC"))
    set_batch(func)
    assert func.is_dynamic()
Example #2
def create_infer_requests(self, model, path, batch_sizes=None):
    if batch_sizes is not None:
        requests = []
        for parameter in model.get_parameters():
            parameter.set_layout(Layout("BC"))
        for b_s in batch_sizes:
            set_batch(model, b_s)
            compiled_model = self.ie.compile_model(model, device_name=self.device)
            requests.append(compiled_model.create_infer_request())
    else:
        compiled_model = self.ie.compile_model(model, device_name=self.device)
        requests = compiled_model.create_infer_request()
    log.info('The WaveRNN model {} is loaded to {}'.format(path, self.device))
    return requests
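# Hypothetical usage from the owning class (batch sizes are illustrative):
#   requests = self.create_infer_requests(model, model_path, batch_sizes=[1, 2, 4])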
Example #3
def test_set_batch_int():
    model = create_test_model()
    model_param1 = model.get_parameters()[0]
    model_param2 = model.get_parameters()[1]
    # batch == 2
    model_param1.set_layout(Layout("NC"))
    assert get_batch(model) == 2
    # set batch to 1
    set_batch(model, 1)
    assert get_batch(model) == 1
    # check if shape of param 1 has changed
    assert model_param1.get_output_shape(0) == PartialShape([1, 1])
    # check if shape of param 2 has not changed
    assert model_param2.get_output_shape(0) == PartialShape([2, 1])
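# Note: create_test_model() is not shown in these listings; a minimal sketch,
# consistent with the inline model construction in the next example (an
# assumption, not the original helper):
def create_test_model():
    param1 = ops.parameter(Shape([2, 1]), dtype=np.float32, name="data1")
    param2 = ops.parameter(Shape([2, 1]), dtype=np.float32, name="data2")
    add = ops.add(param1, param2)
    return Model(add, [param1, param2], "TestFunction")
Example #4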
def test_set_batch_int():
    param1 = ops.parameter(Shape([2, 1]), dtype=np.float32, name="data1")
    param2 = ops.parameter(Shape([2, 1]), dtype=np.float32, name="data2")
    add = ops.add(param1, param2)
    func = Model(add, [param1, param2], "TestFunction")
    func_param1 = func.get_parameters()[0]
    func_param2 = func.get_parameters()[1]
    # batch == 2
    func_param1.set_layout(Layout("NC"))
    assert get_batch(func) == 2
    # set batch to 1
    set_batch(func, 1)
    assert get_batch(func) == 1
    # check if shape of param 1 has changed
    assert func_param1.get_output_shape(0) == Shape([1, 1])
    # check if shape of param 2 has not changed
    assert func_param2.get_output_shape(0) == Shape([2, 1])
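Example #5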
def main():
    log.basicConfig(format='[ %(levelname)s ] %(message)s',
                    level=log.INFO,
                    stream=sys.stdout)
    # Parsing and validation of input arguments
    if len(sys.argv) != 3:
        log.info(f'Usage: {sys.argv[0]} <path_to_model> <device_name>')
        return 1

    model_path = sys.argv[1]
    device_name = sys.argv[2]
    labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    number_top = 1
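    # NOTE: 'digits' (used below for the batch size and input data) is not defined in
    # this excerpt; it is assumed to be an array of 28x28 grayscale digit images,
    # e.g. digits = np.zeros((2, 28, 28), dtype=np.float32)  # placeholder, not the original data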
    # ---------------------------Step 1. Initialize inference engine core--------------------------------------------------
    log.info('Creating OpenVINO Runtime Core')
    core = Core()

    # ---------------------------Step 2. Read a model in OpenVINO Intermediate Representation------------------------------
    log.info(
        f'Loading the model using ngraph function with weights from {model_path}'
    )
    model = create_ngraph_function(model_path)
    # ---------------------------Step 3. Apply preprocessing----------------------------------------------------------
    # Get names of input and output blobs
    ppp = PrePostProcessor(model)
    # 1) Set input tensor information:
    # - input() provides information about a single model input
    # - precision of tensor is supposed to be 'u8'
    # - layout of data is 'NHWC'
    ppp.input().tensor() \
        .set_element_type(Type.u8) \
        .set_layout(Layout('NHWC'))  # noqa: N400

    # 2) Here we suppose model has 'NCHW' layout for input
    ppp.input().model().set_layout(Layout('NCHW'))
    # 3) Set output tensor information:
    # - precision of tensor is supposed to be 'f32'
    ppp.output().tensor().set_element_type(Type.f32)

    # 4) Apply preprocessing, modifying the original 'model'
    model = ppp.build()

    # Set a batch size equal to number of input images
    set_batch(model, digits.shape[0])
    # ---------------------------Step 4. Loading model to the device-------------------------------------------------------
    log.info('Loading the model to the plugin')
    compiled_model = core.compile_model(model, device_name)

    # ---------------------------Step 5. Prepare input---------------------------------------------------------------------
    n, c, h, w = model.input().shape
    input_data = np.ndarray(shape=(n, c, h, w))
    for i in range(n):
        image = digits[i].reshape(28, 28)
        image = image[:, :, np.newaxis]
        input_data[i] = image

    # ---------------------------Step 6. Do inference----------------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    results = compiled_model.infer_new_request({0: input_data})

    # ---------------------------Step 7. Process output--------------------------------------------------------------------
    predictions = next(iter(results.values()))

    log.info(f'Top {number_top} results: ')
    for i in range(n):
        probs = predictions[i]
        # Get an array of number_top class IDs in descending order of probability
        top_n_indexes = np.argsort(probs)[-number_top:][::-1]

        header = 'classid probability'
        header = header + ' label' if labels else header

        log.info(f'Image {i}')
        log.info('')
        log.info(header)
        log.info('-' * len(header))

        for class_id in top_n_indexes:
            probability_indent = ' ' * (len('classid') - len(str(class_id)) + 1)
            label_indent = ' ' * (len('probability') - 8) if labels else ''
            label = labels[class_id] if labels else ''
            log.info(
                f'{class_id}{probability_indent}{probs[class_id]:.7f}{label_indent}{label}'
            )
        log.info('')

    # ----------------------------------------------------------------------------------------------------------------------
    log.info(
        'This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n'
    )
    return 0
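# Hypothetical invocation (model path and device are placeholders):
#   python3 <this_sample>.py lenet.xml CPU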
Example #6
def test_set_batch_default_batch_size():
    model = create_test_model()
    model_param1 = model.get_parameters()[0]
    model_param1.set_layout(Layout("NC"))
    set_batch(model)
    assert model.is_dynamic()
Example #7
def main():
    args = parse_args()

    # --------------------------- Step 1. Initialize OpenVINO Runtime Core ------------------------------------------------
    log.info('Creating OpenVINO Runtime Core')
    core = Core()

    # --------------------------- Step 2. Read a model --------------------------------------------------------------------
    if args.model:
        log.info(f'Reading the model: {args.model}')
        # (.xml and .bin files) or (.onnx file)
        model = core.read_model(args.model)

        # --------------------------- Step 3. Apply preprocessing -------------------------------------------------------------
        if args.output_layers:
            output_layer_names, output_layer_ports = parse_outputs_from_args(
                args)
            model.add_outputs(list(zip(output_layer_names,
                                       output_layer_ports)))

        if args.layout:
            layouts = parse_input_layouts(args, model.inputs)

        ppp = PrePostProcessor(model)

        for i in range(len(model.inputs)):
            ppp.input(i).tensor().set_element_type(Type.f32)

            input_name = model.input(i).get_any_name()

            if args.layout and input_name in layouts.keys():
                ppp.input(i).tensor().set_layout(Layout(layouts[input_name]))
                ppp.input(i).model().set_layout(Layout(layouts[input_name]))

        for i in range(len(model.outputs)):
            ppp.output(i).tensor().set_element_type(Type.f32)

        model = ppp.build()

        if args.batch_size:
            batch_size = args.batch_size if args.context_window_left == args.context_window_right == 0 else 1

            if any([not _input.node.layout.empty for _input in model.inputs]):
                set_batch(model, batch_size)
            else:
                log.warning(
                    'Layout is not set for any input, so custom batch size is not set'
                )

    # --------------------------- Step 4. Configure plugin ---------------------------------------------------------
    devices = args.device.replace('HETERO:', '').split(',')
    plugin_config = {}

    if 'GNA' in args.device:
        gna_device_mode = devices[0] if '_' in devices[0] else 'GNA_AUTO'
        devices[0] = 'GNA'

        plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
        plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'
        plugin_config['GNA_EXEC_TARGET'] = args.exec_target
        plugin_config['GNA_PWL_MAX_ERROR_PERCENT'] = str(args.pwl_me)

        # Set a GNA scale factor
        if args.import_gna_model:
            if args.scale_factor:
                log.warning(
                    f'Custom scale factor will be used for imported GNA model: {args.import_gna_model}'
                )
                set_scale_factors(plugin_config, parse_scale_factors(args))
            else:
                log.info(
                    f'Using scale factor from the imported GNA model: {args.import_gna_model}'
                )
        else:
            if args.scale_factor:
                set_scale_factors(plugin_config, parse_scale_factors(args))
            else:
                scale_factors = []

                for file_name in re.split(', |,', args.input):
                    _, utterances = read_utterance_file(file_name)
                    scale_factors.append(get_scale_factor(utterances[0]))

                log.info(
                    'Using scale factor(s) calculated from first utterance')
                set_scale_factors(plugin_config, scale_factors)

        if args.export_embedded_gna_model:
            plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
            plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration

        if args.performance_counter:
            plugin_config['PERF_COUNT'] = 'YES'

    device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]

    # --------------------------- Step 5. Loading model to the device -----------------------------------------------------
    log.info('Loading the model to the plugin')
    if args.model:
        compiled_model = core.compile_model(model, device_str, plugin_config)
    else:
        with open(args.import_gna_model, 'rb') as f:
            buf = BytesIO(f.read())
            compiled_model = core.import_model(buf, device_str, plugin_config)

    # --------------------------- Exporting GNA model using InferenceEngine AOT API ---------------------------------------
    if args.export_gna_model:
        log.info(f'Writing GNA Model to {args.export_gna_model}')
        user_stream = compiled_model.export_model()
        with open(args.export_gna_model, 'wb') as f:
            f.write(user_stream)
        return 0

    if args.export_embedded_gna_model:
        log.info(
            f'Exported GNA embedded model to file {args.export_embedded_gna_model}'
        )
        log.info(
            f'GNA embedded model export done for GNA generation {args.embedded_gna_configuration}'
        )
        return 0

    # --------------------------- Step 6. Set up input --------------------------------------------------------------------
    if args.input_layers:
        input_layer_names = re.split(', |,', args.input_layers)
    else:
        input_layer_names = [
            _input.any_name for _input in compiled_model.inputs
        ]

    input_file_names = re.split(', |,', args.input)

    if len(input_layer_names) != len(input_file_names):
        log.error(
            f'Number of model inputs ({len(compiled_model.inputs)}) is not equal '
            f'to number of ark files ({len(input_file_names)})')
        sys.exit(-3)

    input_file_data = [
        read_utterance_file(file_name) for file_name in input_file_names
    ]
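    # The 'infer_data' list built below holds one feed dict per utterance,
    # mapping each input layer name to the i-th utterance of its input file.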

    infer_data = [{
        input_layer_names[j]: input_file_data[j].utterances[i]
        for j in range(len(input_layer_names))
    } for i in range(len(input_file_data[0].utterances))]

    if args.output_layers:
        output_layer_names, output_layer_ports = parse_outputs_from_args(args)
        # If a name of output layer contains a port number then concatenate output_layer_names and output_layer_ports
        if ':' in compiled_model.outputs[0].any_name:
            output_layer_names = [
                f'{output_layer_names[i]}:{output_layer_ports[i]}'
                for i in range(len(output_layer_names))
            ]
    else:
        output_layer_names = [compiled_model.outputs[0].any_name]

    if args.output:
        output_file_names = re.split(', |,', args.output)

        if len(output_layer_names) != len(output_file_names):
            log.error(
                'The number of output files is not equal to the number of model outputs.'
            )
            sys.exit(-6)

    if args.reference:
        reference_file_names = re.split(', |,', args.reference)

        if len(output_layer_names) != len(reference_file_names):
            log.error(
                'The number of reference files is not equal to the number of model outputs.'
            )
            sys.exit(-5)

        reference_file_data = [
            read_utterance_file(file_name)
            for file_name in reference_file_names
        ]

        references = [{
            output_layer_names[j]: reference_file_data[j].utterances[i]
            for j in range(len(output_layer_names))
        } for i in range(len(input_file_data[0].utterances))]

    # --------------------------- Step 7. Create infer request ------------------------------------------------------------
    infer_request = compiled_model.create_infer_request()

    # --------------------------- Step 8. Do inference --------------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    results = []
    total_infer_time = 0

    for i in range(len(infer_data)):
        start_infer_time = default_timer()

        # Reset states between utterance inferences to remove a memory impact
        for state in infer_request.query_state():
            state.reset()

        results.append(
            do_inference(
                infer_data[i],
                infer_request,
                args.context_window_left,
                args.context_window_right,
            ))

        infer_time = default_timer() - start_infer_time
        total_infer_time += infer_time
        num_of_frames = infer_data[i][input_layer_names[0]].shape[0]
        avg_infer_time_per_frame = infer_time / num_of_frames

        # --------------------------- Step 9. Process output ------------------------------------------------------------------
        log.info('')
        log.info(f'Utterance {i}:')
        log.info(f'Total time in Infer (HW and SW): {infer_time * 1000:.2f}ms')
        log.info(f'Frames in utterance: {num_of_frames}')
        log.info(
            f'Average Infer time per frame: {avg_infer_time_per_frame * 1000:.2f}ms'
        )

        for name in output_layer_names:
            log.info('')
            log.info(f'Output blob name: {name}')
            log.info(f'Number scores per frame: {results[i][name].shape[1]}')

            if args.reference:
                log.info('')
                compare_with_reference(results[i][name], references[i][name])

        if args.performance_counter:
            if 'GNA' in args.device:
                total_cycles = infer_request.profiling_info[0].real_time.total_seconds()
                stall_cycles = infer_request.profiling_info[1].real_time.total_seconds()
                active_cycles = total_cycles - stall_cycles
                frequency = 10**6
                if args.arch == 'CORE':
                    frequency *= GNA_CORE_FREQUENCY
                else:
                    frequency *= GNA_ATOM_FREQUENCY
                total_inference_time = total_cycles / frequency
                active_time = active_cycles / frequency
                stall_time = stall_cycles / frequency
                log.info('')
                log.info('Performance Statistics of GNA Hardware')
                log.info(
                    f'   Total Inference Time: {(total_inference_time * 1000):.4f} ms'
                )
                log.info(f'   Active Time: {(active_time * 1000):.4f} ms')
                log.info(f'   Stall Time:  {(stall_time * 1000):.4f} ms')

    log.info('')
    log.info(f'Total sample time: {total_infer_time * 1000:.2f}ms')

    if args.output:
        for i, name in enumerate(output_layer_names):
            data = [
                results[i][name]
                for i in range(len(input_file_data[0].utterances))
            ]
            write_utterance_file(output_file_names[i], input_file_data[0].keys,
                                 data)
            log.info(f'File {output_file_names[i]} was created!')


    # ----------------------------------------------------------------------------------------------------------------------
    log.info(
        'This sample is an API example, '
        'for any performance measurements please use the dedicated benchmark_app tool\n'
    )
    return 0
Example #8
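# Imports assumed by this excerpt (the opening of the snippet, where 'ppp' and the
# model are created, is not shown):
#   from openvino.preprocess import PrePostProcessor, ColorFormat, ResizeAlgorithm
#   from openvino.runtime import Core, serialize, set_batch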
# Explicit preprocessing steps. Layout conversion will be done automatically as last step
ppp.input().preprocess() \
    .convert_element_type() \
    .convert_color(ColorFormat.RGB) \
    .resize(ResizeAlgorithm.RESIZE_LINEAR) \
    .mean([123.675, 116.28, 103.53]) \
    .scale([58.624, 57.12, 57.375])

# Dump preprocessor
print(f'Dump preprocessor: {ppp}')
model = ppp.build()

# ======== Step 2: Change batch size ================
# In this example we also want to change batch size to increase throughput
set_batch(model, 2)

# ======== Step 3: Save the model ================
serialize(model, '/path/to/some_model_saved.xml',
          '/path/to/some_model_saved.bin')
# ! [ov:preprocess:save]

# ! [ov:preprocess:save_load]
core = Core()
core.set_property({'CACHE_DIR': '/path/to/cache/dir'})

# In case that no preprocessing is needed anymore, we can load model on target device directly
# With cached model available, it will also save some time on reading original model
compiled_model = core.compile_model('/path/to/some_model_saved.xml', 'CPU')
# ! [ov:preprocess:save_load]
Example #9
# Copyright (C) 2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

#! [import]
from openvino.runtime import Core, Layout, set_batch
from openvino.preprocess import PrePostProcessor
#! [import]

model_path = "model.xml"
batch_size = 8

#! [ov_gna_read_model]
core = Core()
model = core.read_model(model=model_path)
#! [ov_gna_read_model]

#! [ov_gna_set_nc_layout]
ppp = PrePostProcessor(model)
for i in range(len(model.inputs)):
    input_name = model.input(i).get_any_name()
    ppp.input(i).model().set_layout(Layout("N?"))
model = ppp.build()
#! [ov_gna_set_nc_layout]

#! [ov_gna_set_batch_size]
set_batch(model, batch_size)
#! [ov_gna_set_batch_size]
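# A compile step would typically follow; a sketch (device name and any plugin
# configuration depend on the target setup):
compiled_model = core.compile_model(model, "GNA")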
Example #10
# Copyright (C) 2018-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from openvino.runtime import Core, Layout, set_batch
ov = Core()
model = ov.read_model("path/to/model")

#! [picture_snippet]
model.reshape([8, 3, 448, 448])
#! [picture_snippet]

#! [set_batch]
model.get_parameters()[0].set_layout(Layout("N..."))
set_batch(model, 5)
#! [set_batch]

#! [simple_spatials_change]
from cv2 import imread
image = imread("path/to/image")
model.reshape([1, 3, image.shape[0], image.shape[1]])
#! [simple_spatials_change]

#! [obj_to_shape]
port_to_shape = dict()
for input_obj in model.inputs:
    shape = input_obj.get_partial_shape()
    # modify shape to fit your needs
    # ...
    port_to_shape[input_obj] = shape
model.reshape(port_to_shape)
#! [obj_to_shape]
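# A variant of the same idea keyed by tensor name instead of port object
# (a sketch; assumes every input has at least one tensor name):
name_to_shape = dict()
for input_obj in model.inputs:
    shape = input_obj.get_partial_shape()
    # modify shape to fit your needs
    # ...
    name_to_shape[input_obj.get_any_name()] = shape
model.reshape(name_to_shape)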