def test_ngraph_preprocess_input_output_by_name():
    """Check pre/post-processing attached to ports that are looked up by name.

    The model forwards its two int32 parameters "A" and "B" directly to its
    outputs. Input "B" is converted to f32, has mean 1 subtracted and is scaled
    by 2; input "A" is converted to f32 and has mean 1 subtracted; both outputs
    get a custom ``abs`` post-processing step.
    """
    dims = [2, 2]
    param_a = ops.parameter(dims, dtype=np.int32, name="A")
    param_b = ops.parameter(dims, dtype=np.int32, name="B")
    function = Model([param_a, param_b], [param_a, param_b], "TestFunction")

    @custom_preprocess_function
    def take_abs(output: Output):
        return ops.abs(output)

    ppp = PrePostProcessor(function)

    # Configure "B" first, then "A" - same order as the ports are requested.
    input_b = ppp.input("B")
    input_b.tensor().set_element_type(Type.i32)
    steps_b = input_b.preprocess()
    steps_b.convert_element_type(Type.f32).mean(1.).scale(2.)

    input_a = ppp.input("A")
    steps_a = input_a.preprocess()
    steps_a.convert_element_type(Type.f32).mean(1.)

    output_a = ppp.output("A")
    output_a.postprocess().custom(take_abs)
    output_b = ppp.output("B")
    output_b.postprocess().custom(take_abs)

    function = ppp.build()

    data_a = np.array([[0, 1], [2, -2]]).astype(np.int32)
    data_b = np.array([[-1, 3], [5, 7]]).astype(np.int32)
    expected_a = np.array([[1, 0], [1, 3]]).astype(np.float32)
    expected_b = np.array([[1, 1], [2, 3]]).astype(np.float32)

    computation = get_runtime().computation(function)
    result_a, result_b = computation(data_a, data_b)

    assert np.equal(result_a, expected_a).all()
    assert np.equal(result_b, expected_b).all()
def pre_post_processing(function: Model, app_inputs_info, input_precision: str,
                        output_precision: str, input_output_precision: str):
    """Configure tensor element types and model layouts through PrePostProcessor.

    :param function: model whose inputs/outputs are configured
    :param app_inputs_info: per-input info objects, indexed like ``function.inputs``;
        their ``element_type`` attribute is updated here and their ``layout``
        attribute is applied as the model layout
    :param input_precision: precision applied to every input (may be empty)
    :param output_precision: precision applied to every output (may be empty)
    :param input_output_precision: per-node precision specification, parsed by
        ``_parse_arg_map`` (may be empty)
    :raises Exception: if a node named in ``input_output_precision`` is neither
        an input nor an output of the model
    """
    pre_post_processor = PrePostProcessor(function)

    if input_precision:
        common_input_type = get_element_type(input_precision)
        for index, _ in enumerate(function.inputs):
            pre_post_processor.input(index).tensor().set_element_type(common_input_type)
            app_inputs_info[index].element_type = common_input_type

    if output_precision:
        common_output_type = get_element_type(output_precision)
        for index, _ in enumerate(function.outputs):
            pre_post_processor.output(index).tensor().set_element_type(common_output_type)

    user_precision_map = {}
    if input_output_precision:
        parsed_precisions = _parse_arg_map(input_output_precision)
        input_names = get_input_output_names(function.get_parameters())
        output_names = get_input_output_names(function.get_results())

        # Translate every user-supplied precision into an element type up front
        user_precision_map = {
            node_name: get_element_type(precision)
            for node_name, precision in parsed_precisions.items()
        }

        for name, element_type in user_precision_map.items():
            if name in input_names:
                port = input_names.index(name)
                app_inputs_info[port].element_type = element_type
                pre_post_processor.input(port).tensor().set_element_type(element_type)
                continue
            if name in output_names:
                port = output_names.index(name)
                pre_post_processor.output(port).tensor().set_element_type(element_type)
                continue
            raise Exception(f"Node '{name}' does not exist in network")

    # update app_inputs_info
    if not input_precision:
        for index, model_input in enumerate(function.inputs):
            info = app_inputs_info[index]
            if info.name in user_precision_map:
                info.element_type = user_precision_map[info.name]
            elif info.is_image:
                # Inputs flagged as images are fed as u8
                info.element_type = Type.u8
                pre_post_processor.input(index).tensor().set_element_type(Type.u8)
            else:
                info.element_type = model_input.get_element_type()

    # set layout for model input
    for port, info in enumerate(app_inputs_info):
        pre_post_processor.input(port).model().set_layout(info.layout)

    # The build result is only bound to the local name; nothing is returned.
    function = pre_post_processor.build()
def test_ngraph_preprocess_postprocess_layout():
    """Check layout conversion in both the pre- and post-processing stages.

    The input tensor is declared as "NCWH"; preprocessing subtracts mean 1,
    converts to "NCHW" and reverses channels. Post-processing applies the
    identity dimension order ``[0, 1, 2, 3]``.
    """
    dims = [1, 1, 3, 3]
    parameter_a = ops.parameter(dims, dtype=np.float32, name="A")
    function = Model(parameter_a, [parameter_a], "TestFunction")

    tensor_layout = ov.Layout("NCWH")
    target_layout = ov.Layout("NCHW")

    ppp = PrePostProcessor(function)

    model_input = ppp.input()
    model_input.tensor().set_layout(tensor_layout)
    pre_steps = model_input.preprocess()
    pre_steps.mean(1.).convert_layout(target_layout).reverse_channels()

    model_output = ppp.output()
    model_output.postprocess().convert_layout([0, 1, 2, 3])

    function = ppp.build()

    input_data = np.array([[[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]]).astype(np.float32)
    expected_output = np.array([[[[0, 3, 6], [1, 4, 7], [2, 5, 8]]]]).astype(np.float32)

    computation = get_runtime().computation(function)
    output = computation(input_data)

    assert np.equal(output, expected_output).all()
def test_ngraph_preprocess_spatial_static_shape():
    """Check preprocessing with a static spatial shape set on the input tensor.

    The input tensor is described with a "CHW" layout, a 2x2 spatial static
    shape and RGB color format; preprocessing converts to f32 and subtracts
    the means ``[1., 2.]``.
    """
    dims = [2, 2, 2]
    parameter_a = ops.parameter(dims, dtype=np.int32, name="A")
    function = Model(parameter_a, [parameter_a], "TestFunction")

    chw = ov.Layout("CHW")
    rgb = ColorFormat.RGB

    ppp = PrePostProcessor(function)

    model_input = ppp.input()
    tensor_info = model_input.tensor().set_layout(chw)
    tensor_info = tensor_info.set_spatial_static_shape(2, 2)
    tensor_info.set_color_format(rgb, [])
    model_input.preprocess().convert_element_type(Type.f32).mean([1., 2.])
    model_input.model().set_layout(chw)

    model_output = ppp.output()
    model_output.tensor().set_layout(chw).set_element_type(Type.f32)
    model_output.model().set_layout(chw)

    function = ppp.build()

    input_data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]).astype(np.int32)
    expected_output = np.array([[[0, 1], [2, 3]], [[3, 4], [5, 6]]]).astype(np.float32)

    computation = get_runtime().computation(function)
    output = computation(input_data)

    assert np.equal(output, expected_output).all()
def test_ngraph_preprocess_output_postprocess():
    """Check a chain of post-processing steps that ends with a custom step.

    Preprocessing converts the int32 input to f32 and subtracts the means
    ``[1., 2.]``. Post-processing converts the element type to f32, applies two
    layout conversions and finally a custom ``abs`` step.
    """
    dims = [2, 2]
    parameter_a = ops.parameter(dims, dtype=np.int32, name="A")
    function = Model(parameter_a, [parameter_a], "TestFunction")

    nchw = ov.Layout("NCHW")
    nhwc = ov.Layout("NHWC")
    identity_order = [0, 1]

    @custom_preprocess_function
    def take_abs(output: Output):
        return ops.abs(output)

    ppp = PrePostProcessor(function)

    model_input = ppp.input()
    model_input.tensor().set_layout(nchw)
    model_input.preprocess().convert_element_type(Type.f32).mean([1., 2.])

    post_steps = ppp.output().postprocess()
    post_steps = post_steps.convert_element_type(Type.f32)
    post_steps = post_steps.convert_layout(nhwc)
    post_steps = post_steps.convert_layout(identity_order)
    post_steps.custom(take_abs)

    function = ppp.build()

    input_data = np.array([[-1, -2], [-3, -4]]).astype(np.int32)
    expected_output = np.array([[2, 4], [4, 6]]).astype(np.float32)

    computation = get_runtime().computation(function)
    output = computation(input_data)

    assert np.equal(output, expected_output).all()
def main() -> int:
    """Classify every image given on the command line using asynchronous inference.

    Reads the model named by ``args.model``, resizes each input image to the
    model's spatial dimensions, embeds u8 / 'NHWC' input handling into the
    model and submits one asynchronous request per image. Results are handled
    by ``completion_callback``.

    Returns:
        0 on success; -1 if the model does not have exactly one input and one
        output, or if an input image cannot be read.
    """
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = parse_args()

    # --------------------------- Step 1. Initialize OpenVINO Runtime Core ---------------------------
    log.info('Creating OpenVINO Runtime Core')
    core = Core()

    # --------------------------- Step 2. Read a model -----------------------------------------------
    log.info(f'Reading the model: {args.model}')
    # (.xml and .bin files) or (.onnx file)
    model = core.read_model(args.model)

    if len(model.inputs) != 1:
        log.error('Sample supports only single input topologies')
        return -1

    if len(model.outputs) != 1:
        log.error('Sample supports only single output topologies')
        return -1

    # --------------------------- Step 3. Set up input -----------------------------------------------
    # Read input images
    images = []
    for image_path in args.input:
        image = cv2.imread(image_path)
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising; fail here with a clear message rather than
        # letting cv2.resize raise an opaque error below.
        if image is None:
            log.error(f'Cannot read the image: {image_path}')
            return -1
        images.append(image)

    # Resize images to model input dims
    _, _, h, w = model.input().shape
    resized_images = [cv2.resize(image, (w, h)) for image in images]

    # Add N dimension
    input_tensors = [np.expand_dims(image, 0) for image in resized_images]

    # --------------------------- Step 4. Apply preprocessing ----------------------------------------
    ppp = PrePostProcessor(model)

    # 1) Set input tensor information:
    # - input() provides information about a single model input
    # - precision of tensor is supposed to be 'u8'
    # - layout of data is 'NHWC'
    ppp.input().tensor() \
        .set_element_type(Type.u8) \
        .set_layout(Layout('NHWC'))  # noqa: N400

    # 2) Here we suppose model has 'NCHW' layout for input
    ppp.input().model().set_layout(Layout('NCHW'))

    # 3) Set output tensor information:
    # - precision of tensor is supposed to be 'f32'
    ppp.output().tensor().set_element_type(Type.f32)

    # 4) Apply preprocessing modifying the original 'model'
    model = ppp.build()

    # --------------------------- Step 5. Loading model to the device --------------------------------
    log.info('Loading the model to the plugin')
    compiled_model = core.compile_model(model, args.device)

    # --------------------------- Step 6. Create infer request queue ---------------------------------
    log.info('Starting inference in asynchronous mode')
    # One job per input tensor
    infer_queue = AsyncInferQueue(compiled_model, len(input_tensors))
    infer_queue.set_callback(completion_callback)

    # --------------------------- Step 7. Do inference -----------------------------------------------
    for i, input_tensor in enumerate(input_tensors):
        # The image path is passed along as the per-request user data
        infer_queue.start_async({0: input_tensor}, args.input[i])

    infer_queue.wait_all()

    # ------------------------------------------------------------------------------------------------
    log.info(
        'This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n'
    )
    return 0
def main():
    """Build a model with ``create_ngraph_function`` and classify the ``digits`` images.

    Expects exactly two command-line arguments: the path passed to
    ``create_ngraph_function`` and the device name. The input tensor is
    declared as u8 / 'NHWC', the model is reshaped so its batch equals the
    number of entries in ``digits``, and the top result per image is logged.

    Returns:
        0 on success, 1 when the argument count is wrong.
    """
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)

    # Parsing and validation of input arguments
    if len(sys.argv) != 3:
        log.info('Usage: <path_to_model> <device_name>')
        return 1

    _, model_path, device_name = sys.argv
    labels = [str(digit) for digit in range(10)]
    number_top = 1

    # --------------------------- Step 1. Initialize OpenVINO Runtime Core ---------------------------
    log.info('Creating OpenVINO Runtime Core')
    core = Core()

    # --------------------------- Step 2. Create the model -------------------------------------------
    log.info(
        f'Loading the network using ngraph function with weights from {model_path}'
    )
    model = create_ngraph_function(model_path)

    # --------------------------- Step 3. Apply preprocessing ----------------------------------------
    ppp = PrePostProcessor(model)

    # 1) Input tensor: 'u8' precision, 'NHWC' data layout
    input_tensor_info = ppp.input().tensor()
    input_tensor_info.set_element_type(Type.u8).set_layout(Layout('NHWC'))  # noqa: N400

    # 2) Here we suppose model has 'NCHW' layout for input
    ppp.input().model().set_layout(Layout('NCHW'))

    # 3) Output tensor: 'f32' precision
    ppp.output().tensor().set_element_type(Type.f32)

    # 4) Apply preprocessing modifying the original 'model'
    model = ppp.build()

    # Set a batch size equal to number of input images
    input_name = model.input().get_any_name()
    new_shape = PartialShape(
        (digits.shape[0], model.input().shape[1], model.input().shape[2],
         model.input().shape[3]))
    model.reshape({input_name: new_shape})

    # --------------------------- Step 4. Loading model to the device --------------------------------
    log.info('Loading the model to the plugin')
    compiled_model = core.compile_model(model, device_name)

    # --------------------------- Step 5. Prepare input ----------------------------------------------
    # The four dimensions are taken in the order the model input reports them.
    batch, dim1, dim2, dim3 = model.input().shape
    input_data = np.ndarray(shape=(batch, dim1, dim2, dim3))
    for index in range(batch):
        # 28x28 image with a trailing axis of size 1
        input_data[index] = digits[index].reshape(28, 28)[:, :, np.newaxis]

    # --------------------------- Step 6. Do inference -----------------------------------------------
    log.info('Starting inference in synchronous mode')
    results = compiled_model.infer_new_request({0: input_data})

    # --------------------------- Step 7. Process output ---------------------------------------------
    predictions = next(iter(results.values()))

    # The table header and label indent do not depend on the image
    header = 'classid probability'
    if labels:
        header = header + ' label'
    label_indent = ' ' * (len('probability') - 8) if labels else ''

    log.info(f'Top {number_top} results: ')
    for index in range(batch):
        probs = predictions[index]
        # Get an array of number_top class IDs in descending order of probability
        top_class_ids = np.argsort(probs)[-number_top:][::-1]

        log.info(f'Image {index}')
        log.info('')
        log.info(header)
        log.info('-' * len(header))

        for class_id in top_class_ids:
            probability_indent = ' ' * (len('classid') - len(str(class_id)) + 1)
            label = labels[class_id] if labels else ''
            log.info(
                f'{class_id}{probability_indent}{probs[class_id]:.7f}{label_indent}{label}'
            )
        log.info('')

    # ------------------------------------------------------------------------------------------------
    log.info(
        'This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n'
    )
    return 0
def main():
    """Run object detection on one image and save the annotated image as ``out.bmp``.

    Expects three command-line arguments: model path, image path and device
    name. The model is reshaped to the image's height and width, u8 / 'NHWC'
    input handling is embedded into the model, and every detection with
    confidence above 0.5 is logged and drawn on the image.

    Returns:
        0 on success; 1 when the argument count is wrong; -1 if the model does
        not have exactly one input and one output, or the image cannot be read.
    """
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)

    # Parsing and validation of input arguments
    if len(sys.argv) != 4:
        log.info('Usage: <path_to_model> <path_to_image> <device_name>')
        return 1

    model_path = sys.argv[1]
    image_path = sys.argv[2]
    device_name = sys.argv[3]

    # --------------------------- Step 1. Initialize OpenVINO Runtime Core ---------------------------
    log.info('Creating OpenVINO Runtime Core')
    core = Core()

    # --------------------------- Step 2. Read a model -----------------------------------------------
    log.info(f'Reading the network: {model_path}')
    # (.xml and .bin files) or (.onnx file)
    model = core.read_model(model_path)

    if len(model.inputs) != 1:
        log.error('Sample supports only single input topologies')
        return -1

    if len(model.outputs) != 1:
        log.error('Sample supports only single output topologies')
        return -1

    # --------------------------- Step 3. Set up input -----------------------------------------------
    # Read input image
    image = cv2.imread(image_path)
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; without this check the 4-way shape unpacking below fails
    # with an unrelated-looking error.
    if image is None:
        log.error(f'Cannot read the image: {image_path}')
        return -1

    # Add N dimension
    input_tensor = np.expand_dims(image, 0)

    log.info(
        'Reshaping the network to the height and width of the input image')
    # The image tensor is N, H, W, C; the model is reshaped as N, C, H, W
    n, h, w, c = input_tensor.shape
    model.reshape({model.input().get_any_name(): PartialShape((n, c, h, w))})

    # --------------------------- Step 4. Apply preprocessing ----------------------------------------
    ppp = PrePostProcessor(model)

    # 1) Set input tensor information:
    # - input() provides information about a single model input
    # - precision of tensor is supposed to be 'u8'
    # - layout of data is 'NHWC'
    ppp.input().tensor() \
        .set_element_type(Type.u8) \
        .set_layout(Layout('NHWC'))  # noqa: N400

    # 2) Here we suppose model has 'NCHW' layout for input
    ppp.input().model().set_layout(Layout('NCHW'))

    # 3) Set output tensor information:
    # - precision of tensor is supposed to be 'f32'
    ppp.output().tensor().set_element_type(Type.f32)

    # 4) Apply preprocessing modifying the original 'model'
    model = ppp.build()

    # --------------------------- Step 5. Loading model to the device --------------------------------
    log.info('Loading the model to the plugin')
    compiled_model = core.compile_model(model, device_name)

    # --------------------------- Step 6. Create infer request and do inference synchronously --------
    log.info('Starting inference in synchronous mode')
    results = compiled_model.infer_new_request({0: input_tensor})

    # --------------------------- Step 7. Process output ---------------------------------------------
    predictions = next(iter(results.values()))

    # Change a shape of a numpy.ndarray with results ([1, 1, N, 7]) to get another one ([N, 7]),
    # where N is the number of detected bounding boxes
    detections = predictions.reshape(-1, 7)

    for detection in detections:
        confidence = detection[2]

        if confidence > 0.5:
            class_id = int(detection[1])

            # Box coordinates are multiplied by the image width/height
            xmin = int(detection[3] * w)
            ymin = int(detection[4] * h)
            xmax = int(detection[5] * w)
            ymax = int(detection[6] * h)

            log.info(
                f'Found: class_id = {class_id}, confidence = {confidence:.2f}, '
                f'coords = ({xmin}, {ymin}), ({xmax}, {ymax})')

            # Draw a bounding box on a output image
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

    cv2.imwrite('out.bmp', image)

    if os.path.exists('out.bmp'):
        log.info('Image out.bmp was created!')
    else:
        log.error('Image out.bmp was not created. Check your permissions.')

    # ------------------------------------------------------------------------------------------------
    log.info(
        'This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n'
    )
    return 0
def test_infer_float16(device):
    """Infer an f16 two-input Add model after declaring f16 on every port.

    Both inputs receive the same data; the result must equal the data added to
    itself and must match what the request exposes through ``results``.
    """
    model_xml = b"""<net name="add_model" version="10"> <layers> <layer id="0" name="x" type="Parameter" version="opset1"> <data element_type="f16" shape="2,2,2"/> <output> <port id="0" precision="FP16"> <dim>2</dim> <dim>2</dim> <dim>2</dim> </port> </output> </layer> <layer id="1" name="y" type="Parameter" version="opset1"> <data element_type="f16" shape="2,2,2"/> <output> <port id="0" precision="FP16"> <dim>2</dim> <dim>2</dim> <dim>2</dim> </port> </output> </layer> <layer id="2" name="sum" type="Add" version="opset1"> <input> <port id="0"> <dim>2</dim> <dim>2</dim> <dim>2</dim> </port> <port id="1"> <dim>2</dim> <dim>2</dim> <dim>2</dim> </port> </input> <output> <port id="2" precision="FP16"> <dim>2</dim> <dim>2</dim> <dim>2</dim> </port> </output> </layer> <layer id="3" name="sum/sink_port_0" type="Result" version="opset1"> <input> <port id="0"> <dim>2</dim> <dim>2</dim> <dim>2</dim> </port> </input> </layer> </layers> <edges> <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/> <edge from-layer="1" from-port="0" to-layer="2" to-port="1"/> <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/> </edges> </net>"""

    core = Core()
    func = core.read_model(model=model_xml)

    ppp = PrePostProcessor(func)
    # Same f16 tensor type and conversion step on both inputs, input 0 first
    for input_index in (0, 1):
        ppp.input(input_index).tensor().set_element_type(Type.f16)
        ppp.input(input_index).preprocess().convert_element_type(Type.f16)
    ppp.output(0).tensor().set_element_type(Type.f16)
    ppp.output(0).postprocess().convert_element_type(Type.f16)
    func = ppp.build()

    exec_net = core.compile_model(func, device)
    input_data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]).astype(np.float16)

    request = exec_net.create_infer_request()
    outputs = request.infer({0: input_data, 1: input_data})
    output_values = list(outputs.values())

    assert np.allclose(output_values, list(request.results.values()))
    assert np.allclose(output_values, input_data + input_data)
def test_ngraph_preprocess_model():
    """Apply pre/post-processing to a two-input Add model read from IR bytes.

    Input 1 is converted to f32 and scaled by 0.5, input 0 is converted to f32
    and has mean 5 subtracted; the single output gets a custom ``abs`` step.
    """
    model_xml = b"""<net name="add_model" version="10"> <layers> <layer id="0" name="x" type="Parameter" version="opset1"> <data element_type="i32" shape="2,2,2"/> <output> <port id="0" precision="FP32"> <dim>2</dim> <dim>2</dim> <dim>2</dim> </port> </output> </layer> <layer id="1" name="y" type="Parameter" version="opset1"> <data element_type="i32" shape="2,2,2"/> <output> <port id="0" precision="FP32"> <dim>2</dim> <dim>2</dim> <dim>2</dim> </port> </output> </layer> <layer id="2" name="sum" type="Add" version="opset1"> <input> <port id="0"> <dim>2</dim> <dim>2</dim> <dim>2</dim> </port> <port id="1"> <dim>2</dim> <dim>2</dim> <dim>2</dim> </port> </input> <output> <port id="2" precision="FP32"> <dim>2</dim> <dim>2</dim> <dim>2</dim> </port> </output> </layer> <layer id="3" name="sum/sink_port_0" type="Result" version="opset1"> <input> <port id="0"> <dim>2</dim> <dim>2</dim> <dim>2</dim> </port> </input> </layer> </layers> <edges> <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/> <edge from-layer="1" from-port="0" to-layer="2" to-port="1"/> <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/> </edges> </net>"""

    core = Core()
    function = core.read_model(model=model_xml)

    @custom_preprocess_function
    def take_abs(output: Output):
        return ops.abs(output)

    ppp = PrePostProcessor(function)

    second_input_steps = ppp.input(1).preprocess()
    second_input_steps.convert_element_type(Type.f32).scale(0.5)

    first_input_steps = ppp.input(0).preprocess()
    first_input_steps.convert_element_type(Type.f32).mean(5.)

    ppp.output(0).postprocess().custom(take_abs)

    function = ppp.build()

    input_data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]).astype(np.float32)
    expected_output = np.array([[[2, 1], [4, 7]], [[10, 13], [16, 19]]]).astype(np.float32)

    computation = get_runtime().computation(function)
    # The same data is fed to both inputs
    output = computation(input_data, input_data)

    assert np.equal(output, expected_output).all()
def pre_post_processing(model: Model, app_inputs_info, input_precision: str,
                        output_precision: str, input_output_precision: str):
    """Configure tensor element types and model layouts through PrePostProcessor.

    Nodes listed in ``input_output_precision`` may be referred to either by the
    names from ``get_input_output_names`` or by the names from ``get_node_names``.

    :param model: model whose inputs/outputs are configured
    :param app_inputs_info: per-input info objects, indexed like ``model.inputs``;
        their ``element_type`` attribute is updated here and their ``name`` /
        ``layout`` attributes are used to set the model layout
    :param input_precision: precision applied to every input (may be empty)
    :param output_precision: precision applied to every output (may be empty)
    :param input_output_precision: per-node precision specification, parsed by
        ``parse_input_output_precision`` (may be empty)
    :raises Exception: if a node named in ``input_output_precision`` matches no
        input or output of the model
    """
    pre_post_processor = PrePostProcessor(model)

    if input_precision:
        common_input_type = get_element_type(input_precision)
        for index, _ in enumerate(model.inputs):
            pre_post_processor.input(index).tensor().set_element_type(common_input_type)
            app_inputs_info[index].element_type = common_input_type

    if output_precision:
        common_output_type = get_element_type(output_precision)
        for index, _ in enumerate(model.outputs):
            pre_post_processor.output(index).tensor().set_element_type(common_output_type)

    user_precision_map = {}
    if input_output_precision:
        parsed_precisions = parse_input_output_precision(input_output_precision)
        input_names = get_input_output_names(model.inputs)
        input_node_names = get_node_names(model.inputs)
        output_names = get_input_output_names(model.outputs)
        output_node_names = get_node_names(model.outputs)

        # Translate every user-supplied precision into an element type up front
        user_precision_map = {
            node_name: get_element_type(precision)
            for node_name, precision in parsed_precisions.items()
        }

        for name, element_type in user_precision_map.items():
            # Lookup order: input name, input node name, output name, output node name
            if name in input_names:
                input_index = input_names.index(name)
                app_inputs_info[input_index].element_type = element_type
                pre_post_processor.input(input_index).tensor().set_element_type(element_type)
            elif name in input_node_names:
                input_index = input_node_names.index(name)
                app_inputs_info[input_index].element_type = element_type
                pre_post_processor.input(input_index).tensor().set_element_type(element_type)
            elif name in output_names:
                # Outputs matched by name are addressed by that name
                pre_post_processor.output(name).tensor().set_element_type(element_type)
            elif name in output_node_names:
                # Outputs matched by node name are addressed by position
                output_index = output_node_names.index(name)
                pre_post_processor.output(output_index).tensor().set_element_type(element_type)
            else:
                raise Exception(f"Node '{name}' does not exist in model")

    # update app_inputs_info
    if not input_precision:
        inputs = model.inputs
        input_node_names = get_node_names(model.inputs)
        for index, _ in enumerate(inputs):
            info = app_inputs_info[index]
            node_name = input_node_names[index]
            if info.name in user_precision_map:
                info.element_type = user_precision_map[info.name]
            elif node_name in user_precision_map:
                info.element_type = user_precision_map[node_name]
            elif info.is_image:
                # Inputs flagged as images are fed as u8
                info.element_type = Type.u8
                pre_post_processor.input(index).tensor().set_element_type(Type.u8)

    # set layout for model input
    for info in app_inputs_info:
        pre_post_processor.input(info.name).model().set_layout(info.layout)

    # The build result is only bound to the local name; nothing is returned.
    model = pre_post_processor.build()
def main():
    """Classify a single image synchronously and log the top 10 results.

    Expects three command-line arguments: model path, image path and device
    name. The input tensor is declared as u8 / 'NHWC' with the image's own
    height and width, and a linear resize step is added to the preprocessing.

    Returns:
        0 on success; 1 when the argument count is wrong; -1 if the model does
        not have exactly one input and one output, or the image cannot be read.
    """
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)

    # Parsing and validation of input arguments
    if len(sys.argv) != 4:
        log.info('Usage: <path_to_model> <path_to_image> <device_name>')
        return 1

    model_path = sys.argv[1]
    image_path = sys.argv[2]
    device_name = sys.argv[3]

    # --------------------------- Step 1. Initialize OpenVINO Runtime Core ---------------------------
    log.info('Creating OpenVINO Runtime Core')
    core = Core()

    # --------------------------- Step 2. Read a model -----------------------------------------------
    log.info(f'Reading the network: {model_path}')
    # (.xml and .bin files) or (.onnx file)
    model = core.read_model(model_path)

    if len(model.inputs) != 1:
        log.error('Sample supports only single input topologies')
        return -1

    if len(model.outputs) != 1:
        log.error('Sample supports only single output topologies')
        return -1

    # --------------------------- Step 3. Set up input -----------------------------------------------
    # Read input image
    image = cv2.imread(image_path)
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; without this check the 4-way shape unpacking below fails
    # with an unrelated-looking error.
    if image is None:
        log.error(f'Cannot read the image: {image_path}')
        return -1

    # Add N dimension
    input_tensor = np.expand_dims(image, 0)

    # --------------------------- Step 4. Apply preprocessing ----------------------------------------
    ppp = PrePostProcessor(model)

    _, h, w, _ = input_tensor.shape

    # 1) Set input tensor information:
    # - input() provides information about a single model input
    # - precision of tensor is supposed to be 'u8'
    # - layout of data is 'NHWC'
    # - set static spatial dimensions to input tensor to resize from
    ppp.input().tensor() \
        .set_element_type(Type.u8) \
        .set_layout(Layout('NHWC')) \
        .set_spatial_static_shape(h, w)  # noqa: ECE001, N400

    # 2) Adding explicit preprocessing steps:
    # - apply linear resize from tensor spatial dims to model spatial dims
    ppp.input().preprocess().resize(ResizeAlgorithm.RESIZE_LINEAR)

    # 3) Here we suppose model has 'NCHW' layout for input
    ppp.input().model().set_layout(Layout('NCHW'))

    # 4) Set output tensor information:
    # - precision of tensor is supposed to be 'f32'
    ppp.output().tensor().set_element_type(Type.f32)

    # 5) Apply preprocessing modifying the original 'model'
    model = ppp.build()

    # --------------------------- Step 5. Loading model to the device --------------------------------
    log.info('Loading the model to the plugin')
    compiled_model = core.compile_model(model, device_name)

    # --------------------------- Step 6. Create infer request and do inference synchronously --------
    log.info('Starting inference in synchronous mode')
    results = compiled_model.infer_new_request({0: input_tensor})

    # --------------------------- Step 7. Process output ---------------------------------------------
    predictions = next(iter(results.values()))

    # Change a shape of a numpy.ndarray with results to get another one with one dimension
    probs = predictions.reshape(-1)

    # Get an array of 10 class IDs in descending order of probability
    top_10 = np.argsort(probs)[-10:][::-1]

    header = 'class_id probability'

    log.info(f'Image path: {image_path}')
    log.info('Top 10 results: ')
    log.info(header)
    log.info('-' * len(header))

    for class_id in top_10:
        # Pad so the probability column lines up under the header
        probability_indent = ' ' * (len('class_id') - len(str(class_id)) + 1)
        log.info(f'{class_id}{probability_indent}{probs[class_id]:.7f}')

    log.info('')

    # ------------------------------------------------------------------------------------------------
    log.info(
        'This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n'
    )
    return 0
def main():
    """Run the speech sample: infer utterance files and report per-utterance statistics.

    The model is either read from ``args.model`` (and then preprocessed and
    compiled) or imported from ``args.import_gna_model``. When a GNA model
    export is requested, the function returns right after the export. Otherwise
    every utterance is inferred synchronously, timing is logged, results are
    optionally compared with reference files and optionally written to output
    files.

    Returns:
        0 on success. The process exits with -3, -6 or -5 when the number of
        input, output or reference files does not match the corresponding
        number of layers.
    """
    args = parse_args()

    # --------------------------- Step 1. Initialize OpenVINO Runtime Core ------------------------------------------------
    log.info('Creating OpenVINO Runtime Core')
    core = Core()

    # --------------------------- Step 2. Read a model --------------------------------------------------------------------
    if args.model:
        log.info(f'Reading the model: {args.model}')
        # (.xml and .bin files) or (.onnx file)
        model = core.read_model(args.model)

        # --------------------------- Step 3. Apply preprocessing -------------------------------------------------------------
        # Everything in this step operates on `model`, which only exists when
        # args.model is given.
        if args.output_layers:
            output_layer_names, output_layer_ports = parse_outputs_from_args(
                args)
            model.add_outputs(list(zip(output_layer_names, output_layer_ports)))

        if args.layout:
            layouts = parse_input_layouts(args, model.inputs)

        ppp = PrePostProcessor(model)

        # All input and output tensors are declared as f32
        for i in range(len(model.inputs)):
            ppp.input(i).tensor().set_element_type(Type.f32)

            input_name = model.input(i).get_any_name()

            # `layouts` is only bound when args.layout is set, hence the
            # short-circuit on args.layout first.
            if args.layout and input_name in layouts.keys():
                ppp.input(i).tensor().set_layout(Layout(layouts[input_name]))
                ppp.input(i).model().set_layout(Layout(layouts[input_name]))

        for i in range(len(model.outputs)):
            ppp.output(i).tensor().set_element_type(Type.f32)

        model = ppp.build()

        if args.batch_size:
            # A custom batch size is only used when both context windows are 0
            batch_size = args.batch_size if args.context_window_left == args.context_window_right == 0 else 1

            if any([not _input.node.layout.empty for _input in model.inputs]):
                set_batch(model, batch_size)
            else:
                log.warning(
                    'Layout is not set for any input, so custom batch size is not set'
                )

    # ---------------------------Step 4. Configure plugin ---------------------------------------------------------
    devices = args.device.replace('HETERO:', '').split(',')
    plugin_config = {}

    if 'GNA' in args.device:
        # A first device entry containing '_' is used as the GNA device mode
        gna_device_mode = devices[0] if '_' in devices[0] else 'GNA_AUTO'
        devices[0] = 'GNA'

        plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
        plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'
        plugin_config['GNA_EXEC_TARGET'] = args.exec_target
        plugin_config['GNA_PWL_MAX_ERROR_PERCENT'] = str(args.pwl_me)

        # Set a GNA scale factor
        if args.import_gna_model:
            if args.scale_factor:
                log.warning(
                    f'Custom scale factor will be used for imported GNA model: {args.import_gna_model}'
                )
                set_scale_factors(plugin_config, parse_scale_factors(args))
            else:
                log.info(
                    f'Using scale factor from the imported GNA model: {args.import_gna_model}'
                )
        else:
            if args.scale_factor:
                set_scale_factors(plugin_config, parse_scale_factors(args))
            else:
                # No user scale factor: derive one per input file from its
                # first utterance
                scale_factors = []

                for file_name in re.split(', |,', args.input):
                    _, utterances = read_utterance_file(file_name)
                    scale_factors.append(get_scale_factor(utterances[0]))

                log.info(
                    'Using scale factor(s) calculated from first utterance')
                set_scale_factors(plugin_config, scale_factors)

        if args.export_embedded_gna_model:
            plugin_config[
                'GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
            plugin_config[
                'GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration

    if args.performance_counter:
        plugin_config['PERF_COUNT'] = 'YES'

    device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[
        0]

    # --------------------------- Step 5. Loading model to the device -----------------------------------------------------
    log.info('Loading the model to the plugin')
    if args.model:
        compiled_model = core.compile_model(model, device_str, plugin_config)
    else:
        # No model path given: import a previously exported GNA model instead
        with open(args.import_gna_model, 'rb') as f:
            buf = BytesIO(f.read())
            compiled_model = core.import_model(buf, device_str, plugin_config)

    # --------------------------- Exporting GNA model using InferenceEngine AOT API ---------------------------------------
    if args.export_gna_model:
        log.info(f'Writing GNA Model to {args.export_gna_model}')
        user_stream = compiled_model.export_model()
        with open(args.export_gna_model, 'wb') as f:
            f.write(user_stream)
        return 0

    if args.export_embedded_gna_model:
        log.info(
            f'Exported GNA embedded model to file {args.export_embedded_gna_model}'
        )
        log.info(
            f'GNA embedded model export done for GNA generation {args.embedded_gna_configuration}'
        )
        return 0

    # --------------------------- Step 6. Set up input --------------------------------------------------------------------
    if args.input_layers:
        input_layer_names = re.split(', |,', args.input_layers)
    else:
        input_layer_names = [
            _input.any_name for _input in compiled_model.inputs
        ]

    input_file_names = re.split(', |,', args.input)

    # NOTE(review): the check compares len(input_layer_names) but the message
    # reports len(compiled_model.inputs); these can differ when
    # args.input_layers is given.
    if len(input_layer_names) != len(input_file_names):
        log.error(
            f'Number of model inputs ({len(compiled_model.inputs)}) is not equal '
            f'to number of ark files ({len(input_file_names)})')
        sys.exit(-3)

    input_file_data = [
        read_utterance_file(file_name) for file_name in input_file_names
    ]

    # One dict per utterance, mapping each input layer name to that layer's
    # data for the utterance; the utterance count is taken from the first file.
    infer_data = [{
        input_layer_names[j]: input_file_data[j].utterances[i]
        for j in range(len(input_layer_names))
    } for i in range(len(input_file_data[0].utterances))]

    if args.output_layers:
        output_layer_names, output_layer_ports = parse_outputs_from_args(args)
        # If a name of output layer contains a port number then concatenate output_layer_names and output_layer_ports
        if ':' in compiled_model.outputs[0].any_name:
            output_layer_names = [
                f'{output_layer_names[i]}:{output_layer_ports[i]}'
                for i in range(len(output_layer_names))
            ]
    else:
        output_layer_names = [compiled_model.outputs[0].any_name]

    if args.output:
        output_file_names = re.split(', |,', args.output)

        if len(output_layer_names) != len(output_file_names):
            log.error(
                'The number of output files is not equal to the number of model outputs.'
            )
            sys.exit(-6)

    if args.reference:
        reference_file_names = re.split(', |,', args.reference)

        if len(output_layer_names) != len(reference_file_names):
            log.error(
                'The number of reference files is not equal to the number of model outputs.'
            )
            sys.exit(-5)

        reference_file_data = [
            read_utterance_file(file_name)
            for file_name in reference_file_names
        ]

        # Same per-utterance structure as infer_data, keyed by output layer name
        references = [{
            output_layer_names[j]: reference_file_data[j].utterances[i]
            for j in range(len(output_layer_names))
        } for i in range(len(input_file_data[0].utterances))]

    # --------------------------- Step 7. Create infer request ------------------------------------------------------------
    infer_request = compiled_model.create_infer_request()

    # --------------------------- Step 8. Do inference --------------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    results = []
    total_infer_time = 0

    for i in range(len(infer_data)):
        start_infer_time = default_timer()

        # Reset states between utterance inferences to remove a memory impact
        for state in infer_request.query_state():
            state.reset()

        results.append(
            do_inference(
                infer_data[i],
                infer_request,
                args.context_window_left,
                args.context_window_right,
            ))

        infer_time = default_timer() - start_infer_time
        total_infer_time += infer_time
        # Frame count is the first dimension of the first input layer's data
        num_of_frames = infer_data[i][input_layer_names[0]].shape[0]
        avg_infer_time_per_frame = infer_time / num_of_frames

        # --------------------------- Step 9. Process output ------------------------------------------------------------------
        log.info('')
        log.info(f'Utterance {i}:')
        log.info(f'Total time in Infer (HW and SW): {infer_time * 1000:.2f}ms')
        log.info(f'Frames in utterance: {num_of_frames}')
        log.info(
            f'Average Infer time per frame: {avg_infer_time_per_frame * 1000:.2f}ms'
        )

        for name in output_layer_names:
            log.info('')
            log.info(f'Output blob name: {name}')
            log.info(f'Number scores per frame: {results[i][name].shape[1]}')

            if args.reference:
                log.info('')
                compare_with_reference(results[i][name], references[i][name])

        if args.performance_counter:
            if 'GNA' in args.device:
                # NOTE(review): profiling entries 0 and 1 are read as total
                # and stall cycle counts through real_time.total_seconds() -
                # confirm the entry order and units against the plugin.
                total_cycles = infer_request.profiling_info[
                    0].real_time.total_seconds()
                stall_cycles = infer_request.profiling_info[
                    1].real_time.total_seconds()
                active_cycles = total_cycles - stall_cycles
                frequency = 10**6
                if args.arch == 'CORE':
                    frequency *= GNA_CORE_FREQUENCY
                else:
                    frequency *= GNA_ATOM_FREQUENCY
                total_inference_time = total_cycles / frequency
                active_time = active_cycles / frequency
                stall_time = stall_cycles / frequency
                log.info('')
                log.info('Performance Statistics of GNA Hardware')
                log.info(
                    f' Total Inference Time: {(total_inference_time * 1000):.4f} ms'
                )
                log.info(f' Active Time: {(active_time * 1000):.4f} ms')
                log.info(f' Stall Time: {(stall_time * 1000):.4f} ms')

    log.info('')
    log.info(f'Total sample time: {total_infer_time * 1000:.2f}ms')

    if args.output:
        for i, name in enumerate(output_layer_names):
            # The comprehension's `i` (utterance index) is scoped to the
            # comprehension and does not clobber the outer `i` (output index).
            data = [
                results[i][name]
                for i in range(len(input_file_data[0].utterances))
            ]
            write_utterance_file(output_file_names[i], input_file_data[0].keys,
                                 data)
            log.info(f'File {output_file_names[i]} was created!')

    # ----------------------------------------------------------------------------------------------------------------------
    log.info(
        'This sample is an API example, '
        'for any performance measurements please use the dedicated benchmark_app tool\n'
    )
    return 0
ppp.input(input_name).preprocess() \
    .convert_element_type(Type.f32) \
    .convert_color(ColorFormat.RGB) \
    .resize(ResizeAlgorithm.RESIZE_LINEAR) \
    .mean([100.5, 101, 101.5]) \
    .scale([50., 51., 52.])
# .convert_layout(Layout('NCHW')); # Not needed, such conversion will be added implicitly
# ! [ov:preprocess:steps]

# The configured PrePostProcessor is printed, then build() yields the model.
# ! [ov:preprocess:build]
print(f'Dump preprocessor: {ppp}')
model = ppp.build()
# ! [ov:preprocess:build]

# Ports can be selected by index ...
# ! [ov:preprocess:input_index]
ppp.input(1)  # Gets 2nd input in a model
ppp.output(2)  # Gets output with index=2 (3rd one) in a model
# ! [ov:preprocess:input_index]

# ... or by name ...
# ! [ov:preprocess:input_name]
ppp.input('image')
ppp.output('result')
# ! [ov:preprocess:input_name]

# ... or with no argument at all.
# ! [ov:preprocess:input_1]
# no index/name is needed if model has one input
ppp.input().preprocess().scale(50.)

# same for output
ppp.output() \
    .postprocess().convert_element_type(Type.u8)
# ! [ov:preprocess:input_1]
def apply_preprocessing(ov_function: Model, argv: argparse.Namespace):
    """
    Embed the pre/post-processing requested on the command line into a model.

    Layouts, mean/scale values and (optionally) channel reversal are registered
    on a PrePostProcessor which is then built for 'ov_function'. Nothing is
    returned: on return, the 'ov_function' object passed by the caller is updated.

    Accepted 'argv.mean_scale_values' formats, by example:
        a) Dict: {'inputName': {'mean': [1., 2., 3.], 'scale': [2., 4., 8.]}}
        b) List: list(np.array([(np.array([1., 2., 3.]), np.array([2., 4., 6.])),
                                (np.array([7., 8., 9.]), np.array([5., 6., 7.]))]))
    Accepted 'argv.layout_values' formats, by example:
        a) Specific layouts for inputs and outputs
        {
            'input1': {
                'source_layout': 'nchw',
                'target_layout': 'nhwc'
            },
            'output2': {
                'source_layout': 'nhwc'
            }
        }
        b) Layout for single input: {'': {'source_layout': 'nchw'}}

    :param: ov_function OV function for applying mean/scale pre-processing
    :param: argv Parsed command line arguments
    :raises Error: when a layout without a name ('') is supplied for a model
        that has more than one input
    """
    processor = PrePostProcessor(ov_function)

    # An absent or empty command-line value falls back to an empty mapping.
    requested_mean_scale = argv.mean_scale_values \
        if 'mean_scale_values' in argv and argv.mean_scale_values else {}

    # The normalized result is a dictionary keyed by input name, e.g.
    # {'inputName': {'mean': [1., 2., 3.], 'scale': [2.]}}
    mean_scale_by_input = update_mean_scale_to_dict(
        input_nodes=ov_function.inputs,
        mean_scale_val=requested_mean_scale,
        scale=argv.scale)

    layouts = argv.layout_values \
        if 'layout_values' in argv and argv.layout_values else {}

    if '' in layouts:
        # A nameless layout is only unambiguous for a single-input model.
        if len(ov_function.inputs) > 1:
            input_names = []
            for ov_input in ov_function.inputs:
                input_names.append(list(ov_input.get_tensor().get_names())[0])
            raise Error(
                'Layout without name can be specified for models with only one input, '
                'but provided model has {} inputs: \'{}\'. '
                'Please specify explicitly input/output name for --layout option'
                .format(len(input_names), input_names))

        # Re-key the nameless entry under the first tensor name of the only input.
        sole_input_name = list(ov_function.input().get_tensor().get_names())[0]
        unnamed_layout = layouts['']
        layouts = {
            sole_input_name: {
                'source_layout': unnamed_layout.get('source_layout'),
                'target_layout': unnamed_layout.get('target_layout'),
            }
        }

    # Mean/scale keys are checked against inputs only; layout keys may also name outputs.
    check_keys_valid(ov_function=ov_function,
                     keys=mean_scale_by_input.keys(),
                     search_outputs=False)
    check_keys_valid(ov_function=ov_function,
                     keys=layouts.keys(),
                     search_outputs=True)

    layouts = update_layout_is_input_flag(ov_function, layouts)
    layouts = guess_source_layouts_by_mean_scale(ov_function, layouts,
                                                 mean_scale_by_input)

    reverse_requested = 'reverse_input_channels' in argv and argv.reverse_input_channels
    # Stays empty unless channel reversal was requested.
    reverse_candidates = guess_source_layouts_for_reverse_channels(
        ov_function=ov_function,
        layout_values=layouts) if reverse_requested else []

    # Register layouts: 'source_layout' goes to the model side of the port,
    # 'target_layout' to the tensor side.
    for node_name, layout_value in layouts.items():
        select_port = processor.input if layout_value.get('is_input') \
            else processor.output
        source_layout = layout_value.get('source_layout')
        if source_layout:
            select_port(node_name).model().set_layout(Layout(source_layout))
        target_layout = layout_value.get('target_layout')
        if target_layout:
            select_port(node_name).tensor().set_layout(Layout(target_layout))

    for node_name, node_values in mean_scale_by_input.items():
        # Tuple order matters: mean is applied first, then scale.
        for step in ('mean', 'scale'):
            step_value = node_values[step]
            if step_value is not None:
                getattr(processor.input(node_name).preprocess(), step)(step_value)
        log.debug('Mean/Scale pre-processing applied to {}'.format(node_name))

    # Apply reverse_input_channels (no candidates exist when it was not requested)
    for name, _ in reverse_candidates:
        processor.input(name).preprocess().reverse_channels()
        log.debug(
            'reverse_input_channels pre-processing applied to {}'.format(name))

    # Apply pre-processing builder to a function
    ov_function = processor.build()

    # Remove guessed layout values from ov_function
    # (these values shall not be serialized to IR)
    for node_name, layout_value in layouts.items():
        if not layout_value.get('source_guessed') or \
                layout_value.get('target_layout'):
            continue
        # Search for the parameter object whose tensor carries this name
        for idx, ov_input in enumerate(ov_function.inputs):
            if node_name in ov_input.get_tensor().get_names():
                log.debug('Clearing guessed layout {} for {}'.format(
                    layout_value['source_layout'], node_name))
                ov_function.get_parameters()[idx].layout = Layout()