def run_inference(runtime, net_id, images, labels, input_binding_info, output_binding_info):
    """Run the network on every image and print the top five results for each.

    The output tensors are created once and reused for every image.

    Args:
        runtime: Arm NN runtime on which the workload is enqueued.
        net_id: Network ID passed to ``runtime.EnqueueWorkload``.
        images: Loaded images; one inference is run per item.
        labels: Loaded labels per class, forwarded to ``print_top_n``.
        input_binding_info: Network input information.
        output_binding_info: Network output information.

    Returns:
        None
    """
    output_tensors = ann.make_output_tensors([output_binding_info])

    for image_index, image in enumerate(images):
        # Wrap the current image in input tensors for the network.
        input_tensors = ann.make_input_tensors([input_binding_info], [image])

        print("Running inference({0}) ...".format(image_index))
        runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

        # Indices of the first output array, ordered from highest to lowest value.
        scores = ann.workload_tensors_to_ndarray(output_tensors)[0]
        ranking = np.argsort(scores)[::-1]
        print_top_n(5, ranking, labels, scores)
def mock_model_runtime(shared_data_folder):
    """Load ``mock_model.tflite`` into a CpuRef runtime and yield it with its tensors.

    Args:
        shared_data_folder: Directory containing ``mock_model.tflite`` and
            ``tflite_parser/input_lite.npy``.

    Yields:
        Tuple ``(runtime, net_id, input_tensors, output_tensors)``.
    """
    tflite_parser = ann.ITfLiteParser()
    model_path = os.path.join(shared_data_folder, 'mock_model.tflite')
    network = tflite_parser.CreateNetworkFromBinaryFile(model_path)

    graph_id = 0
    input_binding_info = tflite_parser.GetNetworkInputBindingInfo(graph_id, "input_1")
    input_data_path = os.path.join(shared_data_folder, 'tflite_parser/input_lite.npy')
    input_tensor_data = np.load(input_data_path)

    backends = [ann.BackendId('CpuRef')]
    runtime = ann.IRuntime(ann.CreationOptions())

    # Messages returned by Optimize and LoadNetwork are printed as-is.
    opt_network, optimize_messages = ann.Optimize(network, backends,
                                                  runtime.GetDeviceSpec(),
                                                  ann.OptimizerOptions())
    print(optimize_messages)

    net_id, load_messages = runtime.LoadNetwork(opt_network)
    print(load_messages)

    input_tensors = ann.make_input_tensors([input_binding_info], [input_tensor_data])

    # One output binding per output tensor name reported for the subgraph.
    outputs_binding_info = [
        tflite_parser.GetNetworkOutputBindingInfo(graph_id, name)
        for name in tflite_parser.GetSubgraphOutputTensorNames(graph_id)
    ]
    output_tensors = ann.make_output_tensors(outputs_binding_info)

    yield runtime, net_id, input_tensors, output_tensors
def test_make_output_tensors(get_tensor_info_output):
    """Check ``make_output_tensors`` returns one ``ann.Tensor`` with matching info.

    Args:
        get_tensor_info_output: Fixture value used as the output binding info list.
    """
    binding_infos = get_tensor_info_output
    created = ann.make_output_tensors(binding_infos)

    assert len(created) == 1

    for created_entry, binding_entry in zip(created, binding_infos):
        # Element 1 of each entry holds the tensor / tensor info respectively.
        tensor = created_entry[1]
        assert type(tensor) == ann.Tensor
        assert str(tensor.GetInfo()) == str(binding_entry[1])
def test_workload_tensors_to_ndarray(get_tensor_info_output):
    """Check arrays from ``workload_tensors_to_ndarray`` match tensor shape and size.

    Args:
        get_tensor_info_output: Fixture value used as the output binding info list.
    """
    output_tensors = ann.make_output_tensors(get_tensor_info_output)
    arrays = ann.workload_tensors_to_ndarray(output_tensors)

    for position, entry in enumerate(output_tensors):
        array = arrays[position]
        tensor = entry[1]
        # Each array must mirror the shape and element count of its tensor.
        assert array.shape == tuple(tensor.GetShape())
        assert array.size == tensor.GetNumElements()
def test_caffe_parser_end_to_end(shared_data_folder):
    """Run ``mock_model.caffemodel`` end to end and compare with the golden output.

    Args:
        shared_data_folder: Directory containing ``mock_model.caffemodel`` and the
            ``caffe_parser`` input / golden output ``.npy`` files.
    """
    # Bind only the local name. A chained assignment to ann.ICaffeParser would
    # replace the class on the module with this instance for the whole process.
    parser = ann.ICaffeParser()

    # Load the network specifying the inputs and outputs
    input_name = "Placeholder"
    tensor_shape = {input_name: ann.TensorShape((1, 1, 28, 28))}
    requested_outputs = ["output"]

    network = parser.CreateNetworkFromBinaryFile(
        os.path.join(shared_data_folder, 'mock_model.caffemodel'),
        tensor_shape, requested_outputs)

    # Specify preferred backends
    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]

    input_binding_info = parser.GetNetworkInputBindingInfo(input_name)

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(), ann.OptimizerOptions())
    assert 0 == len(messages)

    net_id, messages = runtime.LoadNetwork(opt_network)
    assert "" == messages

    # Load test image data stored in input_caffe.npy
    input_tensor_data = np.load(
        os.path.join(shared_data_folder, 'caffe_parser/input_caffe.npy')).astype(np.float32)
    input_tensors = ann.make_input_tensors([input_binding_info], [input_tensor_data])

    # Load output binding info and create the output tensors
    outputs_binding_info = [
        parser.GetNetworkOutputBindingInfo(output_name)
        for output_name in requested_outputs
    ]
    output_tensors = ann.make_output_tensors(outputs_binding_info)

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
    output_vectors = ann.workload_tensors_to_ndarray(output_tensors)

    # Load golden output file for result comparison.
    expected_output = np.load(
        os.path.join(shared_data_folder, 'caffe_parser/golden_output_caffe.npy'))

    # Check that output matches golden output to 4 decimal places
    # (there are slight rounding differences after this)
    np.testing.assert_almost_equal(output_vectors[0], expected_output, decimal=4)
def run(self):
    """Run the TfLite model ``self.model`` on ``self.image`` and print the verdict.

    Calls ``self.start()`` first, then reads ``self.image`` with OpenCV, resizes
    it to 128x128 and converts it to float32 divided by 255.0. The model is
    parsed, optimized and loaded into a new runtime, and a single inference is
    enqueued and timed. Prints the elapsed time in milliseconds, the output
    tensor info, the raw output, and finally "Non-Fire" when the index of the
    largest output value is 0, otherwise "Fire".
    """
    self.start()

    image = cv2.imread(self.image)
    image = cv2.resize(image, (128, 128))
    image = np.array(image, dtype=np.float32) / 255.0

    # ONNX, Caffe and TF parsers also exist.
    parser = ann.ITfLiteParser()
    network = parser.CreateNetworkFromBinaryFile(self.model)

    graph_id = 0
    input_names = parser.GetSubgraphInputTensorNames(graph_id)
    input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, input_names[0])

    # Create a runtime object that will perform inference.
    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    # Backend choices earlier in the list have higher preference.
    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
    opt_network, _ = ann.Optimize(network, preferred_backends,
                                  runtime.GetDeviceSpec(), ann.OptimizerOptions())

    # Load the optimized network into the runtime.
    net_id, _ = runtime.LoadNetwork(opt_network)

    # Create an inputTensor for inference.
    input_tensors = ann.make_input_tensors([input_binding_info], [image])

    # Get output binding information for an output layer by using the layer
    # name, querying the same subgraph the names were read from.
    output_names = parser.GetSubgraphOutputTensorNames(graph_id)
    output_binding_info = parser.GetNetworkOutputBindingInfo(graph_id, output_names[0])
    output_tensors = ann.make_output_tensors([output_binding_info])

    # Enqueue against the id returned by LoadNetwork rather than a literal 0.
    start = timer()
    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
    end = timer()
    print('Elapsed time is ', (end - start) * 1000, 'ms')

    output, output_tensor_info = ann.from_output_tensor(output_tensors[0][1])
    print(f"Output tensor info: {output_tensor_info}")
    print(output)

    j = np.argmax(output)
    if j == 0:
        print("Non-Fire")
    else:
        print("Fire")
def __init__(self, model_file: str, backends: list):
    """
    Builds the network via ``create_network`` and pre-allocates its output tensors.

    Args:
        model_file: User-specified model file.
        backends: List of backends to optimize network.
    """
    network_parts = create_network(model_file, backends)
    (self.network_id,
     self.runtime,
     self.input_binding_info,
     self.output_binding_info) = network_parts

    # Output tensors are created once here from the output binding info.
    self.output_tensors = ann.make_output_tensors(self.output_binding_info)
def test_onnx_parser_end_to_end(shared_data_folder):
    """Run ``mock_model.onnx`` end to end and compare with the golden output.

    Args:
        shared_data_folder: Directory containing ``mock_model.onnx`` and the
            ``onnx_parser`` input / golden output ``.npy`` files.
    """
    # Bind only the local name. A chained assignment to ann.IOnnxParser would
    # replace the class on the module with this instance for the whole process.
    parser = ann.IOnnxParser()

    network = parser.CreateNetworkFromBinaryFile(
        os.path.join(shared_data_folder, 'mock_model.onnx'))

    # load test image data stored in input_onnx.npy
    input_binding_info = parser.GetNetworkInputBindingInfo("input")
    input_tensor_data = np.load(
        os.path.join(shared_data_folder, 'onnx_parser/input_onnx.npy')).astype(np.float32)

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(), ann.OptimizerOptions())
    assert 0 == len(messages)

    net_id, messages = runtime.LoadNetwork(opt_network)
    assert "" == messages

    input_tensors = ann.make_input_tensors([input_binding_info], [input_tensor_data])
    output_tensors = ann.make_output_tensors(
        [parser.GetNetworkOutputBindingInfo("output")])

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
    output = ann.workload_tensors_to_ndarray(output_tensors)

    # Load golden output file for result comparison.
    golden_output = np.load(
        os.path.join(shared_data_folder, 'onnx_parser/golden_output_onnx.npy'))

    # Check that output matches golden output to 4 decimal places
    # (there are slight rounding differences after this)
    np.testing.assert_almost_equal(output[0], golden_output, decimal=4)
def main(args):
    """Run the network over every frame of a video and write the annotated frames.

    Args:
        args: Parsed arguments. Reads ``video_file_path``,
            ``output_video_file_path``, ``model_file_path``,
            ``preferred_backends``, ``model_name`` and ``label_path``.
    """
    video, video_writer, frame_count = init_video(args.video_file_path,
                                                  args.output_video_file_path)
    try:
        net_id, runtime, input_binding_info, output_binding_info = create_network(
            args.model_file_path, args.preferred_backends)
        output_tensors = ann.make_output_tensors(output_binding_info)

        labels, process_output, resize_factor = get_model_processing(
            args.model_name, video, input_binding_info)
        # A user-supplied label path takes precedence over the model's default labels.
        labels = dict_labels(labels if args.label_path is None else args.label_path)

        for _ in tqdm(frame_count, desc='Processing frames'):
            frame_present, frame = video.read()
            if not frame_present:
                continue

            input_tensors = preprocess(frame, input_binding_info)
            inference_output = execute_network(input_tensors, output_tensors,
                                               runtime, net_id)
            detections = process_output(inference_output)
            draw_bounding_boxes(frame, detections, resize_factor, labels)
            video_writer.write(frame)

        print('Finished processing frames')
    finally:
        # Release the capture and the writer even if a step above raised.
        video.release()
        video_writer.release()
# Create a runtime object that will perform inference.
options = ann.CreationOptions()
runtime = ann.IRuntime(options)

# Backend choices earlier in the list have higher preference.
preferredBackends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
opt_network, messages = ann.Optimize(network, preferredBackends,
                                     runtime.GetDeviceSpec(), ann.OptimizerOptions())

# Load the optimized network into the runtime.
net_id, _ = runtime.LoadNetwork(opt_network)
print(f"Loaded network, id={net_id}")

# Create an inputTensor for inference.
input_tensors = ann.make_input_tensors([input_binding_info], [image])

# Get output binding information for an output layer by using the layer name,
# querying the same subgraph the output names were read from.
output_names = parser.GetSubgraphOutputTensorNames(graph_id)
output_binding_info = parser.GetNetworkOutputBindingInfo(graph_id, output_names[0])
output_tensors = ann.make_output_tensors([output_binding_info])

# Enqueue against the id returned by LoadNetwork rather than a literal 0.
runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
results = ann.workload_tensors_to_ndarray(output_tensors)
print(results[0])
print(output_tensors[0][1])

# Index 0 being the largest output value is reported as "Non-Fire".
j = np.argmax(results[0])
if j == 0:
    print("Non-Fire")
else:
    print("Fire")