def test_tflite_parser_with_optional_options_out_of_scope(shared_data_folder):
    """Parse, optimize and load the mock TfLite model via ``create_with_opt()``.

    The parser comes from ``create_with_opt()`` rather than being constructed
    here. The test passes when optimization reports no messages and loading
    the optimized network returns an empty message string.

    Args:
        shared_data_folder: Directory containing ``mock_model.tflite``.
    """
    parser = create_with_opt()

    model_path = os.path.join(shared_data_folder, "mock_model.tflite")
    network = parser.CreateNetworkFromBinaryFile(model_path)

    # Query binding details of the last subgraph; the results are not
    # asserted on, the calls only have to succeed.
    graph_id = parser.GetSubgraphCount() - 1
    input_names = parser.GetSubgraphInputTensorNames(graph_id)
    input_binding_info = parser.GetNetworkInputBindingInfo(graph_id,
                                                           input_names[0])
    output_names = parser.GetSubgraphOutputTensorNames(graph_id)

    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    opt_network, optimize_messages = ann.Optimize(network,
                                                  preferred_backends,
                                                  runtime.GetDeviceSpec(),
                                                  ann.OptimizerOptions())
    assert len(optimize_messages) == 0

    net_id, load_messages = runtime.LoadNetwork(opt_network)
    assert load_messages == ""
def get_runtime(shared_data_folder, network_file):
    """Yield the pieces needed to optimize a TfLite network.

    Args:
        shared_data_folder: Directory holding the model files.
        network_file: File name of the TfLite model inside that directory.

    Yields:
        A ``(preferred_backends, network, runtime)`` tuple, where the backends
        are ``CpuAcc`` followed by ``CpuRef``.
    """
    tflite_parser = ann.ITfLiteParser()
    backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]

    model_path = os.path.join(shared_data_folder, network_file)
    parsed_network = tflite_parser.CreateNetworkFromBinaryFile(model_path)

    creation_options = ann.CreationOptions()
    rt = ann.IRuntime(creation_options)

    yield backends, parsed_network, rt
def test_caffe_parser_end_to_end(shared_data_folder):
    """Run the mock Caffe model end to end and compare with the golden output.

    The model is parsed, optimized for ``CpuAcc``/``CpuRef``, loaded into a
    runtime and executed on the stored input; the first output must match the
    stored golden output to 4 decimal places.

    Args:
        shared_data_folder: Directory containing ``mock_model.caffemodel`` and
            the ``caffe_parser`` input/golden ``.npy`` files.
    """
    # Bind the instance to a local name only. A chained assignment that also
    # targets ``ann.ICaffeParser`` would replace the class on the module with
    # this instance and break every later ``ann.ICaffeParser()`` call.
    parser = ann.ICaffeParser()

    # Load the network specifying the inputs and outputs
    input_name = "Placeholder"
    tensor_shape = {input_name: ann.TensorShape((1, 1, 28, 28))}
    requested_outputs = ["output"]

    network = parser.CreateNetworkFromBinaryFile(
        os.path.join(shared_data_folder, 'mock_model.caffemodel'),
        tensor_shape, requested_outputs)

    # Specify preferred backend
    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]

    input_binding_info = parser.GetNetworkInputBindingInfo(input_name)

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    assert 0 == len(messages)

    net_id, messages = runtime.LoadNetwork(opt_network)
    assert "" == messages

    # Load test image data stored in input_caffe.npy
    input_tensor_data = np.load(
        os.path.join(shared_data_folder,
                     'caffe_parser/input_caffe.npy')).astype(np.float32)
    input_tensors = ann.make_input_tensors([input_binding_info],
                                           [input_tensor_data])

    # Load output binding info and create the matching output tensors
    outputs_binding_info = [
        parser.GetNetworkOutputBindingInfo(output_name)
        for output_name in requested_outputs
    ]
    output_tensors = ann.make_output_tensors(outputs_binding_info)

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
    output_vectors = ann.workload_tensors_to_ndarray(output_tensors)

    # Load golden output file for result comparison.
    expected_output = np.load(
        os.path.join(shared_data_folder,
                     'caffe_parser/golden_output_caffe.npy'))

    # Check that output matches golden output to 4 decimal places
    # (there are slight rounding differences after this)
    np.testing.assert_almost_equal(output_vectors[0], expected_output, 4)
def test_tflite_parser_end_to_end(shared_data_folder):
    """Execute the mock TfLite model and require an exact golden-output match.

    Args:
        shared_data_folder: Directory containing ``mock_model.tflite`` and the
            ``tflite_parser`` input/golden ``.npy`` files.
    """
    parser = ann.ITfLiteParser()
    model_path = os.path.join(shared_data_folder, "mock_model.tflite")
    network = parser.CreateNetworkFromBinaryFile(model_path)

    # Work on the last subgraph reported by the parser.
    graph_id = parser.GetSubgraphCount() - 1

    input_names = parser.GetSubgraphInputTensorNames(graph_id)
    input_binding_info = parser.GetNetworkInputBindingInfo(graph_id,
                                                           input_names[0])
    output_names = parser.GetSubgraphOutputTensorNames(graph_id)

    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    opt_network, optimize_messages = ann.Optimize(network,
                                                  preferred_backends,
                                                  runtime.GetDeviceSpec(),
                                                  ann.OptimizerOptions())
    assert len(optimize_messages) == 0

    net_id, load_messages = runtime.LoadNetwork(opt_network)
    assert load_messages == ""

    # Load test image data stored in input_lite.npy
    input_path = os.path.join(shared_data_folder,
                              'tflite_parser/input_lite.npy')
    input_tensor_data = np.load(input_path)
    input_tensors = ann.make_input_tensors([input_binding_info],
                                           [input_tensor_data])

    # One (binding id, tensor) pair per graph output.
    output_tensors = []
    for name in output_names:
        binding = parser.GetNetworkOutputBindingInfo(graph_id, name)
        tensor_info = binding[1]
        tensor_id = binding[0]
        output_tensors.append((tensor_id, ann.Tensor(tensor_info)))

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

    output_vectors = [tensor.get_memory_area() for _, tensor in output_tensors]

    # Load golden output file for result comparison.
    golden_path = os.path.join(shared_data_folder,
                               'tflite_parser/golden_output_lite.npy')
    expected_outputs = np.load(golden_path)

    # Check that output matches golden output
    assert (expected_outputs == output_vectors[0]).all()
def run(self):
    """Classify ``self.image`` with the TfLite model at ``self.model``.

    The image is read with OpenCV, resized to 128x128 and scaled to
    ``float32`` values in ``[0, 1]``. The model is parsed, optimized for
    ``CpuAcc``/``CpuRef``, loaded into a fresh runtime and executed once.
    The elapsed inference time, the output tensor info, the raw output and
    the label ("Non-Fire" when the arg-max index is 0, otherwise "Fire") are
    printed.

    Raises:
        RuntimeError: If OpenCV cannot read ``self.image``.
    """
    self.start()

    image = cv2.imread(self.image)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with a clear message instead of inside cv2.resize.
        raise RuntimeError(f"Could not read image: {self.image}")
    image = cv2.resize(image, (128, 128))
    image = np.array(image, dtype=np.float32) / 255.0

    # ONNX, Caffe and TF parsers also exist.
    parser = ann.ITfLiteParser()
    network = parser.CreateNetworkFromBinaryFile(self.model)

    graph_id = 0
    input_names = parser.GetSubgraphInputTensorNames(graph_id)
    input_binding_info = parser.GetNetworkInputBindingInfo(
        graph_id, input_names[0])

    # Create a runtime object that will perform inference.
    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    # Backend choices earlier in the list have higher preference.
    preferredBackends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
    opt_network, messages = ann.Optimize(network, preferredBackends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())

    # Load the optimized network into the runtime.
    net_id, _ = runtime.LoadNetwork(opt_network)

    # Create an inputTensor for inference.
    input_tensors = ann.make_input_tensors([input_binding_info], [image])

    # Get output binding information for an output layer by using the layer
    # name. Use graph_id rather than a literal so both lookups stay in sync.
    output_names = parser.GetSubgraphOutputTensorNames(graph_id)
    output_binding_info = parser.GetNetworkOutputBindingInfo(
        graph_id, output_names[0])
    output_tensors = ann.make_output_tensors([output_binding_info])

    start = timer()
    # Run the network that was just loaded instead of assuming its id is 0.
    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
    end = timer()
    print('Elapsed time is ', (end - start) * 1000, 'ms')

    output, output_tensor_info = ann.from_output_tensor(
        output_tensors[0][1])
    print(f"Output tensor info: {output_tensor_info}")
    print(output)

    j = np.argmax(output)
    if j == 0:
        print("Non-Fire")
    else:
        print("Fire")
def test_deserializer_end_to_end(shared_data_folder):
    """Execute the serialized mock model and require an exact golden match.

    Args:
        shared_data_folder: Directory containing ``mock_model.armnn`` and the
            ``deserializer`` input/golden ``.npy`` files.
    """
    parser = ann.IDeserializer()
    model_path = os.path.join(shared_data_folder, "mock_model.armnn")
    network = parser.CreateNetworkFromBinary(model_path)

    # use 0 as a dummy value for layer_id, which is unused in the actual implementation
    layer_id = 0
    input_name = 'input_1'
    output_name = 'dense/Softmax'

    input_binding_info = parser.GetNetworkInputBindingInfo(layer_id,
                                                           input_name)

    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    opt_network, optimize_messages = ann.Optimize(network,
                                                  preferred_backends,
                                                  runtime.GetDeviceSpec(),
                                                  ann.OptimizerOptions())
    assert len(optimize_messages) == 0

    net_id, load_messages = runtime.LoadNetwork(opt_network)
    assert load_messages == ""

    # Load test image data stored in input_lite.npy
    input_path = os.path.join(shared_data_folder,
                              'deserializer/input_lite.npy')
    input_tensor_data = np.load(input_path)
    input_tensors = ann.make_input_tensors([input_binding_info],
                                           [input_tensor_data])

    # Single output: pair its binding id with a tensor built from its info.
    binding = parser.GetNetworkOutputBindingInfo(layer_id, output_name)
    tensor_info = binding[1]
    tensor_id = binding[0]
    output_tensors = [(tensor_id, ann.Tensor(tensor_info))]

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

    output_vectors = [tensor.get_memory_area() for _, tensor in output_tensors]

    # Load golden output file for result comparison.
    golden_path = os.path.join(shared_data_folder,
                               'deserializer/golden_output_lite.npy')
    expected_outputs = np.load(golden_path)

    # Check that output matches golden output
    assert (expected_outputs == output_vectors[0]).all()
def test_profiler_workloads(mock_profiler, exec_times, unit, backend, workload):
    """Check the per-workload profiling entry for ``workload``.

    Args:
        mock_profiler: Profiler object passed to ``ann.get_profiling_data``.
        exec_times: Expected ``"execution_time"`` value for the workload.
        unit: Expected ``"time_unit"`` value for the workload.
        backend: Expected ``"backend"`` value for the workload.
        workload: Key into ``per_workload_execution_data``.
    """
    backend_names = ('CpuRef', 'CpuAcc', 'GpuAcc', 'EthosNAcc')
    preferred_backends = [ann.BackendId(name) for name in backend_names]

    profiling_data = ann.get_profiling_data(mock_profiler, preferred_backends)
    workload_entry = profiling_data.per_workload_execution_data[workload]

    assert workload_entry["execution_time"] == exec_times
    assert workload_entry["time_unit"] == unit
    assert workload_entry["backend"] == backend
def mock_model_runtime(shared_data_folder):
    """Yield a runtime with the mock TfLite model loaded and its I/O tensors.

    The network is optimized for ``CpuRef`` only. Messages from optimization
    and from loading are printed rather than asserted on.

    Args:
        shared_data_folder: Directory containing ``mock_model.tflite`` and
            ``tflite_parser/input_lite.npy``.

    Yields:
        A ``(runtime, net_id, input_tensors, output_tensors)`` tuple.
    """
    parser = ann.ITfLiteParser()
    model_path = os.path.join(shared_data_folder, 'mock_model.tflite')
    network = parser.CreateNetworkFromBinaryFile(model_path)

    graph_id = 0
    input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, "input_1")

    input_path = os.path.join(shared_data_folder,
                              'tflite_parser/input_lite.npy')
    input_tensor_data = np.load(input_path)

    preferred_backends = [ann.BackendId('CpuRef')]

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    opt_network, optimize_messages = ann.Optimize(network,
                                                  preferred_backends,
                                                  runtime.GetDeviceSpec(),
                                                  ann.OptimizerOptions())
    print(optimize_messages)

    net_id, load_messages = runtime.LoadNetwork(opt_network)
    print(load_messages)

    input_tensors = ann.make_input_tensors([input_binding_info],
                                           [input_tensor_data])

    # Collect one binding info per graph output, in the parser's order.
    output_names = parser.GetSubgraphOutputTensorNames(graph_id)
    outputs_binding_info = [
        parser.GetNetworkOutputBindingInfo(graph_id, name)
        for name in output_names
    ]
    output_tensors = ann.make_output_tensors(outputs_binding_info)

    yield runtime, net_id, input_tensors, output_tensors
def test_optimize_executes_successfully_for_neon_backend_only(network_file, get_runtime):
    """Optimize with ``CpuAcc`` as the only backend and expect no messages.

    Args:
        network_file: Not read by the test body.
        get_runtime: Sequence whose items 1 and 2 are the network and the
            runtime.
    """
    network, runtime = get_runtime[1], get_runtime[2]
    cpu_acc_only = [ann.BackendId('CpuAcc')]

    opt_network, messages = ann.Optimize(network,
                                         cpu_acc_only,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())

    assert len(messages) == 0
    assert opt_network
def test_inference_exec(mock_profiler):
    """Check the inference-level data extracted from the mock profiler.

    Both the inference data and the per-workload data must be non-empty, the
    execution times must equal the six expected values, and the time unit
    must be ``"us"``.

    Args:
        mock_profiler: Profiler object passed to ``ann.get_profiling_data``.
    """
    backend_names = ('CpuRef', 'CpuAcc', 'GpuAcc', 'EthosNAcc')
    preferred_backends = [ann.BackendId(name) for name in backend_names]

    profiling_data = ann.get_profiling_data(mock_profiler, preferred_backends)

    assert len(profiling_data.inference_data) > 0
    assert len(profiling_data.per_workload_execution_data) > 0

    # Check each total execution time
    expected_times = [1.1, 2.2, 3.3, 4.4, 5.5, 6.6]
    assert profiling_data.inference_data["execution_time"] == expected_times
    assert profiling_data.inference_data["time_unit"] == "us"
def test_optimize_executes_successfully(network_file, get_runtime):
    """Optimize with ``CpuRef`` as the only backend and expect no messages.

    Args:
        network_file: Not read by the test body.
        get_runtime: Sequence whose items 1 and 2 are the network and the
            runtime.
    """
    network, runtime = get_runtime[1], get_runtime[2]
    cpu_ref_only = [ann.BackendId('CpuRef')]

    opt_network, messages = ann.Optimize(network,
                                         cpu_ref_only,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())

    assert len(messages) == 0, 'With only CpuRef, there should be no warnings irrelevant of architecture.'
    assert opt_network
def test_optimize_fails_for_invalid_backends(network_file, get_runtime):
    """Expect ``ann.Optimize`` to raise ``RuntimeError`` for an unknown backend.

    Args:
        network_file: Not read by the test body.
        get_runtime: Sequence whose items 1 and 2 are the network and the
            runtime.
    """
    network, runtime = get_runtime[1], get_runtime[2]
    invalid_backends = [ann.BackendId('Unknown')]

    with pytest.raises(RuntimeError) as err:
        ann.Optimize(network,
                     invalid_backends,
                     runtime.GetDeviceSpec(),
                     ann.OptimizerOptions())

    # The message must name the rejected backend list.
    expected_error_message = "None of the preferred backends [Unknown ] are supported."
    assert expected_error_message in str(err.value)
def create_network(model_file: str, backends: list):
    """
    Creates a network based on the model file and a list of backends.

    Args:
        model_file: User-specified model file. The parser is chosen from its
            extension: ``.tflite``, ``.pb`` or ``.onnx``.
        backends: List of backends to optimize network.

    Returns:
        net_id: Unique ID of the network to run.
        runtime: Runtime context for executing inference.
        input_binding_info: Contains essential information about the model input.
        output_binding_info: Used to map output tensor and its memory.

    Raises:
        FileNotFoundError: If ``model_file`` does not exist.
        ValueError: If the extension of ``model_file`` is not supported.
    """
    if not os.path.exists(model_file):
        raise FileNotFoundError(f'Model file not found for: {model_file}')

    # Determine which parser to create based on model file extension.
    # An explicit raise is used instead of ``assert`` so the check is still
    # enforced when Python runs with -O.
    _, ext = os.path.splitext(model_file)
    if ext == '.tflite':
        parser = ann.ITfLiteParser()
    elif ext == '.pb':
        parser = ann.ITfParser()
    elif ext == '.onnx':
        parser = ann.IOnnxParser()
    else:
        raise ValueError(
            f'Unsupported model file extension {ext!r} for: {model_file}')

    network = parser.CreateNetworkFromBinaryFile(model_file)

    # Specify backends to optimize network
    preferred_backends = [ann.BackendId(b) for b in backends]

    # Select appropriate device context and optimize the network for that device
    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)
    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    print(f'Preferred backends: {backends}\n{runtime.GetDeviceSpec()}\n'
          f'Optimization warnings: {messages}')

    # Load the optimized network onto the Runtime device
    net_id, _ = runtime.LoadNetwork(opt_network)

    # Get input and output binding information
    # NOTE(review): the calls below are subgraph-based; confirm they exist on
    # the parsers selected for '.pb' and '.onnx' files.
    graph_id = parser.GetSubgraphCount() - 1
    input_names = parser.GetSubgraphInputTensorNames(graph_id)
    input_binding_info = parser.GetNetworkInputBindingInfo(graph_id,
                                                           input_names[0])
    output_names = parser.GetSubgraphOutputTensorNames(graph_id)
    output_binding_info = [
        parser.GetNetworkOutputBindingInfo(graph_id, output_name)
        for output_name in output_names
    ]
    return net_id, runtime, input_binding_info, output_binding_info
def test_onnx_parser_end_to_end(shared_data_folder):
    """Run the mock ONNX model end to end and compare with the golden output.

    The model is parsed, optimized for ``CpuAcc``/``CpuRef``, loaded into a
    runtime and executed on the stored input; the first output must match the
    stored golden output to 4 decimal places.

    Args:
        shared_data_folder: Directory containing ``mock_model.onnx`` and the
            ``onnx_parser`` input/golden ``.npy`` files.
    """
    # Bind the instance to a local name only. A chained assignment that also
    # targets ``ann.IOnnxParser`` would replace the class on the module with
    # this instance and break every later ``ann.IOnnxParser()`` call.
    parser = ann.IOnnxParser()

    network = parser.CreateNetworkFromBinaryFile(
        os.path.join(shared_data_folder, 'mock_model.onnx'))

    # load test image data stored in input_onnx.npy
    input_binding_info = parser.GetNetworkInputBindingInfo("input")
    input_tensor_data = np.load(
        os.path.join(shared_data_folder,
                     'onnx_parser/input_onnx.npy')).astype(np.float32)

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    assert 0 == len(messages)

    net_id, messages = runtime.LoadNetwork(opt_network)
    assert "" == messages

    input_tensors = ann.make_input_tensors([input_binding_info],
                                           [input_tensor_data])
    output_tensors = ann.make_output_tensors(
        [parser.GetNetworkOutputBindingInfo("output")])

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

    output = ann.workload_tensors_to_ndarray(output_tensors)

    # Load golden output file for result comparison.
    golden_output = np.load(
        os.path.join(shared_data_folder,
                     'onnx_parser/golden_output_onnx.npy'))

    # Check that output matches golden output to 4 decimal places
    # (there are slight rounding differences after this)
    np.testing.assert_almost_equal(output[0], golden_output, decimal=4)
def __create_network(model_file: str, backends: list, parser=None):
    """Creates a network based on a file and parser type.

    Args:
        model_file (str): Path of the model file.
        backends (list): List of backends to use when running inference.
        parser: Parser instance (pyarmnn.ITfLiteParser/pyarmnn.IOnnxParser...).
            When ``None``, a parser is chosen from the extension of
            ``model_file`` (``.onnx`` or ``.tflite``).

    Returns:
        net_id: Network ID returned by ``runtime.LoadNetwork``.
        parser: The parser that created the network.
        runtime: Runtime object instance the network was loaded into.

    Raises:
        ValueError: If no parser is given and the file extension is not
            supported.
    """
    args = parse_command_line()
    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    if parser is None:
        # try to determine what parser to create based on model extension
        _, ext = os.path.splitext(model_file)
        if ext == ".onnx":
            parser = ann.IOnnxParser()
        elif ext == ".tflite":
            parser = ann.ITfLiteParser()
        else:
            # Explicit raise instead of ``assert`` so the check is still
            # enforced when Python runs with -O.
            raise ValueError(
                f"Cannot determine a parser for model file: {model_file}")

    network = parser.CreateNetworkFromBinaryFile(model_file)

    preferred_backends = [ann.BackendId(b) for b in backends]

    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    # Optimizer and loader messages are surfaced only in verbose mode.
    if args.verbose:
        for m in messages:
            warnings.warn(m)

    net_id, w = runtime.LoadNetwork(opt_network)
    if args.verbose and w:
        warnings.warn(w)

    return net_id, parser, runtime
def random_runtime(shared_data_folder):
    """Yield a parsed mock network, a runtime and random-input I/O tensors.

    The input tensor is filled with random ``uint8`` values drawn by
    ``np.random.randint(255, ...)``, one per element of the input tensor info.

    Args:
        shared_data_folder: Directory containing ``mock_model.tflite``.

    Yields:
        A ``(preferred_backends, network, runtime, input_tensors,
        output_tensors)`` tuple, with ``CpuRef`` as the only backend.
    """
    parser = ann.ITfLiteParser()
    model_path = os.path.join(shared_data_folder, 'mock_model.tflite')
    network = parser.CreateNetworkFromBinaryFile(model_path)

    preferred_backends = [ann.BackendId('CpuRef')]

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    # Work on the last subgraph reported by the parser.
    graph_id = parser.GetSubgraphCount() - 1

    input_names = parser.GetSubgraphInputTensorNames(graph_id)
    input_binding = parser.GetNetworkInputBindingInfo(graph_id, input_names[0])
    input_id = input_binding[0]
    input_info = input_binding[1]

    output_names = parser.GetSubgraphOutputTensorNames(graph_id)

    random_input = np.random.randint(255,
                                     size=input_info.GetNumElements(),
                                     dtype=np.uint8)
    input_tensors = [(input_id, ann.ConstTensor(input_info, random_input))]

    # One (binding id, tensor) pair per graph output.
    output_tensors = []
    for name in output_names:
        binding = parser.GetNetworkOutputBindingInfo(graph_id, name)
        tensor_info = binding[1]
        tensor_id = binding[0]
        output_tensors.append((tensor_id, ann.Tensor(tensor_info)))

    yield preferred_backends, network, runtime, input_tensors, output_tensors
# Look up the first input of the graph and split its binding info into the
# binding id and the tensor info.
input_names = parser.GetSubgraphInputTensorNames(graph_id)
input_binding_info = parser.GetNetworkInputBindingInfo(graph_id,
                                                       input_names[0])
input_tensor_id = input_binding_info[0]
input_tensor_info = input_binding_info[1]
print(f""" tensor id: {input_tensor_id}, tensor info: {input_tensor_info} """)

# Create a runtime object that will perform inference.
options = ann.CreationOptions()
runtime = ann.IRuntime(options)

# Backend choices earlier in the list have higher preference.
preferredBackends = [
    ann.BackendId('CpuAcc'),
    ann.BackendId('CpuRef'),
]
opt_network, messages = ann.Optimize(
    network,
    preferredBackends,
    runtime.GetDeviceSpec(),
    ann.OptimizerOptions(),
)

# Load the optimized network into the runtime.
net_id, _ = runtime.LoadNetwork(opt_network)
print(f"Loaded network, id={net_id}")

# Create an inputTensor for inference.
input_tensors = ann.make_input_tensors([input_binding_info], [image])

# Get output binding information for an output layer by using the layer name.
output_names = parser.GetSubgraphOutputTensorNames(graph_id)
output_binding_info = parser.GetNetworkOutputBindingInfo(0, output_names[0])
output_tensors = ann.make_output_tensors([output_binding_info])
def test_add_constant_layer_to_fully_connected():
    """Build a fully connected network fed by constant weight and bias layers.

    The network is assembled by hand (input, two constant layers, a fully
    connected layer with bias enabled, output), optimized for ``CpuRef``,
    loaded and executed. The result must equal the hand-computed expected
    output exactly.
    """
    # Tensor dimensions.
    in_width = 1
    in_height = 1
    in_channels = 5
    in_batch = 2
    out_channels = 3
    out_batch = 2

    input_shape = (in_batch, in_channels, in_height, in_width)
    output_shape = (out_batch, out_channels)
    weights_shape = (in_channels, out_channels)
    bias_shape = (out_channels, )

    # Input, weight and bias values.
    input_data = np.array([[1.0, 2.0, 3.0, 4.0, 5.0],
                           [5.0, 4.0, 3.0, 2.0, 1.0]],
                          dtype=np.float32)
    weights = np.array([[.5, 2., .5],
                        [.5, 2., 1.],
                        [.5, 2., 2.],
                        [.5, 2., 3.],
                        [.5, 2., 4.]],
                       dtype=np.float32)
    bias_values = np.array([10, 20, 30], dtype=np.float32)

    # One row per batch item; each term is input[i] * weight[i][column].
    expected_output = np.array([[
        0.5 + 1.0 + 1.5 + 2.0 + 2.5 + bias_values[0],
        2.0 + 4.0 + 6.0 + 8.0 + 10. + bias_values[1],
        0.5 + 2.0 + 6.0 + 12. + 20. + bias_values[2]
    ], [
        2.5 + 2.0 + 1.5 + 1.0 + 0.5 + bias_values[0],
        10.0 + 8.0 + 6.0 + 4.0 + 2. + bias_values[1],
        2.5 + 4.0 + 6.0 + 6. + 4. + bias_values[2]
    ]], dtype=np.float32)

    network = ann.INetwork()

    # Input layer and the constant tensor that will feed it.
    input_info = ann.TensorInfo(ann.TensorShape(input_shape),
                                ann.DataType_Float32, 0, 0, True)
    input_tensor = ann.ConstTensor(input_info, input_data)
    input_layer = network.AddInputLayer(0, "input")

    # Weights as a constant layer.
    w_info = ann.TensorInfo(ann.TensorShape(weights_shape),
                            ann.DataType_Float32, 0, 0, True)
    w_tensor = ann.ConstTensor(w_info, weights)
    w_layer = network.AddConstantLayer(w_tensor, "weights")

    # Bias as a constant layer.
    b_info = ann.TensorInfo(ann.TensorShape(bias_shape),
                            ann.DataType_Float32, 0, 0, True)
    b_tensor = ann.ConstTensor(b_info, bias_values)
    b_layer = network.AddConstantLayer(b_tensor, "bias")

    fc_descriptor = ann.FullyConnectedDescriptor()
    fc_descriptor.m_BiasEnabled = True
    fc_descriptor.m_ConstantWeights = True
    fully_connected = network.AddFullyConnectedLayer(fc_descriptor, "fc")

    output_info = ann.TensorInfo(ann.TensorShape(output_shape),
                                 ann.DataType_Float32)
    output_tensor = ann.Tensor(output_info, np.zeros([1, 1], dtype=np.float32))
    output_layer = network.AddOutputLayer(0, "output")

    # Wire the graph: input -> slot 0, weights -> slot 1, bias -> slot 2.
    fc_sources = (input_layer, w_layer, b_layer)
    for slot_index, source_layer in enumerate(fc_sources):
        source_layer.GetOutputSlot(0).Connect(
            fully_connected.GetInputSlot(slot_index))
    fully_connected.GetOutputSlot(0).Connect(output_layer.GetInputSlot(0))

    # Declare the tensor info produced by every output slot.
    input_layer.GetOutputSlot(0).SetTensorInfo(input_info)
    w_layer.GetOutputSlot(0).SetTensorInfo(w_info)
    b_layer.GetOutputSlot(0).SetTensorInfo(b_info)
    fully_connected.GetOutputSlot(0).SetTensorInfo(output_info)

    preferred_backends = [ann.BackendId('CpuRef')]
    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)
    opt_network, messages = ann.Optimize(network,
                                         preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    net_id, messages = runtime.LoadNetwork(opt_network)

    input_tensors = [(0, input_tensor)]
    output_tensors = [(0, output_tensor)]
    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

    output_vectors = ann.workload_tensors_to_ndarray(output_tensors)

    assert (output_vectors == expected_output).all()