def execute_node(node, context, graph): """Executes a single node by using onnxruntime, with custom function or if dataflow partition by using remote execution or rtlsim. Input/output provided via context.""" if node.op_type == "StreamingDataflowPartition": sdp_node = getCustomOp(node) model = ModelWrapper(sdp_node.get_nodeattr("model")) ret = execute_onnx(model, context, True) context.update(ret) else: if node.domain == "finn": ex_cu_node.execute_custom_node(node, context, graph) else: # onnxruntime unfortunately does not implement run_node as defined by ONNX, # it can only execute entire models -- so we create a model which solely # consists of our current node. node_inputs = list( filter(lambda x: x.name in node.input, graph.input)) node_inputs += list( filter(lambda x: x.name in node.input, graph.value_info)) node_outputs = list( filter(lambda x: x.name in node.output, graph.output)) node_outputs += list( filter(lambda x: x.name in node.output, graph.value_info)) node_graph = helper.make_graph( nodes=[node], name="single-node-exec", inputs=node_inputs, outputs=node_outputs, ) node_model = helper.make_model(node_graph) input_dict = dict() for inp in node.input: input_dict[inp] = context[inp] sess = rt.InferenceSession(node_model.SerializeToString()) output_list = sess.run(None, input_dict) for output_ind in range(len(node.output)): outp = node.output[output_ind] if output_list[output_ind].shape != context[outp].shape: raise Exception( """Output shapes disagree after node execution: found %s vs expected %s""" % ( str(output_list[output_ind].shape.shape), str(context[outp].shape), )) context[outp] = output_list[output_ind]
def execute_node(node, context, graph, return_full_exec_context=False):
    """Executes a single node: partition nodes (GenericPartition,
    StreamingDataflowPartition) by recursively executing the model they wrap,
    e.g. via remote execution or rtlsim, FINN custom nodes via their
    registered execute function, and all other nodes via onnxruntime.
    Inputs and outputs are read from and written to the context dict. If
    return_full_exec_context is True, the intermediate tensors of a partition
    are copied into the context as well, prefixed with the partition node's
    name."""
    if node.op_type == "GenericPartition":
        partition_node = getCustomOp(node)
        model = ModelWrapper(partition_node.get_nodeattr("model"))
        inp_ctx = dict(filter(lambda x: x[0] in node.input, context.items()))
        # inputs may have been renamed in partition
        for i, old_iname in enumerate(node.input):
            new_iname = model.graph.input[i].name
            if old_iname != new_iname:
                inp_ctx[new_iname] = inp_ctx[old_iname]
                del inp_ctx[old_iname]
        ret = execute_onnx(model, inp_ctx, return_full_exec_context)
        # outputs may have been renamed in partition
        for i, node_oname in enumerate(node.output):
            model_oname = model.graph.output[i].name
            context[node_oname] = ret[model_oname]
        # prefix and insert exec context entries
        if return_full_exec_context:
            for tname in ret.keys():
                if tname not in [x.name for x in model.graph.output]:
                    context[node.name + "_" + tname] = ret[tname]
    elif node.op_type == "StreamingDataflowPartition":
        sdp_node = getCustomOp(node)
        model = ModelWrapper(sdp_node.get_nodeattr("model"))
        inp_ctx = dict(filter(lambda x: x[0] in node.input, context.items()))
        # input may have been renamed in partition
        assert len(inp_ctx) == 1
        old_iname = node.input[0]
        new_iname = model.graph.input[0].name
        if old_iname != new_iname:
            inp_ctx[new_iname] = inp_ctx[old_iname]
            del inp_ctx[old_iname]
        ret = execute_onnx(model, inp_ctx, return_full_exec_context)
        # if the model was in ip-stitched rtlsim mode, it may have been
        # annotated with the number of elapsed cycles; save it again to
        # preserve that annotation
        if model.get_metadata_prop("exec_mode") == "rtlsim":
            model.save(sdp_node.get_nodeattr("model"))
        # output may have been renamed in partition
        assert len(model.graph.output) == 1
        node_oname = node.output[0]
        model_oname = model.graph.output[0].name
        context[node_oname] = ret[model_oname]
        # prefix and insert exec context entries
        if return_full_exec_context:
            for tname in ret.keys():
                if tname != model_oname:
                    context[node.name + "_" + tname] = ret[tname]
    else:
        if is_finn_op(node.domain):
            ex_cu_node.execute_custom_node(node, context, graph)
        else:
            # onnxruntime unfortunately does not implement run_node as defined
            # by ONNX, it can only execute entire models -- so we create a
            # model which solely consists of our current node.
            # note: ensure that the same ValueInfo does not appear both in
            # graph.value_info and in graph.output or graph.input; nodes with
            # multiple outputs that are a mix of value_info and input/outputs
            # may get them reordered below
            node_inputs = list(filter(lambda x: x.name in node.input, graph.input))
            node_inputs += list(
                filter(lambda x: x.name in node.input, graph.value_info)
            )
            node_outputs = list(filter(lambda x: x.name in node.output, graph.output))
            node_outputs += list(
                filter(lambda x: x.name in node.output, graph.value_info)
            )
            node_graph = helper.make_graph(
                nodes=[node],
                name="single-node-exec",
                inputs=node_inputs,
                outputs=node_outputs,
            )
            node_model = helper.make_model(node_graph)
            input_dict = dict()
            for inp in node.input:
                input_dict[inp] = context[inp]
            sess = rt.InferenceSession(node_model.SerializeToString())
            output_list = sess.run(None, input_dict)
            for output_ind in range(len(node.output)):
                # get the name of the target buffer from node.output
                outp = node.output[output_ind]
                # retrieve the index of that name in node_outputs
                for i in range(len(node_outputs)):
                    if outp == node_outputs[i].name:
                        list_ind = i
                # use that index to index output_list
                if output_list[list_ind].shape != context[outp].shape:
                    raise Exception(
                        "Output shapes disagree after node execution: "
                        "found %s vs expected %s"
                        % (str(output_list[list_ind].shape), str(context[outp].shape))
                    )
                context[outp] = output_list[list_ind]
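
# A plain-dict sketch (hypothetical tensor names) of how intermediate tensors
# of a partition land in the parent context when return_full_exec_context is
# set: everything except the partition's declared output is prefixed with the
# partition node's name, mirroring the loop at the end of each branch above.
def _prefix_partition_context_sketch():
    node_name = "StreamingDataflowPartition_0"  # hypothetical node name
    model_oname = "global_out"  # the partition-internal name of the output
    ret = {"global_out": 1.0, "act0": 2.0, "act1": 3.0}  # child exec context
    context = {"top_out": ret[model_oname]}  # parent-graph name of the output
    for tname in ret.keys():
        if tname != model_oname:
            context[node_name + "_" + tname] = ret[tname]
    assert context == {
        "top_out": 1.0,
        "StreamingDataflowPartition_0_act0": 2.0,
        "StreamingDataflowPartition_0_act1": 3.0,
    }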
def execute_node(node, context, graph): """Executes a single node by using onnxruntime, with custom function or if dataflow partition by using remote execution or rtlsim. Input/output provided via context.""" if node.op_type == "StreamingDataflowPartition": sdp_node = getCustomOp(node) model = ModelWrapper(sdp_node.get_nodeattr("model")) ret = execute_onnx(model, context, True) context.update(ret) else: if node.domain == "finn": ex_cu_node.execute_custom_node(node, context, graph) else: # onnxruntime unfortunately does not implement run_node as defined by ONNX, # it can only execute entire models -- so we create a model which solely # consists of our current node. # note: ensure that the same ValueInfo does not appear both in # graph.value_info as well as graph.output or graph.input # nodes with multiple outputs that are a mix of value_info and # input/outputs may get them reordered below node_inputs = list( filter(lambda x: x.name in node.input, graph.input)) node_inputs += list( filter(lambda x: x.name in node.input, graph.value_info)) node_outputs = list( filter(lambda x: x.name in node.output, graph.output)) node_outputs += list( filter(lambda x: x.name in node.output, graph.value_info)) node_graph = helper.make_graph( nodes=[node], name="single-node-exec", inputs=node_inputs, outputs=node_outputs, ) node_model = helper.make_model(node_graph) input_dict = dict() for inp in node.input: input_dict[inp] = context[inp] sess = rt.InferenceSession(node_model.SerializeToString()) output_list = sess.run(None, input_dict) for output_ind in range(len(node.output)): # get the name of the target buffer from node.output outp = node.output[output_ind] # retrieve the index of that name in node_outputs for i in range(len(node_outputs)): if outp == node_outputs[i].name: list_ind = i # use that index to index output_list if output_list[list_ind].shape != context[outp].shape: raise Exception( """Output shapes disagree after node execution: found %s vs expected %s""" % ( str(output_list[list_ind].shape.shape), str(context[outp].shape), )) context[outp] = output_list[list_ind]
def test_execute_custom_node_multithreshold():
    inputs = np.ndarray(
        shape=(6, 3, 2, 2),
        buffer=np.array([
            4.8, 3.2, 1.2, 4.9, 7.8, 2.4, 3.1, 4.7, 6.2, 5.1, 4.9, 2.2,
            6.2, 0.0, 0.8, 4.7, 0.2, 5.6, 8.9, 9.2, 9.1, 4.0, 3.3, 4.9,
            2.3, 1.7, 1.3, 2.2, 4.6, 3.4, 3.7, 9.8, 4.7, 4.9, 2.8, 2.7,
            8.3, 6.7, 4.2, 7.1, 2.8, 3.1, 0.8, 0.6, 4.4, 2.7, 6.3, 6.1,
            1.4, 5.3, 2.3, 1.9, 4.7, 8.1, 9.3, 3.7, 2.7, 5.1, 4.2, 1.8,
            4.1, 7.3, 7.1, 0.4, 0.2, 1.3, 4.3, 8.9, 1.4, 1.6, 8.3, 9.4,
        ]),
    )
    threshold_values = np.ndarray(
        shape=(3, 7),
        buffer=np.array([
            0.8, 1.4, 1.7, 3.5, 5.2, 6.8, 8.2,
            0.2, 2.2, 3.5, 4.5, 6.6, 8.6, 9.2,
            1.3, 4.1, 4.5, 6.5, 7.8, 8.1, 8.9,
        ]),
    )
    v = helper.make_tensor_value_info("v", TensorProto.FLOAT, [6, 3, 2, 2])
    thresholds = helper.make_tensor_value_info("thresholds", TensorProto.FLOAT, [3, 7])
    out = helper.make_tensor_value_info("out", TensorProto.FLOAT, [6, 3, 2, 2])
    node_def = helper.make_node(
        "MultiThreshold", ["v", "thresholds"], ["out"],
        domain="finn.custom_op.general",
    )
    graph_def = helper.make_graph([node_def], "test_model", [v, thresholds], [out])
    execution_context = {}
    execution_context["v"] = inputs
    execution_context["thresholds"] = threshold_values
    ex_cu_node.execute_custom_node(node_def, execution_context, graph_def)
    outputs = np.ndarray(
        shape=(6, 3, 2, 2),
        buffer=np.array([
            4.0, 3.0, 1.0, 4.0, 5.0, 2.0, 2.0, 4.0, 3.0, 3.0, 3.0, 1.0,
            5.0, 0.0, 1.0, 4.0, 1.0, 4.0, 6.0, 7.0, 7.0, 1.0, 1.0, 3.0,
            3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 3.0, 7.0, 3.0, 3.0, 1.0, 1.0,
            7.0, 5.0, 4.0, 6.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0,
            2.0, 5.0, 3.0, 3.0, 4.0, 5.0, 7.0, 3.0, 1.0, 3.0, 2.0, 1.0,
            4.0, 6.0, 6.0, 0.0, 1.0, 1.0, 3.0, 6.0, 1.0, 1.0, 6.0, 7.0,
        ]),
    )
    assert (execution_context["out"] == outputs).all()
    # test the optional output scaling features on MultiThreshold
    node_def = helper.make_node(
        "MultiThreshold", ["v", "thresholds"], ["out"],
        domain="finn.custom_op.general",
        out_scale=2.0,
        out_bias=-1.0,
    )
    graph_def = helper.make_graph([node_def], "test_model", [v, thresholds], [out])
    ex_cu_node.execute_custom_node(node_def, execution_context, graph_def)
    outputs_scaled = 2.0 * outputs - 1.0
    assert (execution_context["out"] == outputs_scaled).all()
    # test the optional data layout option for MultiThreshold
    node_def = helper.make_node(
        "MultiThreshold", ["v", "thresholds"], ["out"],
        domain="finn.custom_op.general",
        data_layout="NHWC",
    )
    v_nhwc = helper.make_tensor_value_info("v", TensorProto.FLOAT, [6, 2, 2, 3])
    out_nhwc = helper.make_tensor_value_info("out", TensorProto.FLOAT, [6, 2, 2, 3])
    inputs_nhwc = np.transpose(inputs, (0, 2, 3, 1))  # NCHW -> NHWC
    outputs_nhwc = np.transpose(outputs, (0, 2, 3, 1))  # NCHW -> NHWC
    execution_context["v"] = inputs_nhwc
    graph_def = helper.make_graph(
        [node_def], "test_model", [v_nhwc, thresholds], [out_nhwc]
    )
    ex_cu_node.execute_custom_node(node_def, execution_context, graph_def)
    assert (execution_context["out"] == outputs_nhwc).all()
    # check the set of allowed values
    op_inst = getCustomOp(node_def)
    assert op_inst.get_nodeattr_allowed_values("data_layout") == {"NCHW", "NHWC"}
    # exercise the allowed value checks: setting a non-allowed value must
    # raise. note: the failure flag must live outside the try block, since an
    # assert False inside it would itself be swallowed by except Exception.
    raised = False
    try:
        op_inst.set_nodeattr("data_layout", "xx")
    except Exception:
        raised = True
    assert raised
    # set a non-allowed value at the ONNX protobuf level; reading it back
    # through get_nodeattr must raise as well
    node_def.attribute[0].s = "xx".encode("utf-8")
    raised = False
    try:
        op_inst.get_nodeattr("data_layout")
    except Exception:
        raised = True
    assert raised
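
# Reference sketch of the MultiThreshold semantics the test above exercises
# (not FINN's implementation): each element is mapped to the number of its
# channel's thresholds it meets or exceeds, then the optional affine output
# scaling is applied. On the test's inputs and threshold_values this
# reproduces the expected outputs (e.g. 4.8 crosses 0.8, 1.4, 1.7, 3.5 -> 4).
def multithreshold_sketch(x_nchw, thresholds, out_scale=1.0, out_bias=0.0):
    out = np.zeros(x_nchw.shape)
    for c in range(x_nchw.shape[1]):
        # count thresholds crossed per element of channel c
        out[:, c] = sum((x_nchw[:, c] >= t) for t in thresholds[c])
    return out_scale * out + out_bias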
def test_execute_custom_node_multithreshold():
    inputs = np.ndarray(
        shape=(6, 3, 2, 2),
        buffer=np.array([
            4.8, 3.2, 1.2, 4.9, 7.8, 2.4, 3.1, 4.7, 6.2, 5.1, 4.9, 2.2,
            6.2, 0.0, 0.8, 4.7, 0.2, 5.6, 8.9, 9.2, 9.1, 4.0, 3.3, 4.9,
            2.3, 1.7, 1.3, 2.2, 4.6, 3.4, 3.7, 9.8, 4.7, 4.9, 2.8, 2.7,
            8.3, 6.7, 4.2, 7.1, 2.8, 3.1, 0.8, 0.6, 4.4, 2.7, 6.3, 6.1,
            1.4, 5.3, 2.3, 1.9, 4.7, 8.1, 9.3, 3.7, 2.7, 5.1, 4.2, 1.8,
            4.1, 7.3, 7.1, 0.4, 0.2, 1.3, 4.3, 8.9, 1.4, 1.6, 8.3, 9.4,
        ]),
    )
    threshold_values = np.ndarray(
        shape=(3, 7),
        buffer=np.array([
            0.8, 1.4, 1.7, 3.5, 5.2, 6.8, 8.2,
            0.2, 2.2, 3.5, 4.5, 6.6, 8.6, 9.2,
            1.3, 4.1, 4.5, 6.5, 7.8, 8.1, 8.9,
        ]),
    )
    v = helper.make_tensor_value_info("v", TensorProto.FLOAT, [6, 3, 2, 2])
    thresholds = helper.make_tensor_value_info("thresholds", TensorProto.FLOAT, [3, 7])
    out = helper.make_tensor_value_info("out", TensorProto.FLOAT, [6, 3, 2, 2])
    node_def = helper.make_node(
        "MultiThreshold", ["v", "thresholds"], ["out"], domain="finn"
    )
    graph_def = helper.make_graph([node_def], "test_model", [v, thresholds], [out])
    execution_context = {}
    execution_context["v"] = inputs
    execution_context["thresholds"] = threshold_values
    ex_cu_node.execute_custom_node(node_def, execution_context, graph_def)
    outputs = np.ndarray(
        shape=(6, 3, 2, 2),
        buffer=np.array([
            4.0, 3.0, 1.0, 4.0, 5.0, 2.0, 2.0, 4.0, 3.0, 3.0, 3.0, 1.0,
            5.0, 0.0, 1.0, 4.0, 1.0, 4.0, 6.0, 7.0, 7.0, 1.0, 1.0, 3.0,
            3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 3.0, 7.0, 3.0, 3.0, 1.0, 1.0,
            7.0, 5.0, 4.0, 6.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0,
            2.0, 5.0, 3.0, 3.0, 4.0, 5.0, 7.0, 3.0, 1.0, 3.0, 2.0, 1.0,
            4.0, 6.0, 6.0, 0.0, 1.0, 1.0, 3.0, 6.0, 1.0, 1.0, 6.0, 7.0,
        ]),
    )
    assert (execution_context["out"] == outputs).all()
    # test the optional output scaling features on MultiThreshold
    node_def = helper.make_node(
        "MultiThreshold", ["v", "thresholds"], ["out"],
        domain="finn",
        out_scale=2.0,
        out_bias=-1.0,
    )
    graph_def = helper.make_graph([node_def], "test_model", [v, thresholds], [out])
    ex_cu_node.execute_custom_node(node_def, execution_context, graph_def)
    outputs_scaled = 2.0 * outputs - 1.0
    assert (execution_context["out"] == outputs_scaled).all()
    # test the optional data layout option for MultiThreshold
    node_def = helper.make_node(
        "MultiThreshold", ["v", "thresholds"], ["out"],
        domain="finn",
        data_layout="NHWC",
    )
    v_nhwc = helper.make_tensor_value_info("v", TensorProto.FLOAT, [6, 2, 2, 3])
    out_nhwc = helper.make_tensor_value_info("out", TensorProto.FLOAT, [6, 2, 2, 3])
    inputs_nhwc = np.transpose(inputs, (0, 2, 3, 1))  # NCHW -> NHWC
    outputs_nhwc = np.transpose(outputs, (0, 2, 3, 1))  # NCHW -> NHWC
    execution_context["v"] = inputs_nhwc
    graph_def = helper.make_graph(
        [node_def], "test_model", [v_nhwc, thresholds], [out_nhwc]
    )
    ex_cu_node.execute_custom_node(node_def, execution_context, graph_def)
    assert (execution_context["out"] == outputs_nhwc).all()
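
# Hedged sketch of what data_layout="NHWC" amounts to (assuming the op only
# reinterprets which axis holds the channels, as the transposed expected
# outputs in the tests suggest): thresholds are per-channel, so an NHWC input
# can be handled by moving channels to axis 1, applying multithreshold_sketch
# from above, and moving them back.
def multithreshold_nhwc_sketch(x_nhwc, thresholds, **kwargs):
    x_nchw = np.transpose(x_nhwc, (0, 3, 1, 2))  # NHWC -> NCHW
    y_nchw = multithreshold_sketch(x_nchw, thresholds, **kwargs)
    return np.transpose(y_nchw, (0, 2, 3, 1))  # NCHW -> NHWC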