def test_end2end_tfc_w1a2_verify_dataflow_part():
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_ipstitch.onnx")
    x = np.zeros((1, 784), dtype=np.float32)
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: x}
    # npysim
    model = model.transform(CodeGen_npysim())
    model = model.transform(Compile())
    model = model.transform(SetExecMode("npysim"))
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_npysim.onnx")
    ret_npysim = execute_onnx(model, inp_dict, True)
    res_npysim = ret_npysim[out_name]
    # node-by-node rtlsim
    model = model.transform(SetExecMode("rtlsim"))
    getCustomOp(model.graph.node[0]).set_nodeattr("rtlsim_trace", "default")
    getCustomOp(model.graph.node[1]).set_nodeattr("rtlsim_trace", "default")
    getCustomOp(model.graph.node[2]).set_nodeattr("rtlsim_trace", "default")
    getCustomOp(model.graph.node[3]).set_nodeattr("rtlsim_trace", "default")
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_nodebynode_rtlsim.onnx")
    ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
    res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
    # whole-network (ip-stitched) rtlsim
    model.set_metadata_prop("exec_mode", "rtlsim")
    model.set_metadata_prop("rtlsim_trace", "whole_trace.vcd")
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_whole_rtlsim.onnx")
    ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
    res_rtlsim_whole = ret_rtlsim_whole[out_name]
    assert np.isclose(res_npysim, res_rtlsim_nodebynode).all()
    assert np.isclose(res_npysim, res_rtlsim_whole).all()
def test_end2end_cnv_w1a1_verify_dataflow_part():
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_ipstitch.onnx")
    x = np.zeros((1, 32, 32, 3), dtype=np.float32)
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: x}
    # cppsim
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    model.save(build_dir + "/end2end_cnv_w1a1_ipgen_cppsim.onnx")
    ret_cppsim = execute_onnx(model, inp_dict, True)
    res_cppsim = ret_cppsim[out_name]
    # node-by-node rtlsim
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(PrepareRTLSim())
    model.save(build_dir + "/end2end_cnv_w1a1_ipgen_nodebynode_rtlsim.onnx")
    ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
    res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
    # whole-network (ip-stitched) rtlsim
    model.set_metadata_prop("exec_mode", "rtlsim")
    model.save(build_dir + "/end2end_cnv_w1a1_ipstitch_whole_rtlsim.onnx")
    # this is a particularly long-running test, set liveness thr. to unlimited
    os.environ["LIVENESS_THRESHOLD"] = "-1"
    ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
    res_rtlsim_whole = ret_rtlsim_whole[out_name]
    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
def test_end2end_tfc_w1a2_verify_dataflow_part():
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_ipstitch.onnx")
    x = np.zeros((1, 784), dtype=np.float32)
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: x}
    # cppsim
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_cppsim.onnx")
    ret_cppsim = execute_onnx(model, inp_dict, True)
    res_cppsim = ret_cppsim[out_name]
    # node-by-node rtlsim
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(PrepareRTLSim())
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_nodebynode_rtlsim.onnx")
    ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
    res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
    # whole-network (ip-stitched) rtlsim
    model.set_metadata_prop("exec_mode", "rtlsim")
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_whole_rtlsim.onnx")
    ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
    res_rtlsim_whole = ret_rtlsim_whole[out_name]
    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
def test_move_chw_add_past_conv(idim, k, s, ich, och):
    odim = compute_conv_output_dim(idim, k, s)
    ishape = [1, ich, idim, idim]
    oshape = [1, och, odim, odim]
    add_param_shape = [1, ich, 1, 1]
    conv_param_shape = [och, ich, k, k]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)
    a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, add_param_shape)
    a1 = helper.make_tensor_value_info("a1", TensorProto.FLOAT, conv_param_shape)

    conv_config = {}
    conv_config["dilations"] = [1, 1]
    conv_config["group"] = 1
    conv_config["kernel_shape"] = [k, k]
    conv_config["pads"] = [0, 0, 0, 0]
    conv_config["strides"] = [s, s]

    add_node = helper.make_node("Add", ["inp", "a0"], ["add_out"])
    conv_node = helper.make_node("Conv", ["add_out", "a1"], ["outp"], **conv_config)

    model = helper.make_model(
        helper.make_graph(
            nodes=[add_node, conv_node],
            name="move-add-graph",
            inputs=[inp],
            outputs=[outp],
            value_info=[a0, a1],
        )
    )

    model = ModelWrapper(model)
    # initialize model
    a0_values = np.random.uniform(low=0, high=1, size=tuple(add_param_shape)).astype(
        np.float32
    )
    model.set_initializer("a0", a0_values)
    a1_values = np.random.uniform(low=0, high=1, size=tuple(conv_param_shape)).astype(
        np.float32
    )
    model.set_initializer("a1", a1_values)

    model = model.transform(InferShapes())

    # execution before transformation
    inp_values = np.random.uniform(low=0, high=1, size=tuple(ishape)).astype(np.float32)
    idict = {model.graph.input[0].name: inp_values}
    odict = oxe.execute_onnx(model, idict)
    y_before = odict[model.graph.output[0].name]

    model = model.transform(MoveAddPastConv())
    odict = oxe.execute_onnx(model, idict)
    y_after = odict[model.graph.output[0].name]

    assert np.isclose(y_before, y_after).all()
    assert model.graph.node[0].op_type == "Conv"
    assert model.graph.node[1].op_type == "Add"
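# compute_conv_output_dim is imported from the FINN/qonnx utilities; a minimal
# sketch of the standard convolution output-size formula it is assumed to
# implement (signature inferred from the call sites in these tests):
def compute_conv_output_dim(ifm_dim, k, stride, pad=0):
    # out = floor((in + 2*pad - kernel) / stride) + 1
    return int((ifm_dim + 2 * pad - k) / stride) + 1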
def test_fpgadataflow_packed_dsp(
    ich, och, idim, k, s, pad, wdt, idt, tdt, odt, mode
):
    model = make_model(ich, och, idim, k, s, pad, wdt, idt, tdt, odt)
    cdp_model = model.transform(InferDoublePackedConv())
    assert (
        len(cdp_model.graph.node) == 3
        and cdp_model.graph.node[1].op_type == "ConvDoublePacked_Batch"
        and cdp_model.graph.node[0].op_type == "Transpose"
        and cdp_model.graph.node[-1].op_type == "Transpose"
    ), "Incorrect model"
    # execute models and compare
    x = gen_finn_dt_tensor(idt, (1, ich, idim, idim))
    input_dict = {"inp": x}
    y_expected = oxe.execute_onnx(model, input_dict)["outp"]
    if mode == "cppsim":
        cdp_model = cdp_model.transform(SetExecMode("cppsim"))
        cdp_model = cdp_model.transform(PrepareCppSim())
        cdp_model = cdp_model.transform(CompileCppSim())
        y_produced = oxe.execute_onnx(cdp_model, input_dict)["outp"]
    elif mode == "rtlsim":
        cdp_model = cdp_model.transform(SetExecMode("rtlsim"))
        cdp_model = cdp_model.transform(GiveUniqueNodeNames())
        cdp_model = cdp_model.transform(GiveReadableTensorNames())
        cdp_model = cdp_model.transform(PrepareIP("xc7z020clg400-1", 5))
        cdp_model = cdp_model.transform(HLSSynthIP())
        cdp_model = cdp_model.transform(PrepareRTLSim())
        input_dict = {"global_in": x}
        y_produced = oxe.execute_onnx(cdp_model, input_dict)["global_out"]
    # assert message was previously hardcoded to "cppsim failed"
    assert (y_produced.flatten() == y_expected.flatten()).all(), mode + " failed"
def test_topk_insert(k):
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model.model.opset_import[0].version = 11

    # do transformations (no topk)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())

    # verification: load a test input, run it through the net, insert top-k,
    # run again, and check that the output matches the top-k of the original
    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    input_tensor = nph.to_array(input_tensor)
    input_dict = {"global_in": input_tensor}
    output_golden = oxe.execute_onnx(model, input_dict)["global_out"]
    output_golden_topk = np.flip(output_golden.flatten().argsort())[:k]
    output_golden_topk = output_golden_topk.flatten()

    # insert top-k
    model = model.transform(InsertTopK(k))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferShapes())

    # verify output of top-k
    output_dict_topk = oxe.execute_onnx(model, input_dict)
    output_pysim_topk = output_dict_topk[list(output_dict_topk.keys())[0]]
    # np.int is deprecated/removed in recent numpy, use np.int64 instead
    output_pysim_topk = output_pysim_topk.astype(np.int64).flatten()

    assert np.array_equal(output_golden_topk, output_pysim_topk)
def test_mnist_onnx_download_extract_run():
    # load the onnx model
    raw_m = get_data("finn", "data/onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model = model.transform(InferShapes())
    # load one of the test vectors
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    raw_o = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/output_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    output_tensor = onnx.load_tensor_from_string(raw_o)
    # run using FINN-based execution (full graph)
    input_dict = {"Input3": np_helper.to_array(input_tensor)}
    output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
    assert np.isclose(
        np_helper.to_array(output_tensor), output_dict["Plus214_Output_0"], atol=1e-3
    ).all()
    # test subgraph execution
    start_node = model.graph.node[1]
    end_node = model.graph.node[3]
    subgraph_i_dict = {start_node.input[0]: output_dict[start_node.input[0]]}
    subgraph_o_dict = oxe.execute_onnx(
        model,
        subgraph_i_dict,
        return_full_exec_context=True,
        start_node=start_node,
        end_node=end_node,
    )
    assert np.isclose(
        subgraph_o_dict[end_node.output[0]], output_dict[end_node.output[0]], atol=1e-3
    ).all()
def test_end2end_tfc_w1a2_run_on_pynq():
    # use the streamlined model as the "golden" model for right answers
    golden = ModelWrapper(build_dir + "/end2end_tfc_w1a2_streamlined.onnx")
    iname = golden.graph.input[0].name
    oname = golden.graph.output[0].name
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    x = nph.to_array(input_tensor)
    # x = np.zeros(ishape, dtype=np.float32)
    # run using FINN-based execution
    ret_golden = execute_onnx(golden, {iname: x}, True)
    y_golden = ret_golden[oname]
    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_dataflow_parent.onnx")
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name
    try:
        ip = os.environ["PYNQ_IP"]  # NOQA
        if ip == "":
            pytest.skip("PYNQ board IP address not specified")
        # produce results with remote execution of the deployed child model
        sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
        sdp_node = getCustomOp(sdp_node)
        sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w1a2_pynq_deploy.onnx")
        ret = execute_onnx(parent_model, {iname: x}, True)
        y = ret[oname]
        assert np.isclose(y, y_golden).all()
    except KeyError:
        pytest.skip("PYNQ board IP address not specified")
def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mode):
    ifm_dim_h = ifm_dim
    k_h = k
    if dim_1d:
        ifm_dim_w = 1
        k_w = 1
    else:
        ifm_dim_w = ifm_dim_h
        k_w = k_h
    ifm_dim = (ifm_dim_h, ifm_dim_w)
    k = (k_h, k_w)

    stride_h = k_h
    stride_w = k_w
    ofm_dim_h = int(((ifm_dim_h - k_h) / stride_h) + 1)
    ofm_dim_w = int(((ifm_dim_w - k_w) / stride_w) + 1)
    ofm_dim = (ofm_dim_h, ofm_dim_w)
    if idt == DataType["BIPOLAR"] and dim_1d:
        pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)")
    if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")

    x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
    # prepare input data
    input_dict = prepare_inputs(x)

    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_streamingmaxpool")

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
        assert exp_cycles != 0
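# prepare_inputs is a small helper defined alongside these tests and not shown
# here; a minimal sketch of its assumed behavior. The two-argument variant used
# by other tests below is assumed to additionally remap bipolar {-1, +1}
# tensors to the binary {0, 1} encoding expected by the generated kernels.
def prepare_inputs(input_tensor, idt=None):
    if idt == DataType["BIPOLAR"]:
        # convert bipolar to binary for execution
        input_tensor = (input_tensor + 1) / 2
    return {"inp": input_tensor}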
def test_streamline_cnv(size, wbits, abits):
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")
    nname = "%s_%dW%dA" % (size, wbits, abits)
    finn_onnx = export_onnx_path + "/%s.onnx" % nname
    fc = get_test_model_trained(size, wbits, abits)
    bo.export_finn_onnx(fc, (1, 3, 32, 32), finn_onnx)
    model = ModelWrapper(finn_onnx)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(RemoveStaticGraphInputs())
    # load one of the test vectors
    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    # run using FINN-based execution
    input_dict = {"global_in": input_tensor}
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]
    # model.save("orig_cnv.onnx")
    model = model.transform(Streamline())
    model = model.transform(RemoveUnusedTensors())
    assert len(model.graph.initializer) == 21
    assert len(model.graph.value_info) == 43
    # model.save("streamlined_cnv.onnx")
    assert len(model.graph.node) == 23
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()
    assert model.graph.node[0].op_type == "MultiThreshold"
    assert np.argmax(produced) == 3
def test_4d_conversion_invalid_nodes():
    """Test for the 3D to 4D transformation with an invalid graph."""
    model = create_arbitrary_model(invalid=True)

    # Inputs
    input_dict = generate_random_input(model)

    # Initializers
    set_all_initializers(model)

    # Comparing the outputs of the model before and after the transform
    output_node_name = model.graph.output[0].name
    output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
    expected = output_dict[output_node_name]

    model = model.transform(Change3DTo4DTensors())

    output_node_name = model.graph.output[0].name
    output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
    expected_modified = output_dict[output_node_name]

    expected_modified = np.reshape(expected_modified, np.shape(expected))

    assert (expected == expected_modified).all()
def test_streamline_fc(size, wbits, abits):
    if size == "LFC" and wbits == 2 and abits == 2:
        pytest.skip("No LFC-w2a2 present at the moment")
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")
    nname = "%s_%dW%dA" % (size, wbits, abits)
    finn_onnx = export_onnx_path + "/%s.onnx" % nname
    fc = get_test_model_trained(size, wbits, abits)
    bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)
    model = ModelWrapper(finn_onnx)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(RemoveStaticGraphInputs())
    # load one of the test vectors
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    # run using FINN-based execution
    input_dict = {"global_in": nph.to_array(input_tensor)}
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]
    model = model.transform(Streamline())
    model = model.transform(RemoveUnusedTensors())
    assert len(model.graph.initializer) == 11
    assert len(model.graph.value_info) == 21
    assert len(model.graph.quantization_annotation) == 20
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()
def test_fpgadataflow_slidingwindow(idt, k, ifm_dim, ifm_ch, stride):
    simd = ifm_ch
    ofm_dim = int(((ifm_dim - k) / stride) + 1)

    x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim))
    model = make_single_slidingwindow_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt
    )
    model = model.transform(SetExecMode("npysim"))
    model = model.transform(CodeGen_npysim())
    model = model.transform(Compile())

    # prepare input data
    input_dict = prepare_inputs(x, idt)

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_expected = im2col_indices(x, k, stride)
    # reshape expected output to match node output
    oshape = y_produced.shape
    y_expected = y_expected.reshape(oshape)

    assert (y_produced == y_expected).all(), "npysim failed"

    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(CodeGen_ipgen("xc7z020clg400-1", 5))
    model = model.transform(HLSSynth_IPGen())
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all(), "rtlsim failed"
def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
    stride = k
    ofm_dim = int(((ifm_dim - k) / stride) + 1)
    if ifm_dim % k != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")

    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    # prepare input data
    input_dict = prepare_inputs(x)

    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        # error message previously named the wrong test function
        raise Exception("Unknown exec_mode in test_fpgadataflow_streamingmaxpool")

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()
def test_end2end_cnv_w1a1_run_on_pynq():
    # use the streamlined model as the "golden" model for right answers
    golden = ModelWrapper(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
    iname = golden.graph.input[0].name
    oname = golden.graph.output[0].name
    # load one of the test vectors
    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    x = input_tensor
    # run using FINN-based execution
    ret_golden = execute_onnx(golden, {iname: x}, True)
    y_golden = ret_golden[oname]
    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx")
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name
    try:
        ip = os.environ["PYNQ_IP"]  # NOQA
        if ip == "":
            pytest.skip("PYNQ board IP address not specified")
        # produce results with remote execution of the deployed child model
        sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
        sdp_node = getCustomOp(sdp_node)
        sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w1a1_pynq_deploy.onnx")
        ret = execute_onnx(parent_model, {iname: x}, True)
        y = ret[oname]
        assert np.isclose(y, y_golden).all()
        assert np.argmax(y) == 3
    except KeyError:
        pytest.skip("PYNQ board IP address not specified")
def test_batchnorm_to_affine_epsilon(epsilon):
    """Dummy batchnorm node to test out the epsilon attribute."""

    batchnorm_node = onnx.helper.make_node(
        "BatchNormalization",
        inputs=["x", "s", "bias", "mean", "var"],
        outputs=["y"],
        epsilon=epsilon,
    )

    x = onnx.helper.make_tensor_value_info("x", onnx.TensorProto.FLOAT, [1, 3, 5, 5])
    s = onnx.helper.make_tensor_value_info("s", onnx.TensorProto.FLOAT, [3])
    bias = onnx.helper.make_tensor_value_info("bias", onnx.TensorProto.FLOAT, [3])
    mean = onnx.helper.make_tensor_value_info("mean", onnx.TensorProto.FLOAT, [3])
    var = onnx.helper.make_tensor_value_info("var", onnx.TensorProto.FLOAT, [3])
    y = onnx.helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [1, 3, 5, 5])

    # Graph
    graph = onnx.helper.make_graph(
        nodes=[batchnorm_node],
        name="test_batchnorm_graph",
        inputs=[x],
        outputs=[y],
        value_info=[s, bias, mean, var],
    )

    onnx_model = onnx.helper.make_model(graph, producer_name="test_batchnorm-model")
    model = ModelWrapper(onnx_model)

    model.set_initializer("s", np.array([1, 2, 3]).astype(np.float32))
    model.set_initializer("bias", np.array([1, 2, 3]).astype(np.float32))
    model.set_initializer("mean", np.array([3, 4, 5]).astype(np.float32))
    model.set_initializer("var", np.array([0.5, 0.7, 0.3]).astype(np.float32))

    i_val = np.arange(0, 3 * 5 * 5, dtype=np.float32)
    i_val = np.reshape(i_val, [1, 3, 5, 5])
    input_dict = {"x": i_val}
    output_node_name = "y"

    output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
    output_original = output_dict[output_node_name]

    model_lowered = model.transform(BatchNormToAffine())
    output_dict = oxe.execute_onnx(
        model_lowered, input_dict, return_full_exec_context=True
    )
    output_lowered = output_dict[output_node_name]

    assert (output_original == output_lowered).all()
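# BatchNormToAffine folds the batchnorm into a per-channel scale-and-shift:
#   y = s * (x - mean) / sqrt(var + eps) + bias  ==  A * x + B
# with A = s / sqrt(var + eps) and B = bias - A * mean. A quick numpy check of
# this identity using the initializers above (the eps value here is just an
# illustrative choice, not one of the parametrized epsilons):
s, bias = np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, 3.0])
mean, var, eps = np.array([3.0, 4.0, 5.0]), np.array([0.5, 0.7, 0.3]), 1e-5
x_ch = np.array([1.0, 2.0, 3.0])  # one sample value per channel
A = s / np.sqrt(var + eps)
B = bias - A * mean
assert np.allclose(A * x_ch + B, s * (x_ch - mean) / np.sqrt(var + eps) + bias)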
def test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding):
    wdt = idt
    odt = DataType.INT32
    ofm_ch = ifm_ch
    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad=padding[0])

    # set up onnx model
    inp = oh.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim]
    )
    outp = oh.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim, ofm_dim]
    )

    W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, 1, k, k])

    dw_cnv = oh.make_node(
        "Conv",
        inputs=["inp", "W"],
        outputs=["outp"],
        kernel_shape=[k, k],
        pads=padding,
        strides=[stride, stride],
        group=ifm_ch,
    )
    graph = oh.make_graph(
        nodes=[dw_cnv],
        name="dw_cnv_graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[W],
    )
    model = oh.make_model(graph, producer_name="dws_cnv-model")
    model = ModelWrapper(model)
    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)
    model.set_tensor_datatype("W", wdt)
    w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, 1, k, k])
    model.set_initializer("W", w_tensor)
    model = model.transform(InferShapes())

    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim, ifm_dim])
    input_dict = {"inp": input_tensor}
    output_dict = oxe.execute_onnx(model, input_dict)
    expected = output_dict["outp"]

    model = model.transform(LowerConvsToMatMul())
    output_dict = oxe.execute_onnx(model, input_dict)
    produced = output_dict["outp"]
    assert (produced == expected).all()

    # check if created nodes have attributes that indicate depthwise conv
    assert model.get_tensor_sparsity("W") is not None
    im2col_node = getCustomOp(model.graph.node[1])
    assert im2col_node.get_nodeattr("depthwise") == 1
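# After LowerConvsToMatMul the Conv is assumed to be lowered to a
# Transpose -> Im2Col -> MatMul -> Transpose chain (which is why node[1] above
# is queried for the "depthwise" attribute). A structural check along these
# lines could be appended to the test to make that assumption explicit:
op_types = [n.op_type for n in model.graph.node]
assert op_types == ["Transpose", "Im2Col", "MatMul", "Transpose"]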
def test_convert_to_hls_channelwise_layer(
    pdt, idt, onnx_op_name, scalar_param, exec_mode
):
    ifm_ch = 16
    ifm_dim = 5
    ishape = (1, ifm_ch, ifm_dim, ifm_dim)
    if scalar_param:
        pshape = (1,)
    else:
        pshape = (1, ifm_ch, 1, 1)

    np.random.seed(0)
    model = make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape)

    # Since there aren't any datatypes with a bit width that is not a power of
    # two, use a narrower datatype for the widest inputs so that there are
    # cases where the input doesn't use its full range.
    if idt == DataType["INT32"]:
        x = gen_finn_dt_tensor(DataType["INT16"], (1, ifm_ch, ifm_dim, ifm_dim))
    elif idt == DataType["UINT32"]:
        x = gen_finn_dt_tensor(DataType["UINT16"], (1, ifm_ch, ifm_dim, ifm_dim))
    else:
        x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim))

    input_dict = prepare_inputs(x)
    y_expected = oxe.execute_onnx(model, input_dict)["outp"]

    new_model = model.transform(to_hls.InferChannelwiseLinearLayer())
    new_model = new_model.transform(GiveUniqueNodeNames())

    if exec_mode == "cppsim":
        new_model = new_model.transform(PrepareCppSim())
        new_model = new_model.transform(CompileCppSim())
        new_model = new_model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        new_model = new_model.transform(SetExecMode("rtlsim"))
        new_model = new_model.transform(GiveUniqueNodeNames())
        new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
        new_model = new_model.transform(HLSSynthIP())
        new_model = new_model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    ctx_produced = oxe.execute_onnx(
        new_model, input_dict, return_full_exec_context=True
    )
    y_produced = ctx_produced["outp"]

    assert (y_produced == y_expected).all()
    assert new_model.graph.node[1].op_type == "ChannelwiseOp_Batch"
def test_fpgadataflow_slidingwindow(
    idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw
):
    # note: this output-size formula does not account for dilation > 1
    ofm_dim = int(((ifm_dim - k) / stride) + 1)

    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    model = make_single_slidingwindow_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw
    )

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")

    # prepare input data
    input_dict = prepare_inputs(x)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    golden = make_single_im2col_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
    )
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    if dw == 0:
        assert (y_produced == y_expected).all()
    else:
        # in depthwise mode the generator interleaves channel and kernel
        # elements, so rearrange the golden output before comparing
        y_expected = y_expected.reshape(
            1, ofm_dim, ofm_dim, k * k, ifm_ch // simd, simd
        )
        y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5)
        y_expected = y_expected.reshape(1, ofm_dim, ofm_dim, ifm_ch * k * k)
        assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("ConvolutionInputGenerator")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_remove_identity_ops(op, as_first_node, approx):
    # set up onnx model
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 4, 1, 1])
    mul = helper.make_tensor_value_info("mul", TensorProto.FLOAT, [])
    shape = helper.make_tensor_value_info("shape", TensorProto.FLOAT, [2])
    div = helper.make_tensor_value_info("div", TensorProto.FLOAT, [])
    matmul = helper.make_tensor_value_info("matmul", TensorProto.FLOAT, [4, 2])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, 2])

    mul_node = helper.make_node("Mul", ["inp", "mul"], ["mul_out"])
    reshape_node = helper.make_node("Reshape", ["mul_out", "shape"], ["reshape_out"])
    div_node = helper.make_node("Div", ["reshape_out", "div"], ["div_out"])
    matmul_node = helper.make_node("MatMul", ["div_out", "matmul"], ["outp"])

    graph = helper.make_graph(
        nodes=[mul_node, reshape_node, div_node, matmul_node],
        name="identity-graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[mul, shape, div, matmul],
    )

    model = helper.make_model(graph, producer_name="mulpastconv-model")
    model = ModelWrapper(model)
    inp_values = gen_finn_dt_tensor(DataType["INT2"], [1, 4, 1, 1])
    mul_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
    shape_values = np.asarray([1, -1], dtype=np.int64)
    div_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
    matmul_values = gen_finn_dt_tensor(DataType["INT2"], [4, 2])
    model.set_initializer("mul", mul_values)
    model.set_initializer("shape", shape_values)
    model.set_initializer("div", div_values)
    model.set_initializer("matmul", matmul_values)
    insert_identity_op(model, op, as_first_node, approx)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    idict = {"inp": inp_values}
    odict = oxe.execute_onnx(model, idict)
    out_before = odict["outp"]
    num_of_nodes_before = len(model.graph.node)

    model = model.transform(RemoveIdentityOps())
    num_of_nodes_after = len(model.graph.node)
    assert num_of_nodes_before - 1 == num_of_nodes_after

    odict = oxe.execute_onnx(model, idict)
    out_after = odict["outp"]
    assert np.isclose(out_before, out_after, atol=1e-3).all()
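# insert_identity_op is a helper defined alongside this test; a minimal sketch
# of its assumed behavior: splice one identity-like node (Add/Sub of ~0 or
# Mul/Div by ~1, "approx" making the constant only approximately neutral)
# either at the front of the graph or just before the MatMul. The exact node
# positions and tensor names below are assumptions based on the graph above.
def insert_identity_op(model, op, as_first_node, approx):
    eps = 1e-6 if approx else 0.0
    if op in ["Add", "Sub"]:
        val = np.asarray([eps], dtype=np.float32)  # x + 0 (or x - 0)
    else:  # "Mul", "Div"
        val = np.asarray([1.0 + eps], dtype=np.float32)  # x * 1 (or x / 1)
    graph = model.graph
    if as_first_node:
        identity_node = helper.make_node(op, ["inp", "value"], ["ident_out"])
        graph.node.insert(0, identity_node)
        graph.node[1].input[0] = "ident_out"
    else:
        identity_node = helper.make_node(op, ["div_out", "value"], ["ident_out"])
        graph.node.insert(3, identity_node)
        graph.node[-1].input[0] = "ident_out"
    model.set_initializer("value", val)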
def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits):
    if size == "LFC" and wbits == 2 and abits == 2:
        pytest.skip("No LFC-w2a2 present at the moment")
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")
    nname = "%s_%dW%dA" % (size, wbits, abits)
    finn_onnx = export_onnx_path + "/%s.onnx" % nname
    fc = get_test_model_trained(size, wbits, abits)
    bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)
    model = ModelWrapper(finn_onnx)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    # load one of the test vectors
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    # run using FINN-based execution
    input_dict = {"0": nph.to_array(input_tensor)}
    output_dict = oxe.execute_onnx(model, input_dict)
    produced = output_dict[list(output_dict.keys())[0]]
    # run using PyTorch/Brevitas
    input_tensor = torch.from_numpy(nph.to_array(input_tensor)).float()
    assert input_tensor.shape == (1, 1, 28, 28)
    # do forward pass in PyTorch/Brevitas
    expected = fc.forward(input_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
def test_brevitas_qlinear(bias, out_features, in_features, w_bits, i_dtype):
    i_shape = (1, in_features)
    w_shape = (out_features, in_features)
    b_linear = QuantLinear(
        out_features=out_features,
        in_features=in_features,
        bias=bias,
        bias_quant_type=QuantType.FP,
        weight_bit_width=w_bits,
        weight_quant_type=QuantType.INT,
        weight_scaling_per_output_channel=True,
    )
    weight_tensor_fp = np.random.uniform(low=-1.0, high=1.0, size=w_shape).astype(
        np.float32
    )
    b_linear.weight.data = torch.from_numpy(weight_tensor_fp)
    b_linear.eval()
    bo.export_finn_onnx(b_linear, i_shape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = gen_finn_dt_tensor(i_dtype, i_shape)
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    expected = b_linear.forward(inp_tensor).detach().numpy()

    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
def test_end2end_mobilenet_cppsim():
    model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_folded.onnx")
    x = np.load(build_dir + "/end2end_mobilenet_input.npy")
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: x}
    start = time.time()
    # cppsim
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    end = time.time()
    elapsed_time = end - start
    with open(build_dir + "/end2end_mobilenet_compile_time.txt", "w+") as f:
        f.write("Execution time in seconds: " + str(elapsed_time))
    model.save(build_dir + "/end2end_mobilenet_cppsim.onnx")
    ret_cppsim = execute_onnx(model, inp_dict, True)
    res_cppsim = ret_cppsim[out_name]
    np.save(build_dir + "/end2end_mobilenet_result_cppsim.npy", res_cppsim)
    a0 = np.load(build_dir + "/end2end_mobilenet_topk_scale.npy")
    res_cppsim_prob = ret_cppsim[model.graph.node[-2].output[0]] * a0
    np.save(build_dir + "/end2end_mobilenet_result_cppsim_prob.npy", res_cppsim_prob)

    # check result with golden values
    golden = np.load(build_dir + "/end2end_mobilenet_golden_top5.npy")
    golden_prob = np.load(build_dir + "/end2end_mobilenet_golden_top5_prob.npy")

    assert (golden == res_cppsim).all()
    assert np.isclose(golden_prob, res_cppsim_prob).all()
def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits, pretrained):
    if size == "LFC" and wbits == 2 and abits == 2:
        pytest.skip(f"No LFC_{MAX_WBITS}W{MAX_ABITS}A present.")
    if wbits > abits:
        pytest.skip("No wbits > abits cases.")
    nname = f"{size}_{wbits}W{abits}A"
    finn_onnx = nname + ".onnx"
    fc, _ = model_with_cfg(nname.lower(), pretrained=pretrained)
    FINNManager.export_onnx(fc, FC_INPUT_SIZE, finn_onnx)
    model = ModelWrapper(finn_onnx)
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(DoubleToSingleFloat())
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(RemoveStaticGraphInputs())
    # generate a random test vector
    input_tensor = np.random.uniform(
        MIN_INP_VAL, MAX_INP_VAL, size=FC_INPUT_SIZE
    ).astype(np.float32)
    # run using FINN-based execution
    input_dict = {"0": input_tensor}
    output_dict = oxe.execute_onnx(model, input_dict)
    produced = output_dict[list(output_dict.keys())[0]]
    # run using PyTorch/Brevitas
    input_tensor = torch.from_numpy(input_tensor).float()
    # do forward pass in PyTorch/Brevitas
    expected = fc.forward(input_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=ATOL).all()
def test_fpgadataflow_labelselect(idt, labels, fold, k, exec_mode):
    np.random.seed(0)
    if fold == -1:
        pe = 1
    else:
        pe = labels // fold
    assert labels % pe == 0

    if k == -1:
        k = labels

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, labels))

    model = make_labelselect_modelwrapper(labels, pe, k, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    y = oxe.execute_onnx(model, input_dict)["outp"]

    assert soft_verify_topk(x, y, k), exec_mode + " failed"
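# soft_verify_topk comes from the shared test utilities; a sketch of the
# assumed check: the produced indices must point at values equal to the k
# largest input values ("soft" because ties may legitimately reorder indices).
def soft_verify_topk(invec, idxvec, k):
    np_topk_idx = np.flip(invec.flatten().argsort())[:k]
    soft_expected = invec.flatten()[np_topk_idx]
    soft_produced = invec.flatten()[idxvec.astype(np.int64).flatten()]
    return (soft_expected == soft_produced).all()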
def test_brevitas_debug():
    finn_onnx = "test_brevitas_debug.onnx"
    fc = get_test_model_trained("TFC", 2, 2)
    dbg_hook = bo.enable_debug(fc)
    bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)
    model = ModelWrapper(finn_onnx)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(RemoveStaticGraphInputs())
    assert len(model.graph.input) == 1
    assert len(model.graph.output) == 1
    # load one of the test vectors
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    # run using FINN-based execution
    input_dict = {"0": nph.to_array(input_tensor)}
    output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
    produced = output_dict[model.graph.output[0].name]
    # run using PyTorch/Brevitas
    input_tensor = torch.from_numpy(nph.to_array(input_tensor)).float()
    assert input_tensor.shape == (1, 1, 28, 28)
    # do forward pass in PyTorch/Brevitas
    expected = fc.forward(input_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
    # check all tensors at debug markers
    names_brevitas = set(dbg_hook.values.keys())
    names_finn = set(output_dict.keys())
    names_common = names_brevitas.intersection(names_finn)
    assert len(names_common) == 16
    for dbg_name in names_common:
        tensor_pytorch = dbg_hook.values[dbg_name].detach().numpy()
        tensor_finn = output_dict[dbg_name]
        assert np.isclose(tensor_finn, tensor_pytorch, atol=1e-5).all()
    os.remove(finn_onnx)
def test_brevitas_cnv_export_exec(wbits, abits):
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")
    cnv = get_test_model_trained("CNV", wbits, abits)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(RemoveStaticGraphInputs())
    assert len(model.graph.input) == 1
    assert len(model.graph.output) == 1
    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    # run using FINN-based execution
    input_dict = {model.graph.input[0].name: input_tensor}
    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced = output_dict[model.graph.output[0].name]
    # do forward pass in PyTorch/Brevitas
    input_tensor = torch.from_numpy(input_tensor).float()
    expected = cnv.forward(input_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path)
def test_end2end_mobilenet_rtlsim():
    model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_ipgen.onnx")
    x = np.load(build_dir + "/end2end_mobilenet_input.npy")
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: x}
    # node-by-node rtlsim
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(PrepareRTLSim())
    model.save(build_dir + "/end2end_mobilenet_ipgen_nodebynode_rtlsim.onnx")
    ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
    res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
    np.save(
        build_dir + "/end2end_mobilenet_result_rtlsim_nodebynode.npy",
        res_rtlsim_nodebynode,
    )
    a0 = np.load(build_dir + "/end2end_mobilenet_topk_scale.npy")
    res_rtlsim_nodebynode_prob = (
        ret_rtlsim_nodebynode[model.graph.node[-2].output[0]] * a0
    )
    np.save(
        build_dir + "/end2end_mobilenet_result_rtlsim_nodebynode_prob.npy",
        res_rtlsim_nodebynode_prob,
    )

    # check result with golden values
    golden = np.load(build_dir + "/end2end_mobilenet_golden_top5.npy")
    golden_prob = np.load(build_dir + "/end2end_mobilenet_golden_top5_prob.npy")

    assert (golden == res_rtlsim_nodebynode).all()
    assert np.isclose(golden_prob, res_rtlsim_nodebynode_prob).all()
def test_xnorpopcountmatmul():
    M = 1
    K = 3
    N = 3
    x = helper.make_tensor_value_info("x", TensorProto.FLOAT, [M, K])
    W = helper.make_tensor_value_info("W", TensorProto.FLOAT, [K, N])
    out = helper.make_tensor_value_info("out", TensorProto.FLOAT, ["x", "y"])
    node_def = helper.make_node(
        "XnorPopcountMatMul", ["x", "W"], ["out"], domain="finn.custom_op.general"
    )
    modelproto = helper.make_model(
        helper.make_graph([node_def], "test_model", [x], [out], value_info=[W])
    )
    model = ModelWrapper(modelproto)
    model.set_tensor_datatype("x", DataType.BINARY)
    model.set_tensor_datatype("W", DataType.BINARY)
    W_data = np.asarray([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
    model.set_initializer("W", W_data)
    # test shape inference
    model = model.transform(InferShapes())
    assert model.get_tensor_shape("out") == [M, N]
    # test datatype inference
    assert model.get_tensor_datatype("out") is DataType.FLOAT32
    model = model.transform(InferDataTypes())
    assert model.get_tensor_datatype("out") is DataType.UINT32
    # test execution
    x_data = np.asarray([[1, 0, 0]], dtype=np.float32)
    inp_dict = {"x": x_data}
    out_dict = oxe.execute_onnx(model, inp_dict)
    Wb = 2 * W_data - 1
    xb = 2 * x_data - 1
    rb = np.matmul(xb, Wb)
    assert (2 * out_dict["out"] - K == rb).all()
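# The final assert relies on the XNOR-popcount identity: for bipolar vectors
# xb, wb in {-1, +1}^K encoded as binary {0, 1}, the dot product equals
# (#matches) - (#mismatches) = 2 * popcount(xnor(x, w)) - K. A small numpy
# demonstration of that identity (values here are illustrative):
x_bin = np.array([1, 0, 0])  # binary encoding of xb = [+1, -1, -1]
w_bin = np.array([1, 0, 1])  # binary encoding of wb = [+1, -1, +1]
matches = np.sum(x_bin == w_bin)  # popcount of XNOR
xb_demo, wb_demo = 2 * x_bin - 1, 2 * w_bin - 1
assert 2 * matches - len(x_bin) == np.dot(xb_demo, wb_demo)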
def verify_step(
    model: ModelWrapper, cfg: DataflowBuildConfig, step_name: str, need_parent: bool
):
    print("Running verification for " + step_name)
    verify_out_dir = cfg.output_dir + "/verification_output"
    intermediate_models_dir = cfg.output_dir + "/intermediate_models"
    os.makedirs(verify_out_dir, exist_ok=True)
    (in_npy, exp_out_npy) = cfg._resolve_verification_io_pair()
    if need_parent:
        assert (
            cfg.save_intermediate_models
        ), "Enable save_intermediate_models for verification"
        parent_model_fn = intermediate_models_dir + "/dataflow_parent.onnx"
        child_model_fn = intermediate_models_dir + "/verify_%s.onnx" % step_name
        model.save(child_model_fn)
        out_npy = execute_parent(parent_model_fn, child_model_fn, in_npy)
    else:
        inp_tensor_name = model.graph.input[0].name
        out_tensor_name = model.graph.output[0].name
        inp_dict = {inp_tensor_name: in_npy}
        out_dict = execute_onnx(model, inp_dict)
        out_npy = out_dict[out_tensor_name]
    res = np.isclose(exp_out_npy, out_npy, atol=1e-3).all()
    res_to_str = {True: "SUCCESS", False: "FAIL"}
    res_str = res_to_str[res]
    verification_output_fn = verify_out_dir + "/verify_%s_%s.npy" % (step_name, res_str)
    np.save(verification_output_fn, out_npy)
    print("Verification for %s : %s" % (step_name, res_str))
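# A sketch of a typical call site inside a dataflow build step; the step name,
# the VerificationStepType member, and cfg._resolve_verification_steps() are
# assumptions modeled on the FINN builder conventions, not taken verbatim
# from the code above:
def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    model = model.transform(Streamline())
    if VerificationStepType.STREAMLINED_PYTHON in cfg._resolve_verification_steps():
        # cross-check the streamlined model against the reference I/O pair
        verify_step(model, cfg, "streamlined_python", need_parent=False)
    return model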