def test_convert_to_hls_layers_synthetic(ch, ifmdim, idt):
    """Convert a synthetic graph to HLS layers and verify it under cppsim.

    The test builds a model with ``make_model``, round-trips it through an
    ONNX file, checks the plain ONNX execution against a closed-form
    expected value, converts the graph to HLS layers, checks the resulting
    node counts and finally soft-verifies the top-k output produced by
    cppsim.

    Args:
        ch: Number of channels of the generated input tensor.
        ifmdim: Spatial size of the (square) input feature map; ``-1``
            selects a 2-D ``(1, ch)`` input shape.
        idt: Input datatype passed to ``gen_finn_dt_tensor`` and set on the
            graph input before conversion.
    """
    model = make_model(ch, ifmdim)
    try:
        model.save(export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(InferShapes())
        model = model.transform(FoldConstants())
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(GiveReadableTensorNames())
        model = model.transform(InferDataLayouts())
        # debugging aid: model.save("golden.onnx")

        # generate test vectors of correct shape
        # NOTE(review): the 2-D shape used for ifmdim == -1 looks
        # incompatible with the axis=(2, 3) reduction further down -
        # confirm whether that case is still meant to be exercised.
        if ifmdim == -1:
            input_tensor_shape = (1, ch)
        else:
            input_tensor_shape = (1, ch, ifmdim, ifmdim)
        x = gen_finn_dt_tensor(idt, input_tensor_shape)

        # generate expected value from streamlined net
        input_dict = {model.graph.input[0].name: x}
        output_dict = oxe.execute_onnx(model, input_dict, True)
        produced_sum = output_dict[model.graph.output[0].name]
        # The scale factor is fixed to 1. An earlier lookup of the last
        # node's second-input initializer was overwritten before use and
        # has been dropped.
        chw_mul = 1
        expected_sum = chw_mul * np.sum(2 * (2 * x + 15.0), axis=(2, 3)) / (
            ifmdim * ifmdim
        )
        assert (produced_sum.flatten() == expected_sum.flatten()).all()

        model = model.transform(InferDataLayouts())

        # convert to hls
        model.set_tensor_datatype(model.graph.input[0].name, idt)
        # extra streamlining
        model = model.transform(MoveScalarLinearPastInvariants())
        model = model.transform(MoveAddPastMul())
        model = model.transform(CollapseRepeatedMul())
        model = model.transform(CollapseRepeatedAdd())
        # insert top-k node, which should absorb linear ops before it
        model = model.transform(InferShapes())
        model = model.transform(InferDataLayouts())
        model = model.transform(InferDataTypes())

        model = model.transform(to_hls.InferChannelwiseLinearLayer())
        model = model.transform(to_hls.InferAddStreamsLayer())
        model = model.transform(to_hls.InferGlobalAccPoolLayer())
        model = model.transform(MoveScalarLinearPastInvariants())
        model = model.transform(InsertTopK())
        model = model.transform(AbsorbScalarMulAddIntoTopK())
        model = model.transform(InferDataTypes())
        model = model.transform(to_hls.InferLabelSelectLayer())
        # The datatype/label-select passes are intentionally repeated after
        # consecutive transposes have been absorbed (same order as before).
        model = model.transform(AbsorbConsecutiveTransposes())
        model = model.transform(InferDataTypes())
        model = model.transform(to_hls.InferLabelSelectLayer())
        model = model.transform(to_hls.InferDuplicateStreamsLayer())

        model = model.transform(SortGraph())

        # debugging aid: model.save("golden_hls.onnx")
        # check topology status
        finn_nodes = model.get_finn_nodes()
        assert len(finn_nodes) == 9
        add_nodes = model.get_nodes_by_op_type("AddStreams_Batch")
        assert len(add_nodes) == 1
        pool_nodes = model.get_nodes_by_op_type("GlobalAccPool_Batch")
        assert len(pool_nodes) == 1
        label_nodes = model.get_nodes_by_op_type("LabelSelect_Batch")
        assert len(label_nodes) == 1
        channelwise_nodes = model.get_nodes_by_op_type("ChannelwiseOp_Batch")
        assert len(channelwise_nodes) == 5
        dup_nodes = model.get_nodes_by_op_type("DuplicateStreams_Batch")
        assert len(dup_nodes) == 1

        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))

        output_dict = oxe.execute_onnx(model, input_dict, True)
        produced_topk_hls = output_dict[model.graph.output[0].name]
        topk_input = output_dict[model.graph.node[-1].input[0]]
        assert soft_verify_topk(topk_input, produced_topk_hls, 5)
    finally:
        # Always clean up the exported file, even when an assertion or a
        # transform fails part-way through the test.
        if os.path.exists(export_onnx_path):
            os.remove(export_onnx_path)
def test_convert_to_hls_layers_cnv_w1a1(fused_activation):
    """Convert the trained CNV-w1a1 network to HLS layers and run cppsim.

    The network is exported to ONNX, streamlined and executed once to get
    the expected output for a CIFAR-10 test vector. It is then converted to
    HLS layers, the resulting topology is checked, and the cppsim output is
    compared against the expected output.

    Args:
        fused_activation: When falsy, thresholding layers are inferred
            before the FC layers (so they become standalone nodes); when
            truthy, that step is skipped.
    """
    cnv = get_test_model_trained("CNV", 1, 1)
    try:
        bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
        model = ModelWrapper(export_onnx_path_cnv)
        model = model.transform(InferShapes())
        model = model.transform(FoldConstants())
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(GiveReadableTensorNames())
        model = model.transform(Streamline())
        model = model.transform(LowerConvsToMatMul())
        model = model.transform(MakeMaxPoolNHWC())
        model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
        model = model.transform(ConvertBipolarMatMulToXnorPopcount())
        model = model.transform(Streamline())
        model = model.transform(InferDataLayouts())
        # debugging aid: model.save("golden.onnx")

        # load one of the test vectors
        fn = pk.resource_filename(
            "finn.qnn-data", "cifar10/cifar10-test-data-class3.npz"
        )
        input_tensor = np.load(fn)["arr_0"].astype(np.float32)
        input_tensor = input_tensor / 255
        assert input_tensor.shape == (1, 3, 32, 32)

        # generate expected value from streamlined net
        input_dict = {"global_in": input_tensor}
        expected_ctx = oxe.execute_onnx(model, input_dict, True)
        expected = expected_ctx[model.graph.output[0].name]

        # if we infer thresholding first, all MultiThresholds get converted
        # to HLS; subsequently, the FC inference will generate passthrough
        # MVAUs
        if not fused_activation:
            model = model.transform(to_hls.InferThresholdingLayer())
        model = model.transform(to_hls.InferBinaryStreamingFCLayer())
        model = model.transform(to_hls.InferQuantizedStreamingFCLayer())

        for node in model.graph.node:
            if node.op_type != "StreamingFCLayer_Batch":
                continue
            inst = getCustomOp(node)
            inst.set_nodeattr("mem_mode", "decoupled")
            mw = inst.get_nodeattr("MW")
            mh = inst.get_nodeattr("MH")
            # Fold by a fixed factor when the dimension divides evenly,
            # otherwise keep the attribute equal to the full dimension.
            pe = mh // 4 if mh % 4 == 0 else mh
            inst.set_nodeattr("PE", pe)
            simd = mw // 16 if mw % 16 == 0 else mw
            inst.set_nodeattr("SIMD", simd)

        model = model.transform(to_hls.InferConvInpGen())
        model = model.transform(to_hls.InferStreamingMaxPool())

        # check topology status
        finn_nodes = model.get_finn_nodes()
        if fused_activation:
            assert len(finn_nodes) == 18
        else:
            assert len(finn_nodes) == 26
            thr_nodes = model.get_nodes_by_op_type("Thresholding_Batch")
            assert len(thr_nodes) == 8
        non_finn_nodes = model.get_non_finn_nodes()
        assert len(non_finn_nodes) == 4
        exp_non_finn_nodes = ["Transpose", "Reshape", "Mul", "Add"]
        assert [x.op_type for x in non_finn_nodes] == exp_non_finn_nodes
        fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
        assert len(fc_nodes) == 9
        swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator")
        assert len(swg_nodes) == 6
        mp_nodes = model.get_nodes_by_op_type("StreamingMaxPool_Batch")
        assert len(mp_nodes) == 2

        # debugging aid: model.save("cnv-pre-compile.onnx")
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
        # debugging aid: model.save("cnv-post-compile.onnx")

        produced_ctx = oxe.execute_onnx(model, input_dict, True)
        produced = produced_ctx[model.graph.output[0].name]
        assert np.isclose(expected, produced, atol=1e-3).all()
        # the test vector file is named "...-class3", so index 3 is expected
        assert np.argmax(produced) == 3
    finally:
        # Always clean up the exported file, even when an assertion or a
        # transform fails part-way through the test.
        if os.path.exists(export_onnx_path_cnv):
            os.remove(export_onnx_path_cnv)
def test_fpgadataflow_upsampler(dt, IFMDim, scale, NumChannels, exec_mode):
    """Check the HLS nearest-neighbour upsampler against PyTorch and ONNX.

    A PyTorch upsampling model is exported to ONNX and executed to obtain a
    golden result, which must match the PyTorch output. The exported graph
    is then converted to ``UpsampleNearestNeighbour_Batch`` nodes, simulated
    in the requested mode and compared against the golden result.

    Args:
        dt: Datatype object providing ``min()`` and ``max()``; also forced
            onto the graph tensors before conversion.
        IFMDim: Spatial size of the (square) input feature map.
        scale: Upscale factor handed to ``PyTorchTestModel``.
        NumChannels: Number of input channels.
        exec_mode: Either ``"cppsim"`` or ``"rtlsim"``.

    Raises:
        Exception: If ``exec_mode`` is neither ``"cppsim"`` nor ``"rtlsim"``.
    """
    atol = 1e-3
    # Create the test model and inputs for it
    torch_model = PyTorchTestModel(upscale_factor=scale)
    input_shape = (1, NumChannels, IFMDim, IFMDim)
    test_in = torch.arange(0, np.prod(np.asarray(input_shape)))
    # Limit the input to values valid for the given datatype
    test_in %= dt.max() - dt.min() + 1
    test_in += dt.min()
    # Additionally make sure we always start with 0, for convenience purposes.
    test_in = torch.roll(test_in, dt.min())
    test_in = test_in.view(*input_shape).type(torch.float32)

    # Get golden PyTorch and ONNX inputs
    golden_torch_float = torch_model(test_in)
    export_path = f"{tmpdir}/Upsample_exported.onnx"
    FINNManager.export(
        torch_model,
        input_shape=input_shape,
        export_path=export_path,
        opset_version=11,
    )
    model = ModelWrapper(export_path)
    # The float32 tensor is fed as-is. A previous int32-cast input dict was
    # overwritten before use and has been dropped.
    input_dict = {model.graph.input[0].name: test_in.numpy()}
    golden_output_dict = oxe.execute_onnx(model, input_dict, True)
    golden_result = golden_output_dict[model.graph.output[0].name]
    # Make sure PyTorch and ONNX match
    pyTorch_onnx_match = np.isclose(golden_result, golden_torch_float).all()
    assert pyTorch_onnx_match, "ONNX and PyTorch upsampling output don't match."

    # Prep model for execution
    model = ModelWrapper(export_path)
    # model = model.transform(TransposeUpsampleIO())
    model = model.transform(MakeInputChannelsLast())
    model = model.transform(InferDataLayouts())
    model = model.transform(absorb.AbsorbTransposeIntoResize())
    model = model.transform(InferShapes())
    model = model.transform(ForceDataTypeForTensors(dType=dt))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(InferUpsample())
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())

    # Check that all nodes are UpsampleNearestNeighbour_Batch nodes
    for n in model.get_finn_nodes():
        node_check = n.op_type == "UpsampleNearestNeighbour_Batch"
        assert node_check, "All nodes should be UpsampleNearestNeighbour_Batch nodes."

    # Prep sim
    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 10))
        model = model.transform(HLSSynthIP())
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # Run sim
    # The converted graph takes its input permuted by _to_chan_last_args
    # (the graph input was made channels-last above).
    test_in_transposed = test_in.numpy().transpose(_to_chan_last_args)
    input_dict = {model.graph.input[0].name: test_in_transposed}
    output_dict = oxe.execute_onnx(model, input_dict, True)
    test_result = output_dict[model.graph.output[0].name]
    output_matches = np.isclose(golden_result, test_result, atol=atol).all()

    # exec_mode is guaranteed to be one of the two values at this point.
    if exec_mode == "cppsim":
        assert output_matches, "Cppsim output doesn't match ONNX/PyTorch."
    elif exec_mode == "rtlsim":
        assert output_matches, "Rtlsim output doesn't match ONNX/PyTorch."