def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig): """Run streamlining on given model. Streamlining involves moving floating point scale/shift parameters around, collapsing adjacent ones into a single parameter, then absorbing the scale/shift into the following `MultiThreshold` node. Streamlining requires careful topology design and cannot be applied to all topologies. """ model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold()) model = model.transform(Streamline()) need_lowering = len(model.get_nodes_by_op_type("Conv")) > 0 if need_lowering: model = model.transform(LowerConvsToMatMul()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) # absorb final add-mul nodes into TopK model = model.transform(absorb.AbsorbScalarMulAddIntoTopK()) model = model.transform(InferDataLayouts()) model = model.transform(RemoveUnusedTensors()) if VerificationStepType.STREAMLINED_PYTHON in cfg._resolve_verification_steps( ): verify_step(model, cfg, "streamlined_python", need_parent=False) return model
def test_end2end_tfc_w1a1_convert_to_hls_layers(): model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_streamlined.onnx") model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(absorb.AbsorbAddIntoMultiThreshold()) model = model.transform(absorb.AbsorbMulIntoMultiThreshold()) model = model.transform(RoundAndClipThresholds()) model = model.transform(to_hls.InferBinaryStreamingFCLayer()) model.save(build_dir + "/end2end_tfc_w1a1_hls_layers.onnx")
def test_end2end_cnv_w1a1_streamline(): model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_tidy.onnx") model = model.transform(Streamline()) model = model.transform(LowerConvsToMatMul()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) model.save(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
def test_streamline(self, topology, wbits, abits): prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post") model = load_test_checkpoint_or_skip(prev_chkpt_name) # move past any reshapes to be able to streamline input scaling model = model.transform(MoveScalarLinearPastInvariants()) model = model.transform(Streamline()) if "fc" not in topology: model = model.transform(LowerConvsToMatMul()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) # absorb final add-mul nodes into TopK model = model.transform(absorb.AbsorbScalarMulAddIntoTopK()) model = model.transform(InferDataLayouts()) model = model.transform(RemoveUnusedTensors()) model.save(get_checkpoint_name(topology, wbits, abits, "streamline"))
def test_convert_bipolar_matmul_to_xnorpopcountmatmul(): lfc = get_test_model_trained("LFC", 1, 1) bo.export_finn_onnx(lfc, (1, 1, 28, 28), export_onnx_path) model = ModelWrapper(export_onnx_path) model = model.transform(InferShapes()) model = model.transform(FoldConstants()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(ConvertSignToThres()) # load one of the test vectors raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb") input_tensor = onnx.load_tensor_from_string(raw_i) # run using FINN-based execution input_dict = {"global_in": nph.to_array(input_tensor)} expected_ctx = oxe.execute_onnx(model, input_dict, True) expected = expected_ctx[model.graph.output[0].name] model = model.transform(ConvertBipolarMatMulToXnorPopcount()) produced_ctx = oxe.execute_onnx(model, input_dict, True) produced = produced_ctx[model.graph.output[0].name] assert np.isclose(expected, produced, atol=1e-3).all() os.remove(export_onnx_path)
def test_convert_to_hls_layers_cnv_w1a1(fused_activation): cnv = get_test_model_trained("CNV", 1, 1) bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv) model = ModelWrapper(export_onnx_path_cnv) model = model.transform(InferShapes()) model = model.transform(FoldConstants()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(Streamline()) model = model.transform(LowerConvsToMatMul()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) model = model.transform(InferDataLayouts()) # model.save("golden.onnx") # load one of the test vectors fn = pk.resource_filename("finn.qnn-data", "cifar10/cifar10-test-data-class3.npz") input_tensor = np.load(fn)["arr_0"].astype(np.float32) input_tensor = input_tensor / 255 assert input_tensor.shape == (1, 3, 32, 32) # generate expected value from streamlined net input_dict = {"global_in": input_tensor} expected_ctx = oxe.execute_onnx(model, input_dict, True) expected = expected_ctx[model.graph.output[0].name] # if we infer thresholding first, all MultiThresholds get converted to HLS # subsequently, the FC inference will generate passthrough MVAUs if not fused_activation: model = model.transform(to_hls.InferThresholdingLayer()) model = model.transform(to_hls.InferBinaryStreamingFCLayer()) model = model.transform(to_hls.InferQuantizedStreamingFCLayer()) for node in model.graph.node: if node.op_type == "StreamingFCLayer_Batch": inst = getCustomOp(node) inst.set_nodeattr("mem_mode", "decoupled") mw = inst.get_nodeattr("MW") mh = inst.get_nodeattr("MH") if mh % 4 == 0: pe = mh // 4 else: pe = mh inst.set_nodeattr("PE", pe) if mw % 16 == 0: simd = mw // 16 else: simd = mw inst.set_nodeattr("SIMD", simd) model = model.transform(to_hls.InferConvInpGen()) model = model.transform(to_hls.InferStreamingMaxPool()) # check topology status finn_nodes = model.get_finn_nodes() if fused_activation: assert len(finn_nodes) == 18 else: assert len(finn_nodes) == 26 thr_nodes = model.get_nodes_by_op_type("Thresholding_Batch") assert len(thr_nodes) == 8 non_finn_nodes = model.get_non_finn_nodes() assert len(non_finn_nodes) == 4 exp_non_finn_nodes = ["Transpose", "Reshape", "Mul", "Add"] assert [x.op_type for x in non_finn_nodes] == exp_non_finn_nodes fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch") assert len(fc_nodes) == 9 swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator") assert len(swg_nodes) == 6 mp_nodes = model.get_nodes_by_op_type("StreamingMaxPool_Batch") assert len(mp_nodes) == 2 # model.save("cnv-pre-compile.onnx") model = model.transform(PrepareCppSim()) model = model.transform(CompileCppSim()) model = model.transform(SetExecMode("cppsim")) # model.save("cnv-post-compile.onnx") produced_ctx = oxe.execute_onnx(model, input_dict, True) produced = produced_ctx[model.graph.output[0].name] assert np.isclose(expected, produced, atol=1e-3).all() assert np.argmax(produced) == 3 os.remove(export_onnx_path_cnv)
def test_infer_data_layouts_cnv(): cnv = get_test_model_trained("CNV", 1, 1) bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv) model = ModelWrapper(export_onnx_path_cnv) model = model.transform(InferShapes()) model = model.transform(FoldConstants()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(Streamline()) model = model.transform(InferDataLayouts()) assert model.get_tensor_layout("global_in") == DataLayout.NCHW assert model.get_tensor_layout("Conv_0_out0") == DataLayout.NCHW assert model.get_tensor_layout("MaxPool_0_out0") == DataLayout.NCHW assert model.get_tensor_layout("MultiThreshold_6_out0") == DataLayout.NCHW assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NC assert model.get_tensor_layout("global_out") == DataLayout.NC model = model.transform(LowerConvsToMatMul()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(InferDataLayouts()) assert model.get_tensor_layout("global_in") == DataLayout.NCHW assert model.get_tensor_layout("Transpose_0_out0") == DataLayout.NHWC assert model.get_tensor_layout("Im2Col_0_out0") == DataLayout.NHWC # note: im2col output isn't really NHWC or any other common layout # since the concept of channels changes with lowering... but it is # conceptually close to NHWC since the innermost dim gets multiplied assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NHWC assert model.get_tensor_layout("Transpose_1_out0") == DataLayout.NCHW assert model.get_tensor_layout("Transpose_2_out0") == DataLayout.NHWC assert model.get_tensor_layout("MaxPoolNHWC_0_out0") == DataLayout.NHWC assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC assert model.get_tensor_layout("global_out") == DataLayout.NC model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) model = model.transform(to_hls.InferBinaryStreamingFCLayer()) model = model.transform(to_hls.InferQuantizedStreamingFCLayer()) model = model.transform(to_hls.InferConvInpGen()) model = model.transform(to_hls.InferStreamingMaxPool()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(InferDataLayouts()) assert model.get_tensor_layout("global_in") == DataLayout.NCHW assert model.get_tensor_layout("Transpose_0_out0") == DataLayout.NHWC # note: im2col output isn't really NHWC or any other common layout # since the concept of channels changes with lowering... but it is # conceptually close to NHWC since the innermost dim gets multiplied assert (model.get_tensor_layout("ConvolutionInputGenerator_0_out0") == DataLayout.NHWC) assert model.get_tensor_layout( "StreamingFCLayer_Batch_3_out0") == DataLayout.NHWC assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC assert model.get_tensor_layout( "StreamingFCLayer_Batch_6_out0") == DataLayout.NC assert model.get_tensor_layout("global_out") == DataLayout.NC os.remove(export_onnx_path_cnv)
def test_convert_to_hls_layers_tfc_w1a1(): tfc = get_test_model_trained("TFC", 1, 1) bo.export_finn_onnx(tfc, (1, 1, 28, 28), export_onnx_path) model = ModelWrapper(export_onnx_path) model = model.transform(InferShapes()) model = model.transform(FoldConstants()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(Streamline()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(absorb.AbsorbAddIntoMultiThreshold()) model = model.transform(absorb.AbsorbMulIntoMultiThreshold()) model = model.transform(RoundAndClipThresholds()) model = model.transform(to_hls.InferBinaryStreamingFCLayer()) fc0 = model.graph.node[2] assert fc0.op_type == "StreamingFCLayer_Batch" assert model.get_tensor_shape(fc0.input[0]) == [1, 784] assert model.get_tensor_shape(fc0.input[1]) == [784, 64] assert model.get_tensor_shape(fc0.input[2]) == [64, 1] fc1 = model.graph.node[3] assert fc1.op_type == "StreamingFCLayer_Batch" assert model.get_tensor_shape(fc1.input[0]) == [1, 64] assert model.get_tensor_shape(fc1.input[1]) == [64, 64] assert model.get_tensor_shape(fc1.input[2]) == [64, 1] fc2 = model.graph.node[4] assert fc2.op_type == "StreamingFCLayer_Batch" assert model.get_tensor_shape(fc2.input[0]) == [1, 64] assert model.get_tensor_shape(fc2.input[1]) == [64, 64] assert model.get_tensor_shape(fc2.input[2]) == [64, 1] fc3 = model.graph.node[5] assert fc3.op_type == "StreamingFCLayer_Batch" assert model.get_tensor_shape(fc3.input[0]) == [1, 64] assert model.get_tensor_shape(fc3.input[1]) == [64, 10] fc0w = getCustomOp(fc0) fc0w.set_nodeattr("SIMD", 784) fc0w.set_nodeattr("PE", 16) fc1w = getCustomOp(fc1) fc1w.set_nodeattr("SIMD", 16) fc1w.set_nodeattr("PE", 16) fc2w = getCustomOp(fc2) fc2w.set_nodeattr("SIMD", 16) fc2w.set_nodeattr("PE", 16) fc3w = getCustomOp(fc3) fc3w.set_nodeattr("SIMD", 16) fc3w.set_nodeattr("PE", 10) model = model.transform(PrepareCppSim()) model = model.transform(CompileCppSim()) model = model.transform(SetExecMode("cppsim")) raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb") input_tensor = onnx.load_tensor_from_string(raw_i) # run using FINN-based execution input_dict = {"global_in": nph.to_array(input_tensor)} output_dict = oxe.execute_onnx(model, input_dict) produced = output_dict[list(output_dict.keys())[0]] # run using PyTorch/Brevitas input_tensor = torch.from_numpy(nph.to_array(input_tensor)).float() assert input_tensor.shape == (1, 1, 28, 28) # do forward pass in PyTorch/Brevitas expected = tfc.forward(input_tensor).detach().numpy() assert np.isclose(produced, expected, atol=1e-3).all() os.remove(export_onnx_path)