def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run streamlining on given model. Streamlining involves moving floating point
    scale/shift parameters around, collapsing adjacent ones into a single parameter,
    then absorbing the scale/shift into the following `MultiThreshold` node.
    Streamlining requires careful topology design and cannot be applied to all
    topologies.
    """

    model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
    model = model.transform(Streamline())
    need_lowering = len(model.get_nodes_by_op_type("Conv")) > 0
    if need_lowering:
        model = model.transform(LowerConvsToMatMul())
        model = model.transform(MakeMaxPoolNHWC())
        model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
        model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    # absorb final add-mul nodes into TopK
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataLayouts())
    model = model.transform(RemoveUnusedTensors())

    if VerificationStepType.STREAMLINED_PYTHON in cfg._resolve_verification_steps():
        verify_step(model, cfg, "streamlined_python", need_parent=False)
    return model

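# Why absorbing a scale into MultiThreshold is sound: for a positive scale s,
# comparing s*x against thresholds T is equivalent to comparing x against T/s.
# A minimal numpy sketch of that identity follows; `multithreshold` here is a
# hypothetical stand-in, not FINN's MultiThreshold op.
import numpy as np

def multithreshold(x, thresholds):
    # count how many thresholds each element clears
    return np.sum(x[..., np.newaxis] >= thresholds, axis=-1).astype(np.float32)

s = 0.5  # positive scalar scale preceding the thresholding node
x = np.random.randn(4).astype(np.float32)
T = np.array([-1.0, 0.0, 1.0], dtype=np.float32)
assert (multithreshold(s * x, T) == multithreshold(x, T / s)).all()
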
def test_infer_data_layouts():
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())

    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
    assert model.get_tensor_layout("Conv_0_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("MaxPool_0_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
    assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NC
    assert model.get_tensor_layout("global_out") == DataLayout.NC

    model = model.transform(LowerConvsToMatMul())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())

    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
    assert model.get_tensor_layout("Transpose_0_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("Im2Col_0_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("MaxPool_0_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
    assert model.get_tensor_layout("MatMul_2_out0") == DataLayout.NC
    assert model.get_tensor_layout("global_out") == DataLayout.NC

def step_mobilenet_lower_convs(model: ModelWrapper, cfg: DataflowBuildConfig):
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RoundAndClipThresholds())
    model = model.transform(InferDataLayouts())
    return model

def test_end2end_cnv_w1a1_streamline():
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_tidy.onnx")
    model = model.transform(Streamline())
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    model.save(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")

def test_conv_lowering_conv_1x1():
    np.random.seed(0)

    in_feature_dim_h = 7
    in_feature_dim_w = 7
    in_chn = 3
    kernel_size = 1
    out_feature_dim_h = in_feature_dim_h
    out_feature_dim_w = in_feature_dim_w

    input_shape = [1, in_chn, in_feature_dim_h, in_feature_dim_w]
    output_shape = [1, in_chn, out_feature_dim_h, out_feature_dim_w]
    conv_param_shape = [in_chn, in_chn, kernel_size, kernel_size]

    conv_config = {}
    conv_config["dilations"] = [1, 1]
    conv_config["group"] = 1
    conv_config["kernel_shape"] = [kernel_size, kernel_size]
    conv_config["pads"] = [0, 0, 0, 0]
    conv_config["strides"] = [1, 1]

    top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
    top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape)
    value_info = [oh.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape)]

    modelproto = oh.make_model(
        oh.make_graph(
            name="test",
            inputs=[top_in],
            outputs=[top_out],
            value_info=value_info,
            nodes=[oh.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_config)],
        )
    )
    model = ModelWrapper(modelproto)
    model = model.transform(InferShapes())
    model.set_initializer("p1", np.random.rand(*conv_param_shape).astype(np.float32))

    new_model = model.transform(LowerConvsToMatMul())
    inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)}

    assert oxe.compare_execution(model, new_model, inp_dict)
    assert new_model.graph.node[0].op_type == "Transpose"
    assert new_model.graph.node[1].op_type == "MatMul"
    assert new_model.graph.node[2].op_type == "Transpose"
    assert len(new_model.graph.node) == 3

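# Why the 1x1 case lowers to just Transpose -> MatMul -> Transpose with no
# Im2Col: a 1x1 convolution is a per-pixel linear map over channels. A
# hypothetical numpy check of this equivalence (an illustration, not FINN
# code):
import numpy as np

n, c, h, w = 1, 3, 7, 7
x = np.random.rand(n, c, h, w).astype(np.float32)
weights = np.random.rand(c, c, 1, 1).astype(np.float32)  # [OFM, IFM, 1, 1]

# direct 1x1 convolution: per-pixel matmul over the channel dimension
conv_out = np.einsum("nchw,oc->nohw", x, weights[:, :, 0, 0])

# lowered form: Transpose -> MatMul -> Transpose
x_nhwc = x.transpose(0, 2, 3, 1)             # NCHW -> NHWC
mm_out = x_nhwc @ weights[:, :, 0, 0].T      # matmul over channels
lowered_out = mm_out.transpose(0, 3, 1, 2)   # NHWC -> NCHW

assert np.allclose(conv_out, lowered_out)
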
def test_end2end_mobilenet_lowering():
    model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_mobilenet_streamlined.onnx"
    )
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RoundAndClipThresholds())
    model.save(build_dir + "/end2end_mobilenet_lowered.onnx")

def test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding):
    wdt = idt
    odt = DataType.INT32
    ofm_ch = ifm_ch
    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad=padding[0])

    # set up onnx model
    inp = oh.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim]
    )
    outp = oh.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim, ofm_dim]
    )

    W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, 1, k, k])

    dw_cnv = oh.make_node(
        "Conv",
        inputs=["inp", "W"],
        outputs=["outp"],
        kernel_shape=[k, k],
        pads=padding,
        strides=[stride, stride],
        group=ifm_ch,
    )
    graph = oh.make_graph(
        nodes=[dw_cnv],
        name="dw_cnv_graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[W],
    )
    model = oh.make_model(graph, producer_name="dws_cnv-model")
    model = ModelWrapper(model)
    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)
    model.set_tensor_datatype("W", wdt)
    w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, 1, k, k])
    model.set_initializer("W", w_tensor)
    model = model.transform(InferShapes())

    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim, ifm_dim])
    input_dict = {"inp": input_tensor}
    output_dict = oxe.execute_onnx(model, input_dict)
    expected = output_dict["outp"]

    model = model.transform(LowerConvsToMatMul())
    output_dict = oxe.execute_onnx(model, input_dict)
    produced = output_dict["outp"]
    assert (produced == expected).all()

    # check if created nodes have attributes that indicate depthwise conv
    assert model.get_tensor_sparsity("W") is not None
    im2col_node = getCustomOp(model.graph.node[1])
    assert im2col_node.get_nodeattr("depthwise") == 1

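# Why the lowered weights carry a sparsity annotation: with group == ifm_ch,
# each channel is filtered independently, so the dense MatMul weight matrix
# produced by lowering has exactly one nonzero block per channel. A
# hypothetical numpy sketch of that structure (the channel-innermost row
# layout below is an assumption for illustration, not taken from FINN's
# Im2Col spec):
import numpy as np

ch, k = 3, 2
w = np.random.rand(ch, 1, k, k).astype(np.float32)  # depthwise weights [CH, 1, K, K]

# dense [K*K*CH, CH] matmul weights, one nonzero column block per channel
w_dense = np.zeros((k * k * ch, ch), dtype=np.float32)
for c in range(ch):
    w_dense[c::ch, c] = w[c, 0].flatten()

# only a 1/CH fraction of entries is populated; the rest is structural zero
assert np.count_nonzero(w_dense) == k * k * ch
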
def test_streamline(self, topology, wbits, abits):
    prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post")
    model = load_test_checkpoint_or_skip(prev_chkpt_name)
    # move past any reshapes to be able to streamline input scaling
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(Streamline())
    if "fc" not in topology:
        model = model.transform(LowerConvsToMatMul())
        model = model.transform(MakeMaxPoolNHWC())
        model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    # absorb final add-mul nodes into TopK
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataLayouts())
    model = model.transform(RemoveUnusedTensors())
    model.save(get_checkpoint_name(topology, wbits, abits, "streamline"))

def test_apply_config():
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(GiveUniqueNodeNames())
    # set up a config in a dict, then dump it to JSON
    config = {}
    config["Defaults"] = {"kernel_size": [[3, 3], ["Im2Col"]]}
    config["Im2Col_0"] = {"kernel_size": [7, 7]}
    with open("config.json", "w") as f:
        json.dump(config, f, indent=4)
    model = model.transform(ApplyConfig("config.json"))
    # check model
    assert getCustomOp(model.graph.node[2]).get_nodeattr("kernel_size") == [7, 7]
    assert getCustomOp(model.graph.node[9]).get_nodeattr("kernel_size") == [3, 3]
    os.remove("config.json")

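# For reference, the config.json written by the test above serializes to:
#
# {
#     "Defaults": {
#         "kernel_size": [[3, 3], ["Im2Col"]]
#     },
#     "Im2Col_0": {
#         "kernel_size": [7, 7]
#     }
# }
#
# i.e. the "Defaults" entry applies kernel_size [3, 3] to every Im2Col node,
# while the per-node entry overrides Im2Col_0 to [7, 7] -- which is exactly
# what the two asserts check.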
def test_conv_lowering_cnv_w1a1():
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    # execute imported model to get expected answer
    input_dict = {"0": input_tensor}
    output_dict_e = oxe.execute_onnx(model, input_dict)
    expected = output_dict_e[list(output_dict_e.keys())[0]]
    # execute transformed model and compare
    model = model.transform(LowerConvsToMatMul())
    output_dict_p = oxe.execute_onnx(model, input_dict)
    produced = output_dict_p[list(output_dict_p.keys())[0]]
    assert np.isclose(produced, expected).all()
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path)

def test_conv_lowering_convmnist():
    # load the onnx model
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    # model = model.transform(InferShapes())
    # model = model.transform(FoldConstants())
    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    input_tensor = np_helper.to_array(input_tensor)
    # execute imported model to get expected answer
    input_name = model.graph.input[0].name
    output_name = model.graph.output[0].name
    input_dict = {input_name: input_tensor}
    output_dict_e = oxe.execute_onnx(model, input_dict)
    expected = output_dict_e[output_name]
    # execute transformed model and compare
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(InferShapes())
    output_dict_p = oxe.execute_onnx(model, input_dict)
    produced = output_dict_p[output_name]
    assert np.isclose(produced, expected).all()

def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
    kernel_size, stride, pad = conv_config
    np.random.seed(0)
    idt = DataType.UINT4

    in_feature_dim = 7
    in_chn = 16

    if depthwise is True:
        group = out_chn = in_chn
        conv_param_shape = [out_chn, 1, kernel_size, kernel_size]
    else:
        group = 1
        out_chn = 20
        conv_param_shape = [out_chn, in_chn, kernel_size, kernel_size]

    out_feature_dim = compute_conv_output_dim(in_feature_dim, kernel_size, stride, pad)

    input_shape = [1, in_chn, in_feature_dim, in_feature_dim]
    output_shape = [1, out_chn, out_feature_dim, out_feature_dim]

    conv_weight_dt = DataType.UINT4

    conv_config = {}
    conv_config["dilations"] = [1, 1]
    conv_config["group"] = group
    conv_config["kernel_shape"] = [kernel_size, kernel_size]
    conv_config["pads"] = [pad, pad, pad, pad]
    conv_config["strides"] = [stride, stride]

    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape)
    value_info = [
        helper.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape)
    ]

    modelproto = helper.make_model(
        helper.make_graph(
            name="conv_test",
            inputs=[top_in],
            outputs=[top_out],
            value_info=value_info,
            nodes=[
                helper.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_config)
            ],
        )
    )

    model = ModelWrapper(modelproto)
    model.set_tensor_datatype("top_in", idt)
    model.set_tensor_datatype("top_out", idt)
    model.set_tensor_datatype("p1", conv_weight_dt)
    model.set_initializer("p1", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape))

    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())

    new_model = model.transform(LowerConvsToMatMul())
    new_model = new_model.transform(to_hls.InferConvInpGen())
    if depthwise is True:
        new_model = new_model.transform(to_hls.InferVVAU())
    else:
        new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer())
        fc_node = new_model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
        fc_inst = getCustomOp(fc_node)
        mw = fc_inst.get_nodeattr("MW")
        mh = fc_inst.get_nodeattr("MH")
        pe_cands = list(filter(lambda x: mh % x == 0, range(2, mh + 1)))
        simd_cands = list(filter(lambda x: mw % x == 0, range(2, mw + 1)))
        fc_inst.set_nodeattr("PE", pe_cands[0])
        fc_inst.set_nodeattr("SIMD", simd_cands[0])

    new_model = new_model.transform(GiveUniqueNodeNames())
    new_model = new_model.transform(InferShapes())
    new_model = new_model.transform(InferDataTypes())

    if exec_mode == "cppsim":
        new_model = new_model.transform(PrepareCppSim())
        new_model = new_model.transform(CompileCppSim())
        new_model = new_model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        new_model = new_model.transform(SetExecMode("rtlsim"))
        new_model = new_model.transform(GiveUniqueNodeNames())
        new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
        new_model = new_model.transform(HLSSynthIP())
        new_model = new_model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    x = gen_finn_dt_tensor(idt, input_shape)
    inp_dict = {model.graph.input[0].name: x}
    assert oxe.compare_execution(model, new_model, inp_dict)

    if kernel_size == 1 and stride > 1 and pad == 0:
        assert new_model.graph.node[1].op_type == "DownSampler"
        if exec_mode == "rtlsim":
            node = new_model.get_nodes_by_op_type("DownSampler")[0]
            inst = getCustomOp(node)
            cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
            exp_cycles_dict = new_model.analysis(exp_cycles_per_layer)
            exp_cycles = exp_cycles_dict[node.name]
            assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
            assert exp_cycles != 0

    if pad == 1:
        padding_node = new_model.get_nodes_by_op_type("FMPadding_Batch")[0]
        padding_inst = getCustomOp(padding_node)
        assert padding_inst.get_nodeattr("SIMD") == in_chn

    if depthwise is True and exec_mode == "rtlsim":
        node = new_model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = new_model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
        assert exp_cycles != 0

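# The folding choice in the test above just picks the smallest nontrivial
# divisor of MH (for PE) and MW (for SIMD). A standalone sketch of the same
# selection logic:
def smallest_nontrivial_divisor(n):
    # first x in [2, n] that divides n evenly; n itself for primes
    return next(x for x in range(2, n + 1) if n % x == 0)

assert smallest_nontrivial_divisor(20) == 2
assert smallest_nontrivial_divisor(9) == 3
assert smallest_nontrivial_divisor(7) == 7  # prime: no smaller divisor exists
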
def test_dws_reg_conv_lowering(
    idt, k_h, k_w, ifm_dim_h, ifm_dim_w, ifm_ch, stride, padding, dilations, dw
):
    if k_h > ifm_dim_h:
        pytest.skip("Kernel height must be smaller than image height")
    if k_w > ifm_dim_w:
        pytest.skip("Kernel width must be smaller than image width")
    # Ensure the right padding parameters are set
    if ifm_dim_w == 1:
        dilations[1] = 1
        padding[1] = 0
        padding[3] = 0

    wdt = idt
    odt = DataType["INT32"]
    ofm_ch = ifm_ch
    pad_h = padding[0] + padding[2]
    pad_w = padding[1] + padding[3]
    stride_h = stride[0]
    stride_w = stride[1]

    ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad_h, dilations[0])
    ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad_w, dilations[1])

    # set up onnx model
    inp = oh.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim_h, ifm_dim_w]
    )
    outp = oh.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim_h, ofm_dim_w]
    )

    if dw is True:
        W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, 1, k_h, k_w])
        group = ifm_ch
    else:
        W = oh.make_tensor_value_info(
            "W", TensorProto.FLOAT, [ofm_ch, ifm_ch, k_h, k_w]
        )
        group = 1

    dw_cnv = oh.make_node(
        "Conv",
        inputs=["inp", "W"],
        outputs=["outp"],
        kernel_shape=[k_h, k_w],
        pads=padding,
        strides=[stride_h, stride_w],
        group=group,
        dilations=dilations,
    )
    graph = oh.make_graph(
        nodes=[dw_cnv],
        name="dw_cnv_graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[W],
    )

    model = oh.make_model(graph, producer_name="test_dws_reg_cnv-model")
    model = ModelWrapper(model)
    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)
    model.set_tensor_datatype("W", wdt)

    if dw is True:
        w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, 1, k_h, k_w])
    else:
        w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, ifm_ch, k_h, k_w])

    model.set_initializer("W", w_tensor)
    model = model.transform(InferShapes())

    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim_h, ifm_dim_w])
    input_dict = {"inp": input_tensor}
    output_dict = oxe.execute_onnx(model, input_dict)
    expected = output_dict["outp"]

    model = model.transform(LowerConvsToMatMul())
    output_dict = oxe.execute_onnx(model, input_dict)
    produced = output_dict["outp"]
    assert (produced == expected).all()

    if dw is True:
        # check if created nodes have attributes that indicate depthwise conv
        assert model.get_tensor_sparsity("W") is not None
        im2col_node = getCustomOp(model.graph.node[1])
        assert im2col_node.get_nodeattr("depthwise") == 1

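# For reference, the output dimensions above follow standard convolution
# arithmetic; a sketch of the formula (compute_conv_output_dim is assumed to
# implement the same relation, with the pad argument meaning the total
# padding along that axis):
def conv_output_dim(ifm_dim, k, stride, total_pad=0, dilation=1):
    # the effective kernel extent grows with dilation
    return (ifm_dim + total_pad - dilation * (k - 1) - 1) // stride + 1

assert conv_output_dim(7, 3, 1) == 5                # no padding, unit stride
assert conv_output_dim(7, 3, 1, total_pad=2) == 7   # "same" padding for k=3
assert conv_output_dim(7, 3, 2, total_pad=2) == 4   # strided
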
def test_infer_data_layouts_cnv():
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
    model = ModelWrapper(export_onnx_path_cnv)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(Streamline())
    model = model.transform(InferDataLayouts())

    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
    assert model.get_tensor_layout("Conv_0_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("MaxPool_0_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("MultiThreshold_6_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
    assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NC
    assert model.get_tensor_layout("global_out") == DataLayout.NC

    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())

    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
    assert model.get_tensor_layout("Transpose_0_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("Im2Col_0_out0") == DataLayout.NHWC
    # note: im2col output isn't really NHWC or any other common layout
    # since the concept of channels changes with lowering... but it is
    # conceptually close to NHWC since the innermost dim gets multiplied
    assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("Transpose_1_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("Transpose_2_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("MaxPoolNHWC_0_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
    assert model.get_tensor_layout("global_out") == DataLayout.NC

    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())

    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
    assert model.get_tensor_layout("Transpose_0_out0") == DataLayout.NHWC
    # note: im2col output isn't really NHWC or any other common layout
    # since the concept of channels changes with lowering... but it is
    # conceptually close to NHWC since the innermost dim gets multiplied
    assert (
        model.get_tensor_layout("ConvolutionInputGenerator_0_out0") == DataLayout.NHWC
    )
    assert model.get_tensor_layout("StreamingFCLayer_Batch_3_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
    assert model.get_tensor_layout("StreamingFCLayer_Batch_6_out0") == DataLayout.NC
    assert model.get_tensor_layout("global_out") == DataLayout.NC

    os.remove(export_onnx_path_cnv)

def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape):
    np.random.seed(0)
    idt = DataType["UINT4"]
    odt = DataType["UINT4"]
    conv_weight_dt = DataType["INT4"]
    fc_weight_dt = DataType["INT4"]

    input_shape, kernel_shape, stride, pad = conv_config
    kernel_size_h, kernel_size_w = kernel_shape
    input_size_h, input_size_w = input_shape
    stride_h, stride_w = stride
    pad_h, pad_w = pad

    in_chn = 4
    fc_filters = 16

    if depthwise is True:
        group = out_chn = in_chn
        conv_param_shape = [out_chn, 1, kernel_size_h, kernel_size_w]
    else:
        group = 1
        out_chn = 8
        conv_param_shape = [out_chn, in_chn, kernel_size_h, kernel_size_w]

    output_size_h = compute_conv_output_dim(
        input_size_h, kernel_size_h, stride_h, 2 * pad_h
    )
    output_size_w = compute_conv_output_dim(
        input_size_w, kernel_size_w, stride_w, 2 * pad_w
    )

    input_shape = [1, in_chn, input_size_h, input_size_w]
    fc_param_shape = [out_chn * output_size_h * output_size_w, fc_filters]
    output_shape = [1, fc_filters]

    conv_config = {}
    conv_config["dilations"] = [1, 1]
    conv_config["group"] = group
    conv_config["kernel_shape"] = [kernel_size_h, kernel_size_w]
    conv_config["pads"] = [pad_h, pad_w, pad_h, pad_w]
    conv_config["strides"] = [stride_h, stride_w]

    global_in = helper.make_tensor_value_info(
        "global_in", TensorProto.FLOAT, input_shape
    )
    global_out = helper.make_tensor_value_info(
        "global_out", TensorProto.FLOAT, output_shape
    )
    value_info = [
        helper.make_tensor_value_info(
            "conv_param", TensorProto.FLOAT, conv_param_shape
        ),
        helper.make_tensor_value_info(
            "thres1_param", TensorProto.FLOAT, (out_chn, 15)
        ),
        helper.make_tensor_value_info(
            "matmul_param", TensorProto.FLOAT, fc_param_shape
        ),
        helper.make_tensor_value_info(
            "thres2_param", TensorProto.FLOAT, (fc_filters, 15)
        ),
        helper.make_tensor_value_info("reshape_shape", TensorProto.INT64, []),
    ]

    if use_reshape:
        flatten_node = helper.make_node(
            "Reshape", ["thres1_out", "reshape_shape"], ["flatten_out"]
        )
    else:
        flatten_node = helper.make_node(
            "Flatten", ["thres1_out"], ["flatten_out"], axis=1
        )

    modelproto = helper.make_model(
        helper.make_graph(
            name="test",
            inputs=[global_in],
            outputs=[global_out],
            value_info=value_info,
            nodes=[
                helper.make_node(
                    "Conv", ["global_in", "conv_param"], ["conv_out"], **conv_config
                ),
                helper.make_node(
                    "MultiThreshold",
                    ["conv_out", "thres1_param"],
                    ["thres1_out"],
                    domain="finn.custom_op.general",
                    out_dtype="UINT4",
                ),
                flatten_node,
                helper.make_node(
                    "MatMul", ["flatten_out", "matmul_param"], ["matmul_out"]
                ),
                helper.make_node(
                    "MultiThreshold",
                    ["matmul_out", "thres2_param"],
                    ["global_out"],
                    domain="finn.custom_op.general",
                    out_dtype="UINT4",
                ),
            ],
        )
    )

    model = ModelWrapper(modelproto)
    model.set_tensor_datatype("global_in", idt)
    model.set_tensor_layout("global_in", DataLayout.NCHW)
    model.set_tensor_datatype("global_out", odt)
    model.set_tensor_datatype("conv_param", conv_weight_dt)
    model.set_tensor_datatype("matmul_param", fc_weight_dt)
    model.set_tensor_datatype("thres1_param", DataType["INT32"])
    model.set_tensor_datatype("thres2_param", DataType["INT32"])

    model.set_initializer(
        "conv_param", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape)
    )
    model.set_initializer(
        "thres1_param", get_multithreshold_rand_params(out_chn, 15, seed=0)
    )
    model.set_initializer(
        "thres2_param", get_multithreshold_rand_params(fc_filters, 15, seed=0)
    )
    model.set_initializer(
        "matmul_param", gen_finn_dt_tensor(fc_weight_dt, fc_param_shape)
    )
    model.set_initializer("reshape_shape", np.array([1, -1]))

    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())

    # streamlining
    new_model = model.transform(MoveScalarLinearPastInvariants())
    new_model = new_model.transform(Streamline())
    new_model = new_model.transform(LowerConvsToMatMul())
    new_model = new_model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    new_model = new_model.transform(Streamline())
    new_model = new_model.transform(InferDataLayouts())
    new_model = new_model.transform(RemoveUnusedTensors())

    # convert_to_hls
    if depthwise is True:
        new_model = new_model.transform(to_hls.InferVVAU())
    new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer())
    new_model = new_model.transform(to_hls.InferThresholdingLayer())
    new_model = new_model.transform(to_hls.InferConvInpGen())
    new_model = new_model.transform(to_hls.InferStreamingMaxPool())
    new_model = new_model.transform(RemoveCNVtoFCFlatten())
    new_model = new_model.transform(absorb.AbsorbConsecutiveTransposes())
    new_model = new_model.transform(GiveUniqueNodeNames())
    new_model = new_model.transform(InferDataLayouts())

    # prepare cppsim
    new_model = new_model.transform(PrepareCppSim())
    new_model = new_model.transform(CompileCppSim())
    new_model = new_model.transform(SetExecMode("cppsim"))

    # check for correct execution
    x = gen_finn_dt_tensor(idt, input_shape)
    inp_dict = {model.graph.input[0].name: x}
    assert oxe.compare_execution(model, new_model, inp_dict)

    num_transpose = len(new_model.get_nodes_by_op_type("Transpose"))
    num_flatten = len(new_model.get_nodes_by_op_type("Flatten"))
    num_reshape = len(new_model.get_nodes_by_op_type("Reshape"))

    # check if transpose->flatten was removed
    assert num_transpose == 1 and num_flatten == 0 and num_reshape == 0

def test_convert_to_hls_layers_cnv_w1a1(fused_activation):
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
    model = ModelWrapper(export_onnx_path_cnv)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(Streamline())
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # load one of the test vectors
    fn = pk.resource_filename("finn.qnn-data", "cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    # generate expected value from streamlined net
    input_dict = {"global_in": input_tensor}
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]

    # if we infer thresholding first, all MultiThresholds get converted to HLS
    # subsequently, the FC inference will generate passthrough MVAUs
    if not fused_activation:
        model = model.transform(to_hls.InferThresholdingLayer())
    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
    for node in model.graph.node:
        if node.op_type == "StreamingFCLayer_Batch":
            inst = getCustomOp(node)
            inst.set_nodeattr("mem_mode", "decoupled")
            mw = inst.get_nodeattr("MW")
            mh = inst.get_nodeattr("MH")
            if mh % 4 == 0:
                pe = mh // 4
            else:
                pe = mh
            inst.set_nodeattr("PE", pe)
            if mw % 16 == 0:
                simd = mw // 16
            else:
                simd = mw
            inst.set_nodeattr("SIMD", simd)
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())
    # check topology status
    finn_nodes = model.get_finn_nodes()
    if fused_activation:
        assert len(finn_nodes) == 18
    else:
        assert len(finn_nodes) == 26
        thr_nodes = model.get_nodes_by_op_type("Thresholding_Batch")
        assert len(thr_nodes) == 8
    non_finn_nodes = model.get_non_finn_nodes()
    assert len(non_finn_nodes) == 4
    exp_non_finn_nodes = ["Transpose", "Reshape", "Mul", "Add"]
    assert [x.op_type for x in non_finn_nodes] == exp_non_finn_nodes
    fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    assert len(fc_nodes) == 9
    swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    assert len(swg_nodes) == 6
    mp_nodes = model.get_nodes_by_op_type("StreamingMaxPool_Batch")
    assert len(mp_nodes) == 2
    # model.save("cnv-pre-compile.onnx")
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    # model.save("cnv-post-compile.onnx")
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path_cnv)

def test_non_equal_padding(
    idt, k_h, k_w, ifm_dim_h, ifm_dim_w, ifm_ch, stride, padding
):
    wdt = idt
    odt = DataType["INT32"]
    ofm_ch = ifm_ch
    pad_h = padding[0] + padding[2]
    pad_w = padding[1] + padding[3]

    ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride, pad_h)
    ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride, pad_w)

    # set up onnx model
    inp = oh.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim_h, ifm_dim_w]
    )
    outp = oh.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim_h, ofm_dim_w]
    )

    W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, ifm_ch, k_h, k_w])

    dw_cnv = oh.make_node(
        "Conv",
        inputs=["inp", "W"],
        outputs=["outp"],
        kernel_shape=[k_h, k_w],
        pads=padding,
        strides=[stride, stride],
        group=1,
    )
    graph = oh.make_graph(
        nodes=[dw_cnv],
        name="dw_cnv_graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[W],
    )
    model = oh.make_model(graph, producer_name="dws_cnv-model")
    model = ModelWrapper(model)
    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)
    model.set_tensor_datatype("W", wdt)
    w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, ifm_ch, k_h, k_w])
    model.set_initializer("W", w_tensor)
    model = model.transform(InferShapes())

    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim_h, ifm_dim_w])
    input_dict = {"inp": input_tensor}
    output_dict = oxe.execute_onnx(model, input_dict)
    expected = output_dict["outp"]

    model = model.transform(LowerConvsToMatMul())
    output_dict = oxe.execute_onnx(model, input_dict)
    produced = output_dict["outp"]
    assert (produced == expected).all()