def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mode):
    """Compare a StreamingMaxPool model against a golden NHWC max-pool model.

    The stride is set equal to the kernel size on both axes. When ``dim_1d``
    is truthy the width of the feature map and of the kernel is forced to 1;
    otherwise both are square. The test is skipped for BIPOLAR inputs in the
    1D case and whenever an image dimension is not a multiple of the matching
    kernel dimension.

    In ``"rtlsim"`` mode the node's ``cycles_rtlsim`` attribute is also
    compared with the ``exp_cycles_per_layer`` analysis (absolute tolerance
    15), and the expected cycle count must be non-zero.

    Raises:
        Exception: if ``exec_mode`` is neither ``"cppsim"`` nor ``"rtlsim"``.
    """
    ifm_dim_h, k_h = ifm_dim, k
    # 1D pooling collapses the width axis; 2D pooling is square.
    ifm_dim_w, k_w = (1, 1) if dim_1d else (ifm_dim_h, k_h)
    ifm_dim = (ifm_dim_h, ifm_dim_w)
    k = (k_h, k_w)
    # Stride equals the kernel size on each axis.
    stride_h, stride_w = k_h, k_w
    ofm_dim = (
        int(((ifm_dim_h - k_h) / stride_h) + 1),
        int(((ifm_dim_w - k_w) / stride_w) + 1),
    )

    if idt == DataType["BIPOLAR"] and dim_1d:
        pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)")
    if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")

    # prepare input data
    x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
    input_dict = prepare_inputs(x)

    # reference result from the golden model
    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    if exec_mode == "cppsim":
        model = (
            model.transform(SetExecMode("cppsim"))
            .transform(PrepareCppSim())
            .transform(CompileCppSim())
        )
    elif exec_mode == "rtlsim":
        model = (
            model.transform(SetExecMode("rtlsim"))
            .transform(GiveUniqueNodeNames())
            .transform(PrepareIP("xc7z020clg400-1", 5))
            .transform(HLSSynthIP())
            .transform(PrepareRTLSim())
        )
    else:
        raise Exception("Unknown exec_mode in test_layer_streaming_maxpool_batch")

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()

    if exec_mode != "rtlsim":
        return
    # cycle-count check only applies to rtlsim
    node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0]
    cycles_rtlsim = getCustomOp(node).get_nodeattr("cycles_rtlsim")
    exp_cycles = model.analysis(exp_cycles_per_layer)[node.name]
    assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
    assert exp_cycles != 0
def test_end2end_tfc_w1a1_verify_dataflow_part():
    """Cross-check cppsim and rtlsim results of the stitched TFC-w1a1 model.

    Loads ``end2end_tfc_w1a1_ipstitch.onnx`` from ``build_dir`` and executes it
    on an all-zero ``(1, 784)`` float32 input in three configurations: cppsim,
    node-by-node rtlsim, and rtlsim with the model-level ``exec_mode``
    metadata property set. A checkpoint is saved before each execution, and
    both rtlsim outputs must be close to the cppsim output.
    """
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_ipstitch.onnx")
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: np.zeros((1, 784), dtype=np.float32)}

    # cppsim
    model = (
        model.transform(PrepareCppSim())
        .transform(CompileCppSim())
        .transform(SetExecMode("cppsim"))
    )
    model.save(build_dir + "/end2end_tfc_w1a1_ipstitch_cppsim.onnx")
    res_cppsim = execute_onnx(model, inp_dict, True)[out_name]

    # node-by-node rtlsim
    model = model.transform(SetExecMode("rtlsim")).transform(PrepareRTLSim())
    model.save(build_dir + "/end2end_tfc_w1a1_ipstitch_nodebynode_rtlsim.onnx")
    res_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)[out_name]

    # whole-network (ip-stitched) rtlsim
    model.set_metadata_prop("exec_mode", "rtlsim")
    model.save(build_dir + "/end2end_tfc_w1a1_ipstitch_whole_rtlsim.onnx")
    res_rtlsim_whole = execute_onnx(model, inp_dict, True)[out_name]

    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
def test_fpgadataflow_slidingwindow(idt, k, ifm_dim, ifm_ch, stride):
    """Check the sliding-window model against ``im2col_indices``.

    The model is built with SIMD equal to the number of input channels and is
    executed twice on the same random ``(1, ifm_ch, ifm_dim, ifm_dim)`` input:
    first in ``"npysim"`` mode, then in ``"rtlsim"`` mode. Both outputs must
    equal the ``im2col_indices`` result reshaped to the node's output shape.
    """
    n_simd = ifm_ch
    ofm_dim = int(((ifm_dim - k) / stride) + 1)

    x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim))
    model = make_single_slidingwindow_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, n_simd, stride, idt
    )
    model = (
        model.transform(SetExecMode("npysim"))
        .transform(CodeGen_npysim())
        .transform(Compile())
    )

    # prepare input data
    input_dict = prepare_inputs(x, idt)

    # execute model, then reshape the reference to match the node output
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_expected = im2col_indices(x, k, stride).reshape(y_produced.shape)
    assert (y_produced == y_expected).all(), "npysim failed"

    model = (
        model.transform(SetExecMode("rtlsim"))
        .transform(GiveUniqueNodeNames())
        .transform(CodeGen_ipgen("xc7z020clg400-1", 5))
        .transform(HLSSynth_IPGen())
    )
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all(), "rtlsim failed"
def test_fpgadataflow_labelselect(idt, labels, fold, k, exec_mode):
    """Run the label-select model and soft-verify its top-k output.

    ``fold == -1`` selects ``pe = 1``; otherwise ``pe = labels // fold``.
    ``k == -1`` is replaced by ``labels``. The NumPy RNG is seeded with 0
    before any data is generated.

    Raises:
        Exception: if ``exec_mode`` is neither ``"cppsim"`` nor ``"rtlsim"``.
    """
    np.random.seed(0)
    pe = 1 if fold == -1 else labels // fold
    assert labels % pe == 0
    if k == -1:
        k = labels

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, labels))
    model = make_labelselect_modelwrapper(labels, pe, k, idt)

    if exec_mode == "cppsim":
        model = (
            model.transform(PrepareCppSim())
            .transform(CompileCppSim())
            .transform(SetExecMode("cppsim"))
        )
    elif exec_mode == "rtlsim":
        model = (
            model.transform(SetExecMode("rtlsim"))
            .transform(GiveUniqueNodeNames())
            .transform(PrepareIP("xc7z020clg400-1", 5))
            .transform(HLSSynthIP())
            .transform(PrepareRTLSim())
        )
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data and execute
    y = oxe.execute_onnx(model, prepare_inputs(x, idt))["outp"]
    assert soft_verify_topk(x, y, k), exec_mode + " failed"
def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
    """Compare a StreamingMaxPool model against a golden NHWC max-pool model.

    The stride is set equal to the kernel size ``k``. The test is skipped when
    ``ifm_dim`` is not a multiple of ``k``. Input is a random
    ``(1, ifm_dim, ifm_dim, ifm_ch)`` tensor of datatype ``idt``.

    Raises:
        Exception: if ``exec_mode`` is neither ``"cppsim"`` nor ``"rtlsim"``.
    """
    stride = k
    ofm_dim = int(((ifm_dim - k) / stride) + 1)
    if ifm_dim % k != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")

    # prepare input data
    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    input_dict = prepare_inputs(x)

    # reference result from the golden model
    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        # The message must name this test so a failure points at the right place.
        raise Exception("Unknown exec_mode in test_fpgadataflow_streamingmaxpool")

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()
def test_fpgadataflow_packed_dsp(ich, och, idim, k, s, pad, wdt, idt, tdt, odt, mode):
    """Check the double-packed convolution model against the reference model.

    ``InferDoublePackedConv`` must turn the model from ``make_model`` into
    exactly three nodes: ``Transpose``, ``ConvDoublePacked_Batch``,
    ``Transpose``. The converted model is then executed in the requested
    ``mode`` on a random ``(1, ich, idim, idim)`` input and its flattened
    output must equal the flattened reference output.

    In ``"rtlsim"`` mode ``GiveReadableTensorNames`` is applied, so the input
    and output are addressed as ``"global_in"`` / ``"global_out"``.

    Raises:
        Exception: if ``mode`` is neither ``"cppsim"`` nor ``"rtlsim"``.
    """
    model = make_model(ich, och, idim, k, s, pad, wdt, idt, tdt, odt)
    cdp_model = model.transform(InferDoublePackedConv())
    nodes = cdp_model.graph.node
    assert (
        len(nodes) == 3
        and nodes[1].op_type == "ConvDoublePacked_Batch"
        and nodes[0].op_type == "Transpose"
        and nodes[-1].op_type == "Transpose"
    ), "Incorrect model"

    # execute models and compare
    x = gen_finn_dt_tensor(idt, (1, ich, idim, idim))
    input_dict = {"inp": x}
    y_expected = oxe.execute_onnx(model, input_dict)["outp"]

    if mode == "cppsim":
        cdp_model = cdp_model.transform(SetExecMode("cppsim"))
        cdp_model = cdp_model.transform(PrepareCppSim())
        cdp_model = cdp_model.transform(CompileCppSim())
        y_produced = oxe.execute_onnx(cdp_model, input_dict)["outp"]
    elif mode == "rtlsim":
        cdp_model = cdp_model.transform(SetExecMode("rtlsim"))
        cdp_model = cdp_model.transform(GiveUniqueNodeNames())
        cdp_model = cdp_model.transform(GiveReadableTensorNames())
        cdp_model = cdp_model.transform(PrepareIP("xc7z020clg400-1", 5))
        cdp_model = cdp_model.transform(HLSSynthIP())
        cdp_model = cdp_model.transform(PrepareRTLSim())
        # tensors were renamed by GiveReadableTensorNames
        input_dict = {"global_in": x}
        y_produced = oxe.execute_onnx(cdp_model, input_dict)["global_out"]
    else:
        # Fail explicitly rather than with a NameError on y_produced below.
        raise Exception("Unknown exec_mode")

    # Report the mode that actually ran, not a fixed "cppsim".
    assert (y_produced.flatten() == y_expected.flatten()).all(), mode + " failed"
def test_end2end_cnv_w1a1_verify_dataflow_part():
    """Cross-check cppsim and rtlsim results of the stitched CNV-w1a1 model.

    Loads ``end2end_cnv_w1a1_ipstitch.onnx`` from ``build_dir`` and executes it
    on an all-zero ``(1, 32, 32, 3)`` float32 input in three configurations:
    cppsim, node-by-node rtlsim, and rtlsim with the model-level ``exec_mode``
    metadata property set. A checkpoint is saved before each execution, and
    both rtlsim outputs must be close to the cppsim output.

    NOTE(review): ``LIVENESS_THRESHOLD`` is set in ``os.environ`` and never
    restored, so the change outlives this test.
    """
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_ipstitch.onnx")
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: np.zeros((1, 32, 32, 3), dtype=np.float32)}

    # cppsim
    model = (
        model.transform(PrepareCppSim())
        .transform(CompileCppSim())
        .transform(SetExecMode("cppsim"))
    )
    model.save(build_dir + "/end2end_cnv_w1a1_ipgen_cppsim.onnx")
    res_cppsim = execute_onnx(model, inp_dict, True)[out_name]

    # node-by-node rtlsim
    model = model.transform(SetExecMode("rtlsim")).transform(PrepareRTLSim())
    model.save(build_dir + "/end2end_cnv_w1a1_ipgen_nodebynode_rtlsim.onnx")
    res_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)[out_name]

    # whole-network (ip-stitched) rtlsim
    model.set_metadata_prop("exec_mode", "rtlsim")
    model.save(build_dir + "/end2end_cnv_w1a1_ipstitch_whole_rtlsim.onnx")
    # this is a particularly long-running test, set liveness thr. to unlimited
    os.environ["LIVENESS_THRESHOLD"] = "-1"
    res_rtlsim_whole = execute_onnx(model, inp_dict, True)[out_name]

    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
def test_fpgadataflow_channelwise_ops(idt, act, pdt, nf, ich, func, vecs, exec_mode):
    """Check the channelwise-op model against a NumPy reference.

    ``nf == -1`` is replaced by ``ich`` (giving ``pe = 1``). The parameter
    tensor ``C`` is broadcast over the input and combined with it by
    ``func`` (``"add"`` or ``"mul"``) to form the expected output.

    In ``"rtlsim"`` mode the test additionally checks that
    ``hls_synth_res_estimation`` reports ``"ChannelwiseOp_Batch_0"`` and that
    the node's ``cycles_rtlsim`` attribute is close to the
    ``exp_cycles_per_layer`` analysis (absolute tolerance 10).

    Raises:
        Exception: if ``exec_mode`` is neither ``"cppsim"`` nor ``"rtlsim"``,
            or if ``func`` is neither ``"add"`` nor ``"mul"``.
    """
    if nf == -1:
        nf = ich
    pe = ich // nf
    assert ich % pe == 0

    # generate input and param data
    x = gen_finn_dt_tensor(idt, tuple(vecs + [ich]))
    # NOTE(review): the shape is passed as a bare int, not a 1-tuple; kept as-is.
    C = gen_finn_dt_tensor(pdt, (ich))

    odt = act
    model = make_modelwrapper(C, pe, idt, odt, pdt, func, vecs)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # package input data as dictionary
    input_dict = {"inp": x}

    oshape = model.get_tensor_shape("outp")
    C_reshaped = np.broadcast_to(C.flatten(), x.shape)
    if func == "add":
        y = x + C_reshaped
    elif func == "mul":
        y = x * C_reshaped
    else:
        # Fail explicitly rather than with a NameError on y below.
        raise Exception("Unknown func")
    y_expected = y.reshape(oshape)

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_produced = y_produced.reshape(y_expected.shape)

    # Report the mode that actually ran, not a fixed "cppsim".
    assert (y_produced == y_expected).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        hls_synt_res_est = model.analysis(hls_synth_res_estimation)
        assert "ChannelwiseOp_Batch_0" in hls_synt_res_est

        node = model.get_nodes_by_op_type("ChannelwiseOp_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_convert_to_hls_channelwise_layer(pdt, idt, onnx_op_name, scalar_param, exec_mode):
    """Check ``InferChannelwiseLinearLayer`` conversion against the source model.

    A model with 16 channels and a 5x5 feature map is built for
    ``onnx_op_name``; the parameter shape is ``(1,)`` when ``scalar_param`` is
    truthy and ``(1, 16, 1, 1)`` otherwise. The converted model must reproduce
    the source model's output exactly, and its node at index 1 must have
    op type ``ChannelwiseOp_Batch``.

    Raises:
        Exception: if ``exec_mode`` is neither ``"cppsim"`` nor ``"rtlsim"``.
    """
    ifm_ch = 16
    ifm_dim = 5
    ishape = (1, ifm_ch, ifm_dim, ifm_dim)
    pshape = (1,) if scalar_param else (1, ifm_ch, 1, 1)

    np.random.seed(0)
    model = make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape)

    # There are no datatypes with a non-power-of-2 bit width, so 32-bit inputs
    # are generated from the 16-bit type and do not use their full range.
    if idt == DataType["INT32"]:
        gen_dt = DataType["INT16"]
    elif idt == DataType["UINT32"]:
        gen_dt = DataType["UINT16"]
    else:
        gen_dt = idt
    x = gen_finn_dt_tensor(gen_dt, (1, ifm_ch, ifm_dim, ifm_dim))

    input_dict = prepare_inputs(x)
    y_expected = oxe.execute_onnx(model, input_dict)["outp"]

    new_model = model.transform(to_hls.InferChannelwiseLinearLayer()).transform(
        GiveUniqueNodeNames()
    )

    if exec_mode == "cppsim":
        new_model = (
            new_model.transform(PrepareCppSim())
            .transform(CompileCppSim())
            .transform(SetExecMode("cppsim"))
        )
    elif exec_mode == "rtlsim":
        new_model = (
            new_model.transform(SetExecMode("rtlsim"))
            .transform(GiveUniqueNodeNames())
            .transform(PrepareIP("xc7z020clg400-1", 5))
            .transform(HLSSynthIP())
            .transform(PrepareRTLSim())
        )
    else:
        raise Exception("Unknown exec_mode")

    ctx_produced = oxe.execute_onnx(
        new_model, input_dict, return_full_exec_context=True
    )
    y_produced = ctx_produced["outp"]

    assert (y_produced == y_expected).all()
    assert new_model.graph.node[1].op_type == "ChannelwiseOp_Batch"
def test_fpgadataflow_slidingwindow(
    idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw
):
    """Check the sliding-window model against a golden Im2Col model.

    Input is a random ``(1, ifm_dim, ifm_dim, ifm_ch)`` tensor. For
    ``dw != 0`` the golden output is reshaped and transposed into the
    depthwise layout before comparison.

    In ``"rtlsim"`` mode the node's ``cycles_rtlsim`` attribute is also
    compared with the ``exp_cycles_per_layer`` analysis (absolute tolerance
    10), and the expected cycle count must be non-zero.

    Raises:
        Exception: if ``exec_mode`` is neither ``"cppsim"`` nor ``"rtlsim"``.
    """
    ofm_dim = int(((ifm_dim - k) / stride) + 1)

    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    model = make_single_slidingwindow_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw
    )

    if exec_mode == "cppsim":
        model = (
            model.transform(SetExecMode("cppsim"))
            .transform(PrepareCppSim())
            .transform(CompileCppSim())
        )
    elif exec_mode == "rtlsim":
        model = (
            model.transform(SetExecMode("rtlsim"))
            .transform(GiveUniqueNodeNames())
            .transform(PrepareIP("xc7z020clg400-1", 5))
            .transform(HLSSynthIP())
            .transform(PrepareRTLSim())
        )
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")

    # prepare input data
    input_dict = prepare_inputs(x)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]

    golden = make_single_im2col_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
    )
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    if dw != 0:
        # depthwise: swap the (k*k) and (ifm_ch // simd) axes of the reference
        y_expected = (
            y_expected.reshape(1, ofm_dim, ofm_dim, k * k, ifm_ch // simd, simd)
            .transpose(0, 1, 2, 4, 3, 5)
            .reshape(1, ofm_dim, ofm_dim, ifm_ch * k * k)
        )
    assert (y_produced == y_expected).all()

    if exec_mode != "rtlsim":
        return
    node = model.get_nodes_by_op_type("ConvolutionInputGenerator")[0]
    cycles_rtlsim = getCustomOp(node).get_nodeattr("cycles_rtlsim")
    exp_cycles = model.analysis(exp_cycles_per_layer)[node.name]
    assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
    assert exp_cycles != 0
def test_fpgadataflow_fclayer_npysim(idt, wdt, act, nf, sf, mw, mh):
    """Check the single FC-layer model in ``"npysim"`` mode against NumPy.

    ``nf == -1`` / ``sf == -1`` are replaced by ``mh`` / ``mw`` (giving
    ``pe = 1`` / ``simd = 1``). With ``act is None`` the layer produces raw
    accumulators and no thresholds; otherwise random, row-sorted thresholds
    are generated for the activation datatype ``act``. The reference uses
    ``xp.xnorpopcountmatmul`` when both weights and inputs are BIPOLAR and
    ``np.matmul`` otherwise, followed by ``multithreshold`` if thresholds
    exist.
    """
    # -1 means "fold fully": nf = mh gives pe = 1, sf = mw gives simd = 1
    nf = mh if nf == -1 else nf
    sf = mw if sf == -1 else sf
    pe = mh // nf
    simd = mw // sf
    assert mh % pe == 0
    assert mw % sf == 0

    # generate weights, then input data
    W = gen_finn_dt_tensor(wdt, (mw, mh))
    x = gen_finn_dt_tensor(idt, (1, mw))

    both_bipolar = wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR

    if act is None:
        # no activation, produce accumulators
        T, tdt = None, None
        odt = DataType.UINT32 if both_bipolar else DataType.INT32
    else:
        odt = act
        acc_lo, acc_hi = calculate_signed_dot_prod_range(idt, wdt, mw)
        n_steps = act.get_num_possible_values() - 1
        T = np.random.randint(acc_lo, acc_hi - 1, (mh, n_steps)).astype(np.float32)
        # provide non-decreasing thresholds
        T = np.sort(T, axis=1)
        # generate thresholds for activation
        if both_bipolar:
            tdt = DataType.UINT32
            # bias thresholds to be positive
            T = np.ceil((T + mw) / 2)
            assert (T >= 0).all()
        else:
            tdt = DataType.INT32

    model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
    model = (
        model.transform(SetExecMode("npysim"))
        .transform(CodeGen_npysim())
        .transform(Compile())
    )

    # prepare input data
    input_dict = prepare_inputs(x, idt, wdt)

    if both_bipolar:
        # convert inputs to binary and use xnorpopcountmatmul
        y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2)
    else:
        y = np.matmul(x, W)

    if T is not None:
        y = multithreshold(y, T)
        if act == DataType.BIPOLAR:
            # binary to bipolar
            y = 2 * y - 1
        else:
            # signed offset
            y += act.min()

    y_expected = y.reshape(model.get_tensor_shape("outp"))

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced.reshape(y_expected.shape) == y_expected).all(), "npysim failed"
# Run a partitioned (parent + child) model with exec mode "rtlsim".
#
# Deep-copies `parent_model` and `list_of_child_models`, passes each child copy
# through `prep_model_for_rtlsim(child, src_dir)`, saves it under
# /workspace/finn/tutorial/sfc_onnx_models/ and collects the saved paths. The
# paths are handed to `attach_child_models_to_parent_model`, the parent copy is
# switched to exec mode "rtlsim", saved to /workspace/finn/tutorial/parent_rtlsim.onnx
# and executed through `test_onnx_model`. Returns whatever `test_onnx_model`
# returns; when `return_full_exec_context` is truthy that value is also printed.
#
# NOTE(review): this definition is collapsed onto one physical line, so the
# nesting of the trailing statements cannot be read from the layout -- in
# particular whether the second save of parent_rtlsim.onnx belongs to the
# `if (return_full_exec_context):` body. Confirm before reformatting.
# NOTE(review): all output paths are hard-coded; `src_dir` is only forwarded to
# `prep_model_for_rtlsim`.
def test_partitioned_model_using_rtlsim(stage, parent_model, list_of_child_models, input_dict, produced_golden, src_dir="/tmp/finn_dev_justin", return_full_exec_context=False): print("Making deep copies of each of the models") parent_model_for_rtlsim = deepcopy(parent_model) list_of_child_models_for_rtlsim = deepcopy(list_of_child_models) num_child_models = len(list_of_child_models_for_rtlsim) prepped_child_model_filepaths = [] # Prepare each child model for RTL sim testing for i in range(0, num_child_models): print(f"Preparing child model {i}") list_of_child_models_for_rtlsim[i] = prep_model_for_rtlsim( list_of_child_models_for_rtlsim[i], src_dir) prepped_child_model_path = f"/workspace/finn/tutorial/sfc_onnx_models/SFC1W1A_Child_RTLSim{i}.onnx" list_of_child_models_for_rtlsim[i].save(prepped_child_model_path) prepped_child_model_filepaths.append(prepped_child_model_path) parent_model_for_rtlsim = attach_child_models_to_parent_model( parent_model_for_rtlsim, prepped_child_model_filepaths) parent_model_for_rtlsim = parent_model_for_rtlsim.transform( SetExecMode("rtlsim")) parent_model_for_rtlsim.save("/workspace/finn/tutorial/parent_rtlsim.onnx") print(f"Running RTL Simulation") output = test_onnx_model(stage, parent_model_for_rtlsim, input_dict, produced_golden, return_full_exec_context=return_full_exec_context) if (return_full_exec_context): print(f"Context from run: {output}") parent_model_for_rtlsim.save("/workspace/finn/tutorial/parent_rtlsim.onnx") return output
def test_end2end_mobilenet_rtlsim():
    """Run the MobileNet ipgen checkpoint node-by-node in rtlsim and verify it.

    Loads the checkpoint (or skips, via ``load_test_checkpoint_or_skip``) and
    the stored input, executes the model with exec mode ``"rtlsim"``, and
    saves both the output and the scaled output of the second-to-last node as
    ``.npy`` files in ``build_dir``. The output must equal the stored golden
    top-5 values and the scaled values must be close to the golden ones.
    """
    model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_ipgen.onnx")
    x = np.load(build_dir + "/end2end_mobilenet_input.npy")
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: x}

    # node-by-node rtlsim
    model = model.transform(SetExecMode("rtlsim")).transform(PrepareRTLSim())
    model.save(build_dir + "/end2end_mobilenet_ipgen_nodebynode_rtlsim.onnx")
    exec_ctx = execute_onnx(model, inp_dict, True)

    res_rtlsim_nodebynode = exec_ctx[out_name]
    np.save(
        build_dir + "/end2end_mobilenet_result_rtlsim_nodebynode.npy",
        res_rtlsim_nodebynode,
    )

    # scale the output of the second-to-last node by the stored factor
    a0 = np.load(build_dir + "/end2end_mobilenet_topk_scale.npy")
    prob_tensor_name = model.graph.node[-2].output[0]
    res_rtlsim_nodebynode_prob = exec_ctx[prob_tensor_name] * a0
    np.save(
        build_dir + "/end2end_mobilenet_result_rtlsim_nodebynode_prob.npy",
        res_rtlsim_nodebynode_prob,
    )

    # check result with golden values
    golden = np.load(build_dir + "/end2end_mobilenet_golden_top5.npy")
    golden_prob = np.load(build_dir + "/end2end_mobilenet_golden_top5_prob.npy")
    assert (golden == res_rtlsim_nodebynode).all()
    assert np.isclose(golden_prob, res_rtlsim_nodebynode_prob).all()
def test_end2end_mobilenet_cppsim():
    """Run the folded MobileNet checkpoint in cppsim and verify it.

    Loads the checkpoint (or skips, via ``load_test_checkpoint_or_skip``) and
    the stored input, prepares and compiles the model for cppsim, and writes
    the wall-clock time those transforms took to
    ``end2end_mobilenet_compile_time.txt``. The model is then executed; its
    output and the scaled output of the second-to-last node are saved as
    ``.npy`` files in ``build_dir``. The output must equal the stored golden
    top-5 values and the scaled values must be close to the golden ones.
    """
    model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_folded.onnx")
    x = np.load(build_dir + "/end2end_mobilenet_input.npy")
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: x}

    # cppsim (timed)
    start = time.time()
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    elapsed_time = time.time() - start

    # Context manager closes the file even if the write raises.
    with open(build_dir + "/end2end_mobilenet_compile_time.txt", "w+") as f:
        f.write("Execution time in seconds: " + str(elapsed_time))

    model.save(build_dir + "/end2end_mobilenet_cppsim.onnx")
    ret_cppsim = execute_onnx(model, inp_dict, True)
    res_cppsim = ret_cppsim[out_name]
    np.save(build_dir + "/end2end_mobilenet_result_cppsim.npy", res_cppsim)

    # scale the output of the second-to-last node by the stored factor
    a0 = np.load(build_dir + "/end2end_mobilenet_topk_scale.npy")
    res_cppsim_prob = ret_cppsim[model.graph.node[-2].output[0]] * a0
    np.save(build_dir + "/end2end_mobilenet_result_cppsim_prob.npy", res_cppsim_prob)

    # check result with golden values
    golden = np.load(build_dir + "/end2end_mobilenet_golden_top5.npy")
    golden_prob = np.load(build_dir + "/end2end_mobilenet_golden_top5_prob.npy")
    assert (golden == res_cppsim).all()
    assert np.isclose(golden_prob, res_cppsim_prob).all()
def test_depthwise_conv_hls_rtlsim(act, pe, k, stride, padding):
    """Compare the HLS depthwise-conv model with its reference in rtlsim.

    Uses INT4 inputs and weights, a 6x6 feature map and 4 channels. The
    reference model is converted with ``InferConvInpGen`` and ``InferVVAU``;
    ``pe`` is written to the ``SIMD`` attribute of every
    ``ConvolutionInputGenerator`` node and to the ``PE`` attribute of every
    ``Vector_Vector_Activate_Batch`` node. Both models must agree according to
    ``oxe.compare_execution``.
    """
    idt = wdt = DataType.INT4
    ifm_dim = 6
    ifm_ch = 4

    # set up reference model consisting of Im2Col + MatMul (+ MultiThreshold)
    model = set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding)

    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_dim, ifm_dim, ifm_ch])
    input_dict = {"inp": input_tensor}

    new_model = model.transform(InferConvInpGen()).transform(InferVVAU())

    # set SIMD in ConvInputGen node and PE in VVAU node
    folding_attr_by_op = {
        "ConvolutionInputGenerator": "SIMD",
        "Vector_Vector_Activate_Batch": "PE",
    }
    for n in new_model.graph.node:
        attr_name = folding_attr_by_op.get(n.op_type)
        if attr_name is not None:
            getCustomOp(n).set_nodeattr(attr_name, pe)

    new_model = (
        new_model.transform(SetExecMode("rtlsim"))
        .transform(GiveUniqueNodeNames())
        .transform(PrepareIP("xc7z020clg400-1", 5))
        .transform(HLSSynthIP())
        .transform(PrepareRTLSim())
    )

    assert oxe.compare_execution(model, new_model, input_dict)
def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode):
    """Check the add-streams model against ``x1 + x2``.

    ``fold == -1`` selects ``pe = 1``; otherwise ``pe = max(1, ch // fold)``.
    Two random ``(1, ch)`` inputs are added and the result, reshaped to the
    model's ``"outp"`` shape, must equal the model output.

    In ``"rtlsim"`` mode the node's ``cycles_rtlsim`` attribute is also
    compared with the ``exp_cycles_per_layer`` analysis (absolute tolerance
    10), and the expected cycle count must be non-zero.

    Raises:
        Exception: if ``exec_mode`` is neither ``"cppsim"`` nor ``"rtlsim"``.
    """
    pe = 1 if fold == -1 else max(1, ch // fold)
    assert ch % pe == 0

    # generate input data
    x1 = gen_finn_dt_tensor(idt, (1, ch))
    x2 = gen_finn_dt_tensor(idt, (1, ch))

    model = make_addstreams_modelwrapper(ch, pe, idt)

    if exec_mode == "cppsim":
        model = (
            model.transform(PrepareCppSim())
            .transform(CompileCppSim())
            .transform(SetExecMode("cppsim"))
        )
    elif exec_mode == "rtlsim":
        model = (
            model.transform(SetExecMode("rtlsim"))
            .transform(GiveUniqueNodeNames())
            .transform(PrepareIP("xc7z020clg400-1", 5))
            .transform(HLSSynthIP())
            .transform(PrepareRTLSim())
        )
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data
    input_dict = prepare_inputs(x1, x2)

    oshape = model.get_tensor_shape("outp")
    y_expected = (x1 + x2).reshape(oshape)

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_produced = y_produced.reshape(y_expected.shape)

    assert (y_produced == y_expected).all(), exec_mode + " failed"

    if exec_mode != "rtlsim":
        return
    node = model.get_nodes_by_op_type("AddStreams_Batch")[0]
    cycles_rtlsim = getCustomOp(node).get_nodeattr("cycles_rtlsim")
    exp_cycles = model.analysis(exp_cycles_per_layer)[node.name]
    assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
    assert exp_cycles != 0
def test_fpgadataflow_globalaccpool(idt, ch, fold, imdim, exec_mode):
    """Check the global accumulate-pool model against ``np.sum`` over axes 1, 2.

    ``fold == -1`` selects ``pe = 1``; otherwise ``pe = ch // fold``. Input is
    a random ``(1, imdim, imdim, ch)`` tensor.

    In ``"rtlsim"`` mode both the expected cycle count and the node's
    ``cycles_rtlsim`` attribute must be non-zero.

    Raises:
        Exception: if ``exec_mode`` is neither ``"cppsim"`` nor ``"rtlsim"``.
    """
    pe = 1 if fold == -1 else ch // fold
    assert ch % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, imdim, imdim, ch))

    model = make_accpool_modelwrapper(ch, pe, imdim, idt)

    if exec_mode == "cppsim":
        model = (
            model.transform(PrepareCppSim())
            .transform(CompileCppSim())
            .transform(SetExecMode("cppsim"))
        )
    elif exec_mode == "rtlsim":
        model = (
            model.transform(SetExecMode("rtlsim"))
            .transform(GiveUniqueNodeNames())
            .transform(PrepareIP("xc7z020clg400-1", 5))
            .transform(HLSSynthIP())
            .transform(PrepareRTLSim())
        )
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data and execute
    y = oxe.execute_onnx(model, prepare_inputs(x, idt))["outp"]
    expected_y = np.sum(x, axis=(1, 2)).flatten()

    assert (y == expected_y).all(), exec_mode + " failed"

    if exec_mode != "rtlsim":
        return
    node = model.get_nodes_by_op_type("GlobalAccPool_Batch")[0]
    cycles_rtlsim = getCustomOp(node).get_nodeattr("cycles_rtlsim")
    exp_cycles = model.analysis(exp_cycles_per_layer)[node.name]
    # NOTE: the closeness check between exp_cycles and cycles_rtlsim
    # (atol = 0.1 * cycles_rtlsim) is disabled pending a performance debug;
    # for rtlsim-7-1-64-DataType.UINT4 it saw exp_cycles=50, cycles_rtlsim=103.
    assert exp_cycles != 0
    assert cycles_rtlsim != 0
def prep_model_for_rtlsim(model, src_dir="/tmp/finn_dev_justin"):
    """Return ``model`` prepared for rtlsim, working on a copy of its IP.

    The IP is first copied with ``copy_ip`` into a fresh
    ``/tmp/finn_dev_justin/rtlsim_<random>`` folder so that
    ``ReplaceVerilogRelPaths`` is not a permanent change. The model is then
    switched to exec mode ``"rtlsim"`` and passed through ``PrepareRTLSim``.

    NOTE(review): the copy destination is hard-coded and does not follow
    ``src_dir``; ``src_dir`` is only forwarded to ``copy_ip``.
    """
    copy_dir = "/tmp/finn_dev_justin/rtlsim_" + get_rand_string(10)
    print(f"Copying IP to folder {copy_dir}")
    model = copy_ip(model, copy_dir, src_dir)
    return (
        model.transform(ReplaceVerilogRelPaths())
        .transform(SetExecMode("rtlsim"))
        .transform(PrepareRTLSim())
    )
def test_fpgadataflow_duplicatestreams(idt, ch, fold, imdim, exec_mode):
    """Check that both outputs of the duplicate-streams model equal the input.

    ``fold == -1`` selects ``pe = 1``; otherwise ``pe = ch // fold``. Input is
    a random ``(1, imdim, imdim, ch)`` tensor; outputs are read from
    ``"outp0"`` and ``"outp1"``.

    In ``"rtlsim"`` mode the node's ``cycles_rtlsim`` attribute is also
    compared with the ``exp_cycles_per_layer`` analysis (absolute tolerance
    10), and the expected cycle count must be non-zero.

    Raises:
        Exception: if ``exec_mode`` is neither ``"cppsim"`` nor ``"rtlsim"``.
    """
    pe = 1 if fold == -1 else ch // fold
    assert ch % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, imdim, imdim, ch))

    model = make_dupstreams_modelwrapper(ch, pe, imdim, idt)

    if exec_mode == "cppsim":
        model = (
            model.transform(PrepareCppSim())
            .transform(CompileCppSim())
            .transform(SetExecMode("cppsim"))
        )
    elif exec_mode == "rtlsim":
        model = (
            model.transform(SetExecMode("rtlsim"))
            .transform(GiveUniqueNodeNames())
            .transform(PrepareIP("xc7z020clg400-1", 5))
            .transform(HLSSynthIP())
            .transform(PrepareRTLSim())
        )
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data and execute
    output_dict = oxe.execute_onnx(model, prepare_inputs(x, idt))
    y0 = output_dict["outp0"]
    y1 = output_dict["outp1"]

    assert (y0 == x).all(), exec_mode + " failed"
    assert (y1 == x).all(), exec_mode + " failed"

    if exec_mode != "rtlsim":
        return
    node = model.get_nodes_by_op_type("DuplicateStreams_Batch")[0]
    cycles_rtlsim = getCustomOp(node).get_nodeattr("cycles_rtlsim")
    exp_cycles = model.analysis(exp_cycles_per_layer)[node.name]
    assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
    assert exp_cycles != 0
def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
    """Compare a StreamingMaxPool model against a golden NHWC max-pool model.

    The stride is set equal to the kernel size ``k``. The test is skipped when
    ``ifm_dim`` is not a multiple of ``k``. Input is a random
    ``(1, ifm_dim, ifm_dim, ifm_ch)`` tensor of datatype ``idt``.

    In ``"rtlsim"`` mode the node's ``cycles_rtlsim`` attribute is also
    compared with the ``exp_cycles_per_layer`` analysis (absolute tolerance
    15), and the expected cycle count must be non-zero.

    Raises:
        Exception: if ``exec_mode`` is neither ``"cppsim"`` nor ``"rtlsim"``.
    """
    stride = k
    ofm_dim = int(((ifm_dim - k) / stride) + 1)
    if ifm_dim % k != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")

    # prepare input data
    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    input_dict = prepare_inputs(x)

    # reference result from the golden model
    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        # The message must name this test so a failure points at the right place.
        raise Exception("Unknown exec_mode in test_fpgadataflow_streamingmaxpool")

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
        assert exp_cycles != 0
def test_fpgadataflow_lookup(edt, embedding_cfg, exec_mode):
    """Check the Gather-to-Lookup conversion and the converted layer's output.

    ``embedding_cfg`` unpacks to ``(num_embeddings, idt, embedding_dim)``.
    The test first validates the single-node ``Gather`` model (datatypes,
    shapes, initializer, and output versus ``np.take``), then applies
    ``InferLookupLayer``, checks the resulting ``Lookup`` node's wiring, and
    finally executes it in the requested mode against the same reference.
    An ``exec_mode`` other than ``"cppsim"`` / ``"rtlsim"`` applies no
    further transforms before execution.
    """
    ishape = (1, 10)
    num_embeddings, idt, embedding_dim = embedding_cfg
    eshape = (num_embeddings, embedding_dim)
    exp_oshape = ishape + (embedding_dim,)

    embeddings = gen_finn_dt_tensor(edt, eshape)
    model = make_lookup_model(embeddings, ishape, idt, edt)

    assert len(model.graph.node) == 1
    gather_node = model.graph.node[0]
    assert gather_node.op_type == "Gather"

    iname = model.graph.input[0].name
    ename = gather_node.input[0]
    oname = model.graph.output[0].name

    for tensor_name, expected_dt in ((iname, idt), (ename, edt), (oname, edt)):
        assert model.get_tensor_datatype(tensor_name) == expected_dt
    assert tuple(model.get_tensor_shape(ename)) == eshape
    assert tuple(model.get_tensor_shape(oname)) == exp_oshape
    assert (model.get_initializer(ename) == embeddings).all()

    # indices are clipped into the valid embedding range
    itensor = np.clip(
        gen_finn_dt_tensor(idt, ishape).astype(np.int64), 0, num_embeddings - 1
    )
    ret = execute_onnx(model, {iname: itensor})
    exp_out = np.take(embeddings, itensor, axis=0)
    assert (exp_out == ret[oname]).all()

    # call transformation to convert to HLS and verify conversion
    model = model.transform(InferLookupLayer())
    lookup_node = model.graph.node[0]
    assert lookup_node.op_type == "Lookup"
    assert lookup_node.input[0] == iname
    assert lookup_node.input[1] == ename
    assert lookup_node.output[0] == oname

    if exec_mode == "cppsim":
        model = (
            model.transform(PrepareCppSim())
            .transform(CompileCppSim())
            .transform(SetExecMode("cppsim"))
        )
    elif exec_mode == "rtlsim":
        model = (
            model.transform(GiveUniqueNodeNames())
            .transform(PrepareIP("xc7z020clg400-1", 10))
            .transform(HLSSynthIP())
            .transform(SetExecMode("rtlsim"))
            .transform(PrepareRTLSim())
        )

    ret_sim = execute_onnx(model, {iname: itensor})
    assert (exp_out == ret_sim[oname]).all()
def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode):
    """Check the feature-map padding model against ``np.pad``.

    Skipped when ``num_ch`` is not a multiple of ``simd``. The output must
    have shape ``(1, idim + pad, idim + pad, num_ch)`` and equal the input
    zero-padded on the two middle axes. The top/left padding is ``pad // 2``,
    plus one when ``pad_style == 2`` and ``pad`` is odd; the bottom/right
    padding is the remainder.

    In ``"rtlsim"`` mode the node's ``cycles_rtlsim`` attribute is also
    compared with the ``exp_cycles_per_layer`` analysis (absolute tolerance
    10), and the expected cycle count must be non-zero.
    """
    if num_ch % simd != 0:
        pytest.skip(" num_ch % simd != 0, skipping")

    # generate input data
    x = gen_finn_dt_tensor(idt, [1, idim, idim, num_ch])
    input_dict = {"inp": x}
    odim = idim + pad

    model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, simd, idt, pad_style)
    model = (
        model.transform(InferShapes())
        .transform(SetExecMode(mode))
        .transform(GiveUniqueNodeNames())
    )
    if mode == "cppsim":
        model = model.transform(PrepareCppSim()).transform(CompileCppSim())
    elif mode == "rtlsim":
        model = (
            model.transform(PrepareIP(test_fpga_part, target_clk_ns))
            .transform(HLSSynthIP())
            .transform(PrepareRTLSim())
        )

    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert y_produced.shape == (1, odim, odim, num_ch)

    # calculate reference: pad_style 2 puts the odd leftover row/column
    # on the top/left side, every other style on the bottom/right side
    leftover = 1 if pad_style == 2 and pad % 2 != 0 else 0
    pad_up = pad_left = pad // 2 + leftover
    pad_down = pad - pad_up
    pad_right = pad - pad_left

    y_expected = np.pad(
        x, ((0, 0), (pad_up, pad_down), (pad_left, pad_right), (0, 0)), "constant"
    )
    assert (y_produced == y_expected).all()

    if mode != "rtlsim":
        return
    node = model.get_nodes_by_op_type("FMPadding_Batch")[0]
    cycles_rtlsim = getCustomOp(node).get_nodeattr("cycles_rtlsim")
    exp_cycles = model.analysis(exp_cycles_per_layer)[node.name]
    assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
    assert exp_cycles != 0
def test_cppsim(self, topology, wbits, abits):
    """Prepare the ``"fold"`` checkpoint for cppsim and verify it end to end.

    The model is loaded (or the test skipped) via
    ``load_test_checkpoint_or_skip``, prepared and compiled for cppsim, and
    saved as the ``"cppsim"`` checkpoint. That checkpoint is then executed
    through the ``"dataflow_parent"`` checkpoint with ``execute_parent`` and
    the result must be close to the golden output from ``get_golden_io_pair``.
    """
    folded_chkpt = get_checkpoint_name(topology, wbits, abits, "fold")
    model = (
        load_test_checkpoint_or_skip(folded_chkpt)
        .transform(PrepareCppSim())
        .transform(CompileCppSim())
        .transform(SetExecMode("cppsim"))
    )
    cppsim_chkpt = get_checkpoint_name(topology, wbits, abits, "cppsim")
    model.save(cppsim_chkpt)

    parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
    input_tensor_npy, output_tensor_npy = get_golden_io_pair(
        topology, wbits, abits, return_topk=1
    )
    y = execute_parent(parent_chkpt, cppsim_chkpt, input_tensor_npy)
    assert np.isclose(y, output_tensor_npy).all()
# Build step: apply the folding config file (if one is set) and optionally
# verify the result in cppsim.
#
# When `cfg.folding_config_file` is not None, the model gets unique node names
# and `ApplyConfig(cfg.folding_config_file)` is applied. When
# `VerificationStepType.FOLDED_HLS_CPPSIM` is among
# `cfg._resolve_verification_steps()`, the model is prepared and compiled for
# cppsim, switched to exec mode "cppsim", and passed to
# `verify_step(model, cfg, "folded_hls_cppsim", need_parent=True)`.
# Returns the (possibly transformed) model.
#
# NOTE(review): this definition is collapsed onto one physical line, so it
# cannot be read from the layout whether the verification `if` is nested inside
# the folding-config `if` or stands beside it. Confirm before reformatting.
def step_apply_folding_config(model: ModelWrapper, cfg: DataflowBuildConfig): """Apply the folding configuration file onto the model to set folding (parallelization) and other attributes, if config file is specified.""" if cfg.folding_config_file is not None: model = model.transform(GiveUniqueNodeNames()) model = model.transform(ApplyConfig(cfg.folding_config_file)) if VerificationStepType.FOLDED_HLS_CPPSIM in cfg._resolve_verification_steps(): # prepare cppsim model = model.transform(PrepareCppSim()) model = model.transform(CompileCppSim()) model = model.transform(SetExecMode("cppsim")) verify_step(model, cfg, "folded_hls_cppsim", need_parent=True) return model
def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):
    """Check a single-FIFO model in rtlsim, then run a throughput test on PYNQ.

    The rtlsim output must equal the input in both value and shape. The model
    then goes through the stitched-IP / PYNQ project / driver / deployment
    transformations, and throughput_test must report all expected keys.

    Board access is read from the environment: PYNQ_IP is mandatory (a
    missing variable raises KeyError), while PYNQ_USERNAME, PYNQ_PASSWORD,
    PYNQ_PORT and PYNQ_TARGET_DIR fall back to defaults. Note that PYNQ_PORT
    is the integer 22 by default but a string when taken from the environment.
    """
    # random stimulus
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    input_dict = prepare_inputs(x, finn_dtype)

    model = make_single_fifo_modelwrapper(Shape, depth, folded_shape, finn_dtype)
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())

    y = oxe.execute_onnx(model, input_dict)["outp"]
    assert (
        y == x
    ).all(), """The output values are not the same as the input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""

    # hardware build and driver generation
    model = model.transform(ReplaceVerilogRelPaths())
    model = model.transform(CreateStitchedIP(test_fpga_part))
    model = model.transform(MakePYNQProject(test_pynq_board))
    model = model.transform(SynthPYNQProject())
    model = model.transform(MakePYNQDriver())

    # board connection settings from the environment
    ip = os.environ["PYNQ_IP"]
    username = os.getenv("PYNQ_USERNAME", "xilinx")
    password = os.getenv("PYNQ_PASSWORD", "xilinx")
    port = os.getenv("PYNQ_PORT", 22)
    target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
    model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))

    res = throughput_test(model)
    required_keys = (
        "runtime[ms]",
        "throughput[images/s]",
        "DRAM_in_bandwidth[Mb/s]",
        "DRAM_out_bandwidth[Mb/s]",
    )
    for key in required_keys:
        assert (
            key in res
        ), """Throughput test not successful, no value for {} in result dictionary""".format(
            key
        )
def test_fpgadataflow_dwc_rtlsim(Shape, INWidth, OUTWidth, finn_dtype):
    """Run the single-DWC model in rtlsim and require output == input.

    The model is built by make_single_dwc_modelwrapper, synthesized for the
    xc7z020clg400-1 part with a 5 ns clock target, and simulated; both values
    and shape of the output must match the stimulus.
    """
    # random stimulus
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    input_dict = prepare_inputs(x, finn_dtype)

    model = make_single_dwc_modelwrapper(Shape, INWidth, OUTWidth, finn_dtype)
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP("xc7z020clg400-1", 5))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())

    y = oxe.execute_onnx(model, input_dict)["outp"]

    assert (
        y == x
    ).all(), """The output values are not the same as the input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""
def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):
    """Run the single-FIFO model in rtlsim and require output == input.

    The model is built by make_single_fifo_modelwrapper, synthesized for
    ``test_fpga_part`` at ``target_clk_ns``, and simulated; both values and
    shape of the output must match the stimulus.
    """
    # random stimulus
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    input_dict = prepare_inputs(x, finn_dtype)

    model = make_single_fifo_modelwrapper(Shape, depth, folded_shape, finn_dtype)
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())

    y = oxe.execute_onnx(model, input_dict)["outp"]

    assert (
        y == x
    ).all(), """The output values are not the same as the input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""
def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode):
    """Compare a single thresholding layer with the multithreshold reference.

    Args:
        idt: input datatype; also bounds the random threshold values.
        act: output (activation) datatype.
        nf: number of folds, or -1 to use ``ich`` folds (PE = 1).
        ich: number of input channels.
        exec_mode: "cppsim" or "rtlsim".
        mem_mode: forwarded to make_single_thresholding_modelwrapper.

    Raises:
        Exception: if ``exec_mode`` is neither "cppsim" nor "rtlsim".

    In rtlsim mode the test also checks that the HLS resource estimate has an
    entry for the node and that the simulated cycle count is close to the
    analytical estimate.
    """
    if nf == -1:
        nf = ich
    pe = ich // nf
    assert ich % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, ich))

    odt = act
    n_steps = act.get_num_possible_values() - 1
    T = np.random.randint(idt.min(), idt.max() + 1, (ich, n_steps)).astype(np.float32)
    # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
    # threshold of first channel is zero, while using BIPOLAR output)
    if act == DataType["BIPOLAR"]:
        T[0][0] = 0
    # provide non-decreasing thresholds
    T = np.sort(T, axis=1)

    if odt == DataType["BIPOLAR"]:
        actval = 0
    else:
        actval = odt.min()

    model = make_single_thresholding_modelwrapper(T, pe, idt, odt, actval, mem_mode)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # package input data as dictionary
    input_dict = {"inp": x}

    # reference result
    y = multithreshold(x, T)
    if act == DataType["BIPOLAR"]:
        # binary to bipolar
        y = 2 * y - 1
    else:
        # signed offset
        y += act.min()

    oshape = model.get_tensor_shape("outp")
    y_expected = y.reshape(oshape)

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_produced = y_produced.reshape(y_expected.shape)

    # report the mode that actually ran (was hard-coded to "cppsim failed",
    # which was misleading for rtlsim failures)
    assert (y_produced == y_expected).all(), "{} failed".format(exec_mode)

    if exec_mode == "rtlsim":
        hls_synt_res_est = model.analysis(hls_synth_res_estimation)
        assert "Thresholding_Batch_0" in hls_synt_res_est

        node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_convert_to_hls_layers_cnv_w1a1(fused_activation):
    """Convert the trained CNV-w1a1 network to HLS layers and check it in cppsim.

    The streamlined network's output on one CIFAR-10 test vector is the golden
    reference. After HLS conversion the node counts per op type are checked,
    the model is compiled for cppsim, and its output must be close to the
    reference with predicted class 3. With ``fused_activation`` false,
    thresholding layers are inferred before the FC layers.
    """
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)

    model = ModelWrapper(export_onnx_path_cnv)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(Streamline())
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    model = model.transform(InferDataLayouts())

    # load one of the test vectors and scale it by 1/255
    fn = pk.resource_filename("finn.qnn-data", "cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32) / 255
    assert input_tensor.shape == (1, 3, 32, 32)

    # golden output from the streamlined (pre-HLS) network
    input_dict = {"global_in": input_tensor}
    golden_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = golden_ctx[model.graph.output[0].name]

    # if we infer thresholding first, all MultiThresholds get converted to HLS
    # subsequently, the FC inference will generate passthrough MVAUs
    if not fused_activation:
        model = model.transform(to_hls.InferThresholdingLayer())
    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())

    # configure every FC layer: decoupled weights, PE = MH/4 and SIMD = MW/16
    # where those divide evenly, otherwise fully unfolded
    for node in model.graph.node:
        if node.op_type != "StreamingFCLayer_Batch":
            continue
        fc = getCustomOp(node)
        fc.set_nodeattr("mem_mode", "decoupled")
        mw = fc.get_nodeattr("MW")
        mh = fc.get_nodeattr("MH")
        fc.set_nodeattr("PE", mh // 4 if mh % 4 == 0 else mh)
        fc.set_nodeattr("SIMD", mw // 16 if mw % 16 == 0 else mw)

    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())

    # check topology status
    finn_nodes = model.get_finn_nodes()
    if fused_activation:
        assert len(finn_nodes) == 18
    else:
        assert len(finn_nodes) == 26
        thr_nodes = model.get_nodes_by_op_type("Thresholding_Batch")
        assert len(thr_nodes) == 8

    non_finn_nodes = model.get_non_finn_nodes()
    assert len(non_finn_nodes) == 4
    exp_non_finn_nodes = ["Transpose", "Reshape", "Mul", "Add"]
    assert [n.op_type for n in non_finn_nodes] == exp_non_finn_nodes

    assert len(model.get_nodes_by_op_type("StreamingFCLayer_Batch")) == 9
    assert len(model.get_nodes_by_op_type("ConvolutionInputGenerator")) == 6
    assert len(model.get_nodes_by_op_type("StreamingMaxPool_Batch")) == 2

    # compile for cppsim and compare with the golden output
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()
    assert np.argmax(produced) == 3

    os.remove(export_onnx_path_cnv)
def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
    """Lower a single Conv node to HLS layers and compare with the ONNX model.

    ``conv_config`` is a (kernel_size, stride, pad) triple. A one-node Conv
    graph with UINT4 input, output and weights is built, lowered, converted
    (VVAU when ``depthwise`` is True, otherwise a quantized FC layer) and run
    in ``exec_mode`` ("cppsim" or "rtlsim"; anything else raises Exception).
    The HLS model must produce the same result as the original model. Extra
    checks cover the DownSampler case, the padding node's SIMD, and rtlsim
    cycle counts.
    """
    kernel_size, stride, pad = conv_config
    np.random.seed(0)
    idt = DataType.UINT4

    in_feature_dim = 7
    in_chn = 16

    if depthwise is True:
        group = out_chn = in_chn
        conv_param_shape = [out_chn, 1, kernel_size, kernel_size]
    else:
        group = 1
        out_chn = 20
        conv_param_shape = [out_chn, in_chn, kernel_size, kernel_size]

    out_feature_dim = compute_conv_output_dim(in_feature_dim, kernel_size, stride, pad)

    input_shape = [1, in_chn, in_feature_dim, in_feature_dim]
    output_shape = [1, out_chn, out_feature_dim, out_feature_dim]

    conv_weight_dt = DataType.UINT4

    # attributes of the Conv node
    conv_attrs = {
        "dilations": [1, 1],
        "group": group,
        "kernel_shape": [kernel_size, kernel_size],
        "pads": [pad, pad, pad, pad],
        "strides": [stride, stride],
    }

    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape)
    value_info = [
        helper.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape)
    ]

    conv_node = helper.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_attrs)
    graph = helper.make_graph(
        name="conv_test",
        inputs=[top_in],
        outputs=[top_out],
        value_info=value_info,
        nodes=[conv_node],
    )
    modelproto = helper.make_model(graph)

    model = ModelWrapper(modelproto)
    model.set_tensor_datatype("top_in", idt)
    model.set_tensor_datatype("top_out", idt)
    model.set_tensor_datatype("p1", conv_weight_dt)
    model.set_initializer("p1", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape))

    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())

    # HLS conversion on a separate model; `model` stays as the reference
    new_model = model.transform(LowerConvsToMatMul())
    new_model = new_model.transform(to_hls.InferConvInpGen())
    if depthwise is True:
        new_model = new_model.transform(to_hls.InferVVAU())
    else:
        new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer())
        fc_node = new_model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
        fc_inst = getCustomOp(fc_node)
        mw = fc_inst.get_nodeattr("MW")
        mh = fc_inst.get_nodeattr("MH")
        # fold with the smallest divisor (>= 2) of each matrix dimension
        pe_cands = [d for d in range(2, mh + 1) if mh % d == 0]
        simd_cands = [d for d in range(2, mw + 1) if mw % d == 0]
        fc_inst.set_nodeattr("PE", pe_cands[0])
        fc_inst.set_nodeattr("SIMD", simd_cands[0])

    new_model = new_model.transform(GiveUniqueNodeNames())
    new_model = new_model.transform(InferShapes())
    new_model = new_model.transform(InferDataTypes())

    if exec_mode == "cppsim":
        new_model = new_model.transform(PrepareCppSim())
        new_model = new_model.transform(CompileCppSim())
        new_model = new_model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        new_model = new_model.transform(SetExecMode("rtlsim"))
        new_model = new_model.transform(GiveUniqueNodeNames())
        new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
        new_model = new_model.transform(HLSSynthIP())
        new_model = new_model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    x = gen_finn_dt_tensor(idt, input_shape)
    inp_dict = {model.graph.input[0].name: x}
    assert oxe.compare_execution(model, new_model, inp_dict)

    is_rtlsim = exec_mode == "rtlsim"

    # a strided, unpadded 1x1 kernel is expected to yield a DownSampler node
    if kernel_size == 1 and stride > 1 and pad == 0:
        assert new_model.graph.node[1].op_type == "DownSampler"
        if is_rtlsim:
            node = new_model.get_nodes_by_op_type("DownSampler")[0]
            cycles_rtlsim = getCustomOp(node).get_nodeattr("cycles_rtlsim")
            exp_cycles = new_model.analysis(exp_cycles_per_layer)[node.name]
            assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
            assert exp_cycles != 0

    if pad == 1:
        padding_node = new_model.get_nodes_by_op_type("FMPadding_Batch")[0]
        padding_inst = getCustomOp(padding_node)
        assert padding_inst.get_nodeattr("SIMD") == in_chn

    if depthwise is True and is_rtlsim:
        node = new_model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")[0]
        cycles_rtlsim = getCustomOp(node).get_nodeattr("cycles_rtlsim")
        exp_cycles = new_model.analysis(exp_cycles_per_layer)[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
        assert exp_cycles != 0