def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Depending on the auto_fifo_depths setting, do one of the following:

    * if auto_fifo_depths=True: Run the `InsertAndSetFIFODepths` transformation
      to attempt to determine the FIFO sizes that provide full throughput.
      Involves running stitched-IP rtlsim and may take a long time.
    * if auto_fifo_depths=False: Assume the folding config file contains FIFO
      sizes as well. Runs the `InsertFIFO` transformation, then
      `ApplyConfig(cfg.folding_config_file)`, and finally `RemoveShallowFIFOs`.
      Coherency with config file node naming is ensured by calling
      `GiveUniqueNodeNames`.
    """
    if cfg.auto_fifo_depths:
        # characterize FIFO depths via stitched-IP rtlsim (may be slow)
        model = model.transform(
            InsertAndSetFIFODepths(
                cfg._resolve_fpga_part(),
                cfg._resolve_hls_clk_period(),
                vivado_ram_style=cfg.large_fifo_mem_style.value,
            )
        )
    else:
        # assume folding cfg json contains FIFO sizes too
        # insert DWCs, FIFOs and run ApplyConfig once more
        model = model.transform(InsertDWC())
        # need to make sure all FIFOs are created so that their depth can be
        # set by ApplyConfig, so create_shallow_fifos=True
        model = model.transform(InsertFIFO(create_shallow_fifos=True))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(GiveReadableTensorNames())
        if cfg.folding_config_file is not None:
            model = model.transform(ApplyConfig(cfg.folding_config_file))
        # remove any shallow FIFOs
        model = model.transform(RemoveShallowFIFOs())

    # extract the final configuration and save it as json
    hw_attrs = [
        "PE",
        "SIMD",
        "ram_style",
        "depth",
        "impl_style",
        "resType",
        "mem_mode",
        "runtime_writeable_weights",
    ]
    extract_model_config_to_json(
        model, cfg.output_dir + "/final_hw_config.json", hw_attrs
    )

    # after FIFOs are ready to go, call PrepareIP and HLSSynthIP again
    # this will only run for the new nodes (e.g. FIFOs and DWCs)
    model = model.transform(
        PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())
    )
    model = model.transform(HLSSynthIP())
    return model
def test_res_estimate(): mw = mh = 4 simd = 1 pe = 1 idt = DataType.INT2 wdt = DataType.INT2 odt = DataType.INT32 actval = odt.min() inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw]) outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, mh]) node_inp_list = ["inp", "weights", "thresh"] FCLayer_node = helper.make_node( "StreamingFCLayer_Batch", node_inp_list, ["outp"], domain="finn", backend="fpgadataflow", resType="ap_resource_lut()", MW=mw, MH=mh, SIMD=simd, PE=pe, inputDataType=idt.name, weightDataType=wdt.name, outputDataType=odt.name, ActVal=actval, binaryXnorMode=0, noActivation=0, ) graph = helper.make_graph(nodes=[FCLayer_node], name="fclayer_graph", inputs=[inp], outputs=[outp]) model = helper.make_model(graph, producer_name="fclayer-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) model.set_tensor_datatype("outp", odt) model.set_tensor_datatype("weights", wdt) model = model.transform(GiveUniqueNodeNames()) prod_resource_estimation = model.analysis(res_estimation) expect_resource_estimation = { "StreamingFCLayer_Batch_0": { "BRAM_18K": 1, "LUT": 304.4 } } assert check_two_dict_for_equality( prod_resource_estimation, expect_resource_estimation), """The produced output of
def apply(self, model):
    """Build a Zynq dataflow design from the given model.

    Floorplans and partitions the graph, builds a stitched IP per
    StreamingDataflowPartition kernel, assembles the Vivado project and
    creates the PYNQ driver. Returns ``(model, False)`` so the transform
    framework does not re-apply it.
    """
    # first infer layouts
    model = model.transform(InferDataLayouts())
    # prepare at global level, then break up into kernels
    prep_transforms = [
        InsertIODMA(64),
        InsertDWC(),
        Floorplan(),
        CreateDataflowPartition(),
    ]
    for trn in prep_transforms:
        model = model.transform(trn)
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(GiveReadableTensorNames())
    # Build each kernel individually
    sdp_nodes = model.get_nodes_by_op_type("StreamingDataflowPartition")
    for sdp_node in sdp_nodes:
        # prefix keeps node names unique across kernels
        prefix = sdp_node.name + "_"
        sdp_node = getCustomOp(sdp_node)
        dataflow_model_filename = sdp_node.get_nodeattr("model")
        kernel_model = ModelWrapper(dataflow_model_filename)
        kernel_model = kernel_model.transform(InsertFIFO())
        kernel_model = kernel_model.transform(GiveUniqueNodeNames(prefix))
        kernel_model.save(dataflow_model_filename)
        kernel_model = kernel_model.transform(
            PrepareIP(self.fpga_part, self.period_ns)
        )
        kernel_model = kernel_model.transform(HLSSynthIP())
        kernel_model = kernel_model.transform(
            CreateStitchedIP(
                self.fpga_part, self.period_ns, sdp_node.onnx_node.name, True
            )
        )
        kernel_model.set_metadata_prop("platform", "zynq-iodma")
        kernel_model.save(dataflow_model_filename)
    # Assemble design from IPs
    model = model.transform(
        MakeZYNQProject(self.platform, enable_debug=self.enable_debug)
    )
    # set platform attribute for correct remote execution
    model.set_metadata_prop("platform", "zynq-iodma")
    # create driver
    model = model.transform(MakePYNQDriver(platform="zynq-iodma"))
    return (model, False)
def test_end2end_mobilenet_lowering():
    """Lower convolutions to matmuls on the streamlined MobileNet model
    and save the result as the next checkpoint."""
    model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_mobilenet_streamlined.onnx"
    )
    # transform order matters: lowering first, then transpose absorption,
    # renaming, datatype inference and threshold rounding
    lowering_pipeline = (
        LowerConvsToMatMul(),
        absorb.AbsorbTransposeIntoMultiThreshold(),
        absorb.AbsorbConsecutiveTransposes(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataTypes(),
        RoundAndClipThresholds(),
    )
    for xfm in lowering_pipeline:
        model = model.transform(xfm)
    model.save(build_dir + "/end2end_mobilenet_lowered.onnx")
def test_import_and_tidy(self, topology, wbits, abits):
    """Load the exported checkpoint, run the standard tidy-up pipeline
    and save the tidied model as a new checkpoint."""
    prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "export")
    model = load_test_checkpoint_or_skip(prev_chkpt_name)
    tidy_pipeline = (
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataTypes(),
        RemoveStaticGraphInputs(),
    )
    for xfm in tidy_pipeline:
        model = model.transform(xfm)
    chkpt = get_checkpoint_name(topology, wbits, abits, "import_and_tidy")
    model.save(chkpt)
def tidy_up(model):
    """Run the basic tidy-up transformations on the model and checkpoint
    the result as "0_tidy"."""
    log("Basic transformations launched")
    basic_transforms = (
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataTypes(),
        RemoveStaticGraphInputs(),
    )
    for xfm in basic_transforms:
        model = model.transform(xfm)
    log("Basic transformations completed")
    save(model, "0_tidy")
    return model
def test_ipgen(self, topology, wbits, abits, kind):
    """Generate and synthesize per-layer HLS IP for the folded model."""
    # Alveo flows additionally require Vitis
    if kind == "alveo" and ("VITIS_PATH" not in os.environ):
        pytest.skip("VITIS_PATH not set")
    prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "fold")
    model = load_test_checkpoint_or_skip(prev_chkpt_name)
    test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model.save(get_checkpoint_name(topology, wbits, abits, "ipgen_" + kind))
def test_end2end_mobilenet_convert_to_hls_layers():
    """Convert the lowered MobileNet graph into FINN HLS custom-op layers
    and save the result as the next checkpoint."""
    model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_mobilenet_lowered.onnx"
    )
    # conversion order matters; tidy-up transforms run last
    hls_pipeline = (
        to_hls.InferPool_Batch(),
        to_hls.InferConvInpGen(),
        to_hls.InferVVAU(),
        to_hls.InferQuantizedStreamingFCLayer(mem_mode),
        to_hls.InferChannelwiseLinearLayer(),
        to_hls.InferLabelSelectLayer(),
        InferShapes(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
    )
    for xfm in hls_pipeline:
        model = model.transform(xfm)
    model.save(build_dir + "/end2end_mobilenet_hls_layers.onnx")
def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode):
    """Execute FMPadding_Batch in cppsim or rtlsim and compare against an
    np.pad reference; for rtlsim also check the cycle estimate."""
    # SIMD must evenly divide the channel count
    if num_ch % simd != 0:
        pytest.skip(" num_ch % simd != 0, skipping")

    # generate input data
    x = gen_finn_dt_tensor(idt, [1, idim, idim, num_ch])
    input_dict = {"inp": x}
    odim = idim + pad

    model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, simd, idt, pad_style)
    model = model.transform(InferShapes())
    model = model.transform(SetExecMode(mode))
    model = model.transform(GiveUniqueNodeNames())
    if mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif mode == "rtlsim":
        model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    expected_oshape = (1, odim, odim, num_ch)
    assert y_produced.shape == expected_oshape

    # calculate reference
    # calculate correct pad according to parameters
    if pad_style == 2:
        if pad % 2 == 0:
            pad_up = pad // 2
            pad_left = pad // 2
        else:
            # pad_style 2 with odd pad: extra row/column goes at top/left
            pad_up = pad // 2 + 1
            pad_left = pad // 2 + 1
    else:
        pad_up = pad // 2
        pad_left = pad // 2

    pad_down = pad - pad_up
    pad_right = pad - pad_left

    y_expected = np.pad(
        x, ((0, 0), (pad_up, pad_down), (pad_left, pad_right), (0, 0)), "constant"
    )

    assert (y_produced == y_expected).all()

    if mode == "rtlsim":
        # compare measured rtlsim cycle count against the analytical estimate
        node = model.get_nodes_by_op_type("FMPadding_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_end2end_mobilenet_export():
    """Export the preprocessing model and quantized MobileNet to FINN-ONNX,
    and compute/save the golden top-5 reference output via Brevitas."""
    # export preprocessing
    preproc_onnx = build_dir + "/end2end_mobilenet_preproc.onnx"
    mean = [0.485, 0.456, 0.406]
    std = 0.226
    ch = 3
    preproc = NormalizePreProc(mean, std, ch)
    bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
    preproc_model = ModelWrapper(preproc_onnx)
    # set input finn datatype to UINT8
    preproc_model.set_tensor_datatype(
        preproc_model.graph.input[0].name, DataType["UINT8"]
    )
    preproc_model = preproc_model.transform(InferShapes())
    preproc_model = preproc_model.transform(FoldConstants())
    preproc_model = preproc_model.transform(GiveUniqueNodeNames())
    preproc_model = preproc_model.transform(GiveUniqueParameterTensors())
    preproc_model = preproc_model.transform(GiveReadableTensorNames())
    preproc_model.save(build_dir + "/end2end_mobilenet_preproc.onnx")
    # export mobilenet
    finn_onnx = build_dir + "/end2end_mobilenet_export.onnx"
    mobilenet = get_test_model_trained("mobilenet", 4, 4)
    bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), finn_onnx)

    # calculate golden output with pytorch/brevitas and save as .npy
    # get single image as input and prepare image
    img = Image.open("/workspace/finn/tests/brevitas/king_charles.jpg")
    # resize smallest side of the image to 256 pixels and resize larger side
    # with same ratio
    img = resize_smaller_side(256, img)
    # crop central 224*224 window
    img = crop_center(224, img)
    # save image as numpy array and as torch tensor to enable testing in
    # brevitas/pytorch and finn and transpose from (H, W, C) to (C, H, W)
    img_np = np.asarray(img).copy().astype(np.float32).transpose(2, 0, 1)
    img_np = img_np.reshape(1, 3, 224, 224)
    np.save(build_dir + "/end2end_mobilenet_input.npy", img_np)
    img_torch = torch.from_numpy(img_np).float()
    # do forward pass in PyTorch/Brevitas
    input_tensor = preproc.forward(img_torch)
    golden = mobilenet.forward(input_tensor).detach().numpy()
    golden_topk = golden.flatten()
    # top-5 class indices, highest probability first
    golden_top5 = np.argsort(golden_topk)[-5:]
    golden_top5 = np.flip(golden_top5)
    golden_top5_prob = []
    for index in golden_top5:
        golden_top5_prob.append(golden_topk[index])
    # save golden output values
    np.save(build_dir + "/end2end_mobilenet_golden_top5.npy", golden_top5)
    np.save(build_dir + "/end2end_mobilenet_golden_top5_prob.npy", golden_top5_prob)
    assert os.path.isfile(finn_onnx)
    assert os.path.isfile(build_dir + "/end2end_mobilenet_preproc.onnx")
def step_mobilenet_convert_to_hls_layers(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Convert MobileNet ops to FINN HLS layers, using the default memory
    mode from the build configuration for quantized FC layers."""
    mem_mode = cfg.default_mem_mode.value
    # conversion order matters; tidy-up transforms run last
    conversion_pipeline = (
        to_hls.InferPool_Batch(),
        to_hls.InferConvInpGen(),
        to_hls.InferVVAU(),
        to_hls.InferQuantizedStreamingFCLayer(mem_mode),
        to_hls.InferChannelwiseLinearLayer(),
        to_hls.InferLabelSelectLayer(),
        InferShapes(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
    )
    for xfm in conversion_pipeline:
        model = model.transform(xfm)
    return model
def post_processing(model):
    """Append a Top-1 output node, re-run the tidy-up pipeline and
    checkpoint the result as "2_with_pre_post"."""
    log("Starting Post Processing")
    # Insert Top-1 node at the end
    model = model.transform(InsertTopK(k=1))
    # Tidy-up again
    tidy_pipeline = (
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataTypes(),
        RemoveStaticGraphInputs(),
    )
    for xfm in tidy_pipeline:
        model = model.transform(xfm)
    log("Finished Post Processing!")
    save(model, "2_with_pre_post")
    return model
def inference_cost(
    model_filename,
    *,
    output_json=None,
    output_onnx=None,
    preprocess=True,
    discount_sparsity=True,
):
    """Print the inference cost estimate metric for given ONNX model.
    Supports the Quant op for weight/activation quantization.

    :param model_filename: Filename for ONNX model
    :param output_json: Optional JSON filename to save the inference cost dict
    :param output_onnx: Optional ONNX filename to save the final model after any
        preprocessing
    :param preprocess: If set, run preprocessing steps such as shape inference,
        datatype inference and constant folding. Strongly recommended.
    :param discount_sparsity: If set, will discount op cost of MAC ops with a
        constant zero weight, and the mem cost of constant zero weights.
    """
    print("Inference cost for " + model_filename)
    model = ModelWrapper(model_filename)
    if preprocess:
        # move Quant nodes into the FINN custom-op domain so the shape and
        # datatype inference transforms can resolve them
        qnt_nodes = model.get_nodes_by_op_type("Quant")
        for qnt_node in qnt_nodes:
            qnt_node.domain = "finn.custom_op.general"
        model = model.transform(InferShapes())
        model = model.transform(GiveUniqueParameterTensors())
        model = model.transform(InferDataTypes())
        model = model.transform(FoldConstants())
        model = model.transform(RemoveUnusedTensors())
        model = model.transform(RemoveStaticGraphInputs())
        model = model.transform(InferDataTypes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    if output_onnx is not None:
        model.save(output_onnx)
    ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity))
    bops = compute_bops(ret)
    mem_w_bits = compute_mem_bits(ret, "mem_w")
    mem_o_bits = compute_mem_bits(ret, "mem_o")
    ret["total_bops"] = bops
    ret["total_mem_w_bits"] = mem_w_bits
    ret["total_mem_o_bits"] = mem_o_bits
    if "unsupported" in ret:
        # make the unsupported-ops entry JSON-serializable
        ret["unsupported"] = str(ret["unsupported"])
    print(json.dumps(ret, sort_keys=True, indent=2))
    if output_json is not None:
        with open(output_json, "w") as f:
            json.dump(ret, f, sort_keys=True, indent=2)
def test_fpgadataflow_slidingwindow(
    idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw
):
    """Execute ConvolutionInputGenerator in cppsim or rtlsim and compare
    against an Im2Col reference model; for rtlsim also check the cycle
    estimate."""
    ofm_dim = int(((ifm_dim - k) / stride) + 1)

    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    model = make_single_slidingwindow_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw
    )

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")

    # prepare input data
    input_dict = prepare_inputs(x)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    golden = make_single_im2col_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
    )
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    if dw == 0:
        assert (y_produced == y_expected).all()
    else:
        # depthwise mode interleaves channels; rearrange the reference to
        # match the produced channel ordering before comparing
        y_expected = y_expected.reshape(
            1, ofm_dim, ofm_dim, k * k, ifm_ch // simd, simd
        )
        y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5)
        y_expected = y_expected.reshape(1, ofm_dim, ofm_dim, ifm_ch * k * k)
        assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        # compare measured rtlsim cycle count against the analytical estimate
        node = model.get_nodes_by_op_type("ConvolutionInputGenerator")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def step_apply_folding_config(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Apply the folding configuration file onto the model to set folding
    (parallelization) and other attributes, if config file is specified."""
    if cfg.folding_config_file is not None:
        # unique node names are required for config-file name matching
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(ApplyConfig(cfg.folding_config_file))

    if VerificationStepType.FOLDED_HLS_CPPSIM in cfg._resolve_verification_steps():
        # prepare cppsim
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
        verify_step(model, cfg, "folded_hls_cppsim", need_parent=True)
    return model
def test_topk_insert(k):
    """Export TFC, tidy it up, insert a TopK node with the given k and
    verify the top-k indices match the Brevitas golden output."""
    tfc = get_test_model_trained("TFC", 1, 1)
    bo.export_finn_onnx(tfc, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)

    # do transformations (no topk)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())

    # verification: generate random input, run through net, streamline,
    # run again, check that output is top-k
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    input_brevitas = torch.from_numpy(nph.to_array(input_tensor)).float()
    output_golden = tfc.forward(input_brevitas).detach().numpy()
    output_golden_topk = np.flip(output_golden.flatten().argsort())[:k]
    output_golden_topk = output_golden_topk.flatten()

    input_dict = {"global_in": nph.to_array(input_tensor)}

    # insert top-k
    model = model.transform(InsertTopK(k))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferShapes())

    # verify output of top-k
    output_dict_topk = oxe.execute_onnx(model, input_dict)
    output_pysim_topk = output_dict_topk[list(output_dict_topk.keys())[0]]
    # NOTE: use builtin int here; the np.int alias was deprecated in
    # NumPy 1.20 and removed in 1.24
    output_pysim_topk = output_pysim_topk.astype(int).flatten()

    assert np.array_equal(output_golden_topk, output_pysim_topk)
    os.remove(export_onnx_path)
def test_renaming():
    """Check GiveUniqueNodeNames/GiveReadableTensorNames: naming scheme,
    idempotency, and that the renamed model still executes correctly."""
    # load the onnx model
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model = model.transform(InferShapes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    # do some basic checks
    assert model.graph.input[0].name == "global_in"
    assert model.graph.output[0].name == "global_out"
    assert model.graph.node[1].op_type == "Conv"
    assert model.graph.node[1].name == "Conv_0"
    assert model.graph.node[1].input[1] == "Conv_0_param0"
    assert model.graph.node[6].op_type == "Add"
    assert model.graph.node[6].name == "Add_1"
    assert model.graph.node[6].input[1] == "Add_1_param0"
    # ensure running renaming twice still yields the same names
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    assert model.graph.node[1].op_type == "Conv"
    assert model.graph.node[1].name == "Conv_0"
    assert model.graph.node[1].input[1] == "Conv_0_param0"
    assert model.graph.node[6].op_type == "Add"
    assert model.graph.node[6].name == "Add_1"
    assert model.graph.node[6].input[1] == "Add_1_param0"
    # run renamed model to make sure we did not mess up the topology
    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
    raw_o = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/output_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    output_tensor = onnx.load_tensor_from_string(raw_o)
    input_dict = {"global_in": np_helper.to_array(input_tensor)}
    output_dict = oxe.execute_onnx(model, input_dict)
    assert np.isclose(
        np_helper.to_array(output_tensor), output_dict["global_out"], atol=1e-3
    ).all()
def test_fpgadataflow_ipstitch_gen_model():  # exec_mode):
    """Build HLS IP for a single-FC-layer model as preparation for
    IP stitching, then checkpoint it."""
    # NOTE(review): uses the legacy transform names CodeGen_ipgen /
    # HLSSynth_IPGen (later renamed PrepareIP / HLSSynthIP) — confirm the
    # imports in this file match
    model = create_one_fc_model()
    # unwrap the dataflow partition if the model was partitioned
    if model.graph.node[0].op_type == "StreamingDataflowPartition":
        sdp_node = getCustomOp(model.graph.node[0])
        assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
        assert os.path.isfile(sdp_node.get_nodeattr("model"))
        model = ModelWrapper(sdp_node.get_nodeattr("model"))
    model.set_metadata_prop("exec_mode", "remote_pynq")
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(CodeGen_ipgen(test_fpga_part, 5))
    model = model.transform(HLSSynth_IPGen())
    assert model.graph.node[0].op_type == "StreamingFCLayer_Batch"
    assert model.graph.node[-1].op_type == "TLastMarker"
    model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_gen_model.onnx")
def test_fpgadataflow_ipstitch_gen_model(mem_mode):
    """Build HLS IP for a single-FC-layer model (per mem_mode) as
    preparation for IP stitching, then checkpoint it."""
    model = create_one_fc_model(mem_mode)
    # unwrap the dataflow partition if the model was partitioned
    if model.graph.node[0].op_type == "StreamingDataflowPartition":
        sdp_node = getCustomOp(model.graph.node[0])
        assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
        assert os.path.isfile(sdp_node.get_nodeattr("model"))
        model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))
    model.set_metadata_prop("exec_mode", "remote_pynq")
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, 5))
    model = model.transform(HLSSynthIP())
    assert model.graph.node[0].op_type == "StreamingFCLayer_Batch"
    assert model.graph.node[-1].op_type == "TLastMarker"
    model.save(
        ip_stitch_model_dir
        + "/test_fpgadataflow_ipstitch_gen_model_%s.onnx" % mem_mode
    )
def step_tidy_up(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run the tidy-up step on given model. This includes shape and datatype
    inference, constant folding, and giving nodes and tensors better names.
    """
    tidy_pipeline = (
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataTypes(),
        RemoveStaticGraphInputs(),
    )
    for xfm in tidy_pipeline:
        model = model.transform(xfm)

    if VerificationStepType.TIDY_UP_PYTHON in cfg._resolve_verification_steps():
        verify_step(model, cfg, "initial_python", need_parent=False)

    return model
def test_infer_datatypes():
    """Check InferDataTypes propagation on the mnist-conv model after
    manually annotating input and weight datatypes."""
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    # this model has no DataType info, so add some DataType annotation
    # to make things a bit more exciting
    model.set_tensor_datatype("global_in", DataType["UINT8"])
    # Conv with int weights + inputs will have int output datatype
    model.set_tensor_datatype("Conv_0_param0", DataType["INT4"])
    model = model.transform(InferDataTypes())
    assert model.get_tensor_datatype("global_in") == DataType["UINT8"]
    assert model.get_tensor_datatype("Conv_0_out0") == DataType["INT32"]
    assert model.get_tensor_datatype("Relu_0_out0") == DataType["FLOAT32"]
    assert model.get_tensor_datatype("global_out") == DataType["FLOAT32"]
def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):
    """RTL-simulate a single StreamingFIFO (data must pass through
    unchanged), then run the full PYNQ build/deploy/throughput-test flow."""
    # generate input data
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    input_dict = prepare_inputs(x, finn_dtype)

    model = make_single_fifo_modelwrapper(Shape, depth, folded_shape, finn_dtype)

    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())
    y = oxe.execute_onnx(model, input_dict)["outp"]
    assert (
        y == x
    ).all(), """The output values are not the same as the input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""

    # build bitfile and deploy to a remote PYNQ board for a throughput test
    model = model.transform(ReplaceVerilogRelPaths())
    model = model.transform(CreateStitchedIP(test_fpga_part))
    model = model.transform(MakePYNQProject(test_pynq_board))
    model = model.transform(SynthPYNQProject())
    model = model.transform(MakePYNQDriver())
    ip = os.environ["PYNQ_IP"]
    username = os.getenv("PYNQ_USERNAME", "xilinx")
    password = os.getenv("PYNQ_PASSWORD", "xilinx")
    # NOTE(review): default is the int 22 while os.getenv returns str when
    # the variable is set — confirm DeployToPYNQ accepts both types
    port = os.getenv("PYNQ_PORT", 22)
    target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
    model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
    res = throughput_test(model)
    expected_dict = {}
    expected_dict["runtime[ms]"] = []
    expected_dict["throughput[images/s]"] = []
    expected_dict["DRAM_in_bandwidth[Mb/s]"] = []
    expected_dict["DRAM_out_bandwidth[Mb/s]"] = []
    for key in expected_dict:
        assert (
            key in res
        ), """Throughput test not successful, no value for {} in result dictionary""".format(
            key
        )
def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode):
    """Execute AddStreams_Batch in cppsim or rtlsim and compare against
    elementwise addition; for rtlsim also check the cycle estimate."""
    # fold == -1 means no folding (PE = 1)
    if fold == -1:
        pe = 1
    else:
        pe = max(1, ch // fold)
    assert ch % pe == 0

    # generate input data
    x1 = gen_finn_dt_tensor(idt, (1, ch))
    x2 = gen_finn_dt_tensor(idt, (1, ch))

    model = make_addstreams_modelwrapper(ch, pe, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data
    input_dict = prepare_inputs(x1, x2)

    oshape = model.get_tensor_shape("outp")
    y = x1 + x2
    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_produced = y_produced.reshape(y_expected.shape)

    assert (y_produced == y_expected).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        # compare measured rtlsim cycle count against the analytical estimate
        node = model.get_nodes_by_op_type("AddStreams_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_move_flatten_past_affine(data_layout, batch_size):
    """Check MoveFlattenPastTopK: for NHWC the Flatten node is moved past
    TopK, for other layouts the graph is unchanged; execution results must
    be identical either way."""
    # NOTE(review): function name says "affine" but the transform under test
    # is MoveFlattenPastTopK — possibly a copy-paste name; confirm intent
    if data_layout == DataLayout.NHWC:
        ishape = [batch_size, 1, 1, 1024]
        oshape = [batch_size, 1024]
    else:
        ishape = [batch_size, 1024, 1, 1]
        oshape = [batch_size, 1024]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)

    flatten_node = helper.make_node("Flatten", ["inp"], ["outp"])

    graph = helper.make_graph(
        nodes=[flatten_node],
        name="move-flatten-graph",
        inputs=[inp],
        outputs=[outp],
    )

    model = helper.make_model(graph, producer_name="move_flatten_model")
    model = ModelWrapper(model)

    model.set_tensor_datatype("inp", DataType.INT2)
    model.set_tensor_layout("inp", data_layout)
    model = model.transform(InsertTopK())
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())

    # compare execution before and after transformation
    inp_values = gen_finn_dt_tensor(DataType.INT2, ishape)
    idict = {model.graph.input[0].name: inp_values}
    model_transformed = model.transform(MoveFlattenPastTopK())
    assert oxe.compare_execution(model, model_transformed, idict)

    # depending on data layout check if graph is transformed or not
    if data_layout == DataLayout.NHWC:
        # check if nodes have new order in transformed graph
        assert model.graph != model_transformed.graph
        assert model_transformed.graph.node[-1].op_type == "Flatten"
    else:
        assert model.graph == model_transformed.graph
def test_fpgadataflow_globalaccpool(idt, ch, fold, imdim, exec_mode):
    """Execute GlobalAccPool_Batch in cppsim or rtlsim and compare against
    a spatial-sum reference."""
    # fold == -1 means no folding (PE = 1)
    if fold == -1:
        pe = 1
    else:
        pe = ch // fold
    assert ch % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, imdim, imdim, ch))

    model = make_accpool_modelwrapper(ch, pe, imdim, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    y = oxe.execute_onnx(model, input_dict)["outp"]
    # reference: accumulate over the two spatial axes
    expected_y = np.sum(x, axis=(1, 2)).flatten()

    assert (y == expected_y).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("GlobalAccPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        # commented out, needs performance debug:
        # test_fpgadataflow_globalaccpool[rtlsim-7-1-64-DataType.UINT4]
        # assert False where False =
        # <function isclose at 0x7eff26d5ca60>(50, 103, atol=(0.1 * 103))
        # assert np.isclose(exp_cycles, cycles_rtlsim, atol=0.1 * cycles_rtlsim)
        assert exp_cycles != 0
        assert cycles_rtlsim != 0
def test_infer_datatypes():
    """Check InferDataTypes on an exported LFC: MatMul outputs become
    INT32 and Sign outputs become BIPOLAR."""
    lfc = get_test_model_trained("LFC", 1, 1)
    bo.export_finn_onnx(lfc, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    assert model.get_tensor_datatype("MatMul_0_out0") == DataType.INT32
    assert model.get_tensor_datatype("MatMul_1_out0") == DataType.INT32
    assert model.get_tensor_datatype("MatMul_2_out0") == DataType.INT32
    assert model.get_tensor_datatype("MatMul_3_out0") == DataType.INT32
    assert model.get_tensor_datatype("Sign_0_out0") == DataType.BIPOLAR
    assert model.get_tensor_datatype("Sign_1_out0") == DataType.BIPOLAR
    assert model.get_tensor_datatype("Sign_2_out0") == DataType.BIPOLAR
    assert model.get_tensor_datatype("Sign_3_out0") == DataType.BIPOLAR
    os.remove(export_onnx_path)
def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):
    """RTL-simulate a single StreamingFIFO and check that data passes
    through unchanged with the expected shape."""
    # generate input data
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    input_dict = prepare_inputs(x, finn_dtype)

    model = make_single_fifo_modelwrapper(Shape, depth, folded_shape, finn_dtype)

    rtlsim_pipeline = (
        SetExecMode("rtlsim"),
        GiveUniqueNodeNames(),
        PrepareIP(test_fpga_part, target_clk_ns),
        HLSSynthIP(),
        PrepareRTLSim(),
    )
    for xfm in rtlsim_pipeline:
        model = model.transform(xfm)

    y = oxe.execute_onnx(model, input_dict)["outp"]

    assert (
        y == x
    ).all(), """The output values are not the same as the input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""
def test_fpgadataflow_dwc_rtlsim(Shape, INWidth, OUTWidth, finn_dtype):
    """RTL-simulate a StreamingDataWidthConverter and check that data
    passes through unchanged with the expected shape."""
    # generate input data
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    input_dict = prepare_inputs(x, finn_dtype)

    model = make_single_dwc_modelwrapper(Shape, INWidth, OUTWidth, finn_dtype)

    rtlsim_pipeline = (
        SetExecMode("rtlsim"),
        GiveUniqueNodeNames(),
        PrepareIP("xc7z020clg400-1", 5),
        HLSSynthIP(),
        PrepareRTLSim(),
    )
    for xfm in rtlsim_pipeline:
        model = model.transform(xfm)

    y = oxe.execute_onnx(model, input_dict)["outp"]

    assert (
        y == x
    ).all(), """The output values are not the same as the input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""
def test_ipstitch_rtlsim(self, topology, wbits, abits, kind):
    """Stitch the FIFO-sized model into one IP, run whole-network rtlsim
    through the dataflow parent and check top-1 output vs the golden pair."""
    prev_chkpt_name = get_checkpoint_name(
        topology, wbits, abits, "fifodepth_" + kind
    )
    model = load_test_checkpoint_or_skip(prev_chkpt_name)
    test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
    model = model.transform(InsertDWC())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(AnnotateCycles())
    perf = model.analysis(dataflow_performance)
    latency = perf["critical_path_cycles"]
    # rtlsim only supports impl_style=rtl for StreamingFIFO, ensure that
    for fifo_layer in model.get_nodes_by_op_type("StreamingFIFO"):
        getCustomOp(fifo_layer).set_nodeattr("impl_style", "rtl")
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
    model = model.transform(PrepareRTLSim())
    model.set_metadata_prop("exec_mode", "rtlsim")
    # give the simulation some headroom beyond the estimated latency
    os.environ["LIVENESS_THRESHOLD"] = str(int(latency * 1.1))
    if rtlsim_trace:
        model.set_metadata_prop(
            "rtlsim_trace", "%s_w%da%d.vcd" % (topology, wbits, abits)
        )
        os.environ["RTLSIM_TRACE_DEPTH"] = "3"
    rtlsim_chkpt = get_checkpoint_name(
        topology, wbits, abits, "ipstitch_rtlsim_" + kind
    )
    model.save(rtlsim_chkpt)
    parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
    (input_tensor_npy, output_tensor_npy) = get_golden_io_pair(
        topology, wbits, abits, return_topk=1
    )
    # execute the stitched-IP child model via its dataflow parent
    y = execute_parent(parent_chkpt, rtlsim_chkpt, input_tensor_npy)
    model = ModelWrapper(rtlsim_chkpt)
    perf["cycles_rtlsim"] = model.get_metadata_prop("cycles_rtlsim")
    # warnings.warn("Estimated & rtlsim performance: " + str(perf))
    # for (k, v) in perf.items():
    # update_dashboard_data(topology, wbits, abits, k, v)
    update_dashboard_data(
        topology, wbits, abits, "cycles_rtlsim", perf["cycles_rtlsim"]
    )
    assert np.isclose(y, output_tensor_npy).all()
def test_fpgadataflow_duplicatestreams(idt, ch, fold, imdim, exec_mode):
    """Execute DuplicateStreams_Batch in cppsim or rtlsim: both outputs
    must equal the input; for rtlsim also check the cycle estimate."""
    # fold == -1 means no folding (PE = 1)
    if fold == -1:
        pe = 1
    else:
        pe = ch // fold
    assert ch % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, imdim, imdim, ch))

    model = make_dupstreams_modelwrapper(ch, pe, imdim, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    output_dict = oxe.execute_onnx(model, input_dict)
    y0 = output_dict["outp0"]
    y1 = output_dict["outp1"]
    # both duplicated streams must match the input exactly
    expected_y = x

    assert (y0 == expected_y).all(), exec_mode + " failed"
    assert (y1 == expected_y).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        # compare measured rtlsim cycle count against the analytical estimate
        node = model.get_nodes_by_op_type("DuplicateStreams_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0