def execute_node(self, context, graph):
    node = self.onnx_node
    inp_name = node.input[0]
    out_name = node.output[0]
    inp = context[inp_name]
    dummy_out = context[out_name]
    # convert i/o NHWC -> NCHW
    inp = np.transpose(inp, (0, 3, 1, 2))
    dummy_out = np.transpose(dummy_out, (0, 3, 1, 2))
    # execute as regular MaxPool
    orig_domain = node.domain
    node.domain = ""
    node.op_type = "MaxPool"
    inp_vi = helper.make_tensor_value_info(inp_name, TensorProto.FLOAT, inp.shape)
    out_vi = helper.make_tensor_value_info(
        out_name, TensorProto.FLOAT, dummy_out.shape
    )
    tmp_graph = helper.make_graph(
        nodes=[node], name="tmp_graph", inputs=[inp_vi], outputs=[out_vi]
    )
    tmp_model = helper.make_model(tmp_graph, producer_name="finn")
    tmp_model = ModelWrapper(tmp_model)
    new_ctx = {inp_name: inp}
    from finn.core.onnx_exec import execute_onnx

    ret = execute_onnx(tmp_model, new_ctx)
    # restore original node props
    node.domain = orig_domain
    node.op_type = "MaxPoolNHWC"
    outp = ret[out_name]
    # convert output NCHW -> NHWC
    outp = np.transpose(outp, (0, 2, 3, 1))
    context[out_name] = outp

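# Minimal sketch (not part of the original source) showing why the transpose
# pairs above are inverses of each other: NHWC -> NCHW uses axes (0, 3, 1, 2),
# and NCHW -> NHWC uses axes (0, 2, 3, 1).
import numpy as np

x_nhwc = np.random.rand(1, 4, 4, 3).astype(np.float32)  # N, H, W, C
x_nchw = np.transpose(x_nhwc, (0, 3, 1, 2))  # N, C, H, W
assert x_nchw.shape == (1, 3, 4, 4)
roundtrip = np.transpose(x_nchw, (0, 2, 3, 1))  # back to N, H, W, C
assert np.array_equal(x_nhwc, roundtrip)
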
def test_add_pre_and_postproc(self, topology, wbits, abits):
    prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "import_and_tidy")
    model = load_test_checkpoint_or_skip(prev_chkpt_name)
    global_inp_name = model.graph.input[0].name
    ishape = model.get_tensor_shape(global_inp_name)
    # preprocessing: torchvision's ToTensor divides uint8 inputs by 255
    totensor_pyt = ToTensor()
    chkpt_preproc_name = get_checkpoint_name(topology, wbits, abits, "preproc")
    bo.export_finn_onnx(totensor_pyt, ishape, chkpt_preproc_name)
    assert os.path.isfile(chkpt_preproc_name)
    # join preprocessing and core model
    pre_model = ModelWrapper(chkpt_preproc_name)
    model = model.transform(MergeONNXModels(pre_model))
    # add input quantization annotation: UINT8 for all BNN-PYNQ models
    global_inp_name = model.graph.input[0].name
    model.set_tensor_datatype(global_inp_name, DataType.UINT8)
    # postprocessing: insert Top-1 node at the end
    model = model.transform(InsertTopK(k=1))
    chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post")
    # tidy-up again
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RemoveStaticGraphInputs())
    model.save(chkpt_name)
    assert os.path.isfile(chkpt_name)

def test_fpgadataflow_ipstitch_gen_model():
    model = create_one_fc_model()
    if model.graph.node[0].op_type == "StreamingDataflowPartition":
        sdp_node = getCustomOp(model.graph.node[0])
        assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
        assert os.path.isfile(sdp_node.get_nodeattr("model"))
        model = ModelWrapper(sdp_node.get_nodeattr("model"))
    model.set_metadata_prop("exec_mode", "remote_pynq")
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, 5))
    model = model.transform(HLSSynthIP())
    assert model.graph.node[0].op_type == "StreamingFCLayer_Batch"
    assert model.graph.node[-1].op_type == "TLastMarker"
    model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_gen_model.onnx")

def step_hls_codegen(model: ModelWrapper, cfg: DataflowBuildConfig):
    "Generate Vivado HLS code to prepare HLSCustomOp nodes for IP generation."

    model = model.transform(
        PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())
    )
    return model

def step_apply_folding_config(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Apply the folding configuration file onto the model to set folding
    (parallelization) and other attributes, if a config file is specified."""

    if cfg.folding_config_file is not None:
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(ApplyConfig(cfg.folding_config_file))

    if VerificationStepType.FOLDED_HLS_CPPSIM in cfg._resolve_verification_steps():
        # prepare cppsim
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
        verify_step(model, cfg, "folded_hls_cppsim", need_parent=True)
    return model

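# Illustrative sketch (not from the original source) of what a folding config
# for ApplyConfig might look like. The node names and values below are made-up
# examples; the JSON maps node names (plus an optional "Defaults" section) to
# node attribute values.
import json

example_folding_config = {
    "Defaults": {"mem_mode": ["decoupled", ["StreamingFCLayer_Batch"]]},
    "StreamingFCLayer_Batch_0": {"PE": 2, "SIMD": 8},
    "StreamingFCLayer_Batch_1": {"PE": 4, "SIMD": 4},
}
with open("folding_config.json", "w") as f:
    json.dump(example_folding_config, f, indent=2)
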
def step_target_fps_parallelization(model: ModelWrapper, cfg: DataflowBuildConfig):
    """If target_fps was specified, use the SetFolding transformation to determine
    parallelization attributes. The auto-generated config will be saved under
    auto_folding_config.json under the outputs, which can serve as a basis for
    customizing the folding factors further."""

    target_cycles_per_frame = cfg._resolve_cycles_per_frame()
    if target_cycles_per_frame is not None:
        model = model.transform(
            SetFolding(
                target_cycles_per_frame,
                mvau_wwidth_max=cfg.mvau_wwidth_max,
                two_pass_relaxation=cfg.folding_two_pass_relaxation,
            )
        )
        # extract the suggested configuration and save it as json
        hw_attrs = [
            "PE",
            "SIMD",
            "ram_style",
            "resType",
            "mem_mode",
            "runtime_writeable_weights",
        ]
        extract_model_config_to_json(
            model, cfg.output_dir + "/auto_folding_config.json", hw_attrs
        )
    return model

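# Back-of-the-envelope sketch (assumption: this mirrors how a target frame
# rate translates into a per-frame cycle budget; the exact resolution lives in
# cfg._resolve_cycles_per_frame()). With a clock period in ns and a target
# fps, the budget is (clock cycles per second) / (frames per second).
synth_clk_period_ns = 10.0  # hypothetical 100 MHz clock
target_fps = 1000  # hypothetical throughput target
cycles_per_sec = 10**9 / synth_clk_period_ns
target_cycles_per_frame = int(cycles_per_sec / target_fps)  # 100000 here
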
def test_modelwrapper_detect_forks_n_joins():
    # create small network with properties to be tested
    Neg_node = onnx.helper.make_node("Neg", inputs=["in1"], outputs=["neg1"])
    Round_node = onnx.helper.make_node("Round", inputs=["neg1"], outputs=["round1"])
    Ceil_node = onnx.helper.make_node("Ceil", inputs=["neg1"], outputs=["ceil1"])
    Add_node = onnx.helper.make_node(
        "Add", inputs=["round1", "ceil1"], outputs=["out1"]
    )

    in1 = onnx.helper.make_tensor_value_info("in1", onnx.TensorProto.FLOAT, [4, 4])
    out1 = onnx.helper.make_tensor_value_info("out1", onnx.TensorProto.FLOAT, [4, 4])

    graph = onnx.helper.make_graph(
        nodes=[Neg_node, Round_node, Ceil_node, Add_node],
        name="simple_graph",
        inputs=[in1],
        outputs=[out1],
        value_info=[
            onnx.helper.make_tensor_value_info(
                "neg1", onnx.TensorProto.FLOAT, [4, 4]
            ),
            onnx.helper.make_tensor_value_info(
                "round1", onnx.TensorProto.FLOAT, [4, 4]
            ),
            onnx.helper.make_tensor_value_info(
                "ceil1", onnx.TensorProto.FLOAT, [4, 4]
            ),
        ],
    )

    onnx_model = onnx.helper.make_model(graph, producer_name="simple-model")
    model = ModelWrapper(onnx_model)

    # test: Neg forks (its output feeds both Round and Ceil),
    # Add joins (it consumes two dynamically produced inputs)
    assert model.is_fork_node(Neg_node)
    assert not model.is_fork_node(Round_node)
    assert not model.is_fork_node(Ceil_node)
    assert not model.is_fork_node(Add_node)
    assert not model.is_join_node(Neg_node)
    assert not model.is_join_node(Round_node)
    assert not model.is_join_node(Ceil_node)
    assert model.is_join_node(Add_node)

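# Minimal sketch (assumption: not the actual ModelWrapper implementation) of
# what fork/join detection boils down to: a node forks if any of its outputs
# feeds more than one consumer, and joins if more than one of its inputs is
# produced by another node (i.e. is not an initializer or graph input).
def is_fork_sketch(graph, node):
    for out in node.output:
        consumers = [n for n in graph.node if out in n.input]
        if len(consumers) > 1:
            return True
    return False


def is_join_sketch(graph, node):
    produced_inputs = [
        inp for inp in node.input if any(inp in n.output for n in graph.node)
    ]
    return len(produced_inputs) > 1
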
def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type):
    min_val = -1.0
    ishape = (1, 15)

    b_act = QuantReLU(
        bit_width=abits,
        max_val=max_val,
        scaling_impl_type=scaling_impl_type,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        quant_type=QuantType.INT,
    )
    if scaling_impl_type == ScalingImplType.PARAMETER:
        checkpoint = {
            "act_quant_proxy.fused_activation_quant_proxy.tensor_quant."
            "scaling_impl.learned_value": torch.tensor(0.49).type(torch.FloatTensor)
        }
        b_act.load_state_dict(checkpoint)

    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
        np.float32
    )
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    b_act.eval()
    expected = b_act.forward(inp_tensor).detach().numpy()

    if not np.isclose(produced, expected, atol=1e-3).all():
        print(abits, max_val, scaling_impl_type)
        print("scale: ", b_act.quant_act_scale().type(torch.FloatTensor).detach())
        if abits < 5:
            print(
                "thres:",
                ", ".join(["{:8.4f}".format(x) for x in b_act.export_thres[0]]),
            )
        print("input:", ", ".join(["{:8.4f}".format(x) for x in inp_tensor[0]]))
        print("prod :", ", ".join(["{:8.4f}".format(x) for x in produced[0]]))
        print("expec:", ", ".join(["{:8.4f}".format(x) for x in expected[0]]))

    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)

def test_move_scalar_past_matmul_only_if_linear(test_args):
    scalar_op = test_args[0]
    transf_fxn = test_args[1]

    input_shape = [1, 2]
    matmul_shape = [2, 2]
    top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
    top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, input_shape)

    p1 = oh.make_tensor_value_info("p1", TensorProto.FLOAT, [1, 1])
    p2 = oh.make_tensor_value_info("p2", TensorProto.FLOAT, matmul_shape)
    p3 = oh.make_tensor_value_info("p3", TensorProto.FLOAT, matmul_shape)
    p4 = oh.make_tensor_value_info("p4", TensorProto.FLOAT, matmul_shape)

    modelproto = oh.make_model(
        oh.make_graph(
            name="test",
            inputs=[top_in],
            outputs=[top_out],
            value_info=[p1, p2, p3, p4],
            nodes=[
                oh.make_node(scalar_op, ["top_in", "p1"], ["t1"]),
                oh.make_node("MatMul", ["t1", "p2"], ["fork"]),
                oh.make_node("MatMul", ["fork", "p3"], ["t3"]),
                oh.make_node(scalar_op, ["t3", "fork"], ["t4"]),
                oh.make_node("MatMul", ["t4", "p4"], ["top_out"]),
            ],
        )
    )
    model = ModelWrapper(modelproto)
    model = model.transform(InferShapes())
    np.random.seed(0)
    model.set_initializer("p1", np.random.rand(1, 1).astype(np.float32))
    model.set_initializer("p2", np.random.rand(*matmul_shape).astype(np.float32))
    model.set_initializer("p3", np.random.rand(*matmul_shape).astype(np.float32))
    model.set_initializer("p4", np.random.rand(*matmul_shape).astype(np.float32))

    # Transform
    new_model = model.transform(transf_fxn)

    # Test: the first scalar op sits on a linear segment and gets moved past
    # the first MatMul; the second consumes the "fork" tensor and must stay put
    inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)}
    assert ox.compare_execution(model, new_model, inp_dict)
    assert new_model.graph.node[0].op_type == "MatMul"
    assert new_model.graph.node[1].op_type == scalar_op
    assert new_model.graph.node[2].op_type == "MatMul"
    assert new_model.graph.node[3].op_type == scalar_op
    assert new_model.graph.node[4].op_type == "MatMul"

def load_test_checkpoint_or_skip(filename):
    "Try to load given .onnx and return ModelWrapper, else skip current test."
    if os.path.isfile(filename):
        model = ModelWrapper(filename)
        return model
    else:
        warnings.warn(filename + " not found from previous test step, skipping")
        pytest.skip(filename + " not found from previous test step, skipping")

def test_end2end_tfc_w1a2_deploy_on_pynq():
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_pynq_driver.onnx")
    try:
        # raises KeyError if not defined; handled by the skip below
        ip = os.environ["PYNQ_IP"]
        if ip == "":
            pytest.skip("PYNQ board IP address not specified")
        username = os.getenv("PYNQ_USERNAME", "xilinx")
        password = os.getenv("PYNQ_PASSWORD", "xilinx")
        port = os.getenv("PYNQ_PORT", 22)
        target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
        model = model.transform(
            DeployToPYNQ(ip, port, username, password, target_dir)
        )
        # save the model to be able to link it to the parent
        model.save(build_dir + "/end2end_tfc_w1a2_pynq_deploy.onnx")
    except KeyError:
        pytest.skip("PYNQ board IP address not specified")

def load_test_checkpoint_or_skip(filename):
    "Try to load given .onnx and return ModelWrapper, else skip current test."
    try:
        model = ModelWrapper(filename)
        return model
    except FileNotFoundError:
        warnings.warn(filename + " not found from previous test step, skipping")
        pytest.skip(filename + " not found from previous test step, skipping")

def apply(self, model):
    # hide your riches! temporarily mask FINN custom ops so that standard
    # ONNX shape inference can handle the graph
    hidden_ops = _hide_finn_ops(model)
    # call regular ONNX shape inference
    model = ModelWrapper(si.infer_shapes(model.model))
    # bring back hidden ops
    _restore_finn_ops(model, hidden_ops)
    return (model, False)

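# Hypothetical sketch (the real _hide_finn_ops / _restore_finn_ops are not
# shown in this excerpt): the idea is to swap each FINN custom-op node for a
# standard ONNX node with the same output shape, run stock shape inference,
# then put the originals back. Assumes a make_shape_compatible_op hook on the
# custom op, as in FINN's CustomOp base class.
def _hide_finn_ops_sketch(model):
    hidden = {}  # maps graph position -> original custom-op node
    for i in range(len(model.graph.node)):
        node = model.graph.node[i]
        if node.domain.startswith("finn"):
            hidden[i] = node
            stand_in = getCustomOp(node).make_shape_compatible_op(model)
            model.graph.node.remove(node)
            model.graph.node.insert(i, stand_in)
    return hidden


def _restore_finn_ops_sketch(model, hidden):
    for i, orig in hidden.items():
        stand_in = model.graph.node[i]
        model.graph.node.remove(stand_in)
        model.graph.node.insert(i, orig)
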
def test_fpgadataflow_ipstitch_remote_execution():
    try:
        ip = os.environ["PYNQ_IP"]  # NOQA
        if ip == "":
            pytest.skip("PYNQ board IP address not specified")
        model = ModelWrapper(
            ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_pynq_deployment.onnx"
        )
        iname = "inp"
        idt = model.get_tensor_datatype(iname)
        ishape = model.get_tensor_shape(iname)
        x = gen_finn_dt_tensor(idt, ishape)
        input_dict = {"inp": x}
        outp = execute_onnx(model, input_dict)
        assert np.isclose(outp["outp"], x).all()
    except KeyError:
        pytest.skip("PYNQ board IP address not specified")

def test_change_datalayout_quantavgpool(s, k, ibits, obits, signed, c, idim):
    n = 1
    odim = compute_pool_output_dim(idim, k, s)
    # determine input FINN datatype
    if signed is True:
        prefix = "INT"
    else:
        prefix = "UINT"
    dt_name = prefix + str(ibits)
    dtype = DataType[dt_name]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [n, c, idim, idim])
    outp = helper.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [n, c, odim, odim]
    )

    node = helper.make_node(
        "QuantAvgPool2d",
        ["inp"],
        ["outp"],
        domain="finn.custom_op.general",
        stride=s,
        kernel=k,
        ibits=ibits,
        obits=obits,
        signed=signed,
        data_layout="NCHW",
    )
    graph = helper.make_graph(
        nodes=[node], name="single-quantavgpool", inputs=[inp], outputs=[outp]
    )

    model = helper.make_model(graph)
    model = ModelWrapper(model)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model_transformed = model.transform(ChangeDataLayoutQuantAvgPool2d())
    model_transformed = model_transformed.transform(InferShapes())
    model_transformed = model_transformed.transform(InferDataTypes())
    model_transformed = model_transformed.transform(InferDataLayouts())
    model_transformed = model_transformed.transform(GiveUniqueNodeNames())
    model_transformed = model_transformed.transform(GiveReadableTensorNames())
    inp_values = gen_finn_dt_tensor(dtype, [n, c, idim, idim])
    idict = {"inp": inp_values}
    assert oxe.compare_execution(model, model_transformed, idict)
    # the transformation should wrap the pool node in a pair of Transposes
    assert len(model.graph.node) + 2 == len(model_transformed.graph.node)
    assert model_transformed.graph.node[-1].op_type == "Transpose"
    assert model_transformed.graph.node[0].op_type == "Transpose"
    # check if QuantAvgPool2d node has data_layout set correctly
    node = model_transformed.graph.node[1]
    d_layout = get_by_name(node.attribute, "data_layout").s.decode("UTF-8")
    assert d_layout == "NHWC"
    assert model_transformed.get_tensor_layout(node.input[0]) == DataLayout.NHWC
    assert model_transformed.get_tensor_layout(node.output[0]) == DataLayout.NHWC

def step_mobilenet_lower_convs(model: ModelWrapper, cfg: DataflowBuildConfig):
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(absorb.AbsorbConsecutiveTransposes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RoundAndClipThresholds())
    model = model.transform(InferDataLayouts())
    return model

def make_single_maxpool_modelwrapper(k, stride, pad, ifm_ch, ifm_dim, ofm_dim, idt):
    odt = idt
    inp = helper.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim]
    )
    outp = helper.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ifm_ch, ofm_dim, ofm_dim]
    )

    mp_node = helper.make_node(
        "MaxPool",
        ["inp"],
        ["outp"],
        kernel_shape=[k, k],
        pads=[pad, pad, pad, pad],
        strides=[stride, stride],
    )
    graph = helper.make_graph(
        nodes=[mp_node], name="mp_graph", inputs=[inp], outputs=[outp]
    )

    model = helper.make_model(graph, producer_name="mp-model")
    model = ModelWrapper(model)

    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)
    model = model.transform(InferShapes())

    return model

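# Usage sketch (illustrative values, not from the original tests). The caller
# must pass a consistent ofm_dim; for a standard pooling window it follows the
# usual formula ofm_dim = (ifm_dim + 2 * pad - k) // stride + 1.
k, stride, pad, ifm_ch, ifm_dim = 2, 2, 0, 4, 8
ofm_dim = (ifm_dim + 2 * pad - k) // stride + 1  # = 4 here
mp_model = make_single_maxpool_modelwrapper(
    k, stride, pad, ifm_ch, ifm_dim, ofm_dim, DataType.INT4
)
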
def make_single_quantavpool_modelwrapper(k, stride, ifm_ch, ifm_dim, ofm_dim, idt, odt):
    inp = helper.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim]
    )
    outp = helper.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ifm_ch, ofm_dim, ofm_dim]
    )

    mp_node = helper.make_node(
        "QuantAvgPool2d",
        ["inp"],
        ["outp"],
        domain="finn.custom_op.general",
        stride=stride,
        kernel=k,
        ibits=idt.bitwidth(),
        obits=odt.bitwidth(),
        signed=1 if idt.signed() else 0,
        data_layout="NCHW",
    )
    graph = helper.make_graph(
        nodes=[mp_node], name="mp_graph", inputs=[inp], outputs=[outp]
    )

    model = helper.make_model(graph, producer_name="mp-model")
    model = ModelWrapper(model)

    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)
    model = model.transform(InferShapes())

    return model

def step_mobilenet_convert_to_hls_layers_separate_th(
    model: ModelWrapper, cfg: DataflowBuildConfig
):
    mem_mode = cfg.default_mem_mode.value
    model = model.transform(to_hls.InferPool_Batch())
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferThresholdingLayer())
    model = model.transform(to_hls.InferVVAU())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
    model = model.transform(to_hls.InferChannelwiseLinearLayer())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(InferShapes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    return model

def test_renaming():
    # load the onnx model
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model = model.transform(InferShapes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    # do some basic checks
    assert model.graph.input[0].name == "global_in"
    assert model.graph.output[0].name == "global_out"
    assert model.graph.node[1].op_type == "Conv"
    assert model.graph.node[1].name == "Conv_0"
    assert model.graph.node[1].input[1] == "Conv_0_param0"
    assert model.graph.node[6].op_type == "Add"
    assert model.graph.node[6].name == "Add_1"
    assert model.graph.node[6].input[1] == "Add_1_param0"
    # ensure running renaming twice still yields the same names
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    assert model.graph.node[1].op_type == "Conv"
    assert model.graph.node[1].name == "Conv_0"
    assert model.graph.node[1].input[1] == "Conv_0_param0"
    assert model.graph.node[6].op_type == "Add"
    assert model.graph.node[6].name == "Add_1"
    assert model.graph.node[6].input[1] == "Add_1_param0"
    # run renamed model to make sure we did not mess up the topology
    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
    raw_o = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/output_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    output_tensor = onnx.load_tensor_from_string(raw_o)
    input_dict = {"global_in": np_helper.to_array(input_tensor)}
    output_dict = oxe.execute_onnx(model, input_dict)
    assert np.isclose(
        np_helper.to_array(output_tensor), output_dict["global_out"], atol=1e-3
    ).all()

def test_streamline_fc(size, wbits, abits):
    if size == "LFC" and wbits == 2 and abits == 2:
        pytest.skip("No LFC-w2a2 present at the moment")
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")
    nname = "%s_%dW%dA" % (size, wbits, abits)
    finn_onnx = export_onnx_path + "/%s.onnx" % nname
    fc = get_test_model_trained(size, wbits, abits)
    bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)
    model = ModelWrapper(finn_onnx)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    # load one of the test vectors
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    # run using FINN-based execution
    input_dict = {"global_in": nph.to_array(input_tensor)}
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]
    model = model.transform(Streamline())
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()

def test_brevitas_avg_pool_export(
    kernel_size, stride, signed, bit_width, input_bit_width, channels, idim
):
    quant_avgpool = QuantAvgPool2d(
        kernel_size=kernel_size, stride=stride, bit_width=bit_width
    )
    quant_avgpool.eval()

    # determine input
    prefix = "INT" if signed else "UINT"
    dt_name = prefix + str(input_bit_width)
    dtype = DataType[dt_name]
    input_shape = (1, channels, idim, idim)
    input_array = gen_finn_dt_tensor(dtype, input_shape)
    scale_array = np.random.uniform(low=0, high=1, size=(1, channels, 1, 1)).astype(
        np.float32
    )
    input_tensor = torch.from_numpy(input_array * scale_array).float()
    scale_tensor = torch.from_numpy(scale_array).float()
    zp = torch.tensor(0.0)
    input_quant_tensor = QuantTensor(
        input_tensor, scale_tensor, zp, input_bit_width, signed, training=False
    )

    # export
    FINNManager.export(
        quant_avgpool, export_path=export_onnx_path, input_t=input_quant_tensor
    )
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())

    # reference brevitas output
    ref_output_array = quant_avgpool(input_quant_tensor).tensor.detach().numpy()
    # finn output
    idict = {model.graph.input[0].name: input_array}
    odict = oxe.execute_onnx(model, idict, True)
    finn_output = odict[model.graph.output[0].name]
    # compare outputs
    assert np.isclose(ref_output_array, finn_output).all()
    # cleanup
    os.remove(export_onnx_path)

def step_create_dataflow_partition(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Separate consecutive groups of HLSCustomOp nodes into
    StreamingDataflowPartition nodes, which point to a separate ONNX file.
    Dataflow accelerator synthesis can only be performed on those HLSCustomOp
    sub-graphs."""

    parent_model = model.transform(CreateDataflowPartition())
    sdp_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")
    assert len(sdp_nodes) == 1, "Only a single StreamingDataflowPartition supported."
    sdp_node = sdp_nodes[0]
    sdp_node = getCustomOp(sdp_node)
    dataflow_model_filename = sdp_node.get_nodeattr("model")
    if cfg.save_intermediate_models:
        parent_model.save(
            cfg.output_dir + "/intermediate_models/dataflow_parent.onnx"
        )
    model = ModelWrapper(dataflow_model_filename)
    return model

def step_synthesize_bitfile(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Synthesize a bitfile using the specified shell flow, with either Vivado
    or Vitis, to target the specified board."""

    if DataflowOutputType.BITFILE in cfg.generate_outputs:
        bitfile_dir = cfg.output_dir + "/bitfile"
        os.makedirs(bitfile_dir, exist_ok=True)
        report_dir = cfg.output_dir + "/report"
        os.makedirs(report_dir, exist_ok=True)
        partition_model_dir = cfg.output_dir + "/intermediate_models/kernel_partitions"
        if cfg.shell_flow_type == ShellFlowType.VIVADO_ZYNQ:
            model = model.transform(
                ZynqBuild(
                    cfg.board,
                    cfg.synth_clk_period_ns,
                    cfg.enable_hw_debug,
                    partition_model_dir=partition_model_dir,
                )
            )
            copy(model.get_metadata_prop("bitfile"), bitfile_dir + "/finn-accel.bit")
            copy(
                model.get_metadata_prop("hw_handoff"), bitfile_dir + "/finn-accel.hwh"
            )
            copy(
                model.get_metadata_prop("vivado_synth_rpt"),
                report_dir + "/post_synth_resources.xml",
            )
            vivado_pynq_proj_dir = model.get_metadata_prop("vivado_pynq_proj")
            timing_rpt = (
                "%s/finn_zynq_link.runs/impl_1/top_wrapper_timing_summary_routed.rpt"
                % vivado_pynq_proj_dir
            )
            copy(timing_rpt, report_dir + "/post_route_timing.rpt")
        elif cfg.shell_flow_type == ShellFlowType.VITIS_ALVEO:
            model = model.transform(
                VitisBuild(
                    cfg._resolve_fpga_part(),
                    cfg.synth_clk_period_ns,
                    cfg.vitis_platform,
                    strategy=cfg._resolve_vitis_opt_strategy(),
                    enable_debug=cfg.enable_hw_debug,
                    floorplan_file=cfg.vitis_floorplan_file,
                    partition_model_dir=partition_model_dir,
                )
            )
            copy(
                model.get_metadata_prop("bitfile"), bitfile_dir + "/finn-accel.xclbin"
            )
            copy(
                model.get_metadata_prop("vivado_synth_rpt"),
                report_dir + "/post_synth_resources.xml",
            )
        else:
            raise Exception(
                "Unrecognized shell_flow_type: " + str(cfg.shell_flow_type)
            )
        print("Bitfile written into " + bitfile_dir)
    return model

def test_batchnorm_to_affine_epsilon(epsilon):
    """Dummy batchnorm node to test out the epsilon attribute."""

    batchnorm_node = onnx.helper.make_node(
        "BatchNormalization",
        inputs=["x", "s", "bias", "mean", "var"],
        outputs=["y"],
        epsilon=epsilon,
    )

    x = onnx.helper.make_tensor_value_info("x", onnx.TensorProto.FLOAT, [1, 3, 5, 5])
    s = onnx.helper.make_tensor_value_info("s", onnx.TensorProto.FLOAT, [3])
    bias = onnx.helper.make_tensor_value_info("bias", onnx.TensorProto.FLOAT, [3])
    mean = onnx.helper.make_tensor_value_info("mean", onnx.TensorProto.FLOAT, [3])
    var = onnx.helper.make_tensor_value_info("var", onnx.TensorProto.FLOAT, [3])
    y = onnx.helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [1, 3, 5, 5])

    # Graph
    graph = onnx.helper.make_graph(
        nodes=[batchnorm_node],
        name="test_batchnorm_graph",
        inputs=[x],
        outputs=[y],
        value_info=[s, bias, mean, var],
    )

    onnx_model = onnx.helper.make_model(graph, producer_name="test_batchnorm-model")
    model = ModelWrapper(onnx_model)

    model.set_initializer("s", np.array([1, 2, 3]).astype(np.float32))
    model.set_initializer("bias", np.array([1, 2, 3]).astype(np.float32))
    model.set_initializer("mean", np.array([3, 4, 5]).astype(np.float32))
    model.set_initializer("var", np.array([0.5, 0.7, 0.3]).astype(np.float32))

    i_val = np.arange(0, 3 * 5 * 5, dtype=np.float32)
    i_val = np.reshape(i_val, [1, 3, 5, 5])
    input_dict = {"x": i_val}
    output_node_name = "y"

    output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
    output_original = output_dict[output_node_name]

    model_lowered = model.transform(BatchNormToAffine())
    output_dict = oxe.execute_onnx(
        model_lowered, input_dict, return_full_exec_context=True
    )
    output_lowered = output_dict[output_node_name]

    assert (output_original == output_lowered).all()

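# Numpy sketch (not part of the original test) of the algebra BatchNormToAffine
# relies on: batchnorm y = s * (x - mean) / sqrt(var + eps) + bias collapses
# into the affine form y = A * x + B with A = s / sqrt(var + eps) and
# B = bias - mean * A.
import numpy as np

x = np.random.rand(1, 3, 5, 5).astype(np.float32)
s = np.array([1, 2, 3], np.float32).reshape(1, 3, 1, 1)
bias = np.array([1, 2, 3], np.float32).reshape(1, 3, 1, 1)
mean = np.array([3, 4, 5], np.float32).reshape(1, 3, 1, 1)
var = np.array([0.5, 0.7, 0.3], np.float32).reshape(1, 3, 1, 1)
eps = 1e-5

bn = s * (x - mean) / np.sqrt(var + eps) + bias
A = s / np.sqrt(var + eps)
B = bias - mean * A
assert np.allclose(bn, A * x + B)
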
def test_infer_shapes():
    # load the onnx model
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    graph = model.graph

    # multi-thresholding node to be inserted between the first Relu and MaxPool node
    # get Relu node to use data
    Relu_node = graph.node[3]
    assert Relu_node.op_type == "Relu", "The wrong model was chosen for the check"

    # create thresholds tensor as constant
    mt_thresh0 = helper.make_tensor_value_info(
        "mt_thresh0", TensorProto.FLOAT, [8, 7]
    )

    # random numbers for the thresholds;
    # thresholds for one channel have to be sorted to guarantee the correct behavior
    mt_thresh0_values = np.empty([8, 7], dtype=np.float32)
    for i in range(len(mt_thresh0_values)):
        mt_thresh0_values[i] = np.sort(np.random.random_sample(7) * 10)

    model.set_initializer(mt_thresh0.name, mt_thresh0_values)

    # add multi-thresholding node and change Relu node
    mt_node = helper.make_node(
        "MultiThreshold",
        ["mt_v0", "mt_thresh0"],
        [Relu_node.output[0]],
        domain="finn.custom_op.general",
    )
    Relu_node.output[0] = "mt_v0"

    # explicitly remove any present shape from ReLU and MultiThreshold outputs
    util.remove_by_name(model.graph.value_info, Relu_node.output[0])
    util.remove_by_name(model.graph.value_info, mt_node.output[0])
    graph.node.insert(4, mt_node)

    # first check routine: at least one shape should not be specified yet
    assert not model.check_all_tensor_shapes_specified(), (
        "All tensors are already specified before the shape inference execution"
    )

    # perform shape inference on mixed model
    model = model.transform(InferShapes())

    # second check routine: now all shapes should be specified,
    # and the mt_node output shape should be (1, 8, 28, 28)
    assert model.check_all_tensor_shapes_specified(), (
        "There are still tensors that are not specified"
    )
    assert model.get_tensor_shape(mt_node.output[0]) == [
        1,
        8,
        28,
        28,
    ], "output of multi-thresholding node has wrong shape"

def test_end2end_tfc_w1a2_create_dataflow_partition():
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_hls_layers.onnx")
    parent_model = model.transform(CreateDataflowPartition())
    parent_model.save(build_dir + "/end2end_tfc_w1a2_dataflow_parent.onnx")
    sdp_node = getCustomOp(parent_model.graph.node[2])
    dataflow_model_filename = sdp_node.get_nodeattr("model")
    dataflow_model = ModelWrapper(dataflow_model_filename)
    dataflow_model.save(build_dir + "/end2end_tfc_w1a2_dataflow_model.onnx")

def test_ipstitch_rtlsim(self, topology, wbits, abits, kind):
    prev_chkpt_name = get_checkpoint_name(
        topology, wbits, abits, "fifodepth_" + kind
    )
    model = load_test_checkpoint_or_skip(prev_chkpt_name)
    test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
    model = model.transform(InsertDWC())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(AnnotateCycles())
    perf = model.analysis(dataflow_performance)
    latency = perf["critical_path_cycles"]
    # rtlsim only supports impl_style=rtl for StreamingFIFO, ensure that
    for fifo_layer in model.get_nodes_by_op_type("StreamingFIFO"):
        getCustomOp(fifo_layer).set_nodeattr("impl_style", "rtl")
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
    model = model.transform(PrepareRTLSim())
    model.set_metadata_prop("exec_mode", "rtlsim")
    os.environ["LIVENESS_THRESHOLD"] = str(int(latency * 1.1))
    if rtlsim_trace:
        model.set_metadata_prop(
            "rtlsim_trace", "%s_w%da%d.vcd" % (topology, wbits, abits)
        )
        os.environ["RTLSIM_TRACE_DEPTH"] = "3"
    rtlsim_chkpt = get_checkpoint_name(
        topology, wbits, abits, "ipstitch_rtlsim_" + kind
    )
    model.save(rtlsim_chkpt)
    parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
    (input_tensor_npy, output_tensor_npy) = get_golden_io_pair(
        topology, wbits, abits, return_topk=1
    )
    y = execute_parent(parent_chkpt, rtlsim_chkpt, input_tensor_npy)
    model = ModelWrapper(rtlsim_chkpt)
    perf["cycles_rtlsim"] = model.get_metadata_prop("cycles_rtlsim")
    # warnings.warn("Estimated & rtlsim performance: " + str(perf))
    # for (k, v) in perf.items():
    #     update_dashboard_data(topology, wbits, abits, k, v)
    update_dashboard_data(
        topology, wbits, abits, "cycles_rtlsim", perf["cycles_rtlsim"]
    )
    assert np.isclose(y, output_tensor_npy).all()

def make_randomly_sorted_linear_model(num_of_nodes, seed=None):
    if seed is not None:
        np.random.seed(seed)

    ch = 2
    ifmdim = 16
    input_shape = (1, ch, ifmdim, ifmdim)

    top_in = helper.make_tensor_value_info("t0", TensorProto.FLOAT, input_shape)
    top_out = helper.make_tensor_value_info(
        "t" + str(num_of_nodes), TensorProto.FLOAT, input_shape
    )

    value_info = []
    nodes = []
    for i in range(num_of_nodes):
        nodes += [
            helper.make_node("Add", ["t" + str(i), "p" + str(i)], ["t" + str(i + 1)])
        ]
        value_info += [
            helper.make_tensor_value_info(
                "p" + str(i), TensorProto.FLOAT, input_shape
            )
        ]

    nodes = np.random.permutation(nodes)

    modelproto = helper.make_model(
        helper.make_graph(
            name="test",
            inputs=[top_in],
            outputs=[top_out],
            value_info=value_info,
            nodes=nodes,
        )
    )
    model = ModelWrapper(modelproto)
    model = model.transform(InferShapes())

    for i in range(num_of_nodes):
        model.set_initializer(
            "p" + str(i), np.random.rand(*input_shape).astype(np.float32)
        )

    return model

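# Usage sketch (not part of the original source): a randomly permuted linear
# chain like this is a natural fixture for a topological-sort transformation.
# SortGraph from finn.transformation.general is assumed to be available here.
from finn.transformation.general import SortGraph

model = make_randomly_sorted_linear_model(8, seed=0)
model = model.transform(SortGraph())
# after sorting, node i should consume tensor t<i> and produce t<i+1>
for i, node in enumerate(model.graph.node):
    assert node.input[0] == "t" + str(i)
    assert node.output[0] == "t" + str(i + 1)
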
def test_end2end_cnv_w1a1_verify_all():
    # use the streamlined model as the "golden" model for right answers
    golden = ModelWrapper(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
    iname = golden.graph.input[0].name
    oname = golden.graph.output[0].name
    # load one of the test vectors
    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    x = input_tensor
    # x = np.zeros(ishape, dtype=np.float32)
    ret_golden = execute_onnx(golden, {iname: x}, True)
    y_golden = ret_golden[oname]
    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx")
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name
    # produce results with cppsim
    sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
    sdp_node = getCustomOp(sdp_node)
    sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w1a1_ipgen_cppsim.onnx")
    ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
    y_cppsim = ret_cppsim[oname]
    # produce results with node-by-node rtlsim
    sdp_node.set_nodeattr(
        "model", build_dir + "/end2end_cnv_w1a1_ipgen_nodebynode_rtlsim.onnx"
    )
    ret_nodebynode_rtlsim = execute_onnx(parent_model, {iname: x}, True)
    y_nodebynode_rtlsim = ret_nodebynode_rtlsim[oname]
    # produce results with whole-network (stitched ip) rtlsim
    sdp_node.set_nodeattr(
        "model", build_dir + "/end2end_cnv_w1a1_ipstitch_whole_rtlsim.onnx"
    )
    # this is a particularly long-running test, set liveness threshold to unlimited
    os.environ["LIVENESS_THRESHOLD"] = "-1"
    ret_whole_rtlsim = execute_onnx(parent_model, {iname: x}, True)
    y_whole_rtlsim = ret_whole_rtlsim[oname]
    assert np.isclose(y_golden, y_cppsim).all()
    assert np.isclose(y_golden, y_nodebynode_rtlsim).all()
    assert np.isclose(y_golden, y_whole_rtlsim).all()
    assert np.argmax(y_golden) == 3
