def verify_step(model: ModelWrapper, cfg: DataflowBuildConfig, step_name: str, need_parent: bool):
    """Run numerical verification of ``model`` for the given build step.

    Executes the model (standalone, or inside its parent dataflow graph when
    ``need_parent`` is set), compares against the expected output configured
    on ``cfg``, and saves the produced output as a .npy file whose filename
    records SUCCESS/FAIL.
    """
    print("Running verification for " + step_name)
    out_dir = cfg.output_dir + "/verification_output"
    intermediates_dir = cfg.output_dir + "/intermediate_models"
    os.makedirs(out_dir, exist_ok=True)
    # NOTE(review): relies on a private helper of DataflowBuildConfig
    (in_npy, exp_out_npy) = cfg._resolve_verification_io_pair()
    if need_parent:
        # the child model must be executed in the context of its parent graph
        assert (
            cfg.save_intermediate_models
        ), "Enable save_intermediate_models for verification"
        parent_fn = intermediates_dir + "/dataflow_parent.onnx"
        child_fn = intermediates_dir + "/verify_%s.onnx" % step_name
        model.save(child_fn)
        out_npy = execute_parent(parent_fn, child_fn, in_npy)
    else:
        # standalone ONNX execution of the model under verification
        feed = {model.graph.input[0].name: in_npy}
        out_npy = execute_onnx(model, feed)[model.graph.output[0].name]
    ok = np.isclose(exp_out_npy, out_npy, atol=1e-3).all()
    res_str = "SUCCESS" if ok else "FAIL"
    np.save(out_dir + "/verify_%s_%s.npy" % (step_name, res_str), out_npy)
    print("Verification for %s : %s" % (step_name, res_str))
def test_fpgadataflow_ipstitch_pynq_synth():
    """Synthesize the PYNQ project and check that a bitfile was produced."""
    synth_model = ModelWrapper(
        ip_stitch_model_dir + "/test_fpgadataflow_pynq_projgen.onnx"
    )
    synth_model = synth_model.transform(SynthPYNQProject())
    # synthesis must have recorded a path to an existing bitfile
    bitfile = synth_model.get_metadata_prop("vivado_pynq_bitfile")
    assert bitfile is not None
    assert os.path.isfile(bitfile)
    synth_model.save(
        ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_pynq_synth.onnx"
    )
def test_export(self, topology, wbits, abits, QONNX_export):
    """Export the trained network to (FINN-)ONNX and record dashboard data."""
    if wbits > abits:
        pytest.skip("No wbits > abits end2end network configs for now")
    if topology == "lfc" and not (wbits == 1 and abits == 1):
        pytest.skip("Skipping certain lfc configs")
    (model, ishape) = get_trained_network_and_ishape(topology, wbits, abits)
    chkpt_name = get_checkpoint_name(topology, wbits, abits, QONNX_export, "export")
    if QONNX_export:
        # QONNX route: export, clean up, then convert to the FINN dialect
        BrevitasONNXManager.export(model, ishape, chkpt_name)
        qonnx_cleanup(chkpt_name, out_file=chkpt_name)
        model = ModelWrapper(chkpt_name)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(chkpt_name)
    else:
        bo.export_finn_onnx(model, ishape, chkpt_name)
    # record network name, timestamp and FINN commit on the dashboard
    net_name = "%s_w%da%d" % (topology, wbits, abits)
    update_dashboard_data(topology, wbits, abits, "network", net_name)
    dtstr = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    update_dashboard_data(topology, wbits, abits, "datetime", dtstr)
    commit = subprocess.check_output(
        ["git", "rev-parse", "HEAD"], cwd="/workspace/finn"
    )
    commit = commit.decode("utf-8").strip()
    update_dashboard_data(topology, wbits, abits, "finn-commit", commit)
    assert os.path.isfile(chkpt_name)
def test_fpgadataflow_ipstitch_pynq_projgen():
    """Generate a PYNQ Vivado project from the stitched-IP model."""
    proj_model = ModelWrapper(
        ip_stitch_model_dir + "/test_fpgadataflow_ip_stitch.onnx"
    )
    proj_model = proj_model.transform(MakePYNQProject(test_pynq_board))
    # a project directory must have been created and recorded
    proj_dir = proj_model.get_metadata_prop("vivado_pynq_proj")
    assert proj_dir is not None
    assert os.path.isdir(proj_dir)
    proj_model.save(ip_stitch_model_dir + "/test_fpgadataflow_pynq_projgen.onnx")
def test_fpgadataflow_ipstitch_pynq_deployment_folder():
    """Deploy the stitched design to a PYNQ board and verify the recorded
    deployment metadata. Skipped unless the PYNQ_IP env var is set.

    Fix: the try body previously wrapped the entire test, so a KeyError
    raised anywhere (not just the env lookup) silently skipped the test
    instead of failing it. The try is now limited to the env lookup.
    """
    model = ModelWrapper(
        ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_pynq_driver.onnx"
    )
    try:
        ip = os.environ["PYNQ_IP"]  # no default for this one; skip if not defined
    except KeyError:
        pytest.skip("PYNQ board IP address not specified")
    if ip == "":
        pytest.skip("PYNQ board IP address not specified")
    username = os.getenv("PYNQ_USERNAME", "xilinx")
    password = os.getenv("PYNQ_PASSWORD", "xilinx")
    target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
    model = model.transform(DeployToPYNQ(ip, username, password, target_dir))
    # the transform must echo the deployment settings back as metadata
    assert model.get_metadata_prop("pynq_ip") == ip
    assert model.get_metadata_prop("pynq_username") == username
    assert model.get_metadata_prop("pynq_password") == password
    assert model.get_metadata_prop("pynq_target_dir") == target_dir
    deployment_dir = model.get_metadata_prop("pynq_deploy_dir")
    assert deployment_dir is not None
    assert os.path.isdir(deployment_dir)
    model.save(
        ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_pynq_deployment.onnx"
    )
def attach_child_models_to_parent_model(parent_model, ordered_list_of_child_model_paths):
    """Point each StreamingDataflowPartition node of ``parent_model`` at the
    corresponding child model file, then rename each child model's top-level
    I/O tensors to match the partition node's input/output tensor names.

    The child model list is assumed ordered: entry 0 belongs to the first
    partition node that is accessed.
    """
    sdp_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")
    num_sdpn = len(sdp_nodes)
    num_child_models = len(ordered_list_of_child_model_paths)
    if num_child_models != num_sdpn:
        raise ValueError(
            f"Number of child models supplied ({num_child_models}) does not match number of StreamingDataflowPartition Nodes ({num_sdpn})"
        )
    for sdpn, child_path in zip(sdp_nodes, ordered_list_of_child_model_paths):
        getCustomOp(sdpn).set_nodeattr("model", child_path)
        # align the child model's I/O tensor names with the partition node's
        child_model = ModelWrapper(child_path)
        child_model.rename_tensor(child_model.graph.input[0].name, sdpn.input[0])
        child_model.rename_tensor(child_model.graph.output[0].name, sdpn.output[0])
        child_model.save(child_path)
    return parent_model
def test_end2end_tfc_w1a2_fold_and_tlastmarker():
    """Apply folding factors to the four FC layers and insert a TLastMarker."""
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_dataflow_model.onnx")
    # per-layer folding config: (SIMD, PE, outFIFODepth) for graph nodes 0-3
    folding = [
        (8, 16, 4),
        (16, 16, 4),
        (16, 16, 4),
        (16, 10, 50),
    ]
    for fc_node, (simd, pe, ofifo_depth) in zip(model.graph.node, folding):
        fc_inst = getCustomOp(fc_node)
        fc_inst.set_nodeattr("SIMD", simd)
        fc_inst.set_nodeattr("PE", pe)
        fc_inst.set_nodeattr("outFIFODepth", ofifo_depth)
    # only the first layer gets an explicit input FIFO depth
    getCustomOp(model.graph.node[0]).set_nodeattr("inFIFODepth", 50)
    model = model.transform(InsertTLastMarker())
    model.save(build_dir + "/end2end_tfc_w1a2_folded.onnx")
def test_fpgadataflow_ipstitch_pynq_driver():
    """Generate the PYNQ Python driver and check the driver dir exists."""
    drv_model = ModelWrapper(
        ip_stitch_model_dir + "/test_fpgadataflow_pynq_projgen.onnx"
    )
    drv_model = drv_model.transform(MakePYNQDriver())
    driver_dir = drv_model.get_metadata_prop("pynq_driver_dir")
    assert driver_dir is not None
    assert os.path.isdir(driver_dir)
    drv_model.save(
        ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_pynq_driver.onnx"
    )
def test_brevitas_cnv_export_exec(wbits, abits, QONNX_export):
    """Export the CNV network and compare FINN execution against Brevitas."""
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")
    cnv = get_test_model_trained("CNV", wbits, abits)
    ishape = (1, 3, 32, 32)
    if QONNX_export:
        # QONNX route: export, clean up, then convert to the FINN dialect
        BrevitasONNXManager.export(cnv, ishape, export_onnx_path)
        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(export_onnx_path)
    else:
        bo.export_finn_onnx(cnv, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    for trn in [
        GiveUniqueNodeNames(),
        InferShapes(),
        FoldConstants(),
        RemoveStaticGraphInputs(),
    ]:
        model = model.transform(trn)
    assert len(model.graph.input) == 1
    assert len(model.graph.output) == 1
    fn = pk.resource_filename("finn.qnn-data", "cifar10/cifar10-test-data-class3.npz")
    x = np.load(fn)["arr_0"].astype(np.float32)
    x = x / 255
    assert x.shape == (1, 3, 32, 32)
    # run using FINN-based execution
    output_dict = oxe.execute_onnx(model, {model.graph.input[0].name: x}, True)
    produced = output_dict[model.graph.output[0].name]
    # do forward pass in PyTorch/Brevitas
    expected = cnv.forward(torch.from_numpy(x).float()).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
    # the fixture image is a class-3 sample, so the prediction must be 3
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path)
def test_end2end_cnv_w1a1_fold_and_tlastmarker():
    """Set per-layer folding, then insert DWCs, FIFOs and a TLastMarker."""
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_model.onnx")
    # each tuple is (PE, SIMD, in_fifo_depth) for a layer
    folding = [
        (16, 3, 128),
        (32, 32, 128),
        (16, 32, 128),
        (16, 32, 128),
        (4, 32, 81),
        (1, 32, 2),
        (1, 4, 2),
        (1, 8, 128),
        (5, 1, 3),
    ]
    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    for layer, (pe, simd, in_fifo) in zip(fc_layers, folding):
        layer_inst = getCustomOp(layer)
        layer_inst.set_nodeattr("PE", pe)
        layer_inst.set_nodeattr("SIMD", simd)
        layer_inst.set_nodeattr("inFIFODepth", in_fifo)
    # each sliding-window generator takes the SIMD of the i-th folding entry
    swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    for idx, swg in enumerate(swg_layers):
        getCustomOp(swg).set_nodeattr("SIMD", folding[idx][1])
    for trn in [
        InsertDWC(),
        InsertFIFO(),
        InsertTLastMarker(),
        GiveUniqueNodeNames(),
        AnnotateResources("estimate"),
    ]:
        model = model.transform(trn)
    model.save(build_dir + "/end2end_cnv_w1a1_folded.onnx")
def test_brevitas_QConv2d(dw, bias, in_channels, QONNX_export):
    """Export a quantized Brevitas conv (depthwise or pointwise) and compare
    FINN execution against the Brevitas forward pass."""
    ishape = (1, 32, 111, 111)
    if dw is True:
        # depthwise: one group per channel, 3x3 kernel
        # NOTE(review): w_shape/ishape hardcode 32 channels, which assumes
        # in_channels == 32 — confirm against the test parametrization
        groups, out_channels = in_channels, in_channels
        kernel_size, padding, stride = 3, 1, 1
        w_shape = (32, 1, 3, 3)
    else:
        # pointwise: 1x1 kernel, 64 output channels
        groups, out_channels = 1, 64
        kernel_size, padding, stride = 1, 0, 1
        w_shape = (64, 32, 1, 1)
    b_conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        groups=groups,
        kernel_size=kernel_size,
        padding=padding,
        stride=stride,
        bias=bias,
        bias_quant_type=QuantType.FP,
        weight_bit_width=4,
        weight_quant_type=QuantType.INT,
        weight_scaling_impl_type=ScalingImplType.STATS,
        weight_scaling_stats_op=StatsOp.MAX,
        weight_scaling_per_output_channel=True,
        weight_restrict_scaling_type=RestrictValueType.LOG_FP,
        weight_narrow_range=True,
        weight_scaling_min_val=2e-16,
    )
    # install a random INT4 weight tensor and freeze the module
    weight_tensor = gen_finn_dt_tensor(DataType["INT4"], w_shape)
    b_conv.weight = torch.nn.Parameter(torch.from_numpy(weight_tensor).float())
    b_conv.eval()
    if QONNX_export:
        m_path = export_onnx_path
        BrevitasONNXManager.export(b_conv, ishape, m_path)
        qonnx_cleanup(m_path, out_file=m_path)
        model = ModelWrapper(m_path)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(m_path)
    else:
        bo.export_finn_onnx(b_conv, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp = np.random.uniform(low=-1.0, high=1.0, size=ishape).astype(np.float32)
    output_dict = oxe.execute_onnx(model, {model.graph.input[0].name: inp}, True)
    produced = output_dict[model.graph.output[0].name]
    expected = b_conv.forward(torch.from_numpy(inp).float()).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
def test_end2end_cnv_w1a1_convert_to_hls_layers():
    """Convert the streamlined CNV graph into HLS custom-op layers."""
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
    hls_conversions = [
        to_hls.InferBinaryStreamingFCLayer(mem_mode),
        to_hls.InferQuantizedStreamingFCLayer(mem_mode),
        to_hls.InferConvInpGen(),
        to_hls.InferStreamingMaxPool(),
        MoveReshape(),
    ]
    for trn in hls_conversions:
        model = model.transform(trn)
    model.save(build_dir + "/end2end_cnv_w1a1_hls_layers.onnx")
def test_end2end_tfc_w1a1_convert_to_hls_layers():
    """Convert the streamlined TFC graph into HLS custom-op layers."""
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_streamlined.onnx")
    for trn in [
        ConvertBipolarMatMulToXnorPopcount(),
        absorb.AbsorbAddIntoMultiThreshold(),
        absorb.AbsorbMulIntoMultiThreshold(),
        RoundAndClipThresholds(),
        to_hls.InferBinaryStreamingFCLayer(),
    ]:
        model = model.transform(trn)
    model.save(build_dir + "/end2end_tfc_w1a1_hls_layers.onnx")
def test_end2end_tfc_w1a2_create_dataflow_partition():
    """Split the TFC HLS-layer graph into parent + dataflow child models."""
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_hls_layers.onnx")
    parent = model.transform(CreateDataflowPartition())
    parent.save(build_dir + "/end2end_tfc_w1a2_dataflow_parent.onnx")
    # node 2 is the StreamingDataflowPartition holding the child model path
    child_path = getCustomOp(parent.graph.node[2]).get_nodeattr("model")
    ModelWrapper(child_path).save(
        build_dir + "/end2end_tfc_w1a2_dataflow_model.onnx"
    )
def test_end2end_cnv_w1a1_import_and_tidy():
    """Tidy the exported CNV model: shape inference, constant folding and
    readable naming."""
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_export.onnx")
    tidy_transforms = [
        DoubleToSingleFloat(),
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
    ]
    for trn in tidy_transforms:
        model = model.transform(trn)
    model.save(build_dir + "/end2end_cnv_w1a1_tidy.onnx")
def test_end2end_tfc_w1a2_import_and_tidy():
    """Tidy the exported TFC model: shape/datatype inference, constant
    folding and readable naming."""
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_export.onnx")
    for trn in [
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataTypes(),
    ]:
        model = model.transform(trn)
    model.save(build_dir + "/end2end_tfc_w1a2_tidy.onnx")
def test_end2end_cnv_w1a1_create_dataflow_partition():
    """Split the CNV HLS-layer graph into parent + dataflow child models."""
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_hls_layers.onnx")
    parent = model.transform(CreateDataflowPartition())
    parent.save(build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx")
    # locate the partition node and extract its child model
    sdp_inst = getCustomOp(
        parent.get_nodes_by_op_type("StreamingDataflowPartition")[0]
    )
    child_path = sdp_inst.get_nodeattr("model")
    ModelWrapper(child_path).save(
        build_dir + "/end2end_cnv_w1a1_dataflow_model.onnx"
    )
def test_end2end_cnv_w1a1_streamline():
    """Streamline the tidied CNV model and lower convolutions to matmuls."""
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_tidy.onnx")
    for trn in [
        Streamline(),
        LowerConvsToMatMul(),
        MakeMaxPoolNHWC(),
        absorb.AbsorbTransposeIntoMultiThreshold(),
        ConvertBipolarMatMulToXnorPopcount(),
        # second streamlining pass after the bipolar conversion
        Streamline(),
    ]:
        model = model.transform(trn)
    model.save(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
def apply(self, model):
    """Annotate per-node resource usage and a graph-level total.

    Selects a resource-estimation analysis function based on ``self.mode``
    ("estimate", "hls" or "synth"), stores per-node results as the
    ``res_<mode>`` node attribute, recurses into StreamingDataflowPartition
    child models, and records a summed total as the ``res_total_<mode>``
    model metadata property. Returns (model, False) so the transform is
    not re-applied.
    """
    graph = model.graph
    # pick the analysis function matching the requested mode
    if self.mode == "estimate":
        res_fxn = res_estimation
    elif self.mode == "hls":
        res_fxn = hls_synth_res_estimation
    elif self.mode == "synth":
        res_fxn = post_synth_res
    else:
        raise Exception("Unrecognized mode for AnnotateResources")
    # run the analysis only once; a pre-populated res_dict (e.g. passed in
    # by a parent AnnotateResources) is reused as-is
    if self.res_dict is None:
        self.res_dict = model.analysis(res_fxn)
    children_dict = {}
    # annotate node resources
    for node in graph.node:
        if _is_fpgadataflow_node(
                node) and node.name in self.res_dict.keys():
            op_inst = registry.getCustomOp(node)
            op_inst.set_nodeattr("res_" + self.mode,
                                 str(self.res_dict[node.name]))
            children_dict[node.name] = self.res_dict[node.name]
        elif node.op_type == "StreamingDataflowPartition":
            # recurse into model to manually annotate per-layer resources
            sdp_model_filename = getCustomOp(node).get_nodeattr("model")
            sdp_model = ModelWrapper(sdp_model_filename)
            sdp_model = sdp_model.transform(
                AnnotateResources(self.mode, self.res_dict))
            sdp_dict = sdp_model.get_metadata_prop("res_total_" + self.mode)
            # NOTE(review): metadata is a stringified dict; eval is used to
            # parse it — assumes the metadata is trusted (written by FINN)
            sdp_dict = eval(sdp_dict)
            # save transformed model
            sdp_model.save(sdp_model_filename)
            # set res attribute for sdp node
            getCustomOp(node).set_nodeattr("res_" + self.mode, str(sdp_dict))
            children_dict[node.name] = sdp_dict
    self.res_dict.update(children_dict)
    # sum each resource type over all annotated children
    total_dict = {}
    for lname in children_dict.keys():
        layer_res_dict = self.res_dict[lname]
        for r_type in layer_res_dict.keys():
            r_amount = layer_res_dict[r_type]
            r_amount = float(r_amount)
            if r_type in total_dict.keys():
                total_dict[r_type] += r_amount
            else:
                total_dict[r_type] = r_amount
    # efficiency metrics are averaged (over all graph nodes), not summed
    for k in total_dict.keys():
        if "efficiency" in k:
            total_dict[k] = total_dict[k] / len(graph.node)
    model.set_metadata_prop("res_total_" + self.mode, str(total_dict))
    # propagate a "(top)" summary entry if the analysis produced one
    if "(top)" in self.res_dict.keys():
        top_dict = self.res_dict["(top)"]
        model.set_metadata_prop("res_total_top_" + self.mode, str(top_dict))
    return (model, False)
def hw_accelerate_parent_model_setup(parent_onnx_model_dir, remote_exec_model_dir):
    """Load the parent dataflow model and point its partition node at the
    remote-execution child model.

    Fix: the ``remote_exec_model_dir`` argument was previously ignored and
    the global ``REMOTE_EXEC_MODEL_DIR`` used instead; the parameter is now
    honored so callers can supply a different child model path.

    :param parent_onnx_model_dir: path to the parent dataflow ONNX model
    :param remote_exec_model_dir: path to the child model set up for remote
        bitfile execution
    :return: the modified (and saved) parent ModelWrapper
    """
    parent_model = ModelWrapper(parent_onnx_model_dir)
    # Need to look into parent model to customize the node index
    sdp_node = parent_model.graph.node[1]
    getCustomOp(sdp_node).set_nodeattr("model", remote_exec_model_dir)
    # NOTE(review): save location still uses the global BASE_DIR — confirm
    # whether this should also be parameterized
    parent_model.save(
        BASE_DIR + "/qnn_harnn_model_dataflow_parent_with_remote_bitfile_exec.onnx"
    )
    return parent_model
def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type, QONNX_export):
    """Export a QuantReLU activation and check FINN execution matches the
    Brevitas forward pass; dumps debug info on mismatch before asserting."""
    min_val = -1.0
    ishape = (1, 15)
    b_act = QuantReLU(
        bit_width=abits,
        max_val=max_val,
        scaling_impl_type=scaling_impl_type,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        quant_type=QuantType.INT,
    )
    if scaling_impl_type == ScalingImplType.PARAMETER:
        # load a fixed learned scale so the test is deterministic
        checkpoint = {
            "act_quant_proxy.fused_activation_quant_proxy.tensor_quant."
            "scaling_impl.learned_value": torch.tensor(0.49).type(
                torch.FloatTensor
            )
        }
        b_act.load_state_dict(checkpoint)
    if QONNX_export:
        m_path = export_onnx_path
        BrevitasONNXManager.export(b_act, ishape, m_path)
        qonnx_cleanup(m_path, out_file=m_path)
        model = ModelWrapper(m_path)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(m_path)
    else:
        bo.export_finn_onnx(b_act, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
        np.float32
    )
    output_dict = oxe.execute_onnx(model, {model.graph.input[0].name: inp}, True)
    produced = output_dict[model.graph.output[0].name]
    inp_torch = torch.from_numpy(inp).float()
    b_act.eval()
    expected = b_act.forward(inp_torch).detach().numpy()
    if not np.isclose(produced, expected, atol=1e-3).all():
        # print diagnostics before the assert below fails
        print(abits, max_val, scaling_impl_type)
        print("scale: ", b_act.quant_act_scale().type(torch.FloatTensor).detach())
        if abits < 5:
            print(
                "thres:",
                ", ".join(["{:8.4f}".format(x) for x in b_act.export_thres[0]]),
            )
        print("input:", ", ".join(["{:8.4f}".format(x) for x in inp_torch[0]]))
        print("prod :", ", ".join(["{:8.4f}".format(x) for x in produced[0]]))
        print("expec:", ", ".join(["{:8.4f}".format(x) for x in expected[0]]))
    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
def apply(self, model): _check_vitis_envvars() # first infer layouts model = model.transform(InferDataLayouts()) # prepare at global level, then break up into kernels prep_transforms = [ MakePYNQDriver(platform="alveo"), InsertIODMA(512), InsertDWC(), ] for trn in prep_transforms: model = model.transform(trn) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(Floorplan(floorplan=self.floorplan_file)) model = model.transform(CreateDataflowPartition()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) # Build each kernel individually sdp_nodes = model.get_nodes_by_op_type("StreamingDataflowPartition") for sdp_node in sdp_nodes: sdp_node = getCustomOp(sdp_node) dataflow_model_filename = sdp_node.get_nodeattr("model") kernel_model = ModelWrapper(dataflow_model_filename) kernel_model = kernel_model.transform(InsertFIFO()) kernel_model = kernel_model.transform( InsertTLastMarker(both=True, external=False, dynamic=False)) kernel_model = kernel_model.transform(GiveUniqueNodeNames()) kernel_model.save(dataflow_model_filename) kernel_model = kernel_model.transform( PrepareIP(self.fpga_part, self.period_ns)) kernel_model = kernel_model.transform(HLSSynthIP()) kernel_model = kernel_model.transform( CreateStitchedIP(self.fpga_part, self.period_ns, sdp_node.onnx_node.name, True)) kernel_model = kernel_model.transform( CreateVitisXO(sdp_node.onnx_node.name)) kernel_model.set_metadata_prop("platform", "alveo") kernel_model.save(dataflow_model_filename) # Assemble design from kernels model = model.transform( VitisLink( self.platform, round(1000 / self.period_ns), strategy=self.strategy, enable_debug=self.enable_debug, )) # set platform attribute for correct remote execution model.set_metadata_prop("platform", "alveo") return (model, False)
def test_dataflow_partition_create():
    """Create a dataflow partition from the HLS-converted TFC model."""
    # load the onnx model
    raw_m = get_data(
        "finn", "data/onnx/finn-hls-model/tfc_w1_a1_after_conv_to_hls.onnx"
    )
    model = ModelWrapper(raw_m).transform(CreateDataflowPartition())
    # node 2 must be the partition node, backed by a saved child model
    assert model.graph.node[2].op_type == "StreamingDataflowPartition"
    sdp_inst = getCustomOp(model.graph.node[2])
    assert sdp_inst.__class__.__name__ == "StreamingDataflowPartition"
    assert os.path.isfile(sdp_inst.get_nodeattr("model"))
    model.save(build_dir + "/test_dataflow_partition_create.onnx")
def test_end2end_mobilenet_export():
    """Export preprocessing + MobileNet to FINN-ONNX and save golden top-5
    reference outputs computed with Brevitas/PyTorch."""
    # export preprocessing
    preproc_onnx = build_dir + "/end2end_mobilenet_preproc.onnx"
    mean = [0.485, 0.456, 0.406]
    std = 0.226
    ch = 3
    preproc = NormalizePreProc(mean, std, ch)
    bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
    preproc_model = ModelWrapper(preproc_onnx)
    # set input finn datatype to UINT8
    preproc_model.set_tensor_datatype(
        preproc_model.graph.input[0].name, DataType["UINT8"]
    )
    for trn in [
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveUniqueParameterTensors(),
        GiveReadableTensorNames(),
    ]:
        preproc_model = preproc_model.transform(trn)
    preproc_model.save(build_dir + "/end2end_mobilenet_preproc.onnx")
    # export mobilenet
    finn_onnx = build_dir + "/end2end_mobilenet_export.onnx"
    mobilenet = get_test_model_trained("mobilenet", 4, 4)
    bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), finn_onnx)
    # calculate golden output with pytorch/brevitas and save as .npy
    # get single image as input and prepare image
    img = Image.open("/workspace/finn/tests/brevitas/king_charles.jpg")
    # resize smallest side to 256 px (keeping aspect ratio), then crop the
    # central 224x224 window
    img = resize_smaller_side(256, img)
    img = crop_center(224, img)
    # to numpy, transpose (H, W, C) -> (C, H, W) and add batch dim
    img_np = np.asarray(img).copy().astype(np.float32).transpose(2, 0, 1)
    img_np = img_np.reshape(1, 3, 224, 224)
    np.save(build_dir + "/end2end_mobilenet_input.npy", img_np)
    img_torch = torch.from_numpy(img_np).float()
    # do forward pass in PyTorch/Brevitas
    input_tensor = preproc.forward(img_torch)
    golden = mobilenet.forward(input_tensor).detach().numpy()
    golden_topk = golden.flatten()
    golden_top5 = np.flip(np.argsort(golden_topk)[-5:])
    golden_top5_prob = [golden_topk[idx] for idx in golden_top5]
    # save golden output values
    np.save(build_dir + "/end2end_mobilenet_golden_top5.npy", golden_top5)
    np.save(build_dir + "/end2end_mobilenet_golden_top5_prob.npy", golden_top5_prob)
    assert os.path.isfile(finn_onnx)
    assert os.path.isfile(build_dir + "/end2end_mobilenet_preproc.onnx")
def test_fpgadataflow_ipstitch_do_stitch():
    """Stitch the generated IP blocks into one Vivado IP and verify the
    produced project metadata."""
    model = ModelWrapper(
        ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_gen_model.onnx"
    )
    model = model.transform(CodeGen_ipstitch(test_fpga_part))
    proj_dir = model.get_metadata_prop("vivado_stitch_proj")
    assert proj_dir is not None
    assert os.path.isdir(proj_dir)
    # the packaged IP must have been written into the project
    assert os.path.isfile(proj_dir + "/ip/component.xml")
    vlnv = model.get_metadata_prop("vivado_stitch_vlnv")
    assert vlnv is not None
    assert vlnv == "xilinx_finn:finn:finn_design:1.0"
    model.save(ip_stitch_model_dir + "/test_fpgadataflow_ip_stitch.onnx")
def inference_cost(model_filename, *, output_json=None, output_onnx=None, preprocess=True, discount_sparsity=True):
    """Print the inference cost estimate metric for given ONNX model.
    Supports the Quant op for weight/activation quantization.

    :param model_filename: Filename for ONNX model
    :param output_json: Optional JSON filename to save the inference cost dict
    :param output_onnx: Optional ONNX filename to save the final model after any
        preprocessing
    :param preprocess: If set, run preprocessing steps such as shape inference,
        datatype inference and constant folding. Strongly recommended.
    :param discount_sparsity: If set, will discount op cost of MAC ops with a
        constant zero weight, and the mem cost of constant zero weights.
    """
    print("Inference cost for " + model_filename)
    model = ModelWrapper(model_filename)
    if preprocess:
        # mark Quant nodes as FINN custom ops so inference passes handle them
        for qnt_node in model.get_nodes_by_op_type("Quant"):
            qnt_node.domain = "finn.custom_op.general"
        for trn in [
            InferShapes(),
            GiveUniqueParameterTensors(),
            InferDataTypes(),
            FoldConstants(),
            RemoveUnusedTensors(),
            RemoveStaticGraphInputs(),
            InferDataTypes(),
        ]:
            model = model.transform(trn)
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    if output_onnx is not None:
        model.save(output_onnx)
    ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity))
    # derive aggregate metrics from the per-op analysis result
    ret["total_bops"] = compute_bops(ret)
    ret["total_mem_w_bits"] = compute_mem_bits(ret, "mem_w")
    ret["total_mem_o_bits"] = compute_mem_bits(ret, "mem_o")
    if "unsupported" in ret:
        ret["unsupported"] = str(ret["unsupported"])
    print(json.dumps(ret, sort_keys=True, indent=2))
    if output_json is not None:
        with open(output_json, "w") as f:
            json.dump(ret, f, sort_keys=True, indent=2)
def test_dataflow_partition_tlastmarker():
    """Insert a TLastMarker into the partitioned child model and verify its
    attributes; the transform is applied a second time and the result saved."""
    parent = ModelWrapper(build_dir + "/test_dataflow_partition_create.onnx")
    child_path = getCustomOp(parent.graph.node[2]).get_nodeattr("model")
    model = ModelWrapper(child_path)
    model = model.transform(InsertTLastMarker())
    last_node = model.graph.node[-1]
    assert last_node.op_type == "TLastMarker"
    assert last_node.domain == "finn"
    tl_inst = getCustomOp(last_node)
    assert tl_inst.get_nodeattr("NumIters") == 1
    assert tl_inst.get_nodeattr("StreamWidth") == 320
    assert tl_inst.get_nodeattr("ElemWidth") == 32
    model.save(build_dir + "/test_dataflow_partition_tlastmarker.onnx")
    # apply the transform again; the result is saved for inspection
    model = model.transform(InsertTLastMarker())
    model.save(build_dir + "/test_dataflow_partition_tlastmarker2.onnx")
def test_end2end_cnv_w1a1_deploy_on_pynq():
    """Deploy the built CNV accelerator to a PYNQ board. Skipped unless the
    PYNQ_IP env var is set.

    Fix: the try body previously wrapped the entire test, so a KeyError
    raised anywhere (not just the env lookup) silently skipped the test
    instead of failing it. The try is now limited to the env lookup.
    """
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_pynq_driver.onnx")
    try:
        ip = os.environ["PYNQ_IP"]  # no fault for this one; skip if not defined
    except KeyError:
        pytest.skip("PYNQ board IP address not specified")
    if ip == "":
        pytest.skip("PYNQ board IP address not specified")
    username = os.getenv("PYNQ_USERNAME", "xilinx")
    password = os.getenv("PYNQ_PASSWORD", "xilinx")
    # NOTE(review): default is int 22 but env values are str — confirm
    # DeployToPYNQ accepts both types for the port
    port = os.getenv("PYNQ_PORT", 22)
    target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
    model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
    # save the model to be able to link it to the parent
    model.save(build_dir + "/end2end_cnv_w1a1_pynq_deploy.onnx")
def test_fpgadataflow_ipstitch_gen_model():  # exec_mode):
    """Generate and synthesize IP for a single-FC-layer model, producing the
    input model for the IP stitching tests."""
    model = create_one_fc_model()
    if model.graph.node[0].op_type == "StreamingDataflowPartition":
        # unwrap the child dataflow model from the partition node
        sdp_inst = getCustomOp(model.graph.node[0])
        assert sdp_inst.__class__.__name__ == "StreamingDataflowPartition"
        assert os.path.isfile(sdp_inst.get_nodeattr("model"))
        model = ModelWrapper(sdp_inst.get_nodeattr("model"))
    model.set_metadata_prop("exec_mode", "remote_pynq")
    for trn in [
        InsertTLastMarker(),
        GiveUniqueNodeNames(),
        CodeGen_ipgen(test_fpga_part, 5),
        HLSSynth_IPGen(),
    ]:
        model = model.transform(trn)
    # resulting graph: FC layer first, TLastMarker last
    assert model.graph.node[0].op_type == "StreamingFCLayer_Batch"
    assert model.graph.node[-1].op_type == "TLastMarker"
    model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_gen_model.onnx")
def apply(self, model):
    """Annotate each fpgadataflow node with its expected cycle count.

    StreamingDataflowPartition nodes are handled by recursing into their
    child model, which is transformed and saved back in place. Returns
    (model, False) so the transform is not re-applied.
    """
    for node in model.graph.node:
        if _is_fpgadataflow_node(node):
            node_inst = registry.getCustomOp(node)
            node_inst.set_nodeattr("cycles_estimate", node_inst.get_exp_cycles())
        elif node.op_type == "StreamingDataflowPartition":
            # recurse into model to manually annotate per-layer cycles
            child_path = getCustomOp(node).get_nodeattr("model")
            child_model = ModelWrapper(child_path)
            child_model = child_model.transform(AnnotateCycles())
            # save transformed model
            child_model.save(child_path)
    return (model, False)