def test_end2end_tfc_w1a2_convert_to_hls_layers():
    """Convert the streamlined TFC-w1a2 model to HLS layers and save it.

    Loads the streamlined checkpoint from ``build_dir``, applies
    ``InferQuantizedStreamingFCLayer`` and writes the result to
    ``build_dir``.
    """
    streamlined_path = build_dir + "/end2end_tfc_w1a2_streamlined.onnx"
    hls_layers_path = build_dir + "/end2end_tfc_w1a2_hls_layers.onnx"
    # NOTE: ConvertBipolarMatMulToXnorPopcount, AbsorbAddIntoMultiThreshold,
    # AbsorbMulIntoMultiThreshold, RoundAndClipThresholds and
    # InferBinaryStreamingFCLayer are not applied in this step (they were
    # only ever present here as disabled code).
    hls_model = ModelWrapper(streamlined_path).transform(
        to_hls.InferQuantizedStreamingFCLayer()
    )
    hls_model.save(hls_layers_path)
def test_fpgadataflow_ipstitch_pynq_synth():
    """Run SynthPYNQProject on the PYNQ project model and check the bitfile.

    The transformed model must carry a ``vivado_pynq_bitfile`` metadata
    property pointing at an existing file; the model is then saved.
    """
    projgen_path = ip_stitch_model_dir + "/test_fpgadataflow_pynq_projgen.onnx"
    synth_model = ModelWrapper(projgen_path).transform(SynthPYNQProject())
    bitfile_path = synth_model.get_metadata_prop("vivado_pynq_bitfile")
    # the metadata entry must exist and must point at a real file
    assert bitfile_path is not None
    assert os.path.isfile(bitfile_path)
    synth_model.save(
        ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_pynq_synth.onnx"
    )
def test_fpgadataflow_ipstitch_pynq_driver():
    """Run MakePYNQDriver on the PYNQ project model and check the driver dir.

    The transformed model must carry a ``pynq_driver_dir`` metadata property
    pointing at an existing directory; the model is then saved.
    """
    projgen_path = ip_stitch_model_dir + "/test_fpgadataflow_pynq_projgen.onnx"
    driver_model = ModelWrapper(projgen_path).transform(MakePYNQDriver())
    generated_dir = driver_model.get_metadata_prop("pynq_driver_dir")
    # the metadata entry must exist and must point at a real directory
    assert generated_dir is not None
    assert os.path.isdir(generated_dir)
    driver_model.save(
        ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_pynq_driver.onnx"
    )
def load_test_checkpoint_or_skip(filename):
    """Load the given .onnx file as a ModelWrapper, or skip the current test.

    If ``filename`` exists, a ModelWrapper built from it is returned.
    Otherwise a warning is emitted and ``pytest.skip`` is called with the
    same message.
    """
    if os.path.isfile(filename):
        return ModelWrapper(filename)
    skip_msg = filename + " not found from previous test step, skipping"
    warnings.warn(skip_msg)
    pytest.skip(skip_msg)
def test_batchnorm_to_affine_epsilon(epsilon):
    """Check BatchNormToAffine on a lone BatchNormalization node.

    Builds a one-node graph whose ``epsilon`` attribute is the test
    parameter, executes it before and after the transformation and requires
    the two ``y`` outputs to be element-wise equal.
    """

    def float_info(tensor_name, shape):
        # every tensor in this graph is declared as FLOAT
        return onnx.helper.make_tensor_value_info(
            tensor_name, onnx.TensorProto.FLOAT, shape
        )

    bn_node = onnx.helper.make_node(
        "BatchNormalization",
        inputs=["x", "s", "bias", "mean", "var"],
        outputs=["y"],
        epsilon=epsilon,
    )
    x = float_info("x", [1, 3, 5, 5])
    s = float_info("s", [3])
    bias = float_info("bias", [3])
    mean = float_info("mean", [3])
    var = float_info("var", [3])
    y = float_info("y", [1, 3, 5, 5])

    graph = onnx.helper.make_graph(
        nodes=[bn_node],
        name="test_batchnorm_graph",
        inputs=[x],
        outputs=[y],
        value_info=[s, bias, mean, var],
    )
    onnx_model = onnx.helper.make_model(graph, producer_name="test_batchnorm-model")
    model = ModelWrapper(onnx_model)

    # per-channel batchnorm parameters, set as initializers in this order
    param_values = {
        "s": [1, 2, 3],
        "bias": [1, 2, 3],
        "mean": [3, 4, 5],
        "var": [0.5, 0.7, 0.3],
    }
    for tensor_name, values in param_values.items():
        model.set_initializer(tensor_name, np.array(values).astype(np.float32))

    i_val = np.reshape(np.arange(0, 3 * 5 * 5, dtype=np.float32), [1, 3, 5, 5])
    input_dict = {"x": i_val}

    reference = oxe.execute_onnx(
        model, input_dict, return_full_exec_context=True
    )["y"]
    model_lowered = model.transform(BatchNormToAffine())
    lowered = oxe.execute_onnx(
        model_lowered, input_dict, return_full_exec_context=True
    )["y"]

    assert (reference == lowered).all()
def test_end2end_cnv_w1a1_streamline():
    """Streamline the tidied CNV-w1a1 model and save the result.

    The transformations are applied one after another in the listed order;
    each one is instantiated right before it is applied.
    """
    pipeline = (
        Streamline,
        LowerConvsToMatMul,
        MakeMaxPoolNHWC,
        absorb.AbsorbTransposeIntoMultiThreshold,
        ConvertBipolarMatMulToXnorPopcount,
        Streamline,
    )
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_tidy.onnx")
    for make_transform in pipeline:
        model = model.transform(make_transform())
    model.save(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
def apply(self, model):
    """Annotate nodes and the model with resource figures for ``self.mode``.

    ``self.mode`` selects the analysis function ("estimate", "hls" or
    "synth"); any other value raises ``Exception``. If ``self.res_dict`` is
    None it is filled by running that analysis on ``model``.

    For every fpgadataflow node found in ``self.res_dict`` the node attribute
    ``res_<mode>`` is set to the stringified per-node dict. For every
    ``StreamingDataflowPartition`` node the referenced model file is loaded,
    annotated recursively, saved back to the same file, and its
    ``res_total_<mode>`` metadata is copied onto the partition node.

    The per-resource totals over all annotated children are stored in the
    ``res_total_<mode>`` metadata property. Entries whose key contains
    "efficiency" are divided by the total number of nodes in the graph. If
    ``self.res_dict`` has a "(top)" entry it is stored as
    ``res_total_top_<mode>``.

    Returns ``(model, False)``.
    """
    # local import keeps the block self-contained; only used to parse the
    # stringified totals of nested partitions
    import ast

    graph = model.graph
    if self.mode == "estimate":
        res_fxn = res_estimation
    elif self.mode == "hls":
        res_fxn = hls_synth_res_estimation
    elif self.mode == "synth":
        res_fxn = post_synth_res
    else:
        raise Exception("Unrecognized mode for AnnotateResources")
    if self.res_dict is None:
        self.res_dict = model.analysis(res_fxn)

    res_attr = "res_" + self.mode
    total_prop = "res_total_" + self.mode
    children_dict = {}
    # annotate node resources
    for node in graph.node:
        if _is_fpgadataflow_node(node) and node.name in self.res_dict:
            op_inst = registry.getCustomOp(node)
            op_inst.set_nodeattr(res_attr, str(self.res_dict[node.name]))
            children_dict[node.name] = self.res_dict[node.name]
        elif node.op_type == "StreamingDataflowPartition":
            # recurse into model to manually annotate per-layer resources
            sdp_model_filename = getCustomOp(node).get_nodeattr("model")
            sdp_model = ModelWrapper(sdp_model_filename)
            sdp_model = sdp_model.transform(
                AnnotateResources(self.mode, self.res_dict)
            )
            # The totals were written as str(dict) of plain literals, so a
            # literal parser is sufficient. Unlike eval() it cannot execute
            # code embedded in a model file's metadata.
            sdp_dict = ast.literal_eval(sdp_model.get_metadata_prop(total_prop))
            # save transformed model
            sdp_model.save(sdp_model_filename)
            # set res attribute for sdp node
            getCustomOp(node).set_nodeattr(res_attr, str(sdp_dict))
            children_dict[node.name] = sdp_dict
    self.res_dict.update(children_dict)

    # sum every resource type over all annotated children
    total_dict = {}
    for lname in children_dict:
        for r_type, r_amount in self.res_dict[lname].items():
            r_amount = float(r_amount)
            if r_type in total_dict:
                total_dict[r_type] += r_amount
            else:
                total_dict[r_type] = r_amount
    # efficiency figures are averaged over the node count of this graph
    for k in total_dict:
        if "efficiency" in k:
            total_dict[k] = total_dict[k] / len(graph.node)
    model.set_metadata_prop(total_prop, str(total_dict))
    if "(top)" in self.res_dict:
        top_dict = self.res_dict["(top)"]
        model.set_metadata_prop("res_total_top_" + self.mode, str(top_dict))

    return (model, False)
def test_res_estimate(): mw = mh = 4 simd = 1 pe = 1 idt = DataType.INT2 wdt = DataType.INT2 odt = DataType.INT32 actval = odt.min() inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw]) outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, mh]) node_inp_list = ["inp", "weights", "thresh"] FCLayer_node = helper.make_node( "StreamingFCLayer_Batch", node_inp_list, ["outp"], domain="finn", backend="fpgadataflow", resType="ap_resource_lut()", MW=mw, MH=mh, SIMD=simd, PE=pe, inputDataType=idt.name, weightDataType=wdt.name, outputDataType=odt.name, ActVal=actval, binaryXnorMode=0, noActivation=0, ) graph = helper.make_graph(nodes=[FCLayer_node], name="fclayer_graph", inputs=[inp], outputs=[outp]) model = helper.make_model(graph, producer_name="fclayer-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) model.set_tensor_datatype("outp", odt) model.set_tensor_datatype("weights", wdt) model = model.transform(GiveUniqueNodeNames()) prod_resource_estimation = model.analysis(res_estimation) expect_resource_estimation = { "StreamingFCLayer_Batch_0": { "BRAM_18K": 1, 'BRAM_efficiency': 0.001736111111111111, "LUT": 304.4 } } assert check_two_dict_for_equality( prod_resource_estimation, expect_resource_estimation), """The produced output of
def test_conv_lowering_conv_1x1():
    """Lower a single 1x1 Conv with LowerConvsToMatMul and check the result.

    The lowered model must execute like the original and consist of exactly
    the three nodes Transpose, MatMul, Transpose.
    """
    np.random.seed(0)

    channels = 3
    feat_h = 7
    feat_w = 7
    k = 1
    # output feature map is declared with the same spatial size as the input
    input_shape = [1, channels, feat_h, feat_w]
    output_shape = [1, channels, feat_h, feat_w]
    conv_param_shape = [channels, channels, k, k]

    conv_attrs = {
        "dilations": [1, 1],
        "group": 1,
        "kernel_shape": [k, k],
        "pads": [0, 0, 0, 0],
        "strides": [1, 1],
    }

    top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
    top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape)
    weight_info = oh.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape)
    conv_node = oh.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_attrs)
    graph = oh.make_graph(
        name="test",
        inputs=[top_in],
        outputs=[top_out],
        value_info=[weight_info],
        nodes=[conv_node],
    )
    model = ModelWrapper(oh.make_model(graph))
    model = model.transform(InferShapes())
    # weights are drawn first, the input second, to keep the seeded stream
    model.set_initializer("p1", np.random.rand(*conv_param_shape).astype(np.float32))

    new_model = model.transform(LowerConvsToMatMul())
    inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)}

    assert oxe.compare_execution(model, new_model, inp_dict)
    for position, expected_op in enumerate(("Transpose", "MatMul", "Transpose")):
        assert new_model.graph.node[position].op_type == expected_op
    assert len(new_model.graph.node) == 3
def test_change_datalayout_quantavgpool(s, k, ibits, obits, signed, c, idim):
    """Check ChangeDataLayoutQuantAvgPool2d on a single QuantAvgPool2d node.

    The transformed model must execute like the original, gain exactly two
    nodes (a Transpose at each end), and its QuantAvgPool2d node must have
    the ``data_layout`` attribute and both tensor layouts set to NHWC.
    """
    n = 1
    odim = compute_pool_output_dim(idim, k, s)
    # determine input FINN datatype
    dtype = DataType[("INT" if signed is True else "UINT") + str(ibits)]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [n, c, idim, idim])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [n, c, odim, odim])
    pool_node = helper.make_node(
        "QuantAvgPool2d",
        ["inp"],
        ["outp"],
        domain="finn.custom_op.general",
        stride=s,
        kernel=k,
        ibits=ibits,
        obits=obits,
        signed=signed,
        data_layout="NCHW",
    )
    graph = helper.make_graph(
        nodes=[pool_node], name="single-quantavgpool", inputs=[inp], outputs=[outp]
    )

    def tidy(wrapped):
        # the same inference/naming passes run before and after the change
        for make_transform in (
            InferShapes,
            InferDataTypes,
            InferDataLayouts,
            GiveUniqueNodeNames,
            GiveReadableTensorNames,
        ):
            wrapped = wrapped.transform(make_transform())
        return wrapped

    model = tidy(ModelWrapper(helper.make_model(graph)))
    model_transformed = tidy(model.transform(ChangeDataLayoutQuantAvgPool2d()))

    inp_values = gen_finn_dt_tensor(dtype, [n, c, idim, idim])
    idict = {"inp": inp_values}
    assert oxe.compare_execution(model, model_transformed, idict)

    new_nodes = model_transformed.graph.node
    assert len(model.graph.node) + 2 == len(new_nodes)
    assert new_nodes[-1].op_type == "Transpose"
    assert new_nodes[0].op_type == "Transpose"
    # check if QuantAvgPool2d node has datalayout set correctly
    moved_pool = new_nodes[1]
    d_layout = get_by_name(moved_pool.attribute, "data_layout").s.decode("UTF-8")
    assert d_layout == "NHWC"
    assert model_transformed.get_tensor_layout(moved_pool.input[0]) == DataLayout.NHWC
    assert model_transformed.get_tensor_layout(moved_pool.output[0]) == DataLayout.NHWC
def test_brevitas_cnv_w1a1_export():
    """Export an untrained CNV-w1a1 network and check the start of the graph.

    Nodes 2, 3 and 4 of the exported graph must be Sign, Conv and Mul, and
    the first Conv's weight initializer must have shape [64, 3, 3, 3].
    The exported file is removed afterwards, also when a check fails.
    """
    cnv = get_test_model_untrained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path)
    try:
        model = ModelWrapper(export_onnx_path)
        assert model.graph.node[2].op_type == "Sign"
        assert model.graph.node[3].op_type == "Conv"
        conv0_wname = model.graph.node[3].input[1]
        assert list(model.get_initializer(conv0_wname).shape) == [64, 3, 3, 3]
        assert model.graph.node[4].op_type == "Mul"
    finally:
        # always clean up so a failed check does not leave a stale export
        os.remove(export_onnx_path)
def test_const_folding_shapes():
    """Check tensor shapes of an exported LFC-w1a1 network after FoldConstants.

    After shape inference and constant folding the first node must be a
    Reshape, tensor "0" must have shape [1, 1, 28, 28] and tensor "27"
    shape [1, 784]. The exported file is removed afterwards, also when a
    check fails.
    """
    lfc = get_test_model_untrained("LFC", 1, 1)
    bo.export_finn_onnx(lfc, (1, 1, 28, 28), export_onnx_path)
    try:
        model = ModelWrapper(export_onnx_path)
        model = model.transform(InferShapes())
        model = model.transform(FoldConstants())
        assert model.graph.node[0].op_type == "Reshape"
        assert list(model.get_tensor_shape("0")) == [1, 1, 28, 28]
        assert list(model.get_tensor_shape("27")) == [1, 784]
    finally:
        # always clean up so a failed check does not leave a stale export
        os.remove(export_onnx_path)
def test_make_input_chanlast():
    """Check MakeInputChannelsLast on the mnist-conv model.

    The graph input must go from shape (1, 1, 28, 28) to (1, 28, 28, 1),
    be annotated as NHWC, and the first node must be a Transpose.
    """

    def input_shape(wrapped, tensor_name):
        return tuple(wrapped.get_tensor_shape(tensor_name))

    # load the onnx model
    model = ModelWrapper(get_data("finn.data", "onnx/mnist-conv/model.onnx"))
    iname = model.graph.input[0].name
    assert input_shape(model, iname) == (1, 1, 28, 28)

    model = model.transform(MakeInputChannelsLast())
    first_node = model.graph.node[0]
    assert first_node.op_type == "Transpose"
    assert input_shape(model, iname) == (1, 28, 28, 1)
    assert model.get_tensor_layout(iname) == data_layout.NHWC
def hw_accelerate_parent_model_setup(parent_onnx_model_dir, remote_exec_model_dir):
    """Point the parent model's dataflow partition at a remote-exec model.

    Args:
        parent_onnx_model_dir: path passed to ModelWrapper to load the
            parent model.
        remote_exec_model_dir: value written to the "model" attribute of the
            parent graph's node at index 1.

    Returns:
        The modified parent ModelWrapper. It is also saved under ``BASE_DIR``.
    """
    parent_model = ModelWrapper(parent_onnx_model_dir)
    # NOTE: index 1 is specific to the parent model in use; inspect the
    # parent model to customize the value for a different graph.
    sdp_node = parent_model.graph.node[1]
    # use the caller-supplied path; previously the argument was ignored in
    # favour of a module-level constant
    getCustomOp(sdp_node).set_nodeattr("model", remote_exec_model_dir)
    parent_model.save(
        BASE_DIR + "/qnn_harnn_model_dataflow_parent_with_remote_bitfile_exec.onnx"
    )
    return parent_model
def test_end2end_cnv_w1a1_verify_all():
    """Compare cppsim and both rtlsim variants against the streamlined model.

    The streamlined CNV-w1a1 model provides the golden output for one
    CIFAR-10 test vector. The dataflow parent model is then executed three
    times, each time with a different child model plugged into its
    StreamingDataflowPartition node, and every result must be close to the
    golden output. The golden prediction must be class 3.
    """
    # use the streamlined model as the "golden" model for right answers
    golden = ModelWrapper(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
    golden_iname = golden.graph.input[0].name
    golden_oname = golden.graph.output[0].name
    # load one of the test vectors
    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    x = np.load(fn)["arr_0"].astype(np.float32) / 255
    assert x.shape == (1, 3, 32, 32)
    y_golden = execute_onnx(golden, {golden_iname: x}, True)[golden_oname]

    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx")
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name
    sdp_node = getCustomOp(
        parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
    )

    def run_parent_with(child_filename):
        # plug the given child model into the partition and execute the parent
        sdp_node.set_nodeattr("model", build_dir + child_filename)
        return execute_onnx(parent_model, {iname: x}, True)[oname]

    # produce results with cppsim
    y_cppsim = run_parent_with("/end2end_cnv_w1a1_ipgen_cppsim.onnx")
    # produce results with node-by-node rtlsim
    y_nodebynode_rtlsim = run_parent_with(
        "/end2end_cnv_w1a1_ipgen_nodebynode_rtlsim.onnx"
    )
    # produce results with whole-network (stitched ip) rtlsim
    # this is a particularly long-running test, set liveness thr. to unlimited
    os.environ["LIVENESS_THRESHOLD"] = "-1"
    y_whole_rtlsim = run_parent_with("/end2end_cnv_w1a1_ipstitch_whole_rtlsim.onnx")

    assert np.isclose(y_golden, y_cppsim).all()
    assert np.isclose(y_golden, y_nodebynode_rtlsim).all()
    assert np.isclose(y_golden, y_whole_rtlsim).all()
    assert np.argmax(y_golden) == 3
def test_brevitas_QConv2d(dw, in_channels):
    """Compare a FINN-ONNX export of QuantConv2d against Brevitas.

    Args:
        dw: when True a depthwise 3x3 convolution is built (groups and
            output channels equal to ``in_channels``, padding 1); otherwise
            a 1x1 convolution with 64 output channels.
        in_channels: number of input channels. Input and weight shapes are
            derived from it, so values other than 32 are also consistent.

    The exported model is executed on a random input and must match the
    Brevitas forward pass with atol=1e-3. The exported file is removed
    afterwards, also when a check fails.
    """
    ishape = (1, in_channels, 111, 111)
    if dw is True:
        groups = in_channels
        out_channels = in_channels
        kernel_size = 3
        padding = 1
    else:
        groups = 1
        out_channels = 64
        kernel_size = 1
        padding = 0
    stride = 1
    # Conv weights are laid out as (out_ch, in_ch / groups, k, k); for
    # in_channels == 32 this gives (32, 1, 3, 3) and (64, 32, 1, 1).
    w_shape = (out_channels, in_channels // groups, kernel_size, kernel_size)

    b_conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        groups=groups,
        kernel_size=kernel_size,
        padding=padding,
        stride=stride,
        bias=False,
        bias_quant_type=QuantType.FP,
        compute_output_bit_width=False,
        compute_output_scale=False,
        weight_bit_width=4,
        weight_quant_type=QuantType.INT,
        weight_scaling_impl_type=ScalingImplType.STATS,
        weight_scaling_stats_op=StatsOp.MAX,
        weight_scaling_per_output_channel=True,
        weight_restrict_scaling_type=RestrictValueType.LOG_FP,
        weight_narrow_range=True,
        weight_scaling_min_val=2e-16,
    )
    weight_tensor = gen_finn_dt_tensor(DataType.INT4, w_shape)
    b_conv.weight = torch.nn.Parameter(torch.from_numpy(weight_tensor).float())

    bo.export_finn_onnx(b_conv, ishape, export_onnx_path)
    try:
        model = ModelWrapper(export_onnx_path)
        model = model.transform(InferShapes())
        inp_tensor = np.random.uniform(low=-1.0, high=1.0, size=ishape).astype(
            np.float32
        )
        idict = {model.graph.input[0].name: inp_tensor}
        odict = oxe.execute_onnx(model, idict, True)
        produced = odict[model.graph.output[0].name]
        inp_tensor = torch.from_numpy(inp_tensor).float()
        b_conv.eval()
        expected = b_conv.forward(inp_tensor).detach().numpy()

        assert np.isclose(produced, expected, atol=1e-3).all()
    finally:
        # always clean up so a failed check does not leave a stale export
        os.remove(export_onnx_path)
def measure_top1_accuracy(model_chkpt, dataset, parent_chkpt=None):
    """Measure top-1 accuracy of a model checkpoint on a test set.

    Args:
        model_chkpt: path of the model to evaluate.
        dataset: "cifar10" or "mnist"; anything else raises ``Exception``.
        parent_chkpt: optional parent model path. When given, the input
            shape is taken from the parent and every sample is run through
            ``execute_parent(parent_chkpt, model_chkpt, sample)``.

    Returns:
        Top-1 accuracy in percent. Progress is printed every 10 samples and
        the final counts are reported through ``warnings.warn``.
    """
    # only the test split is used; the other splits are discarded
    if dataset == "cifar10":
        _, _, testx, testy, _, _ = cifar.load_cifar_data(
            "/workspace/finn/dataset", download=True, one_hot=False
        )
    elif dataset == "mnist":
        _, _, testx, testy, _, _ = mnist.load_mnist_data(
            "/workspace/finn/dataset", download=True, one_hot=False
        )
    else:
        raise Exception("Unrecognized dataset")
    # move from dataset_loader layout to ONNX layout: NHWC -> NCHW
    testx = testx.transpose(0, 3, 1, 2)

    model = ModelWrapper(model_chkpt)
    iname = model.graph.input[0].name
    oname = model.graph.output[0].name
    use_parent = parent_chkpt is not None
    if use_parent:
        parent_model = ModelWrapper(parent_chkpt)
        parent_iname = parent_model.graph.input[0].name
        ishape = parent_model.get_tensor_shape(parent_iname)
    else:
        ishape = model.get_tensor_shape(iname)

    ok = 0
    nok = 0
    for i in range(testx.shape[0]):
        tdata = testx[i].reshape(ishape).astype(np.float32)
        exp = testy[i].item()
        if use_parent:
            y = execute_parent(parent_chkpt, model_chkpt, tdata)
        else:
            y = execute_onnx(model, {iname: tdata}, False)[oname]
        if y.item() == exp:
            ok += 1
        else:
            nok += 1
        if i % 10 == 0:
            print("%d : OK %d NOK %d " % (i, ok, nok))

    acc_top1 = ok * 100.0 / (ok + nok)
    warnings.warn("Final OK %d NOK %d top-1 %f" % (ok, nok, acc_top1))
    return acc_top1
def test_brevitas_act_export_qhardtanh_nonscaled(
    abits, narrow_range, max_val, QONNX_export
):
    """Compare an exported non-scaled QuantHardTanh against Brevitas.

    Args:
        abits: activation bit width; None selects FP, 1 selects BINARY and
            any other value INT quantization.
        narrow_range: forwarded to QuantHardTanh.
        max_val: upper clipping value and upper bound of the random input.
        QONNX_export: when true, export through BrevitasONNXManager, clean
            up with qonnx_cleanup and convert with ConvertQONNXtoFINN;
            otherwise use the FINN-ONNX export.

    The exported model is executed on a random input and must match the
    Brevitas forward pass with atol=1e-3. The exported file is removed
    afterwards, also when a step after the export fails.
    """

    def get_quant_type(bit_width):
        if bit_width is None:
            return QuantType.FP
        elif bit_width == 1:
            return QuantType.BINARY
        else:
            return QuantType.INT

    act_quant_type = get_quant_type(abits)
    min_val = -1.0
    ishape = (1, 10)
    b_act = QuantHardTanh(
        bit_width=abits,
        quant_type=act_quant_type,
        max_val=max_val,
        min_val=min_val,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        scaling_impl_type=ScalingImplType.CONST,
        narrow_range=narrow_range,
    )

    # both export flavours write to the same path
    if QONNX_export:
        BrevitasONNXManager.export(b_act, ishape, export_onnx_path)
    else:
        bo.export_finn_onnx(b_act, ishape, export_onnx_path)
    try:
        if QONNX_export:
            qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
            model = ModelWrapper(export_onnx_path)
            model = model.transform(ConvertQONNXtoFINN())
            model.save(export_onnx_path)
        else:
            model = ModelWrapper(export_onnx_path)
        model = model.transform(InferShapes())
        inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
            np.float32
        )
        idict = {model.graph.input[0].name: inp_tensor}
        odict = oxe.execute_onnx(model, idict, True)
        produced = odict[model.graph.output[0].name]
        inp_tensor = torch.from_numpy(inp_tensor).float()
        expected = b_act.forward(inp_tensor).detach().numpy()

        assert np.isclose(produced, expected, atol=1e-3).all()
    finally:
        # always clean up so a failure does not leave a stale export
        os.remove(export_onnx_path)
def test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding):
    """Check LowerConvsToMatMul on a depthwise convolution.

    A single Conv node with ``group == ifm_ch`` is executed before and after
    lowering; the outputs must be identical. Afterwards the weight tensor
    must carry a sparsity annotation and node 1 of the lowered graph must
    have its ``depthwise`` attribute set to 1.
    """
    wdt = idt
    odt = DataType.INT32
    ofm_ch = ifm_ch
    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad=padding[0])
    in_shape = [1, ifm_ch, ifm_dim, ifm_dim]
    weight_shape = [ofm_ch, 1, k, k]

    # set up onnx model
    inp = oh.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape)
    outp = oh.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim, ofm_dim]
    )
    W = oh.make_tensor_value_info("W", TensorProto.FLOAT, weight_shape)
    dw_cnv = oh.make_node(
        "Conv",
        inputs=["inp", "W"],
        outputs=["outp"],
        kernel_shape=[k, k],
        pads=padding,
        strides=[stride, stride],
        group=ifm_ch,
    )
    graph = oh.make_graph(
        nodes=[dw_cnv],
        name="dw_cnv_graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[W],
    )
    model = ModelWrapper(oh.make_model(graph, producer_name="dws_cnv-model"))
    for tensor_name, finn_dt in (("inp", idt), ("outp", odt), ("W", wdt)):
        model.set_tensor_datatype(tensor_name, finn_dt)
    model.set_initializer("W", gen_finn_dt_tensor(wdt, weight_shape))
    model = model.transform(InferShapes())

    input_dict = {"inp": gen_finn_dt_tensor(idt, in_shape)}
    expected = oxe.execute_onnx(model, input_dict)["outp"]

    model = model.transform(LowerConvsToMatMul())
    produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (produced == expected).all()

    # check if created nodes have attributes that indicate depthwise conv
    assert model.get_tensor_sparsity("W") is not None
    im2col_node = getCustomOp(model.graph.node[1])
    assert im2col_node.get_nodeattr("depthwise") == 1
def test_const_folding_shapes():
    """Check that FoldConstants folds the MatMul weight Reshape in mnist-conv.

    Before folding, the first MatMul's second input is produced by a Reshape
    node and has no initializer; after folding it has an initializer and no
    producer.
    """
    model = ModelWrapper(get_data("finn.data", "onnx/mnist-conv/model.onnx"))
    model = model.transform(InferShapes())
    first_matmul = model.get_nodes_by_op_type("MatMul")[0]
    mm_node_w_in = first_matmul.input[1]

    # weight input is computed by a node, not stored as a constant
    assert model.find_producer(mm_node_w_in) is not None
    assert model.find_producer(mm_node_w_in).op_type == "Reshape"
    assert model.get_initializer(mm_node_w_in) is None

    model = model.transform(FoldConstants())

    # weight input is now a constant without a producing node
    assert model.find_producer(mm_node_w_in) is None
    assert model.get_initializer(mm_node_w_in) is not None
def create_one_fc_model(mem_mode="const"):
    """Build a model with one StreamingFCLayer_Batch node and partition it.

    The layer is 4x4 with SIMD = PE = 4, an identity weight matrix and no
    activation. ``mem_mode`` is forwarded as the node's ``mem_mode``
    attribute. Returns the model after ``CreateDataflowPartition``.
    """
    # create a model with a StreamingFCLayer instance with no activation
    # the wider range of the full accumulator makes debugging a bit easier
    wdt = DataType.INT2
    idt = DataType.INT32
    odt = DataType.INT32
    m = 4
    simd = 4
    pe = 4

    layer_attrs = dict(
        domain="finn",
        backend="fpgadataflow",
        resType="ap_resource_lut()",
        MW=m,
        MH=m,
        SIMD=simd,
        PE=pe,
        inputDataType=idt.name,
        weightDataType=wdt.name,
        outputDataType=odt.name,
        ActVal=0,
        binaryXnorMode=0,
        noActivation=1,
        mem_mode=mem_mode,
    )
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, m])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, m])
    fc0 = helper.make_node(
        "StreamingFCLayer_Batch", ["inp", "w0"], ["outp"], **layer_attrs
    )
    graph = helper.make_graph(
        nodes=[fc0], name="fclayer_graph", inputs=[inp], outputs=[outp]
    )
    model = ModelWrapper(helper.make_model(graph, producer_name="fclayer-model"))

    for tensor_name, finn_dt in (("inp", idt), ("outp", odt), ("w0", wdt)):
        model.set_tensor_datatype(tensor_name, finn_dt)

    # generate weights
    model.set_initializer("w0", np.eye(m, dtype=np.float32))

    return model.transform(CreateDataflowPartition())
def apply(self, model):
    """Run the Vitis build flow on ``model``.

    Steps, in order: check the Vitis environment, infer data layouts, apply
    the global preparation transforms, floorplan and partition the model,
    build every StreamingDataflowPartition's model into a kernel (saving it
    back to its own file), and finally link all kernels with ``VitisLink``.
    The "platform" metadata property is set to "alveo" on each kernel model
    and on the returned model.

    Returns ``(model, False)``.
    """
    _check_vitis_envvars()
    # first infer layouts
    model = model.transform(InferDataLayouts())
    # prepare at global level, then break up into kernels
    prep_transforms = [
        MakePYNQDriver(platform="alveo"),
        InsertIODMA(512),
        InsertDWC(),
    ]
    for prep_step in prep_transforms:
        model = model.transform(prep_step)
        # NOTE(review): names are refreshed after every preparation step;
        # confirm the loop extent against the original file layout.
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(GiveReadableTensorNames())

    model = model.transform(Floorplan(floorplan=self.floorplan_file))
    model = model.transform(CreateDataflowPartition())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())

    # Build each kernel individually
    for partition_proto in model.get_nodes_by_op_type("StreamingDataflowPartition"):
        partition = getCustomOp(partition_proto)
        kernel_name = partition.onnx_node.name
        dataflow_model_filename = partition.get_nodeattr("model")

        kernel_model = ModelWrapper(dataflow_model_filename)
        kernel_model = kernel_model.transform(InsertFIFO())
        kernel_model = kernel_model.transform(
            InsertTLastMarker(both=True, external=False, dynamic=False)
        )
        kernel_model = kernel_model.transform(GiveUniqueNodeNames())
        # checkpoint the prepared kernel before the synthesis steps
        kernel_model.save(dataflow_model_filename)

        kernel_model = kernel_model.transform(
            PrepareIP(self.fpga_part, self.period_ns)
        )
        kernel_model = kernel_model.transform(HLSSynthIP())
        kernel_model = kernel_model.transform(
            CreateStitchedIP(self.fpga_part, self.period_ns, kernel_name, True)
        )
        kernel_model = kernel_model.transform(CreateVitisXO(kernel_name))
        kernel_model.set_metadata_prop("platform", "alveo")
        kernel_model.save(dataflow_model_filename)

    # Assemble design from kernels
    linker = VitisLink(
        self.platform,
        round(1000 / self.period_ns),
        strategy=self.strategy,
        enable_debug=self.enable_debug,
    )
    model = model.transform(linker)
    # set platform attribute for correct remote execution
    model.set_metadata_prop("platform", "alveo")

    return (model, False)
def test_dataflow_partition_create():
    """Check CreateDataflowPartition on the TFC-w1a1 HLS-layer model.

    Node 2 of the partitioned graph must be a StreamingDataflowPartition
    whose "model" attribute points at an existing file. The parent model is
    saved to ``build_dir``.
    """
    # load the onnx model
    raw_m = get_data(
        "finn", "data/onnx/finn-hls-model/tfc_w1_a1_after_conv_to_hls.onnx"
    )
    partitioned = ModelWrapper(raw_m).transform(CreateDataflowPartition())

    partition_proto = partitioned.graph.node[2]
    assert partition_proto.op_type == "StreamingDataflowPartition"
    sdp_node = getCustomOp(partition_proto)
    assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
    assert os.path.isfile(sdp_node.get_nodeattr("model"))

    partitioned.save(build_dir + "/test_dataflow_partition_create.onnx")
def test_infer_shapes():
    """Check InferShapes on a graph mixing standard and custom nodes.

    A MultiThreshold node is spliced in after the Relu at index 3 of the
    mnist-conv model and the shapes of both affected outputs are removed.
    Shape inference must then restore all shapes, giving the MultiThreshold
    output the shape [1, 8, 28, 28].
    """
    # load the onnx model
    model = ModelWrapper(get_data("finn.data", "onnx/mnist-conv/model.onnx"))
    graph = model.graph

    # multi-thresholding node to be inserted between the first Relu and MaxPool node
    # get Relu node to use data
    relu_node = graph.node[3]
    assert relu_node.op_type == "Relu", "The wrong model was chosen for the check"

    # create thresholds tensor as constant
    thresh_info = helper.make_tensor_value_info(
        "mt_thresh0", TensorProto.FLOAT, [8, 7]
    )
    # random numbers for the thresholds
    # thresholds for one channel have to be sorted to guarantee the correct behavior
    thresh_values = np.empty([8, 7], dtype=np.float32)
    for channel_row in thresh_values:
        # each row is a view, so this fills the array in place
        channel_row[:] = np.sort(np.random.random_sample(7) * 10)
    model.set_initializer(thresh_info.name, thresh_values)

    # add multi-thresholding node and change Relu node
    mt_node = helper.make_node(
        "MultiThreshold",
        ["mt_v0", "mt_thresh0"],
        [relu_node.output[0]],
        domain="finn.custom_op.general",
    )
    relu_node.output[0] = "mt_v0"

    # explicitly remove any present shape from ReLU and MultiThreshold outputs
    for stale_output in (relu_node.output[0], mt_node.output[0]):
        util.remove_by_name(model.graph.value_info, stale_output)
    graph.node.insert(4, mt_node)

    # first check routine
    # check if at least one shape is not specified
    all_known_before = model.check_all_tensor_shapes_specified()
    assert (
        not all_known_before
    ), "All tensors are already specified before the shape inference execution"

    # perform shape inference on mixed model
    model = model.transform(InferShapes())

    # second check routine
    # now all shapes should be specified and mt_node output shape is (1,8,28,28)
    assert (
        model.check_all_tensor_shapes_specified()
    ), "There are still tensors that are not specified"
    assert model.get_tensor_shape(mt_node.output[0]) == [
        1,
        8,
        28,
        28,
    ], "output of multi-thresholding node has wrong shape"
def test_brevitas_act_export_relu_imagenet(abits, max_val, scaling_per_channel):
    """Compare an exported QuantReLU with learned scaling against Brevitas.

    Args:
        abits: activation bit width.
        max_val: upper bound of the random input. The QuantReLU itself is
            built with a fixed ``max_val`` of 6.0.
        scaling_per_channel: when True a random per-channel scale is loaded,
            otherwise a single fixed scale.

    The exported model is executed on a random input and must match the
    Brevitas forward pass with atol=1e-3; on mismatch diagnostic values are
    printed before the assertion fails. The exported file is removed
    afterwards, also when a check fails.
    """
    out_channels = 32
    ishape = (1, out_channels, 1, 1)
    min_val = -1.0
    b_act = QuantReLU(
        bit_width=abits,
        quant_type=QuantType.INT,
        scaling_impl_type=ScalingImplType.PARAMETER,
        scaling_per_channel=scaling_per_channel,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        scaling_min_val=2e-16,
        max_val=6.0,
        return_quant_tensor=True,
        per_channel_broadcastable_shape=(1, out_channels, 1, 1),
    )
    if scaling_per_channel is True:
        rand_tensor = (2) * torch.rand((1, out_channels, 1, 1))
    else:
        rand_tensor = torch.tensor(1.2398)
    # state-dict key is split over two adjacent literals purely for line length
    checkpoint = {
        "act_quant_proxy.fused_activation_quant_proxy.tensor_quant."
        "scaling_impl.learned_value": rand_tensor.type(torch.FloatTensor)
    }
    b_act.load_state_dict(checkpoint)

    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
    try:
        model = ModelWrapper(export_onnx_path)
        model = model.transform(InferShapes())
        inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
            np.float32
        )
        idict = {model.graph.input[0].name: inp_tensor}
        odict = oxe.execute_onnx(model, idict, True)
        produced = odict[model.graph.output[0].name]
        inp_tensor = torch.from_numpy(inp_tensor).float()
        b_act.eval()
        expected = b_act.forward(inp_tensor).tensor.detach().numpy()

        # evaluate the comparison once; it drives both diagnostics and assert
        outputs_match = np.isclose(produced, expected, atol=1e-3).all()
        if not outputs_match:
            print(abits, max_val)
            print("scale: ", b_act.quant_act_scale().type(torch.FloatTensor).detach())
            if abits < 5:
                print(
                    "thres:",
                    ", ".join(["{:8.4f}".format(x) for x in b_act.export_thres[0]]),
                )
            print("input:", ", ".join(["{:8.4f}".format(x) for x in inp_tensor[0]]))
            print("prod :", ", ".join(["{:8.4f}".format(x) for x in produced[0]]))
            print("expec:", ", ".join(["{:8.4f}".format(x) for x in expected[0]]))

        assert outputs_match
    finally:
        # always clean up so a failed check does not leave a stale export
        os.remove(export_onnx_path)
def execute_node(node, context, graph):
    """Execute a single node and write its outputs into ``context``.

    Dispatch:
      * ``StreamingDataflowPartition``: the model named by the node's
        "model" attribute is loaded and executed with ``execute_onnx``; the
        returned context is merged into ``context``.
      * nodes in the "finn" domain: delegated to
        ``ex_cu_node.execute_custom_node``.
      * anything else: wrapped into a single-node ONNX model and run with
        onnxruntime.

    Args:
        node: the ONNX node to execute.
        context: dict mapping tensor names to values; read for inputs and
            updated in place with outputs.
        graph: the graph containing ``node``; its inputs, outputs and
            value_info supply tensor descriptions for the single-node model.

    Raises:
        Exception: if an onnxruntime output's shape differs from the shape
            of the corresponding entry already in ``context``.
    """
    if node.op_type == "StreamingDataflowPartition":
        sdp_node = getCustomOp(node)
        model = ModelWrapper(sdp_node.get_nodeattr("model"))
        ret = execute_onnx(model, context, True)
        context.update(ret)
        return

    if node.domain == "finn":
        ex_cu_node.execute_custom_node(node, context, graph)
        return

    # onnxruntime unfortunately does not implement run_node as defined by ONNX,
    # it can only execute entire models -- so we create a model which solely
    # consists of our current node.
    node_inputs = [vi for vi in graph.input if vi.name in node.input]
    node_inputs += [vi for vi in graph.value_info if vi.name in node.input]
    node_outputs = [vi for vi in graph.output if vi.name in node.output]
    node_outputs += [vi for vi in graph.value_info if vi.name in node.output]
    node_graph = helper.make_graph(
        nodes=[node],
        name="single-node-exec",
        inputs=node_inputs,
        outputs=node_outputs,
    )
    node_model = helper.make_model(node_graph)
    input_dict = {inp: context[inp] for inp in node.input}

    sess = rt.InferenceSession(node_model.SerializeToString())
    output_list = sess.run(None, input_dict)

    for output_ind, outp in enumerate(node.output):
        produced = output_list[output_ind]
        if produced.shape != context[outp].shape:
            # report the produced shape itself; ``.shape`` of a shape tuple
            # does not exist and used to mask this error
            raise Exception(
                """Output shapes disagree after node execution: found %s vs expected %s"""
                % (
                    str(produced.shape),
                    str(context[outp].shape),
                )
            )
        context[outp] = produced
def test_end2end_mobilenet_export():
    """Export MobileNet and its preprocessing, and store golden results.

    Writes to ``build_dir``: the tidied preprocessing model, the exported
    MobileNet model, the prepared input image, and the golden top-5 class
    indices and values computed with PyTorch/Brevitas. Both ONNX files must
    exist at the end.
    """
    # export preprocessing
    preproc_onnx = build_dir + "/end2end_mobilenet_preproc.onnx"
    mean = [0.485, 0.456, 0.406]
    std = 0.226
    ch = 3
    preproc = NormalizePreProc(mean, std, ch)
    bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
    preproc_model = ModelWrapper(preproc_onnx)
    # set input finn datatype to UINT8
    preproc_model.set_tensor_datatype(
        preproc_model.graph.input[0].name, DataType["UINT8"]
    )
    for make_transform in (
        InferShapes,
        FoldConstants,
        GiveUniqueNodeNames,
        GiveUniqueParameterTensors,
        GiveReadableTensorNames,
    ):
        preproc_model = preproc_model.transform(make_transform())
    # overwrite the raw export with the tidied model
    preproc_model.save(preproc_onnx)

    # export mobilenet
    finn_onnx = build_dir + "/end2end_mobilenet_export.onnx"
    mobilenet = get_test_model_trained("mobilenet", 4, 4)
    bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), finn_onnx)

    # calculate golden output with pytorch/brevitas and save as .npy
    # get single image as input and prepare image
    img = Image.open("/workspace/finn/tests/brevitas/king_charles.jpg")
    # resize smallest side of the image to 256 pixels and resize larger side
    # with same ratio
    img = resize_smaller_side(256, img)
    # crop central 224*224 window
    img = crop_center(224, img)
    # save image as numpy array and as torch tensor to enable testing in
    # brevitas/pytorch and finn and transpose from (H, W, C) to (C, H, W)
    img_np = np.asarray(img).copy().astype(np.float32).transpose(2, 0, 1)
    img_np = img_np.reshape(1, 3, 224, 224)
    np.save(build_dir + "/end2end_mobilenet_input.npy", img_np)
    img_torch = torch.from_numpy(img_np).float()

    # do forward pass in PyTorch/Brevitas
    input_tensor = preproc.forward(img_torch)
    golden = mobilenet.forward(input_tensor).detach().numpy()
    golden_topk = golden.flatten()
    # five largest entries, highest first
    golden_top5 = np.flip(np.argsort(golden_topk)[-5:])
    golden_top5_prob = [golden_topk[index] for index in golden_top5]

    # save golden output values
    np.save(build_dir + "/end2end_mobilenet_golden_top5.npy", golden_top5)
    np.save(build_dir + "/end2end_mobilenet_golden_top5_prob.npy", golden_top5_prob)

    assert os.path.isfile(finn_onnx)
    assert os.path.isfile(preproc_onnx)
def test_code_gen_trafo():
    """Check that ``CodeGen_npysim`` leaves a non-empty directory per node.

    A single-node ``StreamingFCLayer_Batch`` model with bipolar datatypes is
    built, the ``CodeGen_npysim`` transformation is applied, and for every
    node the directory stored in its ``code_gen_dir_npysim`` attribute must
    exist and contain at least one entry.
    """
    idt = wdt = odt = DataType.BIPOLAR
    mw, mh = 8, 8
    pe, simd = 4, 4

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, mh])

    # node attributes, passed to make_node as keyword arguments
    fc_attrs = dict(
        domain="finn",
        backend="fpgadataflow",
        code_gen_dir="",
        executable_path="",
        resType="ap_resource_lut()",
        MW=mw,
        MH=mh,
        SIMD=simd,
        PE=pe,
        inputDataType=idt.name,
        weightDataType=wdt.name,
        outputDataType=odt.name,
        noActivation=1,
    )
    node_inp_list = ["inp", "weights", "thresh"]
    FCLayer_node = helper.make_node(
        "StreamingFCLayer_Batch", node_inp_list, ["outp"], **fc_attrs
    )

    graph = helper.make_graph(
        nodes=[FCLayer_node],
        name="fclayer_graph",
        inputs=[inp],
        outputs=[outp],
    )
    model = ModelWrapper(helper.make_model(graph, producer_name="fclayer-model"))

    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)
    model.set_tensor_datatype("weights", wdt)
    W = util.gen_finn_dt_tensor(wdt, (mw, mh))
    model.set_initializer("weights", W)

    model = model.transform(CodeGen_npysim())

    for node in model.graph.node:
        code_gen_attribute = util.get_by_name(node.attribute, "code_gen_dir_npysim")
        tmp_dir = code_gen_attribute.s.decode("UTF-8")
        dir_exists = os.path.isdir(tmp_dir)
        assert dir_exists, """Code generation directory of node with op type {} does not exist!""".format(
            node.op_type
        )
        num_entries = len(os.listdir(tmp_dir))
        assert num_entries != 0, """Code generation directory of node with op type {} is empty!""".format(
            node.op_type
        )
def test_sort_nonlinear_graph():
    """Check that ``SortGraph`` topologically sorts a graph with forks.

    The graph is built from Add/Mul nodes listed in a deliberately shuffled
    order, with several tensors (``fork1``, ``fork2``, ``fork3``) consumed
    more than once. After the transformation, the
    ``nodes_topologically_sorted`` analysis pass must report success.
    """
    ch = 2
    ifmdim = 16
    input_shape = (1, ch, ifmdim, ifmdim)

    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, input_shape)

    num_of_params = 8
    param_names = ["p" + str(i) for i in range(num_of_params)]
    value_info = [
        helper.make_tensor_value_info(pname, TensorProto.FLOAT, input_shape)
        for pname in param_names
    ]

    # (op_type, inputs, outputs) -- intentionally NOT in topological order
    unsorted_node_specs = [
        ("Mul", ["fork1", "p2"], ["t3"]),
        ("Add", ["t4", "p3"], ["t5"]),
        ("Add", ["t2", "t3"], ["t4"]),
        ("Add", ["t6", "t7"], ["t8"]),
        ("Add", ["fork3", "fork3"], ["top_out"]),
        ("Mul", ["t5", "p4"], ["fork2"]),
        ("Add", ["top_in", "p0"], ["fork1"]),
        ("Mul", ["fork1", "p1"], ["t2"]),
        ("Add", ["fork2", "p5"], ["t6"]),
        ("Add", ["fork2", "p6"], ["t7"]),
        ("Mul", ["t8", "p7"], ["fork3"]),
    ]
    nodes = [
        helper.make_node(op_type, node_inputs, node_outputs)
        for op_type, node_inputs, node_outputs in unsorted_node_specs
    ]

    graph = helper.make_graph(
        name="test",
        inputs=[top_in],
        outputs=[top_out],
        value_info=value_info,
        nodes=nodes,
    )
    modelproto = helper.make_model(graph)

    model = ModelWrapper(modelproto)
    model = model.transform(InferShapes())

    # fixed seed so the random parameter values are reproducible
    np.random.seed(0)
    for pname in param_names:
        param_value = np.random.rand(*input_shape).astype(np.float32)
        model.set_initializer(pname, param_value)

    new_model = model.transform(SortGraph())

    # Test
    ret = new_model.analysis(ta.nodes_topologically_sorted)
    is_sorted = ret["nodes_topologically_sorted"]
    assert is_sorted, "Nodes are not topologically sorted."
def make_single_slidingwindow_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw=0):
    """Build a ModelWrapper holding a single ConvolutionInputGenerator1D node.

    Args:
        k: pair ``(k_h, k_w)`` used as ``ConvKernelDim``.
        ifm_ch: value for ``IFMChannels``; also the last input dimension.
        ifm_dim: pair ``(h, w)`` used as ``IFMDim`` and in the input shape.
        ofm_dim: pair ``(h, w)`` used as ``OFMDim`` and in the output shape.
        simd: value for the ``SIMD`` attribute.
        stride: pair ``(h, w)`` used as ``Stride``.
        dilation: pair ``(h, w)`` used as ``Dilation``.
        idt: datatype object; its ``name`` is stored as both the input and
            the output datatype, and it is set on both graph tensors.
        dw: value for the ``depthwise`` attribute (default 0).

    Returns:
        A ``ModelWrapper`` around the one-node model, with the tensors
        ``inp`` and ``outp`` annotated with ``idt``.
    """
    k_h, k_w = k
    ifm_dim_h, ifm_dim_w = ifm_dim
    stride_h, stride_w = stride
    dilation_h, dilation_w = dilation
    ofm_dim_h, ofm_dim_w = ofm_dim

    # the output uses the same datatype as the input
    odt = idt

    in_shape = [1, ifm_dim_h, ifm_dim_w, ifm_ch]
    out_shape = [1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch]
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape)

    # node attributes, passed to make_node as keyword arguments
    swg_attrs = dict(
        domain="finn.custom_op.fpgadataflow",
        backend="fpgadataflow",
        ConvKernelDim=[k_h, k_w],
        IFMChannels=ifm_ch,
        IFMDim=[ifm_dim_h, ifm_dim_w],
        OFMDim=[ofm_dim_h, ofm_dim_w],
        SIMD=simd,
        Stride=[stride_h, stride_w],
        Dilation=[dilation_h, dilation_w],
        inputDataType=idt.name,
        outputDataType=odt.name,
        depthwise=dw,
    )
    SlidingWindow_node = helper.make_node(
        "ConvolutionInputGenerator1D", ["inp"], ["outp"], **swg_attrs
    )

    graph = helper.make_graph(
        nodes=[SlidingWindow_node],
        name="slidingwindow_graph",
        inputs=[inp],
        outputs=[outp],
    )
    model = ModelWrapper(helper.make_model(graph, producer_name="slidingwindow-model"))

    for tensor_name, tensor_dt in (("inp", idt), ("outp", odt)):
        model.set_tensor_datatype(tensor_name, tensor_dt)

    return model