def test_fpgadataflow_fclayer_npysim(idt, wdt, act, nf, sf, mw, mh):
    """Test StreamingFCLayer npysim execution against a NumPy reference.

    idt/wdt: input/weight FINN datatypes; act: activation output datatype or
    None (no activation, raw accumulators); nf/sf: neuron/synapse fold
    (-1 = maximum fold); mw/mh: weight matrix width/height.
    """
    if nf == -1:
        nf = mh
    if sf == -1:
        sf = mw
    pe = mh // nf
    simd = mw // sf
    assert mh % pe == 0
    assert mw % sf == 0
    # generate weights
    W = gen_finn_dt_tensor(wdt, (mw, mh))
    # generate input data
    x = gen_finn_dt_tensor(idt, (1, mw))
    if act is None:
        # no activation, produce accumulators
        T = None
        tdt = None
        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
            # xnorpopcount accumulators are unsigned
            odt = DataType.UINT32
        else:
            odt = DataType.INT32
    else:
        odt = act
        # FIX: renamed from (min, max) to avoid shadowing the builtins
        (acc_min, acc_max) = calculate_signed_dot_prod_range(idt, wdt, mw)
        n_steps = act.get_num_possible_values() - 1
        T = np.random.randint(acc_min, acc_max - 1, (mh, n_steps)).astype(np.float32)
        # provide non-decreasing thresholds
        T = np.sort(T, axis=1)
        # generate thresholds for activation
        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
            tdt = DataType.UINT32
            # bias thresholds to be positive
            T = np.ceil((T + mw) / 2)
            assert (T >= 0).all()
        else:
            tdt = DataType.INT32
    model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
    model = model.transform(SetExecMode("npysim"))
    model = model.transform(CodeGen_npysim())
    model = model.transform(Compile())
    # prepare input data
    input_dict = prepare_inputs(x, idt, wdt)
    if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
        # convert inputs to binary and use xnorpopcountmatmul
        y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2)
    else:
        y = np.matmul(x, W)
    if T is not None:
        y = multithreshold(y, T)
        if act == DataType.BIPOLAR:
            # binary to bipolar
            y = 2 * y - 1
        else:
            # signed offset
            y += act.min()
    oshape = model.get_tensor_shape("outp")
    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced.reshape(y_expected.shape) == y_expected).all(), "npysim failed"
def test_move_flatten_past_affine(data_layout, batch_size):
    """MoveFlattenPastAffine: the Flatten node must move past MatMul/Mul/Add
    only for NHWC layout; execution results must be unchanged either way."""
    if data_layout == DataLayout.NHWC:
        ishape = [batch_size, 1, 1, 1024]
        oshape = [batch_size, 1000]
    else:
        ishape = [batch_size, 1024, 1, 1]
        oshape = [batch_size, 1000]
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
    # BUGFIX: value_info names must match the tensor names used by the nodes
    # and initializers below. They were declared as "a1"/"a2"/"a3", which left
    # "a0" undeclared and attached the [1024, 1000] weight shape to the
    # scalar mul parameter "a1".
    a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, [1024, 1000])
    a1 = helper.make_tensor_value_info("a1", TensorProto.FLOAT, [])
    a2 = helper.make_tensor_value_info("a2", TensorProto.FLOAT, [1000])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)

    flatten_node = helper.make_node("Flatten", ["inp"], ["flatten_out"])
    matmul_node = helper.make_node("MatMul", ["flatten_out", "a0"], ["matmul_out"])
    mul_node = helper.make_node("Mul", ["matmul_out", "a1"], ["mul_out"])
    add_node = helper.make_node("Add", ["mul_out", "a2"], ["outp"])

    graph = helper.make_graph(
        nodes=[flatten_node, matmul_node, mul_node, add_node],
        name="move-reshape-graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[a0, a1, a2],
    )

    model = helper.make_model(graph, producer_name="move_reshape_model")
    model = ModelWrapper(model)

    # initialize values
    a0_values = gen_finn_dt_tensor(DataType["TERNARY"], [1024, 1000])
    model.set_initializer("a0", a0_values)
    a1_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
    model.set_initializer("a1", a1_values)
    a2_values = np.random.uniform(low=-1, high=1, size=(1000)).astype(np.float32)
    model.set_initializer("a2", a2_values)

    model.set_tensor_datatype("inp", DataType["INT2"])
    model.set_tensor_layout("inp", data_layout)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())

    # compare execution before and after transformation
    inp_values = gen_finn_dt_tensor(DataType["INT2"], ishape)
    idict = {model.graph.input[0].name: inp_values}
    model_transformed = model.transform(MoveFlattenPastAffine())
    assert oxe.compare_execution(model, model_transformed, idict)

    # depending on data layout check if graph is transformed or not
    if data_layout == DataLayout.NHWC:
        # check if nodes have new order in transformed graph
        assert model.graph != model_transformed.graph
        assert model_transformed.graph.node[-1].op_type == "Flatten"
    else:
        assert model.graph == model_transformed.graph
def test_fpgadataflow_channelwise_ops(idt, act, pdt, nf, ich, func, vecs, exec_mode):
    """ChannelwiseOp_Batch: compare cppsim/rtlsim against a NumPy reference
    for elementwise add/mul with per-channel parameters.

    idt: input datatype; act: output datatype; pdt: parameter datatype;
    nf: fold (-1 = max); ich: channels; func: "add" or "mul";
    vecs: leading input dims; exec_mode: "cppsim" or "rtlsim".
    """
    if nf == -1:
        nf = ich
    pe = ich // nf
    assert ich % pe == 0

    # generate input and param data
    x = gen_finn_dt_tensor(idt, tuple(vecs + [ich]))
    # C = np.random.randint(idt.min(), idt.max() + 1, ich).astype(np.float32)
    # FIX: pass the shape as an actual tuple — (ich) is just a parenthesized int
    C = gen_finn_dt_tensor(pdt, (ich,))

    odt = act

    model = make_modelwrapper(C, pe, idt, odt, pdt, func, vecs)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # package input data as dictionary
    input_dict = {"inp": x}

    oshape = model.get_tensor_shape("outp")

    # reference: broadcast params over the input and apply func
    C_reshaped = np.broadcast_to(C.flatten(), x.shape)
    if func == "add":
        y = x + C_reshaped
    elif func == "mul":
        y = x * C_reshaped

    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]

    y_produced = y_produced.reshape(y_expected.shape)
    # BUGFIX: failure message previously always said "cppsim failed",
    # even when running in rtlsim mode
    assert (y_produced == y_expected).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        hls_synt_res_est = model.analysis(hls_synth_res_estimation)
        assert "ChannelwiseOp_Batch_0" in hls_synt_res_est

        node = model.get_nodes_by_op_type("ChannelwiseOp_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        # measured cycles must be close to the analytical estimate
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_finn_tensor_generator():
    """Verify util.gen_finn_dt_tensor: generated tensors must have the
    requested shape and contain only values allowed by the datatype."""
    shape_msg = "Shape of generated tensor does not match the desired shape"
    value_msg = "Data type of generated tensor does not match the desired Data type"

    # datatypes checked via the datatype's own allowed() membership test
    for dt, shape in [
        (DataType.BIPOLAR, [2, 2]),
        (DataType.BINARY, [4, 2, 3]),
        (DataType.TERNARY, [7, 1, 3, 1]),
    ]:
        tensor = util.gen_finn_dt_tensor(dt, shape)
        # test shape
        assert list(tensor.shape) == shape, shape_msg
        # test if elements are FINN datatype
        for value in tensor.flatten():
            assert dt.allowed(value), value_msg

    # int2: check against the explicit set of representable values
    shape_int2 = [7, 4]
    tensor_int2 = util.gen_finn_dt_tensor(DataType.INT2, shape_int2)
    assert list(tensor_int2.shape) == shape_int2, shape_msg
    for value in tensor_int2.flatten():
        assert value in [-2, -1, 0, 1], value_msg
def test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding):
    """Lower a depthwise Conv (group == ifm_ch) to MatMul form and check that
    execution results are unchanged and the Im2Col node is marked depthwise."""
    wdt = idt
    odt = DataType.INT32
    ofm_ch = ifm_ch
    # NOTE(review): only padding[0] is passed to the output-dim helper —
    # assumes symmetric padding; confirm against the test parametrization
    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad=padding[0])

    # set up onnx model
    inp = oh.make_tensor_value_info("inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim])
    outp = oh.make_tensor_value_info("outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim, ofm_dim])

    # depthwise weights: one k x k kernel per channel
    W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, 1, k, k])

    dw_cnv = oh.make_node(
        "Conv",
        inputs=["inp", "W"],
        outputs=["outp"],
        kernel_shape=[k, k],
        pads=padding,
        strides=[stride, stride],
        group=ifm_ch,  # group == channels makes this a depthwise conv
    )
    graph = oh.make_graph(
        nodes=[dw_cnv],
        name="dw_cnv_graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[W],
    )
    model = oh.make_model(graph, producer_name="dws_cnv-model")
    model = ModelWrapper(model)
    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)
    model.set_tensor_datatype("W", wdt)
    w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, 1, k, k])
    model.set_initializer("W", w_tensor)
    model = model.transform(InferShapes())

    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim, ifm_dim])
    input_dict = {"inp": input_tensor}
    # reference execution before lowering
    output_dict = oxe.execute_onnx(model, input_dict)
    expected = output_dict["outp"]

    model = model.transform(LowerConvsToMatMul())
    output_dict = oxe.execute_onnx(model, input_dict)
    produced = output_dict["outp"]
    assert (produced == expected).all()

    # check if created nodes have attributes that indicate depthwise conv
    assert model.get_tensor_sparsity("W") is not None
    im2col_node = getCustomOp(model.graph.node[1])
    assert im2col_node.get_nodeattr("depthwise") == 1
def test_convert_to_hls_channelwise_layer(pdt, idt, onnx_op_name, scalar_param, exec_mode):
    """InferChannelwiseLinearLayer must turn the elementwise op into a
    ChannelwiseOp_Batch node whose cppsim/rtlsim output matches the plain
    ONNX reference execution."""
    ifm_ch = 16
    ifm_dim = 5
    ishape = (1, ifm_ch, ifm_dim, ifm_dim)
    # scalar vs per-channel parameter shape
    if scalar_param:
        pshape = (1, )
    else:
        pshape = (1, ifm_ch, 1, 1)

    np.random.seed(0)
    # NOTE(review): helper name says "maxpool" but it is used to build the
    # channelwise-op model here — confirm against the helper's definition
    model = make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape)

    # Since there are no datatypes with a bit width that is not a power of 2,
    # there are cases where the input won't use its full range.
    if idt == DataType["INT32"]:
        x = gen_finn_dt_tensor(DataType["INT16"], (1, ifm_ch, ifm_dim, ifm_dim))
    elif idt == DataType["UINT32"]:
        x = gen_finn_dt_tensor(DataType["UINT16"], (1, ifm_ch, ifm_dim, ifm_dim))
    else:
        x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim))

    input_dict = prepare_inputs(x)
    # reference output before conversion
    y_expected = oxe.execute_onnx(model, input_dict)["outp"]

    new_model = model.transform(to_hls.InferChannelwiseLinearLayer())
    new_model = new_model.transform(GiveUniqueNodeNames())

    if exec_mode == "cppsim":
        new_model = new_model.transform(PrepareCppSim())
        new_model = new_model.transform(CompileCppSim())
        new_model = new_model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        new_model = new_model.transform(SetExecMode("rtlsim"))
        new_model = new_model.transform(GiveUniqueNodeNames())
        new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
        new_model = new_model.transform(HLSSynthIP())
        new_model = new_model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    ctx_produced = oxe.execute_onnx(new_model, input_dict, return_full_exec_context=True)
    y_produced = ctx_produced["outp"]

    assert (y_produced == y_expected).all()
    assert new_model.graph.node[1].op_type == "ChannelwiseOp_Batch"
def test_remove_identity_ops(op, as_first_node, approx):
    """RemoveIdentityOps must drop exactly one node (the inserted identity op)
    while leaving execution results unchanged within atol.

    op/as_first_node/approx are forwarded to insert_identity_op, which adds
    the identity operation to be removed.
    """
    # set up onnx model: Mul -> Reshape -> Div -> MatMul
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 4, 1, 1])
    mul = helper.make_tensor_value_info("mul", TensorProto.FLOAT, [])
    shape = helper.make_tensor_value_info("shape", TensorProto.FLOAT, [2])
    div = helper.make_tensor_value_info("div", TensorProto.FLOAT, [])
    matmul = helper.make_tensor_value_info("matmul", TensorProto.FLOAT, [4, 2])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, 2])

    mul_node = helper.make_node("Mul", ["inp", "mul"], ["mul_out"])
    reshape_node = helper.make_node("Reshape", ["mul_out", "shape"], ["reshape_out"])
    div_node = helper.make_node("Div", ["reshape_out", "div"], ["div_out"])
    matmul_node = helper.make_node("MatMul", ["div_out", "matmul"], ["outp"])

    graph = helper.make_graph(
        nodes=[mul_node, reshape_node, div_node, matmul_node],
        name="identity-graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[mul, shape, div, matmul],
    )

    model = helper.make_model(graph, producer_name="mulpastconv-model")
    model = ModelWrapper(model)
    inp_values = gen_finn_dt_tensor(DataType["INT2"], [1, 4, 1, 1])
    mul_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
    shape_values = np.asarray([1, -1], dtype=np.int64)
    div_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
    matmul_values = gen_finn_dt_tensor(DataType["INT2"], [4, 2])
    model.set_initializer("mul", mul_values)
    model.set_initializer("shape", shape_values)
    model.set_initializer("div", div_values)
    model.set_initializer("matmul", matmul_values)

    # inject the identity op that the transform is expected to remove
    insert_identity_op(model, op, as_first_node, approx)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    idict = {"inp": inp_values}
    odict = oxe.execute_onnx(model, idict)
    out_before = odict["outp"]
    num_of_nodes_before = len(model.graph.node)

    model = model.transform(RemoveIdentityOps())

    # exactly one node (the identity op) must have been removed
    num_of_nodes_after = len(model.graph.node)
    assert num_of_nodes_before - 1 == num_of_nodes_after

    odict = oxe.execute_onnx(model, idict)
    out_after = odict["outp"]
    # atol because approx identity ops (e.g. mul by ~1.0) are not exact
    assert np.isclose(out_before, out_after, atol=1e-3).all()
def test_fpgadataflow_packed_dsp(ich, och, idim, k, s, pad, wdt, idt, tdt, odt, mode):
    """InferDoublePackedConv: the conv must become Transpose ->
    ConvDoublePacked_Batch -> Transpose and simulate identically to the
    reference model in both cppsim and rtlsim modes."""
    model = make_model(ich, och, idim, k, s, pad, wdt, idt, tdt, odt)
    cdp_model = model.transform(InferDoublePackedConv())
    assert (
        len(cdp_model.graph.node) == 3
        and cdp_model.graph.node[1].op_type == "ConvDoublePacked_Batch"
        and cdp_model.graph.node[0].op_type == "Transpose"
        and cdp_model.graph.node[-1].op_type == "Transpose"
    ), "Incorrect model"

    # execute models and compare
    x = gen_finn_dt_tensor(idt, (1, ich, idim, idim))
    input_dict = {"inp": x}
    y_expected = oxe.execute_onnx(model, input_dict)["outp"]

    if mode == "cppsim":
        cdp_model = cdp_model.transform(SetExecMode("cppsim"))
        cdp_model = cdp_model.transform(PrepareCppSim())
        cdp_model = cdp_model.transform(CompileCppSim())
        y_produced = oxe.execute_onnx(cdp_model, input_dict)["outp"]
    elif mode == "rtlsim":
        cdp_model = cdp_model.transform(SetExecMode("rtlsim"))
        cdp_model = cdp_model.transform(GiveUniqueNodeNames())
        cdp_model = cdp_model.transform(GiveReadableTensorNames())
        cdp_model = cdp_model.transform(PrepareIP("xc7z020clg400-1", 5))
        cdp_model = cdp_model.transform(HLSSynthIP())
        cdp_model = cdp_model.transform(PrepareRTLSim())
        # after GiveReadableTensorNames the i/o tensors are renamed
        input_dict = {"global_in": x}
        y_produced = oxe.execute_onnx(cdp_model, input_dict)["global_out"]
    else:
        # BUGFIX: previously fell through with y_produced undefined
        raise Exception("Unknown mode")

    # BUGFIX: message previously said "cppsim failed" regardless of mode
    assert (y_produced.flatten() == y_expected.flatten()).all(), mode + " failed"
def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
    """StreamingMaxPool: cppsim/rtlsim output must match the NHWC MaxPool
    ONNX reference model."""
    # non-overlapping pooling windows (stride == kernel)
    stride = k
    ofm_dim = int(((ifm_dim - k) / stride) + 1)
    if ifm_dim % k != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")

    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    # prepare input data
    input_dict = prepare_inputs(x)

    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        # BUGFIX: error message previously named the wrong test
        # (test_fpgadataflow_slidingwindow)
        raise Exception("Unknown exec_mode in test_fpgadataflow_streamingmaxpool")

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()
def test_onnx_exec_internal_rounding():
    """Execution must round float inputs that are within epsilon of an
    integer back to that integer when the tensor has an integer datatype."""
    inp0 = onnx.helper.make_tensor_value_info("inp0", onnx.TensorProto.FLOAT, [2, 2])
    inp1 = onnx.helper.make_tensor_value_info("inp1", onnx.TensorProto.FLOAT, [1])
    outp = onnx.helper.make_tensor_value_info("outp", onnx.TensorProto.FLOAT, [2, 2])
    mul_node = onnx.helper.make_node("Mul", inputs=["inp0", "inp1"], outputs=["outp"])
    graph = onnx.helper.make_graph(
        nodes=[mul_node], name="mul_graph", inputs=[inp0, inp1], outputs=[outp]
    )

    model = onnx.helper.make_model(graph, producer_name="mul-model")
    model = ModelWrapper(model)
    idt = DataType.INT2
    model.set_tensor_datatype("inp0", idt)
    model.set_tensor_datatype("inp1", idt)
    # BUGFIX: transform() returns a new model — the result was being discarded
    model = model.transform(InferShapes())

    mul_value = np.asarray([-1], dtype=np.float32)
    inp_int = gen_finn_dt_tensor(idt, [2, 2])
    # perturb the integer input by a tiny float error; execution should
    # round it back because inp0 is declared as an integer datatype
    scale = np.random.uniform(low=0, high=1, size=(2, 2)).astype(np.float32)
    inp_rounded = (inp_int * scale) / (scale + 1e-7)

    input_dict = {"inp0": inp_rounded, "inp1": mul_value}
    output_dict = oxe.execute_onnx(model, input_dict)
    produced = output_dict["outp"]
    expected = np.multiply(inp_int, mul_value)
    assert (produced == expected).all()
def test_fpgadataflow_slidingwindow(idt, k, ifm_dim, ifm_ch, stride):
    """ConvolutionInputGenerator: npysim and rtlsim outputs must both match
    an im2col NumPy reference."""
    simd = ifm_ch
    ofm_dim = int(((ifm_dim - k) / stride) + 1)

    x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim))
    model = make_single_slidingwindow_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt)
    model = model.transform(SetExecMode("npysim"))
    model = model.transform(CodeGen_npysim())
    model = model.transform(Compile())

    # prepare input data
    input_dict = prepare_inputs(x, idt)

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_expected = im2col_indices(x, k, stride)
    # reshape expected output to match node output
    oshape = y_produced.shape
    y_expected = y_expected.reshape(oshape)

    assert (y_produced == y_expected).all(), "npysim failed"

    # re-run the same model through the rtlsim flow
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(CodeGen_ipgen("xc7z020clg400-1", 5))
    model = model.transform(HLSSynth_IPGen())
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all(), "rtlsim failed"
def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mode):
    """StreamingMaxPool (1D or 2D): cppsim/rtlsim output must match the NHWC
    MaxPool ONNX reference; rtlsim additionally checks the cycle estimate."""
    ifm_dim_h = ifm_dim
    k_h = k
    if dim_1d:
        # 1D pooling: collapse the width dimension
        ifm_dim_w = 1
        k_w = 1
    else:
        ifm_dim_w = ifm_dim_h
        k_w = k_h
    ifm_dim = (ifm_dim_h, ifm_dim_w)
    k = (k_h, k_w)

    # non-overlapping pooling windows (stride == kernel)
    stride_h = k_h
    stride_w = k_w
    ofm_dim_h = int(((ifm_dim_h - k_h) / stride_h) + 1)
    ofm_dim_w = int(((ifm_dim_w - k_w) / stride_w) + 1)
    ofm_dim = (ofm_dim_h, ofm_dim_w)
    if idt == DataType["BIPOLAR"] and dim_1d:
        pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)")
    if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")

    x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
    # prepare input data
    input_dict = prepare_inputs(x)

    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception(
            "Unknown exec_mode in test_layer_streaming_maxpool_batch")

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        # allow some slack between measured and estimated cycles
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
        assert exp_cycles != 0
def test_change_datalayout_quantavgpool(s, k, ibits, obits, signed, c, idim):
    """ChangeDataLayoutQuantAvgPool2d must wrap QuantAvgPool2d in two
    Transpose nodes (NCHW<->NHWC), keep execution identical, and set the
    node's data_layout attribute to NHWC."""
    n = 1
    odim = compute_pool_output_dim(idim, k, s)
    # determine input FINN datatype
    if signed is True:
        prefix = "INT"
    else:
        prefix = "UINT"
    dt_name = prefix + str(ibits)
    dtype = DataType[dt_name]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [n, c, idim, idim])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [n, c, odim, odim])

    node = helper.make_node(
        "QuantAvgPool2d",
        ["inp"],
        ["outp"],
        domain="finn",
        stride=s,
        kernel=k,
        ibits=ibits,
        obits=obits,
        signed=signed,
        data_layout="NCHW",
    )
    graph = helper.make_graph(
        nodes=[node], name="single-quantavgpool", inputs=[inp], outputs=[outp]
    )

    model = helper.make_model(graph)
    model = ModelWrapper(model)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model_transformed = model.transform(ChangeDataLayoutQuantAvgPool2d())
    model_transformed = model_transformed.transform(InferShapes())
    model_transformed = model_transformed.transform(InferDataTypes())
    model_transformed = model_transformed.transform(InferDataLayouts())
    model_transformed = model_transformed.transform(GiveUniqueNodeNames())
    model_transformed = model_transformed.transform(GiveReadableTensorNames())
    inp_values = gen_finn_dt_tensor(dtype, [n, c, idim, idim])
    idict = {"inp": inp_values}
    # execution must be unchanged by the layout transformation
    assert oxe.compare_execution(model, model_transformed, idict)
    # two Transpose nodes are added around the pool node
    assert len(model.graph.node) + 2 == len(model_transformed.graph.node)
    assert model_transformed.graph.node[-1].op_type == "Transpose"
    assert model_transformed.graph.node[0].op_type == "Transpose"
    # check if QuantAvgPool2d node has datalayout set correctly
    node = model_transformed.graph.node[1]
    d_layout = get_by_name(node.attribute, "data_layout").s.decode("UTF-8")
    assert d_layout == "NHWC"
    assert model_transformed.get_tensor_layout(node.input[0]) == DataLayout.NHWC
    assert model_transformed.get_tensor_layout(node.output[0]) == DataLayout.NHWC
def test_depthwise_conv_hls_rtlsim(act, pe, k, stride, padding):
    """Convert a depthwise-conv reference (Im2Col + MatMul [+ MultiThreshold])
    to ConvolutionInputGenerator + Vector_Vector_Activate_Batch and check that
    rtlsim execution matches the reference model."""
    idt = wdt = DataType.INT4
    ifm_dim = 6
    ifm_ch = 4

    # set up reference model consisting of Im2Col + MatMul (+ MultiThreshold)
    model = set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding)

    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_dim, ifm_dim, ifm_ch])
    input_dict = {"inp": input_tensor}

    new_model = model.transform(InferConvInpGen())
    new_model = new_model.transform(InferVVAU())

    # set SIMD in ConvInputGen node and PE in VVAU node
    for n in new_model.graph.node:
        if n.op_type == "ConvolutionInputGenerator":
            convinputgen_node = getCustomOp(n)
            convinputgen_node.set_nodeattr("SIMD", pe)
        elif n.op_type == "Vector_Vector_Activate_Batch":
            vvau_node = getCustomOp(n)
            vvau_node.set_nodeattr("PE", pe)

    new_model = new_model.transform(SetExecMode("rtlsim"))
    new_model = new_model.transform(GiveUniqueNodeNames())
    new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
    new_model = new_model.transform(HLSSynthIP())
    new_model = new_model.transform(PrepareRTLSim())

    assert oxe.compare_execution(model, new_model, input_dict)
def test_move_identical_op_past_join_op(perm):
    """MoveTransposePastJoinAdd: execution must be unchanged, and the op
    consuming each graph input must differ after the transform."""
    model = create_model(perm)

    # Note: it is assumed that both tensors have the same shape and data type,
    # so a single random tensor is reused for both inputs.
    in0_name = model.graph.input[0].name
    in1_name = model.graph.input[1].name
    shared_val = gen_finn_dt_tensor(
        model.get_tensor_datatype(in0_name), model.get_tensor_shape(in0_name)
    )
    input_dict = {in0_name: shared_val, in1_name: shared_val}

    model_transformed = model.transform(MoveTransposePastJoinAdd())
    assert oxe.compare_execution(model, model_transformed, input_dict)

    # Check if order changed: the node consuming each input must have a
    # different op type in the transformed graph
    for idx in (0, 1):
        consumer_before = model.find_consumers(model.graph.input[idx].name)[0].op_type
        consumer_after = model_transformed.find_consumers(
            model_transformed.graph.input[idx].name
        )[0].op_type
        assert consumer_before != consumer_after
def test_brevitas_qlinear(bias, out_features, in_features, w_bits, i_dtype):
    """Export a Brevitas QuantLinear to FINN-ONNX and check FINN execution
    matches the PyTorch forward pass (atol=1e-3)."""
    i_shape = (1, in_features)
    w_shape = (out_features, in_features)
    b_linear = QuantLinear(
        out_features=out_features,
        in_features=in_features,
        bias=bias,
        bias_quant_type=QuantType.FP,
        weight_bit_width=w_bits,
        weight_quant_type=QuantType.INT,
        weight_scaling_per_output_channel=True,
    )
    # random float weights in [-1, 1)
    weight_tensor_fp = np.random.uniform(low=-1.0, high=1.0, size=w_shape).astype(np.float32)
    b_linear.weight.data = torch.from_numpy(weight_tensor_fp)
    b_linear.eval()
    # export to ONNX, then re-import through FINN's ModelWrapper
    bo.export_finn_onnx(b_linear, i_shape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = gen_finn_dt_tensor(i_dtype, i_shape)
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    expected = b_linear.forward(inp_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
def test_fpgadataflow_labelselect(idt, labels, fold, k, exec_mode):
    """LabelSelect: the simulated output must be a valid top-k selection
    of the input (checked via soft_verify_topk)."""
    np.random.seed(0)
    pe = 1 if fold == -1 else labels // fold
    assert labels % pe == 0

    if k == -1:
        k = labels

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, labels))

    model = make_labelselect_modelwrapper(labels, pe, k, idt)

    if exec_mode == "cppsim":
        steps = [PrepareCppSim(), CompileCppSim(), SetExecMode("cppsim")]
    elif exec_mode == "rtlsim":
        steps = [
            SetExecMode("rtlsim"),
            GiveUniqueNodeNames(),
            PrepareIP("xc7z020clg400-1", 5),
            HLSSynthIP(),
            PrepareRTLSim(),
        ]
    else:
        raise Exception("Unknown exec_mode")
    for step in steps:
        model = model.transform(step)

    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    y = oxe.execute_onnx(model, input_dict)["outp"]

    assert soft_verify_topk(x, y, k), exec_mode + " failed"
def test_brevitas_QConv2d(dw, bias, in_channels, QONNX_export):
    """Export a Brevitas QuantConv2d (depthwise 3x3 or pointwise 1x1) to ONNX
    and check FINN execution matches the PyTorch forward pass (atol=1e-3).

    dw: depthwise (group == in_channels) vs 1x1 conv; QONNX_export: use the
    QONNX export path plus ConvertQONNXtoFINN instead of direct FINN export.
    """
    # NOTE(review): shapes are hard-coded for in_channels == 32 — confirm
    # against the test parametrization
    ishape = (1, 32, 111, 111)
    if dw is True:
        groups = in_channels
        out_channels = in_channels
        kernel_size = 3
        padding = 1
        stride = 1
        w_shape = (32, 1, 3, 3)
    else:
        groups = 1
        out_channels = 64
        kernel_size = 1
        padding = 0
        stride = 1
        w_shape = (64, 32, 1, 1)

    b_conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        groups=groups,
        kernel_size=kernel_size,
        padding=padding,
        stride=stride,
        bias=bias,
        bias_quant_type=QuantType.FP,
        weight_bit_width=4,
        weight_quant_type=QuantType.INT,
        weight_scaling_impl_type=ScalingImplType.STATS,
        weight_scaling_stats_op=StatsOp.MAX,
        weight_scaling_per_output_channel=True,
        weight_restrict_scaling_type=RestrictValueType.LOG_FP,
        weight_narrow_range=True,
        weight_scaling_min_val=2e-16,
    )
    # random INT4 weights
    weight_tensor = gen_finn_dt_tensor(DataType["INT4"], w_shape)
    b_conv.weight = torch.nn.Parameter(torch.from_numpy(weight_tensor).float())
    b_conv.eval()
    if QONNX_export:
        m_path = export_onnx_path
        BrevitasONNXManager.export(b_conv, ishape, m_path)
        # clean up the QONNX graph and convert it into FINN form
        qonnx_cleanup(m_path, out_file=m_path)
        model = ModelWrapper(m_path)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(m_path)
    else:
        bo.export_finn_onnx(b_conv, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = np.random.uniform(low=-1.0, high=1.0, size=ishape).astype(np.float32)
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    expected = b_conv.forward(inp_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode):
    """AddStreams_Batch: simulated output must equal x1 + x2; in rtlsim mode
    the measured cycle count must be close to the analytical estimate."""
    pe = 1 if fold == -1 else max(1, ch // fold)
    assert ch % pe == 0

    # generate the two random operand streams
    x1 = gen_finn_dt_tensor(idt, (1, ch))
    x2 = gen_finn_dt_tensor(idt, (1, ch))

    model = make_addstreams_modelwrapper(ch, pe, idt)

    if exec_mode == "cppsim":
        steps = [PrepareCppSim(), CompileCppSim(), SetExecMode("cppsim")]
    elif exec_mode == "rtlsim":
        steps = [
            SetExecMode("rtlsim"),
            GiveUniqueNodeNames(),
            PrepareIP("xc7z020clg400-1", 5),
            HLSSynthIP(),
            PrepareRTLSim(),
        ]
    else:
        raise Exception("Unknown exec_mode")
    for step in steps:
        model = model.transform(step)

    # run the model and compare against the elementwise sum
    input_dict = prepare_inputs(x1, x2)
    y_expected = (x1 + x2).reshape(model.get_tensor_shape("outp"))
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_produced = y_produced.reshape(y_expected.shape)
    assert (y_produced == y_expected).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("AddStreams_Batch")[0]
        cycles_rtlsim = getCustomOp(node).get_nodeattr("cycles_rtlsim")
        exp_cycles = model.analysis(exp_cycles_per_layer)[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def set_all_initializers(model):
    """Sets all initializers of the graph to a random value."""
    # candidate nodes: anything with a second input, except the TopK1 node
    candidates = (
        node for node in model.graph.node
        if len(node.input) > 1 and node.name != "TopK1"
    )
    for node in candidates:
        tensor_name = node.input[1]
        tensor_shape = model.get_tensor_shape(tensor_name)
        random_val = gen_finn_dt_tensor(DataType["FLOAT32"], tensor_shape)
        model.set_initializer(tensor_name, random_val)
def test_fast_vs_slow_random(idt, ishape):
    """fast_mode packing must produce exactly the same bytearray as the
    reference slow path for a random tensor."""
    arr = gen_finn_dt_tensor(idt, ishape)
    common_kwargs = dict(reverse_endian=True, reverse_inner=True)
    packed_slow = finnpy_to_packed_bytearray(arr, idt, fast_mode=False, **common_kwargs)
    packed_fast = finnpy_to_packed_bytearray(arr, idt, fast_mode=True, **common_kwargs)
    assert (packed_fast == packed_slow).all()
def test_fpgadataflow_lookup(edt, embedding_cfg, exec_mode):
    """Gather -> Lookup HLS conversion: check the initial model's metadata,
    the converted Lookup node's wiring, and cppsim/rtlsim execution against
    an np.take reference."""
    ishape = (1, 10)
    num_embeddings, idt, embedding_dim = embedding_cfg
    eshape = (num_embeddings, embedding_dim)
    exp_oshape = tuple(list(ishape) + [embedding_dim])
    embeddings = gen_finn_dt_tensor(edt, eshape)
    model = make_lookup_model(embeddings, ishape, idt, edt)
    assert len(model.graph.node) == 1
    assert model.graph.node[0].op_type == "Gather"
    iname = model.graph.input[0].name
    ename = model.graph.node[0].input[0]
    oname = model.graph.output[0].name
    # verify datatypes, shapes and embedded initializer of the Gather model
    assert model.get_tensor_datatype(iname) == idt
    assert model.get_tensor_datatype(ename) == edt
    assert model.get_tensor_datatype(oname) == edt
    assert tuple(model.get_tensor_shape(ename)) == eshape
    assert tuple(model.get_tensor_shape(oname)) == exp_oshape
    assert (model.get_initializer(ename) == embeddings).all()
    # random indices, clipped into the valid embedding range
    itensor = gen_finn_dt_tensor(idt, ishape).astype(np.int64)
    itensor = np.clip(itensor, 0, num_embeddings - 1)
    ret = execute_onnx(model, {iname: itensor})
    # reference: gather along axis 0
    exp_out = np.take(embeddings, itensor, axis=0)
    assert (exp_out == ret[oname]).all()
    # call transformation to convert to HLS and verify conversion
    model = model.transform(InferLookupLayer())
    assert model.graph.node[0].op_type == "Lookup"
    assert model.graph.node[0].input[0] == iname
    assert model.graph.node[0].input[1] == ename
    assert model.graph.node[0].output[0] == oname
    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 10))
        model = model.transform(HLSSynthIP())
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(PrepareRTLSim())
    ret_sim = execute_onnx(model, {iname: itensor})
    assert (exp_out == ret_sim[oname]).all()
def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode):
    """FMPadding_Batch: padded output must equal np.pad of the NHWC input;
    in rtlsim mode also check the cycle count against the estimate."""
    if num_ch % simd != 0:
        pytest.skip(" num_ch % simd != 0, skipping")

    # generate input data
    x = gen_finn_dt_tensor(idt, [1, idim, idim, num_ch])
    input_dict = {"inp": x}
    odim = idim + pad

    model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, simd, idt, pad_style)
    model = model.transform(InferShapes())
    model = model.transform(SetExecMode(mode))
    model = model.transform(GiveUniqueNodeNames())
    if mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif mode == "rtlsim":
        # test_fpga_part / target_clk_ns are module-level settings
        model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    expected_oshape = (1, odim, odim, num_ch)
    assert y_produced.shape == expected_oshape

    # calculate reference
    # calculate correct pad according to parameters
    if pad_style == 2:
        if pad % 2 == 0:
            pad_up = pad // 2
            pad_left = pad // 2
        else:
            # odd total pad with style 2: extra row/column goes up/left
            pad_up = pad // 2 + 1
            pad_left = pad // 2 + 1
    else:
        pad_up = pad // 2
        pad_left = pad // 2
    pad_down = pad - pad_up
    pad_right = pad - pad_left

    y_expected = np.pad(x, ((0, 0), (pad_up, pad_down), (pad_left, pad_right), (0, 0)), "constant")

    assert (y_produced == y_expected).all()

    if mode == "rtlsim":
        node = model.get_nodes_by_op_type("FMPadding_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        # measured cycles must be close to the analytical estimate
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_code_gen_trafo():
    """Run CodeGen_npysim on a one-node StreamingFCLayer_Batch model and
    verify that every node is assigned a non-empty code generation dir."""
    idt = wdt = odt = DataType.BIPOLAR
    mw = mh = 8
    pe = simd = 4
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, mh])
    FCLayer_node = helper.make_node(
        "StreamingFCLayer_Batch",
        ["inp", "weights", "thresh"],
        ["outp"],
        domain="finn",
        backend="fpgadataflow",
        code_gen_dir="",
        executable_path="",
        resType="ap_resource_lut()",
        MW=mw,
        MH=mh,
        SIMD=simd,
        PE=pe,
        inputDataType=idt.name,
        weightDataType=wdt.name,
        outputDataType=odt.name,
        noActivation=1,
    )
    graph = helper.make_graph(
        nodes=[FCLayer_node], name="fclayer_graph", inputs=[inp], outputs=[outp]
    )
    model = ModelWrapper(helper.make_model(graph, producer_name="fclayer-model"))
    for tname, tdt in [("inp", idt), ("outp", odt), ("weights", wdt)]:
        model.set_tensor_datatype(tname, tdt)
    model.set_initializer("weights", util.gen_finn_dt_tensor(wdt, (mw, mh)))

    model = model.transform(CodeGen_npysim())
    for node in model.graph.node:
        # the transform stores the generated dir in the node attributes
        code_gen_attribute = util.get_by_name(node.attribute, "code_gen_dir_npysim")
        tmp_dir = code_gen_attribute.s.decode("UTF-8")
        assert os.path.isdir(
            tmp_dir
        ), """Code generation directory of node with op type {} does not exist!""".format(
            node.op_type
        )
        assert (
            len(os.listdir(tmp_dir)) != 0
        ), """Code generation directory of node with op type {} is empty!""".format(
            node.op_type
        )
def generate_random_input(model):
    """Create an input dictionary with one random numpy array per graph input.

    Parameters
    ----------
    model : ModelWrapper
        Model whose top-level graph inputs are inspected for their shapes.

    Returns
    -------
    dict
        Maps each input tensor name to a random FLOAT32 tensor of its shape.
    """
    input_dict = {}
    # iterate the inputs directly instead of indexing via range(len(...))
    for input_node in model.graph.input:
        input_node_name = input_node.name
        input_node_shape = model.get_tensor_shape(input_node_name)
        input_dict[input_node_name] = gen_finn_dt_tensor(
            DataType["FLOAT32"], input_node_shape
        )
    return input_dict
def throughput_test_rtlsim(model, batchsize=100):
    """Runs a throughput test for the given IP-stitched model. When combined
    with tracing, useful to determine bottlenecks and required FIFO sizes.

    Parameters
    ----------
    model : ModelWrapper
        IP-stitched model; its "exec_mode" metadata_prop must be "rtlsim".
    batchsize : int
        Batch dimension used to override dim 0 of every graph input/output.

    Returns
    -------
    dict with cycle count, runtime, throughput and DRAM bandwidth metrics.
    """
    assert (
        model.get_metadata_prop("exec_mode") == "rtlsim"
    ), """Top-level exec_mode metadata_prop must be set to rtlsim"""
    # make empty exec context and insert random inputs
    ctx = model.make_empty_exec_context()
    i_bytes = 0
    for i_vi in model.graph.input:
        # create random input
        iname = i_vi.name
        # copy the shape before overriding the batch dim: the original code
        # aliased the list returned by get_tensor_shape and mutated it in
        # place, which could corrupt shape metadata for later callers
        ishape_batch = list(model.get_tensor_shape(iname))
        ishape_batch[0] = batchsize
        idt = model.get_tensor_datatype(iname)
        ctx[iname] = gen_finn_dt_tensor(idt, ishape_batch)
        i_bytes += (np.prod(ishape_batch) * idt.bitwidth()) / 8
    # compute total output size as well
    o_bytes = 0
    for o_vi in model.graph.output:
        oname = o_vi.name
        oshape_batch = list(model.get_tensor_shape(oname))
        oshape_batch[0] = batchsize
        odt = model.get_tensor_datatype(oname)
        o_bytes += (np.prod(oshape_batch) * odt.bitwidth()) / 8
    # remove liveness threshold, launch rtlsim
    # NOTE(review): this permanently mutates the process environment
    os.environ["LIVENESS_THRESHOLD"] = "-1"
    rtlsim_exec(model, ctx)
    # extract metrics
    cycles = int(model.get_metadata_prop("cycles_rtlsim"))
    clk_ns = float(model.get_metadata_prop("clk_ns"))
    fclk_mhz = 1 / (clk_ns * 0.001)
    runtime_s = (cycles * clk_ns) * (10**-9)
    res = dict()
    res["cycles"] = cycles
    res["runtime[ms]"] = runtime_s * 1000
    res["throughput[images/s]"] = batchsize / runtime_s
    res["DRAM_in_bandwidth[Mb/s]"] = i_bytes * 0.000001 / runtime_s
    res["DRAM_out_bandwidth[Mb/s]"] = o_bytes * 0.000001 / runtime_s
    res["fclk[mhz]"] = fclk_mhz
    res["N"] = batchsize
    return res
def test_extend_partition(p, extend_id):
    """Partition the test model via PartitionFromDict, extend the selected
    GenericPartition node(s) back, and check execution equivalence plus
    datatype retention on Conv weights."""
    if p == 0:
        if extend_id != [0]:
            pytest.skip("Only the first partition node can be extended")
    if p == 1:
        if extend_id != [1]:
            pytest.skip("Only the second partition node can be extended")
        else:
            # The 6th node is the index of the GenericPartition
            # node, so we set the index to the right value
            # NOTE(review): else-binding reconstructed from mangled source —
            # with partitionings[1] the partition node sits after the six
            # untouched nodes, i.e. at index 6; confirm against upstream
            extend_id = [6]

    model = create_model()

    # Partition the model first
    partitionings = [
        {0: range(0, 6)},
        {0: range(6, 12)},
        {0: range(0, 6), 1: range(6, 12)},
    ]
    model = model.transform(PartitionFromDict(partitionings[p]))

    # Create input data
    in0_name = model.graph.input[0].name
    in0_shape = model.get_tensor_shape(in0_name)
    in0_dtype = model.get_tensor_datatype(in0_name)
    input_dict = {in0_name: gen_finn_dt_tensor(in0_dtype, in0_shape)}

    # Extend the model
    model_extended = model.transform(ExtendPartition(extend_id))
    assert oxe.compare_execution(model, model_extended, input_dict)

    # Check if FINN data_types are retained
    for n in model_extended.graph.node:
        if n.op_type == "Conv":
            assert model_extended.get_tensor_datatype(n.input[1]) == DataType.INT4
def test_fpgadataflow_slidingwindow(
    idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw
):
    """Simulate ConvolutionInputGenerator (cppsim/rtlsim) against an Im2Col
    reference model, for both dense and depthwise (dw == 1) variants.

    Also checks the analytical cycle estimate in rtlsim mode.
    """
    # output dim of the sliding window, taking dilation into account:
    # effective kernel extent is (k - 1) * dilation + 1. For dilation == 1
    # this reduces exactly to the previous ((ifm_dim - k) / stride) + 1,
    # which silently ignored the dilation parameter passed to the builders.
    ofm_dim = int(((ifm_dim - (k - 1) * dilation - 1) / stride) + 1)
    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    model = make_single_slidingwindow_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw
    )

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")

    # prepare input data
    input_dict = prepare_inputs(x)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    golden = make_single_im2col_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
    )
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    if dw == 0:
        assert (y_produced == y_expected).all()
    else:
        # depthwise variant emits channels in a different interleaving:
        # regroup the im2col output from (k*k, ch) to (ch, k*k) order
        y_expected = y_expected.reshape(
            1, ofm_dim, ofm_dim, k * k, ifm_ch // simd, simd
        )
        y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5)
        y_expected = y_expected.reshape(1, ofm_dim, ofm_dim, ifm_ch * k * k)
        assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("ConvolutionInputGenerator")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_compilation_trafo():
    """Run PrepareCppSim + CompileCppSim on a one-node StreamingFCLayer_Batch
    model and verify every node ends up with an existing executable."""
    idt = wdt = odt = DataType.BIPOLAR
    mw = mh = 8
    pe = simd = 4
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, mh])
    FCLayer_node = helper.make_node(
        "StreamingFCLayer_Batch",
        ["inp", "weights", "thresh"],
        ["outp"],
        domain="finn.custom_op.fpgadataflow",
        backend="fpgadataflow",
        code_gen_dir="",
        executable_path="",
        MW=mw,
        MH=mh,
        SIMD=simd,
        PE=pe,
        inputDataType=idt.name,
        weightDataType=wdt.name,
        outputDataType=odt.name,
        noActivation=1,
    )
    graph = helper.make_graph(
        nodes=[FCLayer_node], name="fclayer_graph", inputs=[inp], outputs=[outp]
    )
    model = ModelWrapper(helper.make_model(graph, producer_name="fclayer-model"))
    for tname, tdt in [("inp", idt), ("outp", odt), ("weights", wdt)]:
        model.set_tensor_datatype(tname, tdt)
    model.set_initializer("weights", util.gen_finn_dt_tensor(wdt, (mw, mh)))

    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    for node in model.graph.node:
        # compilation stores the binary's location in the node attributes
        compilation_attribute = util.get_by_name(node.attribute, "executable_path")
        executable = compilation_attribute.s.decode("UTF-8")
        print(executable)
        assert os.path.isfile(
            executable
        ), """Executable of node with op type {} does not exist!""".format(
            node.op_type
        )
def test_fpgadataflow_ipstitch_remote_execution():
    """Execute the stitched-IP PYNQ deployment model on a remote board and
    check that the output matches the input tensor (skips when no board IP
    is configured via the PYNQ_IP environment variable)."""
    model = ModelWrapper(
        ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_pynq_deployment.onnx"
    )
    try:
        # KeyError from a missing PYNQ_IP is caught below and turns into skip
        ip = os.environ["PYNQ_IP"]  # NOQA
        if ip == "":
            pytest.skip("PYNQ board IP address not specified")
        idt = DataType.INT2
        x = gen_finn_dt_tensor(idt, (1, 4))
        outp = execute_onnx(model, {"inp": x})
        assert np.isclose(outp["outp"], x).all()
    except KeyError:
        pytest.skip("PYNQ board IP address not specified")