def get_normal_output_shape(self):
    """Return the unfolded output shape as a 4-tuple.

    The two spatial output sizes are computed independently per axis by
    ``compute_conv_output_dim`` from the ``IFMDim``, ``ConvKernelDim``,
    ``Stride`` and ``Dilation`` node attributes, with the padding argument
    fixed to 0. The last entry is ``k_h * k_w * IFMChannels``.
    """
    kernel_h, kernel_w = self.get_nodeattr("ConvKernelDim")
    in_h, in_w = self.get_nodeattr("IFMDim")
    channels = self.get_nodeattr("IFMChannels")
    step_h, step_w = self.get_nodeattr("Stride")
    dil_h, dil_w = self.get_nodeattr("Dilation")

    # padding is always passed as 0 in this shape computation
    zero_pad = 0
    out_h = compute_conv_output_dim(in_h, kernel_h, step_h, zero_pad, dil_h)
    out_w = compute_conv_output_dim(in_w, kernel_w, step_w, zero_pad, dil_w)

    return (1, out_h, out_w, kernel_h * kernel_w * channels)
def get_normal_output_shape(self):
    """Return the unfolded output shape ``(1, out_h, out_w, channels)``.

    Reads ``ImgDim``, ``PoolDim`` and ``NumChannels``. The pooling window is
    also used as the stride and the padding argument is 0.

    Raises:
        AssertionError: if an image dimension is not a multiple of the
            corresponding pooling dimension.
    """
    img_h, img_w = self.get_nodeattr("ImgDim")
    pool_h, pool_w = tuple(self.get_nodeattr("PoolDim"))
    channels = self.get_nodeattr("NumChannels")

    assert img_h % pool_h == 0, "StreamingMaxPool needs ImgDim_h % PoolDim_h == 0"
    assert img_w % pool_w == 0, "StreamingMaxPool needs ImgDim_w % PoolDim_w == 0"

    # stride == pooling window, padding == 0
    out_h = compute_conv_output_dim(img_h, pool_h, pool_h, 0)
    out_w = compute_conv_output_dim(img_w, pool_w, pool_w, 0)
    return (1, out_h, out_w, channels)
def test_move_chw_add_past_conv(idim, k, s, ich, och):
    """Check that MoveAddPastConv keeps results and reorders Add/Conv.

    Builds an ``Add -> Conv`` graph with random parameters, executes it before
    and after the transformation, and asserts that the outputs are close and
    that the first two nodes afterwards are ``Conv`` followed by ``Add``.
    """
    odim = compute_conv_output_dim(idim, k, s)

    in_shape = [1, ich, idim, idim]
    out_shape = [1, och, odim, odim]
    add_shape = [1, ich, 1, 1]
    weight_shape = [och, ich, k, k]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape)
    a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, add_shape)
    a1 = helper.make_tensor_value_info("a1", TensorProto.FLOAT, weight_shape)

    conv_attrs = {
        "dilations": [1, 1],
        "group": 1,
        "kernel_shape": [k, k],
        "pads": [0, 0, 0, 0],
        "strides": [s, s],
    }

    add_node = helper.make_node("Add", ["inp", "a0"], ["add_out"])
    conv_node = helper.make_node("Conv", ["add_out", "a1"], ["outp"], **conv_attrs)

    graph = helper.make_graph(
        nodes=[add_node, conv_node],
        name="move-add-graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[a0, a1],
    )
    model = ModelWrapper(helper.make_model(graph))

    # initialize model: random parameters drawn in the order a0, a1
    for tensor_name, tensor_shape in (("a0", add_shape), ("a1", weight_shape)):
        values = np.random.uniform(low=0, high=1, size=tuple(tensor_shape))
        model.set_initializer(tensor_name, values.astype(np.float32))

    model = model.transform(InferShapes())

    # execution before transformation
    inp_values = np.random.uniform(low=0, high=1, size=tuple(in_shape))
    inp_values = inp_values.astype(np.float32)
    idict = {model.graph.input[0].name: inp_values}
    y_before = oxe.execute_onnx(model, idict)[model.graph.output[0].name]

    # execution after transformation
    model = model.transform(MoveAddPastConv())
    y_after = oxe.execute_onnx(model, idict)[model.graph.output[0].name]

    assert np.isclose(y_before, y_after).all()
    assert model.graph.node[0].op_type == "Conv"
    assert model.graph.node[1].op_type == "Add"
def get_folded_output_shape(self):
    """Return the folded output shape ``(1, out_h, out_w, wf, SIMD)``.

    ``wf`` is ``(k_h * k_w * IFMChannels) // SIMD``. The spatial sizes are
    computed per axis with ``compute_conv_output_dim`` using a padding of 0.

    Raises:
        AssertionError: if ``SIMD`` does not divide ``IFMChannels``.
    """
    kernel_h, kernel_w = self.get_nodeattr("ConvKernelDim")
    in_h, in_w = self.get_nodeattr("IFMDim")
    channels = self.get_nodeattr("IFMChannels")
    step_h, step_w = self.get_nodeattr("Stride")
    dil_h, dil_w = self.get_nodeattr("Dilation")
    simd = self.get_nodeattr("SIMD")

    zero_pad = 0
    out_h = compute_conv_output_dim(in_h, kernel_h, step_h, zero_pad, dil_h)
    out_w = compute_conv_output_dim(in_w, kernel_w, step_w, zero_pad, dil_w)

    assert channels % simd == 0, "SIMD must divide IFMChannels"
    fold = int((kernel_h * kernel_w * channels) // simd)
    return (1, out_h, out_w, fold, simd)
def get_normal_output_shape(self):
    """Return the unfolded output shape ``(1, ofm, ofm, k * k * IFMChannels)``.

    A single kernel size, input size and stride are used for both spatial
    axes; the padding argument passed to ``compute_conv_output_dim`` is 0.
    """
    kernel = self.get_nodeattr("ConvKernelDim")
    in_dim = self.get_nodeattr("IFMDim")
    channels = self.get_nodeattr("IFMChannels")
    step = self.get_nodeattr("Stride")

    out_dim = compute_conv_output_dim(in_dim, kernel, step, 0)
    return (1, out_dim, out_dim, kernel * kernel * channels)
def get_folded_output_shape(self):
    """Return the folded output shape ``(1, ofm, ofm, wf, SIMD)``.

    ``wf`` is ``(k * k * IFMChannels) // SIMD``; the same output size is used
    for both spatial axes.

    Raises:
        AssertionError: if ``SIMD`` does not divide ``IFMChannels``.
    """
    kernel = self.get_nodeattr("ConvKernelDim")
    in_dim = self.get_nodeattr("IFMDim")
    channels = self.get_nodeattr("IFMChannels")
    step = self.get_nodeattr("Stride")
    simd = self.get_nodeattr("SIMD")

    out_dim = compute_conv_output_dim(in_dim, kernel, step, 0)

    assert channels % simd == 0, "SIMD must divide IFMChannels"
    fold = int((kernel * kernel * channels) // simd)
    return (1, out_dim, out_dim, fold, simd)
def get_normal_output_shape(self):
    """Return the unfolded output shape with pruned columns removed.

    The last dimension is ``k * k * IFMChannels`` minus
    ``int(sum(pruneMask) * SIMD)``. The resulting shape is also printed to
    stdout before being returned.
    """
    kernel = self.get_nodeattr("ConvKernelDim")
    in_dim = self.get_nodeattr("IFMDim")
    channels = self.get_nodeattr("IFMChannels")
    step = self.get_nodeattr("Stride")
    simd = self.get_nodeattr("SIMD")
    pruned_cols = np.sum(self.get_nodeattr("pruneMask"))

    out_dim = compute_conv_output_dim(in_dim, kernel, step, 0)

    # Remove the columns pruned from the shape (total, not resampled)
    last_dim = kernel * kernel * channels - int(pruned_cols * simd)
    oshape = (1, out_dim, out_dim, last_dim)
    print("generator normal oshape", oshape)
    return oshape
def get_folded_output_shape(self):
    """Return the folded output shape with pruned columns removed.

    The shape is ``(1, ofm, ofm, wf, SIMD)`` where ``wf`` is
    ``(k * k * IFMChannels) // SIMD`` minus the sum of ``pruneMask``. The
    resulting shape is also printed to stdout before being returned.

    Raises:
        AssertionError: if ``SIMD`` does not divide ``IFMChannels``.
    """
    k = self.get_nodeattr("ConvKernelDim")
    ifm_dim = self.get_nodeattr("IFMDim")
    ifm_ch = self.get_nodeattr("IFMChannels")
    stride = self.get_nodeattr("Stride")
    simd = self.get_nodeattr("SIMD")
    # np.sum returns a NumPy scalar; convert it so that the shape tuple holds
    # plain Python ints only, as the normal (unfolded) shape computation does.
    n_cols_pruned = int(np.sum(self.get_nodeattr("pruneMask")))
    pad = 0
    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad)
    assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
    # Remove the pruned columns from the shape (resampled)
    wf = int((k * k * ifm_ch) // simd) - n_cols_pruned
    folded_oshape = (1, ofm_dim, ofm_dim, wf, simd)
    print("generator folded oshape", folded_oshape)
    return folded_oshape
def test_non_equal_padding(
    idt, k_h, k_w, ifm_dim_h, ifm_dim_w, ifm_ch, stride, padding
):
    """Check LowerConvsToMatMul on a Conv node using the given ``padding``.

    A single ``Conv`` node (``group=1``) is executed before and after
    ``LowerConvsToMatMul`` and the two results must be exactly equal.
    """
    weight_dt = idt
    out_dt = DataType["INT32"]
    ofm_ch = ifm_ch

    # entries 0 and 2 of ``padding`` are summed for the height axis,
    # entries 1 and 3 for the width axis
    total_pad_h = padding[0] + padding[2]
    total_pad_w = padding[1] + padding[3]
    ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride, total_pad_h)
    ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride, total_pad_w)

    # set up onnx model
    inp = oh.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim_h, ifm_dim_w]
    )
    outp = oh.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim_h, ofm_dim_w]
    )
    weights = oh.make_tensor_value_info(
        "W", TensorProto.FLOAT, [ofm_ch, ifm_ch, k_h, k_w]
    )

    conv_node = oh.make_node(
        "Conv",
        inputs=["inp", "W"],
        outputs=["outp"],
        kernel_shape=[k_h, k_w],
        pads=padding,
        strides=[stride, stride],
        group=1,
    )
    graph = oh.make_graph(
        nodes=[conv_node],
        name="dw_cnv_graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[weights],
    )

    model = ModelWrapper(oh.make_model(graph, producer_name="dws_cnv-model"))
    for tensor_name, dtype in (("inp", idt), ("outp", out_dt), ("W", weight_dt)):
        model.set_tensor_datatype(tensor_name, dtype)

    model.set_initializer(
        "W", gen_finn_dt_tensor(weight_dt, [ofm_ch, ifm_ch, k_h, k_w])
    )
    model = model.transform(InferShapes())

    input_dict = {
        "inp": gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim_h, ifm_dim_w])
    }

    # reference result from the original Conv node
    expected = oxe.execute_onnx(model, input_dict)["outp"]

    # result after lowering
    model = model.transform(LowerConvsToMatMul())
    produced = oxe.execute_onnx(model, input_dict)["outp"]

    assert (produced == expected).all()
def test_im2col():
    """Compare ``execution_im2col`` with hand-written expected outputs.

    Nine cases (``case_id`` 0 to 8) are run one after another. Each one sets
    the datatype, kernel size, stride, channel count, input size and padding,
    builds an input ``x`` and an ``expected`` array, and asserts that the
    produced output equals ``expected`` element-wise.

    Only case 0 computes the output dimensions (they are needed to reshape its
    flat literals); the remaining cases spell out ``x`` and ``expected`` as
    nested lists, so their shapes come from the literals themselves.
    """
    # Case 0: BIPOLAR input, 2x2 kernel, 1 channel, 4x4 input, no padding.
    case_id = 0
    # bipolar inputs with following im2col parameters
    idt = DataType.BIPOLAR
    k_h = 2
    k_w = 2
    stride = 1
    ifm_ch = 1
    ifm_dim_h = 4
    ifm_dim_w = 4
    pad_amt = [0, 0, 0, 0]
    pad_amt_h = pad_amt[0] + pad_amt[2]
    pad_amt_w = pad_amt[1] + pad_amt[3]
    pad_val = 0
    ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride, pad_amt_h)
    ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride, pad_amt_w)
    x = np.asarray(
        [
            -1.0, -1.0, 1.0, 1.0,
            1.0, -1.0, 1.0, -1.0,
            -1.0, 1.0, -1.0, -1.0,
            1.0, 1.0, 1.0, 1.0,
        ],
        dtype=np.float32,
    ).reshape(1, ifm_dim_h, ifm_dim_w, ifm_ch)
    # one row of four values per sliding-window position
    expected = np.asarray(
        [
            -1.0, -1.0, 1.0, -1.0,
            -1.0, 1.0, -1.0, 1.0,
            1.0, 1.0, 1.0, -1.0,
            1.0, -1.0, -1.0, 1.0,
            -1.0, 1.0, 1.0, -1.0,
            1.0, -1.0, -1.0, -1.0,
            -1.0, 1.0, 1.0, 1.0,
            1.0, -1.0, 1.0, 1.0,
            -1.0, -1.0, 1.0, 1.0,
        ],
        dtype=np.float32,
    ).reshape(1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch)
    produced = execution_im2col(
        x, idt, k_h, k_w, stride, ifm_ch, ifm_dim_h, ifm_dim_w, pad_amt, pad_val
    )
    assert (produced == expected).all(), "Test failed for case number {}".format(
        case_id
    )

    # Case 1: INT8 input, 2x2 kernel, 2 channels, 4x4 input, no padding.
    case_id = 1
    idt = DataType.INT8
    k_h = 2
    k_w = 2
    stride = 1
    ifm_ch = 2
    ifm_dim_h = 4
    ifm_dim_w = 4
    pad_amt = [0, 0, 0, 0]
    pad_val = 0
    x = np.asarray(
        [
            [
                [[1, -1], [2, -2], [3, -3], [4, -4]],
                [[5, -5], [6, -6], [7, -7], [8, -8]],
                [[9, -9], [10, -10], [11, -11], [12, -12]],
                [[13, -13], [14, -14], [15, -15], [16, -16]],
            ]
        ],
        dtype=np.float32,
    )
    expected = np.asarray(
        [
            [
                [
                    [1.0, -1.0, 2.0, -2.0, 5.0, -5.0, 6.0, -6.0],
                    [2.0, -2.0, 3.0, -3.0, 6.0, -6.0, 7.0, -7.0],
                    [3.0, -3.0, 4.0, -4.0, 7.0, -7.0, 8.0, -8.0],
                ],
                [
                    [5.0, -5.0, 6.0, -6.0, 9.0, -9.0, 10.0, -10.0],
                    [6.0, -6.0, 7.0, -7.0, 10.0, -10.0, 11.0, -11.0],
                    [7.0, -7.0, 8.0, -8.0, 11.0, -11.0, 12.0, -12.0],
                ],
                [
                    [9.0, -9.0, 10.0, -10.0, 13.0, -13.0, 14.0, -14.0],
                    [10.0, -10.0, 11.0, -11.0, 14.0, -14.0, 15.0, -15.0],
                    [11.0, -11.0, 12.0, -12.0, 15.0, -15.0, 16.0, -16.0],
                ],
            ]
        ],
        dtype=np.float32,
    )
    produced = execution_im2col(
        x, idt, k_h, k_w, stride, ifm_ch, ifm_dim_h, ifm_dim_w, pad_amt, pad_val
    )
    assert (produced == expected).all(), "Test failed for case number {}".format(
        case_id
    )

    # Case 2: same as case 1 but with a padding of 1 in every pad_amt entry;
    # padded positions show up as pad_val (0) in the expected windows.
    case_id = 2
    idt = DataType.INT8
    k_h = 2
    k_w = 2
    stride = 1
    ifm_ch = 2
    ifm_dim_h = 4
    ifm_dim_w = 4
    pad_amt = [1, 1, 1, 1]
    pad_val = 0
    x = np.asarray(
        [
            [
                [[1, -1], [2, -2], [3, -3], [4, -4]],
                [[5, -5], [6, -6], [7, -7], [8, -8]],
                [[9, -9], [10, -10], [11, -11], [12, -12]],
                [[13, -13], [14, -14], [15, -15], [16, -16]],
            ]
        ],
        dtype=np.float32,
    )
    expected = np.asarray(
        [
            [
                [
                    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, -1.0],
                    [0.0, 0.0, 0.0, 0.0, 1.0, -1.0, 2.0, -2.0],
                    [0.0, 0.0, 0.0, 0.0, 2.0, -2.0, 3.0, -3.0],
                    [0.0, 0.0, 0.0, 0.0, 3.0, -3.0, 4.0, -4.0],
                    [0.0, 0.0, 0.0, 0.0, 4.0, -4.0, 0.0, 0.0],
                ],
                [
                    [0.0, 0.0, 1.0, -1.0, 0.0, 0.0, 5.0, -5.0],
                    [1.0, -1.0, 2.0, -2.0, 5.0, -5.0, 6.0, -6.0],
                    [2.0, -2.0, 3.0, -3.0, 6.0, -6.0, 7.0, -7.0],
                    [3.0, -3.0, 4.0, -4.0, 7.0, -7.0, 8.0, -8.0],
                    [4.0, -4.0, 0.0, 0.0, 8.0, -8.0, 0.0, 0.0],
                ],
                [
                    [0.0, 0.0, 5.0, -5.0, 0.0, 0.0, 9.0, -9.0],
                    [5.0, -5.0, 6.0, -6.0, 9.0, -9.0, 10.0, -10.0],
                    [6.0, -6.0, 7.0, -7.0, 10.0, -10.0, 11.0, -11.0],
                    [7.0, -7.0, 8.0, -8.0, 11.0, -11.0, 12.0, -12.0],
                    [8.0, -8.0, 0.0, 0.0, 12.0, -12.0, 0.0, 0.0],
                ],
                [
                    [0.0, 0.0, 9.0, -9.0, 0.0, 0.0, 13.0, -13.0],
                    [9.0, -9.0, 10.0, -10.0, 13.0, -13.0, 14.0, -14.0],
                    [10.0, -10.0, 11.0, -11.0, 14.0, -14.0, 15.0, -15.0],
                    [11.0, -11.0, 12.0, -12.0, 15.0, -15.0, 16.0, -16.0],
                    [12.0, -12.0, 0.0, 0.0, 16.0, -16.0, 0.0, 0.0],
                ],
                [
                    [0.0, 0.0, 13.0, -13.0, 0.0, 0.0, 0.0, 0.0],
                    [13.0, -13.0, 14.0, -14.0, 0.0, 0.0, 0.0, 0.0],
                    [14.0, -14.0, 15.0, -15.0, 0.0, 0.0, 0.0, 0.0],
                    [15.0, -15.0, 16.0, -16.0, 0.0, 0.0, 0.0, 0.0],
                    [16.0, -16.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                ],
            ]
        ],
        dtype=np.float32,
    )
    produced = execution_im2col(
        x, idt, k_h, k_w, stride, ifm_ch, ifm_dim_h, ifm_dim_w, pad_amt, pad_val
    )
    assert (produced == expected).all(), "Test failed for case number {}".format(
        case_id
    )

    # Case 3: non-square 4x5 input, 2x2 kernel, no padding.
    case_id = 3
    idt = DataType.INT8
    k_h = 2
    k_w = 2
    stride = 1
    ifm_ch = 2
    ifm_dim_h = 4
    ifm_dim_w = 5
    pad_amt = [0, 0, 0, 0]
    pad_val = 0
    x = np.asarray(
        [
            [
                [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]],
                [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]],
                [[11, -11], [12, -12], [13, -13], [14, -14], [15, -15]],
                [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]],
            ]
        ],
        dtype=np.float32,
    )
    expected = np.asarray(
        [
            [
                [
                    [1, -1, 2, -2, 6, -6, 7, -7],
                    [2, -2, 3, -3, 7, -7, 8, -8],
                    [3, -3, 4, -4, 8, -8, 9, -9],
                    [4, -4, 5, -5, 9, -9, 10, -10],
                ],
                [
                    [6, -6, 7, -7, 11, -11, 12, -12],
                    [7, -7, 8, -8, 12, -12, 13, -13],
                    [8, -8, 9, -9, 13, -13, 14, -14],
                    [9, -9, 10, -10, 14, -14, 15, -15],
                ],
                [
                    [11, -11, 12, -12, 16, -16, 17, -17],
                    [12, -12, 13, -13, 17, -17, 18, -18],
                    [13, -13, 14, -14, 18, -18, 19, -19],
                    [14, -14, 15, -15, 19, -19, 20, -20],
                ],
            ]
        ],
        dtype=np.float32,
    )
    produced = execution_im2col(
        x, idt, k_h, k_w, stride, ifm_ch, ifm_dim_h, ifm_dim_w, pad_amt, pad_val
    )
    assert (produced == expected).all(), "Test failed for case number {}".format(
        case_id
    )

    # Case 4: non-square 4x5 input with a non-square 3x2 kernel, no padding.
    case_id = 4
    idt = DataType.INT8
    k_h = 3
    k_w = 2
    stride = 1
    ifm_ch = 2
    ifm_dim_h = 4
    ifm_dim_w = 5
    pad_amt = [0, 0, 0, 0]
    pad_val = 0
    x = np.asarray(
        [
            [
                [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]],
                [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]],
                [[11, -11], [12, -12], [13, -13], [14, -14], [15, -15]],
                [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]],
            ]
        ],
        dtype=np.float32,
    )
    expected = np.asarray(
        [
            [
                [
                    [1, -1, 2, -2, 6, -6, 7, -7, 11, -11, 12, -12],
                    [2, -2, 3, -3, 7, -7, 8, -8, 12, -12, 13, -13],
                    [3, -3, 4, -4, 8, -8, 9, -9, 13, -13, 14, -14],
                    [4, -4, 5, -5, 9, -9, 10, -10, 14, -14, 15, -15],
                ],
                [
                    [6, -6, 7, -7, 11, -11, 12, -12, 16, -16, 17, -17],
                    [7, -7, 8, -8, 12, -12, 13, -13, 17, -17, 18, -18],
                    [8, -8, 9, -9, 13, -13, 14, -14, 18, -18, 19, -19],
                    [9, -9, 10, -10, 14, -14, 15, -15, 19, -19, 20, -20],
                ],
            ]
        ],
        dtype=np.float32,
    )
    produced = execution_im2col(
        x, idt, k_h, k_w, stride, ifm_ch, ifm_dim_h, ifm_dim_w, pad_amt, pad_val
    )
    assert (produced == expected).all(), "Test failed for case number {}".format(
        case_id
    )

    # Case 5: same as case 4 but with a padding of 1 in every pad_amt entry.
    case_id = 5
    idt = DataType.INT8
    k_h = 3
    k_w = 2
    stride = 1
    ifm_ch = 2
    ifm_dim_h = 4
    ifm_dim_w = 5
    pad_amt = [1, 1, 1, 1]
    pad_val = 0
    x = np.asarray(
        [
            [
                [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]],
                [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]],
                [[11, -11], [12, -12], [13, -13], [14, -14], [15, -15]],
                [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]],
            ]
        ],
        dtype=np.float32,
    )
    expected = np.asarray(
        [
            [
                [
                    [0, 0, 0, 0, 0, 0, 1, -1, 0, 0, 6, -6],
                    [0, 0, 0, 0, 1, -1, 2, -2, 6, -6, 7, -7],
                    [0, 0, 0, 0, 2, -2, 3, -3, 7, -7, 8, -8],
                    [0, 0, 0, 0, 3, -3, 4, -4, 8, -8, 9, -9],
                    [0, 0, 0, 0, 4, -4, 5, -5, 9, -9, 10, -10],
                    [0, 0, 0, 0, 5, -5, 0, 0, 10, -10, 0, 0],
                ],
                [
                    [0, 0, 1, -1, 0, 0, 6, -6, 0, 0, 11, -11],
                    [1, -1, 2, -2, 6, -6, 7, -7, 11, -11, 12, -12],
                    [2, -2, 3, -3, 7, -7, 8, -8, 12, -12, 13, -13],
                    [3, -3, 4, -4, 8, -8, 9, -9, 13, -13, 14, -14],
                    [4, -4, 5, -5, 9, -9, 10, -10, 14, -14, 15, -15],
                    [5, -5, 0, 0, 10, -10, 0, 0, 15, -15, 0, 0],
                ],
                [
                    [0, 0, 6, -6, 0, 0, 11, -11, 0, 0, 16, -16],
                    [6, -6, 7, -7, 11, -11, 12, -12, 16, -16, 17, -17],
                    [7, -7, 8, -8, 12, -12, 13, -13, 17, -17, 18, -18],
                    [8, -8, 9, -9, 13, -13, 14, -14, 18, -18, 19, -19],
                    [9, -9, 10, -10, 14, -14, 15, -15, 19, -19, 20, -20],
                    [10, -10, 0, 0, 15, -15, 0, 0, 20, -20, 0, 0],
                ],
                [
                    [0, 0, 11, -11, 0, 0, 16, -16, 0, 0, 0, 0],
                    [11, -11, 12, -12, 16, -16, 17, -17, 0, 0, 0, 0],
                    [12, -12, 13, -13, 17, -17, 18, -18, 0, 0, 0, 0],
                    [13, -13, 14, -14, 18, -18, 19, -19, 0, 0, 0, 0],
                    [14, -14, 15, -15, 19, -19, 20, -20, 0, 0, 0, 0],
                    [15, -15, 0, 0, 20, -20, 0, 0, 0, 0, 0, 0],
                ],
            ]
        ],
        dtype=np.float32,
    )
    produced = execution_im2col(
        x, idt, k_h, k_w, stride, ifm_ch, ifm_dim_h, ifm_dim_w, pad_amt, pad_val
    )
    assert (produced == expected).all(), "Test failed for case number {}".format(
        case_id
    )

    # Case 6: 5x1 input with a 3x1 kernel, no padding.
    case_id = 6
    idt = DataType.INT8
    k_h = 3
    k_w = 1
    stride = 1
    ifm_ch = 2
    ifm_dim_h = 5
    ifm_dim_w = 1
    pad_amt = [0, 0, 0, 0]
    pad_val = 0
    x = np.asarray(
        [[[[1, -1]], [[2, -2]], [[3, -3]], [[4, -4]], [[5, -5]]]],
        dtype=np.float32,
    )
    expected = np.asarray(
        [[[[1, -1, 2, -2, 3, -3]], [[2, -2, 3, -3, 4, -4]], [[3, -3, 4, -4, 5, -5]]]],
        dtype=np.float32,
    )
    produced = execution_im2col(
        x, idt, k_h, k_w, stride, ifm_ch, ifm_dim_h, ifm_dim_w, pad_amt, pad_val
    )
    assert (produced == expected).all(), "Test failed for case number {}".format(
        case_id
    )

    # Case 7: same as case 6 but padded with pad_amt = [1, 0, 1, 0].
    case_id = 7
    idt = DataType.INT8
    k_h = 3
    k_w = 1
    stride = 1
    ifm_ch = 2
    ifm_dim_h = 5
    ifm_dim_w = 1
    pad_amt = [1, 0, 1, 0]
    pad_val = 0
    x = np.asarray(
        [[[[1, -1]], [[2, -2]], [[3, -3]], [[4, -4]], [[5, -5]]]],
        dtype=np.float32,
    )
    expected = np.asarray(
        [
            [
                [[0, 0, 1, -1, 2, -2]],
                [[1, -1, 2, -2, 3, -3]],
                [[2, -2, 3, -3, 4, -4]],
                [[3, -3, 4, -4, 5, -5]],
                [[4, -4, 5, -5, 0, 0]],
            ]
        ],
        dtype=np.float32,
    )
    produced = execution_im2col(
        x, idt, k_h, k_w, stride, ifm_ch, ifm_dim_h, ifm_dim_w, pad_amt, pad_val
    )
    assert (produced == expected).all(), "Test failed for case number {}".format(
        case_id
    )

    # Case 8: same as case 7 but with a stride of 2.
    case_id = 8
    idt = DataType.INT8
    k_h = 3
    k_w = 1
    stride = 2
    ifm_ch = 2
    ifm_dim_h = 5
    ifm_dim_w = 1
    pad_amt = [1, 0, 1, 0]
    pad_val = 0
    x = np.asarray(
        [[[[1, -1]], [[2, -2]], [[3, -3]], [[4, -4]], [[5, -5]]]],
        dtype=np.float32,
    )
    expected = np.asarray(
        [[[[0, 0, 1, -1, 2, -2]], [[2, -2, 3, -3, 4, -4]], [[4, -4, 5, -5, 0, 0]]]],
        dtype=np.float32,
    )
    produced = execution_im2col(
        x, idt, k_h, k_w, stride, ifm_ch, ifm_dim_h, ifm_dim_w, pad_amt, pad_val
    )
    assert (produced == expected).all(), "Test failed for case number {}".format(
        case_id
    )
def execution_im2col(
    x,
    idt,
    k_h,
    k_w,
    stride,
    ifm_ch,
    ifm_dim_h,
    ifm_dim_w,
    pad_amt,
    pad_val=0,
    dilation=1,
):
    """Build a single-node Im2Col model, check its metadata and execute it.

    Args:
        x: input tensor fed to the model as ``"inp"``.
        idt: datatype annotated on the input tensor.
        k_h, k_w: kernel size passed as ``kernel_size=[k_h, k_w]``.
        stride: value of the node's ``stride`` attribute.
        ifm_ch, ifm_dim_h, ifm_dim_w: input tensor is declared with shape
            ``[1, ifm_dim_h, ifm_dim_w, ifm_ch]``.
        pad_amt: four-element padding list; entries 0 and 2 are summed for the
            height axis, entries 1 and 3 for the width axis.
        pad_val: value of the node's ``pad_value`` attribute.
        dilation: value of the node's ``dilations`` attribute.

    Returns:
        The ``"outp"`` tensor produced by executing the model.

    Raises:
        AssertionError: if the output shape or datatype annotations do not
            match what this helper expects.
    """
    total_pad_h = pad_amt[0] + pad_amt[2]
    total_pad_w = pad_amt[1] + pad_amt[3]
    out_h = compute_conv_output_dim(ifm_dim_h, k_h, stride, total_pad_h, dilation)
    out_w = compute_conv_output_dim(ifm_dim_w, k_w, stride, total_pad_w, dilation)
    out_channels = k_h * k_w * ifm_ch

    # set up onnx model
    in_info = helper.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch]
    )
    out_info = helper.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, out_h, out_w, out_channels]
    )
    node = helper.make_node(
        "Im2Col",
        ["inp"],
        ["outp"],
        domain="finn.custom_op.general",
        stride=stride,
        kernel_size=[k_h, k_w],
        pad_amount=pad_amt,
        pad_value=pad_val,
        input_shape="(1,{},{},{})".format(ifm_dim_h, ifm_dim_w, ifm_ch),
        dilations=dilation,
    )
    graph = helper.make_graph(
        nodes=[node], name="im2col_graph", inputs=[in_info], outputs=[out_info]
    )
    model = ModelWrapper(helper.make_model(graph, producer_name="im2col-model"))
    model.set_tensor_datatype("inp", idt)

    # test shape inference
    # NOTE(review): the result of this transform call is not assigned back to
    # ``model``; if transform() returns a new model rather than modifying in
    # place, the shape check below only sees the declared shape -- confirm.
    model.transform(InferShapes())
    assert model.get_tensor_shape("outp") == [1, out_h, out_w, out_channels]

    # test datatype inference
    assert model.get_tensor_datatype("outp") is DataType.FLOAT32
    model = model.transform(InferDataTypes())
    assert model.get_tensor_datatype("outp") is idt

    # execute model on the prepared input
    return oxe.execute_onnx(model, {"inp": x})["outp"]
def test_im2col_infer_shapes():
    """Check the annotated shape of an Im2Col node placed between two Abs nodes.

    The graph is ``Abs -> Im2Col -> Abs`` with a 2x2 kernel, stride 1,
    dilation 1 and no ``pad_amount`` attribute on the Im2Col node.
    """
    idt = DataType.BIPOLAR
    k_h, k_w = 2, 2
    stride = 1
    ifm_ch = 1
    ifm_dim_h, ifm_dim_w = 4, 4
    pad_amt = [0, 0, 0, 0]  # default
    dilation = 1

    total_pad_h = pad_amt[0] + pad_amt[2]
    total_pad_w = pad_amt[1] + pad_amt[3]
    out_h = compute_conv_output_dim(ifm_dim_h, k_h, stride, total_pad_h, dilation)
    out_w = compute_conv_output_dim(ifm_dim_w, k_w, stride, total_pad_w, dilation)
    out_channels = k_h * k_w * ifm_ch

    # set up onnx model
    inp = helper.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch]
    )
    outp = helper.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, out_h, out_w, out_channels]
    )

    first_abs = helper.make_node("Abs", inputs=["inp"], outputs=["abs"])
    im2col = helper.make_node(
        "Im2Col",
        ["abs"],
        ["im2col"],
        domain="finn.custom_op.general",
        stride=stride,
        kernel_size=[k_h, k_w],
        input_shape="(1,{},{},{})".format(ifm_dim_h, ifm_dim_w, ifm_ch),
        dilations=dilation,
    )
    second_abs = helper.make_node("Abs", inputs=["im2col"], outputs=["outp"])

    intermediate_info = [
        helper.make_tensor_value_info(
            "abs", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch]
        ),
        helper.make_tensor_value_info(
            "im2col", TensorProto.FLOAT, [1, out_h, out_w, out_channels]
        ),
    ]
    graph = helper.make_graph(
        nodes=[first_abs, im2col, second_abs],
        name="shape_graph",
        inputs=[inp],
        outputs=[outp],
        value_info=intermediate_info,
    )

    model = ModelWrapper(helper.make_model(graph, producer_name="shape-model"))
    model.set_tensor_datatype("inp", idt)

    # test shape inference
    # NOTE(review): the transformed model is not assigned back; if transform()
    # returns a copy, this assertion only checks the declared value_info
    # shape -- confirm.
    model.transform(InferShapes())
    assert model.get_tensor_shape("im2col") == [1, out_h, out_w, out_channels]
def test_move_mul_past_dw_conv(ifm_dim, ifm_ch, k, stride, pad_amt, dw):
    """Check MoveMulPastDWConv on a ``Mul -> Conv`` graph.

    With ``dw == 1`` the Conv is built with ``group == ifm_ch`` and a
    ``[ofm_ch, 1, k, k]`` weight; otherwise ``group == 1`` and
    ``ofm_ch == ifm_ch + 2``. The outputs before and after the transformation
    must be identical. For ``dw == 0`` the op types of the first two nodes
    must be unchanged, otherwise they must be swapped.
    """
    if dw == 1:
        ofm_ch = ifm_ch
        groups = ifm_ch
        weight_shape = [ofm_ch, 1, k, k]
    else:
        ofm_ch = ifm_ch + 2
        groups = 1
        weight_shape = [ofm_ch, ifm_ch, k, k]

    # pad_amt is applied on both sides of each spatial axis
    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, 2 * pad_amt)

    # set up onnx model
    inp = helper.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim]
    )
    mul = helper.make_tensor_value_info("mul", TensorProto.FLOAT, [1, ifm_ch, 1, 1])
    weights = helper.make_tensor_value_info("W", TensorProto.FLOAT, weight_shape)
    outp = helper.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim, ofm_dim]
    )

    mul_node = helper.make_node("Mul", ["inp", "mul"], ["mul_out"])
    conv_node = helper.make_node(
        "Conv",
        ["mul_out", "W"],
        ["outp"],
        group=groups,
        kernel_shape=[k, k],
        pads=[pad_amt, pad_amt, pad_amt, pad_amt],
        strides=[stride, stride],
    )
    graph = helper.make_graph(
        nodes=[mul_node, conv_node],
        name="mulpastconv_graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[mul, weights],
    )
    model = ModelWrapper(helper.make_model(graph, producer_name="mulpastconv-model"))

    # random tensors are generated in the order: input, multiplier, weights
    inp_values = gen_finn_dt_tensor(DataType.INT2, [1, ifm_ch, ifm_dim, ifm_dim])
    mul_values = gen_finn_dt_tensor(DataType.INT2, [1, ifm_ch, 1, 1])
    weight_values = gen_finn_dt_tensor(DataType.INT2, weight_shape)
    model.set_initializer("W", weight_values)
    model.set_initializer("mul", mul_values)

    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())

    idict = {"inp": inp_values}
    out_before = oxe.execute_onnx(model, idict, True)["outp"]

    # move channelwise multiplication past depthwise conv
    model_transformed = model.transform(MoveMulPastDWConv())
    out_after = oxe.execute_onnx(model_transformed, idict, True)["outp"]

    assert (out_before == out_after).all()

    # (index in original graph, index in transformed graph) pairs whose op
    # types must match
    if dw == 0:
        index_pairs = ((0, 0), (1, 1))
    else:
        index_pairs = ((0, 1), (1, 0))
    for before_idx, after_idx in index_pairs:
        assert (
            model.graph.node[before_idx].op_type
            == model_transformed.graph.node[after_idx].op_type
        )
def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape):
    """Check HLS conversion of a Conv -> threshold -> flatten -> MatMul model.

    ``conv_config`` unpacks into ``(input_shape, kernel_shape, stride, pad)``,
    each a height/width pair. The flattening step is a ``Reshape`` node when
    ``use_reshape`` is truthy and a ``Flatten`` node otherwise. After
    streamlining, lowering and HLS conversion the model is compiled for
    cppsim and compared against the original; exactly one ``Transpose`` and
    no ``Flatten``/``Reshape`` nodes may remain.
    """
    np.random.seed(0)
    idt = DataType["UINT4"]
    odt = DataType["UINT4"]
    conv_weight_dt = DataType["INT4"]
    fc_weight_dt = DataType["INT4"]

    in_dims, kernel_dims, strides, pads = conv_config
    kernel_h, kernel_w = kernel_dims
    in_h, in_w = in_dims
    stride_h, stride_w = strides
    pad_h, pad_w = pads

    in_chn = 4
    fc_filters = 16
    if depthwise is True:
        group = out_chn = in_chn
        conv_param_shape = [out_chn, 1, kernel_h, kernel_w]
    else:
        group = 1
        out_chn = 8
        conv_param_shape = [out_chn, in_chn, kernel_h, kernel_w]

    # padding is applied on both sides of each axis
    out_h = compute_conv_output_dim(in_h, kernel_h, stride_h, 2 * pad_h)
    out_w = compute_conv_output_dim(in_w, kernel_w, stride_w, 2 * pad_w)

    in_tensor_shape = [1, in_chn, in_h, in_w]
    fc_param_shape = [out_chn * out_h * out_w, fc_filters]
    out_tensor_shape = [1, fc_filters]

    conv_attrs = {
        "dilations": [1, 1],
        "group": group,
        "kernel_shape": [kernel_h, kernel_w],
        "pads": [pad_h, pad_w, pad_h, pad_w],
        "strides": [stride_h, stride_w],
    }

    global_in = helper.make_tensor_value_info(
        "global_in", TensorProto.FLOAT, in_tensor_shape
    )
    global_out = helper.make_tensor_value_info(
        "global_out", TensorProto.FLOAT, out_tensor_shape
    )
    value_info = [
        helper.make_tensor_value_info(
            "conv_param", TensorProto.FLOAT, conv_param_shape
        ),
        helper.make_tensor_value_info(
            "thres1_param", TensorProto.FLOAT, (out_chn, 15)
        ),
        helper.make_tensor_value_info(
            "matmul_param", TensorProto.FLOAT, fc_param_shape
        ),
        helper.make_tensor_value_info(
            "thres2_param", TensorProto.FLOAT, (fc_filters, 15)
        ),
        helper.make_tensor_value_info("reshape_shape", TensorProto.INT64, []),
    ]

    if use_reshape:
        flatten_node = helper.make_node(
            "Reshape", ["thres1_out", "reshape_shape"], ["flatten_out"]
        )
    else:
        flatten_node = helper.make_node(
            "Flatten", ["thres1_out"], ["flatten_out"], axis=1
        )

    conv_node = helper.make_node(
        "Conv", ["global_in", "conv_param"], ["conv_out"], **conv_attrs
    )
    thres1_node = helper.make_node(
        "MultiThreshold",
        ["conv_out", "thres1_param"],
        ["thres1_out"],
        domain="finn.custom_op.general",
        out_dtype="UINT4",
    )
    matmul_node = helper.make_node(
        "MatMul", ["flatten_out", "matmul_param"], ["matmul_out"]
    )
    thres2_node = helper.make_node(
        "MultiThreshold",
        ["matmul_out", "thres2_param"],
        ["global_out"],
        domain="finn.custom_op.general",
        out_dtype="UINT4",
    )

    graph = helper.make_graph(
        name="test",
        inputs=[global_in],
        outputs=[global_out],
        value_info=value_info,
        nodes=[conv_node, thres1_node, flatten_node, matmul_node, thres2_node],
    )
    model = ModelWrapper(helper.make_model(graph))

    model.set_tensor_datatype("global_in", idt)
    model.set_tensor_layout("global_in", DataLayout.NCHW)
    model.set_tensor_datatype("global_out", odt)
    model.set_tensor_datatype("conv_param", conv_weight_dt)
    model.set_tensor_datatype("matmul_param", fc_weight_dt)
    model.set_tensor_datatype("thres1_param", DataType["INT32"])
    model.set_tensor_datatype("thres2_param", DataType["INT32"])

    # parameters are generated in this order: conv weights, first thresholds,
    # second thresholds, matmul weights
    model.set_initializer(
        "conv_param", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape)
    )
    model.set_initializer(
        "thres1_param", get_multithreshold_rand_params(out_chn, 15, seed=0)
    )
    model.set_initializer(
        "thres2_param", get_multithreshold_rand_params(fc_filters, 15, seed=0)
    )
    model.set_initializer(
        "matmul_param", gen_finn_dt_tensor(fc_weight_dt, fc_param_shape)
    )
    model.set_initializer("reshape_shape", np.array([1, -1]))

    for make_transform in (InferShapes, InferDataTypes, InferDataLayouts):
        model = model.transform(make_transform())

    # streamlining
    new_model = model.transform(MoveScalarLinearPastInvariants())
    for make_transform in (
        Streamline,
        LowerConvsToMatMul,
        absorb.AbsorbTransposeIntoMultiThreshold,
        Streamline,
        InferDataLayouts,
        RemoveUnusedTensors,
    ):
        new_model = new_model.transform(make_transform())

    # convert_to_hls
    if depthwise is True:
        new_model = new_model.transform(to_hls.InferVVAU())
    for make_transform in (
        to_hls.InferQuantizedStreamingFCLayer,
        to_hls.InferThresholdingLayer,
        to_hls.InferConvInpGen,
        to_hls.InferStreamingMaxPool,
        RemoveCNVtoFCFlatten,
        absorb.AbsorbConsecutiveTransposes,
        GiveUniqueNodeNames,
        InferDataLayouts,
    ):
        new_model = new_model.transform(make_transform())

    # prepare cppsim
    new_model = new_model.transform(PrepareCppSim())
    new_model = new_model.transform(CompileCppSim())
    new_model = new_model.transform(SetExecMode("cppsim"))

    # check for correct execution
    x = gen_finn_dt_tensor(idt, in_tensor_shape)
    inp_dict = {model.graph.input[0].name: x}
    assert oxe.compare_execution(model, new_model, inp_dict)

    num_transpose = len(new_model.get_nodes_by_op_type("Transpose"))
    num_flatten = len(new_model.get_nodes_by_op_type("Flatten"))
    num_reshape = len(new_model.get_nodes_by_op_type("Reshape"))

    # check if transpose->flatten was removed
    assert num_transpose == 1 and num_flatten == 0 and num_reshape == 0
def test_fpgadataflow_slidingwindow_1d(idt, k, ifm_dim, ifm_ch, stride, dilation,
                                       exec_mode, simd, dw, flip):
    """Compare the sliding-window model against an Im2Col reference model.

    ``k``, ``ifm_dim``, ``stride`` and ``dilation`` are (height, width) pairs
    and are reversed when ``flip`` is truthy. The test is skipped when
    dilation and stride are both greater than 1 on any axis, or when ``simd``
    exceeds ``ifm_ch``. For ``dw != 0`` the reference output is rearranged
    before the comparison. In ``"rtlsim"`` mode the expected cycle count is
    additionally compared with the simulated one.

    Raises:
        Exception: if ``exec_mode`` is neither ``"cppsim"`` nor ``"rtlsim"``.
    """
    if flip:
        k, ifm_dim, stride, dilation = (
            pair[::-1] for pair in (k, ifm_dim, stride, dilation)
        )

    k_h, k_w = k
    ifm_dim_h, ifm_dim_w = ifm_dim
    stride_h, stride_w = stride
    dilation_h, dilation_w = dilation

    uses_dilation = dilation_h > 1 or dilation_w > 1
    uses_stride = stride_h > 1 or stride_w > 1
    if uses_dilation and uses_stride:
        pytest.skip(
            "Dilation value greater than 1 and stride greater than 1 "
            "currently not supported for 1D convolutions"
        )
    if simd > ifm_ch:
        pytest.skip("SIMD cannot be larger than number of input channels")

    ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h)
    ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w)
    ofm_dim = [ofm_dim_h, ofm_dim_w]

    x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
    model = make_single_slidingwindow_modelwrapper(
        k=k,
        ifm_ch=ifm_ch,
        ifm_dim=ifm_dim,
        ofm_dim=ofm_dim,
        simd=simd,
        stride=stride,
        dilation=dilation,
        idt=idt,
        dw=dw,
    )

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")

    # prepare input data
    input_dict = prepare_inputs(x)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]

    # reference result from an Im2Col model with the same parameters
    golden = make_single_im2col_modelwrapper(
        k=k,
        ifm_ch=ifm_ch,
        ifm_dim=ifm_dim,
        ofm_dim=ofm_dim,
        simd=simd,
        stride=stride,
        dilation=dilation,
        idt=idt,
    )
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    if dw != 0:
        # swap the kernel-position axis with the channel-fold axis of the
        # reference before comparing
        y_expected = y_expected.reshape(
            1, ofm_dim_h, ofm_dim_w, k_h * k_w, ifm_ch // simd, simd
        )
        y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5)
        y_expected = y_expected.reshape(
            1, ofm_dim_h, ofm_dim_w, ifm_ch * k_h * k_w
        )
    assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("ConvolutionInputGenerator1D")[0]
        cycles_rtlsim = getCustomOp(node).get_nodeattr("cycles_rtlsim")
        exp_cycles = model.analysis(exp_cycles_per_layer)[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_dws_reg_conv_lowering(
    idt, k_h, k_w, ifm_dim_h, ifm_dim_w, ifm_ch, stride, padding, dilations, dw
):
    """Check LowerConvsToMatMul on a regular or depthwise Conv node.

    A single ``Conv`` node is executed before and after the lowering and both
    results must be exactly equal. With ``dw is True`` the Conv is built with
    ``group == ifm_ch`` and a ``[ofm_ch, 1, k_h, k_w]`` weight, and the lowered
    model must carry a sparsity annotation on ``W`` and ``depthwise == 1`` on
    the node at index 1.

    The test is skipped when the kernel is larger than the image along either
    axis. When ``ifm_dim_w == 1`` the width dilation is forced to 1 and the
    width padding entries (indices 1 and 3) to 0.
    """
    if k_h > ifm_dim_h:
        pytest.skip("Kernel height must be smaller than image height")
    if k_w > ifm_dim_w:
        pytest.skip("Kernel width must be smaller than image width")

    # Work on private copies: list arguments may be shared between test
    # invocations (e.g. values handed out by parametrization), so adjusting
    # them in place would leak the changes into later test cases.
    padding = list(padding)
    dilations = list(dilations)

    # Ensure the right padding parameters are set
    if ifm_dim_w == 1:
        dilations[1] = 1
        padding[1] = 0
        padding[3] = 0

    wdt = idt
    odt = DataType["INT32"]
    ofm_ch = ifm_ch
    pad_h = padding[0] + padding[2]
    pad_w = padding[1] + padding[3]
    stride_h = stride[0]
    stride_w = stride[1]

    ofm_dim_h = compute_conv_output_dim(
        ifm_dim_h,
        k_h,
        stride_h,
        pad_h,
        dilations[0],
    )
    ofm_dim_w = compute_conv_output_dim(
        ifm_dim_w,
        k_w,
        stride_w,
        pad_w,
        dilations[1],
    )

    # set up onnx model
    inp = oh.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim_h, ifm_dim_w]
    )
    outp = oh.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim_h, ofm_dim_w]
    )

    if dw is True:
        W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, 1, k_h, k_w])
        group = ifm_ch
    else:
        W = oh.make_tensor_value_info(
            "W", TensorProto.FLOAT, [ofm_ch, ifm_ch, k_h, k_w]
        )
        group = 1

    dw_cnv = oh.make_node(
        "Conv",
        inputs=["inp", "W"],
        outputs=["outp"],
        kernel_shape=[k_h, k_w],
        pads=padding,
        strides=[stride_h, stride_w],
        group=group,
        dilations=dilations,
    )
    graph = oh.make_graph(
        nodes=[dw_cnv],
        name="dw_cnv_graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[W],
    )

    model = oh.make_model(graph, producer_name="test_dws_reg_cnv-model")
    model = ModelWrapper(model)
    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)
    model.set_tensor_datatype("W", wdt)

    if dw is True:
        w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, 1, k_h, k_w])
    else:
        w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, ifm_ch, k_h, k_w])
    model.set_initializer("W", w_tensor)
    model = model.transform(InferShapes())

    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim_h, ifm_dim_w])
    input_dict = {"inp": input_tensor}

    # reference result from the original Conv node
    output_dict = oxe.execute_onnx(model, input_dict)
    expected = output_dict["outp"]

    # result after lowering
    model = model.transform(LowerConvsToMatMul())
    output_dict = oxe.execute_onnx(model, input_dict)
    produced = output_dict["outp"]
    assert (produced == expected).all()

    if dw is True:
        # check if created nodes have attributes that indicate depthwise conv
        assert model.get_tensor_sparsity("W") is not None
        im2col_node = getCustomOp(model.graph.node[1])
        assert im2col_node.get_nodeattr("depthwise") == 1
def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
    """Compare a Conv model against its HLS-converted counterpart.

    ``conv_config`` is unpacked as ``(kernel_size, stride, pad)``. A single
    Conv node (depthwise or regular) is lowered, converted to HLS layers,
    prepared for ``exec_mode`` ("cppsim" or "rtlsim") and executed against
    the original model. Additional structural and cycle-count checks are
    made depending on the configuration.
    """
    kernel_size, stride, pad = conv_config
    np.random.seed(0)

    idt = DataType.UINT4
    conv_weight_dt = DataType.UINT4
    in_feature_dim = 7
    in_chn = 16

    if depthwise is True:
        out_chn = in_chn
        group = in_chn
        weights_per_filter = 1
    else:
        out_chn = 20
        group = 1
        weights_per_filter = in_chn
    conv_param_shape = [out_chn, weights_per_filter, kernel_size, kernel_size]

    out_feature_dim = compute_conv_output_dim(in_feature_dim, kernel_size, stride, pad)
    input_shape = [1, in_chn, in_feature_dim, in_feature_dim]
    output_shape = [1, out_chn, out_feature_dim, out_feature_dim]

    conv_attrs = {
        "dilations": [1, 1],
        "group": group,
        "kernel_shape": [kernel_size, kernel_size],
        "pads": [pad, pad, pad, pad],
        "strides": [stride, stride],
    }

    # build the single-node Conv graph
    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape)
    weight_info = helper.make_tensor_value_info(
        "p1", TensorProto.FLOAT, conv_param_shape
    )
    conv_node = helper.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_attrs)
    conv_graph = helper.make_graph(
        name="conv_test",
        inputs=[top_in],
        outputs=[top_out],
        value_info=[weight_info],
        nodes=[conv_node],
    )
    model = ModelWrapper(helper.make_model(conv_graph))

    model.set_tensor_datatype("top_in", idt)
    model.set_tensor_datatype("top_out", idt)
    model.set_tensor_datatype("p1", conv_weight_dt)
    model.set_initializer("p1", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape))

    for step in (InferShapes(), InferDataTypes()):
        model = model.transform(step)

    # lower the conv and convert the resulting nodes to HLS layers
    new_model = model.transform(LowerConvsToMatMul())
    new_model = new_model.transform(to_hls.InferConvInpGen())
    if depthwise is True:
        new_model = new_model.transform(to_hls.InferVVAU())
    else:
        new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer())
        fc_node = new_model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
        fc_inst = getCustomOp(fc_node)
        mw = fc_inst.get_nodeattr("MW")
        mh = fc_inst.get_nodeattr("MH")
        # smallest divisor >= 2 of each matrix dimension is used as folding
        pe_cands = [cand for cand in range(2, mh + 1) if mh % cand == 0]
        simd_cands = [cand for cand in range(2, mw + 1) if mw % cand == 0]
        fc_inst.set_nodeattr("PE", pe_cands[0])
        fc_inst.set_nodeattr("SIMD", simd_cands[0])

    for step in (GiveUniqueNodeNames(), InferShapes(), InferDataTypes()):
        new_model = new_model.transform(step)

    # prepare the converted model for the requested simulation flavour
    if exec_mode == "cppsim":
        sim_steps = [PrepareCppSim(), CompileCppSim(), SetExecMode("cppsim")]
    elif exec_mode == "rtlsim":
        sim_steps = [
            SetExecMode("rtlsim"),
            GiveUniqueNodeNames(),
            PrepareIP("xc7z020clg400-1", 5),
            HLSSynthIP(),
            PrepareRTLSim(),
        ]
    else:
        raise Exception("Unknown exec_mode")
    for step in sim_steps:
        new_model = new_model.transform(step)

    x = gen_finn_dt_tensor(idt, input_shape)
    inp_dict = {model.graph.input[0].name: x}
    assert oxe.compare_execution(model, new_model, inp_dict)

    def _check_rtlsim_cycles(op_type):
        # Compare the measured rtlsim cycle count of the first node of the
        # given type against the analytical estimate.
        node = new_model.get_nodes_by_op_type(op_type)[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = new_model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
        assert exp_cycles != 0

    is_rtlsim = exec_mode == "rtlsim"

    if kernel_size == 1 and stride > 1 and pad == 0:
        assert new_model.graph.node[1].op_type == "DownSampler"
        if is_rtlsim:
            _check_rtlsim_cycles("DownSampler")

    if pad == 1:
        padding_node = new_model.get_nodes_by_op_type("FMPadding_Batch")[0]
        padding_inst = getCustomOp(padding_node)
        assert padding_inst.get_nodeattr("SIMD") == in_chn

    if depthwise is True and is_rtlsim:
        _check_rtlsim_cycles("Vector_Vector_Activate_Batch")
def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
    """Build the lowered reference model for a depthwise convolution.

    The model consists of Im2Col + MatMul, followed by a MultiThreshold node
    when ``act`` is not None.

    Args:
        act: output datatype of the activation, or None for no activation
            (the output datatype is then INT32).
        idt: datatype assigned to the input tensor.
        wdt: datatype assigned to (and used to generate) the weights.
        k: kernel size, used for both spatial dimensions.
        ifm_dim: input feature map size, used for both spatial dimensions.
        ifm_ch: number of input channels; also used as the number of output
            channels.
        stride: stride passed to Im2Col.
        padding: pad amount passed to Im2Col.

    Returns:
        The ModelWrapper after initializers, datatypes and the sparsity
        annotation have been set and InferShapes has been applied.
    """
    ofm_ch = ifm_ch
    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad=padding)
    odt = DataType.INT32 if act is None else act
    tdt = DataType.INT32

    # set up onnx model
    inp = oh.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch]
    )
    outp = oh.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, ofm_ch]
    )
    W_sparse = oh.make_tensor_value_info(
        "W_sparse", TensorProto.FLOAT, [ifm_ch * k * k, ofm_ch]
    )

    im2col_node = oh.make_node(
        "Im2Col",
        domain="finn.custom_op.general",
        inputs=["inp"],
        outputs=["im2col_out"],
        kernel_size=k,
        stride=stride,
        pad_amount=padding,
        input_shape="(1, {}, {}, {})".format(ifm_dim, ifm_dim, ifm_ch),
        depthwise=1,
    )
    matmul_node = oh.make_node(
        "MatMul", inputs=["im2col_out", "W_sparse"], outputs=["outp"]
    )

    if act is None:
        node_list = [im2col_node, matmul_node]
        global_out = outp
        value_info = [W_sparse]
    else:
        # The threshold node and its tensors are only needed with an activation.
        out_act = oh.make_tensor_value_info(
            "out_act", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, ofm_ch]
        )
        # NOTE(review): the declared threshold width is fixed at 15, while the
        # initializer below has odt.get_num_possible_values() - 1 columns;
        # these agree only for some datatypes - confirm this is intended.
        T = oh.make_tensor_value_info("T", TensorProto.FLOAT, [ofm_ch, 15])
        thresh_node = oh.make_node(
            "MultiThreshold",
            domain="finn.custom_op.general",
            inputs=["outp", "T"],
            outputs=["out_act"],
            data_layout="NHWC",
            out_dtype=odt.name,
            out_scale=1.0,
            out_bias=0.0,
        )
        node_list = [im2col_node, matmul_node, thresh_node]
        global_out = out_act
        value_info = [W_sparse, T]

    graph = oh.make_graph(
        nodes=node_list,
        name="lowered_dw_cnv_graph",
        inputs=[inp],
        outputs=[global_out],
        value_info=value_info,
    )
    model = oh.make_model(graph, producer_name="lowered_dw_cnv-model")
    model = ModelWrapper(model)

    # initialize model
    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype(model.graph.output[0].name, odt)
    model.set_tensor_datatype("W_sparse", wdt)

    w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, 1, k, k])
    # create sparse matrix: each output channel only gets the kernel of the
    # input channel with the same index, every other entry stays zero
    W_matrix = np.zeros((ofm_ch, ifm_ch, k, k))
    for ch in range(ifm_ch):
        W_matrix[ch, ch] = w_tensor[ch, 0]
    W_matrix = W_matrix.astype(np.float32)
    # move the channel axis last, flatten per output channel, then store the
    # transpose so the initializer has shape (ifm_ch * k * k, ofm_ch)
    W_matrix = W_matrix.transpose(0, 2, 3, 1)
    W_matrix = W_matrix.reshape(ofm_ch, ifm_ch * k * k)

    model.set_initializer("W_sparse", W_matrix.T)
    sparsity = {"dw": {"kernel_shape": k}}
    model.set_tensor_sparsity("W_sparse", sparsity)

    if act is not None:
        # named acc_min/acc_max to avoid shadowing the min/max builtins
        acc_min, acc_max = calculate_signed_dot_prod_range(
            idt, wdt, ifm_ch * k * k
        )
        n_steps = odt.get_num_possible_values() - 1
        T_values = np.random.randint(
            acc_min, acc_max - 1, (ofm_ch, n_steps)
        ).astype(np.float32)
        # provide non-decreasing thresholds
        T_values = np.sort(T_values, axis=1)
        model.set_initializer("T", T_values)
        model.set_tensor_datatype("T", tdt)

    model = model.transform(InferShapes())
    return model