def test_fast_vs_slow_random(idt, ishape):
    """Check that fast and slow packing agree on a gen_finn_dt_tensor tensor."""
    tensor = gen_finn_dt_tensor(idt, ishape)
    # both runs share the same reversal options; only fast_mode differs
    pack_opts = dict(reverse_endian=True, reverse_inner=True)
    packed_slow = finnpy_to_packed_bytearray(
        tensor, idt, fast_mode=False, **pack_opts
    )
    packed_fast = finnpy_to_packed_bytearray(
        tensor, idt, fast_mode=True, **pack_opts
    )
    assert (packed_fast == packed_slow).all()
Example #2
0
    def fpga_single_run(self, input):
        """Run one input through the accelerator and return its output.

        The input is reshaped to ``self.ishape_normal``, passed through
        ``MT.multithreshold`` with ``self.mt_node_thresholds``, folded, packed
        and transferred via DMA. The received buffer is unpacked, reshaped to
        ``self.oshape_normal``, multiplied by ``self.multiply_node_const`` and
        offset by ``self.add_node_mat``.
        """
        thresholded = MT.multithreshold(
            input.reshape(self.ishape_normal), self.mt_node_thresholds
        )
        assert thresholded.shape == self.ishape_normal

        # pack the folded input, reversing both SIMD dim and endianness
        packed_in = finnpy_to_packed_bytearray(
            thresholded.reshape(self.ishape_folded),
            self.idt,
            reverse_endian=True,
            reverse_inner=True,
        )
        # stage the packed data in the PYNQ buffer
        # TODO optimization: pack directly into the PYNQ buffer?
        np.copyto(self.ibuf_packed_device, packed_in)

        # start both DMA transfers, then block until each has completed
        self.dma.sendchannel.transfer(self.ibuf_packed_device)
        self.dma.recvchannel.transfer(self.obuf_packed)
        self.dma.sendchannel.wait()
        self.dma.recvchannel.wait()

        # unpack what the accelerator wrote into the output buffer
        unpacked_out = packed_bytearray_to_finnpy(
            self.obuf_packed,
            self.odt,
            self.oshape_folded,
            reverse_endian=True,
            reverse_inner=True,
        )

        # undo the folding, then apply the scale and offset
        result = unpacked_out.reshape(self.oshape_normal)
        return result * self.multiply_node_const + self.add_node_mat
Example #3
0
 def pack_input(self, ibuf_folded):
     """Pack a folded input buffer, reversing both SIMD dim and endianness.

     Takes input data in folded shape and returns the packed input data,
     using ``self.idt`` as the datatype."""
     return finnpy_to_packed_bytearray(
         ibuf_folded,
         self.idt,
         reverse_inner=True,
         reverse_endian=True,
     )
Example #4
0
def test_finnpy_to_packed_bytearray():
    """Compare finnpy_to_packed_bytearray output with hand-written bytes."""
    # the same list object is packed as FIXED<9,6> and as FLOAT32
    float_values = [[17.125, -2.0], [-3.5, 11.25]]
    # each case: (input data, DataType key, expected packed uint8 values)
    cases = [
        ([[1, 1, 1, 0], [0, 1, 1, 0]], "BINARY", [[14], [6]]),
        (
            [[[3, 3], [3, 3]], [[1, 3], [3, 1]]],
            "UINT2",
            [[[15], [15]], [[7], [13]]],
        ),
        ([1, 7, 2, 5], "UINT4", [23, 37]),
        ([[1, 7, 2, 5], [2, 5, 1, 7]], "UINT4", [[23, 37], [37, 23]]),
        (
            [[-4, 0, -4, -4]],
            "INT32",
            [[
                255, 255, 255, 252, 0, 0, 0, 0, 255, 255, 255, 252, 255, 255,
                255, 252
            ]],
        ),
        (float_values, "FIXED<9,6>", [[1, 19, 240], [3, 200, 90]]),
        (
            float_values,
            "FLOAT32",
            [[65, 137, 0, 0, 192, 0, 0, 0], [192, 96, 0, 0, 65, 52, 0, 0]],
        ),
    ]
    for data, dt_key, expected_bytes in cases:
        expected = np.asarray(expected_bytes, dtype=np.uint8)
        packed = finnpy_to_packed_bytearray(data, DataType[dt_key])
        assert (packed == expected).all()
def test_finnpy_to_packed_bytearray():
    """Compare finnpy_to_packed_bytearray output with hand-written bytes."""

    def packs_to(values, dtype, expected_bytes):
        # True when every packed element equals the expected uint8 value
        expected = np.asarray(expected_bytes, dtype=np.uint8)
        return (finnpy_to_packed_bytearray(values, dtype) == expected).all()

    assert packs_to([[1, 1, 1, 0], [0, 1, 1, 0]], DataType.BINARY, [[14], [6]])
    assert packs_to(
        [[[3, 3], [3, 3]], [[1, 3], [3, 1]]],
        DataType.UINT2,
        [[[15], [15]], [[7], [13]]],
    )
    assert packs_to([1, 7, 2, 5], DataType.UINT4, [23, 37])
    assert packs_to(
        [[1, 7, 2, 5], [2, 5, 1, 7]],
        DataType.UINT4,
        [[23, 37], [37, 23]],
    )
    assert packs_to(
        [[-4, 0, -4, -4]],
        DataType.INT32,
        [[255, 255, 255, 252, 0, 0, 0, 0, 255, 255, 255, 252, 255, 255, 255, 252]],
    )
Example #6
0
    def apply(self, model):
        """Generate the PYNQ driver folder for ``model``.

        Creates a build directory (recorded in the ``pynq_driver_dir``
        metadata property), copies ``driver_base.py`` and ``validate.py`` from
        the ``finn.qnn-data`` resources, collects datatype, normal / folded /
        packed shapes and DMA instance name for every graph input and output,
        saves external weights as ``.npy`` files, fills in and writes
        ``driver.py``, copies the directories holding the data-packing and
        datatype modules into ``finn/util`` and ``finn/core``, and emits
        weight files for layers with runtime-writeable weights.

        Every top-level node is asserted to be a
        ``StreamingDataflowPartition``.

        Returns ``(model, False)``.
        """

        # create a temporary folder for the generated driver
        pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
        model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir)

        # create the base FINN driver -- same for all accels
        driver_base_template = pk.resource_filename(
            "finn.qnn-data", "templates/driver/driver_base.py"
        )
        driver_base_py = pynq_driver_dir + "/driver_base.py"
        shutil.copy(driver_base_template, driver_base_py)
        # extract input-output shapes from the graph
        # TODO convert this to an analysis pass?
        # one entry per graph input is appended to each of these lists
        idt = []
        idma_names = []
        ishape_normal = []
        ishape_folded = []
        ishape_packed = []
        # NOTE: idma_ind is not used inside the loop body
        for idma_ind, graph_in in enumerate(model.graph.input):
            i_tensor_name = graph_in.name
            # get inp tensor properties
            i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
            i_tensor_shape_normal = tuple(model.get_tensor_shape(i_tensor_name))
            # go down into dataflow partition to get folded shape info etc
            # TODO consider setting these as attributes during dataflow partitioning
            i_consumer = model.find_consumer(i_tensor_name)
            assert (
                i_consumer.op_type == "StreamingDataflowPartition"
            ), """
                Ensure CreateDataflowPartition called before driver creation."""
            first_df_model = ModelWrapper(getCustomOp(i_consumer).get_nodeattr("model"))
            assert (
                first_df_model.graph.node[0].op_type == "IODMA"
            ), "First partition must hold input IODMA"
            # the folded shape is taken from the node that consumes the IODMA
            # partition's output inside the first direct successor partition
            successors = model.find_direct_successors(i_consumer)
            # position of the IODMA partition's first output among the
            # successor's inputs; used to pick the matching graph input below
            successor_input_num = list(successors[0].input).index(i_consumer.output[0])
            successor_sdp = getCustomOp(successors[0])
            successor_df_model = ModelWrapper(successor_sdp.get_nodeattr("model"))
            first_node = successor_df_model.find_consumer(
                successor_df_model.graph.input[successor_input_num].name
            )
            i_tensor_shape_folded = tuple(
                getCustomOp(first_node).get_folded_input_shape()
            )
            # generate dummy folded i/o tensors and their packed versions
            # (only the shape of the packed dummy is used afterwards)
            i_tensor_dummy_folded = gen_finn_dt_tensor(
                i_tensor_dt, i_tensor_shape_folded
            )
            i_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
                i_tensor_dummy_folded, i_tensor_dt
            )
            i_tensor_shape_packed = i_tensor_dummy_packed.shape
            # append all input tensor info to relevant lists
            idt.append("DataType['%s']" % i_tensor_dt.name)
            ishape_normal.append(i_tensor_shape_normal)
            ishape_folded.append(i_tensor_shape_folded)
            ishape_packed.append(i_tensor_shape_packed)
            idma_names.append(getCustomOp(i_consumer).get_nodeattr("instance_name"))

        # same bookkeeping as above, one entry per graph output
        odt = []
        odma_names = []
        oshape_normal = []
        oshape_folded = []
        oshape_packed = []
        # NOTE: odma_ind is not used inside the loop body
        for odma_ind, graph_out in enumerate(model.graph.output):
            o_tensor_name = graph_out.name
            # get output tensor properties
            o_tensor_dt = model.get_tensor_datatype(o_tensor_name)
            o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
            # go down into IODMA partition to get folded shape info etc
            # TODO consider setting these as attributes during dataflow partitioning
            o_producer = model.find_producer(o_tensor_name)
            assert (
                o_producer.op_type == "StreamingDataflowPartition"
            ), """
                Ensure CreateDataflowPartition called before driver creation."""
            df_model = ModelWrapper(getCustomOp(o_producer).get_nodeattr("model"))
            assert (
                df_model.graph.node[-1].op_type == "IODMA"
            ), "Partition must hold output IODMA"
            # the folded shape is taken from the node that produces the IODMA
            # partition's input inside the first direct predecessor partition
            predecessors = model.find_direct_predecessors(o_producer)
            # position of the IODMA partition's first input among the
            # predecessor's outputs; used to pick the matching graph output below
            predecessor_output_num = list(predecessors[0].output).index(
                o_producer.input[0]
            )
            predecessor_sdp = getCustomOp(predecessors[0])
            predecessor_df_model = ModelWrapper(predecessor_sdp.get_nodeattr("model"))
            last_node = predecessor_df_model.find_producer(
                predecessor_df_model.graph.output[predecessor_output_num].name
            )
            o_tensor_shape_folded = tuple(
                getCustomOp(last_node).get_folded_output_shape()
            )
            # dummy tensor is generated only to learn the packed shape
            o_tensor_dummy_folded = gen_finn_dt_tensor(
                o_tensor_dt, o_tensor_shape_folded
            )
            o_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
                o_tensor_dummy_folded, o_tensor_dt
            )
            o_tensor_shape_packed = o_tensor_dummy_packed.shape
            # append all output tensor info to relevant lists
            odt.append("DataType['%s']" % o_tensor_dt.name)
            oshape_normal.append(o_tensor_shape_normal)
            oshape_folded.append(o_tensor_shape_folded)
            oshape_packed.append(o_tensor_shape_packed)
            odma_names.append(getCustomOp(o_producer).get_nodeattr("instance_name"))

        # generate external weights npy files
        weights_dir = pynq_driver_dir + "/runtime_weights"

        # no exist_ok: this raises if the directory is already present
        os.makedirs(weights_dir)
        # NOTE: idma_idx is incremented below but never read in this method
        idma_idx = 0
        ext_weight_dma_cnt = 0

        for node in model.graph.node:
            assert (
                node.op_type == "StreamingDataflowPartition"
            ), "CreateDataflowPartition needs to be applied before driver generation"

            # NOTE: the init_tensor value assigned in this if/else is never
            # read; it is reassigned in the "wrap" branch before its only use
            if len(node.input) > 0:
                producer = model.find_producer(node.input[0])
                init_tensor = model.get_initializer(node.input[0])
            else:
                producer = None
                init_tensor = None

            if producer is None:  # input dma?
                sdp_inst = getCustomOp(node)
                idma_name = sdp_inst.get_nodeattr("instance_name")
                df_model = ModelWrapper(sdp_inst.get_nodeattr("model"))
                assert df_model.graph.node[0].op_type == "IODMA"
                iodma_node = getCustomOp(df_model.graph.node[0])
                if iodma_node.get_nodeattr("burstMode") == "wrap":  # input weights dma?
                    # weights come from the initializer of the IODMA's first
                    # input and are saved under the partition's instance name
                    init_tensor = df_model.get_initializer(
                        iodma_node.onnx_node.input[0]
                    )
                    ext_weight_dma_cnt += 1
                    w_dtype = df_model.get_tensor_datatype(
                        iodma_node.onnx_node.input[0]
                    )
                    init_external_tensor = to_external_tensor(init_tensor, w_dtype)
                    np.save(
                        weights_dir + "/" + idma_name + ".npy", init_external_tensor
                    )
                idma_idx += 1

        # fill in the driver template
        driver_py = pynq_driver_dir + "/driver.py"
        driver = template_driver.pynq_driver_template

        driver = driver.replace("$PLATFORM$", self.platform)
        # each idt/odt entry contains single quotes, so str() of the list wraps
        # it in double quotes; stripping those leaves bare DataType['...']
        # expressions in the generated source
        driver = driver.replace("$INPUT_FINN_DATATYPE$", str(idt).replace('"', ""))
        driver = driver.replace("$INPUT_SHAPE_NORMAL$", str(ishape_normal))
        driver = driver.replace("$INPUT_SHAPE_FOLDED$", str(ishape_folded))
        driver = driver.replace("$INPUT_SHAPE_PACKED$", str(ishape_packed))
        driver = driver.replace("$OUTPUT_FINN_DATATYPE$", str(odt).replace('"', ""))
        driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", str(oshape_normal))
        driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", str(oshape_folded))
        driver = driver.replace("$OUTPUT_SHAPE_PACKED$", str(oshape_packed))
        driver = driver.replace("$INPUT_DMA_NAME$", "%s" % str(idma_names))
        driver = driver.replace("$OUTPUT_DMA_NAME$", "%s" % str(odma_names))
        driver = driver.replace("$NUM_INPUTS$", str(len(idma_names)))
        driver = driver.replace("$NUM_OUTPUTS$", str(len(odma_names)))
        driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt))

        with open(driver_py, "w") as f:
            f.write(driver)

        # add validate.py to run full top-1 test (only for suitable networks)
        validate_py = pynq_driver_dir + "/validate.py"
        validate_template = pk.resource_filename(
            "finn.qnn-data", "templates/driver/validate.py"
        )
        shutil.copy(validate_template, validate_py)

        # copy all the dependencies into the driver folder
        # driver imports utils/data_packing and core/datatype
        # both of which are in finn-base
        # e.g. /workspace/finn-base/src/finn/util/data_packing.py
        dpk_root = dpk.__file__
        # e.g. /workspace/finn-base/src/finn/util
        dpk_root = dpk_root.replace("data_packing.py", "")
        # e.g. /workspace/finn-base/src/finn/core/datatype.py
        dtp_root = dtp.__file__
        # e.g. /workspace/finn-base/src/finn/core
        dtp_root = dtp_root.replace("datatype.py", "")
        shutil.copytree(dpk_root, pynq_driver_dir + "/finn/util")
        shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core")

        # generate weight files for runtime-writable layers

        for sdp_ind, sdp_node in enumerate(model.graph.node):
            assert sdp_node.op_type == "StreamingDataflowPartition"
            # get dataflow model
            # (sdp_node is rebound from the ONNX node to its custom-op wrapper)
            sdp_node = getCustomOp(sdp_node)
            dataflow_model_filename = sdp_node.get_nodeattr("model")
            dataflow_model = ModelWrapper(dataflow_model_filename)
            # counts runtime-writeable layers within this partition; it is
            # part of the generated weight file name
            rt_layer_ind = 0
            for node in dataflow_model.graph.node:
                if node.op_type in ["StreamingFCLayer_Batch", "Thresholding_Batch"]:
                    node_inst = getCustomOp(node)
                    is_rt_weights = node_inst.get_nodeattr("runtime_writeable_weights")
                    if is_rt_weights == 1:
                        # weights are read from the initializer of input 1
                        fcl_w = dataflow_model.get_initializer(node.input[1])
                        w_filename = weights_dir + "/%d_%d_%s.dat" % (
                            sdp_ind,
                            rt_layer_ind,
                            node.name,
                        )
                        node_inst.make_weight_file(
                            fcl_w, "decoupled_runtime", w_filename
                        )
                        rt_layer_ind += 1
                elif node.op_type == "StreamingDataflowPartition":
                    warnings.warn(
                        """Nested StreamingDataflowPartition are not supported
                    """
                    )
                else:
                    continue

        return (model, False)
Example #7
0
    def apply(self, model):
        """Generate a PYNQ driver folder for ``model``.

        Requires the ``vivado_pynq_proj`` metadata property to name an existing
        directory and raises ``Exception`` otherwise. Writes ``driver.py`` from
        ``templates.pynq_driver_template`` into a fresh build directory
        (recorded as ``pynq_driver_dir``) and copies the ``finn/util`` and
        ``finn/core`` sources next to it.

        Returns ``(model, False)``.
        """
        pynq_proj = model.get_metadata_prop("vivado_pynq_proj")
        if not (pynq_proj is not None and os.path.isdir(pynq_proj)):
            raise Exception("No PYNQ project found, apply MakePYNQProject first.")

        # temporary folder that receives the generated driver
        driver_dir = make_build_dir(prefix="pynq_driver_")
        model.set_metadata_prop("pynq_driver_dir", driver_dir)

        def mss(x, batch_var_name="N"):
            # "make shape string": a shape like (1, ...) is emitted as
            # (N, ...), N being the default for batch_var_name, so the
            # driver can work with a batch of samples at once
            return (
                str(x)
                .replace("(1,", "(%s," % batch_var_name)
                .replace("[1,", "[%s," % batch_var_name)
            )

        # gather i/o tensor properties from the graph
        # TODO convert this to an analysis pass
        in_name = model.graph.input[0].name
        out_name = model.graph.output[0].name
        in_shape = tuple(model.get_tensor_shape(in_name))
        out_shape = tuple(model.get_tensor_shape(out_name))
        in_dt = model.get_tensor_datatype(in_name)
        out_dt = model.get_tensor_datatype(out_name)
        # folded i/o shapes come from the HLSCustomOp instances at the edges
        head_op = getCustomOp(model.find_consumer(in_name))
        tail_op = getCustomOp(model.find_producer(out_name))
        in_folded = tuple(head_op.get_folded_input_shape())
        out_folded = tuple(tail_op.get_folded_output_shape())
        # dummy folded tensors are packed only to learn the packed shapes
        in_dummy = gen_finn_dt_tensor(in_dt, in_folded)
        out_dummy = gen_finn_dt_tensor(out_dt, out_folded)
        in_packed = finnpy_to_packed_bytearray(in_dummy, in_dt).shape
        out_packed = finnpy_to_packed_bytearray(out_dummy, out_dt).shape

        # placeholder -> text, applied to the template in this order
        substitutions = (
            ("$INPUT_FINN_DATATYPE$", str(in_dt)),
            ("$INPUT_SHAPE_NORMAL$", mss(in_shape)),
            ("$INPUT_SHAPE_FOLDED$", mss(in_folded)),
            ("$INPUT_SHAPE_PACKED$", mss(in_packed)),
            ("$OUTPUT_FINN_DATATYPE$", str(out_dt)),
            ("$OUTPUT_SHAPE_NORMAL$", mss(out_shape)),
            ("$OUTPUT_SHAPE_FOLDED$", mss(out_folded)),
            ("$OUTPUT_SHAPE_PACKED$", mss(out_packed)),
        )
        driver = templates.pynq_driver_template
        for placeholder, text in substitutions:
            driver = driver.replace(placeholder, text)

        with open(driver_dir + "/driver.py", "w") as f:
            f.write(driver)

        # copy all the dependencies into the driver folder
        for src_suffix, dst_suffix in (
            ("/src/finn/util", "/finn/util"),
            ("/src/finn/core", "/finn/core"),
        ):
            shutil.copytree(get_finn_root() + src_suffix, driver_dir + dst_suffix)

        return (model, False)
Example #8
0
    def apply(self, model):
        """Generate a PYNQ driver folder for ``model``.

        Writes ``driver.py`` (from ``templates.pynq_driver_template``) and
        ``validate.py`` (from ``templates.pynq_validation_template``) into a
        fresh build directory recorded as ``pynq_driver_dir``, then copies the
        ``finn/util`` and ``finn/core`` sources next to them. The clock
        frequency comes from the ``clk_ns`` metadata property, defaulting to
        10 ns when it is unset.

        Returns ``(model, False)``.
        """

        def mss(x, batch_var_name="N"):
            # "make shape string": a shape like (1, ...) is emitted as
            # (N, ...), N being the default for batch_var_name, so the
            # driver can work with a batch of samples at once
            return (
                str(x)
                .replace("(1,", "(%s," % batch_var_name)
                .replace("[1,", "[%s," % batch_var_name)
            )

        def apply_substitutions(text, pairs):
            # replace each placeholder in turn, in the given order
            for placeholder, value in pairs:
                text = text.replace(placeholder, value)
            return text

        # temporary folder that receives the generated driver
        driver_dir = make_build_dir(prefix="pynq_driver_")
        model.set_metadata_prop("pynq_driver_dir", driver_dir)

        # gather i/o tensor properties from the graph
        # TODO convert this to an analysis pass
        in_name = model.graph.input[0].name
        out_name = model.graph.output[0].name
        in_shape = tuple(model.get_tensor_shape(in_name))
        out_shape = tuple(model.get_tensor_shape(out_name))
        in_dt = model.get_tensor_datatype(in_name)
        out_dt = model.get_tensor_datatype(out_name)
        # folded shapes are derived from the regular shapes by inserting a 1
        # just before the last dimension; they used to come from the
        # first/last node folded shapes, which is not possible with IODMAs
        in_folded = in_shape[:-1] + (1,) + in_shape[-1:]
        out_folded = out_shape[:-1] + (1,) + out_shape[-1:]

        # dummy folded tensors are packed only to learn the packed shapes
        in_dummy = gen_finn_dt_tensor(in_dt, in_folded)
        out_dummy = gen_finn_dt_tensor(out_dt, out_folded)
        in_packed = finnpy_to_packed_bytearray(in_dummy, in_dt).shape
        out_packed = finnpy_to_packed_bytearray(out_dummy, out_dt).shape

        # fill in the platform, datatype and shape placeholders
        driver = apply_substitutions(
            templates.pynq_driver_template,
            (
                ("$PLATFORM$", self.platform),
                ("$INPUT_FINN_DATATYPE$", str(in_dt)),
                ("$INPUT_SHAPE_NORMAL$", mss(in_shape)),
                ("$INPUT_SHAPE_FOLDED$", mss(in_folded)),
                ("$INPUT_SHAPE_PACKED$", mss(in_packed)),
                ("$OUTPUT_FINN_DATATYPE$", str(out_dt)),
                ("$OUTPUT_SHAPE_NORMAL$", mss(out_shape)),
                ("$OUTPUT_SHAPE_FOLDED$", mss(out_folded)),
                ("$OUTPUT_SHAPE_PACKED$", mss(out_packed)),
            ),
        )

        # clock settings for driver; 10ns / 100 MHz when the property is unset
        clk_prop = model.get_metadata_prop("clk_ns")
        clk_ns = 10.0 if clk_prop is None else float(clk_prop)
        fclk_mhz = 1 / (clk_ns * 0.001)
        # TODO change according to PYNQ board?
        driver = apply_substitutions(
            driver,
            (
                ("$CLK_NAME$", "fclk0_mhz"),
                ("$CLOCK_FREQ_MHZ$", str(fclk_mhz)),
            ),
        )

        with open(driver_dir + "/driver.py", "w") as f:
            f.write(driver)

        # add validate.py to run full top-1 test (only for suitable networks)
        with open(driver_dir + "/validate.py", "w") as f:
            f.write(templates.pynq_validation_template)

        # copy all the dependencies into the driver folder
        for src_suffix, dst_suffix in (
            ("/src/finn/util", "/finn/util"),
            ("/src/finn/core", "/finn/core"),
        ):
            shutil.copytree(get_finn_root() + src_suffix, driver_dir + dst_suffix)

        return (model, False)
Example #9
0
    def apply(self, model):
        """Generate the PYNQ driver folder for a single-input/output ``model``.

        Creates a build directory (recorded in the ``pynq_driver_dir``
        metadata property), copies ``driver_base.py`` and ``validate.py`` from
        the ``finn.qnn-data`` resources, derives the normal / folded / packed
        i/o shapes, saves external weights as ``.npy`` files, fills in and
        writes ``driver.py``, copies the directories holding the data-packing
        and datatype modules into ``finn/util`` and ``finn/core``, and emits
        weight files for layers with runtime-writeable weights.

        Every top-level node is asserted to be a
        ``StreamingDataflowPartition``.

        Returns ``(model, False)``.

        Raises:
            RuntimeError: if no top-level node qualifies as the network input
                DMA (first input without producer and without initializer),
                so ``$INPUT_DMA_NAME$`` could not be filled in.
        """
        # create a temporary folder for the generated driver
        pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
        model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir)

        # create the base FINN driver -- same for all accels
        driver_base_template = pk.resource_filename(
            "finn.qnn-data", "templates/driver/driver_base.py")
        driver_base_py = pynq_driver_dir + "/driver_base.py"
        shutil.copy(driver_base_template, driver_base_py)

        # extract input-output shapes from the graph
        # TODO convert this to an analysis pass?
        i_tensor_name = model.graph.input[0].name
        o_tensor_name = model.graph.output[0].name
        i_tensor_shape_normal = tuple(model.get_tensor_shape(i_tensor_name))
        o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
        i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
        o_tensor_dt = model.get_tensor_datatype(o_tensor_name)

        first_node = model.find_consumer(i_tensor_name)
        last_node = model.find_producer(o_tensor_name)
        if first_node.op_type == "StreamingDataflowPartition":
            # IODMAs and dataflow partitions have already been created
            # extract folded i/o shapes from IODMA consumer/producer
            first_df_model = ModelWrapper(
                getCustomOp(first_node).get_nodeattr("model"))
            assert (first_df_model.graph.node[0].op_type == "IODMA"
                    ), "First partition must hold input IODMA"
            # step into the first direct successor partition and take the
            # node consuming its first graph input
            successors = model.find_direct_successors(first_node)
            successor_sdp = getCustomOp(successors[0])
            successor_df_model = ModelWrapper(
                successor_sdp.get_nodeattr("model"))
            first_node = successor_df_model.find_consumer(
                successor_df_model.graph.input[0].name)

            last_df_model = ModelWrapper(
                getCustomOp(last_node).get_nodeattr("model"))
            assert (last_df_model.graph.node[0].op_type == "IODMA"
                    ), "Last partition must hold output IODMA"
            # step into the first direct predecessor partition and take the
            # node producing its first graph output
            predecessors = model.find_direct_predecessors(last_node)
            predecessor_sdp = getCustomOp(predecessors[0])
            predecessor_df_model = ModelWrapper(
                predecessor_sdp.get_nodeattr("model"))
            last_node = predecessor_df_model.find_producer(
                predecessor_df_model.graph.output[0].name)

        # else: transformation called before IODMA/SDP creation (legacy flow)
        # can access folded i/o shapes directly
        i_tensor_shape_folded = tuple(
            getCustomOp(first_node).get_folded_input_shape())
        o_tensor_shape_folded = tuple(
            getCustomOp(last_node).get_folded_output_shape())

        # generate dummy folded i/o tensors and their packed versions
        # (only the shapes of the packed dummies are used afterwards)
        i_tensor_dummy_folded = gen_finn_dt_tensor(i_tensor_dt,
                                                   i_tensor_shape_folded)
        o_tensor_dummy_folded = gen_finn_dt_tensor(o_tensor_dt,
                                                   o_tensor_shape_folded)
        i_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
            i_tensor_dummy_folded, i_tensor_dt)
        o_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
            o_tensor_dummy_folded, o_tensor_dt)
        i_tensor_shape_packed = i_tensor_dummy_packed.shape
        o_tensor_shape_packed = o_tensor_dummy_packed.shape

        # generate external weights npy files
        weights_dir = pynq_driver_dir + "/runtime_weights"

        os.makedirs(weights_dir)
        idma_idx = 0
        ext_weight_dma_cnt = 0
        # stays None until a non-weight input DMA is seen; checked after the
        # loop so a missing network input gives a clear error instead of an
        # UnboundLocalError when the template is filled in
        net_input_name = None

        for node in model.graph.node:
            assert (
                node.op_type == "StreamingDataflowPartition"
            ), "CreateDataflowPartition needs to be applied before driver generation"

            producer = model.find_producer(node.input[0])
            init_tensor = model.get_initializer(node.input[0])

            if producer is None:  # input dma?
                # input DMAs are named idma0, idma1, ... in graph order
                idma_name = "idma" + str(idma_idx)
                if init_tensor is not None:  # input weights dma?
                    ext_weight_dma_cnt += 1
                    w_dtype = model.get_tensor_datatype(node.input[0])
                    init_external_tensor = to_external_tensor(
                        init_tensor, w_dtype)
                    np.save(weights_dir + "/" + idma_name + ".npy",
                            init_external_tensor)
                else:
                    # if several nodes qualify, the last one wins
                    net_input_name = idma_name

                idma_idx += 1

        if net_input_name is None:
            raise RuntimeError(
                "No network input DMA found: expected a "
                "StreamingDataflowPartition whose first input has neither a "
                "producer nor an initializer."
            )

        # fill in the driver template
        driver_py = pynq_driver_dir + "/driver.py"
        driver = template_driver.pynq_driver_template

        def mss(x, batch_var_name="1"):
            # "make shape string"
            # for a shape like (1, ...) emit a string (N, ...)
            # where N is the value of batch_var_name
            # with the default "1" the shape string is left unchanged
            ret = str(x)
            ret = ret.replace("(1,", "(%s," % batch_var_name)
            ret = ret.replace("[1,", "[%s," % batch_var_name)
            return ret

        driver = driver.replace("$PLATFORM$", self.platform)
        driver = driver.replace("$INPUT_FINN_DATATYPE$", str(i_tensor_dt))
        driver = driver.replace("$INPUT_SHAPE_NORMAL$",
                                mss(i_tensor_shape_normal))
        driver = driver.replace("$INPUT_SHAPE_FOLDED$",
                                mss(i_tensor_shape_folded))
        driver = driver.replace("$INPUT_SHAPE_PACKED$",
                                mss(i_tensor_shape_packed))
        driver = driver.replace("$OUTPUT_FINN_DATATYPE$", str(o_tensor_dt))
        driver = driver.replace("$OUTPUT_SHAPE_NORMAL$",
                                mss(o_tensor_shape_normal))
        driver = driver.replace("$OUTPUT_SHAPE_FOLDED$",
                                mss(o_tensor_shape_folded))
        driver = driver.replace("$OUTPUT_SHAPE_PACKED$",
                                mss(o_tensor_shape_packed))
        driver = driver.replace("$INPUT_DMA_NAME$", "'%s'" % net_input_name)
        driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt))

        with open(driver_py, "w") as f:
            f.write(driver)

        # add validate.py to run full top-1 test (only for suitable networks)
        validate_py = pynq_driver_dir + "/validate.py"
        validate_template = pk.resource_filename(
            "finn.qnn-data", "templates/driver/validate.py")
        shutil.copy(validate_template, validate_py)

        # copy all the dependencies into the driver folder
        # driver imports utils/data_packing and core/datatype
        # both of which are in finn-base
        # e.g. /workspace/finn-base/src/finn/util/data_packing.py
        dpk_root = dpk.__file__
        # e.g. /workspace/finn-base/src/finn/util
        dpk_root = dpk_root.replace("data_packing.py", "")
        # e.g. /workspace/finn-base/src/finn/core/datatype.py
        dtp_root = dtp.__file__
        # e.g. /workspace/finn-base/src/finn/core
        dtp_root = dtp_root.replace("datatype.py", "")
        shutil.copytree(dpk_root, pynq_driver_dir + "/finn/util")
        shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core")

        # generate weight files for runtime-writable layers

        for sdp_ind, sdp_node in enumerate(model.graph.node):
            assert sdp_node.op_type == "StreamingDataflowPartition"
            # get dataflow model
            # (sdp_node is rebound from the ONNX node to its custom-op wrapper)
            sdp_node = getCustomOp(sdp_node)
            dataflow_model_filename = sdp_node.get_nodeattr("model")
            dataflow_model = ModelWrapper(dataflow_model_filename)
            # counts runtime-writeable layers within this partition; it is
            # part of the generated weight file name
            rt_layer_ind = 0
            for node in dataflow_model.graph.node:
                if node.op_type in [
                        "StreamingFCLayer_Batch", "Thresholding_Batch"
                ]:
                    node_inst = getCustomOp(node)
                    is_rt_weights = node_inst.get_nodeattr(
                        "runtime_writeable_weights")
                    if is_rt_weights == 1:
                        # weights are read from the initializer of input 1
                        fcl_w = dataflow_model.get_initializer(node.input[1])
                        w_filename = weights_dir + "/%d_%d_%s.dat" % (
                            sdp_ind,
                            rt_layer_ind,
                            node.name,
                        )
                        node_inst.make_weight_file(fcl_w, "decoupled_runtime",
                                                   w_filename)
                        rt_layer_ind += 1
                elif node.op_type == "StreamingDataflowPartition":
                    warnings.warn(
                        """Nested StreamingDataflowPartition are not supported
                    """)
                else:
                    continue

        return (model, False)
Example #10
0
    def apply(self, model):
        """Generate a PYNQ driver folder for ``model``.

        A new build directory is created and recorded on the model under the
        ``pynq_driver_dir`` metadata property. It is then populated with:

        * ``driver_base.py`` and ``validate.py`` copied from the packaged
          ``templates/driver`` resources,
        * ``driver.py``, produced by filling the placeholders of
          ``template_driver.pynq_driver_template`` with the platform and the
          input/output datatypes and shapes of the graph,
        * copies of the directories containing the data packing and datatype
          modules (under ``finn/util`` and ``finn/core``),
        * a ``runtime_weights`` folder holding one ``.dat`` file per layer
          whose ``runtime_writeable_weights`` attribute is 1.

        Returns the tuple ``(model, False)``.
        NOTE(review): the ``False`` presumably tells the transformation
        framework that the graph was not modified -- confirm.
        """
        # create a temporary folder for the generated driver
        pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
        model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir)

        # create the base FINN driver -- same for all accels
        shutil.copy(
            pk.resource_filename("finn.qnn-data",
                                 "templates/driver/driver_base.py"),
            os.path.join(pynq_driver_dir, "driver_base.py"),
        )

        # extract input-output shapes from the graph
        # TODO convert this to an analysis pass?
        i_tensor_name = model.graph.input[0].name
        o_tensor_name = model.graph.output[0].name
        i_tensor_shape_normal = tuple(model.get_tensor_shape(i_tensor_name))
        o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
        i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
        o_tensor_dt = model.get_tensor_datatype(o_tensor_name)

        # folded shapes for i/o simply derived from regular tensor shapes:
        # a dimension of size 1 is inserted right before the last dimension.
        # this used to be extracted from first/last node folded shapes, but
        # can't do this anymore due to IODMAs
        i_tensor_shape_folded = (i_tensor_shape_normal[:-1] + (1, ) +
                                 i_tensor_shape_normal[-1:])
        o_tensor_shape_folded = (o_tensor_shape_normal[:-1] + (1, ) +
                                 o_tensor_shape_normal[-1:])

        # generate dummy folded i/o tensors and pack them; only the shapes
        # of the packed results are needed for the driver template
        i_tensor_dummy_folded = gen_finn_dt_tensor(i_tensor_dt,
                                                   i_tensor_shape_folded)
        o_tensor_dummy_folded = gen_finn_dt_tensor(o_tensor_dt,
                                                   o_tensor_shape_folded)
        i_tensor_shape_packed = dpk.finnpy_to_packed_bytearray(
            i_tensor_dummy_folded, i_tensor_dt).shape
        o_tensor_shape_packed = dpk.finnpy_to_packed_bytearray(
            o_tensor_dummy_folded, o_tensor_dt).shape

        def mss(x, batch_var_name="1"):
            # "make shape string"
            # for a shape like (1, ...) emit a string (N, ...)
            # where N is the default value for batch_var_name
            # this lets the driver work with a batch of samples at once
            ret = str(x)
            ret = ret.replace("(1,", "(%s," % batch_var_name)
            ret = ret.replace("[1,", "[%s," % batch_var_name)
            return ret

        # fill in the driver template; substitutions are applied in the
        # order listed here
        substitutions = [
            ("$PLATFORM$", self.platform),
            ("$INPUT_FINN_DATATYPE$", str(i_tensor_dt)),
            ("$INPUT_SHAPE_NORMAL$", mss(i_tensor_shape_normal)),
            ("$INPUT_SHAPE_FOLDED$", mss(i_tensor_shape_folded)),
            ("$INPUT_SHAPE_PACKED$", mss(i_tensor_shape_packed)),
            ("$OUTPUT_FINN_DATATYPE$", str(o_tensor_dt)),
            ("$OUTPUT_SHAPE_NORMAL$", mss(o_tensor_shape_normal)),
            ("$OUTPUT_SHAPE_FOLDED$", mss(o_tensor_shape_folded)),
            ("$OUTPUT_SHAPE_PACKED$", mss(o_tensor_shape_packed)),
        ]
        driver = template_driver.pynq_driver_template
        for placeholder, value in substitutions:
            driver = driver.replace(placeholder, value)

        with open(os.path.join(pynq_driver_dir, "driver.py"), "w") as f:
            f.write(driver)

        # add validate.py to run full top-1 test (only for suitable networks)
        shutil.copy(
            pk.resource_filename("finn.qnn-data",
                                 "templates/driver/validate.py"),
            os.path.join(pynq_driver_dir, "validate.py"),
        )

        # copy all the dependencies into the driver folder
        # driver imports utils/data_packing and core/datatype
        # both of which are in finn-base
        # os.path.dirname is used instead of stripping the file name with
        # str.replace, which breaks if __file__ is not exactly the .py file
        # or if the file name also occurs elsewhere in the path
        # e.g. /workspace/finn-base/src/finn/util
        dpk_root = os.path.dirname(dpk.__file__)
        # e.g. /workspace/finn-base/src/finn/core
        dtp_root = os.path.dirname(dtp.__file__)
        shutil.copytree(dpk_root,
                        os.path.join(pynq_driver_dir, "finn", "util"))
        shutil.copytree(dtp_root,
                        os.path.join(pynq_driver_dir, "finn", "core"))

        # generate weight files for runtime-writable layers
        weights_dir = os.path.join(pynq_driver_dir, "runtime_weights")
        os.makedirs(weights_dir)
        rt_layer_ind = 0
        for node in model.graph.node:
            if node.op_type == "StreamingDataflowPartition":
                warnings.warn("""Please call MakePYNQDriver prior to
                CreateDataflowPartition. Can only extract runtime-writable
                weights from HLSCustomOp instances and not StreamingDataflowPartition.
                """)
                continue
            if node.op_type not in ("StreamingFCLayer_Batch",
                                    "Thresholding_Batch"):
                continue
            node_inst = getCustomOp(node)
            if node_inst.get_nodeattr("runtime_writeable_weights") != 1:
                continue
            # the second input of the node holds the weights/thresholds
            fcl_w = model.get_initializer(node.input[1])
            # files are numbered in the order runtime-writable layers appear
            w_filename = os.path.join(
                weights_dir, "%d_%s.dat" % (rt_layer_ind, node.name))
            node_inst.make_weight_file(fcl_w, "decoupled_runtime",
                                       w_filename)
            rt_layer_ind += 1
        return (model, False)