Example #1
def finnpy_to_packed_bytearray(ndarray,
                               dtype,
                               reverse_inner=False,
                               reverse_endian=False):
    """Given a numpy ndarray with FINN DataType dtype, pack the innermost
    dimension and return the packed representation as an ndarray of uint8.
    The packed innermost dimension will be padded to the nearest multiple
    of 8 bits. The returned ndarray has the same number of dimensions as the
    input.
    """

    if (not issubclass(type(ndarray),
                       np.ndarray)) or ndarray.dtype != np.float32:
        # try to convert to a float numpy array (container dtype is float)
        ndarray = np.asarray(ndarray, dtype=np.float32)
    # pack innermost dim to hex strings padded to 8 bits
    bits = dtype.bitwidth() * ndarray.shape[-1]
    bits_padded = roundup_to_integer_multiple(bits, 8)
    packed_hexstring = pack_innermost_dim_as_hex_string(
        ndarray, dtype, bits_padded, reverse_inner=reverse_inner)

    def fn(x):
        return np.asarray(list(map(hexstring2npbytearray, x)))

    if packed_hexstring.ndim == 0:
        # scalar, call hexstring2npbytearray directly
        ret = hexstring2npbytearray(packed_hexstring.item())
    else:
        # convert ndarray of hex strings to byte array
        ret = np.apply_along_axis(fn, packed_hexstring.ndim - 1,
                                  packed_hexstring)
    if reverse_endian:
        # reverse the endianness of packing dimension
        ret = np.flip(ret, axis=-1)
    return ret
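
Every example on this page uses roundup_to_integer_multiple from finn.util.basic. A minimal sketch of what it presumably computes, inferred from the call sites above (the real FINN helper may additionally special-case a factor of -1):

import math

def roundup_to_integer_multiple(x, factor):
    # round x up to the nearest integer multiple of factor,
    # e.g. 10 bits padded to a byte boundary becomes 16
    return int(math.ceil(x / factor) * factor)

assert roundup_to_integer_multiple(10, 8) == 16
assert roundup_to_integer_multiple(16, 8) == 16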
Example #2
def npy_to_rtlsim_input(input_file,
                        input_dtype,
                        pad_to_nbits,
                        reverse_inner=True):
    """Convert the multidimensional NumPy array of integers (stored as floats)
    from input_file into a flattened sequence of Python arbitrary-precision
    integers, packing the innermost dimension. See
    finn.util.basic.pack_innermost_dim_as_hex_string() for more info on how the
    packing works. If reverse_inner is set, the innermost dimension will be
    reversed prior to packing."""
    pad_to_nbits = roundup_to_integer_multiple(pad_to_nbits, 4)
    if issubclass(type(input_file), np.ndarray):
        inp = input_file
    elif os.path.isfile(input_file):
        inp = np.load(input_file)
    else:
        raise Exception("input_file must be ndarray or filename for .npy")
    if inp.shape[-1] == 1 and input_dtype.is_integer():
        packed_data = inp.flatten().astype(input_dtype.to_numpy_dt())
    else:
        packed_data = pack_innermost_dim_as_hex_string(
            inp, input_dtype, pad_to_nbits, reverse_inner=reverse_inner)
        packed_data = packed_data.flatten()
        packed_data = [int(x[2:], 16) for x in packed_data]
    return packed_data
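
The final list comprehension strips the "0x" prefix from each packed hex string and parses the rest as a base-16 integer. A tiny illustration with hypothetical packed values:

packed_data = ["0x0f", "0xa3"]
as_ints = [int(x[2:], 16) for x in packed_data]
assert as_ints == [15, 163]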
Example #3
def numpy_to_hls_code(ndarray,
                      dtype,
                      hls_var_name,
                      pack_innermost_dim=True,
                      no_decl=False):
    """Return C++ code representation of a numpy ndarray with FINN DataType
    dtype, using hls_var_name as the resulting C++ variable name. If
    pack_innermost_dim is specified, the innermost dimension of the ndarray
    will be packed into a hex string using array2hexstring. If no_decl is
    set to True, no variable name and type will be generated as part of the
    emitted string.
    """
    hls_dtype = dtype.get_hls_datatype_str()
    if not isinstance(ndarray, np.ndarray) or ndarray.dtype != np.float32:
        # try to convert to a float numpy array (container dtype is float)
        ndarray = np.asarray(ndarray, dtype=np.float32)
    if pack_innermost_dim:
        idimlen = ndarray.shape[-1]
        idimbits = idimlen * dtype.bitwidth()
        idimbits = roundup_to_integer_multiple(idimbits, 4)
        ndarray = pack_innermost_dim_as_hex_string(ndarray, dtype, idimbits)
        hls_dtype = "ap_uint<%d>" % idimbits
    ndims = ndarray.ndim
    # add type string and variable name
    # e.g. "const ap_uint<64>" "weightMem0"
    ret = "%s %s" % (hls_dtype, hls_var_name)
    # add dimensions
    for d in range(ndims):
        ret += "[%d]" % ndarray.shape[d]
    orig_printops = np.get_printoptions()
    np.set_printoptions(threshold=sys.maxsize)

    # define a function to convert a single element into a C++ init string
    # a single element can be a hex string if we are using packing
    def elem2str(x):
        if isinstance(x, (str, np.str_)):
            return '%s("%s", 16)' % (hls_dtype, x)
        elif isinstance(x, np.float32):
            if dtype.is_integer():
                return str(int(x))
            else:
                return str(x)
        else:
            raise Exception("Unsupported type for numpy_to_hls_code")

    strarr = np.array2string(ndarray,
                             separator=", ",
                             formatter={"all": elem2str})
    np.set_printoptions(**orig_printops)
    strarr = strarr.replace("[", "{").replace("]", "}")
    if no_decl:
        ret = strarr + ";"
    else:
        ret = ret + " = \n" + strarr + ";"
    return ret
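
The bracket-to-brace substitution at the end turns NumPy's array2string output into a C++ brace initializer. The step in isolation, on a toy array:

import numpy as np

arr = np.array([[1, 2], [3, 4]], dtype=np.float32)
strarr = np.array2string(arr, separator=", ",
                         formatter={"all": lambda x: str(int(x))})
strarr = strarr.replace("[", "{").replace("]", "}")
# strarr is now "{{1, 2},\n {3, 4}}", a valid C++ initializer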
Example #4
    def __init__(self, onnx_node):
        super().__init__(onnx_node)
        odt_name = self.get_nodeattr("outputDataType")
        if odt_name == "":
            # if not provided, compute the minimal-size output datatype
            labels = self.get_nodeattr("Labels")
            odt = DataType.get_smallest_possible(labels - 1)
            # ensure a datatype divisible by 8-bits in case this is the last node
            bw = roundup_to_integer_multiple(odt.bitwidth(), 8)
            new_odt_name = odt.name.replace(str(odt.bitwidth()), str(bw))
            odt = DataType[new_odt_name]
            odt_name = odt.name
            self.set_nodeattr("outputDataType", odt_name)
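
The renaming step above widens the datatype by rewriting the bitwidth embedded in its name. A sketch with concrete numbers, assuming 1000 labels (values 0..999 fit in UINT10) and the roundup helper sketched under Example #1:

bw = roundup_to_integer_multiple(10, 8)         # 16
new_odt_name = "UINT10".replace("10", str(bw))  # "UINT16"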
Example #5
    def code_generation_ipgen(self, model, fpgapart, clk):
        # generate code for all mem_mode settings of the MVAU/FCLayer unit
        super().code_generation_ipgen(model, fpgapart, clk)

        # if mem_mode == "decoupled", generate code for the verilog wrapper
        mem_mode = self.get_nodeattr("mem_mode")
        if mem_mode == "decoupled":
            # empty code gen dictionary for new entries
            self.code_gen_dict.clear()
            self.code_gen_dict["$TOPNAME$"] = [
                "{}_memstream".format(self.onnx_node.name)
            ]
            self.code_gen_dict["$LAYER_NAME$"] = [
                "{}_{}".format(self.onnx_node.name, self.onnx_node.name)
            ]
            # make instream width a multiple of 8 for AXI stream interface
            in_width = self.get_instream_width_padded()
            self.code_gen_dict["$IN_RANGE$"] = ["[{}:0]".format(in_width - 1)]
            self.code_gen_dict["$OUT_RANGE$"] = [
                "[{}:0]".format(self.get_outstream_width_padded() - 1)
            ]
            # make weight stream width a multiple of 8 for AXI stream interface
            weight_width = self.get_weightstream_width_padded()
            self.code_gen_dict["$WEIGHT_RANGE$"] = ["[{}:0]".format(weight_width - 1)]
            self.code_gen_dict["$WEIGHT_WIDTH$"] = [str(weight_width)]
            self.code_gen_dict["$WSTREAM_DEPTH$"] = [str(self.calc_wmem())]
            self.code_gen_dict["$MEM_DEPTH$"] = [
                str(roundup_to_integer_multiple(self.calc_wmem(), 1024))
            ]
            self.code_gen_dict["$RAM_STYLE$"] = [self.get_nodeattr("ram_style")]

            template = self.decoupled_wrapper

            for key in self.code_gen_dict:
                # transform list into long string separated by '\n'
                code_gen_line = "\n".join(self.code_gen_dict[key])
                template = template.replace(key, code_gen_line)
            code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
            f = open(
                os.path.join(
                    code_gen_dir, "{}_memstream.v".format(self.onnx_node.name)
                ),
                "w",
            )
            f.write(template)
            f.close()
            self.code_gen_dict.clear()
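
The verilog wrapper is produced by plain string substitution: each $KEY$ placeholder in the template is replaced with the joined lines stored under that key. A minimal sketch with a hypothetical template:

template = "module $TOPNAME$ (input $IN_RANGE$ in0);"
code_gen_dict = {"$TOPNAME$": ["layer0_memstream"], "$IN_RANGE$": ["[7:0]"]}
for key in code_gen_dict:
    template = template.replace(key, "\n".join(code_gen_dict[key]))
assert template == "module layer0_memstream (input [7:0] in0);"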
Example #6
def to_external_tensor(init, w_dtype):
    """Return an appropriately formatted and packed numpy byte array for given
    external parameter tensor."""

    weight_width = init.shape[1] * w_dtype.bitwidth()
    weight_width_padded = roundup_to_integer_multiple(weight_width, 4)
    hex_init = pack_innermost_dim_as_hex_string(
        init, w_dtype, weight_width_padded, prefix="0x"
    )
    ext_weight = np.array([], dtype=np.uint8)
    for line in hex_init:
        array_line = [
            x for x in reversed(hexstring2npbytearray(line, remove_prefix="0x"))
        ]
        ext_weight = np.append(ext_weight, array_line)

    return ext_weight
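
The reversed() call converts each big-endian hex string into little-endian byte order for the external weight buffer. A self-contained illustration with a minimal stand-in for hexstring2npbytearray:

import numpy as np

def hexstring2npbytearray(s, remove_prefix="0x"):
    # minimal stand-in: strip the prefix and parse hex pairs as uint8
    return np.frombuffer(bytes.fromhex(s[len(remove_prefix):]), dtype=np.uint8)

array_line = [x for x in reversed(hexstring2npbytearray("0x0a0b0c"))]
assert array_line == [0x0C, 0x0B, 0x0A]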
Example #7
def finnpy_to_packed_bytearray(
    ndarray, dtype, reverse_inner=False, reverse_endian=False, fast_mode=False
):
    """Given a numpy ndarray with FINN DataType dtype, pack the innermost
    dimension and return the packed representation as an ndarray of uint8.
    The packed innermost dimension will be padded to the nearest multiple
    of 8 bits. The returned ndarray has the same number of dimensions as the
    input.

    If fast_mode is enabled, will attempt to use shortcuts (casting) to save
    on runtime for certain cases.
    This mode is currently not well-tested, use at your own risk.
    """

    # handle no-packing cases (if fast_mode) via casting to save on compute
    if issubclass(type(ndarray), np.ndarray) and fast_mode:
        inp_is_byte = ndarray.dtype in [np.uint8, np.int8]
        out_is_byte = dtype.bitwidth() == 8
        double_reverse = reverse_inner and reverse_endian
        if inp_is_byte and out_is_byte and double_reverse:
            return ndarray.view(np.uint8)

    if (not issubclass(type(ndarray), np.ndarray)) or ndarray.dtype != np.float32:
        # try to convert to a float numpy array (container dtype is float)
        ndarray = np.asarray(ndarray, dtype=np.float32)
    # pack innermost dim to hex strings padded to 8 bits
    bits = dtype.bitwidth() * ndarray.shape[-1]
    bits_padded = roundup_to_integer_multiple(bits, 8)
    packed_hexstring = pack_innermost_dim_as_hex_string(
        ndarray, dtype, bits_padded, reverse_inner=reverse_inner
    )

    def fn(x):
        return np.asarray(list(map(hexstring2npbytearray, x)))

    if packed_hexstring.ndim == 0:
        # scalar, call hexstring2npbytearray directly
        ret = hexstring2npbytearray(packed_hexstring.item())
    else:
        # convert ndarray of hex strings to byte array
        ret = np.apply_along_axis(fn, packed_hexstring.ndim - 1, packed_hexstring)
    if reverse_endian:
        # reverse the endianness of packing dimension
        ret = np.flip(ret, axis=-1)
    return ret
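
The fast-mode shortcut works because a signed or unsigned 8-bit array is already in packed form, so ndarray.view(np.uint8) just reinterprets the existing buffer without copying:

import numpy as np

a = np.array([-1, 127], dtype=np.int8)
assert (a.view(np.uint8) == [255, 127]).all()  # two's complement reinterpreted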
Example #8
    def minimize_accumulator_width(self, model):
        weights = model.get_initializer(self.onnx_node.input[1])
        if len(self.onnx_node.input) > 2:
            thresholds = model.get_initializer(self.onnx_node.input[2])
        else:
            thresholds = None
        idt = self.get_input_datatype()
        # calculate minimum and maximum values of accumulator
        (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt)
        if thresholds is not None:
            threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
            # set threshold datatype (and accumulator datatype implicitly)
            min_threshold = thresholds.min()
            max_threshold = thresholds.max()
            # get range required by threshold values
            tdt_min = min(acc_min, min_threshold)
            tdt_max = max(acc_max, max_threshold)
            if tdt_min < 0:
                if abs(tdt_min) > tdt_max:
                    tdt = DataType.get_smallest_possible(tdt_min)
                else:
                    tdt = DataType.get_smallest_possible(0 - tdt_max)
            else:
                tdt = DataType.get_smallest_possible(tdt_max)
            assert np.vectorize(tdt.allowed)(
                threshold_tensor
            ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
            self.set_nodeattr("accDataType", tdt.name)
        else:
            if acc_min < 0:
                if abs(acc_min) > acc_max:
                    adt = DataType.get_smallest_possible(acc_min)
                else:
                    adt = DataType.get_smallest_possible(0 - acc_max)
            else:
                adt = DataType.get_smallest_possible(acc_max)
            # ensure a datatype divisible by 8-bits in case this is the last node
            bw = roundup_to_integer_multiple(adt.bitwidth(), 8)
            new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
            adt = DataType[new_adt_name]
            self.set_nodeattr("accDataType", adt.name)
            # for no-activation nodes, output dt = acc dt
            self.set_nodeattr("outputDataType", adt.name)
        return DataType[self.get_nodeattr("accDataType")]
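
The sign handling picks whichever bound needs more bits: a dominant negative bound is passed as-is, while a dominant positive bound is negated so that get_smallest_possible still returns a signed type. A sketch of the same selection with a hypothetical bit-counting helper:

def signed_bits_needed(v):
    # hypothetical helper: width of the smallest two's-complement type holding v
    n = 1
    while not -(2 ** (n - 1)) <= v <= 2 ** (n - 1) - 1:
        n += 1
    return n

acc_min, acc_max = -100, 90
v = acc_min if abs(acc_min) > acc_max else 0 - acc_max
assert signed_bits_needed(v) == 8  # INT8 covers [-128, 127]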
Example #9
def finnpy_to_packed_bytearray(ndarray,
                               dtype,
                               reverse_inner=False,
                               reverse_endian=False,
                               fast_mode=False):
    """Given a numpy ndarray with FINN DataType dtype, pack the innermost
    dimension and return the packed representation as an ndarray of uint8.
    The packed innermost dimension will be padded to the nearest multiple
    of 8 bits. The returned ndarray has the same number of dimensions as the
    input.

    If fast_mode is enabled, will attempt to use shortcuts to save
    on runtime for certain cases:
    * 8-bit ndarray -> 8-bit
    * ndarray -> 1-bit and total bits % 8 == 0
    This mode is currently not well-tested, use at your own risk!
    """

    # handle fast_mode cases (currently only called from driver):
    if issubclass(type(ndarray), np.ndarray) and fast_mode:
        inp_is_byte = ndarray.dtype in [np.uint8, np.int8]
        out_is_byte = dtype.bitwidth() == 8
        double_reverse = reverse_inner and reverse_endian
        # fast mode case: byte -> byte: cast
        if inp_is_byte and out_is_byte and double_reverse:
            return ndarray.view(np.uint8)
        # fast mode case: xxx -> bit with nbits % 8 == 0: np.packbits
        out_is_bit = dtype.bitwidth() == 1
        bits = dtype.bitwidth() * ndarray.shape[-1]
        bits_padded = roundup_to_integer_multiple(bits, 8)
        no_pad = bits_padded == bits
        if out_is_bit and no_pad and double_reverse:
            in_as_int8 = ndarray.astype(np.int8)
            # bipolar -> binary if needed
            if dtype == DataType["BIPOLAR"]:
                in_as_int8 = (in_as_int8 + 1) // 2
            # reverse inner
            in_as_int8 = np.flip(in_as_int8, axis=-1)
            # pack with numpy
            packed_data = np.packbits(in_as_int8, axis=-1)
            # reverse endianness and return
            return np.flip(packed_data, axis=-1)

    if (not issubclass(type(ndarray),
                       np.ndarray)) or ndarray.dtype != np.float32:
        # try to convert to a float numpy array (container dtype is float)
        ndarray = np.asarray(ndarray, dtype=np.float32)
    # pack innermost dim to hex strings padded to 8 bits
    bits = dtype.bitwidth() * ndarray.shape[-1]
    bits_padded = roundup_to_integer_multiple(bits, 8)
    packed_hexstring = pack_innermost_dim_as_hex_string(
        ndarray, dtype, bits_padded, reverse_inner=reverse_inner)

    def fn(x):
        return np.asarray(list(map(hexstring2npbytearray, x)))

    if packed_hexstring.ndim == 0:
        # scalar, call hexstring2npbytearray directly
        ret = hexstring2npbytearray(packed_hexstring.item())
    else:
        # convert ndarray of hex strings to byte array
        ret = np.apply_along_axis(fn, packed_hexstring.ndim - 1,
                                  packed_hexstring)
    if reverse_endian:
        # reverse the endianness of packing dimension
        ret = np.flip(ret, axis=-1)
    return ret
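
The 1-bit fast path relies on np.packbits, which packs eight {0, 1} entries into one byte, MSB first; bipolar {-1, +1} values are remapped to {0, 1} beforehand:

import numpy as np

bipolar = np.array([1, -1, 1, 1, -1, -1, 1, -1], dtype=np.int8)
binary = (bipolar + 1) // 2            # {-1, +1} -> {0, 1}
packed = np.packbits(binary, axis=-1)  # 0b10110010, MSB first
assert packed[0] == 0xB2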
Example #10
    def generate_params(self, model, path):
        mem_mode = self.get_nodeattr("mem_mode")
        code_gen_dir = path
        # weights, if not external
        weights = model.get_initializer(self.onnx_node.input[1])
        # convert weights into hlslib-compatible format
        weight_tensor = self.get_hls_compatible_weight_tensor(weights)
        export_wdt = self.get_weight_datatype()
        # we have converted bipolar weights to binary for export,
        # so use it as such for weight generation
        if self.get_weight_datatype() == DataType.BIPOLAR:
            export_wdt = DataType.BINARY

        if mem_mode == "const":
            """Saves weights into params.h"""
            weight_hls_code = numpy_to_hls_code(
                weight_tensor, export_wdt, "weights", True, True
            )
            # write weights into params.h
            f_weights = open("{}/params.h".format(code_gen_dir), "w")

            if export_wdt.bitwidth() != 1:
                f_weights.write(
                    "const FixedPointWeights<{},{},{},{}> weights = ".format(
                        self.get_nodeattr("SIMD"),
                        export_wdt.get_hls_datatype_str(),
                        self.get_nodeattr("PE"),
                        self.calc_wmem(),
                    )
                )
            else:
                f_weights.write(
                    "const BinaryWeights<{},{},{}> weights = ".format(
                        self.get_nodeattr("SIMD"),
                        self.get_nodeattr("PE"),
                        self.calc_wmem(),
                    )
                )
            f_weights.write(weight_hls_code)
            f_weights.close()

        elif mem_mode == "decoupled" or mem_mode == "external":
            """Saves weights in corresponding file format for cppsim or rtlsim"""
            # transpose weight tensor from (1, PE, WMEM, SIMD) to (1, WMEM, PE, SIMD)
            weight_tensor_unflipped = np.transpose(weight_tensor, (0, 2, 1, 3))

            # reverse SIMD flip for saving weights in .npy
            weight_tensor_simd_flipped = np.flip(weight_tensor_unflipped, axis=-1)
            # PE flip for saving weights in .dat
            weight_tensor_pe_flipped = np.flip(weight_tensor_unflipped, axis=-2)

            # reshape weight tensor (simd_flipped and pe_flipped) to desired shape
            pe = self.get_nodeattr("PE")
            simd = self.get_nodeattr("SIMD")
            # simd_flipped
            weight_tensor_simd_flipped = weight_tensor_simd_flipped.reshape(
                1, -1, pe * simd
            )
            weight_tensor_simd_flipped = weight_tensor_simd_flipped.copy()
            # flipped
            weight_tensor_pe_flipped = weight_tensor_pe_flipped.reshape(
                1, -1, pe * simd
            )
            weight_tensor_pe_flipped = weight_tensor_pe_flipped.copy()

            """Saves weights into .npy file"""
            np.save(
                os.path.join(code_gen_dir, "weights.npy"), weight_tensor_simd_flipped
            )

            if mem_mode == "decoupled":
                """Saves weights into .dat file"""
                # convert weight values into hexstring
                weight_width = self.get_weightstream_width()
                # pad to nearest 4 bits to get hex strings
                weight_width_padded = roundup_to_integer_multiple(weight_width, 4)
                weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string(
                    weight_tensor_pe_flipped, export_wdt, weight_width_padded, prefix=""
                )
                # flatten hex strings into a contiguous weight stream
                weight_stream = weight_tensor_pe_flipped.flatten()
                weight_stream = weight_stream.copy()
                with open("{}/memblock_0.dat".format(code_gen_dir), "a+") as f:
                    for val in weight_stream:
                        f.write(val + "\n")
        else:
            raise Exception(
                """Please set mem_mode to "const", "decoupled", or "external",
                currently no other parameter value is supported!"""
            )

        # save thresholds in thresh.h
        if len(self.onnx_node.input) > 2:
            thresholds = model.get_initializer(self.onnx_node.input[2])
            if thresholds is not None:
                threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
                # use UINT32 threshold export for bipolar times bipolar
                inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
                wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
                # reinterpret inp/wt as bipolar if bin_xnor_mode is set
                inp_is_binary = self.get_input_datatype() == DataType.BINARY
                wt_is_binary = self.get_weight_datatype() == DataType.BINARY
                bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
                inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
                wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
                # get computed threshold datatype from attribute
                tdt = DataType[self.get_nodeattr("accDataType")]

                assert np.vectorize(tdt.allowed)(
                    threshold_tensor
                ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
                thresholds_hls_code = numpy_to_hls_code(
                    threshold_tensor, tdt, "thresholds", False, True
                )
                # write thresholds into thresh.h
                f_thresh = open("{}/thresh.h".format(code_gen_dir), "w")
                tdt_hls = tdt.get_hls_datatype_str()
                # use binary to export bipolar activations
                export_odt = self.get_output_datatype()
                if self.get_output_datatype() == DataType.BIPOLAR:
                    export_odt = DataType.BINARY
                odt_hls = export_odt.get_hls_datatype_str()
                f_thresh.write(
                    "static ThresholdsActivation<{},{},{},{},{},{},{}> threshs \
                    = ".format(
                        self.calc_tmem(),
                        self.get_nodeattr("PE"),
                        threshold_tensor.shape[-1],
                        tdt_hls,
                        odt_hls,
                        self.get_nodeattr("ActVal"),
                        "std::less_equal<%s>" % tdt_hls,
                    )
                )
                f_thresh.write(thresholds_hls_code)
                f_thresh.close()
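
The transpose and flips are easier to follow with toy dimensions; this sketch just tracks the shapes involved:

import numpy as np

pe, wmem, simd = 2, 3, 4
w = np.arange(pe * wmem * simd).reshape(1, pe, wmem, simd)
w_unflipped = np.transpose(w, (0, 2, 1, 3))     # (1, wmem, pe, simd)
w_simd_flipped = np.flip(w_unflipped, axis=-1)  # for the .npy file
w_pe_flipped = np.flip(w_unflipped, axis=-2)    # for the .dat file
assert w_pe_flipped.reshape(1, -1, pe * simd).shape == (1, wmem, pe * simd)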
Example #11
    def get_weightstream_width_padded(self):
        """Returns weight stream width padded to a multiple of 8. This is required
        by the AXI Stream spec. Used in decoupled mode."""
        weight_width = self.get_weightstream_width()
        return roundup_to_integer_multiple(weight_width, 8)
Example #12
    def get_outstream_width_padded(self):
        """Returns output stream width padded to a multiple of 8. This is required
        by the AXI Stream spec."""
        out_width = self.get_outstream_width()
        return roundup_to_integer_multiple(out_width, 8)
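
Both padded-width getters reduce to the same byte-boundary rounding, e.g. a 17-bit stream is padded to 24 bits to meet the AXI Stream byte-alignment requirement (using the roundup helper sketched under Example #1):

assert roundup_to_integer_multiple(17, 8) == 24
assert roundup_to_integer_multiple(24, 8) == 24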
Example #13
    def make_weight_file(self, weights, weight_file_mode, weight_file_name):
        """Produce a file containing given weights (thresholds) in appropriate
        format for this layer. This file can be used for either synthesis or
        run-time reconfig of weights.

        Arguments:
        * weights : numpy array with weights to be put into the file
        * weight_file_mode : one of {hls_header, decoupled_npy,
          decoupled_verilog_dat, decoupled_runtime}
        * weight_file_name : filename for the weight file to be generated
        """
        threshold_tensor = self.get_hls_compatible_threshold_tensor(weights)
        tdt = self.get_weight_datatype()
        assert np.vectorize(tdt.allowed)(
            threshold_tensor
        ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
        if weight_file_mode == "hls_header":
            # save thresholds in thresh.h
            thresholds_hls_code = numpy_to_hls_code(threshold_tensor, tdt,
                                                    "thresholds", False, True)
            # write thresholds into thresh.h
            f_thresh = open(weight_file_name, "w")
            tdt_hls = tdt.get_hls_datatype_str()
            # use binary to export bipolar activations
            export_odt = self.get_output_datatype()
            if self.get_output_datatype() == DataType.BIPOLAR:
                export_odt = DataType.BINARY
            odt_hls = export_odt.get_hls_datatype_str()
            f_thresh.write(
                "static ThresholdsActivation<{},{},{},{},{},{},{}> threshs \
                = ".format(
                    self.calc_tmem(),
                    self.get_nodeattr("PE"),
                    threshold_tensor.shape[-1],
                    tdt_hls,
                    odt_hls,
                    self.get_nodeattr("ActVal"),
                    "comp::less_equal<%s>" % tdt_hls,
                ))
            f_thresh.write(thresholds_hls_code)
            f_thresh.close()
        elif "decoupled" in weight_file_mode:
            # streaming thresholds need to be organized differently
            # (1, pe, tmem, n_thres_steps) -> (1, tmem, pe, n_thres_steps)
            decoupled_thres = np.transpose(threshold_tensor, (0, 2, 1, 3))
            # TODO add flips/reversals as needed here
            # (1, tmem, pe, n_thres_steps) -> (1, tmem, pe * n_thres_steps)
            pe = self.get_nodeattr("PE")
            n_thres_steps = self.get_nodeattr("numSteps")
            decoupled_thres_pe_flipped = np.flip(decoupled_thres, axis=-2)
            decoupled_thres = decoupled_thres.reshape(1, -1,
                                                      pe * n_thres_steps)
            decoupled_thres = decoupled_thres.copy()
            decoupled_thres_pe_flipped = decoupled_thres_pe_flipped.reshape(
                1, -1, pe * n_thres_steps)
            decoupled_thres_pe_flipped = decoupled_thres_pe_flipped.copy()

            if weight_file_mode == "decoupled_npy":
                # save weight stream into npy for cppsim
                np.save(weight_file_name, decoupled_thres)
            elif weight_file_mode == "decoupled_verilog_dat":
                # convert weight values into hexstring
                weight_width = self.get_weightstream_width()
                # pad to nearest 4 bits to get hex strings
                weight_width_padded = roundup_to_integer_multiple(
                    weight_width, 4)
                weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string(
                    decoupled_thres_pe_flipped,
                    tdt,
                    weight_width_padded,
                    prefix="")
                weight_stream = weight_tensor_pe_flipped.flatten()
                weight_stream = weight_stream.copy()
                with open(weight_file_name, "w") as f:
                    for val in weight_stream:
                        f.write(val + "\n")
            elif weight_file_mode == "decoupled_runtime":
                # memstream axi-lite interface will map each mem line to
                # one or multiple 32-bit words
                weight_width = self.get_weightstream_width()
                words_per_memwidth = 2**ceil(log2(weight_width / 32))
                if words_per_memwidth < 1:
                    words_per_memwidth = 1
                weight_width_padded = words_per_memwidth * 32
                # first, pack and ensure padding to 32 bits
                weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string(
                    decoupled_thres_pe_flipped,
                    tdt,
                    weight_width_padded,
                    prefix="")
                weight_stream = weight_tensor_pe_flipped.flatten()
                weight_stream = weight_stream.copy()
                with open(weight_file_name, "w") as f:
                    for val in weight_stream:
                        # split into groups of 8 hex digits (= 32 bits)
                        words_32b = textwrap.wrap(val, 8)
                        words_32b.reverse()
                        for word_32b in words_32b:
                            f.write(word_32b + "\n")
            else:
                raise Exception("Unknown decoupled weight_file_mode")
        else:
            raise Exception("Unknown weight_file_mode")
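
In decoupled_runtime mode each memory line is emitted as 32-bit words, least-significant word first; the textwrap split behaves like this on a hypothetical 64-bit line:

import textwrap

val = "0123456789abcdef"           # one 64-bit mem line as 16 hex digits
words_32b = textwrap.wrap(val, 8)  # ['01234567', '89abcdef']
words_32b.reverse()                # ['89abcdef', '01234567']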
Example #14
    def apply(self, model):
        pynq_shell_path = os.environ["PYNQSHELL_PATH"]
        if not os.path.isdir(pynq_shell_path):
            raise Exception(
                "Ensure the PYNQ-HelloWorld utility repo is cloned.")
        ipstitch_path = model.get_metadata_prop("vivado_stitch_proj")
        if ipstitch_path is None or (not os.path.isdir(ipstitch_path)):
            raise Exception(
                "No stitched IPI design found, apply CreateStitchedIP first.")
        vivado_stitch_vlnv = model.get_metadata_prop("vivado_stitch_vlnv")
        if vivado_stitch_vlnv is None:
            raise Exception(
                "No vlnv for stitched IP found, apply CreateStitchedIP first.")

        # collect list of all IP dirs
        ip_dirs = ["list"]
        for node in model.graph.node:
            ip_dir_attribute = get_by_name(node.attribute, "ip_path")
            assert (ip_dir_attribute
                    is not None), """Node attribute "ip_path" is
            empty. Please run transformation HLSSynth_ipgen first."""
            ip_dir_value = ip_dir_attribute.s.decode("UTF-8")
            assert os.path.isdir(ip_dir_value), """The directory that should
            contain the generated ip blocks doesn't exist."""
            ip_dirs += [ip_dir_value]
        ip_dirs += [ipstitch_path + "/ip"]
        ip_dirs_str = "[%s]" % (" ".join(ip_dirs))

        # extract HLSCustomOp instances to get i/o stream widths
        i_tensor_name = model.graph.input[0].name
        o_tensor_name = model.graph.output[0].name
        first_node = getCustomOp(model.find_consumer(i_tensor_name))
        last_node = getCustomOp(model.find_producer(o_tensor_name))
        i_bits_per_cycle = first_node.get_instream_width()
        o_bits_per_cycle = last_node.get_outstream_width()
        # ensure i/o is padded to bytes
        i_bits_per_cycle_padded = roundup_to_integer_multiple(
            i_bits_per_cycle, 8)
        o_bits_per_cycle_padded = roundup_to_integer_multiple(
            o_bits_per_cycle, 8)
        assert (i_bits_per_cycle_padded %
                8 == 0), """Padded input bits are not a
        multiple of 8."""
        assert (o_bits_per_cycle_padded %
                8 == 0), """Padded output bits are not a
        multiple of 8."""
        in_bytes = i_bits_per_cycle_padded // 8
        out_bytes = o_bits_per_cycle_padded // 8
        in_if_name = "in0_V_V_0"
        out_if_name = "out_r_0"
        clk_name = "ap_clk_0"
        nrst_name = "ap_rst_n_0"
        axi_lite_if_name = "s_axi_control_0"
        vivado_ip_cache = os.getenv("VIVADO_IP_CACHE", default="")
        # TODO get from Transformation arg or metadata_prop
        fclk_mhz = 100.0

        # create a temporary folder for the project
        vivado_pynq_proj_dir = make_build_dir(prefix="vivado_pynq_proj_")
        model.set_metadata_prop("vivado_pynq_proj", vivado_pynq_proj_dir)
        # filename for the synth utilization report
        synth_report_filename = vivado_pynq_proj_dir + "/synth_report.xml"
        model.set_metadata_prop("vivado_synth_rpt", synth_report_filename)

        ip_config_tcl = templates.ip_config_tcl_template % (
            vivado_pynq_proj_dir,
            ip_dirs_str,
            vivado_pynq_proj_dir,
            synth_report_filename,
            vivado_stitch_vlnv,
            in_bytes,
            out_bytes,
            in_if_name,
            out_if_name,
            clk_name,
            nrst_name,
            axi_lite_if_name,
            vivado_ip_cache,
            fclk_mhz,
        )

        with open(vivado_pynq_proj_dir + "/ip_config.tcl", "w") as f:
            f.write(ip_config_tcl)
        # create a shell script for project creation and synthesis
        make_project_sh = vivado_pynq_proj_dir + "/make_project.sh"
        working_dir = os.environ["PWD"]
        ipcfg = vivado_pynq_proj_dir + "/ip_config.tcl"
        with open(make_project_sh, "w") as f:
            f.write(templates.call_pynqshell_makefile_template %
                    (pynq_shell_path, self.platform, ipcfg, "block_design",
                     working_dir))
        synth_project_sh = vivado_pynq_proj_dir + "/synth_project.sh"
        with open(synth_project_sh, "w") as f:
            f.write(templates.call_pynqshell_makefile_template %
                    (pynq_shell_path, self.platform, ipcfg, "bitstream",
                     working_dir))
        # call the project creation script
        # synthesis script will be called with a separate transformation
        bash_command = ["bash", make_project_sh]
        process_compile = subprocess.Popen(bash_command,
                                           stdout=subprocess.PIPE)
        process_compile.communicate()
        return (model, False)
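
Because the padded widths are guaranteed multiples of 8, the byte counts fed into the tcl template can use integer division; a sketch reusing the roundup helper from Example #1:

i_bits_per_cycle = 20
i_bits_per_cycle_padded = roundup_to_integer_multiple(i_bits_per_cycle, 8)  # 24
in_bytes = i_bits_per_cycle_padded // 8  # 3; '//' keeps the count an integer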
Example #15
    def generate_params(self, model, path):
        mem_mode = self.get_nodeattr("mem_mode")
        # weights
        weights = model.get_initializer(self.onnx_node.input[1])
        # convert weights into hlslib-compatible format
        weight_tensor = self.get_hls_compatible_weight_tensor(weights)
        export_wdt = self.get_weight_datatype()
        # we have converted bipolar weights to binary for export,
        # so use it as such for weight generation
        if self.get_weight_datatype() == DataType.BIPOLAR:
            export_wdt = DataType.BINARY
        code_gen_dir = path

        if mem_mode == "const":
            """Saves weights into params.h"""
            weight_hls_code = numpy_to_hls_code(
                weight_tensor, export_wdt, "weights", True, True
            )
            # write weights into params.h
            f_weights = open("{}/params.h".format(code_gen_dir), "w")

            if export_wdt.bitwidth() != 1:
                f_weights.write(
                    "const FixedPointWeights<{},{},{},{}> weights = ".format(
                        self.get_nodeattr("SIMD"),
                        export_wdt.get_hls_datatype_str(),
                        self.get_nodeattr("PE"),
                        self.calc_wmem(),
                    )
                )
            else:
                f_weights.write(
                    "const BinaryWeights<{},{},{}> weights = ".format(
                        self.get_nodeattr("SIMD"),
                        self.get_nodeattr("PE"),
                        self.calc_wmem(),
                    )
                )
            f_weights.write(weight_hls_code)
            f_weights.close()

        elif mem_mode == "decoupled":
            """Saves weights in corresponding file format for cppsim or rtlsim"""
            # transpose weight tensor from (1, PE, WMEM, SIMD) to (1, WMEM, PE, SIMD)
            # and save as unflipped weight tensor to be able to differentiate between
            # flipped and unflipped weight tensors (it has to be flipped for cppsim)

            weight_tensor_unflipped = np.transpose(weight_tensor, (0, 2, 1, 3))

            # flip PE dimension and reverse SIMD flip for saving weights in .npy
            weight_tensor_flipped = np.flip(weight_tensor_unflipped, axis=-2)
            weight_tensor_flipped = np.flip(weight_tensor_flipped, axis=-1)

            # reshape weight tensor (flipped and unflipped) to desired shape
            pe = self.get_nodeattr("PE")
            simd = self.get_nodeattr("SIMD")
            # unflipped
            weight_tensor_unflipped = weight_tensor_unflipped.reshape(1, -1, pe * simd)
            weight_tensor_unflipped = weight_tensor_unflipped.copy()
            # flipped
            weight_tensor_flipped = weight_tensor_flipped.reshape(1, -1, pe * simd)
            weight_tensor_flipped = weight_tensor_flipped.copy()

            """Saves weights into .npy file"""
            np.save(os.path.join(code_gen_dir, "weights.npy"), weight_tensor_flipped)

            """Saves weights into .dat file"""
            # convert weight values into hexstring
            weight_width = self.get_weightstream_width()
            # pad to nearest 4 bits to get hex strings
            weight_width_padded = roundup_to_integer_multiple(weight_width, 4)
            weight_tensor_unflipped = pack_innermost_dim_as_hex_string(
                weight_tensor_unflipped, export_wdt, weight_width_padded, prefix=""
            )
            weight_stream_len = np.prod(weight_tensor_unflipped.shape)
            factor = math.ceil(weight_stream_len / 1024)
            # add zeroes to pad out file to 1024 entries
            weight_stream = weight_tensor_unflipped.flatten()
            pad_amt = (factor * 1024) - weight_stream_len
            weight_stream = np.pad(
                weight_stream, (0, pad_amt), mode="constant", constant_values="0"
            )
            weight_stream = weight_stream.copy()
            i = 0
            j = 0
            for val in weight_stream:
                if i == 1024:
                    i = 0
                    j += 1
                with open("{}/memblock_{}.dat".format(code_gen_dir, j), "a+") as f:
                    f.write(val + "\n")
                i += 1

        else:
            raise Exception(
                """Please set mem_mode to "const" or "decoupled", currently no other
                parameter value is supported!"""
            )

        # save thresholds in thresh.h
        if len(self.onnx_node.input) > 2:
            thresholds = model.get_initializer(self.onnx_node.input[2])
            if thresholds is not None:
                threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
                tdt = DataType.INT32
                # use UINT32 threshold export for bipolar times bipolar
                inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
                wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
                # reinterpret inp/wt as bipolar if bin_xnor_mode is set
                inp_is_binary = self.get_input_datatype() == DataType.BINARY
                wt_is_binary = self.get_weight_datatype() == DataType.BINARY
                bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
                inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
                wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
                if inp_is_bipolar and wt_is_bipolar:
                    tdt = DataType.UINT32
                thresholds_hls_code = numpy_to_hls_code(
                    threshold_tensor, tdt, "thresholds", False, True
                )
                # write thresholds into thresh.h
                f_thresh = open("{}/thresh.h".format(code_gen_dir), "w")
                tdt_hls = tdt.get_hls_datatype_str()
                # use binary to export bipolar activations
                export_odt = self.get_output_datatype()
                if self.get_output_datatype() == DataType.BIPOLAR:
                    export_odt = DataType.BINARY
                odt_hls = export_odt.get_hls_datatype_str()
                f_thresh.write(
                    "static ThresholdsActivation<{},{},{},{},{},{},{}> threshs \
                    = ".format(
                        self.calc_tmem(),
                        self.get_nodeattr("PE"),
                        threshold_tensor.shape[-1],
                        tdt_hls,
                        odt_hls,
                        self.get_nodeattr("ActVal"),
                        "std::less_equal<%s>" % tdt_hls,
                    )
                )
                f_thresh.write(thresholds_hls_code)
                f_thresh.close()

    def minimize_accumulator_width(self, model):
        weights = model.get_initializer(self.onnx_node.input[1])
        k_h, k_w = self.get_nodeattr("Kernel")
        fm = self.get_nodeattr("Channels")
        # put weights into the shape expected by calculate_matvec_accumulator_range
        weights = weights.reshape(fm, k_h * k_w).transpose()
        if len(self.onnx_node.input) > 2:
            thresholds = model.get_initializer(self.onnx_node.input[2])
        else:
            thresholds = None
        idt = self.get_input_datatype()
        # calculate minimum and maximum values of accumulator
        (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt)
        if thresholds is not None:
            threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
            # set threshold datatype (and accumulator datatype implicitly)
            min_threshold = thresholds.min()
            max_threshold = thresholds.max()
            # clip threshold values
            clip_upper = None
            clip_lower = None
            if max_threshold > acc_max + 1:
                clip_upper = acc_max + 1
            if min_threshold < acc_min:
                clip_lower = acc_min
            if (clip_lower is not None) or (clip_upper is not None):
                warnings.warn("Clipping some thresholds in %s" % self.onnx_node.name)
                thresholds = np.clip(thresholds, clip_lower, clip_upper)
                model.set_initializer(self.onnx_node.input[2], thresholds)
                threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
                min_threshold = thresholds.min()
                max_threshold = thresholds.max()
            # get range required by threshold values
            tdt_min = min(acc_min, min_threshold)
            tdt_max = max(acc_max, max_threshold)
            if tdt_min < 0:
                if abs(tdt_min) > tdt_max:
                    tdt = DataType.get_smallest_possible(tdt_min)
                else:
                    tdt = DataType.get_smallest_possible(0 - tdt_max)
            else:
                tdt = DataType.get_smallest_possible(tdt_max)
            assert np.vectorize(tdt.allowed)(
                threshold_tensor
            ).all(), "Thresholds in %s can't be expressed with type %s" % (
                self.onnx_node.name,
                str(tdt),
            )
            self.set_nodeattr("accDataType", tdt.name)
        else:
            if acc_min < 0:
                if abs(acc_min) > acc_max:
                    adt = DataType.get_smallest_possible(acc_min)
                else:
                    adt = DataType.get_smallest_possible(0 - acc_max)
            else:
                adt = DataType.get_smallest_possible(acc_max)
            # ensure a datatype divisible by 8-bits in case this is the last node
            bw = roundup_to_integer_multiple(adt.bitwidth(), 8)
            new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
            adt = DataType[new_adt_name]
            self.set_nodeattr("accDataType", adt.name)
            # for no-activation nodes, output dt = acc dt
            self.set_nodeattr("outputDataType", adt.name)
        return DataType[self.get_nodeattr("accDataType")]
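
The clipping step in minimize_accumulator_width bounds thresholds to the reachable accumulator range before sizing the datatype; np.clip accepts None for an unused bound. A small illustration:

import numpy as np

acc_min, acc_max = -10, 10
thresholds = np.array([-15.0, 0.0, 12.0])
clipped = np.clip(thresholds, acc_min, acc_max + 1)
assert (clipped == [-10.0, 0.0, 11.0]).all()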