def test_pack_innermost_dim_as_hex_string():
    """Check hex-string packing of the innermost dimension for 2D and 3D
    inputs, with and without inner-dimension reversal."""
    # 2D binary input, each 4-bit row packed into an 8-bit hex word
    arr2d = [[1, 1, 1, 0], [0, 1, 1, 0]]
    expected2d = np.asarray(["0x0e", "0x06"])
    packed2d = pack_innermost_dim_as_hex_string(arr2d, DataType.BINARY, 8)
    assert np.array_equal(packed2d, expected2d)
    # 3D UINT2 input, two 2-bit values per innermost dim
    arr3d = [[[3, 3], [3, 3]], [[1, 3], [3, 1]]]
    expected3d = np.asarray([["0x0f", "0x0f"], ["0x07", "0x0d"]])
    packed3d = pack_innermost_dim_as_hex_string(arr3d, DataType.UINT2, 8)
    assert np.array_equal(packed3d, expected3d)
    # same 3D input, but innermost dimension reversed before packing
    arr3d_rev = [[[3, 3], [3, 3]], [[1, 3], [3, 1]]]
    expected_rev = np.asarray([["0x0f", "0x0f"], ["0x0d", "0x07"]])
    packed_rev = pack_innermost_dim_as_hex_string(
        arr3d_rev, DataType.UINT2, 8, reverse_inner=True
    )
    assert np.array_equal(packed_rev, expected_rev)
def to_external_tensor(init, w_dtype):
    """Return an appropriately formatted and packed numpy byte array
    for given external parameter tensor.

    Arguments:
    * init : numpy array with the parameter tensor; the innermost (second)
      dimension is packed into one hex string per row
    * w_dtype : datatype of the tensor elements (provides bitwidth())

    Returns a flat uint8 numpy array containing, for each row of init,
    that row's packed bytes in reversed byte order.
    """
    # bit width of one packed row, padded up to a whole number of hex digits
    weight_width = init.shape[1] * w_dtype.bitwidth()
    weight_width_padded = roundup_to_integer_multiple(weight_width, 4)
    hex_init = pack_innermost_dim_as_hex_string(
        init, w_dtype, weight_width_padded, prefix="0x"
    )
    # collect per-row bytes in a plain list and materialize once at the end;
    # np.append inside the loop copies the whole array on every iteration (O(n^2))
    byte_chunks = []
    for line in hex_init:
        # reverse the byte order of each packed row
        byte_chunks.extend(reversed(hexstring2npbytearray(line, remove_prefix="0x")))
    return np.asarray(byte_chunks, dtype=np.uint8)
def generate_params(self, model, path):
    """Generate weight (and optional threshold) parameter files for this node.

    Arguments:
    * model : model wrapper used to look up the node's weight initializer
      (input[1]) and optional threshold initializer (input[2])
    * path : directory into which the generated files are written

    Behavior by the "mem_mode" node attribute:
    * "const"     : weights emitted as an hlslib weights array in params.h
    * "decoupled" : weights saved as weights.npy and as a hex-string stream
                    appended to memblock_0.dat
    * "external"  : only weights.npy is saved
    Any other value raises an Exception. If a threshold initializer is
    present, thresholds are additionally exported to thresh.h.
    """
    mem_mode = self.get_nodeattr("mem_mode")
    code_gen_dir = path
    # weights, if not external
    weights = model.get_initializer(self.onnx_node.input[1])
    # convert weights into hlslib-compatible format
    weight_tensor = self.get_hls_compatible_weight_tensor(weights)
    export_wdt = self.get_weight_datatype()
    # we have converted bipolar weights to binary for export,
    # so use it as such for weight generation
    if self.get_weight_datatype() == DataType.BIPOLAR:
        export_wdt = DataType.BINARY
    if mem_mode == "const":
        """Saves weights into params.h"""
        weight_hls_code = numpy_to_hls_code(
            weight_tensor, export_wdt, "weights", True, True
        )
        # write weights into params.h
        f_weights = open("{}/params.h".format(code_gen_dir), "w")
        # 1-bit weights use the dedicated BinaryWeights container,
        # everything else uses FixedPointWeights
        if export_wdt.bitwidth() != 1:
            f_weights.write(
                "const FixedPointWeights<{},{},{},{}> weights = ".format(
                    self.get_nodeattr("SIMD"),
                    export_wdt.get_hls_datatype_str(),
                    self.get_nodeattr("PE"),
                    self.calc_wmem(),
                )
            )
        else:
            f_weights.write(
                "const BinaryWeights<{},{},{}> weights = ".format(
                    self.get_nodeattr("SIMD"),
                    self.get_nodeattr("PE"),
                    self.calc_wmem(),
                )
            )
        f_weights.write(weight_hls_code)
        f_weights.close()
    elif mem_mode == "decoupled" or mem_mode == "external":
        """Saves weights in corresponding file format for cppsim or rtlsim"""
        # transpose weight tensor from (1, PE, WMEM, SIMD) to (1, WMEM, PE, SIMD)
        weight_tensor_unflipped = np.transpose(weight_tensor, (0, 2, 1, 3))
        # reverse SIMD flip for saving weights in .npy
        weight_tensor_simd_flipped = np.flip(weight_tensor_unflipped, axis=-1)
        # PE flip for saving weights in .dat
        weight_tensor_pe_flipped = np.flip(weight_tensor_unflipped, axis=-2)
        # reshape weight tensor (simd_flipped and pe_flipped) to desired shape
        pe = self.get_nodeattr("PE")
        simd = self.get_nodeattr("SIMD")
        # simd_flipped
        weight_tensor_simd_flipped = weight_tensor_simd_flipped.reshape(
            1, -1, pe * simd
        )
        # .copy() forces a contiguous array after the flip/reshape views
        weight_tensor_simd_flipped = weight_tensor_simd_flipped.copy()
        # flipped
        weight_tensor_pe_flipped = weight_tensor_pe_flipped.reshape(
            1, -1, pe * simd
        )
        weight_tensor_pe_flipped = weight_tensor_pe_flipped.copy()
        """Saves weights into .npy file"""
        np.save(
            os.path.join(code_gen_dir, "weights.npy"), weight_tensor_simd_flipped
        )
        if mem_mode == "decoupled":
            """Saves weights into .dat file"""
            # convert weight values into hexstring
            weight_width = self.get_weightstream_width()
            # pad to nearest 4 bits to get hex strings
            weight_width_padded = roundup_to_integer_multiple(weight_width, 4)
            weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string(
                weight_tensor_pe_flipped, export_wdt, weight_width_padded, prefix=""
            )
            # add zeroes to pad out file to 1024 entries
            weight_stream = weight_tensor_pe_flipped.flatten()
            weight_stream = weight_stream.copy()
            # note: "a+" appends, so repeated calls keep extending the file
            with open("{}/memblock_0.dat".format(code_gen_dir), "a+") as f:
                for val in weight_stream:
                    f.write(val + "\n")
    else:
        raise Exception(
            """Please set mem_mode to "const", "decoupled", or "external",
            currently no other parameter value is supported!"""
        )
    # save thresholds in thresh.h
    if len(self.onnx_node.input) > 2:
        thresholds = model.get_initializer(self.onnx_node.input[2])
        if thresholds is not None:
            threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
            # use UINT32 threshold export for bipolar times bipolar
            inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
            wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
            # reinterpret inp/wt as bipolar if bin_xnor_mode is iset
            inp_is_binary = self.get_input_datatype() == DataType.BINARY
            wt_is_binary = self.get_weight_datatype() == DataType.BINARY
            bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
            inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
            wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
            # NOTE(review): inp_is_bipolar / wt_is_bipolar are computed but not
            # read below -- tdt comes from accDataType unconditionally; looks
            # like leftover logic from an older version, confirm before removing
            # get computed threshold datatype from attribute
            tdt = DataType[self.get_nodeattr("accDataType")]
            assert np.vectorize(tdt.allowed)(
                threshold_tensor
            ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
            thresholds_hls_code = numpy_to_hls_code(
                threshold_tensor, tdt, "thresholds", False, True
            )
            # write thresholds into thresh.h
            f_thresh = open("{}/thresh.h".format(code_gen_dir), "w")
            tdt_hls = tdt.get_hls_datatype_str()
            # use binary to export bipolar activations
            export_odt = self.get_output_datatype()
            if self.get_output_datatype() == DataType.BIPOLAR:
                export_odt = DataType.BINARY
            odt_hls = export_odt.get_hls_datatype_str()
            f_thresh.write(
                "static ThresholdsActivation<{},{},{},{},{},{},{}> threshs \
                = ".format(
                    self.calc_tmem(),
                    self.get_nodeattr("PE"),
                    threshold_tensor.shape[-1],
                    tdt_hls,
                    odt_hls,
                    self.get_nodeattr("ActVal"),
                    "std::less_equal<%s>" % tdt_hls,
                )
            )
            f_thresh.write(thresholds_hls_code)
            f_thresh.close()
def make_weight_file(self, weights, weight_file_mode, weight_file_name):
    """Produce a file containing given weights (thresholds) in appropriate
    format for this layer. This file can be used for either synthesis or
    run-time reconfig of weights.

    Arguments:

    * weights : numpy array with weights to be put into the file
    * weight_file_mode : one of {hls_header, decoupled_npy,
      decoupled_verilog_dat, decoupled_runtime}
    * weight_file_name : filename for the weight file to be generated

    Raises an Exception for any other weight_file_mode.
    """
    threshold_tensor = self.get_hls_compatible_threshold_tensor(weights)
    tdt = self.get_weight_datatype()
    # every threshold value must be representable in the weight datatype
    assert np.vectorize(tdt.allowed)(
        threshold_tensor
    ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
    if weight_file_mode == "hls_header":
        # save thresholds in thresh.h
        thresholds_hls_code = numpy_to_hls_code(
            threshold_tensor, tdt, "thresholds", False, True
        )
        # write thresholds into thresh.h
        f_thresh = open(weight_file_name, "w")
        tdt_hls = tdt.get_hls_datatype_str()
        # use binary to export bipolar activations
        export_odt = self.get_output_datatype()
        if self.get_output_datatype() == DataType.BIPOLAR:
            export_odt = DataType.BINARY
        odt_hls = export_odt.get_hls_datatype_str()
        f_thresh.write(
            "static ThresholdsActivation<{},{},{},{},{},{},{}> threshs \
            = ".format(
                self.calc_tmem(),
                self.get_nodeattr("PE"),
                threshold_tensor.shape[-1],
                tdt_hls,
                odt_hls,
                self.get_nodeattr("ActVal"),
                "comp::less_equal<%s>" % tdt_hls,
            )
        )
        f_thresh.write(thresholds_hls_code)
        f_thresh.close()
    elif "decoupled" in weight_file_mode:
        # streaming thresholds need to be organized differently
        # (1, pe, tmem, n_thres_steps) -> (1, tmem, pe, n_thres_steps)
        decoupled_thres = np.transpose(threshold_tensor, (0, 2, 1, 3))
        # TODO add flips/reversals as needed here
        # (1, tmem, pe, n_thres_steps) -(1, tmem, pe * n_thres_steps)
        pe = self.get_nodeattr("PE")
        n_thres_steps = self.get_nodeattr("numSteps")
        # PE-flipped variant is what the hex/dat exports below consume
        decoupled_thres_pe_flipped = np.flip(decoupled_thres, axis=-2)
        decoupled_thres = decoupled_thres.reshape(1, -1, pe * n_thres_steps)
        # .copy() forces a contiguous array after the transpose/reshape views
        decoupled_thres = decoupled_thres.copy()
        decoupled_thres_pe_flipped = decoupled_thres_pe_flipped.reshape(
            1, -1, pe * n_thres_steps
        )
        decoupled_thres_pe_flipped = decoupled_thres_pe_flipped.copy()
        if weight_file_mode == "decoupled_npy":
            # save weight stream into npy for cppsim
            np.save(weight_file_name, decoupled_thres)
        elif weight_file_mode == "decoupled_verilog_dat":
            # convert weight values into hexstring
            weight_width = self.get_weightstream_width()
            # pad to nearest 4 bits to get hex strings
            weight_width_padded = roundup_to_integer_multiple(weight_width, 4)
            weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string(
                decoupled_thres_pe_flipped, tdt, weight_width_padded, prefix=""
            )
            weight_stream = weight_tensor_pe_flipped.flatten()
            weight_stream = weight_stream.copy()
            # one hex word per line, consumable by Verilog $readmemh
            with open(weight_file_name, "w") as f:
                for val in weight_stream:
                    f.write(val + "\n")
        elif weight_file_mode == "decoupled_runtime":
            # memstream axi-lite interface will map each mem line to
            # one or multiple 32-bit words
            weight_width = self.get_weightstream_width()
            words_per_memwidth = 2 ** ceil(log2(weight_width / 32))
            if words_per_memwidth < 1:
                words_per_memwidth = 1
            weight_width_padded = words_per_memwidth * 32
            # first, pack and ensure padding to 32 bits
            weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string(
                decoupled_thres_pe_flipped, tdt, weight_width_padded, prefix=""
            )
            weight_stream = weight_tensor_pe_flipped.flatten()
            weight_stream = weight_stream.copy()
            with open(weight_file_name, "w") as f:
                for val in weight_stream:
                    # split into groups of 8 hex digits (= 32 bits)
                    words_32b = textwrap.wrap(val, 8)
                    # reverse so the least significant 32-bit word is first
                    words_32b.reverse()
                    for word_32b in words_32b:
                        f.write(word_32b + "\n")
        else:
            raise Exception("Decoupled weight export not yet implemented")
    else:
        raise Exception("Unknown weight_file_mode")
def generate_params(self, model, path):
    """Generate weight (and optional threshold) parameter files for this node.

    Arguments:
    * model : model wrapper used to look up the node's weight initializer
      (input[1]) and optional threshold initializer (input[2])
    * path : directory into which the generated files are written

    Behavior by the "mem_mode" node attribute:
    * "const"     : weights emitted as an hlslib weights array in params.h
    * "decoupled" : weights saved as weights.npy and as hex-string streams
                    appended to memblock_<j>.dat files of 1024 entries each
    Any other value raises an Exception. If a threshold initializer is
    present, thresholds are additionally exported to thresh.h.
    """
    mem_mode = self.get_nodeattr("mem_mode")
    # weights
    weights = model.get_initializer(self.onnx_node.input[1])
    # convert weights into hlslib-compatible format
    weight_tensor = self.get_hls_compatible_weight_tensor(weights)
    export_wdt = self.get_weight_datatype()
    # we have converted bipolar weights to binary for export,
    # so use it as such for weight generation
    if self.get_weight_datatype() == DataType.BIPOLAR:
        export_wdt = DataType.BINARY
    code_gen_dir = path
    if mem_mode == "const":
        """Saves weights into params.h"""
        weight_hls_code = numpy_to_hls_code(
            weight_tensor, export_wdt, "weights", True, True
        )
        # write weights into params.h
        f_weights = open("{}/params.h".format(code_gen_dir), "w")
        # 1-bit weights use the dedicated BinaryWeights container,
        # everything else uses FixedPointWeights
        if export_wdt.bitwidth() != 1:
            f_weights.write(
                "const FixedPointWeights<{},{},{},{}> weights = ".format(
                    self.get_nodeattr("SIMD"),
                    export_wdt.get_hls_datatype_str(),
                    self.get_nodeattr("PE"),
                    self.calc_wmem(),
                )
            )
        else:
            f_weights.write(
                "const BinaryWeights<{},{},{}> weights = ".format(
                    self.get_nodeattr("SIMD"),
                    self.get_nodeattr("PE"),
                    self.calc_wmem(),
                )
            )
        f_weights.write(weight_hls_code)
        f_weights.close()
    elif mem_mode == "decoupled":
        """Saves weights in corresponding file format for cppsim or rtlsim"""
        # transpose weight tensor from (1, PE, WMEM, SIMD) to (1, WMEM, PE, SIMD)
        # and save as unflipped weight tensor to be able to differentiate between
        # flipped an unflipped weight tensor (has to be flipped for cppsim)
        weight_tensor_unflipped = np.transpose(weight_tensor, (0, 2, 1, 3))
        # flip PE dimension and reverse SIMD flip for saving weights in .npy
        weight_tensor_flipped = np.flip(weight_tensor_unflipped, axis=-2)
        weight_tensor_flipped = np.flip(weight_tensor_flipped, axis=-1)
        # reshape weight tensor (flipped and unflipped) to desired shape
        pe = self.get_nodeattr("PE")
        simd = self.get_nodeattr("SIMD")
        # unflipped; .copy() forces a contiguous array after the reshape view
        weight_tensor_unflipped = weight_tensor_unflipped.reshape(1, -1, pe * simd)
        weight_tensor_unflipped = weight_tensor_unflipped.copy()
        # flipped
        weight_tensor_flipped = weight_tensor_flipped.reshape(1, -1, pe * simd)
        weight_tensor_flipped = weight_tensor_flipped.copy()
        """Saves weights into .npy file"""
        np.save(os.path.join(code_gen_dir, "weights.npy"), weight_tensor_flipped)
        """Saves weights into .dat file"""
        # convert weight values into hexstring
        weight_width = self.get_weightstream_width()
        # pad to nearest 4 bits to get hex strings
        weight_width_padded = roundup_to_integer_multiple(weight_width, 4)
        weight_tensor_unflipped = pack_innermost_dim_as_hex_string(
            weight_tensor_unflipped, export_wdt, weight_width_padded, prefix=""
        )
        weight_stream_len = np.prod(weight_tensor_unflipped.shape)
        # number of 1024-entry memblock files needed
        factor = math.ceil(weight_stream_len / 1024)
        # add zeroes to pad out file to 1024 entries
        weight_stream = weight_tensor_unflipped.flatten()
        pad_amt = (factor * 1024) - weight_stream_len
        weight_stream = np.pad(
            weight_stream, (0, pad_amt), mode="constant", constant_values="0"
        )
        weight_stream = weight_stream.copy()
        # write each 1024-entry block with a single file open instead of
        # re-opening the file for every value (was one open() per entry)
        for j in range(factor):
            block = weight_stream[j * 1024 : (j + 1) * 1024]
            # "a+" preserved: repeated calls keep appending, as before
            with open("{}/memblock_{}.dat".format(code_gen_dir, j), "a+") as f:
                f.writelines(val + "\n" for val in block)
    else:
        # fixed typo in original message: '"const"i' -> '"const"'
        raise Exception(
            """Please set mem_mode to "const" or "decoupled",
            currently no other parameter value is supported!"""
        )
    # save thresholds in thresh.h
    if len(self.onnx_node.input) > 2:
        thresholds = model.get_initializer(self.onnx_node.input[2])
        if thresholds is not None:
            threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
            tdt = DataType.INT32
            # use UINT32 threshold export for bipolar times bipolar
            inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
            wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
            # reinterpret inp/wt as bipolar if bin_xnor_mode is iset
            inp_is_binary = self.get_input_datatype() == DataType.BINARY
            wt_is_binary = self.get_weight_datatype() == DataType.BINARY
            bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
            inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
            wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
            if inp_is_bipolar and wt_is_bipolar:
                tdt = DataType.UINT32
            thresholds_hls_code = numpy_to_hls_code(
                threshold_tensor, tdt, "thresholds", False, True
            )
            # write thresholds into thresh.h
            f_thresh = open("{}/thresh.h".format(code_gen_dir), "w")
            tdt_hls = tdt.get_hls_datatype_str()
            # use binary to export bipolar activations
            export_odt = self.get_output_datatype()
            if self.get_output_datatype() == DataType.BIPOLAR:
                export_odt = DataType.BINARY
            odt_hls = export_odt.get_hls_datatype_str()
            f_thresh.write(
                "static ThresholdsActivation<{},{},{},{},{},{},{}> threshs \
                = ".format(
                    self.calc_tmem(),
                    self.get_nodeattr("PE"),
                    threshold_tensor.shape[-1],
                    tdt_hls,
                    odt_hls,
                    self.get_nodeattr("ActVal"),
                    "std::less_equal<%s>" % tdt_hls,
                )
            )
            f_thresh.write(thresholds_hls_code)
            f_thresh.close()