def __init__(self):
    """Choose the callable stored in ``self.accel_fn``.

    When ``IS_PYNQ`` is true, ``libaccel.so`` is opened through cffi,
    ``self.accel_fn`` is bound to its ``_Z17_p0_mmult_accel_0PfS_S_iii``
    symbol, and the bitstream is downloaded unless the overlay reports
    that it is already loaded. Otherwise ``self.accel_fn`` is set to
    ``pcsim.mmult_accel``.
    """
    self.ffi = cffi.FFI()

    # Off-board path: no shared library or bitstream is touched.
    if not IS_PYNQ:
        self.accel_fn = pcsim.mmult_accel
        return

    # Both paths are relative, so they resolve against the current
    # working directory of the process.
    bitstream_path = "../pynq_chainer/HLS/bitstream.bit"
    shared_lib_path = "../pynq_chainer/HLS/src/libaccel.so"

    # Declare the mangled symbol exported by the library before binding it.
    self.ffi.cdef(
        "int _Z17_p0_mmult_accel_0PfS_S_iii(float*, float*, float*, int, int, int);"
    )
    self.lib = self.ffi.dlopen(shared_lib_path)
    self.accel_fn = self.lib._Z17_p0_mmult_accel_0PfS_S_iii

    pl_overlay = Overlay(bitstream_path)
    if not pl_overlay.is_loaded():
        pl_overlay.download()
        # NOTE(review): the source indentation was lost; the message is
        # assumed to belong to the download branch - confirm.
        print("load Overlay")
def __init__(self):
    """Choose the callable stored in ``self.accel_fn``.

    When ``IS_PYNQ`` is true, ``libaccel.so`` is opened through cffi,
    the library object's ``__dict__`` is printed, ``self.accel_fn`` is
    bound to the ``_Z17_p0_BlackBoxJam_0P7ap_uintILi32EES1_bjjjS0_``
    symbol, and the bitstream is downloaded unless the overlay reports
    that it is already loaded. Otherwise ``self.accel_fn`` is set to
    ``pcsim.mmult_accel``.
    """
    self.ffi = cffi.FFI()

    # Off-board path: no shared library or bitstream is touched.
    if not IS_PYNQ:
        self.accel_fn = pcsim.mmult_accel
        return

    # Both paths are relative, so they resolve against the current
    # working directory of the process.
    bitstream_path = "../pynq_chainer/HLS/bitstream.bit"
    shared_lib_path = "../pynq_chainer/HLS/src/libaccel.so"

    # Declare the mangled symbol exported by the library before binding it.
    self.ffi.cdef(
        "void _Z17_p0_BlackBoxJam_0P7ap_uintILi32EES1_bjjjS0_(unsigned int*, unsigned int*, bool, unsigned int, unsigned int, unsigned int, unsigned int);"
    )
    self.lib = self.ffi.dlopen(shared_lib_path)
    # Debug output: dump what the cffi library object currently holds.
    print(self.lib.__dict__)
    self.accel_fn = self.lib._Z17_p0_BlackBoxJam_0P7ap_uintILi32EES1_bjjjS0_

    pl_overlay = Overlay(bitstream_path)
    if not pl_overlay.is_loaded():
        pl_overlay.download()
        # NOTE(review): the source indentation was lost; the message is
        # assumed to belong to the download branch - confirm.
        print("load Overlay")
def __init__(self):
    """Open the accelerator library, create the overlay and allocate a frame.

    Declares the ``_p0_rgb_2_gray_0`` and ``_p0_sobel_filter_0`` entry
    points plus ``cma_alloc``/``cma_free`` with cffi, opens ``LIBFILE``,
    builds an ``Overlay`` from ``BITFILE``, allocates the gray-frame
    buffer through ``cma_alloc`` and downloads the bitstream when the
    overlay reports that it is not loaded.

    Raises:
        RuntimeError: if ``cma_alloc`` returns a NULL pointer.
    """
    # Set attributes
    self.bitfile = BITFILE
    self.libfile = LIBFILE
    self._ffi = cffi.FFI()

    # Accelerator functions
    self._ffi.cdef("void _p0_rgb_2_gray_0(uint8_t * input," +
                   "uint8_t * output);")
    self._ffi.cdef("void _p0_sobel_filter_0(uint8_t * input," +
                   "uint8_t * output);")
    self._ffilib = self._ffi.dlopen(self.libfile)
    self.overlay = Overlay(self.bitfile)

    # XLNK functions
    self._ffi.cdef("void *cma_alloc(uint32_t len," +
                   "uint32_t cacheable);")
    self._ffi.cdef("void cma_free(void *buf);")

    # Allocate memory for gray frame: 1920 * 1080 bytes, requested with
    # cacheable=0. The raw pointer is checked before the cast so a failed
    # allocation is reported here rather than surfacing later as a NULL
    # buffer passed to the accelerator.
    raw_buffer = self._ffilib.cma_alloc(1920 * 1080, 0)
    if raw_buffer == self._ffi.NULL:
        raise RuntimeError("Failed to allocate memory for the gray frame")
    self.frame_gray = self._ffi.cast("uint8_t *", raw_buffer)

    # Check if bitstream is loaded
    if not self.overlay.is_loaded():
        self.overlay.download()
# Benchmark settings taken from the parsed command-line arguments.
MB = args.d        # MB of the array
NUM_TEST = args.n  # Number of benchmark tests
VERBOSE = args.v   # Verbose logging

# Verbose mode lowers the threshold so that debug records are emitted too.
log_level = logging.DEBUG if VERBOSE else logging.INFO
logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)
logging.debug("Configuration: MB={}, NUM_TEST={}".format(MB, NUM_TEST))

# Load the overlay; the timed span covers construction and download.
logging.info("Loading Overlay...")
start = time.time()
ol = Overlay("dma_benchmark.bit")
ol.download()
end = time.time()

# Stop here when the overlay does not report itself as loaded.
if not ol.is_loaded():
    logging.error("The overlay can not be loaded!")
    exit(-1)

logging.info("Overlay loaded in {0:.4f}s".format(end-start))
logging.debug(
    "Overlay info: [{}, {}]".format(PL.bitfile_name, ol.bitstream.timestamp)
)

# DMA object for transfers from PS to PL (direction=0).
TXdma = DMA(0x40400000, direction=0)
# DMA object for transfers from PL to PS (direction=1).
# NOTE(review): both objects use the same base address 0x40400000 - confirm
# that this is intended.
RXdma = DMA(0x40400000, direction=1)

logging.debug("DMA configuration:")
logging.debug(TXdma.Configuration)
logging.debug(RXdma.Configuration)
from pynq import Overlay

# Build the overlay object for the tensorcore bitstream.
overlay = Overlay('/home/xilinx/pynq/tensorcore.bit')

######################################################################################################
# Query the load state and show the overlay's help text (interactive use).
overlay.is_loaded()
help(overlay)

######################################################################################################
# Handle for the tensorcore_0 attribute exposed by the overlay.
tensor_stat = overlay.tensorcore_0
help(tensor_stat)

######################################################################################################
# Configure Register (Address 0x00) Format
#  _____________________________________________________________________________________________
# | 31 -- 28 | 27 -- 26 | 25        | 24 -- 21      | 20 -- 17  | 16   | 15 -- 12    | 11 -- 1   | 0  |
# | Feat Sel, X-4b,Y-2b | Bias/Zero | Psum Index-4b | Out Shift | Relu | Output Sel* | Wgt Sel** | NA |
#  ---------------------------------------------------------------------------------------------
#  *  for version 0.1, Output Sel should be set the same as Psum Index
#  ** for version 0.1, Weight Sel only uses two bits and 4 indexes are utilized
#     for version 0.1, Feat Sel X should be < 12
#
# The word below equals 0b0000_00_0_0001_0000_0_0001_00000000000_1.
tensor_stat.write(
    0x00,
    (0b0001 << 21)    # bits 24-21 (Psum Index) = 1
    | (0b0001 << 12)  # bits 15-12 (Output Sel) = 1, same as Psum Index
    | 0b1,            # bit 0 = 1
)

######################################################################################################
# Write the Feature map, a 6 row 12 column table with each element of 8 bit
# Address Space Mapping Format (a 32-bit word includes 4 elements)
# ____________________________________________________