Beispiel #1
0
def fft(ol, data_in):
    """Stream 512 samples through the FPGA transform and return bin power.

    The samples are sent through ``ol.axi_dma_out``; the real and imaginary
    result words are received through ``ol.axi_dma_re`` and
    ``ol.axi_dma_im``.  All three DMA channels are waited on before the
    receive buffers are read, and the contiguous buffers are released before
    returning.

    Args:
        ol: Overlay object exposing ``axi_dma_out``, ``axi_dma_re`` and
            ``axi_dma_im``.
        data_in: Indexable sequence holding at least 512 samples; only the
            first 512 are used.

    Returns:
        numpy.ndarray: 512 float64 values, ``re * re + im * im`` per bin.

    Raises:
        IndexError: If ``data_in`` holds fewer than 512 samples.
    """
    out = ol.axi_dma_out
    re = ol.axi_dma_re
    im = ol.axi_dma_im
    data_size = 512
    # Raw words at or above 0x4000000 are folded to negative values by
    # subtracting 0x8000000 (same thresholds as the original element loop).
    sign_threshold = 0x4000000
    wrap = 0x8000000

    xlnk = Xlnk()
    buffers = []
    try:
        for _ in range(3):
            buffers.append(
                xlnk.cma_array(shape=(data_size,), dtype=np.int32))
        input_buffer, output_buffer_re, output_buffer_im = buffers

        for i in range(data_size):
            input_buffer[i] = data_in[i]

        out.sendchannel.transfer(input_buffer)
        re.recvchannel.transfer(output_buffer_re)
        im.recvchannel.transfer(output_buffer_im)
        # transfer() only starts the DMA; the results are not valid until
        # every channel reports completion.
        out.sendchannel.wait()
        re.recvchannel.wait()
        im.recvchannel.wait()

        # Copy out of contiguous memory before the buffers are released.
        raw_re = np.array(output_buffer_re, dtype=np.int64)
        raw_im = np.array(output_buffer_im, dtype=np.int64)
    finally:
        for buf in buffers:
            buf.freebuffer()

    data_re = np.where(raw_re >= sign_threshold, raw_re - wrap,
                       raw_re).astype(np.float64)
    data_im = np.where(raw_im >= sign_threshold, raw_im - wrap,
                       raw_im).astype(np.float64)
    return data_re * data_re + data_im * data_im
Beispiel #2
0
    def __init__(self, description, pkt_config=1, pkt_reload=128): # TODO: find out the correct length of config and reload
        """Initialise the hierarchy driver and allocate its two buffers.

        Args:
            description: Forwarded unchanged to the parent initializer.
            pkt_config: Number of int8 elements in ``buf_config``.
            pkt_reload: Number of int16 elements in ``buf_reload``.
        """
        super().__init__(description)

        # Contiguous buffers kept on the instance for later use.
        xlnk = Xlnk()
        self.buf_config = xlnk.cma_array(shape=(pkt_config, ), dtype=np.int8)
        self.buf_reload = xlnk.cma_array(shape=(pkt_reload, ), dtype=np.int16)
        self.BWSelector.enable = 1
        # NOTE(review): presumably the tap count and sample rate in Hz of the
        # filter behind this driver -- confirm against the IP documentation.
        self._taps = 128
        self._fs = 256e6
        # Called after _taps/_fs are set; set_downsample is defined outside
        # this view and may read them.
        self.set_downsample(2)
    def __init__(self):
        """Load the bitstream, bind its IP handles and allocate buffers.

        Six DMA engines (``axi_dma_0`` .. ``axi_dma_5``) and the ``top_0``
        block are stored on the instance, followed by the float16 contiguous
        buffers for feature maps, weights, biases and outputs.
        """
        bitstream = Overlay("/home/xilinx/jupyter_notebooks/IP core/end.bit")
        self.ol = bitstream
        for index in range(6):
            setattr(self, f"dma_{index}",
                    getattr(bitstream, f"axi_dma_{index}"))
        self.top = bitstream.top_0

        allocator = Xlnk()

        def half_precision(dims):
            # Every buffer in this initializer is float16.
            return allocator.cma_array(shape=dims, dtype=np.float16)

        # First buffer set
        self.map_in_buffer = half_precision((128, 34, 34))
        self.weight_buffer = half_precision((128, 64, 3, 3))
        self.bias_buffer = half_precision(64)
        self.out_buffer = half_precision((64, 32, 32))

        # Second buffer set
        self.map_in_buffer_2 = half_precision((3, 68, 68))
        self.weight_buffer_2 = half_precision((3, 64, 6, 6))
        self.bias_buffer_2 = half_precision(64)
        self.out_buffer_2 = half_precision((64, 32, 32))

        self.map_in_64 = half_precision((64, 34, 34))
Beispiel #4
0
    def compute(self, rays, tri_ids, tris):
        """Upload triangles and rays to the intersect core and start it.

        Shared (contiguous) input and output arrays are allocated, their
        physical addresses and the element counts are written to the core's
        registers, the inputs are copied in bulk, and finally ``1`` is
        written to register ``0x00``.

        Args:
            rays: Flat sequence of ray values, 6 per ray.  A trailing
                partial ray is ignored, the same way a trailing partial
                triangle is.
            tri_ids: Sequence with one id per triangle.
            tris: Flat sequence of triangle values, 9 per triangle.
        """
        from pynq import Xlnk
        xlnk = Xlnk()
        num_tris = len(tris) // 9
        num_rays = len(rays) // 6

        # Drop references to the arrays of a previous call before new ones
        # are allocated.
        self._out_ids = None
        self._out_inter = None
        self._tids = None
        self._tris = None
        self._rays = None

        log.info(f'{self.name}: Allocating shared input arrays')
        self._tids = xlnk.cma_array(shape=(num_tris, ), dtype=np.int32)
        self._tris = xlnk.cma_array(shape=(num_tris * 9, ), dtype=np.float32)
        self._rays = xlnk.cma_array(shape=(num_rays * 6, ), dtype=np.float32)

        log.info(f'{self.name}: Allocating shared output arrays')
        self._out_ids = xlnk.cma_array(shape=(num_rays, ), dtype=np.int32)
        self._out_inter = xlnk.cma_array(shape=(num_rays, ), dtype=np.float32)

        log.info(f'{self.name}: Setting accelerator input physical addresses')
        self.intersect_ip.write(self.ADDR_I_TNUMBER_DATA, num_tris)
        self.intersect_ip.write(self.ADDR_I_TDATA_DATA,
                                self._tris.physical_address)
        self.intersect_ip.write(self.ADDR_I_TIDS_DATA,
                                self._tids.physical_address)

        self.intersect_ip.write(self.ADDR_I_RNUMBER_DATA, num_rays)
        self.intersect_ip.write(self.ADDR_I_RDATA_DATA,
                                self._rays.physical_address)

        self.intersect_ip.write(self.ADDR_O_TIDS_DATA,
                                self._out_ids.physical_address)
        self.intersect_ip.write(self.ADDR_O_TINTERSECTS_DATA,
                                self._out_inter.physical_address)

        ti = time()
        log.info(f'{self.name}: Filling input memory arrays')
        # Bulk slice copies instead of one Python-level store per element.
        self._tids[:] = tri_ids[:num_tris]
        self._tris[:] = tris[:num_tris * 9]
        log.info(
            f'{self.name}: Triangle arrays filled in {time() - ti} seconds')

        ti = time()
        self._rays[:] = rays[:num_rays * 6]
        log.info(f'{self.name}: Ray arrays filled in {time() - ti} seconds')

        log.info(f'Starting co-processor {self.name}')
        self.intersect_ip.write(0x00, 1)
Beispiel #5
0
def matrixAvg(red, green, blue):
    """Send the combined colour data through ``axi_dma_0`` and return 27 words.

    Args:
        red: First operand of ``red + green + blue``.
        green: Second operand.
        blue: Third operand.  The combined value must fit the 2700-element
            int32 input buffer.

    Returns:
        The 27-element int32 contiguous array filled by the receive channel.
    """
    dma0 = ol.axi_dma_0
    xlnk = Xlnk()
    inputs = xlnk.cma_array(shape=(2700), dtype=np.int32)
    outputs = xlnk.cma_array(shape=(27), dtype=np.int32)

    # Copy INTO the contiguous buffer.  Rebinding ``inputs`` (as the previous
    # code did) throws the DMA-capable buffer away and hands the DMA an
    # ordinary Python object instead.
    inputs[:] = red + green + blue

    # Arm both channels before waiting so the core can stream its output
    # while the input is still being consumed.
    dma0.sendchannel.transfer(inputs)
    dma0.recvchannel.transfer(outputs)
    dma0.sendchannel.wait()
    dma0.recvchannel.wait()

    return outputs
Beispiel #6
0
    def __init__(self, description):
        """Initialise the driver and allocate a 2048-element float32 buffer.

        Args:
            description: Forwarded unchanged to the parent initializer.
        """
        super().__init__(description)

        xlnk = Xlnk()
        self.buf_data = xlnk.cma_array(shape=(2048, ), dtype=np.single)

        # NOTE(review): the meaning of type == 1 is defined outside this
        # view -- confirm before changing.
        self.type = 1
        # Initial register values: transfer cleared, reset asserted.
        self.data_inspector.transfer = 0
        self.data_inspector.reset = 1
 def send(self, window_coeffs, n):
     """Send ``window_coeffs`` to the hardware through ``axi_dma_window``.

     A temporary int16 contiguous buffer of ``self.window_length`` elements
     is allocated, filled, transferred and then released.  The buffer is
     released even when the copy or the transfer raises.

     Args:
         window_coeffs: Values copied into the buffer with ``np.copyto``.
         n: Unused; kept so existing callers keep working.
     """
     xlnk = Xlnk()
     self.input_buffer = xlnk.cma_array(shape=(self.window_length, ),
                                        dtype=np.int16)
     try:
         dma = self.axi_dma_window
         np.copyto(self.input_buffer, window_coeffs)
         dma.sendchannel.transfer(self.input_buffer)
         dma.sendchannel.wait()
     finally:
         # Always hand the contiguous memory back, also on failure.
         self.input_buffer.close()
Beispiel #8
0
class MatrixOpServicer(matrix_op_pb2_grpc.MatrixOpServicer):
    """gRPC servicer that runs DIM x DIM float32 matrix products on the FPGA."""

    DIM = 128

    def __init__(self):
        """Load the overlay and allocate the input and output buffers."""
        self.overlay = Overlay(
            '/home/xilinx/matmult/overlay/matmult/matmult.bit')
        self.dma = self.overlay.dma
        self.mmult_ip = self.overlay.accel
        self.xlnk = Xlnk()

        side = MatrixOpServicer.DIM
        # Both operands share one buffer: index 0 is a, index 1 is b.
        self.in_buf = self.xlnk.cma_array(shape=(2, side, side),
                                          dtype=np.float32)
        self.out_buf = self.xlnk.cma_array(shape=(side, side),
                                           dtype=np.float32)

    def MatMult(self, request, context):
        """Multiply the two pickled matrices in ``request`` on the accelerator.

        Args:
            request: Message whose ``a`` and ``b`` fields hold pickled arrays.
            context: gRPC context (unused).

        Returns:
            ``matrix_op_pb2.OpReply`` whose ``res`` is the pickled output
            buffer.
        """
        print('request received: matrix mult')
        started = time.time()
        # NOTE(security): pickle.loads can execute arbitrary code; the
        # request payload must come from a trusted peer.
        left = pickle.loads(request.a)
        right = pickle.loads(request.b)

        lat = round((time.time() - started) * 1000000, 2)
        print(f'unpickled data in {lat} microsec')

        # Kernel run, timed separately from the unpickling above.
        started = time.time()

        self.in_buf[:] = np.stack((left, right))
        send = self.dma.sendchannel
        recv = self.dma.recvchannel
        send.transfer(self.in_buf)
        recv.transfer(self.out_buf)
        self.mmult_ip.write(CTRL_REG, (AP_START | AUTO_RESTART))

        send.wait()
        recv.wait()

        reply = matrix_op_pb2.OpReply(res=pickle.dumps(self.out_buf))

        lat = round((time.time() - started) * 1000000, 2)
        print(f'mult done in {lat} microsec')

        return reply
Beispiel #9
0
class CmaBufferFactory():
    """Small factory for cacheable contiguous (CMA) buffers."""

    def __init__(self):
        """Create the Xlnk allocator used for every buffer."""
        self._xlnk = Xlnk()

    def make_cma_buf(self, shape, data_type):
        """Allocate a cacheable contiguous buffer.

        Args:
            shape: Buffer shape; an empty list is rejected.
            data_type: dtype passed to ``cma_array``.

        Returns:
            The array returned by ``Xlnk.cma_array``.

        Raises:
            RuntimeError: If ``shape`` is an empty list.  The previous
                ``assert shape != [], RuntimeError`` raised AssertionError
                and was skipped entirely under ``python -O``.
        """
        if isinstance(shape, list) and not shape:
            raise RuntimeError("shape must not be an empty list")
        return self._xlnk.cma_array(shape=shape, cacheable=1, dtype=data_type)

    def del_cma_buf(self, cma_buf):
        """Release a buffer obtained from ``make_cma_buf``."""
        cma_buf.close()
Beispiel #10
0
def alloc_descriptor(Control, data_size, NDPL = 0x0, NDPU = 0x0, Status = 0x0, APP0 = 0x0, APP1 = 0x0, APP2 = 0x0, APP3 = 0x0, APP4 = 0x0):
    """Allocate a 13-word descriptor together with its data buffer.

    Word layout written into the descriptor:
    0-1 NDPL/NDPU, 2-3 low/high 32 bits of the data buffer's physical
    address, 4-5 reserved (zero), 6 Control, 7 Status, 8-12 APP0..APP4.

    Returns:
        tuple: ``(descriptor, buffer)`` -- both contiguous arrays.
    """
    allocator = Xlnk()
    descriptor = allocator.cma_array([13, ])
    buffer = allocator.cma_array([1, data_size])

    address = buffer.physical_address
    words = (
        NDPL,
        NDPU,
        address & 0xffffffff,
        (address >> 32) & 0xffffffff,
        0x0,  # reserved
        0x0,  # reserved
        Control,
        Status,
        APP0,
        APP1,
        APP2,
        APP3,
        APP4,
    )
    for slot, word in enumerate(words):
        descriptor[slot] = word
    return descriptor, buffer
Beispiel #11
0
    def Predict(self):
        """Send the first 25 values of ``self.data_in`` through the DMA.

        The 25 uint32 words received back are stored in ``self.data_out``
        (the contiguous receive buffer itself, not a copy).
        """
        print('Start Predict.......')
        self.dma = spi.axi_dma_0
        engine = self.dma

        allocator = Xlnk()
        tx_words = allocator.cma_array(shape=(25, ), dtype=np.uint32)
        rx_words = allocator.cma_array(shape=(25, ), dtype=np.uint32)

        slot = 0
        while slot < 25:
            tx_words[slot] = int(self.data_in[slot])
            slot += 1

        # Arm both directions first, then block until each one finishes.
        engine.sendchannel.transfer(tx_words)
        engine.recvchannel.transfer(rx_words)
        engine.sendchannel.wait()
        engine.recvchannel.wait()

        self.data_out = rx_words
def fft2(image,FDV):
    """Run ``image`` through the ``fft2_0`` core and return the result image.

    Args:
        image: Input image; converted with ``np.array`` and copied into a
            ``(pic_height, pic_width)`` uint8 contiguous buffer.
        FDV: Value written to register ``0x00`` of the core to start it.

    Returns:
        PIL image built from a copy of the output buffer.
    """
    fft2_design = Overlay("./bitstream/fft2.bit")
    # The handles must come from the overlay object.  The previous code read
    # them from the not-yet-assigned local ``fft2`` and raised
    # UnboundLocalError on the first line.
    dma = fft2_design.axi_dma_0
    fft2_ip = fft2_design.fft2_0
    input_array = np.array(image)
    xlnk = Xlnk()
    in_buffer = xlnk.cma_array(shape=(pic_height, pic_width),
                               dtype=np.uint8)
    out_buffer = xlnk.cma_array(shape=(pic_height, pic_width),
                                dtype=np.uint8)
    try:
        np.copyto(in_buffer, input_array)

        dma.sendchannel.transfer(in_buffer)
        dma.recvchannel.transfer(out_buffer)
        fft2_ip.write(0x00, FDV)  # start
        dma.sendchannel.wait()
        dma.recvchannel.wait()
        # Build the image from an ordinary copy so it never refers to the
        # contiguous memory that is released just below.
        result = Image.fromarray(np.array(out_buffer))
    finally:
        in_buffer.close()
        out_buffer.close()
        xlnk.xlnk_reset()
    return result
Beispiel #13
0
 def __init__(self, description, pkt_size, buf_dtype=np.int16, buf_words_per_pkt=2):
     """Configure the packet registers and allocate the capture buffer.

     Args:
         description: Forwarded unchanged to the parent initializer.
         pkt_size: Packet size; the register receives ``pkt_size - 1``.
         buf_dtype: Element dtype of ``self.buf``.  It used to be ignored
             in favour of a hard-coded ``np.int16``; the default keeps that
             behaviour.
         buf_words_per_pkt: Buffer elements reserved per packet entry.
     """
     super().__init__(description)

     # Init config register: hold reset while the settings are written.
     self.reset = 1
     self.enable = 1
     self.pkt_size = pkt_size-1
     self.auto_restart = 0
     self.reset = 0

     # Init buffer
     xlnk = Xlnk()
     self.buf = xlnk.cma_array(shape=(pkt_size * buf_words_per_pkt, ), dtype=buf_dtype)
Beispiel #14
0
    def __init__(self, description, pkt_sym=16, pkt_time=128, pkt_fft=1024):
        """Driver for the QPSK TX IP hierarchy.

        Covers the QPSK transmit logic and the DMA buffers used to move its
        exposed signals.

        Args:
            description: Forwarded unchanged to the parent initializer.
            pkt_sym: Symbol packet length (uint8 buffer elements).
            pkt_time: Time packet length; its int16 buffer holds twice as
                many elements.
            pkt_fft: FFT packet length (uint32 buffer elements).
        """

        super().__init__(description)

        allocator = Xlnk()
        self.buf_fft = allocator.cma_array(shape=(pkt_fft, ), dtype=np.uint32)
        self.buf_sym = allocator.cma_array(shape=(pkt_sym, ), dtype=np.uint8)
        self.buf_time = allocator.cma_array(shape=(pkt_time * 2, ),
                                            dtype=np.int16)

        tx = self.axi_qpsk_tx

        # General configuration
        tx.lfsr_rst = 1
        tx.enable = 1
        tx.packetsize_rf = 1024
        tx.lfsr_rst = 0
        tx.output_gain = 2**32 - 1

        # Symbol, FFT and time observation paths share one sequence:
        # assert reset, program the packet size, release reset, disable
        # auto-restart.
        stages = (("symbol", pkt_sym), ("fft", pkt_fft), ("time", pkt_time))
        for stage, length in stages:
            setattr(tx, f"reset_{stage}", 1)
            setattr(tx, f"packetsize_{stage}", length - 1)
            setattr(tx, f"reset_{stage}", 0)
            setattr(tx, f"autorestart_{stage}", 0)
Beispiel #15
0
def run_my_cnn(path, name):
    """Load the overlay at ``path``, run IP ``name`` once and print 16 words.

    The global buffer handed to the IP is a 16 Ki-element int32 contiguous
    array pre-filled with its own indices.
    """
    overlay = Overlay(path)
    ip = nngen_ctrl.nngen_ip(overlay, name)

    allocator = Xlnk()
    shared = allocator.cma_array(16 * 1024, dtype=np.int32)
    # Ramp 0, 1, 2, ... written in one vectorised store.
    shared[:] = np.arange(len(shared))

    ip.set_global_buffer(shared)
    ip.run()
    ip.wait()

    print(shared[:16])
Beispiel #16
0
    def __init__(self, description, pkt_wind=2048):
        """Initialise the window driver, its buffer and its widget state.

        Args:
            description: Forwarded unchanged to the parent initializer.
            pkt_wind: Number of int16 elements in ``buf_wind``.
        """
        super().__init__(description)

        xlnk = Xlnk()
        self.buf_wind = xlnk.cma_array(shape=(pkt_wind, ), dtype=np.int16)

        # NOTE(review): the coefficient count is a literal 2048 here and
        # below, independent of pkt_wind -- confirm whether other values of
        # pkt_wind are supported.
        self.set_window_coeffs(np.ones(2048))
        self.set_enable(1)

        self.window_0.dc_enable = 1

        # Empty ipywidgets containers.
        self.params = ipw.VBox([], layout=ipw.Layout(width='auto'))
        self.window_sel = ipw.HBox([], layout=ipw.Layout(width='auto'))
        # These assignments run after set_window_coeffs() above, so they
        # overwrite anything that call may have stored under the same names.
        self.coeffs = np.ones(2048)
        self.window_length = 2048
        self.window_type = 'Rectangular'

        self.coeffs_sat = np.ones(2048)

        self.frequency = 0
        self.input = 0
Beispiel #17
0
    def __init__(self, fpga_config, overlay):
        """Collect the overlay's IP handles and allocate one chunk per memory.

        Args:
            fpga_config: Object providing ``config`` (a mapping with the keys
                ``ddr2fpga_nb``, ``fpga2ddr_nb``, ``mem_nb`` and
                ``mem_depth``) and ``func_layout`` (name -> instance count).
            overlay: Loaded overlay whose IP blocks follow the
                ``<name>_<index>`` naming scheme.
        """
        settings = fpga_config.config
        ddr2fpga_nb = settings['ddr2fpga_nb']
        fpga2ddr_nb = settings['fpga2ddr_nb']
        mem_nb = settings['mem_nb']

        def numbered(prefix, count):
            # Handles named "<prefix>_0" .. "<prefix>_<count-1>".
            return [overlay.__getattr__(f'{prefix}_{i}')
                    for i in range(count)]

        self.u_axi_dma_ddr2fpga = numbered('axi_dma_ddr2fpga', ddr2fpga_nb)
        self.u_axi_dma_fpga2ddr = numbered('axi_dma_fpga2ddr', fpga2ddr_nb)
        self.u_mem = numbered('memory', mem_nb)

        self.u_func = []
        for name, nb in fpga_config.func_layout.items():
            self.u_func.extend(numbered(name, nb))

        self.u_ddr2fpga = numbered('ddr2fpga', ddr2fpga_nb)

        # enable function interrupts
        for unit in self.u_func:
            unit.write(0x04, 1)
            unit.write(0x08, 1)

        allocator = Xlnk()
        self.chunk_array = []
        for _ in range(fpga_config.config['mem_nb']):
            self.chunk_array.append(
                allocator.cma_array(
                    shape=(fpga_config.config['mem_depth'], ),
                    dtype=np.uint64))

        self.state = FPGA_state(fpga_config)
        self.config = fpga_config.config
Beispiel #18
0
import time
from pynq import Overlay
#import numpy as np
from pynq import Xlnk
# Problem dimensions
M = 200
N = 200

xlnk = Xlnk()
overlay = Overlay('/home/xilinx/pynq/overlays/HMM_v4/HMM_v4_4.bit')
overlay.download()
HMM_test = overlay.HMM_Scoring_0

# Three non-cacheable 200-element integer buffers, allocated in this order.
arr_m, arr_n, arr_x = (
    xlnk.cma_array(shape=(200,), cacheable=0, dtype=int) for _ in range(3)
)

s1 = "gcgagcgaactgcggatagttacactaacacacgaggcacgtggttgggagttacggccatgcaatggatagctcctgcatgatcggttattatacagcccattttgggcgccttccaaaggatctacttatcagaaggggtggtgccgcataactctgaccggtgggcgtagtcatagcagacttttgccgggaacgca"
s2 = "tggtccatctgcttggtggcagccgcaagatgccaattattggcgcggtcgacggggctgctatctgaatatcatatggtcttcacggagacaggaacttagcaaggtactaatcccacgcaaagtctttttttcaaaaatccagtctagtcctattatatatcctcggaaaacggtattaggacatcgggtacattcta"
s3 = "tttattgtttttgatctcgcgtctcaaagtagctccgacacacaagcggcccttggagactgctcccgagtgcctaggggcatttggtacaaggcggttataaaacgacgacctttccccttagtgcacctgggcaggctcacaccattcctccaccgtgtgtattatttgaggggaaggattctcctgtggcggctctt"
s4 = "tcaggacccaaggaggtatcaagattggaagattgtctccaggttctataggcaaaatgcaccgccctcaacggccagatgccggccgcagacttagatatgaatagaatcgggtcaagctctgctacatagattctcctccgtgctcgataactgccggagtttacgcgataagattagcggcactcttcgctgggacc"
# Sequence 1 is fully encoded here: a->1, c->2, g->3, t->4, one character
# per list element.
arr1 = list(s1.translate(str.maketrans('acgt', '1234')))
# Sequence 2 has only its 'a' bases encoded at this point.
arr2 = [base.replace('a', '1') for base in s2]
arr3 = list(s3)
b1 = {}
b2 = {}
b3 = {}
Beispiel #19
0
class sharedmemOverlay(Overlay):
    """A simple Mem-Mapped Overlay for PYNQ.

    This overlay is implemented with a single Matrix Multiply Core
    connected directly to the ARM Core AXI interface.

    """
    __RESET_VALUE = 0
    __NRESET_VALUE = 1

    # For convenience, we define register offsets that are scraped from
    # the HLS implementation header files.
    __MMULT_AP_CTRL_OFF = 0x00
    __MMULT_AP_CTRL_START_IDX = 0
    __MMULT_AP_CTRL_DONE_IDX = 1
    __MMULT_AP_CTRL_IDLE_IDX = 2
    __MMULT_AP_CTRL_READY_IDX = 3

    __MMULT_GIE_OFF = 0x04
    __MMULT_IER_OFF = 0x08
    __MMULT_ISR_OFF = 0x0C

    __MMULT_ADDR_A_DATA = 0x10
    __MMULT_ADDR_BT_DATA = 0x18
    __MMULT_ADDR_C_DATA = 0x20

    __MMULT_A_SHAPE = (100, 100)
    __MMULT_BT_SHAPE = (100, 100)
    __MMULT_C_SHAPE = (100, 100)
    __MMULT_A_SIZE = __MMULT_A_SHAPE[0] * __MMULT_A_SHAPE[1]
    __MMULT_BT_SIZE = __MMULT_BT_SHAPE[0] * __MMULT_BT_SHAPE[1]
    __MMULT_C_SIZE = __MMULT_C_SHAPE[0] * __MMULT_C_SHAPE[1]

    def __init__(self, bitfile, **kwargs):
        """Initializes a new sharedmemOverlay object.

        Parameters
        ----------
        bitfile : str
            Bitstream file name, resolved relative to the directory that
            contains this source file.
        **kwargs
            Forwarded to ``Overlay.__init__``.

        """
        # The following lines do some path searching to enable a
        # PYNQ-Like API for Overlays. For example, without these
        # lines you cannot call sharedmemOverlay('sharedmem.bit') because
        # sharedmem.bit is not on the bitstream search path. The
        # following lines fix this for any non-PYNQ Overlay
        #
        # You can safely reuse, and ignore the following lines
        #
        # Get file path of the current class (i.e. /opt/python3.6/<...>/sharedmem.py)
        file_path = os.path.abspath(inspect.getfile(inspect.currentframe()))
        # Get directory path of the current class (i.e. /opt/python3.6/<...>/sharedmem/)
        dir_path = os.path.dirname(file_path)
        # Update the bitfile path to search in dir_path
        bitfile = os.path.join(dir_path, bitfile)
        # Upload the bitfile (and parse the colocated .tcl script)
        super().__init__(bitfile, **kwargs)
        # Manually define the GPIO pin that drives reset
        self.__resetPin = GPIO(GPIO.get_gpio_pin(0), "out")
        self.nreset()
        # Define a Register object at address 0x0 of the mmult address space
        # We will use this to set bits and start the core (see start())
        # Do NOT write to __ap_ctrl unless __resetPin has been set to __NRESET_VALUE
        self.__ap_ctrl = Register(self.mmultCore.mmio.base_addr, 32)
        self.__a_offset = Register(
            self.mmultCore.mmio.base_addr + self.__MMULT_ADDR_A_DATA, 32)
        self.__bt_offset = Register(
            self.mmultCore.mmio.base_addr + self.__MMULT_ADDR_BT_DATA, 32)
        self.__c_offset = Register(
            self.mmultCore.mmio.base_addr + self.__MMULT_ADDR_C_DATA, 32)
        self.xlnk = Xlnk()

    def __start(self):
        """Raise AP_START and enable the HLS core

        """
        self.__ap_ctrl[self.__MMULT_AP_CTRL_START_IDX] = 1

    def __stop(self):
        """Lower AP_START and disable the HLS core

        """
        self.__ap_ctrl[self.__MMULT_AP_CTRL_START_IDX] = 0

    def nreset(self):
        """Set the reset pin to self.__NRESET_VALUE to place the core into
        not-reset (usually run)

        """
        self.__resetPin.write(self.__NRESET_VALUE)

    def reset(self):
        """Set the reset pin to self.__RESET_VALUE to place the core into
        reset

        """
        self.__resetPin.write(self.__RESET_VALUE)

    def run(self, A, B):
        """ Launch computation on the mmult HLS core

        Parameters
        ----------

        A : numpy.ndarray
            Left operand, at most 100x100. It is copied into a zero-padded
            100x100 buffer before being handed to the core.

        B : numpy.ndarray
            Right operand, at most 100x100. Its transpose is copied into a
            zero-padded 100x100 buffer before being handed to the core.

        Returns
        -------
        numpy.ndarray
            int32 array of shape ``(A.shape[0], B.shape[1])`` read back from
            the core's output buffer.

        Raises
        ------
        TypeError
            If A is not a numpy.ndarray.
        RuntimeError
            If B is not a numpy.ndarray, or an operand exceeds the core's
            fixed dimensions.

        """
        if (not isinstance(A, np.ndarray)):
            raise TypeError("Parameter A must be an instance of "
                            "numpy.ndarray")

        # RuntimeError (not TypeError) is kept here so existing callers that
        # catch it keep working.
        if (not isinstance(B, np.ndarray)):
            raise RuntimeError("Parameter B must be an instance of "
                               "numpy.ndarray")
        sza = A.shape
        if (sza[0] > self.__MMULT_A_SHAPE[0]):
            raise RuntimeError(
                f"Dimension 0 of A must be less than or equal to "
                f"{self.__MMULT_A_SHAPE[0]}")
        if (sza[1] > self.__MMULT_A_SHAPE[1]):
            raise RuntimeError(
                f"Dimension 1 of A must be less than or equal to "
                f"{self.__MMULT_A_SHAPE[1]}")

        # B is stored transposed, so its dimension 0 is limited by
        # dimension 1 of the BT buffer and vice versa.
        szb = B.shape
        if (szb[0] > self.__MMULT_BT_SHAPE[1]):
            raise RuntimeError(
                f"Dimension 0 of B must be less than or equal to "
                f"{self.__MMULT_BT_SHAPE[1]}")
        if (szb[1] > self.__MMULT_BT_SHAPE[0]):
            raise RuntimeError(
                f"Dimension 1 of B must be less than or equal to "
                f"{self.__MMULT_BT_SHAPE[0]}")

        buffers = []
        try:
            a = self.xlnk.cma_array(self.__MMULT_A_SHAPE, "int")
            buffers.append(a)
            bt = self.xlnk.cma_array(self.__MMULT_BT_SHAPE, "int")
            buffers.append(bt)
            c = self.xlnk.cma_array(self.__MMULT_C_SHAPE, "int")
            buffers.append(c)

            # Zero the padding explicitly: the core always works on the full
            # buffers, so stale values outside the copied region would be
            # summed into the product.
            a[:] = 0
            bt[:] = 0
            # Copy A->a
            a[:A.shape[0], :A.shape[1]] = A
            # Copy BT->bt
            bt[:B.shape[1], :B.shape[0]] = B.transpose()
            # TODO: Enable Interrupts
            # Write address of a, bt, c to HLS core
            self.__a_offset[31:0] = self.xlnk.cma_get_phy_addr(a.pointer)
            self.__bt_offset[31:0] = self.xlnk.cma_get_phy_addr(bt.pointer)
            self.__c_offset[31:0] = self.xlnk.cma_get_phy_addr(c.pointer)
            self.__start()
            # TODO: Wait for ASYNC Interrupt
            # TODO: Clear Interrupt
            # Until interrupts are wired up, give the core a fixed second.
            import time
            time.sleep(1)
            self.__stop()
            C = np.zeros((A.shape[0], B.shape[1]), np.int32)
            # Transform C into a Numpy Array
            C[:A.shape[0], :B.shape[1]] = c[:A.shape[0], :B.shape[1]]
        finally:
            # Release the contiguous memory even when a step above fails.
            for buf in buffers:
                buf.freebuffer()
        return C
    def __init__(self, addr_port_client=("192.168.1.100", 3000)):
        """Load the overlay, allocate all DMA buffers and load the weights.

        The weights file is read once through a context manager, so the file
        handle is closed even when parsing fails, and then scanned twice:
        first for the per-layer ``total_iterations`` counts that size the
        weight buffers, then for the factor lines and hex weight words.

        Args:
            addr_port_client: ``(address, port)`` pair stored in
                ``self.client_port``.
        """
        print('FPGA_Connect_Object init')
        self.resolution = [640, 360]
        self.client_port = addr_port_client

        team_name = 'SystemsETHZ'
        # agent = Agent(team_name)

        interval_time = 0
        xlnk = Xlnk()
        xlnk.xlnk_reset()

        ###########################variable initializing######################
        OVERLAY_PATH = '/home/xilinx/jupyter_notebooks/dac_2019_contest/common/' + team_name + '/ultra96_v04.bit'
        WEIGHTS_FILE_NAME = '/home/xilinx/jupyter_notebooks/dac_2019_contest/common/' + team_name + '/weights_file_v04_demo.txt'

        ###########################change board settings######################

        ###########################download      overlay######################
        overlay = Overlay(OVERLAY_PATH)
        self.dma = overlay.axi_dma_0
        self.nn_ctrl = MMIO(0xA0010000, length=1024)
        ###########################download      weights######################
        self.MINIBATCH_SIZE = 1
        self.height = 176
        self.width = 320
        pixel_bits = 24
        pixels_per_line = 384/pixel_bits
        self.num_lines = int((self.height*self.width)/pixels_per_line)

        self.in_buffer = xlnk.cma_array(shape=(self.MINIBATCH_SIZE*self.num_lines, 64), dtype=np.uint8)
        fire1_num_out_lines = (self.height/4)*(self.width/4)*self.MINIBATCH_SIZE
        self.fire1_out_buffer = xlnk.cma_array(shape=(int(16*fire1_num_out_lines),), dtype=np.uint32)
        fire2_num_out_lines = (self.height/8)*(self.width/8)*self.MINIBATCH_SIZE
        self.fire2_out_buffer = xlnk.cma_array(shape=(int(16*fire2_num_out_lines),), dtype=np.uint32)
        fire3_num_out_lines = (self.height/16)*(self.width/16)*self.MINIBATCH_SIZE
        self.fire3_out_buffer = xlnk.cma_array(shape=(int(16*fire3_num_out_lines),), dtype=np.uint32)
        self.fire4_out_buffer = xlnk.cma_array(shape=(int(16*fire3_num_out_lines),), dtype=np.uint32)
        self.fire5_out_buffer = xlnk.cma_array(shape=(int(16*fire3_num_out_lines),), dtype=np.uint32)
        final_num_lines = int((self.height/16)*(self.width/16))
        self.bndboxes = [xlnk.cma_array(shape=(self.MINIBATCH_SIZE,final_num_lines,16), dtype=np.int32),
                        xlnk.cma_array(shape=(self.MINIBATCH_SIZE,final_num_lines,16), dtype=np.int32),
                        xlnk.cma_array(shape=(self.MINIBATCH_SIZE,final_num_lines,16), dtype=np.int32),
                        xlnk.cma_array(shape=(self.MINIBATCH_SIZE,final_num_lines,16), dtype=np.int32)]
        self.obj_array = np.zeros((self.MINIBATCH_SIZE,final_num_lines))

        NUM_LAYERS = 3+4*4
        # Read the file once; both passes below iterate over these lines.
        with open(WEIGHTS_FILE_NAME, "r") as weights_file:
            weight_lines = weights_file.readlines()

        # Pass 1: number of iterations per layer.
        layer = 0
        total_iterations = np.zeros(NUM_LAYERS)
        for line in weight_lines:
            if "layer" in line:
                temp = line.split(": ")
                layer = int(temp[1])
            if "total_iterations" in line:
                temp = line.split(": ")
                total_iterations[layer] = int(temp[1])

        weightfactors_length = np.zeros(NUM_LAYERS)
        self.weightsfactors = []
        for i in range(0, NUM_LAYERS):
            weightfactors_length[i] = int(total_iterations[i])
            self.weightsfactors.append( xlnk.cma_array(shape=(int(16*weightfactors_length[i]),), dtype=np.uint32) )
        self.obj_factors = np.zeros(4)
        self.box_factors = np.zeros(4)

        # Pass 2: factors and the weight words themselves.
        index = 0
        for line in weight_lines:
            if "layer" in line:
                temp = line.split(": ")
                layer = int(temp[1])
                index = 0
            elif "total_iterations" not in line:
                if "obj_factor" in line:
                    temp = line.split(' ')
                    self.obj_factors[int(temp[1])] = int(temp[2])
                elif "box_factor" in line:
                    temp = line.split(' ')
                    self.box_factors[int(temp[1])] = int(temp[2])
                else:
                    # Hex line: walk it from the right in 8-digit groups.
                    # base starts at 1 so the last character of the line
                    # (normally the newline) is skipped.
                    no0x = line.split('0x')[-1]
                    base = 1
                    while base < len(no0x):
                        part = no0x[-1*(base+8):-1*base]
                        self.weightsfactors[layer][index*16 + int(base/8)] = int(part, 16)
                        base += 8
                    index += 1
from math import ceil
import time
from pynq import Xlnk
import numpy as np
import matplotlib.pyplot as plt
from pynq.lib import Pmod_ADC
from pynq.overlays.base import BaseOverlay

ol = BaseOverlay("base.bit")

# Xlnk instance used for the contiguous allocations below
xlnk = Xlnk()
xlnk.cma_stats()

# input and output buffers: 100 uint32 words each
py_buffer = xlnk.cma_array(shape=(100, ), dtype=np.uint32)
out_buffer = xlnk.cma_array(shape=(100, ), dtype=np.uint32)

adc = Pmod_ADC(ol.PMODA)

samples = []
count = 0
# Take 100 readings; only the most recent one is kept in ``sample``.
for count in range(1, 101):
    sample = adc.read()
print("Loading image ../images/bigBunny_1080.png")
img = cv2.imread('../images/bigBunny_1080.png')
imgY = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

print("Size of imgY is ", imgY.shape)
height, width, channels = img.shape

# 3x3 Sobel kernel for horizontal edges
kernel = np.array(
    [
        [1.0, 2.0, 1.0],
        [0.0, 0.0, 0.0],
        [-1.0, -2.0, -1.0],
    ],
    np.float32,
)

numberOfIterations = 10
print(f"Number of loop iterations: {numberOfIterations}")

dstSW = np.ones((height, width), np.uint8)

# Physically contiguous source frame, filled with the grayscale image
xFimgY = mem_manager.cma_array((height, width), np.uint8)
np.copyto(xFimgY, imgY)

# Physically contiguous destination frame
xFdst = mem_manager.cma_array((height, width), np.uint8)

print("Start SW loop")
startSW = time.time()
for i in range(numberOfIterations):
    # Software reference: filter2D on the ARM core
    cv2.filter2D(imgY, -1, kernel, dst=dstSW,
                 borderType=cv2.BORDER_CONSTANT)
stopSW = time.time()

print("Start HW loop")
startPL = time.time()
for i in range(numberOfIterations):
Beispiel #23
0
# Setup for the capture loop that follows: start HDMI input, map the two IP
# register windows and allocate the working buffers.
hdmi_in.start()

from pynq import MMIO
# 64 KiB register windows of the two IP blocks, located via the overlay's
# ip_dict.
rgb2yuv = MMIO(base.ip_dict['h264/rgb2yuv_with_axi_0']['phys_addr'], 0x10000)
h264 = MMIO(base.ip_dict['h264/h264enc_with_axi_0']['phys_addr'], 0x10000)

from h264py.h264 import H264
h264_send = H264()

from pynq import Xlnk
xlnk = Xlnk()

# Bytes per buffer: 1920 x 1088 with 4 bytes per element.
size = 1920 * 1088 * 4

# NOTE(review): xlnk_reset() presumably releases every contiguous buffer
# allocated so far, which could include buffers owned by hdmi_in (started
# above) -- confirm this ordering is intended.
xlnk.xlnk_reset()
cma_recv = xlnk.cma_array((size, ), dtype=np.uint8)
result = xlnk.cma_array((size, ), dtype=np.uint8)

for i in range(200):
    cma_send = hdmi_in.readframe()

    rgb2yuv.write(0x04, cma_send.physical_address)
    rgb2yuv.write(0x08, cma_recv.physical_address)
    rgb2yuv.write(0x0c, 1088)
    rgb2yuv.write(0x10, 1920)
    rgb2yuv.write(0x14, 1920 * 1088)
    rgb2yuv.write(0x00, 1)
    rgb2yuv.write(0x00, 0)
    while rgb2yuv.read(0x18) == 1:
        pass
    while rgb2yuv.read(0x18) == 0:
Beispiel #24
0
        y2 = int(round(bbox[b][3] * 360))
        x1 = np.clip(x1, 1, 640)
        y1 = np.clip(y1, 1, 360)
        x2 = np.clip(x2, 1, 640)
        y2 = np.clip(y2, 1, 360)

        result.write(batch[b].split('.')[0].zfill(3) + '.jpg' + ' ' +
                     str([x1, x2, y1, y2]) + '\n')
        print(batch[b], [x1, x2, y1, y2])


################################## Init FPGA ##################################
# Allocate the contiguous buffers, load the parameters from SkyNet.bin and
# hand the image buffer address to the SkyNet core.
xlnk = Xlnk()
xlnk.xlnk_reset()

img = xlnk.cma_array(shape=[4, 160, 320, 4], dtype=np.uint8)
fm = xlnk.cma_array(shape=(628115 * 32), dtype=np.uint8)
weight = xlnk.cma_array(shape=(220672), dtype=np.int16)
biasm = xlnk.cma_array(shape=(432 * 16), dtype=np.int16)
print("Allocating memory done")

parameter = np.fromfile("SkyNet.bin", dtype=np.int16)
# The first 220672 int16 values are the weights; the remainder goes into the
# bias buffer.
np.copyto(weight, parameter[0:220672])
# NOTE(review): only 428 * 16 of the 432 * 16 allocated bias entries are
# filled -- presumably the file holds exactly 428 * 16 trailing values;
# confirm the remaining entries are meant to stay uninitialised.
np.copyto(biasm[0:428 * 16], parameter[220672:])
print("Parameters loading done")

overlay = Overlay("SkyNet.bit")
print("Bitstream loaded")

SkyNet = overlay.SkyNet
# Register 0x10 receives the physical address of the image buffer.
SkyNet.write(0x10, img.physical_address)
Beispiel #25
0
import itertools
from functools import partial

# Packages for using hardware
import pynq.lib.dma
from pynq import Xlnk
import numpy as np
from pynq import Overlay
import sys

overlay = Overlay('./sampleRNN_GRU_unroll.bit') # Downloading the bitstream on the FPGA
dma1 = overlay.axi_dma_0 # Having an object point to the DMA
xlnk = Xlnk() # Allocation of contiguous arrays
dim_mv = 64
# Allocating space for both inputs and outputs.
# Input stream layout (rows): input and hidden vectors (dim_mv each), the two
# weight matrices (3*dim_mv*dim_mv each) and the two bias vectors (3*dim_mv
# each).  The weight term used to be written as 192*dim_mv, which only
# matches 3*dim_mv*dim_mv when dim_mv is 64.
in_stream = xlnk.cma_array(shape=(2*dim_mv + 2*(3*dim_mv*dim_mv) + 2*(3*dim_mv), 1), dtype=np.float32)
out_stream = xlnk.cma_array(shape=(6*dim_mv,1), dtype=np.float32)

try:
    import torch.backends.cudnn.rnn
except ImportError:
    pass

# Function used for using hardware designed
def GRU_Hardware(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
    # Pre-processing data
    detachedVar = torch.cat((input.t(), hidden.t(), w_ih.reshape(3*dim_mv*dim_mv, 1), w_hh.reshape(3*dim_mv*dim_mv, 1), b_ih.reshape(3*dim_mv, 1), b_hh.reshape(3*dim_mv, 1)), 0).detach()
    in_stream[:] = detachedVar
    
	# Sending out data to the hardware and letting the DMA know the space allocated for the output
	dma1.sendchannel.transfer(in_stream)
Beispiel #26
0
from pynq import Xlnk
from pynq import MMIO
from pprint import pprint
import random

# Command line: argv[1] -> M (matrices are M x M), argv[2] -> N.
# NOTE(review): N looks like a tile/block edge length used by the index
# arithmetic below -- confirm.
M = int(sys.argv[1])
N = int(sys.argv[2])

xlnk = Xlnk()
ol = Overlay('./tutorial.bit')

# Print every IP block contained in the overlay
pprint(ol.ip_dict)

# load inputs
# in_buffer has room for two M*M operands, out_buffer for one M*M result.
in_buffer = xlnk.cma_array(shape=(2 * M * M, ), dtype=np.uint32)
out_buffer = xlnk.cma_array(shape=(M * M, ), dtype=np.uint32)

# Fill the whole input buffer with random integers from 1 to 9.
for i in range(0, len(in_buffer)):
    in_buffer[i] = random.randint(1, 9)

# Scatter the first M*M buffer entries into the M x M matrix m0.
# NOTE(review): the (row, column) arithmetic presumably converts a blocked
# stream order into row-major positions -- verify against the hardware's
# expected layout before changing it.
m0 = np.zeros((M, M))
for i in range(M * M):
    base_row = int(i * N / (M * M))
    base_column = int(int(i % M) / N)
    column = int(base_column * N) + (int(i % N))
    row = base_row + int((i % ((M * M) / N)) / M) * N
    m0[row][column] = in_buffer[i]

m1 = np.zeros((M, M))
for i in range(M * M):
Beispiel #27
0
from datetime import datetime
from pynq import Xlnk
from pynq import Overlay
import pynq
import struct
from multiprocessing import Process, Pipe, Queue, Event, Manager

print('\n**** Running SkyNet')

# Reset the Xlnk allocator before allocating anything
# (NOTE(review): presumably this releases buffers left over from earlier
# runs -- confirm against the PYNQ Xlnk documentation).
xlnk = Xlnk()
xlnk.xlnk_reset()

########## Allocate memory for weights and off-chip buffers
# Structured dtype made of 64 unsigned-byte fields, so one element is
# 64 bytes (512 bits) wide.
mytype = 'B,' * 63 + 'B'
dt = np.dtype(mytype)
# Input image buffer: 3 planes of plain bytes.
img = xlnk.cma_array(shape=(3, 162 * 2, 322 * 2), dtype=np.uint8)

# Weight, bias and intermediate buffers, all using the 64-byte element type.
conv_weight_1x1_all = xlnk.cma_array(shape=(413, 32), dtype=dt)
conv_weight_3x3_all = xlnk.cma_array(shape=(64, 3, 3), dtype=dt)
# shape=(106) is a plain int, not a 1-tuple; this yields a 1-D array.
bias_all = xlnk.cma_array(shape=(106), dtype=dt)
DDR_pool_3_out = xlnk.cma_array(shape=(2, 164, 324), dtype=dt)
DDR_pool_6_out = xlnk.cma_array(shape=(3, 84, 164), dtype=dt)
DDR_buf = xlnk.cma_array(shape=(128, 44, 84), dtype=dt)
# Small result/parameter buffers with ordinary scalar element types.
predict_boxes = xlnk.cma_array(shape=(4, 5), dtype=np.float32)
constant = xlnk.cma_array(shape=(4, 3), dtype=np.int32)

print("Allocating memory done")

########### Load parameters from SD card to DDR
# The parameter file is read as a flat array of 64-byte elements; idx is
# initialised to 0 here (its use is not visible in this excerpt).
params = np.fromfile("SkyNet.bin", dtype=dt)
idx = 0
Beispiel #28
0
def update_graphs(tsliderValue):
    """Capture motor data and build the Dash components that plot it.

    The motor is put into 'ia_ib_angle_rpm' capture mode and 1000 stream
    captures are taken into a 256-byte contiguous buffer. After every
    capture the buffer is read back four bytes at a time; each word is
    split by ``bytesplit`` into a high and a low part, both stored as
    ``np.int16``. Words at offsets 0, 8, 16, ... feed Ia (low) and Ib
    (high); the words in between feed rpm (low) and angle (high).

    Parameters
    ----------
    tsliderValue
        Selects the series shown in the first plot. Its string form is
        compared with 'Ia Current', 'Ib Current' and 'Angle'; any other
        value selects rpm.

    Returns
    -------
    list
        Three components: the plot of the selected series, a markdown
        heading, and the Ia-versus-Ib scatter plot.
    """
    motor.capture_mode('ia_ib_angle_rpm')

    allocator = Xlnk()
    # The array is kept in a local for the whole call; only its physical
    # address is passed on to the capture and to the MMIO window.
    input_buffer = allocator.cma_array(shape=(256, ), dtype=np.uint8)
    capture_address = input_buffer.physical_address

    window = MMIO(capture_address, 256)
    ia_raw, ib_raw, rpm_raw, angle_raw = [], [], [], []
    for _ in range(1000):
        motor.stream_capture(capture_address)
        for offset in range(0, 256, 4):
            highbits, lowbits = bytesplit(window.read(offset, 4))
            if offset % 8 == 0:
                ia_raw.append(np.int16(lowbits))
                ib_raw.append(np.int16(highbits))
            else:
                rpm_raw.append(np.int16(lowbits))
                angle_raw.append(np.int16(highbits))

    # NOTE(review): 0.00039 is presumably the raw-count-to-current scale
    # factor -- confirm.
    frame = pd.DataFrame(
        {
            'Ia': np.array(ia_raw) * 0.00039,
            'Ib': np.array(ib_raw) * 0.00039,
            'angle': angle_raw,
            'rpm': rpm_raw,
        },
        columns=['Ia', 'Ib', 'angle', 'rpm'])

    # Map the slider text to a column; anything unknown falls back to rpm.
    selection = str(tsliderValue)
    column_for = {'Ia Current': 'Ia', 'Ib Current': 'Ib', 'Angle': 'angle'}
    selected = frame[column_for.get(selection, 'rpm')]

    # One (identical) trace is emitted per DataFrame column, as before.
    selected_plot = dcc.Graph(
        id='Ia',
        figure={
            'data': [
                go.Scatter(
                    x=random_x,
                    y=selected,
                    opacity=0.7,
                    marker=dict(size=15,
                                line=dict(width=0.5, color='white')),
                ) for _ in frame.items()
            ],
            'layout': go.Layout(
                xaxis=dict(title='Sample'),
                yaxis=dict(title=selection),
                margin=dict(l=80, b=40, t=10, r=10),
                hovermode='closest'),
        })

    heading = html.Div(
        [dcc.Markdown(children='### `Plot-2 Ia vs Ib`')],
        style={'padding': '3px 3px 3px 3px'})

    # Each trace is named after the (column name, Series) pair it was
    # generated for, exactly as the items() iteration yields it.
    ia_vs_ib_plot = dcc.Graph(
        id='Ia vs Ib',
        figure={
            'data': [
                go.Scattergl(
                    x=frame['Ia'],
                    y=frame['Ib'],
                    mode='markers',
                    opacity=0.7,
                    marker={'color': '#F0598E', 'line': {'width': 1}},
                    name=column_item,
                ) for column_item in frame.items()
            ],
            'layout': go.Layout(
                xaxis=dict(title='Current Ia'),
                yaxis=dict(title='Current Ib'),
                margin=dict(l=80, b=40, t=10, r=10),
                legend=dict(x=0, y=1),
                hovermode='closest'),
        })

    return [selected_plot, heading, ia_vs_ib_plot]
# Bare expression left over from a notebook cell: it only evaluates the
# attribute (a notebook would display it) and has no effect in a script.
FracNet.register_map

# In[4]:

# timer.register_map

# In[5]:

# Structured dtypes built from unsigned-byte fields: 64 fields give a
# 64-byte (512-bit) element, 32 fields a 32-byte (256-bit) element.
bus512 = 'B,' * 63 + 'B'
dt_512 = np.dtype(bus512)

bus256 = 'B,' * 31 + 'B'
dt_256 = np.dtype(bus256)

# Contiguous buffers: a (3, 32, 32) uint64 input and a 10-element float32
# output. shape=(10) is a plain int, which yields a 1-D array.
image_thermo = xlnk.cma_array(shape=(3, 32, 32), dtype=np.uint64)
result = xlnk.cma_array(shape=(10), dtype=np.float32)

# In[6]:

# numpy is (re-)imported here although np is already used above; this is
# a notebook-cell ordering artefact.
import numpy as np
images = np.load('conv1_input_uint64.npy')

# In[7]:

# Read the raw label file and report its size in bytes.
num_tests = 1000
with open('labels.bin', 'rb') as f:
    content = f.read()
print(len(content))

# np.ndarray(...) allocates without initialising: the array holds
# arbitrary values until it is filled (the fill is not visible here).
labels = np.ndarray((num_tests, ))
class _PSTraceAnalyzer:
    """Class for the Trace Analyzer controlled by PS.

    A typical use of this class is on the base overlay.

    This class can capture digital IO patterns / stimulus on all the pins.
    There can be multiple such instances on the defined overlay.

    Attributes
    ----------
    trace_control : MMIO
        The trace controller associated with the analyzer.
    dma : DMA
        The PS controlled DMA object associated with the analyzer.
    intf_spec : dict
        The interface specification, e.g., PYNQZ1_PMODA_SPECIFICATION.
    num_analyzer_samples : int
        The number of samples to be analyzed.
    samples : numpy.ndarray
        The raw data samples expressed in numpy array.
    frequency_mhz: float
        The frequency of the trace analyzer, in MHz.
    clk : Clocks
        The clock management unit for the trace analyzer.
    xlnk : Xlnk
        The Xlnk object to control contiguous memory.

    """
    def __init__(self, ip_info, intf_spec_name):
        """Return a new PS controlled trace analyzer object.

        The maximum sample rate is 100MHz. Usually the sample rate is set
        to no larger than 10MHz in order for the signals to be captured
        on pins / wires.

        For Pmod header, pin numbers 0-7 correspond to the pins on the
        Pmod interface.

        For Arduino header, pin numbers 0-13 correspond to D0-D13;
        pin numbers 14-19 correspond to A0-A5;
        pin numbers 20-21 correspond to SDA and SCL.

        Parameters
        ----------
        ip_info : dict
            The dictionary containing the IP associated with the analyzer.
        intf_spec_name : str/dict
            The name of the interface specification, or the specification
            dictionary itself.

        Raises
        ------
        ValueError
            If `intf_spec_name` is neither a str nor a dict.

        """
        if isinstance(intf_spec_name, str):
            # NOTE(review): eval() executes the string as Python in order
            # to resolve the name of a specification constant. Never pass
            # untrusted input here.
            self.intf_spec = eval(intf_spec_name)
        elif isinstance(intf_spec_name, dict):
            self.intf_spec = intf_spec_name
        else:
            raise ValueError("Interface specification has to be str or dict.")

        trace_cntrl_info = ip_info['trace_cntrl_{}_0'.format(
            self.intf_spec['monitor_width'])]
        trace_dma_info = ip_info['axi_dma_0']
        self.trace_control = MMIO(trace_cntrl_info['phys_addr'],
                                  trace_cntrl_info['addr_range'])
        self.dma = DMA(trace_dma_info)
        self.num_analyzer_samples = 0
        self.samples = None
        self._cma_array = None
        self.frequency_mhz = 0
        # The Clocks class itself is stored (it is not instantiated);
        # setup() sets the fclk attribute directly on it.
        self.clk = Clocks
        self.xlnk = Xlnk()
        self._status = 'RESET'

    def __repr__(self):
        """Disambiguation of the object.

        Users can call `repr(object_name)` to display the object information.

        """
        parameter_list = list()
        parameter_list.append('num_analyzer_samples={}'.format(
            self.num_analyzer_samples))
        parameter_list.append('frequency_mhz={}'.format(self.frequency_mhz))
        parameter_string = ", ".join(map(str, parameter_list))
        return '{}({})'.format(self.__class__.__name__, parameter_string)

    @property
    def status(self):
        """Return the analyzer's status.

        Returns
        -------
        str
            Indicating the current status of the analyzer; can be
            'RESET', 'READY', or 'RUNNING'.

        """
        return self._status

    def _release_buffer(self):
        """Close the contiguous trace buffer, if any, and forget it.

        Dropping the reference after `close()` guarantees the buffer is
        closed only once, no matter how often `reset()`, `setup()` and
        `__del__()` run. `getattr` is used because `__del__` may run on an
        object whose `__init__` failed before the attribute existed.

        """
        buffer = getattr(self, '_cma_array', None)
        if buffer is not None:
            buffer.close()
        self._cma_array = None

    def setup(self,
              num_analyzer_samples=DEFAULT_NUM_TRACE_SAMPLES,
              frequency_mhz=DEFAULT_CLOCK_FREQUENCY_MHZ,
              fclk_index=3):
        """Configure the trace analyzer.

        This method validates the parameters, programs the selected PL
        clock and allocates the contiguous buffer that receives the
        samples.

        Note that the analyzer is always attached to the pins, so there
        is no need to use any method like 'connect()'. In short, once the
        analyzer has been setup, it is connected as well.

        The clock selected by `fclk_index` (FCLK3 by default) will be
        configured during this method.

        Note
        ----
        The buffer is allocated with exactly `num_analyzer_samples`
        entries. A buffer left over from an earlier `setup()` is closed
        first (after stopping a capture still in flight), so calling this
        method repeatedly does not accumulate contiguous memory.

        Parameters
        ----------
        num_analyzer_samples : int
            The number of samples to be analyzed.
        frequency_mhz: float
            The frequency of the captured samples, in MHz.
        fclk_index : int
            The index of the fclk controlled by clock management object.

        Raises
        ------
        ValueError
            If the number of samples or the frequency is out of range. In
            that case the analyzer's configuration is left untouched.

        """
        # Validate everything before touching any state, so a rejected
        # call cannot leave the object half-configured.
        if not 1 <= num_analyzer_samples <= MAX_NUM_TRACE_SAMPLES:
            raise ValueError('Number of samples should be in '
                             '[1, {}]'.format(MAX_NUM_TRACE_SAMPLES))
        if not MIN_CLOCK_FREQUENCY_MHZ <= frequency_mhz <= \
                MAX_CLOCK_FREQUENCY_MHZ:
            raise ValueError("Clock frequency out of range "
                             "[{}, {}]".format(MIN_CLOCK_FREQUENCY_MHZ,
                                               MAX_CLOCK_FREQUENCY_MHZ))

        # Do not free a buffer the DMA may still be writing to.
        if self._status == 'RUNNING':
            self.stop()
        self._release_buffer()

        self.num_analyzer_samples = num_analyzer_samples
        setattr(self.clk, "fclk{}_mhz".format(fclk_index), frequency_mhz)
        self.frequency_mhz = frequency_mhz

        trace_byte_width = round(self.intf_spec['monitor_width'] / 8)
        self._cma_array = self.xlnk.cma_array(
            [1, self.num_analyzer_samples],
            dtype=BYTE_WIDTH_TO_NPTYPE[trace_byte_width])
        self._status = 'READY'

    def reset(self):
        """Reset the trace analyzer.

        This method will bring the trace analyzer from any state to
        'RESET' state. A running capture is stopped first and the trace
        buffer is released.

        """
        if self._status == 'RUNNING':
            self.stop()

        self.samples = None
        self.num_analyzer_samples = 0
        self.frequency_mhz = 0
        self._release_buffer()
        self._status = 'RESET'

    def run(self):
        """Start the DMA to capture the traces.

        Return
        ------
        None

        Raises
        ------
        RuntimeError
            If no trace buffer is allocated, i.e. `setup()` has not been
            called since construction or since the last `reset()`.

        """
        if self._cma_array is None:
            raise RuntimeError("Trace analyzer has no buffer; "
                               "call setup() before run().")

        self.dma.recvchannel.transfer(self._cma_array)
        # The register layout depends on the monitor width; in both cases
        # the control register is written with 1 and then 0 at the end.
        if self.intf_spec['monitor_width'] == 32:
            self.trace_control.write(TRACE_CNTRL_32_LENGTH,
                                     self.num_analyzer_samples)
            self.trace_control.write(TRACE_CNTRL_32_DATA_COMPARE, 0)
            self.trace_control.write(TRACE_CNTRL_32_ADDR_AP_CTRL, 1)
            self.trace_control.write(TRACE_CNTRL_32_ADDR_AP_CTRL, 0)
        else:
            self.trace_control.write(TRACE_CNTRL_64_LENGTH,
                                     self.num_analyzer_samples)
            self.trace_control.write(TRACE_CNTRL_64_DATA_COMPARE_MSW, 0)
            self.trace_control.write(TRACE_CNTRL_64_DATA_COMPARE_LSW, 0)
            self.trace_control.write(TRACE_CNTRL_64_ADDR_AP_CTRL, 1)
            self.trace_control.write(TRACE_CNTRL_64_ADDR_AP_CTRL, 0)

        self._status = 'RUNNING'

    def stop(self):
        """Stop the DMA after capture is done.

        This waits on the DMA receive channel and then marks the analyzer
        as 'READY'.

        Return
        ------
        None

        """
        self.dma.recvchannel.wait()
        self._status = 'READY'

    def __del__(self):
        """Destructor for trace buffer object.

        Returns
        -------
        None

        """
        self._release_buffer()

    def analyze(self, steps):
        """Analyze the captured pattern.

        This function will process the captured pattern and put the pattern
        into a Wavedrom compatible format.

        The data output is of format:

        [{'name': '', 'pin': 'D1', 'wave': '1...0.....'},
         {'name': '', 'pin': 'D2', 'wave': '0.1..01.01'}]

        Note the all the lanes should have the same number of samples.
        All the pins are assumed to be tri-stated and traceable.

        Currently only no `step()` method is supported for PS controlled
        trace analyzer.

        Parameters
        ----------
        steps : int
            Number of samples to analyze. A value 0 means to analyze all the
            valid samples; any other value analyzes the first `steps`
            captured samples.

        Returns
        -------
        list
            A list of dictionaries, each dictionary consisting the pin number,
            and the waveform pattern in string format.

        Raises
        ------
        RuntimeError
            If no trace buffer is allocated.
        ValueError
            If `steps` is negative or larger than the number of captured
            samples.

        """
        if self._cma_array is None:
            raise RuntimeError("Trace analyzer has no buffer; "
                               "call setup() and run() before analyze().")

        io_pins = get_tri_state_pins(self.intf_spec['traceable_io_pins'],
                                     self.intf_spec['traceable_tri_states'])

        if steps == 0:
            num_valid_samples = self.num_analyzer_samples
        else:
            num_valid_samples = steps
        if not 0 <= num_valid_samples <= self.num_analyzer_samples:
            raise ValueError('Number of steps should be in '
                             '[0, {}]'.format(self.num_analyzer_samples))

        trace_byte_width = round(self.intf_spec['monitor_width'] / 8)
        # Big-endian copy, so that unpackbits yields each sample's bits
        # most-significant first.
        data_type = '>i{}'.format(trace_byte_width)
        self.samples = np.zeros(num_valid_samples, dtype=data_type)
        # The buffer has shape (1, N). Flatten a plain-ndarray view of it
        # and take only the requested prefix, so a partial `steps` no
        # longer fails on a shape mismatch.
        captured = np.asarray(self._cma_array).reshape(-1)
        np.copyto(self.samples, captured[:num_valid_samples])
        temp_bytes = np.frombuffer(self.samples, dtype=np.uint8)
        bit_array = np.unpackbits(temp_bytes)
        # One row per bit position after the transpose; reversing the rows
        # makes row index k correspond to bit k (least significant first).
        temp_lanes = bit_array.reshape(num_valid_samples,
                                       self.intf_spec['monitor_width']).T[::-1]

        wavelanes = list()
        for pin_label in io_pins:
            temp_lane = temp_lanes[self.intf_spec['traceable_io_pins']
                                   [pin_label]]
            bitstring = ''.join(temp_lane.astype(str).tolist())
            wave = bitstring_to_wave(bitstring)
            wavelanes.append({'name': '', 'pin': pin_label, 'wave': wave})

        return wavelanes