Beispiel #1
0
class FlipRegister(Unit):
    """
    Two registers behind two register-control ports with swappable routing.

    Port ``first`` and port ``second`` are always attached to opposite
    registers; ``select_sig`` decides which register each port sees:

    * ``select_sig`` low:  ``first`` <-> first_reg, ``second`` <-> second_reg
    * ``select_sig`` high: ``first`` <-> second_reg, ``second`` <-> first_reg

    The component is meant as a form of synchronization. Example: a writer
    updates one register through its port while a reader reads the other
    register through the other port. Once ``select_sig`` is flipped,
    the reader sees the register which the writer was updating and the writer
    gets the register which the reader was using.

    .. hwt-schematic::
    """

    def _config(self):
        # bit width of both registers and of the data on both ports
        self.DATA_WIDTH = Param(32)
        # reset value of both registers
        self.DEFAULT_VAL = Param(0)

    def _declr(self):
        with self._paramsShared():
            addClkRstn(self)
            # one register-control port per access side
            self.first = RegCntrl()
            self.second = RegCntrl()

            # routing selector
            self.select_sig = Signal()

    def connectWriteIntf(self, regA, regB):
        """
        Build the statements which store data written through the ports.

        :param regA: register updated when port ``first`` presents valid data
        :param regB: register updated when port ``second`` presents valid data
        :return: list with one conditional assignment per port
        """
        statements = []
        for port, reg in ((self.first, regA), (self.second, regB)):
            wr = port.dout
            statements.append(If(wr.vld, reg(wr.data)))
        return statements

    def connectReadIntf(self, regA, regB):
        """
        Build the statements which drive the read side of the ports.

        :param regA: register shown on port ``first``
        :param regB: register shown on port ``second``
        :return: list with one assignment per port
        """
        routing = ((self.first, regA), (self.second, regB))
        return [port.din(reg) for port, reg in routing]

    def _impl(self):
        reg0, reg1 = [
            self._reg(name, Bits(self.DATA_WIDTH), defVal=self.DEFAULT_VAL)
            for name in ("first_reg", "second_reg")
        ]

        # select_sig high -> ports are crossed, otherwise straight routing
        routing = If(self.select_sig,
            self.connectReadIntf(reg1, reg0),
            self.connectWriteIntf(reg1, reg0)
        )
        routing.Else(
            self.connectReadIntf(reg0, reg1),
            self.connectWriteIntf(reg0, reg1)
        )
Beispiel #2
0
class FlipRegister(Unit):
    """
    Two registers behind two register-control ports with swappable routing.

    Port ``first`` and port ``second`` are always attached to opposite
    registers; ``select_sig`` decides which register each port sees:

    * ``select_sig`` low:  ``first`` <-> first_reg, ``second`` <-> second_reg
    * ``select_sig`` high: ``first`` <-> second_reg, ``second`` <-> first_reg

    The component is meant as a form of synchronization. Example: a writer
    updates one register through its port while a reader reads the other
    register through the other port. Once ``select_sig`` is flipped,
    the reader sees the register which the writer was updating and the writer
    gets the register which the reader was using.

    .. hwt-autodoc::
    """

    def _config(self):
        # bit width of both registers and of the data on both ports
        self.DATA_WIDTH = Param(32)
        # reset value of both registers
        self.DEFAULT_VAL = Param(0)

    def _declr(self):
        with self._paramsShared():
            addClkRstn(self)
            # one register-control port per access side
            self.first = RegCntrl()
            self.second = RegCntrl()

            # routing selector
            self.select_sig = Signal()

    def connectWriteIntf(self, regA, regB):
        """
        Build the statements which store data written through the ports.

        :param regA: register updated when port ``first`` presents valid data
        :param regB: register updated when port ``second`` presents valid data
        :return: list with one conditional assignment per port
        """
        statements = []
        for port, reg in ((self.first, regA), (self.second, regB)):
            wr = port.dout
            statements.append(If(wr.vld, reg(wr.data)))
        return statements

    def connectReadIntf(self, regA, regB):
        """
        Build the statements which drive the read side of the ports.

        :param regA: register shown on port ``first``
        :param regB: register shown on port ``second``
        :return: list with one assignment per port
        """
        routing = ((self.first, regA), (self.second, regB))
        return [port.din(reg) for port, reg in routing]

    def _impl(self):
        reg0, reg1 = [
            self._reg(name, Bits(self.DATA_WIDTH), def_val=self.DEFAULT_VAL)
            for name in ("first_reg", "second_reg")
        ]

        # select_sig high -> ports are crossed, otherwise straight routing
        routing = If(self.select_sig,
            self.connectReadIntf(reg1, reg0),
            self.connectWriteIntf(reg1, reg0)
        )
        routing.Else(
            self.connectReadIntf(reg0, reg1),
            self.connectWriteIntf(reg0, reg1)
        )
Beispiel #3
0
class CLinkedListReader(Unit):
    """
    This unit reads items from a (circular) linked-list-like structure

    .. code-block:: c

        struct node {
            item_t items[ITEMS_IN_BLOCK],
            struct node * next;
        };

    Synchronization is obtained by the rdPtr/wrPtr (tail/head) pointers.
    baseAddr is the address used for the next read request in the actual node.

    Read requests are sent over ``rDatapump.req``, received data is filtered
    by transaction id, stored in an internal FIFO and handed out on ``dataOut``.

    :attention: device reads only chunks of size <= BUFFER_CAPACITY/2

    .. hwt-schematic::
    """
    def _config(self):
        """Declare the parameters of this unit."""
        self.ID_WIDTH = Param(4)
        # id of read transactions which carry only items
        self.ID = Param(3)
        # id of packet where last item is next addr
        self.ID_LAST = Param(4)

        # depth of the internal data FIFO
        self.BUFFER_CAPACITY = Param(32)
        # number of items in one node (the ``next`` pointer is not counted)
        # NOTE(review): 4096 // 8 - 1 looks like a 4 KiB block of 8 B words
        # minus one word for the next pointer - confirm
        self.ITEMS_IN_BLOCK = Param(4096 // 8 - 1)

        self.ADDR_WIDTH = Param(32)
        self.DATA_WIDTH = Param(64)
        # width of the rdPtr/wrPtr pointers
        self.PTR_WIDTH = Param(16)

    def _declr(self):
        """Declare interfaces and the internal data FIFO."""
        addClkRstn(self)

        with self._paramsShared():
            # interface which is sending requests to download data
            # and interface which is collecting all data; only data with the specified id are processed
            self.rDatapump = AxiRDatapumpIntf()._m()
            self.rDatapump.MAX_LEN.set(self.BUFFER_CAPACITY // 2 - 1)

            # items read from the list
            self.dataOut = Handshaked()._m()

        # (how many items remain in the block)
        self.inBlockRemain = VectSignal(log2ceil(self.ITEMS_IN_BLOCK + 1))._m()

        # interfaces to control the internal registers
        a = self.baseAddr = RegCntrl()
        a._replaceParam(a.DATA_WIDTH, self.ADDR_WIDTH)
        self.rdPtr = RegCntrl()
        self.wrPtr = RegCntrl()
        for ptr in [self.rdPtr, self.wrPtr]:
            ptr._replaceParam(ptr.DATA_WIDTH, self.PTR_WIDTH)

        # buffer between the datapump and dataOut, its size is exported
        # because it is used to decide if a next burst fits into it
        f = self.dataFifo = HandshakedFifo(Handshaked)
        f.EXPORT_SIZE.set(True)
        f.DATA_WIDTH.set(self.DATA_WIDTH)
        f.DEPTH.set(self.BUFFER_CAPACITY)

    def addrAlignBits(self):
        """
        :return: log2ceil of the number of bytes in a data word
            (the number of address bits which are cut off to get a word index)
        """
        return log2ceil(self.DATA_WIDTH // 8).val

    def _impl(self):
        """
        Build the request generator, the pointer/base address registers
        and the logic which pushes the received data into the FIFO.
        """
        propagateClkRstn(self)
        r, s = self._reg, self._sig
        req = self.rDatapump.req
        f = self.dataFifo
        dIn = self.rDatapump.r
        dBuffIn = f.dataIn

        ALIGN_BITS = self.addrAlignBits()
        ID = self.ID
        BUFFER_CAPACITY = self.BUFFER_CAPACITY
        # size of a full burst in words
        BURST_LEN = BUFFER_CAPACITY // 2
        ID_LAST = self.ID_LAST
        # a new request is allowed only if f.size <= BURST_LEN
        # (presumably f.size is the number of items stored in the FIFO)
        bufferHasSpace = s("bufferHasSpace")
        bufferHasSpace(f.size < (BURST_LEN + 1))
        # we are counting base next addr as item as well
        inBlock_t = Bits(log2ceil(self.ITEMS_IN_BLOCK + 1))
        ringSpace_t = Bits(self.PTR_WIDTH)

        # set from the acceptance of a request until the last word of its data arrives
        downloadPending = r("downloadPending", defVal=0)

        # word index (address without alignment bits) of the next word to read
        baseIndex = r("baseIndex", Bits(self.ADDR_WIDTH - ALIGN_BITS))
        inBlockRemain = r("inBlockRemain_reg", inBlock_t, defVal=self.ITEMS_IN_BLOCK)
        self.inBlockRemain(inBlockRemain)

        # Logic of tail/head
        rdPtr = r("rdPtr", ringSpace_t, defVal=0)
        wrPtr = r("wrPtr", ringSpace_t, defVal=0)
        # wrPtr is changed only through its register interface
        If(self.wrPtr.dout.vld,
            wrPtr(self.wrPtr.dout.data)
        )
        self.wrPtr.din(wrPtr)
        self.rdPtr.din(rdPtr)

        # this means items are present in memory
        hasSpace = s("hasSpace")
        hasSpace(wrPtr != rdPtr)
        # request is accepted in this clock cycle
        doReq = s("doReq")
        doReq(bufferHasSpace & hasSpace & ~downloadPending & req.rd)
        req.rem(0)
        self.dataOut(f.dataOut)

        # logic of baseAddr and baseIndex
        baseAddr = Concat(baseIndex, vec(0, ALIGN_BITS))
        req.addr(baseAddr)
        self.baseAddr.din(baseAddr)
        # data word with one of our ids which can be accepted by the FIFO
        dataAck = dIn.valid & In(dIn.id, [ID, ID_LAST]) & dBuffIn.rd

        # baseIndex priority: write from register interface,
        # then the next pointer from the last word of an ID_LAST transaction,
        # otherwise step to the next word for each accepted data word
        # (slices presumably cut off the ALIGN_BITS lowest bits, result has to fit baseIndex)
        If(self.baseAddr.dout.vld,
            baseIndex(self.baseAddr.dout.data[:ALIGN_BITS])
        ).Elif(dataAck & downloadPending,
            If(dIn.last & dIn.id._eq(ID_LAST),
               baseIndex(dIn.data[self.ADDR_WIDTH:ALIGN_BITS])
            ).Else(
               baseIndex(baseIndex + 1)
            )
        )

        # distance between the pointers
        sizeByPtrs = s("sizeByPtrs", ringSpace_t)
        sizeByPtrs(wrPtr - rdPtr)

        # constraingSpace = min(inBlockRemain, sizeByPtrs)
        inBlockRemain_asPtrSize = fitTo(inBlockRemain, sizeByPtrs)
        constraingSpace = s("constraingSpace", ringSpace_t)
        If(inBlockRemain_asPtrSize < sizeByPtrs,
           constraingSpace(inBlockRemain_asPtrSize)
        ).Else(
           constraingSpace(sizeByPtrs)
        )

        # sizeByPtrs (resized to the width of inBlockRemain) >= inBlockRemain
        constrainedByInBlockRemain = s("constrainedByInBlockRemain")
        constrainedByInBlockRemain(fitTo(sizeByPtrs, inBlockRemain) >= inBlockRemain)

        If(constraingSpace > BURST_LEN,
            # download full burst
            req.id(ID),
            req.len(BURST_LEN - 1),
            If(doReq,
               inBlockRemain(inBlockRemain - BURST_LEN)
            )
        ).Elif(constrainedByInBlockRemain & (inBlockRemain < BURST_LEN),
            # the rest of the block is the limit here
            # NOTE(review): original comment said "sizeByPtrs <= inBlockRemain",
            # the condition above is the opposite comparison - confirm
            # we will download next* as well, that is why len is not decremented by 1
            req.id(ID_LAST),
            connect(constraingSpace, req.len, fit=True),
            If(doReq,
               inBlockRemain(self.ITEMS_IN_BLOCK)
            )
        ).Else(
            # download data leftover
            req.id(ID),
            connect(constraingSpace - 1, req.len, fit=True),
            If(doReq,
               inBlockRemain(inBlockRemain - fitTo(constraingSpace, inBlockRemain))
            )
        )

        # logic of req dispatching
        # (at most one request is pending at any time)
        If(downloadPending,
            req.vld(0),
            If(dataAck & dIn.last,
                downloadPending(0)
            )
        ).Else(
            req.vld(bufferHasSpace & hasSpace),
            If(req.rd & bufferHasSpace & hasSpace,
               downloadPending(1)
            )
        )

        # into buffer pushing logic
        dBuffIn.data(dIn.data)

        # word is an item (not the next pointer, which is the last word of ID_LAST transaction)
        isMyData = s("isMyData")
        isMyData(dIn.id._eq(ID) | (~dIn.last & dIn.id._eq(ID_LAST)))
        # rdPtr can be overwritten from the register interface,
        # otherwise it is incremented for each item stored into the FIFO
        If(self.rdPtr.dout.vld,
            rdPtr(self.rdPtr.dout.data)
        ).Else(
            If(dIn.valid & downloadPending & dBuffIn.rd & isMyData,
               rdPtr(rdPtr + 1)
            )
        )
        # push data into buffer and increment rdPtr
        # (the next pointer word is consumed from dIn but not written into the FIFO)
        StreamNode(masters=[dIn],
                   slaves=[dBuffIn],
                   extraConds={dIn: downloadPending,
                               dBuffIn: (dIn.id._eq(ID) | (dIn.id._eq(ID_LAST) & ~dIn.last)) & downloadPending
                               }).sync()
Beispiel #4
0
class CLinkedListWriter(Unit):
    """
    This unit writes items to a (circular) linked-list-like structure
    (The list does not necessarily need to be circular but the space is specified
    by two pointers like in a circular queue)

    .. code-block:: c

        struct node {
            item_t items[ITEMS_IN_BLOCK],
            struct node * next;
        };

    Synchronization is obtained by the rdPtr/wrPtr (tail/head) pointers.
    baseAddr is the address of the actual node.

    :attention: device writes chunks of max size <= BUFFER_CAPACITY/2
    :attention: next addr is downloaded in the background while items are uploaded
               (= has to be set when this unit enters this block)

    :note: spaceToWrite = rdPtr - wrPtr - 1 (with uint16_t)
        wrPtr == rdPtr     => there is (2^PTR_WIDTH) - 1 of empty space
        wrPtr == rdPtr + 1 => there is (2^PTR_WIDTH) - 2 of empty space
        wrPtr + 1 == rdPtr => queue is full (no space to write,
        this is the condition checked in queuePtrLogic)

    .. hwt-schematic::
    """
    def _config(self):
        """Declare the parameters of this unit."""
        self.ID_WIDTH = Param(4)
        # id on interfaces for default transaction
        self.ID = Param(3)

        # depth of the internal data FIFO
        self.BUFFER_CAPACITY = Param(32)
        # number of items in one node (the ``next`` pointer is not counted)
        self.ITEMS_IN_BLOCK = Param(4096 // 8 - 1)

        self.ADDR_WIDTH = Param(32)
        self.DATA_WIDTH = Param(64)
        # width of the rdPtr/wrPtr pointers
        self.PTR_WIDTH = Param(16)

        # timeout to send items from buffer even if there are fewer of them than the recommended burst
        self.TIMEOUT = Param(4096)

    def _declr(self):
        """Declare interfaces and the internal data FIFO."""
        addClkRstn(self)

        with self._paramsShared():
            # read interface for datapump
            # interface which is sending requests to download addr of next block
            self.rDatapump = AxiRDatapumpIntf()._m()
            self.rDatapump.MAX_LEN.set(1)  # because we are downloading only the address of next block

            # write interface for datapump
            self.wDatapump = AxiWDatapumpIntf()._m()
            self.wDatapump.MAX_LEN.set(self.BUFFER_CAPACITY // 2)
            assert self.BUFFER_CAPACITY <= self.ITEMS_IN_BLOCK

            # interface for items which should be written into list
            self.dataIn = Handshaked()

        # interfaces to control the internal registers
        a = self.baseAddr = RegCntrl()
        a._replaceParam(a.DATA_WIDTH, self.ADDR_WIDTH)

        self.rdPtr = RegCntrl()
        self.wrPtr = RegCntrl()
        for ptr in [self.rdPtr, self.wrPtr]:
            ptr._replaceParam(ptr.DATA_WIDTH, self.PTR_WIDTH)

        # buffer for the input items, its size is exported
        # because it is used to decide when to start the upload
        f = self.dataFifo = HandshakedFifo(Handshaked)
        f.EXPORT_SIZE.set(True)
        f.DATA_WIDTH.set(self.DATA_WIDTH)
        f.DEPTH.set(self.BUFFER_CAPACITY)

        # log2ceil of the number of bytes in a data word
        self.ALIGN_BITS = log2ceil(self.DATA_WIDTH // 8).val

    def addrToIndex(self, addr):
        """
        Convert an address to an index of a data word
        (presumably by cutting off the ALIGN_BITS lowest bits - hwt slice semantics)
        """
        return addr[:self.ALIGN_BITS]

    def indexToAddr(self, indx):
        """
        Convert an index of a data word back to an address
        by appending ALIGN_BITS zero bits
        """
        return Concat(indx, vec(0, self.ALIGN_BITS))

    def rReqHandler(self, baseIndex, doReq):
        """
        Drive the read request channel used to download the address of the next block

        :param baseIndex: index of the first word of the actual block
        :param doReq: signal which is used as valid of the request
        """
        # always download only one word with address of next block
        rReq = self.rDatapump.req
        # the next pointer is stored right behind the items of the block
        rReq.addr(self.indexToAddr(baseIndex + self.ITEMS_IN_BLOCK))
        rReq.id(self.ID)
        rReq.len(0)
        rReq.rem(0)

        rReq.vld(doReq)

    def baseAddrLogic(self, nextBlockTransition_in):
        """
        Logic for downloading address of next block

        :param nextBlockTransition_in: signal which means that baseIndex should be changed to nextBaseIndex
            if nextBaseAddrReady is not high this signal has no effect (= regular handshake)
        :return: (baseIndex, nextBaseIndex, nextBaseReady is ready and nextBlockTransition_in can be used)
        """
        r = self._reg
        rIn = self.rDatapump.r
        rReq = self.rDatapump.req

        addr_index_t = Bits(self.ADDR_WIDTH - self.ALIGN_BITS)
        baseIndex = r("baseIndex_backup", addr_index_t)
        nextBaseIndex = r("nextBaseIndex", addr_index_t)
        # uninitialized: waiting for a write to baseAddr
        # required: request for the next address is being sent
        # pending: waiting for the data with the next address
        # prepared: nextBaseIndex holds the downloaded value
        t = HEnum("nextBaseFsm_t", ["uninitialized",
                                   "required",
                                   "pending",
                                   "prepared"])
        isNextBaseAddr = rIn.valid & rIn.id._eq(self.ID)
        nextBaseFsm = FsmBuilder(self, t, "baseAddrLogic_fsm")\
        .Trans(t.uninitialized,
            (self.baseAddr.dout.vld, t.required)
        ).Trans(t.required,
            (rReq.rd, t.pending)
        ).Trans(t.pending,
            (isNextBaseAddr, t.prepared)
        ).Trans(t.prepared,
            (nextBlockTransition_in, t.required)
        ).stateReg

        # write from the register interface has priority over the block transition
        If(self.baseAddr.dout.vld,
           baseIndex(self.addrToIndex(self.baseAddr.dout.data)),
        ).Elif(nextBlockTransition_in,
           baseIndex(nextBaseIndex)
        )
        self.baseAddr.din(self.indexToAddr(baseIndex))

        # store the downloaded address (data word resized to the width of the address)
        If(isNextBaseAddr,
           nextBaseIndex(self.addrToIndex(fitTo(rIn.data, rReq.addr)))
        )
        # read data are always accepted
        rIn.ready(1)

        self.rReqHandler(baseIndex, nextBaseFsm._eq(t.required))

        nextBaseReady = nextBaseFsm._eq(t.prepared)
        return baseIndex, nextBaseIndex, nextBaseReady

    def timeoutHandler(self, rst, incr):
        """
        Down-counter which starts on TIMEOUT and stops on 0

        :param rst: signal which reloads the counter with TIMEOUT
        :param incr: signal which enables counting
            (the counter is decremented despite the name of this parameter)
        :return: signal which is high when the counter is 0
        """
        timeoutCntr = self._reg("timeoutCntr", Bits(log2ceil(self.TIMEOUT) + 1, signed=False), defVal=self.TIMEOUT)
        If(rst,
           timeoutCntr(self.TIMEOUT)
        ).Elif((timeoutCntr != 0) & incr,
           timeoutCntr(timeoutCntr - 1)
        )
        return timeoutCntr._eq(0)

    def queuePtrLogic(self, wrPtrIncrVal, wrPtrIncrEn):
        """
        Build the rdPtr/wrPtr registers and the space computation

        :param wrPtrIncrVal: value which is added to wrPtr if wrPtrIncrEn is set
        :param wrPtrIncrEn: enable of the wrPtr increment
            (write to wrPtr from register interface has priority)
        :return: (queueHasSpace, lenByPtrs) where queueHasSpace is wrPtr + 1 != rdPtr
            and lenByPtrs is rdPtr - wrPtr - 2
        """
        r, s = self._reg, self._sig
        ringSpace_t = Bits(self.PTR_WIDTH)

        # Logic of tail/head
        # (with these reset values wrPtr + 1 == rdPtr => no space after reset)
        rdPtr = r("rdPtr", ringSpace_t, defVal=0)
        wrPtr = r("wrPtr", ringSpace_t, defVal=(power(2, self.PTR_WIDTH) - 1))

        If(self.wrPtr.dout.vld,
           wrPtr(self.wrPtr.dout.data)
        ).Elif(wrPtrIncrEn,
           wrPtr(wrPtr + wrPtrIncrVal)
        )

        # rdPtr is changed only through its register interface
        If(self.rdPtr.dout.vld,
           rdPtr(self.rdPtr.dout.data)
        )
        self.wrPtr.din(wrPtr)
        self.rdPtr.din(rdPtr)

        lenByPtrs = s("lenByPtrs", ringSpace_t)
        lenByPtrs(rdPtr - wrPtr - 2)  # size - 1

        # this means there is space for items in memory
        queueHasSpace = (wrPtr + 1 != rdPtr)
        return queueHasSpace, lenByPtrs

    def wReqDriver(self, en, baseIndex, lenByPtrs, inBlockRemain):
        """
        Drive the write request channel

        :param en: signal which is used as valid of the request
        :param baseIndex: index of the word where the write starts
        :param lenByPtrs: space in the queue, computed from the pointers in queuePtrLogic
        :param inBlockRemain: number of items which remain in the actual block
        :return: signal with the len of the request
            = min(inBlockRemain, lenByPtrs, BURST_LEN) resized to the type of wReq.len
        """
        s = self._sig
        wReq = self.wDatapump.req
        # max value of len in the request
        BURST_LEN = self.BUFFER_CAPACITY // 2 - 1
        inBlockRemain_asPtrSize = fitTo(inBlockRemain, lenByPtrs)

        # wReq driver
        ringSpace_t = Bits(self.PTR_WIDTH)
        # NOTE: the name of this signal in the netlist is "constraingSpace"
        constraingLen = s("constraingSpace", ringSpace_t)

        # constraingLen = min(inBlockRemain, lenByPtrs)
        If(inBlockRemain_asPtrSize < lenByPtrs,
          constraingLen(inBlockRemain_asPtrSize)
        ).Else(
          constraingLen(lenByPtrs)
        )
        reqLen = s("reqLen", wReq.len._dtype)
        If(constraingLen > BURST_LEN,
           reqLen(BURST_LEN)
        ).Else(
           connect(constraingLen, reqLen, fit=True)
        )

        wReq.id(self.ID)
        wReq.addr(self.indexToAddr(baseIndex))
        wReq.rem(0)
        wReq.len(reqLen)
        wReq.vld(en)

        return reqLen

    def mvDataToW(self, prepareEn, dataMoveEn, reqLen, inBlockRemain, nextBlockTransition_out, dataCntr_out):
        """
        Move data from the FIFO to the write channel of the datapump
        and update the counters which depend on it

        :param prepareEn: signal which loads dataCntr_out by reqLen and updates inBlockRemain
        :param dataMoveEn: signal which enables the transfer of data
        :param reqLen: len of the actual request
        :param inBlockRemain: register with the number of items which remain in the actual block
        :param nextBlockTransition_out: signal driven by this method,
            high if the actual request consumes the rest of the block and prepareEn is set
        :param dataCntr_out: register with the number of words which remain to be sent - 1
            (w.last is set when it is 0)
        """
        f = self.dataFifo.dataOut
        w = self.wDatapump.w
        nextBlockTransition = self._sig("mvDataToW_nextBlockTransition")
        # reqLen + 1 is the number of words in the request
        nextBlockTransition(inBlockRemain <= fitTo(reqLen, inBlockRemain) + 1)
        If(prepareEn,
            dataCntr_out(fitTo(reqLen, dataCntr_out)),

            If(nextBlockTransition_out,
                inBlockRemain(self.ITEMS_IN_BLOCK)
            ).Else(
                inBlockRemain(inBlockRemain - (fitTo(reqLen, inBlockRemain) + 1))
            )
        ).Elif(dataMoveEn,
            If(StreamNode(masters=[f], slaves=[w]).ack(),
               dataCntr_out(dataCntr_out - 1)
            )
        )
        StreamNode(masters=[f], slaves=[w]).sync(dataMoveEn)
        w.data(f.data)
        w.last(dataCntr_out._eq(0))
        # all bytes of the word are always valid
        w.strb(mask(w.strb._dtype.bit_length()))
        self.dataFifo.dataIn(self.dataIn)
        nextBlockTransition_out(nextBlockTransition & prepareEn)

    def itemUploadLogic(self, baseIndex, nextBaseIndex, nextBaseReady, nextBlockTransition_out):
        """
        Build the FSM which uploads the items from the FIFO

        :param baseIndex: NOTE(review): this parameter is overwritten by a new register
            before it is used - confirm that this is intended
        :param nextBaseIndex: index of the first word of the next block
        :param nextBaseReady: signal which means that nextBaseIndex is valid
        :param nextBlockTransition_out: signal driven from mvDataToW
            which means that the unit is moving to the next block
        """
        r, s = self._reg, self._sig
        f = self.dataFifo
        w = self.wDatapump

        BURST_LEN = self.BUFFER_CAPACITY // 2
        # FIFO contains at least BURST_LEN items
        # (presumably f.size is the number of items stored in the FIFO)
        bufferHasData = s("bufferHasData")
        bufferHasData(f.size > (BURST_LEN - 1))
        # we are counting base next addr as item as well
        addr_index_t = Bits(self.ADDR_WIDTH - self.ALIGN_BITS)
        # index of the word where the next write starts
        baseIndex = r("baseIndex", addr_index_t)

        dataCntr_t = Bits(log2ceil(BURST_LEN + 1), signed=False)
        dataCntr = r("dataCntr", dataCntr_t, defVal=0)  # counter of uploading data
        # len of the request which was accepted by the datapump
        reqLen_backup = r("reqLen_backup", w.req.len._dtype, defVal=0)

        gotWriteAck = w.ack.vld & w.ack.data._eq(self.ID)
        # wrPtr is incremented by the number of written words after the write is acknowledged
        queueHasSpace, lenByPtrs = self.queuePtrLogic(fitTo(reqLen_backup, self.wrPtr.din) + 1, gotWriteAck)

        timeout = s("timeout")
        # idle: waiting for enough data or for the timeout
        # reqPending: write request is being sent
        # dataPending_prepare: one clock cycle where the counters are loaded
        # dataPending_send: data are being sent
        # waitForAck: waiting for the acknowledge of the write
        fsm_t = HEnum("itemUploadingFsm_t", ["idle",
                                            "reqPending",
                                            "dataPending_prepare",
                                            "dataPending_send",
                                            "waitForAck"])
        fsm = FsmBuilder(self, fsm_t, "itemUploadLogic_fsm")\
        .Trans(fsm_t.idle,
            (timeout | (bufferHasData & queueHasSpace), fsm_t.reqPending)

        ).Trans(fsm_t.reqPending,
            (w.req.rd, fsm_t.dataPending_prepare)

        ).Trans(fsm_t.dataPending_prepare,
            fsm_t.dataPending_send
        ).Trans(fsm_t.dataPending_send,
            ((~nextBlockTransition_out | nextBaseReady) & dataCntr._eq(0), fsm_t.waitForAck)
        ).Trans(fsm_t.waitForAck,
            (gotWriteAck, fsm_t.idle)
        ).stateReg

        # timeout counter is reloaded outside of idle
        # and it is counting only if there are some data and space for them
        timeout(self.timeoutHandler(fsm != fsm_t.idle,
                                    (f.size != 0) & queueHasSpace))

        inBlock_t = Bits(log2ceil(self.ITEMS_IN_BLOCK + 1))
        inBlockRemain = r("inBlockRemain_reg", inBlock_t, defVal=self.ITEMS_IN_BLOCK)

        wReqEn = fsm._eq(fsm_t.reqPending)
        reqLen = self.wReqDriver(wReqEn, baseIndex, lenByPtrs, inBlockRemain)

        # remember len of the accepted request, reqLen can change later
        If(wReqEn & w.req.rd,
           reqLen_backup(reqLen)
        )

        dataMoveEn = fsm._eq(fsm_t.dataPending_send)
        prepareEn = fsm._eq(fsm_t.dataPending_prepare)
        self.mvDataToW(prepareEn, dataMoveEn, reqLen_backup,
                               inBlockRemain, nextBlockTransition_out, dataCntr)

        # baseIndex priority: write from register interface,
        # then move behind the data of the actual request,
        # then jump to the next block
        If(self.baseAddr.dout.vld,
           baseIndex(self.addrToIndex(self.baseAddr.dout.data)),
        ).Elif(prepareEn,
           baseIndex(baseIndex + fitTo(reqLen_backup, baseIndex) + 1)
        ).Elif(nextBlockTransition_out,
           baseIndex(nextBaseIndex)
        )

        w.ack.rd(fsm._eq(fsm_t.waitForAck))

    def _impl(self):
        """Connect the logic of the next block address download with the logic of the item upload."""
        propagateClkRstn(self)
        nextBlockTransition = self._sig("nextBlockTransition")
        baseIndex, nextBaseIndex, nextBaseReady = self.baseAddrLogic(nextBlockTransition)
        self.itemUploadLogic(baseIndex, nextBaseIndex, nextBaseReady, nextBlockTransition)
Beispiel #5
0
class ArrayBuff_writer(Unit):
    """
    Write data into a circular buffer allocated as an array.
    Collect items and send them over wDatapump
    when the internal buffer is full or on timeout.
    Maximum overlap of transactions is 1

    items -> buff -> internal logic -> axi datapump

    .. hwt-autodoc::
    """

    def _config(self):
        """Declare the parameters of this unit."""
        # presumably declares ADDR_WIDTH, DATA_WIDTH and other parameters of the
        # datapump request interface (AddrSizeHs is defined outside of this block)
        AddrSizeHs._config(self)
        # id used for the write transactions
        self.ID = Param(3)
        # NOTE: plain int, not a Param
        self.MAX_LEN = 15
        # width of an item on the input
        self.ITEM_WIDTH = Param(16)
        # depth of the internal FIFO
        self.BUFF_DEPTH = Param(16)
        # initial value of the timeout counter + 1
        self.TIMEOUT = Param(1024)
        # number of items in the circular buffer in memory
        self.ITEMS = Param(4096 // 8)

    def _declr(self):
        """Declare interfaces and the internal FIFO."""
        addClkRstn(self)

        # input with the items to store
        self.items = Handshaked()
        self.items.DATA_WIDTH = self.ITEM_WIDTH

        with self._paramsShared():
            self.wDatapump = AxiWDatapumpIntf()._m()

        # value of the counter of uploaded items
        self.uploaded = VectSignal(16)._m()

        # interface to control the register with the base address
        self.baseAddr = RegCntrl()
        self.baseAddr.DATA_WIDTH = self.ADDR_WIDTH

        # number of items which remain to the end of the buffer in memory
        self.buff_remain = VectSignal(16)._m()

        # FIFO for the input items, its size is exported
        # because it is used for the len of the request
        b = HandshakedFifo(Handshaked)
        b.DATA_WIDTH = self.ITEM_WIDTH
        b.EXPORT_SIZE = True
        b.DEPTH = self.BUFF_DEPTH
        self.buff = b

    def uploadedCntrHandler(self, st, reqAckHasCome, sizeOfitems):
        """
        Build the counter of uploaded items and connect it to the ``uploaded`` output

        :param st: state register of the main FSM
        :param reqAckHasCome: signal which means that the write was acknowledged
        :param sizeOfitems: number of items in the actual transaction,
            this value is added to the counter in state waitOnAck if reqAckHasCome is set
        """
        uploadedCntr = self._reg(
            "uploadedCntr", self.uploaded._dtype, def_val=0)
        self.uploaded(uploadedCntr)

        If(st._eq(stT.waitOnAck) & reqAckHasCome,
           uploadedCntr(uploadedCntr + fitTo(sizeOfitems, uploadedCntr))
        )

    def _impl(self):
        """
        Build the request generator, the FSM (states are in ``stT``
        which is defined outside of this class) and the data path
        from the FIFO to the datapump.
        """
        # number of address bits which are not stored in the baseAddr register
        ALIGN_BITS = log2ceil(self.DATA_WIDTH // 8 - 1)
        TIMEOUT_MAX = self.TIMEOUT - 1
        ITEMS = self.ITEMS
        buff = self.buff
        reqAck = self.wDatapump.ack
        req = self.wDatapump.req
        w = self.wDatapump.w

        propagateClkRstn(self)

        # number of items in the actual transaction
        sizeOfitems = self._reg("sizeOfItems", Bits(
            buff.size._dtype.bit_length()))

        # aligned base addr
        baseAddr = self._reg("baseAddrReg", Bits(self.ADDR_WIDTH - ALIGN_BITS))
        If(self.baseAddr.dout.vld,
           baseAddr(self.baseAddr.dout.data[:ALIGN_BITS])
        )
        self.baseAddr.din(Concat(baseAddr, Bits(ALIGN_BITS).from_py(0)))

        # offset in buffer and its complement
        offset_t = Bits(log2ceil(ITEMS + 1), signed=False)
        offset = self._reg("offset", offset_t, def_val=0)
        remaining = self._reg("remaining", Bits(
            log2ceil(ITEMS + 1), signed=False), def_val=ITEMS)
        self.buff_remain(remaining, fit=True)

        # address (without alignment bits) of the first free place in the buffer in memory
        addrTmp = self._sig("baseAddrTmp", baseAddr._dtype)
        addrTmp(baseAddr + fitTo(offset, baseAddr))

        # req values logic
        req.id(self.ID)
        req.addr(Concat(addrTmp, Bits(ALIGN_BITS).from_py(0)))
        req.rem(0)

        # number of items in the request which is being prepared
        sizeTmp = self._sig("sizeTmp", buff.size._dtype)

        # size of the FIFO has to be exactly 1 bit wider than len in the request
        assert (req.len._dtype.bit_length()
                == buff.size._dtype.bit_length() - 1), (
            req.len._dtype.bit_length(), buff.size._dtype.bit_length())

        # len in the request is the number of words - 1
        buffSizeAsLen = self._sig("buffSizeAsLen", buff.size._dtype)
        buffSizeAsLen(buff.size - 1)
        buffSize_tmp = self._sig("buffSize_tmp", remaining._dtype)
        buffSize_tmp(buff.size, fit=True)

        # content of the FIFO does not fit in front of the end of the buffer in memory
        # or it fills it exactly
        endOfLenBlock = (remaining - 1) < buffSize_tmp

        remainingAsLen = self._sig("remainingAsLen", remaining._dtype)
        remainingAsLen(remaining - 1)

        # the request is limited by the end of the buffer in memory or by the size of the FIFO
        If(endOfLenBlock,
            req.len(remainingAsLen, fit=True),
            sizeTmp(remaining, fit=True)
        ).Else(
            req.len(buffSizeAsLen, fit=True),
            sizeTmp(buff.size)
        )

        # number of words which remain to be sent in the actual transaction
        lastWordCntr = self._reg("lastWordCntr", buff.size._dtype, 0)
        w_last = lastWordCntr._eq(1)
        w_ack = w.ready & buff.dataOut.vld

        # timeout logic
        timeoutCntr = self._reg("timeoutCntr", Bits(log2ceil(self.TIMEOUT), False),
                                def_val=TIMEOUT_MAX)
        # buffer is full or timeout
        beginReq = buff.size._eq(self.BUFF_DEPTH) | timeoutCntr._eq(0)
        reqAckHasCome = self._sig("reqAckHasCome")
        reqAckHasCome(reqAck.vld & reqAck.data._eq(self.ID))
        # waitOnInput -> waitOnDataTx (request accepted)
        #             -> waitOnAck (last word sent) -> waitOnInput (write acknowledged)
        st = FsmBuilder(self, stT)\
            .Trans(stT.waitOnInput,
                (beginReq & req.rd, stT.waitOnDataTx)
            ).Trans(stT.waitOnDataTx,
                    (w_last & w_ack, stT.waitOnAck)
            ).Trans(stT.waitOnAck,
                    (reqAckHasCome, stT.waitOnInput)
            ).stateReg

        If(st._eq(stT.waitOnInput) & beginReq,  # timeout is counting only when there is pending data
            # start new request
            req.vld(1),
            If(req.rd,
                # request accepted, move the position in the buffer in memory
                # (wrap to the start if the end was reached)
                If(endOfLenBlock,
                   offset(0),
                   remaining(ITEMS)
                ).Else(
                    offset(offset + fitTo(buff.size, offset)),
                    remaining(remaining - fitTo(buff.size, remaining))
                ),
                sizeOfitems(sizeTmp),
                timeoutCntr(TIMEOUT_MAX)
            )
        ).Else(
            req.vld(0),
            # count down while the FIFO has some data on the output and we are waiting on input
            If(buff.dataOut.vld & st._eq(stT.waitOnInput) & (timeoutCntr != 0),
               timeoutCntr(timeoutCntr - 1)
            )
        )

        reqAck.rd(st._eq(stT.waitOnAck))

        self.uploadedCntrHandler(st, reqAckHasCome, sizeOfitems)

        # it does not matter when lastWordCntr is changing when there is no
        # request
        startSendingData = st._eq(stT.waitOnInput) & beginReq & req.rd
        If(startSendingData,
            lastWordCntr(sizeTmp)
        ).Elif((lastWordCntr != 0) & w_ack,
            lastWordCntr(lastWordCntr - 1)
        )

        buff.dataIn(self.items)

        w.data(buff.dataOut.data, fit=True)

        # data are moved from the FIFO to the datapump only in state waitOnDataTx
        StreamNode(
            masters=[buff.dataOut],
            slaves=[w]
        ).sync(st._eq(stT.waitOnDataTx))
        # all bytes of the word are always valid
        w.strb(mask(w.strb._dtype.bit_length()))
        w.last(w_last)
Beispiel #6
0
class CLinkedListWriter(Unit):
    """
    This unit writes items to a (circular) linked-list-like structure.
    (The list does not necessarily need to be circular, but the free space
    is specified by two pointers like in a circular queue.)

    .. code-block:: c

        struct node {
            item_t items[ITEMS_IN_BLOCK],
            struct node * next;
        };

    Synchronization is obtained by the rdPtr/wrPtr (tail/head) pointers,
    baseAddr is the address of the actual node.

    :attention: device writes chunks of max size <= BUFFER_CAPACITY/2
    :attention: next addr is downloaded on background when items are uploaded
               (= has to be set when this unit enters this block)

    :note: wrPtr == rdPtr   => queue is empty
        and there is (2^PTR_WIDTH) - 1 of empty space
        wrPtr == rdPtr+1 => queue is full
        wrPtr+1 == rdPtr   => there is (2^PTR_WIDTH) - 2 of empty space
        spaceToWrite = rdPtr - wrPtr - 1 (with uint16_t)

    :note: NOTE(review): by the formula above and by queuePtrLogic()
        (queueHasSpace = wrPtr + 1 != rdPtr) the "full" condition is
        wrPtr+1 == rdPtr and wrPtr == rdPtr+1 leaves (2^PTR_WIDTH) - 2
        of space, so the two middle lines of the previous note look
        swapped - confirm.

    .. hwt-autodoc::
    """
    def _config(self):
        """Declare the configurable parameters of this unit."""
        self.ID_WIDTH = Param(4)
        # id on interfaces for default transaction
        self.ID = Param(3)

        # depth of the internal item FIFO (see _declr)
        self.BUFFER_CAPACITY = Param(32)
        # number of items in one node; presumably a 4096 B block of 8 B words
        # minus one word reserved for the `next` pointer - TODO confirm
        self.ITEMS_IN_BLOCK = Param(4096 // 8 - 1)

        self.ADDR_WIDTH = Param(32)
        self.DATA_WIDTH = Param(64)
        # width of the rdPtr/wrPtr ring pointers
        self.PTR_WIDTH = Param(16)

        # timeout to send items from buffer even if they are smaller
        # than recommended burst
        self.TIMEOUT = Param(4096)

    def _declr(self):
        """
        Declare interfaces and the internal FIFO of this unit
        and precompute ALIGN_BITS.
        """
        addClkRstn(self)

        with self._paramsShared():
            # read interface for datapump
            # interface which is sending requests to download addr of next block
            self.rDatapump = AxiRDatapumpIntf()._m()
            # because we are downloading only the address of the next block
            self.rDatapump.MAX_LEN = 1

            # write interface for datapump
            self.wDatapump = AxiWDatapumpIntf()._m()
            # a single write burst is limited to half of the buffer
            self.wDatapump.MAX_LEN = self.BUFFER_CAPACITY // 2
            assert self.BUFFER_CAPACITY <= self.ITEMS_IN_BLOCK

            # interface for items which should be written into list
            self.dataIn = Handshaked()

        # interface to control internal register
        a = self.baseAddr = RegCntrl()
        a.DATA_WIDTH = self.ADDR_WIDTH

        # register interfaces for the ring pointers
        self.rdPtr = RegCntrl()
        self.wrPtr = RegCntrl()
        for ptr in [self.rdPtr, self.wrPtr]:
            ptr.DATA_WIDTH = self.PTR_WIDTH

        # buffer for incoming items, its size is exported so that
        # itemUploadLogic() can decide when to start a burst
        f = self.dataFifo = HandshakedFifo(Handshaked)
        f.EXPORT_SIZE = True
        f.DATA_WIDTH = self.DATA_WIDTH
        f.DEPTH = self.BUFFER_CAPACITY

        # number of address bits which address bytes inside of one data word
        self.ALIGN_BITS = log2ceil(self.DATA_WIDTH // 8)

    def addrToIndex(self, addr):
        """
        Convert an address to a word index by slicing off ALIGN_BITS bits.

        :note: presumably the slice drops the lowest ALIGN_BITS bits
            (hwt slices are written high:low, indexToAddr() puts
            ALIGN_BITS zeros back) - TODO confirm
        """
        return addr[:self.ALIGN_BITS]

    def indexToAddr(self, indx):
        """
        Convert a word index to an address by concatenating the index
        with ALIGN_BITS zero bits.
        """
        return Concat(indx, Bits(self.ALIGN_BITS).from_py(0))

    def rReqHandler(self, baseIndex, doReq):
        """
        Drive the request channel of rDatapump.

        :param baseIndex: index of the first item of the actual block
        :param doReq: signal which drives the valid of the request
        """
        # always download only one word with address of next block
        rReq = self.rDatapump.req
        # the requested word is ITEMS_IN_BLOCK words behind the block base
        # (the position of `next` in struct node from the class docstring)
        rReq.addr(self.indexToAddr(baseIndex + self.ITEMS_IN_BLOCK))
        rReq.id(self.ID)
        rReq.len(0)
        rReq.rem(0)

        rReq.vld(doReq)

    def baseAddrLogic(self, nextBlockTransition_in):
        """
        Logic for downloading address of next block

        :param nextBlockTransition_in: signal which means that baseIndex
               should be changed to nextBaseIndex; the original description
               says that if nextBaseReady is not high this signal has
               no effect (= regular handshake).
               NOTE(review): the register update below loads nextBaseIndex
               whenever this signal is high (and baseAddr.dout.vld is low)
               without checking nextBaseReady - confirm that the caller
               guarantees it.
        :return: tuple (baseIndex, nextBaseIndex, nextBaseReady),
                 nextBaseReady is high while the FSM is in the "prepared"
                 state
        """
        r = self._reg
        rIn = self.rDatapump.r
        rReq = self.rDatapump.req

        addr_index_t = Bits(self.ADDR_WIDTH - self.ALIGN_BITS)
        baseIndex = r("baseIndex_backup", addr_index_t)
        nextBaseIndex = r("nextBaseIndex", addr_index_t)
        t = HEnum("nextBaseFsm_t",
                  ["uninitialized", "required", "pending", "prepared"])
        # read data with our id = address of the next block
        isNextBaseAddr = rIn.valid & rIn.id._eq(self.ID)
        # uninitialized -> required : base address was written by baseAddr
        # required      -> pending  : read request was accepted
        # pending       -> prepared : address of next block was received
        # prepared      -> required : unit moved to the next block
        nextBaseFsm = FsmBuilder(self, t, "baseAddrLogic_fsm")\
        .Trans(t.uninitialized,
            (self.baseAddr.dout.vld, t.required)
        ).Trans(t.required,
            (rReq.rd, t.pending)
        ).Trans(t.pending,
            (isNextBaseAddr, t.prepared)
        ).Trans(t.prepared,
            (nextBlockTransition_in, t.required)
        ).stateReg

        # a write from the baseAddr interface has priority over the
        # transition to the next block
        If(
            self.baseAddr.dout.vld,
            baseIndex(self.addrToIndex(self.baseAddr.dout.data)),
        ).Elif(nextBlockTransition_in, baseIndex(nextBaseIndex))
        self.baseAddr.din(self.indexToAddr(baseIndex))

        # store received address (resized to width of the address)
        If(isNextBaseAddr,
           nextBaseIndex(self.addrToIndex(fitTo(rIn.data, rReq.addr))))
        # read data channel is never blocked
        rIn.ready(1)

        # request is asserted for the whole time in the "required" state
        self.rReqHandler(baseIndex, nextBaseFsm._eq(t.required))

        nextBaseReady = nextBaseFsm._eq(t.prepared)
        return baseIndex, nextBaseIndex, nextBaseReady

    def timeoutHandler(self, rst, incr):
        """
        Down-counter which is used to detect the timeout.

        :param rst: signal which reloads the counter to TIMEOUT
               (has priority over incr)
        :param incr: enable of counting; despite the name the counter
               is decremented and it stops at 0
        :return: signal which is high when the counter is 0
        """
        timeoutCntr = self._reg("timeoutCntr",
                                Bits(log2ceil(self.TIMEOUT) + 1, signed=False),
                                def_val=self.TIMEOUT)
        If(rst, timeoutCntr(self.TIMEOUT)).Elif((timeoutCntr != 0) & incr,
                                                timeoutCntr(timeoutCntr - 1))
        return timeoutCntr._eq(0)

    def queuePtrLogic(self, wrPtrIncrVal, wrPtrIncrEn):
        """
        Registers with rdPtr/wrPtr and the logic which computes
        the available space from them.

        :param wrPtrIncrVal: value which is added to wrPtr
        :param wrPtrIncrEn: enable of the wrPtr increment, a write
               from the self.wrPtr interface has priority
        :return: tuple (queueHasSpace, lenByPtrs)
        """
        r, s = self._reg, self._sig
        ringSpace_t = Bits(self.PTR_WIDTH)

        # Logic of tail/head,
        rdPtr = r("rdPtr", ringSpace_t, def_val=0)
        # reset value is the maximum value, wrPtr + 1 == rdPtr after reset
        # if the addition wraps on PTR_WIDTH bits (presumably it does)
        wrPtr = r("wrPtr", ringSpace_t, def_val=(2**self.PTR_WIDTH) - 1)

        If(self.wrPtr.dout.vld,
           wrPtr(self.wrPtr.dout.data)).Elif(wrPtrIncrEn,
                                             wrPtr(wrPtr + wrPtrIncrVal))

        # rdPtr is changed only from its register interface
        If(self.rdPtr.dout.vld, rdPtr(self.rdPtr.dout.data))
        self.wrPtr.din(wrPtr)
        self.rdPtr.din(rdPtr)

        lenByPtrs = s("lenByPtrs", ringSpace_t)
        lenByPtrs(rdPtr - wrPtr - 2)  # size - 1

        # the queue has space unless wrPtr + 1 == rdPtr
        queueHasSpace = (wrPtr + 1 != rdPtr)
        return queueHasSpace, lenByPtrs

    def wReqDriver(self, en, baseIndex, lenByPtrs, inBlockRemain):
        """
        Drive the request channel of wDatapump.

        :param en: signal which drives the valid of the request
        :param baseIndex: index of the word where the burst should start
        :param lenByPtrs: length limit computed from rdPtr/wrPtr
        :param inBlockRemain: number of items which remain in actual block
        :return: reqLen signal with the length of the request (the rest
            of this class uses reqLen + 1 as the number of words)
        """
        s = self._sig
        wReq = self.wDatapump.req
        # maximum value of the len field
        BURST_LEN = self.BUFFER_CAPACITY // 2 - 1
        inBlockRemain_asPtrSize = fitTo(inBlockRemain, lenByPtrs)

        # wReq driver
        ringSpace_t = Bits(self.PTR_WIDTH)
        constraingLen = s("constraingSpace", ringSpace_t)

        # constraingLen = min(inBlockRemain, lenByPtrs)
        If(inBlockRemain_asPtrSize < lenByPtrs,
           constraingLen(inBlockRemain_asPtrSize)).Else(
               constraingLen(lenByPtrs))
        reqLen = s("reqLen", wReq.len._dtype)
        # reqLen = min(constraingLen, BURST_LEN)
        If(constraingLen > BURST_LEN,
           reqLen(BURST_LEN)).Else(reqLen(constraingLen, fit=True))

        wReq.id(self.ID)
        wReq.addr(self.indexToAddr(baseIndex))
        wReq.rem(0)
        wReq.len(reqLen)
        wReq.vld(en)

        return reqLen

    def mvDataToW(self, prepareEn, dataMoveEn, reqLen, inBlockRemain,
                  nextBlockTransition_out, dataCntr_out):
        """
        Move data from the FIFO to the data channel of wDatapump.

        :param prepareEn: enable of the initialization of the counters
               for a new burst
        :param dataMoveEn: enable of the data transfer
        :param reqLen: length of the actual request
        :param inBlockRemain: register with the number of items which
               remain in actual block, it is updated by this method
        :param nextBlockTransition_out: signal driven by this method, high
               when prepareEn is high and the burst consumes the rest
               of the block
        :param dataCntr_out: register with the number of remaining words
               of the burst, it is updated by this method
        """
        f = self.dataFifo.dataOut
        w = self.wDatapump.w
        nextBlockTransition = self._sig("mvDataToW_nextBlockTransition")
        # this burst (reqLen + 1 words) takes all remaining items of the block
        nextBlockTransition(inBlockRemain <= fitTo(reqLen, inBlockRemain) + 1)
        # prepareEn: load dataCntr_out and update inBlockRemain
        #            (reload on block transition, else subtract reqLen + 1)
        # dataMoveEn: decrement dataCntr_out for each transferred word
        If(
            prepareEn, dataCntr_out(fitTo(reqLen, dataCntr_out)),
            If(nextBlockTransition_out,
               inBlockRemain(self.ITEMS_IN_BLOCK)).Else(
                   inBlockRemain(inBlockRemain -
                                 (fitTo(reqLen, inBlockRemain) + 1)))).Elif(
                                     dataMoveEn,
                                     If(
                                         StreamNode(masters=[f],
                                                    slaves=[w]).ack(),
                                         dataCntr_out(dataCntr_out - 1)))
        # handshake between FIFO output and w channel enabled by dataMoveEn
        StreamNode(masters=[f], slaves=[w]).sync(dataMoveEn)
        w.data(f.data)
        # last word of the burst
        w.last(dataCntr_out._eq(0))
        # presumably all byte lanes enabled - confirm against mask()
        w.strb(mask(w.strb._dtype.bit_length()))
        self.dataFifo.dataIn(self.dataIn)
        nextBlockTransition_out(nextBlockTransition & prepareEn)

    def itemUploadLogic(self, baseIndex, nextBaseIndex, nextBaseReady,
                        nextBlockTransition_out):
        """
        Main FSM and registers which control the upload of items.

        :param baseIndex: NOTE(review): this parameter is shadowed by the
               local register "baseIndex" declared below and the passed
               value is never used - confirm that this is intended
        :param nextBaseIndex: index of the first item of the next block
        :param nextBaseReady: signal which is high when nextBaseIndex
               is valid
        :param nextBlockTransition_out: signal driven from mvDataToW()
        """
        r, s = self._reg, self._sig
        f = self.dataFifo
        w = self.wDatapump

        BURST_LEN = self.BUFFER_CAPACITY // 2
        bufferHasData = s("bufferHasData")
        # FIFO contains at least BURST_LEN items
        bufferHasData(f.size > (BURST_LEN - 1))
        # we are counting base next addr as item as well
        addr_index_t = Bits(self.ADDR_WIDTH - self.ALIGN_BITS)
        # index where the next burst will be written
        baseIndex = r("baseIndex", addr_index_t)

        dataCntr_t = Bits(log2ceil(BURST_LEN + 1), signed=False)
        # counter of uploading data
        dataCntr = r("dataCntr", dataCntr_t, def_val=0)
        # length of the request which is currently processed
        reqLen_backup = r("reqLen_backup", w.req.len._dtype, def_val=0)

        gotWriteAck = w.ack.vld & w.ack.data._eq(self.ID)
        # wrPtr is incremented by the number of written words
        # (reqLen_backup + 1) after the write was acknowledged
        queueHasSpace, lenByPtrs = self.queuePtrLogic(
            fitTo(reqLen_backup, self.wrPtr.din) + 1, gotWriteAck)

        timeout = s("timeout")
        fsm_t = HEnum("itemUploadingFsm_t", [
            "idle", "reqPending", "dataPending_prepare", "dataPending_send",
            "waitForAck"
        ])
        # idle                -> reqPending          : timeout or there is
        #                                              enough data and space
        # reqPending          -> dataPending_prepare : request was accepted
        # dataPending_prepare -> dataPending_send    : unconditionally
        # dataPending_send    -> waitForAck          : all data sent and the
        #                        next base is ready if the block was changed
        # waitForAck          -> idle                : write ack received
        fsm = FsmBuilder(self, fsm_t, "itemUploadLogic_fsm")\
        .Trans(fsm_t.idle,
            (timeout | (bufferHasData & queueHasSpace), fsm_t.reqPending)

        ).Trans(fsm_t.reqPending,
            (w.req.rd, fsm_t.dataPending_prepare)

        ).Trans(fsm_t.dataPending_prepare,
            fsm_t.dataPending_send
        ).Trans(fsm_t.dataPending_send,
            ((~nextBlockTransition_out | nextBaseReady) & dataCntr._eq(0), fsm_t.waitForAck)
        ).Trans(fsm_t.waitForAck,
            (gotWriteAck, fsm_t.idle)
        ).stateReg

        # timeout counter is reloaded whenever the FSM is not idle
        # and it is counting only if there is some data and space for it
        timeout(
            self.timeoutHandler(fsm != fsm_t.idle,
                                (f.size != 0) & queueHasSpace))

        inBlock_t = Bits(log2ceil(self.ITEMS_IN_BLOCK + 1))
        inBlockRemain = r("inBlockRemain_reg",
                          inBlock_t,
                          def_val=self.ITEMS_IN_BLOCK)

        wReqEn = fsm._eq(fsm_t.reqPending)
        reqLen = self.wReqDriver(wReqEn, baseIndex, lenByPtrs, inBlockRemain)

        # remember the length of the accepted request
        If(wReqEn & w.req.rd, reqLen_backup(reqLen))

        dataMoveEn = fsm._eq(fsm_t.dataPending_send)
        prepareEn = fsm._eq(fsm_t.dataPending_prepare)
        self.mvDataToW(prepareEn, dataMoveEn, reqLen_backup, inBlockRemain,
                       nextBlockTransition_out, dataCntr)

        # priority: write from baseAddr interface, move behind the data
        # of the actual request, jump to the next block
        If(
            self.baseAddr.dout.vld,
            baseIndex(self.addrToIndex(self.baseAddr.dout.data)),
        ).Elif(prepareEn,
               baseIndex(baseIndex + fitTo(reqLen_backup, baseIndex) +
                         1)).Elif(nextBlockTransition_out,
                                  baseIndex(nextBaseIndex))

        w.ack.rd(fsm._eq(fsm_t.waitForAck))

    def _impl(self):
        """
        Build the unit from the logic for the address of the next block
        and the logic for the upload of items, both parts share
        the nextBlockTransition signal.
        """
        propagateClkRstn(self)
        nextBlockTransition = self._sig("nextBlockTransition")
        baseIndex, nextBaseIndex, nextBaseReady = self.baseAddrLogic(
            nextBlockTransition)
        self.itemUploadLogic(baseIndex, nextBaseIndex, nextBaseReady,
                             nextBlockTransition)
Beispiel #7
0
class CLinkedListReader(Unit):
    """
    This unit reads items from a (circular) linked-list-like structure.

    .. code-block:: c

        struct node {
            item_t items[ITEMS_IN_BLOCK],
            struct node * next;
        };

    Synchronization is obtained by the rdPtr/wrPtr (tail/head) pointers,
    baseAddr is the address of the actual node.

    :attention: device reads only chunks of size <= BUFFER_CAPACITY/2,

    .. hwt-autodoc::
    """
    def _config(self):
        """Declare the configurable parameters of this unit."""
        self.ID_WIDTH = Param(4)
        # id of regular data transactions
        self.ID = Param(3)
        # id of packet where last item is next addr
        self.ID_LAST = Param(4)

        # depth of the internal item FIFO (see _declr)
        self.BUFFER_CAPACITY = Param(32)
        # number of items in one node; presumably a 4096 B block of 8 B words
        # minus one word reserved for the `next` pointer - TODO confirm
        self.ITEMS_IN_BLOCK = Param(4096 // 8 - 1)

        self.ADDR_WIDTH = Param(32)
        self.DATA_WIDTH = Param(64)
        # width of the rdPtr/wrPtr ring pointers
        self.PTR_WIDTH = Param(16)

    def _declr(self):
        """Declare interfaces and the internal FIFO of this unit."""
        addClkRstn(self)

        with self._paramsShared():
            # interface which is sending requests to download data
            # and interface which is collecting all data and only data with specified id are processed
            self.rDatapump = AxiRDatapumpIntf()._m()
            # size of half of the buffer in bytes
            self.rDatapump.MAX_BYTES = self.BUFFER_CAPACITY // 2 * self.DATA_WIDTH // 8

            self.dataOut = Handshaked()._m()

        # (how many items remain in the block)
        self.inBlockRemain = VectSignal(log2ceil(self.ITEMS_IN_BLOCK + 1))._m()

        # interface to control internal register
        a = self.baseAddr = RegCntrl()
        a.DATA_WIDTH = self.ADDR_WIDTH
        # register interfaces for the ring pointers
        self.rdPtr = RegCntrl()
        self.wrPtr = RegCntrl()
        for ptr in [self.rdPtr, self.wrPtr]:
            ptr.DATA_WIDTH = self.PTR_WIDTH

        # buffer for downloaded items, its size is exported so that
        # _impl() can check that there is a space for the next burst
        f = self.dataFifo = HandshakedFifo(Handshaked)
        f.EXPORT_SIZE = True
        f.DATA_WIDTH = self.DATA_WIDTH
        f.DEPTH = self.BUFFER_CAPACITY

    def addrAlignBits(self):
        """
        :return: number of address bits which address bytes inside
            of one data word
        """
        return log2ceil(self.DATA_WIDTH // 8)

    def _impl(self):
        """
        Build the logic of the reader: rdPtr/wrPtr registers, computation
        of the request, dispatching of the requests and pushing of the
        received data into the FIFO.
        """
        propagateClkRstn(self)
        r, s = self._reg, self._sig
        req = self.rDatapump.req
        f = self.dataFifo
        dIn = self.rDatapump.r
        dBuffIn = f.dataIn

        ALIGN_BITS = self.addrAlignBits()
        ID = self.ID
        BUFFER_CAPACITY = self.BUFFER_CAPACITY
        BURST_LEN = BUFFER_CAPACITY // 2
        ID_LAST = self.ID_LAST
        bufferHasSpace = s("bufferHasSpace")
        # FIFO contains at most BURST_LEN items
        bufferHasSpace(f.size < (BURST_LEN + 1))
        # we are counting base next addr as item as well
        inBlock_t = Bits(log2ceil(self.ITEMS_IN_BLOCK + 1))
        ringSpace_t = Bits(self.PTR_WIDTH)

        # flag that a request was sent and its data was not fully received
        downloadPending = r("downloadPending", def_val=0)

        # index of the word which will be read next
        baseIndex = r("baseIndex", Bits(self.ADDR_WIDTH - ALIGN_BITS))
        inBlockRemain = r("inBlockRemain_reg",
                          inBlock_t,
                          def_val=self.ITEMS_IN_BLOCK)
        self.inBlockRemain(inBlockRemain)

        # Logic of tail/head
        rdPtr = r("rdPtr", ringSpace_t, def_val=0)
        wrPtr = r("wrPtr", ringSpace_t, def_val=0)
        # wrPtr is changed only from its register interface
        If(self.wrPtr.dout.vld, wrPtr(self.wrPtr.dout.data))
        self.wrPtr.din(wrPtr)
        self.rdPtr.din(rdPtr)

        # this means items are present in memory
        # (despite the name of the signal)
        hasSpace = s("hasSpace")
        hasSpace(wrPtr != rdPtr)
        # request is being accepted in this clock cycle
        doReq = s("doReq")
        doReq(bufferHasSpace & hasSpace & ~downloadPending & req.rd)
        req.rem(0)
        self.dataOut(f.dataOut)

        # logic of baseAddr and baseIndex
        baseAddr = Concat(baseIndex, Bits(ALIGN_BITS).from_py(0))
        req.addr(baseAddr)
        self.baseAddr.din(baseAddr)
        # word with one of our ids is valid and the FIFO input is ready
        dataAck = dIn.valid & In(dIn.id, [ID, ID_LAST]) & dBuffIn.rd

        # priority: write from baseAddr interface, then received words;
        # the last word of ID_LAST packet carries the address of the next
        # block, any other received word moves the index by one
        If(self.baseAddr.dout.vld,
           baseIndex(self.baseAddr.dout.data[:ALIGN_BITS])).Elif(
               dataAck & downloadPending,
               If(dIn.last & dIn.id._eq(ID_LAST),
                  baseIndex(dIn.data[self.ADDR_WIDTH:ALIGN_BITS])).Else(
                      baseIndex(baseIndex + 1)))

        # number of items specified by pointers
        sizeByPtrs = s("sizeByPtrs", ringSpace_t)
        sizeByPtrs(wrPtr - rdPtr)

        # constraingSpace = min(inBlockRemain, sizeByPtrs)
        inBlockRemain_asPtrSize = fitTo(inBlockRemain, sizeByPtrs)
        constraingSpace = s("constraingSpace", ringSpace_t)
        If(inBlockRemain_asPtrSize < sizeByPtrs,
           constraingSpace(inBlockRemain_asPtrSize)).Else(
               constraingSpace(sizeByPtrs))

        # sizeByPtrs (resized to width of inBlockRemain) >= inBlockRemain
        constrainedByInBlockRemain = s("constrainedByInBlockRemain")
        constrainedByInBlockRemain(
            fitTo(sizeByPtrs, inBlockRemain) >= inBlockRemain)

        # selection of id and len of the request and update of inBlockRemain
        # (the register is updated only if the request is accepted)
        If(
            constraingSpace > BURST_LEN,
            # download full burst
            req.id(ID),
            req.len(BURST_LEN - 1),
            If(doReq, inBlockRemain(inBlockRemain - BURST_LEN))
        ).Elif(
            constrainedByInBlockRemain & (inBlockRemain < BURST_LEN),
            # we know that sizeByPtrs <= inBlockRemain thats why we can resize it
            # we will download next* as well
            req.id(ID_LAST),
            req.len(constraingSpace, fit=True),
            If(doReq, inBlockRemain(self.ITEMS_IN_BLOCK))).Else(
                # download data leftover
                req.id(ID),
                req.len(constraingSpace - 1, fit=True),
                If(
                    doReq,
                    inBlockRemain(inBlockRemain -
                                  fitTo(constraingSpace, inBlockRemain))))

        # logic of req dispatching
        # only one request can be pending, downloadPending is cleared
        # by the last word of the data and it is set when request is accepted
        If(downloadPending, req.vld(0),
           If(dataAck & dIn.last, downloadPending(0))).Else(
               req.vld(bufferHasSpace & hasSpace),
               If(req.rd & bufferHasSpace & hasSpace, downloadPending(1)))

        # into buffer pushing logic
        dBuffIn.data(dIn.data)

        # word is an item (and not the address of the next block)
        isMyData = s("isMyData")
        isMyData(dIn.id._eq(ID) | (~dIn.last & dIn.id._eq(ID_LAST)))
        # a write from the rdPtr interface has priority over the increment
        If(self.rdPtr.dout.vld, rdPtr(self.rdPtr.dout.data)).Else(
            If(dIn.valid & downloadPending & dBuffIn.rd & isMyData,
               rdPtr(rdPtr + 1)))
        # push data into buffer and increment rdPtr
        # (the extra condition of dBuffIn is the same expression as isMyData
        # combined with downloadPending)
        StreamNode(masters=[dIn],
                   slaves=[dBuffIn],
                   extraConds={
                       dIn:
                       downloadPending,
                       dBuffIn: (dIn.id._eq(ID)
                                 | (dIn.id._eq(ID_LAST)
                                    & ~dIn.last)) & downloadPending
                   }).sync()
Beispiel #8
0
class ArrayBuff_writer(Unit):
    """
    Collect items and send them over wDatapump
    when the buffer is full or on timeout.
    Cyclically writes items into an array over wDatapump.
    Maximum overlap of transactions is 1.

    [TODO] better fit of items on bus
    [TODO] fully pipeline

    items -> buff -> internal logic -> axi datapump

    .. hwt-schematic::
    """

    def _config(self):
        """
        Declare the configurable parameters of this unit.
        """
        # reuse the parameters declared by AddrSizeHs (MAX_LEN is set
        # below; ADDR_WIDTH and DATA_WIDTH which are used in this class
        # presumably come from there as well - TODO confirm)
        AddrSizeHs._config(self)
        # id of the transactions on wDatapump
        self.ID = Param(3)
        self.MAX_LEN.set(16)
        # width of a single item
        self.SIZE_WIDTH = Param(16)
        # depth of the internal FIFO
        self.BUFF_DEPTH = Param(16)
        # timeout after which the items are sent even if the buffer
        # is not full
        self.TIMEOUT = Param(1024)
        # number of items in the array
        self.ITEMS = Param(4096 // 8)

    def _declr(self):
        """Declare interfaces and the internal FIFO of this unit."""
        addClkRstn(self)

        # input of items
        self.items = Handshaked()
        self.items.DATA_WIDTH.set(self.SIZE_WIDTH)

        with self._paramsShared():
            self.wDatapump = AxiWDatapumpIntf()._m()

        # value of the counter of uploaded items (see uploadedCntrHandler)
        self.uploaded = VectSignal(16)._m()

        # interface to control the register with the base address
        self.baseAddr = RegCntrl()
        self.baseAddr.DATA_WIDTH.set(self.ADDR_WIDTH)

        # value of the "remaining" register from _impl
        self.buff_remain = VectSignal(16)._m()

        # buffer of items, its size is exported for the logic in _impl
        b = HandshakedFifo(Handshaked)
        b.DATA_WIDTH.set(self.SIZE_WIDTH)
        b.EXPORT_SIZE.set(True)
        b.DEPTH.set(self.BUFF_DEPTH)
        self.buff = b

    def uploadedCntrHandler(self, st, reqAckHasCome, sizeOfitems):
        """
        Counter of uploaded items which drives the self.uploaded output.

        :param st: state register of the main FSM
        :param reqAckHasCome: signal which is high when the ack with the id
               of this unit is valid
        :param sizeOfitems: number of items in the actual transaction,
               it is added to the counter when the ack comes in the
               waitOnAck state
        :note: stT is a name from the module scope which is not visible
            in this part of the file
        """
        uploadedCntr = self._reg(
            "uploadedCntr", self.uploaded._dtype, defVal=0)
        self.uploaded(uploadedCntr)

        If(st._eq(stT.waitOnAck) & reqAckHasCome,
           uploadedCntr(uploadedCntr + fitTo(sizeOfitems, uploadedCntr))
        )

    def _impl(self):
        """
        Build the logic of the writer: registers with the base address and
        the position in the array, computation of the request, timeout,
        main FSM (waitOnInput -> waitOnDataTx -> waitOnAck) and the data
        path from the buffer to the wDatapump.
        """
        ALIGN_BITS = log2ceil(self.DATA_WIDTH // 8 - 1).val
        # value from which the timeout counter counts down
        TIMEOUT_MAX = self.TIMEOUT - 1
        ITEMS = self.ITEMS
        buff = self.buff
        reqAck = self.wDatapump.ack
        req = self.wDatapump.req
        w = self.wDatapump.w

        propagateClkRstn(self)

        # number of items in the actual transaction
        sizeOfitems = self._reg("sizeOfItems", Bits(
            buff.size._dtype.bit_length()))

        # aligned base addr
        baseAddr = self._reg("baseAddrReg", Bits(self.ADDR_WIDTH - ALIGN_BITS))
        If(self.baseAddr.dout.vld,
           baseAddr(self.baseAddr.dout.data[:ALIGN_BITS])
           )
        self.baseAddr.din(Concat(baseAddr, vec(0, ALIGN_BITS)))

        # offset in buffer and its complement
        offset_t = Bits(log2ceil(ITEMS + 1), signed=False)
        offset = self._reg("offset", offset_t, defVal=0)
        remaining = self._reg("remaining", Bits(
            log2ceil(ITEMS + 1), signed=False), defVal=ITEMS)
        connect(remaining, self.buff_remain, fit=True)

        # address (without align bits) where the next burst will be written
        addrTmp = self._sig("baseAddrTmp", baseAddr._dtype)
        addrTmp(baseAddr + fitTo(offset, baseAddr))

        # req values logic
        req.id(self.ID)
        req.addr(Concat(addrTmp, vec(0, ALIGN_BITS)))
        req.rem(0)

        # number of items which will be sent in the next transaction
        sizeTmp = self._sig("sizeTmp", buff.size._dtype)

        # len is expected to be one bit narrower than the size of the buffer
        assert req.len._dtype.bit_length() == buff.size._dtype.bit_length() - 1, (
            req.len._dtype.bit_length(), buff.size._dtype.bit_length())

        buffSizeAsLen = self._sig("buffSizeAsLen", buff.size._dtype)
        buffSizeAsLen(buff.size - 1)
        buffSize_tmp = self._sig("buffSize_tmp", remaining._dtype)
        connect(buff.size, buffSize_tmp, fit=True)

        # number of items in the buffer is greater than (remaining - 1)
        endOfLenBlock = (remaining - 1) < buffSize_tmp

        remainingAsLen = self._sig("remainingAsLen", remaining._dtype)
        remainingAsLen(remaining - 1)

        # on the end of the array send only the remaining items,
        # otherwise send everything from the buffer
        If(endOfLenBlock,
            connect(remainingAsLen, req.len, fit=True),
            connect(remaining, sizeTmp, fit=True)
        ).Else(
            connect(buffSizeAsLen, req.len, fit=True),
            sizeTmp(buff.size)
        )

        # down-counter of the words of the burst, it is loaded with the
        # number of words and w_last is asserted for the value 1
        lastWordCntr = self._reg("lastWordCntr", buff.size._dtype, 0)
        w_last = lastWordCntr._eq(1)
        w_ack = w.ready & buff.dataOut.vld

        # timeout logic
        timeoutCntr = self._reg("timeoutCntr", Bits(log2ceil(self.TIMEOUT), False),
                                defVal=TIMEOUT_MAX)
        # buffer is full or timeout
        beginReq = buff.size._eq(self.BUFF_DEPTH) | timeoutCntr._eq(0)
        reqAckHasCome = self._sig("reqAckHasCome")
        reqAckHasCome(reqAck.vld & reqAck.data._eq(self.ID))
        # waitOnInput  -> waitOnDataTx : request was accepted
        # waitOnDataTx -> waitOnAck    : last word of the burst was sent
        # waitOnAck    -> waitOnInput  : ack with our id was received
        st = FsmBuilder(self, stT)\
            .Trans(stT.waitOnInput,
                (beginReq & req.rd, stT.waitOnDataTx)
            ).Trans(stT.waitOnDataTx,
                    (w_last & w_ack, stT.waitOnAck)
            ).Trans(stT.waitOnAck,
                    (reqAckHasCome, stT.waitOnInput)
            ).stateReg

        If(st._eq(stT.waitOnInput) & beginReq,  # timeout is counting only when there is pending data
            # start new request
            req.vld(1),
            # registers are updated only if the request is accepted
            If(req.rd,
                If(endOfLenBlock,
                   # wrap to the start of the array
                   offset(0),
                   remaining(ITEMS)
                ).Else(
                    offset(offset + fitTo(buff.size, offset)),
                    remaining(remaining - fitTo(buff.size, remaining))
                ),
                sizeOfitems(sizeTmp),
                timeoutCntr(TIMEOUT_MAX)
            )
        ).Else(
            req.vld(0),
            # count down while there is some data on the output of the buffer
            If(buff.dataOut.vld & st._eq(stT.waitOnInput) & (timeoutCntr != 0),
               timeoutCntr(timeoutCntr - 1)
            )
        )

        reqAck.rd(st._eq(stT.waitOnAck))

        self.uploadedCntrHandler(st, reqAckHasCome, sizeOfitems)

        # it does not matter when lastWordCntr is changing when there is no
        # request
        startSendingData = st._eq(stT.waitOnInput) & beginReq & req.rd
        If(startSendingData,
            lastWordCntr(sizeTmp)
        ).Elif((lastWordCntr != 0) & w_ack,
            lastWordCntr(lastWordCntr - 1)
        )

        buff.dataIn(self.items)

        # item is resized to the width of the data bus
        connect(buff.dataOut.data, w.data, fit=True)

        # data is moved from the buffer to w only in the waitOnDataTx state
        StreamNode(masters=[buff.dataOut],
                   slaves=[w]
                   ).sync(st._eq(stT.waitOnDataTx))
        # presumably all byte lanes enabled - confirm against mask()
        w.strb(mask(w.strb._dtype.bit_length()))
        w.last(w_last)