def reqHandler(self, dpReq, orderFifoIn):
        """
        Merge the request channels of all drivers using fair (round-robin)
        arbitration and push the index of the winning driver into the order
        FIFO so responses can later be routed back in the same order.
        """
        # requests are only proxied here, collect them from each driver
        drvRequests = [drv.req for drv in self.drivers]
        builder = HsBuilder.join_fair(self, drvRequests, exportSelected=True)
        mergedReq = builder.end
        joinUnit = builder.lastComp

        # merged request transfers only when dpReq and orderFifoIn both accept
        StreamNode(masters=[mergedReq], slaves=[dpReq, orderFifoIn]).sync()
        connect(mergedReq, dpReq, exclude=[dpReq.vld, dpReq.rd])
        # record which driver was selected (one-hot converted to binary index)
        orderFifoIn.data(oneHotToBin(self, joinUnit.selectedOneHot.data))
Beispiel #2
0
    def _impl(self):
        """
        Build the storage of a linked-list item container.

        Each of the ``self.ITEMS`` slots holds a value and a pointer to its
        successor; two per-slot bitmaps track which slots are allocated
        (``item_valid``) and which are currently the tail of a list
        (``item_last``). The ``insert`` port appends a new item (optionally
        behind a parent node when ``insert.append`` is set), the ``pop`` port
        reads an item by address and frees its slot.
        """
        addr_t = self.addr_t
        value_t = self.value_t
        # one bit per storage slot
        item_mask_t = Bits(self.ITEMS)

        # bitmap used to quickly detect position of an empty node
        item_valid = self._reg("item_valid", item_mask_t, def_val=0)
        # bitmap of items which are currently the tail of their list
        item_last = self._reg("item_last", item_mask_t, def_val=0)
        # an address on where next item should be inserted
        insert_addr_next = self._reg("insert_addr_next", addr_t, def_val=0)
        insert_addr_next(
            oneHotToBin(
                self,
                rename_signal(
                    self, ~item_valid.next,
                    "item_become_invalid")))  # get index of first non valid

        pop = self.pop
        insert = self.insert

        # advertise where the next item will land; ready while any slot is free
        insert.addr_ret(insert_addr_next)
        insert.rd(item_valid != mask(self.ITEMS))

        # one-hot strobes for the slots affected by this cycle's pop/insert
        pop_one_hot = rename_signal(self,
                                    binToOneHot(pop.addr, en=pop.vld & pop.rd),
                                    "pop_one_hot")
        insert_one_hot = rename_signal(
            self, binToOneHot(insert_addr_next, en=insert.vld & insert.rd),
            "insert_one_hot")
        # parent-slot strobe, active only when appending behind an existing node
        insert_parent_one_hot = rename_signal(
            self,
            binToOneHot(insert.addr,
                        en=insert.vld & insert.rd & insert.append),
            "insert_parent_one_hot")

        # popped slot becomes free, inserted slot becomes allocated
        item_valid((item_valid & ~pop_one_hot) | insert_one_hot)
        # newly inserted item is a tail; its parent (if appended to) no longer is
        item_last((item_last & ~insert_parent_one_hot) | insert_one_hot)

        # payload storage and next-pointer storage
        values = self._sig("values", value_t[self.ITEMS])
        next_ptrs = self._sig("next_ptrs", addr_t[self.ITEMS])

        # synchronous write of value and successor pointer on accepted insert
        If(
            self.clk._onRisingEdge(),
            If(
                insert.vld & insert.rd,
                next_ptrs[insert.addr]
                (insert_addr_next),  # append behind parent node at insert_ptr
                values[insert_addr_next](insert.data),
            ))
        # pop side: combinational read of value, successor and status flags
        pop.data(values[pop.addr])
        pop.addr_next(next_ptrs[pop.addr])
        pop.last(item_last[pop.addr] & item_valid[pop.addr])
        pop.vld(item_valid != 0)
Beispiel #3
0
    def reqHandler(self, dpReq, orderFifoIn):
        """
        Arbitrate the request streams of all drivers with round-robin
        fairness; the index of the selected driver is stored into the order
        FIFO so the response path can dispatch in the same order.
        """
        # this component only proxies the requests of its drivers
        requests = [drv.req for drv in self.drivers]
        builder = HsBuilder.join_fair(self, requests, exportSelected=True)
        joinedReq = builder.end
        joinComp = builder.lastComp

        # the joined request fires only when both consumers are ready
        StreamNode(masters=[joinedReq],
                   slaves=[dpReq, orderFifoIn]).sync()
        connect(joinedReq, dpReq, exclude=[dpReq.vld, dpReq.rd])
        # convert the one-hot selection to a binary driver index
        orderFifoIn.data(oneHotToBin(self, joinComp.selectedOneHot.data))
Beispiel #4
0
    def connect_lookup_port(self, lookup: AddrHs,
                            tag_mem_port_r: BramPort_withoutClk,
                            lookupRes: AxiCacheTagArrayLookupResIntf,
                            update_tmp: StructIntf):
        """
        Resolve tag-lookup requests against the tag memory.

        The request is registered into ``lookup_tmp`` while the tag RAM read
        is in flight (one clock of latency); the tags read out are compared
        per way against the requested address, and concurrent writes through
        the update port (visible in ``update_tmp``) are compensated so the
        result stays coherent.

        :param lookup: input stream with the address (and optional id) to look up
        :param tag_mem_port_r: read port of the tag memory
        :param lookupRes: output with per-way match, found flag and raw tags
        :param update_tmp: registered state of the update port, used for
            update/lookup collision detection
        """
        # request currently waiting for tag RAM read data
        lookup_tmp = self._reg(
            "lookup_tmp",
            HStruct(*([(lookup.id._dtype, "id")] if lookup.ID_WIDTH else ()),
                    (lookup.addr._dtype, "addr"), (BIT, "vld")),
            def_val={"vld": 0})

        pa = self.parse_addr
        just_updated = rename_signal(
            self,
            # updated on same index (using "update" port) = the result which is currently on output
            # of the ram may be invalid
            update_tmp.vld
            & pa(lookup_tmp.addr)[1]._eq(pa(update_tmp.addr)[1]),
            "just_updated")

        # tag comparator
        tag, _, _ = self.parse_addr(lookup_tmp.addr)
        tags = tag_mem_port_r.dout._reinterpret_cast(
            self.tag_record_t[self.WAY_CNT])
        found = [
            # is matching and just this tag was not updated
            (t.valid & t.tag._eq(tag) &
             (~just_updated | (update_tmp.vld & ~update_tmp.way_en[i]))) | (
                 # or was updated to the matching tag
                 just_updated & update_tmp.way_en[i] & ~update_tmp.delete
                 & pa(lookup_tmp.addr)[0]._eq(pa(update_tmp.addr)[0]))
            for i, t in enumerate(tags)
        ]

        # if the data which is currently on output of the ram was
        # just updated it means that the data is invalid and the update
        # data will be lost in next clock cycle, if the consumer of lookupRes
        # can not consume the data just now, we need to perform lookup in tag_array
        # once again
        lookup.rd(~lookup_tmp.vld | lookupRes.rd)
        If(
            lookup.rd,
            lookup_tmp.id(lookup.id) if lookup.ID_WIDTH else [],
            lookup_tmp.addr(lookup.addr),
        )
        # keep the pending request valid while the consumer stalls
        lookup_tmp.vld(lookup.vld | (lookup_tmp.vld & ~lookupRes.rd))

        # on a stall replay the pending address, otherwise feed the new one
        tag_mem_port_r.addr((lookup_tmp.vld & ~lookupRes.rd)._ternary(
            self.parse_addr(lookup_tmp.addr)
            [1],  # lookup previous address and look for a change
            self.parse_addr(lookup.addr)[1],  # lookup a new address
        ))
        tag_mem_port_r.en(lookup.vld | (lookup_tmp.vld & ~lookupRes.rd))

        if lookupRes.ID_WIDTH:
            lookupRes.id(lookup_tmp.id)
        lookupRes.addr(lookup_tmp.addr)
        # binary way index derived from the per-way one-hot match vector
        lookupRes.way(oneHotToBin(self, found))
        lookupRes.found(Or(*found))
        lookupRes.tags(tags)

        lookupRes.vld(lookup_tmp.vld)
Beispiel #5
0
    def data_insert(self, items: BramPort_withReadMask_withoutClk):
        """
        * check if this address is already present in address CAM or w_in_reg
        * if it is possible to update data in w_in_reg or in data_ram of this buffer
        * else allocate new data (insert to address CAM of ooo_fifo) and store data to w_in_reg
        * if w_in_tmp reg is not being updated, forward it to the data_ram to flush it to main memory

        .. figure:: ./_static/AxiWriteAggregator_data_insert.png

        :param items: write port of the data RAM holding the buffered cachelines
        :note: we must not let data from tmp reg if next w_in has same address (we have to update tmp reg instead)

        """
        w_in = self.w
        w_tmp = self.w_in_reg
        ooo_fifo = self.ooo_fifo
        write_pre_lookup = ooo_fifo.write_pre_lookup
        write_pre_lookup_res = ooo_fifo.write_pre_lookup_res

        # every incoming write address is searched in the CAM in parallel
        write_pre_lookup.data(w_in.addr)
        w_tmp_in = w_tmp.dataIn
        w_tmp_out = w_tmp.dataOut

        # if true it means that the current input write data should be merged with
        # a content of the w_tmp register
        found_in_tmp_reg = rename_signal(
            self, w_tmp_in.vld & w_in.addr._eq(w_tmp_out.addr),
            "found_in_tmp_reg")
        accumulated_mask = rename_signal(self, w_in.mask | w_tmp_out.mask,
                                         "accumulated_mask")
        If(
            w_tmp_out.vld & found_in_tmp_reg,
            # update only bytes selected by w_in mask in tmp reg
            w_tmp_in.data(
                apply_write_with_mask(w_tmp_out.data, w_in.data, w_in.mask)),
            w_tmp_in.mask(w_in.mask | w_tmp_out.mask),
            w_tmp_in.mask_byte_unaligned(
                is_mask_byte_unaligned(accumulated_mask))).Else(
                    w_tmp_in.data(w_in.data), w_tmp_in.mask(w_in.mask),
                    w_tmp_in.mask_byte_unaligned(
                        is_mask_byte_unaligned(w_in.mask)))
        # NOTE: a stray trailing comma previously made this statement a
        # one-element tuple expression; the call itself was unaffected
        w_tmp_in.addr(w_in.addr)
        w_tmp_in.cam_lookup(write_pre_lookup_res.data)

        StreamNode([w_in], [w_tmp_in, write_pre_lookup]).sync()
        write_pre_lookup_res.rd(1)

        # CAM insert: one-hot vector of items matching the tmp-reg address
        # which are valid and not locked for write
        cam_index_onehot = rename_signal(
            self, w_tmp_out.cam_lookup & ooo_fifo.item_valid
            & ~ooo_fifo.item_write_lock, "cam_index_onehot")
        cam_found = rename_signal(self, cam_index_onehot != 0, "cam_found")
        cam_found_index = oneHotToBin(self, cam_index_onehot,
                                      "cam_found_index")

        write_execute = ooo_fifo.write_execute
        write_execute.key(w_tmp_out.addr)

        # the slot where a new item would be allocated must currently be free
        current_empty = rename_signal(
            self, ~ooo_fifo.item_valid[write_execute.index], "current_empty")
        will_insert_new_item = rename_signal(
            self,
            ~cam_found & ~found_in_tmp_reg & current_empty & write_execute.vld,
            "will_insert_new_item")

        # store to tmp register (and accumulate if possible)
        item_insert_last = self._sig("item_insert_last")
        item_insert_first = self._sig("item_insert_first")

        # insert word iteration,
        # push data to items RAM
        if self.BUS_WORDS_IN_CACHE_LINE == 1:
            # a cacheline fits in to a single bus word, no extra care required
            item_insert_last(1)
            item_insert_first(1)
            items.din(w_tmp_out.data)
            items.we(w_tmp_out.mask)
            push_ptr = write_execute.index
        else:
            # iteration over multiple bus words to store a cacheline
            push_offset = self._reg("push_offset",
                                    self.word_index_t,
                                    def_val=0)
            item_write_start = rename_signal(
                self, will_insert_new_item | (cam_found & w_tmp_out.vld),
                "item_write_start")
            If(
                w_tmp_out.vld & found_in_tmp_reg,
                push_offset(0),
            ).Elif(
                items.en.vld & items.en.rd & (item_write_start |
                                              (push_offset != 0)),
                If(push_offset != self.WORD_OFFSET_MAX,
                   push_offset(push_offset + 1)).Else(push_offset(0)))
            item_insert_last(push_offset._eq(self.WORD_OFFSET_MAX))
            item_insert_first(push_offset._eq(0))
            # extend RAM addresses with the word offset inside the cacheline
            cam_found_index = Concat(cam_found_index, push_offset)
            push_ptr = Concat(write_execute.index, push_offset)

            # select the bus-word slice of data/mask for the current offset
            DIN_W = self.DATA_WIDTH
            WE_W = DIN_W // 8
            Switch(push_offset).add_cases([(i, [
                items.din(w_tmp_out.data[(i + 1) * DIN_W:i * DIN_W], ),
                items.we(w_tmp_out.mask[(i + 1) * WE_W:i * WE_W], )
            ]) for i in range(self.WORD_OFFSET_MAX + 1)]).Default(
                items.din(None),
                items.we(None),
            )

        # write either into the already allocated item or the new slot
        If(w_tmp_out.vld & cam_found,
           items.addr(cam_found_index)).Else(items.addr(push_ptr))
        items.do_accumulate(w_tmp_out.vld & w_tmp_out.mask_byte_unaligned)
        items.do_overwrite(w_tmp_out.vld & ~cam_found)

        write_confirm = ooo_fifo.write_confirm
        StreamNode(
            masters=[w_tmp_out, write_execute],
            slaves=[items.en],
            extraConds={
                write_execute:
                rename_signal(self, will_insert_new_item & item_insert_last,
                              "ac_write_en"),
                items.en:
                rename_signal(
                    self,
                    (~w_in.vld | will_insert_new_item | ~item_insert_first) &
                    (write_confirm.rd | cam_found), "items_en_en"),
                w_tmp_out:
                rename_signal(
                    self, found_in_tmp_reg |
                    (((write_confirm.rd & current_empty) | cam_found)
                     & item_insert_last), "w_tmp_out_en")
            },
            skipWhen={
                write_execute: found_in_tmp_reg,
                items.en: found_in_tmp_reg,
            }).sync()
        # confirm allocation once the last word of the cacheline is accepted
        write_confirm.vld(w_tmp_out.vld & will_insert_new_item
                          & item_insert_last & items.en.rd)
    def speculative_read_handler(self):
        """
        Connect the speculative_read port to internal storages of the :class:`AxiWriteAggregator`

        We need to handle several cases:

        1. the data is currently in tmp register
        2. the data was in tmp register and now is in data memory
        3. the data is in data memory
        4. the data was in data memory and now it is deallocated
        5. the data was not found anywhere


        Handling of speculative read has following stages:

        1. search input register and main address CAM for data
        2. optionally load the data from ram
        3. send data to speculative_read_data and set resp to error if was not found
           it may also happen that the data was flushed in the mean time

        .. figure:: ./_static/AxiStoreQueueWritePropagating_speculativeRead.png

        :note: speculative read never blocks the write channel and thus data may be invalid if the speculative read data is stalled.
            This should be handled in master of speculative read port (Other component which is using this component).
        """
        sra = self.speculative_read_addr

        # CLOCK_PERIOD 0: issue CAM lookup and snapshot the current w_in_reg
        ooo_fifo = self.ooo_fifo
        ooo_fifo.read_lookup.data(sra.addr[:self.CACHE_LINE_OFFSET_BITS])

        w_in_reg = self.w_in_reg.dataOut
        # two-stage pipeline carrying the request together with the snapshot
        w_in_reg_tmp = HObjList(
            HandshakedReg(AxiWriteAggregatorWriteTmpIntf) for _ in range(2))
        for r in w_in_reg_tmp:
            r._updateParamsFrom(w_in_reg)
            r.ID_WIDTH = self.ID_WIDTH

        self.w_in_reg_tmp = w_in_reg_tmp

        w_i = w_in_reg_tmp[0].dataIn
        w_i.orig_request_addr(sra.addr[:self.CACHE_LINE_OFFSET_BITS])
        w_i.orig_request_addr_eq(w_in_reg.addr._eq(w_i.orig_request_addr))
        w_i.orig_request_id(sra.id)
        w_i.orig_request_valid(sra.vld)
        w_i.addr(w_in_reg.addr)
        w_i.data(w_in_reg.data)
        w_i.valid(w_in_reg.vld)

        StreamNode(
            [sra],
            [ooo_fifo.read_lookup, w_i],
            skipWhen={
                sra: ~sra.vld
            },  # flush the pipeline if no request
        ).sync()

        # CLK_PERIOD 1: evaluate CAM lookup result, start RAM read if found
        read_lookup_res = HsBuilder(self, ooo_fifo.read_lookup_res).buff(1).end
        StreamNode([read_lookup_res, w_in_reg_tmp[0].dataOut],
                   [w_in_reg_tmp[1].dataIn]).sync()
        w_in_reg_tmp[1].dataIn(
            w_in_reg_tmp[0].dataOut,
            exclude=[w_in_reg_tmp[1].dataIn.vld, w_in_reg_tmp[1].dataIn.rd])

        # match vector restricted to items which are still allocated
        in_ram_flag = rename_signal(self,
                                    read_lookup_res.data & ooo_fifo.item_valid,
                                    "in_ram_flag")
        found_in_ram_flag = self._reg("found_in_ram_flag", def_val=0)
        If(read_lookup_res.vld & read_lookup_res.rd,
           found_in_ram_flag(in_ram_flag != 0))

        ram_r = self.data_ram.port[2]
        ram_r.en.vld(found_in_ram_flag.next)
        ram_r.addr(oneHotToBin(self, in_ram_flag, "in_ram_index"))

        # CLK_PERIOD 2: select the data source and produce the response
        srd = self.speculative_read_data
        w_in_reg_tmp_o = w_in_reg_tmp[1].dataOut
        StreamNode(
            [w_in_reg_tmp_o],
            [srd],
            # filter out pipeline flushes
            extraConds={
                srd: w_in_reg_tmp_o.orig_request_valid
            },
            skipWhen={
                srd: ~w_in_reg_tmp_o.orig_request_valid
            },
        ).sync()

        # read from in_tmp req has to be postponed so we can potentially load the data from ram first
        found_in_actual_w_in_reg = rename_signal(
            self,
            w_in_reg.vld & w_in_reg.addr._eq(w_in_reg_tmp_o.orig_request_addr),
            "spec_read_found_in_actual_w_in_reg")
        w_in_reg_tmp_1_o = w_in_reg_tmp[0].dataOut
        found_in_w_in_reg_1 = rename_signal(
            self, w_in_reg_tmp_1_o.vld & w_in_reg_tmp_1_o.valid
            & w_in_reg_tmp_1_o.addr._eq(w_in_reg_tmp_o.orig_request_addr),
            "spec_read_found_in_w_in_reg_1")
        found_in_write_tmp_reg_2 = rename_signal(
            self, w_in_reg_tmp_o.vld & w_in_reg_tmp_o.valid
            & w_in_reg_tmp_o.orig_request_addr_eq,
            "spec_read_found_in_write_tmp_reg_2")

        srd.id(w_in_reg_tmp_o.orig_request_id)
        # priority: freshest copy of the data wins (current reg first,
        # then pipeline stages, then data RAM)
        If(
            found_in_actual_w_in_reg,
            # found in tmp register just now
            srd.data(w_in_reg.data),
            srd.resp(RESP_OKAY),
            srd.last(1),
        ).Elif(
            found_in_w_in_reg_1,
            # found in snapshot held in pipeline stage 0
            srd.data(w_in_reg_tmp_1_o.data),
            srd.resp(RESP_OKAY),
            srd.last(1),
        ).Elif(
            found_in_write_tmp_reg_2,
            # found in snapshot held in pipeline stage 1
            srd.data(w_in_reg_tmp_o.data),
            srd.resp(RESP_OKAY),
            srd.last(1),
        ).Elif(
            found_in_ram_flag,
            # found in write data memory
            srd.data(ram_r.dout),
            srd.resp(RESP_OKAY),
            srd.last(1),
        ).Else(
            # not found anywhere
            # NOTE(review): RESP_EXOKAY is used here as the "not found" marker
            # although the docstring says "error" — confirm intended encoding
            srd.data(None),
            srd.resp(RESP_EXOKAY),
            srd.last(1),
        )
Beispiel #7
0
    def read_request_section(self, read_ack: RtlSignal, item_vld: RtlSignal,
                             waiting_transaction_id: RtlSignal,
                             waiting_transaction_vld: RtlSignal,
                             data_copy_override: VldSynced):
        """
        Handle the AR (read address) channel: forward requests to the master
        interface unless an overlapping transaction is already in flight,
        in which case this transaction is parked and later satisfied by
        copying the parent transaction's data.

        :param read_ack: signal asserted when a read data beat is accepted
        :param item_vld: per-transaction valid bitmap (one bit per CAM item)
        :param waiting_transaction_id: memory mapping a parent transaction to
            the id of the transaction waiting on it
        :param waiting_transaction_vld: per-transaction "is waiting" bitmap
        :param data_copy_override: strobe+id used to copy the parent's data
            for a dependent transaction that is just finishing
        """
        s = self.s
        m = self.m
        addr_cam = self.addr_cam
        ITEMS = addr_cam.ITEMS
        addr_cam_out = self.add_addr_cam_out_reg(item_vld)

        with self._paramsShared():
            s_ar_tmp = self.s_ar_tmp = AxiSReg(s.AR_CLS)

        # match result of a CAM insert performed in the previous cycle
        # (the CAM itself could not see it yet)
        last_cam_insert_match = self._reg("last_cam_insert_match",
                                          Bits(ITEMS),
                                          def_val=0)
        # in-flight transactions with the same cacheline address
        match_res = rename_signal(
            self, item_vld & (addr_cam_out.data | last_cam_insert_match)
            & ~waiting_transaction_vld, "match_res")
        # stall if a transaction with the same id is already pending
        blocking_access = rename_signal(
            self, s.ar.valid & (item_vld[s.ar.id] |
                                (s_ar_tmp.dataOut.valid &
                                 (s.ar.id._eq(s_ar_tmp.dataOut.id)))),
            "blocking_access")
        s_ar_node = StreamNode(
            [s.ar],
            [addr_cam.match[0], s_ar_tmp.dataIn],
        )
        s_ar_node.sync(~blocking_access)
        # s_ar_node_ack = s_ar_node.ack() & ~blocking_access
        s_ar_tmp.dataIn(s.ar, exclude={s.ar.valid, s.ar.ready})

        parent_transaction_id = oneHotToBin(self, match_res,
                                            "parent_transaction_id")

        # forward to master only when no matching transaction is in flight;
        # otherwise skip m.ar and wait for the parent's data instead
        m_ar_node = StreamNode(
            [s_ar_tmp.dataOut, addr_cam_out],
            [m.ar],
            extraConds={m.ar: match_res._eq(0)},
            skipWhen={m.ar: match_res != 0},
        )
        m_ar_node.sync()
        m.ar(s_ar_tmp.dataOut, exclude={m.ar.valid, m.ar.ready})
        addr_cam.match[0].data(s.ar.addr[:self.CACHE_LINE_OFFSET_BITS])
        ar_ack = rename_signal(self, m_ar_node.ack(), "ar_ack")

        # insert into cam on empty position specified by id of this transaction
        acw = addr_cam.write
        acw.addr(s_ar_tmp.dataOut.id)
        acw.data(s_ar_tmp.dataOut.addr[:self.CACHE_LINE_OFFSET_BITS])
        acw.vld(addr_cam_out.vld)
        #If(s_ar_node_ack,
        # remember a same-address match between the transaction being
        # inserted into the CAM and the incoming one (CAM cannot see it yet)
        last_cam_insert_match(
            binToOneHot(
                s_ar_tmp.dataOut.id,
                en=~blocking_access & s.ar.valid & s_ar_tmp.dataOut.valid
                & s_ar_tmp.dataOut.addr[:self.CACHE_LINE_OFFSET_BITS]._eq(
                    s.ar.addr[:self.CACHE_LINE_OFFSET_BITS])))
        #)

        # per-transaction lifetime tracking
        for trans_id in range(ITEMS):
            # it becomes ready if we are requested for it on "s" interface
            this_trans_start = s_ar_tmp.dataOut.id._eq(trans_id) & \
                (data_copy_override.vld | ar_ack)
            # item becomes invalid if we read last data word
            this_trans_end = read_ack & s.r.id._eq(trans_id) & s.r.last
            this_trans_end = rename_signal(self, this_trans_end,
                                           f"this_trans_end{trans_id:d}")
            item_vld[trans_id](apply_set_and_clear(item_vld[trans_id],
                                                   this_trans_start,
                                                   this_trans_end))

            # this transaction starts waiting on a parent transaction
            waiting_transaction_start = (ar_ack & (match_res != 0)
                                         & parent_transaction_id._eq(trans_id)
                                         & ~this_trans_end)
            # note: this_trans_end in this context is for parent transaction
            # which was not started just now, so it may be ending just now
            waiting_transaction_start = rename_signal(
                self, waiting_transaction_start,
                f"waiting_transaction_start{trans_id:d}")
            _waiting_transaction_vld = apply_set_and_clear(
                waiting_transaction_vld[trans_id], waiting_transaction_start,
                this_trans_end)
            waiting_transaction_vld[trans_id](rename_signal(
                self, _waiting_transaction_vld,
                f"waiting_transaction_vld{trans_id:d}"))

        # record which transaction waits on which parent
        If(
            self.clk._onRisingEdge(),
            If((match_res != 0) & ar_ack,
               waiting_transaction_id[parent_transaction_id](
                   s_ar_tmp.dataOut.id)))

        # parent transaction is finishing just now
        # we need to quickly grab the data in data buffer and copy it also
        # for this transaction
        data_copy_override.vld(s_ar_tmp.dataOut.valid & read_ack
                               & (match_res != 0)
                               & s.r.id._eq(parent_transaction_id) & s.r.last)
        data_copy_override.data(s_ar_tmp.dataOut.id)