class User(Elaboratable):
    """Demo user logic.

    Holds a 32-byte RX buffer and a TX buffer pre-loaded with a greeting;
    mirrors the low bits of the first received byte onto two LEDs, and
    requests one transmission after each received packet.
    """

    def __init__(self):
        # RX buffer (written externally) and TX buffer (read externally),
        # pre-initialised with the greeting string.
        self.user_rx_mem = Memory(width=8, depth=32)
        self.user_tx_mem = Memory(width=8, depth=32,
                                  init=[ord(x) for x in "Hello, World!!\r\n"])

        # Ports handed to external logic.
        self.mem_r_port = self.user_tx_mem.read_port()
        self.mem_w_port = self.user_rx_mem.write_port()

        # Control strobes.
        self.packet_received = Signal()
        self.transmit_ready = Signal()
        self.transmit_packet = Signal()

    def elaborate(self, platform):
        m = Module()

        # Local ports: read side of the RX buffer, (tied-off) write side
        # of the TX buffer.
        rx_read = self.user_rx_mem.read_port()
        tx_write = self.user_tx_mem.write_port()
        m.submodules += [self.mem_r_port, self.mem_w_port, rx_read, tx_write]

        led1 = platform.request("user_led", 0)
        led2 = platform.request("user_led", 1)

        # The TX memory is never written from here; hold its write port idle.
        m.d.comb += tx_write.addr.eq(0)
        m.d.comb += tx_write.en.eq(0)
        m.d.comb += tx_write.data.eq(0)

        # Always read byte 0 of the RX buffer.
        m.d.comb += rx_read.addr.eq(0)

        # Mirror the two low bits of that byte onto the LEDs.
        m.d.sync += led1.eq(rx_read.data & 1)
        m.d.sync += led2.eq((rx_read.data & 2) >> 1)

        # One transmit request per received packet: wait for a packet,
        # then pulse transmit_packet once the transmitter is ready.
        with m.FSM():
            with m.State("IDLE"):
                m.d.sync += self.transmit_packet.eq(0)
                with m.If(self.packet_received):
                    m.next = "RX"
            with m.State("RX"):
                with m.If(self.transmit_ready):
                    m.d.sync += self.transmit_packet.eq(1)
                    m.next = "IDLE"

        return m
def elab(self, m):
    """Instantiate the backing memory and wire its ports to the module's
    external read/write interface signals."""
    storage = Memory(width=self.width, depth=self.depth,
                     simulate=self.is_sim)
    m.submodules['wp'] = write_port = storage.write_port()
    m.submodules['rp'] = read_port = storage.read_port(transparent=False)

    # Write side: driven directly from the external write interface.
    m.d.comb += write_port.en.eq(self.w_en)
    m.d.comb += write_port.addr.eq(self.w_addr)
    m.d.comb += write_port.data.eq(self.w_data)

    # Read side: always enabled; being non-transparent, read data appears
    # the cycle after the address is presented.
    m.d.comb += read_port.en.eq(1)
    m.d.comb += read_port.addr.eq(self.r_addr)
    m.d.comb += self.r_data.eq(read_port.data)
def test_rmii_rx():
    """Simulate RMIIRx receiving two back-to-back 102-byte frames and check
    the reported length/offset plus the bytes written to packet memory."""
    import random
    from nmigen.back import pysim
    from nmigen import Memory

    # RMII pins driven by the testbench.
    crs_dv = Signal()
    rxd0 = Signal()
    rxd1 = Signal()

    # 128-byte packet memory the receiver writes into (a ring buffer, so
    # the second frame wraps around).
    mem = Memory(8, 128)
    mem_port = mem.write_port()

    mac_addr = [random.randint(0, 255) for _ in range(6)]

    rmii_rx = RMIIRx(mac_addr, mem_port, crs_dv, rxd0, rxd1)

    def testbench():
        def tx_packet():
            # Assert carrier sense / data valid for the whole frame.
            yield (crs_dv.eq(1))
            # Preamble (variable length; two bits per clock: rxd1:rxd0)
            for _ in range(random.randint(10, 40)):
                yield (rxd0.eq(1))
                yield (rxd1.eq(0))
                yield
            # SFD
            yield (rxd0.eq(1))
            yield (rxd1.eq(1))
            yield
            # Data: each byte is sent LSB-first as four di-bits.
            for txbyte in txbytes:
                for dibit in range(0, 8, 2):
                    yield (rxd0.eq((txbyte >> (dibit + 0)) & 1))
                    yield (rxd1.eq((txbyte >> (dibit + 1)) & 1))
                    yield
            yield (crs_dv.eq(0))
            # Finish clocking
            for _ in range(6):
                yield

        # Let the design settle before the first frame.
        for _ in range(10):
            yield

        # 102-byte test frame (broadcast Ethernet header + payload;
        # presumably the last 4 bytes are the FCS — verify against RMIIRx).
        txbytes = [
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF0, 0xDE, 0xF1, 0x38,
            0x89, 0x40, 0x08, 0x00, 0x45, 0x00, 0x00, 0x54, 0x00, 0x00,
            0x40, 0x00, 0x40, 0x01, 0xB6, 0xD0, 0xC0, 0xA8, 0x01, 0x88,
            0xC0, 0xA8, 0x01, 0x00, 0x08, 0x00, 0x0D, 0xD9, 0x12, 0x1E,
            0x00, 0x07, 0x3B, 0x3E, 0x0C, 0x5C, 0x00, 0x00, 0x00, 0x00,
            0x13, 0x03, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x57,
            0x6F, 0x72, 0x6C, 0x64, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20,
            0x57, 0x6F, 0x72, 0x6C, 0x64, 0x48, 0x65, 0x6C, 0x6C, 0x6F,
            0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x48, 0x65, 0x6C, 0x6C,
            0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x48, 0x52, 0x32,
            0x1F, 0x9E
        ]

        # Transmit first packet
        yield from tx_packet()

        # Check packet was received
        assert (yield rmii_rx.rx_valid)
        assert (yield rmii_rx.rx_len) == 102
        assert (yield rmii_rx.rx_offset) == 0
        mem_contents = []
        for idx in range(102):
            mem_contents.append((yield mem[idx]))
        assert mem_contents == txbytes

        # Pause (inter-frame gap)
        for _ in range(20):
            yield
        assert (yield rmii_rx.rx_valid) == 0

        # Transmit a second packet
        yield from tx_packet()

        # Check packet was received; this frame starts at offset 102 and
        # wraps around the 128-byte ring, hence the modulo indexing.
        assert (yield rmii_rx.rx_valid)
        assert (yield rmii_rx.rx_len) == 102
        assert (yield rmii_rx.rx_offset) == 102
        mem_contents = []
        for idx in range(102):
            mem_contents.append((yield mem[(102 + idx) % 128]))
        assert mem_contents == txbytes
        yield

    mod = Module()
    mod.submodules += rmii_rx, mem_port

    # Run the testbench against a 50 MHz clock, dumping a VCD trace.
    vcdf = open("rmii_rx.vcd", "w")
    with pysim.Simulator(mod, vcd_file=vcdf) as sim:
        sim.add_clock(1 / 50e6)
        sim.add_sync_process(testbench())
        sim.run()
class MAC(Elaboratable):
    """
    Ethernet RMII MAC.

    Clock domain:
        This module is clocked at the system clock frequency and generates
        an RMII clock domain internally. All its inputs and outputs are in
        the system clock domain.

    Parameters:
        * `clk_freq`: MAC's clock frequency
        * `phy_addr`: 5-bit address of the PHY
        * `mac_addr`: MAC address in standard XX:XX:XX:XX:XX:XX format

    Memory Ports:
        * `rx_port`: Read port into RX packet memory, 8 bytes by 2048 cells.
        * `tx_port`: Write port into TX packet memory, 8 bytes by 2048 cells.

    Pins:
        * `rmii`: signal group containing txd0, txd1, txen, rxd0, rxd1,
                  crs_dv, ref_clk
        * `mdio`: signal group containing mdc, mdio
        * `phy_rst`: PHY RST pin (output, active low)
        * `eth_led`: Ethernet LED, active high, pulsed on packet traffic

    TX port:
        * `tx_start`: Pulse high to begin transmission of a packet from memory
        * `tx_len`: 11-bit length of packet to transmit
        * `tx_offset`: n-bit address offset of packet to transmit,
                       with n=log2(tx_buf_size)

    RX port:
        * `rx_valid`: Held high while `rx_len` and `rx_offset` are valid
        * `rx_len`: 11-bit length of received packet
        * `rx_offset`: n-bit address offset of received packet,
                       with n=log2(rx_buf_size)
        * `rx_ack`: Pulse high to acknowledge packet receipt

    Inputs:
        * `phy_reset`: Assert to reset the PHY, de-assert for normal operation

    Outputs:
        * `link_up`: High while link is established
    """
    def __init__(self, clk_freq, phy_addr, mac_addr, rmii, mdio, phy_rst,
                 eth_led, tx_buf_size=2048, rx_buf_size=2048):
        # Memory Ports
        self.rx_port = None  # Assigned below
        self.tx_port = None  # Assigned below

        # TX port
        self.tx_start = Signal()
        self.tx_len = Signal(11)
        self.tx_offset = Signal(max=tx_buf_size - 1)

        # RX port
        self.rx_ack = Signal()
        self.rx_valid = Signal()
        self.rx_len = Signal(11)
        self.rx_offset = Signal(max=rx_buf_size - 1)

        # Inputs
        self.phy_reset = Signal()

        # Outputs
        self.link_up = Signal()

        self.clk_freq = clk_freq
        self.phy_addr = phy_addr
        # Parse "XX:XX:XX:XX:XX:XX" into a list of six byte values.
        self.mac_addr = [int(x, 16) for x in mac_addr.split(":")]
        self.rmii = rmii
        self.mdio = mdio
        self.phy_rst = phy_rst
        self.eth_led = eth_led

        # Create packet memories and interface ports.
        # The system-clock-side ports are created here so callers can
        # connect to them; the RMII-side ports are created in elaborate().
        self.tx_mem = Memory(8, tx_buf_size)
        self.tx_port = self.tx_mem.write_port()
        self.rx_mem = Memory(8, rx_buf_size)
        self.rx_port = self.rx_mem.read_port(transparent=False)

    def elaborate(self, platform):
        m = Module()

        # Create RMII clock domain from RMII clock input
        cd = ClockDomain("rmii", reset_less=True)
        m.d.comb += cd.clk.eq(self.rmii.ref_clk)
        m.domains.rmii = cd

        # Create RX write and TX read ports for RMII use.
        # The packet memories are thus dual-clock: one side in "sync",
        # the other in "rmii".
        rx_port_w = self.rx_mem.write_port(domain="rmii")
        tx_port_r = self.tx_mem.read_port(domain="rmii", transparent=False)
        m.submodules += [self.rx_port, rx_port_w, self.tx_port, tx_port_r]
        m.d.comb += [self.rx_port.en.eq(1), tx_port_r.en.eq(1)]

        # Create submodules for PHY management and RMII RX/TX engines.
        m.submodules.phy_manager = phy_manager = PHYManager(
            self.clk_freq, self.phy_addr, self.phy_rst, self.mdio.mdio,
            self.mdio.mdc)
        m.submodules.stretch = stretch = PulseStretch(int(1e6))
        rmii_rx = RMIIRx(self.mac_addr, rx_port_w, self.rmii.crs_dv,
                         self.rmii.rxd0, self.rmii.rxd1)
        rmii_tx = RMIITx(tx_port_r, self.rmii.txen, self.rmii.txd0,
                         self.rmii.txd1)

        # Create FIFOs to interface to RMII modules: each entry packs
        # (offset, len) for one packet and crosses the sync/rmii boundary.
        rx_fifo = AsyncFIFO(width=11 + self.rx_port.addr.nbits, depth=4)
        tx_fifo = AsyncFIFO(width=11 + self.tx_port.addr.nbits, depth=4)

        m.d.comb += [
            # RX FIFO: written by the RMII receiver, read by the user.
            rx_fifo.din.eq(Cat(rmii_rx.rx_offset, rmii_rx.rx_len)),
            rx_fifo.we.eq(rmii_rx.rx_valid),
            Cat(self.rx_offset, self.rx_len).eq(rx_fifo.dout),
            rx_fifo.re.eq(self.rx_ack),
            self.rx_valid.eq(rx_fifo.readable),

            # TX FIFO: written by the user, read by the RMII transmitter.
            tx_fifo.din.eq(Cat(self.tx_offset, self.tx_len)),
            tx_fifo.we.eq(self.tx_start),
            Cat(rmii_tx.tx_offset, rmii_tx.tx_len).eq(tx_fifo.dout),
            tx_fifo.re.eq(rmii_tx.tx_ready),
            rmii_tx.tx_start.eq(tx_fifo.readable),

            # Other submodules
            phy_manager.phy_reset.eq(self.phy_reset),
            self.link_up.eq(phy_manager.link_up),
            stretch.trigger.eq(self.rx_valid),
            self.eth_led.eq(stretch.pulse),
        ]

        # Place each FIFO side and the RMII engines in their clock domains.
        rdr = DomainRenamer({"read": "sync", "write": "rmii"})
        wdr = DomainRenamer({"write": "sync", "read": "rmii"})
        rr = DomainRenamer("rmii")

        m.submodules.rx_fifo = rdr(rx_fifo)
        m.submodules.tx_fifo = wdr(tx_fifo)
        m.submodules.rmii_rx = rr(rmii_rx)
        m.submodules.rmii_tx = rr(rmii_tx)

        return m
def elaborate(self, platform: Platform) -> Module:
    """Build the N-way set-associative cache datapath.

    Stage s1 presents the access address; stage s2 resolves hit/miss.
    Misses trigger a line refill over the bus in the FSM below.  For
    read-only caches an internal snoop path invalidates lines written by
    the data cache (FENCE.i support).
    """
    m = Module()

    snoop_addr = Record(self.pc_layout)
    snoop_valid = Signal()

    # -------------------------------------------------------------------------
    # Performance counter
    # TODO: connect to CSR's performance counter
    with m.If(~self.s1_stall & self.s1_valid & self.s1_access):
        m.d.sync += self.access_cnt.eq(self.access_cnt + 1)
    with m.If(self.s2_valid & self.s2_miss & ~self.bus_valid
              & self.s2_access):
        m.d.sync += self.miss_cnt.eq(self.miss_cnt + 1)
    # -------------------------------------------------------------------------

    # One record per way: cached line data, tag, valid flag, and per-way
    # selection strobes (LRU replacement target, snoop hit, optional write).
    way_layout = [('data', 32 * self.nwords),
                  ('tag', self.s1_address.tag.shape()),
                  ('valid', 1),
                  ('sel_lru', 1),
                  ('snoop_hit', 1)]
    if self.enable_write:
        way_layout.append(('sel_we', 1))

    ways = Array(
        Record(way_layout, name='way_idx{}'.format(_way))
        for _way in range(self.nways))
    fill_cnt = Signal.like(self.s1_address.offset)

    # Check hit/miss
    way_hit = m.submodules.way_hit = Encoder(self.nways)
    for idx, way in enumerate(ways):
        m.d.comb += way_hit.i[idx].eq((way.tag == self.s2_address.tag)
                                      & way.valid)

    m.d.comb += self.s2_miss.eq(way_hit.n)
    if self.enable_write:
        # Assuming a HIT, mark the way that hit as the one to be written.
        m.d.comb += ways[way_hit.o].sel_we.eq(self.s2_we & self.s2_valid)

    # set the LRU
    if self.nways == 1:
        # One way: LRU is useless
        lru = Const(0)  # self.nlines
    else:
        # LRU is a vector of N bits, each one indicating the set to replace.
        # Since NWAYS is at most 2, each per-line LRU entry is one bit.
        lru = Signal(self.nlines)
        _lru = lru.bit_select(self.s2_address.line, 1)
        write_ended = self.bus_valid & self.bus_ack & self.bus_last  # err ^ ack = 1
        access_hit = ~self.s2_miss & self.s2_valid & (way_hit.o == _lru)
        with m.If(write_ended | access_hit):
            m.d.sync += _lru.eq(~_lru)

    # read data from the cache
    m.d.comb += self.s2_rdata.eq(ways[way_hit.o].data.word_select(
        self.s2_address.offset, 32))

    # Internal Snoop: watch data-cache writes that target the line
    # currently being handled, so an in-flight refill can be cancelled.
    snoop_use_cache = Signal()
    snoop_tag_match = Signal()
    snoop_line_match = Signal()
    snoop_cancel_refill = Signal()
    if not self.enable_write:
        bits_range = log2_int(self.end_addr - self.start_addr,
                              need_pow2=False)
        m.d.comb += [
            snoop_addr.eq(self.dcache_snoop.addr),  # aux
            snoop_valid.eq(self.dcache_snoop.we & self.dcache_snoop.valid
                           & self.dcache_snoop.ack),
            # The snoop only applies to addresses inside this cache's range.
            snoop_use_cache.eq(snoop_addr[bits_range:] == (
                self.start_addr >> bits_range)),
            snoop_tag_match.eq(snoop_addr.tag == self.s2_address.tag),
            snoop_line_match.eq(snoop_addr.line == self.s2_address.line),
            snoop_cancel_refill.eq(snoop_use_cache & snoop_valid
                                   & snoop_line_match & snoop_tag_match),
        ]
    else:
        m.d.comb += snoop_cancel_refill.eq(0)

    # Refill FSM: fetch a whole line over the bus, wrapping around the
    # line starting at the requested word offset.
    with m.FSM():
        with m.State('READ'):
            with m.If(self.s2_re & self.s2_miss & self.s2_valid):
                m.d.sync += [
                    self.bus_addr.eq(self.s2_address),
                    self.bus_valid.eq(1),
                    # Last word to fetch is the one before the requested
                    # offset (wrap-around fill).
                    fill_cnt.eq(self.s2_address.offset - 1)
                ]
                m.next = 'REFILL'

        with m.State('REFILL'):
            m.d.comb += self.bus_last.eq(fill_cnt == self.bus_addr.offset)
            with m.If(self.bus_ack):
                m.d.sync += self.bus_addr.offset.eq(self.bus_addr.offset + 1)
            with m.If(self.bus_ack & self.bus_last | self.bus_err):
                m.d.sync += self.bus_valid.eq(0)
            # Abort on flush or on a snoop hit against the refilling line.
            with m.If(~self.bus_valid | self.s1_flush | snoop_cancel_refill):
                m.next = 'READ'
                m.d.sync += self.bus_valid.eq(0)

    # mark the way to use (replace)
    m.d.comb += ways[lru.bit_select(self.s2_address.line,
                                    1)].sel_lru.eq(self.bus_valid)

    # generate for N ways
    for way in ways:
        # create the memory structures for valid, tag and data.
        valid = Signal(self.nlines)  # Valid bits

        tag_m = Memory(width=len(way.tag), depth=self.nlines)  # tag memory
        tag_rp = tag_m.read_port()
        snoop_rp = tag_m.read_port()
        tag_wp = tag_m.write_port()
        m.submodules += tag_rp, tag_wp, snoop_rp

        data_m = Memory(width=len(way.data), depth=self.nlines)  # data memory
        data_rp = data_m.read_port()
        data_wp = data_m.write_port(
            granularity=32
        )  # implies that only whole 32-bit words can be written.
        m.submodules += data_rp, data_wp

        # handle valid
        with m.If(self.s1_flush & self.s1_valid):  # flush
            m.d.sync += valid.eq(0)
        with m.Elif(way.sel_lru & self.bus_last & self.bus_ack):  # refill ok
            m.d.sync += valid.bit_select(self.bus_addr.line, 1).eq(1)
        with m.Elif(way.sel_lru & self.bus_err):  # refill error
            m.d.sync += valid.bit_select(self.bus_addr.line, 1).eq(0)
        with m.Elif(self.s2_evict & self.s2_valid
                    & (way.tag == self.s2_address.tag)):  # evict
            m.d.sync += valid.bit_select(self.s2_address.line, 1).eq(0)

        # assignments
        m.d.comb += [
            # While stalled, keep reading the s2 line so the hit check
            # stays stable; otherwise read ahead for s1.
            tag_rp.addr.eq(
                Mux(self.s1_stall, self.s2_address.line,
                    self.s1_address.line)),
            tag_wp.addr.eq(self.bus_addr.line),
            tag_wp.data.eq(self.bus_addr.tag),
            tag_wp.en.eq(way.sel_lru & self.bus_ack & self.bus_last),
            data_rp.addr.eq(
                Mux(self.s1_stall, self.s2_address.line,
                    self.s1_address.line)),
            way.data.eq(data_rp.data),
            way.tag.eq(tag_rp.data),
            way.valid.eq(valid.bit_select(self.s2_address.line, 1))
        ]

        # update cache: CPU or Refill
        # The write port is multiplexed because the memory can only have
        # one write port.
        if self.enable_write:
            update_addr = Signal(len(data_wp.addr))
            update_data = Signal(len(data_wp.data))
            update_we = Signal(len(data_wp.en))
            aux_wdata = Signal(32)

            with m.If(self.bus_valid):
                m.d.comb += [
                    update_addr.eq(self.bus_addr.line),
                    update_data.eq(Repl(self.bus_data, self.nwords)),
                    update_we.bit_select(self.bus_addr.offset,
                                         1).eq(way.sel_lru & self.bus_ack),
                ]
            with m.Else():
                m.d.comb += [
                    update_addr.eq(self.s2_address.line),
                    update_data.eq(Repl(aux_wdata, self.nwords)),
                    update_we.bit_select(self.s2_address.offset,
                                         1).eq(way.sel_we & ~self.s2_miss)
                ]
            m.d.comb += [
                # Aux data: the write port has no byte granularity, so for
                # a CPU write the word to store must be assembled byte by
                # byte from the write data and the current read data.
                aux_wdata.eq(
                    Cat(
                        Mux(self.s2_sel[0], self.s2_wdata.word_select(0, 8),
                            self.s2_rdata.word_select(0, 8)),
                        Mux(self.s2_sel[1], self.s2_wdata.word_select(1, 8),
                            self.s2_rdata.word_select(1, 8)),
                        Mux(self.s2_sel[2], self.s2_wdata.word_select(2, 8),
                            self.s2_rdata.word_select(2, 8)),
                        Mux(self.s2_sel[3], self.s2_wdata.word_select(3, 8),
                            self.s2_rdata.word_select(3, 8)))),
                #
                data_wp.addr.eq(update_addr),
                data_wp.data.eq(update_data),
                data_wp.en.eq(update_we),
            ]
        else:
            m.d.comb += [
                data_wp.addr.eq(self.bus_addr.line),
                data_wp.data.eq(Repl(self.bus_data, self.nwords)),
                data_wp.en.bit_select(self.bus_addr.offset,
                                      1).eq(way.sel_lru & self.bus_ack),
            ]

        # --------------------------------------------------------------
        # internal snoop
        # for FENCE.i instruction
        _match_snoop = Signal()

        m.d.comb += [
            snoop_rp.addr.eq(snoop_addr.line),  # read tag memory
            _match_snoop.eq(snoop_rp.data == snoop_addr.tag),
            way.snoop_hit.eq(snoop_use_cache & snoop_valid & _match_snoop
                             & valid.bit_select(snoop_addr.line, 1)),
        ]
        # check if the snoop matches a write from this core; if so,
        # invalidate the corresponding line.
        with m.If(way.snoop_hit):
            m.d.sync += valid.bit_select(snoop_addr.line, 1).eq(0)
        # --------------------------------------------------------------

    return m
class USBAnalyzer(Elaboratable):
    """ Core USB analyzer; backed by a small ringbuffer in FPGA block RAM.

    If you're looking to instantiate a full analyzer, you'll probably want to
    grab one of the DRAM-based ringbuffer variants (which are currently
    forthcoming).

    If you're looking to use this with a ULPI PHY, rather than the
    FPGA-convenient UMTI interface, grab the UMTITranslator from
    `luna.gateware.interface.ulpi`.

    I/O port:
        O: data_available -- indicates that new data is available in the
                             analysis stream
        O: data_out[8]    -- the next byte in the captured stream; valid when
                             data_available is asserted
        I: next           -- strobe that indicates when the data_out byte has
                             been accepted; and can be discarded from the
                             local memory
    """

    # Currently, we'll provide a packet header of 16 bits.
    HEADER_SIZE_BITS = 16
    HEADER_SIZE_BYTES = HEADER_SIZE_BITS // 8

    # Support a maximum payload size of 1024B, plus a 1-byte PID and a 2-byte CRC16.
    MAX_PACKET_SIZE_BYTES = 1024 + 1 + 2

    def __init__(self, *, umti_interface, mem_depth=8192):
        """
        Parameters:
            umti_interface -- A record or elaboratable that presents a UMTI interface.
        """
        self.umti = umti_interface

        # Internal storage memory.
        self.mem = Memory(width=8, depth=mem_depth, name="analysis_ringbuffer")
        self.mem_size = mem_depth

        #
        # I/O port
        #
        self.data_available = Signal()
        self.data_out = Signal(8)
        self.next = Signal()

        self.overrun = Signal()
        self.capturing = Signal()

        # Diagnostic I/O.
        self.sampling = Signal()

    def elaborate(self, platform):
        m = Module()

        # Memory read and write ports (both in the "ulpi" clock domain).
        m.submodules.read = mem_read_port = self.mem.read_port(domain="ulpi")
        m.submodules.write = mem_write_port = self.mem.write_port(
            domain="ulpi")

        # Store the memory address of our active packet header, which will store
        # packet metadata like the packet size.
        header_location = Signal.like(mem_write_port.addr)
        write_location = Signal.like(mem_write_port.addr)

        # Read FIFO status.
        read_location = Signal.like(mem_read_port.addr)
        fifo_count = Signal.like(mem_read_port.addr, reset=0)
        fifo_new_data = Signal()

        # Current receive status.
        packet_size = Signal(16)

        #
        # Read FIFO logic.
        #
        m.d.comb += [

            # We have data ready whenever there's data in the FIFO.
            self.data_available.eq(fifo_count != 0),

            # Our data_out is always the output of our read port...
            self.data_out.eq(mem_read_port.data),

            # ... and our read port always reads from our read pointer.
            mem_read_port.addr.eq(read_location),

            self.sampling.eq(mem_write_port.en)
        ]

        # Once our consumer has accepted our current data, move to the next address.
        with m.If(self.next & self.data_available):
            m.d.ulpi += read_location.eq(read_location + 1)

        #
        # FIFO count handling.
        #
        fifo_full = (fifo_count == self.mem_size)

        data_pop = Signal()
        data_push = Signal()
        m.d.comb += [
            data_pop.eq(self.next & self.data_available),
            data_push.eq(fifo_new_data & ~fifo_full)
        ]

        # If we have both a read and a write, don't update the count,
        # as we've both added one and subtracted one.
        with m.If(data_push & data_pop):
            pass

        # Otherwise, add when data's added, and subtract when data's removed.
        with m.Elif(data_push):
            m.d.ulpi += fifo_count.eq(fifo_count + 1)
        with m.Elif(data_pop):
            m.d.ulpi += fifo_count.eq(fifo_count - 1)

        #
        # Core analysis FSM.
        #
        with m.FSM(domain="ulpi") as f:
            m.d.comb += [
                self.overrun.eq(f.ongoing("OVERRUN")),
                self.capturing.eq(f.ongoing("CAPTURE")),
            ]

            # IDLE: wait for an active receive.
            with m.State("IDLE"):

                # Wait until a transmission is active.
                # TODO: add triggering logic?
                with m.If(self.umti.rx_active):
                    m.next = "CAPTURE"
                    # Reserve header space; payload bytes will follow it.
                    m.d.ulpi += [
                        header_location.eq(write_location),
                        write_location.eq(write_location
                                          + self.HEADER_SIZE_BYTES),
                        packet_size.eq(0),
                    ]

                    #with m.If(self.umti.rx_valid):
                    #    m.d.ulpi += [
                    #        fifo_count      .eq(fifo_count + 1),
                    #        write_location  .eq(write_location + self.HEADER_SIZE_BYTES + 1),
                    #        packet_size     .eq(1)
                    #    ]
                    #    m.d.comb += [
                    #        mem_write_port.addr  .eq(write_location + self.HEADER_SIZE_BYTES),
                    #        mem_write_port.data  .eq(self.umti.data_out),
                    #        mem_write_port.en    .eq(1)
                    #    ]

            # Capture data until the packet is complete.
            with m.State("CAPTURE"):

                # Capture data whenever RxValid is asserted.
                m.d.comb += [
                    mem_write_port.addr.eq(write_location),
                    mem_write_port.data.eq(self.umti.data_out),
                    mem_write_port.en.eq(self.umti.rx_valid
                                         & self.umti.rx_active),
                    fifo_new_data.eq(self.umti.rx_valid
                                     & self.umti.rx_active)
                ]

                # Advance the write pointer each time we receive a bit.
                with m.If(self.umti.rx_valid & self.umti.rx_active):
                    m.d.ulpi += [
                        write_location.eq(write_location + 1),
                        packet_size.eq(packet_size + 1)
                    ]

                    # If this would be filling up our data memory,
                    # move to the OVERRUN state.
                    with m.If(fifo_count == self.mem_size - 1
                              - self.HEADER_SIZE_BYTES):
                        m.next = "OVERRUN"

                # If we've stopped receiving, move to the "finalize" state.
                with m.If(~self.umti.rx_active):

                    # Optimization: if we didn't receive any data, there's no need
                    # to create a packet. Clear our header from the FIFO and disarm.
                    with m.If(packet_size == 0):
                        m.next = "IDLE"
                        m.d.ulpi += [write_location.eq(header_location)]
                    with m.Else():
                        m.next = "EOP_1"

            # EOP: handle the end of the relevant packet.
            with m.State("EOP_1"):

                # Now that we're done, add the header to the start of our packet.
                # This will take two cycles, currently, as we're using a 2-byte header,
                # but we only have an 8-bit write port.
                m.d.comb += [
                    mem_write_port.addr.eq(header_location),
                    mem_write_port.data.eq(packet_size[7:16]),
                    #mem_write_port.data .eq(0xAA),
                    mem_write_port.en.eq(1),
                    fifo_new_data.eq(1)
                ]
                m.next = "EOP_2"

            with m.State("EOP_2"):

                # Add the second byte of our header.
                # Note that, if this is an adjacent read, we should have
                # just captured our packet header _during_ the stop turnaround.
                m.d.comb += [
                    mem_write_port.addr.eq(header_location + 1),
                    mem_write_port.data.eq(packet_size[0:8]),
                    mem_write_port.en.eq(1),
                    fifo_new_data.eq(1)
                ]

                # Move to the next state, which will either be another capture,
                # or our idle state, depending on whether we have another rx.
                with m.If(self.umti.rx_active):
                    m.next = "CAPTURE"
                    m.d.ulpi += [
                        header_location.eq(write_location),
                        write_location.eq(write_location
                                          + self.HEADER_SIZE_BYTES),
                        packet_size.eq(0),
                    ]
                    # FIXME: capture if rx_valid

                with m.Else():
                    m.next = "IDLE"

            # BABBLE -- handles the case in which we've received a packet beyond
            # the allowable size in the USB spec
            with m.State("BABBLE"):

                # Trap here, for now.
                pass

            with m.State("OVERRUN"):
                # TODO: we should probably set an overrun flag and then emit an EOP, here?
                pass

        return m
def elaborate(self, platform):
    """Build the polyphase decimating FIR engine.

    Incoming samples are collected in a circular buffer (`sample_RAM`);
    a 4-stage pipeline fetches coefficient/sample pairs, multiplies,
    accumulates, and emits one decimated output per convolution pass.
    """
    m = Module()

    # N is size of RAMs.
    N = self.M + 2
    assert _is_power_of_2(N)

    # kernel_RAM is a buffer of convolution kernel coefficients.
    # It is read-only.  The 0'th element is zero, because the kernel
    # has length N-1.
    # NOTE(review): depth is hard-coded to 256 rather than N — confirm
    # callers guarantee N <= 256.
    kernel_RAM = Memory(width=COEFF_WIDTH, depth=256, init=kernel)
    m.submodules.kr_port = kr_port = kernel_RAM.read_port()

    # sample_RAM is a circular buffer for incoming samples.
    sample_RAM = Memory(width=self.sample_depth, depth=N, init=[0] * N)
    m.submodules.sw_port = sw_port = sample_RAM.write_port()
    m.submodules.sr_port = sr_port = sample_RAM.read_port()

    # The rotors index through sample_RAM.  They have an extra MSB
    # so we can distinguish between buffer full and buffer empty.
    #
    #   w_rotor: write rotor.  Points to the next entry to be written.
    #   s_rotor: start rotor.  Points to the oldest valid entry.
    #   r_rotor: read rotor.   Points to the next entry to be read.
    #
    # The polyphase decimator reads each sample N / R times, so
    # `r_rotor` is **NOT** the oldest needed sample.  Instead,
    # `s_rotor` is the oldest.  `s_rotor` is incremented by `R`
    # at the start of each convolution.
    #
    # We initialize the rotors so that the RAM contains N-1 zero samples,
    # and `r_rotor` is pointing to the first sample to be used.
    # The convolution engine can start immediately and produce a zero
    # result.
    w_rotor = Signal(range(2 * N), reset=N)
    s_rotor = Signal(range(2 * N), reset=1)
    r_rotor = Signal(range(2 * N), reset=1)

    # `c_index` is the next kernel coefficient to read.
    # `c_index` == 0 indicates done, so start at 1.
    c_index = Signal(range(N), reset=1)

    # Buffer occupancy conditions.
    buf_n_used = Signal(range(N + 1))
    buf_is_empty = Signal()
    buf_is_full = Signal()
    buf_n_readable = Signal(range(N + 1))
    buf_has_readable = Signal()
    m.d.comb += [
        buf_n_used.eq(w_rotor - s_rotor),
        buf_is_empty.eq(buf_n_used == 0),
        buf_is_full.eq(buf_n_used == N),
        buf_n_readable.eq(w_rotor - r_rotor),
        buf_has_readable.eq(buf_n_readable != 0),
    ]

    # Put incoming samples into sample_RAM.
    m.d.comb += [
        self.samples_in.o_ready.eq(~buf_is_full),
        sw_port.addr.eq(w_rotor[:-1]),
        sw_port.data.eq(self.samples_in.i_data),
    ]
    m.d.sync += sw_port.en.eq(self.samples_in.received())
    with m.If(self.samples_in.received()):
        m.d.sync += w_rotor.eq(w_rotor + 1)

    # The convolution is pipelined.
    #
    #   stage 0: fetch coefficient and sample from their RAMs.
    #   stage 1: multiply coefficient and sample.
    #   stage 2: add product to accumulator.
    #   stage 3: if complete, try to send accumulated sample.
    p_valid = Signal(4)
    p_ready = Array(
        Signal(name=f'p_ready{i}', reset=True) for i in range(4))
    p_complete = Signal(4)
    m.d.sync += [
        p_valid[1:].eq(p_valid[:-1]),
        p_ready[0].eq(p_ready[1]),
        p_ready[1].eq(p_ready[2]),
        p_ready[2].eq(p_ready[3]),
    ]

    # calculation variables
    coeff = Signal(COEFF_SHAPE)
    sample = Signal(signed(self.sample_depth))
    prod = Signal(signed(COEFF_WIDTH + self.sample_depth))
    acc = Signal(signed(self.acc_width))

    # Stage 0: fetch operands when data is readable, the stage is ready,
    # and a convolution pass is in progress (c_index != 0).
    en0 = Signal()
    # BUGFIX: was `p_ready[0] * (c_index != 0)`.  `*` happened to behave
    # like AND for these 1-bit values, but `&` is what is meant.
    m.d.comb += en0.eq(buf_has_readable & p_ready[0] & (c_index != 0))
    m.d.comb += coeff.eq(kr_port.data)
    m.d.comb += kr_port.addr.eq(c_index)
    with m.If(en0):
        m.d.sync += [
            # NOTE(review): direct Memory indexing; the sr_port read port
            # created above is unused — confirm this elaborates/simulates
            # as intended.
            sample.eq(sample_RAM[r_rotor[:-1]]),
            c_index.eq(c_index + 1),
            r_rotor.eq(r_rotor + 1),
            p_valid[0].eq(True),
            p_complete[0].eq(False),
        ]
    with m.If((~buf_has_readable | ~p_ready[0]) & (c_index != 0)):
        m.d.sync += [
            p_valid[0].eq(False),
            p_complete[0].eq(False),
        ]

    # When c_index is zero, all convolution samples have been read.
    # Set up the rotors for the next sample (and pause the
    # pipelined calculation).
    with m.If(c_index == 0):
        m.d.sync += [
            c_index.eq(c_index + 1),
            s_rotor.eq(s_rotor + self.R),
            r_rotor.eq(s_rotor + self.R),
            p_valid[0].eq(False),
            p_complete[0].eq(True),
        ]

    # Stage 1: multiply.
    with m.If(p_valid[1] & p_ready[1]):
        m.d.sync += [
            prod.eq(coeff * sample),
            p_complete[1].eq(p_complete[0]),
        ]

    # Stage 2: accumulate.
    with m.If(p_valid[2] & p_ready[2]):
        m.d.sync += [
            acc.eq(acc + prod),
            p_complete[2].eq(p_complete[1]),
        ]

    # Stage 3: emit the (shifted) accumulated sample when the sink has room.
    m.d.comb += p_ready[3].eq(~self.samples_out.full())
    m.d.sync += p_complete[3].eq(p_complete[3] | p_complete[2])
    with m.If(p_valid[3] & p_ready[3] & p_complete[2]):
        m.d.sync += [
            self.samples_out.o_valid.eq(1),
            self.samples_out.o_data.eq(acc[self.shift:]),
            acc.eq(0),
            p_complete[3].eq(False),
        ]
    with m.If(self.samples_out.sent()):
        m.d.sync += self.samples_out.o_valid.eq(0)

    return m
def elaborate(self, platform):
    """Build the 6-stage pipelined CPU (A/F/D/X/M/W).

    Wires the pipeline stages together, instantiates the functional
    units (selected by ``self.configuration``), and describes the
    per-stage datapath plus the inter-stage pipeline registers.
    Returns the assembled ``Module``.

    Fix vs. previous revision: the X->M pipeline-register list assigned
    ``x.endpoint_b.compare_result`` twice with the identical value; the
    redundant duplicate statement has been removed.
    """
    cpu = Module()

    # ----------------------------------------------------------------------
    # create the pipeline stages
    a = cpu.submodules.a = Stage(None, _af_layout)
    f = cpu.submodules.f = Stage(_af_layout, _fd_layout)
    d = cpu.submodules.d = Stage(_fd_layout, _dx_layout)
    x = cpu.submodules.x = Stage(_dx_layout, _xm_layout)
    m = cpu.submodules.m = Stage(_xm_layout, _mw_layout)
    w = cpu.submodules.w = Stage(_mw_layout, None)

    # ----------------------------------------------------------------------
    # connect the stages
    cpu.d.comb += [
        a.endpoint_b.connect(f.endpoint_a),
        f.endpoint_b.connect(d.endpoint_a),
        d.endpoint_b.connect(x.endpoint_a),
        x.endpoint_b.connect(m.endpoint_a),
        m.endpoint_b.connect(w.endpoint_a)
    ]

    # ----------------------------------------------------------------------
    # units (optional units are only instantiated when enabled in the config)
    adder = cpu.submodules.adder = AdderUnit()
    logic = cpu.submodules.logic = LogicUnit()
    shifter = cpu.submodules.shifter = ShifterUnit()
    compare = cpu.submodules.compare = CompareUnit()
    decoder = cpu.submodules.decoder = DecoderUnit(self.configuration)
    exception = cpu.submodules.exception = ExceptionUnit(self.configuration)
    data_sel = cpu.submodules.data_sel = DataFormat()
    csr = cpu.submodules.csr = CSRFile()
    if (self.configuration.getOption('icache', 'enable')):
        fetch = cpu.submodules.fetch = CachedFetchUnit(self.configuration)
    else:
        fetch = cpu.submodules.fetch = BasicFetchUnit()
    if (self.configuration.getOption('dcache', 'enable')):
        lsu = cpu.submodules.lsu = CachedLSU(self.configuration)
    else:
        lsu = cpu.submodules.lsu = BasicLSU()
    if self.configuration.getOption('isa', 'enable_rv32m'):
        multiplier = cpu.submodules.multiplier = Multiplier()
        divider = cpu.submodules.divider = Divider()
    if self.configuration.getOption('predictor', 'enable_predictor'):
        predictor = cpu.submodules.predictor = BranchPredictor(
            self.configuration)

    # ----------------------------------------------------------------------
    # register file (GPR): 32 x 32-bit, two read ports + one write port
    gprf = Memory(width=32, depth=32)
    gprf_rp1 = gprf.read_port()
    gprf_rp2 = gprf.read_port()
    gprf_wp = gprf.write_port()
    cpu.submodules += gprf_rp1, gprf_rp2, gprf_wp

    # ----------------------------------------------------------------------
    # CSR
    csr.add_csr_from_list(exception.csr.csr_list)
    csr_rp = csr.create_read_port()
    csr_wp = csr.create_write_port()

    # ----------------------------------------------------------------------
    # forward declaration of signals
    fwd_x_rs1 = Signal()
    fwd_m_rs1 = Signal()
    fwd_w_rs1 = Signal()
    fwd_x_rs2 = Signal()
    fwd_m_rs2 = Signal()
    fwd_w_rs2 = Signal()
    x_result = Signal(32)
    m_result = Signal(32)
    w_result = Signal(32)
    m_kill_bj = Signal()

    # ----------------------------------------------------------------------
    # Address Stage
    a_next_pc = Signal(32)
    a_next_pc_q = Signal(32)
    a_next_pc_fu = Signal(32)
    latched_pc = Signal()

    # set the reset value.
    # to (RA - 4) because the value to feed the fetch unit is the next pc:
    a.endpoint_b.pc.reset = self.configuration.getOption(
        'reset', 'reset_address') - 4

    # select next pc (priority: exception > mret > branch redirect > fence_i > pc+4)
    with cpu.If(exception.m_exception & m.valid):
        cpu.d.comb += a_next_pc.eq(exception.csr.mtvec.read)  # exception
    with cpu.Elif(m.endpoint_a.mret & m.valid):
        cpu.d.comb += a_next_pc.eq(exception.csr.mepc.read)  # mret
    if (self.configuration.getOption('predictor', 'enable_predictor')):
        with cpu.Elif((m.endpoint_a.prediction & m.endpoint_a.branch)
                      & ~m.endpoint_a.take_jmp_branch & m.valid):
            cpu.d.comb += a_next_pc.eq(m.endpoint_a.pc + 4)  # branch not taken
        with cpu.Elif(~(m.endpoint_a.prediction & m.endpoint_a.branch)
                      & m.endpoint_a.take_jmp_branch & m.valid):
            cpu.d.comb += a_next_pc.eq(
                m.endpoint_a.jmp_branch_target)  # branch taken
        with cpu.Elif(predictor.f_prediction):
            cpu.d.comb += a_next_pc.eq(
                predictor.f_prediction_pc)  # prediction
    else:
        with cpu.Elif(m.endpoint_a.take_jmp_branch & m.valid):
            cpu.d.comb += a_next_pc.eq(
                m.endpoint_a.jmp_branch_target)  # jmp/branch
    with cpu.Elif(x.endpoint_a.fence_i & x.valid):
        cpu.d.comb += a_next_pc.eq(x.endpoint_a.pc + 4)  # fence_i.
    with cpu.Else():
        cpu.d.comb += a_next_pc.eq(f.endpoint_a.pc + 4)

    # Latch the next pc while the F stage is stalled, so a kill that
    # arrives during the stall is not lost.
    with cpu.If(f.stall):
        with cpu.If(f.kill & ~latched_pc):
            cpu.d.sync += [a_next_pc_q.eq(a_next_pc), latched_pc.eq(1)]
    with cpu.Else():
        cpu.d.sync += latched_pc.eq(0)

    with cpu.If(latched_pc):
        cpu.d.comb += a_next_pc_fu.eq(a_next_pc_q)
    with cpu.Else():
        cpu.d.comb += a_next_pc_fu.eq(a_next_pc)

    cpu.d.comb += [
        fetch.a_pc.eq(a_next_pc_fu),
        fetch.a_stall.eq(a.stall),
        fetch.a_valid.eq(a.valid),
    ]
    cpu.d.comb += a.valid.eq(1)  # the stage is always valid

    # ----------------------------------------------------------------------
    # Fetch Stage
    cpu.d.comb += fetch.iport.connect(self.iport)  # connect the wishbone port
    cpu.d.comb += [fetch.f_stall.eq(f.stall), fetch.f_valid.eq(f.valid)]

    # Remember a kill that fires while F is stalled, replay it afterwards.
    f_kill_r = Signal()
    with cpu.If(f.stall):
        with cpu.If(f_kill_r == 0):
            cpu.d.sync += f_kill_r.eq(f.kill)
    with cpu.Else():
        cpu.d.sync += f_kill_r.eq(0)

    if (self.configuration.getOption('icache', 'enable')):
        cpu.d.comb += [
            fetch.flush.eq(x.endpoint_a.fence_i & x.valid & ~x.stall),
            fetch.f_pc.eq(f.endpoint_a.pc)
        ]

    f.add_kill_source(f_kill_r)
    f.add_stall_source(fetch.f_busy)
    f.add_kill_source(exception.m_exception & m.valid)
    f.add_kill_source(m.endpoint_a.mret & m.valid)
    f.add_kill_source(m_kill_bj)
    f.add_kill_source(x.endpoint_a.fence_i & x.valid & ~x.stall)

    # ----------------------------------------------------------------------
    # Decode Stage
    cpu.d.comb += decoder.instruction.eq(d.endpoint_a.instruction)

    # Register-file read addresses: take rs1/rs2 straight from the incoming
    # instruction when not stalled; hold the decoded addresses otherwise.
    with cpu.If(~d.stall):
        cpu.d.comb += [
            gprf_rp1.addr.eq(fetch.f_instruction[15:20]),
            gprf_rp2.addr.eq(fetch.f_instruction[20:25])
        ]
    with cpu.Else():
        cpu.d.comb += [
            gprf_rp1.addr.eq(decoder.gpr_rs1),
            gprf_rp2.addr.eq(decoder.gpr_rs2)
        ]

    cpu.d.comb += [
        gprf_wp.addr.eq(w.endpoint_a.gpr_rd),
        gprf_wp.data.eq(w_result),
        gprf_wp.en.eq(w.endpoint_a.gpr_we & w.valid)
    ]

    rs1_data = Signal(32)
    rs2_data = Signal(32)

    # select data for RS1 (with forwarding from X/M/W)
    with cpu.If(decoder.aiupc):
        cpu.d.comb += rs1_data.eq(d.endpoint_a.pc)
    with cpu.Elif((decoder.gpr_rs1 == 0) | decoder.lui):
        cpu.d.comb += rs1_data.eq(0)
    with cpu.Elif(fwd_x_rs1 & x.valid):
        cpu.d.comb += rs1_data.eq(x_result)
    with cpu.Elif(fwd_m_rs1 & m.valid):
        cpu.d.comb += rs1_data.eq(m_result)
    with cpu.Elif(fwd_w_rs1 & w.valid):
        cpu.d.comb += rs1_data.eq(w_result)
    with cpu.Else():
        cpu.d.comb += rs1_data.eq(gprf_rp1.data)

    # select data for RS2 (with forwarding from X/M/W)
    with cpu.If(decoder.csr):
        cpu.d.comb += rs2_data.eq(0)
    with cpu.Elif(~decoder.gpr_rs2_use):
        cpu.d.comb += rs2_data.eq(decoder.immediate)
    with cpu.Elif(decoder.gpr_rs2 == 0):
        cpu.d.comb += rs2_data.eq(0)
    with cpu.Elif(fwd_x_rs2 & x.valid):
        cpu.d.comb += rs2_data.eq(x_result)
    with cpu.Elif(fwd_m_rs2 & m.valid):
        cpu.d.comb += rs2_data.eq(m_result)
    with cpu.Elif(fwd_w_rs2 & w.valid):
        cpu.d.comb += rs2_data.eq(w_result)
    with cpu.Else():
        cpu.d.comb += rs2_data.eq(gprf_rp2.data)

    # Check if the forwarding is needed
    cpu.d.comb += [
        fwd_x_rs1.eq((decoder.gpr_rs1 == x.endpoint_a.gpr_rd)
                     & (decoder.gpr_rs1 != 0) & x.endpoint_a.gpr_we),
        fwd_m_rs1.eq((decoder.gpr_rs1 == m.endpoint_a.gpr_rd)
                     & (decoder.gpr_rs1 != 0) & m.endpoint_a.gpr_we),
        fwd_w_rs1.eq((decoder.gpr_rs1 == w.endpoint_a.gpr_rd)
                     & (decoder.gpr_rs1 != 0) & w.endpoint_a.gpr_we),
        fwd_x_rs2.eq((decoder.gpr_rs2 == x.endpoint_a.gpr_rd)
                     & (decoder.gpr_rs2 != 0) & x.endpoint_a.gpr_we),
        fwd_m_rs2.eq((decoder.gpr_rs2 == m.endpoint_a.gpr_rd)
                     & (decoder.gpr_rs2 != 0) & m.endpoint_a.gpr_we),
        fwd_w_rs2.eq((decoder.gpr_rs2 == w.endpoint_a.gpr_rd)
                     & (decoder.gpr_rs2 != 0) & w.endpoint_a.gpr_we),
    ]

    # Stall D while a needed operand is still being produced downstream.
    d.add_stall_source(((fwd_x_rs1 & decoder.gpr_rs1_use)
                        | (fwd_x_rs2 & decoder.gpr_rs2_use))
                       & ~x.endpoint_a.needed_in_x & x.valid)
    d.add_stall_source(((fwd_m_rs1 & decoder.gpr_rs1_use)
                        | (fwd_m_rs2 & decoder.gpr_rs2_use))
                       & ~m.endpoint_a.needed_in_m & m.valid)
    d.add_kill_source(exception.m_exception & m.valid)
    d.add_kill_source(m.endpoint_a.mret & m.valid)
    d.add_kill_source(m_kill_bj)
    d.add_kill_source(x.endpoint_a.fence_i & x.valid & ~x.stall)

    # ----------------------------------------------------------------------
    # Execute Stage
    x_branch_target = Signal(32)
    x_take_jmp_branch = Signal()
    cpu.d.comb += [
        x_branch_target.eq(x.endpoint_a.pc + x.endpoint_a.immediate),
        x_take_jmp_branch.eq(x.endpoint_a.jump
                             | (x.endpoint_a.branch & compare.cmp_ok))
    ]
    cpu.d.comb += [
        adder.dat1.eq(x.endpoint_a.src_data1),
        adder.dat2.eq(
            Mux(x.endpoint_a.store, x.endpoint_a.immediate,
                x.endpoint_a.src_data2)),
        adder.sub.eq((x.endpoint_a.arithmetic & x.endpoint_a.add_sub)
                     | x.endpoint_a.compare | x.endpoint_a.branch)
    ]
    cpu.d.comb += [
        logic.op.eq(x.endpoint_a.funct3),
        logic.dat1.eq(x.endpoint_a.src_data1),
        logic.dat2.eq(x.endpoint_a.src_data2)
    ]
    cpu.d.comb += [
        shifter.direction.eq(x.endpoint_a.shift_dir),
        shifter.sign_ext.eq(x.endpoint_a.shift_sign),
        shifter.dat.eq(x.endpoint_a.src_data1),
        shifter.shamt.eq(x.endpoint_a.src_data2),
        shifter.stall.eq(x.stall)
    ]
    cpu.d.comb += [
        compare.op.eq(x.endpoint_a.funct3),
        compare.zero.eq(adder.result == 0),
        compare.negative.eq(adder.result[-1]),
        compare.overflow.eq(adder.overflow),
        compare.carry.eq(adder.carry)
    ]

    # select result
    with cpu.If(x.endpoint_a.logic):
        cpu.d.comb += x_result.eq(logic.result)
    with cpu.Elif(x.endpoint_a.jump):
        cpu.d.comb += x_result.eq(x.endpoint_a.pc + 4)
    if (self.configuration.getOption('isa', 'enable_rv32m')):
        with cpu.Elif(x.endpoint_a.multiplier):
            cpu.d.comb += x_result.eq(multiplier.result)
    with cpu.Else():
        cpu.d.comb += x_result.eq(adder.result)

    # load/store unit
    cpu.d.comb += [
        data_sel.x_funct3.eq(x.endpoint_a.funct3),
        data_sel.x_offset.eq(adder.result[:2]),
        data_sel.x_store_data.eq(x.endpoint_a.src_data2),
    ]
    cpu.d.comb += [
        lsu.x_addr.eq(adder.result),
        lsu.x_data_w.eq(data_sel.x_data_w),
        lsu.x_store.eq(x.endpoint_a.store),
        lsu.x_load.eq(x.endpoint_a.load),
        lsu.x_byte_sel.eq(data_sel.x_byte_sel),
        lsu.x_valid.eq(x.valid & ~data_sel.x_misaligned),
        lsu.x_stall.eq(x.stall)
    ]
    if (self.configuration.getOption('dcache', 'enable')):
        cpu.d.comb += lsu.x_fence_i.eq(x.valid & x.endpoint_a.fence_i)
        x.add_stall_source(x.valid & x.endpoint_a.fence_i & m.valid
                           & m.endpoint_a.store)
    if (self.configuration.getOption('isa', 'enable_rv32m')):
        x.add_stall_source(x.valid & x.endpoint_a.multiplier
                           & ~multiplier.ready)
    if (self.configuration.getOption('dcache', 'enable')):
        x.add_stall_source(x.valid & lsu.x_busy)
    x.add_kill_source(exception.m_exception & m.valid)
    x.add_kill_source(m.endpoint_a.mret & m.valid)
    x.add_kill_source(m_kill_bj)

    # ----------------------------------------------------------------------
    # Memory (and CSR) Stage
    csr_wdata = Signal(32)

    # jump/branch: with the predictor, kill on misprediction; without it,
    # kill on every taken jump/branch.
    if (self.configuration.getOption('predictor', 'enable_predictor')):
        cpu.d.comb += m_kill_bj.eq((
            (m.endpoint_a.prediction & m.endpoint_a.branch)
            ^ m.endpoint_a.take_jmp_branch) & m.valid)
    else:
        cpu.d.comb += m_kill_bj.eq(m.endpoint_a.take_jmp_branch & m.valid)

    cpu.d.comb += lsu.dport.connect(self.dport)  # connect the wishbone port

    # select result
    with cpu.If(m.endpoint_a.shifter):
        cpu.d.comb += m_result.eq(shifter.result)
    with cpu.Elif(m.endpoint_a.compare):
        cpu.d.comb += m_result.eq(m.endpoint_a.compare_result)
    if (self.configuration.getOption('isa', 'enable_rv32m')):
        with cpu.Elif(m.endpoint_a.divider):
            cpu.d.comb += m_result.eq(divider.result)
    with cpu.Else():
        cpu.d.comb += m_result.eq(m.endpoint_a.result)

    cpu.d.comb += [
        data_sel.m_data_r.eq(lsu.m_load_data),
        data_sel.m_funct3.eq(m.endpoint_a.funct3),
        data_sel.m_offset.eq(m.endpoint_a.result)
    ]
    cpu.d.comb += [lsu.m_valid.eq(m.valid), lsu.m_stall.eq(m.stall)]
    if (self.configuration.getOption('dcache', 'enable')):
        cpu.d.comb += [
            lsu.m_addr.eq(m.endpoint_a.result),
            lsu.m_load.eq(m.endpoint_a.load),
            lsu.m_store.eq(m.endpoint_a.store)
        ]

    # CSR write-data: source is either the zimm field or rs1's value,
    # optionally inverted for the clear operations.
    csr_src0 = Signal(32)
    csr_src = Signal(32)
    cpu.d.comb += [
        csr_src0.eq(
            Mux(m.endpoint_a.funct3[2], m.endpoint_a.instruction[15:20],
                m.endpoint_a.result)),
        csr_src.eq(
            Mux(m.endpoint_a.funct3[:2] == 0b11, ~csr_src0, csr_src0))
    ]
    with cpu.If(m.endpoint_a.funct3[:2] == 0b01):  # write
        cpu.d.comb += csr_wdata.eq(csr_src)
    with cpu.Elif(m.endpoint_a.funct3[:2] == 0b10):  # set
        cpu.d.comb += csr_wdata.eq(csr_rp.data | csr_src)
    with cpu.Else():  # clear
        cpu.d.comb += csr_wdata.eq(csr_rp.data & csr_src)

    # csr
    cpu.d.comb += [
        csr_rp.addr.eq(m.endpoint_a.csr_addr),
        csr_wp.addr.eq(m.endpoint_a.csr_addr),
        csr_wp.en.eq(m.endpoint_a.csr_we & m.valid),
        csr_wp.data.eq(csr_wdata)
    ]

    # exception unit
    cpu.d.comb += [
        exception.external_interrupt.eq(self.external_interrupt),
        exception.software_interrupt.eq(self.software_interrupt),
        exception.timer_interrupt.eq(self.timer_interrupt),
        exception.m_fetch_misalign.eq(m.endpoint_a.take_jmp_branch & (
            m.endpoint_a.jmp_branch_target[:2] != 0)),
        exception.m_fetch_error.eq(m.endpoint_a.fetch_error),
        exception.m_illegal.eq(m.endpoint_a.illegal
                               | (m.endpoint_a.csr & csr.invalid)),
        exception.m_load_misalign.eq(m.endpoint_a.ls_misalign
                                     & m.endpoint_a.load),
        exception.m_load_error.eq(lsu.m_load_error),
        exception.m_store_misalign.eq(m.endpoint_a.ls_misalign
                                      & m.endpoint_a.store),
        exception.m_store_error.eq(lsu.m_store_error),
        exception.m_ecall.eq(m.endpoint_a.ecall),
        exception.m_ebreak.eq(m.endpoint_a.ebreak),
        exception.m_mret.eq(m.endpoint_a.mret),
        exception.m_pc.eq(m.endpoint_a.pc),
        exception.m_instruction.eq(m.endpoint_a.instruction),
        exception.m_fetch_badaddr.eq(m.endpoint_a.fetch_badaddr),
        exception.m_pc_misalign.eq(m.endpoint_a.jmp_branch_target),
        exception.m_ls_misalign.eq(m.endpoint_a.result),
        exception.m_load_store_badaddr.eq(lsu.m_badaddr),
        exception.m_store.eq(m.endpoint_a.store),
        exception.m_valid.eq(m.valid),
        exception.m_stall.eq(m.stall)
    ]

    m.add_stall_source(m.valid & lsu.m_busy)
    if (self.configuration.getOption('isa', 'enable_rv32m')):
        m.add_stall_source(divider.busy)
    m.add_kill_source(exception.m_exception & m.valid)

    # ----------------------------------------------------------------------
    # Write-back stage
    if self.configuration.getOption('isa', 'enable_extra_csr'):
        cpu.d.comb += exception.w_retire.eq(w.endpoint_a.is_instruction)

    with cpu.If(w.endpoint_a.load):
        cpu.d.comb += w_result.eq(w.endpoint_a.ld_result)
    with cpu.Elif(w.endpoint_a.csr):
        cpu.d.comb += w_result.eq(w.endpoint_a.csr_result)
    with cpu.Else():
        cpu.d.comb += w_result.eq(w.endpoint_a.result)

    # ----------------------------------------------------------------------
    # Optional units: Multiplier/Divider
    if (self.configuration.getOption('isa', 'enable_rv32m')):
        cpu.d.comb += [
            multiplier.op.eq(x.endpoint_a.funct3),
            multiplier.dat1.eq(x.endpoint_a.src_data1),
            multiplier.dat2.eq(x.endpoint_a.src_data2),
            multiplier.valid.eq(x.endpoint_a.multiplier & x.valid)
        ]
        cpu.d.comb += [
            divider.op.eq(x.endpoint_a.funct3),
            divider.dat1.eq(x.endpoint_a.src_data1),
            divider.dat2.eq(x.endpoint_a.src_data2),
            divider.stall.eq(x.stall),
            divider.start.eq(x.endpoint_a.divider)
        ]

    # ----------------------------------------------------------------------
    # Optional units: branch predictor
    if (self.configuration.getOption('predictor', 'enable_predictor')):
        cpu.d.comb += [
            predictor.a_pc.eq(a_next_pc_fu),
            predictor.a_stall.eq(a.stall),
            predictor.f_pc.eq(f.endpoint_a.pc),
            predictor.m_prediction_state.eq(m.endpoint_a.prediction_state),
            predictor.m_take_jmp_branch.eq(m.endpoint_a.take_jmp_branch
                                           & m.valid),
            predictor.m_pc.eq(m.endpoint_a.pc),
            predictor.m_target_pc.eq(m.endpoint_a.jmp_branch_target),
            predictor.m_update.eq(m.endpoint_a.branch & m.valid)
        ]

    # ----------------------------------------------------------------------
    # Pipeline registers

    # A -> F
    with cpu.If(~a.stall):
        cpu.d.sync += a.endpoint_b.pc.eq(a_next_pc_fu)

    # F -> D
    with cpu.If(~f.stall):
        cpu.d.sync += [
            f.endpoint_b.pc.eq(f.endpoint_a.pc),
            f.endpoint_b.instruction.eq(fetch.f_instruction),
            f.endpoint_b.fetch_error.eq(fetch.f_bus_error),
            f.endpoint_b.fetch_badaddr.eq(fetch.f_badaddr)
        ]
        if (self.configuration.getOption('predictor', 'enable_predictor')):
            cpu.d.sync += [
                f.endpoint_b.prediction.eq(predictor.f_prediction),
                f.endpoint_b.prediction_state.eq(
                    predictor.f_prediction_state)
            ]

    # D -> X
    # NOTE: 'aritmetic'/'substract'/'shit_signed' are the attribute names
    # as declared by DecoderUnit; they must match the decoder, typos and all.
    with cpu.If(~d.stall):
        cpu.d.sync += [
            d.endpoint_b.pc.eq(d.endpoint_a.pc),
            d.endpoint_b.instruction.eq(d.endpoint_a.instruction),
            d.endpoint_b.gpr_rd.eq(decoder.gpr_rd),
            d.endpoint_b.gpr_we.eq(decoder.gpr_we),
            d.endpoint_b.src_data1.eq(rs1_data),
            d.endpoint_b.src_data2.eq(rs2_data),
            d.endpoint_b.immediate.eq(decoder.immediate),
            d.endpoint_b.funct3.eq(decoder.funct3),
            d.endpoint_b.gpr_rs1_use.eq(decoder.gpr_rs1_use),
            d.endpoint_b.needed_in_x.eq(decoder.needed_in_x),
            d.endpoint_b.needed_in_m.eq(decoder.needed_in_m),
            d.endpoint_b.arithmetic.eq(decoder.aritmetic),
            d.endpoint_b.logic.eq(decoder.logic),
            d.endpoint_b.shifter.eq(decoder.shift),
            d.endpoint_b.jump.eq(decoder.jump),
            d.endpoint_b.branch.eq(decoder.branch),
            d.endpoint_b.compare.eq(decoder.compare),
            d.endpoint_b.load.eq(decoder.load),
            d.endpoint_b.store.eq(decoder.store),
            d.endpoint_b.csr.eq(decoder.csr),
            d.endpoint_b.add_sub.eq(decoder.substract),
            d.endpoint_b.shift_dir.eq(decoder.shift_direction),
            d.endpoint_b.shift_sign.eq(decoder.shit_signed),
            d.endpoint_b.csr_addr.eq(decoder.immediate),
            d.endpoint_b.csr_we.eq(decoder.csr_we),
            d.endpoint_b.fetch_error.eq(d.endpoint_a.fetch_error),
            d.endpoint_b.fetch_badaddr.eq(d.endpoint_a.fetch_badaddr),
            d.endpoint_b.ecall.eq(decoder.ecall),
            d.endpoint_b.ebreak.eq(decoder.ebreak),
            d.endpoint_b.mret.eq(decoder.mret),
            d.endpoint_b.illegal.eq(decoder.illegal),
            d.endpoint_b.fence_i.eq(decoder.fence_i),
            d.endpoint_b.multiplier.eq(decoder.multiply),
            d.endpoint_b.divider.eq(decoder.divide),
            d.endpoint_b.prediction.eq(d.endpoint_a.prediction),
            d.endpoint_b.prediction_state.eq(d.endpoint_a.prediction_state)
        ]

    # X -> M
    with cpu.If(~x.stall):
        cpu.d.sync += [
            x.endpoint_b.pc.eq(x.endpoint_a.pc),
            x.endpoint_b.instruction.eq(x.endpoint_a.instruction),
            x.endpoint_b.gpr_rd.eq(x.endpoint_a.gpr_rd),
            x.endpoint_b.gpr_we.eq(x.endpoint_a.gpr_we),
            x.endpoint_b.needed_in_m.eq(x.endpoint_a.needed_in_m
                                        | x.endpoint_a.needed_in_x),
            x.endpoint_b.funct3.eq(x.endpoint_a.funct3),
            x.endpoint_b.shifter.eq(x.endpoint_a.shifter),
            x.endpoint_b.compare.eq(x.endpoint_a.compare),
            x.endpoint_b.branch.eq(x.endpoint_a.branch),
            x.endpoint_b.load.eq(x.endpoint_a.load),
            x.endpoint_b.store.eq(x.endpoint_a.store),
            x.endpoint_b.csr.eq(x.endpoint_a.csr),
            x.endpoint_b.csr_addr.eq(x.endpoint_a.csr_addr),
            x.endpoint_b.csr_we.eq(x.endpoint_a.csr_we),
            x.endpoint_b.result.eq(x_result),
            x.endpoint_b.compare_result.eq(compare.cmp_ok),
            x.endpoint_b.jmp_branch_target.eq(
                Mux(x.endpoint_a.jump & x.endpoint_a.gpr_rs1_use,
                    adder.result[1:] << 1, x_branch_target)),
            x.endpoint_b.take_jmp_branch.eq(x_take_jmp_branch),
            x.endpoint_b.fetch_error.eq(x.endpoint_a.fetch_error),
            x.endpoint_b.fetch_badaddr.eq(x.endpoint_a.fetch_badaddr),
            x.endpoint_b.ecall.eq(x.endpoint_a.ecall),
            x.endpoint_b.ebreak.eq(x.endpoint_a.ebreak),
            x.endpoint_b.mret.eq(x.endpoint_a.mret),
            x.endpoint_b.illegal.eq(x.endpoint_a.illegal),
            x.endpoint_b.ls_misalign.eq(data_sel.x_misaligned),
            x.endpoint_b.divider.eq(x.endpoint_a.divider),
            x.endpoint_b.prediction.eq(x.endpoint_a.prediction),
            x.endpoint_b.prediction_state.eq(
                x.endpoint_a.prediction_state),
        ]

    # M -> W
    with cpu.If(~m.stall):
        cpu.d.sync += [
            m.endpoint_b.pc.eq(m.endpoint_a.pc),
            m.endpoint_b.gpr_rd.eq(m.endpoint_a.gpr_rd),
            m.endpoint_b.gpr_we.eq(m.endpoint_a.gpr_we),
            m.endpoint_b.result.eq(m_result),
            m.endpoint_b.ld_result.eq(data_sel.m_load_data),
            m.endpoint_b.csr_result.eq(csr_rp.data),
            m.endpoint_b.load.eq(m.endpoint_a.load),
            m.endpoint_b.csr.eq(m.endpoint_a.csr)
        ]

    return cpu
def elaborate(self, platform):
    """Bit-serial packet decoder FSM.

    Consumes one dewhitened bit per `self.sample` strobe, walking through
    header -> address -> (section header / section content)* -> CRC, writing
    name-section bytes into a payload memory, and handing off to an optional
    printer once the CRC check passes.
    """
    m = Module()

    m.submodules.lfsr = self.lfsr
    m.submodules.crc = self.crc

    # Payload bytes land either in the printer's memory or a private one.
    if self.printer:
        payload_data = self.printer.mem
    else:
        payload_data = Memory(width=8, depth=64)
    m.submodules.payload_wport = wport = payload_data.write_port()

    # 16-bit packet header, assembled LSB-first; pdu/size are its two bytes.
    header = Signal(16)
    header_idx = Signal(range(16))
    pdu = Signal(8)
    m.d.comb += pdu.eq(header[0:8])
    size = Signal(8)
    m.d.comb += size.eq(header[8:16])

    payload_read = Signal(12)  # How many bits of the payload we've read
    payload_addr = Signal(48)
    payload_addr_idx = Signal(8)

    # Per-section header: low byte = section size, high byte = section type.
    payload_sec_header_idx = Signal(4)
    payload_sec_header = Signal(16)
    payload_sec_size = Signal(8)
    payload_sec_type = Signal(8)
    m.d.comb += [
        payload_sec_size.eq(payload_sec_header[0:8]),
        payload_sec_type.eq(payload_sec_header[8:16])
    ]
    payload_sec_read = Signal(12)
    payload_byte = Signal(8)

    # Incoming bit XORed with the LFSR output (dewhitening).
    dewhitened = Signal()
    m.d.comb += dewhitened.eq(self.bitstream ^ self.lfsr.output)

    # Received CRC, compared bit-reversed against the computed CRC.
    crc = Signal(24)
    crc_idx = Signal(8)
    crc_matches = self.crc_matches
    m.d.comb += crc_matches.eq(
        Cat([self.crc.crc[i] == crc[24 - i - 1] for i in range(24)]).all())

    # Set once at least one name byte was stored; gates the readout.
    should_print = Signal()

    with m.FSM() as fsm:
        # Exposed for debugging purposes
        m.d.comb += self.state.eq(fsm.state)

        with m.State("IDLE"):
            # First sampled bit becomes header bit 0 and starts the decode.
            with m.If(self.sample):
                m.next = "READ_HEADER"
                m.d.sync += [
                    header_idx.eq(1),
                    header.eq(Cat(dewhitened, [0] * 7)),
                    self.currentbit.eq(dewhitened),
                    self.lfsr.run_strobe.eq(1),
                    self.crc.input.eq(dewhitened),
                    self.crc.en.eq(1),
                    should_print.eq(0),
                ]
            with m.Else():
                # Reset goes high at the end
                m.d.sync += [self.lfsr.reset.eq(0), self.crc.reset.eq(0)]

        with m.State("READ_HEADER"):
            # Shift in the remaining 15 header bits, LSB-first.
            with m.If(self.sample):
                m.d.sync += [
                    header_idx.eq(header_idx + 1),
                    header.eq(header | (dewhitened << header_idx)),
                    self.currentbit.eq(dewhitened),
                    self.lfsr.run_strobe.eq(1),
                    self.crc.input.eq(dewhitened),
                    self.crc.en.eq(1),
                ]
                with m.If(header_idx == 15):
                    m.d.sync += [
                        payload_read.eq(0),
                        payload_addr.eq(0),
                        payload_addr_idx.eq(0)
                    ]
                    m.next = "READ_PAYLOAD_ADDR"
            with m.Else():
                m.d.sync += [self.lfsr.run_strobe.eq(0), self.crc.en.eq(0)]

        with m.State("READ_PAYLOAD_ADDR"):
            # 48-bit address field, also counted toward payload_read.
            with m.If(self.sample):
                m.d.sync += [
                    payload_read.eq(payload_read + 1),
                    payload_addr_idx.eq(payload_addr_idx + 1),
                    payload_addr.eq(payload_addr
                                    | (dewhitened << payload_addr_idx)),
                    self.currentbit.eq(dewhitened),
                    self.lfsr.run_strobe.eq(1),
                    self.crc.input.eq(dewhitened),
                    self.crc.en.eq(1),
                ]
                with m.If(payload_addr_idx == (48 - 1)):
                    m.d.sync += [
                        payload_sec_header.eq(0),
                        payload_sec_header_idx.eq(0)
                    ]
                    m.next = "READ_PAYLOAD_SECTION_HEADER"
            with m.Else():
                m.d.sync += [self.lfsr.run_strobe.eq(0), self.crc.en.eq(0)]

        with m.State("READ_PAYLOAD_SECTION_HEADER"):
            # 16-bit section header: size byte then type byte.
            with m.If(self.sample):
                m.d.sync += [
                    payload_read.eq(payload_read + 1),
                    payload_sec_header_idx.eq(payload_sec_header_idx + 1),
                    payload_sec_header.eq(payload_sec_header | (
                        dewhitened << payload_sec_header_idx)),
                    self.currentbit.eq(dewhitened),
                    self.lfsr.run_strobe.eq(1),
                    self.crc.input.eq(dewhitened),
                    self.crc.en.eq(1),
                ]
                with m.If(payload_sec_header_idx == 15):
                    m.d.sync += payload_sec_read.eq(0)
                    m.next = "READ_PAYLOAD_SECTION_CONTENT"
            with m.Else():
                m.d.sync += [self.lfsr.run_strobe.eq(0), self.crc.en.eq(0)]
            # If we previously were in READ_PAYLOAD_SECTION_CONTENT, null terminate
            # what we read
            m.d.comb += [
                wport.addr.eq((payload_sec_read >> 3) + 1),
                wport.en.eq(1),
                wport.data.eq(0)
            ]

        with m.State("READ_PAYLOAD_SECTION_CONTENT"):
            with m.If(self.sample):
                m.d.sync += [
                    payload_read.eq(payload_read + 1),
                    payload_sec_read.eq(payload_sec_read + 1),
                    self.currentbit.eq(dewhitened),
                    self.lfsr.run_strobe.eq(1),
                    self.crc.input.eq(dewhitened),
                    self.crc.en.eq(1),
                ]
                # m.d.comb += self.debug.eq(payload_sec_type == 0x9)
                with m.If((payload_sec_type == 0x9) | (payload_sec_type == 0x8)
                          ):  # If this section is a complete local name
                    # Assemble one byte at a time; on the 8th bit of a byte,
                    # commit it to the payload memory.
                    idx = payload_read & 0x7
                    with m.If(idx == 0):
                        m.d.sync += payload_byte.eq(dewhitened)
                    with m.Else():
                        m.d.sync += payload_byte.eq(payload_byte
                                                    | (dewhitened << idx))
                    with m.If(idx == 0b111):
                        m.d.comb += [
                            wport.addr.eq(payload_sec_read >> 3),
                            wport.en.eq(1),
                            wport.data.eq(payload_byte | (dewhitened << idx))
                        ]
                        m.d.sync += should_print.eq(should_print | 1)
                # End of whole payload takes priority over end of section.
                with m.If((payload_read + 1) >= size << 3):
                    m.next = "READ_CRC"
                    m.d.sync += [crc_idx.eq(0), crc.eq(0)]
                with m.Else():
                    with m.If((payload_sec_read + 1) == (payload_sec_size - 1)
                              << 3):
                        m.d.sync += [
                            payload_sec_header.eq(0),
                            payload_sec_header_idx.eq(0)
                        ]
                        m.next = "READ_PAYLOAD_SECTION_HEADER"
            with m.Else():
                m.d.sync += [self.lfsr.run_strobe.eq(0), self.crc.en.eq(0)]

        with m.State("READ_CRC"):
            # Shift in the 24 received CRC bits (CRC engine disabled here).
            with m.If(self.sample):
                m.d.sync += [
                    crc_idx.eq(crc_idx + 1),
                    crc.eq(crc | (dewhitened << crc_idx)),
                    self.currentbit.eq(dewhitened),
                    self.lfsr.run_strobe.eq(1),
                    self.crc.en.eq(0),
                ]
                with m.If(crc_idx == 23):
                    m.next = "CHECK_CRC"
            with m.Else():
                m.d.sync += [self.lfsr.run_strobe.eq(0), self.crc.en.eq(0)]

        with m.State("CHECK_CRC"):
            # Only read out packets whose CRC matched and that stored a name.
            with m.If(crc_matches & should_print):
                m.next = "START_READOUT"
            with m.Else():
                m.next = "IDLE"
                m.d.comb += self.done.eq(1)
                m.d.sync += [
                    self.lfsr.run_strobe.eq(0),
                    self.lfsr.reset.eq(1),
                    self.crc.reset.eq(1)
                ]

        with m.State("START_READOUT"):
            # If we previously were in READ_PAYLOAD_SECTION_CONTENT, null terminate
            # what we read
            m.d.comb += [
                wport.addr.eq((payload_sec_read >> 3) + 1),
                wport.en.eq(1),
                wport.data.eq(0)
            ]
            m.d.comb += self.debug.eq(1)
            if self.printer:
                m.d.comb += self.printer.start.eq(1)
                m.next = "WAIT_READOUT"
            else:
                m.d.comb += self.done.eq(1)
                m.next = "IDLE"

        with m.State("WAIT_READOUT"):
            if self.printer:
                with m.If(self.printer.done):
                    m.next = "IDLE"
                    m.d.comb += self.done.eq(1)
                    m.d.sync += [
                        self.lfsr.run_strobe.eq(0),
                        self.lfsr.reset.eq(1),
                        self.crc.reset.eq(1)
                    ]
            else:
                # Invalid: this state is unreachable without a printer.
                pass

    return m
def elaborate(self, platform):
    """Build a FIFO with commit/discard semantics on both ports.

    Reads and writes each track a *current* pointer and a *committed*
    pointer, so a partially-written (or partially-read) transaction can be
    either committed atomically or rolled back.
    """
    m = Module()

    # Range shortcuts for internal signals.
    # depth + 1 entries: one slot is sacrificed to distinguish full/empty.
    address_range = range(0, self.depth + 1)

    #
    # Core internal "backing store".
    #
    memory = Memory(width=self.width, depth=self.depth + 1, name=self.name)
    m.submodules.read_port = read_port = memory.read_port()
    m.submodules.write_port = write_port = memory.write_port()

    # Always connect up our memory's data/en ports to ours.
    m.d.comb += [
        self.read_data.eq(read_port.data),
        write_port.data.eq(self.write_data),
        write_port.en.eq(self.write_en & ~self.full)
    ]

    #
    # Write port.
    #

    # We'll track two pieces of data: our _committed_ write position, and our current un-committed write one.
    # This will allow us to rapidly backtrack to our pre-commit position.
    committed_write_pointer = Signal(address_range)
    current_write_pointer = Signal(address_range)
    m.d.comb += write_port.addr.eq(current_write_pointer)

    # Compute the location for the next write, accounting for wraparound. We'll not assume a binary-sized
    # buffer; so we'll compute the wraparound manually.
    next_write_pointer = Signal.like(current_write_pointer)
    with m.If(current_write_pointer == self.depth):
        m.d.comb += next_write_pointer.eq(0)
    with m.Else():
        m.d.comb += next_write_pointer.eq(current_write_pointer + 1)

    # If we're writing to the fifo, update our current write position.
    with m.If(self.write_en & ~self.full):
        m.d.sync += current_write_pointer.eq(next_write_pointer)

    # If we're committing a FIFO write, update our committed position.
    with m.If(self.write_commit):
        m.d.sync += committed_write_pointer.eq(current_write_pointer)

    # If we're discarding our current write, reset our current position,
    with m.If(self.write_discard):
        m.d.sync += current_write_pointer.eq(committed_write_pointer)

    #
    # Read port.
    #

    # We'll track two pieces of data: our _committed_ read position, and our current un-committed read one.
    # This will allow us to rapidly backtrack to our pre-commit position.
    committed_read_pointer = Signal(address_range)
    current_read_pointer = Signal(address_range)

    # Compute the location for the next read, accounting for wraparound. We'll not assume a binary-sized
    # buffer; so we'll compute the wraparound manually.
    next_read_pointer = Signal.like(current_read_pointer)
    with m.If(current_read_pointer == self.depth):
        m.d.comb += next_read_pointer.eq(0)
    with m.Else():
        m.d.comb += next_read_pointer.eq(current_read_pointer + 1)

    # Our memory always takes a single cycle to provide its read output; so we'll update its address
    # "one cycle in advance". Accordingly, if we're about to advance the FIFO, we'll use the next read
    # address as our input. If we're not, we'll use the current one.
    with m.If(self.read_en & ~self.empty):
        m.d.comb += read_port.addr.eq(next_read_pointer)
    with m.Else():
        m.d.comb += read_port.addr.eq(current_read_pointer)

    # If we're reading from our the fifo, update our current read position.
    with m.If(self.read_en & ~self.empty):
        m.d.sync += current_read_pointer.eq(next_read_pointer)

    # If we're committing a FIFO read, update our committed read position.
    with m.If(self.read_commit):
        m.d.sync += committed_read_pointer.eq(current_read_pointer)

    # If we're discarding our current read, reset our current read position.
    with m.If(self.read_discard):
        m.d.sync += current_read_pointer.eq(committed_read_pointer)

    #
    # FIFO status.
    #

    # Our FIFO is empty if our read and write pointers are in the same place. We'll use the current
    # read position (which leads ahead) and the committed write position (which lags behind).
    m.d.comb += self.empty.eq(current_read_pointer == committed_write_pointer)

    # For our space available, we'll use the current write position (which leads ahead) and our committed
    # read position (which lags behind). This yields two cases: one where the buffer isn't wrapped around,
    # and one where it is.
    with m.If(self.full):
        m.d.comb += self.space_available.eq(0)
    with m.Elif(committed_read_pointer <= current_write_pointer):
        m.d.comb += self.space_available.eq(
            self.depth - (current_write_pointer - committed_read_pointer))
    with m.Else():
        m.d.comb += self.space_available.eq(
            committed_read_pointer - current_write_pointer - 1)

    # Our FIFO is full if we don't have any space available.
    m.d.comb += self.full.eq(next_write_pointer == committed_read_pointer)

    # If we're not supposed to be in the sync domain, rename our sync domain to the target.
    if self.domain != "sync":
        m = DomainRenamer({"sync": self.domain})(m)

    return m
class IntegratedLogicAnalyzer(Elaboratable):
    """ Super-simple integrated-logic-analyzer generator class for LUNA.

    Attributes
    ----------
    trigger: Signal(), input
        A strobe that determines when we should start sampling.
    sampling: Signal(), output
        Indicates when sampling is in progress.
    complete: Signal(), output
        Indicates when sampling is complete and ready to be read.

    captured_sample_number: Signal(), input
        Selects which sample the ILA will output. Effectively the address for the ILA's
        sample buffer.
    captured_sample: Signal(), output
        The sample corresponding to the relevant sample number.
        Can be broken apart by using Cat(*signals).

    Parameters
    ----------
    signals: iterable of Signals
        An iterable of signals that should be captured by the ILA.
    sample_depth: int
        The depth of the desired buffer, in samples.

    domain: string
        The clock domain in which the ILA should operate.
    sample_rate: float
        Cosmetic indication of the sample rate. Used to format output.
    samples_pretrigger: int
        The number of our samples which should be captured _before_ the trigger.
        This also can act like an implicit synchronizer; so asynchronous inputs
        are allowed if this number is >= 2. Note that the trigger strobe is read
        on the rising edge of the clock.
    """

    def __init__(self,
                 *,
                 signals,
                 sample_depth,
                 domain="sync",
                 sample_rate=60e6,
                 samples_pretrigger=1):
        self.domain = domain
        self.signals = signals
        # All captured signals concatenated into a single word; its width
        # determines the sample memory's width.
        self.inputs = Cat(*signals)
        self.sample_width = len(self.inputs)
        self.sample_depth = sample_depth
        self.samples_pretrigger = samples_pretrigger
        self.sample_rate = sample_rate
        self.sample_period = 1 / sample_rate

        #
        # Create a backing store for our samples.
        #
        self.mem = Memory(width=self.sample_width,
                          depth=sample_depth,
                          name="ila_buffer")

        #
        # I/O port
        #
        self.trigger = Signal()
        self.sampling = Signal()
        self.complete = Signal()

        self.captured_sample_number = Signal(range(0, self.sample_depth))
        self.captured_sample = Signal(self.sample_width)

    def elaborate(self, platform):
        m = Module()

        # TODO: switch this to a single-port RAM

        # Memory ports.
        write_port = self.mem.write_port()
        read_port = self.mem.read_port(domain='comb')
        m.submodules += [write_port, read_port]

        # If necessary, create synchronized versions of the relevant signals.
        if self.samples_pretrigger >= 2:
            delayed_inputs = Signal.like(self.inputs)
            m.submodules += FFSynchronizer(self.inputs,
                                           delayed_inputs,
                                           stages=self.samples_pretrigger)
        elif self.samples_pretrigger == 1:
            delayed_inputs = Signal.like(self.inputs)
            m.d.sync += delayed_inputs.eq(self.inputs)
        else:
            # No pretrigger: capture the inputs directly.
            delayed_inputs = self.inputs

        # Counter that keeps track of our write position.
        write_position = Signal(range(0, self.sample_depth))

        # Set up our write port to capture the input signals,
        # and our read port to provide the output.
        m.d.comb += [
            write_port.data.eq(delayed_inputs),
            write_port.addr.eq(write_position),
            self.captured_sample.eq(read_port.data),
            read_port.addr.eq(self.captured_sample_number)
        ]

        # NOTE(review): looks like a debug leftover — `test` is 1 bit wide
        # while read_port.addr is wider, so this truncates; confirm it is
        # still needed before removing.
        self.test = Signal()
        m.d.comb += self.test.eq(read_port.addr)

        # Don't sample unless our FSM asserts our sample signal explicitly.
        m.d.sync += write_port.en.eq(0)

        with m.FSM() as fsm:

            m.d.comb += self.sampling.eq(~fsm.ongoing("IDLE"))

            # IDLE: wait for the trigger strobe
            with m.State('IDLE'):

                with m.If(self.trigger):
                    m.next = 'SAMPLE'

                    # Grab a sample as our trigger is asserted.
                    m.d.sync += [
                        write_port.en.eq(1),
                        write_position.eq(0),
                        self.complete.eq(0),
                    ]

            # SAMPLE: do our sampling
            with m.State('SAMPLE'):

                # Sample until we run out of samples.
                m.d.sync += [
                    write_port.en.eq(1),
                    write_position.eq(write_position + 1),
                ]

                # If this is the last sample, we're done. Finish up.
                with m.If(write_position + 1 == self.sample_depth):
                    m.next = "IDLE"
                    m.d.sync += [self.complete.eq(1), write_port.en.eq(0)]

        # Convert our sync domain to the domain requested by the user, if necessary.
        if self.domain != "sync":
            m = DomainRenamer({"sync": self.domain})(m)

        return m
def elaborate(self, platform): m = Module() size = self.configuration.getOption('predictor', 'size') if size == 0 or (size & (size - 1)): raise ValueError(f'size must be a power of 2: {size}') _bits_index = log2_int(size) _bits_tag = 32 - _bits_index _btb_width = 1 + 32 + _bits_tag # valid + data + tag _btb_depth = 1 << _bits_index _btb_layout = [('target', 32), ('tag', _bits_tag), ('valid', 1)] _pc_layout = [('index', _bits_index), ('tag', _bits_tag)] btb = Memory(width=_btb_width, depth=_btb_depth) btb_rp = btb.read_port() btb_wp = btb.write_port() bht = Memory(width=2, depth=_btb_depth) bht_rp = bht.read_port() bht_wp = bht.write_port() m.submodules += btb_rp, btb_wp m.submodules += bht_rp, bht_wp btb_r = Record(_btb_layout) a_pc = Record(_pc_layout) f_pc = Record(_pc_layout) m_pc = Record(_pc_layout) hit = Signal() pstate_next = Signal(2) m.d.comb += [ btb_rp.addr.eq(Mux(self.a_stall, f_pc.index, a_pc.index)), bht_rp.addr.eq(Mux(self.a_stall, f_pc.index, a_pc.index)), btb_r.eq(btb_rp.data), # a_pc.eq(self.a_pc), f_pc.eq(self.f_pc), hit.eq(btb_r.valid & (btb_r.tag == f_pc.tag)), # self.f_prediction.eq(hit & bht_rp.data[1]), self.f_prediction_state.eq(bht_rp.data), self.f_prediction_pc.eq(btb_r.target) ] # update m.d.comb += [ btb_wp.addr.eq(m_pc.index), btb_wp.data.eq(Cat(self.m_target_pc, m_pc.tag, 1)), btb_wp.en.eq(self.m_update), bht_wp.addr.eq(m_pc.index), bht_wp.data.eq(pstate_next), bht_wp.en.eq(self.m_update), m_pc.eq(self.m_pc), pstate_next.eq(0) ] with m.Switch(Cat(self.m_prediction_state, self.m_take_jmp_branch)): with m.Case(0b000, 0b001): m.d.comb += pstate_next.eq(0b00) with m.Case(0b010, 0b100): m.d.comb += pstate_next.eq(0b01) with m.Case(0b011, 0b101): m.d.comb += pstate_next.eq(0b10) with m.Case(0b110, 0b111): m.d.comb += pstate_next.eq(0b11) return m
from nmigen import Memory, Signal, Module from nmigen import Elaboratable mem = Memory(width=32, depth=16) rp = mem.read_port() wp = mem.write_port() m = Module() m.submodules.rp = rp m.submodules.wp = wp class Check(Elaboratable): def __init__(self): self.check_in = Signal(range(4)) self.check_out = Signal(range(4)) def elaborate(self, platform): m = Module() m.d.sync += self.check_out.eq(self.check_in) return m check = Check() m.submodules.check = check from nmigen.back.pysim import Simulator, Delay, Settle sim = Simulator(m) sim.add_clock(1e-6)
def elaborate(self, platform):
    """Build a small picorv32 SoC.

    Compiles the Rust firmware with ``cargo objcopy``, loads the raw binary
    into a 32-bit-wide memory behind the CPU's native memory interface, and
    wires up the memory-mapped register accesses described by
    ``self.memory_mappings``.

    Memory layout (in words): ``[0, RAM_SIZE)`` is zero-initialised RAM; the
    app image follows at word ``RAM_SIZE`` (byte address 1024, matching
    ``PROGADDR_RESET``).

    Raises:
        subprocess.CalledProcessError: if the firmware build fails.
    """
    if platform is not None:
        # Hand the Verilog source to the build. add_file consumes the stream
        # eagerly, so the handle can be closed immediately (it previously
        # leaked: the file was opened and never closed).
        with open("picorv32.v", "r") as verilog_src:
            platform.add_file("picorv32.v", verilog_src)

    if not os.path.exists("build"):
        os.makedirs("build")

    # Build the firmware; the raw binary lands in build/app.bin
    # (path is relative to cwd="app").
    subprocess.run(
        [
            "cargo", "objcopy", "--release", "--",
            "-O", "binary", "../build/app.bin"
        ],
        cwd="app",
    ).check_returncode()

    with open("build/app.bin", "rb") as f:
        b = bytearray(f.read())

    # Pad to a whole number of 32-bit words. (Fixed: the previous
    # `4 - (len(b) % 4)` appended a spurious zero word when the binary
    # was already aligned.)
    b.extend([0] * (-len(b) % 4))
    app = np.frombuffer(b, dtype='<u4').tolist()  # little-endian words

    RAM_SIZE = 256  # words of zeroed RAM placed before the app image
    init = ([0] * RAM_SIZE) + app
    MEM_SIZE = len(init)  # words

    mem = Memory(
        width=32,
        depth=MEM_SIZE,
        init=init,
    )

    resetn = Signal()
    mem_valid = Signal()
    mem_ready = Signal()
    mem_addr = Signal(32)
    mem_wdata = Signal(32)
    mem_wstrb = Signal(4)
    mem_rdata = Signal(32)

    m = Module()
    # picorv32 uses an active-low reset.
    m.d.comb += resetn.eq(~ResetSignal())

    m.submodules.picorv32 = Instance(
        "picorv32",
        p_ENABLE_COUNTERS=0,
        p_LATCHED_MEM_RDATA=1,
        p_TWO_STAGE_SHIFT=0,
        p_TWO_CYCLE_ALU=1,
        p_CATCH_MISALIGN=0,
        p_CATCH_ILLINSN=0,
        p_COMPRESSED_ISA=1,
        p_ENABLE_MUL=1,
        p_PROGADDR_RESET=1024,
        p_PROGADDR_IRQ=1024 + 0x10,
        i_clk=ClockSignal(),
        i_resetn=resetn,
        o_mem_valid=mem_valid,
        i_mem_ready=mem_ready,
        o_mem_addr=mem_addr,
        o_mem_wdata=mem_wdata,
        o_mem_wstrb=mem_wstrb,
        i_mem_rdata=mem_rdata,
    )

    m.submodules.read_port = read_port = mem.read_port(transparent=False)
    m.submodules.write_port = write_port = mem.write_port(granularity=8)

    # mem_ready defaults low each cycle; the request logic below pulses it
    # for one cycle per completed access (last sync assignment wins).
    m.d.sync += mem_ready.eq(0)
    m.d.comb += [
        # CPU addresses are byte addresses; the memory is word-indexed.
        read_port.addr.eq(mem_addr >> 2),
        mem_rdata.eq(read_port.data),
        read_port.en.eq((~mem_wstrb).bool()),  # read only when not writing
        write_port.addr.eq(mem_addr >> 2),
        write_port.data.eq(mem_wdata),
        write_port.en.eq(mem_wstrb),  # per-byte write strobes
    ]

    with m.If(resetn & mem_valid & ~mem_ready):
        # Accesses that hit the RAM/app image complete in one cycle.
        with m.If((mem_addr >> 2) < MEM_SIZE):
            m.d.sync += mem_ready.eq(1)
        # Memory-mapped registers living outside the RAM image.
        for mapping in self.memory_mappings:
            if mapping.writing_enabled:
                with m.If(mem_wstrb.bool() & (mem_addr == mapping.addr)):
                    if mapping.write is not None:
                        # Custom write hook supplies its own logic.
                        # NOTE(review): presumably the hook also asserts
                        # mem_ready — confirm, or the CPU would stall here.
                        mapping.write(m, mem_wdata)
                    else:
                        m.d.sync += [
                            mapping.signal.eq(mem_wdata),
                            mem_ready.eq(1),
                        ]
            if mapping.read:
                with m.If((~mem_wstrb).bool() & (mem_addr == mapping.addr)):
                    # Override the RAM read data with the mapped signal.
                    m.d.comb += mem_rdata.eq(mapping.signal)
                    m.d.sync += mem_ready.eq(1)
            if not mapping.read and not (mapping.write or mapping.writing_enabled):
                print(mapping.addr)
                print("mapping doesn't specify read or write", file=sys.stderr)
    return m
def elaborate(self, platform):
    """Build an N-way set-associative cache.

    Each way has its own tag/data memories and per-line valid bits. A 1-bit
    per-line flag selects the way to replace, and a two-state FSM drives the
    backing bus to refill a whole line on a read miss. When ``enable_write``
    is set, CPU write hits byte-merge into the cached word.
    """
    m = Module()

    # Per-way view: a full line of data, its tag, the valid flag, and the
    # replacement-select strobe. `sel_we` only exists for writable caches.
    way_layout = [
        ('data', 32 * self.nwords),
        ('tag', self.s1_address.tag.shape()),
        ('valid', 1),
        ('sel_lru', 1)
    ]
    if self.enable_write:
        way_layout.append(('sel_we', 1))

    ways = Array(Record(way_layout) for _way in range(self.nways))
    fill_cnt = Signal.like(self.s1_address.offset)

    # set the LRU
    if self.nways == 1:
        # Single way: replacement choice is constant, no state needed.
        lru = Const(0)  # self.nlines
    else:
        # One bit per line; toggled on each completed refill so the other
        # way is picked next time (2-way pseudo-LRU).
        lru = Signal(self.nlines)
        with m.If(self.bus_valid & self.bus_ack & self.bus_last):  # err ^ ack == 1
            _lru = lru.bit_select(self.s2_address.line, 1)
            m.d.sync += lru.bit_select(self.s2_address.line, 1).eq(~_lru)

    # hit/miss
    way_hit = m.submodules.way_hit = Encoder(self.nways)
    for idx, way in enumerate(ways):
        m.d.comb += way_hit.i[idx].eq((way.tag == self.s2_address.tag) & way.valid)

    # Miss when no way matched (encoder's "none" output).
    m.d.comb += self.s2_miss.eq(way_hit.n)
    if self.enable_write:
        # Route CPU writes to the hitting way.
        m.d.comb += ways[way_hit.o].sel_we.eq(self.s2_we & self.s2_valid)

    # read data: select the requested 32-bit word out of the hitting line.
    m.d.comb += self.s2_rdata.eq(ways[way_hit.o].data.word_select(self.s2_address.offset, 32))

    with m.FSM():
        with m.State('READ'):
            with m.If(self.s2_re & self.s2_miss & self.s2_valid):
                # Start a refill at the missing word; the burst wraps around
                # the line and stops one word before where it began.
                m.d.sync += [
                    self.bus_addr.eq(self.s2_address),  # WARNING extra_bits
                    self.bus_valid.eq(1),
                    fill_cnt.eq(self.s2_address.offset - 1)
                ]
                m.next = 'REFILL'

        with m.State('REFILL'):
            # Last beat when the running offset reaches the stop count.
            m.d.comb += self.bus_last.eq(fill_cnt == self.bus_addr.offset)
            with m.If(self.bus_ack):
                m.d.sync += self.bus_addr.offset.eq(self.bus_addr.offset + 1)
            with m.If(self.bus_ack & self.bus_last | self.bus_err):
                m.d.sync += self.bus_valid.eq(0)
            with m.If(~self.bus_valid | self.s1_flush):
                # in case of flush, abort ongoing refill.
                m.next = 'READ'
                m.d.sync += self.bus_valid.eq(0)

    # mark the way to use (replace)
    m.d.comb += ways[lru.bit_select(self.s2_address.line, 1)].sel_lru.eq(self.bus_valid)

    # generate for N ways
    for way in ways:
        # create the memory structures for valid, tag and data.
        valid = Signal(self.nlines)  # one valid bit per line

        tag_m = Memory(width=len(way.tag), depth=self.nlines)
        tag_rp = tag_m.read_port()
        tag_wp = tag_m.write_port()
        m.submodules += tag_rp, tag_wp

        data_m = Memory(width=len(way.data), depth=self.nlines)
        data_rp = data_m.read_port()
        # One write-enable bit per 32-bit word within the line.
        data_wp = data_m.write_port(granularity=32)
        m.submodules += data_rp, data_wp

        # handle valid
        with m.If(self.s1_flush & self.s1_valid):  # flush
            m.d.sync += valid.eq(0)
        with m.Elif(way.sel_lru & self.bus_last & self.bus_ack):  # refill ok
            m.d.sync += valid.bit_select(self.bus_addr.line, 1).eq(1)
        with m.Elif(way.sel_lru & self.bus_err):  # refill error
            m.d.sync += valid.bit_select(self.bus_addr.line, 1).eq(0)
        with m.Elif(self.s2_evict & self.s2_valid & (way.tag == self.s2_address.tag)):  # evict
            m.d.sync += valid.bit_select(self.s2_address.line, 1).eq(0)

        # assignments
        m.d.comb += [
            # Read the line addressed by stage 1 (held at the stage-2 line
            # while stalled, keeping the sync read pipeline-aligned).
            tag_rp.addr.eq(Mux(self.s1_stall, self.s2_address.line, self.s1_address.line)),
            tag_wp.addr.eq(self.bus_addr.line),
            tag_wp.data.eq(self.bus_addr.tag),
            tag_wp.en.eq(way.sel_lru & self.bus_ack & self.bus_last),
            data_rp.addr.eq(Mux(self.s1_stall, self.s2_address.line, self.s1_address.line)),
            way.data.eq(data_rp.data),
            way.tag.eq(tag_rp.data),
            way.valid.eq(valid.bit_select(self.s2_address.line, 1))
        ]

        # update cache: CPU or Refill
        if self.enable_write:
            update_addr = Signal(len(data_wp.addr))
            update_data = Signal(len(data_wp.data))
            update_we = Signal(len(data_wp.en))
            aux_wdata = Signal(32)

            with m.If(self.bus_valid):
                # Refill: replicate the bus word across the line and enable
                # only the slot currently being filled.
                m.d.comb += [
                    update_addr.eq(self.bus_addr.line),
                    update_data.eq(Repl(self.bus_data, self.nwords)),
                    update_we.bit_select(self.bus_addr.offset, 1).eq(way.sel_lru & self.bus_ack),
                ]
            with m.Else():
                # CPU write hit: write the byte-merged word into its slot.
                m.d.comb += [
                    update_addr.eq(self.s2_address.line),
                    update_data.eq(Repl(aux_wdata, self.nwords)),
                    update_we.bit_select(self.s2_address.offset, 1).eq(way.sel_we & ~self.s2_miss)
                ]
            m.d.comb += [
                # Byte merge: per byte-select bit, take the written byte from
                # s2_wdata and keep the other bytes from the cached word.
                aux_wdata.eq(Cat(
                    Mux(self.s2_sel[0], self.s2_wdata.word_select(0, 8),
                        self.s2_rdata.word_select(0, 8)),
                    Mux(self.s2_sel[1], self.s2_wdata.word_select(1, 8),
                        self.s2_rdata.word_select(1, 8)),
                    Mux(self.s2_sel[2], self.s2_wdata.word_select(2, 8),
                        self.s2_rdata.word_select(2, 8)),
                    Mux(self.s2_sel[3], self.s2_wdata.word_select(3, 8),
                        self.s2_rdata.word_select(3, 8))
                )),
                #
                data_wp.addr.eq(update_addr),
                data_wp.data.eq(update_data),
                data_wp.en.eq(update_we),
            ]
        else:
            # Read-only cache: only refills ever write the data memory.
            m.d.comb += [
                data_wp.addr.eq(self.bus_addr.line),
                data_wp.data.eq(Repl(self.bus_data, self.nwords)),
                data_wp.en.bit_select(self.bus_addr.offset, 1).eq(way.sel_lru & self.bus_ack),
            ]
    return m