예제 #1
0
파일: bus.py 프로젝트: zyp/luna
    def elaborate(self, platform):
        """ Generates the gateware that merges all input interfaces into the output.

        Three groups of logic are produced:
          - an encoder that turns the per-interface 'valid' lines into a select value;
          - a switch, keyed on that select value, that forwards every muxed signal
            of the selected interface to the output;
          - plain OR-reduction for the or'd signals, plus a fan-out of the
            pass-back signals from the output to every input.

        The ``platform`` argument is not consulted.
        """
        m = Module()

        # Stage one: the encoder. Each of its input bits is driven by the
        # 'valid' field of the interface occupying the same position.
        m.submodules.encoder = encoder = Encoder(len(self._inputs))
        m.d.comb += [
            encoder.i[position].eq(getattr(source, self._valid_field))
            for position, source in enumerate(self._inputs)
        ]

        # Stage two: the multiplexer. One case per interface; inside each case
        # every muxed signal of that interface drives its output counterpart.
        with m.Switch(encoder.o):
            for position, source in enumerate(self._inputs):
                with m.Case(position):
                    m.d.comb += [
                        self._get_signal(self.output, name).eq(
                            self._get_signal(source, name))
                        for name in self._mux_signals
                    ]

        # Stage three: the OR'd signals. Fold every input's copy of the signal
        # together, starting from a constant zero, and drive the output with it.
        for name in self._or_signals:
            target = self._get_signal(self.output, name)

            combined = 0
            for source in self._inputs:
                combined = combined | self._get_signal(source, name)

            m.d.comb += target.eq(combined)

        # Last, the pass-back signals travel the other way: the output's copy
        # is broadcast to every one of the inputs.
        for name in self._pass_signals:
            upstream = self._get_signal(self.output, name)
            m.d.comb += [
                self._get_signal(source, name).eq(upstream)
                for source in self._inputs
            ]

        return m
예제 #2
0
    def elaborate(self, platform):
        """Decode the top address bits and connect the bus to the matching device.

        Every key of ``self._devices`` is compared with the four most
        significant bits of ``self.bus.addr``; a match only counts while at
        least one bit of the read mask or the write mask is set. The match
        lines feed an encoder, and while the encoder's ``n`` output is low the
        bus is wired to the device bus picked by the encoder's ``o`` output.

        ``platform`` is not used.
        """
        m = Module()

        m.submodules.encoder = encoder = Encoder(width=len(self._devices))

        # Terms shared by every match line.
        region = self.bus.addr[-4:]
        requesting = self.bus.rmask.any() | self.bus.wmask.any()

        m.d.comb += [
            encoder.i[slot].eq((region == top_bits) & requesting)
            for slot, top_bits in enumerate(self._devices)
        ]

        with m.If(~encoder.n):
            # The device buses are wrapped in an Array so they can be indexed
            # by the encoder's output.
            target = Array(self._devices.values())[encoder.o]

            # Request path: bus -> device (the top four address bits are dropped).
            m.d.comb += target.addr[:-4].eq(self.bus.addr[:-4])
            m.d.comb += target.rmask.eq(self.bus.rmask)
            m.d.comb += target.wmask.eq(self.bus.wmask)
            m.d.comb += target.wdata.eq(self.bus.wdata)

            # Response path: device -> bus.
            m.d.comb += self.bus.rdata.eq(target.rdata)
            m.d.comb += self.bus.ack.eq(target.ack)

        return m
예제 #3
0
    def elaborate(self, platform):
        """Assemble the cache's lookup, refill and per-way storage logic.

        Every way owns a tag memory, a data memory and a bitmap of valid lines.
        The stage-2 address tag is compared against each way's tag to detect a
        hit. On a read miss, an FSM requests a whole line over the bus,
        beginning at the missed word and finishing on the word just before it,
        and writes it into the way currently selected for replacement.

        When ``platform`` equals ``"formal"``, assumptions are added so that
        the FSM starts in ``CHECK`` and every valid bitmap starts cleared.
        """
        m = Module()

        way_layout = [
            ("data",   self.nwords * 32),
            ("tag",    self.s2_addr.tag.shape()),
            ("valid",  1),
            ("bus_re", 1),
        ]
        ways = Array(Record(way_layout) for _ in range(self.nways))

        # Replacement choice: constant for a single way, otherwise a bit that
        # flips each time a refill finishes without a bus error.
        if self.nways == 1:
            way_lru = Const(0)
        elif self.nways == 2:
            way_lru = Signal()
            refill_ok = (self.bus_re & self.bus_valid
                         & self.bus_last & ~self.bus_error)
            with m.If(refill_ok):
                m.d.sync += way_lru.eq(~way_lru)

        # Only the way picked for replacement sees the bus read-enable.
        m.d.comb += ways[way_lru].bus_re.eq(self.bus_re)

        # Hit detection: one encoder input per way.
        way_hit = m.submodules.way_hit = Encoder(self.nways)
        m.d.comb += [
            way_hit.i[index].eq((way.tag == self.s2_addr.tag) & way.valid)
            for index, way in enumerate(ways)
        ]

        m.d.comb += self.s2_miss.eq(way_hit.n)
        m.d.comb += self.s2_rdata.eq(
            ways[way_hit.o].data.word_select(self.s2_addr.offset, 32))

        with m.FSM() as fsm:
            last_offset = Signal.like(self.s2_addr.offset)

            with m.State("CHECK"):
                # A valid stage-2 read that missed kicks off a refill.
                with m.If(self.s2_re & self.s2_miss & self.s2_valid):
                    m.d.sync += self.bus_addr.eq(self.s2_addr)
                    m.d.sync += self.bus_re.eq(1)
                    m.d.sync += last_offset.eq(self.s2_addr.offset - 1)
                    m.next = "REFILL"

            with m.State("REFILL"):
                m.d.comb += self.bus_last.eq(self.bus_addr.offset == last_offset)

                # Step to the next word on each valid bus beat.
                with m.If(self.bus_valid):
                    m.d.sync += self.bus_addr.offset.eq(self.bus_addr.offset + 1)

                # Stop requesting after the final word or on an error.
                with m.If((self.bus_valid & self.bus_last) | self.bus_error):
                    m.d.sync += self.bus_re.eq(0)

                with m.If(~self.bus_re & ~self.s1_stall):
                    m.next = "CHECK"

        if platform == "formal":
            with m.If(Initial()):
                m.d.comb += Assume(fsm.ongoing("CHECK"))

        for way in ways:
            valid_lines = Signal(self.nlines)

            # Conditions shared by the valid bitmap and the write ports.
            line_fill = way.bus_re & self.bus_valid
            line_done = line_fill & self.bus_last

            # Valid bitmap, highest priority first: flush, refill error,
            # refill completion, eviction.
            with m.If(self.s1_flush & self.s1_valid):
                m.d.sync += valid_lines.eq(0)
            with m.Elif(way.bus_re & self.bus_error):
                m.d.sync += valid_lines.bit_select(self.bus_addr.line, 1).eq(0)
            with m.Elif(line_done):
                m.d.sync += valid_lines.bit_select(self.bus_addr.line, 1).eq(1)
            with m.Elif(self.s2_evict & self.s2_valid & (way.tag == self.s2_addr.tag)):
                m.d.sync += valid_lines.bit_select(self.s2_addr.line, 1).eq(0)

            tag_mem = Memory(width=len(way.tag), depth=self.nlines)
            tag_rp = tag_mem.read_port()
            tag_wp = tag_mem.write_port()
            m.submodules += [tag_rp, tag_wp]

            data_mem = Memory(width=len(way.data), depth=self.nlines)
            data_rp = data_mem.read_port()
            data_wp = data_mem.write_port(granularity=32)
            m.submodules += [data_rp, data_wp]

            # While stage 1 is stalled the memories keep reading the stage-2 line.
            lookup_line = Mux(self.s1_stall, self.s2_addr.line, self.s1_addr.line)
            m.d.comb += [
                tag_rp.addr.eq(lookup_line),
                data_rp.addr.eq(lookup_line),
            ]

            # The tag is written once, when the last word of the line arrives.
            m.d.comb += [
                tag_wp.addr.eq(self.bus_addr.line),
                tag_wp.en.eq(line_done),
                tag_wp.data.eq(self.bus_addr.tag),
            ]

            # Data is written one 32-bit word per beat, in the lane selected
            # by the bus offset.
            m.d.comb += [
                data_wp.addr.eq(self.bus_addr.line),
                data_wp.en.bit_select(self.bus_addr.offset, 1).eq(line_fill),
                data_wp.data.eq(self.bus_rdata << (self.bus_addr.offset * 32)),
            ]

            # Expose this way's lookup results to the hit logic.
            m.d.comb += [
                way.valid.eq(valid_lines.bit_select(self.s2_addr.line, 1)),
                way.tag.eq(tag_rp.data),
                way.data.eq(data_rp.data),
            ]

            if platform == "formal":
                with m.If(Initial()):
                    m.d.comb += Assume(~valid_lines.bool())

        return m
예제 #4
0
    def elaborate(self, platform: Platform) -> Module:
        """Build the cache logic and return it as a ``Module``.

        The generated logic consists of:
          - two counters, for accesses and for misses;
          - hit detection across ``self.nways`` ways, using an encoder;
          - a per-line LRU bit (only when there is more than one way);
          - a READ/REFILL state machine that fetches a line over the bus;
          - per-way valid bits, tag memory and data memory;
          - when ``self.enable_write`` is set, a data write port shared between
            refills and CPU writes; otherwise, snoop logic that invalidates
            lines matching writes observed on ``self.dcache_snoop``.

        ``platform`` is not referenced by this method.
        """
        m = Module()

        snoop_addr = Record(self.pc_layout)
        snoop_valid = Signal()

        # -------------------------------------------------------------------------
        # Performance counter
        # TODO: connect to CSR's performance counter
        # Count every stage-1 access that is valid and not stalled.
        with m.If(~self.s1_stall & self.s1_valid & self.s1_access):
            m.d.sync += self.access_cnt.eq(self.access_cnt + 1)
        # Count misses only while no bus request is active (~bus_valid).
        # NOTE(review): presumably so that a miss is counted once rather than
        # on every cycle of its refill -- confirm.
        with m.If(self.s2_valid & self.s2_miss & ~self.bus_valid
                  & self.s2_access):
            m.d.sync += self.miss_cnt.eq(self.miss_cnt + 1)
        # -------------------------------------------------------------------------

        # Per-way record: line data, tag, valid bit and selection flags.
        way_layout = [('data', 32 * self.nwords),
                      ('tag', self.s1_address.tag.shape()), ('valid', 1),
                      ('sel_lru', 1), ('snoop_hit', 1)]
        if self.enable_write:
            way_layout.append(('sel_we', 1))

        ways = Array(
            Record(way_layout, name='way_idx{}'.format(_way))
            for _way in range(self.nways))
        # Holds the offset at which the refill ends (see the FSM below).
        fill_cnt = Signal.like(self.s1_address.offset)

        # Check hit/miss
        way_hit = m.submodules.way_hit = Encoder(self.nways)
        for idx, way in enumerate(ways):
            m.d.comb += way_hit.i[idx].eq((way.tag == self.s2_address.tag)
                                          & way.valid)

        m.d.comb += self.s2_miss.eq(way_hit.n)
        if self.enable_write:
            # Assuming there is a hit, flag the way that hit as the one that
            # will be written.
            m.d.comb += ways[way_hit.o].sel_we.eq(self.s2_we & self.s2_valid)

        # set the LRU
        if self.nways == 1:
            # One way: LRU is useless
            lru = Const(0)  # self.nlines
        else:
            # LRU is an N-bit vector, each bit indicating the set to replace;
            # since NWAY is at most 2, each LRU entry is a single bit.
            lru = Signal(self.nlines)
            _lru = lru.bit_select(self.s2_address.line, 1)
            write_ended = self.bus_valid & self.bus_ack & self.bus_last  # err ^ ack == 1
            # A hit on the way the LRU bit currently points at also flips the bit.
            access_hit = ~self.s2_miss & self.s2_valid & (way_hit.o == _lru)
            with m.If(write_ended | access_hit):
                m.d.sync += _lru.eq(~_lru)

        # read data from the cache
        m.d.comb += self.s2_rdata.eq(ways[way_hit.o].data.word_select(
            self.s2_address.offset, 32))

        # Internal Snoop
        snoop_use_cache = Signal()
        snoop_tag_match = Signal()
        snoop_line_match = Signal()
        snoop_cancel_refill = Signal()
        if not self.enable_write:
            # Number of address bits spanned by the [start_addr, end_addr) range.
            bits_range = log2_int(self.end_addr - self.start_addr,
                                  need_pow2=False)

            m.d.comb += [
                snoop_addr.eq(self.dcache_snoop.addr),  # aux
                # A snooped write counts once it is valid and acknowledged.
                snoop_valid.eq(self.dcache_snoop.we & self.dcache_snoop.valid
                               & self.dcache_snoop.ack),
                # The snooped address lies in this cache's range when its upper
                # bits equal those of start_addr.
                snoop_use_cache.eq(snoop_addr[bits_range:] == (
                    self.start_addr >> bits_range)),
                snoop_tag_match.eq(snoop_addr.tag == self.s2_address.tag),
                snoop_line_match.eq(snoop_addr.line == self.s2_address.line),
                # Cancel a refill when the snooped write matches the tag and
                # line of the stage-2 address.
                snoop_cancel_refill.eq(snoop_use_cache & snoop_valid
                                       & snoop_line_match & snoop_tag_match),
            ]
        else:
            m.d.comb += snoop_cancel_refill.eq(0)

        with m.FSM():
            with m.State('READ'):
                # A valid read that missed starts a refill at the missed word;
                # fill_cnt is set to the offset just before it.
                with m.If(self.s2_re & self.s2_miss & self.s2_valid):
                    m.d.sync += [
                        self.bus_addr.eq(self.s2_address),
                        self.bus_valid.eq(1),
                        fill_cnt.eq(self.s2_address.offset - 1)
                    ]
                    m.next = 'REFILL'
            with m.State('REFILL'):
                m.d.comb += self.bus_last.eq(fill_cnt == self.bus_addr.offset)
                # Advance to the next word on every acknowledged beat.
                with m.If(self.bus_ack):
                    m.d.sync += self.bus_addr.offset.eq(self.bus_addr.offset +
                                                        1)
                # Drop the request after the last word, or on a bus error.
                with m.If(self.bus_ack & self.bus_last | self.bus_err):
                    m.d.sync += self.bus_valid.eq(0)
                # Leave REFILL once the request is gone, on a flush, or when a
                # snooped write cancels the refill.
                with m.If(~self.bus_valid | self.s1_flush
                          | snoop_cancel_refill):
                    m.next = 'READ'
                    m.d.sync += self.bus_valid.eq(0)

        # mark the way to use (replace)
        m.d.comb += ways[lru.bit_select(self.s2_address.line,
                                        1)].sel_lru.eq(self.bus_valid)

        # generate for N ways
        for way in ways:
            # create the memory structures for valid, tag and data.
            valid = Signal(self.nlines)  # Valid bits

            tag_m = Memory(width=len(way.tag), depth=self.nlines)  # tag memory
            tag_rp = tag_m.read_port()
            # Second read port on the tag memory, used by the snoop logic.
            # NOTE(review): it is registered for every configuration, but its
            # address is only driven in the non-write branch below.
            snoop_rp = tag_m.read_port()
            tag_wp = tag_m.write_port()
            m.submodules += tag_rp, tag_wp, snoop_rp

            data_m = Memory(width=len(way.data),
                            depth=self.nlines)  # data memory
            data_rp = data_m.read_port()
            data_wp = data_m.write_port(
                granularity=32
            )  # implies that only whole 32-bit words can be written.
            m.submodules += data_rp, data_wp

            # handle valid
            # The chain below is prioritised: flush, refill ok, refill error, evict.
            with m.If(self.s1_flush & self.s1_valid):  # flush
                m.d.sync += valid.eq(0)
            with m.Elif(way.sel_lru & self.bus_last
                        & self.bus_ack):  # refill ok
                m.d.sync += valid.bit_select(self.bus_addr.line, 1).eq(1)
            with m.Elif(way.sel_lru & self.bus_err):  # refill error
                m.d.sync += valid.bit_select(self.bus_addr.line, 1).eq(0)
            with m.Elif(self.s2_evict & self.s2_valid
                        & (way.tag == self.s2_address.tag)):  # evict
                m.d.sync += valid.bit_select(self.s2_address.line, 1).eq(0)

            # assignments
            m.d.comb += [
                # While stage 1 is stalled, keep reading the stage-2 line.
                tag_rp.addr.eq(
                    Mux(self.s1_stall, self.s2_address.line,
                        self.s1_address.line)),
                # The tag is written when the last word of a refill is acknowledged.
                tag_wp.addr.eq(self.bus_addr.line),
                tag_wp.data.eq(self.bus_addr.tag),
                tag_wp.en.eq(way.sel_lru & self.bus_ack & self.bus_last),
                data_rp.addr.eq(
                    Mux(self.s1_stall, self.s2_address.line,
                        self.s1_address.line)),
                way.data.eq(data_rp.data),
                way.tag.eq(tag_rp.data),
                way.valid.eq(valid.bit_select(self.s2_address.line, 1))
            ]

            # update cache: CPU or Refill
            # The write port is multiplexed because the memory can only have
            # one write port.
            if self.enable_write:
                update_addr = Signal(len(data_wp.addr))
                update_data = Signal(len(data_wp.data))
                update_we = Signal(len(data_wp.en))
                aux_wdata = Signal(32)

                # While a bus request is active the refill owns the write port;
                # otherwise the CPU write path does.
                with m.If(self.bus_valid):
                    m.d.comb += [
                        update_addr.eq(self.bus_addr.line),
                        # The bus word is replicated across the line; the
                        # enable bit picks the 32-bit lane that is written.
                        update_data.eq(Repl(self.bus_data, self.nwords)),
                        update_we.bit_select(self.bus_addr.offset,
                                             1).eq(way.sel_lru & self.bus_ack),
                    ]
                with m.Else():
                    m.d.comb += [
                        update_addr.eq(self.s2_address.line),
                        update_data.eq(Repl(aux_wdata, self.nwords)),
                        # CPU writes only update the cache on a hit.
                        update_we.bit_select(self.s2_address.offset,
                                             1).eq(way.sel_we & ~self.s2_miss)
                    ]
                m.d.comb += [
                    # Aux data: the write port has no byte granularity, so for
                    # a CPU write the replacement word has to be assembled
                    # here: each byte comes from s2_wdata when its s2_sel bit
                    # is set, and from the current read data otherwise.
                    aux_wdata.eq(
                        Cat(
                            Mux(self.s2_sel[0],
                                self.s2_wdata.word_select(0, 8),
                                self.s2_rdata.word_select(0, 8)),
                            Mux(self.s2_sel[1],
                                self.s2_wdata.word_select(1, 8),
                                self.s2_rdata.word_select(1, 8)),
                            Mux(self.s2_sel[2],
                                self.s2_wdata.word_select(2, 8),
                                self.s2_rdata.word_select(2, 8)),
                            Mux(self.s2_sel[3],
                                self.s2_wdata.word_select(3, 8),
                                self.s2_rdata.word_select(3, 8)))),
                    #
                    data_wp.addr.eq(update_addr),
                    data_wp.data.eq(update_data),
                    data_wp.en.eq(update_we),
                ]
            else:
                # Read-only configuration: the write port is used by refills only.
                m.d.comb += [
                    data_wp.addr.eq(self.bus_addr.line),
                    data_wp.data.eq(Repl(self.bus_data, self.nwords)),
                    data_wp.en.bit_select(self.bus_addr.offset,
                                          1).eq(way.sel_lru & self.bus_ack),
                ]

                # --------------------------------------------------------------
                # internal snoop
                # for FENCE.i instruction
                _match_snoop = Signal()

                m.d.comb += [
                    snoop_rp.addr.eq(snoop_addr.line),  # read tag memory
                    _match_snoop.eq(snoop_rp.data == snoop_addr.tag),
                    way.snoop_hit.eq(snoop_use_cache & snoop_valid
                                     & _match_snoop
                                     & valid.bit_select(snoop_addr.line, 1)),
                ]
                # If the snooped write hits a line held by this way, invalidate
                # that line. This statement is added after the valid chain
                # above, so it takes precedence over it for the same bit.
                with m.If(way.snoop_hit):
                    m.d.sync += valid.bit_select(snoop_addr.line, 1).eq(0)
                # --------------------------------------------------------------

        return m
예제 #5
0
    def elaborate(self, platform):
        """Build the cache logic and return the resulting module.

        The generated logic consists of:
          - a per-line LRU bit (only when there is more than one way);
          - hit detection across ``self.nways`` ways, using an encoder;
          - a READ/REFILL state machine that fetches a line over the bus;
          - per-way valid bits, tag memory and data memory;
          - when ``self.enable_write`` is set, a data write port shared between
            refills and CPU writes.

        ``platform`` is not referenced by this method.
        """
        m = Module()

        # Per-way record: line data, tag, valid bit and replacement flag.
        way_layout = [
            ('data',     32 * self.nwords),
            ('tag',      self.s1_address.tag.shape()),
            ('valid',    1),
            ('sel_lru',  1)
        ]
        if self.enable_write:
            way_layout.append(('sel_we',   1))

        ways     = Array(Record(way_layout) for _way in range(self.nways))
        # Holds the offset at which the refill ends (see the FSM below).
        fill_cnt = Signal.like(self.s1_address.offset)
        # set the LRU
        if self.nways == 1:
            lru = Const(0)  # self.nlines
        else:
            # One LRU bit per line; the bit for the stage-2 line is flipped
            # when the last word of a refill is acknowledged.
            lru = Signal(self.nlines)
            with m.If(self.bus_valid & self.bus_ack & self.bus_last):  # err ^ ack == 1
                _lru = lru.bit_select(self.s2_address.line, 1)
                m.d.sync += lru.bit_select(self.s2_address.line, 1).eq(~_lru)

        # hit/miss
        way_hit = m.submodules.way_hit = Encoder(self.nways)
        for idx, way in enumerate(ways):
            m.d.comb += way_hit.i[idx].eq((way.tag == self.s2_address.tag) & way.valid)

        m.d.comb += self.s2_miss.eq(way_hit.n)
        if self.enable_write:
            # Flag the way selected by the hit encoder as the write target.
            m.d.comb += ways[way_hit.o].sel_we.eq(self.s2_we & self.s2_valid)

        # read data
        m.d.comb += self.s2_rdata.eq(ways[way_hit.o].data.word_select(self.s2_address.offset, 32))

        with m.FSM():
            with m.State('READ'):
                # A valid read that missed starts a refill at the missed word;
                # fill_cnt is set to the offset just before it.
                with m.If(self.s2_re & self.s2_miss & self.s2_valid):
                    m.d.sync += [
                        self.bus_addr.eq(self.s2_address),  # WARNING extra_bits
                        self.bus_valid.eq(1),
                        fill_cnt.eq(self.s2_address.offset - 1)
                    ]
                    m.next = 'REFILL'
            with m.State('REFILL'):
                m.d.comb += self.bus_last.eq(fill_cnt == self.bus_addr.offset)
                # Advance to the next word on every acknowledged beat.
                with m.If(self.bus_ack):
                    m.d.sync += self.bus_addr.offset.eq(self.bus_addr.offset + 1)
                # Drop the request after the last word, or on a bus error.
                with m.If(self.bus_ack & self.bus_last | self.bus_err):
                    m.d.sync += self.bus_valid.eq(0)
                with m.If(~self.bus_valid | self.s1_flush):
                    # in case of flush, abort ongoing refill.
                    m.next = 'READ'
                    m.d.sync += self.bus_valid.eq(0)

        # mark the way to use (replace)
        m.d.comb += ways[lru.bit_select(self.s2_address.line, 1)].sel_lru.eq(self.bus_valid)

        # generate for N ways
        for way in ways:
            # create the memory structures for valid, tag and data.
            valid = Signal(self.nlines)

            tag_m  = Memory(width=len(way.tag), depth=self.nlines)
            tag_rp = tag_m.read_port()
            tag_wp = tag_m.write_port()
            m.submodules += tag_rp, tag_wp

            data_m  = Memory(width=len(way.data), depth=self.nlines)
            data_rp = data_m.read_port()
            # granularity=32 gives one write-enable bit per 32-bit word.
            data_wp = data_m.write_port(granularity=32)
            m.submodules += data_rp, data_wp

            # handle valid
            # The chain below is prioritised: flush, refill ok, refill error, evict.
            with m.If(self.s1_flush & self.s1_valid):  # flush
                m.d.sync += valid.eq(0)
            with m.Elif(way.sel_lru & self.bus_last & self.bus_ack):  # refill ok
                m.d.sync += valid.bit_select(self.bus_addr.line, 1).eq(1)
            with m.Elif(way.sel_lru & self.bus_err):  # refill error
                m.d.sync += valid.bit_select(self.bus_addr.line, 1).eq(0)
            with m.Elif(self.s2_evict & self.s2_valid & (way.tag == self.s2_address.tag)):  # evict
                m.d.sync += valid.bit_select(self.s2_address.line, 1).eq(0)

            # assignments
            m.d.comb += [
                # While stage 1 is stalled, keep reading the stage-2 line.
                tag_rp.addr.eq(Mux(self.s1_stall, self.s2_address.line, self.s1_address.line)),
                # The tag is written when the last word of a refill is acknowledged.
                tag_wp.addr.eq(self.bus_addr.line),
                tag_wp.data.eq(self.bus_addr.tag),
                tag_wp.en.eq(way.sel_lru & self.bus_ack & self.bus_last),

                data_rp.addr.eq(Mux(self.s1_stall, self.s2_address.line, self.s1_address.line)),

                way.data.eq(data_rp.data),
                way.tag.eq(tag_rp.data),
                way.valid.eq(valid.bit_select(self.s2_address.line, 1))
            ]

            # update cache: CPU or Refill
            if self.enable_write:
                update_addr = Signal(len(data_wp.addr))
                update_data = Signal(len(data_wp.data))
                update_we   = Signal(len(data_wp.en))
                aux_wdata   = Signal(32)

                # While a bus request is active the refill owns the write port;
                # otherwise the CPU write path does.
                with m.If(self.bus_valid):
                    m.d.comb += [
                        update_addr.eq(self.bus_addr.line),
                        # The bus word is replicated across the line; the
                        # enable bit picks the 32-bit lane that is written.
                        update_data.eq(Repl(self.bus_data, self.nwords)),
                        update_we.bit_select(self.bus_addr.offset, 1).eq(way.sel_lru & self.bus_ack),
                    ]
                with m.Else():
                    m.d.comb += [
                        update_addr.eq(self.s2_address.line),
                        update_data.eq(Repl(aux_wdata, self.nwords)),
                        # CPU writes only update the cache on a hit.
                        update_we.bit_select(self.s2_address.offset, 1).eq(way.sel_we & ~self.s2_miss)
                    ]
                m.d.comb += [
                    # Assemble the word for a CPU write: each byte comes from
                    # s2_wdata when its s2_sel bit is set, and from the current
                    # read data otherwise.
                    aux_wdata.eq(Cat(
                        Mux(self.s2_sel[0], self.s2_wdata.word_select(0, 8), self.s2_rdata.word_select(0, 8)),
                        Mux(self.s2_sel[1], self.s2_wdata.word_select(1, 8), self.s2_rdata.word_select(1, 8)),
                        Mux(self.s2_sel[2], self.s2_wdata.word_select(2, 8), self.s2_rdata.word_select(2, 8)),
                        Mux(self.s2_sel[3], self.s2_wdata.word_select(3, 8), self.s2_rdata.word_select(3, 8))
                    )),
                    #
                    data_wp.addr.eq(update_addr),
                    data_wp.data.eq(update_data),
                    data_wp.en.eq(update_we),
                ]
            else:
                # Read-only configuration: the write port is used by refills only.
                m.d.comb += [
                    data_wp.addr.eq(self.bus_addr.line),
                    data_wp.data.eq(Repl(self.bus_data, self.nwords)),
                    data_wp.en.bit_select(self.bus_addr.offset, 1).eq(way.sel_lru & self.bus_ack),
                ]

        return m
예제 #6
0
파일: cache.py 프로젝트: Strobokopp/minerva
    def elaborate(self, platform):
        """Build the cache logic and return the resulting module.

        The generated logic consists of:
          - a FLUSH/CHECK/REFILL state machine that raises stall requests
            while flushing or waiting for refill data;
          - way selection by comparing stored tags with the stage-2 address;
          - a latch around the stage-2 read data (see the XXX note below);
          - one tag memory and one data memory per way, written by refills,
            flushes and stage-2 writes.

        ``platform`` is not referenced by this method.
        """
        m = Module()

        def split_adr(adr):
            # NOTE(review): `split` is defined elsewhere; judging by how its
            # result is unpacked below, it presumably returns the
            # (offset, line, tag) fields of the address -- confirm.
            return split(adr, self.offsetbits, self.linebits, self.tagbits)

        s1_offset, s1_line, s1_tag = split_adr(self.s1_address)
        s2_offset, s2_line, s2_tag = split_adr(self.s2_address)
        refill_offset, refill_line, refill_tag = split_adr(self.refill_address)

        tag_layout = [("value", self.tagbits), ("valid", 1)]
        way_layout = [("data", 32), ("tag", tag_layout), ("enable", 1)]
        ways = Array(Record(way_layout) for _ in range(self.nb_ways))

        refilling = Signal()
        # One flag per word of a line, set as refill words arrive.
        refill_status = Array(Signal() for _ in range(self.nb_words))

        # Index of the way that is enabled for writing.
        refill_lru = Signal()
        if self.nb_ways == 1:
            m.d.comb += refill_lru.eq(0)
        else:
            # TODO: Implement a scalable pseudo-LRU refill policy.
            assert self.nb_ways == 2
            # With two ways, alternate on every refill request.
            with m.If(self.refill_request):
                m.d.sync += refill_lru.eq(~refill_lru)
        m.d.comb += ways[refill_lru].enable.eq(1)

        # A flush is requested when any of the flush sources is asserted.
        flush = Signal()
        m.d.comb += flush.eq(reduce(or_, self._flush_sources, 0))

        flushing = Signal()
        # Line counter for the flush; it resets to the highest line index and
        # counts down to zero in the FLUSH state.
        flush_line = Signal(self.linebits, reset=2**self.linebits - 1)

        flush_stall = Signal()
        refill_stall = Signal()
        m.d.comb += self.stall_request.eq(flush_stall | refill_stall)

        # Registered copies of the stage-1 line index: one updated every
        # cycle, one updated only while stage 1 is not stalled.
        latch_s1_line = Signal.like(s1_line)
        latch_s1_line_no_stall = Signal.like(s1_line)
        m.d.sync += latch_s1_line.eq(s1_line)
        with m.If(~self.s1_stall):
            m.d.sync += latch_s1_line_no_stall.eq(s1_line)

        # select the way containing the requested data
        way_sel = m.submodules.way_sel = Encoder(self.nb_ways)
        for j, way in enumerate(ways):
            m.d.comb += way_sel.i[j].eq((latch_s1_line_no_stall == s2_line)
                                        & (way.tag.value == s2_tag)
                                        & way.tag.valid)

        # A miss is a stage-2 read for which the way encoder flags no
        # valid selection.
        miss = Signal()
        m.d.comb += miss.eq(self.s2_re & way_sel.n)

        # cache control FSM

        # Internal read data, before the latch workaround further below.
        s2_dat_r = Signal.like(self.s2_dat_r)
        with m.FSM() as fsm:
            m.d.comb += flushing.eq(fsm.ongoing("FLUSH"))
            # NOTE(review): FLUSH is the first state declared; presumably it
            # is therefore the FSM's initial state, so the cache flushes
            # itself after reset -- confirm.
            with m.State("FLUSH"):
                m.d.comb += flush_stall.eq(1)
                m.d.sync += flush_line.eq(flush_line - 1)
                with m.If(flush_line == 0):
                    m.next = "CHECK"

            with m.State("CHECK"):
                m.d.comb += s2_dat_r.eq(ways[way_sel.o].data)
                with m.If(flush):
                    m.next = "FLUSH"
                with m.Elif(miss):
                    # Stall on a miss; issue the refill request once the
                    # refill side reports that it is ready.
                    m.d.comb += refill_stall.eq(1)
                    with m.If(self.refill_ready):
                        m.d.comb += self.refill_request.eq(1)
                        m.next = "REFILL"

            m.d.comb += refilling.eq(fsm.ongoing("REFILL"))
            with m.State("REFILL"):
                # Stage 2 is reading from the line that is being refilled.
                with m.If((refill_tag == s2_tag) & (refill_line == s2_line)
                          & ~self.s2_we):
                    # Resume execution as soon as the requested word is available.
                    with m.If(refill_offset == s2_offset):
                        m.d.comb += [
                            refill_stall.eq(~self.refill_valid),
                            s2_dat_r.eq(self.refill_data)
                        ]
                    with m.Else():
                        # We use refill_status to track valid words during a refill.
                        m.d.comb += [
                            refill_stall.eq(~refill_status[s2_offset]),
                            s2_dat_r.eq(ways[way_sel.o].data)
                        ]
                with m.Else():
                    # Any other access stalls on a miss, on a write, or when
                    # its tag differs from the one being refilled.
                    m.d.comb += [
                        refill_stall.eq(miss | self.s2_we
                                        | (refill_tag != s2_tag)),
                        s2_dat_r.eq(ways[way_sel.o].data)
                    ]
                with m.If(self.refill_valid):
                    m.d.sync += refill_status[refill_offset].eq(1)
                    # The last word clears all status flags and ends the refill.
                    with m.If(self.last_refill):
                        m.d.sync += (s.eq(0) for s in refill_status)
                        m.next = "CHECK"

        # XXX: This is a dirty workaround to temporarily avoid using a RE
        # on the tag and data memory ports.
        # https://github.com/m-labs/nmigen/issues/16
        # https://github.com/YosysHQ/yosys/issues/760
        latch_s2_dat_r = Signal.like(self.s2_dat_r)
        latch_s2_stall = Signal()
        latch_stall_request = Signal()
        restore_s2 = Signal()
        # Previous-cycle values of the stall signals, used for edge detection.
        m.d.sync += [
            latch_s2_stall.eq(self.s2_stall),
            latch_stall_request.eq(self.stall_request)
        ]
        # Capture the read data when a stage-2 stall begins, when a refill is
        # requested right after a stall request, or when the stall request
        # ends while stage 2 is still stalled.
        with m.If(~latch_s2_stall & self.s2_stall \
                | latch_stall_request & self.refill_request \
                | latch_stall_request & ~self.stall_request & self.s2_stall):
            m.d.sync += [
                restore_s2.eq(~self.stall_request | refilling),
                latch_s2_dat_r.eq(s2_dat_r)
            ]
        # Present the captured data while restoring, except when the refill is
        # currently delivering the very address stage 2 is reading.
        with m.If(latch_s2_stall & restore_s2
                  & ~(refilling & (self.refill_address == self.s2_address))):
            m.d.comb += self.s2_dat_r.eq(latch_s2_dat_r)
        with m.Else():
            m.d.comb += self.s2_dat_r.eq(s2_dat_r)

        # tag memory

        # Value written to the tag memories; shared by all ways.
        tag_din = Record(tag_layout)
        with m.If(refilling):
            # The refilled tag only becomes valid with the last refill word.
            m.d.comb += [
                tag_din.value.eq(refill_tag),
                tag_din.valid.eq(self.last_refill & self.refill_valid)
            ]
        with m.Elif(self.s2_we):
            # On a write, rewrite the enabled way's tag; it stays valid only
            # if that way is the one that matched.
            m.d.comb += [
                tag_din.value.eq(ways[refill_lru].tag.value),
                tag_din.valid.eq(way_sel.i.part(refill_lru, 1))
            ]
        with m.Else():
            # Otherwise write an invalid tag.
            m.d.comb += [tag_din.value.eq(0), tag_din.valid.eq(0)]

        for way in ways:
            tag_mem = Memory(len(tag_din), 2**self.linebits)
            # NOTE(review): the names `tag_wp` and `tag_rp` are assigned on
            # every iteration of this loop -- confirm the submodule container
            # accepts a repeated name when nb_ways is 2.
            tag_wp = m.submodules.tag_wp = tag_mem.write_port()
            tag_rp = m.submodules.tag_rp = tag_mem.read_port()

            with m.If(refilling):
                m.d.comb += [
                    tag_wp.addr.eq(refill_line),
                    tag_wp.en.eq(way.enable)
                ]
            with m.Elif(flushing):
                # While flushing, every way's tag at flush_line is overwritten.
                m.d.comb += [tag_wp.addr.eq(flush_line), tag_wp.en.eq(1)]
            with m.Else():
                m.d.comb += [
                    tag_wp.addr.eq(s2_line),
                    tag_wp.en.eq(way.enable & self.s2_we)
                ]
            m.d.comb += tag_wp.data.eq(tag_din)

            m.d.comb += tag_rp.addr.eq(s1_line)
            # Use the read port's output while the line read last cycle is the
            # stage-2 line; otherwise fall back to the last captured value.
            latch_tag_rp_data = Signal.like(tag_rp.data)
            with m.If(latch_s1_line == s2_line):
                m.d.sync += latch_tag_rp_data.eq(tag_rp.data)
                m.d.comb += way.tag.eq(tag_rp.data)
            with m.Else():
                m.d.comb += way.tag.eq(latch_tag_rp_data)

        # data memory

        # Word written by a stage-2 write: selected bytes come from s2_dat_w,
        # the others from the enabled way's current data.
        data_din = Signal(32)
        for i in range(len(self.s2_sel)):
            byte = slice(i * 8, (i + 1) * 8)
            with m.If(self.s2_sel[i]):
                m.d.comb += data_din[byte].eq(self.s2_dat_w[byte])
            with m.Else():
                m.d.comb += data_din[byte].eq(ways[refill_lru].data[byte])

        for way in ways:
            data_mem = Memory(self.nb_words * 32, 2**self.linebits)
            # NOTE(review): as with the tag ports, `data_wp` and `data_rp` are
            # reused as submodule names on every iteration -- confirm.
            data_wp = m.submodules.data_wp = data_mem.write_port(
                granularity=32)
            data_rp = m.submodules.data_rp = data_mem.read_port()

            # NOTE(review): `displacer` is defined elsewhere; presumably it
            # returns statements that place its first argument into the slot
            # of the third argument chosen by the second -- confirm.
            with m.If(refilling):
                m.d.comb += [
                    data_wp.addr.eq(refill_line),
                    displacer(way.enable, refill_offset, data_wp.en),
                    displacer(self.refill_data, refill_offset, data_wp.data)
                ]
            with m.Elif(flushing):
                # Only the address is driven during a flush; the write enable
                # is not assigned in this branch.
                m.d.comb += data_wp.addr.eq(flush_line)
            with m.Else():
                m.d.comb += [
                    data_wp.addr.eq(s2_line),
                    displacer(way.enable & self.s2_we, s2_offset, data_wp.en),
                    displacer(data_din, s2_offset, data_wp.data)
                ]

            m.d.comb += data_rp.addr.eq(s1_line)
            # Same latching scheme as for the tags; the 32-bit word at
            # s2_offset is then picked out of the line.
            latch_data_rp_data = Signal.like(data_rp.data)
            with m.If(latch_s1_line == s2_line):
                m.d.sync += latch_data_rp_data.eq(data_rp.data)
                m.d.comb += way.data.eq(data_rp.data.part(s2_offset * 32, 32))
            with m.Else():
                m.d.comb += way.data.eq(
                    latch_data_rp_data.part(s2_offset * 32, 32))

        return m