def bitslip_test(self, data_width, length=128):
    """Compare the BitSlip gateware with BitSlipModel for every slip value.

    A reproducible pseudo-random sequence of ``length`` words, each
    ``data_width`` bits wide, is fed to a fresh ``BitSlip`` instance for
    every slip amount in ``range(data_width)``. Whatever the simulation
    produced is compared with the equally long prefix of the model output.
    """
    # Fixed seed: the stimulus is identical on every run.
    rng = random.Random(42)
    word_limit = 2**data_width
    sequence = [rng.randrange(word_limit) for _ in range(length)]

    for slip in range(data_width):
        dut = BitSlip(data_width)
        dut.bitslip = slip
        dut.i_sequence = sequence
        run_simulation(dut, main_generator(dut))

        # NOTE(review): the second model argument (4) is presumably the
        # pipeline latency of the gateware -- confirm against BitSlipModel.
        reference = BitSlipModel(data_width, 4).simulate(slip, sequence)

        # The simulation may stop before the model does, so only the part
        # that was actually captured is compared.
        captured = dut.o_sequence
        self.assertEqual(captured, reference[:len(captured)])
def __init__(self):
    """Wrap a 40-bit BitSlip between a stream sink and a stream source.

    ``value`` selects the slip amount. The BitSlip is clock-enabled, so it
    only advances on cycles where a word is transferred on ``source``.
    """
    self.value = value = Signal(6)
    self.sink = sink = stream.Endpoint([("data", 40)])
    self.source = source = stream.Endpoint([("data", 40)])

    # # #

    # CEInserter adds a `ce` input that gates the BitSlip registers.
    bitslip = CEInserter()(BitSlip(40))
    self.submodules += bitslip

    self.comb += [
        # control: pass-through handshake, BitSlip advances on transfers only
        source.valid.eq(sink.valid),
        sink.ready.eq(source.ready),
        bitslip.value.eq(value),
        bitslip.ce.eq(source.valid & source.ready),
        # datapath: sink -> BitSlip -> source
        bitslip.i.eq(sink.data),
        source.data.eq(bitslip.o),
    ]
def __init__(self, pads):
    """Build a 4-phase DDR3 PHY from OSERDESE3/ISERDESE3/ODELAYE3/IDELAYE3.

    The PHY exposes a DFI interface (``self.dfi``), its fixed timing
    description (``self.settings``) and a set of CSRs used to control
    write leveling and the per-byte-lane read/write delays.

    Parameters
    ----------
    pads
        DRAM pad record. The code reads ``a``, ``ba``, ``dq``, ``dm``,
        ``dqs_p``, ``dqs_n``, ``clk_p``, ``clk_n`` and the single-bit
        controls ``ras_n``, ``cas_n``, ``we_n``, ``cs_n``, ``cke``,
        ``odt`` and ``reset_n``.
    """
    addressbits = len(pads.a)
    bankbits = len(pads.ba)
    databits = len(pads.dq)
    nphases = 4

    # Drives EN_VTC of every variable delay element (set by default).
    self._en_vtc = CSRStorage(reset=1)

    # Write leveling enable and single-shot DQS strobe.
    self._wlevel_en = CSRStorage()
    self._wlevel_strobe = CSR()

    # One select bit per byte lane; delay/bitslip commands below only
    # act on the lanes whose bit is set.
    self._dly_sel = CSRStorage(databits // 8)

    # Read (input) delay reset/increment and bitslip increment.
    self._rdly_dq_rst = CSR()
    self._rdly_dq_inc = CSR()
    self._rdly_dq_bitslip = CSR()

    # Write (output) delay reset/increment for DQ/DM and for DQS.
    self._wdly_dq_rst = CSR()
    self._wdly_dq_inc = CSR()
    self._wdly_dqs_rst = CSR()
    self._wdly_dqs_inc = CSR()

    # Tap count reported by the first DQS output delay after start-up.
    self._wdly_dqs_taps = CSRStatus(9)

    self.settings = sdram_settings.PhySettings(memtype="DDR3",
                                               dfi_databits=2 * databits,
                                               nphases=nphases,
                                               rdphase=0,
                                               wrphase=2,
                                               rdcmdphase=1,
                                               wrcmdphase=0,
                                               cl=7,
                                               cwl=6,
                                               read_latency=8,
                                               write_latency=2)

    self.dfi = Interface(addressbits, bankbits, 2 * databits, nphases)

    # # #

    # Clock
    # The DRAM clock is produced by serializing a constant alternating
    # pattern, then passed through a fixed zero-value output delay.
    clk_o_nodelay = Signal()
    clk_o_delayed = Signal()
    self.specials += [
        Instance("OSERDESE3",
                 p_DATA_WIDTH=8, p_INIT=0,
                 p_IS_CLK_INVERTED=0, p_IS_CLKDIV_INVERTED=0,
                 p_IS_RST_INVERTED=0,

                 o_OQ=clk_o_nodelay,
                 i_RST=ResetSignal(),
                 i_CLK=ClockSignal("sys4x"), i_CLKDIV=ClockSignal(),
                 i_D=0b10101010),
        Instance("ODELAYE3",
                 p_CASCADE="NONE", p_UPDATE_MODE="ASYNC",
                 p_REFCLK_FREQUENCY=200.0,
                 p_DELAY_FORMAT="TIME", p_DELAY_TYPE="FIXED",
                 p_DELAY_VALUE=0,

                 i_ODATAIN=clk_o_nodelay, o_DATAOUT=clk_o_delayed),
        Instance("OBUFDS",
                 i_I=clk_o_delayed,
                 o_O=pads.clk_p,
                 o_OB=pads.clk_n)
    ]

    # Addresses and commands
    # Every command/address bit of a DFI phase is repeated twice in the
    # 8-bit serializer word, i.e. it is held for two serialized bits.
    for i in range(addressbits):
        a_o_nodelay = Signal()
        self.specials += [
            Instance("OSERDESE3",
                     p_DATA_WIDTH=8, p_INIT=0,
                     p_IS_CLK_INVERTED=0, p_IS_CLKDIV_INVERTED=0,
                     p_IS_RST_INVERTED=0,

                     o_OQ=a_o_nodelay,
                     i_RST=ResetSignal(),
                     i_CLK=ClockSignal("sys4x"), i_CLKDIV=ClockSignal(),
                     i_D=Cat(self.dfi.phases[0].address[i], self.dfi.phases[0].address[i],
                             self.dfi.phases[1].address[i], self.dfi.phases[1].address[i],
                             self.dfi.phases[2].address[i], self.dfi.phases[2].address[i],
                             self.dfi.phases[3].address[i], self.dfi.phases[3].address[i])),
            Instance("ODELAYE3",
                     p_CASCADE="NONE", p_UPDATE_MODE="ASYNC",
                     p_REFCLK_FREQUENCY=200.0,
                     p_DELAY_FORMAT="TIME", p_DELAY_TYPE="FIXED",
                     p_DELAY_VALUE=0,

                     i_ODATAIN=a_o_nodelay, o_DATAOUT=pads.a[i])
        ]
    for i in range(bankbits):
        ba_o_nodelay = Signal()
        self.specials += [
            Instance("OSERDESE3",
                     p_DATA_WIDTH=8, p_INIT=0,
                     p_IS_CLK_INVERTED=0, p_IS_CLKDIV_INVERTED=0,
                     p_IS_RST_INVERTED=0,

                     o_OQ=ba_o_nodelay,
                     i_RST=ResetSignal(),
                     i_CLK=ClockSignal("sys4x"), i_CLKDIV=ClockSignal(),
                     i_D=Cat(self.dfi.phases[0].bank[i], self.dfi.phases[0].bank[i],
                             self.dfi.phases[1].bank[i], self.dfi.phases[1].bank[i],
                             self.dfi.phases[2].bank[i], self.dfi.phases[2].bank[i],
                             self.dfi.phases[3].bank[i], self.dfi.phases[3].bank[i])),
            Instance("ODELAYE3",
                     p_CASCADE="NONE", p_UPDATE_MODE="ASYNC",
                     p_REFCLK_FREQUENCY=200.0,
                     p_DELAY_FORMAT="TIME", p_DELAY_TYPE="FIXED",
                     p_DELAY_VALUE=0,

                     i_ODATAIN=ba_o_nodelay, o_DATAOUT=pads.ba[i])
        ]
    # Single-bit controls: the pad and the DFI phase field share a name.
    for name in "ras_n", "cas_n", "we_n", "cs_n", "cke", "odt", "reset_n":
        x_o_nodelay = Signal()
        self.specials += [
            Instance("OSERDESE3",
                     p_DATA_WIDTH=8, p_INIT=0,
                     p_IS_CLK_INVERTED=0, p_IS_CLKDIV_INVERTED=0,
                     p_IS_RST_INVERTED=0,

                     o_OQ=x_o_nodelay,
                     i_RST=ResetSignal(),
                     i_CLK=ClockSignal("sys4x"), i_CLKDIV=ClockSignal(),
                     i_D=Cat(getattr(self.dfi.phases[0], name), getattr(self.dfi.phases[0], name),
                             getattr(self.dfi.phases[1], name), getattr(self.dfi.phases[1], name),
                             getattr(self.dfi.phases[2], name), getattr(self.dfi.phases[2], name),
                             getattr(self.dfi.phases[3], name), getattr(self.dfi.phases[3], name))),
            Instance("ODELAYE3",
                     p_CASCADE="NONE", p_UPDATE_MODE="ASYNC",
                     p_REFCLK_FREQUENCY=200.0,
                     p_DELAY_FORMAT="TIME", p_DELAY_TYPE="FIXED",
                     p_DELAY_VALUE=0,

                     i_ODATAIN=x_o_nodelay, o_DATAOUT=getattr(pads, name))
        ]

    # DQS and DM
    oe_dqs = Signal()
    dqs_serdes_pattern = Signal(8)
    # Write leveling: DQS is held at 0 except for a single-bit pulse on
    # the cycle the strobe CSR is written. Otherwise DQS toggles.
    self.comb += \
        If(self._wlevel_en.storage,
            If(self._wlevel_strobe.re,
                dqs_serdes_pattern.eq(0b00000001)
            ).Else(
                dqs_serdes_pattern.eq(0b00000000)
            )
        ).Else(
            dqs_serdes_pattern.eq(0b01010101)
        )
    for i in range(databits // 8):
        dm_o_nodelay = Signal()
        # Each phase contributes two mask bits for lane i: index i and
        # index databits//8 + i of that phase's wrdata_mask.
        self.specials += \
            Instance("OSERDESE3",
                     p_DATA_WIDTH=8, p_INIT=0,
                     p_IS_CLK_INVERTED=0, p_IS_CLKDIV_INVERTED=0,
                     p_IS_RST_INVERTED=0,

                     o_OQ=dm_o_nodelay,
                     i_RST=ResetSignal(),
                     i_CLK=ClockSignal("sys4x"), i_CLKDIV=ClockSignal(),
                     i_D=Cat(self.dfi.phases[0].wrdata_mask[i], self.dfi.phases[0].wrdata_mask[databits//8+i],
                             self.dfi.phases[1].wrdata_mask[i], self.dfi.phases[1].wrdata_mask[databits//8+i],
                             self.dfi.phases[2].wrdata_mask[i], self.dfi.phases[2].wrdata_mask[databits//8+i],
                             self.dfi.phases[3].wrdata_mask[i], self.dfi.phases[3].wrdata_mask[databits//8+i])
            )
        # DM shares the DQ write-delay reset/increment CSRs.
        self.specials += \
            Instance("ODELAYE3",
                     p_CASCADE="NONE", p_UPDATE_MODE="ASYNC",
                     p_REFCLK_FREQUENCY=200.0,
                     p_IS_CLK_INVERTED=0, p_IS_RST_INVERTED=0,
                     p_DELAY_FORMAT="TIME", p_DELAY_TYPE="VARIABLE",
                     p_DELAY_VALUE=0,

                     i_CLK=ClockSignal(),
                     i_INC=1, i_EN_VTC=self._en_vtc.storage,
                     i_RST=self._dly_sel.storage[i] & self._wdly_dq_rst.re,
                     i_CE=self._dly_sel.storage[i] & self._wdly_dq_inc.re,

                     i_ODATAIN=dm_o_nodelay, o_DATAOUT=pads.dm[i]
            )

        dqs_nodelay = Signal()
        dqs_delayed = Signal()
        dqs_t = Signal()
        if i == 0:
            # Store initial DQS DELAY_VALUE (in taps) to
            # be able to reload DELAY_VALUE after reset.
            # The tap count of lane 0 is latched into the status CSR
            # once, when the wait timer expires.
            dqs_taps = Signal(9)
            dqs_taps_timer = WaitTimer(2**16)
            self.submodules += dqs_taps_timer
            dqs_taps_done = Signal()
            self.comb += dqs_taps_timer.wait.eq(~dqs_taps_done)
            self.sync += \
                If(dqs_taps_timer.done,
                    dqs_taps_done.eq(1),
                    self._wdly_dqs_taps.status.eq(dqs_taps)
                )
        self.specials += [
            Instance(
                "OSERDESE3",
                p_DATA_WIDTH=8, p_INIT=0,
                p_IS_CLK_INVERTED=0, p_IS_CLKDIV_INVERTED=0,
                p_IS_RST_INVERTED=0,

                o_OQ=dqs_nodelay, o_T_OUT=dqs_t,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal("sys4x"), i_CLKDIV=ClockSignal(),
                i_D=Cat(dqs_serdes_pattern[0], dqs_serdes_pattern[1],
                        dqs_serdes_pattern[2], dqs_serdes_pattern[3],
                        dqs_serdes_pattern[4], dqs_serdes_pattern[5],
                        dqs_serdes_pattern[6], dqs_serdes_pattern[7]),
                i_T=~oe_dqs,
            ),
            # Only lane 0 exposes its tap count; the other lanes connect
            # CNTVALUEOUT to an unused signal.
            Instance("ODELAYE3",
                     p_CASCADE="NONE", p_UPDATE_MODE="ASYNC",
                     p_REFCLK_FREQUENCY=200.0,
                     p_IS_CLK_INVERTED=0, p_IS_RST_INVERTED=0,
                     p_DELAY_FORMAT="TIME", p_DELAY_TYPE="VARIABLE",
                     p_DELAY_VALUE=500,

                     i_CLK=ClockSignal(),
                     i_INC=1, i_EN_VTC=self._en_vtc.storage,
                     i_RST=self._dly_sel.storage[i] & self._wdly_dqs_rst.re,
                     i_CE=self._dly_sel.storage[i] & self._wdly_dqs_inc.re,

                     o_CNTVALUEOUT=Signal(9) if i != 0 else dqs_taps,
                     i_ODATAIN=dqs_nodelay, o_DATAOUT=dqs_delayed),
            Instance("IOBUFDSE3",
                     i_I=dqs_delayed, i_T=dqs_t,
                     io_IO=pads.dqs_p[i], io_IOB=pads.dqs_n[i])
        ]

    # DQ
    oe_dq = Signal()
    for i in range(databits):
        dq_o_nodelay = Signal()
        dq_o_delayed = Signal()
        dq_i_nodelay = Signal()
        dq_i_delayed = Signal()
        dq_t = Signal()
        dq_bitslip = BitSlip(8)
        # NOTE(review): the bitslip counter is cleared by the *write*
        # delay reset CSR (_wdly_dq_rst), not by _rdly_dq_rst -- confirm
        # this is what the calibration software expects.
        self.sync += \
            If(self._dly_sel.storage[i//8],
                If(self._wdly_dq_rst.re,
                    dq_bitslip.value.eq(0)
                ).Elif(self._rdly_dq_bitslip.re,
                    dq_bitslip.value.eq(dq_bitslip.value + 1)
                )
            )
        self.submodules += dq_bitslip
        self.specials += [
            # Write path: phase p supplies wrdata[i] then wrdata[databits+i].
            Instance("OSERDESE3",
                     p_DATA_WIDTH=8, p_INIT=0,
                     p_IS_CLK_INVERTED=0, p_IS_CLKDIV_INVERTED=0,
                     p_IS_RST_INVERTED=0,

                     o_OQ=dq_o_nodelay, o_T_OUT=dq_t,
                     i_RST=ResetSignal(),
                     i_CLK=ClockSignal("sys4x"), i_CLKDIV=ClockSignal(),
                     i_D=Cat(self.dfi.phases[0].wrdata[i], self.dfi.phases[0].wrdata[databits + i],
                             self.dfi.phases[1].wrdata[i], self.dfi.phases[1].wrdata[databits + i],
                             self.dfi.phases[2].wrdata[i], self.dfi.phases[2].wrdata[databits + i],
                             self.dfi.phases[3].wrdata[i], self.dfi.phases[3].wrdata[databits + i]),
                     i_T=~oe_dq),
            # Read path: deserialized word goes straight into the BitSlip.
            Instance(
                "ISERDESE3",
                p_IS_CLK_INVERTED=0,
                p_IS_CLK_B_INVERTED=1,
                p_DATA_WIDTH=8,

                i_D=dq_i_delayed,
                i_RST=ResetSignal(),
                i_FIFO_RD_CLK=0, i_FIFO_RD_EN=0,
                i_CLK=ClockSignal("sys4x"),
                i_CLK_B=ClockSignal("sys4x"),  # locally inverted
                i_CLKDIV=ClockSignal(),
                o_Q=dq_bitslip.i),
            Instance(
                "ODELAYE3",
                p_CASCADE="NONE", p_UPDATE_MODE="ASYNC",
                p_REFCLK_FREQUENCY=200.0,
                p_IS_CLK_INVERTED=0, p_IS_RST_INVERTED=0,
                p_DELAY_FORMAT="TIME", p_DELAY_TYPE="VARIABLE",
                p_DELAY_VALUE=0,

                i_CLK=ClockSignal(),
                i_INC=1, i_EN_VTC=self._en_vtc.storage,
                i_RST=self._dly_sel.storage[i // 8] & self._wdly_dq_rst.re,
                i_CE=self._dly_sel.storage[i // 8] & self._wdly_dq_inc.re,

                i_ODATAIN=dq_o_nodelay, o_DATAOUT=dq_o_delayed),
            Instance(
                "IDELAYE3",
                p_CASCADE="NONE", p_UPDATE_MODE="ASYNC",
                p_REFCLK_FREQUENCY=200.0,
                p_IS_CLK_INVERTED=0, p_IS_RST_INVERTED=0,
                p_DELAY_FORMAT="TIME", p_DELAY_SRC="IDATAIN",
                p_DELAY_TYPE="VARIABLE", p_DELAY_VALUE=0,

                i_CLK=ClockSignal(),
                i_INC=1, i_EN_VTC=self._en_vtc.storage,
                i_RST=self._dly_sel.storage[i // 8] & self._rdly_dq_rst.re,
                i_CE=self._dly_sel.storage[i // 8] & self._rdly_dq_inc.re,

                i_IDATAIN=dq_i_nodelay, o_DATAOUT=dq_i_delayed),
            Instance("IOBUF",
                     i_I=dq_o_delayed, o_O=dq_i_nodelay, i_T=dq_t,
                     io_IO=pads.dq[i])
        ]
        # Even BitSlip output bits feed rddata[i], odd bits feed
        # rddata[databits + i], two bits per phase.
        self.comb += [
            self.dfi.phases[0].rddata[i].eq(dq_bitslip.o[0]),
            self.dfi.phases[1].rddata[i].eq(dq_bitslip.o[2]),
            self.dfi.phases[2].rddata[i].eq(dq_bitslip.o[4]),
            self.dfi.phases[3].rddata[i].eq(dq_bitslip.o[6]),
            self.dfi.phases[0].rddata[databits + i].eq(dq_bitslip.o[1]),
            self.dfi.phases[1].rddata[databits + i].eq(dq_bitslip.o[3]),
            self.dfi.phases[2].rddata[databits + i].eq(dq_bitslip.o[5]),
            self.dfi.phases[3].rddata[databits + i].eq(dq_bitslip.o[7]),
        ]

    # Flow control
    #
    # total read latency = 8:
    #  2 cycles through OSERDESE3
    #  2 cycles CAS
    #  2 cycles through ISERDESE3
    #  2 cycles through BitSlip
    #
    # rddata_en goes through 7 registers here; the rddata_valid register
    # below is the eighth stage.
    rddata_en = self.dfi.phases[self.settings.rdphase].rddata_en
    for i in range(8 - 1):
        n_rddata_en = Signal()
        self.sync += n_rddata_en.eq(rddata_en)
        rddata_en = n_rddata_en
    # rddata_valid is forced high on every phase while write leveling
    # is enabled.
    self.sync += [
        phase.rddata_valid.eq(rddata_en | self._wlevel_en.storage)
        for phase in self.dfi.phases
    ]

    # Output enable: 4-deep history of wrdata_en on the write phase;
    # the bus is driven while any of the three oldest entries is set.
    oe = Signal()
    last_wrdata_en = Signal(4)
    wrphase = self.dfi.phases[self.settings.wrphase]
    self.sync += last_wrdata_en.eq(
        Cat(wrphase.wrdata_en, last_wrdata_en[:3]))
    self.comb += oe.eq(last_wrdata_en[1] | last_wrdata_en[2] |
                       last_wrdata_en[3])
    # During write leveling DQS is always driven and DQ never is.
    self.sync += \
        If(self._wlevel_en.storage,
            oe_dqs.eq(1), oe_dq.eq(0)
        ).Else(
            oe_dqs.eq(oe), oe_dq.eq(oe)
        )
def __init__(self, pads, with_odelay, memtype="DDR3", nphases=4, sys_clk_freq=100e6, iodelay_clk_freq=200e6):
    """Build a DDR2/DDR3 PHY from OSERDESE2/ISERDESE2/IDELAYE2 (+ODELAYE2).

    Parameters
    ----------
    pads
        DRAM pad record. The code reads ``a``, ``ba``, ``dq``, ``dm``,
        ``dqs_p``, ``dqs_n``, ``clk_p``, ``clk_n``, ``ras_n``, ``cas_n``,
        ``we_n``, ``cke``, ``odt`` and, when present, ``reset_n`` and
        ``cs_n``.
    with_odelay
        When true, ODELAYE2 elements are placed on DM/DQS/DQ and the
        write-leveling and write-delay CSRs are created. When false, the
        DQS serializer is clocked from the ``<ddr_clk>_dqs`` domain
        instead.
    memtype
        ``"DDR2"`` or ``"DDR3"``.
    nphases
        Number of DFI phases per system clock (DDR3 with 2 phases is
        rejected by the assertion below).
    sys_clk_freq
        System clock frequency in Hz, used to derive ``tck``.
    iodelay_clk_freq
        IODELAY reference clock in Hz; must be a key of the tap table
        below (200e6, 300e6 or 400e6).

    Raises
    ------
    ValueError
        If ``memtype`` is neither ``"DDR2"`` nor ``"DDR3"`` when the DQS
        preamble/postamble latency is selected (a helper called earlier
        may already reject it).
    """
    assert not (memtype == "DDR3" and nphases == 2)  # FIXME: Needs BL8 support for nphases=2
    # Equivalent to 1/(nphases*sys_clk_freq).
    tck = 2/(2*nphases*sys_clk_freq)
    addressbits = len(pads.a)
    bankbits = len(pads.ba)
    nranks = 1 if not hasattr(pads, "cs_n") else len(pads.cs_n)
    databits = len(pads.dq)

    # Average delay of one IODELAY tap for each supported reference clock.
    iodelay_tap_average = {
        200e6: 78e-12,
        300e6: 52e-12,
        400e6: 39e-12,  # Only valid for -3 and -2/2E speed grades
    }
    # Number of taps covering a quarter of tck.
    half_sys8x_taps = math.floor(tck/(4*iodelay_tap_average[iodelay_clk_freq]))
    self._half_sys8x_taps = CSRStorage(4, reset=half_sys8x_taps)

    if with_odelay:
        self._wlevel_en = CSRStorage()
        self._wlevel_strobe = CSR()

    # One select bit per byte lane for the delay/bitslip commands.
    self._dly_sel = CSRStorage(databits//8)

    self._rdly_dq_rst = CSR()
    self._rdly_dq_inc = CSR()
    self._rdly_dq_bitslip_rst = CSR()
    self._rdly_dq_bitslip = CSR()

    if with_odelay:
        self._wdly_dq_rst = CSR()
        self._wdly_dq_inc = CSR()
        self._wdly_dqs_rst = CSR()
        self._wdly_dqs_inc = CSR()

    # compute phy settings
    cl, cwl = get_cl_cw(memtype, tck)
    cl_sys_latency = get_sys_latency(nphases, cl)
    cwl_sys_latency = get_sys_latency(nphases, cwl)

    rdcmdphase, rdphase = get_sys_phases(nphases, cl_sys_latency, cl)
    wrcmdphase, wrphase = get_sys_phases(nphases, cwl_sys_latency, cwl)
    self.settings = PhySettings(
        memtype=memtype,
        dfi_databits=2*databits,
        nranks=nranks,
        nphases=nphases,
        rdphase=rdphase,
        wrphase=wrphase,
        rdcmdphase=rdcmdphase,
        wrcmdphase=wrcmdphase,
        cl=cl,
        cwl=cwl,
        read_latency=2 + cl_sys_latency + 2 + 3,
        write_latency=cwl_sys_latency
    )

    # The interface always has 4 phases: the serializers below read
    # phases[0..3] regardless of nphases.
    self.dfi = Interface(addressbits, bankbits, nranks, 2*databits, 4)

    # # #

    # Clock
    ddr_clk = "sys2x" if nphases == 2 else "sys4x"
    for i in range(len(pads.clk_p)):
        sd_clk_se = Signal()
        self.specials += [
            Instance("OSERDESE2",
                p_DATA_WIDTH=2*nphases, p_TRISTATE_WIDTH=1,
                p_DATA_RATE_OQ="DDR", p_DATA_RATE_TQ="BUF",
                p_SERDES_MODE="MASTER",

                o_OQ=sd_clk_se,
                i_OCE=1,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal(ddr_clk), i_CLKDIV=ClockSignal(),
                i_D1=0, i_D2=1, i_D3=0, i_D4=1,
                i_D5=0, i_D6=1, i_D7=0, i_D8=1
            ),
            Instance("OBUFDS",
                i_I=sd_clk_se,
                o_O=pads.clk_p[i],
                o_OB=pads.clk_n[i]
            )
        ]

    # Addresses and commands
    # Each phase's bit is applied to two consecutive serializer inputs.
    for i in range(addressbits):
        self.specials += \
            Instance("OSERDESE2",
                p_DATA_WIDTH=2*nphases, p_TRISTATE_WIDTH=1,
                p_DATA_RATE_OQ="DDR", p_DATA_RATE_TQ="BUF",
                p_SERDES_MODE="MASTER",

                o_OQ=pads.a[i],
                i_OCE=1,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal(ddr_clk), i_CLKDIV=ClockSignal(),
                i_D1=self.dfi.phases[0].address[i], i_D2=self.dfi.phases[0].address[i],
                i_D3=self.dfi.phases[1].address[i], i_D4=self.dfi.phases[1].address[i],
                i_D5=self.dfi.phases[2].address[i], i_D6=self.dfi.phases[2].address[i],
                i_D7=self.dfi.phases[3].address[i], i_D8=self.dfi.phases[3].address[i]
            )
    for i in range(bankbits):
        self.specials += \
            Instance("OSERDESE2",
                p_DATA_WIDTH=2*nphases, p_TRISTATE_WIDTH=1,
                p_DATA_RATE_OQ="DDR", p_DATA_RATE_TQ="BUF",
                p_SERDES_MODE="MASTER",

                o_OQ=pads.ba[i],
                i_OCE=1,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal(ddr_clk), i_CLKDIV=ClockSignal(),
                i_D1=self.dfi.phases[0].bank[i], i_D2=self.dfi.phases[0].bank[i],
                i_D3=self.dfi.phases[1].bank[i], i_D4=self.dfi.phases[1].bank[i],
                i_D5=self.dfi.phases[2].bank[i], i_D6=self.dfi.phases[2].bank[i],
                i_D7=self.dfi.phases[3].bank[i], i_D8=self.dfi.phases[3].bank[i]
            )
    # reset_n and cs_n are optional pads.
    controls = ["ras_n", "cas_n", "we_n", "cke", "odt"]
    if hasattr(pads, "reset_n"):
        controls.append("reset_n")
    if hasattr(pads, "cs_n"):
        controls.append("cs_n")
    for name in controls:
        for i in range(len(getattr(pads, name))):
            self.specials += \
                Instance("OSERDESE2",
                    p_DATA_WIDTH=2*nphases, p_TRISTATE_WIDTH=1,
                    p_DATA_RATE_OQ="DDR", p_DATA_RATE_TQ="BUF",
                    p_SERDES_MODE="MASTER",

                    o_OQ=getattr(pads, name)[i],
                    i_OCE=1,
                    i_RST=ResetSignal(),
                    i_CLK=ClockSignal(ddr_clk), i_CLKDIV=ClockSignal(),
                    i_D1=getattr(self.dfi.phases[0], name)[i], i_D2=getattr(self.dfi.phases[0], name)[i],
                    i_D3=getattr(self.dfi.phases[1], name)[i], i_D4=getattr(self.dfi.phases[1], name)[i],
                    i_D5=getattr(self.dfi.phases[2], name)[i], i_D6=getattr(self.dfi.phases[2], name)[i],
                    i_D7=getattr(self.dfi.phases[3], name)[i], i_D8=getattr(self.dfi.phases[3], name)[i]
                )

    # DQS and DM
    oe_dqs = Signal()
    dqs_preamble = Signal()
    dqs_postamble = Signal()
    dqs_serdes_pattern = Signal(8, reset=0b01010101)
    # DQS is held low during preamble/postamble and toggles otherwise.
    # With ODELAY, write leveling takes priority: low except for a
    # single-bit pulse on the cycle the strobe CSR is written.
    if with_odelay:
        self.comb += \
            If(self._wlevel_en.storage,
                If(self._wlevel_strobe.re,
                    dqs_serdes_pattern.eq(0b00000001)
                ).Else(
                    dqs_serdes_pattern.eq(0b00000000)
                )
            ).Elif(dqs_preamble | dqs_postamble,
                dqs_serdes_pattern.eq(0b00000000)
            ).Else(
                dqs_serdes_pattern.eq(0b01010101)
            )
    else:
        self.comb += [
            If(dqs_preamble | dqs_postamble,
                dqs_serdes_pattern.eq(0b00000000)
            ).Else(
                dqs_serdes_pattern.eq(0b01010101)
            )
        ]

    for i in range(databits//8):
        dm_o_nodelay = Signal()
        self.specials += \
            Instance("OSERDESE2",
                p_DATA_WIDTH=2*nphases, p_TRISTATE_WIDTH=1,
                p_DATA_RATE_OQ="DDR", p_DATA_RATE_TQ="BUF",
                p_SERDES_MODE="MASTER",

                o_OQ=dm_o_nodelay if with_odelay else pads.dm[i],
                i_OCE=1,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal(ddr_clk), i_CLKDIV=ClockSignal(),
                i_D1=self.dfi.phases[0].wrdata_mask[i], i_D2=self.dfi.phases[0].wrdata_mask[databits//8+i],
                i_D3=self.dfi.phases[1].wrdata_mask[i], i_D4=self.dfi.phases[1].wrdata_mask[databits//8+i],
                i_D5=self.dfi.phases[2].wrdata_mask[i], i_D6=self.dfi.phases[2].wrdata_mask[databits//8+i],
                i_D7=self.dfi.phases[3].wrdata_mask[i], i_D8=self.dfi.phases[3].wrdata_mask[databits//8+i]
            )
        if with_odelay:
            # ODATAIN is an input of ODELAYE2, hence the i_ prefix.
            self.specials += \
                Instance("ODELAYE2",
                    p_DELAY_SRC="ODATAIN", p_SIGNAL_PATTERN="DATA",
                    p_CINVCTRL_SEL="FALSE", p_HIGH_PERFORMANCE_MODE="TRUE",
                    p_REFCLK_FREQUENCY=iodelay_clk_freq/1e6,
                    p_PIPE_SEL="FALSE", p_ODELAY_TYPE="VARIABLE",
                    p_ODELAY_VALUE=0,

                    i_C=ClockSignal(),
                    i_LD=self._dly_sel.storage[i] & self._wdly_dq_rst.re,
                    i_CE=self._dly_sel.storage[i] & self._wdly_dq_inc.re,
                    i_LDPIPEEN=0, i_INC=1,

                    i_ODATAIN=dm_o_nodelay, o_DATAOUT=pads.dm[i]
                )

        dqs_nodelay = Signal()
        dqs_delayed = Signal()
        dqs_t = Signal()
        # With ODELAY the serializer output is taken from OFB (feeding
        # the delay element); without it from OQ. The unused output is
        # tied to a throw-away signal.
        self.specials += \
            Instance("OSERDESE2",
                p_DATA_WIDTH=2*nphases, p_TRISTATE_WIDTH=1,
                p_DATA_RATE_OQ="DDR", p_DATA_RATE_TQ="BUF",
                p_SERDES_MODE="MASTER",

                o_OFB=dqs_nodelay if with_odelay else Signal(),
                o_OQ=Signal() if with_odelay else dqs_nodelay,
                o_TQ=dqs_t,
                i_OCE=1, i_TCE=1,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal(ddr_clk) if with_odelay else ClockSignal(ddr_clk+"_dqs"),
                i_CLKDIV=ClockSignal(),
                i_D1=dqs_serdes_pattern[0], i_D2=dqs_serdes_pattern[1],
                i_D3=dqs_serdes_pattern[2], i_D4=dqs_serdes_pattern[3],
                i_D5=dqs_serdes_pattern[4], i_D6=dqs_serdes_pattern[5],
                i_D7=dqs_serdes_pattern[6], i_D8=dqs_serdes_pattern[7],
                i_T1=~oe_dqs
            )
        if with_odelay:
            self.specials += \
                Instance("ODELAYE2",
                    p_DELAY_SRC="ODATAIN", p_SIGNAL_PATTERN="DATA",
                    p_CINVCTRL_SEL="FALSE", p_HIGH_PERFORMANCE_MODE="TRUE",
                    p_REFCLK_FREQUENCY=iodelay_clk_freq/1e6,
                    p_PIPE_SEL="FALSE", p_ODELAY_TYPE="VARIABLE",
                    p_ODELAY_VALUE=half_sys8x_taps,

                    i_C=ClockSignal(),
                    i_LD=self._dly_sel.storage[i] & self._wdly_dqs_rst.re,
                    i_CE=self._dly_sel.storage[i] & self._wdly_dqs_inc.re,
                    i_LDPIPEEN=0, i_INC=1,

                    i_ODATAIN=dqs_nodelay, o_DATAOUT=dqs_delayed
                )
        self.specials += \
            Instance("OBUFTDS",
                i_I=dqs_delayed if with_odelay else dqs_nodelay, i_T=dqs_t,
                o_O=pads.dqs_p[i], o_OB=pads.dqs_n[i]
            )

    # DQ
    oe_dq = Signal()
    for i in range(databits):
        dq_o_nodelay = Signal()
        dq_o_delayed = Signal()
        dq_i_nodelay = Signal()
        dq_i_delayed = Signal()
        dq_t = Signal()
        # Write path: phase p supplies wrdata[i] then wrdata[databits+i].
        self.specials += \
            Instance("OSERDESE2",
                p_DATA_WIDTH=2*nphases, p_TRISTATE_WIDTH=1,
                p_DATA_RATE_OQ="DDR", p_DATA_RATE_TQ="BUF",
                p_SERDES_MODE="MASTER",

                o_OQ=dq_o_nodelay, o_TQ=dq_t,
                i_OCE=1, i_TCE=1,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal(ddr_clk), i_CLKDIV=ClockSignal(),
                i_D1=self.dfi.phases[0].wrdata[i], i_D2=self.dfi.phases[0].wrdata[databits+i],
                i_D3=self.dfi.phases[1].wrdata[i], i_D4=self.dfi.phases[1].wrdata[databits+i],
                i_D5=self.dfi.phases[2].wrdata[i], i_D6=self.dfi.phases[2].wrdata[databits+i],
                i_D7=self.dfi.phases[3].wrdata[i], i_D8=self.dfi.phases[3].wrdata[databits+i],
                i_T1=~oe_dq
            )
        # Read path: Q8 is mapped to bit 0 and Q1 to bit 7 of dq_i_data.
        # The primitive's own BITSLIP input is tied off; slipping is
        # done by the BitSlip module below.
        dq_i_data = Signal(8)
        self.specials += \
            Instance("ISERDESE2",
                p_DATA_WIDTH=2*nphases, p_DATA_RATE="DDR",
                p_SERDES_MODE="MASTER", p_INTERFACE_TYPE="NETWORKING",
                p_NUM_CE=1, p_IOBDELAY="IFD",

                i_DDLY=dq_i_delayed,
                i_CE1=1,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal(ddr_clk), i_CLKB=~ClockSignal(ddr_clk), i_CLKDIV=ClockSignal(),
                i_BITSLIP=0,
                o_Q8=dq_i_data[0], o_Q7=dq_i_data[1],
                o_Q6=dq_i_data[2], o_Q5=dq_i_data[3],
                o_Q4=dq_i_data[4], o_Q3=dq_i_data[5],
                o_Q2=dq_i_data[6], o_Q1=dq_i_data[7]
            )
        dq_bitslip = BitSlip(8)
        self.comb += dq_bitslip.i.eq(dq_i_data)
        # Per-lane bitslip counter: dedicated reset CSR, increment CSR.
        self.sync += \
            If(self._dly_sel.storage[i//8],
                If(self._rdly_dq_bitslip_rst.re,
                    dq_bitslip.value.eq(0)
                ).Elif(self._rdly_dq_bitslip.re,
                    dq_bitslip.value.eq(dq_bitslip.value + 1)
                )
            )
        self.submodules += dq_bitslip
        self.comb += [
            self.dfi.phases[0].rddata[i].eq(dq_bitslip.o[0]),
            self.dfi.phases[0].rddata[databits+i].eq(dq_bitslip.o[1]),
            self.dfi.phases[1].rddata[i].eq(dq_bitslip.o[2]),
            self.dfi.phases[1].rddata[databits+i].eq(dq_bitslip.o[3]),
            self.dfi.phases[2].rddata[i].eq(dq_bitslip.o[4]),
            self.dfi.phases[2].rddata[databits+i].eq(dq_bitslip.o[5]),
            self.dfi.phases[3].rddata[i].eq(dq_bitslip.o[6]),
            self.dfi.phases[3].rddata[databits+i].eq(dq_bitslip.o[7])
        ]

        if with_odelay:
            self.specials += \
                Instance("ODELAYE2",
                    p_DELAY_SRC="ODATAIN", p_SIGNAL_PATTERN="DATA",
                    p_CINVCTRL_SEL="FALSE", p_HIGH_PERFORMANCE_MODE="TRUE",
                    p_REFCLK_FREQUENCY=iodelay_clk_freq/1e6,
                    p_PIPE_SEL="FALSE", p_ODELAY_TYPE="VARIABLE",
                    p_ODELAY_VALUE=0,

                    i_C=ClockSignal(),
                    i_LD=self._dly_sel.storage[i//8] & self._wdly_dq_rst.re,
                    i_CE=self._dly_sel.storage[i//8] & self._wdly_dq_inc.re,
                    i_LDPIPEEN=0, i_INC=1,

                    i_ODATAIN=dq_o_nodelay, o_DATAOUT=dq_o_delayed
                )

        # The input delay is always present; it feeds ISERDESE2.DDLY.
        self.specials += \
            Instance("IDELAYE2",
                p_DELAY_SRC="IDATAIN", p_SIGNAL_PATTERN="DATA",
                p_CINVCTRL_SEL="FALSE", p_HIGH_PERFORMANCE_MODE="TRUE",
                p_REFCLK_FREQUENCY=iodelay_clk_freq/1e6,
                p_PIPE_SEL="FALSE", p_IDELAY_TYPE="VARIABLE",
                p_IDELAY_VALUE=0,

                i_C=ClockSignal(),
                i_LD=self._dly_sel.storage[i//8] & self._rdly_dq_rst.re,
                i_CE=self._dly_sel.storage[i//8] & self._rdly_dq_inc.re,
                i_LDPIPEEN=0, i_INC=1,

                i_IDATAIN=dq_i_nodelay, o_DATAOUT=dq_i_delayed
            )

        self.specials += \
            Instance("IOBUF",
                i_I=dq_o_delayed if with_odelay else dq_o_nodelay,
                o_O=dq_i_nodelay, i_T=dq_t,
                io_IO=pads.dq[i]
            )

    # Flow control
    #
    # total read latency:
    #  2 cycles through OSERDESE2
    #  cl_sys_latency cycles CAS
    #  2 cycles through ISERDESE2
    #  3 cycles through Bitslip
    #
    # rddata_en goes through read_latency-1 registers; the rddata_valid
    # register below is the final stage.
    rddata_en = self.dfi.phases[self.settings.rdphase].rddata_en
    for i in range(self.settings.read_latency-1):
        n_rddata_en = Signal()
        self.sync += n_rddata_en.eq(rddata_en)
        rddata_en = n_rddata_en
    if with_odelay:
        # rddata_valid is forced high while write leveling is enabled.
        self.sync += [phase.rddata_valid.eq(rddata_en | self._wlevel_en.storage)
            for phase in self.dfi.phases]
    else:
        self.sync += [phase.rddata_valid.eq(rddata_en)
            for phase in self.dfi.phases]

    # Output enable: shift-register history of wrdata_en on the write
    # phase; the bus is driven over a three-cycle window around
    # cwl_sys_latency.
    oe = Signal()
    last_wrdata_en = Signal(cwl_sys_latency+2)
    wrphase = self.dfi.phases[self.settings.wrphase]
    self.sync += last_wrdata_en.eq(Cat(wrphase.wrdata_en, last_wrdata_en[:-1]))
    self.comb += oe.eq(
        last_wrdata_en[cwl_sys_latency-1] |
        last_wrdata_en[cwl_sys_latency] |
        last_wrdata_en[cwl_sys_latency+1])
    if with_odelay:
        # During write leveling DQS is always driven and DQ never is.
        self.sync += \
            If(self._wlevel_en.storage,
                oe_dqs.eq(1), oe_dq.eq(0)
            ).Else(
                oe_dqs.eq(oe), oe_dq.eq(oe)
            )
    else:
        self.sync += [
            oe_dqs.eq(oe),
            oe_dq.eq(oe)
        ]

    # dqs preamble/postamble
    if memtype == "DDR2":
        dqs_sys_latency = cwl_sys_latency-1
    elif memtype == "DDR3":
        dqs_sys_latency = cwl_sys_latency-1 if with_odelay else cwl_sys_latency
    else:
        # Previously this fell through and failed on an unbound local.
        raise ValueError("Unsupported memtype: {!r}".format(memtype))
    # Preamble: the cycle just before the write window opens.
    # Postamble: the cycle just after it closes.
    self.comb += [
        dqs_preamble.eq(last_wrdata_en[dqs_sys_latency-1] &
                        ~last_wrdata_en[dqs_sys_latency]),
        dqs_postamble.eq(last_wrdata_en[dqs_sys_latency+1] &
                         ~last_wrdata_en[dqs_sys_latency]),
    ]
def __init__(self, pll, pads, mode="master"):
    """Build a 4-lane 8b/10b serial link on OSERDESE3/ISERDESE3/IDELAYE3.

    Parameters
    ----------
    pll
        Clock source. The code reads ``serwb_serdes_clk``,
        ``serwb_serdes_5x_clk``, ``serwb_serdes_20x_clk`` and ``lock``,
        and drives ``refclk`` in slave mode.
    pads
        Link pads: ``clk_p``/``clk_n``, ``tx_p``/``tx_n``, ``rx_p``/``rx_n``.
    mode
        ``"master"`` drives the link clock onto the clock pads;
        ``"slave"`` receives it there and forwards it to ``pll.refclk``.
    """
    # 4 x (K flag + data byte) per 40-bit encoded word, both directions.
    self.tx_k = Signal(4)
    self.tx_d = Signal(32)
    self.rx_k = Signal(4)
    self.rx_d = Signal(32)

    # Link control/status as seen from the sys domain.
    self.tx_idle = Signal()
    self.tx_comma = Signal()
    self.rx_idle = Signal()
    self.rx_comma = Signal()

    # Receiver alignment controls.
    self.rx_bitslip_value = Signal(6)
    self.rx_delay_rst = Signal()
    self.rx_delay_inc = Signal()
    self.rx_delay_ce = Signal()
    self.rx_delay_en_vtc = Signal()

    # # #

    self.submodules.encoder = ClockDomainsRenamer("serwb_serdes")(
        Encoder(4, True))
    self.decoders = [
        ClockDomainsRenamer("serwb_serdes")(Decoder(True))
        for _ in range(4)
    ]
    self.submodules += self.decoders

    # clocking:

    # In master mode:
    # - linerate/10 pll refclk provided by user
    # - linerate/10 slave refclk generated on clk_pads
    # In Slave mode:
    # - linerate/10 pll refclk provided by clk_pads
    self.clock_domains.cd_serwb_serdes = ClockDomain()
    self.clock_domains.cd_serwb_serdes_5x = ClockDomain()
    self.clock_domains.cd_serwb_serdes_20x = ClockDomain(reset_less=True)
    self.comb += [
        self.cd_serwb_serdes.clk.eq(pll.serwb_serdes_clk),
        self.cd_serwb_serdes_5x.clk.eq(pll.serwb_serdes_5x_clk),
        self.cd_serwb_serdes_20x.clk.eq(pll.serwb_serdes_20x_clk)
    ]
    # Word-clock domain is held in reset until the PLL locks; the 5x
    # domain simply follows that reset.
    self.specials += AsyncResetSynchronizer(self.cd_serwb_serdes, ~pll.lock)
    self.comb += self.cd_serwb_serdes_5x.rst.eq(self.cd_serwb_serdes.rst)

    # control/status cdc
    tx_idle = Signal()
    tx_comma = Signal()
    rx_idle = Signal()
    rx_comma = Signal()
    rx_bitslip_value = Signal(6)
    rx_delay_rst = Signal()
    rx_delay_inc = Signal()
    rx_delay_en_vtc = Signal()
    rx_delay_ce = Signal()
    # Level signals are resynchronized with MultiReg into the domain
    # that consumes them.
    self.specials += [
        MultiReg(self.tx_idle, tx_idle, "serwb_serdes"),
        MultiReg(self.tx_comma, tx_comma, "serwb_serdes"),
        MultiReg(rx_idle, self.rx_idle, "sys"),
        MultiReg(rx_comma, self.rx_comma, "sys"),
        MultiReg(self.rx_bitslip_value, rx_bitslip_value, "serwb_serdes"),
        MultiReg(self.rx_delay_inc, rx_delay_inc, "serwb_serdes_5x"),
        MultiReg(self.rx_delay_en_vtc, rx_delay_en_vtc, "serwb_serdes_5x")
    ]
    # Single-cycle commands go through pulse synchronizers instead.
    self.submodules.do_rx_delay_rst = PulseSynchronizer("sys", "serwb_serdes_5x")
    self.comb += [
        rx_delay_rst.eq(self.do_rx_delay_rst.o),
        self.do_rx_delay_rst.i.eq(self.rx_delay_rst)
    ]
    self.submodules.do_rx_delay_ce = PulseSynchronizer("sys", "serwb_serdes_5x")
    self.comb += [
        rx_delay_ce.eq(self.do_rx_delay_ce.o),
        self.do_rx_delay_ce.i.eq(self.rx_delay_ce)
    ]

    # tx clock (linerate/10)
    if mode == "master":
        self.submodules.tx_clk_gearbox = Gearbox(40, "serwb_serdes", 8, "serwb_serdes_5x")
        # Four back-to-back copies of a 10-bit "5 high, 5 low" pattern.
        clk_word = 0
        for lane in (3, 2, 1, 0):
            clk_word |= 0b1111100000 << (10*lane)
        self.comb += self.tx_clk_gearbox.i.eq(clk_word)
        clk_o = Signal()
        self.specials += [
            Instance("OSERDESE3",
                p_DATA_WIDTH=8, p_INIT=0,
                p_IS_CLK_INVERTED=0, p_IS_CLKDIV_INVERTED=0, p_IS_RST_INVERTED=0,

                o_OQ=clk_o,
                i_RST=ResetSignal("serwb_serdes"),
                i_CLK=ClockSignal("serwb_serdes_20x"), i_CLKDIV=ClockSignal("serwb_serdes_5x"),
                i_D=self.tx_clk_gearbox.o
            ),
            Instance("OBUFDS",
                i_I=clk_o,
                o_O=pads.clk_p,
                o_OB=pads.clk_n
            )
        ]

    # tx datapath
    # tx_data -> encoders -> gearbox -> serdes
    self.submodules.tx_gearbox = Gearbox(40, "serwb_serdes", 8, "serwb_serdes_5x")
    # Comma request: only lane 0 is driven (K character 0xbc).
    send_comma = [
        self.encoder.k[0].eq(1),
        self.encoder.d[0].eq(0xbc),
    ]
    # Normal operation: all K flags first, then all data bytes.
    send_data = [self.encoder.k[n].eq(self.tx_k[n]) for n in range(4)]
    send_data += [self.encoder.d[n].eq(self.tx_d[8*n:8*(n + 1)]) for n in range(4)]
    self.comb += [
        If(tx_comma, *send_comma).Else(*send_data)
    ]
    # An idle link transmits an all-zero word instead of encoded data.
    self.sync.serwb_serdes += \
        If(tx_idle,
            self.tx_gearbox.i.eq(0)
        ).Else(
            self.tx_gearbox.i.eq(Cat(*[self.encoder.output[n] for n in range(4)]))
        )

    serdes_o = Signal()
    self.specials += [
        Instance("OSERDESE3",
            p_DATA_WIDTH=8, p_INIT=0,
            p_IS_CLK_INVERTED=0, p_IS_CLKDIV_INVERTED=0, p_IS_RST_INVERTED=0,

            o_OQ=serdes_o,
            i_RST=ResetSignal("serwb_serdes"),
            i_CLK=ClockSignal("serwb_serdes_20x"), i_CLKDIV=ClockSignal("serwb_serdes_5x"),
            i_D=self.tx_gearbox.o
        ),
        Instance("OBUFDS",
            i_I=serdes_o,
            o_O=pads.tx_p,
            o_OB=pads.tx_n
        )
    ]

    # rx clock
    use_bufr = True
    if mode == "slave":
        clk_i = Signal()
        clk_i_bufg = Signal()
        self.specials += [
            Instance("IBUFDS",
                i_I=pads.clk_p,
                i_IB=pads.clk_n,
                o_O=clk_i
            )
        ]
        if use_bufr:
            # Received clock goes through BUFR, then BUFG.
            clk_i_bufr = Signal()
            self.specials += [
                Instance("BUFR", i_I=clk_i, o_O=clk_i_bufr),
                Instance("BUFG", i_I=clk_i_bufr, o_O=clk_i_bufg)
            ]
        else:
            self.specials += Instance("BUFG", i_I=clk_i, o_O=clk_i_bufg)
        self.comb += pll.refclk.eq(clk_i_bufg)

    # rx datapath
    # serdes -> gearbox -> bitslip -> decoders -> rx_data
    self.submodules.rx_gearbox = Gearbox(8, "serwb_serdes_5x", 40, "serwb_serdes")
    self.submodules.rx_bitslip = ClockDomainsRenamer("serwb_serdes")(BitSlip(40))

    serdes_i_nodelay = Signal()
    self.specials += [
        Instance("IBUFDS_DIFF_OUT",
            i_I=pads.rx_p,
            i_IB=pads.rx_n,
            o_O=serdes_i_nodelay
        )
    ]

    serdes_i_delayed = Signal()
    serdes_q = Signal(8)
    self.specials += [
        Instance("IDELAYE3",
            p_CASCADE="NONE", p_UPDATE_MODE="ASYNC", p_REFCLK_FREQUENCY=200.0,
            p_IS_CLK_INVERTED=0, p_IS_RST_INVERTED=0,
            p_DELAY_FORMAT="COUNT", p_DELAY_SRC="IDATAIN",
            p_DELAY_TYPE="VARIABLE", p_DELAY_VALUE=0,

            i_CLK=ClockSignal("serwb_serdes_5x"),
            i_RST=rx_delay_rst, i_LOAD=0,
            i_INC=rx_delay_inc, i_EN_VTC=rx_delay_en_vtc,
            i_CE=rx_delay_ce,

            i_IDATAIN=serdes_i_nodelay, o_DATAOUT=serdes_i_delayed
        ),
        Instance("ISERDESE3",
            p_IS_CLK_INVERTED=0,
            p_IS_CLK_B_INVERTED=1,
            p_DATA_WIDTH=8,

            i_D=serdes_i_delayed,
            i_RST=ResetSignal("serwb_serdes"),
            i_FIFO_RD_CLK=0, i_FIFO_RD_EN=0,
            i_CLK=ClockSignal("serwb_serdes_20x"),
            i_CLK_B=ClockSignal("serwb_serdes_20x"),  # locally inverted
            i_CLKDIV=ClockSignal("serwb_serdes_5x"),
            o_Q=serdes_q
        )
    ]

    # A comma is K character 0xbc on lane 0 with D 0x00 on lanes 1..3.
    lane0, *other_lanes = self.decoders
    comma_seen = (lane0.d == 0xbc) & (lane0.k == 1)
    for dec in other_lanes:
        comma_seen = comma_seen & ((dec.d == 0x00) & (dec.k == 0))

    rx_logic = [
        self.rx_gearbox.i.eq(serdes_q),
        self.rx_bitslip.value.eq(rx_bitslip_value),
        self.rx_bitslip.i.eq(self.rx_gearbox.o),
    ]
    # Each decoder takes one 10-bit symbol of the aligned 40-bit word.
    rx_logic += [
        dec.input.eq(self.rx_bitslip.o[10*n:10*(n + 1)])
        for n, dec in enumerate(self.decoders)
    ]
    rx_logic += [
        self.rx_k.eq(Cat(*[dec.k for dec in self.decoders])),
        self.rx_d.eq(Cat(*[dec.d for dec in self.decoders])),
        # Idle is an all-zero aligned word.
        rx_idle.eq(self.rx_bitslip.o == 0),
        rx_comma.eq(comma_seen),
    ]
    self.comb += rx_logic
def __init__(self, pll, pads, mode="master"):
    """Assemble the OSERDESE3/ISERDESE3 based serdes PHY.

    Declares the control/status CSRs, the ``serdes*`` clock domains driven
    from ``pll``, the TX path (encoder -> PRBS generator -> gearbox ->
    OSERDESE3) and the RX path (IDELAYE3/ISERDESE3 -> gearbox -> bitslip ->
    decoders / PRBS checker). A second IDELAYE3/ISERDESE3 pair on the
    inverted buffer output only feeds the phase detector.

    pll:  object providing ``serdes_clk``, ``serdes_10x_clk``,
          ``serdes_10x_90_clk``, ``serdes_2p5x_clk`` and ``lock``; its
          ``refclk`` is driven here when ``mode == "slave"``.
    pads: object providing ``clk_p/clk_n``, ``tx_p/tx_n``, ``rx_p/rx_n``.
    mode: ``"master"`` generates the link clock on the clk pads,
          ``"slave"`` uses the clk pads as PLL reference. Any other value
          instantiates neither clock path.
    """
    # -- CSRs ---------------------------------------------------------------
    self.tx_pattern = CSRStorage(20)
    self.tx_produce_square_wave = CSRStorage()
    self.tx_prbs_config = CSRStorage(2)

    self.rx_pattern = CSRStatus(20)
    self.rx_prbs_config = CSRStorage(2)
    self.rx_prbs_errors = CSRStatus(32)

    self.rx_bitslip_value = CSRStorage(5)
    self.rx_delay_rst = CSR()
    self.rx_delay_en_vtc = CSRStorage(reset=1)
    self.rx_delay_inc = CSRStorage()
    self.rx_delay_ce = CSR()
    self.rx_delay_m_cntvalueout = CSRStatus(9)
    self.rx_delay_s_cntvalueout = CSRStatus(9)

    # # #

    self.submodules.encoder = ClockDomainsRenamer("serdes")(Encoder(2, True))
    self.decoders = [ClockDomainsRenamer("serdes")(Decoder(True)) for _ in range(2)]
    self.submodules += self.decoders

    # -- clocking -----------------------------------------------------------
    # master mode:
    # - linerate/10 pll refclk provided externally
    # - linerate/10 clock generated on clk_pads
    # slave mode:
    # - linerate/10 pll refclk provided by clk_pads
    self.clock_domains.cd_serdes = ClockDomain()
    self.clock_domains.cd_serdes_10x = ClockDomain()
    self.clock_domains.cd_serdes_10x_90 = ClockDomain()
    self.clock_domains.cd_serdes_2p5x = ClockDomain()
    self.comb += [
        self.cd_serdes.clk.eq(pll.serdes_clk),
        self.cd_serdes_10x.clk.eq(pll.serdes_10x_clk),
        self.cd_serdes_10x_90.clk.eq(pll.serdes_10x_90_clk),
        self.cd_serdes_2p5x.clk.eq(pll.serdes_2p5x_clk),
    ]
    # Every domain is held in reset until the PLL reports lock.
    self.specials += [
        AsyncResetSynchronizer(self.cd_serdes, ~pll.lock),
        AsyncResetSynchronizer(self.cd_serdes_10x, ~pll.lock),
        AsyncResetSynchronizer(self.cd_serdes_10x_90, ~pll.lock),
        AsyncResetSynchronizer(self.cd_serdes_2p5x, ~pll.lock),
    ]

    # -- control/status cdc -------------------------------------------------
    tx_pattern = Signal(20)
    tx_produce_square_wave = Signal()
    tx_prbs_config = Signal(2)

    rx_pattern = Signal(20)
    rx_prbs_config = Signal(2)
    rx_prbs_errors = Signal(32)

    rx_bitslip_value = Signal(5)
    rx_delay_rst = Signal()
    rx_delay_inc = Signal()
    rx_delay_en_vtc = Signal()
    rx_delay_ce = Signal()
    rx_delay_m_cntvalueout = Signal(9)
    rx_delay_s_cntvalueout = Signal(9)

    self.specials += [
        MultiReg(self.tx_pattern.storage, tx_pattern, "serdes"),
        MultiReg(self.tx_produce_square_wave.storage, tx_produce_square_wave, "serdes"),
        MultiReg(self.tx_prbs_config.storage, tx_prbs_config, "serdes"),
    ]
    self.specials += [
        MultiReg(rx_pattern, self.rx_pattern.status, "sys"),
        MultiReg(self.rx_prbs_config.storage, rx_prbs_config, "serdes"),
        MultiReg(rx_prbs_errors, self.rx_prbs_errors.status, "sys"),  # FIXME
    ]
    self.specials += [
        MultiReg(self.rx_bitslip_value.storage, rx_bitslip_value, "serdes"),
        MultiReg(self.rx_delay_inc.storage, rx_delay_inc, "serdes_2p5x"),
        MultiReg(self.rx_delay_en_vtc.storage, rx_delay_en_vtc, "serdes_2p5x"),
    ]
    # The delay reset/ce CSR strobes are single-cycle pulses, so they cross
    # into the serdes_2p5x domain through pulse synchronizers.
    self.submodules.do_rx_delay_rst = PulseSynchronizer("sys", "serdes_2p5x")
    self.comb += [
        rx_delay_rst.eq(self.do_rx_delay_rst.o),
        self.do_rx_delay_rst.i.eq(self.rx_delay_rst.re),
    ]
    self.submodules.do_rx_delay_ce = PulseSynchronizer("sys", "serdes_2p5x")
    self.comb += [
        rx_delay_ce.eq(self.do_rx_delay_ce.o),
        self.do_rx_delay_ce.i.eq(self.rx_delay_ce.re),
    ]
    self.specials += [
        MultiReg(rx_delay_m_cntvalueout, self.rx_delay_m_cntvalueout.status, "sys"),
        MultiReg(rx_delay_s_cntvalueout, self.rx_delay_s_cntvalueout.status, "sys"),
    ]

    # Constant (non-signal) parameters shared by the primitives below.
    oserdes_params = dict(
        p_DATA_WIDTH=8,
        p_INIT=0,
        p_IS_CLK_INVERTED=0,
        p_IS_CLKDIV_INVERTED=0,
        p_IS_RST_INVERTED=0,
    )
    idelay_params = dict(
        p_CASCADE="NONE",
        p_UPDATE_MODE="ASYNC",
        p_REFCLK_FREQUENCY=200.0,
        p_IS_CLK_INVERTED=0,
        p_IS_RST_INVERTED=0,
        # Note: can't use TIME mode since not reloading DELAY_VALUE on rst...
        p_DELAY_FORMAT="COUNT",
        p_DELAY_SRC="IDATAIN",
        p_DELAY_TYPE="VARIABLE",
    )

    # -- tx clock (linerate/10) ---------------------------------------------
    if mode == "master":
        self.submodules.tx_clk_gearbox = Gearbox(20, "serdes", 8, "serdes_2p5x")
        self.comb += self.tx_clk_gearbox.i.eq(0b11111000001111100000)
        clk_o = Signal()
        self.specials += [
            Instance("OSERDESE3",
                o_OQ=clk_o,
                i_RST=ResetSignal("serdes_2p5x"),
                i_CLK=ClockSignal("serdes_10x"),
                i_CLKDIV=ClockSignal("serdes_2p5x"),
                i_D=self.tx_clk_gearbox.o,
                **oserdes_params),
            Instance("OBUFDS", i_I=clk_o, o_O=pads.clk_p, o_OB=pads.clk_n),
        ]

    # -- tx data and prbs ---------------------------------------------------
    self.submodules.tx_prbs = ClockDomainsRenamer("serdes")(PRBSTX(20, True))
    self.comb += self.tx_prbs.config.eq(tx_prbs_config)
    self.submodules.tx_gearbox = Gearbox(20, "serdes", 8, "serdes_2p5x")
    # Source priority: fixed pattern, then square wave, then PRBS output.
    self.sync.serdes += [
        self.tx_prbs.i.eq(Cat(*[self.encoder.output[i] for i in range(2)])),
        If(tx_pattern != 0,
            self.tx_gearbox.i.eq(tx_pattern)
        ).Elif(tx_produce_square_wave,
            # square wave @ linerate/20 for scope observation
            self.tx_gearbox.i.eq(0b11111111110000000000)
        ).Else(
            self.tx_gearbox.i.eq(self.tx_prbs.o)
        ),
    ]

    serdes_o = Signal()
    self.specials += [
        Instance("OSERDESE3",
            o_OQ=serdes_o,
            i_RST=ResetSignal("serdes_2p5x"),
            i_CLK=ClockSignal("serdes_10x"),
            i_CLKDIV=ClockSignal("serdes_2p5x"),
            i_D=self.tx_gearbox.o,
            **oserdes_params),
        Instance("OBUFDS", i_I=serdes_o, o_O=pads.tx_p, o_OB=pads.tx_n),
    ]

    # -- rx clock -----------------------------------------------------------
    use_bufr = True
    if mode == "slave":
        clk_i = Signal()
        clk_i_bufg = Signal()
        self.specials += [
            Instance("IBUFDS", i_I=pads.clk_p, i_IB=pads.clk_n, o_O=clk_i),
        ]
        if use_bufr:
            clk_i_bufr = Signal()
            self.specials += [
                Instance("BUFR", i_I=clk_i, o_O=clk_i_bufr),
                Instance("BUFG", i_I=clk_i_bufr, o_O=clk_i_bufg),
            ]
        else:
            self.specials += [Instance("BUFG", i_I=clk_i, o_O=clk_i_bufg)]
        self.comb += pll.refclk.eq(clk_i_bufg)

    # -- rx -----------------------------------------------------------------
    self.submodules.rx_gearbox = Gearbox(8, "serdes_2p5x", 20, "serdes")
    self.submodules.rx_bitslip = ClockDomainsRenamer("serdes")(BitSlip(20))

    self.submodules.phase_detector = ClockDomainsRenamer("serdes_2p5x")(PhaseDetector())

    # use 2 serdes for phase detection: 1 master/ 1 slave
    serdes_m_i_nodelay = Signal()
    serdes_s_i_nodelay = Signal()
    self.specials += [
        Instance("IBUFDS_DIFF_OUT",
            i_I=pads.rx_p,
            i_IB=pads.rx_n,
            o_O=serdes_m_i_nodelay,
            o_OB=serdes_s_i_nodelay),
    ]

    # master: samples the data that goes on to the gearbox
    serdes_m_i_delayed = Signal()
    serdes_m_q = Signal(8)
    self.specials += [
        Instance("IDELAYE3",
            p_DELAY_VALUE=50,  # 1/4 bit period (ambient temp)
            i_CLK=ClockSignal("serdes_2p5x"),
            i_RST=rx_delay_rst,
            i_LOAD=0,
            i_INC=rx_delay_inc,
            i_EN_VTC=rx_delay_en_vtc,
            i_CE=rx_delay_ce,
            i_IDATAIN=serdes_m_i_nodelay,
            o_DATAOUT=serdes_m_i_delayed,
            o_CNTVALUEOUT=rx_delay_m_cntvalueout,
            **idelay_params),
        Instance("ISERDESE3",
            p_DATA_WIDTH=8,
            i_D=serdes_m_i_delayed,
            i_RST=ResetSignal("serdes_2p5x"),
            i_FIFO_RD_CLK=0,
            i_FIFO_RD_EN=0,
            i_CLK=ClockSignal("serdes_10x"),
            i_CLK_B=~ClockSignal("serdes_10x"),
            i_CLKDIV=ClockSignal("serdes_2p5x"),
            o_Q=serdes_m_q),
    ]
    self.comb += self.phase_detector.mdata.eq(serdes_m_q)

    # slave: fed from the inverted buffer output, hence the ~ on sdata below
    serdes_s_i_delayed = Signal()
    serdes_s_q = Signal(8)
    self.specials += [
        Instance("IDELAYE3",
            p_DELAY_VALUE=100,  # 1/2 bit period (ambient temp)
            i_CLK=ClockSignal("serdes_2p5x"),
            i_RST=rx_delay_rst,
            i_LOAD=0,
            i_INC=rx_delay_inc,
            i_EN_VTC=rx_delay_en_vtc,
            i_CE=rx_delay_ce,
            i_IDATAIN=serdes_s_i_nodelay,
            o_DATAOUT=serdes_s_i_delayed,
            o_CNTVALUEOUT=rx_delay_s_cntvalueout,
            **idelay_params),
        Instance("ISERDESE3",
            p_DATA_WIDTH=8,
            i_D=serdes_s_i_delayed,
            i_RST=ResetSignal("serdes_2p5x"),
            i_FIFO_RD_CLK=0,
            i_FIFO_RD_EN=0,
            i_CLK=ClockSignal("serdes_10x"),
            i_CLK_B=~ClockSignal("serdes_10x"),
            i_CLKDIV=ClockSignal("serdes_2p5x"),
            o_Q=serdes_s_q),
    ]
    self.comb += self.phase_detector.sdata.eq(~serdes_s_q)

    # -- rx data and prbs ---------------------------------------------------
    self.submodules.rx_prbs = ClockDomainsRenamer("serdes")(PRBSRX(20, True))
    self.comb += [
        self.rx_prbs.config.eq(rx_prbs_config),
        rx_prbs_errors.eq(self.rx_prbs.errors),
    ]
    # Only the master serdes feeds the data path; the 20-bit bitslip output
    # is split into two 10-bit symbols for the decoders.
    self.comb += [
        self.rx_gearbox.i.eq(serdes_m_q),
        self.rx_bitslip.value.eq(rx_bitslip_value),
        self.rx_bitslip.i.eq(self.rx_gearbox.o),
        self.decoders[0].input.eq(self.rx_bitslip.o[:10]),
        self.decoders[1].input.eq(self.rx_bitslip.o[10:]),
        rx_pattern.eq(self.rx_bitslip.o),
        self.rx_prbs.i.eq(self.rx_bitslip.o),
    ]
def __init__(self, pads, memtype="DDR3", sys_clk_freq=100e6, iodelay_clk_freq=200e6, cmd_latency=0):
    """Build the DDR3/DDR4 PHY from OSERDESE3/ISERDESE3/ODELAYE3/IDELAYE3 primitives.

    pads:             DRAM pads. ``a``, ``ba``, ``dq``, ``dm``, ``dqs_p/dqs_n``,
                      ``clk_p/clk_n`` and ``ras_n/cas_n/we_n/cke/odt`` are
                      always used; ``bg`` is used when ``memtype`` is not
                      ``"DDR3"``; ``cs_n``, ``reset_n``, ``act_n`` and
                      ``ten`` are used only when present.
    memtype:          ``"DDR3"`` connects the DFI directly; any other value
                      inserts a ``DDR4DFIMux``. Three extra address bits are
                      added only for ``"DDR4"``.
    sys_clk_freq:     used to derive ``tck = 2/(2*4*sys_clk_freq)``.
    iodelay_clk_freq: in Hz; passed to the delay primitives in MHz.
    cmd_latency:      added to CWL for the latency/phase computation and
                      subtracted again for the ``cwl`` reported in settings.
    """
    tck = 2/(2*4*sys_clk_freq)

    addressbits = len(pads.a)
    if memtype == "DDR4":
        # cas_n/ras_n/we_n multiplexed with address
        addressbits += 3
    if memtype == "DDR3":
        bankbits = len(pads.ba)
    else:
        bankbits = len(pads.ba) + len(pads.bg)
    nranks = len(pads.cs_n) if hasattr(pads, "cs_n") else 1
    databits = len(pads.dq)
    nphases = 4

    if hasattr(pads, "ten"):
        self.comb += pads.ten.eq(0)

    # -- CSRs ---------------------------------------------------------------
    self._en_vtc = CSRStorage(reset=1)

    self._half_sys8x_taps = CSRStatus(9)

    self._wlevel_en = CSRStorage()
    self._wlevel_strobe = CSR()

    self._cdly_rst = CSR()
    self._cdly_inc = CSR()

    self._dly_sel = CSRStorage(databits//8)

    self._rdly_dq_rst = CSR()
    self._rdly_dq_inc = CSR()
    self._rdly_dq_bitslip_rst = CSR()
    self._rdly_dq_bitslip = CSR()

    self._wdly_dq_rst = CSR()
    self._wdly_dq_inc = CSR()
    self._wdly_dqs_rst = CSR()
    self._wdly_dqs_inc = CSR()

    # -- compute phy settings -----------------------------------------------
    cl, cwl = get_cl_cw(memtype, tck)
    cwl += cmd_latency
    cl_sys_latency = get_sys_latency(nphases, cl)
    cwl_sys_latency = get_sys_latency(nphases, cwl)

    rdcmdphase, rdphase = get_sys_phases(nphases, cl_sys_latency, cl)
    wrcmdphase, wrphase = get_sys_phases(nphases, cwl_sys_latency, cwl)
    self.settings = PhySettings(
        memtype=memtype,
        databits=databits,
        dfi_databits=2*databits,
        nranks=nranks,
        nphases=nphases,
        rdphase=rdphase,
        wrphase=wrphase,
        rdcmdphase=rdcmdphase,
        wrcmdphase=wrcmdphase,
        cl=cl,
        cwl=cwl - cmd_latency,
        read_latency=2 + cl_sys_latency + 1 + 3,
        write_latency=cwl_sys_latency,
    )

    self.dfi = Interface(addressbits, bankbits, nranks, 2*databits, nphases)
    if memtype == "DDR3":
        _dfi = self.dfi
    else:
        # Internal DFI placed behind the DDR4 command mux.
        _dfi = Interface(addressbits, bankbits, nranks, 2*databits, nphases)
        dfi_mux = DDR4DFIMux(self.dfi, _dfi)
        self.submodules += dfi_mux

    # # #

    # Constant (non-signal) parameters shared by the primitives below.
    oserdes_params = dict(
        p_DATA_WIDTH=8,
        p_INIT=0,
        p_IS_CLK_INVERTED=0,
        p_IS_CLKDIV_INVERTED=0,
        p_IS_RST_INVERTED=0,
    )
    # Clock/command output delays (all stepped by the _cdly_* CSRs).
    cmd_odelay_params = dict(
        p_CASCADE="NONE",
        p_UPDATE_MODE="ASYNC",
        p_REFCLK_FREQUENCY=iodelay_clk_freq/1e6,
        p_DELAY_FORMAT="TIME",
        p_DELAY_TYPE="VARIABLE",
        p_DELAY_VALUE=0,
    )
    # Data-side delays (DM/DQS/DQ); DELAY_VALUE is given per instance.
    dat_delay_params = dict(
        p_CASCADE="NONE",
        p_UPDATE_MODE="ASYNC",
        p_REFCLK_FREQUENCY=iodelay_clk_freq/1e6,
        p_IS_CLK_INVERTED=0,
        p_IS_RST_INVERTED=0,
        p_DELAY_FORMAT="TIME",
        p_DELAY_TYPE="VARIABLE",
    )

    # -- Clock --------------------------------------------------------------
    clk_o_nodelay = Signal()
    clk_o_delayed = Signal()
    self.specials += [
        Instance("OSERDESE3",
            o_OQ=clk_o_nodelay,
            i_RST=ResetSignal(),
            i_CLK=ClockSignal("sys4x"),
            i_CLKDIV=ClockSignal(),
            i_D=0b10101010,
            **oserdes_params),
        Instance("ODELAYE3",
            i_CLK=ClockSignal(),
            i_INC=1,
            i_EN_VTC=self._en_vtc.storage,
            i_RST=self._cdly_rst.re,
            i_CE=self._cdly_inc.re,
            i_ODATAIN=clk_o_nodelay,
            o_DATAOUT=clk_o_delayed,
            **cmd_odelay_params),
        Instance("OBUFDS", i_I=clk_o_delayed, o_O=pads.clk_p, o_OB=pads.clk_n),
    ]

    # -- Addresses and commands ---------------------------------------------
    # Each command-side bit is presented twice per phase to the 8:1 serializer.
    n_addr_pads = addressbits if memtype == "DDR3" else addressbits - 3
    for i in range(n_addr_pads):
        a_o_nodelay = Signal()
        a_bits = []
        for p in range(nphases):
            a_bits.append(_dfi.phases[p].address[i])
            a_bits.append(_dfi.phases[p].address[i])
        self.specials += [
            Instance("OSERDESE3",
                o_OQ=a_o_nodelay,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal("sys4x"),
                i_CLKDIV=ClockSignal(),
                i_D=Cat(*a_bits),
                **oserdes_params),
            Instance("ODELAYE3",
                i_CLK=ClockSignal(),
                i_INC=1,
                i_EN_VTC=self._en_vtc.storage,
                i_RST=self._cdly_rst.re,
                i_CE=self._cdly_inc.re,
                i_ODATAIN=a_o_nodelay,
                o_DATAOUT=pads.a[i],
                **cmd_odelay_params),
        ]

    # Bank bits: for non-DDR3 the upper bits of pads_ba drive the bank group.
    pads_ba = Signal(bankbits)
    if memtype == "DDR3":
        self.comb += pads.ba.eq(pads_ba)
    else:
        self.comb += pads.ba.eq(pads_ba[:len(pads.ba)])
        self.comb += pads.bg.eq(pads_ba[len(pads.ba):])
    for i in range(bankbits):
        ba_o_nodelay = Signal()
        ba_bits = []
        for p in range(nphases):
            ba_bits.append(_dfi.phases[p].bank[i])
            ba_bits.append(_dfi.phases[p].bank[i])
        self.specials += [
            Instance("OSERDESE3",
                o_OQ=ba_o_nodelay,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal("sys4x"),
                i_CLKDIV=ClockSignal(),
                i_D=Cat(*ba_bits),
                **oserdes_params),
            Instance("ODELAYE3",
                i_CLK=ClockSignal(),
                i_INC=1,
                i_EN_VTC=self._en_vtc.storage,
                i_RST=self._cdly_rst.re,
                i_CE=self._cdly_inc.re,
                i_ODATAIN=ba_o_nodelay,
                o_DATAOUT=pads_ba[i],
                **cmd_odelay_params),
        ]

    # Control pins: the optional ones are driven only if the pads have them.
    controls = ["ras_n", "cas_n", "we_n", "cke", "odt"]
    for optional in ("reset_n", "cs_n", "act_n"):
        if hasattr(pads, optional):
            controls.append(optional)

    for name in controls:
        x_o_nodelay = Signal()
        x_bits = []
        for p in range(nphases):
            x_bits.append(getattr(_dfi.phases[p], name))
            x_bits.append(getattr(_dfi.phases[p], name))
        self.specials += [
            Instance("OSERDESE3",
                o_OQ=x_o_nodelay,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal("sys4x"),
                i_CLKDIV=ClockSignal(),
                i_D=Cat(*x_bits),
                **oserdes_params),
            Instance("ODELAYE3",
                i_CLK=ClockSignal(),
                i_INC=1,
                i_EN_VTC=self._en_vtc.storage,
                i_RST=self._cdly_rst.re,
                i_CE=self._cdly_inc.re,
                i_ODATAIN=x_o_nodelay,
                o_DATAOUT=getattr(pads, name),
                **cmd_odelay_params),
        ]

    # -- DQS and DM ---------------------------------------------------------
    oe_dqs = Signal()
    dqs_serdes_pattern = Signal(8)
    # In write-leveling mode DQS emits a single pulse per strobe request,
    # otherwise it toggles continuously.
    self.comb += \
        If(self._wlevel_en.storage,
            If(self._wlevel_strobe.re,
                dqs_serdes_pattern.eq(0b00000001)
            ).Else(
                dqs_serdes_pattern.eq(0b00000000)
            )
        ).Else(
            dqs_serdes_pattern.eq(0b01010101)
        )
    for i in range(databits//8):
        dm_o_nodelay = Signal()
        # Two mask bits per phase: low half, then high half of the DFI word.
        dm_bits = []
        for p in range(nphases):
            dm_bits.append(_dfi.phases[p].wrdata_mask[i])
            dm_bits.append(_dfi.phases[p].wrdata_mask[databits//8 + i])
        self.specials += \
            Instance("OSERDESE3",
                o_OQ=dm_o_nodelay,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal("sys4x"),
                i_CLKDIV=ClockSignal(),
                i_D=Cat(*dm_bits),
                **oserdes_params)
        self.specials += \
            Instance("ODELAYE3",
                p_DELAY_VALUE=0,
                i_CLK=ClockSignal(),
                i_INC=1,
                i_EN_VTC=self._en_vtc.storage,
                i_RST=self._dly_sel.storage[i] & self._wdly_dq_rst.re,
                i_CE=self._dly_sel.storage[i] & self._wdly_dq_inc.re,
                i_ODATAIN=dm_o_nodelay,
                o_DATAOUT=pads.dm[i],
                **dat_delay_params)

        dqs_nodelay = Signal()
        dqs_delayed = Signal()
        dqs_t = Signal()
        if i == 0:
            # Store initial DQS DELAY_VALUE (in taps) to
            # be able to reload DELAY_VALUE after reset.
            dqs_taps = Signal(9)
            dqs_taps_timer = WaitTimer(2**16)
            self.submodules += dqs_taps_timer
            dqs_taps_done = Signal()
            self.comb += dqs_taps_timer.wait.eq(~dqs_taps_done)
            self.sync += \
                If(dqs_taps_timer.done,
                    dqs_taps_done.eq(1),
                    self._half_sys8x_taps.status.eq(dqs_taps)
                )
        dqs_bits = []
        for bit in range(8):
            dqs_bits.append(dqs_serdes_pattern[bit])
        self.specials += [
            Instance("OSERDESE3",
                o_OQ=dqs_nodelay,
                o_T_OUT=dqs_t,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal("sys4x"),
                i_CLKDIV=ClockSignal(),
                i_D=Cat(*dqs_bits),
                i_T=~oe_dqs,
                **oserdes_params),
            Instance("ODELAYE3",
                p_DELAY_VALUE=int(tck*1e12/4),
                i_CLK=ClockSignal(),
                i_INC=1,
                i_EN_VTC=self._en_vtc.storage,
                i_RST=self._dly_sel.storage[i] & self._wdly_dqs_rst.re,
                i_CE=self._dly_sel.storage[i] & self._wdly_dqs_inc.re,
                # Only byte lane 0 exports its tap count; the others get a
                # dangling signal.
                o_CNTVALUEOUT=Signal(9) if i != 0 else dqs_taps,
                i_ODATAIN=dqs_nodelay,
                o_DATAOUT=dqs_delayed,
                **dat_delay_params),
            Instance("IOBUFDSE3",
                i_I=dqs_delayed,
                i_T=dqs_t,
                io_IO=pads.dqs_p[i],
                io_IOB=pads.dqs_n[i]),
        ]

    # -- DQ -----------------------------------------------------------------
    oe_dq = Signal()
    for i in range(databits):
        dq_o_nodelay = Signal()
        dq_o_delayed = Signal()
        dq_i_nodelay = Signal()
        dq_i_delayed = Signal()
        dq_t = Signal()
        dq_bitslip = BitSlip(8)
        # Bitslip value is reset/incremented only for selected byte lanes.
        self.sync += \
            If(self._dly_sel.storage[i//8],
                If(self._rdly_dq_bitslip_rst.re,
                    dq_bitslip.value.eq(0)
                ).Elif(self._rdly_dq_bitslip.re,
                    dq_bitslip.value.eq(dq_bitslip.value + 1)
                )
            )
        self.submodules += dq_bitslip
        # Two data bits per phase: low half, then high half of the DFI word.
        dq_bits = []
        for p in range(nphases):
            dq_bits.append(_dfi.phases[p].wrdata[i])
            dq_bits.append(_dfi.phases[p].wrdata[databits + i])
        self.specials += [
            Instance("OSERDESE3",
                o_OQ=dq_o_nodelay,
                o_T_OUT=dq_t,
                i_RST=ResetSignal(),
                i_CLK=ClockSignal("sys4x"),
                i_CLKDIV=ClockSignal(),
                i_D=Cat(*dq_bits),
                i_T=~oe_dq,
                **oserdes_params),
            Instance("ISERDESE3",
                p_IS_CLK_INVERTED=0,
                p_IS_CLK_B_INVERTED=1,
                p_DATA_WIDTH=8,
                i_D=dq_i_delayed,
                i_RST=ResetSignal(),
                i_FIFO_RD_EN=0,
                i_CLK=ClockSignal("sys4x"),
                i_CLK_B=ClockSignal("sys4x"),  # locally inverted
                i_CLKDIV=ClockSignal(),
                o_Q=dq_bitslip.i),
            Instance("ODELAYE3",
                p_DELAY_VALUE=0,
                i_CLK=ClockSignal(),
                i_INC=1,
                i_EN_VTC=self._en_vtc.storage,
                i_RST=self._dly_sel.storage[i//8] & self._wdly_dq_rst.re,
                i_CE=self._dly_sel.storage[i//8] & self._wdly_dq_inc.re,
                i_ODATAIN=dq_o_nodelay,
                o_DATAOUT=dq_o_delayed,
                **dat_delay_params),
            Instance("IDELAYE3",
                p_DELAY_SRC="IDATAIN",
                p_DELAY_VALUE=0,
                i_CLK=ClockSignal(),
                i_INC=1,
                i_EN_VTC=self._en_vtc.storage,
                i_RST=self._dly_sel.storage[i//8] & self._rdly_dq_rst.re,
                i_CE=self._dly_sel.storage[i//8] & self._rdly_dq_inc.re,
                i_IDATAIN=dq_i_nodelay,
                o_DATAOUT=dq_i_delayed,
                **dat_delay_params),
            Instance("IOBUF",
                i_I=dq_o_delayed,
                o_O=dq_i_nodelay,
                i_T=dq_t,
                io_IO=pads.dq[i]),
        ]
        # Even bitslip outputs fill the low half of each phase's read word,
        # odd outputs the high half.
        rd_assigns = []
        for half, offset in enumerate((0, databits)):
            for p in range(nphases):
                rd_assigns.append(
                    _dfi.phases[p].rddata[offset + i].eq(dq_bitslip.o[2*p + half]))
        self.comb += rd_assigns

    # -- Flow control -------------------------------------------------------
    #
    # rddata_en of the read phase is delayed by read_latency-1 registers,
    # plus one more register per phase for rddata_valid.
    # read_latency is 2 + cl_sys_latency + 1 + 3 (see settings above).
    # NOTE(review): the previous note here listed 2 cycles through OSERDESE2,
    # cl_sys_latency cycles CAS, 2 cycles through ISERDESE2 and 3 cycles
    # through Bitslip; the code instantiates E3 primitives and the formula
    # uses "+ 1" rather than "+ 2" -- confirm which is intended.
    rddata_en = _dfi.phases[self.settings.rdphase].rddata_en
    for _ in range(self.settings.read_latency - 1):
        n_rddata_en = Signal()
        self.sync += n_rddata_en.eq(rddata_en)
        rddata_en = n_rddata_en
    for phase in _dfi.phases:
        phase_rddata_valid = Signal()
        self.sync += phase_rddata_valid.eq(rddata_en | self._wlevel_en.storage)
        self.comb += phase.rddata_valid.eq(phase_rddata_valid)

    # Output enable: shift register of wrdata_en, asserted over a three
    # cycle window around cwl_sys_latency.
    oe = Signal()
    last_wrdata_en = Signal(cwl_sys_latency + 2)
    wrphase = _dfi.phases[self.settings.wrphase]
    self.sync += last_wrdata_en.eq(Cat(wrphase.wrdata_en, last_wrdata_en[:-1]))
    self.comb += oe.eq(
        last_wrdata_en[cwl_sys_latency - 1] |
        last_wrdata_en[cwl_sys_latency] |
        last_wrdata_en[cwl_sys_latency + 1])
    # During write leveling DQS is always driven and DQ is never driven.
    self.sync += \
        If(self._wlevel_en.storage,
            oe_dqs.eq(1), oe_dq.eq(0)
        ).Else(
            oe_dqs.eq(oe), oe_dq.eq(oe)
        )
def __init__(self, pll, pads, mode="master"):
    """Assemble the OSERDESE2/ISERDESE2 based serdes PHY.

    Declares the control/status CSRs, the ``serdes*`` clock domains driven
    from ``pll``, the TX path (encoder -> PRBS generator -> gearbox ->
    OSERDESE2) and the RX path (IDELAYE2/ISERDESE2 -> gearbox -> bitslip ->
    decoders / PRBS checker). A second IDELAYE2/ISERDESE2 pair on the
    inverted buffer output only feeds the phase detector.

    pll:  object providing ``serdes_clk``, ``serdes_10x_clk``,
          ``serdes_2p5x_clk``, ``lock`` and ``linerate``; its ``refclk`` is
          driven here when ``mode == "slave"``.
    pads: object providing ``clk_p/clk_n``, ``tx_p/tx_n``, ``rx_p/rx_n``.
    mode: ``"master"`` generates the link clock on the clk pads,
          ``"slave"`` uses the clk pads as PLL reference. Any other value
          instantiates neither clock path.
    """
    # -- CSRs ---------------------------------------------------------------
    self.tx_pattern = CSRStorage(20)
    self.tx_produce_square_wave = CSRStorage()
    self.tx_prbs_config = CSRStorage(2)

    self.rx_pattern = CSRStatus(20)
    self.rx_prbs_config = CSRStorage(2)
    self.rx_prbs_errors = CSRStatus(32)

    self.rx_bitslip_value = CSRStorage(5)
    self.rx_delay_rst = CSR()
    self.rx_delay_inc = CSRStorage()
    self.rx_delay_ce = CSR()

    # # #

    self.submodules.encoder = ClockDomainsRenamer("serdes")(Encoder(2, True))
    self.decoders = [ClockDomainsRenamer("serdes")(Decoder(True)) for _ in range(2)]
    self.submodules += self.decoders

    # -- clocking -----------------------------------------------------------
    # master mode:
    # - linerate/10 pll refclk provided externally
    # - linerate/10 clock generated on clk_pads
    # slave mode:
    # - linerate/10 pll refclk provided by clk_pads
    self.clock_domains.cd_serdes = ClockDomain()
    self.clock_domains.cd_serdes_10x = ClockDomain()
    self.clock_domains.cd_serdes_2p5x = ClockDomain()
    self.comb += [
        self.cd_serdes.clk.eq(pll.serdes_clk),
        self.cd_serdes_10x.clk.eq(pll.serdes_10x_clk),
        self.cd_serdes_2p5x.clk.eq(pll.serdes_2p5x_clk),
    ]
    # Every domain is held in reset until the PLL reports lock.
    self.specials += [
        AsyncResetSynchronizer(self.cd_serdes, ~pll.lock),
        AsyncResetSynchronizer(self.cd_serdes_10x, ~pll.lock),
        AsyncResetSynchronizer(self.cd_serdes_2p5x, ~pll.lock),
    ]

    # -- control/status cdc -------------------------------------------------
    tx_pattern = Signal(20)
    tx_produce_square_wave = Signal()
    tx_prbs_config = Signal(2)

    rx_pattern = Signal(20)
    rx_prbs_config = Signal(2)
    rx_prbs_errors = Signal(32)

    rx_bitslip_value = Signal(5)

    self.specials += [
        MultiReg(self.tx_pattern.storage, tx_pattern, "serdes"),
        MultiReg(self.tx_produce_square_wave.storage, tx_produce_square_wave, "serdes"),
        MultiReg(self.tx_prbs_config.storage, tx_prbs_config, "serdes"),
    ]
    self.specials += [
        MultiReg(rx_pattern, self.rx_pattern.status, "sys"),
        MultiReg(self.rx_prbs_config.storage, rx_prbs_config, "serdes"),
        MultiReg(rx_prbs_errors, self.rx_prbs_errors.status, "sys"),  # FIXME
    ]
    self.specials += [
        MultiReg(self.rx_bitslip_value.storage, rx_bitslip_value, "serdes"),
    ]

    # Constant (non-signal) parameters shared by the primitives below.
    oserdes_params = dict(
        p_DATA_WIDTH=8,
        p_TRISTATE_WIDTH=1,
        p_DATA_RATE_OQ="DDR",
        p_DATA_RATE_TQ="BUF",
        p_SERDES_MODE="MASTER",
    )
    idelay_params = dict(
        p_DELAY_SRC="IDATAIN",
        p_SIGNAL_PATTERN="DATA",
        p_CINVCTRL_SEL="FALSE",
        p_HIGH_PERFORMANCE_MODE="TRUE",
        p_REFCLK_FREQUENCY=200.0,
        p_PIPE_SEL="FALSE",
        p_IDELAY_TYPE="VARIABLE",
    )
    iserdes_params = dict(
        p_DATA_WIDTH=8,
        p_DATA_RATE="DDR",
        p_SERDES_MODE="MASTER",
        p_INTERFACE_TYPE="NETWORKING",
        p_NUM_CE=1,
        p_IOBDELAY="IFD",
    )

    # -- tx clock (linerate/10) ---------------------------------------------
    if mode == "master":
        self.submodules.tx_clk_gearbox = Gearbox(20, "serdes", 8, "serdes_2p5x")
        self.comb += self.tx_clk_gearbox.i.eq(0b11111000001111100000)
        clk_o = Signal()
        # D1..D8 take gearbox output bits 0..7.
        clk_d_ports = {}
        for bit in range(8):
            clk_d_ports["i_D{}".format(bit + 1)] = self.tx_clk_gearbox.o[bit]
        clk_d_ports.update(oserdes_params)
        self.specials += [
            Instance("OSERDESE2",
                o_OQ=clk_o,
                i_OCE=1,
                i_RST=ResetSignal("serdes_2p5x"),
                i_CLK=ClockSignal("serdes_10x"),
                i_CLKDIV=ClockSignal("serdes_2p5x"),
                **clk_d_ports),
            Instance("OBUFDS", i_I=clk_o, o_O=pads.clk_p, o_OB=pads.clk_n),
        ]

    # -- tx data and prbs ---------------------------------------------------
    self.submodules.tx_prbs = ClockDomainsRenamer("serdes")(PRBSTX(20, True))
    self.comb += self.tx_prbs.config.eq(tx_prbs_config)
    self.submodules.tx_gearbox = Gearbox(20, "serdes", 8, "serdes_2p5x")
    # Source priority: fixed pattern, then square wave, then PRBS output.
    self.sync.serdes += [
        self.tx_prbs.i.eq(Cat(*[self.encoder.output[i] for i in range(2)])),
        If(tx_pattern != 0,
            self.tx_gearbox.i.eq(tx_pattern)
        ).Elif(tx_produce_square_wave,
            # square wave @ linerate/20 for scope observation
            self.tx_gearbox.i.eq(0b11111111110000000000)
        ).Else(
            self.tx_gearbox.i.eq(self.tx_prbs.o)
        ),
    ]

    serdes_o = Signal()
    # D1..D8 take gearbox output bits 0..7.
    tx_d_ports = {}
    for bit in range(8):
        tx_d_ports["i_D{}".format(bit + 1)] = self.tx_gearbox.o[bit]
    tx_d_ports.update(oserdes_params)
    self.specials += [
        Instance("OSERDESE2",
            o_OQ=serdes_o,
            i_OCE=1,
            i_RST=ResetSignal("serdes_2p5x"),
            i_CLK=ClockSignal("serdes_10x"),
            i_CLKDIV=ClockSignal("serdes_2p5x"),
            **tx_d_ports),
        Instance("OBUFDS", i_I=serdes_o, o_O=pads.tx_p, o_OB=pads.tx_n),
    ]

    # -- rx clock -----------------------------------------------------------
    use_bufr = False
    if mode == "slave":
        clk_i = Signal()
        clk_i_bufg = Signal()
        self.specials += [
            Instance("IBUFDS", i_I=pads.clk_p, i_IB=pads.clk_n, o_O=clk_i),
        ]
        if use_bufr:
            clk_i_bufr = Signal()
            self.specials += [
                Instance("BUFR", i_I=clk_i, o_O=clk_i_bufr),
                Instance("BUFG", i_I=clk_i_bufr, o_O=clk_i_bufg),
            ]
        else:
            self.specials += [Instance("BUFG", i_I=clk_i, o_O=clk_i_bufg)]
        self.comb += pll.refclk.eq(clk_i_bufg)

    # -- rx -----------------------------------------------------------------
    self.submodules.rx_gearbox = Gearbox(8, "serdes_2p5x", 20, "serdes")
    self.submodules.rx_bitslip = ClockDomainsRenamer("serdes")(BitSlip(20))

    self.submodules.phase_detector = ClockDomainsRenamer("serdes_2p5x")(PhaseDetector())

    # use 2 serdes for phase detection: 1 master/ 1 slave
    serdes_m_i_nodelay = Signal()
    serdes_s_i_nodelay = Signal()
    self.specials += [
        Instance("IBUFDS_DIFF_OUT",
            i_I=pads.rx_p,
            i_IB=pads.rx_n,
            o_O=serdes_m_i_nodelay,
            o_OB=serdes_s_i_nodelay),
    ]

    # master: samples the data that goes on to the gearbox
    serdes_m_i_delayed = Signal()
    serdes_m_q = Signal(8)
    serdes_m_idelay_value = int(1/(4*pll.linerate)/78e-12)  # 1/4 bit period
    assert serdes_m_idelay_value < 32
    # Q8 carries bit 0 ... Q1 carries bit 7.
    m_q_ports = {}
    for bit in range(8):
        m_q_ports["o_Q{}".format(8 - bit)] = serdes_m_q[bit]
    m_q_ports.update(iserdes_params)
    self.specials += [
        Instance("IDELAYE2",
            p_IDELAY_VALUE=serdes_m_idelay_value,
            i_C=ClockSignal(),
            i_LD=self.rx_delay_rst.re,
            i_CE=self.rx_delay_ce.re,
            i_LDPIPEEN=0,
            i_INC=self.rx_delay_inc.storage,
            i_IDATAIN=serdes_m_i_nodelay,
            o_DATAOUT=serdes_m_i_delayed,
            **idelay_params),
        Instance("ISERDESE2",
            i_DDLY=serdes_m_i_delayed,
            i_CE1=1,
            i_RST=ResetSignal("serdes_2p5x"),
            i_CLK=ClockSignal("serdes_10x"),
            i_CLKB=~ClockSignal("serdes_10x"),
            i_CLKDIV=ClockSignal("serdes_2p5x"),
            i_BITSLIP=0,
            **m_q_ports),
    ]
    self.comb += self.phase_detector.mdata.eq(serdes_m_q)

    # slave: fed from the inverted buffer output, hence the ~ on sdata below
    serdes_s_i_delayed = Signal()
    serdes_s_q = Signal(8)
    serdes_s_idelay_value = int(1/(2*pll.linerate)/78e-12)  # 1/2 bit period
    assert serdes_s_idelay_value < 32
    # Q8 carries bit 0 ... Q1 carries bit 7.
    s_q_ports = {}
    for bit in range(8):
        s_q_ports["o_Q{}".format(8 - bit)] = serdes_s_q[bit]
    s_q_ports.update(iserdes_params)
    self.specials += [
        Instance("IDELAYE2",
            p_IDELAY_VALUE=serdes_s_idelay_value,
            i_C=ClockSignal(),
            i_LD=self.rx_delay_rst.re,
            i_CE=self.rx_delay_ce.re,
            i_LDPIPEEN=0,
            i_INC=self.rx_delay_inc.storage,
            i_IDATAIN=serdes_s_i_nodelay,
            o_DATAOUT=serdes_s_i_delayed,
            **idelay_params),
        Instance("ISERDESE2",
            i_DDLY=serdes_s_i_delayed,
            i_CE1=1,
            i_RST=ResetSignal("serdes_2p5x"),
            i_CLK=ClockSignal("serdes_10x"),
            i_CLKB=~ClockSignal("serdes_10x"),
            i_CLKDIV=ClockSignal("serdes_2p5x"),
            i_BITSLIP=0,
            **s_q_ports),
    ]
    self.comb += self.phase_detector.sdata.eq(~serdes_s_q)

    # -- rx data and prbs ---------------------------------------------------
    self.submodules.rx_prbs = ClockDomainsRenamer("serdes")(PRBSRX(20, True))
    self.comb += [
        self.rx_prbs.config.eq(rx_prbs_config),
        rx_prbs_errors.eq(self.rx_prbs.errors),
    ]
    # Only the master serdes feeds the data path; the 20-bit bitslip output
    # is split into two 10-bit symbols for the decoders.
    # NOTE(review): rx_pattern is taken from the gearbox output, i.e. before
    # rx_bitslip is applied -- confirm this is intended.
    self.comb += [
        self.rx_gearbox.i.eq(serdes_m_q),
        self.rx_bitslip.value.eq(rx_bitslip_value),
        self.rx_bitslip.i.eq(self.rx_gearbox.o),
        rx_pattern.eq(self.rx_gearbox.o),
        self.decoders[0].input.eq(self.rx_bitslip.o[:10]),
        self.decoders[1].input.eq(self.rx_bitslip.o[10:]),
        self.rx_prbs.i.eq(self.rx_bitslip.o),
    ]
def __init__(self, pads, mode="master"):
    """Build the receive datapath: IDELAYE3 -> ISERDESE3 -> 8:40 converter -> bitslip -> 4 decoders.

    pads: object providing the differential ``rx_p``/``rx_n`` input.
    mode: accepted for interface compatibility; not read in this constructor.

    Exposed signals:
      delay_rst / delay_inc : reset and single-step increment of the input delay.
      bitslip_value         : 6-bit shift applied to the 40-bit word.
      idle / comma          : link status flags (see the Status section).
      ce                    : high when the converter presents a 40-bit word.
      k / d                 : 4 K-flags and 32 data bits from the decoders.
    """
    # Control
    self.delay_rst = Signal()
    self.delay_inc = Signal()
    self.bitslip_value = bitslip_value = Signal(6)

    # Status
    self.idle = idle = Signal()
    self.comma = comma = Signal()

    # Datapath
    self.ce = ce = Signal()
    self.k = k = Signal(4)
    self.d = d = Signal(32)

    # # #

    # Data input (DDR with sys4x)
    data_nodelay = Signal()
    data_delayed = Signal()
    data_deserialized = Signal(8)
    idelay_params = dict(
        p_CASCADE="NONE",
        p_UPDATE_MODE="ASYNC",
        p_REFCLK_FREQUENCY=200.0,
        p_IS_CLK_INVERTED=0,
        p_IS_RST_INVERTED=0,
        p_DELAY_FORMAT="COUNT",
        p_DELAY_SRC="IDATAIN",
        p_DELAY_TYPE="VARIABLE",
        p_DELAY_VALUE=0,
    )
    iserdes_params = dict(
        p_IS_CLK_INVERTED=0,
        p_IS_CLK_B_INVERTED=1,
        p_DATA_WIDTH=8,
    )
    self.specials += [
        DifferentialInput(pads.rx_p, pads.rx_n, data_nodelay),
        Instance("IDELAYE3",
            i_CLK=ClockSignal("sys"),
            i_RST=self.delay_rst,
            i_LOAD=0,
            # Always counting up; delay_inc acts as the step enable.
            i_INC=1,
            i_EN_VTC=0,
            i_CE=self.delay_inc,
            i_IDATAIN=data_nodelay,
            o_DATAOUT=data_delayed,
            **idelay_params),
        Instance("ISERDESE3",
            i_D=data_delayed,
            i_RST=ResetSignal("sys"),
            i_FIFO_RD_CLK=0,
            i_FIFO_RD_EN=0,
            i_CLK=ClockSignal("sys4x"),
            i_CLK_B=ClockSignal("sys4x"),  # locally inverted
            i_CLKDIV=ClockSignal("sys"),
            o_Q=data_deserialized,
            **iserdes_params),
    ]

    # 8 --> 40 converter and bitslip
    converter = stream.Converter(8, 40)
    self.submodules += converter
    bitslip = CEInserter()(BitSlip(40))
    self.submodules += bitslip
    self.comb += [
        # The converter is fed and drained on every cycle
        converter.sink.stb.eq(1),
        converter.source.ack.eq(1),
        # Enable pipeline when converter outputs the 40 bits
        ce.eq(converter.source.stb),
        # Connect input data to converter
        converter.sink.data.eq(data_deserialized),
        # Connect converter to bitslip
        bitslip.ce.eq(ce),
        bitslip.value.eq(bitslip_value),
        bitslip.i.eq(converter.source.data),
    ]

    # 8b10b decoder
    self.submodules.decoders = decoders = [CEInserter()(Decoder(True)) for _ in range(4)]
    self.comb += [decoders[i].ce.eq(ce) for i in range(4)]
    # Connect bitslip to decoder: symbol n takes bits [10n, 10n+10)
    decoder_links = []
    for n in range(4):
        decoder_links.append(decoders[n].input.eq(bitslip.o[10*n:10*(n + 1)]))
    # Connect decoder to output
    decoder_links.append(self.k.eq(Cat(*[decoders[i].k for i in range(4)])))
    decoder_links.append(self.d.eq(Cat(*[decoders[i].d for i in range(4)])))
    self.comb += decoder_links

    # Status
    idle_timer = WaitTimer(256)
    self.submodules += idle_timer
    # Comma: K28.5 on symbol 0, followed by three all-zero data symbols.
    comma_match = (decoders[0].k == 1) & (decoders[0].d == K(28,5))
    for n in range(1, 4):
        comma_match = comma_match & (decoders[n].k == 0)
        comma_match = comma_match & (decoders[n].d == 0)
    self.comb += [
        idle_timer.wait.eq(1),
        # Idle: timer elapsed and the 40-bit word is all zeros or all ones.
        self.idle.eq(idle_timer.done &
                     ((bitslip.o == 0) | (bitslip.o == (2**40-1)))),
        self.comma.eq(comma_match),
    ]
def __init__(self, pads, sys_clk_freq=100e6):
    """Build the DDR3 PHY around ECP5 ODDRX2*/IDDRX2*/DQSBUFM primitives.

    Parameters
    ----------
    pads :
        Memory pads; this method reads ``a``, ``ba``, ``dq``, ``dm``,
        ``dqs_p``, ``clk_p``, the control pads listed in ``controls`` and,
        when present, ``reset_n`` and ``cs_n``.
    sys_clk_freq :
        System clock frequency, used only to derive ``tck``.

    The method creates the calibration CSRs, ``self.settings`` and
    ``self.dfi``, then instantiates the clock, address/command, DM, DQS
    and DQ primitives and the read/write flow control.
    """
    memtype = "DDR3"
    # NOTE(review): evaluates to 1 / (2 * sys_clk_freq); presumably the
    # DRAM clock period for a memory clock at twice sys_clk - confirm.
    tck = 2 / (2 * 2 * sys_clk_freq)
    addressbits = len(pads.a)
    bankbits = len(pads.ba)
    # A single rank is assumed when the pads expose no chip-select.
    nranks = 1 if not hasattr(pads, "cs_n") else len(pads.cs_n)
    databits = len(pads.dq)
    nphases = 2

    # Write-leveling control
    self._wlevel_en = CSRStorage()
    self._wlevel_strobe = CSR()

    # One select bit per byte lane (databits // 8)
    self._dly_sel = CSRStorage(databits // 8)

    # Read-side controls; only the two bitslip CSRs are used below
    # (_rdly_dq_rst appears only in the commented-out DELAYF).
    self._rdly_dq_rst = CSR()
    self._rdly_dq_inc = CSR()
    self._rdly_dq_bitslip_rst = CSR()
    self._rdly_dq_bitslip = CSR()

    # Write-side delay controls (drive the DELAYF LOADN/MOVE pins below)
    self._wdly_dq_rst = CSR()
    self._wdly_dq_inc = CSR()
    self._wdly_dqs_rst = CSR()
    self._wdly_dqs_inc = CSR()

    # compute phy settings
    cl, cwl = get_cl_cw(memtype, tck)
    cl_sys_latency = get_sys_latency(nphases, cl)
    cwl_sys_latency = get_sys_latency(nphases, cwl)

    rdcmdphase, rdphase = get_sys_phases(nphases, cl_sys_latency, cl)
    wrcmdphase, wrphase = get_sys_phases(nphases, cwl_sys_latency, cwl)
    self.settings = PhySettings(
        memtype=memtype,
        dfi_databits=4 * databits,
        nranks=nranks,
        nphases=nphases,
        rdphase=rdphase,
        wrphase=wrphase,
        rdcmdphase=rdcmdphase,
        wrcmdphase=wrcmdphase,
        cl=cl,
        cwl=cwl,
        read_latency=2 + cl_sys_latency + 2 + log2_int(4 // nphases) + 3,  # FIXME
        write_latency=cwl_sys_latency)

    # DFI interface: 4 * databits wide; the final argument (4) is passed
    # where nphases would go even though settings.nphases is 2.
    # NOTE(review): only phases[0] and phases[1] are wired below - confirm
    # the extra phases are intentional.
    self.dfi = Interface(addressbits, bankbits, nranks, 4 * databits, 4)

    # # #

    # Selects which half of the 8-bit write word is sent to the x2 output
    # registers (see dm_data_muxed / dq_data_muxed).
    bl8_sel = Signal()

    # Clock
    for i in range(len(pads.clk_p)):
        sd_clk_se = Signal()  # not used in this method
        self.specials += [
            # Constant 0,1,0,1 pattern serialized onto the clock pad.
            Instance("ODDRX2F",
                i_D0=0,
                i_D1=1,
                i_D2=0,
                i_D3=1,
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                i_RST=ResetSignal(),
                o_Q=pads.clk_p[i]),
        ]

    # Addresses and commands
    # Each DFI phase value is presented on two consecutive D inputs.
    for i in range(addressbits):
        self.specials += \
            Instance("ODDRX2F",
                i_D0=self.dfi.phases[0].address[i],
                i_D1=self.dfi.phases[0].address[i],
                i_D2=self.dfi.phases[1].address[i],
                i_D3=self.dfi.phases[1].address[i],
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                i_RST=ResetSignal(),
                o_Q=pads.a[i]
            )
    for i in range(bankbits):
        self.specials += \
            Instance("ODDRX2F",
                i_D0=self.dfi.phases[0].bank[i],
                i_D1=self.dfi.phases[0].bank[i],
                i_D2=self.dfi.phases[1].bank[i],
                i_D3=self.dfi.phases[1].bank[i],
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                i_RST=ResetSignal(),
                o_Q=pads.ba[i]
            )
    # reset_n and cs_n are optional pads; drive them only when present.
    controls = ["ras_n", "cas_n", "we_n", "cke", "odt"]
    if hasattr(pads, "reset_n"):
        controls.append("reset_n")
    if hasattr(pads, "cs_n"):
        controls.append("cs_n")
    for name in controls:
        for i in range(len(getattr(pads, name))):
            self.specials += \
                Instance("ODDRX2F",
                    i_D0=getattr(self.dfi.phases[0], name)[i],
                    i_D1=getattr(self.dfi.phases[0], name)[i],
                    i_D2=getattr(self.dfi.phases[1], name)[i],
                    i_D3=getattr(self.dfi.phases[1], name)[i],
                    i_ECLK=ClockSignal("sys2x"),
                    i_SCLK=ClockSignal(),
                    i_RST=ResetSignal(),
                    o_Q=getattr(pads, name)[i]
                )

    # DQSBUFM
    # A single DQSBUFM is instantiated, fed from pads.dqs_p[0]; its
    # outputs are shared by every byte lane below. All of its control
    # inputs are tied to constants.
    dqsr90 = Signal()
    dqsw270 = Signal()
    dqsw = Signal()
    rdpntr = Signal(3)
    wrpntr = Signal(3)
    self.specials += Instance(
        "DQSBUFM",
        i_DDRDEL=0b0,
        i_PAUSE=0b0,
        i_DQSI=pads.dqs_p[0],
        i_READ0=0b0,
        i_READ1=0b0,
        i_READCLKSEL0=0b0,
        i_READCLKSEL1=0b0,
        i_READCLKSEL2=0b0,
        i_DYNDELAY0=0b0,
        i_DYNDELAY1=0b0,
        i_DYNDELAY2=0b0,
        i_DYNDELAY3=0b0,
        i_DYNDELAY4=0b0,
        i_DYNDELAY5=0b0,
        i_DYNDELAY6=0b0,
        i_DYNDELAY7=0b0,
        i_RDLOADN=0,
        i_RDMOVE=0,
        i_RDDIRECTION=0,
        i_WRLOADN=0,
        i_WRMOVE=0,
        i_WRDIRECTION=0,

        # Status outputs left unconnected:
        #o_RDCFLAG=,
        #o_WRCFLAG=,

        #o_DATAVALID=,
        #o_BURSTDET=,

        o_DQSR90=dqsr90,
        o_RDPNTR0=rdpntr[0],
        o_RDPNTR1=rdpntr[1],
        o_RDPNTR2=rdpntr[2],
        o_WRPNTR0=wrpntr[0],
        o_WRPNTR1=wrpntr[1],
        o_WRPNTR2=wrpntr[2],
        i_SCLK=ClockSignal("sys"),
        i_ECLK=ClockSignal("sys2x"),
        o_DQSW270=dqsw270,
        o_DQSW=dqsw)

    # DQS and DM
    oe_dqs = Signal()
    # dqs_preamble / dqs_postamble are never driven in this method, so the
    # Elif branch below is never taken as written.
    dqs_preamble = Signal()
    dqs_postamble = Signal()
    dqs_serdes_pattern = Signal(8, reset=0b01010101)
    # Write leveling: a single pulse on strobe, otherwise idle low.
    # Normal operation: toggling pattern.
    self.comb += \
        If(self._wlevel_en.storage,
            If(self._wlevel_strobe.re,
                dqs_serdes_pattern.eq(0b00000001)
            ).Else(
                dqs_serdes_pattern.eq(0b00000000)
            )
        ).Elif(dqs_preamble | dqs_postamble,
            dqs_serdes_pattern.eq(0b0000000)
        ).Else(
            dqs_serdes_pattern.eq(0b01010101)
        )

    for i in range(databits // 8):
        dm_o_nodelay = Signal()
        dm_data = Signal(8)
        dm_data_d = Signal(8)
        dm_data_muxed = Signal(4)
        # Gather this lane's mask bit from the four words of each phase.
        self.comb += dm_data.eq(
            Cat(self.dfi.phases[0].wrdata_mask[0 * databits // 8 + i],
                self.dfi.phases[0].wrdata_mask[1 * databits // 8 + i],
                self.dfi.phases[0].wrdata_mask[2 * databits // 8 + i],
                self.dfi.phases[0].wrdata_mask[3 * databits // 8 + i],
                self.dfi.phases[1].wrdata_mask[0 * databits // 8 + i],
                self.dfi.phases[1].wrdata_mask[1 * databits // 8 + i],
                self.dfi.phases[1].wrdata_mask[2 * databits // 8 + i],
                self.dfi.phases[1].wrdata_mask[3 * databits // 8 + i]),
        )
        self.sync += dm_data_d.eq(dm_data)
        # bl8_sel picks the upper half of the previous cycle's word,
        # otherwise the lower half of the current one.
        self.comb += \
            If(bl8_sel,
                dm_data_muxed.eq(dm_data_d[4:])
            ).Else(
                dm_data_muxed.eq(dm_data[:4])
            )
        self.specials += \
            Instance("ODDRX2DQA",
                i_D0=dm_data_muxed[0],
                i_D1=dm_data_muxed[1],
                i_D2=dm_data_muxed[2],
                i_D3=dm_data_muxed[3],
                i_RST=ResetSignal(),
                i_DQSW270=dqsw270,
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                o_Q=dm_o_nodelay
            )
        # DM shares the DQ write-delay CSRs, gated by this lane's select.
        self.specials += \
            Instance("DELAYF",
                i_A=dm_o_nodelay,
                i_LOADN=self._dly_sel.storage[i] & self._wdly_dq_rst.re,
                i_MOVE=self._dly_sel.storage[i] & self._wdly_dq_inc.re,
                i_DIRECTION=0,
                o_Z=pads.dm[i],
                #o_CFLAG=,
            )

        dqs_nodelay = Signal()
        dqs_delayed = Signal()
        dqs_oe = Signal()
        # Only the low four bits of dqs_serdes_pattern are serialized.
        self.specials += \
            Instance("ODDRX2DQSB",
                i_D0=dqs_serdes_pattern[0],
                i_D1=dqs_serdes_pattern[1],
                i_D2=dqs_serdes_pattern[2],
                i_D3=dqs_serdes_pattern[3],
                i_RST=ResetSignal(),
                i_DQSW=dqsw,
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                o_Q=dqs_nodelay
            )
        self.specials += \
            Instance("DELAYF",
                i_A=dqs_nodelay,
                i_LOADN=self._dly_sel.storage[i] & self._wdly_dqs_rst.re,
                i_MOVE=self._dly_sel.storage[i] & self._wdly_dqs_inc.re,
                i_DIRECTION=0,
                o_Z=dqs_delayed,
                #o_CFLAG=,
            )
        self.specials += \
            Instance("TSHX2DQSA",
                i_T0=oe_dqs,
                i_T1=oe_dqs,
                i_SCLK=ClockSignal(),
                i_ECLK=ClockSignal("sys2x"),
                i_DQSW=dqsw,
                i_RST=ResetSignal(),
                o_Q=dqs_oe,
            )
        # Output-only tristate: no input signal is attached to DQS here.
        self.specials += Tristate(pads.dqs_p[i], dqs_delayed, dqs_oe)

    # DQ
    oe_dq = Signal()
    for i in range(databits):
        dq_o_nodelay = Signal()
        dq_o_delayed = Signal()
        dq_i_nodelay = Signal()  # only referenced by the commented-out DELAYF
        dq_i_delayed = Signal()
        dq_oe = Signal()
        dq_data = Signal(8)
        dq_data_d = Signal(8)
        dq_data_muxed = Signal(4)
        # Gather bit i from the four words of each of the two phases.
        self.comb += dq_data.eq(
            Cat(self.dfi.phases[0].wrdata[0 * databits + i],
                self.dfi.phases[0].wrdata[1 * databits + i],
                self.dfi.phases[0].wrdata[2 * databits + i],
                self.dfi.phases[0].wrdata[3 * databits + i],
                self.dfi.phases[1].wrdata[0 * databits + i],
                self.dfi.phases[1].wrdata[1 * databits + i],
                self.dfi.phases[1].wrdata[2 * databits + i],
                self.dfi.phases[1].wrdata[3 * databits + i]))
        self.sync += dq_data_d.eq(dq_data)
        self.comb += \
            If(bl8_sel,
                dq_data_muxed.eq(dq_data_d[4:])
            ).Else(
                dq_data_muxed.eq(dq_data[:4])
            )
        self.specials += \
            Instance("ODDRX2DQA",
                i_D0=dq_data_muxed[0],
                i_D1=dq_data_muxed[1],
                i_D2=dq_data_muxed[2],
                i_D3=dq_data_muxed[3],
                i_RST=ResetSignal(),
                i_DQSW270=dqsw270,
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                o_Q=dq_o_nodelay
            )
        # Output delay, controlled per byte lane (i // 8).
        self.specials += \
            Instance("DELAYF",
                i_A=dq_o_nodelay,
                i_LOADN=self._dly_sel.storage[i//8] & self._wdly_dq_rst.re,
                i_MOVE=self._dly_sel.storage[i//8] & self._wdly_dq_inc.re,
                i_DIRECTION=0,
                o_Z=dq_o_delayed,
                #o_CFLAG=,
            )
        # Declared 8 bits wide; only bits 0-3 are driven by the IDDR.
        dq_i_data = Signal(8)
        dq_i_data_d = Signal(8)  # registered copy, not read in this method
        self.specials += \
            Instance("IDDRX2DQA",
                i_D=dq_i_delayed,
                i_RST=ResetSignal(),
                i_DQSR90=dqsr90,
                i_SCLK=ClockSignal(),
                i_ECLK=ClockSignal("sys2x"),
                i_RDPNTR0=rdpntr[0],
                i_RDPNTR1=rdpntr[1],
                i_RDPNTR2=rdpntr[2],
                i_WRPNTR0=wrpntr[0],
                i_WRPNTR1=wrpntr[1],
                i_WRPNTR2=wrpntr[2],
                o_Q0=dq_i_data[0],
                o_Q1=dq_i_data[1],
                o_Q2=dq_i_data[2],
                o_Q3=dq_i_data[3],
            )
        # Per-bit bitslip; its value is reset/incremented through the
        # CSRs when this bit's byte lane is selected.
        dq_bitslip = BitSlip(4)
        self.comb += dq_bitslip.i.eq(dq_i_data)
        self.sync += \
            If(self._dly_sel.storage[i//8],
                If(self._rdly_dq_bitslip_rst.re,
                    dq_bitslip.value.eq(0)
                ).Elif(self._rdly_dq_bitslip.re,
                    dq_bitslip.value.eq(dq_bitslip.value + 1)
                )
            )
        self.submodules += dq_bitslip
        self.sync += dq_i_data_d.eq(dq_i_data)
        # Only rddata[i] and rddata[databits + i] of phases 0 and 1 are
        # driven here; the upper two words of each phase are left
        # undriven by this method.
        self.comb += [
            self.dfi.phases[0].rddata[i].eq(dq_bitslip.o[0]),
            self.dfi.phases[0].rddata[databits + i].eq(dq_bitslip.o[1]),
            self.dfi.phases[1].rddata[i].eq(dq_bitslip.o[2]),
            self.dfi.phases[1].rddata[databits + i].eq(dq_bitslip.o[3])
        ]
        # Disabled input delay; with it commented out, dq_i_delayed is
        # driven directly by the Tristate below.
        # NOTE(review): the disabled MOVE input uses _wdly_dq_inc while
        # LOADN uses _rdly_dq_rst - check before re-enabling.
        #self.specials += \
        #    Instance("DELAYF",
        #        i_A=dq_i_nodelay,
        #        i_LOADN=self._dly_sel.storage[i//8] & self._rdly_dq_rst.re,
        #        i_MOVE=self._dly_sel.storage[i//8] & self._wdly_dq_inc.re,
        #        i_DIRECTION=0,
        #        o_Z=dq_i_delayed,
        #        #o_CFLAG=,
        #    )
        self.specials += \
            Instance("TSHX2DQA",
                i_T0=oe_dq,
                i_T1=oe_dq,
                i_SCLK=ClockSignal(),
                i_ECLK=ClockSignal("sys2x"),
                i_DQSW270=dqsw270,
                i_RST=ResetSignal(),
                o_Q=dq_oe,
            )
        self.specials += Tristate(pads.dq[i], dq_o_delayed, dq_oe, dq_i_delayed)

    # Flow control
    #
    # total read latency:
    #  N cycles through ODDRX2DQA FIXME
    #  cl_sys_latency cycles CAS
    #  M cycles through IDDRX2DQA FIXME
    # Delay rddata_en through (read_latency - 1) registers; the final
    # registered assignment below adds one more cycle.
    rddata_en = self.dfi.phases[self.settings.rdphase].rddata_en
    for i in range(self.settings.read_latency - 1):
        n_rddata_en = Signal()
        self.sync += n_rddata_en.eq(rddata_en)
        rddata_en = n_rddata_en
    # rddata_valid is also forced while write leveling is enabled.
    self.sync += [
        phase.rddata_valid.eq(rddata_en | self._wlevel_en.storage)
        for phase in self.dfi.phases
    ]

    # Shift register of past wrdata_en values; oe covers a four-cycle
    # window starting at tap cwl_sys_latency - 1.
    oe = Signal()
    last_wrdata_en = Signal(cwl_sys_latency + 3)
    wrphase = self.dfi.phases[self.settings.wrphase]
    self.sync += last_wrdata_en.eq(
        Cat(wrphase.wrdata_en, last_wrdata_en[:-1]))
    self.comb += oe.eq(last_wrdata_en[cwl_sys_latency - 1] |
                       last_wrdata_en[cwl_sys_latency] |
                       last_wrdata_en[cwl_sys_latency + 1] |
                       last_wrdata_en[cwl_sys_latency + 2])
    # During write leveling DQS is driven continuously and DQ never is.
    self.sync += \
        If(self._wlevel_en.storage,
            oe_dqs.eq(1), oe_dq.eq(0)
        ).Else(
            oe_dqs.eq(oe), oe_dq.eq(oe)
        )
    self.sync += bl8_sel.eq(last_wrdata_en[cwl_sys_latency - 1])
def __init__(self, pads, mode="master"):
    """Assemble the single-bit-per-cycle receive datapath.

    The differential input is fed bit by bit into a 1 -> 40 stream
    converter, realigned by a 40-bit bitslip and split into four 10-bit
    symbols, one per 8b/10b decoder.

    ``mode`` is accepted but is not read anywhere in this constructor.
    """
    # Control
    self.bitslip_value = Signal(6)

    # Status
    self.idle = Signal()
    self.comma = Signal()

    # Datapath
    self.ce = Signal()
    self.k = Signal(4)
    self.d = Signal(32)

    # # #

    # Input data (on rising edge of sys_clk)
    data = Signal()
    data_d = Signal()
    self.specials += DifferentialInput(pads.rx_p, pads.rx_n, data)
    # NOTE(review): data_d is registered here but never read; the
    # converter below is fed from the unregistered `data`. Confirm whether
    # data_d was meant to be the converter input.
    self.sync += data_d.eq(data)

    # 1 --> 40 converter and bitslip
    converter = stream.Converter(1, 40)
    self.submodules += converter
    bitslip = CEInserter()(BitSlip(40))
    self.submodules += bitslip
    self.comb += [
        # The converter is fed every cycle and drained as soon as it
        # has a word.
        converter.sink.stb.eq(1),
        converter.sink.data.eq(data),
        converter.source.ack.eq(1),
        # Enable the pipeline when the converter outputs the 40 bits.
        self.ce.eq(converter.source.stb),
        bitslip.ce.eq(self.ce),
        bitslip.value.eq(self.bitslip_value),
        bitslip.i.eq(converter.source.data),
    ]

    # 8b10b decoders, one per 10-bit symbol of the aligned word
    self.submodules.decoders = decoders = [CEInserter()(Decoder(True)) for _ in range(4)]
    for n, decoder in enumerate(decoders):
        self.comb += [
            decoder.ce.eq(self.ce),
            decoder.input.eq(bitslip.o[10*n:10*(n + 1)]),
        ]
    self.comb += [
        self.k.eq(Cat(*[decoder.k for decoder in decoders])),
        self.d.eq(Cat(*[decoder.d for decoder in decoders])),
    ]

    # Status
    idle_timer = WaitTimer(256)
    self.submodules += idle_timer

    # Line is considered static when the aligned word is all-0 or all-1.
    line_static = (bitslip.o == 0) | (bitslip.o == (2**40 - 1))

    # Comma word: K28.5 on symbol 0 followed by three zero data symbols.
    comma_seen = (decoders[0].k == 1) & (decoders[0].d == K(28, 5))
    for decoder in decoders[1:]:
        comma_seen = comma_seen & (decoder.k == 0) & (decoder.d == 0)

    self.comb += [
        idle_timer.wait.eq(1),
        self.idle.eq(idle_timer.done & line_static),
        self.comma.eq(comma_seen),
    ]
def __init__(self, pads, mode="master"):
    """Assemble the OSERDESE3/ISERDESE3 based serial link (TX and RX).

    In ``"master"`` mode a linerate/10 reference clock is generated on
    ``pads.clk_p``/``pads.clk_n``; in ``"slave"`` mode that clock is
    received from the same pads and exposed as ``self.refclk``.
    """
    if mode == "slave":
        self.refclk = Signal()

    # TX / RX word interfaces (4 symbols per word)
    self.tx_ce = Signal()
    self.tx_k = Signal(4)
    self.tx_d = Signal(32)
    self.rx_ce = Signal()
    self.rx_k = Signal(4)
    self.rx_d = Signal(32)

    # Link control / status
    self.tx_idle = Signal()
    self.tx_comma = Signal()
    self.rx_idle = Signal()
    self.rx_comma = Signal()

    # RX alignment and input delay control
    self.rx_bitslip_value = Signal(6)
    self.rx_delay_rst = Signal()
    self.rx_delay_inc = Signal()
    self.rx_delay_en_vtc = Signal()

    # # #

    self.submodules.encoder = encoder = CEInserter()(Encoder(4, True))
    self.comb += encoder.ce.eq(self.tx_ce)
    self.submodules.decoders = decoders = [
        CEInserter()(Decoder(True)) for _ in range(4)
    ]
    self.comb += [decoder.ce.eq(self.rx_ce) for decoder in decoders]

    # Generics shared by both output serializers (constants only).
    oserdes_generics = dict(
        p_DATA_WIDTH=8,
        p_INIT=0,
        p_IS_CLK_INVERTED=0,
        p_IS_CLKDIV_INVERTED=0,
        p_IS_RST_INVERTED=0,
    )

    # clocking:

    # In master mode:
    # - linerate/10 refclk generated on clk_pads
    # In Slave mode:
    # - linerate/10 refclk provided by clk_pads

    # tx clock (linerate/10)
    if mode == "master":
        clk_converter = stream.Converter(40, 8)
        self.submodules += clk_converter
        # Four copies of a 10-bit half-high/half-low pattern are
        # serialized continuously.
        self.comb += [
            clk_converter.sink.stb.eq(1),
            clk_converter.sink.data.eq(
                Replicate(Signal(10, reset=0b1111100000), 4)),
            clk_converter.source.ack.eq(1)
        ]
        clk_o = Signal()
        self.specials += Instance(
            "OSERDESE3",
            o_OQ=clk_o,
            i_RST=ResetSignal("sys"),
            i_CLK=ClockSignal("sys4x"),
            i_CLKDIV=ClockSignal("sys"),
            i_D=clk_converter.source.data,
            **oserdes_generics
        )
        self.specials += Instance(
            "OBUFDS",
            i_I=clk_o,
            o_O=pads.clk_p,
            o_OB=pads.clk_n
        )

    # tx datapath
    # tx_data -> encoders -> converter -> serdes
    self.submodules.tx_converter = tx_converter = stream.Converter(40, 8)
    self.comb += [
        tx_converter.sink.stb.eq(1),
        tx_converter.source.ack.eq(1),
        # A new word is encoded each time the converter accepts one.
        self.tx_ce.eq(tx_converter.sink.ack),
    ]
    # Idle forces an all-zero line word instead of the encoder output.
    self.comb += If(self.tx_idle,
        tx_converter.sink.data.eq(0)
    ).Else(
        tx_converter.sink.data.eq(
            Cat(*[encoder.output[i] for i in range(4)]))
    )
    # Comma request: K28.5 on symbol 0, the other encoder inputs keep
    # their default value; otherwise forward the user's k/d.
    self.comb += If(self.tx_comma,
        encoder.k[0].eq(1),
        encoder.d[0].eq(K(28, 5)),
    ).Else(
        *[encoder.k[i].eq(self.tx_k[i]) for i in range(4)],
        *[encoder.d[i].eq(self.tx_d[8*i:8*(i + 1)]) for i in range(4)]
    )

    serdes_o = Signal()
    self.specials += Instance(
        "OSERDESE3",
        o_OQ=serdes_o,
        i_RST=ResetSignal("sys"),
        i_CLK=ClockSignal("sys4x"),
        i_CLKDIV=ClockSignal("sys"),
        i_D=tx_converter.source.data,
        **oserdes_generics
    )
    self.specials += Instance(
        "OBUFDS",
        i_I=serdes_o,
        o_O=pads.tx_p,
        o_OB=pads.tx_n
    )

    # rx clock
    use_bufr = True
    if mode == "slave":
        clk_i = Signal()
        clk_i_bufg = Signal()
        self.specials += Instance(
            "IBUFDS",
            i_I=pads.clk_p,
            i_IB=pads.clk_n,
            o_O=clk_i
        )
        if not use_bufr:
            self.specials += Instance("BUFG", i_I=clk_i, o_O=clk_i_bufg)
        else:
            # Received clock goes through a BUFR before the BUFG.
            clk_i_bufr = Signal()
            self.specials += Instance("BUFR", i_I=clk_i, o_O=clk_i_bufr)
            self.specials += Instance("BUFG", i_I=clk_i_bufr, o_O=clk_i_bufg)
        self.comb += self.refclk.eq(clk_i_bufg)

    # rx datapath
    # serdes -> converter -> bitslip -> decoders -> rx_data
    self.submodules.rx_converter = rx_converter = stream.Converter(8, 40)
    self.submodules.rx_bitslip = rx_bitslip = CEInserter()(BitSlip(40))
    self.comb += [
        rx_converter.source.ack.eq(1),
        self.rx_ce.eq(rx_converter.source.stb),
        rx_bitslip.ce.eq(self.rx_ce),
    ]

    serdes_i_nodelay = Signal()
    self.specials += Instance(
        "IBUFDS_DIFF_OUT",
        i_I=pads.rx_p,
        i_IB=pads.rx_n,
        o_O=serdes_i_nodelay
    )

    serdes_i_delayed = Signal()
    serdes_q = Signal(8)
    self.specials += Instance(
        "IDELAYE3",
        p_CASCADE="NONE",
        p_UPDATE_MODE="ASYNC",
        p_REFCLK_FREQUENCY=200.0,
        p_IS_CLK_INVERTED=0,
        p_IS_RST_INVERTED=0,
        p_DELAY_FORMAT="COUNT",
        p_DELAY_SRC="IDATAIN",
        p_DELAY_TYPE="VARIABLE",
        p_DELAY_VALUE=0,

        i_CLK=ClockSignal("sys"),
        i_RST=self.rx_delay_rst,
        i_LOAD=0,
        i_INC=1,
        i_EN_VTC=self.rx_delay_en_vtc,
        i_CE=self.rx_delay_inc,

        i_IDATAIN=serdes_i_nodelay,
        o_DATAOUT=serdes_i_delayed
    )
    self.specials += Instance(
        "ISERDESE3",
        p_IS_CLK_INVERTED=0,
        p_IS_CLK_B_INVERTED=1,
        p_DATA_WIDTH=8,

        i_D=serdes_i_delayed,
        i_RST=ResetSignal("sys"),
        i_FIFO_RD_CLK=0,
        i_FIFO_RD_EN=0,
        i_CLK=ClockSignal("sys4x"),
        i_CLK_B=ClockSignal("sys4x"),  # locally inverted
        i_CLKDIV=ClockSignal("sys"),
        o_Q=serdes_q
    )

    self.comb += [
        rx_converter.sink.stb.eq(1),
        rx_converter.sink.data.eq(serdes_q),
        rx_bitslip.value.eq(self.rx_bitslip_value),
        rx_bitslip.i.eq(rx_converter.source.data),
    ]
    # One 10-bit symbol of the aligned word per decoder.
    self.comb += [
        decoder.input.eq(rx_bitslip.o[10*n:10*(n + 1)])
        for n, decoder in enumerate(decoders)
    ]

    # Comma word: K28.5 on symbol 0 followed by three zero data symbols.
    comma_seen = (decoders[0].k == 1) & (decoders[0].d == K(28, 5))
    for decoder in decoders[1:]:
        comma_seen = comma_seen & (decoder.k == 0) & (decoder.d == 0)

    self.comb += [
        self.rx_k.eq(Cat(*[decoder.k for decoder in decoders])),
        self.rx_d.eq(Cat(*[decoder.d for decoder in decoders])),
        self.rx_comma.eq(comma_seen),
    ]

    # rx_idle is registered: timer expired and aligned word is all-zero.
    idle_timer = WaitTimer(32)
    self.submodules += idle_timer
    self.comb += idle_timer.wait.eq(1)
    self.sync += self.rx_idle.eq(idle_timer.done & (rx_bitslip.o == 0))
def __init__(self, pll, pads, mode="master"):
    """Assemble the OSERDESE2/ISERDESE2 based serial link (TX and RX).

    The encoder, decoders and bitslip run in the ``serwb_serdes`` clock
    domain; gearboxes move 40-bit words to and from the 8-bit
    ``serwb_serdes_5x`` domain used by the serializers. The three
    ``serwb_serdes*`` clocks and the lock indication are taken from
    ``pll``. In ``"slave"`` mode the clock received on ``pads.clk_*`` is
    driven onto ``pll.refclk``; in ``"master"`` mode a linerate/10 clock
    is generated on those pads.
    """
    # User-side word interfaces (4 symbols per word)
    self.tx_k = Signal(4)
    self.tx_d = Signal(32)
    self.rx_k = Signal(4)
    self.rx_d = Signal(32)

    # Link control / status
    self.tx_idle = Signal()
    self.tx_comma = Signal()
    self.rx_idle = Signal()
    self.rx_comma = Signal()

    # RX alignment and input delay control
    self.rx_bitslip_value = Signal(6)
    self.rx_delay_rst = Signal()
    self.rx_delay_inc = Signal()
    self.rx_delay_ce = Signal()

    # # #

    self.submodules.encoder = ClockDomainsRenamer("serwb_serdes")(Encoder(
        4, True))
    self.decoders = [
        ClockDomainsRenamer("serwb_serdes")(Decoder(True))
        for _ in range(4)
    ]
    self.submodules += self.decoders

    # clocking:

    # In master mode:
    # - linerate/10 pll refclk provided by user
    # - linerate/10 slave refclk generated on clk_pads
    # In Slave mode:
    # - linerate/10 pll refclk provided by clk_pads
    self.clock_domains.cd_serwb_serdes = ClockDomain()
    self.clock_domains.cd_serwb_serdes_5x = ClockDomain()
    self.clock_domains.cd_serwb_serdes_20x = ClockDomain(reset_less=True)
    self.comb += [
        self.cd_serwb_serdes.clk.eq(pll.serwb_serdes_clk),
        self.cd_serwb_serdes_5x.clk.eq(pll.serwb_serdes_5x_clk),
        self.cd_serwb_serdes_20x.clk.eq(pll.serwb_serdes_20x_clk),
        # The 5x domain shares the word-rate domain's reset.
        self.cd_serwb_serdes_5x.rst.eq(self.cd_serwb_serdes.rst),
    ]
    # Word-rate domain is held in reset until the PLL reports lock.
    self.specials += AsyncResetSynchronizer(self.cd_serwb_serdes, ~pll.lock)

    # control/status cdc
    tx_idle = Signal()
    tx_comma = Signal()
    rx_idle = Signal()
    rx_comma = Signal()
    rx_bitslip_value = Signal(6)
    self.specials += [
        MultiReg(self.tx_idle, tx_idle, "serwb_serdes"),
        MultiReg(self.tx_comma, tx_comma, "serwb_serdes"),
        MultiReg(rx_idle, self.rx_idle, "sys"),
        MultiReg(rx_comma, self.rx_comma, "sys")
    ]
    self.specials += [
        MultiReg(self.rx_bitslip_value, rx_bitslip_value, "serwb_serdes")
    ]

    # Generics shared by both output serializers (constants only).
    oserdes_generics = dict(
        p_DATA_WIDTH=8,
        p_TRISTATE_WIDTH=1,
        p_DATA_RATE_OQ="DDR",
        p_DATA_RATE_TQ="BUF",
        p_SERDES_MODE="MASTER",
    )

    # tx clock (linerate/10)
    if mode == "master":
        self.submodules.tx_clk_gearbox = Gearbox(40, "serwb_serdes", 8,
                                                 "serwb_serdes_5x")
        # Four copies of a 10-bit half-high/half-low pattern.
        clk_pattern = 0
        for n in range(4):
            clk_pattern |= 0b1111100000 << (10*n)
        self.comb += self.tx_clk_gearbox.i.eq(clk_pattern)

        clk_o = Signal()
        self.specials += Instance(
            "OSERDESE2",
            o_OQ=clk_o,
            i_OCE=1,
            i_RST=ResetSignal("serwb_serdes"),
            i_CLK=ClockSignal("serwb_serdes_20x"),
            i_CLKDIV=ClockSignal("serwb_serdes_5x"),
            # D1..D8 <- gearbox output bits 0..7
            **{"i_D{}".format(n + 1): self.tx_clk_gearbox.o[n]
               for n in range(8)},
            **oserdes_generics
        )
        self.specials += Instance(
            "OBUFDS",
            i_I=clk_o,
            o_O=pads.clk_p,
            o_OB=pads.clk_n
        )

    # tx datapath
    # tx_data -> encoders -> gearbox -> serdes
    self.submodules.tx_gearbox = Gearbox(40, "serwb_serdes", 8,
                                         "serwb_serdes_5x")
    # Comma request: K=1 / 0xbc on symbol 0, the other encoder inputs
    # keep their default value; otherwise forward the user's k/d.
    self.comb += [
        If(tx_comma,
            self.encoder.k[0].eq(1),
            self.encoder.d[0].eq(0xbc)
        ).Else(
            *[self.encoder.k[n].eq(self.tx_k[n]) for n in range(4)],
            *[self.encoder.d[n].eq(self.tx_d[8*n:8*(n + 1)])
              for n in range(4)]
        )
    ]
    # Idle forces an all-zero word into the gearbox (registered in the
    # serwb_serdes domain).
    self.sync.serwb_serdes += \
        If(tx_idle,
            self.tx_gearbox.i.eq(0)
        ).Else(
            self.tx_gearbox.i.eq(
                Cat(*[self.encoder.output[i] for i in range(4)]))
        )

    serdes_o = Signal()
    self.specials += Instance(
        "OSERDESE2",
        o_OQ=serdes_o,
        i_OCE=1,
        i_RST=ResetSignal("serwb_serdes"),
        i_CLK=ClockSignal("serwb_serdes_20x"),
        i_CLKDIV=ClockSignal("serwb_serdes_5x"),
        # D1..D8 <- gearbox output bits 0..7
        **{"i_D{}".format(n + 1): self.tx_gearbox.o[n] for n in range(8)},
        **oserdes_generics
    )
    self.specials += Instance(
        "OBUFDS",
        i_I=serdes_o,
        o_O=pads.tx_p,
        o_OB=pads.tx_n
    )

    # rx clock
    use_bufr = True
    if mode == "slave":
        clk_i = Signal()
        clk_i_bufg = Signal()
        self.specials += Instance(
            "IBUFDS",
            i_I=pads.clk_p,
            i_IB=pads.clk_n,
            o_O=clk_i
        )
        if not use_bufr:
            self.specials += Instance("BUFG", i_I=clk_i, o_O=clk_i_bufg)
        else:
            # Received clock goes through a BUFR before the BUFG.
            clk_i_bufr = Signal()
            self.specials += Instance("BUFR", i_I=clk_i, o_O=clk_i_bufr)
            self.specials += Instance("BUFG", i_I=clk_i_bufr, o_O=clk_i_bufg)
        self.comb += pll.refclk.eq(clk_i_bufg)

    # rx datapath
    # serdes -> gearbox -> bitslip -> decoders -> rx_data
    self.submodules.rx_gearbox = Gearbox(8, "serwb_serdes_5x", 40,
                                         "serwb_serdes")
    self.submodules.rx_bitslip = ClockDomainsRenamer("serwb_serdes")(
        BitSlip(40))

    serdes_i_nodelay = Signal()
    self.specials += Instance(
        "IBUFDS_DIFF_OUT",
        i_I=pads.rx_p,
        i_IB=pads.rx_n,
        o_O=serdes_i_nodelay
    )

    serdes_i_delayed = Signal()
    serdes_q = Signal(8)
    self.specials += Instance(
        "IDELAYE2",
        p_DELAY_SRC="IDATAIN",
        p_SIGNAL_PATTERN="DATA",
        p_CINVCTRL_SEL="FALSE",
        p_HIGH_PERFORMANCE_MODE="TRUE",
        p_REFCLK_FREQUENCY=200.0,
        p_PIPE_SEL="FALSE",
        p_IDELAY_TYPE="VARIABLE",
        p_IDELAY_VALUE=0,

        i_C=ClockSignal(),
        i_LD=self.rx_delay_rst,
        i_CE=self.rx_delay_ce,
        i_LDPIPEEN=0,
        i_INC=self.rx_delay_inc,

        i_IDATAIN=serdes_i_nodelay,
        o_DATAOUT=serdes_i_delayed
    )
    self.specials += Instance(
        "ISERDESE2",
        p_DATA_WIDTH=8,
        p_DATA_RATE="DDR",
        p_SERDES_MODE="MASTER",
        p_INTERFACE_TYPE="NETWORKING",
        p_NUM_CE=1,
        p_IOBDELAY="IFD",

        i_DDLY=serdes_i_delayed,
        i_CE1=1,
        i_RST=ResetSignal("serwb_serdes"),
        i_CLK=ClockSignal("serwb_serdes_20x"),
        i_CLKB=~ClockSignal("serwb_serdes_20x"),
        i_CLKDIV=ClockSignal("serwb_serdes_5x"),
        i_BITSLIP=0,
        # Q8..Q1 -> serdes_q bits 0..7 (reversed pin order)
        **{"o_Q{}".format(8 - n): serdes_q[n] for n in range(8)}
    )

    self.comb += [
        self.rx_gearbox.i.eq(serdes_q),
        self.rx_bitslip.value.eq(rx_bitslip_value),
        self.rx_bitslip.i.eq(self.rx_gearbox.o),
    ]
    # One 10-bit symbol of the aligned word per decoder.
    self.comb += [
        decoder.input.eq(self.rx_bitslip.o[10*n:10*(n + 1)])
        for n, decoder in enumerate(self.decoders)
    ]

    # Comma word: 0xbc with K=1 on symbol 0, then three zero data symbols.
    comma_seen = (self.decoders[0].d == 0xbc) & (self.decoders[0].k == 1)
    for decoder in self.decoders[1:]:
        comma_seen = comma_seen & ((decoder.d == 0x00) & (decoder.k == 0))

    self.comb += [
        self.rx_k.eq(Cat(*[decoder.k for decoder in self.decoders])),
        self.rx_d.eq(Cat(*[decoder.d for decoder in self.decoders])),
        rx_idle.eq(self.rx_bitslip.o == 0),
        rx_comma.eq(comma_seen),
    ]
def __init__(self, pads, mode="master"):
    """Assemble the receive datapath.

    The differential pair ``pads.rx_p``/``pads.rx_n`` goes through an
    IDELAYE2 and an ISERDESE2 (8-bit words), is widened to 40 bits by a
    stream converter, realigned by a 40-bit bitslip and split into four
    10-bit symbols, one per 8b/10b decoder.

    ``mode`` is accepted but is not read anywhere in this constructor.
    """
    # Control
    self.delay_rst = Signal()
    self.delay_inc = Signal()
    self.bitslip_value = Signal(6)

    # Status
    self.idle = Signal()
    self.comma = Signal()

    # Datapath
    self.ce = Signal()
    self.k = Signal(4)
    self.d = Signal(32)

    # # #

    # Data input (DDR with sys4x)
    data_nodelay = Signal()
    data_delayed = Signal()
    data_deserialized = Signal(8)

    self.specials += DifferentialInput(pads.rx_p, pads.rx_n, data_nodelay)

    # Input delay: each delay_inc pulse enables one step with INC tied
    # to 1; delay_rst drives the primitive's LD pin.
    self.specials += Instance(
        "IDELAYE2",
        p_DELAY_SRC="IDATAIN",
        p_SIGNAL_PATTERN="DATA",
        p_CINVCTRL_SEL="FALSE",
        p_HIGH_PERFORMANCE_MODE="TRUE",
        p_REFCLK_FREQUENCY=200.0,
        p_PIPE_SEL="FALSE",
        p_IDELAY_TYPE="VARIABLE",
        p_IDELAY_VALUE=0,

        i_C=ClockSignal(),
        i_LD=self.delay_rst,
        i_CE=self.delay_inc,
        i_LDPIPEEN=0,
        i_INC=1,

        i_IDATAIN=data_nodelay,
        o_DATAOUT=data_delayed
    )

    # 1:8 DDR deserializer clocked by sys4x, word clock is sys.
    self.specials += Instance(
        "ISERDESE2",
        p_DATA_WIDTH=8,
        p_DATA_RATE="DDR",
        p_SERDES_MODE="MASTER",
        p_INTERFACE_TYPE="NETWORKING",
        p_NUM_CE=1,
        p_IOBDELAY="IFD",

        i_DDLY=data_delayed,
        i_CE1=1,
        i_RST=ResetSignal("sys"),
        i_CLK=ClockSignal("sys4x"),
        i_CLKB=~ClockSignal("sys4x"),
        i_CLKDIV=ClockSignal("sys"),
        i_BITSLIP=0,
        # Q8..Q1 -> data_deserialized bits 0..7 (reversed pin order)
        **{"o_Q{}".format(8 - n): data_deserialized[n] for n in range(8)}
    )

    # 8 --> 40 converter and bitslip
    converter = stream.Converter(8, 40)
    self.submodules += converter
    bitslip = CEInserter()(BitSlip(40))
    self.submodules += bitslip
    self.comb += [
        # The converter is fed every cycle and drained as soon as it
        # has a word.
        converter.sink.stb.eq(1),
        converter.sink.data.eq(data_deserialized),
        converter.source.ack.eq(1),
        # Enable the pipeline when the converter outputs the 40 bits.
        self.ce.eq(converter.source.stb),
        bitslip.ce.eq(self.ce),
        bitslip.value.eq(self.bitslip_value),
        bitslip.i.eq(converter.source.data),
    ]

    # 8b10b decoders, one per 10-bit symbol of the aligned word
    self.submodules.decoders = decoders = [
        CEInserter()(Decoder(True)) for _ in range(4)
    ]
    for n, decoder in enumerate(decoders):
        self.comb += [
            decoder.ce.eq(self.ce),
            decoder.input.eq(bitslip.o[10*n:10*(n + 1)]),
        ]
    self.comb += [
        self.k.eq(Cat(*[decoder.k for decoder in decoders])),
        self.d.eq(Cat(*[decoder.d for decoder in decoders])),
    ]

    # Status
    idle_timer = WaitTimer(256)
    self.submodules += idle_timer

    # Line is considered static when the aligned word is all-0 or all-1.
    line_static = (bitslip.o == 0) | (bitslip.o == (2**40 - 1))

    # Comma word: K28.5 on symbol 0 followed by three zero data symbols.
    comma_seen = (decoders[0].k == 1) & (decoders[0].d == K(28, 5))
    for decoder in decoders[1:]:
        comma_seen = comma_seen & (decoder.k == 0) & (decoder.d == 0)

    self.comb += [
        idle_timer.wait.eq(1),
        self.idle.eq(idle_timer.done & line_static),
        self.comma.eq(comma_seen),
    ]
def __init__(self, pads, sys_clk_freq=100e6):
    """Build the DDR3 PHY around ECP5 ODDRX2*/IDDRX2*/DQSBUFM primitives.

    Parameters
    ----------
    pads :
        Memory pads; this method reads ``a``, ``ba``, ``dq``, ``dm``,
        ``dqs_p``, ``clk_p``, the control pads listed in ``controls`` and,
        when present, ``reset_n`` and ``cs_n``.
    sys_clk_freq :
        System clock frequency, used only to derive ``tck``.

    The method creates the init submodule, the calibration CSRs,
    ``self.settings`` and ``self.dfi``, then instantiates one DQSBUFM per
    byte lane with its DM/DQS/DQ primitives and the read/write flow
    control.
    """
    memtype = "DDR3"
    # NOTE(review): evaluates to 1 / (2 * sys_clk_freq); presumably the
    # DRAM clock period for a memory clock at twice sys_clk - confirm.
    tck = 2 / (2 * 2 * sys_clk_freq)
    addressbits = len(pads.a)
    bankbits = len(pads.ba)
    # A single rank is assumed when the pads expose no chip-select.
    nranks = 1 if not hasattr(pads, "cs_n") else len(pads.cs_n)
    databits = len(pads.dq)
    nphases = 2
    # The per-byte loops below require whole byte lanes.
    assert databits % 8 == 0

    # Init -------------------------------------------------------------------------------------
    # Provides the `delay` and `pause` signals consumed by every DQSBUFM.
    self.submodules.init = ClockDomainsRenamer("init")(
        ECP5DDRPHYInit("sys2x"))

    # Registers --------------------------------------------------------------------------------
    # One select bit per byte lane (databits // 8)
    self._dly_sel = CSRStorage(databits // 8)

    self._rdly_dq_rst = CSR()
    self._rdly_dq_inc = CSR()
    self._rdly_dq_bitslip_rst = CSR()
    self._rdly_dq_bitslip = CSR()

    # Sticky per-lane burst-detect flags and their clear strobe
    self._burstdet_clr = CSR()
    self._burstdet_seen = CSRStatus(databits // 8)

    # Observation
    # One bit per byte lane, driven by the lane's DQSBUFM DATAVALID pin.
    self.datavalid = Signal(databits // 8)

    # PHY settings -----------------------------------------------------------------------------
    cl, cwl = get_cl_cw(memtype, tck)
    cl_sys_latency = get_sys_latency(nphases, cl)
    cwl_sys_latency = get_sys_latency(nphases, cwl)

    rdcmdphase, rdphase = get_sys_phases(nphases, cl_sys_latency, cl)
    wrcmdphase, wrphase = get_sys_phases(nphases, cwl_sys_latency, cwl)
    self.settings = PhySettings(memtype=memtype,
                                databits=databits,
                                dfi_databits=4 * databits,
                                nranks=nranks,
                                nphases=nphases,
                                rdphase=rdphase,
                                wrphase=wrphase,
                                rdcmdphase=rdcmdphase,
                                wrcmdphase=wrcmdphase,
                                cl=cl,
                                cwl=cwl,
                                read_latency=2 + cl_sys_latency + 2 +
                                log2_int(4 // nphases) + 6,
                                write_latency=cwl_sys_latency)

    # DFI Interface ----------------------------------------------------------------------------
    # The final argument (4) is passed where nphases would go even though
    # settings.nphases is 2.
    # NOTE(review): only phases[0] and phases[1] are wired below - confirm
    # the extra phases are intentional.
    self.dfi = dfi = Interface(addressbits, bankbits, nranks, 4 * databits,
                               4)

    # # #

    # Selects which half of the 8-bit write word is sent to the x2 output
    # registers (see dm_o_data_muxed / dq_o_data_muxed).
    bl8_sel = Signal()

    # Clock ------------------------------------------------------------------------------------
    for i in range(len(pads.clk_p)):
        sd_clk_se = Signal()  # not used in this method
        self.specials += [
            # Constant 0,1,0,1 pattern serialized onto the clock pad.
            Instance("ODDRX2F",
                i_D0=0,
                i_D1=1,
                i_D2=0,
                i_D3=1,
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                i_RST=ResetSignal("sys2x"),
                o_Q=pads.clk_p[i]),
        ]

    # Addresses and Commands -------------------------------------------------------------------
    # Each DFI phase value is presented on two consecutive D inputs.
    for i in range(addressbits):
        self.specials += \
            Instance("ODDRX2F",
                i_D0=dfi.phases[0].address[i],
                i_D1=dfi.phases[0].address[i],
                i_D2=dfi.phases[1].address[i],
                i_D3=dfi.phases[1].address[i],
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                i_RST=ResetSignal("sys2x"),
                o_Q=pads.a[i]
            )
    for i in range(bankbits):
        self.specials += \
            Instance("ODDRX2F",
                i_D0=dfi.phases[0].bank[i],
                i_D1=dfi.phases[0].bank[i],
                i_D2=dfi.phases[1].bank[i],
                i_D3=dfi.phases[1].bank[i],
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                i_RST=ResetSignal("sys2x"),
                o_Q=pads.ba[i]
            )
    # reset_n and cs_n are optional pads; drive them only when present.
    controls = ["ras_n", "cas_n", "we_n", "cke", "odt"]
    if hasattr(pads, "reset_n"):
        controls.append("reset_n")
    if hasattr(pads, "cs_n"):
        controls.append("cs_n")
    for name in controls:
        for i in range(len(getattr(pads, name))):
            self.specials += \
                Instance("ODDRX2F",
                    i_D0=getattr(dfi.phases[0], name)[i],
                    i_D1=getattr(dfi.phases[0], name)[i],
                    i_D2=getattr(dfi.phases[1], name)[i],
                    i_D3=getattr(dfi.phases[1], name)[i],
                    i_ECLK=ClockSignal("sys2x"),
                    i_SCLK=ClockSignal(),
                    i_RST=ResetSignal("sys2x"),
                    o_Q=getattr(pads, name)[i]
                )

    # DQ ---------------------------------------------------------------------------------------
    # These enables are shared by all byte lanes and driven by the flow
    # control section at the end of this method.
    oe_dq = Signal()
    oe_dqs = Signal()
    dqs_postamble = Signal()
    dqs_preamble = Signal()
    dqs_read = Signal()
    for i in range(databits // 8):
        # DQSBUFM
        dqs_i = Signal()
        dqsr90 = Signal()
        dqsw270 = Signal()
        dqsw = Signal()
        rdpntr = Signal(3)
        wrpntr = Signal(3)
        # Read clock select counter, reset/incremented through the CSRs
        # when this lane is selected. Declared 7 bits wide; only bits 0-2
        # are connected (READCLKSEL0..2).
        rdly = Signal(7)
        self.sync += \
            If(self._dly_sel.storage[i],
                If(self._rdly_dq_rst.re,
                    rdly.eq(0),
                ).Elif(self._rdly_dq_inc.re,
                    rdly.eq(rdly + 1),
                )
            )
        datavalid = Signal()  # not used; DATAVALID goes to self.datavalid[i]
        burstdet = Signal()
        self.specials += Instance(
            "DQSBUFM",
            p_DQS_LI_DEL_ADJ="MINUS",
            p_DQS_LI_DEL_VAL=1,
            p_DQS_LO_DEL_ADJ="MINUS",
            p_DQS_LO_DEL_VAL=4,
            # Clocks / Reset
            i_SCLK=ClockSignal("sys"),
            i_ECLK=ClockSignal("sys2x"),
            i_RST=ResetSignal("sys2x"),
            i_DDRDEL=self.init.delay,
            # PAUSE is asserted by the init block or whenever this lane is
            # selected through _dly_sel.
            i_PAUSE=self.init.pause | self._dly_sel.storage[i],

            # Control
            # Assert LOADNs to use DDRDEL control
            i_RDLOADN=0,
            i_RDMOVE=0,
            i_RDDIRECTION=1,
            i_WRLOADN=0,
            i_WRMOVE=0,
            i_WRDIRECTION=1,

            # Reads (generate shifted DQS clock for reads)
            i_READ0=dqs_read,
            i_READ1=dqs_read,
            i_READCLKSEL0=rdly[0],
            i_READCLKSEL1=rdly[1],
            i_READCLKSEL2=rdly[2],
            i_DQSI=dqs_i,
            o_DQSR90=dqsr90,
            o_RDPNTR0=rdpntr[0],
            o_RDPNTR1=rdpntr[1],
            o_RDPNTR2=rdpntr[2],
            o_WRPNTR0=wrpntr[0],
            o_WRPNTR1=wrpntr[1],
            o_WRPNTR2=wrpntr[2],
            o_DATAVALID=self.datavalid[i],
            o_BURSTDET=burstdet,

            # Writes (generate shifted ECLK clock for writes)
            o_DQSW270=dqsw270,
            o_DQSW=dqsw)
        # Latch a rising edge of BURSTDET into this lane's status bit
        # until software clears it.
        burstdet_d = Signal()
        self.sync += [
            burstdet_d.eq(burstdet),
            If(self._burstdet_clr.re,
                self._burstdet_seen.status[i].eq(0)
            ).Elif(burstdet & ~burstdet_d,
                self._burstdet_seen.status[i].eq(1)
            )
        ]

        # DQS and DM ---------------------------------------------------------------------------
        # Constant pattern (never reassigned here); only bits 0-3 are
        # connected to the ODDRX2DQSB below.
        dqs_serdes_pattern = Signal(8, reset=0b1010)
        dm_o_data = Signal(8)
        dm_o_data_d = Signal(8)
        dm_o_data_muxed = Signal(4)
        # Gather this lane's mask bit from the four words of each phase.
        self.comb += dm_o_data.eq(
            Cat(dfi.phases[0].wrdata_mask[0 * databits // 8 + i],
                dfi.phases[0].wrdata_mask[1 * databits // 8 + i],
                dfi.phases[0].wrdata_mask[2 * databits // 8 + i],
                dfi.phases[0].wrdata_mask[3 * databits // 8 + i],
                dfi.phases[1].wrdata_mask[0 * databits // 8 + i],
                dfi.phases[1].wrdata_mask[1 * databits // 8 + i],
                dfi.phases[1].wrdata_mask[2 * databits // 8 + i],
                dfi.phases[1].wrdata_mask[3 * databits // 8 + i]),
        )
        self.sync += dm_o_data_d.eq(dm_o_data)
        # Registered mux: bl8_sel picks the upper half of the previous
        # cycle's word, otherwise the lower half of the current one.
        self.sync += \
            If(bl8_sel,
                dm_o_data_muxed.eq(dm_o_data_d[4:])
            ).Else(
                dm_o_data_muxed.eq(dm_o_data[:4])
            )
        self.specials += \
            Instance("ODDRX2DQA",
                i_D0=dm_o_data_muxed[0],
                i_D1=dm_o_data_muxed[1],
                i_D2=dm_o_data_muxed[2],
                i_D3=dm_o_data_muxed[3],
                i_RST=ResetSignal("sys2x"),
                i_DQSW270=dqsw270,
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                o_Q=pads.dm[i]
            )

        dqs = Signal()
        dqs_oe_n = Signal()
        self.specials += \
            Instance("ODDRX2DQSB",
                i_D0=dqs_serdes_pattern[0],
                i_D1=dqs_serdes_pattern[1],
                i_D2=dqs_serdes_pattern[2],
                i_D3=dqs_serdes_pattern[3],
                i_RST=ResetSignal("sys2x"),
                i_DQSW=dqsw,
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                o_Q=dqs
            )
        # Tristate control inputs are inverted enables; T0 is extended by
        # the postamble and T1 by the preamble.
        self.specials += \
            Instance("TSHX2DQSA",
                i_T0=~(oe_dqs|dqs_postamble),
                i_T1=~(oe_dqs|dqs_preamble),
                i_SCLK=ClockSignal(),
                i_ECLK=ClockSignal("sys2x"),
                i_DQSW=dqsw,
                i_RST=ResetSignal("sys2x"),
                o_Q=dqs_oe_n,
            )
        self.specials += Tristate(pads.dqs_p[i], dqs, ~dqs_oe_n, dqs_i)

        # The eight DQ bits of this byte lane share the lane's DQSBUFM
        # outputs (dqsw270, dqsr90, rdpntr, wrpntr).
        for j in range(8 * i, 8 * (i + 1)):
            dq_o = Signal()
            dq_i = Signal()
            dq_oe_n = Signal()
            dq_i_delayed = Signal()
            dq_i_data = Signal(4)
            dq_o_data = Signal(8)
            dq_o_data_d = Signal(8)
            dq_o_data_muxed = Signal(4)
            # Gather bit j from the four words of each of the two phases.
            self.comb += dq_o_data.eq(
                Cat(dfi.phases[0].wrdata[0 * databits + j],
                    dfi.phases[0].wrdata[1 * databits + j],
                    dfi.phases[0].wrdata[2 * databits + j],
                    dfi.phases[0].wrdata[3 * databits + j],
                    dfi.phases[1].wrdata[0 * databits + j],
                    dfi.phases[1].wrdata[1 * databits + j],
                    dfi.phases[1].wrdata[2 * databits + j],
                    dfi.phases[1].wrdata[3 * databits + j]))
            self.sync += dq_o_data_d.eq(dq_o_data)
            self.sync += \
                If(bl8_sel,
                    dq_o_data_muxed.eq(dq_o_data_d[4:])
                ).Else(
                    dq_o_data_muxed.eq(dq_o_data[:4])
                )
            self.specials += \
                Instance("ODDRX2DQA",
                    i_D0=dq_o_data_muxed[0],
                    i_D1=dq_o_data_muxed[1],
                    i_D2=dq_o_data_muxed[2],
                    i_D3=dq_o_data_muxed[3],
                    i_RST=ResetSignal("sys2x"),
                    i_DQSW270=dqsw270,
                    i_ECLK=ClockSignal("sys2x"),
                    i_SCLK=ClockSignal(),
                    o_Q=dq_o
                )
            # Input delay with its LOADN/MOVE/DIRECTION pins tied to
            # constants (no run-time adjustment).
            self.specials += \
                Instance("DELAYF",
                    i_A=dq_i,
                    i_LOADN=1,
                    i_MOVE=0,
                    i_DIRECTION=0,
                    o_Z=dq_i_delayed,
                    p_DEL_MODE="DQS_ALIGNED_X2"
                )
            self.specials += \
                Instance("IDDRX2DQA",
                    i_D=dq_i_delayed,
                    i_RST=ResetSignal("sys2x"),
                    i_DQSR90=dqsr90,
                    i_SCLK=ClockSignal(),
                    i_ECLK=ClockSignal("sys2x"),
                    i_RDPNTR0=rdpntr[0],
                    i_RDPNTR1=rdpntr[1],
                    i_RDPNTR2=rdpntr[2],
                    i_WRPNTR0=wrpntr[0],
                    i_WRPNTR1=wrpntr[1],
                    i_WRPNTR2=wrpntr[2],
                    o_Q0=dq_i_data[0],
                    o_Q1=dq_i_data[1],
                    o_Q2=dq_i_data[2],
                    o_Q3=dq_i_data[3],
                )
            # Per-bit bitslip; its value is reset/incremented through the
            # CSRs when byte lane i is selected.
            dq_bitslip = BitSlip(4)
            self.comb += dq_bitslip.i.eq(dq_i_data)
            self.sync += \
                If(self._dly_sel.storage[i],
                    If(self._rdly_dq_bitslip_rst.re,
                        dq_bitslip.value.eq(0)
                    ).Elif(self._rdly_dq_bitslip.re,
                        dq_bitslip.value.eq(dq_bitslip.value + 1)
                    )
                )
            self.submodules += dq_bitslip
            # Phase 0 receives the previous cycle's bitslip output and
            # phase 1 the current one, so both phases together carry
            # eight consecutive samples per DQ bit.
            dq_bitslip_o_d = Signal(4)
            self.sync += dq_bitslip_o_d.eq(dq_bitslip.o)
            self.comb += [
                dfi.phases[0].rddata[0 * databits + j].eq(
                    dq_bitslip_o_d[0]),
                dfi.phases[0].rddata[1 * databits + j].eq(
                    dq_bitslip_o_d[1]),
                dfi.phases[0].rddata[2 * databits + j].eq(
                    dq_bitslip_o_d[2]),
                dfi.phases[0].rddata[3 * databits + j].eq(
                    dq_bitslip_o_d[3]),
                dfi.phases[1].rddata[0 * databits + j].eq(dq_bitslip.o[0]),
                dfi.phases[1].rddata[1 * databits + j].eq(dq_bitslip.o[1]),
                dfi.phases[1].rddata[2 * databits + j].eq(dq_bitslip.o[2]),
                dfi.phases[1].rddata[3 * databits + j].eq(dq_bitslip.o[3]),
            ]
            self.specials += \
                Instance("TSHX2DQA",
                    i_T0=~oe_dq,
                    i_T1=~oe_dq,
                    i_SCLK=ClockSignal(),
                    i_ECLK=ClockSignal("sys2x"),
                    i_DQSW270=dqsw270,
                    i_RST=ResetSignal("sys2x"),
                    o_Q=dq_oe_n,
                )
            self.specials += Tristate(pads.dq[j], dq_o, ~dq_oe_n, dq_i)

    # Flow control -----------------------------------------------------------------------------
    #
    # total read latency:
    #  ODDRX2DQA latency
    #  cl_sys_latency
    #  IDDRX2DQA latency
    # Delay rddata_en through (read_latency - 1) registers and expose
    # every tap in rddata_ens (tap 0 is the undelayed DFI signal).
    rddata_en = dfi.phases[self.settings.rdphase].rddata_en
    rddata_ens = Array(
        [Signal() for i in range(self.settings.read_latency - 1)])
    for i in range(self.settings.read_latency - 1):
        n_rddata_en = Signal()
        self.sync += n_rddata_en.eq(rddata_en)
        self.comb += rddata_ens[i].eq(rddata_en)
        rddata_en = n_rddata_en
    self.sync += [phase.rddata_valid.eq(rddata_en) for phase in dfi.phases]
    # The READ window taps were tuned empirically; the alternative
    # offsets are kept for reference.
    #self.comb += dqs_read.eq(rddata_ens[cl_sys_latency+0] | rddata_ens[cl_sys_latency+1]) # Works only with wishbone-bridge test
    self.comb += dqs_read.eq(
        rddata_ens[cl_sys_latency + 1] |
        rddata_ens[cl_sys_latency + 2])  # Works only with SoC

    # Shift register of past wrdata_en values; oe covers a four-cycle
    # window starting at tap cwl_sys_latency - 1.
    oe = Signal()
    last_wrdata_en = Signal(cwl_sys_latency + 3)
    wrphase = dfi.phases[self.settings.wrphase]
    self.sync += last_wrdata_en.eq(
        Cat(wrphase.wrdata_en, last_wrdata_en[:-1]))
    self.comb += oe.eq(last_wrdata_en[cwl_sys_latency - 1] |
                       last_wrdata_en[cwl_sys_latency] |
                       last_wrdata_en[cwl_sys_latency + 1] |
                       last_wrdata_en[cwl_sys_latency + 2])
    self.sync += oe_dqs.eq(oe), oe_dq.eq(oe)
    self.sync += bl8_sel.eq(last_wrdata_en[cwl_sys_latency - 1])
    # Preamble is taken one tap before the oe window opens; postamble is
    # oe_dqs delayed by one more cycle.
    self.sync += dqs_preamble.eq(last_wrdata_en[cwl_sys_latency - 2])
    self.sync += dqs_postamble.eq(oe_dqs)
def __init__(self, pads, mode="master"):
    """Build the serial transceiver around OSERDESE2/ISERDESE2 primitives.

    Parameters
    ----------
    pads
        Object providing the differential pairs used here: ``clk_p``/
        ``clk_n``, ``tx_p``/``tx_n`` and ``rx_p``/``rx_n``.
    mode
        ``"master"`` drives the clock pads with a serialized
        ``0b1111100000`` pattern; ``"slave"`` receives the clock from the
        pads and exposes it as ``self.refclk``. Any other value builds
        neither clock path.

    Attributes created
    ------------------
    refclk : only in slave mode, the buffered clock received on the pads.
    tx_ce, rx_ce : strobes marking when a 40-bit word is consumed by the
        transmit converter / produced by the receive converter.
    tx_k, tx_d, rx_k, rx_d : per-lane control flags (4 bits) and data
        (4 x 8 bits) entering the encoder / leaving the decoders.
    tx_idle : when set, the transmit converter is fed an all-zero word.
    tx_comma : when set, lane 0 of the encoder is driven with K(28, 5) as
        a control character; the other lanes are not assigned in that
        branch.
    rx_idle : registered flag, set when the idle timer is done and the
        bitslip output is all zeros.
    rx_comma : set when decoder 0 reports control character K(28, 5) and
        decoders 1..3 report data value 0.
    rx_bitslip_value : shift amount applied by the 40-bit bitslip stage.
    rx_delay_rst, rx_delay_inc : load / increment controls of the input
        delay element.
    """

    def oserdes_ddr8(data, q):
        # 8:1 DDR output serializer; bit n of `data` feeds input D(n+1).
        return Instance("OSERDESE2",
                        p_DATA_WIDTH=8,
                        p_TRISTATE_WIDTH=1,
                        p_DATA_RATE_OQ="DDR",
                        p_DATA_RATE_TQ="BUF",
                        p_SERDES_MODE="MASTER",
                        o_OQ=q,
                        i_OCE=1,
                        i_RST=ResetSignal("sys"),
                        i_CLK=ClockSignal("sys4x"),
                        i_CLKDIV=ClockSignal("sys"),
                        **{"i_D{}".format(n + 1): data[n]
                           for n in range(8)})

    # The reference clock output only exists when the clock is received.
    if mode == "slave":
        self.refclk = Signal()

    # User-side transmit/receive words and their strobes.
    self.tx_ce = Signal()
    self.tx_k = Signal(4)
    self.tx_d = Signal(32)
    self.rx_ce = Signal()
    self.rx_k = Signal(4)
    self.rx_d = Signal(32)

    # Link state controls and indicators.
    self.tx_idle = Signal()
    self.tx_comma = Signal()
    self.rx_idle = Signal()
    self.rx_comma = Signal()

    # Receive alignment controls.
    self.rx_bitslip_value = Signal(6)
    self.rx_delay_rst = Signal()
    self.rx_delay_inc = Signal()

    # # #

    # Encoder and decoders only advance when a full word is transferred.
    self.submodules.encoder = encoder = CEInserter()(Encoder(4, True))
    self.comb += encoder.ce.eq(self.tx_ce)
    self.submodules.decoders = decoders = [
        CEInserter()(Decoder(True)) for _ in range(4)
    ]
    self.comb += [decoder.ce.eq(self.rx_ce) for decoder in decoders]

    # clocking:
    # In Master mode:
    # - linerate/10 refclk is generated on clk_pads
    # In Slave mode:
    # - linerate/10 refclk is provided by clk_pads

    # tx clock (linerate/10)
    if mode == "master":
        clk_converter = stream.Converter(40, 8)
        self.submodules += clk_converter
        self.comb += [
            clk_converter.sink.stb.eq(1),
            clk_converter.sink.data.eq(
                Replicate(Signal(10, reset=0b1111100000), 4)),
            clk_converter.source.ack.eq(1),
        ]
        clk_o = Signal()
        self.specials += [
            oserdes_ddr8(clk_converter.source.data, clk_o),
            Instance("OBUFDS",
                     i_I=clk_o,
                     o_O=pads.clk_p,
                     o_OB=pads.clk_n),
        ]

    # tx datapath
    # tx_data -> encoders -> converter -> serdes
    self.submodules.tx_converter = tx_converter = stream.Converter(40, 8)
    # Normal operation: every lane forwards the user's k flag and byte.
    tx_passthrough = (
        [encoder.k[lane].eq(self.tx_k[lane]) for lane in range(4)] +
        [encoder.d[lane].eq(self.tx_d[8 * lane:8 * (lane + 1)])
         for lane in range(4)]
    )
    self.comb += [
        tx_converter.sink.stb.eq(1),
        self.tx_ce.eq(tx_converter.sink.ack),
        tx_converter.source.ack.eq(1),
        If(self.tx_idle,
            tx_converter.sink.data.eq(0)
        ).Else(
            tx_converter.sink.data.eq(
                Cat(*[encoder.output[lane] for lane in range(4)]))
        ),
        If(self.tx_comma,
            encoder.k[0].eq(1),
            encoder.d[0].eq(K(28, 5)),
        ).Else(*tx_passthrough),
    ]

    serdes_o = Signal()
    self.specials += [
        oserdes_ddr8(tx_converter.source.data, serdes_o),
        Instance("OBUFDS",
                 i_I=serdes_o,
                 o_O=pads.tx_p,
                 o_OB=pads.tx_n),
    ]

    # rx clock
    # Build-time toggle: route the received clock through BUFR before BUFG.
    use_bufr = True
    if mode == "slave":
        clk_i = Signal()
        clk_i_bufg = Signal()
        self.specials += Instance("IBUFDS",
                                  i_I=pads.clk_p,
                                  i_IB=pads.clk_n,
                                  o_O=clk_i)
        if use_bufr:
            clk_i_bufr = Signal()
            self.specials += [
                Instance("BUFR", i_I=clk_i, o_O=clk_i_bufr),
                Instance("BUFG", i_I=clk_i_bufr, o_O=clk_i_bufg),
            ]
        else:
            self.specials += Instance("BUFG", i_I=clk_i, o_O=clk_i_bufg)
        self.comb += self.refclk.eq(clk_i_bufg)

    # rx datapath
    # serdes -> converter -> bitslip -> decoders -> rx_data
    self.submodules.rx_converter = rx_converter = stream.Converter(8, 40)
    self.comb += [
        self.rx_ce.eq(rx_converter.source.stb),
        rx_converter.source.ack.eq(1),
    ]

    self.submodules.rx_bitslip = rx_bitslip = CEInserter()(BitSlip(40))
    self.comb += rx_bitslip.ce.eq(self.rx_ce)

    serdes_i_nodelay = Signal()
    self.specials += Instance("IBUFDS_DIFF_OUT",
                              i_I=pads.rx_p,
                              i_IB=pads.rx_n,
                              o_O=serdes_i_nodelay)

    serdes_i_delayed = Signal()
    serdes_q = Signal(8)
    self.specials += [
        # Variable input delay: rx_delay_rst loads it, rx_delay_inc steps
        # it (INC is tied high, so every step is an increment).
        Instance("IDELAYE2",
                 p_DELAY_SRC="IDATAIN",
                 p_SIGNAL_PATTERN="DATA",
                 p_CINVCTRL_SEL="FALSE",
                 p_HIGH_PERFORMANCE_MODE="TRUE",
                 p_REFCLK_FREQUENCY=200.0,
                 p_PIPE_SEL="FALSE",
                 p_IDELAY_TYPE="VARIABLE",
                 p_IDELAY_VALUE=0,
                 i_C=ClockSignal(),
                 i_LD=self.rx_delay_rst,
                 i_CE=self.rx_delay_inc,
                 i_LDPIPEEN=0,
                 i_INC=1,
                 i_IDATAIN=serdes_i_nodelay,
                 o_DATAOUT=serdes_i_delayed),
        # 1:8 DDR deserializer. Outputs are wired in reverse: Q8 lands in
        # serdes_q[0] and Q1 in serdes_q[7]. The primitive's own BITSLIP
        # input is tied off; alignment is done by rx_bitslip instead.
        Instance("ISERDESE2",
                 p_DATA_WIDTH=8,
                 p_DATA_RATE="DDR",
                 p_SERDES_MODE="MASTER",
                 p_INTERFACE_TYPE="NETWORKING",
                 p_NUM_CE=1,
                 p_IOBDELAY="IFD",
                 i_DDLY=serdes_i_delayed,
                 i_CE1=1,
                 i_RST=ResetSignal("sys"),
                 i_CLK=ClockSignal("sys4x"),
                 i_CLKB=~ClockSignal("sys4x"),
                 i_CLKDIV=ClockSignal("sys"),
                 i_BITSLIP=0,
                 o_Q8=serdes_q[0],
                 o_Q7=serdes_q[1],
                 o_Q6=serdes_q[2],
                 o_Q5=serdes_q[3],
                 o_Q4=serdes_q[4],
                 o_Q3=serdes_q[5],
                 o_Q2=serdes_q[6],
                 o_Q1=serdes_q[7]),
    ]

    # Comma detection: K(28, 5) flagged as control on lane 0 and a zero
    # data byte on each of the remaining lanes.
    comma_match = (decoders[0].k == 1) & (decoders[0].d == K(28, 5))
    for decoder in decoders[1:]:
        comma_match = comma_match & (decoder.k == 0) & (decoder.d == 0)

    bitslip_o = rx_bitslip.o
    self.comb += [
        rx_converter.sink.stb.eq(1),
        rx_converter.sink.data.eq(serdes_q),
        rx_bitslip.value.eq(self.rx_bitslip_value),
        rx_bitslip.i.eq(rx_converter.source.data),
    ]
    # Each decoder takes one 10-bit slice of the aligned 40-bit word.
    self.comb += [
        decoder.input.eq(bitslip_o[10 * lane:10 * (lane + 1)])
        for lane, decoder in enumerate(decoders)
    ]
    self.comb += [
        self.rx_k.eq(Cat(*[decoders[lane].k for lane in range(4)])),
        self.rx_d.eq(Cat(*[decoders[lane].d for lane in range(4)])),
        self.rx_comma.eq(comma_match),
    ]

    # The timer's wait input is held high permanently; rx_idle is the
    # registered AND of its done flag and an all-zero bitslip output.
    idle_timer = WaitTimer(32)
    self.submodules += idle_timer
    self.comb += idle_timer.wait.eq(1)
    self.sync += self.rx_idle.eq(idle_timer.done & (rx_bitslip.o == 0))