def __init__(self, platform):
    """Expose SPI flash through two BSCANE2 user chains.

    JTAG chain 1 feeds a ``JTAG2SPI`` core built without pads; its chip
    select and two data lines are routed through the ``STARTUPE3``
    primitive. JTAG chain 2 feeds a second ``JTAG2SPI`` core attached to
    the pads returned by ``platform.request("spiflash")``. ``USRCCLKO``
    carries the clock of the first core while that core's clock output
    enable is set and the clock of the second core otherwise.

    Args:
        platform: Build platform. Its toolchain's ``bitstream_commands``
            list is extended, the ``"spiflash"`` resource is requested and
            a period constraint of 6 is added to both TCK signals.
    """
    bitstream_options = [
        "set_property BITSTREAM.GENERAL.COMPRESS True [current_design]",
        "set_property BITSTREAM.CONFIG.UNUSEDPIN Pullnone [current_design]",
    ]
    platform.toolchain.bitstream_commands.extend(bitstream_options)

    self.submodules.j2s0 = j2s0 = JTAG2SPI()
    self.submodules.j2s1 = j2s1 = JTAG2SPI(platform.request("spiflash"))

    # STARTUPE3 read-back bus; its two low bits feed the first core's
    # mosi/miso inputs.
    di = mg.Signal(4)
    self.comb += mg.Cat(j2s0.mosi.i, j2s0.miso.i).eq(di)

    def bscan(chain, bridge):
        # One BSCANE2 user chain wired to the JTAG record of ``bridge``.
        port = bridge.jtag
        return mg.Instance(
            "BSCANE2",
            p_JTAG_CHAIN=chain,
            o_SHIFT=port.shift,
            o_SEL=port.sel,
            o_CAPTURE=port.capture,
            o_DRCK=port.tck,
            o_TDI=port.tdi,
            i_TDO=port.tdo,
        )

    # Instances are created in the same order as they are registered.
    chain1 = bscan(1, j2s0)
    chain2 = bscan(2, j2s1)
    startup = mg.Instance(
        "STARTUPE3",
        i_GSR=0,
        i_GTS=0,
        i_KEYCLEARB=0,
        i_PACK=1,
        i_USRDONEO=1,
        i_USRDONETS=1,
        i_USRCCLKO=mg.Mux(j2s0.clk.oe, j2s0.clk.o, j2s1.clk.o),
        # The *TS inputs receive the inverted output enables.
        i_USRCCLKTS=~(j2s0.clk.oe | j2s1.clk.oe),
        i_FCSBO=j2s0.cs_n.o,
        i_FCSBTS=~j2s0.cs_n.oe,
        o_DI=di,
        i_DO=mg.Cat(j2s0.mosi.o, j2s0.miso.o, 0, 0),
        i_DTS=mg.Cat(~j2s0.mosi.oe, ~j2s0.miso.oe, 1, 1),
    )
    self.specials += [chain1, chain2, startup]

    for bridge in (j2s0, j2s1):
        platform.add_period_constraint(bridge.jtag.tck, 6)
def __init__(self, spi=None, bits=32):
    """Build a JTAG to SPI bridge with a registered TDO path.

    The ``sys`` clock domain runs on the inverted TCK and is reset while
    ``sel & capture`` is high; the reset-less ``rise`` domain runs on TCK.
    The state machine leaves ``IDLE`` when TDI is high, spends ``HEAD``
    shifting TDI into the ``bits`` register (first bit received ends up
    most significant) and then stays in ``XFER``, with CS_N driven low,
    while ``bits`` counts down to zero.

    Args:
        spi: Optional pad group with ``cs_n``, ``mosi``, ``miso`` and
            optionally ``clk``. When given, tristate buffers are attached
            to those pads; when ``None`` only the ``TSTriple`` attributes
            are exposed.
        bits: Width of the header/length register.
    """
    jtag_layout = [
        (field, 1)
        for field in ("sel", "shift", "capture", "tck", "tdi", "tdo")
    ]
    self.jtag = mg.Record(jtag_layout)
    self.cs_n = mg.TSTriple()
    self.clk = mg.TSTriple()
    self.mosi = mg.TSTriple()
    self.miso = mg.TSTriple()

    # # #

    jtag = self.jtag

    # CS_N is deasserted (high) out of reset.
    self.cs_n.o.reset = mg.Constant(1)
    self.submodules.fsm = fsm = mg.FSM("IDLE")
    en = mg.Signal()
    bits = mg.Signal(bits, reset_less=True)
    width = len(bits)
    head = mg.Signal(max=width, reset=width - 1)
    self.clock_domains.cd_sys = mg.ClockDomain()
    self.clock_domains.cd_rise = mg.ClockDomain(reset_less=True)

    if spi is not None:
        for pin in ("cs_n", "mosi", "miso"):
            self.specials += getattr(self, pin).get_tristate(
                getattr(spi, pin))
        if hasattr(spi, "clk"):  # 7 Series drive it fixed
            self.specials += self.clk.get_tristate(spi.clk)

    self.comb += [
        en.eq(jtag.sel & jtag.shift),
        self.cd_sys.rst.eq(jtag.sel & jtag.capture),
        self.cd_sys.clk.eq(~jtag.tck),
        self.cd_rise.clk.eq(jtag.tck),
    ]
    # CS_N, CLK and MOSI are driven only while this chain is shifting.
    for pad in (self.cs_n, self.clk, self.mosi):
        self.comb += pad.oe.eq(en)
    self.comb += [
        self.miso.oe.eq(0),
        # CLK is gated: it follows TCK only while CS_N is low.
        self.clk.o.eq(jtag.tck & ~self.cs_n.o),
        self.mosi.o.eq(jtag.tdi),
    ]

    # Some (Xilinx) bscan cells register TDO (from the fabric) on falling
    # TCK and output it (externally).
    # SPI requires sampling on rising CLK. This leads to one cycle of
    # latency.
    self.sync.rise += jtag.tdo.eq(self.miso.i)

    fsm.act(
        "IDLE",
        mg.If(jtag.tdi, mg.NextState("HEAD")),
    )
    fsm.act(
        "HEAD",
        mg.If(head == 0, mg.NextState("XFER")),
    )
    fsm.act(
        "XFER",
        mg.If(bits == 0, mg.NextState("IDLE")),
        self.cs_n.o.eq(0),
    )

    # HEAD: shift the length in; XFER: count it down.
    self.sync += mg.If(
        fsm.ongoing("HEAD"),
        bits.eq(mg.Cat(jtag.tdi, bits)),
        head.eq(head - 1),
    )
    self.sync += mg.If(
        fsm.ongoing("XFER"),
        bits.eq(bits - 1),
    )
def __init__(self, spi=None, bits=32):
    """Build a JTAG to SPI bridge with registered MOSI and CS_N.

    The single ``sys`` clock domain runs on TCK and is reset while
    ``sel & capture`` is high. CLK is the inverted TCK and is not gated
    by CS_N; TDO follows MISO combinatorially. The state machine leaves
    ``IDLE`` when ``tdi & sel & shift`` is high, spends ``HEAD`` shifting
    TDI into the ``bits`` register (first bit received ends up most
    significant) and then stays in ``XFER`` while ``bits`` counts down to
    zero. CS_N is registered and low only while ``XFER`` is ongoing.

    Args:
        spi: Optional pad group with ``cs_n``, ``mosi``, ``miso`` and
            optionally ``clk``. When given, tristate buffers are attached
            to those pads; when ``None`` only the ``TSTriple`` attributes
            are exposed.
        bits: Width of the header/length register.
    """
    jtag_layout = [
        (field, 1)
        for field in ("sel", "shift", "capture", "tck", "tdi", "tdo")
    ]
    self.jtag = mg.Record(jtag_layout)
    self.cs_n = mg.TSTriple()
    self.clk = mg.TSTriple()
    self.mosi = mg.TSTriple()
    self.miso = mg.TSTriple()

    # # #

    jtag = self.jtag

    # CS_N is deasserted (high) out of reset; MOSI needs no reset.
    self.cs_n.o.reset = mg.Constant(1)
    self.mosi.o.reset_less = True
    bits = mg.Signal(bits, reset_less=True)
    width = len(bits)
    head = mg.Signal(max=width, reset=width - 1)
    self.clock_domains.cd_sys = mg.ClockDomain()
    self.submodules.fsm = mg.FSM("IDLE")
    fsm = self.fsm

    if spi is not None:
        for pin in ("cs_n", "mosi", "miso"):
            self.specials += getattr(self, pin).get_tristate(
                getattr(spi, pin))
        if hasattr(spi, "clk"):  # 7 Series drive it fixed
            self.specials += self.clk.get_tristate(spi.clk)
            # self.specials += io.DDROutput(1, 0, spi.clk, self.clk.o)

    self.comb += [
        self.cd_sys.rst.eq(jtag.sel & jtag.capture),
        self.cd_sys.clk.eq(jtag.tck),
    ]
    # CS_N, CLK and MOSI are driven whenever this chain is selected.
    for pad in (self.cs_n, self.clk, self.mosi):
        self.comb += pad.oe.eq(jtag.sel)
    self.comb += [
        self.miso.oe.eq(0),
        # Do not suppress CLK toggles outside CS_N asserted.
        # Xilinx USRCCLK0 requires three dummy cycles to do anything
        # https://www.xilinx.com/support/answers/52626.html
        # This is fine since CS_N changes only on falling CLK.
        self.clk.o.eq(~jtag.tck),
        self.jtag.tdo.eq(self.miso.i),
    ]

    # Latency calculation (in half cycles):
    # 0 (falling TCK, rising CLK):
    #   JTAG adapter: set TDI
    # 1 (rising TCK, falling CLK):
    #   JTAG2SPI: sample TDI -> set MOSI
    #   SPI: set MISO
    # 2 (falling TCK, rising CLK):
    #   SPI: sample MOSI
    #   JTAG2SPI (BSCAN primitive): sample MISO -> set TDO
    # 3 (rising TCK, falling CLK):
    #   JTAG adapter: sample TDO
    fsm.act(
        "IDLE",
        mg.If(jtag.tdi & jtag.sel & jtag.shift, mg.NextState("HEAD")),
    )
    fsm.act(
        "HEAD",
        mg.If(head == 0, mg.NextState("XFER")),
    )
    fsm.act(
        "XFER",
        mg.If(bits == 0, mg.NextState("IDLE")),
    )

    # Registered outputs first, then the header shift and length countdown.
    self.sync += self.mosi.o.eq(jtag.tdi)
    self.sync += self.cs_n.o.eq(~fsm.ongoing("XFER"))
    self.sync += mg.If(
        fsm.ongoing("HEAD"),
        bits.eq(mg.Cat(jtag.tdi, bits)),
        head.eq(head - 1),
    )
    self.sync += mg.If(
        fsm.ongoing("XFER"),
        bits.eq(bits - 1),
    )
def __init__(self, z=18, x=15, zl=9, xd=4, backoff=None, share_lut=None):
    """Build a cos/sin generator from a first-octant LUT.

    The three MSBs of the phase select the octant; the remaining bits are
    mapped into the first octant and address a ROM holding cos/sin
    samples. When ``xd`` is truthy the ROM also stores derivative values
    that are used for linear interpolation with the phase bits not used
    as address. ``self.latency`` is accumulated during construction and
    ends at 2 without interpolation and at 4 with it.

    Args:
        z: Width of the phase input ``self.z``.
        x: Outputs ``self.x`` and ``self.y`` are signed and ``x + 1`` bits
            wide.
        zl: Number of LUT address bits (the LUT holds ``1 << zl`` words).
            ``None`` selects ``z - 3``.
        xd: Width parameter of the derivative LUT fields; a falsy value
            disables interpolation. With interpolation enabled,
            ``z - 3 - x + xd`` must be positive.
        backoff: Amount subtracted from ``1 << x`` to obtain the table
            amplitude ``self.x_max``. ``None`` selects
            ``min(3, (1 << (x - 1)) - 1)``.
        share_lut: Existing memory to use instead of creating a new one.
            Its ``init`` contents must match the table computed here.
    """
    self.latency = 0  # computed later
    self.z = mg.Signal(z)  # input phase
    self.x = mg.Signal((x + 1, True), reset_less=True)  # output cos(z)
    self.y = mg.Signal((x + 1, True), reset_less=True)  # output sin(z)

    ###

    if backoff is None:
        backoff = min(3, (1 << (x - 1)) - 1)
    self.x_max = (1 << x) - backoff

    # LUT depth
    if zl is None:
        zl = z - 3
    assert zl >= 0
    depth = 1 << zl

    # generate the cos/sin LUT: samples sit at the centre of each address
    # bin within the first octant
    phasor = np.exp(1j * np.pi / 4 / depth * (np.arange(depth) + .5))
    samples = np.round(self.x_max * phasor)
    slopes = np.round(np.pi / 4 / (1 << (x - xd)) * samples)

    lut_words = []
    for sample, slope in zip(samples, slopes):
        # save a bit by noticing that cos(z) > 1/2 for 0 < z < pi/4
        reduced = sample - (1 << (x - 1))
        cos_bits, sin_bits = int(reduced.real), int(reduced.imag)
        assert 0 <= cos_bits < (1 << (x - 1)), sample
        assert 0 <= sin_bits < (1 << x), sample
        word = cos_bits | (sin_bits << (x - 1))
        if xd:
            # derivative LUT
            # save a bit again
            reduced_d = slope - (1 << (xd - 1))
            dcos_bits, dsin_bits = int(reduced_d.real), int(reduced_d.imag)
            assert 0 <= dcos_bits < (1 << (xd - 1)), slope
            assert 0 <= dsin_bits < (1 << xd), slope
            word |= dcos_bits << (2 * x - 1)
            word |= dsin_bits << (2 * x + xd - 2)
        lut_words.append(word)

    # LUT ROM
    mem_layout = [("x", x - 1), ("y", x)]
    if xd:
        mem_layout += [("xd", xd - 1), ("yd", xd)]
    lut_data = mg.Record(mem_layout, reset_less=True)
    assert len(lut_words) == depth
    assert all(0 <= word < (1 << len(lut_data)) for word in lut_words)
    logger.info("CosSin LUT {} bit deep, {} bit wide".format(
        zl, len(lut_data)))
    if share_lut is not None:
        assert all(theirs == ours
                   for theirs, ours in zip(share_lut.init, lut_words))
        self.lut = share_lut
    else:
        self.lut = mg.Memory(len(lut_data), depth, init=lut_words)
        self.specials += self.lut
    lut_port = self.lut.get_port()
    self.specials += lut_port
    # use BRAM output data register
    self.sync += lut_data.raw_bits().eq(lut_port.dat_r)
    self.latency += 1  # mem dat_r output register

    # compute LUT address
    # 3 MSBs: octant
    # LSBs: phase, mapped into first octant
    za = mg.Signal(z - 3)
    self.comb += za.eq(
        mg.Mux(self.z[-3],
               (1 << (z - 3)) - 1 - self.z[:-3],
               self.z[:-3]))
    self.comb += lut_port.adr.eq(za[-zl:])
    self.latency += 1  # mem address register

    if xd:
        # apply linear interpolation
        zk = z - 3 - zl
        zd = mg.Signal((zk + 1, True), reset_less=True)
        self.comb += zd.eq(za[:zk] - (1 << (zk - 1)) + self.z[-3])
        zd = self.pipe(zd, self.latency)
        # add a rounding bias
        shift = z - 3 - x + xd
        assert shift > 0
        bias = (1 << (shift - 1)) - 1
        lxd = mg.Signal((xd + zk, True), reset_less=True)
        lyd = mg.Signal((xd + zk, True), reset_less=True)
        self.sync += [
            lxd.eq(zd * (lut_data.xd | (1 << (xd - 1)))),
            lyd.eq(zd * lut_data.yd),
        ]
        # the stored values have their implicit top bit restored before
        # the interpolation term is applied
        x1 = self.pipe(
            self.pipe(lut_data.x | (1 << (x - 1)), 1)
            - ((lyd + bias) >> shift), 1)
        y1 = self.pipe(
            self.pipe(lut_data.y, 1) + ((lxd + bias) >> shift), 1)
        self.latency += 2
    else:
        x1 = self.pipe(lut_data.x | (1 << (x - 1)), 0)
        y1 = self.pipe(lut_data.y, 0)

    # unmap octant
    zq = self.pipe(
        mg.Cat(self.z[-3] ^ self.z[-2],
               self.z[-2] ^ self.z[-1],
               self.z[-1]),
        self.latency)
    # intermediate unmapping signals
    x2 = self.pipe(mg.Mux(zq[0], y1, x1), 0)
    y2 = self.pipe(mg.Mux(zq[0], x1, y1), 0)
    self.comb += [
        self.x.eq(mg.Mux(zq[1], -x2, x2)),
        self.y.eq(mg.Mux(zq[2], -y2, y2)),
    ]