def __init__(self, config_addr_width: int, config_data_width: int):
    """Build a one-register-deep pipeline stage for the configuration bus.

    Declares a ``clk`` input, a ``config`` input and a ``config_out``
    output (both of the same ``ConfigurationType``) and places one
    register between each field of ``config`` and the matching field of
    ``config_out``.  The ``clk`` port is declared here but not wired to
    the registers inside this method.

    Args:
        config_addr_width: width of the ``config_addr`` field.
        config_data_width: width of the ``config_data`` field.
    """
    super().__init__()
    self.config_addr_width = config_addr_width
    self.config_data_width = config_data_width

    bus_type = ConfigurationType(config_addr_width, config_data_width)
    self.add_ports(
        clk=magma.In(magma.Clock),
        config=magma.In(bus_type),
        config_out=magma.Out(bus_type),
    )

    # One pipeline register per bus field, created in the order
    # addr, data, read, write.
    field_widths = (
        ("config_addr", config_addr_width),
        ("config_data", config_data_width),
        ("read", 1),
        ("write", 1),
    )
    stages = [
        (field, FromMagma(DefineRegister(width)))
        for field, width in field_widths
    ]

    # Incoming bus fields feed the register inputs ...
    for field, reg in stages:
        self.wire(getattr(self.ports.config, field), reg.ports.I)

    # ... and the register outputs drive the outgoing bus fields.
    for field, reg in stages:
        self.wire(reg.ports.O, getattr(self.ports.config_out, field))
def __create_reg(self):
    """Instantiate one clock-enabled register per switchbox register node.

    Every entry of ``self.switchbox.registers`` gets a register generator
    of the node's width (built with ``has_ce=True``), named after the
    node via ``create_name``.  The ``(node, register)`` pair is stored in
    ``self.regs`` under the same key.  When at least one register exists
    afterwards, a ``stall`` input of ``self.stall_signal_width`` bits is
    added to this generator.
    """
    for key, node in self.switchbox.registers.items():
        instance = FromMagma(DefineRegister(node.width, has_ce=True))
        instance.instance_name = create_name(str(node))
        self.regs[key] = (node, instance)

    # No registers means nothing can be stalled: skip the stall port.
    if not self.regs:
        return

    stall_type = magma.In(magma.Bits[self.stall_signal_width])
    self.add_port("stall", stall_type)
def __wire_config_ce(self):
    """Drive the clock-enable (``CE``) input of every register in ``self.regs``.

    For each ``(node, register)`` pair the enable is the AND of

    * an equality check between the mux-select configuration register of
      the node's first neighbour (annotated as a ``RegisterMuxNode``) and
      the index of ``node`` among that mux's incoming connections, and
    * the inverted bit 0 of the ``stall`` port.

    Does nothing when there are no registers.
    """
    if not self.regs:
        return

    # Fan the stall signal out to the registers; it is inverted first so
    # that it can be used as a clock enable.
    stall_inverter = FromMagma(mantle.DefineInvert(1))
    # FIXME: use the low bits of stall signal to stall
    self.wire(stall_inverter.ports.I[0], self.ports.stall[0])

    for node, register in self.regs.values():
        rmux: RegisterMuxNode = list(node)[0]

        # Look up the config register holding the rmux select value and
        # the select value that picks this register.
        select_reg = self.registers[get_mux_sel_name(rmux)]
        select_index = rmux.get_conn_in().index(node)

        is_selected = FromMagma(mantle.DefineEQ(select_reg.width))
        self.wire(is_selected.ports.I0, Const(select_index))
        self.wire(is_selected.ports.I1, select_reg.ports.O)

        enable_gate = FromMagma(mantle.DefineAnd(2, 1))
        self.wire(enable_gate.ports.I0[0], is_selected.ports.O)
        self.wire(enable_gate.ports.I1, stall_inverter.ports.O)

        self.wire(
            register.ports.CE,
            self.convert(enable_gate.ports.O[0], magma.enable),
        )
def __init__(self, peak_generator):
    """Wrap a peak-generated circuit as a configurable core.

    The parent class is initialised with ``(8, 32)``, the same two numbers
    used for the ``ConfigurationType`` of the ``config`` port.  The peak
    generator is wrapped in ``_PeakWrapper``; its RTL is instantiated as
    ``self.peak_circuit``, every wrapper input/output becomes a port of
    this core, and the instruction input of the circuit is driven from
    32-bit configuration registers.

    Args:
        peak_generator: object handed to ``_PeakWrapper``.
    """
    super().__init__(8, 32)
    self.wrapper = _PeakWrapper(peak_generator)

    # Generate core RTL (as magma).
    self.peak_circuit = FromMagma(self.wrapper.rtl())

    # Add input/output ports and wire them.
    directions = (
        (self.wrapper.inputs(), magma.In),
        (self.wrapper.outputs(), magma.Out),
    )
    for port_map, direction in directions:
        is_input = direction is magma.In
        for position, (port_name, peak_type) in enumerate(port_map.items()):
            converted = _convert_type(peak_type)
            self.add_port(port_name, direction(converted))

            outer = self.ports[port_name]
            if converted is magma.Bits[1]:
                # One-bit vectors are connected through their single bit.
                outer = outer[0]

            # Inputs keep their name on the circuit; outputs are looked up
            # positionally as O0, O1, ...
            inner_name = port_name if is_input else f"O{position}"
            self.wire(outer, self.peak_circuit.ports[inner_name])

    self.add_ports(config=magma.In(ConfigurationType(8, 32)))

    # TODO(rsetaluri): Figure out stall signals.

    # Set up configuration for PE instruction. Currently, we perform a naive
    # partitioning of the large instruction into 32-bit config registers.
    word = 32
    instr_width = self.wrapper.instruction_width()
    register_count = math.ceil(instr_width / word)
    instr_name = self.wrapper.instruction_name()
    for index in range(register_count):
        reg_name = f"{instr_name}_{index}"
        self.add_config(reg_name, word)

        low = index * word
        high = min(low + word, instr_width)
        # The last register may only be partially used.
        used_bits = high - low
        self.wire(
            self.registers[reg_name].ports.O[:used_bits],
            self.peak_circuit.ports[instr_name][low:high],
        )

    self._setup_config()
def apply_global_fanout_wiring(interconnect: Interconnect, io_sides: IOSide):
    """Fan the interconnect's global signals out to every configurable tile.

    Each global port of ``interconnect`` is wired directly to the port of
    the same name on every tile that has a ``config`` port.  The tiles'
    ``read_config_data`` outputs are OR-reduced per column, the column
    results are OR-reduced again, and the final result drives
    ``interconnect.ports.read_config_data``.

    Args:
        interconnect: the interconnect whose tiles are wired.
        io_sides: passed to ``get_array_size`` to obtain the column range.

    Returns:
        The final (cross-column) OR generator instance.
    """
    width, height = interconnect.x_max + 1, interconnect.y_max + 1
    x_min, x_max, _y_min, _y_max = get_array_size(width, height, io_sides)
    global_ports = interconnect.globals

    final_or = FromMagma(
        mantle.DefineOr(x_max - x_min + 1, interconnect.config_data_width))
    final_or.instance_name = "read_config_data_or_final"

    # Everything below is connected on a per-column basis.
    for col_idx, x in enumerate(range(x_min, x_max + 1)):
        # Keep only tiles with a "config" port (this skips the margin).
        tiles = [
            entry for entry in interconnect.get_column(x)
            if "config" in entry.ports
        ]

        column_or = FromMagma(
            mantle.DefineOr(len(tiles), interconnect.config_data_width))
        column_or.instance_name = f"read_config_data_or_col_{x}"

        for tile_idx, tile in enumerate(tiles):
            for signal_name in global_ports:
                interconnect.wire(interconnect.ports[signal_name],
                                  tile.ports[signal_name])
            # Each tile feeds one input of its column's OR.
            interconnect.wire(column_or.ports[f"I{tile_idx}"],
                              tile.ports.read_config_data)

        # Each column feeds one input of the final OR.
        interconnect.wire(final_or.ports[f"I{col_idx}"], column_or.ports.O)

    interconnect.wire(interconnect.ports.read_config_data, final_or.ports.O)
    return final_or
def __init__(self, num_glb_tiles, num_cgra_cols, banks_per_tile=2,
             bank_addr_width=17, bank_data_width=64, cgra_data_width=16,
             axi_addr_width=12, axi_data_width=32, cfg_addr_width=32,
             cfg_data_width=32, parameter_only: bool = False):
    """Declare the global buffer wrapper and wire it to its declaration.

    Stores the given parameters and a set of widths derived from them,
    declares the external ports, builds ``self.param``
    (``GlobalBufferParams``) and - unless ``parameter_only`` is set -
    instantiates ``GlobalBufferDeclarationGenerator(self.param)`` as
    ``self.underlying`` and connects every external port to it.

    Args:
        num_glb_tiles: number of global buffer tiles; asserted to be
            exactly half of ``num_cgra_cols``.
        num_cgra_cols: number of CGRA columns.
        banks_per_tile: banks per global buffer tile.
        bank_addr_width: address width of one bank.
        bank_data_width: data width of one bank, in bits.
        cgra_data_width: CGRA data width, in bits.
        axi_addr_width: AXI address width.
        axi_data_width: AXI data width, in bits.
        cfg_addr_width: CGRA configuration address width.
        cfg_data_width: CGRA configuration data width.
        parameter_only: when true, ``gen_param_files(self.param)`` is
            called and the constructor returns before ``self.underlying``
            is created or anything is wired.
    """
    super().__init__()
    clog2 = magma.bitutils.clog2

    self.num_glb_tiles = num_glb_tiles
    self.num_cgra_cols = num_cgra_cols
    # the number of glb tiles is half the number of cgra columns
    assert 2 * self.num_glb_tiles == self.num_cgra_cols
    self.col_per_tile = num_cgra_cols // num_glb_tiles

    self.banks_per_tile = banks_per_tile
    self.bank_addr_width = bank_addr_width
    self.bank_data_width = bank_data_width
    self.bank_byte_offset = clog2(self.bank_data_width // 8)

    self.cgra_data_width = cgra_data_width
    self.cgra_byte_offset = clog2(self.cgra_data_width // 8)

    self.axi_addr_width = axi_addr_width
    self.axi_data_width = axi_data_width
    self.axi_strb_width = self.axi_data_width // 8
    self.axi_byte_offset = clog2(self.axi_data_width // 8)

    self.cfg_addr_width = cfg_addr_width
    self.cfg_data_width = cfg_data_width

    # A global address is bank address + bank select + tile select.
    self.glb_addr_width = (self.bank_addr_width
                           + clog2(self.banks_per_tile)
                           + clog2(self.num_glb_tiles))
    # NOTE(review): the two lines below go through the name `m` while the
    # rest of this method uses `magma`; presumably `m` is an alias of
    # magma - confirm against the file's imports.
    self.tile_sel_addr_width = m.bitutils.clog2(self.num_glb_tiles)
    self.cgra_per_glb = self.num_cgra_cols // self.num_glb_tiles
    self.bank_sel_addr_width = m.bitutils.clog2(self.banks_per_tile)

    self.cgra_cfg_type = ConfigurationType(self.cfg_addr_width,
                                           self.cfg_data_width)

    self.max_num_words_width = (
        self.glb_addr_width
        - self.bank_byte_offset
        + clog2(bank_data_width // cgra_data_width))
    self.max_stride_width = self.axi_data_width - self.max_num_words_width
    self.max_num_cfgs_width = self.glb_addr_width - self.bank_byte_offset
    self.queue_depth = 4
    self.loop_level = 4
    self.latency_width = 1 + clog2(self.num_glb_tiles)

    # Port types shared by several ports.
    per_tile_bits = magma.Bits[self.num_glb_tiles]
    per_col_data = magma.Array[self.num_cgra_cols,
                               magma.Bits[self.cgra_data_width]]
    per_col_valid = magma.Array[self.num_cgra_cols, magma.Bits[1]]

    self.add_ports(
        clk=magma.In(magma.Clock),
        reset=magma.In(magma.AsyncReset),
        stall=magma.In(magma.Bit),
        cgra_stall_in=magma.In(magma.Bit),
        cgra_soft_reset=magma.In(magma.Bit),
        proc_packet=ProcPacketIfc(self.glb_addr_width,
                                  self.bank_data_width).slave,
        glb_cfg=GlbCfgIfc(self.axi_addr_width, self.axi_data_width).slave,
        sram_cfg=GlbCfgIfc(self.glb_addr_width, self.axi_data_width).slave,
        stream_data_f2g=magma.In(per_col_data),
        stream_data_valid_f2g=magma.In(per_col_valid),
        stream_data_g2f=magma.Out(per_col_data),
        stream_data_valid_g2f=magma.Out(per_col_valid),
        cgra_cfg_jtag=magma.In(self.cgra_cfg_type),
        cgra_cfg_g2f=magma.Out(
            magma.Array[self.num_cgra_cols, self.cgra_cfg_type]),
        cgra_stall=magma.Out(magma.Bits[self.num_cgra_cols]),
        strm_start_pulse=magma.In(per_tile_bits),
        pc_start_pulse=magma.In(per_tile_bits),
        strm_f2g_interrupt_pulse=magma.Out(per_tile_bits),
        strm_g2f_interrupt_pulse=magma.Out(per_tile_bits),
        pcfg_g2f_interrupt_pulse=magma.Out(per_tile_bits),
    )

    # Parameter bundle handed to the declaration generator.
    self.param = GlobalBufferParams(
        NUM_GLB_TILES=self.num_glb_tiles,
        TILE_SEL_ADDR_WIDTH=self.tile_sel_addr_width,
        NUM_CGRA_TILES=self.num_cgra_cols,
        CGRA_PER_GLB=self.cgra_per_glb,
        BANKS_PER_TILE=self.banks_per_tile,
        BANK_SEL_ADDR_WIDTH=self.bank_sel_addr_width,
        BANK_DATA_WIDTH=self.bank_data_width,
        BANK_ADDR_WIDTH=self.bank_addr_width,
        BANK_BYTE_OFFSET=self.bank_byte_offset,
        GLB_ADDR_WIDTH=self.glb_addr_width,
        CGRA_DATA_WIDTH=self.cgra_data_width,
        CGRA_BYTE_OFFSET=self.cgra_byte_offset,
        AXI_ADDR_WIDTH=self.axi_addr_width,
        AXI_DATA_WIDTH=self.axi_data_width,
        AXI_STRB_WIDTH=self.axi_strb_width,
        AXI_BYTE_OFFSET=self.axi_byte_offset,
        MAX_NUM_WORDS_WIDTH=self.max_num_words_width,
        MAX_STRIDE_WIDTH=self.max_stride_width,
        MAX_NUM_CFGS_WIDTH=self.max_num_cfgs_width,
        CGRA_CFG_ADDR_WIDTH=self.cfg_addr_width,
        CGRA_CFG_DATA_WIDTH=self.cfg_data_width,
        QUEUE_DEPTH=self.queue_depth,
        LOOP_LEVEL=self.loop_level,
        LATENCY_WIDTH=self.latency_width,
    )

    if parameter_only:
        gen_param_files(self.param)
        return

    self.underlying = FromMagma(GlobalBufferDeclarationGenerator(self.param))

    # ------------------------------------------------------------------
    # Wiring: every call is wire(<port of this wrapper>, <port of the
    # underlying circuit>).
    # ------------------------------------------------------------------
    outer = self.ports
    inner = self.underlying.ports

    def same_name(*names):
        # Ports that carry the same name on both sides.
        for name in names:
            self.wire(getattr(outer, name), getattr(inner, name))

    def bundle_fields(bundle, prefix, fields):
        # Interface field <f> maps to the flat port <prefix><f>.
        for field in fields:
            self.wire(getattr(bundle, field), getattr(inner, prefix + field))

    same_name("clk", "stall", "cgra_stall_in", "reset", "cgra_soft_reset")

    bundle_fields(
        outer.proc_packet, "proc_",
        ("wr_en", "wr_strb", "wr_addr", "wr_data",
         "rd_en", "rd_addr", "rd_data", "rd_data_valid"))

    cfg_fields = ("wr_en", "wr_clk_en", "wr_addr", "wr_data",
                  "rd_en", "rd_clk_en", "rd_addr", "rd_data",
                  "rd_data_valid")
    bundle_fields(outer.glb_cfg, "if_cfg_", cfg_fields)
    bundle_fields(outer.sram_cfg, "if_sram_cfg_", cfg_fields)

    # Streaming data: the underlying circuit exposes flat vectors, so each
    # column uses its own slice / bit of them.
    data_w = self.cgra_data_width
    for col in range(self.num_cgra_cols):
        low, high = col * data_w, (col + 1) * data_w
        self.wire(outer.stream_data_f2g[col],
                  inner.stream_data_f2g[low:high])
        self.wire(outer.stream_data_valid_f2g[col][0],
                  inner.stream_data_valid_f2g[col])
        self.wire(outer.stream_data_g2f[col],
                  inner.stream_data_g2f[low:high])
        self.wire(outer.stream_data_valid_g2f[col][0],
                  inner.stream_data_valid_g2f[col])

    same_name("cgra_stall")

    jtag_cfg = outer.cgra_cfg_jtag
    for field, suffix in (("write", "wr_en"), ("read", "rd_en"),
                          ("config_addr", "addr"), ("config_data", "data")):
        self.wire(getattr(jtag_cfg, field),
                  getattr(inner, "cgra_cfg_jtag_gc2glb_" + suffix))

    # Per-column configuration output, again sliced out of flat vectors.
    addr_w = self.cfg_addr_width
    cfg_w = self.cfg_data_width
    for col in range(self.num_cgra_cols):
        self.wire(outer.cgra_cfg_g2f[col].write[0],
                  inner.cgra_cfg_g2f_cfg_wr_en[col])
        self.wire(outer.cgra_cfg_g2f[col].read[0],
                  inner.cgra_cfg_g2f_cfg_rd_en[col])
        self.wire(outer.cgra_cfg_g2f[col].config_addr,
                  inner.cgra_cfg_g2f_cfg_addr[col * addr_w:(col + 1) * addr_w])
        self.wire(outer.cgra_cfg_g2f[col].config_data,
                  inner.cgra_cfg_g2f_cfg_data[col * cfg_w:(col + 1) * cfg_w])

    same_name("strm_start_pulse", "pc_start_pulse",
              "strm_f2g_interrupt_pulse", "strm_g2f_interrupt_pulse",
              "pcfg_g2f_interrupt_pulse")
def __init__(self, addr_width=32, data_width=32, axi_addr_width=12,
             axi_data_width=32, num_glb_tiles=16, glb_addr_width=22,
             block_axi_addr_width=12, glb_tile_mem_size=256):
    """Declare the global controller wrapper and wire it to its circuit.

    Stores the parameters, declares the external ports, runs
    ``self.run_glc_systemrdl()`` the first time any instance of the class
    is built (guarded by the class attribute ``cache``), creates the
    underlying circuit through ``gen_wrapper(params)`` in ``"declare"``
    mode and connects every external port to it.

    Args:
        addr_width: CGRA configuration address width.
        data_width: CGRA configuration data width; asserted to be equal
            to ``axi_data_width``.
        axi_addr_width: address width of the AXI4-Lite slave interface.
        axi_data_width: AXI data width.
        num_glb_tiles: number of global buffer tiles (width of the
            per-tile stall and pulse vectors).
        glb_addr_width: address width of the ``sram_cfg`` interface.
        block_axi_addr_width: address width of the ``glb_cfg`` interface.
        glb_tile_mem_size: forwarded to ``GlobalControllerParams``.
    """
    super().__init__()

    self.addr_width = addr_width
    self.data_width = data_width
    self.axi_addr_width = axi_addr_width
    self.axi_data_width = axi_data_width
    self.num_glb_tiles = num_glb_tiles
    self.glb_addr_width = glb_addr_width
    self.glb_tile_mem_size = glb_tile_mem_size
    self.block_axi_addr_width = block_axi_addr_width

    # Control logic assumes cgra config_data_width is same as axi_data_width
    assert self.axi_data_width == self.data_width

    self.config_type = ConfigurationType(self.addr_width, self.data_width)

    per_tile_bits = magma.Bits[self.num_glb_tiles]
    self.add_ports(
        clk_in=magma.In(magma.Clock),
        reset_in=magma.In(magma.AsyncReset),
        clk_out=magma.Out(magma.Clock),
        reset_out=magma.Out(magma.AsyncReset),
        cgra_stall=magma.Out(per_tile_bits),
        glb_stall=magma.Out(per_tile_bits),
        soft_reset=magma.Out(magma.Bit),
        glb_cfg=GlbCfgIfc(self.block_axi_addr_width,
                          self.axi_data_width).master,
        sram_cfg=GlbCfgIfc(self.glb_addr_width, self.axi_data_width).master,
        strm_g2f_start_pulse=magma.Out(per_tile_bits),
        strm_f2g_start_pulse=magma.Out(per_tile_bits),
        pc_start_pulse=magma.Out(per_tile_bits),
        strm_g2f_interrupt_pulse=magma.In(per_tile_bits),
        strm_f2g_interrupt_pulse=magma.In(per_tile_bits),
        pcfg_g2f_interrupt_pulse=magma.In(per_tile_bits),
        cgra_config=magma.Out(self.config_type),
        read_data_in=magma.In(magma.Bits[self.data_width]),
        jtag=JTAGType,
        axi4_slave=AXI4LiteIfc(self.axi_addr_width, self.data_width).slave,
        interrupt=magma.Out(magma.Bit),
    )

    # run_glc_systemrdl() is executed only once per class.
    owner = self.__class__
    if not owner.cache:
        owner.cache = 1
        self.run_glc_systemrdl()

    params = GlobalControllerParams(
        cfg_data_width=self.data_width,
        cfg_addr_width=self.addr_width,
        axi_addr_width=self.axi_addr_width,
        axi_data_width=self.axi_data_width,
        num_glb_tiles=self.num_glb_tiles,
        glb_tile_mem_size=self.glb_tile_mem_size,
        block_axi_addr_width=self.block_axi_addr_width,
    )
    declare = gen_wrapper(params).generator(mode="declare")
    self.underlying = FromMagma(declare())

    # ------------------------------------------------------------------
    # Wiring.  The argument order of each wire() call is kept as
    # (wrapper, circuit) or (circuit, wrapper) depending on the signal.
    # ------------------------------------------------------------------
    outer = self.ports
    inner = self.underlying.ports

    def outer_first(*names):
        for name in names:
            self.wire(getattr(outer, name), getattr(inner, name))

    def inner_first(*names):
        for name in names:
            self.wire(getattr(inner, name), getattr(outer, name))

    # wire clk and reset
    outer_first("clk_in", "reset_in")

    # cgra control signals
    inner_first("clk_out", "reset_out", "cgra_stall", "glb_stall",
                "soft_reset")

    # global buffer configuration, then global buffer sram configuration:
    # interface field <f> of bundle <b> maps to the flat port <b>_<f>.
    request_fields = ("wr_en", "wr_clk_en", "wr_addr", "wr_data",
                      "rd_en", "rd_clk_en", "rd_addr")
    response_fields = ("rd_data", "rd_data_valid")
    for bundle_name in ("glb_cfg", "sram_cfg"):
        bundle = getattr(outer, bundle_name)
        for field in request_fields:
            self.wire(getattr(bundle, field),
                      getattr(inner, f"{bundle_name}_{field}"))
        for field in response_fields:
            self.wire(getattr(inner, f"{bundle_name}_{field}"),
                      getattr(bundle, field))

    # start/done pulse
    inner_first("strm_f2g_interrupt_pulse", "strm_g2f_interrupt_pulse",
                "pcfg_g2f_interrupt_pulse")
    outer_first("strm_g2f_start_pulse", "strm_f2g_start_pulse",
                "pc_start_pulse")

    # cgra configuration interface
    cgra_config = outer.cgra_config
    self.wire(inner.cgra_cfg_addr, cgra_config.config_addr)
    self.wire(inner.cgra_cfg_wr_data, cgra_config.config_data)
    self.wire(inner.cgra_cfg_read, cgra_config.read[0])
    self.wire(inner.cgra_cfg_write, cgra_config.write[0])
    self.wire(outer.read_data_in, inner.cgra_cfg_rd_data)

    # axi4-lite slave interface: field <f> maps to flat port axi_<f>
    axi = outer.axi4_slave
    for field in ("awaddr", "awvalid", "awready",
                  "wdata", "wvalid", "wready",
                  "bready", "bvalid", "bresp",
                  "araddr", "arvalid", "arready",
                  "rdata", "rresp", "rvalid", "rready"):
        self.wire(getattr(axi, field), getattr(inner, "axi_" + field))

    # interrupt
    outer_first("interrupt")

    # jtag interface signals
    jtag = outer.jtag
    for pin in ("tdi", "tdo", "tms", "tck", "trst_n"):
        self.wire(getattr(jtag, pin), getattr(inner, pin))
def __init__(self, addr_width, data_width, axi_addr_width):
    """Declare the global controller wrapper around the genesis2 circuit.

    Stores the three widths, declares the external ports, instantiates
    ``global_controller_genesis2.gc_wrapper`` in ``"declare"`` mode as
    ``self.underlying`` and connects every external port to it.

    Args:
        addr_width: address width of the ``config`` and
            ``glb_sram_config`` interfaces.
        data_width: data width of all configuration interfaces and of
            the read-data inputs.
        axi_addr_width: address width of the ``glb_config`` interface and
            of the AXI4 control interface.
    """
    super().__init__()

    self.addr_width = addr_width
    self.data_width = data_width
    self.axi_addr_width = axi_addr_width
    self.config_type = ConfigurationType(self.addr_width, self.data_width)
    self.axi_config_type = ConfigurationType(self.axi_addr_width,
                                             self.data_width)

    read_data_type = magma.Bits[self.data_width]
    self.add_ports(
        clk_in=magma.In(magma.Clock),
        reset_in=magma.In(magma.AsyncReset),
        clk_out=magma.Out(magma.Clock),
        reset_out=magma.Out(magma.AsyncReset),
        stall=magma.Out(magma.Bits[1]),
        glb_stall=magma.Out(magma.Bit),
        cgra_start_pulse=magma.Out(magma.Bit),
        cgra_done_pulse=magma.In(magma.Bit),
        cgra_soft_reset=magma.Out(magma.Bit),
        config_start_pulse=magma.Out(magma.Bit),
        config_done_pulse=magma.In(magma.Bit),
        glb_config=magma.Out(self.axi_config_type),
        glb_read_data_in=magma.In(read_data_type),
        glb_sram_config=magma.Out(self.config_type),
        glb_sram_read_data_in=magma.In(read_data_type),
        config=magma.Out(self.config_type),
        read_data_in=magma.In(read_data_type),
        jtag=JTAGType,
        axi4_ctrl=AXI4SlaveType(self.axi_addr_width, self.data_width),
    )

    declare = global_controller_genesis2.gc_wrapper.generator(mode="declare")
    self.underlying = FromMagma(declare())

    # ------------------------------------------------------------------
    # Wiring.  The argument order of each wire() call is kept as
    # (wrapper, circuit) or (circuit, wrapper) depending on the signal.
    # ------------------------------------------------------------------
    outer = self.ports
    inner = self.underlying.ports

    def outer_first(*names):
        for name in names:
            self.wire(getattr(outer, name), getattr(inner, name))

    def inner_first(*names):
        for name in names:
            self.wire(getattr(inner, name), getattr(outer, name))

    # wire clk and reset
    outer_first("clk_in", "reset_in")

    # cgra control signals (the circuit calls the stall output
    # "cgra_stalled"; this wrapper exposes it as "stall")
    inner_first("clk_out", "reset_out")
    self.wire(inner.cgra_stalled, outer.stall)
    inner_first("glb_stall")
    outer_first("cgra_start_pulse", "cgra_done_pulse", "cgra_soft_reset")

    # fast reconfiguration interface
    outer_first("config_start_pulse", "config_done_pulse")

    # glb configuration, glb sram configuration and cgra configuration
    # interfaces share one naming scheme that differs only by prefix.
    for prefix in ("glb_", "glb_sram_", ""):
        bundle = getattr(outer, prefix + "config")
        self.wire(getattr(inner, prefix + "config_addr_out"),
                  bundle.config_addr)
        self.wire(getattr(inner, prefix + "config_data_out"),
                  bundle.config_data)
        self.wire(getattr(inner, prefix + "read"), bundle.read[0])
        self.wire(getattr(inner, prefix + "write"), bundle.write[0])
        self.wire(getattr(outer, prefix + "read_data_in"),
                  getattr(inner, prefix + "config_data_in"))

    # axi4-lite slave interface: the circuit uses upper-case port names
    axi = outer.axi4_ctrl
    for field in ("awaddr", "awvalid", "awready",
                  "wdata", "wvalid", "wready",
                  "araddr", "arvalid", "arready",
                  "rdata", "rresp", "rvalid", "rready"):
        self.wire(getattr(axi, field), getattr(inner, field.upper()))
    self.wire(axi.interrupt, inner.interrupt)

    # jtag interface signals
    jtag = outer.jtag
    for pin in ("tdi", "tdo", "tms", "tck", "trst_n"):
        self.wire(getattr(jtag, pin), getattr(inner, pin))
def apply_global_meso_wiring(interconnect: Interconnect, io_sides: IOSide):
    """Route global signals tile-to-tile down each column ("river routing").

    For every column, the interconnect's global ports are wired to the
    first configurable tile only.  ``pass_signal_through`` is called on
    every tile for every global signal, and the port it returns for tile
    ``i`` is wired to the same-named port of tile ``i + 1``.

    ``read_config_data`` is chained the same way: ``or_reduction`` is
    called on every tile, the port it returns for the first tile is tied
    to a zero constant, each tile's ``read_config_data`` is wired to the
    port returned for the next tile, and the last tile's
    ``read_config_data`` feeds one input of a final OR across columns,
    which drives ``interconnect.ports.read_config_data``.

    Args:
        interconnect: the interconnect whose tiles are wired.
        io_sides: passed to ``get_array_size`` to obtain the column range.

    Returns:
        The final (cross-column) OR generator instance.
    """
    global_ports = interconnect.globals
    width, height = interconnect.x_max + 1, interconnect.y_max + 1
    x_min, x_max, _y_min, _y_max = get_array_size(width, height, io_sides)

    final_or = FromMagma(
        mantle.DefineOr(x_max - x_min + 1, interconnect.config_data_width))
    final_or.instance_name = "read_config_data_or_final"

    # Everything below is done on a per-column basis.
    for col_idx, x in enumerate(range(x_min, x_max + 1)):
        # Keep only tiles with a "config" port (this skips the margin).
        column = [
            entry for entry in interconnect.get_column(x)
            if "config" in entry.ports
        ]

        # Global inputs enter the column through its first tile.
        for signal in global_ports:
            interconnect.wire(interconnect.ports[signal],
                              column[0].ports[signal])

        # First pass: make every signal pass through every tile and
        # remember the port that pass_signal_through hands back.
        forwarded = {
            signal: [pass_signal_through(tile, signal) for tile in column]
            for signal in global_ports
        }

        # Second pass: connect tile i's pass-through port to tile i + 1.
        for i, next_tile in enumerate(column[1:]):
            for signal in global_ports:
                interconnect.wire(forwarded[signal][i],
                                  next_tile.ports[signal])

        # read_config_data: or_reduction returns, per tile, the new port
        # created for the incoming read data (per the original comments it
        # also adds an OR gate combining it with the tile's own read data).
        chain_inputs = [
            or_reduction(tile, "read_data_mux", "read_config_data",
                         interconnect.config_data_width)
            for tile in column
        ]

        # The head of the chain has nothing upstream: feed it zero.
        interconnect.wire(
            chain_inputs[0],
            Const(magma.bits(0, interconnect.config_data_width)))

        # Each tile's read data output feeds the next tile's input.
        for tile, next_input in zip(column[:-1], chain_inputs[1:]):
            interconnect.wire(tile.ports.read_config_data, next_input)

        # The tail of the chain feeds this column's input of the final OR.
        interconnect.wire(final_or.ports[f"I{col_idx}"],
                          column[-1].ports.read_config_data)

    interconnect.wire(interconnect.ports.read_config_data, final_or.ports.O)
    return final_or
def __init__(self, num_banks, num_io, num_cfg, bank_addr_width,
             glb_addr_width=32, cfg_addr_width=32, cfg_data_width=32,
             axi_addr_width=12):
    """Declare the global buffer wrapper around the genesis2 circuit.

    Stores the parameters (plus the fixed ``bank_data = 64`` and
    ``cgra_data = 16``), declares the external ports, instantiates
    ``global_buffer_genesis2.glb_wrapper`` in ``"declare"`` mode as
    ``self.underlying`` and connects every external port to it.

    Args:
        num_banks: forwarded to the circuit generator.
        num_io: number of IO channels (length of the ``*_io_*`` arrays).
        num_cfg: number of entries of ``glb_to_cgra_config``.
        bank_addr_width: forwarded to the circuit generator.
        glb_addr_width: address width of the ``soc_data`` interface.
        cfg_addr_width: configuration address width.
        cfg_data_width: configuration data width.
        axi_addr_width: address width of the ``glb_config`` interface.
    """
    super().__init__()

    self.num_banks = num_banks
    self.bank_addr_width = bank_addr_width
    self.glb_addr_width = glb_addr_width
    self.num_io = num_io
    self.num_cfg = num_cfg
    self.bank_data = 64
    self.cgra_data = 16
    self.cfg_addr_width = cfg_addr_width
    self.cfg_data_width = cfg_data_width
    self.axi_addr_width = axi_addr_width
    self.config_type = ConfigurationType(self.cfg_addr_width,
                                         self.cfg_data_width)
    self.axi_config_type = ConfigurationType(self.axi_addr_width,
                                             self.cfg_data_width)

    io_flags = magma.Array[self.num_io, magma.Bit]
    io_words = magma.Array[self.num_io, magma.Bits[self.cgra_data]]
    cfg_read_data = magma.Bits[self.cfg_data_width]

    self.add_ports(
        clk=magma.In(magma.Clock),
        reset=magma.In(magma.AsyncReset),
        soc_data=SoCDataType(self.glb_addr_width, self.bank_data),
        cgra_to_io_wr_en=magma.In(io_flags),
        cgra_to_io_rd_en=magma.In(io_flags),
        io_to_cgra_rd_data_valid=magma.Out(io_flags),
        cgra_to_io_wr_data=magma.In(io_words),
        io_to_cgra_rd_data=magma.Out(io_words),
        cgra_to_io_addr_high=magma.In(io_words),
        cgra_to_io_addr_low=magma.In(io_words),
        glc_to_io_stall=magma.In(magma.Bit),
        cgra_start_pulse=magma.In(magma.Bit),
        cgra_done_pulse=magma.Out(magma.Bit),
        config_start_pulse=magma.In(magma.Bit),
        config_done_pulse=magma.Out(magma.Bit),
        cgra_config=magma.In(self.config_type),
        glb_to_cgra_config=magma.Out(
            magma.Array[self.num_cfg, self.config_type]),
        glb_config=magma.In(self.axi_config_type),
        glb_config_rd_data=magma.Out(cfg_read_data),
        glb_sram_config=magma.In(self.config_type),
        glb_sram_config_rd_data=magma.Out(cfg_read_data),
    )

    wrapper = global_buffer_genesis2.glb_wrapper
    param_mapping = global_buffer_genesis2.param_mapping
    declare = wrapper.generator(param_mapping, mode="declare")
    self.underlying = FromMagma(
        declare(num_banks=self.num_banks,
                num_io=self.num_io,
                num_cfg=self.num_cfg,
                bank_addr_width=self.bank_addr_width,
                cfg_addr_width=self.cfg_addr_width,
                cfg_data=self.cfg_data_width))

    # ------------------------------------------------------------------
    # Wiring: every call is wire(<port of this wrapper>, <port of the
    # underlying circuit>).
    # ------------------------------------------------------------------
    outer = self.ports
    inner = self.underlying.ports

    def same_name(*names):
        # Ports that carry the same name on both sides.
        for name in names:
            self.wire(getattr(outer, name), getattr(inner, name))

    same_name("clk", "reset")

    # SoC data interface: field <f> maps to flat port host_<f>.
    soc = outer.soc_data
    for field in ("wr_strb", "wr_addr", "wr_data",
                  "rd_en", "rd_addr", "rd_data"):
        self.wire(getattr(soc, field), getattr(inner, "host_" + field))

    # IO channels: single-bit signals are indexed, word signals are
    # slices of the circuit's flat vectors.
    word = self.cgra_data
    for ch in range(self.num_io):
        for name in ("cgra_to_io_wr_en", "cgra_to_io_rd_en",
                     "io_to_cgra_rd_data_valid"):
            self.wire(getattr(outer, name)[ch], getattr(inner, name)[ch])
        for name in ("cgra_to_io_wr_data", "io_to_cgra_rd_data",
                     "cgra_to_io_addr_high", "cgra_to_io_addr_low"):
            self.wire(getattr(outer, name)[ch],
                      getattr(inner, name)[ch * word:(ch + 1) * word])

    # Per-entry configuration output, again sliced out of flat vectors.
    addr_w = self.cfg_addr_width
    data_w = self.cfg_data_width
    for k in range(self.num_cfg):
        self.wire(outer.glb_to_cgra_config[k].write[0],
                  inner.glb_to_cgra_cfg_wr[k])
        self.wire(outer.glb_to_cgra_config[k].read[0],
                  inner.glb_to_cgra_cfg_rd[k])
        self.wire(outer.glb_to_cgra_config[k].config_addr,
                  inner.glb_to_cgra_cfg_addr[k * addr_w:(k + 1) * addr_w])
        self.wire(outer.glb_to_cgra_config[k].config_data,
                  inner.glb_to_cgra_cfg_data[k * data_w:(k + 1) * data_w])

    same_name("glc_to_io_stall")

    # Configuration coming from the global controller.
    cgra_cfg = outer.cgra_config
    self.wire(cgra_cfg.write[0], inner.glc_to_cgra_cfg_wr)
    self.wire(cgra_cfg.read[0], inner.glc_to_cgra_cfg_rd)
    self.wire(cgra_cfg.config_addr, inner.glc_to_cgra_cfg_addr)
    self.wire(cgra_cfg.config_data, inner.glc_to_cgra_cfg_data)

    same_name("cgra_start_pulse", "cgra_done_pulse",
              "config_start_pulse", "config_done_pulse")

    # glb_config and glb_sram_config use the same naming scheme: the
    # bundle name doubles as the prefix of the flat circuit ports.
    for bundle_name in ("glb_config", "glb_sram_config"):
        bundle = getattr(outer, bundle_name)
        self.wire(bundle.write[0], getattr(inner, bundle_name + "_wr"))
        self.wire(bundle.read[0], getattr(inner, bundle_name + "_rd"))
        self.wire(bundle.config_data,
                  getattr(inner, bundle_name + "_wr_data"))
        self.wire(bundle.config_addr,
                  getattr(inner, bundle_name + "_addr"))
        self.wire(getattr(outer, bundle_name + "_rd_data"),
                  getattr(inner, bundle_name + "_rd_data"))
def __init__(
        self,
        data_width=16,  # CGRA Params
        mem_width=16,
        mem_depth=256,
        banks=1,
        input_iterator_support=6,  # Addr Controllers
        output_iterator_support=6,
        input_config_width=16,
        output_config_width=16,
        interconnect_input_ports=1,  # Connection to int
        interconnect_output_ports=1,
        mem_input_ports=1,
        mem_output_ports=1,
        use_sram_stub=True,
        sram_macro_info=SRAMMacroInfo(),
        read_delay=1,  # Cycle delay in read (SRAM vs Register File)
        rw_same_cycle=True,  # Does the memory allow r+w in same cycle?
        agg_height=4,
        tb_sched_max=16,
        config_data_width=32,
        config_addr_width=8,
        num_tiles=1,
        remove_tb=False,
        fifo_mode=False,
        add_clk_enable=True,
        add_flush=True,
        override_name=None):
    """Build the memory core wrapper around a (cached) LakeTop circuit.

    The LakeTop generator is converted to a magma circuit once per distinct
    parameter set and memoized in ``MemCore.__circuit_cache`` together with
    the generator object.  The wrapper then:

    * mirrors the Lake interface as ports of this core (skipping the
      configuration bus and ``clk_en``),
    * adds a 1-bit value register, a 1-bit select register and a 2:1 mux
      for every bit of every control signal,
    * wires clk, inverted reset and inverted stall to the circuit,
    * creates one extra ``CoreFeature`` per SRAM set reported by
      ``lt_dut.total_sets`` and ORs the per-feature configuration buses,
    * registers every top-level Lake configuration as a config register,
    * writes the sorted register names to ``mem_cfg.txt`` and
      ``mem_synth.txt`` in the current working directory.

    Most keyword arguments are stored on the instance and forwarded
    unchanged to ``LakeTop``.  ``mem_input_ports`` and ``mem_output_ports``
    are only stored; ``tb_sched_max`` is accepted but not used here.  When
    ``override_name`` is truthy it becomes the Lake generator name and the
    private name attribute is set to ``override_name + "Core"``; otherwise
    the names are ``"LakeTop"`` and ``"MemCore"``.
    """
    # name
    if override_name:
        self.__name = override_name + "Core"
        lake_name = override_name
    else:
        self.__name = "MemCore"
        lake_name = "LakeTop"

    super().__init__(config_addr_width, config_data_width)

    # Capture everything to the tile object
    self.data_width = data_width
    self.mem_width = mem_width
    self.mem_depth = mem_depth
    self.banks = banks
    self.fw_int = int(self.mem_width / self.data_width)
    self.input_iterator_support = input_iterator_support
    self.output_iterator_support = output_iterator_support
    self.input_config_width = input_config_width
    self.output_config_width = output_config_width
    self.interconnect_input_ports = interconnect_input_ports
    self.interconnect_output_ports = interconnect_output_ports
    self.mem_input_ports = mem_input_ports
    self.mem_output_ports = mem_output_ports
    self.use_sram_stub = use_sram_stub
    self.sram_macro_info = sram_macro_info
    self.read_delay = read_delay
    self.rw_same_cycle = rw_same_cycle
    self.agg_height = agg_height
    self.config_data_width = config_data_width
    self.config_addr_width = config_addr_width
    self.num_tiles = num_tiles
    self.remove_tb = remove_tb
    self.fifo_mode = fifo_mode
    self.add_clk_enable = add_clk_enable
    self.add_flush = add_flush

    # Typedefs for ease
    TData = magma.Bits[self.data_width]
    TBit = magma.Bits[1]

    self.__inputs = []
    self.__outputs = []

    # The key must cover every value handed to LakeTop below.  The config
    # widths and the generator name used to be missing, so two cores that
    # differed only in those silently shared the first circuit built.
    cache_key = (self.data_width, self.mem_width, self.mem_depth, self.banks,
                 self.input_iterator_support, self.output_iterator_support,
                 self.input_config_width, self.output_config_width,
                 self.interconnect_input_ports, self.interconnect_output_ports,
                 self.use_sram_stub, self.sram_macro_info, self.read_delay,
                 self.rw_same_cycle, self.agg_height,
                 self.config_data_width, self.config_addr_width,
                 self.num_tiles, self.remove_tb, self.fifo_mode,
                 self.add_clk_enable, self.add_flush, lake_name)

    # Check for circuit caching
    if cache_key not in MemCore.__circuit_cache:
        # Instantiate core object here - will only use the object representation to
        # query for information. The circuit representation will be cached and retrieved
        # in the following steps.
        lt_dut = LakeTop(
            data_width=self.data_width,
            mem_width=self.mem_width,
            mem_depth=self.mem_depth,
            banks=self.banks,
            input_iterator_support=self.input_iterator_support,
            output_iterator_support=self.output_iterator_support,
            input_config_width=self.input_config_width,
            output_config_width=self.output_config_width,
            interconnect_input_ports=self.interconnect_input_ports,
            interconnect_output_ports=self.interconnect_output_ports,
            use_sram_stub=self.use_sram_stub,
            sram_macro_info=self.sram_macro_info,
            read_delay=self.read_delay,
            rw_same_cycle=self.rw_same_cycle,
            agg_height=self.agg_height,
            config_data_width=self.config_data_width,
            config_addr_width=self.config_addr_width,
            num_tiles=self.num_tiles,
            remove_tb=self.remove_tb,
            fifo_mode=self.fifo_mode,
            add_clk_enable=self.add_clk_enable,
            add_flush=self.add_flush,
            name=lake_name,
            gen_addr=False)

        change_sram_port_pass = change_sram_port_names(
            use_sram_stub, sram_macro_info)
        circ = kts.util.to_magma(
            lt_dut,
            flatten_array=True,
            check_multiple_driver=False,
            optimize_if=False,
            check_flip_flop_always_ff=False,
            additional_passes={"change_sram_port": change_sram_port_pass})
        MemCore.__circuit_cache[cache_key] = (circ, lt_dut)
    else:
        circ, lt_dut = MemCore.__circuit_cache[cache_key]

    # Save as underlying circuit object
    self.underlying = FromMagma(circ)

    # Enumerate input and output ports
    # (clk and reset are assumed)
    core_interface = get_interface(lt_dut)
    cfgs = extract_top_config(lt_dut)
    assert len(cfgs) > 0, "No configs?"

    # We basically add in the configuration bus differently
    # than the other ports...
    skip_names = [
        "config_data_in", "config_write", "config_addr_in",
        "config_data_out", "config_read", "config_en", "clk_en"
    ]

    # Create a list of signals that will be able to be
    # hardwired to a constant at runtime...
    control_signals = []
    # The rest of the signals to wire to the underlying representation...
    other_signals = []

    for io_info in core_interface:
        if io_info.port_name in skip_names:
            continue
        ind_ports = io_info.port_width
        intf_type = TBit
        # For our purposes, an explicit array means the inner data HAS to be 16 bits
        if io_info.expl_arr:
            ind_ports = io_info.port_size[0]
            intf_type = TData
        dir_type = magma.In
        app_list = self.__inputs
        if io_info.port_dir == "PortDirection.Out":
            dir_type = magma.Out
            app_list = self.__outputs
        if ind_ports > 1:
            # Multi-element ports are flattened into one port per element.
            for i in range(ind_ports):
                self.add_port(f"{io_info.port_name}_{i}", dir_type(intf_type))
                app_list.append(self.ports[f"{io_info.port_name}_{i}"])
        else:
            self.add_port(io_info.port_name, dir_type(intf_type))
            app_list.append(self.ports[io_info.port_name])

        # classify each signal for wiring to underlying representation...
        if io_info.is_ctrl:
            control_signals.append((io_info.port_name, io_info.port_width))
        else:
            if ind_ports > 1:
                for i in range(ind_ports):
                    other_signals.append(
                        (f"{io_info.port_name}_{i}", io_info.port_dir,
                         io_info.expl_arr, i, io_info.port_name))
            else:
                other_signals.append(
                    (io_info.port_name, io_info.port_dir,
                     io_info.expl_arr, 0, io_info.port_name))

    assert (len(self.__outputs) > 0)

    # We call clk_en stall at this level for legacy reasons????
    self.add_ports(stall=magma.In(TBit), )

    self.chain_idx_bits = max(1, kts.clog2(self.num_tiles))

    # put a 1-bit register and a mux to select the control signals
    for control_signal, width in control_signals:
        if width == 1:
            mux = MuxWrapper(2, 1, name=f"{control_signal}_sel")
            reg_value_name = f"{control_signal}_reg_value"
            reg_sel_name = f"{control_signal}_reg_sel"
            self.add_config(reg_value_name, 1)
            self.add_config(reg_sel_name, 1)
            self.wire(mux.ports.I[0], self.ports[control_signal])
            self.wire(mux.ports.I[1], self.registers[reg_value_name].ports.O)
            self.wire(mux.ports.S, self.registers[reg_sel_name].ports.O)
            # 0 is the default wire, which takes from the routing network
            self.wire(mux.ports.O[0], self.underlying.ports[control_signal][0])
        else:
            for i in range(width):
                mux = MuxWrapper(2, 1, name=f"{control_signal}_{i}_sel")
                reg_value_name = f"{control_signal}_{i}_reg_value"
                reg_sel_name = f"{control_signal}_{i}_reg_sel"
                self.add_config(reg_value_name, 1)
                self.add_config(reg_sel_name, 1)
                self.wire(mux.ports.I[0], self.ports[f"{control_signal}_{i}"])
                self.wire(mux.ports.I[1], self.registers[reg_value_name].ports.O)
                self.wire(mux.ports.S, self.registers[reg_sel_name].ports.O)
                # 0 is the default wire, which takes from the routing network
                self.wire(mux.ports.O[0], self.underlying.ports[control_signal][i])

    # Wire the other signals up...
    for pname, pdir, expl_arr, ind, uname in other_signals:
        if expl_arr is False:
            # Not an explicit array: index into the underlying bus
            self.wire(self.ports[pname][0], self.underlying.ports[uname][ind])
        else:
            # Explicit array: the flattened circuit port has the same name
            self.wire(self.ports[pname], self.underlying.ports[pname])

    # CLK, RESET, and STALL PER STANDARD PROCEDURE

    # Need to invert this (the circuit port is rst_n)
    self.resetInverter = FromMagma(mantle.DefineInvert(1))
    self.wire(self.resetInverter.ports.I[0], self.ports.reset)
    self.wire(self.resetInverter.ports.O[0], self.underlying.ports.rst_n)
    self.wire(self.ports.clk, self.underlying.ports.clk)

    # Mem core uses clk_en (essentially active low stall)
    self.stallInverter = FromMagma(mantle.DefineInvert(1))
    self.wire(self.stallInverter.ports.I, self.ports.stall)
    self.wire(self.stallInverter.ports.O[0], self.underlying.ports.clk_en[0])

    # Feature 0 is the tile itself; features 1..num_sram_features are the
    # SRAM sets reported by the Lake generator.
    self.__features: List[CoreFeature] = [self]
    self.num_sram_features = lt_dut.total_sets
    for sram_index in range(self.num_sram_features):
        core_feature = CoreFeature(self, sram_index + 1)
        self.__features.append(core_feature)

    # Wire the config
    for idx, core_feature in enumerate(self.__features):
        if (idx > 0):
            self.add_port(
                f"config_{idx}",
                magma.In(
                    ConfigurationType(self.config_addr_width,
                                      self.config_data_width)))
            # port aliasing
            core_feature.ports["config"] = self.ports[f"config_{idx}"]
    self.add_port(
        "config",
        magma.In(
            ConfigurationType(self.config_addr_width,
                              self.config_data_width)))

    # or the signal up
    t = ConfigurationType(self.config_addr_width, self.config_data_width)
    t_names = ["config_addr", "config_data"]
    or_gates = {}
    for t_name in t_names:
        port_type = t[t_name]
        or_gate = FromMagma(
            mantle.DefineOr(len(self.__features), len(port_type)))
        or_gate.instance_name = f"OR_{t_name}_FEATURE"
        for idx, core_feature in enumerate(self.__features):
            self.wire(or_gate.ports[f"I{idx}"],
                      core_feature.ports.config[t_name])
        or_gates[t_name] = or_gate

    self.wire(
        or_gates["config_addr"].ports.O,
        self.underlying.ports.config_addr_in[0:self.config_addr_width])
    self.wire(or_gates["config_data"].ports.O,
              self.underlying.ports.config_data_in)

    # read data out
    for idx, core_feature in enumerate(self.__features):
        if (idx > 0):
            self.add_port(f"read_config_data_{idx}",
                          magma.Out(magma.Bits[self.config_data_width]))
            # port aliasing
            core_feature.ports["read_config_data"] = \
                self.ports[f"read_config_data_{idx}"]

    # MEM Config: explicit arrays with more than one element get one
    # register per element, everything else gets a single register.
    configurations = []
    skip_cfgs = []
    for cfg_info in cfgs:
        if cfg_info.port_name in skip_cfgs:
            continue
        if cfg_info.expl_arr:
            if cfg_info.port_size[0] > 1:
                for i in range(cfg_info.port_size[0]):
                    configurations.append(
                        (f"{cfg_info.port_name}_{i}", cfg_info.port_width))
            else:
                configurations.append(
                    (cfg_info.port_name, cfg_info.port_width))
        else:
            configurations.append(
                (cfg_info.port_name, cfg_info.port_width))

    # Do all the stuff for the main config
    main_feature = self.__features[0]
    for config_reg_name, width in configurations:
        main_feature.add_config(config_reg_name, width)
        if (width == 1):
            self.wire(main_feature.registers[config_reg_name].ports.O[0],
                      self.underlying.ports[config_reg_name][0])
        else:
            self.wire(main_feature.registers[config_reg_name].ports.O,
                      self.underlying.ports[config_reg_name])

    # SRAM
    # These should also account for num features
    # NOTE(review): the two instance names below look swapped (the read OR
    # is labelled WR and vice versa).  They are left untouched because
    # renaming changes the instance names in the generated netlist.
    or_all_cfg_rd = FromMagma(mantle.DefineOr(self.num_sram_features, 1))
    or_all_cfg_rd.instance_name = f"OR_CONFIG_WR_SRAM"
    or_all_cfg_wr = FromMagma(mantle.DefineOr(self.num_sram_features, 1))
    or_all_cfg_wr.instance_name = f"OR_CONFIG_RD_SRAM"
    for sram_index in range(self.num_sram_features):
        core_feature = self.__features[sram_index + 1]
        self.add_port(f"config_en_{sram_index}", magma.In(magma.Bit))
        # port aliasing
        core_feature.ports["config_en"] = \
            self.ports[f"config_en_{sram_index}"]
        # Sort of a temp hack - the name is just config_data_out
        if self.num_sram_features == 1:
            self.wire(core_feature.ports.read_config_data,
                      self.underlying.ports["config_data_out"])
        else:
            self.wire(
                core_feature.ports.read_config_data,
                self.underlying.ports[f"config_data_out_{sram_index}"])
        # also need to wire the sram signal
        # the config enable is the OR of the rd+wr
        # NOTE(review): the output of or_gate_en is not consumed anywhere
        # in this method; config_en is wired straight through below.
        or_gate_en = FromMagma(mantle.DefineOr(2, 1))
        or_gate_en.instance_name = f"OR_CONFIG_EN_SRAM_{sram_index}"
        self.wire(or_gate_en.ports.I0, core_feature.ports.config.write)
        self.wire(or_gate_en.ports.I1, core_feature.ports.config.read)
        self.wire(core_feature.ports.config_en,
                  self.underlying.ports["config_en"][sram_index])
        # Still connect to the OR of all the config rd/wr
        self.wire(core_feature.ports.config.write,
                  or_all_cfg_wr.ports[f"I{sram_index}"])
        self.wire(core_feature.ports.config.read,
                  or_all_cfg_rd.ports[f"I{sram_index}"])

    self.wire(or_all_cfg_rd.ports.O[0], self.underlying.ports.config_read[0])
    self.wire(or_all_cfg_wr.ports.O[0], self.underlying.ports.config_write[0])

    self._setup_config()

    # Dump the register list (sorted by name) for downstream consumers.
    conf_names = sorted(self.registers.keys())
    with open("mem_cfg.txt", "w+") as cfg_dump:
        cfg_dump.writelines(
            f"(\"{reg}\", 0), # {self.registers[reg].width}\n"
            for reg in conf_names)
    with open("mem_synth.txt", "w+") as cfg_dump:
        cfg_dump.writelines(f"{reg}\n" for reg in conf_names)
def finalize(self):
    """Add stall/reset and, if needed, the configuration logic of this core.

    May be called only once; a second call raises ``Exception``.  After the
    stall and reset helpers have run, the method returns early when
    ``__should_add_config()`` is false.  Otherwise it adds the ``config``,
    ``clk`` and ``read_config_data`` ports (plus ``config_db``/``use_db``
    when ``self.double_buffer`` is set), fans the configuration bus out to
    every feature that has a ``config`` port, and builds the read-data mux
    and per-feature write-enable decode.
    """
    if self.finalized:
        raise Exception("Circuit already finalized")
    self.finalized = True

    # stall and reset are always added, config only on demand
    self.__add_stall()
    self.__add_reset()
    if not self.__should_add_config():
        return

    config_port_type = ConfigurationType(self.full_config_addr_width,
                                         self.config_data_width)
    self.add_ports(
        config=magma.In(config_port_type),
        clk=magma.In(magma.Clock),
        read_config_data=magma.Out(magma.Bits[self.config_data_width]))

    # double buffer ports
    if self.double_buffer:
        self.add_ports(config_db=magma.In(magma.Bit),
                       use_db=magma.In(magma.Bit))

    # one mux input per feature, selected by the feature part of the address
    self.read_data_mux = MuxWithDefaultWrapper(len(self.features()),
                                               self.config_data_width,
                                               self.config_addr_width,
                                               0)
    self.read_data_mux.instance_name = "read_data_mux"

    # most of the logic copied from tile_magma.py
    # remove all hardcoded values
    for feature in self.features():
        if "config" in feature.ports:
            self.wire(
                self.ports.config.config_addr[self.feature_config_slice],
                feature.ports.config.config_addr)
            self.wire(self.ports.config.config_data,
                      feature.ports.config.config_data)
            self.wire(self.ports.config.read, feature.ports.config.read)

            if self.double_buffer and "config_db" in feature.ports:
                self.wire(self.ports.config_db, feature.ports.config_db)
                self.wire(self.ports.use_db, feature.ports.use_db)

    # Connect S input to config_addr.feature.
    self.wire(self.ports.config.config_addr[self.feature_addr_slice],
              self.read_data_mux.ports.S)
    self.wire(self.read_data_mux.ports.O, self.ports.read_config_data)

    # read_data_mux.EN = (config_addr.tile_id == self.tile_id) & READ
    read_and_tile = FromMagma(mantle.DefineAnd(2))
    eq_tile = FromMagma(mantle.DefineEQ(self.tile_id_width))
    self.wire(self.ports.tile_id, eq_tile.ports.I0)
    self.wire(self.ports.config.config_addr[self.tile_id_slice],
              eq_tile.ports.I1)
    self.wire(read_and_tile.ports.I0, eq_tile.ports.O)
    self.wire(read_and_tile.ports.I1, self.ports.config.read[0])
    self.wire(read_and_tile.ports.O, self.read_data_mux.ports.EN[0])

    # Config_en_tile = (config_addr.tile_id == self.tile_id) & WRITE
    write_and_tile = FromMagma(mantle.DefineAnd(2))
    self.wire(write_and_tile.ports.I0, eq_tile.ports.O)
    self.wire(write_and_tile.ports.I1, self.ports.config.write[0])

    for feature_index, feature in enumerate(self.features()):
        # each mux input is the feature's read data, or constant 0 if the
        # feature has none
        if "read_config_data" in feature.ports:
            mux_source = feature.ports.read_config_data
        else:
            mux_source = Const(0)
        self.wire(mux_source, self.read_data_mux.ports.I[feature_index])

        # for each feature,
        # config_en = (config_addr.feature == feature_num) & config_en_tile
        feature_decode = FromMagma(
            mantle.DefineDecode(feature_index, self.config_addr_width))
        feature_decode.instance_name = f"DECODE_FEATURE_{feature_index}"
        feature_write_en = FromMagma(mantle.DefineAnd(2))
        feature_write_en.instance_name = f"FEATURE_AND_{feature_index}"

        self.wire(feature_decode.ports.I,
                  self.ports.config.config_addr[self.feature_addr_slice])
        self.wire(feature_decode.ports.O, feature_write_en.ports.I0)
        self.wire(write_and_tile.ports.O, feature_write_en.ports.I1)

        if "config" in feature.ports:
            self.wire(feature_write_en.ports.O,
                      feature.ports.config.write[0])
        if "config_en" in feature.ports:
            self.wire(feature_decode.ports.O, feature.ports["config_en"])
def wrap_lake_core(self):
    """Expose the interface of ``self.dut`` on this core and wire it up.

    Ports reported by ``get_interface(self.dut)`` (except the configuration
    bus and ``clk_en``) become ports of this core; every bit of a control
    signal additionally gets a value register, a select register and a 2:1
    mux in front of ``self.underlying``.  clk, inverted reset and inverted
    stall are connected, one ``CoreFeature`` is created per SRAM set in
    ``self.dut.total_sets``, the per-feature configuration buses are ORed
    together, and every top-level configuration from
    ``extract_top_config(self.dut)`` is registered as a config register.
    """
    # Typedefs for ease
    if self.data_width:
        TData = magma.Bits[self.data_width]
    else:
        # This shouldn't be used if the data_width was None
        TData = magma.Bits[16]
    TBit = magma.Bits[1]

    # Enumerate input and output ports
    # (clk and reset are assumed)
    core_interface = get_interface(self.dut)
    cfgs = extract_top_config(self.dut)
    assert len(cfgs) > 0, "No configs?"

    # The configuration bus is added differently than the other ports...
    skip_names = [
        "config_data_in", "config_write", "config_addr_in",
        "config_data_out", "config_read", "config_en", "clk_en"
    ]

    # Signals that can be hardwired to a constant at runtime...
    control_signals = []
    # ...and the rest, wired straight to the underlying representation.
    other_signals = []

    for io_info in core_interface:
        if io_info.port_name in skip_names:
            continue

        # For our purposes, an explicit array means the inner data HAS to be 16 bits
        if io_info.expl_arr:
            ind_ports = io_info.port_size[0]
            intf_type = TData
        else:
            ind_ports = io_info.port_width
            intf_type = TBit

        if io_info.port_dir == "PortDirection.Out":
            dir_type, app_list = magma.Out, self.__outputs
        else:
            dir_type, app_list = magma.In, self.__inputs

        # one port per element when there is more than one, else a single port
        if ind_ports > 1:
            port_names = [f"{io_info.port_name}_{i}" for i in range(ind_ports)]
        else:
            port_names = [io_info.port_name]
        for port_name in port_names:
            self.add_port(port_name, dir_type(intf_type))
            app_list.append(self.ports[port_name])

        # classify each signal for wiring to underlying representation...
        if io_info.is_ctrl:
            control_signals.append((io_info.port_name, io_info.port_width))
            continue
        for position, port_name in enumerate(port_names):
            other_signals.append(
                (port_name, io_info.port_dir, io_info.expl_arr,
                 position, io_info.port_name))

    assert (len(self.__outputs) > 0)

    # We call clk_en stall at this level for legacy reasons????
    self.add_ports(stall=magma.In(TBit), )

    # put a 1-bit register and a mux to select the control signals
    for control_signal, width in control_signals:
        if width == 1:
            bit_labels = [control_signal]
        else:
            bit_labels = [f"{control_signal}_{i}" for i in range(width)]
        for bit, label in enumerate(bit_labels):
            mux = MuxWrapper(2, 1, name=f"{label}_sel")
            reg_value_name = f"{label}_reg_value"
            reg_sel_name = f"{label}_reg_sel"
            self.add_config(reg_value_name, 1)
            self.add_config(reg_sel_name, 1)
            # 0 is the default wire, which takes from the routing network
            self.wire(mux.ports.I[0], self.ports[label])
            self.wire(mux.ports.I[1], self.registers[reg_value_name].ports.O)
            self.wire(mux.ports.S, self.registers[reg_sel_name].ports.O)
            self.wire(mux.ports.O[0],
                      self.underlying.ports[control_signal][bit])

    # Wire the other signals up...
    for pname, pdir, expl_arr, ind, uname in other_signals:
        if expl_arr is False:
            # plain bus: pick the bit by index
            core_side = self.ports[pname][0]
            circuit_side = self.underlying.ports[uname][ind]
        else:
            # explicit array: use the given wire name
            core_side = self.ports[pname]
            circuit_side = self.underlying.ports[pname]
        self.wire(core_side, circuit_side)

    # CLK, RESET, and STALL PER STANDARD PROCEDURE

    # Need to invert this (the circuit port is rst_n)
    self.resetInverter = FromMagma(mantle.DefineInvert(1))
    self.wire(self.resetInverter.ports.I[0], self.ports.reset)
    inverted_reset = self.convert(self.resetInverter.ports.O[0],
                                  magma.asyncreset)
    self.wire(inverted_reset, self.underlying.ports.rst_n)
    self.wire(self.ports.clk, self.underlying.ports.clk)

    # Mem core uses clk_en (essentially active low stall)
    self.stallInverter = FromMagma(mantle.DefineInvert(1))
    self.wire(self.stallInverter.ports.I, self.ports.stall)
    self.wire(self.stallInverter.ports.O[0], self.underlying.ports.clk_en[0])

    # Feature 0 is the tile itself; one more feature per SRAM set.
    self.__features: List[CoreFeature] = [self]
    self.num_sram_features = self.dut.total_sets
    for feature_number in range(1, self.num_sram_features + 1):
        sram_feature = CoreFeature(self, feature_number)
        sram_feature.skip_compression = True
        self.__features.append(sram_feature)

    # Wire the config: every SRAM feature gets its own config_<idx> port
    for idx, sram_feature in enumerate(self.__features[1:], start=1):
        self.add_port(
            f"config_{idx}",
            magma.In(ConfigurationType(self.config_addr_width,
                                       self.config_data_width)))
        # port aliasing
        sram_feature.ports["config"] = self.ports[f"config_{idx}"]
    self.add_port(
        "config",
        magma.In(ConfigurationType(self.config_addr_width,
                                   self.config_data_width)))

    if self.num_sram_features > 0:
        # or the signal up
        t = ConfigurationType(self.config_addr_width, self.config_data_width)
        or_gates = {}
        for t_name in ("config_addr", "config_data"):
            bus_or = FromMagma(
                mantle.DefineOr(len(self.__features), len(t[t_name])))
            bus_or.instance_name = f"OR_{t_name}_FEATURE"
            for idx, feature in enumerate(self.__features):
                self.wire(bus_or.ports[f"I{idx}"],
                          feature.ports.config[t_name])
            or_gates[t_name] = bus_or

        self.wire(
            or_gates["config_addr"].ports.O,
            self.underlying.ports.config_addr_in[0:self.config_addr_width])
        self.wire(or_gates["config_data"].ports.O,
                  self.underlying.ports.config_data_in)

    # read data out
    for idx, sram_feature in enumerate(self.__features[1:], start=1):
        self.add_port(f"read_config_data_{idx}",
                      magma.Out(magma.Bits[self.config_data_width]))
        # port aliasing
        sram_feature.ports["read_config_data"] = \
            self.ports[f"read_config_data_{idx}"]

    # MEM Config: explicit arrays with more than one element get one
    # register per element, everything else a single register.
    configurations = []
    skip_cfgs = []
    for cfg_info in cfgs:
        if cfg_info.port_name in skip_cfgs:
            continue
        if cfg_info.expl_arr and cfg_info.port_size[0] > 1:
            for i in range(cfg_info.port_size[0]):
                configurations.append(
                    (f"{cfg_info.port_name}_{i}", cfg_info.port_width))
        else:
            configurations.append((cfg_info.port_name, cfg_info.port_width))

    # Do all the stuff for the main config
    main_feature = self.__features[0]
    for config_reg_name, width in configurations:
        main_feature.add_config(config_reg_name, width)
        reg_out = main_feature.registers[config_reg_name].ports.O
        if width == 1:
            self.wire(reg_out[0], self.underlying.ports[config_reg_name][0])
        else:
            self.wire(reg_out, self.underlying.ports[config_reg_name])

    # SRAM
    # These should also account for num features
    if self.num_sram_features > 0:
        # NOTE(review): the two instance names look swapped (the read OR is
        # labelled WR and vice versa); reproduced as-is.
        or_all_cfg_rd = FromMagma(
            mantle.DefineOr(self.num_sram_features, 1))
        or_all_cfg_rd.instance_name = "OR_CONFIG_WR_SRAM"
        or_all_cfg_wr = FromMagma(
            mantle.DefineOr(self.num_sram_features, 1))
        or_all_cfg_wr.instance_name = "OR_CONFIG_RD_SRAM"

        for sram_index, sram_feature in enumerate(self.__features[1:]):
            en_port_name = f"config_en_{sram_index}"
            self.add_port(en_port_name, magma.In(magma.Bit))
            # port aliasing
            sram_feature.ports["config_en"] = self.ports[en_port_name]

            # Sort of a temp hack - the name is just config_data_out
            if self.num_sram_features == 1:
                self.wire(sram_feature.ports.read_config_data,
                          self.underlying.ports["config_data_out"])
            else:
                self.wire(
                    sram_feature.ports.read_config_data,
                    self.underlying.ports[f"config_data_out_{sram_index}"])

            # also need to wire the sram signal:
            # circuit config_en = (write | read) & feature config_en
            en_and = FromMagma(mantle.DefineAnd(2, 1))
            en_and.instance_name = f"AND_CONFIG_EN_SRAM_{sram_index}"
            rd_wr_or = FromMagma(mantle.DefineOr(2, 1))
            rd_wr_or.instance_name = f"OR_CONFIG_EN_SRAM_{sram_index}"

            self.wire(rd_wr_or.ports.I0, sram_feature.ports.config.write)
            self.wire(rd_wr_or.ports.I1, sram_feature.ports.config.read)
            self.wire(en_and.ports.I0, rd_wr_or.ports.O)
            self.wire(en_and.ports.I1[0], sram_feature.ports.config_en)
            self.wire(en_and.ports.O[0],
                      self.underlying.ports["config_en"][sram_index])

            # Still connect to the OR of all the config rd/wr
            self.wire(sram_feature.ports.config.write,
                      or_all_cfg_wr.ports[f"I{sram_index}"])
            self.wire(sram_feature.ports.config.read,
                      or_all_cfg_rd.ports[f"I{sram_index}"])

        self.wire(or_all_cfg_rd.ports.O[0],
                  self.underlying.ports.config_read[0])
        self.wire(or_all_cfg_wr.ports.O[0],
                  self.underlying.ports.config_write[0])

    self._setup_config()
def __init__(self, data_width, data_depth):
    """Wrap the Genesis2 memory core as a configurable core.

    Declares the Genesis2 circuit for the given ``data_width`` and
    ``data_depth``, exposes its data/control ports, ties off the inputs
    that are not driven yet, and splits configuration into five features
    (index 0 plus four SRAM features), each with its own ``config_<idx>``
    and ``read_config_data_<idx>`` port.
    """
    # fixed bus geometry used throughout this core
    config_addr_width = 8
    config_data_width = 32
    circuit_addr_width = 32
    tied_low_addr_bits = 24
    num_sram_features = 4

    super().__init__(config_addr_width, config_data_width)

    self.data_width = data_width
    self.data_depth = data_depth

    TData = magma.Bits[self.data_width]
    TBit = magma.Bits[1]

    self.add_ports(
        data_in=magma.In(TData),
        addr_in=magma.In(TData),
        data_out=magma.Out(TData),
        flush=magma.In(TBit),
        wen_in=magma.In(TBit),
        ren_in=magma.In(TBit),
        stall=magma.In(magma.Bits[4]))

    # Instead of a single read_config_data, we have multiple for each
    # "sub"-feature of this core.
    self.ports.pop("read_config_data")

    genesis_wrapper = memory_core_genesis2.memory_core_wrapper
    declare = genesis_wrapper.generator(memory_core_genesis2.param_mapping,
                                        mode="declare")
    circ = declare(data_width=self.data_width, data_depth=self.data_depth)
    self.underlying = FromMagma(circ)

    self.wire(self.ports.data_in, self.underlying.ports.data_in)
    self.wire(self.ports.addr_in, self.underlying.ports.addr_in)
    self.wire(self.ports.data_out, self.underlying.ports.data_out)
    self.wire(self.ports.reset, self.underlying.ports.reset)
    self.wire(self.ports.flush[0], self.underlying.ports.flush)
    self.wire(self.ports.wen_in[0], self.underlying.ports.wen_in)
    self.wire(self.ports.ren_in[0], self.underlying.ports.ren_in)

    # The circuit takes clk_en (essentially active low stall); only the
    # lowest stall bit is used.
    self.stallInverter = FromMagma(mantle.DefineInvert(1))
    self.wire(self.stallInverter.ports.I, self.ports.stall[0:1])
    self.wire(self.stallInverter.ports.O[0], self.underlying.ports.clk_en)

    # TODO(rsetaluri): Actually wire these inputs.
    zero_signals = (
        ("config_en_linebuf", 1),
        ("chain_wen_in", 1),
        ("chain_in", self.data_width),
    )
    # enable read and write by default
    one_signals = (
        ("config_read", 1),
        ("config_write", 1),
    )
    for level, tied_signals in ((0, zero_signals), (1, one_signals)):
        for name, width in tied_signals:
            if width > 1:
                val = magma.bits(level, width)
            else:
                val = magma.bit(level)
            self.wire(Const(val), self.underlying.ports[name])
    self.wire(Const(magma.bits(0, tied_low_addr_bits)),
              self.underlying.ports.config_addr[0:tied_low_addr_bits])

    # we have five features in total
    # 0:    LINEBUF
    # 1-4:  SMEM
    # current setup is already in line buffer mode, so we pass self in
    # notice that config_en_linebuf is to change the address in the
    # line buffer mode, which is not used in practice
    self.__features: List[CoreFeature] = [CoreFeature(self, 0)]
    for feature_number in range(1, num_sram_features + 1):
        self.__features.append(CoreFeature(self, feature_number))

    for idx, feature in enumerate(self.__features):
        config_port_name = f"config_{idx}"
        self.add_port(
            config_port_name,
            magma.In(ConfigurationType(config_addr_width,
                                       config_data_width)))
        # port aliasing
        feature.ports["config"] = self.ports[config_port_name]

    # or the signal up
    t = ConfigurationType(config_addr_width, config_data_width)
    or_gates = {}
    for t_name in ("config_addr", "config_data"):
        bus_or = FromMagma(
            mantle.DefineOr(len(self.__features), len(t[t_name])))
        bus_or.instance_name = f"OR_{t_name}_FEATURE"
        for idx, feature in enumerate(self.__features):
            self.wire(bus_or.ports[f"I{idx}"], feature.ports.config[t_name])
        or_gates[t_name] = bus_or

    # the ORed address drives the remaining (upper) address bits
    self.wire(
        or_gates["config_addr"].ports.O,
        self.underlying.ports.config_addr[
            tied_low_addr_bits:circuit_addr_width])
    self.wire(or_gates["config_data"].ports.O,
              self.underlying.ports.config_data)

    # only the first one has config_en
    self.wire(self.__features[0].ports.config.write[0],
              self.underlying.ports.config_en)

    # read data out
    for idx, feature in enumerate(self.__features):
        read_port_name = f"read_config_data_{idx}"
        self.add_port(read_port_name,
                      magma.Out(magma.Bits[config_data_width]))
        # port aliasing
        feature.ports["read_config_data"] = self.ports[read_port_name]

    # MEM config
    self.wire(self.ports.read_config_data_0,
              self.underlying.ports.read_data)

    # SRAM
    for sram_index in range(num_sram_features):
        sram_feature = self.__features[sram_index + 1]
        self.wire(sram_feature.ports.read_config_data,
                  self.underlying.ports[f"read_data_sram_{sram_index}"])
        # also need to wire the sram signal
        en_port_name = f"config_en_{sram_index}"
        self.add_port(en_port_name, magma.In(magma.Bit))
        # port aliasing
        sram_feature.ports["config_en"] = self.ports[en_port_name]
        self.wire(self.underlying.ports["config_en_sram"][sram_index],
                  self.ports[en_port_name])
def __init__(self,
             data_width=16,  # CGRA Params
             mem_width=64,
             mem_depth=512,
             banks=1,
             input_iterator_support=6,  # Addr Controllers
             output_iterator_support=6,
             input_config_width=16,
             output_config_width=16,
             interconnect_input_ports=2,  # Connection to int
             interconnect_output_ports=2,
             mem_input_ports=1,
             mem_output_ports=1,
             use_sram_stub=True,
             sram_macro_info=SRAMMacroInfo("TS1N16FFCLLSBLVTC512X32M4S",
                                           wtsel_value=0, rtsel_value=1),
             read_delay=1,  # Cycle delay in read (SRAM vs Register File)
             rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
             agg_height=4,
             tb_sched_max=16,
             config_data_width=32,
             config_addr_width=8,
             num_tiles=1,
             fifo_mode=True,
             add_clk_enable=True,
             add_flush=True,
             override_name=None,
             gen_addr=True):
    """Build the memory core by wrapping a (possibly cached) LakeTop circuit.

    The constructor records the generator parameters on the instance,
    builds a ``LakeTop`` and converts it to magma (or fetches a previously
    converted circuit from ``LakeCoreBase._circuit_cache``), stores the
    wrapped circuit in ``self.underlying``, calls ``self.wrap_lake_core()``
    and finally dumps the sorted configuration register names to
    ``mem_cfg.txt`` and ``mem_synth.txt`` in the current directory.

    Most keyword arguments are stored as attributes of the same name and
    forwarded to ``LakeTop``.  Exceptions visible in this constructor:

    * ``data_width``, ``config_data_width`` and ``config_addr_width`` are
      also handed to the base-class constructor; ``self.data_width`` is
      read here but not assigned here (presumably the base class sets
      it -- confirm against ``LakeCoreBase``).
    * ``mem_input_ports`` and ``mem_output_ports`` are stored but not
      forwarded to ``LakeTop``.
    * ``tb_sched_max`` and ``override_name`` are accepted but unused.
    """
    lake_name = "LakeTop"

    super().__init__(config_data_width=config_data_width,
                     config_addr_width=config_addr_width,
                     data_width=data_width,
                     name="MemCore")

    # Capture everything to the tile object (data_width is not assigned
    # here; it was passed to the base-class constructor above).
    self.mem_width = mem_width
    self.mem_depth = mem_depth
    self.banks = banks
    self.fw_int = int(self.mem_width / self.data_width)
    self.input_iterator_support = input_iterator_support
    self.output_iterator_support = output_iterator_support
    self.input_config_width = input_config_width
    self.output_config_width = output_config_width
    self.interconnect_input_ports = interconnect_input_ports
    self.interconnect_output_ports = interconnect_output_ports
    self.mem_input_ports = mem_input_ports
    self.mem_output_ports = mem_output_ports
    self.use_sram_stub = use_sram_stub
    self.sram_macro_info = sram_macro_info
    self.read_delay = read_delay
    self.rw_same_cycle = rw_same_cycle
    self.agg_height = agg_height
    self.config_data_width = config_data_width
    self.config_addr_width = config_addr_width
    self.num_tiles = num_tiles
    self.fifo_mode = fifo_mode
    self.add_clk_enable = add_clk_enable
    self.add_flush = add_flush
    self.gen_addr = gen_addr

    # The key must contain every parameter that shapes the generated
    # circuit.  input_config_width / output_config_width are forwarded to
    # LakeTop below, so they are part of the key; without them two cores
    # that differ only in those widths would share one cached circuit.
    cache_key = (self.data_width, self.mem_width, self.mem_depth,
                 self.banks, self.input_iterator_support,
                 self.output_iterator_support,
                 self.interconnect_input_ports,
                 self.interconnect_output_ports,
                 self.use_sram_stub, self.sram_macro_info,
                 self.read_delay, self.rw_same_cycle, self.agg_height,
                 self.config_data_width, self.config_addr_width,
                 self.num_tiles, self.fifo_mode,
                 self.add_clk_enable, self.add_flush, self.gen_addr,
                 self.input_config_width, self.output_config_width)

    # Check for circuit caching
    if cache_key not in LakeCoreBase._circuit_cache:
        # Instantiate core object here - will only use the object
        # representation to query for information. The circuit
        # representation will be cached and retrieved in the following
        # steps.
        self.dut = LakeTop(
            data_width=self.data_width,
            mem_width=self.mem_width,
            mem_depth=self.mem_depth,
            banks=self.banks,
            input_iterator_support=self.input_iterator_support,
            output_iterator_support=self.output_iterator_support,
            input_config_width=self.input_config_width,
            output_config_width=self.output_config_width,
            interconnect_input_ports=self.interconnect_input_ports,
            interconnect_output_ports=self.interconnect_output_ports,
            use_sram_stub=self.use_sram_stub,
            sram_macro_info=self.sram_macro_info,
            read_delay=self.read_delay,
            rw_same_cycle=self.rw_same_cycle,
            agg_height=self.agg_height,
            config_data_width=self.config_data_width,
            config_addr_width=self.config_addr_width,
            num_tiles=self.num_tiles,
            fifo_mode=self.fifo_mode,
            add_clk_enable=self.add_clk_enable,
            add_flush=self.add_flush,
            name=lake_name,
            gen_addr=self.gen_addr)

        change_sram_port_pass = change_sram_port_names(
            use_sram_stub, sram_macro_info)
        circ = kts.util.to_magma(
            self.dut,
            flatten_array=True,
            check_multiple_driver=False,
            optimize_if=False,
            check_flip_flop_always_ff=False,
            additional_passes={"change_sram_port": change_sram_port_pass})
        LakeCoreBase._circuit_cache[cache_key] = (circ, self.dut)
    else:
        circ, self.dut = LakeCoreBase._circuit_cache[cache_key]

    # Save as underlying circuit object
    self.underlying = FromMagma(circ)

    self.wrap_lake_core()

    # Dump the register names (sorted) for downstream tooling.
    # NOTE(review): these files are written to the current working
    # directory on every construction.
    conf_names = sorted(self.registers.keys())
    with open("mem_cfg.txt", "w+") as cfg_dump:
        for reg in conf_names:
            write_line = f"(\"{reg}\", 0), # {self.registers[reg].width}\n"
            cfg_dump.write(write_line)
    with open("mem_synth.txt", "w+") as cfg_dump:
        for reg in conf_names:
            write_line = f"{reg}\n"
            cfg_dump.write(write_line)
def __init__(self, peak_generator):
    """Wrap a PEAK-generated circuit as a configurable core.

    The generator is wrapped in ``_PeakWrapper``; its RTL becomes
    ``self.peak_circuit``.  Every wrapper input/output that is not listed
    in ``self.ignored_ports`` is exposed as a port of this core and wired
    to the circuit.  The instruction is split across 32-bit configuration
    registers, reset and the config address/data are wired through,
    ``config_en`` is tied to constant 0 and ``stall`` is inverted to drive
    the circuit's ``clk_en``.
    """
    super().__init__(8, 32)

    # Ports of the PEAK circuit that are wired by hand further below and
    # therefore must not be exposed automatically.
    self.ignored_ports = {
        "clk_en",
        "reset",
        "config_addr",
        "config_data",
        "config_en",
        "read_config_data",
    }

    self.wrapper = _PeakWrapper(peak_generator)

    # Generate core RTL (as magma).
    self.peak_circuit = FromMagma(self.wrapper.rtl())

    # Expose the remaining inputs, then the outputs, and wire each one.
    port_groups = (
        (self.wrapper.inputs(), magma.In),
        (self.wrapper.outputs(), magma.Out),
    )
    for group, direction in port_groups:
        is_input = direction is magma.In
        for position, (port_name, peak_type) in enumerate(group.items()):
            if port_name in self.ignored_ports:
                continue
            converted = _convert_type(peak_type)
            self.add_port(port_name, direction(converted))
            core_port = self.ports[port_name]
            # One-bit vectors are wired through their single bit.
            if converted is magma.Bits[1]:
                core_port = core_port[0]
            # Inputs keep their name on the circuit; outputs are addressed
            # as O<position>, where position counts ignored ports as well.
            if is_input:
                circuit_port = port_name
            else:
                circuit_port = f"O{position}"
            self.wire(core_port, self.peak_circuit.ports[circuit_port])

    self.add_ports(
        config=magma.In(ConfigurationType(8, 32)),
        stall=magma.In(magma.Bits[1]),
    )

    # Naive partitioning of the (possibly wide) PE instruction into 32-bit
    # config registers; the last register may be only partially used.
    instr_bits = self.wrapper.instruction_width()
    reg_count = math.ceil(instr_bits / 32)
    instr_port = self.wrapper.instruction_name()
    self.reg_width = {}
    for reg_idx in range(reg_count):
        reg_name = f"{instr_port}_{reg_idx}"
        self.add_config(reg_name, 32)
        low = reg_idx * 32
        high = min(low + 32, instr_bits)
        used_bits = high - low
        self.reg_width[reg_name] = used_bits
        reg_out = self.registers[reg_name].ports.O[:used_bits]
        instr_slice = self.peak_circuit.ports[instr_port][low:high]
        self.wire(reg_out, instr_slice)

    # TODO: connect this wire once lassen has async reset
    self.wire(self.ports.reset, self.peak_circuit.ports.ASYNCRESET)

    # Pass the config data/address straight through to the core.
    for cfg_field in ("config_data", "config_addr"):
        self.wire(self.ports.config[cfg_field],
                  self.peak_circuit.ports[cfg_field])

    # config_en is never used, so tie it to 0.
    self.wire(Const(0), self.peak_circuit.ports.config_en)

    # PE core uses clk_en (essentially active low stall)
    self.stallInverter = FromMagma(mantle.DefineInvert(1))
    inverter_ports = self.stallInverter.ports
    self.wire(inverter_ports.I, self.ports.stall)
    self.wire(inverter_ports.O[0], self.peak_circuit.ports.clk_en)

    self._setup_config()
def __init__(self,
             data_width=16,  # CGRA Params
             mem_width=64,
             mem_depth=512,
             banks=1,
             input_iterator_support=6,  # Addr Controllers
             output_iterator_support=6,
             input_config_width=16,
             output_config_width=16,
             interconnect_input_ports=2,  # Connection to int
             interconnect_output_ports=2,
             mem_input_ports=1,
             mem_output_ports=1,
             use_sram_stub=1,
             sram_macro_info=SRAMMacroInfo("TS1N16FFCLLSBLVTC512X32M4S"),
             read_delay=1,  # Cycle delay in read (SRAM vs Register File)
             rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
             agg_height=4,
             max_agg_schedule=16,
             input_max_port_sched=16,
             output_max_port_sched=16,
             align_input=1,
             max_line_length=128,
             max_tb_height=1,
             tb_range_max=1024,
             tb_range_inner_max=64,
             tb_sched_max=16,
             max_tb_stride=15,
             num_tb=1,
             tb_iterator_support=2,
             multiwrite=1,
             max_prefetch=8,
             config_data_width=32,
             config_addr_width=8,
             num_tiles=2,
             app_ctrl_depth_width=16,
             remove_tb=False,
             fifo_mode=True,
             add_clk_enable=True,
             add_flush=True,
             core_reset_pos=False,
             stcl_valid_iter=4):
    """Build the memory-core tile around a (possibly cached) LakeTop circuit.

    Steps performed, in order:

    1. Store every argument on the instance.
    2. Declare the tile's data, control, chaining and status ports.  Port
       names carry an ``_<i>`` suffix only when the corresponding
       ``interconnect_*_ports`` count is greater than one.
    3. Build ``LakeTop`` and convert it to magma, or reuse the entry in
       the class-level circuit cache, and wrap it as ``self.underlying``.
    4. Put a 2:1 mux in front of each control signal so that it can be
       overridden by a pair of 1-bit config registers
       (``<signal>_reg_sel`` / ``<signal>_reg_value``).
    5. Wire data, reset (inverted to ``rst_n``), clock, status and stall
       (inverted to ``clk_en``).
    6. Create one ``CoreFeature`` per SRAM set (``lt_dut.total_sets``) in
       addition to the tile itself, OR their config address/data together
       and declare the per-feature config / read-data / enable ports.
    7. Add all configuration registers and wire them to the same-named
       ports of the underlying circuit, then call ``_setup_config()``.
    8. Dump the sorted register names to ``mem_cfg.txt`` and
       ``mem_synth.txt`` in the current directory.

    All arguments are stored as attributes of the same name.
    ``mem_input_ports``, ``mem_output_ports`` and ``core_reset_pos`` are
    stored but not forwarded to ``LakeTop``; every other argument is.
    """
    super().__init__(config_addr_width, config_data_width)

    # Capture everything to the tile object
    self.data_width = data_width
    self.mem_width = mem_width
    self.mem_depth = mem_depth
    self.banks = banks
    self.fw_int = int(self.mem_width / self.data_width)
    self.input_iterator_support = input_iterator_support
    self.output_iterator_support = output_iterator_support
    self.input_config_width = input_config_width
    self.output_config_width = output_config_width
    self.interconnect_input_ports = interconnect_input_ports
    self.interconnect_output_ports = interconnect_output_ports
    self.mem_input_ports = mem_input_ports
    self.mem_output_ports = mem_output_ports
    self.use_sram_stub = use_sram_stub
    self.sram_macro_info = sram_macro_info
    self.read_delay = read_delay
    self.rw_same_cycle = rw_same_cycle
    self.agg_height = agg_height
    self.max_agg_schedule = max_agg_schedule
    self.input_max_port_sched = input_max_port_sched
    self.output_max_port_sched = output_max_port_sched
    self.align_input = align_input
    self.max_line_length = max_line_length
    self.max_tb_height = max_tb_height
    self.tb_range_max = tb_range_max
    self.tb_range_inner_max = tb_range_inner_max
    self.tb_sched_max = tb_sched_max
    self.max_tb_stride = max_tb_stride
    self.num_tb = num_tb
    self.tb_iterator_support = tb_iterator_support
    self.multiwrite = multiwrite
    self.max_prefetch = max_prefetch
    self.config_data_width = config_data_width
    self.config_addr_width = config_addr_width
    self.num_tiles = num_tiles
    self.remove_tb = remove_tb
    self.fifo_mode = fifo_mode
    self.add_clk_enable = add_clk_enable
    self.add_flush = add_flush
    self.core_reset_pos = core_reset_pos
    self.app_ctrl_depth_width = app_ctrl_depth_width
    self.stcl_valid_iter = stcl_valid_iter

    # Typedefs for ease
    TData = magma.Bits[self.data_width]
    TBit = magma.Bits[1]

    self.__inputs = []
    self.__outputs = []

    # Enumerate input and output ports
    # (clk and reset are assumed)
    if self.interconnect_input_ports > 1:
        for i in range(self.interconnect_input_ports):
            self.add_port(f"addr_in_{i}", magma.In(TData))
            self.__inputs.append(self.ports[f"addr_in_{i}"])
            self.add_port(f"data_in_{i}", magma.In(TData))
            self.__inputs.append(self.ports[f"data_in_{i}"])
            self.add_port(f"wen_in_{i}", magma.In(TBit))
            self.__inputs.append(self.ports[f"wen_in_{i}"])
    else:
        self.add_port("addr_in", magma.In(TData))
        self.__inputs.append(self.ports["addr_in"])
        self.add_port("data_in", magma.In(TData))
        self.__inputs.append(self.ports["data_in"])
        self.add_port("wen_in", magma.In(TBit))
        self.__inputs.append(self.ports.wen_in)

    if self.interconnect_output_ports > 1:
        for i in range(self.interconnect_output_ports):
            self.add_port(f"data_out_{i}", magma.Out(TData))
            self.__outputs.append(self.ports[f"data_out_{i}"])
            self.add_port(f"ren_in_{i}", magma.In(TBit))
            self.__inputs.append(self.ports[f"ren_in_{i}"])
            self.add_port(f"valid_out_{i}", magma.Out(TBit))
            self.__outputs.append(self.ports[f"valid_out_{i}"])
            # Chaining
            self.add_port(f"chain_valid_in_{i}", magma.In(TBit))
            self.__inputs.append(self.ports[f"chain_valid_in_{i}"])
            self.add_port(f"chain_data_in_{i}", magma.In(TData))
            self.__inputs.append(self.ports[f"chain_data_in_{i}"])
            self.add_port(f"chain_data_out_{i}", magma.Out(TData))
            self.__outputs.append(self.ports[f"chain_data_out_{i}"])
            self.add_port(f"chain_valid_out_{i}", magma.Out(TBit))
            self.__outputs.append(self.ports[f"chain_valid_out_{i}"])
    else:
        self.add_port("data_out", magma.Out(TData))
        self.__outputs.append(self.ports["data_out"])
        self.add_port("ren_in", magma.In(TBit))
        self.__inputs.append(self.ports["ren_in"])
        self.add_port("valid_out", magma.Out(TBit))
        self.__outputs.append(self.ports["valid_out"])
        self.add_port("chain_valid_in", magma.In(TBit))
        self.__inputs.append(self.ports["chain_valid_in"])
        self.add_port("chain_data_in", magma.In(TData))
        self.__inputs.append(self.ports["chain_data_in"])
        self.add_port("chain_data_out", magma.Out(TData))
        self.__outputs.append(self.ports["chain_data_out"])
        self.add_port("chain_valid_out", magma.Out(TBit))
        self.__outputs.append(self.ports["chain_valid_out"])

    self.add_ports(
        flush=magma.In(TBit),
        full=magma.Out(TBit),
        empty=magma.Out(TBit),
        stall=magma.In(TBit),
        sram_ready_out=magma.Out(TBit)
    )

    # stall is not recorded in __inputs (only flush is).
    self.__inputs.append(self.ports.flush)
    self.__outputs.append(self.ports.full)
    self.__outputs.append(self.ports.empty)
    self.__outputs.append(self.ports.sram_ready_out)

    # The key must contain every parameter that shapes the generated
    # circuit.  input_config_width, output_config_width and
    # tb_range_inner_max are all forwarded to LakeTop below, so they are
    # part of the key; without them, cores differing only in those values
    # would silently share one cached circuit.
    cache_key = (self.data_width, self.mem_width, self.mem_depth,
                 self.banks, self.input_iterator_support,
                 self.output_iterator_support,
                 self.interconnect_input_ports,
                 self.interconnect_output_ports,
                 self.use_sram_stub, self.sram_macro_info,
                 self.read_delay, self.rw_same_cycle, self.agg_height,
                 self.max_agg_schedule, self.input_max_port_sched,
                 self.output_max_port_sched, self.align_input,
                 self.max_line_length, self.max_tb_height,
                 self.tb_range_max, self.tb_sched_max,
                 self.max_tb_stride, self.num_tb,
                 self.tb_iterator_support, self.multiwrite,
                 self.max_prefetch, self.config_data_width,
                 self.config_addr_width, self.num_tiles, self.remove_tb,
                 self.fifo_mode, self.stcl_valid_iter,
                 self.add_clk_enable, self.add_flush,
                 self.app_ctrl_depth_width,
                 self.input_config_width, self.output_config_width,
                 self.tb_range_inner_max)

    # Check for circuit caching
    if cache_key not in MemCore.__circuit_cache:
        # Instantiate core object here - will only use the object
        # representation to query for information. The circuit
        # representation will be cached and retrieved in the following
        # steps.
        lt_dut = LakeTop(
            data_width=self.data_width,
            mem_width=self.mem_width,
            mem_depth=self.mem_depth,
            banks=self.banks,
            input_iterator_support=self.input_iterator_support,
            output_iterator_support=self.output_iterator_support,
            input_config_width=self.input_config_width,
            output_config_width=self.output_config_width,
            interconnect_input_ports=self.interconnect_input_ports,
            interconnect_output_ports=self.interconnect_output_ports,
            use_sram_stub=self.use_sram_stub,
            sram_macro_info=self.sram_macro_info,
            read_delay=self.read_delay,
            rw_same_cycle=self.rw_same_cycle,
            agg_height=self.agg_height,
            max_agg_schedule=self.max_agg_schedule,
            input_max_port_sched=self.input_max_port_sched,
            output_max_port_sched=self.output_max_port_sched,
            align_input=self.align_input,
            max_line_length=self.max_line_length,
            max_tb_height=self.max_tb_height,
            tb_range_max=self.tb_range_max,
            tb_range_inner_max=self.tb_range_inner_max,
            tb_sched_max=self.tb_sched_max,
            max_tb_stride=self.max_tb_stride,
            num_tb=self.num_tb,
            tb_iterator_support=self.tb_iterator_support,
            multiwrite=self.multiwrite,
            max_prefetch=self.max_prefetch,
            config_data_width=self.config_data_width,
            config_addr_width=self.config_addr_width,
            num_tiles=self.num_tiles,
            app_ctrl_depth_width=self.app_ctrl_depth_width,
            remove_tb=self.remove_tb,
            fifo_mode=self.fifo_mode,
            add_clk_enable=self.add_clk_enable,
            add_flush=self.add_flush,
            stcl_valid_iter=self.stcl_valid_iter)

        change_sram_port_pass = change_sram_port_names(
            use_sram_stub, sram_macro_info)
        circ = kts.util.to_magma(
            lt_dut,
            flatten_array=True,
            check_multiple_driver=False,
            optimize_if=False,
            check_flip_flop_always_ff=False,
            additional_passes={"change_sram_port": change_sram_port_pass})
        MemCore.__circuit_cache[cache_key] = (circ, lt_dut)
    else:
        circ, lt_dut = MemCore.__circuit_cache[cache_key]

    # Save as underlying circuit object
    self.underlying = FromMagma(circ)

    self.chain_idx_bits = max(1, kts.clog2(self.num_tiles))

    # put a 1-bit register and a mux to select the control signals
    # TODO: check if enable_chain_output needs to be here? I don't think so?
    control_signals = [("wen_in", self.interconnect_input_ports),
                       ("ren_in", self.interconnect_output_ports),
                       ("flush", 1),
                       ("chain_valid_in", self.interconnect_output_ports)]
    for control_signal, width in control_signals:
        # TODO: consult with Ankita to see if we can use the normal
        # mux here
        if width == 1:
            mux = MuxWrapper(2, 1, name=f"{control_signal}_sel")
            reg_value_name = f"{control_signal}_reg_value"
            reg_sel_name = f"{control_signal}_reg_sel"
            self.add_config(reg_value_name, 1)
            self.add_config(reg_sel_name, 1)
            self.wire(mux.ports.I[0], self.ports[control_signal])
            self.wire(mux.ports.I[1],
                      self.registers[reg_value_name].ports.O)
            self.wire(mux.ports.S, self.registers[reg_sel_name].ports.O)
            # 0 is the default wire, which takes from the routing network
            self.wire(mux.ports.O[0],
                      self.underlying.ports[control_signal][0])
        else:
            for i in range(width):
                mux = MuxWrapper(2, 1, name=f"{control_signal}_{i}_sel")
                reg_value_name = f"{control_signal}_{i}_reg_value"
                reg_sel_name = f"{control_signal}_{i}_reg_sel"
                self.add_config(reg_value_name, 1)
                self.add_config(reg_sel_name, 1)
                self.wire(mux.ports.I[0],
                          self.ports[f"{control_signal}_{i}"])
                self.wire(mux.ports.I[1],
                          self.registers[reg_value_name].ports.O)
                self.wire(mux.ports.S,
                          self.registers[reg_sel_name].ports.O)
                # 0 is the default wire, which takes from the routing
                # network
                self.wire(mux.ports.O[0],
                          self.underlying.ports[control_signal][i])

    if self.interconnect_input_ports > 1:
        for i in range(self.interconnect_input_ports):
            self.wire(self.ports[f"data_in_{i}"],
                      self.underlying.ports[f"data_in_{i}"])
            self.wire(self.ports[f"addr_in_{i}"],
                      self.underlying.ports[f"addr_in_{i}"])
    else:
        self.wire(self.ports.addr_in, self.underlying.ports.addr_in)
        self.wire(self.ports.data_in, self.underlying.ports.data_in)

    if self.interconnect_output_ports > 1:
        for i in range(self.interconnect_output_ports):
            self.wire(self.ports[f"data_out_{i}"],
                      self.underlying.ports[f"data_out_{i}"])
            self.wire(self.ports[f"chain_data_in_{i}"],
                      self.underlying.ports[f"chain_data_in_{i}"])
            self.wire(self.ports[f"chain_data_out_{i}"],
                      self.underlying.ports[f"chain_data_out_{i}"])
    else:
        self.wire(self.ports.data_out, self.underlying.ports.data_out)
        self.wire(self.ports.chain_data_in,
                  self.underlying.ports.chain_data_in)
        self.wire(self.ports.chain_data_out,
                  self.underlying.ports.chain_data_out)

    # The underlying circuit takes rst_n, so the tile reset is inverted.
    self.resetInverter = FromMagma(mantle.DefineInvert(1))
    self.wire(self.resetInverter.ports.I[0], self.ports.reset)
    self.wire(self.resetInverter.ports.O[0], self.underlying.ports.rst_n)
    self.wire(self.ports.clk, self.underlying.ports.clk)

    if self.interconnect_output_ports == 1:
        self.wire(self.ports.valid_out[0],
                  self.underlying.ports.valid_out[0])
        self.wire(self.ports.chain_valid_out[0],
                  self.underlying.ports.chain_valid_out[0])
    else:
        for j in range(self.interconnect_output_ports):
            self.wire(self.ports[f"valid_out_{j}"][0],
                      self.underlying.ports.valid_out[j])
            self.wire(self.ports[f"chain_valid_out_{j}"][0],
                      self.underlying.ports.chain_valid_out[j])

    self.wire(self.ports.empty[0], self.underlying.ports.empty[0])
    self.wire(self.ports.full[0], self.underlying.ports.full[0])

    # PE core uses clk_en (essentially active low stall)
    self.stallInverter = FromMagma(mantle.DefineInvert(1))
    self.wire(self.stallInverter.ports.I, self.ports.stall)
    self.wire(self.stallInverter.ports.O[0],
              self.underlying.ports.clk_en[0])

    self.wire(self.ports.sram_ready_out[0],
              self.underlying.ports.sram_ready_out[0])

    # Features: index 0 is the tile itself; indices
    # 1..num_sram_features are one CoreFeature per SRAM set reported by
    # the LakeTop object.
    self.__features: List[CoreFeature] = [self]
    self.num_sram_features = lt_dut.total_sets
    for sram_index in range(self.num_sram_features):
        core_feature = CoreFeature(self, sram_index + 1)
        self.__features.append(core_feature)

    # Wire the config.  Feature 0 uses the plain "config" port; every
    # other feature gets its own "config_<idx>" port.
    # NOTE(review): the configuration type is hard-coded to (8, 32) here
    # regardless of config_addr_width / config_data_width.
    for idx, core_feature in enumerate(self.__features):
        if idx > 0:
            self.add_port(f"config_{idx}",
                          magma.In(ConfigurationType(8, 32)))
            # port aliasing
            core_feature.ports["config"] = self.ports[f"config_{idx}"]
    self.add_port("config", magma.In(ConfigurationType(8, 32)))

    # or the signal up
    t = ConfigurationType(8, 32)
    t_names = ["config_addr", "config_data"]
    or_gates = {}
    for t_name in t_names:
        port_type = t[t_name]
        or_gate = FromMagma(
            mantle.DefineOr(len(self.__features), len(port_type)))
        or_gate.instance_name = f"OR_{t_name}_FEATURE"
        for idx, core_feature in enumerate(self.__features):
            self.wire(or_gate.ports[f"I{idx}"],
                      core_feature.ports.config[t_name])
        or_gates[t_name] = or_gate

    self.wire(or_gates["config_addr"].ports.O,
              self.underlying.ports.config_addr_in[0:8])
    self.wire(or_gates["config_data"].ports.O,
              self.underlying.ports.config_data_in)

    # read data out (only the SRAM features get a dedicated port here)
    for idx, core_feature in enumerate(self.__features):
        if idx > 0:
            self.add_port(f"read_config_data_{idx}",
                          magma.Out(magma.Bits[32]))
            # port aliasing
            core_feature.ports["read_config_data"] = \
                self.ports[f"read_config_data_{idx}"]

    # MEM Config: (register name, width) pairs.  Each name must match a
    # port of the underlying circuit, because the registers are wired by
    # name below.  ("stencil_width", 16) is not included yet.
    configurations = [
        ("tile_en", 1),
        ("fifo_ctrl_fifo_depth", 16),
        ("mode", 2),
        ("enable_chain_output", 1),
        ("enable_chain_input", 1)]

    # (merged register name, total width, number of merged fields)
    merged_configs = []

    # Add config registers to configurations
    # TODO: Have lake spit this information out automatically from the
    # wrapper
    configurations.append(("chain_idx_input", self.chain_idx_bits))
    configurations.append(("chain_idx_output", self.chain_idx_bits))

    for i in range(self.interconnect_input_ports):
        configurations.append((f"strg_ub_agg_align_{i}_line_length",
                               kts.clog2(self.max_line_length)))
        configurations.append((f"strg_ub_agg_in_{i}_in_period",
                               kts.clog2(self.input_max_port_sched)))

        for j in range(self.input_max_port_sched):
            configurations.append((f"strg_ub_agg_in_{i}_in_sched_{j}",
                                   kts.clog2(self.agg_height)))

        configurations.append((f"strg_ub_agg_in_{i}_out_period",
                               kts.clog2(self.input_max_port_sched)))

        for j in range(self.output_max_port_sched):
            configurations.append((f"strg_ub_agg_in_{i}_out_sched_{j}",
                                   kts.clog2(self.agg_height)))

        configurations.append((f"strg_ub_app_ctrl_write_depth_wo_{i}",
                               self.app_ctrl_depth_width))
        configurations.append((f"strg_ub_app_ctrl_write_depth_ss_{i}",
                               self.app_ctrl_depth_width))
        configurations.append(
            (f"strg_ub_app_ctrl_coarse_write_depth_wo_{i}",
             self.app_ctrl_depth_width))
        configurations.append(
            (f"strg_ub_app_ctrl_coarse_write_depth_ss_{i}",
             self.app_ctrl_depth_width))

        configurations.append(
            (f"strg_ub_input_addr_ctrl_address_gen_{i}_dimensionality",
             1 + kts.clog2(self.input_iterator_support)))
        configurations.append(
            (f"strg_ub_input_addr_ctrl_address_gen_{i}_starting_addr",
             self.input_config_width))
        for j in range(self.input_iterator_support):
            configurations.append(
                (f"strg_ub_input_addr_ctrl_address_gen_{i}_ranges_{j}",
                 self.input_config_width))
            configurations.append(
                (f"strg_ub_input_addr_ctrl_address_gen_{i}_strides_{j}",
                 self.input_config_width))

    configurations.append(
        ("strg_ub_app_ctrl_prefill", self.interconnect_output_ports))
    configurations.append(
        ("strg_ub_app_ctrl_coarse_prefill",
         self.interconnect_output_ports))

    for i in range(self.stcl_valid_iter):
        configurations.append((f"strg_ub_app_ctrl_ranges_{i}", 16))
        configurations.append((f"strg_ub_app_ctrl_threshold_{i}", 16))

    for i in range(self.interconnect_output_ports):
        configurations.append((f"strg_ub_app_ctrl_input_port_{i}",
                               kts.clog2(self.interconnect_input_ports)))
        configurations.append((f"strg_ub_app_ctrl_read_depth_{i}",
                               self.app_ctrl_depth_width))
        configurations.append(
            (f"strg_ub_app_ctrl_coarse_input_port_{i}",
             kts.clog2(self.interconnect_input_ports)))
        configurations.append(
            (f"strg_ub_app_ctrl_coarse_read_depth_{i}",
             self.app_ctrl_depth_width))

        configurations.append(
            (f"strg_ub_output_addr_ctrl_address_gen_{i}_dimensionality",
             1 + kts.clog2(self.output_iterator_support)))
        configurations.append(
            (f"strg_ub_output_addr_ctrl_address_gen_{i}_starting_addr",
             self.output_config_width))
        for j in range(self.output_iterator_support):
            configurations.append(
                (f"strg_ub_output_addr_ctrl_address_gen_{i}_ranges_{j}",
                 self.output_config_width))
            configurations.append(
                (f"strg_ub_output_addr_ctrl_address_gen_{i}_strides_{j}",
                 self.output_config_width))

        configurations.append((f"strg_ub_pre_fetch_{i}_input_latency",
                               kts.clog2(self.max_prefetch) + 1))
        configurations.append((f"strg_ub_sync_grp_sync_group_{i}",
                               self.interconnect_output_ports))
        configurations.append(
            (f"strg_ub_rate_matched_{i}",
             1 + kts.clog2(self.interconnect_input_ports)))

        for j in range(self.num_tb):
            configurations.append(
                (f"strg_ub_tba_{i}_tb_{j}_dimensionality", 2))

            # The per-index registers are packed: as many index fields
            # as fit into one config word share a single "merged"
            # register; the last register may hold fewer fields.
            num_indices_bits = 1 + kts.clog2(self.fw_int)
            indices_per_feat = math.floor(
                self.config_data_width / num_indices_bits)
            num_feats_merge = math.ceil(
                self.tb_range_inner_max / indices_per_feat)
            for k in range(num_feats_merge):
                num_idx = indices_per_feat
                remaining = \
                    self.tb_range_inner_max - (k * indices_per_feat)
                if remaining < indices_per_feat:
                    num_idx = remaining
                merged_configs.append((
                    f"strg_ub_tba_{i}_tb_{j}_indices_merged_"
                    f"{k * indices_per_feat}",
                    num_idx * num_indices_bits,
                    num_idx))

            configurations.append((f"strg_ub_tba_{i}_tb_{j}_range_inner",
                                   kts.clog2(self.tb_range_inner_max)))
            configurations.append((f"strg_ub_tba_{i}_tb_{j}_range_outer",
                                   kts.clog2(self.tb_range_max)))
            configurations.append((f"strg_ub_tba_{i}_tb_{j}_stride",
                                   kts.clog2(self.max_tb_stride)))
            configurations.append((f"strg_ub_tba_{i}_tb_{j}_tb_height",
                                   max(1, kts.clog2(self.num_tb))))
            configurations.append(
                (f"strg_ub_tba_{i}_tb_{j}_starting_addr",
                 max(1, kts.clog2(self.fw_int))))

    # Do all the stuff for the main config
    main_feature = self.__features[0]
    for config_reg_name, width in configurations:
        main_feature.add_config(config_reg_name, width)
        if width == 1:
            self.wire(main_feature.registers[config_reg_name].ports.O[0],
                      self.underlying.ports[config_reg_name][0])
        else:
            self.wire(main_feature.registers[config_reg_name].ports.O,
                      self.underlying.ports[config_reg_name])

    # A merged register "<base>_merged_<first>" feeds the underlying
    # ports "<base>_<first>", "<base>_<first + 1>", ... with consecutive
    # equal-width slices of its output.
    for config_reg_name, width, num_merged in merged_configs:
        main_feature.add_config(config_reg_name, width)
        base_name = config_reg_name.split("_merged")[0]
        base_indices = int(config_reg_name.split("_merged_")[1])
        num_bits = width // num_merged
        for i in range(num_merged):
            self.wire(
                main_feature.registers[config_reg_name].ports.O[
                    i * num_bits:(i + 1) * num_bits],
                self.underlying.ports[f"{base_name}_{base_indices + i}"])

    # SRAM
    # These should also account for num features
    # NOTE(review): the two instance names below look swapped (the read
    # OR is named ..._WR_... and vice versa).  They are kept as-is
    # because instance names appear in the generated netlist; confirm
    # nothing downstream depends on them before renaming.
    or_all_cfg_rd = FromMagma(mantle.DefineOr(self.num_sram_features, 1))
    or_all_cfg_rd.instance_name = "OR_CONFIG_WR_SRAM"
    or_all_cfg_wr = FromMagma(mantle.DefineOr(self.num_sram_features, 1))
    or_all_cfg_wr.instance_name = "OR_CONFIG_RD_SRAM"
    for sram_index in range(self.num_sram_features):
        core_feature = self.__features[sram_index + 1]
        self.add_port(f"config_en_{sram_index}", magma.In(magma.Bit))
        # port aliasing
        core_feature.ports["config_en"] = \
            self.ports[f"config_en_{sram_index}"]
        self.wire(core_feature.ports.read_config_data,
                  self.underlying.ports[f"config_data_out_{sram_index}"])
        # also need to wire the sram signal
        # NOTE(review): this OR of write/read is instantiated and its
        # inputs are wired, but its output is not connected anywhere in
        # this constructor; config_en is driven directly from the
        # config_en_<n> port instead.
        or_gate_en = FromMagma(mantle.DefineOr(2, 1))
        or_gate_en.instance_name = f"OR_CONFIG_EN_SRAM_{sram_index}"
        self.wire(or_gate_en.ports.I0, core_feature.ports.config.write)
        self.wire(or_gate_en.ports.I1, core_feature.ports.config.read)
        self.wire(core_feature.ports.config_en,
                  self.underlying.ports["config_en"][sram_index])
        # Still connect to the OR of all the config rd/wr
        self.wire(core_feature.ports.config.write,
                  or_all_cfg_wr.ports[f"I{sram_index}"])
        self.wire(core_feature.ports.config.read,
                  or_all_cfg_rd.ports[f"I{sram_index}"])

    self.wire(or_all_cfg_rd.ports.O[0],
              self.underlying.ports.config_read[0])
    self.wire(or_all_cfg_wr.ports.O[0],
              self.underlying.ports.config_write[0])
    self._setup_config()

    # Dump the register names (sorted) for downstream tooling.
    # NOTE(review): these files are written to the current working
    # directory on every construction.
    conf_names = sorted(self.registers.keys())
    with open("mem_cfg.txt", "w+") as cfg_dump:
        for idx, reg in enumerate(conf_names):
            write_line = f"|{reg}|{idx}|{self.registers[reg].width}||\n"
            cfg_dump.write(write_line)
    with open("mem_synth.txt", "w+") as cfg_dump:
        for reg in conf_names:
            write_line = f"{reg}\n"
            cfg_dump.write(write_line)
def __init__(self, data_width, word_width, data_depth, num_banks,
             use_sram_stub):
    """Assemble the memory core around the generated memory circuit.

    Declares the core's data/control ports, obtains the underlying circuit
    (generated once per argument combination and afterwards reused from
    the class-level circuit cache), wires data, control and configuration
    signals to it, and finally writes the sorted configuration register
    names to ``mem_cfg.txt`` in the current working directory.

    Args:
        data_width: Stored on the instance and forwarded to the generator.
        word_width: Stored and forwarded to the generator; also the bit
            width of the ``data_in``/``addr_in``/``data_out`` ports and of
            the tied-off ``chain_in`` input.
        data_depth: Stored on the instance and forwarded to the generator.
        num_banks: Stored on the instance and forwarded to the generator.
        use_sram_stub: Truthy/falsy flag, stored and forwarded as 1 or 0.
    """
    super().__init__(8, 32)

    self.data_width = data_width
    self.data_depth = data_depth
    self.num_banks = num_banks
    self.word_width = word_width
    # The generator receives the stub flag as an integer.
    self.use_sram_stub = 1 if use_sram_stub else 0

    word_type = magma.Bits[self.word_width]
    bit_type = magma.Bits[1]

    self.add_ports(
        data_in=magma.In(word_type),
        addr_in=magma.In(word_type),
        data_out=magma.Out(word_type),
        flush=magma.In(bit_type),
        wen_in=magma.In(bit_type),
        ren_in=magma.In(bit_type),
        stall=magma.In(magma.Bits[4]),
        valid_out=magma.Out(bit_type),
        switch_db=magma.In(bit_type),
    )
    # Instead of a single read_config_data, we have multiple for each
    # "sub"-feature of this core.
    # self.ports.pop("read_config_data")

    # The cache is keyed on the raw constructor arguments.
    cache_key = (data_width, word_width, data_depth, num_banks,
                 use_sram_stub)
    if cache_key in MemCore.__circuit_cache:
        circ = MemCore.__circuit_cache[cache_key]
    else:
        wrapper = memory_core_genesis2.memory_core_wrapper
        param_mapping = memory_core_genesis2.param_mapping
        generator = wrapper.generator(param_mapping, mode="declare")
        circ = generator(data_width=self.data_width,
                         data_depth=self.data_depth,
                         word_width=self.word_width,
                         num_banks=self.num_banks,
                         use_sram_stub=self.use_sram_stub)
        MemCore.__circuit_cache[cache_key] = circ

    self.underlying = FromMagma(circ)

    # Each control signal goes through a 2:1 mux: input 0 is the core's
    # own port (the default, fed by the routing network), input 1 is a
    # 1-bit config register; a second 1-bit config register selects.
    for signal in ("wen_in", "ren_in", "flush", "switch_db"):
        value_reg = f"{signal}_reg_value"
        select_reg = f"{signal}_reg_sel"
        # TODO: consult with Ankita to see if we can use the normal
        # mux here
        select_mux = MuxWrapper(2, 1, name=f"{signal}_sel")
        self.add_config(value_reg, 1)
        self.add_config(select_reg, 1)
        self.wire(select_mux.ports.I[0], self.ports[signal])
        self.wire(select_mux.ports.I[1], self.registers[value_reg].ports.O)
        self.wire(select_mux.ports.S, self.registers[select_reg].ports.O)
        self.wire(select_mux.ports.O[0], self.underlying.ports[signal])

    # Straight-through connections between the core and the circuit.
    self.wire(self.ports.data_in, self.underlying.ports.data_in)
    self.wire(self.ports.addr_in, self.underlying.ports.addr_in)
    self.wire(self.ports.data_out, self.underlying.ports.data_out)
    self.wire(self.ports.reset, self.underlying.ports.reset)
    self.wire(self.ports.clk, self.underlying.ports.clk)
    self.wire(self.ports.valid_out[0], self.underlying.ports.valid_out)

    # PE core uses clk_en (essentially active low stall): bit 0 of the
    # stall input is inverted to drive the circuit's clk_en.
    self.stallInverter = FromMagma(mantle.DefineInvert(1))
    self.wire(self.stallInverter.ports.I, self.ports.stall[0:1])
    self.wire(self.stallInverter.ports.O[0], self.underlying.ports.clk_en)

    # Constant tie-offs, as (constant value, ((port name, width), ...)).
    # The chain inputs are held at 0; config read/write are held at 1
    # (enable read and write by default).
    tie_offs = (
        (0, (("chain_wen_in", 1), ("chain_in", self.word_width))),
        (1, (("config_read", 1), ("config_write", 1))),
    )
    for level, signals in tie_offs:
        for name, width in signals:
            if width > 1:
                constant = magma.bits(level, width)
            else:
                constant = magma.bit(level)
            self.wire(Const(constant), self.underlying.ports[name])
    self.wire(Const(magma.bits(0, 24)),
              self.underlying.ports.config_addr[0:24])

    # we have five features in total
    # 0:    TILE (this core itself)
    # 1-4:  SMEM (one CoreFeature per SRAM)
    self.__features: List[CoreFeature] = [self]
    for feature_id in range(1, 5):
        self.__features.append(CoreFeature(self, feature_id))

    # Every SRAM feature gets its own config_<idx> port on the core,
    # aliased as that feature's "config" port.
    for idx, core_feature in enumerate(self.__features):
        if idx == 0:
            continue
        self.add_port(f"config_{idx}", magma.In(ConfigurationType(8, 32)))
        core_feature.ports["config"] = self.ports[f"config_{idx}"]
    self.add_port("config", magma.In(ConfigurationType(8, 32)))

    # OR the per-feature config address/data together into one bus each.
    config_type = ConfigurationType(8, 32)
    or_gates = {}
    for field in ("config_addr", "config_data"):
        field_width = len(config_type[field])
        gate = FromMagma(mantle.DefineOr(len(self.__features), field_width))
        gate.instance_name = f"OR_{field}_FEATURE"
        for idx, core_feature in enumerate(self.__features):
            self.wire(gate.ports[f"I{idx}"],
                      core_feature.ports.config[field])
        or_gates[field] = gate

    self.wire(or_gates["config_addr"].ports.O,
              self.underlying.ports.config_addr[24:32])
    self.wire(or_gates["config_data"].ports.O,
              self.underlying.ports.config_data)

    # only the first one has config_en
    # self.wire(self.__features[0].ports.config.write[0],
    #           self.underlying.ports.config_en)

    # Every SRAM feature also gets its own read_config_data_<idx> output,
    # aliased as that feature's "read_config_data" port.
    for idx, core_feature in enumerate(self.__features):
        if idx == 0:
            continue
        self.add_port(f"read_config_data_{idx}", magma.Out(magma.Bits[32]))
        core_feature.ports["read_config_data"] = \
            self.ports[f"read_config_data_{idx}"]

    # MEM config
    # self.wire(self.ports.read_config_data,
    #           self.underlying.ports.read_config_data)

    # (register name, width) pairs; each register drives the port of the
    # same name on the underlying circuit.
    configurations = (
        ("stencil_width", 32),
        ("read_mode", 1),
        ("arbitrary_addr", 1),
        ("starting_addr", 32),
        ("iter_cnt", 32),
        ("dimensionality", 32),
        ("circular_en", 1),
        ("almost_count", 4),
        ("enable_chain", 1),
        ("mode", 2),
        ("tile_en", 1),
        ("chain_idx", 4),
        ("depth", 13),
    )

    # All of these registers live in the main (tile) feature.
    main_feature = self.__features[0]
    for reg_name, width in configurations:
        main_feature.add_config(reg_name, width)
        reg_out = main_feature.registers[reg_name].ports.O
        if width == 1:
            # Single-bit registers are wired through their bit 0.
            reg_out = reg_out[0]
        self.wire(reg_out, self.underlying.ports[reg_name])

    for idx in range(8):
        stride_name = f"stride_{idx}"
        range_name = f"range_{idx}"
        main_feature.add_config(stride_name, 32)
        main_feature.add_config(range_name, 32)
        self.wire(main_feature.registers[stride_name].ports.O,
                  self.underlying.ports[stride_name])
        self.wire(main_feature.registers[range_name].ports.O,
                  self.underlying.ports[range_name])

    # SRAM features: hook up read-back data and the per-SRAM config enable
    # (driven by that feature's config write bit).
    for sram_index, core_feature in enumerate(self.__features[1:]):
        self.wire(core_feature.ports.read_config_data,
                  self.underlying.ports[f"read_data_sram_{sram_index}"])
        self.wire(core_feature.ports.config.write[0],
                  self.underlying.ports["config_en_sram"][sram_index])

    self._setup_config()

    # Dump one table row per register: |name|index in sorted order|width||
    conf_names = sorted(self.registers.keys())
    with open("mem_cfg.txt", "w+") as cfg_dump:
        for idx, reg in enumerate(conf_names):
            cfg_dump.write(f"|{reg}|{idx}|{self.registers[reg].width}||\n")
def __init__(self,
             data_width=16,  # CGRA Params
             mem_depth=32,
             default_iterator_support=3,
             interconnect_input_ports=1,  # Connection to int
             interconnect_output_ports=1,
             config_data_width=32,
             config_addr_width=8,
             cycle_count_width=16,
             add_clk_enable=True,
             add_flush=True):
    """Build the Pond core wrapper around a (cached) Pond circuit.

    Records every constructor argument on the instance, builds or reuses
    the magma circuit for this parameter combination, wraps it through
    ``wrap_lake_core()`` and finally writes the sorted configuration
    register names to ``pond_cfg.txt`` and ``pond_synth.txt`` in the
    current working directory.

    Args:
        data_width: Passed to the base class and to ``Pond``.
        mem_depth: Passed to ``Pond``; ``clog2`` of it becomes
            ``default_config_width``.
        default_iterator_support: Passed to ``Pond``.
        interconnect_input_ports: Passed to ``Pond``.
        interconnect_output_ports: Passed to ``Pond``.
        config_data_width: Passed to the base class and to ``Pond``.
        config_addr_width: Passed to the base class and to ``Pond``.
        cycle_count_width: Passed to ``Pond``.
        add_clk_enable: Passed to ``Pond``.
        add_flush: Passed to ``Pond``.
    """
    super().__init__(config_data_width=config_data_width,
                     config_addr_width=config_addr_width,
                     data_width=data_width,
                     name="PondCore")

    # Capture everything to the tile object
    self.interconnect_input_ports = interconnect_input_ports
    self.interconnect_output_ports = interconnect_output_ports
    self.mem_depth = mem_depth
    self.data_width = data_width
    self.config_data_width = config_data_width
    self.config_addr_width = config_addr_width
    self.add_clk_enable = add_clk_enable
    self.add_flush = add_flush
    self.cycle_count_width = cycle_count_width
    self.default_iterator_support = default_iterator_support
    self.default_config_width = kts.clog2(self.mem_depth)

    # One cache entry per distinct parameter combination.
    cache_key = (self.data_width,
                 self.mem_depth,
                 self.interconnect_input_ports,
                 self.interconnect_output_ports,
                 self.config_data_width,
                 self.config_addr_width,
                 self.add_clk_enable,
                 self.add_flush,
                 self.cycle_count_width,
                 self.default_iterator_support)

    if cache_key in LakeCoreBase._circuit_cache:
        # Reuse both the circuit and the generator object stored with it.
        circ, self.dut = LakeCoreBase._circuit_cache[cache_key]
    else:
        # The Pond object itself is only kept so it can be queried for
        # information; the magma circuit derived from it is what gets
        # instantiated, and both are cached together.
        self.dut = Pond(
            data_width=data_width,  # CGRA Params
            mem_depth=mem_depth,
            default_iterator_support=default_iterator_support,
            interconnect_input_ports=interconnect_input_ports,  # Connection to int
            interconnect_output_ports=interconnect_output_ports,
            config_data_width=config_data_width,
            config_addr_width=config_addr_width,
            cycle_count_width=cycle_count_width,
            add_clk_enable=add_clk_enable,
            add_flush=add_flush)
        circ = kts.util.to_magma(self.dut,
                                 flatten_array=True,
                                 check_multiple_driver=False,
                                 optimize_if=False,
                                 check_flip_flop_always_ff=False)
        LakeCoreBase._circuit_cache[cache_key] = (circ, self.dut)

    # Save as underlying circuit object
    self.underlying = FromMagma(circ)

    self.wrap_lake_core()

    conf_names = sorted(self.registers.keys())

    # One tuple-literal line per register, with its width as a comment.
    with open("pond_cfg.txt", "w+") as cfg_dump:
        for reg in conf_names:
            cfg_dump.write(f"(\"{reg}\", 0), # {self.registers[reg].width}\n")

    # Plain list of register names, one per line.
    with open("pond_synth.txt", "w+") as cfg_dump:
        for reg in conf_names:
            cfg_dump.write(f"{reg}\n")
def GlobalBufferMagma(params: GlobalBufferParams):
    """Return a ``FromMagma`` wrapper around a ``GlobalBuffer`` circuit.

    A ``GlobalBuffer`` is constructed from ``params``, converted with
    ``to_magma`` (``flatten_array=True``), and the resulting circuit is
    wrapped with ``FromMagma``.
    """
    return FromMagma(to_magma(GlobalBuffer(params), flatten_array=True))