def instantiate(self, setup):
    """Build one processing element (PE) from the ``setup`` dictionary.

    Keys read from ``setup``:
        row, col: position of this PE, used for the debug label.
        weights_data_in_chn, ifmap_data_in_chn, psum_data_in_chn,
        psum_data_out_chn: IO channels, each wrapped in a ModuleList.
        wsp_depth, wsp_width, wsp_data_width, wsp_nports, wsp_nbanks,
        wsp_port_type: parameters of the weights scratchpad SRAM.
        mac_latency: latency handed to the MAC unit.
    """
    self.class_name = 'PE'
    self.row = setup['row']
    self.col = setup['col']
    self.debug = 'PE[{}][{}]'.format(self.row, self.col)

    # ----------------------------------------------------------------
    # Stats related setup
    # ----------------------------------------------------------------
    self.component_class_specification_stats = 'hide'
    self.component_specification_stats = 'show'
    self.access_counts_stats = 'show'
    # NOTE(review): traces_stats is not assigned anywhere in this method;
    # presumably it is provided by the class or its base -- confirm.
    if self.traces_stats == 'show':
        self.recorder = nnsimRecorder()
    else:
        self.recorder = None

    # ----------------------------------------------------------------
    # IO channels (each incoming channel is wrapped in a ModuleList)
    # ----------------------------------------------------------------
    self.weights_data_in_chn = ModuleList(setup['weights_data_in_chn'])
    self.ifmap_data_in_chn = ModuleList(setup['ifmap_data_in_chn'])
    self.psum_data_in_chn = ModuleList(setup['psum_data_in_chn'])
    self.psum_data_out_chn = ModuleList(setup['psum_data_out_chn'])

    # ----------------------------------------------------------------
    # Hardware components
    # ----------------------------------------------------------------
    # >>>> weights scratchpad: filled from the weights input channel and
    #      drained into weights_reader, which feeds the MAC below.
    self.weights_reader = ModuleList(Ch())
    scratchpad_sram = {
        'depth': setup['wsp_depth'],
        'width': setup['wsp_width'],
        'data_width': setup['wsp_data_width'],
        'nports': setup['wsp_nports'],
        'nbanks': setup['wsp_nbanks'],
        'port_type': setup['wsp_port_type'],
    }
    self.weight_sp = WeightsSP({
        'fill_data_ichns': self.weights_data_in_chn,
        'drain_data_ochns': self.weights_reader,
        'num_logical_managers': 1,
        'SRAM': scratchpad_sram,
        'debug': self.debug + '_weights_sp',
    })

    # >>>> mac unit: operands are the ifmap input (a), the scratchpad
    #      output (b) and the incoming psum (c).
    self.mac = mac({
        'opa_chn': self.ifmap_data_in_chn[0],
        'opb_chn': self.weights_reader[0],
        'opc_chn': self.psum_data_in_chn[0],
        'result_chn': self.psum_data_out_chn[0],
        'latency': setup['mac_latency'],
        'debug': self.debug,
    })
def instantiate(self, setup):
    """Set up the testbench: trace generator, IO channels and the chip.

    The chip's setup dictionary starts as a shallow copy of ``self.arch``
    and is extended with the serialization ratios (taken from the GLB
    widths), the PE array description, the IO channels and the PE setup.
    """
    # The nnsimTestBench base sets the trace generator if there is one.
    nnsimTestBench.instantiate(self, setup)

    self.trace_generator = ws_chip_trace_generator()
    self.traces_stats = 'show'
    if self.traces_stats == 'show':
        self.recorder = nnsimRecorder()
    else:
        self.recorder = None
    self.generated_trace = True

    # Bookkeeping
    self.result = []

    # -----------------------------------------------------------------
    # IO channels between the testbench and the chip
    # -----------------------------------------------------------------
    # Input channels carrying trace data towards the GLBs; they are
    # wrapped in ModuleLists because the buffers take channel lists.
    self.weights_in_chn = ModuleList(Channel())
    self.ifmap_chn = ModuleList(Channel())
    self.psum_in_chn = ModuleList(Channel())
    # Output channel receiving the psums calculated by the chip.
    self.psum_out_chn = Channel()

    # -----------------------------------------------------------------
    # Setup and instantiation of the design under test
    # -----------------------------------------------------------------
    glb_widths = self.arch['width']
    chip_setup = dict(self.arch)
    # Attributes derived from the architecture description.
    chip_setup.update({
        'weights_seri_ratio': glb_widths['WeightsGLB'],
        'ifmap_seri_ratio': glb_widths['IFmapGLB'],
        'psum_seri_ratio': glb_widths['PsumGLB'],
        'pe_array': self.arch['PE_array'],
        'io_chns': {
            'weights': self.weights_in_chn,
            'ifmap': self.ifmap_chn,
            'psum_in': self.psum_in_chn,
            'psum_out': self.psum_out_chn,
        },
        'PE': self.arch['PE'],
    })
    self.dut = chip(chip_setup)

    # GLB widths kept on the testbench for dispatching data.
    self.WGLB_width = glb_widths['WeightsGLB']
    self.IGLB_width = glb_widths['IFmapGLB']
    self.PGLB_width = glb_widths['PsumGLB']
def instantiate(self, arr_x, arr_y, input_chn, output_chn, chn_per_word,
                ifmap_glb_depth):
    """Build the chip: IO (de)serializers, GLBs, PE array, transform arrays
    and the NoCs that connect them.

    Args:
        arr_x: number of PE columns; also the width of the post-transform
            array and the height of the weights pre-transform array.
        arr_y: number of PE rows; also the width of both pre-transform
            arrays.
        input_chn: channel the input deserializer reads from.
        output_chn: channel the output serializer writes to.
        chn_per_word: passed through to the deserializer, IFMap GLB,
            tiler and NoCs.
        ifmap_glb_depth: depth handed to the IFMap GLB.
    """
    # Static configuration
    self.name = 'chip'
    self.arr_x = arr_x
    self.arr_y = arr_y
    self.chn_per_word = chn_per_word

    # Transform array dimensions. The "4" entries are hard-coded; the
    # original notes describe them as the number of tiles.
    self.post_tr_x = arr_x
    self.post_tr_y = 4
    self.pre_tr_ifmap_x = arr_y
    self.pre_tr_ifmap_y = 4
    self.pre_tr_weights_x = arr_y
    self.pre_tr_weights_y = arr_x

    self.stat_type = 'show'

    # DRAM IO channels
    self.input_chn = input_chn
    self.output_chn = output_chn

    # Input deserializer and output serializer
    self.ifmap_wr_chn = Channel()
    self.weights_wr_chn = Channel()
    self.bias_wr_chn = Channel()
    self.deserializer = InputDeserializer(
        self.input_chn, self.ifmap_wr_chn, self.weights_wr_chn,
        self.bias_wr_chn, arr_x, arr_y, chn_per_word)
    self.psum_output_chn = Channel()
    self.serializer = OutputSerializer(self.output_chn, self.psum_output_chn)

    # GLBs and their channels
    self.ifmap_glb_wr_chn = Channel(3)
    self.ifmap_rd_chn = Channel(3)
    self.ifmap_glb = IFMapGLB(
        self.ifmap_glb_wr_chn, self.ifmap_rd_chn, ifmap_glb_depth,
        chn_per_word)

    # No PSumGLB is instantiated in this variant (the original call was
    # commented out); the two psum channels are still created.
    self.psum_rd_chn = Channel(3)
    self.psum_noc_wr_chn = Channel()

    self.weights_glb_wr_chn = Channel(3)
    self.weights_rd_chn = Channel()
    self.weights_glb = WeightsGLB(self.weights_glb_wr_chn,
                                  self.weights_rd_chn)

    self.bias_rd_chn = Channel()
    self.bias_glb = BiasGLB(self.bias_wr_chn, self.bias_rd_chn)

    # ------------------------------------------------------------------
    # PE array and its local channels
    # ------------------------------------------------------------------
    self.pe_array = ModuleList()
    self.pe_ifmap_chns = ModuleList()
    self.pe_filter_chns = ModuleList()
    self.pe_psum_chns = ModuleList()

    # pe_psum_chns has one more row than the PE array: row r is the psum
    # input of PE row r and row r + 1 is its psum output.
    self.pe_psum_chns.append(ModuleList())
    for _ in range(self.arr_x):
        self.pe_psum_chns[0].append(Channel(32))

    for row in range(self.arr_y):
        self.pe_array.append(ModuleList())
        self.pe_ifmap_chns.append(ModuleList())
        self.pe_filter_chns.append(ModuleList())
        self.pe_psum_chns.append(ModuleList())
        for col in range(self.arr_x):
            ifmap_chn = Channel(32)
            self.pe_ifmap_chns[row].append(ifmap_chn)
            filter_chn = Channel(32)
            self.pe_filter_chns[row].append(filter_chn)
            psum_out_chn = Channel(32)
            self.pe_psum_chns[row + 1].append(psum_out_chn)
            psum_in_chn = self.pe_psum_chns[row][col]
            self.pe_array[row].append(
                PE(col, row, ifmap_chn, filter_chn, psum_in_chn,
                   psum_out_chn))

    # ------------------------------------------------------------------
    # Pre-transform IFMap array and its local channels
    # ------------------------------------------------------------------
    self.pre_tr_ifmap_array = ModuleList()
    self.pre_tr_ifmap_in_chns = ModuleList()
    self.pre_tr_ifmap_out_chns = ModuleList()
    for row in range(self.pre_tr_ifmap_y):
        self.pre_tr_ifmap_array.append(ModuleList())
        self.pre_tr_ifmap_in_chns.append(ModuleList())
        self.pre_tr_ifmap_out_chns.append(ModuleList())
        for col in range(self.pre_tr_ifmap_x):
            in_chn = Channel(32)
            self.pre_tr_ifmap_in_chns[row].append(in_chn)
            out_chn = Channel(32)
            self.pre_tr_ifmap_out_chns[row].append(out_chn)
            self.pre_tr_ifmap_array[row].append(
                PreTransformIFMap(col, row, in_chn, out_chn))

    # ------------------------------------------------------------------
    # Pre-transform weights array and its local channels
    # ------------------------------------------------------------------
    self.pre_tr_weights_array = ModuleList()
    self.pre_tr_weights_in_chns = ModuleList()
    self.pre_tr_weights_out_chns = ModuleList()
    for row in range(self.pre_tr_weights_y):
        self.pre_tr_weights_array.append(ModuleList())
        self.pre_tr_weights_in_chns.append(ModuleList())
        self.pre_tr_weights_out_chns.append(ModuleList())
        for col in range(self.pre_tr_weights_x):
            in_chn = Channel(32)
            self.pre_tr_weights_in_chns[row].append(in_chn)
            out_chn = Channel(32)
            self.pre_tr_weights_out_chns[row].append(out_chn)
            self.pre_tr_weights_array[row].append(
                PreTransformWeights(col, row, in_chn, out_chn))

    # ------------------------------------------------------------------
    # Post-transform array and its local channels
    # ------------------------------------------------------------------
    self.post_tr_array = ModuleList()
    self.post_tr_bias_chns = ModuleList()
    self.post_tr_ofmap_in_chns = ModuleList()
    self.post_tr_ofmap_out_chns = ModuleList()
    for row in range(self.post_tr_y):
        self.post_tr_array.append(ModuleList())
        self.post_tr_bias_chns.append(ModuleList())
        self.post_tr_ofmap_in_chns.append(ModuleList())
        self.post_tr_ofmap_out_chns.append(ModuleList())
        for col in range(self.post_tr_x):
            bias_chn = Channel(32)
            self.post_tr_bias_chns[row].append(bias_chn)
            ofmap_in_chn = Channel(32)
            self.post_tr_ofmap_in_chns[row].append(ofmap_in_chn)
            ofmap_out_chn = Channel(32)
            self.post_tr_ofmap_out_chns[row].append(ofmap_out_chn)
            self.post_tr_array[row].append(
                PostTransform(col, row, bias_chn, ofmap_in_chn,
                              ofmap_out_chn))

    # ------------------------------------------------------------------
    # NoCs delivering weights, ifmaps, psums and biases
    # ------------------------------------------------------------------
    self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns,
                                 self.chn_per_word)
    self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns,
                              self.arr_x, self.chn_per_word)
    # The psum read NoC only gets the first psum row here. No PSumWrNoC
    # is instantiated; the last psum row goes to the post-transform NoC.
    self.psum_rd_noc = PSumRdNoC(self.pe_psum_chns[0], self.chn_per_word)
    self.bias_noc = BiasNoC(self.bias_rd_chn, self.post_tr_bias_chns,
                            self.chn_per_word)

    # NoCs around the post-transform blocks
    self.post_tr_wr_noc = PostTrWrNoC(
        self.pe_psum_chns[-1], self.post_tr_ofmap_in_chns,
        self.chn_per_word)
    self.post_tr_rd_noc = PostTrRdNoC(
        self.post_tr_ofmap_out_chns, self.psum_output_chn,
        self.chn_per_word)

    # Tiler for ifmaps; it takes ifmap_wr_chn and the pre-transform
    # input channels (a PreTrIFMapWrNoC is not instantiated).
    self.ifmap_tiler = IFMapTiler(
        self.ifmap_wr_chn, self.pre_tr_ifmap_in_chns, self.chn_per_word)

    # NoCs around the pre-transform blocks
    self.pre_tr_ifmap_rd_noc = PreTrIFMapRdNoC(
        self.pre_tr_ifmap_out_chns, self.ifmap_glb_wr_chn,
        self.chn_per_word)
    self.pre_tr_weights_wr_noc = PreTrWeightsWrNoC(
        self.weights_wr_chn, self.pre_tr_weights_in_chns,
        self.chn_per_word)
    self.pre_tr_weights_rd_noc = PreTrWeightsRdNoC(
        self.pre_tr_weights_out_chns, self.weights_glb_wr_chn,
        self.chn_per_word)
def instantiate(self, arr_x, arr_y, input_chn, output_chn, chn_per_word,
                ifmap_glb_depth, psum_glb_depth):
    """Build the chip: IO (de)serializers, GLBs, PE array and NoCs.

    Args:
        arr_x: number of PE columns.
        arr_y: number of PE rows.
        input_chn: channel the input deserializer reads from.
        output_chn: channel the output serializer writes to.
        chn_per_word: passed through to the deserializer, GLBs and NoCs.
        ifmap_glb_depth: depth handed to the IFMap GLB.
        psum_glb_depth: depth handed to the PSum GLB.
    """
    # Static configuration
    self.name = 'chip'
    self.arr_x = arr_x
    self.arr_y = arr_y
    self.chn_per_word = chn_per_word

    self.stat_type = 'show'

    # DRAM IO channels
    self.input_chn = input_chn
    self.output_chn = output_chn

    # Input deserializer and output serializer
    self.ifmap_wr_chn = Channel(name="ifmap_wr_chn")
    self.psum_wr_chn = Channel(name="psum_wr_chn")
    self.weights_wr_chn = Channel(name="weights_wr_chn")
    self.deserializer = InputDeserializer(
        self.input_chn, self.ifmap_wr_chn, self.weights_wr_chn,
        self.psum_wr_chn, arr_x, arr_y, chn_per_word)
    self.psum_output_chn = Channel(name="psum_output_chn")
    self.serializer = OutputSerializer(self.output_chn, self.psum_output_chn)

    # GLBs and their channels
    self.ifmap_rd_chn = Channel(3, name='ifmap_rd_chn')
    self.ifmap_glb = IFMapGLB(self.ifmap_wr_chn, self.ifmap_rd_chn,
                              ifmap_glb_depth, chn_per_word)

    self.psum_rd_chn = Channel(3, name='psum_rd_chn')
    self.psum_noc_wr_chn = Channel(name='psum_noc_wr_chn')
    self.psum_glb = PSumGLB(self.psum_wr_chn, self.psum_noc_wr_chn,
                            self.psum_rd_chn, psum_glb_depth, chn_per_word)

    self.weights_rd_chn = Channel(name='weights_rd_chn')
    self.weights_glb = WeightsGLB(self.weights_wr_chn, self.weights_rd_chn)

    # PE array and local channel declaration
    self.pe_array = ModuleList()
    self.pe_ifmap_chns = ModuleList()
    self.pe_filter_chns = ModuleList()
    self.pe_psum_chns = ModuleList()

    # pe_psum_chns has arr_y + 1 rows: row r is the psum input of PE row r
    # and row r + 1 is its psum output. Row 0 is created up front.
    self.pe_psum_chns.append(ModuleList())
    for x in range(self.arr_x):
        self.pe_psum_chns[0].append(
            Channel(32, name='pe_psum_chns_{}_{}'.format(x, 0)))

    # Actual array instantiation
    for y in range(self.arr_y):
        self.pe_array.append(ModuleList())
        self.pe_ifmap_chns.append(ModuleList())
        self.pe_filter_chns.append(ModuleList())
        self.pe_psum_chns.append(ModuleList())
        for x in range(self.arr_x):
            self.pe_ifmap_chns[y].append(
                Channel(32, name='pe_ifmap_chns_{}_{}'.format(x, y)))
            self.pe_filter_chns[y].append(
                Channel(32, name='pe_filter_chns_{}_{}'.format(x, y)))
            # The channel lives in psum row y + 1, so it is named with
            # y + 1. Naming it with y would give it the same name as the
            # row-0 channel created above when y == 0.
            self.pe_psum_chns[y + 1].append(
                Channel(32, name='pe_psum_chns_{}_{}'.format(x, y + 1)))
            self.pe_array[y].append(
                PE(x, y,
                   self.pe_ifmap_chns[y][x],
                   self.pe_filter_chns[y][x],
                   self.pe_psum_chns[y][x],
                   self.pe_psum_chns[y + 1][x]))

    # NoCs delivering weights, ifmaps and psums
    self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns,
                                 self.chn_per_word)
    self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns,
                              self.arr_x, self.chn_per_word)
    self.psum_rd_noc = PSumRdNoC(self.psum_rd_chn, self.pe_psum_chns[0],
                                 self.chn_per_word)
    self.psum_wr_noc = PSumWrNoC(self.pe_psum_chns[-1], self.psum_noc_wr_chn,
                                 self.psum_output_chn, self.chn_per_word)
def instantiate(self, setup):
    """Build the buffer: virtual ports, address generators and smartbuffer.

    Required ``setup`` keys: ``debug``, ``nbanks``, ``nvports``,
    ``addr_generators``, ``multi_addr_generators``, ``depth``, ``width``,
    ``nports``, ``pvmapping``, ``smartbuffer_type``, ``data_width``.

    Optional ``setup`` keys: ``enabled_buffer`` (default False),
    ``reg_insert_opt`` (default True), and the channel entries
    ``fill_chns``, ``update_chns``, ``update_src_chns``, ``drain_chns``,
    ``drain_enable_chns`` (each defaults to None).
    """
    self.class_name = "nnsimBuffer"
    self.debug = setup['debug']
    self.enabled_buffer = setup.get('enabled_buffer', False)
    self.reg_insert_opt = setup.get('reg_insert_opt', True)
    self.nbanks = setup['nbanks']

    # Placeholders: neither list is filled in by this method.
    self.physical_chns = []
    self.addr_generator_type = []

    # ==================================================================
    # Virtual data transfer channels (instantiated by the caller)
    # ==================================================================
    self.fill_data_chns = setup.get('fill_chns')
    self.update_data_chns = setup.get('update_chns')
    self.update_src_chns = setup.get('update_src_chns')
    self.drain_rsp_chns = setup.get('drain_chns')
    # Per the original notes: channels that decide whether a read is
    # performed (used for zero gating).
    self.drain_enable_chns = setup.get('drain_enable_chns')

    # Mapping: virtual port type -> number of virtual ports of that type.
    self.nvports = setup['nvports']

    # Data channels gathered by port type for ease of access.
    self.vdata_chns = {
        'fill': self.fill_data_chns,
        'update': self.update_data_chns,
    }

    # ==================================================================
    # Address generator types, indexed as [port type][port index]
    # ==================================================================
    self.addr_generator_types = setup['addr_generators']

    # ==================================================================
    # Hardware memory properties
    # ==================================================================
    self.depth = setup['depth']
    self.width = setup['width']
    self.nports = setup['nports']
    self.pvmapping = setup['pvmapping']

    # ==================================================================
    # Internal channels and data structures
    # ==================================================================
    self.faddr_generator = ModuleList()
    self.uaddr_generator = ModuleList()
    self.daddr_generator = ModuleList()

    self.fill_addr_chn = ModuleList()
    self.update_addr_chn = ModuleList()
    self.drain_addr_chn = ModuleList()

    self.fill_local_addr_chn = ModuleList()
    self.update_local_addr_chn = ModuleList()
    self.drain_local_addr_chn = ModuleList()

    # The same containers, keyed by port type.
    self.addr_generators = {
        'fill': self.faddr_generator,
        'update': self.uaddr_generator,
        'drain': self.daddr_generator,
    }
    # Channels on which address generators push addresses to the
    # smartbuffer.
    self.vaddr_chns = {
        'fill': self.fill_addr_chn,
        'update': self.update_addr_chn,
        'drain': self.drain_addr_chn,
    }
    self.local_vaddr_chns = {
        'fill': self.fill_local_addr_chn,
        'update': self.update_local_addr_chn,
        'drain': self.drain_local_addr_chn,
    }

    # ------------------------------------------------------------------
    # Virtual ports: one address channel and one address generator (or
    # one group of generators) per port.
    # ------------------------------------------------------------------
    for vport, port_count in self.nvports.items():
        for port_id in range(port_count):
            n_generators = setup['multi_addr_generators'][vport]

            if n_generators == 1:
                # Single generator pushing straight into the port's
                # address channel; no local channels are needed.
                addr_chn = NoLatencyChannel()
                self.vaddr_chns[vport].append(addr_chn)
                self.local_vaddr_chns[vport].append(None)
                addr_gen_setup = {
                    'addr_chn': addr_chn,
                    'width': self.width,
                    'type': vport,
                    'id': port_id,
                    'depth': self.depth,
                    'debug': self.debug,
                }
                generator = self.addr_generator_types[vport][port_id](
                    addr_gen_setup)
                self.addr_generators[vport].append(generator)
                continue

            # Several generators for one port: each gets its own local
            # channel. Per the original notes, the port's address
            # channel carries the address chosen among them.
            generator_group = ModuleList()
            self.addr_generators[vport].append(generator_group)
            self.vaddr_chns[vport].append(NoLatencyChannel())
            local_chns = ModuleList()
            self.local_vaddr_chns[vport].append(local_chns)
            for idx in range(n_generators):
                local_chn = NoLatencyChannel()
                local_chns.append(local_chn)
                addr_gen_setup = {
                    'addr_chn': local_chn,
                    'width': self.width,
                    'type': vport,
                    'id': port_id,
                    'depth': self.depth,
                    'debug': self.debug + '[{}]'.format(idx),
                }
                # All generators of one port share the same type.
                generator_group.append(
                    self.addr_generator_types[vport][port_id](
                        addr_gen_setup))

    # ------------------------------------------------------------------
    # Smartbuffer (per the original notes: deals with dependencies and
    # conflicts; one smartbuffer is a unit of memory bank)
    # ------------------------------------------------------------------
    smartbuffer_setup = {
        'depth': self.depth,
        'width': self.width,
        'nports': self.nports,
        'pvmapping': self.pvmapping,
        'nvports': self.nvports,
        'vdata_chns': self.vdata_chns,
        'vaddr_chns': self.vaddr_chns,
        'drain_rsp_chns': self.drain_rsp_chns,
        # Enable signals for read operations (may be None).
        'drain_enable_chns': self.drain_enable_chns,
        # External update address sequence (may be None).
        'update_src_chns': self.update_src_chns,
        'enabled_buffer': self.enabled_buffer,
        'reg_insert_opt': self.reg_insert_opt,
        'debug': self.debug,
    }
    self.smartbuffer_type = setup['smartbuffer_type']
    self.sbuffer = self.smartbuffer_type(smartbuffer_setup)

    # ==================================================================
    # Stats collection info
    # ==================================================================
    # NOTE(review): 'dpeth' looks like a misspelling of 'depth'. It is
    # kept as-is because consumers of attrs may rely on the key --
    # confirm before renaming.
    self.attrs = {
        'dpeth': setup['depth'],
        'width': setup['width'],
        'data_width': setup['data_width'],
        'nbanks': setup['nbanks'],
        'nports': setup['nports'],
    }
def instantiate(self, arr_y, input_chn, output_chn, block_size, num_nonzero,
                ifmap_glb_depth, psum_glb_depth, weight_glb_depth):
    """Build the chip: IO (de)serializers, GLBs, a one-column PE array
    and the NoCs that connect them.

    Args:
        arr_y: number of PE rows (each row holds a single PE).
        input_chn: channel the input deserializer reads from.
        output_chn: channel the output serializer writes to.
        block_size: passed to the deserializer, GLBs and NoCs.
        num_nonzero: passed to the deserializer and GLBs.
        ifmap_glb_depth: ifmap depth handed to the combined GLB.
        psum_glb_depth: depth handed to the PSum GLB.
        weight_glb_depth: weights depth handed to the combined GLB.
    """
    # Static configuration (this variant has no arr_x attribute)
    self.name = 'chip'
    self.arr_y = arr_y
    self.block_size = block_size
    self.num_nonzero = num_nonzero

    self.stat_type = 'show'

    # DRAM IO channels
    self.input_chn = input_chn
    self.output_chn = output_chn

    # Input deserializer and output serializer
    self.ifmap_wr_chn = Channel()
    self.psum_wr_chn = Channel()
    self.weights_wr_chn = Channel()
    self.deserializer = InputDeserializer(
        self.input_chn, self.ifmap_wr_chn, self.weights_wr_chn,
        self.psum_wr_chn, arr_y, block_size, num_nonzero)
    self.psum_output_chn = Channel()
    self.serializer = OutputSerializer(self.output_chn, self.psum_output_chn)

    # GLBs and their channels. Ifmaps and weights share one combined GLB;
    # separate IFMapGLB / WeightsGLB instances are not created.
    self.ifmap_rd_chn = Channel(3)

    self.psum_rd_chn = Channel(3)
    self.psum_noc_wr_chn = Channel()
    self.psum_glb = PSumGLB(
        self.psum_wr_chn, self.psum_noc_wr_chn, self.psum_rd_chn,
        psum_glb_depth, block_size, num_nonzero)

    self.weights_rd_chn = Channel()
    self.ifmap_weights_glb = IFMapWeightsGLB(
        self.ifmap_wr_chn, self.ifmap_rd_chn,
        self.weights_wr_chn, self.weights_rd_chn,
        arr_y, ifmap_glb_depth, weight_glb_depth, block_size, num_nonzero)

    # PE array and local channel declaration
    self.pe_array = ModuleList()
    self.pe_ifmap_chns = ModuleList()
    self.pe_filter_chns = ModuleList()
    self.pe_psum_in_chns = ModuleList()
    self.pe_psum_out_chns = ModuleList()

    # Every row contains exactly one PE, at column 0.
    col = 0
    for row in range(self.arr_y):
        self.pe_array.append(ModuleList())
        self.pe_ifmap_chns.append(ModuleList())
        self.pe_filter_chns.append(ModuleList())
        self.pe_psum_in_chns.append(ModuleList())
        self.pe_psum_out_chns.append(ModuleList())

        ifmap_chn = Channel(32)
        self.pe_ifmap_chns[row].append(ifmap_chn)
        filter_chn = Channel(32)
        self.pe_filter_chns[row].append(filter_chn)
        psum_in_chn = Channel(32)
        self.pe_psum_in_chns[row].append(psum_in_chn)
        psum_out_chn = Channel(32)
        self.pe_psum_out_chns[row].append(psum_out_chn)

        self.pe_array[row].append(
            PE(col, row, ifmap_chn, filter_chn, psum_in_chn, psum_out_chn))

    # NoCs delivering weights, ifmaps and psums
    self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns,
                                 block_size)
    self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns)
    self.psum_rd_noc = PSumRdNoC(self.psum_rd_chn, self.pe_psum_in_chns,
                                 self.arr_y, block_size)
    self.psum_wr_noc = PSumWrNoC(self.pe_psum_out_chns, self.psum_noc_wr_chn,
                                 self.psum_output_chn, self.arr_y, block_size)
def instantiate(self, setup):
    """Build the chip from the ``setup`` dictionary: GLBs, PE data
    channels, NoC destination calculators, serializers, NoCs and the PE
    array.

    Keys read from ``setup``:
        pe_array: indexable; ``[0]`` is the number of PE rows and ``[1]``
            the number of PE columns.
        io_chns: dict with ``weights``, ``ifmap``, ``psum_in`` and
            ``psum_out`` off-chip channels.
        depth, width, data_width, nbanks, nports, port_type: dicts keyed
            by GLB name (``WeightsGLB``, ``IFmapGLB``, ``PsumGLB``).
        weights_seri_ratio, ifmap_seri_ratio, psum_seri_ratio: ratios
            handed to the serializers.
        PE: base setup shared by all PEs. It is copied per PE and is not
            modified by this method.
    """

    def sram_spec(glb_name):
        """Collect the SRAM parameters of one GLB from ``setup``."""
        return {
            key: setup[key][glb_name]
            for key in ('depth', 'width', 'data_width',
                        'nbanks', 'nports', 'port_type')
        }

    self.pe_array_row = setup['pe_array'][0]
    self.pe_array_col = setup['pe_array'][1]

    # ------------------------------------------------------------------
    # IO data to/from off-chip
    # ------------------------------------------------------------------
    io_chns = setup['io_chns']
    self.weights_in_chns = io_chns['weights']
    self.ifmap_in_chns = io_chns['ifmap']
    self.psum_in_chns = io_chns['psum_in']
    self.psum_out_chn = io_chns['psum_out']

    # ------------------------------------------------------------------
    # On-chip channels
    # ------------------------------------------------------------------
    # >> buffer output channels (each list is created exactly once)
    self.weights_out_chns = ModuleList(Channel())
    self.ifmap_out_chns = ModuleList(Channel())
    self.psum_out_chns = ModuleList(Channel())
    # >> update data to buffer
    self.psum_update_chns = ModuleList(Channel())

    # ==================================================================
    # GLBs
    # ==================================================================
    self.weights_glb = WeightsGLB({
        'fill_data_ichns': self.weights_in_chns,
        'drain_data_ochns': self.weights_out_chns,
        'num_logical_managers': 1,
        'SRAM': sram_spec('WeightsGLB'),
        'debug': ' WeightsGLB',
    })

    self.ifmap_glb = IFmapGLB({
        'fill_data_ichns': self.ifmap_in_chns,
        'drain_data_ochns': self.ifmap_out_chns,
        'num_logical_managers': 1,
        'SRAM': sram_spec('IFmapGLB'),
        'debug': ' IFmapGLB',
    })

    # The psum GLB additionally takes update data.
    self.psum_glb = PsumGLB({
        'fill_data_ichns': self.psum_in_chns,
        'update_data_ichns': self.psum_update_chns,
        'drain_data_ochns': self.psum_out_chns,
        'num_logical_managers': 1,
        'SRAM': sram_spec('PsumGLB'),
        'debug': ' PsumGLB',
    })

    # ==================================================================
    # PE array channels
    # ==================================================================
    self.ifmap_pe_data_chns = ModuleList()
    self.weights_pe_data_chns = ModuleList()
    self.psum_data_chns = ModuleList()
    self.pe_data_chns = {
        'ifmap': self.ifmap_pe_data_chns,
        'weights': self.weights_pe_data_chns,
        'psum': self.psum_data_chns,
    }

    for pe_row in range(self.pe_array_row):
        for chn_rows in self.pe_data_chns.values():
            chn_rows.append(ModuleList())
        for pe_col in range(self.pe_array_col):
            for chn_rows in self.pe_data_chns.values():
                chn_rows[pe_row].append(Channel())

    # One extra psum row: PE row r reads psum row r and writes row r + 1.
    self.psum_data_chns.append(ModuleList())
    for pe_col in range(self.pe_array_col):
        self.psum_data_chns[-1].append(Channel())

    # ==================================================================
    # Destination calculators for the NoCs
    # ==================================================================
    self.ifmap_NoC_destination_chn = ModuleList(Channel())
    self.ifmap_NoC_destination_calculator = IFmapNoCDestCalc({
        'out_chn': self.ifmap_NoC_destination_chn[0],
        'out_channel_width': 1,
        'debug': 'IFmapNoCDestCalc',
    })

    # The attribute names below are misspelled ("destionation"); they are
    # kept because code outside this method may reference them.
    self.weights_NoC_destination_chn = ModuleList(Channel())
    self.weights_NoC_destionation_calculator = WeightsNoCDestCalc({
        'out_chn': self.weights_NoC_destination_chn[0],
        'out_channel_width': 1,
        'debug': 'WeightsNoCDestCalc',
    })

    self.psum_in_NoC_destination_chn = ModuleList(Channel())
    self.psum_in_NoC_destionation_calculator = PsumInNoCDestCalc({
        'out_chn': self.psum_in_NoC_destination_chn[0],
        'out_channel_width': 1,
        'debug': 'PsumInNoCDestCalc',
    })

    # ==================================================================
    # NoCs
    # ==================================================================
    # -------------- Weights NoC: weights GLB -> PEs -------------------
    self.weights_serializered_data_chn = Channel()
    self.weights_serializer = WeightsSerializer({
        'in_chn': self.weights_out_chns[0],
        'out_chn': self.weights_serializered_data_chn,
        'ratio': setup['weights_seri_ratio'],
        'debug': 'weights_serialzer',
    })

    # NOTE(review): 'wr_chns' is a bare channel here while the ifmap and
    # psum NoCs get a ModuleList -- confirm this is what WeightsNoC
    # expects.
    self.weightsNoC = WeightsNoC({
        'rd_chns': self.pe_data_chns['weights'],
        'wr_chns': self.weights_serializered_data_chn,
        'dest_chns': self.weights_NoC_destination_chn,
        'debug': 'WeightsNoC',
    })

    # -------------- IFmap NoC: ifmap GLB -> PEs -----------------------
    self.ifmap_serialized_data_chn = Channel()
    # Attribute name is misspelled ("serilizer"); kept for compatibility.
    self.ifmap_serilizer = IfmapSerializer({
        'in_chn': self.ifmap_out_chns[0],
        'out_chn': self.ifmap_serialized_data_chn,
        'ratio': setup['ifmap_seri_ratio'],
        'debug': 'ifmap_serializer',
    })

    ifmap_noc_wr_chns = ModuleList()
    ifmap_noc_wr_chns.append(self.ifmap_serialized_data_chn)
    self.ifmapNoC = IFMapNoC({
        'rd_chns': self.pe_data_chns['ifmap'],
        'wr_chns': ifmap_noc_wr_chns,
        'dest_chns': self.ifmap_NoC_destination_chn,
        'debug': 'IFmapNoC',
    })

    # -------------- PsumRd NoC: psum GLB -> PEs -----------------------
    self.psum_serialized_data_chn = Channel()
    self.psum_serializer = PsumSerializer({
        'in_chn': self.psum_out_chns[0],
        'out_chn': self.psum_serialized_data_chn,
        'ratio': setup['psum_seri_ratio'],
        'debug': 'psum_serialzer',
    })

    pe_noc_wr_chn = ModuleList(self.psum_serialized_data_chn)
    self.psumRdNoC = PsumRdNoC({
        'rd_chns': self.psum_data_chns,
        'wr_chns': pe_noc_wr_chn,
        'dest_chns': self.psum_in_NoC_destination_chn,
        'debug': 'PsumRdNoC',
    })

    # -------------- PsumWr NoC: PEs -> psum GLB / off-chip ------------
    self.psum_out_noc_rd_chns = ModuleList()
    self.psum_out_noc_rd_chns.append(self.psum_update_chns[0])  # back to GLB
    self.psum_out_noc_rd_chns.append(self.psum_out_chn)         # off-chip
    self.psumWrNoC = PsumWrNoC({
        'rd_chns': self.psum_out_noc_rd_chns,
        'wr_chns': self.psum_data_chns,
        'debug': 'PsumWrNoC',
    })

    # ==================================================================
    # PE array
    # ==================================================================
    base_pe_setup = setup['PE']
    self.PE = ModuleList()
    for pe_row in range(self.pe_array_row):
        self.PE.append(ModuleList())
        for pe_col in range(self.pe_array_col):
            # Each PE gets its own shallow copy of the base setup so the
            # caller's dictionary is left untouched and no two PEs share
            # one (mutated) setup object.
            pe_setup = dict(base_pe_setup)
            pe_setup.update({
                'row': pe_row,
                'col': pe_col,
                'weights_data_in_chn':
                    self.weights_pe_data_chns[pe_row][pe_col],
                'ifmap_data_in_chn':
                    self.ifmap_pe_data_chns[pe_row][pe_col],
                'psum_data_in_chn': self.psum_data_chns[pe_row][pe_col],
                'psum_data_out_chn':
                    self.psum_data_chns[pe_row + 1][pe_col],
            })
            self.PE[pe_row].append(PE(pe_setup))