def instantiate(self, setup):
    """Build one processing element: IO channels, weights scratchpad, MAC.

    Keys read from ``setup``:
        row, col                  -- position, used for the debug tag
        weights_data_in_chn,
        ifmap_data_in_chn,
        psum_data_in_chn,
        psum_data_out_chn         -- each wrapped in a ModuleList
        wsp_depth, wsp_width, wsp_data_width,
        wsp_nports, wsp_nbanks,
        wsp_port_type             -- forwarded as the scratchpad 'SRAM' entry
        mac_latency               -- forwarded to the mac unit
    """
    self.class_name = 'PE'
    self.row = setup['row']
    self.col = setup['col']
    # Debug tag has the form PE[<row>][<col>].
    self.debug = 'PE[{}][{}]'.format(self.row, self.col)

    # ------------------------------------------------------------------
    # Stats related setup
    # ------------------------------------------------------------------
    self.component_class_specification_stats = 'hide'
    self.component_specification_stats = 'show'
    self.access_counts_stats = 'show'
    # NOTE(review): traces_stats is not assigned in this method; it is
    # presumably supplied by the enclosing class or its base -- confirm.
    if self.traces_stats == 'show':
        self.recorder = nnsimRecorder()
    else:
        self.recorder = None

    # ------------------------------------------------------------------
    # IO channels
    # ------------------------------------------------------------------
    self.weights_data_in_chn = ModuleList(setup['weights_data_in_chn'])
    self.ifmap_data_in_chn = ModuleList(setup['ifmap_data_in_chn'])
    self.psum_data_in_chn = ModuleList(setup['psum_data_in_chn'])
    self.psum_data_out_chn = ModuleList(setup['psum_data_out_chn'])

    # ------------------------------------------------------------------
    # Hardware components
    # ------------------------------------------------------------------
    # >>>> weights scratchpad (reg): filled from the weights input
    # channel, drained into weights_reader.
    self.weights_reader = ModuleList(Ch())
    scratchpad_sram = {
        'depth': setup['wsp_depth'],
        'width': setup['wsp_width'],
        'data_width': setup['wsp_data_width'],
        'nports': setup['wsp_nports'],
        'nbanks': setup['wsp_nbanks'],
        'port_type': setup['wsp_port_type'],
    }
    weights_sp_setup = {
        'fill_data_ichns': self.weights_data_in_chn,
        'drain_data_ochns': self.weights_reader,
        'num_logical_managers': 1,
        'SRAM': scratchpad_sram,
        'debug': self.debug + '_weights_sp',
    }
    self.weight_sp = WeightsSP(weights_sp_setup)

    # >>>> mac unit: operands are the first ifmap channel, the scratchpad
    # output and the first incoming psum channel; the result goes to the
    # first outgoing psum channel.
    mac_setup = dict(
        opa_chn=self.ifmap_data_in_chn[0],
        opb_chn=self.weights_reader[0],
        opc_chn=self.psum_data_in_chn[0],
        result_chn=self.psum_data_out_chn[0],
        latency=setup['mac_latency'],
        debug=self.debug,
    )
    self.mac = mac(mac_setup)
def instantiate(self, setup):
    """Set up the testbench: trace generator, IO channels and the chip DUT.

    ``setup`` is handed unchanged to ``nnsimTestBench.instantiate``. The
    chip configuration is derived from ``self.arch``, which is read here
    but not assigned in this method.
    """
    # nnsimTestBench class sets trace generator if there is one; it is
    # then replaced unconditionally by the ws-chip generator below.
    nnsimTestBench.instantiate(self, setup)
    self.trace_generator = ws_chip_trace_generator()

    self.traces_stats = 'show'
    if self.traces_stats == 'show':
        self.recorder = nnsimRecorder()
    else:
        self.recorder = None
    self.generated_trace = True

    # bookkeeping variables
    self.result = []

    # ------------------------------------------------------------------
    # IO channels for onchip and offchip communications
    # ------------------------------------------------------------------
    # -> input channels for sending the input trace data to the GLBs;
    #    smartbuffer needs channels specified as list format
    self.weights_in_chn = ModuleList(Channel())
    self.ifmap_chn = ModuleList(Channel())
    self.psum_in_chn = ModuleList(Channel())
    # -> output channel for receiving calculated psum from the chip
    self.psum_out_chn = Channel()

    # ------------------------------------------------------------------
    # Setup and instantiation of the design under test
    # ------------------------------------------------------------------
    # Start from a shallow copy of the architecture description.
    chip_setup = dict(self.arch)

    # attributes that can be automatically derived
    chip_setup['weights_seri_ratio'] = self.arch['width']['WeightsGLB']
    chip_setup['ifmap_seri_ratio'] = self.arch['width']['IFmapGLB']
    chip_setup['psum_seri_ratio'] = self.arch['width']['PsumGLB']
    chip_setup['pe_array'] = self.arch['PE_array']
    io_channels = {
        'weights': self.weights_in_chn,
        'ifmap': self.ifmap_chn,
        'psum_in': self.psum_in_chn,
        'psum_out': self.psum_out_chn,
    }
    chip_setup['io_chns'] = io_channels
    chip_setup['PE'] = self.arch['PE']
    self.dut = chip(chip_setup)

    # for the use of dispatching data inside the testbench
    self.WGLB_width = self.arch['width']['WeightsGLB']
    self.IGLB_width = self.arch['width']['IFmapGLB']
    self.PGLB_width = self.arch['width']['PsumGLB']
def instantiate(self, arr_x, arr_y, input_chn, output_chn, chn_per_word,
                ifmap_glb_depth):
    """Wire up the chip with pre- and post-transform stages.

    Creates the deserializer/serializer, the GLBs, an ``arr_y`` x ``arr_x``
    PE grid, the pre-transform grids for ifmaps and weights, the
    post-transform grid, the ifmap tiler and all connecting NoCs.

    Args:
        arr_x: number of PEs per row (inner loop bound of the PE grid).
        arr_y: number of PE rows (outer loop bound of the PE grid).
        input_chn: channel handed to the InputDeserializer.
        output_chn: channel handed to the OutputSerializer.
        chn_per_word: forwarded to the deserializer, IFMapGLB, tiler, NoCs.
        ifmap_glb_depth: forwarded to IFMapGLB.
    """
    # PE static configuration (immutable)
    self.name = 'chip'
    self.arr_x = arr_x
    self.arr_y = arr_y
    self.chn_per_word = chn_per_word

    # Grid dimensions; the numeric remarks are the original author's
    # notes for one particular configuration.
    self.post_tr_x = arr_x         # num output channels = 8
    self.post_tr_y = 4             # num tiles = 4
    self.pre_tr_ifmap_x = arr_y    # num input channels = 4
    self.pre_tr_ifmap_y = 4        # num tiles = 4
    self.pre_tr_weights_x = arr_y  # num input channels = 4
    self.pre_tr_weights_y = arr_x  # num output channels = 8

    self.stat_type = 'show'

    # DRAM IO channels
    self.input_chn = input_chn
    self.output_chn = output_chn

    # Input deserializer and output serializer
    self.ifmap_wr_chn = Channel()
    self.weights_wr_chn = Channel()
    self.bias_wr_chn = Channel()
    self.deserializer = InputDeserializer(
        self.input_chn, self.ifmap_wr_chn, self.weights_wr_chn,
        self.bias_wr_chn, arr_x, arr_y, chn_per_word)
    self.psum_output_chn = Channel()
    self.serializer = OutputSerializer(self.output_chn, self.psum_output_chn)

    # GLBs and GLB channels
    self.ifmap_glb_wr_chn = Channel(3)
    self.ifmap_rd_chn = Channel(3)
    self.ifmap_glb = IFMapGLB(self.ifmap_glb_wr_chn, self.ifmap_rd_chn,
                              ifmap_glb_depth, chn_per_word)

    self.psum_rd_chn = Channel(3)
    self.psum_noc_wr_chn = Channel()
    # The PSumGLB is disabled in the original:
    # self.psum_glb = PSumGLB(self.psum_wr_chn, self.psum_noc_wr_chn, self.psum_rd_chn,
    #                         psum_glb_depth, chn_per_word)

    self.weights_glb_wr_chn = Channel(3)
    self.weights_rd_chn = Channel()
    self.weights_glb = WeightsGLB(self.weights_glb_wr_chn, self.weights_rd_chn)

    self.bias_rd_chn = Channel()
    self.bias_glb = BiasGLB(self.bias_wr_chn, self.bias_rd_chn)

    # ------------------------------------------------------------------
    # PE array and its local channels
    # ------------------------------------------------------------------
    self.pe_array = ModuleList()
    self.pe_ifmap_chns = ModuleList()
    self.pe_filter_chns = ModuleList()
    self.pe_psum_chns = ModuleList()

    # psum channels have one more row than the PE grid: row 0 feeds the
    # first PE row, row y+1 carries the output of PE row y.
    self.pe_psum_chns.append(ModuleList())
    for _ in range(self.arr_x):
        self.pe_psum_chns[0].append(Channel(32))

    for row in range(self.arr_y):
        self.pe_array.append(ModuleList())
        self.pe_ifmap_chns.append(ModuleList())
        self.pe_filter_chns.append(ModuleList())
        self.pe_psum_chns.append(ModuleList())
        pe_row = self.pe_array[row]
        ifmap_row = self.pe_ifmap_chns[row]
        filter_row = self.pe_filter_chns[row]
        psum_in_row = self.pe_psum_chns[row]
        psum_out_row = self.pe_psum_chns[row + 1]
        for col in range(self.arr_x):
            ifmap_row.append(Channel(32))
            filter_row.append(Channel(32))
            psum_out_row.append(Channel(32))
            pe_row.append(
                PE(col, row,
                   ifmap_row[col],
                   filter_row[col],
                   psum_in_row[col],
                   psum_out_row[col]))

    # ------------------------------------------------------------------
    # Pre-transform IFMap array and its local channels
    # ------------------------------------------------------------------
    self.pre_tr_ifmap_array = ModuleList()
    self.pre_tr_ifmap_in_chns = ModuleList()
    self.pre_tr_ifmap_out_chns = ModuleList()
    for row in range(self.pre_tr_ifmap_y):
        self.pre_tr_ifmap_array.append(ModuleList())
        self.pre_tr_ifmap_in_chns.append(ModuleList())
        self.pre_tr_ifmap_out_chns.append(ModuleList())
        unit_row = self.pre_tr_ifmap_array[row]
        in_row = self.pre_tr_ifmap_in_chns[row]
        out_row = self.pre_tr_ifmap_out_chns[row]
        for col in range(self.pre_tr_ifmap_x):
            in_row.append(Channel(32))
            out_row.append(Channel(32))
            unit_row.append(
                PreTransformIFMap(col, row, in_row[col], out_row[col]))

    # ------------------------------------------------------------------
    # Pre-transform weights array and its local channels
    # ------------------------------------------------------------------
    self.pre_tr_weights_array = ModuleList()
    self.pre_tr_weights_in_chns = ModuleList()
    self.pre_tr_weights_out_chns = ModuleList()
    for row in range(self.pre_tr_weights_y):
        self.pre_tr_weights_array.append(ModuleList())
        self.pre_tr_weights_in_chns.append(ModuleList())
        self.pre_tr_weights_out_chns.append(ModuleList())
        unit_row = self.pre_tr_weights_array[row]
        in_row = self.pre_tr_weights_in_chns[row]
        out_row = self.pre_tr_weights_out_chns[row]
        for col in range(self.pre_tr_weights_x):
            in_row.append(Channel(32))
            out_row.append(Channel(32))
            unit_row.append(
                PreTransformWeights(col, row, in_row[col], out_row[col]))

    # ------------------------------------------------------------------
    # Post-transform array and its local channels
    # ------------------------------------------------------------------
    self.post_tr_array = ModuleList()
    self.post_tr_bias_chns = ModuleList()
    self.post_tr_ofmap_in_chns = ModuleList()
    self.post_tr_ofmap_out_chns = ModuleList()
    for row in range(self.post_tr_y):
        self.post_tr_array.append(ModuleList())
        self.post_tr_bias_chns.append(ModuleList())
        self.post_tr_ofmap_in_chns.append(ModuleList())
        self.post_tr_ofmap_out_chns.append(ModuleList())
        unit_row = self.post_tr_array[row]
        bias_row = self.post_tr_bias_chns[row]
        ofmap_in_row = self.post_tr_ofmap_in_chns[row]
        ofmap_out_row = self.post_tr_ofmap_out_chns[row]
        for col in range(self.post_tr_x):
            bias_row.append(Channel(32))
            ofmap_in_row.append(Channel(32))
            ofmap_out_row.append(Channel(32))
            unit_row.append(
                PostTransform(col, row,
                              bias_row[col],
                              ofmap_in_row[col],
                              ofmap_out_row[col]))

    # ------------------------------------------------------------------
    # NoCs delivering weights, ifmaps, psums and biases
    # ------------------------------------------------------------------
    self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns,
                                 self.chn_per_word)
    self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns,
                              self.arr_x, self.chn_per_word)
    self.psum_rd_noc = PSumRdNoC(self.pe_psum_chns[0], self.chn_per_word)
    # Disabled in the original:
    # self.psum_wr_noc = PSumWrNoC(self.pe_psum_chns[-1], self.psum_output_chn, self.chn_per_word)
    self.bias_noc = BiasNoC(self.bias_rd_chn, self.post_tr_bias_chns,
                            self.chn_per_word)

    # NoCs around the post-transform blocks: the last psum row feeds the
    # post-transform inputs, whose outputs go to psum_output_chn.
    self.post_tr_wr_noc = PostTrWrNoC(self.pe_psum_chns[-1],
                                      self.post_tr_ofmap_in_chns,
                                      self.chn_per_word)
    self.post_tr_rd_noc = PostTrRdNoC(self.post_tr_ofmap_out_chns,
                                      self.psum_output_chn,
                                      self.chn_per_word)

    # Tiler feeding the ifmap pre-transform inputs
    self.ifmap_tiler = IFMapTiler(self.ifmap_wr_chn,
                                  self.pre_tr_ifmap_in_chns,
                                  self.chn_per_word)

    # NoCs around the pre-transform blocks
    # Disabled in the original:
    # self.pre_tr_ifmap_wr_noc = PreTrIFMapWrNoC(self.ifmap_wr_chn, self.pre_tr_ifmap_in_chns, self.chn_per_word)
    self.pre_tr_ifmap_rd_noc = PreTrIFMapRdNoC(self.pre_tr_ifmap_out_chns,
                                               self.ifmap_glb_wr_chn,
                                               self.chn_per_word)
    self.pre_tr_weights_wr_noc = PreTrWeightsWrNoC(
        self.weights_wr_chn, self.pre_tr_weights_in_chns, self.chn_per_word)
    self.pre_tr_weights_rd_noc = PreTrWeightsRdNoC(
        self.pre_tr_weights_out_chns, self.weights_glb_wr_chn,
        self.chn_per_word)
class WSArch(Module):
    """Chip-level module: GLBs, a PE grid, a post-transform grid and NoCs."""

    def instantiate(self, arr_x, arr_y, input_chn, output_chn, chn_per_word,
                    ifmap_glb_depth):
        """Create and connect all sub-modules and channels of the chip.

        Args:
            arr_x: number of PEs per row (inner loop bound of the PE grid).
            arr_y: number of PE rows (outer loop bound of the PE grid).
            input_chn: channel handed to the InputDeserializer.
            output_chn: channel handed to the OutputSerializer.
            chn_per_word: forwarded to the deserializer, IFMapGLB and NoCs.
            ifmap_glb_depth: forwarded to IFMapGLB.
        """
        # PE static configuration (immutable)
        self.name = 'chip'
        self.arr_x = arr_x
        self.arr_y = arr_y
        self.chn_per_word = chn_per_word

        # The numeric remarks are the original author's notes for one
        # particular configuration.
        self.post_tr_x = arr_x  # num output channels = 8
        self.post_tr_y = 4      # num tiles = 4

        self.stat_type = 'show'

        # DRAM IO channels
        self.input_chn = input_chn
        self.output_chn = output_chn

        # Input deserializer and output serializer
        self.ifmap_wr_chn = Channel()
        self.weights_wr_chn = Channel()
        self.bias_wr_chn = Channel()
        self.deserializer = InputDeserializer(
            self.input_chn, self.ifmap_wr_chn, self.weights_wr_chn,
            self.bias_wr_chn, arr_x, arr_y, chn_per_word)
        self.psum_output_chn = Channel()
        self.serializer = OutputSerializer(self.output_chn,
                                           self.psum_output_chn)

        # GLBs and GLB channels
        self.ifmap_rd_chn = Channel(3)
        self.ifmap_glb = IFMapGLB(self.ifmap_wr_chn, self.ifmap_rd_chn,
                                  ifmap_glb_depth, chn_per_word)

        self.psum_rd_chn = Channel(3)
        self.psum_noc_wr_chn = Channel()
        # The PSumGLB is disabled in the original:
        # self.psum_glb = PSumGLB(self.psum_wr_chn, self.psum_noc_wr_chn, self.psum_rd_chn,
        #                         psum_glb_depth, chn_per_word)

        self.weights_rd_chn = Channel()
        self.weights_glb = WeightsGLB(self.weights_wr_chn,
                                      self.weights_rd_chn)

        self.bias_rd_chn = Channel()
        self.bias_glb = BiasGLB(self.bias_wr_chn, self.bias_rd_chn)

        # --------------------------------------------------------------
        # PE array and its local channels
        # --------------------------------------------------------------
        self.pe_array = ModuleList()
        self.pe_ifmap_chns = ModuleList()
        self.pe_filter_chns = ModuleList()
        self.pe_psum_chns = ModuleList()

        # psum channels have one more row than the PE grid: row 0 feeds
        # the first PE row, row y+1 carries the output of PE row y.
        self.pe_psum_chns.append(ModuleList())
        for _ in range(self.arr_x):
            self.pe_psum_chns[0].append(Channel(32))

        for row in range(self.arr_y):
            self.pe_array.append(ModuleList())
            self.pe_ifmap_chns.append(ModuleList())
            self.pe_filter_chns.append(ModuleList())
            self.pe_psum_chns.append(ModuleList())
            pe_row = self.pe_array[row]
            ifmap_row = self.pe_ifmap_chns[row]
            filter_row = self.pe_filter_chns[row]
            psum_in_row = self.pe_psum_chns[row]
            psum_out_row = self.pe_psum_chns[row + 1]
            for col in range(self.arr_x):
                ifmap_row.append(Channel(32))
                filter_row.append(Channel(32))
                psum_out_row.append(Channel(32))
                pe_row.append(
                    PE(col, row,
                       ifmap_row[col],
                       filter_row[col],
                       psum_in_row[col],
                       psum_out_row[col]))

        # --------------------------------------------------------------
        # Post-transform array and its local channels
        # --------------------------------------------------------------
        self.post_tr_array = ModuleList()
        self.post_tr_bias_chns = ModuleList()
        self.post_tr_ofmap_in_chns = ModuleList()
        self.post_tr_ofmap_out_chns = ModuleList()
        for row in range(self.post_tr_y):
            self.post_tr_array.append(ModuleList())
            self.post_tr_bias_chns.append(ModuleList())
            self.post_tr_ofmap_in_chns.append(ModuleList())
            self.post_tr_ofmap_out_chns.append(ModuleList())
            unit_row = self.post_tr_array[row]
            bias_row = self.post_tr_bias_chns[row]
            ofmap_in_row = self.post_tr_ofmap_in_chns[row]
            ofmap_out_row = self.post_tr_ofmap_out_chns[row]
            for col in range(self.post_tr_x):
                bias_row.append(Channel(32))
                ofmap_in_row.append(Channel(32))
                ofmap_out_row.append(Channel(32))
                unit_row.append(
                    PostTransform(col, row,
                                  bias_row[col],
                                  ofmap_in_row[col],
                                  ofmap_out_row[col]))

        # --------------------------------------------------------------
        # NoCs delivering weights, ifmaps, psums and biases
        # --------------------------------------------------------------
        self.filter_noc = WeightsNoC(self.weights_rd_chn,
                                     self.pe_filter_chns, self.chn_per_word)
        self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns,
                                  self.arr_x, self.chn_per_word)
        self.psum_rd_noc = PSumRdNoC(self.pe_psum_chns[0], self.chn_per_word)
        # Disabled in the original:
        # self.psum_wr_noc = PSumWrNoC(self.pe_psum_chns[-1], self.psum_output_chn, self.chn_per_word)
        self.bias_noc = BiasNoC(self.bias_rd_chn, self.post_tr_bias_chns,
                                self.chn_per_word)

        # The last psum row feeds the post-transform inputs; the
        # post-transform outputs are collected into psum_output_chn.
        self.post_tr_wr_noc = PostTrWrNoC(self.pe_psum_chns[-1],
                                          self.post_tr_ofmap_in_chns,
                                          self.chn_per_word)
        self.post_tr_rd_noc = PostTrRdNoC(self.post_tr_ofmap_out_chns,
                                          self.psum_output_chn,
                                          self.chn_per_word)

    def configure(self, image_size, filter_size, in_chn, out_chn):
        """Configure every sub-module for one layer and print a summary.

        Args:
            image_size: indexable; elements 0 and 1 are multiplied to get
                the per-iteration fmap count and the iteration count.
            filter_size: forwarded to the deserializer and the ifmap GLB.
            in_chn: only printed.
            out_chn: only printed.
        """
        in_sets = self.arr_y // self.chn_per_word
        # Only referenced by the disabled PSum configuration calls below.
        out_sets = self.arr_x // self.chn_per_word
        # Both quantities are the same product of the two image dimensions.
        fmap_per_iteration = image_size[0] * image_size[1]
        num_iteration = image_size[0] * image_size[1]

        self.deserializer.configure(image_size, filter_size)
        self.ifmap_glb.configure(image_size, filter_size, in_sets,
                                 fmap_per_iteration)
        # self.psum_glb.configure(filter_size, out_sets, fmap_per_iteration)
        self.filter_noc.configure(in_sets, self.arr_x)
        self.ifmap_noc.configure(in_sets)
        self.bias_noc.configure(self.post_tr_x, self.post_tr_y)
        self.post_tr_wr_noc.configure(self.post_tr_x, self.post_tr_y)
        self.post_tr_rd_noc.configure()
        self.psum_rd_noc.configure(self.arr_x)
        # self.psum_wr_noc.configure(num_iteration, fmap_per_iteration, out_sets)

        for row in range(self.arr_y):
            for col in range(self.arr_x):
                self.pe_array[row][col].configure(fmap_per_iteration,
                                                  num_iteration)

        for row in range(self.post_tr_y):
            for col in range(self.post_tr_x):
                self.post_tr_array[row][col].configure()

        print("Num PEs: ", self.arr_x * self.arr_y)
        print("Num post transform blocks: ", self.post_tr_x * self.post_tr_y)
        print("image size: ", image_size)
        print("filter size: ", filter_size)
        print("input channels: ", in_chn)
        print("output channels: ", out_chn)
def instantiate(self, setup):
    """Build the smart buffer: SRAM, logical managers and internal channels.

    Keys read from ``setup``:
        debug                 -- debug tag, also the prefix of each LM tag
        fill_data_ichns       -- required
        update_data_ichns     -- optional, None when absent
        drain_enable_ichns    -- optional, None when absent
        drain_data_ochns      -- required
        SRAM                  -- dict passed to SRAM(); 'width' is read, and a
                                 copy without 'port_type' is merged into
                                 self.attrs
        num_logical_managers  -- number of logical managers to create
        LMs                   -- per-manager setup dicts; each has an 'AGs'
                                 entry that is checked for 'update'
    """
    self.class_name = 'nnsimSmartBuffer'
    self.debug = setup['debug']

    # ------------------------------------------------------------------
    # input channels
    # ------------------------------------------------------------------
    self.fill_data_ichns = setup['fill_data_ichns']
    self.update_data_ichns = setup.get('update_data_ichns')
    self.drain_enable_ichns = setup.get('drain_enable_ichns')

    # ------------------------------------------------------------------
    # output channels
    # ------------------------------------------------------------------
    self.drain_data_ochns = setup['drain_data_ochns']

    # ------------------------------------------------------------------
    # subcomponents
    # ------------------------------------------------------------------
    self.logical_managers = ModuleL()
    self.memory = SRAM(setup['SRAM'])
    self.memory_width = setup['SRAM']['width']

    # ------------------------------------------------------------------
    # internal channels (one entry per logical manager)
    # ------------------------------------------------------------------
    self.ack_internal_chns = ModuleL()
    self.lm_fill_addr_internal_chns = ModuleL()
    self.lm_drain_addr_internal_chns = ModuleL()
    self.lm_update_addr_internal_chns = ModuleL()

    # ------------------------------------------------------------------
    # setup logical managers
    # ------------------------------------------------------------------
    self.num_logical_managers = setup['num_logical_managers']
    for idx in range(self.num_logical_managers):
        manager_cfg = setup['LMs'][idx]

        # Values from the per-manager config are merged last, so they
        # override 'debug' and 'memory_width' if present there.
        lm_setup = {'debug': self.debug + ' (LM:' + str(idx) + ')'}
        lm_setup['memory_width'] = self.memory_width
        lm_setup.update(manager_cfg.copy())

        # ------ >> ack channel
        self.ack_internal_chns.append(Ch())
        lm_setup['ack_ichn'] = self.ack_internal_chns[idx]

        # ------ >> fill address channel
        self.lm_fill_addr_internal_chns.append(Ch())
        lm_setup['fill_addr_ochn'] = self.lm_fill_addr_internal_chns[idx]

        # ------ >> drain address channel
        self.lm_drain_addr_internal_chns.append(Ch())
        lm_setup['drain_addr_ochn'] = self.lm_drain_addr_internal_chns[idx]

        # ------ >> update address channel: only when the manager has an
        # 'update' entry in its 'AGs'; otherwise a None placeholder keeps
        # the list index-aligned with the managers.
        if 'update' in manager_cfg['AGs']:
            self.lm_update_addr_internal_chns.append(Ch())
            lm_setup['update_addr_ochn'] = \
                self.lm_update_addr_internal_chns[idx]
        else:
            self.lm_update_addr_internal_chns.append(None)

        # ------ >> instantiate logical manager
        self.logical_managers.append(LM(lm_setup))

        # ------ >> assign unique class name for each logical manager
        manager = self.logical_managers[idx]
        manager.class_name = 'LM' + str(idx) + '_' + manager.class_name

    # ------------------------------------------------------------------
    # internal records containers
    # ------------------------------------------------------------------
    # >> record for processed reads, one NLCh(depth=1) per SRAM port;
    #    used for retrieving read data information
    self.last_read = ModuleL()
    for _ in range(self.memory.nports):
        self.last_read.append(NLCh(depth=1))

    # used for detecting drains that can be forwarded
    self.approved_write_data_in_cycle = [
        {'fill': None, 'update': None}
        for _ in range(self.num_logical_managers)
    ]

    # ------------------------------------------------------------------
    # Static and Runtime information
    # ------------------------------------------------------------------
    # definition of compound actions
    self.setup_access_info()

    # Publish the SRAM parameters, minus 'port_type', as attributes.
    sram_attrs = setup['SRAM'].copy()
    sram_attrs.pop('port_type')
    self.attrs.update(sram_attrs)

    self.component_with_action = True
class nnsimSmartBuffer(Module): def instantiate(self, setup): self.class_name = 'nnsimSmartBuffer' self.debug = setup['debug'] # ---------------------------------------------------------------- # input channels # ---------------------------------------------------------------- self.fill_data_ichns = setup['fill_data_ichns'] self.update_data_ichns = setup['update_data_ichns']\ if 'update_data_ichns' in setup\ else None self.drain_enable_ichns = setup['drain_enable_ichns']\ if 'drain_enable_ichns' in setup\ else None # ---------------------------------------------------------------- # output channels # ---------------------------------------------------------------- self.drain_data_ochns = setup['drain_data_ochns'] # ---------------------------------------------------------------- # subcomponents # ---------------------------------------------------------------- self.logical_managers = ModuleL() self.memory = SRAM(setup['SRAM']) self.memory_width = setup['SRAM']['width'] # ---------------------------------------------------------------- # internal channels # ---------------------------------------------------------------- self.ack_internal_chns = ModuleL() self.lm_fill_addr_internal_chns = ModuleL() self.lm_drain_addr_internal_chns = ModuleL() self.lm_update_addr_internal_chns = ModuleL() # ---------------------------------------------------------------- # setup logical managers # ---------------------------------------------------------------- self.num_logical_managers = setup['num_logical_managers'] for logical_unit_idx in range(self.num_logical_managers): lm_setup = { 'debug': self.debug + ' (LM:' + str(logical_unit_idx) + ')' } lm_setup['memory_width'] = self.memory_width lm_setup.update(setup['LMs'][logical_unit_idx].copy()) # ------ >> ack channel self.ack_internal_chns.append(Ch()) lm_setup['ack_ichn'] = self.ack_internal_chns[logical_unit_idx] # ------ >> fill address channel self.lm_fill_addr_internal_chns.append(Ch()) lm_setup['fill_addr_ochn'] = 
self.lm_fill_addr_internal_chns[ logical_unit_idx] # ------ >> drain address channel self.lm_drain_addr_internal_chns.append(Ch()) lm_setup['drain_addr_ochn'] = self.lm_drain_addr_internal_chns[ logical_unit_idx] # ------ >> update address channel if 'update' in setup['LMs'][logical_unit_idx]['AGs']: self.lm_update_addr_internal_chns.append(Ch()) lm_setup[ 'update_addr_ochn'] = self.lm_update_addr_internal_chns[ logical_unit_idx] else: self.lm_update_addr_internal_chns.append(None) # ------ >> instantiate logical manager self.logical_managers.append(LM(lm_setup)) # ------ >> assign unique class name for each logical manager self.logical_managers[logical_unit_idx].class_name = \ 'LM'+ str(logical_unit_idx) + '_' + self.logical_managers[logical_unit_idx].class_name # ---------------------------------------------------------------- # internal records containers # ---------------------------------------------------------------- # >> record for processed reads # used for retrieving read data information self.last_read = ModuleL() for sram_port_idx in range(self.memory.nports): self.last_read.append(NLCh(depth=1)) # used for detecting drains that can be forwarded self.approved_write_data_in_cycle = [] for lm_idx in range(self.num_logical_managers): self.approved_write_data_in_cycle.append({ 'fill': None, 'update': None }) # ----------------------------------------------------------------- # Static and Runtime information # ----------------------------------------------------------------- # definitin of compound actions self.setup_access_info() attrs_dict = setup['SRAM'].copy() attrs_dict.pop('port_type') self.attrs.update(attrs_dict) self.component_with_action = True def configure(self, config): for logical_unit_idx in range(self.num_logical_managers): self.logical_managers[logical_unit_idx].configure( config['LM'][logical_unit_idx]) # ---------------------------------------------------------------- # internal records configuration # -------------- 
-------------------------------------------------- # >> record for logical manager and sram ports correspondance self.lm_sram_map = config['lm_sram_map'] def tick(self): self.ack_packet = [] for i in range(self.num_logical_managers): self.ack_packet.append([]) # ------------------------------------------------------------------ # Check for unprocessed reads # ------------------------------------------------------------------ for sram_port_idx in range(self.memory.nports): if self.last_read[sram_port_idx].valid(): read_request_info = self.last_read[sram_port_idx].peek() # if 'IFmapGLB' in self.debug: # print(self.debug, read_request_info, 'ochn vacancy:', self.drain_data_ochns[read_request_info['lm_idx']].vacancy()) if self.drain_data_ochns[ read_request_info['lm_idx']].vacancy(): if read_request_info['enabled']: if not read_request_info['forwarded']: read_data = [ d for d in self.memory.response( port=sram_port_idx) ] # if 'PsumGLB' in self.debug: # print('DDDDD ' ,self.debug, 'read from memory:', read_data, 'from ', read_request_info['addr']) else: read_data = read_request_info['forwarded_data'] # if 'PsumGLB' in self.debug: # print('DDDDD ' ,self.debug, 'forwared read:', read_data, 'from ', read_request_info['addr']) else: read_data = [0] * self.memory.width # if 'PsumGLB' in self.debug: # print('DDDDD ' ,self.debug, 'gated data:', read_data, 'from ', read_request_info['addr']) self.last_read[sram_port_idx].pop() # if 'PsumGLB' in self.debug: # print(self.debug, 'port ', sram_port_idx, 'pop') self.drain_data_ochns[read_request_info['lm_idx']].push( read_data) # ------------------------------------------------------------------ # Check for ready requests # ------------------------------------------------------------------ for lm_idx in range(self.num_logical_managers): self.check_for_update_request(lm_idx) self.check_for_fill_request(lm_idx) self.check_for_drain_request(lm_idx) if self.logical_managers[lm_idx].fill_round_done.rd()\ and 
self.logical_managers[lm_idx].drain_round_done.rd(): self.logical_managers[lm_idx].reset_book_keeping_pointers() # print('round done') def check_for_drain_request(self, lm_idx): # if not self.lm_drain_addr_internal_chns[lm_idx].valid(): # if 'IFmapGLB' in self.debug: # print('!!!!!', self.debug,' no drain request') # else: # if 'IFmapGLB' in self.debug: # print('!!!!!', self.debug,'wait to process request',self.lm_drain_addr_internal_chns[lm_idx].peek() ) if self.lm_drain_addr_internal_chns[lm_idx].valid(): request_info = self.logical_managers[lm_idx].check_request('drain') # print(request_info) # if 'IFmapGLB' in self.debug: # print(self.debug, request_info) if request_info['addr'] is not None: enable_signal_ready = True enabled = True enabled_buffer = False if self.drain_enable_ichns is not None and \ self.drain_enable_ichns[lm_idx] is not None: enable_signal_ready = self.drain_enable_ichns[ lm_idx].valid() enabled_buffer = True if enable_signal_ready: enabled = self.drain_enable_ichns[lm_idx].peek()[0] # if 'IFmapGLB' in self.debug: # print(self.debug, request_info, 'enable signal:', enable_signal_ready) if enable_signal_ready: sram_rd_port = self.lm_sram_map[lm_idx]['drain'] if self.last_read[sram_rd_port].vacancy( ) and not self.memory.port_in_use(sram_rd_port): # if 'PsumGLB' in self.debug: # print(self.debug, 'there is space and available prot for issuing read request') if enabled_buffer: self.drain_enable_ichns[lm_idx].pop() address = request_info['addr'] self.lm_drain_addr_internal_chns[lm_idx].pop() if enabled and not request_info['forwarded']: self.memory.request(RD, address, port=sram_rd_port) if enabled and request_info['forwarded']: # print('forwarding data') if request_info['prereq'] == 'fill': forwarded_data = self.approved_write_data_in_cycle[ lm_idx]['fill'] if forwarded_data is None: print( self.debug, 'there is no fill data available for forwarding' ) sys.exit(0) elif request_info['prereq'] == 'update': forwarded_data = 
self.approved_write_data_in_cycle[ lm_idx]['update'] if forwarded_data is None: print( self.debug, 'there is no update data available for forwarding' ) sys.exit(0) else: print(self.debug, 'nowhere to find data being forwarded') sys.exit(0) else: forwarded_data = None self.last_read[sram_rd_port].push({'lm_idx': lm_idx,\ 'enabled': enabled,\ 'addr': address,\ 'forwarded': request_info['forwarded'],\ 'forwarded_data': forwarded_data}) ack_packet = {'type': 'drain', 'addr': address, 'shrink': request_info['shrink'],\ 'prereq': request_info['prereq'], 'forwarded': request_info['forwarded'],\ 'reset_phy_head': request_info['reset_phy_head'] } # if 'IFmapGLB' in self.debug: # print('########## DRAIN #################', self.debug, request_info) self.logical_managers[lm_idx].update_book_keeping( ack_packet) # ------------------------------------------------- # record access # ------------------------------------------------- if enabled: last_addr = self.last_read_addr[lm_idx] if request_info['forwarded']: arg_name = 'lm' + str( lm_idx) + '_nforwarded_drain' self.cycle_access[arg_name] += 1 elif last_addr is not None and last_addr == address: arg_name = 'lm' + str( lm_idx) + '_nrepeated_drain' self.cycle_access[arg_name] += 1 else: arg_name = 'lm' + str(lm_idx) + '_ndrain' self.last_read_addr[lm_idx] = address self.cycle_access[arg_name] += 1 else: arg_name = 'lm' + str(lm_idx) + '_ngated_drain' self.cycle_access[arg_name] += 1 # else: # if 'PsumGLB' in self.debug: # print(self.debug, 'rd_port:', sram_rd_port, 'last_read_vacant:', self.last_read[sram_rd_port].vacancy(), \ # 'port_in_use:', self.memory.port_in_use(sram_rd_port)) def check_for_update_request(self, lm_idx): if self.logical_managers[lm_idx].update_AG is None: return # if 'PsumGLB' in self.debug: # stop = 1 if (self.update_data_ichns[lm_idx].valid() and self.lm_update_addr_internal_chns[lm_idx].valid()) or \ (self.lm_update_addr_internal_chns[lm_idx].valid() and 
self.lm_update_addr_internal_chns[lm_idx].peek()['addr'] == 'reset_phead' ): request_info = self.logical_managers[lm_idx].check_request( 'update') # if 'PsumGLB' in self.debug: # print(self.debug, request_info) if request_info['addr'] is not None: sram_wr_port = self.lm_sram_map[lm_idx]['update'] if not self.memory.port_in_use(sram_wr_port): address = request_info['addr'] data = self.update_data_ichns[lm_idx].pop() # if 'PsumGLB' in self.debug: # print(self.debug, request_info, 'data for update:', data) self.approved_write_data_in_cycle[lm_idx]['update'] = data self.lm_update_addr_internal_chns[lm_idx].pop() self.memory.request(WR, address, data, port=sram_wr_port) ack_packet = { 'type': 'update', 'addr': address, 'reset_phy_head': request_info['reset_phy_head'] } self.logical_managers[lm_idx].update_book_keeping( ack_packet) # --------------------------------------- # record access # --------------------------------------- if not data == self.last_write_data[lm_idx]: arg_name = 'lm' + str(lm_idx) + '_nupdate' else: arg_name = 'lm' + str( lm_idx) + '_nrepeated_data_update' self.cycle_access[arg_name] += 1 # self.curr_write_addr[lm_idx] = address self.last_write_data[lm_idx] = data if self.traces_stats == 'show': self.recorder.record(self.debug + '_update.txt', data[0]) else: # if the write port is not avaiable, overwrite the logical manager's information self.logical_managers[lm_idx].approved_write_addr_in_cycle[ 'update'] = None # else: # if 'PsumGLB' in self.debug: # print('update_data:', self.update_data_ichns[lm_idx].valid()) def check_for_fill_request(self, lm_idx): if (self.fill_data_ichns[lm_idx].valid() and self.lm_fill_addr_internal_chns[lm_idx].valid()) or \ (self.lm_fill_addr_internal_chns[lm_idx].valid() and self.lm_fill_addr_internal_chns[lm_idx].peek()['addr'] == 'reset_phead' ): request_info = self.logical_managers[lm_idx].check_request('fill') if request_info['addr'] is not None: sram_wr_port = self.lm_sram_map[lm_idx]['fill'] if not 
self.memory.port_in_use(sram_wr_port): address = request_info['addr'] data = self.fill_data_ichns[lm_idx].pop() self.approved_write_data_in_cycle[lm_idx]['fill'] = data self.lm_fill_addr_internal_chns[lm_idx].pop() # print(self.debug, data) self.memory.request(WR, address, data, port=sram_wr_port) ack_packet = { 'type': 'fill', 'addr': address, 'reset_phy_head': request_info['reset_phy_head'] } self.logical_managers[lm_idx].update_book_keeping( ack_packet) # if 'IFmapGLB' in self.debug: # print(self.debug, request_info) # --------------------------------------- # record access # --------------------------------------- if not data == self.last_write_data[lm_idx]: arg_name = 'lm' + str(lm_idx) + '_nfill' else: arg_name = 'lm' + str(lm_idx) + '_nrepeated_data_fill' self.cycle_access[arg_name] += 1 # self.curr_write_addr[lm_idx] = address self.last_write_data[lm_idx] = data # ------------------------------------------- # record filled data # ------------------------------------------- if self.traces_stats == 'show': self.recorder.record(self.debug + '_fill.txt', data[0]) else: # if the write port is not avaiable, overwrite the logical manager's information self.logical_managers[lm_idx].approved_write_addr_in_cycle[ 'fill'] = None def setup_access_info(self): # ===================================================================== # construct action description of the compound action: buffer access # ===================================================================== self.customized_access = True self.arg_lst = [] # top level action attributes total_nread = [] # all drain related counts: aggregated for memory total_nwrite = [] # all fill related counts: aggregated for memory total_nrepeated_read = [ ] # all repeated drain related counts: aggregated for memory total_nrepeated_data_write = [] # all repeated data write counts subcomp_class_actions = {} # all related subcomponent class actions # --------------------------------------------------------------------- # 
Collect related information from each logical manager # --------------------------------------------------------------------- for lm_idx in range(self.num_logical_managers): lm_ndrain = 'lm' + str(lm_idx) + '_ndrain' lm_nfill = 'lm' + str(lm_idx) + '_nfill' lm_nupdate = 'lm' + str(lm_idx) + '_nupdate' lm_nrepeated_data_fill = 'lm' + str( lm_idx) + '_nrepeated_data_fill' lm_nrepeated_data_update = 'lm' + str( lm_idx) + '_nrepeated_data_update' lm_nrepeated_drain = 'lm' + str(lm_idx) + '_nrepeated_drain' lm_nforwarded_drain = 'lm' + str(lm_idx) + '_nforwarded_drain' lm_ngated_drain = 'lm' + str(lm_idx) + '_ngated_drain' self.arg_lst.append(lm_ndrain) self.arg_lst.append(lm_nfill) self.arg_lst.append(lm_nupdate) self.arg_lst.append(lm_nrepeated_data_fill) self.arg_lst.append(lm_nrepeated_data_update) self.arg_lst.append(lm_nrepeated_drain) self.arg_lst.append(lm_nforwarded_drain) self.arg_lst.append(lm_ngated_drain) # sum up the access that will point into RAM # gated and forwarded drain does not invovle actual RAM accesses total_nread.append(lm_ndrain) total_nwrite.append(lm_nfill) total_nwrite.append(lm_nupdate) total_nrepeated_read.append(lm_nrepeated_drain) total_nrepeated_data_write.append(lm_nrepeated_data_fill) total_nrepeated_data_write.append(lm_nrepeated_data_update) # --------------------------------------------------------------------------- # Collect related information for the address generators, embedded in LMs # --------------------------------------------------------------------------- subcomp_class_actions.update({self.logical_managers[lm_idx].class_name:\ {'action_name': 'generate',\ 'arguments': [lm_ndrain, lm_ngated_drain, lm_nfill, lm_nupdate, lm_nrepeated_data_update, lm_nrepeated_data_fill ],\ 'repeat': {'sum':[lm_ndrain, lm_ngated_drain, lm_nfill, lm_nupdate, lm_nrepeated_data_update, lm_nrepeated_data_fill]}}\ }) # --------------------------------------------------------------------- # Collect related information for the channel subcomponent 
(move to top level/serializer/deserializer) # --------------------------------------------------------------------- self.fill_data_ichns[lm_idx].base_class_name = 'channel' self.fill_data_ichns[lm_idx].component_class_as_subclass = 'show' self.drain_data_ochns[lm_idx].base_class_name = 'channel' self.drain_data_ochns[lm_idx].component_class_as_subclass = 'show' self.fill_data_ichns[lm_idx].class_name = 'lm_' + str( lm_idx) + '_fill_chn' self.drain_data_ochns[lm_idx].class_name = 'lm_' + str( lm_idx) + '_drain_chn' subcomp_class_actions.update({self.fill_data_ichns[lm_idx].class_name: {'action_name': 'access',\ 'repeat': lm_nfill}}) subcomp_class_actions.update({self.drain_data_ochns[lm_idx].class_name: {'action_name': 'access',\ 'repeat': {'sum':[lm_ndrain, lm_nrepeated_drain]}}}) if self.update_data_ichns is not None and self.update_data_ichns[ lm_idx] is not None: self.update_data_ichns[lm_idx].base_class_name = 'channel' self.update_data_ichns[ lm_idx].component_class_as_subclass = 'show' self.update_data_ichns[lm_idx].class_name = 'lm_' + str( lm_idx) + '_update_chn' subcomp_class_actions.update({self.update_data_ichns[lm_idx].class_name: {'action_name': 'access',\ 'repeat': lm_nupdate}}) # --------------------------------------------------------------------- # Collect related information for the memory subcomponent # --------------------------------------------------------------------- subcomp_class_actions.update({self.memory.class_name: {'action_name': 'RAM_access',\ 'arguments': [{'sum': total_nread}, \ {'sum': total_nwrite}, \ {'sum': total_nrepeated_read},\ {'sum': total_nrepeated_data_write}],\ 'repeat': 1}}) # --------------------------------------------------------------------- # Define all of the actions using collected info # --------------------------------------------------------------------- smartbuffer_access_action_def = {'arguments': self.arg_lst,\ 'subcomponent_class_actions': subcomp_class_actions} idle_access_action_def = 
{'subcomponent_class_actions': \ {self.memory.class_name: {'action_name': 'idle','repeat': 1}}} self.actions = {'idle': idle_access_action_def,\ 'buffer_access': smartbuffer_access_action_def} # ===================================================================== # construct containers for recording access counts # ===================================================================== self.last_read_addr = [None] * self.num_logical_managers self.last_write_addr = [None] * self.num_logical_managers self.last_write_data = [None] * self.num_logical_managers self.curr_write_addr = [None] * self.num_logical_managers self.access_stats = {'buffer_access': [], 'idle': {'count': 0}} self.cycle_access = {} self.raw_access_stats = {'buffer_access': {}} self.reset_cycle_access() def reset_cycle_access(self): for arg in self.arg_lst: self.cycle_access.update({arg: 0}) def __ntick__(self): Module.__ntick__(self) idle = True for arg, value in self.cycle_access.items(): if value > 0: idle = False break if idle: self.access_stats['idle']['count'] += 1 else: cycle_access_lst = [] for arg in self.arg_lst: cycle_access_lst.append(self.cycle_access[arg]) cycle_access_tuple = tuple(cycle_access_lst) if cycle_access_tuple[6] == 1: self.access_stats['idle']['count'] += 1 if cycle_access_tuple in self.raw_access_stats['buffer_access']: self.raw_access_stats['buffer_access'][cycle_access_tuple] += 1 else: self.raw_access_stats['buffer_access'][cycle_access_tuple] = 1 self.reset_cycle_access() for lm_idx in range(self.num_logical_managers): self.last_write_addr[lm_idx] = self.curr_write_addr[lm_idx] self.logical_managers[lm_idx].approved_write_addr_in_cycle[ 'fill'] = None self.logical_managers[lm_idx].approved_write_addr_in_cycle[ 'update'] = None self.approved_write_data_in_cycle[lm_idx]['fill'] = None self.approved_write_data_in_cycle[lm_idx]['update'] = None def summerize_access_stats(self): for access_info_tuple, count in self.raw_access_stats[ 'buffer_access'].items(): arg_dict = {} 
arg_idx = 0 for arg_name in self.arg_lst: arg_dict[arg_name] = access_info_tuple[arg_idx] arg_idx += 1 access_info_dict = {'arguments': arg_dict, 'count': count} self.access_stats['buffer_access'].append(access_info_dict.copy())
def instantiate(self, arr_x, arr_y, input_chn, output_chn, chn_per_word,
                ifmap_glb_depth, psum_glb_depth):
    """Build the chip: IO converters, global buffers, PE array and NoCs.

    Args:
        arr_x: PE array size along x (second index of ``pe_array``).
        arr_y: PE array size along y (first index of ``pe_array``).
        input_chn: DRAM-side input channel, handed to the deserializer.
        output_chn: DRAM-side output channel, handed to the serializer.
        chn_per_word: forwarded unchanged to the deserializer, the
            ifmap/psum GLBs and every NoC.
        ifmap_glb_depth: depth argument for ``IFMapGLB``.
        psum_glb_depth: depth argument for ``PSumGLB``.
    """
    # PE static configuration (immutable)
    self.name = 'chip'
    self.arr_x = arr_x
    self.arr_y = arr_y
    self.chn_per_word = chn_per_word
    self.stat_type = 'show'

    # DRAM IO channels
    self.input_chn = input_chn
    self.output_chn = output_chn

    # Input deserializer and output serializer
    self.ifmap_wr_chn = Channel(name="ifmap_wr_chn")
    self.psum_wr_chn = Channel(name="psum_wr_chn")
    self.weights_wr_chn = Channel(name="weights_wr_chn")
    self.deserializer = InputDeserializer(
        self.input_chn, self.ifmap_wr_chn, self.weights_wr_chn,
        self.psum_wr_chn, arr_x, arr_y, chn_per_word)
    self.psum_output_chn = Channel(name="psum_output_chn")
    self.serializer = OutputSerializer(self.output_chn,
                                       self.psum_output_chn)

    # Global buffers and their channels
    self.ifmap_rd_chn = Channel(3, name='ifmap_rd_chn')
    self.ifmap_glb = IFMapGLB(self.ifmap_wr_chn, self.ifmap_rd_chn,
                              ifmap_glb_depth, chn_per_word)
    self.psum_rd_chn = Channel(3, name='psum_rd_chn')
    self.psum_noc_wr_chn = Channel(name='psum_noc_wr_chn')
    self.psum_glb = PSumGLB(self.psum_wr_chn, self.psum_noc_wr_chn,
                            self.psum_rd_chn, psum_glb_depth, chn_per_word)
    self.weights_rd_chn = Channel(name='weights_rd_chn')
    self.weights_glb = WeightsGLB(self.weights_wr_chn, self.weights_rd_chn)

    # PE array and local channel declaration
    self.pe_array = ModuleList()
    self.pe_ifmap_chns = ModuleList()
    self.pe_filter_chns = ModuleList()
    self.pe_psum_chns = ModuleList()

    # The psum channels have arr_y + 1 rows: PE row y is wired between
    # psum rows y and y + 1.  Row 0 is created up front.
    self.pe_psum_chns.append(ModuleList())
    for x in range(self.arr_x):
        self.pe_psum_chns[0].append(
            Channel(32, name='pe_psum_chns_{}_{}'.format(x, 0)))

    # Actual array instantiation
    for y in range(self.arr_y):
        self.pe_array.append(ModuleList())
        self.pe_ifmap_chns.append(ModuleList())
        self.pe_filter_chns.append(ModuleList())
        self.pe_psum_chns.append(ModuleList())
        for x in range(self.arr_x):
            self.pe_ifmap_chns[y].append(
                Channel(32, name='pe_ifmap_chns_{}_{}'.format(x, y)))
            self.pe_filter_chns[y].append(
                Channel(32, name='pe_filter_chns_{}_{}'.format(x, y)))
            # Name the channel after the row it is stored in (y + 1);
            # using y here would duplicate the names already given to
            # row 0 above.
            self.pe_psum_chns[y + 1].append(
                Channel(32, name='pe_psum_chns_{}_{}'.format(x, y + 1)))
            self.pe_array[y].append(
                PE(x, y,
                   self.pe_ifmap_chns[y][x],
                   self.pe_filter_chns[y][x],
                   self.pe_psum_chns[y][x],
                   self.pe_psum_chns[y + 1][x]))

    # NoCs that deliver weights, ifmaps and psums.  The psum read NoC is
    # attached to the first psum row, the psum write NoC to the last one.
    self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns,
                                 self.chn_per_word)
    self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns,
                              self.arr_x, self.chn_per_word)
    self.psum_rd_noc = PSumRdNoC(self.psum_rd_chn, self.pe_psum_chns[0],
                                 self.chn_per_word)
    self.psum_wr_noc = PSumWrNoC(self.pe_psum_chns[-1],
                                 self.psum_noc_wr_chn,
                                 self.psum_output_chn, self.chn_per_word)
class WSArch(Module):
    """Chip model: deserializer/serializer, three GLBs, PE array and NoCs."""

    def instantiate(self, arr_x, arr_y, input_chn, output_chn, chn_per_word,
                    ifmap_glb_depth, psum_glb_depth, weight_glb_depth):
        """Create every channel and sub-module of the chip.

        Args:
            arr_x: PE array size along x (second index of ``pe_array``).
            arr_y: PE array size along y (first index of ``pe_array``).
            input_chn: DRAM-side input channel, handed to the deserializer.
            output_chn: DRAM-side output channel, handed to the serializer.
            chn_per_word: forwarded unchanged to the deserializer,
                serializer, GLBs and NoCs.
            ifmap_glb_depth: depth argument for ``IFMapGLB``.
            psum_glb_depth: depth argument for ``PSumGLB``.
            weight_glb_depth: depth argument for ``WeightsGLB``.
        """
        # PE static configuration (immutable)
        self.name = 'chip'
        self.arr_x = arr_x
        self.arr_y = arr_y
        self.chn_per_word = chn_per_word
        self.stat_type = 'show'

        # DRAM IO channels
        self.input_chn = input_chn
        self.output_chn = output_chn

        # Input deserializer and output serializer.  psum_chn is shared by
        # both of them (it is passed to each constructor).
        self.ifmap_wr_chn = Channel(name='ifmap_wr_chn')
        self.psum_wr_chn = Channel(name='psum_wr_chn')
        self.weights_wr_chn = Channel(name='weights_wr_chn')
        self.psum_chn = Channel(32, name='psum_chn')
        self.deserializer = InputDeserializer(
            self.input_chn, self.ifmap_wr_chn, self.weights_wr_chn,
            self.psum_wr_chn, self.psum_chn, arr_x, arr_y, chn_per_word)
        self.psum_output_chn = Channel(name='psum_output_chn')
        self.serializer = OutputSerializer(
            self.output_chn, self.psum_output_chn, self.psum_chn,
            self.arr_x, self.arr_y, self.chn_per_word)

        # Global buffers and their channels
        self.ifmap_rd_chn = Channel(3, name='ifmap_rd_chn')
        self.ifmap_glb = IFMapGLB(self.ifmap_wr_chn, self.ifmap_rd_chn,
                                  ifmap_glb_depth, chn_per_word)
        self.psum_rd_chn = Channel(3, name='psum_rd_chn')
        self.psum_noc_wr_chn = Channel(name='psum_noc_wr_chn')
        self.psum_glb = PSumGLB(self.psum_wr_chn, self.psum_noc_wr_chn,
                                self.psum_rd_chn, psum_glb_depth,
                                chn_per_word)
        self.weights_rd_chn = Channel(3, name='weights_rd_chn')
        self.weights_glb = WeightsGLB(self.weights_wr_chn,
                                      self.weights_rd_chn,
                                      weight_glb_depth, chn_per_word)

        # PE array and local channel declaration
        self.pe_array = ModuleList()
        self.pe_ifmap_chns = ModuleList()
        self.pe_filter_chns = ModuleList()
        self.pe_psum_chns = ModuleList()

        # The psum channels have arr_y + 1 rows: PE row y is wired between
        # psum rows y and y + 1.  Row 0 is created up front.
        self.pe_psum_chns.append(ModuleList())
        for x in range(self.arr_x):
            self.pe_psum_chns[0].append(
                Channel(32, name='pe_psum_chns_{}_0'.format(x)))

        # Actual array instantiation
        for y in range(self.arr_y):
            self.pe_array.append(ModuleList())
            self.pe_ifmap_chns.append(ModuleList())
            self.pe_filter_chns.append(ModuleList())
            self.pe_psum_chns.append(ModuleList())
            for x in range(self.arr_x):
                self.pe_ifmap_chns[y].append(
                    Channel(32, name='pe_ifmap_chns_{}_{}'.format(x, y)))
                self.pe_filter_chns[y].append(
                    Channel(32, name='pe_filter_chns_{}_{}'.format(x, y)))
                # Name the channel after the row it is stored in (y + 1);
                # using y here would duplicate the names already given to
                # row 0 above.
                self.pe_psum_chns[y + 1].append(
                    Channel(32,
                            name='pe_psum_chns_{}_{}'.format(x, y + 1)))
                self.pe_array[y].append(
                    PE(x, y,
                       self.pe_ifmap_chns[y][x],
                       self.pe_filter_chns[y][x],
                       self.pe_psum_chns[y][x],
                       self.pe_psum_chns[y + 1][x]))

        # NoCs that deliver weights, ifmaps and psums.  The psum read NoC
        # is attached to the first psum row, the write NoC to the last.
        self.filter_noc = WeightsNoC(self.weights_rd_chn,
                                     self.pe_filter_chns, self.chn_per_word)
        self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns,
                                  self.arr_x, self.chn_per_word)
        self.psum_rd_noc = PSumRdNoC(self.psum_rd_chn, self.pe_psum_chns[0],
                                     self.chn_per_word)
        self.psum_wr_noc = PSumWrNoC(self.pe_psum_chns[-1],
                                     self.psum_noc_wr_chn,
                                     self.psum_output_chn,
                                     self.chn_per_word)

    def configure(self, image_size, filter_size, in_chn, out_chn,
                  full_in_chn, full_out_chn):
        """Derive per-layer parameters and push them to every sub-module.

        Args:
            image_size: indexable with [0] and [1]; their product is the
                per-iteration fmap count.
            filter_size: indexable with [0] and [1]; their product is the
                iteration count.
            in_chn: not used by this method.
            out_chn: not used by this method.
            full_in_chn: total input channels, used for tiling.
            full_out_chn: total output channels, used for tiling.

        Raises:
            ZeroDivisionError: if ``chn_per_word`` is 0, or if ``arr_y`` or
                ``arr_x`` is smaller than ``chn_per_word`` (the set counts
                then floor to 0 and are used as divisors).
        """
        # Channel "sets" are groups of chn_per_word channels.
        full_in_sets = full_in_chn // self.chn_per_word
        full_out_sets = full_out_chn // self.chn_per_word
        in_sets = self.arr_y // self.chn_per_word
        out_sets = self.arr_x // self.chn_per_word

        # Number of array-sized tiles needed to cover the full layer.
        tile_ins = full_in_sets // in_sets
        tile_outs = full_out_sets // out_sets

        fmap_per_iteration = image_size[0] * image_size[1]
        num_iteration = filter_size[0] * filter_size[1]

        self.deserializer.configure(image_size, filter_size, full_in_sets,
                                    tile_ins, tile_outs)
        self.serializer.configure(image_size, tile_ins, tile_outs)
        self.ifmap_glb.configure(image_size, filter_size, in_sets,
                                 full_in_sets, tile_outs,
                                 fmap_per_iteration)
        self.psum_glb.configure(filter_size, out_sets, fmap_per_iteration)
        self.weights_glb.configure(filter_size, in_sets, out_sets)
        self.filter_noc.configure(in_sets, self.arr_x)
        self.ifmap_noc.configure(in_sets)
        self.psum_rd_noc.configure(out_sets)
        self.psum_wr_noc.configure(num_iteration, fmap_per_iteration,
                                   out_sets)

        for y in range(self.arr_y):
            for x in range(self.arr_x):
                self.pe_array[y][x].configure(fmap_per_iteration,
                                              num_iteration)
def instantiate(self, setup):
    """Build the buffer: address generators, address channels, smartbuffer.

    Args:
        setup: configuration mapping.
            Required keys: 'debug', 'nbanks', 'nvports', 'addr_generators',
            'depth', 'width', 'nports', 'pvmapping', 'smartbuffer_type',
            'data_width', and 'multi_addr_generators' (read for every
            virtual port that has at least one instance).
            Optional keys: 'enabled_buffer' (default False),
            'reg_insert_opt' (default True), and 'fill_chns',
            'update_chns', 'update_src_chns', 'drain_chns',
            'drain_enable_chns' (default None).

    Raises:
        KeyError: if a required key is missing from ``setup``.
    """
    self.class_name = "nnsimBuffer"
    self.debug = setup['debug']
    self.enabled_buffer = setup.get('enabled_buffer', False)
    self.reg_insert_opt = setup.get('reg_insert_opt', True)
    self.nbanks = setup['nbanks']

    # physical channels from outside world to write data into the buffer
    self.physical_chns = []
    # address generators for each memory bank
    self.addr_generator_type = []

    # ======================================================================
    # Virtual Data Transfer Channels
    # ======================================================================
    # - instantiated from outside
    # - responsible for receiving data from outside
    # - responsible for pushing data to outside
    self.fill_data_chns = setup.get('fill_chns')
    self.update_data_chns = setup.get('update_chns')
    self.update_src_chns = setup.get('update_src_chns')
    self.drain_rsp_chns = setup.get('drain_chns')
    # channels that determine whether to perform the read or not, used for
    # zero gating
    self.drain_enable_chns = setup.get('drain_enable_chns')

    # number of virtual ports for each memory bank
    self.nvports = setup['nvports']

    # dictionary of all the data channels, for ease of access
    self.vdata_chns = {'fill': self.fill_data_chns,
                       'update': self.update_data_chns}

    # ======================================================================
    # Address Generators
    # ======================================================================
    self.addr_generator_types = setup['addr_generators']

    # ======================================================================
    # Hardware Memory Properties
    # ======================================================================
    self.depth = setup['depth']          # depth of each memory bank
    self.width = setup['width']          # number of data in each memory bank
    self.nports = setup['nports']        # physical ports per memory bank
    self.pvmapping = setup['pvmapping']

    # ======================================================================
    # Internal Channels and Data Structures
    # ======================================================================
    # address generators, one ModuleList per virtual port type
    self.faddr_generator = ModuleList()
    self.uaddr_generator = ModuleList()
    self.daddr_generator = ModuleList()
    self.addr_generators = {'fill': self.faddr_generator,
                            'update': self.uaddr_generator,
                            'drain': self.daddr_generator}

    # address channels the generators use to push to the smartbuffer
    self.fill_addr_chn = ModuleList()
    self.update_addr_chn = ModuleList()
    self.drain_addr_chn = ModuleList()
    self.vaddr_chns = {'fill': self.fill_addr_chn,
                       'update': self.update_addr_chn,
                       'drain': self.drain_addr_chn}

    # per-generator channels, only populated when a virtual port is fed by
    # several address generators
    self.fill_local_addr_chn = ModuleList()
    self.update_local_addr_chn = ModuleList()
    self.drain_local_addr_chn = ModuleList()
    self.local_vaddr_chns = {'fill': self.fill_local_addr_chn,
                             'update': self.update_local_addr_chn,
                             'drain': self.drain_local_addr_chn}

    # ----------------------------------------------------------------------
    # Setup Virtual Ports
    # ----------------------------------------------------------------------
    # each virtual port has:
    #   a channel for addr seq
    #   a channel for input data
    #   an addr generator
    #   optionally a destination generator
    #   optionally a channel for output data
    for vport, num in self.nvports.items():
        for i in range(num):
            # Looked up per instance (not per vport) so that a vport with
            # zero instances never needs a 'multi_addr_generators' entry.
            n_generators = setup['multi_addr_generators'][vport]
            generator_type = self.addr_generator_types[vport]

            if n_generators == 1:
                # 1. a single address generator drives the virtual port
                self.vaddr_chns[vport].append(NoLatencyChannel())
                self.local_vaddr_chns[vport].append(None)
                addr_gen_setup = {'addr_chn': self.vaddr_chns[vport][i],
                                  'width': self.width,
                                  'type': vport,
                                  'id': i,
                                  'depth': self.depth,
                                  'debug': self.debug}
                self.addr_generators[vport].append(
                    generator_type[i](addr_gen_setup))
            else:
                # 2. the address comes from multiple generator instances
                self.addr_generators[vport].append(ModuleList())
                # this channel carries the address chosen among all the
                # local address generators to the smartbuffer
                self.vaddr_chns[vport].append(NoLatencyChannel())
                self.local_vaddr_chns[vport].append(ModuleList())
                for idx in range(n_generators):
                    self.local_vaddr_chns[vport][i].append(
                        NoLatencyChannel())
                    addr_gen_setup = {
                        'addr_chn': self.local_vaddr_chns[vport][i][idx],
                        'width': self.width,
                        'type': vport,
                        'id': i,
                        'depth': self.depth,
                        'debug': self.debug + '[' + str(idx) + ']'}
                    # these generators are assumed to share one type; only
                    # their offset (config) differs
                    self.addr_generators[vport][i].append(
                        generator_type[i](addr_gen_setup))

    # ----------------------------------------------------------------------
    # Setup Smartbuffer (deals with dependency and conflict)
    # ----------------------------------------------------------------------
    # update_src_chns carry an external update address sequence;
    # drain_enable_chns carry the enable signal for read operations.
    # a smartbuffer is a unit of memory bank
    smartbuffer_setup = {'depth': self.depth,
                         'width': self.width,
                         'nports': self.nports,
                         'pvmapping': self.pvmapping,
                         'nvports': self.nvports,
                         'vdata_chns': self.vdata_chns,
                         'vaddr_chns': self.vaddr_chns,
                         'drain_rsp_chns': self.drain_rsp_chns,
                         'drain_enable_chns': self.drain_enable_chns,
                         'update_src_chns': self.update_src_chns,
                         'enabled_buffer': self.enabled_buffer,
                         'reg_insert_opt': self.reg_insert_opt,
                         'debug': self.debug}
    self.smartbuffer_type = setup['smartbuffer_type']
    self.sbuffer = self.smartbuffer_type(smartbuffer_setup)

    # ======================================================================
    # Stats Collection Info
    # ======================================================================
    # NOTE(review): 'dpeth' looks like a typo for 'depth'.  It is left
    # untouched because consumers of self.attrs are not visible here;
    # confirm what they read before renaming the key.
    self.attrs = {'dpeth': setup['depth'],
                  'width': setup['width'],
                  'data_width': setup['data_width'],
                  'nbanks': setup['nbanks'],
                  'nports': setup['nports']}
class OSArch(Module):
    """Chip model with an arr_y x 1 PE array and a combined ifmap/weights GLB."""

    def instantiate(self, arr_y, input_chn, output_chn, block_size,
                    num_nonzero, ifmap_glb_depth, psum_glb_depth,
                    weight_glb_depth):
        """Create every channel and sub-module of the chip.

        Args:
            arr_y: number of PE rows; each row holds a single PE.
            input_chn: DRAM-side input channel, handed to the deserializer.
            output_chn: DRAM-side output channel, handed to the serializer.
            block_size: forwarded to the deserializer, the GLBs and the
                weights / psum NoCs.
            num_nonzero: forwarded to the deserializer and the GLBs.
            ifmap_glb_depth: ifmap depth argument for ``IFMapWeightsGLB``.
            psum_glb_depth: depth argument for ``PSumGLB``.
            weight_glb_depth: weight depth argument for ``IFMapWeightsGLB``.
        """
        # Static configuration
        self.name = 'chip'
        self.arr_y = arr_y
        self.block_size = block_size
        self.num_nonzero = num_nonzero
        self.stat_type = 'show'

        # DRAM-facing channels
        self.input_chn = input_chn
        self.output_chn = output_chn

        # Deserializer (input side) and serializer (output side)
        self.ifmap_wr_chn = Channel()
        self.psum_wr_chn = Channel()
        self.weights_wr_chn = Channel()
        self.deserializer = InputDeserializer(
            self.input_chn,
            self.ifmap_wr_chn,
            self.weights_wr_chn,
            self.psum_wr_chn,
            arr_y,
            block_size,
            num_nonzero,
        )
        self.psum_output_chn = Channel()
        self.serializer = OutputSerializer(self.output_chn,
                                           self.psum_output_chn)

        # Global buffers.  Ifmaps and weights share one combined GLB
        # module; psums have their own.
        self.ifmap_rd_chn = Channel(3)
        self.psum_rd_chn = Channel(3)
        self.psum_noc_wr_chn = Channel()
        self.psum_glb = PSumGLB(
            self.psum_wr_chn,
            self.psum_noc_wr_chn,
            self.psum_rd_chn,
            psum_glb_depth,
            block_size,
            num_nonzero,
        )
        self.weights_rd_chn = Channel()
        self.ifmap_weights_glb = IFMapWeightsGLB(
            self.ifmap_wr_chn,
            self.ifmap_rd_chn,
            self.weights_wr_chn,
            self.weights_rd_chn,
            arr_y,
            ifmap_glb_depth,
            weight_glb_depth,
            block_size,
            num_nonzero,
        )

        # PE array and the channels local to each PE
        self.pe_array = ModuleList()
        self.pe_ifmap_chns = ModuleList()
        self.pe_filter_chns = ModuleList()
        self.pe_psum_in_chns = ModuleList()
        self.pe_psum_out_chns = ModuleList()

        for y in range(self.arr_y):
            pe_row = ModuleList()
            self.pe_array.append(pe_row)
            ifmap_row = ModuleList()
            self.pe_ifmap_chns.append(ifmap_row)
            filter_row = ModuleList()
            self.pe_filter_chns.append(filter_row)
            psum_in_row = ModuleList()
            self.pe_psum_in_chns.append(psum_in_row)
            psum_out_row = ModuleList()
            self.pe_psum_out_chns.append(psum_out_row)

            # Each row holds exactly one PE, at column 0.
            x = 0
            ifmap_row.append(Channel(32))
            filter_row.append(Channel(32))
            psum_in_row.append(Channel(32))
            psum_out_row.append(Channel(32))
            pe_row.append(
                PE(x, y, ifmap_row[x], filter_row[x],
                   psum_in_row[x], psum_out_row[x]))

        # NoCs delivering weights, ifmaps and psums
        self.filter_noc = WeightsNoC(self.weights_rd_chn,
                                     self.pe_filter_chns, block_size)
        self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns)
        self.psum_rd_noc = PSumRdNoC(self.psum_rd_chn, self.pe_psum_in_chns,
                                     self.arr_y, block_size)
        self.psum_wr_noc = PSumWrNoC(self.pe_psum_out_chns,
                                     self.psum_noc_wr_chn,
                                     self.psum_output_chn,
                                     self.arr_y, block_size)

    def configure(self, image_size, filter_size, in_chn, out_chn):
        """Derive per-layer parameters and push them to every sub-module.

        Args:
            image_size: indexable with [0] and [1]; their product is the
                per-iteration fmap count.
            filter_size: indexable with [0] and [1]; their product is the
                iteration count.
            in_chn: input channel count.
            out_chn: output channel count.
        """
        in_sets = in_chn // self.block_size
        out_sets = out_chn // self.arr_y
        fmap_per_iteration = image_size[0] * image_size[1]
        num_iteration = filter_size[0] * filter_size[1]
        out_blocks = out_chn // self.block_size

        self.deserializer.configure(image_size, filter_size, in_chn, out_chn)
        self.psum_glb.configure(filter_size, out_chn, fmap_per_iteration)
        self.filter_noc.configure(out_blocks, self.arr_y, in_chn, out_chn)
        self.ifmap_noc.configure(in_sets, self.arr_y)
        self.psum_rd_noc.configure(out_blocks)
        self.psum_wr_noc.configure(fmap_per_iteration,
                                   self.arr_y // self.block_size)
        self.ifmap_weights_glb.configure(image_size, filter_size, in_chn,
                                         out_chn, fmap_per_iteration)

        # Every PE receives the same two values.
        pe_first_arg = num_iteration * in_sets
        pe_second_arg = fmap_per_iteration * out_sets
        for y in range(self.arr_y):
            self.pe_array[y][0].configure(pe_first_arg, pe_second_arg)
class WSArch(Module):
    """Chip model: deserializer/serializer, three GLBs, PE array and NoCs."""

    def instantiate(self, arr_x, arr_y, input_chn, output_chn, chn_per_word,
                    ifmap_glb_depth, psum_glb_depth):
        """Create every channel and sub-module of the chip.

        Args:
            arr_x: PE array size along x (second index of ``pe_array``).
            arr_y: PE array size along y (first index of ``pe_array``).
            input_chn: DRAM-side input channel, handed to the deserializer.
            output_chn: DRAM-side output channel, handed to the serializer.
            chn_per_word: forwarded unchanged to the deserializer, the
                ifmap/psum GLBs and every NoC.
            ifmap_glb_depth: depth argument for ``IFMapGLB``.
            psum_glb_depth: depth argument for ``PSumGLB``.
        """
        # PE static configuration (immutable)
        self.name = 'chip'
        self.arr_x = arr_x
        self.arr_y = arr_y
        self.chn_per_word = chn_per_word
        self.stat_type = 'show'

        # DRAM IO channels
        self.input_chn = input_chn
        self.output_chn = output_chn

        # Input deserializer and output serializer
        self.ifmap_wr_chn = Channel()
        self.psum_wr_chn = Channel()
        self.weights_wr_chn = Channel()
        self.deserializer = InputDeserializer(
            self.input_chn, self.ifmap_wr_chn, self.weights_wr_chn,
            self.psum_wr_chn, arr_x, arr_y, chn_per_word)
        self.psum_output_chn = Channel()
        self.serializer = OutputSerializer(self.output_chn,
                                           self.psum_output_chn)

        # Global buffers and their channels
        self.ifmap_rd_chn = Channel(3)
        self.ifmap_glb = IFMapGLB(self.ifmap_wr_chn, self.ifmap_rd_chn,
                                  ifmap_glb_depth, chn_per_word)
        self.psum_rd_chn = Channel(3)
        self.psum_noc_wr_chn = Channel()
        self.psum_glb = PSumGLB(self.psum_wr_chn, self.psum_noc_wr_chn,
                                self.psum_rd_chn, psum_glb_depth,
                                chn_per_word)
        self.weights_rd_chn = Channel()
        self.weights_glb = WeightsGLB(self.weights_wr_chn,
                                      self.weights_rd_chn)

        # PE array and local channel declaration
        self.pe_array = ModuleList()
        self.pe_ifmap_chns = ModuleList()
        self.pe_filter_chns = ModuleList()
        self.pe_psum_chns = ModuleList()

        # The psum channels have arr_y + 1 rows: PE row y is wired between
        # psum rows y and y + 1.  Row 0 is created up front.
        self.pe_psum_chns.append(ModuleList())
        for x in range(self.arr_x):
            self.pe_psum_chns[0].append(Channel(32))

        # Actual array instantiation
        for y in range(self.arr_y):
            self.pe_array.append(ModuleList())
            self.pe_ifmap_chns.append(ModuleList())
            self.pe_filter_chns.append(ModuleList())
            self.pe_psum_chns.append(ModuleList())
            for x in range(self.arr_x):
                self.pe_ifmap_chns[y].append(Channel(32))
                self.pe_filter_chns[y].append(Channel(32))
                self.pe_psum_chns[y + 1].append(Channel(32))
                self.pe_array[y].append(
                    PE(x, y,
                       self.pe_ifmap_chns[y][x],
                       self.pe_filter_chns[y][x],
                       self.pe_psum_chns[y][x],
                       self.pe_psum_chns[y + 1][x]))

        # NoCs that deliver weights, ifmaps and psums.  The psum read NoC
        # is attached to the first psum row, the write NoC to the last.
        self.filter_noc = WeightsNoC(self.weights_rd_chn,
                                     self.pe_filter_chns, self.chn_per_word)
        self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns,
                                  self.arr_x, self.chn_per_word)
        self.psum_rd_noc = PSumRdNoC(self.psum_rd_chn, self.pe_psum_chns[0],
                                     self.chn_per_word)
        self.psum_wr_noc = PSumWrNoC(self.pe_psum_chns[-1],
                                     self.psum_noc_wr_chn,
                                     self.psum_output_chn,
                                     self.chn_per_word)

    def configure(self, image_size, filter_size, in_chn, out_chn):
        """Print the layer shape and push parameters to every sub-module.

        Args:
            image_size: passed to the deserializer and the ifmap GLB.
            filter_size: indexable with [0] and [1]; their product is the
                iteration count.
            in_chn: only printed.
            out_chn: only printed.
        """
        # print inputs
        print("image size:", image_size)
        print("filter size:", filter_size)
        print("in chn:", in_chn)
        print("out chn:", out_chn)

        in_sets = self.arr_y // self.chn_per_word
        out_sets = self.arr_x // self.chn_per_word

        # NOTE(review): the output fmap size is hard-coded.  The original
        # carried the general expression
        #   (image_size[0]-filter_size[0]+1)*(image_size[1]-filter_size[1]+1)
        # as a comment; it also evaluates to 4 for e.g. a 4x4 image with a
        # 3x3 filter.  Confirm the rest of the pipeline supports other
        # shapes before switching to the formula.
        fmap_per_iteration_out = 4
        num_iteration = filter_size[0] * filter_size[1]

        self.deserializer.configure(image_size)
        self.ifmap_glb.configure(image_size, filter_size, in_sets,
                                 fmap_per_iteration_out)
        self.psum_glb.configure(filter_size, out_sets,
                                fmap_per_iteration_out)
        self.filter_noc.configure(in_sets, self.arr_x)
        self.ifmap_noc.configure(in_sets)
        self.psum_rd_noc.configure(out_sets)
        self.psum_wr_noc.configure(num_iteration, fmap_per_iteration_out,
                                   out_sets)

        print("PE array size:", self.arr_y * self.arr_x)
        for y in range(self.arr_y):
            for x in range(self.arr_x):
                self.pe_array[y][x].configure(fmap_per_iteration_out,
                                              num_iteration)
def instantiate(self, setup):
    """Build the chip from a setup dictionary: GLBs, NoCs and the PE array.

    Args:
        setup: configuration mapping.  Keys read here:
            'pe_array' (indexable: [0] rows, [1] columns),
            'io_chns' (with 'weights', 'ifmap', 'psum_in', 'psum_out'),
            'depth', 'width', 'data_width', 'nbanks', 'nports',
            'port_type' (each indexed by 'WeightsGLB', 'IFmapGLB',
            'PsumGLB'),
            'weights_seri_ratio', 'ifmap_seri_ratio', 'psum_seri_ratio',
            'PE' (setup shared by all PEs; copied per PE, never mutated).

    Raises:
        KeyError: if a required key is missing from ``setup``.
    """

    def sram_setup(glb_name):
        # SRAM description of one GLB, gathered from the per-GLB tables.
        return {'depth': setup['depth'][glb_name],
                'width': setup['width'][glb_name],
                'data_width': setup['data_width'][glb_name],
                'nbanks': setup['nbanks'][glb_name],
                'nports': setup['nports'][glb_name],
                'port_type': setup['port_type'][glb_name]}

    self.pe_array_row = setup['pe_array'][0]
    self.pe_array_col = setup['pe_array'][1]

    # ---------------------------------------------------------------------
    # io data b/w offchip
    # ---------------------------------------------------------------------
    self.weights_in_chns = setup['io_chns']['weights']
    self.ifmap_in_chns = setup['io_chns']['ifmap']
    self.psum_in_chns = setup['io_chns']['psum_in']
    self.psum_out_chn = setup['io_chns']['psum_out']

    # ---------------------------------------------------------------------
    # onchip channels
    # ---------------------------------------------------------------------
    # >> buffer output channels
    self.weights_out_chns = ModuleList(Channel())
    # Created once; a second identical assignment used to build a channel
    # list that was immediately discarded.
    self.ifmap_out_chns = ModuleList(Channel())
    self.psum_out_chns = ModuleList(Channel())
    # >> update data to buffer
    self.psum_update_chns = ModuleList(Channel())

    # ===================================================================
    # GLBs
    # ===================================================================
    # ------------------------ WEIGHTS -------------------------------
    weights_glb_setup = {'fill_data_ichns': self.weights_in_chns,
                         'drain_data_ochns': self.weights_out_chns,
                         'num_logical_managers': 1,
                         'SRAM': sram_setup('WeightsGLB'),
                         'debug': ' WeightsGLB'}
    self.weights_glb = WeightsGLB(weights_glb_setup)

    # ------------------------ IFMAP -------------------------------
    ifmap_glb_setup = {'fill_data_ichns': self.ifmap_in_chns,
                       'drain_data_ochns': self.ifmap_out_chns,
                       'num_logical_managers': 1,
                       'SRAM': sram_setup('IFmapGLB'),
                       'debug': ' IFmapGLB'}
    self.ifmap_glb = IFmapGLB(ifmap_glb_setup)

    # ------------------------ PSUM -------------------------------
    psum_glb_setup = {'fill_data_ichns': self.psum_in_chns,
                      'update_data_ichns': self.psum_update_chns,
                      'drain_data_ochns': self.psum_out_chns,
                      'num_logical_managers': 1,
                      'SRAM': sram_setup('PsumGLB'),
                      'debug': ' PsumGLB'}
    self.psum_glb = PsumGLB(psum_glb_setup)

    # ===================================================================
    # PE Array Channels
    # ===================================================================
    self.ifmap_pe_data_chns = ModuleList()
    self.weights_pe_data_chns = ModuleList()
    self.psum_data_chns = ModuleList()
    self.pe_data_chns = {'ifmap': self.ifmap_pe_data_chns,
                         'weights': self.weights_pe_data_chns,
                         'psum': self.psum_data_chns}

    # One channel per PE for every data type.
    for pe_row in range(self.pe_array_row):
        for chn_rows in self.pe_data_chns.values():
            chn_rows.append(ModuleList())
        for pe_col in range(self.pe_array_col):
            for chn_rows in self.pe_data_chns.values():
                chn_rows[pe_row].append(Channel())

    # The psum channels get one extra row: PE row r is wired between psum
    # rows r and r + 1 (see the PE array section below).
    self.psum_data_chns.append(ModuleList())
    for pe_col in range(self.pe_array_col):
        self.psum_data_chns[-1].append(Channel())

    # ===================================================================
    # Destination Calculators for NoC
    # ===================================================================
    # NOTE(review): some attribute names and debug strings below are
    # misspelled ('destionation', 'serilizer', 'serialzer').  They are kept
    # verbatim because other code may refer to them.
    self.ifmap_NoC_destination_chn = ModuleList(Channel())
    ifmap_dest_calc_setup = {
        'out_chn': self.ifmap_NoC_destination_chn[0],
        'out_channel_width': 1,
        'debug': 'IFmapNoCDestCalc'}
    self.ifmap_NoC_destination_calculator = IFmapNoCDestCalc(
        ifmap_dest_calc_setup)

    self.weights_NoC_destination_chn = ModuleList(Channel())
    weights_dest_calc_setup = {
        'out_chn': self.weights_NoC_destination_chn[0],
        'out_channel_width': 1,
        'debug': 'WeightsNoCDestCalc'}
    self.weights_NoC_destionation_calculator = WeightsNoCDestCalc(
        weights_dest_calc_setup)

    self.psum_in_NoC_destination_chn = ModuleList(Channel())
    psum_in_dest_calc_setup = {
        'out_chn': self.psum_in_NoC_destination_chn[0],
        'out_channel_width': 1,
        'debug': 'PsumInNoCDestCalc'}
    self.psum_in_NoC_destionation_calculator = PsumInNoCDestCalc(
        psum_in_dest_calc_setup)

    # ===================================================================
    # NoCs
    # ===================================================================
    # -------------- Weights NoC: WeightsGLB -> PEs --------------------
    # weights serializer (ratio taken from setup)
    self.weights_serializered_data_chn = Channel()
    weights_serializer_setup = {
        'in_chn': self.weights_out_chns[0],
        'out_chn': self.weights_serializered_data_chn,
        'ratio': setup['weights_seri_ratio'],
        'debug': 'weights_serialzer'}
    self.weights_serializer = WeightsSerializer(weights_serializer_setup)

    # NOTE(review): 'wr_chns' is a bare Channel here, while the ifmap and
    # psum read NoCs below receive a ModuleList -- confirm WeightsNoC
    # expects that.
    weights_noc_setup = {'rd_chns': self.pe_data_chns['weights'],
                         'wr_chns': self.weights_serializered_data_chn,
                         'dest_chns': self.weights_NoC_destination_chn,
                         'debug': 'WeightsNoC'}
    self.weightsNoC = WeightsNoC(weights_noc_setup)

    # -------------- IFmap NoC: IfmapGLB -> PEs -----------------------
    # ifmap serializer (ratio taken from setup)
    self.ifmap_serialized_data_chn = Channel()
    ifmap_serializer_setup = {
        'in_chn': self.ifmap_out_chns[0],
        'out_chn': self.ifmap_serialized_data_chn,
        'ratio': setup['ifmap_seri_ratio'],
        'debug': 'ifmap_serializer'}
    self.ifmap_serilizer = IfmapSerializer(ifmap_serializer_setup)

    # ifmap NoC
    ifmap_noc_wr_chns = ModuleList()
    ifmap_noc_wr_chns.append(self.ifmap_serialized_data_chn)
    ifmap_noc_setup = {'rd_chns': self.pe_data_chns['ifmap'],
                       'wr_chns': ifmap_noc_wr_chns,
                       'dest_chns': self.ifmap_NoC_destination_chn,
                       'debug': 'IFmapNoC'}
    self.ifmapNoC = IFMapNoC(ifmap_noc_setup)

    # -------------- PsumRd NoC: Psum GLB -> PEs ----------------------
    # psum serializer (ratio taken from setup)
    self.psum_serialized_data_chn = Channel()
    psum_serializer_setup = {
        'in_chn': self.psum_out_chns[0],
        'out_chn': self.psum_serialized_data_chn,
        'ratio': setup['psum_seri_ratio'],
        'debug': 'psum_serialzer'}
    self.psum_serializer = PsumSerializer(psum_serializer_setup)

    # psum read noc
    pe_noc_wr_chn = ModuleList(self.psum_serialized_data_chn)
    psum_rd_noc_setup = {'rd_chns': self.psum_data_chns,
                         'wr_chns': pe_noc_wr_chn,
                         'dest_chns': self.psum_in_NoC_destination_chn,
                         'debug': 'PsumRdNoC'}
    self.psumRdNoC = PsumRdNoC(psum_rd_noc_setup)

    # -------------- PsumWr NoC: PEs -> Psum GLB / offchip -------------
    self.psum_out_noc_rd_chns = ModuleList()
    self.psum_out_noc_rd_chns.append(
        self.psum_update_chns[0])                        # write back to GLB
    self.psum_out_noc_rd_chns.append(self.psum_out_chn)  # write offchip
    psum_wr_noc_setup = {'rd_chns': self.psum_out_noc_rd_chns,
                         'wr_chns': self.psum_data_chns,
                         'debug': 'PsumWrNoC'}
    self.psumWrNoC = PsumWrNoC(psum_wr_noc_setup)

    # ===================================================================
    # PE Array
    # ===================================================================
    # general setup shared by all PEs
    pe_common_setup = setup['PE']
    self.PE = ModuleList()
    for pe_row in range(self.pe_array_row):
        self.PE.append(ModuleList())
        for pe_col in range(self.pe_array_col):
            # Each PE gets its own dict.  Writing the per-PE entries into
            # setup['PE'] itself would make every PE share one mutable
            # dict and would leak row/col/channel entries back into the
            # caller's architecture description.
            pe_setup = dict(pe_common_setup)
            pe_setup['row'] = pe_row
            pe_setup['col'] = pe_col
            pe_setup['weights_data_in_chn'] = \
                self.weights_pe_data_chns[pe_row][pe_col]
            pe_setup['ifmap_data_in_chn'] = \
                self.ifmap_pe_data_chns[pe_row][pe_col]
            pe_setup['psum_data_in_chn'] = \
                self.psum_data_chns[pe_row][pe_col]
            pe_setup['psum_data_out_chn'] = \
                self.psum_data_chns[pe_row + 1][pe_col]
            self.PE[pe_row].append(PE(pe_setup))
class chip(Module):
    """Top-level design: three global buffers, serializers, NoCs and a PE array.

    ``instantiate`` wires every sub-module together from a ``setup`` dict and
    ``configure`` pushes per-layer configuration down to each of them.
    """

    def instantiate(self, setup):
        """Create all channels and sub-modules of the chip.

        Keys read from ``setup``:
            'pe_array'   -- indexable; [0] is the row count, [1] the column
                            count of the PE array.
            'io_chns'    -- dict with 'weights', 'ifmap', 'psum_in' and
                            'psum_out' entries (off-chip facing channels).
            'depth', 'width', 'data_width', 'nbanks', 'nports', 'port_type'
                         -- dicts indexed by 'WeightsGLB', 'IFmapGLB' and
                            'PsumGLB'.
            'weights_seri_ratio', 'ifmap_seri_ratio', 'psum_seri_ratio'
                         -- passed as 'ratio' to the matching serializer.
            'PE'         -- base setup mapping shared by every PE; it is
                            copied per PE and never modified here.
        """
        self.pe_array_row = setup['pe_array'][0]
        self.pe_array_col = setup['pe_array'][1]

        # ---------------------------------------------------------------------
        # io data b/w offchip
        # ---------------------------------------------------------------------
        io_chns = setup['io_chns']
        self.weights_in_chns = io_chns['weights']
        self.ifmap_in_chns = io_chns['ifmap']
        self.psum_in_chns = io_chns['psum_in']
        self.psum_out_chn = io_chns['psum_out']

        # ---------------------------------------------------------------------
        # onchip channels
        # ---------------------------------------------------------------------
        # >> buffer output channels (each one created exactly once)
        self.weights_out_chns = ModuleList(Channel())
        self.ifmap_out_chns = ModuleList(Channel())
        self.psum_out_chns = ModuleList(Channel())
        # >> update data to buffer
        self.psum_update_chns = ModuleList(Channel())

        # ===================================================================
        # GLBs
        # ===================================================================
        def sram_spec(glb_name):
            # The SRAM description of every GLB is assembled from the same
            # six per-GLB tables in ``setup``.
            return {key: setup[key][glb_name]
                    for key in ('depth', 'width', 'data_width',
                                'nbanks', 'nports', 'port_type')}

        # ------------------------ WEIGHTS -------------------------------
        weights_glb_setup = {
            'fill_data_ichns': self.weights_in_chns,
            'drain_data_ochns': self.weights_out_chns,
            'num_logical_managers': 1,
            'SRAM': sram_spec('WeightsGLB'),
            'debug': ' WeightsGLB',
        }
        self.weights_glb = WeightsGLB(weights_glb_setup)

        # ------------------------ IFMAP -------------------------------
        ifmap_glb_setup = {
            'fill_data_ichns': self.ifmap_in_chns,
            'drain_data_ochns': self.ifmap_out_chns,
            'num_logical_managers': 1,
            'SRAM': sram_spec('IFmapGLB'),
            'debug': ' IFmapGLB',
        }
        self.ifmap_glb = IFmapGLB(ifmap_glb_setup)

        # ------------------------ PSUM -------------------------------
        psum_glb_setup = {
            'fill_data_ichns': self.psum_in_chns,
            'update_data_ichns': self.psum_update_chns,
            'drain_data_ochns': self.psum_out_chns,
            'num_logical_managers': 1,
            'SRAM': sram_spec('PsumGLB'),
            'debug': ' PsumGLB',
        }
        self.psum_glb = PsumGLB(psum_glb_setup)

        # ===================================================================
        # PE Array Channels
        # ===================================================================
        self.ifmap_pe_data_chns = ModuleList()
        self.weights_pe_data_chns = ModuleList()
        self.psum_data_chns = ModuleList()
        self.pe_data_chns = {'ifmap': self.ifmap_pe_data_chns,
                             'weights': self.weights_pe_data_chns,
                             'psum': self.psum_data_chns}

        # One row of channels per PE row and one channel per PE, for each of
        # the three data types.
        for pe_row in range(self.pe_array_row):
            for chn_rows in self.pe_data_chns.values():
                chn_rows.append(ModuleList())
            for pe_col in range(self.pe_array_col):
                for chn_rows in self.pe_data_chns.values():
                    chn_rows[pe_row].append(Channel())

        # The psum grid gets one extra row: PE[r][c] reads psum row r and
        # writes psum row r + 1 (see the PE array section below).
        self.psum_data_chns.append(ModuleList())
        for pe_col in range(self.pe_array_col):
            self.psum_data_chns[-1].append(Channel())

        # ===================================================================
        # Destination Calculators for NoC
        # ===================================================================
        self.ifmap_NoC_destination_chn = ModuleList(Channel())
        ifmap_dest_calc_setup = {
            'out_chn': self.ifmap_NoC_destination_chn[0],
            'out_channel_width': 1,
            'debug': 'IFmapNoCDestCalc',
        }
        self.ifmap_NoC_destination_calculator = IFmapNoCDestCalc(
            ifmap_dest_calc_setup)

        # The misspelled attribute names below ("destionation", "serilizer")
        # are kept as-is because configure() refers to them by these names.
        self.weights_NoC_destination_chn = ModuleList(Channel())
        weights_dest_calc_setup = {
            'out_chn': self.weights_NoC_destination_chn[0],
            'out_channel_width': 1,
            'debug': 'WeightsNoCDestCalc',
        }
        self.weights_NoC_destionation_calculator = WeightsNoCDestCalc(
            weights_dest_calc_setup)

        self.psum_in_NoC_destination_chn = ModuleList(Channel())
        psum_in_dest_calc_setup = {
            'out_chn': self.psum_in_NoC_destination_chn[0],
            'out_channel_width': 1,
            'debug': 'PsumInNoCDestCalc',
        }
        self.psum_in_NoC_destionation_calculator = PsumInNoCDestCalc(
            psum_in_dest_calc_setup)

        # ===================================================================
        # NoCs
        # ===================================================================
        # -------------- Weights NoC: WeightsGLB -> PEs -------------------
        # weights serializer (ratio comes from setup['weights_seri_ratio'])
        self.weights_serializered_data_chn = Channel()
        weights_serializer_setup = {
            'in_chn': self.weights_out_chns[0],
            'out_chn': self.weights_serializered_data_chn,
            'ratio': setup['weights_seri_ratio'],
            'debug': 'weights_serialzer',
        }
        self.weights_serializer = WeightsSerializer(weights_serializer_setup)

        # NOTE(review): 'wr_chns' is a bare Channel here, whereas the ifmap
        # and psum read NoCs receive a ModuleList -- confirm this is what
        # WeightsNoC expects.
        weights_noc_setup = {
            'rd_chns': self.pe_data_chns['weights'],
            'wr_chns': self.weights_serializered_data_chn,
            'dest_chns': self.weights_NoC_destination_chn,
            'debug': 'WeightsNoC',
        }
        self.weightsNoC = WeightsNoC(weights_noc_setup)

        # -------------- IFmap NoC: IfmapGLB -> PEs -----------------------
        # ifmap serializer (ratio comes from setup['ifmap_seri_ratio'])
        self.ifmap_serialized_data_chn = Channel()
        ifmap_serializer_setup = {
            'in_chn': self.ifmap_out_chns[0],
            'out_chn': self.ifmap_serialized_data_chn,
            'ratio': setup['ifmap_seri_ratio'],
            'debug': 'ifmap_serializer',
        }
        self.ifmap_serilizer = IfmapSerializer(ifmap_serializer_setup)

        # ifmap NoC
        ifmap_noc_wr_chns = ModuleList()
        ifmap_noc_wr_chns.append(self.ifmap_serialized_data_chn)
        ifmap_noc_setup = {
            'rd_chns': self.pe_data_chns['ifmap'],
            'wr_chns': ifmap_noc_wr_chns,
            'dest_chns': self.ifmap_NoC_destination_chn,
            'debug': 'IFmapNoC',
        }
        self.ifmapNoC = IFMapNoC(ifmap_noc_setup)

        # -------------- PsumRd NoC: Psum GLB -> PEs ----------------------
        # psum serializer (ratio comes from setup['psum_seri_ratio'])
        self.psum_serialized_data_chn = Channel()
        psum_serializer_setup = {
            'in_chn': self.psum_out_chns[0],
            'out_chn': self.psum_serialized_data_chn,
            'ratio': setup['psum_seri_ratio'],
            'debug': 'psum_serialzer',
        }
        self.psum_serializer = PsumSerializer(psum_serializer_setup)

        # psum read noc
        pe_noc_wr_chn = ModuleList(self.psum_serialized_data_chn)
        psum_rd_noc_setup = {
            'rd_chns': self.psum_data_chns,
            'wr_chns': pe_noc_wr_chn,
            'dest_chns': self.psum_in_NoC_destination_chn,
            'debug': 'PsumRdNoC',
        }
        self.psumRdNoC = PsumRdNoC(psum_rd_noc_setup)

        # -------------- PsumWrNoC: PEs -> GLB / offchip ------------------
        self.psum_out_noc_rd_chns = ModuleList()
        self.psum_out_noc_rd_chns.append(
            self.psum_update_chns[0])  # write back to GLB
        self.psum_out_noc_rd_chns.append(self.psum_out_chn)  # write offchip
        psum_wr_noc_setup = {
            'rd_chns': self.psum_out_noc_rd_chns,
            'wr_chns': self.psum_data_chns,
            'debug': 'PsumWrNoC',
        }
        self.psumWrNoC = PsumWrNoC(psum_wr_noc_setup)

        # ===================================================================
        # PE Array
        # ===================================================================
        self.PE = ModuleList()
        for pe_row in range(self.pe_array_row):
            self.PE.append(ModuleList())
            for pe_col in range(self.pe_array_col):
                # Shallow-copy the general PE setup so that each PE gets its
                # own mapping and the caller's setup['PE'] is left untouched.
                pe_setup = dict(setup['PE'])
                # PE specific setup
                pe_setup['row'] = pe_row
                pe_setup['col'] = pe_col
                pe_setup['weights_data_in_chn'] = \
                    self.weights_pe_data_chns[pe_row][pe_col]
                pe_setup['ifmap_data_in_chn'] = \
                    self.ifmap_pe_data_chns[pe_row][pe_col]
                pe_setup['psum_data_in_chn'] = \
                    self.psum_data_chns[pe_row][pe_col]
                pe_setup['psum_data_out_chn'] = \
                    self.psum_data_chns[pe_row + 1][pe_col]
                self.PE[pe_row].append(PE(pe_setup))

    def configure(self, config):
        """Configure every sub-module for one layer.

        Keys read from ``config``:
            'mapping'    -- stored as ``self.mapping``; its 'M0' and 'C0'
                            entries bound the active PE columns and rows.
            'shape'      -- stored as ``self.shape``.
            'WeightsGLB', 'IFmapGLB', 'PsumGLB'
                         -- forwarded to the matching global buffer.
            'PE'         -- base PE configuration; copied per PE and never
                            modified here.
        """
        # extract the shape and mapping information
        self.mapping = config['mapping']
        self.shape = config['shape']

        # --------------------------------------------------------------------
        # Configure Global Buffers
        # --------------------------------------------------------------------
        self.weights_glb.configure(config['WeightsGLB'])
        self.ifmap_glb.configure(config['IFmapGLB'])
        self.psum_glb.configure(config['PsumGLB'])

        # --------------------------------------------------------------------
        # Configure Destination Calculators
        # --------------------------------------------------------------------
        shape_mapping_info = {'mapping': self.mapping, 'shape': self.shape}
        self.ifmap_NoC_destination_calculator.configure(shape_mapping_info)
        self.weights_NoC_destionation_calculator.configure(shape_mapping_info)
        self.psum_in_NoC_destionation_calculator.configure(shape_mapping_info)

        # --------------------------------------------------------------------
        # Configure NoCs
        # --------------------------------------------------------------------
        self.weightsNoC.configure()
        self.ifmapNoC.configure()
        self.psumRdNoC.configure()
        # >> self defined NoC module
        self.psumWrNoC.configure({
            'mapping': self.mapping,
            'shape': self.shape
        })

        # --------------------------------------------------------------------
        # Configure Serializers/Deserializers
        # --------------------------------------------------------------------
        self.ifmap_serilizer.configure()
        self.psum_serializer.configure()
        self.weights_serializer.configure()

        # --------------------------------------------------------------------
        # Configure PE Arrays
        # --------------------------------------------------------------------
        for pe_row in range(self.pe_array_row):
            for pe_col in range(self.pe_array_col):
                # Per-PE shallow copy: every PE receives its own mapping, so
                # a later PE's 'clk_gated' value can never leak into an
                # earlier PE (or into the caller's config['PE']).
                pe_config = dict(config['PE'])
                # determine if the PE needs to be used for this layer
                pe_config['clk_gated'] = bool(
                    pe_col >= self.mapping['M0']
                    or pe_row >= self.mapping['C0'])
                self.PE[pe_row][pe_col].configure(pe_config)
class OSArch(Module):
    """Chip-level module: input deserializer, two GLBs, a PE grid, two NoCs
    and an output serializer, all connected through channels."""

    def instantiate(self, arr_x, arr_y, input_chn, output_chn, chn_per_word,
                    ifmap_glb_depth, weight_glb_depth):
        """Create the channels and sub-modules of the architecture.

        ``arr_x`` is used as the inner (column) extent of the PE grid and
        ``arr_y`` as the outer (row) extent. ``input_chn`` / ``output_chn``
        are the externally supplied IO channels. The remaining arguments are
        forwarded unchanged to the sub-module constructors below.
        """
        # PE static configuration (immutable)
        self.name = 'chip'
        self.arr_x = arr_x
        self.arr_y = arr_y
        self.chn_per_word = chn_per_word

        self.stat_type = 'show'

        # DRAM-facing IO channels are supplied by the caller
        self.input_chn = input_chn
        self.output_chn = output_chn

        # Input deserializer and the three channels it is connected to
        self.ifmap_wr_chn = Channel(name='ifmap_wr_chn')
        self.weights_wr_chn = Channel(name='weights_wr_chn')
        self.bias_wr_chn = Channel(name='bias_wr_chn')
        self.deserializer = InputDeserializer(
            self.input_chn,
            self.ifmap_wr_chn,
            self.weights_wr_chn,
            self.bias_wr_chn,
            arr_x,
            arr_y,
            chn_per_word,
        )

        # Global buffers and their read channels
        self.ifmap_rd_chn = Channel(3, name='ifmap_rd_chn')
        self.ifmap_glb = GLB(
            self.ifmap_wr_chn,
            self.ifmap_rd_chn,
            ifmap_glb_depth,
            self.arr_y,
            chn_per_word,
            name='ifmap_glb',
        )

        self.weights_rd_chn = Channel(name='weights_rd_chn')
        self.weights_glb = GLB(
            self.weights_wr_chn,
            self.weights_rd_chn,
            weight_glb_depth,
            self.arr_x,
            self.chn_per_word,
            name='weight_glb',
        )

        # Containers for the PE grid and its per-PE channels
        self.pe_array = ModuleList()
        self.pe_ifmap_chns = ModuleList()
        self.pe_weight_chns = ModuleList()
        self.pe_bias_chns = ModuleList()
        self.pe_out_chns = ModuleList()

        # Build the grid row by row; each PE gets its own ifmap, weight and
        # output channel.
        for row in range(self.arr_y):
            pe_line = ModuleList()
            self.pe_array.append(pe_line)
            ifmap_line = ModuleList()
            self.pe_ifmap_chns.append(ifmap_line)
            weight_line = ModuleList()
            self.pe_weight_chns.append(weight_line)
            out_line = ModuleList()
            self.pe_out_chns.append(out_line)

            for col in range(self.arr_x):
                ifmap_line.append(
                    Channel(32, name='pe_ifmap_chns_{}_{}'.format(col, row)))
                weight_line.append(
                    Channel(32, name='pe_filter_chns_{}_{}'.format(col, row)))
                out_line.append(
                    Channel(32, name='pe_psum_chns_{}_{}'.format(col, row)))
                pe_line.append(
                    PE(col, row,
                       ifmap_line[col],
                       weight_line[col],
                       out_line[col]))

        # NoCs that deliver weights and ifmaps, and the serializer that is
        # given the PE output channels
        self.weight_noc = WeightsNoC(self.weights_rd_chn, self.pe_weight_chns,
                                     self.arr_x, self.arr_y)
        self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns,
                                  self.arr_x, self.arr_y)
        self.serializer = OutputSerializer(self.output_chn, self.pe_out_chns,
                                           self.arr_x, self.arr_y,
                                           chn_per_word)

    def configure(self, batch_size, input_size, output_size):
        """Configure the deserializer, both GLBs, both NoCs and every PE.

        All quantities handed to the GLBs are computed with floor division
        by the array dimensions.
        """
        self.deserializer.configure(batch_size, input_size, output_size)

        ifmap_arg0 = batch_size * input_size // self.arr_y
        ifmap_arg1 = output_size // self.arr_x
        self.ifmap_glb.configure(ifmap_arg0, ifmap_arg1, batch_size)

        weights_arg0 = (input_size + 1) * output_size // self.arr_x
        weights_arg1 = batch_size // self.arr_y
        self.weights_glb.configure(weights_arg0, weights_arg1, self.arr_x)

        self.weight_noc.configure()
        self.ifmap_noc.configure()

        for row in range(self.arr_y):
            for col in range(self.arr_x):
                self.pe_array[row][col].configure(input_size)