def gen_all_traces(
        array_h=4, array_w=4,
        ifmap_h=7, ifmap_w=7,
        filt_h=3, filt_w=3,
        num_channels=3,
        strides=1, num_filt=8,
        data_flow='os',
        word_size_bytes=1,
        filter_sram_size=64, ifmap_sram_size=64, ofmap_sram_size=64,
        filt_base=1000000, ifmap_base=0, ofmap_base=2000000,
        sram_read_trace_file="sram_read.csv",
        sram_write_trace_file="sram_write.csv",
        dram_filter_trace_file="dram_filter_read.csv",
        dram_ifmap_trace_file="dram_ifmap_read.csv",
        dram_ofmap_trace_file="dram_ofmap_write.csv"
):
    """Generate SRAM and DRAM traces for one layer and summarise bandwidth.

    The function runs three stages in order:

    1. SRAM traces: ``data_flow`` selects which generator's
       ``sram_traffic`` is called ('os' -> ``sram``, 'ws' -> ``sram_ws``,
       'is' -> ``sram_is``; presumably output-/weight-/input-stationary --
       confirm).  Any other value skips this stage and leaves the cycle
       count and utilization at 0.
    2. DRAM traces: two read traces (ifmap and filter) are derived from the
       SRAM read trace and one write trace (ofmap) from the SRAM write
       trace.
    3. Bandwidth numbers: ``gen_bw_numbers`` is called with the five trace
       file names.

    Args:
        array_h, array_w: Forwarded as ``dimension_rows`` /
            ``dimension_cols`` of the compute array.
        ifmap_h, ifmap_w, filt_h, filt_w, num_channels, strides, num_filt:
            Layer description, forwarded unchanged to the SRAM generator.
        data_flow: One of 'os', 'ws', 'is'.
        word_size_bytes: Forwarded to the DRAM trace generators.
        filter_sram_size, ifmap_sram_size, ofmap_sram_size: Buffer sizes
            forwarded to the respective DRAM trace generator.
        filt_base, ifmap_base, ofmap_base: Base addresses.  The ifmap read
            window is bounded by ``ifmap_base`` and ``filt_base``; the
            filter read window by ``filt_base`` and ``ofmap_base``.
        sram_read_trace_file, sram_write_trace_file: SRAM trace file names.
        dram_filter_trace_file, dram_ifmap_trace_file,
        dram_ofmap_trace_file: DRAM trace file names.

    Returns:
        Tuple ``(bw_numbers, detailed_log, util, sram_cycles)``.
    """
    sram_cycles = 0
    util = 0

    print("Generating traces and bw numbers")

    # Keyword arguments are identical for all three SRAM generators, so
    # they are assembled once and only the callee varies.
    layer_args = dict(
        dimension_rows=array_h,
        dimension_cols=array_w,
        ifmap_h=ifmap_h,
        ifmap_w=ifmap_w,
        filt_h=filt_h,
        filt_w=filt_w,
        num_channels=num_channels,
        strides=strides,
        num_filt=num_filt,
        filt_base=filt_base,
        ifmap_base=ifmap_base,
        ofmap_base=ofmap_base,
        sram_read_trace_file=sram_read_trace_file,
        sram_write_trace_file=sram_write_trace_file,
    )

    if data_flow == 'os':
        sram_cycles, util = sram.sram_traffic(**layer_args)
    elif data_flow == 'ws':
        sram_cycles, util = sram_ws.sram_traffic(**layer_args)
    elif data_flow == 'is':
        sram_cycles, util = sram_is.sram_traffic(**layer_args)

    # Both read traces are derived from the same SRAM read trace; the
    # address window decides which operand each DRAM trace covers.
    dram.dram_trace_read_v2(
        sram_sz=ifmap_sram_size,
        word_sz_bytes=word_size_bytes,
        min_addr=ifmap_base,
        max_addr=filt_base,
        sram_trace_file=sram_read_trace_file,
        dram_trace_file=dram_ifmap_trace_file,
    )
    dram.dram_trace_read_v2(
        sram_sz=filter_sram_size,
        word_sz_bytes=word_size_bytes,
        min_addr=filt_base,
        max_addr=ofmap_base,
        sram_trace_file=sram_read_trace_file,
        dram_trace_file=dram_filter_trace_file,
    )
    dram.dram_trace_write(
        ofmap_sram_size=ofmap_sram_size,
        data_width_bytes=word_size_bytes,
        sram_write_trace_file=sram_write_trace_file,
        dram_write_trace_file=dram_ofmap_trace_file,
    )

    print("Average utilization : \t" + str(util) + " %")
    print("Cycles for compute : \t" + str(sram_cycles) + " cycles")

    bw_numbers, detailed_log = gen_bw_numbers(
        dram_ifmap_trace_file,
        dram_filter_trace_file,
        dram_ofmap_trace_file,
        sram_write_trace_file,
        sram_read_trace_file,
    )

    return bw_numbers, detailed_log, util, sram_cycles
def gen_all_traces_dramsim_format(
        array_h=4, array_w=4,
        ifmap_h=7, ifmap_w=7,
        filt_h=3, filt_w=3,
        num_channels=3,
        strides=1, num_filt=8,
        data_flow='os',
        dram_read_bw=8, dram_write_bw=8,
        dram_read_wsb=1, dram_write_wsb=1,
        filter_sram_size=64, ifmap_sram_size=64, ofmap_sram_size=64,
        filt_base=1000000, ifmap_base=0, ofmap_base=2000000,
        sram_read_trace_file="sram_read.csv",
        sram_write_trace_file="sram_write.csv",
        dram_filter_trace_file="dram_filter_read.csv",
        dram_ifmap_trace_file="dram_ifmap_read.csv",
        dram_ofmap_trace_file="dram_ofmap_write.csv"
):
    """Generate SRAM traces and DRAM traces via the ``dramsim_format`` module.

    The SRAM stage dispatches on ``data_flow`` ('os' -> ``sram``,
    'ws' -> ``sram_ws``, 'is' -> ``sram_is``); any other value skips it and
    leaves cycles/utilization at 0.  The DRAM stage then calls
    ``dramsim_format.dram_trace_read_dramsim`` twice (ifmap, filter) and
    ``dramsim_format.dram_trace_write_dramsim`` once (ofmap).

    No bandwidth statistics are computed here; a warning saying so is
    printed instead.

    Args:
        array_h, array_w: Forwarded as ``dimension_rows`` /
            ``dimension_cols``.
        ifmap_h, ifmap_w, filt_h, filt_w, num_channels, strides, num_filt:
            Layer description, forwarded to the SRAM generator.
        data_flow: One of 'os', 'ws', 'is'.
        dram_read_bw, dram_write_bw: Forwarded as ``default_read_bw`` /
            ``default_write_bw``.
        dram_read_wsb, dram_write_wsb: Forwarded as ``word_sz_bytes`` (reads)
            and ``data_width_bytes`` (writes).
        filter_sram_size, ifmap_sram_size, ofmap_sram_size: Buffer sizes
            forwarded to the DRAM trace generators.
        filt_base, ifmap_base, ofmap_base: Base addresses bounding the
            ifmap and filter read windows.
        sram_read_trace_file, sram_write_trace_file: SRAM trace file names.
        dram_filter_trace_file, dram_ifmap_trace_file,
        dram_ofmap_trace_file: DRAM trace file names.

    Returns:
        Tuple ``(util, sram_cycles)``.
    """
    sram_cycles = 0
    util = 0

    print("Generating traces and bw numbers")

    # One shared argument set; only the generator module differs.
    layer_args = dict(
        dimension_rows=array_h,
        dimension_cols=array_w,
        ifmap_h=ifmap_h,
        ifmap_w=ifmap_w,
        filt_h=filt_h,
        filt_w=filt_w,
        num_channels=num_channels,
        strides=strides,
        num_filt=num_filt,
        filt_base=filt_base,
        ifmap_base=ifmap_base,
        ofmap_base=ofmap_base,
        sram_read_trace_file=sram_read_trace_file,
        sram_write_trace_file=sram_write_trace_file,
    )

    if data_flow == 'os':
        sram_cycles, util = sram.sram_traffic(**layer_args)
    elif data_flow == 'ws':
        sram_cycles, util = sram_ws.sram_traffic(**layer_args)
    elif data_flow == 'is':
        sram_cycles, util = sram_is.sram_traffic(**layer_args)

    # Read traces: (buffer size, lower address bound, upper address bound,
    # output file) for the ifmap and then the filter operand.
    read_jobs = (
        (ifmap_sram_size, ifmap_base, filt_base, dram_ifmap_trace_file),
        (filter_sram_size, filt_base, ofmap_base, dram_filter_trace_file),
    )
    for buffer_size, low_addr, high_addr, out_file in read_jobs:
        dramsim_format.dram_trace_read_dramsim(
            sram_sz=buffer_size,
            word_sz_bytes=dram_read_wsb,
            default_read_bw=dram_read_bw,
            min_addr=low_addr,
            max_addr=high_addr,
            sram_trace_file=sram_read_trace_file,
            dram_trace_file=out_file,
        )

    dramsim_format.dram_trace_write_dramsim(
        ofmap_sram_size=ofmap_sram_size,
        data_width_bytes=dram_write_wsb,
        default_write_bw=dram_write_bw,
        sram_write_trace_file=sram_write_trace_file,
        dram_write_trace_file=dram_ofmap_trace_file,
    )

    print("Average utilization : \t" + str(util) + " %")
    print("Cycles for compute : \t" + str(sram_cycles) + " cycles")
    # Bandwidth statistics are intentionally not produced for this format.
    print(
        "Fred [WARNING]: The bw stats part are not taken care, yet to be correct by parsing dramsim format dram "
        "traces")

    return util, sram_cycles
def gen_all_traces(
        array_h=4, array_w=4,
        ifmap_h=7, ifmap_w=7,
        filt_h=3, filt_w=3,
        num_channels=3,
        strides=1, num_filt=8,
        data_flow='os',
        layer_tag="Conv1",
        word_size_bytes=1,
        filter_sram_size=64, ifmap_sram_size=64, ofmap_sram_size=64,
        filt_base=1000000, ifmap_base=0, ofmap_base=2000000,
        sram_read_trace_file="sram_read.csv",
        sram_write_trace_file="sram_write.csv",
        dram_filter_trace_file="dram_filter_read.csv",
        dram_ifmap_trace_file="dram_ifmap_read.csv",
        dram_ofmap_trace_file="dram_ofmap_write.csv"
):
    """Generate (or fetch from a lookup table) all traces for one layer.

    This variant prints nothing.  Each of the five stages below first asks
    ``lut.lookup`` for a stored result when the module-level ``lookup_flag``
    is truthy; on a hit the stored artefacts are fetched into the current
    working directory, otherwise the stage is computed.  Results are never
    written back to the lookup table from here.

    Stages and their lookup keys (``tag`` is
    ``<layer_tag>_<array_h>_<array_w>``):

    1. SRAM traces            -- ``<tag>_<data_flow>``
    2. ifmap DRAM read trace  -- ``<tag>_<data_flow>_<ifmap_sram_size>_ifmap``
    3. filter DRAM read trace -- ``<tag>_<data_flow>_<filter_sram_size>_filter``
    4. ofmap DRAM write trace -- ``<tag>_<data_flow>_<ofmap_sram_size>_ofmap``
    5. bandwidth numbers      -- the ifmap key followed by
       ``_<filter_sram_size>_filter_<ofmap_sram_size>_ofmap``

    Stage 1 only runs for ``data_flow`` in 'os', 'ws', 'is'; any other value
    leaves cycles/utilization at 0.

    Args:
        array_h, array_w: Forwarded as ``dimension_rows`` /
            ``dimension_cols``; also part of every lookup key.
        ifmap_h, ifmap_w, filt_h, filt_w, num_channels, strides, num_filt:
            Layer description, forwarded to the SRAM generator.
        data_flow: One of 'os', 'ws', 'is'.
        layer_tag: Layer name used as the prefix of every lookup key.
        word_size_bytes: Forwarded to the DRAM trace generators.
        filter_sram_size, ifmap_sram_size, ofmap_sram_size: Buffer sizes.
        filt_base, ifmap_base, ofmap_base: Base addresses.
        sram_read_trace_file, sram_write_trace_file: SRAM trace file names.
        dram_filter_trace_file, dram_ifmap_trace_file,
        dram_ofmap_trace_file: DRAM trace file names.

    Returns:
        Tuple ``(bw_numbers, detailed_log, util, sram_cycles)``.
    """
    sram_cycles = 0
    util = 0

    # Common prefix of every lookup key: layer name plus array dimensions.
    tag = layer_tag + "_" + str(array_h) + "_" + str(array_w)

    # ------------------------------------------------------------------
    # Stage 1: SRAM traces (only for the three known dataflows).
    # ------------------------------------------------------------------
    if data_flow == 'os':
        sram_suffix = "_os"
    elif data_flow == 'ws':
        sram_suffix = "_ws"
    elif data_flow == 'is':
        sram_suffix = "_is"
    else:
        sram_suffix = None

    if sram_suffix is not None:
        tag_match = False
        if lookup_flag:
            sram_tag = tag + sram_suffix
            tag_match, dir_name = lut.lookup(sram_tag)

        if tag_match:
            # Reuse the stored result, fetching it into the working dir.
            this_dir = os.getcwd()
            sram_cycles, util = lut.get_sram_stats(dir_name, dest_dir=this_dir)
        else:
            layer_args = dict(
                dimension_rows=array_h,
                dimension_cols=array_w,
                ifmap_h=ifmap_h,
                ifmap_w=ifmap_w,
                filt_h=filt_h,
                filt_w=filt_w,
                num_channels=num_channels,
                strides=strides,
                num_filt=num_filt,
                filt_base=filt_base,
                ifmap_base=ifmap_base,
                ofmap_base=ofmap_base,
                sram_read_trace_file=sram_read_trace_file,
                sram_write_trace_file=sram_write_trace_file,
            )
            if data_flow == 'os':
                sram_cycles, util = sram.sram_traffic(**layer_args)
            elif data_flow == 'ws':
                sram_cycles, util = sram_ws.sram_traffic(**layer_args)
            else:
                sram_cycles, util = sram_is.sram_traffic(**layer_args)

    # ------------------------------------------------------------------
    # Stage 2: ifmap DRAM read trace.
    # ------------------------------------------------------------------
    tag_match = False
    if lookup_flag:
        dram_tag = tag + "_" + data_flow + "_" + str(ifmap_sram_size) + "_ifmap"
        tag_match, dir_name = lut.lookup(dram_tag)

    if tag_match:
        this_dir = os.getcwd()
        lut.get_dram_trace(dir_name, this_dir, cat='ifmap')
    else:
        dram.dram_trace_read_v2(
            sram_sz=ifmap_sram_size,
            word_sz_bytes=word_size_bytes,
            min_addr=ifmap_base,
            max_addr=filt_base,
            sram_trace_file=sram_read_trace_file,
            dram_trace_file=dram_ifmap_trace_file,
        )

    # ------------------------------------------------------------------
    # Stage 3: filter DRAM read trace.
    # ------------------------------------------------------------------
    tag_match = False
    if lookup_flag:
        dram_tag = tag + "_" + data_flow + "_" + str(filter_sram_size) + "_filter"
        tag_match, dir_name = lut.lookup(dram_tag)

    if tag_match:
        this_dir = os.getcwd()
        lut.get_dram_trace(dir_name, this_dir, cat='filter')
    else:
        # NOTE(review): the upper bound is filt_base * 10000 rather than
        # ofmap_base -- confirm this wide window is intended.
        dram.dram_trace_read_v2(
            sram_sz=filter_sram_size,
            word_sz_bytes=word_size_bytes,
            min_addr=filt_base,
            max_addr=(filt_base * 10000),
            sram_trace_file=sram_read_trace_file,
            dram_trace_file=dram_filter_trace_file,
        )

    # ------------------------------------------------------------------
    # Stage 4: ofmap DRAM write trace.
    # ------------------------------------------------------------------
    tag_match = False
    if lookup_flag:
        dram_tag = tag + "_" + data_flow + "_" + str(ofmap_sram_size) + "_ofmap"
        tag_match, dir_name = lut.lookup(dram_tag)

    if tag_match:
        this_dir = os.getcwd()
        lut.get_dram_trace(dir_name, this_dir, cat='ofmap')
    else:
        dram.dram_trace_write(
            ofmap_sram_size=ofmap_sram_size,
            data_width_bytes=word_size_bytes,
            sram_write_trace_file=sram_write_trace_file,
            dram_write_trace_file=dram_ofmap_trace_file,
        )

    # ------------------------------------------------------------------
    # Stage 5: bandwidth numbers, keyed on all three buffer sizes.
    # ------------------------------------------------------------------
    tag_match = False
    if lookup_flag:
        dram_tag = tag + "_" + data_flow + "_" + str(ifmap_sram_size) + "_ifmap"
        dram_tag += "_" + str(filter_sram_size) + "_filter"
        dram_tag += "_" + str(ofmap_sram_size) + "_ofmap"
        tag_match, dir_name = lut.lookup(dram_tag)

    if tag_match:
        this_dir = os.getcwd()
        bw_numbers, detailed_log = lut.get_log_entries(dir_name, this_dir)
    else:
        bw_numbers, detailed_log = gen_bw_numbers(
            dram_ifmap_trace_file,
            dram_filter_trace_file,
            dram_ofmap_trace_file,
            sram_write_trace_file,
            sram_read_trace_file,
        )

    return bw_numbers, detailed_log, util, sram_cycles
def _max_parallel_windows(array_rows, filter_px):
    """Return how many whole filter windows fit in ``array_rows`` (min 1)."""
    if array_rows < filter_px:
        return 1
    return math.floor(array_rows / filter_px)


def _split_work_between_arrays(total_work, capacity_first, capacity_second):
    """Split ``total_work`` items between two arrays, fold by fold.

    Every complete fold gives each array its full per-fold capacity.  The
    remainder of the last fold is placed as follows:

    * it fits only one array            -> that array takes all of it;
    * it equals the combined capacity   -> both arrays are filled;
    * it fits either array              -> the array it fills to the higher
      ratio takes it (ties go to the first array);
    * it fits neither array on its own  -> one array is filled completely
      and the other takes the rest, chosen by comparing the fill ratio the
      partially filled array would reach.

    Raises:
        ValueError: if the combined capacity is non-positive while the work
            exceeds it, because the fold loop could then never terminate.

    Returns:
        Tuple ``(share_first, share_second)``.
    """
    fold_capacity = capacity_first + capacity_second
    if fold_capacity <= 0 and total_work > fold_capacity:
        raise ValueError(
            "cannot distribute " + str(total_work) + " work items: combined "
            "per-fold capacity of the two arrays is " + str(fold_capacity))

    share_first = 0
    share_second = 0
    fold = 1
    # Hand out complete folds until the remaining work fits in one fold.
    while total_work > fold * fold_capacity:
        share_first += capacity_first
        share_second += capacity_second
        fold += 1

    pending = total_work - (fold - 1) * fold_capacity

    if capacity_second < pending <= capacity_first:
        # Last fold fits only in the first array.
        share_first += pending
    elif capacity_first < pending <= capacity_second:
        # Last fold fits only in the second array.
        share_second += pending
    elif pending == fold_capacity:
        share_first += capacity_first
        share_second += capacity_second
    elif pending <= capacity_first and pending <= capacity_second:
        ratio_first = float(pending / capacity_first)
        ratio_second = float(pending / capacity_second)
        if ratio_first >= ratio_second:
            share_first += pending
        else:
            share_second += pending
    else:
        # Needs both arrays but does not fill them: fill one completely.
        ratio_first = float((pending - capacity_second) / capacity_first)
        ratio_second = float((pending - capacity_first) / capacity_second)
        if ratio_first > ratio_second:
            share_second += capacity_second
            share_first += pending - capacity_second
        else:
            share_first += capacity_first
            share_second += pending - capacity_first

    return share_first, share_second


def gen_all_traces(
        array_h_first=4, array_w_first=4,
        array_h_second=4, array_w_second=4,
        single_array=1,
        ifmap_h=7, ifmap_w=7,
        filt_h=3, filt_w=3,
        num_channels=3,
        strides=1, num_filt=8,
        data_flow='os',
        word_size_bytes=1,
        filter_sram_size_first=64, ifmap_sram_size_first=64,
        ofmap_sram_size_first=64,
        filter_sram_size_second=64, ifmap_sram_size_second=64,
        ofmap_sram_size_second=64,
        filt_base=1000000, ifmap_base=0, ofmap_base=2000000,
        sram_read_trace_file_first="sram0_read.csv",
        sram_read_trace_file_second="sram1_read.csv",
        sram_write_trace_file_first="sram0_write.csv",
        sram_write_trace_file_second="sram1_write.csv",
        dram_filter_trace_file="dram_filter_read.csv",
        dram_ifmap_trace_file="dram_ifmap_read.csv",
        dram_ofmap_trace_file="dram_ofmap_write.csv"
):
    """Generate traces for a layer mapped onto one or two compute arrays.

    Steps:

    1. The layer's work is split between the two arrays fold by fold.  The
       work items are filters for 'os'/'ws' and output pixels for 'is'.  An
       array's per-fold capacity is its width times the number of filter
       windows that fit in its height (always 1 for 'os').  When
       ``single_array == 1`` the split is overridden and the first array
       receives everything.
    2. SRAM traces are generated for each array that received work, using
       ``sram`` / ``sram_ws`` / ``sram_is`` according to ``data_flow``.  The
       second array is given the first array's share as ``col_idx_base``.
    3. DRAM traces are generated per array.  If only one array is active the
       traces go straight to the ``dram_*_trace_file`` names; otherwise each
       array writes to fixed ``dram_sram0_*`` / ``dram_sram1_*`` files which
       are then merged with ``sram_controller``.
    4. Utilization, cycle count and a power metric are printed, and
       ``gen_bw_numbers`` is called with flags describing which arrays ran.

    Args:
        array_h_first, array_w_first, array_h_second, array_w_second:
            Dimensions of the two arrays.
        single_array: 1 forces everything onto the first array.
        ifmap_h, ifmap_w, filt_h, filt_w, num_channels, strides, num_filt:
            Layer description.
        data_flow: One of 'os', 'ws', 'is'.
        word_size_bytes: Forwarded to the DRAM trace generators.
        filter_sram_size_*, ifmap_sram_size_*, ofmap_sram_size_*: Buffer
            sizes of each array.
        filt_base, ifmap_base, ofmap_base: Base addresses bounding the
            ifmap and filter read windows.
        sram_read_trace_file_*, sram_write_trace_file_*: Per-array SRAM
            trace file names.
        dram_filter_trace_file, dram_ifmap_trace_file,
        dram_ofmap_trace_file: Final DRAM trace file names.

    Returns:
        Tuple ``(bw_numbers, detailed_log, util, str(sram_cycles),
        array_one_used, array_two_used, power_metric)``.  ``util`` is the
        active array's utilization, or the plain average of both.
        ``sram_cycles`` is the larger of the two arrays' cycle counts.

    Raises:
        ValueError: if the arrays have no usable capacity for the work, or
            if ``single_array != 1`` and neither array was assigned any work
            (for example an unsupported ``data_flow``).
    """
    sram_cycles_first = 0
    sram_cycles_second = 0
    array_one_used = 0
    array_two_used = 0
    util_first = 0
    util_second = 0

    # Intermediate per-array DRAM traces, used only when both arrays run.
    dram_filter_trace_file_first = "dram_sram0_filter_read.csv"
    dram_ifmap_trace_file_first = "dram_sram0_ifmap_read.csv"
    dram_ofmap_trace_file_first = "dram_sram0_ofmap_write.csv"
    dram_filter_trace_file_second = "dram_sram1_filter_read.csv"
    dram_ifmap_trace_file_second = "dram_sram1_ifmap_read.csv"
    dram_ofmap_trace_file_second = "dram_sram1_ofmap_write.csv"

    print("Generating traces and bw numbers")

    def run_array(rows, cols, share, first_index, read_trace, write_trace):
        """Run the SRAM generator for one array; return (cycles, util)."""
        kwargs = dict(
            dimension_rows=rows,
            dimension_cols=cols,
            ifmap_h=ifmap_h,
            ifmap_w=ifmap_w,
            filt_h=filt_h,
            filt_w=filt_w,
            num_channels=num_channels,
            strides=strides,
            col_idx_base=first_index,
            filt_base=filt_base,
            ifmap_base=ifmap_base,
            ofmap_base=ofmap_base,
            sram_read_trace_file=read_trace,
            sram_write_trace_file=write_trace,
        )
        if data_flow == 'is':
            # 'is' splits output pixels; every array sees all filters.
            return sram_is.sram_traffic(
                num_ofmap=share, num_filt=num_filt, **kwargs)
        generator = sram if data_flow == 'os' else sram_ws
        # 'os'/'ws' split filters; the layer total is passed alongside.
        return generator.sram_traffic(
            num_filt=share, total_num_filt=num_filt, **kwargs)

    def write_dram_traces(ifmap_size, filter_size, ofmap_size,
                          read_trace, write_trace,
                          ifmap_out, filter_out, ofmap_out):
        """Derive the three DRAM traces of one array from its SRAM traces."""
        dram.dram_trace_read_v2(
            sram_sz=ifmap_size,
            word_sz_bytes=word_size_bytes,
            min_addr=ifmap_base,
            max_addr=filt_base,
            sram_trace_file=read_trace,
            dram_trace_file=ifmap_out)
        dram.dram_trace_read_v2(
            sram_sz=filter_size,
            word_sz_bytes=word_size_bytes,
            min_addr=filt_base,
            max_addr=ofmap_base,
            sram_trace_file=read_trace,
            dram_trace_file=filter_out)
        dram.dram_trace_write(
            ofmap_sram_size=ofmap_size,
            data_width_bytes=word_size_bytes,
            sram_write_trace_file=write_trace,
            dram_write_trace_file=ofmap_out)

    # ------------------------------------------------------------------
    # Step 1: amount of work and per-fold capacity for this dataflow.
    # ------------------------------------------------------------------
    dataflow_supported = True
    if data_flow == 'os':
        # 'os' cannot hold more than one filter per column, so the
        # per-fold capacity is simply the array width.
        total_work = num_filt
        capacity_first = array_w_first
        capacity_second = array_w_second
    elif data_flow == 'ws' or data_flow == 'is':
        filter_px = filt_h * filt_w * num_channels
        capacity_first = array_w_first * _max_parallel_windows(
            array_h_first, filter_px)
        capacity_second = array_w_second * _max_parallel_windows(
            array_h_second, filter_px)
        if data_flow == 'ws':
            total_work = num_filt
        else:
            # NOTE(review): true division makes these floats whenever the
            # code runs on Python 3 -- confirm sram_is accepts a float
            # num_ofmap.
            ofmap_h = (ifmap_h - filt_h) / strides + 1
            ofmap_w = (ifmap_w - filt_w) / strides + 1
            total_work = ofmap_h * ofmap_w
    else:
        dataflow_supported = False

    # ------------------------------------------------------------------
    # Step 2: split the work and generate SRAM traces per active array.
    # ------------------------------------------------------------------
    if dataflow_supported:
        work_first, work_second = _split_work_between_arrays(
            total_work, capacity_first, capacity_second)
        # The second array continues where the first array's share ends.
        col_idx_base = work_first

        if single_array == 1:
            work_first = total_work
            work_second = 0

        if work_first > 0:
            array_one_used = 1
            sram_cycles_first, util_first = run_array(
                array_h_first, array_w_first, work_first, 0,
                sram_read_trace_file_first, sram_write_trace_file_first)

        if work_second > 0:
            array_two_used = 1
            sram_cycles_second, util_second = run_array(
                array_h_second, array_w_second, work_second, col_idx_base,
                sram_read_trace_file_second, sram_write_trace_file_second)

    # ------------------------------------------------------------------
    # Step 3: DRAM traces.  A lone active array writes the final files
    # directly; otherwise per-array files are written and merged below.
    # ------------------------------------------------------------------
    if array_one_used == 1:
        if single_array == 1 or array_two_used == 0:
            write_dram_traces(
                ifmap_sram_size_first, filter_sram_size_first,
                ofmap_sram_size_first,
                sram_read_trace_file_first, sram_write_trace_file_first,
                dram_ifmap_trace_file, dram_filter_trace_file,
                dram_ofmap_trace_file)
        else:
            write_dram_traces(
                ifmap_sram_size_first, filter_sram_size_first,
                ofmap_sram_size_first,
                sram_read_trace_file_first, sram_write_trace_file_first,
                dram_ifmap_trace_file_first, dram_filter_trace_file_first,
                dram_ofmap_trace_file_first)

    if array_two_used == 1:
        if array_one_used == 0:
            write_dram_traces(
                ifmap_sram_size_second, filter_sram_size_second,
                ofmap_sram_size_second,
                sram_read_trace_file_second, sram_write_trace_file_second,
                dram_ifmap_trace_file, dram_filter_trace_file,
                dram_ofmap_trace_file)
        else:
            write_dram_traces(
                ifmap_sram_size_second, filter_sram_size_second,
                ofmap_sram_size_second,
                sram_read_trace_file_second, sram_write_trace_file_second,
                dram_ifmap_trace_file_second, dram_filter_trace_file_second,
                dram_ofmap_trace_file_second)

    # Merge both arrays' DRAM traces for the bandwidth calculation.
    if array_one_used == 1 and array_two_used == 1:
        sram_controller(dram_ifmap_trace_file_first,
                        dram_ifmap_trace_file_second,
                        dram_ifmap_trace_file)
        sram_controller(dram_filter_trace_file_first,
                        dram_filter_trace_file_second,
                        dram_filter_trace_file)
        sram_controller(dram_ofmap_trace_file_first,
                        dram_ofmap_trace_file_second,
                        dram_ofmap_trace_file)

    # ------------------------------------------------------------------
    # Step 4: summary metrics.
    # ------------------------------------------------------------------
    work_units_first = (int(sram_cycles_first) * int(array_h_first)
                        * int(array_w_first))
    work_units_second = (int(sram_cycles_second) * int(array_h_second)
                         * int(array_w_second))

    if array_one_used == 1 and array_two_used == 0:
        util = util_first
        power_metric = work_units_first / 1000000
    elif array_one_used == 0 and array_two_used == 1:
        util = util_second
        power_metric = work_units_second / 1000000
    else:
        # Both arrays weigh equally in the average.
        util = (util_first + util_second) / 2
        power_metric = (work_units_first + work_units_second) / 1000000

    sram_cycles = max(int(sram_cycles_first), int(sram_cycles_second))

    print("Average utilization : \t" + str(util) + " %")
    print("Cycles for compute : \t" + str(sram_cycles) + " cycles")
    print("Power consumed : \t" + str(power_metric) + " Mega-units")

    if single_array == 1:
        # Used as a single compute array simulator.
        bw_numbers, detailed_log = gen_bw_numbers(
            both_array_used=0,
            array_one_idle=0,
            array_two_idle=0,
            dram_ifmap_trace_file=dram_ifmap_trace_file,
            dram_filter_trace_file=dram_filter_trace_file,
            dram_ofmap_trace_file=dram_ofmap_trace_file,
            sram_write_trace_file_first=sram_write_trace_file_first,
            sram_read_trace_file_first=sram_read_trace_file_first)
    elif array_one_used == 1 and array_two_used == 0:
        # Second array not powered on at all.
        bw_numbers, detailed_log = gen_bw_numbers(
            both_array_used=1,
            array_one_idle=0,
            array_two_idle=1,
            dram_ifmap_trace_file=dram_ifmap_trace_file,
            dram_filter_trace_file=dram_filter_trace_file,
            dram_ofmap_trace_file=dram_ofmap_trace_file,
            sram_write_trace_file_first=sram_write_trace_file_first,
            sram_read_trace_file_first=sram_read_trace_file_first)
    elif array_one_used == 0 and array_two_used == 1:
        # First array not powered on at all.
        bw_numbers, detailed_log = gen_bw_numbers(
            both_array_used=1,
            array_one_idle=1,
            array_two_idle=0,
            dram_ifmap_trace_file=dram_ifmap_trace_file,
            dram_filter_trace_file=dram_filter_trace_file,
            dram_ofmap_trace_file=dram_ofmap_trace_file,
            sram_write_trace_file_second=sram_write_trace_file_second,
            sram_read_trace_file_second=sram_read_trace_file_second)
    elif array_one_used == 1 and array_two_used == 1:
        bw_numbers, detailed_log = gen_bw_numbers(
            both_array_used=1,
            array_one_idle=0,
            array_two_idle=0,
            dram_ifmap_trace_file=dram_ifmap_trace_file,
            dram_filter_trace_file=dram_filter_trace_file,
            dram_ofmap_trace_file=dram_ofmap_trace_file,
            sram_write_trace_file_first=sram_write_trace_file_first,
            sram_read_trace_file_first=sram_read_trace_file_first,
            sram_write_trace_file_second=sram_write_trace_file_second,
            sram_read_trace_file_second=sram_read_trace_file_second)
    else:
        # Previously this fell through and failed on unbound result names.
        raise ValueError(
            "no compute array was assigned any work (data_flow="
            + repr(data_flow) + "); cannot compute bandwidth numbers")

    return (bw_numbers, detailed_log, util, str(sram_cycles),
            array_one_used, array_two_used, power_metric)
def gen_all_traces(
        array_h=4, array_w=4,
        ifmap_h=7, ifmap_w=7,
        filt_h=3, filt_w=3,
        num_channels=3,
        strides=1, num_filt=8,
        data_flow='os',
        word_size_bytes=1,
        filter_sram_size=64, ifmap_sram_size=64, ofmap_sram_size=64,
        filt_base=1000000, ifmap_base=0, ofmap_base=2000000,
        sram_read_trace_file="sram_read.csv",
        sram_write_trace_file="sram_write.csv",
        dram_filter_trace_file="dram_filter_read.csv",
        dram_ifmap_trace_file="dram_ifmap_read.csv",
        dram_ofmap_trace_file="dram_ofmap_write.csv"
):
    """Generate all traces for one layer and log the DRAM generators' results.

    SRAM traces are produced by the generator selected through
    ``data_flow`` ('os' -> ``sram``, 'ws' -> ``sram_ws``, 'is' ->
    ``sram_is``; other values skip the stage).  DRAM traces and bandwidth
    numbers follow.  The values returned by the three DRAM trace generators
    (presumably access counts or times -- confirm against the ``dram``
    module) are appended to the detailed log and printed as a tuple.

    Args:
        array_h, array_w: Forwarded as ``dimension_rows`` /
            ``dimension_cols``.
        ifmap_h, ifmap_w, filt_h, filt_w, num_channels, strides, num_filt:
            Layer description, forwarded to the SRAM generator.
        data_flow: One of 'os', 'ws', 'is'.
        word_size_bytes: Forwarded to the DRAM trace generators.
        filter_sram_size, ifmap_sram_size, ofmap_sram_size: Buffer sizes.
        filt_base, ifmap_base, ofmap_base: Base addresses bounding the
            ifmap and filter read windows.
        sram_read_trace_file, sram_write_trace_file: SRAM trace file names.
        dram_filter_trace_file, dram_ifmap_trace_file,
        dram_ofmap_trace_file: DRAM trace file names.

    Returns:
        Tuple ``(bw_numbers, detailed_log, util, sram_cycles)``, where
        ``detailed_log`` has the three DRAM values appended as
        ``"<ifmap>,\\t<filter>,\\t<ofmap>,"``.
    """
    sram_cycles = 0
    util = 0

    print("Generating traces and bw numbers")

    # The same keyword set serves all three SRAM generators.
    layer_args = dict(
        dimension_rows=array_h,
        dimension_cols=array_w,
        ifmap_h=ifmap_h,
        ifmap_w=ifmap_w,
        filt_h=filt_h,
        filt_w=filt_w,
        num_channels=num_channels,
        strides=strides,
        num_filt=num_filt,
        filt_base=filt_base,
        ifmap_base=ifmap_base,
        ofmap_base=ofmap_base,
        sram_read_trace_file=sram_read_trace_file,
        sram_write_trace_file=sram_write_trace_file,
    )

    if data_flow == 'os':
        sram_cycles, util = sram.sram_traffic(**layer_args)
    elif data_flow == 'ws':
        sram_cycles, util = sram_ws.sram_traffic(**layer_args)
    elif data_flow == 'is':
        sram_cycles, util = sram_is.sram_traffic(**layer_args)

    # The DRAM timing is back-derived from the SRAM timing.
    # The three generators are still invoked one after another, which
    # implies the three streams could happen simultaneously; in practice
    # there is not that much bandwidth available.  Open question: how
    # should the latency between compute and memory access be covered?
    ifmap_dram_acc_times = dram.dram_trace_read_v2(
        sram_sz=ifmap_sram_size,
        word_sz_bytes=word_size_bytes,
        min_addr=ifmap_base,
        max_addr=filt_base,
        sram_trace_file=sram_read_trace_file,
        dram_trace_file=dram_ifmap_trace_file,
    )
    filt_dram_acc_times = dram.dram_trace_read_v2(
        sram_sz=filter_sram_size,
        word_sz_bytes=word_size_bytes,
        min_addr=filt_base,
        max_addr=ofmap_base,
        sram_trace_file=sram_read_trace_file,
        dram_trace_file=dram_filter_trace_file,
    )
    ofmap_dram_acc_times = dram.dram_trace_write(
        ofmap_sram_size=ofmap_sram_size,
        data_width_bytes=word_size_bytes,
        sram_write_trace_file=sram_write_trace_file,
        dram_write_trace_file=dram_ofmap_trace_file,
    )

    print("Average utilization : \t" + str(util) + " %")
    print("Cycles for compute : \t" + str(sram_cycles) + " cycles")

    bw_numbers, detailed_log = gen_bw_numbers(
        dram_ifmap_trace_file,
        dram_filter_trace_file,
        dram_ofmap_trace_file,
        sram_write_trace_file,
        sram_read_trace_file,
    )

    # Append the three DRAM values as extra comma-terminated log columns.
    dram_columns = (
        str(ifmap_dram_acc_times) + ",\t"
        + str(filt_dram_acc_times) + ",\t"
        + str(ofmap_dram_acc_times) + ","
    )
    detailed_log += dram_columns
    print((ifmap_dram_acc_times, filt_dram_acc_times, ofmap_dram_acc_times))

    return bw_numbers, detailed_log, util, sram_cycles