def gen_bmp(layercnt, vgglog):
    """
    description:
        Pick the network name contained in the module-level ptpath, run
        input/inout_print.py for it through the shell and, for multi-layer
        networks whose name does not contain "resnet", call deal_fc_k.
    parameters:
        layercnt: Layer count; deal_fc_k is only called when it is > 1
        vgglog: Path to vgglog file, forwarded to deal_fc_k
    return code:
        1 when ptpath contains none of the known network names,
        otherwise None
    """
    # Order matters: the first name found inside ptpath wins.
    for candidate in ("vggnet", "resnet18", "resnet34", "resnet50"):
        if candidate in ptpath:
            net = candidate
            break
    else:
        print(f'Unknown {ptpath}')
        return 1

    # The exit status of the shell command is not inspected.
    os.system(
        f'python3 input/inout_print.py {net} imagenet/im6.bmp {ptpath} {bmpdtpath} > .debug/genbmp.log'
    )

    if layercnt > 1 and "resnet" not in net:
        # Muse-v2 interface, v3 is not used.
        deal_fc_k(vgglog)
    prints("run gen_bmp successfully")
def gen_fpga(filepath):
    """
    description:
        Generate bin files for fpga by running a fixed sequence of shell
        commands, switching the working directory before some of them.
        The working directory is not restored afterwards: the last step
        runs after a chdir("..").
    parameters:
        filepath: Run config_gen_file.py directory
    return code:
        None
    """
    os.chdir(filepath)

    # Each step is (directory to enter first or None to stay put, command).
    # The continuation inside the first literal keeps the command a single
    # shell line; only whitespace separates the glob patterns.
    steps = (
        (None,
         "rm -rf con*txt cfg*txt *bn* *bias* *alpha* *weight* *input* *output* \
        *k* data_for_fpga"),
        ("..",
         f'cp -af {confpath}/* {ptdtpath}/* {bmpdtpath}/* imagenet/'),
        (filepath,
         "python3 ../input/config_gen_file.py -d ../imagenet/ -n imagenet_img6 -f > ../.debug/genfile.log"),
        ("..",
         f'mv {filepath}/data_for_fpga {outputpath}'),
    )

    for workdir, command in steps:
        if workdir is not None:
            os.chdir(workdir)
        # The exit status of each command is not inspected.
        os.system(command)
    prints("generate fpga files successfully")
def binary_addr(instdir, datadir, output):
    """
    description:
        Accumulate the binary size reported by get_binsize for the entries
        of instdir and then datadir, and write the total to fmain twice
        (once as the binary size, once as the DDR ADDRBLOCK size).
    parameters:
        instdir: Directory whose listing is passed to get_binsize first
        datadir: Directory whose listing is passed to get_binsize second
        output: Forwarded unchanged to get_binsize
    return code:
        None
    """
    total = 0
    for directory in (instdir, datadir):
        # get_binsize receives the running total and returns the new one.
        total = get_binsize(os.listdir(directory), total, output)

    fmain.write(f'all binary size:{total} bytes\n')
    fmain.write(f'DDR ADDRBLOCK size :{total} bytes\n')
    prints("calculate address of binary successfully")
def gen_ddraddr(name_list, data_list, act_bit, wet_bit):
    """
    description:
        Log the dram capacity, run netinout_addr and weight_addr and, when
        the module-level layer_cnts is greater than 1, record the current
        ADDRBLOCK in CALCULATE and run otherinout_addr.
    parameters:
        name_list: Forwarded to weight_addr
        data_list: Forwarded to weight_addr
        act_bit: Forwarded to weight_addr
        wet_bit: Forwarded to weight_addr
    return code:
        Tuple (layers_act_addr, layers_wet_addr, datas_locate, poollist,
        downsample); the first three are module-level objects, the last two
        come from weight_addr
    """
    global CALCULATE

    fmain.write(f'dram_capacity:{dram_capacity}\n')
    netinout_addr(netpath)
    pool_info = weight_addr(name_list, data_list, act_bit, wet_bit)
    poollist, downsample = pool_info

    if layer_cnts > 1:
        CALCULATE = ADDRBLOCK
        # NOTE(review): the original indentation was lost; this call is
        # assumed to belong to the multi-layer branch - confirm.
        otherinout_addr()

    prints("run gen_ddraddr successfully")
    return (layers_act_addr, layers_wet_addr, datas_locate,
            poollist, downsample)
def binary_addr(instdir, datadir, output):
    """
    description:
        Accumulate the binary size reported by get_binsize for the entries
        of instdir and then datadir, log it to fmain, grow the module-level
        ADDRBLOCK by that size and log the new ADDRBLOCK value.
    parameters:
        instdir: Directory whose listing is passed to get_binsize first
        datadir: Directory whose listing is passed to get_binsize second
        output: Forwarded unchanged to get_binsize
    return code:
        None
    """
    global ADDRBLOCK

    total = 0
    for directory in (instdir, datadir):
        # get_binsize receives the running total and returns the new one.
        total = get_binsize(os.listdir(directory), total, output)

    fmain.write(f'all binary size:{total} bytes\n')
    ADDRBLOCK = ADDRBLOCK + total
    fmain.write(f'DDR ADDRBLOCK size :{ADDRBLOCK} bytes\n')
    prints("run binary_addr successfully")
def test_chip_relocate84(layer_locate, outchl, C):
    """
    description:
        Check that the leading 2 * BUS_WIDTH entries of every group in
        layer_locate equal '00'. The first mismatch is reported through
        prints() and ends the check immediately.
    parameters:
        layer_locate: Sequence of entries to inspect
        outchl: Aligned to BUS_WIDTH to obtain the weight width
        C: int(C / 2) groups are inspected
    return code:
        None
    """
    weight_width = align(outchl, BUS_WIDTH)
    base = 0
    for group in range(int(C / 2)):  # outchannel/2 groups of bn+weight
        window_start = base + BUS_WIDTH * group
        window_stop = base + (group + 2) * BUS_WIDTH
        for idx, entry in enumerate(layer_locate[window_start:window_stop]):
            if entry != '00':
                prints(f'test_chip_relocate84 bnlocate[{idx}] failed value:{entry}')
                return
        # Advance past this group's weights; together with the
        # BUS_WIDTH * group term above this moves one full group forward.
        base += weight_width * 2 + BUS_WIDTH
def test_chip_relocate84(layer_locate, outchl, C):
    """
    description:
        Check that the leading 2 * BUS_WIDTH entries of every group in
        layer_locate equal '00', logging start and end markers to fmain.
        The first mismatch is reported through prints() and ends the check
        immediately, in which case the end markers are not written.
    parameters:
        layer_locate: Sequence of entries to inspect
        outchl: Aligned to BUS_WIDTH to obtain the weight width
        C: int(C / 2) groups are inspected
    return code:
        None
    """
    fmain.write(f'test_relocate...Start Rearrange weight...len(layer_locate):{len(layer_locate)}\n')
    weight_width = align(outchl, BUS_WIDTH)
    group_stride = weight_width * 2 + BUS_WIDTH
    base = 0
    for group in range(int(C / 2)):  # outchannel/2 groups of bn+weight
        window = layer_locate[base + BUS_WIDTH * group:base + (group + 2) * BUS_WIDTH]
        # Locate the first entry that is not '00', if there is one.
        mismatch = next(
            ((pos, val) for pos, val in enumerate(window) if val != '00'),
            None,
        )
        if mismatch is not None:
            pos, val = mismatch
            prints(f'test_chip_relocate84 bnlocate[{pos}] failed value:{val}')
            return
        base += group_stride
    fmain.write(f'test_relocate...Endof Rearrange weight...len(layer_locate):{len(layer_locate)}\n')
    fmain.write('test_chip_relocate84 fc weight data success\n')
def gen_bmp(layercnt, actbit, vgglog):
    """
    description:
        Derive the network name from the module-level logpath, run
        input/inout_print.py for it through the shell and, for multi-layer
        networks whose name does not contain "resnet", call deal_fc_k.
    parameters:
        layercnt: Layer count; deal_fc_k is only called when it is > 1
        actbit: Passed as the last argument of inout_print.py
        vgglog: Path to vgglog file, forwarded to deal_fc_k
    return code:
        None
    """
    # The name is the second '/'-separated component of logpath, cut at
    # its first '.'.
    log_component = logpath.split('/')[1]
    net = log_component.split('.')[0]

    # The exit status of the shell command is not inspected.
    os.system(
        f'python3 input/inout_print.py {net} imagenet/im6.bmp {ptpath} {bmpdtpath} {actbit} > .debug/genbmp.log 2>&1'
    )

    if layercnt > 1 and "resnet" not in net:
        # Muse-v2 interface, v3 is not used.
        deal_fc_k(vgglog)
    prints("generate bmp files successfully")
def test_relocate(layer_locate, outchl, C, H, W):
    """
    description:
        Walk the bn slot of every group in layer_locate. Only when H and W
        are both 1 are the BUS_WIDTH entries of each slot compared with
        '00'; the first mismatch is reported through prints() and ends the
        check, otherwise a success line is written to fmain.
    parameters:
        layer_locate: Sequence of entries to inspect
        outchl: Number of groups; when H == W == 1 it is instead aligned to
                BUS_WIDTH as the weight width and C groups are walked
        C: Used with H and W for the weight width (C * H * W)
        H: See C
        W: See C
    return code:
        None
    """
    weight_width = align(C * H * W, BUS_WIDTH)
    one_by_one = H == 1 and W == 1
    if one_by_one:
        weight_width = align(outchl, BUS_WIDTH)
        outchl = C

    base = 0
    for group in range(outchl):  # one bn+weight group per outchannel
        window = layer_locate[base + BUS_WIDTH * group:base + (group + 1) * BUS_WIDTH]
        if one_by_one:
            for idx, entry in enumerate(window):
                if entry != '00':
                    prints(f'test_relocatetest bnlocate[{idx}] failed value:{entry}')
                    return
        base += weight_width

    if one_by_one:
        fmain.write('test_relocate fc weight data success\n')
def weight_addr(name_list, data_list, active_bit, weight_bit):
    """
    description:
        Walk the parallel name_list / data_list entries, collect scale,
        weight and batch-norm statistics, fold the statistics into per-channel
        bn_k / bn_b values, and for every completed layer build a list of
        two-character hex strings (layer_locate) holding bn k/b and the
        weights divided by scale and rounded. Each layer's start/end address
        is appended to the module-level layers_wet_addr and its data is
        stored in the module-level datas_locate. ADDRBLOCK is advanced by
        the number of words consumed and BUS_WIDTH is set to
        int(256 / weight_bit).

        NOTE(review): the source of this function was whitespace-collapsed;
        the block nesting below is reconstructed from the statement order
        and should be confirmed against the original file.
    parameters:
        name_list: Entry names; substrings such as "scale", "weight",
                   "bias", "running_mean", "running_var", "bn",
                   "downsample.1", "classifier" and "fc" select the handling
        data_list: Values matching name_list index by index; the code calls
                   .tolist(), .size(), .sqrt() and .half() on them, so they
                   are presumably torch tensors - confirm
        active_bit: 16 or 4 disables weight rearrangement
        weight_bit: Sets BUS_WIDTH and selects the fc rearrangement (8 or 4)
    return code:
        (poollist, downsample) as returned by get_layer_num("pool")
    """
    bn_bias = []
    k = scale = 0
    bn_weight = []
    fc_flag = False
    running_var = []
    weight_bn_k = []
    weight_bn_b = []
    weight_data = []
    global ADDRBLOCK
    global BUS_WIDTH
    word_address = 0
    start = ADDRBLOCK
    running_mean = []
    layer_locate = []
    N = C = H = W = 0
    global datas_locate
    global layers_wet_addr
    layer_cnt = fc_cnt = 0
    BUS_WIDTH = int(256 / weight_bit)
    poollist,downsample = get_layer_num("pool")
    fmain.write(f'weight_addr start:{ADDRBLOCK} bus_addr:{bus_address()} chiplet:{2**chiplet_id}\n')
    for i in range(len(name_list)):
        name = name_list[i]
        data = data_list[i]
        if pt_skip(name):
            continue
        # Fetch running_mean, running_var, bn_bias, scale and weight from the pt data
        elif "scale" in name:
            scale = data.tolist()
        elif "weight" in name:
            if "classifier" in name or "fc" in name:
                fc_flag = True
            elif "bn" in name or "downsample.1" in name:
                # A batch-norm weight is only remembered; it does not start a layer.
                bn_weight = data
                continue
            weight_data = data
            layer_cnt += 1
            # Layer numbers listed in poollist are skipped over.
            if layer_cnt in poollist:
                layer_cnt += 1
        elif "bias" in name:
            bn_bias = data
        elif "running_mean" in name:
            running_mean = data
        elif "running_var" in name:
            running_var = data
        # Compute bn_k and bn_b from running_mean, running_var, bn_bias and bn_weight
        if len(running_mean) and len(running_var) and len(bn_bias) and len(bn_weight):
            # This inner loop reuses the name i; the outer for statement
            # rebinds i on its next iteration, and name/data were read above.
            for i in range(len(bn_bias)):
                bn_k = bn_weight[i] / (running_var[i].sqrt() + 1e-6)
                bn_b = -bn_weight[i] * running_mean[i] / \
                    (running_var[i].sqrt() + 1e-6) + bn_bias[i]
                # Round-trip through half precision before storing.
                weight_bn_k.append(bn_k.half().float())
                weight_bn_b.append(bn_b.half().float())
            running_mean = []
            running_var = []
            bn_weight = []
            bn_bias = []
        # Activation address layout and weight data saving
        if scale and (len(weight_bn_k) and len(weight_bn_b) and len(weight_data) or fc_flag):
            if "classifier" in name or "fc" in name:
                fc_cnt += 1
                layer_cnt = get_layer_num(f'fc{fc_cnt}')
            layer_name = f'layer{str(layer_cnt)}'
            dim_list = list(weight_data.size())
            dim_lens = len(dim_list)
            if dim_lens == 4:
                N = dim_list[0]
                C = dim_list[1]
                H = dim_list[2]
                W = dim_list[3]
            elif dim_lens == 2:
                # A 2-D weight is walked as a single n/c with H rows of W values.
                N = C = 1
                H = dim_list[0]
                W = dim_list[1]
            for n in range(N):
                if len(weight_bn_k) and len(weight_bn_b):
                    # Store the bn k/b taken from the pt into layer_locate
                    # NOTE(review): '%X' does not zero-pad, so a value whose
                    # 32-bit pattern has fewer than 8 hex digits (e.g. 0.0)
                    # yields a short string and hexdata_k[x + 1] can index
                    # past its end - confirm such values cannot occur.
                    hexdata_k = '%X' % st.unpack('I', st.pack('f', weight_bn_k[n]))[0]
                    hexdata_b = '%X' % st.unpack('I', st.pack('f', weight_bn_b[n]))[0]
                    # Split each hex string into two-character pieces.
                    for x in range(len(str(hexdata_k))):
                        if (x % 2) == 0:
                            layer_locate.append(hexdata_k[x] + hexdata_k[x + 1])
                    for y in range(len(str(hexdata_b))):
                        if (y % 2) == 0:
                            layer_locate.append(hexdata_b[y] + hexdata_b[y + 1])
                    layer_locate = add_padding(layer_locate)
                for c in range(C):
                    for h in range(H):
                        if dim_lens == 2:
                            # 2-D weights get BUS_WIDTH "00" entries before every row.
                            for i in range(BUS_WIDTH):
                                layer_locate.append("00")
                        for w in range(W):
                            if dim_lens == 4:
                                word_addr = w + h * W + c * H * W
                                rounds = round(weight_data[n][c][h][w].tolist() / scale)
                            elif dim_lens == 2:
                                word_addr = w + h * W
                                rounds = round(weight_data[h][w].tolist() / scale)
                            else:
                                prints(f'Unknown weight:{name} length:{dim_lens} dim:{dim_list}')
                                continue
                            hexdata_w = HexProcess(rounds)
                            layer_locate.append(hexdata_w)
                # NOTE(review): these three statements are assumed to run once
                # per n (after the c/h/w loops) - confirm.
                word_address += BUS_WIDTH  # reserved for bn (256 bit)
                word_address += align(word_addr, BUS_WIDTH)
                layer_locate = add_padding(layer_locate)
            # ADDRBLOCK itself is only advanced after the main loop, so end is
            # the base address plus everything consumed so far.
            end = ADDRBLOCK + word_address
            layers_wet_addr.append([f'{layer_name} weight start and end:', [start, end]])
            start = end
            if active_bit == 16 or active_bit == 4:
                # 16*8 and 4*4 weights are not rearranged
                pass
            elif fc_flag == False:
                # active_bit=8: rearrange conv weights
                # NOTE(review): relocate() is assumed to be guarded by
                # chiplet_id together with the log line - confirm.
                if chiplet_id:
                    fmain.write(f'{layer_name} relocate conv\'s weight...\n')
                    layer_locate = relocate(layer_locate, N, C, H, W)
                    # test_relocate(layer_locate, N, C, H, W)
            else:
                # Rearrange fc weights
                if weight_bit == 8:
                    # fc weights, 8*8 rearrangement
                    layer_locate = fc_relocate88(layer_locate, W, H)
                elif weight_bit == 4:
                    # fc weights, 8*4 rearrangement
                    layer_locate = fc_relocate84(layer_locate, W, H)
                    test_chip_relocate84(layer_locate, W, H)
                if chiplet_id:
                    fmain.write(f'{layer_name} relocate fc\'s weight...\n')
                    if weight_bit == 8:
                        # chiplet mode: fc weights, 8*8 rearrangement
                        layer_locate = relocate(layer_locate, W, H, 1, 1)
                        test_relocate(layer_locate, W, H, 1, 1)
                    elif weight_bit == 4:
                        # chiplet mode: fc weights, 8*4 rearrangement
                        layer_locate = chip_relocate84(layer_locate, W, H)
                        test_chip_relocate84(layer_locate, W, H)
            fmain.write("%7s bn_k+bn_b+conv_weight data save data success\n" % layer_name)
            # NOTE(review): slot k is assigned and the same entry is then also
            # appended; this needs datas_locate to already hold index k -
            # confirm the intent.
            datas_locate[k] = [f'layer {str(layer_cnt)} data', [layer_locate]]
            datas_locate.append(datas_locate[k])
            # Reset the per-layer state before collecting the next layer.
            layer_locate = []
            weight_bn_k = []
            weight_bn_b = []
            weight_data = []
            fc_flag = 0
            scale = 0
            k += 1
            # Stop once the module-level layer_cnts layers have been emitted.
            if layer_cnts <= k:
                break
    ADDRBLOCK += word_address
    return poollist, downsample
def gen_txt(self):
    """
    description:
        Load the pt file at self.ptpath on the CPU and collect the names and
        values of its 'state_dict' entries (each name with its first dotted
        component removed). When self.netpath contains "resnet" the lists
        are returned right away. Otherwise self.in_q is written to
        img.input.q.txt, the per-layer entries are gathered into
        self.layers and handed to self.splicing_output, and the collected
        quant_/classifier entries are written through self.write_pt_data.

        NOTE(review): the source of this method was whitespace-collapsed;
        the block nesting below is reconstructed from the statement order
        and should be confirmed against the original file.
    parameters:
        None besides self; reads self.netpath, self.ptpath, self.logpath,
        self.ptdtpath, self.in_q, self.fw, self.layer_cnts and self.layers
    return code:
        (name_list, data_list, act_bit, wet_bit, chip_id); chip_id is
        always 0 here
    """
    counts = 0
    name_list = []
    data_list = []
    quant_list = []
    onelayer_cnt = []
    self.get_tensorinfo(self.netpath)
    # NOTE(review): the handle f is never used; torch.load is given the path
    # and opens the file itself.
    with open(self.ptpath, 'rb') as f:
        ptdata = torch.load(self.ptpath, map_location=torch.device('cpu'))
    chip_id = 0
    # Only these two networks read the bit widths from the pt file; every
    # other one falls back to 8/8.
    if "vggnet16" in self.logpath or "resnet34" in self.logpath:
        act_bit = ptdata['hyper_parameters']['act_quant_bit']
        wet_bit = ptdata['hyper_parameters']['weight_quant_bit']
    else:
        act_bit = 8
        wet_bit = 8
    for k, v in ptdata['state_dict'].items():
        # Drop everything up to and including the first '.' of the key.
        k = k.split(".", 1)[1].strip()
        name_list.append(k)
        data_list.append(v)
        if "resnet" not in self.netpath.lower():
            if "quant_" in k or "classifier." in k:
                # quant_list alternates name, value, name, value, ...
                quant_list.append(k)
                quant_list.append(v)
    if "resnet" in self.netpath.lower():
        return name_list, data_list, act_bit, wet_bit, chip_id
    else:
        with open(f'{self.ptdtpath}/img.input.q.txt', 'w') as fq:
            fq.write('{}{}'.format(self.in_q, '\n'))
        self.fw.write(f'{self.ptdtpath}/img.input.q.txt write success\n')
    # Gather the entries of each 'layers.<i>.' prefix and remember how many
    # each layer contributed.
    for i in range(self.layer_cnts):
        layer = f'layers.{i}.'
        for j in range(len(name_list)):
            if layer in name_list[j]:
                self.layers.append([name_list[j], data_list[j]])
                counts += 1
        onelayer_cnt.append(str(counts))
        counts = 0
    # NOTE(review): removes the first element of self.layers once, after the
    # gathering loop; presumably a placeholder set up elsewhere - confirm
    # both the placement and the intent.
    del (self.layers[0])
    # counts now serves as the running offset passed to splicing_output.
    for i in range(self.layer_cnts):
        layername = f'layer_num:{str(i + 1)}'
        self.layermsg = self.get_layer_info(self.logpath, layername)
        self.splicing_output(int(onelayer_cnt[i]), counts, quant_list)
        counts += int(onelayer_cnt[i])
    scale = fcname = weight = ""
    # Every element of quant_list is visited, names and values alike; the
    # substring tests run on str(element).
    for i in range(len(quant_list)):
        tmpstr = str(quant_list[i])
        if ".scale" in tmpstr or ".weight" in tmpstr:
            if ".scale" in tmpstr:
                scale = quant_list[i + 1]
            else:
                fcname = quant_list[i]
                weight = quant_list[i + 1]
            if len(fcname) and len(str(scale)) and len(str(weight)):
                # The thread is joined right after it is started, so the
                # write completes before the loop continues.
                write_data = threading.Thread(target=self.write_pt_data, args=(fcname, weight, scale))
                write_data.start()
                write_data.join()
            continue
        elif "quant_" in tmpstr or "classifier" in tmpstr:
            write_data = threading.Thread(target=self.write_pt_data, args=(quant_list[i], quant_list[i + 1], scale))
            write_data.start()
            write_data.join()
    prints("run gen_txt successfully")
    return name_list, data_list, act_bit, wet_bit, chip_id