예제 #1
0
def gen_bmp(layercnt, vgglog):
    """
    description:
                Generate model files
    parameters:
                layercnt: Total layer count; fc post-processing runs only when > 1
                vgglog: Path to vgglog file
    return code:
                None on success, 1 when the global ptpath names no known network
    """
    # Map the (module-global) pt file path to a network name; the tuple keeps
    # the same precedence as the original vggnet/resnet18/resnet34/resnet50 chain.
    net = ""
    for known in ("vggnet", "resnet18", "resnet34", "resnet50"):
        if known in ptpath:
            net = known
            break
    if not net:
        print(f'Unknown {ptpath}')
        return 1

    cmd_list = [
        f'python3 input/inout_print.py {net} imagenet/im6.bmp {ptpath} {bmpdtpath} > .debug/genbmp.log'
    ]

    # Run each shell command in order; os.system exit statuses are ignored,
    # matching the original behavior.
    for cmd in cmd_list:
        os.system(cmd)

    if layercnt > 1 and "resnet" not in net:
        # Muse-v2 interface, v3 is not used.
        deal_fc_k(vgglog)

    prints("run gen_bmp successfully")
예제 #2
0
def gen_fpga(filepath):
    """
    description:
                Generate bin files for fpga
    parameters:
                filepath: Directory in which config_gen_file.py is run
    return code:
                None
    """
    os.chdir(filepath)
    cmd_list = [
        "rm -rf con*txt cfg*txt *bn* *bias* *alpha* *weight* *input* *output* \
                  *k* data_for_fpga",
        f'cp -af {confpath}/* {ptdtpath}/* {bmpdtpath}/* imagenet/',
        "python3 ../input/config_gen_file.py -d ../imagenet/ -n imagenet_img6 -f > ../.debug/genfile.log",
        f'mv {filepath}/data_for_fpga {outputpath}'
    ]

    for cmd in cmd_list:
        # cp/mv must run from the parent directory, while the generator
        # script runs from filepath itself; switch cwd accordingly before
        # executing each command.
        if "cp -af " in cmd or "mv " in cmd:
            os.chdir("..")
        elif "python" in cmd:
            os.chdir(filepath)
        os.system(cmd)

    prints("generate fpga files successfully")
예제 #3
0
def binary_addr(instdir, datadir, output):
    """Total the binary sizes under instdir and datadir and log the result."""
    total = get_binsize(os.listdir(instdir), 0, output)
    total = get_binsize(os.listdir(datadir), total, output)

    fmain.write(f'all binary size:{total} bytes\n')
    fmain.write(f'DDR ADDRBLOCK size :{total} bytes\n')
    prints("calculate address of binary successfully")
예제 #4
0
def gen_ddraddr(name_list, data_list, act_bit, wet_bit):
    """Drive the DDR address layout: net I/O first, then weights, then
    otherinout_addr() for multi-layer networks; returns the address tables."""
    global CALCULATE

    fmain.write(f'dram_capacity:{dram_capacity}\n')
    netinout_addr(netpath)
    poollist, downsample = weight_addr(name_list, data_list, act_bit, wet_bit)
    if layer_cnts > 1:
        # Snapshot the address cursor before laying out the extra regions.
        CALCULATE = ADDRBLOCK
        otherinout_addr()
    prints("run gen_ddraddr successfully")
    return (layers_act_addr, layers_wet_addr, datas_locate,
            poollist, downsample)
예제 #5
0
def binary_addr(instdir, datadir, output):
    """Accumulate the total binary size into the global ADDRBLOCK and log it."""
    global ADDRBLOCK

    size = 0
    # instdir first, then datadir — same accumulation order as before.
    for folder in (instdir, datadir):
        size = get_binsize(os.listdir(folder), size, output)

    fmain.write(f'all binary size:{size} bytes\n')
    ADDRBLOCK += size
    fmain.write(f'DDR ADDRBLOCK size :{ADDRBLOCK} bytes\n')
    prints("run binary_addr successfully")
예제 #6
0
def test_chip_relocate84(layer_locate, outchl, C):
    """Verify the bn slots of the 8*4-rearranged fc weight layout are all '00'."""
    width = align(outchl, BUS_WIDTH)
    offset = 0

    # C/2 groups of bn + weight (one group per output-channel pair).
    for grp in range(int(C / 2)):
        bn_seg = layer_locate[offset + BUS_WIDTH * grp:offset + (grp + 2) * BUS_WIDTH]
        for idx, byte in enumerate(bn_seg):
            if byte != '00':
                prints(
                    f'test_chip_relocate84 bnlocate[{idx}] failed value:{byte}'
                )
                return
        offset += width * 2 + BUS_WIDTH
예제 #7
0
def test_chip_relocate84(layer_locate, outchl, C):
    """Check every bn slot in the 8*4-rearranged fc weight layout is '00', with logging."""
    fmain.write(f'test_relocate...Start Rearrange weight...len(layer_locate):{len(layer_locate)}\n')
    width = align(outchl, BUS_WIDTH)
    offset = 0

    # C/2 groups of bn + weight (one group per output-channel pair).
    for grp in range(int(C / 2)):
        bn_seg = layer_locate[offset + BUS_WIDTH * grp:offset + (grp + 2) * BUS_WIDTH]
        for idx, byte in enumerate(bn_seg):
            if byte != '00':
                prints(f'test_chip_relocate84 bnlocate[{idx}] failed value:{byte}')
                return
        offset += width * 2 + BUS_WIDTH

    fmain.write(f'test_relocate...Endof Rearrange weight...len(layer_locate):{len(layer_locate)}\n')
    fmain.write(f'test_chip_relocate84 fc weight data success\n')
예제 #8
0
def gen_bmp(layercnt, actbit, vgglog):
    """
    description:
                Generate model files
    parameters:
                layercnt: Total layer count; fc post-processing runs only when > 1
                actbit: Activation bit width forwarded to inout_print.py
                vgglog: Path to vgglog file
    return code:
                None
    """
    # Network name is the file stem of the second path component of the
    # module-global logpath (e.g. 'log/<net>.<ext>' -> '<net>').
    net = logpath.split('/')[1].split('.')[0]
    cmd_list = [
        f'python3 input/inout_print.py {net} imagenet/im6.bmp {ptpath} {bmpdtpath} {actbit} > .debug/genbmp.log 2>&1'
    ]

    # Run each shell command in order; exit statuses are ignored, matching
    # the original behavior.
    for cmd in cmd_list:
        os.system(cmd)

    if layercnt > 1 and "resnet" not in net:
        # Muse-v2 interface, v3 is not used.
        deal_fc_k(vgglog)

    prints("generate bmp files successfully")
예제 #9
0
def test_relocate(layer_locate, outchl, C, H, W):
    """Walk the rearranged weight layout; for fc (H == W == 1) every bn slot must be '00'."""
    width = align(C * H * W, BUS_WIDTH)
    groups = outchl
    is_fc = H == 1 and W == 1
    if is_fc:
        width = align(outchl, BUS_WIDTH)
        groups = C
    offset = 0

    # One group of bn + weight per output channel.
    for grp in range(groups):
        bn_seg = layer_locate[offset + BUS_WIDTH * grp:offset + (grp + 1) * BUS_WIDTH]
        if is_fc:
            for idx, byte in enumerate(bn_seg):
                if byte != '00':
                    prints(
                        f'test_relocatetest bnlocate[{idx}] failed value:{byte}'
                    )
                    return
        offset += width

    if is_fc:
        fmain.write(f'test_relocate fc weight data success\n')
예제 #10
0
def weight_addr(name_list, data_list, active_bit, weight_bit):
    """
    description:
                Walk the pt state_dict, fold bn statistics into bn_k/bn_b,
                quantize and serialize each layer's bn+weight bytes into the
                global datas_locate, and record each layer's DDR weight
                address range in the global layers_wet_addr.
    parameters:
                name_list: parameter names from the pt state_dict
                data_list: tensors paired 1:1 with name_list
                active_bit: activation bit width (16/8/4 select the relocate path)
                weight_bit: weight bit width; sets BUS_WIDTH = 256 / weight_bit
    return code:
                (poollist, downsample) as obtained from get_layer_num("pool")
    """
    # Per-layer accumulators; reset after each layer is flushed below.
    bn_bias = []
    k = scale = 0
    bn_weight = []
    fc_flag = False
    running_var = []
    weight_bn_k = []
    weight_bn_b = []
    weight_data = []
    global ADDRBLOCK
    global BUS_WIDTH
    word_address = 0
    start = ADDRBLOCK
    running_mean = []
    layer_locate = []
    N = C = H = W = 0
    global datas_locate
    global layers_wet_addr
    layer_cnt = fc_cnt = 0
    # One bus beat is 256 bits, i.e. 256/weight_bit weight words.
    BUS_WIDTH = int(256 / weight_bit)

    poollist,downsample = get_layer_num("pool")
    fmain.write(f'weight_addr start:{ADDRBLOCK} bus_addr:{bus_address()} chiplet:{2**chiplet_id}\n')
    for i in range(len(name_list)):
        name = name_list[i]
        data = data_list[i]
        if pt_skip(name):
            continue
        # Pull running_mean / running_var / bn_bias / scale and weight out of the pt.
        elif "scale" in name:
            scale = data.tolist()
        elif "weight" in name:
            if "classifier" in name or "fc" in name:
                fc_flag = True
            elif "bn" in name or "downsample.1" in name:
                bn_weight = data
                continue
            weight_data = data
            layer_cnt += 1
            # Pooling layers occupy a layer number of their own; skip over it.
            if layer_cnt in poollist:
                layer_cnt += 1
        elif "bias" in name:
            bn_bias = data
        elif "running_mean" in name:
            running_mean = data
        elif "running_var" in name:
            running_var = data
        # Once running_mean/running_var/bn_bias/bn_weight are all present,
        # fold them into bn_k and bn_b (rounded through fp16 via half().float()).
        if len(running_mean) and len(running_var) and len(bn_bias) and len(bn_weight):
            # NOTE(review): this inner loop reuses the outer loop variable i;
            # harmless because `for` rebinds i on each outer iteration, but fragile.
            for i in range(len(bn_bias)):
                bn_k = bn_weight[i] / (running_var[i].sqrt() + 1e-6)
                bn_b = -bn_weight[i] * running_mean[i] / \
                       (running_var[i].sqrt() + 1e-6) + bn_bias[i]
                weight_bn_k.append(bn_k.half().float())
                weight_bn_b.append(bn_b.half().float())
            running_mean = []
            running_var = []
            bn_weight = []
            bn_bias = []
        # Activation address layout and weight data serialization.
        if scale and (len(weight_bn_k) and len(weight_bn_b) and len(weight_data) or fc_flag):
            if "classifier" in name or "fc" in name:
                fc_cnt += 1
                # NOTE(review): get_layer_num("pool") above returns a pair, but
                # here the result is used as a single layer number — confirm the
                # helper's return shape differs per argument.
                layer_cnt = get_layer_num(f'fc{fc_cnt}')
            layer_name = f'layer{str(layer_cnt)}'
            dim_list = list(weight_data.size())
            dim_lens = len(dim_list)
            if dim_lens == 4:
                N = dim_list[0]
                C = dim_list[1]
                H = dim_list[2]
                W = dim_list[3]
            elif dim_lens == 2:
                N = C = 1
                H = dim_list[0]
                W = dim_list[1]
            for n in range(N):
                if len(weight_bn_k) and len(weight_bn_b):  # store the pt's bn k/b into layer_locate
                    # Reinterpret the float32 bits as hex, then emit byte pairs.
                    hexdata_k = '%X' % st.unpack('I', st.pack('f', weight_bn_k[n]))[0]
                    hexdata_b = '%X' % st.unpack('I', st.pack('f', weight_bn_b[n]))[0]
                    for x in range(len(str(hexdata_k))):
                        if (x % 2) == 0:
                            layer_locate.append(hexdata_k[x] + hexdata_k[x + 1])
                    for y in range(len(str(hexdata_b))):
                        if (y % 2) == 0:
                            layer_locate.append(hexdata_b[y] + hexdata_b[y + 1])
                layer_locate = add_padding(layer_locate)
                for c in range(C):
                    for h in range(H):
                        if dim_lens == 2:
                            # fc rows reserve one bus beat of zero bytes.
                            for i in range(BUS_WIDTH):
                                layer_locate.append("00")
                        for w in range(W):
                            if dim_lens == 4:
                                word_addr = w + h * W + c * H * W
                                rounds = round(weight_data[n][c][h][w].tolist() / scale)
                            elif dim_lens == 2:
                                word_addr = w + h * W
                                rounds = round(weight_data[h][w].tolist() / scale)
                            else:
                                prints(f'Unknown weight:{name} length:{dim_lens} dim:{dim_list}')
                                continue
                            hexdata_w = HexProcess(rounds)
                            layer_locate.append(hexdata_w)
                word_address += BUS_WIDTH  # bn reservation (256 bit)
                word_address += align(word_addr, BUS_WIDTH)
                layer_locate = add_padding(layer_locate)
            end = ADDRBLOCK + word_address
            layers_wet_addr.append([f'{layer_name} weight start and end:', [start, end]])
            start = end
            if active_bit == 16 or active_bit == 4:  # 16*8 and 4*4 weights are not rearranged
                pass
            elif fc_flag == False:  # active_bit=8: rearrange conv weights
                if chiplet_id:
                    fmain.write(f'{layer_name} relocate conv\'s weight...\n')
                    layer_locate = relocate(layer_locate, N, C, H, W)
                    # test_relocate(layer_locate, N, C, H, W)
            else:  # rearrange fc weights
                if weight_bit == 8:  # fc weight 8*8 rearrangement
                    layer_locate = fc_relocate88(layer_locate, W, H)
                elif weight_bit == 4:  # fc weight 8*4 rearrangement
                    layer_locate = fc_relocate84(layer_locate, W, H)
                    test_chip_relocate84(layer_locate, W, H)
                if chiplet_id:
                    fmain.write(f'{layer_name} relocate fc\'s weight...\n')
                    if weight_bit == 8:  # chiplet mode fc weight 8*8 rearrangement
                        layer_locate = relocate(layer_locate, W, H, 1, 1)
                        test_relocate(layer_locate, W, H, 1, 1)
                    elif weight_bit == 4:  # chiplet mode fc weight 8*4 rearrangement
                        layer_locate = chip_relocate84(layer_locate, W, H)
                        test_chip_relocate84(layer_locate, W, H)
            fmain.write("%7s bn_k+bn_b+conv_weight data save data success\n" % layer_name)
            # NOTE(review): index assignment followed by append duplicates the
            # entry and requires datas_locate to be pre-sized — verify intent.
            datas_locate[k] = [f'layer {str(layer_cnt)} data', [layer_locate]]
            datas_locate.append(datas_locate[k])
            layer_locate = []
            weight_bn_k = []
            weight_bn_b = []
            weight_data = []
            fc_flag = 0
            scale = 0
            k += 1
            if layer_cnts <= k:
                break

    ADDRBLOCK += word_address
    return poollist, downsample
예제 #11
0
    def gen_txt(self):
        """
        description:
                    Load the pt checkpoint, split its state_dict into
                    per-layer groups, and spawn write_pt_data jobs that dump
                    the quantized tensors to txt files.
        parameters:
                    None (reads self.ptpath / self.logpath / self.netpath)
        return code:
                    name_list, data_list, act_bit, wet_bit, chip_id
        """
        counts = 0
        name_list = []
        data_list = []
        quant_list = []    # alternating [name, tensor, name, tensor, ...]
        onelayer_cnt = []  # per-layer parameter counts, stored as strings

        self.get_tensorinfo(self.netpath)
        with open(self.ptpath, 'rb') as f:
            # NOTE(review): f is opened but torch.load re-reads self.ptpath;
            # the handle itself is unused — confirm torch.load(f, ...) was
            # not intended.
            ptdata = torch.load(self.ptpath, map_location=torch.device('cpu'))
            chip_id = 0
            if "vggnet16" in self.logpath or "resnet34" in self.logpath:
                # These checkpoints carry their quantization widths.
                act_bit = ptdata['hyper_parameters']['act_quant_bit']
                wet_bit = ptdata['hyper_parameters']['weight_quant_bit']
            else:
                act_bit = 8
                wet_bit = 8
            for k, v in ptdata['state_dict'].items():
                # Keep only the part of the key after the first '.'.
                k = k.split(".", 1)[1].strip()
                name_list.append(k)
                data_list.append(v)
                if "resnet" not in self.netpath.lower():
                    if "quant_" in k or "classifier." in k:
                        quant_list.append(k)
                        quant_list.append(v)

        if "resnet" in self.netpath.lower():
            # resnet skips the quant/classifier txt dump below.
            return name_list, data_list, act_bit, wet_bit, chip_id
        else:
            with open(f'{self.ptdtpath}/img.input.q.txt', 'w') as fq:
                fq.write('{}{}'.format(self.in_q, '\n'))
            self.fw.write(f'{self.ptdtpath}/img.input.q.txt write success\n')

        # Group parameters by their "layers.<i>." prefix and remember how
        # many entries each layer contributed.
        for i in range(self.layer_cnts):
            layer = f'layers.{i}.'
            for j in range(len(name_list)):
                if layer in name_list[j]:
                    self.layers.append([name_list[j], data_list[j]])
                    counts += 1
            onelayer_cnt.append(str(counts))
            counts = 0

        # NOTE(review): the first collected entry is dropped before splicing —
        # presumably an input-side parameter; confirm against the model.
        del (self.layers[0])
        for i in range(self.layer_cnts):
            layername = f'layer_num:{str(i + 1)}'
            self.layermsg = self.get_layer_info(self.logpath, layername)
            self.splicing_output(int(onelayer_cnt[i]), counts, quant_list)
            counts += int(onelayer_cnt[i])

        # Walk the alternating name/value pairs in quant_list; each tensor is
        # dumped by write_pt_data on a worker thread (started then joined, so
        # the work is effectively serial).
        scale = fcname = weight = ""
        for i in range(len(quant_list)):
            tmpstr = str(quant_list[i])
            if ".scale" in tmpstr or ".weight" in tmpstr:
                if ".scale" in tmpstr:
                    scale = quant_list[i + 1]
                else:
                    fcname = quant_list[i]
                    weight = quant_list[i + 1]
                if len(fcname) and len(str(scale)) and len(str(weight)):
                    write_data = threading.Thread(target=self.write_pt_data,
                                                  args=(fcname, weight, scale))
                    write_data.start()
                    write_data.join()
                    continue
            elif "quant_" in tmpstr or "classifier" in tmpstr:
                write_data = threading.Thread(target=self.write_pt_data,
                                              args=(quant_list[i], quant_list[i + 1], scale))
                write_data.start()
                write_data.join()

        prints("run gen_txt successfully")
        return name_list, data_list, act_bit, wet_bit, chip_id