예제 #1
0
def per_die_config_dse_multiAcc(sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R,
                                sub_conv_K, sub_conv_S, sub_flag):
    DSP = 6840 / 3
    dsp_list = []
    pair_list = []
    lat_list = []
    util_list = []
    factor = 1
    opt_ratio = 0

    for i in range(0, len(sub_conv_N)):
        dsp_list.append([])
        sub_net_gop = gop_calculate(sub_conv_N[i], sub_conv_M[i],
                                    sub_conv_R[i], sub_conv_K[i])
        for j in range(0, len(sub_conv_N[i])):
            # allocate_dsp by layer gops
            dsp_list[i].append(
                DSP *
                (sub_conv_N[i][j] * sub_conv_M[i][j] * sub_conv_R[i][j] *
                 sub_conv_R[i][j] * sub_conv_K[i][j] * sub_conv_K[i][j]) /
                sub_net_gop)
            # do contrained dse for layer
            pair, cycle, cycle_per_layer = constrained_dse_layer(
                sub_conv_N[i][j], sub_conv_M[i][j], sub_conv_r[i][j],
                sub_conv_R[i][j], sub_conv_K[i][j], sub_conv_S[i][j],
                sub_flag[i][j], int(dsp_list[i][j]), int(37), factor)
            pair_list.append(pair)
            lat_list.append(cycle)
            util_list.append(pair[0] * pair[1] / float(int(dsp_list[i][j])))
    print "dsp_list value: ", dsp_list, pair_list
    print "util_list value: ", util_list

    # note done best configuration

    for i in range(0, len(sub_conv_N)):
        pair, cycle, cycle_per_layer = constrained_dse(
            sub_conv_N[i], sub_conv_M[i], sub_conv_r[i],
            sub_conv_R[i], sub_conv_K[i], sub_conv_S[i], sub_flag[i], int(DSP),
            int(37), factor)

        if len(pair_list) > len(sub_conv_N):
            for remove_cnt in range(0, len(sub_conv_N)):
                pair_list.remove(pair_list[0])
                lat_list.remove(lat_list[0])
                util_list.remove(util_list[0])
    #
    # ratio_tmp = ((max(lat_list) - min(lat_list)) / float(min(lat_list)))
    # print "initial diff_ratio: ", ratio_tmp
    #
    # max_lat_index = lat_list.index(max(lat_list))
    # # find the max latency sub_net
    # for j in range(0, len(sub_conv_N[max_lat_index])):
    #     if len(sub_conv_N[max_lat_index]) >=4:
    #         max_acc_num = 4
    #     else:
    #         max_acc_num = len(sub_conv_N[max_lat_index])
    #     for acc_num in range(0, max_acc_num):
    #         #TODO: keep partitioning the sub_net and search the best number of acc and corresponding configuration

    return pair_list, lat_list, util_list
예제 #2
0
    def run(self):
        """Evaluate this worker's share of the layer partitions.

        Every partition produced by ``partition_to_k`` whose index maps to
        ``self.processIdx`` (modulo ``PROCESS_NUM``) is split into sub-nets
        and handed to ``local_search``.  A partition is recorded when its
        worst sub-net latency is below ``self.overall_lat`` and fewer than
        six have been recorded so far.  If anything was recorded, a tuple
        ``(pairs, items, gops, utils)`` is put on ``self.result_Q``; the
        ``utils`` entries hold the worst latency wrapped in a list.
        """
        t_begin = time.time()
        kept_gops = []
        kept_items = []
        kept_utils = []
        kept_pairs = []

        search_counter = 0

        print("Process " + str(self.processIdx) + " starts global search.")

        candidates = partition_to_k(self.layer_list, self.acc_cluster_num,
                                    False)
        for idx, item in enumerate(candidates):
            # Only handle the partitions assigned to this worker.
            if idx % PROCESS_NUM != self.processIdx:
                continue

            search_counter += 1
            (sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K,
             sub_conv_S, sub_flag) = model_split_by_list(
                 self.conv_N, self.conv_M, self.conv_r, self.conv_R,
                 self.conv_K, self.conv_S, self.flag, item)
            sub_pair_list, sub_lat_list, sub_util_list = local_search(
                sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K,
                sub_conv_S, sub_flag)

            sub_gop_list = [
                gop_calculate(sub_conv_N[i], sub_conv_M[i], sub_conv_R[i],
                              sub_conv_K[i])
                for i in range(len(sub_conv_N))
            ]

            worst_lat = max(sub_lat_list)
            if worst_lat < self.overall_lat and len(kept_pairs) < 6:
                kept_items.append(item)
                kept_pairs.append(sub_pair_list)
                kept_utils.append([worst_lat])
                kept_gops.append(sub_gop_list)

        if kept_pairs:
            self.result_Q.put((kept_pairs, kept_items, kept_gops, kept_utils))

        t_end = time.time()
        print("Thread ", self.processIdx, " :", (t_end - t_begin))
예제 #3
0
def model_partition_by_gop(conv_N, conv_M, conv_r, conv_R, conv_K, conv_S,
                           conv_G, flag, cut_flag):
    """Find the pair of cut points that best balances GOPs over three sub-nets.

    Every ordered pair ``(i, j)`` with ``i < j`` is tried: the model is split
    with ``model_partition_ordered`` at ``i + 1`` and ``j + 1`` and the GOPs
    of the three resulting sub-nets are compared.  The balance ratio is
    ``(max - min) / min`` of those three GOP figures.

    The first candidate ``(0, 1)`` is always taken as the starting point.
    Later candidates replace it only when ``cut_flag`` equals 1 at both
    indices and the balance ratio is strictly smaller.

    :param conv_N: per-layer N values; its length sets the model length
    :param conv_M: per-layer M values
    :param conv_r: per-layer r values
    :param conv_R: per-layer R values
    :param conv_K: per-layer K values
    :param conv_S: per-layer S values
    :param conv_G: per-layer G values (forwarded to the partition helper)
    :param flag: per-layer flags (forwarded to the partition helper)
    :param cut_flag: per-layer markers; 1 means the position may be cut
    :return: ``(min_pair, min_ration)``; ``([0, 0], 0.5)`` when the model has
        fewer than three layers and no candidate exists
    :raises ZeroDivisionError: if the smallest sub-net GOP figure is zero
    """
    min_ration = 0.5
    min_pair = [0, 0]
    sub_gops = [[], [], []]
    model_len = int(len(conv_N))

    for i in range(0, model_len - 2):
        for j in range(i + 1, model_len - 1):
            (sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K,
             sub_conv_S, sub_flag) = model_partition_ordered(
                 conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, conv_G, flag,
                 i + 1, j + 1)

            for k in range(0, 3):
                sub_gops[k] = gop_calculate(sub_conv_N[k], sub_conv_M[k],
                                            sub_conv_R[k], sub_conv_K[k])
            balance_ratio = (max(sub_gops) - min(sub_gops)) / float(
                min(sub_gops))

            print("Verigy cut status: ", i, j, cut_flag[i], cut_flag[j])
            if i == 0 and j == 1:
                min_ration = balance_ratio
                print("initial balance ratio: ", balance_ratio)
                min_pair = [i, j]
            # Logical ``and``: the former ``==``/``&`` mix parsed as the
            # chained comparison ``cut_flag[i] == (1 & cut_flag[j]) == 1``.
            elif cut_flag[i] == 1 and cut_flag[j] == 1:
                if balance_ratio < min_ration:
                    min_ration = balance_ratio
                    min_pair = [i, j]
                    print("min_ratio: ", min_ration, min_pair)

    return min_pair, min_ration
예제 #4
0
def multiAcc_dse():
    # define the network parameter containers
    conv_N = []
    conv_M = []
    conv_r = []
    conv_R = []
    conv_K = []
    conv_S = []
    flag = []
    cut_flag = []

    sub_conv_N = []
    sub_conv_M = []
    sub_conv_r = []
    sub_conv_R = []
    sub_conv_K = []
    sub_conv_S = []
    sub_flag = []

    pair_1 = []
    pair_2 = []
    pair_3 = []
    lat_1 = 0
    lat_2 = 0
    lat_3 = 0
    sub_lat_list = []
    lat_list = []

    util_1 = 0
    util_2 = 0
    util_3 = 0
    sub_util_list = []
    util_list = []

    OPs = 0
    sub_pair_list = []
    item_list = []
    pair_list = []
    overall_lat = 60551400
    layer_list = []
    gop_list = []
    """
    step 1: extract model from txt file with parameter no_include_fc / include_fc
    """
    conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, conv_G, flag, cut_flag = model_extract(
        'no_include_fc')
    print("Extracted cut flag: ", cut_flag)
    OPs = gop_calculate(conv_N, conv_M, conv_R, conv_K)
    max_layerout = max_layer_dataout(conv_N, conv_M, conv_R, conv_K)

    print_line("Model extract phase")
    print "1: ", "Model extracted"
    print "1: ", "Overall convolution operation required: ", OPs
    print "1: ", "Max layer output data: ", max_layerout
    # print_line("Model split finish")
    """
    step 2: randomly cluster, param k=4, layer label results are in item
    """
    print_line("Model partition phase")
    for i in range(0, len(conv_N)):
        layer_list.append(i)
    # kmeans=clusters_layers_kmeans(conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, 2)
    # print kmeans
    partition_location, diff_ratio = model_partition_by_gop(
        conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, conv_G, flag, cut_flag)
    print "2: layers extracted", conv_N
    print "2: layers cutable  ", cut_flag
    print "2: partition location", partition_location
    print "2: diff_ratio: ", diff_ratio

    sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K, sub_conv_S, sub_flag \
        =model_partition_ordered(conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, conv_G, flag, partition_location[0]+1, partition_location[1]+1)
    # print "2: Best partition output: ", partition_location, diff_ratio
    print "2:", sub_conv_N

    sub_gop_list = []
    for i in range(0, len(sub_conv_N)):
        sub_gop_list.append(
            gop_calculate(sub_conv_N[i], sub_conv_M[i], sub_conv_r[i],
                          sub_conv_K[i]))

    print "2: gop of sub_nets", sub_gop_list
    print "2: length of sub_conv_N", len(sub_conv_N[0]), len(
        sub_conv_N[1]), len(sub_conv_N[2])
    print "2", sub_flag
    print "2: length of sub_flag", len(sub_flag[0]), len(sub_flag[1]), len(
        sub_flag[2])
    sub_pair_list = []
    sub_lat_list = []
    sub_util_list = []

    print_line("Best Configuration Search")
    overall_start = time.time()
    # acc_cluster_num = 3
    # pair_list, item_list, gop_list, util_list = global_search(layer_list, acc_cluster_num, conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, flag, overall_lat)
    # pair_list, gop_list, util_list = per_die_config_dse_multiAcc(sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K,
    # sub_conv_S, sub_flag)

    pair_list = per_die_config_dse_multiAcc_flex(sub_conv_N, sub_conv_M,
                                                 sub_conv_r, sub_conv_R,
                                                 sub_conv_K, sub_conv_S,
                                                 sub_flag)

    overall_end = time.time()

    print_line("DSEoutpout")
    print "Best Configuration Search Results: "
    for i in range(0, len(pair_list)):
        print pair_list[i]

    # print item_list
    #print "gop_list: ",  gop_list
    #print "pair_list: ", pair_list
    #print "util_list: ", util_list
    # for i in range(0, len(util_list)):
    #     print util_list[i], sum(util_list[i])
    print "------------------------Final optimal configuration-------------------------------"
    # print "Network clustered results =", item_list[util_list.index(min(util_list))]
    # print "<Tm, Tn> = ", pair_list[util_list.index(min(util_list))]
    # print "Estimated overall latency = ", min(util_list)
    print "Overall time cost:", overall_end - overall_start, "s"
    print "----------------------------------------------------------------------------------"

    # item = return_partition(layer_list, 4, False)
    #
    # '''step 3: split the layers based on label clustering results'''
    # print("layer number is: ", int(len(conv_N)))
    # sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K, sub_conv_S, sub_flag \
    #     = model_split_by_list(conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, flag, item)
    # print sub_conv_N
    # print "model clustering test done!!!"
    #
    # '''step 4: do local search for all sub-models and find optimial <Tm, Tn> pair, lat, and util'''
    # sub_pair_list, sub_lat_list, sub_util_list = \
    #     local_search(sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K, sub_conv_S, sub_flag)
    # print sub_pair_list, sub_lat_list, sub_util_list
    #
    # if max(sub_lat_list) < overall_lat:
    #     overall_lat = max(sub_lat_list)
    #     if len(pair_list) < 10:
    #         pair_list.append(sub_pair_list)
    #         pair_list.append([overall_lat])
    #     else:
    #         max_among_mins = pair_list.index(max(overall_lat))
    #         pair_list.remove(pair_list[max_among_mins])
    #         pair_list.append(sub_pair_list)
    #         pair_list.append([overall_lat])

    # print(pair_1, "%.2f" % util_1, pair_2, "%.2f" % util_2, pair_3, "%.2f" % util_3, lat_1, lat_2, lat_3)
    # for i in range(1, int(len(conv_N)-1)):
    #     for j in range(int(i+1), int(len(conv_N))):
    # for i in range(1, 10):
    #     for j in range(1, 10):
    #         sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K, sub_conv_S, sub_flag = model_split_ordered(conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, flag, i, j)
    # sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K, sub_conv_S, sub_flag = model_split_unordered(conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, flag)
    # sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K, sub_conv_S, sub_flag = model_split_by_label(conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, flag, kmeans.labels_)
    # print(sub_conv_N)
    # pair_1, lat_1, pair_2, lat_2, pair_3, lat_3, util_1, util_2, util_3 = local_search(sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K, sub_conv_S, sub_flag)
    #
    # print(i, j, pair_1, "%.2f" % util_1, pair_2, "%.2f" % util_2, pair_3, "%.2f" % util_3, lat_1, lat_2, lat_3)
    #
    # if max(lat_1, lat_2, lat_3) < overall_lat:
    #     overall_lat = max(lat_1, lat_2, lat_3)
    #     # if len(pair_list) < 50:
    #     pair_list.append([i, j])
    #     pair_list.append(pair_1)
    #     pair_list.append(pair_2)
    #     pair_list.append(pair_3)
    #     pair_list.append([overall_lat])
    #     # else:
    #     #     max_among_mins  = pair_list.index(max(overall_lat))
    #     #     pair_list.remove(pair_list[max_among_mins])
    #     #     pair_list.append(pair_1)
    #     #     pair_list.append(pair_2)
    #     #     pair_list.append(pair_3)
    #     #     pair_list.append(overall_lat)
    # print(pair_list)

    # #step 3:
    # find_min_in_pairs()
    # min_among_mins = pair_list.index(min(overall_lat))
    # print(pair_list[min_among_mins])

    print "---------------------------- test part -------------------------------------------"
    print conv_net_perf(sub_conv_N[2], sub_conv_M[2], sub_conv_R[2],
                        sub_conv_S[2], sub_conv_K[2], sub_flag[2], 8, 274, 37,
                        4, 4)
예제 #5
0
def multiAcc_dse():
    """Run the full multi-accelerator exploration flow and emit its outputs.

    Steps, all reported on stdout:

    1. extract the model parameters with ``model_extract``;
    2. choose two cut points with ``model_partition_by_gop`` and split the
       model with ``model_partition_ordered`` (sub-nets 0, 1 and 2 are
       indexed directly);
    3. search a configuration per sub-net with
       ``per_die_config_dse_multiAcc_flex``;
    4. derive the accelerator and sub-net task lists and pass them to
       ``generate_param_file`` together with the file name
       ``acc_ins_params.txt``.

    Nothing is returned.
    """
    # Step 1: model extraction.
    (conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, conv_G, flag, cut_flag,
     pool_N) = model_extract('no_include_fc')
    OPs = gop_calculate(conv_N, conv_M, conv_R, conv_K)
    max_layerout = max_layer_dataout(conv_N, conv_M, conv_R, conv_K)

    print_line("Model extract phase")
    print("1: ", "Model extracted")
    print("1: ", "Overall convolution operation required: ", OPs)
    print("1: ", "Max layer output data: ", max_layerout)

    # Step 2: partition by GOP balance.
    print_line("Model partition phase")
    # Layer indices; only the disabled global search consumed this list.
    layer_list = list(range(len(conv_N)))
    partition_location, diff_ratio = model_partition_by_gop(
        conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, conv_G, flag, cut_flag)
    print("2: layers extracted", conv_N)
    print("2: layers cutable  ", cut_flag)
    print("2: partition location", partition_location)
    print("2: diff_ratio: ", diff_ratio)

    first_cut = partition_location[0] + 1
    second_cut = partition_location[1] + 1
    (sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K, sub_conv_S,
     sub_flag) = model_partition_ordered(conv_N, conv_M, conv_r, conv_R,
                                         conv_K, conv_S, conv_G, flag,
                                         first_cut, second_cut)
    print("2:", sub_conv_N)

    sub_gop_list = [
        gop_calculate(sub_conv_N[idx], sub_conv_M[idx], sub_conv_R[idx],
                      sub_conv_K[idx])
        for idx in range(len(sub_conv_N))
    ]

    print("2: gop of sub_nets", sub_gop_list)
    print("2: length of sub_conv_N",
          *[len(sub_conv_N[idx]) for idx in range(3)])
    print("2", sub_flag)
    print("2: length of sub_flag", *[len(sub_flag[idx]) for idx in range(3)])

    # Step 3: timed configuration search.
    print_line("Best Configuration Search")
    overall_start = time.time()
    pair_list = per_die_config_dse_multiAcc_flex(sub_conv_N, sub_conv_M,
                                                 sub_conv_r, sub_conv_R,
                                                 sub_conv_K, sub_conv_S,
                                                 sub_flag)
    overall_end = time.time()
    elapsed = overall_end - overall_start

    print_line("DSE Output")
    print("Best Configuration Search Results for layer accelerators: ")
    for idx in range(len(pair_list)):
        print(pair_list[idx])

    # Step 4: task analysis and parameter file generation.
    acc_task_list, total_acc_num = acc_task_analysis(
        pair_list, sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K,
        sub_conv_S, sub_flag)

    print("Accelerator task list: ")
    for acc_num in range(len(acc_task_list)):
        print("acc core", acc_num, " task list: ", acc_task_list[acc_num])

    print_line("Subnet Task Out")
    subnet_task_list = subnet_task_analysis(
        pair_list, acc_task_list, sub_conv_N, sub_conv_M, sub_conv_r,
        sub_conv_R, sub_conv_K, sub_conv_S, sub_flag)
    print("sub net interface list:")
    for idx in range(len(subnet_task_list)):
        print(subnet_task_list[idx])

    print_line("Write out configurations")
    print(len(pair_list), "sub-nets are generated")
    print(total_acc_num, "accelerators are written into the cofig file")
    generate_param_file(pair_list, pool_N, acc_task_list, subnet_task_list,
                        "acc_ins_params.txt")

    print_line("netGen run time system info")
    print("Overall time cost:", elapsed, "s")
    print_line("line")

    # Ad-hoc check: evaluate the third sub-net with a fixed set of arguments.
    print_line("test")
    third_subnet_perf = conv_net_perf(sub_conv_N[2], sub_conv_M[2],
                                      sub_conv_R[2], sub_conv_S[2],
                                      sub_conv_K[2], sub_flag[2], 8, 274, 37,
                                      4, 4)
    print(third_subnet_perf)
예제 #6
0
def local_search(sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K,
                 sub_conv_S, sub_flag):
    """
    Search a configuration for every sub-net and try to balance their latencies.

    Each round runs ``constrained_dse`` once per sub-net, then measures the
    latency spread ``(max - min) / min``.  The search stops when the spread
    falls below 0.05 or after ``Resolution + 1`` (11) rounds.  Between rounds
    the per-accelerator DSP allocation ``dsp_per_acc`` is shifted from the
    fastest sub-net to the slowest one.

    NOTE(review): ``constrained_dse`` is called with a fixed budget of 2200
    rather than ``dsp_per_acc[i]``, so the reallocation does not appear to
    reach the search -- confirm whether this is intentional.

    :param sub_conv_N: the input sub_conv_N is already split into several sub-nets
    :param sub_conv_M: same as above
    :param sub_conv_r: same as above
    :param sub_conv_R: same as above
    :param sub_conv_K: same as above
    :param sub_conv_S: same as above
    :param sub_flag: same as above
    :return: ``(pair_list, lat_list, util_list)`` holding the pair, cycle count
        and ``pair[0] * pair[1] / DSP`` utilisation of the last round, one
        entry per sub-net
    """
    # Python 2 integer division: 2280.  Presumably the DSP budget of one die
    # out of three -- TODO confirm.
    DSP = 6840 / 3
    # datatype = fixed
    factor = 1

    # The pair_N / lat_N / util_N names below are never read in this function.
    pair_1 = []
    lat_1 = 0
    util_1 = 0
    pair_2 = []
    lat_2 = 0
    util_2 = 0
    pair_3 = []
    lat_3 = 0
    util_3 = 0

    pair_list = []
    lat_list = []
    util_list = []
    gop_list = []
    gop_per_subnet = []
    gop_total = 0
    dsp_per_acc = []
    dsp_occupied = 0
    # print "lists in sub_conv_N"
    # print len(sub_conv_N)
    # print sub_conv_N

    step = int(1)  # DSP units moved per rebalancing step
    ratio = 0.05  # target latency spread at which the search stops
    search_counter = 0
    Resolution = 10  # the loop runs at most Resolution + 1 rounds
    ratio_init = 0
    """initializing the dsp number for per acc based on the ops requirement"""
    # Collect the GOPs of every sub-net and their total.
    for i in range(0, len(sub_conv_N)):
        gop_per_subnet.append(
            gop_calculate(sub_conv_N[i], sub_conv_M[i], sub_conv_R[i],
                          sub_conv_K[i]))
        gop_total += gop_per_subnet[i]
    print "gop_per_subnet in local_search: ", gop_per_subnet

    # Every sub-net but the last gets a GOP-proportional share (rounded up);
    # the last one receives whatever is left of the budget.
    for i in range(0, len(sub_conv_N)):
        if i < len(sub_conv_N) - 1:
            dsp_per_acc.append(
                math.ceil(DSP * (gop_per_subnet[i] / float(gop_total))))
            dsp_occupied += dsp_per_acc[i]
        else:
            dsp_per_acc.append(math.ceil(DSP - dsp_occupied))
    """ Iteratively find the system level optimal configuration for the all the sub-nets"""
    search_stop = 0
    while search_stop == 0 and search_counter < Resolution + 1:
        for i in range(0, len(sub_conv_N)):
            # NOTE(review): the budget is the literal 2200, not dsp_per_acc[i].
            pair, cycle, cycle_per_layer = constrained_dse(
                sub_conv_N[i], sub_conv_M[i], sub_conv_r[i],
                sub_conv_R[i], sub_conv_K[i], sub_conv_S[i], sub_flag[i],
                int(2200), int(37), factor)
            pair_list.append(pair)
            lat_list.append(cycle)
            util_list.append(pair[0] * pair[1] / float(DSP))
            # Once results from an earlier round are still present, drop the
            # oldest len(sub_conv_N) entries so only the current round remains.
            if len(pair_list) > len(sub_conv_N):
                for remove_cnt in range(0, len(sub_conv_N)):
                    pair_list.remove(pair_list[0])
                    lat_list.remove(lat_list[0])
                    util_list.remove(util_list[0])

        # Relative spread between the slowest and the fastest sub-net.
        ratio_tmp = ((max(lat_list) - min(lat_list)) / float(min(lat_list)))
        # print ratio_tmp
        if search_counter == 0:
            ratio_init = ratio_tmp
            # or search_counter == Resolution:
        if ratio_tmp < ratio:
            search_stop = 1
        else:
            # NOTE(review): the names are inverted -- max_idx is the sub-net
            # with the LOWEST latency (DSPs are taken from it) and min_idx the
            # one with the HIGHEST latency (DSPs are given to it).
            max_idx = lat_list.index(min(lat_list))
            min_idx = lat_list.index(max(lat_list))
            if ratio_tmp - ratio > float(0.1):
                # Far from the target: move five steps when the donor can
                # afford it, otherwise a single step.
                if (dsp_per_acc[max_idx] - 5 * step > 0):
                    dsp_per_acc[max_idx] = dsp_per_acc[max_idx] - 5 * step
                    dsp_per_acc[min_idx] = dsp_per_acc[min_idx] + 5 * step
                else:
                    dsp_per_acc[max_idx] = dsp_per_acc[max_idx] - step
                    dsp_per_acc[min_idx] = dsp_per_acc[min_idx] + step
            else:
                # Close to the target: move one step, only if the donor stays
                # above zero.
                if (dsp_per_acc[max_idx] - step > 0):
                    dsp_per_acc[max_idx] = dsp_per_acc[max_idx] - step
                    dsp_per_acc[min_idx] = dsp_per_acc[min_idx] + step

        search_counter = search_counter + 1
    # if search_stop == 1:
    # and search_counter == 101
    print "local search stopped at =", search_counter - 1, "current ratio: ", ratio_tmp
    print "initial ratio ->", ratio_init

    return pair_list, lat_list, util_list
예제 #7
0
def per_die_config_dse_multiAcc_flex(sub_conv_N, sub_conv_M, sub_conv_r,
                                     sub_conv_R, sub_conv_K, sub_conv_S,
                                     sub_flag):

    print "sub_conv_N (original): ", sub_conv_N
    print "sub_flag (original): ", sub_flag

    opt_res = []

    # i: iterate over each sub-net
    for i in range(0, len(sub_conv_N)):
        # print "sub_conv_N[" + str(i) + "]: ", sub_conv_N[i]
        min_cycle = sys.maxint
        min_idx = -1

        sub_conv_net_gop = gop_calculate(sub_conv_N[i], sub_conv_M[i],
                                         sub_conv_R[i], sub_conv_K[i])
        cycle_list = []
        pair_list = []

        # when the number of accelerators is j
        # for j in range(1, 3 + 1):
        for j in range(1, 3 + 1):
            # cycle should be compared here, to find optimal accelerator number and config
            lat_list = []
            start_index = 0

            # k: the index to split the sub_conv_N
            for k in split_sub_net(0, len(sub_conv_N[i]), j):
                DSP = int(6840 / 3 * 0.8)
                dsp_list = []
                local_cycle_list = []
                local_pair_list = []
                sub_net_gop_list = []
                factor = 1

                # re-caculate sub_conv_N, sub_conv_M, sub_conv_R, sub_conv_K
                sub_conv_N_new = []
                sub_conv_M_new = []
                sub_conv_r_new = []
                sub_conv_R_new = []
                sub_conv_K_new = []
                sub_conv_S_new = []
                sub_flag_new = []

                # -2: illegal setting, pass
                if k[0] == -2:
                    print "illegal partitioning of sub-net, passing!"
                    continue

                # -1: only one accelerator
                if k[0] == -1:
                    sub_conv_N_new.append(sub_conv_N[i])
                    sub_conv_M_new.append(sub_conv_M[i])
                    sub_conv_r_new.append(sub_conv_r[i])
                    sub_conv_R_new.append(sub_conv_R[i])
                    sub_conv_K_new.append(sub_conv_K[i])
                    sub_conv_S_new.append(sub_conv_S[i])
                    sub_flag_new.append(sub_flag[i])

                # else: 2 or 3 accelerators
                else:
                    zi = zip([0] + k, k + [None])
                    for idx in range(0, len(zi)):
                        sub_conv_M_new.append(
                            flatten(sub_conv_M[i])[zi[idx][0]:zi[idx][1]])
                        sub_conv_N_new.append(
                            flatten(sub_conv_N[i])[zi[idx][0]:zi[idx][1]])
                        sub_conv_r_new.append(
                            flatten(sub_conv_r[i])[zi[idx][0]:zi[idx][1]])
                        sub_conv_R_new.append(
                            flatten(sub_conv_R[i])[zi[idx][0]:zi[idx][1]])
                        sub_conv_K_new.append(
                            flatten(sub_conv_K[i])[zi[idx][0]:zi[idx][1]])
                        sub_conv_S_new.append(
                            flatten(sub_conv_S[i])[zi[idx][0]:zi[idx][1]])
                        sub_flag_new.append(
                            flatten(sub_flag[i])[zi[idx][0]:zi[idx][1]])

                # print "split index k = ", k, "accelerator j = ", j, "sub_conv_N_new: ", sub_conv_N_new

                # m: the mth sub-sub-net in the sub-net
                temp_pair_list = []
                for m in range(0, len(sub_conv_N_new)):
                    # print "sub_conv_N_new[" + str(m) + "]: ", sub_conv_N_new[m]
                    sub_net_gop_list.append(
                        gop_calculate(sub_conv_N_new[m], sub_conv_M_new[m],
                                      sub_conv_R_new[m], sub_conv_K_new[m]))
                    # allocate_dsp by layer gops
                    dsp_list.append(
                        math.ceil(DSP * (sub_net_gop_list[m]) /
                                  sub_conv_net_gop))
                    # search best <Tm,Tn> configurations
                    pair, cycle, cycle_per_layer = constrained_dse(
                        sub_conv_N_new[m], sub_conv_M_new[m],
                        sub_conv_r_new[m], sub_conv_R_new[m],
                        sub_conv_K_new[m], sub_conv_S_new[m], sub_flag_new[m],
                        int(dsp_list[m]), int(37), factor, j)
                    local_cycle_list.append(cycle)
                    temp_pair_list.append(pair)

                    # local_pair_list.append(pair)

                cycle_list.append([j, k, max(local_cycle_list)])
                pair_list.append(temp_pair_list)

        # find the minimum cycles and the corresponding index for each sub-net
        for n in range(0, len(cycle_list)):
            if cycle_list[n][2] < min_cycle:
                min_cycle = cycle_list[n][2]
                min_idx = n
        opt_res.append([cycle_list[min_idx], pair_list[min_idx]])
    return opt_res
예제 #8
0
def per_die_config_dse_multiAcc_flex(sub_conv_N, sub_conv_M, sub_conv_r,
                                     sub_conv_R, sub_conv_K, sub_conv_S,
                                     sub_flag):
    """Pick, for every sub-net, the accelerator count and split with the fewest cycles.

    For each sub-net, 1 to 3 accelerators are tried.  ``split_sub_net``
    supplies the candidate splits: a leading -2 marks a candidate that is
    skipped, a leading -1 means the sub-net is used whole, anything else is a
    list of slice boundaries.  Every piece gets a DSP share proportional to
    its GOPs and is searched with ``constrained_dse``; a candidate is scored
    by the largest cycle count among its pieces.  Progress is traced on
    stdout.

    :return: a list with one ``[[j, k, cycles], pairs]`` entry per sub-net,
        where ``j`` is the accelerator count, ``k`` the split and ``pairs``
        the per-piece results of the winning candidate
    """
    print("sub_conv_N (original): ", sub_conv_N)
    print("sub_flag (original): ", sub_flag)

    DSP = int(6840 / 3 * 0.8)
    factor = 1
    opt_res = []

    for i in range(len(sub_conv_N)):
        net_N, net_M = sub_conv_N[i], sub_conv_M[i]
        net_R, net_K = sub_conv_R[i], sub_conv_K[i]
        sub_conv_net_gop = gop_calculate(net_N, net_M, net_R, net_K)
        net_r = sub_conv_r[i]
        print("\t[DEBUG] processing sub_net - sub_conv_N[{}]: {} - sub_conv_M[{}]: {} - sub_conv_r[{}]: {} - TOTAL GOPs = {:0.4f}".format(
            i, net_N, i, net_M, i, net_r, sub_conv_net_gop / 1e9))
        cycle_list = []
        pair_list = []

        # j: number of accelerators given to this sub-net.
        for j in range(1, 4):
            # k: candidate split of the sub-net for j accelerators.
            for k in split_sub_net(0, len(net_N), j):
                print("\t\t[DEBUG] index to split sub_conv_N[{}] : {}".format(
                    i, j))

                if k[0] == -2:
                    print("\t\tillegal partitioning of sub-net, passing!")
                    continue

                sub_conv_N_new = []
                sub_conv_M_new = []
                sub_conv_r_new = []
                sub_conv_R_new = []
                sub_conv_K_new = []
                sub_conv_S_new = []
                sub_flag_new = []

                if k[0] == -1:
                    # Single accelerator: the sub-net stays in one piece.
                    sub_conv_N_new.append(net_N)
                    sub_conv_M_new.append(net_M)
                    sub_conv_r_new.append(net_r)
                    sub_conv_R_new.append(net_R)
                    sub_conv_K_new.append(net_K)
                    sub_conv_S_new.append(sub_conv_S[i])
                    sub_flag_new.append(sub_flag[i])
                    print("\t\tsub_conv_N_new: ", sub_conv_N_new)
                else:
                    # Consecutive boundaries become (start, stop) slices; the
                    # last slice is open-ended.
                    zi = list(zip([0] + k, k + [None]))
                    print("\t\ttesting zi in python 3.5", zi, len(zi))
                    for lo, hi in zi:
                        sub_conv_M_new.append(flatten(net_M)[lo:hi])
                        sub_conv_N_new.append(flatten(net_N)[lo:hi])
                        sub_conv_r_new.append(flatten(net_r)[lo:hi])
                        sub_conv_R_new.append(flatten(net_R)[lo:hi])
                        sub_conv_K_new.append(flatten(net_K)[lo:hi])
                        sub_conv_S_new.append(flatten(sub_conv_S[i])[lo:hi])
                        sub_flag_new.append(flatten(sub_flag[i])[lo:hi])
                    print("\t\tsub_conv_N_new: ", sub_conv_N_new)

                # m: index of the piece inside the current candidate.
                piece_cycles = []
                piece_pairs = []
                for m in range(len(sub_conv_N_new)):
                    piece_gop = gop_calculate(sub_conv_N_new[m],
                                              sub_conv_M_new[m],
                                              sub_conv_R_new[m],
                                              sub_conv_K_new[m])
                    # DSP share proportional to the piece's GOPs.
                    total_gops_ratio = piece_gop / sub_conv_net_gop
                    piece_dsp = math.ceil(DSP * total_gops_ratio)
                    print(
                        "\t\t\tsub_conv_N_new[{}]: {} - GOPs: {:0.4f} ({:0.2f})- DSPs {}"
                        .format(m, sub_conv_N_new[m], piece_gop / 1e9,
                                total_gops_ratio, piece_dsp))
                    pair, cycle, _per_layer = constrained_dse(
                        sub_conv_N_new[m], sub_conv_M_new[m],
                        sub_conv_r_new[m], sub_conv_R_new[m],
                        sub_conv_K_new[m], sub_conv_S_new[m], sub_flag_new[m],
                        int(piece_dsp), int(37), factor, j)
                    piece_cycles.append(cycle)
                    piece_pairs.append(pair)

                cycle_list.append([j, k, max(piece_cycles)])
                pair_list.append(piece_pairs)

        # First candidate with the strictly smallest cycle count wins.
        min_cycle = sys.maxsize
        min_idx = -1
        for n, candidate in enumerate(cycle_list):
            if candidate[2] < min_cycle:
                min_cycle = candidate[2]
                min_idx = n
        print("\t[DEBUG] cycle_list[min_idx] = {} - pair_list[min_idx] = {}".
              format(cycle_list[min_idx], pair_list[min_idx]))
        opt_res.append([cycle_list[min_idx], pair_list[min_idx]])
        print("\t[DEBUG] DONE for sub_net_N[{}]".format(i))

    return opt_res