Example 1
def generate_weights_biases(length, s, arr1, arr2, prefix=SEPARATER):
    comm = "// Prepare weights and bias for "

    array = helping_functions.read_params(sys.argv[1])
    arr, arr_str = helping_functions.extraction(array)

    wb_str = ""
    for c in range(length):
        c_name = s + "_" + str(c + 1) + "_weight2D"
        b_name = s + "_" + str(c + 1) + "_bias2D"
        wb_str += prefix + comm + s + " layer " + str(c + 1) + EOL
        wb_str += generate_w_b(c_name, arr1, "weight", c, s)
        if s == "conv":
            if "conv_bias_size" in arr_str:
                wb_str += generate_w_b(b_name, arr2, "bias", c, s)
        elif s == "fc":
            if "fc_bias_size" in arr_str:
                wb_str += generate_w_b(b_name, arr2, "bias", c, s)
        wb_str += prefix + "in_number_" + s + "++;" + EOL + EOL

    wb_str += prefix + "cout<<\"Finished loading " + s + " weight into memory! Total: \" <<" + s + "_weight_num  << \"... ... ...\"<<endl;" + EOL
    if s == "conv":
        if "conv_bias_size" in arr_str:
            wb_str += prefix + "cout<<\"Finished loading " + s + " bias into memory! Total: \" <<" + s + "_bias_num  << \"... ... ...\"<<endl;" + EOL * 2
    elif s == "fc":
        if "fc_bias_size" in arr_str:
            wb_str += prefix + "cout<<\"Finished loading " + s + " bias into memory! Total: \" <<" + s + "_bias_num  << \"... ... ...\"<<endl;" + EOL * 2

    return wb_str
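
All of the snippets on this page rely on a shared set of module-level string constants (EOL, SPACE, SEPARATER, PARAMETER_BEGIN, ...) and on helping_functions.read_params / helping_functions.extraction, none of which are shown here. A minimal sketch of plausible stand-ins, only to make the examples readable in isolation (every value below is an assumption, not taken from the original project):

# Hypothetical stand-ins for the shared constants used by the generators.
EOL = "\n"
SPACE = " "
SEPARATER = "\t"                      # indentation prefix (spelling kept from the source)
PARAMETER_BEGIN, PARAMETER_END = "(", ")"
BODY_BEGIN, BODY_END = "{", "}"
ARRAY_BEGIN, ARRAY_END = "[", "]"
COMMA, EOS, EQUAL, MULT = ",", ";", " = ", " * "
NULL = "NULL"
# helping_functions.extraction(arr) is assumed to return two parallel lists,
# values and names, so that values[names.index("n")] looks up parameter "n".
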
Example 2
def generate_preprocessor(prep_json):
    arr = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr)
    comm = "// C++ compilation debug mode" + EOL
    prep_str = EOL
    prep_str += comm
    prep_str += prep_json[0] + EOL * 2
    prep_str += comm
    for prep_sen in prep_json[1:]:
        if "nn_scale_size" in prms_str:
            if prep_sen == "//#define _SCALE_ 1":
                prep_str += "#define _SCALE_ 1" + EOL
            else:
                prep_str += prep_sen + EOL
        else:
            if prep_sen == "//#define _SCALE_ 1":
                prep_str += ""
            else:
                prep_str += prep_sen + EOL
    return prep_str
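
generate_preprocessor walks a list of preprocessor directives and promotes the commented-out //#define _SCALE_ 1 entry only when the parsed network parameters contain nn_scale_size. A hedged usage sketch, with an invented prep_json purely for illustration:

# Hypothetical preprocessor template; the real list comes from a JSON config file.
prep_json = [
    "#define _C_DEBUG_MODE_ 1",
    "#define _HLS_MODE_ 0",
    "//#define _SCALE_ 1",
]
print(generate_preprocessor(prep_json))
# If "nn_scale_size" appears among the parsed parameter names, the commented
# "//#define _SCALE_ 1" entry is emitted as "#define _SCALE_ 1";
# otherwise it is dropped from the generated output.
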
Example 3
def generate_function_w_bn(fn_nm,
                           return_type,
                           arg_types_arr,
                           arg_names_arr,
                           fn_body,
                           prefix=SEPARATER):
    fn_str = return_type + SPACE + fn_nm + PARAMETER_BEGIN + EOL

    arr = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr)
    for i, f in enumerate(arg_types_arr):
        if "conv_bias_size" in prms_str:
            if i == 15:
                fn_str += "#if _SCALE_" + EOL
                fn_str += prefix + arg_types_arr[i] + SPACE + arg_names_arr[i]
            elif i == 18:
                fn_str += "#endif" + EOL
                fn_str += prefix + arg_types_arr[i] + SPACE + arg_names_arr[i]
            else:
                fn_str += prefix + arg_types_arr[i] + SPACE + arg_names_arr[i]
            if (i != len(arg_types_arr) - 1):
                fn_str += "," + EOL
        else:
            if i == 14:
                fn_str += "#if _SCALE_" + EOL
                fn_str += prefix + arg_types_arr[i] + SPACE + arg_names_arr[i]
            elif i == 17:
                fn_str += "#endif" + EOL
                fn_str += prefix + arg_types_arr[i] + SPACE + arg_names_arr[i]
            else:
                fn_str += prefix + arg_types_arr[i] + SPACE + arg_names_arr[i]
            if (i != len(arg_types_arr) - 1):
                fn_str += "," + EOL
    fn_str += PARAMETER_END + SPACE + BODY_BEGIN + EOL * 2
    for fb in fn_body:
        fn_str += prefix + fb + EOL
    fn_str += EOL
    fn_str += BODY_END + EOL * 2

    return fn_str
Example 4
def generate_header(head_json, arr):
    std = "using namespace std;" + EOL * 2
    head_str = EOL + std

    head_str += head_json["return_type"] + SEPARATER
    head_str += head_json["function_name"] + PARAMETER_BEGIN + EOL
    """param = open("net_config_params.txt", "r")"""

    prms, prms_str = helping_functions.extraction(arr)

    n = prms[prms_str.index("n")]
    nn_in_data_size_values = prms[prms_str.index("nn_in_data_size_conv")]
    nn_padding_conv = prms[prms_str.index("nn_padding_conv")]
    nn_in_number_conv = prms[prms_str.index("nn_in_number_conv")]
    nn_out_number_fc = prms[prms_str.index("nn_out_number_fc")]

    fc_nm = "fc_" + n + "_out_a"

    for s in head_json["intput_parameters"]:
        if s["pName"] == "in_data_3D":
            head_str += SEPARATER + s["pType"] + SEPARATER + s["pName"] + ARRAY_BEGIN + nn_in_number_conv[0] + "*" +\
                 nn_in_data_size_values[0] + "*" + nn_in_data_size_values[0] + ARRAY_END + COMMA + EOL
        elif s["pName"] == "fc_out_a":
            head_str += SEPARATER + s[
                "pType"] + SEPARATER + fc_nm + ARRAY_BEGIN + str(
                    nn_out_number_fc[len(nn_out_number_fc) -
                                     1]) + "*1*1" + ARRAY_END + COMMA + EOL
        elif s["pName"] == "activation_type":
            head_str += SEPARATER + s["pType"] + SEPARATER + s[
                "pName"] + COMMA + EOL
        elif s["pName"] == "output_temp_1" or s["pName"] == "output_temp_2":
            head_str += SEPARATER + s["pType"] + SEPARATER + s[
                "pName"] + ARRAY_BEGIN + prms[prms_str.index(
                    "maximum")] + ARRAY_END + COMMA + EOL
        else:
            head_str += SEPARATER + s["pType"] + s["pName"] + COMMA + EOL

    head_str = head_str[0:-2]
    head_str += PARAMETER_END + SEPARATER + BODY_BEGIN
    return head_str
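
generate_header stitches together the C++ signature of the top-level function, sizing each array parameter from the parsed network configuration. Purely as an illustration of the string it assembles for the in_data_3D branch (pType, channel count and image size are invented, and the constants are the stand-ins sketched after Example 1):

# Illustrative only: the fragment built for the "in_data_3D" parameter.
pType = "float"
nn_in_number_conv = ["3"]
nn_in_data_size_values = ["227"]
fragment = ("\t" + pType + "\t" + "in_data_3D" + "["
            + nn_in_number_conv[0] + "*" + nn_in_data_size_values[0] + "*"
            + nn_in_data_size_values[0] + "]" + "," + "\n")
print(repr(fragment))  # '\tfloat\tin_data_3D[3*227*227],\n'
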
Example 5
def generate_body(body_json, out_json, comm_json, arr, prefix=SEPARATER):
    col_gray = ""
    while (col_gray != "color" and col_gray != "grayscale"):
        col_gray = raw_input(
            "\nPlease enter color specification input (color, grayscale): ")
        if col_gray == "color":
            chn = 3
        elif col_gray == "grayscale":
            chn = 1
        else:
            print "Please enter \"color\" for colored image and \"grayscale\" for grayscaled one "

    sz = "sizeof"
    ms = "memset"
    body_str = EOL
    body_str1 = ""
    alloc_str = EOL + prefix + comm_json[11] + EOL
    body_str += prefix + out_json[0] + EOL
    body_str += EOL + prefix + comm_json[10]
    body_str += EOL
    value = ""
    arr1, arr1_str = helping_functions.extraction(arr)
    arrr = arr1[arr1_str.index("in_data_mem_size")].split(" * ")
    arr2 = arr1[arr1_str.index("conv_weight_size")].split(" + ")
    arr3 = ""
    if "conv_bias_size" in arr1_str:
        arr3 = arr1[arr1_str.index("conv_bias_size")].split(" + ")
    if "fc_bias_size" in arr1_str:
        arr4 = arr1[arr1_str.index("fc_weight_size")].split(" + ")
        arr5 = arr1[arr1_str.index("fc_bias_size")].split(" + ")
    n_layers = arr1[arr1_str.index("n")]
    layers_order = arr1[arr1_str.index("layers_order")]

    if "fc_bias_size" in arr1_str:
        fc_out = "fc_" + str(n_layers) + "_out"
    '''make only one nn_batch_norm_size and nn_scale_size declaration'''
    repeat1 = False
    repeat2 = False
    for k, var_sen in enumerate(body_json["var_init"]):
        if var_sen["name"] in arr1_str:
            if var_sen["name"] == "nn_batch_norm_size":
                if not repeat1:
                    body_str += prefix + var_sen["type"] + SPACE
                    body_str += var_sen["name"]
                    repeat1 = True
                    body_str += EQUAL + PARAMETER_BEGIN
                    body_str += arr1[arr1_str.index(var_sen["name"])]
                    body_str += PARAMETER_END + MULT + sz +\
                     PARAMETER_BEGIN + var_sen["size"] + PARAMETER_END +\
                     EOS + EOL
            elif var_sen["name"] == "nn_scale_size":
                if not repeat2:
                    body_str += prefix + var_sen["type"] + SPACE
                    body_str += var_sen["name"]
                    repeat2 = True
                    body_str += EQUAL + PARAMETER_BEGIN
                    body_str += arr1[arr1_str.index(var_sen["name"])]
                    body_str += PARAMETER_END + MULT + sz +\
                     PARAMETER_BEGIN + var_sen["size"] + PARAMETER_END +\
                     EOS + EOL
            else:
                body_str += prefix + var_sen["type"] + SPACE
                if var_sen["name"] == "fc_out_size":
                    body_str += fc_out + "_size"
                else:
                    body_str += var_sen["name"]
                body_str += EQUAL + PARAMETER_BEGIN
                body_str += arr1[arr1_str.index(var_sen["name"])]
                body_str += PARAMETER_END + MULT + sz +\
                  PARAMETER_BEGIN + var_sen["size"] + PARAMETER_END +\
                  EOS + EOL

    print "\nPlease make sure the Tm and Tn can be divided by the number of ports!"
    port_num = int(
        helping_functions.prompt("\nPlease enter the number of ports: "))
    with open("parameter3.json", "w") as f:
        json.dump(str(port_num), f)

    maximum = ""
    if int(arrr[1]) * int(arrr[1]) > int(
            math.ceil(float(arr1[arr1_str.index("maximum")]) / port_num)):
        maximum = int(arrr[1]) * int(arrr[1])
    else:
        maximum = int(
            math.ceil(float(arr1[arr1_str.index("maximum")]) / port_num))
    for i in range(0, 2):
        for j in range(1, port_num + 1):
            body_str += prefix + "unsigned int" + SPACE
            body_str += "out_size_" + str(i) + "_" + str(j)
            body_str += EQUAL + PARAMETER_BEGIN
            body_str += str(maximum)
            body_str += PARAMETER_END + MULT + sz +\
               PARAMETER_BEGIN + "data_type_o" + PARAMETER_END +\
               EOS + EOL
    if "Eltwise" in layers_order or "Concat" in layers_order:
        for j in range(1, port_num + 1):
            body_str += prefix + "unsigned int" + SPACE
            body_str += "out_size_2_" + str(j)
            body_str += EQUAL + PARAMETER_BEGIN
            body_str += str(maximum)
            body_str += PARAMETER_END + MULT + sz +\
               PARAMETER_BEGIN + "data_type_o" + PARAMETER_END +\
               EOS + EOL

    ker = 0
    for k, var_sen in enumerate(body_json["var_init"]):
        if (var_sen["memory"] == "fc_8_out_mem_int"):
            alloc_str += KERNEL + EOL
            ker = 1

        if var_sen["name"] in arr1_str:
            alloc_str += prefix + var_sen["size"] + SPACE + "*"

            if var_sen["name"] == "fc_out_size":
                cond1 = "fc_" + str(n_layers) + "_out_mem_int"
                alloc_str += "fc_" + str(n_layers) + "_out_mem_int"
                fcout = "fc_" + str(n_layers) + "_out_size"
            else:
                cond1 = var_sen["memory"]
                alloc_str += var_sen["memory"]
                fcout = var_sen["name"]
            alloc_str += EQUAL + PARAMETER_BEGIN + var_sen["size"] + "*" + PARAMETER_END +\
                  "malloc" + PARAMETER_BEGIN + fcout + PARAMETER_END + EOS + EOL

            cond = cond1 + " == " + NULL
            alloc_str += prefix + helping_functions.generate_if(cond, [out_json[1] + var_sen["memory"] + "\\n\"" +\
                  PARAMETER_END + EOS], ["printf(\"" + var_sen["location"] + "= 0x%x \\n\", " + cond1 + PARAMETER_END + EOS], 1)
            if ker == 1:
                alloc_str += PREP_ENDIF + EOL
                ker = 0

    for i in range(0, 2):
        for j in range(1, port_num + 1):
            alloc_str += prefix + "data_type_o *" + "temp_out_" + str(
                i) + "_" + str(j)
            alloc_str += EQUAL + PARAMETER_BEGIN + "data_type_o *" + PARAMETER_END +\
                 "malloc" + PARAMETER_BEGIN + "out_size_" + str(i) + "_" + str(j) + PARAMETER_END + EOS + EOL
            cond = "temp_out_" + str(i) + "_" + str(j) + " == " + NULL
            alloc_str += prefix + helping_functions.generate_if(cond, [out_json[1] + "temp_out_" + str(i) + "_" + str(j) + "\\n\"" +\
                 PARAMETER_END + EOS], ["printf(\"" + "temp_out_" + str(i) + "_" + str(j) + " memory location" + "= 0x%x \\n\", " + "temp_out_" + str(i) + "_" + str(j) + PARAMETER_END + EOS], 1)
    if "Eltwise" in layers_order or "Concat" in layers_order:
        for j in range(1, port_num + 1):
            alloc_str += prefix + "data_type_o *" + "temp_out_2_" + str(j)
            alloc_str += EQUAL + PARAMETER_BEGIN + "data_type_o *" + PARAMETER_END +\
                 "malloc" + PARAMETER_BEGIN + "out_size_2_" + str(j) + PARAMETER_END + EOS + EOL
            cond = "temp_out_2_" + str(j) + " == " + NULL
            alloc_str += prefix + helping_functions.generate_if(cond, [out_json[1] + "temp_out_2_" + str(j) + "\\n\"" +\
                 PARAMETER_END + EOS], ["printf(\"" + "temp_out_2_" + str(j) + " memory location" + "= 0x%x \\n\", " + "temp_out_2_" + str(j) + PARAMETER_END + EOS], 1)

    body_str1 += KERNEL + EOL
    body_str1 += prefix + out_json[2] + EOL
    if "fc_bias_size" in arr1_str:
        body_str1 += prefix + "memset(fc_" + str(
            n_layers) + "_out_mem_int, 0, fc_" + str(
                n_layers) + "_out_size);" + EOL
    else:
        body_str1 += prefix + "memset(out_mem_int, 0, out_size);" + EOL
    if "nn_batch_norm_size" in arr1_str:
        body_str1 += prefix + "memset(batch_norm_mem_port_param1, 0, nn_batch_norm_size);" + EOL
        body_str1 += prefix + "memset(batch_norm_mem_port_param2, 0, nn_batch_norm_size);" + EOL
    if "nn_scale_size" in arr1_str:
        body_str1 += prefix + "memset(scale_mem_port_param1, 0, nn_scale_size);" + EOL
        body_str1 += prefix + "memset(scale_mem_port_param2, 0, nn_scale_size);" + EOL

    for i in range(0, 2):
        for j in range(1, port_num + 1):
            body_str1 += prefix + "memset" + PARAMETER_BEGIN + "temp_out_" + str(
                i) + "_" + str(j)
            body_str1 += ", 0, " + "out_size_" + str(i) + "_" + str(
                j) + PARAMETER_END + EOS + EOL
    if "Eltwise" in layers_order or "Concat" in layers_order:
        for j in range(1, port_num + 1):
            body_str1 += prefix + "memset" + PARAMETER_BEGIN + "temp_out_2_" + str(
                j)
            body_str1 += ", 0, " + "out_size_2_" + str(
                j) + PARAMETER_END + EOS + EOL

    body_str1 += PREP_ENDIF + EOL * 2

    body_str1 += prefix + comm_json[0] + EOL
    body_str1 += HLS + EOL
    body_str1 += prefix + "const char* weight_src = \"net_weights.txt\";" + EOL
    body_str1 += PREP_ELSE + EOL
    body_str1 += prefix + "const char* weight_src = \"net_inputs/net_weights.txt\";" + EOL
    body_str1 += PREP_ENDIF + EOL

    #body_str1 += HLS + EOL
    #body_str1 += prefix + "ifstream ifs(\"val.txt\");" + EOL
    #body_str1 += PREP_ELSE + EOL
    #body_str1 += prefix + "ifstream ifs(\"net_inputs/val.txt\");" + EOL
    #body_str1 += PREP_ENDIF + EOL
    #body_str1 += prefix + "string val_name[10];" + EOL + prefix + "float val_class[10];" +\
    #	     EOL + prefix + "string str;" + EOL
    #body_str1 += prefix + helping_functions.generate_if("!ifs", [out_json[6], "getchar();"], "", 1)
    #body_str1 += prefix + "int num = 0;" + EOL
    #body_str1 += prefix + helping_functions.generate_while("ifs >> str&&num<20",
    #			             [helping_functions.generate_if("num % 2 == 1", ["val_class[num / 2] = int(atof(str.c_str()));"],
    #				     ["val_name[num / 2] = str;"], 2), "num++;"], 1)
    #body_str1 += prefix + "ifs.close();" + EOL*2
    indata_mem = arr1[arr1_str.index("in_data_mem_size")].split(" * ")
    if chn == 3:
        body_str1 += prefix + comm_json[1] + EOL
        body_str1 += HLS + EOL
        body_str1 += prefix + "ifstream ifs1(\"net_mean.txt\");" + EOL
        body_str1 += PREP_ELSE + EOL
        body_str1 += prefix + "ifstream ifs1(\"net_inputs/net_mean.txt\");" + EOL
        body_str1 += PREP_ENDIF + EOL
        body_str1 += EOL * 2
        body_str1 += prefix + "float channel_mean[3] = { 0 };" + EOL +\
             prefix + "string str1;" + EOL +\
             prefix + "string y1 = \"[\";" + EOL +\
             prefix + "string y2 = \"]\";" + EOL +\
             prefix + helping_functions.generate_if("!ifs1", [out_json[5], "getchar();"], "", 1)
        body_str1 += prefix + "int index = 0;" + EOL
        body_str1 += prefix + helping_functions.generate_while(
            "ifs1 >> str1", [
                "int p1 = str1.find(y1, 0);",
                helping_functions.generate_if(
                    "p1 >= 0", ["str1.erase(p1, y1.length());"], "",
                    2), "int p2 = str1.find(y2, 0);",
                helping_functions.generate_if(
                    "p2 >= 0", ["str1.erase(p2, y2.length());"], "", 2),
                "float f = atof(str1.c_str());", "channel_mean[index] = f;",
                "index++;"
            ], 1)
        body_str1 += prefix + "ifs1.close();" + EOL * 2
        body_str1 += prefix + comm_json[2] + EOL
        height = helping_functions.prompt(
            "Please enter the height of the image: ")
        width = helping_functions.prompt(
            "Please enter the width of the image: ")

        body_str1 += prefix + comm_json[3] + EOL
        body_str1 += KERNEL + EOL + HLS + EOL +\
              prefix + "string image_dir = \"" + sys.argv[2] + "\";" + EOL + PREP_ELSE + EOL +\
              prefix + "string image_dir = \"./net_inputs/test_imgs/" + sys.argv[2] + "\"" + EOS + EOL +\
              PREP_ENDIF + EOL

        body_str1 += prefix + "float in_data_3D_channel_swap[3" + ARRAY_END +\
              ARRAY_BEGIN + height + ARRAY_END + ARRAY_BEGIN + width +"] = { 0 };" +\
              EOL + prefix + "float in_data_3D[3" + ARRAY_END +\
              ARRAY_BEGIN + indata_mem[1] + ARRAY_END + ARRAY_BEGIN + indata_mem[2] +\
              "] = { 0 };" + EOL +\
              prefix + "int crop_w = " + arrr[1] + ";"+ EOL + prefix + "int crop_h = " +\
              arrr[1] + ";" + EOL + prefix + "int w;" + EOL + prefix + "int h;" + EOL +\
              prefix + "int channels;" +\
              EOL + prefix + "int size;" + EOL + prefix +\
              "const unsigned char * data = loadfile(image_dir, size);" + EOL +\
              prefix + "const unsigned char * image_orig = stbi_load_from_memory(data, size, &w, &h, &channels, 3);" +\
              EOL

        body_str1 += prefix + helping_functions.generate_for_loop(
            "i", "int", 0, 3, [
                helping_functions.generate_for_loop("j", "int", "i", "w*h*3", [
                    "in_data_3D_channel_swap[2 - i][j / (w * 3)][(j % (w * 3) - i) / 3] = (float)image_orig[j]; //range:0--255"
                ], 2, 3)
            ], 1, 1)
        body_str1 += prefix + helping_functions.generate_for_loop(
            "i", "int", 0, 3, [
                helping_functions.generate_for_loop("j", "int", 0, "h", [
                    helping_functions.
                    generate_for_loop("k", "int", 0, "w", [
                        "in_data_3D_channel_swap[i][j][k] /= 255;// range:0--1"
                    ], 3, 1)
                ], 2, 1)
            ], 1, 1)
        body_str1 += prefix + "resize_image(in_data_3D_channel_swap, h, w, in_data_3D);" + EOL
        body_str1 += prefix + helping_functions.generate_for_loop(
            "i", "int", 0, 3, [
                helping_functions.generate_for_loop("j", "int", 0, "crop_h", [
                    helping_functions.
                    generate_for_loop("k", "int", 0, "crop_w", [
                        "in_data_3D[i][j][k] = in_data_3D[i][j][k] * 255 - channel_mean[i];"
                    ], 3, 1)
                ], 2, 1)
            ], 1, 1)
        body_str1 += prefix + out_json[10] + EOL

        for_str = ""
        for l in range(1, port_num + 1):
            for_str += "if(i+" + str(l - 1) + "<3){" + EOL + prefix * 5
            for_str += "temp_out_0_" + str(
                l
            ) + "[in_data_size]" + EQUAL + "(data_type)in_data_3D" + ARRAY_BEGIN + "i+" + str(
                l - 1) + ARRAY_END + "[j][k];" + EOL + prefix * 4 + BODY_END

        body_str1 += prefix + out_json[11] + EOL + prefix + out_json[11] + EOL +\
              prefix + "int in_data_size=0;" + EOL +\
              prefix + helping_functions.generate_for_loop("i", "int", 0, 3, [helping_functions.generate_for_loop("j", "int", 0, "crop_h",
              [helping_functions.generate_for_loop("k", "int", 0, "crop_w", [for_str,
              "in_data_size++;"], 3, 1)], 2, 1)], 1, port_num)
        body_str1 += prefix + out_json[12] + EOL * 2
        body_str1 += PREP_ENDIF + EOL * 2
    else:
        body_str1 += KERNEL + EOL + HLS + EOL
        body_str1 += prefix + "string image_dir = \"" + sys.argv[
            2] + "\";" + EOL + PREP_ELSE + EOL
        body_str1 += prefix + "string image_dir = \"./net_inputs/test_imgs/" + sys.argv[
            2] + "\"" + EOS + EOL + PREP_ENDIF + EOL
        body_str1 += prefix + "int w;" + EOL + prefix + "int h;" + EOL + prefix + "int channels;" + EOL + prefix + "int size;" + EOL + prefix +\
              "const unsigned char * data = loadfile(image_dir, size);" + EOL +\
              prefix + "const unsigned char * image_orig = stbi_load_from_memory(data, size, &w, &h, &channels, 1);" +\
              EOL
        body_str1 += prefix + "int in_data_size=0;" + EOL
        body_str1 += prefix + "ofstream indata;" + EOL + prefix + "indata.open(\"in_data.txt\");" + EOL
        body_str1 += prefix + helping_functions.generate_for_loop(
            "i", "int", 0, 1, [
                helping_functions.generate_for_loop(
                    "j", "int", 0, indata_mem[2], [
                        helping_functions.generate_for_loop(
                            "k", "int", 0, indata_mem[2], [
                                "indata << image_orig[i *" + indata_mem[2] +
                                "*" + indata_mem[2] + " + " + indata_mem[2] +
                                "*j + k] << \" \";"
                            ], 3, 1), "indata << endl;"
                    ], 2, 1), "indata << endl;"
            ], 1, 1)
        body_str1 += prefix + "indata.close();" + EOL * 2

        body_str1 += prefix + "cout << \"Writing data to input data memory space ... ... ...\" << endl;" + EOL
        body_str1 += prefix + helping_functions.generate_for_loop(
            "i", "int", 0, 1, [
                helping_functions.
                generate_for_loop("j", "int", 0, indata_mem[2], [
                    helping_functions.
                    generate_for_loop("k", "int", 0, indata_mem[2], [
                        "temp_out_0_1[in_data_size] = (data_type)image_orig[i*"
                        + indata_mem[2] + "*" + indata_mem[2] + " + " +
                        indata_mem[2] + "*j + k];", "in_data_size++;"
                    ], 3, 1)
                ], 2, 1)
            ], 1, 1)
        body_str1 += prefix + "cout << \"Finished writing data to input data memory space ... ...\" << endl;" + EOL + PREP_ENDIF + EOL

    body_str1 += prefix + "char tan_h = 't';" + EOL +\
          prefix + "char relu = 'r';" + EOL +\
          prefix + "char none = 'i';" + EOL +\
          prefix + "int in_number_conv = 0;" + EOL +\
          prefix + "int in_number_fc = 0;" + EOL +\
          prefix + "int in_number_pooling = 0;" + EOL*2

    body_str1 += generate_weights_biases(len(arr2), "conv", arr2, arr3)
    if "fc_bias_size" in arr1_str:
        body_str1 += generate_weights_biases(len(arr4), "fc", arr4, arr5)

    if "nn_batch_norm_size" in arr1_str:
        body_str1 += prefix + 'get_batch_norm_mean("net_inputs/batch_norm_mean.txt",batch_norm_mem_port_param1);' + EOL
        body_str1 += prefix + 'get_batch_norm_denominator("net_inputs/batch_norm_denominator.txt",batch_norm_mem_port_param2);' + EOL
    if "nn_scale_size" in arr1_str:
        body_str1 += prefix + 'get_batch_norm_gamma("net_inputs/scale_gamma.txt",scale_mem_port_param1);' + EOL
        body_str1 += prefix + 'get_batch_norm_beta("net_inputs/scale_beta.txt",scale_mem_port_param2);' + EOL + EOL

    body_str1 += KERNEL + EOL
    if "fc_bias_size" in arr1_str:
        body_str1 += prefix + "float fc_" + str(n_layers) + "_out[" + arr1[
            arr1_str.index("fc_out_size")] + "] = { 0 };" + EOL
    else:
        body_str1 += prefix + "float out[" + arr1[arr1_str.index(
            "out_size")] + "] = { 0 };" + EOL
    body_str1 += prefix + "clock_t start, finish, inf_start, inf_finish;" + EOL +\
          prefix + "double totaltime, inf_time;" + EOL +\
          prefix + "start = clock();" + EOL +\
          PREP_ENDIF + EOL*2
    body_str1 += prefix + comm_json[4] + EOL
    body_str1 += prefix + "inference_net(" + EOL + prefix + comm_json[
        7] + EOL + prefix + "conv_weight_mem_port," + EOL
    if "conv_bias_size" in arr1_str:
        body_str1 += prefix + "conv_bias_mem_port," + EOL
    if "fc_bias_size" in arr1_str:
        body_str1 += prefix + "fc_weight_mem_port," + EOL
        body_str1 += prefix + "fc_bias_mem_port," + EOL
    if "nn_batch_norm_size" in arr1_str:
        body_str1 += prefix + "batch_norm_mem_port_param1," + EOL + prefix + "batch_norm_mem_port_param2," + EOL
    if "nn_scale_size" in arr1_str:
        body_str1 += SCALE + EOL + prefix + "scale_mem_port_param1," + EOL + prefix + "scale_mem_port_param2," + EOL + PREP_ENDIF + EOL

    if "fc_bias_size" in arr1_str:
        body_str1 += KERNEL + EOL + prefix + comm_json[
            8] + EOL + prefix + "fc_" + str(n_layers) + "_out_mem_int," + EOL
    else:
        body_str1 += KERNEL + EOL + prefix + comm_json[
            8] + EOL + prefix + "out_mem_int," + EOL

    if "Eltwise" in layers_order or "Concat" in layers_order:
        for i in range(0, 2):
            for j in range(1, port_num + 1):
                body_str1 += prefix + "temp_out_" + str(i) + "_" + str(
                    j) + "," + EOL
        for j in range(1, port_num + 1):
            if j == port_num:
                body_str1 += prefix + "temp_out_2_" + str(j) + ");" + EOL * 2
            else:
                body_str1 += prefix + "temp_out_2_" + str(j) + "," + EOL
    else:
        for i in range(0, 2):
            for j in range(1, port_num + 1):
                if i == 1 and j == port_num:
                    body_str1 += prefix + "temp_out_" + str(i) + "_" + str(
                        j) + ");" + EOL * 2
                else:
                    body_str1 += prefix + "temp_out_" + str(i) + "_" + str(
                        j) + "," + EOL

    body_str1 += prefix + "finish = clock();" + EOL + prefix +\
            "totaltime = (double)(finish - start) / CLOCKS_PER_SEC;" +\
          EOL + prefix + out_json[7] + EOL

    if "fc_bias_size" in arr1_str:
        body_str1 += prefix + helping_functions.generate_for_loop(
            "i", "int", 0, arr1[arr1_str.index("fc_out_size")], [
                "fc_" + str(n_layers) + "_out[i]=(float)(fc_" + str(n_layers) +
                "_out_mem_int[i]);"
            ], 1, 1)
        body_str1 += prefix + "softmax(" + fc_out + ", " + arr1[arr1_str.index("fc_out_size")] + ");" +\
              EOL + prefix + "predict(" + fc_out +", " + arr1[arr1_str.index("fc_out_size")] + ");" + EOL +\
              PREP_ENDIF + EOL*2
    else:
        body_str1 += prefix + helping_functions.generate_for_loop(
            "i", "int", 0, arr1[arr1_str.index("out_size")],
            ["out[i]=(float)(" + "out_mem_int[i]);"], 1, 1)
        body_str1 += prefix + "softmax(out, " + arr1[arr1_str.index("out_size")] + ");" +\
              EOL + prefix + "predict(out, " + arr1[arr1_str.index("out_size")] + ");" + EOL +\
              PREP_ENDIF + EOL*2
    body_str1 += prefix + "return 0;" + EOL * 2 + BODY_END

    return body_str + alloc_str + body_str1
Example 6
def generate(generated_file_name="conv_acc_innerpp_fc.h"):
    arr = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr)
    # port_num is used further down but never assigned in this snippet; it is
    # presumably read back from the "parameter3.json" file that generate_body()
    # writes above (an assumption, added here only so the snippet runs).
    with open("parameter3.json", "r") as f:
        port_num = int(json.load(f))
    str1 = "#ifndef _CONV_ACC_FC_H_" + EOL
    str1 += "#define _CONV_ACC_FC_H_" + EOL + EOL
    str1 += "#include <iostream>" + EOL
    str1 += "#include <fstream>" + EOL
    str1 += '#include "activation_functions.h"' + EOL + EOL
    str1 += "#if _C_DEBUG_MODE_" + EOL
    str1 += "#include <algorithm>" + EOL
    str1 += "#endif" + EOL + EOL
    str1 += "using namespace std;" + EOL + EOL
    str1 += "template <typename T, typename W, typename G, int Tm, int Tn, int Tr, int Tc, int S_max, int K_max>" + EOL
    str1 += "class conv_acc_fc {" + EOL + EOL
    str1 += "private:" + EOL
    str1 += "	int conv_layer_number;" + EOL + EOL
    str1 += "public:" + EOL
    str1 += "	conv_acc_fc() : conv_layer_number(0) {conv_layer_number = 0;};" + EOL + EOL

    str1 += "	////------------------------------C++ debugging functions---------------------------------------////" + EOL
    str1 += "	// Reset output buffer" + EOL
    str1 += "	void out_buf_reset(G buf[][Tr][Tc]){" + EOL
    str1 += "        for(int i = 0; i < Tm; i++){" + EOL
    str1 += "            for(int j = 0; j < Tr; j++){" + EOL
    str1 += "                for(int k = 0; k < Tc; k++){" + EOL
    str1 += "                    buf[i][j][k] = G(0);" + EOL
    str1 += "				}" + EOL
    str1 += "			}" + EOL
    str1 += "		}" + EOL
    str1 += "	}" + EOL

    str1 += "    // Reset weight buffer" + EOL
    str1 += "    void w_buf_reset(int K, W buf[][Tm][K_max][K_max]){" + EOL
    str1 += "        for(int i = 0; i < Tn; i++){" + EOL
    str1 += "            for(int j = 0; j < Tm; j++){" + EOL
    str1 += "                for(int k = 0; k < K; k++){" + EOL
    str1 += "                    for(int l = 0; l < K; l++){" + EOL
    str1 += "                        buf[i][j][k][l] = W(0);" + EOL
    str1 += "                    }" + EOL
    str1 += "				}" + EOL
    str1 += "			}" + EOL
    str1 += "		}" + EOL
    str1 += "	}" + EOL

    str1 += "    // Reset bias buffer" + EOL
    str1 += "    void b_buf_reset(W buf[]){" + EOL
    str1 += "        for(int i = 0; i < Tm; i++){" + EOL
    str1 += "            buf[i]= W(0);" + EOL
    str1 += "		}" + EOL
    str1 += "	}" + EOL

    str1 += "    ////-----------------------------Accelerator Functions---------------------------------------////" + EOL

    str1 += "    // Load bias data" + EOL
    str1 += "    void b_buf_load(W buf[], W *layer_bias, int bias_offset, int m){" + EOL
    str1 += "        for(int i = 0; i < Tm; i++){" + EOL
    str1 += "#pragma HLS UNROLL" + EOL
    str1 += "            buf[i] = *(layer_bias + bias_offset + i + m);" + EOL
    str1 += "		}" + EOL
    str1 += "	}" + EOL

    str1 += "    // Load input data" + EOL
    str1 += "    void in_buf_load(T buf[][(Tr-1)*S_max + K_max][(Tc-1)*S_max + K_max]"
    for j in range(1, port_num + 1):
        str1 += ",T " + "*in_data_" + str(j)

    str1 += ", int in_offset, int n, int r, int c, int S, int K, int P, int R_IN, int C_IN, int N) {" + EOL

    str1 += "       for (int j = r * S - P; j < (r + Tr - 1) * S + K - P; j++) {" + EOL
    str1 += "           for (int k = c * S - P; k < (c + Tc - 1) * S + K - P; k++) {" + EOL
    str1 += "#pragma HLS PIPELINE" + EOL
    str1 += "        		for (int i = 0; i < Tn; i+=" + str(
        port_num) + "){" + EOL
    #	str1 += "#pragma HLS UNROLL" + EOL
    #	str1 += "#pragma HLS DEPENDENCE variable=buf inter false" + EOL

    for j in range(0, port_num):
        str1 += "                   	if ((n + Tn > N && i + " + str(
            j
        ) + " >= N - n ) || j < 0 || j >= R_IN || k < 0 || k >= C_IN) {" + EOL
        str1 += "                       	buf[i + " + str(
            j) + "][j - r * S + P][k - c * S + P] = T(0);" + EOL
        str1 += "                   	} else {" + EOL
        str1 += "                       	buf[i + " + str(
            j) + "][j - r * S + P][k - c * S + P] = *(in_data_" + str(
                j + 1) + " + in_offset + (i + n)/" + str(
                    port_num) + " * R_IN * C_IN + j * C_IN + k);" + EOL
        str1 += "               		}" + EOL

    str1 += "				}" + EOL
    str1 += "			}" + EOL
    str1 += "		}" + EOL
    str1 += "	}" + EOL
    str1 += EOL
    str1 += EOL

    str1 += "    // Load weights to weight buffer" + EOL
    str1 += "   void w_buf_load(W buf[][Tm][K_max][K_max], W *layer_weights, int weight_offset, int n, int m, int K, int N, int M){" + EOL
    str1 += "       for(int k1 = 0; k1 < K; k1++){" + EOL
    str1 += "           for(int k2 = 0; k2 < K; k2++){" + EOL
    str1 += "#pragma HLS PIPELINE" + EOL
    str1 += "        		for(int j = 0; j < Tn; j++){" + EOL
    #str1 += "#pragma HLS UNROLL" + EOL
    str1 += "            		if(N < n+Tn && j == N-n){" + EOL
    str1 += "                		break;" + EOL
    str1 += "            		}" + EOL
    str1 += "            		for(int i = 0; i < Tm && i < M-m; i++){" + EOL
    #str1 += "#pragma HLS UNROLL" + EOL
    str1 += "                		if(M < m+Tm && i == M-m){" + EOL
    str1 += "                    		break;" + EOL
    str1 += "                		}" + EOL
    str1 += "                        buf[j][i][k1][k2] = *(layer_weights + weight_offset + (i+m)*N*K*K + (j+n)*K*K + k1*K + k2);" + EOL
    str1 += "                   }" + EOL
    str1 += "				}" + EOL
    str1 += "			}" + EOL
    str1 += "		}" + EOL
    str1 += "	}" + EOL

    str1 += "    // Convolution computation kernel" + EOL
    str1 += "    void conv_engine(T in_buf[][(Tr-1)*S_max + K_max][(Tc-1)*S_max + K_max], W w_buf[][Tm][K_max][K_max], W b_buf[], G out_buf[][Tr][Tc], int S, int n, int r, int c, int K, int R_OUT, int C_OUT){" + EOL
    str1 += "        for(int i=0; i<K; i++){" + EOL
    str1 += "            for(int j=0; j<K; j++){" + EOL
    str1 += "                for(int tr=0; tr<Tr; tr++){" + EOL
    str1 += "                    for(int tc=0; tc<Tc; tc++){" + EOL
    str1 += "#pragma HLS PIPELINE" + EOL
    str1 += "                        for(int tm = 0; tm < Tm; tm++){" + EOL
    str1 += "#pragma HLS UNROLL" + EOL
    str1 += "                            for(int tn=0; tn<Tn; tn++){" + EOL
    str1 += "#pragma HLS UNROLL" + EOL
    str1 += "                                if(i==0&&j==0&&tn==0&&n==0)" + EOL
    str1 += "                                    out_buf[tm][tr][tc] = b_buf[tm] + w_buf[tn][tm][i][j]*in_buf[tn][S*(tr)+i][S*(tc)+j];" + EOL
    str1 += "                                else" + EOL
    str1 += "                                    out_buf[tm][tr][tc] = out_buf[tm][tr][tc] + w_buf[tn][tm][i][j]*in_buf[tn][S*(tr)+i][S*(tc)+j];" + EOL
    str1 += "                            }" + EOL
    str1 += "                        }" + EOL
    str1 += "                    }" + EOL
    str1 += "                }" + EOL
    str1 += "            }" + EOL
    str1 += "        }" + EOL
    str1 += "    }" + EOL
    str1 += EOL
    str1 += EOL

    str1 += "    // Ouput out_buf data to output interface" + EOL
    str1 += "    void output_res(G out_buf[][Tr][Tc]"
    for j in range(1, port_num + 1):
        str1 += ",G " + "*out_data_" + str(j)
    str1 += ", int out_offset, int n, int m, int r, int c, int N, int M, int R_OUT, int C_OUT, bool act){" + EOL
    str1 += "        if (n >= N - Tn) {" + EOL
    str1 += "            for (int j = r; j < r + Tr && j < R_OUT; j++) {" + EOL
    #str1 += "                if (C_OUT < c + Tc && k == C_OUT) { break; }" + EOL
    str1 += "                for (int k = c; k < c + Tc && k < C_OUT; k++) {" + EOL
    #str1 += "                    if (R_OUT < r + Tr && j == R_OUT) { break; }" + EOL
    str1 += "#pragma HLS PIPELINE" + EOL
    str1 += "                    for (int i = 0; i < Tm && i < M-m; i += " + str(
        port_num) + ") {" + EOL
    #str1 += "#pragma HLS UNROLL" + EOL
    #str1 += "                        if (M < m + Tm && i+m == M) { break; }" + EOL
    str1 += "                        if (act) {" + EOL
    for j in range(1, port_num + 1):
        str1 += "                        	if (i + " + str(j -
                                                          1) + " < M-m)" + EOL
        str1 += "                            	*(out_data_" + str(j) + " + out_offset + ((i+m)/" + str(port_num) + ") * R_OUT * C_OUT + j * C_OUT + k) = relu(out_buf[i + " +\
         str(j-1) + "][j - r][k - c]);" + EOL

    str1 += "                        }" + EOL
    str1 += "                        else {" + EOL
    for j in range(1, port_num + 1):
        str1 += "                        	if (i + " + str(j -
                                                          1) + " < M-m)" + EOL
        str1 += "                            	*(out_data_" + str(j) + " + out_offset + ((i+m)/" + str(port_num) + ") * R_OUT * C_OUT + j * C_OUT + k) = out_buf[i + " +\
         str(j-1) + "][j - r][k - c];" + EOL

    str1 += "                        }" + EOL
    str1 += "                    }" + EOL
    str1 += "                }" + EOL
    str1 += "            }" + EOL
    str1 += "        }" + EOL
    str1 += "    }" + EOL

    str1 += "///////////////////////------------------conv accelerator----------------//////////////////////////" + EOL
    str1 += "    void conv_layer_acc_fc(" + EOL
    str1 += "            int N, //input feature number" + EOL
    str1 += "            int K, //input kernel size" + EOL
    str1 += "            int M, // output feature number" + EOL
    str1 += "            int R_IN, // input Row" + EOL
    str1 += "            int C_IN, // input column" + EOL
    str1 += "            int R_OUT, // output Row" + EOL
    str1 += "            int C_OUT,// output column" + EOL
    str1 += "            int S, // stride size" + EOL
    str1 += "            int P, // padding size" + EOL
    str1 += "            bool act, // activation function bit (1-- with act, 0--without act)" + EOL
    str1 += "            W *layer_weights, //w[M][N][K][K]" + EOL
    str1 += "            W *layer_bias, // b[M]" + EOL
    str1 += "            int weight_offset," + EOL
    str1 += "            int bias_offset," + EOL
    str1 += "            int in_offset," + EOL
    str1 += "            int out_offset," + EOL
    for j in range(1, port_num + 1):
        str1 += "            T *in_data_" + str(
            j
        ) + "," + " // in_data[N][(R-1)*S + K][(C-1)*S + K] --> [N][(R-1)*S + K - 2*P][(C-1)*S + K - 2*P]" + EOL
    for j in range(1, port_num + 1):
        if j == port_num:
            str1 += "            G *out_data_" + str(
                j) + "){ // out[M][R][C]" + EOL + EOL
        else:
            str1 += "            G *out_data_" + str(
                j) + "," + " // out[M][R][C]" + EOL

    str1 += "        /***************local data buffer******************************/" + EOL
    str1 += "        T in_buf_1[Tn][(Tr-1)*S_max + K_max][(Tc-1)*S_max + K_max];" + EOL
    str1 += "        T in_buf_0[Tn][(Tr-1)*S_max + K_max][(Tc-1)*S_max + K_max];" + EOL
    str1 += "        W w_buf_1[Tn][Tm][K_max][K_max];" + EOL
    str1 += "        W w_buf_0[Tn][Tm][K_max][K_max];" + EOL
    str1 += "        W b_buf_1[Tm];" + EOL
    str1 += "        W b_buf_0[Tm];" + EOL
    str1 += "        G out_buf_1[Tm][Tr][Tc];" + EOL
    str1 += "        G out_buf_0[Tm][Tr][Tc];" + EOL + EOL
    str1 += "        /***************Ptr and buffer initialization******************************/" + EOL
    str1 += "        bool in_buf_0_empty = 1;" + EOL
    str1 += "        bool in_buf_1_empty = 1;" + EOL
    str1 += "        bool out_buf_0_empty = 1;" + EOL
    str1 += "        bool out_buf_1_empty = 1;" + EOL
    str1 += "        int loadbufPtr = 0;" + EOL
    str1 += "        int combufPtr = 0;" + EOL
    str1 += "        int resbufPtr = 0;" + EOL
    str1 += "        bool last_com = 0;" + EOL
    str1 += "        bool last_load = 0;" + EOL
    str1 += "        bool last_res = 0;" + EOL + EOL

    str1 += "#if _HLS_MODE_" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=in_buf_1 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=in_buf_0 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=w_buf_1 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=w_buf_1 complete dim=2" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=w_buf_0 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=w_buf_0 complete dim=2" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=b_buf_1 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=b_buf_0 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=out_buf_1 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=out_buf_0 complete dim=1" + EOL
    str1 += "#endif" + EOL + EOL

    str1 += "#if _C_DEBUG_MODE_" + EOL
    str1 += "#if _KERNEL_DEBUG_" + EOL
    str1 += '            cout << "Starting conv_acc_innerpp_fc layer ...." << endl;' + EOL
    str1 += "            //buffer local data initiallization: must do it in C++ debug!" + EOL
    str1 += "            out_buf_reset(out_buf_1);" + EOL
    str1 += "            out_buf_reset(out_buf_0);" + EOL
    str1 += "            b_buf_reset(b_buf_1);" + EOL
    str1 += "            b_buf_reset(b_buf_0);" + EOL
    str1 += "            w_buf_reset(K, w_buf_1);" + EOL
    str1 += "            w_buf_reset(K, w_buf_0);" + EOL
    str1 += "#endif" + EOL
    str1 += "#endif" + EOL
    str1 += "		for(int r = 0; r < R_OUT; r += Tr){" + EOL
    str1 += "			for(int c = 0; c < C_OUT; c += Tc){" + EOL
    str1 += "				for(int m = 0; m < M; m += Tm){" + EOL
    str1 += "					for(int n = 0; n < N; n += 2*Tn){" + EOL
    #str1 += "#if _HLS_MODE_" + EOL
    #str1 += "#pragma HLS DATAFLOW" + EOL
    #str1 += "#endif" + EOL
    str1 += "   //--------------------------Load input B W D in ping-pong manner-------------------------//" + EOL
    str1 += "						while ((in_buf_0_empty | in_buf_1_empty)&& (!last_load)) {" + EOL
    str1 += "							if (loadbufPtr == 1) {" + EOL
    str1 += '                    			cout << "loading input buffer 1...." << endl;' + EOL
    str1 += "                    			//load input bias" + EOL
    str1 += "                        		b_buf_load(b_buf_1, layer_bias, bias_offset, m);" + EOL
    str1 += "                        		// load input data" + EOL
    str1 += "                        		in_buf_load(in_buf_1"
    for j in range(1, port_num + 1):
        str1 += ", in_data_" + str(j)
    str1 += ", in_offset, n+Tn, r, c, S, K, P, R_IN, C_IN, N);" + EOL
    str1 += "                        		// load input weights" + EOL
    str1 += "                        		w_buf_load(w_buf_1, layer_weights, weight_offset, n+Tn, m, K, N, M);" + EOL
    str1 += "                        		in_buf_1_empty = 0;" + EOL
    str1 += '                        		cout << "buffer 1 full" << endl;' + EOL
    str1 += "                        		loadbufPtr = 0;" + EOL
    str1 += "                        		if (n+2*Tn >= N) {last_load = 1;}" + EOL
    str1 += "                        	} else {" + EOL
    str1 += '                    			cout << "loading input buffer 0...." << endl;' + EOL
    str1 += "                    			//load input bias" + EOL
    str1 += "                        		b_buf_load(b_buf_0, layer_bias, bias_offset, m);" + EOL
    str1 += "                        		// load input data" + EOL
    str1 += "                        		in_buf_load(in_buf_0"
    for j in range(1, port_num + 1):
        str1 += ", in_data_" + str(j)
    str1 += ", in_offset, n, r, c, S, K, P, R_IN, C_IN, N);" + EOL
    str1 += "                        		// load input weights" + EOL
    str1 += "                        		w_buf_load(w_buf_0, layer_weights, weight_offset, n, m, K, N, M);" + EOL
    str1 += "                        		in_buf_0_empty = 0;" + EOL
    str1 += '                        		cout << "buffer 0 full" << endl;' + EOL
    str1 += "                        		loadbufPtr = 1;" + EOL
    str1 += "                        		if (n+Tn >= N) {last_load = 1;}" + EOL
    str1 += "							}" + EOL
    str1 += "                       }" + EOL
    str1 += "                       loadbufPtr = 0;" + EOL
    str1 += "                       last_load = 0;" + EOL
    str1 += "   //------------------------------compute buffered data -----------------------------------//" + EOL
    str1 += "                    	while ((!in_buf_0_empty | !in_buf_1_empty)&& (!last_com)) {" + EOL
    str1 += "                    		if (combufPtr == 1) {" + EOL
    str1 += '                    			cout << "computing input buffer 1...." << endl;' + EOL
    str1 += "                    			if(resbufPtr == 1){" + EOL
    str1 += "                        			conv_engine(in_buf_1, w_buf_1, b_buf_1, out_buf_1, S, n+Tn, r, c, K, R_OUT, C_OUT);" + EOL
    str1 += "                    				out_buf_1_empty = 0;" + EOL
    str1 += "                    			}else{" + EOL
    str1 += "                        			conv_engine(in_buf_1, w_buf_1, b_buf_1, out_buf_0, S, n+Tn, r, c, K, R_OUT, C_OUT);" + EOL
    str1 += "                    				out_buf_0_empty = 0;" + EOL
    str1 += "                    			}" + EOL
    str1 += "                    			in_buf_1_empty = 1;" + EOL
    str1 += "                    			combufPtr = 0;" + EOL
    str1 += '                    			cout << "buffer 1 computed" << endl;' + EOL
    str1 += "                    			if (n+2*Tn >= N) {last_com = 1;}" + EOL
    str1 += "                    		} else {" + EOL
    str1 += '                    			cout << "computing input buffer 0...." << endl;' + EOL
    str1 += "                    			if(resbufPtr == 1){" + EOL
    str1 += "                        			conv_engine(in_buf_0, w_buf_0, b_buf_0, out_buf_1, S, n, r, c, K, R_OUT, C_OUT);" + EOL
    str1 += "                    				out_buf_1_empty = 0;" + EOL
    str1 += "                    			}else{" + EOL
    str1 += "                        			conv_engine(in_buf_0, w_buf_0, b_buf_0, out_buf_0, S, n, r, c, K, R_OUT, C_OUT);" + EOL
    str1 += "                    				out_buf_0_empty = 0;" + EOL
    str1 += "                    			}" + EOL
    str1 += "                    			in_buf_0_empty = 1;" + EOL
    str1 += "                    			combufPtr = 1;" + EOL
    str1 += '                    			cout << "buffer 0 computed" << endl;' + EOL
    str1 += "								if (n+Tn >= N) {last_com = 1;}" + EOL
    str1 += "							}" + EOL
    str1 += "                       }" + EOL
    str1 += "                       combufPtr = 0;" + EOL
    str1 += "                       last_com = 0;" + EOL
    str1 += "   //---------------------------transfer output data----------------------------------------//" + EOL
    str1 += "                    	while ((!out_buf_0_empty | !out_buf_1_empty)&& (!last_res)) {" + EOL
    str1 += "                    		if (resbufPtr == 1) {" + EOL
    str1 += '                    			cout << "output buffer 1...." << endl;' + EOL
    str1 += "                    			// transfer output data" + EOL
    str1 += "                    			if (n+Tn >= N) {" + EOL
    str1 += "                    				last_res = 1;" + EOL
    str1 += "                    				resbufPtr = 0;" + EOL
    str1 += "                    				output_res(out_buf_1"
    for j in range(1, port_num + 1):
        str1 += ", out_data_" + str(j)
    str1 += ", out_offset, n, m, r, c, N, M, R_OUT, C_OUT, act);" + EOL
    str1 += "                    			}else if (n+2*Tn >= N) {" + EOL
    str1 += "                    				last_res = 1;" + EOL
    str1 += "                    				resbufPtr = 0;" + EOL
    str1 += "                    				output_res(out_buf_1"
    for j in range(1, port_num + 1):
        str1 += ", out_data_" + str(j)
    str1 += ", out_offset, n+Tn, m, r, c, N, M, R_OUT, C_OUT, act);" + EOL
    str1 += "                    			}" + EOL
    str1 += "                    			out_buf_1_empty = 1;" + EOL
    str1 += '                    			cout << "buffer 1 res" << endl;' + EOL
    str1 += "                    		} else {" + EOL
    str1 += '                    			cout << "output buffer 0...." << endl;' + EOL
    str1 += "                    			// transfer output data" + EOL
    str1 += "                    			if (n+Tn >= N) {" + EOL
    str1 += "                    				last_res = 1;" + EOL
    str1 += "                    				resbufPtr = 1;" + EOL
    str1 += "                    				output_res(out_buf_0"
    for j in range(1, port_num + 1):
        str1 += ", out_data_" + str(j)
    str1 += ", out_offset, n, m, r, c, N, M, R_OUT, C_OUT, act);" + EOL
    str1 += "                    			}else if (n+2*Tn >= N) {" + EOL
    str1 += "                    				last_res = 1;" + EOL
    str1 += "                    				resbufPtr = 1;" + EOL
    str1 += "                    				output_res(out_buf_0"
    for j in range(1, port_num + 1):
        str1 += ", out_data_" + str(j)
    str1 += ", out_offset, n+Tn, m, r, c, N, M, R_OUT, C_OUT, act);" + EOL
    str1 += "                    			}" + EOL
    str1 += "                    			out_buf_0_empty = 1;" + EOL
    str1 += '								cout << "buffer 0 res" << endl;' + EOL
    str1 += "							}" + EOL
    str1 += "						}" + EOL
    str1 += "						last_res = 0;" + EOL
    str1 += "					}" + EOL
    str1 += "				}" + EOL
    str1 += "			}" + EOL
    str1 += "		}" + EOL

    str1 += "#if _C_DEBUG_MODE_" + EOL
    str1 += "#if _KERNEL_DEBUG_" + EOL
    str1 += '            cout << "Finished conv_acc_innerpp_fc layer ...." << endl;' + EOL
    str1 += "            ofstream conv_out;" + EOL
    str1 += '            conv_out.open("fc_out_data.txt",ios::app);' + EOL
    str1 += '            conv_out <<"fc output: "<< endl;' + EOL
    str1 += "            for (int i = 0; i < M/" + str(
        port_num) + "; i++) {" + EOL
    for j in range(1, port_num + 1):
        str1 += "                for (int j = 0; j < R_OUT; j++) {" + EOL
        str1 += "                    for(int k = 0; k < C_OUT; k++){" + EOL
        str1 += "                        conv_out << *(out_data_" + str(
            j) + ' + out_offset + i*R_OUT*C_OUT + j*C_OUT + k) << " ";' + EOL
        str1 += "                    }conv_out << endl;" + EOL
        str1 += "                }conv_out << endl;" + EOL

    str1 += "            }conv_out.close();" + EOL
    str1 += "#endif" + EOL
    str1 += "#endif" + EOL
    str1 += "    }" + EOL
    str1 += "};" + EOL
    str1 += "#endif" + EOL

    with open("../example/test_demo/inference_net/" + generated_file_name,
              "w") as generated_file:
        generated_file.write(str1)

    return str1
Example 7
def model_extract(include_fc):

    arr = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr)

    init_conv_N = prms[prms_str.index("nn_in_number_conv")]
    init_conv_r = prms[prms_str.index("nn_in_data_size_conv")]
    init_conv_M = prms[prms_str.index("nn_out_number_conv")]
    init_conv_P = prms[prms_str.index("nn_padding_conv")]
    init_conv_K = prms[prms_str.index("nn_channel_size_conv")]
    init_conv_S = prms[prms_str.index("nn_stride_conv")]
    init_conv_G = prms[prms_str.index("nn_group_conv")]
    init_fc_N = prms[prms_str.index("nn_in_number_fc")]
    init_fc_Rin = prms[prms_str.index("nn_in_data_size_fc")]
    init_fc_M = prms[prms_str.index("nn_out_number_fc")]
    init_fc_K = prms[prms_str.index("nn_channel_size_fc")]
    init_pool_N = prms[prms_str.index("nn_in_data_size_pooling")]
    cut_flag_conv = prms[prms_str.index("conv_cut_flag")]
    cut_flag_pool = prms[prms_str.index("pool_cut_flag")]
    cut_flag_fc = prms[prms_str.index("fc_cut_flag")]

    nn_in_number_conv_values1 = []
    if isinstance(init_fc_N, list):
        for fc_in_number in init_fc_N:
            nn_in_number_conv_values1.append(fc_in_number)
    else:
        nn_in_number_conv_values1.append(0)

    nn_out_number_conv_values1 = []
    if isinstance(init_fc_M, list):
        for fc_out_number in init_fc_M:
            nn_out_number_conv_values1.append(fc_out_number)
    else:
        nn_out_number_conv_values1.append(0)

    nn_fc_sizes_conv = []
    if isinstance(init_fc_Rin, list):
        for fc_in_size in init_fc_Rin:
            nn_fc_sizes_conv.append(fc_in_size)
    else:
        nn_fc_sizes_conv.append(0)

    nn_channel_size_conv_values = []
    if isinstance(init_fc_K, list):
        for kernel_size in init_fc_K:
            nn_channel_size_conv_values.append(kernel_size)
    else:
        nn_channel_size_conv_values.append(0)

    nn_stride_values1 = []
    if isinstance(init_fc_Rin, list):
        for stride_value in init_fc_Rin:
            nn_stride_values1.append(stride_value)
    else:
        nn_stride_values1.append(1)

    conv_only_M = [int(val) for val in init_conv_M]
    # print init_conv_M
    # print conv_only_M

    nn_conv_group_values = []
    if isinstance(init_conv_G, list):
        for group_value in init_conv_G:
            nn_conv_group_values.append(group_value)
    else:
        nn_conv_group_values.append(1)

    nn_fc_cut_flag = []
    if isinstance(cut_flag_fc, list):
        for cut_value in cut_flag_fc:
            nn_fc_cut_flag.append(cut_value)
    else:
        nn_fc_cut_flag.append(1)

    if (include_fc == 'include_fc'):
        print("[DEBUG] including FC . . . {} {} {} {} {} {}".format(
            nn_in_number_conv_values1, nn_out_number_conv_values1,
            nn_fc_sizes_conv, nn_channel_size_conv_values, nn_stride_values1,
            nn_fc_cut_flag))
        init_conv_N = init_conv_N + nn_in_number_conv_values1
        init_conv_M = init_conv_M + nn_out_number_conv_values1
        init_conv_r = init_conv_r + nn_fc_sizes_conv
        init_conv_K = init_conv_K + nn_channel_size_conv_values
        init_conv_S = init_conv_S + nn_stride_values1
        cut_flag_conv = cut_flag_conv + nn_fc_cut_flag

    conv_N = [int(string) for string in init_conv_N]
    conv_M = [int(string) for string in init_conv_M]
    conv_r = [int(string) for string in init_conv_r]
    conv_K = [int(string) for string in init_conv_K]
    conv_S = [int(string) for string in init_conv_S]
    conv_P = [int(string) for string in init_conv_P]
    conv_G = [int(string) for string in init_conv_G]
    cut_flag = [int(string) for string in cut_flag_conv]

    if not init_fc_Rin:
        conv_P = conv_P + [0]
    else:
        conv_P = conv_P + [0] * len(init_fc_Rin)
        conv_P = conv_P + [0]
    conv_G = [int(string) for string in init_conv_G]
    max_conv_N = max(conv_N)
    max_conv_M = max(conv_M)
    max_conv_S = max(conv_S)
    max_conv_K = max(conv_K)

    conv_R = []
    conv_layer_num = int(len(conv_r))
    for r in range(0, conv_layer_num):
        R = (conv_r[r] - conv_K[r] + conv_S[r] + 2 * conv_P[r]) / conv_S[r]
        conv_R.append(R)

    # find the positions of Conv layers followed by Pooling layer
    flag = [False] * conv_layer_num
    count = 0
    print(prms[0])
    print(len(prms[0]))
    for prms_index in range(len(prms[0]) - 2):
        if "Convolution" in prms[0][prms_index]:
            # if "Pooling" in prms[0][prms_index + 1] + prms[0][prms_index + 2]:
            if "Pooling" in prms[0][prms_index + 1]:
                flag[count] = True
            count += 1

    print("conv_N: ", conv_N)
    print("conv_M: ", conv_M)
    print("conv_r: ", conv_r)
    print("conv_R: ", conv_R)
    print("conv_K: ", conv_K)
    print("conv_S: ", conv_S)
    print("flag", flag)
    print("cut_flag", cut_flag)

    return conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, conv_G, flag, cut_flag, init_pool_N


# if __name__ == "__main__":
#     conv_N, conv_M, conv_r, conv_R, conv_K, conv_S = model_extract()
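
For reference, the output size computed in the loop inside model_extract is R = (r - K + S + 2*P) / S. A quick numeric check with illustrative values (r = 227, K = 11, S = 4, P = 0, chosen only for the arithmetic, not taken from any particular config):

r, K, S, P = 227, 11, 4, 0         # illustrative layer parameters
R = (r - K + S + 2 * P) // S       # integer division, matching the Python 2 source
print(R)                           # (227 - 11 + 4) // 4 = 220 // 4 = 55
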
Example 8
def generate():
	"""Tm * Tn < DSP/ 5"""

	arr2 = helping_functions.read_params(sys.argv[1])   
	prms, prms_str = helping_functions.extraction(arr2)
	nn_in_number_conv_values1 = prms[prms_str.index("nn_in_number_conv")] 
	nn_in_number_fc_values = prms[prms_str.index("nn_in_number_fc")] 
	nn_out_number_fc_values = prms[prms_str.index("nn_out_number_fc")] 
	nn_channel_size_conv_values = prms[prms_str.index("nn_channel_size_conv")] 
	nn_channel_size_fc_values = prms[prms_str.index("nn_channel_size_fc")] 

	for n in nn_in_number_fc_values:
		nn_in_number_conv_values1.append(n)
	nn_in_number_conv_values1.append(nn_out_number_fc_values[len(nn_out_number_fc_values) - 1])

	nn_in_number_conv_values = [int(string) for string in nn_in_number_conv_values1]
	nn_channel_size_fc_values = [int(string) for string in nn_in_number_conv_values1]	
	
	mm1 = max(nn_in_number_conv_values)
	
	DSP = 900
	d = DSP/5
	arr = []
	
	r_c = [55,27,13,13,13,6, 1, 1]
	k = [11,5,3,3,3,6,1,1]
	max_S = 6
	max_K = 11
	Tm_min = 1
	Tn_min = 1
	minimums =[]
	min_cycles = 0
	minimums1 =[]
	in_buf = 0
	out_buf = 0
	w_buf = 0
	l = int(len(nn_in_number_conv_values) - 1)
	for o in range (0, l):						
		min_cycles += r_c[o] * r_c[o] * math.ceil(int(nn_in_number_conv_values[o + 1])/max_S) * math.ceil(int(nn_in_number_conv_values[o])/1) * k[o] * k[o]
	minimums.append([1, max_S])
	minimums1.append(min_cycles)

	
	for Tm in range(1, mm1+1):
		for Tn in range(max_S, Tm):
			if Tm * Tn < d:
				cycles = 0
				for j in range (0, l):
			
					cycles += int(r_c[j] * r_c[j] * math.ceil(int(nn_in_number_conv_values[j + 1])/Tn) * math.ceil(int(nn_in_number_conv_values[j])/Tm) * k[j] * k[j])
				if cycles < max(minimums1):
					
					min_cycles = cycles
					Tm_min = Tm
					Tn_min = Tn
					if (len(minimums)<5):
						minimums.append([Tm, Tn])
						minimums1.append(min_cycles)
					else:
						max_among_mins = minimums1.index(max(minimums1))
						minimums1.remove(minimums1[max_among_mins])
						minimums.remove(minimums[max_among_mins])
						minimums.append([Tm, Tn])
						minimums1.append(min_cycles)
	print("Tm and Tn")
	print(minimums)
	print("cycles")
	print(minimums1)
	
	Tr_Tc = []
	in_buff_arr = []
	w_buff_arr = []
	out_buff_arr = []
	total_arr = []
	for m in minimums:
		Tr = int(math.sqrt(m[0]*m[1]))
		Tr_Tc.append([Tr, Tr])
		in_buff = ((Tr-1)*max_S + max_K)*((Tr-1)*max_S + max_K)
		in_buff_arr.append(in_buff)
		out_buff = m[0]*m[1]*max_K*max_K
		out_buff_arr.append(out_buff)
		w_buff = Tr*Tr*m[0]
		w_buff_arr.append(w_buff)
		total = in_buff + out_buff + w_buff
		total_arr.append(total)
	print("Tr_Tc")
	print(Tr_Tc)
	print("in_buf")
	print(in_buff_arr)
	print("w_buf")
	print(w_buff_arr)
	print("out_buf")
	print(out_buff_arr)
	print("total")
	print(total_arr)
	return arr, min_cycles
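
The search above enumerates (Tm, Tn) tile pairs under the DSP constraint and keeps the five pairs with the lowest estimated cycle counts. A minimal sketch of the per-layer cycle model it minimises (the helper name and layer shapes below are hypothetical):

import math

def conv_cycles(R, C, M, N, K, Tm, Tn):
    # output rows/cols * output-channel tiles * input-channel tiles * kernel area
    return R * C * math.ceil(M / Tm) * math.ceil(N / Tn) * K * K

# e.g. a 13x13 output map, 384 output and 256 input channels, 3x3 kernel, tiled with Tm=32, Tn=8
print(conv_cycles(13, 13, 384, 256, 3, Tm=32, Tn=8))
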
Example n. 9
def generate(generated_file_name="acc_instance.h"):

    arr = helping_functions.read_params(sys.argv[1])
    layers_fun = layers(arr[1])
    layers_fun_w_bn = layers_w_bn(arr[1])
    layers_fun_fc = layers_fc(arr[1])
    prms, prms_str = helping_functions.extraction(arr)
    nn_channel_size_conv_values = prms[prms_str.index("nn_channel_size_conv")]
    nn_stride_conv_values = prms[prms_str.index("nn_stride_conv")]
    nn_in_data_size_pooling_values = prms[prms_str.index(
        "nn_in_data_size_pooling")]
    nn_channel_size_pooling_values = prms[prms_str.index(
        "nn_channel_size_pooling")]
    nn_stride_pooling_values = prms[prms_str.index("nn_stride_pooling")]
    nn_channel_size_fc_values = prms[prms_str.index("nn_channel_size_fc")]
    layers_order = prms[prms_str.index("layers_order")]
    str1 = "#ifndef _ACC_INSTANCE_H_" + EOL + "#define _ACC_INSTANCE_H_" + EOL * 2
    import_str = ""
    body_str = ""
    conv_layer_new_body = ""
    strides = [[], [], []]
    kernels = [[], [], []]
    acc_max_kernel = [[], [], []]
    acc_max_stride = [[], [], []]
    conv_counter = 0
    pool_counter = 0
    fc_counter = 0
    '''get stride&kernel of each layer'''
    for i, l in enumerate(layers_order):
        if l.lower().startswith("convolution"):
            strides[0].append(int(nn_stride_conv_values[conv_counter]))
            kernels[0].append(int(nn_channel_size_conv_values[conv_counter]))
            conv_counter = conv_counter + 1

        if l.lower() == "maxpooling":
            strides[1].append(int(nn_stride_pooling_values[pool_counter]))
            kernels[1].append(int(
                nn_channel_size_pooling_values[pool_counter]))
            pool_counter = pool_counter + 1

        if l.lower() == "avepooling":
            strides[2].append(int(nn_stride_pooling_values[pool_counter]))
            kernels[2].append(int(
                nn_channel_size_pooling_values[pool_counter]))
            pool_counter = pool_counter + 1

        if l.lower() == "globalmaxpooling":
            strides[1].append(int(
                nn_in_data_size_pooling_values[pool_counter]))
            kernels[1].append(int(
                nn_in_data_size_pooling_values[pool_counter]))
            pool_counter = pool_counter + 1

        if l.lower() == "globalavepooling":
            strides[2].append(int(
                nn_in_data_size_pooling_values[pool_counter]))
            kernels[2].append(int(
                nn_in_data_size_pooling_values[pool_counter]))
            pool_counter = pool_counter + 1

        if l.lower() == "innerproduct":
            strides[0].append(int(nn_channel_size_fc_values[fc_counter]))
            kernels[0].append(int(nn_channel_size_fc_values[fc_counter]))
            fc_counter = fc_counter + 1
    '''select the biggest stride&kernel of each kind of layer'''
    for k1 in range(len(kernels)):
        if len(kernels[k1]) != 0:
            acc_max_kernel[k1] = ", " + str(max(kernels[k1]))
        if len(strides[k1]) != 0:
            acc_max_stride[k1] = ", " + str(max(strides[k1]))
    '''params for conv_layer & pool_layer'''
    includes = [
        "#include \"conv_acc_innerpp.h\"",
        "#include \"max_pool_acc_innerpp.h\"",
        "#include \"ave_pool_acc_innerpp.h\""
    ]
    fn_names = ["conv_layer_new", "max_pool_layer_new", "ave_pool_layer_new"]
    rn_tp = "void"
    arg_t_list = [[
        "int", "int", "int", "int", "int", "int", "int", "int", "int", "bool",
        "data_type_w", "data_type_w", "int", "int", "int", "int"
    ], ["int", "int", "int", "int", "int", "int", "int", "int", "bool"]]
    arg_n_list = [[
        "N", "K", "M", "R_IN", "C_IN", "C_OUT", "R_OUT", "S", "P", "act",
        "*layer_weights", "*layer_bias", "weight_offset", "bias_offset",
        "in_offset", "out_offset"
    ], ["R_in", "C_in", "N", "K", "R", "C", "S", "P", "act"]]
    acc_params = [[], [], []]
    '''params for conv_w_bn_layer'''
    includes_w_bn = ["#include \"conv_acc_innerpp_w_bn.h\""]
    fn_names_w_bn = ["conv_layer_new_w_bn"]
    arg_t_list_w_bn = [[
        "int", "int", "int", "int", "int", "int", "int", "int", "int", "bool",
        "data_type_w", "data_type_w", "data_type_w", "data_type_w", "int",
        "data_type_w", "data_type_w", "int", "int", "int", "int", "int"
    ]]
    arg_n_list_w_bn = [[
        "N", "K", "M", "R_IN", "C_IN", "C_OUT", "R_OUT", "S", "P", "act",
        "*layer_weights", "*layer_bias", "*bn_mean", "*bn_denominator",
        "bn_offset", "*scale_gamma", "*scale_beta", "scale_offset",
        "weight_offset", "bias_offset", "in_offset", "out_offset"
    ]]
    acc_params_w_bn = [[]]
    '''params for resnet fc_layer '''
    includes_fc = ["#include \"conv_acc_innerpp_fc.h\""]
    fn_names_fc = ["conv_layer_new_fc"]
    arg_t_list_fc = [[
        "int", "int", "int", "int", "int", "int", "int", "int", "int", "bool",
        "data_type_w", "data_type_w", "int", "int", "int", "int"
    ], ["int", "int", "int", "int", "int", "int", "int", "int", "bool"]]
    arg_n_list_fc = [[
        "N", "K", "M", "R_IN", "C_IN", "C_OUT", "R_OUT", "S", "P", "act",
        "*layer_weights", "*layer_bias", "weight_offset", "bias_offset",
        "in_offset", "out_offset"
    ], ["R_in", "C_in", "N", "K", "R", "C", "S", "P", "act"]]

    Tm_1 = helping_functions.prompt("Please enter the Tm of conv_acc: ")
    acc_params[0].append(Tm_1)
    acc_params_w_bn[0].append(Tm_1)
    Tn_1 = helping_functions.prompt("Please enter the Tn of conv_acc: ")
    acc_params[0].append(Tn_1)
    acc_params_w_bn[0].append(Tn_1)
    Tr_1 = helping_functions.prompt("Please enter the Tr of conv_acc: ")
    acc_params[0].append(Tr_1)
    acc_params_w_bn[0].append(Tr_1)
    Tc_1 = helping_functions.prompt("Please enter the Tc of conv_acc: ")
    acc_params[0].append(Tc_1)
    acc_params_w_bn[0].append(Tc_1)
    Tn_2 = helping_functions.prompt("\nPlease enter the Tn of pool_acc: ")
    acc_params[1].append(Tn_2)
    acc_params[2].append(Tn_2)
    Tr_2 = helping_functions.prompt("Please enter the Tr of pool_acc: ")
    acc_params[1].append(Tr_2)
    acc_params[2].append(Tr_2)
    Tc_2 = helping_functions.prompt("Please enter the Tc of pool_acc: ")
    acc_params[1].append(Tc_2)
    acc_params[2].append(Tc_2)
    '''object & function for conv_layer & pool_layer'''
    init_nm = ["conv_acc", "max_pool_acc", "ave_pool_acc"]
    prm = "data_type, data_type_w, data_type_o"
    init_names = ["convAcc1", "maxPoolAcc1", "avePoolAcc1"]
    acc_fn_names = [
        "conv_layer_acc", "max_pool_layer_acc", "ave_pool_layer_acc"
    ]
    '''object & function for conv_w_bn_layer'''
    init_nm_w_bn = ["conv_acc_w_bn"]
    prm_w_bn = "data_type, data_type_w, data_type_o"
    init_names_w_bn = ["convAcc2"]
    acc_fn_names_w_bn = ["conv_layer_acc_w_bn"]
    '''object & function for resnet fc_layer '''
    init_nm_fc = ["conv_acc_fc"]
    prm_fc = "data_type, data_type_w, data_type_o"
    init_names_fc = ["convAcc3"]
    acc_fn_names_fc = ["conv_layer_acc_fc"]

    for j in range(1, port_num + 1):
        arg_t_list[0].append("data_type")
        arg_t_list[1].append("data_type")
        arg_n_list[0].append("*in_data_" + str(j))
        arg_n_list[1].append("*in_data_" + str(j))
        arg_t_list_w_bn[0].append("data_type")
        arg_n_list_w_bn[0].append("*in_data_" + str(j))
        arg_t_list_fc[0].append("data_type")
        arg_t_list_fc[1].append("data_type")
        arg_n_list_fc[0].append("*in_data_" + str(j))
        arg_n_list_fc[1].append("*in_data_" + str(j))

    for j in range(1, port_num + 1):
        arg_t_list[0].append("data_type_o")
        arg_t_list[1].append("data_type_o")
        arg_n_list[0].append("*out_data_" + str(j))
        arg_n_list[1].append("*out_data_" + str(j))
        arg_t_list_w_bn[0].append("data_type_o")
        arg_n_list_w_bn[0].append("*out_data_" + str(j))
        arg_t_list_fc[0].append("data_type_o")
        arg_t_list_fc[1].append("data_type_o")
        arg_n_list_fc[0].append("*out_data_" + str(j))
        arg_n_list_fc[1].append("*out_data_" + str(j))
    '''write layer acc needed'''
    for i, l in enumerate(layers_fun):
        if l != 0:
            import_str += includes[i] + EOL
            if i > 0:
                j = 1
            else:
                j = 0
            body_str += init_nm[i] + CLASS_BEGIN + prm + COMMA_SPACE
            body_str += ', '.join(acc_params[i])
            body_str += acc_max_stride[i]
            body_str += acc_max_kernel[i]
            body_str += CLASS_END + SPACE + init_names[i] + EOS + EOL * 2
            s = ""
            if "conv_bias_size" in prms_str:
                for k, arg_nm in enumerate(arg_n_list[j]):
                    s += arg_nm.replace("*", "")
                    if k != len(arg_n_list[j]) - 1:
                        s += ", "
            else:
                del arg_t_list[0][11]
                del arg_t_list[0][12]
                del arg_n_list[0][11]
                del arg_n_list[0][12]
                for k, arg_nm in enumerate(arg_n_list[j]):
                    s += arg_nm.replace("*", "")
                    if k != len(arg_n_list[j]) - 1:
                        s += ", "

            body_str += generate_function(
                fn_names[i], rn_tp, arg_t_list[j], arg_n_list[j], [
                    init_names[i] + CALL_SYMBOL + acc_fn_names[i] +
                    PARAMETER_BEGIN + s + PARAMETER_END + EOS
                ])
    '''write conv_acc_w_bn if needed'''
    if "nn_batch_norm_size" in prms_str:
        for i, l in enumerate(layers_fun_w_bn):
            if l != 0:
                import_str += includes_w_bn[i] + EOL
                if i > 0:
                    j = 1
                else:
                    j = 0
                body_str += init_nm_w_bn[
                    i] + CLASS_BEGIN + prm_w_bn + COMMA_SPACE
                body_str += ', '.join(acc_params_w_bn[i])
                body_str += acc_max_stride[i]
                body_str += acc_max_kernel[i]
                body_str += CLASS_END + SPACE + init_names_w_bn[
                    i] + EOS + EOL * 2
                s = ""
                if "conv_bias_size" in prms_str:
                    for k, arg_nm in enumerate(arg_n_list_w_bn[j]):
                        if j == 0:
                            if k == 15:
                                s += EOL + "#if _SCALE_" + EOL + SEPARATER
                                s += arg_nm.replace("*", "")
                            elif k == 18:
                                s += EOL + "#endif" + EOL + SEPARATER
                                s += arg_nm.replace("*", "")
                            else:
                                s += arg_nm.replace("*", "")
                        else:
                            s += arg_nm.replace("*", "")
                        if k != len(arg_n_list_w_bn[j]) - 1:
                            s += ", "
                else:
                    del arg_t_list_w_bn[0][11]
                    del arg_t_list_w_bn[0][18]
                    del arg_n_list_w_bn[0][11]
                    del arg_n_list_w_bn[0][18]
                    for k, arg_nm in enumerate(arg_n_list_w_bn[j]):
                        if j == 0:
                            if k == 14:
                                s += EOL + "#if _SCALE_" + EOL + SEPARATER
                                s += arg_nm.replace("*", "")
                            elif k == 17:
                                s += EOL + "#endif" + EOL + SEPARATER
                                s += arg_nm.replace("*", "")
                            else:
                                s += arg_nm.replace("*", "")
                        else:
                            s += arg_nm.replace("*", "")
                        if k != len(arg_n_list_w_bn[j]) - 1:
                            s += ", "

                if j == 0:
                    body_str += generate_function_w_bn(
                        fn_names_w_bn[i], rn_tp, arg_t_list_w_bn[j],
                        arg_n_list_w_bn[j], [
                            init_names_w_bn[i] + CALL_SYMBOL +
                            acc_fn_names_w_bn[i] + PARAMETER_BEGIN + s +
                            PARAMETER_END + EOS
                        ])
    '''write fc layer acc if needed'''
    if "nn_in_number_eltwise_size" in prms_str:
        for i, l in enumerate(layers_fun_fc):
            if l != 0:
                import_str += includes_fc[i] + EOL
                if i > 0:
                    j = 1
                else:
                    j = 0
                body_str += init_nm_fc[i] + CLASS_BEGIN + prm_fc + COMMA_SPACE
                body_str += ', '.join(acc_params[i])
                body_str += acc_max_stride[i]
                body_str += acc_max_kernel[i]
                body_str += CLASS_END + SPACE + init_names_fc[i] + EOS + EOL * 2
                s = ""
                for k, arg_nm in enumerate(arg_n_list_fc[j]):
                    s += arg_nm.replace("*", "")
                    if k != len(arg_n_list_fc[j]) - 1:
                        s += ", "

                body_str += generate_function(
                    fn_names_fc[i], rn_tp, arg_t_list_fc[j], arg_n_list_fc[j],
                    [
                        init_names_fc[i] + CALL_SYMBOL + acc_fn_names_fc[i] +
                        PARAMETER_BEGIN + s + PARAMETER_END + EOS
                    ])

    import_str += "#include \"config.h\"" + EOL * 2
    str1 += import_str + body_str + EOL * 2 + ENDIF
    with open("../example/test_demo/inference_net/" + generated_file_name,
              "w") as generated_file:
        generated_file.write(str1)

    return str1
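
For reference, a small self-contained sketch of the per-port argument expansion performed above; here port_num, data_type and data_type_o are placeholders rather than the project's globals:

port_num = 2
arg_types = ["int", "int"]
arg_names = ["N", "K"]
for j in range(1, port_num + 1):          # one input pointer per port
    arg_types.append("data_type")
    arg_names.append("*in_data_" + str(j))
for j in range(1, port_num + 1):          # one output pointer per port
    arg_types.append("data_type_o")
    arg_names.append("*out_data_" + str(j))
print(", ".join(t + " " + n for t, n in zip(arg_types, arg_names)))
# -> int N, int K, data_type *in_data_1, data_type *in_data_2, data_type_o *out_data_1, data_type_o *out_data_2
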
Example n. 10
def generate():
    """Tm * Tn < DSP/ 5"""

    max_ratio = 0
    max_sk = []
    tm_tn_tr_tc = [32, 8, 16, 16]
    for s in range(1, 5):
        k_max = min(80 - 15 * s, 11)
        for k in range(s, k_max):
            ctc_ratio = (32 * 16 * 16 * (
                (15 * s + k) *
                (15 * s + k) * 8 + 1)) / (4 * (8 * 32 * k * k + 32 + 8 *
                                               (15 * s + k) *
                                               (15 * s + k) + 32 * 16 * 16))
            if ctc_ratio > max_ratio:
                max_ratio = ctc_ratio
                max_sk = [s, k]
    print("max ctc and s,k")
    print(max_ratio)
    print(max_sk)

    arr2 = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr2)

    init_conv_N = prms[prms_str.index("nn_in_number_conv")]
    init_conv_r = prms[prms_str.index("nn_in_data_size_conv")]
    init_conv_M = prms[prms_str.index("nn_out_number_conv")]
    init_conv_P = prms[prms_str.index("nn_padding_conv")]
    init_conv_K = prms[prms_str.index("nn_channel_size_conv")]
    init_conv_S = prms[prms_str.index("nn_stride_conv")]
    init_conv_G = prms[prms_str.index("nn_group_conv")]
    init_fc_N = prms[prms_str.index("nn_in_number_fc")]
    init_fc_Rin = prms[prms_str.index("nn_in_data_size_fc")]
    init_fc_M = prms[prms_str.index("nn_out_number_fc")]
    init_fc_K = prms[prms_str.index("nn_channel_size_fc")]

    #for fc_in_number in nn_in_number_fc_values:
    #    nn_in_number_conv_values1.append(fc_in_number)

    #for fc_out_number in nn_out_number_fc_values:
    #    nn_out_number_conv_values1.append(fc_out_number)

    #for kernel_size in nn_channel_size_fc_values:
    #    nn_channel_size_conv_values.append(kernel_size)

    #for conv_in_size in nn_in_data_sizes_fc:
    #    nn_in_data_sizes_conv.append(1)

    #for stride_value in nn_channel_size_fc_values:
    #    nn_stride_values1.append(stride_value)

    conv_N = [int(string) for string in init_conv_N]
    conv_M = [int(string) for string in init_conv_M]
    conv_r = [int(string) for string in init_conv_r]
    conv_K = [int(string) for string in init_conv_K]
    conv_S = [int(string) for string in init_conv_S]
    conv_P = [int(string) for string in init_conv_P]
    conv_G = [int(string) for string in init_conv_G]
    max_conv_N = max(conv_N)
    max_conv_M = max(conv_M)
    max_conv_S = max(conv_S)
    max_conv_K = max(conv_K)

    conv_R = []
    conv_layer_num = int(len(conv_r))
    for r in range(0, conv_layer_num):
        R = (conv_r[r] - conv_K[r] + conv_S[r] + 2 * conv_P[r]) / conv_S[r]
        conv_R.append(R)

    print("conv_N")
    print(conv_N)
    print("conv_M")
    print(conv_M)
    print("conv_r")
    print(conv_r)
    print("conv_R")
    print(conv_R)
    print("conv_K")
    print(conv_K)

    DSP = 6840
    #DSP = 2800
    d = int(DSP / 5)
    arr = []

    Tm_min = 1
    Tn_min = 1
    min_Tm_Tn = []
    conv_min_cycles = 0
    min_cycle_list = []
    for o in range(0, conv_layer_num):
        conv_min_cycles += conv_R[o] * conv_R[o] * math.ceil(
            int(conv_M[o]) / float(Tm_min)) * math.ceil(
                int(conv_N[o]) / float(Tn_min)) * conv_K[o] * conv_K[o]
    min_Tm_Tn.append([1, 1])
    min_cycle_list.append(conv_min_cycles)

    print("Analysis initialized point: ", min_cycle_list, min_Tm_Tn)

    target = 0
    for j in range(0, conv_layer_num):
        target += int(
            conv_R[j] * conv_R[j] * math.ceil(int(conv_N[j]) / float(32)) *
            math.ceil(int(conv_M[j]) / float(87)) * conv_K[j] * conv_K[j])
    print("targeted cycle numbers [87, 32]")
    print(target)

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') is not accepted by recent Matplotlib
    # ax = Axes3D(fig)
    ax.set_title("3D Figure")
    ax.set_xlabel("Tm")
    ax.set_ylabel("Tn")
    ax.set_zlabel("Cycles")

    x_axis = [i for i in range(1, 100 + 1)]
    y_axis = [j for j in range(1, 100 + 1)]
    XX, YY = np.meshgrid(x_axis, y_axis)
    ZZ = np.zeros((100, 100))

    conv_layer_num = int(len(conv_M))
    for Tm in range(1, max_conv_M + 1):
        Tn_max = min(max_conv_N, int(int(d / Tm)), Tm)
        for Tn in range(1, Tn_max + 1):
            cycles = 0
            for j in range(1, conv_layer_num):
                cycles += int(conv_R[j] * conv_R[j] *
                              math.ceil(int(conv_N[j]) / float(Tn)) *
                              math.ceil(int(conv_M[j]) / float(Tm)) *
                              conv_K[j] * conv_K[j])

            if cycles > 0 and Tm < 100 and Tn < 100:
                ZZ[Tm, Tn] = cycles
            else:
                if Tm < 100 and Tn < 100:
                    ZZ[Tm, Tn] = 0

            if cycles < min(min_cycle_list) and cycles != 0:
                conv_min_cycles = cycles
                Tm_min = Tm
                Tn_min = Tn
                if len(min_Tm_Tn) < 5:
                    min_Tm_Tn.append([Tm, Tn])
                    min_cycle_list.append(conv_min_cycles)
                else:
                    max_among_mins = min_cycle_list.index(max(min_cycle_list))
                    min_cycle_list.remove(min_cycle_list[max_among_mins])
                    min_Tm_Tn.remove(min_Tm_Tn[max_among_mins])
                    min_cycle_list.append(conv_min_cycles)
                    min_Tm_Tn.append([Tm, Tn])

    surf = ax.plot_surface(XX,
                           YY,
                           ZZ,
                           rstride=1,
                           cstride=1,
                           cmap=cm.coolwarm,
                           linewidth=0,
                           antialiased=True)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    #plt.pause(1)
    plt.show()

    print("Tm and Tn")
    print(min_Tm_Tn)
    print("cycles")
    print(min_cycle_list)

    min_among_all = min_cycle_list.index(min(min_cycle_list))
    print("Best among all points", min_cycle_list[min_among_all],
          min_Tm_Tn[min_among_all])

    in_buf = 0
    out_buf = 0
    w_buf = 0
    Tr_Tc = []
    in_buff_arr = []
    w_buff_arr = []
    out_buff_arr = []
    total_arr = []
    for m in min_Tm_Tn:
        Tr = int(math.sqrt(m[0] * m[1]))
        Tr_Tc.append([Tr, Tr])
        in_buff = ((Tr - 1) * max_conv_S + max_conv_K) * (
            (Tr - 1) * max_conv_S + max_conv_K)
        in_buff_arr.append(in_buff)
        out_buff = m[0] * m[1] * max_conv_K * max_conv_K
        out_buff_arr.append(out_buff)
        w_buff = Tr * Tr * m[0]
        w_buff_arr.append(w_buff)
        total = in_buff + out_buff + w_buff
        total_arr.append(total)
    print("in_buf")
    print(in_buff_arr)
    print("w_buf")
    print(w_buff_arr)
    print("out_buf")
    print(out_buff_arr)
    print("total")
    print(total_arr)
    return arr, conv_min_cycles
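
The buffer numbers printed above follow a simple closed-form estimate. A minimal sketch of the same formulas (the helper name and the tile values in the example call are hypothetical):

import math

def buffer_estimate(Tm, Tn, max_S, max_K):
    Tr = int(math.sqrt(Tm * Tn))                 # square output tile, Tr == Tc
    in_buff = ((Tr - 1) * max_S + max_K) ** 2    # input tile footprint
    out_buff = Tm * Tn * max_K * max_K
    w_buff = Tr * Tr * Tm
    return in_buff, out_buff, w_buff, in_buff + out_buff + w_buff

print(buffer_estimate(32, 8, max_S=2, max_K=7))
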
Example n. 11
def generate_body(arr2, prefix=SEPARATER):
    body_str = EOL * 2
    c_debug = "#if _C_DEBUG_MODE_"
    ker_debug = "#if _KERNEL_DEBUG_"
    end_deb = "#endif"

    body_str += c_debug + EOL + ker_debug + EOL
    body_str += prefix + "cout << " + "\"starting forward network process...........................\" << endl;" + EOL +\
         prefix + "cout << \"...........................................................\" << endl;" + EOL
    body_str += end_deb + EOL + end_deb + EOL * 2
    layers_order = []
    conv_counter = 0
    conv_pool_counter = 0
    conv_lrn_pool_counter = 0
    lrn_counter = 0
    pool_counter = 0
    fc_counter = 0
    """extraction of parameters from 'net_config_params.txt' file"""
    prms, prms_str = helping_functions.extraction(arr2)

    n = prms[prms_str.index("n")]
    nn_in_data_size_conv_values = prms[prms_str.index("nn_in_data_size_conv")]
    nn_channel_size_conv_values = prms[prms_str.index("nn_channel_size_conv")]
    nn_padding_conv_values = prms[prms_str.index("nn_padding_conv")]
    nn_stride_conv_values = prms[prms_str.index("nn_stride_conv")]
    nn_in_number_conv_values = prms[prms_str.index("nn_in_number_conv")]
    nn_out_number_conv_values = prms[prms_str.index("nn_out_number_conv")]
    nn_group_conv_values = prms[prms_str.index("nn_group_conv")]
    nn_local_size_lrn_values = prms[prms_str.index("nn_local_size_lrn")]
    nn_in_data_size_pooling_values = prms[prms_str.index(
        "nn_in_data_size_pooling")]
    nn_channel_size_pooling_values = prms[prms_str.index(
        "nn_channel_size_pooling")]
    nn_padding_pooling_values = prms[prms_str.index("nn_padding_pooling")]
    nn_stride_pooling_values = prms[prms_str.index("nn_stride_pooling")]
    nn_in_number_pooling_values = prms[prms_str.index("nn_in_number_pooling")]
    nn_in_number_fc_values = prms[prms_str.index("nn_in_number_fc")]
    nn_channel_size_fc_values = prms[prms_str.index("nn_channel_size_fc")]
    nn_out_number_fc_values = prms[prms_str.index("nn_out_number_fc")]
    layers_order = prms[prms_str.index("layers_order")]
    nn_in_data_size_fc_values = prms[prms_str.index("nn_in_data_size_fc")]
    if "nn_padding_fc" in prms_str:
        nn_padding_fc_values = prms[prms_str.index("nn_padding_fc")]
    else:
        nn_padding_fc_values = ["0"] * len(nn_in_number_fc_values)
    strides = [[], [], [], [], [], [], [], []]
    kernels = [[], [], [], [], [], [], [], []]
    acc_str = EOL + "Accelerators: " + EOL
    function_calls = ""
    w_port = "conv_weight_port"
    b_port = "conv_bias_port"
    fc_w_port = "fc_weight_port"
    fc_b_port = "fc_bias_port"
    out_data1 = "output_temp_1"
    out_data2 = "output_temp_2"
    in_data = out_data2
    out_data = out_data1
    alpha = "nn_alpha_lrn"
    beta = "nn_beta_lrn"
    act_type = "activation_type"
    shift_w = "shift_weight"
    shift_b = "shift_bias"
    in_shift = "in_shift"
    out_shift = "out_shift"
    shifts = ""
    conv_weight = 0
    conv_bias = 0
    fc_weight = 0
    fc_bias = 0
    cw = ""
    cb = ""
    cc = 1
    clean_count = 1
    """"layer_exist = [conv_act, conv_no_act, pool_max_act, pool_ave_act, pool_max_no_act, pool_ave_no_act]"""
    for i, l in enumerate(layers_order):
        if l.lower().startswith("convolution"):
            last = nn_out_number_conv_values[conv_counter]
            last1 = int(math.ceil((int(nn_in_data_size_conv_values[conv_counter]) + int(nn_padding_conv_values[conv_counter]) * 2 -\
                                  int(nn_channel_size_conv_values[conv_counter]))/float(nn_stride_conv_values[conv_counter]) + 1))
            fun = "conv_layer_new"
            act = 0
            if layers_order[i + 1].lower() == "relu":
                act = 1
                strides[0].append(int(nn_stride_conv_values[conv_counter]))
                kernels[0].append(
                    int(nn_channel_size_conv_values[conv_counter]))
            else:
                strides[1].append(int(nn_stride_conv_values[conv_counter]))
                kernels[1].append(
                    int(nn_channel_size_conv_values[conv_counter]))

            for k in range(int(nn_group_conv_values[conv_counter])):
                in_shift = "0"
                out_shift = "0"
                cw = str(conv_weight)
                cb = str(conv_bias)

                shifts += prefix + "int " + shift_w + "_conv" + str(
                    cc) + "_" + str(k + 1) + EQUAL + cw + EOS + EOL
                shifts += prefix + "int " + shift_b + "_conv" + str(
                    cc) + "_" + str(k + 1) + EQUAL + cb + EOS + EOL

                if k + 1 == int(nn_group_conv_values[conv_counter]) and k > 0:
                    in_shift = "in_shift_" + str(cc)
                    out_shift = "out_shift_" + str(cc)
                in_n = int(
                    math.ceil(
                        int(nn_in_number_conv_values[conv_counter]) /
                        float(nn_group_conv_values[conv_counter])))
                out_n = int(
                    math.ceil(
                        int(nn_out_number_conv_values[conv_counter]) /
                        float(nn_group_conv_values[conv_counter])))
                function_calls += generate_function_calls1(
                    fun, [
                        str(in_n),
                        str(nn_channel_size_conv_values[conv_counter]),
                        str(out_n),
                        str(last1),
                        str(last1), nn_stride_conv_values[conv_counter],
                        nn_padding_conv_values[conv_counter],
                        str(act), in_data, w_port, b_port, out_data,
                        shift_w + "_conv" + str(cc) + "_" + str(k + 1),
                        shift_b + "_conv" + str(cc) + "_" + str(k + 1),
                        in_shift, out_shift
                    ])

                if k + 1 == int(nn_group_conv_values[conv_counter]):
                    if k > 0:
                        in_sh = str(
                            int(nn_in_number_conv_values[conv_counter]) /
                            int(nn_group_conv_values[conv_counter])
                        ) + "*" + str(last1) + "*" + str(last1)
                        out_sh = str(
                            int(nn_out_number_conv_values[conv_counter]) /
                            int(nn_group_conv_values[conv_counter])
                        ) + "*" + str(last1) + "*" + str(last1)
                        shifts += prefix + "int " + in_shift + EQUAL + in_sh + EOS + EOL
                        shifts += prefix + "int " + out_shift + EQUAL + out_sh + EOS + EOL

                    cc = cc + 1
                    shifts += EOL

                conv_weight += int(nn_in_number_conv_values[conv_counter])*int(nn_out_number_conv_values[conv_counter])/\
                int(nn_group_conv_values[conv_counter])*int(nn_channel_size_conv_values[conv_counter])*int(nn_channel_size_conv_values[conv_counter])/int(nn_group_conv_values[conv_counter])
                conv_bias += int(
                    nn_out_number_conv_values[conv_counter]) / int(
                        nn_group_conv_values[conv_counter])

            conv_counter = conv_counter + 1

        elif l.lower() == "lrn":
            body_str += generate_layer_init(
                "lrn_layer",
                [last, nn_local_size_lrn_values[lrn_counter],
                 str(last1)])
            body_str += " L" + str(lrn_counter + 1) + EOS + EOL
            alpha1 = alpha + ARRAY_BEGIN + str(lrn_counter) + ARRAY_END
            beta1 = beta + ARRAY_BEGIN + str(lrn_counter) + ARRAY_END
            function_calls += generate_function_calls(
                "L", "lrn", layers_order[i + 1], str(lrn_counter + 1),
                [alpha1, beta1, in_data, out_data])
            lrn_counter = lrn_counter + 1

        elif l.lower() == "avepooling" or l.lower() == "maxpooling":
            last = nn_in_number_pooling_values[pool_counter]

            if layers_order[i + 1].lower() == "relu":

                if l.lower() == "maxpooling":
                    fun = "max_pool_layer_new"
                    strides[2].append(
                        int(nn_stride_pooling_values[pool_counter]))
                    kernels[2].append(
                        int(nn_channel_size_pooling_values[pool_counter]))
                if l.lower() == "avepooling":
                    fun = "ave_pool_layer_new"
                    strides[3].append(
                        int(nn_stride_pooling_values[pool_counter]))
                    kernels[3].append(
                        int(nn_channel_size_pooling_values[pool_counter]))
            else:
                if l.lower() == "maxpooling":
                    fun = "max_pool_layer_new_noact"
                    strides[4].append(
                        int(nn_stride_pooling_values[pool_counter]))
                    kernels[4].append(
                        int(nn_channel_size_pooling_values[pool_counter]))
                if l.lower() == "avepooling":
                    fun = "ave_pool_layer_new_noact"
                    strides[5].append(
                        int(nn_stride_pooling_values[pool_counter]))
                    kernels[5].append(
                        int(nn_channel_size_pooling_values[pool_counter]))
            last2 = int(math.ceil((int(nn_in_data_size_pooling_values[pool_counter]) + int(nn_padding_pooling_values[pool_counter]) * 2 -\
                                  int(nn_channel_size_pooling_values[pool_counter]))/float(nn_stride_pooling_values[pool_counter]) + 1))
            function_calls += generate_function_calls1(fun, [
                str(last1),
                str(last1), nn_in_number_pooling_values[pool_counter],
                nn_channel_size_pooling_values[pool_counter],
                str(last2),
                str(last2), nn_stride_pooling_values[pool_counter],
                nn_padding_pooling_values[pool_counter], in_data, out_data
            ])
            last1 = last2
            pool_counter = pool_counter + 1

        elif l.lower() == "innerproduct" or l.lower() == "inner_product":
            last = nn_out_number_fc_values[fc_counter]
            fun = "conv_layer_new"
            act = 0
            if fc_counter > 0:
                fc_weight += int(nn_in_number_fc_values[fc_counter])*int(nn_in_number_fc_values[fc_counter-1])*\
                      int(nn_channel_size_fc_values[fc_counter-1])*int(nn_channel_size_fc_values[fc_counter-1])
                fc_bias += int(nn_out_number_fc_values[fc_counter - 1])
            b = False
            if l.lower() == "innerproduct":
                for ll in layers_order[(i + 1):]:
                    if ll.lower() == "innerproduct":
                        b = True
                        break
            if i + 1 != len(layers_order):
                if layers_order[i + 1].lower() == "relu":

                    act = 1
                    kernels[6].append(
                        int(nn_channel_size_fc_values[fc_counter]))
                else:

                    kernels[7].append(
                        int(nn_channel_size_fc_values[fc_counter]))

            shifts += prefix + "int " + shift_w + "_fc" + str(
                fc_counter + 1) + EQUAL + str(fc_weight) + EOS + EOL
            shifts += prefix + "int " + shift_b + "_fc" + str(
                fc_counter + 1) + EQUAL + str(fc_bias) + EOS + EOL * 2

            fun = "conv_layer_new"

            function_calls += generate_function_calls1(fun, [
                nn_in_number_fc_values[fc_counter],
                nn_channel_size_fc_values[fc_counter],
                nn_out_number_fc_values[fc_counter], "1", "1",
                nn_in_data_size_fc_values[fc_counter],
                nn_padding_fc_values[fc_counter],
                str(act), in_data, fc_w_port, fc_b_port, out_data,
                shift_w + "_fc" + str(fc_counter + 1),
                shift_b + "_fc" + str(fc_counter + 1), "0", "0"
            ])

            fc_counter = fc_counter + 1

        if l.lower().startswith("convolution") or l.lower(
        ) == "lrn" or l.lower() == "maxpooling" or l.lower(
        ) == "avepooling" or (l.lower() == "innerproduct" and b == True):
            if out_data == out_data1:

                in_data = out_data1
                out_data = out_data2
            else:
                in_data = out_data2
                out_data = out_data1

            function_calls += prefix + "clean_" + str(
                clean_count
            ) + ":" + prefix + helping_functions.generate_for_loop1(
                "addr", "int", "0", prms[prms_str.index("maximum")],
                out_data + "[addr] = data_type_o(0);") + EOL
            clean_count = clean_count + 1

        if l.lower() == "innerproduct" and b == False:
            function_calls += prefix + helping_functions.generate_for_loop(
                "i", "int", "0",
                nn_out_number_fc_values[len(nn_out_number_fc_values) - 1],
                ["fc_" + n + "_out_a[i] = " + out_data + "[i];"], 1, 1)

    counters = [
        conv_counter, pool_counter, lrn_counter, fc_counter, conv_pool_counter,
        conv_lrn_pool_counter
    ]

    body_str += EOL + shifts + EOL

    body_str += function_calls
    body_str += EOL * 2 + c_debug + EOL + ker_debug + EOL
    body_str += prefix + "cout << \"Finished forward network process ..........................\" << endl;" + EOL +\
         prefix + "cout << \"...........................................................\" << endl;" + EOL
    body_str += end_deb + EOL + end_deb + EOL
    body_str += BODY_END + EOL

    layers1 = [
        "conv_act", "conv_no_act", "pool_max_act", "pool_ave_act",
        "pool_max_no_act", "pool_ave_no_act", "fc_act", "fc_no_act"
    ]
    for k1 in range(len(kernels)):

        if len(kernels[k1]) != 0:
            acc_str += layers1[k1] + " - kernel: " + str(max(kernels[k1]))
            if len(strides[k1]) != 0:
                acc_str += ", stride: " + str(max(strides[k1])) + EOL
            else:
                acc_str += ", stride: 1" + EOL


    w_fc_last = fc_weight + int(nn_in_number_fc_values[fc_counter-1])*int(nn_out_number_fc_values[fc_counter-1])*\
     int(nn_channel_size_fc_values[fc_counter-1])*int(nn_channel_size_fc_values[fc_counter-1])
    b_fc_last = fc_bias + int(nn_out_number_fc_values[fc_counter - 1])

    w_b_arr = [
        conv_weight, conv_bias, w_fc_last, b_fc_last,
        nn_out_number_fc_values[len(nn_out_number_fc_values) - 1],
        prms[prms_str.index("maximum")], prms[prms_str.index("maximum")],
        prms[prms_str.index("n")]
    ]
    return body_str, counters, acc_str, w_b_arr
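
The generated forward pass alternates between the two temporary output buffers after every layer that writes a full feature map. A minimal sketch of that ping-pong swap (the layer names are illustrative only):

in_data, out_data = "output_temp_2", "output_temp_1"
for layer in ["conv1", "pool1", "conv2"]:
    print(layer, "reads", in_data, "writes", out_data)
    in_data, out_data = out_data, in_data
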
Example n. 12
def generate_body(body_json, out_json, comm_json, arr, prefix=SEPARATER):

	# Ask whether the test image is colored or grayscale; this sets the channel count.
	col_gray = ""
	while (col_gray != "color" and col_gray != "grayscale"):
		col_gray = input("Please enter color specification input (color, grayscale): ")
		if col_gray == "color":
			chn = 3
		elif col_gray == "grayscale":
			chn = 1
		else:
			print("Please enter \"color\" for a colored image and \"grayscale\" for a grayscale one")


	sz = "sizeof"
	ms = "memset"
	body_str = EOL
	body_str1 = ""
	alloc_str = EOL + prefix + comm_json[11] + EOL
	body_str += prefix + out_json[0] + EOL
	body_str += EOL + prefix + comm_json[10] 
	body_str += EOL
	value = ""
        arr1, arr1_str = helping_functions.extraction(arr)
	arrr = arr1[arr1_str.index("in_data_mem_size")].split(" * ")
	arr2 = arr1[arr1_str.index("conv_weight_size")].split(" + ")
	arr3 = arr1[arr1_str.index("conv_bias_size")].split(" + ")
	arr4 = arr1[arr1_str.index("fc_weight_size")].split(" + ")
	arr5 = arr1[arr1_str.index("fc_bias_size")].split(" + ")
	n_layers = arr1[arr1_str.index("n")]

	fc_out = "fc_" + str(n_layers) + "_out"
	for k, var_sen in enumerate(body_json["var_init"]):
		body_str += prefix + var_sen["type"] + SPACE
		if var_sen["name"] == "fc_out_size":
			body_str += fc_out + "_size"
		else:
			body_str += var_sen["name"]
		body_str += EQUAL + PARAMETER_BEGIN
		if (var_sen["name"] == "out_1_size" or var_sen["name"] == "out_2_size"):
			body_str += arr1[arr1_str.index("maximum")]
		else:
			body_str += arr1[arr1_str.index(var_sen["name"])]
		body_str += PARAMETER_END + MULT + sz +\
			    PARAMETER_BEGIN + var_sen["size"] + PARAMETER_END +\
			    EOS + EOL

		if (var_sen["memory"] == "in_data_mem_port" or var_sen["memory"] == "fc_8_out_mem_int"):
			alloc_str += KERNEL + EOL
			ker = 1

		alloc_str += prefix + var_sen["size"] + SEPARATER + "*"

		if var_sen["name"] == "fc_out_size":
				cond1 = "fc_" + str(n_layers) + "_out_mem_int"
				alloc_str += "fc_" + str(n_layers) + "_out_mem_int"
				fcout = "fc_" + str(n_layers) + "_out_size"
		else:
				cond1 = var_sen["memory"]
				alloc_str += var_sen["memory"]
				fcout = var_sen["name"]
		alloc_str += EQUAL + PARAMETER_BEGIN + var_sen["size"] + "*" + PARAMETER_END +\
			     "malloc" + PARAMETER_BEGIN + fcout + PARAMETER_END + EOS + EOL
			
		cond = cond1 + " == " + NULL
		alloc_str += prefix + helping_functions.generate_if(cond, [out_json[1] + var_sen["memory"] + "\\n\"" +\
			     PARAMETER_END + EOS], ["printf(\"" + var_sen["location"] + "= 0x%x \\n\", " + cond1 + PARAMETER_END + EOS], 1)
		if ker == 1:
			alloc_str += PREP_ENDIF + EOL
			ker = 0
	body_str1 += KERNEL + EOL
	body_str1 += prefix + out_json[2] + EOL
	body_str1 += prefix + "memset(fc_" + str(n_layers) + "_out_mem_int, 0, fc_" + str(n_layers) + "_out_size);" + EOL
	body_str1 += prefix + "memset(temp_out_1, 0, out_1_size);" + EOL
	body_str1 += prefix + "memset(temp_out_2, 0, out_2_size);" + EOL
	body_str1 += PREP_ENDIF + EOL*2

	body_str1 += prefix + comm_json[0] + EOL
	body_str1 += HLS + EOL
	body_str1 += prefix + "const char* weight_src = \"net_weights.txt\";" + EOL
	body_str1 += PREP_ELSE + EOL
	body_str1 += prefix + out_json[3] + EOL
	body_str1 += prefix + "const char* weight_src = \"net_inputs/net_weights.txt\";" + EOL
	body_str1 += prefix + out_json[4] + EOL
	body_str1 += PREP_ENDIF + EOL
	body_str1 += prefix + comm_json[1] + EOL
	body_str1 += HLS + EOL
	body_str1 += prefix + "ifstream ifs1(\"net_mean.txt\");" + EOL
	body_str1 += PREP_ELSE + EOL
	body_str1 += prefix + "ifstream ifs1(\"net_inputs/net_mean.txt\");" + EOL
	body_str1 += PREP_ENDIF + EOL	
	body_str1 += EOL*3

	body_str1 += prefix + "float channel_mean[3] = { 0 };" + EOL +\
		     prefix + "string str1;" + EOL +\
		     prefix + "string y1 = \"[\";" + EOL +\
		     prefix + "string y2 = \"]\";" + EOL +\
		     prefix + helping_functions.generate_if("!ifs1", [out_json[5], "getchar();"], "", 1) 
	body_str1 += prefix + "int index = 0;" + EOL
	body_str1 += prefix + helping_functions.generate_while("ifs1 >> str1", ["int p1 = str1.find(y1, 0);", 
		     helping_functions.generate_if("p1 >= 0", ["str1.erase(p1, y1.length());"], "", 2),
		     "int p2 = str1.find(y2, 0);", 
		     helping_functions.generate_if("p2 >= 0", ["str1.erase(p2, y2.length());"], "", 2),
		     "float f = atof(str1.c_str());",
		     "channel_mean[index] = f;",
		     "index++;"], 1)
	body_str1 += prefix + "ifs1.close();" + EOL*2
	body_str1 += prefix + comm_json[2] + EOL
	body_str1 += HLS + EOL
	body_str1 += prefix + "ifstream ifs(\"val.txt\");" + EOL
	body_str1 += PREP_ELSE + EOL
	body_str1 += prefix + "ifstream ifs(\"net_inputs/val.txt\");" + EOL
	body_str1 += PREP_ENDIF + EOL
	body_str1 += prefix + "string val_name[10];" + EOL + prefix + "float val_class[10];" +\
		     EOL + prefix + "string str;" + EOL
	body_str1 += prefix + helping_functions.generate_if("!ifs", [out_json[6], "getchar();"], "", 1)
	body_str1 += prefix + "int num = 0;" + EOL
	body_str1 += prefix + helping_functions.generate_while("ifs >> str&&num<20", 
				             [helping_functions.generate_if("num % 2 == 1", ["val_class[num / 2] = int(atof(str.c_str()));"], 
					     ["val_name[num / 2] = str;"], 2), "num++;"], 1)
	body_str1 += prefix + "ifs.close();" + EOL*2
	indata_mem = arr1[arr1_str.index("in_data_mem_size")].split(" * ")
	if chn == 3:
		height = helping_functions.prompt("Please enter the height of the image: ")
		width = helping_functions.prompt("Please enter the width of the image: ")
		
		body_str1 += prefix + comm_json[3] + EOL
		body_str1 += KERNEL + EOL + HLS + EOL +\
	   		     prefix + "string image_dir = \"" + sys.argv[2] + "\";" + EOL + PREP_ELSE + EOL +\
			     prefix + "string image_dir = \"./net_inputs/test_imgs/" + sys.argv[2] + "\"" + EOS + EOL +\
			     PREP_ENDIF + EOL
		
		body_str1 += prefix + "float in_data_3D_channel_swap[3" + ARRAY_END +\
			     ARRAY_BEGIN + height + ARRAY_END + ARRAY_BEGIN + width +"] = { 0 };" +\
			     EOL + prefix + "float in_data_3D[3" + ARRAY_END +\
			     ARRAY_BEGIN + indata_mem[1] + ARRAY_END + ARRAY_BEGIN + indata_mem[2] +\
			     "] = { 0 };" + EOL +\
			     prefix + "int crop_w = " + arrr[1] + ";"+ EOL + prefix + "int crop_h = " +\
			     arrr[1] + ";" + EOL + prefix + "int w;" + EOL + prefix + "int h;" + EOL +\
			     prefix + "int channels;" +\
			     EOL + prefix + "int size;" + EOL + prefix +\
			     "const unsigned char * data = loadfile(image_dir, size);" + EOL +\
			     prefix + "const unsigned char * image_orig = stbi_load_from_memory(data, size, &w, &h, &channels, 3);" +\
			     EOL 
	
		body_str1 += prefix + helping_functions.generate_for_loop("i", "int", 0, 3, [helping_functions.generate_for_loop("j", "int", "i", "w*h*3", 
			     ["in_data_3D_channel_swap[2 - i][j / (w * 3)][(j % (w * 3) - i) / 3] = (float)image_orig[j]; //range:0--255"], 2, 3)], 1, 1)
		body_str1 += prefix + helping_functions.generate_for_loop("i", "int", 0, 3, [helping_functions.generate_for_loop("j", "int", 0, "h", 
			     [helping_functions.generate_for_loop("k", "int", 0, "w", ["in_data_3D_channel_swap[i][j][k] /= 255;// range:0--1"], 3, 1)], 2, 1)], 1, 1)
		body_str1 += prefix + comm_json[9] + EOL
		body_str1 += prefix + helping_functions.generate_for_loop("i", "int", 0, 3, [helping_functions.generate_for_loop("j", "int", 0, "crop_h", 
			     [helping_functions.generate_for_loop("k", "int", 0, "crop_w", ["in_data_3D[i][j][k] = in_data_3D[i][j][k] * 255 - channel_mean[i];"], 
			     3, 1)], 2, 1)], 1, 1)
		body_str1 += prefix + out_json[10] + EOL
		body_str1 += prefix + out_json[11] + EOL + prefix + out_json[11] + EOL +\
			     prefix + "int in_data_size=0;" + EOL +\
			     prefix + helping_functions.generate_for_loop("i", "int", 0, 3, [helping_functions.generate_for_loop("j", "int", 0, "crop_h", 
			     [helping_functions.generate_for_loop("k", "int", 0, "crop_w", ["temp_out_2[in_data_size] = (data_type)in_data_3D[i][j][k];",
			     "in_data_size++;"], 3, 1)], 2, 1)], 1, 1) 
		body_str1 += prefix + out_json[12] + EOL*2
		body_str1 += PREP_ENDIF + EOL*2
	else:
		body_str1 += KERNEL + EOL + prefix + "data_type in_data_3D[" +\
			     indata_mem[1] + "*" + indata_mem[2] +"]" + "= { 0 };" + EOL 
		body_str1 += HLS + EOL + "ifstream ifs2(\"" + sys.argv[2] + "\");" + EOL + PREP_ELSE + EOL
		body_str1 += prefix + "ifstream ifs2(\"./net_inputs/test_imgs/" + sys.argv[2] + "\");" + EOL + PREP_ENDIF + EOL 
		body_str1 += prefix + "string str2;" + EOL + prefix + "int count = 0;" + EOL
		body_str1 += prefix + helping_functions.generate_while("ifs2 >> str2", ["float f = atof(str2.c_str());",
			     "in_data_3D[count] = (data_type)f;",
			     "count++;"], 1)
		body_str1 += prefix + "int in_data_size=0;" + EOL
		body_str1 += prefix + "ofstream indata;" + EOL + prefix + "indata.open(\"in_data.txt\", ios::app);"+ EOL
		body_str1 += prefix + helping_functions.generate_for_loop("i", "int", 0, 1, [helping_functions.generate_for_loop("j", "int", 0, indata_mem[2], [helping_functions.generate_for_loop("k", "int", 0, indata_mem[2], ["indata << in_data_3D[i *" + indata_mem[2] + "*" + indata_mem[2] + " + " + indata_mem[2] + "*j + k] << \" \";"], 3, 1), "indata << endl;"], 2, 1), "indata << endl;"], 1, 1)
		body_str1 += prefix + "indata.close();" + EOL*2

		body_str1 += prefix + "cout << \"Writing data to input data memory space ... ... ...\" << endl;" + EOL
		body_str1 += prefix + helping_functions.generate_for_loop("i", "int", 0, 1, [helping_functions.generate_for_loop("j", "int", 0, indata_mem[2], [helping_functions.generate_for_loop("k", "int", 0, indata_mem[2], ["temp_out_2[in_data_size] = (data_type)in_data_3D[i*" + indata_mem[2] + "*" + indata_mem[2] + " + " + indata_mem[2] + "*j + k];", "in_data_size++;"], 3, 1)], 2, 1)], 1, 1)
		body_str1 += prefix + "cout << \"Finished writing data to input data memory space ... ...\" << endl;" + EOL + PREP_ENDIF + EOL

	body_str1 += prefix + "char tan_h = 't';" + EOL +\
		     prefix + "char relu = 'r';" + EOL +\
		     prefix + "char none = 'i';" + EOL +\
		     prefix + "int in_number_conv = 0;" + EOL +\
		     prefix + "int in_number_fc = 0;" + EOL +\
		     prefix + "int in_number_pooling = 0;" + EOL*2 

	body_str1 += generate_weights_biases(len(arr2), "conv", arr2, arr3)
	body_str1 += generate_weights_biases(len(arr4), "fc", arr4, arr5)

	body_str1 += KERNEL + EOL +\
		     prefix + "float fc_" + str(n_layers) + "_out[" +arr1[arr1_str.index("fc_out_size")] + "] = { 0 };" + EOL +\
		     prefix + "clock_t start, finish, inf_start, inf_finish;" + EOL +\
		     prefix + "double totaltime, inf_time;" + EOL +\
		     prefix + "start = clock();" + EOL +\
		     PREP_ENDIF + EOL*2
	body_str1 += prefix + comm_json[4] + EOL
	body_str1 += prefix + "inference_net(" + EOL + prefix + comm_json[5] + EOL +\
		     prefix + "relu," + EOL + KERNEL + EOL + prefix + comm_json[6] + EOL +\
		     prefix + "in_data_mem_port, " + EOL + PREP_ENDIF + EOL + prefix + comm_json[7] +\
		     EOL + prefix + "conv_weight_mem_port," + EOL + prefix + "conv_bias_mem_port," +\
    		     EOL + prefix + "fc_weight_mem_port," + EOL + prefix + "fc_bias_mem_port," +\
		     EOL + KERNEL + EOL + prefix + comm_json[8] + EOL + prefix + "fc_" + str(n_layers) + "_out_mem_int," +\
		     EOL + prefix + "temp_out_1," + EOL + prefix + "temp_out_2);" + EOL*2

	body_str1 += prefix + "finish = clock();" + EOL + prefix +\
  		     "totaltime = (double)(finish - start) / CLOCKS_PER_SEC;" +\
		     EOL + prefix + out_json[7] + EOL
    
	body_str1 += prefix + helping_functions.generate_for_loop("i", "int", 0, arr1[arr1_str.index("fc_out_size")], ["fc_" + str(n_layers) + "_out[i]=(float)(fc_" + str(n_layers) + "_out_mem_int[i]);"], 1, 1)
	body_str1 += prefix + "softmax(" + fc_out + ", " + arr1[arr1_str.index("fc_out_size")] + ");" +\
		     EOL + prefix + "predict(" + fc_out +", " + arr1[arr1_str.index("fc_out_size")] + ");" + EOL +\
		     PREP_ENDIF + EOL*2
	body_str1 += prefix + "return 0;" + EOL*2 + BODY_END


	return body_str + alloc_str + body_str1
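
As a small illustration of how the size expressions read from the params file are consumed above, the memory-size strings are split on " * " into their factors (the sample value below is hypothetical):

in_data_mem_size = "3 * 227 * 227"
indata_mem = in_data_mem_size.split(" * ")
print(indata_mem[1], indata_mem[2])  # -> 227 227
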