Ejemplo n.º 1
0
def generate_weights_biases(length, s, arr1, arr2, prefix=SEPARATER):
    """Build the C++ statements that prepare weights and biases for each layer.

    Args:
        length: Number of layers of kind ``s`` to emit statements for.
        s: Layer kind used in the generated identifiers and messages; bias
            statements are only ever emitted for ``"conv"`` and ``"fc"``.
        arr1: Forwarded to ``generate_w_b`` for the weight statements.
        arr2: Forwarded to ``generate_w_b`` for the bias statements.
        prefix: Text placed in front of every line emitted directly here.

    Returns:
        The generated source text as a single string.
    """
    network_params = helping_functions.read_params(sys.argv[1])
    _, param_names = helping_functions.extraction(network_params)

    # Bias code is emitted only for conv/fc layers whose "<kind>_bias_size"
    # entry shows up among the extracted parameter names.
    has_bias = s in ("conv", "fc") and (s + "_bias_size") in param_names

    chunks = []
    for index in range(length):
        layer_no = str(index + 1)
        chunks.append(prefix + "// Prepare weights and bias for " + s + " layer " + layer_no + EOL)
        chunks.append(generate_w_b(s + "_" + layer_no + "_weight2D", arr1, "weight", index, s))
        if has_bias:
            chunks.append(generate_w_b(s + "_" + layer_no + "_bias2D", arr2, "bias", index, s))
        chunks.append(prefix + "in_number_" + s + "++;" + EOL + EOL)

    # Trailing progress messages printed by the generated program.
    chunks.append(prefix + "cout<<\"Finished loading " + s + " weight into memory! Total: \" <<" + s + "_weight_num  << \"... ... ...\"<<endl;" + EOL)
    if has_bias:
        chunks.append(prefix + "cout<<\"Finished loading " + s + " bias into memory! Total: \" <<" + s + "_bias_num  << \"... ... ...\"<<endl;" + EOL * 2)

    return "".join(chunks)
Ejemplo n.º 2
0
def generate(generated_file_name="acc_instance.h"):
    """Generate the accelerator-instance header and write it to disk.

    For every non-zero entry returned by ``layers(...)`` the matching
    ``#include`` line, accelerator object declaration and wrapper function
    are emitted. The result is written to
    ``../example/test_demo/inference_net/<generated_file_name>``.

    Args:
        generated_file_name: Name of the header file to create.

    Returns:
        The full text that was written to the file.
    """
    params = helping_functions.read_params(sys.argv[1])
    limits = helping_functions.max_stride_kernel(params)
    layers_fun = layers(params[1])

    # Parallel tables: position i describes the same accelerator in each list.
    includes = [
        "#include \"conv_acc_innerdf.h\"",
        "#include \"max_pool_acc.h\"",
        "#include \"ave_pool_acc.h\"",
        "#include \"max_pool_acc_innerdf.h\"",
        "#include \"ave_pool_acc_noact.h\"",
    ]
    fn_names = ["conv_layer_new", "max_pool_layer_new", "ave_pool_layer_new", "max_pool_layer_new_noact", "ave_pool_layer_new_noact"]
    init_nm = ["conv_acc", "max_pool_acc", "ave_pool_acc", "max_pool_acc_noact", "ave_pool_acc_noact"]
    init_names = ["convAcc2", "maxPoolAcc1", "avePoolAcc1", "maxPoolAccNoact1", "avePoolAccNoact1"]
    acc_fn_names = ["conv_layer_acc", "max_pool_layer_acc", "ave_pool_layer_acc", "max_pool_layer_acc_noact", "ave_pool_layer_acc_noact"]
    acc_params = [
        ["16", "4", "13", "13", str(limits[0]), str(limits[1])],
        ["16", "16", "16"],
        ["16", "16", "16"],
        ["16", "16", "16", str(limits[2]), str(limits[3])],
        ["16", "16", "16"],
    ]
    rn_tp = "void"
    prm = "data_type, data_type_w, data_type_o"

    # Two wrapper signatures: entry 0 and entry 1 of each list below.
    arg_t_list = [
        ["int", "int", "int", "int", "int", "int", "int", "bool", "data_type", "data_type_w", "data_type_w", "data_type_o", "int", "int", "int", "int"],
        ["int", "int", "int", "int", "int", "int", "int", "int", "data_type", "data_type_o"],
    ]
    arg_n_list = [
        ["N", "K", "M", "R", "C", "S", "P", "act", "*in_data", "*layer_weights", "*layer_bias", "*out_data", "weight_offset", "bias_offset", "in_offset", "out_offset"],
        ["R_in", "C_in", "N", "K", "R", "C", "S", "P", "*in_data", "*out_data"],
    ]

    import_str = ""
    body_str = ""
    for i, l in enumerate(layers_fun):
        if l != 0:
            import_str += includes[i] + EOL

            # NOTE(review): index 1 (max_pool_layer_new) selects signature 0,
            # the same as the conv wrapper -- confirm this is intended.
            j = 1 if i > 1 else 0

            declaration = init_nm[i] + CLASS_BEGIN + prm + COMMA_SPACE
            declaration += ', '.join(acc_params[i])
            declaration += CLASS_END + SPACE + init_names[i] + EOS + EOL * 2
            body_str += declaration

            # Call arguments are the parameter names without the pointer star.
            call_args = ", ".join(arg_nm.replace("*", "") for arg_nm in arg_n_list[j])
            call_stmt = init_names[i] + CALL_SYMBOL + acc_fn_names[i] + PARAMETER_BEGIN + call_args + PARAMETER_END + EOS
            body_str += generate_function(fn_names[i], rn_tp, arg_t_list[j], arg_n_list[j], [call_stmt])

    import_str += "#include \"config.h\"" + EOL * 2

    str1 = "#ifndef _ACC_INSTANCE_H_" + EOL + "#define _ACC_INSTANCE_H_" + EOL * 2
    str1 += import_str + body_str + EOL * 2 + ENDIF

    with open("../example/test_demo/inference_net/" + generated_file_name, "w") as generated_file:
        generated_file.write(str1)

    return str1
Ejemplo n.º 3
0
def generate(generated_file_name="ff_test.cpp"):
    """Generate the test-bench source file and write it to disk.

    The templates come from ``parameter2.json`` in the current working
    directory and the network parameters from the file named by
    ``sys.argv[1]``. The output goes to
    ``../example/test_demo/<generated_file_name>``.

    Args:
        generated_file_name: Name of the source file to create.
    """
    # Use a context manager so the template file is closed even when
    # json.load raises (the handle was previously left open).
    with open("parameter2.json", "r") as para_js:
        json_data = json.load(para_js)
    str1 = ""

    arr = helping_functions.read_params(sys.argv[1])
    str1 += generate_import(json_data["import"])
    str1 += generate_function(json_data["function"], "loadfile")
    str1 += generate_fn_load()
    str1 += generate_function(json_data["function"], "main")
    str_body = generate_body(json_data["body"], json_data["out"], json_data["comments"], arr)
    str1 += str_body

    with open("../example/test_demo/" + generated_file_name, "w") as generated_file:
        generated_file.write(str1)
Ejemplo n.º 4
0
def generate(generated_file_name="construct_net.h"):
    """Generate the network-construction header and write it to disk.

    The templates come from ``parameter.json`` in the current working
    directory and the network parameters from the file named by
    ``sys.argv[1]``. The output goes to
    ``../example/test_demo/inference_net/<generated_file_name>``; the
    ``acc_str`` value returned by ``generate_body`` is printed afterwards.

    Args:
        generated_file_name: Name of the header file to create.
    """
    # Use a context manager so the template file is closed even when
    # json.load raises (the handle was previously left open).
    with open("parameter.json", "r") as para_js:
        json_data = json.load(para_js)

    arr = helping_functions.read_params(sys.argv[1])

    body_s, count, acc_str, wb_arr = generate_body(arr)
    import_s = generate_import(json_data["import"], count)
    header_s = generate_header(json_data["head"], arr)
    pragma_s = generate_pragma(wb_arr)
    end_s = generate_end(json_data["end"])

    function_str = import_s + header_s + pragma_s + body_s + end_s

    with open("../example/test_demo/inference_net/" + generated_file_name,
              "w") as generated_file:
        generated_file.write(function_str)
    print(acc_str)
Ejemplo n.º 5
0
def generate_preprocessor(prep_json):
    """Build the preprocessor section of the generated configuration header.

    The first entry of ``prep_json`` is emitted under a heading comment; the
    remaining entries follow under a second copy of that heading. The
    placeholder ``//#define _SCALE_ 1`` is replaced by an active define when
    ``nn_scale_size`` is among the extracted parameter names, and dropped
    otherwise.

    Args:
        prep_json: Sequence of preprocessor lines taken from the JSON template.

    Returns:
        The generated preprocessor text.
    """
    params = helping_functions.read_params(sys.argv[1])
    _, param_names = helping_functions.extraction(params)

    scale_placeholder = "//#define _SCALE_ 1"
    scaling_enabled = "nn_scale_size" in param_names
    heading = "// C++ compilation debug mode" + EOL

    parts = [EOL, heading, prep_json[0] + EOL * 2, heading]
    for directive in prep_json[1:]:
        if directive != scale_placeholder:
            parts.append(directive + EOL)
        elif scaling_enabled:
            # Activate the scale switch; without scaling the line is omitted.
            parts.append("#define _SCALE_ 1" + EOL)
    return "".join(parts)
Ejemplo n.º 6
0
def generate_function_w_bn(fn_nm,
                           return_type,
                           arg_types_arr,
                           arg_names_arr,
                           fn_body,
                           prefix=SEPARATER):
    """Render a C++ function whose argument list has an ``#if _SCALE_`` guard.

    ``#if _SCALE_`` is emitted in front of the argument at index 15 when
    ``conv_bias_size`` is among the extracted parameter names (index 14
    otherwise), and ``#endif`` in front of the argument three positions later.

    Args:
        fn_nm: Name of the generated function.
        return_type: Return type of the generated function.
        arg_types_arr: Argument types, one per argument.
        arg_names_arr: Argument names, parallel to ``arg_types_arr``.
        fn_body: Iterable of body statements, one per output line.
        prefix: Text placed in front of every argument and body line.

    Returns:
        The generated function definition as a string.
    """
    pieces = [return_type + SPACE + fn_nm + PARAMETER_BEGIN + EOL]

    params = helping_functions.read_params(sys.argv[1])
    _, param_names = helping_functions.extraction(params)

    guard_open = 15 if "conv_bias_size" in param_names else 14
    guard_close = guard_open + 3
    last = len(arg_types_arr) - 1

    for pos, arg_type in enumerate(arg_types_arr):
        if pos == guard_open:
            pieces.append("#if _SCALE_" + EOL)
        elif pos == guard_close:
            pieces.append("#endif" + EOL)
        pieces.append(prefix + arg_type + SPACE + arg_names_arr[pos])
        if pos != last:
            pieces.append("," + EOL)

    pieces.append(PARAMETER_END + SPACE + BODY_BEGIN + EOL * 2)
    pieces.extend(prefix + statement + EOL for statement in fn_body)
    pieces.append(EOL)
    pieces.append(BODY_END + EOL * 2)

    return "".join(pieces)
Ejemplo n.º 7
0
def generate(generated_file_name="config.h"):
    """Generate the configuration header and write it to disk.

    The templates come from ``parameter1.json`` in the current working
    directory and the network parameters from the file named by
    ``sys.argv[1]``. The user is prompted for the input, weight and output
    data types; ``ap_fixed.h`` is included when any of them starts with
    ``ap_fixed`` (case-insensitive). The output goes to
    ``../example/test_demo/inference_net/<generated_file_name>``.

    Args:
        generated_file_name: Name of the header file to create.

    Returns:
        The full text that was written to the file.
    """
    # Use a context manager so the template file is closed even when
    # json.load raises (the handle was previously left open).
    with open("parameter1.json", "r") as para_js:
        json_data = json.load(para_js)
    arr = helping_functions.read_params(sys.argv[1])

    str1 = ""
    str1 += generate_import(json_data["import"])

    t1 = helping_functions.prompt("Please enter the type of input: ")
    t2 = helping_functions.prompt("Please enter the type of weights: ")
    t3 = helping_functions.prompt("Please enter the type of output: ")
    arr1 = [t1, t2, t3]
    if any(t.lower().startswith("ap_fixed") for t in arr1):
        str1 += "#include \"ap_fixed.h\"" + EOL
    str1 += generate_type_definition(json_data["type_definition"], arr1)
    str1 += generate_preprocessor(json_data["preprocessor"])
    str1 += generate_var(json_data["var"])
    str1 += generate_body(json_data["params"], arr)
    str1 += generate_end(json_data["end"])
    with open("../example/test_demo/inference_net/" + generated_file_name,
              "w") as generated_file:
        generated_file.write(str1)
    return str1
def generate(generated_file_name="conv_acc_innerpp_fc.h"):
    """Generate the ``conv_acc_fc`` C++ accelerator header and write it to disk.

    The header text is assembled line by line in ``str1``. It declares the
    template class ``conv_acc_fc`` with buffer-reset helpers, load functions
    for bias/input/weights, the ``conv_engine`` kernel, ``output_res`` and the
    top-level ``conv_layer_acc_fc`` member. The number of ``in_data_<j>`` /
    ``out_data_<j>`` pointer arguments in the emitted code is driven by
    ``port_num``.

    NOTE(review): ``port_num`` is not defined inside this function; presumably
    it is a module-level value -- confirm.

    Args:
        generated_file_name: Name of the header created under
            ``../example/test_demo/inference_net/``.

    Returns:
        The full header text that was written to the file.
    """
    # NOTE(review): arr, prms and prms_str are not used in the rest of this
    # function; the calls are kept as they are.
    arr = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr)
    # Include guard, includes and the opening of the template class.
    str1 = "#ifndef _CONV_ACC_FC_H_" + EOL
    str1 += "#define _CONV_ACC_FC_H_" + EOL + EOL
    str1 += "#include <iostream>" + EOL
    str1 += "#include <fstream>" + EOL
    str1 += '#include "activation_functions.h"' + EOL + EOL
    str1 += "#if _C_DEBUG_MODE_" + EOL
    str1 += "#include <algorithm>" + EOL
    str1 += "#endif" + EOL + EOL
    str1 += "using namespace std;" + EOL + EOL
    str1 += "template <typename T, typename W, typename G, int Tm, int Tn, int Tr, int Tc, int S_max, int K_max>" + EOL
    str1 += "class conv_acc_fc {" + EOL + EOL
    str1 += "private:" + EOL
    str1 += "	int conv_layer_number;" + EOL + EOL
    str1 += "public:" + EOL
    str1 += "	conv_acc_fc() : conv_layer_number(0) {conv_layer_number = 0;};" + EOL + EOL

    # Emitted member: out_buf_reset.
    str1 += "	////------------------------------C++ debugging functions---------------------------------------////" + EOL
    str1 += "	// Reset output buffer" + EOL
    str1 += "	void out_buf_reset(G buf[][Tr][Tc]){" + EOL
    str1 += "        for(int i = 0; i < Tm; i++){" + EOL
    str1 += "            for(int j = 0; j < Tr; j++){" + EOL
    str1 += "                for(int k = 0; k < Tc; k++){" + EOL
    str1 += "                    buf[i][j][k] = G(0);" + EOL
    str1 += "				}" + EOL
    str1 += "			}" + EOL
    str1 += "		}" + EOL
    str1 += "	}" + EOL

    # Emitted member: w_buf_reset.
    str1 += "    // Reset weight buffer" + EOL
    str1 += "    void w_buf_reset(int K, W buf[][Tm][K_max][K_max]){" + EOL
    str1 += "        for(int i = 0; i < Tn; i++){" + EOL
    str1 += "            for(int j = 0; j < Tm; j++){" + EOL
    str1 += "                for(int k = 0; k < K; k++){" + EOL
    str1 += "                    for(int l = 0; l < K; l++){" + EOL
    str1 += "                        buf[i][j][k][l] = W(0);" + EOL
    str1 += "                    }" + EOL
    str1 += "				}" + EOL
    str1 += "			}" + EOL
    str1 += "		}" + EOL
    str1 += "	}" + EOL

    # Emitted member: b_buf_reset.
    str1 += "    // Reset bias buffer" + EOL
    str1 += "    void b_buf_reset(W buf[]){" + EOL
    str1 += "        for(int i = 0; i < Tm; i++){" + EOL
    str1 += "            buf[i]= W(0);" + EOL
    str1 += "		}" + EOL
    str1 += "	}" + EOL

    str1 += "    ////-----------------------------Accelerator Functions---------------------------------------////" + EOL

    # Emitted member: b_buf_load.
    str1 += "    // Load bias data" + EOL
    str1 += "    void b_buf_load(W buf[], W *layer_bias, int bias_offset, int m){" + EOL
    str1 += "        for(int i = 0; i < Tm; i++){" + EOL
    str1 += "#pragma HLS UNROLL" + EOL
    str1 += "            buf[i] = *(layer_bias + bias_offset + i + m);" + EOL
    str1 += "		}" + EOL
    str1 += "	}" + EOL

    # Emitted member: in_buf_load; one "T *in_data_<j>" parameter per port.
    str1 += "    // Load input data" + EOL
    str1 += "    void in_buf_load(T buf[][(Tr-1)*S_max + K_max][(Tc-1)*S_max + K_max]"
    for j in range(1, port_num + 1):
        str1 += ",T " + "*in_data_" + str(j)

    str1 += ", int in_offset, int n, int r, int c, int S, int K, int P, int R_IN, int C_IN, int N) {" + EOL

    str1 += "       for (int j = r * S - P; j < (r + Tr - 1) * S + K - P; j++) {" + EOL
    str1 += "           for (int k = c * S - P; k < (c + Tc - 1) * S + K - P; k++) {" + EOL
    str1 += "#pragma HLS PIPELINE" + EOL
    str1 += "        		for (int i = 0; i < Tn; i+=" + str(
        port_num) + "){" + EOL
    #	str1 += "#pragma HLS UNROLL" + EOL
    #	str1 += "#pragma HLS DEPENDENCE variable=buf inter false" + EOL

    # One zero-fill / load branch is emitted per port inside the i-loop.
    for j in range(0, port_num):
        str1 += "                   	if ((n + Tn > N && i + " + str(
            j
        ) + " >= N - n ) || j < 0 || j >= R_IN || k < 0 || k >= C_IN) {" + EOL
        str1 += "                       	buf[i + " + str(
            j) + "][j - r * S + P][k - c * S + P] = T(0);" + EOL
        str1 += "                   	} else {" + EOL
        str1 += "                       	buf[i + " + str(
            j) + "][j - r * S + P][k - c * S + P] = *(in_data_" + str(
                j + 1) + " + in_offset + (i + n)/" + str(
                    port_num) + " * R_IN * C_IN + j * C_IN + k);" + EOL
        str1 += "               		}" + EOL

    str1 += "				}" + EOL
    str1 += "			}" + EOL
    str1 += "		}" + EOL
    str1 += "	}" + EOL
    str1 += EOL
    str1 += EOL

    # Emitted member: w_buf_load.
    str1 += "    // Load weights to weight buffer" + EOL
    str1 += "   void w_buf_load(W buf[][Tm][K_max][K_max], W *layer_weights, int weight_offset, int n, int m, int K, int N, int M){" + EOL
    str1 += "       for(int k1 = 0; k1 < K; k1++){" + EOL
    str1 += "           for(int k2 = 0; k2 < K; k2++){" + EOL
    str1 += "#pragma HLS PIPELINE" + EOL
    str1 += "        		for(int j = 0; j < Tn; j++){" + EOL
    #str1 += "#pragma HLS UNROLL" + EOL
    str1 += "            		if(N < n+Tn && j == N-n){" + EOL
    str1 += "                		break;" + EOL
    str1 += "            		}" + EOL
    str1 += "            		for(int i = 0; i < Tm && i < M-m; i++){" + EOL
    #str1 += "#pragma HLS UNROLL" + EOL
    str1 += "                		if(M < m+Tm && i == M-m){" + EOL
    str1 += "                    		break;" + EOL
    str1 += "                		}" + EOL
    str1 += "                        buf[j][i][k1][k2] = *(layer_weights + weight_offset + (i+m)*N*K*K + (j+n)*K*K + k1*K + k2);" + EOL
    str1 += "                   }" + EOL
    str1 += "				}" + EOL
    str1 += "			}" + EOL
    str1 += "		}" + EOL
    str1 += "	}" + EOL

    # Emitted member: conv_engine.
    str1 += "    // Convolution computation kernel" + EOL
    str1 += "    void conv_engine(T in_buf[][(Tr-1)*S_max + K_max][(Tc-1)*S_max + K_max], W w_buf[][Tm][K_max][K_max], W b_buf[], G out_buf[][Tr][Tc], int S, int n, int r, int c, int K, int R_OUT, int C_OUT){" + EOL
    str1 += "        for(int i=0; i<K; i++){" + EOL
    str1 += "            for(int j=0; j<K; j++){" + EOL
    str1 += "                for(int tr=0; tr<Tr; tr++){" + EOL
    str1 += "                    for(int tc=0; tc<Tc; tc++){" + EOL
    str1 += "#pragma HLS PIPELINE" + EOL
    str1 += "                        for(int tm = 0; tm < Tm; tm++){" + EOL
    str1 += "#pragma HLS UNROLL" + EOL
    str1 += "                            for(int tn=0; tn<Tn; tn++){" + EOL
    str1 += "#pragma HLS UNROLL" + EOL
    str1 += "                                if(i==0&&j==0&&tn==0&&n==0)" + EOL
    str1 += "                                    out_buf[tm][tr][tc] = b_buf[tm] + w_buf[tn][tm][i][j]*in_buf[tn][S*(tr)+i][S*(tc)+j];" + EOL
    str1 += "                                else" + EOL
    str1 += "                                    out_buf[tm][tr][tc] = out_buf[tm][tr][tc] + w_buf[tn][tm][i][j]*in_buf[tn][S*(tr)+i][S*(tc)+j];" + EOL
    str1 += "                            }" + EOL
    str1 += "                        }" + EOL
    str1 += "                    }" + EOL
    str1 += "                }" + EOL
    str1 += "            }" + EOL
    str1 += "        }" + EOL
    str1 += "    }" + EOL
    str1 += EOL
    str1 += EOL

    # Emitted member: output_res; one "G *out_data_<j>" parameter per port.
    str1 += "    // Ouput out_buf data to output interface" + EOL
    str1 += "    void output_res(G out_buf[][Tr][Tc]"
    for j in range(1, port_num + 1):
        str1 += ",G " + "*out_data_" + str(j)
    str1 += ", int out_offset, int n, int m, int r, int c, int N, int M, int R_OUT, int C_OUT, bool act){" + EOL
    str1 += "        if (n >= N - Tn) {" + EOL
    str1 += "            for (int j = r; j < r + Tr && j < R_OUT; j++) {" + EOL
    #str1 += "                if (C_OUT < c + Tc && k == C_OUT) { break; }" + EOL
    str1 += "                for (int k = c; k < c + Tc && k < C_OUT; k++) {" + EOL
    #str1 += "                    if (R_OUT < r + Tr && j == R_OUT) { break; }" + EOL
    str1 += "#pragma HLS PIPELINE" + EOL
    str1 += "                    for (int i = 0; i < Tm && i < M-m; i += " + str(
        port_num) + ") {" + EOL
    #str1 += "#pragma HLS UNROLL" + EOL
    #str1 += "                        if (M < m + Tm && i+m == M) { break; }" + EOL
    str1 += "                        if (act) {" + EOL
    # With activation: each port's store is wrapped in relu(...).
    for j in range(1, port_num + 1):
        str1 += "                        	if (i + " + str(j -
                                                          1) + " < M-m)" + EOL
        str1 += "                            	*(out_data_" + str(j) + " + out_offset + ((i+m)/" + str(port_num) + ") * R_OUT * C_OUT + j * C_OUT + k) = relu(out_buf[i + " +\
         str(j-1) + "][j - r][k - c]);" + EOL

    str1 += "                        }" + EOL
    str1 += "                        else {" + EOL
    # Without activation: each port stores the raw out_buf value.
    for j in range(1, port_num + 1):
        str1 += "                        	if (i + " + str(j -
                                                          1) + " < M-m)" + EOL
        str1 += "                            	*(out_data_" + str(j) + " + out_offset + ((i+m)/" + str(port_num) + ") * R_OUT * C_OUT + j * C_OUT + k) = out_buf[i + " +\
         str(j-1) + "][j - r][k - c];" + EOL

    str1 += "                        }" + EOL
    str1 += "                    }" + EOL
    str1 += "                }" + EOL
    str1 += "            }" + EOL
    str1 += "        }" + EOL
    str1 += "    }" + EOL

    # Emitted member: conv_layer_acc_fc (signature).
    str1 += "///////////////////////------------------conv accelerator----------------//////////////////////////" + EOL
    str1 += "    void conv_layer_acc_fc(" + EOL
    str1 += "            int N, //input feature number" + EOL
    str1 += "            int K, //input kernel size" + EOL
    str1 += "            int M, // output feature number" + EOL
    str1 += "            int R_IN, // input Row" + EOL
    str1 += "            int C_IN, // input column" + EOL
    str1 += "            int R_OUT, // output Row" + EOL
    str1 += "            int C_OUT,// output column" + EOL
    str1 += "            int S, // stride size" + EOL
    str1 += "            int P, // padding size" + EOL
    str1 += "            bool act, // activation function bit (1-- with act, 0--without act)" + EOL
    str1 += "            W *layer_weights, //w[M][N][K][K]" + EOL
    str1 += "            W *layer_bias, // b[M]" + EOL
    str1 += "            int weight_offset," + EOL
    str1 += "            int bias_offset," + EOL
    str1 += "            int in_offset," + EOL
    str1 += "            int out_offset," + EOL
    for j in range(1, port_num + 1):
        str1 += "            T *in_data_" + str(
            j
        ) + "," + " // in_data[N][(R-1)*S + K][(C-1)*S + K] --> [N][(R-1)*S + K - 2*P][(C-1)*S + K - 2*P]" + EOL
    # The last out_data parameter closes the argument list and opens the body.
    for j in range(1, port_num + 1):
        if j == port_num:
            str1 += "            G *out_data_" + str(
                j) + "){ // out[M][R][C]" + EOL + EOL
        else:
            str1 += "            G *out_data_" + str(
                j) + "," + " // out[M][R][C]" + EOL

    # Local buffers (two of each kind, suffixed _0 and _1) and state flags.
    str1 += "        /***************local data buffer******************************/" + EOL
    str1 += "        T in_buf_1[Tn][(Tr-1)*S_max + K_max][(Tc-1)*S_max + K_max];" + EOL
    str1 += "        T in_buf_0[Tn][(Tr-1)*S_max + K_max][(Tc-1)*S_max + K_max];" + EOL
    str1 += "        W w_buf_1[Tn][Tm][K_max][K_max];" + EOL
    str1 += "        W w_buf_0[Tn][Tm][K_max][K_max];" + EOL
    str1 += "        W b_buf_1[Tm];" + EOL
    str1 += "        W b_buf_0[Tm];" + EOL
    str1 += "        G out_buf_1[Tm][Tr][Tc];" + EOL
    str1 += "        G out_buf_0[Tm][Tr][Tc];" + EOL + EOL
    str1 += "        /***************Ptr and buffer initialization******************************/" + EOL
    str1 += "        bool in_buf_0_empty = 1;" + EOL
    str1 += "        bool in_buf_1_empty = 1;" + EOL
    str1 += "        bool out_buf_0_empty = 1;" + EOL
    str1 += "        bool out_buf_1_empty = 1;" + EOL
    str1 += "        int loadbufPtr = 0;" + EOL
    str1 += "        int combufPtr = 0;" + EOL
    str1 += "        int resbufPtr = 0;" + EOL
    str1 += "        bool last_com = 0;" + EOL
    str1 += "        bool last_load = 0;" + EOL
    str1 += "        bool last_res = 0;" + EOL + EOL

    # HLS array-partition pragmas, emitted under "#if _HLS_MODE_".
    str1 += "#if _HLS_MODE_" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=in_buf_1 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=in_buf_0 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=w_buf_1 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=w_buf_1 complete dim=2" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=w_buf_0 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=w_buf_0 complete dim=2" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=b_buf_1 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=b_buf_0 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=out_buf_1 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=out_buf_0 complete dim=1" + EOL
    str1 += "#endif" + EOL + EOL

    # Debug-only buffer resets.
    str1 += "#if _C_DEBUG_MODE_" + EOL
    str1 += "#if _KERNEL_DEBUG_" + EOL
    str1 += '            cout << "Starting conv_acc_innerpp_fc layer ...." << endl;' + EOL
    str1 += "            //buffer local data initiallization: must do it in C++ debug!" + EOL
    str1 += "            out_buf_reset(out_buf_1);" + EOL
    str1 += "            out_buf_reset(out_buf_0);" + EOL
    str1 += "            b_buf_reset(b_buf_1);" + EOL
    str1 += "            b_buf_reset(b_buf_0);" + EOL
    str1 += "            w_buf_reset(K, w_buf_1);" + EOL
    str1 += "            w_buf_reset(K, w_buf_0);" + EOL
    str1 += "#endif" + EOL
    str1 += "#endif" + EOL
    # Main tiled loop nest over r, c, m and n (n advances by 2*Tn).
    str1 += "		for(int r = 0; r < R_OUT; r += Tr){" + EOL
    str1 += "			for(int c = 0; c < C_OUT; c += Tc){" + EOL
    str1 += "				for(int m = 0; m < M; m += Tm){" + EOL
    str1 += "					for(int n = 0; n < N; n += 2*Tn){" + EOL
    #str1 += "#if _HLS_MODE_" + EOL
    #str1 += "#pragma HLS DATAFLOW" + EOL
    #str1 += "#endif" + EOL
    # Load phase: fills buffer set 1 (at n+Tn) or buffer set 0 (at n).
    str1 += "   //--------------------------Load input B W D in ping-pong manner-------------------------//" + EOL
    str1 += "						while ((in_buf_0_empty | in_buf_1_empty)&& (!last_load)) {" + EOL
    str1 += "							if (loadbufPtr == 1) {" + EOL
    str1 += '                    			cout << "loading input buffer 1...." << endl;' + EOL
    str1 += "                    			//load input bias" + EOL
    str1 += "                        		b_buf_load(b_buf_1, layer_bias, bias_offset, m);" + EOL
    str1 += "                        		// load input data" + EOL
    str1 += "                        		in_buf_load(in_buf_1"
    for j in range(1, port_num + 1):
        str1 += ", in_data_" + str(j)
    str1 += ", in_offset, n+Tn, r, c, S, K, P, R_IN, C_IN, N);" + EOL
    str1 += "                        		// load input weights" + EOL
    str1 += "                        		w_buf_load(w_buf_1, layer_weights, weight_offset, n+Tn, m, K, N, M);" + EOL
    str1 += "                        		in_buf_1_empty = 0;" + EOL
    str1 += '                        		cout << "buffer 1 full" << endl;' + EOL
    str1 += "                        		loadbufPtr = 0;" + EOL
    str1 += "                        		if (n+2*Tn >= N) {last_load = 1;}" + EOL
    str1 += "                        	} else {" + EOL
    str1 += '                    			cout << "loading input buffer 0...." << endl;' + EOL
    str1 += "                    			//load input bias" + EOL
    str1 += "                        		b_buf_load(b_buf_0, layer_bias, bias_offset, m);" + EOL
    str1 += "                        		// load input data" + EOL
    str1 += "                        		in_buf_load(in_buf_0"
    for j in range(1, port_num + 1):
        str1 += ", in_data_" + str(j)
    str1 += ", in_offset, n, r, c, S, K, P, R_IN, C_IN, N);" + EOL
    str1 += "                        		// load input weights" + EOL
    str1 += "                        		w_buf_load(w_buf_0, layer_weights, weight_offset, n, m, K, N, M);" + EOL
    str1 += "                        		in_buf_0_empty = 0;" + EOL
    str1 += '                        		cout << "buffer 0 full" << endl;' + EOL
    str1 += "                        		loadbufPtr = 1;" + EOL
    str1 += "                        		if (n+Tn >= N) {last_load = 1;}" + EOL
    str1 += "							}" + EOL
    str1 += "                       }" + EOL
    str1 += "                       loadbufPtr = 0;" + EOL
    str1 += "                       last_load = 0;" + EOL
    # Compute phase: conv_engine on the loaded buffers into out_buf_0/1.
    str1 += "   //------------------------------compute buffered data -----------------------------------//" + EOL
    str1 += "                    	while ((!in_buf_0_empty | !in_buf_1_empty)&& (!last_com)) {" + EOL
    str1 += "                    		if (combufPtr == 1) {" + EOL
    str1 += '                    			cout << "computing input buffer 1...." << endl;' + EOL
    str1 += "                    			if(resbufPtr == 1){" + EOL
    str1 += "                        			conv_engine(in_buf_1, w_buf_1, b_buf_1, out_buf_1, S, n+Tn, r, c, K, R_OUT, C_OUT);" + EOL
    str1 += "                    				out_buf_1_empty = 0;" + EOL
    str1 += "                    			}else{" + EOL
    str1 += "                        			conv_engine(in_buf_1, w_buf_1, b_buf_1, out_buf_0, S, n+Tn, r, c, K, R_OUT, C_OUT);" + EOL
    str1 += "                    				out_buf_0_empty = 0;" + EOL
    str1 += "                    			}" + EOL
    str1 += "                    			in_buf_1_empty = 1;" + EOL
    str1 += "                    			combufPtr = 0;" + EOL
    str1 += '                    			cout << "buffer 1 computed" << endl;' + EOL
    str1 += "                    			if (n+2*Tn >= N) {last_com = 1;}" + EOL
    str1 += "                    		} else {" + EOL
    str1 += '                    			cout << "computing input buffer 0...." << endl;' + EOL
    str1 += "                    			if(resbufPtr == 1){" + EOL
    str1 += "                        			conv_engine(in_buf_0, w_buf_0, b_buf_0, out_buf_1, S, n, r, c, K, R_OUT, C_OUT);" + EOL
    str1 += "                    				out_buf_1_empty = 0;" + EOL
    str1 += "                    			}else{" + EOL
    str1 += "                        			conv_engine(in_buf_0, w_buf_0, b_buf_0, out_buf_0, S, n, r, c, K, R_OUT, C_OUT);" + EOL
    str1 += "                    				out_buf_0_empty = 0;" + EOL
    str1 += "                    			}" + EOL
    str1 += "                    			in_buf_0_empty = 1;" + EOL
    str1 += "                    			combufPtr = 1;" + EOL
    str1 += '                    			cout << "buffer 0 computed" << endl;' + EOL
    str1 += "								if (n+Tn >= N) {last_com = 1;}" + EOL
    str1 += "							}" + EOL
    str1 += "                       }" + EOL
    str1 += "                       combufPtr = 0;" + EOL
    str1 += "                       last_com = 0;" + EOL
    # Output phase: output_res is emitted with one out_data_<j> per port.
    str1 += "   //---------------------------transfer output data----------------------------------------//" + EOL
    str1 += "                    	while ((!out_buf_0_empty | !out_buf_1_empty)&& (!last_res)) {" + EOL
    str1 += "                    		if (resbufPtr == 1) {" + EOL
    str1 += '                    			cout << "output buffer 1...." << endl;' + EOL
    str1 += "                    			// transfer output data" + EOL
    str1 += "                    			if (n+Tn >= N) {" + EOL
    str1 += "                    				last_res = 1;" + EOL
    str1 += "                    				resbufPtr = 0;" + EOL
    str1 += "                    				output_res(out_buf_1"
    for j in range(1, port_num + 1):
        str1 += ", out_data_" + str(j)
    str1 += ", out_offset, n, m, r, c, N, M, R_OUT, C_OUT, act);" + EOL
    str1 += "                    			}else if (n+2*Tn >= N) {" + EOL
    str1 += "                    				last_res = 1;" + EOL
    str1 += "                    				resbufPtr = 0;" + EOL
    str1 += "                    				output_res(out_buf_1"
    for j in range(1, port_num + 1):
        str1 += ", out_data_" + str(j)
    str1 += ", out_offset, n+Tn, m, r, c, N, M, R_OUT, C_OUT, act);" + EOL
    str1 += "                    			}" + EOL
    str1 += "                    			out_buf_1_empty = 1;" + EOL
    str1 += '                    			cout << "buffer 1 res" << endl;' + EOL
    str1 += "                    		} else {" + EOL
    str1 += '                    			cout << "output buffer 0...." << endl;' + EOL
    str1 += "                    			// transfer output data" + EOL
    str1 += "                    			if (n+Tn >= N) {" + EOL
    str1 += "                    				last_res = 1;" + EOL
    str1 += "                    				resbufPtr = 1;" + EOL
    str1 += "                    				output_res(out_buf_0"
    for j in range(1, port_num + 1):
        str1 += ", out_data_" + str(j)
    str1 += ", out_offset, n, m, r, c, N, M, R_OUT, C_OUT, act);" + EOL
    str1 += "                    			}else if (n+2*Tn >= N) {" + EOL
    str1 += "                    				last_res = 1;" + EOL
    str1 += "                    				resbufPtr = 1;" + EOL
    str1 += "                    				output_res(out_buf_0"
    for j in range(1, port_num + 1):
        str1 += ", out_data_" + str(j)
    str1 += ", out_offset, n+Tn, m, r, c, N, M, R_OUT, C_OUT, act);" + EOL
    str1 += "                    			}" + EOL
    str1 += "                    			out_buf_0_empty = 1;" + EOL
    str1 += '								cout << "buffer 0 res" << endl;' + EOL
    str1 += "							}" + EOL
    str1 += "						}" + EOL
    str1 += "						last_res = 0;" + EOL
    str1 += "					}" + EOL
    str1 += "				}" + EOL
    str1 += "			}" + EOL
    str1 += "		}" + EOL

    # Debug-only dump of every out_data_<j> port to fc_out_data.txt.
    str1 += "#if _C_DEBUG_MODE_" + EOL
    str1 += "#if _KERNEL_DEBUG_" + EOL
    str1 += '            cout << "Finished conv_acc_innerpp_fc layer ...." << endl;' + EOL
    str1 += "            ofstream conv_out;" + EOL
    str1 += '            conv_out.open("fc_out_data.txt",ios::app);' + EOL
    str1 += '            conv_out <<"fc output: "<< endl;' + EOL
    str1 += "            for (int i = 0; i < M/" + str(
        port_num) + "; i++) {" + EOL
    for j in range(1, port_num + 1):
        str1 += "                for (int j = 0; j < R_OUT; j++) {" + EOL
        str1 += "                    for(int k = 0; k < C_OUT; k++){" + EOL
        str1 += "                        conv_out << *(out_data_" + str(
            j) + ' + out_offset + i*R_OUT*C_OUT + j*C_OUT + k) << " ";' + EOL
        str1 += "                    }conv_out << endl;" + EOL
        str1 += "                }conv_out << endl;" + EOL

    str1 += "            }conv_out.close();" + EOL
    str1 += "#endif" + EOL
    str1 += "#endif" + EOL
    # Close the member function, the class and the include guard.
    str1 += "    }" + EOL
    str1 += "};" + EOL
    str1 += "#endif" + EOL

    # Write the assembled header next to the other generated inference files.
    with open("../example/test_demo/inference_net/" + generated_file_name,
              "w") as generated_file:
        generated_file.write(str1)

    return str1
Ejemplo n.º 9
0
def model_extract(include_fc):
    """Collect the per-layer convolution parameters of the network.

    The parameter file named by ``sys.argv[1]`` is read through
    ``helping_functions``.  When requested, the fully connected (FC) layer
    values are appended to the convolution lists so that both kinds of layer
    can be handled by the same code.

    Args:
        include_fc: the FC values are appended only when this equals the
            string ``'include_fc'``.

    Returns:
        A 10-tuple ``(conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, conv_G,
        flag, cut_flag, init_pool_N)``:

        * ``conv_N`` / ``conv_M`` -- ints from ``nn_in_number_conv`` /
          ``nn_out_number_conv`` (plus the FC counterparts when included).
        * ``conv_r`` -- ints from ``nn_in_data_size_conv`` (plus
          ``nn_in_data_size_fc`` when included).
        * ``conv_R`` -- ``(r - K + S + 2 * P) / S`` for every entry of
          ``conv_r``.
        * ``conv_K`` / ``conv_S`` -- ints from ``nn_channel_size_conv`` /
          ``nn_stride_conv`` (plus ``nn_channel_size_fc`` /
          ``nn_in_data_size_fc`` when included).
        * ``conv_G`` -- ints from ``nn_group_conv``.
        * ``flag`` -- one bool per entry of ``conv_r``; True where a
          "Convolution" entry of ``prms[0]`` is directly followed by a
          "Pooling" entry.
        * ``cut_flag`` -- ints from ``conv_cut_flag`` (plus ``fc_cut_flag``
          when included).
        * ``init_pool_N`` -- the raw ``nn_in_data_size_pooling`` value.
    """
    arr = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr)

    def param(key):
        """Return the extracted value stored under *key*."""
        return prms[prms_str.index(key)]

    def as_list(value, fallback):
        """Copy *value* when it is a list, otherwise return ``[fallback]``."""
        return list(value) if isinstance(value, list) else [fallback]

    init_conv_N = param("nn_in_number_conv")
    init_conv_r = param("nn_in_data_size_conv")
    init_conv_M = param("nn_out_number_conv")
    init_conv_P = param("nn_padding_conv")
    init_conv_K = param("nn_channel_size_conv")
    init_conv_S = param("nn_stride_conv")
    init_conv_G = param("nn_group_conv")
    init_fc_N = param("nn_in_number_fc")
    init_fc_Rin = param("nn_in_data_size_fc")
    init_fc_M = param("nn_out_number_fc")
    init_fc_K = param("nn_channel_size_fc")
    init_pool_N = param("nn_in_data_size_pooling")
    cut_flag_conv = param("conv_cut_flag")
    cut_flag_fc = param("fc_cut_flag")

    # FC layers expressed with convolution parameters.  The FC input size is
    # used both as the data size and as the stride.
    fc_in_numbers = as_list(init_fc_N, 0)
    fc_out_numbers = as_list(init_fc_M, 0)
    fc_in_sizes = as_list(init_fc_Rin, 0)
    fc_kernels = as_list(init_fc_K, 0)
    fc_strides = as_list(init_fc_Rin, 1)
    fc_cut_flags = as_list(cut_flag_fc, 1)

    if include_fc == 'include_fc':
        print("[DEBUG] including FC . . . {} {} {} {} {} {}".format(
            fc_in_numbers, fc_out_numbers, fc_in_sizes, fc_kernels,
            fc_strides, fc_cut_flags))
        # Build new lists so the parsed parameter table is left untouched.
        init_conv_N = init_conv_N + fc_in_numbers
        init_conv_M = init_conv_M + fc_out_numbers
        init_conv_r = init_conv_r + fc_in_sizes
        init_conv_K = init_conv_K + fc_kernels
        init_conv_S = init_conv_S + fc_strides
        cut_flag_conv = cut_flag_conv + fc_cut_flags

    conv_N = [int(value) for value in init_conv_N]
    conv_M = [int(value) for value in init_conv_M]
    conv_r = [int(value) for value in init_conv_r]
    conv_K = [int(value) for value in init_conv_K]
    conv_S = [int(value) for value in init_conv_S]
    conv_P = [int(value) for value in init_conv_P]
    conv_G = [int(value) for value in init_conv_G]
    cut_flag = [int(value) for value in cut_flag_conv]

    # Zero padding for the FC entries (one per FC input size) plus one spare
    # entry; this is appended whether or not the FC layers were included.
    if not init_fc_Rin:
        conv_P = conv_P + [0]
    else:
        conv_P = conv_P + [0] * len(init_fc_Rin) + [0]

    # Output size of every layer.
    # NOTE(review): "/" yields floats on Python 3; kept as-is because callers
    # receive conv_R -- confirm whether integer sizes are expected.
    conv_layer_num = len(conv_r)
    conv_R = []
    for idx, in_size in enumerate(conv_r):
        conv_R.append(
            (in_size - conv_K[idx] + conv_S[idx] + 2 * conv_P[idx]) /
            conv_S[idx])

    # Mark the convolution layers that are directly followed by pooling.
    # NOTE(review): the "- 2" bound means the last two entries of prms[0] are
    # never examined -- confirm this is intended.
    flag = [False] * conv_layer_num
    layer_types = prms[0]
    print(layer_types)
    print(len(layer_types))
    conv_seen = 0
    for pos in range(len(layer_types) - 2):
        if "Convolution" not in layer_types[pos]:
            continue
        if "Pooling" in layer_types[pos + 1]:
            flag[conv_seen] = True
        conv_seen += 1

    print("conv_N: ", conv_N)
    print("conv_M: ", conv_M)
    print("conv_r: ", conv_r)
    print("conv_R: ", conv_R)
    print("conv_K: ", conv_K)
    print("conv_S: ", conv_S)
    print("flag", flag)
    print("cut_flag", cut_flag)

    return (conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, conv_G, flag,
            cut_flag, init_pool_N)


# if __name__ == "__main__":
#     (conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, conv_G, flag, cut_flag,
#      init_pool_N) = model_extract('include_fc')
Ejemplo n.º 10
0
def generate():
    """Search Tm/Tn tiling factors under the constraint Tm * Tn < DSP / 5.

    The channel counts are read from the parameter file named by
    ``sys.argv[1]``: the ``nn_in_number_conv`` values, followed by the
    ``nn_in_number_fc`` values and the last ``nn_out_number_fc`` value.  For
    every candidate ``(Tm, Tn)`` a cycle estimate is accumulated over all
    layers, and up to five candidates are kept.  The kept candidates, their
    cycle counts and the derived buffer sizes are printed.

    Returns:
        ``(arr, min_cycles)`` where ``arr`` is always an empty list and
        ``min_cycles`` is the cycle count of the most recently accepted
        candidate (the ``[1, max_S]`` baseline if none was accepted).
    """
    arr2 = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr2)
    conv_in_numbers = prms[prms_str.index("nn_in_number_conv")]
    fc_in_numbers = prms[prms_str.index("nn_in_number_fc")]
    fc_out_numbers = prms[prms_str.index("nn_out_number_fc")]

    # Chain of channel counts: entry i feeds layer i, entry i + 1 is its
    # output.  Built as a fresh list so the parsed table is not modified.
    channels = [
        int(value)
        for value in list(conv_in_numbers) + list(fc_in_numbers) +
        [fc_out_numbers[-1]]
    ]
    mm1 = max(channels)

    DSP = 900
    d = DSP / 5
    arr = []

    # Hard-coded per-layer sizes; presumably the feature-map side and kernel
    # side of each layer of the target network -- TODO confirm.
    r_c = [55, 27, 13, 13, 13, 6, 1, 1]
    k = [11, 5, 3, 3, 3, 6, 1, 1]
    max_S = 6
    max_K = 11
    layer_count = len(channels) - 1

    def estimate_cycles(tm, tn):
        """Sum the per-layer cycle estimate for the tiling ``(tm, tn)``."""
        return sum(
            int(r_c[i] * r_c[i] * math.ceil(channels[i + 1] / tn) *
                math.ceil(channels[i] / tm) * k[i] * k[i])
            for i in range(layer_count))

    # Baseline candidate.
    min_cycles = estimate_cycles(1, max_S)
    minimums = [[1, max_S]]
    minimums1 = [min_cycles]

    for Tm in range(1, mm1 + 1):
        for Tn in range(max_S, Tm):
            if not Tm * Tn < d:
                continue
            cycles = estimate_cycles(Tm, Tn)
            # A candidate is accepted when it beats the worst kept one.
            # NOTE(review): min_cycles therefore tracks the last accepted
            # candidate, not necessarily the smallest -- confirm intent.
            if cycles < max(minimums1):
                min_cycles = cycles
                if len(minimums) >= 5:
                    # Drop the worst kept candidate from both parallel lists.
                    worst = minimums1.index(max(minimums1))
                    del minimums1[worst]
                    del minimums[worst]
                minimums.append([Tm, Tn])
                minimums1.append(min_cycles)
    print("Tm and Tn")
    print(minimums)
    print("cycles")
    print(minimums1)

    # Buffer sizes derived from every kept candidate.
    Tr_Tc = []
    in_buff_arr = []
    w_buff_arr = []
    out_buff_arr = []
    total_arr = []
    for tm, tn in minimums:
        Tr = int(math.sqrt(tm * tn))
        Tr_Tc.append([Tr, Tr])
        in_side = (Tr - 1) * max_S + max_K
        in_buff = in_side * in_side
        out_buff = tm * tn * max_K * max_K
        w_buff = Tr * Tr * tm
        in_buff_arr.append(in_buff)
        out_buff_arr.append(out_buff)
        w_buff_arr.append(w_buff)
        total_arr.append(in_buff + out_buff + w_buff)
    print("Tr_Tc")
    print(Tr_Tc)
    print("in_buf")
    print(in_buff_arr)
    print("w_buf")
    print(w_buff_arr)
    print("out_buf")
    print(out_buff_arr)
    print("total")
    print(total_arr)
    return arr, min_cycles
Ejemplo n.º 11
0
def generate(generated_file_name="acc_instance.h"):
    """Write the accelerator instance header and return its text.

    The network description is read from the file named by ``sys.argv[1]``.
    The tiling factors of the convolution and pooling accelerators are
    requested through ``helping_functions.prompt``.  For every accelerator
    kind reported as used, an ``#include`` line, a template instantiation and
    a wrapper function are emitted.

    Args:
        generated_file_name: file name of the header, created inside
            ``../example/test_demo/inference_net/``.

    Returns:
        The complete header text that was written to the file.
    """
    arr = helping_functions.read_params(sys.argv[1])
    layers_fun = layers(arr[1])
    layers_fun_w_bn = layers_w_bn(arr[1])
    layers_fun_fc = layers_fc(arr[1])
    prms, prms_str = helping_functions.extraction(arr)

    def param(key):
        """Return the extracted value stored under *key*."""
        return prms[prms_str.index(key)]

    def call_args(names, prefixes=None):
        """Join *names* (pointer stars stripped) into a call argument list.

        *prefixes* optionally maps a position to text inserted before the
        argument at that position.
        """
        prefixes = prefixes or {}
        return ", ".join(
            prefixes.get(pos, "") + name.replace("*", "")
            for pos, name in enumerate(names))

    def drop_bias_args(types, names):
        """Remove the bias pointer and bias offset from a signature."""
        for bias_arg in ("*layer_bias", "bias_offset"):
            pos = names.index(bias_arg)
            del names[pos]
            del types[pos]

    nn_channel_size_conv_values = param("nn_channel_size_conv")
    nn_stride_conv_values = param("nn_stride_conv")
    nn_in_data_size_pooling_values = param("nn_in_data_size_pooling")
    nn_channel_size_pooling_values = param("nn_channel_size_pooling")
    nn_stride_pooling_values = param("nn_stride_pooling")
    nn_channel_size_fc_values = param("nn_channel_size_fc")
    layers_order = param("layers_order")
    has_conv_bias = "conv_bias_size" in prms_str

    str1 = ("#ifndef _ACC_INSTANCE_H_" + EOL + "#define _ACC_INSTANCE_H_" +
            EOL * 2)
    import_str = ""
    body_str = ""

    # Stride and kernel of each layer, grouped by accelerator slot:
    # 0 = convolution (and inner product), 1 = max pooling, 2 = ave pooling.
    strides = [[], [], []]
    kernels = [[], [], []]
    conv_counter = 0
    pool_counter = 0
    fc_counter = 0
    for layer in layers_order:
        kind = layer.lower()
        if kind.startswith("convolution"):
            strides[0].append(int(nn_stride_conv_values[conv_counter]))
            kernels[0].append(int(nn_channel_size_conv_values[conv_counter]))
            conv_counter += 1
        elif kind in ("maxpooling", "avepooling"):
            slot = 1 if kind == "maxpooling" else 2
            strides[slot].append(int(nn_stride_pooling_values[pool_counter]))
            kernels[slot].append(
                int(nn_channel_size_pooling_values[pool_counter]))
            pool_counter += 1
        elif kind in ("globalmaxpooling", "globalavepooling"):
            # Global pooling uses its input size as both stride and kernel.
            slot = 1 if kind == "globalmaxpooling" else 2
            strides[slot].append(
                int(nn_in_data_size_pooling_values[pool_counter]))
            kernels[slot].append(
                int(nn_in_data_size_pooling_values[pool_counter]))
            pool_counter += 1
        elif kind == "innerproduct":
            strides[0].append(int(nn_channel_size_fc_values[fc_counter]))
            kernels[0].append(int(nn_channel_size_fc_values[fc_counter]))
            fc_counter += 1

    # Largest stride and kernel per slot, formatted as template arguments.
    # A slot without layers keeps its empty-list placeholder.
    acc_max_kernel = [[], [], []]
    acc_max_stride = [[], [], []]
    for slot in range(len(kernels)):
        if kernels[slot]:
            acc_max_kernel[slot] = ", " + str(max(kernels[slot]))
        if strides[slot]:
            acc_max_stride[slot] = ", " + str(max(strides[slot]))

    # Signatures for conv_layer and pool_layer.
    includes = [
        '#include "conv_acc_innerpp.h"',
        '#include "max_pool_acc_innerpp.h"',
        '#include "ave_pool_acc_innerpp.h"',
    ]
    fn_names = ["conv_layer_new", "max_pool_layer_new", "ave_pool_layer_new"]
    rn_tp = "void"
    arg_t_list = [
        ["int"] * 9 + ["bool", "data_type_w", "data_type_w"] + ["int"] * 4,
        ["int"] * 8 + ["bool"],
    ]
    arg_n_list = [
        [
            "N", "K", "M", "R_IN", "C_IN", "C_OUT", "R_OUT", "S", "P", "act",
            "*layer_weights", "*layer_bias", "weight_offset", "bias_offset",
            "in_offset", "out_offset"
        ],
        ["R_in", "C_in", "N", "K", "R", "C", "S", "P", "act"],
    ]

    # Signature for conv_w_bn_layer.
    includes_w_bn = ['#include "conv_acc_innerpp_w_bn.h"']
    fn_names_w_bn = ["conv_layer_new_w_bn"]
    arg_t_list_w_bn = [
        ["int"] * 9 + ["bool"] + ["data_type_w"] * 4 + ["int"] +
        ["data_type_w"] * 2 + ["int"] * 5
    ]
    arg_n_list_w_bn = [[
        "N", "K", "M", "R_IN", "C_IN", "C_OUT", "R_OUT", "S", "P", "act",
        "*layer_weights", "*layer_bias", "*bn_mean", "*bn_denominator",
        "bn_offset", "*scale_gamma", "*scale_beta", "scale_offset",
        "weight_offset", "bias_offset", "in_offset", "out_offset"
    ]]

    # Signatures for the resnet fc_layer.
    includes_fc = ['#include "conv_acc_innerpp_fc.h"']
    fn_names_fc = ["conv_layer_new_fc"]
    arg_t_list_fc = [
        ["int"] * 9 + ["bool", "data_type_w", "data_type_w"] + ["int"] * 4,
        ["int"] * 8 + ["bool"],
    ]
    arg_n_list_fc = [
        [
            "N", "K", "M", "R_IN", "C_IN", "C_OUT", "R_OUT", "S", "P", "act",
            "*layer_weights", "*layer_bias", "weight_offset", "bias_offset",
            "in_offset", "out_offset"
        ],
        ["R_in", "C_in", "N", "K", "R", "C", "S", "P", "act"],
    ]

    # Tiling factors, asked in a fixed order: conv Tm/Tn/Tr/Tc, then pool
    # Tn/Tr/Tc (shared by the max and ave pooling accelerators).
    conv_tiling = [
        helping_functions.prompt("Please enter the " + factor +
                                 " of conv_acc: ")
        for factor in ("Tm", "Tn", "Tr", "Tc")
    ]
    pool_tiling = [
        helping_functions.prompt(lead + "Please enter the " + factor +
                                 " of pool_acc: ")
        for lead, factor in (("\n", "Tn"), ("", "Tr"), ("", "Tc"))
    ]
    acc_params = [list(conv_tiling), list(pool_tiling), list(pool_tiling)]
    acc_params_w_bn = [list(conv_tiling)]

    # Object and function names for conv_layer and pool_layer.
    init_nm = ["conv_acc", "max_pool_acc", "ave_pool_acc"]
    prm = "data_type, data_type_w, data_type_o"
    init_names = ["convAcc1", "maxPoolAcc1", "avePoolAcc1"]
    acc_fn_names = [
        "conv_layer_acc", "max_pool_layer_acc", "ave_pool_layer_acc"
    ]
    # Object and function names for conv_w_bn_layer.
    init_nm_w_bn = ["conv_acc_w_bn"]
    prm_w_bn = "data_type, data_type_w, data_type_o"
    init_names_w_bn = ["convAcc2"]
    acc_fn_names_w_bn = ["conv_layer_acc_w_bn"]
    # Object and function names for the resnet fc_layer.
    init_nm_fc = ["conv_acc_fc"]
    prm_fc = "data_type, data_type_w, data_type_o"
    init_names_fc = ["convAcc3"]
    acc_fn_names_fc = ["conv_layer_acc_fc"]

    # Every signature ends with all input ports followed by all output ports.
    port_targets = [
        (arg_t_list[0], arg_n_list[0]),
        (arg_t_list[1], arg_n_list[1]),
        (arg_t_list_w_bn[0], arg_n_list_w_bn[0]),
        (arg_t_list_fc[0], arg_n_list_fc[0]),
        (arg_t_list_fc[1], arg_n_list_fc[1]),
    ]
    for c_type, stem in (("data_type", "*in_data_"),
                         ("data_type_o", "*out_data_")):
        for port in range(1, port_num + 1):
            for types, names in port_targets:
                types.append(c_type)
                names.append(stem + str(port))

    # Without a convolution bias the conv signature loses its bias arguments.
    # This is done once, up front, instead of on every loop iteration.
    if not has_conv_bias:
        drop_bias_args(arg_t_list[0], arg_n_list[0])

    # Write every layer accelerator that is needed.
    for i, used in enumerate(layers_fun):
        if used == 0:
            continue
        import_str += includes[i] + EOL
        # Slot 0 uses the conv signature, every other slot the pool one.
        j = 1 if i > 0 else 0
        body_str += init_nm[i] + CLASS_BEGIN + prm + COMMA_SPACE
        body_str += ', '.join(acc_params[i])
        body_str += acc_max_stride[i]
        body_str += acc_max_kernel[i]
        body_str += CLASS_END + SPACE + init_names[i] + EOS + EOL * 2
        s = call_args(arg_n_list[j])
        body_str += generate_function(
            fn_names[i], rn_tp, arg_t_list[j], arg_n_list[j], [
                init_names[i] + CALL_SYMBOL + acc_fn_names[i] +
                PARAMETER_BEGIN + s + PARAMETER_END + EOS
            ])

    # Write conv_acc_w_bn if needed.
    if "nn_batch_norm_size" in prms_str:
        if not has_conv_bias:
            drop_bias_args(arg_t_list_w_bn[0], arg_n_list_w_bn[0])
        for i, used in enumerate(layers_fun_w_bn):
            if used == 0:
                continue
            import_str += includes_w_bn[i] + EOL
            j = 1 if i > 0 else 0
            body_str += init_nm_w_bn[i] + CLASS_BEGIN + prm_w_bn + COMMA_SPACE
            body_str += ', '.join(acc_params_w_bn[i])
            body_str += acc_max_stride[i]
            body_str += acc_max_kernel[i]
            body_str += (CLASS_END + SPACE + init_names_w_bn[i] + EOS +
                         EOL * 2)
            names = arg_n_list_w_bn[j]
            guards = {}
            if j == 0:
                # The scale arguments are wrapped in an "#if _SCALE_" guard
                # that closes right before weight_offset.
                guards = {
                    names.index("*scale_gamma"):
                    EOL + "#if _SCALE_" + EOL + SEPARATER,
                    names.index("weight_offset"):
                    EOL + "#endif" + EOL + SEPARATER,
                }
            s = call_args(names, guards)
            if j == 0:
                body_str += generate_function_w_bn(
                    fn_names_w_bn[i], rn_tp, arg_t_list_w_bn[j],
                    arg_n_list_w_bn[j], [
                        init_names_w_bn[i] + CALL_SYMBOL +
                        acc_fn_names_w_bn[i] + PARAMETER_BEGIN + s +
                        PARAMETER_END + EOS
                    ])

    # Write the fc layer accelerator if needed.
    if "nn_in_number_eltwise_size" in prms_str:
        for i, used in enumerate(layers_fun_fc):
            if used == 0:
                continue
            import_str += includes_fc[i] + EOL
            j = 1 if i > 0 else 0
            body_str += init_nm_fc[i] + CLASS_BEGIN + prm_fc + COMMA_SPACE
            body_str += ', '.join(acc_params[i])
            body_str += acc_max_stride[i]
            body_str += acc_max_kernel[i]
            body_str += CLASS_END + SPACE + init_names_fc[i] + EOS + EOL * 2
            s = call_args(arg_n_list_fc[j])
            body_str += generate_function(
                fn_names_fc[i], rn_tp, arg_t_list_fc[j], arg_n_list_fc[j], [
                    init_names_fc[i] + CALL_SYMBOL + acc_fn_names_fc[i] +
                    PARAMETER_BEGIN + s + PARAMETER_END + EOS
                ])

    import_str += '#include "config.h"' + EOL * 2
    str1 += import_str + body_str + EOL * 2 + ENDIF
    with open("../example/test_demo/inference_net/" + generated_file_name,
              "w") as generated_file:
        generated_file.write(str1)

    return str1
Ejemplo n.º 12
0
def generate():
    """Explore Tm/Tn tiling factors for the conv layers (Tm * Tn < DSP / 5).

    Steps, in order:

    1. Sweep ``s`` and ``k`` over a fixed range and print the largest value
       of a computation-to-communication ratio expression.
    2. Read the convolution parameters from the file named by
       ``sys.argv[1]`` and print them together with the derived output sizes.
    3. Evaluate a cycle estimate for every admissible ``(Tm, Tn)``, keep up
       to five candidates, and show the estimates as a 3D surface.
    4. Print the kept candidates and the buffer sizes derived from them.

    Returns:
        ``(arr, conv_min_cycles)`` where ``arr`` is always an empty list and
        ``conv_min_cycles`` is the cycle count of the most recently accepted
        candidate (the ``[1, 1]`` baseline if none was accepted).
    """
    # Reference tiling used by the ratio expression below.
    ref_tm, ref_tn, ref_tr, ref_tc = 32, 8, 16, 16
    max_ratio = 0
    max_sk = []
    for s in range(1, 5):
        for k in range(s, min(80 - 15 * s, 11)):
            # NOTE(review): 15 looks like ref_tr - 1 -- confirm.
            side = 15 * s + k
            ctc_ratio = (ref_tm * ref_tr * ref_tc *
                         (side * side * ref_tn + 1)) / (
                             4 * (ref_tn * ref_tm * k * k + ref_tm +
                                  ref_tn * side * side +
                                  ref_tm * ref_tr * ref_tc))
            if ctc_ratio > max_ratio:
                max_ratio = ctc_ratio
                max_sk = [s, k]
    print("max ctc and s,k")
    print(max_ratio)
    print(max_sk)

    arr2 = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr2)

    def int_param(key):
        """Return the values stored under *key* converted to ints."""
        return [int(value) for value in prms[prms_str.index(key)]]

    conv_N = int_param("nn_in_number_conv")
    conv_M = int_param("nn_out_number_conv")
    conv_r = int_param("nn_in_data_size_conv")
    conv_K = int_param("nn_channel_size_conv")
    conv_S = int_param("nn_stride_conv")
    conv_P = int_param("nn_padding_conv")
    max_conv_N = max(conv_N)
    max_conv_M = max(conv_M)
    max_conv_S = max(conv_S)
    max_conv_K = max(conv_K)

    # Output size of every convolution layer.
    conv_layer_num = len(conv_r)
    conv_R = []
    for idx, in_size in enumerate(conv_r):
        conv_R.append(
            (in_size - conv_K[idx] + conv_S[idx] + 2 * conv_P[idx]) /
            conv_S[idx])

    print("conv_N")
    print(conv_N)
    print("conv_M")
    print(conv_M)
    print("conv_r")
    print(conv_r)
    print("conv_R")
    print(conv_R)
    print("conv_K")
    print(conv_K)

    DSP = 6840
    d = int(DSP / 5)
    arr = []

    def layer_cycles(j, tm, tn):
        """Cycle estimate of layer *j* for the tiling ``(tm, tn)``."""
        return int(conv_R[j] * conv_R[j] *
                   math.ceil(int(conv_N[j]) / float(tn)) *
                   math.ceil(int(conv_M[j]) / float(tm)) * conv_K[j] *
                   conv_K[j])

    # Baseline candidate Tm = Tn = 1 (accumulated without truncation).
    conv_min_cycles = 0
    for o in range(conv_layer_num):
        conv_min_cycles += conv_R[o] * conv_R[o] * math.ceil(
            int(conv_M[o]) / float(1)) * math.ceil(
                int(conv_N[o]) / float(1)) * conv_K[o] * conv_K[o]
    min_Tm_Tn = [[1, 1]]
    min_cycle_list = [conv_min_cycles]

    print("Analysis initialized point: ", min_cycle_list, min_Tm_Tn)

    target = sum(layer_cycles(j, 87, 32) for j in range(conv_layer_num))
    print("targeted cycle numbers [87, 32]")
    print(target)

    fig = plt.figure()
    # Figure.gca() no longer accepts keyword arguments in current Matplotlib;
    # add_subplot creates the 3D axes on old and new versions alike.
    ax = fig.add_subplot(111, projection='3d')
    ax.set_title("3D Figure")
    ax.set_xlabel("Tm")
    ax.set_ylabel("Tn")
    ax.set_zlabel("Cycles")

    grid = 100
    axis_values = list(range(1, grid + 1))
    XX, YY = np.meshgrid(axis_values, axis_values)
    ZZ = np.zeros((grid, grid))

    search_layer_num = len(conv_M)
    for Tm in range(1, max_conv_M + 1):
        Tn_max = min(max_conv_N, int(d / Tm), Tm)
        for Tn in range(1, Tn_max + 1):
            # NOTE(review): layer 0 is skipped here although the baseline and
            # the target above include it -- confirm this is intended.
            cycles = sum(
                layer_cycles(j, Tm, Tn) for j in range(1, search_layer_num))

            # meshgrid puts Tm along the columns and Tn along the rows, and
            # the axis values start at 1, hence the [Tn - 1, Tm - 1] index.
            if Tm <= grid and Tn <= grid:
                ZZ[Tn - 1, Tm - 1] = max(cycles, 0)

            if cycles < min(min_cycle_list) and cycles != 0:
                conv_min_cycles = cycles
                if len(min_Tm_Tn) >= 5:
                    # Drop the worst kept candidate.  Deleting by index keeps
                    # the two parallel lists aligned even if a pair repeats.
                    worst = min_cycle_list.index(max(min_cycle_list))
                    del min_cycle_list[worst]
                    del min_Tm_Tn[worst]
                min_cycle_list.append(conv_min_cycles)
                min_Tm_Tn.append([Tm, Tn])

    surf = ax.plot_surface(XX,
                           YY,
                           ZZ,
                           rstride=1,
                           cstride=1,
                           cmap=cm.coolwarm,
                           linewidth=0,
                           antialiased=True)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    plt.show()

    print("Tm and Tn")
    print(min_Tm_Tn)
    print("cycles")
    print(min_cycle_list)

    min_among_all = min_cycle_list.index(min(min_cycle_list))
    print("Best among all points", min_cycle_list[min_among_all],
          min_Tm_Tn[min_among_all])

    # Buffer sizes derived from every kept candidate.
    in_buff_arr = []
    w_buff_arr = []
    out_buff_arr = []
    total_arr = []
    for tm, tn in min_Tm_Tn:
        Tr = int(math.sqrt(tm * tn))
        in_side = (Tr - 1) * max_conv_S + max_conv_K
        in_buff = in_side * in_side
        out_buff = tm * tn * max_conv_K * max_conv_K
        w_buff = Tr * Tr * tm
        in_buff_arr.append(in_buff)
        out_buff_arr.append(out_buff)
        w_buff_arr.append(w_buff)
        total_arr.append(in_buff + out_buff + w_buff)
    print("in_buf")
    print(in_buff_arr)
    print("w_buf")
    print(w_buff_arr)
    print("out_buf")
    print(out_buff_arr)
    print("total")
    print(total_arr)
    return arr, conv_min_cycles