def generate_weights_biases(length, s, arr1, arr2, prefix=SEPARATER):
    """Emit C++ code that loads the weights (and, when present, the biases)
    for every layer of kind `s`.

    Args:
        length: number of layers of this kind in the network.
        s: layer-kind tag used in generated identifiers ("conv" or "fc").
        arr1: weight-size parameters forwarded to generate_w_b().
        arr2: bias-size parameters forwarded to generate_w_b().
        prefix: indentation string prepended to every generated line.

    Returns:
        The generated C++ snippet as a single string.
    """
    comm = "// Prepare weights and bias for "
    # The network description is re-read here (not passed in) only to decide
    # whether this layer kind carries bias blobs at all.
    array = helping_functions.read_params(sys.argv[1])
    arr, arr_str = helping_functions.extraction(array)
    # A "<kind>_bias_size" entry among the extracted parameter names signals
    # that bias data exists for this layer kind (only conv/fc can have it).
    has_bias = s in ("conv", "fc") and (s + "_bias_size") in arr_str
    parts = []
    for c in range(length):
        c_name = s + "_" + str(c + 1) + "_weight2D"
        b_name = s + "_" + str(c + 1) + "_bias2D"
        parts.append(prefix + comm + s + " layer " + str(c + 1) + EOL)
        parts.append(generate_w_b(c_name, arr1, "weight", c, s))
        if has_bias:
            parts.append(generate_w_b(b_name, arr2, "bias", c, s))
        parts.append(prefix + "in_number_" + s + "++;" + EOL + EOL)
    parts.append(prefix + "cout<<\"Finished loading " + s
                 + " weight into memory! Total: \" <<" + s
                 + "_weight_num << \"... ... ...\"<<endl;" + EOL)
    if has_bias:
        parts.append(prefix + "cout<<\"Finished loading " + s
                     + " bias into memory! Total: \" <<" + s
                     + "_bias_num << \"... ... ...\"<<endl;" + EOL * 2)
    # join once instead of repeated += (avoids quadratic string building)
    return "".join(parts)
def generate(generated_file_name="acc_instance.h"):
    """Generate acc_instance.h: one accelerator instantiation plus a thin
    wrapper function for every layer type actually used by the network.

    Args:
        generated_file_name: header file name written under
            ../example/test_demo/inference_net/.

    Returns:
        The complete header text that was written.
    """
    arr = helping_functions.read_params(sys.argv[1])
    arr1 = helping_functions.max_stride_kernel(arr)
    layers_fun = layers(arr[1])  # per-type usage flag/count; 0 == unused
    str1 = "#ifndef _ACC_INSTANCE_H_" + EOL + "#define _ACC_INSTANCE_H_" + EOL * 2
    import_str = ""
    body_str = ""
    includes = ["#include \"conv_acc_innerdf.h\"",
                "#include \"max_pool_acc.h\"",
                "#include \"ave_pool_acc.h\"",
                "#include \"max_pool_acc_innerdf.h\"",
                "#include \"ave_pool_acc_noact.h\""]
    fn_names = ["conv_layer_new", "max_pool_layer_new", "ave_pool_layer_new",
                "max_pool_layer_new_noact", "ave_pool_layer_new_noact"]
    rn_tp = "void"
    arg_t_list = [["int", "int", "int", "int", "int", "int", "int", "bool",
                   "data_type", "data_type_w", "data_type_w", "data_type_o",
                   "int", "int", "int", "int"],
                  ["int", "int", "int", "int", "int", "int", "int", "int",
                   "data_type", "data_type_o"]]
    arg_n_list = [["N", "K", "M", "R", "C", "S", "P", "act", "*in_data",
                   "*layer_weights", "*layer_bias", "*out_data",
                   "weight_offset", "bias_offset", "in_offset", "out_offset"],
                  ["R_in", "C_in", "N", "K", "R", "C", "S", "P", "*in_data",
                   "*out_data"]]
    acc_params = [["16", "4", "13", "13", str(arr1[0]), str(arr1[1])],
                  ["16", "16", "16"],
                  ["16", "16", "16"],
                  ["16", "16", "16", str(arr1[2]), str(arr1[3])],
                  ["16", "16", "16"]]
    init_nm = ["conv_acc", "max_pool_acc", "ave_pool_acc",
               "max_pool_acc_noact", "ave_pool_acc_noact"]
    prm = "data_type, data_type_w, data_type_o"
    init_names = ["convAcc2", "maxPoolAcc1", "avePoolAcc1",
                  "maxPoolAccNoact1", "avePoolAccNoact1"]
    acc_fn_names = ["conv_layer_acc", "max_pool_layer_acc",
                    "ave_pool_layer_acc", "max_pool_layer_acc_noact",
                    "ave_pool_layer_acc_noact"]
    for i, l in enumerate(layers_fun):
        if l != 0:
            import_str += includes[i] + EOL
            # indices 0-1 use the 16-argument signature, 2+ the pooling one
            # (NOTE(review): index 1 is max_pool yet uses list 0 -- intentional
            # per the original table layout; confirm against the accelerators)
            j = 1 if i > 1 else 0
            body_str += init_nm[i] + CLASS_BEGIN + prm + COMMA_SPACE
            body_str += ', '.join(acc_params[i])
            body_str += CLASS_END + SPACE + init_names[i] + EOS + EOL * 2
            # forward the declared parameters by name; pointers lose the '*'
            s = ", ".join(arg_nm.replace("*", "") for arg_nm in arg_n_list[j])
            body_str += generate_function(
                fn_names[i], rn_tp, arg_t_list[j], arg_n_list[j],
                [init_names[i] + CALL_SYMBOL + acc_fn_names[i]
                 + PARAMETER_BEGIN + s + PARAMETER_END + EOS])
    import_str += "#include \"config.h\"" + EOL * 2
    str1 += import_str + body_str + EOL * 2 + ENDIF
    with open("../example/test_demo/inference_net/" + generated_file_name,
              "w") as generated_file:
        generated_file.write(str1)
    return str1
def generate(generated_file_name="ff_test.cpp"):
    """Generate the top-level test driver ff_test.cpp from parameter2.json
    and the network description given on the command line.

    Args:
        generated_file_name: file name written under ../example/test_demo/.
    """
    # `with` closes the JSON file even on error (the original leaked it)
    with open("parameter2.json", "r") as paraJS:
        json_data = json.load(paraJS)
    str1 = ""
    arr = helping_functions.read_params(sys.argv[1])
    str1 += generate_import(json_data["import"])
    str1 += generate_function(json_data["function"], "loadfile")
    str1 += generate_fn_load()
    str1 += generate_function(json_data["function"], "main")
    str_body = generate_body(json_data["body"], json_data["out"],
                             json_data["comments"], arr)
    str1 += str_body
    with open("../example/test_demo/" + generated_file_name,
              "w") as generated_file:
        generated_file.write(str1)
def generate(generated_file_name="construct_net.h"):
    """Generate construct_net.h (the network-construction header) from
    parameter.json and the network description given on the command line.

    Args:
        generated_file_name: file name written under
            ../example/test_demo/inference_net/.
    """
    # `with` closes the JSON file even on error (the original leaked it)
    with open("parameter.json", "r") as paraJS:
        json_data = json.load(paraJS)
    arr = helping_functions.read_params(sys.argv[1])
    body_s, count, acc_str, wb_arr = generate_body(arr)
    import_s = generate_import(json_data["import"], count)
    header_s = generate_header(json_data["head"], arr)
    pragma_s = generate_pragma(wb_arr)
    end_s = generate_end(json_data["end"])
    function_str = import_s + header_s + pragma_s + body_s + end_s
    with open("../example/test_demo/inference_net/" + generated_file_name,
              "w") as generated_file:
        generated_file.write(function_str)
    print(acc_str)
def generate_preprocessor(prep_json):
    """Emit the preprocessor-definition section of config.h.

    The first entry of `prep_json` is emitted on its own; the remaining
    entries are copied through, except the commented-out scale define
    ("//#define _SCALE_ 1"), which is activated when the network declares
    "nn_scale_size" and dropped entirely otherwise.

    Args:
        prep_json: list of preprocessor source lines from parameter1.json.

    Returns:
        The generated snippet as a string.
    """
    arr = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr)
    comm = "// C++ compilation debug mode" + EOL
    prep_str = EOL
    prep_str += comm
    prep_str += prep_json[0] + EOL * 2
    prep_str += comm
    scale_enabled = "nn_scale_size" in prms_str
    for prep_sen in prep_json[1:]:
        if prep_sen == "//#define _SCALE_ 1":
            # uncomment the define only when the network uses scaling;
            # otherwise omit the line completely
            if scale_enabled:
                prep_str += "#define _SCALE_ 1" + EOL
        else:
            prep_str += prep_sen + EOL
    return prep_str
def generate_function_w_bn(fn_nm, return_type, arg_types_arr, arg_names_arr, fn_body, prefix=SEPARATER):
    """Emit a C++ function definition (for a conv-with-batchnorm layer) whose
    scale-related parameters are wrapped in an `#if _SCALE_` / `#endif` guard.

    Args:
        fn_nm: generated C++ function name.
        return_type: generated C++ return type.
        arg_types_arr: parameter type strings, one per parameter.
        arg_names_arr: parameter name strings, parallel to arg_types_arr.
        fn_body: list of C++ statements forming the function body.
        prefix: indentation prepended to each generated line.

    Returns:
        The generated C++ function definition as a string.
    """
    fn_str = return_type + SPACE + fn_nm + PARAMETER_BEGIN + EOL
    # Re-read the network description only to learn whether conv biases
    # exist, which shifts the position of the _SCALE_-guarded parameters.
    arr = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr)
    for i, f in enumerate(arg_types_arr):  # `f` is unused; only the index drives the logic
        if "conv_bias_size" in prms_str:
            # With conv biases the argument list is one entry longer, so the
            # _SCALE_ guard opens before parameter 15 and closes before 18.
            # NOTE(review): guard indices are hard-coded against the caller's
            # argument tables -- confirm they still line up if those change.
            if i == 15:
                fn_str += "#if _SCALE_" + EOL
                fn_str += prefix + arg_types_arr[i] + SPACE + arg_names_arr[i]
            elif i == 18:
                fn_str += "#endif" + EOL
                fn_str += prefix + arg_types_arr[i] + SPACE + arg_names_arr[i]
            else:
                fn_str += prefix + arg_types_arr[i] + SPACE + arg_names_arr[i]
            # every parameter except the last is followed by a comma
            if (i != len(arg_types_arr) - 1):
                fn_str += "," + EOL
        else:
            # Without conv biases the guard shifts one position earlier
            # (opens before 14, closes before 17).
            if i == 14:
                fn_str += "#if _SCALE_" + EOL
                fn_str += prefix + arg_types_arr[i] + SPACE + arg_names_arr[i]
            elif i == 17:
                fn_str += "#endif" + EOL
                fn_str += prefix + arg_types_arr[i] + SPACE + arg_names_arr[i]
            else:
                fn_str += prefix + arg_types_arr[i] + SPACE + arg_names_arr[i]
            if (i != len(arg_types_arr) - 1):
                fn_str += "," + EOL
    fn_str += PARAMETER_END + SPACE + BODY_BEGIN + EOL * 2
    for fb in fn_body:
        fn_str += prefix + fb + EOL
    fn_str += EOL
    fn_str += BODY_END + EOL * 2
    return fn_str
def generate(generated_file_name="config.h"):
    """Generate config.h: data-type definitions (prompted interactively),
    preprocessor flags, globals, and the network parameter tables.

    Args:
        generated_file_name: file name written under
            ../example/test_demo/inference_net/.

    Returns:
        The complete header text that was written.
    """
    # `with` closes the JSON file even on error (the original leaked it)
    with open("parameter1.json", "r") as paraJS:
        json_data = json.load(paraJS)
    arr = helping_functions.read_params(sys.argv[1])
    str1 = ""
    str1 += generate_import(json_data["import"])
    t1 = helping_functions.prompt("Please enter the type of input: ")
    t2 = helping_functions.prompt("Please enter the type of weights: ")
    t3 = helping_functions.prompt("Please enter the type of output: ")
    # ap_fixed types need the HLS fixed-point header
    if any(t.lower().startswith("ap_fixed") for t in (t1, t2, t3)):
        str1 += "#include \"ap_fixed.h\"" + EOL
    arr1 = [t1, t2, t3]
    str1 += generate_type_definition(json_data["type_definition"], arr1)
    str1 += generate_preprocessor(json_data["preprocessor"])
    str1 += generate_var(json_data["var"])
    str1 += generate_body(json_data["params"], arr)
    str1 += generate_end(json_data["end"])
    with open("../example/test_demo/inference_net/" + generated_file_name,
              "w") as generated_file:
        generated_file.write(str1)
    return str1
def generate(generated_file_name="conv_acc_innerpp_fc.h"):
    """Emit the templated C++ HLS accelerator class `conv_acc_fc` (a
    double-buffered, ping-pong conv engine used for the FC layers) and write
    it to ../example/test_demo/inference_net/<generated_file_name>.

    Uses module globals: `port_num` (number of data ports; input/output
    arrays are interleaved across that many pointers), `EOL`, and
    `helping_functions`.

    Returns:
        The generated C++ source as a string.
    """
    arr = helping_functions.read_params(sys.argv[1])
    # NOTE(review): the extracted parameters are not referenced below;
    # presumably kept for parity with the sibling generators -- confirm.
    prms, prms_str = helping_functions.extraction(arr)
    # ---- header guard and includes ----
    str1 = "#ifndef _CONV_ACC_FC_H_" + EOL
    str1 += "#define _CONV_ACC_FC_H_" + EOL + EOL
    str1 += "#include <iostream>" + EOL
    str1 += "#include <fstream>" + EOL
    str1 += '#include "activation_functions.h"' + EOL + EOL
    str1 += "#if _C_DEBUG_MODE_" + EOL
    str1 += "#include <algorithm>" + EOL
    str1 += "#endif" + EOL + EOL
    str1 += "using namespace std;" + EOL + EOL
    str1 += "template <typename T, typename W, typename G, int Tm, int Tn, int Tr, int Tc, int S_max, int K_max>" + EOL
    str1 += "class conv_acc_fc {" + EOL + EOL
    str1 += "private:" + EOL
    str1 += " int conv_layer_number;" + EOL + EOL
    str1 += "public:" + EOL
    str1 += " conv_acc_fc() : conv_layer_number(0) {conv_layer_number = 0;};" + EOL + EOL
    # ---- C++-debug-only buffer reset helpers ----
    str1 += " ////------------------------------C++ debugging functions---------------------------------------////" + EOL
    str1 += " // Reset output buffer" + EOL
    str1 += " void out_buf_reset(G buf[][Tr][Tc]){" + EOL
    str1 += " for(int i = 0; i < Tm; i++){" + EOL
    str1 += " for(int j = 0; j < Tr; j++){" + EOL
    str1 += " for(int k = 0; k < Tc; k++){" + EOL
    str1 += " buf[i][j][k] = G(0);" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " // Reset weight buffer" + EOL
    str1 += " void w_buf_reset(int K, W buf[][Tm][K_max][K_max]){" + EOL
    str1 += " for(int i = 0; i < Tn; i++){" + EOL
    str1 += " for(int j = 0; j < Tm; j++){" + EOL
    str1 += " for(int k = 0; k < K; k++){" + EOL
    str1 += " for(int l = 0; l < K; l++){" + EOL
    str1 += " buf[i][j][k][l] = W(0);" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " // Reset bias buffer" + EOL
    str1 += " void b_buf_reset(W buf[]){" + EOL
    str1 += " for(int i = 0; i < Tm; i++){" + EOL
    str1 += " buf[i]= W(0);" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    # ---- on-chip buffer load helpers ----
    str1 += " ////-----------------------------Accelerator Functions---------------------------------------////" + EOL
    str1 += " // Load bias data" + EOL
    str1 += " void b_buf_load(W buf[], W *layer_bias, int bias_offset, int m){" + EOL
    str1 += " for(int i = 0; i < Tm; i++){" + EOL
    str1 += "#pragma HLS UNROLL" + EOL
    str1 += " buf[i] = *(layer_bias + bias_offset + i + m);" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    # in_buf_load takes `port_num` separate input pointers; channel i is
    # fetched from pointer (i mod port_num) + 1.
    str1 += " // Load input data" + EOL
    str1 += " void in_buf_load(T buf[][(Tr-1)*S_max + K_max][(Tc-1)*S_max + K_max]"
    for j in range(1, port_num + 1):
        str1 += ",T " + "*in_data_" + str(j)
    str1 += ", int in_offset, int n, int r, int c, int S, int K, int P, int R_IN, int C_IN, int N) {" + EOL
    str1 += " for (int j = r * S - P; j < (r + Tr - 1) * S + K - P; j++) {" + EOL
    str1 += " for (int k = c * S - P; k < (c + Tc - 1) * S + K - P; k++) {" + EOL
    str1 += "#pragma HLS PIPELINE" + EOL
    str1 += " for (int i = 0; i < Tn; i+=" + str(
        port_num) + "){" + EOL
    # str1 += "#pragma HLS UNROLL" + EOL
    # str1 += "#pragma HLS DEPENDENCE variable=buf inter false" + EOL
    # out-of-range (padding / channel overrun) entries are zero-filled
    for j in range(0, port_num):
        str1 += " if ((n + Tn > N && i + " + str(
            j
        ) + " >= N - n ) || j < 0 || j >= R_IN || k < 0 || k >= C_IN) {" + EOL
        str1 += " buf[i + " + str(
            j) + "][j - r * S + P][k - c * S + P] = T(0);" + EOL
        str1 += " } else {" + EOL
        str1 += " buf[i + " + str(
            j) + "][j - r * S + P][k - c * S + P] = *(in_data_" + str(
                j + 1) + " + in_offset + (i + n)/" + str(
                    port_num) + " * R_IN * C_IN + j * C_IN + k);" + EOL
        str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += EOL
    str1 += EOL
    str1 += " // Load weights to weight buffer" + EOL
    str1 += " void w_buf_load(W buf[][Tm][K_max][K_max], W *layer_weights, int weight_offset, int n, int m, int K, int N, int M){" + EOL
    str1 += " for(int k1 = 0; k1 < K; k1++){" + EOL
    str1 += " for(int k2 = 0; k2 < K; k2++){" + EOL
    str1 += "#pragma HLS PIPELINE" + EOL
    str1 += " for(int j = 0; j < Tn; j++){" + EOL
    #str1 += "#pragma HLS UNROLL" + EOL
    str1 += " if(N < n+Tn && j == N-n){" + EOL
    str1 += " break;" + EOL
    str1 += " }" + EOL
    str1 += " for(int i = 0; i < Tm && i < M-m; i++){" + EOL
    #str1 += "#pragma HLS UNROLL" + EOL
    str1 += " if(M < m+Tm && i == M-m){" + EOL
    str1 += " break;" + EOL
    str1 += " }" + EOL
    str1 += " buf[j][i][k1][k2] = *(layer_weights + weight_offset + (i+m)*N*K*K + (j+n)*K*K + k1*K + k2);" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    # ---- the MAC kernel ----
    str1 += " // Convolution computation kernel" + EOL
    str1 += " void conv_engine(T in_buf[][(Tr-1)*S_max + K_max][(Tc-1)*S_max + K_max], W w_buf[][Tm][K_max][K_max], W b_buf[], G out_buf[][Tr][Tc], int S, int n, int r, int c, int K, int R_OUT, int C_OUT){" + EOL
    str1 += " for(int i=0; i<K; i++){" + EOL
    str1 += " for(int j=0; j<K; j++){" + EOL
    str1 += " for(int tr=0; tr<Tr; tr++){" + EOL
    str1 += " for(int tc=0; tc<Tc; tc++){" + EOL
    str1 += "#pragma HLS PIPELINE" + EOL
    str1 += " for(int tm = 0; tm < Tm; tm++){" + EOL
    str1 += "#pragma HLS UNROLL" + EOL
    str1 += " for(int tn=0; tn<Tn; tn++){" + EOL
    str1 += "#pragma HLS UNROLL" + EOL
    str1 += " if(i==0&&j==0&&tn==0&&n==0)" + EOL
    str1 += " out_buf[tm][tr][tc] = b_buf[tm] + w_buf[tn][tm][i][j]*in_buf[tn][S*(tr)+i][S*(tc)+j];" + EOL
    str1 += " else" + EOL
    str1 += " out_buf[tm][tr][tc] = out_buf[tm][tr][tc] + w_buf[tn][tm][i][j]*in_buf[tn][S*(tr)+i][S*(tc)+j];" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += EOL
    str1 += EOL
    # ---- write-back, again split across `port_num` output pointers ----
    str1 += " // Ouput out_buf data to output interface" + EOL
    str1 += " void output_res(G out_buf[][Tr][Tc]"
    for j in range(1, port_num + 1):
        str1 += ",G " + "*out_data_" + str(j)
    str1 += ", int out_offset, int n, int m, int r, int c, int N, int M, int R_OUT, int C_OUT, bool act){" + EOL
    str1 += " if (n >= N - Tn) {" + EOL
    str1 += " for (int j = r; j < r + Tr && j < R_OUT; j++) {" + EOL
    #str1 += " if (C_OUT < c + Tc && k == C_OUT) { break; }" + EOL
    str1 += " for (int k = c; k < c + Tc && k < C_OUT; k++) {" + EOL
    #str1 += " if (R_OUT < r + Tr && j == R_OUT) { break; }" + EOL
    str1 += "#pragma HLS PIPELINE" + EOL
    str1 += " for (int i = 0; i < Tm && i < M-m; i += " + str(
        port_num) + ") {" + EOL
    #str1 += "#pragma HLS UNROLL" + EOL
    #str1 += " if (M < m + Tm && i+m == M) { break; }" + EOL
    str1 += " if (act) {" + EOL
    for j in range(1, port_num + 1):
        str1 += " if (i + " + str(j - 1) + " < M-m)" + EOL
        str1 += " *(out_data_" + str(j) + " + out_offset + ((i+m)/" + str(port_num) + ") * R_OUT * C_OUT + j * C_OUT + k) = relu(out_buf[i + " +\
            str(j-1) + "][j - r][k - c]);" + EOL
    str1 += " }" + EOL
    str1 += " else {" + EOL
    for j in range(1, port_num + 1):
        str1 += " if (i + " + str(j - 1) + " < M-m)" + EOL
        str1 += " *(out_data_" + str(j) + " + out_offset + ((i+m)/" + str(port_num) + ") * R_OUT * C_OUT + j * C_OUT + k) = out_buf[i + " +\
            str(j-1) + "][j - r][k - c];" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    # ---- the top-level accelerator entry: signature ----
    str1 += "///////////////////////------------------conv accelerator----------------//////////////////////////" + EOL
    str1 += " void conv_layer_acc_fc(" + EOL
    str1 += " int N, //input feature number" + EOL
    str1 += " int K, //input kernel size" + EOL
    str1 += " int M, // output feature number" + EOL
    str1 += " int R_IN, // input Row" + EOL
    str1 += " int C_IN, // input column" + EOL
    str1 += " int R_OUT, // output Row" + EOL
    str1 += " int C_OUT,// output column" + EOL
    str1 += " int S, // stride size" + EOL
    str1 += " int P, // padding size" + EOL
    str1 += " bool act, // activation function bit (1-- with act, 0--without act)" + EOL
    str1 += " W *layer_weights, //w[M][N][K][K]" + EOL
    str1 += " W *layer_bias, // b[M]" + EOL
    str1 += " int weight_offset," + EOL
    str1 += " int bias_offset," + EOL
    str1 += " int in_offset," + EOL
    str1 += " int out_offset," + EOL
    for j in range(1, port_num + 1):
        str1 += " T *in_data_" + str(
            j
        ) + "," + " // in_data[N][(R-1)*S + K][(C-1)*S + K] --> [N][(R-1)*S + K - 2*P][(C-1)*S + K - 2*P]" + EOL
    for j in range(1, port_num + 1):
        if j == port_num:
            str1 += " G *out_data_" + str(
                j) + "){ // out[M][R][C]" + EOL + EOL
        else:
            str1 += " G *out_data_" + str(
                j) + "," + " // out[M][R][C]" + EOL
    # ---- double-buffered local storage and ping-pong state ----
    str1 += " /***************local data buffer******************************/" + EOL
    str1 += " T in_buf_1[Tn][(Tr-1)*S_max + K_max][(Tc-1)*S_max + K_max];" + EOL
    str1 += " T in_buf_0[Tn][(Tr-1)*S_max + K_max][(Tc-1)*S_max + K_max];" + EOL
    str1 += " W w_buf_1[Tn][Tm][K_max][K_max];" + EOL
    str1 += " W w_buf_0[Tn][Tm][K_max][K_max];" + EOL
    str1 += " W b_buf_1[Tm];" + EOL
    str1 += " W b_buf_0[Tm];" + EOL
    str1 += " G out_buf_1[Tm][Tr][Tc];" + EOL
    str1 += " G out_buf_0[Tm][Tr][Tc];" + EOL + EOL
    str1 += " /***************Ptr and buffer initialization******************************/" + EOL
    str1 += " bool in_buf_0_empty = 1;" + EOL
    str1 += " bool in_buf_1_empty = 1;" + EOL
    str1 += " bool out_buf_0_empty = 1;" + EOL
    str1 += " bool out_buf_1_empty = 1;" + EOL
    str1 += " int loadbufPtr = 0;" + EOL
    str1 += " int combufPtr = 0;" + EOL
    str1 += " int resbufPtr = 0;" + EOL
    str1 += " bool last_com = 0;" + EOL
    str1 += " bool last_load = 0;" + EOL
    str1 += " bool last_res = 0;" + EOL + EOL
    str1 += "#if _HLS_MODE_" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=in_buf_1 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=in_buf_0 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=w_buf_1 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=w_buf_1 complete dim=2" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=w_buf_0 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=w_buf_0 complete dim=2" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=b_buf_1 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=b_buf_0 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=out_buf_1 complete dim=1" + EOL
    str1 += "#pragma HLS ARRAY_PARTITION variable=out_buf_0 complete dim=1" + EOL
    str1 += "#endif" + EOL + EOL
    str1 += "#if _C_DEBUG_MODE_" + EOL
    str1 += "#if _KERNEL_DEBUG_" + EOL
    str1 += ' cout << "Starting conv_acc_innerpp_fc layer ...." << endl;' + EOL
    str1 += " //buffer local data initiallization: must do it in C++ debug!" + EOL
    str1 += " out_buf_reset(out_buf_1);" + EOL
    str1 += " out_buf_reset(out_buf_0);" + EOL
    str1 += " b_buf_reset(b_buf_1);" + EOL
    str1 += " b_buf_reset(b_buf_0);" + EOL
    str1 += " w_buf_reset(K, w_buf_1);" + EOL
    str1 += " w_buf_reset(K, w_buf_0);" + EOL
    str1 += "#endif" + EOL
    str1 += "#endif" + EOL
    # ---- main tiling loops with load / compute / write-back ping-pong ----
    str1 += " for(int r = 0; r < R_OUT; r += Tr){" + EOL
    str1 += " for(int c = 0; c < C_OUT; c += Tc){" + EOL
    str1 += " for(int m = 0; m < M; m += Tm){" + EOL
    str1 += " for(int n = 0; n < N; n += 2*Tn){" + EOL
    #str1 += "#if _HLS_MODE_" + EOL
    #str1 += "#pragma HLS DATAFLOW" + EOL
    #str1 += "#endif" + EOL
    str1 += " //--------------------------Load input B W D in ping-pong manner-------------------------//" + EOL
    str1 += " while ((in_buf_0_empty | in_buf_1_empty)&& (!last_load)) {" + EOL
    str1 += " if (loadbufPtr == 1) {" + EOL
    str1 += ' cout << "loading input buffer 1...." << endl;' + EOL
    str1 += " //load input bias" + EOL
    str1 += " b_buf_load(b_buf_1, layer_bias, bias_offset, m);" + EOL
    str1 += " // load input data" + EOL
    str1 += " in_buf_load(in_buf_1"
    for j in range(1, port_num + 1):
        str1 += ", in_data_" + str(j)
    str1 += ", in_offset, n+Tn, r, c, S, K, P, R_IN, C_IN, N);" + EOL
    str1 += " // load input weights" + EOL
    str1 += " w_buf_load(w_buf_1, layer_weights, weight_offset, n+Tn, m, K, N, M);" + EOL
    str1 += " in_buf_1_empty = 0;" + EOL
    str1 += ' cout << "buffer 1 full" << endl;' + EOL
    str1 += " loadbufPtr = 0;" + EOL
    str1 += " if (n+2*Tn >= N) {last_load = 1;}" + EOL
    str1 += " } else {" + EOL
    str1 += ' cout << "loading input buffer 0...." << endl;' + EOL
    str1 += " //load input bias" + EOL
    str1 += " b_buf_load(b_buf_0, layer_bias, bias_offset, m);" + EOL
    str1 += " // load input data" + EOL
    str1 += " in_buf_load(in_buf_0"
    for j in range(1, port_num + 1):
        str1 += ", in_data_" + str(j)
    str1 += ", in_offset, n, r, c, S, K, P, R_IN, C_IN, N);" + EOL
    str1 += " // load input weights" + EOL
    str1 += " w_buf_load(w_buf_0, layer_weights, weight_offset, n, m, K, N, M);" + EOL
    str1 += " in_buf_0_empty = 0;" + EOL
    str1 += ' cout << "buffer 0 full" << endl;' + EOL
    str1 += " loadbufPtr = 1;" + EOL
    str1 += " if (n+Tn >= N) {last_load = 1;}" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " loadbufPtr = 0;" + EOL
    str1 += " last_load = 0;" + EOL
    str1 += " //------------------------------compute buffered data -----------------------------------//" + EOL
    str1 += " while ((!in_buf_0_empty | !in_buf_1_empty)&& (!last_com)) {" + EOL
    str1 += " if (combufPtr == 1) {" + EOL
    str1 += ' cout << "computing input buffer 1...." << endl;' + EOL
    str1 += " if(resbufPtr == 1){" + EOL
    str1 += " conv_engine(in_buf_1, w_buf_1, b_buf_1, out_buf_1, S, n+Tn, r, c, K, R_OUT, C_OUT);" + EOL
    str1 += " out_buf_1_empty = 0;" + EOL
    str1 += " }else{" + EOL
    str1 += " conv_engine(in_buf_1, w_buf_1, b_buf_1, out_buf_0, S, n+Tn, r, c, K, R_OUT, C_OUT);" + EOL
    str1 += " out_buf_0_empty = 0;" + EOL
    str1 += " }" + EOL
    str1 += " in_buf_1_empty = 1;" + EOL
    str1 += " combufPtr = 0;" + EOL
    str1 += ' cout << "buffer 1 computed" << endl;' + EOL
    str1 += " if (n+2*Tn >= N) {last_com = 1;}" + EOL
    str1 += " } else {" + EOL
    str1 += ' cout << "computing input buffer 0...." << endl;' + EOL
    str1 += " if(resbufPtr == 1){" + EOL
    str1 += " conv_engine(in_buf_0, w_buf_0, b_buf_0, out_buf_1, S, n, r, c, K, R_OUT, C_OUT);" + EOL
    str1 += " out_buf_1_empty = 0;" + EOL
    str1 += " }else{" + EOL
    str1 += " conv_engine(in_buf_0, w_buf_0, b_buf_0, out_buf_0, S, n, r, c, K, R_OUT, C_OUT);" + EOL
    str1 += " out_buf_0_empty = 0;" + EOL
    str1 += " }" + EOL
    str1 += " in_buf_0_empty = 1;" + EOL
    str1 += " combufPtr = 1;" + EOL
    str1 += ' cout << "buffer 0 computed" << endl;' + EOL
    str1 += " if (n+Tn >= N) {last_com = 1;}" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " combufPtr = 0;" + EOL
    str1 += " last_com = 0;" + EOL
    str1 += " //---------------------------transfer output data----------------------------------------//" + EOL
    str1 += " while ((!out_buf_0_empty | !out_buf_1_empty)&& (!last_res)) {" + EOL
    str1 += " if (resbufPtr == 1) {" + EOL
    str1 += ' cout << "output buffer 1...." << endl;' + EOL
    str1 += " // transfer output data" + EOL
    str1 += " if (n+Tn >= N) {" + EOL
    str1 += " last_res = 1;" + EOL
    str1 += " resbufPtr = 0;" + EOL
    str1 += " output_res(out_buf_1"
    for j in range(1, port_num + 1):
        str1 += ", out_data_" + str(j)
    str1 += ", out_offset, n, m, r, c, N, M, R_OUT, C_OUT, act);" + EOL
    str1 += " }else if (n+2*Tn >= N) {" + EOL
    str1 += " last_res = 1;" + EOL
    str1 += " resbufPtr = 0;" + EOL
    str1 += " output_res(out_buf_1"
    for j in range(1, port_num + 1):
        str1 += ", out_data_" + str(j)
    str1 += ", out_offset, n+Tn, m, r, c, N, M, R_OUT, C_OUT, act);" + EOL
    str1 += " }" + EOL
    str1 += " out_buf_1_empty = 1;" + EOL
    str1 += ' cout << "buffer 1 res" << endl;' + EOL
    str1 += " } else {" + EOL
    str1 += ' cout << "output buffer 0...." << endl;' + EOL
    str1 += " // transfer output data" + EOL
    str1 += " if (n+Tn >= N) {" + EOL
    str1 += " last_res = 1;" + EOL
    str1 += " resbufPtr = 1;" + EOL
    str1 += " output_res(out_buf_0"
    for j in range(1, port_num + 1):
        str1 += ", out_data_" + str(j)
    str1 += ", out_offset, n, m, r, c, N, M, R_OUT, C_OUT, act);" + EOL
    str1 += " }else if (n+2*Tn >= N) {" + EOL
    str1 += " last_res = 1;" + EOL
    str1 += " resbufPtr = 1;" + EOL
    str1 += " output_res(out_buf_0"
    for j in range(1, port_num + 1):
        str1 += ", out_data_" + str(j)
    str1 += ", out_offset, n+Tn, m, r, c, N, M, R_OUT, C_OUT, act);" + EOL
    str1 += " }" + EOL
    str1 += " out_buf_0_empty = 1;" + EOL
    str1 += ' cout << "buffer 0 res" << endl;' + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " last_res = 0;" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    str1 += " }" + EOL
    # ---- debug dump of the FC output to fc_out_data.txt ----
    str1 += "#if _C_DEBUG_MODE_" + EOL
    str1 += "#if _KERNEL_DEBUG_" + EOL
    str1 += ' cout << "Finished conv_acc_innerpp_fc layer ...." << endl;' + EOL
    str1 += " ofstream conv_out;" + EOL
    str1 += ' conv_out.open("fc_out_data.txt",ios::app);' + EOL
    str1 += ' conv_out <<"fc output: "<< endl;' + EOL
    str1 += " for (int i = 0; i < M/" + str(
        port_num) + "; i++) {" + EOL
    for j in range(1, port_num + 1):
        str1 += " for (int j = 0; j < R_OUT; j++) {" + EOL
        str1 += " for(int k = 0; k < C_OUT; k++){" + EOL
        str1 += " conv_out << *(out_data_" + str(
            j) + ' + out_offset + i*R_OUT*C_OUT + j*C_OUT + k) << " ";' + EOL
        str1 += " }conv_out << endl;" + EOL
        str1 += " }conv_out << endl;" + EOL
    str1 += " }conv_out.close();" + EOL
    str1 += "#endif" + EOL
    str1 += "#endif" + EOL
    str1 += " }" + EOL
    str1 += "};" + EOL
    str1 += "#endif" + EOL
    with open("../example/test_demo/inference_net/" + generated_file_name,
              "w") as generated_file:
        generated_file.write(str1)
    return str1
def model_extract(include_fc):
    """Collect per-layer CNN geometry from the network description file
    named by sys.argv[1].

    Args:
        include_fc: when equal to the string 'include_fc', the
            fully-connected layers are appended to the convolution tables
            (their "stride" is taken from their input size and their padding
            is zero).

    Returns:
        Tuple (conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, conv_G,
        flag, cut_flag, init_pool_N) where `flag[i]` is True when conv
        layer i is immediately followed by a pooling layer.
    """

    def _as_list(value, default):
        # A parameter may be absent (non-list placeholder); normalize to a
        # one-element list holding `default` in that case.
        return list(value) if isinstance(value, list) else [default]

    arr = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr)
    init_conv_N = prms[prms_str.index("nn_in_number_conv")]
    init_conv_r = prms[prms_str.index("nn_in_data_size_conv")]
    init_conv_M = prms[prms_str.index("nn_out_number_conv")]
    init_conv_P = prms[prms_str.index("nn_padding_conv")]
    init_conv_K = prms[prms_str.index("nn_channel_size_conv")]
    init_conv_S = prms[prms_str.index("nn_stride_conv")]
    init_conv_G = prms[prms_str.index("nn_group_conv")]
    init_fc_N = prms[prms_str.index("nn_in_number_fc")]
    init_fc_Rin = prms[prms_str.index("nn_in_data_size_fc")]
    init_fc_M = prms[prms_str.index("nn_out_number_fc")]
    init_fc_K = prms[prms_str.index("nn_channel_size_fc")]
    init_pool_N = prms[prms_str.index("nn_in_data_size_pooling")]
    cut_flag_conv = prms[prms_str.index("conv_cut_flag")]
    # read kept so a missing key still fails fast, even though unused below
    cut_flag_pool = prms[prms_str.index("pool_cut_flag")]
    cut_flag_fc = prms[prms_str.index("fc_cut_flag")]
    nn_in_number_conv_values1 = _as_list(init_fc_N, 0)
    nn_out_number_conv_values1 = _as_list(init_fc_M, 0)
    nn_fc_sizes_conv = _as_list(init_fc_Rin, 0)
    nn_channel_size_conv_values = _as_list(init_fc_K, 0)
    # FC "stride" is its input size, so the output collapses to 1x1
    nn_stride_values1 = _as_list(init_fc_Rin, 1)
    nn_fc_cut_flag = _as_list(cut_flag_fc, 1)
    if (include_fc == 'include_fc'):
        print("[DEBUG] including FC . . . {} {} {} {} {} {}".format(
            nn_in_number_conv_values1, nn_out_number_conv_values1,
            nn_fc_sizes_conv, nn_channel_size_conv_values, nn_stride_values1,
            nn_fc_cut_flag))
        init_conv_N = init_conv_N + nn_in_number_conv_values1
        init_conv_M = init_conv_M + nn_out_number_conv_values1
        init_conv_r = init_conv_r + nn_fc_sizes_conv
        init_conv_K = init_conv_K + nn_channel_size_conv_values
        init_conv_S = init_conv_S + nn_stride_values1
        cut_flag_conv = cut_flag_conv + nn_fc_cut_flag
    conv_N = [int(string) for string in init_conv_N]
    conv_M = [int(string) for string in init_conv_M]
    conv_r = [int(string) for string in init_conv_r]
    conv_K = [int(string) for string in init_conv_K]
    conv_S = [int(string) for string in init_conv_S]
    conv_P = [int(string) for string in init_conv_P]
    cut_flag = [int(string) for string in cut_flag_conv]
    # FC layers (plus one trailing entry) carry zero padding
    if not init_fc_Rin:
        conv_P = conv_P + [0]
    else:
        conv_P = conv_P + [0] * len(init_fc_Rin)
        conv_P = conv_P + [0]
    conv_G = [int(string) for string in init_conv_G]
    conv_R = []
    conv_layer_num = len(conv_r)
    for r in range(conv_layer_num):
        # standard conv output size; `//` keeps it an int (the original `/`
        # produced floats under Python 3)
        R = (conv_r[r] - conv_K[r] + conv_S[r] + 2 * conv_P[r]) // conv_S[r]
        conv_R.append(R)
    # find the positions of Conv layers followed by Pooling layer
    flag = [False] * conv_layer_num
    count = 0
    print(prms[0])
    print(len(prms[0]))
    for prms_index in range(len(prms[0]) - 2):
        if "Convolution" in prms[0][prms_index]:
            # if "Pooling" in prms[0][prms_index + 1] + prms[0][prms_index + 2]:
            if "Pooling" in prms[0][prms_index + 1]:
                flag[count] = True
            count += 1
    print("conv_N: ", conv_N)
    print("conv_M: ", conv_M)
    print("conv_r: ", conv_r)
    print("conv_R: ", conv_R)
    print("conv_K: ", conv_K)
    print("conv_S: ", conv_S)
    print("flag", flag)
    print("cut_flag", cut_flag)
    return conv_N, conv_M, conv_r, conv_R, conv_K, conv_S, conv_G, flag, cut_flag, init_pool_N


# if __name__ == "__main__":
#     conv_N, conv_M, conv_r, conv_R, conv_K, conv_S = model_extract()
def generate():
    """Search tile sizes (Tm, Tn) minimizing the modeled conv cycle count
    under the DSP budget constraint Tm * Tn < DSP / 5, then report the
    modeled on-chip buffer sizes for the kept candidates.

    Layer geometry (r_c, k) is hard-coded to an AlexNet-like network; the
    channel sequence comes from the description file named by sys.argv[1].

    Returns:
        Tuple (arr, min_cycles): `arr` is an always-empty placeholder kept
        for interface compatibility; `min_cycles` is the cycle count of the
        last accepted candidate.
    """
    arr2 = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr2)
    nn_in_number_conv_values1 = prms[prms_str.index("nn_in_number_conv")]
    nn_in_number_fc_values = prms[prms_str.index("nn_in_number_fc")]
    nn_out_number_fc_values = prms[prms_str.index("nn_out_number_fc")]
    # reads kept so missing keys still fail fast, even though unused below
    nn_channel_size_conv_values = prms[prms_str.index("nn_channel_size_conv")]
    nn_channel_size_fc_values = prms[prms_str.index("nn_channel_size_fc")]
    # treat FC layers as extra "conv" stages: append their input counts and
    # the final output count to the channel sequence
    nn_in_number_conv_values1.extend(nn_in_number_fc_values)
    nn_in_number_conv_values1.append(nn_out_number_fc_values[-1])
    nn_in_number_conv_values = [int(string)
                                for string in nn_in_number_conv_values1]
    mm1 = max(nn_in_number_conv_values)
    DSP = 900
    d = DSP / 5  # rough DSP budget per parallel MAC lane
    arr = []
    # hard-coded per-layer feature-map and kernel sizes (AlexNet-like)
    r_c = [55, 27, 13, 13, 13, 6, 1, 1]
    k = [11, 5, 3, 3, 3, 6, 1, 1]
    max_S = 6
    max_K = 11
    minimums = []   # best candidate [Tm, Tn] pairs (at most 5 kept)
    minimums1 = []  # modeled cycle count for each candidate, parallel list
    min_cycles = 0
    l = len(nn_in_number_conv_values) - 1
    # baseline candidate: Tm = 1, Tn = max_S
    for o in range(0, l):
        min_cycles += r_c[o] * r_c[o] * math.ceil(
            int(nn_in_number_conv_values[o + 1]) / max_S) * math.ceil(
                int(nn_in_number_conv_values[o]) / 1) * k[o] * k[o]
    minimums.append([1, max_S])
    minimums1.append(min_cycles)
    for Tm in range(1, mm1 + 1):
        for Tn in range(max_S, Tm):
            if Tm * Tn < d:
                cycles = 0
                for j in range(0, l):
                    cycles += int(r_c[j] * r_c[j] * math.ceil(
                        int(nn_in_number_conv_values[j + 1]) / Tn) * math.ceil(
                            int(nn_in_number_conv_values[j]) / Tm) * k[j] * k[j])
                if cycles < max(minimums1):
                    min_cycles = cycles
                    if len(minimums) < 5:
                        minimums.append([Tm, Tn])
                        minimums1.append(min_cycles)
                    else:
                        # evict the worst kept candidate; delete by index so
                        # the two parallel lists can never fall out of sync
                        # (the original removed by value)
                        max_among_mins = minimums1.index(max(minimums1))
                        del minimums1[max_among_mins]
                        del minimums[max_among_mins]
                        minimums.append([Tm, Tn])
                        minimums1.append(min_cycles)
    print("Tm and Tn")
    print(minimums)
    print("cycles")
    print(minimums1)
    Tr_Tc = []
    in_buff_arr = []
    w_buff_arr = []
    out_buff_arr = []
    total_arr = []
    for m in minimums:
        Tr = int(math.sqrt(m[0] * m[1]))
        Tr_Tc.append([Tr, Tr])
        in_buff = ((Tr - 1) * max_S + max_K) * ((Tr - 1) * max_S + max_K)
        in_buff_arr.append(in_buff)
        # NOTE(review): "out_buff" uses K*K (weight-shaped) while "w_buff"
        # uses Tr*Tr (output-shaped) -- the labels look swapped; kept as-is
        # to preserve the reported numbers. Confirm intent.
        out_buff = m[0] * m[1] * max_K * max_K
        out_buff_arr.append(out_buff)
        w_buff = Tr * Tr * m[0]
        w_buff_arr.append(w_buff)
        total = in_buff + out_buff + w_buff
        total_arr.append(total)
    print("Tr_Tc")
    print(Tr_Tc)
    print("in_buf")
    print(in_buff_arr)
    print("w_buf")
    print(w_buff_arr)
    print("out_buf")
    print(out_buff_arr)
    print("total")
    print(total_arr)
    return arr, min_cycles
def generate(generated_file_name="acc_instance.h"):
    """Generate the accelerator-instance header (``acc_instance.h``).

    Reads the network description from ``sys.argv[1]``, prompts the user for
    the conv/pool tiling factors (Tm/Tn/Tr/Tc), instantiates one accelerator
    object per layer kind the network actually uses (conv, max pool, ave
    pool, plus optional batch-norm and ResNet-FC conv variants), and emits a
    wrapper function per accelerator.  The generated text is written to
    ``../example/test_demo/inference_net/<generated_file_name>``.

    Args:
        generated_file_name: name of the header file to write.

    Returns:
        str: the full header text that was written.
    """
    arr = helping_functions.read_params(sys.argv[1])
    layers_fun = layers(arr[1])
    layers_fun_w_bn = layers_w_bn(arr[1])
    layers_fun_fc = layers_fc(arr[1])
    prms, prms_str = helping_functions.extraction(arr)
    nn_channel_size_conv_values = prms[prms_str.index("nn_channel_size_conv")]
    nn_stride_conv_values = prms[prms_str.index("nn_stride_conv")]
    nn_in_data_size_pooling_values = prms[prms_str.index(
        "nn_in_data_size_pooling")]
    nn_channel_size_pooling_values = prms[prms_str.index(
        "nn_channel_size_pooling")]
    nn_stride_pooling_values = prms[prms_str.index("nn_stride_pooling")]
    nn_channel_size_fc_values = prms[prms_str.index("nn_channel_size_fc")]
    layers_order = prms[prms_str.index("layers_order")]
    str1 = "#ifndef _ACC_INSTANCE_H_" + EOL + "#define _ACC_INSTANCE_H_" + EOL * 2
    import_str = ""
    body_str = ""
    # Per-kind stride/kernel lists: index 0 = conv (+ innerproduct),
    # 1 = max pooling, 2 = ave pooling.
    strides = [[], [], []]
    kernels = [[], [], []]
    acc_max_kernel = [[], [], []]
    acc_max_stride = [[], [], []]
    conv_counter = 0
    pool_counter = 0
    fc_counter = 0
    '''get stride&kernel of each layer'''
    for i, l in enumerate(layers_order):
        if l.lower().startswith("convolution"):
            strides[0].append(int(nn_stride_conv_values[conv_counter]))
            kernels[0].append(int(nn_channel_size_conv_values[conv_counter]))
            conv_counter = conv_counter + 1
        if l.lower() == "maxpooling":
            strides[1].append(int(nn_stride_pooling_values[pool_counter]))
            kernels[1].append(int(
                nn_channel_size_pooling_values[pool_counter]))
            pool_counter = pool_counter + 1
        if l.lower() == "avepooling":
            strides[2].append(int(nn_stride_pooling_values[pool_counter]))
            kernels[2].append(int(
                nn_channel_size_pooling_values[pool_counter]))
            pool_counter = pool_counter + 1
        # Global pooling covers the whole input map: kernel == stride ==
        # input size.
        if l.lower() == "globalmaxpooling":
            strides[1].append(int(
                nn_in_data_size_pooling_values[pool_counter]))
            kernels[1].append(int(
                nn_in_data_size_pooling_values[pool_counter]))
            pool_counter = pool_counter + 1
        if l.lower() == "globalavepooling":
            strides[2].append(int(
                nn_in_data_size_pooling_values[pool_counter]))
            kernels[2].append(int(
                nn_in_data_size_pooling_values[pool_counter]))
            pool_counter = pool_counter + 1
        # Innerproduct (FC) is mapped onto the conv accelerator.
        if l.lower() == "innerproduct":
            strides[0].append(int(nn_channel_size_fc_values[fc_counter]))
            kernels[0].append(int(nn_channel_size_fc_values[fc_counter]))
            fc_counter = fc_counter + 1
    '''select the biggest stride&kernel of each kind of layer'''
    for k1 in range(len(kernels)):
        if len(kernels[k1]) != 0:
            acc_max_kernel[k1] = ", " + str(max(kernels[k1]))
        if len(strides[k1]) != 0:
            acc_max_stride[k1] = ", " + str(max(strides[k1]))
    '''params for conv_layer & pool_layer'''
    includes = [
        "#include \"conv_acc_innerpp.h\"",
        "#include \"max_pool_acc_innerpp.h\"",
        "#include \"ave_pool_acc_innerpp.h\""
    ]
    fn_names = ["conv_layer_new", "max_pool_layer_new", "ave_pool_layer_new"]
    rn_tp = "void"
    # Index 0: conv-style argument list; index 1: pool-style argument list.
    arg_t_list = [[
        "int", "int", "int", "int", "int", "int", "int", "int", "int",
        "bool", "data_type_w", "data_type_w", "int", "int", "int", "int"
    ], ["int", "int", "int", "int", "int", "int", "int", "int", "bool"]]
    arg_n_list = [[
        "N", "K", "M", "R_IN", "C_IN", "C_OUT", "R_OUT", "S", "P", "act",
        "*layer_weights", "*layer_bias", "weight_offset", "bias_offset",
        "in_offset", "out_offset"
    ], ["R_in", "C_in", "N", "K", "R", "C", "S", "P", "act"]]
    acc_params = [[], [], []]
    '''params for conv_w_bn_layer'''
    includes_w_bn = ["#include \"conv_acc_innerpp_w_bn.h\""]
    fn_names_w_bn = ["conv_layer_new_w_bn"]
    arg_t_list_w_bn = [[
        "int", "int", "int", "int", "int", "int", "int", "int", "int",
        "bool", "data_type_w", "data_type_w", "data_type_w", "data_type_w",
        "int", "data_type_w", "data_type_w", "int", "int", "int", "int",
        "int"
    ]]
    arg_n_list_w_bn = [[
        "N", "K", "M", "R_IN", "C_IN", "C_OUT", "R_OUT", "S", "P", "act",
        "*layer_weights", "*layer_bias", "*bn_mean", "*bn_denominator",
        "bn_offset", "*scale_gamma", "*scale_beta", "scale_offset",
        "weight_offset", "bias_offset", "in_offset", "out_offset"
    ]]
    acc_params_w_bn = [[]]
    '''params for resnet fc_layer '''
    includes_fc = ["#include \"conv_acc_innerpp_fc.h\""]
    fn_names_fc = ["conv_layer_new_fc"]
    arg_t_list_fc = [[
        "int", "int", "int", "int", "int", "int", "int", "int", "int",
        "bool", "data_type_w", "data_type_w", "int", "int", "int", "int"
    ], ["int", "int", "int", "int", "int", "int", "int", "int", "bool"]]
    arg_n_list_fc = [[
        "N", "K", "M", "R_IN", "C_IN", "C_OUT", "R_OUT", "S", "P", "act",
        "*layer_weights", "*layer_bias", "weight_offset", "bias_offset",
        "in_offset", "out_offset"
    ], ["R_in", "C_in", "N", "K", "R", "C", "S", "P", "act"]]
    # Interactive tiling-factor configuration (shared between the plain conv
    # accelerator and the batch-norm variant).
    Tm_1 = helping_functions.prompt("Please enter the Tm of conv_acc: ")
    acc_params[0].append(Tm_1)
    acc_params_w_bn[0].append(Tm_1)
    Tn_1 = helping_functions.prompt("Please enter the Tn of conv_acc: ")
    acc_params[0].append(Tn_1)
    acc_params_w_bn[0].append(Tn_1)
    Tr_1 = helping_functions.prompt("Please enter the Tr of conv_acc: ")
    acc_params[0].append(Tr_1)
    acc_params_w_bn[0].append(Tr_1)
    Tc_1 = helping_functions.prompt("Please enter the Tc of conv_acc: ")
    acc_params[0].append(Tc_1)
    acc_params_w_bn[0].append(Tc_1)
    Tn_2 = helping_functions.prompt("\nPlease enter the Tn of pool_acc: ")
    acc_params[1].append(Tn_2)
    acc_params[2].append(Tn_2)
    Tr_2 = helping_functions.prompt("Please enter the Tr of pool_acc: ")
    acc_params[1].append(Tr_2)
    acc_params[2].append(Tr_2)
    Tc_2 = helping_functions.prompt("Please enter the Tc of pool_acc: ")
    acc_params[1].append(Tc_2)
    acc_params[2].append(Tc_2)
    '''object & function for conv_layer & pool_layer'''
    init_nm = ["conv_acc", "max_pool_acc", "ave_pool_acc"]
    prm = "data_type, data_type_w, data_type_o"
    init_names = ["convAcc1", "maxPoolAcc1", "avePoolAcc1"]
    acc_fn_names = [
        "conv_layer_acc", "max_pool_layer_acc", "ave_pool_layer_acc"
    ]
    '''object & function for conv_w_bn_layer'''
    init_nm_w_bn = ["conv_acc_w_bn"]
    prm_w_bn = "data_type, data_type_w, data_type_o"
    init_names_w_bn = ["convAcc2"]
    acc_fn_names_w_bn = ["conv_layer_acc_w_bn"]
    '''object & function for resnet fc_layer '''
    init_nm_fc = ["conv_acc_fc"]
    prm_fc = "data_type, data_type_w, data_type_o"
    init_names_fc = ["convAcc3"]
    acc_fn_names_fc = ["conv_layer_acc_fc"]
    # One in_data/out_data pointer per memory port.
    for j in range(1, port_num + 1):
        arg_t_list[0].append("data_type")
        arg_t_list[1].append("data_type")
        arg_n_list[0].append("*in_data_" + str(j))
        arg_n_list[1].append("*in_data_" + str(j))
        arg_t_list_w_bn[0].append("data_type")
        arg_n_list_w_bn[0].append("*in_data_" + str(j))
        arg_t_list_fc[0].append("data_type")
        arg_t_list_fc[1].append("data_type")
        arg_n_list_fc[0].append("*in_data_" + str(j))
        arg_n_list_fc[1].append("*in_data_" + str(j))
    for j in range(1, port_num + 1):
        arg_t_list[0].append("data_type_o")
        arg_t_list[1].append("data_type_o")
        arg_n_list[0].append("*out_data_" + str(j))
        arg_n_list[1].append("*out_data_" + str(j))
        arg_t_list_w_bn[0].append("data_type_o")
        arg_n_list_w_bn[0].append("*out_data_" + str(j))
        arg_t_list_fc[0].append("data_type_o")
        arg_t_list_fc[1].append("data_type_o")
        arg_n_list_fc[0].append("*out_data_" + str(j))
        arg_n_list_fc[1].append("*out_data_" + str(j))
    # When the network has no conv bias, strip *layer_bias / bias_offset from
    # the conv argument lists.  The condition is loop-invariant, so do the
    # deletion exactly once here: the original code re-deleted on every
    # iteration of the emission loops below, progressively corrupting the
    # lists when more than one layer kind was present.  Note the second del
    # index accounts for the shift caused by the first.
    if "conv_bias_size" not in prms_str:
        del arg_t_list[0][11]       # data_type_w *layer_bias
        del arg_t_list[0][12]       # int bias_offset (was index 13)
        del arg_n_list[0][11]
        del arg_n_list[0][12]
        if "nn_batch_norm_size" in prms_str:
            del arg_t_list_w_bn[0][11]   # *layer_bias
            del arg_t_list_w_bn[0][18]   # bias_offset (was index 19)
            del arg_n_list_w_bn[0][11]
            del arg_n_list_w_bn[0][18]
    '''write layer acc needed'''
    for i, l in enumerate(layers_fun):
        if l != 0:
            import_str += includes[i] + EOL
            # j selects conv-style (0) vs pool-style (1) argument lists.
            if i > 0:
                j = 1
            else:
                j = 0
            body_str += init_nm[i] + CLASS_BEGIN + prm + COMMA_SPACE
            body_str += ', '.join(acc_params[i])
            body_str += acc_max_stride[i]
            body_str += acc_max_kernel[i]
            body_str += CLASS_END + SPACE + init_names[i] + EOS + EOL * 2
            # Call-site argument string: names without the pointer star.
            s = ""
            for k, arg_nm in enumerate(arg_n_list[j]):
                s += arg_nm.replace("*", "")
                if k != len(arg_n_list[j]) - 1:
                    s += ", "
            body_str += generate_function(
                fn_names[i], rn_tp, arg_t_list[j], arg_n_list[j], [
                    init_names[i] + CALL_SYMBOL + acc_fn_names[i] +
                    PARAMETER_BEGIN + s + PARAMETER_END + EOS
                ])
    '''write conv_acc_w_bn if needed'''
    if "nn_batch_norm_size" in prms_str:
        for i, l in enumerate(layers_fun_w_bn):
            if l != 0:
                import_str += includes_w_bn[i] + EOL
                if i > 0:
                    j = 1
                else:
                    j = 0
                body_str += init_nm_w_bn[i] + CLASS_BEGIN + prm_w_bn + COMMA_SPACE
                body_str += ', '.join(acc_params_w_bn[i])
                body_str += acc_max_stride[i]
                body_str += acc_max_kernel[i]
                body_str += CLASS_END + SPACE + init_names_w_bn[i] + EOS + EOL * 2
                s = ""
                if "conv_bias_size" in prms_str:
                    for k, arg_nm in enumerate(arg_n_list_w_bn[j]):
                        if j == 0:
                            # Wrap the scale args (gamma..beta) in an
                            # "#if _SCALE_" preprocessor guard.
                            if k == 15:
                                s += EOL + "#if _SCALE_" + EOL + SEPARATER
                                s += arg_nm.replace("*", "")
                            elif k == 18:
                                s += EOL + "#endif" + EOL + SEPARATER
                                s += arg_nm.replace("*", "")
                            else:
                                s += arg_nm.replace("*", "")
                        else:
                            s += arg_nm.replace("*", "")
                        if k != len(arg_n_list_w_bn[j]) - 1:
                            s += ", "
                else:
                    # Bias entries were stripped above, so the scale args sit
                    # one position earlier (14 and 17 instead of 15 and 18).
                    for k, arg_nm in enumerate(arg_n_list_w_bn[j]):
                        if j == 0:
                            if k == 14:
                                s += EOL + "#if _SCALE_" + EOL + SEPARATER
                                s += arg_nm.replace("*", "")
                            elif k == 17:
                                s += EOL + "#endif" + EOL + SEPARATER
                                s += arg_nm.replace("*", "")
                            else:
                                s += arg_nm.replace("*", "")
                        else:
                            s += arg_nm.replace("*", "")
                        if k != len(arg_n_list_w_bn[j]) - 1:
                            s += ", "
                if j == 0:
                    body_str += generate_function_w_bn(
                        fn_names_w_bn[i], rn_tp, arg_t_list_w_bn[j],
                        arg_n_list_w_bn[j], [
                            init_names_w_bn[i] + CALL_SYMBOL +
                            acc_fn_names_w_bn[i] + PARAMETER_BEGIN + s +
                            PARAMETER_END + EOS
                        ])
    '''write fc layer acc if needed'''
    if "nn_in_number_eltwise_size" in prms_str:
        for i, l in enumerate(layers_fun_fc):
            if l != 0:
                import_str += includes_fc[i] + EOL
                if i > 0:
                    j = 1
                else:
                    j = 0
                body_str += init_nm_fc[i] + CLASS_BEGIN + prm_fc + COMMA_SPACE
                body_str += ', '.join(acc_params[i])
                body_str += acc_max_stride[i]
                body_str += acc_max_kernel[i]
                body_str += CLASS_END + SPACE + init_names_fc[i] + EOS + EOL * 2
                s = ""
                for k, arg_nm in enumerate(arg_n_list_fc[j]):
                    s += arg_nm.replace("*", "")
                    if k != len(arg_n_list_fc[j]) - 1:
                        s += ", "
                body_str += generate_function(
                    fn_names_fc[i], rn_tp, arg_t_list_fc[j],
                    arg_n_list_fc[j], [
                        init_names_fc[i] + CALL_SYMBOL + acc_fn_names_fc[i] +
                        PARAMETER_BEGIN + s + PARAMETER_END + EOS
                    ])
    import_str += "#include \"config.h\"" + EOL * 2
    str1 += import_str + body_str + EOL * 2 + ENDIF
    with open("../example/test_demo/inference_net/" + generated_file_name,
              "w") as generated_file:
        generated_file.write(str1)
    return str1
def generate():
    """Search conv tiling factors (Tm, Tn) subject to Tm * Tn < DSP / 5.

    First sweeps (s, k) for the best compute-to-communication ratio of the
    fixed configuration [Tm, Tn, Tr, Tc] = [32, 8, 16, 16], then reads the
    conv layer shapes from the params file (``sys.argv[1]``) and exhaustively
    searches (Tm, Tn) under the DSP budget, keeping the five best candidate
    pairs.  Plots the cycle surface over (Tm, Tn) with matplotlib and prints
    buffer-size estimates for each kept candidate.

    Returns:
        tuple: ``(arr, conv_min_cycles)`` where ``arr`` is always an empty
        list (interface compatibility) and ``conv_min_cycles`` is the best
        cycle count accepted during the search.
    """
    max_ratio = 0
    max_sk = []
    tm_tn_tr_tc = [32, 8, 16, 16]
    # CTC sweep over stride s and kernel k for the fixed tiling above; the
    # constants encode the model for that [32, 8, 16, 16] configuration.
    for s in range(1, 5):
        k_max = min(80 - 15 * s, 11)
        for k in range(s, k_max):
            ctc_ratio = (32 * 16 * 16 * ((15 * s + k) * (15 * s + k) * 8 + 1)) / (
                4 * (8 * 32 * k * k + 32 + 8 * (15 * s + k) * (15 * s + k)
                     + 32 * 16 * 16))
            if ctc_ratio > max_ratio:
                max_ratio = ctc_ratio
                max_sk = [s, k]
    print("max ctc and s,k")
    print(max_ratio)
    print(max_sk)
    arr2 = helping_functions.read_params(sys.argv[1])
    prms, prms_str = helping_functions.extraction(arr2)
    init_conv_N = prms[prms_str.index("nn_in_number_conv")]
    init_conv_r = prms[prms_str.index("nn_in_data_size_conv")]
    init_conv_M = prms[prms_str.index("nn_out_number_conv")]
    init_conv_P = prms[prms_str.index("nn_padding_conv")]
    init_conv_K = prms[prms_str.index("nn_channel_size_conv")]
    init_conv_S = prms[prms_str.index("nn_stride_conv")]
    init_conv_G = prms[prms_str.index("nn_group_conv")]
    # FC shapes are read but currently unused; the .index() calls also act
    # as a presence check on the params file.
    init_fc_N = prms[prms_str.index("nn_in_number_fc")]
    init_fc_Rin = prms[prms_str.index("nn_in_data_size_fc")]
    init_fc_M = prms[prms_str.index("nn_out_number_fc")]
    init_fc_K = prms[prms_str.index("nn_channel_size_fc")]
    conv_N = [int(string) for string in init_conv_N]
    conv_M = [int(string) for string in init_conv_M]
    conv_r = [int(string) for string in init_conv_r]
    conv_K = [int(string) for string in init_conv_K]
    conv_S = [int(string) for string in init_conv_S]
    conv_P = [int(string) for string in init_conv_P]
    conv_G = [int(string) for string in init_conv_G]
    max_conv_N = max(conv_N)
    max_conv_M = max(conv_M)
    max_conv_S = max(conv_S)
    max_conv_K = max(conv_K)
    # Output feature-map size per conv layer (float; used only in products).
    conv_R = []
    conv_layer_num = int(len(conv_r))
    for r in range(0, conv_layer_num):
        R = (conv_r[r] - conv_K[r] + conv_S[r] + 2 * conv_P[r]) / conv_S[r]
        conv_R.append(R)
    print("conv_N")
    print(conv_N)
    print("conv_M")
    print(conv_M)
    print("conv_r")
    print(conv_r)
    print("conv_R")
    print(conv_R)
    print("conv_K")
    print(conv_K)
    DSP = 6840
    # DSP = 2800
    d = int(DSP / 5)    # ~5 DSPs per MAC -> upper bound on Tm * Tn
    arr = []
    Tm_min = 1
    Tn_min = 1
    min_Tm_Tn = []      # up to five best [Tm, Tn] candidates
    conv_min_cycles = 0
    min_cycle_list = [] # cycle counts, parallel to `min_Tm_Tn`
    # Baseline point Tm = Tn = 1.
    for o in range(0, conv_layer_num):
        conv_min_cycles += conv_R[o] * conv_R[o] * math.ceil(
            int(conv_M[o]) / float(Tm_min)) * math.ceil(
                int(conv_N[o]) / float(Tn_min)) * conv_K[o] * conv_K[o]
    min_Tm_Tn.append([1, 1])
    min_cycle_list.append(conv_min_cycles)
    print("Analysis initialized point: ", min_cycle_list, min_Tm_Tn)
    # Reference cycle count for the hand-picked point [Tm, Tn] = [87, 32].
    target = 0
    for j in range(0, conv_layer_num):
        target += int(
            conv_R[j] * conv_R[j] * math.ceil(int(conv_N[j]) / float(32)) *
            math.ceil(int(conv_M[j]) / float(87)) * conv_K[j] * conv_K[j])
    print("targeted cycle numbers [87, 32]")
    print(target)
    fig = plt.figure()
    # Figure.gca(projection=...) was deprecated in matplotlib 3.4 and
    # removed in 3.6; add_subplot is the supported way to get a 3D axes.
    ax = fig.add_subplot(111, projection='3d')
    ax.set_title("3D Figure")
    ax.set_xlabel("Tm")
    ax.set_ylabel("Tn")
    ax.set_zlabel("Cycles")
    x_axis = [i for i in range(1, 100 + 1)]
    y_axis = [j for j in range(1, 100 + 1)]
    XX, YY = np.meshgrid(x_axis, y_axis)
    ZZ = np.zeros((100, 100))
    conv_layer_num = int(len(conv_M))
    for Tm in range(1, max_conv_M + 1):
        # Tn may not exceed Tm, the widest layer, or the DSP budget d / Tm.
        Tn_max = min(max_conv_N, int(d / Tm), Tm)
        for Tn in range(1, Tn_max + 1):
            cycles = 0
            # NOTE(review): this loop previously started at layer 1,
            # silently skipping the first conv layer while the baseline and
            # target sums above include it; start at 0 for consistency.
            for j in range(0, conv_layer_num):
                cycles += int(
                    conv_R[j] * conv_R[j] *
                    math.ceil(int(conv_N[j]) / float(Tn)) *
                    math.ceil(int(conv_M[j]) / float(Tm)) *
                    conv_K[j] * conv_K[j])
            if cycles > 0 and Tm < 100 and Tn < 100:
                # NOTE(review): ZZ is indexed [Tm, Tn] (row = Tm) while
                # meshgrid's default 'xy' indexing puts x along columns --
                # confirm the plotted axes are not transposed.
                ZZ[Tm, Tn] = cycles
            else:
                if Tm < 100 and Tn < 100:
                    ZZ[Tm, Tn] = 0
            if cycles < min(min_cycle_list) and cycles != 0:
                conv_min_cycles = cycles
                Tm_min = Tm
                Tn_min = Tn
                if len(min_Tm_Tn) < 5:
                    min_Tm_Tn.append([Tm, Tn])
                    min_cycle_list.append(conv_min_cycles)
                else:
                    # Evict the worst kept candidate; pop(index) keeps the
                    # two parallel lists aligned even if values repeat.
                    worst = min_cycle_list.index(max(min_cycle_list))
                    min_cycle_list.pop(worst)
                    min_Tm_Tn.pop(worst)
                    min_cycle_list.append(conv_min_cycles)
                    min_Tm_Tn.append([Tm, Tn])
    surf = ax.plot_surface(XX, YY, ZZ, rstride=1, cstride=1,
                           cmap=cm.coolwarm, linewidth=0, antialiased=True)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    # plt.pause(1)
    plt.show()
    print("Tm and Tn")
    print(min_Tm_Tn)
    print("cycles")
    print(min_cycle_list)
    min_among_all = min_cycle_list.index(min(min_cycle_list))
    print("Best among all points", min_cycle_list[min_among_all],
          min_Tm_Tn[min_among_all])
    # Buffer-size estimates per kept candidate.
    Tr_Tc = []
    in_buff_arr = []
    w_buff_arr = []
    out_buff_arr = []
    total_arr = []
    for m in min_Tm_Tn:
        Tr = int(math.sqrt(m[0] * m[1]))
        Tr_Tc.append([Tr, Tr])
        in_buff = ((Tr - 1) * max_conv_S + max_conv_K) * (
            (Tr - 1) * max_conv_S + max_conv_K)
        in_buff_arr.append(in_buff)
        out_buff = m[0] * m[1] * max_conv_K * max_conv_K
        out_buff_arr.append(out_buff)
        w_buff = Tr * Tr * m[0]
        w_buff_arr.append(w_buff)
        total = in_buff + out_buff + w_buff
        total_arr.append(total)
    print("in_buf")
    print(in_buff_arr)
    print("w_buf")
    print(w_buff_arr)
    print("out_buf")
    print(out_buff_arr)
    print("total")
    print(total_arr)
    return arr, conv_min_cycles