def test():
    """
    Execute the tests
    Returns: (n_total, n_success)
    """
    # TODO remove this line
    return 0, 0

    logger = TestLogger(TESTNAME)

    # TODO change this loop to do more suitable tests
    for size in [128, 1024, 4096]:
        # generate the stimuli
        vecA, vecB, result = gen_stimuli(size)

        # prepare header file
        # TODO generate the header file
        header = HeaderFile("test_stimuli.h")
        header.add(HeaderConstant("LENGTH", size))
        header.add(HeaderArray("vecA", "int8_t", vecA))
        header.add(HeaderArray("vecB", "int8_t", vecB))
        header.write()

        # compile and run
        os.system("make clean all run > {}".format(RESULT_FILE))

        # parse output
        result = parse_output(RESULT_FILE)

        # log the result
        # TODO add meaningful name for the subcase
        logger.show_subcase_result("size {:4}".format(size), result)

    # return summary
    return logger.summary()

def test():
    """
    Execute the tests
    Returns: (n_total, n_success)
    """
    logger = TestLogger(TESTNAME, show_title=False)

    for simd, parallel in [(False, False), (True, False), (True, True)]:
        # generate makefile
        mkf = Makefile()
        mkf.add_fc_test_source("test.c")
        mkf.add_cl_test_source("cluster.c")
        mkf.add_cl_prog_source("net/layer3.c")
        mkf.add_cl_prog_source("net/net.c")
        mkf.add_cl_prog_source("func/transform.c")
        mkf.add_cl_prog_source("func/conv.c")
        if not simd:
            mkf.add_define("NO_SIMD")
        if parallel:
            mkf.add_define("PARALLEL")
        mkf.write()

        random_input = False

        # generate the stimuli
        _, x_align, _, y_exp_align = gen_stimuli(random_input)

        # prepare header file
        header = HeaderFile("test_stimuli.h")
        header.add(HeaderArray("x_vec", "int8_t", x_align.ravel()))
        header.add(HeaderArray("y_exp_vec", "int8_t", y_exp_align.ravel()))
        header.write()

        # compile and run
        os.system("make clean all run > {}".format(RESULT_FILE))

        # parse output
        result = parse_output(RESULT_FILE)

        # log the result
        options = []
        if simd:
            options.append("simd")
        if parallel:
            options.append("parallel")
        subcase_name = "layer 3 "
        if options:
            subcase_name += " + ".join(options)
        else:
            subcase_name += "naive"
        logger.show_subcase_result(subcase_name, result)

    # return summary
    return logger.summary()

def test():
    """
    Execute the tests
    Returns: (n_total, n_success)
    """
    logger = TestLogger(TESTNAME, show_title=False)

    for parallel in [False, True]:
        # generate makefile
        mkf = Makefile()
        mkf.add_fc_test_source("test.c")
        mkf.add_cl_test_source("cluster.c")
        mkf.add_cl_prog_source("net/layer1.c")
        mkf.add_cl_prog_source("net/net.c")
        mkf.add_cl_prog_source("func/flip.c")
        if parallel:
            mkf.add_define("PARALLEL")
        mkf.write()

        # generate the stimuli
        _, x_align, y_exp, y_exp_align = gen_stimuli()

        # prepare header file
        header = HeaderFile("test_stimuli.h")
        header.add(HeaderArray("x_vec", "int8_t", x_align.ravel(), const=False))
        header.add(HeaderArray("y_exp", "int8_t", y_exp_align.ravel(),
                               const=False))
        header.write()

        # compile and run
        os.system("make clean all run > {}".format(RESULT_FILE))

        # parse output
        result = parse_output(RESULT_FILE)

        # log the result
        subcase_name = "Layer 1 flip "
        if parallel:
            subcase_name += "parallel"
        else:
            subcase_name += "naive"
        logger.show_subcase_result(subcase_name, result)

    # return summary
    return logger.summary()

def test():
    """
    Execute the tests
    Returns: (n_total, n_success)
    """
    logger = TestLogger(TESTNAME, show_title=False)

    for simd in [False, True]:
        # generate makefile
        mkf = Makefile()
        mkf.add_fc_test_source("test.c")
        mkf.add_cl_test_source("cluster.c")
        mkf.add_cl_prog_source("net/layer5.c")
        mkf.add_cl_prog_source("net/net.c")
        mkf.add_cl_prog_source("func/transform.c")
        mkf.add_cl_prog_source("func/dotp.c")
        if not simd:
            mkf.add_define("NO_SIMD")
        # write the makefile only after all defines are added (the original
        # called write() first, so NO_SIMD never reached the makefile)
        mkf.write()

        random_input = False

        # generate the stimuli
        _, x_align, _, y_exp_align = gen_stimuli(random_input)

        # prepare header file
        header = HeaderFile("test_stimuli.h")
        header.add(HeaderArray("x_vec", "int8_t", x_align.ravel()))
        header.add(HeaderArray("y_exp_vec", "int8_t", y_exp_align.ravel()))
        header.write()

        # compile and run
        os.system("make clean all run > {}".format(RESULT_FILE))

        # parse output
        result = parse_output(RESULT_FILE)

        # log the result
        subcase_name = "Layer 5 "
        if simd:
            subcase_name += "simd"
        else:
            subcase_name += "naive"
        logger.show_subcase_result(subcase_name, result)

    # return summary
    return logger.summary()

def gen_input_header(net, net_params, data, output_file):
    # only allow nets with 255 levels
    assert net_params["weightInqNumLevels"] == 255
    assert net_params["actSTENumLevels"] == 255

    # extract and prepare the input data
    scale_factor = convert.ste_quant(net, "quant1")
    input_quant = F.quantize_to_int(data, scale_factor)
    input_quant_align = align_array(input_quant)

    # also generate the padded input vector: 31 samples in front and 32 at the
    # end, 63 in total
    _, C, T = input_quant.shape
    T_pad = T + 63
    assert T_pad % 4 == 0
    input_pad = np.zeros((C, T_pad), dtype=int)  # np.int is removed in NumPy >= 1.24
    input_pad[:, 31:31 + T] = input_quant[0]

    # generate the header file
    header = HeaderFile(output_file, "__INPUT_H__", with_c=True)
    header.add(HeaderArray("input_data", "int8_t", input_quant_align.ravel()))
    header.add(HeaderArray("input_data_pad", "int8_t", input_pad.ravel()))
    header.write()

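# The helpers align_array / align_array_size are used throughout these scripts
# but defined elsewhere in the repo. Below is a minimal sketch of the behavior
# they appear to have, assuming the target wants the innermost dimension padded
# to a multiple of 4 int8 entries (one 32-bit word, as SIMD loads and DMA
# transfers would require). The `_sketch` names are hypothetical, not the
# repo's actual implementation.
import numpy as np

def _align_array_size_sketch(n, alignment=4):
    # round n up to the next multiple of `alignment`
    return ((n + alignment - 1) // alignment) * alignment

def _align_array_sketch(arr, alignment=4):
    # zero-pad the last dimension of `arr` up to the aligned size
    shape = arr.shape[:-1] + (_align_array_size_sketch(arr.shape[-1], alignment),)
    out = np.zeros(shape, dtype=arr.dtype)
    out[..., :arr.shape[-1]] = arr
    return out
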
def test():
    """
    Execute the tests
    Returns: (n_total, n_success)
    """
    logger = TestLogger(TESTNAME)

    for size_a, size_b in [(155, 16), (1021, 63), (1024, 63), (1188, 64),
                           (4096, 128)]:
        for conv_version in [2, 3]:
            # generate makefile
            mkf = Makefile()
            mkf.add_fc_test_source("test.c")
            mkf.add_cl_test_source("cluster.c")
            mkf.add_cl_prog_source("func/xcorr.c")
            mkf.add_define("CONV_VERSION", conv_version)
            mkf.write()

            # generate the stimuli
            vecA, vecB, vecExp = gen_stimuli(size_a, size_b)

            # prepare header file
            header = HeaderFile("test_stimuli.h")
            header.add(HeaderConstant("LENGTH_A", size_a))
            header.add(HeaderConstant("LENGTH_B", size_b))
            header.add(HeaderConstant("LENGTH_RES", len(vecExp)))
            header.add(HeaderArray("vecA", "int8_t", vecA))
            header.add(HeaderArray("vecB", "int8_t", vecB))
            header.add(HeaderArray("vecExp", "int32_t", vecExp))
            header.write()

            # compile and run
            os.system("make clean all run > {}".format(RESULT_FILE))

            # parse output
            result = parse_output(RESULT_FILE)

            # log the result
            casename = "V{}, {}x{}".format(conv_version, size_a, size_b)
            logger.show_subcase_result(casename, result)

    # return summary
    return logger.summary()

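# A hedged reference for the cross-correlation checked above -- an assumption,
# not the repo's gen_stimuli: since LENGTH_RES is derived from len(vecExp) and
# size_a > size_b in every case, func/xcorr.c most likely computes a
# "valid"-mode correlation of length size_a - size_b + 1, with int8 inputs
# accumulated in int32.
import numpy as np

def _reference_xcorr_sketch(vec_a, vec_b):
    # widen to int32 before the multiply-accumulate to avoid int8 overflow
    a = np.asarray(vec_a, dtype=np.int32)
    b = np.asarray(vec_b, dtype=np.int32)
    return np.correlate(a, b, mode="valid")
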
def test():
    """
    Execute the tests
    Returns: (n_total, n_success)
    """
    logger = TestLogger(TESTNAME)

    # generate makefile
    mkf = Makefile()
    mkf.add_fc_test_source("test.c")
    mkf.add_cl_test_source("cluster.c")
    mkf.add_cl_prog_source("func/dotp.c")
    mkf.write()

    for length in [22, 23, 1024, 1025]:
        for a_stride, b_stride in [(1, 1), (4, 1), (8, 4)]:
            # generate the stimuli
            vec_a, vec_b, exp_result = gen_stimuli(length, a_stride, b_stride)

            # prepare header file
            header = HeaderFile("test_stimuli.h")
            header.add(HeaderConstant("LENGTH", length))
            header.add(HeaderConstant("EXP_RESULT", exp_result))
            header.add(HeaderConstant("A_STRIDE", a_stride))
            header.add(HeaderConstant("B_STRIDE", b_stride))
            header.add(HeaderArray("vec_a", "int8_t", vec_a.ravel()))
            header.add(HeaderArray("vec_b", "int8_t", vec_b.ravel()))
            header.write()

            # compile and run
            os.system("make clean all run > {}".format(RESULT_FILE))

            # parse output
            result = parse_output(RESULT_FILE)

            # log the result
            subcase_name = "length: {}, stride: {}x{}".format(
                length, a_stride, b_stride)
            logger.show_subcase_result(subcase_name, result)

    # return summary
    return logger.summary()

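# Hedged sketch of a strided dot-product reference matching the constants
# emitted above (LENGTH, A_STRIDE, B_STRIDE, EXP_RESULT). This is an assumed
# model of what gen_stimuli produces and func/dotp.c computes, not taken from
# the repo: element i of the product is read at offset i * stride in each
# vector.
import numpy as np

def _reference_strided_dotp_sketch(length, a_stride, b_stride, seed=0):
    rng = np.random.default_rng(seed)
    vec_a = rng.integers(-128, 128, size=length * a_stride, dtype=np.int8)
    vec_b = rng.integers(-128, 128, size=length * b_stride, dtype=np.int8)
    # accumulate in int32, since int8 products overflow almost immediately
    exp_result = int(np.dot(vec_a[::a_stride].astype(np.int32),
                            vec_b[::b_stride].astype(np.int32)))
    return vec_a, vec_b, exp_result
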
def test():
    """
    Execute the tests
    Returns: (n_total, n_success)
    """
    logger = TestLogger(TESTNAME)

    for no_intermediate_scale, duplicate_featuremap in [(False, False),
                                                        (True, False),
                                                        (True, True)]:
        # generate makefile
        # mkf = Makefile(opt_level=2 if duplicate_featuremap else 3)
        mkf = Makefile(opt_level=3)
        mkf.add_fc_test_source("test.c")
        mkf.add_cl_test_source("cluster.c")
        mkf.add_cl_prog_source("net/fused_layer_1_2.c")
        mkf.add_cl_prog_source("net/net.c")
        mkf.add_cl_prog_source("func/conv.c")
        mkf.add_cl_prog_source("func/xcorr.c")
        mkf.add_cl_prog_source("func/dotp.c")
        mkf.add_cl_prog_source("func/transform.c")
        mkf.add_define("PARALLEL")
        mkf.add_define("INTRINSIC_SCALE")
        mkf.add_define("CROSS_CORRELATE")
        mkf.add_define("FUSE_LAYERS")
        mkf.add_define("DEFAULT_DIM")
        if no_intermediate_scale:
            mkf.add_define("NO_INTERMEDIATE_SCALE")
        if duplicate_featuremap:
            mkf.add_define("DUPLICATE_FEATUREMAP")
        mkf.write()

        random_input = False

        # generate the stimuli
        _, x_align, _, y_exp_align = gen_stimuli(random_input,
                                                 no_intermediate_scale,
                                                 duplicate_featuremap)

        # prepare header file
        header = HeaderFile("test_stimuli.h")
        header.add(HeaderArray("x_vec", "int8_t", x_align.ravel()))
        header.add(HeaderArray("y_exp_vec", "int8_t", y_exp_align.ravel()))
        header.write()

        # compile and run
        os.system("make clean all run > {}".format(RESULT_FILE))

        # parse output
        result = parse_output(RESULT_FILE)

        # log the result
        options = []
        if no_intermediate_scale:
            options.append("no scale")
        if duplicate_featuremap:
            options.append("dup inp")
        subcase_name = "Fused Layer 1+2 "
        if options:
            subcase_name += "; ".join(options)
        logger.show_subcase_result(subcase_name, result)

    # return summary
    return logger.summary()

def gen_net_header(net_file, config_file, output_file):
    # load network
    net = np.load(net_file)

    # load configuration file
    with open(config_file, "r") as _f:
        config = json.load(_f)

    # we only need the network parameters
    net_params = config["indiv"]["net"]["params"]

    # only allow nets with 255 levels
    assert net_params["weightInqNumLevels"] == 255
    assert net_params["actSTENumLevels"] == 255
    assert net_params["N"] == 4

    # prepare params: F2 may be absent, so derive it before asserting on it
    # (the original asserted F2 % 4 == 0 first, which raises a TypeError when
    # F2 is None)
    if net_params["F2"] is None:
        net_params["F2"] = net_params["F1"] * net_params["D"]

    # only allow F2 = F1 * D, with F2 divisible by 4
    assert net_params["F2"] == net_params["F1"] * net_params["D"]
    assert net_params["F2"] % 4 == 0

    # start the header file
    header = HeaderFile(output_file, "__NET_NET_H__", with_c=True)

    # add network dimensions
    header.add(HeaderComment("Network Dimensions", blank_line=False))
    header.add(HeaderConstant("NET_F1", net_params["F1"], blank_line=False))
    header.add(HeaderConstant("NET_F2", net_params["F2"], blank_line=False))
    header.add(HeaderConstant("NET_D", net_params["D"], blank_line=False))
    header.add(HeaderConstant("NET_C", net_params["C"], blank_line=False))
    header.add(HeaderConstant("NET_C_ALIGN", align_array_size(net_params["C"]),
                              blank_line=False))
    header.add(HeaderConstant("NET_T", net_params["T"], blank_line=False))
    header.add(HeaderConstant("NET_T_ALIGN", align_array_size(net_params["T"]),
                              blank_line=False))
    header.add(HeaderConstant("NET_T8", net_params["T"] // 8,
                              blank_line=False))
    header.add(HeaderConstant("NET_T8_ALIGN",
                              align_array_size(net_params["T"] // 8),
                              blank_line=False))
    header.add(HeaderConstant("NET_T64", (net_params["T"] // 8) // 8,
                              blank_line=False))
    header.add(HeaderConstant("NET_T64_ALIGN",
                              align_array_size((net_params["T"] // 8) // 8),
                              blank_line=False))
    header.add(HeaderConstant("NET_N", net_params["N"], blank_line=True))

    # Layer 1
    input_scale = convert.ste_quant(net, "quant1")
    weight, weight_scale = convert.inq_conv2d(net, "conv1")
    weight = weight.reshape(net_params["F1"], 64)
    weight_reverse, _ = convert.inq_conv2d(net, "conv1", store_reversed=True)
    weight_reverse = weight_reverse.reshape(net_params["F1"], 64)
    bn_scale, bn_offset = convert.batch_norm(net, "batch_norm1")
    output_scale = convert.ste_quant(net, "quant2")
    factor, offset = convert.div_factor_batch_norm(input_scale, weight_scale,
                                                   output_scale, bn_scale,
                                                   bn_offset)

    # pad the reversed weight vector to a multiple of 4
    if WEIGHT_L1_PAD > 0:
        weight_reverse_pad = np.zeros((net_params["F1"], 64 + WEIGHT_L1_PAD))
        weight_reverse_pad[:, :-WEIGHT_L1_PAD] = weight_reverse
    else:
        weight_reverse_pad = weight_reverse

    header.add(HeaderComment("Layer 1\n"
                             "=======\n"
                             "Convolution + BN\n\n"
                             "Input:  [C, T]\n"
                             "Weight: [F1, 64]\n"
                             "Output: [F1, C, T]", mode="/*"))
    header.add(HeaderConstant("NET_L1_PAD_START", 31))
    header.add(HeaderConstant("NET_L1_PAD_END", 32))
    header.add(HeaderConstant("NET_L1_PAD_INPUT_LEN",
                              net_params["T"] + 31 + 32))
    header.add(HeaderConstant("NET_L1_PAD_INPUT_LEN_ALIGN",
                              align_array_size(net_params["T"] + 31 + 32)))
    header.add(HeaderArray("net_l1_factor", "int32_t", factor.ravel()))
    header.add(HeaderArray("net_l1_offset", "int32_t", offset.ravel()))
    header.add(HeaderConstant("NET_L1_WEIGHT_LEN", weight.shape[-1]))
    header.add(HeaderConstant("NET_L1_WEIGHT_LEN_ALIGN",
                              weight_reverse_pad.shape[-1]))
    header.add(HeaderArray("net_l1_weight", "int8_t", weight.ravel()))
    header.add(HeaderArray("net_l1_weight_reverse", "int8_t",
                           weight_reverse.ravel()))
    header.add(HeaderArray("net_l1_weight_reverse_pad", "int8_t",
                           weight_reverse_pad.ravel()))

    # layer 2
    input_scale = convert.ste_quant(net, "quant2")
    weight, weight_scale = convert.inq_conv2d(net, "conv2",
                                              store_reversed=True)
    bn_scale, bn_offset = convert.batch_norm(net, "batch_norm2")
    output_scale = convert.ste_quant(net, "quant3")
    factor, offset = convert.div_factor_batch_norm(input_scale, weight_scale,
                                                   output_scale, bn_scale,
                                                   bn_offset, pool=8)
    weight = weight.reshape(net_params["F2"], net_params["C"])
    weight = align_array(weight)

    header.add(HeaderComment("Layer 2\n"
                             "=======\n"
                             "Convolution + BN + ReLU + Pooling\n\n"
                             "Input:  [F1, C, T]\n"
                             "Weight: [F2, C] (aligned to [F2, 24])\n"
                             "Output: [F2, T // 8]", mode="/*"))
    header.add(HeaderArray("net_l2_factor", "int32_t", factor.ravel()))
    header.add(HeaderArray("net_l2_offset", "int32_t", offset.ravel()))
    header.add(HeaderConstant("NET_L2_WEIGHT_LEN", weight.shape[-1]))
    header.add(HeaderArray("net_l2_weight", "int8_t", weight.ravel()))
    header.add(HeaderArray("net_l2_weight_32", "int32_t", weight.ravel()))

    # layer 3
    input_scale = convert.ste_quant(net, "quant3")
    weight, weight_scale = convert.inq_conv2d(net, "sep_conv1")
    output_scale = convert.ste_quant(net, "quant4")
    factor = convert.div_factor(input_scale, weight_scale, output_scale)
    weight = weight.reshape(net_params["F2"], 16)

    header.add(HeaderComment("Layer 3\n"
                             "=======\n"
                             "Convolution\n\n"
                             "Input:  [F2, T // 8]\n"
                             "Weight: [F2, 16]\n"
                             "Output: [F2, T // 8]", mode="/*",
                             blank_line=False))
    header.add(HeaderConstant("NET_L3_PAD_START", 7))
    header.add(HeaderConstant("NET_L3_PAD_END", 8))
    header.add(HeaderConstant("NET_L3_PAD_INPUT_LEN",
                              net_params["T"] // 8 + 7 + 8))
    header.add(HeaderConstant("NET_L3_PAD_INPUT_LEN_ALIGN",
                              align_array_size(net_params["T"] // 8 + 7 + 8)))
    header.add(HeaderConstant("NET_L3_FACTOR", factor))
    header.add(HeaderConstant("NET_L3_WEIGHT_LEN", weight.shape[-1]))
    header.add(HeaderArray("net_l3_weight", "int8_t", weight.ravel()))

    # layer 4
    input_scale = convert.ste_quant(net, "quant4")
    weight, weight_scale = convert.inq_conv2d(net, "sep_conv2")
    output_scale = convert.ste_quant(net, "quant5")
    bn_scale, bn_offset = convert.batch_norm(net, "batch_norm3")
    factor, offset = convert.div_factor_batch_norm(input_scale, weight_scale,
                                                   output_scale, bn_scale,
                                                   bn_offset, pool=8)
    weight = weight.reshape(net_params["F2"], net_params["F2"])

    header.add(HeaderComment("Layer 4\n"
                             "=======\n"
                             "Convolution + BN + ReLU + Pooling\n\n"
                             "Input:  [F2, T // 8]\n"
                             "Weight: [F2, F2]\n"
                             "Output: [F2, T // 64]", mode="/*"))
    header.add(HeaderArray("net_l4_factor", "int32_t", factor.ravel()))
    header.add(HeaderArray("net_l4_offset", "int32_t", offset.ravel()))
    header.add(HeaderConstant("NET_L4_WEIGHT_LEN", weight.shape[-1]))
    header.add(HeaderArray("net_l4_weight", "int8_t", weight.ravel()))

    # layer 5
    input_scale = convert.ste_quant(net, "quant5")
    output_scale = convert.ste_quant(net, "quant6")
    weight, bias, weight_scale = convert.inq_linear(net, "fc")
    weight = weight.reshape(net_params["N"],
                            net_params["F2"] * (net_params["T"] // 64))

    # align each T // 64 block of the weights individually, not the product
    # F2 * (T // 64) as a whole
    t64 = net_params["T"] // 64
    t64_align = align_array_size(t64)
    weight_align = np.zeros((net_params["N"], net_params["F2"] * t64_align),
                            dtype=int)
    for i in range(net_params["F2"]):
        weight_align[:, i * t64_align:i * t64_align + t64] = \
            weight[:, i * t64:(i + 1) * t64]

    factor = convert.div_factor(input_scale, weight_scale, output_scale)

    header.add(HeaderComment("Layer 5\n"
                             "=======\n"
                             "Linear Layer (without scaling in the end)\n\n"
                             "Input:  [F2, T // 64]\n"
                             "Weight: [N, F2 * (T // 64)]\n"
                             "Bias:   [N]\n"
                             "Output: [N]", mode="/*"))
    header.add(HeaderConstant("NET_L5_FACTOR", factor))
    header.add(HeaderArray("net_l5_bias", "int8_t", bias.ravel()))
    header.add(HeaderConstant("NET_L5_WEIGHT_LEN", weight_align.shape[-1]))
    header.add(HeaderArray("net_l5_weight", "int8_t", weight_align.ravel()))

    # store the header file
    header.write()

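# Hypothetical invocation (the paths below are placeholders, not taken from
# the repo): net_file is expected to be an .npz archive holding the quantized
# parameters that convert.ste_quant / convert.inq_conv2d / convert.inq_linear
# look up by name ("quant1", "conv1", "fc", ...), while the JSON config
# carries the network dimensions under config["indiv"]["net"]["params"].
#
#     gen_net_header("exports/net.npz", "exports/config.json", "net/net.h")
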
def test():
    """
    Execute the tests
    Returns: (n_total, n_success)
    """
    logger = TestLogger(TESTNAME)

    # each tuple enables one more optimization than the previous one:
    # (intrinsic, simd, flip_layers, parallel, stream, xcorr, fuse, no_div,
    #  reorder, dup_inp)
    for intrinsic, simd, flip_layers, parallel, stream, xcorr, fuse, no_div, \
            reorder, dup_inp in [
            (False, False, False, False, False, False, False, False, False, False),
            (True, False, False, False, False, False, False, False, False, False),
            (True, True, False, False, False, False, False, False, False, False),
            (True, True, True, False, False, False, False, False, False, False),
            (True, True, True, True, False, False, False, False, False, False),
            (True, True, True, True, True, False, False, False, False, False),
            (True, True, True, True, True, True, False, False, False, False),
            (True, True, True, True, True, True, True, False, False, False),
            (True, True, True, True, True, True, True, True, False, False),
            (True, True, True, True, True, True, True, True, True, False),
            (True, True, True, True, True, True, True, True, True, True)]:
        # generate makefile
        mkf = Makefile()
        mkf.add_fc_test_source("test.c")
        mkf.add_cl_test_source("cluster.c")
        mkf.add_cl_prog_source("net/model.c")
        mkf.add_cl_prog_source("net/layer1.c")
        mkf.add_cl_prog_source("net/layer2.c")
        mkf.add_cl_prog_source("net/layer3.c")
        mkf.add_cl_prog_source("net/layer4.c")
        mkf.add_cl_prog_source("net/layer5.c")
        mkf.add_cl_prog_source("net/fused_layer_1_2.c")
        mkf.add_cl_prog_source("net/net.c")
        mkf.add_cl_prog_source("func/transform.c")
        mkf.add_cl_prog_source("func/dotp.c")
        mkf.add_cl_prog_source("func/conv.c")
        mkf.add_cl_prog_source("func/flip.c")
        mkf.add_cl_prog_source("func/xcorr.c")
        if not simd:
            mkf.add_define("NO_SIMD")
        if flip_layers:
            mkf.add_define("FLIP_LAYERS")
        if parallel:
            mkf.add_define("PARALLEL")
        if intrinsic:
            mkf.add_define("INTRINSIC_SCALE")
        if stream:
            mkf.add_define("DMA_STREAM")
        if xcorr:
            mkf.add_define("CROSS_CORRELATE")
        if fuse:
            mkf.add_define("FUSE_LAYERS")
        if no_div:
            mkf.add_define("NO_INTERMEDIATE_SCALE")
        if dup_inp:
            mkf.add_define("DUPLICATE_FEATUREMAP")
        if reorder:
            mkf.add_define("REORDER_BN")
        mkf.write()

        # generate the stimuli
        _, x_align, _, y_exp_align = gen_stimuli(no_div=no_div,
                                                 pad_data=dup_inp,
                                                 reorder_bn=reorder)

        # prepare header file
        header = HeaderFile("test_stimuli.h")
        header.add(HeaderArray("x_vec", "int8_t", x_align.ravel()))
        header.add(HeaderArray("y_exp_vec", "int8_t", y_exp_align.ravel()))
        header.write()

        # compile and run
        os.system("make clean all run > {}".format(RESULT_FILE))

        # parse output
        result = parse_output(RESULT_FILE)

        # skip the naive result
        if not flip_layers:
            result["1"]["result"] = None

        # prepare the case name: the optimizations are cumulative, so the last
        # enabled option determines the name
        subcase_name = "naive"
        if intrinsic:
            subcase_name = "+ intrinsic scale"
        if simd:
            subcase_name = "+ SIMD"
        if flip_layers:
            subcase_name = "+ flip"
        if parallel:
            subcase_name = "+ parallel"
        if stream:
            subcase_name = "+ double buffering"
        if xcorr:
            subcase_name = "+ cross correlations"
        if fuse:
            subcase_name = "+ fused layer 1+2"
        if no_div:
            subcase_name = "+ no division after layer 1"
        if reorder:
            subcase_name = "+ reorder BN"
        if dup_inp:
            subcase_name = "+ duplicate featuremap"

        # log the result
        logger.show_subcase_result(subcase_name, result)

    # return summary
    return logger.summary()

def test():
    """
    Execute the tests
    Returns: (n_total, n_success)
    """
    logger = TestLogger(TESTNAME, show_title=False)

    for simd in [False, True]:
        for flip_layers in [False, True]:
            for parallel in [False, True]:
                for dma_stream in [False, True]:
                    for reorder in [False, True]:
                        # all other optimizations build on SIMD
                        if not simd and (flip_layers or parallel or
                                         dma_stream or reorder):
                            continue
                        if not flip_layers and (parallel or dma_stream):
                            # not implemented
                            continue
                        if not parallel and dma_stream:
                            # not implemented
                            continue

                        # generate makefile
                        mkf = Makefile()
                        mkf.add_fc_test_source("test.c")
                        mkf.add_cl_test_source("cluster.c")
                        mkf.add_cl_prog_source("net/layer2.c")
                        mkf.add_cl_prog_source("net/net.c")
                        mkf.add_cl_prog_source("func/transform.c")
                        mkf.add_cl_prog_source("func/dotp.c")
                        if not simd:
                            mkf.add_define("NO_SIMD")
                        if flip_layers:
                            mkf.add_define("FLIP_LAYERS")
                        if parallel:
                            mkf.add_define("PARALLEL")
                        if dma_stream:
                            mkf.add_define("DMA_STREAM")
                        if reorder:
                            mkf.add_define("REORDER_BN")
                        mkf.write()

                        random_input = False

                        # generate the stimuli
                        _, x_align, _, y_exp_align = gen_stimuli(
                            random_input, flip_layers, reorder)

                        # prepare header file
                        header = HeaderFile("test_stimuli.h")
                        header.add(HeaderArray("x_vec", "int8_t",
                                               x_align.ravel()))
                        header.add(HeaderArray("y_exp_vec", "int8_t",
                                               y_exp_align.ravel()))
                        header.write()

                        # compile and run
                        os.system("make clean all run > {}".format(
                            RESULT_FILE))

                        # parse output
                        result = parse_output(RESULT_FILE)

                        # log the result
                        subcase_name = "Layer 2 "
                        options = []
                        if simd:
                            options.append("simd")
                        if flip_layers:
                            options.append("flip")
                        if parallel:
                            options.append("par")
                        if dma_stream:
                            options.append("stream")
                        if reorder:
                            options.append("reorder")
                        if options:
                            subcase_name += "; ".join(options)
                        else:
                            subcase_name += "naive"
                        logger.show_subcase_result(subcase_name, result)

    # return summary
    return logger.summary()

def test():
    """
    Execute the tests
    Returns: (n_total, n_success)
    """
    logger = TestLogger(TESTNAME)

    for intrinsic_conv_scale in [False, True]:
        for simd in [False, True]:
            for parallel in [False, True]:
                for cross_correlate in [False, True]:
                    if not simd and (parallel or cross_correlate):
                        continue
                    # parallel requires intrinsic conv scale
                    if parallel and not intrinsic_conv_scale:
                        continue
                    # not implemented
                    if cross_correlate and not parallel:
                        continue

                    # generate makefile
                    mkf = Makefile()
                    mkf.add_fc_test_source("test.c")
                    mkf.add_cl_test_source("cluster.c")
                    mkf.add_cl_prog_source("net/layer1.c")
                    mkf.add_cl_prog_source("net/net.c")
                    mkf.add_cl_prog_source("func/conv.c")
                    mkf.add_cl_prog_source("func/xcorr.c")
                    mkf.add_cl_prog_source("func/transform.c")
                    if parallel:
                        mkf.add_define("PARALLEL")
                    if intrinsic_conv_scale:
                        mkf.add_define("INTRINSIC_SCALE")
                    if cross_correlate:
                        mkf.add_define("CROSS_CORRELATE")
                    if not simd:
                        mkf.add_define("NO_SIMD")
                    mkf.write()

                    random_input = False

                    # generate the stimuli
                    x, y_exp = gen_stimuli(random_input)
                    x_align = align_array(x)
                    y_exp_align = align_array(y_exp)

                    # prepare header file
                    header = HeaderFile("test_stimuli.h")
                    header.add(HeaderArray("x_vec", "int8_t",
                                           x_align.ravel()))
                    header.add(HeaderArray("y_exp_vec", "int8_t",
                                           y_exp_align.ravel()))
                    header.write()

                    # compile and run
                    os.system("make clean all run > {}".format(RESULT_FILE))

                    # parse output
                    result = parse_output(RESULT_FILE)

                    # log the result
                    options = []
                    if simd:
                        options.append("simd")
                    if parallel:
                        options.append("par")
                    if intrinsic_conv_scale:
                        options.append("intr.s.")
                    if cross_correlate:
                        options.append("xcorr")
                    subcase_name = "Layer 1 "
                    if options:
                        subcase_name += "; ".join(options)
                    else:
                        subcase_name += "naive"
                    logger.show_subcase_result(subcase_name, result)

    # return summary
    return logger.summary()

def test():
    """
    Execute the tests
    Returns: (n_total, n_success)
    """
    logger = TestLogger(TESTNAME)

    # generate makefile
    mkf = Makefile()
    mkf.add_fc_test_source("test.c")
    mkf.add_cl_test_source("cluster.c")
    mkf.add_cl_prog_source("func/flip.c")
    mkf.write()

    for outer_len in [16, 17, 18, 19]:
        for inner_len in [256, 257, 258, 259]:
            # generate the stimuli
            stim, exp = gen_stimuli(outer_len, inner_len)

            # prepare header file
            header = HeaderFile("test_stimuli.h")
            header.add(HeaderConstant("OUTER_LEN", outer_len))
            header.add(HeaderConstant("OUTER_LEN_ALIGN",
                                      align_array_size(outer_len)))
            header.add(HeaderConstant("INNER_LEN", inner_len))
            header.add(HeaderConstant("INNER_LEN_ALIGN",
                                      align_array_size(inner_len)))
            header.add(HeaderArray("vec_x", "int8_t", stim))
            header.add(HeaderArray("vec_exp", "int8_t", exp))
            header.write()

            # compile and run
            os.system("make clean all run > {}".format(RESULT_FILE))

            # parse output
            result = parse_output(RESULT_FILE)

            # log the result
            subcase_name = "{}x{}".format(outer_len, inner_len)
            logger.show_subcase_result(subcase_name, result)

    # return summary
    return logger.summary()

def test():
    """
    Execute the tests
    Returns: (n_total, n_success)
    """
    logger = TestLogger(TESTNAME)

    # generate makefile
    mkf = Makefile()
    mkf.add_fc_test_source("test.c")
    mkf.add_cl_test_source("cluster.c")
    mkf.add_cl_prog_source("func/transform.c")
    mkf.write()

    for size in [1024, 1025, 1026, 1027]:
        # generate the stimuli
        x, y, y_bias = gen_stimuli(size, scale_factor=SCALE_FACTOR, bias=BIAS,
                                   max_val=2560)

        # prepare header file
        header = HeaderFile("test_stimuli.h")
        header.add(HeaderConstant("LENGTH", size))
        header.add(HeaderScalar("div_factor", "int32_t", SCALE_FACTOR))
        header.add(HeaderScalar("bias", "int32_t", BIAS))
        header.add(HeaderArray("vec_x", "int32_t", x))
        header.add(HeaderArray("vec_exp", "int8_t", y))
        header.add(HeaderArray("vec_exp_bias", "int8_t", y_bias))
        header.write()

        # compile and run
        os.system("make clean all run > {}".format(RESULT_FILE))

        # parse output
        result = parse_output(RESULT_FILE)

        # log the result
        subcase_name = "n={}".format(size)
        logger.show_subcase_result(subcase_name, result)

    # return summary
    return logger.summary()

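# Hedged model of the transform verified above -- an assumption about
# func/transform.c, not taken from the repo: scale the int32 input down by
# div_factor (vec_exp), or add the bias first (vec_exp_bias), then saturate to
# int8. The rounding mode (truncation vs. round-to-nearest) and whether the
# bias is applied before or after the division are guesses.
import numpy as np

def _reference_transform_sketch(x, div_factor, bias=0):
    # widen to int64 so adding the bias cannot overflow the int32 input range
    y = (np.asarray(x, dtype=np.int64) + bias) // div_factor
    return np.clip(y, -128, 127).astype(np.int8)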