def test():
    """
    Run the layer 3 test once per build configuration.

    For each (simd, parallel) combination a makefile and a stimuli header are
    generated, the project is built and run through ``make``, and the parsed
    output file is passed to the logger as one subcase.

    Returns: (n_total, n_success)
    """
    report = TestLogger(TESTNAME, show_title=False)

    configurations = ((False, False), (True, False), (True, True))
    for simd, parallel in configurations:
        # describe the build: sources first, then the optional defines
        makefile = Makefile()
        makefile.add_fc_test_source("test.c")
        makefile.add_cl_test_source("cluster.c")
        for source in ("net/layer3.c", "net/net.c",
                       "func/transform.c", "func/conv.c"):
            makefile.add_cl_prog_source(source)
        if not simd:
            makefile.add_define("NO_SIMD")
        if parallel:
            makefile.add_define("PARALLEL")
        makefile.write()

        # stimuli are generated with random input disabled; only the aligned
        # arrays (2nd and 4th return values) are used
        _, x_aligned, _, y_expected_aligned = gen_stimuli(False)

        # export the stimuli to the header
        stimuli_header = HeaderFile("test_stimuli.h")
        stimuli_header.add(HeaderArray("x_vec", "int8_t", x_aligned.ravel()))
        stimuli_header.add(
            HeaderArray("y_exp_vec", "int8_t", y_expected_aligned.ravel()))
        stimuli_header.write()

        # build + run, the output is redirected into RESULT_FILE
        command = "make clean all run > {}".format(RESULT_FILE)
        os.system(command)
        outcome = parse_output(RESULT_FILE)

        # subcase label lists the enabled options, or "naive" if there are none
        enabled = [label
                   for active, label in ((simd, "simd"), (parallel, "parallel"))
                   if active]
        suffix = " + ".join(enabled) if enabled else "naive"
        report.show_subcase_result("layer 3 " + suffix, outcome)

    return report.summary()
def test():
    """
    Run the layer 1 flip test for the naive and the parallel build.

    Each variant generates a makefile and a stimuli header (arrays exported
    with ``const=False``), builds and runs the project through ``make`` and
    logs the parsed output as one subcase.

    Returns: (n_total, n_success)
    """
    report = TestLogger(TESTNAME, show_title=False)

    for parallel in (False, True):
        # describe the build
        makefile = Makefile()
        makefile.add_fc_test_source("test.c")
        makefile.add_cl_test_source("cluster.c")
        for source in ("net/layer1.c", "net/net.c", "func/flip.c"):
            makefile.add_cl_prog_source(source)
        if parallel:
            makefile.add_define("PARALLEL")
        makefile.write()

        # only the aligned input and aligned expected output are exported
        _, x_aligned, _, y_expected_aligned = gen_stimuli()

        stimuli_header = HeaderFile("test_stimuli.h")
        stimuli_header.add(
            HeaderArray("x_vec", "int8_t", x_aligned.ravel(), const=False))
        stimuli_header.add(
            HeaderArray("y_exp", "int8_t", y_expected_aligned.ravel(),
                        const=False))
        stimuli_header.write()

        # build + run, the output is redirected into RESULT_FILE
        command = "make clean all run > {}".format(RESULT_FILE)
        os.system(command)
        outcome = parse_output(RESULT_FILE)

        variant = "parallel" if parallel else "naive"
        report.show_subcase_result("Layer 1 flip " + variant, outcome)

    return report.summary()
def test():
    """
    Run the convolution test for several vector sizes and CONV_VERSION values.

    For each (size_a, size_b) pair and each conv version 0..3, a makefile
    defining CONV_VERSION and a stimuli header are generated, the project is
    built and run through ``make`` and the parsed output is logged.

    Returns: (n_total, n_success)
    """
    report = TestLogger(TESTNAME)

    sizes = ((155, 16), (1188, 64), (4096, 128))
    for size_a, size_b in sizes:
        # both values depend on size_b only
        div_factor = 128 * size_b // 8
        offset = 10 * div_factor

        for conv_version in range(4):
            # describe the build
            makefile = Makefile()
            makefile.add_fc_test_source("test.c")
            makefile.add_cl_test_source("cluster.c")
            makefile.add_cl_prog_source("func/conv.c")
            makefile.add_define("CONV_VERSION", conv_version)
            makefile.write()

            vecA, vecB, vecExp = gen_stimuli(size_a, size_b, div_factor,
                                             offset)

            # export sizes, scaling parameters and the vectors
            stimuli_header = HeaderFile("test_stimuli.h")
            constants = (
                ("LENGTH_A", size_a),
                ("LENGTH_B", size_b),
                ("LENGTH_RES", len(vecExp)),
                ("FACTOR", div_factor),
                ("OFFSET", offset),
            )
            for const_name, const_value in constants:
                stimuli_header.add(HeaderConstant(const_name, const_value))
            arrays = (("vecA", vecA), ("vecB", vecB), ("vecExp", vecExp))
            for array_name, array_data in arrays:
                stimuli_header.add(HeaderArray(array_name, "int8_t", array_data))
            stimuli_header.write()

            # build + run, the output is redirected into RESULT_FILE
            command = "make clean all run > {}".format(RESULT_FILE)
            os.system(command)
            outcome = parse_output(RESULT_FILE)

            label = "V{}, {}x{}".format(conv_version, size_a, size_b)
            report.show_subcase_result(label, outcome)

    return report.summary()
def test():
    """
    Run the layer 5 test for the naive and the SIMD build.

    For each variant a makefile and a stimuli header are generated, the
    project is built and run through ``make`` (output redirected into
    RESULT_FILE), and the parsed output is logged as one subcase.

    Returns: (n_total, n_success)
    """
    logger = TestLogger(TESTNAME, show_title=False)

    for simd in [False, True]:
        # generate makefile
        mkf = Makefile()
        mkf.add_fc_test_source("test.c")
        mkf.add_cl_test_source("cluster.c")
        mkf.add_cl_prog_source("net/layer5.c")
        mkf.add_cl_prog_source("net/net.c")
        mkf.add_cl_prog_source("func/transform.c")
        mkf.add_cl_prog_source("func/dotp.c")
        if not simd:
            mkf.add_define("NO_SIMD")
        # Write only after all defines are registered: previously the makefile
        # was written before NO_SIMD was added, so the define was not part of
        # the written makefile for the "naive" subcase.
        mkf.write()

        random_input = False

        # generate the stimuli (only the aligned arrays are used)
        _, x_align, _, y_exp_align = gen_stimuli(random_input)

        # prepare header file
        header = HeaderFile("test_stimuli.h")
        header.add(HeaderArray("x_vec", "int8_t", x_align.ravel()))
        header.add(HeaderArray("y_exp_vec", "int8_t", y_exp_align.ravel()))
        header.write()

        # compile and run
        os.system("make clean all run > {}".format(RESULT_FILE))

        # parse output
        result = parse_output(RESULT_FILE)

        # log the result
        subcase_name = "Layer 5 " + ("simd" if simd else "naive")
        logger.show_subcase_result(subcase_name, result)

    # return summary
    return logger.summary()
def test():
    """
    Run the dot product test for several vector lengths and strides.

    The makefile is generated once. For every (length, a_stride, b_stride)
    combination a new stimuli header is written, the project is built and run
    through ``make`` and the parsed output is logged as one subcase.

    Returns: (n_total, n_success)
    """
    report = TestLogger(TESTNAME)

    # the build description is the same for all subcases
    makefile = Makefile()
    makefile.add_fc_test_source("test.c")
    makefile.add_cl_test_source("cluster.c")
    makefile.add_cl_prog_source("func/dotp.c")
    makefile.write()

    lengths = (22, 23, 1024, 1025)
    strides = ((1, 1), (4, 1), (8, 4))
    for length in lengths:
        for a_stride, b_stride in strides:
            vec_a, vec_b, exp_result = gen_stimuli(length, a_stride, b_stride)

            # export parameters, expected result and both vectors
            stimuli_header = HeaderFile("test_stimuli.h")
            constants = (
                ("LENGTH", length),
                ("EXP_RESULT", exp_result),
                ("A_STRIDE", a_stride),
                ("B_STRIDE", b_stride),
            )
            for const_name, const_value in constants:
                stimuli_header.add(HeaderConstant(const_name, const_value))
            stimuli_header.add(HeaderArray("vec_a", "int8_t", vec_a.ravel()))
            stimuli_header.add(HeaderArray("vec_b", "int8_t", vec_b.ravel()))
            stimuli_header.write()

            # build + run, the output is redirected into RESULT_FILE
            command = "make clean all run > {}".format(RESULT_FILE)
            os.system(command)
            outcome = parse_output(RESULT_FILE)

            label = "length: {}, stride: {}x{}".format(length, a_stride,
                                                       b_stride)
            report.show_subcase_result(label, outcome)

    return report.summary()
def test():
    """
    Run the flip test for several outer and inner lengths.

    The makefile is generated once. For every (outer_len, inner_len) pair a
    stimuli header with the raw and aligned sizes and the input/expected
    vectors is written, the project is built and run through ``make`` and the
    parsed output is logged as one subcase.

    Returns: (n_total, n_success)
    """
    report = TestLogger(TESTNAME)

    # the build description is the same for all subcases
    makefile = Makefile()
    makefile.add_fc_test_source("test.c")
    makefile.add_cl_test_source("cluster.c")
    makefile.add_cl_prog_source("func/flip.c")
    makefile.write()

    for outer_len in range(16, 20):
        for inner_len in range(256, 260):
            stim, exp = gen_stimuli(outer_len, inner_len)

            # export raw and aligned dimensions, then the data
            stimuli_header = HeaderFile("test_stimuli.h")
            stimuli_header.add(HeaderConstant("OUTER_LEN", outer_len))
            stimuli_header.add(
                HeaderConstant("OUTER_LEN_ALIGN", align_array_size(outer_len)))
            stimuli_header.add(HeaderConstant("INNER_LEN", inner_len))
            stimuli_header.add(
                HeaderConstant("INNER_LEN_ALIGN", align_array_size(inner_len)))
            stimuli_header.add(HeaderArray("vec_x", "int8_t", stim))
            stimuli_header.add(HeaderArray("vec_exp", "int8_t", exp))
            stimuli_header.write()

            # build + run, the output is redirected into RESULT_FILE
            command = "make clean all run > {}".format(RESULT_FILE)
            os.system(command)
            outcome = parse_output(RESULT_FILE)

            label = "{}x{}".format(outer_len, inner_len)
            report.show_subcase_result(label, outcome)

    return report.summary()
def test():
    """
    Run the transform test for several vector sizes.

    The makefile is generated once. For every size a stimuli header with the
    length, the scale factor, the bias, the input vector and both expected
    vectors is written, the project is built and run through ``make`` and the
    parsed output is logged as one subcase.

    Returns: (n_total, n_success)
    """
    report = TestLogger(TESTNAME)

    # the build description is the same for all subcases
    makefile = Makefile()
    makefile.add_fc_test_source("test.c")
    makefile.add_cl_test_source("cluster.c")
    makefile.add_cl_prog_source("func/transform.c")
    makefile.write()

    for size in range(1024, 1028):
        x, y, y_bias = gen_stimuli(size,
                                   scale_factor=SCALE_FACTOR,
                                   bias=BIAS,
                                   max_val=2560)

        # export parameters and vectors
        stimuli_header = HeaderFile("test_stimuli.h")
        stimuli_header.add(HeaderConstant("LENGTH", size))
        stimuli_header.add(HeaderScalar("div_factor", "int32_t", SCALE_FACTOR))
        stimuli_header.add(HeaderScalar("bias", "int32_t", BIAS))
        stimuli_header.add(HeaderArray("vec_x", "int32_t", x))
        stimuli_header.add(HeaderArray("vec_exp", "int8_t", y))
        stimuli_header.add(HeaderArray("vec_exp_bias", "int8_t", y_bias))
        stimuli_header.write()

        # build + run, the output is redirected into RESULT_FILE
        command = "make clean all run > {}".format(RESULT_FILE)
        os.system(command)
        outcome = parse_output(RESULT_FILE)

        report.show_subcase_result("n={}".format(size), outcome)

    return report.summary()
def test():
    """
    Run the fused layer 1+2 test for three option combinations.

    Every build defines PARALLEL, INTRINSIC_SCALE, CROSS_CORRELATE,
    FUSE_LAYERS and DEFAULT_DIM; NO_INTERMEDIATE_SCALE and
    DUPLICATE_FEATUREMAP are added depending on the combination. For each one
    a makefile and a stimuli header are generated, the project is built and
    run through ``make`` and the parsed output is logged as one subcase.

    Returns: (n_total, n_success)
    """
    report = TestLogger(TESTNAME)

    combinations = ((False, False), (True, False), (True, True))
    for no_intermediate_scale, duplicate_featuremap in combinations:
        # describe the build
        # NOTE: an earlier variant (left commented out in the original code)
        # used opt_level=2 when the feature map is duplicated.
        makefile = Makefile(opt_level=3)
        makefile.add_fc_test_source("test.c")
        makefile.add_cl_test_source("cluster.c")
        for source in ("net/fused_layer_1_2.c", "net/net.c", "func/conv.c",
                       "func/xcorr.c", "func/dotp.c", "func/transform.c"):
            makefile.add_cl_prog_source(source)
        for define in ("PARALLEL", "INTRINSIC_SCALE", "CROSS_CORRELATE",
                       "FUSE_LAYERS", "DEFAULT_DIM"):
            makefile.add_define(define)
        if no_intermediate_scale:
            makefile.add_define("NO_INTERMEDIATE_SCALE")
        if duplicate_featuremap:
            makefile.add_define("DUPLICATE_FEATUREMAP")
        makefile.write()

        # stimuli are generated with random input disabled; only the aligned
        # arrays are used
        _, x_aligned, _, y_expected_aligned = gen_stimuli(
            False, no_intermediate_scale, duplicate_featuremap)

        stimuli_header = HeaderFile("test_stimuli.h")
        stimuli_header.add(HeaderArray("x_vec", "int8_t", x_aligned.ravel()))
        stimuli_header.add(
            HeaderArray("y_exp_vec", "int8_t", y_expected_aligned.ravel()))
        stimuli_header.write()

        # build + run, the output is redirected into RESULT_FILE
        command = "make clean all run > {}".format(RESULT_FILE)
        os.system(command)
        outcome = parse_output(RESULT_FILE)

        # the label is the prefix followed by the enabled options; with no
        # option enabled the join is empty and only the prefix remains
        enabled = [label
                   for active, label in ((no_intermediate_scale, "no scale"),
                                         (duplicate_featuremap, "dup inp"))
                   if active]
        report.show_subcase_result("Fused Layer 1+2 " + "; ".join(enabled),
                                   outcome)

    return report.summary()
def test():
    """
    Run the whole-model test for a sequence of cumulative build options.

    Eleven configurations are tested: the first enables nothing and each
    following one enables one more flag than the previous. For each
    configuration a makefile and a stimuli header are generated, the project
    is built and run through ``make`` and the parsed output is logged. A
    subcase is named after the last flag that is enabled.

    Returns: (n_total, n_success)
    """
    report = TestLogger(TESTNAME)

    # Flag order inside one configuration:
    #   intrinsic, simd, flip_layers, parallel, stream,
    #   xcorr, fuse, no_div, reorder, dup_inp
    # Configuration number k has exactly the first k flags set to True.
    n_flags = 10
    configurations = [tuple(position < step for position in range(n_flags))
                      for step in range(n_flags + 1)]

    for (intrinsic, simd, flip_layers, parallel, stream,
         xcorr, fuse, no_div, reorder, dup_inp) in configurations:
        # describe the build: sources first
        makefile = Makefile()
        makefile.add_fc_test_source("test.c")
        makefile.add_cl_test_source("cluster.c")
        for source in ("net/model.c",
                       "net/layer1.c",
                       "net/layer2.c",
                       "net/layer3.c",
                       "net/layer4.c",
                       "net/layer5.c",
                       "net/fused_layer_1_2.c",
                       "net/net.c",
                       "func/transform.c",
                       "func/dotp.c",
                       "func/conv.c",
                       "func/flip.c",
                       "func/xcorr.c"):
            makefile.add_cl_prog_source(source)

        # ... then the defines; SIMD is the only option that is switched OFF
        # through a define
        if not simd:
            makefile.add_define("NO_SIMD")
        optional_defines = (
            (flip_layers, "FLIP_LAYERS"),
            (parallel, "PARALLEL"),
            (intrinsic, "INTRINSIC_SCALE"),
            (stream, "DMA_STREAM"),
            (xcorr, "CROSS_CORRELATE"),
            (fuse, "FUSE_LAYERS"),
            (no_div, "NO_INTERMEDIATE_SCALE"),
            (dup_inp, "DUPLICATE_FEATUREMAP"),
            (reorder, "REORDER_BN"),
        )
        for active, define in optional_defines:
            if active:
                makefile.add_define(define)
        makefile.write()

        # only the aligned arrays are exported
        _, x_aligned, _, y_expected_aligned = gen_stimuli(no_div=no_div,
                                                          pad_data=dup_inp,
                                                          reorder_bn=reorder)

        stimuli_header = HeaderFile("test_stimuli.h")
        stimuli_header.add(HeaderArray("x_vec", "int8_t", x_aligned.ravel()))
        stimuli_header.add(
            HeaderArray("y_exp_vec", "int8_t", y_expected_aligned.ravel()))
        stimuli_header.write()

        # build + run, the output is redirected into RESULT_FILE
        command = "make clean all run > {}".format(RESULT_FILE)
        os.system(command)
        outcome = parse_output(RESULT_FILE)

        # without flipped layers the result stored under key "1" is blanked
        # (original comment: "skip the naive result")
        if not flip_layers:
            outcome["1"]["result"] = None

        # the label of the last enabled flag wins
        label = "naive"
        stage_labels = (
            (intrinsic, "+ intrinsic scale"),
            (simd, "+ SIMD"),
            (flip_layers, "+ flip"),
            (parallel, "+ parallel"),
            (stream, "+ double buffering"),
            (xcorr, "+ cross correlations"),
            (fuse, "+ fused layer 1+2"),
            (no_div, "+ no division after layer 1"),
            (reorder, "+ reorder BN"),
            (dup_inp, "+ duplicate featuremap"),
        )
        for active, stage_label in stage_labels:
            if active:
                label = stage_label

        report.show_subcase_result(label, outcome)

    return report.summary()
def test():
    """
    Run the layer 2 test for every supported combination of build options.

    All combinations of simd, flip_layers, parallel, dma_stream and reorder
    are enumerated and unsupported ones are skipped. For each remaining
    combination a makefile and a stimuli header are generated, the project is
    built and run through ``make`` and the parsed output is logged.

    Returns: (n_total, n_success)
    """
    report = TestLogger(TESTNAME, show_title=False)

    # enumerate the flags in the same order as five nested loops would
    switch = (False, True)
    combinations = [(simd, flip_layers, parallel, dma_stream, reorder)
                    for simd in switch
                    for flip_layers in switch
                    for parallel in switch
                    for dma_stream in switch
                    for reorder in switch]

    for simd, flip_layers, parallel, dma_stream, reorder in combinations:
        # without SIMD, none of the other options is tested
        if not simd and (flip_layers or parallel or dma_stream or reorder):
            continue
        # not implemented: parallel / streaming without flipped layers
        if not flip_layers and (parallel or dma_stream):
            continue
        # not implemented: streaming without parallel
        if dma_stream and not parallel:
            continue

        # describe the build
        makefile = Makefile()
        makefile.add_fc_test_source("test.c")
        makefile.add_cl_test_source("cluster.c")
        for source in ("net/layer2.c", "net/net.c",
                       "func/transform.c", "func/dotp.c"):
            makefile.add_cl_prog_source(source)
        if not simd:
            makefile.add_define("NO_SIMD")
        for active, define in ((flip_layers, "FLIP_LAYERS"),
                               (parallel, "PARALLEL"),
                               (dma_stream, "DMA_STREAM"),
                               (reorder, "REORDER_BN")):
            if active:
                makefile.add_define(define)
        makefile.write()

        # stimuli are generated with random input disabled; only the aligned
        # arrays are used
        _, x_aligned, _, y_expected_aligned = gen_stimuli(False, flip_layers,
                                                          reorder)

        stimuli_header = HeaderFile("test_stimuli.h")
        stimuli_header.add(HeaderArray("x_vec", "int8_t", x_aligned.ravel()))
        stimuli_header.add(
            HeaderArray("y_exp_vec", "int8_t", y_expected_aligned.ravel()))
        stimuli_header.write()

        # build + run, the output is redirected into RESULT_FILE
        command = "make clean all run > {}".format(RESULT_FILE)
        os.system(command)
        outcome = parse_output(RESULT_FILE)

        # subcase label lists the enabled options, or "naive" if there are none
        enabled = [label
                   for active, label in ((simd, "simd"),
                                         (flip_layers, "flip"),
                                         (parallel, "par"),
                                         (dma_stream, "stream"),
                                         (reorder, "reorder"))
                   if active]
        suffix = "; ".join(enabled) if enabled else "naive"
        report.show_subcase_result("Layer 2 " + suffix, outcome)

    return report.summary()
def test():
    """
    Run the layer 1 test for every supported combination of build options.

    All combinations of intrinsic_conv_scale, simd, parallel and
    cross_correlate are enumerated and unsupported ones are skipped. For each
    remaining combination a makefile and a stimuli header are generated, the
    project is built and run through ``make`` and the parsed output is logged.

    Returns: (n_total, n_success)
    """
    report = TestLogger(TESTNAME)

    # enumerate the flags in the same order as four nested loops would
    switch = (False, True)
    combinations = [(intrinsic_conv_scale, simd, parallel, cross_correlate)
                    for intrinsic_conv_scale in switch
                    for simd in switch
                    for parallel in switch
                    for cross_correlate in switch]

    for intrinsic_conv_scale, simd, parallel, cross_correlate in combinations:
        # without SIMD, neither parallel nor cross correlation is tested
        if not simd and (parallel or cross_correlate):
            continue
        # parallel requires intrinsic conv scale
        if parallel and not intrinsic_conv_scale:
            continue
        # not implemented: cross correlation without parallel
        if cross_correlate and not parallel:
            continue

        # describe the build
        makefile = Makefile()
        makefile.add_fc_test_source("test.c")
        makefile.add_cl_test_source("cluster.c")
        for source in ("net/layer1.c", "net/net.c", "func/conv.c",
                       "func/xcorr.c", "func/transform.c"):
            makefile.add_cl_prog_source(source)
        for active, define in ((parallel, "PARALLEL"),
                               (intrinsic_conv_scale, "INTRINSIC_SCALE"),
                               (cross_correlate, "CROSS_CORRELATE"),
                               (not simd, "NO_SIMD")):
            if active:
                makefile.add_define(define)
        makefile.write()

        # stimuli are generated with random input disabled and aligned here
        x, y_exp = gen_stimuli(False)
        x_aligned = align_array(x)
        y_expected_aligned = align_array(y_exp)

        stimuli_header = HeaderFile("test_stimuli.h")
        stimuli_header.add(HeaderArray("x_vec", "int8_t", x_aligned.ravel()))
        stimuli_header.add(
            HeaderArray("y_exp_vec", "int8_t", y_expected_aligned.ravel()))
        stimuli_header.write()

        # build + run, the output is redirected into RESULT_FILE
        command = "make clean all run > {}".format(RESULT_FILE)
        os.system(command)
        outcome = parse_output(RESULT_FILE)

        # subcase label lists the enabled options, or "naive" if there are none
        enabled = [label
                   for active, label in ((simd, "simd"),
                                         (parallel, "par"),
                                         (intrinsic_conv_scale, "intr.s."),
                                         (cross_correlate, "xcorr"))
                   if active]
        suffix = "; ".join(enabled) if enabled else "naive"
        report.show_subcase_result("Layer 1 " + suffix, outcome)

    return report.summary()