def gen_bench_asm_function(f, simd, typ, category):
    """Return C++ source for a ``<bench_name>__asm__`` helper function.

    The emitted function runs the benchmarked call in a non-unrolled loop
    that is fenced by six ``nop`` instructions before and six after, and by
    ``// code:{`` / ``// code:}`` marker comments, so that both the assembly
    and the plain code of the loop can be located by a parser.

    Args:
        f: Object describing the benchmarked function; this block reads
            ``f.function_name`` and calls ``f.code_ptr_step(typ)``.
        simd: Forwarded unchanged to ``gen_bench_info_from``.
        typ: Element type name; used as the pointee type of ``_r`` and
            passed to ``f.code_ptr_step``.
        category: Forwarded to ``gen_bench_name`` to build the function name.

    Returns:
        str: The formatted C++ source text.
    """
    # gen_bench_info_from returns four values; only the argument declarations
    # and the call expression are needed for this helper.
    _, bench_args_decl, _, bench_call = gen_bench_info_from(f, simd, typ)
    ## Add function that can easily be parsed to get assembly and plain code
    # NOTE: each `//` marker and the `#pragma` must stay on a line of its own,
    # otherwise the C++ line comment would swallow the code that follows it.
    return '''
void {bench_name}__asm__({type}* _r, {bench_args_decl}, int sz) {{
  __asm__ __volatile__("nop");
  __asm__ __volatile__("nop");
  __asm__ __volatile__("nop");
  __asm__ __volatile__("nop");
  __asm__ __volatile__("nop");
  __asm__ __volatile__("nop");
  // code:{{
  int n = {step};
  #pragma clang loop unroll(disable)
  for (int i = 0; i < sz; i += n) {{
    {bench_call};
  }}
  // code:}}
  __asm__ __volatile__("nop");
  __asm__ __volatile__("nop");
  __asm__ __volatile__("nop");
  __asm__ __volatile__("nop");
  __asm__ __volatile__("nop");
  __asm__ __volatile__("nop");
}}
'''.format(
        bench_name=gen_bench_name(category, f.function_name),
        type=typ,
        step=f.code_ptr_step(typ),
        bench_call=bench_call,
        bench_args_decl=common.pprint_commas(bench_args_decl),
    )
def gen_bench_from_basic_fun(f, simd, typ, category, unroll=None):
    """Return C++ source for one benchmark function and its registration.

    The emitted code consists of, in order: the text returned by
    ``f.bench_code_before(typ)``, an empty ``extern "C"`` marker function
    named ``__asm_marker__<bench_name>``, the benchmark function itself, and
    a ``BENCHMARK_CAPTURE`` line registering it.  Inside the benchmark
    function the measured loop is wrapped between two copies of the text
    returned by ``asm_marker(simd, bench_name)`` and two
    ``// code: <bench_name>`` marker comments; any ``std::exception`` is
    reported through ``state.SkipWithError``.

    Args:
        f: Object describing the benchmarked function; this block reads
            ``f.function_name`` and calls ``f.code_ptr_step(typ)`` and
            ``f.bench_code_before(typ)``.
        simd: Forwarded to ``gen_bench_info_from`` and ``asm_marker``.
        typ: Element type name; used as the pointee type of ``_r`` and as
            the capture name in ``BENCHMARK_CAPTURE``.
        category: Forwarded to ``gen_bench_name``.
        unroll: Forwarded to ``gen_bench_name`` (defaults to ``None``).

    Returns:
        str: The formatted C++ source text.
    """
    # The third value (call arguments) is not referenced by the template, so
    # it is discarded instead of being formatted for nothing.
    bench_args_init, bench_args_decl, _, bench_call = \
        gen_bench_info_from(f, simd, typ)
    bench_name = gen_bench_name(category, f.function_name, unroll)
    # NOTE: the `//` comments and the `#pragma` must each stay on their own
    # line, otherwise the C++ line comment would swallow the code after it.
    return '''
{code_before}
extern "C" {{ void __asm_marker__{bench_name}() {{}} }}
void {bench_name}(benchmark::State& state, {type}* _r, {bench_args_decl}, int sz) {{
  // Normalize size depending on the step so that we're not going out of boundaies
  // (Required when the size is'nt a multiple of `n`, like for unrolling benches)
  sz = (sz / {step}) * {step};
  try {{
    for (auto _ : state) {{
      {asm_marker}
      // code: {bench_name}
      int n = {step};
      #pragma clang loop unroll(disable)
      for (int i = 0; i < sz; i += n) {{
        {bench_call};
      }}
      // code: {bench_name}
      {asm_marker}
    }}
  }} catch (std::exception const& e) {{
    std::string message("ERROR: ");
    message += e.what();
    state.SkipWithError(message.c_str());
  }}
}}
BENCHMARK_CAPTURE({bench_name}, {type}, make_data(sz), {bench_args_init}, sz);
'''.format(
        bench_name=bench_name,
        type=typ,
        step=f.code_ptr_step(typ),
        bench_call=bench_call,
        bench_args_init=common.pprint_commas(bench_args_init),
        bench_args_decl=common.pprint_commas(bench_args_decl),
        code_before=f.bench_code_before(typ),
        asm_marker=asm_marker(simd, bench_name),
    )
def code_call(self, typ, args):
    """Build the C++ call expression for this function in the nsimd namespace.

    The result has the form
    ``nsimd::<self.name>(<args>, <typ>(), nsimd::cpu())`` where ``<args>``
    is ``args`` as rendered by ``common.pprint_commas``.

    Args:
        typ: Type name; emitted followed by ``()`` as an extra argument.
        args: Call arguments, passed to ``common.pprint_commas``.

    Returns:
        str: The C++ call expression.
    """
    callee = self.name
    rendered_args = common.pprint_commas(args)
    template = 'nsimd::{}({}, {}(), nsimd::cpu())'
    return template.format(callee, rendered_args, typ)
def code_call(self, typ, args):
    """Build a plain call expression ``<self.name>(<args>)``.

    Args:
        typ: Unused by this implementation; presumably kept so the signature
            matches other ``code_call`` variants (confirm against callers).
        args: Call arguments, passed to ``common.pprint_commas``.

    Returns:
        str: The call expression.
    """
    callee = self.name
    rendered_args = common.pprint_commas(args)
    return '{}({})'.format(callee, rendered_args)