def doit(opts):
    """Generate one ULP test program per MPFR-tested operator and float type.

    For every operator whose ``tests_mpfr`` flag is set (``gammaln``,
    ``lgamma`` and ``pow`` are always skipped) and every type in
    ``common.ftypes``, a ``<op>_ulp_<typ>.cpp`` file is written into
    ``opts.ulps_dir`` and then handed to ``common.clang_format``.

    Raises:
        Exception: if ``common.ftypes`` yields a type other than ``f16``,
            ``f32`` or ``f64``.
    """

    def type_settings(typ):
        # Values substituted into the `code` template for one float type.
        if typ == 'f16':
            return dict(random_generator=random_f16_generator,
                        convert_to_type="nsimd_f32_to_f16",
                        convert_from_type="nsimd_f16_to_f32",
                        mantisse=10,
                        SIZE=0xffff,
                        mpfr_suffix="flt")
        if typ == 'f32':
            return dict(random_generator=random_f32_generator,
                        convert_to_type="(f32)",
                        convert_from_type="",
                        mantisse=23,
                        SIZE=0x00ffffff,
                        mpfr_suffix="flt")
        if typ == 'f64':
            return dict(random_generator=random_f64_generator,
                        convert_to_type="(f64)",
                        convert_from_type="",
                        mantisse=52,
                        SIZE=0x00ffffff,
                        mpfr_suffix="d")
        raise Exception('Unsupported type "{}"'.format(typ))

    common.myprint(opts, 'Generating ulps')
    common.mkdir_p(opts.ulps_dir)

    excluded_ops = ['gammaln', 'lgamma', 'pow']
    mpfr_rnd = ", MPFR_RNDN"

    for op_name, operator in operators.operators.items():
        if not operator.tests_mpfr or op_name in excluded_ops:
            continue
        mpfr_func = operator.tests_mpfr_name()

        for typ in common.ftypes:
            settings = type_settings(typ)
            basename = '{}_{}_{}.cpp'.format(op_name, "ulp", typ)
            target = os.path.join(opts.ulps_dir, basename)
            if not common.can_create_filename(opts, target):
                continue

            body = code.format(typ=typ,
                               nsimd_func=op_name,
                               mpfr_func=mpfr_func,
                               mpfr_rnd=mpfr_rnd,
                               **settings)
            with common.open_utf8(opts, target) as out:
                out.write(includes)
                out.write(gen_tests.relative_distance_cpp)
                out.write(body)
            common.clang_format(opts, target)
def doit(opts):
    """Write ``friendly_but_not_optimized.hpp`` into ``opts.include_dir``.

    One section (``common.hbar`` followed by ``get_impl(operator)``) is
    emitted for every operator that has a C++ operator, takes exactly three
    entries in ``params`` and is not ``shl``/``shr``.  Nothing is written
    when ``common.can_create_filename`` refuses the target.  The resulting
    file is passed to ``common.clang_format``.
    """
    common.myprint(opts, 'Generating friendly but not optimized advanced '
                         'C++ API')
    filename = os.path.join(opts.include_dir, 'friendly_but_not_optimized.hpp')
    if not common.can_create_filename(opts, filename):
        return
    with common.open_utf8(opts, filename) as out:
        # The header has no placeholder, so it is written as a plain literal
        # (the former `.format(year=...)` call substituted nothing).
        out.write('''#ifndef NSIMD_FRIENDLY_BUT_NOT_OPTIMIZED_HPP
                     #define NSIMD_FRIENDLY_BUT_NOT_OPTIMIZED_HPP

                     #include <nsimd/nsimd.h>
                     #include <nsimd/cxx_adv_api.hpp>

                     namespace nsimd {

                     ''')
        for operator in operators.operators.values():
            if operator.cxx_operator is None or len(operator.params) != 3 \
               or operator.name in ['shl', 'shr']:
                continue
            out.write('''{hbar}

                         {code}

                         '''.format(hbar=common.hbar, code=get_impl(operator)))
        out.write('''{hbar}

                     }} // namespace nsimd

                     #endif'''.format(hbar=common.hbar))
    common.clang_format(opts, filename)
def gen_unary_ops_tests(lf, rt, opts):
    """Generate the two test files of every bitwise unary operator.

    For each ``(op_name, s0, s1)`` in ``bitwise_unary_ops`` two files are
    produced from ``bitwise_unary_test_template``: ``<op_name>b`` (using
    ``s0`` as test statement) and ``<op_name>l`` (using ``s1``).  Each file
    is written and then passed to ``common.clang_format``.

    A variant is skipped when ``get_filename`` returns ``None``, which is
    how the sibling generators in this file treat that return value.

    Args:
        lf, rt: values substituted for ``{lf}`` / ``{rt}`` in the templates
            and forwarded to ``get_filename``.
        opts: generator options forwarded to the ``common`` helpers.
    """
    for op_name, s0, s1 in bitwise_unary_ops:
        decls = check + limits + gen_random_val
        variants = [
            # (term, value for `l`, random value statement, test statement)
            ("b", "",
             "__gen_random_val<{lf}, {rt}>();".format(lf=lf, rt=rt), s0),
            ("l", "l", "(raw_t)(rand() % 2);", s1),
        ]
        for term, l_suffix, rand_statement, test_statement in variants:
            filename = get_filename(opts, op_name + term, lf, rt)
            if filename is None:
                continue
            content_src = bitwise_unary_test_template.format(
                op_name=op_name, lf=lf, rt=rt,
                includes=includes, decls=decls,
                rand_statement=rand_statement,
                test_statement=test_statement,
                l=l_suffix, term=term)
            with common.open_utf8(opts, filename) as fp:
                fp.write(content_src)
            common.clang_format(opts, filename)
def gen_archis_write_file(opts, op, platform, simd_ext, simd_dir):
    """Write the header ``<op.name>.h`` of one operator into ``simd_dir``.

    The header wraps the code returned by ``get_simd_implementation`` in an
    include guard obtained from ``op.get_header_guard`` and includes the
    ``types.h`` of the given platform / SIMD extension.  Nothing is written
    when ``common.can_create_filename`` refuses the target.
    """
    header_path = os.path.join(simd_dir, '{}.h'.format(op.name))
    if not common.can_create_filename(opts, header_path):
        return

    platform_module = opts.platforms[platform]
    template = '''#ifndef {guard}
                  #define {guard}

                  #include <nsimd/{platform}/{simd_ext}/types.h>
                  {additional_include}

                  {code}

                  {hbar}

                  #endif
                  '''

    with common.open_utf8(opts, header_path) as out:
        # Field values are computed in the order of the original keyword
        # list; `year` and `func` have no placeholder in the template.
        extra_include = platform_module.get_additional_include(
            op.name, platform, simd_ext)
        current_year = date.today().year
        guard = op.get_header_guard(platform, simd_ext)
        separator = common.hbar
        implementation = get_simd_implementation(
            opts, op, platform_module, simd_ext)
        out.write(template.format(additional_include=extra_include,
                                  year=current_year,
                                  guard=guard,
                                  platform=platform,
                                  simd_ext=simd_ext,
                                  func=op.name,
                                  hbar=separator,
                                  code=implementation))
    common.clang_format(opts, header_path)
def doit(opts):
    """Write ``cxx_adv_api_functions.hpp`` into ``opts.include_dir``.

    Between the opening and closing of ``namespace nsimd`` one section is
    emitted per operator whose ``autogen_cxx_adv`` flag is set; its content
    comes from ``get_cxx_advanced_generic``.  The file is then passed to
    ``common.clang_format``.
    """
    print('-- Generating advanced C++ API')
    target = os.path.join(opts.include_dir, 'cxx_adv_api_functions.hpp')
    if not common.can_create_filename(opts, target):
        return

    prologue = '''#ifndef NSIMD_CXX_ADV_API_FUNCTIONS_HPP
                  #define NSIMD_CXX_ADV_API_FUNCTIONS_HPP

                  namespace nsimd {{

                  '''
    section = '''{hbar}

                 {code}

                 '''
    epilogue = '''{hbar}

                  }} // namespace nsimd

                  #endif'''

    with common.open_utf8(opts, target) as out:
        out.write(prologue.format(year=date.today().year))
        selected = (candidate
                    for candidate in operators.operators.values()
                    if candidate.autogen_cxx_adv)
        for operator in selected:
            out.write(section.format(
                hbar=common.hbar,
                code=get_cxx_advanced_generic(operator)))
        out.write(epilogue.format(hbar=common.hbar))
    common.clang_format(opts, target)
def doit(opts):
    """Write ``functions.h`` (the base C and C++ APIs) into ``opts.include_dir``.

    The include directory is created first.  For every operator the file
    receives an ``NSIMD_AUTO_INCLUDE`` line followed by the output of
    ``get_c_base_generic`` and ``get_cxx_base_generic``; the declarations
    returned by ``get_put_decl`` close the file, which is finally passed to
    ``common.clang_format``.
    """
    print('-- Generating base APIs')
    common.mkdir_p(opts.include_dir)
    target = os.path.join(opts.include_dir, 'functions.h')
    if not common.can_create_filename(opts, target):
        return

    with common.open_utf8(target) as out:
        out.write('''#ifndef NSIMD_FUNCTIONS_H
                     #define NSIMD_FUNCTIONS_H

                     '''.format(year=date.today().year))

        for operator in operators.operators.values():
            # Positional fields, in template order.
            separator = common.hbar
            header_name = operator.name
            c_api = get_c_base_generic(operator)
            cxx_api = get_cxx_base_generic(operator)
            out.write('''{}

                         #include NSIMD_AUTO_INCLUDE({}.h)

                         {}

                         {}

                         '''.format(separator, header_name, c_api, cxx_api))

        out.write('''{hbar}

                     {put_decl}

                     {hbar}

                     #endif'''.format(hbar=common.hbar,
                                      put_decl=get_put_decl()))
    common.clang_format(opts, target)
def gen_bench(f, simd, typ):
    """Generate, write and clang-format the benchmark file of one operator.

    Returns early (writing nothing) when the target file may not be created
    or when ``gen_code`` yields ``None``.  Options are read from the
    module-level ``_opts``.

    Args:
        f: operator description; provides ``bench_against_cpu()`` and
            ``bench_against_libs()``.
        simd: SIMD extension name.
        typ: element type name.
    """
    ## TODO
    target = gen_filename(f, simd, typ)

    ## Nothing to do when the file must not be (re)created
    if not common.can_create_filename(_opts, target):
        return

    ## Code specific to this benchmark
    category = common.nsimd_category(simd)
    specific = gen_code(f, simd, typ, category=category)
    if specific is None:
        return

    ## Assemble the parts (list items are evaluated in order)
    sections = [
        gen_bench_against(f, simd, typ, f.bench_against_cpu()),
        specific,
        gen_bench_unrolls(f, simd, typ, category),
        gen_bench_against(f, simd, typ, f.bench_against_libs()),
    ]
    source = gen_bench_from_code(f, typ, ''.join(sections))

    ## Write the file, then clang-format it
    with common.open_utf8(target) as out:
        out.write(source)
    common.clang_format(_opts, target)
def gen_if_else_tests(lf, rt, opts):
    """Generate the ``if_else`` test file for the ``(lf, rt)`` pair.

    The file content comes from ``if_else_test_template``; the file is
    written and then passed to ``common.clang_format``.  Nothing is
    generated when ``get_filename`` returns ``None``, which is how the
    sibling generators in this file treat that return value.
    """
    filename = get_filename(opts, "if_else", lf, rt)
    if filename is None:
        return
    decls = check + limits + comparison_fp + gen_random_val
    content_src = if_else_test_template.format(
        lf=lf, rt=rt, includes=includes, decls=decls)
    with common.open_utf8(opts, filename) as fp:
        fp.write(content_src)
    common.clang_format(opts, filename)
def gen_minmax_ops_tests(lf, rt, opts):
    """Generate one test file per operator listed in ``minmax_ops``.

    Each file is produced from ``minmax_test_template``, written and then
    passed to ``common.clang_format``.  An operator is skipped when
    ``get_filename`` returns ``None``, which is how the sibling generators
    in this file treat that return value.
    """
    decls = check + limits + comparison_fp + gen_random_val
    for op_name in minmax_ops:
        filename = get_filename(opts, op_name, lf, rt)
        if filename is None:
            continue
        content_src = minmax_test_template.format(
            op_name=op_name, lf=lf, rt=rt, includes=includes, decls=decls)
        with common.open_utf8(opts, filename) as fp:
            fp.write(content_src)
        common.clang_format(opts, filename)
def gen_comparison_tests(lf, rt, opts):
    """Generate one test file per ``(op_name, op_val)`` in ``comparison_ops``.

    ``op_val`` is substituted both into ``comparison_log`` (part of the
    declarations) and into ``comparison_test_template``.  An operator is
    skipped when ``get_filename`` returns ``None``; the check is done first
    so that no template is formatted for a skipped file.
    """
    for op_name, op_val in comparison_ops:
        filename = get_filename(opts, op_name, lf, rt)
        if filename is None:
            continue
        decls = (check + limits + comparison_log.format(op_val=op_val)
                 + gen_random_val)
        content_src = comparison_test_template.format(
            op_name=op_name, op_val=op_val, lf=lf, rt=rt,
            includes=includes, decls=decls)
        with common.open_utf8(opts, filename) as fp:
            fp.write(content_src)
        common.clang_format(opts, filename)
def gen_ternary_ops_tests(lf, rt, opts):
    """Generate one test file per ``(op_name, statement)`` in ``ternary_ops``.

    ``statement`` is itself a template: it is formatted with ``lf`` / ``rt``
    and substituted as ``check_statement`` into ``ternary_ops_template``.
    An operator is skipped when ``get_filename`` returns ``None``; the check
    is done first so that no template is formatted for a skipped file.
    """
    decls = check + limits + comparison_fp + gen_random_val
    for op_name, statement in ternary_ops:
        filename = get_filename(opts, op_name, lf, rt)
        if filename is None:
            continue
        content_src = ternary_ops_template.format(
            op_name=op_name,
            check_statement=statement.format(lf=lf, rt=rt),
            lf=lf, rt=rt, includes=includes, decls=decls)
        with common.open_utf8(opts, filename) as fp:
            fp.write(content_src)
        common.clang_format(opts, filename)
def write_cpp(opts, simd_ext, emulate_fp16):
    """Write ``api_<simd_ext>.cpp`` into ``opts.src_dir``.

    The file contains a fixed preamble followed by the output of
    ``get_put_impl(simd_ext)`` and is then passed to ``common.clang_format``.
    ``emulate_fp16`` is accepted but not used by this function.
    """
    target = os.path.join(opts.src_dir, 'api_{}.cpp'.format(simd_ext))
    if not common.can_create_filename(opts, target):
        return

    preamble = '''#define NSIMD_INSIDE
                  #include <nsimd/nsimd.h>
                  #include <nsimd/cxx_adv_api.hpp>

                  '''
    with common.open_utf8(opts, target) as out:
        out.write(preamble.format(year=date.today().year))
        out.write(get_put_impl(simd_ext))
    common.clang_format(opts, target)
def gen_math_functions_tests(lf, rt, opts):
    """Generate one test file per operator listed in ``math_ops``.

    The declarations of the ``rec`` operator additionally include
    ``rec_reference``.  Each file is produced from ``math_test_template``,
    written and then passed to ``common.clang_format``.  An operator is
    skipped when ``get_filename`` returns ``None``, which is how the sibling
    generators in this file treat that return value.
    """
    for op_name in math_ops:
        filename = get_filename(opts, op_name, lf, rt)
        if filename is None:
            continue
        decls = check + limits + comparison_fp + gen_random_val
        if op_name == "rec":
            decls += rec_reference
        content_src = math_test_template.format(
            op_name=op_name, lf=lf, rt=rt, includes=includes, decls=decls)
        with common.open_utf8(opts, filename) as fp:
            fp.write(content_src)
        common.clang_format(opts, filename)
def gen_archis_write_put(opts, platform, simd_ext, simd_dir):
    """Write ``put.h`` for one platform / SIMD extension into ``simd_dir``.

    For every type in ``common.types`` the header declares the C function
    ``nsimd_put_<simd_ext>_<typ>`` and, for C++, an inline ``nsimd::put``
    overload forwarding to it.  The CPU ``put.h`` is included first unless
    ``simd_ext`` is ``'cpu'``.  Nothing is written when
    ``common.can_create_filename`` refuses the target.
    """
    filename = os.path.join(simd_dir, 'put.h')
    if not common.can_create_filename(opts, filename):
        return

    hbar = common.hbar
    include_cpu_put = ('#include <nsimd/cpu/cpu/put.h>\n'
                       if simd_ext != 'cpu' else '')

    with common.open_utf8(filename) as out:
        out.write(
            '''#ifndef NSIMD_{PLATFORM}_{SIMD_EXT}_PUT_H
               #define NSIMD_{PLATFORM}_{SIMD_EXT}_PUT_H

               {include_cpu_put}#include <nsimd/{platform}/{simd_ext}/types.h>
               #include <stdio.h>

               {hbar}

               '''.format(hbar=hbar, simd_ext=simd_ext, platform=platform,
                          PLATFORM=platform.upper(),
                          SIMD_EXT=simd_ext.upper(),
                          include_cpu_put=include_cpu_put))
        for typ in common.types:
            out.write(
                '''#if NSIMD_CXX > 0
                   extern "C" {{
                   #endif

                   NSIMD_DLLSPEC int nsimd_put_{simd_ext}_{typ}(FILE *, const char *, nsimd_{simd_ext}_v{typ});

                   #if NSIMD_CXX > 0
                   }} // extern "C"
                   #endif

                   #if NSIMD_CXX > 0
                   namespace nsimd {{
                   NSIMD_INLINE int put(FILE *out, const char *fmt, nsimd_{simd_ext}_v{typ} a0, {typ}, {simd_ext}) {{
                     return nsimd_put_{simd_ext}_{typ}(out, fmt, a0);
                   }}
                   }} // namespace nsimd
                   #endif

                   {hbar}

                   '''.format(simd_ext=simd_ext, hbar=hbar, typ=typ))
        out.write('#endif')
    common.clang_format(opts, filename)
def gen_tests(opts):
    """Generate the test files of the ``random`` module.

    One ``.cpp`` file is written under ``<opts.tests_dir>/modules/random``
    for every (function, word size, number of words, number of rounds)
    combination described by ``rand_functions``; the file name comes from
    ``func.gen_function_name`` and the content from ``func.gen_tests``.
    Each file is then passed to ``common.clang_format``.
    """
    # Flattened view of the nested description, in the original visit order.
    combinations = (
        (func, word_size, nwords, nrounds)
        for func in rand_functions
        for word_size, per_nwords in func.wordsize_nwords_nrounds.items()
        for nwords, rounds in per_nwords.items()
        for nrounds in rounds
    )

    for func, word_size, nwords, nrounds in combinations:
        # Write headers
        out_dir = os.path.join(opts.tests_dir, 'modules', 'random')
        common.mkdir_p(out_dir)
        stem = func.gen_function_name(nwords, word_size, nrounds)
        target = os.path.join(out_dir, '{}.cpp'.format(stem))
        with common.open_utf8(opts, target) as out:
            out.write(func.gen_tests(opts, nrounds, word_size, nwords))
        common.clang_format(opts, target)
def gen_math_functions_tests(lf, rt, opts):
    """Generate one test file per operator listed in ``math_ops``.

    For ``rec`` the declarations additionally include ``rec_reference`` and
    the reference operator name is ``rec``; every other operator uses
    ``nsimd_scalar_abs_f64`` as reference name.  An operator is skipped when
    ``get_filename`` returns ``None``; the check is done first so that no
    template is formatted for a skipped file.
    """
    for op_name in math_ops:
        filename = get_filename(opts, op_name, lf, rt)
        if filename is None:
            continue
        decls = check + limits + comparison_fp + gen_random_val
        if op_name == "rec":
            decls += rec_reference
            ref_op_name = 'rec'
        else:
            ref_op_name = 'nsimd_scalar_abs_f64'
        content_src = math_test_template.format(
            op_name=op_name, lf=lf, rt=rt, ref_op_name=ref_op_name,
            includes=includes, decls=decls)
        with common.open_utf8(opts, filename) as fp:
            fp.write(content_src)
        common.clang_format(opts, filename)
def write_cpp(opts, simd_ext, emulate_fp16):
    """Write ``api_<simd_ext>.cpp`` into ``opts.src_dir``.

    After a fixed preamble, every operator whose ``src`` attribute is truthy
    contributes an ``#include <nsimd/src/<name>.hpp>`` line followed by the
    output of ``get_impl``.  The output of ``get_put_impl`` closes the file,
    which is then passed to ``common.clang_format``.
    """
    target = os.path.join(opts.src_dir, 'api_{}.cpp'.format(simd_ext))
    if not common.can_create_filename(opts, target):
        return

    preamble = '''#define NSIMD_INSIDE
                  #include <nsimd/nsimd.h>
                  #include <nsimd/cxx_adv_api.hpp>

                  '''
    include_section = '''{hbar}

                         #include <nsimd/src/{name}.hpp>

                         '''

    with common.open_utf8(opts, target) as out:
        out.write(preamble.format(year=date.today().year))
        for operator in operators.operators.values():
            if not operator.src:
                continue
            out.write(include_section.format(name=operator.name,
                                             hbar=common.hbar))
            out.write(get_impl(operator, emulate_fp16, simd_ext))
        out.write(get_put_impl(simd_ext))
    common.clang_format(opts, target)
def doit(opts):
    """Write ``cxx_adv_api_functions.hpp`` into ``opts.include_dir``.

    One section is emitted per operator whose ``autogen_cxx_adv`` flag is
    set.  Operators that also have a C++ operator and whose ``args`` equal
    ``['v', 'v']`` or ``['v', 'p']`` additionally receive the output of
    ``gen_assignment_operators``.  The file is then passed to
    ``common.clang_format``.
    """
    common.myprint(opts, 'Generating advanced C++ API')
    target = os.path.join(opts.include_dir, 'cxx_adv_api_functions.hpp')
    if not common.can_create_filename(opts, target):
        return

    assignable_args = [['v', 'v'], ['v', 'p']]

    with common.open_utf8(opts, target) as out:
        out.write('''#ifndef NSIMD_CXX_ADV_API_FUNCTIONS_HPP
                     #define NSIMD_CXX_ADV_API_FUNCTIONS_HPP

                     namespace nsimd {

                     ''')

        for operator in operators.operators.values():
            if not operator.autogen_cxx_adv:
                continue

            generic = get_cxx_advanced_generic(operator)
            out.write('''{hbar}

                         {code}

                         '''.format(hbar=common.hbar, code=generic))

            wants_assignment = (operator.cxx_operator
                                and operator.args in assignable_args)
            if wants_assignment:
                assignment = gen_assignment_operators(operator)
                out.write('{hbar}\n{code}'.format(hbar=common.hbar,
                                                  code=assignment))

        out.write('''{hbar}

                     }} // namespace nsimd

                     #endif'''.format(hbar=common.hbar))
    common.clang_format(opts, target)
def generate():
    """Return clang-formatted explicit instantiations of ``PamMap::at``.

    For every C++ type returned by
    ``make_supported_cpp_types(constants.DTYPES)`` two explicit template
    instantiations are emitted (const and non-const overload), wrapped
    between ``NAMESPACE_OPEN`` and ``NAMESPACE_CLOSE`` and preceded by the
    licence header and the ``typedefs.hxx`` include.
    """
    const_overload = ("template const {0:}& PamMap::at<{0:}>("
                      "const std::string& key, const {0:}& default_value) "
                      "const;")
    mutable_overload = ("template {0:}& PamMap::at<{0:}>(const std::string& key, "
                        "{0:}& default_value);")

    lines = licence_header_cpp(__file__)
    lines.append('#include "typedefs.hxx"')
    lines.extend(NAMESPACE_OPEN)
    for cpptype in make_supported_cpp_types(constants.DTYPES):
        lines.extend([const_overload.format(cpptype),
                      mutable_overload.format(cpptype)])
    lines.extend(NAMESPACE_CLOSE)

    source = "\n".join(lines)
    return clang_format(source)
def gen_bench(f, simd, typ):
    """Generate, write and clang-format the benchmark file of one operator.

    Returns early (writing nothing) when the target file may not be created
    or when ``gen_code`` yields ``None``.  Options are read from the
    module-level ``_opts``.

    The timestamp-based benchmark text is still assembled, but an empty
    string is passed to ``gen_bench_from_code`` in its place, so it does not
    reach the written file.

    Args:
        f: operator description; provides ``bench_against_cpu()`` and
            ``bench_against_libs()``.
        simd: SIMD extension name.
        typ: element type name.
    """
    ## TODO
    target = gen_filename(f, simd, typ)

    ## Nothing to do when the file must not be (re)created
    if not common.can_create_filename(_opts, target):
        return

    ## Code specific to this benchmark
    category = common.nsimd_category(simd)
    specific = gen_code(f, simd, typ, category=category)
    if specific is None:
        return

    ## Assemble the parts (list items are evaluated in order)
    bench = ''.join([
        gen_bench_against(f, 'cpu', typ, f.bench_against_cpu()),
        specific,
        gen_bench_unrolls(f, simd, typ, category),
        gen_bench_against(f, simd, typ, f.bench_against_libs()),
    ])

    ## bench_with_timestamp (built as before, currently not written out)
    json_report = '''
      std::string json = "";
      json += "{{\\n";
      json += " \\"benchmarks\\": [\\n";
      for (auto const & bench_name_sum_time : sums) {{
        std::string const & bench_name = bench_name_sum_time.first;
        {typ} const & sum = bench_name_sum_time.second.first;
        double const & elapsed_time_ns = bench_name_sum_time.second.second;
        json += " {{" "\\n";
        json += " \\"name\\": \\"" + bench_name + "/{typ}\\"," + "\\n";
        json += " \\"real_time\\": " + std::to_string(elapsed_time_ns) + "," + "\\n";
        json += " \\"sum\\": " + std::string(std::isfinite(sum) ? "" : "\\"") + std::to_string(sum) + std::string(std::isfinite(sum) ? "" : "\\"") + "," + "\\n";
        json += " \\"time_unit\\": \\"ns\\"\\n";
        json += " }}";
        if (&bench_name_sum_time != &*sums.rbegin()) {{
          json += ",";
        }}
        json += "\\n";
      }}
      json += " ]\\n";
      json += "}}\\n";
      std::cout << json << std::flush;
      '''
    bench_with_timestamp = ''.join([
        'std::map<std::string, std::pair<' + typ + ', double>> sums;' + '\n',
        'size_t const nb_runs = 10 * 1000;' + '\n',
        gen_bench_against_with_timestamp(
            f, 'cpu', typ, f.bench_against_cpu()),
        gen_bench_with_timestamp(f, simd, typ, category),
        gen_bench_unrolls_with_timestamp(f, simd, typ, category),
        gen_bench_against_with_timestamp(
            f, simd, typ, f.bench_against_libs()),
        json_report.format(typ=typ),
    ])

    ## Finalize code
    source = gen_bench_from_code(f, typ, bench, '') # bench_with_timestamp

    ## Write the file, then clang-format it
    with common.open_utf8(target) as out:
        out.write(source)
    common.clang_format(_opts, target)
def gen_archis_types(opts, simd_dir, platform, simd_ext):
    """Write ``types.h`` for one platform / SIMD extension into ``simd_dir``.

    The header contains, for every type in ``common.types``:
    the vector typedef ``nsimd_<simd_ext>_v<typ>``, the logical typedef
    ``nsimd_<simd_ext>_vl<typ>``, the ``x2``/``x3``/``x4`` aggregate types
    and, for C++, a ``simd_traits`` specialization.  Nothing is written when
    ``common.can_create_filename`` refuses the target.
    """
    filename = os.path.join(simd_dir, 'types.h')
    if not common.can_create_filename(opts, filename):
        return
    # Platform module providing the concrete register types.
    mod = opts.platforms[platform]
    # Plain vector typedefs.
    c_code = '\n'.join([
        'typedef {} nsimd_{}_v{};'.format(mod.get_type(opts, simd_ext, t),
                                          simd_ext, t)
        for t in common.types
    ])
    c_code += '\n\n'
    # Logical vector typedefs.
    c_code += '\n'.join([
        'typedef {} nsimd_{}_vl{};'.format(
            mod.get_logical_type(opts, simd_ext, t), simd_ext, t)
        for t in common.types
    ])
    if mod.has_compatible_SoA_types(simd_ext):
        # The platform supplies the x2/x3/x4 types itself: typedef them.
        # Each degree's typedefs are appended without a separator in between.
        for deg in range(2, 5):
            c_code += '\n'.join(['typedef {} nsimd_{}_v{}x{};'. \
                                 format(mod.get_SoA_type(simd_ext, typ, deg),
                                        simd_ext, typ, deg)
                                 for typ in common.types])
    else:
        # Otherwise emit structs holding 2, 3 and 4 plain vectors.
        c_code += '\n'.join(['''
                             typedef struct nsimd_{simd_ext}_v{typ}x2 {{
                               nsimd_{simd_ext}_v{typ} v0;
                               nsimd_{simd_ext}_v{typ} v1;
                             }} nsimd_{simd_ext}_v{typ}x2;
                             '''.format(simd_ext=simd_ext, typ=typ) \
                                 for typ in common.types])
        c_code += '\n'.join(['''
                             typedef struct nsimd_{simd_ext}_v{typ}x3 {{
                               nsimd_{simd_ext}_v{typ} v0;
                               nsimd_{simd_ext}_v{typ} v1;
                               nsimd_{simd_ext}_v{typ} v2;
                             }} nsimd_{simd_ext}_v{typ}x3;
                             '''.format(simd_ext=simd_ext, typ=typ) \
                                 for typ in common.types])
        c_code += '\n'.join(['''
                             typedef struct nsimd_{simd_ext}_v{typ}x4 {{
                               nsimd_{simd_ext}_v{typ} v0;
                               nsimd_{simd_ext}_v{typ} v1;
                               nsimd_{simd_ext}_v{typ} v2;
                               nsimd_{simd_ext}_v{typ} v3;
                             }} nsimd_{simd_ext}_v{typ}x4;
                             '''.format(simd_ext=simd_ext, typ=typ) \
                                 for typ in common.types])
    # NOTE(review): the nesting level of this statement (function level vs.
    # inside the `else` branch) could not be recovered from the collapsed
    # source; it only affects blank lines in the generated header -- confirm.
    c_code += '\n\n'
    # C++ traits mapping (type, extension) to the typedefs above.
    cxx_code = '\n\n'.join([
        '''template <>
           struct simd_traits<{typ}, {simd_ext}> {{
             typedef nsimd_{simd_ext}_v{typ} simd_vector;
             typedef nsimd_{simd_ext}_v{typ}x2 simd_vectorx2;
             typedef nsimd_{simd_ext}_v{typ}x3 simd_vectorx3;
             typedef nsimd_{simd_ext}_v{typ}x4 simd_vectorx4;
             typedef nsimd_{simd_ext}_vl{typ} simd_vectorl;
           }};'''.format(typ=t, simd_ext=simd_ext)
        for t in common.types
    ])
    with common.open_utf8(opts, filename) as out:
        # `year` has no placeholder in the template below.
        out.write('''#ifndef NSIMD_{platform}_{SIMD_EXT}_TYPES_H
                     #define NSIMD_{platform}_{SIMD_EXT}_TYPES_H

                     {c_code}

                     #define NSIMD_{simd_ext}_NB_REGISTERS {nb_registers}

                     #if NSIMD_CXX > 0
                     namespace nsimd {{

                     struct {simd_ext} {{}};

                     {cxx_code}

                     }} // namespace nsimd
                     #endif

                     #endif
                     '''.\
                     format(year=date.today().year,
                            platform=platform.upper(),
                            SIMD_EXT=simd_ext.upper(),
                            c_code=c_code,
                            cxx_code=cxx_code,
                            simd_ext=simd_ext,
                            nb_registers=mod.get_nb_registers(simd_ext)))
    common.clang_format(opts, filename)
def gen_functions(opts):
    """Generate ``modules/tet1d/functions.hpp`` under ``opts.include_dir``.

    For every operator with a scalar implementation this emits a tag struct
    ``<op>_t``, a ``node<...>`` specialization and a factory function named
    after the operator.  Operators that have a C++ operator additionally get
    ``operator<cxx_operator>`` overloads (unary when ``params`` has two
    entries, binary when it has three).  The accumulated text is written
    inside ``namespace tet1d`` and the file is passed to
    ``common.clang_format``.
    """
    functions = ''
    for op_name, operator in operators.operators.items():
        if not operator.has_scalar_impl:
            continue

        # Operators flagged by is_not_closed() take an extra `ToType`
        # template parameter.
        not_closed = is_not_closed(operator)
        not_closed_tmpl_args = 'typename ToType, ' if not_closed else ''
        not_closed_tmpl_params = 'ToType' if not_closed else 'none_t'

        # Pieces of the node specialization, selected by operator shape.
        # NOTE(review): when no branch below matches, these names are never
        # assigned for this operator -- confirm every operator is covered.
        if op_name in ['shl', 'shr', 'shra']:
            # Shifts carry an integer `s` instead of a right operand.
            tmpl_args = 'typename Left'
            tmpl_params = 'Left, none_t, none_t'
            size = 'return left.size();'
            args = 'Left const &left, int s'
            members = 'Left left; int s;'
            members_assignment = 'ret.left = to_node(left); ret.s = s;'
            to_node_type = 'typename to_node_t<Left>::type, none_t, none_t'
        elif len(operator.params) == 2:
            tmpl_args = not_closed_tmpl_args + 'typename Left'
            tmpl_params = 'Left, none_t, ' + not_closed_tmpl_params
            size = 'return left.size();'
            args = 'Left const &left'
            members = 'Left left;'
            members_assignment = 'ret.left = to_node(left);'
            to_node_type = 'typename to_node_t<Left>::type, none_t, none_t'
        elif len(operator.params) == 3:
            tmpl_args = 'typename Left, typename Right'
            tmpl_params = 'Left, Right, none_t'
            size = 'return compute_size(left.size(), right.size());'
            args = 'Left const &left, Right const &right'
            members = 'Left left;\nRight right;'
            members_assignment = '''ret.left = to_node(left);
                                    ret.right = to_node(right);'''
            to_node_type = 'typename to_node_t<Left>::type, ' \
                           'typename to_node_t<Right>::type, none_t'
        elif len(operator.params) == 4:
            tmpl_args = 'typename Left, typename Right, typename Extra'
            tmpl_params = 'Left, Right, Extra'
            size = \
            'return compute_size(left.size(), right.size(), extra.size());'
            args = 'Left const &left, Right const &right, Extra const &extra'
            members = 'Left left;\nRight right;\nExtra extra;'
            members_assignment = '''ret.left = to_node(left);
                                    ret.right = to_node(right);
                                    ret.extra = to_node(extra);'''
            to_node_type = 'typename to_node_t<Left>::type, ' \
                           'typename to_node_t<Right>::type, ' \
                           'typename to_node_t<Extra>::type'

        # Return type of the accessors: value pack or logical pack / bool.
        if operator.returns == 'v':
            to_pack = 'to_pack_t'
            return_type = 'out_type'
        else:
            to_pack = 'to_packl_t'
            return_type = 'bool'

        if not_closed:
            to_typ_arg = 'out_type(), '
            to_typ_tmpl_arg = '<typename {to_pack}<out_type, Pack>::type>'. \
                              format(to_pack=to_pack)
            in_out_typedefs = '''typedef typename Left::out_type in_type;
                                 typedef ToType out_type;'''
            # Overrides the value chosen by the shape branches above.
            to_node_type = 'typename to_node_t<Left>::type, none_t, ToType'
        else:
            # `to_mask` still passes an `out_type()` argument.
            to_typ_arg = '' if op_name != 'to_mask' else 'out_type(), '
            to_typ_tmpl_arg = ''
            in_out_typedefs = '''typedef typename Left::out_type in_type;
                                 typedef typename Left::out_type out_type;'''

        # Argument list of the implementation call; `{cpu_gpu}` and `{tmpl}`
        # are filled in separately for each backend below.
        impl_args = 'left.{cpu_gpu}_get{tmpl}(i)'
        if (len(operator.params[1:]) >= 2):
            if operator.params[2] == 'p':
                impl_args += ', s'
            else:
                impl_args += ', right.{cpu_gpu}_get{tmpl}(i)'
        if (len(operator.params[1:]) >= 3):
            impl_args += ', extra.{cpu_gpu}_get{tmpl}(i)'
        impl_scalar = 'return nsimd::scalar_{}({}{});'. \
                      format(op_name, to_typ_arg,
                             impl_args.format(cpu_gpu='scalar', tmpl=''))
        impl_gpu = 'return nsimd::gpu_{}({}{});'. \
                   format(op_name, to_typ_arg,
                          impl_args.format(cpu_gpu='gpu', tmpl=''))
        impl_simd = 'return nsimd::{}{}({});'. \
                    format(op_name, to_typ_tmpl_arg,
                           impl_args.format(cpu_gpu='template simd',
                                            tmpl='<Pack>'))

        # Tag struct, node specialization and factory function.
        functions += \
        '''struct {op_name}_t {{}};

        template <{tmpl_args}>
        struct node<{op_name}_t, {tmpl_params}> {{
          {in_out_typedefs}

          {members}

          nsimd::nat size() const {{
            {size}
          }}

        #if defined(NSIMD_CUDA) || defined(NSIMD_ROCM)
          __device__ {return_type} gpu_get(nsimd::nat i) const {{
            {impl_gpu}
          }}
        #else
          {return_type} scalar_get(nsimd::nat i) const {{
            {impl_scalar}
          }}
          template <typename Pack> typename {to_pack}<out_type, Pack>::type
          simd_get(nsimd::nat i) const {{
            {impl_simd}
          }}
        #endif
        }};

        template<{tmpl_args}>
        node<{op_name}_t, {to_node_type}>
        {op_name}({args}) {{
          node<{op_name}_t, {to_node_type}> ret;
          {members_assignment}
          return ret;
        }}'''.format(op_name=op_name, tmpl_args=tmpl_args, size=size,
                     tmpl_params=tmpl_params, return_type=return_type,
                     args=args, to_pack=to_pack, to_node_type=to_node_type,
                     members=members, members_assignment=members_assignment,
                     in_out_typedefs=in_out_typedefs,
                     impl_gpu=impl_gpu,
                     impl_scalar=impl_scalar,
                     impl_simd=impl_simd)

        # Unary C++ operator overload.
        if operator.cxx_operator != None and len(operator.params) == 2:
            functions += \
            '''
            template <typename Op, typename Left, typename Right,
                      typename Extra>
            node<{op_name}_t, node<Op, Left, Right, Extra>, none_t, none_t>
            operator{cxx_operator}(node<Op, Left, Right, Extra> const &node) {{
              return tet1d::{op_name}(node);
            }}'''.format(op_name=op_name, cxx_operator=operator.cxx_operator)

        # Binary C++ operator overloads: node/literal, literal/node and
        # node/node.
        if operator.cxx_operator != None and len(operator.params) == 3:
            functions += '''
            template <typename Op, typename Left, typename Right,
                      typename Extra, typename T>
            node<{op_name}_t, node<Op, Left, Right, Extra>,
                 node<scalar_t, none_t, none_t,
                      typename node<Op, Left, Right, Extra>::in_type>, none_t>
            operator{cxx_operator}(node<Op, Left, Right, Extra> const &node, T a) {{
              typedef typename tet1d::node<Op, Left, Right, Extra>::in_type S;
              return tet1d::{op_name}(node, literal_to<S>::impl(a));
            }}

            template <typename T, typename Op, typename Left, typename Right,
                      typename Extra>
            node<{op_name}_t, node<scalar_t, none_t, none_t,
                                   typename node<Op, Left, Right, Extra>::in_type>,
                 node<Op, Left, Right, Extra>, none_t>
            operator{cxx_operator}(T a, node<Op, Left, Right, Extra> const &node) {{
              typedef typename tet1d::node<Op, Left, Right, Extra>::in_type S;
              return tet1d::{op_name}(literal_to<S>::impl(a), node);
            }}

            template <typename LeftOp, typename LeftLeft, typename LeftRight,
                      typename LeftExtra, typename RightOp, typename RightLeft,
                      typename RightRight, typename RightExtra>
            node<{op_name}_t, node<LeftOp, LeftLeft, LeftRight, LeftExtra>,
                 node<RightOp, RightLeft, RightRight, RightExtra>, none_t>
            operator{cxx_operator}(node<LeftOp, LeftLeft, LeftRight, LeftExtra> const &left,
                                   node<RightOp, RightLeft, RightRight, RightExtra> const &right) {{
              return tet1d::{op_name}(left, right);
            }}'''.format(op_name=op_name, cxx_operator=operator.cxx_operator)

        # Separator between operators.
        functions += '\n\n{}\n\n'.format(common.hbar)

    # Write the code to file
    dirname = os.path.join(opts.include_dir, 'modules', 'tet1d')
    common.mkdir_p(dirname)
    filename = os.path.join(dirname, 'functions.hpp')
    if not common.can_create_filename(opts, filename):
        return
    with common.open_utf8(opts, filename) as out:
        out.write('#ifndef NSIMD_MODULES_TET1D_FUNCTIONS_HPP\n')
        out.write('#define NSIMD_MODULES_TET1D_FUNCTIONS_HPP\n\n')
        out.write('namespace tet1d {\n\n')
        out.write('{}\n\n'.format(common.hbar))
        out.write(functions)
        out.write('} // namespace tet1d\n\n')
        out.write('#endif\n')
    common.clang_format(opts, filename)
def doit(opts):
    """Write ``scalar_utilities.h`` into ``opts.include_dir``.

    The header starts with forward declarations of every scalar and GPU
    ``reinterpret`` signature, then contains, for each operator that has a
    scalar implementation, a C function, a C++ wrapper in ``namespace nsimd``
    and the GPU implementation returned by ``get_gpu_impl``.  The file is
    finally passed to ``common.clang_format``.
    """
    common.myprint(opts, 'Generating scalar implementation for CPU and GPU')
    filename = os.path.join(opts.include_dir, 'scalar_utilities.h')
    if not common.can_create_filename(opts, filename):
        return
    with common.open_utf8(opts, filename) as out:
        # we declare reinterprets now as we need them
        scalar_tmp = []
        gpu_tmp = []
        for t in operators.Reinterpret.types:
            for tt in common.get_output_types(
                          t, operators.Reinterpret.output_to):
                scalar_tmp += [operators.Reinterpret(). \
                               get_scalar_signature('cpu', t, tt, 'c')]
                gpu_tmp += [operators.Reinterpret(). \
                            get_scalar_signature('gpu', t, tt, 'cxx')]
        scalar_reinterpret_decls = '\n'.join(['NSIMD_INLINE ' + sig + ';' \
                                              for sig in scalar_tmp])
        gpu_reinterpret_decls = '\n'.join(['inline ' + sig + ';' \
                                           for sig in gpu_tmp])
        # File prologue: include guard, math/string headers, a warning
        # pragma push for native FP16, and the reinterpret declarations.
        out.write(
        '''#ifndef NSIMD_SCALAR_UTILITIES_H
           #define NSIMD_SCALAR_UTILITIES_H

           #if NSIMD_CXX > 0
           #include <cmath>
           #include <cstring>
           #else
           #include <math.h>
           #include <string.h>
           #endif

           #ifdef NSIMD_NATIVE_FP16
             #if defined(NSIMD_IS_GCC)
               #pragma GCC diagnostic push
               #pragma GCC diagnostic ignored "-Wdouble-promotion"
             #elif defined(NSIMD_IS_CLANG)
               #pragma clang diagnostic push
               #pragma clang diagnostic ignored "-Wdouble-promotion"
             #endif
           #endif

           {scalar_reinterpret_decls}

           #if defined(NSIMD_CUDA) || defined(NSIMD_ROCM)

           namespace nsimd {{

           {gpu_reinterpret_decls}

           }} // namespace nsimd

           #endif
           '''. \
           format(scalar_reinterpret_decls=scalar_reinterpret_decls,
                  gpu_reinterpret_decls=gpu_reinterpret_decls))
        for op_name, operator in operators.operators.items():
            if not operator.has_scalar_impl:
                continue
            # Operators whose params are all 'l' are emitted once, with
            # empty type strings in their CPU signatures.
            if operator.params == ['l'] * len(operator.params):
                out.write('\n\n' + common.hbar + '\n\n')
                # NOTE(review): `t` and `tt` are not assigned in this branch;
                # they hold whatever values earlier loops in this function
                # left bound -- confirm this is intended.
                out.write(\
                '''NSIMD_INLINE {c_sig} {{
                  {scalar_impl}
                }}

                #if NSIMD_CXX > 0

                namespace nsimd {{

                NSIMD_INLINE {cxx_sig} {{
                  return nsimd_scalar_{op_name}({c_args});
                }}

                {gpu_impl}

                }} // namespace nsimd

                #endif'''.format(
                c_sig=operator.get_scalar_signature('cpu', '', '', 'c'),
                cxx_sig=operator.get_scalar_signature('cpu', '', '', 'cxx'),
                op_name=op_name,
                c_args=', '.join(['a{}'.format(i - 1) \
                               for i in range(1, len(operator.params))]),
                scalar_impl=scalar.get_impl(operator, tt, t),
                gpu_impl=get_gpu_impl(
                    operator.get_scalar_signature('gpu', t, tt, 'cxx'),
                    cuda.get_impl(operator, tt, t),
                    rocm_impl=rocm.get_impl(operator, tt, t))))
                continue
            # Other operators: one implementation per (input type, output
            # type) pair.
            for t in operator.types:
                tts = common.get_output_types(t, operator.output_to)
                for tt in tts:
                    out.write('\n\n' + common.hbar + '\n\n')
                    out.write(\
                    '''NSIMD_INLINE {c_sig} {{
                      {scalar_impl}
                    }}

                    #if NSIMD_CXX > 0

                    namespace nsimd {{

                    NSIMD_INLINE {cxx_sig} {{
                      return nsimd_scalar_{op_name}_{suffix}({c_args});
                    }}

                    {gpu_impl}

                    }} // namespace nsimd

                    #endif'''.format(
                    c_sig=operator.get_scalar_signature('cpu', t, tt, 'c'),
                    cxx_sig=operator.get_scalar_signature('cpu', t, tt, 'cxx'),
                    op_name=op_name,
                    # Closed operators are suffixed by the type only,
                    # the others by `<tt>_<t>`.
                    suffix=t if operator.closed else '{}_{}'.format(tt, t),
                    c_args=', '.join(['a{}'.format(i - 1) \
                                   for i in range(1, len(operator.params))]),
                    scalar_impl=scalar.get_impl(operator, tt, t),
                    gpu_impl=get_gpu_impl(
                        operator.get_scalar_signature('gpu', t, tt, 'cxx'),
                        cuda.get_impl(operator, tt, t),
                        rocm_impl=rocm.get_impl(operator, tt, t))))
        # File epilogue: matching pragma pop and end of the include guard.
        out.write('''

                  {hbar}

                  #ifdef NSIMD_NATIVE_FP16
                    #if defined(NSIMD_IS_GCC)
                      #pragma GCC diagnostic pop
                    #elif defined(NSIMD_IS_CLANG)
                      #pragma clang diagnostic pop
                    #endif
                  #endif

                  #endif'''.format(hbar=common.hbar))
    common.clang_format(opts, filename)
def gen_reinterpret_convert(opts, op, from_typ, to_typ, lang):
    """Generate a round-trip test for a conversion-like operator.

    The generated program fills a buffer of ``from_typ`` values, applies the
    operator towards ``to_typ`` and back (``upcvt`` is undone with
    ``downcvt``; any other operator is applied twice with the types
    swapped), stores the result and fails if any element differs from the
    input.  The API flavour used in the test is chosen by ``lang``
    (``'c_base'``, ``'cxx_base'``, anything else selects the pack-based
    C++ calls).  Nothing is generated when ``get_filename`` returns ``None``.
    """
    filename = get_filename(opts, op, '{}_to_{}'.format(from_typ, to_typ),
                            lang)
    if filename == None:
        return
    # `reinterpretl` works on logicals: the `l` load/store variants are used.
    logical = 'l' if op.name == 'reinterpretl' else ''
    # `comp`: the statement(s) doing the round trip from `in` to `out`.
    if lang == 'c_base':
        if op.name == 'upcvt':
            comp = '''{{
                      vecx2({to_typ}) tmp =
                          vupcvt(vload{logical}a(in, {from_typ}),
                                 {from_typ}, {to_typ});
                      vstore{logical}a(out, vdowncvt(
                          tmp.v0, tmp.v1, {to_typ}, {from_typ}),
                          {from_typ});
                      }}'''.format(op_name=op.name, from_typ=from_typ,
                                   to_typ=to_typ, logical=logical)
        else:
            comp = '''vstore{logical}a(out, v{op_name}(v{op_name}(
                        vload{logical}a(in, {from_typ}), {from_typ}, {to_typ}),
                          {to_typ}, {from_typ}), {from_typ});'''. \
                          format(op_name=op.name, from_typ=from_typ,
                                 to_typ=to_typ, logical=logical)
    elif lang == 'cxx_base':
        if op.name == 'upcvt':
            comp = '''vecx2({to_typ}) tmp =
                          nsimd::upcvt(nsimd::load{logical}a(
                              in, {from_typ}()), {from_typ}(), {to_typ}());
                      nsimd::store{logical}a(out, nsimd::downcvt(
                          tmp.v0, tmp.v1, {to_typ}(), {from_typ}()),
                          {from_typ}());'''. \
                          format(op_name=op.name, from_typ=from_typ,
                                 to_typ=to_typ, logical=logical)
        else:
            comp = '''nsimd::store{logical}a(out, nsimd::{op_name}(
                        nsimd::{op_name}(nsimd::load{logical}a(
                          in, {from_typ}()), {from_typ}(), {to_typ}()),
                            {to_typ}(), {from_typ}()), {from_typ}());'''. \
                            format(op_name=op.name, from_typ=from_typ,
                                   to_typ=to_typ, logical=logical)
    else:
        if op.name == 'upcvt':
            comp = \
            '''nsimd::packx2<{to_typ}> tmp = nsimd::upcvt<
                 nsimd::pack{logical}x2<{to_typ}> >(nsimd::load{logical}a<
                   nsimd::pack{logical}<{from_typ}> >(in));
               nsimd::store{logical}a(out, nsimd::downcvt<
                 nsimd::pack{logical}<{from_typ}> >(tmp.v0, tmp.v1));'''. \
                 format(op_name=op.name, from_typ=from_typ,
                        to_typ=to_typ, logical=logical)
        else:
            comp = \
            '''nsimd::store{logical}a(out, nsimd::{op_name}<
                 nsimd::pack{logical}<{from_typ}> >(nsimd::{op_name}<
                   nsimd::pack{logical}<{to_typ}> >(nsimd::load{logical}a<
                     nsimd::pack{logical}<{from_typ}> >(in))));'''. \
                     format(op_name=op.name, from_typ=from_typ,
                            to_typ=to_typ, logical=logical)
    # `rand`: C expression producing one input element.
    if logical == 'l':
        rand = '(rand() % 2)'
    else:
        if op.name == 'reinterpret' and to_typ == 'f16' and \
           from_typ in ['i16', 'u16']:
            rand = '(15360 /* no denormal */ | (1 << (rand() % 4)))'
        else:
            if to_typ in common.utypes or from_typ in common.utypes:
                # Non-negative values only when an unsigned type is involved.
                rand = '(1 << (rand() % 4))'
            else:
                rand = '((2 * (rand() % 2) - 1) * (1 << (rand() % 4)))'
    # Wrap `rand` for the input type and choose the inequality test.
    if from_typ == 'f16':
        # NOTE(review): this expression already ends with ';' and the
        # template below appends another one after `{rand}`.
        rand = 'nsimd_f32_to_f16((f32){});'.format(rand)
        # f16 elements are compared through their 16-bit representation.
        neq_test = '(*(u16*)&in[j]) != (*(u16*)&out[j])'
    else:
        rand = '({}){}'.format(from_typ, rand)
        neq_test = 'in[j] != out[j]'
    with common.open_utf8(filename) as out:
        # `year` has no placeholder in the template below.
        out.write('''{includes}

           #define CHECK(a) {{ \\
             errno = 0; \\
             if (!(a)) {{ \\
               fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                       __LINE__, strerror(errno)); \\
               fflush(stderr); \\
               exit(EXIT_FAILURE); \\
             }} \\
           }}

           int main(void) {{
             int i, j;
             {from_typ} *in, *out;
             int len = vlen({from_typ});

             fprintf(stdout,
                     "test of {op_name} from {from_typ} to {to_typ}...\\n");
             CHECK(in = ({from_typ}*)nsimd_aligned_alloc(len * {sizeof}));
             CHECK(out = ({from_typ}*)nsimd_aligned_alloc(len * {sizeof}));

             for (i = 0; i < 100; i++) {{
               for (j = 0; j < len; j++) {{
                 in[j] = {rand};
               }}

               {comp}

               for (j = 0; j < len; j++) {{
                 if ({neq_test}) {{
                   exit(EXIT_FAILURE);
                 }}
               }}
             }}

             fprintf(stdout,
                     "test of {op_name} from {from_typ} to {to_typ}... OK\\n");
             return EXIT_SUCCESS;
           }}'''.format(includes=get_includes(lang), op_name=op.name,
                        to_typ=to_typ, from_typ=from_typ, comp=comp,
                        year=date.today().year, rand=rand, neq_test=neq_test,
                        sizeof=common.sizeof(from_typ)))
    common.clang_format(opts, filename)
def gen_nbtrue(opts, op, typ, lang):
    """Generate the test program of the ``nbtrue`` operator for one type.

    The generated program checks the count returned for a buffer of all
    ones (expected ``len``), all zeros (expected ``0``) and a single one
    (expected ``1``).  The API flavour used is chosen by ``lang``
    (``'c_base'``, ``'cxx_base'``, anything else selects the pack-based C++
    call).  Nothing is generated when ``get_filename`` returns ``None``.

    Args:
        opts: generator options forwarded to the helpers.
        op: operator description; only ``op.name`` is read here.
        typ: element type name.
        lang: API flavour, see above.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return

    # Expression counting the true lanes of `buf`.
    if lang == 'c_base':
        nbtrue = 'vnbtrue(vloadla(buf, {}), {})'.format(typ, typ)
    elif lang == 'cxx_base':
        nbtrue = 'nsimd::nbtrue(nsimd::loadla(buf, {}()), {}())'. \
                 format(typ, typ)
    else:
        nbtrue = 'nsimd::nbtrue(nsimd::loadla<nsimd::packl<{}> >(buf))'. \
                 format(typ)

    # f16 constants go through the f32 -> f16 conversion helper.
    if typ == 'f16':
        scalar0 = 'nsimd_f32_to_f16(0)'
        scalar1 = 'nsimd_f32_to_f16(1)'
    else:
        scalar0 = '({})0'.format(typ)
        scalar1 = '({})1'.format(typ)

    with common.open_utf8(filename) as out:
        out.write('''{includes}

           #define CHECK(a) {{ \\
             errno = 0; \\
             if (!(a)) {{ \\
               fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                       __LINE__, strerror(errno)); \\
               fflush(stderr); \\
               exit(EXIT_FAILURE); \\
             }} \\
           }}

           int main(void) {{
             int i;
             {typ} *buf;
             int len = vlen({typ});

             fprintf(stdout, "test of {op_name} over {typ}...\\n");
             CHECK(buf = ({typ}*)nsimd_aligned_alloc(len * {sizeof}));

             /* Test with all elements to true */
             for (i = 0; i < len; i++) {{
               buf[i] = {scalar1};
             }}
             if ({nbtrue} != len) {{
               exit(EXIT_FAILURE);
             }}

             /* Test with all elements to false */
             for (i = 0; i < len; i++) {{
               buf[i] = {scalar0};
             }}
             if ({nbtrue} != 0) {{
               exit(EXIT_FAILURE);
             }}

             /* Test with only one element to true */
             buf[0] = {scalar1};
             if ({nbtrue} != 1) {{
               exit(EXIT_FAILURE);
             }}

             fprintf(stdout, "test of {op_name} over {typ}... OK\\n");
             return EXIT_SUCCESS;
           }}'''.format(includes=get_includes(lang), op_name=op.name,
                        typ=typ, nbtrue=nbtrue, scalar0=scalar0,
                        scalar1=scalar1, sizeof=common.sizeof(typ)))
    common.clang_format(opts, filename)
def gen_load_store(opts, op, typ, lang):
    """Generate the C/C++ round-trip test for a load/store operator pair.

    The degree and alignment characters are read from the operator name: the
    two characters that follow the 'load' or 'store' prefix. The emitted
    program fills `vin` with random values, loads each chunk of `deg * len`
    elements and stores it back into `vout`, then compares `vin` against
    `vout` and returns -1 on the first mismatch.

    Nothing is written when `get_filename` returns None.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return

    # Position of the degree / alignment characters depends on the prefix.
    if op.name.startswith('load'):
        deg, align = op.name[4], op.name[5]
    elif op.name.startswith('store'):
        deg, align = op.name[5], op.name[6]

    # Members of the loaded structure: v.v0, v.v1, ... one per degree.
    variables = ', '.join('v.v{}'.format(k) for k in range(int(deg)))

    if lang == 'c_base':
        snippet = \
        '''vecx{deg}({typ}) v = vload{deg}{align}(&vin[i], {typ});
           vstore{deg}{align}(&vout[i], {variables}, {typ});'''
    elif lang == 'cxx_base':
        snippet = \
        '''vecx{deg}({typ}) v = nsimd::load{deg}{align}(&vin[i], {typ}());
           nsimd::store{deg}{align}(&vout[i], {variables}, {typ}());'''
    else:
        snippet = \
        '''nsimd::packx{deg}<{typ}> v = nsimd::load{deg}{align}<
                                      nsimd::packx{deg}<{typ}> >(&vin[i]);
           nsimd::store{deg}{align}(&vout[i], {variables});'''
    load_store = snippet.format(deg=deg, typ=typ, align=align,
                                variables=variables)

    # f16 buffers are written and compared through their u16 bit patterns.
    if typ == 'f16':
        rand = '*((u16*)vin + i) = nsimd_f32_to_u16((float)(rand() % 10));'
        comp = '*((u16 *)vin + i) != *((u16 *)vout + i)'
    else:
        rand = 'vin[i] = ({})(rand() % 10);'.format(typ)
        comp = 'vin[i] != vout[i]'

    program = '''{includes}

        #define SIZE (2048 / {sizeof})

        #define STATUS "test of {op_name} over {typ}"

        #define CHECK(a) {{ \\
          errno = 0; \\
          if (!(a)) {{ \\
            fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                    __LINE__, strerror(errno)); \\
            fflush(stderr); \\
            exit(EXIT_FAILURE); \\
          }} \\
        }}

        int main(void) {{
          int i, vi;
          {typ} *vin, *vout;
          int len = vlen({typ});
          int n = SIZE * {deg} * len;

          fprintf(stdout, "test of {op_name} over {typ}...\\n");
          CHECK(vin = ({typ}*)nsimd_aligned_alloc(n * {sizeof}));
          CHECK(vout = ({typ}*)nsimd_aligned_alloc(n * {sizeof}));

          /* Fill with random data */
          for (i = 0; i < n; i++) {{
            {rand}
          }}

          /* Load and put back data into vout */
          for (i = 0; i < n; i += {deg} * len) {{
            {load_store}
          }}

          /* Compare results */
          for (vi = 0; vi < SIZE; vi += len) {{
            for (i = vi; i < vi + len; i++) {{
              if ({comp}) {{
                fprintf(stdout, STATUS "... FAIL\\n");
                fflush(stdout);
                return -1;
              }}
            }}
          }}

          fprintf(stdout, "test of {op_name} over {typ}... OK\\n");
          return EXIT_SUCCESS;
        }}'''

    with common.open_utf8(filename) as out:
        out.write(program.format(includes=get_includes(lang),
                                 op_name=op.name, typ=typ, rand=rand,
                                 deg=deg, sizeof=common.sizeof(typ),
                                 load_store=load_store, comp=comp))
    common.clang_format(opts, filename)
def gen_addv(opts, op, typ, lang):
    """Generate the C/C++ test program for a sum-reduction operator.

    The emitted program fills one vector worth of `typ` elements with signed
    ratios of small powers of two, accumulates their scalar sum as the
    reference, applies the operator to the loaded vector and fails when the
    relative distance between both results exceeds 2^-nbits, where nbits is
    10, 21 or 48 for f16, f32 and f64 respectively.

    Nothing is written when `get_filename` returns None.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return

    if lang == 'c_base':
        op_test = 'v{}(vloada(buf, {}), {})'.format(op.name, typ, typ)
        extra_code = relative_distance_c
    elif lang == 'cxx_base':
        op_test = 'nsimd::{}(nsimd::loada(buf, {}()), {}())'.format(
                  op.name, typ, typ)
        extra_code = relative_distance_cpp
    else:
        # Keep a space between the closing angle brackets: '>>' is parsed as
        # a right shift by pre-C++11 compilers, and the other generators in
        # this file already spell it '> >'.
        op_test = 'nsimd::{}(nsimd::loada<nsimd::pack<{}> >(buf))'.format(
                  op.name, typ)
        extra_code = relative_distance_cpp

    # Accepted precision, in bits, of the relative distance for each type.
    nbits = {'f16': '10', 'f32': '21', 'f64': '48'}

    head = '''#define _POSIX_C_SOURCE 200112L

        {includes}
        #include <float.h>
        #include <math.h>

        #define CHECK(a) {{ \\
          errno = 0; \\
          if (!(a)) {{ \\
            fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                    __LINE__, strerror(errno)); \\
            fflush(stderr); \\
            exit(EXIT_FAILURE); \\
          }} \\
        }}

        {extra_code}'''.format(includes=get_includes(lang),
                               extra_code=extra_code)

    if typ == 'f16':
        # The reference is accumulated in f32 and the operator result is
        # converted back to f32 before the comparison.
        init = '''f16 res = nsimd_f32_to_f16(0.0f);
                  f32 ref = 0.0f;'''
        rand = '''nsimd_f32_to_f16((f32)(2 * (rand() % 2) - 1) *
                  (f32)(1 << (rand() % 4)) /
                  (f32)(1 << (rand() % 4)))'''
        ref_statement = 'ref += nsimd_u16_to_f32(((u16 *)buf)[i]);'
        test = '''if (relative_distance((double) ref,
                      (double) nsimd_f16_to_f32(res)) >
                      get_2th_power(-{nbits})) {{
                    return EXIT_FAILURE;
                  }}'''.format(nbits=nbits[typ])
    else:
        init = '''{typ} ref = ({typ})0;
                  {typ} res = ({typ})0;'''.format(typ=typ)
        rand = '''({typ})(2 * (rand() % 2) - 1) *
                  ({typ})(1 << (rand() % 4)) /
                  ({typ})(1 << (rand() % 4))'''.format(typ=typ)
        ref_statement = 'ref += buf[i];'
        test = '''if (relative_distance((double)ref,
                      (double)res) > get_2th_power(-{nbits})) {{
                    return EXIT_FAILURE;
                  }}'''.format(nbits=nbits[typ])
    init_statement = 'buf[i] = {};'.format(rand)

    with common.open_utf8(filename) as out:
        out.write(''' \
        {head}

        int main(void) {{

          const int len = vlen({typ});
          {typ} *buf;
          int i;
          {init}

          fprintf(stdout, "test of {op_name} over {typ}...\\n");
          CHECK(buf = ({typ} *)nsimd_aligned_alloc(len * {sizeof}));

          for(i = 0; i < len; i++) {{
            {init_statement}
          }}

          for(i = 0; i < len; i++) {{
            {ref_statement}
          }}

          res = {op_test};

          {test}

          fprintf(stdout, "test of {op_name} over {typ}... OK\\n");
          return EXIT_SUCCESS;
        }}
        '''.format(head=head, init=init, op_name=op.name, typ=typ,
                   sizeof=common.sizeof(typ), init_statement=init_statement,
                   ref_statement=ref_statement, op_test=op_test, test=test))
    common.clang_format(opts, filename)
def gen_test(opts, op, typ, lang, ulps):
    """Generate the generic C/C++ test program of operator `op` for `typ`.

    The comparison between the reference output (`mpfr_out`) and the NSIMD
    output (`nsimd_out`) is chosen as follows:

    - bitwise operators (not, and, or, xor, andnot): raw bit patterns must be
      equal;
    - `op.tests_ulps`: relative distance below 2^-11 (2^-9 for f16);
    - `op.src` with an entry in `ulps`: relative distance below the
      thresholds of that entry, with optional tolerance of NaN and infinite
      reference outputs;
    - `op.src` without an entry: relative distance below 2^-10, 2^-21 or
      2^-48 for f16, f32 and f64;
    - otherwise: exact equality.

    `ulps` maps an operator name to a per-type dictionary holding the keys
    "ulps", "ulps for denormalized output", "Inf Error" and "NaN Error".

    Nothing is written when `get_filename` returns None.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return

    content = get_content(op, typ, lang)

    # Helper code is only needed by the relative-distance comparisons. Give
    # it a value up front so the bitwise and exact-equality paths do not
    # reach the final format() with the name unbound.
    extra_code = ''

    if op.name in ['not', 'and', 'or', 'xor', 'andnot']:
        comp = 'return *({uT}*)&mpfr_out != *({uT}*)&nsimd_out'. \
               format(uT=common.bitfield_type[typ])
    else:
        # Both operands are compared as doubles.
        if typ == 'f16':
            left = '(double)nsimd_f16_to_f32(mpfr_out)'
            right = '(double)nsimd_f16_to_f32(nsimd_out)'
        elif typ == 'f32':
            left = '(double)mpfr_out'
            right = '(double)nsimd_out'
        else:
            left = 'mpfr_out'
            right = 'nsimd_out'

        relative_distance = relative_distance_c if lang == 'c_base' \
                            else relative_distance_cpp

        if op.tests_ulps:
            comp = 'return relative_distance({}, {}) > ' \
                   'get_2th_power(-{nbits})'. \
                   format(left, right, nbits='11' if typ != 'f16' else '9')
            extra_code = relative_distance
        elif op.src:
            if op.name in ulps:
                entry = ulps[op.name][typ]
                nbits = entry["ulps"]
                nbits_dnz = entry["ulps for denormalized output"]
                inf_error = entry["Inf Error"]
                nan_error = entry["NaN Error"]

                comp = '''#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wdouble-promotion"
'''
                if nan_error:
                    # Ignore error with NaN output, we know we will
                    # encounter some
                    comp += 'if ({isnan}((double){left})) return 0;\n'
                else:
                    # Fail if one is NaN and not the other
                    comp += 'if ({isnan}((double){left}) ^ ' \
                            '{isnan}((double){right})) return 1;\n'

                if inf_error:
                    # Ignore error with infinite output, we know we will
                    # encounter some
                    comp += 'if ({isinf}((double){left})) return 0;\n'
                else:
                    # One is infinite and not the other
                    comp += 'if ({isinf}((double){left}) ^ ' \
                            '{isinf}((double){right})) return 1;\n'
                    # Wrong sign for infinite
                    comp += 'if ({isinf}((double){left}) && ' \
                            '{isinf}((double){right}) && ' \
                            '({right}*{left} < 0)) return 1;\n'

                comp += '''
if ({isnormal}((double){left})) {{
  return relative_distance({left}, {right}) > get_2th_power(-({nbits}));
}} else {{
  return relative_distance({left}, {right}) > get_2th_power(-({nbits_dnz}));
}}
#pragma GCC diagnostic pop
'''
                # C uses the <math.h> macros, C++ the std:: functions.
                prefix = '' if lang == 'c_base' else 'std::'
                comp = comp.format(left=left, right=right, nbits=nbits,
                                   nbits_dnz=nbits_dnz,
                                   isnormal=prefix + 'isnormal',
                                   isinf=prefix + 'isinf',
                                   isnan=prefix + 'isnan')
            else:
                nbits = {'f16': '10', 'f32': '21', 'f64': '48'}
                comp = 'return relative_distance({}, {}) > ' \
                       'get_2th_power(-{nbits})'. \
                       format(left, right, nbits=nbits[typ])
            extra_code = relative_distance
        else:
            comp = 'return {} != {}'.format(left, right)

    includes = get_includes(lang)
    if op.src or op.tests_ulps or op.tests_mpfr:
        if lang == 'c_base':
            includes = '''#define _POSIX_C_SOURCE 200112L

#include <math.h>
#include <float.h>
{}'''.format(includes)
        else:
            includes = '''#define _POSIX_C_SOURCE 200112L

#include <cmath>
#include <cfloat>
{}'''.format(includes)
    if op.tests_mpfr and sys.platform.startswith('linux'):
        # mpfr.h is included with -Wsign-conversion silenced.
        includes = includes + '''
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsign-conversion"
#include <mpfr.h>
#pragma GCC diagnostic pop
'''

    with common.open_utf8(filename) as out:
        out.write(template.format(
            includes=includes, sizeof=common.sizeof(typ), typ=typ,
            op_name=op.name, year=date.today().year, comp=comp,
            extra_code=extra_code, **content))
    common.clang_format(opts, filename)
def gen_reverse(opts, op, typ, lang):
    """Generate the C/C++ test program for the `reverse` operator.

    The emitted program stores 1, 2, ..., len into `in`, writes the reversed
    vector into `out` and checks that `out[len - 1 - i] == in[i]` for every
    lane. It prints a FAIL message on stderr and exits with EXIT_FAILURE on
    any mismatch.

    Nothing is written when `get_filename` returns None.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return

    if lang == 'c_base':
        test_code = \
            'vstorea( out, vreverse( vloada( in, {typ} ), {typ} ), {typ} );'
    elif lang == 'cxx_base':
        test_code = \
            'nsimd::storea( out, nsimd::reverse( nsimd::loada( in, ' \
            '{typ}() ), {typ}() ), {typ}() );'
    else:
        # Advanced C++ API. The space in '> >' keeps pre-C++11 compilers from
        # reading the closing brackets as a right shift.
        test_code = \
            'nsimd::storea( out, nsimd::reverse( ' \
            'nsimd::loada<nsimd::pack<{typ}> >( in ) ) );'
    test_code = test_code.format(typ=typ)

    if typ == 'f16':
        init = 'in[ i ] = nsimd_f32_to_f16((float)(i + 1));'
        comp = 'ok &= nsimd_f16_to_f32( out[len - 1 - i] ) == ' \
               'nsimd_f16_to_f32( in[i] );'
    else:
        init = 'in[ i ] = ({typ})(i + 1);'.format(typ=typ)
        comp = 'ok &= out[len - 1 - i] == in[i];'

    # The loop counter is an int: an unsigned char wraps at 255 and would
    # never reach `len` for vectors holding 256 lanes or more.
    with common.open_utf8(filename) as out:
        out.write('''{includes}

        #define CHECK(a) {{ \\
          errno = 0; \\
          if (!(a)) {{ \\
            fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                    __LINE__, strerror(errno)); \\
            fflush(stderr); \\
            exit(EXIT_FAILURE); \\
          }} \\
        }}

        int main(void) {{
          int i;
          int ok;
          {typ} * in;
          {typ} * out;

          int len = vlen({typ});

          fprintf(stdout, "test of {op_name} over {typ}...\\n");
          CHECK(in = ({typ}*)nsimd_aligned_alloc(len * {sizeof}));
          CHECK(out = ({typ}*)nsimd_aligned_alloc(len * {sizeof}));

          for( i = 0 ; i < len ; ++i )
          {{
            {init}
          }}

          {test_code}

          ok = 1;

          for( i = 0 ; i < len ; ++i )
          {{
            {comp}
          }}

          /*fprintf( stdout, "%f %f %f %f\\n", in[ 0 ], out[ 0 ], in[ 1 ], out[ 1 ] );*/

          if( ok )
          {{
            fprintf(stdout, "test of {op_name} over {typ}... OK\\n");
          }}
          else
          {{
            fprintf(stderr, "test of {op_name} over {typ}... FAIL\\n");
            exit(EXIT_FAILURE);
          }}

          nsimd_aligned_free( in );
          nsimd_aligned_free( out );

          return EXIT_SUCCESS;
        }}
        '''.format(includes=get_includes(lang), op_name=op.name, typ=typ,
                   test_code=test_code, sizeof=common.sizeof(typ),
                   init=init, comp=comp))
    common.clang_format(opts, filename)
def doit(opts):
    """Download Sleef and copy the sources NSIMD needs into `opts.src_dir`.

    Steps, in order:

    1. download and unzip the Sleef release `opts.sleef_version` into
       `<opts.script_dir>/../_deps-sleef`;
    2. copy a fixed list of Sleef sources and headers into `opts.src_dir`;
    3. comment out the `DALIAS_` lines and replace the `Sleef_x86CpuID`
       declaration by a stub in the copied files;
    4. append NSIMD-specific macros to the copied `misc.h`;
    5. parse Sleef's `src/libm/funcproto.h` and write one `rename<ext>.h`
       header per Sleef SIMD extension, mapping Sleef function names to
       NSIMD names.

    Raises `requests.HTTPError` when the download does not succeed.
    """
    common.myprint(opts, 'Copy native Sleef version {}'. \
                         format(opts.sleef_version))

    # First download Sleef
    sleef_dir = os.path.join(opts.script_dir, '..', '_deps-sleef')
    common.mkdir_p(sleef_dir)
    url = 'https://github.com/shibatch/sleef/archive/refs/tags/{}.zip'. \
          format(opts.sleef_version)
    r = requests.get(url, allow_redirects=True)
    # Fail here on 4xx/5xx responses instead of saving an error page that
    # would only be rejected later when opened as a zip archive.
    r.raise_for_status()
    sleef_zip = os.path.join(sleef_dir, 'sleef.zip')
    with open(sleef_zip, 'wb') as fout:
        fout.write(r.content)

    # Unzip sleef
    with zipfile.ZipFile(sleef_zip, 'r') as fin:
        fin.extractall(path=sleef_dir)

    sleef_root = os.path.join(sleef_dir,
                              'sleef-{}'.format(opts.sleef_version))

    # Copy files (all of them land flat in opts.src_dir)
    for filename in ['src/libm/sleefsimddp.c',
                     'src/libm/sleefsimdsp.c',
                     'src/libm/sleefdp.c',
                     'src/libm/sleefsp.c',
                     'src/common/misc.h',
                     'src/libm/estrin.h',
                     'src/libm/dd.h',
                     'src/libm/df.h',
                     'src/libm/rempitab.c',
                     'src/arch/helpersse2.h',
                     'src/arch/helperavx.h',
                     'src/arch/helperavx2.h',
                     'src/arch/helperavx512f.h',
                     'src/arch/helperneon32.h',
                     'src/arch/helperadvsimd.h',
                     'src/arch/helperpower_128.h',
                     'src/arch/helpersve.h']:
        shutil.copyfile(os.path.join(sleef_root, filename),
                        os.path.join(opts.src_dir,
                                     os.path.basename(filename)))

    # Rewrite a copied file line by line through `transform`, going through
    # a temporary file that is removed afterwards.
    def rewrite_lines(filename, transform):
        src = os.path.join(opts.src_dir, filename)
        dst = os.path.join(opts.src_dir, 'tmp.c')
        with open(src, 'r') as fin, open(dst, 'w') as fout:
            for line in fin:
                fout.write(transform(line))
        shutil.copyfile(dst, src)
        os.remove(dst)

    # Sleef uses aliases but we don't need those so we comment them
    def comment_DALIAS_line(line):
        if line.startswith('DALIAS_'):
            return '/* {} */\n'.format(line.strip())
        return line

    for filename in ['sleefsimdsp.c', 'sleefsimddp.c']:
        rewrite_lines(filename, comment_DALIAS_line)

    # Sleef provides runtime SIMD detection via cpuid but we don't need it
    cpuid_stub = '''static inline
void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) {
  /* We don't care for cpuid detection */
  out[0] = 0xFFFFFFFF;
  out[1] = 0xFFFFFFFF;
  out[2] = 0xFFFFFFFF;
  out[3] = 0xFFFFFFFF;
}
'''

    def replace_x86_cpuid_line(line):
        if line.startswith('void Sleef_x86CpuID'):
            return cpuid_stub
        return line

    for filename in ['helpersse2.h', 'helperavx.h', 'helperavx2.h',
                     'helperavx512f.h']:
        rewrite_lines(filename, replace_x86_cpuid_line)

    # Sleef uses force inline through its INLINE macro defined in misc.h
    # We modify it to avoid warnings and because force inline has been a pain
    # in the past. We also rename some exported symbols.
    with open(os.path.join(opts.src_dir, 'misc.h'), 'a') as fout:
        fout.write('''

/* NSIMD specific */
#ifndef NSIMD_SLEEF_MISC_H
#define NSIMD_SLEEF_MISC_H

#ifdef INLINE
#undef INLINE
#endif
#define INLINE inline

#define Sleef_rempitabdp nsimd_sleef_rempitab_f64
#define Sleef_rempitabsp nsimd_sleef_rempitab_f32

#endif

''')

    # Sleef functions must be renamed properly for each SIMD extension.
    # Moreover their name must contain their precision (in ULPs). This
    # precision is not the same for all functions and some functions can have
    # different flavours (or precisions). The "database" is contained within
    # src/libm/funcproto.h. So we parse it and produce names
    # in headers "rename[SIMD ext].h" to avoid modifying Sleef C files.
    funcproto = os.path.join(sleef_root, 'src', 'libm', 'funcproto.h')
    defines = []
    ulp_suffix = {
        '0': '',
        '1': '_u1',
        '2': '_u05',
        '3': '_u35',
        '4': '_u15',
        '5': '_u3500'
    }
    with open(funcproto, 'r') as fin:
        for line in fin:
            # Only lines holding a complete '{ ... }' entry are of interest.
            if '{' not in line or '}' not in line:
                continue
            items = [item.strip()
                     for item in line.strip(' \n\r{},').split(',')]
            func = items[0].strip('"')
            if func == 'NULL':
                # A NULL name stops the parsing.
                break
            suffix = ulp_suffix[items[2]]
            ulp = '05' if items[1] == '5' else items[1]
            # {det} and {nsimd_ext} stay as placeholders: they are filled in
            # once per NSIMD extension when the headers are written.
            if ulp == '-1':
                stem = 'nsimd_sleef_{}_{{nsimd_ext}}'.format(func)
            else:
                stem = 'nsimd_sleef_{}_u{}{{det}}_{{nsimd_ext}}'. \
                       format(func, ulp)
            defines.append('#define x{}{} {}_f64'.format(func, suffix, stem))
            defines.append('#define x{}f{} {}_f32'.format(func, suffix, stem))
    defines = '\n'.join(defines)

    # Sleef SIMD extension name -> NSIMD extension names sharing its header.
    sleef_to_nsimd = {
        '': ['scalar'],
        'sse2': ['sse2'],
        'sse4': ['sse42'],
        'avx': ['avx'],
        'avx2': ['avx2'],
        'avx512f': ['avx512_knl', 'avx512_skylake'],
        'neon32': ['neon128'],
        'advsimd': ['aarch64'],
        'sve': ['sve128', 'sve256', 'sve512', 'sve1024', 'sve2048'],
        'vsx': ['vmx', 'vsx']
    }

    for simd_ext, nsimd_exts in sleef_to_nsimd.items():
        renameheader = os.path.join(opts.src_dir,
                                    'rename{}.h'.format(simd_ext))
        se = simd_ext if simd_ext != '' else 'scalar'
        with open(renameheader, 'w') as fout:
            fout.write('''#ifndef RENAME{SIMD_EXT}_H
#define RENAME{SIMD_EXT}_H

'''.format(SIMD_EXT=se.upper()))
            for nse in nsimd_exts:
                # The scalar header is not guarded by an NSIMD_<EXT> macro.
                ifdef = '' if simd_ext == '' \
                        else '#ifdef NSIMD_{}'.format(nse.upper())
                endif = '' if simd_ext == '' else '#endif'
                fout.write('''{hbar}
/* Naming of functions {nsimd_ext} */

{ifdef}

#ifdef DETERMINISTIC

{defines_det}

#else

{defines_nondet}

#endif

#define rempi nsimd_sleef_rempi_{nsimd_ext}
#define rempif nsimd_sleef_rempif_{nsimd_ext}
#define rempisub nsimd_sleef_rempisub_{nsimd_ext}
#define rempisubf nsimd_sleef_rempisubf_{nsimd_ext}
#define gammak nsimd_gammak_{nsimd_ext}
#define gammafk nsimd_gammafk_{nsimd_ext}

{endif}

'''.format(nsimd_ext=nse, hbar=common.hbar, ifdef=ifdef, endif=endif,
           defines_det=defines.format(det='d', nsimd_ext=nse),
           defines_nondet=defines.format(det='', nsimd_ext=nse)))
            fout.write('\n\n#endif\n\n')
        common.clang_format(opts, renameheader)