def gen_tests(opts):
    """Generate tests for every operator that has a scalar implementation.

    For each (input type, output type) combination of each such operator,
    one of three generators is invoked depending on ``operator.name``:

    * ``shl`` / ``shr`` / ``shra``      -> ``gen_tests_for_shifts``
    * ``cvt`` / ``reinterpret`` / ``reinterpretl``
                                         -> ``gen_tests_for_cvt_reinterpret``
    * anything else                      -> ``gen_tests_for``

    Bitwise operators are skipped for the ``f16`` input type.

    Args:
        opts: Options object, forwarded unchanged to the generators.
    """
    bitwise_ops = ('notb', 'andb', 'xorb', 'orb', 'andnotb')
    shift_ops = ('shl', 'shr', 'shra')
    conversion_ops = ('cvt', 'reinterpret', 'reinterpretl')

    for op_name, operator in operators.operators.items():
        if not operator.has_scalar_impl:
            continue
        for t in operator.types:
            # The skip depends only on the input type, so it is decided once
            # here rather than once per output type. Both the registry key
            # and the operator's own name are checked, as before.
            if t == 'f16' and (operator.name in bitwise_ops
                               or op_name in bitwise_ops):
                continue
            for tt in common.get_output_types(t, operator.output_to):
                if operator.name in shift_ops:
                    gen_tests_for_shifts(opts, t, operator)
                elif operator.name in conversion_ops:
                    gen_tests_for_cvt_reinterpret(opts, tt, t, operator)
                else:
                    gen_tests_for(opts, t, operator)
def doit(opts):
    """Generate the test files for all operators, types and API flavours.

    ULP information is loaded first through ``common.load_ulps_informations``
    and handed to the generic ``gen_test`` generator. Operators can be
    filtered with ``opts.match`` (an object exposing ``match(name)``); a
    fixed set of operators is never handled here.

    Args:
        opts: Options object, forwarded unchanged to every generator.
    """
    ulps = common.load_ulps_informations(opts)
    print('-- Generating tests')

    # Operators for which this function generates nothing.
    not_generated = (
        'if_else1', 'loadu', 'loada', 'storeu', 'storea', 'len',
        'loadlu', 'loadla', 'storelu', 'storela', 'set1',
        'store2a', 'store2u', 'store3a', 'store3u', 'store4a', 'store4u',
        'downcvt',
    )
    # Every test is emitted once per API flavour, in this order.
    targets = ('c_base', 'cxx_base', 'cxx_adv')

    for op_name, operator in operators.operators.items():
        # Skip non-matching tests
        if opts.match and not opts.match.match(op_name):
            continue
        if op_name in not_generated:
            continue

        for typ in operator.types:
            name = operator.name
            if typ == 'f16' and name in ('notb', 'andb', 'xorb', 'orb'):
                continue

            if name == 'nbtrue':
                for target in targets:
                    gen_nbtrue(opts, operator, typ, target)
            elif name == 'addv':
                for target in targets:
                    gen_addv(opts, operator, typ, target)
            elif name in ('all', 'any'):
                for target in targets:
                    gen_all_any(opts, operator, typ, target)
            elif name in ('reinterpret', 'reinterpretl', 'cvt', 'upcvt'):
                # Conversions are tested for every possible output type.
                for to_typ in common.get_output_types(typ,
                                                      operator.output_to):
                    for target in targets:
                        gen_reinterpret_convert(opts, operator, typ, to_typ,
                                                target)
            elif name in ('load2a', 'load2u', 'load3a', 'load3u',
                          'load4a', 'load4u'):
                for target in targets:
                    gen_load_store(opts, operator, typ, target)
            elif name == 'reverse':
                for target in targets:
                    gen_reverse(opts, operator, typ, target)
            else:
                for target in targets:
                    gen_test(opts, operator, typ, target, ulps)
def gen_tests(opts):
    """Generate tests for every operator that has a scalar implementation.

    Each (input type, output type) combination is first submitted to
    ``nsimd_tests.should_i_do_the_test``; combinations it rejects are
    skipped. Shift operators go through ``gen_tests_for_shifts``, all other
    operators through ``gen_tests_for``.

    Args:
        opts: Options object, forwarded unchanged to the generators.
    """
    shift_ops = ('shl', 'shr', 'shra')

    for _, operator in operators.operators.items():
        if not operator.has_scalar_impl:
            continue
        for t in operator.types:
            for tt in common.get_output_types(t, operator.output_to):
                if nsimd_tests.should_i_do_the_test(operator, tt, t):
                    if operator.name in shift_ops:
                        # Shift tests only need the input type.
                        gen_tests_for_shifts(opts, t, operator)
                    else:
                        gen_tests_for(opts, tt, t, operator)
def get_simd_implementation(opts, operator, mod, simd_ext):
    """Return the C/C++ source implementing ``operator`` for ``simd_ext``.

    One chunk of source is produced per (input type, output type) pair
    supported by the operator. When ``operator.src`` is truthy the chunk
    comes from ``get_simd_implementation_src``; otherwise it is a C function
    plus a C++ overload in namespace ``nsimd`` whose body is supplied by
    ``mod.get_impl``.

    Args:
        opts: Options object, forwarded to ``mod.get_impl``.
        operator: Operator description (``types``, ``output_to``, ``closed``,
            ``src``, ``name`` and ``get_fmtspec`` are used).
        mod: Platform module exposing ``get_impl``.
        simd_ext: SIMD extension name, forwarded to ``get_fmtspec`` and
            ``get_impl``.

    Returns:
        The concatenation of all chunks with its last two characters
        removed.
    """
    # Each preprocessor directive must start its own line, and the template
    # has to end with exactly two newlines so that the final two-character
    # trim below removes only those.
    # NOTE(review): indentation and blank lines inside the template are a
    # reconstruction - confirm against the generated headers.
    template = '''{hbar}

NSIMD_INLINE {return_typ} NSIMD_VECTORCALL
nsimd_{name}_{simd_ext}_{suf}({c_args}) {{
  {content}
}}

#if NSIMD_CXX > 0
namespace nsimd {{
  NSIMD_INLINE {return_typ} NSIMD_VECTORCALL
  {name}({cxx_args}) {{
    {returns}nsimd_{name}_{simd_ext}_{suf}({vas});
  }}
}} // namespace nsimd
#endif

'''

    typ_pairs = [
        [from_typ, to_typ]
        for from_typ in operator.types
        for to_typ in common.get_output_types(from_typ, operator.output_to)
    ]

    if not operator.closed:
        # Regroup the pairs by type family, in this order; a pair matching
        # none of the filters is dropped.
        group_filters = (
            lambda a, b: a in common.ftypes and b in common.ftypes,
            lambda a, b: a in common.itypes and b in common.itypes,
            lambda a, b: a in common.utypes and b in common.utypes,
            lambda a, b: ((a in common.utypes and b in common.itypes) or
                          (a in common.itypes and b in common.utypes)),
            lambda a, b: ((a in common.iutypes and b in common.ftypes) or
                          (a in common.ftypes and b in common.iutypes)),
        )
        regrouped = []
        for keep in group_filters:
            regrouped.extend(p for p in typ_pairs if keep(p[0], p[1]))
        typ_pairs = regrouped

    chunks = []
    for from_typ, to_typ in typ_pairs:
        fmtspec = operator.get_fmtspec(from_typ, to_typ, simd_ext)
        if operator.src:
            chunks.append(get_simd_implementation_src(operator, simd_ext,
                                                      from_typ, fmtspec))
        else:
            content = mod.get_impl(opts, operator.name, simd_ext, from_typ,
                                   to_typ)
            chunks.append(template.format(content=content, **fmtspec))

    # Drop the trailing blank line of the last chunk.
    return ''.join(chunks)[:-2]
def gen_tests(opts):
    """Generate tests for every operator that has a scalar implementation.

    Bitwise operators are skipped for the ``f16`` input type. Shift
    operators go through ``gen_tests_for_shifts``, all other operators
    through ``gen_tests_for``, once per (input type, output type) pair.

    Args:
        opts: Options object, forwarded unchanged to the generators.
    """
    bitwise_ops = ('notb', 'andnotb', 'orb', 'xorb', 'andb')
    shift_ops = ('shl', 'shr', 'shra')

    for op_name, operator in operators.operators.items():
        if not operator.has_scalar_impl:
            continue
        for t in operator.types:
            output_types = common.get_output_types(t, operator.output_to)
            # The skip does not depend on the output type: decide it once.
            if t == 'f16' and op_name in bitwise_ops:
                continue
            for tt in output_types:
                if operator.name in shift_ops:
                    gen_tests_for_shifts(opts, t, operator)
                else:
                    gen_tests_for(opts, tt, t, operator)
def doit(opts):
    """Generate ``scalar_utilities.h`` in ``opts.include_dir``.

    The header contains, in order: forward declarations of the scalar and
    GPU reinterpret functions, then for every operator having a scalar
    implementation a C function, a C++ overload in namespace ``nsimd`` and
    the GPU implementation returned by ``get_gpu_impl``, and finally the
    closing guards. The file is passed to ``common.clang_format`` once
    written.

    Nothing is done when ``common.can_create_filename`` refuses the file.

    Args:
        opts: Options object; ``opts.include_dir`` is read here and the
            object is forwarded to the ``common`` helpers.
    """
    common.myprint(opts, 'Generating scalar implementation for CPU and GPU')
    filename = os.path.join(opts.include_dir, 'scalar_utilities.h')
    if not common.can_create_filename(opts, filename):
        return

    # Every preprocessor directive must sit on its own line. Indentation is
    # cosmetic because the file is clang-formatted at the end.
    # NOTE(review): blank lines and indentation inside the three templates
    # are a reconstruction - confirm against the generated header.
    header_template = '''#ifndef NSIMD_SCALAR_UTILITIES_H
#define NSIMD_SCALAR_UTILITIES_H

#if NSIMD_CXX > 0
#include <cmath>
#include <cstring>
#else
#include <math.h>
#include <string.h>
#endif

#ifdef NSIMD_NATIVE_FP16
  #if defined(NSIMD_IS_GCC)
    #pragma GCC diagnostic push
    #pragma GCC diagnostic ignored "-Wdouble-promotion"
  #elif defined(NSIMD_IS_CLANG)
    #pragma clang diagnostic push
    #pragma clang diagnostic ignored "-Wdouble-promotion"
  #endif
#endif

{scalar_reinterpret_decls}

#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM)
namespace nsimd {{

{gpu_reinterpret_decls}

}} // namespace nsimd
#endif
'''

    # Shared by typed and all-logical operators; only the name of the C
    # function called by the C++ overload ({callee}) differs.
    function_template = '''NSIMD_INLINE {c_sig} {{
  {scalar_impl}
}}

#if NSIMD_CXX > 0
namespace nsimd {{
NSIMD_INLINE {cxx_sig} {{
  return {callee}({c_args});
}}
{gpu_impl}
}} // namespace nsimd
#endif'''

    footer_template = '''

{hbar}

#ifdef NSIMD_NATIVE_FP16
  #if defined(NSIMD_IS_GCC)
    #pragma GCC diagnostic pop
  #elif defined(NSIMD_IS_CLANG)
    #pragma clang diagnostic pop
  #endif
#endif

#endif'''

    with common.open_utf8(opts, filename) as out:
        # we declare reinterprets now as we need them
        scalar_sigs = []
        gpu_sigs = []
        for t in operators.Reinterpret.types:
            for tt in common.get_output_types(
                    t, operators.Reinterpret.output_to):
                scalar_sigs.append(operators.Reinterpret()
                                   .get_scalar_signature('cpu', t, tt, 'c'))
                gpu_sigs.append(operators.Reinterpret()
                                .get_scalar_signature('gpu', t, tt, 'cxx'))
        scalar_reinterpret_decls = '\n'.join(
            'NSIMD_INLINE ' + sig + ';' for sig in scalar_sigs)
        gpu_reinterpret_decls = '\n'.join(
            'inline ' + sig + ';' for sig in gpu_sigs)
        out.write(header_template.format(
            scalar_reinterpret_decls=scalar_reinterpret_decls,
            gpu_reinterpret_decls=gpu_reinterpret_decls))

        for op_name, operator in operators.operators.items():
            if not operator.has_scalar_impl:
                continue

            # One argument a0, a1, ... per parameter after the first entry
            # of operator.params.
            c_args = ', '.join('a{}'.format(i)
                               for i in range(len(operator.params) - 1))

            if operator.params == ['l'] * len(operator.params):
                # Operator whose parameters are all 'l': a single untyped
                # function is emitted.
                # NOTE(review): t and tt below are whatever the previous
                # loops left behind (the reinterpret loop above or the last
                # typed operator). Behaviour is kept as is - confirm that
                # the implementations ignore these values for such
                # operators.
                out.write('\n\n' + common.hbar + '\n\n')
                out.write(function_template.format(
                    c_sig=operator.get_scalar_signature('cpu', '', '', 'c'),
                    cxx_sig=operator.get_scalar_signature('cpu', '', '',
                                                          'cxx'),
                    callee='nsimd_scalar_{}'.format(op_name),
                    c_args=c_args,
                    scalar_impl=scalar.get_impl(operator, tt, t),
                    gpu_impl=get_gpu_impl(
                        operator.get_scalar_signature('gpu', t, tt, 'cxx'),
                        cuda.get_impl(operator, tt, t),
                        rocm_impl=rocm.get_impl(operator, tt, t))))
                continue

            for t in operator.types:
                for tt in common.get_output_types(t, operator.output_to):
                    # Closed operators are suffixed by their type only,
                    # the others by "<output type>_<input type>".
                    suffix = t if operator.closed else '{}_{}'.format(tt, t)
                    out.write('\n\n' + common.hbar + '\n\n')
                    out.write(function_template.format(
                        c_sig=operator.get_scalar_signature('cpu', t, tt,
                                                            'c'),
                        cxx_sig=operator.get_scalar_signature('cpu', t, tt,
                                                              'cxx'),
                        callee='nsimd_scalar_{}_{}'.format(op_name, suffix),
                        c_args=c_args,
                        scalar_impl=scalar.get_impl(operator, tt, t),
                        gpu_impl=get_gpu_impl(
                            operator.get_scalar_signature('gpu', t, tt,
                                                          'cxx'),
                            cuda.get_impl(operator, tt, t),
                            rocm_impl=rocm.get_impl(operator, tt, t))))

        out.write(footer_template.format(hbar=common.hbar))

    # Format only once the file has been closed.
    common.clang_format(opts, filename)