예제 #1
0
파일: hatch.py 프로젝트: zoq/nsimd
def gen_tests(opts):
    """Generate tests for every operator that has a scalar implementation.

    For each operator in ``operators.operators`` whose ``has_scalar_impl``
    is true, iterate over its input types and, for each input type, over the
    output types returned by ``common.get_output_types``.  The test
    generator is selected from ``operator.name``:

    * ``shl``/``shr``/``shra`` -> ``gen_tests_for_shifts(opts, t, operator)``
    * ``cvt``/``reinterpret``/``reinterpretl`` ->
      ``gen_tests_for_cvt_reinterpret(opts, tt, t, operator)``
    * anything else -> ``gen_tests_for(opts, t, operator)``

    Bitwise operators are skipped for the ``f16`` input type.

    Args:
        opts: options object, forwarded unchanged to the test generators.
    """
    bitwise_ops = ('notb', 'andb', 'xorb', 'orb', 'andnotb')
    shift_ops = ('shl', 'shr', 'shra')
    conversion_ops = ('cvt', 'reinterpret', 'reinterpretl')

    for op_name, operator in operators.operators.items():
        if not operator.has_scalar_impl:
            continue

        for t in operator.types:
            # The skip does not depend on the output type, so decide it once
            # per input type.  Both the registry key and the operator's own
            # name are checked, as the two original tests did.
            if t == 'f16' and (operator.name in bitwise_ops
                               or op_name in bitwise_ops):
                continue

            # NOTE(review): the shift and default generators do not receive
            # `tt`, so they are called once per output type with identical
            # arguments -- confirm this repetition is intended.
            for tt in common.get_output_types(t, operator.output_to):
                if operator.name in shift_ops:
                    gen_tests_for_shifts(opts, t, operator)
                elif operator.name in conversion_ops:
                    gen_tests_for_cvt_reinterpret(opts, tt, t, operator)
                else:
                    gen_tests_for(opts, t, operator)
예제 #2
0
파일: gen_tests.py 프로젝트: aurianer/nsimd
def doit(opts):
    """Generate the test files for all operators.

    Loads the ULP information through ``common.load_ulps_informations``,
    prints a progress line, then walks ``operators.operators``.  Operators
    whose name does not match ``opts.match`` (when set), or whose name is in
    the list of operators not handled here, are skipped.  For every
    remaining (operator, type) pair the matching generator is called once
    per target: ``c_base``, ``cxx_base`` and ``cxx_adv``, in that order.

    Args:
        opts: options object; ``opts.match`` is read here and ``opts`` is
            forwarded to every generator.
    """
    ulps = common.load_ulps_informations(opts)

    print('-- Generating tests')

    targets = ('c_base', 'cxx_base', 'cxx_adv')
    not_generated_here = [
        'if_else1', 'loadu', 'loada', 'storeu', 'storea', 'len',
        'loadlu', 'loadla', 'storelu', 'storela', 'set1', 'store2a',
        'store2u', 'store3a', 'store3u', 'store4a', 'store4u',
        'downcvt'
    ]

    for op_name, operator in operators.operators.items():
        # Skip operators filtered out by the user-supplied pattern.
        if opts.match and not opts.match.match(op_name):
            continue
        if op_name in not_generated_here:
            continue

        for typ in operator.types:
            name = operator.name

            # Bitwise operators get no test for f16.
            if typ == 'f16' and name in ['notb', 'andb', 'xorb', 'orb']:
                continue

            # Pick the generator; the two branches whose call signature
            # differs emit their tests directly and move on.
            if name == 'nbtrue':
                generate = gen_nbtrue
            elif name == 'addv':
                generate = gen_addv
            elif name in ['all', 'any']:
                generate = gen_all_any
            elif name in ['reinterpret', 'reinterpretl', 'cvt', 'upcvt']:
                for to_typ in common.get_output_types(typ,
                                                      operator.output_to):
                    for target in targets:
                        gen_reinterpret_convert(opts, operator, typ, to_typ,
                                                target)
                continue
            elif name in ['load2a', 'load2u', 'load3a', 'load3u', 'load4a',
                          'load4u']:
                generate = gen_load_store
            elif name == 'reverse':
                generate = gen_reverse
            else:
                for target in targets:
                    gen_test(opts, operator, typ, target, ulps)
                continue

            for target in targets:
                generate(opts, operator, typ, target)
예제 #3
0
def gen_tests(opts):
    """Generate tests for operators that have a scalar implementation.

    Every (input type, output type) combination accepted by
    ``nsimd_tests.should_i_do_the_test`` is handed to
    ``gen_tests_for_shifts`` for the shift operators and to
    ``gen_tests_for`` for all others.

    Args:
        opts: options object, forwarded unchanged to the generators.
    """
    shift_ops = ['shl', 'shr', 'shra']
    for _op_name, operator in operators.operators.items():
        if operator.has_scalar_impl:
            for from_typ in operator.types:
                out_typs = common.get_output_types(from_typ,
                                                   operator.output_to)
                for to_typ in out_typs:
                    wanted = nsimd_tests.should_i_do_the_test(
                        operator, to_typ, from_typ)
                    if not wanted:
                        continue
                    if operator.name in shift_ops:
                        gen_tests_for_shifts(opts, from_typ, operator)
                    else:
                        gen_tests_for(opts, to_typ, from_typ, operator)
예제 #4
0
def get_simd_implementation(opts, operator, mod, simd_ext):
    """Return the generated code of `operator` for the extension `simd_ext`.

    One piece of code is produced per ``[from_typ, to_typ]`` pair, where
    ``to_typ`` ranges over ``common.get_output_types(from_typ,
    operator.output_to)``.  When the operator is not closed the pairs are
    regrouped in this order: both types in ``common.ftypes``, both in
    ``common.itypes``, both in ``common.utypes``, mixed
    ``utypes``/``itypes``, then mixed ``iutypes``/``ftypes``.  A pair that
    falls in none of these groups is dropped.

    Args:
        opts: options object, forwarded to ``mod.get_impl``.
        operator: operator description (``types``, ``output_to``,
            ``closed``, ``src``, ``name`` and ``get_fmtspec`` are used).
        mod: object providing ``get_impl`` for the C body of the function.
        simd_ext: SIMD extension identifier.

    Returns:
        The concatenation of all pieces with its last two characters
        removed.
    """
    wrapper_template = \
    '''{hbar}

               NSIMD_INLINE {return_typ} NSIMD_VECTORCALL
               nsimd_{name}_{simd_ext}_{suf}({c_args}) {{
                 {content}
               }}

               #if NSIMD_CXX > 0
               namespace nsimd {{
                 NSIMD_INLINE {return_typ} NSIMD_VECTORCALL
                 {name}({cxx_args}) {{
                   {returns}nsimd_{name}_{simd_ext}_{suf}({vas});
                 }}
               }} // namespace nsimd
               #endif

               '''

    pairs = [[src, dst]
             for src in operator.types
             for dst in common.get_output_types(src, operator.output_to)]

    if not operator.closed:
        # Ordered group predicates; the output follows this order.
        group_tests = (
            lambda a, b: a in common.ftypes and b in common.ftypes,
            lambda a, b: a in common.itypes and b in common.itypes,
            lambda a, b: a in common.utypes and b in common.utypes,
            lambda a, b: (a in common.utypes and b in common.itypes) or
                         (a in common.itypes and b in common.utypes),
            lambda a, b: (a in common.iutypes and b in common.ftypes) or
                         (a in common.ftypes and b in common.iutypes),
        )
        regrouped = []
        for belongs in group_tests:
            regrouped.extend(p for p in pairs if belongs(p[0], p[1]))
        pairs = regrouped

    pieces = []
    for from_typ, to_typ in pairs:
        fmtspec = operator.get_fmtspec(from_typ, to_typ, simd_ext)
        if operator.src:
            piece = get_simd_implementation_src(operator, simd_ext, from_typ,
                                                fmtspec)
        else:
            piece = wrapper_template.format(
                content=mod.get_impl(opts, operator.name, simd_ext,
                                     from_typ, to_typ),
                **fmtspec)
        pieces.append(piece)

    # Drop the last two characters of the assembled text.
    return ''.join(pieces)[0:-2]
예제 #5
0
def gen_tests(opts):
    """Generate tests for operators that have a scalar implementation.

    For every input type and each of its output types, the shift operators
    go to ``gen_tests_for_shifts`` and all others to ``gen_tests_for``.
    Bitwise operators are skipped when the input type is ``f16``.

    Args:
        opts: options object, forwarded unchanged to the generators.
    """
    bitwise_ops = ['notb', 'andnotb', 'orb', 'xorb', 'andb']
    shift_ops = ['shl', 'shr', 'shra']
    for op_name, operator in operators.operators.items():
        if operator.has_scalar_impl:
            for from_typ in operator.types:
                out_typs = common.get_output_types(from_typ,
                                                   operator.output_to)
                for to_typ in out_typs:
                    skip = from_typ == 'f16' and op_name in bitwise_ops
                    if skip:
                        continue
                    if operator.name in shift_ops:
                        gen_tests_for_shifts(opts, from_typ, operator)
                    else:
                        gen_tests_for(opts, to_typ, from_typ, operator)
예제 #6
0
def doit(opts):
    """Generate the header ``scalar_utilities.h`` in ``opts.include_dir``.

    The file receives, in order: an include guard with the C/C++ math and
    string includes, forward declarations of every reinterpret signature
    (CPU, and GPU inside ``namespace nsimd``), one C function plus C++ and
    GPU wrappers per operator that has a scalar implementation, and the
    closing of the diagnostic pragmas and include guard.  The file is
    finally passed to ``common.clang_format``.

    Nothing is written when ``common.can_create_filename`` returns a false
    value.

    Args:
        opts: options object; ``opts.include_dir`` is read here and ``opts``
            is forwarded to the ``common`` helpers.
    """
    common.myprint(opts, 'Generating scalar implementation for CPU and GPU')
    filename = os.path.join(opts.include_dir, 'scalar_utilities.h')
    # Stop here when the helper does not allow the file to be (re)created.
    if not common.can_create_filename(opts, filename):
        return
    with common.open_utf8(opts, filename) as out:
        # Reinterpret signatures are declared first because the code emitted
        # below needs them.
        # NOTE(review): `t` and `tt` stay bound after this loop and are read
        # by the all-'l' branch further down.
        scalar_tmp = []
        gpu_tmp = []
        for t in operators.Reinterpret.types:
            for tt in common.get_output_types(t,
                                              operators.Reinterpret.output_to):
                scalar_tmp += [operators.Reinterpret(). \
                               get_scalar_signature('cpu', t, tt, 'c')]
                gpu_tmp += [operators.Reinterpret(). \
                            get_scalar_signature('gpu', t, tt, 'cxx')]
        # Turn each signature into a declaration, one per line.
        scalar_reinterpret_decls = '\n'.join(['NSIMD_INLINE ' + sig + ';' \
                                              for sig in scalar_tmp])
        gpu_reinterpret_decls = '\n'.join(['inline ' + sig + ';' \
                                           for sig in gpu_tmp])
        # File prologue: include guard, includes, diagnostic pragmas and the
        # reinterpret declarations built above.
        out.write(
        '''#ifndef NSIMD_SCALAR_UTILITIES_H
           #define NSIMD_SCALAR_UTILITIES_H

           #if NSIMD_CXX > 0
           #include <cmath>
           #include <cstring>
           #else
           #include <math.h>
           #include <string.h>
           #endif

           #ifdef NSIMD_NATIVE_FP16
             #if defined(NSIMD_IS_GCC)
               #pragma GCC diagnostic push
               #pragma GCC diagnostic ignored "-Wdouble-promotion"
             #elif defined(NSIMD_IS_CLANG)
               #pragma clang diagnostic push
               #pragma clang diagnostic ignored "-Wdouble-promotion"
             #endif
           #endif

           {scalar_reinterpret_decls}

           #if defined(NSIMD_CUDA) || defined(NSIMD_ROCM)

           namespace nsimd {{

           {gpu_reinterpret_decls}

           }} // namespace nsimd

           #endif
           '''. \
           format(scalar_reinterpret_decls=scalar_reinterpret_decls,
                  gpu_reinterpret_decls=gpu_reinterpret_decls))
        # One group of definitions per operator with a scalar implementation.
        for op_name, operator in operators.operators.items():
            if not operator.has_scalar_impl:
                continue
            # Operators whose params are all 'l' are emitted once, with
            # empty type strings in the CPU signatures and no type suffix in
            # the function name.
            # NOTE(review): `t` and `tt` are not assigned in this branch;
            # the values used below are whatever an earlier loop left behind
            # (the reinterpret loop above or a previous operator's type
            # loop).  Confirm the implementations do not depend on them.
            if operator.params == ['l'] * len(operator.params):
                out.write('\n\n' + common.hbar + '\n\n')
                # c_args is 'a0, a1, ...' with one name per entry of
                # operator.params after the first.
                out.write(\
                '''NSIMD_INLINE {c_sig} {{
                  {scalar_impl}
                }}

                #if NSIMD_CXX > 0

                namespace nsimd {{

                NSIMD_INLINE {cxx_sig} {{
                  return nsimd_scalar_{op_name}({c_args});
                }}

                {gpu_impl}

                }} // namespace nsimd

                #endif'''.format(
                c_sig=operator.get_scalar_signature('cpu', '', '', 'c'),
                cxx_sig=operator.get_scalar_signature('cpu', '', '', 'cxx'),
                op_name=op_name,
                c_args=', '.join(['a{}'.format(i - 1) \
                               for i in range(1, len(operator.params))]),
                scalar_impl=scalar.get_impl(operator, tt, t),
                gpu_impl=get_gpu_impl(
                    operator.get_scalar_signature('gpu', t, tt, 'cxx'),
                    cuda.get_impl(operator, tt, t),
                    rocm_impl=rocm.get_impl(operator, tt, t))))
                continue
            # All other operators: one definition per (input type, output
            # type) pair.
            for t in operator.types:
                tts = common.get_output_types(t, operator.output_to)
                for tt in tts:
                    out.write('\n\n' + common.hbar + '\n\n')
                    # The function suffix is the input type for closed
                    # operators and '<tt>_<t>' otherwise.
                    out.write(\
                    '''NSIMD_INLINE {c_sig} {{
                      {scalar_impl}
                    }}

                    #if NSIMD_CXX > 0

                    namespace nsimd {{

                    NSIMD_INLINE {cxx_sig} {{
                      return nsimd_scalar_{op_name}_{suffix}({c_args});
                    }}

                    {gpu_impl}

                    }} // namespace nsimd

                    #endif'''.format(
                    c_sig=operator.get_scalar_signature('cpu', t, tt, 'c'),
                    cxx_sig=operator.get_scalar_signature('cpu', t, tt, 'cxx'),
                    op_name=op_name,
                    suffix=t if operator.closed else '{}_{}'.format(tt, t),
                    c_args=', '.join(['a{}'.format(i - 1) \
                                   for i in range(1, len(operator.params))]),
                    scalar_impl=scalar.get_impl(operator, tt, t),
                    gpu_impl=get_gpu_impl(
                        operator.get_scalar_signature('gpu', t, tt, 'cxx'),
                        cuda.get_impl(operator, tt, t),
                        rocm_impl=rocm.get_impl(operator, tt, t))))

        # File epilogue: restore the diagnostics pushed in the prologue and
        # close the include guard.
        out.write('''

                  {hbar}

                  #ifdef NSIMD_NATIVE_FP16
                    #if defined(NSIMD_IS_GCC)
                      #pragma GCC diagnostic pop
                    #elif defined(NSIMD_IS_CLANG)
                      #pragma clang diagnostic pop
                    #endif
                  #endif

                  #endif'''.format(hbar=common.hbar))
    # Reformat the generated header once it has been written and closed.
    common.clang_format(opts, filename)