Esempio n. 1
0
    def get_generic_signature(self, lang):
        """Return the generic declaration(s) of this operator for `lang`.

        The shape of the result depends on `lang`:

        - 'c_base': a list of two C macro headers, `v<name>(...)` and
          `v<name>_e(..., simd_ext)`.
        - 'cxx_base': a single C++ template function declaration string.
        - 'cxx_adv': a dict of declaration strings working on packs. Keys
          '1' and 'N' are always present; 'op1' and 'opN' are added when
          `self.cxx_operator` is truthy; 'dispatch' is added when the
          operator is not closed, or when none of its inputs is one of
          'v', 'l', 'vx2', 'vx3', 'vx4'.

        Reads `self.name`, `self.params` (index 0 drives the return type,
        the remaining entries are the inputs), `self.closed` and, for
        'cxx_adv' only, `self.cxx_operator`.

        Raises:
            ValueError: if `lang` is not one of the three values above.
                ValueError derives from Exception, so callers catching
                Exception keep working.
        """
        if lang == 'c_base':
            # One macro argument per input; params[0] is not an input.
            vas = common.get_args(len(self.params) - 1)
            args = vas + (', ' if vas != '' else '')
            args += 'type' if self.closed else 'from_type, to_type'
            return [
                '#define v{name}({args})'.format(name=self.name, args=args),
                '#define v{name}_e({args}, simd_ext)'.format(name=self.name,
                                                             args=args),
            ]

        if lang == 'cxx_base':
            return_typ = common.get_one_type_generic(self.params[0], 'T')
            if return_typ.startswith('vT'):
                # Keep whatever follows 'vT' as suffix of the traits typedef
                return_typ = \
                'typename simd_traits<T, NSIMD_SIMD>::simd_vector{}'. \
                format(return_typ[2:])
            elif return_typ == 'vlT':
                return_typ = \
                'typename simd_traits<T, NSIMD_SIMD>::simd_vectorl'
            args_list = common.enum(self.params[1:])

            # The type tags always come last: (F, T) for non closed
            # operators, T alone otherwise.
            tmpl_tail = 'typename T' if self.closed \
                        else 'typename F, typename T'
            func_tail = 'T' if self.closed else 'F, T'
            tmpl_args = ', '.join(
                ['typename A{}'.format(a[0]) for a in args_list] + [tmpl_tail])
            func_args = ', '.join(
                ['A{i} a{i}'.format(i=a[0]) for a in args_list] + [func_tail])

            return \
            'template <{tmpl_args}> {return_typ} {name}({func_args});'. \
            format(return_typ=return_typ, tmpl_args=tmpl_args,
                   func_args=func_args, name=self.name)

        if lang == 'cxx_adv':
            args_list = common.enum(self.params[1:])
            inputs = self.params[1:]
            takes_packs = any(t in inputs
                              for t in ['v', 'l', 'vx2', 'vx3', 'vx4'])
            # Do we need tag dispatching on pack<>? e.g. len, set1 and load*
            if takes_packs:
                need_tmpl_pack = None
            else:
                ret_param = self.params[0]
                need_tmpl_pack = 'pack{}'.format(ret_param[1:]) \
                                 if ret_param[0] == 'v' else 'packl'

            # Compute template arguments
            leading = [] if self.closed else ['typename ToPackType']
            other_tmpl_args = ['typename A{}'.format(i[0]) for i in args_list
                               if i[1] not in ['v', 'l']]
            tmpl_args1 = ', '.join(
                leading + ['typename T', 'typename SimdExt'] + other_tmpl_args)
            tmpl_argsN = ', '.join(
                leading + ['typename T', 'int N', 'typename SimdExt'] +
                other_tmpl_args)

            # Compute function arguments
            def arg_decl(arg, N):
                # 'v' and 'l' inputs become packs, anything else is passed
                # through its own template parameter A<i>.
                if arg[1] == 'v':
                    typ = 'pack<T, {}, SimdExt> const&'.format(N)
                elif arg[1] == 'l':
                    typ = 'packl<T, {}, SimdExt> const&'.format(N)
                else:
                    typ = 'A{}'.format(arg[0])
                return '{} a{}'.format(typ, arg[0])

            plain_args1 = [arg_decl(i, '1') for i in args_list]
            plain_argsN = [arg_decl(i, 'N') for i in args_list]
            # Arguments without tag dispatching on pack
            other_argsN = ', '.join(plain_argsN)

            # Tag arguments go first: the pack tag (if any), then ToPackType
            tags1 = []
            tagsN = []
            if need_tmpl_pack is not None:
                tags1.append('{}<T, 1, SimdExt> const&'.format(need_tmpl_pack))
                tagsN.append('{}<T, N, SimdExt> const&'.format(need_tmpl_pack))
            if not self.closed:
                tags1.append('ToPackType')
                tagsN.append('ToPackType')
            args1 = ', '.join(tags1 + plain_args1)
            argsN = ', '.join(tagsN + plain_argsN)

            # Compute return type
            if self.closed:
                ret1 = common.get_one_type_generic_adv_cxx(self.params[0],
                                                           'T', '1')
                retN = common.get_one_type_generic_adv_cxx(self.params[0],
                                                           'T', 'N')
            else:
                ret1 = retN = 'ToPackType'

            def declare(tmpl, ret_typ, cxx_name, args):
                # Single place where a C++ declaration is spelled out
                return 'template <{}> {} {}({});'.format(tmpl, ret_typ,
                                                         cxx_name, args)

            ret = {
                '1': declare(tmpl_args1, ret1, self.name, args1),
                'N': declare(tmpl_argsN, retN, self.name, argsN),
            }
            if self.cxx_operator:
                ret['op1'] = declare(tmpl_args1, ret1, self.cxx_operator,
                                     args1)
                ret['opN'] = declare(tmpl_argsN, retN, self.cxx_operator,
                                     argsN)
            if not self.closed:
                ret['dispatch'] = declare(tmpl_argsN, retN, self.name,
                                          other_argsN)
            elif need_tmpl_pack is not None:
                other_tmpl_args = ', '.join(['typename SimdVector'] + \
                                            other_tmpl_args)
                # The line break and the indentation inside the literal are
                # part of the emitted declaration: keep them untouched.
                ret['dispatch'] = \
                '''template <{other_tmpl_args}>
                   SimdVector {cxx_name}({other_argsN});'''. \
                   format(other_tmpl_args=other_tmpl_args,
                          other_argsN=other_argsN, cxx_name=self.name)
            return ret

        raise ValueError('Lang must be one of c_base, cxx_base, cxx_adv')
Esempio n. 2
0
def gen_readme(opts):
    """Write the NSIMD API documentation to `DOC.md`.

    The file is created one directory above `opts.script_dir` and is
    encoded in UTF-8. Nothing is written when
    `common.can_create_filename(opts, filename)` is falsy.

    The static text is interleaved with lists built from `common.types`,
    `common.get_platforms(opts)` and the module-level `operators` mapping.
    """
    print('-- Generating documentation in DOC.md')
    filename = os.path.join(opts.script_dir, '..', 'DOC.md')
    if not common.can_create_filename(opts, filename):
        return

    def write_domain(fout, op):
        # Definition domain of the inputs, only when the operator has one
        inputs = op.params[1:]
        if not (op.domain and len(inputs) > 0):
            return
        if len(inputs) == 1:
            fout.write('  a0 ∈ {}\n'.format(op.domain))
        else:
            names = ', '.join(['a' + str(i) for i in range(len(inputs))])
            fout.write('  ({}) ∈ {}\n'.format(names, op.domain))

    def write_type_restriction(fout, op):
        # Only mention types when the operator does not support all of them
        if len(op.types) < len(common.types):
            typs = ', '.join(['{}'.format(t) for t in op.types])
            fout.write('  Only available for {}\n'.format(typs))

    with io.open(filename, mode='w', encoding='utf-8') as fout:
        fout.write('''## NSIMD scalar types

Their names follow the following pattern: `Sxx` where

- `S` is `i` for signed integers, `u` for unsigned integer and `f` for
  floating point number.
- `xx` is the number of bits taken to represent the number.

Full list of scalar types:

''')
        for t in common.types:
            fout.write('- `{}`\n'.format(t))
        fout.write('''

## NSIMD SIMD vector types

Their names follow the following pattern: `vSCALAR` where `SCALAR` is
one of the scalar types listed above. For example `vi8` means a SIMD vector
containing `i8`'s.

Full list of SIMD vector types:

''')
        for t in common.types:
            fout.write('- `v{}`\n'.format(t))
        fout.write('''

## C/C++ base APIs

These come automatically when you include `nsimd/nsimd.h`. You do *not* need
to include a header file for having a function. In NSIMD, we call a platform
an architecture e.g. Intel, ARM, POWERPC. We call SIMD extension a set of
low-level functions and types provided to access a given SIMD extension.
Examples include SSE2, SSE42, AVX, ...

Here is a list of supported platforms and their corresponding SIMD extensions.

''')
        platforms = common.get_platforms(opts)
        for p in platforms:
            fout.write('- Platform `{}`\n'.format(p))
            for s in platforms[p].get_simd_exts():
                fout.write('  - `{}`\n'.format(s))
        fout.write('''
Each simd extension has its own set of SIMD types and functions. Types follow
the following pattern: `nsimd_SIMDEXT_vSCALAR` where

- `SIMDEXT` is the SIMD extensions.
- `SCALAR` is one of scalar types listed above.

There are also logical types associated to each SIMD vector type. These types
are used to represent the result of a comparison of SIMD vectors. They are
usually bit masks. Their names follow the following pattern:
`nsimd_SIMDEXT_vlSCALAR` where

- `SIMDEXT` is the SIMD extensions.
- `SCALAR` is one of scalar types listed above.

Note 1: Platform `cpu` is scalar fallback when no SIMD extension has been
specified.

Note 2: as all SIMD extensions of all platforms are different there is no
need to put the name of the platform in each identifier.

Function names follow the following pattern: `nsimd_SIMDEXT_FUNCNAME_SCALAR`
where

- `SIMDEXT` is the SIMD extensions.
- `FUNCNAME` is the name of a function e.g. `add` or `sub`.
- `SCALAR` is one of scalar types listed above.

### Generic identifier

In C, genericity is achieved using macros.

- `vec(SCALAR)` represents the SIMD vector type containing SCALAR elements.
  SCALAR must be one of scalar types listed above.
- `vecl(SCALAR)` represents the SIMD vector of logicals type containing SCALAR
  elements. SCALAR must be one of scalar types listed above.
- `vec_e(SCALAR)` represents the SIMD vector type containing SCALAR elements.
  SCALAR must be one of scalar types listed above.
- `vecl_e(SCALAR)` represents the SIMD vector of logicals type containing
  SCALAR elements. SCALAR must be one of scalar types listed above.
- `vFUNCNAME` is the macro name to access the function FUNCNAME e.g. `vadd`,
  `vsub`.
- `vFUNCNAME_e` is the macro name to access the function FUNCNAME e.g.
  `vadd_e`, `vsub_e`.

In C++98 and C++03, type traits are available.

- `nsimd::simd_traits<SCALAR, SIMDEXT>::vector` is the SIMD vector type for
  platform SIMDEXT containing SCALAR elements. SIMDEXT is one of SIMD
  extension listed above, SCALAR is one of scalar type listed above.
- `nsimd::simd_traits<SCALAR, SIMDEXT>::vectorl` is the SIMD vector of logicals
  type for platform SIMDEXT containing SCALAR elements. SIMDEXT is one of
  SIMD extensions listed above, SCALAR is one of scalar type listed above.

In C++11 and beyond, type traits are still available but typedefs are also
provided.

- `nsimd::vector<SCALAR, SIMDEXT>` is a typedef to
  `nsimd::simd_traits<SCALAR, SIMDEXT>::vector`.
- `nsimd::vectorl<SCALAR, SIMDEXT>` is a typedef to
  `nsimd::simd_traits<SCALAR, SIMDEXT>::vectorl`.

Note that all macro and functions available in plain C are still available in
C++.

### List of functions available for manipulation of SIMD vectors

For each FUNCNAME a C function (also available in C++)
named `nsimd_SIMDEXT_FUNCNAME_SCALAR` is available for each SCALAR type unless
specified otherwise.

For each FUNCNAME, a C macro (also available in C++) named `vFUNCNAME` is
available and takes as its last argument a SCALAR type.

For each FUNCNAME, a C macro (also available in C++) named `vFUNCNAME_e` is
available and takes as its last two arguments a SCALAR type and a SIMDEXT.

For each FUNCNAME, a C++ function in namespace `nsimd` named `FUNCNAME` is
available. It takes as its last argument the SCALAR type and can optionally
take the SIMDEXT as its last last argument.

For example, for the addition of two SIMD vectors `a` and `b` here are the
possibilities:

    c = nsimd_add_avx_f32(a, b); // use AVX
    c = nsimd::add(a, b, f32()); // use detected SIMDEXT
    c = nsimd::add(a, b, f32(), avx()); // force AVX even if detected SIMDEXT is not AVX
    c = vadd(a, b, f32); // use detected SIMDEXT
    c = vadd_e(a, b, f32, avx); // force AVX even if detected SIMDEXT is not AVX

Here is a list of available FUNCNAME.

''')
        # Base API: one bullet per operator with its generic signature
        for op_name, op in operators.items():
            return_typ = common.get_one_type_generic(op.params[0], 'SCALAR')
            args = ', '.join([
                common.get_one_type_generic(p, 'SCALAR') + ' a' + str(count)
                for count, p in enumerate(op.params[1:])
            ])
            fout.write('- `{} {}({});`\n'.format(return_typ, op.name, args))
            write_domain(fout, op)
            write_type_restriction(fout, op)
        fout.write('''

## C++ advanced API

The C++ advanced API is called advanced not because it requires C++11 or above
but because it makes use of the particular implementation of ARM SVE by ARM
in their compiler. We do not know if GCC (and possibly MSVC in the distant
future) will use the same approach. Anyway the current implementation allows
us to put SVE SIMD vectors inside some kind of structs that behave like
standard structs. If you want to be sure to write portable code do *not* use
this API. Two new types are available.

- `nsimd::pack<SCALAR, N, SIMDEXT>` represents `N` SIMD vectors containing
  SCALAR elements of SIMD extension SIMDEXT. You can specify only the first
  template argument. The second defaults to 1 while the third defaults to the
  detected SIMDEXT.
- `nsimd::packl<SCALAR, N, SIMDEXT>` represents `N` SIMD vectors of logical
  type containing SCALAR elements of SIMD extension SIMDEXT. You can specify
  only the first template argument. The second defaults to 1 while the third
  defaults to the detected SIMDEXT.

Use N > 1 when declaring packs to have an unroll of N. This is particularly
useful on ARM.

Functions that take packs do not take any other argument unless specified
otherwise e.g. the load family of functions. It is impossible to determine
the kind of pack (unroll and SIMDEXT) from the type of a pointer. Therefore
in this case, the last argument must be a pack and this same type will then
return. Also some functions are available as C++ operators.

Here is the list of functions that act on packs.

''')
        # Advanced API: same list, expressed with pack types
        for op_name, op in operators.items():
            return_typ = common.get_one_type_pack(op.params[0], 1, 'N')
            args = ', '.join([
                common.get_one_type_pack(p, 0, 'N') + ' a' + str(count)
                for count, p in enumerate(op.params[1:])
            ])
            inputs = op.params[1:]
            if 'v' not in inputs and 'l' not in inputs:
                # No pack among the inputs: a trailing pack argument is
                # listed so that the pack type appears in the signature.
                if args != '':
                    args = args + ', pack<T, N, SimdExt> const&'
                else:
                    args = 'pack<T, N, SimdExt> const&'
            fout.write('- `{} {}({});`\n'.format(return_typ, op.name, args))
            write_domain(fout, op)
            if op.cxx_operator:
                fout.write('  Available as {}\n'.format(op.cxx_operator))
            write_type_restriction(fout, op)
Esempio n. 3
0
def gen_overview(opts):
    """Write the "overview" markdown page of the documentation.

    The destination comes from `common.get_markdown_file(opts, 'overview')`
    and the file is opened through `common.open_utf8`. Nothing is written
    when `common.can_create_filename(opts, filename)` is falsy.

    The static text is interleaved with lists built from `common.types`,
    `common.get_platforms(opts)` and the module-level `operators` mapping.
    """
    filename = common.get_markdown_file(opts, 'overview')
    if not common.can_create_filename(opts, filename):
        return
    with common.open_utf8(opts, filename) as fout:
        fout.write('''# Overview

## NSIMD scalar types

Their names follow the following pattern: `Sxx` where

- `S` is `i` for signed integers, `u` for unsigned integer or `f` for
  floating point number.
- `xx` is the number of bits taken to represent the number.

Full list of scalar types:

''')
        for t in common.types:
            fout.write('- `{}`\n'.format(t))
        fout.write('''
## NSIMD generic SIMD vector types

In NSIMD, we call a platform an architecture e.g. Intel, ARM, POWERPC. We call
SIMD extension a set of low-level functions and types provided by hardware
vendors to access SIMD units. Examples include SSE2, SSE42, AVX, ...  When
compiling the generic SIMD vector types represents a SIMD register of the
target. Examples are a `__m128` for Intel SSE, `__m512` for Intel AVX-512 or
`svfloat32_t` for Arm SVE.

Their names follow the following pattern:

- C base API: `vSCALAR` where `SCALAR` is one of the scalar types listed
  above.
- C advanced API: `nsimd_pack_SCALAR` where `SCALAR` is one of the scalar
  types listed above.
- C++ advanced API: `nsimd::pack<SCALAR>` where `SCALAR` is one of the scalar
  types listed above.

Full list of SIMD vector types:

| Base type | C base API | C advanced API | C++ advanced API |
|-----------|------------|----------------|------------------|
''')

        # One table row per scalar type; rows are separated (not terminated)
        # by newlines, the next chunk starts with the missing line break.
        row = '| `{typ}` | `v{typ}` | `nsimd_pack_{typ}` | `nsimd::pack<{typ}>` |'
        fout.write('\n'.join([row.format(typ=typ) for typ in common.types]))

        fout.write('''

## C/C++ base APIs

These come automatically when you include `nsimd/nsimd.h`. You do *not* need
to include a header file for having a function. Here is a list of supported
platforms and their corresponding SIMD extensions.

''')
        platforms = common.get_platforms(opts)
        for p in platforms:
            fout.write('- Platform `{}`\n'.format(p))
            for s in platforms[p].get_simd_exts():
                fout.write('  - `{}`\n'.format(s))
        fout.write('''
Each simd extension has its own set of SIMD types and functions. Types follow
the pattern: `nsimd_SIMDEXT_vSCALAR` where

- `SIMDEXT` is the SIMD extensions.
- `SCALAR` is one of scalar types listed above.

There are also logical types associated to each SIMD vector type. These types
are used, for example, to represent the result of a comparison of SIMD vectors.
They are usually bit masks. Their names follow the pattern:
`nsimd_SIMDEXT_vlSCALAR` where

- `SIMDEXT` is the SIMD extensions.
- `SCALAR` is one of scalar types listed above.

Note 1: Platform `cpu` is a 128 bits SIMD emulation fallback when no SIMD
extension has been specified or is supported on a given compilation target.

Note 2: as all SIMD extensions of all platforms are different there is no
need to put the name of the platform in each identifier.

Function names follow the pattern: `nsimd_SIMDEXT_FUNCNAME_SCALAR` where

- `SIMDEXT` is the SIMD extensions.
- `FUNCNAME` is the name of a function e.g. `add` or `sub`.
- `SCALAR` is one of scalar types listed above.

### Generic identifier

In the base C API, genericity is achieved using macros.

- `vec(SCALAR)` is a type to represent a SIMD vector containing SCALAR
  elements.  SCALAR must be one of scalar types listed above.
- `vecl(SCALAR)` is a type to represent a SIMD vector of logicals for SCALAR
  elements. SCALAR must be one of scalar types listed above.
- `vec_a(SCALAR, SIMDEXT)` is a type to represent a SIMD vector containing
  SCALAR elements for the simd extension SIMDEXT. SCALAR must be one of scalar
  types listed above and SIMDEXT must be a valid SIMD extension.
- `vecl_a(SCALAR, SIMDEXT)` is a type to represent a SIMD vector of logicals
  for SCALAR elements for the simd extension SIMDEXT. SCALAR must be one of
  scalar types listed above and SIMDEXT must be a valid SIMD extension.
- `vFUNCNAME` takes as input the above types to access the operator FUNCNAME
  e.g. `vadd`, `vsub`.

In C++98 and C++03, type traits are available.

- `nsimd::simd_traits<SCALAR, SIMDEXT>::vector` is the SIMD vector type for
  platform SIMDEXT containing SCALAR elements. SIMDEXT is one of SIMD
  extension listed above, SCALAR is one of scalar type listed above.
- `nsimd::simd_traits<SCALAR, SIMDEXT>::vectorl` is the SIMD vector of logicals
  type for platform SIMDEXT containing SCALAR elements. SIMDEXT is one of
  SIMD extensions listed above, SCALAR is one of scalar type listed above.

In C++11 and beyond, type traits are still available but typedefs are also
provided.

- `nsimd::vector<SCALAR, SIMDEXT>` is a typedef to
  `nsimd::simd_traits<SCALAR, SIMDEXT>::vector`.
- `nsimd::vectorl<SCALAR, SIMDEXT>` is a typedef to
  `nsimd::simd_traits<SCALAR, SIMDEXT>::vectorl`.

The C++20 API does not bring different types for SIMD registers nor other
way to access the other SIMD types. It only brings concepts instead of usual
`typename`s. For more information cf. <concepts.md>.

Note that all macro and functions available in plain C are still available in
C++.

### List of operators provided by the base APIs

In the documentation we use interchangeably the terms "function" and
"operator".  For each operator FUNCNAME a C function (also available in C++)
named `nsimd_SIMDEXT_FUNCNAME_SCALAR` is available for each SCALAR type unless
specified otherwise.

For each FUNCNAME, a C macro (also available in C++) named `vFUNCNAME` is
available and takes as its last argument a SCALAR type.

For each FUNCNAME, a C macro (also available in C++) named `vFUNCNAME_a` is
available and takes as its last two arguments a SCALAR type and a SIMDEXT.

For each FUNCNAME, a C++ function in namespace `nsimd` named `FUNCNAME` is
available. It takes as its last argument the SCALAR type and can optionally
take the SIMDEXT as its last last argument.

For example, for the addition of two SIMD vectors `a` and `b` here are the
possibilities:

```c++
c = nsimd_add_avx_f32(a, b); // use AVX
c = nsimd::add(a, b, f32()); // use detected SIMDEXT
c = nsimd::add(a, b, f32(), avx()); // force AVX even if detected SIMDEXT is not AVX
c = vadd(a, b, f32); // use detected SIMDEXT
c = vadd_e(a, b, f32, avx); // force AVX even if detected SIMDEXT is not AVX
```

Here is a list of available FUNCNAME.

''')
        for op_name, op in operators.items():
            return_typ = common.get_one_type_generic(op.params[0], 'SCALAR')
            args = ', '.join([
                common.get_one_type_generic(p, 'SCALAR') + ' a' + str(count)
                for count, p in enumerate(op.params[1:])
            ])
            # The two spaces before the newline are written on purpose
            fout.write('- `{} {}({});`  \n'.format(return_typ, op.name, args))
            if len(op.types) < len(common.types):
                typs = ', '.join(['{}'.format(t) for t in op.types])
                fout.write('  Only available for {}\n'.format(typs))

        fout.write('''

## C advanced API (only available in C11)

The C advanced API takes advantage of the C11 `_Generic` keyword to provide
function overloading. Unlike the base API described above there is no need to
pass as arguments the base type of the SIMD extension. The information is
contained in the types provided by this API.

- `nsimd_pack_SCALAR_SIMDEXT` represents a SIMD vector containing
  SCALAR elements of SIMD extension SIMDEXT.
- `nsimd_packl_SCALAR_SIMDEXT` represents a SIMD vector of logicals
  for SCALAR elements of SIMD extension SIMDEXT.

There are versions of the above type without SIMDEXT for which the targeted
SIMD extension is automatically chosen.

- `nsimd_pack_SCALAR` represents a SIMD vector containing SCALAR elements.
- `nsimd_packl_SCALAR` represents a SIMD vector of logicals for SCALAR
  elements.

Generic types are also available:

- `nsimd_pack(SCALAR)` is a type to represent a SIMD vector containing SCALAR
  elements.  SCALAR must be one of scalar types listed above.
- `nsimd_packl(SCALAR)` is a type to represent a SIMD vector of logicals for
  SCALAR elements. SCALAR must be one of scalar types listed above.
- `nsimd_pack_a(SCALAR, SIMDEXT)` is a type to represent a SIMD vector
  containing SCALAR elements for the simd extension SIMDEXT. SCALAR must be one
  of scalar types listed above and SIMDEXT must be a valid SIMD extension.
- `nsimd_packl_a(SCALAR, SIMDEXT)` is a type to represent a SIMD vector of
  logicals for SCALAR elements for the simd extension SIMDEXT. SCALAR must be
  one of scalar types listed above and SIMDEXT must be a valid SIMD extension.

Finally, operators follow the naming: `nsimd_FUNCNAME` e.g. `nsimd_add`,
`nsimd_sub`.

## C++ advanced API

The C++ advanced API is called advanced not because it requires C++11 or above
but because it makes use of the particular implementation of ARM SVE by ARM
in their compiler. We do not know if GCC (and possibly MSVC in the distant
future) will use the same approach. Anyway the current implementation allows
us to put SVE SIMD vectors inside some kind of structs that behave like
standard structs. If you want to be sure to write portable code do *not* use
this API. Two new types are available.

- `nsimd::pack<SCALAR, N, SIMDEXT>` represents `N` SIMD vectors containing
  SCALAR elements of SIMD extension SIMDEXT. You can specify only the first
  template argument. The second defaults to 1 while the third defaults to the
  detected SIMDEXT.
- `nsimd::packl<SCALAR, N, SIMDEXT>` represents `N` SIMD vectors of logical
  type containing SCALAR elements of SIMD extension SIMDEXT. You can specify
  only the first template argument. The second defaults to 1 while the third
  defaults to the detected SIMDEXT.

Use N > 1 when declaring packs to have an unroll of N. This is particularly
useful on ARM.

Functions that take packs do not take any other argument unless specified
otherwise e.g. the load family of functions. It is impossible to determine
the kind of pack (unroll and SIMDEXT) from the type of a pointer. Therefore
in this case, the last argument must be a pack and this same type will then
return. Also some functions are available as C++ operators. They follow the
naming: `nsimd::FUNCNAME`.
''')