Ejemplo n.º 1
0
    def with_types(self, arg_id_to_dtype, callables_table):
        """Specialize this ``log2`` callable for the given argument types.

        :arg arg_id_to_dtype: mapping from argument id to a type object
            exposing ``numpy_dtype``; only id ``0`` is inspected.
        :arg callables_table: returned unchanged.
        :returns: a tuple ``(callable, callables_table)``. When the type of
            argument ``0`` is not known yet, the callable is a copy carrying
            *arg_id_to_dtype* as-is. Otherwise ids ``0`` and ``-1`` are both
            set to the (possibly promoted) floating point type and
            ``name_in_target`` is ``"log2f"`` or ``"log2"``.
        :raises TypeError: if the type is neither an integer, ``float32`` nor
            ``float64``.
        """
        if 0 not in arg_id_to_dtype or arg_id_to_dtype[0] is None:
            # the types provided aren't mature enough to specialize the
            # callable
            return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                    callables_table)

        dtype = arg_id_to_dtype[0].numpy_dtype

        if dtype.kind in ("u", "i"):
            # Signed and unsigned integers are cast to float32. This has to
            # be a dtype *instance*: the scalar class ``np.float32`` has no
            # ``.type`` attribute, which the dispatch below relies on.
            dtype = np.dtype(np.float32)

        if dtype.type == np.float32:
            name_in_target = "log2f"
        elif dtype.type == np.float64:
            name_in_target = "log2"
        else:
            raise TypeError(f"log2: unexpected type {dtype}")

        from loopy.types import NumpyType
        return (self.copy(name_in_target=name_in_target,
                          arg_id_to_dtype={
                              0: NumpyType(dtype),
                              -1: NumpyType(dtype)
                          }), callables_table)
Ejemplo n.º 2
0
    def map_constant(self, expr):
        """Infer the type of the literal constant *expr*.

        :returns: a one-element list holding the inferred :class:`NumpyType`.
        :raises TypeInferenceFailure: if an integer does not fit into 64 bits,
            if a complex constant cannot be represented exactly in single
            precision, or if the kind of constant is not recognised.
        """
        if is_integer(expr):
            # Take the first of int32/int64 whose range contains the value.
            for int_tp in [np.int32, np.int64]:
                limits = np.iinfo(int_tp)
                if limits.min <= expr <= limits.max:
                    return [NumpyType(np.dtype(int_tp))]

            raise TypeInferenceFailure("integer constant '%s' too large" %
                                       expr)

        dt = np.asarray(expr).dtype

        if hasattr(expr, "dtype"):
            # The constant already carries its own type.
            return [NumpyType(expr.dtype)]

        if isinstance(expr, np.number):
            # Numpy types are sized
            return [NumpyType(np.dtype(type(expr)))]

        if dt.kind == "f":
            # deduce the smaller type by default
            return [NumpyType(np.dtype(np.float32))]

        if dt.kind != "c":
            raise TypeInferenceFailure("Cannot deduce type of constant '%s'" %
                                       expr)

        if np.complex64(expr) == np.complex128(expr):
            # (COMPLEX_GUESS_LOGIC)
            # No precision is lost by 'guessing' single precision, use that.
            # This at least covers simple cases like '1j'.
            return [NumpyType(np.dtype(np.complex64))]

        # Codegen for complex types depends on exactly correct types.
        # Refuse temptation to guess.
        raise TypeInferenceFailure("Complex constant '%s' needs to "
                                   "be sized for type inference " % expr)
Ejemplo n.º 3
0
    def with_types(self, arg_id_to_dtype, callables_table):
        """Specialize this single-argument callable for the given types.

        :arg arg_id_to_dtype: mapping from argument id to a type object
            exposing ``numpy_dtype``; only ids ``-1`` and ``0`` are accepted.
        :arg callables_table: returned unchanged.
        :returns: a tuple ``(callable, callables_table)``.
        :raises LoopyError: if any id outside ``[-1, 0]`` is present.
        :raises LoopyTypeError: if argument ``0`` has a complex type.
        """
        name = self.name

        if any(not -1 <= arg_id <= 0 for arg_id in arg_id_to_dtype):
            raise LoopyError(f"'{name}' can take only one argument.")

        type_is_known = (0 in arg_id_to_dtype
                         and arg_id_to_dtype[0] is not None)
        if not type_is_known:
            # the types provided aren't mature enough to specialize the callable
            return self.copy(arg_id_to_dtype=arg_id_to_dtype), callables_table

        dtype = arg_id_to_dtype[0].numpy_dtype
        kind = dtype.kind

        if kind == "c":
            raise LoopyTypeError(f"{name} does not support type {dtype}")
        if kind in ("u", "i"):
            # ints and unsigned casted to float32
            dtype = np.float32

        specialized_types = {
            0: NumpyType(dtype),
            -1: NumpyType(dtype),
        }
        return (self.copy(name_in_target=name,
                          arg_id_to_dtype=specialized_types),
                callables_table)
Ejemplo n.º 4
0
def c_symbol_mangler(kernel, name):
    """Return ``(type, name)`` for a few C macro names, else ``None``.

    ``NAN`` is typed as ``float32``; ``INT_MAX`` and ``INT_MIN`` are typed as
    ``int32``. *kernel* is not used.
    """
    if name == "NAN":
        # float NAN as defined in C99 standard
        scalar_type = np.float32
    elif name in ("INT_MAX", "INT_MIN"):
        scalar_type = np.int32
    else:
        return None

    return NumpyType(np.dtype(scalar_type)), name
Ejemplo n.º 5
0
    def with_types(self, arg_id_to_dtype, callables_table):
        """Specialize this random-number callable for the given types.

        :arg arg_id_to_dtype: mapping from argument id to type; ids ``0`` and
            ``1`` must both be present and not ``None`` for specialization.
        :arg callables_table: returned unchanged.
        :returns: a tuple ``(callable, callables_table)``. The counter and key
            vector types are derived from the generator variant looked up in
            ``FUNC_NAMES_TO_RNG`` under ``self.name``.
        """
        types_are_known = (
            0 in arg_id_to_dtype and 1 in arg_id_to_dtype
            and arg_id_to_dtype[0] is not None
            and arg_id_to_dtype[1] is not None)
        if not types_are_known:
            # the types provided aren't mature enough to specialize the
            # callable
            return (self.copy(), callables_table)

        name = self.name
        target = self.target

        rng_variant = FUNC_NAMES_TO_RNG[name]

        from loopy.types import NumpyType
        word_dtype = {32: np.uint32, 64: np.uint64}[rng_variant.bits]
        ctr_dtype = target.vector_dtype(NumpyType(word_dtype),
                                        rng_variant.width)
        key_dtype = target.vector_dtype(NumpyType(word_dtype),
                                        rng_variant.key_width)

        fn = rng_variant.full_name

        # The three supported spellings differ only in the type stored under
        # id -1 and in the name used in the target.
        if name == fn:
            first_result_dtype = ctr_dtype
            name_in_target = fn + "_gen"
        elif name == fn + "_f32":
            first_result_dtype = target.vector_dtype(NumpyType(np.float32),
                                                     rng_variant.width)
            name_in_target = name
        elif name == fn + "_f64":
            first_result_dtype = target.vector_dtype(NumpyType(np.float64),
                                                     rng_variant.width)
            name_in_target = name
        else:
            return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                    callables_table)

        new_arg_id_to_dtype = {
            -1: first_result_dtype,
            -2: ctr_dtype,
            0: ctr_dtype,
            1: key_dtype,
        }
        return (self.copy(arg_id_to_dtype=new_arg_id_to_dtype,
                          name_in_target=name_in_target), callables_table)
Ejemplo n.º 6
0
    def parse_result_type(target, op_type):
        """Turn the reduction type name *op_type* into a :class:`NumpyType`.

        *op_type* is first tried as a NumPy dtype specifier. If that raises
        :class:`TypeError` and the name starts with ``vec_``, the remainder is
        looked up through ``target.get_or_register_dtype``.

        :raises LoopyError: if neither interpretation succeeds.
        """
        try:
            parsed = NumpyType(np.dtype(op_type))
        except TypeError:
            parsed = None

        if parsed is not None:
            return parsed

        vec_prefix = "vec_"
        if op_type.startswith(vec_prefix):
            element_name = op_type[len(vec_prefix):]
            try:
                return NumpyType(target.get_or_register_dtype(element_name))
            except AttributeError:
                # Fall through to the generic error below.
                pass

        raise LoopyError("unable to parse reduction type: '%s'" % op_type)
Ejemplo n.º 7
0
def opencl_function_mangler(kernel, name, arg_dtypes):
    """Resolve an OpenCL built-in function call to a :class:`CallMangleInfo`.

    :arg kernel: only ``kernel.target`` is used, and only for vector literals.
    :arg name: the function identifier; non-string identifiers are ignored.
    :arg arg_dtypes: sequence of argument types exposing ``numpy_dtype``.
    :returns: a :class:`CallMangleInfo`, or ``None`` if *name* is not handled
        here.
    :raises LoopyError: if a function listed in
        ``_CL_SIMPLE_MULTI_ARG_FUNCTIONS`` is called with the wrong number of
        arguments or with complex arguments.
    """
    if not isinstance(name, str):
        return None

    # OpenCL has min(), max() for integer types
    if name in ["max", "min"] and len(arg_dtypes) == 2:
        # np.find_common_type was deprecated in NumPy 1.25 and removed in
        # NumPy 2.0; np.result_type applies the standard promotion rules.
        dtype = np.result_type(
            *[arg_dtype.numpy_dtype for arg_dtype in arg_dtypes])

        if dtype.kind == "i":
            result_dtype = NumpyType(dtype)
            return CallMangleInfo(target_name=name,
                                  result_dtypes=(result_dtype, ),
                                  arg_dtypes=2 * (result_dtype, ))

    if name == "dot":
        # An entry of ``dtype.fields`` is (dtype, offset) or
        # (dtype, offset, title); only the element dtype is needed, so index
        # it instead of unpacking a fixed number of items.
        scalar_dtype = arg_dtypes[0].numpy_dtype.fields["s0"][0]
        return CallMangleInfo(target_name=name,
                              result_dtypes=(NumpyType(scalar_dtype), ),
                              arg_dtypes=(arg_dtypes[0], ) * 2)

    if name in _CL_SIMPLE_MULTI_ARG_FUNCTIONS:
        num_args = _CL_SIMPLE_MULTI_ARG_FUNCTIONS[name]
        if len(arg_dtypes) != num_args:
            raise LoopyError("%s takes %d arguments (%d received)" %
                             (name, num_args, len(arg_dtypes)))

        dtype = np.result_type(
            *[arg_dtype.numpy_dtype for arg_dtype in arg_dtypes])

        if dtype.kind == "c":
            raise LoopyError("%s does not support complex numbers" % name)

        result_dtype = NumpyType(dtype)
        return CallMangleInfo(target_name=name,
                              result_dtypes=(result_dtype, ),
                              arg_dtypes=(result_dtype, ) * num_args)

    if name in VECTOR_LITERAL_FUNCS:
        base_tp_name, dtype, count = VECTOR_LITERAL_FUNCS[name]

        if count != len(arg_dtypes):
            return None

        # The "function" is emitted as a C cast such as "(float4) ".
        return CallMangleInfo(target_name="(%s%d) " % (base_tp_name, count),
                              result_dtypes=(kernel.target.vector_dtype(
                                  NumpyType(dtype), count), ),
                              arg_dtypes=(NumpyType(dtype), ) * count)

    return None
Ejemplo n.º 8
0
def opencl_symbol_mangler(kernel, name):
    """Return ``(type, name)`` for recognised OpenCL constants, else ``None``.

    Names starting with ``FLT_`` and the name ``INFINITY`` are typed as
    ``float32``, names starting with ``DBL_`` as ``float64``. Names starting
    with ``M_`` are ``float32`` when they end in ``_F`` and ``float64``
    otherwise. *kernel* is not used.
    """
    # FIXME: should be more picky about exact names
    if name.startswith("FLT_"):
        scalar_type = np.float32
    elif name.startswith("DBL_"):
        scalar_type = np.float64
    elif name.startswith("M_"):
        scalar_type = np.float32 if name.endswith("_F") else np.float64
    elif name == "INFINITY":
        scalar_type = np.float32
    else:
        return None

    return NumpyType(np.dtype(scalar_type)), name
Ejemplo n.º 9
0
def bessel_mangler(kernel, identifier, arg_dtypes):
    """A function "mangler" to make Bessel functions
    digestible for :mod:`loopy`.

    See argument *function_manglers* to :func:`loopy.make_kernel`.

    Handles ``hank1_01`` and ``bessel_jv_two``; each is redirected to its
    ``_complex`` variant when the relevant argument is complex. Returns
    ``None`` for any other identifier and raises
    :class:`NotImplementedError` for targets other than ``PyOpenCLTarget``.
    """

    from loopy.target.pyopencl import PyOpenCLTarget
    if not isinstance(kernel.target, PyOpenCLTarget):
        raise NotImplementedError(
            "Only the PyOpenCLTarget is supported as of now")

    if identifier == "hank1_01":
        is_complex = arg_dtypes[0].is_complex()
        if is_complex:
            identifier = "hank1_01_complex"
        x_scalar = np.complex128 if is_complex else np.float64

        return lp.CallMangleInfo(
            target_name=identifier,
            result_dtypes=(NumpyType(np.dtype(hank1_01_result_dtype)), ),
            arg_dtypes=(NumpyType(np.dtype(x_scalar)), ))

    if identifier == "bessel_jv_two":
        # Here the second argument decides between the two variants.
        is_complex = arg_dtypes[1].is_complex()
        if is_complex:
            identifier = "bessel_jv_two_complex"
        x_scalar = np.complex128 if is_complex else np.float64

        return lp.CallMangleInfo(
            target_name=identifier,
            result_dtypes=(NumpyType(
                np.dtype(bessel_j_two_result_dtype)), ),
            arg_dtypes=(
                NumpyType(np.dtype(np.int32)),
                NumpyType(np.dtype(x_scalar)),
            ))

    return None
Ejemplo n.º 10
0
        def process_dtype(dtype):
            """Wrap NumPy scalar classes and dtypes in :class:`NumpyType`.

            The wrapper is bound to ``self.kernel.target``. Any other value
            is returned unchanged.
            """
            if isinstance(dtype, type) and issubclass(dtype, np.generic):
                dtype = np.dtype(dtype)

            if not isinstance(dtype, np.dtype):
                return dtype

            return NumpyType(dtype, self.kernel.target)
Ejemplo n.º 11
0
def pyopencl_function_mangler(target, name, arg_dtypes):
    """Resolve single-argument complex math functions for PyOpenCL.

    Only calls with exactly one complex argument and a string *name* are
    considered. The target function name is ``cfloat_<name>`` or
    ``cdouble_<name>``, chosen by the argument's ``numpy_dtype``.

    :returns: a :class:`CallMangleInfo`, or ``None`` if the call is not
        handled here.
    :raises RuntimeError: for complex types other than ``complex64`` and
        ``complex128``.
    """
    if len(arg_dtypes) != 1 or not isinstance(name, str):
        return None

    arg_dtype, = arg_dtypes
    if not arg_dtype.is_complex():
        return None

    if arg_dtype.numpy_dtype == np.complex64:
        tpname = "cfloat"
    elif arg_dtype.numpy_dtype == np.complex128:
        tpname = "cdouble"
    else:
        raise RuntimeError("unexpected complex type '%s'" % arg_dtype)

    # Functions whose result has the same complex type as the argument.
    complex_valued = ["sqrt", "exp", "log",
                      "sin", "cos", "tan",
                      "sinh", "cosh", "tanh",
                      "conj"]
    if name in complex_valued:
        return CallMangleInfo(
                target_name="%s_%s" % (tpname, name),
                result_dtypes=(arg_dtype,),
                arg_dtypes=(arg_dtype,))

    # Functions whose result type is taken from the real part of the
    # argument's scalar type.
    if name in ["real", "imag", "abs"]:
        real_part = arg_dtype.numpy_dtype.type(0).real
        return CallMangleInfo(
                target_name="%s_%s" % (tpname, name),
                result_dtypes=(NumpyType(np.dtype(real_part)),),
                arg_dtypes=(arg_dtype,))

    return None
Ejemplo n.º 12
0
    def arg_to_dtype_set(self, kwargs):
        """Collect run-time types for kernel arguments that lack a dtype.

        :arg kwargs: mapping from argument name to the value passed by the
            caller.
        :returns: ``None`` if ``self.has_runtime_typed_args`` is false,
            otherwise a :class:`frozenset` of ``(name, NumpyType)`` pairs for
            every argument whose declared ``dtype`` is ``None`` and whose
            value has a ``dtype`` attribute.
        """
        if not self.has_runtime_typed_args:
            return None

        from loopy.types import NumpyType
        target = self.kernel.target

        impl_arg_to_arg = self.kernel.impl_arg_to_arg
        arg_to_dtype = {}
        for arg_name, val in kwargs.items():
            arg = impl_arg_to_arg.get(arg_name, None)

            if arg is None:
                # offsets, strides and such
                continue

            if arg.dtype is not None or val is None:
                # Either already typed, or nothing to infer the type from.
                continue

            try:
                val_dtype = val.dtype
            except AttributeError:
                continue

            arg_to_dtype[arg_name] = NumpyType(val_dtype, target)

        return frozenset(arg_to_dtype.items())
Ejemplo n.º 13
0
    def with_types(self, arg_id_to_dtype, kernel, callables_table):
        """Specialize this callable once all positional types are known.

        :arg arg_id_to_dtype: mapping from argument id to a type object
            exposing ``numpy_dtype``.
        :arg kernel: not used by this implementation.
        :arg callables_table: returned unchanged.
        :returns: a tuple ``(callable, callables_table)``. If the mapping is
            empty or any id in ``range(len(arg_id_to_dtype))`` is missing or
            ``None``, the callable is a copy carrying *arg_id_to_dtype*
            as-is. Otherwise each id is mapped to a fresh
            :class:`NumpyType` and id ``-1`` gets the type of argument ``0``.
        """
        if not arg_id_to_dtype:
            # No types at all: nothing to specialize on. Without this guard
            # the ``dtypes[0]`` lookup below raises KeyError.
            return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                    callables_table)

        dtypes = OrderedDict()
        # Ids are looked up by position 0..len-1, not by iterating the keys.
        for i in range(len(arg_id_to_dtype)):
            arg_dtype = arg_id_to_dtype.get(i)
            if arg_dtype is None:
                # the types provided aren't mature enough to specialize the
                # callable
                return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                        callables_table)

            dtypes[i] = NumpyType(arg_dtype.numpy_dtype)

        dtypes[-1] = NumpyType(dtypes[0].dtype)

        return (self.copy(name_in_target=self.name_in_target,
                arg_id_to_dtype=dtypes),
                callables_table)
Ejemplo n.º 14
0
    def get_or_register_dtype(self, names, dtype=None):
        """Forward to the wrapped registry, unwrapping atomic types first.

        An :class:`AtomicNumpyType` is replaced by a plain
        :class:`NumpyType` built from its ``dtype`` before the call is
        delegated to ``self.wrapped_registry``. Any other *dtype*, including
        ``None``, is passed through unchanged.
        """
        if dtype is not None:
            from loopy.types import AtomicNumpyType, NumpyType
            if isinstance(dtype, AtomicNumpyType):
                dtype = NumpyType(dtype.dtype)

        return self.wrapped_registry.get_or_register_dtype(names, dtype)
Ejemplo n.º 15
0
        def process_dtype(dtype):
            """Wrap NumPy scalar classes and dtypes in ``NumpyType``.

            The wrapper is bound to ``self.program.target``. Any other value
            is returned unchanged.
            """
            if isinstance(dtype, type) and issubclass(dtype, np.generic):
                dtype = np.dtype(dtype)

            if not isinstance(dtype, np.dtype):
                return dtype

            from loopy.types import NumpyType
            return NumpyType(dtype, self.program.target)
Ejemplo n.º 16
0
    def map_lookup(self, expr):
        """Infer the type of the attribute lookup ``expr.aggregate.<name>``.

        :returns: the (empty) result of the aggregate if its type is not
            known yet, otherwise a one-element list with the
            :class:`NumpyType` of the field named ``expr.name``.
        :raises LoopyError: if the aggregate's dtype has no fields, or has no
            field called ``expr.name``.
        """
        agg_result = self.rec(expr.aggregate)
        if not agg_result:
            return agg_result

        from loopy.diagnostic import LoopyError

        numpy_dtype = agg_result[0].numpy_dtype
        fields = numpy_dtype.fields
        if fields is None:
            # ``fields`` is None for dtypes that are not structured; report
            # that explicitly instead of failing on the subscript below.
            raise LoopyError("cannot look up attribute '%s' in "
                             "non-aggregate expression '%s'" %
                             (expr.name, expr.aggregate))

        try:
            field = fields[expr.name]
        except KeyError:
            raise LoopyError("cannot look up attribute '%s' in "
                             "aggregate expression '%s' of dtype '%s'" %
                             (expr.name, expr.aggregate,
                              numpy_dtype)) from None

        # A field entry starts with the field's dtype.
        return [NumpyType(field[0])]
Ejemplo n.º 17
0
    def vector_dtype(self, base, count):
        """Return the PyOpenCL vector type with *count* entries of *base*.

        The lookup table is taken from ``pyopencl.cltypes.vec_types`` and,
        if that module cannot be imported, from ``pyopencl.array.vec.types``.
        The result is a :class:`NumpyType` bound to this target.
        """
        try:
            import pyopencl.cltypes as cltypes
            type_table = cltypes.vec_types
        except ImportError:
            from pyopencl.array import vec
            type_table = vec.types

        lookup_key = (base.numpy_dtype, count)
        return NumpyType(type_table[lookup_key], target=self)
Ejemplo n.º 18
0
    def with_types(self, arg_id_to_dtype, callables_table):
        """Return a copy whose id ``-1`` type is ``int32``.

        Entries of *arg_id_to_dtype* whose type is ``None`` are dropped; all
        others are kept as they are. *callables_table* is returned unchanged
        as the second element of the result tuple.
        """
        known_types = {}
        for arg_id, dtype in arg_id_to_dtype.items():
            if dtype is None:
                continue
            known_types[arg_id] = dtype

        known_types[-1] = NumpyType(np.int32)

        return (self.copy(arg_id_to_dtype=known_types),
                callables_table)
Ejemplo n.º 19
0
    def map_quotient(self, expr):
        """Infer the result type of the division *expr*.

        If every inferred type of numerator and denominator is integral
        (this includes the case where none has been inferred), the result is
        ``float64``. Otherwise the two type sets are handed to
        ``self.combine``.
        """
        numerator_types = self.rec(expr.numerator)
        denominator_types = self.rec(expr.denominator)

        operand_types = numerator_types + denominator_types

        if any(not dtype.is_integral() for dtype in operand_types):
            return self.combine([numerator_types, denominator_types])

        # both integers
        return [NumpyType(np.dtype(np.float64))]
Ejemplo n.º 20
0
def bessel_function_mangler(kernel, name, arg_dtypes):
    """Map two-argument ``bessel_j`` / ``bessel_y`` calls to target functions.

    ``bessel_j`` is mapped to ``bessel_jv`` and ``bessel_y`` to
    ``bessel_yn``; both are declared as taking ``(int32, float64)`` and
    returning ``float64``. *kernel* is not used.

    :returns: a :class:`CallMangleInfo`, or ``None`` for any other name or
        argument count.
    :raises TypeError: if the first argument is not a signed integer type.
    """
    if name == "bessel_j":
        target_name = "bessel_jv"
    elif name == "bessel_y":
        target_name = "bessel_yn"
    else:
        return None

    if len(arg_dtypes) != 2:
        return None

    n_dtype, _x_dtype = arg_dtypes

    # *technically* takes a float, but let's not worry about that.
    if n_dtype.numpy_dtype.kind != "i":
        # The message previously contained a literal, uninterpolated "%s".
        raise TypeError("%s expects an integer first argument" % name)

    # Imported only once the call is known to be handled and valid.
    from loopy.types import NumpyType
    from loopy.kernel.data import CallMangleInfo
    return CallMangleInfo(
            target_name,
            (NumpyType(np.float64),),
            (NumpyType(np.int32), NumpyType(np.float64)),
            )
Ejemplo n.º 21
0
def random123_function_mangler(kernel, name, arg_dtypes):
    """Resolve a random-number generator call to a :class:`CallMangleInfo`.

    The generator variant is looked up in ``FUNC_NAMES_TO_RNG`` by *name*;
    unknown names yield ``None``. Counter and key vector types are built via
    ``kernel.target.vector_dtype``. *arg_dtypes* is not inspected.
    """
    try:
        rng_variant = FUNC_NAMES_TO_RNG[name]
    except KeyError:
        return None

    from loopy.types import NumpyType
    target = kernel.target
    word_dtype = {32: np.uint32, 64: np.uint64}[rng_variant.bits]
    ctr_dtype = target.vector_dtype(NumpyType(word_dtype), rng_variant.width)
    key_dtype = target.vector_dtype(NumpyType(word_dtype), rng_variant.key_width)

    from loopy.kernel.data import CallMangleInfo
    fn = rng_variant.full_name

    # The supported spellings differ only in the target name and in the type
    # of the first result.
    if name == fn:
        target_name = fn + "_gen"
        first_result_dtype = ctr_dtype
    elif name == fn + "_f32":
        target_name = name
        first_result_dtype = target.vector_dtype(
                NumpyType(np.float32), rng_variant.width)
    elif name == fn + "_f64":
        target_name = name
        first_result_dtype = target.vector_dtype(
                NumpyType(np.float64), rng_variant.width)
    else:
        return None

    return CallMangleInfo(
            target_name=target_name,
            result_dtypes=(first_result_dtype, ctr_dtype),
            arg_dtypes=(ctr_dtype, key_dtype))
Ejemplo n.º 22
0
def dtype_to_type_context(target, dtype):
    """Return the one-letter type context for *dtype*.

    ``"i"`` for integral types, ``"d"`` for ``float64``/``complex128``,
    ``"f"`` for ``float32``/``complex64``. For vector types (as reported by
    ``target.is_vector_dtype``) the context of the ``x`` component is
    returned. Anything else yields ``None``.
    """
    from loopy.types import NumpyType

    if dtype.is_integral():
        return "i"

    if isinstance(dtype, NumpyType):
        if dtype.dtype in [np.float64, np.complex128]:
            return "d"
        if dtype.dtype in [np.float32, np.complex64]:
            return "f"

    if not target.is_vector_dtype(dtype):
        return None

    # Recurse on the element type of the vector.
    component_dtype = dtype.numpy_dtype.fields["x"][0]
    return dtype_to_type_context(target, NumpyType(component_dtype))
Ejemplo n.º 23
0
    def map_lookup(self, expr):
        """Infer the type of the attribute lookup ``expr.aggregate.<name>``.

        :returns: the (empty) result of the aggregate if its type is not
            known yet, otherwise a one-element list with the
            :class:`NumpyType` of the field named ``expr.name``.
        :raises LoopyError: if the aggregate's dtype has no fields, or has no
            field called ``expr.name``.
        """
        agg_result = self.rec(expr.aggregate)
        if not agg_result:
            return agg_result

        numpy_dtype = agg_result[0].numpy_dtype
        fields = numpy_dtype.fields
        if fields is None:
            # The attribute is ``expr.name`` and the expression is
            # ``expr.aggregate``; the two were previously passed in swapped
            # order, producing a misleading message.
            raise LoopyError("cannot look up attribute '%s' in "
                             "non-aggregate expression '%s'" %
                             (expr.name, expr.aggregate))

        try:
            field = fields[expr.name]
        except KeyError:
            # The KeyError adds nothing beyond the message, so its context
            # is suppressed.
            raise LoopyError("cannot look up attribute '%s' in "
                             "aggregate expression '%s' of dtype '%s'" %
                             (expr.name, expr.aggregate,
                              numpy_dtype)) from None

        # A field entry starts with the field's dtype.
        dtype = field[0]
        return [NumpyType(dtype)]
Ejemplo n.º 24
0
    def emit_atomic_update(self, codegen_state, lhs_atomicity, lhs_var,
                           lhs_expr, rhs_expr, lhs_dtype, rhs_type_context):
        """Generate code for an atomic update of *lhs_expr* with *rhs_expr*.

        Only :class:`NumpyType` left-hand sides of type ``int32``, ``int64``,
        ``float32`` or ``float64`` are handled:

        * If *rhs_expr* is a :class:`pymbolic.primitives.Sum`, a single
          ``atomicAdd(&lhs, rest)`` statement is returned, where ``rest`` is
          the sum of all children that do not compare equal to *lhs_expr*.
        * Otherwise a block is returned that declares two temporaries and
          retries an ``atomicCAS`` in a do-while loop.

        *lhs_atomicity* and *lhs_var* are accepted but not used in this body.

        :raises NotImplementedError: for any other *lhs_dtype*.
        """

        from pymbolic.primitives import Sum
        from cgen import Statement
        from pymbolic.mapper.stringifier import PREC_NONE

        if isinstance(lhs_dtype, NumpyType) and lhs_dtype.numpy_dtype in [
                np.int32, np.int64, np.float32, np.float64
        ]:
            # atomicAdd
            if isinstance(rhs_expr, Sum):
                ecm = self.get_expression_to_code_mapper(codegen_state)

                # Drop the summands equal to the left-hand side; what remains
                # is the increment handed to atomicAdd.
                # NOTE(review): nothing here checks that lhs_expr actually
                # occurs among the children -- confirm callers guarantee it.
                new_rhs_expr = Sum(
                    tuple(c for c in rhs_expr.children if c != lhs_expr))
                lhs_expr_code = ecm(lhs_expr)
                rhs_expr_code = ecm(new_rhs_expr)

                return Statement("atomicAdd(&{}, {})".format(
                    lhs_expr_code, rhs_expr_code))
            else:
                from cgen import Block, DoWhile, Assign
                from loopy.target.c import POD
                # Fresh names for the value read and the value to be stored.
                old_val_var = codegen_state.var_name_generator("loopy_old_val")
                new_val_var = codegen_state.var_name_generator("loopy_new_val")

                # Register both temporaries with the expression-to-code
                # mapper, typed like the left-hand side.
                from loopy.kernel.data import TemporaryVariable
                ecm = codegen_state.expression_to_code_mapper.with_assignments(
                    {
                        old_val_var: TemporaryVariable(old_val_var, lhs_dtype),
                        new_val_var: TemporaryVariable(new_val_var, lhs_dtype),
                    })

                lhs_expr_code = ecm(lhs_expr,
                                    prec=PREC_NONE,
                                    type_context=None)

                from pymbolic.mapper.substitutor import make_subst_func
                from pymbolic import var
                from loopy.symbolic import SubstitutionMapper

                # In the right-hand side, occurrences of the left-hand side
                # are replaced by the "old value" temporary.
                subst = SubstitutionMapper(
                    make_subst_func({lhs_expr: var(old_val_var)}))
                rhs_expr_code = ecm(subst(rhs_expr),
                                    prec=PREC_NONE,
                                    type_context=rhs_type_context,
                                    needed_dtype=lhs_dtype)

                cast_str = ""
                old_val = old_val_var
                new_val = new_val_var

                if lhs_dtype.numpy_dtype.kind == "f":
                    # For floating point types the operands handed to
                    # atomicCAS are reinterpreted through a pointer cast to
                    # "int" (float32) or "long" (float64).
                    if lhs_dtype.numpy_dtype == np.float32:
                        ctype = "int"
                    elif lhs_dtype.numpy_dtype == np.float64:
                        ctype = "long"
                    else:
                        raise AssertionError()

                    old_val = "*(%s *) &" % ctype + old_val
                    new_val = "*(%s *) &" % ctype + new_val
                    cast_str = "(%s *) " % (ctype)

                # Declarations of the two temporaries, followed by a do-while
                # loop whose body re-reads the left-hand side and recomputes
                # the new value; the loop repeats while atomicCAS returns
                # something other than the old value.
                return Block([
                    POD(self, NumpyType(lhs_dtype.dtype, target=self.target),
                        old_val_var),
                    POD(self, NumpyType(lhs_dtype.dtype, target=self.target),
                        new_val_var),
                    DoWhile(
                        "atomicCAS("
                        "%(cast_str)s&(%(lhs_expr)s), "
                        "%(old_val)s, "
                        "%(new_val)s"
                        ") != %(old_val)s" % {
                            "cast_str": cast_str,
                            "lhs_expr": lhs_expr_code,
                            "old_val": old_val,
                            "new_val": new_val,
                        },
                        Block([
                            Assign(old_val_var, lhs_expr_code),
                            Assign(new_val_var, rhs_expr_code),
                        ]))
                ])
        else:
            raise NotImplementedError("atomic update for '%s'" % lhs_dtype)
Ejemplo n.º 25
0
    def cuda_with_types(self, arg_id_to_dtype, callables_table):
        """Specialize a CUDA built-in callable for the given argument types.

        :arg arg_id_to_dtype: mapping from argument id to a type object
            exposing ``numpy_dtype`` (or ``None`` if not yet known).
        :arg callables_table: returned unchanged.
        :returns: always a tuple ``(callable, callables_table)``. Functions
            listed in ``_CUDA_SPECIFIC_FUNCTIONS`` get all ids from ``-1`` up
            to the argument count set to the promoted common type. ``dot``
            keeps the input type for both arguments and uses the type of the
            ``x`` field as the result. Any other name yields a copy carrying
            *arg_id_to_dtype* as-is.
        :raises LoopyError: if an argument id is out of range.
        :raises LoopyTypeError: if an argument has a complex type.
        """

        name = self.name

        if name in _CUDA_SPECIFIC_FUNCTIONS:
            num_args = _CUDA_SPECIFIC_FUNCTIONS[name]

            # {{{ sanity checks

            for arg_id, dtype in arg_id_to_dtype.items():
                if not -1 <= arg_id < num_args:
                    raise LoopyError("%s can take only %d arguments." %
                                     (name, num_args))

                # The kind lives on the NumPy dtype, consistent with the
                # promotion below which also goes through ``numpy_dtype``.
                if dtype is not None and dtype.numpy_dtype.kind == "c":
                    raise LoopyTypeError(
                        f"'{name}' does not support complex arguments.")

            # }}}

            for i in range(num_args):
                if i not in arg_id_to_dtype or arg_id_to_dtype[i] is None:
                    # the types provided aren't mature enough to specialize the
                    # callable
                    return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                            callables_table)

            # np.find_common_type was deprecated in NumPy 1.25 and removed in
            # NumPy 2.0; np.result_type applies the standard promotion rules.
            common_dtype = np.result_type(*[
                dtype.numpy_dtype
                for arg_id, dtype in arg_id_to_dtype.items() if arg_id >= 0
            ])

            updated_arg_id_to_dtype = {
                arg_id: NumpyType(common_dtype)
                for arg_id in range(-1, num_args)
            }

            return (self.copy(name_in_target=name,
                              arg_id_to_dtype=updated_arg_id_to_dtype),
                    callables_table)

        if name == "dot":
            # CUDA dot function:
            # Performs dot product. Input types: vector and return type: scalar.
            for i in range(2):
                if i not in arg_id_to_dtype or arg_id_to_dtype[i] is None:
                    # the types provided aren't mature enough to specialize the
                    # callable
                    return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                            callables_table)

            input_dtype = arg_id_to_dtype[0]

            # A ``fields`` entry starts with the field's dtype; index it
            # rather than unpacking a fixed number of items.
            scalar_dtype = input_dtype.numpy_dtype.fields["x"][0]

            # Return the same (callable, callables_table) pair as every other
            # path; previously only the bare callable was returned here.
            return (self.copy(name_in_target=name,
                              arg_id_to_dtype={
                                  0: input_dtype,
                                  1: input_dtype,
                                  -1: NumpyType(scalar_dtype)
                              }), callables_table)

        return (self.copy(arg_id_to_dtype=arg_id_to_dtype), callables_table)
Ejemplo n.º 26
0
def _with_c_float_suffix(name, dtype):
    """Return *name* with the C suffix selecting the overload for *dtype*.

    :raises LoopyTypeError: if *dtype* is not one of the floating point
        types handled below.
    """
    if dtype == np.float64:
        return name  # e.g. fabs
    if dtype == np.float32:
        return name + "f"  # e.g. fabsf
    # np.longdouble is the type np.float128 aliases on platforms that provide
    # it; unlike np.float128, the name is available on every platform.
    if dtype == np.longdouble:
        return name + "l"  # e.g. fabsl
    raise LoopyTypeError("%s does not support type %s" % (name, dtype))


def c_math_mangler(target, name, arg_dtypes, modify_name=True):
    """Function mangler for math functions defined in the C standard.

    ``abs``, ``min`` and ``max`` are first converted to ``fabs``, ``fmin``
    and ``fmax``.

    :arg target: not used by this function.
    :arg name: the function identifier; anything that is not a :class:`str`
        is not handled.
    :arg arg_dtypes: sequence of argument types, each exposing a
        ``numpy_dtype`` attribute.
    :arg modify_name: if *True*, function names are modified according to
        the floating point types of the arguments (e.g. ``cos`` for double,
        ``cosf`` for float). This should be set to *True* for C and CUDA,
        *False* for OpenCL.
    :returns: a :class:`CallMangleInfo`, or *None* if the call is not
        handled here (unknown name, wrong argument count, or arguments that
        are not floating point).
    :raises LoopyTypeError: if a binary function receives complex arguments,
        or if *modify_name* is set and the floating point type has no
        matching suffix.
    """
    if not isinstance(name, str):
        return None

    if name in ["abs", "min", "max"]:
        name = "f" + name

    # unary functions
    if (name in ["fabs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh",
                 "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"]
            and len(arg_dtypes) == 1
            and arg_dtypes[0].numpy_dtype.kind == "f"):

        if modify_name:
            name = _with_c_float_suffix(name, arg_dtypes[0].numpy_dtype)

        return CallMangleInfo(
                target_name=name,
                result_dtypes=arg_dtypes,
                arg_dtypes=arg_dtypes)

    # binary functions
    if (name in ["fmax", "fmin", "copysign"]
            and len(arg_dtypes) == 2):

        # np.find_common_type is gone as of NumPy 2.0; np.result_type
        # performs the promotion of the argument dtypes.
        common_dtype = np.result_type(
            *[arg_dtype.numpy_dtype for arg_dtype in arg_dtypes])

        if common_dtype.kind == "c":
            raise LoopyTypeError("%s does not support complex numbers" % name)

        if common_dtype.kind == "f":
            if modify_name:
                name = _with_c_float_suffix(name, common_dtype)

            result_dtype = NumpyType(common_dtype)
            return CallMangleInfo(
                    target_name=name,
                    result_dtypes=(result_dtype,),
                    arg_dtypes=2*(result_dtype,))

    return None
Ejemplo n.º 27
0
Archivo: data.py Proyecto: mmmika/loopy
    def __init__(self, name, dtype=None, shape=(), address_space=None,
            dim_tags=None, offset=0, dim_names=None, strides=None, order=None,
            base_indices=None, storage_shape=None,
            base_storage=None, initializer=None, read_only=False,
            _base_storage_access_may_be_aliasing=False, **kwargs):
        """Validate and default the arguments, then initialize the base class.

        :arg name: name of the variable; it is interned before being passed
            to :class:`ArrayBase`.
        :arg dtype: :class:`loopy.auto` or a :class:`numpy.dtype`. If it is
            ``auto`` or *None* and an *initializer* is given, the dtype of
            the initializer is used.
        :arg shape: :class:`loopy.auto` or a shape tuple. If it is ``auto``
            and an *initializer* is given, the shape of the initializer is
            used.
        :arg address_space: defaults to ``auto`` if *None*.
        :arg order: defaults to ``"C"`` if *None*.
        :arg base_indices: :class:`loopy.auto` or a tuple of base indices;
            defaults to one zero per entry of *shape*.
        :arg initializer: *None* or a :class:`numpy.ndarray`. Requires
            ``offset == 0`` and ``read_only=True`` and cannot be combined
            with *base_storage*.
        :arg scope: deprecated keyword-only spelling of *address_space*.

        :raises ValueError: if both *scope* and *address_space* are given.
        :raises LoopyError: if the arguments are inconsistent (see the
            individual checks below).
        """

        scope = kwargs.pop("scope", None)
        if scope is not None:
            warn("Passing 'scope' is deprecated. Use 'address_space' instead.",
                    DeprecationWarning, stacklevel=2)

            if address_space is not None:
                raise ValueError("only one of 'scope' and 'address_space' "
                        "may be specified")

            address_space = scope

        # Once defaulted to 'auto', address_space can no longer be None, so
        # the former "must not be None" check was unreachable and is dropped.
        if address_space is None:
            address_space = auto

        if initializer is not None:
            if not isinstance(initializer, np.ndarray):
                raise LoopyError(
                        "temporary variable '%s': "
                        "initializer must be None or a numpy array"
                        % name)

            if offset != 0:
                raise LoopyError(
                        "temporary variable '%s': "
                        "offset must be 0 if initializer specified"
                        % name)

            from loopy.types import NumpyType, to_loopy_type
            if dtype is auto or dtype is None:
                # No explicit dtype: take it from the initializer.
                dtype = NumpyType(initializer.dtype)
            elif to_loopy_type(dtype) != to_loopy_type(initializer.dtype):
                raise LoopyError(
                        "temporary variable '%s': "
                        "dtype of initializer does not match "
                        "dtype of array."
                        % name)

            if shape is auto:
                shape = initializer.shape

        if order is None:
            order = "C"

        if base_indices is None:
            # NOTE(review): len() needs a sized shape here; a shape that is
            # still 'auto' at this point would presumably fail -- confirm
            # how callers are expected to combine shape=auto with
            # base_indices=None.
            base_indices = (0,) * len(shape)

        if not read_only and initializer is not None:
            raise LoopyError(
                    "temporary variable '%s': "
                    "read-write variables with initializer "
                    "are not currently supported "
                    "(did you mean to set read_only=True?)"
                    % name)

        if base_storage is not None and initializer is not None:
            raise LoopyError(
                    "temporary variable '%s': "
                    "base_storage and initializer are "
                    "mutually exclusive"
                    % name)

        if base_storage is None and _base_storage_access_may_be_aliasing:
            raise LoopyError(
                    "temporary variable '%s': "
                    "_base_storage_access_may_be_aliasing option, but no "
                    "base_storage given!"
                    % name)

        ArrayBase.__init__(self, name=intern(name),
                dtype=dtype, shape=shape, strides=strides,
                dim_tags=dim_tags, offset=offset, dim_names=dim_names,
                order=order,
                base_indices=base_indices,
                address_space=address_space,
                storage_shape=storage_shape,
                base_storage=base_storage,
                initializer=initializer,
                read_only=read_only,
                _base_storage_access_may_be_aliasing=(
                    _base_storage_access_may_be_aliasing),
                **kwargs)
Ejemplo n.º 28
0
    def emit_atomic_update(self, codegen_state, lhs_atomicity, lhs_var,
                           lhs_expr, rhs_expr, lhs_dtype, rhs_type_context):
        """Generate code for an atomic update as a compare-exchange retry loop.

        The emitted block declares two temporaries (old and new value) and a
        do-while loop that reads the current value of *lhs_expr*, evaluates
        *rhs_expr* with occurrences of *lhs_expr* replaced by the value just
        read, and repeats until ``atomic_cmpxchg`` (4-byte types) or
        ``atom_cmpxchg`` (8-byte types) returns the previously read value.
        Floating point values are passed to the exchange function
        reinterpreted as ``int``/``long`` through pointer casts.

        :arg codegen_state: provides the variable name generator and the
            expression-to-code mapper.
        :arg lhs_atomicity: not used by this implementation.
        :arg lhs_var: the variable being updated; its kind and address space
            select the ``__global``/``__local`` qualifier of the pointer
            cast used for floating point types.
        :arg lhs_expr: the expression being assigned to.
        :arg rhs_expr: the expression computing the new value.
        :arg lhs_dtype: type of the left-hand side; only :class:`NumpyType`
            wrapping int32, int64, float32 or float64 is supported.
        :arg rhs_type_context: type context handed to the expression-to-code
            mapper for the right-hand side.

        :raises NotImplementedError: for any other *lhs_dtype*.
        :raises LoopyError: if *lhs_var* is not an array argument or
            temporary in global or local address space (floating point
            types only).
        """
        from pymbolic.mapper.stringifier import PREC_NONE

        # FIXME: Could detect operations, generate atomic_{add,...} when
        # appropriate.

        if not (isinstance(lhs_dtype, NumpyType)
                and lhs_dtype.numpy_dtype in [
                    np.int32, np.int64, np.float32, np.float64]):
            raise NotImplementedError("atomic update for '%s'" % lhs_dtype)

        from cgen import Block, DoWhile, Assign
        from loopy.target.c import POD
        from loopy.kernel.data import (
            TemporaryVariable, AddressSpace, ArrayArg)
        from pymbolic.mapper.substitutor import make_subst_func
        from pymbolic import var
        from loopy.symbolic import SubstitutionMapper

        numpy_dtype = lhs_dtype.numpy_dtype

        old_val_var = codegen_state.var_name_generator("loopy_old_val")
        new_val_var = codegen_state.var_name_generator("loopy_new_val")

        # Make the two temporaries known to the expression-to-code mapper.
        ecm = codegen_state.expression_to_code_mapper.with_assignments({
            old_val_var: TemporaryVariable(old_val_var, lhs_dtype),
            new_val_var: TemporaryVariable(new_val_var, lhs_dtype),
        })

        lhs_expr_code = ecm(lhs_expr, prec=PREC_NONE, type_context=None)

        # The new value is computed from the value read into old_val_var
        # rather than from a second read of the left-hand side.
        subst = SubstitutionMapper(
            make_subst_func({lhs_expr: var(old_val_var)}))
        rhs_expr_code = ecm(subst(rhs_expr),
                            prec=PREC_NONE,
                            type_context=rhs_type_context,
                            needed_dtype=lhs_dtype)

        if numpy_dtype.itemsize == 4:
            func_name = "atomic_cmpxchg"
        elif numpy_dtype.itemsize == 8:
            func_name = "atom_cmpxchg"
        else:
            raise LoopyError("unexpected atomic size")

        cast_str = ""
        old_val = old_val_var
        new_val = new_val_var

        if numpy_dtype.kind == "f":
            if numpy_dtype == np.float32:
                ctype = "int"
            elif numpy_dtype == np.float64:
                ctype = "long"
            else:
                # Excluded by the dtype check at the top; raised explicitly
                # so the guard survives running under ``python -O``.
                raise AssertionError("unexpected floating point type")

            var_kind = None
            if isinstance(lhs_var, (ArrayArg, TemporaryVariable)):
                if lhs_var.address_space == AddressSpace.GLOBAL:
                    var_kind = "__global"
                elif lhs_var.address_space == AddressSpace.LOCAL:
                    var_kind = "__local"

            if var_kind is None:
                # The format string previously had one placeholder for two
                # arguments, which raised TypeError instead of this error.
                raise LoopyError("unexpected kind of variable '%s' in "
                                 "atomic operation: '%s'" %
                                 (lhs_var.name, type(lhs_var).__name__))

            # Reinterpret the float temporaries and the target address as
            # the same-sized integer type for the exchange call.
            old_val = "*(%s *) &" % ctype + old_val
            new_val = "*(%s *) &" % ctype + new_val
            cast_str = "(%s %s *) " % (var_kind, ctype)

        return Block([
            POD(self, NumpyType(lhs_dtype.dtype, target=self.target),
                old_val_var),
            POD(self, NumpyType(lhs_dtype.dtype, target=self.target),
                new_val_var),
            DoWhile(
                "%(func_name)s("
                "%(cast_str)s&(%(lhs_expr)s), "
                "%(old_val)s, "
                "%(new_val)s"
                ") != %(old_val)s" % {
                    "func_name": func_name,
                    "cast_str": cast_str,
                    "lhs_expr": lhs_expr_code,
                    "old_val": old_val,
                    "new_val": new_val,
                },
                Block([
                    Assign(old_val_var, lhs_expr_code),
                    Assign(new_val_var, rhs_expr_code),
                ]))
        ])
Ejemplo n.º 29
0
 def vector_dtype(self, base, count):
     """Return the vector type with *count* entries of type *base*.

     The entry of ``vec.types`` keyed by ``(base.numpy_dtype, count)`` is
     wrapped in a :class:`NumpyType` whose target is *self*.
     """
     vector_numpy_dtype = vec.types[base.numpy_dtype, count]
     return NumpyType(vector_numpy_dtype, target=self)
Ejemplo n.º 30
0
 def vector_dtype(self, base, count):
     """Return the vector type with *count* entries of type *base*.

     The entry of :data:`pyopencl.array.vec.types` keyed by
     ``(base.numpy_dtype, count)`` is wrapped in a :class:`NumpyType` whose
     target is *self*.
     """
     from pyopencl.array import vec

     vector_numpy_dtype = vec.types[base.numpy_dtype, count]
     return NumpyType(vector_numpy_dtype, target=self)