Python Snippetの例、reikna.cluda.Snippet Pythonの例

コード例 #1

0

ファイルを表示

ファイル: reduce.py プロジェクト: SyamGadde/reikna

def predicate_sum(dtype):
    """
    Builds a :py:class:`~reikna.algorithms.Predicate` object that adds its two arguments.

    The operation is the snippet ``return ${v1} + ${v2};`` and the empty
    element is a zero scalar of ``dtype``.
    """
    addition = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    zero = numpy.zeros(1, dtype)[0]
    return Predicate(addition, zero)

コード例 #2

0

ファイルを表示

ファイル: lwe_gpu.py プロジェクト: phorcys/nufhe

    def _build_plan(
            self, plan_factory, device_params,
            ks_a, ks_b, ks_cv, in_key, out_key, noises_a, noises_b):
        """
        Assembles a plan made of two computation calls.

        First a ``MatrixMulVector`` built from ``noises_a`` is called with a
        temporary ``b_term`` array, ``noises_a`` and ``out_key``. Then a
        ``PureParallel`` rendered from the ``make_lwe_keyswitch_key`` template
        def is called; it declares ``ks_a``, ``ks_b`` and ``ks_cv`` as outputs
        and ``in_key``, ``b_term``, ``noises_a`` and ``noises_b`` as inputs,
        with ``ks_b`` as the guiding array.

        Returns the populated plan. ``device_params`` is not used here.
        """
        plan = plan_factory()

        # None of these names is used below; the unpacking still requires
        # ``ks_a.shape`` to have exactly four entries.
        extracted_n, t, base, inner_n = ks_a.shape

        mul_key = MatrixMulVector(noises_a)
        b_term = plan.temp_array_like(mul_key.parameter.output)

        written = [('ks_a', ks_a), ('ks_b', ks_b), ('ks_cv', ks_cv)]
        read = [
            ('in_key', in_key), ('b_term', b_term),
            ('noises_a', noises_a), ('noises_b', noises_b)]
        parameters = (
            [Parameter(name, Annotation(array, 'o')) for name, array in written]
            + [Parameter(name, Annotation(array, 'i')) for name, array in read])

        keyswitch_code = Snippet(
            TEMPLATE.get_def("make_lwe_keyswitch_key"),
            render_kwds=dict(
                log2_base=self._log2_base, output_size=self._output_size,
                noise=self._noise))

        build_keyswitch = PureParallel(
            parameters, keyswitch_code, guiding_array="ks_b")

        plan.computation_call(mul_key, b_term, noises_a, out_key)
        plan.computation_call(
            build_keyswitch,
            ks_a, ks_b, ks_cv, in_key, b_term, noises_a, noises_b)

        return plan

コード例 #3

0

ファイルを表示

def test_structure_type(thr):
    """
    Reduces a 100x100 array of an aligned, nested struct dtype along axis 0
    and compares the result with per-field numpy sums.

    After the comparison the test always ends in ``pytest.xfail`` or
    ``pytest.fail``, depending on whether the dtype of ``b_dev.get()``
    differs from ``b_dev.dtype``.
    """
    shape = (100, 100)
    nested_dtype = numpy.dtype([
        ('v', numpy.uint64),
    ])
    dtype = dtypes.align(numpy.dtype([
        ('i1', numpy.uint32),
        ('nested', nested_dtype),
        ('i2', numpy.uint32),
    ]))

    host_in = get_test_array(shape, dtype)
    dev_in = thr.to_device(host_in)

    # numpy cannot reduce arrays with struct dtypes,
    # so the reference is filled in field by field.
    expected = numpy.empty(100, dtype)
    expected['i1'] = host_in['i1'].sum(0)
    expected['nested']['v'] = host_in['nested']['v'].sum(0)
    expected['i2'] = host_in['i2'].sum(0)

    add_fields = Snippet.create(
        lambda v1, v2: """
            ${ctype} result = ${v1};
            result.i1 += ${v2}.i1;
            result.nested.v += ${v2}.nested.v;
            result.i2 += ${v2}.i2;
            return result;
            """,
        render_kwds=dict(ctype=dtypes.ctype_module(dtype)))
    zero = numpy.zeros(1, dtype)[0]
    predicate = Predicate(add_fields, zero)

    reduction = Reduce(dev_in, predicate, axes=(0, ))

    b_dev = thr.empty_like(reduction.parameter.output)

    compiled = reduction.compile(thr)
    compiled(b_dev, dev_in)
    b_res = b_dev.get()

    # Array.get() runs numpy.lib.stride_tricks.as_strided() on the array,
    # which replaces custom offsets (present in our `dtype`) with dummy
    # variables, so the dtype of the result differs and a direct comparison
    # would fail. Record the mismatch, then convert back to the original
    # dtype (the two are still compatible). Once the upstream behavior
    # changes this test starts failing, which is the reminder to clean up.
    # See inducer/compyte issue #26.
    dtype_mismatch = b_res.dtype != b_dev.dtype

    b_res = b_res.astype(dtype)
    assert diff_is_negligible(b_res, expected)

    if not dtype_mismatch:
        pytest.fail("as_strided() does not corrupt the datatype anymore, "
                    "we can remove the `astype()` now")
    pytest.xfail("as_strided() still corrupts the datatype")

コード例 #4

0

ファイルを表示

def test_nondefault_function(thr):
    """
    Checks ``Reduce`` with a hand-written addition predicate against
    ``numpy`` summation along axis 0 of a 100x100 int64 array.
    """
    host_in = get_test_array((100, 100), numpy.int64)
    dev_in = thr.to_device(host_in)
    expected = host_in.sum(0)

    addition = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    reduction = Reduce(dev_in, Predicate(addition, 0), axes=(0, ))

    dev_out = thr.empty_like(reduction.parameter.output)

    compiled = reduction.compile(thr)
    compiled(dev_out, dev_in)

    assert diff_is_negligible(dev_out.get(), expected)

コード例 #5

0

ファイルを表示

ファイル: test_reduce.py プロジェクト: fjarri/reikna

def test_nondefault_function(thr):
    """
    Runs a ``Reduce`` over axis 0 with a custom ``v1 + v2`` predicate and
    verifies the result against ``a.sum(0)`` computed by numpy.
    """
    source = get_test_array((100, 100), numpy.int64)
    source_dev = thr.to_device(source)
    reference = source.sum(0)

    # Custom predicate: plain addition with 0 as the empty element.
    operation = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    predicate = Predicate(operation, 0)

    computation = Reduce(source_dev, predicate, axes=(0,))
    result_dev = thr.empty_like(computation.parameter.output)

    compiled = computation.compile(thr)
    compiled(result_dev, source_dev)

    assert diff_is_negligible(result_dev.get(), reference)

コード例 #6

0

ファイルを表示

ファイル: test_reduce.py プロジェクト: fjarri/reikna

def test_structure_type(thr):
    """
    Reduces a 100x100 array of an aligned, nested struct dtype along axis 0
    with a field-wise addition predicate and compares the result with
    per-field numpy sums.
    """
    shape = (100, 100)
    nested_dtype = numpy.dtype([
        ('v', numpy.uint64),
    ])
    dtype = dtypes.align(numpy.dtype([
        ('i1', numpy.uint32),
        ('nested', nested_dtype),
        ('i2', numpy.uint32),
    ]))

    host_in = get_test_array(shape, dtype)
    dev_in = thr.to_device(host_in)

    # numpy cannot reduce arrays with struct dtypes,
    # so the reference is filled in field by field.
    expected = numpy.empty(100, dtype)
    expected['i1'] = host_in['i1'].sum(0)
    expected['nested']['v'] = host_in['nested']['v'].sum(0)
    expected['i2'] = host_in['i2'].sum(0)

    add_fields = Snippet.create(
        lambda v1, v2: """
            ${ctype} result = ${v1};
            result.i1 += ${v2}.i1;
            result.nested.v += ${v2}.nested.v;
            result.i2 += ${v2}.i2;
            return result;
            """,
        render_kwds=dict(ctype=dtypes.ctype_module(dtype)))
    zero = numpy.zeros(1, dtype)[0]
    predicate = Predicate(add_fields, zero)

    reduction = Reduce(dev_in, predicate, axes=(0,))

    dev_out = thr.empty_like(reduction.parameter.output)

    compiled = reduction.compile(thr)
    compiled(dev_out, dev_in)
    result = dev_out.get()

    assert diff_is_negligible(result, expected)

コード例 #7

0

ファイルを表示

def test_scan_structure_type(thr, exclusive):
    """
    Scans a 100x100 array of an aligned, nested struct dtype along axis 0
    with a field-wise addition predicate and compares the result with
    per-field ``ref_scan`` references.

    ``exclusive`` is forwarded both to ``Scan`` and to ``ref_scan``.
    """
    shape = (100, 100)
    nested_dtype = numpy.dtype([
        ('v', numpy.uint64),
    ])
    dtype = dtypes.align(numpy.dtype([
        ('i1', numpy.uint32),
        ('nested', nested_dtype),
        ('i2', numpy.uint32),
    ]))

    host_in = get_test_array(shape, dtype)
    dev_in = thr.to_device(host_in)

    # numpy cannot scan arrays with struct dtypes,
    # so the reference is filled in field by field.
    expected = numpy.empty(shape, dtype)
    scan_kwds = dict(axes=0, exclusive=exclusive)
    expected['i1'] = ref_scan(host_in['i1'], **scan_kwds)
    expected['nested']['v'] = ref_scan(host_in['nested']['v'], **scan_kwds)
    expected['i2'] = ref_scan(host_in['i2'], **scan_kwds)

    add_fields = Snippet.create(
        lambda v1, v2: """
            ${ctype} result = ${v1};
            result.i1 += ${v2}.i1;
            result.nested.v += ${v2}.nested.v;
            result.i2 += ${v2}.i2;
            return result;
            """,
        render_kwds=dict(ctype=dtypes.ctype_module(dtype)))
    zero = numpy.zeros(1, dtype)[0]
    predicate = Predicate(add_fields, zero)

    scan = Scan(dev_in, predicate, axes=(0, ), exclusive=exclusive)

    dev_out = thr.empty_like(scan.parameter.output)

    compiled = scan.compile(thr)
    compiled(dev_out, dev_in)
    result = dev_out.get()

    assert diff_is_negligible(result, expected)

コード例 #8

0

ファイルを表示

ファイル: pureparallel.py プロジェクト: xexo7C8/reikna

    def __init__(self, parameters, code, guiding_array=None, render_kwds=None):
        """
        Initializes the base ``Computation`` and stores the kernel snippet
        and the guiding shape.

        ``code`` is kept as is when it is already a ``Snippet``. Otherwise it
        is wrapped into one through ``helpers.template_def``, with ``idxs``
        followed by the signature's parameter names as arguments, and
        ``render_kwds`` is passed along.

        ``guiding_array`` defaults to the first parameter name. A string is
        looked up in the signature and the shape of that parameter is used;
        any other value is stored as the guiding shape directly.
        """
        Computation.__init__(self, parameters)

        root_names = list(self.signature.parameters.keys())
        self._root_parameters = root_names

        if not isinstance(code, Snippet):
            code = Snippet(
                helpers.template_def(['idxs'] + root_names, code),
                render_kwds=render_kwds)
        self._snippet = code

        if guiding_array is None:
            guiding_array = root_names[0]

        if isinstance(guiding_array, str):
            guiding_param = self.signature.parameters[guiding_array]
            self._guiding_shape = guiding_param.annotation.type.shape
        else:
            self._guiding_shape = guiding_array

コード例 #9

0

ファイルを表示

    def __init__(self, parameters, code, render_kwds=None, connectors=None):
        """
        Builds the signature, the per-parameter attributes, the connector
        list and the code snippet.

        Raises ``ValueError`` if any parameter is annotated as both input and
        output. When ``connectors`` is not given, the names of all parameters
        whose annotation is flagged as an array are used.
        """
        # A parameter flagged as both input and output is rejected up front.
        for candidate in parameters:
            annotation = candidate.annotation
            if annotation.input and annotation.output:
                raise ValueError(
                    "Transformation cannot have 'io' parameters ('" + candidate.name + "')")

        self.signature = Signature(parameters)

        # Expose every signature parameter as an attribute of the same name.
        for sig_param in self.signature.parameters.values():
            handle = TransformationParameter(
                self, sig_param.name, sig_param.annotation.type)
            setattr(self, sig_param.name, handle)

        if connectors is None:
            connectors = [param.name for param in parameters if param.annotation.array]
        self.connectors = connectors

        # The template def receives ``idxs`` first, then the parameters in signature order.
        arg_names = ['idxs']
        arg_names.extend(param.name for param in self.signature.parameters.values())
        self.snippet = Snippet(template_def(arg_names, code), render_kwds=render_kwds)

コード例 #10

0

ファイルを表示

ファイル: lwe_gpu.py プロジェクト: stjordanis/nufhe

    def _build_plan(
            self, plan_factory, device_params,
            ks_a, ks_b, ks_cv, in_key, out_key, noises_a, noises_b):
        """
        Assembles a plan made of three computation calls.

        A ``Reduce`` over ``noises_b`` with ``predicate_sum`` gets a
        ``div_const`` transformation (constant ``numpy.prod(noises_b.shape)``)
        connected to its output, exposed as the parameter ``mean``. A
        ``MatrixMulVector`` built from ``noises_a`` fills the temporary
        ``b_term``. Finally a ``PureParallel`` rendered from the
        ``make_lwe_keyswitch_key`` template def is called; it declares
        ``ks_a``, ``ks_b`` and ``ks_cv`` as outputs and the remaining arrays
        as inputs, with ``ks_b`` as the guiding array.

        Returns the populated plan. ``device_params`` is not used here.
        """
        plan = plan_factory()

        # None of these names is used below; the unpacking still requires
        # ``ks_a.shape`` to have exactly four entries.
        extracted_n, t, base, inner_n = ks_a.shape

        mean = Reduce(noises_b, predicate_sum(noises_b.dtype))
        element_count = numpy.prod(noises_b.shape)
        norm = transformations.div_const(mean.parameter.output, element_count)
        mean.parameter.output.connect(norm, norm.input, mean=norm.output)

        noises_b_mean = plan.temp_array_like(mean.parameter.mean)

        mul_key = MatrixMulVector(noises_a)
        b_term = plan.temp_array_like(mul_key.parameter.output)

        written = [('ks_a', ks_a), ('ks_b', ks_b), ('ks_cv', ks_cv)]
        read = [
            ('in_key', in_key), ('b_term', b_term),
            ('noises_a', noises_a), ('noises_b', noises_b),
            ('noises_b_mean', noises_b_mean)]
        parameters = (
            [Parameter(name, Annotation(array, 'o')) for name, array in written]
            + [Parameter(name, Annotation(array, 'i')) for name, array in read])

        keyswitch_code = Snippet(
            TEMPLATE.get_def("make_lwe_keyswitch_key"),
            render_kwds=dict(
                log2_base=self._log2_base, output_size=self._output_size,
                double_to_t32=double_to_t32_module, noise=self._noise))

        build_keyswitch = PureParallel(
            parameters, keyswitch_code, guiding_array="ks_b")

        plan.computation_call(mean, noises_b_mean, noises_b)
        plan.computation_call(mul_key, b_term, noises_a, out_key)
        plan.computation_call(
            build_keyswitch,
            ks_a, ks_b, ks_cv, in_key, b_term, noises_a, noises_b, noises_b_mean)

        return plan

コード例 #11

0

ファイルを表示

ファイル: test_modules.py プロジェクト: xexo7C8/reikna

def combinator_call(dtype, m1num=1, m2num=1, snum=1):
    """
    Returns a ``Snippet`` built from the ``snippet`` def of ``TEMPLATE``,
    rendered with ``c`` set to the result of ``combinator`` called with the
    same arguments.
    """
    render_kwds = dict(
        c=combinator(dtype, m1num=m1num, m2num=m2num, snum=snum))
    return Snippet(TEMPLATE.get_def('snippet'), render_kwds=render_kwds)

コード例 #12

0

ファイルを表示

# C type declaration module for the minmax struct dtype; it is passed to the
# predicate template below as `ctype`.
mmc_c_decl = dtypes.ctype_module(mmc_dtype)

# Create the "empty" element for our minmax monoid, that is
# x `minmax` empty == empty `minmax` x == x.
# cur_min starts at 2**30 and cur_max at -2**30, i.e. outside the
# [0, 10**6) range of the test array generated below.
empty = numpy.empty(1, mmc_dtype)[0]
empty["cur_min"] = 1 << 30
empty["cur_max"] = -(1 << 30)

# Reduction predicate for the minmax.
# v1 and v2 get the names of two variables to be processed.
# The result starts as a copy of v1 and takes over v2's cur_min / cur_max
# whenever they are smaller / larger respectively.
predicate = Predicate(
    Snippet.create(lambda v1, v2: """
        ${ctype} result = ${v1};
        if (${v2}.cur_min < result.cur_min)
            result.cur_min = ${v2}.cur_min;
        if (${v2}.cur_max > result.cur_max)
            result.cur_max = ${v2}.cur_max;
        return result;
        """,
                   render_kwds=dict(ctype=mmc_c_decl)), empty)

# Test array: 20000 random integers drawn from [0, 10**6).
arr = numpy.random.randint(0, 10**6, 20000)

# A transformation that creates initial minmax structures for the given array of integers
to_mmc = Transformation([
    Parameter('output', Annotation(Type(mmc_dtype, arr.shape), 'o')),
    Parameter('input', Annotation(arr, 'i'))
], """
    ${output.ctype} res;
    res.cur_min = ${input.load_same};

コード例 #13

0

ファイルを表示

ファイル: test_scan.py プロジェクト: fjarri/reikna

def test_scan_custom_predicate(thr):
    """
    Runs ``check_scan`` on a (10, 20, 30, 40) shape over axes 1 and 2 with a
    hand-written addition predicate whose empty element is 0.
    """
    addition = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    custom_predicate = Predicate(addition, 0)
    check_scan(thr, (10, 20, 30, 40), axes=(1,2), predicate=custom_predicate)

コード例 #14

0

ファイルを表示

ファイル: cl_util.py プロジェクト: liufeigit/MetaOMR

# General utilities for OpenCL arrays
from .gpu import *
from reikna.core import Type
from reikna.cluda import Snippet
from reikna.algorithms.reduce import Reduce, Predicate

# Program object built for the "maximum_filter" and "taxicab_distance" names;
# its `maximum_filter` attribute is called in maximum_filter_kernel below.
# NOTE(review): build_program presumably comes from `.gpu` — confirm.
prg = build_program(["maximum_filter", "taxicab_distance"])
def maximum_filter_kernel(img):
    """
    Runs the ``maximum_filter`` kernel of ``prg`` on ``img`` and returns the
    output array.

    The output is allocated like ``img`` and zero-filled before the call.
    The global size is ``img.shape`` reversed and converted to ints, and the
    local size is ``(1, 1)``.
    """
    result = thr.empty_like(img)
    result.fill(0)
    reversed_shape = map(int, img.shape[::-1])
    prg.maximum_filter(
        img, result, global_size=reversed_shape, local_size=(1, 1))
    return result

# Snippet returning the larger of its two arguments; max_kernel below uses it
# as the operation of a reduction predicate.
max_snippet = Snippet.create(lambda a, b: """
    return ((${a}) > (${b})) ? (${a}) : (${b});
""")
def max_kernel(arr):
    """
    Reduces ``arr`` with ``max_snippet`` and returns the result as a Python
    scalar.

    The empty element of the reduction is ``-10.0**9`` (that is, -1e9) as a
    float32, and the output is allocated as a float32 scalar.
    """
    lowest = np.array([-10.0**9],np.float32)[0]
    reduction = Reduce(arr, Predicate(max_snippet, lowest))
    max_func = reduction.compile(thr)
    result = thr.empty_like(Type(np.float32))
    max_func(result, arr)
    return result.get().item()

コード例 #15

0

ファイルを表示

ファイル: transformation_modules.py プロジェクト: fjarri/reikna

            VALUE_NAME=VALUE_NAME,
            module_idx=module_idx,
            nq_indices=index_cnames_seq(param),
            nq_params=param_cnames_seq(subtree_parameters)))


# Template snippet that splits combined indices back into individual ones.
# `slices` gives the number of consecutive entries of `shape` folded into
# each combined index. For every entry in such a group the template declares
# a VSIZE_T variable named `indices[index]`, equal to the combined index
# divided by `stride` (the product of the remaining shape entries of the
# group). For all but the last entry of a group it then subtracts
# `indices[index] * stride` from the combined index variable, so the
# combined index is modified in place.
_snippet_disassemble_combined = Snippet.create(
    lambda shape, slices, indices, combined_indices: """
    %for combined_index, slice_len in enumerate(slices):
    <%
        index_start = sum(slices[:combined_index])
        index_end = index_start + slice_len
    %>
        %for index in range(index_start, index_end):
        <%
            stride = product(shape[index+1:index_end])
        %>
        VSIZE_T ${indices[index]} = ${combined_indices[combined_index]} / ${stride};
        %if index != index_end - 1:
        ${combined_indices[combined_index]} -= ${indices[index]} * ${stride};
        %endif
        %endfor
    %endfor
    """,
    render_kwds=dict(product=helpers.product))

_module_combined = helpers.template_def(
    ['prefix', 'slices'],
    """
    <%
        value_param = [str(connector_ctype) + ' ' + VALUE_NAME] if output else []

コード例 #16

0

ファイルを表示

def test_scan_custom_predicate(thr):
    """
    Passes a custom ``v1 + v2`` predicate (empty element 0) to ``check_scan``
    for a (10, 20, 30, 40) shape scanned over axes 1 and 2.
    """
    operation = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    check_scan(
        thr, (10, 20, 30, 40), axes=(1, 2),
        predicate=Predicate(operation, 0))

コード例 #17

0

ファイルを表示

from .gpu import *
from reikna.core import Type
from reikna.cluda import Snippet
from reikna.algorithms.reduce import Reduce, Predicate

# Program object built for the "maximum_filter" and "taxicab_distance" names;
# its `maximum_filter` attribute is called in maximum_filter_kernel below.
# NOTE(review): build_program presumably comes from `.gpu` — confirm.
prg = build_program(["maximum_filter", "taxicab_distance"])


def maximum_filter_kernel(img):
    """
    Applies ``prg.maximum_filter`` to ``img`` and returns the output array.

    The output array is created with ``thr.empty_like(img)`` and filled with
    zeros first. The kernel is launched with ``img.shape`` reversed (as ints)
    as the global size and ``(1, 1)`` as the local size.
    """
    filtered = thr.empty_like(img)
    filtered.fill(0)
    launch_kwds = dict(
        global_size=map(int, img.shape[::-1]),
        local_size=(1, 1))
    prg.maximum_filter(img, filtered, **launch_kwds)
    return filtered


# Snippet returning the larger of its two arguments; max_kernel below uses it
# as the operation of a reduction predicate.
max_snippet = Snippet.create(lambda a, b: """
    return ((${a}) > (${b})) ? (${a}) : (${b});
""")


def max_kernel(arr):
    """
    Computes the maximum of ``arr`` through a compiled ``Reduce`` using
    ``max_snippet`` and returns it as a Python scalar.

    The predicate's empty element is ``-10.0**9`` (that is, -1e9) stored as
    float32; the output buffer is a float32 scalar.
    """
    empty_element = np.array([-10.0**9], np.float32)[0]
    predicate = Predicate(max_snippet, empty_element)
    max_func = Reduce(arr, predicate).compile(thr)

    out = thr.empty_like(Type(np.float32))
    max_func(out, arr)
    host_value = out.get()
    return host_value.item()

コード例 #18

0

ファイルを表示

ファイル: demo_struct_reduce.py プロジェクト: fjarri/reikna

# Create the "empty" element for our minmax monoid, that is
# x `minmax` empty == empty `minmax` x == x.
# cur_min starts at 2**30 and cur_max at -2**30, i.e. outside the
# [0, 10**6) range of the test array generated below.
empty = numpy.empty(1, mmc_dtype)[0]
empty["cur_min"] = 1 << 30
empty["cur_max"] = -(1 << 30)


# Reduction predicate for the minmax.
# v1 and v2 get the names of two variables to be processed.
# The result starts as a copy of v1 and takes over v2's cur_min / cur_max
# whenever they are smaller / larger respectively.
predicate = Predicate(
    Snippet.create(lambda v1, v2: """
        ${ctype} result = ${v1};
        if (${v2}.cur_min < result.cur_min)
            result.cur_min = ${v2}.cur_min;
        if (${v2}.cur_max > result.cur_max)
            result.cur_max = ${v2}.cur_max;
        return result;
        """,
        render_kwds=dict(ctype=mmc_c_decl)),
    empty)


# Test array: 20000 random integers drawn from [0, 10**6).
arr = numpy.random.randint(0, 10**6, 20000)


# A transformation that creates initial minmax structures for the given array of integers
to_mmc = Transformation(
    [Parameter('output', Annotation(Type(mmc_dtype, arr.shape), 'o')),
    Parameter('input', Annotation(arr, 'i'))],

コード例 #19

0

ファイルを表示

                      VALUE_NAME=VALUE_NAME,
                      module_idx=module_idx,
                      nq_indices=index_cnames_seq(param),
                      nq_params=param_cnames_seq(subtree_parameters)))


# Template snippet that splits combined indices back into individual ones.
# `slices` gives the number of consecutive entries of `shape` folded into
# each combined index. For every entry in such a group the template declares
# a VSIZE_T variable named `indices[index]`, equal to the combined index
# divided by `stride` (the product of the remaining shape entries of the
# group). For all but the last entry of a group it then subtracts
# `indices[index] * stride` from the combined index variable, so the
# combined index is modified in place.
_snippet_disassemble_combined = Snippet.create(
    lambda shape, slices, indices, combined_indices: """
    %for combined_index, slice_len in enumerate(slices):
    <%
        index_start = sum(slices[:combined_index])
        index_end = index_start + slice_len
    %>
        %for index in range(index_start, index_end):
        <%
            stride = product(shape[index+1:index_end])
        %>
        VSIZE_T ${indices[index]} = ${combined_indices[combined_index]} / ${stride};
        %if index != index_end - 1:
        ${combined_indices[combined_index]} -= ${indices[index]} * ${stride};
        %endif
        %endfor
    %endfor
    """,
    render_kwds=dict(product=helpers.product))

_module_combined = helpers.template_def(['prefix', 'slices'], """
    <%
        value_param = [str(connector_ctype) + ' ' + VALUE_NAME] if output else []
        value = [VALUE_NAME] if output else []

コード例 #20

0

ファイルを表示


# Create a 256-entry lookup table holding the number of set bits of every
# byte value, and stringify it into the preamble of the map kernel.
LUT = np.zeros(256, np.int32)
# `range` walks the 8 bit positions exactly like the Python-2-only `xrange`
# did, and also exists on Python 3.
for b in range(8):
    LUT[(np.arange(256) & (1 << b)) != 0] += 1
strLUT = "constant int LUT[256] = {" + ",".join(map(str, LUT)) + "};\n"
# Transformation whose code stores LUT[input] into the output.
byte_to_count = Transformation([
    Parameter('output', Annotation(Type(np.int32, (1, )), 'o')),
    Parameter('input', Annotation(Type(np.uint8, (1, )), 'i'))
], strLUT + """
        ${output.store_same}(LUT[${input.load_same}]);
    """)

# Addition predicate with an int32 zero as the empty element. The operation
# is a C return statement and is terminated with a semicolon, as in reikna's
# own predicate_sum ("return ${v1} + ${v2};").
predicate = Predicate(
    Snippet.create(lambda v1, v2: """return ${v1} + ${v2};"""), np.int32(0))

# Reduction described by byte_to_count.output, with byte_to_count connected
# to its input; the resulting computation takes `new_input` in place of `input`.
sum_bits_reduction = Reduce(byte_to_count.output, predicate)
sum_bits_reduction.parameter.input.connect(byte_to_count,
                                           byte_to_count.output,
                                           new_input=byte_to_count.input)
sum_bits = sum_bits_reduction.compile(thr)
#sum_byte_count = ReductionKernel(cx, np.int32, neutral="0",
#                    reduce_expr="a+b", map_expr="LUT[bytes[i]]",
#                    arguments="__global unsigned char *bytes",
#                    preamble=strLUT)
#def count_bits(img):
#    return sum_byte_count(img).get().item()
#
#pixel_inds = GenericScanKernel(cx, np.int32,
#                    arguments="__global unsigned char *bytes, "

コード例 #21

0

ファイルを表示

def kernel_declaration(kernel_name, parameters):
    """
    Returns a ``Snippet`` wrapping ``_snippet_kernel_declaration``, rendered
    with ``kernel_name``, ``parameters`` and the ``param_cnames_seq`` helper.
    """
    render_kwds = dict(
        param_cnames_seq=param_cnames_seq,
        kernel_name=kernel_name,
        parameters=parameters)
    return Snippet(_snippet_kernel_declaration, render_kwds=render_kwds)

コード例 #22

0

ファイルを表示

def predicate_sum(dtype):
    """
    Creates a :py:class:`~reikna.algorithms.Predicate` object whose operation adds its arguments.

    The empty element is the zero scalar of ``dtype``.
    """
    operation = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    empty = numpy.zeros(1, dtype)[0]
    return Predicate(operation, empty)

コード例 #23

0

ファイルを表示

ファイル: bitimage.py プロジェクト: ringw/MetaOMR

# 256-entry lookup table holding the number of set bits of every byte value;
# it is stringified into the preamble of the map kernel.
LUT = np.zeros(256, np.int32)
# `range` walks the 8 bit positions exactly like the Python-2-only `xrange`
# did, and also exists on Python 3.
for b in range(8):
    LUT[(np.arange(256) & (1 << b)) != 0] += 1
strLUT = "constant int LUT[256] = {" + ",".join(map(str, LUT)) + "};\n"
# Transformation whose code stores LUT[input] into the output.
byte_to_count = Transformation(
    [
        Parameter("output", Annotation(Type(np.int32, (1,)), "o")),
        Parameter("input", Annotation(Type(np.uint8, (1,)), "i")),
    ],
    strLUT
    + """
        ${output.store_same}(LUT[${input.load_same}]);
    """,
)

# Addition predicate with an int32 zero as the empty element. The operation
# is a C return statement and is terminated with a semicolon, as in reikna's
# own predicate_sum ("return ${v1} + ${v2};").
predicate = Predicate(Snippet.create(lambda v1, v2: """return ${v1} + ${v2};"""), np.int32(0))

# Reduction described by byte_to_count.output, with byte_to_count connected
# to its input; the resulting computation takes `new_input` in place of `input`.
sum_bits_reduction = Reduce(byte_to_count.output, predicate)
sum_bits_reduction.parameter.input.connect(byte_to_count, byte_to_count.output, new_input=byte_to_count.input)
sum_bits = sum_bits_reduction.compile(thr)
# sum_byte_count = ReductionKernel(cx, np.int32, neutral="0",
#                    reduce_expr="a+b", map_expr="LUT[bytes[i]]",
#                    arguments="__global unsigned char *bytes",
#                    preamble=strLUT)
# def count_bits(img):
#    return sum_byte_count(img).get().item()
#
# pixel_inds = GenericScanKernel(cx, np.int32,
#                    arguments="__global unsigned char *bytes, "
#                              "int image_w, "
#                              "__global int *pixels",