예제 #1
0
def test_summation(thr):
    """Benchmark a full sum-reduction of ``2**22`` 64-bit integers.

    The compiled reduction is launched ten times, synchronizing ``thr``
    after every launch, and the final device result is checked against
    the sum computed by numpy on the host.

    Returns a tuple ``(best_time, byte_count)``: the shortest measured
    duration (as reported by ``time.time()``) and the number of elements
    multiplied by the item size of the dtype.
    """
    attempts = 10
    perf_size = 2**22
    dtype = dtypes.normalize_type(numpy.int64)

    host_input = get_test_array(perf_size, dtype)
    dev_input = thr.to_device(host_input)

    reduction = Reduce(host_input, predicate_sum(dtype))

    dev_output = thr.empty_like(reduction.parameter.output)
    expected = numpy.array([host_input.sum()], dtype)

    compiled = reduction.compile(thr)

    def timed_run():
        # The synchronization is part of the measured interval.
        started = time.time()
        compiled(dev_output, dev_input)
        thr.synchronize()
        return time.time() - started

    durations = [timed_run() for _ in range(attempts)]

    assert diff_is_negligible(dev_output.get(), expected)

    return min(durations), perf_size * dtype.itemsize
예제 #2
0
def test_summation(thr):
    """Measure how fast a sum over ``2 ** 22`` int64 values is reduced.

    Runs the compiled reduction ten times (with a ``thr.synchronize()``
    after each call), verifies the last result against numpy's own sum,
    and returns ``(shortest_duration, perf_size * dtype.itemsize)``.
    """
    perf_size = 2 ** 22
    dtype = dtypes.normalize_type(numpy.int64)

    source = get_test_array(perf_size, dtype)
    source_dev = thr.to_device(source)

    computation = Reduce(source, predicate_sum(dtype))

    result_dev = thr.empty_like(computation.parameter.output)
    reference = numpy.array([source.sum()], dtype)

    run_reduction = computation.compile(thr)

    elapsed = []
    for _ in range(10):
        start = time.time()
        run_reduction(result_dev, source_dev)
        # Wait for the device before reading the clock again.
        thr.synchronize()
        stop = time.time()
        elapsed.append(stop - start)

    assert diff_is_negligible(result_dev.get(), reference)

    processed_bytes = perf_size * dtype.itemsize
    return min(elapsed), processed_bytes
예제 #3
0
def test_structure_type(thr):
    """Reduce an array of aligned nested structures along its first axis.

    Every field of the structure is summed by a custom predicate, and the
    result is compared with a reference assembled field by field on the
    host. The test always ends in ``pytest.xfail`` or ``pytest.fail``: it
    tracks whether the dtype returned by ``Array.get()`` still differs
    from the dtype of the device array (see inducer/compyte issue #26).
    """
    shape = (100, 100)
    nested_dtype = numpy.dtype([('v', numpy.uint64)])
    dtype = dtypes.align(numpy.dtype([
        ('i1', numpy.uint32),
        ('nested', nested_dtype),
        ('i2', numpy.uint32),
    ]))

    host_input = get_test_array(shape, dtype)
    dev_input = thr.to_device(host_input)

    # numpy cannot reduce arrays with struct dtypes,
    # so the reference is filled in one field at a time.
    expected = numpy.empty(shape[1], dtype)
    expected['i1'] = host_input['i1'].sum(0)
    expected['nested']['v'] = host_input['nested']['v'].sum(0)
    expected['i2'] = host_input['i2'].sum(0)

    # Field-wise addition of two structures.
    operation = Snippet.create(
        lambda v1, v2: """
            ${ctype} result = ${v1};
            result.i1 += ${v2}.i1;
            result.nested.v += ${v2}.nested.v;
            result.i2 += ${v2}.i2;
            return result;
            """,
        render_kwds={'ctype': dtypes.ctype_module(dtype)})
    # A zero-filled structure is the second argument of the predicate.
    empty_struct = numpy.zeros(1, dtype)[0]
    predicate = Predicate(operation, empty_struct)

    reduction = Reduce(dev_input, predicate, axes=(0,))

    dev_output = thr.empty_like(reduction.parameter.output)

    compiled = reduction.compile(thr)
    compiled(dev_output, dev_input)
    result = dev_output.get()

    # Array.get() runs numpy.lib.stride_tricks.as_strided() on the array,
    # which adds dummy variables instead of custom offsets (and our `dtype`
    # has them), so the dtype of the result differs and a direct comparison
    # would fail. For now the difference is recorded and the result is
    # converted back to the original dtype (the two are still compatible).
    # Once the behavior changes, this test starts failing and we will notice.
    # See inducer/compyte issue #26.
    dtype_corrupted = result.dtype != dev_output.dtype

    assert diff_is_negligible(result.astype(dtype), expected)

    if not dtype_corrupted:
        pytest.fail("as_strided() does not corrupt the datatype anymore, "
                    "we can remove the `astype()` now")
    pytest.xfail("as_strided() still corrupts the datatype")
예제 #4
0
def test_normal(thr, shape, axis):
    """Compare a device sum-reduction of an int64 array with ``numpy``'s sum.

    ``axis`` selects the single axis to reduce over; when it is ``None``,
    ``axes=None`` is passed to ``Reduce`` and the reference is numpy's
    ``sum(None)``.
    """
    host_input = get_test_array(shape, numpy.int64)
    dev_input = thr.to_device(host_input)

    if axis is None:
        axes = None
    else:
        axes = (axis,)
    reduction = Reduce(host_input, predicate_sum(numpy.int64), axes=axes)

    dev_output = thr.empty_like(reduction.parameter.output)
    expected = host_input.sum(axis)

    compiled = reduction.compile(thr)
    compiled(dev_output, dev_input)

    assert diff_is_negligible(dev_output.get(), expected)
예제 #5
0
def test_nonsequential_axes(thr):
    """Sum a 4D int64 array over the non-adjacent axes 0 and 2.

    The device result is compared with two consecutive numpy sums.
    """
    reduced_axes = (0, 2)
    shape = (50, 40, 30, 20)

    host_input = get_test_array(shape, numpy.int64)
    dev_input = thr.to_device(host_input)

    # Once axis 0 is summed away, axis 2 of the initial array becomes axis 1.
    expected = host_input.sum(axis=0).sum(axis=1)

    reduction = Reduce(dev_input, predicate_sum(numpy.int64), axes=reduced_axes)

    dev_output = thr.empty_like(reduction.parameter.output)

    compiled = reduction.compile(thr)
    compiled(dev_output, dev_input)

    assert diff_is_negligible(dev_output.get(), expected)
예제 #6
0
def test_nonsequential_axes(thr):
    """Check a reduction over axes ``(0, 2)`` of a ``(50, 40, 30, 20)`` array."""
    source = get_test_array((50, 40, 30, 20), numpy.int64)
    source_dev = thr.to_device(source)

    # Reference: sum over axes 0 and 2 of the initial array, done in two
    # steps (the second step uses index 1 because axis 0 is already gone).
    without_first_axis = source.sum(0)
    reference = without_first_axis.sum(1)

    summation = predicate_sum(numpy.int64)
    computation = Reduce(source_dev, summation, axes=(0, 2))

    result_dev = thr.empty_like(computation.parameter.output)

    run_reduction = computation.compile(thr)
    run_reduction(result_dev, source_dev)

    result = result_dev.get()
    assert diff_is_negligible(result, reference)
예제 #7
0
def test_normal(thr, shape, axis):
    """Verify that summing along ``axis`` on the device matches numpy.

    A ``None`` axis is forwarded to ``Reduce`` as ``axes=None``; any other
    value is wrapped into a one-element tuple.
    """
    source = get_test_array(shape, numpy.int64)
    source_dev = thr.to_device(source)

    reduced_axes = None if axis is None else (axis,)
    summation = predicate_sum(numpy.int64)
    computation = Reduce(source, summation, axes=reduced_axes)

    result_dev = thr.empty_like(computation.parameter.output)
    reference = source.sum(axis)

    run_reduction = computation.compile(thr)
    run_reduction(result_dev, source_dev)

    result = result_dev.get()
    assert diff_is_negligible(result, reference)
예제 #8
0
def test_nondefault_function(thr):
    """Reduce along axis 0 with a hand-written addition predicate.

    The predicate is built from an explicit snippet instead of
    ``predicate_sum``, and the result is compared with ``numpy``'s sum
    over the same axis.
    """
    shape = (100, 100)
    host_input = get_test_array(shape, numpy.int64)
    dev_input = thr.to_device(host_input)
    expected = host_input.sum(0)

    addition = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    predicate = Predicate(addition, 0)

    reduction = Reduce(dev_input, predicate, axes=(0,))

    dev_output = thr.empty_like(reduction.parameter.output)

    compiled = reduction.compile(thr)
    compiled(dev_output, dev_input)

    assert diff_is_negligible(dev_output.get(), expected)
예제 #9
0
def test_nondefault_function(thr):
    """Check a reduction whose predicate is a custom ``v1 + v2`` snippet."""
    source = get_test_array((100, 100), numpy.int64)
    source_dev = thr.to_device(source)
    reference = source.sum(0)

    # Second argument of Predicate is 0, matching the additive operation.
    operation = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    predicate = Predicate(operation, 0)

    computation = Reduce(source_dev, predicate, axes=(0,))

    result_dev = thr.empty_like(computation.parameter.output)

    run_reduction = computation.compile(thr)
    run_reduction(result_dev, source_dev)

    result = result_dev.get()
    assert diff_is_negligible(result, reference)
예제 #10
0
def test_structure_type(thr):
    """Reduce an array of aligned nested structures along its first axis.

    A custom predicate adds two structures field by field; the device
    result is compared with a reference built per field on the host.
    """
    shape = (100, 100)
    nested_dtype = numpy.dtype([('v', numpy.uint64)])
    struct_fields = [
        ('i1', numpy.uint32),
        ('nested', nested_dtype),
        ('i2', numpy.uint32),
    ]
    dtype = dtypes.align(numpy.dtype(struct_fields))

    host_input = get_test_array(shape, dtype)
    dev_input = thr.to_device(host_input)

    # numpy cannot reduce arrays with struct dtypes,
    # so the reference is filled in one field at a time.
    expected = numpy.empty(shape[1], dtype)
    expected['i1'] = host_input['i1'].sum(0)
    expected['nested']['v'] = host_input['nested']['v'].sum(0)
    expected['i2'] = host_input['i2'].sum(0)

    # Field-wise addition of two structures.
    operation = Snippet.create(
        lambda v1, v2: """
            ${ctype} result = ${v1};
            result.i1 += ${v2}.i1;
            result.nested.v += ${v2}.nested.v;
            result.i2 += ${v2}.i2;
            return result;
            """,
        render_kwds={'ctype': dtypes.ctype_module(dtype)})
    # A zero-filled structure is the second argument of the predicate.
    empty_struct = numpy.zeros(1, dtype)[0]
    predicate = Predicate(operation, empty_struct)

    reduction = Reduce(dev_input, predicate, axes=(0,))

    dev_output = thr.empty_like(reduction.parameter.output)

    compiled = reduction.compile(thr)
    compiled(dev_output, dev_input)

    assert diff_is_negligible(dev_output.get(), expected)
예제 #11
0
def test_structure_type(thr):
    """Sum a ``(100, 100)`` array of nested structures over axis 0.

    The structure has two ``uint32`` fields and a nested structure holding
    one ``uint64``; all three values are accumulated by a custom predicate.
    """
    inner = numpy.dtype([('v', numpy.uint64)])
    outer = numpy.dtype([('i1', numpy.uint32),
                         ('nested', inner),
                         ('i2', numpy.uint32)])
    dtype = dtypes.align(outer)

    source = get_test_array((100, 100), dtype)
    source_dev = thr.to_device(source)

    # The reference has to be constructed manually,
    # since numpy cannot reduce arrays with struct dtypes.
    reference = numpy.empty(100, dtype)
    reference['i1'] = source['i1'].sum(0)
    reference['nested']['v'] = source['nested']['v'].sum(0)
    reference['i2'] = source['i2'].sum(0)

    render_kwds = dict(ctype=dtypes.ctype_module(dtype))
    add_structures = Snippet.create(
        lambda v1, v2: """
            ${ctype} result = ${v1};
            result.i1 += ${v2}.i1;
            result.nested.v += ${v2}.nested.v;
            result.i2 += ${v2}.i2;
            return result;
            """,
        render_kwds=render_kwds)
    zero_element = numpy.zeros(1, dtype)[0]
    predicate = Predicate(add_structures, zero_element)

    computation = Reduce(source_dev, predicate, axes=(0,))

    result_dev = thr.empty_like(computation.parameter.output)

    run_reduction = computation.compile(thr)
    run_reduction(result_dev, source_dev)
    result = result_dev.get()

    assert diff_is_negligible(result, reference)
예제 #12
0
# C declaration of a 256-entry constant integer table filled with the
# values of ``LUT``.
# NOTE(review): presumably LUT[b] is the number of set bits in byte b --
# confirm against the definition of LUT.
strLUT = "constant int LUT[256] = {" + ",".join(map(str, LUT)) + "};\n"

# Transformation that replaces every input byte with its table entry,
# stored as an int32.
byte_to_count = Transformation(
    [
        Parameter('output', Annotation(Type(np.int32, (1,)), 'o')),
        Parameter('input', Annotation(Type(np.uint8, (1,)), 'i')),
    ],
    strLUT + """
        ${output.store_same}(LUT[${input.load_same}]);
    """)

# Additive predicate with an int32 zero as its second argument.
# The snippet is rendered as a C statement, so it has to end with a
# semicolon (the original text lacked it).
predicate = Predicate(
    Snippet.create(lambda v1, v2: """return ${v1} + ${v2};"""),
    np.int32(0))

# Reduction over the transformation's output, with the transformation
# attached to the reduction's input so that it consumes raw bytes.
sum_bits_reduction = Reduce(byte_to_count.output, predicate)
sum_bits_reduction.parameter.input.connect(
    byte_to_count,
    byte_to_count.output,
    new_input=byte_to_count.input)
sum_bits = sum_bits_reduction.compile(thr)
#sum_byte_count = ReductionKernel(cx, np.int32, neutral="0",
#                    reduce_expr="a+b", map_expr="LUT[bytes[i]]",
#                    arguments="__global unsigned char *bytes",
#                    preamble=strLUT)
#def count_bits(img):
#    return sum_byte_count(img).get().item()
#
#pixel_inds = GenericScanKernel(cx, np.int32,
#                    arguments="__global unsigned char *bytes, "
#                              "int image_w, "
#                              "__global int *pixels",
#                    # Keep count of pixels we have stored so far
#                    input_expr="LUT[bytes[i]]",
#                    scan_expr="a+b", neutral="0",
#                    output_statement="""
예제 #13
0
# Input data: 20000 random integers drawn from [0, 10**6)
arr = numpy.random.randint(0, 10**6, 20000)


# A transformation that creates the initial minmax structure for every
# integer of the array: both the minimum and the maximum are set to it.
to_mmc_parameters = [
    Parameter('output', Annotation(Type(mmc_dtype, arr.shape), 'o')),
    Parameter('input', Annotation(arr, 'i')),
]
to_mmc_code = """
    ${output.ctype} res;
    res.cur_min = ${input.load_same};
    res.cur_max = ${input.load_same};
    ${output.store_same}(res);
    """
to_mmc = Transformation(to_mmc_parameters, to_mmc_code)


# Build the reduction over the transformation's output and attach the
# transformation to the reduction's input.
reduction = Reduce(to_mmc.output, predicate)
reduction.parameter.input.connect(
    to_mmc, to_mmc.output, new_input=to_mmc.input)
creduction = reduction.compile(thr)


# Execute the computation and fetch the result from the device
arr_dev = thr.to_device(arr)
res_dev = thr.empty_like(reduction.parameter.output)
creduction(res_dev, arr_dev)
minmax = res_dev.get()

# The reduced structure must hold the extrema that numpy finds.
assert minmax["cur_min"] == arr.min()
assert minmax["cur_max"] == arr.max()
예제 #14
0
# Array of 20000 random integers below 10**6 used as the test input
arr = numpy.random.randint(0, 10**6, 20000)

# Each integer is wrapped into a minmax structure in which it serves
# as both the current minimum and the current maximum.
mmc_output = Parameter('output', Annotation(Type(mmc_dtype, arr.shape), 'o'))
mmc_input = Parameter('input', Annotation(arr, 'i'))
to_mmc = Transformation(
    [mmc_output, mmc_input],
    """
    ${output.ctype} res;
    res.cur_min = ${input.load_same};
    res.cur_max = ${input.load_same};
    ${output.store_same}(res);
    """)

# The reduction works on the transformation's output; connecting the
# transformation to its input makes the integer array the new input.
reduction = Reduce(to_mmc.output, predicate)
reduction.parameter.input.connect(
    to_mmc, to_mmc.output, new_input=to_mmc.input)
creduction = reduction.compile(thr)

# Launch the compiled computation
arr_dev = thr.to_device(arr)
res_dev = thr.empty_like(reduction.parameter.output)
creduction(res_dev, arr_dev)
minmax = res_dev.get()

# Compare both fields of the result with numpy's extrema.
assert minmax["cur_min"] == arr.min()
assert minmax["cur_max"] == arr.max()
예제 #15
0
파일: bitimage.py 프로젝트: ringw/MetaOMR
# Transformation that replaces every input byte with its entry in the
# lookup table declared by ``strLUT``, stored as an int32.
byte_to_count = Transformation(
    [
        Parameter("output", Annotation(Type(np.int32, (1,)), "o")),
        Parameter("input", Annotation(Type(np.uint8, (1,)), "i")),
    ],
    strLUT
    + """
        ${output.store_same}(LUT[${input.load_same}]);
    """,
)

# Additive predicate with an int32 zero as its second argument.
# The snippet is rendered as a C statement, so it has to end with a
# semicolon (the original text lacked it).
predicate = Predicate(
    Snippet.create(lambda v1, v2: """return ${v1} + ${v2};"""),
    np.int32(0),
)

# Reduction over the transformation's output, with the transformation
# attached to the reduction's input so that it consumes raw bytes.
sum_bits_reduction = Reduce(byte_to_count.output, predicate)
sum_bits_reduction.parameter.input.connect(
    byte_to_count,
    byte_to_count.output,
    new_input=byte_to_count.input,
)
sum_bits = sum_bits_reduction.compile(thr)
# sum_byte_count = ReductionKernel(cx, np.int32, neutral="0",
#                    reduce_expr="a+b", map_expr="LUT[bytes[i]]",
#                    arguments="__global unsigned char *bytes",
#                    preamble=strLUT)
# def count_bits(img):
#    return sum_byte_count(img).get().item()
#
# pixel_inds = GenericScanKernel(cx, np.int32,
#                    arguments="__global unsigned char *bytes, "
#                              "int image_w, "
#                              "__global int *pixels",
#                    # Keep count of pixels we have stored so far
#                    input_expr="LUT[bytes[i]]",
#                    scan_expr="a+b", neutral="0",
#                    output_statement="""