def test_summation(thr):
    """Benchmark a full int64 sum reduction over 2**22 elements.

    Returns ``(best_time, processed_bytes)`` so the caller can report throughput.
    """
    size = 2 ** 22
    dtype = dtypes.normalize_type(numpy.int64)

    host_arr = get_test_array(size, dtype)
    dev_arr = thr.to_device(host_arr)

    reduction = Reduce(host_arr, predicate_sum(dtype))
    dev_out = thr.empty_like(reduction.parameter.output)
    expected = numpy.array([host_arr.sum()], dtype)

    compiled = reduction.compile(thr)

    # Take the minimum over several runs to filter out warm-up / scheduling noise.
    timings = []
    for _ in range(10):
        start = time.time()
        compiled(dev_out, dev_arr)
        thr.synchronize()
        timings.append(time.time() - start)

    assert diff_is_negligible(dev_out.get(), expected)

    return min(timings), size * dtype.itemsize
def test_summation(thr):
    """Time the device-side summation of 2**22 int64 values.

    Returns a ``(min_time, processed_bytes)`` tuple for performance reporting.
    """
    perf_size = 2 ** 22
    dtype = dtypes.normalize_type(numpy.int64)

    a = get_test_array(perf_size, dtype)
    a_dev = thr.to_device(a)

    rd = Reduce(a, predicate_sum(dtype))
    b_dev = thr.empty_like(rd.parameter.output)
    b_ref = numpy.array([a.sum()], dtype)
    rdc = rd.compile(thr)

    attempts = 10
    times = []
    for _attempt in range(attempts):
        # Synchronize before reading the clock so the kernel time is fully counted.
        start = time.time()
        rdc(b_dev, a_dev)
        thr.synchronize()
        times.append(time.time() - start)

    assert diff_is_negligible(b_dev.get(), b_ref)
    return min(times), perf_size * dtype.itemsize
def test_structure_type(thr):
    """Reduce an array with a nested struct dtype and track the as_strided() dtype quirk."""
    shape = (100, 100)
    dtype = dtypes.align(
        numpy.dtype([
            ('i1', numpy.uint32),
            ('nested', numpy.dtype([
                ('v', numpy.uint64),
                ])),
            ('i2', numpy.uint32)]))

    a = get_test_array(shape, dtype)
    a_dev = thr.to_device(a)

    # Have to construct the resulting array manually,
    # since numpy cannot reduce arrays with struct dtypes.
    b_ref = numpy.empty(100, dtype)
    b_ref['i1'] = a['i1'].sum(0)
    b_ref['nested']['v'] = a['nested']['v'].sum(0)
    b_ref['i2'] = a['i2'].sum(0)

    # Field-by-field addition of two struct values, rendered into the kernel.
    predicate = Predicate(
        Snippet.create(lambda v1, v2: """
            ${ctype} result = ${v1};
            result.i1 += ${v2}.i1;
            result.nested.v += ${v2}.nested.v;
            result.i2 += ${v2}.i2;
            return result;
            """,
            render_kwds=dict(ctype=dtypes.ctype_module(dtype))),
        numpy.zeros(1, dtype)[0])

    reduction = Reduce(a_dev, predicate, axes=(0,))
    b_dev = thr.empty_like(reduction.parameter.output)
    creduction = reduction.compile(thr)
    creduction(b_dev, a_dev)
    b_res = b_dev.get()

    # Array.get() runs numpy.lib.stride_tricks.as_strided() on the array,
    # which adds dummy variables instead of custom offsets (and our `dtype` has them),
    # making the result dtype different, and failing the test.
    # For now we will just note the difference and convert the result
    # back to the original dtype (they are still compatible).
    # When the behavior changes, the test will start to fail and we will notice.
    # See inducer/compyte issue #26.
    wrong_dtype = b_res.dtype != b_dev.dtype
    b_res = b_res.astype(dtype)

    assert diff_is_negligible(b_res, b_ref)

    if wrong_dtype:
        pytest.xfail("as_strided() still corrupts the datatype")
    else:
        pytest.fail("as_strided() does not corrupt the datatype anymore, "
            "we can remove the `astype()` now")
def test_normal(thr, shape, axis):
    """Check Reduce's summation against numpy.sum over a single axis (or all axes)."""
    source = get_test_array(shape, numpy.int64)
    source_dev = thr.to_device(source)

    # `axis is None` means a full reduction over every axis.
    axes = None if axis is None else (axis,)
    reduction = Reduce(source, predicate_sum(numpy.int64), axes=axes)

    result_dev = thr.empty_like(reduction.parameter.output)
    reference = source.sum(axis)

    reduction.compile(thr)(result_dev, source_dev)
    assert diff_is_negligible(result_dev.get(), reference)
def test_nonsequential_axes(thr):
    """Reduce over non-adjacent axes (0 and 2) of a 4D array."""
    shape = (50, 40, 30, 20)
    source = get_test_array(shape, numpy.int64)
    source_dev = thr.to_device(source)

    # Summing over axes 0 and 2 of the initial array equals a.sum(0).sum(1) on the host.
    reference = source.sum(0).sum(1)

    reduction = Reduce(source_dev, predicate_sum(numpy.int64), axes=(0, 2))
    result_dev = thr.empty_like(reduction.parameter.output)

    reduction.compile(thr)(result_dev, source_dev)
    assert diff_is_negligible(result_dev.get(), reference)
def test_nonsequential_axes(thr):
    """Verify reduction over a non-contiguous pair of axes of a 4D array."""
    shape = (50, 40, 30, 20)
    a = get_test_array(shape, numpy.int64)
    a_dev = thr.to_device(a)

    # sum over axes 0 and 2 of the initial array
    b_ref = a.sum(0).sum(1)

    rd = Reduce(a_dev, predicate_sum(numpy.int64), axes=(0, 2))
    b_dev = thr.empty_like(rd.parameter.output)
    rdc = rd.compile(thr)
    rdc(b_dev, a_dev)

    assert diff_is_negligible(b_dev.get(), b_ref)
def test_normal(thr, shape, axis):
    """Compare a device-side sum reduction with numpy's result for one axis or all."""
    a = get_test_array(shape, numpy.int64)
    a_dev = thr.to_device(a)

    if axis is None:
        # Full reduction over all axes.
        rd = Reduce(a, predicate_sum(numpy.int64), axes=None)
    else:
        rd = Reduce(a, predicate_sum(numpy.int64), axes=(axis,))

    b_dev = thr.empty_like(rd.parameter.output)
    b_ref = a.sum(axis)

    rdc = rd.compile(thr)
    rdc(b_dev, a_dev)
    assert diff_is_negligible(b_dev.get(), b_ref)
def test_nondefault_function(thr):
    """Run Reduce with a custom addition snippet instead of the built-in sum predicate."""
    shape = (100, 100)
    a = get_test_array(shape, numpy.int64)
    a_dev = thr.to_device(a)
    expected = a.sum(0)

    # Custom C-level reduction operation; 0 is the identity element.
    predicate = Predicate(
        Snippet.create(lambda v1, v2: "return ${v1} + ${v2};"),
        0)

    reduction = Reduce(a_dev, predicate, axes=(0,))
    out_dev = thr.empty_like(reduction.parameter.output)
    reduction.compile(thr)(out_dev, a_dev)

    assert diff_is_negligible(out_dev.get(), expected)
def test_nondefault_function(thr):
    """A user-supplied addition snippet should match numpy's axis-0 sum."""
    shape = (100, 100)
    src = get_test_array(shape, numpy.int64)
    src_dev = thr.to_device(src)
    reference = src.sum(0)

    # Hand-written predicate equivalent to predicate_sum; empty value is 0.
    add = Predicate(
        Snippet.create(lambda v1, v2: "return ${v1} + ${v2};"),
        0)

    rd = Reduce(src_dev, add, axes=(0,))
    dst_dev = thr.empty_like(rd.parameter.output)
    rdc = rd.compile(thr)
    rdc(dst_dev, src_dev)

    assert diff_is_negligible(dst_dev.get(), reference)
def test_structure_type(thr):
    """Reduce a struct-dtype array along axis 0, building the reference manually."""
    shape = (100, 100)
    dtype = dtypes.align(numpy.dtype([
        ('i1', numpy.uint32),
        ('nested', numpy.dtype([
            ('v', numpy.uint64),
            ])),
        ('i2', numpy.uint32)
        ]))

    a = get_test_array(shape, dtype)
    a_dev = thr.to_device(a)

    # Have to construct the resulting array manually,
    # since numpy cannot reduce arrays with struct dtypes.
    b_ref = numpy.empty(100, dtype)
    b_ref['i1'] = a['i1'].sum(0)
    b_ref['nested']['v'] = a['nested']['v'].sum(0)
    b_ref['i2'] = a['i2'].sum(0)

    # Adds two struct values field by field inside the kernel.
    predicate = Predicate(
        Snippet.create(lambda v1, v2: """
            ${ctype} result = ${v1};
            result.i1 += ${v2}.i1;
            result.nested.v += ${v2}.nested.v;
            result.i2 += ${v2}.i2;
            return result;
            """,
            render_kwds=dict(
                ctype=dtypes.ctype_module(dtype))),
        numpy.zeros(1, dtype)[0])

    reduction = Reduce(a_dev, predicate, axes=(0,))
    b_dev = thr.empty_like(reduction.parameter.output)

    compiled = reduction.compile(thr)
    compiled(b_dev, a_dev)

    assert diff_is_negligible(b_dev.get(), b_ref)
def test_structure_type(thr):
    """Sum an array of nested structs over axis 0 via a custom predicate."""
    shape = (100, 100)
    dtype = dtypes.align(
        numpy.dtype([
            ('i1', numpy.uint32),
            ('nested', numpy.dtype([
                ('v', numpy.uint64),
                ])),
            ('i2', numpy.uint32)]))

    a = get_test_array(shape, dtype)
    a_dev = thr.to_device(a)

    # Have to construct the resulting array manually,
    # since numpy cannot reduce arrays with struct dtypes.
    b_ref = numpy.empty(100, dtype)
    b_ref['i1'] = a['i1'].sum(0)
    b_ref['nested']['v'] = a['nested']['v'].sum(0)
    b_ref['i2'] = a['i2'].sum(0)

    # The empty value is a zero-initialized struct scalar.
    empty = numpy.zeros(1, dtype)[0]
    predicate = Predicate(
        Snippet.create(lambda v1, v2: """
            ${ctype} result = ${v1};
            result.i1 += ${v2}.i1;
            result.nested.v += ${v2}.nested.v;
            result.i2 += ${v2}.i2;
            return result;
            """,
            render_kwds=dict(ctype=dtypes.ctype_module(dtype))),
        empty)

    rd = Reduce(a_dev, predicate, axes=(0,))
    b_dev = thr.empty_like(rd.parameter.output)
    rdc = rd.compile(thr)
    rdc(b_dev, a_dev)

    b_res = b_dev.get()
    assert diff_is_negligible(b_res, b_ref)
# Render the popcount lookup table as a C constant array for the kernel preamble.
strLUT = "constant int LUT[256] = {" + ",".join(map(str, LUT)) + "};\n"

# Transformation mapping each input byte to its set-bit count via the LUT.
byte_to_count = Transformation([
    Parameter('output', Annotation(Type(np.int32, (1,)), 'o')),
    Parameter('input', Annotation(Type(np.uint8, (1,)), 'i'))],
    strLUT + """
    ${output.store_same}(LUT[${input.load_same}]);
    """)

# BUG FIX: the rendered C snippet was missing its terminating semicolon
# ("return ${v1} + ${v2}"), which would produce an uncompilable kernel;
# the other predicates in this file all end the statement with ';'.
predicate = Predicate(
    Snippet.create(lambda v1, v2: """return ${v1} + ${v2};"""),
    np.int32(0))

# Reduce the per-byte counts to a total; feed input bytes through byte_to_count.
sum_bits_reduction = Reduce(byte_to_count.output, predicate)
sum_bits_reduction.parameter.input.connect(
    byte_to_count, byte_to_count.output, new_input=byte_to_count.input)
sum_bits = sum_bits_reduction.compile(thr)

#sum_byte_count = ReductionKernel(cx, np.int32, neutral="0",
#        reduce_expr="a+b", map_expr="LUT[bytes[i]]",
#        arguments="__global unsigned char *bytes",
#        preamble=strLUT)

#def count_bits(img):
#    return sum_byte_count(img).get().item()
#
#pixel_inds = GenericScanKernel(cx, np.int32,
#        arguments="__global unsigned char *bytes, "
#            "int image_w, "
#            "__global int *pixels",
#        # Keep count of pixels we have stored so far
#        input_expr="LUT[bytes[i]]",
#        scan_expr="a+b", neutral="0",
#        output_statement="""
# Test array
arr = numpy.random.randint(0, 10 ** 6, 20000)

# A transformation that creates initial minmax structures for the given array of integers
# (both fields start equal to the element itself).
to_mmc = Transformation(
    [Parameter('output', Annotation(Type(mmc_dtype, arr.shape), 'o')),
    Parameter('input', Annotation(arr, 'i'))],
    """
    ${output.ctype} res;
    res.cur_min = ${input.load_same};
    res.cur_max = ${input.load_same};
    ${output.store_same}(res);
    """)

# Create the reduction computation and attach the transformation above to its input.
minmax_reduction = Reduce(to_mmc.output, predicate)
minmax_reduction.parameter.input.connect(
    to_mmc, to_mmc.output, new_input=to_mmc.input)
compiled_reduction = minmax_reduction.compile(thr)

# Run the computation
device_arr = thr.to_device(arr)
device_res = thr.empty_like(minmax_reduction.parameter.output)
compiled_reduction(device_res, device_arr)
minmax = device_res.get()

assert minmax["cur_min"] == arr.min()
assert minmax["cur_max"] == arr.max()
# Test array
arr = numpy.random.randint(0, 10 ** 6, 20000)

# A transformation that creates initial minmax structures for the given array of integers
to_mmc = Transformation([
    Parameter('output', Annotation(Type(mmc_dtype, arr.shape), 'o')),
    Parameter('input', Annotation(arr, 'i'))
    ],
    """
    ${output.ctype} res;
    res.cur_min = ${input.load_same};
    res.cur_max = ${input.load_same};
    ${output.store_same}(res);
    """)

# Create the reduction computation and attach the transformation above to its input.
reduction = Reduce(to_mmc.output, predicate)
reduction.parameter.input.connect(to_mmc, to_mmc.output, new_input=to_mmc.input)
creduction = reduction.compile(thr)

# Run the computation
arr_dev = thr.to_device(arr)
res_dev = thr.empty_like(reduction.parameter.output)
creduction(res_dev, arr_dev)
minmax = res_dev.get()

# The reduced struct must agree with numpy's min/max of the host array.
assert minmax["cur_min"] == arr.min()
assert minmax["cur_max"] == arr.max()
# Transformation mapping each input byte to its set-bit count using the LUT
# array rendered into the kernel preamble by strLUT.
byte_to_count = Transformation(
    [
        Parameter("output", Annotation(Type(np.int32, (1,)), "o")),
        Parameter("input", Annotation(Type(np.uint8, (1,)), "i")),
    ],
    strLUT + """
    ${output.store_same}(LUT[${input.load_same}]);
    """,
)

# BUG FIX: the C snippet was missing its terminating semicolon
# ("return ${v1} + ${v2}"); without it the generated kernel source is
# syntactically invalid C. The semicolon has been added.
predicate = Predicate(
    Snippet.create(lambda v1, v2: """return ${v1} + ${v2};"""),
    np.int32(0),
)

# Sum the per-byte counts; route the raw bytes through byte_to_count first.
sum_bits_reduction = Reduce(byte_to_count.output, predicate)
sum_bits_reduction.parameter.input.connect(byte_to_count, byte_to_count.output,
    new_input=byte_to_count.input)
sum_bits = sum_bits_reduction.compile(thr)

# sum_byte_count = ReductionKernel(cx, np.int32, neutral="0",
#         reduce_expr="a+b", map_expr="LUT[bytes[i]]",
#         arguments="__global unsigned char *bytes",
#         preamble=strLUT)

# def count_bits(img):
#     return sum_byte_count(img).get().item()
#
# pixel_inds = GenericScanKernel(cx, np.int32,
#         arguments="__global unsigned char *bytes, "
#             "int image_w, "
#             "__global int *pixels",
#         # Keep count of pixels we have stored so far
#         input_expr="LUT[bytes[i]]",
#         scan_expr="a+b", neutral="0",
#         output_statement="""