def test_structure_type(thr):
    """
    Reduce an array of nested struct elements along axis 0 using a custom
    predicate that adds the structures field by field, then compare the
    outcome with a reference assembled from per-field NumPy sums.

    The test ends in either ``pytest.xfail`` or ``pytest.fail`` depending on
    whether ``Array.get()`` returned an array with a different dtype than
    the device array (see the comment before the final check).
    """
    shape = (100, 100)
    nested_dtype = numpy.dtype([('v', numpy.uint64)])
    dtype = dtypes.align(numpy.dtype([
        ('i1', numpy.uint32),
        ('nested', nested_dtype),
        ('i2', numpy.uint32)]))

    src = get_test_array(shape, dtype)
    src_dev = thr.to_device(src)

    # numpy cannot reduce arrays with struct dtypes,
    # so the reference is filled in one field at a time.
    expected = numpy.empty(shape[1], dtype)
    expected['i1'] = src['i1'].sum(0)
    expected['nested']['v'] = src['nested']['v'].sum(0)
    expected['i2'] = src['i2'].sum(0)

    # Field-wise addition of two structures; `v1` and `v2` must keep these
    # names since the snippet template refers to them.
    add_fields = Snippet.create(
        lambda v1, v2: """ ${ctype} result = ${v1}; result.i1 += ${v2}.i1; result.nested.v += ${v2}.nested.v; result.i2 += ${v2}.i2; return result; """,
        render_kwds=dict(ctype=dtypes.ctype_module(dtype)))
    neutral = numpy.zeros(1, dtype)[0]
    predicate = Predicate(add_fields, neutral)

    reduction = Reduce(src_dev, predicate, axes=(0, ))
    out_dev = thr.empty_like(reduction.parameter.output)
    compiled = reduction.compile(thr)
    compiled(out_dev, src_dev)
    result = out_dev.get()

    # Array.get() passes the array through numpy.lib.stride_tricks.as_strided(),
    # which substitutes dummy variables for custom offsets (our `dtype` has them).
    # The resulting dtype then differs from the original one, which would fail
    # the comparison. For the time being the mismatch is only recorded and the
    # result is cast back to the original (still compatible) dtype.
    # Once the behavior changes, this test starts failing and we will notice.
    # See inducer/compyte issue #26.
    dtype_corrupted = result.dtype != out_dev.dtype
    result = result.astype(dtype)

    assert diff_is_negligible(result, expected)

    if not dtype_corrupted:
        pytest.fail(
            "as_strided() does not corrupt the datatype anymore, "
            "we can remove the `astype()` now")
    pytest.xfail("as_strided() still corrupts the datatype")
def test_nondefault_function(thr):
    """
    Sum an ``int64`` test array along axis 0 with a hand-written addition
    predicate and compare the device result with ``numpy`` summation.
    """
    shape = (100, 100)
    src = get_test_array(shape, numpy.int64)
    src_dev = thr.to_device(src)
    expected = src.sum(0)

    # Plain addition with 0 as the neutral element.
    addition = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    predicate = Predicate(addition, 0)

    reduction = Reduce(src_dev, predicate, axes=(0, ))
    out_dev = thr.empty_like(reduction.parameter.output)
    compiled = reduction.compile(thr)
    compiled(out_dev, src_dev)

    assert diff_is_negligible(out_dev.get(), expected)
def test_scan_structure_type(thr, exclusive):
    """
    Scan an array of nested struct elements along axis 0 using a custom
    predicate that adds the structures field by field, then compare the
    outcome with a reference assembled from per-field ``ref_scan`` calls.

    ``exclusive`` is forwarded both to ``Scan`` and to the reference scan.
    """
    shape = (100, 100)
    nested_dtype = numpy.dtype([('v', numpy.uint64)])
    dtype = dtypes.align(numpy.dtype([
        ('i1', numpy.uint32),
        ('nested', nested_dtype),
        ('i2', numpy.uint32)]))

    src = get_test_array(shape, dtype)
    src_dev = thr.to_device(src)

    def scan_field(field):
        # Reference scan of a single scalar field along axis 0.
        return ref_scan(field, axes=0, exclusive=exclusive)

    # numpy cannot scan arrays with struct dtypes,
    # so the reference is filled in one field at a time.
    expected = numpy.empty(shape, dtype)
    expected['i1'] = scan_field(src['i1'])
    expected['nested']['v'] = scan_field(src['nested']['v'])
    expected['i2'] = scan_field(src['i2'])

    # Field-wise addition of two structures; `v1` and `v2` must keep these
    # names since the snippet template refers to them.
    add_fields = Snippet.create(
        lambda v1, v2: """ ${ctype} result = ${v1}; result.i1 += ${v2}.i1; result.nested.v += ${v2}.nested.v; result.i2 += ${v2}.i2; return result; """,
        render_kwds=dict(ctype=dtypes.ctype_module(dtype)))
    neutral = numpy.zeros(1, dtype)[0]
    predicate = Predicate(add_fields, neutral)

    scan = Scan(src_dev, predicate, axes=(0, ), exclusive=exclusive)
    out_dev = thr.empty_like(scan.parameter.output)
    compiled = scan.compile(thr)
    compiled(out_dev, src_dev)
    result = out_dev.get()

    assert diff_is_negligible(result, expected)
return repeat_kernel(img, prg.border, 1) # Create LUT and stringify into preamble of map kernel LUT = np.zeros(256, np.int32) for b in xrange(8): LUT[(np.arange(256) & (1 << b)) != 0] += 1 strLUT = "constant int LUT[256] = {" + ",".join(map(str, LUT)) + "};\n" byte_to_count = Transformation([ Parameter('output', Annotation(Type(np.int32, (1, )), 'o')), Parameter('input', Annotation(Type(np.uint8, (1, )), 'i')) ], strLUT + """ ${output.store_same}(LUT[${input.load_same}]); """) predicate = Predicate( Snippet.create(lambda v1, v2: """return ${v1} + ${v2}"""), np.int32(0)) sum_bits_reduction = Reduce(byte_to_count.output, predicate) sum_bits_reduction.parameter.input.connect(byte_to_count, byte_to_count.output, new_input=byte_to_count.input) sum_bits = sum_bits_reduction.compile(thr) #sum_byte_count = ReductionKernel(cx, np.int32, neutral="0", # reduce_expr="a+b", map_expr="LUT[bytes[i]]", # arguments="__global unsigned char *bytes", # preamble=strLUT) #def count_bits(img): # return sum_byte_count(img).get().item() # #pixel_inds = GenericScanKernel(cx, np.int32, # arguments="__global unsigned char *bytes, "
])) mmc_c_decl = dtypes.ctype_module(mmc_dtype) # Create the "empty" element for our minmax monoid, that is # x `minmax` empty == empty `minmax` x == x. empty = numpy.empty(1, mmc_dtype)[0] empty["cur_min"] = 1 << 30 empty["cur_max"] = -(1 << 30) # Reduction predicate for the minmax. # v1 and v2 get the names of two variables to be processed. predicate = Predicate( Snippet.create(lambda v1, v2: """ ${ctype} result = ${v1}; if (${v2}.cur_min < result.cur_min) result.cur_min = ${v2}.cur_min; if (${v2}.cur_max > result.cur_max) result.cur_max = ${v2}.cur_max; return result; """, render_kwds=dict(ctype=mmc_c_decl)), empty) # Test array arr = numpy.random.randint(0, 10**6, 20000) # A transformation that creates initial minmax structures for the given array of integers to_mmc = Transformation([ Parameter('output', Annotation(Type(mmc_dtype, arr.shape), 'o')), Parameter('input', Annotation(arr, 'i')) ], """ ${output.ctype} res; res.cur_min = ${input.load_same};
def test_scan_custom_predicate(thr):
    """
    Run ``check_scan`` over axes 1 and 2 with a user-supplied addition
    predicate (neutral element 0) instead of the default one.
    """
    addition = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    custom_predicate = Predicate(addition, 0)
    check_scan(
        thr,
        (10, 20, 30, 40),
        axes=(1, 2),
        predicate=custom_predicate)