def _create_vector_types():
    """Attach vector dtypes and matching ``make_*`` factories to ``vec``.

    For each base type and component count listed in the table below, a
    structured NumPy dtype whose fields are taken in order from
    ``x, y, z, w`` is created, registered through ``register_dtype`` under
    the name ``<base><count>`` (aliases permitted), and stored on ``vec``
    under that same name. A static ``make_<base><count>`` callable that
    takes one argument per field and returns an array of that dtype is
    stored next to it.
    """
    from pycuda.characterize import platform_bits
    from pycuda.tools import register_dtype

    # "long"/"ulong" are 32 bits wide only when platform_bits() reports 32.
    is_32_bit = platform_bits() == 32
    long_dtype = np.int32 if is_32_bit else np.int64
    ulong_dtype = np.uint32 if is_32_bit else np.uint64

    field_names = ["x", "y", "z", "w"]

    one_to_four = [1, 2, 3, 4]
    one_to_two = [1, 2]
    scalar_specs = [
        ("char", np.int8, one_to_four),
        ("uchar", np.uint8, one_to_four),
        ("short", np.int16, one_to_four),
        ("ushort", np.uint16, one_to_four),
        ("int", np.int32, one_to_four),
        ("uint", np.uint32, one_to_four),
        ("long", long_dtype, one_to_four),
        ("ulong", ulong_dtype, one_to_four),
        ("longlong", np.int64, one_to_two),
        ("ulonglong", np.uint64, one_to_two),
        ("float", np.float32, one_to_four),
        ("double", np.float64, one_to_two),
    ]

    for base_name, scalar_type, widths in scalar_specs:
        for width in widths:
            components = field_names[:width]
            type_name = "%s%d" % (base_name, width)
            vec_dtype = np.dtype(
                [(component, scalar_type) for component in components]
            )

            register_dtype(vec_dtype, type_name, alias_ok=True)
            setattr(vec, type_name, vec_dtype)

            # The factory is generated from source so that its parameters
            # carry the field names; the text is assembled solely from
            # the fixed field_names list above.
            arg_list = ",".join(components)
            factory_source = "lambda %s: array((%s), dtype=my_dtype)" % (
                arg_list,
                arg_list,
            )
            factory = eval(
                factory_source, {"array": np.array, "my_dtype": vec_dtype}
            )
            setattr(vec, "make_" + type_name, staticmethod(factory))
def _create_vector_types():
    """Populate ``vec`` with vector dtypes and their ``make_*`` constructors.

    Walks a table of (base name, scalar type, component counts). For each
    combination it builds a structured dtype with the leading fields of
    ``x, y, z, w``, registers it via ``register_dtype`` (``alias_ok=True``)
    as ``<base><count>``, and sets both the dtype and a static
    ``make_<base><count>`` factory as attributes of ``vec``.
    """
    from pycuda.characterize import platform_bits
    from pycuda.tools import register_dtype

    # Width of "long"/"ulong" depends on what platform_bits() reports.
    if platform_bits() == 32:
        long_dtype, ulong_dtype = np.int32, np.uint32
    else:
        long_dtype, ulong_dtype = np.int64, np.uint64

    axes = ["x", "y", "z", "w"]
    up_to_four = [1, 2, 3, 4]
    up_to_two = [1, 2]

    def install(type_name, scalar_type, members):
        # Register one vector dtype and hang it, plus its factory, on vec.
        struct_dtype = np.dtype([(member, scalar_type) for member in members])
        register_dtype(struct_dtype, type_name, alias_ok=True)
        setattr(vec, type_name, struct_dtype)

        # Evaluated source gives the factory one named parameter per field;
        # the parameter list comes only from the fixed axes list.
        params = ",".join(members)
        maker = eval(
            "lambda %s: array((%s), dtype=my_dtype)" % (params, params),
            dict(array=np.array, my_dtype=struct_dtype),
        )
        setattr(vec, "make_" + type_name, staticmethod(maker))

    layout = [
        ("char", np.int8, up_to_four),
        ("uchar", np.uint8, up_to_four),
        ("short", np.int16, up_to_four),
        ("ushort", np.uint16, up_to_four),
        ("int", np.int32, up_to_four),
        ("uint", np.uint32, up_to_four),
        ("long", long_dtype, up_to_four),
        ("ulong", ulong_dtype, up_to_four),
        ("longlong", np.int64, up_to_two),
        ("ulonglong", np.uint64, up_to_two),
        ("float", np.float32, up_to_four),
        ("double", np.float64, up_to_two),
    ]

    for prefix, scalar_type, sizes in layout:
        for size in sizes:
            install("%s%d" % (prefix, size), scalar_type, axes[:size])
# Per-precision macro headers that are prepended to ``maxloc_preamble``.
# A preprocessor directive extends to the end of its line, so every #define
# must sit on its own line and the last one must be newline-terminated
# before ``maxloc_preamble`` is appended.
maxloc_preamble_single = """
#define MAXLOCN maxlocs
#define TTYPE float
#define LTYPE int
""" + maxloc_preamble

# NOTE(review): LTYPE "long" is paired with np.int64 below; this assumes the
# device compiler's ``long`` is 64 bits wide -- confirm for LLP64 platforms.
maxloc_preamble_double = """
#define MAXLOCN maxlocd
#define TTYPE double
#define LTYPE long
""" + maxloc_preamble

# Structured dtypes holding a value ("max") and an index ("loc"); they are
# registered under the same names the MAXLOCN macros expand to.
maxloc_dtype_double = np.dtype([("max", np.float64), ("loc", np.int64)])
maxloc_dtype_single = np.dtype([("max", np.float32), ("loc", np.int32)])

register_dtype(maxloc_dtype_single, "maxlocs")
register_dtype(maxloc_dtype_double, "maxlocd")

# Reduction kernels producing a (max, loc) record for float and double input.
# maxloc_start / maxloc_red / maxloc_map are not defined in these strings;
# presumably they come from ``maxloc_preamble`` -- verify there.
mls = LowerLatencyReductionKernel(
    maxloc_dtype_single,
    neutral="maxloc_start()",
    reduce_expr="maxloc_red(a, b)",
    map_expr="maxloc_map(x[i], i)",
    arguments="float *x",
    preamble=maxloc_preamble_single,
)

mld = LowerLatencyReductionKernel(
    maxloc_dtype_double,
    neutral="maxloc_start()",
    reduce_expr="maxloc_red(a, b)",
    map_expr="maxloc_map(x[i], i)",
    arguments="double *x",
    preamble=maxloc_preamble_double,
)
def test_struct_reduce(self):
    """Reduce random floats into a (min, max) struct and compare with NumPy.

    A CUDA ``minmax_collector`` struct is paired with a structured NumPy
    dtype that has the same two float fields, the dtype is registered under
    the struct's name, and a ``ReductionKernel`` using that dtype as its
    output type is run over 20000 random values.
    """
    from pycuda.curandom import rand as curand
    from pycuda.reduction import ReductionKernel
    from pycuda.tools import register_dtype

    # Device-side struct definition followed by the pairwise combiner that
    # the reduction uses.
    struct_source = (
        " struct minmax_collector { float cur_min; float cur_max; "
        "__device__ minmax_collector() { } "
        "__device__ minmax_collector(float cmin, float cmax) "
        ": cur_min(cmin), cur_max(cmax) { } "
        "__device__ minmax_collector(minmax_collector const &src) "
        ": cur_min(src.cur_min), cur_max(src.cur_max) { } "
        "__device__ minmax_collector(minmax_collector const volatile &src) "
        ": cur_min(src.cur_min), cur_max(src.cur_max) { } "
        "__device__ minmax_collector volatile &operator=( "
        "minmax_collector const &src) volatile "
        "{ cur_min = src.cur_min; cur_max = src.cur_max; return *this; } "
        "}; "
        "__device__ minmax_collector agg_mmc(minmax_collector a, "
        "minmax_collector b) "
        "{ return minmax_collector( "
        "fminf(a.cur_min, b.cur_min), fmaxf(a.cur_max, b.cur_max)); } "
    )

    collector_dtype = np.dtype(
        [("cur_min", np.float32), ("cur_max", np.float32)]
    )

    device_values = curand((20000, ), dtype=np.float32)
    host_values = device_values.get()

    register_dtype(collector_dtype, "minmax_collector")

    reducer = ReductionKernel(
        collector_dtype,
        # FIXME: needs infinity literal in real use, ok here
        neutral="minmax_collector(10000, -10000)",
        reduce_expr="agg_mmc(a, b)",
        map_expr="minmax_collector(x[i], x[i])",
        arguments="float *x",
        preamble=struct_source,
    )

    result = reducer(device_values).get()

    # The device reduction must agree exactly with the host-side extrema.
    assert result["cur_min"] == np.min(host_values)
    assert result["cur_max"] == np.max(host_values)
def test_struct_reduce(self):
    """Check a struct-valued reduction against NumPy's min and max.

    Registers a two-field float32 dtype as ``minmax_collector``, reduces
    20000 random floats on the device into one such record, and asserts
    that its ``cur_min`` / ``cur_max`` fields equal the host-side extrema.
    """
    # CUDA definition of the collector struct (constructors and a volatile
    # assignment operator included).
    collector_struct = (
        " struct minmax_collector { float cur_min; float cur_max; "
        "__device__ minmax_collector() { } "
        "__device__ minmax_collector(float cmin, float cmax) : "
        "cur_min(cmin), cur_max(cmax) { } "
        "__device__ minmax_collector(minmax_collector const &src) : "
        "cur_min(src.cur_min), cur_max(src.cur_max) { } "
        "__device__ minmax_collector(minmax_collector const volatile &src) : "
        "cur_min(src.cur_min), cur_max(src.cur_max) { } "
        "__device__ minmax_collector volatile &operator=( "
        "minmax_collector const &src) volatile { "
        "cur_min = src.cur_min; cur_max = src.cur_max; return *this; } }; "
    )
    # Combiner merging two collectors field by field.
    combine_function = (
        "__device__ minmax_collector agg_mmc("
        "minmax_collector a, minmax_collector b) { "
        "return minmax_collector( "
        "fminf(a.cur_min, b.cur_min), fmaxf(a.cur_max, b.cur_max)); } "
    )
    preamble = collector_struct + combine_function

    mmc_dtype = np.dtype(
        [(field, np.float32) for field in ("cur_min", "cur_max")]
    )

    from pycuda.curandom import rand as curand
    samples_gpu = curand((20000,), dtype=np.float32)
    samples = samples_gpu.get()

    from pycuda.tools import register_dtype
    register_dtype(mmc_dtype, "minmax_collector")

    from pycuda.reduction import ReductionKernel
    kernel_spec = dict(
        # FIXME: needs infinity literal in real use, ok here
        neutral="minmax_collector(10000, -10000)",
        reduce_expr="agg_mmc(a, b)",
        map_expr="minmax_collector(x[i], x[i])",
        arguments="float *x",
        preamble=preamble,
    )
    reduce_minmax = ReductionKernel(mmc_dtype, **kernel_spec)

    reduced = reduce_minmax(samples_gpu).get()

    assert reduced["cur_min"] == np.min(samples)
    assert reduced["cur_max"] == np.max(samples)
# Number of routes, and the number of stops on the longest route; together
# these are the dimensions of the route matrix allocated below.
NUM_ROUTES = len(routes)
LONGEST_ROUTE = 0
for r in routes:
    if len(r) > LONGEST_ROUTE:
        LONGEST_ROUTE = len(r)

# Struct declaration text; its name and fields match route_stop_dtype, which
# is registered under the same name.
preamble = """ struct route_stop_s { unsigned int station_id; float distance_m; }; """
route_stop_dtype = np.dtype([("station_id", np.uint32), ("distance_m", np.float32)])
register_dtype(route_stop_dtype, "route_stop_s")

# Make a matrix where each row is a route and the columns are the list of
# stops. Each stop is two 32-bit values: the first is the global station_id
# number and the second is the distance in meters to the next station.
# We also keep an array that lists how many entries are in each row, since
# not all routes have the same number of stops.
routes_np = np.empty((NUM_ROUTES, LONGEST_ROUTE), dtype=route_stop_dtype)
routes_lengths_np = np.empty((NUM_ROUTES), dtype=np.uint32)

stopid_to_gps = {}
global_stop_id = 0
for r_index, r in enumerate(routes):
    # Record how many stops this route has.
    routes_lengths_np[r_index] = len(r)
    for s_index, s in enumerate(r):
        # A stop is keyed by its (lat, lon) pair.
        gps = (s["lat"], s["lon"])
        if gps not in stops:
            # First time this coordinate pair is seen: start an empty record.
            stops[gps] = {}