Example #1
0
def _create_vector_types():
    """Create the vector dtypes and attach them, with constructors, to ``vec``.

    For every (base type, component count) pair in the table below this:

    * builds a structured numpy dtype whose fields are the first ``count``
      names of ``x, y, z, w``, all of the base type;
    * registers it via ``pycuda.tools.register_dtype`` under a name such as
      ``float4`` (``alias_ok=True``);
    * stores the dtype as ``vec.<name>``;
    * stores a static factory ``vec.make_<name>`` that takes one argument
      per field and returns ``numpy.array((...), dtype=<that dtype>)``.
    """
    from pycuda.characterize import platform_bits

    # "long"/"ulong" are 32-bit only when pycuda reports a 32-bit platform.
    is_32bit = platform_bits() == 32
    long_dtype = np.int32 if is_32bit else np.int64
    ulong_dtype = np.uint32 if is_32bit else np.uint64

    field_names = ["x", "y", "z", "w"]

    from pycuda.tools import register_dtype

    one_to_four = [1, 2, 3, 4]
    one_to_two = [1, 2]
    type_table = [
        ("char", np.int8, one_to_four),
        ("uchar", np.uint8, one_to_four),
        ("short", np.int16, one_to_four),
        ("ushort", np.uint16, one_to_four),
        ("int", np.int32, one_to_four),
        ("uint", np.uint32, one_to_four),
        ("long", long_dtype, one_to_four),
        ("ulong", ulong_dtype, one_to_four),
        ("longlong", np.int64, one_to_two),
        ("ulonglong", np.uint64, one_to_two),
        ("float", np.float32, one_to_four),
        ("double", np.float64, one_to_two),
    ]

    for base_name, base_type, counts in type_table:
        for count in counts:
            components = field_names[:count]
            type_name = "%s%d" % (base_name, count)
            vec_dtype = np.dtype([(fname, base_type) for fname in components])

            register_dtype(vec_dtype, type_name, alias_ok=True)
            setattr(vec, type_name, vec_dtype)

            # The factory is generated from source so that its parameters are
            # literally named after the fields (x, y, ...). The evaluated text
            # is built only from the constants above, never from outside input.
            arg_list = ",".join(components)
            factory_src = "lambda %s: array((%s), dtype=my_dtype)" % (arg_list, arg_list)
            factory = eval(factory_src, dict(array=np.array, my_dtype=vec_dtype))
            setattr(vec, "make_" + type_name, staticmethod(factory))
Example #2
0
def _create_vector_types():
    """Populate ``vec`` with vector dtypes and matching ``make_*`` factories.

    Each entry of the table pairs a base name and numpy scalar type with the
    component counts to generate. For every count a structured dtype with
    fields drawn from ``x, y, z, w`` is created, registered through
    ``pycuda.tools.register_dtype`` (``alias_ok=True``) as e.g. ``int3``, and
    set on ``vec`` under that name. A ``make_<name>`` static factory taking
    one argument per field is set on ``vec`` as well.
    """
    from pycuda.characterize import platform_bits

    # Width of "long"/"ulong" depends on what pycuda reports for the platform.
    if platform_bits() != 32:
        long_dtype, ulong_dtype = np.int64, np.uint64
    else:
        long_dtype, ulong_dtype = np.int32, np.uint32

    field_names = ["x", "y", "z", "w"]

    from pycuda.tools import register_dtype

    def _install(base_name, base_type, count):
        # Build, register and publish one vector type plus its factory.
        components = field_names[:count]
        vec_name = "%s%d" % (base_name, count)
        vec_dtype = np.dtype(
            [(component, base_type) for component in components]
        )

        register_dtype(vec_dtype, vec_name, alias_ok=True)
        setattr(vec, vec_name, vec_dtype)

        # Generated source gives the factory parameters named like the
        # fields. Note that for a single component "(x)" is a parenthesised
        # value, not a 1-tuple. Only the constants defined in this function
        # ever reach eval().
        params = ",".join(components)
        maker = eval(
            "lambda %s: array((%s), dtype=my_dtype)" % (params, params),
            dict(array=np.array, my_dtype=vec_dtype),
        )
        setattr(vec, "make_" + vec_name, staticmethod(maker))

    specs = [
        ("char", np.int8, [1, 2, 3, 4]),
        ("uchar", np.uint8, [1, 2, 3, 4]),
        ("short", np.int16, [1, 2, 3, 4]),
        ("ushort", np.uint16, [1, 2, 3, 4]),
        ("int", np.int32, [1, 2, 3, 4]),
        ("uint", np.uint32, [1, 2, 3, 4]),
        ("long", long_dtype, [1, 2, 3, 4]),
        ("ulong", ulong_dtype, [1, 2, 3, 4]),
        ("longlong", np.int64, [1, 2]),
        ("ulonglong", np.uint64, [1, 2]),
        ("float", np.float32, [1, 2, 3, 4]),
        ("double", np.float64, [1, 2]),
    ]

    for base_name, base_type, counts in specs:
        for count in counts:
            _install(base_name, base_type, count)
Example #3
0
# Kernel preambles: each one puts type-selection macros in front of the
# shared ``maxloc_preamble`` source (defined outside this excerpt).
# NOTE(review): the double variant pairs ``LTYPE long`` with np.int64 below;
# that presumably relies on ``long`` being 64 bits for the device compiler --
# confirm for platforms where ``long`` is 32 bits.
maxloc_preamble_single = (
    """
    #define MAXLOCN maxlocs
    #define TTYPE float
    #define LTYPE int
"""
    + maxloc_preamble
)

maxloc_preamble_double = (
    """
    #define MAXLOCN maxlocd
    #define TTYPE double
    #define LTYPE long
"""
    + maxloc_preamble
)

# Host-side (value, index) record layouts for the two precisions.
maxloc_dtype_single = np.dtype([("max", np.float32), ("loc", np.int32)])
maxloc_dtype_double = np.dtype([("max", np.float64), ("loc", np.int64)])

# Registered under the same names the MAXLOCN macros above expand to.
register_dtype(maxloc_dtype_single, "maxlocs")
register_dtype(maxloc_dtype_double, "maxlocd")

# Single-precision max-with-location reduction over a ``float *x`` argument.
mls = LowerLatencyReductionKernel(
    maxloc_dtype_single,
    neutral="maxloc_start()",
    reduce_expr="maxloc_red(a, b)",
    map_expr="maxloc_map(x[i], i)",
    arguments="float *x",
    preamble=maxloc_preamble_single,
)

# Double-precision counterpart over a ``double *x`` argument.
mld = LowerLatencyReductionKernel(
    maxloc_dtype_double,
    neutral="maxloc_start()",
    reduce_expr="maxloc_red(a, b)",
    map_expr="maxloc_map(x[i], i)",
    arguments="double *x",
    preamble=maxloc_preamble_double,
)
Example #4
0
    def test_struct_reduce(self):
        """Check a ReductionKernel whose result type is a custom struct.

        A two-field float32 structured dtype is registered as the C++ type
        ``minmax_collector``. A reduction over 20000 random float32 values
        maps each element to a collector and merges collectors with
        ``agg_mmc``; the resulting ``cur_min``/``cur_max`` must equal
        ``np.min``/``np.max`` of the same data fetched to the host.
        """
        from pycuda.curandom import rand as curand
        from pycuda.tools import register_dtype
        from pycuda.reduction import ReductionKernel

        # Device-side definition of the collector struct and its merge
        # function; passed to the kernel as its preamble.
        struct_src = """
        struct minmax_collector
        {
            float cur_min;
            float cur_max;

            __device__
            minmax_collector()
            { }

            __device__
            minmax_collector(float cmin, float cmax)
            : cur_min(cmin), cur_max(cmax)
            { }

            __device__ minmax_collector(minmax_collector const &src)
            : cur_min(src.cur_min), cur_max(src.cur_max)
            { }

            __device__ minmax_collector(minmax_collector const volatile &src)
            : cur_min(src.cur_min), cur_max(src.cur_max)
            { }

            __device__ minmax_collector volatile &operator=(
                minmax_collector const &src) volatile
            {
                cur_min = src.cur_min;
                cur_max = src.cur_max;
                return *this;
            }
        };

        __device__
        minmax_collector agg_mmc(minmax_collector a, minmax_collector b)
        {
            return minmax_collector(
                fminf(a.cur_min, b.cur_min),
                fmaxf(a.cur_max, b.cur_max));
        }
        """

        # Host-side layout with the same field names as the struct above.
        collector_dtype = np.dtype(
            [("cur_min", np.float32), ("cur_max", np.float32)]
        )

        dev_values = curand((20000,), dtype=np.float32)
        host_values = dev_values.get()

        register_dtype(collector_dtype, "minmax_collector")

        kernel_options = dict(
            # FIXME: needs infinity literal in real use, ok here
            neutral="minmax_collector(10000, -10000)",
            reduce_expr="agg_mmc(a, b)",
            map_expr="minmax_collector(x[i], x[i])",
            arguments="float *x",
            preamble=struct_src,
        )
        minmax_kernel = ReductionKernel(collector_dtype, **kernel_options)

        result = minmax_kernel(dev_values).get()

        assert result["cur_min"] == np.min(host_values)
        assert result["cur_max"] == np.max(host_values)
Example #5
0
    def test_struct_reduce(self):
        """Reduce random float32 data into a struct-typed (min, max) result.

        Registers a structured dtype as ``minmax_collector``, builds a
        ``ReductionKernel`` producing that type, runs it over 20000 random
        values and asserts the reduced ``cur_min``/``cur_max`` match the
        numpy minimum and maximum of the host copy of the data.
        """
        # CUDA source handed to the kernel as preamble: the collector struct
        # (with plain and volatile copy paths) and the pairwise merge agg_mmc.
        cuda_preamble = """
        struct minmax_collector
        {
            float cur_min;
            float cur_max;

            __device__
            minmax_collector()
            { }

            __device__
            minmax_collector(float cmin, float cmax)
            : cur_min(cmin), cur_max(cmax)
            { }

            __device__ minmax_collector(minmax_collector const &src)
            : cur_min(src.cur_min), cur_max(src.cur_max)
            { }

            __device__ minmax_collector(minmax_collector const volatile &src)
            : cur_min(src.cur_min), cur_max(src.cur_max)
            { }

            __device__ minmax_collector volatile &operator=(
                minmax_collector const &src) volatile
            {
                cur_min = src.cur_min;
                cur_max = src.cur_max;
                return *this;
            }
        };

        __device__
        minmax_collector agg_mmc(minmax_collector a, minmax_collector b)
        {
            return minmax_collector(
                fminf(a.cur_min, b.cur_min),
                fmaxf(a.cur_max, b.cur_max));
        }
        """
        struct_fields = [("cur_min", np.float32), ("cur_max", np.float32)]
        struct_dtype = np.dtype(struct_fields)

        from pycuda.curandom import rand as curand

        samples_gpu = curand((20000,), dtype=np.float32)
        samples = samples_gpu.get()

        from pycuda.tools import register_dtype

        register_dtype(struct_dtype, "minmax_collector")

        from pycuda.reduction import ReductionKernel

        reducer = ReductionKernel(
            struct_dtype,
            # FIXME: needs infinity literal in real use, ok here
            neutral="minmax_collector(10000, -10000)",
            reduce_expr="agg_mmc(a, b)",
            map_expr="minmax_collector(x[i], x[i])",
            arguments="float *x",
            preamble=cuda_preamble,
        )

        reduced = reducer(samples_gpu).get()

        # Reference values computed on the host from the same samples.
        expected_min = np.min(samples)
        expected_max = np.max(samples)

        assert reduced["cur_min"] == expected_min
        assert reduced["cur_max"] == expected_max
Example #6
0
# Dimensions of the route table: one row per route, as wide as the longest
# route (0 when there are no routes).
NUM_ROUTES = len(routes)
LONGEST_ROUTE = max([0] + [len(route) for route in routes])

# C declaration of a single stop, mirrored field-for-field by the numpy
# dtype registered under the same name below.
preamble = """
struct route_stop_s {
    unsigned int station_id;
    float distance_m;
};
"""
route_stop_dtype = np.dtype(
    [
        ("station_id", np.uint32),
        ("distance_m", np.float32),
    ]
)
register_dtype(route_stop_dtype, "route_stop_s")

# routes_np is a matrix with one row per route; the columns hold that
# route's stops in order. Each stop is two 32-bit values: the global
# station_id, then the distance in meters to the next station.
# Routes differ in length, so routes_lengths_np records how many entries of
# each row are in use. Both arrays come from np.empty, so anything not
# written later is uninitialised.
routes_np = np.empty((NUM_ROUTES, LONGEST_ROUTE), dtype=route_stop_dtype)
routes_lengths_np = np.empty(NUM_ROUTES, dtype=np.uint32)

stopid_to_gps = {}
global_stop_id = 0
for r_index, r in enumerate(routes):
    routes_lengths_np[r_index] = len(r)
    for s_index, s in enumerate(r):
        gps = (s["lat"], s["lon"])
        if gps not in stops:
            stops[gps] = {}