Example #1
0
def find_pi_cuda(n):
    '''Monte Carlo estimate of pi on the GPU.

    Takes in integer n (n points used for integration).  Returns the
    estimate ``pi`` and its absolute error against ``np.pi``.
    '''

    # here's Monte Carlo for finding pi.
    # XORWOWRandomNumberGenerator takes a seed_getter *callable* and is a
    # generator object, not a sample array — the original code passed a
    # seed array positionally and then treated the generator itself as
    # the samples, which fails in np.hypot below.
    rng = rand.XORWOWRandomNumberGenerator(rand.seed_getter_unique)
    x = rng.gen_uniform(n, np.float32)
    y = rng.gen_uniform(n, np.float32)

    ones = gpuarray.ones(
        int(n))  # need an array the size of guesses to use mask

    # Distance of every sample from the origin, computed on the host.
    distance = np.hypot(x.get(), y.get())
    mask = np.where(distance <= 1.0)
    # Fraction of points inside the unit quarter-circle, times 4.
    pi = 4 * np.sum(ones.get()[mask]) / n
    error = np.abs(np.pi - pi)

    return pi, error
Example #2
0
 def initializeRandomness(self):
     """
     Initialize the random number generator used on GPU
     """

     # Seed getter handed to curandom: uploads N random int32 seeds.
     def seed_getter(N):
         seeds = np.random.randint(-2**30, 2**30, size=(N, ))
         return gpuarray.to_gpu(seeds.astype(np.int32))

     self.rand_gpu = curandom.XORWOWRandomNumberGenerator(seed_getter)
Example #3
0
 def __getattribute__(self, name):
     """Proxy all attribute access to a lazily constructed sampler."""
     # Go through object.__getattribute__ to avoid recursing into this
     # method while looking up the cache slot.
     cached = object.__getattribute__(self, '_sampler')
     if cached is not None:
         return cached.__getattribute__(name)

     # First access: build the sampler once and cache it.
     from pycuda import curandom
     cached = curandom.XORWOWRandomNumberGenerator(
         curandom.seed_getter_uniform)
     self._sampler = cached
     return cached.__getattribute__(name)
Example #4
0
    def __init__(self,
                 n_units,
                 n_incoming,
                 N,
                 init_sd=1.0,
                 precision=np.float32,
                 magic_numbers=False):
        """Hidden layer: allocate weights, biases and HMC state on the GPU.

        Args:
            n_units: number of units in this layer.
            n_incoming: number of incoming connections per unit.
            N: number of data rows (first dimension of the output buffer).
            init_sd: std-dev of the normal used to initialise weights/biases.
            precision: numpy dtype used for the GPU arrays.
            magic_numbers: if True use the 1.7159*tanh(2x/3) activation
                variant instead of plain clamped tanh.
        """

        self.n_units = n_units
        self.n_incoming = n_incoming
        self.N = N

        # Host-side random init, then upload in the requested precision.
        w = np.random.normal(0, init_sd, (self.n_incoming, self.n_units))
        b = np.random.normal(0, init_sd, (1, n_units))

        self.weights = gpuarray.to_gpu(w.copy().astype(precision))
        self.gW = gpuarray.empty_like(self.weights)

        # Prior and ID must be set after creation
        self.prior = -1
        self.ID = -1

        self.biases = gpuarray.to_gpu(b.copy().astype(precision))
        self.gB = gpuarray.empty_like(self.biases)

        # Set up momentum variables for HMC sampler.
        # NOTE(review): these are uploaded in np.random.normal's default
        # float64, unlike the weights above — confirm whether a cast to
        # `precision` is intended.
        self.pW = gpuarray.to_gpu(np.random.normal(0, 1, self.gW.shape))
        self.pB = gpuarray.to_gpu(np.random.normal(0, 1, self.gB.shape))

        # Per-parameter step sizes, initialised to 1.0.
        self.epsW = gpuarray.zeros(self.weights.shape, precision) + 1.0
        self.epsB = gpuarray.zeros(self.biases.shape, precision) + 1.0

        self.precision = precision
        self.outputs = gpuarray.zeros((self.N, self.n_units), precision)

        self.magic_numbers = magic_numbers
        # Define tan_h function on GPU
        if magic_numbers:
            # "Magic numbers" tanh variant (1.7159 * tanh(2x/3)).
            self.tanh = ElementwiseKernel("float *x",
                                          "x[i] = 1.7159 * tanh(2/3*x[i]);",
                                          "tan_h",
                                          preamble="#include <math.h>")
        else:
            # Plain tanh with input clamped to [-10, 10].
            self.tanh = ElementwiseKernel(
                "float *x",
                "x[i] = tanh(min(max(-10.0,x[i]),10.0));",
                "tan_h",
                preamble="#include <math.h>")

        # Compile kernels.  Use a context manager so the file handle is
        # closed instead of leaked by a bare open().read().
        with open(path + '/kernels.cu', "r") as kernel_file:
            kernels = SourceModule(kernel_file.read())
        self.add_bias_kernel = kernels.get_function("add_bias")

        self.rng = curandom.XORWOWRandomNumberGenerator()

        ##Initialize posterior weights
        self.posterior_weights = list()
        self.posterior_biases = list()
Example #5
0
    def __init__(self,
                 number_of_clauses,
                 T,
                 s,
                 clause_drop_p=0.0,
                 feature_drop_p=0.0,
                 number_of_gpus=1,
                 q=1.0,
                 boost_true_positive_feedback=1,
                 weighted_clauses=0,
                 number_of_state_bits=8,
                 append_negated=True,
                 grid=(16 * 13, 1, 1),
                 block=(128, 1, 1)):
        """Store hyper-parameters and prepare one CUDA context per GPU.

        ``number_of_gpus`` is capped at the number of devices actually
        present.  For each GPU a dedicated context, random generator and
        compiled encode kernels are created; every context is popped again
        before returning, so no context is left current afterwards.
        """
        # Never request more GPUs than physically available.
        self.number_of_gpus = np.minimum(cuda.Device.count(), number_of_gpus)

        self.number_of_clauses = number_of_clauses
        self.number_of_state_bits = number_of_state_bits
        self.T = int(T)  # stored as an integer
        self.s = s
        self.q = q
        self.boost_true_positive_feedback = boost_true_positive_feedback
        self.weighted_clauses = weighted_clauses
        self.append_negated = append_negated
        self.grid = grid
        self.block = block

        self.clause_drop_p = clause_drop_p
        self.feature_drop_p = feature_drop_p

        # Data and model state start as empty placeholders; presumably
        # filled in later by training code not visible here.
        self.X_train = np.array([])
        self.Y_train = np.array([])
        self.X_test = np.array([])
        self.ta_state = np.array([])
        self.clause_weights = np.array([])

        self.initialized = False

        self.gpus = []
        for c in range(self.number_of_gpus):
            print("Preparing GPU #%d" % (c))
            gpu = GPU()
            gpu.device_id = c
            gpu.device = cuda.Device(c)
            # make_context() also pushes the new context, so the
            # allocations below land on this device.
            gpu.context = gpu.device.make_context()
            gpu.g = curandom.XORWOWRandomNumberGenerator()

            # Compile the encoding kernels for this device.
            gpu.mod_encode = SourceModule(kernels.code_encode,
                                          no_extern_c=True)
            gpu.prepare_encode = gpu.mod_encode.get_function("prepare_encode")
            gpu.encode = gpu.mod_encode.get_function("encode")

            self.gpus.append(gpu)

            # Deactivate this device's context so the next iteration (and
            # the caller) starts with a clean context stack.
            gpu.context.pop()
        print()
Example #6
0
 def __getattribute__(self, name):
     """Delegate everything except seed management to a lazy sampler."""
     # 'seed' and 'set_seed' belong to this proxy itself.
     if name in ('seed', 'set_seed'):
         return object.__getattribute__(self, name)

     sampler = object.__getattribute__(self, '_sampler')
     if sampler is None:
         from pycuda import curandom, gpuarray
         if self.seed is None:
             seed_func = curandom.seed_getter_uniform
         else:
             def seed_func(N):
                 # Replicate the fixed seed across all N generator states.
                 return gpuarray.to_gpu(
                     np.array(N * [self.seed], dtype=np.int32))
         sampler = curandom.XORWOWRandomNumberGenerator(seed_func)
         self._sampler = sampler
     return sampler.__getattribute__(name)
Example #7
0
    def test_adjoint(self, iters=5):
        """Test the adjoint operator.

        Draws random u and v, then compares <Ku, v> against <u, K*v>;
        the per-iteration absolute differences are accumulated in
        ``errors``.

        Args:
            iters (int): number of iterations
        """
        src_shape = (self.data.nX1, self.data.nX2, 1)
        dest_shape = (self.data.nT, self.data.nC)
        # Fortran-ordered GPU work buffers for both domains.
        u = gpuarray.zeros(src_shape, self.precision_complex, order='F')
        ut = gpuarray.zeros(src_shape, self.precision_real, order='F')
        Ku = gpuarray.zeros(dest_shape, self.precision_complex, order='F')
        v = gpuarray.zeros(dest_shape, self.precision_complex, order='F')
        vt = gpuarray.zeros(dest_shape, self.precision_real, order='F')
        Kadv = gpuarray.zeros(src_shape, self.precision_complex, order='F')

        generator = curandom.XORWOWRandomNumberGenerator()
        errors = []

        try:
            i = 0
            for i in range(iters):
                # randomness: fill the real temporaries, then promote the
                # copies to the complex precision.
                generator.fill_uniform(ut)
                generator.fill_uniform(vt)
                v = gpuarray_copy(vt.astype(self.precision_complex))
                u = gpuarray_copy(ut.astype(self.precision_complex))

                # apply operators
                self.apply(u, Ku)
                self.adjoint(v, Kadv)

                # Inner products and norms used by the adjoint identity.
                scp1 = dotc_gpu(Ku, v)
                scp2 = dotc_gpu(u, Kadv)
                n_Ku = dotc_gpu(Ku)
                n_Kadv = dotc_gpu(Kadv)
                n_u = dotc_gpu(u)
                n_v = dotc_gpu(v)

                errors.append(np.abs(scp1-scp2))

            # NOTE(review): reports only the final iteration's values, and
            # raises NameError if iters <= 0 (scp1 is never assigned).
            print("Test " + str(i) + ": <Ku,v>=" + str(scp1) + ", <u,Kadv>=" +
                  str(scp2) + ", Error=" + str(np.abs(scp1-scp2)) +
                  ", Relative Error=" +
                  str((scp1-scp2)/(n_Ku*n_v + n_Kadv*n_u)))
        except KeyboardInterrupt:
            # Allow aborting long runs; -1 marks "no data collected".
            if len(errors) == 0:
                errors = -1
        finally:
            print("Mean Error: " + repr(np.mean(errors)))
            print("Standarddeviation: " + repr(np.std(errors)))
        return i
Example #8
0
    def __init__(self,
                 n_classes,
                 n_incoming,
                 N,
                 init_sd=0.1,
                 precision=np.float32):
        """Softmax output layer: allocate weights, biases and HMC state on GPU.

        Args:
            n_classes: number of output classes.
            n_incoming: number of incoming connections.
            N: number of data rows (first dimension of the output buffer).
            init_sd: std-dev of the normal weight/bias initialisation.
            precision: numpy dtype used for the GPU arrays.
        """
        self.type = 'Softmax'
        self.n_incoming = n_incoming
        self.n_classes = n_classes
        self.N = N

        # Host-side random init, then upload in the requested precision.
        w = np.random.normal(0, init_sd, (self.n_incoming, n_classes))
        b = np.random.normal(0, init_sd, (1, n_classes))
        self.weights = gpuarray.to_gpu(w.copy().astype(precision))
        self.gW = gpuarray.empty_like(self.weights)
        self.biases = gpuarray.to_gpu(b.copy().astype(precision))
        self.gB = gpuarray.empty_like(self.biases)

        # Prior and ID are set later
        self.prior = -1
        self.ID = -1

        # Set up momentum variables for HMC sampler.
        # NOTE(review): uploaded in np.random.normal's default float64,
        # unlike the weights — confirm whether a cast to `precision` is
        # intended.
        self.pW = gpuarray.to_gpu(np.random.normal(0, 1, self.gW.shape))
        self.pB = gpuarray.to_gpu(np.random.normal(0, 1, self.gB.shape))

        # Store stepsizes for each parameter
        self.epsW = gpuarray.zeros(self.weights.shape, precision) + 1.0
        self.epsB = gpuarray.zeros(self.biases.shape, precision) + 1.0

        self.outputs = gpuarray.zeros((self.N, self.n_classes), precision)
        self.precision = precision

        # Compile kernels.  Use a context manager so the file handle is
        # closed instead of leaked by a bare open().read().
        with open(path + '/kernels.cu', "r") as kernel_file:
            kernels = SourceModule(kernel_file.read())
        self.softmax_kernel = kernels.get_function("softmax")
        self.add_bias_kernel = kernels.get_function("add_bias")

        self.rng = curandom.XORWOWRandomNumberGenerator()

        ##Initialize posterior weights
        self.posterior_weights = list()
        self.posterior_biases = list()

        self.eps_tol = 1e-10
Example #9
0
def seed(s=None, device=None):
    """Resets the random number generator of the specified device.

    Args:
        s (int or None): Seed value. If it is ``None``, it initializes the
            generator without fixed seed.
        device: Device specifier (i.e. argument of :func:`get_device`).

    """
    global _generators

    # Build a fresh generator inside the device's context and replace any
    # previously cached generator for that device.
    with DeviceUser(device) as user:
        _generators[user.device] = curandom.XORWOWRandomNumberGenerator(
            seed_getter=_get_seed_getter(s))
Example #10
0
    def test_get_random_angle_in_radians(self):
        """Every generated angle must lie in the interval (0, 2*pi]."""
        generator = curandom.XORWOWRandomNumberGenerator()
        grid = np.zeros((matrix_size, matrix_size)).astype(np.float32)
        grid = gpuarray.to_gpu(grid)
        # Renamed from `i`: the original outer index was shadowed by the
        # inner row loop below, which also used `i`.
        for trial in range(10):
            get_random_angle(generator.state,
                             grid,
                             np.int32(matrix_size),
                             grid=(grid_dims, grid_dims),
                             block=(block_dims, block_dims, 1))
            grid_cpu = grid.get()
            for row in range(matrix_size):
                for col in range(matrix_size):
                    self.assertGreater(grid_cpu[row][col], 0)
                    self.assertLessEqual(grid_cpu[row][col], 2 * np.pi)
Example #11
0
    def test_survival_kernel_none_survive(self):
        # Start fully populated with survival probability 1 everywhere;
        # with the `survival_none` kernel the whole population should die.

        # make sure all cells die
        initial_population = np.ones(
            (matrix_size, matrix_size)).astype(np.float32)
        survival_probabilities = np.ones(
            (matrix_size, matrix_size)).astype(np.float32)
        generator = curandom.XORWOWRandomNumberGenerator()
        # `==` appears to be overloaded by the primitives DSL to chain
        # operations; run_primitive executes the chain on Config.engine.
        run_primitive(Empty_grid().vars(matrix_size) == Initialize_grid().vars(
            matrix_size, initial_population, survival_probabilities,
            generator))
        Config.engine.split()
        run_primitive(Survival_of_the_fittest().vars(survival_none,
                                                     matrix_size, grid_dims,
                                                     block_dims))
        grid_a = Config.engine.stack.pop().get()

        # Expected result: an all-zero grid (nothing survives).
        grid_b = np.zeros((matrix_size, matrix_size)).astype(np.float32)
        self.assertTrue((grid_a == grid_b).all())
Example #12
0
    def test_cycle_termination(self):
        # Seed a single occupied centre cell with zero survival
        # probabilities everywhere.
        initial_population = np.zeros(
            (matrix_size, matrix_size)).astype(np.float32)
        initial_population[matrix_size // 2][matrix_size // 2] = 1
        survival_probabilities = np.zeros(
            (matrix_size, matrix_size)).astype(np.float32)
        generator = curandom.XORWOWRandomNumberGenerator()
        run_primitive(Empty_grid().vars(matrix_size) == Initialize_grid().vars(
            matrix_size, initial_population, survival_probabilities,
            generator))
        # Run a bounded cycle: local diffusion chained with the stop
        # primitive, then let the engine terminate the cycle.
        Config.engine.n_iters = n_iters
        Config.engine.cycle_start()
        run_primitive(
            Local_diffusion().vars(local_always, matrix_size, p_local_always,
                                   grid_dims, block_dims) == Bmsb_stop())
        Config.engine.cycle_termination()

        # After termination the engine must report exactly n_iters
        # iterations and have cleared its split/continue flags.
        self.assertEqual(Config.engine.iters, n_iters)
        self.assertFalse(Config.engine.is_split)
        self.assertFalse(Config.engine.continue_cycle)
Example #13
0
def find_pi_cuda(n):
    '''Monte Carlo estimate of pi using GPU arrays.

    Takes in integer n (n points used for integration) and returns the
    estimate of pi.  (Note: the value is the scalar result of a GPU sum,
    not an int.)
    '''

    # Draw n uniform (x, y) coordinates directly on the GPU.
    generator = rand.XORWOWRandomNumberGenerator()
    xs = generator.gen_uniform(n, np.float32)
    ys = generator.gen_uniform(n, np.float32)

    # Squared distance of every sample from the origin.
    radius_sq = xs**2 + ys**2

    zeros = pycuda.gpuarray.zeros_like(xs, np.float32)
    ones = pycuda.gpuarray.ones_like(xs, np.float32)

    # Monte Carlo: indicator of "inside the quarter circle", then the
    # standard 4 * (hits / n) estimate.
    hits = pycuda.gpuarray.if_positive(radius_sq <= 1, ones, zeros)
    return 4 * gp.sum(hits) / n
Example #14
0
    def _test_population_growth_(self):
        # NOTE(review): the leading underscore keeps this method out of
        # unittest's test discovery — confirm it is disabled on purpose.

        # Two seeded cells (centre and corner); zero survival
        # probabilities everywhere.
        initial_population = np.zeros(
            (matrix_size, matrix_size)).astype(np.float32)
        initial_population[matrix_size // 2][matrix_size // 2] = 1
        initial_population[0][0] = 1
        survival_probabilities = np.zeros(
            (matrix_size, matrix_size)).astype(np.float32)
        generator = curandom.XORWOWRandomNumberGenerator()
        run_primitive(Empty_grid().vars(matrix_size) == Initialize_grid().vars(
            matrix_size, initial_population, survival_probabilities,
            generator))
        Config.engine.split()
        run_primitive(Population_growth().vars(population_growth, matrix_size,
                                               growth_rate, grid_dims,
                                               block_dims))
        grid_a = Config.engine.stack.pop().get()

        # Expected: both seeded cells grow to 227, all others stay 0.
        # (227 presumably derives from growth_rate — verify against the
        # kernel if growth_rate ever changes.)
        grid_b = np.zeros((matrix_size, matrix_size)).astype(np.float32)
        grid_b[matrix_size // 2][matrix_size // 2] = 227
        grid_b[0][0] = 227
        self.assertTrue((grid_a == grid_b).all())
Example #15
0
def get_generator(device=None):
    """Gets the random number generator for the given device.

    Args:
        device: Device specifier (an argument of :func:`get_device`)

    Returns:
        pycuda.curandom.XORWOWRandomNumberGenerator: Random number generator.

    """
    global _generators

    device = get_device(device)
    cached = _generators.get(device)
    if cached is None:
        # Cache miss: build a generator inside this device's context,
        # seeding from CHAINER_SEED when set.
        with using_device(device):
            seed_getter = _get_seed_getter(os.environ.get('CHAINER_SEED'))
            cached = curandom.XORWOWRandomNumberGenerator(
                seed_getter=seed_getter)
            _generators[device] = cached
    return cached
Example #16
0
    def test_non_local_diffusion_never(self):
        # Single occupied centre cell; zero survival probabilities.
        initial_population = np.zeros(
            (matrix_size, matrix_size)).astype(np.float32)
        initial_population[matrix_size // 2][matrix_size // 2] = 1
        survival_probabilities = np.zeros(
            (matrix_size, matrix_size)).astype(np.float32)
        generator = curandom.XORWOWRandomNumberGenerator()
        run_primitive(Empty_grid().vars(matrix_size) == Initialize_grid().vars(
            matrix_size, initial_population, survival_probabilities,
            generator))
        Config.engine.split()
        run_primitive(Non_local_diffusion().vars(non_local_never, matrix_size,
                                                 p_non_local_never, mu, gamma,
                                                 grid_dims, block_dims))
        grid_a = Config.engine.stack.pop().get()

        # With the "never" kernel/probability nothing should move: the
        # expected grid still has exactly one populated centre cell.
        grid_b = np.zeros((matrix_size, matrix_size)).astype(np.float32)
        grid_b[matrix_size // 2][matrix_size // 2] = 1

        print('Non_local_diffusion_never\nGrid_a = {}\nGrid_b = {}'.format(
            grid_a, grid_b))
        self.assertTrue((grid_a == grid_b).all())
Example #17
0
 def initialize_randoms(self):
     """Create the XORWOW random-number generator used by the GPU code."""
     self.generator = curandom.XORWOWRandomNumberGenerator()
Example #18
0
    }
    
    } // end extern "C"
"""

# Compile the CUDA kernel source defined above.
mod = SourceModule(kernel_code, no_extern_c = True)

# Get kernel functions
local = mod.get_function('local_diffuse')
non_local = mod.get_function('non_local_diffuse')
survival_layer = mod.get_function('survival_of_the_fittest')
population_layer = mod.get_function('population_growth')
init_generators = mod.get_function('init_generators')

# Initialize random number generator
generator = curandom.XORWOWRandomNumberGenerator()
# NOTE(review): the generator's internal state buffer is replaced with a
# hand-allocated one (one state slot per grid cell) and re-seeded by the
# custom init_generators kernel; the constructor's own states are discarded.
data_type_size = sizeof(generator.state_type, "#include <curand_kernel.h>")
generator._state = drv.mem_alloc((matrix_size * matrix_size) * data_type_size)
seed = 123456789  # fixed seed for reproducible runs
init_generators(generator.state, np.int32(seed), np.int32(matrix_size),
    grid = (grid_dims, grid_dims), block = (block_dims, block_dims, 1))

# Run n_iters of the Brown Marmorated Stink Bug (BMSB) Diffusion Simulation
run_primitive(
    empty_grid.vars(matrix_size) == 
    initialize_grid.vars(matrix_size, initial_population, survival_probabilities, generator) ==
    bmsb_stop_condition.vars(n_iters) <= 
    local_diffusion.vars(local, matrix_size, p_local, grid_dims, block_dims) == 
    non_local_diffusion.vars(non_local, matrix_size, p_non_local, mu, gamma, grid_dims, block_dims) ==
    survival_function.vars(survival_layer, matrix_size, grid_dims, block_dims) ==
    population_growth.vars(population_layer, matrix_size, growth_rate, grid_dims, block_dims) ==
Example #19
0
# Create two timers:
start = drv.Event()
end = drv.Event()

# Launch the kernel:

# --- Timing 1: hand-written SourceModule kernel ---
start.record()
rnorm(drv.Out(dest),mu,sigma,n,block=(tpb,1,1), grid=(nb,1))
end.record() # end timing
# calculate the run length
end.synchronize()
gpu_secs = start.time_till(end)*1e-3  # time_till is in ms; convert to s
print("SourceModule time: %f" % gpu_secs)

# --- Timing 2: curandom generator + host-side scaling ---
rng = curandom.XORWOWRandomNumberGenerator()  # be kind and exclude initialization
start.record()
gpu_res = rng.gen_normal(n,dtype=np.float32)  # lives on the device
dest2 = np.add(np.multiply(gpu_res.get(),sigma),mu)  # copy and scale
end.record() # end timing
# calculate the run length
end.synchronize()
gpu2_secs = start.time_till(end)*1e-3
print("    GPUArray time: %f" % gpu2_secs)

# --- Timing 3: pure NumPy on the host ---
# NOTE(review): start.record() is called twice in a row here; the first
# call looks redundant — confirm and remove.
start.record()
# Numpy version:
start.record()
host = np.random.normal(size=n,loc=mu,scale=sigma)
end.record() # end timing
# calculate the run length