Python PhiloxGenerator Examples, pyopencl.clrandom.PhiloxGenerator Python Examples

Example #1

0

Show file

File: gpu_zmat.py Project: cindychu/LargeScaleComputing_S20

def sim_health_index(n_runs):
    """Simulate ``n_runs`` health-index paths with a segmented scan on OpenCL.

    Draws ``n_runs * 4160`` normal shocks on the device, runs one segmented
    scan over them so that every path is accumulated independently of the
    others, copies the result back to the host and prints the elapsed
    wall-clock time.

    Args:
        n_runs: Number of independent paths to simulate. It is used as an
            array dimension, so it must be an integer.

    Returns:
        None. The simulated paths are materialised on the host (so that the
        timing includes the device-to-host copy) but are not returned.
    """
    # Set up OpenCL context and command queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    mem_pool = cltools.MemoryPool(cltools.ImmediateAllocator(queue))

    t0 = time.time()

    rho = 0.5
    mu = 3.0
    sigma = 1.0
    n_steps = 4160

    # Generate the shocks on the device: one flat array of n_runs * n_steps
    rand_gen = clrand.PhiloxGenerator(ctx)
    ran = rand_gen.normal(queue, (n_runs * n_steps),
                          np.float32,
                          mu=0,
                          sigma=sigma)

    # Flag the first element of every path so the scan restarts there and
    # never carries a value over from the previous path.
    seg_boundaries = np.zeros(n_steps, dtype=np.uint8)
    seg_boundaries[0] = 1
    seg_boundary_flags = np.tile(seg_boundaries, int(n_runs))
    seg_boundary_flags = cl_array.to_device(queue, seg_boundary_flags)

    # Segmented scan: the first element of a path is eps + mu, every other
    # element contributes eps + (1 - rho) * mu and is combined with the
    # running value as rho * a + b.
    # NOTE(review): "rho*a+b" is not an associative operator, while a
    # parallel scan is free to regroup operands -- confirm the output matches
    # the sequential recursion this is meant to reproduce.
    prefix_sum = GenericScanKernel(
        ctx,
        np.float32,
        arguments="__global float *ary, __global char *segflags, "
        "__global float *out, float rho, float mu",
        input_expr="segflags[i] ? (ary[i]+mu):(ary[i]+(1-rho)*mu)",
        scan_expr="across_seg_boundary ? (b):(rho*a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] = item",
        options=[])

    # The kernel overwrites every element of the output, so an uninitialised
    # buffer is enough; there is no need to fill it first.
    dev_result = cl_array.empty(queue,
                                ran.shape,
                                dtype=np.float32,
                                allocator=mem_pool)

    # Enqueue and run the scan kernel
    prefix_sum(ran, seg_boundary_flags, dev_result, rho, mu)

    # Copy back to the host; after the transpose each column is one path.
    health_index_all = (dev_result.get().reshape(n_runs, n_steps).transpose())

    time_elapsed = time.time() - t0

    print("Simulated %d Health Index in: %f seconds" % (n_runs, time_elapsed))
    return

Example #2

0

Show file

    def normal(shape: Union[tuple[int, ...], int] = (1, 1),
               gpu=False) -> Tensor:
        """Return a float32 ``Tensor`` of standard-normal random samples.

        Args:
            shape: Shape of the sample array (an int or a tuple of ints).
            gpu: When true, sample with ``clrandom.PhiloxGenerator`` on the
                module-level ``CONTEXT``/``QUEUE`` and wrap the result as a
                GPU tensor; otherwise sample with NumPy on the host.
        """
        if not gpu:
            host_samples = np.random.normal(size=shape)
            return Tensor(host_samples.astype(np.float32))

        generator = clrandom.PhiloxGenerator(CONTEXT)
        device_samples = generator.normal(cq=QUEUE,
                                          shape=shape,
                                          dtype=np.float32)
        return Tensor(device_samples, gpu=True)

Example #3

0

Show file

def sim_lifetime(S, T):
    """Sweep 200 persistence values and report the one ``find_best_rho`` picks.

    Draws ``S * T`` standard-normal shocks on the device (Philox generator,
    seed 25), then for each of 200 evenly spaced values of ``r`` in
    [-0.95, 0.95] runs a segmented scan over the ``S`` paths of length ``T``
    and records ``avg_first_negative`` of the resulting ``(S, T)`` array.

    Args:
        S: Number of simulated paths.
        T: Number of periods per path.

    Returns:
        None. Timing and the selected result are printed.
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    t0 = time.time()

    rand_gen = clrand.PhiloxGenerator(ctx, seed=25)
    eps_mat = rand_gen.normal(queue, (S*T), np.float32, mu=0, sigma=1)

    # Per-path base values: 3 in the first period, 0 afterwards.
    z_row = np.array(([3] + [0] * (T-1)), dtype=np.float32)
    z_mat = np.tile(z_row, int(S))
    z_mat = cl_array.to_device(queue, z_mat)

    # Flag the first period of every path so the scan restarts there.
    seg_boundaries = [1] + [0]*(T-1)
    seg_boundaries = np.array(seg_boundaries, dtype=np.uint8)
    seg_boundary_flags = np.tile(seg_boundaries, int(S))
    seg_boundary_flags = cl_array.to_device(queue, seg_boundary_flags)

    # ``r`` is a by-value scalar, so it is declared as plain ``float``;
    # ``__global`` is an address-space qualifier that belongs on pointers.
    # NOTE(review): "r*a+b" is not an associative operator, while a parallel
    # scan is free to regroup operands -- confirm the output matches the
    # sequential recursion this is meant to reproduce.
    prefix_sum = GenericScanKernel(ctx, np.float32,
                arguments="__global float *ary, __global char *segflags, "
                    "__global float *eps, __global float *out, float r",
                input_expr="ary[i] + eps[i] + 3*(1-r)",
                scan_expr="across_seg_boundary ? b : (r*a+b)", neutral="0",
                is_segment_start_expr="segflags[i]",
                output_statement="out[i] = item",
                options=[])

    # Every launch overwrites the whole output, so one device buffer can be
    # reused for all values of r instead of allocating a new one each time.
    dev_result = cl_array.empty_like(eps_mat)

    rho_neg_tracker = []

    for r in np.linspace(-0.95, 0.95, 200):
        prefix_sum(z_mat, seg_boundary_flags, eps_mat, dev_result, r)
        simulation_all = dev_result.get().reshape(S, T)

        neg_mean = avg_first_negative(simulation_all)
        rho_neg_tracker.append([r, neg_mean])

    best_rho = find_best_rho(rho_neg_tracker)

    time_elapsed = time.time() - t0
    print('Time taken to run: {}'.format(time_elapsed))

    print('Best Rho Value: {}'.format(best_rho[0]))
    print('Max period: {}'.format(best_rho[1]))

    return

Example #4

0

Show file

File: q2.py Project: romanticmonkey/large_scale_assignment_1

def sim_lifetime(S, T):
    """Simulate ``S`` paths of ``T`` periods on the device and print a summary.

    The shocks come from a Philox generator seeded with 25. A segmented scan
    with a hard-coded coefficient of 0.5 accumulates each path separately.
    The mean and standard deviation of the last period across all paths are
    printed together with the elapsed wall-clock time.

    Args:
        S: Number of simulated paths.
        T: Number of periods per path.

    Returns:
        None.
    """
    context = cl.create_some_context()
    cmd_queue = cl.CommandQueue(context)

    started = time.time()

    generator = clrand.PhiloxGenerator(context, seed=25)
    eps_mat = generator.normal(cmd_queue, (S * T), np.float32, mu=0, sigma=1)

    # Base values per path: 3 in the first period, 0 in all later ones.
    first_period_level = np.array([3] + [0] * (T - 1), dtype=np.float32)
    z_mat = cl_array.to_device(cmd_queue,
                               np.tile(first_period_level, int(S)))

    # A 1 marks the first period of each path, where the scan must restart.
    path_start = np.array([1] + [0] * (T - 1), dtype=np.uint8)
    seg_boundary_flags = cl_array.to_device(cmd_queue,
                                            np.tile(path_start, int(S)))

    kernel_arguments = ("__global float *ary, __global char *segflags, "
                        "__global float *eps, __global float *out")
    prefix_sum = GenericScanKernel(
        context,
        np.float32,
        arguments=kernel_arguments,
        input_expr="ary[i] + eps[i] + 3*(1-0.5)",
        scan_expr="across_seg_boundary ? b : (0.5*a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] = item",
        options=[])

    dev_result = cl_array.empty_like(eps_mat)
    prefix_sum(z_mat, seg_boundary_flags, eps_mat, dev_result)

    # After the transpose, row t holds period t of every path.
    host_result = dev_result.get()
    simulation_all = host_result.reshape(S, T).transpose()

    last_period = simulation_all[-1]
    average_finish = np.mean(last_period)
    std_finish = np.std(last_period)
    time_elapsed = time.time() - started

    print("Simulated %d lifetimes in: %f seconds" % (S, time_elapsed))
    print("Average final health score: %f, Standard Deviation: %f" %
          (average_finish, std_finish))

Example #5

0

Show file

File: hw1q4b.py Project: joannazhang88/LS_HW1

def main():
    """Minimise ``get_neg_indx`` over one bounded parameter and print timing.

    Draws ``S * T`` (2 * 50) standard-normal values on an OpenCL device,
    passes them as the extra argument to ``get_neg_indx`` and runs L-BFGS-B
    from ``x0=0.1`` with the parameter bounded to [-0.95, 0.95]. The
    optimiser result and the elapsed wall-clock time are printed.
    """
    t0 = time.time()
    S = 2
    T = 50
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    rand_gen = clrand.PhiloxGenerator(ctx)
    ran = rand_gen.normal(queue, (S * T), np.float32, mu=0, sigma=1)
    result = optimize.minimize(get_neg_indx,
                               x0=0.1,
                               # extra objective arguments, as a 1-tuple
                               args=(ran,),
                               method='L-BFGS-B',
                               bounds=((-0.95, 0.95), ),
                               options={'eps': 0.001})
    print(result)
    time_elapsed = time.time() - t0
    # %f keeps the fractional seconds; %d would truncate them to an integer.
    print("Time used: %f" % (time_elapsed))

Example #6

0

Show file

File: q4_cl.py Project: romanticmonkey/large_scale_assignment_1

def sim_lifetime(rho):
    """Return minus the ``avg_first_negative`` statistic for one ``rho``.

    Simulates 1000 paths of 4160 periods on an OpenCL device (Philox
    generator, seed 25) with a segmented scan, then evaluates
    ``avg_first_negative`` on the host copy of the result.

    Args:
        rho: Coefficient passed to the scan kernel. A scalar or a one-element
            array is accepted.

    Returns:
        ``-avg_first_negative(simulation_all)``; the sign is flipped so that
        a minimiser maximises the underlying statistic.

    Raises:
        ValueError: If ``rho`` is an array with more than one element.
    """
    # The kernel takes ``r`` as a by-value scalar. Presumably this function
    # is used as an optimiser objective, and optimisers commonly pass a
    # one-element array, so reduce it to a plain Python number first.
    rho = np.asarray(rho).item()

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    S = 1000
    T = 4160

    rand_gen = clrand.PhiloxGenerator(ctx, seed=25)
    eps_mat = rand_gen.normal(queue, (S * T), np.float32, mu=0, sigma=1)

    # Per-path base values: 3 in the first period, 0 afterwards.
    z_row = np.array(([3] + [0] * (T - 1)), dtype=np.float32)
    z_mat = np.tile(z_row, int(S))
    z_mat = cl_array.to_device(queue, z_mat)

    # Flag the first period of every path so the scan restarts there.
    seg_boundaries = [1] + [0] * (T - 1)
    seg_boundaries = np.array(seg_boundaries, dtype=np.uint8)
    seg_boundary_flags = np.tile(seg_boundaries, int(S))
    seg_boundary_flags = cl_array.to_device(queue, seg_boundary_flags)

    # ``r`` is declared as plain ``float``: ``__global`` is an address-space
    # qualifier that belongs on pointers, not on by-value scalars.
    # NOTE(review): "r*a+b" is not an associative operator, while a parallel
    # scan is free to regroup operands -- confirm the output matches the
    # sequential recursion this is meant to reproduce.
    prefix_sum = GenericScanKernel(
        ctx,
        np.float32,
        arguments="__global float *ary, __global char *segflags, "
        "__global float *eps, __global float *out, float r",
        input_expr="ary[i] + eps[i] + 3*(1-r)",
        scan_expr="across_seg_boundary ? b : (r*a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] = item",
        options=[])

    dev_result = cl_array.empty_like(eps_mat)

    prefix_sum(z_mat, seg_boundary_flags, eps_mat, dev_result, rho)

    # After the transpose, row t holds period t of every path.
    simulation_all = (dev_result.get().reshape(S, T).transpose())

    avg_first_neg = avg_first_negative(simulation_all)

    return -avg_first_neg  # turned negative for minimization

Example #7

0

Show file

    def uniform(
        shape: Union[tuple[int, ...], int] = (1, 1),
        min: float = 0.0,
        max: float = 1.0,
        gpu=False,
    ) -> Tensor:
        """Return a float32 ``Tensor`` of uniform samples between min and max.

        Args:
            shape: Shape of the sample array (an int or a tuple of ints).
            min: Lower bound passed to the sampler.
            max: Upper bound passed to the sampler.
            gpu: When true, sample with ``clrandom.PhiloxGenerator`` on the
                module-level ``CONTEXT``/``QUEUE`` and wrap the result as a
                GPU tensor; otherwise sample with NumPy on the host.
        """
        if not gpu:
            host_draws = np.random.uniform(low=min, high=max, size=shape)
            return Tensor(host_draws.astype(np.float32))

        generator = clrandom.PhiloxGenerator(CONTEXT)
        device_draws = generator.uniform(cq=QUEUE,
                                         shape=shape,
                                         dtype=np.float32,
                                         a=min,
                                         b=max)
        return Tensor(device_draws, gpu=True)

Example #8

0

Show file

def sim_rand_walks(n_runs):
    """Simulate ``n_runs`` random walks of 100 steps on an OpenCL device.

    Standard-normal increments are drawn on the device and accumulated per
    walk with a segmented prefix sum; 100 is added to every output value.
    The mean and standard deviation of the final positions and the elapsed
    time are printed, and all paths are plotted to
    ``r_walk_nruns<n_runs>_gpu.png``.

    Args:
        n_runs: Number of independent walks (used as an array dimension).

    Returns:
        None.
    """
    # OpenCL context and command queue
    context = cl.create_some_context()
    cmd_queue = cl.CommandQueue(context)

    started = time.time()

    # One flat device array holding the increments of all walks
    n_steps = 100
    generator = clrand.PhiloxGenerator(context)
    ran = generator.normal(cmd_queue, (n_runs * n_steps), np.float32,
                           mu=0, sigma=1)

    # A 1 marks the first step of each walk, so the scan runs within a walk
    # and never continues from one walk into the next.
    walk_start = np.array([1] + [0] * (n_steps - 1), dtype=np.uint8)
    seg_boundary_flags = cl_array.to_device(
        cmd_queue, np.tile(walk_start, int(n_runs)))

    # Segmented scan kernel: running sum f(n-1) + f(n) inside each walk
    kernel_arguments = ("__global float *ary, __global char *segflags, "
                        "__global float *out")
    prefix_sum = GenericScanKernel(
        context,
        np.float32,
        arguments=kernel_arguments,
        input_expr="ary[i]",
        scan_expr="across_seg_boundary ? b : (a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] = item + 100",
        options=[])

    # Device buffer for the kernel output. If the kernel is invoked many
    # times, a memory pool avoids creating a brand new memory area on each
    # call: https://documen.tician.de/pyopencl/tools.html
    dev_result = cl_array.empty_like(ran)

    # Enqueue and run the scan
    prefix_sum(ran, seg_boundary_flags, dev_result)

    # Bring the result to the host; after the transpose each column is a walk
    host_result = dev_result.get()
    r_walks_all = host_result.reshape(n_runs, n_steps).transpose()

    final_positions = r_walks_all[-1]
    average_finish = np.mean(final_positions)
    std_finish = np.std(final_positions)
    time_elapsed = time.time() - started

    print("Simulated %d Random Walks in: %f seconds" % (n_runs, time_elapsed))
    print("Average final position: %f, Standard Deviation: %f" %
          (average_finish, std_finish))

    # For the sake of the plot, pin every walk's starting position to 100
    # before drawing the paths.
    r_walks_all[0] = 100
    plt.plot(r_walks_all)
    plt.savefig("r_walk_nruns%d_gpu.png" % n_runs)

Example #9

0

Show file

from pyopencl.scan import GenericScanKernel
import matplotlib.pyplot as plt
import time

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

t0 = time.time()
np.random.seed(25)
rho = 0.5
sigma = 1.0
z_0 = 3
S = 1000
T = int(4160)

rand_gen = clrand.PhiloxGenerator(ctx)
ran = rand_gen.normal(queue, (S * T), np.float32, mu=0, sigma=1)

seg_boundaries = [1] + [0] * (T - 1)
seg_boundaries = np.array(seg_boundaries, dtype=np.uint8)
seg_boundary_flags = np.tile(seg_boundaries, int(S))
seg_boundary_flags = cl_array.to_device(queue, seg_boundary_flags)

prefix_sum = GenericScanKernel(
    ctx,
    np.float32,
    arguments="__global float *ary, __global char *segflags, "
    "__global float *out",
    input_expr="segflags[i] == 1 ? ary[i]: ary[i]",
    scan_expr="across_seg_boundary ? b : (a*0.5+b+1.5)",
    neutral="0",

Example #10

0

Show file

def sim_health_index(n_runs):
    """Find the rho with the longest average time to a non-positive index.

    Draws ``n_runs * 4160`` normal shocks on the device once, then for each
    of 200 evenly spaced rho values in [-0.95, 0.95] runs a segmented scan
    whose output is 1 where the scanned value is not greater than 0 and 0
    elsewhere. For every path the index of the first 1 is taken (or 4160 if
    there is none) and averaged across paths. The averages are plotted
    against rho and saved to ``GPU_rho_avgt_nruns<n_runs>.png``; the best
    average and its rho are printed.

    Args:
        n_runs: Number of independent paths to simulate. It is used as an
            array dimension, so it must be an integer.

    Returns:
        None.
    """
    # Set up OpenCL context and command queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    mem_pool = cltools.MemoryPool(cltools.ImmediateAllocator(queue))

    t0 = time.time()

    mu = 3.0
    sigma = 1.0
    n_steps = 4160

    # Generate the shocks on the device: one flat array of n_runs * n_steps
    rand_gen = clrand.PhiloxGenerator(ctx)
    ran = rand_gen.normal(queue, (n_runs * n_steps),
                          np.float32,
                          mu=0,
                          sigma=sigma)

    # Flag the first element of every path so the scan restarts there and
    # never carries a value over from the previous path.
    seg_boundaries = np.zeros(n_steps, dtype=np.uint8)
    seg_boundaries[0] = 1
    seg_boundary_flags = np.tile(seg_boundaries, int(n_runs))
    seg_boundary_flags = cl_array.to_device(queue, seg_boundary_flags)

    # Segmented scan for rho*f(n-1) + (1-rho)*mu + ran; the output is a flag
    # (1 when the value is not greater than 0) rather than the value itself.
    # NOTE(review): "rho*a+b" is not an associative operator, while a
    # parallel scan is free to regroup operands -- confirm the output matches
    # the sequential recursion this is meant to reproduce.
    prefix_sum = GenericScanKernel(
        ctx,
        np.float32,
        arguments="__global float *ary, __global char *segflags, "
        "__global float *out, float rho, float mu",
        input_expr="segflags[i] ? (ary[i]+mu):(ary[i]+(1-rho)*mu)",
        scan_expr="across_seg_boundary ? (b):(rho*a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] =(item>0) ? (0):(1)",
        options=[])

    # The kernel overwrites every element of the output on each launch, so
    # one uninitialised buffer can be reused for all rho values.
    dev_result = cl_array.empty(queue,
                                ran.shape,
                                dtype=np.float32,
                                allocator=mem_pool)

    # Iterate over 200 rho values
    rho_set = np.linspace(-0.95, 0.95, 200)
    rho_avgt_t = []
    for rho in rho_set:
        prefix_sum(ran, seg_boundary_flags, dev_result, rho, mu)

        # One row per path; True where the kernel flagged the value.
        flagged = dev_result.get().reshape(n_runs, n_steps) == 1

        # argmax returns the first True in a row; rows without any flag
        # count as having lasted the full n_steps.
        first_flag = np.where(flagged.any(axis=1),
                              flagged.argmax(axis=1),
                              n_steps)
        rho_avgt_t.append(float(first_flag.mean()))

    final_time = time.time()
    time_elapsed = final_time - t0
    print("Simulated %d Health Index for 200 rho values in: %f seconds" %
          (n_runs, time_elapsed))

    plt.plot(rho_set, rho_avgt_t)
    plt.title('Averaged periods of first negative index across Rho')
    plt.xlabel('Rho')
    plt.ylabel('Avged Period of first negative index')
    plt.savefig("GPU_rho_avgt_nruns%d.png" % (n_runs))

    max_period = max(rho_avgt_t)
    max_rho = rho_set[rho_avgt_t.index(max_period)]
    print("Max Period: %f; Max Rho: %f." % (max_period, max_rho))
    return

Example #11

0

Show file

File: test_maxwell.py Project: choward1491/pytential

    jt_sym = sym.make_sym_vector("jt", 2)
    rho_sym = sym.var("rho")

    from pytential.symbolic.pde.maxwell import (
            PECChargeCurrentMFIEOperator,
            get_sym_maxwell_point_source,
            get_sym_maxwell_plane_wave)
    mfie = PECChargeCurrentMFIEOperator()

    test_source = case.get_source(actx)

    calc_patch = CalculusPatch(np.array([-3, 0, 0]), h=0.01)
    calc_patch_tgt = PointsTarget(actx.from_numpy(calc_patch.points))

    import pyopencl.clrandom as clrandom
    rng = clrandom.PhiloxGenerator(actx.context, seed=12)

    from pytools.obj_array import make_obj_array
    src_j = make_obj_array([
            rng.normal(actx.queue, (test_source.ndofs), dtype=np.float64)
            for _ in range(3)])

    def eval_inc_field_at(places, source=None, target=None):
        if source is None:
            source = "test_source"

        if use_plane_wave:
            # plane wave
            return bind(places,
                    get_sym_maxwell_plane_wave(
                        amplitude_vec=np.array([1, 1, 1]),