예제 #1
0
def sim_health_index(n_runs):
    """Simulate ``n_runs`` health-index paths with a segmented scan on OpenCL.

    Draws ``n_runs * 4160`` standard-normal shocks on the device, runs one
    segmented scan per path (each path is a segment of 4160 steps), copies
    the result back to the host and prints the elapsed wall-clock time.

    Args:
        n_runs: Number of independent paths to simulate.

    Returns:
        None. The simulated paths are computed but not returned.
    """
    # Set up OpenCL context and command queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    mem_pool = cltools.MemoryPool(cltools.ImmediateAllocator(queue))

    t0 = time.time()

    rho = 0.5
    mu = 3.0
    sigma = 1.0
    n_steps = 4160

    # Generate n_runs * n_steps normal random numbers directly on the device
    rand_gen = clrand.PhiloxGenerator(ctx)
    ran = rand_gen.normal(queue, (n_runs * n_steps),
                          np.float32,
                          mu=0,
                          sigma=sigma)

    # Flag the first element of every path so the scan restarts there and
    # never carries a value from one path into the next.
    seg_boundaries = np.array([1] + [0] * (n_steps - 1), dtype=np.uint8)
    seg_boundary_flags = np.tile(seg_boundaries, int(n_runs))
    seg_boundary_flags = cl_array.to_device(queue, seg_boundary_flags)

    # Segmented scan: each input is shock + mu at a path start and
    # shock + (1 - rho) * mu elsewhere; elements are combined as rho * a + b.
    # NOTE(review): the pyopencl docs describe scan_expr as an associative
    # operation, and rho*a + b is not associative -- confirm the results
    # match a sequential evaluation of the recurrence.
    prefix_sum = GenericScanKernel(
        ctx,
        np.float32,
        arguments="__global float *ary, __global char *segflags, "
        "__global float *out, float rho, float mu",
        input_expr="segflags[i] ? (ary[i]+mu):(ary[i]+(1-rho)*mu)",
        scan_expr="across_seg_boundary ? (b):(rho*a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] = item",
        options=[])

    # Every element is overwritten by the kernel, so an uninitialized buffer
    # is enough; no need to launch a fill kernel first.
    dev_result = cl_array.empty(queue,
                                len(ran),
                                dtype=np.float32,
                                allocator=mem_pool)

    # Enqueue and run the scan kernel
    prefix_sum(ran, seg_boundary_flags, dev_result, rho, mu)

    # Copy back to the host; after the transpose each column is one path.
    health_index_all = (dev_result.get().reshape(n_runs, n_steps).transpose())

    final_time = time.time()
    time_elapsed = final_time - t0

    print("Simulated %d Health Index in: %f seconds" % (n_runs, time_elapsed))
    return
예제 #2
0
    def normal(shape: Union[tuple[int, ...], int] = (1, 1),
               gpu=False) -> Tensor:
        """Return a float32 Tensor of normally distributed samples.

        Args:
            shape: Output shape, either a tuple of ints or a single int.
            gpu: When true, sample with pyopencl's Philox generator using the
                module-level CONTEXT and QUEUE and wrap the result as a GPU
                tensor; otherwise sample with NumPy on the host.
        """
        if not gpu:
            host_samples = np.random.normal(size=shape)
            return Tensor(host_samples.astype(np.float32))

        generator = clrandom.PhiloxGenerator(CONTEXT)
        device_samples = generator.normal(cq=QUEUE,
                                          shape=shape,
                                          dtype=np.float32)
        return Tensor(device_samples, gpu=True)
예제 #3
0
def sim_lifetime(S, T):
    """Sweep 200 persistence values and report the best one.

    For each ``r`` in ``np.linspace(-0.95, 0.95, 200)`` a segmented scan over
    ``S`` paths of ``T`` periods is run on an OpenCL device, the result is
    copied back as an ``(S, T)`` array and passed to ``avg_first_negative``.
    The collected ``[r, value]`` pairs are handed to ``find_best_rho`` and
    the outcome is printed together with the elapsed time.

    Args:
        S: Number of simulated paths.
        T: Number of periods per path.

    Returns:
        None. Results are printed.
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    t0 = time.time()

    # Seeded so every rho value is evaluated on the same set of shocks.
    rand_gen = clrand.PhiloxGenerator(ctx, seed=25)
    eps_mat = rand_gen.normal(queue, (S*T), np.float32, mu=0, sigma=1)

    # 3 in the first period of every path, 0 elsewhere.
    z_row = np.array(([3] + [0] * (T-1)), dtype=np.float32)
    z_mat = np.tile(z_row, int(S))
    z_mat = cl_array.to_device(queue, z_mat)

    # Flag the first period of every path so the scan restarts there.
    seg_boundaries = [1] + [0]*(T-1)
    seg_boundaries = np.array(seg_boundaries, dtype=np.uint8)
    seg_boundary_flags = np.tile(seg_boundaries, int(S))
    seg_boundary_flags = cl_array.to_device(queue, seg_boundary_flags)

    # `r` is a by-value scalar, so it is declared as a plain `float`
    # (an address-space qualifier only makes sense on pointer arguments).
    # NOTE(review): the pyopencl docs describe scan_expr as an associative
    # operation, and r*a + b is not associative -- confirm the results
    # match a sequential evaluation of the recurrence.
    prefix_sum = GenericScanKernel(ctx, np.float32,
                arguments="__global float *ary, __global char *segflags, "
                    "__global float *eps, __global float *out, float r",
                input_expr="ary[i] + eps[i] + 3*(1-r)",
                scan_expr="across_seg_boundary ? b : (r*a+b)", neutral="0",
                is_segment_start_expr="segflags[i]",
                output_statement="out[i] = item",
                options=[])

    # One device buffer is reused for all sweeps: the kernel overwrites every
    # element and .get() copies the data to the host before the next launch.
    dev_result = cl_array.empty_like(eps_mat)

    rho_neg_tracker = []

    for r in np.linspace(-0.95, 0.95, 200):
        prefix_sum(z_mat, seg_boundary_flags, eps_mat, dev_result, r)
        simulation_all = (dev_result.get().reshape(S, T))

        neg_mean = avg_first_negative(simulation_all)
        rho_neg_tracker.append([r, neg_mean])

    best_rho = find_best_rho(rho_neg_tracker)

    time_elapsed = time.time() - t0
    print('Time taken to run: {}'.format(time_elapsed))

    print('Best Rho Value: {}'.format(best_rho[0]))
    print('Max period: {}'.format(best_rho[1]))

    return
예제 #4
0
def sim_lifetime(S, T):
    """Simulate S paths of T periods on an OpenCL device and print a summary.

    Runs one segmented scan (persistence fixed at 0.5 inside the kernel
    strings), copies the result to the host and prints the elapsed time plus
    the mean and standard deviation of the final-period values.

    Args:
        S: Number of simulated paths.
        T: Number of periods per path.

    Returns:
        None. Results are printed.
    """
    context = cl.create_some_context()
    cmd_queue = cl.CommandQueue(context)

    started = time.time()

    # Seeded device-side standard-normal shocks, one per (path, period).
    shocks = clrand.PhiloxGenerator(context, seed=25).normal(
        cmd_queue, (S * T), np.float32, mu=0, sigma=1)

    # 3 in the first period of every path, 0 elsewhere.
    start_row = np.array([3] + [0] * (T - 1), dtype=np.float32)
    start_values = cl_array.to_device(cmd_queue, np.tile(start_row, int(S)))

    # 1 marks the first period of a path: the scan restarts there.
    flag_row = np.array([1] + [0] * (T - 1), dtype=np.uint8)
    start_flags = cl_array.to_device(cmd_queue, np.tile(flag_row, int(S)))

    # NOTE(review): the pyopencl docs describe scan_expr as an associative
    # operation, and 0.5*a + b is not associative -- confirm the results.
    scan_kernel = GenericScanKernel(
        context,
        np.float32,
        arguments="__global float *ary, __global char *segflags, "
        "__global float *eps, __global float *out",
        input_expr="ary[i] + eps[i] + 3*(1-0.5)",
        scan_expr="across_seg_boundary ? b : (0.5*a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] = item",
        options=[])

    device_out = cl_array.empty_like(shocks)
    scan_kernel(start_values, start_flags, shocks, device_out)

    # After the transpose, row -1 holds the last period of every path.
    paths = device_out.get().reshape(S, T).transpose()
    last_period = paths[-1]

    average_finish = np.mean(last_period)
    std_finish = np.std(last_period)
    time_elapsed = time.time() - started

    print("Simulated %d lifetimes in: %f seconds" % (S, time_elapsed))
    print("Average final health score: %f, Standard Deviation: %f" %
          (average_finish, std_finish))

    return
예제 #5
0
def main():
    """Minimize ``get_neg_indx`` over rho with L-BFGS-B and print the result.

    Draws ``S * T`` standard-normal samples on an OpenCL device, passes them
    to the objective as its extra argument, and prints the optimizer result
    followed by the elapsed wall-clock time.
    """
    started = time.time()
    S = 2
    T = 50

    context = cl.create_some_context()
    cmd_queue = cl.CommandQueue(context)
    shocks = clrand.PhiloxGenerator(context).normal(
        cmd_queue, (S * T), np.float32, mu=0, sigma=1)

    # rho is searched in [-0.95, 0.95], starting from 0.1.
    result = optimize.minimize(get_neg_indx,
                               x0=0.1,
                               args=shocks,
                               method='L-BFGS-B',
                               bounds=((-0.95, 0.95), ),
                               options={'eps': 0.001})
    print(result)

    time_elapsed = time.time() - started
    print("Time used: %d" % (time_elapsed))
def sim_lifetime(rho):
    """Return the negated ``avg_first_negative`` score for persistence ``rho``.

    Simulates 1000 paths of 4160 periods with a segmented scan on an OpenCL
    device, copies the result to the host, transposes it so each column is
    one path, and evaluates ``avg_first_negative`` on it.

    Args:
        rho: Persistence value passed to the kernel as the scalar ``r``.

    Returns:
        The negative of ``avg_first_negative(...)``, so that a minimizer
        effectively maximizes that value.
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    S = 1000
    T = 4160

    # Seeded so repeated calls with different rho see the same shocks.
    rand_gen = clrand.PhiloxGenerator(ctx, seed=25)
    eps_mat = rand_gen.normal(queue, (S * T), np.float32, mu=0, sigma=1)

    # 3 in the first period of every path, 0 elsewhere.
    z_row = np.array(([3] + [0] * (T - 1)), dtype=np.float32)
    z_mat = np.tile(z_row, int(S))
    z_mat = cl_array.to_device(queue, z_mat)

    # Flag the first period of every path so the scan restarts there.
    seg_boundaries = [1] + [0] * (T - 1)
    seg_boundaries = np.array(seg_boundaries, dtype=np.uint8)
    seg_boundary_flags = np.tile(seg_boundaries, int(S))
    seg_boundary_flags = cl_array.to_device(queue, seg_boundary_flags)

    # `r` is a by-value scalar, so it is declared as a plain `float`
    # (an address-space qualifier only makes sense on pointer arguments).
    # NOTE(review): the pyopencl docs describe scan_expr as an associative
    # operation, and r*a + b is not associative -- confirm the results
    # match a sequential evaluation of the recurrence.
    prefix_sum = GenericScanKernel(
        ctx,
        np.float32,
        arguments="__global float *ary, __global char *segflags, "
        "__global float *eps, __global float *out, float r",
        input_expr="ary[i] + eps[i] + 3*(1-r)",
        scan_expr="across_seg_boundary ? b : (r*a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] = item",
        options=[])

    dev_result = cl_array.empty_like(eps_mat)

    prefix_sum(z_mat, seg_boundary_flags, eps_mat, dev_result, rho)

    simulation_all = (dev_result.get().reshape(S, T).transpose())

    avg_first_neg = avg_first_negative(simulation_all)

    return -avg_first_neg  # turned negative for minimization
예제 #7
0
    def uniform(
        shape: Union[tuple[int, ...], int] = (1, 1),
        min: float = 0.0,
        max: float = 1.0,
        gpu=False,
    ) -> Tensor:
        """Return a float32 Tensor of uniformly distributed samples.

        Args:
            shape: Output shape, either a tuple of ints or a single int.
            min: Lower bound handed to the underlying generator.
            max: Upper bound handed to the underlying generator.
            gpu: When true, sample with pyopencl's Philox generator using the
                module-level CONTEXT and QUEUE and wrap the result as a GPU
                tensor; otherwise sample with NumPy on the host.
        """
        if not gpu:
            host_samples = np.random.uniform(min, max, size=shape)
            return Tensor(host_samples.astype(np.float32))

        generator = clrandom.PhiloxGenerator(CONTEXT)
        device_samples = generator.uniform(cq=QUEUE,
                                           shape=shape,
                                           dtype=np.float32,
                                           a=min,
                                           b=max)
        return Tensor(device_samples, gpu=True)
예제 #8
0
def sim_rand_walks(n_runs):
    """Simulate ``n_runs`` random walks of 100 steps on an OpenCL device.

    Each walk is the running sum of standard-normal steps, offset by 100 in
    the kernel's output statement. Prints the elapsed time and the mean and
    standard deviation of the final positions, then plots every path and
    saves the figure to ``r_walk_nruns<n_runs>_gpu.png``.

    Args:
        n_runs: Number of independent walks to simulate.

    Returns:
        None.
    """
    # OpenCL context and command queue
    context = cl.create_some_context()
    cmd_queue = cl.CommandQueue(context)

    started = time.time()

    # Draw all steps for all walks on the device in one flat array
    n_steps = 100
    steps = clrand.PhiloxGenerator(context).normal(
        cmd_queue, (n_runs * n_steps), np.float32, mu=0, sigma=1)

    # 1 marks the first step of a walk, so the scan restarts at each walk
    # instead of accumulating across walks.
    flag_row = np.array([1] + [0] * (n_steps - 1), dtype=np.uint8)
    walk_starts = cl_array.to_device(cmd_queue,
                                     np.tile(flag_row, int(n_runs)))

    # Segmented inclusive sum; 100 is added when each result is written out
    cumulative_sum = GenericScanKernel(
        context,
        np.float32,
        arguments="__global float *ary, __global char *segflags, "
        "__global float *out",
        input_expr="ary[i]",
        scan_expr="across_seg_boundary ? b : (a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] = item + 100",
        options=[])

    # Device buffer for the kernel output. If the kernel were launched many
    # times, a memory pool allocator would avoid repeated fresh allocations:
    # https://documen.tician.de/pyopencl/tools.html
    device_out = cl_array.empty_like(steps)

    # Enqueue and run the scan
    cumulative_sum(steps, walk_starts, device_out)

    # Back on the host: after the transpose each column is one walk
    r_walks_all = device_out.get().reshape(n_runs, n_steps).transpose()

    final_positions = r_walks_all[-1]
    average_finish = np.mean(final_positions)
    std_finish = np.std(final_positions)
    time_elapsed = time.time() - started

    print("Simulated %d Random Walks in: %f seconds" % (n_runs, time_elapsed))
    print("Average final position: %f, Standard Deviation: %f" %
          (average_finish, std_finish))

    # For the plot, force every walk to start at exactly 100 before drawing
    # the paths (done after the summary statistics above were computed).
    r_walks_all[0] = [100] * n_runs
    plt.plot(r_walks_all)
    plt.savefig("r_walk_nruns%d_gpu.png" % n_runs)

    return
예제 #9
0
from pyopencl.scan import GenericScanKernel
import matplotlib.pyplot as plt
import time

# Script setup: OpenCL context/queue, model constants, device-side random
# shocks, and the per-path segment flags consumed by the scan kernel below.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

t0 = time.time()  # start of the wall-clock timing
# NOTE(review): this seeds NumPy's global RNG only; the Philox generator
# below is created without a seed argument -- confirm whether the device
# random numbers were meant to be reproducible.
np.random.seed(25)
rho = 0.5
sigma = 1.0
z_0 = 3
S = 1000
T = int(4160)

# S * T standard-normal samples generated on the device as one flat array.
rand_gen = clrand.PhiloxGenerator(ctx)
ran = rand_gen.normal(queue, (S * T), np.float32, mu=0, sigma=1)

# One flag per element: 1 at the first of every T consecutive elements,
# 0 elsewhere; tiled S times and copied to the device.
seg_boundaries = [1] + [0] * (T - 1)
seg_boundaries = np.array(seg_boundaries, dtype=np.uint8)
seg_boundary_flags = np.tile(seg_boundaries, int(S))
seg_boundary_flags = cl_array.to_device(queue, seg_boundary_flags)

prefix_sum = GenericScanKernel(
    ctx,
    np.float32,
    arguments="__global float *ary, __global char *segflags, "
    "__global float *out",
    input_expr="segflags[i] == 1 ? ary[i]: ary[i]",
    scan_expr="across_seg_boundary ? b : (a*0.5+b+1.5)",
    neutral="0",
예제 #10
0
def sim_health_index(n_runs):
    """Sweep 200 rho values and plot the mean first-negative period for each.

    For every rho in ``np.linspace(-0.95, 0.95, 200)`` a segmented scan over
    ``n_runs`` paths of 4160 steps is run on an OpenCL device. The kernel
    writes 1 where the scanned value is not positive and 0 otherwise; on the
    host the index of the first 1 in each path is averaged across paths
    (paths with no 1 count as 4160). The averages are plotted against rho,
    saved to ``GPU_rho_avgt_nruns<n_runs>.png``, and the best rho is printed.

    Args:
        n_runs: Number of independent paths to simulate.

    Returns:
        None.
    """
    # Set up OpenCL context and command queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    mem_pool = cltools.MemoryPool(cltools.ImmediateAllocator(queue))

    t0 = time.time()

    mu = 3.0
    sigma = 1.0
    n_steps = 4160

    # Generate n_runs * n_steps normal random numbers directly on the device;
    # the same shocks are reused for every rho value.
    rand_gen = clrand.PhiloxGenerator(ctx)
    ran = rand_gen.normal(queue, (n_runs * n_steps),
                          np.float32,
                          mu=0,
                          sigma=sigma)

    # Flag the first element of every path so the scan restarts there and
    # never carries a value from one path into the next.
    seg_boundaries = np.array([1] + [0] * (n_steps - 1), dtype=np.uint8)
    seg_boundary_flags = np.tile(seg_boundaries, int(n_runs))
    seg_boundary_flags = cl_array.to_device(queue, seg_boundary_flags)

    # Segmented scan combining elements as rho * a + b; the output is an
    # indicator: 1 when the scanned value is <= 0, else 0.
    # NOTE(review): the pyopencl docs describe scan_expr as an associative
    # operation, and rho*a + b is not associative -- confirm the results
    # match a sequential evaluation of the recurrence.
    prefix_sum = GenericScanKernel(
        ctx,
        np.float32,
        arguments="__global float *ary, __global char *segflags, "
        "__global float *out, float rho, float mu",
        input_expr="segflags[i] ? (ary[i]+mu):(ary[i]+(1-rho)*mu)",
        scan_expr="across_seg_boundary ? (b):(rho*a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] =(item>0) ? (0):(1)",
        options=[])

    # Every element is overwritten by the kernel, so an uninitialized buffer
    # is enough; it is reused across all rho values.
    dev_result = cl_array.empty(queue,
                                len(ran),
                                dtype=np.float32,
                                allocator=mem_pool)

    # Iterate over 200 rho values
    rho_set = np.linspace(-0.95, 0.95, 200)
    rho_avgt_t = []
    for rho in rho_set:
        prefix_sum(ran, seg_boundary_flags, dev_result, rho, mu)

        # One row per path; True where the kernel flagged a non-positive value
        is_flagged = dev_result.get().reshape(n_runs, n_steps) == 1

        # argmax gives the first True per row; rows without any True fall
        # back to n_steps, matching "never went non-positive".
        first_flagged = np.where(is_flagged.any(axis=1),
                                 is_flagged.argmax(axis=1),
                                 n_steps)
        rho_avgt_t.append(float(first_flagged.mean()))

    final_time = time.time()
    time_elapsed = final_time - t0
    print("Simulated %d Health Index for 200 rho values in: %f seconds" %
          (n_runs, time_elapsed))

    plt.plot(rho_set, rho_avgt_t)
    plt.title('Averaged periods of first negative index across Rho')
    plt.xlabel('Rho')
    plt.ylabel('Avged Period of first negative index')
    plt.savefig("GPU_rho_avgt_nruns%d.png" % (n_runs))

    max_period = max(rho_avgt_t)
    max_rho = rho_set[rho_avgt_t.index(max_period)]
    print("Max Period: %f; Max Rho: %f." % (max_period, max_rho))
    return
예제 #11
0
    jt_sym = sym.make_sym_vector("jt", 2)
    rho_sym = sym.var("rho")

    from pytential.symbolic.pde.maxwell import (
            PECChargeCurrentMFIEOperator,
            get_sym_maxwell_point_source,
            get_sym_maxwell_plane_wave)
    mfie = PECChargeCurrentMFIEOperator()

    test_source = case.get_source(actx)

    calc_patch = CalculusPatch(np.array([-3, 0, 0]), h=0.01)
    calc_patch_tgt = PointsTarget(actx.from_numpy(calc_patch.points))

    import pyopencl.clrandom as clrandom
    rng = clrandom.PhiloxGenerator(actx.context, seed=12)

    from pytools.obj_array import make_obj_array
    src_j = make_obj_array([
            rng.normal(actx.queue, (test_source.ndofs), dtype=np.float64)
            for _ in range(3)])

    def eval_inc_field_at(places, source=None, target=None):
        if source is None:
            source = "test_source"

        if use_plane_wave:
            # plane wave
            return bind(places,
                    get_sym_maxwell_plane_wave(
                        amplitude_vec=np.array([1, 1, 1]),