Esempio n. 1
0
    def test_profiling(self):
        """Enter ``cuda.profiling()`` twice, allocating a device array each time.

        The first pass allocates 10 elements and the second 100; in both
        cases the reference is deleted before the context manager exits.
        """
        for n_elements in (10, 100):
            with cuda.profiling():
                buf = cuda.device_array(n_elements)
                del buf
Esempio n. 2
0
    def test_profiling(self):
        """Check that ``cuda.profiling()`` can be entered more than once.

        Two consecutive profiling contexts are opened. Inside each one a
        device array is allocated (10 elements, then 100) and its reference
        is dropped again before the context closes.
        """
        array_sizes = (10, 100)
        for size in array_sizes:
            with cuda.profiling():
                device_buf = cuda.device_array(size)
                del device_buf
Esempio n. 3
0
                if flat_slice[j] < flat_slice[j + 1]:
                    flat_slice[j], flat_slice[j + 1] = flat_slice[j + 1], flat_slice[j]

        if slice.size % 2 == 0:
            upper = int(slice.size / 2)
            lower = upper - 1
            out[x, y, z] = (flat_slice[upper] + flat_slice[lower]) / 2
        else:
            out[x, y, z] = flat_slice[slice.size // 2]


# Load the input bitmap and force unsigned 8-bit values.
im_array = np.array(Image.open("lab02img.bmp")).astype(np.uint8)

with allocated_gpu() as gpu:
    # Square x/y footprint with one z-thread per index along axis 2, sized so
    # that a block holds at most half of gpu.MAX_THREADS_PER_BLOCK threads.
    block_side = math.floor(
        math.sqrt(gpu.MAX_THREADS_PER_BLOCK / (2 * im_array.shape[2]))
    )
    threadsperblock = (block_side, block_side, im_array.shape[2])

    # Enough blocks along the first two axes to cover every index.
    blockspergrid = tuple(
        math.ceil(extent / per_block)
        for extent, per_block in zip(im_array.shape[:2], threadsperblock)
    )

    # Output buffer with the same first three extents as the input
    # (np.zeros default dtype); it is cast back to uint8 before saving.
    result = np.zeros(shape=im_array.shape[:3])
    with cuda.profiling(), timed("on GPU"):
        calculate_gpu[blockspergrid, threadsperblock](im_array, result)

    output_image = Image.fromarray(result.astype(np.uint8), 'RGB')
    output_image.save('lab02result.bmp')
Esempio n. 4
0
    N_runs = int(T // t_run)
    #Adjust t_run:
    t_run = T / N_runs
else:
    N_runs = 1
    t_run = T
#print('N_runs =', N_runs)
# Mean number of encounters in time t_run
N_mean = t_run * encounterRate(n_p, v_rel, b_min, b_max_max, v_min, v_max)
print('Evolving population, start time:', datetime.datetime.now())
# Create the per-thread RNG states once, before the loop. Re-creating them
# with the same seed on every pass would restart each thread's generator from
# the same state, so every run would replay the same random stream.
# NOTE(review): assumes run_encounters advances rng_states in place (as the
# numba.cuda.random sampling helpers do) -- confirm against the kernel.
rng_states = create_xoroshiro128p_states(threadsperblock * blockspergrid,
                                         seed=1)
for i in range(N_runs):
    # Number of encounters for this run: one Poisson draw per entry (N_bin).
    N_enc = np.random.poisson(N_mean, size=N_bin)
    with cuda.profiling():
        run_encounters[blockspergrid,
                       threadsperblock](a, e, m1, m2, M_p, v_rel, a_T,
                                        b_max_max, v_min, v_max, N_enc, delta,
                                        G_new, N_broken, rng_states)
    # Keep only the entries with a > 0.0; the selection is computed once,
    # before `a` itself is overwritten, and applied to both arrays.
    # NOTE(review): presumably the kernel flags broken binaries with a <= 0;
    # only `a` and `e` are filtered, while m1, m2 and the size-N_bin N_enc are
    # passed unchanged on the next pass -- confirm the kernel copes with that.
    keep = np.where(a > 0.0)
    e = e[keep]
    a = a[keep]
#N_broken = N_bin - np.size(a)
#print('a_final/au =', a)
#print('e_final =', e)
print('Finish time:', datetime.datetime.now())
print('N_broken =', N_broken)

print('Saving')
np.savez('simulation_GPU_{}Msol_{}e{}.npz'.format(
    int(M_p * mass_unit / (2.0 * 10.0**30.0)),