def multiply(data, multiplier):
    """Multiply ``data`` in place by ``multiplier`` on the device.

    :param data: device vector; one kernel thread is launched per element
        (``data.size()`` threads).
    :param multiplier: either a ``ThrustRTC.storage`` instance (multiplied
        element by element), a Python ``float`` or a Python ``int`` (both
        broadcast as a scalar).
    :raises NotImplementedError: for any other multiplier type.
    """
    if isinstance(multiplier, ThrustRTC.storage):
        loop = trtc.For(['arr', 'mult'], "i", "arr[i] *= mult[i];")
        mult = multiplier
    elif isinstance(multiplier, float):
        loop = trtc.For(['arr', 'mult'], "i", "arr[i] *= mult;")
        mult = trtc.DVDouble(multiplier)
    elif isinstance(multiplier, int):
        # Integer scalars use the same scalar kernel, uploaded as a 64-bit
        # integer so that integer vectors are not promoted through double.
        loop = trtc.For(['arr', 'mult'], "i", "arr[i] *= mult;")
        mult = trtc.DVInt64(multiplier)
    else:
        raise NotImplementedError()
    loop.launch_n(data.size(), [data, mult])
def sum_pair(data_out, data_in, idx, length):
    """Write sums of consecutive pairs of ``data_in`` (viewed through ``idx``).

    ``data_out[i]`` receives ``data_in[idx[2 * i]] + data_in[idx[2 * i + 1]]``
    for ``i`` in ``range(length // 2)``.
    """
    permuted_input = trtc.DVPermutation(data_in, idx)
    pair_sum_kernel = trtc.For(
        ['arr_in', 'arr_out'],
        "i",
        "arr_out[i] = arr_in[2 * i] + arr_in[2 * i + 1];",
    )
    kernel_args = [permuted_input, data_out]
    pair_sum_kernel.launch_n(length // 2, kernel_args)
def calculate_displacement(dim, scheme, displacement, courant, cell_origin, position_in_cell):
    """Compute droplet displacement along dimension ``dim`` on the device.

    ``scheme(None, None, None)`` is called once; the string it returns is
    pasted into the kernel as the right-hand side of the displacement
    assignment, so it must be a C++ expression in ``omega``, ``c_l`` and
    ``c_r`` that carries its own terminating ``;``.

    One thread is launched per droplet (``displacement.shape[1]`` threads).

    :param dim: index of the dimension being processed.
    :param scheme: callable returning the C++ interpolation expression.
    :param displacement: output device vector.
    :param courant: device vector of Courant numbers; ``courant.shape[0]`` is
        used as the stride between its rows.
    :param cell_origin: device vector of cell indices per droplet.
    :param position_in_cell: device vector of positions inside the cell.
    """
    dim = trtc.DVInt64(dim)
    idx_length = trtc.DVInt64(position_in_cell.shape[1])
    # Stride of the flattened ``displacement`` array, taken from its own shape
    # (assumes the same dimension-major layout the kernel already uses for
    # ``position_in_cell`` -- TODO confirm).
    displacement_length = trtc.DVInt64(displacement.shape[1])
    courant_length = trtc.DVInt64(courant.shape[0])
    loop = trtc.For(
        ['dim', 'idx_length', 'displacement', 'displacement_length', 'courant',
         'courant_length', 'cell_origin', 'position_in_cell'],
        "droplet", f'''
        // Arakawa-C grid
        int _l_0 = cell_origin[droplet + 0];
        int _l_1 = cell_origin[droplet + idx_length];
        int _l = _l_0 + _l_1 * courant_length;
        int _r_0 = cell_origin[droplet + 0] + 1 * (dim == 0);
        int _r_1 = cell_origin[droplet + idx_length] + 1 * (dim == 1);
        int _r = _r_0 + _r_1 * courant_length;
        // "auto" keeps the element type; "int" would truncate fractional values
        auto omega = position_in_cell[droplet + idx_length * dim];
        auto c_r = courant[_r];
        auto c_l = courant[_l];
        // "displacement[droplet, dim]" is a C++ comma expression (== displacement[dim]);
        // use explicit flattened indexing instead
        displacement[droplet + displacement_length * dim] = {scheme(None, None, None)}
        ''')
    loop.launch_n(displacement.shape[1], [
        dim, idx_length, displacement, displacement_length, courant,
        courant_length, cell_origin, position_in_cell
    ])
class Random:
    """Device-side uniform random number generator with persistent states.

    One ``RNGState`` is kept per output element; the states are initialised
    once in ``__init__`` and advanced on every call.
    """

    # Initialises states[i] from the user-provided seed, using the thread
    # index as the sequence number.
    __urand_init_rng_state_body = trtc.For(['rng', 'states', 'seed'], 'i', '''
        rng.state_init(seed, i, 0, states[i]);
        ''')

    # Draws one uniform number per state.
    __urand_body = trtc.For(['states', 'vec_rnd'], 'i', '''
        vec_rnd[i]=states[i].rand01();
        ''')

    def __init__(self, size, seed=None):
        """Allocate and seed ``size`` generator states.

        :param size: number of generator states (and of numbers drawn per call).
        :param seed: integer seed; when ``None`` a random seed is drawn from
            NumPy.  A seed of ``0`` is honoured rather than replaced.
        """
        self.generator = trtc.device_vector('RNGState', size)
        self.size = size
        if seed is None:
            seed = np.random.randint(0, 2 ** 16)
        dseed = trtc.DVInt64(int(seed))
        Random.__urand_init_rng_state_body.launch_n(
            size, [rndrtc.DVRNG(), self.generator, dseed])

    @nice_thrust(**NICE_THRUST_FLAGS)
    def __call__(self, storage):
        """Fill ``storage.data`` with ``self.size`` uniform random numbers."""
        Random.__urand_body.launch_n(self.size, [self.generator, storage.data])
def floor(data_out, data_in):
    """Store the floor of every element of ``data_in`` in ``data_out``.

    One thread is launched per element of ``data_out``.  Non-negative values
    are truncated; negative values are truncated and then decremented when the
    truncation changed the value (i.e. the input was not already integral).
    """
    loop = trtc.For(['out', 'in'], "i", '''
        if (in[i] >= 0)
            out[i] = (long) in[i];
        else {
            out[i] = (long) in[i];
            // compare the element, not the vector object itself
            if (in[i] != out[i])
                out[i] -= 1;
        }
        ''')
    loop.launch_n(data_out.size(), [data_out, data_in])
def floor(data):
    """Replace every element of ``data`` by its floor, in place.

    Launches one thread per element; negative non-integral values are
    truncated towards zero and then decremented by one.
    """
    element_count = data.size()
    floor_kernel = trtc.For(
        ['arr'],
        "i",
        '''
        if (arr[i] >= 0)
            arr[i] = (long) arr[i];
        else {
            auto old = arr[i];
            arr[i] = (long) arr[i];
            if (old != arr[i])
                arr[i] -= 1;
        }
        ''',
    )
    floor_kernel.launch_n(element_count, [data])
def urand(data):
    """Fill ``data`` with uniform random numbers generated on the device.

    Each thread initialises its own generator state and fills a contiguous
    chunk of up to 32 elements.  The number of threads is rounded up so that
    a trailing partial chunk is filled as well, and an empty ``data`` is a
    no-op.
    """
    n = data.size()
    if n == 0:
        # nothing to fill; also avoids a zero chunk size (division by zero)
        return

    rng = rndrtc.DVRNG()

    # TODO: threads vs. blocks
    # TODO: proper state_init
    # TODO: generator choice
    chunks = min(32, n)  # TODO!!!
    n_threads = (n + chunks - 1) // chunks  # ceiling division covers the tail
    ker = trtc.For(['rng', 'vec_rnd', 'n'], 'idx', f'''
        RNGState state;
        rng.state_init(1234, idx, 0, state);  // initialize a state using the rng object
        for (int i=0; i<{chunks}; i++) {{
            size_t j = i + idx * {chunks};
            if (j < (size_t) n)
                vec_rnd[j]=(float)state.rand01();  // generate random number using the rng object
        }}
        ''')
    ker.launch_n(n_threads, [rng, data, trtc.DVInt64(n)])
def remove_zeros(data, idx, length) -> int:
    """Move indices that point at zero-valued ``data`` to the end of ``idx``.

    Every ``idx[i]`` (for ``i < length``) whose ``data[idx[i]]`` equals zero is
    overwritten with the sentinel ``idx.size()``; the first ``length`` entries
    are then sorted so that sentinels end up last.

    :returns: position of the first sentinel within the first ``length``
        entries, or ``length`` when no sentinel is present.
    """
    idx_length = trtc.DVInt64(idx.size())
    loop = trtc.For(['data', 'idx', 'idx_length'], "i", '''
        if (data[idx[i]] == 0)
            idx[i] = idx_length;
        ''')
    loop.launch_n(length, [data, idx, idx_length])

    active = idx.range(0, length)
    trtc.Sort(active)
    result = trtc.Find(active, idx_length)
    # "Not found" is accepted both as None and as a negative index, so the
    # function never leaks a non-count value to the caller.
    if result is None or result < 0:
        result = length
    return result
def coalescence(n, idx, length, intensive, extensive, gamma, healthy):
    """Run the pairwise coalescence kernel on ``length // 2`` pairs.

    Pair ``i`` consists of the droplets ``idx[2 * i]`` and ``idx[2 * i + 1]``.
    The kernel updates the multiplicities ``n`` and the ``extensive`` array
    (bound to the kernel name ``data``) and clears ``healthy[0]`` when a
    multiplicity drops to zero.  ``intensive`` is accepted but not used here.
    """
    kernel_names = ['n', 'idx', 'data', 'gamma', 'healthy']
    kernel_source = '''
        auto j = 2 * i;
        auto k = j + 1;

        j = idx[j];
        k = idx[k];

        if (n[j] < n[k]) {
            auto old = j;
            j = k;
            k = old;
        }

        auto g = n[j] / n[k];
        if (g > gamma[i])
            g = gamma[i];

        if (g != 0) {
            auto new_n = n[j] - g * n[k];
            if (new_n > 0) {
                n[j] = new_n;
                data[/*:,*/ k] += g * data[/*:,*/ j];
            }
            else {  // new_n == 0
                n[j] = n[k] / 2;
                n[k] = n[k] - n[j];
                data[/*:,*/ j] = g * data[/*:,*/ j] + data[/*:,*/ k];
                data[/*:,*/ k] = data[/*:,*/ j];
            }
            if (n[j] == 0 || n[k] == 0) {
                healthy[0] = 0;
            }
        }
        '''
    pair_kernel = trtc.For(kernel_names, "i", kernel_source)
    pair_count = length // 2
    pair_kernel.launch_n(pair_count, [n, idx, extensive, gamma, healthy])
def column_modulo(data, divisor):
    """Reduce every column ``d`` of ``data`` modulo ``divisor[d]``, in place.

    One thread is launched per row (``data.shape[0]`` threads) and each thread
    walks over the ``divisor.size()`` columns of its own row.

    The element (row ``i``, column ``d``) is addressed as
    ``i * n_columns + d``.  This assumes ``data`` is flattened row by row with
    ``divisor.size()`` columns -- TODO confirm against the callers.  The
    former ``d + i`` indexing made neighbouring threads touch overlapping
    elements whenever there was more than one column.
    """
    n_columns = divisor.size()
    loop = trtc.For(['arr', 'divisor'], "i", f'''
        for (int d=0; d<{n_columns}; d++)
            arr[i * {n_columns} + d] = arr[i * {n_columns} + d] % divisor[d];
        ''')
    loop.launch_n(data.shape[0], [data, divisor])
result = trtc.DVDouble(obj) elif isinstance(obj, int): result = trtc.DVInt64(obj) else: raise ValueError(f"Cannot upload {obj} to device.") return result @nice_thrust(**NICE_THRUST_FLAGS) def add(output, addend): trtc.Transform_Binary(thrust(addend), thrust(output), thrust(output), trtc.Plus()) __row_modulo_body = trtc.For(['output', 'divisor', 'length'], "i", ''' int d = i / length; output[i] %= divisor[d]; ''') @nice_thrust(**NICE_THRUST_FLAGS) def row_modulo(output, divisor): __row_modulo_body.launch_n(len(output), thrust([output, divisor, output.shape[1]])) __floor_body = trtc.For(['arr'], "i", ''' if (arr[i] >= 0) arr[i] = (long) arr[i]; else { auto old = arr[i];
class MathsMethods:
    """Element-wise arithmetic on device vectors, implemented with ThrustRTC.

    Kernels are compiled once as private class attributes and launched by the
    static methods with one thread per element of ``output``.
    """

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def add(output, addend):
        """Add ``addend`` to ``output`` element by element, in place."""
        trtc.Transform_Binary(addend, output, output, trtc.Plus())

    __row_modulo_body = trtc.For(['output', 'divisor', 'length'], "i", '''
        int d = i / length;
        output[i] %= divisor[d];
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def row_modulo(output, divisor):
        """Reduce ``output[i]`` modulo ``divisor[i / output.shape[1]]``, in place."""
        length = trtc.DVInt64(output.shape[1])
        MathsMethods.__row_modulo_body.launch_n(output.size(), [output, divisor, length])

    __floor_body = trtc.For(['arr'], "i", '''
        if (arr[i] >= 0)
            arr[i] = (long) arr[i];
        else {
            auto old = arr[i];
            arr[i] = (long) arr[i];
            if (old != arr[i])
                arr[i] -= 1;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def floor(output):
        """Replace every element of ``output`` by its floor, in place."""
        MathsMethods.__floor_body.launch_n(output.size(), [output])

    __floor_out_of_place_body = trtc.For(['output', 'input_data'], "i", '''
        output[i] = (long) floor(input_data[i]);
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def floor_out_of_place(output, input_data):
        """Store the floor of every element of ``input_data`` in ``output``."""
        MathsMethods.__floor_out_of_place_body.launch_n(
            output.size(), [output, input_data])

    __multiply_elementwise_body = trtc.For(['output', 'multiplier'], "i", '''
        output[i] *= multiplier[i];
        ''')

    __multiply_body = trtc.For(['output', 'multiplier'], "i", '''
        output[i] *= multiplier;
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def multiply(output, multiplier):
        """Multiply ``output`` in place by a device vector or a scalar.

        :param multiplier: a ``StorageMethods.storage`` (element-wise), a
            ``float`` or an ``int`` (scalar broadcast).
        :raises NotImplementedError: for any other multiplier type.
        """
        if isinstance(multiplier, StorageMethods.storage):
            loop = MathsMethods.__multiply_elementwise_body
            device_multiplier = multiplier
        elif isinstance(multiplier, float):
            loop = MathsMethods.__multiply_body
            device_multiplier = trtc.DVDouble(multiplier)
        elif isinstance(multiplier, int):
            loop = MathsMethods.__multiply_body
            device_multiplier = trtc.DVInt64(multiplier)
        else:
            raise NotImplementedError()
        loop.launch_n(output.size(), [output, device_multiplier])

    __multiply_out_of_place_elementwise_body = trtc.For(
        ['output', 'multiplicand', 'multiplier'], "i", '''
        output[i] = multiplicand[i] * multiplier[i];
        ''')

    __multiply_out_of_place_body = trtc.For(
        ['output', 'multiplicand', 'multiplier'], "i", '''
        output[i] = multiplicand[i] * multiplier;
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def multiply_out_of_place(output, multiplicand, multiplier):
        """Store ``multiplicand * multiplier`` in ``output``.

        Accepts the same multiplier types as :meth:`multiply` (device vector,
        ``float`` or ``int``).

        :raises NotImplementedError: for any other multiplier type.
        """
        if isinstance(multiplier, StorageMethods.storage):
            loop = MathsMethods.__multiply_out_of_place_elementwise_body
            device_multiplier = multiplier
        elif isinstance(multiplier, float):
            loop = MathsMethods.__multiply_out_of_place_body
            device_multiplier = trtc.DVDouble(multiplier)
        elif isinstance(multiplier, int):
            # mirrors the integer branch of ``multiply``
            loop = MathsMethods.__multiply_out_of_place_body
            device_multiplier = trtc.DVInt64(multiplier)
        else:
            raise NotImplementedError()
        loop.launch_n(output.size(), [output, multiplicand, device_multiplier])

    __power_body = trtc.For(['output', 'exponent'], "i", '''
        output[i] = pow(output[i], exponent);
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def power(output, exponent):
        """Raise every element of ``output`` to ``exponent``, in place.

        An exponent equal to 1 returns immediately without launching a kernel.
        """
        if exponent == 1:
            return
        device_exponent = trtc.DVDouble(exponent)
        MathsMethods.__power_body.launch_n(output.size(), [output, device_exponent])

    __subtract_body = trtc.For(['output', 'subtrahend'], 'i', '''
        output[i] -= subtrahend[i];
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def subtract(output, subtrahend):
        """Subtract ``subtrahend`` from ``output`` element by element, in place."""
        MathsMethods.__subtract_body.launch_n(output.size(), [output, subtrahend])
# example showing how to separate init and call import ThrustRTC as trtc import CURandRTC as rndrtc rng = rndrtc.DVRNG() ker_init = trtc.For(['rng','states'], 'idx', ''' rng.state_init(1234, idx, 0, states[idx]); ''') ker_call = trtc.For(['states', 'vec_rnd'], 'idx', ''' vec_rnd[idx]=(float)states[idx].rand01(); ''' ) rng_states = trtc.device_vector('RNGState', 1024) d_vec_rnd = trtc.device_vector('float', 1024) ker_init.launch_n(1024, [rng, rng_states]) ker_call.launch_n(1024, [rng_states, d_vec_rnd]) print (d_vec_rnd.to_host())
class StorageMethods:
    """Allocation, transfer and slicing helpers for ThrustRTC device vectors.

    Device vectors handled here are "equipped" with NumPy-like ``shape`` and
    ``dtype`` attributes and a ``get(index)`` accessor (see ``__equip``).
    """

    storage = trtc.DVVector.DVVector
    integer = np.int64
    double = np.float64

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def array(shape, dtype):
        """Allocate a device vector with ``prod(shape)`` elements.

        :param dtype: ``float``/``np.float64`` or ``int``/``np.int64``.
        :raises NotImplementedError: for any other dtype.
        """
        if dtype in (float, StorageMethods.double):
            elem_cls = 'double'
            elem_dtype = StorageMethods.double
        elif dtype in (int, StorageMethods.integer):
            elem_cls = 'int64_t'
            elem_dtype = StorageMethods.integer
        else:
            raise NotImplementedError

        data = trtc.device_vector(elem_cls, int(np.prod(shape)))
        StorageMethods.__equip(data, shape, elem_dtype)
        return data

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def download(backend_data, numpy_target):
        """Copy ``backend_data`` into the existing NumPy array ``numpy_target``."""
        if isinstance(backend_data, StorageMethods.storage):
            numpy_target[:] = np.reshape(backend_data.to_host(), backend_data.shape)
        else:
            numpy_target[:] = StorageMethods.to_ndarray(backend_data)

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def from_ndarray(array):
        """Upload a NumPy array and return the equipped device vector.

        Dtypes whose name starts with ``int`` become ``np.int64``, those
        starting with ``float`` become ``np.float64``.

        :raises NotImplementedError: for any other dtype.
        """
        shape = array.shape

        if str(array.dtype).startswith('int'):
            dtype = StorageMethods.integer
        elif str(array.dtype).startswith('float'):
            dtype = StorageMethods.double
        else:
            raise NotImplementedError

        if array.ndim > 1:
            array = array.astype(dtype).flatten()
        else:
            array = array.astype(dtype)

        result = trtc.device_vector_from_numpy(array)
        StorageMethods.__equip(result, shape, dtype)
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def range(array, start=0, stop=None):
        """Return rows ``start:stop`` of a 1-D or 2-D equipped device vector.

        :raises NotImplementedError: for more than two dimensions.
        """
        if stop is None:
            stop = array.shape[0]

        dim = len(array.shape)
        if dim == 1:
            result = array.range(start, stop)
            new_shape = (stop - start, )
        elif dim == 2:
            result = array.range(array.shape[1] * start, array.shape[1] * stop)
            new_shape = (stop - start, array.shape[1])
        else:
            raise NotImplementedError(
                "Only 2 or less dimensions array is supported.")

        StorageMethods.__equip(result, shape=new_shape, dtype=array.dtype)
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def read_row(array, i):
        """Return row ``i`` of a 2-D equipped device vector as a range."""
        row_length = array.shape[1]
        start = row_length * i
        stop = start + row_length

        result = array.range(start, stop)
        StorageMethods.__equip(result, shape=(row_length, ), dtype=array.dtype)
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def shuffle_global(idx, length, u01):
        """Permute the first ``length`` entries of ``idx`` by sorting on ``u01``."""
        # WARNING: ineffective implementation
        trtc.Sort_By_Key(u01.range(0, length), idx.range(0, length))

    __shuffle_local_body = trtc.For(['cell_start', 'u01', 'idx'], "c", '''
        for (int i=cell_start[c+1]-1; i > cell_start[c]; i--) {
            int j = cell_start[c] + u01[i] * (cell_start[c+1] - cell_start[c]);
            int tmp = idx[i];
            idx[i] = idx[j];
            idx[j] = tmp;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def shuffle_local(idx, u01, cell_start):
        """Shuffle ``idx`` within each cell; one thread per cell."""
        StorageMethods.__shuffle_local_body.launch_n(cell_start.size() - 1, [cell_start, u01, idx])

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def to_ndarray(data):
        """Download ``data`` and return it as a NumPy array of ``data.shape``.

        A ``DVRange`` is first copied into a freshly allocated vector of the
        range's own dtype, so that integer ranges are not converted through
        floating point; ``float`` is used only when the range carries no
        ``dtype`` attribute.

        :raises NotImplementedError: for unsupported input types.
        """
        if isinstance(data, StorageMethods.storage):
            pass
        elif isinstance(data, trtc.DVVector.DVRange):
            data_copy = StorageMethods.array(data.shape, getattr(data, 'dtype', float))
            trtc.Copy(data, data_copy)
            data = data_copy
        else:
            raise NotImplementedError()

        result = data.to_host()
        result = np.reshape(result, data.shape)
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def upload(numpy_data, backend_target):
        """Upload ``numpy_data`` (flattened) and swap it into ``backend_target``."""
        tmp = trtc.device_vector_from_numpy(numpy_data.flatten())
        trtc.Swap(tmp, backend_target)

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def write_row(array, i, row):
        """Copy ``row`` into row ``i`` of a 2-D equipped device vector."""
        row_length = array.shape[1]
        start = row_length * i
        stop = start + row_length
        trtc.Copy(row, array.range(start, stop))

    @staticmethod
    def __equip(data, shape, dtype):
        """Attach ``shape``, ``dtype`` and a ``get(index)`` accessor to ``data``."""
        if isinstance(shape, int):
            shape = (shape, )
        data.shape = shape
        data.dtype = dtype

        def get(index):
            # a reduction over a one-element range yields that element on the host
            return trtc.Reduce(data.range(index, index + 1))

        data.get = get
class AlgorithmicMethods:
    """Particle-dynamics kernels (coalescence, normalisation, sorting, ...).

    Kernels are compiled once as private class attributes; the static methods
    upload scalar arguments and launch them.
    """

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def calculate_displacement(dim, scheme, displacement, courant, cell_origin, position_in_cell):
        """Compute droplet displacement along dimension ``dim``.

        The string returned by ``scheme(None, None, None)`` is pasted into the
        kernel as the right-hand side of the assignment and must carry its own
        terminating ``;``.  One thread per droplet
        (``displacement.shape[1]`` threads).
        """
        dim = trtc.DVInt64(dim)
        idx_length = trtc.DVInt64(position_in_cell.shape[1])
        # Stride of the flattened ``displacement`` array (assumes the same
        # dimension-major layout used for ``position_in_cell`` -- TODO confirm).
        displacement_length = trtc.DVInt64(displacement.shape[1])
        courant_length = trtc.DVInt64(courant.shape[0])
        loop = trtc.For(['dim', 'idx_length', 'displacement', 'displacement_length', 'courant',
                         'courant_length', 'cell_origin', 'position_in_cell'], "droplet", f'''
            // Arakawa-C grid
            int _l_0 = cell_origin[droplet + 0];
            int _l_1 = cell_origin[droplet + idx_length];
            int _l = _l_0 + _l_1 * courant_length;
            int _r_0 = cell_origin[droplet + 0] + 1 * (dim == 0);
            int _r_1 = cell_origin[droplet + idx_length] + 1 * (dim == 1);
            int _r = _r_0 + _r_1 * courant_length;
            // "auto" keeps the element type; "int" would truncate fractional values
            auto omega = position_in_cell[droplet + idx_length * dim];
            auto c_r = courant[_r];
            auto c_l = courant[_l];
            // "displacement[droplet, dim]" is a C++ comma expression (== displacement[dim])
            displacement[droplet + displacement_length * dim] = {scheme(None, None, None)}
            ''')
        loop.launch_n(displacement.shape[1],
                      [dim, idx_length, displacement, displacement_length, courant,
                       courant_length, cell_origin, position_in_cell])

    __coalescence_body = trtc.For(['n', 'volume', 'idx', 'idx_length', 'intensive', 'intensive_length',
                                   'extensive', 'extensive_length', 'gamma', 'healthy', 'adaptive',
                                   'subs', 'adaptive_memory'], "i", '''
        if (gamma[i] == 0) {
            adaptive_memory[i] = 1;
            return;
        }

        int j = idx[i];
        int k = idx[i + 1];

        if (n[j] < n[k]) {
            j = idx[i + 1];
            k = idx[i];
        }
        int g = n[j] / n[k];
        if (adaptive)
            adaptive_memory[i] = (int)(gamma[i] * subs / g);
        if (g > gamma[i])
            g = gamma[i];
        if (g == 0)
            return;

        int new_n = n[j] - g * n[k];
        if (new_n > 0) {
            n[j] = new_n;
            for (int attr = 0; attr < intensive_length; attr+=idx_length) {
                intensive[attr + k] = (intensive[attr + k] * volume[k] + intensive[attr + j] * g * volume[j]) / (volume[k] + g * volume[j]);
            }
            for (int attr = 0; attr < extensive_length; attr+=idx_length) {
                extensive[attr + k] += g * extensive[attr + j];
            }
        }
        else {  // new_n == 0
            n[j] = (int)(n[k] / 2);
            n[k] = n[k] - n[j];
            for (int attr = 0; attr < intensive_length; attr+=idx_length) {
                intensive[attr + j] = (intensive[attr + k] * volume[k] + intensive[attr + j] * g * volume[j]) / (volume[k] + g * volume[j]);
                intensive[attr + k] = intensive[attr + j];
            }
            for (int attr = 0; attr < extensive_length; attr+=idx_length) {
                extensive[attr + j] = g * extensive[attr + j] + extensive[attr + k];
                extensive[attr + k] = extensive[attr + j];
            }
        }
        if (n[k] == 0 || n[j] == 0) {
            healthy[0] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def coalescence(n, volume, idx, length, intensive, extensive, gamma, healthy, adaptive, subs, adaptive_memory):
        """Launch the coalescence kernel on ``length - 1`` candidate pairs.

        :returns: the maximum of the first ``length - 1`` entries of
            ``adaptive_memory`` (reduction starting from 0).
        """
        idx_length = trtc.DVInt64(len(idx))
        intensive_length = trtc.DVInt64(len(intensive))
        extensive_length = trtc.DVInt64(len(extensive))
        adaptive_device = trtc.DVBool(adaptive)
        subs_device = trtc.DVInt64(subs)
        AlgorithmicMethods.__coalescence_body.launch_n(
            length - 1,
            [n.data, volume.data, idx.data, idx_length, intensive.data, intensive_length,
             extensive.data, extensive_length, gamma.data, healthy.data, adaptive_device,
             subs_device, adaptive_memory.data])
        return trtc.Reduce(adaptive_memory.data.range(0, length-1), trtc.DVInt64(0), trtc.Maximum())

    __compute_gamma_body = trtc.For(['prob', 'rand'], "i", '''
        prob[i] = -floor(-prob[i] + rand[int(i / 2)]);
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def compute_gamma(prob, rand):
        """Overwrite ``prob[i]`` with ``-floor(-prob[i] + rand[i / 2])``."""
        AlgorithmicMethods.__compute_gamma_body.launch_n(len(prob), [prob.data, rand.data])

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def condensation(
            solver,
            n_cell, cell_start_arg,
            v, particle_temperatures, n, vdry, idx, rhod, thd, qv, dv, prhod, pthd, pqv, kappa,
            rtol_x, rtol_thd, dt, substeps, cell_order
    ):
        """Not available in this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError()

    __flag_precipitated_body = trtc.For(['idx', 'idx_length', 'n_dims', 'healthy', 'cell_origin',
                                         'position_in_cell'], "i", '''
        if (cell_origin[idx_length * (n_dims-1) + i] == 0 && position_in_cell[idx_length * (n_dims-1) + i] < 0) {
            idx[i] = idx_length;
            healthy[0] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def flag_precipitated(cell_origin, position_in_cell, idx, length, healthy):
        """Mark droplets below the lowest cell by setting ``idx[i] = idx.size()``.

        Also clears ``healthy[0]`` whenever a droplet is flagged.
        """
        idx_length = trtc.DVInt64(idx.size())
        n_dims = trtc.DVInt64(len(cell_origin.shape))
        AlgorithmicMethods.__flag_precipitated_body.launch_n(
            length, [idx, idx_length, n_dims, healthy, cell_origin, position_in_cell])

    __linear_collection_efficiency_body = trtc.For(
        ['A', 'B', 'D1', 'D2', 'E1', 'E2', 'F1', 'F2', 'G1', 'G2', 'G3', 'Mf', 'Mg',
         'output', 'radii', 'is_first_in_pair', 'unit'], "i", '''
        output[i] = 0;
        if (is_first_in_pair[i]) {
            double r = radii[i] / unit;
            double r_s = radii[i + 1] / unit;
            double p = r_s / r;
            if (p != 0 && p != 1) {
                double G = pow((G1 / r), Mg) + G2 + G3 * r;
                double Gp = pow((1 - p), G);
                if (Gp != 0) {
                    double D = D1 / pow(r, D2);
                    double E = E1 / pow(r, E2);
                    double F = pow((F1 / r), Mf) + F2;
                    output[i] = A + B * p + D / pow(p, F) + E / Gp;
                    if (output[i] < 0) {
                        output[i] = 0;
                    }
                }
            }
        }
        ''')

    @staticmethod
    def linear_collection_efficiency(params, output, radii, is_first_in_pair, unit):
        """Evaluate the parametrised collection efficiency for each pair.

        :param params: exactly 13 coefficients, in the order
            ``A, B, D1, D2, E1, E2, F1, F2, G1, G2, G3, Mf, Mg``.
        """
        # unpacking keeps the arity check: a wrong number of coefficients fails here
        A, B, D1, D2, E1, E2, F1, F2, G1, G2, G3, Mf, Mg = params
        device_params = [trtc.DVDouble(value)
                         for value in (A, B, D1, D2, E1, E2, F1, F2, G1, G2, G3, Mf, Mg)]
        dunit = trtc.DVDouble(unit)
        AlgorithmicMethods.__linear_collection_efficiency_body.launch_n(
            len(is_first_in_pair) - 1,
            device_params + [output.data, radii.data, is_first_in_pair.data, dunit])

    __interpolation_body = trtc.For(['output', 'radius', 'factor', 'a', 'b'], 'i', '''
        int r_id = (int)(factor * radius[i]);
        auto r_rest = (factor * radius[i] - r_id) / factor;
        output[i] = a[r_id] + r_rest * b[r_id];
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def interpolation(output, radius, factor, b, c):
        """Piecewise-linear lookup: ``output[i] = b[r_id] + r_rest * c[r_id]``.

        ``b`` and ``c`` are bound to the kernel names ``a`` and ``b``.
        """
        factor_device = trtc.DVInt64(factor)
        AlgorithmicMethods.__interpolation_body.launch_n(
            len(radius), [output.data, radius.data, factor_device, b.data, c.data])

    @staticmethod
    def make_cell_caretaker(idx, cell_start, scheme):
        """Return the cell-sorting routine; the arguments are currently unused."""
        return AlgorithmicMethods._sort_by_cell_id_and_update_cell_start

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def moments(moment_0, moments, n, attr, cell_id, idx, length, specs_idx, specs_rank, min_x, max_x, x_id):
        """Compute moments on the host via the Numba backend and upload results."""
        # TODO print("Numba import!: ThrustRTC.moments(...)")

        from PySDM.backends.numba.numba import Numba
        host_moment_0 = moment_0.to_ndarray()
        host_moments = moments.to_ndarray()
        host_n = n.to_ndarray()
        host_attr = attr.to_ndarray()
        host_cell_id = cell_id.to_ndarray()
        host_idx = idx.to_ndarray()
        host_specs_idx = specs_idx.to_ndarray()
        host_specs_rank = specs_rank.to_ndarray()
        Numba.moments_body(host_moment_0, host_moments, host_n, host_attr, host_cell_id, host_idx, length,
                           host_specs_idx, host_specs_rank, min_x, max_x, x_id)
        moment_0.upload(host_moment_0)
        moments.upload(host_moments)

    __normalize_body_0 = trtc.For(['cell_start', 'norm_factor', 'dt_div_dv'], "i", '''
        int sd_num = cell_start[i + 1] - cell_start[i];
        if (sd_num < 2) {
            norm_factor[i] = 0;
        }
        else {
            int half_sd_num = sd_num / 2;
            norm_factor[i] = dt_div_dv * sd_num * (sd_num - 1) / 2 / half_sd_num;
        }
        ''')

    __normalize_body_1 = trtc.For(['prob', 'cell_id', 'norm_factor'], "d", '''
        prob[d] *= norm_factor[cell_id[d]];
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def normalize(prob, cell_id, cell_start, norm_factor, dt_div_dv):
        """Fill ``norm_factor`` per cell, then scale ``prob`` by its cell's factor."""
        n_cell = cell_start.shape[0] - 1
        device_dt_div_dv = trtc.DVDouble(dt_div_dv)
        AlgorithmicMethods.__normalize_body_0.launch_n(
            n_cell, [cell_start.data, norm_factor.data, device_dt_div_dv])
        AlgorithmicMethods.__normalize_body_1.launch_n(
            prob.shape[0], [prob.data, cell_id.data, norm_factor.data])

    __remove_zeros_body = trtc.For(['data', 'idx', 'idx_length'], "i", '''
        if (idx[i] < idx_length && data[idx[i]] == 0)
            idx[i] = idx_length;
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def remove_zeros(data, idx, length) -> int:
        """Replace indices of zero-valued ``data`` by ``idx.size()`` and sort ``idx``.

        :returns: number of entries of ``idx`` that differ from ``idx.size()``.
        """
        idx_length = trtc.DVInt64(idx.size())

        # Warning: (potential bug source): reading from outside of array
        AlgorithmicMethods.__remove_zeros_body.launch_n(length, [data, idx, idx_length])

        trtc.Sort(idx)

        result = idx.size() - trtc.Count(idx, idx_length)
        return result

    ___sort_by_cell_id_and_update_cell_start_body = trtc.For(['cell_id', 'cell_start', 'idx'], "i", '''
        if (i == 0) {
            cell_start[cell_id[idx[0]]] = 0;
        }
        else {
            int cell_id_curr = cell_id[idx[i]];
            int cell_id_next = cell_id[idx[i + 1]];
            int diff = (cell_id_next - cell_id_curr);
            for (int j = 1; j <= diff; j++) {
                cell_start[cell_id_curr + j] = idx[i + 1];
            }
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def _sort_by_cell_id_and_update_cell_start(cell_id, cell_start, idx, length):
        """Sort ``idx`` by ``cell_id``, refill ``cell_start`` and return ``idx``.

        ``cell_start`` is first filled with ``length`` and then updated by the
        kernel, launched with ``length - 1`` threads.
        """
        trtc.Sort_By_Key(cell_id.data, idx.data)
        trtc.Fill(cell_start.data, trtc.DVInt64(length))
        AlgorithmicMethods.___sort_by_cell_id_and_update_cell_start_body.launch_n(
            length - 1, [cell_id.data, cell_start.data, idx.data])
        return idx
class AlgorithmicStepMethods:
    """Building-block kernels operating on (pairs of) permuted droplets."""

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def amax(row, idx):
        """Return the maximum of ``row`` viewed through the permutation ``idx``."""
        perm_in = trtc.DVPermutation(row.data, idx.data)
        index = trtc.Max_Element(perm_in.range(0, len(row)))
        row_idx = idx[index]
        result = row[row_idx]
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def amin(row, idx):
        """Return the minimum of ``row`` viewed through the permutation ``idx``."""
        perm_in = trtc.DVPermutation(row.data, idx.data)
        index = trtc.Min_Element(perm_in.range(0, len(row)))
        row_idx = idx[index]
        result = row[row_idx]
        return result

    __cell_id_body = trtc.For(
        ['cell_id', 'cell_origin', 'strides', 'n_dims', 'size'], "i", '''
        cell_id[i] = 0;
        for (int j = 0; j < n_dims; j++) {
            cell_id[i] += cell_origin[size * i + j] * strides[j];
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def cell_id(cell_id, cell_origin, strides):
        """Fill ``cell_id[i]`` with the stride-weighted sum of ``cell_origin`` entries."""
        n_dims = trtc.DVInt64(strides.shape[1])
        size = trtc.DVInt64(cell_origin.shape[0])
        AlgorithmicStepMethods.__cell_id_body.launch_n(
            cell_id.size(), [cell_id, cell_origin, strides, n_dims, size])

    __distance_pair_body = trtc.For(
        ['data_out', 'data_in', 'is_first_in_pair'], "i", '''
        if (is_first_in_pair[i]) {
            data_out[i] = abs(data_in[i] - data_in[i + 1]);
        } else {
            data_out[i] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def distance_pair(data_out, data_in, is_first_in_pair, idx, length):
        """Store ``|a - b|`` for each pair at the position of its first member."""
        # note: silently assumes that data_out is not permuted (i.e. not part of state)
        perm_in = trtc.DVPermutation(data_in, idx)
        if length > 1:
            AlgorithmicStepMethods.__distance_pair_body.launch_n(
                length - 1, [data_out, perm_in, is_first_in_pair])

    __find_pairs_body = trtc.For(
        ['cell_start', 'perm_cell_id', 'is_first_in_pair'], "i", '''
        is_first_in_pair[i] = (
            perm_cell_id[i] == perm_cell_id[i+1] &&
            (i - cell_start[perm_cell_id[i]]) % 2 == 0
        );
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def find_pairs(cell_start, is_first_in_pair, cell_id, idx, length):
        """Flag droplets that open a pair within their cell.

        Position ``i`` is flagged when it shares the cell with position
        ``i + 1`` and sits at an even offset from the start of that cell.
        """
        perm_cell_id = trtc.DVPermutation(cell_id, idx)
        if length > 1:
            AlgorithmicStepMethods.__find_pairs_body.launch_n(
                length - 1, [cell_start, perm_cell_id, is_first_in_pair])

    __max_pair_body = trtc.For(['data_out', 'perm_in', 'is_first_in_pair'], "i", '''
        if (is_first_in_pair[i]) {
            data_out[i] = max(perm_in[i], perm_in[i + 1]);
        } else {
            data_out[i] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def max_pair(data_out, data_in, is_first_in_pair, idx, length):
        """Store the larger member of each pair at the position of its first member."""
        # note: silently assumes that data_out is not permuted (i.e. not part of state)
        perm_in = trtc.DVPermutation(data_in, idx)
        if length > 1:
            AlgorithmicStepMethods.__max_pair_body.launch_n(
                length - 1, [data_out, perm_in, is_first_in_pair])

    # No "else" branch here: thread i writes data_out[i + 1] for its pair, so a
    # zeroing write by thread i + 1 would race with it.  Positions outside any
    # pair are zeroed by the Fill issued in sort_pair before the launch.
    __sort_pair_body = trtc.For(['data_out', 'data_in', 'is_first_in_pair'], "i", '''
        if (is_first_in_pair[i]) {
            if (data_in[i] < data_in[i + 1]) {
                data_out[i] = data_in[i + 1];
                data_out[i + 1] = data_in[i];
            }
            else {
                data_out[i] = data_in[i];
                data_out[i + 1] = data_in[i + 1];
            }
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def sort_pair(data_out, data_in, is_first_in_pair, idx, length):
        """Write each pair into ``data_out`` ordered as (larger, smaller).

        ``data_out`` is zero-filled first, so positions that do not belong to
        any pair end up as 0.
        """
        # note: silently assumes that data_out is not permuted (i.e. not part of state)
        perm_in = trtc.DVPermutation(data_in, idx)
        trtc.Fill(data_out, trtc.DVDouble(0))
        if length > 1:
            AlgorithmicStepMethods.__sort_pair_body.launch_n(
                length - 1, [data_out, perm_in, is_first_in_pair])

    __sum_pair_body = trtc.For(['data_out', 'perm_in', 'is_first_in_pair'], "i", '''
        if (is_first_in_pair[i]) {
            data_out[i] = perm_in[i] + perm_in[i + 1];
        } else {
            data_out[i] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def sum_pair(data_out, data_in, is_first_in_pair, idx, length):
        """Store the sum of each pair at the position of its first member."""
        # note: silently assumes that data_out is not permuted (i.e. not part of state)
        perm_in = trtc.DVPermutation(data_in, idx)
        if length > 1:
            AlgorithmicStepMethods.__sum_pair_body.launch_n(
                length - 1, [data_out, perm_in, is_first_in_pair])
class AlgorithmicMethods:
    """Particle-dynamics kernels (coalescence, normalisation, sorting, ...).

    Kernels are compiled once as private class attributes; the static methods
    upload scalar arguments and launch them on raw device vectors.
    """

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def calculate_displacement(dim, scheme, displacement, courant, cell_origin, position_in_cell):
        """Compute droplet displacement along dimension ``dim``.

        The string returned by ``scheme(None, None, None)`` is pasted into the
        kernel as the right-hand side of the assignment and must carry its own
        terminating ``;``.  One thread per droplet
        (``displacement.shape[1]`` threads).
        """
        dim = trtc.DVInt64(dim)
        idx_length = trtc.DVInt64(position_in_cell.shape[1])
        # Stride of the flattened ``displacement`` array (assumes the same
        # dimension-major layout used for ``position_in_cell`` -- TODO confirm).
        displacement_length = trtc.DVInt64(displacement.shape[1])
        courant_length = trtc.DVInt64(courant.shape[0])
        loop = trtc.For([
            'dim', 'idx_length', 'displacement', 'displacement_length', 'courant',
            'courant_length', 'cell_origin', 'position_in_cell'
        ], "droplet", f'''
            // Arakawa-C grid
            int _l_0 = cell_origin[droplet + 0];
            int _l_1 = cell_origin[droplet + idx_length];
            int _l = _l_0 + _l_1 * courant_length;
            int _r_0 = cell_origin[droplet + 0] + 1 * (dim == 0);
            int _r_1 = cell_origin[droplet + idx_length] + 1 * (dim == 1);
            int _r = _r_0 + _r_1 * courant_length;
            // "auto" keeps the element type; "int" would truncate fractional values
            auto omega = position_in_cell[droplet + idx_length * dim];
            auto c_r = courant[_r];
            auto c_l = courant[_l];
            // "displacement[droplet, dim]" is a C++ comma expression (== displacement[dim])
            displacement[droplet + displacement_length * dim] = {scheme(None, None, None)}
            ''')
        loop.launch_n(displacement.shape[1], [
            dim, idx_length, displacement, displacement_length, courant,
            courant_length, cell_origin, position_in_cell
        ])

    __coalescence_body = trtc.For([
        'n', 'volume', 'idx', 'idx_length', 'intensive', 'intensive_length',
        'extensive', 'extensive_length', 'gamma', 'healthy'
    ], "i", '''
        if (gamma[i] == 0)
            return;

        int j = idx[i];
        int k = idx[i + 1];

        if (n[j] < n[k]) {
            j = idx[i + 1];
            k = idx[i];
        }
        int g = (int)(n[j] / n[k]);
        if (g > gamma[i])
            g = gamma[i];
        if (g == 0)
            return;

        int new_n = n[j] - g * n[k];
        if (new_n > 0) {
            n[j] = new_n;
            for (int attr = 0; attr < intensive_length; attr+=idx_length) {
                intensive[attr + k] = (intensive[attr + k] * volume[k] + intensive[attr + j] * g * volume[j]) / (volume[k] + g * volume[j]);
            }
            for (int attr = 0; attr < extensive_length; attr+=idx_length) {
                extensive[attr + k] += g * extensive[attr + j];
            }
        }
        else {  // new_n == 0
            n[j] = (int)(n[k] / 2);
            n[k] = n[k] - n[j];
            for (int attr = 0; attr < intensive_length; attr+=idx_length) {
                intensive[attr + j] = (intensive[attr + k] * volume[k] + intensive[attr + j] * g * volume[j]) / (volume[k] + g * volume[j]);
                intensive[attr + k] = intensive[attr + j];
            }
            for (int attr = 0; attr < extensive_length; attr+=idx_length) {
                extensive[attr + j] = g * extensive[attr + j] + extensive[attr + k];
                extensive[attr + k] = extensive[attr + j];
            }
        }
        if (n[k] == 0 || n[j] == 0) {
            healthy[0] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def coalescence(n, volume, idx, length, intensive, extensive, gamma, healthy):
        """Launch the coalescence kernel on ``length - 1`` candidate pairs."""
        idx_length = trtc.DVInt64(idx.size())
        intensive_length = trtc.DVInt64(intensive.size())
        extensive_length = trtc.DVInt64(extensive.size())
        AlgorithmicMethods.__coalescence_body.launch_n(length - 1, [
            n, volume, idx, idx_length, intensive, intensive_length,
            extensive, extensive_length, gamma, healthy
        ])

    __compute_gamma_body = trtc.For(['prob', 'rand'], "i", '''
        prob[i] += rand[int(i / 2)];
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def compute_gamma(prob, rand):
        """Overwrite ``prob`` with ``-floor(-prob + rand[i / 2])`` in four steps."""
        MathsMethods.multiply(prob, -1.)
        AlgorithmicMethods.__compute_gamma_body.launch_n(
            prob.size(), [prob, rand])
        MathsMethods.floor(prob)
        MathsMethods.multiply(prob, -1.)

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def condensation(solver, n_cell, cell_start_arg, v, particle_temperatures,
                     n, vdry, idx, rhod, thd, qv, dv, prhod, pthd, pqv, kappa,
                     rtol_x, rtol_thd, dt, substeps, cell_order):
        """Not available in this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError()

    __flag_precipitated_body = trtc.For([
        'idx', 'idx_length', 'n_dims', 'healthy', 'cell_origin',
        'position_in_cell'
    ], "i", '''
        if (cell_origin[idx_length * (n_dims-1) + i] == 0 && position_in_cell[idx_length * (n_dims-1) + i] < 0) {
            idx[i] = idx_length;
            healthy[0] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def flag_precipitated(cell_origin, position_in_cell, idx, length, healthy):
        """Mark droplets below the lowest cell by setting ``idx[i] = idx.size()``.

        Also clears ``healthy[0]`` whenever a droplet is flagged.
        """
        idx_length = trtc.DVInt64(idx.size())
        n_dims = trtc.DVInt64(len(cell_origin.shape))
        AlgorithmicMethods.__flag_precipitated_body.launch_n(
            length,
            [idx, idx_length, n_dims, healthy, cell_origin, position_in_cell])

    @staticmethod
    def make_cell_caretaker(idx, cell_start, scheme):
        """Return the cell-sorting routine; the arguments are currently unused."""
        return AlgorithmicMethods._sort_by_cell_id_and_update_cell_start

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def moments(moment_0, moments, n, attr, cell_id, idx, length, specs_idx,
                specs_rank, min_x, max_x, x_id):
        """Compute moments on the host via the Numba backend and copy them back."""
        # TODO print("Numba import!: ThrustRTC.moments(...)")

        from PySDM.backends.numba.numba import Numba
        from PySDM.backends.thrustRTC._storage_methods import StorageMethods
        host_moment_0 = StorageMethods.to_ndarray(moment_0)
        host_moments = StorageMethods.to_ndarray(moments)
        host_n = StorageMethods.to_ndarray(n)
        host_attr = StorageMethods.to_ndarray(attr)
        host_cell_id = StorageMethods.to_ndarray(cell_id)
        host_idx = StorageMethods.to_ndarray(idx)
        host_specs_idx = StorageMethods.to_ndarray(specs_idx)
        host_specs_rank = StorageMethods.to_ndarray(specs_rank)
        Numba.moments(host_moment_0, host_moments, host_n, host_attr,
                      host_cell_id, host_idx, length, host_specs_idx,
                      host_specs_rank, min_x, max_x, x_id)
        device_moment_0 = StorageMethods.from_ndarray(host_moment_0)
        device_moments = StorageMethods.from_ndarray(host_moments)
        trtc.Copy(device_moment_0, moment_0)
        trtc.Copy(device_moments, moments)

    __normalize_body_0 = trtc.For(['cell_start', 'norm_factor', 'dt_div_dv'], "i", '''
        int sd_num = cell_start[i + 1] - cell_start[i];
        if (sd_num < 2) {
            norm_factor[i] = 0;
        }
        else {
            int half_sd_num = sd_num / 2;
            norm_factor[i] = dt_div_dv * sd_num * (sd_num - 1) / 2 / half_sd_num;
        }
        ''')

    __normalize_body_1 = trtc.For(['prob', 'cell_id', 'norm_factor'], "d", '''
        prob[d] *= norm_factor[cell_id[d]];
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def normalize(prob, cell_id, cell_start, norm_factor, dt_div_dv):
        """Fill ``norm_factor`` per cell, then scale ``prob`` by its cell's factor."""
        n_cell = cell_start.shape[0] - 1
        device_dt_div_dv = trtc.DVDouble(dt_div_dv)
        AlgorithmicMethods.__normalize_body_0.launch_n(
            n_cell, [cell_start, norm_factor, device_dt_div_dv])
        AlgorithmicMethods.__normalize_body_1.launch_n(
            prob.shape[0], [prob, cell_id, norm_factor])

    __remove_zeros_body = trtc.For(['data', 'idx', 'idx_length'], "i", '''
        if (idx[i] < idx_length && data[idx[i]] == 0)
            idx[i] = idx_length;
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def remove_zeros(data, idx, length) -> int:
        """Replace indices of zero-valued ``data`` by ``idx.size()`` and sort ``idx``.

        :returns: number of entries of ``idx`` that differ from ``idx.size()``.
        """
        idx_length = trtc.DVInt64(idx.size())

        # Warning: (potential bug source): reading from outside of array
        AlgorithmicMethods.__remove_zeros_body.launch_n(
            length, [data, idx, idx_length])

        trtc.Sort(idx)

        result = idx.size() - trtc.Count(idx, idx_length)
        if result < idx.size():
            # diagnostic output kept from the original: some index was flagged
            print("undertaker")
        return result

    ___sort_by_cell_id_and_update_cell_start_body = trtc.For(
        ['cell_id', 'cell_start', 'idx'], "i", '''
        if (i == 0) {
            cell_start[cell_id[idx[0]]] = 0;
        }
        else {
            int cell_id_curr = cell_id[idx[i]];
            int cell_id_next = cell_id[idx[i + 1]];
            int diff = (cell_id_next - cell_id_curr);
            for (int j = 1; j <= diff; j++) {
                cell_start[cell_id_curr + j] = idx[i + 1];
            }
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def _sort_by_cell_id_and_update_cell_start(cell_id, cell_start, idx, length):
        """Sort ``idx`` by ``cell_id``, refill ``cell_start`` and return ``idx``.

        ``cell_start`` is first filled with ``length`` and then updated by the
        kernel, launched with ``length - 1`` threads.
        """
        trtc.Sort_By_Key(cell_id, idx)
        trtc.Fill(cell_start, trtc.DVInt64(length))
        AlgorithmicMethods.___sort_by_cell_id_and_update_cell_start_body.launch_n(
            length - 1, [cell_id, cell_start, idx])
        return idx
class PhysicsMethods:
    """Physics formulae for the ThrustRTC backend.

    Most methods do not compute anything themselves: they ignore their
    arguments and return a piece of kernel source text (or an empty string).
    Only ``terminal_velocity`` launches a device kernel.
    """

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def explicit_in_space(omega, c_l, c_r):
        """Return the source text of the explicit-in-space interpolation.

        The arguments are not used; the returned expression refers to the
        names ``omega``, ``c_l`` and ``c_r`` and ends with a semicolon.
        """
        expression = "c_l * (1 - omega) + c_r * omega;"
        return expression

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def implicit_in_space(omega, c_l, c_r):
        """Return the source text of the implicit-in-space interpolation.

        See eqs 14-16 in Arabas et al. 2015 (libcloudph++). The arguments are
        not used; the returned expression ends with a semicolon.
        """
        return "(omega * (c_r - c_l) + c_l) / (1 - (c_r - c_l));"

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def temperature_pressure_RH(rhod, thd, qv):
        """Return the fixed text ``"temperature_pressure_RH;"``; arguments are unused."""
        expression = "temperature_pressure_RH;"
        return expression

    # Kernel, one thread per element ``i``, piecewise in ``radius[i]``:
    #   below r1:  k1 * radius^2
    #   below r2:  k2 * radius
    #   otherwise: k3 * radius^0.5
    __terminal_velocity_body = trtc.For(
        ["values", "radius", "k1", "k2", "k3", "r1", "r2"],
        "i",
        " if (radius[i] < r1) {"
        " values[i] = k1 * radius[i] * radius[i];"
        " } else {"
        " if (radius[i] < r2) {"
        " values[i] = k2 * radius[i];"
        " } else {"
        " values[i] = k3 * pow(radius[i], .5);"
        " } } ",
    )

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def terminal_velocity(values, radius, k1, k2, k3, r1, r2):
        """Fill ``values`` with the piecewise terminal velocity of ``radius``.

        :param values: device vector overwritten with the result
        :param radius: device vector of radii, read element by element
        :param k1: host scalar coefficient used below ``r1``
        :param k2: host scalar coefficient used between ``r1`` and ``r2``
        :param k3: host scalar coefficient used from ``r2`` upwards
        :param r1: host scalar, lower regime boundary
        :param r2: host scalar, upper regime boundary
        """
        # wrap the host scalars as device doubles, in kernel-argument order
        scalars = [trtc.DVDouble(value) for value in (k1, k2, k3, r1, r2)]
        PhysicsMethods.__terminal_velocity_body.launch_n(
            values.size(), [values, radius, *scalars])

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def radius(volume):
        """Return an empty string; ``volume`` is unused."""
        return ""

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def dr_dt_MM(r, T, p, RH, kp, rd):
        """Return an empty string; all arguments are unused."""
        return ""

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def dr_dt_FF(r, T, p, qv, kp, rd, T_i):
        """Return an empty string; all arguments are unused."""
        return ""

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def dthd_dt(rhod, thd, T, dqv_dt):
        """Return an empty string; all arguments are unused."""
        return ""
def compute_gamma(prob, rand):
    """Replace ``prob`` in place by ``-floor(rand[i / 2] - prob[i])``.

    The value is built in four in-place steps: negate ``prob``, add the
    random number shared by each consecutive pair of elements (``i / 2`` is
    an integer division in the kernel), floor, and negate again. The result
    equals ``ceil(prob[i] - rand[i / 2])``.

    :param prob: device storage, overwritten with the result
    :param rand: device storage of random numbers; element ``i`` of ``prob``
        reads ``rand[i / 2]``
    """
    MathsMethods.multiply(prob, -1.)
    loop = trtc.For(['prob', 'rand'], "i", "prob[i] += rand[int(i / 2)];")
    loop.launch_n(prob.size(), [prob, rand])
    # MathsMethods exposes the in-place floor as `floor`
    # (there is no `floor_in_place` attribute on the class).
    MathsMethods.floor(prob)
    MathsMethods.multiply(prob, -1.)
import ThrustRTC as trtc
import CURandRTC as rndrtc

# Demo: fill a device vector with uniform random numbers and print it.
N_SAMPLES = 1024

rng = rndrtc.DVRNG()

# One thread per element: every thread initialises its own generator state
# (fixed seed 1234, sequence number = element index) and stores a single draw.
ker = trtc.For(
    ['rng', 'vec_rnd'],
    'idx',
    " RNGState state;"
    " rng.state_init(1234, idx, 0, state);"
    " vec_rnd[idx]=(float)state.rand01(); ",
)

d_vec_rnd = trtc.device_vector('float', N_SAMPLES)
ker.launch_n(N_SAMPLES, [rng, d_vec_rnd])

# copy the result back to the host for display
print(d_vec_rnd.to_host())
class MathsMethods:
    """Element-wise arithmetic and random numbers on ThrustRTC device vectors.

    The kernels are built once as private class attributes. Every public
    method is a static method that launches one of them (or a Thrust
    algorithm) and writes its result into the ``output`` / ``data`` argument.
    """

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def add(output, addend):
        """Add ``addend`` to ``output`` element-wise, storing the sum in ``output``."""
        trtc.Transform_Binary(addend, output, output, trtc.Plus())

    # One thread per element: element i belongs to row i / length and is
    # reduced modulo that row's divisor.
    __row_modulo_body = trtc.For(
        ['output', 'divisor', 'length'],
        "i",
        " int d = i / length;"
        " output[i] %= divisor[d]; ",
    )

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def row_modulo(output, divisor):
        """Reduce every row of ``output`` modulo the matching entry of ``divisor``.

        :param output: two-dimensional device storage, modified in place;
            ``output.shape[1]`` is taken as the row length
        :param divisor: device vector with one divisor per row
        """
        length = trtc.DVInt64(output.shape[1])
        MathsMethods.__row_modulo_body.launch_n(output.size(), [output, divisor, length])

    # Truncates towards zero with a (long) cast, then subtracts 1 from
    # negative values that were not already integral.
    __floor_body = trtc.For(
        ['arr'],
        "i",
        " if (arr[i] >= 0)"
        " arr[i] = (long) arr[i];"
        " else {"
        " auto old = arr[i];"
        " arr[i] = (long) arr[i];"
        " if (old != arr[i])"
        " arr[i] -= 1;"
        " } ",
    )

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def floor(output):
        """Round every element of ``output`` down to an integer value, in place."""
        MathsMethods.__floor_body.launch_n(output.size(), [output])

    # Same rounding rule as __floor_body, reading from input_data instead.
    __floor_out_of_place_body = trtc.For(
        ['output', 'input_data'],
        "i",
        " if (input_data[i] >= 0)"
        " output[i] = (long) input_data[i];"
        " else {"
        " output[i] = (long) input_data[i];"
        " if (input_data[i] != output[i])"
        " output[i] -= 1;"
        " } ",
    )

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def floor_out_of_place(output, input_data):
        """Store the rounded-down elements of ``input_data`` in ``output``."""
        MathsMethods.__floor_out_of_place_body.launch_n(output.size(), [output, input_data])

    __multiply_elementwise_body = trtc.For(
        ['output', 'multiplier'], "i", " output[i] *= multiplier[i]; ")

    __multiply_body = trtc.For(
        ['output', 'multiplier'], "i", " output[i] *= multiplier; ")

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def multiply(output, multiplier):
        """Multiply ``output`` in place by a device vector or a host scalar.

        :param output: device storage, modified in place
        :param multiplier: ``StorageMethods.storage`` (element-wise product),
            ``float`` or ``int`` (every element scaled by the same value)
        :raises NotImplementedError: for any other multiplier type
        """
        if isinstance(multiplier, StorageMethods.storage):
            loop = MathsMethods.__multiply_elementwise_body
            device_multiplier = multiplier
        elif isinstance(multiplier, float):
            loop = MathsMethods.__multiply_body
            device_multiplier = trtc.DVDouble(multiplier)
        elif isinstance(multiplier, int):
            loop = MathsMethods.__multiply_body
            device_multiplier = trtc.DVInt64(multiplier)
        else:
            raise NotImplementedError()
        loop.launch_n(output.size(), [output, device_multiplier])

    __multiply_out_of_place_elementwise_body = trtc.For(
        ['output', 'multiplicand', 'multiplier'],
        "i",
        " output[i] = multiplicand[i] * multiplier[i]; ",
    )

    __multiply_out_of_place_body = trtc.For(
        ['output', 'multiplicand', 'multiplier'],
        "i",
        " output[i] = multiplicand[i] * multiplier; ",
    )

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def multiply_out_of_place(output, multiplicand, multiplier):
        """Store ``multiplicand * multiplier`` in ``output``.

        Accepts the same multiplier types as ``multiply``.

        :param output: device storage overwritten with the product
        :param multiplicand: device storage, read only
        :param multiplier: ``StorageMethods.storage`` (element-wise product),
            ``float`` or ``int`` (every element scaled by the same value)
        :raises NotImplementedError: for any other multiplier type
        """
        if isinstance(multiplier, StorageMethods.storage):
            loop = MathsMethods.__multiply_out_of_place_elementwise_body
            device_multiplier = multiplier
        elif isinstance(multiplier, float):
            loop = MathsMethods.__multiply_out_of_place_body
            device_multiplier = trtc.DVDouble(multiplier)
        elif isinstance(multiplier, int):
            # kept in line with multiply(), which also accepts integers
            loop = MathsMethods.__multiply_out_of_place_body
            device_multiplier = trtc.DVInt64(multiplier)
        else:
            raise NotImplementedError()
        loop.launch_n(output.size(), [output, multiplicand, device_multiplier])

    __power_body = trtc.For(
        ['output', 'exponent'], "i", " output[i] = pow(output[i], exponent); ")

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def power(output, exponent):
        """Raise every element of ``output`` to ``exponent``, in place.

        An exponent equal to 1 returns immediately without launching a kernel.
        """
        if exponent == 1:
            return
        device_exponent = trtc.DVDouble(exponent)
        MathsMethods.__power_body.launch_n(output.size(), [output, device_exponent])

    __subtract_body = trtc.For(
        ['output', 'subtrahend'], 'i', " output[i] -= subtrahend[i]; ")

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def subtract(output, subtrahend):
        """Subtract ``subtrahend`` from ``output`` element-wise, in place."""
        MathsMethods.__subtract_body.launch_n(output.size(), [output, subtrahend])
        # trtc.Transform_Binary(output, subtrahend, output, trtc.Minus())

    # Initialises one generator state per element. The kernel now uses the
    # `seed` argument it is given; it previously ignored it in favour of a
    # hard-coded 1234.
    __urand_init_rng_state_body = trtc.For(
        ['rng', 'states', 'seed'],
        'i',
        " rng.state_init(seed, i, 0, states[i]); ",
    )

    # Draws one number from each element's own generator state.
    __urand_body = trtc.For(
        ['states', 'vec_rnd'], 'i', " vec_rnd[i]=states[i].rand01(); ")

    # Generator states shared by all urand() calls, created and seeded once
    # when the class body is executed.
    __rng = rndrtc.DVRNG()
    states = trtc.device_vector('RNGState', 2**19)
    __urand_init_rng_state_body.launch_n(states.size(), [__rng, states, trtc.DVInt64(12)])

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def urand(data, seed=None):
        """Fill ``data`` with random numbers drawn from the shared generator states.

        Element ``i`` of ``data`` is drawn from ``MathsMethods.states[i]``, so
        successive calls continue the same per-element streams.

        :param data: device vector overwritten with the random numbers
        :param seed: accepted for interface compatibility but not used;
            re-initialising the states on every call is disabled
        :raises ValueError: if ``data`` has more elements than there are
            generator states
        """
        n_values = data.size()
        n_states = MathsMethods.states.size()
        if n_values > n_states:
            # the kernel would otherwise read states[] past its end
            raise ValueError(
                f"urand supports at most {n_states} elements, got {n_values}")
        # MathsMethods.__urand_init_rng_state_body.launch_n(MathsMethods.states.size(), [MathsMethods.__rng, MathsMethods.states, dseed])
        MathsMethods.__urand_body.launch_n(n_values, [MathsMethods.states, data])