コード例 #1
0
import ThrustRTC as trtc

# Device predicate: returns x % 2, i.e. non-zero for odd inputs.
is_odd = trtc.Functor({}, ['x'], '''
         return x % 2;
''')

INT32 = 'int32_t'
SAMPLE = [-5, 0, 2, -3, 2, 4, 0, -1, 2, 8]

# 1) Unary transform with Negate, input and output are the same vector.
darr = trtc.device_vector_from_list(SAMPLE, INT32)
trtc.Transform(darr, darr, trtc.Negate())
print(darr.to_host())

# 2) Binary transform with Plus over two inputs into a separate output.
darr_in1 = trtc.device_vector_from_list([-5, 0, 2, 3, 2, 4], INT32)
darr_in2 = trtc.device_vector_from_list([3, 6, -2, 1, 2, 3], INT32)
darr_out = trtc.device_vector(INT32, 6)
trtc.Transform_Binary(darr_in1, darr_in2, darr_out, trtc.Plus())
print(darr_out.to_host())

# 3) Conditional transform: Negate gated by the is_odd predicate.
darr = trtc.device_vector_from_list(SAMPLE, INT32)
trtc.Transform_If(darr, darr, trtc.Negate(), is_odd)
print(darr.to_host())

# 4) Conditional transform gated by a separate stencil vector (Identity
#    predicate applied to the stencil).
darr_data = trtc.device_vector_from_list(SAMPLE, INT32)
darr_stencil = trtc.device_vector_from_list(
    [1, 0, 1, 0, 1, 0, 1, 0, 1, 0], INT32)
trtc.Transform_If_Stencil(
    darr_data, darr_stencil, darr_data, trtc.Negate(), trtc.Identity())
print(darr_data.to_host())

darr_in1 = trtc.device_vector_from_list([-5, 0, 2, 3, 2, 4], INT32)
コード例 #2
0
import ThrustRTC as trtc

# Allocate a 10-element int32 device vector, run Sequence on it, then
# Tabulate with the Negate functor, and print the host copy.
ELEMENT_COUNT = 10
darr = trtc.device_vector('int32_t', ELEMENT_COUNT)

trtc.Sequence(darr)
negate_op = trtc.Negate()
trtc.Tabulate(darr, negate_op)
host_values = darr.to_host()
print(host_values)
コード例 #3
0
import ThrustRTC as trtc

FIELD_NAMES = ['a', 'b']
LENGTH = 5

# Case 1: both sources are plain device vectors.
d_int_in = trtc.device_vector_from_list([0, 1, 2, 3, 4], 'int32_t')
d_float_in = trtc.device_vector_from_list(
    [0.0, 10.0, 20.0, 30.0, 40.0], 'float')

d_int_out = trtc.device_vector('int32_t', LENGTH)
d_float_out = trtc.device_vector('float', LENGTH)

zipped_in = trtc.DVZipped([d_int_in, d_float_in], FIELD_NAMES)
zipped_out = trtc.DVZipped([d_int_out, d_float_out], FIELD_NAMES)

trtc.Copy(zipped_in, zipped_out)
print(d_int_out.to_host())
print(d_float_out.to_host())

# Case 2: the sources are a counter and a transform over that counter,
# zipped and copied into the same outputs.
d_int_in = trtc.DVCounter(trtc.DVInt32(0), LENGTH)
scale_functor = trtc.Functor(
    {}, ['i'], '        return (float)i*10.0f +10.0f;\n')
d_float_in = trtc.DVTransform(d_int_in, "float", scale_functor)
zipped_in = trtc.DVZipped([d_int_in, d_float_in], FIELD_NAMES)
trtc.Copy(zipped_in, zipped_out)
print(d_int_out.to_host())
print(d_float_out.to_host())

# Case 3: the source is a constant tuple repeated LENGTH times.
constant_tuple = trtc.DVTuple({
    'a': trtc.DVInt32(123),
    'b': trtc.DVFloat(456.0),
})
const_in = trtc.DVConstant(constant_tuple, LENGTH)
trtc.Copy(const_in, zipped_out)
コード例 #4
0
ファイル: test_extrema.py プロジェクト: sumit-byte/ThrustRTC
import ThrustRTC as trtc

# Print the results of the three extrema queries on one small vector.
darr = trtc.device_vector_from_list([1, 0, 2, 2, 1, 3], 'int32_t')
for extrema_query in (trtc.Min_Element, trtc.Max_Element,
                      trtc.MinMax_Element):
    print(extrema_query(darr))
コード例 #5
0
ファイル: test_mismatch.py プロジェクト: sumit-byte/ThrustRTC
import ThrustRTC as trtc

# Two vectors that differ only at index 2.
ELEMENT_TYPE = 'int32_t'
d1 = trtc.device_vector_from_list([0, 5, 3, 7], ELEMENT_TYPE)
d2 = trtc.device_vector_from_list([0, 5, 8, 7], ELEMENT_TYPE)

# Once with the default comparison, once with an explicit EqualTo functor.
default_result = trtc.Mismatch(d1, d2)
print(default_result)
explicit_result = trtc.Mismatch(d1, d2, trtc.EqualTo())
print(explicit_result)
コード例 #6
0
 def find_pairs(cell_start, is_first_in_pair, cell_id, idx, length):
     """Launch the pair-finding kernel over the first ``length - 1`` positions.

     ``cell_id`` is handed to the kernel through a ``DVPermutation`` built
     with ``idx``. Nothing is launched when ``length`` is 1 or less.
     """
     permuted_ids = trtc.DVPermutation(cell_id, idx)
     if length <= 1:
         return
     kernel_args = [cell_start, permuted_ids, is_first_in_pair]
     AlgorithmicStepMethods.__find_pairs_body.launch_n(length - 1, kernel_args)
コード例 #7
0
 def max_pair(data_out, data_in, is_first_in_pair, idx, length):
     """Launch the max-pair kernel over the first ``length - 1`` positions.

     ``data_in`` is read through a ``DVPermutation`` built with ``idx``;
     ``data_out`` is passed as-is, so it is assumed not to be permuted
     (i.e. not part of state). No launch happens when ``length <= 1``.
     """
     permuted_input = trtc.DVPermutation(data_in, idx)
     if length <= 1:
         return
     kernel_args = [data_out, permuted_input, is_first_in_pair]
     AlgorithmicStepMethods.__max_pair_body.launch_n(length - 1, kernel_args)
コード例 #8
0
 def _sort_by_cell_id_and_update_cell_start(cell_id, cell_start, idx, length):
     """Sort ``idx`` by ``cell_id``, reset ``cell_start`` and launch the update kernel.

     ``cell_start`` is first filled with ``length``; the kernel is then
     launched over ``length - 1`` positions. Returns ``idx``.
     """
     trtc.Sort_By_Key(cell_id.data, idx.data)
     fill_value = trtc.DVInt64(length)
     trtc.Fill(cell_start.data, fill_value)
     kernel = AlgorithmicMethods.___sort_by_cell_id_and_update_cell_start_body
     kernel.launch_n(length - 1, [cell_id.data, cell_start.data, idx.data])
     return idx
コード例 #9
0
class AlgorithmicMethods:
    """ThrustRTC implementations of the particle-level algorithmic steps.

    Each ``trtc.For`` class attribute holds the CUDA source of one kernel;
    the static method declared right after it wraps the host-side arguments
    and launches that kernel.
    """

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def calculate_displacement(dim, scheme, displacement, courant, cell_origin, position_in_cell):
        """Build and launch the displacement kernel for dimension ``dim``.

        The kernel is generated on every call because the right-hand side of
        the final assignment is produced by ``scheme(None, None, None)`` and
        spliced into the CUDA source. One thread is launched per column of
        ``displacement`` (``displacement.shape[1]``).
        """
        dim = trtc.DVInt64(dim)
        idx_length = trtc.DVInt64(position_in_cell.shape[1])
        courant_length = trtc.DVInt64(courant.shape[0])
        # The result is stored with the same flat layout the kernel already
        # uses to read ``position_in_cell`` (droplet + idx_length * dim).
        # The previous ``displacement[droplet, dim]`` was a C++ comma
        # expression, i.e. every thread wrote to ``displacement[dim]``.
        # NOTE(review): the locals below are declared ``int``; confirm that
        # truncating ``position_in_cell`` / ``courant`` values is intended.
        loop = trtc.For(['dim', 'idx_length', 'displacement', 'courant', 'courant_length', 'cell_origin', 'position_in_cell'], "droplet", f'''
            // Arakawa-C grid
            int _l_0 = cell_origin[droplet + 0];
            int _l_1 = cell_origin[droplet + idx_length];
            int _l = _l_0 + _l_1 * courant_length;
            int _r_0 = cell_origin[droplet + 0] + 1 * (dim == 0);
            int _r_1 = cell_origin[droplet + idx_length] + 1 * (dim == 1);
            int _r = _r_0 + _r_1 * courant_length;
            int omega = position_in_cell[droplet + idx_length * dim];
            int c_r = courant[_r];
            int c_l = courant[_l];
            displacement[droplet + idx_length * dim] = {scheme(None, None, None)}
            ''')
        loop.launch_n(displacement.shape[1], [dim, idx_length, displacement, courant, courant_length, cell_origin, position_in_cell])

    # Per-pair coalescence: thread i handles the pair (idx[i], idx[i + 1]).
    # Attributes are stored flat, one row of idx_length entries per attribute.
    __coalescence_body = trtc.For(['n', 'volume', 'idx', 'idx_length', 'intensive', 'intensive_length', 'extensive', 'extensive_length', 'gamma', 'healthy', 'adaptive', 'subs', 'adaptive_memory'], "i", '''
        if (gamma[i] == 0) {
            adaptive_memory[i] = 1;
            return;
        }

        int j = idx[i];
        int k = idx[i + 1];

        if (n[j] < n[k]) {
            j = idx[i + 1];
            k = idx[i];
        }
        int g = n[j] / n[k];
        if (adaptive) 
            adaptive_memory[i] = (int)(gamma[i] * subs / g);
        if (g > gamma[i])
            g = gamma[i];
        if (g == 0)
            return;
            
        int new_n = n[j] - g * n[k];
        
        if (new_n > 0) {
            n[j] = new_n;
            
            for (int attr = 0; attr < intensive_length; attr+=idx_length) {
                intensive[attr + k] = (intensive[attr + k] * volume[k] + intensive[attr + j] * g * volume[j]) / (volume[k] + g * volume[j]);
            }
            for (int attr = 0; attr < extensive_length; attr+=idx_length) {
                extensive[attr + k] += g * extensive[attr + j];
            }
        }
        else {  // new_n == 0
            n[j] = (int)(n[k] / 2);
            n[k] = n[k] - n[j];
            for (int attr = 0; attr < intensive_length; attr+=idx_length) {
                intensive[attr + j] = (intensive[attr + k] * volume[k] + intensive[attr + j] * g * volume[j]) / (volume[k] + g * volume[j]);
                intensive[attr + k] = intensive[attr + j];
            }
            for (int attr = 0; attr < extensive_length; attr+=idx_length) {
                extensive[attr + j] = g * extensive[attr + j] + extensive[attr + k];
                extensive[attr + k] = extensive[attr + j];
            }
        }
        if (n[k] == 0 || n[j] == 0) {
            healthy[0] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def coalescence(n, volume, idx, length, intensive, extensive, gamma, healthy, adaptive, subs, adaptive_memory):
        """Launch the coalescence kernel over ``length - 1`` positions.

        Returns the maximum of ``adaptive_memory`` over ``[0, length - 1)``,
        reduced on the device with an initial value of 0.
        """
        idx_length = trtc.DVInt64(len(idx))
        intensive_length = trtc.DVInt64(len(intensive))
        extensive_length = trtc.DVInt64(len(extensive))
        adaptive_device = trtc.DVBool(adaptive)
        subs_device = trtc.DVInt64(subs)
        AlgorithmicMethods.__coalescence_body.launch_n(length - 1,
            [n.data, volume.data, idx.data, idx_length, intensive.data, intensive_length, extensive.data, extensive_length, gamma.data, healthy.data, adaptive_device, subs_device, adaptive_memory.data])
        return trtc.Reduce(adaptive_memory.data.range(0, length-1), trtc.DVInt64(0), trtc.Maximum())

    # In-place update of prob; consecutive entries 2k and 2k + 1 share rand[k].
    __compute_gamma_body = trtc.For(['prob', 'rand'], "i", '''
        prob[i] = -floor(-prob[i] + rand[int(i / 2)]);
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def compute_gamma(prob, rand):
        """Overwrite ``prob`` in place using the gamma kernel, one thread per entry."""
        AlgorithmicMethods.__compute_gamma_body.launch_n(len(prob), [prob.data, rand.data])

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def condensation(
            solver,
            n_cell, cell_start_arg,
            v, particle_temperatures, n, vdry, idx, rhod, thd, qv, dv, prhod, pthd, pqv, kappa,
            rtol_x, rtol_thd, dt, substeps, cell_order
    ):
        """Not available in this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError()

    # Looks only at the last dimension's row (offset idx_length * (n_dims-1)).
    __flag_precipitated_body = trtc.For(['idx', 'idx_length', 'n_dims', 'healthy', 'cell_origin', 'position_in_cell'], "i", '''
        if (cell_origin[idx_length * (n_dims-1) + i] == 0 && position_in_cell[idx_length * (n_dims-1) + i] < 0) {
            idx[i] = idx_length;
            healthy[0] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def flag_precipitated(cell_origin, position_in_cell, idx, length, healthy):
        """Launch the precipitation-flagging kernel over ``length`` positions.

        Flagged entries get ``idx[i] = idx.size()`` and ``healthy[0]`` is
        cleared by the kernel.
        """
        idx_length = trtc.DVInt64(idx.size())
        n_dims = trtc.DVInt64(len(cell_origin.shape))
        AlgorithmicMethods.__flag_precipitated_body.launch_n(length, [idx, idx_length, n_dims, healthy, cell_origin, position_in_cell])

    # Evaluated only where is_first_in_pair[i] is set; output is clamped at 0.
    __linear_collection_efficiency_body = trtc.For(['A', 'B', 'D1', 'D2', 'E1', 'E2', 'F1', 'F2', 'G1', 'G2', 'G3', 'Mf', 'Mg', 'output', 'radii', 'is_first_in_pair', 'unit'], "i", '''
        output[i] = 0;
        if (is_first_in_pair[i]) {
            double r = radii[i] / unit;
            double r_s = radii[i + 1] / unit;
            double p = r_s / r;
            if (p != 0 && p != 1) {
                double G = pow((G1 / r), Mg) + G2 + G3 * r;
                double Gp = pow((1 - p), G);
                if (Gp != 0) {
                    double D = D1 / pow(r, D2);
                    double E = E1 / pow(r, E2);
                    double F = pow((F1 / r), Mf) + F2;
                    output[i] = A + B * p + D / pow(p, F) + E / Gp;
                    if (output[i] < 0) {
                        output[i] = 0;
                    }
                }
            }
        }
    ''')

    @staticmethod
    def linear_collection_efficiency(params, output, radii, is_first_in_pair, unit):
        """Launch the collection-efficiency kernel over ``len(is_first_in_pair) - 1`` positions.

        ``params`` must unpack into exactly 13 scalars
        (A, B, D1, D2, E1, E2, F1, F2, G1, G2, G3, Mf, Mg); each of them and
        ``unit`` is wrapped as a device double before the launch.
        """
        A, B, D1, D2, E1, E2, F1, F2, G1, G2, G3, Mf, Mg = params
        dA = trtc.DVDouble(A)
        dB = trtc.DVDouble(B)
        dD1 = trtc.DVDouble(D1)
        dD2 = trtc.DVDouble(D2)
        dE1 = trtc.DVDouble(E1)
        dE2 = trtc.DVDouble(E2)
        dF1 = trtc.DVDouble(F1)
        dF2 = trtc.DVDouble(F2)
        dG1 = trtc.DVDouble(G1)
        dG2 = trtc.DVDouble(G2)
        dG3 = trtc.DVDouble(G3)
        dMf = trtc.DVDouble(Mf)
        dMg = trtc.DVDouble(Mg)
        dunit = trtc.DVDouble(unit)
        AlgorithmicMethods.__linear_collection_efficiency_body.launch_n(len(is_first_in_pair) - 1,
            [dA, dB, dD1, dD2, dE1, dE2, dF1, dF2, dG1, dG2, dG3, dMf, dMg, output.data, radii.data, is_first_in_pair.data, dunit])

    # Table lookup: r_id selects the table row, r_rest is the remainder.
    __interpolation_body = trtc.For(['output', 'radius', 'factor', 'a', 'b'], 'i', '''
        int r_id = (int)(factor * radius[i]);
        auto r_rest = (factor * radius[i] - r_id) / factor;
        output[i] = a[r_id] + r_rest * b[r_id];
    ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def interpolation(output, radius, factor, b, c):
        """Launch the interpolation kernel, one thread per entry of ``radius``.

        The host arguments ``b`` and ``c`` are bound to the kernel's ``a``
        and ``b`` parameters respectively.
        """
        factor_device = trtc.DVInt64(factor)
        AlgorithmicMethods.__interpolation_body.launch_n(len(radius),
                                                         [output.data, radius.data, factor_device, b.data, c.data])

    @staticmethod
    def make_cell_caretaker(idx, cell_start, scheme):
        """Return the sort-and-update routine; the arguments are not used here."""
        return AlgorithmicMethods._sort_by_cell_id_and_update_cell_start

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def moments(moment_0, moments, n, attr, cell_id, idx, length, specs_idx, specs_rank, min_x, max_x, x_id):
        """Compute moments on the host via the Numba backend.

        All array inputs are downloaded with ``to_ndarray()``,
        ``Numba.moments_body`` is run on the host copies, and only
        ``moment_0`` and ``moments`` are uploaded back.
        """
        # TODO print("Numba import!: ThrustRTC.moments(...)")

        from PySDM.backends.numba.numba import Numba
        host_moment_0 = moment_0.to_ndarray()
        host_moments = moments.to_ndarray()
        host_n = n.to_ndarray()
        host_attr = attr.to_ndarray()
        host_cell_id = cell_id.to_ndarray()
        host_idx = idx.to_ndarray()
        host_specs_idx = specs_idx.to_ndarray()
        host_specs_rank = specs_rank.to_ndarray()
        Numba.moments_body(host_moment_0, host_moments, host_n, host_attr, host_cell_id, host_idx, length,
                           host_specs_idx, host_specs_rank, min_x, max_x, x_id)
        moment_0.upload(host_moment_0)
        moments.upload(host_moments)

    # Per-cell factor; cells holding fewer than two entries get 0.
    __normalize_body_0 = trtc.For(['cell_start', 'norm_factor', 'dt_div_dv'], "i", '''
        int sd_num = cell_start[i + 1] - cell_start[i];
        if (sd_num < 2) {
            norm_factor[i] = 0;
        }
        else {
            int half_sd_num = sd_num / 2;
            norm_factor[i] = dt_div_dv * sd_num * (sd_num - 1) / 2 / half_sd_num;
        }
        ''')

    # Scales each prob entry by the factor of the cell it belongs to.
    __normalize_body_1 = trtc.For(['prob', 'cell_id', 'norm_factor'], "d", '''
        prob[d] *= norm_factor[cell_id[d]];
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def normalize(prob, cell_id, cell_start, norm_factor, dt_div_dv):
        """Compute per-cell factors, then scale ``prob`` in place by them.

        The first kernel runs over ``cell_start.shape[0] - 1`` cells, the
        second over ``prob.shape[0]`` entries.
        """
        n_cell = cell_start.shape[0] - 1
        device_dt_div_dv = trtc.DVDouble(dt_div_dv)
        AlgorithmicMethods.__normalize_body_0.launch_n(n_cell, [cell_start.data, norm_factor.data, device_dt_div_dv])
        AlgorithmicMethods.__normalize_body_1.launch_n(prob.shape[0], [prob.data, cell_id.data, norm_factor.data])

    # Marks an index as removed by overwriting it with idx_length.
    __remove_zeros_body = trtc.For(['data', 'idx', 'idx_length'], "i", '''
        if (idx[i] < idx_length && data[idx[i]] == 0)
            idx[i] = idx_length;
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def remove_zeros(data, idx, length) -> int:
        """Mark indices whose ``data`` entry is 0, sort ``idx`` and return the survivor count.

        The returned value is ``idx.size()`` minus the number of entries
        equal to ``idx.size()`` after the sort.
        """
        idx_length = trtc.DVInt64(idx.size())

        # Warning: (potential bug source): reading from outside of array
        AlgorithmicMethods.__remove_zeros_body.launch_n(length, [data, idx, idx_length])

        trtc.Sort(idx)

        result = idx.size() - trtc.Count(idx, idx_length)
        return result

    # NOTE(review): thread 0 only sets the first cell's start and does not
    # process the transition between positions 0 and 1 - confirm intent.
    ___sort_by_cell_id_and_update_cell_start_body = trtc.For(['cell_id', 'cell_start', 'idx'], "i", '''
        if (i == 0) {
            cell_start[cell_id[idx[0]]] = 0;
        } 
        else {
            int cell_id_curr = cell_id[idx[i]];
            int cell_id_next = cell_id[idx[i + 1]];
            int diff = (cell_id_next - cell_id_curr);
            for (int j = 1; j <= diff; j++) {
                cell_start[cell_id_curr + j] = idx[i + 1];
            }
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def _sort_by_cell_id_and_update_cell_start(cell_id, cell_start, idx, length):
        """Sort ``idx`` by ``cell_id``, refill ``cell_start`` with ``length`` and run the update kernel.

        The kernel is launched over ``length - 1`` positions. Returns ``idx``.
        """
        trtc.Sort_By_Key(cell_id.data, idx.data)
        trtc.Fill(cell_start.data, trtc.DVInt64(length))
        AlgorithmicMethods.___sort_by_cell_id_and_update_cell_start_body.launch_n(length - 1,
                                                                                  [cell_id.data, cell_start.data, idx.data])
        return idx
コード例 #10
0
 def interpolation(output, radius, factor, b, c):
     """Launch the interpolation kernel, one thread per entry of ``radius``.

     ``factor`` is wrapped as a 64-bit device integer; ``b`` and ``c`` are
     passed as the last two kernel arguments.
     """
     scaled_factor = trtc.DVInt64(factor)
     kernel_args = [output.data, radius.data, scaled_factor, b.data, c.data]
     AlgorithmicMethods.__interpolation_body.launch_n(len(radius), kernel_args)
コード例 #11
0
 def normalize(prob, cell_id, cell_start, norm_factor, dt_div_dv):
     """Run the two normalisation kernels back to back.

     The first is launched over ``cell_start.shape[0] - 1`` cells and
     receives ``dt_div_dv`` as a device double; the second is launched over
     ``prob.shape[0]`` entries.
     """
     cell_count = cell_start.shape[0] - 1
     scale = trtc.DVDouble(dt_div_dv)
     per_cell_args = [cell_start.data, norm_factor.data, scale]
     AlgorithmicMethods.__normalize_body_0.launch_n(cell_count, per_cell_args)
     per_entry_args = [prob.data, cell_id.data, norm_factor.data]
     AlgorithmicMethods.__normalize_body_1.launch_n(prob.shape[0], per_entry_args)
コード例 #12
0
 def linear_collection_efficiency(params, output, radii, is_first_in_pair, unit):
     """Launch the collection-efficiency kernel over ``len(is_first_in_pair) - 1`` positions.

     ``params`` must unpack into exactly 13 scalars, in the order
     A, B, D1, D2, E1, E2, F1, F2, G1, G2, G3, Mf, Mg. Each scalar and
     ``unit`` is wrapped as a device double.
     """
     # Tuple-unpack first so a wrong number of parameters still fails here.
     A, B, D1, D2, E1, E2, F1, F2, G1, G2, G3, Mf, Mg = params
     device_scalars = [
         trtc.DVDouble(value)
         for value in (A, B, D1, D2, E1, E2, F1, F2, G1, G2, G3, Mf, Mg)
     ]
     device_unit = trtc.DVDouble(unit)
     kernel_args = device_scalars + [
         output.data, radii.data, is_first_in_pair.data, device_unit]
     AlgorithmicMethods.__linear_collection_efficiency_body.launch_n(
         len(is_first_in_pair) - 1, kernel_args)
コード例 #13
0
 def flag_precipitated(cell_origin, position_in_cell, idx, length, healthy):
     """Launch the precipitation-flagging kernel over ``length`` positions.

     The kernel receives ``idx.size()`` and the rank of ``cell_origin``
     (``len(cell_origin.shape)``) as 64-bit device integers.
     """
     total_indices = trtc.DVInt64(idx.size())
     rank = trtc.DVInt64(len(cell_origin.shape))
     kernel_args = [idx, total_indices, rank, healthy, cell_origin, position_in_cell]
     AlgorithmicMethods.__flag_precipitated_body.launch_n(length, kernel_args)
コード例 #14
0
ファイル: test_unique.py プロジェクト: sumit-byte/ThrustRTC
import ThrustRTC as trtc

INT32 = 'int32_t'
KEYS = [1, 3, 3, 3, 2, 2, 1]
VALUES = [9, 8, 7, 6, 5, 4, 3]

# Unique in place, default comparison.
d_value = trtc.device_vector_from_list(KEYS, INT32)
count = trtc.Unique(d_value)
print(d_value.to_host(0, count))

# Unique in place, explicit EqualTo comparison.
d_value = trtc.device_vector_from_list(KEYS, INT32)
count = trtc.Unique(d_value, trtc.EqualTo())
print(d_value.to_host(0, count))

# Unique_Copy into a separate output, default comparison.
d_in = trtc.device_vector_from_list(KEYS, INT32)
d_out = trtc.device_vector(INT32, 7)
count = trtc.Unique_Copy(d_in, d_out)
print(d_out.to_host(0, count))

# Unique_Copy into a separate output, explicit EqualTo comparison.
d_in = trtc.device_vector_from_list(KEYS, INT32)
d_out = trtc.device_vector(INT32, 7)
count = trtc.Unique_Copy(d_in, d_out, trtc.EqualTo())
print(d_out.to_host(0, count))

# Unique_By_Key on a key/value pair, default comparison.
d_keys = trtc.device_vector_from_list(KEYS, INT32)
d_values = trtc.device_vector_from_list(VALUES, INT32)
count = trtc.Unique_By_Key(d_keys, d_values)
print(d_keys.to_host(0, count))
print(d_values.to_host(0, count))

# Unique_By_Key on a key/value pair, explicit EqualTo comparison.
d_keys = trtc.device_vector_from_list(KEYS, INT32)
d_values = trtc.device_vector_from_list(VALUES, INT32)
count = trtc.Unique_By_Key(d_keys, d_values, trtc.EqualTo())
print(d_keys.to_host(0, count))
print(d_values.to_host(0, count))
コード例 #15
0
 def amin(row, idx):
     """Return the entry of ``row`` selected by ``trtc.Min_Element``.

     The search runs over ``row.data`` viewed through ``idx.data``, limited
     to the first ``len(row)`` positions; the position found is mapped back
     through ``idx`` before indexing ``row``.
     """
     permuted = trtc.DVPermutation(row.data, idx.data)
     position = trtc.Min_Element(permuted.range(0, len(row)))
     return row[idx[position]]
コード例 #16
0
import ThrustRTC as trtc

# Negate a 4-element vector read through a DVReverse view of the input.
source_values = [3, 7, 2, 5]
dinput = trtc.device_vector_from_list(source_values, 'int32_t')
doutput = trtc.device_vector('int32_t', 4)

dreverse = trtc.DVReverse(dinput)

negate_op = trtc.Negate()
trtc.Transform(dreverse, doutput, negate_op)
print(doutput.to_host())
コード例 #17
0
 def cell_id(cell_id, cell_origin, strides):
     """Launch the cell-id kernel, one thread per entry of ``cell_id``.

     The kernel additionally receives ``strides.shape[1]`` and
     ``cell_origin.shape[0]`` as 64-bit device integers.
     """
     dimension_count = trtc.DVInt64(strides.shape[1])
     origin_rows = trtc.DVInt64(cell_origin.shape[0])
     kernel_args = [cell_id, cell_origin, strides, dimension_count, origin_rows]
     AlgorithmicStepMethods.__cell_id_body.launch_n(cell_id.size(), kernel_args)
コード例 #18
0
class StorageMethods:
    """Allocation, transfer and slicing helpers for ThrustRTC device vectors.

    Device objects returned by ``array``, ``from_ndarray``, ``range`` and
    ``read_row`` are decorated by ``__equip`` with ``shape``, ``dtype`` and a
    ``get(index)`` accessor.
    """
    # TODO check static For
    storage = trtc.DVVector.DVVector
    integer = np.int64
    double = np.float64

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def array(shape, dtype):
        """Allocate a device vector with ``prod(shape)`` elements.

        ``dtype`` may be ``float``/``np.float64`` (stored as ``double``) or
        ``int``/``np.int64`` (stored as ``int64_t``); anything else raises
        ``NotImplementedError``. The contents are not initialised here.
        """
        if dtype in (float, StorageMethods.double):
            elem_cls = 'double'
            elem_dtype = StorageMethods.double
        elif dtype in (int, StorageMethods.integer):
            elem_cls = 'int64_t'
            elem_dtype = StorageMethods.integer
        else:
            raise NotImplementedError

        data = trtc.device_vector(elem_cls, int(np.prod(shape)))
        # TODO: trtc.Fill(data, trtc.DVConstant(np.nan))

        StorageMethods.__equip(data, shape, elem_dtype)
        return data

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def download(backend_data, numpy_target):
        """Copy device data into the pre-allocated ``numpy_target`` in place."""
        if isinstance(backend_data, StorageMethods.storage):
            numpy_target[:] = np.reshape(backend_data.to_host(),
                                         backend_data.shape)
        else:
            numpy_target[:] = StorageMethods.to_ndarray(backend_data)

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def from_ndarray(array):
        """Upload a NumPy array, flattening it and keeping its shape as metadata.

        dtypes whose name starts with ``int`` are converted to ``np.int64``
        and those starting with ``float`` to ``np.float64``; other dtypes
        raise ``NotImplementedError``.
        """
        shape = array.shape

        if str(array.dtype).startswith('int'):
            dtype = StorageMethods.integer
        elif str(array.dtype).startswith('float'):
            dtype = StorageMethods.double
        else:
            raise NotImplementedError

        if array.ndim > 1:
            array = array.astype(dtype).flatten()
        else:
            array = array.astype(dtype)

        result = trtc.device_vector_from_numpy(array)

        StorageMethods.__equip(result, shape, dtype)
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def range(array, start=0, stop=None):
        """Return a view of rows ``start:stop`` of a 1-D or 2-D equipped array.

        ``stop`` defaults to ``array.shape[0]``. For 2-D arrays the bounds
        are scaled by ``array.shape[1]`` to address the flat storage. Raises
        ``NotImplementedError`` for more than two dimensions.
        """
        if stop is None:
            stop = array.shape[0]
        dim = len(array.shape)
        if dim == 1:
            result = array.range(start, stop)
            new_shape = (stop - start, )
        elif dim == 2:
            result = array.range(array.shape[1] * start, array.shape[1] * stop)
            new_shape = (stop - start, array.shape[1])
        else:
            raise NotImplementedError(
                "Only 2 or less dimensions array is supported.")
        StorageMethods.__equip(result, shape=new_shape, dtype=array.dtype)
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def read_row(array, i):
        """Return a 1-D view of row ``i`` of a 2-D equipped array."""
        row_length = array.shape[1]
        start = row_length * i
        stop = start + row_length

        result = array.range(start, stop)
        StorageMethods.__equip(result, shape=(row_length, ), dtype=array.dtype)
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def shuffle_global(idx, length, u01):
        """Reorder the first ``length`` entries of ``idx`` by sorting them with ``u01`` as keys."""
        # WARNING: ineffective implementation
        trtc.Sort_By_Key(u01.range(0, length), idx.range(0, length))

    # One thread per cell c; swaps entries of idx within
    # [cell_start[c], cell_start[c+1]) using u01 to pick the swap partner.
    __shuffle_local_body = trtc.For(['cell_start', 'u01', 'idx'], "c", '''
        for (int i=cell_start[c+1]-1; i > cell_start[c]; i--) {
            int j = cell_start[c] + u01[i] * (cell_start[c+1] - cell_start[c]);
            int tmp = idx[i];
            idx[i] = idx[j];
            idx[j] = tmp;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def shuffle_local(idx, u01, cell_start):
        """Launch the per-cell shuffle kernel over ``cell_start.size() - 1`` cells."""
        StorageMethods.__shuffle_local_body.launch_n(cell_start.size() - 1,
                                                     [cell_start, u01, idx])
        # TODO: print("Numba import!: ThrustRTC.shuffle_local(...)")
        # from PySDM.backends.numba.numba import Numba
        # host_idx = StorageMethods.to_ndarray(idx)
        # host_u01 = StorageMethods.to_ndarray(u01)
        # host_cell_start = StorageMethods.to_ndarray(cell_start)
        # Numba.shuffle_local(host_idx, host_u01, host_cell_start)
        # device_idx = StorageMethods.from_ndarray(host_idx)
        # trtc.Copy(device_idx, idx)

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def to_ndarray(data):
        """Download an equipped device vector or range as a NumPy array of ``data.shape``.

        A ``DVRange`` is first copied into a freshly allocated vector. Any
        other type raises ``NotImplementedError``.
        """
        # TODO: move to __equip??
        if isinstance(data, StorageMethods.storage):
            pass
        elif isinstance(data, trtc.DVVector.DVRange):
            # Stage the range in a buffer of its own element type; forcing
            # ``float`` here silently turned integer ranges into float64.
            # Fall back to float when no dtype was attached to the range.
            data_copy = StorageMethods.array(data.shape,
                                             getattr(data, 'dtype', float))
            trtc.Copy(data, data_copy)
            data = data_copy
        else:
            raise NotImplementedError()

        result = data.to_host()
        result = np.reshape(result, data.shape)
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def upload(numpy_data, backend_target):
        """Upload the flattened ``numpy_data`` and swap it with ``backend_target``."""
        tmp = trtc.device_vector_from_numpy(numpy_data.flatten())
        trtc.Swap(tmp, backend_target)

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def write_row(array, i, row):
        """Copy ``row`` over row ``i`` of a 2-D equipped array."""
        row_length = array.shape[1]
        start = row_length * i
        stop = start + row_length
        trtc.Copy(row, array.range(start, stop))

    @staticmethod
    def __equip(data, shape, dtype):
        """Attach ``shape``, ``dtype`` and a ``get(index)`` accessor to ``data``.

        An integer ``shape`` is normalised to a one-element tuple.
        """
        if isinstance(shape, int):
            shape = (shape, )
        data.shape = shape
        data.dtype = dtype

        def get(index):
            # Reducing a one-element range yields that single element.
            return trtc.Reduce(data.range(index, index + 1))

        data.get = get
コード例 #19
0
class AlgorithmicStepMethods:
    """ThrustRTC kernels and launchers for per-step pair and cell operations.

    Each ``trtc.For`` class attribute holds the CUDA source of one kernel;
    the static method declared right after it binds the arguments and
    launches it. The ``*_pair`` launchers read their input through a
    ``DVPermutation`` built with ``idx`` and skip the launch when
    ``length <= 1``.
    """

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def amax(row, idx):
        """Return the entry of ``row`` selected by ``trtc.Max_Element``.

        The search runs over ``row.data`` viewed through ``idx.data``,
        limited to the first ``len(row)`` positions.
        """
        perm_in = trtc.DVPermutation(row.data, idx.data)
        index = trtc.Max_Element(perm_in.range(0, len(row)))
        row_idx = idx[index]
        result = row[row_idx]
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def amin(row, idx):
        """Return the entry of ``row`` selected by ``trtc.Min_Element``.

        The search runs over ``row.data`` viewed through ``idx.data``,
        limited to the first ``len(row)`` positions.
        """
        perm_in = trtc.DVPermutation(row.data, idx.data)
        index = trtc.Min_Element(perm_in.range(0, len(row)))
        row_idx = idx[index]
        result = row[row_idx]
        return result

    # cell_id[i] = sum over j of cell_origin[size * i + j] * strides[j].
    __cell_id_body = trtc.For(
        ['cell_id', 'cell_origin', 'strides', 'n_dims', 'size'], "i", '''
        cell_id[i] = 0;
        for (int j = 0; j < n_dims; j++) 
        {
            cell_id[i] += cell_origin[size * i + j] * strides[j];
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def cell_id(cell_id, cell_origin, strides):
        """Launch the cell-id kernel, one thread per entry of ``cell_id``."""
        n_dims = trtc.DVInt64(strides.shape[1])
        size = trtc.DVInt64(cell_origin.shape[0])
        AlgorithmicStepMethods.__cell_id_body.launch_n(
            cell_id.size(), [cell_id, cell_origin, strides, n_dims, size])

    # |data_in[i] - data_in[i + 1]| at pair starts, 0 elsewhere.
    __distance_pair_body = trtc.For(
        ['data_out', 'data_in', 'is_first_in_pair'], "i", '''
        if (is_first_in_pair[i]) 
        {
            data_out[i] = abs(data_in[i] - data_in[i + 1]);
        } else {
            data_out[i] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def distance_pair(data_out, data_in, is_first_in_pair, idx, length):
        """Write the absolute difference of each flagged pair into ``data_out``."""
        # note: silently assumes that data_out is not permuted (i.e. not part of state)
        perm_in = trtc.DVPermutation(data_in, idx)
        if length > 1:
            AlgorithmicStepMethods.__distance_pair_body.launch_n(
                length - 1, [data_out, perm_in, is_first_in_pair])

    # Position i starts a pair when i and i + 1 share a cell and i sits at an
    # even offset from the start of that cell.
    __find_pairs_body = trtc.For(
        ['cell_start', 'perm_cell_id', 'is_first_in_pair'], "i", '''
        is_first_in_pair[i] = (
            perm_cell_id[i] == perm_cell_id[i+1] &&
            (i - cell_start[perm_cell_id[i]]) % 2 == 0
        );
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def find_pairs(cell_start, is_first_in_pair, cell_id, idx, length):
        """Fill ``is_first_in_pair`` for the first ``length - 1`` positions."""
        perm_cell_id = trtc.DVPermutation(cell_id, idx)
        if length > 1:
            AlgorithmicStepMethods.__find_pairs_body.launch_n(
                length - 1, [cell_start, perm_cell_id, is_first_in_pair])

    # max(perm_in[i], perm_in[i + 1]) at pair starts, 0 elsewhere.
    __max_pair_body = trtc.For(['data_out', 'perm_in', 'is_first_in_pair'],
                               "i", '''
        if (is_first_in_pair[i]) 
        {
            data_out[i] = max(perm_in[i], perm_in[i + 1]);
        } else {
            data_out[i] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def max_pair(data_out, data_in, is_first_in_pair, idx, length):
        """Write the larger member of each flagged pair into ``data_out``."""
        # note: silently assumes that data_out is not permuted (i.e. not part of state)
        perm_in = trtc.DVPermutation(data_in, idx)
        if length > 1:
            AlgorithmicStepMethods.__max_pair_body.launch_n(
                length - 1, [data_out, perm_in, is_first_in_pair])

    # Writes each flagged pair in descending order into data_out[i] and
    # data_out[i + 1]. There is deliberately no else-branch: the thread at
    # i + 1 is not a pair start, and zeroing data_out[i + 1] there would race
    # with the value written by thread i. Unpaired slots keep the zero that
    # sort_pair() fills in before the launch.
    __sort_pair_body = trtc.For(['data_out', 'data_in', 'is_first_in_pair'],
                                "i", '''
        if (is_first_in_pair[i]) {
            if (data_in[i] < data_in[i + 1]) {
                data_out[i] = data_in[i + 1];
                data_out[i + 1] = data_in[i];
            } else {
                data_out[i] = data_in[i];
                data_out[i + 1] = data_in[i + 1];
            }
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def sort_pair(data_out, data_in, is_first_in_pair, idx, length):
        """Zero ``data_out``, then store each flagged pair in descending order.

        ``data_out`` is always filled with 0 first, even when ``length <= 1``
        and no kernel is launched.
        """
        # note: silently assumes that data_out is not permuted (i.e. not part of state)
        perm_in = trtc.DVPermutation(data_in, idx)
        trtc.Fill(data_out, trtc.DVDouble(0))
        if length > 1:
            AlgorithmicStepMethods.__sort_pair_body.launch_n(
                length - 1, [data_out, perm_in, is_first_in_pair])

    # perm_in[i] + perm_in[i + 1] at pair starts, 0 elsewhere.
    __sum_pair_body = trtc.For(['data_out', 'perm_in', 'is_first_in_pair'],
                               "i", '''
        if (is_first_in_pair[i]) 
        {
            data_out[i] = perm_in[i] + perm_in[i + 1];
        } else {
            data_out[i] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def sum_pair(data_out, data_in, is_first_in_pair, idx, length):
        """Write the sum of each flagged pair into ``data_out``."""
        # note: silently assumes that data_out is not permuted (i.e. not part of state)
        perm_in = trtc.DVPermutation(data_in, idx)
        if length > 1:
            AlgorithmicStepMethods.__sum_pair_body.launch_n(
                length - 1, [data_out, perm_in, is_first_in_pair])
コード例 #20
0
 def upload(numpy_data, backend_target):
     """Upload the flattened ``numpy_data`` and swap it with ``backend_target``."""
     flat_host_data = numpy_data.flatten()
     staged = trtc.device_vector_from_numpy(flat_host_data)
     trtc.Swap(staged, backend_target)
コード例 #21
0
import ThrustRTC as trtc



d_input = trtc.device_vector_from_list([0, 2, 5, 7, 8], 'int32_t')

# Values looked up by every search routine, in print order.
probes = (0, 1, 2, 3, 8, 9)
searches = (trtc.Lower_Bound, trtc.Upper_Bound, trtc.Binary_Search)

for position, search in enumerate(searches):
    # An empty line separates the groups; none is printed before the first.
    if position:
        print()
    for probe in probes:
        print(search(d_input, trtc.DVInt32(probe)))
コード例 #22
0
 def write_row(array, i, row):
     """Copy ``row`` over row ``i`` of ``array``, addressed in flat storage.

     The row width is taken from ``array.shape[1]``.
     """
     width = array.shape[1]
     first = width * i
     destination = array.range(first, first + width)
     trtc.Copy(row, destination)
コード例 #23
0
ファイル: test_gather.py プロジェクト: sumit-byte/ThrustRTC
import ThrustRTC as trtc

# Device predicate: true when x is divisible by 2.
is_even = trtc.Functor({}, ['x'], '''
         return ((x % 2) == 0);
''')

INT32 = 'int32_t'
GATHER_MAP = [0, 2, 4, 6, 8, 1, 3, 5, 7, 9]
ALTERNATING = [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]

# Plain gather through the map into a fresh output vector.
dvalues = trtc.device_vector_from_list(ALTERNATING, INT32)
dmap = trtc.device_vector_from_list(GATHER_MAP, INT32)
doutput = trtc.device_vector(INT32, 10)

trtc.Gather(dmap, dvalues, doutput)
print(doutput.to_host())

# Gather gated by a stencil; the output starts pre-filled with 7s.
dvalues = trtc.device_vector_from_list(
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], INT32)
dstencil = trtc.device_vector_from_list(ALTERNATING, INT32)
dmap = trtc.device_vector_from_list(GATHER_MAP, INT32)
doutput = trtc.device_vector_from_list(
    [7, 7, 7, 7, 7, 7, 7, 7, 7, 7], INT32)

trtc.Gather_If(dmap, dstencil, dvalues, doutput)
print(doutput.to_host())

dvalues = trtc.device_vector_from_list([0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                       'int32_t')
dstencil = trtc.device_vector_from_list([0, 3, 4, 1, 4, 1, 2, 7, 8, 9],
                                        'int32_t')
dmap = trtc.device_vector_from_list([0, 2, 4, 6, 8, 1, 3, 5, 7, 9], 'int32_t')
doutput = trtc.device_vector_from_list([7, 7, 7, 7, 7, 7, 7, 7, 7, 7],
コード例 #24
0
 def get(index):
     """Return ``trtc.Reduce`` over the one-element range starting at ``index``."""
     single_element = data.range(index, index + 1)
     return trtc.Reduce(single_element)
コード例 #25
0
ファイル: test_counter.py プロジェクト: sumit-byte/ThrustRTC
import ThrustRTC as trtc



# Hand-written negation functor; the transform below uses the built-in
# trtc.Negate() rather than this one.
negate = trtc.Functor({}, ['x'], '''
         return -x;
''')

# Negate a counter that starts at 5 and has 10 elements, writing the
# result into a real device vector.
ELEMENT_COUNT = 10
darr = trtc.device_vector('int32_t', ELEMENT_COUNT)
counter = trtc.DVCounter(trtc.DVInt32(5), ELEMENT_COUNT)
trtc.Transform(counter, darr, trtc.Negate())
print(darr.to_host())
コード例 #26
0
 def shuffle_global(idx, length, u01):
     """Reorder the first ``length`` entries of ``idx`` by sorting them with ``u01`` as keys."""
     # WARNING: ineffective implementation
     sort_keys = u01.range(0, length)
     permuted_slice = idx.range(0, length)
     trtc.Sort_By_Key(sort_keys, permuted_slice)
コード例 #27
0
import ThrustRTC as trtc

INT32 = 'int32_t'

# Plain copy between two vectors of equal length.
dIn = trtc.device_vector_from_list(
    [10, 20, 30, 40, 50, 60, 70, 80], INT32)
dOut = trtc.device_vector(INT32, 8)

trtc.Copy(dIn, dOut)
print(dOut.to_host())

# Device predicate: true when x is divisible by 2.
is_even = trtc.Functor({}, ['x'], '''
         return x % 2 == 0;
''')

# Copy_If: the predicate is applied to the input; only the first `count`
# output entries are printed.
dIn = trtc.device_vector_from_list([-2, 0, -1, 0, 1, 2], INT32)
dOut = trtc.device_vector(INT32, 6)
count = trtc.Copy_If(dIn, dOut, is_even)
print(dOut.to_host(0, count))

# Copy_If_Stencil: a separate stencil vector is passed alongside the input.
dIn = trtc.device_vector_from_list([0, 1, 2, 3, 4, 5], INT32)
dStencil = trtc.device_vector_from_list([-2, 0, -1, 0, 1, 2], INT32)
dOut = trtc.device_vector(INT32, 6)
count = trtc.Copy_If_Stencil(dIn, dStencil, dOut, is_even)
print(dOut.to_host(0, count))
コード例 #28
0
import ThrustRTC as trtc

# Negate four values picked out of an 8-element float vector through a
# DVPermutation view driven by an index vector.
source_values = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0]
picked_indices = [2, 6, 1, 3]

dvalues = trtc.device_vector_from_list(source_values, 'float')
dindices = trtc.device_vector_from_list(picked_indices, 'int32_t')
doutput = trtc.device_vector('float', 4)

perm = trtc.DVPermutation(dvalues, dindices)

negate_op = trtc.Negate()
trtc.Transform(perm, doutput, negate_op)
print(doutput.to_host())
コード例 #29
0
ファイル: test_rand2.py プロジェクト: fynv/CURandRTC
# example showing how to separate init and call

import ThrustRTC as trtc
import CURandRTC as rndrtc

STATE_COUNT = 1024

rng = rndrtc.DVRNG()

# Kernel 1: initialise one RNG state per index.
ker_init = trtc.For(['rng','states'], 'idx',
	'''
	rng.state_init(1234, idx, 0, states[idx]);
	''')

# Kernel 2: draw one number from each previously initialised state.
ker_call = trtc.For(['states', 'vec_rnd'], 'idx',
	'''
	vec_rnd[idx]=(float)states[idx].rand01();
	''')

rng_states = trtc.device_vector('RNGState', STATE_COUNT)
d_vec_rnd = trtc.device_vector('float', STATE_COUNT)

# Initialisation and sampling are two separate launches over the same states.
ker_init.launch_n(STATE_COUNT, [rng, rng_states])
ker_call.launch_n(STATE_COUNT, [rng_states, d_vec_rnd])
print(d_vec_rnd.to_host())
コード例 #30
0
import ThrustRTC as trtc

# Device functor applying sqrtf to its argument.
square_root = trtc.Functor({}, ['x'], '''
         return sqrtf(x);
''')

FLOAT = 'float'
dvalues = trtc.device_vector_from_list([1.0, 4.0, 9.0, 16.0], FLOAT)
doutput = trtc.device_vector(FLOAT, 4)

# View of dvalues with square_root attached, then negated into doutput.
dtrans = trtc.DVTransform(dvalues, FLOAT, square_root)

negate_op = trtc.Negate()
trtc.Transform(dtrans, doutput, negate_op)
print(doutput.to_host())