Beispiel #1
0
def _univariate_pdf_call(cu_func, data, packed_params, get):
    """Launch a univariate density kernel for all data / parameter-set pairs.

    Parameters
    ----------
    cu_func
        Kernel handle. Must expose ``num_regs`` and accept the ``block``,
        ``grid`` and ``shared`` keywords (presumably a PyCUDA function --
        TODO confirm).
    data
        Observations; either a host sequence/array or a ``GPUArray``.
        Host data is uploaded with ``to_gpu``, device data is used as-is.
    packed_params
        Parameter sets, one per entry along the first axis.  Normalised with
        ``util.prep_ndarray`` before upload.  The kernel call consumes
        exactly two shape entries, so the prepared array is expected to be
        two-dimensional.
    get
        Truthy: copy the result back to the host.  Falsy: hand back the
        device buffer.

    Returns
    -------
    Host ndarray when ``get`` is truthy.  With more than one parameter set
    the fetched buffer is re-read as ``(nparams, ndata)`` in C order and
    transposed, giving an ``(ndata, nparams)`` view.  Otherwise the float32
    device buffer allocated as ``(ndata, nparams)`` is returned untouched.
    NOTE(review): no reshape/transpose is applied on the device path, so the
    caller has to account for the kernel's write order -- confirm.
    """
    n_obs = len(data)
    n_sets = len(packed_params)
    regs = cu_func.num_regs

    host_params = util.prep_ndarray(packed_params)

    # Work split per block: how many data points and parameter sets each
    # block handles, and the shared memory that split requires.
    data_per, params_per = util.tune_blocksize(data, host_params, regs)
    shared_size = util.compute_shmem(data, host_params, data_per, params_per)

    threads = (data_per * params_per, 1, 1)
    blocks = (util.get_boxes(n_obs, data_per),
              util.get_boxes(n_sets, params_per))

    # see cufiles/univcaller.cu
    dev_out = gpu_empty((n_obs, n_sets), dtype=np.float32)
    if isinstance(data, GPUArray):
        dev_data = data
    else:
        dev_data = to_gpu(data)
    dev_params = to_gpu(host_params)

    # Scalar kernel arguments: block split, number of observations, then the
    # shape of the packed parameter array; all coerced to int32.
    layout = (data_per, params_per, n_obs) + host_params.shape
    spec = np.array(layout, dtype=np.int32)

    cu_func(dev_out, dev_data, dev_params,
            spec[0], spec[1], spec[2], spec[3], spec[4],
            block=threads, grid=blocks, shared=shared_size)

    if not get:
        return dev_out

    host_out = dev_out.get()
    if n_sets > 1:
        host_out = host_out.reshape((n_sets, n_obs), order='C').T
    return host_out
Beispiel #2
0
def _multivariate_pdf_call(cu_func, data, packed_params, get, order,
                           datadim=None):
    """Launch a multivariate density kernel for all data rows / parameter sets.

    Parameters
    ----------
    cu_func
        Kernel handle. Must expose ``num_regs`` and accept the ``block``,
        ``grid`` and ``shared`` keywords (presumably a PyCUDA function --
        TODO confirm).
    data
        Two-dimensional observations.  A host array is padded with
        ``util.pad_data`` and uploaded; a ``GPUArray`` is passed to the
        kernel unchanged.
    packed_params
        Parameter sets, one per entry along the first axis.  Normalised with
        ``util.prep_ndarray`` before upload.
    get
        Truthy: return a host array.  Falsy: return a device array.
    order
        ``'F'`` keeps the layout the kernel produced; any other value
        requests a C-ordered result.
    datadim
        Only consulted when ``data`` is a ``GPUArray``: the non-padded
        number of data columns.  When ``None`` the second entry of
        ``data.shape`` is used instead.

    Returns
    -------
    The float32 densities, of length ``ndata`` for a single parameter set
    and shape ``(ndata, nparams)`` otherwise; on the host when ``get`` is
    truthy, on the device when it is falsy.
    """
    packed_params = util.prep_ndarray(packed_params)
    func_regs = cu_func.num_regs

    # Prep the data. Device-resident data is used as-is (no padding here).
    if isinstance(data, GPUArray):
        padded_data = data
        if datadim is None:
            ndata, dim = data.shape
        else:
            ndata, dim = data.shape[0], datadim
    else:
        ndata, dim = data.shape
        padded_data = util.pad_data(data)

    nparams = len(packed_params)
    data_per, params_per = util.tune_blocksize(padded_data,
                                               packed_params,
                                               func_regs)

    shared_mem = util.compute_shmem(padded_data, packed_params,
                                    data_per, params_per)
    block_design = (data_per * params_per, 1, 1)
    grid_design = (util.get_boxes(ndata, data_per),
                   util.get_boxes(nparams, params_per))

    # see cufiles/mvcaller.cu
    design = np.array(((data_per, params_per) + # block design
                       padded_data.shape + # data spec
                       (dim,) + # non-padded number of data columns
                       packed_params.shape), # params spec
                      dtype=np.int32)

    # A single parameter set yields a flat vector; several yield a
    # Fortran-ordered (ndata, nparams) matrix.
    if nparams == 1:
        gpu_dest = gpu_empty(ndata, dtype=np.float32)
    else:
        gpu_dest = gpu_empty((ndata, nparams), dtype=np.float32, order='F')

    # Upload data if not already uploaded
    if not isinstance(padded_data, GPUArray):
        gpu_padded_data = to_gpu(padded_data)
    else:
        gpu_padded_data = padded_data

    gpu_packed_params = to_gpu(packed_params)

    params = (gpu_dest, gpu_padded_data, gpu_packed_params) + tuple(design)
    kwds = dict(block=block_design, grid=grid_design, shared=shared_mem)
    cu_func(*params, **kwds)

    # The parameter buffer is private to this call; release it eagerly
    # instead of waiting for garbage collection.
    gpu_packed_params.gpudata.free()
    if get:
        if order == 'F':
            return gpu_dest.get()
        else:
            return np.asarray(gpu_dest.get(), dtype=np.float32, order='C')
    else:
        if order == 'F' or nparams == 1:
            return gpu_dest
        else:
            # Re-read the column-major buffer as a C-ordered
            # (nparams, ndata) array and transpose it on the device to get
            # a C-ordered (ndata, nparams) result, then drop the original.
            res = gpu_transpose(util.GPUarray_reshape(gpu_dest, (nparams, ndata), "C"))
            gpu_dest.gpudata.free()
            return res
Beispiel #3
0
def _multivariate_pdf_call(cu_func, data, packed_params, get, order,
                           datadim=None):
    """Run a multivariate density kernel over every data row / parameter set.

    Parameters
    ----------
    cu_func
        Kernel handle. Must expose ``num_regs`` and accept the ``block``,
        ``grid`` and ``shared`` keywords (presumably a PyCUDA function --
        TODO confirm).
    data
        Two-dimensional observations.  Host arrays go through
        ``util.pad_data`` and are uploaded; a ``GPUArray`` is handed to the
        kernel unchanged.
    packed_params
        Parameter sets, one per entry along the first axis; passed through
        ``util.prep_ndarray`` before upload.
    get
        Truthy to receive a host array, falsy to receive a device array.
    order
        ``'F'`` keeps the layout written by the kernel; anything else asks
        for a C-ordered result.
    datadim
        Used only for ``GPUArray`` input: the non-padded number of data
        columns.  ``None`` means "take it from ``data.shape``".

    Returns
    -------
    Float32 densities: a vector of length ``n_data`` for one parameter set,
    otherwise an ``(n_data, n_params)`` array, located on the host or the
    device according to ``get``.
    """
    host_params = util.prep_ndarray(packed_params)
    regs = cu_func.num_regs

    # Work out the problem size; only host data needs padding here.
    if isinstance(data, GPUArray):
        padded = data
        if datadim is None:
            n_rows, n_cols = data.shape
        else:
            n_rows = data.shape[0]
            n_cols = datadim
    else:
        n_rows, n_cols = data.shape
        padded = util.pad_data(data)

    n_sets = len(host_params)

    data_per, params_per = util.tune_blocksize(padded, host_params, regs)
    shared_size = util.compute_shared_mem(
        padded, host_params, data_per, params_per
    )

    threads = (data_per * params_per, 1, 1)
    blocks = (util.get_boxes(n_rows, data_per),
              util.get_boxes(n_sets, params_per))

    # see cufiles/mvcaller.cu
    # Scalar kernel arguments, in order: block split, (padded) data shape,
    # non-padded number of data columns, parameter array shape.
    layout = (data_per, params_per) + padded.shape
    layout = layout + (n_cols,) + host_params.shape
    design = np.array(layout, dtype=np.int32)

    # One parameter set -> flat vector; several -> column-major matrix.
    single_set = n_sets == 1
    if single_set:
        gpu_dest = gpu_empty(n_rows, dtype=np.float32)
    else:
        gpu_dest = gpu_empty((n_rows, n_sets), dtype=np.float32, order='F')

    # Upload data if not already uploaded
    dev_data = padded if isinstance(padded, GPUArray) else to_gpu(padded)
    dev_params = to_gpu(host_params)

    cu_func(gpu_dest, dev_data, dev_params, *design,
            block=threads, grid=blocks, shared=shared_size)

    # The parameter buffer belongs to this call only; release it right away.
    dev_params.gpudata.free()

    if get:
        fetched = gpu_dest.get()
        if order == 'F':
            return fetched
        return np.asarray(fetched, dtype=np.float32, order='C')

    if order == 'F' or single_set:
        return gpu_dest

    # Re-read the column-major buffer as a C-ordered (n_params, n_data)
    # array, transpose it on the device, then drop the original buffer.
    reshaped = util.gpu_array_reshape(gpu_dest, (n_sets, n_rows), "C")
    res = gpu_transpose(reshaped)
    gpu_dest.gpudata.free()
    return res