Example #1
def normpdf_multi(x, means, std, logged=True, get=True):
    # Select the CUDA kernel: log density or plain density.
    if logged:
        cu_func = cu_module.get_function('log_pdf_normal')
    else:
        cu_func = cu_module.get_function('pdf_normal')

    # One packed (mean, std) row per density to evaluate.
    packed_params = np.c_[means, std]

    # Coerce host input to a contiguous float32 array; GPU arrays pass through.
    if not isinstance(x, GPUArray):
        x = util.prep_ndarray(x)

    return _univariate_pdf_call(cu_func, x, packed_params, get)
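A minimal usage sketch for normpdf_multi, assuming the function is importable from the gpustats package as in the original library; the SciPy cross-check and all input values are illustrative, not part of the source:

import numpy as np
from scipy.stats import norm
import gpustats

# Illustrative inputs: 1,000 points scored under three normal densities.
x = np.random.randn(1000).astype(np.float32)
means = np.array([-1.0, 0.0, 1.0], dtype=np.float32)
stds = np.array([0.5, 1.0, 2.0], dtype=np.float32)

log_dens = gpustats.normpdf_multi(x, means, stds, logged=True)
print(log_dens.shape)  # (1000, 3): one column per (mean, std) pair

# Loose tolerance since the kernels compute in float32.
cpu = np.column_stack([norm.logpdf(x, m, s) for m, s in zip(means, stds)])
np.testing.assert_allclose(log_dens, cpu, atol=1e-4)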
Example #2
def _univariate_pdf_call(cu_func, data, packed_params, get):
    ndata = len(data)
    nparams = len(packed_params)

    # Registers used per thread, needed to pick a block size that fits.
    func_regs = cu_func.num_regs

    packed_params = util.prep_ndarray(packed_params)

    # How many data points and parameter sets each block should handle.
    data_per, params_per = util.tune_blocksize(data,
                                               packed_params,
                                               func_regs)

    shared_mem = util.compute_shmem(data, packed_params,
                                    data_per, params_per)

    block_design = (data_per * params_per, 1, 1)
    # One grid cell per (data tile, parameter tile) pair.
    grid_design = (util.get_boxes(ndata, data_per),
                   util.get_boxes(nparams, params_per))

    # see cufiles/univcaller.cu
    gpu_dest = gpu_empty((ndata, nparams), dtype=np.float32)
    gpu_data = data if isinstance(data, GPUArray) else to_gpu(data)
    gpu_packed_params = to_gpu(packed_params)

    design = np.array((data_per, params_per) + # block design
                      (ndata,) +               # data spec
                      packed_params.shape,     # params spec
                      dtype=np.int32)

    cu_func(gpu_dest,
            gpu_data, gpu_packed_params, design[0],
            design[1], design[2], design[3], design[4],
            block=block_design, grid=grid_design, shared=shared_mem)

    if get:
        output = gpu_dest.get()
        if nparams > 1:
            # The kernel writes results parameter-major; view the flat
            # buffer as (nparams, ndata) and transpose back to (ndata, nparams).
            output = output.reshape((nparams, ndata), order='C').T
        return output
    else:
        return gpu_dest
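For reference, a NumPy sketch of the per-(point, parameter) value that the log-density kernel fills into the (ndata, nparams) destination; the [mean, std] column layout follows np.c_[means, std] in normpdf_multi above, and log_pdf_normal_ref is a made-up name for illustration:

import numpy as np

def log_pdf_normal_ref(x, packed_params):
    # One row of packed_params per density: column 0 = mean, column 1 = std.
    mean = packed_params[:, 0]      # (nparams,)
    std = packed_params[:, 1]       # (nparams,)
    z = (x[:, None] - mean) / std   # broadcast to (ndata, nparams)
    return -0.5 * z ** 2 - np.log(std) - 0.5 * np.log(2.0 * np.pi)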
Example #3
def sample_discrete(in_densities, logged=False, pad=False,
                    return_gpuarray=False):
    """
    Takes a categorical sample from the unnormalized univariate
    densities defined in the rows of 'densities'

    Parameters
    ---------
    densities : ndarray or gpuarray (n, k)
    logged: boolean indicating whether densities is on the
    log scale ...

    Returns
    -------
    indices : ndarray or gpuarray (if return_gpuarray=True)
    of length n and dtype = int32
    """

    if pad:
        if logged:
            densities = util.pad_data_mult16(in_densities, fill=1)
        else:
            densities = util.pad_data_mult16(in_densities, fill=0)

    else:
        densities = in_densities

    n, k = densities.shape

    if logged:
        cu_func = cu_module.get_function('sample_discrete_logged')
    else:
        cu_func = cu_module.get_function('sample_discrete')

    if isinstance(densities, GPUArray):
        if densities.flags.f_contiguous:
            # Column-major input: transpose on the GPU into row-major layout.
            gpu_densities = util.transpose(densities)
        else:
            gpu_densities = densities
    else:
        densities = util.prep_ndarray(densities)
        gpu_densities = to_gpu(densities)

    # setup GPU data: one uniform draw per row plus the destination vector
    gpu_random = to_gpu(np.asarray(np.random.rand(n), dtype=np.float32))
    gpu_dest = gpu_empty(n, dtype=np.float32)

    # Odd stride so consecutive rows map to different shared-memory banks.
    stride = gpu_densities.shape[1]
    if stride % 2 == 0:
        stride += 1
    dims = np.array([n, k, gpu_densities.shape[1], stride], dtype=np.int32)

    # optimize design ...
    grid_design, block_design = _tune_sfm(n, stride, cu_func.num_regs)

    # 4 bytes per float32: stride floats plus one extra slot for each
    # of the block_design[0] rows held in shared memory.
    shared_mem = 4 * (block_design[0] * stride + block_design[0])

    cu_func(gpu_densities, gpu_random, gpu_dest, 
            dims[0], dims[1], dims[2], dims[3],
            block=block_design, grid=grid_design, shared=shared_mem)

    gpu_random.gpudata.free()
    if return_gpuarray:
        return gpu_dest
    else:
        res = gpu_dest.get()
        gpu_dest.gpudata.free()
        return res
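As a point of comparison, a pure-NumPy inverse-CDF version of the same categorical draw; sample_discrete_ref is an illustrative helper, not part of gpustats:

import numpy as np

def sample_discrete_ref(densities, logged=False, rng=None):
    # One categorical index per row of unnormalized densities.
    rng = np.random.default_rng() if rng is None else rng
    d = np.asarray(densities, dtype=np.float64)
    if logged:
        # Exponentiate stably before accumulating.
        d = np.exp(d - d.max(axis=1, keepdims=True))
    cdf = np.cumsum(d, axis=1)
    u = rng.random(d.shape[0]) * cdf[:, -1]  # one uniform draw per row
    # Index of the first cdf entry >= u is the sampled category.
    return (cdf < u[:, None]).sum(axis=1).astype(np.int32)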
Example #4
def sample_discrete(densities, logged=False, return_gpuarray=False):
    """
    Takes a categorical sample from the unnormalized univariate
    densities defined in the rows of 'densities'

    Parameters
    ---------
    densities : ndarray or gpuarray (n, k)
    logged: boolean indicating whether densities is on the
    log scale ...

    Returns
    -------
    indices : ndarray or gpuarray (if return_gpuarray=True)
    of length n and dtype = int32
    """

    from gpustats.util import info

    n, k = densities.shape
    # prep data
    if isinstance(densities, GPUArray):
        if densities.flags.f_contiguous:
            gpu_densities = util.transpose(densities)
        else:
            gpu_densities = densities
    else:
        densities = util.prep_ndarray(densities)
        gpu_densities = to_gpu(densities)

    # get gpu function
    cu_func = cu_module.get_function('sample_discrete')

    # setup GPU data
    gpu_random = to_gpu(np.asarray(np.random.rand(n), dtype=np.float32))
    gpu_dest = gpu_empty(n, dtype=np.int32)
    dims = np.array([n, k, logged], dtype=np.int32)

    # Wider blocks on devices that allow 1024 or more threads per block.
    if info.max_block_threads < 1024:
        x_block_dim = 16
    else:
        x_block_dim = 32

    y_block_dim = 16
    # setup GPU call: each block handles y_block_dim rows; the +1 grid
    # block covers the remainder when n is not a multiple of y_block_dim.
    block_design = (x_block_dim, y_block_dim, 1)
    grid_design = (int(n / y_block_dim) + 1, 1)

    # 4 bytes per float32; the x_block_dim + 1 row width leaves one pad slot.
    shared_mem = 4 * ((x_block_dim + 1) * y_block_dim + 2 * y_block_dim)

    cu_func(gpu_densities,
            gpu_random,
            gpu_dest,
            dims[0],
            dims[1],
            dims[2],
            block=block_design,
            grid=grid_design,
            shared=shared_mem)

    gpu_random.gpudata.free()
    if return_gpuarray:
        return gpu_dest
    else:
        res = gpu_dest.get()
        gpu_dest.gpudata.free()
        return res
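A worked check of the launch geometry above, assuming n = 1000 rows and a device with max_block_threads >= 1024 (all numbers illustrative):

n = 1000
x_block_dim, y_block_dim = 32, 16
block_design = (x_block_dim, y_block_dim, 1)  # 512 threads per block
grid_design = (int(n / y_block_dim) + 1, 1)   # 63 blocks cover 1008 rows
shared_mem = 4 * ((x_block_dim + 1) * y_block_dim + 2 * y_block_dim)
print(grid_design, shared_mem)                # prints (63, 1) 2240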
Example #5
def _multivariate_pdf_call(cu_func, data, packed_params, get, order,
                           datadim=None):
    packed_params = util.prep_ndarray(packed_params)
    func_regs = cu_func.num_regs

    # Prep the data; skip padding if it is already on the GPU.
    if isinstance(data, GPUArray):
        padded_data = data
        if datadim is None:
            ndata, dim = data.shape
        else:
            ndata, dim = data.shape[0], datadim

    else:
        ndata, dim = data.shape
        padded_data = util.pad_data(data)

    nparams = len(packed_params)
    data_per, params_per = util.tune_blocksize(padded_data,
                                               packed_params,
                                               func_regs)

    shared_mem = util.compute_shmem(padded_data, packed_params,
                                    data_per, params_per)
    block_design = (data_per * params_per, 1, 1)
    grid_design = (util.get_boxes(ndata, data_per),
                   util.get_boxes(nparams, params_per))

    # see cufiles/mvcaller.cu
    design = np.array(((data_per, params_per) + # block design
                       padded_data.shape + # data spec
                       (dim,) + # non-padded number of data columns
                       packed_params.shape), # params spec
                      dtype=np.int32)

    if nparams == 1:
        gpu_dest = gpu_empty(ndata, dtype=np.float32)
    else:
        gpu_dest = gpu_empty((ndata, nparams), dtype=np.float32, order='F')

    # Upload data if not already uploaded
    if not isinstance(padded_data, GPUArray):
        gpu_padded_data = to_gpu(padded_data)
    else:
        gpu_padded_data = padded_data

    gpu_packed_params = to_gpu(packed_params)

    params = (gpu_dest, gpu_padded_data, gpu_packed_params) + tuple(design)
    kwds = dict(block=block_design, grid=grid_design, shared=shared_mem)
    cu_func(*params, **kwds)

    gpu_packed_params.gpudata.free()
    if get:
        if order == 'F':
            return gpu_dest.get()
        else:
            return np.asarray(gpu_dest.get(), dtype=np.float32, order='C')
    else:
        if order == 'F' or nparams == 1:
            return gpu_dest
        else:
            # Reinterpret the column-major result and transpose on the GPU.
            res = gpu_transpose(
                util.GPUarray_reshape(gpu_dest, (nparams, ndata), "C"))
            gpu_dest.gpudata.free()
            return res
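The seven int32 entries of the design vector above pack the block design, the padded data shape, the true column count, and the packed-parameter shape; a concrete illustration with made-up sizes:

import numpy as np

# Made-up sizes: 16 points and 4 parameter sets per block, 1000x8 data
# padded to 16 columns, and 4 parameter rows of 44 packed floats each.
data_per, params_per = 16, 4
padded_shape = (1000, 16)   # padded_data.shape
dim = 8                     # non-padded number of data columns
params_shape = (4, 44)      # packed_params.shape

design = np.array((data_per, params_per) + padded_shape + (dim,) + params_shape,
                  dtype=np.int32)
print(design)  # -> [16 4 1000 16 8 4 44]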
Example #6
def _multivariate_pdf_call(cu_func, data, packed_params, get, order,
                           datadim=None):
    packed_params = util.prep_ndarray(packed_params)
    func_regs = cu_func.num_regs

    # Prep the data; skip padding if it is already on the GPU.
    if isinstance(data, GPUArray):
        padded_data = data
        if datadim is None:
            n_data, dim = data.shape
        else:
            n_data, dim = data.shape[0], datadim

    else:
        n_data, dim = data.shape
        padded_data = util.pad_data(data)

    n_params = len(packed_params)
    data_per, params_per = util.tune_blocksize(
        padded_data,
        packed_params,
        func_regs
    )

    shared_mem = util.compute_shared_mem(
        padded_data,
        packed_params,
        data_per,
        params_per
    )
    block_design = (data_per * params_per, 1, 1)
    grid_design = (util.get_boxes(n_data, data_per),
                   util.get_boxes(n_params, params_per))

    # see cufiles/mvcaller.cu
    design = np.array(
        (
            (data_per, params_per) +  # block design
            padded_data.shape +       # data spec
            (dim,) +                  # non-padded number of data columns
            packed_params.shape       # params spec
        ),
        dtype=np.int32
    )

    if n_params == 1:
        gpu_dest = gpu_empty(n_data, dtype=np.float32)
    else:
        gpu_dest = gpu_empty((n_data, n_params), dtype=np.float32, order='F')

    # Upload data if not already uploaded
    if not isinstance(padded_data, GPUArray):
        gpu_padded_data = to_gpu(padded_data)
    else:
        gpu_padded_data = padded_data

    gpu_packed_params = to_gpu(packed_params)

    params = (gpu_dest, gpu_padded_data, gpu_packed_params) + tuple(design)
    kwargs = dict(block=block_design, grid=grid_design, shared=shared_mem)
    cu_func(*params, **kwargs)

    gpu_packed_params.gpudata.free()
    if get:
        if order == 'F':
            return gpu_dest.get()
        else:
            return np.asarray(gpu_dest.get(), dtype=np.float32, order='C')

    else:
        if order == 'F' or n_params == 1:
            return gpu_dest
        else:
            res = gpu_transpose(
                util.gpu_array_reshape(gpu_dest, (n_params, n_data), "C")
            )
            gpu_dest.gpudata.free()
            return res
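To exercise either multivariate caller end to end, a hypothetical cross-check through the public API; the gpustats.mvnpdf signature is assumed from the original library, and the tolerance is loose because the kernels run in float32:

import numpy as np
import gpustats
from scipy.stats import multivariate_normal

# Hypothetical usage: signature (data, mean, cov, logged=...) assumed.
data = np.random.randn(500, 3).astype(np.float32)
mean = np.zeros(3)
cov = np.eye(3)

gpu_logpdf = gpustats.mvnpdf(data, mean, cov, logged=True)
cpu_logpdf = multivariate_normal.logpdf(data, mean, cov)
np.testing.assert_allclose(gpu_logpdf, cpu_logpdf, atol=1e-3)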