예제 #1
0
def correlations(X, Y, useGPU):
    """Compute the cross-product matrices X'X, X'Y, Y'X and Y'Y.

    Parameters:
        X, Y   -- 2-D numpy arrays; rows are presumably observations and
                  columns variables -- TODO confirm with callers.
        useGPU -- if True, do the products on the GPU via PyCUDA/scikit-cuda;
                  otherwise use NumPy on the host.

    Returns:
        tuple (cxx, cxy, cyx, cyy) of host numpy arrays.
    """
    if useGPU:
        # Imports deferred so the CPU path works without CUDA installed.
        import pycuda.autoinit
        import pycuda.gpuarray as gpuarray
        import skcuda.linalg as linalg
        linalg.init()

        X_gpu = gpuarray.to_gpu(X)
        XT_gpu = linalg.transpose(X_gpu)
        cxx = linalg.mdot(XT_gpu, X_gpu).get()

        # The transpose is recomputed here -- presumably because mdot may
        # overwrite its inputs; verify against the skcuda version in use.
        XT_gpu = linalg.transpose(X_gpu)
        # Free X on the device before uploading Y to limit peak GPU memory.
        X_gpu.gpudata.free()
        del X_gpu
        Y_gpu = gpuarray.to_gpu(Y)
        cxy = linalg.mdot(XT_gpu, Y_gpu).get()

        # Y'X is just the transpose of X'Y; avoids a second GPU product.
        cyx = cxy.T

        YT_gpu = linalg.transpose(Y_gpu)
        cyy = linalg.mdot(YT_gpu, Y_gpu).get()
    else:
        cxx = np.dot(X.T, X)
        cxy = np.dot(X.T, Y)
        cyx = cxy.T
        cyy = np.dot(Y.T, Y)

    return cxx, cxy, cyx, cyy
예제 #2
0
 def test_mdot_matrix_complex128(self):
     """Check linalg.mdot against chained np.dot for complex128 operands.

     Bug fix: the operands were built from np.random.rand alone, so the
     cast to complex128 left every imaginary part zero and the test never
     exercised genuinely complex arithmetic.  Random imaginary parts are
     now included, matching the usual skcuda demo pattern.
     """
     a = np.asarray(np.random.rand(4, 2) + 1j * np.random.rand(4, 2),
                    np.complex128)
     b = np.asarray(np.random.rand(2, 2) + 1j * np.random.rand(2, 2),
                    np.complex128)
     c = np.asarray(np.random.rand(2, 2) + 1j * np.random.rand(2, 2),
                    np.complex128)
     a_gpu = gpuarray.to_gpu(a)
     b_gpu = gpuarray.to_gpu(b)
     c_gpu = gpuarray.to_gpu(c)
     d_gpu = linalg.mdot(a_gpu, b_gpu, c_gpu)
     assert np.allclose(np.dot(a, np.dot(b, c)), d_gpu.get())
예제 #3
0
def cuda_dot(a, b):
    """Multiply *a* and *b* on the GPU and return the product as a matrix."""
    print("cuda_dot", a.shape, b.shape)
    lhs_gpu = gpuarray.to_gpu(a)
    rhs_gpu = gpuarray.to_gpu(b)
    prod_gpu = linalg.mdot(lhs_gpu, rhs_gpu)
    return matrix(prod_gpu.get())
예제 #4
0
 def test_mdot_matrix_complex128(self):
     """Check linalg.mdot against chained np.dot for complex128 operands.

     Bug fix: the operands were built from np.random.rand alone, so the
     cast to complex128 left every imaginary part zero and the test never
     exercised genuinely complex arithmetic.  Random imaginary parts are
     now included, matching the usual skcuda demo pattern.
     """
     a = np.asarray(np.random.rand(4, 2) + 1j * np.random.rand(4, 2),
                    np.complex128)
     b = np.asarray(np.random.rand(2, 2) + 1j * np.random.rand(2, 2),
                    np.complex128)
     c = np.asarray(np.random.rand(2, 2) + 1j * np.random.rand(2, 2),
                    np.complex128)
     a_gpu = gpuarray.to_gpu(a)
     b_gpu = gpuarray.to_gpu(b)
     c_gpu = gpuarray.to_gpu(c)
     d_gpu = linalg.mdot(a_gpu, b_gpu, c_gpu)
     assert np.allclose(np.dot(a, np.dot(b, c)), d_gpu.get())
예제 #5
0
def make_sample_data(set_: int):
    """Create a benchmark dataset, LSH-hash it on the GPU, write parquet.

    Parameters:
        set_ -- 1 = uniform data, 2 = 3 Gaussian clusters,
                3 = 10 Gaussian clusters.  Any other value now raises
                ValueError (previously `data` was simply never assigned,
                producing a confusing NameError later).

    Side effects: writes ``df-{set_}.parquet`` under
    ``config.CUDA_neighbour_search_df_dir``.  Relies on module-level
    globals ``samples``, ``num_features``, ``LSH_NUM_BITS`` and ``config``.
    """
    np.random.seed(set_ * 4347)
    if set_ == 1:  # Uniform distribution
        data = np.random.uniform(0, 1, size=(samples, num_features))
    elif set_ == 2:  # 3 Gaussian distribution
        data = multi_gauss_clusters(n_clusters=3)
    elif set_ == 3:  # 10 Gaussian distribution
        data = multi_gauss_clusters(n_clusters=10)
    else:
        raise ValueError(f"unknown sample data set id: {set_}")
    df = pd.DataFrame()
    np.random.shuffle(data)
    df['vec'] = data.tolist()

    # find nearest neighbours (51 = the point itself + up to 50 neighbours)
    from sklearn.neighbors import NearestNeighbors
    nbrs = NearestNeighbors(n_neighbors=51,
                            algorithm='ball_tree',
                            leaf_size=30).fit(data)
    _, nbrs_indices = nbrs.kneighbors(data)
    for n_nbr in range(10, 51, 5):
        # Column 0 of each row is the query point itself -- slice it off.
        df[f"known_neighbours_{n_nbr}"] = [
            x[1:(n_nbr + 1)] for x in nbrs_indices
        ]

    # hash using random hyperplane LSH.
    # Bug fix: CUDA_HOME must be exported BEFORE pycuda.autoinit loads the
    # driver; the original set it after the import, where it had no effect.
    os.environ['CUDA_HOME'] = "/opt/cuda/"
    import pycuda.gpuarray as gpuarray
    import skcuda.linalg as linalg
    import pycuda.autoinit
    linalg.init()
    vec_np = np.array(df['vec'].values.tolist(), dtype=np.float32)
    LSH = LSHBias(feature_dim=num_features, bits=LSH_NUM_BITS)
    W = np.array(LSH.W, dtype=np.float32)
    b_gpu = gpuarray.to_gpu(W)
    # Append a constant-1 column so each hyperplane's bias term is folded
    # into the single matrix product below.
    ones = np.ones(shape=(vec_np.shape[0], 1), dtype=np.float32)
    X = np.concatenate((vec_np, ones), axis=1)

    # do the matrix multiplication on the GPU
    a_gpu = gpuarray.to_gpu(X)
    mul = linalg.mdot(a_gpu, b_gpu)
    # get binary: 1 if value >= 0, else 0
    res = gpuarray.if_positive(
        mul >= gpuarray.zeros(mul.shape, dtype=np.float32),
        then_=gpuarray.ones_like(mul),
        else_=gpuarray.zeros_like(mul))
    res = np.array(res.get(), dtype=np.uint32)

    # convert grouped bits to integers
    res = np_array_binary_to_grouped_integers(res)
    df[f"hash_{LSH_NUM_BITS}_bits"] = list(res)
    df.to_parquet(f"{config.CUDA_neighbour_search_df_dir}df-{set_}.parquet",
                  index=False)

    print("created test-data")
예제 #6
0
def fitSlcGPU(slc, srcFatT2, t2, b1, ff):
    """Fit parameter maps for one slice on the GPU by dictionary matching.

    Correlates each voxel signal of slice ``slc`` from the global
    ``dicomStack`` against the dictionary in the global ``signalsGPU`` and
    writes the best-matching triple from the global
    ``parameterCombinations`` into ``t2``, ``b1`` and ``ff`` in place.

    Parameters:
        slc        -- slice index into dicomStack / t2 / b1 / ff.
        srcFatT2   -- not used in this function body; presumably kept for
                      interface compatibility -- TODO confirm with callers.
        t2, b1, ff -- 3-D output arrays, modified in place.

    The slice is processed in bands of ROWSTEP rows; on a GPU out-of-memory
    error the band height is reduced and the whole slice is retried.
    """
    global ROWSTEP
    print("Fitting slice", slc)
    yValues = dicomStack[:, :, slc, :].squeeze()
    slcShape = yValues.shape
    nrows = slcShape[0]
    ncols = slcShape[1]
    sigLen = slcShape[2]
    success = False

    ffParams_gpu = None
    ffValues_gpu = None

    # Use the fat-fraction-constrained maximum search only when the slice
    # actually contains nonzero fat-fraction values.
    if np.any(ff[:,:,slc] > 0):
        useFF = True
        ffParams_gpu = findmax_ff.prepareAndLoadParams(parameterCombinations)
    else:
        useFF = False

    while not success:
        try:
            # Process the slice in horizontal bands to bound GPU memory.
            for r in range(0,nrows,ROWSTEP):
                rowMax = min(r+ROWSTEP, nrows)
                # Flatten the band to (pixels, signal_length) for the GPU.
                slcLin = yValues[r:rowMax,:,:].reshape(ncols*(rowMax-r), sigLen).astype(np.float32)

                slcGPU = None

                slcGPU = pycuda.gpuarray.to_gpu(slcLin)
                # Element-wise square of the signals before matching --
                # NOTE(review): intent (weighting/normalization?) is not
                # clear from this function alone; verify upstream.
                slcGPU = sklinalg.multiply(slcGPU, slcGPU)
                corrMatrixGPU = sklinalg.mdot(slcGPU, signalsGPU) # correlation

                tryFree(slcGPU)

                if useFF:
                    # Constrain the search using the per-pixel fat fraction.
                    ffValues_gpu = findmax_ff.prepareAndLoadFF(ff[r:rowMax, :, slc])
                    corrMax = findmax_ff.findmax_gpu(corrMatrixGPU, ffValues_gpu, ffParams_gpu)
                else:
                    # Unconstrained: best-correlating dictionary entry per pixel.
                    corrMaxGPU = skmisc.argmax(corrMatrixGPU, 1)
                    corrMax = corrMaxGPU.get()
                    tryFree(corrMaxGPU)

                tryFree(corrMatrixGPU)
                tryFree(ffValues_gpu)

                # Scatter the winning parameter triples back into the maps.
                for row in range(r, rowMax):
                    for c in range(ncols):
                        ind = (row-r)*ncols + c
                        t2[row,c,slc] = parameterCombinations[corrMax[ind]][0]
                        b1[row,c,slc] = parameterCombinations[corrMax[ind]][1]
                        ff[row,c,slc] = parameterCombinations[corrMax[ind]][2]

                if DOPLOT >= 1:
                    plotImages()

            success = True
        except pycuda._driver.MemoryError:
            # Out of GPU memory: shrink the band height and retry the slice.
            # NOTE(review): if ROWSTEP ever reaches 0, range(0, nrows, 0)
            # raises ValueError on the retry -- consider a lower bound of 1.
            ROWSTEP -= 1
            tryFree(slcGPU)
            tryFree(corrMatrixGPU)
            tryFree(ffValues_gpu)

            gc.collect()
            print("Not enough GPU Mem: decreasing ROWSTEP to", ROWSTEP)
예제 #7
0
    def process(self, **kwargs):
        """Calculate the likelihood, returning ln(likelihood).

        Reads from kwargs: 'fractions', 'model_observations', optional
        'upperlimits', 'kdiagonal', 'kresiduals', 'kmat', 'obandvs' and the
        score-modifier key.  Returns a dict whose 'value' entry is the log
        likelihood floored at LIKELIHOOD_FLOOR; any validation failure
        returns the floor unchanged.
        """
        ret = {'value': LIKELIHOOD_FLOOR}

        # No fractions -> nothing to score.
        self._fractions = kwargs.get('fractions', [])
        if not len(self._fractions):
            return ret

        self._model_observations = kwargs['model_observations']
        self._score_modifier = kwargs.get(self.key('score_modifier'), 0.0)
        self._upper_limits = np.array(kwargs.get('upperlimits', []),
                                      dtype=bool)

        value = ret['value']

        # Fractions must be valid probabilities in [0, 1].
        if min(self._fractions) < 0.0 or max(self._fractions) > 1.0:
            return ret
        # Any non-finite observation (unless flagged as an upper limit)
        # invalidates the whole evaluation.
        for oi, obs in enumerate(self._model_observations):
            if not self._upper_limits[oi] and (isnan(obs)
                                               or not np.isfinite(obs)):
                return ret

        diag = kwargs.get('kdiagonal', None)
        residuals = kwargs.get('kresiduals', None)

        if diag is None or residuals is None:
            return ret

        if kwargs.get('kmat', None) is not None:
            # Full covariance-matrix path.
            kmat = kwargs['kmat']

            # Add observed errors to diagonal (mutates kmat in place).
            kmat[np.diag_indices_from(kmat)] += diag

            # full_size = np.count_nonzero(kmat)

            # Remove small covariance terms
            # min_cov = self.MIN_COV_TERM * np.max(kmat)
            # kmat[kmat <= min_cov] = 0.0

            # print("Sparse frac: {:.2%}".format(
            #     float(full_size - np.count_nonzero(kmat)) / full_size))

            # Reject ill-conditioned covariance matrices outright.
            condn = np.linalg.cond(kmat)
            if condn > 1.0e10:
                return ret

            # Try the CUDA path unless it has already been ruled out.
            # `is not True` suggests _use_cpu starts in a third, undecided
            # state (presumably None) -- verify against __init__.
            if self._use_cpu is not True and self._model._fitter._cuda:
                try:
                    import pycuda.gpuarray as gpuarray
                    import skcuda.linalg as skla
                except ImportError:
                    # CUDA stack unavailable: fall back to CPU permanently
                    # and warn once.
                    self._use_cpu = True
                    if not self._cuda_reported:
                        self._printer.message('cuda_not_enabled',
                                              master_only=True,
                                              warning=True)
                else:
                    self._use_cpu = False
                    if not self._cuda_reported:
                        self._printer.message('cuda_enabled', master_only=True)
                        self._cuda_reported = True

                    kmat_gpu = gpuarray.to_gpu(kmat)
                    # kmat will now contain the cholesky decomp.
                    skla.cholesky(kmat_gpu, lib='cusolver')
                    # det() of the Cholesky factor L gives sqrt(det(K)), so
                    # -log det(L) is the -0.5*log det(K) term of a Gaussian
                    # log likelihood -- matching the CPU branch below.
                    value = -np.log(skla.det(kmat_gpu, lib='cusolver'))
                    res_gpu = gpuarray.to_gpu(
                        residuals.reshape(len(residuals), 1))
                    # cho_solve overwrites its right-hand side, so solve on
                    # a copy and keep res_gpu for the quadratic form.
                    cho_mat_gpu = res_gpu.copy()
                    skla.cho_solve(kmat_gpu, cho_mat_gpu, lib='cusolver')
                    # Quadratic form -0.5 * r' K^-1 r (1x1 result).
                    value -= (0.5 * (skla.mdot(skla.transpose(res_gpu),
                                               cho_mat_gpu)).get())[0][0]

            if self._use_cpu:
                try:
                    # Same likelihood via SciPy Cholesky on the host.
                    chol_kmat = scipy.linalg.cholesky(kmat, check_finite=False)

                    value = -np.linalg.slogdet(chol_kmat)[-1]
                    value -= 0.5 * (np.matmul(
                        residuals.T,
                        scipy.linalg.cho_solve(
                            (chol_kmat, False), residuals,
                            check_finite=False)))
                except Exception:
                    # Cholesky failed (e.g. not positive definite): fall
                    # back to an explicit inverse and determinant.
                    try:
                        value = -0.5 * (np.matmul(
                            np.matmul(residuals.T, scipy.linalg.inv(kmat)),
                            residuals) + np.log(scipy.linalg.det(kmat)))
                    except scipy.linalg.LinAlgError:
                        return ret

            ret['kdiagonal'] = diag
            ret['kresiduals'] = residuals
        elif 'kfmat' in kwargs:
            raise RuntimeError('Should not have kfmat in likelihood!')
        else:
            # Shortcut when matrix is diagonal.
            self._o_band_vs = kwargs['obandvs']
            # print('likelihood')
            # print(np.sqrt(diag))
            # print(self._o_band_vs)
            # print(residuals)
            value = -0.5 * np.sum(residuals**2 / (self._o_band_vs**2 + diag) +
                                  np.log(self._o_band_vs**2 + diag))

        # Apply the external score modifier and clamp to the floor.
        score = self._score_modifier + value
        if isnan(score) or not np.isfinite(score):
            return ret
        ret['value'] = max(LIKELIHOOD_FLOOR, score)
        return ret
예제 #8
0
"""Minimal scikit-cuda demo: chained matrix multiply on the GPU.

Computes d = a.b.c with linalg.mdot and verifies against NumPy on the host.
Bug fix: the final line used a Python 2 print *statement*, which is a
SyntaxError on Python 3; it is now a print() call.
"""
import pycuda.gpuarray as gpuarray
import pycuda.autoinit
import numpy as np
import skcuda.linalg as linalg

linalg.init()
a = np.asarray(np.random.rand(4, 2), np.float32)
b = np.asarray(np.random.rand(2, 2), np.float32)
c = np.asarray(np.random.rand(2, 2), np.float32)
a_gpu = gpuarray.to_gpu(a)
b_gpu = gpuarray.to_gpu(b)
c_gpu = gpuarray.to_gpu(c)
d_gpu = linalg.mdot(a_gpu, b_gpu, c_gpu)
print(np.allclose(np.dot(a, np.dot(b, c)), d_gpu.get()))
예제 #9
0
    # Upload the LSH hyperplane matrix once; it is reused for every file.
    b_gpu = gpuarray.to_gpu(W)  # reuse this every time

    count = 0
    # hashing different .orc DataFrames
    for filename in tqdm(glob(basepath + "part-*.orc")):
        df = pd.read_orc(filename)
        df = df.rename(columns={"FeatureVector_all_features": "vec"})
        count += 1
        vec_np = np.array(df['vec'].values.tolist(), dtype=np.float32)
        # add bias term: a constant-1 column folds the hyperplane offsets
        # into the single matrix product below
        ones = np.ones(shape=(vec_np.shape[0], 1), dtype=np.float32)
        X = np.concatenate((vec_np, ones), axis=1)

        # do the matrix multiplication on the GPU
        a_gpu = gpuarray.to_gpu(X)
        mul = linalg.mdot(a_gpu, b_gpu)
        # get binary: 1 if value >= 0, else 0
        res = gpuarray.if_positive(
            mul >= gpuarray.zeros(mul.shape, dtype=np.float32),
            then_=gpuarray.ones_like(mul),
            else_=gpuarray.zeros_like(mul))
        res = np.array(res.get(), dtype=np.uint32)

        # convert grouped bits to integers
        res = np_array_binary_to_grouped_integers(res)

        # Keep only the id column and the hash; one parquet file per input.
        df[f"hash_{LSH_NUM_BITS}_bits"] = [x for x in res]
        df = df[["rec_MBID", f"hash_{LSH_NUM_BITS}_bits"]]
        df.to_parquet(f"{config.ABz_GPU_hashed_output_dir}{count}.parquet",
                      index=False)
예제 #10
0
"""scikit-cuda demo: mdot over several dtypes, checked against NumPy.

Bug fix: the script called ``gpuarray.to_gpu`` without ever importing
``pycuda.gpuarray``, so it crashed with NameError; the import is added.
"""
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np

import skcuda.linalg as linalg
import skcuda.misc as cumisc
linalg.init()

# Double precision is only supported by devices with compute
# capability >= 1.3:
import string  # NOTE(review): unused here; kept in case other code relies on it
demo_types = [np.float32, np.complex64]
if cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
    demo_types.extend([np.float64, np.complex128])

for t in demo_types:
    print('Testing multiple matrix multiplication for type ' + str(np.dtype(t)))
    # Complex dtypes get nonzero imaginary parts so complex arithmetic
    # is actually exercised.
    if np.iscomplexobj(t()):
        a = np.asarray(np.random.rand(8, 4) + 1j * np.random.rand(8, 4), t)
        b = np.asarray(np.random.rand(4, 4) + 1j * np.random.rand(4, 4), t)
        c = np.asarray(np.random.rand(4, 4) + 1j * np.random.rand(4, 4), t)
    else:
        a = np.asarray(np.random.rand(8, 4), t)
        b = np.asarray(np.random.rand(4, 4), t)
        c = np.asarray(np.random.rand(4, 4), t)

    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = gpuarray.to_gpu(c)
    d_gpu = linalg.mdot(a_gpu, b_gpu, c_gpu)
    print('Success status: ', np.allclose(np.dot(a, np.dot(b, c)), d_gpu.get()))
예제 #11
0
def cuda_dot2(a, b, c):
    """Compute the chained product a.b.c on the GPU; return a host array."""
    on_gpu = [gpuarray.to_gpu(m) for m in (a, b, c)]
    result_gpu = linalg.mdot(*on_gpu)
    return result_gpu.get()
예제 #12
0
def cuda_dot(a, b):
    """Multiply *a* and *b* on the GPU; return the product as a matrix."""
    lhs_gpu = gpuarray.to_gpu(a)
    rhs_gpu = gpuarray.to_gpu(b)
    prod_gpu = linalg.mdot(lhs_gpu, rhs_gpu)
    return matrix(prod_gpu.get())