コード例 #1
0
def logis(y, x):
    """Fit logistic-regression coefficients on the GPU via iteratively
    reweighted least squares (IRLS), capped at 10 iterations.

    Parameters
    ----------
    y, x : array-like
        Response vector and design matrix; both are cast to float32
        before transfer to the GPU.

    Returns
    -------
    dict with keys:
        "iteraciones" : number of iterations performed,
        "Betas"       : coefficient vector copied back to the host,
        "time"        : wall-clock seconds for the whole fit.
    """
    x = x.astype(np.float32)
    y = y.astype(np.float32)
    start = time.time()
    # Transfer operands to device memory.
    x_gpu = gpuarray.to_gpu(x)
    y_gpu = gpuarray.to_gpu(y)

    linalg.init()
    x_gpu_T = linalg.transpose(x_gpu)
    # OLS starting values: beta = (X'X)^-1 X'y
    beta_gpu = linalg.dot(linalg.dot(linalg.inv(linalg.dot(x_gpu_T, x_gpu)), x_gpu_T), y_gpu)
    j = 1
    while True:
        # Fitted means for the current beta; sapply applies the link on the host.
        mu = sapply(x, beta_gpu.get())
        mu = mu.astype(np.float32)
        mu_gpu = gpuarray.to_gpu(mu)
        # NOTE(review): textbook IRLS for logistic regression weights with
        # W = diag(mu*(1-mu)); diag(mu) is used here -- confirm intent.
        V_gpu = linalg.diag(mu_gpu)
        f2_gpu = linalg.multiply(mu_gpu, 1 - mu_gpu)
        f3_gpu = linalg.diag(1 / f2_gpu)
        f4_gpu = (y_gpu - mu_gpu)
        f5_gpu = linalg.dot(f3_gpu, f4_gpu)
        # Guard against NaNs from the division when mu hits 0 or 1.
        if np.isnan(f5_gpu.get()).any():
            f5_cpu = nanValue(f5_gpu.get())
            f5_gpu = gpuarray.to_gpu(f5_cpu.astype(np.float32))
        # Working response z = X*beta + correction term.
        y_1_gpu = linalg.dot(x_gpu, beta_gpu) + f5_gpu
        # Weighted LS update: beta_new = (X'VX)^-1 X'V z
        beta_1_gpu = linalg.dot(linalg.dot(linalg.dot(linalg.inv(linalg.dot(linalg.dot(x_gpu_T, V_gpu), x_gpu)), x_gpu_T), V_gpu), y_1_gpu)
        check_value = np.absolute(linalg.norm(beta_1_gpu - beta_gpu))
        # Stop on iteration cap or convergence.
        # NOTE(review): on break the freshly computed beta_1_gpu is discarded
        # and the previous iterate is returned -- confirm this is intended.
        if j == 10 or check_value < 0.00001:
            break
        beta_gpu = beta_1_gpu
        j = j + 1
    end = time.time()
    tiempo = (end - start)
    return {"iteraciones": j, "Betas": beta_gpu.get(), "time": tiempo}
コード例 #2
0
ファイル: test_linalg.py プロジェクト: akassab/gpu_project
 def test_diag_1d_complex128(self):
     """diag() of a 1-D complex128 vector matches numpy's np.diag."""
     host = np.array([1j, 2j, 3j, 4j, 5j, 6j], np.complex128)
     result = linalg.diag(gpuarray.to_gpu(host))
     assert (np.diag(host) == result.get()).all()
コード例 #3
0
ファイル: test_linalg.py プロジェクト: akassab/gpu_project
 def test_diag_2d_wide_complex64(self):
     """Diagonal extraction from a wide (32x64) complex64 matrix."""
     mat = np.array(np.random.rand(32, 64)*1j, np.complex64)
     mat_gpu = gpuarray.to_gpu(mat)
     extracted = linalg.diag(mat_gpu)
     expected = np.diag(mat)
     assert np.all(expected == extracted.get())
コード例 #4
0
ファイル: test_linalg.py プロジェクト: akassab/gpu_project
 def test_diag_2d_tall_float64(self):
     """Diagonal of a tall (64x32) float64 matrix agrees with np.diag."""
     host_mat = np.array(np.random.rand(64, 32), np.float64)
     gpu_diag = linalg.diag(gpuarray.to_gpu(host_mat))
     assert (np.diag(host_mat) == gpu_diag.get()).all()
コード例 #5
0
ファイル: test_linalg.py プロジェクト: akassab/gpu_project
 def test_diag_1d_float64(self):
     """Building a diagonal matrix from a float64 vector matches np.diag."""
     vec = np.array([1, 2, 3, 4, 5, 6], np.float64)
     vec_dev = gpuarray.to_gpu(vec)
     mat_dev = linalg.diag(vec_dev)
     assert np.all(mat_dev.get() == np.diag(vec))
コード例 #6
0
ファイル: test_linalg.py プロジェクト: akassab/gpu_project
 def test_diag_2d_wide_float32(self):
     """Diagonal of a wide (32x64) float32 matrix agrees with np.diag."""
     sample = np.array(np.random.rand(32, 64), np.float32)
     extracted = linalg.diag(gpuarray.to_gpu(sample))
     assert (extracted.get() == np.diag(sample)).all()
コード例 #7
0
ファイル: diag_demo.py プロジェクト: Brainiarc7/scikit-cuda
#!/usr/bin/env python

"""
Demonstrate diagonal matrix creation on the GPU.
"""
from __future__ import print_function

import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import numpy as np

import skcuda.linalg as culinalg
import skcuda.misc as cumisc
culinalg.init()

# Double precision is only supported by devices with compute
# capability >= 1.3:
import string
demo_types = [np.float32, np.complex64]
if cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
    demo_types.extend([np.float64, np.complex128])

for t in demo_types:
    print('Testing real diagonal matrix creation for type ' + str(np.dtype(t)))
    v = np.array([1, 2, 3, 4, 5, 6], t)
    v_gpu = gpuarray.to_gpu(v)
    d_gpu = culinalg.diag(v_gpu)
    print('Success status: ', np.all(d_gpu.get() == np.diag(v)))
コード例 #8
0
def FastICASymmApro(X, whitening, dewhitening, maxIterations, threshold):
    """Symmetric FastICA-style fixed-point iteration executed on the GPU.

    Parameters (assumed from usage -- TODO confirm against callers):
      X             : host ndarray of shape (Dim, NumOfSampl); cast to float32.
      whitening     : matrix used to form W = B.T @ whitening on exit.
      dewhitening   : matrix used to form A = dewhitening @ B on exit.
      maxIterations : iteration cap; hitting it triggers the only live
                      return path in the loop body.
      threshold     : tolerance for both the orthonormalization check and
                      the 1 - min|diag| convergence test.

    Returns (A, W) when maxIterations is reached and B is non-empty,
    otherwise (None, None).  NOTE(review): the 'Converged!' branch only
    prints -- its return statements are commented out, so detecting
    convergence does not stop the loop early.
    """
    # CUDA launch geometry: 32x32 thread blocks used throughout.
    Threads = 32
    ThreadBlock = (Threads, Threads, 1)

    Dim, NumOfSampl = X.shape
    Dim = np.int32(Dim)
    # Random orthonormal starting basis B (Dim x Dim).
    B = linalg.orth(np.random.random(
        (Dim,
         Dim))).astype(np.float32)  #linalg.orth makes the array non contiguous
    #B.flags['C_CONTIGUOUS']

    print(B)
    # Force C-contiguity before the device transfer (see note above).
    B_gpu = gpuarray.to_gpu(np.ascontiguousarray(B, np.float32))
    #Bold
    # Pre-allocated device buffers reused every iteration:
    Bold_gpu = gpuarray.zeros((Dim, Dim), np.float32)
    Bold = np.zeros((Dim, Dim))
    #W
    A = np.zeros((Dim, Dim))  #maybe dtype
    #CTC
    CTC_gpu = gpuarray.zeros((Dim, Dim), np.float32)
    #hypTan
    hypTan_gpu = gpuarray.zeros((NumOfSampl, Dim), np.float32)
    #rowSum
    row = int(np.ceil(NumOfSampl / Threads))
    Sum_gpu = gpuarray.zeros((row, Dim), np.float32)
    rowSum_gpu = gpuarray.zeros(Dim, np.float32)
    #minAbsCos
    minAbsCos_gpu = gpuarray.zeros((Dim, Dim), np.float32)
    #left, right
    left_gpu = gpuarray.zeros((Dim, Dim), np.float32)
    right_gpu = gpuarray.zeros((Dim, Dim), np.float32)
    #Identity
    I_gpu = gpuarray.to_gpu(np.eye(Dim).astype(np.float32))
    Check_gpu = gpuarray.zeros((Dim, Dim), np.float32)
    #diag
    diag_gpu = gpuarray.zeros(Dim, np.float32)

    # start = cuda.Event()
    # end = cuda.Event()

    # start.record()

    #X
    # Data matrix transferred once; reused on every iteration.
    X_gpu = gpuarray.to_gpu(X.astype(np.float32))

    for i in range(0, maxIterations + 1):
        #         print(i, maxIterations)
        if i == maxIterations:
            # Iteration cap reached: report and return whatever basis we have.
            print('Component {} did not converge after {} iterations'.format(
                i, maxIterations))
            B = B_gpu.get()
            if B.size != 0:  #not empty
                # Final symmetric decorrelation: B <- B (B'B)^(-1/2).
                B = B @ np.real(inv(sqrt(B.T @ B)))
                W = B.T @ whitening
                A = dewhitening @ B

                print('A:\n', A)
                print('W:\n', W)

                return A, W
            return None, None  #TODO

        f = True
        j = 0

        # Normalize B by (an approximation of) its norm: divide by the
        # largest column sum of B B'.
        gpuMatMul(B_gpu, B_gpu, CTC_gpu, transb='T')
        gpuSumCol(CTC_gpu, Sum_gpu, ThreadBlock, rowSum_gpu)

        #         print(np.allclose(rowSum_gpu.get(), gpuSum(CTC_gpu, axis=0).get()))
        norm = findMax(rowSum_gpu, (31, 1, 1))
        #         norm = gpuMax(rowSum_gpu)

        Div(B_gpu, norm, Dim, block=ThreadBlock,
            grid=(1, 1, 1))  #Division by scalar
        #maybe check every 5 iterations
        # Iterative orthonormalization: B <- (3/2) B - (1/2) (B B') B,
        # repeated until B B' is close enough to the identity
        # (checked only every 20 passes to save transfers).
        while f:
            Mul(left_gpu,
                B_gpu,
                np.float32(3 / 2),
                Dim,
                block=ThreadBlock,
                grid=(1, 1, 1))  #Division by scalar

            gpuMatMul(B_gpu, B_gpu, right_gpu, transb='T')
            gpuMatMul(right_gpu, B_gpu, right_gpu)
            Mul(right_gpu,
                right_gpu,
                np.float32(1 / 2),
                Dim,
                block=ThreadBlock,
                grid=(1, 1, 1))  #Division by scalar

            Sub(B_gpu,
                left_gpu,
                right_gpu,
                Dim,
                block=ThreadBlock,
                grid=(1, 1, 1))  #C = left - right
            gpuMatMul(B_gpu, B_gpu, Check_gpu, transb='T')

            if j >= 20:
                f = compareGpuC(Check_gpu, I_gpu, ThreadBlock).get()
                f = not f <= threshold
                j = 0
            j += 1
#             j+=1

        # Convergence measure: smallest |diagonal entry| of B' Bold.
        gpuMatMul(B_gpu, Bold_gpu, minAbsCos_gpu, transa='T')

        #         minAbsCos2 = findMin(abs(findDiag(minAbsCos_gpu, diag_gpu)), (128, 1, 1)).get()
        minAbsCos2 = gpuMin(abs(diag(minAbsCos_gpu))).get()
        #         print( abs( diag(minAbsCos_gpu) ) )

        minAbsCos = minAbsCos2[0]

        if 1 - minAbsCos < threshold:
            # NOTE(review): only prints; the early-return code is commented
            # out below, so the loop keeps running after convergence.
            print('Converged!')  #TODO

            # end.record()
            # end.synchronize()

#             secs = start.time_till(end)*1e-3
#             return secs
#             print('Seconds: ', secs)

# C = B_gpu.get()

# A = dewhitening @ C
# W = C.T @ whitening

# print('A:\n', A)
# print('W:\n', W)
# return A, W

        Copy(Bold_gpu, B_gpu, Dim, block=ThreadBlock,
             grid=(1, 1, 1))  #Bold = B

        # Fixed-point update using g = tanh: hypTan = tanh(X' B).
        gpuMatMul(X_gpu, B_gpu, hypTan_gpu, transa='T')

        n = int(np.ceil(hypTan_gpu.shape[0] / Threads))
        if n > 65536:
            n = 65535

#         n=1
        gpuTanh(hypTan_gpu,
                np.int32(hypTan_gpu.shape[1]),
                np.int32(hypTan_gpu.shape[0]),
                block=ThreadBlock,
                grid=(1, n, 1))
        gpuMatMul(X_gpu, hypTan_gpu, CTC_gpu)

        n = Dim * NumOfSampl

        m = int(np.ceil(hypTan_gpu.shape[0] / (Threads * Threads)))
        if m > 65536:
            m = 65535
#         m = 1

        elementWise(hypTan_gpu,
                    np.int32(n),
                    block=(Threads * Threads, 1, 1),
                    grid=(m, 1, 1))  #1 - hypTan*hypTan

        # B <- (X*tanh(X'B) - sum(1 - tanh^2) .* B) / NumOfSampl
        gpuSumCol(hypTan_gpu, Sum_gpu, ThreadBlock, rowSum_gpu)
        MatVecMul(B_gpu, rowSum_gpu, Dim, block=ThreadBlock, grid=(1, 1, 1))

        Sub(B_gpu, CTC_gpu, B_gpu, Dim, block=ThreadBlock,
            grid=(1, 1, 1))  #C = left - right
        Div(B_gpu,
            np.int32(NumOfSampl),
            Dim,
            block=ThreadBlock,
            grid=(1, 1, 1))  #Division by scalar
コード例 #9
0
 def test_diag_1d_complex128(self):
     """A 1-D complex128 vector produces the same diagonal as np.diag."""
     data = np.array([1j, 2j, 3j, 4j, 5j, 6j], np.complex128)
     on_device = gpuarray.to_gpu(data)
     produced = linalg.diag(on_device)
     assert np.all(produced.get() == np.diag(data))
コード例 #10
0
 def test_diag_2d_wide_complex64(self):
     """Wide (32x64) complex64 input: GPU diagonal equals np.diag."""
     src = np.array(np.random.rand(32, 64)*1j, np.complex64)
     out = linalg.diag(gpuarray.to_gpu(src))
     assert (np.diag(src) == out.get()).all()
コード例 #11
0
 def test_diag_2d_tall_float64(self):
     """Tall (64x32) float64 input: GPU diagonal equals np.diag."""
     src = np.array(np.random.rand(64, 32), np.float64)
     src_gpu = gpuarray.to_gpu(src)
     out_gpu = linalg.diag(src_gpu)
     assert (out_gpu.get() == np.diag(src)).all()
コード例 #12
0
 def test_diag_1d_float64(self):
     """A float64 vector builds the same diagonal matrix as np.diag."""
     values = np.array([1, 2, 3, 4, 5, 6], np.float64)
     built = linalg.diag(gpuarray.to_gpu(values))
     assert np.all(np.diag(values) == built.get())
コード例 #13
0
 def test_diag_2d_wide_float32(self):
     """Wide (32x64) float32 input: GPU diagonal equals np.diag."""
     src = np.array(np.random.rand(32, 64), np.float32)
     device_out = linalg.diag(gpuarray.to_gpu(src))
     reference = np.diag(src)
     assert np.all(reference == device_out.get())
コード例 #14
0
ファイル: cudastuff1.py プロジェクト: sneshyba/ice3
def diag(A):
    """Host-side wrapper: run linalg.diag on the GPU and return an ndarray."""
    device_input = gpuarray.to_gpu(A)
    return linalg.diag(device_input).get()
コード例 #15
0
 def compute_P_cuda(self, C, D):
     """GPU computation of P by applying the diagonal of D to C twice
     via culinalg.dot_diag (once transposed, once plain).

     NOTE(review): exact dot_diag semantics come from skcuda; the call
     sequence is preserved unchanged from the original.
     """
     d_vec = culinalg.diag(D)
     intermediate = culinalg.dot_diag(d_vec, C, 'T')
     result = culinalg.dot_diag(d_vec, intermediate)
     return result.copy()
コード例 #16
0
ファイル: test_linalg.py プロジェクト: akassab/gpu_project
 def test_diag_2d_tall_complex128(self):
     """Tall (64x32) complex128 input: GPU diagonal equals np.diag."""
     src = np.array(np.random.rand(64, 32)*1j, np.complex128)
     gpu_out = linalg.diag(gpuarray.to_gpu(src))
     assert (np.diag(src) == gpu_out.get()).all()
コード例 #17
0
ファイル: diag_demo.py プロジェクト: vicb1/python-reference
#!/usr/bin/env python
"""
Demonstrate diagonal matrix creation on the GPU.
"""
from __future__ import print_function

import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import numpy as np

import skcuda.linalg as culinalg
import skcuda.misc as cumisc
culinalg.init()

# Double precision is only supported by devices with compute
# capability >= 1.3:
import string
demo_types = [np.float32, np.complex64]
if cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
    demo_types.extend([np.float64, np.complex128])

for t in demo_types:
    print('Testing real diagonal matrix creation for type ' + str(np.dtype(t)))
    v = np.array([1, 2, 3, 4, 5, 6], t)
    v_gpu = gpuarray.to_gpu(v)
    d_gpu = culinalg.diag(v_gpu)
    print('Success status: %r' % np.all(d_gpu.get() == np.diag(v)))
コード例 #18
0
 def test_diag_2d_tall_complex128(self):
     """Diagonal of a tall complex128 matrix must match np.diag exactly."""
     matrix = np.array(np.random.rand(64, 32)*1j, np.complex128)
     matrix_dev = gpuarray.to_gpu(matrix)
     diag_dev = linalg.diag(matrix_dev)
     assert np.all(diag_dev.get() == np.diag(matrix))