Example #1
def impl_test_inv(self, dtype):
    import numpy as np
    import pycuda.gpuarray as gpuarray
    import skcuda.linalg as linalg
    from scipy.linalg import inv as cpu_inv
    # Symmetrize the random matrix so it is (almost surely) invertible.
    x = np.asarray(np.random.rand(4, 4), dtype)
    x = np.dot(x.T, x)
    x_gpu = gpuarray.to_gpu(x)
    xinv = cpu_inv(x)
    # Default call: the inverse is returned in a newly allocated GPUArray.
    xinv_gpu = linalg.inv(x_gpu)
    assert np.allclose(xinv, xinv_gpu.get(), atol=1e-5)
    assert xinv_gpu is not x_gpu
    # With overwrite=True the input array is reused, so the result *is* x_gpu.
    xinv_gpu = linalg.inv(x_gpu, overwrite=True)
    assert np.allclose(xinv, xinv_gpu.get(), atol=1e-5)
    assert xinv_gpu is x_gpu
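Outside the test harness, the same inv() round trip looks roughly like this. This is a minimal sketch, assuming the scikit-cuda package layout (pycuda.gpuarray plus skcuda.linalg, which must be initialized with linalg.init() before any routine is called):

import numpy as np
import pycuda.autoinit                 # creates a CUDA context on import
import pycuda.gpuarray as gpuarray
import skcuda.linalg as linalg

linalg.init()                          # required before other linalg calls

a = np.random.rand(4, 4).astype(np.float32)
a = a.dot(a.T) + 4 * np.eye(4, dtype=np.float32)  # well-conditioned SPD matrix
a_gpu = gpuarray.to_gpu(a)

ainv_gpu = linalg.inv(a_gpu)           # new array; a_gpu is left intact
print(np.allclose(a.dot(ainv_gpu.get()), np.eye(4), atol=1e-4))

linalg.inv(a_gpu, overwrite=True)      # in-place: a_gpu now holds the inverse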
Example #2
def calculate_H_gpu(X, W, P):
    # Gram matrix of the loadings, regularized by the noise variances P
    # on its diagonal: WPW = W^T W + diag(P).
    WPW = la.add_diag(P, la.dot(W, W, "t", "n"))
    # tmp = W (WPW)^-1; overwrite=True lets inv() reuse WPW's GPU memory.
    tmp = la.dot(W, la.inv(WPW, overwrite=True))
    # Project the data onto the hidden units and rectify (ReLU).
    H = la.dot(X, tmp, "n", "t")
    H = gpu.maximum(H, 0)
    # Rescale the hidden activations to unit variance.
    H = to_unit_variance(H)
    return H, tmp
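For reference, the same algebra can be sketched on the CPU with plain NumPy. This is a minimal sketch, not part of the original code: it mirrors the GPU calls above (add_diag, inv, maximum) with NumPy equivalents, and it assumes to_unit_variance rescales each hidden unit (column of H) to unit variance.

import numpy as np

def calculate_H_cpu(X, W, P):
    # Same algebra as calculate_H_gpu: WPW = W^T W + diag(P), an m x m matrix.
    WPW = W.T.dot(W) + np.diag(P)
    tmp = W.dot(np.linalg.inv(WPW))     # (k, m), matches la.dot(W, la.inv(WPW))
    H = np.maximum(X.dot(tmp.T), 0)     # rectified hidden activations, (n, k)
    # Assumed reading of to_unit_variance: unit variance per hidden unit.
    H = H / (H.std(axis=0, keepdims=True) + 1e-8)
    return H, tmp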
Example #3
def train_rfn_gpu(X,
                  n_hidden,
                  n_iter,
                  learnrateW,
                  learnratePsi,
                  dropout_rate,
                  input_dropout_rate,
                  minPsi=0.1,
                  seed=32):
    k = n_hidden
    n, m = X.shape
    # Initialize the loading matrix W and the noise variances Psi (P).
    W = np.random.normal(scale=0.01, size=(k, m)).astype(np.float32)
    P = np.array([0.1] * m, dtype=np.float32)
    XXdiag = np.diag(np.dot(X.T, X) /
                     n).copy()  # explicit copy to avoid numpy 1.8 warning
    # Move all operands onto the GPU, using a pooled allocator.
    W = gpu.to_gpu(W, allocator=_mempool.allocate)
    P = gpu.to_gpu(P, allocator=_mempool.allocate)
    X = gpu.to_gpu(X, allocator=_mempool.allocate)
    XXdiag = gpu.to_gpu(XXdiag, allocator=_mempool.allocate)
    I = la.eye(k, dtype=np.float32)

    init_rng(seed)
    t0 = time.time()
    for cur_iter in range(n_iter):
        # E-step: posterior means of the hidden units.
        H, tmp = calculate_H_gpu(X, W, P)
        if dropout_rate > 0:
            dropout(H, dropout_rate)
        Xtmp = X
        if input_dropout_rate > 0:
            Xtmp = X.copy()
            saltpepper_noise(Xtmp, input_dropout_rate)
        # Sufficient statistics: U = X^T H / n and the posterior
        # second moment S of the hidden units.
        U = la.dot(Xtmp, H, "t", "n") / n
        S = la.dot(H, H, "t", "n") / n
        S += I
        S -= la.dot(tmp, W, "n", "t")
        Cii = la.dot(la.dot(W, S, "t") - 2 * U, W)

        # M-step: gradient-style updates for W and Psi.
        Sinv = la.inv(S, overwrite=True)
        dW = la.dot(Sinv, U, "n", "t") - W
        dP = XXdiag + la.diag(Cii) - P

        W += learnrateW * dW
        P += learnratePsi * dP

        # Keep the noise variances bounded away from zero.
        P = gpu.maximum(P, minPsi)
        if cur_iter % 25 == 0:
            print("iter %3d (elapsed time: %5.2fs)" % (cur_iter,
                                                       time.time() - t0))
    return W.get(), P.get()
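A hypothetical invocation, for illustration only: it assumes the surrounding module provides gpu (pycuda.gpuarray), la (skcuda.linalg), _mempool, init_rng, dropout, saltpepper_noise, and to_unit_variance, as the snippets above do, and that X is laid out as n samples by m features.

import numpy as np

# Toy data: 1000 samples, 50 features; float32 matches the GPU code above.
X = np.random.randn(1000, 50).astype(np.float32)

W, P = train_rfn_gpu(X,
                     n_hidden=10,
                     n_iter=100,
                     learnrateW=0.1,
                     learnratePsi=0.1,
                     dropout_rate=0.0,
                     input_dropout_rate=0.0)
print(W.shape, P.shape)   # expected: (10, 50) and (50,)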