def impl_test_inv(self, dtype):
    """Check linalg.inv against scipy's CPU inverse for a random SPD matrix.

    Verifies both the out-of-place path (result is a new gpuarray) and the
    overwrite=True path (result aliases the input gpuarray).
    """
    from scipy.linalg import inv as cpu_inv
    mat = np.asarray(np.random.rand(4, 4), dtype)
    # A^T A is symmetric positive (semi-)definite, hence safely invertible here.
    mat = np.dot(mat.T, mat)
    mat_gpu = gpuarray.to_gpu(mat)
    expected = cpu_inv(mat)

    # Out-of-place: must match the CPU result and leave the input untouched.
    result_gpu = linalg.inv(mat_gpu)
    assert np.allclose(expected, result_gpu.get(), atol=1e-5)
    assert result_gpu is not mat_gpu

    # In-place: must match the CPU result and reuse the input buffer.
    result_gpu = linalg.inv(mat_gpu, overwrite=True)
    assert np.allclose(expected, result_gpu.get(), atol=1e-5)
    assert result_gpu is mat_gpu
def calculate_H_gpu(X, W, P):
    """Compute the hidden representation H on the GPU.

    Forms (W W^T + diag(P)), inverts it in place, projects X through
    W^T (W W^T + diag(P))^-1, clamps at zero, and scales H to unit variance.

    Returns (H, tmp) where tmp = W^T (W W^T + diag(P))^-1 is reused by callers.
    """
    gram = la.dot(W, W, "t", "n")          # W W^T
    regularized = la.add_diag(P, gram)     # W W^T + diag(P)
    tmp = la.dot(W, la.inv(regularized, overwrite=True))
    projected = la.dot(X, tmp, "n", "t")
    rectified = gpu.maximum(projected, 0)  # ReLU-style clamp
    H = to_unit_variance(rectified)
    return H, tmp
def train_rfn_gpu(X, n_hidden, n_iter, learnrateW, learnratePsi, dropout_rate, input_droput_rate, minPsi=0.1, seed=32): k = n_hidden n, m = X.shape W = np.random.normal(scale=0.01, size=(k, m)).astype(np.float32) P = np.array([0.1] * m, dtype=np.float32) XXdiag = np.diag(np.dot(X.T, X) / n).copy() # explicit copy to avoid numpy 1.8 warning W = gpu.to_gpu(W, allocator=_mempool.allocate) P = gpu.to_gpu(P, allocator=_mempool.allocate) X = gpu.to_gpu(X, allocator=_mempool.allocate) XXdiag = gpu.to_gpu(XXdiag, allocator=_mempool.allocate) I = la.eye(k, dtype=np.float32) init_rng(seed) t0 = time.time() for cur_iter in range(n_iter): H, tmp = calculate_H_gpu(X, W, P) if dropout_rate > 0: dropout(H, dropout_rate) Xtmp = X if input_dropout_rate > 0: Xtmp = X.copy() saltpepper_noise(Xtmp, input_dropout_rate) U = la.dot(Xtmp, H, "t", "n") / n S = la.dot(H, H, "t", "n") / n S += I S -= la.dot(tmp, W, "n", "t") Cii = la.dot(la.dot(W, S, "t") - 2 * U, W) Sinv = la.inv(S, overwrite=True) dW = la.dot(Sinv, U, "n", "t") - W dP = XXdiag + la.diag(Cii) - P W += learnrateW * dW P += learnratePsi * dP P = gpu.maximum(P, minPsi) if cur_iter % 25 == 0: print "iter %3d (elapsed time: %5.2fs)" % (cur_iter, time.time() - t0) return W.get(), P.get()
def train_rfn_gpu(X, n_hidden, n_iter, learnrateW, learnratePsi, dropout_rate, input_droput_rate, minPsi=0.1, seed=32): k = n_hidden n, m = X.shape W = np.random.normal(scale=0.01, size=(k, m)).astype(np.float32) P = np.array([0.1] * m, dtype=np.float32) XXdiag = np.diag(np.dot(X.T, X) / n).copy() # explicit copy to avoid numpy 1.8 warning W = gpu.to_gpu(W, allocator=_mempool.allocate) P = gpu.to_gpu(P, allocator=_mempool.allocate) X = gpu.to_gpu(X, allocator=_mempool.allocate) XXdiag = gpu.to_gpu(XXdiag, allocator=_mempool.allocate) I = la.eye(k, dtype=np.float32) init_rng(seed) t0 = time.time() for cur_iter in range(n_iter): H, tmp = calculate_H_gpu(X, W, P) if dropout_rate > 0: dropout(H, dropout_rate) Xtmp = X if input_dropout_rate > 0: Xtmp = X.copy() saltpepper_noise(Xtmp, input_dropout_rate) U = la.dot(Xtmp, H, "t", "n") / n S = la.dot(H, H, "t", "n") / n S += I S -= la.dot(tmp, W, "n", "t") Cii = la.dot(la.dot(W, S, "t") - 2*U, W) Sinv = la.inv(S, overwrite=True) dW = la.dot(Sinv, U, "n", "t") - W dP = XXdiag + la.diag(Cii) - P W += learnrateW * dW P += learnratePsi * dP P = gpu.maximum(P, minPsi) if cur_iter % 25 == 0: print "iter %3d (elapsed time: %5.2fs)" % (cur_iter, time.time() - t0) return W.get(), P.get()