def partial_fit(self, X):
    """Run one Stochastic Maximum Likelihood (SML) update on a mini-batch.

    The positive phase uses the batch itself. The negative phase uses
    visibles sampled from the stored hidden samples (``self.h_samples_``)
    when ``self.use_pcd`` is set, and from the hidden means of the batch
    otherwise. Weight and bias increments are accumulated with
    ``self.momentum`` and applied in place.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        The partial segment of the data to use for training.
    """
    data = X
    hid_data = self._mean_hiddens(data)

    # Start the negative phase from the stored samples when they are in use.
    chain_start = self.h_samples_ if self.use_pcd else hid_data
    vis_model = self._sample_visibles(chain_start)
    hid_model = self._mean_hiddens(vis_model)

    # Step size is the learning rate divided by the number of rows in X.
    step = float(self.learning_rate) / data.shape[0]

    # dW <- momentum * dW + hid_data^T data - hid_model^T vis_model
    op.add_dot(hid_data, data, self.dW, True, False,
               alpha=1.0, beta=self.momentum)
    op.add_dot(hid_model, vis_model, self.dW, True, False,
               alpha=-1.0, beta=1.0)
    self.W += step * self.dW

    # Bias increments: decay by momentum, then add the summed difference
    # between the positive and negative statistics.
    self.dbh *= self.momentum
    self.dbv *= self.momentum
    hid_diff = op.sum(hid_data, axis=0) - op.sum(hid_model, axis=0)
    self.dbh += hid_diff.reshape(1, self.dbh.shape[1])
    vis_diff = op.sum(data, axis=0) - op.sum(vis_model, axis=0)
    self.dbv += vis_diff.reshape(1, self.dbv.shape[1])
    self.bh += step * self.dbh
    self.bv += step * self.dbv

    if self.use_pcd:
        # Keep binary hidden samples around for the next call.
        self.h_samples_ = op.sample_binomial(hid_model)
def bprop(self, delta, momentum=0.0):
    """Accumulate this layer's gradients and return the delta for the layer below.

    Parameters
    ----------
    delta : array
        Error signal arriving from the layer above. It is handed to
        ``self.dfunc`` together with ``self.A`` and ``self.Z`` before being
        used (presumably modified in place, since the return value of
        ``dfunc`` is ignored -- TODO confirm).
    momentum : float, optional
        Passed as ``beta`` when accumulating into ``self.dW`` and
        ``self.db``.

    Returns
    -------
    ``op.dot(delta, self.W)`` for non-input layers, multiplied by
    ``self.M`` when dropout is active and the activation is neither
    ``"relu"`` nor ``"sigmoid"``; ``0.0`` for the input layer.
    """
    streams = op.streams

    streams[2].synchronize()  # make sure layer above is done
    self.dfunc(delta, self.A, self.Z, stream=streams[0])
    streams[0].synchronize()

    # Weight gradient on stream 0, scaled by 1 / delta.shape[0].
    scale = 1.0 / delta.shape[0]
    op.add_dot(delta, self.X, self.dW, True, False,
               alpha=scale, beta=momentum, stream=streams[0])

    # Bias gradient on stream 1, from the mean of delta along axis 0.
    delta_mean = op.mean(delta, axis=0, stream=streams[1])
    op.add_vec(self.db, 1.0, delta_mean, beta=momentum, stream=streams[1])

    if self.l2_penalty > 0:
        # Adds l2_penalty * W to dW on the same stream as the add_dot above.
        op.add_vec(self.dW, self.l2_penalty, self.W, stream=streams[0])

    if self.is_input_layer:
        return 0.0

    apply_mask = (self.dropout > 0.0
                  and self.activation not in ("relu", "sigmoid"))
    below = op.dot(delta, self.W)
    return below * self.M if apply_mask else below
def test_csrmm_bug():
    """Smoke test: the second sparse call might crash.

    There are no assertions; the test passes if the three ``op`` calls
    complete without raising.
    """
    from scipy.sparse import csr_matrix

    weights = np.random.normal(size=(5, 3)).astype(np.float32, order="c")
    dense = np.random.laplace(size=(6, 3)).astype(np.float32)
    dense[dense < 0.1] = 0  # zero out every entry below 0.1
    sparse = csr_matrix(dense, dtype=np.float32)

    sparse_dev = GPUCSRArray(sparse)
    weights_dev = op.to_gpu(weights)

    # First call: sparse times transposed dense, on stream 0.
    prod_dev = op.dot(sparse_dev, weights_dev, False, True,
                      out=None, stream=op.streams[0])
    # Second call: accumulate into the weights on the same stream.
    op.add_dot(prod_dev, sparse_dev, weights_dev, True, False,
               alpha=-0.3, beta=1.0, stream=op.streams[0])
    op.mean(prod_dev, axis=0, stream=op.streams[1])
def bprop(self, delta, momentum=0.0):
    """Accumulate this layer's gradients and return the delta for the layer below.

    Parameters
    ----------
    delta : array
        Error signal arriving from the layer above. It is handed to
        ``self.dfunc`` together with ``self.A`` and ``self.Z`` before being
        used (presumably modified in place, since the return value of
        ``dfunc`` is ignored -- TODO confirm).
    momentum : float, optional
        Passed as ``beta`` when accumulating into ``self.dW`` and
        ``self.db``.

    Returns
    -------
    ``op.dot(delta, self.W)`` issued on ``op.streams[2]`` for non-input
    layers; ``0.0`` for the input layer.
    """
    streams = op.streams

    streams[2].synchronize()  # make sure layer above is done
    self.dfunc(delta, self.A, self.Z, stream=streams[0])
    streams[0].synchronize()

    # Weight gradient on stream 0, scaled by 1 / delta.shape[0].
    scale = 1.0 / delta.shape[0]
    op.add_dot(delta, self.X, self.dW, True, False,
               alpha=scale, beta=momentum, stream=streams[0])

    # Bias gradient on stream 1, from the mean of delta along axis 0.
    delta_mean = op.mean(delta, axis=0, stream=streams[1])
    op.add_vec(self.db, 1.0, delta_mean, beta=momentum, stream=streams[1])

    if self.l2_penalty > 0:
        # Adds l2_penalty * W to dW on the same stream as the add_dot above.
        op.add_vec(self.dW, self.l2_penalty, self.W, stream=streams[0])

    if self.is_input_layer:
        return 0.0
    return op.dot(delta, self.W, stream=streams[2])
def partial_fit(self, X):
    """Perform a single Stochastic Maximum Likelihood (SML) step on ``X``.

    Positive statistics come from ``X`` and its hidden means. Negative
    statistics come from visibles sampled from ``self.h_samples_`` when
    ``self.use_pcd`` is true, otherwise from the hidden means of ``X``.
    The momentum-smoothed increments ``dW``, ``dbh`` and ``dbv`` are
    updated and then added to ``W``, ``bh`` and ``bv`` in place.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        The partial segment of the data to use for training.
    """
    pos_vis = X
    pos_hid = self._mean_hiddens(pos_vis)

    if self.use_pcd:
        neg_source = self.h_samples_
    else:
        neg_source = pos_hid
    neg_vis = self._sample_visibles(neg_source)
    neg_hid = self._mean_hiddens(neg_vis)

    n_rows = pos_vis.shape[0]
    rate = float(self.learning_rate) / n_rows

    # Weight increment: momentum-decayed, plus positive minus negative
    # outer-product statistics.
    op.add_dot(pos_hid, pos_vis, self.dW, True, False,
               alpha=1.0, beta=self.momentum)
    op.add_dot(neg_hid, neg_vis, self.dW, True, False,
               alpha=-1.0, beta=1.0)
    self.W += rate * self.dW

    # Bias increments follow the same momentum scheme.
    self.dbh *= self.momentum
    self.dbv *= self.momentum
    hid_shape = (1, self.dbh.shape[1])
    self.dbh += (op.sum(pos_hid, axis=0)
                 - op.sum(neg_hid, axis=0)).reshape(*hid_shape)
    vis_shape = (1, self.dbv.shape[1])
    self.dbv += (op.sum(pos_vis, axis=0)
                 - op.sum(neg_vis, axis=0)).reshape(*vis_shape)
    self.bh += rate * self.dbh
    self.bv += rate * self.dbv

    if self.use_pcd:
        # Store binary hidden samples as the starting point of the next call.
        self.h_samples_ = op.sample_binomial(neg_hid)
def test_sparseA2_sgemm():
    """Check ``op.add_dot`` with a host-side CSR left operand and ``transB``.

    Two cases are covered, ``A * B.T`` and ``A.T * B.T``, each accumulated
    with ``beta=1.0`` onto a matrix of ones that is updated in place.
    """
    from scipy.sparse import csr_matrix

    # Case 1: sparse (4x6) times transposed dense (5x6).
    dense_a = np.random.laplace(size=(4, 6)).astype(np.float32)
    dense_a[dense_a < 0.1] = 0
    sparse_a = csr_matrix(dense_a, dtype=np.float32)
    rhs = np.random.randn(5, 6).astype(np.float32)
    acc = np.ones((4, 5), dtype=np.float32)
    expected = (sparse_a * rhs.T) + acc
    op.add_dot(sparse_a, rhs, acc, transB=True, beta=1.0)
    assert_allclose(acc, expected, rtol=1e-4,
                    err_msg="sparse_sgemmA transB")

    # Case 2: transposed sparse (6x4) times transposed dense (5x6).
    dense_a = np.random.laplace(size=(6, 4)).astype(np.float32)
    dense_a[dense_a < 0.1] = 0
    sparse_a = csr_matrix(dense_a, dtype=np.float32)
    rhs = np.random.randn(5, 6).astype(np.float32)
    acc = np.ones((4, 5), dtype=np.float32)
    expected = (sparse_a.T * rhs.T) + acc
    op.add_dot(sparse_a, rhs, acc, transA=True, transB=True, beta=1.0)
    assert_allclose(acc, expected, rtol=1e-4,
                    err_msg="sparse_sgemmA transA transB")
def test_sparseA2_sgemm():
    """Verify in-place ``op.add_dot`` when the left operand is a SciPy CSR matrix.

    The right operand is always transposed; the sparse operand is used
    as-is in the first case and transposed in the second. The output
    starts as ones and ``beta=1.0`` keeps that initial content.
    """
    from scipy.sparse import csr_matrix

    # --- sparse A, transB ---
    raw = np.random.laplace(size=(4, 6)).astype(np.float32)
    raw[raw < 0.1] = 0
    lhs = csr_matrix(raw, dtype=np.float32)
    dense_b = np.random.randn(5, 6).astype(np.float32)
    out = np.ones((4, 5), dtype=np.float32)
    want = (lhs * dense_b.T) + out
    op.add_dot(lhs, dense_b, out, transB=True, beta=1.0)
    assert_allclose(out, want, rtol=1e-4, err_msg="sparse_sgemmA transB")

    # --- sparse A with transA, transB ---
    raw = np.random.laplace(size=(6, 4)).astype(np.float32)
    raw[raw < 0.1] = 0
    lhs = csr_matrix(raw, dtype=np.float32)
    dense_b = np.random.randn(5, 6).astype(np.float32)
    out = np.ones((4, 5), dtype=np.float32)
    want = (lhs.T * dense_b.T) + out
    op.add_dot(lhs, dense_b, out, transA=True, transB=True, beta=1.0)
    assert_allclose(out, want, rtol=1e-4,
                    err_msg="sparse_sgemmA transA transB")
def test_csrmm_bug():
    """Regression smoke test: the 2nd call might crash.

    Nothing is asserted; completing all three ``op`` calls without an
    exception counts as success.
    """
    from scipy.sparse import csr_matrix

    w_host = np.random.normal(size=(5, 3)).astype(np.float32, order="c")
    x_host = np.random.laplace(size=(6, 3)).astype(np.float32)
    x_host[x_host < 0.1] = 0  # sparsify: drop every entry below 0.1
    x_csr = csr_matrix(x_host, dtype=np.float32)

    x_dev = GPUCSRArray(x_csr)
    w_dev = op.to_gpu(w_host)
    stream0 = op.streams[0]

    c_dev = op.dot(x_dev, w_dev, False, True, out=None, stream=stream0)
    op.add_dot(c_dev, x_dev, w_dev, True, False,
               alpha=-0.3, beta=1.0, stream=stream0)
    op.mean(c_dev, axis=0, stream=op.streams[1])
def test_gpusparseB_sgemm_tb():
    """Check ``op.add_dot`` with a dense left and transposed GPU-CSR right operand.

    The expected value ``A * B.T + 0.5 * C`` is computed on the host with
    SciPy and compared against the device result.
    """
    from scipy.sparse import csr_matrix

    raw_b = np.random.laplace(size=(3, 5)).astype(np.float32)
    raw_b[raw_b < 0.1] = 0
    sparse_b = csr_matrix(raw_b, dtype=np.float32)
    dense_a = np.random.normal(size=(4, 5)).astype(np.float32, order="c")
    out_shape = (dense_a.shape[0], sparse_b.shape[0])
    ones = np.ones(out_shape, dtype=np.float32, order='c')
    expected = (dense_a * sparse_b.T) + 0.5 * ones

    b_dev = GPUCSRArray(sparse_b)
    a_dev = op.to_gpu(dense_a)
    c_dev = op.to_gpu(ones)
    result = op.add_dot(a_dev, b_dev, c_dev, transB=True,
                        alpha=1.0, beta=0.5)
    assert_allclose(result.get(), expected, rtol=1e-4,
                    err_msg="gpusparse_sgemmB tb")
def test_gpusparseA_sgemm():
    """Check ``op.add_dot`` with a GPU-CSR left operand and a dense right operand.

    The expected value ``A * B + 0.5 * C`` is computed on the host with
    SciPy and compared against the device result.
    """
    from scipy.sparse import csr_matrix

    raw_a = np.random.laplace(size=(5, 3)).astype(np.float32)
    raw_a[raw_a < 0.1] = 0
    sparse_a = csr_matrix(raw_a, dtype=np.float32)
    dense_b = np.random.normal(size=(3, 6)).astype(np.float32, order="c")
    out_shape = (sparse_a.shape[0], dense_b.shape[1])
    ones = np.ones(out_shape, dtype=np.float32, order='c')
    expected = (sparse_a * dense_b) + 0.5 * ones

    a_dev = GPUCSRArray(sparse_a)
    b_dev = op.to_gpu(dense_b)
    c_dev = op.to_gpu(ones)
    result = op.add_dot(a_dev, b_dev, c_dev, alpha=1.0, beta=0.5)
    assert_allclose(result.get(), expected, rtol=1e-4,
                    err_msg="gpusparse_sgemm")
def tes_deactivate_t_gpusparseB_sgemm_ta_bug():
    """Check ``op.add_dot`` with a transposed dense left and GPU-CSR right operand.

    NOTE(review): the name does not start with ``test_``, so a runner that
    collects by that prefix will skip it -- presumably deliberate, given
    "deactivate" and "bug" in the name.
    """
    from scipy.sparse import csr_matrix

    dense_a = np.random.normal(size=(6, 12)).astype(np.float32, order="c")
    raw_b = np.random.laplace(size=(6, 33)).astype(np.float32)
    raw_b[raw_b < 0.1] = 0
    sparse_b = csr_matrix(raw_b, dtype=np.float32)
    ones = np.ones((12, 33), dtype=np.float32, order='c')
    expected = (dense_a.T * sparse_b) + 0.5 * ones

    b_dev = GPUCSRArray(sparse_b)
    a_dev = op.to_gpu(dense_a)
    c_dev = op.to_gpu(ones)
    result = op.add_dot(a_dev, b_dev, c_dev, transA=True,
                        alpha=1.0, beta=0.5)
    assert_allclose(result.get(), expected, rtol=1e-3,
                    err_msg="gpusparse_sgemmB ta bug")
def test_gpusparseB_sgemm_tb():
    """Compare device ``add_dot(A, B, C, transB=True)`` against a host reference.

    ``B`` is a CSR matrix wrapped in ``GPUCSRArray``; ``A`` and ``C`` are
    dense arrays uploaded with ``op.to_gpu``. The reference is
    ``A * B.T + 0.5 * C``.
    """
    from scipy.sparse import csr_matrix

    b_dense = np.random.laplace(size=(3, 5)).astype(np.float32)
    b_dense[b_dense < 0.1] = 0
    b_csr = csr_matrix(b_dense, dtype=np.float32)
    a_host = np.random.normal(size=(4, 5)).astype(np.float32, order="c")
    c_host = np.ones((a_host.shape[0], b_csr.shape[0]),
                     dtype=np.float32, order='c')
    reference = (a_host * b_csr.T) + 0.5 * c_host

    gpu_b = GPUCSRArray(b_csr)
    gpu_a = op.to_gpu(a_host)
    gpu_c = op.to_gpu(c_host)
    gpu_out = op.add_dot(gpu_a, gpu_b, gpu_c, transB=True,
                         alpha=1.0, beta=0.5)
    assert_allclose(gpu_out.get(), reference, rtol=1e-4,
                    err_msg="gpusparse_sgemmB tb")
def test_gpusparseA_sgemm():
    """Compare device ``add_dot(A, B, C)`` with a GPU-CSR ``A`` against a host reference.

    ``A`` is a CSR matrix wrapped in ``GPUCSRArray``; ``B`` and ``C`` are
    dense arrays uploaded with ``op.to_gpu``. The reference is
    ``A * B + 0.5 * C``.
    """
    from scipy.sparse import csr_matrix

    a_dense = np.random.laplace(size=(5, 3)).astype(np.float32)
    a_dense[a_dense < 0.1] = 0
    a_csr = csr_matrix(a_dense, dtype=np.float32)
    b_host = np.random.normal(size=(3, 6)).astype(np.float32, order="c")
    c_host = np.ones((a_csr.shape[0], b_host.shape[1]),
                     dtype=np.float32, order='c')
    reference = (a_csr * b_host) + 0.5 * c_host

    gpu_a = GPUCSRArray(a_csr)
    gpu_b = op.to_gpu(b_host)
    gpu_c = op.to_gpu(c_host)
    gpu_out = op.add_dot(gpu_a, gpu_b, gpu_c, alpha=1.0, beta=0.5)
    assert_allclose(gpu_out.get(), reference, rtol=1e-4,
                    err_msg="gpusparse_sgemm")
def tes_deactivate_t_gpusparseB_sgemm_ta_bug():
    """Compare device ``add_dot(A, B, C, transA=True)`` with a GPU-CSR ``B`` to a host reference.

    The reference is ``A.T * B + 0.5 * C``, checked with ``rtol=1e-3``.

    NOTE(review): the function name lacks the ``test_`` prefix, so
    prefix-based collectors will not run it -- presumably on purpose
    ("deactivate"/"bug" in the name); confirm before renaming.
    """
    from scipy.sparse import csr_matrix

    a_host = np.random.normal(size=(6, 12)).astype(np.float32, order="c")
    b_dense = np.random.laplace(size=(6, 33)).astype(np.float32)
    b_dense[b_dense < 0.1] = 0
    b_csr = csr_matrix(b_dense, dtype=np.float32)
    c_host = np.ones((12, 33), dtype=np.float32, order='c')
    reference = (a_host.T * b_csr) + 0.5 * c_host

    gpu_b = GPUCSRArray(b_csr)
    gpu_a = op.to_gpu(a_host)
    gpu_c = op.to_gpu(c_host)
    gpu_out = op.add_dot(gpu_a, gpu_b, gpu_c, transA=True,
                         alpha=1.0, beta=0.5)
    assert_allclose(gpu_out.get(), reference, rtol=1e-3,
                    err_msg="gpusparse_sgemmB ta bug")
def test_dense_gemm():
    """Check dense ``op.add_dot`` on host and device for all transpose combinations.

    For each of the four ``transA``/``transB`` combinations the expected
    value ``op(A) . op(B) + X`` (``X`` being all ones, ``beta=1.0``) is
    computed with NumPy and compared against

    * the in-place host result, and
    * the result obtained from arrays uploaded with ``op.to_gpu``.

    The previous version uploaded ``Ad``/``Bd``/``Xd`` but then called
    ``op.add_dot`` on the host arrays again; ``Xd`` had been copied from the
    already-updated ``X``, so the device assertion passed without running
    any device code. The device arrays are now the ones passed to
    ``op.add_dot``, and the device output starts from a fresh ones matrix.
    """
    out_shape = (30, 50)
    # (shape of A, shape of B, transA, transB); every case yields a 30x50 product.
    cases = (
        ((30, 40), (40, 50), False, False),
        ((40, 30), (40, 50), True, False),
        ((30, 40), (50, 40), False, True),
        ((40, 30), (50, 40), True, True),
    )

    for a_shape, b_shape, transA, transB in cases:
        A = np.random.randn(*a_shape).astype(np.float32)
        B = np.random.randn(*b_shape).astype(np.float32)
        X = np.ones(out_shape, np.float32)
        lhs = A.T if transA else A
        rhs = B.T if transB else B
        X_exp = np.dot(lhs, rhs) + X

        # Host path: add_dot accumulates into X in place.
        op.add_dot(A, B, X, transA=transA, transB=transB, beta=1.0)
        assert_allclose(X, X_exp)

        # Device path: X now holds the result, so upload fresh ones rather
        # than X, and pass the device arrays to add_dot.
        Ad = op.to_gpu(A)
        Bd = op.to_gpu(B)
        Xd = op.to_gpu(np.ones(out_shape, np.float32))
        op.add_dot(Ad, Bd, Xd, transA=transA, transB=transB, beta=1.0)
        # float32 GEMM on a different backend will not match NumPy bit for
        # bit; atol guards entries of X_exp that happen to be close to zero.
        assert_allclose(op.to_cpu(Xd), X_exp, rtol=1e-4, atol=1e-5)