def cal(x, client):
    st = time.time()

    # Distributed scheduler
    # with dask.set_options(get=dask.threaded.get):
    with dask.set_options(get=client.get):
        A = da.transpose(x)
        B = da.dot(x, A)
        C = da.dot(B, B)
        print(C.compute())

    # Default scheduler
    # with dask.set_options(get=dask.threaded.get):
    #     A = da.transpose(x)
    #     B = da.dot(x, A)
    #     C = da.dot(B, B)
    #
    #     print(C.compute())

    # Manually set global thread pool.
    # from multiprocessing.pool import ThreadPool
    # with dask.set_options(pool=ThreadPool(4)):
    #     A = da.transpose(x)
    #     B = da.dot(x, A)
    #     C = da.dot(B, B)
    #
    #     print(C.compute(num_workers=4))

    print('time: ', time.time() - st)
    return 0
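# Note: dask.set_options(get=...) above is the pre-0.18 scheduler API; on
# current Dask the same experiment is expressed with dask.config.set, and an
# instantiated distributed Client registers itself as the default scheduler.
# A minimal sketch (cal_modern is a hypothetical name), reducing before
# printing to keep the output small:
import time

import dask
import dask.array as da


def cal_modern(x):
    st = time.time()
    with dask.config.set(scheduler="threads"):  # or "processes", "synchronous"
        A = da.transpose(x)
        B = da.dot(x, A)
        C = da.dot(B, B)
        print(C.sum().compute(num_workers=4))
    print('time: ', time.time() - st)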
def test_svd_compressed():
    m, n = 2000, 250
    r = 10
    np.random.seed(4321)
    mat1 = np.random.randn(m, r)
    mat2 = np.random.randn(r, n)
    mat = mat1.dot(mat2)
    data = da.from_array(mat, chunks=(500, 50))

    u, s, vt = svd_compressed(data, r, seed=4321, n_power_iter=2)

    usvt = da.dot(u, da.dot(da.diag(s), vt))

    tol = 0.2
    assert_eq(da.linalg.norm(usvt), np.linalg.norm(mat),
              rtol=tol, atol=tol)  # average accuracy check

    u = u[:, :r]
    s = s[:r]
    vt = vt[:r, :]

    s_exact = np.linalg.svd(mat)[1]
    s_exact = s_exact[:r]

    assert_eq(np.eye(r, r), da.dot(u.T, u))  # u must be orthonormal
    assert_eq(np.eye(r, r), da.dot(vt, vt.T))  # v must be orthonormal
    assert_eq(s, s_exact)  # s must contain the singular values
def test_svd_compressed(iterator):
    m, n = 100, 50
    r = 5
    a = da.random.random((m, n), chunks=(m, n))

    # calculate approximation and true singular values
    u, s, vt = svd_compressed(
        a, 2 * r, iterator=iterator[0], n_power_iter=iterator[1], seed=4321
    )  # worst case
    s_true = scipy.linalg.svd(a.compute(), compute_uv=False)

    # compute the difference with original matrix
    norm = scipy.linalg.norm((a - (u[:, :r] * s[:r]) @ vt[:r, :]).compute(), 2)

    # ||a-a_hat||_2 <= (1+tol)s_{k+1}: based on eq. 1.10/1.11:
    # Halko, Nathan, Per-Gunnar Martinsson, and Joel A. Tropp.
    # "Finding structure with randomness: Probabilistic algorithms for
    # constructing approximate matrix decompositions."
    # SIAM review 53.2 (2011): 217-288.
    frac = norm / s_true[r + 1] - 1
    # Tolerance determined via simulation to be slightly above max norm of
    # difference matrix in 10k samples. See
    # https://github.com/dask/dask/pull/6799#issuecomment-726631175 for details.
    tol = 0.4
    assert frac < tol

    assert_eq(np.eye(r, r), da.dot(u[:, :r].T, u[:, :r]))  # u must be orthonormal
    assert_eq(np.eye(r, r), da.dot(vt[:r, :], vt[:r, :].T))  # v must be orthonormal
def _compute(self, X, y, sync_every, HH=None, HY=None):
    """Computing matrices HH and HY, the actually long part.

    .. todo: actually distributed computations that scatter batches of data
        file names, and reduce-sum the HH, HY matrices.
    """
    # processing files
    for i, X_file, y_file in zip(range(len(X)), X, y):
        X_dask = dd.read_parquet(X_file).to_dask_array(lengths=True)
        Y_dask = dd.read_parquet(y_file).to_dask_array(lengths=True)
        H_dask = self._project(X_dask)

        if HH is None:  # first iteration
            HH = da.dot(H_dask.transpose(), H_dask)
            HY = da.dot(H_dask.transpose(), Y_dask)
        else:
            HH += da.dot(H_dask.transpose(), H_dask)
            HY += da.dot(H_dask.transpose(), Y_dask)
            if sync_every is not None and i % sync_every == 0:
                wait([HH, HY])  # synchronization

        if sync_every is not None and i % sync_every == 0:
            HH, HY = self.client_.persist([HH, HY])

    # finishing solution
    if sync_every is not None:
        wait([HH, HY])

    return HH, HY
def inverse_transform(self, X):
    """Transform data back to its original space.

    Returns an array X_original whose transform would be X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_components)
        New data, where n_samples is the number of samples
        and n_components is the number of components.

    Returns
    -------
    X_original : array-like, shape (n_samples, n_features)

    Notes
    -----
    If whitening is enabled, inverse_transform does not compute the
    exact inverse operation of transform.
    """
    check_is_fitted(self, "mean_")

    if self.whiten:
        return (
            da.dot(
                X,
                np.sqrt(self.explained_variance_[:, np.newaxis])
                * self.components_,
            )
            + self.mean_
        )
    else:
        return da.dot(X, self.components_) + self.mean_
def check_dmd_dask(D, mu, Phi, show_warning=True):
    """
    Checks how close the approximation using DMD is to the original data.

    Returns:
        None if the difference is within the tolerance.
        Displays a warning otherwise.
    """
    X = D[:, 0:-1]
    Y = D[:, 1:]
    # Y_est = da.dot(da.dot(da.dot(Phi, da.diag(mu)), pinv_SVD(Phi)), X)
    Phi_inv = pinv_SVD(Phi)
    PhiMu = da.dot(Phi, da.diag(mu))
    # Y_est = da.dot(da.dot(PhiMu, Phi_inv), X)
    Y_est = da.dot(PhiMu, da.dot(Phi_inv, X))
    diff = da.real(Y - Y_est)
    res = da.fabs(diff)
    rtol = 1.e-8
    atol = 1.e-5
    if da.all(res < atol + rtol * da.fabs(da.real(Y_est))).compute():
        return None
    else:
        # if not b and show_warning:
        warn('dmd result does not satisfy Y=AX')
def test_tsqr_zero_height_chunks():
    m_q = 10
    n_q = 5
    m_r = 5
    n_r = 5

    # certainty
    mat = np.random.rand(10, 5)
    x = da.from_array(mat, chunks=((4, 0, 1, 0, 5), (5,)))
    q, r = da.linalg.qr(x)
    assert_eq((m_q, n_q), q.shape)  # shape check
    assert_eq((m_r, n_r), r.shape)  # shape check
    assert_eq(mat, da.dot(q, r))  # accuracy check
    assert_eq(np.eye(n_q, n_q), da.dot(q.T, q))  # q must be orthonormal
    assert_eq(r, da.triu(r.rechunk(r.shape[0])))  # r must be upper triangular

    # uncertainty
    mat2 = np.vstack([mat, -np.ones((10, 5))])
    v2 = mat2[:, 0]
    x2 = da.from_array(mat2, chunks=5)
    c = da.from_array(v2, chunks=5)
    x = x2[c >= 0, :]  # remove the ones added above to yield mat
    q, r = da.linalg.qr(x)
    q = q.compute()  # because uncertainty
    r = r.compute()
    assert_eq((m_q, n_q), q.shape)  # shape check
    assert_eq((m_r, n_r), r.shape)  # shape check
    assert_eq(mat, np.dot(q, r))  # accuracy check
    assert_eq(np.eye(n_q, n_q), np.dot(q.T, q))  # q must be orthonormal
    assert_eq(r, np.triu(r))  # r must be upper triangular
def pinv_SVD(X):
    """
    A function to find a pseudo-inverse in dask using SVD.
    """
    u, s, v = da.linalg.svd(X)
    S_inv = da.diag(1 / s)
    X_inv = da.dot(v.T.conj(), da.dot(S_inv, u.T.conj()))
    return X_inv
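# Sanity-check sketch for pinv_SVD above: da.linalg.svd returns (u, s, vh),
# so v.T.conj() rebuilds V and X_inv = V S^-1 U^H is the Moore-Penrose
# pseudo-inverse; for a full-rank X it should satisfy X @ X_inv @ X == X.
import dask.array as da
import numpy as np

X = da.from_array(np.random.RandomState(0).rand(6, 4), chunks=(3, 4))
X_inv = pinv_SVD(X)
np.testing.assert_allclose(
    da.dot(X, da.dot(X_inv, X)).compute(), X.compute(), atol=1e-12)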
def JtJvec(self, v):
    vec = da.dot(self.A, v)
    row = dask.delayed(sp.csr_matrix.dot)(sp.eye(N), vec)
    jvec = da.from_delayed(row, dtype=float, shape=(N,))
    jtjvec = da.dot(self.A, jvec)
    return jtjvec  # da.from_delayed(jtjvec, dtype=float, shape=(1, N))
def test_tsqr_svd_regular_blocks():
    m, n = 20, 10
    mat = np.random.rand(m, n)
    data = da.from_array(mat, chunks=(10, n), name='A')

    u, s, vt = tsqr(data, compute_svd=True)
    usvt = da.dot(u, da.dot(da.diag(s), vt))

    s_exact = np.linalg.svd(mat)[1]

    assert_eq(mat, usvt)  # accuracy check
    assert_eq(np.eye(n, n), da.dot(u.T, u))  # u must be orthonormal
    assert_eq(np.eye(n, n), da.dot(vt, vt.T))  # v must be orthonormal
    assert_eq(s, s_exact)  # s must contain the singular values
def eig_dask(A, nofIt=None):
    """
    A dask eigenvalue solver: assumes A is symmetric and uses the QR method
    to find eigenvalues and eigenvectors.

    nofIt: number of iterations (default is the size of A)
    """
    A_new = A
    if nofIt is None:
        nofIt = A.shape[0]
    V = da.eye(A.shape[0], 100)
    for i in range(nofIt):
        Q, R = da.linalg.qr(A_new)
        A_new = da.dot(R, Q)
        V = da.dot(V, Q)
    return da.diag(A_new), V
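# Minimal check of eig_dask above, with hypothetical test data: the iteration
# A_{k+1} = R_k @ Q_k (where A_k = Q_k R_k) preserves the eigenvalues of a
# symmetric A while driving the off-diagonal entries to zero, so
# da.diag(A_new) converges to the spectrum and V accumulates eigenvectors.
# Convergence is linear in the ratios of adjacent eigenvalue magnitudes,
# hence the well-separated spectrum chosen here.
import dask.array as da
import numpy as np

Q, _ = np.linalg.qr(np.random.RandomState(0).rand(5, 5))
A = da.from_array(Q @ np.diag([5.0, 4.0, 3.0, 2.0, 1.0]) @ Q.T, chunks=5)

eigvals, eigvecs = eig_dask(A, nofIt=100)
np.testing.assert_allclose(
    np.sort(eigvals.compute()), [1.0, 2.0, 3.0, 4.0, 5.0], atol=1e-6)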
def test_make_regression(n_samples, n_features, n_informative, n_targets,
                         bias, effective_rank, tail_strength, noise,
                         shuffle, coef, random_state, n_parts, cluster):
    c = Client(cluster)
    try:
        from cuml.dask.datasets import make_regression

        result = make_regression(n_samples=n_samples, n_features=n_features,
                                 n_informative=n_informative,
                                 n_targets=n_targets, bias=bias,
                                 effective_rank=effective_rank, noise=noise,
                                 shuffle=shuffle, coef=coef,
                                 random_state=random_state, n_parts=n_parts)

        if coef:
            out, values, coefs = result
        else:
            out, values = result

        assert out.shape == (n_samples, n_features), "out shape mismatch"

        if n_targets > 1:
            assert values.shape == (n_samples, n_targets), \
                "values shape mismatch"
        else:
            assert values.shape == (n_samples,), "values shape mismatch"

        assert len(out.chunks[0]) == n_parts
        assert len(out.chunks[1]) == 1

        if coef:
            if n_targets > 1:
                assert coefs.shape == (n_features, n_targets), \
                    "coefs shape mismatch"
                assert len(coefs.chunks[1]) == 1
            else:
                assert coefs.shape == (n_features,), "coefs shape mismatch"
                assert len(coefs.chunks[0]) == 1

            test1 = da.all(da.sum(coefs != 0.0, axis=0) == n_informative)
            std_test2 = da.std(values - (da.dot(out, coefs) + bias), axis=0)

            test1, std_test2 = da.compute(test1, std_test2)

            diff = cp.abs(1.0 - std_test2)
            test2 = cp.all(diff < 1.5 * 10**(-1.))

            assert test1, "Unexpected number of informative features"
            assert test2, "Unexpectedly incongruent outputs"
    finally:
        c.close()
def iter_dot(data, masks, repeats):
    frames = len(data)
    maskcount = len(masks)
    for repeat in range(repeats):
        result = da.dot(data, masks.T)
        result.compute(num_workers=WORKERS)
    return result
def workMethod():
    matrix1 = dar.array([[1, 2, 3, 4, 5, 6],
                         [7, 8, 9, 10, 11, 12],
                         [13, 14, 15, 16, 17, 18]])
    matrix2 = dar.array([[5, 10, 15],
                         [20, 25, 30],
                         [35, 40, 45],
                         [50, 55, 60],
                         [65, 70, 75],
                         [80, 85, 90]])

    # Expected Results:
    # [1155, 1260, 1365]
    # [2685, 2970, 3255]
    # [4215, 4680, 5145]

    print('Matrix 1:')
    print(matrix1.compute())
    print('\n')

    print('Matrix 2:')
    print(matrix2.compute())
    print('\n')

    result = dar.dot(matrix1, matrix2)
    # result.visualize(filename='./Results/DaskSyncMatrixMultFiles/DaskSyncMatrixMultGraph')

    print('Final Result')
    print(result.compute())
    print('\n')
def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False,
                        X_norm_squared=None):
    if X_norm_squared is not None:
        XX = X_norm_squared
        if XX.shape == (1, X.shape[0]):
            XX = XX.T
        elif XX.shape != (X.shape[0], 1):
            raise ValueError(
                "Incompatible dimensions for X and X_norm_squared")
    else:
        XX = row_norms(X, squared=True)[:, np.newaxis]

    if X is Y:
        YY = XX.T
    elif Y_norm_squared is not None:
        if Y_norm_squared.ndim < 2:
            YY = Y_norm_squared[:, np.newaxis]
        else:
            YY = Y_norm_squared
        if YY.shape != (1, Y.shape[0]):
            raise ValueError(
                "Incompatible dimensions for Y and Y_norm_squared")
    else:
        YY = row_norms(Y, squared=True)[np.newaxis, :]

    # TODO: this often emits a warning. Silence it here?
    distances = -2 * da.dot(X, Y.T) + XX + YY
    distances = da.maximum(distances, 0)
    # TODO: scikit-learn sets the diagonal to 0 when X is Y.
    return distances if squared else da.sqrt(distances)
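# The vectorized form above relies on the identity
#   ||x - y||^2 = ||x||^2 - 2 <x, y> + ||y||^2,
# which turns all pairwise distances into one matrix product plus two
# broadcasted norm vectors. A small self-contained verification sketch:
import dask.array as da
import numpy as np

rng = np.random.RandomState(0)
X = da.from_array(rng.rand(5, 3), chunks=(5, 3))
Y = da.from_array(rng.rand(4, 3), chunks=(4, 3))

XX = (X ** 2).sum(axis=1)[:, None]
YY = (Y ** 2).sum(axis=1)[None, :]
fast = da.sqrt(da.maximum(-2 * da.dot(X, Y.T) + XX + YY, 0)).compute()

naive = np.linalg.norm(
    X.compute()[:, None, :] - Y.compute()[None, :, :], axis=-1)
np.testing.assert_allclose(fast, naive)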
def dmd_evolve_dask(X0, mu, Phi, t):
    """
    dmd_evolve_dask evolves the dmd components to time t starting from X0

    Args:
        X0  - dask.array of length d, the initial observation D[:, 0]
        mu  - the dmd eigenvalues
        Phi - the dmd modes
        t   - an array of times
              #TODO for now the evolution is by increments of 1,
              should allow for a smaller timestep

    Returns:
        Psi - dask.array of dimensions r x t
    """
    # calculate starting point
    b = da.dot(pinv_SVD(Phi), X0)
    # rank
    r = Phi.shape[1]
    # initialize Psi
    Psi = np.zeros([r, len(t)], dtype='complex')
    # Psi = da.zeros([r, len(t)], chunks=(r, len(t)), dtype='complex')
    # evolve Psi
    for i, _t in enumerate(t):
        Psi[:, i] = multiply(power(mu, _t), b)
    return Psi
def Jtvec(self, v):
    row = dask.delayed(sp.csr_matrix.dot)(sp.eye(N), v)
    row = da.from_delayed(row, dtype=float, shape=(N,))
    vec = da.dot(self.A, row)
    return vec  # da.from_delayed(row, dtype=float, shape=[N])
def euclidean(XA, XB):
    """Returns the distance between points using Euclidean distance (2-norm)
    as the distance metric between the points.

    Find the Euclidean distances between four 2-D coordinates:

    >>> coords = [(35.0456, -85.2672),
    ...           (35.1174, -89.9711),
    ...           (35.9728, -83.9422),
    ...           (36.1667, -86.7833)]
    >>> euclidean(coords, coords)
    array([[ 0.    ,  4.7044,  1.6172,  1.8856],
           [ 4.7044,  0.    ,  6.0893,  3.3561],
           [ 1.6172,  6.0893,  0.    ,  2.8477],
           [ 1.8856,  3.3561,  2.8477,  0.    ]])
    """
    mA = XA.shape[0]
    mB = XB.shape[0]
    distances = []
    for i in range(0, mA):
        dm = np.zeros(shape=(1, mB), dtype=np.double)
        for j in range(0, mB):
            XA_XB = XA[i, :] - XB[j, :]
            dm[0, j] = da.sqrt(da.dot(XA_XB, XA_XB))
        distances.append(
            da.from_array(dm, chunks=(mA + mB) / multiprocessing.cpu_count()))
    return da.concatenate(distances, axis=0)
def da_linregress(x, y):
    """
    Refactor of the scipy linregress with numba; fewer checks for speed's
    sake, and done with dask arrays.

    :param x: array for the independent variable
    :param y: array for the dependent variable
    :return: slope, r squared, and the two-sided p-value of the fit
    """
    TINY = 1.0e-20
    # x = np.asarray(x)
    # y = np.asarray(y)
    arr = da.stack([x, y], axis=1)
    n = len(x)
    # average sum of squares:
    ssxm, ssxym, ssyxm, ssym = (da.dot(arr.T, arr) / n).ravel()
    r_num = ssxym
    r_den = np.sqrt(ssxm * ssym)
    if r_den == 0.0:
        r = 0.0
    else:
        r = r_num / r_den
        # test for numerical error propagation
        if r > 1.0:
            r = 1.0
        elif r < -1.0:
            r = -1.0
    df = n - 2
    slope = r_num / ssxm
    r_t = r + TINY
    t = r * da.sqrt(df / ((1.0 - r_t) * (1.0 + r_t)))
    prob = 2 * stats.distributions.t.sf(np.abs(t), df)
    return slope, r**2, prob
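# Usage sketch for da_linregress above. Note that da.dot(arr.T, arr) / n
# yields raw (uncentered) second moments, so the result matches
# scipy.stats.linregress only when x and y are mean-centered first.
import dask.array as da
import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
x_np = rng.randn(1000)
y_np = 2.0 * x_np + 0.1 * rng.randn(1000)
x_np -= x_np.mean()
y_np -= y_np.mean()

slope, r2, prob = da_linregress(
    da.from_array(x_np, chunks=250), da.from_array(y_np, chunks=250))
ref = stats.linregress(x_np, y_np)
np.testing.assert_allclose(float(slope), ref.slope, rtol=1e-6)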
def test_double_dependencies():
    x = np.arange(56).reshape((7, 8))
    d = da.from_array(x, chunks=(4, 4))
    X = d + 1
    X = da.dot(X, X.T)

    assert_eq(X.compute(optimize_graph=False), X)
def _project(self, X_dask):
    """Compute hidden layer output with Dask functionality.
    """
    H_list = []
    for hl, W in zip(self.hidden_layers_, self.W_):
        if hl.hidden_layer_ == HiddenLayerType.PAIRWISE:
            H0 = X_dask.map_blocks(
                pairwise_distances,
                W,
                dtype=X_dask.dtype,
                chunks=(X_dask.chunks[0], (W.shape[0],)),
                metric=hl.pairwise_metric,
            )
        else:
            XW_dask = da.dot(X_dask, W.transpose())
            if hl.ufunc_ is dummy:
                H0 = XW_dask
            elif hl.ufunc_ is np.tanh:
                H0 = da.tanh(XW_dask)
            else:
                H0 = XW_dask.map_blocks(hl.ufunc_)
        H_list.append(H0)

    if self.include_original_features:
        H_list.append(X_dask)
    H_list.append(da.ones((X_dask.shape[0], 1)))

    H_dask = da.concatenate(H_list, axis=1).rechunk(self.bsize_)
    return H_dask
def score_samples(self, X):
    """Return the log-likelihood of each sample.

    See. "Pattern Recognition and Machine Learning"
    by C. Bishop, 12.2.1 p. 574
    or http://www.miketipping.com/papers/met-mppca.pdf

    Parameters
    ----------
    X : array, shape(n_samples, n_features)
        The data.

    Returns
    -------
    ll : array, shape (n_samples,)
        Log-likelihood of each sample under the current model
    """
    check_is_fitted(self, "mean_")

    # X = check_array(X)
    Xr = X - self.mean_
    n_features = X.shape[1]
    precision = self.get_precision()  # [n_features, n_features]
    log_like = -0.5 * (Xr * (da.dot(Xr, precision))).sum(axis=1)
    log_like -= 0.5 * (n_features * da.log(2.0 * np.pi) -
                       fast_logdet(precision))
    return log_like
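# For reference, the two `log_like` lines above implement the multivariate
# Gaussian log-density written with the precision matrix Lambda
# (Bishop 12.2.1, as cited in the docstring):
#
#   ln p(x) = -1/2 [ (x - mu)^T Lambda (x - mu) + d ln(2 pi) - ln det Lambda ]
#
# with d = n_features, mu = self.mean_, and Lambda = self.get_precision();
# the row-wise quadratic form is computed as (Xr * (Xr @ Lambda)).sum(axis=1).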
def transform(self, X):
    """Apply dimensionality reduction on X.

    X is projected on the first principal components previously extracted
    from a training set.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    check_is_fitted(self, ["mean_", "components_"], all_or_any=all)

    # X = check_array(X)
    if self.mean_ is not None:
        X = X - self.mean_
    X_transformed = da.dot(X, self.components_.T)
    if self.whiten:
        X_transformed /= np.sqrt(self.explained_variance_)
    return X_transformed
def polynomial_kernel(X, Y=None, degree=3, gamma=None, coef0=1):
    X, Y = check_pairwise_arrays(X, Y)
    if gamma is None:
        gamma = 1.0 / X.shape[1]

    K = (gamma * da.dot(X, Y.T) + coef0) ** degree
    return K
def mvn_random_DASK(mean, cov, N, dim):
    da.random.seed(10)
    epsilon = 0.0001
    A = da.linalg.cholesky(cov + epsilon * da.eye(dim), lower=True)
    z = da.random.standard_normal(size=(N, dim))
    x = da.outer(da.ones((N,)), mean).transpose() + da.dot(A, z.transpose())
    return x
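# Usage sketch for mvn_random_DASK above: with z ~ N(0, I) and
# Sigma = A A^T from the Cholesky factorization, the samples x = mu + A z
# have covariance Sigma; the epsilon * I term is jitter that keeps the
# factorization numerically stable. A quick empirical check:
import dask.array as da
import numpy as np

mean = da.from_array(np.array([1.0, -2.0]), chunks=2)
cov = da.from_array(np.array([[2.0, 0.5], [0.5, 1.0]]), chunks=2)

samples = mvn_random_DASK(mean, cov, N=200_000, dim=2)  # shape (dim, N)
emp_cov = np.cov(samples.compute())
np.testing.assert_allclose(emp_cov, [[2.0, 0.5], [0.5, 1.0]], atol=0.05)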
def test_sparse_dot(sp_format):
    pytest.importorskip("cupyx")

    if sp_format == "csr":
        sp_matrix = cupyx.scipy.sparse.csr_matrix
    elif sp_format == "csc":
        sp_matrix = cupyx.scipy.sparse.csc_matrix
    dtype = "f"
    density = 0.3
    x_shape, x_chunks = (4, 8), (2, 4)
    y_shape, y_chunks = (8, 6), (4, 3)
    x = cupy.random.random(x_shape, dtype=dtype)
    y = cupy.random.random(y_shape, dtype=dtype)
    x[x < 1 - density] = 0
    y[y < 1 - density] = 0
    z = x.dot(y)

    da_x = da.from_array(x, chunks=x_chunks, asarray=False, fancy=False)
    da_y = da.from_array(y, chunks=y_chunks, asarray=False, fancy=False)
    da_x = da_x.map_blocks(sp_matrix, dtype=dtype)
    da_y = da_y.map_blocks(sp_matrix, dtype=dtype)
    da_z = da.dot(da_x, da_y).compute()

    assert cupyx.scipy.sparse.isspmatrix(da_z)
    assert_eq(z, da_z.todense())
def _distance(Z, Y, epsilon):
    """ Distance function """
    Y = Y + epsilon
    # The first term below is equal to: da.dot(da.ones(m, n), Y)
    # with Z.shape = (m, n) and Y.shape = (n, k)
    d = (Y.sum(axis=0, keepdims=True).repeat(Z.shape[0], axis=0) -
         da.dot(Z, da.log(Y)))
    return d
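# Quick check that the sum/repeat trick above matches the da.dot form it
# replaces (the rewrite avoids materializing an (m, n) matrix of ones):
import dask.array as da
import numpy as np

m, n, k = 6, 4, 3
Y = da.from_array(np.random.RandomState(0).rand(n, k), chunks=(n, k))

via_repeat = Y.sum(axis=0, keepdims=True).repeat(m, axis=0)
via_dot = da.dot(da.ones((m, n)), Y)
np.testing.assert_allclose(via_repeat.compute(), via_dot.compute())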
def slerp(self, val, low, high):
    """Code from https://github.com/soumith/dcgan.torch/issues/14"""
    omega = da.arccos(da.clip(
        da.dot(low / da.linalg.norm(low),
               high.transpose() / da.linalg.norm(high)), -1, 1))
    so = da.sin(omega)
    if so == 0:
        return (1.0 - val) * low + val * high  # L'Hopital's rule / LERP
    return (da.sin((1.0 - val) * omega) / so * low +
            da.sin(val * omega) / so * high)
def sigmoid_kernel(X, Y=None, gamma=None, coef0=1):
    X, Y = check_pairwise_arrays(X, Y)
    if gamma is None:
        gamma = 1.0 / X.shape[1]

    K = da.dot(X, Y.T)
    K *= gamma
    K += coef0
    K = da.tanh(K)
    return K
def test_sfqr(m, n, chunks, error_type):
    mat = np.random.rand(m, n)
    data = da.from_array(mat, chunks=chunks, name="A")
    m_q = m
    n_q = min(m, n)
    m_r = n_q
    n_r = n
    m_qtq = n_q

    if error_type is None:
        q, r = da.linalg.sfqr(data)
        assert_eq((m_q, n_q), q.shape)  # shape check
        assert_eq((m_r, n_r), r.shape)  # shape check
        assert_eq(mat, da.dot(q, r))  # accuracy check
        assert_eq(np.eye(m_qtq, m_qtq), da.dot(q.T, q))  # q must be orthonormal
        assert_eq(r, da.triu(r.rechunk(r.shape[0])))  # r must be upper triangular
    else:
        with pytest.raises(error_type):
            q, r = da.linalg.sfqr(data)
def test_qr(m, n, chunks, error_type):
    mat = np.random.rand(m, n)
    data = da.from_array(mat, chunks=chunks, name='A')
    m_q = m
    n_q = min(m, n)
    m_r = n_q
    n_r = n
    m_qtq = n_q

    if error_type is None:
        q, r = qr(data)
        assert_eq((m_q, n_q), q.shape)  # shape check
        assert_eq((m_r, n_r), r.shape)  # shape check
        assert_eq(mat, da.dot(q, r))  # accuracy check
        assert_eq(np.eye(m_qtq, m_qtq), da.dot(q.T, q))  # q must be orthonormal
        assert_eq(r, da.triu(r.rechunk(r.shape[0])))  # r must be upper triangular
    else:
        with pytest.raises(error_type):
            q, r = qr(data)
def dask_detrend_data(data, output_arr):
    """
    Detrend data using a linear fit.

    Parameters
    ----------
    data: dask.array
        Input dataset to detrend.  Assumes leading axis is sampling dimension.
    output_arr: ndarray-like
        Output array with same shape as data to store detrended data.

    Notes
    -----
    This is a very expensive operation if using a large dataset.  May slow
    down if forced to spill onto the disk cache.  It does not currently take
    into account X data.  Instead, it creates a dummy array (using arange)
    for sampling points.
    """
    dummy_time = np.arange(data.shape[0])[:, None]
    dummy_time = da.from_array(dummy_time, chunks=dummy_time.shape)

    # intercept handling
    x_offset = dummy_time.mean(axis=0)
    x_centered = dummy_time - x_offset

    y_offset = data.mean(axis=0)
    y_centered = data - y_offset

    coefs, resid, rank, s = da.linalg.lstsq(x_centered, y_centered)
    intercepts = y_offset - x_offset * coefs

    predict = da.dot(dummy_time, coefs) + intercepts
    detrended = data - predict
    da.store(detrended, output_arr)

    return output_arr
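# Minimal usage sketch for dask_detrend_data above: remove a known linear
# trend from noisy columns and check that the detrended series no longer
# correlates with time (the least-squares residual is orthogonal to the
# regressor, so the refit slope should be ~0).
import dask.array as da
import numpy as np

n, m = 500, 3
t = np.arange(n)[:, None]
raw = 0.05 * t + np.random.RandomState(0).randn(n, m)

data = da.from_array(raw, chunks=(100, m))
out = np.empty_like(raw)
dask_detrend_data(data, out)

slopes = np.polyfit(t[:, 0], out, 1)[0]  # slope of the detrended data
assert np.all(np.abs(slopes) < 1e-8)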
def test_tsqr(m, n, chunks, error_type):
    mat = np.random.rand(m, n)
    data = da.from_array(mat, chunks=chunks, name='A')

    # qr
    m_q = m
    n_q = min(m, n)
    m_r = n_q
    n_r = n

    # svd
    m_u = m
    n_u = min(m, n)
    n_s = n_q
    m_vh = n_q
    n_vh = n
    d_vh = max(m_vh, n_vh)  # full matrix returned

    if error_type is None:
        # test QR
        q, r = tsqr(data)
        assert_eq((m_q, n_q), q.shape)  # shape check
        assert_eq((m_r, n_r), r.shape)  # shape check
        assert_eq(mat, da.dot(q, r))  # accuracy check
        assert_eq(np.eye(n_q, n_q), da.dot(q.T, q))  # q must be orthonormal
        assert_eq(r, da.triu(r.rechunk(r.shape[0])))  # r must be upper triangular

        # test SVD
        u, s, vh = tsqr(data, compute_svd=True)
        s_exact = np.linalg.svd(mat)[1]
        assert_eq(s, s_exact)  # s must contain the singular values
        assert_eq((m_u, n_u), u.shape)  # shape check
        assert_eq((n_s,), s.shape)  # shape check
        assert_eq((d_vh, d_vh), vh.shape)  # shape check
        assert_eq(np.eye(n_u, n_u), da.dot(u.T, u))  # u must be orthonormal
        assert_eq(np.eye(d_vh, d_vh), da.dot(vh, vh.T))  # vh must be orthonormal
        assert_eq(mat, da.dot(da.dot(u, da.diag(s)), vh[:n_q]))  # accuracy check
    else:
        with pytest.raises(error_type):
            q, r = tsqr(data)
        with pytest.raises(error_type):
            u, s, vh = tsqr(data, compute_svd=True)
def h_svd(elem, n, J=None, m=3, theta=0, drop=1500, step=1, force=0,
          quant='omega', path='output.h5'):
    """
    Run Singular Value Decomposition on the quaternions.

    Currently eats all my RAM. All of it. These aren't the old days where
    you'd deal with three swap file purges before breakfast. If Mac OS X
    runs out of application memory it does not fail gracefully, but instead
    stumbles around like a stunned cow until you put it out of its misery.

    elem: (0, 1, 2, 3) The element of the quaternion we're looking at
    n:    Sample time (row length of trajectory matrix) (int, timesteps)
    J:    Lag time (how many steps ahead of the preceding row each row of
          the trajectory matrix is) (int, timesteps). Default is J = n, as
          suggested by Richard and Tom's book. J = 1 is the one suggested
          by Broomhead & King, but this seems to result in too much
          correlation between successive rows.
    drop: How many values at the beginning of the reconstructed timeseries
          to disregard (int)
    """
    if J is None:
        J = n

    with pd.HDFStore(path) as store:
        with h5py.File(path, 'r+') as h5pystore:
            data = store.sim.H_omega[str(elem)]
            quat = data.as_matrix()
            assert len(quat) % n == 0
            rows = (len(quat) - (n - J)) // J

            params = store.sim.tail(1).to_string() + \
                ''.join(map(str, [elem, n, J, m]))
            digest = hashlib.md5(params.encode()).hexdigest()

            if force or ('/svd/traj' not in h5pystore or
                         h5pystore['/svd/traj'].attrs["digest"] !=
                         np.string_(digest)):
                print("Constructing new trajectory matrix...", flush=1)
                if '/svd/traj' in h5pystore:
                    del h5pystore['/svd']
                traj = h5pystore.create_dataset('/svd/traj', (rows, n))
                if J != n:
                    progress = tqdm(total=rows, leave=1)
                    for i, j in zip(range(rows), range(0, len(quat), J)):
                        traj[i] = quat[j:j + n]
                        progress.update()
                    progress.close()
                else:
                    traj[:] = quat.reshape((rows, n))
                traj.attrs["digest"] = np.string_(digest)
                print()
            else:
                print("Reusing previous trajectory matrix...", flush=1)
                traj = h5pystore.require_dataset('/svd/traj', (rows, n),
                                                 np.float32)

            print("Constructing OoC trajectory and covariance matrices...",
                  flush=1)
            datraj = da.from_array(traj, chunks=(1000, n))
            cov = da.dot(datraj.transpose(), datraj)
            # normn = len(quat) - (n-1)
            # traj = normn**-1/2 * hankel(quat, np.zeros(n))[::J]

            print("Running SVD...", flush=1)
            U, s, V = da.linalg.svd(cov)
            S = np.diag(s[0:m])
            # return (S[0, 0], S[1, 1], S[2, 2])
            # print(traj.shape, cov.shape, U.shape, s.shape, V.shape)
            print(S)
            # recon = dot(U[:, 0:m], dot(S, V[0:m, :]))

            # print("Writing U", flush=1)
            # h5pystore.create_dataset('/svd/U', data=U)
            # print("Writing s", flush=1)
            # h5pystore.create_dataset('/svd/s', data=S)
            # print("Writing V", flush=1)
            # h5pystore.create_dataset('/svd/V', data=V)
            # print("Writing cov", flush=1)
            # h5pystore.create_dataset('/svd/cov', data=cov)

            print("Taking Poincaré sections and projecting dataset...",
                  flush=1)
            # plane_normal = np.cross(U[:, 1], U[:, 2])
            # plane_normal /= np.norm(plane_normal)
            x = dot(traj, U[:, 0])
            y = dot(traj, U[:, 1])
            z = dot(traj, U[:, 2])
            idx = np.sign(np.roll(z, 1)) != np.sign(z)
            # idx = np.roll(z, 1) > z
            # idx = da.isclose(dist, 0)

            print("Plotting!", flush=1)
            fig = plt.figure(figsize=(8, 8))
            fig.set_tight_layout(True)
            ax = fig.add_subplot(111)
            ax.scatter(traj[:, 0][idx][::step],
                       np.roll(traj[:, 0][idx][::step], 1), marker='+')
            # ax.set_title("Poincare section of omega{} through plane of "
            #              "first two singular vectors".format(str(elem)))

            # embed = []
            # for i in range(m):
            #     embed.append(np.zeros(traj.shape[0] - drop))
            #     for j in range(traj.shape[0] - drop):
            #         embed[i][j] = np.inner(V[:, i], traj[j])
            # figatt = plt.figure(figsize=(8, 8))
            # figatt.set_tight_layout(True)
            # axesatt = figatt.add_subplot(111)
            # axesatt.plot(embed[0], embed[1])

            plt.show()