def cos_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = linalg.norm(vec1)
    norm_vec2 = linalg.norm(vec2)
    if norm_vec1 * norm_vec2 != 0:
        return dot_product / (norm_vec1 * norm_vec2)
    else:
        return 0
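# Usage sketch for cos_similarity above (a minimal demo, assuming
# `import numpy as np` and `from numpy import linalg`, matching the names
# the function uses; the vectors are illustrative).
import numpy as np
from numpy import linalg

a = np.array([1.0, 0.0, 1.0])
b = np.array([1.0, 1.0, 0.0])
# Identical directions give 1.0; these two overlap in one coordinate.
print(cos_similarity(a, a))  # 1.0
print(cos_similarity(a, b))  # 0.5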
def corrSparsityCliques(x, obj, constraints):
    n = len(x)  # Size of the independent-variable vector
    C = identity(n, format='lil')  # Initialise an identity matrix in LIL format
    # Retrieve cross-terms of the objective function and update the C matrix
    objectiveCrossDependencies = getObjectiveCrossDependencies(obj, x)
    updateCWithCrossDependencies(objectiveCrossDependencies, C)
    # Retrieve co-dependent terms for every constraint and update the C matrix
    for constraint in constraints:
        constraintCrossDependencies = getConstraintCodependencies(constraint, x)
        updateCWithCrossDependencies(constraintCrossDependencies, C)
    C = csc_matrix(C)  # Convert to CSC, the most efficient structure for the next steps
    C = C + norm(C, ord=1) * identity(n, format='csc')  # Ensure strict diagonal dominance for C
    cliqueStructure = cliquesFromSpMatD(C)  # Large function that will generate the clique structure
    return cliqueStructure
def l2_normalize(self):
    '''L2-normalize all vectors in the matrix.'''
    l2norm = linalg.norm(self.matrix, axis=1, ord=2)
    l2norm[l2norm == 0.0] = 1.0  # Convert 0 values to 1 to avoid division by zero
    self.matrix = csr_matrix(self.matrix / l2norm.reshape(len(l2norm), 1))
def get_pagerank(adj_mat, theta=.85, epsilon=1e-03, max_iter=20):
    """
    Returns the vector of pagerank scores
    :param adj_mat: (scipy.sparse.csc.csc_matrix) n x n
    :param theta: (numeric) damping factor
    :param epsilon: (numeric) convergence parameter
    :param max_iter: (int) maximum number of iterations
    :return: vector of pagerank scores n x 1
    """
    n = adj_mat.shape[0]
    g_mat = get_gmat(adj_mat, theta)
    pr_vec = sparse.csc_matrix(np.ones(n)) / n
    norm_iter = n  # start above the convergence threshold
    i = 0
    norm = []
    while (norm_iter > epsilon * n) and (i < max_iter):
        pr_iter = pr_vec.dot(g_mat)
        norm_iter = linalg.norm(pr_vec - pr_iter)
        pr_vec = pr_iter
        i += 1
        norm += [norm_iter]
        print("iter {0}: {1}".format(i, norm_iter))
    return pr_vec.T
def normalize_sparse(M, norm="frag", order=1, iterations=3):
    """Applies a normalization type to a sparse matrix."""
    try:
        from scipy.sparse import csr_matrix
    except ImportError as e:
        print(str(e))
        print("I am performing dense normalization by default.")
        return normalize_dense(M.todense())
    r = csr_matrix(M)
    if norm == "SCN":
        for _ in range(1, iterations):
            row_sums = np.array(r.sum(axis=1)).flatten()
            col_sums = np.array(r.sum(axis=0)).flatten()
            row_indices, col_indices = r.nonzero()
            r.data /= row_sums[row_indices] * col_sums[col_indices]
    elif norm == "global":
        try:
            from scipy.sparse import linalg
            r = linalg.norm(M, ord=order)
        except (ImportError, AttributeError) as e:
            print(str(e))
            print("I can't import linalg tools for sparse matrices.")
            print("Please upgrade your scipy version to 0.16.0.")
    elif callable(norm):
        r = norm(M)
    else:
        print("Unknown norm. Returning input as fallback")
    return r
def normalize_sparse(M, norm="frag", order=1, iterations=3):
    """Applies a normalization type to a sparse matrix."""
    try:
        from scipy.sparse import csr_matrix
    except ImportError as e:
        print(str(e))
        print("I am performing dense normalization by default.")
        return normalize_dense(M.todense())
    r = csr_matrix(M)
    if norm == "SCN":
        for iteration in range(1, iterations):
            row_sums = np.array(r.sum(axis=1)).flatten()
            col_sums = np.array(r.sum(axis=0)).flatten()
            row_indices, col_indices = r.nonzero()
            r.data /= row_sums[row_indices] * col_sums[col_indices]
    elif norm == "global":
        try:
            from scipy.sparse import linalg
            r = linalg.norm(M, ord=order)
        except (ImportError, AttributeError) as e:
            print(str(e))
            print("I can't import linalg tools for sparse matrices.")
            print("Please upgrade your scipy version to 0.16.0.")
    elif callable(norm):
        r = norm(M)
    else:
        print("I don't recognize this norm, I am returning the input matrix by default.")
    return r
def run_iterative(T, I, neg_percent_tr, links_te, signs_te, c, convergence_threshold):
    R = T.copy().asformat('csc')   # initialize with T
    R_ = T.copy().asformat('csc')  # initialize with T
    norm2 = 999999999
    it = 0
    while norm2 > convergence_threshold:
        if it % 2 == 0:
            R_ = c * (T.dot(R)) + (1 - c) * I
        else:
            R = c * (T.dot(R_)) + (1 - c) * I
        norm2 = norm(R - R_)
        print('Iteration {} and difference {}'.format(it, norm2))
        it += 1
        # uncomment to see progress while converging
        # if it % 2 == 0:
        #     evaluate(R.copy().asformat('dok'), neg_percent_tr, links_te, signs_te)
        # else:
        #     evaluate(R_.copy().asformat('dok'), neg_percent_tr, links_te, signs_te)
    # get final evaluation
    # if we quit when it = 2, then it = 1 was the last iteration to execute,
    # and so R was the last result
    if it % 2 == 0:
        evaluate(R.copy().asformat('dok'), neg_percent_tr, links_te, signs_te)
    else:
        evaluate(R_.copy().asformat('dok'), neg_percent_tr, links_te, signs_te)
def normalize_test(self, K_test, feats_test):
    # K_test is unnormalized
    m = K_test.shape[0]
    # norms_test = np.sum(feats_test*feats_test, axis=1)
    norms_test = norm(feats_test, axis=1)
    # matrix of sqrt(K(xtest, xtest) * K(xtrain, xtrain))
    matrix_norms = np.outer(norms_test, self.norms_train)  # + 1e-40
    K_test = np.divide(K_test, matrix_norms)
    return K_test
def to_svd(beg=200, end=2613, jump=300, with_norm=True):
    """
    Runs SVD on the sparse matrix loaded from with_idf and saves the result
    for several values of k under the name k, where k = 1, 2, 3, ..., A.shape-1
    (A being the matrix).
    :param beg: starting value of k
    :param end: final value of k
    :param jump: step between values of k
    :param with_norm: whether to normalize before saving
    :return: saves the sparse matrices to files
    """
    spare = load_sparse_csr('usage_files/with_idf.npz')
    x = []
    for k in range(beg, end, jump):
        print("Starting: " + str(k))
        U, sigma, V = svds(spare, k=k)
        print("Done svd")
        reconsign = csc_matrix(U) * diags(sigma, format='csc') * csc_matrix(V)
        print("Done multiplication")
        if with_norm:
            x = [(1 / norm(reconsign.getcol(i))) for i in range(reconsign.shape[1])]
            reconsign = reconsign.multiply(csc_matrix(x))
            x = []
        save_sparse('svd/' + str(k), csc_matrix(reconsign))
        print("Saved " + str(k))
def lbfgs_step(gfk, k, vecs, props):
    m = props.get("lbfgsMemory", 10)
    q = gfk
    a = {}
    if k == 0:
        return -gfk / linalg.norm(gfk, numpy.inf)
    k = max(vecs.keys()) + 1  # numpy.max fails on dict_keys in Python 3
    bl = max(0, k - m)
    for i in range(k - 1, bl - 1, -1):
        (sk, yk, rhok) = vecs[i]
        a[i] = rhok * numpy.dot(sk, q)
        q = q - a[i] * yk
    (sk, yk, rhok) = vecs[k - 1]
    gammak = numpy.dot(sk, yk) / numpy.dot(yk, yk)
    r = gammak * q
    for i in range(bl, k):
        (sk, yk, rhok) = vecs[i]
        beta = rhok * numpy.dot(yk, r)
        r = r + sk * (a[i] - beta)
    return -r
def orthogonality(A, g):
    """Measure orthogonality between a vector and the null space of a matrix.

    Compute a measure of orthogonality between the null space
    of the (possibly sparse) matrix ``A`` and a given vector ``g``.

    The formula is a simplified (and cheaper) version of formula (3.13)
    from [1]_.
    ``orth = norm(A g, ord=2)/(norm(A, ord='fro')*norm(g, ord=2))``.

    References
    ----------
    .. [1] Gould, Nicholas IM, Mary E. Hribar, and Jorge Nocedal.
           "On the solution of equality constrained quadratic
           programming problems arising in optimization."
           SIAM Journal on Scientific Computing 23.4 (2001): 1376-1395.
    """
    # Compute vector norms
    norm_g = np.linalg.norm(g)
    # Compute Frobenius norm of the matrix A
    if issparse(A):
        norm_A = linalg.norm(A, ord='fro')
    else:
        norm_A = np.linalg.norm(A, ord='fro')

    # Check if norms are zero
    if norm_g == 0 or norm_A == 0:
        return 0

    norm_A_g = np.linalg.norm(A.dot(g))
    # Orthogonality measure
    orth = norm_A_g / (norm_A * norm_g)
    return orth
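# Usage sketch for orthogonality above (a minimal demo, assuming the
# module-level imports the function relies on: `import numpy as np`,
# `from scipy.sparse import issparse, csr_matrix`, and
# `import scipy.sparse.linalg as linalg`; the matrix and vectors are
# illustrative).
import numpy as np
from scipy.sparse import issparse, csr_matrix
import scipy.sparse.linalg as linalg

A = csr_matrix(np.array([[1.0, 0.0, 0.0],
                         [0.0, 1.0, 0.0]]))
g_null = np.array([0.0, 0.0, 1.0])  # lies in the null space of A
g_row = np.array([1.0, 0.0, 0.0])   # lies in the row space of A
print(orthogonality(A, g_null))  # 0: fully aligned with the null space
print(orthogonality(A, g_row))   # ~0.707: far from the null space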
def baseline():
    filename = './data/GSM3067191_08hpf.csv'  # 8hpf Zebrafish embryos
    df = pd.read_csv(filename)
    # Each row now corresponds to a cell (example) and each column to a gene (feature)
    data = df.values[:, 1:].astype(float).T  # np.float is removed in modern NumPy
    # Make data sparse
    sata = sparse.dok_matrix(data)
    k = 50
    # Thin SVD so the reconstruction shapes line up for non-square data
    lsvec, svals, rsvect = la.svd(data, full_matrices=False)
    dnorm = sla.norm(sata)
    approx = lsvec.dot(np.diag(svals)).dot(rsvect)
    print("SVD reconstruction error:", la.norm(sata - approx) / dnorm)
    avgs = np.sum(data[:], axis=0) / data.shape[0]
    plt.plot(avgs)
    plt.show()
    expression_counts = np.sum(sata, axis=0)
    best = np.array(np.argsort(-expression_counts))[0]
    common = best[:k]
    uncommon = best[k:]
    common_norm = la.norm(data[:, common])
    print("Baseline reconstruction error:",
          np.sqrt(dnorm**2 - common_norm**2) / dnorm)
def cosine_similarities(raw_texts, dictionary_path='dictionary/AmericanDictionary.csv'):
    dictionary = read_csv(dictionary_path, sep=';', error_bad_lines=False)
    documents_per_code = dictionary.groupby('Icd1')['DiagnosisText'].agg(
        lambda x: ' '.join(x))
    tfidf_mapper = TfidfVectorizer()
    dictionary_vectors = tfidf_mapper.fit_transform(documents_per_code)
    raw_texts_vectors = tfidf_mapper.transform(raw_texts)
    products = raw_texts_vectors.dot(dictionary_vectors.T)
    raw_norms = norm(raw_texts_vectors, axis=1)
    dictionary_norms = norm(dictionary_vectors, axis=1)
    raw_norms[raw_norms == 0] = 1.0
    dictionary_norms[dictionary_norms == 0] = 1.0
    d = products / np.expand_dims(raw_norms, axis=1)
    d /= np.expand_dims(dictionary_norms, axis=0)
    return d
def gs_solve(D, b, lam):
    "Solve the smoothing problem with Gauss-Seidel iteration. Variables same as assignment."
    (m, n) = np.shape(D)
    ident = spsp.identity(n)
    A = lam * D.T @ D + ident
    # The matrix A is our target: we want to solve Ax = b. Gauss-Seidel splits
    # A into two components: lower and strictly-upper triangular
    L = spsp.tril(A, format='csc')
    U = A - L
    assert spspla.norm(A - (L + U), np.inf) < 1e-3
    Linv = spspla.inv(L)
    guess = spsp.csc_matrix(np.zeros(pred_shape(A)))
    err = 10
    while err > tol:
        temp = b - U @ guess
        guess = Linv @ temp
        result = A @ guess
        err = npla.norm(result - b, 2)
        # print(err)
    return guess
def propagation(M, adj, alpha=0.7, tol=10e-6):  # TODO equation, M, alpha
    """Network propagation iterative process

    Iterative algorithm for applying propagation using random walk on a network:
    Initialize::

        X1 = M

    Repeat::

        X2 = alpha * X1.A + (1-alpha) * M
        X1 = X2

    Until::

        norm(X2-X1) < tol

    Where::

        A : degree-normalized adjacency matrix

    Parameters
    ----------
    M : sparse matrix
        Data matrix to be diffused.
    adj : sparse matrix
        Adjacency matrix.
    alpha : float, default: 0.7
        Diffusion/propagation factor with 0 <= alpha <= 1.
        For alpha = 0 : no diffusion.
        For alpha = 1 : pure random-walk diffusion (the data matrix no
        longer contributes).
    tol : float, default: 10e-6
        Convergence threshold.

    Returns
    -------
    X2 : sparse matrix
        Smoothed matrix.
    """
    n = adj.shape[0]
    # diagonal = 1 -> degree
    # TODO to set diagonal = 0 before applying eye
    adj = adj + sp.eye(n, dtype=np.float32)
    d = sp.dia_matrix((np.array(adj.sum(axis=0))**-1, [0]),
                      shape=(n, n), dtype=np.float32)
    A = adj.dot(d)
    X1 = M.astype(np.float32)
    X2 = alpha * X1.dot(A) + (1 - alpha) * M
    i = 0
    while norm(X2 - X1) > tol:
        X1 = X2
        X2 = alpha * X1.dot(A) + (1 - alpha) * M
        i += 1
        print(' Propagation iteration = {} ----- {}'.format(
            i, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
    return X2
def katz_hungarian_connection(adj, nodes, n_perturbations, threshold=0.000001,
                              nsteps=10000, dataset_name=None):
    graph = nx.from_scipy_sparse_matrix(adj, create_using=nx.Graph)
    rows = nodes[:n_perturbations]
    cols = nodes[n_perturbations:]
    precomputed_path = f'data/tmp/{dataset_name}_katz.pkl'
    if dataset_name is not None and os.path.exists(precomputed_path):
        print("Loading precomputed_katz...")
        with open(precomputed_path, 'rb') as ff:
            sigma = pickle.load(ff)
    else:
        D = nx.linalg.laplacianmatrix.laplacian_matrix(graph) + adj
        D_inv = spalg.inv(D)
        D_invA = D_inv * adj
        l, v = spalg.eigs(D_invA, k=1, which="LR")
        lmax = l[0].real
        alpha = (1 / lmax) * 0.9
        sigma = sp.csr_matrix(D_invA.shape, dtype=float)  # np.float is removed in modern NumPy
        print('Calculate sigma matrix')
        for i in range(nsteps):
            sigma_new = alpha * D_invA * sigma + sp.identity(
                adj.shape[0], dtype=float, format='csr')
            diff = abs(spalg.norm(sigma, 1) - spalg.norm(sigma_new, 1))
            sigma = sigma_new
            print(diff)
            if diff < threshold:
                break
        print('Number of steps taken: ' + str(i))
        sigma = sigma.toarray().astype('float')
        if dataset_name is not None:
            pickle.dump(sigma, open(precomputed_path, "wb"))
    similarity = {u: {v: sigma[u][v] for v in cols} for u in rows}
    mtx = np.array(
        [np.array(list(similarity[u].values())) for u in similarity])
    i_u = {i: u for i, u in enumerate(similarity)}
    i_v = {i: v for i, v in enumerate(similarity[list(similarity.keys())[0]])}
    u, v = linear_sum_assignment(+mtx)
    return [[i_u[i], i_v[j]] for i, j in zip(u, v)]
def _compute_cost(self):
    pattern = self._R != 0
    proj_ABt = pattern.multiply((self._A).dot(self._B.T))
    cost = splinalg.norm((proj_ABt - self._R).multiply(
        self._ptwise_sqrt_W_sparse), 'fro') ** 2
    norm_A = self._Lambda * (np.linalg.norm(self._A, 'fro')) ** 2
    norm_B = self._Lambda * (np.linalg.norm(self._B, 'fro')) ** 2
    return cost + norm_A + norm_B
def sparse_cosine(x_mat: _SPARSE_SCIPY_TYPES, y_mat: _SPARSE_SCIPY_TYPES) -> 'np.ndarray':
    """Cosine distance between each row in x_mat and each row in y_mat.

    :param x_mat: scipy.sparse like array with ndim=2
    :param y_mat: scipy.sparse like array with ndim=2
    :return: np.ndarray with ndim=2
    """
    from scipy.sparse.linalg import norm

    # we need the np.asarray, otherwise we get an np.matrix object that iterates differently
    return 1 - np.clip(
        np.asarray(
            x_mat.dot(y_mat.T) /
            np.outer(norm(x_mat, axis=1), norm(y_mat, axis=1))),
        -1, 1,
    )
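# Usage sketch for sparse_cosine above (a minimal demo, assuming
# `import numpy as np` and taking _SPARSE_SCIPY_TYPES to be an alias for the
# scipy sparse matrix types; the function imports `norm` internally).
import numpy as np
from scipy.sparse import csr_matrix

x = csr_matrix(np.array([[1.0, 0.0], [0.0, 1.0]]))
y = csr_matrix(np.array([[1.0, 0.0]]))
# Row 0 of x matches y (distance 0); row 1 is orthogonal (distance 1).
print(sparse_cosine(x, y))  # [[0.], [1.]]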
def _test_symmetry_W(problem_filename):
    problem = read_fclib_format(problem_filename)[1]
    A = csr_matrix(N.SBM_to_sparse(problem.M)[1])
    # print("A=", A)
    # print("A=", np.transpose(A))
    # print("A-A^T", A - np.transpose(A))
    print("norm A-A^T", linalg.norm(A - np.transpose(A)))
    is_symmetric = False
    symmetry_test = linalg.norm(A - np.transpose(A), np.inf)
    print("symmetry_test === ", symmetry_test)
    is_symmetric = (symmetry_test <= 1e-12)
    is_dd, dd_test = dd(A)
    print("is_dd", is_dd)
    print("is_symmetric", is_symmetric)
    return is_symmetric, symmetry_test, is_dd, dd_test
def kernel_function(self, x, y, p=2, sigma=5.0):
    if self.kernel == 'linear_kernel':
        return np.dot(x, y)
    if self.kernel == 'polynomial_kernel':
        return (1 + np.dot(x, y))**p
    if self.kernel == 'gaussian_kernel':
        return np.exp(-linalg.norm(x - y)**2 / (2 * (sigma**2)))
def searchDirection(gk, Hk, epsilon=1e-8):
    # Compute the search direction
    pk = -lsqr(Hk.tocsr(), gk.toarray().ravel())[0]
    if -pk @ gk <= epsilon * norm(gk) * np.linalg.norm(pk):
        gk = gk.toarray()  # needed here to avoid a failure later
        # Fall back to steepest descent to ensure the directional
        # derivative is negative in this direction
        return -gk.ravel()
    return pk
def __relative_error(A, W, H, trace_AT_A):
    '''Calculate the relative error over a sparse A matrix'''
    norm = trace_AT_A - 2 * __trace_of_product(
        H.T, W.T @ A) + __trace_of_product(H.T, W.T @ W @ H)
    relative_norm = norm / slg.norm(A)
    return relative_norm
def testit(problem='cylinderwake', N=None, nu=None, Re=None,
           nnwtnstps=9, npcrdstps=5, palpha=1e-5):
    vel_nwtn_tol = 1e-14
    # prefix for data files
    data_prfx = problem
    # dir to store data
    ddir = 'data/'
    # paraview output
    ParaviewOutput = True
    proutdir = 'results/'

    femp, stokesmatsc, rhsd = dnsps.get_sysmats(problem=problem, N=N, Re=Re,
                                                nu=nu, scheme='TH',
                                                mergerhs=True, bccontrol=True)
    proutdir = 'results/'
    ddir = 'data/'

    import scipy.sparse.linalg as spsla
    # Python 3 print calls (the originals were Python 2 print statements)
    print('get expmats: ||A|| = {0}'.format(spsla.norm(stokesmatsc['A'])))
    print('get expmats: ||Arob|| = {0}'.format(spsla.norm(stokesmatsc['Arob'])))

    stokesmatsc['A'] = stokesmatsc['A'] + 1./palpha*stokesmatsc['Arob']
    b_mat = 0.*1./palpha*stokesmatsc['Brob']
    brhs = 1.5*b_mat[:, :1] - 1.5*b_mat[:, 1:]

    soldict = stokesmatsc  # containing A, J, JT
    soldict.update(femp)  # adding V, Q, invinds, diribcs
    soldict.update(fv=rhsd['fv']+brhs, fp=rhsd['fp'],
                   N=N, nu=nu,
                   vel_nwtn_stps=nnwtnstps,
                   vel_pcrd_stps=npcrdstps,
                   vel_nwtn_tol=vel_nwtn_tol,
                   ddir=ddir, get_datastring=None,
                   clearprvdata=True,
                   data_prfx=data_prfx,
                   paraviewoutput=ParaviewOutput,
                   vfileprfx=proutdir+'vel_',
                   pfileprfx=proutdir+'p_')

    #
    # compute the uncontrolled steady state Navier-Stokes solution
    #
    v_ss_nse, list_norm_nwtnupd = snu.solve_steadystate_nse(**soldict)
def wolfeLineSearch(l, thetas_k, p, fk, gk, pk, c1, c2, rho, ak=1.0, nmaxls=100):
    pkgk = pk @ gk
    # Increase the step length until the Armijo rule is (almost) not satisfied
    while 0.5 * norm(r(l, thetas_k + rho * ak * pk, p)) < fk + c1 * rho * ak * pkgk[0]:
        ak *= rho
    # Use bisection to find the optimal step length
    aU = ak  # upper step length limit
    aL = 0   # lower step length limit
    for i in range(nmaxls):
        # Find the midpoint of aU and aL
        ak = 0.5 * (aU + aL)
        if 0.5 * norm(r(l, thetas_k + ak * pk, p)) > fk + c1 * ak * pkgk:
            # Armijo condition is not satisfied, decrease the upper limit
            aU = ak
            continue
        gk_ak_pk = Jacobi(l, thetas_k + ak * pk).T @ r(l, thetas_k + ak * pk, p)
        if pk @ gk_ak_pk > -c2 * pkgk:
            # Upper Wolfe condition is not satisfied, decrease the upper limit
            aU = ak
            continue
        if pk @ gk_ak_pk < c2 * pkgk:
            # Lower Wolfe condition is not satisfied, increase the lower limit
            aL = ak
            continue
        # Otherwise, all conditions are satisfied, stop the search
        break
    return ak
def diffusion(M, adj, alpha=0.7, tol=10e-6):  # TODO equation, M, alpha
    """Network propagation iterative process

    Iterative algorithm for applying propagation using random walk on a network:
    Initialize::

        X1 = M

    Repeat::

        X2 = alpha * X1.A + (1-alpha) * M
        X1 = X2

    Until::

        norm(X2-X1) < tol

    Where::

        A : degree-normalized adjacency matrix

    Parameters
    ----------
    M : sparse matrix
        Data matrix to be diffused.
    adj : sparse matrix
        Adjacency matrix.
    alpha : float, default: 0.7
        Diffusion/propagation factor with 0 <= alpha <= 1.
        For alpha = 0 : no diffusion.
        For alpha = 1 : pure random-walk diffusion (the data matrix no
        longer contributes).
    tol : float, default: 10e-6
        Convergence threshold.

    Returns
    -------
    X2 : sparse matrix
        Smoothed matrix.

    Notes
    -----
    Copied from the stratipy Python library
    """
    n = adj.shape[0]
    adj = adj + sp.eye(n)
    d = sp.dia_matrix((np.array(adj.sum(axis=0))**-1, [0]), shape=(n, n))
    A = adj.dot(d)
    X1 = M
    X2 = alpha * X1.dot(A) + (1 - alpha) * M
    i = 0
    while norm(X2 - X1) > tol:
        X1 = X2
        X2 = alpha * X1.dot(A) + (1 - alpha) * M
        i += 1
    return X2
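# Usage sketch for diffusion above (a minimal demo, assuming the imports the
# function relies on: `import numpy as np`, `import scipy.sparse as sp`, and
# `from scipy.sparse.linalg import norm`; the 3-node chain graph is illustrative).
import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import norm

# Chain graph 0 - 1 - 2 and a signal concentrated on node 0.
adj = sp.csr_matrix(np.array([[0., 1., 0.],
                              [1., 0., 1.],
                              [0., 1., 0.]]))
M = sp.csr_matrix(np.array([[1., 0., 0.]]))
smoothed = diffusion(M, adj, alpha=0.7)
print(smoothed.toarray())  # mass has spread from node 0 toward its neighbours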
def dominate_value(mx):
    x1 = csr_matrix(random.rand(mx.shape[0], 1))
    x2 = x1
    print(x1.todense())
    x1 = mx * x1
    print(norm(x1, ord=float('inf')))
    x1 = x1 / norm(x1, ord=float('inf'))
    while abs(norm(x1 - x2)) > 0.0000001:
        print(x1.todense())
        print('\n')
        x2 = x1
        x1 = mx.dot(x1)
        x1 = x1 / norm(x1, ord=float('inf'))
    print((x1 / norm(x1)).todense())
def test_sparse_varfd_1d(self):
    n_points = 100
    degree = 10
    quad = hm.Quad.gauss_hermite(n_points)
    mat1 = quad.varfd('1', degree, [0, 0], sparse=True).matrix
    mat2 = quad.varfd('x', degree, [0], sparse=True).matrix
    bk_ou = mat1 - mat2
    off_diag = bk_ou - sp.diags(bk_ou.diagonal())
    self.assertAlmostEqual(las.norm(off_diag), 0)
def _calculate_neighbor_weight_matrix(self, train_r: csc_matrix) -> np.ndarray:
    l2norm = norm(train_r, ord=2, axis=1)
    l2norm[l2norm == 0] = 1  # handle zero vectors
    U: csc_matrix = train_r.multiply(1 / l2norm.reshape(-1, 1))
    UUT = U.dot(U.transpose()).toarray()  # dense
    W = np.exp(self._tau * np.power(1 - UUT, self._k))
    np.fill_diagonal(W, 0)
    return W
def norm_svd(di, q):
    """
    Computes the normalized score between di and q (q has unit norm).
    :param di: document column vector
    :param q: query vector with norm 1
    :return: the dot product of q and di divided by the norm of di
    """
    a = norm(di)
    return ((q.dot(di) / a).toarray())[0][0]
def test_basics(self):
    x = np.array([[-2, -1], [-2, 1], [2, 1], [2, -1]])
    knn = KNNDense(n_neighbors=1)
    knn.fit(x)
    truth = np.zeros(16).reshape((4, 4))
    truth[0, 1] = 1
    truth[2, 3] = 1
    truth = sparse.csr_matrix(truth + truth.T)
    self.assertAlmostEqual(norm(truth - knn.adjacency_), 0)
def euclidean_distance(vector1, vector2):
    from scipy.sparse import csr_matrix
    from scipy.sparse.linalg import norm
    vector1 = csr_matrix(vector1)
    vector2 = csr_matrix(vector2)
    result = norm(vector1 - vector2)
    # print(result)
    return result
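# Usage sketch for euclidean_distance above; the function is self-contained
# (it does its own scipy imports), so the vectors below are the only inputs.
v1 = [1.0, 0.0, 0.0]
v2 = [0.0, 1.0, 0.0]
print(euclidean_distance(v1, v2))  # sqrt(2) ~ 1.414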
def test_basics(self):
    x = np.array([[-2, -1], [-2, 1], [2, 1], [2, -1]])
    knn = KNeighborsTransformer(n_neighbors=1)
    knn.fit(x)
    gt = np.zeros(16).reshape((4, 4))
    gt[0, 1] = 1
    gt[2, 3] = 1
    gt = sparse.csr_matrix(gt + gt.T)
    self.assertAlmostEqual(norm(gt - knn.adjacency_), 0)
def solve(A, name, use_umfpack=False):
    # Compute b such that the exact solution of Ax = b is xe = [1, 1, ...]
    n_rows, n_columns = A.shape
    p_non_zero = A.nnz / (n_rows * n_columns)
    xe = np.ones(n_rows)  # scipy.ones was removed from modern SciPy
    b = A * xe

    # Solve Ax = b
    start_time = datetime.now()
    x = spsolve(A, b, use_umfpack=use_umfpack)
    t = datetime.now() - start_time

    # || xe - x || / ||xe||
    relative_error = norm(x - xe, 2) / norm(xe, 2)

    row = f"{name},{n_rows},{A.nnz},{relative_error}," + \
          f"{t.seconds}.{t.microseconds},{platform.system()}"
    print(row)
def cosine_simil2(A, B):
    """
    Returns the cosine similarity between the rows of A and B.
    The output is a Mx1 vector where M is the number of rows of A.
    A and B are sparse matrices.
    """
    C = A.dot(B.T)
    AN = spla.norm(A, axis=1).reshape((A.shape[0], 1))  # Reshape into a column vector
    BN = np.linalg.norm(B)
    C = C * (1. / (AN * BN))
    return C
def cosine_simil(A, B):
    """
    Returns the cosine similarity between the rows of A and B.
    The output is a Mx1 vector where M is the number of rows of A.
    A and B are sparse matrices.
    """
    if len(A.shape) == 1:
        A = A.reshape((1, A.shape[0]))
    if len(B.shape) == 1:
        B = B.reshape((1, B.shape[0]))
    C = A.dot(B.T)
    # Reshape into a column vector
    if issparse(A):
        AN = spla.norm(A, axis=1).reshape((A.shape[0], 1))
    else:
        AN = np.linalg.norm(A, axis=1).reshape((A.shape[0], 1))
    BN = spla.norm(B) if issparse(B) else np.linalg.norm(B)
    if issparse(C):
        C = C.multiply(1. / (AN * BN))
    else:
        if len(C.shape) == 1:
            C = C.reshape(C.shape[0], 1)
        AN *= BN
        C /= AN
    return C
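# Usage sketch for cosine_simil above (a minimal demo, assuming the imports
# the function relies on: `import numpy as np`,
# `from scipy.sparse import csr_matrix, issparse`, and
# `import scipy.sparse.linalg as spla`; the matrices are illustrative).
import numpy as np
from scipy.sparse import csr_matrix, issparse
import scipy.sparse.linalg as spla

A = csr_matrix(np.array([[1.0, 0.0], [1.0, 1.0]]))
B = csr_matrix(np.array([[1.0, 0.0]]))
# Row 0 of A equals B (similarity 1); row 1 is at 45 degrees (~0.707).
print(cosine_simil(A, B).toarray())  # [[1.0], [0.707...]]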
def genRandomDocInTopic(topic):
    K = 20
    maxlen = -10.0
    id = -1
    tc.vec.set_params(norm=None)
    for i in range(K):
        tid = random.choice(tc.inv_topic[topic])
        d = tc.vec.transform([tc.corpus[tid]])
        doclen = norm(d)  # renamed from `len` to avoid shadowing the builtin
        if doclen > maxlen:
            maxlen, id = doclen, tid
    tc.vec.set_params(norm='l2')
    return tc.datas[id]['body']
def test_sketch_preserves_frobenius_norm(self):
    # Given the probabilistic nature of the sketches
    # we run the test multiple times and check that
    # we pass all/almost all the tries.
    n_errors = 0
    for A in self.test_matrices:
        if issparse(A):
            true_norm = norm(A)
        else:
            true_norm = np.linalg.norm(A)
        for seed in self.seeds:
            sketch = clarkson_woodruff_transform(
                A, self.n_sketch_rows, seed=seed,
            )
            if issparse(sketch):
                sketch_norm = norm(sketch)
            else:
                sketch_norm = np.linalg.norm(sketch)

            if np.abs(true_norm - sketch_norm) > 0.1 * true_norm:
                n_errors += 1
    assert_(n_errors == 0)
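# Standalone sketch of the property the test above checks: a Clarkson-Woodruff
# sketch roughly preserves the Frobenius norm. A minimal demo assuming only
# numpy and scipy; the matrix sizes and sketch size are illustrative.
import numpy as np
from scipy.linalg import clarkson_woodruff_transform

rng = np.random.default_rng(0)
A = rng.standard_normal((2000, 50))
sketch = clarkson_woodruff_transform(A, 200, seed=42)
print(np.linalg.norm(A), np.linalg.norm(sketch))  # the two norms should be close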
def test_norm_axis(self):
    a = np.array([[ 1, 2, 3],
                  [-1, 1, 4]])

    c = csr_matrix(a)

    # Frobenius norm
    assert_equal(norm(c, axis=0), np.sqrt(np.power(np.asmatrix(a), 2).sum(axis=0)))
    assert_equal(norm(c, axis=1), np.sqrt(np.power(np.asmatrix(a), 2).sum(axis=1)))

    assert_equal(norm(c, np.inf, axis=0), max(abs(np.asmatrix(a)).sum(axis=0)))
    assert_equal(norm(c, np.inf, axis=1), max(abs(np.asmatrix(a)).sum(axis=1)))

    assert_equal(norm(c, -np.inf, axis=0), min(abs(np.asmatrix(a)).sum(axis=0)))
    assert_equal(norm(c, -np.inf, axis=1), min(abs(np.asmatrix(a)).sum(axis=1)))

    assert_equal(norm(c, 1, axis=0), abs(np.asmatrix(a)).sum(axis=0))
    assert_equal(norm(c, 1, axis=1), abs(np.asmatrix(a)).sum(axis=1))

    assert_equal(norm(c, -1, axis=0), min(abs(np.asmatrix(a)).sum(axis=0)))
    assert_equal(norm(c, -1, axis=1), min(abs(np.asmatrix(a)).sum(axis=1)))

    # _multi_svd_norm is not implemented for sparse matrices
    assert_raises(NotImplementedError, norm, c, 2, 0)
def ksvd(AA, S, max_iter=10):
    """
    Apply KSVD from the starting dictionary AA, given a set of signals S.
    """
    M, N = AA.shape
    P = S.shape[1]
    A = AA.copy()
    X = zeros((N, P))
    i = 0
    while i < max_iter:
        # SMV, and slow:
        # for p in range(P):
        #     X[:, p] = mp.omp(A, S[:, p].reshape(M, 1), 0.1).flatten()

        # MMV. Default: no residual tolerance, max nonzeros 10%.
        # Assume dictionary A is normalized!
        X = sklearn.linear_model.orthogonal_mp(A, S)

        print(i, linalg.norm(S - dot(A, X)),
              sum([sparse.zero_norm(X[:, p], 0.01) for p in range(P)]))

        for l in range(N):
            nonzeros = sparse.nonzero_idx(X[l, :])  # xs with nonzero in row 'l'
            if len(nonzeros) == 0:
                continue
            E_w = (S[:, nonzeros] - dot(A, X[:, nonzeros])
                   + dot(A[:, l].reshape(M, 1),
                         X[l, nonzeros].reshape(1, len(nonzeros))))
            try:
                u, s, v = scipy.sparse.linalg.svds(E_w, k=1)
            except Exception:
                continue
            A[:, l] = u.flatten()
            X[l, nonzeros] = s * v.flatten()
        i += 1
    return A, X
def test_norm(self):
    a = np.arange(9) - 4
    b = a.reshape((3, 3))
    b = csr_matrix(b)

    # Frobenius norm is the default
    assert_equal(norm(b), 7.745966692414834)
    assert_equal(norm(b, 'fro'), 7.745966692414834)

    assert_equal(norm(b, np.inf), 9)
    assert_equal(norm(b, -np.inf), 2)
    assert_equal(norm(b, 1), 7)
    assert_equal(norm(b, -1), 6)

    # _multi_svd_norm is not implemented for sparse matrices
    assert_raises(NotImplementedError, norm, b, 2)
    assert_raises(NotImplementedError, norm, b, -2)
def sp_expm(A, p=13, sparse=False):
    """
    Sparse matrix exponential.

    Reference
    ---------
    Expokit, ACM Transactions on Mathematical Software, 24(1):130-156, 1998
    """
    if _isdiag(A.indices, A.indptr, A.shape[0]):
        A.data = np.exp(A.data)
        return A
    N = A.shape[0]
    # Pade coefficients
    c = np.zeros(p + 1, dtype=float)
    c[0] = 1
    for k in range(p):
        c[k + 1] = c[k] * ((p - k) / ((k + 1.0) * (2.0 * p - k)))
    # Scaling
    if sparse:
        A = A.tocsc()
        nrm = spla.norm(A, np.inf)
    else:
        A = A.toarray()
        nrm = la.norm(A, np.inf)
    if nrm > 0.5:
        nrm = max(0, np.fix(np.log(nrm) / np.log(2)) + 2)
        A = 2.0**(-nrm) * A
    # Horner evaluation of the irreducible fraction
    if sparse:
        I = sp.identity(N, dtype=complex, format='csc')
    else:
        I = np.identity(N, dtype=complex)
    A2 = A.dot(A)
    Q = c[-1] * I
    P = c[p] * I
    odd = 1
    for k in range(p - 2, -1, -1):
        if odd == 1:
            Q = Q.dot(A2) + c[k] * I
        else:
            P = P.dot(A2) + c[k] * I
        odd = 1 - odd
    if odd == 1:
        Q = Q.dot(A)
        Q = Q - P
        if sparse:
            E = -(I + 2.0 * spla.spsolve(Q, P))
        else:
            E = -(I + 2.0 * la.solve(Q, P))
    else:
        P = P.dot(A)
        Q = Q - P
        if sparse:
            E = I + 2.0 * spla.spsolve(Q, P)
        else:
            E = I + 2.0 * la.solve(Q, P)
    # Squaring
    for k in range(int(nrm)):
        E = E.dot(E)
    return sp.csr_matrix(E)
def _ASD(self, M, r=None, reltol=1e-5, maxiter=5000):
    """
    Alternating Steepest Descent (ASD)

    Taken from "Low rank matrix completion by alternating steepest descent
    methods", Jared Tanner and Ke Wei, SIAM J. IMAGING SCIENCES (2014).

    We have a matrix M with incomplete entries, and want to estimate the
    full matrix. Solves the following relaxation of the problem:

        minimize_{X,Y} \frac{1}{2} ||P_{\Omega}(Z^0) - P_{\Omega}(XY)||_F^2

    Where \Omega represents the set of m observed entries of the matrix M
    and P_{\Omega}() is an operator that represents the observed data.

    Inputs:
        M := Incomplete matrix, with NaN on the unknown entries
        r := hypothesized rank of the matrix

    Usage:
        Just call the function _ASD(M)
    """
    # Get shape and Omega
    m, n = M.shape
    if r is None:
        r = min(m - 1, n - 1, 50)

    # Set relative error
    I, J = [], []
    M_list = []
    for i, j in M.keys():
        I.append(i)
        J.append(j)
        M_list.append(M[i, j])
    M_list = np.array(M_list)
    Omega = (I, J)  # tuple indexing avoids the deprecated list-of-lists form

    frob_norm_data = linalg_s.norm(M)
    relres = reltol * frob_norm_data

    # Initialize
    M_omega = M.tocsc()
    U, s, V = linalg_s.svds(M_omega, r)
    S = np.diag(s)
    X = np.dot(U, S)
    Y = V
    itres = np.zeros((maxiter + 1, 1))

    XY = np.dot(X, Y)
    diff_on_omega = M_list - XY[Omega]
    res = linalg.norm(diff_on_omega)
    iter_c = 0
    itres[iter_c] = res / frob_norm_data

    while iter_c < maxiter and res >= relres:
        # Gradient for X
        diff_on_omega_matrix = np.zeros((m, n))
        diff_on_omega_matrix[Omega] = diff_on_omega
        grad_X = np.dot(diff_on_omega_matrix, np.transpose(Y))

        # Stepsize for X
        delta_XY = np.dot(grad_X, Y)
        tx = linalg.norm(grad_X, 'fro')**2 / linalg.norm(delta_XY)**2

        # Update X
        X = X + tx * grad_X
        diff_on_omega = diff_on_omega - tx * delta_XY[Omega]

        # Gradient for Y
        diff_on_omega_matrix = np.zeros((m, n))
        diff_on_omega_matrix[Omega] = diff_on_omega
        Xt = np.transpose(X)
        grad_Y = np.dot(Xt, diff_on_omega_matrix)

        # Stepsize for Y
        delta_XY = np.dot(X, grad_Y)
        ty = linalg.norm(grad_Y, 'fro')**2 / linalg.norm(delta_XY)**2

        # Update Y
        Y = Y + ty * grad_Y
        diff_on_omega = diff_on_omega - ty * delta_XY[Omega]

        res = linalg.norm(diff_on_omega)
        iter_c = iter_c + 1
        itres[iter_c] = res / frob_norm_data

    M_out = np.dot(X, Y)
    out_info = [iter_c, itres]
    return M_out, out_info
def cosine_distance(x, y):
    xy = x.dot(y.T)  # should be a 1x1 matrix
    dist = xy / norm(x) / norm(y)
    return 1 - dist[0, 0]  # ~ arccos()
def get_svd_res(mat, components):
    usvt = svds(mat, k=components, which='LM')  # compute svd
    if usvt[1][-1] == 0:
        usvt = flip_svd_res(usvt)
    enrgy = usvt[1][-1] / norm(mat)  # energy of first principal component
    var = enrgy**2  # variance of first principal component
    return usvt, enrgy, var
def compute_depth(mask_array, normal_array, threshold=100):
    index_map = -np.ones(mask_array.shape)
    x = []
    y = []
    ind = 0
    for (xT, value) in np.ndenumerate(mask_array):
        if value > threshold:
            index_map[xT] = ind
            x.append(xT[1])
            y.append(-xT[0])
            ind = ind + 1
    row = []
    col = []
    data = []
    b = []
    i = 0
    for (xT, value) in np.ndenumerate(index_map):
        if value >= 0:
            normal = normal_array[xT] / 128.0 - 1.0
            normal = normal / linalg.norm(normal)
            if not np.isnan(np.sum(normal)):
                # x
                iother = index_map[xT[0], xT[1] + 1]
                if abs(normal[2]) > 0.01:
                    if iother >= 0:
                        row.append(i)
                        col.append(value)
                        data.append(-normal[2])
                        row.append(i)
                        col.append(iother)
                        data.append(normal[2])
                        b.append(-normal[0])
                        i = i + 1
                    # y
                    iother = index_map[xT[0] - 1, xT[1]]
                    if iother >= 0:
                        row.append(i)
                        col.append(value)
                        data.append(-normal[2])
                        row.append(i)
                        col.append(iother)
                        data.append(normal[2])
                        b.append(-normal[1])
                        i = i + 1
    mat = scipy.sparse.coo_matrix((data, (row, col)), shape=(i, ind)).tocsc()
    b = np.array(b)
    z = scipy.sparse.linalg.lsqr(mat, b, iter_lim=1000, show=True)
    ind = 0
    for (xT, value) in np.ndenumerate(index_map):
        if value >= 0:
            index_map[xT] = z[0][ind]
            ind = ind + 1
    return index_map
def cosine_distance(x, y):
    xy = x.dot(y.T)
    dist = xy / (norm(x) * norm(y))
    return 1 - dist[0, 0]
def lbfgs(f, xk, gfk, k, vecs, props):
    logger = logging.getLogger("phf.innersolve")
    solve_fraction = props.get("solveFraction", 0.2)
    stepFactor = props.get("innerSolveStepFactor", 0.5)
    average = props.get("innerSolveAverage", False)
    n = len(xk)
    w = lbfgs_step(gfk, k, vecs, props)
    wsum = zeros(n)
    wsum_count = 0
    gnorm = linalg.norm(gfk)

    # Each iteration requires two evaluations, so we halve the max iters here
    maxiter = int(ceil(solve_fraction * f.parts / 2.0))

    pkHpk = 0
    wHw = 0
    for i in range(maxiter):
        mv = f.make_mv_rand(xk)
        Hw = mv(w)
        ri = Hw + gfk
        pk = -lbfgs_step(ri, k + i, vecs, props)
        mpk = mv(pk)
        Hpk = mpk
        pkHpk = dot(pk, Hpk)
        sst = stepFactor * dot(ri, pk) / pkHpk
        wHw = dot(w, Hw)

        ###### Update quasi-newton approximation
        kmax = max_key(vecs)
        if pkHpk < 0:
            raise Exception("Hessian is not positive semi-definite. " +
                            "Try using the Gauss-Newton approximation to the hessian. " +
                            "If your problem is convex, your gradient " +
                            "calculation may just be wrong")
        else:
            vecs[kmax] = (pk, Hpk, 1.0 / numpy.dot(pk, Hpk))
            kmax = kmax + 1
            if wHw > 0:
                vecs[kmax] = (w, Hw, 1.0 / numpy.dot(w, Hw))

        wp = w - sst * pk
        cosdirection = dot(wp, gfk) / (linalg.norm(wp) * gnorm)
        if cosdirection < 0:
            w = wp
            if i == 0 or (i % max(1, maxiter / 10) == 0):
                logger.debug("w: %s", w[0:min(5, n)])
        else:
            logger.debug("Skipping w update to ensure w is a descent direction")

        wnorm = linalg.norm(w)
        if i > maxiter / 2:
            wsum += w
            wsum_count += 1

    if average:
        wavg = wsum / wsum_count
        return wavg
    else:
        return w
def ncutW(W, num_eigs=10, kmeans_iters=10, offset=0.5):
    """Run the normalized cut algorithm on the affinity matrix, W.
    (as implemented in Ng, Jordan, and Weiss, 2002)

    Parameters
    ----------
    W : scipy sparse matrix
        Square matrix with high values for edges to be preserved, and low
        values for edges to be cut.
    num_eigs : int, optional
        Number of eigenvectors of the affinity matrix to use for clustering.
    kmeans_iters : int, optional
        Number of iterations of the k-means algorithm to run when clustering
        eigenvectors.
    offset : float, optional
        Diagonal offset used to stabilise the eigenvector computation.

    Returns
    -------
    labels : array of int
        `labels[i]` is an integer value mapping node/row `i` to the cluster
        ID `labels[i]`.
    eigenvectors : list of array of float
        The computed eigenvectors of `W + offset * I`, where `I` is the
        identity matrix of same size as `W`.
    eigenvalues : array of float
        The corresponding eigenvalues.
    """
    n, m = W.shape
    # Add an offset in case some rows are zero.
    # We also add the offset below to the diagonal matrix. See (Yu, 2001),
    # "Understanding Popout through Repulsion" for more information. This
    # helps to stabilize the eigenvector computation.
    W = W + sparse.diags(np.full(n, offset))

    d = np.ravel(W.sum(axis=1))
    Dinv2 = sparse.diags(1 / (np.sqrt(d) + offset * np.ones(n)))
    P = Dinv2 @ W @ Dinv2

    # Get the eigenvectors and sort by eigenvalue
    eigvals, U = eigs(P, num_eigs, which='LR')
    eigvals = np.real(eigvals)  # it should be real anyway
    U = np.real(U)
    ind = np.argsort(eigvals)[::-1]
    eigvals = eigvals[ind]
    U = U[:, ind]

    # Normalize
    for i in range(n):
        U[i, :] /= norm(U[i, :])

    # Cluster them into labels, running k-means multiple times
    labels_list = []
    distortion_list = []
    for _iternum in range(kmeans_iters):
        # Cluster
        centroid, labels = vq.kmeans2(U, num_eigs, minit='points')
        # Calculate distortion
        distortion = 0
        for j in range(num_eigs):
            numvals = np.sum(labels == j)
            if numvals == 0:
                continue
            distortion += np.mean([norm(v - centroid[j])**2
                                   for (i, v) in enumerate(U)
                                   if labels[i] == j])
        # Save values
        labels_list.append(labels)
        distortion_list.append(distortion)

    # Use lowest distortion
    labels = labels_list[np.argmin(distortion_list)]

    return labels, U, eigvals