def normalize_adj(self, mx: sp.coo_matrix) -> sp.coo_matrix:
    """Symmetrically normalize a sparse matrix by its row sums.

    Computes ``D^-1/2 @ mx.T @ D^-1/2`` where ``D`` is the diagonal matrix
    holding the row sums of ``mx``. When ``mx`` is symmetric (for example an
    undirected adjacency matrix) this equals ``D^-1/2 @ mx @ D^-1/2``.

    Note that this is *not* a row normalization: rows of the result do not
    sum to 1 in general.

    Args:
        mx: Square sparse matrix to normalize. ``self`` is not used.

    Returns:
        The normalized matrix in COO format. Rows/columns whose row sum is
        zero are scaled by 0 instead of by infinity.
    """
    row_sums = np.asarray(mx.sum(axis=1)).ravel()

    # A zero row sum produces inf here. That is expected and replaced by 0
    # right below, so the divide-by-zero warning would only be noise.
    with np.errstate(divide="ignore"):
        inv_sqrt = np.power(row_sums, -0.5)
    inv_sqrt[np.isinf(inv_sqrt)] = 0.0

    scaling = sp.diags(inv_sqrt)
    # (mx @ S).T @ S  ==  S @ mx.T @ S   because S is diagonal.
    return mx.dot(scaling).transpose().dot(scaling).tocoo()
def normalize_sp(mx: sp.coo_matrix) -> sp.spmatrix:
    """Row-normalize a sparse matrix: divide every row by its row sum.

    Args:
        mx: Sparse matrix to normalize.

    Returns:
        ``diag(1 / row_sum) @ mx`` as a sparse matrix. The product is
        returned as-is, without converting it to a particular sparse format.
        Rows whose sum is zero are multiplied by 0 rather than by infinity.
    """
    # Sum of each row, as a flat float vector.
    rows_sum = np.asarray(mx.sum(axis=1), dtype=float).ravel()

    # Reciprocal of each row sum. A row summing to zero yields inf, which is
    # expected and reset to 0 below, so the divide-by-zero warning is
    # suppressed for this single operation.
    with np.errstate(divide="ignore"):
        rows_inv = np.power(rows_sum, -1)
    rows_inv[np.isinf(rows_inv)] = 0

    # Left-multiplying by the diagonal matrix scales each row of mx.
    return sp.diags(rows_inv).dot(mx)
def normalize(self, mx: sp.coo_matrix) -> sp.coo_matrix:
    """Row-normalize a sparse matrix: divide every row by its row sum.

    Args:
        mx: Sparse matrix to normalize. ``self`` is not used.

    Returns:
        ``diag(1 / row_sum) @ mx`` in COO format. Rows whose sum is zero are
        multiplied by 0 rather than by infinity.
    """
    row_sums = np.asarray(mx.sum(axis=1)).ravel()

    # A zero row sum produces inf here. That is expected and replaced by 0
    # right below, so the divide-by-zero warning would only be noise.
    with np.errstate(divide="ignore"):
        inverse = np.power(row_sums, -1.0)
    inverse[np.isinf(inverse)] = 0.0

    # Left-multiplying by the diagonal matrix scales each row of mx.
    return sp.diags(inverse).dot(mx).tocoo()
def laplacian_score(X: np.ndarray, W: sparse.coo_matrix) -> np.ndarray:
    """Compute the Laplacian score of every feature.

    With ``ones = [1, ..., 1]'``, ``D = diag(W * ones)`` and ``L = D - W``,
    the score of the r-th feature ``fr = X[:, r]`` is::

        fr_hat = fr - (fr' * D * ones) / (ones' * D * ones) * ones
        score  = (fr_hat' * L * fr_hat) / (fr_hat' * D * fr_hat)

    It is evaluated here as ``1 - (fr' W fr - c) / (fr' D fr - c)`` with
    ``c = (fr' D ones)^2 / (ones' D ones)``, which is the same quantity.
    Only sparse products and element-wise operations are used, so ``W`` is
    never converted to a dense ``n_samples x n_samples`` matrix.

    Parameters
    ----------
    X : numpy array, shape (n_samples, n_features)
        Input data.
    W : sparse matrix, shape (n_samples, n_samples)
        Affinity matrix. It must be supplied by the caller.

    Returns
    -------
    score : numpy array, shape (n_features,)
        Laplacian score of each feature. A feature whose denominator is
        below 1e-12 (e.g. a constant feature) has that denominator replaced
        by 10000 to avoid dividing by zero.

    Reference
    ---------
    He, Xiaofei et al. "Laplacian Score for Feature Selection." NIPS 2005.
    """
    X = np.asarray(X)

    # Diagonal of D: the row sums of the affinity matrix.
    degrees = np.asarray(W.sum(axis=1)).ravel()
    total_degree = degrees.sum()

    # fr' * D * ones for every feature, and the shared centering term c.
    weighted_sums = np.dot(degrees, X)
    centering = weighted_sums * weighted_sums / total_degree

    # Denominator of the score: fr_hat' * D * fr_hat.
    d_quad = np.sum(X * (degrees[:, np.newaxis] * X), axis=0) - centering
    # fr' * W * fr - c; the numerator fr_hat' * L * fr_hat is d_quad minus this.
    w_quad = np.sum(X * np.asarray(W.dot(X)), axis=0) - centering

    # Avoid a zero denominator.
    d_quad = np.where(d_quad < 1e-12, 10000, d_quad)

    return 1 - w_quad / d_quad
def getDegrees(mtx: sp.coo_matrix) -> np.ndarray:
    """Return the column sums of ``mtx`` as a 1-D array.

    Args:
        mtx: Sparse matrix (presumably an adjacency matrix, in which case
            the column sums are node degrees; confirm against callers).

    Returns:
        Array of shape ``(n_columns,)`` holding the sum of each column. The
        result is always one-dimensional, including for a single-column
        matrix.
    """
    # ravel() rather than squeeze(): squeeze would collapse a 1x1 matrix to a
    # 0-d array instead of a length-1 vector.
    return np.asarray(mtx.sum(axis=0)).ravel()