def get_sorted_best_item_indices(self, URM: sps.csr_matrix, target_column: np.ndarray, item_idx: int) -> np.ndarray:
    """Return URM column indices ordered from highest to lowest variance.

    If ``self.sorted_indices`` has already been populated it is returned
    unchanged. Otherwise the ordering is computed from ``URM`` on the fly;
    the freshly computed ordering is returned but NOT stored on ``self``.

    ``target_column`` and ``item_idx`` are accepted for interface
    compatibility and are not used by this implementation.
    """
    cached = self.sorted_indices
    if cached is not None:
        return cached

    # Var[x] = E[x^2] - E[x]^2, evaluated per column. Squaring only the
    # stored entries of a copy leaves the caller's matrix untouched.
    squared = URM.copy()
    squared.data **= 2
    mean_of_squares = squared.mean(axis=0)
    square_of_means = np.power(URM.mean(axis=0), 2)
    per_column_variance = np.array(mean_of_squares - square_of_means).flatten()

    # argsort is ascending; reverse it so the largest variance comes first.
    ascending = np.argsort(per_column_variance)
    return ascending[::-1]
def _mat_mat_corr_sparse( X: csr_matrix, Y: np.ndarray, ) -> np.ndarray: """\ This function is borrow from cellrank """ n = X.shape[1] X_bar = np.reshape(np.array(X.mean(axis=1)), (-1, 1)) X_std = np.reshape(np.sqrt(np.array(X.power(2).mean(axis=1)) - (X_bar**2)), (-1, 1)) y_bar = np.reshape(np.mean(Y, axis=0), (1, -1)) y_std = np.reshape(np.std(Y, axis=0), (1, -1)) with np.warnings.catch_warnings(): np.warnings.filterwarnings( "ignore", r"invalid value encountered in true_divide") return (X @ Y - (n * X_bar * y_bar)) / ((n - 1) * X_std * y_std)
import matplotlib.pyplot as plt
import pandas as pd
import time

# ---- Experiment switches -------------------------------------------------
num_of_css = 5
is_pca = 0  # 0 for SVD, 1 for PCA
is_sparse = 1

# ---- Input data: 100 x 11 matrix of standard-normal samples --------------
Data = np.random.randn(100, 11)

# Per-column mean of the data. The dense branch builds an explicit (1, d)
# row; the sparse branch takes whatever ``SM(...).mean(0)`` returns
# (presumably also a single row -- confirm against the definition of SM).
if is_sparse != 0:
    Data1 = SM(Data)
    mean_data = Data1.mean(0)
else:
    mean_data = np.mean(Data, 0).reshape(1, -1)

# Centre the data: a column of ones times the mean row repeats the mean
# once per data row before it is subtracted.
Datam = Data - np.dot(np.ones((Data.shape[0], 1)), mean_data)
Data1 = SM(Data)

n, d = Data.shape

# ---- Remaining run parameters --------------------------------------------
opt_error = 1
r = 1
is_to_save = 0
is_save = 1
is_big_data = 1
alg_list = [0, 3, 5]
t0 = 1

if datum == 2:
    coreset_space = 5
def precompute_best_item_indices(self, URM: sps.csr_matrix):
    """Compute the variance-based column ordering of ``URM`` and cache it.

    The per-column variance is obtained as E[x^2] - E[x]^2. Column indices
    sorted from highest to lowest variance are stored in
    ``self.sorted_indices``; nothing is returned.
    """
    # Square only the stored entries of a copy so ``URM`` itself is untouched.
    squared = URM.copy()
    squared.data **= 2

    second_moment = squared.mean(axis=0)
    first_moment = URM.mean(axis=0)
    variances = np.array(second_moment - np.power(first_moment, 2)).flatten()

    # argsort is ascending; reverse it so the largest variance comes first.
    self.sorted_indices = np.argsort(variances)[::-1]