def _mat_mat_corr_sparse(
    X: csr_matrix,
    Y: np.ndarray,
) -> np.ndarray:
    """Correlate every row of a sparse matrix with every column of a dense one.

    This function is borrowed from cellrank.

    Parameters
    ----------
    X
        Sparse matrix; each row is treated as one variable observed over
        ``X.shape[1]`` samples.
    Y
        Dense array whose columns are the variables to correlate against.
        Its first dimension must equal ``X.shape[1]`` so that ``X @ Y`` is
        defined.

    Returns
    -------
    Array with one row per row of ``X`` and one column per column of ``Y``.
    Where a row of ``X`` or a column of ``Y`` has zero spread the denominator
    is zero, so the corresponding entry is not finite (typically NaN).
    """
    n = X.shape[1]

    # Row-wise mean and population standard deviation of X, kept as column
    # vectors so they broadcast against the (rows, cols) product below.
    X_bar = np.reshape(np.array(X.mean(axis=1)), (-1, 1))
    X_std = np.reshape(
        np.sqrt(np.array(X.power(2).mean(axis=1)) - (X_bar**2)), (-1, 1)
    )

    # Column-wise mean and population standard deviation of Y as row vectors.
    y_bar = np.reshape(np.mean(Y, axis=0), (1, -1))
    y_std = np.reshape(np.std(Y, axis=0), (1, -1))

    # NOTE(review): both standard deviations above use ddof=0 while the
    # denominator uses (n - 1); this mirrors the upstream formula and is left
    # untouched so numerical results do not change -- confirm it is intended.

    # ``np.warnings`` no longer exists in NumPy >= 1.24, and the text of the
    # warning changed across releases, so silence the 0/0 case through the
    # floating-point error state rather than a message-based warnings filter.
    with np.errstate(invalid="ignore"):
        return (X @ Y - (n * X_bar * y_bar)) / ((n - 1) * X_std * y_std)
def t_test(
    X: csr_matrix,
    cluster_labels: List[str],
    cond_labels: List[str],
    gene_names: List[str],
    n_jobs: int,
    temp_folder: str,
    verbose: bool,
) -> List[pd.DataFrame]:
    """Run Welch's t-test by dispatching one ``calc_t`` job per cluster.

    Parameters
    ----------
    X
        Sparse matrix; its ``data``, ``indices``, ``indptr`` and ``shape``
        are handed to every worker individually.
    cluster_labels
        Cluster assignment; one job is launched for each entry of its
        ``categories`` attribute (presumably a pandas categorical -- verify
        against the caller).
    cond_labels
        Condition labels forwarded to ``calc_t``. When ``None``, column-wise
        sums and sums of squares of ``X`` are precomputed and forwarded too.
    gene_names
        Forwarded unchanged to ``calc_t``.
    n_jobs
        Number of parallel jobs passed to ``Parallel``.
    temp_folder
        Temporary folder passed to ``Parallel``.
    verbose
        Forwarded to ``calc_t``; also enables the timing log message.

    Returns
    -------
    Whatever ``Parallel`` collects from the ``calc_t`` calls, in the order of
    ``cluster_labels.categories``.
    """
    t_begin = time.time()

    # Column totals are only prepared when there are no condition labels;
    # otherwise the workers receive ``None`` for both vectors.
    if cond_labels is None:
        sum_vec = X.sum(axis=0).A1
        sum2_vec = X.power(2).sum(axis=0).A1
    else:
        sum_vec = None
        sum2_vec = None

    # The CSR components are passed separately rather than the matrix object.
    csr_parts = (X.data, X.indices, X.indptr, X.shape)
    trailing_args = (
        cluster_labels,
        cond_labels,
        gene_names,
        sum_vec,
        sum2_vec,
        verbose,
    )

    executor = Parallel(n_jobs=n_jobs, max_nbytes=1e7, temp_folder=temp_folder)
    jobs = (
        delayed(calc_t)(clust_id, *csr_parts, *trailing_args)
        for clust_id in cluster_labels.categories
    )
    result_list = executor(jobs)

    elapsed = time.time() - t_begin
    if verbose:
        logger.info(
            "Welch's t-test is done. Time spent = {:.2f}s.".format(elapsed))

    return result_list
def axis_norms(X: sparse.csr_matrix, norm: str = "l1", axis: int = 1) -> np.ndarray:
    """Return the L1 or L2 norm of a sparse matrix along one axis.

    Parameters
    ----------
    X
        Sparse matrix to reduce.
    norm
        ``"l1"`` selects the sum of absolute values. Any other value selects
        the Euclidean (L2) norm; this fall-through is kept so existing callers
        keep working.
    axis
        Axis that is summed over: ``1`` yields one value per row, ``0`` one
        value per column.

    Returns
    -------
    Flat 1-D array of norms.
    """
    if norm == "l1":
        # An L1 norm sums magnitudes; a plain sum lets negative entries
        # cancel positive ones and under-reports the norm.
        return np.asarray(abs(X).sum(axis=axis)).reshape(-1)

    squared_totals = np.asarray(X.power(2).sum(axis=axis)).reshape(-1)
    return np.sqrt(squared_totals)