def filter_genes_fano_deprecated(X, Ecutoff, Vcutoff):
    """Select genes whose mean and Fano factor both exceed a cutoff.

    Deprecated; see `filter_genes_dispersion`.

    Reference: Weinreb et al. (2017).

    Parameters
    ----------
    X
        Dense expression matrix. Statistics are taken along axis 0, i.e.
        one value per column (gene).
    Ecutoff
        Strict lower bound on the per-gene mean.
    Vcutoff
        Strict lower bound on the per-gene Fano factor,
        ``var / (mean + 1e-4)``.

    Returns
    -------
    Integer indices of the genes that pass both filters.

    Raises
    ------
    ValueError
        If `X` is a sparse matrix.
    """
    if issparse(X):
        raise ValueError(
            'Not defined for sparse input. See `filter_genes_dispersion`.')

    gene_means = np.mean(X, axis=0)
    # The small offset keeps the ratio finite for genes with zero mean.
    fano_factor = np.var(X, axis=0) / (gene_means + .0001)

    passes_both = np.logical_and(gene_means > Ecutoff, fano_factor > Vcutoff)
    return np.nonzero(passes_both)[0]
def filter_genes_cv_deprecated(X, Ecutoff, cvFilter):
    """Select genes whose mean and coefficient of variation exceed a cutoff.

    Deprecated; see `filter_genes_dispersion`.

    Reference: Weinreb et al. (2017).

    Parameters
    ----------
    X
        Dense expression matrix. Statistics are taken along axis 0, i.e.
        one value per column (gene).
    Ecutoff
        Strict lower bound on the per-gene mean.
    cvFilter
        Strict lower bound on the per-gene coefficient of variation,
        ``std / (mean + 1e-4)``.

    Returns
    -------
    Integer indices of the genes that pass both filters.

    Raises
    ------
    ValueError
        If `X` is a sparse matrix.
    """
    if issparse(X):
        raise ValueError(
            'Not defined for sparse input. See `filter_genes_dispersion`.')

    gene_means = np.mean(X, axis=0)
    # The small offset keeps the ratio finite for genes with zero mean.
    coeff_of_variation = np.std(X, axis=0) / (gene_means + .0001)

    passes_both = np.logical_and(
        gene_means > Ecutoff, coeff_of_variation > cvFilter)
    return np.nonzero(passes_both)[0]
# NOTE(review): this file contains a second definition with the same name
# further down; at import time the later one shadows this one.
def normalize_per_cell_weinreb16_deprecated(X, max_fraction=1, mult_with_mean=False):
    """Normalize each cell [Weinreb17]_.

    This is a deprecated version. See `normalize_per_cell` instead.

    Normalize each cell by UMI count, so that every cell has the same total
    count.

    Parameters
    ----------
    X : np.ndarray
        Expression matrix. Rows correspond to cells and columns to genes.
    max_fraction : float, optional
        Only genes that make up at most `max_fraction` of the total counts
        in *every* cell contribute to the per-cell normalization factor.
        With the default of 1 all genes are used.
    mult_with_mean : bool, optional
        Multiply the result with the mean of total counts. This is applied
        for every value of `max_fraction`, including the default.

    Returns
    -------
    X_norm : np.ndarray
        Normalized version of the original expression matrix.

    Raises
    ------
    ValueError
        If `X` is sparse or `max_fraction` lies outside ``[0, 1]``.
    """
    if issparse(X):
        raise ValueError('Sparse input not allowed. '
                         'Consider `sc.pp.normalize_per_cell` instead.')
    if max_fraction < 0 or max_fraction > 1:
        raise ValueError('Choose max_fraction between 0 and 1.')

    counts_per_cell = np.sum(X, axis=1)

    if max_fraction == 1:
        # Every gene is included: divide by the plain per-cell total.
        X_norm = X / counts_per_cell[:, np.newaxis]
    else:
        # Restrict the normalization factor to genes that never exceed
        # `max_fraction` of a cell's total counts. Broadcasting the column
        # vector replaces the explicit np.tile of the totals.
        included = np.all(
            X <= counts_per_cell[:, np.newaxis] * max_fraction, axis=0)
        tc_include = np.sum(X[:, included], axis=1)
        # The epsilon guards against cells with no counts in included genes.
        X_norm = X / (tc_include[:, np.newaxis] + 1e-6)

    # Previously skipped when max_fraction == 1 because of an early return.
    if mult_with_mean:
        X_norm *= np.mean(counts_per_cell)
    return X_norm
def normalize_per_cell_weinreb16_deprecated(X, max_fraction=1, mult_with_mean=False):
    """Normalize each cell [Weinreb17]_.

    This is a deprecated version. See `normalize_per_cell` instead.

    Every cell (row) is divided by its total count over the included genes,
    so that cells end up with comparable totals. Dense arrays and sparse
    matrices are both handled.

    Parameters
    ----------
    X : np.ndarray or sparse matrix
        Expression matrix. Rows correspond to cells and columns to genes.
    max_fraction : float, optional
        Only genes that make up at most `max_fraction` of the total counts
        in *every* cell contribute to the per-cell normalization factor.
    mult_with_mean : bool, optional
        Multiply the result with the mean of total counts.

    Returns
    -------
    X_norm
        Normalized version of the original expression matrix; sparse if the
        input was sparse.

    Raises
    ------
    ValueError
        If `max_fraction` is smaller than 0 or larger than 1.
    """
    if max_fraction < 0 or max_fraction > 1:
        raise ValueError('Choose max_fraction between 0 and 1.')

    sparse_input = issparse(X)

    # Row sums of a sparse matrix come back 2-d; `.A1` flattens them.
    cell_totals = X.sum(1)
    if sparse_input:
        cell_totals = cell_totals.A1

    # A gene is kept only if it stays within the allowed fraction in all cells.
    keep_gene = np.all(X <= cell_totals[:, None] * max_fraction, axis=0)
    if sparse_input:
        keep_gene = keep_gene.A1

    kept_totals = X[:, keep_gene].sum(1)
    if sparse_input:
        kept_totals = kept_totals.A1
        # Scale rows through a sparse column vector so the result stays sparse.
        X_norm = X.multiply(csr_matrix(1 / kept_totals[:, None]))
    else:
        X_norm = X / kept_totals[:, None]

    if mult_with_mean:
        X_norm *= np.mean(cell_totals)
    return X_norm