def counts_per_cell_quantile(X, max_proportion_per_cell=0.05, counts_per_cell=None):
    """Sum each row of ``X`` over the columns that never dominate a row.

    A column (gene) is kept only if, in every row (cell), its value is at most
    ``max_proportion_per_cell`` times that row's total count.

    Parameters
    ----------
    X
        Count matrix, dense or sparse; rows are summed with ``axis=1``.
    max_proportion_per_cell
        Largest share of a row's total that a single column entry may hold.
    counts_per_cell
        Optional precomputed row totals; computed from ``X`` when ``None``.

    Returns
    -------
    Row sums of ``X`` restricted to the retained columns.
    """
    totals = sum(X, axis=1) if counts_per_cell is None else counts_per_cell

    # Per-row upper bound, broadcast against every column of X.
    per_cell_cap = totals[:, None] * max_proportion_per_cell
    keep_genes = np.all(X <= per_cell_cap, axis=0)

    if issparse(X):
        # Sparse comparisons yield a matrix; flatten it into a 1-D mask.
        keep_genes = keep_genes.A1

    return sum(X[:, keep_genes], axis=1)
def _filter(X, min_counts=None, min_cells=None, max_counts=None, max_cells=None):
    """Build a per-column mask from lower/upper thresholds.

    When either ``min_counts`` or ``max_counts`` is given, columns are scored
    by their sum over axis 0; otherwise by their number of positive entries.
    The ``*_counts`` bounds take precedence over the ``*_cells`` bounds, and a
    missing bound defaults to -inf / +inf.

    Returns
    -------
    A tuple ``(mask, counts)`` where ``mask`` is True for columns whose score
    lies within ``[lower, upper]`` (inclusive) and ``counts`` is the score.
    """
    score_by_counts = min_counts is not None or max_counts is not None
    if score_by_counts:
        counts = sum(X, axis=0)
    else:
        counts = sum(X > 0, axis=0)

    # Lower bound: counts threshold first, then cells threshold, else unbounded.
    if min_counts is not None:
        lower = min_counts
    elif min_cells is not None:
        lower = min_cells
    else:
        lower = -np.inf

    # Upper bound: same precedence as the lower bound.
    if max_counts is not None:
        upper = max_counts
    elif max_cells is not None:
        upper = max_cells
    else:
        upper = np.inf

    within_bounds = (lower <= counts) & (counts <= upper)
    return within_bounds, counts
def _zero_nans(values):
    """Return ``values`` with NaN replaced by zero; handles scalars and arrays."""
    if np.ndim(values) == 0:
        # Scalars (Python numbers / NumPy scalars) do not support item assignment.
        return 0.0 if np.isnan(values) else values
    values[np.isnan(values)] = 0
    return values


def leastsq_NxN(x, y, fit_offset=False, perc=None, constraint_positive_offset=True):
    """Solve the least-squares problem ``X * b = Y`` for ``b`` along axis 0.

    Deprecated since scVelo v0.2.4; a ``DeprecationWarning`` is emitted on
    every call.

    Parameters
    ----------
    x, y
        Regressor and response. Sums are taken over axis 0, so 2-D input gives
        one fit per column; 1-D input yields scalar results.
    fit_offset
        If True, fit slope and offset; otherwise fit a line through the origin
        and return a zero offset.
    perc
        If not None, forwarded to ``get_weight`` to build a boolean mask that
        is multiplied into ``x`` and ``y``. If it is a list/tuple and
        ``fit_offset`` is False, only ``perc[1]`` is used.
    constraint_positive_offset
        If True (and ``fit_offset``), fits whose offset is negative are
        replaced by the through-origin slope and the offset is clipped to 0.

    Returns
    -------
    offset, gamma
        Fitted offset(s) and slope(s); NaN entries are replaced by 0.
    """
    warnings.warn(
        "`leastsq_NxN` is deprecated since scVelo v0.2.4 and will be removed in a "
        "future version. Please use `LinearRegression` from `scvelo/core/` instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    if perc is not None:
        if not fit_offset and isinstance(perc, (list, tuple)):
            perc = perc[1]
        weights = csr_matrix(get_weight(x, y, perc=perc)).astype(bool)
        x, y = weights.multiply(x).tocsr(), weights.multiply(y).tocsr()
    else:
        weights = None

    # Divisions below may hit 0/0; the resulting NaNs are zeroed at the end,
    # so the corresponding runtime warnings are silenced here.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        xx_ = prod_sum(x, x, axis=0)
        xy_ = prod_sum(x, y, axis=0)

        if fit_offset:
            n_obs = x.shape[0] if weights is None else sum(weights, axis=0)
            x_ = sum(x, axis=0) / n_obs
            y_ = sum(y, axis=0) / n_obs
            gamma = (xy_ / n_obs - x_ * y_) / (xx_ / n_obs - x_**2)
            offset = y_ - gamma * x_

            # fix negative offsets:
            if constraint_positive_offset:
                idx = offset < 0
                if gamma.ndim > 0:
                    gamma[idx] = xy_[idx] / xx_[idx]
                elif idx:
                    # Scalar case: refit through the origin only when the
                    # offset is actually negative, mirroring the array branch.
                    gamma = xy_ / xx_
                offset = np.clip(offset, 0, None)
        else:
            gamma = xy_ / xx_
            offset = np.zeros(x.shape[1]) if x.ndim > 1 else 0

    # Item assignment would raise TypeError for scalar results, so NaN
    # replacement goes through a helper that handles both shapes.
    offset, gamma = _zero_nans(offset), _zero_nans(gamma)
    return offset, gamma
def test_2d_arrays(self, a: ndarray, axis: int):
    """`sum` must agree with ``ndarray.sum`` along ``axis``.

    One-dimensional input is compared against a sum over axis 0.
    """
    result = sum(a=a, axis=axis)
    reference_axis = 0 if a.ndim == 1 else axis
    assert_array_equal(result, a.sum(axis=reference_axis))
def get_indices(dist, n_neighbors=None, mode_neighbors="distances"):
    """Trim every row of ``dist`` to the same number of nearest neighbors.

    Parameters
    ----------
    dist
        Sparse distance matrix; presumably CSR, since ``.data``/``.indices``
        are sliced row by row using cumulative per-row counts — TODO confirm.
        The input is copied and not modified.
    n_neighbors
        Number of entries to keep per row. Capped at the smallest number of
        stored entries found in any row; that minimum is used when ``None``.
    mode_neighbors
        ``"distances"`` returns the column indices of the trimmed matrix;
        ``"connectivities"`` derives indices from
        ``compute_connectivities_umap`` applied to the trimmed neighbors.

    Returns
    -------
    indices, D
        The neighbor indices and the trimmed copy of ``dist``.

    Raises
    ------
    ValueError
        If ``mode_neighbors`` is not one of the two supported modes.
    """
    # Fail fast: previously an unknown mode ran all the work and then crashed
    # with UnboundLocalError at the return statement.
    if mode_neighbors not in ("distances", "connectivities"):
        raise ValueError(
            "`mode_neighbors` must be 'distances' or 'connectivities', "
            f"got {mode_neighbors!r}."
        )

    D = dist.copy()
    # Temporary shift: stored entries equal to 0 become positive, so only the
    # entries explicitly zeroed below are dropped by `eliminate_zeros`.
    D.data += 1e-6

    n_counts = sum(D > 0, axis=1)
    n_neighbors = (
        n_counts.min() if n_neighbors is None else min(n_counts.min(), n_neighbors)
    )
    rows = np.where(n_counts > n_neighbors)[0]
    cumsum_neighs = np.insert(n_counts.cumsum(), 0, 0)
    dat = D.data

    for row in rows:
        n0, n1 = cumsum_neighs[row], cumsum_neighs[row + 1]
        # Zero everything beyond the `n_neighbors` smallest values of this row.
        rm_idx = n0 + dat[n0:n1].argsort()[n_neighbors:]
        dat[rm_idx] = 0
    D.eliminate_zeros()

    D.data -= 1e-6  # undo the temporary shift
    if mode_neighbors == "distances":
        indices = D.indices.reshape((-1, n_neighbors))
    else:
        # Imported lazily: only this mode needs it.
        from scvelo.preprocessing.neighbors import compute_connectivities_umap

        knn_indices = D.indices.reshape((-1, n_neighbors))
        knn_distances = D.data.reshape((-1, n_neighbors))
        _, conn = compute_connectivities_umap(
            knn_indices, knn_distances, D.shape[0], n_neighbors
        )
        indices = get_indices_from_csr(conn)
    return indices, D
def sum_var(A):
    """Deprecated alias for ``sum(A, axis=1)``: sum ``A`` over axis 1 (var).

    Emits a ``DeprecationWarning`` on every call.
    """
    message = (
        "`sum_var` is deprecated since scVelo v0.2.4 and will be removed in a future "
        "version. Please use `sum(A, axis=1)` from `scvelo/core/` instead."
    )
    warnings.warn(message, DeprecationWarning, stacklevel=2)

    totals = sum(A, axis=1)
    return totals
def test_flat_arrays(self, a: ndarray):
    """`sum` over axis 0 must agree with ``ndarray.sum(axis=0)``."""
    result = sum(a=a, axis=0)
    reference = a.sum(axis=0)
    assert_array_equal(result, reference)