def apply_qc_filters(unidata: UnimodalData):
    """Subset ``unidata`` to QC-passing barcodes and discard derived state.

    Nothing happens unless ``unidata.obs`` has a ``"passed_qc"`` column. When
    it does, the object is subset in place with that column, the
    ``"passed_qc"`` column is dropped (together with ``"demux_type"`` when
    ``unidata.uns["__del_demux_type"]`` is truthy), ``obsm`` and ``varm`` are
    emptied, and every ``uns`` entry other than ``genome``, ``modality``,
    ``norm_count`` and ``df_qcplot`` is deleted.
    """
    if "passed_qc" not in unidata.obs:
        return

    n_before = unidata.shape[0]
    unidata._inplace_subset_obs(unidata.obs["passed_qc"])

    drop_cols = ["passed_qc"]
    if unidata.uns.get("__del_demux_type", False):
        drop_cols.append("demux_type")
        if "assignment" in unidata.obs:
            # Rebuild the categorical so that categories left without any
            # barcode after subsetting are removed.
            assignment = unidata.obs["assignment"]
            counts = assignment.value_counts(sort=False)
            kept_categories = counts[counts > 0].index.astype(str)
            unidata.obs["assignment"] = pd.Categorical(
                assignment, categories=kept_categories
            )
        # The "__del_demux_type" flag itself is not in the preserved key set,
        # so the uns cleanup further down removes it.
    unidata.obs.drop(columns=drop_cols, inplace=True)

    # Embeddings / per-feature arrays are emptied after the subset.
    for attr in ("obsm", "varm"):
        store = getattr(unidata, attr)
        if len(store) > 0:
            store.clear()

    # Snapshot the keys to delete before mutating the mapping.
    preserved = {'genome', 'modality', 'norm_count', 'df_qcplot'}
    stale_keys = [key for key in unidata.uns if key not in preserved]
    for key in stale_keys:
        del unidata.uns[key]

    logger.info(
        f"After filtration, {unidata.shape[0]} out of {n_before} cell barcodes are kept in UnimodalData object {unidata.get_uid()}."
    )
def estimate_background_probs(hashing_data: UnimodalData, random_state: int = 0) -> None:
    """Estimate the antibody background distribution of cell-hashing data.

    Cell barcodes are split into two groups by a two-cluster KMeans fit on the
    log10 of their total counts. The cluster with the larger center is labelled
    ``"signal"``, the other ``"background"``; the per-antibody share of counts
    among background barcodes is stored as the background probability vector.

    Parameters
    ----------
    hashing_data: ``UnimodalData``
        Annotated antibody count matrix; modified in place.

    random_state: ``int``, optional, default: ``0``
        Seed handed to KMeans for reproducible results.

    Returns
    -------
    ``None``

    Update ``hashing_data``: cell barcodes with 0 total counts are removed, as
    are antibody barcodes with 0 counts among the background barcodes.

    Update ``hashing_data.obs``:
        * ``hashing_data.obs["counts"]``: total count of each cell barcode.
        * ``hashing_data.obs["hto_type"]``: ``"signal"`` or ``"background"``.

    Update ``hashing_data.uns``:
        * ``hashing_data.uns["background_probs"]``: estimated antibody background probability.

    Example
    -------
    >>> estimate_background_probs(hashing_data)
    """
    hashing_data.obs["counts"] = hashing_data.X.sum(axis=1).A1

    # The clustering below works on log10 counts, so barcodes without any
    # count are dropped first.
    is_empty = hashing_data.obs["counts"] == 0
    ncell_zero = is_empty.sum()
    if ncell_zero > 0:
        logger.warning(
            f"Detected {ncell_zero} cell barcodes with 0 hashtag counts, which are removed from the hashing data object."
        )
        hashing_data._inplace_subset_obs(~is_empty)

    log_counts = np.log10(hashing_data.obs["counts"].values.reshape(-1, 1))
    model = KMeans(n_clusters=2, random_state=random_state)
    model.fit(log_counts)

    # Signal is the cluster with the strictly larger center; on a tie the
    # second cluster (label 1) is used.
    centers = model.cluster_centers_
    if centers[0] > centers[1]:
        signal = 0
    else:
        signal = 1

    hashing_data.obs["hto_type"] = "background"
    hashing_data.obs.loc[model.labels_ == signal, "hto_type"] = "signal"

    # Pool counts over background barcodes and normalise to probabilities.
    is_background = np.isin(hashing_data.obs["hto_type"], "background")
    background_totals = hashing_data.X[is_background, ].sum(axis=0).A1
    back_probs = background_totals / background_totals.sum()

    unseen = back_probs <= 0.0
    n_unseen = unseen.sum()
    if n_unseen > 0:
        unseen_names = ','.join(hashing_data.var_names[unseen])
        logger.warning(
            f"Detected {n_unseen} antibody barcodes {unseen_names} with 0 counts in the background! These barcodes are likely not in the experiment and thus removed."
        )
        hashing_data._inplace_subset_var(~unseen)
        back_probs = back_probs[~unseen]

    hashing_data.uns["background_probs"] = back_probs
    logger.info("Background probability distribution is estimated.")
def apply_qc_filters(unidata: UnimodalData):
    """Keep only the barcodes flagged in ``unidata.obs["passed_qc"]``.

    Does nothing when ``unidata.obs`` has no ``"passed_qc"`` column. Otherwise
    the object is subset in place with that column and the column is dropped.
    When ``unidata.uns["__del_demux_type"]`` is truthy, the ``"demux_type"``
    column is dropped as well and the flag is removed from ``uns``.
    """
    # NOTE(review): an earlier definition of ``apply_qc_filters`` is visible in
    # this source; if both live in the same module this later one is the
    # definition bound to the name -- confirm which variant is intended.
    if "passed_qc" not in unidata.obs:
        return

    n_before = unidata.shape[0]
    unidata._inplace_subset_obs(unidata.obs["passed_qc"])

    if unidata.uns.get("__del_demux_type", False):
        drop_cols = ["passed_qc", "demux_type"]
        # One-shot flag: consume it once it has been honoured.
        del unidata.uns["__del_demux_type"]
    else:
        drop_cols = ["passed_qc"]
    unidata.obs.drop(columns=drop_cols, inplace=True)

    logger.info(
        f"After filtration, {unidata.shape[0]} out of {n_before} cell barcodes are kept in UnimodalData object {unidata.get_uid()}."
    )