def test_keep_stable_genes(donor_expression, thr, per, rank, stab):
    """Check the output of `correct.keep_stable_genes` for one setting.

    The arguments are presumably supplied by fixtures / parametrization
    declared outside this block (not visible here).

    Verifies that every returned element is a `pd.DataFrame`, that all
    returned frames share one shape and, when `thr == 0` and `stab` is
    truthy, that `return_stability` yields one stability value per retained
    column while leaving the columns of the first donor unchanged.
    """
    filtered = correct.keep_stable_genes(
        donor_expression, threshold=thr, percentile=per, rank=rank
    )
    assert all(isinstance(frame, pd.DataFrame) for frame in filtered)
    assert all(
        left.shape == right.shape
        for left, right in itertools.combinations(filtered, 2)
    )

    # `return_stability` is only exercised for the zero-threshold case
    if thr != 0 or not stab:
        return

    filtered, stability = correct.keep_stable_genes(
        donor_expression, threshold=thr, return_stability=stab
    )
    kept_columns = filtered[0].columns
    assert len(stability) == len(kept_columns)
    assert np.all(kept_columns == donor_expression[0].columns)
def test_keep_stable_genes(donor_expression):
    """Sweep `correct.keep_stable_genes` settings and check its outputs.

    Every combination of threshold (0 to 0.9 in steps of 0.1), `percentile`
    and `rank` must return only `pd.DataFrame` objects that all share the
    same shape. Afterwards, `return_stability=True` with a zero threshold
    must return one stability value per retained column and keep the columns
    of the first donor unchanged.
    """
    thresholds = np.arange(0, 1, 0.1)
    switches = [True, False]

    for thr, per, rank in itertools.product(thresholds, switches, switches):
        filtered = correct.keep_stable_genes(
            donor_expression, threshold=thr, percentile=per, rank=rank
        )
        assert all(isinstance(frame, pd.DataFrame) for frame in filtered)
        assert all(
            left.shape == right.shape
            for left, right in itertools.combinations(filtered, 2)
        )

    # check that `return_stability` provides expression and stability
    filtered, stability = correct.keep_stable_genes(
        donor_expression, threshold=0, return_stability=True
    )
    kept_columns = filtered[0].columns
    assert len(stability) == len(kept_columns)
    assert np.all(kept_columns == donor_expression[0].columns)
def test_keep_stable_genes(donor_expression):
    """Sweep `correct.keep_stable_genes` settings and check its outputs.

    Every combination of threshold (0 to 0.9 in steps of 0.1), `percentile`
    and `rank` must return only `pd.DataFrame` objects that all share the
    same shape.
    """
    thresholds = np.arange(0, 1, 0.1)
    switches = [True, False]

    for thr, per, rank in itertools.product(thresholds, switches, switches):
        filtered = correct.keep_stable_genes(
            donor_expression, threshold=thr, percentile=per, rank=rank
        )
        assert all(isinstance(frame, pd.DataFrame) for frame in filtered)
        assert all(
            left.shape == right.shape
            for left, right in itertools.combinations(filtered, 2)
        )
def select_genes(dict_genes, sphere_lh, sphere_rh, n_perm=100, df_gandal=None):
    """Gene enrichment analysis.

    Genes are kept when their p-values for both 'SD' and 'DD' (as computed
    by ``_select_genes_one``) are below 0.05 and they also survive abagen's
    ``keep_stable_genes`` filter (threshold=.5, percentile=False, rank=True).

    Parameters
    ----------
    dict_genes: dict of pd.DataFrame
        Dictionary with pd.DataFrame of genes for each donor in the AHBA
        dataset.
    sphere_lh: BSPolyData
        Sphere for left hemisphere.
    sphere_rh: BSPolyData
        Sphere for right hemisphere.
    n_perm: int, default=100
        Forwarded to ``_select_genes_one`` as ``n_perm`` (presumably the
        number of permutations).
    df_gandal: pd.DataFrame, default=None
        Dataframe with logFC for ASD, SCZ and BD. Must provide a
        'gene_symbol' (or 'gene_name') column, the columns 'ASD.log2FC',
        'SCZ.log2FC' and 'BD.log2FC', and 'percentage_gene_gc_content'.
        The dataframe is not modified.

    Returns
    -------
    selected_genes: np.ndarray of str
        Genes with significant spatial overlap with idiosyncrasy measures.
    df_res: pd.DataFrame
        Only returned when `df_gandal` is not None. Rows are 'ASD', 'SCZ'
        and 'BD', columns are 'SD' and 'DD'; each cell holds the robust
        linear model result for the log2FC regressor formatted as
        't-value (p-value)'.
    """
    from abagen.correct import keep_stable_genes

    dict_genes = get_shared_genes(dict_genes)
    donor_frames = list(dict_genes.values())
    genes = donor_frames[0].columns

    tval_sd, pval_sd = _select_genes_one(dict_genes, 'SD', sphere_lh,
                                         sphere_rh, n_perm=n_perm)
    tval_dd, pval_dd = _select_genes_one(dict_genes, 'DD', sphere_lh,
                                         sphere_rh, n_perm=n_perm)

    # A gene must be significant for both measures
    mask_selected = np.logical_and(pval_sd < 0.05, pval_dd < 0.05)
    selected_genes = genes[mask_selected]

    # Only keep stable genes
    stable_genes = keep_stable_genes(donor_frames, threshold=.5,
                                     percentile=False, rank=True)[0]
    selected_genes = np.intersect1d(stable_genes.columns, selected_genes)

    if df_gandal is None:
        return selected_genes

    df = pd.DataFrame(np.c_[tval_sd, tval_dd], index=genes,
                      columns=['SD', 'DD'])

    # Rename on a copy so the caller's dataframe is left untouched
    gandal = df_gandal.rename(columns={'gene_name': 'gene_symbol'})
    # NOTE(review): joining `on='gene_symbol'` requires the gene index of
    # `df` to be named 'gene_symbol' -- confirm against the donor dataframes.
    df = df.merge(gandal.set_index('gene_symbol'), on='gene_symbol')

    df_res = pd.DataFrame('', columns=['SD', 'DD'],
                          index=['ASD', 'SCZ', 'BD'])
    for idio in ['SD', 'DD']:
        y = df[[idio]]
        for k in ['ASD', 'SCZ', 'BD']:
            fc = f'{k}.log2FC'
            # `assign` builds a new frame, avoiding assignment into a slice
            x = df[[fc, 'percentage_gene_gc_content']].assign(Constant=1)
            rlm_results = sm.RLM(y, x).fit()
            # First coefficient belongs to the log2FC regressor; take it by
            # position without relying on label-indexed `Series[0]`
            tv = np.asarray(rlm_results.tvalues)[0]
            pv = np.asarray(rlm_results.pvalues)[0]
            df_res.loc[k, idio] = f'{tv:.3f} ({pv:.3f})'

    return selected_genes, df_res