Example #1
from anndata import AnnData


def _sanitize_anndata(adata: AnnData) -> None:
    """Sanitization and sanity checks on TCR-anndata object. 
    Should be executed by every read_xxx function"""
    assert (
        len(adata.X.shape) == 2
    ), "X needs to have two dimensions, otherwise concat doesn't work."
    adata._sanitize()
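
A minimal usage sketch (my addition, not part of the original source): an obs-only AnnData gets a zero-column X so the two-dimension check passes, mirroring how Example #5 below constructs its objects.

import numpy as np
import pandas as pd

# A zero-column X still has two dimensions, so the assert passes and
# concatenation keeps working.
adata = AnnData(X=np.empty([3, 0]), obs=pd.DataFrame(index=["c1", "c2", "c3"]))
_sanitize_anndata(adata)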
Example #2
import numpy as np
import scanpy

# GeneExpressionDataset and its API are assumed to come from an older scvi
# release (scvi.dataset); adjust the import for your environment.
from scvi.dataset import GeneExpressionDataset


def normalize(adata: scanpy.AnnData,
              filter_min_counts=None,
              size_factors=False,
              scale_input=False,
              logtrans_input=False):

    if filter_min_counts:
        scanpy.pp.filter_genes(adata, min_counts=filter_min_counts)
        scanpy.pp.filter_cells(adata, min_counts=filter_min_counts)

    adata.raw = adata
    dataset = GeneExpressionDataset()
    dataset.from_data(adata.X, raw=adata.raw.X)

    if size_factors:
        scanpy.pp.normalize_per_cell(adata)
        size_factor_cell = dataset.nb_cell_counts / np.median(
            dataset.nb_cell_counts)
        dataset.initialize_cell_attribute("size_factor",
                                          size_factor_cell.reshape((-1, 1)))
    if logtrans_input:
        scanpy.pp.log1p(adata)
    if scale_input:
        scanpy.pp.scale(adata)

    dataset.data = adata.X
    obs_index = adata.obs.columns
    for i, index in enumerate(obs_index):
        dataset.initialize_cell_attribute(index, adata.obs.iloc[:, i].values)
    return dataset
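
A hypothetical usage sketch. It only exercises the filtering and log-transform branches; whether it runs depends on the assumed legacy scvi GeneExpressionDataset API (from_data, initialize_cell_attribute).

import numpy as np
import scanpy

counts = np.random.poisson(1.0, size=(100, 50)).astype(np.float32)
adata = scanpy.AnnData(counts)
dataset = normalize(adata, filter_min_counts=1, logtrans_input=True)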
Example #3
import numpy as np
import scanpy as sc
from pandas import DataFrame
from tqdm.notebook import tqdm_notebook


def merge_slide_seq_beads(adata: sc.AnnData, grid_size: int):
    df = adata.to_df().copy()
    df["x"] = adata.obs["x"].values
    df["y"] = adata.obs["y"].values
    x_borders, y_borders = [
        np.linspace(df[s].min(), df[s].max(), grid_size) for s in ["x", "y"]
    ]

    cells_collapsed = []
    sizes = []
    x_vals, y_vals = [], []
    for xs, xe in tqdm_notebook(zip(x_borders[:-1], x_borders[1:]),
                                total=len(x_borders) - 1):
        for ys, ye in zip(y_borders[:-1], y_borders[1:]):
            mask = (df["x"].values > xs) & (df["x"].values <= xe) & (
                df["y"].values > ys) & (df["y"].values <= ye)
            if mask.any():
                df_cur = df[mask]
                cells_collapsed.append(df_cur.iloc[:, :-2].sum())
                sizes.append(df_cur.shape[0])
                x_vals.append(df_cur["x"].mean())
                y_vals.append(df_cur["y"].mean())

    adata_collapsed = sc.AnnData(DataFrame(cells_collapsed))
    adata_collapsed.obs_names = adata_collapsed.obs_names.map(str)
    adata_collapsed.obs["n_merged"] = sizes
    adata_collapsed.obs["x"] = x_vals
    adata_collapsed.obs["y"] = y_vals

    mit_gene_mask = adata_collapsed.var_names.map(lambda x: x[:3] == 'mt-')
    adata_collapsed.obs["mit_frac"] = adata_collapsed[:, mit_gene_mask].X.sum(
        axis=1) / adata_collapsed.X.sum(axis=1)

    return adata_collapsed
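
A hypothetical usage sketch on synthetic beads (tqdm_notebook renders best in Jupyter). Note that grid_size counts bin borders, so 11 borders yield a 10x10 grid.

import numpy as np
import pandas as pd
import scanpy as sc

counts = np.random.poisson(0.5, size=(1000, 5)).astype(float)
adata = sc.AnnData(counts, var=pd.DataFrame(index=[f"gene{i}" for i in range(5)]))
adata.obs["x"] = np.random.uniform(0, 100, adata.n_obs)
adata.obs["y"] = np.random.uniform(0, 100, adata.n_obs)
adata_merged = merge_slide_seq_beads(adata, grid_size=11)  # 10x10 bins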
Example #4
	def write_h5ad_from_anndata(self, dataToWrite: sc.AnnData, fileName: str,
								 analysisTask: Optional[TaskOrName]=None,
								 subDir: Optional[str]=None, **kwargs) -> None:
		"""
		Writes an h5ad file from an AnnData object.

		Args:
			dataToWrite: AnnData object to be written to file
			fileName: Name of file to be written, including the extension
			analysisTask: the analysis task (or its name) to write the file for
			subDir: subdirectory of current analysis task to write file to
			Additional parameters are passed to the AnnData.write method
			as **kwargs
		"""
		filePath = self.get_analysis_path(analysisTask=analysisTask,
										  subDir=subDir,
										  fileName=fileName,
										  extension='.h5ad')
		dataToWrite.write(filePath, **kwargs)
Example #5
def _tcr_objs_to_anndata(tcr_objs: Collection) -> AnnData:
    """Convert a list of TcrCells to an AnnData object"""
    tcr_df = pd.DataFrame.from_records(
        (_process_tcr_cell(x) for x in tcr_objs), index="cell_id"
    )
    adata = AnnData(obs=tcr_df, X=np.empty([tcr_df.shape[0], 0]))
    _sanitize_anndata(adata)
    return adata
Example #6
from typing import Optional

import numpy as np
from anndata import AnnData
from pandas import DataFrame


def _make_anndata(X: np.ndarray,
                  observation: DataFrame,
                  variables: Optional[DataFrame] = None) -> AnnData:
    '''Make a scanpy AnnData object out of pieces

        :param X: numpy array with biological data, e.g. expression
        :param observation: annotation for the biological data
        :param variables: data along the second dimension of the expression, e.g. genes
        :return: AnnData object
    '''
    return AnnData(X, observation, variables)
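
A minimal usage sketch with random data:

import numpy as np
import pandas as pd

X = np.random.rand(3, 2)
obs = pd.DataFrame(index=["cell1", "cell2", "cell3"])
var = pd.DataFrame(index=["geneA", "geneB"])
adata = _make_anndata(X, obs, var)
print(adata)  # AnnData object with n_obs x n_vars = 3 x 2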
Example #7
        def process(self):
            """
            A method to run `harmony` on input Data Frame
            """

            # Harmony augmented affinity matrix
            logg.info('Harmony augmented affinity matrix ...', r=True)

            self.tp = pd.Series(index=self.data_df.index)
            for t in self.timepoints:
                cells = self.data_df.index[self.data_df.index.str.contains(t)]
                self.tp[cells] = t
            self.timepoint_connections = pd.DataFrame(columns=[0, 1])
            index = 0
            for i in range(len(self.timepoints) - 1):
                self.timepoint_connections.loc[
                    index, :] = self.timepoints[i:i + 2]
                index += 1

            # compute the augmented and non-augmented affinity matrices
            self.aug_aff, self.aff = self.harmony.core.augmented_affinity_matrix(
                self.data_df, self.tp, self.timepoint_connections)

            # Visualization using force directed layouts
            self.layout = self.harmony.plot.force_directed_layout(
                self.aug_aff, self.data_df.index)

            # push outputs to a new scanpy.AnnData
            from scanpy import AnnData
            self.harmony_adata = AnnData(self.data_df)
            self.harmony_adata.obsm['layout'] = np.array(self.layout)
            self.harmony_adata.uns['tp'] = self.tp
            self.harmony_adata.uns['aff'] = self.aff
            self.harmony_adata.uns['aug_aff'] = self.aug_aff
            self.harmony_adata.uns['sample_names'] = self.sample_names
            self.harmony_adata.uns['timepoints'] = self.timepoints
            self.harmony_adata.uns[
                'timepoint_connections'] = self.timepoint_connections

            logg.info('End of processing, start plotting.', r=True)

            return self.harmony_adata
Example #8
import scanpy as sc


def process_scanpy(adata: sc.AnnData,
                   n_neighbors: int = 10,
                   n_pcs: int = 50,
                   n_od_genes: int = 0,
                   metric: str = 'cosine',
                   cl_resolution: float = 0.5,
                   do_log: bool = False):
    adata = adata.copy()
    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
    if do_log:
        sc.pp.log1p(adata)

    use_od_genes = (n_od_genes > 0 and do_log)
    if use_od_genes:
        sc.pp.highly_variable_genes(adata, n_top_genes=n_od_genes)
    sc.tl.pca(adata, svd_solver='arpack', use_highly_variable=use_od_genes)
    sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=n_pcs, metric=metric)
    sc.tl.umap(adata)
    sc.tl.louvain(adata, resolution=cl_resolution)
    return adata
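
A usage sketch on a public dataset: sc.datasets.pbmc3k downloads raw 10x counts on first use, and sc.tl.louvain additionally requires the python-louvain package.

import scanpy as sc

adata = sc.datasets.pbmc3k()
adata_proc = process_scanpy(adata, n_od_genes=2000, do_log=True)
sc.pl.umap(adata_proc, color="louvain")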
Example #9
def test_chain_pairing():
    obs = pd.DataFrame.from_records(
        [
            ["False", "nan", "nan", "nan", "nan", "nan"],
            ["True", "True", "AAAA", "BBBB", "CCCC", "DDDD"],
            ["True", "False", "AAAA", "BBBB", "CCCC", "DDDD"],
            ["True", "nan", "AAAA", "nan", "nan", "nan"],
            ["True", "False", "AAAA", "nan", "CCCC", "nan"],
            ["True", "False", "AAAA", "BBBB", "nan", "nan"],
            ["True", "False", "AAAA", "BBBB", "CCCC", "nan"],
            ["True", "False", "nan", "nan", "CCCC", "nan"],
            ["True", "False", "nan", "nan", "CCCC", "DDDD"],
            ["True", "False", "AAAA", "nan", "CCCC", "DDDD"],
        ],
        columns=[
            "has_tcr",
            "multi_chain",
            "TRA_1_cdr3",
            "TRA_2_cdr3",
            "TRB_1_cdr3",
            "TRB_2_cdr3",
        ],
    )
    adata = AnnData(obs=obs)
    res = st.tl.chain_pairing(adata, inplace=False)
    npt.assert_equal(
        res,
        [
            "No TCR",
            "Multichain",
            "Two full chains",
            "Orphan alpha",
            "Single pair",
            "Orphan alpha",
            "Extra alpha",
            "Orphan beta",
            "Orphan beta",
            "Extra beta",
        ],
    )
Example #10
def test_group_abundance():
    obs = pd.DataFrame.from_records(
        [
            ["cell1", "A", "ct1"],
            ["cell2", "A", "ct1"],
            ["cell3", "A", "ct1"],
            ["cell3", "A", "NaN"],
            ["cell4", "B", "ct1"],
            ["cell5", "B", "ct2"],
        ],
        columns=["cell_id", "group", "clonotype"],
    ).set_index("cell_id")
    adata = AnnData(obs=obs)

    # Check counts
    res = st.tl.group_abundance(
        adata, groupby="clonotype", target_col="group", fraction=False
    )
    expected_count = pd.DataFrame.from_dict(
        {"ct1": {"A": 3.0, "B": 1.0}, "ct2": {"A": 0.0, "B": 1.0},}, orient="index",
    )
    npt.assert_equal(res.values, expected_count.values)

    # Check fractions
    res = st.tl.group_abundance(
        adata, groupby="clonotype", target_col="group", fraction=True
    )
    expected_frac = pd.DataFrame.from_dict(
        {"ct1": {"A": 0.75, "B": 0.25}, "ct2": {"A": 0.0, "B": 1.0},}, orient="index",
    )
    npt.assert_equal(res.values, expected_frac.values)

    # Check swapped
    res = st.tl.group_abundance(
        adata, groupby="group", target_col="clonotype", fraction=True
    )
    expected_frac = pd.DataFrame.from_dict(
        {"A": {"ct1": 1.0, "ct2": 0.0}, "B": {"ct1": 0.5, "ct2": 0.5},}, orient="index",
    )
    npt.assert_equal(res.values, expected_frac.values)
Example #11
def rank_genes_groups_bayes(
    adata: sc.AnnData,
    scvi_posterior: scvi.inference.Posterior,
    n_samples: int = None,
    M_permutation: int = None,
    n_genes: int = 25,
    label_name: str = "louvain_scvi",
) -> pd.DataFrame:
    """
    Rank genes for characterizing groups.

    Computes a Bayes factor for each cluster against the others to test for differential expression.
    See the Nature article (https://rdcu.be/bdHYQ).

    :param adata: non-normalized sc.AnnData object
    :param scvi_posterior: scVI posterior object used to compute the Bayes factors
    :param n_samples: passed through to scvi's one_vs_all_degenes (number of posterior samples)
    :param M_permutation: passed through to scvi's one_vs_all_degenes (number of sampled pairs)
    :param n_genes: number of top genes to report per cluster
    :param label_name: The groups tested are taken from adata.obs[label_name], which can be computed
                       using clustering like Louvain (e.g. sc.tl.louvain(adata, key_added=label_name))
    :return: Summary of Bayes factor per gene, per cluster
    """

    # Call scvi function
    per_cluster_de, cluster_id = scvi_posterior.one_vs_all_degenes(
        cell_labels=np.asarray(adata.obs[label_name].values).astype(int).ravel(),
        min_cells=1,
        n_samples=n_samples,
        M_permutation=M_permutation,
    )

    # convert to ScanPy format -- this is just about feeding scvi results into a format readable by ScanPy
    markers = []
    scores = []
    names = []
    for x in per_cluster_de:
        subset_de = x[:n_genes]
        markers.append(subset_de)
        scores.append(tuple(subset_de["bayes1"].values))
        names.append(tuple(subset_de.index.values))

    markers = pd.concat(markers)
    dtypes_scores = [(str(i), "<f4") for i in range(len(scores))]
    dtypes_names = [(str(i), "<U50") for i in range(len(names))]
    scores = np.array([tuple(row) for row in np.array(scores).T], dtype=dtypes_scores)
    scores = scores.view(np.recarray)
    names = np.array([tuple(row) for row in np.array(names).T], dtype=dtypes_names)
    names = names.view(np.recarray)

    adata.uns["rank_genes_groups_scvi"] = {
        "params": {
            "groupby": "",
            "reference": "rest",
            "method": "",
            "use_raw": True,
            "corr_method": "",
        },
        "scores": scores,
        "names": names,
    }
    return markers
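
A hypothetical usage sketch against the legacy scvi (<= 0.6) API this snippet assumes; adata, vae and dataset are placeholders for an AnnData object, a trained scVI model and its GeneExpressionDataset.

import scanpy as sc
from scvi.inference import UnsupervisedTrainer

sc.pp.neighbors(adata)
sc.tl.louvain(adata, key_added="louvain_scvi")  # labels to test
trainer = UnsupervisedTrainer(vae, dataset)     # vae, dataset: placeholders
posterior = trainer.create_posterior(vae, dataset)
markers = rank_genes_groups_bayes(adata, posterior, n_genes=25)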
Example #12
def test_chain_qc():
    obs = pd.DataFrame.from_records(
        [
            ["False", "nan", "nan", "nan", "nan", "nan"],
            ["True", "True", "TRA", "TRB", "TRA", "TRB"],
            # multichain takes precedence over ambiguous
            ["True", "True", "TRA", "IGH", "nan", "nan"],
            ["True", "False", "TRA", "TRB", "nan", "nan"],
            ["True", "False", "TRA", "TRB", "TRA", "nan"],
            ["True", "False", "TRA", "TRB", "nan", "TRB"],
            ["True", "False", "TRA", "TRB", "TRA", "TRB"],
            ["True", "False", "IGK", "IGH", "nan", "nan"],
            ["True", "False", "IGL", "IGH", "IGL", "IGH"],
            ["True", "False", "IGL", "IGH", "IGK", "IGH"],
            ["True", "False", "nan", "IGH", "nan", "IGH"],
            ["True", "False", "TRA", "TRB", "TRG", "TRB"],
            ["True", "False", "IGK", "TRB", "nan", "nan"],
            ["True", "False", "TRA", "nan", "nan", "nan"],
            ["True", "False", "IGL", "nan", "nan", "nan"],
            ["True", "False", "nan", "TRD", "nan", "nan"],
        ],
        columns=[
            "has_ir",
            "multi_chain",
            "IR_VJ_1_locus",
            "IR_VDJ_1_locus",
            "IR_VJ_2_locus",
            "IR_VDJ_2_locus",
        ],
    )
    # fake chains
    for chain, chain_number in itertools.product(["VJ", "VDJ"], ["1", "2"]):
        obs[f"IR_{chain}_{chain_number}_junction_aa"] = [
            "AAA" if x != "nan" else "nan"
            for x in obs[f"IR_{chain}_{chain_number}_locus"]
        ]
    adata = AnnData(obs=obs)
    adata.uns["scirpy_version"] = "0.7"

    ir.tl.chain_qc(adata, key_added=("rec_type", "rec_subtype", "ch_pairing"))

    npt.assert_equal(
        adata.obs["rec_type"],
        np.array([
            "no IR",
            "multichain",
            "multichain",
            "TCR",
            "TCR",
            "TCR",
            "TCR",
            "BCR",
            "BCR",
            "BCR",
            "BCR",
            "TCR",
            "ambiguous",
            "TCR",
            "BCR",
            "TCR",
        ]),
    )
    npt.assert_equal(
        adata.obs["rec_subtype"],
        np.array([
            "no IR",
            "multichain",
            "multichain",
            "TRA+TRB",
            "TRA+TRB",
            "TRA+TRB",
            "TRA+TRB",
            "IGH+IGK",
            "IGH+IGL",
            "ambiguous",
            "IGH",
            "ambiguous",
            "ambiguous",
            "TRA+TRB",
            "IGH+IGL",
            "TRG+TRD",
        ]),
    )
Example #13
def test_chain_pairing():
    obs = pd.DataFrame.from_records(
        [
            [
                "False", "nan", "nan", "nan", "nan", "nan", "nan", "nan",
                "nan", "nan"
            ],
            [
                "True", "True", "AA", "BB", "CC", "DD", "TRA", "TRA", "TRA",
                "TRB"
            ],
            [
                "True", "False", "AA", "BB", "CC", "DD", "TRA", "TRA", "TRB",
                "TRB"
            ],
            [
                "True", "False", "AA", "nan", "nan", "nan", "TRA", "nan",
                "nan", "nan"
            ],
            [
                "True", "False", "AA", "nan", "CC", "nan", "TRA", "nan", "TRB",
                "nan"
            ],
            [
                "True", "False", "AA", "BB", "nan", "nan", "TRA", "TRA", "nan",
                "nan"
            ],
            [
                "True", "False", "AA", "BB", "CC", "nan", "TRA", "TRA", "TRB",
                "TRB"
            ],
            [
                "True", "False", "nan", "nan", "CC", "nan", "nan", "nan",
                "TRB", "nan"
            ],
            [
                "True", "False", "nan", "nan", "CC", "DD", "nan", "nan", "TRB",
                "TRB"
            ],
            [
                "True", "False", "AA", "nan", "CC", "DD", "TRA", "nan", "TRB",
                "TRB"
            ],
            [
                "True", "False", "AA", "nan", "CC", "DD", "TRA", "nan", "TRB",
                "IGH"
            ],
        ],
        columns=[
            "has_ir",
            "multi_chain",
            "IR_VJ_1_junction_aa",
            "IR_VJ_2_junction_aa",
            "IR_VDJ_1_junction_aa",
            "IR_VDJ_2_junction_aa",
            "IR_VJ_1_locus",
            "IR_VJ_2_locus",
            "IR_VDJ_1_locus",
            "IR_VDJ_2_locus",
        ],
    )
    adata = AnnData(obs=obs)
    adata.uns["scirpy_version"] = "0.7"
    res = ir.tl.chain_pairing(adata, inplace=False)
    npt.assert_equal(
        res,
        [
            "no IR",
            "multichain",
            "two full chains",
            "orphan VJ",
            "single pair",
            "orphan VJ",
            "extra VJ",
            "orphan VDJ",
            "orphan VDJ",
            "extra VDJ",
            "ambiguous",
        ],
    )
Example #14
import scipy.sparse
from anndata import AnnData
from pandas import DataFrame


def postprocess_mnnpy(adata, bdata):
    """ postprocessing to generate a newly functional AnnData object

    After running mnnpy's mnn_correct we obtain an AnnData object bdata. Since mnn_correct automatically
    truncates all the genes contained in .raw to contain only the highly variable genes, this function
    creates a new AnnData object that contains .X from bdata but .raw from adata (which still contains all
    the genes, not only the highly variable ones).

    Before creation of the new AnnData object, the matrices are sorted by cell barcode so
    that the labels are guaranteed to be correct.

    parameters
    ----------

    adata:
        the uncorrected AnnData object
    bdata:
        the batch corrected AnnData object

    returns
    -------
    AnnData
        AnnData object with .X containing the corrected values and .raw all of the original values

    """
    corrected_matrix = DataFrame(data=bdata.X, index=bdata.obs_names.tolist(), columns=bdata.var_names.tolist())
    corrected_matrix.sort_index(inplace=True)

    new_adata = AnnData(corrected_matrix.values)
    new_adata.obs = bdata.obs.sort_index()
    new_adata.var_names = bdata.var_names
    new_adata.obs_names = bdata.obs_names.sort_values()
    new_adata.var = bdata.var

    # need to sort raw object to match the batch corrected order
    raw_matrix = DataFrame(data=(adata.raw.X.todense() if scipy.sparse.issparse(adata.raw.X) else adata.raw.X), index=adata.obs_names.tolist(), columns=adata.raw.var_names.tolist())
    raw_matrix.sort_index(inplace=True)

    # recreate raw
    raw = AnnData(raw_matrix.values)
    raw.var_names = adata.raw.var_names
    raw.obs_names = adata.obs_names.sort_values()
    raw.var = adata.raw.var

    # add raw back in
    new_adata.raw = raw

    # ensure that indices are preserved
    adata.obs_names = adata.obs.CELL
    adata.obs.index = adata.obs.CELL

    return new_adata
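
A hypothetical usage sketch; adata_b1, adata_b2 and hvg are placeholders, and the assumption that mnnpy.mnn_correct returns the corrected AnnData as the first tuple element follows the mnnpy README.

import mnnpy

# adata: the uncorrected object, with cell barcodes in obs["CELL"]
corrected = mnnpy.mnn_correct(adata_b1, adata_b2, var_subset=hvg)[0]
new_adata = postprocess_mnnpy(adata, corrected)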
Example #15
def mast(
    adata: AnnData,
    *,
    groupby: str,
    groups: Union[Literal["all"], Sequence[str]],
    cofactors: Optional[Sequence[str]] = None,
    layer: Optional[str] = None,
    n_cores_per_job: int = 4,
    n_jobs: int = 4,
):
    """
    Perform DE analysis using MAST.

    Requires an R installation with the following packages available:

        MAST
        BiocParallel

    Install them with `conda install bioconductor-mast bioconductor-biocparallel`.

    Parameters
    ----------
    adata
        annotated data matrix. X must contain normalized and log-transformed values.
    groupby
        The column in adata.obs to test for DE
    groups
        Groups to test (not used in the body below; every group in `groupby` is tested)
    cofactors
        Additional columns to include in the model
    layer
        layer in adata that contains the values to test. If None, use `X`.
    n_cores_per_job
        Number of cores to run per job (including BLAS parallelization)
    n_jobs
        Number of tests to run in parallel.
    """

    try:
        from rpy2.robjects.packages import importr
        from rpy2.robjects import pandas2ri
        from rpy2.robjects.conversion import localconverter
        from rpy2 import robjects as ro
        import anndata2ri
    except ImportError:
        raise ImportError(
            "MAST requires rpy2 and anndata2ri to be installed. ")

    try:
        mast = importr("MAST")
        bcparallel = importr("BiocParallel")
    except ImportError:
        raise ImportError(
            "MAST requires a valid R installation with the following packages: "
            "MAST, BiocParallel")

    bcparallel.register(bcparallel.MulticoreParam(n_jobs))

    logging.info("Preparing AnnData")
    tmp_adata = AnnData(
        X=adata.X if layer is None else adata.layers[layer],
        obs=adata.obs,
        var=adata.var,
    )
    tmp_adata.obs.columns = _make_names(tmp_adata.obs.columns)
    tmp_adata.obs[groupby] = _make_names(tmp_adata.obs[groupby])
    contrasts = []
    for group in tmp_adata.obs[groupby].unique():
        contrasts.append(f"is_group_{group}")
        tmp_adata.obs[f"is_group_{group}"] = tmp_adata.obs[groupby] == group

    logging.info("Preparing R objects")
    with localconverter(anndata2ri.converter):
        sce = ro.conversion.py2rpy(tmp_adata)
    sca = mast.SceToSingleCellAssay(sce)
    groupby = _make_names([groupby])[0]
    cofactor_formula = ("" if cofactors is None else "+ " +
                        " + ".join(_make_names(cofactors)))

    logging.info("Running MAST")
    ro.globalenv["cpus_per_thread"] = n_cores_per_job
    ro.globalenv["contrasts"] = contrasts
    ro.globalenv["cofactor_formula"] = cofactor_formula
    ro.globalenv["sca"] = sca
    ro.r("""
        library(dplyr)
        de_res = bplapply(contrasts, function(model_col) {
            op = options(mc.cores=cpus_per_thread)
            on.exit(options(op))
            contrast_to_test = paste0(model_col, "TRUE")
            fit = zlm(as.formula(paste0("~", model_col, cofactor_formula)), sca)
            res = summary(fit, doLRT=contrast_to_test)$datatable
            merge(
                res[contrast==contrast_to_test & component=='H', .(primerid, `Pr(>Chisq)`)], #P-vals
                res[contrast==contrast_to_test & component=='logFC', .(primerid, coef)],
                by='primerid'
            ) %>% mutate(comparison=model_col)                  
        }) %>% bind_rows()
        """)

    with localconverter(ro.default_converter + pandas2ri.converter):
        de_res = ro.conversion.rpy2py(ro.globalenv["de_res"])

    de_res["comparison"] = de_res["comparison"].str.replace("is_group_", "")
    return de_res
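
A hypothetical call, assuming adata.X holds normalized, log-transformed values and the R and Python dependencies from the docstring are installed; the column names are placeholders.

de_res = mast(adata, groupby="leiden", groups="all",
              cofactors=["n_genes_by_counts"])
print(de_res.head())  # primerid, Pr(>Chisq), coef, comparison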
Example #16
from typing import Optional

import numpy as np
import pandas as pd
from anndata import AnnData
from statsmodels.stats.multitest import fdrcorrection


def de_res_to_anndata(
    adata: AnnData,
    de_res: pd.DataFrame,
    *,
    groupby: str,
    gene_id_col: str = "gene_symbol",
    score_col: str = "score",
    pval_col: str = "pvalue",
    pval_adj_col: Optional[str] = None,
    lfc_col: str = "lfc",
    key_added: str = "rank_genes_groups",
) -> None:
    """Add a tabular differential expression result to AnnData as
    if it were produced by scanpy.tl.rank_genes_groups.

    Parameters
    ----------
    adata
        annotated data matrix
    de_res
        Tabular DE result
    groupby
        column in `de_res` that indicates the group. This column must
        also exist in `adata.obs`.
    gene_id_col
        column in `de_res` that holds the gene identifiers
    score_col
        column in `de_res` that holds the score (results will be ordered by score).
    pval_col
        column in `de_res` that holds the unadjusted pvalue
    pval_adj_col
        column in `de_res` that holds the adjusted pvalue. If not specified, the
        unadjusted pvalues will be FDR-adjusted.
    lfc_col
        column in `de_res` that holds the log fold change
    key_added
        key under which the results will be stored in adata.uns
    """
    if groupby not in adata.obs.columns or groupby not in de_res.columns:
        raise ValueError(
            "groupby column must exist in both adata and de_res. ")
    res_dict = {
        "params": {
            "groupby": groupby,
            "reference": "rest",
            "method": "other",
            "use_raw": True,
            "layer": None,
            "corr_method": "other",
        },
        "names": [],
        "scores": [],
        "pvals": [],
        "pvals_adj": [],
        "logfoldchanges": [],
    }
    df_groupby = de_res.groupby(groupby)
    for _, tmp_df in df_groupby:
        tmp_df = tmp_df.sort_values(score_col, ascending=False)
        res_dict["names"].append(tmp_df[gene_id_col].values)
        res_dict["scores"].append(tmp_df[score_col].values)
        res_dict["pvals"].append(tmp_df[pval_col].values)
        if pval_adj_col is not None:
            res_dict["pvals_adj"].append(tmp_df[pval_adj_col].values)
        else:
            res_dict["pvals_adj"].append(
                fdrcorrection(tmp_df[pval_col].values)[1])
        res_dict["logfoldchanges"].append(tmp_df[lfc_col].values)

    for key in ["names", "scores", "pvals", "pvals_adj", "logfoldchanges"]:
        res_dict[key] = pd.DataFrame(
            np.vstack(res_dict[key]).T,
            columns=list(df_groupby.groups.keys()),
        ).to_records(index=False, column_dtypes="O")
    adata.uns[key_added] = res_dict
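
A minimal end-to-end sketch: a generic DE table using the default column names is converted and then read back by scanpy's standard plotting function.

import numpy as np
import pandas as pd
import scanpy as sc
from anndata import AnnData

adata = AnnData(
    X=np.random.rand(4, 2),
    obs=pd.DataFrame({"group": ["A", "A", "B", "B"]},
                     index=[f"cell{i}" for i in range(4)]),
    var=pd.DataFrame(index=["gene1", "gene2"]),
)
de_res = pd.DataFrame({
    "group": ["A", "A", "B", "B"],
    "gene_symbol": ["gene1", "gene2", "gene1", "gene2"],
    "score": [2.0, -1.0, 1.5, 0.5],
    "pvalue": [0.01, 0.8, 0.05, 0.2],
    "lfc": [1.2, -0.3, 0.9, 0.1],
})
de_res_to_anndata(adata, de_res, groupby="group")
sc.pl.rank_genes_groups(adata)  # reads adata.uns["rank_genes_groups"]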