Python sf Examples, scipy.stats.distributions.norm.sf Python Examples

Example #1

0

Show file

    def summary(self, yname=None, xname=None, title=None, alpha=0.05):
        """
        Build a summary table of the fitted parameters.

        Parameters
        ----------
        yname, xname : optional
            Accepted but not used by this method.
        title : str, optional
            Title of the summary. A default title is used when None.
        alpha : float
            Significance level; the table holds the bounds of a two-sided
            normal-approximation interval at the ``alpha / 2`` and
            ``1 - alpha / 2`` quantiles.

        Returns
        -------
        summary2.Summary
            Summary holding the title and the parameter table.
        """
        # One label per parameter, in the order mean / scale / smooth / noise.
        type_labels = []
        for label, count in (("Mean", self.k_exog), ("Scale", self.k_scale),
                             ("Smooth", self.k_smooth), ("SD", self.k_noise)):
            type_labels.extend([label] * count)

        table = pd.DataFrame()
        table["Type"] = type_labels
        table["coef"] = self.params

        # Best effort: fall back to NaN standard errors when the covariance
        # of the parameters cannot be turned into a column.
        try:
            table["std err"] = np.sqrt(np.diag(self.cov_params()))
        except Exception:
            table["std err"] = np.nan

        from scipy.stats.distributions import norm

        coef = table["coef"]
        std_err = table["std err"]

        # Wald statistics with two-sided p-values from the standard normal.
        table["tvalues"] = coef / std_err
        table["P>|t|"] = 2 * norm.sf(np.abs(table["tvalues"]))

        # Interval bounds; the column names carry the quantile levels.
        quantile = norm.ppf(1 - alpha / 2)
        table["[%.3f" % (alpha / 2)] = coef - quantile * std_err
        table["%.3f]" % (1 - alpha / 2)] = coef + quantile * std_err

        table.index = self.model.data.param_names

        if title is None:
            title = "Gaussian process regression results"

        summ = summary2.Summary()
        summ.add_title(title)
        summ.add_df(table)
        return summ

Example #2

0

Show file

File: process_regression.py Project: statsmodels/statsmodels

    def summary(self, yname=None, xname=None, title=None, alpha=0.05):
        """
        Assemble the parameter table of the fit into a summary object.

        Parameters
        ----------
        yname, xname : optional
            Present in the signature but not used here.
        title : str, optional
            Summary title; defaults to a fixed title when None.
        alpha : float
            Significance level used for the interval columns, which are
            labelled with ``alpha / 2`` and ``1 - alpha / 2``.

        Returns
        -------
        summary2.Summary
            Summary containing the title and the table.
        """
        # Parameter kinds, repeated once per parameter of that kind.
        kinds = []
        for kind, repeat in (("Mean", self.k_exog), ("Scale", self.k_scale),
                             ("Smooth", self.k_smooth), ("SD", self.k_noise)):
            kinds.extend([kind] * repeat)

        frame = pd.DataFrame()
        frame["Type"] = kinds
        frame["coef"] = self.params

        # Standard errors are optional: any failure yields a NaN column.
        try:
            frame["std err"] = np.sqrt(np.diag(self.cov_params()))
        except Exception:
            frame["std err"] = np.nan

        from scipy.stats.distributions import norm

        estimates = frame["coef"]
        errors = frame["std err"]

        # Ratio of estimate to standard error and its two-sided normal p-value.
        frame["tvalues"] = estimates / errors
        frame["P>|t|"] = 2 * norm.sf(np.abs(frame["tvalues"]))

        # Normal-approximation interval around each estimate.
        critical = norm.ppf(1 - alpha / 2)
        frame["[%.3f" % (alpha / 2)] = estimates - critical * errors
        frame["%.3f]" % (1 - alpha / 2)] = estimates + critical * errors

        frame.index = self.model.data.param_names

        if title is None:
            title = "Gaussian process regression results"

        summ = summary2.Summary()
        summ.add_title(title)
        summ.add_df(frame)
        return summ

Example #3

0

Show file

def mannwhitneyu(x, y, use_continuity=True):
    """
    Compute a signed z-score and one-sided p-value for the Mann-Whitney
    rank test on samples x and y.

    Parameters
    ----------
    x, y : array_like
        Array of samples, should be one-dimensional.
    use_continuity : bool, optional
        Whether a continuity correction (1/2.) should be taken into
        account. Default is True.

    Returns
    -------
    z : float
        Tie-corrected normal-approximation z-score. Unlike the classic
        test this is NOT the U statistic: its magnitude comes from the
        larger of the two U values and its sign is negative when x ranks
        lower than y, positive when x ranks higher, and zero when both U
        values are equal.
    prob : float
        One-sided p-value ``norm.sf(abs(z))`` assuming an asymptotic
        normal distribution.

    Raises
    ------
    ValueError
        If the tie correction is zero, i.e. all values are identical.

    Notes
    -----
    Use only when the number of observations in each sample is > 20 and
    you have 2 independent samples of ranks.

    This test corrects for ties and by default uses a continuity correction.
    The reported p-value is for a one-sided hypothesis; to get the two-sided
    p-value multiply the returned p-value by 2.

    """
    x = np.asarray(x)
    y = np.asarray(y)
    n1 = len(x)
    n2 = len(y)
    ranked = rankdata(np.concatenate((x, y)))
    rankx = ranked[0:n1]  # ranks of the x observations; the rest belong to y
    u1 = n1 * n2 + (n1 *
                    (n1 + 1)) / 2.0 - np.sum(rankx, axis=0)  # large when x ranks low
    u2 = n1 * n2 - u1  # the two U values always sum to n1 * n2
    bigu = max(u1, u2)
    T = tiecorrect(ranked)  # correction factor for tied scores
    if T == 0:
        raise ValueError('All numbers are identical in mannwhitneyu')
    sd = np.sqrt(T * n1 * n2 * (n1 + n2 + 1) / 12.0)

    if use_continuity:
        # normal approximation for prob calc with continuity correction
        z = (bigu - 0.5 - n1 * n2 / 2.0) / sd
    else:
        z = (bigu - n1 * n2 / 2.0) / sd  # normal approximation for prob calc
    # bigu only gives the magnitude; restore the direction of the effect.
    z *= int(u1 < u2) - int(u1 > u2)
    return z, norm.sf(abs(z))

Example #4

0

Show file

File: stats.py Project: davek44/utility

def mannwhitneyu(x, y, use_continuity=True):
    """
    Compute a signed z-score and one-sided p-value for the Mann-Whitney
    rank test on samples x and y.

    Parameters
    ----------
    x, y : array_like
        Array of samples, should be one-dimensional.
    use_continuity : bool, optional
        Whether a continuity correction (1/2.) should be taken into
        account. Default is True.

    Returns
    -------
    z : float
        Tie-corrected normal-approximation z-score. This is NOT the U
        statistic: the magnitude is derived from the larger of the two U
        values, and the sign is negative when x ranks lower than y,
        positive when x ranks higher, and zero when both U values are
        equal.
    prob : float
        One-sided p-value ``norm.sf(abs(z))`` assuming an asymptotic
        normal distribution.

    Raises
    ------
    ValueError
        If the tie correction is zero, i.e. all values are identical.

    Notes
    -----
    Use only when the number of observations in each sample is > 20 and
    you have 2 independent samples of ranks.

    This test corrects for ties and by default uses a continuity correction.
    The reported p-value is for a one-sided hypothesis; to get the two-sided
    p-value multiply the returned p-value by 2.

    """
    x = np.asarray(x)
    y = np.asarray(y)
    n1 = len(x)
    n2 = len(y)
    ranked = rankdata(np.concatenate((x,y)))
    rankx = ranked[0:n1]       # ranks of the x observations
    u1 = n1*n2 + (n1*(n1+1))/2.0 - np.sum(rankx,axis=0)  # large when x ranks low
    u2 = n1*n2 - u1                            # the two U values sum to n1*n2
    bigu = max(u1,u2)
    T = tiecorrect(ranked)  # correction factor for tied scores
    if T == 0:
        raise ValueError('All numbers are identical in mannwhitneyu')
    sd = np.sqrt(T*n1*n2*(n1+n2+1)/12.0)

    if use_continuity:
        # normal approximation for prob calc with continuity correction
        z = (bigu-0.5-n1*n2/2.0) / sd
    else:
        z = (bigu-n1*n2/2.0) / sd  # normal approximation for prob calc
    # bigu only gives the magnitude; restore the direction of the effect.
    z *= int(u1<u2)-int(u1>u2)
    return z, norm.sf(abs(z))

Example #5

0

Show file

File: mann_whitney_utest.py Project: gajduk/yeast-johan-assay-data-analysis

    def mannwhitneyu(self, x, y, use_continuity=True):
        """
        Mann-Whitney rank test on x and y using a normal approximation.

        Returns the tuple ``(smallu, bigu, z, p)``: the smaller and larger
        of the two U values, the absolute tie-corrected z-score and the
        one-sided p-value ``norm.sf(z)``. ``use_continuity`` subtracts 0.5
        from the larger U before standardising.
        """
        first = asarray(x)
        second = asarray(y)
        size_x, size_y = len(first), len(second)
        n_pairs = size_x * size_y

        # Rank the pooled sample; the leading entries belong to x.
        pooled_ranks = rankdata(np.concatenate((first, second)))
        rank_sum_x = np.sum(pooled_ranks[0:size_x], axis=0)

        u_x = n_pairs + (size_x * (size_x + 1)) / 2.0 - rank_sum_x
        u_y = n_pairs - u_x  # the two U values sum to size_x * size_y
        bigu = max(u_x, u_y)
        smallu = min(u_x, u_y)

        tie_factor = tiecorrect(pooled_ranks)
        sd = np.sqrt(tie_factor * size_x * size_y * (size_x + size_y + 1) / 12.0)

        # Centre the larger U on its expected value, optionally applying the
        # continuity correction, then standardise.
        if use_continuity:
            centred = bigu - 0.5 - n_pairs / 2.0
        else:
            centred = bigu - n_pairs / 2.0
        z = abs(centred / sd)

        return smallu, bigu, z, norm.sf(z)

Example #6

0

Show file

def info_score(X, nbhds, max_bins=float('inf'),
               entropy_normalize=False, fast_version=True, binom_scores=None, gene_bins=None,
               return_bin_info=False, verbose=True, n_tests='auto', model='wilcoxon',
               chunk_size=1000, **kwargs):
    """
    Score every gene in every neighborhood according to ``model``.

    Models handled below:

    * ``'ttest'``: ``ttest_ind_from_stats`` on neighborhood vs. global mean/std; the p-values
      are corrected for ``n_tests``, turned into ``-log(p)`` and signed by whether the
      neighborhood mean is >= the global mean. Returns early.
    * ``'wilcoxon'``: rank-sum z-scores computed per chunk of genes; two-sided normal p-values
      are corrected for ``n_tests``, turned into ``-log(p)`` and negated where U is below its
      expected value. Returns early.
    * ``'log_likelihood'``: a signed score built from the squared distance between neighborhood
      and global means, scaled by the per-gene variance divided by k. Returns early.
    * ``'binomial'``: looks up precomputed scores (``get_binom_scores``) by gene bin and by the
      number of neighbors expressing the gene.

    Any other ``model`` value falls through all branches and yields the all-zero matrix.

    :param X: sparse count matrix (rows are observations, columns are genes)
    :param nbhds: list with indices of nearest neighbors for each obs in X, e.g. from kneighbors() in sklearn.
    An ndarray is converted to a list of rows. k is taken from the length of the first neighborhood.
    :param max_bins: Resolution at which global gene probabilities are computed.
    if inf, genes get their own probabilities. Otherwise, the unit interval is split into max_bins pieces
    and they are rounded. This makes it faster with little performance difference
    :param entropy_normalize: if True, divide each gene's column by that gene's entropy. Only reached
    by models that do not return early (see above).
    :param fast_version: if True, use matrix multiplication instead of iteration. Fast, but memory-intensive.
    Only used by the 'binomial' model.

    :param binom_scores: pass in binomial scores for each gene/bin, if pre-computed. Allows saving for future iterations.
    :param gene_bins: pass in gene bins from previous run. Speeds up iteration

    :param return_bin_info: for iteration: keep information about gene bins and binomial probs.
    :param verbose: if True, print progress messages.
    :param n_tests: number of tests to correct the p-values for. 'auto' calls bootstrapped_ntests();
    for the 'binomial' model, None means one test per gene (X.shape[1]).
    :param model: one of 'ttest', 'wilcoxon', 'log_likelihood', 'binomial'.
    :param chunk_size: number of genes processed at a time by the 'wilcoxon' model.
    :param kwargs: forwarded to get_binom_scores() ('binomial' model only).
    :return: sparse matrix of gene/cell weightings; for model == 'binomial' with return_bin_info,
    the tuple (weights, gene_bins, binom_scores).
    """



    if type(nbhds) is np.ndarray:
        nbhds = list(nbhds)

    k = len(nbhds[0])  # how many neighbors? (taken from the first neighborhood only)

    if n_tests == 'auto':
        # determine by bootstrapping
        n_tests = bootstrapped_ntests(X, k=k, model=model)

    wts = np.zeros((len(nbhds), X.shape[1]))  # too big for large data
    # nbhd_counts = np.zeros(X.shape)  # ditto
    # nbhd_sizes = [len(x) for x in NNs]

    # first compute frequencies of all genes:
    # fraction of observations with a nonzero count, per gene
    gene_probs = np.array((X > 0).sum(axis=0) / float(X.shape[0])).flatten()


    # frequencies of genes within neighborhoods
    # (only filled by the non-fast binomial path, and not returned)
    nbhd_probs = np.zeros(X.shape)

    if model == 'ttest':
        # one entry of 1 per (neighborhood, member) pair
        data = np.ones(np.sum([len(x) for x in nbhds]))
        col_ind = [item for sublist in nbhds for item in sublist]
        row_ind = [i for i, sublist in enumerate(nbhds) for item in sublist]

        # sparse adjacency matrix of NN graph
        nn_matrix = csr_matrix((data, (row_ind, col_ind)), shape=(len(nbhds), X.shape[0]))

        # get mean gene expressions within each neighborhood; this matrix may be less sparse
        # NOTE(review): the summed expression is cast to int before dividing by the
        # neighborhood size -- confirm that truncation is intended for non-integer X
        mean_nbhd_exprs = (nn_matrix * X).astype('int').multiply(1/nn_matrix.sum(axis=1)).tocsr()

        vars = np.zeros((len(nbhds), X.shape[1]))
        for i in range(len(nbhds)): # gotta go cell by cell
            nbrs = np.array(nbhds[i]).flatten()
            gene_diffs = np.power((X[nbrs,:].todense()-mean_nbhd_exprs[i,:].todense()),2) # diffs of gene expression
            vars[i,:] = gene_diffs.mean(axis=0)
        vars = csr_matrix(vars)

        # global statistics repeated once per neighborhood so shapes line up
        global_means = np.tile(X.mean(axis=0), (len(nbhds),1))

        #sign is pos if mean is higher, negative otherwise.
        signs = 2*(mean_nbhd_exprs.todense() >= global_means).astype('int') - 1

        global_var = np.tile(np.var(X.todense(), axis=0), (len(nbhds),1))
        nobs_global = np.tile(X.shape[0], (len(nbhds), X.shape[1]))
        nobs_local = np.tile(k, (len(nbhds), X.shape[1]))

        # t-test on flattened statistics; p-values reshaped back to (neighborhoods, genes)
        wts = ttest_ind_from_stats(mean1=mean_nbhd_exprs.todense().flatten(),
                                              std1=np.array(np.sqrt(vars.todense()).flatten()),
                                              nobs1=np.array(nobs_local).flatten(),
                                              mean2=np.array(global_means).flatten(),
                                              std2=np.array(np.sqrt(global_var)).flatten(),
                                              nobs2=np.array(nobs_global).flatten()).pvalue.reshape((len(nbhds), X.shape[1]))

        np.nan_to_num(wts, copy=False, nan=1.0) # nans become pval 1

        wts[wts==0] = sys.float_info.min # remove zeros so the log below stays finite


        if n_tests>1:
            # use FWER to correct for testing many genes
            wts_corrected = 1 - np.power(1 - wts, n_tests)
            wts_corrected[wts < 1e-10] = taylor_exp(wts[wts < 1e-10], n_tests)  # more accurate

            wts = wts_corrected
        else:
            # wts_corrected is not read again after this point
            wts_corrected = wts

        wts = -1*np.log(wts) # convert to info

        # NOTE(review): wts now holds -log(p), so NaNs become an info score of 1.0
        # here rather than a p-value of 1 -- confirm this is intended
        np.nan_to_num(wts, copy=False, nan=1.0)

        wts = np.multiply(signs, wts) # negative if underexpressed





        return(csr_matrix(wts))

    #TODO TODO add signs

    elif model == 'wilcoxon':
        from scipy.stats import rankdata

        # Column-wise 0-based ranks via argsort; not called by the active code below.
        def fastRank(array):
            temp = array.argsort(axis=0)
            ranks = np.zeros(temp.shape)

            rows = temp.transpose().flatten()
            cols = np.repeat(np.arange(temp.shape[1]), temp.shape[0])
            ranks[rows, cols] = np.array(list(np.arange(temp.shape[0])) * temp.shape[1])
            return (ranks)

        # Wilcoxon rank sum test
        #overall_exprs = X.todense().transpose().tolist()

        # split the gene indices into consecutive chunks of at most chunk_size
        n_genes = X.shape[1]
        chunk_ends = [0] + list(np.arange(chunk_size, n_genes, chunk_size))
        chunk_ends.append(n_genes)
        gene_idxs = np.array(list(range(n_genes)))
        gene_chunks = [np.array(gene_idxs[chunk_ends[i]:chunk_ends[i + 1]]) for i in range(len(chunk_ends) - 1)]

        wt_blocks = [] # list of sparse matrices to concatenate horizontally

        # make nbhd adjacency matrix
        data = np.ones(np.sum([len(x) for x in nbhds]))
        col_ind = [item for sublist in nbhds for item in sublist]
        row_ind = [i for i, sublist in enumerate(nbhds) for item in sublist]

        # sparse adjacency matrix of NN graph
        nn_matrix = csr_matrix((data, (row_ind, col_ind)), shape=(len(nbhds), X.shape[0]))

        for i,chunk in enumerate(gene_chunks):
            if verbose:
                print('chunk {}/{}'.format(i+1,len(gene_chunks)), end='\r')

            X_chunk = X[:,chunk]

            # wts is reused in place for ranks -> rank sums -> U -> z -> p -> info
            wts = rankdata(X_chunk.todense(), axis=0) # gene rankings

            wts = nn_matrix @ wts  # nbhd_ranksums; only want to store one big matrix

            n1 = k
            n2 = X_chunk.shape[0] - k
            sd = np.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0)
            meanrank = n1 * n2 / 2.0

            # #sign is pos if mean rank is higher than average, negative otherwise.
            # signs = 2*(wts >= meanrank).astype('int') - 1

            wts = wts - ((n1 * (n1 + 1)) / 2.0)  # calc U for x, u1

            is_neg = (wts<meanrank) # remember where it was negative

            wts = np.maximum(wts, n1 * n2 - wts)  # bigu

            wts = ((wts - meanrank) / sd) # z values

            wts = 2 * norm.sf(np.abs(wts)) #p values
            #
            # for i in range(len(nbhds)):
            #     print('cell {}/{}'.format(i+1,len(nbhds)+1), end='\r')
            #     #gene_exprs = X[nbhds[i],:].todense()
            #     #all_exprs = np.vstack((gene_exprs, X.todense()))
            #
            #     nbhd_ranks = gene_rankings[nbhds[i],:]
            #
            #     ranksums = np.sum(nbhd_ranks, axis=0)
            #     n1 = k
            #     n2 = X.shape[0]-k
            #     #
            #     # ranks = fastRank(all_exprs.A)
            #     #
            #     # n1 = k
            #     # n2 = X.shape[0]
            #     # ranksums = np.sum(ranks[:k, :], axis=0)
            #     u1 = ranksums - ((n1 * (n1 + 1)) / 2.0)  # calc U for x
            #     u2 = n1 * n2 - u1  # remainder is U for y
            #
            #     sd = np.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0)
            #     meanrank = n1 * n2 / 2.0
            #
            #     bigu = np.maximum(u1, u2)
            #    # wts[i,:] = bigu
            #
            #     z = ((bigu - meanrank) / sd).flatten()
            #     p = 2 * norm.sf(abs(z))
            #     wts[i,:] = p
            #
            #

                # gene_exprs = X[nbhds[i],:].todense().transpose().tolist() # list of gene expression vectors for each nbr
                # for j,local_expr in enumerate(gene_exprs):
                #     global_expr = overall_exprs[j]
                #     wts[i,j] = mannwhitneyu(x=local_expr, y=global_expr, alternative='two-sided', use_continuity=False)[1]
            if n_tests>1:
                # use FWER to correct for testing many genes
                # the second mask is evaluated after the first assignment has updated wts
                wts[wts> 1e-10] = 1-np.power(1-wts[wts>1e-10], n_tests)
                wts[wts<=1e-10] = taylor_exp(wts[wts <= 1e-10], n_tests)
                # wts_corrected = 1 - np.power(1 - wts, n_tests)
                # wts_corrected[wts < 1e-10] = taylor_exp(wts[wts < 1e-10], n_tests)  # more accurate
                #
                # wts = wts_corrected

            wts = -1*np.log(wts) # convert to info scores

            #sign them
            wts[is_neg] *= -1
            #wts = np.multiply(signs, wts)

            wt_blocks.append(csr_matrix(wts))

        # one block per gene chunk, joined along the gene axis
        return(hstack(wt_blocks))

    elif model == 'log_likelihood':
        means = X.mean(axis=0)
        # per-gene variance divided by the neighborhood size k
        variances = sparse_vars(X, axis=0)/float(k)

        data = np.ones(np.sum([len(x) for x in nbhds]))
        col_ind = [item for sublist in nbhds for item in sublist]
        row_ind = [i for i, sublist in enumerate(nbhds) for item in sublist]

        # sparse adjacency matrix of NN graph
        nn_matrix = csr_matrix((data, (row_ind, col_ind)), shape=(len(nbhds), X.shape[0]))

        nbhd_means = ((nn_matrix * X)/float(k)).todense()

        # NOTE(review): unconditional print that ignores `verbose`; looks like leftover debugging
        print(np.min(variances))
        wts = 1 / 2. * np.log(2 * np.pi) + np.power((nbhd_means - means), 2) / (2 * variances) + np.log(variances)/2.

        # +1 where the neighborhood mean is >= the global mean, -1 otherwise
        signs = 2*(nbhd_means >= means).astype('int') - 1

        wts = np.multiply(wts, signs)

        return(csr_matrix(wts))



    elif model == 'binomial':
        X = csr_matrix((X > 0).astype('float'))  # convert to sparse binarized matrix
        if binom_scores is None or gene_bins is None:
            if n_tests is None:
                n_tests = X.shape[1]  # multi-correct per cell
            gene_bins, binom_scores = get_binom_scores(gene_probs, k, max_bins=max_bins,
                                                       verbose=verbose, n_tests=n_tests, **kwargs)

        if fast_version:
            # compute significance of gene expression in each cell's neighborhood
            # first convert neighborhood to sparse matrix
            data = np.ones(np.sum([len(x) for x in nbhds]))
            col_ind = [item for sublist in nbhds for item in sublist]
            row_ind = [i for i, sublist in enumerate(nbhds) for item in sublist]

            # sparse adjacency matrix of NN graph
            nn_matrix = csr_matrix((data, (row_ind, col_ind)), shape=(len(nbhds), X.shape[0]))

            # get gene expressions within each neighborhood; this matrix may be less sparse
            # (X is binarized above, so these are counts of expressing neighbors)
            nbhd_exprs = (nn_matrix * X).astype('int').todense()

            # # extract locations and values of nonzero nbhd expressions.
            # rows, cols = nbhd_exprs.nonzero()
            # exprs = nbhd_exprs.data

            # apply binomial scores
            rows, cols = np.indices((len(nbhds), X.shape[1]))
            rows = rows.flatten()
            cols = cols.flatten()

            # look up the score for each (gene bin, neighbor count) pair, then restore the 2-D shape
            wts = binom_scores[gene_bins[cols], np.array(nbhd_exprs[rows, cols]).flatten()].reshape((len(nbhds), X.shape[1]))

        else:
            for i in range(len(nbhds)):
                if verbose:
                    if i < len(nbhds) - 1:
                        print('\r computing counts for cell {}/{}'.format(i, X.shape[0]), end='         ')
                    else:
                        print('\r computing counts for cell {}/{}'.format(i, X.shape[0]), end='         \n')

                nnbhd = X[nbhds[i], :]

                nbhd_size = len(nbhds[i])
                nbhd_gene_counts = np.array((nnbhd > 0).sum(axis=0)).flatten()

                nbhd_probs[i, :] = nbhd_gene_counts / nbhd_size

                if max_bins < float('inf'):
                    # look up the binomial score in the nearest bins

                    # gene_scores = [binom_scores[gene_bins[j], count] for j, count in enumerate(nbhd_gene_counts)]
                    gene_scores = binom_scores[gene_bins, nbhd_gene_counts]

                else:
                    # no binning: each gene indexes its own row of binom_scores
                    gene_scores = [binom_scores[j, count] for j, count in enumerate(nbhd_gene_counts)]

                wts[i, :] = gene_scores
                # expected_vals = nbhd_size * gene_probs
                # wts[i, :] = -1*np.log(gene_scores) * (2*(nbhd_gene_counts > expected_vals)-1)

    # Only the binomial model (or an unrecognized model value) reaches this point;
    # the other branches return above.
    if entropy_normalize:  # divide each column by the entropy of the corresponding gene
        gene_entropies = -1 * (np.multiply(gene_probs, np.log(gene_probs))
                               + np.multiply((1 - gene_probs), np.log(1 - gene_probs)))
        gene_entropies[np.logical_not(np.isfinite(gene_entropies))] = float(
            'inf')  # zeros out non-expressed or everywhere-expressed genes
        wts = np.divide(wts, gene_entropies)

    # if return_all:
    #     return (wts, gene_probs, nbhd_probs)
    wts = csr_matrix(wts)
    if model == 'binomial' and return_bin_info:  # for iteration
        return (wts, gene_bins, binom_scores)
    else:
        return (wts)

Example #7

0

Show file

File: diffrank.py Project: charliex210/scanpy

def diffrank(adata,
             smp='groups',
             names='all',
             sig_level=0.05,
             correction='Bonferroni',
             log=False):
    """
    Compare groups by ranking genes according to differential expression.

    For every pair of groups a z-score is computed per gene from the group
    means, variances and sample numbers; genes are then ranked by decreasing
    absolute z-score. Genes whose z-score is undefined (zero denominator or
    NaN) are placed at the end of the ranking.

    Parameters
    ----------
    adata : AnnData
        Annotated data matrix.
    smp : str, optional (default: 'groups')
        Specify the name of the grouping to consider.
    names : str, list, np.ndarray, optional (default: 'all')
        Subset of categories - e.g. 'C1,C2,C3' or ['C1', 'C2', 'C3'] - to which
        comparison shall be restricted. If not provided all categories will be
        compared to all other categories.
    sig_level : float, optional (default: 0.05)
        Currently unused; kept for interface compatibility.
    correction : str, optional (default: 'Bonferroni')
        Currently unused; kept for interface compatibility.
    log : bool, optional (default: False)
        If True, rank on log2 of the absolute value of the data.

    Returns
    -------
    adata : AnnData
        The same object that was passed in, with the fields below written.

    Writes to adata
    ---------------
    diffrank_groups : str
        The value of `smp`.
    diffrank_groups_names
        The group names returned by `utils.select_groups`.
    diffrank_zscores : np.ndarray
        Array of shape (number of comparisons) x (number of genes) storing the
        zscore of the each gene for each test.
    diffrank_rankings_names : list of str
        One label per comparison, for example "C1 vs C2" when comparing
        category 'C1' with 'C2'.
    diffrank_rankings_geneidcs : np.ndarray
        Array of shape (number of comparisons) x (number of genes) storing gene
        indices that sort them according to decreasing absolute value of the
        zscore.
    diffrank_scoreskey : str
        Always 'zscores'.
    """
    groups_names, groups_masks = utils.select_groups(adata, names, smp)
    adata['diffrank_groups'] = smp
    adata['diffrank_groups_names'] = groups_names
    X = adata.X
    if log:
        # TODO: treat negativity explicitly
        X = np.abs(X)
        X = np.log(X) / np.log(2)

    # loop over all masks and compute means, variances and sample numbers
    nr_groups = groups_masks.shape[0]
    nr_genes = X.shape[1]
    means = np.zeros((nr_groups, nr_genes))
    variances = np.zeros((nr_groups, nr_genes))
    ns = np.zeros(nr_groups, dtype=int)
    for imask, mask in enumerate(groups_masks):
        means[imask] = X[mask].mean(axis=0)
        variances[imask] = X[mask].var(axis=0)
        ns[imask] = np.where(mask)[0].size
    sett.m(0, 'testing', smp, groups_names, 'with sample numbers', ns)
    sett.m(2, 'means', means)
    sett.m(2, 'variances', variances)

    igroups_masks = np.arange(len(groups_masks), dtype=int)
    pairs = list(combinations(igroups_masks, 2))
    zscores_all = np.zeros((len(pairs), nr_genes))
    rankings_geneidcs = np.zeros((len(pairs), nr_genes), dtype=int)
    # each test provides a ranking of genes
    # we store the name of the ranking, i.e. the name of the test,
    # in the following list
    adata['diffrank_rankings_names'] = []

    # test all combinations of groups against each other
    for ipair, (i, j) in enumerate(pairs):
        # z-scores; a zero denominator is turned into NaN so that the gene
        # ends up masked instead of producing an infinite score
        denom = np.sqrt(variances[i] / ns[i] + variances[j] / ns[j])
        denom[denom == 0] = np.nan
        zscores = (means[i] - means[j]) / denom
        zscores = np.ma.masked_array(zscores, mask=np.isnan(zscores))

        zscores_all[ipair] = zscores
        abs_zscores = np.abs(zscores)

        # sort genes by decreasing absolute score, then move the masked
        # (undefined) entries to the end while keeping the relative order
        # inside both groups
        ranking_geneidcs = np.argsort(abs_zscores)[::-1]
        masked = abs_zscores[ranking_geneidcs].mask
        not_masked = np.logical_not(masked)
        ranking_geneidcs = np.concatenate(
            (ranking_geneidcs[not_masked], ranking_geneidcs[masked]))
        # write to global rankings_geneidcs
        rankings_geneidcs[ipair] = ranking_geneidcs
        # names
        ranking_name = groups_names[i] + ' vs ' + groups_names[j]
        adata['diffrank_rankings_names'].append(ranking_name)

    adata['diffrank_zscores'] = zscores_all
    adata['diffrank_rankings_geneidcs'] = rankings_geneidcs
    adata['diffrank_scoreskey'] = 'zscores'

    return adata