Ejemplo n.º 1
0
	def _do_tests(self, dec_reg, hit_reg, iter):
		"""
		Update the decision register from the hit counts of one round.

		Only entries of ``dec_reg`` that are >= 0 are tested, and only those
		that are exactly 0 can change: they become 1 (accepted) or -1
		(rejected). ``dec_reg`` is modified in place and also returned.

		Parameters
		----------
		dec_reg : array
			Per-feature decision codes.
		hit_reg : array
			Per-feature hit counts, indexed like ``dec_reg``.
		iter : int
			Number of trials used for the binomial tests and as the
			divisor of the Bonferroni threshold.
		"""
		# restrict the tests to entries that are not negative
		candidates = np.where(dec_reg >= 0)[0]
		hit_counts = hit_reg[candidates]
		undecided = dec_reg[candidates] == 0

		# uncorrected one-sided binomial p values with success prob. 0.5:
		# upper tail (>= hits) for acceptance, lower tail (<= hits) for
		# rejection
		p_accept = sp.stats.binom.sf(hit_counts - 1, iter, .5).flatten()
		p_reject = sp.stats.binom.cdf(hit_counts, iter, .5).flatten()

		# step one: FDR (Benjamini-Hochberg) across the features of this round
		# NOTE(review): multicor is presumably statsmodels' multipletests,
		# whose first return value is the boolean reject mask - confirm
		fdr_accept = multicor(
			p_accept, alpha=self.multi_alpha, method='fdr_bh')[0]
		fdr_reject = multicor(
			p_reject, alpha=self.multi_alpha, method='fdr_bh')[0]

		# step two: Bonferroni for testing the same feature in every round
		bonferroni_level = self.multi_alpha / float(iter)
		bonf_accept = p_accept <= bonferroni_level
		bonf_reject = p_reject <= bonferroni_level

		# a decision needs both corrections to agree on an undecided feature
		accept_pos = np.where(undecided & (fdr_accept & bonf_accept))[0]
		reject_pos = np.where(undecided & (fdr_reject & bonf_reject))[0]

		# write the decisions back; rejection is written last
		dec_reg[candidates[accept_pos]] = 1
		dec_reg[candidates[reject_pos]] = -1

		return dec_reg
Ejemplo n.º 2
0
    def _do_tests(self, dec_reg, hit_reg, iter):
        """
        Accept or reject undecided features based on their hit counts.

        Entries of ``dec_reg`` that are >= 0 are tested; those equal to 0
        are set to 1 when the acceptance test passes and to -1 when the
        rejection test passes. ``dec_reg`` is changed in place and returned.

        Parameters
        ----------
        dec_reg : array
            Per-feature decision codes.
        hit_reg : array
            Per-feature hit counts, indexed like ``dec_reg``.
        iter : int
            Number of trials for the binomial tests; also divides
            ``self.multi_alpha`` for the Bonferroni step.
        """
        tested = np.where(dec_reg >= 0)[0]
        tested_hits = hit_reg[tested]
        still_open = dec_reg[tested] == 0

        # uncorrected binomial tail probabilities (success prob. 0.5):
        # P(X >= hits) for acceptance, P(X <= hits) for rejection
        tail_ps = (
            sp.stats.binom.sf(tested_hits - 1, iter, .5).flatten(),
            sp.stats.binom.cdf(tested_hits, iter, .5).flatten(),
        )

        selected = []
        for p_values in tail_ps:
            # first correction: FDR over the features tested in this round
            # NOTE(review): multicor is presumably statsmodels'
            # multipletests (element 0 = boolean reject mask) - confirm
            passes_fdr = multicor(p_values,
                                  alpha=self.multi_alpha,
                                  method='fdr_bh')[0]
            # second correction: Bonferroni over the repeated rounds
            passes_bonf = p_values <= self.multi_alpha / float(iter)
            # keep positions that pass both and are still undecided
            selected.append(
                np.where(still_open & (passes_fdr & passes_bonf))[0])
        accept_pos, reject_pos = selected

        # update the register; the rejection write comes last
        dec_reg[tested[accept_pos]] = 1
        dec_reg[tested[reject_pos]] = -1

        return dec_reg
Ejemplo n.º 3
0
    def _do_tests(self, dec_reg, hit_reg, iter):
        """
        Set undecided entries of ``dec_reg`` to accepted (1) or rejected (-1).

        One-sided binomial p values (success prob. 0.5, ``iter`` trials) are
        computed from ``hit_reg`` and corrected with ``self.multi_corr_method``
        at level ``self.multi_alpha``. Only entries equal to 0 are changed.
        ``dec_reg`` is modified in place and returned.
        """
        # uncorrected p values: upper tail for accepting, lower for rejecting
        accept_ps = sp.stats.binom.sf(hit_reg - 1, iter, 0.5).flatten()
        reject_ps = sp.stats.binom.cdf(hit_reg, iter, 0.5).flatten()

        # multiple testing correction; element 0 is used as the reject mask
        # NOTE(review): multicor is presumably statsmodels' multipletests
        accept_flags = multicor(accept_ps, alpha=self.multi_alpha,
                                method=self.multi_corr_method)[0]
        reject_flags = multicor(reject_ps, alpha=self.multi_alpha,
                                method=self.multi_corr_method)[0]
        accept_idx = np.where(accept_flags)[0]
        reject_idx = np.where(reject_flags)[0]

        # acceptance is applied first, so an entry that was just accepted is
        # no longer 0 when the rejection candidates are filtered
        open_accept = dec_reg[accept_idx] == 0
        dec_reg[accept_idx[open_accept]] = 1
        open_reject = dec_reg[reject_idx] == 0
        dec_reg[reject_idx[open_reject]] = -1
        return dec_reg
Ejemplo n.º 4
0
    def _do_tests(self, dec_reg, hit_reg, iter):
        """
        Decide undecided features from their hit counts.

        Entries of ``dec_reg`` equal to 0 become 1 when the corrected
        acceptance test is significant and -1 when the corrected rejection
        test is significant. The array is updated in place and returned.
        """
        # raw binomial tail probabilities with success prob. 0.5 over
        # ``iter`` trials: P(X >= hits) and P(X <= hits)
        upper_tail = sp.stats.binom.sf(hit_reg - 1, iter, .5).flatten()
        lower_tail = sp.stats.binom.cdf(hit_reg, iter, .5).flatten()

        # indices flagged by the configured multiple testing correction
        # NOTE(review): multicor is presumably statsmodels' multipletests,
        # whose first return value is the boolean reject mask - confirm
        flagged_accept = np.where(
            multicor(upper_tail, alpha=self.multi_alpha,
                     method=self.multi_corr_method)[0])[0]
        flagged_reject = np.where(
            multicor(lower_tail, alpha=self.multi_alpha,
                     method=self.multi_corr_method)[0])[0]

        # accept before reject: the rejection filter has to see the register
        # after the acceptance codes were written
        for flagged, verdict in ((flagged_accept, 1), (flagged_reject, -1)):
            undecided = flagged[dec_reg[flagged] == 0]
            dec_reg[undecided] = verdict
        return dec_reg
Ejemplo n.º 5
0
def check_pvals(p_vals, params):
    """
    Correct a symmetric p-value matrix for multiple testing.

    Only the cells strictly below the diagonal are passed to the correction
    (the matrix is treated as symmetric, so each p-value is tested once);
    the results are then mirrored to the upper triangle.

    Parameters
    ----------
    - p_vals : array of shape [p, p]
        Uncorrected p-values.
    - params : dict
        Must provide 'multi_corr_method' (name of the correction method) and
        'alpha_val' (significance level, converted with float()).

    Returns
    -------
    Tuple with two matrices of shape [p, p]:
    - p_vals : corrected p-values; cells that were not tested are 1 before
        mirroring.
    - p_mask : Boolean mask of the tests flagged by the correction; cells
        that were not tested are False before mirroring. It can be used to
        filter the resulting heatmap.
    """
    # basic params; the unpacking still requires a 2-D input
    _, p = p_vals.shape
    mc_method = params['multi_corr_method']
    mc_alpha = params['alpha_val']

    # corr matrix is symmetric, we only need to correct the lower half.
    # The builtin bool is used because the np.bool alias was removed from
    # NumPy (deprecated in 1.20, gone in 1.24).
    ps_to_check = np.tri(p, k=-1, dtype=bool)

    # NOTE(review): multicor is presumably statsmodels' multipletests, i.e.
    # element 0 is the reject mask and element 1 the corrected p-values
    ps = multicor(p_vals[ps_to_check], method=mc_method, alpha=float(mc_alpha))

    # make Boolean and corrected p-val matrices
    p_mask = np.zeros((p, p), dtype=bool)
    p_mask[ps_to_check] = ps[0]
    p_mask = mirror_lower_triangle(p_mask)
    p_vals = np.ones((p, p))
    p_vals[ps_to_check] = ps[1]
    p_vals = mirror_lower_triangle(p_vals)

    return p_vals, p_mask
Ejemplo n.º 6
0
def gpd_spearman(rX, perm_num=10000, prec=None, mc_method='fdr_bh', mc_alpha=0.05):
    """
    This is the main function of the module.

    The correlation values are calculated in matrix form in parallel, and the
    p-values are approximated using Monte Carlo sampling (1e4 by default),
    followed by Generalized Pareto Distribution approximation to make them
    more precise.

    Then the p values are corrected for multiple testing using the Benjamini
    Hochberg FDR with .05 alpha (by default). For this, only the lower triangle
    of the correlation matrix is used, to avoid including each value twice.

    Parameters
    ----------

    - rX : array of shape [n sample, p features]
        Column ranked X matrix of observations and features. Used to calculate
        Spearman correlations and their p-values.
    - perm_num : int, default = 10000
        Number of permutations to use to estimate each correlation's p-value.
    - prec : array of shape [p features, p features], default = None
        Estimated precision matrix by some Graph Lasso algorithm. If provided,
        only those correlations will be considered whose precision is not 0.
    - mc_method : string, default = 'fdr_bh'
        Method for correction for multiple testing.
    - mc_alpha : float, default = 0.05
        Threshold for q-values after FDR correction. If it is >= 1, no
        correction is done and the uncorrected p-values are returned.

    Returns
    -------

    Tuple with three matrices, in this order:
    - rs : array of shape [p features, p features]
        Holds empirical Spearman correlation values.
    - p_vals : array of shape [p features, p features]
        Corrected p-values (uncorrected ones if mc_alpha >= 1). Cells that
        were not tested are set to 1 before mirroring.
    - p_mask : array of shape [p features, p features]
        Mask for those p-values which passed the correction at mc_alpha. If
        mc_alpha >= 1 it is initialised to all True.
    """
    # the unpacking still requires a 2-D input
    _, p = rX.shape
    # little trick to avoid having many ifs everywhere in code: without a
    # precision matrix every cell is considered.
    # The builtin bool is used because the np.bool alias was removed from
    # NumPy (deprecated in 1.20, gone in 1.24).
    if prec is None:
        prec_mask = np.ones((p, p), dtype=bool)
    else:
        prec_mask = prec != 0

    # calculate empirical Spearman from data
    rs = np.corrcoef(rX, rowvar=0)
    # get permuted Spearman values, in parallel; range works on both
    # Python 2 and 3, unlike xrange
    r_perms = np.array(Parallel(n_jobs=-1, backend='threading')
                       (delayed(perm_r)(rX, i) for i in range(perm_num)))

    # GPD APPROXIMATION OF P-VALUES IN PARALLEL
    pGPDs = np.ones((p, p))
    # we should only use each value once, i.e. the cells below the diagonal
    rows, cols = np.tril_indices(p, k=-1)
    lower_tri_mask = np.tri(p, k=-1, dtype=bool)
    # one perm_gpd call per cell: precision flag, observed correlation and
    # the permuted correlations of that cell
    pGPDs[rows, cols] = np.array(Parallel(n_jobs=-1, backend='threading')(
        delayed(perm_gpd)(prec_mask[r, c], rs[r, c], r_perms[:, r, c])
        for r, c in zip(rows, cols)))

    # correction for multiple testing, make sure that we only include each
    # p-value once not twice (symmetric rs), by combining two numpy mask arrays:
    #   - the lower triangular rs matrix
    #   - prec_mask, i.e. p-vals whose precision is not 0
    ps_to_check = lower_tri_mask & prec_mask
    p_vals = np.ones((p, p))
    alpha = float(mc_alpha)
    if alpha < 1:
        p_mask = np.zeros((p, p), dtype=bool)
        # NOTE(review): multicor is presumably statsmodels' multipletests,
        # i.e. element 0 is the reject mask, element 1 the corrected p-values
        ps = multicor(pGPDs[ps_to_check], method=mc_method, alpha=alpha)
        p_mask[ps_to_check] = ps[0]
        p_vals[ps_to_check] = ps[1]
    else:
        # user doesn't want correction for multiple testing
        p_mask = np.ones((p, p), dtype=bool)
        p_vals[ps_to_check] = pGPDs[ps_to_check]
    p_mask = mirror_lower_triangle(p_mask)
    p_vals = mirror_lower_triangle(p_vals)
    return rs, p_vals, p_mask