Example 1
import numpy as np
from multipy.fdr import lsu

def get_significant_pvals(pvals, q):
    try:
        significant_pvals = lsu(pvals, q)
    except TypeError:
        print('TypeError occurred in get_significant_pvals')
        # Fall back to uncorrected thresholding at level q.
        significant_pvals = np.asarray(pvals) < q
    # print(list(zip(['{:.4f}'.format(p) for p in pvals], significant_pvals)))
    return significant_pvals
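A minimal usage sketch (the synthetic p-values are illustrative assumptions, not part of the original example):

pvals = np.array([0.001, 0.008, 0.04, 0.2, 0.7])
mask = get_significant_pvals(pvals, q=0.05)  # boolean mask of rejected nulls
print(pvals[mask])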
Example 2
from multipy.fdr import lsu

def invest_alpha_with_fdr(p, invest_rate=0.5, alpha=0.05, wealth=0.05):
    if not isinstance(wealth, list):
        wealth = [wealth]
    alpha_j = wealth[-1] * invest_rate
    p = lsu(p, q=alpha_j)
    # If there is any significant simultaneous finding, increase the wealth;
    # if not, decrease it.
    if sum(p) > 0:
        # NOTE: check whether this should be alpha_j rather than alpha.
        wealth.append(wealth[-1] + alpha)
    else:
        wealth.append(wealth[-1] - alpha_j / (1 - alpha_j))

    return p, wealth
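A sketch of running the procedure over a sequence of studies (the loop and synthetic p-values are assumptions):

import numpy as np

rng = np.random.default_rng(0)
wealth = 0.05
for study in range(5):
    pvals = rng.uniform(size=20)  # hypothetical p-values for one study
    rejected, wealth = invest_alpha_with_fdr(pvals, wealth=wealth)
    print(study, int(np.sum(rejected)), wealth[-1])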
Example 3
import numpy as np
from graph_tool.all import GraphView, label_components
from multipy.fdr import lsu

def filterByFDR(g, level, keepOnlyMain):
    # Filter edges: keep those whose p-values survive FDR control at rate `level`
    pvals = np.array(g.edge_properties["pvalue"].a)

    fdr_ep = g.new_ep("bool", True)
    fdr_ep.a = lsu(pvals, q=level)

    tv = GraphView(g, efilt=fdr_ep)

    # Keep only the largest connected component
    if keepOnlyMain:
        comp, hist = label_components(tv)
        main_component = tv.new_vp("bool", comp.a == np.argmax(hist))
        tv.vertex_properties["main_component"] = main_component
        tv.set_vertex_filter(main_component)
    return tv
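A minimal usage sketch (the toy graph below is an illustrative assumption; any graph-tool Graph carrying a "pvalue" edge property works):

from graph_tool.all import Graph

g = Graph(directed=False)
g.add_vertex(4)
pv = g.new_ep("double")
for u, v, p in [(0, 1, 0.001), (1, 2, 0.01), (2, 3, 0.8)]:
    pv[g.add_edge(u, v)] = p
g.edge_properties["pvalue"] = pv

tv = filterByFDR(g, level=0.05, keepOnlyMain=True)
print(tv.num_vertices(), tv.num_edges())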
Example 4
from multipy.fdr import lsu
# from multipy.data import neuhaus

from numpy import genfromtxt
import time

start_read = time.time()
print('reading pvals from file ...')

pvals = genfromtxt('../../outputs/pvalues_gcm.csv', delimiter=',')

print('>>>>>>   Data read in {} seconds'.format(time.time() - start_read))

# pvals = neuhaus()
print(pvals)
print("type(pvals): ", type(pvals))
print(len(pvals))

start_testing = time.time()
significant_pvals = lsu(pvals, q=0.4)
print('>>>>>>   Hypotheses tested in {} seconds'.format(time.time() - start_testing))

print("type(significant_pvals): ", type(significant_pvals))
print(significant_pvals)


# print(list(zip(['{:.4f}'.format(p) for p in pvals], significant_pvals)))
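The commented-out zip above hints at pairing each p-value with its decision; a short follow-up sketch summarising the result:

n_discoveries = int(significant_pvals.sum())
print('{} of {} hypotheses rejected at q=0.4'.format(n_discoveries, len(pvals)))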
Example 5
import numpy as np
from multipy.fdr import lsu
from multipy.fwer import bonferroni


def get_number_findings(p,
                        method,
                        threshold=0.05,
                        output='findings',
                        permutations=100):
    """
    This function allows for alpha debt, and simultaneous corrections. It can output the number of studies, findings and (for simultaneous corrections) which studies were significant

    Parameters
    -----------
    p : array 
        p-values (dependent, independent variables)
    method : str
        either fdr, fwe or debt
    threshold : alpha threshold
    output : str
        findings (number of significant findings), studies (number of DVs with at least one significant finding) where (which dings were significant), or debt (all results for alpha debt)
    permutations: int
        number of permutations (alpha debt only)
    
    Output: 
    -------
    If output == 'findings': findings_sequentially_uncorrected, findings_allsimultaneously_corrected
    If output == 'studies': studies_sequentially_uncorrected, studies_allsimultaneously_corrected    
    If output == 'debt': findings_debt_randomodr, findings_debt_informedodr, studies_debt_randomodr, studies_debt_informedodr
    If output == 'where': which "allsimultaneously" findings were significant (output = [DV,IV])    
    """
    if method == 'fdr':
        accumulate_fdr = 0
        pub_studies_accum = 0
        for j in range(p.shape[1]):
            # Correct each study separately as it "arrives".
            accum = np.sum(lsu(p[:, j], q=threshold))
            accumulate_fdr += accum
            if accum > 0:
                pub_studies_accum += 1
            if j > 0:
                # Simulate a history of j earlier studies and correct jointly.
                psim = np.random.beta(1, 10, j * p.shape[0])
                afdr_tmp = lsu(np.hstack([psim, p[:, j]]),
                               q=threshold)[-p.shape[0]:]
            else:
                afdr_tmp = p[:, j] < threshold
            accum = np.sum(afdr_tmp)  # discoveries under the simulated history (unused below)
        # Correct all studies simultaneously.
        all_fdr_ps = lsu(p.flatten(), q=threshold).reshape(p.shape)
        pub_studies_all = np.sum(np.sum(all_fdr_ps, axis=0) > 0)
        all_fdr = np.sum(all_fdr_ps)
        if output == 'findings':
            return accumulate_fdr, all_fdr
        if output == 'studies':
            return pub_studies_accum, pub_studies_all
        if output == 'where':
            return np.where(all_fdr_ps)

    elif method == 'fwe' or method == 'debt':
        accumulate_bonferroni = 0
        pub_studies_accum = 0
        for j in range(p.shape[1]):
            accum = np.sum(bonferroni(p[:, j], alpha=threshold))
            accumulate_bonferroni += accum
            if accum > 0:
                pub_studies_accum += 1
        all_bonferroni_flat = bonferroni(p.flatten(), alpha=threshold)
        all_bonferroni = np.sum(all_bonferroni_flat)

        all_bonferroni_rs = all_bonferroni_flat.reshape(p.shape)
        pub_studies_all = np.sum(np.sum(all_bonferroni_rs, axis=0) > 0)

    if method == 'debt':
        informed_studies = np.where(all_bonferroni_rs)[1]
        accumulate_correct_debt = np.zeros(permutations)
        accumulate_correct_debt_informed = np.zeros(permutations)
        pub_studies_accum = 0
        pub_studies_debt = np.zeros(permutations)
        pub_studies_debt_informed = np.zeros(permutations)
        for c in range(permutations):
            pshuffle = p.copy()
            odr = np.random.permutation(pshuffle.shape[1])
            pshuffle = pshuffle[:, odr]
            for j in range(p.shape[1]):
                corraccum = np.sum(
                    lsu(pshuffle[:, j].flatten(), q=threshold / (j + 1)))
                accumulate_correct_debt[c] += corraccum
                if corraccum > 0:
                    pub_studies_debt[c] += 1
            pshuffle = p.copy()
            # Lambda = 0: at each draw there is a 50/50 chance that an
            # informed comparison is drawn.
            cs = np.zeros(182) + (2 / 4) / 180
            cs[informed_studies] = (2 / 4) / 2
            odr = np.random.choice(182, 182, replace=False, p=cs)
            pshuffle = pshuffle[:, odr]
            for j in range(p.shape[1]):
                corraccum = np.sum(
                    lsu(pshuffle[:, j].flatten(), q=threshold / (j + 1)))
                accumulate_correct_debt_informed[c] += corraccum
                if corraccum > 0:
                    pub_studies_debt_informed[c] += 1

    if output == 'findings':
        return accumulate_bonferroni, all_bonferroni
    if output == 'studies':
        return pub_studies_accum, pub_studies_all
    if output == 'debt':
        return (accumulate_correct_debt, accumulate_correct_debt_informed,
                pub_studies_debt, pub_studies_debt_informed)
    if output == 'where':
        return np.where(all_bonferroni_rs)
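A minimal usage sketch (the random p-value matrix is an assumption; its 182 columns match the shapes hard-coded in the 'debt' branch above):

rng = np.random.default_rng(0)
p = rng.uniform(size=(68, 182))  # hypothetical (findings x studies) p-value matrix
seq_findings, sim_findings = get_number_findings(p, method='fdr')
print(seq_findings, sim_findings)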
Example 6
informed_studies = which_fdr_studies[1]

#%%

# These blocks do the same as get_number_findings, but with alpha spending and
# alpha investing, which were not integrated into that function.
# Spending, random order
permutations = 100
pas_fdr = np.zeros([68, 182, permutations])
for n in range(permutations):
    pshuffle = p.copy()
    odr = np.random.permutation(pshuffle.shape[1])
    pshuffle = pshuffle[:, odr]
    alpha_history = None
    for j in range(pshuffle.shape[1]):
        alpha = alpha_spend(alpha_history=alpha_history) * 0.5
        # Boolean mask of FDR-significant tests at the spent alpha level.
        pas_fdr[:, odr[j], n] = lsu(pshuffle[:, j], q=alpha)
        if alpha_history is None:
            alpha_history = [0.025]
        else:
            alpha_history.append(alpha)

# Spending, informed order.
permutations = 100
pas_fdr_informed = np.zeros([68, 182, permutations])
for n in range(permutations):
    pshuffle = p.copy()
    c = np.zeros(182) + (2 / 4) / 180
    # The two significant comparisons (columns 39 and 40) jointly receive
    # half of the sampling probability.
    c[[39, 40]] = (2 / 4) / 2
    odr = np.random.choice(182, 182, replace=False, p=c)
    pshuffle = pshuffle[:, odr]
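These loops rely on an alpha_spend helper that is not shown; a minimal sketch under the assumption that it returns the alpha budget still unspent (hypothetical, the original implementation may differ):

def alpha_spend(alpha_history=None, total_alpha=0.05):
    # Hypothetical helper: return the remaining alpha budget, so that the
    # caller can spend half of it (alpha_spend(...) * 0.5) at each step.
    if alpha_history is None:
        return total_alpha
    return total_alpha - sum(alpha_history)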
Example 7
for tri, tr in enumerate(trrange):
    # Generate the data for this parameter configuration.
    x, y, true_trials = gen_data_sim(npts,
                                     maxcomparisons,
                                     numbertrue=nt,
                                     truecorr=tr,
                                     likelihood_of_truth=prob)
    # Calculate pairwise correlations between all the comparisons.
    ry = np.zeros([maxcomparisons])
    py = np.zeros([maxcomparisons])
    for n in range(maxcomparisons):
        ry[n], py[n] = pearsonr(y, x[:, n])
    # Simultaneous Bonferroni correction:
    p_bon = bonferroni(py)
    # Simultaneous FDR correction:
    p_fdr = lsu(py)
    # Uncorrected:
    p_uncorr = py < 0.05
    # Alpha debt:
    p_pad = np.array([p < (0.05 / (i + 1)) for i, p in enumerate(py)])
    # Alpha spending:
    p_alphaspend = np.zeros([maxcomparisons])
    for n in range(maxcomparisons):
        if n == 0:
            alpha_history = None
        alpha = spent_alpha(alpha_history=alpha_history) * 0.5
        p_alphaspend[n] = py[n] < alpha
        if n == 0:
            alpha_history = [0.025]
        else:
            alpha_history.append(alpha)
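A quick tally comparing the methods for one parameter configuration, using the masks computed above (sketch; assumes numpy is imported as np):

for name, mask in [('Bonferroni', p_bon), ('FDR', p_fdr),
                   ('uncorrected', p_uncorr), ('alpha debt', p_pad),
                   ('alpha spending', p_alphaspend)]:
    print('{}: {} rejections'.format(name, int(np.sum(mask))))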
Example 8
import geopandas as gpd
import numpy as np
import rioxarray  # noqa: F401 -- registers the .rio accessor on xarray objects
import xarray as xr
from multipy.fdr import lsu

shp_peru_no_lake = gpd.read_file(
    "./data/raw/vectorial/Peru_no_lake.shp").to_crs("EPSG:4326")

pisco = xr.open_dataset("./data/processed/pisco_data.nc")

gridded_trend = xr.Dataset({})
grid_data = pisco["p"].rio.set_crs(shp_peru_no_lake.crs)
grid_data = grid_data.rio.clip(shp_peru_no_lake.geometry, grid_data.rio.crs)
gridded_trend["p"] = grid_data

# Skip the first three entries (coordinate variables); iterate over the data variables.
for var in list(pisco._variables.keys())[3:]:

    var_data = pisco[var]
    var_data = var_data.rio.set_crs(shp_peru_no_lake.crs)
    var_data = var_data.rio.clip(shp_peru_no_lake.geometry, var_data.rio.crs)

    gridded_trend[var + "_slope"] = (('latitude', 'longitude'),
                                     np.apply_along_axis(
                                         slope_trend, 0, var_data))
    gridded_trend[var + "_slope"] = gridded_trend[var + "_slope"] * 10
    gridded_trend[var + "_pvalue"] = (('latitude', 'longitude'),
                                      np.apply_along_axis(
                                          pvalue_trend, 0, var_data))
    gridded_trend[var + "_pvalue"].values = (
        lsu(pvals=gridded_trend[var + "_pvalue"].values.ravel()) + 0).reshape(
            183, 126)

gridded_trend = gridded_trend.drop_vars("p")
gridded_trend.to_netcdf("./data/processed/pisco_data_trends.nc")
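The helpers slope_trend and pvalue_trend are not shown in this example; a minimal sketch, assuming a simple linear trend per grid cell (hypothetical, the original may use a different trend test such as Mann-Kendall):

from scipy.stats import linregress

def slope_trend(series):
    # Hypothetical: least-squares slope of the series against time steps.
    t = np.arange(series.size)
    return linregress(t, series).slope

def pvalue_trend(series):
    # Hypothetical: two-sided p-value for a non-zero linear trend.
    t = np.arange(series.size)
    return linregress(t, series).pvalue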