from multipy.fdr import lsu
import numpy as np


def get_significant_pvals(pvals, q):
    """Apply the lsu (Benjamini-Hochberg) procedure at level q; fall back to a plain threshold on TypeError."""
    try:
        significant_pvals = lsu(pvals, q)
    except TypeError:
        print('TypeError occurred in sig')
        significant_pvals = np.asarray(pvals) < q
    finally:
        # print(list(zip(['{:.4f}'.format(p) for p in pvals], significant_pvals)))
        return significant_pvals
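
# Hypothetical usage sketch (my addition, not in the original snippet), assuming multipy and
# numpy are installed: apply the helper to 100 uniform p-values at q = 0.05.
example_pvals = np.random.uniform(size=100)
significant_mask = get_significant_pvals(example_pvals, q=0.05)
print('number of significant tests:', int(np.sum(significant_mask)))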
def invest_alpha_with_fdr(p, invest_rate=0.5, alpha=0.05, wealth=0.05):
    """Alpha investing with FDR: spend a fraction of the current wealth on an lsu test,
    earn wealth back if anything is significant, pay a penalty otherwise."""
    if not isinstance(wealth, list):
        wealth = [wealth]
    alpha_j = wealth[-1] * invest_rate
    p = lsu(p, q=alpha_j)
    # If there is any significant simultaneous finding, increase the wealth (and hence future
    # alpha). If not, decrease it.
    if sum(p) > 0:
        # Check: should this not be alpha_j?
        wealth.append(wealth[-1] + alpha)
    else:
        wealth.append(wealth[-1] - alpha_j / (1 - alpha_j))
    return p, wealth
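
# Hypothetical usage sketch (my addition): run the alpha-investing rule over a sequence of
# studies, carrying the wealth list from one call to the next; random p-values stand in for
# real data.
wealth = 0.05
for study in range(5):
    study_pvals = np.random.uniform(size=50)
    significant, wealth = invest_alpha_with_fdr(study_pvals, wealth=wealth)
    print('study', study, ':', int(np.sum(significant)), 'findings; wealth is now',
          round(wealth[-1], 4))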
from graph_tool.all import GraphView, label_components


def filterByFDR(g, level, keepOnlyMain):
    """Keep only edges whose p-values survive lsu (FDR) at the given level, and optionally
    restrict the view to the largest connected component."""
    # Filtering edges
    pvals = np.array(g.edge_properties["pvalue"].a)
    fdr_ep = g.new_ep("bool", True)
    fdr_ep.a = lsu(pvals, q=level)
    tv = GraphView(g, efilt=fdr_ep)
    # Keeping largest component
    if keepOnlyMain:
        comp, hist = label_components(tv)
        main_component = tv.new_vp("bool", (comp.a == np.where(hist == max(hist))[0][0]))
        tv.vertex_properties["main_component"] = main_component
        tv.set_vertex_filter(main_component)
    return tv
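
# Hypothetical usage sketch (my addition), assuming graph-tool is installed: build a small
# random graph, attach a "pvalue" edge property, and keep only the FDR-surviving edges.
from graph_tool.all import Graph

g_demo = Graph(directed=False)
g_demo.add_vertex(20)
g_demo.add_edge_list([(i, j) for i in range(20) for j in range(i + 1, 20)
                      if np.random.rand() < 0.2])
g_demo.edge_properties["pvalue"] = g_demo.new_ep("double",
                                                 vals=np.random.uniform(size=g_demo.num_edges()))
view = filterByFDR(g_demo, level=0.05, keepOnlyMain=True)
print(view.num_vertices(), 'vertices and', view.num_edges(), 'edges survive the FDR filter')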
from multipy.fdr import lsu
# from multipy.data import neuhaus
from numpy import genfromtxt
import time

start_read = time.time()
print('reading pvals from file ...')
pvals = genfromtxt('../../outputs/pvalues_gcm.csv', delimiter=',')
print('>>>>>> Data read in {} seconds'.format(time.time() - start_read))

# pvals = neuhaus()
print(pvals)
print("type(pvals): ", type(pvals))
print(len(pvals))

start_testing = time.time()
significant_pvals = lsu(pvals, q=0.4)
print('>>>>>> Hypotheses tested in {} seconds'.format(time.time() - start_testing))
print("type(significant_pvals): ", type(significant_pvals))
print(significant_pvals)
# print(list(zip(['{:.4f}'.format(p) for p in pvals], significant_pvals)))
from multipy.fdr import lsu, bonferroni


def get_number_findings(p, method, threshold=0.05, output='findings', permutations=100):
    """
    This function allows for alpha debt and simultaneous corrections. It can output the number
    of studies, the number of findings and (for simultaneous corrections) which studies were
    significant.

    Parameters
    ----------
    p : array
        p-values (dependent variables x independent variables)
    method : str
        either fdr, fwe or debt
    threshold : float
        alpha threshold
    output : str
        findings (number of significant findings), studies (number of DVs with at least one
        significant finding), where (which findings were significant), or debt (all results
        for alpha debt)
    permutations : int
        number of permutations (alpha debt only)

    Output
    ------
    If output == 'findings': findings_sequentially_uncorrected, findings_allsimultaneously_corrected
    If output == 'studies': studies_sequentially_uncorrected, studies_allsimultaneously_corrected
    If output == 'debt': findings_debt_randomodr, findings_debt_informedodr, studies_debt_randomodr, studies_debt_informedodr
    If output == 'where': which "allsimultaneously" findings were significant (output = [DV, IV])
    """
    if method == 'fdr':
        accumuluate_fdr = 0
        pub_studies_acum = 0
        for j in range(p.shape[1]):
            accum = np.sum(lsu(p[:, j], q=threshold))
            accumuluate_fdr += accum
            if accum > 0:
                pub_studies_acum += 1
            # Accumulating-FDR variant over simulated prior p-values (result not returned here).
            if j > 0:
                psim = np.random.beta(1, 10, j * p.shape[0])
                afdr_tmp = lsu(np.hstack([psim, p[:, j]]), q=threshold)[-p.shape[0]:]
            else:
                afdr_tmp = p[:, j] < threshold
            accum = np.sum(afdr_tmp)
        all_fdr_ps = lsu(p.flatten(), q=threshold).reshape(p.shape)
        pub_studies_all = np.sum(np.sum(all_fdr_ps, axis=0) > 0)
        all_fdr = sum(lsu(p.flatten(), q=threshold))
        if output == 'findings':
            return accumuluate_fdr, all_fdr
        if output == 'studies':
            return pub_studies_acum, pub_studies_all
        if output == 'where':
            return np.where(all_fdr_ps == True)
    elif method == 'fwe' or method == 'debt':
        accumuluate_bonferroni = 0
        pub_studies_acum = 0
        for j in range(p.shape[1]):
            accum = np.sum(bonferroni(p[:, j], alpha=threshold))
            accumuluate_bonferroni += accum
            if accum > 0:
                pub_studies_acum += 1
        all_bonferroni_flat = bonferroni(p.flatten(), alpha=threshold)
        all_bonferroni = np.sum(all_bonferroni_flat)
        all_bonferroni_rs = all_bonferroni_flat.reshape(p.shape)
        pub_studies_all = np.sum(np.sum(all_bonferroni_rs, axis=0) > 0)
        if method == 'debt':
            informed_studies = np.where(all_bonferroni_rs == True)[1]
            accumuluate_correct_debt = np.zeros(permutations)
            accumuluate_correct_debt_informend = np.zeros(permutations)
            pub_studies_acum = 0
            pub_studies_debt = np.zeros(permutations)
            pub_studies_debt_informed = np.zeros(permutations)
            for c in range(permutations):
                # Random study order.
                pshuffle = p.copy()
                odr = np.random.permutation(pshuffle.shape[1])
                pshuffle = pshuffle[:, odr]
                for j in range(p.shape[1]):
                    corraccum = np.sum(
                        lsu(pshuffle[:, j].flatten(), q=threshold / (j + 1)))
                    accumuluate_correct_debt[c] += corraccum
                    if corraccum > 0:
                        pub_studies_debt[c] += 1
                # Informed study order.
                pshuffle = p.copy()
                # Lambda = 0 (50/50 an informed comparison is drawn, at each draw)
                cs = np.zeros(182) + (2 / 4) / 180
                cs[informed_studies] = (2 / 4) / 2
                odr = np.random.choice(182, 182, replace=False, p=cs)
                pshuffle = pshuffle[:, odr]
                for j in range(p.shape[1]):
                    corraccum = np.sum(
                        lsu(pshuffle[:, j].flatten(), q=threshold / (j + 1)))
                    accumuluate_correct_debt_informend[c] += corraccum
                    if corraccum > 0:
                        pub_studies_debt_informed[c] += 1
        if output == 'findings':
            return accumuluate_bonferroni, all_bonferroni
        if output == 'studies':
            return pub_studies_acum, pub_studies_all
        if output == 'debt':
            return (accumuluate_correct_debt, accumuluate_correct_debt_informend,
                    pub_studies_debt, pub_studies_debt_informed)
        if output == 'where':
            return np.where(all_bonferroni_rs == True)
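
# Hypothetical usage sketch (my addition): get_number_findings expects a (DV x IV) p-value
# matrix; a random 68 x 182 matrix stands in for real data here.
p_example = np.random.uniform(size=(68, 182))
seq_findings, sim_findings = get_number_findings(p_example, method='fdr',
                                                 threshold=0.05, output='findings')
print('sequential FDR findings:', seq_findings,
      '| simultaneous FDR findings:', sim_findings)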
# which_fdr_studies is assumed to come from an earlier call such as
# get_number_findings(p, method='fdr', output='where'); its second element indexes the IVs.
informed_studies = which_fdr_studies[1]

#%%
# These blocks do the same as get_number_findings, but with alpha spending and alpha investing,
# which were not integrated with that function.

# Spending, random order
permutations = 100
pas_fdr = np.zeros([68, 182, permutations])
for n in range(permutations):
    pshuffle = p.copy()
    odr = np.random.permutation(pshuffle.shape[1])
    pshuffle = pshuffle[:, odr]
    alpha_history = None
    for j in range(pshuffle.shape[1]):
        alpha = alpha_spend(alpha_history=alpha_history) * 0.5
        pas_fdr[:, odr[j], n] = pshuffle[:, j] < lsu(pshuffle[:, j], q=alpha)
        if alpha_history is None:
            alpha_history = [0.025]
        else:
            alpha_history.append(alpha)

# Spending, informed order.
permutations = 100
pas_fdr_informed = np.zeros([68, 182, permutations])
for n in range(permutations):
    pshuffle = p.copy()
    c = np.zeros(182) + (2 / 4) / 180
    # odds of the two significant comparisons (50/50 that an informed comparison is drawn)
    c[[39, 40]] = (2 / 4) / 2
    odr = np.random.choice(182, 182, replace=False, p=c)
    pshuffle = pshuffle[:, odr]
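
# alpha_spend is not defined in this snippet. Below is a minimal sketch of what it might look
# like (my assumption, inferred from the 0.05 starting budget and the first spend of 0.025
# above): it returns the alpha wealth still unspent, and the caller spends half of it each step.
def alpha_spend(alpha_history=None, initial_wealth=0.05):
    # Hypothetical helper: remaining wealth = initial budget minus everything spent so far.
    if alpha_history is None:
        return initial_wealth
    return initial_wealth - sum(alpha_history)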
# Assumes gen_data_sim, spent_alpha and the simulation parameters (trrange, npts,
# maxcomparisons, nt, prob) are defined earlier in the script; pearsonr is scipy.stats.pearsonr
# and bonferroni / lsu come from multipy.fdr.
for tri, tr in enumerate(trrange):
    # Generate the data for this parameter configuration.
    x, y, true_trials = gen_data_sim(npts, maxcomparisons, numbertrue=nt,
                                     truecorr=tr, likelihood_of_truth=prob)
    # Calculate pairwise correlations between all the comparisons.
    ry = np.zeros([maxcomparisons])
    py = np.zeros([maxcomparisons])
    for n in range(maxcomparisons):
        ry[n], py[n] = pearsonr(y, x[:, n])
    # Simultaneous Bonferroni
    p_bon = bonferroni(py)
    # Simultaneous FDR
    p_fdr = lsu(py)
    # Uncorrected
    p_uncorr = py < 0.05
    # Alpha debt
    p_pad = np.array([p < (0.05 / (i + 1)) for i, p in enumerate(py)])
    # Alpha spending
    p_alphaspend = np.zeros([maxcomparisons])
    for n in range(maxcomparisons):
        if n == 0:
            alpha_history = None
        alpha = spent_alpha(alpha_history=alpha_history) * 0.5
        p_alphaspend[n] = py[n] < alpha
        if n == 0:
            alpha_history = [0.025]
        else:
            alpha_history.append(alpha)
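
# Standalone sketch (my addition): the same correction strategies applied to one random vector
# of p-values, without the simulation helpers, as a quick sanity check.
py_demo = np.random.uniform(size=182)
demo_counts = {
    'bonferroni': int(np.sum(bonferroni(py_demo))),
    'fdr (lsu)': int(np.sum(lsu(py_demo))),
    'uncorrected': int(np.sum(py_demo < 0.05)),
    'alpha debt': int(np.sum([pv < 0.05 / (i + 1) for i, pv in enumerate(py_demo)])),
}
print(demo_counts)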
import geopandas as gpd
import numpy as np
import rioxarray  # registers the .rio accessor on xarray objects
import xarray as xr
from multipy.fdr import lsu

# slope_trend and pvalue_trend are per-pixel trend helpers defined elsewhere in this project.
shp_peru_no_lake = gpd.read_file(
    "./data/raw/vectorial/Peru_no_lake.shp").to_crs({"init": "epsg:4326"})
pisco = xr.open_dataset("./data/processed/pisco_data.nc")

gridded_trend = xr.Dataset({})
grid_data = pisco["p"].rio.set_crs(shp_peru_no_lake.crs)
grid_data = grid_data.rio.clip(shp_peru_no_lake.geometry, grid_data.rio.crs)
gridded_trend["p"] = grid_data

for var in list(pisco._variables.keys())[3:]:
    var_data = pisco[var]
    var_data = var_data.rio.set_crs(shp_peru_no_lake.crs)
    var_data = var_data.rio.clip(shp_peru_no_lake.geometry, var_data.rio.crs)
    gridded_trend[var + "_slope"] = (('latitude', 'longitude'),
                                     np.apply_along_axis(slope_trend, 0, var_data))
    gridded_trend[var + "_slope"] = gridded_trend[var + "_slope"] * 10
    gridded_trend[var + "_pvalue"] = (('latitude', 'longitude'),
                                      np.apply_along_axis(pvalue_trend, 0, var_data))
    # FDR-correct the trend p-values over the whole grid, then restore the 2D shape.
    gridded_trend[var + "_pvalue"].values = (
        lsu(pvals=gridded_trend[var + "_pvalue"].values.ravel()) + 0).reshape(183, 126)

gridded_trend = gridded_trend.drop_vars("p")
gridded_trend.to_netcdf("./data/processed/pisco_data_trends.nc")
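
# slope_trend and pvalue_trend are not shown above. A minimal sketch of what such per-pixel
# helpers could look like (my assumption: a simple linear trend via scipy.stats.linregress;
# the original project may use a different estimator, e.g. Mann-Kendall / Theil-Sen):
from scipy.stats import linregress


def slope_trend(series):
    # Hypothetical helper: slope of a least-squares line fitted over the time axis of one pixel.
    t = np.arange(len(series))
    return linregress(t, series).slope


def pvalue_trend(series):
    # Hypothetical helper: two-sided p-value of the same linear fit.
    t = np.arange(len(series))
    return linregress(t, series).pvalue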