def run_sample(samplenum, min_sep, max_sep, rpbins, pimax, wp, cosmo, frac=1, bin_size=None, pibinwidth=2): fn = '../data/lss.dr72bright{}_czcut.dat'.format(samplenum) data1 = pd.read_csv(fn) rand1 = get_random(data1) print 'Sample {}, {}'.format(samplenum, labels_mr[samplenum]) print 'ndata=', len(data1.index) print 'nrand=', len(rand1.index) data1 = data1.sample(frac=frac) print 'subsampling rands' frac /= 10. rand1 = rand1.sample(frac=frac) # data1 = data1[:int(frac*len(data1.index))] # rand1 = rand1[:int(frac*len(rand1.index))] print 'ndata=', len(data1.index) print 'nrand=', len(rand1.index) data2 = data1 rand2 = rand1 start = time.time() #xi, dd, dr, rd, rr = run.run_treecorr(data1, rand1, data2, rand2, min_sep, max_sep, bin_size, pimax, wp) #xi, dd, dr, rd, rr = run.run_treecorr_orig(data1, rand1, data2, rand2, min_sep, max_sep, bin_size, pimax, wp) #weights_data = data1['fgotten'] #weights_rand = rand1['fgotten'] weights_data = None weights_rand = None rp_avg, wprp = run.run_corrfunc(data1, rand1, data2, rand2, rpbins, pimax, cosmo, weights_data=weights_data, weights_rand=weights_rand, pibinwidth=pibinwidth) end = time.time() print 'Time for sample {}, ndata={}: {}'.format(samplenum, len(data1.index), end - start) return rp_avg, wprp
def run_sample_corrfunc(samplenum, tag, min_sep, max_sep, rpbins, pimax, wp, cosmo, frac=1, bin_size=None, pibinwidth=2): fn = '../data/lss.dr72bright{}{}.dat'.format(samplenum, tag) data1 = pd.read_csv(fn) fn_rand = '../data/random-0.dr72bright{}{}.dat'.format(samplenum, tag) rand1 = pd.read_csv(fn_rand) #rand1 = get_random(data1) print 'Sample {}'.format(samplenum) print 'ndata=', len(data1.index) print 'nrand=', len(rand1.index) data1 = data1.sample(frac=frac) rand1 = rand1.sample(frac=frac) # data1 = data1[:int(frac*len(data1.index))] # rand1 = rand1[:int(frac*len(rand1.index))] print 'ndata=', len(data1.index) print 'nrand=', len(rand1.index) data2 = data1 rand2 = rand1 start = time.time() weights_data = data1['fgotten'] weights_rand = rand1['fgotten'] rp_avg, wprp = run.run_corrfunc(data1, rand1, data2, rand2, rpbins, pimax, cosmo, weights_data=weights_data, weights_rand=weights_rand, pibinwidth=pibinwidth) end = time.time() print 'Time for sample {}, ndata={}, nrand={}: {}'.format( samplenum, len(data1.index), len(rand1.index), end - start) return rp_avg, wprp
def run_together(min_sep, max_sep, bin_size, K, pimax, wp): #samplenums = [7, 8, 9, 10, 11, 12] samplenums = [8, 9, 10] data1, rand1 = combine_samples(samplenums) data2 = data1.copy() rand2 = rand1.copy() print 'ndata={}, nrand={}'.format(len(data1.index), len(rand1.index)) rpbins = np.logspace(np.log10(min_sep), np.log10(max_sep), K + 1) rpbins_avg = run.bins_logavg(rpbins) logwidth = run.log_width(rpbins) #basisfuncs = [estimator.top_z] #basisfuncs = [estimator.top_Mr] basisfuncs = [estimator.gauss_Mr] #basisfuncs = [estimator.tophat] K *= 3 #vals = [0.1, 0.15, 0.2, 0.25] vals = [-22.5, -21.5, -20.5, -19.5, -18.5] #vals = [-21., -20.5, -20, -19.5] #labels = ["M_r={:.2f}".format(val) for val in vals] labels = ['corrfunc all'] #labels = [0.15] #vals = [None] #labels = ['top'] cols = [ 'purple', 'red', 'orange', 'green', 'blue', 'cyan', 'magenta', 'grey' ] print 'Run' start = time.time() est_ls, wprp = run.run_corrfunc(data1, rand1, data2, rand2, rpbins, pimax) rps = [rpbins_avg] wprps = [wprp] # rps, wprps = run.run(data1, rand1, data2, rand2, pimax, min_sep, max_sep, bin_size, basisfuncs, # K, cosmo, wp, rpbins, vals, logrpbins_avg, logwidth) end = time.time() print 'Time for all, ndata={}: {}'.format(len(data1.index), end - start) plotter.plot_wprp(rps, wprps, labels, colors=cols)
def run_dr7_LRGs(): #sample = 'Bright-no' sample = 'Dim-no' datafn = '../data/DR7-{}.ascii'.format(sample) randfn = '../data/random-DR7-{}.ascii'.format(sample) data = pd.read_table(datafn, index_col=False, delim_whitespace=True, names=[ 'ra', 'dec', 'z', 'M_g', 'sector_completeness', 'n(z)*1e4', 'radial_weight', 'fiber_coll_weight', 'fogtmain', 'ilss', 'icomb', 'sector' ], dtype={'z': np.float64}, skiprows=1) rand = pd.read_table(randfn, index_col=False, delim_whitespace=True, names=[ 'ra', 'dec', 'z', 'sector_completeness', 'n(z)*1e4', 'radial_weight', 'ilss', 'sector' ], dtype={'z': np.float64}, skiprows=1) frac = 1 #saveto = None saveto = "../results/wp_dr7_{}LRG_frac{}_weights.npy".format(sample, frac) cosmo = LambdaCDM(H0=70, Om0=0.25, Ode0=0.75) print 'ndata=', len(data.index) print 'nrand=', len(rand.index) #Sector completeness already cut to >0.6, not sure if still have to downsample randoms #and many have sector completness > 1!! ?? # data = data[data['z']<0.36] # data = data[data['ra']>90][data['ra']<270] #NGC print len(data.index) data = data.sample(frac=frac) rand = rand.sample(frac=frac) # data1 = data1[:int(frac*len(data1.index))] # rand1 = rand1[:int(frac*len(rand1.index))] print 'ndata=', len(data.index) print 'nrand=', len(rand.index) weights_data = data['radial_weight'] * data['fiber_coll_weight'] weights_rand = rand['radial_weight'] #losmax = 1.0 losmax = 40.0 zspace = False if sample == 'Bright-no': K = 21 rmin = 60 rmax = 200 elif sample == 'Full': K = 14 rmin = 40 rmax = 180 elif sample == 'Dim-no': K = 15 rmin = 0.01 rmax = 8. else: exit('ERROR') #bins = np.linspace(rmin, rmax, K + 1) bins = np.logspace(np.log10(rmin), np.log10(rmax), K + 1) start = time.time() # or rp, wp s, xi = run.run_corrfunc(data, rand, data, rand, bins, losmax, cosmo, weights_data=weights_data, weights_rand=weights_rand, zspace=zspace) end = time.time() print 'Time for dr7 {} LRGs, ndata={}: {}'.format(sample, len(data.index), end - start) ss = [s] xis = [xi] labels = ['dr7 {} LRGs'.format(sample)] if saveto: run.save_results(saveto, ss, xis, labels)
def time_pairs(): times_cf = np.zeros(len(ndata)) times_pg = np.zeros(len(ndata)) times_pgz = np.zeros(len(ndata)) rps = [] wprps = [] nrand = [] nproc = 2 K = 10 pimax = 40. #Mpc/h pibinwidth = pimax min_sep = 0.1 max_sep = 10. #Mpc/h basisfuncs = [estimator_chunks.tophat_robust] #bin_sep = np.log(rmax / rmin) / float(K) rpbins = np.logspace(np.log10(min_sep), np.log10(max_sep), K + 1) rpbins_avg = run.bins_logavg(rpbins) logrpbins_avg = run.logbins_avg(rpbins) logwidth = run.log_width(rpbins) bin_arg = np.log10(rpbins) cosmo = LambdaCDM(H0=70, Om0=0.3, Ode0=0.7) wp = True for i in range(len(ndata)): nd = ndata[i] print i, ndata data1fn = '../../lss/mangler/samples/a0.6452_0001.v5_ngc_ifield_ndata{}.rdzw'.format( nd) rand1fn = '../../lss/mangler/samples/a0.6452_rand20x.dr12d_cmass_ngc_ifield_ndata{}.rdz'.format( nd) data2fn = data1fn rand2fn = rand1fn data1 = pd.read_csv(data1fn) rand1 = pd.read_csv(rand1fn) data2 = pd.read_csv(data2fn) rand2 = pd.read_csv(rand2fn) nrand.append(len(rand1)) # should make so can take list print 'Adding info to dataframes' data1 = run.add_info(data1, zfile=None) rand1 = run.add_info(rand1, zfile=None) data2 = run.add_info(data2, zfile=None) rand2 = run.add_info(rand2, zfile=None) # start0 = time.time() # # run.run_treecorr(data1, rand1, data2, rand2, rmin, rmax, bin_sep, pimax, wp) # xi, d1d2pairs, d1r2pairs, d2r1pairs, r1r2pairs = run.pairs_treecorr( # data1, rand1, data2, rand2, rmin, rmax, bin_sep, pimax, wp) # end0 = time.time() # print "Time treecorr pairs:", end0 - start0 # times_tcp[i] = end0 - start0 # # start1 = time.time() # #run.run_treecorr_orig(data1, rand1, data2, rand2, rmin, rmax, bin_sep, pimax, wp) # end1 = time.time() # print "Time treecorr:", end1 - start1 # times_tc[i] = end1 - start1 # start2 = time.time() # d1d2pairs, d1r2pairs, d2r1pairs, r1r2pairs = pairs.pairs(data1, rand1, data2, rand2, # rmax, cosmo, wp) # end2 = time.time() # print "Time pairs:", end2 - start2 # times_kd[i] = end2 - start2 start = time.time() rp, wprp = run.run_corrfunc(data1, rand1, data2, rand2, rpbins, pimax, cosmo, nproc=nproc, pibinwidth=int(pibinwidth)) end = time.time() print "Time corrfunc:", end - start times_cf[i] = end - start rps.append(logrpbins_avg) wprps.append(wprp) vals = None start = time.time() ddgen = pairgen.PairGen(data1, data2, max_sep, cosmo, wp) drgen = pairgen.PairGen(data1, rand2, max_sep, cosmo, wp) rdgen = pairgen.PairGen(data2, rand1, max_sep, cosmo, wp) rrgen = pairgen.PairGen(rand1, rand2, max_sep, cosmo, wp) a = estimator_chunks.est(ddgen, drgen, rdgen, rrgen, pimax, max_sep, cosmo, basisfuncs, K, wp, nproc, bin_arg, logwidth) rp, wprp = run.calc_wprp(a, rpbins_avg, basisfuncs, K, rpbins, vals, pibinwidth, bin_arg, logwidth) rps.append(rp) wprps.append(wprp) end = time.time() print "Time chunks:", end - start times_pg[i] = end - start start = time.time() ddgen = pairgenz.PairGen(data1, data2, max_sep, cosmo, wp, pimax) drgen = pairgenz.PairGen(data1, rand2, max_sep, cosmo, wp, pimax) rdgen = pairgenz.PairGen(data2, rand1, max_sep, cosmo, wp, pimax) rrgen = pairgenz.PairGen(rand1, rand2, max_sep, cosmo, wp, pimax) a = estimator_chunks.est(ddgen, drgen, rdgen, rrgen, pimax, max_sep, cosmo, basisfuncs, K, wp, nproc, bin_arg, logwidth) rp, wprp = run.calc_wprp(a, rpbins_avg, basisfuncs, K, rpbins, vals, pibinwidth, bin_arg, logwidth) rps.append(rp) wprps.append(wprp) end = time.time() print "Time chunks:", end - start times_pgz[i] = end - start # time_arrs = [times_tc, times_kd] # labels = ['treecorr', 'kdtree'] time_arrs = [times_cf, times_pg, times_pgz] ndatas = [ndata] * len(time_arrs) nrands = [nrand] * len(time_arrs) labels = ['corrfunc', 'pairgen', 'pairgen zshells'] np.save( '../results/times/times_zshells_n{}_nproc{}.npy'.format( max(ndata), nproc), [ndatas, nrands, time_arrs, labels, rps, wprps])
def run_dr7_LRGs(): nproc = 2 frac = 0.05 #sample = 'Bright-no' #sample = 'Dim-no' print "Loading data..." sample = 'Full' datafn = '../data/DR7-{}.ascii'.format(sample) randfn = '../data/random-DR7-{}.ascii'.format(sample) data = pd.read_table(datafn, index_col=False, delim_whitespace=True, names=[ 'ra', 'dec', 'z', 'M_g', 'sector_completeness', 'n(z)*1e4', 'radial_weight', 'fiber_coll_weight', 'fogtmain', 'ilss', 'icomb', 'sector' ], dtype={'z': np.float64}, skiprows=1) rand = pd.read_table(randfn, index_col=False, delim_whitespace=True, names=[ 'ra', 'dec', 'z', 'sector_completeness', 'n(z)*1e4', 'radial_weight', 'ilss', 'sector' ], dtype={'z': np.float64}, skiprows=1) #saveto = None saveto = "../results/bao/xis_dr7_{}LRG_frac{}.npy".format(sample, frac) cosmo = LambdaCDM(H0=70, Om0=0.25, Ode0=0.75) print 'ndata=', len(data.index) print 'nrand=', len(rand.index) #Sector completeness already cut to >0.6, not sure if still have to downsample randoms #and many have sector completness > 1!! ?? # data = data[data['z']<0.36] # data = data[data['ra']>90][data['ra']<270] #NGC # data = data.sample(frac=frac) # rand = rand.sample(frac=frac) data = data[:int(frac * len(data.index))] rand = rand[:int(frac * len(rand.index))] print 'ndata=', len(data.index) print 'nrand=', len(rand.index) print "Adding info..." data = run.add_info(data, cosmo) rand = run.add_info(rand, cosmo) print max(data['dcm_mpc']), min(data['dcm_mpc']) print max(rand['dcm_mpc']), min(rand['dcm_mpc']) #weights_data = data['radial_weight']*data['fiber_coll_weight'] #weights_rand = rand['radial_weight'] weights_data = None weights_rand = None losmax = 1.0 #max of cosine #losmax = 40.0 zspace = True if sample == 'Bright-no': K = 21 rmin = 60 rmax = 200 elif sample == 'Full': K = 14 rmin = 40 rmax = 180 elif sample == 'Dim-no': K = 15 rmin = 0.01 rmax = 8. else: exit('ERROR') bins = np.linspace(rmin, rmax, K + 1) print "Bins:", bins #bins = np.logspace(np.log10(rmin), np.log10(rmax), K + 1) ss = [] xis = [] labels = [] print "Running corrfunc..." start = time.time() # or rp, wp s, xi = run.run_corrfunc(data, rand, data, rand, bins, losmax, cosmo, weights_data=weights_data, weights_rand=weights_rand, zspace=zspace) ss.append(s) xis.append(xi) labels.append("corrfunc") end = time.time() print 'Time for dr7 {} LRGs, ndata={}: {}'.format(sample, len(data.index), end - start) #wp = True wp = False basisfuncs = [estimator_chunks.tophat_xis] bin_arg = bins binwidth = (rmax - rmin) / float(K) pibinwidth = losmax vals = None print "Running estimator..." s_est, xi_est, a = run.run_chunks(data, rand, data, rand, losmax, rmin, rmax, basisfuncs, K, cosmo, wp, bins, vals, pibinwidth, zspace, nproc, bin_arg, binwidth) ss.append(s_est) xis.append(xi_est) labels.append("est tophat") #labels = ['dr7 {} LRGs'.format(sample)] if saveto: print "Saving to {}".format(saveto) np.save(saveto, [ss, xis, labels])
def run_dr7_LRGs_corrfunc(): print "Just running corrfunc" nproc = 1 frac = 0.05 #sample = 'Bright-no' #sample = 'Dim-no' print "Loading data..." sample = 'Full' datafn = '../data/DR7-{}.ascii'.format(sample) randfn = '../data/random-DR7-{}.ascii'.format(sample) print sdfsdf data = pd.read_csv(datafn, index_col=False, delim_whitespace=True, names=[ 'ra', 'dec', 'z', 'M_g', 'sector_completeness', 'n(z)*1e4', 'radial_weight', 'fiber_coll_weight', 'fogtmain', 'ilss', 'icomb', 'sector' ], dtype={'z': np.float64}, skiprows=1) rand = pd.read_csv(randfn, index_col=False, delim_whitespace=True, names=[ 'ra', 'dec', 'z', 'sector_completeness', 'n(z)*1e4', 'radial_weight', 'ilss', 'sector' ], dtype={'z': np.float64}, skiprows=1) #saveto = None saveto = "../results/bao/xis_dr7_{}LRG_frac{}_corrfunc.npy".format( sample, frac) cosmo = LambdaCDM(H0=70, Om0=0.25, Ode0=0.75) #utils.write_comoving_dist(data, ) print 'ndata=', len(data.index) print 'nrand=', len(rand.index) #Sector completeness already cut to >0.6, not sure if still have to downsample randoms #and many have sector completness > 1!! ?? # data = data[data['z']<0.36] # data = data[data['ra']>90][data['ra']<270] #NGC data = data.sample(frac=frac) #frac *=0.5 rand = rand.sample(frac=frac) #data = data[:int(frac*len(data.index))] #rand = rand[:int(frac*len(rand.index))] print 'ndata=', len(data.index) print 'nrand=', len(rand.index) weights_data = data['radial_weight'] * data['fiber_coll_weight'] weights_rand = rand['radial_weight'] # weights_data = None # weights_rand = None losmax = 1.0 #max of cosine #losmax = 40.0 zspace = True if sample == 'Bright-no': K = 21 rmin = 60 rmax = 200 elif sample == 'Full': K = 14 rmin = 40 rmax = 180 elif sample == 'Dim-no': K = 15 rmin = 0.01 rmax = 8. else: exit('ERROR') bins = np.linspace(rmin, rmax, K + 1) #bins = np.logspace(np.log10(rmin), np.log10(rmax), K + 1) print "bins:", bins ss = [] xis = [] aa = [] labels = [] print "Running corrfunc..." start = time.time() # or rp, wp s, xi_orig, xi_proj, amps = run.run_corrfunc(data, rand, data, rand, bins, losmax, cosmo, weights_data=weights_data, weights_rand=weights_rand, zspace=zspace, proj=True, nproc=nproc) print "s:", s print "xi_orig", xi_orig print "xi_proj", xi_proj ss.append(s) xis.append(xi_orig) aa.append(None) labels.append("corrfunc orig") ss.append(s) xis.append(xi_proj) aa.append(amps) labels.append("corrfunc projected") end = time.time() print 'Time for dr7 {} LRGs, ndata={}: {}'.format(sample, len(data.index), end - start) if saveto: print "Saving to {}".format(saveto) np.save(saveto, [ss, xis, aa, labels])