Exemple #1
0
def run_sample(samplenum,
               min_sep,
               max_sep,
               rpbins,
               pimax,
               wp,
               cosmo,
               frac=1,
               bin_size=None,
               pibinwidth=2):
    """Run ``run.run_corrfunc`` on one ``dr72bright`` sample and time it.

    The catalog is read from
    ``../data/lss.dr72bright<samplenum>_czcut.dat`` and its randoms are
    obtained from ``get_random``. The data are subsampled with fraction
    ``frac`` and the randoms with ``frac / 10``. The same data and random
    frames are then passed as both catalog pairs, with no weights.

    ``min_sep``, ``max_sep``, ``wp`` and ``bin_size`` are accepted but are
    not used in this function body.

    Returns:
        The ``(rp_avg, wprp)`` pair returned by ``run.run_corrfunc``.
    """
    # print(...) is always given one pre-formatted string so the emitted
    # text is the same under the Python 2 print statement and Python 3.
    catalog_path = '../data/lss.dr72bright{}_czcut.dat'.format(samplenum)
    galaxies = pd.read_csv(catalog_path)
    randoms = get_random(galaxies)

    print('Sample {}, {}'.format(samplenum, labels_mr[samplenum]))

    print('%s %s' % ('ndata=', len(galaxies.index)))
    print('%s %s' % ('nrand=', len(randoms.index)))

    galaxies = galaxies.sample(frac=frac)
    print('subsampling rands')
    # The randoms keep a tenth of the fraction used for the data.
    randoms = randoms.sample(frac=frac / 10.)

    print('%s %s' % ('ndata=', len(galaxies.index)))
    print('%s %s' % ('nrand=', len(randoms.index)))

    t_begin = time.time()
    # Unweighted run: both weight arguments are None.
    rp_avg, wprp = run.run_corrfunc(galaxies,
                                    randoms,
                                    galaxies,
                                    randoms,
                                    rpbins,
                                    pimax,
                                    cosmo,
                                    weights_data=None,
                                    weights_rand=None,
                                    pibinwidth=pibinwidth)
    elapsed = time.time() - t_begin

    print('Time for sample {}, ndata={}: {}'.format(samplenum,
                                                    len(galaxies.index),
                                                    elapsed))
    return rp_avg, wprp
def run_sample_corrfunc(samplenum,
                        tag,
                        min_sep,
                        max_sep,
                        rpbins,
                        pimax,
                        wp,
                        cosmo,
                        frac=1,
                        bin_size=None,
                        pibinwidth=2):
    """Run ``run.run_corrfunc`` on one tagged ``dr72bright`` sample.

    Data come from ``../data/lss.dr72bright<samplenum><tag>.dat`` and
    randoms from ``../data/random-0.dr72bright<samplenum><tag>.dat``. Both
    frames are subsampled with fraction ``frac`` and weighted by their
    ``'fgotten'`` column. The same data and random frames are passed as
    both catalog pairs.

    ``min_sep``, ``max_sep``, ``wp`` and ``bin_size`` are accepted but are
    not used in this function body.

    Returns:
        The ``(rp_avg, wprp)`` pair returned by ``run.run_corrfunc``.
    """
    # print(...) is always given one pre-formatted string so the emitted
    # text is the same under the Python 2 print statement and Python 3.
    galaxies = pd.read_csv(
        '../data/lss.dr72bright{}{}.dat'.format(samplenum, tag))
    randoms = pd.read_csv(
        '../data/random-0.dr72bright{}{}.dat'.format(samplenum, tag))

    print('Sample {}'.format(samplenum))

    print('%s %s' % ('ndata=', len(galaxies.index)))
    print('%s %s' % ('nrand=', len(randoms.index)))

    galaxies = galaxies.sample(frac=frac)
    randoms = randoms.sample(frac=frac)

    print('%s %s' % ('ndata=', len(galaxies.index)))
    print('%s %s' % ('nrand=', len(randoms.index)))

    t_begin = time.time()
    rp_avg, wprp = run.run_corrfunc(galaxies,
                                    randoms,
                                    galaxies,
                                    randoms,
                                    rpbins,
                                    pimax,
                                    cosmo,
                                    weights_data=galaxies['fgotten'],
                                    weights_rand=randoms['fgotten'],
                                    pibinwidth=pibinwidth)
    elapsed = time.time() - t_begin

    print('Time for sample {}, ndata={}, nrand={}: {}'.format(
        samplenum, len(galaxies.index), len(randoms.index), elapsed))
    return rp_avg, wprp
Exemple #3
0
def run_together(min_sep, max_sep, bin_size, K, pimax, wp):
    """Correlate the combined samples 8, 9 and 10 and plot the result.

    The samples are merged by ``combine_samples``; copies of the merged
    data and randoms serve as the second catalog pair. ``K`` log-spaced
    bins between ``min_sep`` and ``max_sep`` are passed to
    ``run.run_corrfunc`` and the result is handed to
    ``plotter.plot_wprp``.

    ``bin_size`` and ``wp`` are only referenced by the commented-out
    estimator call and are otherwise unused. Nothing is returned.
    """
    # print(...) is always given one pre-formatted string so the emitted
    # text is the same under the Python 2 print statement and Python 3.
    samplenums = [8, 9, 10]  # alternative: [7, 8, 9, 10, 11, 12]

    data1, rand1 = combine_samples(samplenums)
    data2, rand2 = data1.copy(), rand1.copy()
    print('ndata={}, nrand={}'.format(len(data1.index), len(rand1.index)))

    log_lo, log_hi = np.log10(min_sep), np.log10(max_sep)
    rpbins = np.logspace(log_lo, log_hi, K + 1)
    rpbins_avg = run.bins_logavg(rpbins)

    # The settings below (logwidth, basisfuncs, the tripled K, vals) feed
    # only the commented-out estimator call further down.
    logwidth = run.log_width(rpbins)
    # alternatives: estimator.top_z, estimator.top_Mr, estimator.tophat
    basisfuncs = [estimator.gauss_Mr]
    K *= 3
    # alternatives: [0.1, 0.15, 0.2, 0.25], [-21., -20.5, -20, -19.5]
    vals = [-22.5, -21.5, -20.5, -19.5, -18.5]

    labels = ['corrfunc all']
    cols = [
        'purple', 'red', 'orange', 'green', 'blue', 'cyan', 'magenta', 'grey'
    ]

    print('Run')
    t_begin = time.time()
    # NOTE(review): other run.run_corrfunc callers in this file also pass a
    # cosmology as the seventh positional argument - confirm this call
    # matches the signature in use.
    est_ls, wprp = run.run_corrfunc(data1, rand1, data2, rand2, rpbins, pimax)
    rps, wprps = [rpbins_avg], [wprp]
    # rps, wprps = run.run(data1, rand1, data2, rand2, pimax, min_sep, max_sep, bin_size, basisfuncs,
    #     K, cosmo, wp, rpbins, vals, logrpbins_avg, logwidth)
    elapsed = time.time() - t_begin

    print('Time for all, ndata={}: {}'.format(len(data1.index), elapsed))

    plotter.plot_wprp(rps, wprps, labels, colors=cols)
Exemple #4
0
def run_dr7_LRGs():
    """Run corrfunc on the DR7 'Dim-no' LRG sample with weights and save.

    Reads ``../data/DR7-<sample>.ascii`` and
    ``../data/random-DR7-<sample>.ascii`` as whitespace-delimited tables
    (first row skipped), shuffles both with ``DataFrame.sample(frac=1)``,
    weights data by ``radial_weight * fiber_coll_weight`` and randoms by
    ``radial_weight``, and calls ``run.run_corrfunc`` with log-spaced bins
    and ``zspace=False``. Results go to ``run.save_results``.

    Takes no arguments and returns nothing.
    """
    # print(...) is always given one pre-formatted string so the emitted
    # text is the same under the Python 2 print statement and Python 3.
    sample = 'Dim-no'  # alternative: 'Bright-no'
    datafn = '../data/DR7-{}.ascii'.format(sample)
    randfn = '../data/random-DR7-{}.ascii'.format(sample)

    data_columns = [
        'ra', 'dec', 'z', 'M_g', 'sector_completeness',
        'n(z)*1e4', 'radial_weight', 'fiber_coll_weight',
        'fogtmain', 'ilss', 'icomb', 'sector'
    ]
    rand_columns = [
        'ra', 'dec', 'z', 'sector_completeness',
        'n(z)*1e4', 'radial_weight', 'ilss', 'sector'
    ]
    data = pd.read_table(datafn,
                         index_col=False,
                         delim_whitespace=True,
                         names=data_columns,
                         dtype={'z': np.float64},
                         skiprows=1)
    rand = pd.read_table(randfn,
                         index_col=False,
                         delim_whitespace=True,
                         names=rand_columns,
                         dtype={'z': np.float64},
                         skiprows=1)

    frac = 1
    saveto = "../results/wp_dr7_{}LRG_frac{}_weights.npy".format(sample, frac)
    cosmo = LambdaCDM(H0=70, Om0=0.25, Ode0=0.75)

    print('%s %s' % ('ndata=', len(data.index)))
    print('%s %s' % ('nrand=', len(rand.index)))
    # Author's open questions: sector completeness is already cut to >0.6,
    # unclear whether the randoms still need downsampling; many rows have
    # sector completeness > 1.
    # data = data[data['z']<0.36]
    # data = data[data['ra']>90][data['ra']<270] #NGC

    print(len(data.index))

    data = data.sample(frac=frac)
    rand = rand.sample(frac=frac)
    print('%s %s' % ('ndata=', len(data.index)))
    print('%s %s' % ('nrand=', len(rand.index)))

    weights_data = data['radial_weight'] * data['fiber_coll_weight']
    weights_rand = rand['radial_weight']

    losmax = 40.0  # alternative: 1.0
    zspace = False

    # (K, rmin, rmax) per sample name.
    binning_by_sample = {
        'Bright-no': (21, 60, 200),
        'Full': (14, 40, 180),
        'Dim-no': (15, 0.01, 8.),
    }
    if sample not in binning_by_sample:
        exit('ERROR')
    K, rmin, rmax = binning_by_sample[sample]
    # alternative: np.linspace(rmin, rmax, K + 1)
    bins = np.logspace(np.log10(rmin), np.log10(rmax), K + 1)

    t_begin = time.time()
    # The two outputs could equally be named rp, wp.
    s, xi = run.run_corrfunc(data,
                             rand,
                             data,
                             rand,
                             bins,
                             losmax,
                             cosmo,
                             weights_data=weights_data,
                             weights_rand=weights_rand,
                             zspace=zspace)
    elapsed = time.time() - t_begin
    print('Time for dr7 {} LRGs, ndata={}: {}'.format(sample, len(data.index),
                                                      elapsed))

    labels = ['dr7 {} LRGs'.format(sample)]
    if saveto:
        run.save_results(saveto, [s], [xi], labels)
def time_pairs():
    """Time corrfunc against the two pair-generator estimators.

    For every entry ``nd`` of the module-level ``ndata`` sequence, the
    matching data and random catalogs are read from
    ``../../lss/mangler/samples/`` and three runs are timed:

    * ``run.run_corrfunc``,
    * ``estimator_chunks.est`` fed by ``pairgen.PairGen``,
    * ``estimator_chunks.est`` fed by ``pairgenz.PairGen``.

    Sizes, timings, labels and the resulting curves are written with
    ``np.save`` to ``../results/times/``. Nothing is returned.
    """
    # print(...) is always given one pre-formatted string so the emitted
    # text is the same under the Python 2 print statement and Python 3.
    times_cf = np.zeros(len(ndata))
    times_pg = np.zeros(len(ndata))
    times_pgz = np.zeros(len(ndata))

    rps = []
    wprps = []
    nrand = []

    nproc = 2

    K = 10
    pimax = 40.  #Mpc/h
    pibinwidth = pimax

    min_sep = 0.1
    max_sep = 10.  #Mpc/h
    basisfuncs = [estimator_chunks.tophat_robust]

    rpbins = np.logspace(np.log10(min_sep), np.log10(max_sep), K + 1)
    rpbins_avg = run.bins_logavg(rpbins)
    logrpbins_avg = run.logbins_avg(rpbins)
    logwidth = run.log_width(rpbins)

    bin_arg = np.log10(rpbins)

    cosmo = LambdaCDM(H0=70, Om0=0.3, Ode0=0.7)
    wp = True

    for i, nd in enumerate(ndata):
        # Progress line: index and the size handled in this iteration
        # (previously the whole ndata sequence was printed every time).
        print('%s %s' % (i, nd))

        data1fn = '../../lss/mangler/samples/a0.6452_0001.v5_ngc_ifield_ndata{}.rdzw'.format(
            nd)
        rand1fn = '../../lss/mangler/samples/a0.6452_rand20x.dr12d_cmass_ngc_ifield_ndata{}.rdz'.format(
            nd)
        # The second catalog pair is read from the same files.
        data2fn = data1fn
        rand2fn = rand1fn

        data1 = pd.read_csv(data1fn)
        rand1 = pd.read_csv(rand1fn)
        data2 = pd.read_csv(data2fn)
        rand2 = pd.read_csv(rand2fn)

        nrand.append(len(rand1))

        # should make so can take list
        print('Adding info to dataframes')
        data1 = run.add_info(data1, zfile=None)
        rand1 = run.add_info(rand1, zfile=None)
        data2 = run.add_info(data2, zfile=None)
        rand2 = run.add_info(rand2, zfile=None)

        # Only corrfunc and the two pair-generator estimators are timed.

        # --- corrfunc ---
        start = time.time()
        rp, wprp = run.run_corrfunc(data1,
                                    rand1,
                                    data2,
                                    rand2,
                                    rpbins,
                                    pimax,
                                    cosmo,
                                    nproc=nproc,
                                    pibinwidth=int(pibinwidth))
        end = time.time()
        print('%s %s' % ("Time corrfunc:", end - start))
        times_cf[i] = end - start
        # The stored abscissa is logrpbins_avg, not the rp returned above.
        rps.append(logrpbins_avg)
        wprps.append(wprp)

        vals = None

        # --- estimator with pairgen ---
        start = time.time()
        ddgen = pairgen.PairGen(data1, data2, max_sep, cosmo, wp)
        drgen = pairgen.PairGen(data1, rand2, max_sep, cosmo, wp)
        rdgen = pairgen.PairGen(data2, rand1, max_sep, cosmo, wp)
        rrgen = pairgen.PairGen(rand1, rand2, max_sep, cosmo, wp)
        a = estimator_chunks.est(ddgen, drgen, rdgen, rrgen, pimax, max_sep,
                                 cosmo, basisfuncs, K, wp, nproc, bin_arg,
                                 logwidth)
        rp, wprp = run.calc_wprp(a, rpbins_avg, basisfuncs, K, rpbins, vals,
                                 pibinwidth, bin_arg, logwidth)
        rps.append(rp)
        wprps.append(wprp)
        end = time.time()
        print('%s %s' % ("Time chunks:", end - start))
        times_pg[i] = end - start

        # --- estimator with pairgenz (also receives pimax) ---
        start = time.time()
        ddgen = pairgenz.PairGen(data1, data2, max_sep, cosmo, wp, pimax)
        drgen = pairgenz.PairGen(data1, rand2, max_sep, cosmo, wp, pimax)
        rdgen = pairgenz.PairGen(data2, rand1, max_sep, cosmo, wp, pimax)
        rrgen = pairgenz.PairGen(rand1, rand2, max_sep, cosmo, wp, pimax)
        a = estimator_chunks.est(ddgen, drgen, rdgen, rrgen, pimax, max_sep,
                                 cosmo, basisfuncs, K, wp, nproc, bin_arg,
                                 logwidth)
        rp, wprp = run.calc_wprp(a, rpbins_avg, basisfuncs, K, rpbins, vals,
                                 pibinwidth, bin_arg, logwidth)
        rps.append(rp)
        wprps.append(wprp)
        end = time.time()
        print('%s %s' % ("Time chunks:", end - start))
        times_pgz[i] = end - start

    time_arrs = [times_cf, times_pg, times_pgz]
    # One copy of the size lists per timing array.
    ndatas = [ndata] * len(time_arrs)
    nrands = [nrand] * len(time_arrs)

    labels = ['corrfunc', 'pairgen', 'pairgen zshells']

    np.save(
        '../results/times/times_zshells_n{}_nproc{}.npy'.format(
            max(ndata), nproc),
        [ndatas, nrands, time_arrs, labels, rps, wprps])
Exemple #6
0
def run_dr7_LRGs():
    """Compare corrfunc with the chunked estimator on DR7 'Full' LRGs.

    Reads ``../data/DR7-<sample>.ascii`` and
    ``../data/random-DR7-<sample>.ascii`` as whitespace-delimited tables
    (first row skipped), keeps the leading ``frac`` (0.05) of the rows of
    each, runs both through ``run.add_info``, then calls
    ``run.run_corrfunc`` and ``run.run_chunks`` on linear bins with
    ``zspace=True`` and no weights. Both result sets are written to a
    ``.npy`` file with ``np.save``.

    Takes no arguments and returns nothing.
    """
    # print(...) is always given one pre-formatted string so the emitted
    # text is the same under the Python 2 print statement and Python 3.
    nproc = 2
    frac = 0.05
    print("Loading data...")
    sample = 'Full'  # alternatives: 'Bright-no', 'Dim-no'
    datafn = '../data/DR7-{}.ascii'.format(sample)
    randfn = '../data/random-DR7-{}.ascii'.format(sample)

    data_columns = [
        'ra', 'dec', 'z', 'M_g', 'sector_completeness',
        'n(z)*1e4', 'radial_weight', 'fiber_coll_weight',
        'fogtmain', 'ilss', 'icomb', 'sector'
    ]
    rand_columns = [
        'ra', 'dec', 'z', 'sector_completeness',
        'n(z)*1e4', 'radial_weight', 'ilss', 'sector'
    ]
    data = pd.read_table(datafn,
                         index_col=False,
                         delim_whitespace=True,
                         names=data_columns,
                         dtype={'z': np.float64},
                         skiprows=1)
    rand = pd.read_table(randfn,
                         index_col=False,
                         delim_whitespace=True,
                         names=rand_columns,
                         dtype={'z': np.float64},
                         skiprows=1)

    saveto = "../results/bao/xis_dr7_{}LRG_frac{}.npy".format(sample, frac)
    cosmo = LambdaCDM(H0=70, Om0=0.25, Ode0=0.75)

    print('%s %s' % ('ndata=', len(data.index)))
    print('%s %s' % ('nrand=', len(rand.index)))

    # Author's open questions: sector completeness is already cut to >0.6,
    # unclear whether the randoms still need downsampling; many rows have
    # sector completeness > 1.
    # data = data[data['z']<0.36]
    # data = data[data['ra']>90][data['ra']<270] #NGC

    # Leading-rows slice rather than a random DataFrame.sample(frac=frac).
    n_data_keep = int(frac * len(data.index))
    n_rand_keep = int(frac * len(rand.index))
    data = data[:n_data_keep]
    rand = rand[:n_rand_keep]
    print('%s %s' % ('ndata=', len(data.index)))
    print('%s %s' % ('nrand=', len(rand.index)))

    print("Adding info...")
    data = run.add_info(data, cosmo)
    rand = run.add_info(rand, cosmo)

    print('%s %s' % (max(data['dcm_mpc']), min(data['dcm_mpc'])))
    print('%s %s' % (max(rand['dcm_mpc']), min(rand['dcm_mpc'])))

    # Unweighted run (radial/fiber-collision weights are not applied).
    weights_data = None
    weights_rand = None

    losmax = 1.0  #max of cosine
    zspace = True

    # (K, rmin, rmax) per sample name.
    binning_by_sample = {
        'Bright-no': (21, 60, 200),
        'Full': (14, 40, 180),
        'Dim-no': (15, 0.01, 8.),
    }
    if sample not in binning_by_sample:
        exit('ERROR')
    K, rmin, rmax = binning_by_sample[sample]
    # alternative: np.logspace(np.log10(rmin), np.log10(rmax), K + 1)
    bins = np.linspace(rmin, rmax, K + 1)
    print('%s %s' % ("Bins:", bins))

    ss = []
    xis = []
    labels = []

    print("Running corrfunc...")
    t_begin = time.time()
    # The two outputs could equally be named rp, wp.
    s, xi = run.run_corrfunc(data,
                             rand,
                             data,
                             rand,
                             bins,
                             losmax,
                             cosmo,
                             weights_data=weights_data,
                             weights_rand=weights_rand,
                             zspace=zspace)
    ss.append(s)
    xis.append(xi)
    labels.append("corrfunc")
    elapsed = time.time() - t_begin
    print('Time for dr7 {} LRGs, ndata={}: {}'.format(sample, len(data.index),
                                                      elapsed))

    wp = False
    basisfuncs = [estimator_chunks.tophat_xis]
    bin_arg = bins
    binwidth = (rmax - rmin) / float(K)
    pibinwidth = losmax
    vals = None
    print("Running estimator...")
    s_est, xi_est, a = run.run_chunks(data, rand, data, rand, losmax, rmin,
                                      rmax, basisfuncs, K, cosmo, wp, bins,
                                      vals, pibinwidth, zspace, nproc, bin_arg,
                                      binwidth)
    ss.append(s_est)
    xis.append(xi_est)
    labels.append("est tophat")

    if saveto:
        print("Saving to {}".format(saveto))
        np.save(saveto, [ss, xis, labels])
Exemple #7
0
def run_dr7_LRGs_corrfunc():
    """Run only corrfunc (with projection) on the DR7 'Full' LRG sample.

    Reads ``../data/DR7-<sample>.ascii`` and
    ``../data/random-DR7-<sample>.ascii`` as whitespace-delimited tables
    (first row skipped), randomly subsamples both with ``frac`` (0.05),
    weights data by ``radial_weight * fiber_coll_weight`` and randoms by
    ``radial_weight``, and calls ``run.run_corrfunc`` with ``proj=True``
    on linear bins. The original and projected results, plus the
    amplitudes returned alongside them, are written with ``np.save``.

    Takes no arguments and returns nothing.
    """
    # print(...) is always given one pre-formatted string so the emitted
    # text is the same under the Python 2 print statement and Python 3.
    print("Just running corrfunc")
    nproc = 1
    frac = 0.05
    print("Loading data...")
    sample = 'Full'  # alternatives: 'Bright-no', 'Dim-no'
    datafn = '../data/DR7-{}.ascii'.format(sample)
    randfn = '../data/random-DR7-{}.ascii'.format(sample)

    # A leftover debugging halt that printed a name never defined in this
    # function used to sit here; it raised NameError before any data was
    # read and has been removed.
    data = pd.read_csv(datafn,
                       index_col=False,
                       delim_whitespace=True,
                       names=[
                           'ra', 'dec', 'z', 'M_g', 'sector_completeness',
                           'n(z)*1e4', 'radial_weight', 'fiber_coll_weight',
                           'fogtmain', 'ilss', 'icomb', 'sector'
                       ],
                       dtype={'z': np.float64},
                       skiprows=1)
    rand = pd.read_csv(randfn,
                       index_col=False,
                       delim_whitespace=True,
                       names=[
                           'ra', 'dec', 'z', 'sector_completeness', 'n(z)*1e4',
                           'radial_weight', 'ilss', 'sector'
                       ],
                       dtype={'z': np.float64},
                       skiprows=1)

    saveto = "../results/bao/xis_dr7_{}LRG_frac{}_corrfunc.npy".format(
        sample, frac)
    cosmo = LambdaCDM(H0=70, Om0=0.25, Ode0=0.75)

    print('%s %s' % ('ndata=', len(data.index)))
    print('%s %s' % ('nrand=', len(rand.index)))

    # Author's open questions: sector completeness is already cut to >0.6,
    # unclear whether the randoms still need downsampling; many rows have
    # sector completeness > 1.
    # data = data[data['z']<0.36]
    # data = data[data['ra']>90][data['ra']<270] #NGC

    data = data.sample(frac=frac)
    rand = rand.sample(frac=frac)
    print('%s %s' % ('ndata=', len(data.index)))
    print('%s %s' % ('nrand=', len(rand.index)))

    weights_data = data['radial_weight'] * data['fiber_coll_weight']
    weights_rand = rand['radial_weight']

    losmax = 1.0  #max of cosine
    zspace = True

    # (K, rmin, rmax) per sample name.
    binning_by_sample = {
        'Bright-no': (21, 60, 200),
        'Full': (14, 40, 180),
        'Dim-no': (15, 0.01, 8.),
    }
    if sample not in binning_by_sample:
        exit('ERROR')
    K, rmin, rmax = binning_by_sample[sample]
    # alternative: np.logspace(np.log10(rmin), np.log10(rmax), K + 1)
    bins = np.linspace(rmin, rmax, K + 1)
    print('%s %s' % ("bins:", bins))

    ss = []
    xis = []
    aa = []
    labels = []

    print("Running corrfunc...")
    start = time.time()
    s, xi_orig, xi_proj, amps = run.run_corrfunc(data,
                                                 rand,
                                                 data,
                                                 rand,
                                                 bins,
                                                 losmax,
                                                 cosmo,
                                                 weights_data=weights_data,
                                                 weights_rand=weights_rand,
                                                 zspace=zspace,
                                                 proj=True,
                                                 nproc=nproc)

    print('%s %s' % ("s:", s))
    print('%s %s' % ("xi_orig", xi_orig))
    print('%s %s' % ("xi_proj", xi_proj))

    # Entry 1: original result, with no amplitudes attached.
    ss.append(s)
    xis.append(xi_orig)
    aa.append(None)
    labels.append("corrfunc orig")

    # Entry 2: projected result with its amplitudes.
    ss.append(s)
    xis.append(xi_proj)
    aa.append(amps)
    labels.append("corrfunc projected")
    end = time.time()
    print('Time for dr7 {} LRGs, ndata={}: {}'.format(sample, len(data.index),
                                                      end - start))

    if saveto:
        print("Saving to {}".format(saveto))
        np.save(saveto, [ss, xis, aa, labels])