Python DLASurvey.load_SDSS_DR5の例、pyigm.surveys.dlasurvey.DLASurvey.load_SDSS_DR5 Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_dlasurvey.py プロジェクト: LiuFang816/SALSTM_py_data

def test_sdss():
    """Check the system counts of the 'all' and default SDSS DR5 samples."""
    # sample='all'
    full_survey = DLASurvey.load_SDSS_DR5(sample='all')
    assert full_survey.nsys == 1182
    # Default sample
    default_survey = DLASurvey.load_SDSS_DR5()
    n_nhi = len(default_survey.NHI)
    assert n_nhi == 737

コード例 #2

0

ファイルを表示

ファイル: test_dlasurvey.py プロジェクト: ninoc/pyigm

def test_sdss():
    """Verify the number of systems loaded for two SDSS DR5 samples."""
    expected_all, expected_default = 1182, 737
    # Everything (sample='all')
    survey_all = DLASurvey.load_SDSS_DR5(sample='all')
    assert survey_all.nsys == expected_all
    # Default call (no sample keyword)
    survey_default = DLASurvey.load_SDSS_DR5()
    assert len(survey_default.NHI) == expected_default

コード例 #3

0

ファイルを表示

ファイル: training_set.py プロジェクト: samwang141224/dla_cnn

def write_sdss_sightlines():
    """ Writes the SDSS DR5 sightlines that have no (or very few) DLAs

    The sightlines returned by grab_sightlines (DR5 'all' sample,
    flg_bal=0) are matched on the sky, within 1 arcsec, to the igmspec
    SDSS_DR7 meta table.  The matched spectra and the matched meta rows
    are written to
    $DROPBOX_DIR/MachineLearning/DR5/SDSS_DR5_noDLAs.hdf5

    Returns
    -------
    None : Writes to Dropbox

    Raises
    ------
    EnvironmentError
      If the DROPBOX_DIR environment variable is not set
    AssertionError
      If the number of matched meta rows differs from the number
      of sightlines
    """
    import os
    import h5py
    dropbox_dir = os.getenv('DROPBOX_DIR')
    if dropbox_dir is None:
        # Clear failure instead of a TypeError from None + str
        raise EnvironmentError("DROPBOX_DIR environment variable is not set")
    outfile = dropbox_dir + '/MachineLearning/DR5/SDSS_DR5_noDLAs.hdf5'
    # Load
    sdss = DLASurvey.load_SDSS_DR5(sample='all')
    slines, sdict = grab_sightlines(sdss, flg_bal=0)
    coords = SkyCoord(ra=slines['RA'], dec=slines['DEC'], unit='deg')
    # Load spectra -- RA/DEC in igmsp is not identical to RA_GROUP, DEC_GROUP in SDSS_DR7
    igmsp = IgmSpec()
    sdss_meta = igmsp['SDSS_DR7'].meta
    qso_coord = SkyCoord(ra=sdss_meta['RA_GROUP'],
                         dec=sdss_meta['DEC_GROUP'],
                         unit='deg')
    idxq, d2dq, d3dq = match_coordinates_sky(coords, qso_coord, nthneighbor=1)
    in_igmsp = d2dq < 1 * u.arcsec  # Check
    # Cut meta
    cut_meta = sdss_meta[idxq[in_igmsp]]
    assert len(slines) == len(cut_meta)
    # Grab
    spectra = igmsp['SDSS_DR7'].spec_from_meta(cut_meta)
    # Write -- the context manager closes the file even if a write fails
    with h5py.File(outfile, 'w') as hdf:
        spectra.write_to_hdf5(outfile, hdf5=hdf, clobber=True, fill_val=0.)
        # Add table (meta is already used)
        hdf['cut_meta'] = cut_meta

コード例 #4

0

ファイルを表示

def test_sdss():
    """Check SDSS DR5 sample sizes and the calculate_lox / calculate_fn outputs."""
    # sample='all'
    full_survey = DLASurvey.load_SDSS_DR5(sample='all')
    assert full_survey.nsys == 1182
    # Default sample
    stat_survey = DLASurvey.load_SDSS_DR5()
    assert len(stat_survey.NHI) == 737
    # calculate_lox on three bin edges
    z_edges = [2., 2.5, 3]
    lox, _lox_lo, _lox_hi = stat_survey.calculate_lox(z_edges)
    assert np.isclose(lox[0], 0.04625038, atol=1e-5)
    # calculate_fn on five NHI edges and one redshift interval
    nhi_edges = [20.3, 20.5, 21., 21.5, 22.]
    z_interval = [2, 2.5]
    fn, fn_lo, _fn_hi = stat_survey.calculate_fn(nhi_edges, z_interval, log=True)
    assert fn.size == 4
    assert np.isclose(fn_lo[0], 0.0682087, atol=1e-5)

コード例 #5

0

ファイルを表示

def main(args=None):
    """ Run the DLA fits selected on the command line and store them

    Each section is enabled by its own parsed flag (dla_lz, dla_dpow,
    dla_nenH) or by the 'all' flag, and hands its result dict to
    update_dla_fits.

    Parameters
    ----------
    args : optional
      Currently unused; parser() is called without arguments.
      NOTE(review): confirm whether this should be forwarded to parser()
    """
    import numpy as np

    from pyigm.surveys.analysis import fit_atan_dla_lz, fit_fN_dblpow
    from pyigm.surveys.dlasurvey import load_dla_surveys, update_dla_fits
    from pyigm.surveys.dlasurvey import DLASurvey
    from pyigm.surveys import dlasurvey

    pargs = parser()

    # DLA l(z) analysis
    if pargs.dla_lz or pargs.all:
        # arctan from Prochaska & Neeleman 2017
        surveys = load_dla_surveys()
        dfits, _ = fit_atan_dla_lz(surveys,
                                   nstep=100,
                                   bootstrap=pargs.dla_lz_boot,
                                   nboot=50000,
                                   nproc=pargs.nproc,
                                   boot_out=dlasurvey.lz_boot_file)
        # Calculate error from the 16th/84th percentiles of the bootstrap
        lz_boot = dlasurvey.load_boot_lz()
        for key in ['A', 'B', 'C']:
            boot = lz_boot[key].data
            # 68%
            perc = np.percentile(boot, [16., 84.])
            # Stored as offsets (low, high) relative to the fitted value
            dfits['lz']['atan']['sig_{:s}'.format(
                key)] = perc - dfits['lz']['atan'][key]
        # Write
        dfits['lz']['atan']['Ref'] = 'Prochaska & Neeleman 2017'
        update_dla_fits(dfits)

    # Fit double power law to f(N) of DLA [PW09 only]
    if pargs.dla_dpow or pargs.all:
        sdss_dr5 = DLASurvey.load_SDSS_DR5()
        # Only the fit dict is used; the remaining five outputs are discarded
        dfits, _, _, _, _, _ = fit_fN_dblpow(sdss_dr5.NHI,
                                             (-3., -1.1),
                                             (-6, -2),
                                             (21., 22.),
                                             nstep=100)
        # Write
        dfits['fN']['dpow']['Ref'] = 'PHW05'
        update_dla_fits(dfits)

    # DLA ne/nH
    if pargs.dla_nenH or pargs.all:
        # Hard-coded values; nothing is fitted here
        dfits = {}
        dfits['nenH'] = {}
        dfits['nenH']['loglog'] = dict(
            bp=-2.881,
            m=-0.352,
            bp_sig=(+0.253, -0.256),
            m_sig=(+0.321, -0.317))  # Values with all 50 measurements
        dfits['nenH']['loglog']['Ref'] = 'Neeleman+15; PN17'
        # Update
        update_dla_fits(dfits)

コード例 #6

0

ファイルを表示

def main(flg_tst, sdss=None, ml_survey=None):
    """ Run the vetting steps selected by the bits of flg_tst

    Parameters
    ----------
    flg_tst : int
      Bit flag; each set bit enables one section below
    sdss : optional
      Survey passed to the vetting calls; loaded with
      DLASurvey.load_SDSS_DR5() where a section needs to
    ml_survey : optional
      ML survey; when given, sections use it instead of loading
      their own JSON file
    """
    def bit_on(nbit):
        # Modulo form of "is bit nbit of flg_tst set?"
        return (flg_tst % 2**(nbit + 1)) >= 2**nbit

    # Bit 0: load JSON for DR5
    if bit_on(0):
        if sdss is None:
            sdss = DLASurvey.load_SDSS_DR5()
        v2_json = '../results/dr5_v2_results.json'
        ml_survey = json_to_sdss_dlasurvey(v2_json, sdss)

    # Bit 1: vette
    if bit_on(1):
        if ml_survey is None:
            sdss = DLASurvey.load_SDSS_DR5()
            ml_survey = json_to_sdss_dlasurvey(
                '../results/dr5_v2_results.json', sdss)
        vette_dlasurvey(ml_survey, sdss)

    # Bit 2: vette v5 and generate CSV of false negatives
    if bit_on(2):
        if ml_survey is None:
            sdss = DLASurvey.load_SDSS_DR5()
            ml_survey = json_to_sdss_dlasurvey(
                '../results/dr5_v5_predictions.json', sdss)
        missed, _midx, _ = vette_dlasurvey(ml_survey, sdss)
        mk_false_neg_table(missed, '../results/false_negative_DR5_v5.csv')

    # Bit 3: vette v6 and generate CSV of false negatives
    if bit_on(3):
        if ml_survey is None:
            sdss = DLASurvey.load_SDSS_DR5()
            ml_survey = json_to_sdss_dlasurvey(
                '../results/dr5_v6.1_results.json', sdss)
        missed, _midx, _ = vette_dlasurvey(ml_survey, sdss)
        mk_false_neg_table(missed, '../results/false_negative_DR5_v6.1.csv')

    # Bit 4: vette gensample v2
    if bit_on(4):
        if ml_survey is None:
            sdss = DLASurvey.load_SDSS_DR5()
            ml_survey = json_to_sdss_dlasurvey(
                '../results/results_catalog_dr7_model_gensample_v2.json', sdss)
        missed, _midx, spurious = vette_dlasurvey(ml_survey, sdss)
        # Both tables go through the same writer
        mk_false_neg_table(missed, '../results/false_negative_DR5_v2_gen.csv')
        mk_false_neg_table(spurious, '../results/false_positives_DR5_v2_gen.csv')

    # Bit 5: vette gensample v4.3.1
    if flg_tst & (2**5):
        if ml_survey is None:
            sdss = DLASurvey.load_SDSS_DR5()
            ml_survey = json_to_sdss_dlasurvey(
                '../results/results_model_4.3.1_data_dr5.json', sdss)
        missed, _midx, spurious = vette_dlasurvey(ml_survey, sdss)
        mk_false_neg_table(missed, '../results/false_negative_DR5_v4.3.1_gen.csv')
        mk_false_neg_table(spurious, '../results/false_positives_DR5_v4.3.1_gen.csv')

    # Bit 6
    if flg_tst & (2**6):
        dr5_for_david()

コード例 #7

0

ファイルを表示

ファイル: tlk_dla_HI.py プロジェクト: profxj/talks

def fig_rhoHI(lw=1.5, csz=15., lsz=14.):
    """  Plot the binned rho_HI of the SDSS-DR5 DLA survey against redshift

    The values come from DLASurvey.binned_rhoHI on fixed redshift bins
    and are plotted in units of 1e8.  The figure is written to
    fig_rhoHI.png

    Parameters
    ----------
    lw : float, optional
      Unused in this function
    csz : float, optional
      Size passed to set_fontsize for the axes
    lsz : float, optional
      Font size of the two text labels
    """
    sdss = DLASurvey.load_SDSS_DR5()
    zbins = [2.2, 2.4, 2.75, 3., 3.5, 4.5]
    rho_HI, rho_HI_low, rho_HI_hi = sdss.binned_rhoHI(zbins)

    outfile = 'fig_rhoHI.png'

    # Figure
    plt.figure(figsize=(5, 5))
    plt.clf()
    gs = gridspec.GridSpec(1, 1)

    # Single panel
    ax = plt.subplot(gs[0])

    # Plot
    for kk in range(len(zbins)-1):
        zcen = np.sum(zbins[kk:kk+2])/2.
        # Asymmetric errors must have shape (2, N) for errorbar; N=1 here.
        # assumes rho_HI_low/rho_HI_hi are the lower/upper error sizes
        # -- TODO confirm against binned_rhoHI
        yerr = np.array([[rho_HI_low[kk].value/1e8],
                         [rho_HI_hi[kk].value/1e8]])
        ax.errorbar([zcen], [rho_HI[kk].value/1e8], xerr=zcen-zbins[kk], fmt='o', color='blue', capthick=2)
        ax.errorbar([zcen], [rho_HI[kk].value/1e8], yerr=yerr, color='blue', capthick=2)
    # z=0
    xmnx = [2., 4.5]
    ax.fill_between(xmnx, 0.45, 0.6, color='green', alpha=0.5)

    # Axes
    ax.set_xlim(xmnx)
    ax.set_ylabel(r'$\rho_{\rm HI} \; (10^8 \, \rm M_\odot \, Mpc^{-3} \, h_{72})$')
    ax.set_xlabel(r'$z$')
    ax.text(0.1, 0.9, 'SDSS-DR5 (PW09)', color='blue', size=lsz, transform=ax.transAxes, ha='left')
    ax.text(0.9, 0.1, 'z~0 [21cm] \n (Zwaan+05)', color='green', size=lsz, transform=ax.transAxes, ha='right')
    set_spines(ax, 2.)
    set_fontsize(ax,csz)

    # Write
    plt.tight_layout(pad=0.2,h_pad=0.,w_pad=0.1)
    plt.savefig(outfile, dpi=750)
    plt.close()
    print("Wrote {:s}".format(outfile))

コード例 #8

0

ファイルを表示

def dr5_for_david():
    """ Generate a Table for David

    Matches each DLA of the default SDSS DR5 sample to its sightline,
    attaches plate/fiber to the DLA objects, and writes two ASCII
    tables: results/dr5_for_david.ascii (plate, fiber, zabs, NHI) and
    results/dr5_sightlines_for_david.ascii (sightlines with
    Z_END > Z_START).

    Raises
    ------
    KeyError
      If the sightlines table has none of the known fiber columns
    ValueError
      If any DLA lies more than 1 arcsec from its nearest sightline
    """
    sdss_survey = DLASurvey.load_SDSS_DR5()
    sightlines = sdss_survey.sightlines
    # Fiber key -- the column name is not fixed
    for fkey in ['FIBER', 'FIBER_ID', 'FIB']:
        if fkey in sightlines.keys():
            break
    else:
        raise KeyError("No fiber column found in the sightlines table")
    # Connect to sightlines
    s_coord = SkyCoord(ra=sightlines['RA'], dec=sightlines['DEC'], unit='deg')
    # Add plate/fiber to statistical DLAs
    dla_coord = sdss_survey.coord
    idx2, d2d, d3d = match_coordinates_sky(dla_coord, s_coord, nthneighbor=1)
    # Every DLA must match, so test the worst separation (not the best)
    if np.max(d2d.to('arcsec').value) > 1.:
        raise ValueError("Bad match to sightlines")
    plates, fibers = [], []
    # idx2 is ordered like the masked systems, hence the running index jj
    for jj, igd in enumerate(np.where(sdss_survey.mask)[0]):
        dla = sdss_survey._abs_sys[igd]
        plate = sightlines['PLATE'][idx2[jj]]
        fiber = sightlines[fkey][idx2[jj]]
        dla.plate = plate
        dla.fiber = fiber
        plates.append(plate)
        fibers.append(fiber)
    # Write
    dtbl = Table()
    dtbl['plate'] = plates
    dtbl['fiber'] = fibers
    dtbl['zabs'] = sdss_survey.zabs
    dtbl['NHI'] = sdss_survey.NHI
    dtbl.write('results/dr5_for_david.ascii', format='ascii')
    # Write sightline info, using the fiber column detected above
    stbl = sightlines[['PLATE', fkey, 'Z_START', 'Z_END', 'RA', 'DEC']]
    gdsl = stbl['Z_END'] > stbl['Z_START']
    stbl[gdsl].write('results/dr5_sightlines_for_david.ascii', format='ascii')

コード例 #9

0

ファイルを表示

def main(args=None):
    """ Build a training set from the SDSS DR5 'all' sightlines

    Parameters
    ----------
    args : optional
      Passed to parser() as its options keyword
    """
    pargs = parser(options=args)

    # Setup: swap the '/scripts...' tail of this file's path for '/src'
    # and put that directory on the import path
    import sys
    cut = __file__.rfind('/scripts')
    src_dir = __file__[:cut] + '/src'
    sys.path.append(src_dir)
    import training_set as tset
    from pyigm.surveys.dlasurvey import DLASurvey

    # Output root is built from the seed and the number of training items
    suffix = '/training_{:d}_{:d}'.format(pargs.seed, pargs.ntrain)
    out_prefix = pargs.outpath + suffix

    # Sightlines
    survey = DLASurvey.load_SDSS_DR5(sample='all')
    sightlines, _stats = tset.grab_sightlines(survey, flg_bal=0)

    # Run
    make_kwargs = dict(outroot=out_prefix, seed=pargs.seed, slls=pargs.slls)
    _, _ = tset.make_set(pargs.ntrain, sightlines, **make_kwargs)

コード例 #10

0

ファイルを表示

def examine_false_pos(test_file='data/test_dlas_96629_10000.json.gz',
                      pred_file='data/test_dlas_96629_predictions.json.gz',
                      vette_file='vette_10k.json'):
    """ Examine false positives in the Test set (held out)

    Prints counts of the ML false positives, writes those that fall in
    the DR5 analysis region to test10k_false_pos.ascii, and shows a
    histogram of their 'conf' values.

    Parameters
    ----------
    test_file : str, optional
      File passed to test_to_tbl
    pred_file : str, optional
      File passed to pred_to_tbl (the ML predictions)
    vette_file : str, optional
      JSON file holding 'test_idx'
    """
    from pyigm.surveys.dlasurvey import DLASurvey
    import h5py
    import json
    from matplotlib import pyplot as plt
    # Load Test (the table itself is not used further in this function)
    test_dlas = test_to_tbl(test_file)
    # Load hdf5 -- only the headers are needed, so close the file right away
    CNN_result_path = '/home/xavier/Projects/ML_DLA_results/CNN/'
    hdf5_datafile = CNN_result_path + 'gensample_hdf5_files/test_dlas_96629_10000.hdf5'
    with h5py.File(hdf5_datafile, 'r') as hdf:
        # [()] replaces Dataset.value, which was removed in h5py 3
        headers = json.loads(hdf['meta'][()])['headers']
    # Load ML
    ml_abs = pred_to_tbl(pred_file)
    # Vette
    vette = ltu.loadjson(vette_file)
    test_ml_idx = np.array(vette['test_idx'])
    # Load DR5
    dr5 = DLASurvey.load_SDSS_DR5()
    all_dr5 = DLASurvey.load_SDSS_DR5(sample='all_sys')

    # False positives
    fpos = ml_abs['NHI'] >= 20.3  # Must be a DLA
    imatched = np.where(test_ml_idx >= 0)[0]
    match_val = test_ml_idx[imatched]
    fpos[match_val] = False  # Matched entries are not false positives
    print("There are {:d} total false positives".format(np.sum(fpos)))
    # This nearly matches David's.  Will run with his analysis.

    fpos_in_dr5 = fpos.copy()
    # Restrict on DR5
    for idx in np.where(fpos_in_dr5)[0]:
        # Convoluted indexing..
        mlid = ml_abs['ids'][idx]
        # Plate/Fiber
        plate = headers[mlid]['PLATE']
        fib = headers[mlid]['FIBER']
        # Finally, match to DR5
        dr5_sl = np.where((dr5.sightlines['PLATE'] == plate)
                          & (dr5.sightlines['FIB'] == fib))[0][0]
        in_region = (ml_abs['zabs'][idx] >= dr5.sightlines['Z_START'][dr5_sl]) & \
            (ml_abs['zabs'][idx] <= dr5.sightlines['Z_END'][dr5_sl])
        if not in_region:
            fpos_in_dr5[idx] = False
    print("Number of FP in DR5 analysis region = {:d}".format(
        np.sum(fpos_in_dr5)))

    # How many match to DR5 SLLS?
    slls = all_dr5.NHI < 20.3
    slls_coord = all_dr5.coord[slls]
    slls_zabs = all_dr5.zabs[slls]
    nslls = 0
    for idx in np.where(fpos_in_dr5)[0]:
        # Convoluted indexing..
        mlid = ml_abs['ids'][idx]
        # RA/DEC
        ra = headers[mlid]['RA_GROUP']
        dec = headers[mlid]['DEC_GROUP']
        coord = SkyCoord(ra=ra, dec=dec, unit='deg')
        # Match coord
        mt = coord.separation(slls_coord) < 3 * u.arcsec
        if np.any(mt):
            # Match redshift
            if np.min(np.abs(slls_zabs[mt] - ml_abs['zabs'][idx])) < 0.015:
                nslls += 1
    print("Number of FP that are SLLS in DR5 = {:d}".format(nslls))

    # NOTE(review): the cut is strict (< 20.5) while the message says <=
    low_NHI = ml_abs['NHI'][fpos_in_dr5] < 20.5
    print("Number of FP that are NHI <= 20.5 = {:d}".format(np.sum(low_NHI)))

    # Write out
    fp_tbl = Table()
    for key in ['ids', 'NHI', 'zabs', 'conf']:
        fp_tbl[key] = ml_abs[key][fpos_in_dr5]
    fp_tbl.write('test10k_false_pos.ascii',
                 format='ascii.fixed_width',
                 overwrite=True)

    # Histogram
    dr5_idx = np.where(fpos_in_dr5)
    plt.clf()
    ax = plt.gca()
    ax.hist(ml_abs['conf'][dr5_idx])
    plt.show()

コード例 #11

0

ファイルを表示

ファイル: training_set.py プロジェクト: samwang141224/dla_cnn

def grab_sightlines(dlasurvey=None,
                    flg_bal=None,
                    zmin=2.3,
                    s2n=5.,
                    DX=0.,
                    igmsp_survey='SDSS_DR7',
                    update_zem=True):
    """ Grab a set of sightlines without DLAs from a DLA survey
    Insist that all have spectra occur in igmspec

    Parameters
    ----------
    dlasurvey : DLASurvey, optional
      Usually SDSS or BOSS.  If None, the SDSS DR5 'all' sample is
      loaded and igmsp_survey is reset to 'SDSS_DR7'
    flg_bal : int, optional
      Maximum BAL flag (0=No signature, 1=Weak BAL, 2=BAL)
    zmin : float, optional
      Minimum redshift for zem (the igmspec zem_GROUP value is used)
    s2n : float, optional
      Minimum S/N as defined in some manner
    DX : float, optional
      Restrict on DX
    igmsp_survey : str, optional
      Name of the igmspec group whose meta table is matched against
    update_zem : bool, optional
      Currently unused in this function

    Returns
    -------
    final : Table
      astropy Table of good sightlines
    sdict : dict
      dict describing the sightlines
    """
    # TODO: REMOVE 910, 526  z=2.88; NHI=21.19
    import warnings
    warnings.warn("Someday remove 910, 526 which has a *strong* DLA")
    igmsp = IgmSpec()
    # Init
    if dlasurvey is None:
        print("Using the DR5 sample for the sightlines")
        dlasurvey = DLASurvey.load_SDSS_DR5(sample='all')
        igmsp_survey = 'SDSS_DR7'
    sightlines = dlasurvey.sightlines
    nsight = len(sightlines)
    # np.ones keeps a boolean dtype even for an empty table
    keep = np.ones(nsight, dtype=bool)
    meta = igmsp[igmsp_survey].meta

    # Avoid DLAs: drop sightlines within 1 arcsec of any DLA
    dla_coord = dlasurvey.coord
    sl_coord = SkyCoord(ra=sightlines['RA'],
                        dec=sightlines['DEC'],
                        unit='deg')
    idx, d2d, d3d = match_coordinates_sky(sl_coord, dla_coord, nthneighbor=1)
    keep &= d2d > 1 * u.arcsec

    # BAL
    if flg_bal is not None:
        keep &= sightlines['FLG_BAL'] <= flg_bal

    # S/N
    if s2n > 0.:
        keep &= sightlines['S2N'] > s2n

    # Cut on DX
    if DX > 0.:
        keep &= sightlines['DX'] > DX

    # igmsp: require a match within 1 arcsec in the meta table
    qso_coord = SkyCoord(ra=meta['RA_GROUP'],
                         dec=meta['DEC_GROUP'],
                         unit='deg')
    idxq, d2dq, d3dq = match_coordinates_sky(sl_coord,
                                             qso_coord,
                                             nthneighbor=1)
    keep &= d2dq < 1 * u.arcsec

    # Check zem and dz: the two emission redshifts must agree to 0.1
    zem = meta['zem_GROUP'][idxq]
    dz = np.abs(zem - sightlines['ZEM'])
    keep &= dz < 0.1
    if zmin is not None:
        keep &= zem > zmin

    # Assess
    final = sightlines[keep]
    sdict = {}
    sdict['n'] = len(final)
    print("We have {:d} sightlines for analysis".format(sdict['n']))

    def qck_stats(idict, tbl, istr, key):
        # Store min/max/median of tbl[key] under keys prefixed by istr
        idict[istr + 'min'] = np.min(tbl[key])
        idict[istr + 'max'] = np.max(tbl[key])
        idict[istr + 'median'] = np.median(tbl[key])

    qck_stats(sdict, final, 'z', 'ZEM')
    qck_stats(sdict, final, 'i', 'MAG')

    print("Min z = {:g}, Median z = {:g}, Max z = {:g}".format(
        sdict['zmin'], sdict['zmedian'], sdict['zmax']))

    # Return
    return final, sdict

コード例 #12

0

ファイルを表示

def main(flg_tst, sdss=None, ml_survey=None):
    """ Run the training-set steps selected by the bits of flg_tst

    Parameters
    ----------
    flg_tst : int or str
      Bit flag (cast with int()); each set bit enables one section:
      1 sightlines, 2 test set of 100, 4 and 8 production runs,
      16 SLLS production run, 32 mixed systems, 64 DR5 DLA-free
      sightlines, 128 high NHI, 256 low S/N
    sdss : DLASurvey, optional
      Survey to draw sightlines from; the SDSS DR5 'all' sample is
      loaded when it is None and a section needs it
    ml_survey : optional
      Unused in this function
    """
    import os

    flg_tst = int(flg_tst)
    slines = None

    def load_sightlines(survey):
        # Load the DR5 'all' survey if needed and grab its sightlines
        if survey is None:
            survey = DLASurvey.load_SDSS_DR5(sample='all')
        sightlines, _ = grab_sightlines(survey, flg_bal=0)
        return survey, sightlines

    # Sightlines
    if flg_tst & (2**0):
        sdss, slines = load_sightlines(sdss)

    # Test case of 100 sightlines
    if flg_tst & (2**1):
        # Load here when bit 0 was not set (previously a NameError)
        if slines is None:
            sdss, slines = load_sightlines(sdss)
        # Make training set
        _, _ = make_set(100, slines, outroot='results/training_100')

    # Production runs
    if flg_tst & (2**2):
        training_prod(12345,
                      10,
                      5000,
                      outpath=os.getenv('DROPBOX_DIR') +
                      '/MachineLearning/DLAs/')

    # Production runs -- 100k more
    if flg_tst & (2**3):
        # python src/training_set.py
        training_prod(22345,
                      10,
                      10000,
                      outpath=os.getenv('DROPBOX_DIR') +
                      '/MachineLearning/DLAs/')

    # Production runs -- SLLS
    if flg_tst & (2**4):
        # python src/training_set.py
        training_prod(22343,
                      10,
                      5000,
                      slls=True,
                      outpath=os.getenv('DROPBOX_DIR') +
                      '/MachineLearning/SLLSs/')

    # Mixed systems for testing
    if flg_tst & (2**5):
        # python src/training_set.py
        sdss, slines = load_sightlines(sdss)
        ntrials = 10000
        seed = 23559
        _, _ = make_set(
            ntrials,
            slines,
            seed=seed,
            mix=True,
            outroot=os.getenv('DROPBOX_DIR') +
            '/MachineLearning/Mix/mix_test_{:d}_{:d}'.format(seed, ntrials))

    # DR5 DLA-free sightlines
    if flg_tst & (2**6):
        write_sdss_sightlines()

    # High NHI systems for testing
    if flg_tst & (2**7):
        # python src/training_set.py
        sdss, slines = load_sightlines(sdss)
        ntrials = 20000
        seed = 83559
        _, _ = make_set(ntrials,
                        slines,
                        seed=seed,
                        high=True,
                        outroot=os.getenv('DROPBOX_DIR') +
                        '/MachineLearning/HighNHI/high_train_{:d}_{:d}'.format(
                            seed, ntrials))

    # Low S/N
    if flg_tst & (2**8):
        # python src/training_set.py
        sdss, slines = load_sightlines(sdss)
        ntrials = 10000
        seed = 83557
        _, _ = make_set(
            ntrials,
            slines,
            seed=seed,
            low_s2n=True,
            outroot=os.getenv('DROPBOX_DIR') +
            '/MachineLearning/LowS2N/lows2n_train_{:d}_{:d}'.format(
                seed, ntrials))

コード例 #13

0

ファイルを表示

def chk_dr5_dla_to_ml(ml_dlasurvey=None, ml_llssurvey=None, dz_toler=0.015,
                      outfile='vette_dr5.json', write_again=True):
    """ Cross-match the SDSS DR5 DLAs against the ML DLA/LLS surveys

    Each DR5 DLA is matched to ML systems within 2 arcsec on the sky
    and within dz_toler in redshift.  The match indices, the sightline
    coverage mask, and the ML DLAs without a DR5 counterpart are saved
    as JSON to outfile.

    Parameters
    ----------
    ml_dlasurvey : optional
      ML DLA survey; if either survey is None both are loaded with
      load_ml_dr7()
    ml_llssurvey : optional
      ML LLS survey; see ml_dlasurvey
    dz_toler : float, optional
      Maximum |zabs difference| for a redshift match
    outfile : str, optional
      Name of the JSON output file
    write_again : bool, optional
      If True, also write DR5_misses.ascii and DR5_SLLS.ascii

    Notes
    -----
    NOTE(review): a pdb.set_trace() call remains after the ASCII tables
    are written, so the function stops in the debugger before the final
    section -- confirm whether it should be removed.
    """
    # Load ML
    if (ml_dlasurvey is None) or (ml_llssurvey is None):
        ml_llssurvey, ml_dlasurvey = load_ml_dr7()
    # Load DR5
    dr5 = DLASurvey.load_SDSS_DR5()  # This is the statistical sample
    # Use coord to efficiently deal with sightlines
    ml_coord = SkyCoord(ra=ml_dlasurvey.sightlines['RA'], dec=ml_dlasurvey.sightlines['DEC'], unit='deg')
    dr5_coord = SkyCoord(ra=dr5.sightlines['RA'], dec=dr5.sightlines['DEC'], unit='deg')
    idx, d2d, d3d = match_coordinates_sky(dr5_coord, ml_coord, nthneighbor=1)
    in_ml = d2d < 2*u.arcsec
    print("{:d} of the DR5 sightlines were covered by ML out of {:d}".format(np.sum(in_ml), len(dr5.sightlines)))
    # 7477 sightlines out of 7482

    # Cut down: keep only the sightlines covered by ML, then rebuild the
    # mask so dr5.coord/zabs/NHI below refer to that reduced set
    dr5.sightlines = dr5.sightlines[in_ml]
    new_mask = dla_stat(dr5, dr5.sightlines) # 737 good DLAs
    dr5.mask = new_mask
    dr5_dla_coord = dr5.coord
    dr5_dla_zabs = dr5.zabs
    ndr5 = len(dr5_dla_coord)

    ml_dla_coord = ml_dlasurvey.coords
    ml_lls_coord = ml_llssurvey.coords

    # Loop on DR5 DLAs and save indices of the matches
    # -1 (the initial value) means no match, -9 means matched to an ML LLS,
    # >= 0 is the index of the matching ML DLA
    dr5_ml_idx = np.zeros(ndr5).astype(int) - 1
    for ii in range(ndr5):
        # Match to ML
        dla_mts = np.where(dr5_dla_coord[ii].separation(ml_dla_coord) < 2*u.arcsec)[0]
        nmt = len(dla_mts)
        if nmt == 0:  # No match
            # Check for LLS
            lls_mts = np.where(dr5_dla_coord[ii].separation(ml_lls_coord) < 2*u.arcsec)[0]
            nmt2 = len(lls_mts)
            if nmt2 == 0:  # No match
                pass
            else:
                zML = ml_llssurvey.zabs[lls_mts] # Redshifts of all ML LLS within 2 arcsec
                zdiff = np.abs(dr5_dla_zabs[ii]-zML)
                if np.min(zdiff) < dz_toler:
                    dr5_ml_idx[ii] = -9  # SLLS match
        else:
            zML = ml_dlasurvey.zabs[dla_mts] # Redshifts of all ML DLAs within 2 arcsec
            zdiff = np.abs(dr5_dla_zabs[ii]-zML)
            if np.min(zdiff) < dz_toler:
                #print("Match on {:d}!".format(ii))
                # Match: keep the ML DLA closest in redshift
                imin = np.argmin(zdiff)
                dr5_ml_idx[ii] = dla_mts[imin]
            else: # Check for LLS
                lls_mts = np.where(dr5_dla_coord[ii].separation(ml_lls_coord) < 2*u.arcsec)[0]
                nmt2 = len(lls_mts)
                if nmt2 == 0:  # No match
                    pass
                else:
                    zML = ml_llssurvey.zabs[lls_mts] # Redshifts of all ML LLS within 2 arcsec
                    zdiff = np.abs(dr5_dla_zabs[ii]-zML)
                    if np.min(zdiff) < dz_toler:
                        dr5_ml_idx[ii] = -9  # SLLS match


    # Rebuilt because dr5.sightlines was cut down above
    dr5_coord = SkyCoord(ra=dr5.sightlines['RA'], dec=dr5.sightlines['DEC'], unit='deg')

    # Write out misses
    misses = np.where(dr5_ml_idx == -1)[0]
    plates, fibers = [], []
    for miss in misses:
        # Nearest sightline supplies the plate/fiber of this DLA
        imin = np.argmin(dr5_dla_coord[miss].separation(dr5_coord))
        plates.append(dr5.sightlines['PLATE'][imin])
        fibers.append(dr5.sightlines['FIB'][imin])
    mtbl = Table()
    mtbl['PLATE'] = plates
    mtbl['FIBER'] = fibers
    mtbl['NHI'] = dr5.NHI[misses]
    mtbl['zabs'] = dr5.zabs[misses]
    if write_again:
        mtbl.write('DR5_misses.ascii', format='ascii.fixed_width', overwrite=True)

    # Write out SLLS
    sllss = np.where(dr5_ml_idx == -9)[0]
    plates, fibers = [], []
    for slls in sllss:
        imin = np.argmin(dr5_dla_coord[slls].separation(dr5_coord))
        plates.append(dr5.sightlines['PLATE'][imin])
        fibers.append(dr5.sightlines['FIB'][imin])
    mtbl = Table()
    mtbl['PLATE'] = plates
    mtbl['FIBER'] = fibers
    mtbl['NHI'] = dr5.NHI[sllss]
    mtbl['zabs'] = dr5.zabs[sllss]
    if write_again:
        mtbl.write('DR5_SLLS.ascii', format='ascii.fixed_width', overwrite=True)
    # NOTE(review): debugger breakpoint left in; execution pauses here
    pdb.set_trace()

    # ML not matched by PW09?
    ml_dla_coords = ml_dlasurvey.coords
    idx2, d2d2, d3d = match_coordinates_sky(ml_dla_coords, dr5_dla_coord, nthneighbor=1)
    not_in_dr5 = d2d2 > 2*u.arcsec  # This doesn't match redshifts!
    might_be_in_dr5 = np.where(~not_in_dr5)[0]

    others_not_in = []  # this is some painful book-keeping
    for idx in might_be_in_dr5:  # Matching redshifts..
        imt = ml_dla_coord[idx].separation(dr5_dla_coord) < 2*u.arcsec
        # Match on dztoler: a sky match with no redshift match is also "not in DR5"
        if np.min(np.abs(ml_dlasurvey.zabs[idx]-dr5.zabs[imt])) > dz_toler:
            others_not_in.append(idx)

    # Save
    out_dict = {}
    out_dict['in_ml'] = in_ml
    out_dict['dr5_idx'] = dr5_ml_idx  # -1 are misses, -9 are SLLS
    out_dict['not_in_dr5'] = np.concatenate([np.where(not_in_dr5)[0], np.array(others_not_in)])
    ltu.savejson(outfile, ltu.jsonify(out_dict), overwrite=True)

コード例 #14

0

ファイルを表示

def dr5_false_positives(ml_dlasurvey=None, ml_llssurvey=None):
    """ Tabulate ML DLAs on DR5 sightlines that have no DR5 counterpart

    Uses the 'dr5_idx' entry of vette_dr5.json to discard ML DLAs already
    matched to DR5, keeps those whose zabs lies within the Z_START/Z_END
    range of their DR5 sightline, prints the counts, and writes
    FP_DR5_highNHI.ascii and FP_DR5_medNHI.ascii.

    Parameters
    ----------
    ml_dlasurvey : optional
      ML DLA survey; loaded with load_ml_dr7() when None
    ml_llssurvey : optional
      Unused in this function
    """
    vette_file = 'vette_dr5.json'
    from pyigm.surveys.dlasurvey import DLASurvey
    from matplotlib import pyplot as plt
    # ML survey
    if ml_dlasurvey is None:
        _, ml_dlasurvey = load_ml_dr7()
    # DR5 statistical sample
    dr5 = DLASurvey.load_SDSS_DR5()
    # Vetting results
    vette = ltu.loadjson(vette_file)
    dr5_ml_idx = np.array(vette['dr5_idx'])

    # Which ML DLAs sit on a DR5 sightline (within 2 arcsec)?
    dr5_sightlines = dr5.sightlines
    ml_dla_coord = ml_dlasurvey.coords
    dr5_coord = SkyCoord(ra=dr5_sightlines['RA'], dec=dr5_sightlines['DEC'], unit='deg')
    idx, d2d, d3d = match_coordinates_sky(ml_dla_coord, dr5_coord, nthneighbor=1)
    in_dr5 = d2d < 2*u.arcsec
    print("{:d} of the ML DLA were in the DR5 sightlines".format(np.sum(in_dr5)))

    # Start with everything flagged, then clear what is off the sightlines
    fpos = np.array([True]*ml_dlasurvey.nsys)
    fpos[~in_dr5] = False

    # Clear the ML DLAs that were matched to a DR5 DLA
    matched = dr5_ml_idx[dr5_ml_idx >= 0]
    fpos[matched] = False
    print("There are {:d} total false positives".format(np.sum(fpos)))
    # This nearly matches David's.  Will run with his analysis.

    # Restrict to the redshift range of each DR5 sightline
    fpos_in_stat = fpos.copy()
    plates = ml_dlasurvey.plate
    fibers = ml_dlasurvey.fiber
    zabs = ml_dlasurvey.zabs
    zem = ml_dlasurvey.zem
    for idx in np.flatnonzero(fpos_in_stat):
        same_spec = (dr5_sightlines['PLATE'] == plates[idx]) & \
            (dr5_sightlines['FIB'] == fibers[idx])
        row = np.where(same_spec)[0][0]
        inside = (zabs[idx] >= dr5_sightlines['Z_START'][row]) & \
            (zabs[idx] <= dr5_sightlines['Z_END'][row])
        if not inside:
            fpos_in_stat[idx] = False
    print("Number of FP in DR5 analysis region = {:d}".format(np.sum(fpos_in_stat)))

    # Quantities restricted to the surviving false positives
    nhi_fp = ml_dlasurvey.NHI[fpos_in_stat]
    plates_fp = plates[fpos_in_stat]
    fibers_fp = fibers[fpos_in_stat]
    zabs_fp = zabs[fpos_in_stat]
    print("Number with NHI<20.45 = {:d}".format(np.sum(nhi_fp < 20.45)))

    # High NHI
    high = nhi_fp > 21.
    high_tbl = Table()
    high_tbl['PLATE'] = plates_fp[high]
    high_tbl['FIBER'] = fibers_fp[high]
    high_tbl['zabs'] = zabs_fp[high]
    high_tbl['NHI'] = nhi_fp[high]
    high_tbl.write("FP_DR5_highNHI.ascii", format='ascii.fixed_width', overwrite=True)

    # Medium NHI
    medium = (nhi_fp > 20.6) & (nhi_fp < 21)
    med_tbl = Table()
    med_tbl['PLATE'] = plates_fp[medium]
    med_tbl['FIBER'] = fibers_fp[medium]
    med_tbl['zabs'] = zabs_fp[medium]
    med_tbl['zem'] = zem[fpos_in_stat][medium]
    med_tbl['NHI'] = nhi_fp[medium]
    med_tbl.write("FP_DR5_medNHI.ascii", format='ascii.fixed_width', overwrite=True)