Example #1
def test_read_p03_g09():
    """ XQ-100 """
    p03 = DLASurvey.load_P03()
    assert p03.nsys == 105

    g09 = DLASurvey.load_G09()
    assert g09.nsys == 38
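A minimal sketch (assuming pyigm and its bundled survey files are installed) that tabulates system counts across several of the loaders shown on this page:

from pyigm.surveys.dlasurvey import DLASurvey

for loader in ['load_P03', 'load_G09', 'load_HST16', 'load_H100']:
    survey = getattr(DLASurvey, loader)()  # each loader returns a DLASurvey
    print('{:s}: {:d} systems'.format(loader, survey.nsys))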
Example #2
def test_read_hst16():
    # Statistical
    hst16 = DLASurvey.load_HST16()
    assert hst16.nsys == 4
    # All
    hst16_all = DLASurvey.load_HST16(sample='all')
    assert hst16_all.nsys == 48
Example #3
def test_sdss():
    # All
    sdss = DLASurvey.load_SDSS_DR5(sample='all')
    # Testing
    assert sdss.nsys == 1182
    # Stat
    sdss = DLASurvey.load_SDSS_DR5()
    assert len(sdss.NHI) == 737
Example #5
def neeleman13():
    """ Build a summary file for the Neeleman+13 sample
    """
    prefix = "H100"
    outpath = os.getenv("DROPBOX_DIR") + "/Public/DLA/" + prefix + "/"
    dlasurvey = DLASurvey.from_flist("Lists/Neeleman13.lst", tree=os.environ.get("DLA"))
    dlasurvey.ref = "Neeleman+13"
    # Reset vlim
    for dla in dlasurvey._abs_sys:
        dla.vlim = [-1000.0, 1000.0] * u.km / u.s
    # Mask: NHI == NHI is False for NaN, so this keeps only systems with a measured NHI
    dlasurvey.mask = dlasurvey.NHI == dlasurvey.NHI
    # Json file for ions
    dlasurvey.fill_ions(use_Nfile=True)
    mk_json_ions(dlasurvey, prefix, outpath + prefix + "_DLA_ions.json")

    # Json files for .clm files
    mk_json_clms(dlasurvey, outpath + "CLMS/", prefix)
    print("It is likely you wish to tarball the CLMS folder for distribution")

    # JSON SYS files (preferred)
    mk_json_sys(dlasurvey, outpath, prefix)

    # Summary file and spectra
    mk_summary(
        dlasurvey,
        prefix,
        outpath + prefix + "_DLA.fits",
        specpath=outpath + "/Spectra/",
        htmlfil=outpath + prefix + "_DLA.html",
    )
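This builder leans on two environment variables, DROPBOX_DIR (output root) and DLA (the tree holding the Neeleman+13 lists). A minimal guard one might run first; the check is an illustrative addition, not part of the original script:

import os

for var in ('DROPBOX_DIR', 'DLA'):
    if os.getenv(var) is None:  # fail early with a clear message
        raise OSError('Set ${:s} before running neeleman13()'.format(var))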
Example #6
def main(args=None):

    import numpy as np

    from linetools import utils as ltu

    from pyigm.surveys.analysis import fit_atan_dla_lz, fit_fN_dblpow
    from pyigm.surveys.dlasurvey import load_dla_surveys, update_dla_fits
    from pyigm.surveys.dlasurvey import DLASurvey
    from pyigm.surveys import dlasurvey

    pargs = parser()

    # DLA l(z) analysis
    if pargs.dla_lz or pargs.all:
        # arctan from Prochaska & Neeleman 2017
        surveys = load_dla_surveys()
        dfits, _ = fit_atan_dla_lz(surveys,
                                   nstep=100,
                                   bootstrap=pargs.dla_lz_boot,
                                   nboot=50000,
                                   nproc=pargs.nproc,
                                   boot_out=dlasurvey.lz_boot_file)
        # Calculate error
        lz_boot = dlasurvey.load_boot_lz()
        for key in ['A', 'B', 'C']:
            boot = lz_boot[key].data
            # 68%
            perc = np.percentile(boot, [16., 84.])
            dfits['lz']['atan']['sig_{:s}'.format(
                key)] = perc - dfits['lz']['atan'][key]
        # Write
        dfits['lz']['atan']['Ref'] = 'Prochaska & Neeleman 2017'
        update_dla_fits(dfits)

    # Fit double power law to f(N) of DLA [PW09 only]
    if pargs.dla_dpow or pargs.all:
        sdss_dr5 = DLASurvey.load_SDSS_DR5()
        dfits, best, Ndgrid, a3grid, a4grid, lik = fit_fN_dblpow(sdss_dr5.NHI,
                                                                 (-3., -1.1),
                                                                 (-6, -2),
                                                                 (21., 22.),
                                                                 nstep=100)
        # Write
        dfits['fN']['dpow']['Ref'] = 'PHW05'
        update_dla_fits(dfits)

    # DLA ne/nH
    if pargs.dla_nenH or pargs.all:
        dfits = {}
        dfits['nenH'] = {}
        dfits['nenH']['loglog'] = dict(
            bp=-2.881,
            m=-0.352,
            bp_sig=(+0.253, -0.256),
            m_sig=(+0.321, -0.317))  # Values with all 50 measurements
        dfits['nenH']['loglog']['Ref'] = 'Neeleman+15; PN17'
        # Update
        update_dla_fits(dfits)
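Once update_dla_fits has stored the parameters, they are evaluated through the fitted_* accessors on any DLASurvey instance (see the test_dla_fitted example below). A minimal sketch:

from pyigm.surveys.dlasurvey import DLASurvey

dlas = DLASurvey(ref='null')
lz = dlas.fitted_lz(2.5)      # arctan fit to l(z)
fN = dlas.fitted_fN(20.5)     # double power-law f(N)
nenH = dlas.fitted_nenH(21.)  # log-log ne/nH relation
print(lz, fN, nenH)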
Example #7
def test_read_h100():
    h100 = DLASurvey.load_H100()
    assert h100.nsys == 100

    SiII_clms = h100.ions((14, 2))
    gdSiII = np.where(SiII_clms['flag_N'] > 0)[0]
    assert len(gdSiII) == 98
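The tuple passed to ions() is (atomic number, ionization state), so (14, 2) selects Si II, and flag_N > 0 keeps entries with a column-density measurement or limit. A sketch for another ion (Fe II; assuming it is tabulated for this sample):

import numpy as np
from pyigm.surveys.dlasurvey import DLASurvey

h100 = DLASurvey.load_H100()
FeII_clms = h100.ions((26, 2))  # (Z, ion) -> Fe II
print(np.sum(FeII_clms['flag_N'] > 0), 'systems with an Fe II constraint')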
Example #10
def write_sdss_sightlines():
    """ Writes the SDSS DR5 sightlines that have no (or very few) DLAs
    Returns
    -------
    None : Writes to Dropbox

    """
    import os
    import h5py
    outfile = os.getenv(
        'DROPBOX_DIR') + '/MachineLearning/DR5/SDSS_DR5_noDLAs.hdf5'
    # Load
    sdss = DLASurvey.load_SDSS_DR5(sample='all')
    slines, sdict = grab_sightlines(sdss, flg_bal=0)
    coords = SkyCoord(ra=slines['RA'], dec=slines['DEC'], unit='deg')
    # Load spectra -- RA/DEC in igmsp is not identical to RA_GROUP, DEC_GROUP in SDSS_DR7
    igmsp = IgmSpec()
    sdss_meta = igmsp['SDSS_DR7'].meta
    qso_coord = SkyCoord(ra=sdss_meta['RA_GROUP'],
                         dec=sdss_meta['DEC_GROUP'],
                         unit='deg')
    idxq, d2dq, d3dq = match_coordinates_sky(coords, qso_coord, nthneighbor=1)
    in_igmsp = d2dq < 1 * u.arcsec  # Check
    # Cut meta
    cut_meta = sdss_meta[idxq[in_igmsp]]
    assert len(slines) == len(cut_meta)
    # Grab
    spectra = igmsp['SDSS_DR7'].spec_from_meta(cut_meta)
    # Write
    hdf = h5py.File(outfile, 'w')
    spectra.write_to_hdf5(outfile, hdf5=hdf, clobber=True, fill_val=0.)
    # Add table (meta is already used)
    hdf['cut_meta'] = cut_meta
    hdf.close()
Example #11
def test_sdss():
    # All
    sdss = DLASurvey.load_SDSS_DR5(sample='all')
    # Testing
    assert sdss.nsys == 1182
    # Stat
    sdss_stat = DLASurvey.load_SDSS_DR5()
    assert len(sdss_stat.NHI) == 737
    # Binned
    lX, lX_lo, lX_hi = sdss_stat.calculate_lox([2., 2.5, 3])
    assert np.isclose(lX[0], 0.04625038, atol=1e-5)
    fN, fN_lo, fN_hi = sdss_stat.calculate_fn([20.3, 20.5, 21., 21.5, 22.],
                                              [2, 2.5],
                                              log=True)
    assert fN.size == 4
    assert np.isclose(fN_lo[0], 0.0682087, atol=1e-5)
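calculate_lox and calculate_fn bin the statistical sample (in redshift and in log NHI, respectively) and return the central values together with lower/upper uncertainty terms. A sketch with a coarser binning; the exact numbers depend on the sample shipped with pyigm:

from pyigm.surveys.dlasurvey import DLASurvey

sdss_stat = DLASurvey.load_SDSS_DR5()
lX, lX_lo, lX_hi = sdss_stat.calculate_lox([2., 3., 4.])
print(lX, lX_lo, lX_hi)  # two z-bins: central l(X) plus its low/high terms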
Example #12
def test_dat_list():
    """JXP format :: Likely to be Deprecated
    """
    if os.getenv('DLA') is None:
        assert True
        return
    # Load
    dlas = DLASurvey.neeleman13_tree()
    # tests
    assert dlas.nsys == 100
Example #14
def main(flg_tst, sdss=None, ml_survey=None):

    # Load JSON for DR5
    if (flg_tst % 2**1) >= 2**0:
        if sdss is None:
            sdss = DLASurvey.load_SDSS_DR5()
        #ml_survey = json_to_sdss_dlasurvey('../results/dr5_v1_predictions.json', sdss)
        ml_survey = json_to_sdss_dlasurvey('../results/dr5_v2_results.json', sdss)

    # Vette
    if (flg_tst % 2**2) >= 2**1:
        if ml_survey is None:
            sdss = DLASurvey.load_SDSS_DR5()
            ml_survey = json_to_sdss_dlasurvey('../results/dr5_v2_results.json', sdss)
        vette_dlasurvey(ml_survey, sdss)

    # Vette v5 and generate CSV
    if (flg_tst % 2**3) >= 2**2:
        if ml_survey is None:
            sdss = DLASurvey.load_SDSS_DR5()
            ml_survey = json_to_sdss_dlasurvey('../results/dr5_v5_predictions.json', sdss)
        false_neg, midx, _ = vette_dlasurvey(ml_survey, sdss)
        # CSV of false negatives
        mk_false_neg_table(false_neg, '../results/false_negative_DR5_v5.csv')

    # Vette v6 and generate CSV
    if (flg_tst % 2**4) >= 2**3:
        if ml_survey is None:
            sdss = DLASurvey.load_SDSS_DR5()
            ml_survey = json_to_sdss_dlasurvey('../results/dr5_v6.1_results.json', sdss)
        false_neg, midx, _ = vette_dlasurvey(ml_survey, sdss)
        # CSV of false negatives
        mk_false_neg_table(false_neg, '../results/false_negative_DR5_v6.1.csv')

    # Vette gensample v2
    if (flg_tst % 2**5) >= 2**4:
        if ml_survey is None:
            sdss = DLASurvey.load_SDSS_DR5()
            ml_survey = json_to_sdss_dlasurvey('../results/results_catalog_dr7_model_gensample_v2.json',sdss)
        false_neg, midx, false_pos = vette_dlasurvey(ml_survey, sdss)
        # CSV of false negatives
        mk_false_neg_table(false_neg, '../results/false_negative_DR5_v2_gen.csv')
        mk_false_neg_table(false_pos, '../results/false_positives_DR5_v2_gen.csv')

    # Vette gensample v4.3.1
    if flg_tst & (2**5):
        if ml_survey is None:
            sdss = DLASurvey.load_SDSS_DR5()
            ml_survey = json_to_sdss_dlasurvey('../results/results_model_4.3.1_data_dr5.json',sdss)
        false_neg, midx, false_pos = vette_dlasurvey(ml_survey, sdss)
        # CSV of false negatives
        mk_false_neg_table(false_neg, '../results/false_negative_DR5_v4.3.1_gen.csv')
        mk_false_neg_table(false_pos, '../results/false_positives_DR5_v4.3.1_gen.csv')

    if flg_tst & (2**6):
        dr5_for_david()
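flg_tst acts as a bit mask: the expression (flg_tst % 2**(k+1)) >= 2**k, like flg_tst & 2**k, tests whether bit k is set, so stages can be combined in a single call. A hypothetical driver invocation:

# Run only the DR5 load (bit 0) and the v5 vetting (bit 2)
main(2**0 + 2**2)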
Example #15
def fig_rhoHI(lw=1.5, csz=15., lsz=14.):
    """ Plot the HI mass density rho_HI vs. redshift for SDSS-DR5
    Parameters
    ----------
    lw, csz, lsz : float, optional
      Line width and font sizes for the figure
    """
    sdss = DLASurvey.load_SDSS_DR5()
    zbins = [2.2, 2.4, 2.75, 3., 3.5, 4.5]
    rho_HI, rho_HI_low, rho_HI_hi = sdss.binned_rhoHI(zbins)

    outfile = 'fig_rhoHI.png'

    # Figure
    plt.figure(figsize=(5, 5))
    plt.clf()
    gs = gridspec.GridSpec(1, 1)


    # Tau plot
    ax = plt.subplot(gs[0])

    # Plot
    for kk in range(len(zbins)-1):
        zcen = np.sum(zbins[kk:kk+2])/2.
        yerr= np.array([rho_HI_low[kk].value/1e8, rho_HI_hi[kk].value/1e8])
        ax.errorbar([zcen], [rho_HI[kk].value/1e8], xerr=zcen-zbins[kk], fmt='o', color='blue', capthick=2)
        ax.errorbar([zcen], [rho_HI[kk].value/1e8], yerr=[yerr], color='blue', capthick=2)
    # z=0
    xmnx = [2., 4.5]
    ax.fill_between(xmnx, 0.45, 0.6, color='green', alpha=0.5)

    # Axes
    ax.set_xlim(xmnx)
    #ax.set_ylim(1e-2, 5e7)
    ax.set_ylabel(r'$\rho_{\rm HI} \; (10^8 \, \rm M_\odot \, Mpc^{-3} \, h_{72})$')
    ax.set_xlabel(r'$z$')
    ax.text(0.1, 0.9, 'SDSS-DR5 (PW09)', color='blue', size=lsz, transform=ax.transAxes, ha='left')
    ax.text(0.9, 0.1, 'z~0 [21cm] \n (Zwaan+05)', color='green', size=lsz, transform=ax.transAxes, ha='right')
    #ax.xaxis.set_major_locator(plt.MultipleLocator(10.))
    #
    set_spines(ax, 2.)
    set_fontsize(ax,csz)

    # Write
    plt.tight_layout(pad=0.2,h_pad=0.,w_pad=0.1)
    plt.savefig(outfile, dpi=750)
    plt.close()
    print("Wrote {:s}".format(outfile))
Example #16
def grab_meta():
    """ Generates the meta data needed for the IGMSpec build
    Returns
    -------
    meta : Table
    spec_files : list
      List of spec_file names
    """
    # Load DLA
    from pyigm.surveys.dlasurvey import DLASurvey
    hdla100 = DLASurvey.neeleman13_tree()
    # Cut down to unique QSOs
    spec_files = []
    names = []
    ra = []
    dec = []
    coords = hdla100.coord
    for cnt, coord in enumerate(coords):
        # Name
        names.append('J{:s}{:s}'.format(
            coord.ra.to_string(unit=u.hour, sep='', pad=True, precision=2),
            coord.dec.to_string(sep='', pad=True, precision=1)))
        # RA/DEC
        ra.append(coord.ra.value)
        dec.append(coord.dec.value)
        # SPEC_FILE
        fname = hdla100._abs_sys[cnt]._datdict['hi res file'].split('/')[-1]
        spec_files.append(fname)
    uni, uni_idx = np.unique(names, return_index=True)
    nqso = len(uni_idx)
    #
    meta = Table()
    meta['RA_GROUP'] = np.array(ra)[uni_idx]
    meta['DEC_GROUP'] = np.array(dec)[uni_idx]
    meta['zem_GROUP'] = hdla100.zem[uni_idx]
    meta['sig_zem'] = [0.] * nqso
    meta['flag_zem'] = [str('UNKN')] * nqso
    meta['STYPE'] = [str('QSO')] * nqso
    meta['SPEC_FILE'] = np.array(spec_files)[uni_idx]
    # Check
    assert chk_meta(meta, chk_cat_only=True)
    return meta
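The J-names are built straight from the coordinates; a standalone sketch of the same formatting (astropy only):

import astropy.units as u
from astropy.coordinates import SkyCoord

coord = SkyCoord(ra=123.1143, dec=-12.4321, unit='deg')
name = 'J{:s}{:s}'.format(
    coord.ra.to_string(unit=u.hour, sep='', pad=True, precision=2),
    coord.dec.to_string(sep='', pad=True, precision=1))
print(name)  # -> J081227.43-122555.6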
Example #18
def dr5_for_david():
    """ Generate a Table for David
    """
    # imports
    import pdb
    sdss_survey = DLASurvey.load_SDSS_DR5()
    # Fiber key
    for fkey in ['FIBER', 'FIBER_ID', 'FIB']:
        if fkey in sdss_survey.sightlines.keys():
            break
    # Init
    #idict = dict(plate=[], fiber=[], classification_confidence=[],  # FOR v2
    #             classification=[], ra=[], dec=[])
    # Connect to sightlines
    s_coord = SkyCoord(ra=sdss_survey.sightlines['RA'], dec=sdss_survey.sightlines['DEC'], unit='deg')
    # Add plate/fiber to statistical DLAs
    dla_coord = sdss_survey.coord
    idx2, d2d, d3d = match_coordinates_sky(dla_coord, s_coord, nthneighbor=1)
    if np.min(d2d.to('arcsec').value) > 1.:
        raise ValueError("Bad match to sightlines")
    plates, fibers = [], []
    for jj,igd in enumerate(np.where(sdss_survey.mask)[0]):
        dla = sdss_survey._abs_sys[igd]
        try:
            dla.plate = sdss_survey.sightlines['PLATE'][idx2[jj]]
        except IndexError:
            pdb.set_trace()
        dla.fiber = sdss_survey.sightlines[fkey][idx2[jj]]
        plates.append(sdss_survey.sightlines['PLATE'][idx2[jj]])
        fibers.append(sdss_survey.sightlines[fkey][idx2[jj]])
    # Write
    dtbl = Table()
    dtbl['plate'] = plates
    dtbl['fiber'] = fibers
    dtbl['zabs'] = sdss_survey.zabs
    dtbl['NHI'] = sdss_survey.NHI
    dtbl.write('results/dr5_for_david.ascii', format='ascii')
    # Write sightline info
    stbl = sdss_survey.sightlines[['PLATE', 'FIB', 'Z_START', 'Z_END', 'RA', 'DEC']]
    gdsl = stbl['Z_END'] > stbl['Z_START']
    stbl[gdsl].write('results/dr5_sightlines_for_david.ascii', format='ascii')
Example #19
def neeleman13():
    """ Build a summary file for the Neeleman+13 sample
    """
    prefix = 'H100'
    outpath = os.getenv('DROPBOX_DIR')+'/Public/DLA/'+prefix+'/'
    dlasurvey = DLASurvey.from_flist('Lists/Neeleman13.lst',
                                     tree=os.environ.get('DLA'))
    dlasurvey.ref = 'Neeleman+13'
    # Json file for ions
    dlasurvey.fill_ions(use_Nfile=True)
    mk_json_ions(dlasurvey, prefix, outpath+prefix+'_DLA_ions.json')

    # Json files for .clm files
    mk_json_clms(dlasurvey, outpath+'CLMS/', prefix)
    print('It is likely you wish to tarball the CLMS folder for distribution')

    # Summary file and spectra
    mk_summary(dlasurvey, prefix, outpath+prefix+'_DLA.fits',
               specpath=outpath+'/Spectra/',
               htmlfil=outpath+prefix+'_DLA.html')
Example #20
def test_init():
    dlas = DLASurvey(ref='null')
    assert dlas.abs_type == 'DLA'

    coord = SkyCoord(ra=123.1143, dec=-12.4321, unit='deg')
    dlasys = DLASystem(coord, 1.244, [-300, 300.] * u.km / u.s, 20.4)
    dlasys.name = 'Sys1'
    #
    coord2 = SkyCoord(ra=223.1143, dec=42.4321, unit='deg')
    dlasys2 = DLASystem(coord2, 1.744, [-300, 300.] * u.km / u.s, 21.7)
    dlasys2.name = 'Sys2'
    # Add systems
    dlas.add_abs_sys(dlasys)
    dlas.add_abs_sys(dlasys2)
    assert dlas.nsys == 2
Example #21
def test_dla_fitted():
    dlas = DLASurvey(ref='null')
    # f(N) double power law
    fN = dlas.fitted_fN(21.)
    assert isinstance(fN, float)
    assert np.isclose(fN, 12.661299335610309)
    fN = dlas.fitted_fN(np.arange(20.3, 21.3, 0.1))
    assert isinstance(fN, np.ndarray)
    # l(z)
    lz = dlas.fitted_lz(1.)
    assert isinstance(lz, float)
    assert np.isclose(lz, 0.054821907396422453)
    # Error
    lz, sig_lz = dlas.fitted_lz(1., boot_error=True)
    assert sig_lz.shape == (1, 2)
    # nenH
    nenH = dlas.fitted_nenH(21.)
    assert isinstance(nenH, float)
    assert np.isclose(nenH, -3.1274)
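A sketch evaluating the fitted double power law on a grid of NHI (matplotlib assumed; the normalization follows whatever convention fitted_fN uses internally):

import numpy as np
from matplotlib import pyplot as plt
from pyigm.surveys.dlasurvey import DLASurvey

dlas = DLASurvey(ref='null')
NHI = np.arange(20.3, 22.0, 0.05)
plt.plot(NHI, dlas.fitted_fN(NHI))  # returns an ndarray for array input
plt.xlabel('log NHI')
plt.ylabel('fitted f(N)')
plt.show()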
Example #23
def main(args=None):
    pargs = parser(options=args)

    # Setup
    import sys
    pfind = __file__.rfind('/scripts')
    spth = __file__[:pfind] + '/src'
    sys.path.append(spth)
    import training_set as tset
    from pyigm.surveys.dlasurvey import DLASurvey

    outroot = pargs.outpath + '/training_{:d}_{:d}'.format(
        pargs.seed, pargs.ntrain)

    # Sightlines
    sdss = DLASurvey.load_SDSS_DR5(sample='all')
    slines, sdict = tset.grab_sightlines(sdss, flg_bal=0)
    # Run
    _, _ = tset.make_set(pargs.ntrain,
                         slines,
                         outroot=outroot,
                         seed=pargs.seed,
                         slls=pargs.slls)
Example #24
def grab_sightlines(dlasurvey=None,
                    flg_bal=None,
                    zmin=2.3,
                    s2n=5.,
                    DX=0.,
                    igmsp_survey='SDSS_DR7',
                    update_zem=True):
    """ Grab a set of sightlines without DLAs from a DLA survey
    Insist that all have spectra occur in igmspec
    Update sightline zem with igmspec zem

    Parameters
    ----------
    dlas : DLASurvey
      Usually SDSS or BOSS
    flg_bal : int, optional
      Maximum BAL flag (0=No signature, 1=Weak BAL, 2=BAL)
    s2n : float, optional
      Minimum S/N as defined in some manner
    DX : float, optional
      Restrict on DX
    zmin : float, optional
      Minimum redshift for zem
    update_zem : bool, optional
      Update zem in sightlines?

    Returns
    -------
    final : Table
      astropy Table of good sightlines
    sdict : dict
      dict describing the sightlines
    """
    # TODO: remove plate 910, fiber 526 (z=2.88; NHI=21.19), which hosts a *strong* DLA
    import warnings
    warnings.warn("Someday remove 910, 526 which has a *strong* DLA")
    igmsp = IgmSpec()
    # Init
    if dlasurvey is None:
        print("Using the DR5 sample for the sightlines")
        dlasurvey = DLASurvey.load_SDSS_DR5(sample='all')
        igmsp_survey = 'SDSS_DR7'
    nsight = len(dlasurvey.sightlines)
    keep = np.array([True] * nsight)
    meta = igmsp[igmsp_survey].meta

    # Avoid DLAs
    dla_coord = dlasurvey.coord
    sl_coord = SkyCoord(ra=dlasurvey.sightlines['RA'],
                        dec=dlasurvey.sightlines['DEC'], unit='deg')
    idx, d2d, d3d = match_coordinates_sky(sl_coord, dla_coord, nthneighbor=1)
    clear = d2d > 1 * u.arcsec
    keep = keep & clear

    # BAL
    if flg_bal is not None:
        gd_bal = dlasurvey.sightlines['FLG_BAL'] <= flg_bal
        keep = keep & gd_bal

    # S/N
    if s2n > 0.:
        gd_s2n = dlasurvey.sightlines['S2N'] > s2n
        keep = keep & gd_s2n

    # Cut on DX
    if DX > 0.:
        gd_DX = dlasurvey.sightlines['DX'] > DX
        keep = keep & gd_DX

    # igmsp
    qso_coord = SkyCoord(ra=meta['RA_GROUP'],
                         dec=meta['DEC_GROUP'],
                         unit='deg')
    idxq, d2dq, d3dq = match_coordinates_sky(sl_coord,
                                             qso_coord,
                                             nthneighbor=1)
    in_igmsp = d2dq < 1 * u.arcsec
    keep = keep & in_igmsp

    # Check zem and dz
    zem = meta['zem_GROUP'][idxq]
    dz = np.abs(zem - dlasurvey.sightlines['ZEM'])
    gd_dz = dz < 0.1
    keep = keep & gd_dz
    if zmin is not None:
        gd_zmin = zem > zmin
        keep = keep & gd_zmin

    # Assess
    final = dlasurvey.sightlines[keep]
    sdict = {}
    sdict['n'] = len(final)
    print("We have {:d} sightlines for analysis".format(sdict['n']))

    def qck_stats(idict, tbl, istr, key):
        idict[istr + 'min'] = np.min(tbl[key])
        idict[istr + 'max'] = np.max(tbl[key])
        idict[istr + 'median'] = np.median(tbl[key])

    qck_stats(sdict, final, 'z', 'ZEM')
    qck_stats(sdict, final, 'i', 'MAG')

    print("Min z = {:g}, Median z = {:g}, Max z = {:g}".format(
        sdict['zmin'], sdict['zmedian'], sdict['zmax']))

    # Return
    return final, sdict
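The call pattern used elsewhere on this page, as a short sketch:

from pyigm.surveys.dlasurvey import DLASurvey

sdss = DLASurvey.load_SDSS_DR5(sample='all')
slines, sdict = grab_sightlines(sdss, flg_bal=0)
print(sdict['n'], 'clean sightlines; median zem =', sdict['zmedian'])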
Example #25
def test_read_h100_nosys():
    h100 = DLASurvey.load_H100(load_sys=False)
    assert h100.nsys == 100
Example #26
def main(flg_tst, sdss=None, ml_survey=None):
    import os

    # Sightlines
    flg_tst = int(flg_tst)
    if (flg_tst % 2**1) >= 2**0:
        if sdss is None:
            sdss = DLASurvey.load_SDSS_DR5(sample='all')
        slines, sdict = grab_sightlines(sdss, flg_bal=0)

    # Test case of 100 sightlines
    if (flg_tst % 2**2) >= 2**1:
        # Make training set
        _, _ = make_set(100, slines, outroot='results/training_100')

    # Production runs
    if (flg_tst % 2**3) >= 2**2:
        #training_prod(123456, 5, 10, outpath=os.getenv('DROPBOX_DIR')+'/MachineLearning/DLAs/')  # TEST
        #training_prod(123456, 10, 500, outpath=os.getenv('DROPBOX_DIR')+'/MachineLearning/DLAs/')  # TEST
        training_prod(12345,
                      10,
                      5000,
                      outpath=os.getenv('DROPBOX_DIR') +
                      '/MachineLearning/DLAs/')

    # Production runs -- 100k more
    if (flg_tst % 2**4) >= 2**3:
        # python src/training_set.py
        training_prod(22345,
                      10,
                      10000,
                      outpath=os.getenv('DROPBOX_DIR') +
                      '/MachineLearning/DLAs/')

    # Production runs -- SLLS training
    if flg_tst & (2**4):
        # python src/training_set.py
        if False:
            if sdss is None:
                sdss = DLASurvey.load_SDSS_DR5(sample='all')
            slines, sdict = grab_sightlines(sdss, flg_bal=0)
            _, _ = make_set(100,
                            slines,
                            outroot='results/slls_training_100',
                            slls=True)
        #training_prod(22343, 10, 100, slls=True, outpath=os.getenv('DROPBOX_DIR')+'/MachineLearning/SLLSs/')
        training_prod(22343,
                      10,
                      5000,
                      slls=True,
                      outpath=os.getenv('DROPBOX_DIR') +
                      '/MachineLearning/SLLSs/')

    # Mixed systems for testing
    if flg_tst & (2**5):
        # python src/training_set.py
        if sdss is None:
            sdss = DLASurvey.load_SDSS_DR5(sample='all')
        slines, sdict = grab_sightlines(sdss, flg_bal=0)
        ntrials = 10000
        seed = 23559
        _, _ = make_set(
            ntrials,
            slines,
            seed=seed,
            mix=True,
            outroot=os.getenv('DROPBOX_DIR') +
            '/MachineLearning/Mix/mix_test_{:d}_{:d}'.format(seed, ntrials))

    # DR5 DLA-free sightlines
    if flg_tst & (2**6):
        write_sdss_sightlines()

    # High NHI systems for testing
    if flg_tst & (2**7):
        # python src/training_set.py
        if sdss is None:
            sdss = DLASurvey.load_SDSS_DR5(sample='all')
        slines, sdict = grab_sightlines(sdss, flg_bal=0)
        ntrials = 20000
        seed = 83559
        _, _ = make_set(ntrials,
                        slines,
                        seed=seed,
                        high=True,
                        outroot=os.getenv('DROPBOX_DIR') +
                        '/MachineLearning/HighNHI/high_train_{:d}_{:d}'.format(
                            seed, ntrials))

    # Low S/N
    if flg_tst & (2**8):
        # python src/training_set.py
        if sdss is None:
            sdss = DLASurvey.load_SDSS_DR5(sample='all')
        slines, sdict = grab_sightlines(sdss, flg_bal=0)
        ntrials = 10000
        seed = 83557
        _, _ = make_set(
            ntrials,
            slines,
            seed=seed,
            low_s2n=True,
            outroot=os.getenv('DROPBOX_DIR') +
            '/MachineLearning/LowS2N/lows2n_train_{:d}_{:d}'.format(
                seed, ntrials))
Example #27
def test_read_HST():
    """ Neeleman+16
    """
    hst16 = DLASurvey.load_HST16()
    assert hst16.nsys == 4
Example #28
def dr5_false_positives(ml_dlasurvey=None, ml_llssurvey=None):
    vette_file = 'vette_dr5.json'
    from pyigm.surveys.dlasurvey import DLASurvey
    from matplotlib import pyplot as plt
    # Load ML
    if (ml_dlasurvey is None):
        _, ml_dlasurvey = load_ml_dr7()
    # Load DR5
    dr5 = DLASurvey.load_SDSS_DR5()  # This is the statistical sample
    # Vette
    vette = ltu.loadjson(vette_file)
    dr5_ml_idx = np.array(vette['dr5_idx'])

    # Use coord to efficiently deal with sightlines
    ml_dla_coord = ml_dlasurvey.coords
    dr5_coord = SkyCoord(ra=dr5.sightlines['RA'], dec=dr5.sightlines['DEC'], unit='deg')
    idx, d2d, d3d = match_coordinates_sky(ml_dla_coord, dr5_coord, nthneighbor=1)
    in_dr5 = d2d < 2*u.arcsec
    print("{:d} of the ML DLA were in the DR5 sightlines".format(np.sum(in_dr5)))

    # False positives
    fpos = np.array([True]*ml_dlasurvey.nsys)
    fpos[~in_dr5] = False

    # Remove matched systems (they are not false positives)
    imatched = np.where(dr5_ml_idx >= 0)[0]
    match_val = dr5_ml_idx[imatched]
    fpos[match_val] = False
    print("There are {:d} total false positives".format(np.sum(fpos)))
    # This nearly matches David's.  Will run with his analysis.

    fpos_in_stat = fpos.copy()
    # Restrict on DR5
    plates = ml_dlasurvey.plate
    fibers = ml_dlasurvey.fiber
    zabs = ml_dlasurvey.zabs
    zem = ml_dlasurvey.zem
    for idx in np.where(fpos_in_stat)[0]:
        # Finally, match to DR5
        dr5_sl = np.where((dr5.sightlines['PLATE'] == plates[idx]) &
                          (dr5.sightlines['FIB'] == fibers[idx]))[0][0]
        if (zabs[idx] >= dr5.sightlines['Z_START'][dr5_sl]) & \
                (zabs[idx] <= dr5.sightlines['Z_END'][dr5_sl]):
            pass
        else:
            fpos_in_stat[idx] = False
    print("Number of FP in DR5 analysis region = {:d}".format(np.sum(fpos_in_stat)))
    print("Number with NHI<20.45 = {:d}".format(np.sum(ml_dlasurvey.NHI[fpos_in_stat]< 20.45)))

    # High NHI
    highNHI = ml_dlasurvey.NHI[fpos_in_stat] > 21.
    htbl = Table()
    htbl['PLATE'] = plates[fpos_in_stat][highNHI]
    htbl['FIBER'] = fibers[fpos_in_stat][highNHI]
    htbl['zabs'] = zabs[fpos_in_stat][highNHI]
    htbl['NHI'] = ml_dlasurvey.NHI[fpos_in_stat][highNHI]
    htbl.write("FP_DR5_highNHI.ascii", format='ascii.fixed_width', overwrite=True)

    # Medium NHI
    medNHI = (ml_dlasurvey.NHI[fpos_in_stat] > 20.6) & (ml_dlasurvey.NHI[fpos_in_stat] < 21)
    mtbl = Table()
    mtbl['PLATE'] = plates[fpos_in_stat][medNHI]
    mtbl['FIBER'] = fibers[fpos_in_stat][medNHI]
    mtbl['zabs'] = zabs[fpos_in_stat][medNHI]
    mtbl['zem'] = zem[fpos_in_stat][medNHI]
    mtbl['NHI'] = ml_dlasurvey.NHI[fpos_in_stat][medNHI]
    mtbl.write("FP_DR5_medNHI.ascii", format='ascii.fixed_width', overwrite=True)
Example #29
def chk_dr5_dla_to_ml(ml_dlasurvey=None, ml_llssurvey=None, dz_toler=0.015,
                      outfile='vette_dr5.json', write_again=True):
    # Load ML
    if (ml_dlasurvey is None) or (ml_llssurvey is None):
        ml_llssurvey, ml_dlasurvey = load_ml_dr7()
    # Load DR5
    dr5 = DLASurvey.load_SDSS_DR5()  # This is the statistical sample
    # Use coord to efficiently deal with sightlines
    ml_coord = SkyCoord(ra=ml_dlasurvey.sightlines['RA'], dec=ml_dlasurvey.sightlines['DEC'], unit='deg')
    dr5_coord = SkyCoord(ra=dr5.sightlines['RA'], dec=dr5.sightlines['DEC'], unit='deg')
    idx, d2d, d3d = match_coordinates_sky(dr5_coord, ml_coord, nthneighbor=1)
    in_ml = d2d < 2*u.arcsec
    print("{:d} of the DR5 sightlines were covered by ML out of {:d}".format(np.sum(in_ml), len(dr5.sightlines)))
    # 7477 sightlines out of 7482

    # Cut down
    dr5.sightlines = dr5.sightlines[in_ml]
    new_mask = dla_stat(dr5, dr5.sightlines) # 737 good DLAs
    dr5.mask = new_mask
    dr5_dla_coord = dr5.coord
    dr5_dla_zabs = dr5.zabs
    ndr5 = len(dr5_dla_coord)

    ml_dla_coord = ml_dlasurvey.coords
    ml_lls_coord = ml_llssurvey.coords

    # Loop on DR5 DLAs and save indices of the matches
    dr5_ml_idx = np.zeros(ndr5).astype(int) - 1
    for ii in range(ndr5):
        # Match to ML
        dla_mts = np.where(dr5_dla_coord[ii].separation(ml_dla_coord) < 2*u.arcsec)[0]
        nmt = len(dla_mts)
        if nmt == 0:  # No match
            # Check for LLS
            lls_mts = np.where(dr5_dla_coord[ii].separation(ml_lls_coord) < 2*u.arcsec)[0]
            nmt2 = len(lls_mts)
            if nmt2 == 0:  # No match
                pass
            else:
                zML = ml_llssurvey.zabs[lls_mts] # Redshifts of all DLAs on the sightline in ML
                zdiff = np.abs(dr5_dla_zabs[ii]-zML)
                if np.min(zdiff) < dz_toler:
                    dr5_ml_idx[ii] = -9  # SLLS match
        else:
            zML = ml_dlasurvey.zabs[dla_mts] # Redshifts of all DLAs on the sightline in ML
            zdiff = np.abs(dr5_dla_zabs[ii]-zML)
            if np.min(zdiff) < dz_toler:
                #print("Match on {:d}!".format(ii))
                # Match
                imin = np.argmin(zdiff)
                dr5_ml_idx[ii] = dla_mts[imin]
            else: # Check for LLS
                lls_mts = np.where(dr5_dla_coord[ii].separation(ml_lls_coord) < 2*u.arcsec)[0]
                nmt2 = len(lls_mts)
                if nmt2 == 0:  # No match
                    pass
                else:
                    zML = ml_llssurvey.zabs[lls_mts] # Redshifts of all DLAs on the sightline in ML
                    zdiff = np.abs(dr5_dla_zabs[ii]-zML)
                    if np.min(zdiff) < dz_toler:
                        dr5_ml_idx[ii] = -9  # SLLS match


    dr5_coord = SkyCoord(ra=dr5.sightlines['RA'], dec=dr5.sightlines['DEC'], unit='deg')

    # Write out misses
    misses = np.where(dr5_ml_idx == -1)[0]
    plates, fibers = [], []
    for miss in misses:
        imin = np.argmin(dr5_dla_coord[miss].separation(dr5_coord))
        plates.append(dr5.sightlines['PLATE'][imin])
        fibers.append(dr5.sightlines['FIB'][imin])
    mtbl = Table()
    mtbl['PLATE'] = plates
    mtbl['FIBER'] = fibers
    mtbl['NHI'] = dr5.NHI[misses]
    mtbl['zabs'] = dr5.zabs[misses]
    if write_again:
        mtbl.write('DR5_misses.ascii', format='ascii.fixed_width', overwrite=True)

    # Write out SLLS
    sllss = np.where(dr5_ml_idx == -9)[0]
    plates, fibers = [], []
    for slls in sllss:
        imin = np.argmin(dr5_dla_coord[slls].separation(dr5_coord))
        plates.append(dr5.sightlines['PLATE'][imin])
        fibers.append(dr5.sightlines['FIB'][imin])
    mtbl = Table()
    mtbl['PLATE'] = plates
    mtbl['FIBER'] = fibers
    mtbl['NHI'] = dr5.NHI[sllss]
    mtbl['zabs'] = dr5.zabs[sllss]
    if write_again:
        mtbl.write('DR5_SLLS.ascii', format='ascii.fixed_width', overwrite=True)

    # ML not matched by PW09?
    ml_dla_coords = ml_dlasurvey.coords
    idx2, d2d2, d3d = match_coordinates_sky(ml_dla_coords, dr5_dla_coord, nthneighbor=1)
    not_in_dr5 = d2d2 > 2*u.arcsec  # This doesn't match redshifts!
    might_be_in_dr5 = np.where(~not_in_dr5)[0]

    others_not_in = []  # this is some painful book-keeping
    for idx in might_be_in_dr5:  # Matching redshifts..
        imt = ml_dla_coord[idx].separation(dr5_dla_coord) < 2*u.arcsec
        # Match on dztoler
        if np.min(np.abs(ml_dlasurvey.zabs[idx]-dr5.zabs[imt])) > dz_toler:
            others_not_in.append(idx)

    # Save
    out_dict = {}
    out_dict['in_ml'] = in_ml
    out_dict['dr5_idx'] = dr5_ml_idx  # -1 are misses, -9 are SLLS
    out_dict['not_in_dr5'] = np.concatenate([np.where(not_in_dr5)[0], np.array(others_not_in)])
    ltu.savejson(outfile, ltu.jsonify(out_dict), overwrite=True)
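The sky-position plus redshift matching above is repeated three times; a hypothetical helper distilling the pattern (astropy and numpy only; the name and signature are illustrative):

import numpy as np
import astropy.units as u

def match_sky_z(coord, zabs, cand_coords, cand_z, sky_tol=2*u.arcsec, dz_toler=0.015):
    """ Index of the best sky+redshift match among candidates, or -1 if none """
    mts = np.where(coord.separation(cand_coords) < sky_tol)[0]
    if len(mts) == 0:
        return -1
    zdiff = np.abs(zabs - cand_z[mts])
    if np.min(zdiff) < dz_toler:
        return mts[np.argmin(zdiff)]
    return -1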
Example #30
def test_read_xq100():
    """ XQ-100 """
    xq100 = DLASurvey.load_XQ100(sample='stat')
    assert xq100.nsys == 36
Example #32
def load_ml_file(pred_file):
    """ Load the search results from the CNN into a DLASurvey object
    Parameters
    ----------
    pred_file

    Returns
    -------
    ml_llssurvey : LLSSurvey
    ml_dlasurvey : DLASurvey
    """
    print("Loading {:s}.  Please be patient..".format(pred_file))
    # Read
    ml_results = ltu.loadjson(pred_file)
    use_platef = False
    use_id = False  # initialize to avoid a NameError when 'plate' is present
    if 'plate' in ml_results[0].keys():
        use_platef = True
    elif 'id' in ml_results[0].keys():
        use_id = True
    # Init
    idict = dict(ra=[], dec=[], plate=[], fiber=[])
    if use_platef:
        for key in ['plate', 'fiber', 'mjd']:
            idict[key] = []
    dlasystems = []
    llssystems = []

    # Generate coords to speed things up
    for obj in ml_results:
        for key in ['ra', 'dec']:
            idict[key].append(obj[key])
    ml_coords = SkyCoord(ra=idict['ra'], dec=idict['dec'], unit='deg')
    ra_names = ml_coords.icrs.ra.to_string(unit=u.hour, sep='', pad=True)
    dec_names = ml_coords.icrs.dec.to_string(sep='', pad=True, alwayssign=True)
    vlim = [-500., 500.] * u.km / u.s
    dcoord = SkyCoord(ra=0., dec=0., unit='deg')  # placeholder coordinate for each system

    # Loop on list
    didx, lidx = [], []
    print("Looping on sightlines..")
    for tt, obj in enumerate(ml_results):
        #if (tt % 100) == 0:
        #    print('tt: {:d}'.format(tt))
        # Sightline
        if use_id:
            plate, fiber = [int(spl) for spl in obj['id'].split('-')]
            idict['plate'].append(plate)
            idict['fiber'].append(fiber)

        # Systems
        for ss, syskey in enumerate(['dlas', 'subdlas']):
            for idla in obj[syskey]:
                name = 'J{:s}{:s}_z{:.3f}'.format(ra_names[tt], dec_names[tt],
                                                  idla['z_dla'])
                if ss == 0:
                    isys = DLASystem(dcoord,
                                     idla['z_dla'],
                                     vlim,
                                     NHI=idla['column_density'],
                                     zem=obj['z_qso'],
                                     name=name)
                else:
                    isys = LLSSystem(dcoord,
                                     idla['z_dla'],
                                     vlim,
                                     NHI=idla['column_density'],
                                     zem=obj['z_qso'],
                                     name=name)
                isys.confidence = idla['dla_confidence']
                isys.s2n = idla['s2n']
                if use_platef:
                    isys.plate = obj['plate']
                    isys.fiber = obj['fiber']
                elif use_id:
                    isys.plate = plate
                    isys.fiber = fiber
                # Save
                if ss == 0:
                    didx.append(tt)
                    dlasystems.append(isys)
                else:
                    lidx.append(tt)
                    llssystems.append(isys)
    # Generate sightline tables
    sightlines = Table()
    sightlines['RA'] = idict['ra']
    sightlines['DEC'] = idict['dec']
    sightlines['PLATE'] = idict['plate']
    sightlines['FIBERID'] = idict['fiber']
    # Surveys
    ml_llssurvey = LLSSurvey()
    ml_llssurvey.sightlines = sightlines.copy()
    ml_llssurvey._abs_sys = llssystems
    ml_llssurvey.coords = ml_coords[np.array(lidx)]

    ml_dlasurvey = DLASurvey()
    ml_dlasurvey.sightlines = sightlines.copy()
    ml_dlasurvey._abs_sys = dlasystems
    ml_dlasurvey.coords = ml_coords[np.array(didx)]

    # Return
    return ml_llssurvey, ml_dlasurvey
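Usage sketch (the prediction-file path is illustrative):

ml_llssurvey, ml_dlasurvey = load_ml_file('results/dr7_predictions.json')
print(ml_dlasurvey.nsys, 'DLA candidates;', ml_llssurvey.nsys, 'sub-DLA candidates')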
Example #33
def examine_false_pos(test_file='data/test_dlas_96629_10000.json.gz',
                      pred_file='data/test_dlas_96629_predictions.json.gz',
                      vette_file='vette_10k.json'):
    """ Examine false positives in the Test set (held out)
    """
    from pyigm.surveys.dlasurvey import DLASurvey
    import h5py
    import json
    from matplotlib import pyplot as plt
    # Load Test
    test_dlas = test_to_tbl(test_file)
    ntest = len(test_dlas)
    # Load hdf5
    CNN_result_path = '/home/xavier/Projects/ML_DLA_results/CNN/'
    hdf5_datafile = CNN_result_path + 'gensample_hdf5_files/test_dlas_96629_10000.hdf5'
    hdf = h5py.File(hdf5_datafile, 'r')
    headers = json.loads(hdf['meta'].value)['headers']
    # Load ML
    ml_abs = pred_to_tbl(pred_file)
    # Vette
    vette = ltu.loadjson(vette_file)
    test_ml_idx = np.array(vette['test_idx'])
    # Load DR5
    dr5 = DLASurvey.load_SDSS_DR5()
    all_dr5 = DLASurvey.load_SDSS_DR5(sample='all_sys')

    # False positives
    fpos = ml_abs['NHI'] >= 20.3  # Must be a DLA
    imatched = np.where(test_ml_idx >= 0)[0]
    match_val = test_ml_idx[imatched]
    fpos[match_val] = False
    print("There are {:d} total false positives".format(np.sum(fpos)))
    # This nearly matches David's.  Will run with his analysis.

    fpos_in_dr5 = fpos.copy()
    # Restrict on DR5
    for idx in np.where(fpos_in_dr5)[0]:
        # Convoluted indexing..
        mlid = ml_abs['ids'][idx]
        # Plate/Fiber
        plate = headers[mlid]['PLATE']
        fib = headers[mlid]['FIBER']
        # Finally, match to DR5
        dr5_sl = np.where((dr5.sightlines['PLATE'] == plate)
                          & (dr5.sightlines['FIB'] == fib))[0][0]
        if (ml_abs['zabs'][idx] >= dr5.sightlines['Z_START'][dr5_sl]) & \
                (ml_abs['zabs'][idx] <= dr5.sightlines['Z_END'][dr5_sl]):
            pass
        else:
            fpos_in_dr5[idx] = False
    print("Number of FP in DR5 analysis region = {:d}".format(
        np.sum(fpos_in_dr5)))

    # How many match to DR5 SLLS?
    slls = all_dr5.NHI < 20.3
    slls_coord = all_dr5.coord[slls]
    slls_zabs = all_dr5.zabs[slls]
    nslls = 0
    for idx in np.where(fpos_in_dr5)[0]:
        # Convoluted indexing..
        mlid = ml_abs['ids'][idx]
        # RA/DEC
        ra = headers[mlid]['RA_GROUP']
        dec = headers[mlid]['DEC_GROUP']
        coord = SkyCoord(ra=ra, dec=dec, unit='deg')
        # Match coord
        mt = coord.separation(slls_coord) < 3 * u.arcsec
        if np.any(mt):
            # Match redshift
            if np.min(np.abs(slls_zabs[mt] - ml_abs['zabs'][idx])) < 0.015:
                nslls += 1
    print("Number of FP that are SLLS in DR5 = {:d}".format(nslls))

    low_NHI = ml_abs['NHI'][fpos_in_dr5] < 20.5
    print("Number of FP that are NHI <= 20.5 = {:d}".format(np.sum(low_NHI)))

    # Write out
    fp_tbl = Table()
    for key in ['ids', 'NHI', 'zabs', 'conf']:
        fp_tbl[key] = ml_abs[key][fpos_in_dr5]
    fp_tbl.write('test10k_false_pos.ascii',
                 format='ascii.fixed_width',
                 overwrite=True)

    # Histogram
    dr5_idx = np.where(fpos_in_dr5)
    plt.clf()
    ax = plt.gca()
    ax.hist(ml_abs['conf'][dr5_idx])
    plt.show()