def test_sdss():
    """Check the system counts of the two SDSS-DR5 DLA samples."""
    # Complete sample (sample='all')
    full_sample = DLASurvey.load_SDSS_DR5(sample='all')
    assert full_sample.nsys == 1182

    # Default call -- the "Stat" sample
    stat_sample = DLASurvey.load_SDSS_DR5()
    assert len(stat_sample.NHI) == 737
def write_sdss_sightlines():
    """ Writes the SDSS DR5 sightlines that have no (or very few) DLAs

    The sightlines returned by grab_sightlines() are matched (within
    1 arcsec) to the igmspec SDSS_DR7 meta table; the matched spectra and
    the matched meta rows (stored as 'cut_meta') are written to one HDF5
    file under $DROPBOX_DIR.

    Returns
    -------
    None : Writes to Dropbox

    Raises
    ------
    ValueError
        If the DROPBOX_DIR environment variable is not set
    """
    import os
    import h5py

    dropbox_dir = os.getenv('DROPBOX_DIR')
    if dropbox_dir is None:
        # Fail with a clear message instead of a TypeError on None + str
        raise ValueError("The DROPBOX_DIR environment variable is not set")
    outfile = dropbox_dir + '/MachineLearning/DR5/SDSS_DR5_noDLAs.hdf5'

    # Load
    sdss = DLASurvey.load_SDSS_DR5(sample='all')
    slines, sdict = grab_sightlines(sdss, flg_bal=0)
    coords = SkyCoord(ra=slines['RA'], dec=slines['DEC'], unit='deg')

    # Load spectra -- RA/DEC in igmsp is not identical to RA_GROUP, DEC_GROUP in SDSS_DR7
    igmsp = IgmSpec()
    sdss_meta = igmsp['SDSS_DR7'].meta
    qso_coord = SkyCoord(ra=sdss_meta['RA_GROUP'], dec=sdss_meta['DEC_GROUP'],
                         unit='deg')
    idxq, d2dq, d3dq = match_coordinates_sky(coords, qso_coord, nthneighbor=1)
    in_igmsp = d2dq < 1 * u.arcsec  # Check

    # Cut meta; every sightline is expected to have an igmspec match
    cut_meta = sdss_meta[idxq[in_igmsp]]
    assert len(slines) == len(cut_meta)

    # Grab
    spectra = igmsp['SDSS_DR7'].spec_from_meta(cut_meta)

    # Write; the context manager closes the file even if writing fails
    with h5py.File(outfile, 'w') as hdf:
        spectra.write_to_hdf5(outfile, hdf5=hdf, clobber=True, fill_val=0.)
        # Add table (meta is already used)
        hdf['cut_meta'] = cut_meta
def test_sdss():
    """Regression checks on the SDSS-DR5 DLA survey.

    Pins the system counts of both samples plus the first-bin outputs of
    calculate_lox() and calculate_fn() on the default ("Stat") sample.
    """
    # Complete sample
    full_sample = DLASurvey.load_SDSS_DR5(sample='all')
    assert full_sample.nsys == 1182

    # Default call -- the "Stat" sample
    stat_sample = DLASurvey.load_SDSS_DR5()
    assert len(stat_sample.NHI) == 737

    # Binned in redshift; only the first bin is pinned
    z_edges = [2., 2.5, 3]
    lox, _, _ = stat_sample.calculate_lox(z_edges)
    assert np.isclose(lox[0], 0.04625038, atol=1e-5)

    # Five NHI edges -> four bins, over a single redshift interval
    nhi_edges = [20.3, 20.5, 21., 21.5, 22.]
    fn_val, fn_low, _ = stat_sample.calculate_fn(nhi_edges, [2, 2.5], log=True)
    assert fn_val.size == 4
    assert np.isclose(fn_low[0], 0.0682087, atol=1e-5)
def main(args=None):
    """ Update the stored DLA fits selected on the command line

    Depending on the flags returned by parser() (dla_lz, dla_dpow,
    dla_nenH, or all), this refits the arctan l(z) model, the double
    power-law f(N) model and/or stores fixed ne/nH values, passing each
    result to update_dla_fits().

    Parameters
    ----------
    args : list, optional
        NOTE(review): currently not forwarded to parser(); confirm whether
        parser() accepts an options argument before wiring it through.
    """
    import numpy as np
    from pyigm.surveys.analysis import fit_atan_dla_lz, fit_fN_dblpow
    from pyigm.surveys.dlasurvey import load_dla_surveys, update_dla_fits
    from pyigm.surveys.dlasurvey import DLASurvey
    from pyigm.surveys import dlasurvey

    pargs = parser()

    # DLA l(z) analysis
    if pargs.dla_lz or pargs.all:
        # arctan from Prochaska & Neeleman 2017
        surveys = load_dla_surveys()
        dfits, _ = fit_atan_dla_lz(surveys, nstep=100,
                                   bootstrap=pargs.dla_lz_boot,
                                   nboot=50000, nproc=pargs.nproc,
                                   boot_out=dlasurvey.lz_boot_file)
        # Calculate error from the bootstrap realizations
        lz_boot = dlasurvey.load_boot_lz()
        atan_fit = dfits['lz']['atan']
        for key in ['A', 'B', 'C']:
            boot = lz_boot[key].data
            # 68% interval, stored as offsets from the fitted value
            perc = np.percentile(boot, [16., 84.])
            atan_fit['sig_{:s}'.format(key)] = perc - atan_fit[key]
        # Write
        atan_fit['Ref'] = 'Prochaska & Neeleman 2017'
        update_dla_fits(dfits)

    # Fit double power law to f(N) of DLA [PW09 only]
    if pargs.dla_dpow or pargs.all:
        sdss_dr5 = DLASurvey.load_SDSS_DR5()
        dfits, best, Ndgrid, a3grid, a4grid, lik = fit_fN_dblpow(
            sdss_dr5.NHI, (-3., -1.1), (-6, -2), (21., 22.), nstep=100)
        # Write
        dfits['fN']['dpow']['Ref'] = 'PHW05'
        update_dla_fits(dfits)

    # DLA ne/nH
    if pargs.dla_nenH or pargs.all:
        dfits = {}
        dfits['nenH'] = {}
        # Values with all 50 measurements
        dfits['nenH']['loglog'] = dict(
            bp=-2.881, m=-0.352,
            bp_sig=(+0.253, -0.256), m_sig=(+0.321, -0.317))
        dfits['nenH']['loglog']['Ref'] = 'Neeleman+15; PN17'
        # Update
        update_dla_fits(dfits)
def _ensure_dr5_surveys(json_file, sdss, ml_survey):
    """Return (sdss, ml_survey), loading whichever of the two is still None."""
    if sdss is None:
        sdss = DLASurvey.load_SDSS_DR5()
    if ml_survey is None:
        ml_survey = json_to_sdss_dlasurvey(json_file, sdss)
    return sdss, ml_survey


def main(flg_tst, sdss=None, ml_survey=None):
    """ Run the DR5 vetting steps selected by the bits of flg_tst

    Parameters
    ----------
    flg_tst : int
        Bit flag; each set bit runs one step
          2**0 : load the v2 ML results
          2**1 : vette the ML survey against DR5
          2**2 : vette v5 and write the false-negative CSV
          2**3 : vette v6.1 and write the false-negative CSV
          2**4 : vette gensample v2, write false-negative/positive CSVs
          2**5 : vette model 4.3.1, write false-negative/positive CSVs
          2**6 : call dr5_for_david()
    sdss : optional
        DR5 survey to compare against; loaded with
        DLASurvey.load_SDSS_DR5() when None
    ml_survey : optional
        ML survey; once set (passed in or loaded by an earlier step) it is
        reused by the later steps instead of loading their own JSON file
    """
    flg_tst = int(flg_tst)

    # Load JSON for DR5 (always replaces ml_survey)
    if flg_tst & 2**0:
        if sdss is None:
            sdss = DLASurvey.load_SDSS_DR5()
        ml_survey = json_to_sdss_dlasurvey('../results/dr5_v2_results.json', sdss)

    # Vette
    if flg_tst & 2**1:
        # A caller-supplied sdss is kept, and sdss is never left as None
        sdss, ml_survey = _ensure_dr5_surveys(
            '../results/dr5_v2_results.json', sdss, ml_survey)
        vette_dlasurvey(ml_survey, sdss)

    # Vette v5 and generate CSV
    if flg_tst & 2**2:
        sdss, ml_survey = _ensure_dr5_surveys(
            '../results/dr5_v5_predictions.json', sdss, ml_survey)
        false_neg, midx, _ = vette_dlasurvey(ml_survey, sdss)
        # CSV of false negatives
        mk_false_neg_table(false_neg, '../results/false_negative_DR5_v5.csv')

    # Vette v6 and generate CSV
    if flg_tst & 2**3:
        sdss, ml_survey = _ensure_dr5_surveys(
            '../results/dr5_v6.1_results.json', sdss, ml_survey)
        false_neg, midx, _ = vette_dlasurvey(ml_survey, sdss)
        # CSV of false negatives
        mk_false_neg_table(false_neg, '../results/false_negative_DR5_v6.1.csv')

    # Vette gensample v2
    if flg_tst & 2**4:
        sdss, ml_survey = _ensure_dr5_surveys(
            '../results/results_catalog_dr7_model_gensample_v2.json',
            sdss, ml_survey)
        false_neg, midx, false_pos = vette_dlasurvey(ml_survey, sdss)
        # CSV of false negatives and false positives
        mk_false_neg_table(false_neg, '../results/false_negative_DR5_v2_gen.csv')
        mk_false_neg_table(false_pos, '../results/false_positives_DR5_v2_gen.csv')

    # Vette gensample v4.3.1
    if flg_tst & 2**5:
        sdss, ml_survey = _ensure_dr5_surveys(
            '../results/results_model_4.3.1_data_dr5.json', sdss, ml_survey)
        false_neg, midx, false_pos = vette_dlasurvey(ml_survey, sdss)
        # CSV of false negatives and false positives
        mk_false_neg_table(false_neg, '../results/false_negative_DR5_v4.3.1_gen.csv')
        mk_false_neg_table(false_pos, '../results/false_positives_DR5_v4.3.1_gen.csv')

    if flg_tst & 2**6:
        dr5_for_david()
def fig_rhoHI(lw=1.5, csz=15., lsz=14.):
    """ Plot the binned rho_HI of the SDSS-DR5 DLA sample against redshift

    The values come from DLASurvey.binned_rhoHI() and are shown in units
    of 1e8, together with a green band labelled as the z~0 21cm value.
    The figure is saved as fig_rhoHI.png.

    Parameters
    ----------
    lw : float, optional
        Not used by the body
    csz : float, optional
        Passed to set_fontsize() for the axes
    lsz : float, optional
        Size of the two text labels
    """
    outfile = 'fig_rhoHI.png'
    scale = 1e8

    survey = DLASurvey.load_SDSS_DR5()
    zbins = [2.2, 2.4, 2.75, 3., 3.5, 4.5]
    rho_HI, rho_HI_low, rho_HI_hi = survey.binned_rhoHI(zbins)

    # Figure with a single panel
    plt.figure(figsize=(5, 5))
    plt.clf()
    grid = gridspec.GridSpec(1, 1)
    ax = plt.subplot(grid[0])

    # One point per redshift bin: x error bar first, then y error bar
    for kk, (z_lo, z_hi) in enumerate(zip(zbins[:-1], zbins[1:])):
        zcen = (z_lo + z_hi) / 2.
        rho_val = rho_HI[kk].value / scale
        yerr = np.array([rho_HI_low[kk].value / scale,
                         rho_HI_hi[kk].value / scale])
        ax.errorbar([zcen], [rho_val], xerr=zcen - z_lo, fmt='o',
                    color='blue', capthick=2)
        # NOTE(review): [yerr] has shape (1, 2); confirm the installed
        # matplotlib accepts this for a single point
        ax.errorbar([zcen], [rho_val], yerr=[yerr], color='blue', capthick=2)

    # z=0 band
    xmnx = [2., 4.5]
    ax.fill_between(xmnx, 0.45, 0.6, color='green', alpha=0.5)

    # Axes and labels
    ax.set_xlim(xmnx)
    ax.set_ylabel(r'$\rho_{\rm HI} \; (10^8 \, \rm M_\odot \, Mpc^{-3} \, h_{72})$')
    ax.set_xlabel(r'$z$')
    ax.text(0.1, 0.9, 'SDSS-DR5 (PW09)', color='blue', size=lsz,
            transform=ax.transAxes, ha='left')
    ax.text(0.9, 0.1, 'z~0 [21cm] \n (Zwaan+05)', color='green', size=lsz,
            transform=ax.transAxes, ha='right')
    set_fontsize(ax, csz)

    # Write
    plt.tight_layout(pad=0.2, h_pad=0., w_pad=0.1)
    plt.savefig(outfile, dpi=750)
    plt.close()
    print("Wrote {:s}".format(outfile))
def dr5_for_david():
    """ Generate a Table for David

    Writes two ASCII tables under results/:
      dr5_for_david.ascii            -- plate, fiber, zabs, NHI of the DLAs
      dr5_sightlines_for_david.ascii -- sightlines with Z_END > Z_START

    The plate and fiber of the matched sightline are also attached to
    each DLA system object.

    Raises
    ------
    KeyError
        If the sightlines table has none of the known fiber columns
    ValueError
        If any DLA lies more than 1 arcsec from its nearest sightline
    """
    sdss_survey = DLASurvey.load_SDSS_DR5()
    sightlines = sdss_survey.sightlines

    # Fiber key
    for fkey in ['FIBER', 'FIBER_ID', 'FIB']:
        if fkey in sightlines.keys():
            break
    else:
        raise KeyError("No fiber column (FIBER, FIBER_ID, FIB) in sightlines")

    # Connect to sightlines
    s_coord = SkyCoord(ra=sightlines['RA'], dec=sightlines['DEC'], unit='deg')

    # Add plate/fiber to statistical DLAs
    dla_coord = sdss_survey.coord
    idx2, d2d, d3d = match_coordinates_sky(dla_coord, s_coord, nthneighbor=1)
    # Every DLA must match, so test the worst separation, not the best
    if np.max(d2d.to('arcsec').value) > 1.:
        raise ValueError("Bad match to sightlines")

    plates, fibers = [], []
    for jj, igd in enumerate(np.where(sdss_survey.mask)[0]):
        dla = sdss_survey._abs_sys[igd]
        plate = sightlines['PLATE'][idx2[jj]]
        fiber = sightlines[fkey][idx2[jj]]
        dla.plate = plate
        dla.fiber = fiber
        plates.append(plate)
        fibers.append(fiber)

    # Write
    dtbl = Table()
    dtbl['plate'] = plates
    dtbl['fiber'] = fibers
    dtbl['zabs'] = sdss_survey.zabs
    dtbl['NHI'] = sdss_survey.NHI
    dtbl.write('results/dr5_for_david.ascii', format='ascii')

    # Write sightline info
    stbl = sightlines[['PLATE', 'FIB', 'Z_START', 'Z_END', 'RA', 'DEC']]
    gdsl = stbl['Z_END'] > stbl['Z_START']
    stbl[gdsl].write('results/dr5_sightlines_for_david.ascii', format='ascii')
def main(args=None):
    """ Build one training set from the DR5 sightlines

    Parameters
    ----------
    args : list, optional
        Forwarded to parser() as its options; the parsed result supplies
        outpath, seed, ntrain and slls
    """
    pargs = parser(options=args)

    # Make the src/ directory (sibling of the scripts/ directory in this
    # file's path) importable before importing training_set
    import sys
    script_path = __file__
    src_dir = script_path[:script_path.rfind('/scripts')] + '/src'
    sys.path.append(src_dir)
    import training_set as tset
    from pyigm.surveys.dlasurvey import DLASurvey

    # Output root is tagged with the seed and the number of systems
    outroot = pargs.outpath + '/training_{:d}_{:d}'.format(pargs.seed, pargs.ntrain)

    # Sightlines
    survey = DLASurvey.load_SDSS_DR5(sample='all')
    sightlines, _ = tset.grab_sightlines(survey, flg_bal=0)

    # Run
    _, _ = tset.make_set(pargs.ntrain, sightlines, outroot=outroot,
                         seed=pargs.seed, slls=pargs.slls)
def examine_false_pos(test_file='data/test_dlas_96629_10000.json.gz',
                      pred_file='data/test_dlas_96629_predictions.json.gz',
                      vette_file='vette_10k.json'):
    """ Examine false positives in the Test set (held out)

    A false positive is an ML absorber with NHI >= 20.3 that is not
    listed in the vette file's 'test_idx'.  The sample is then restricted
    to absorbers inside the Z_START-Z_END range of their DR5 sightline,
    compared against DR5 systems with NHI < 20.3, written to
    test10k_false_pos.ascii, and a histogram of the ML confidence is shown.

    Parameters
    ----------
    test_file : str, optional
        Test-set file, read with test_to_tbl()
    pred_file : str, optional
        ML predictions file, read with pred_to_tbl()
    vette_file : str, optional
        JSON file holding 'test_idx'
    """
    from pyigm.surveys.dlasurvey import DLASurvey
    import h5py
    import json
    from matplotlib import pyplot as plt

    # Load Test
    test_dlas = test_to_tbl(test_file)

    # Load hdf5 headers; the file is closed as soon as they are parsed
    CNN_result_path = '/home/xavier/Projects/ML_DLA_results/CNN/'
    hdf5_datafile = CNN_result_path + 'gensample_hdf5_files/test_dlas_96629_10000.hdf5'
    with h5py.File(hdf5_datafile, 'r') as hdf:
        # [()] replaces Dataset.value, which newer h5py no longer provides
        headers = json.loads(hdf['meta'][()])['headers']

    # Load ML
    ml_abs = pred_to_tbl(pred_file)

    # Vette
    vette = ltu.loadjson(vette_file)
    test_ml_idx = np.array(vette['test_idx'])

    # Load DR5
    dr5 = DLASurvey.load_SDSS_DR5()
    all_dr5 = DLASurvey.load_SDSS_DR5(sample='all_sys')

    # False positives
    fpos = ml_abs['NHI'] >= 20.3  # Must be a DLA
    imatched = np.where(test_ml_idx >= 0)[0]
    match_val = test_ml_idx[imatched]
    fpos[match_val] = False
    print("There are {:d} total false positives".format(np.sum(fpos)))
    # This nearly matches David's.  Will run with his analysis.

    fpos_in_dr5 = fpos.copy()
    # Restrict on DR5
    for idx in np.where(fpos_in_dr5)[0]:
        # Convoluted indexing..
        mlid = ml_abs['ids'][idx]
        # Plate/Fiber
        plate = headers[mlid]['PLATE']
        fib = headers[mlid]['FIBER']
        # Finally, match to DR5
        dr5_sl = np.where((dr5.sightlines['PLATE'] == plate) &
                          (dr5.sightlines['FIB'] == fib))[0][0]
        zabs = ml_abs['zabs'][idx]
        in_range = (zabs >= dr5.sightlines['Z_START'][dr5_sl]) & \
                   (zabs <= dr5.sightlines['Z_END'][dr5_sl])
        if not in_range:
            fpos_in_dr5[idx] = False
    print("Number of FP in DR5 analysis region = {:d}".format(
        np.sum(fpos_in_dr5)))

    # How many match to DR5 SLLS?
    slls = all_dr5.NHI < 20.3
    slls_coord = all_dr5.coord[slls]
    slls_zabs = all_dr5.zabs[slls]
    nslls = 0
    for idx in np.where(fpos_in_dr5)[0]:
        # Convoluted indexing..
        mlid = ml_abs['ids'][idx]
        # RA/DEC
        ra = headers[mlid]['RA_GROUP']
        dec = headers[mlid]['DEC_GROUP']
        coord = SkyCoord(ra=ra, dec=dec, unit='deg')
        # Match coord
        mt = coord.separation(slls_coord) < 3 * u.arcsec
        if np.any(mt):
            # Match redshift
            if np.min(np.abs(slls_zabs[mt] - ml_abs['zabs'][idx])) < 0.015:
                nslls += 1
    print("Number of FP that are SLLS in DR5 = {:d}".format(nslls))

    low_NHI = ml_abs['NHI'][fpos_in_dr5] < 20.5
    print("Number of FP that are NHI <= 20.5 = {:d}".format(np.sum(low_NHI)))

    # Write out
    fp_tbl = Table()
    for key in ['ids', 'NHI', 'zabs', 'conf']:
        fp_tbl[key] = ml_abs[key][fpos_in_dr5]
    fp_tbl.write('test10k_false_pos.ascii', format='ascii.fixed_width',
                 overwrite=True)

    # Histogram
    dr5_idx = np.where(fpos_in_dr5)
    plt.clf()
    ax = plt.gca()
    ax.hist(ml_abs['conf'][dr5_idx])
    plt.show()
def grab_sightlines(dlasurvey=None, flg_bal=None, zmin=2.3, s2n=5., DX=0.,
                    igmsp_survey='SDSS_DR7', update_zem=True):
    """ Select the sightlines of a DLA survey that are free of DLAs

    A sightline is kept only if it passes every cut below:
      - lies more than 1 arcsec from every DLA in the survey
      - FLG_BAL <= flg_bal (when flg_bal is given)
      - S2N > s2n (when s2n > 0)
      - DX > DX (when DX > 0)
      - has an igmspec entry within 1 arcsec
      - the igmspec zem_GROUP is within 0.1 of the sightline ZEM
      - the igmspec zem_GROUP exceeds zmin (when zmin is given)

    Parameters
    ----------
    dlasurvey : DLASurvey, optional
      Usually SDSS or BOSS.  When None, the DR5 'all' sample is loaded
      and igmsp_survey is forced to 'SDSS_DR7'
    flg_bal : int, optional
      Maximum BAL flag (0=No signature, 1=Weak BAL, 2=BAL)
    zmin : float, optional
      Minimum redshift for zem
    s2n : float, optional
      Minimum S/N as defined in some manner
    DX : float, optional
      Restrict on DX
    igmsp_survey : str, optional
      Name of the igmspec group providing the meta table
    update_zem : bool, optional
      Not used by the body

    Returns
    -------
    final : Table
      astropy Table of good sightlines
    sdict : dict
      dict describing the sightlines: 'n' plus min/max/median of ZEM
      (keys prefixed 'z') and of MAG (keys prefixed 'i')
    """
    import warnings
    warnings.warn("Someday remove 910, 526 which has a *strong* DLA")

    igmsp = IgmSpec()

    # Fall back on the DR5 sample
    if dlasurvey is None:
        print("Using the DR5 sample for the sightlines")
        dlasurvey = DLASurvey.load_SDSS_DR5(sample='all')
        igmsp_survey = 'SDSS_DR7'
    sightlines = dlasurvey.sightlines
    keep = np.ones(len(sightlines), dtype=bool)
    meta = igmsp[igmsp_survey].meta

    # Drop sightlines that coincide with a known DLA
    dla_coord = dlasurvey.coord
    sl_coord = SkyCoord(ra=sightlines['RA'], dec=sightlines['DEC'])
    _, dla_sep, _ = match_coordinates_sky(sl_coord, dla_coord, nthneighbor=1)
    keep = keep & (dla_sep > 1 * u.arcsec)

    # BAL
    if flg_bal is not None:
        keep = keep & (sightlines['FLG_BAL'] <= flg_bal)

    # S/N
    if s2n > 0.:
        keep = keep & (sightlines['S2N'] > s2n)

    # Cut on DX
    if DX > 0.:
        keep = keep & (sightlines['DX'] > DX)

    # Require a counterpart in igmspec
    qso_coord = SkyCoord(ra=meta['RA_GROUP'], dec=meta['DEC_GROUP'], unit='deg')
    idxq, qso_sep, _ = match_coordinates_sky(sl_coord, qso_coord, nthneighbor=1)
    keep = keep & (qso_sep < 1 * u.arcsec)

    # Emission redshifts of the two catalogs must agree
    zem = meta['zem_GROUP'][idxq]
    zem_offset = np.abs(zem - sightlines['ZEM'])
    keep = keep & (zem_offset < 0.1)
    if zmin is not None:
        keep = keep & (zem > zmin)

    # Assess
    final = sightlines[keep]
    sdict = {}
    sdict['n'] = len(final)
    print("We have {:d} sightlines for analysis".format(sdict['n']))

    # Summary statistics: 'z*' from ZEM, 'i*' from MAG
    for prefix, column in (('z', 'ZEM'), ('i', 'MAG')):
        values = final[column]
        sdict[prefix + 'min'] = np.min(values)
        sdict[prefix + 'max'] = np.max(values)
        sdict[prefix + 'median'] = np.median(values)
    print("Min z = {:g}, Median z = {:g}, Max z = {:g}".format(
        sdict['zmin'], sdict['zmedian'], sdict['zmax']))

    return final, sdict
def _load_training_sightlines(sdss):
    """Return (sdss, slines), loading the DR5 'all' sample when sdss is None."""
    if sdss is None:
        sdss = DLASurvey.load_SDSS_DR5(sample='all')
    slines, _ = grab_sightlines(sdss, flg_bal=0)
    return sdss, slines


def main(flg_tst, sdss=None, ml_survey=None):
    """ Run the training-set steps selected by the bits of flg_tst

    Parameters
    ----------
    flg_tst : int
        Bit flag (cast with int()); each set bit runs one step
          2**0 : load the sightlines
          2**1 : test training set of 100
          2**2 : production run, seed 12345
          2**3 : production run, seed 22345
          2**4 : SLLS production run, seed 22343
          2**5 : mixed systems for testing
          2**6 : write_sdss_sightlines()
          2**7 : high NHI systems
          2**8 : low S/N systems
    sdss : optional
        Survey providing the sightlines; the DR5 'all' sample is loaded
        when None
    ml_survey : optional
        Not used by the body
    """
    import os

    def dropbox_path(subpath):
        # Evaluated lazily so only the steps that write to Dropbox need
        # DROPBOX_DIR to be set
        return os.getenv('DROPBOX_DIR') + subpath

    flg_tst = int(flg_tst)
    slines = None

    # Sightlines
    if flg_tst & 2**0:
        sdss, slines = _load_training_sightlines(sdss)

    # Test case of 100 sightlines
    if flg_tst & 2**1:
        # Load here when bit 0 was not set; slines used to be undefined
        if slines is None:
            sdss, slines = _load_training_sightlines(sdss)
        # Make training set
        _, _ = make_set(100, slines, outroot='results/training_100')

    # Production runs
    if flg_tst & 2**2:
        training_prod(12345, 10, 5000,
                      outpath=dropbox_path('/MachineLearning/DLAs/'))

    # Production runs -- 100k more
    if flg_tst & 2**3:
        training_prod(22345, 10, 10000,
                      outpath=dropbox_path('/MachineLearning/DLAs/'))

    # Production runs -- SLLS
    if flg_tst & 2**4:
        training_prod(22343, 10, 5000, slls=True,
                      outpath=dropbox_path('/MachineLearning/SLLSs/'))

    # Mixed systems for testing
    if flg_tst & 2**5:
        sdss, slines = _load_training_sightlines(sdss)
        ntrials = 10000
        seed = 23559
        _, _ = make_set(
            ntrials, slines, seed=seed, mix=True,
            outroot=dropbox_path(
                '/MachineLearning/Mix/mix_test_{:d}_{:d}'.format(seed, ntrials)))

    # DR5 DLA-free sightlines
    if flg_tst & 2**6:
        write_sdss_sightlines()

    # High NHI systems for testing
    if flg_tst & 2**7:
        sdss, slines = _load_training_sightlines(sdss)
        ntrials = 20000
        seed = 83559
        _, _ = make_set(
            ntrials, slines, seed=seed, high=True,
            outroot=dropbox_path(
                '/MachineLearning/HighNHI/high_train_{:d}_{:d}'.format(
                    seed, ntrials)))

    # Low S/N
    if flg_tst & 2**8:
        sdss, slines = _load_training_sightlines(sdss)
        ntrials = 10000
        seed = 83557
        _, _ = make_set(
            ntrials, slines, seed=seed, low_s2n=True,
            outroot=dropbox_path(
                '/MachineLearning/LowS2N/lows2n_train_{:d}_{:d}'.format(
                    seed, ntrials)))
def _has_slls_match(dla_coord, dla_zabs, lls_coord, lls_zabs, dz_toler):
    """Return True if an ML LLS lies within 2 arcsec and dz_toler of the DLA."""
    lls_mts = np.where(dla_coord.separation(lls_coord) < 2*u.arcsec)[0]
    if len(lls_mts) == 0:
        return False
    return np.min(np.abs(dla_zabs - lls_zabs[lls_mts])) < dz_toler


def _dr5_subset_table(dr5, dla_coord, sl_coord, rows):
    """Build a PLATE/FIBER/NHI/zabs table for the DR5 DLAs indexed by rows."""
    plates, fibers = [], []
    for row in rows:
        # Nearest sightline gives the plate and fiber
        imin = np.argmin(dla_coord[row].separation(sl_coord))
        plates.append(dr5.sightlines['PLATE'][imin])
        fibers.append(dr5.sightlines['FIB'][imin])
    tbl = Table()
    tbl['PLATE'] = plates
    tbl['FIBER'] = fibers
    tbl['NHI'] = dr5.NHI[rows]
    tbl['zabs'] = dr5.zabs[rows]
    return tbl


def chk_dr5_dla_to_ml(ml_dlasurvey=None, ml_llssurvey=None, dz_toler=0.015,
                      outfile='vette_dr5.json', write_again=True):
    """ Cross-match the DR5 DLAs against the ML DLA and LLS catalogs

    Each DR5 DLA is assigned the index of the ML DLA within 2 arcsec that
    is closest in redshift (if within dz_toler), -9 if it only matches an
    ML LLS, or -1 if it matches nothing.  The result is saved as JSON.

    Parameters
    ----------
    ml_dlasurvey, ml_llssurvey : optional
        ML surveys; both are loaded with load_ml_dr7() if either is None
    dz_toler : float, optional
        Maximum redshift difference for a match
    outfile : str, optional
        Output JSON file
    write_again : bool, optional
        Also write DR5_misses.ascii and DR5_SLLS.ascii
    """
    # Load ML
    if (ml_dlasurvey is None) or (ml_llssurvey is None):
        ml_llssurvey, ml_dlasurvey = load_ml_dr7()
    # Load DR5
    dr5 = DLASurvey.load_SDSS_DR5()  # This is the statistical sample

    # Use coord to efficiently deal with sightlines
    ml_coord = SkyCoord(ra=ml_dlasurvey.sightlines['RA'],
                        dec=ml_dlasurvey.sightlines['DEC'], unit='deg')
    dr5_coord = SkyCoord(ra=dr5.sightlines['RA'], dec=dr5.sightlines['DEC'],
                         unit='deg')
    idx, d2d, d3d = match_coordinates_sky(dr5_coord, ml_coord, nthneighbor=1)
    in_ml = d2d < 2*u.arcsec
    print("{:d} of the DR5 sightlines were covered by ML out of {:d}".format(
        np.sum(in_ml), len(dr5.sightlines)))  # 7477 sightlines out of 7482

    # Cut down to the covered sightlines and rebuild the DLA mask
    dr5.sightlines = dr5.sightlines[in_ml]
    dr5.mask = dla_stat(dr5, dr5.sightlines)  # 737 good DLAs
    dr5_dla_coord = dr5.coord
    dr5_dla_zabs = dr5.zabs
    ndr5 = len(dr5_dla_coord)

    ml_dla_coord = ml_dlasurvey.coords
    ml_dla_zabs = ml_dlasurvey.zabs
    ml_lls_coord = ml_llssurvey.coords
    ml_lls_zabs = ml_llssurvey.zabs

    # Loop on DR5 DLAs and save indices of the matches (-1 = no match)
    dr5_ml_idx = np.zeros(ndr5).astype(int) - 1
    for ii in range(ndr5):
        # Match to ML DLAs on the same sightline
        dla_mts = np.where(
            dr5_dla_coord[ii].separation(ml_dla_coord) < 2*u.arcsec)[0]
        if len(dla_mts) > 0:
            zdiff = np.abs(dr5_dla_zabs[ii] - ml_dla_zabs[dla_mts])
            if np.min(zdiff) < dz_toler:
                dr5_ml_idx[ii] = dla_mts[np.argmin(zdiff)]
                continue
        # No DLA match in position or in redshift: check for LLS
        if _has_slls_match(dr5_dla_coord[ii], dr5_dla_zabs[ii],
                           ml_lls_coord, ml_lls_zabs, dz_toler):
            dr5_ml_idx[ii] = -9  # SLLS match

    # Sightline coordinates after the cut
    dr5_coord = SkyCoord(ra=dr5.sightlines['RA'], dec=dr5.sightlines['DEC'],
                         unit='deg')

    # Write out misses
    misses = np.where(dr5_ml_idx == -1)[0]
    mtbl = _dr5_subset_table(dr5, dr5_dla_coord, dr5_coord, misses)
    if write_again:
        mtbl.write('DR5_misses.ascii', format='ascii.fixed_width',
                   overwrite=True)

    # Write out SLLS
    sllss = np.where(dr5_ml_idx == -9)[0]
    stbl = _dr5_subset_table(dr5, dr5_dla_coord, dr5_coord, sllss)
    if write_again:
        stbl.write('DR5_SLLS.ascii', format='ascii.fixed_width',
                   overwrite=True)

    # ML not matched by PW09?
    idx2, d2d2, d3d = match_coordinates_sky(ml_dla_coord, dr5_dla_coord,
                                            nthneighbor=1)
    not_in_dr5 = d2d2 > 2*u.arcsec  # This doesn't match redshifts!
    might_be_in_dr5 = np.where(~not_in_dr5)[0]
    others_not_in = []  # this is some painful book-keeping
    for idx in might_be_in_dr5:
        # Matching redshifts..
        imt = ml_dla_coord[idx].separation(dr5_dla_coord) < 2*u.arcsec
        # Match on dztoler
        if np.min(np.abs(ml_dla_zabs[idx] - dr5_dla_zabs[imt])) > dz_toler:
            others_not_in.append(idx)

    # Save
    out_dict = {}
    out_dict['in_ml'] = in_ml
    out_dict['dr5_idx'] = dr5_ml_idx  # -1 are misses, -9 are SLLS
    # dtype=int keeps the indices integral even when others_not_in is empty
    out_dict['not_in_dr5'] = np.concatenate(
        [np.where(not_in_dr5)[0], np.array(others_not_in, dtype=int)])
    ltu.savejson(outfile, ltu.jsonify(out_dict), overwrite=True)
def dr5_false_positives(ml_dlasurvey=None, ml_llssurvey=None):
    """ Tabulate the ML DLAs that are false positives relative to DR5

    An ML DLA counts as a false positive when it lies within 2 arcsec of
    a DR5 sightline and is not listed in the 'dr5_idx' entry of
    vette_dr5.json.  Those inside the Z_START-Z_END range of their
    sightline are written to FP_DR5_highNHI.ascii (NHI > 21) and
    FP_DR5_medNHI.ascii (20.6 < NHI < 21).

    Parameters
    ----------
    ml_dlasurvey : optional
        ML DLA survey; loaded with load_ml_dr7() when None
    ml_llssurvey : optional
        Not used by the body
    """
    vette_file = 'vette_dr5.json'
    from pyigm.surveys.dlasurvey import DLASurvey
    from matplotlib import pyplot as plt

    # Load ML
    if ml_dlasurvey is None:
        _, ml_dlasurvey = load_ml_dr7()
    # Load DR5 -- this is the statistical sample
    dr5 = DLASurvey.load_SDSS_DR5()
    # Vette
    vette = ltu.loadjson(vette_file)
    dr5_ml_idx = np.array(vette['dr5_idx'])

    # Which ML DLAs sit on a DR5 sightline?
    ml_dla_coord = ml_dlasurvey.coords
    sightline_coord = SkyCoord(ra=dr5.sightlines['RA'],
                               dec=dr5.sightlines['DEC'], unit='deg')
    _, sep, _ = match_coordinates_sky(ml_dla_coord, sightline_coord,
                                      nthneighbor=1)
    in_dr5 = sep < 2*u.arcsec
    print("{:d} of the ML DLA were in the DR5 sightlines".format(np.sum(in_dr5)))

    # Start from everything on a DR5 sightline ...
    fpos = np.ones(ml_dlasurvey.nsys, dtype=bool)
    fpos[~in_dr5] = False
    # ... and remove the ML DLAs already matched to a DR5 DLA
    matched = dr5_ml_idx[np.where(dr5_ml_idx >= 0)[0]]
    fpos[matched] = False
    print("There are {:d} total false positives".format(np.sum(fpos)))
    # This nearly matches David's.  Will run with his analysis.

    # Restrict to the redshift range searched on each sightline
    fpos_in_stat = fpos.copy()
    plates = ml_dlasurvey.plate
    fibers = ml_dlasurvey.fiber
    zabs = ml_dlasurvey.zabs
    zem = ml_dlasurvey.zem
    for idx in np.where(fpos_in_stat)[0]:
        same_spec = (dr5.sightlines['PLATE'] == plates[idx]) & \
                    (dr5.sightlines['FIB'] == fibers[idx])
        dr5_sl = np.where(same_spec)[0][0]
        inside = (zabs[idx] >= dr5.sightlines['Z_START'][dr5_sl]) & \
                 (zabs[idx] <= dr5.sightlines['Z_END'][dr5_sl])
        if not inside:
            fpos_in_stat[idx] = False
    print("Number of FP in DR5 analysis region = {:d}".format(np.sum(fpos_in_stat)))

    # Properties of the surviving false positives
    fp_NHI = ml_dlasurvey.NHI[fpos_in_stat]
    fp_plates = plates[fpos_in_stat]
    fp_fibers = fibers[fpos_in_stat]
    fp_zabs = zabs[fpos_in_stat]
    fp_zem = zem[fpos_in_stat]
    print("Number with NHI<20.45 = {:d}".format(np.sum(fp_NHI < 20.45)))

    # High NHI
    highNHI = fp_NHI > 21.
    htbl = Table()
    htbl['PLATE'] = fp_plates[highNHI]
    htbl['FIBER'] = fp_fibers[highNHI]
    htbl['zabs'] = fp_zabs[highNHI]
    htbl['NHI'] = fp_NHI[highNHI]
    htbl.write("FP_DR5_highNHI.ascii", format='ascii.fixed_width', overwrite=True)

    # Medium NHI
    medNHI = (fp_NHI > 20.6) & (fp_NHI < 21)
    mtbl = Table()
    mtbl['PLATE'] = fp_plates[medNHI]
    mtbl['FIBER'] = fp_fibers[medNHI]
    mtbl['zabs'] = fp_zabs[medNHI]
    mtbl['zem'] = fp_zem[medNHI]
    mtbl['NHI'] = fp_NHI[medNHI]
    mtbl.write("FP_DR5_medNHI.ascii", format='ascii.fixed_width', overwrite=True)