def hdf5_adddata(hdf, sname, meta, debug=False, chk_meta_only=False):
    """ Append KODIAQ data to the h5 file

    Parameters
    ----------
    hdf : hdf5 pointer
        Open HDF5 file; a group named `sname` is created in it
    sname : str
        Survey name; must be 'KODIAQ_DR2'
    meta : Table
        Meta table, one row per spectrum.  Modified in place
        (summary columns are appended) and written to the HDF5 group.
    debug : bool, optional
    chk_meta_only : bool, optional
        Only check meta file; will not write the spectra

    Returns
    -------
    """
    # Add Survey
    print("Adding {:s} survey to DB".format(sname))
    kodiaq_grp = hdf.create_group(sname)
    # Checks
    if sname != 'KODIAQ_DR2':
        raise IOError("Not expecting this survey..")

    # Build spectra (and parse for meta)
    nspec = len(meta)
    max_npix = 60000  # Just needs to be large enough
    # Init
    data = init_data(max_npix, include_co=False)
    spec_set = hdf[sname].create_dataset('spec', data=data, chunks=True,
                                         maxshape=(None,), compression='gzip')
    spec_set.resize((nspec,))
    # Lists accumulated per spectrum, turned into meta columns below
    Rlist = []
    wvminlist = []
    wvmaxlist = []
    gratinglist = []
    npixlist = []
    speclist = []
    # Loop
    path = os.getenv('RAW_IGMSPEC') + '/KODIAQ2/Data/'
    maxpix = 0
    for jj, row in enumerate(meta):
        # Generate full file
        full_file = path + row['qso'] + '/' + row['pi_date'] + '/' + row['spec_prefix'] + '_f.fits'
        # Extract
        print("KODIAQ: Reading {:s}".format(full_file))
        hduf = fits.open(full_file)
        head = hduf[0].header
        spec = lsio.readspec(full_file)
        # Parse name
        fname = full_file.split('/')[-1]
        # npix
        npix = spec.npix
        if npix > max_npix:
            # Fixed message: the old text said "Not enough pixels" but this
            # fires when the spectrum exceeds the pre-allocated buffer.
            raise ValueError("Too many pixels for the data buffer... ({:d} > {:d})".format(npix, max_npix))
        else:
            maxpix = max(npix, maxpix)
        # Some fiddling about
        for key in ['wave', 'flux', 'sig']:
            data[key] = 0.  # Important to init (for compression too)
        data['flux'][0][:npix] = spec.flux.value
        data['sig'][0][:npix] = spec.sig.value
        data['wave'][0][:npix] = spec.wavelength.value
        # Meta
        speclist.append(str(fname))
        wvminlist.append(np.min(data['wave'][0][:npix]))
        wvmaxlist.append(np.max(data['wave'][0][:npix]))
        # Cross disperser: 'UV' maps to the blue side, anything else to red
        if 'XDISPERS' in head.keys():
            if head['XDISPERS'].strip() == 'UV':
                gratinglist.append('BLUE')
            else:
                gratinglist.append('RED')
        else:  # Original, early data lack the XDISPERS card
            gratinglist.append('RED')
        npixlist.append(npix)
        try:
            Rlist.append(set_resolution(head))
        except ValueError:
            pdb.set_trace()
        # Only way to set the dataset correctly
        if chk_meta_only:
            continue
        spec_set[jj] = data

    #
    print("Max pix = {:d}".format(maxpix))
    # Add columns
    meta.add_column(Column([2000.] * nspec, name='EPOCH'))
    meta.add_column(Column(speclist, name='SPEC_FILE'))
    meta.add_column(Column(npixlist, name='NPIX'))
    meta.add_column(Column(wvminlist, name='WV_MIN'))
    meta.add_column(Column(wvmaxlist, name='WV_MAX'))
    meta.add_column(Column(Rlist, name='R'))
    meta.add_column(Column(gratinglist, name='DISPERSER'))
    meta.add_column(Column(np.arange(nspec, dtype=int), name='GROUP_ID'))

    # Add KODIAQ meta to hdf5
    if chk_meta(meta):
        if chk_meta_only:
            pdb.set_trace()
        hdf[sname]['meta'] = meta
    else:
        raise ValueError("meta file failed")
    # References
    refs = [dict(url='http://adsabs.harvard.edu/abs/2017AJ....154..114O',
                 bib='kodiaq')
            ]
    jrefs = ltu.jsonify(refs)
    hdf[sname]['meta'].attrs['Refs'] = json.dumps(jrefs)
    #
    return
def mk_meta(files, ztbl, fname=False, stype='QSO', skip_badz=False, mdict=None,
            parse_head=None, debug=False, chkz=False, mtbl_file=None,
            verbose=False, specdb=None, sdb_key=None, **kwargs):
    """ Generate a meta Table from an input list of files

    Parameters
    ----------
    files : list
      List of FITS files
    ztbl : Table
      Table of redshifts.  Must include RA, DEC, ZEM, ZEM_SOURCE
      Used for RA/DEC if fname=False;  then requires SPEC_FILE too
    fname : bool, optional
      Attempt to parse RA/DEC from the file name
      Format must be SDSSJ######(.##)+/-######(.#)[x]
        where x cannot be a #. or +/-
    stype : str, optional
      Description of object type (e.g. 'QSO', 'Galaxy', 'SN')
    specdb : SpecDB, optional
      Database object to grab ID values from
      Requires sdb_key
    sdb_key : str, optional
      ID key in SpecDB object
    skip_badz : bool, optional
      Skip spectra without a parseable redshift (using the Myers catalog)
    parse_head : dict, optional
      Parse header for meta info with this dict
    mdict : dict, optional
      Input meta data in dict form e.g.  mdict=dict(INSTR='ESI')
    chkz : bool, optional
      If any sources have no parseable redshift, hit a set_trace
    mtbl_file : str
      Filename of input meta table.  Current allowed extensions are
      _meta.ascii or _meta.fits and they must be readable by Table.read().
      The values in this table will overwrite any others generated.
      Table must include a SPEC_FILE column to link meta data

    Returns
    -------
    meta : Table
      Meta table
    """
    # specdb lookups require knowing which column holds the IDs
    if specdb is not None:
        if sdb_key is None:
            raise IOError("Must specify sdb_key if you are passing in specdb")
    Rdicts = defs.get_res_dicts()
    #
    coordlist = []
    snames = []
    for ifile in files:
        sname = ifile.split('/')[-1]
        snames.append(sname)
        if fname:
            # Starting index
            if 'SDSSJ' in ifile:
                i0 = ifile.find('SDSSJ') + 4
            else:
                i0 = ifile.rfind('J') + 1
            # Find end (ugly): scan forward until a non-coordinate character
            for ii in range(i0 + 1, 99999):
                if ifile[ii] in ('0', '1', '2', '3', '4', '5', '6', '7', '8',
                                 '9', '.', '+', '-'):
                    continue
                else:
                    i1 = ii
                    break
            # Deal with .fits
            if ifile[i1 - 1] == '.':
                i1 -= 1
            # Get coord
            try:
                coord = ltu.radec_to_coord(ifile[i0:i1])
            except (UnboundLocalError, ValueError):
                # NOTE(review): after resuming from this trace, `coord` may be
                # unbound (or stale from the previous iteration) — verify.
                pdb.set_trace()
        else:
            # Match by SPEC_FILE instead of parsing the name
            mt = np.where(ztbl['SPEC_FILE'] == sname)[0]
            if len(mt) != 1:
                raise IndexError("NO MATCH FOR {:s}".format(sname))
            coord = ltu.radec_to_coord((ztbl['RA'][mt], ztbl['DEC'][mt]))[0]
        coordlist.append(coord)
    ras = np.array([coord.ra.degree for coord in coordlist])
    decs = np.array([coord.dec.degree for coord in coordlist])
    coords = SkyCoord(ra=ras, dec=decs, unit='deg')

    # Generate maindb Table
    #maindb, tkeys = spbu.start_maindb(private=True)

    # Fill
    meta = Table()
    meta['RA_GROUP'] = coords.ra.deg
    meta['DEC_GROUP'] = coords.dec.deg
    meta['STYPE'] = [str(stype)] * len(meta)

    zem, zsource = spzu.zem_from_radec(meta['RA_GROUP'], meta['DEC_GROUP'],
                                       ztbl, **kwargs)
    badz = zem <= 0.
    if np.sum(badz) > 0:
        if skip_badz:
            warnings.warn(
                "Skipping {:d} entries without a parseable redshift".format(
                    np.sum(badz)))
        else:
            if chkz:  # Turn this on to hit a stop instead of an Exception
                pdb.set_trace()
            else:
                raise ValueError(
                    "{:d} entries without a parseable redshift".format(
                        np.sum(badz)))
    meta['zem_GROUP'] = zem
    meta['sig_zem'] = 0.  # Need to add
    meta['flag_zem'] = zsource
    # Cut (entries with bad redshifts are dropped regardless of skip_badz)
    meta = meta[~badz]

    # specdb IDs
    if sdb_key is not None:
        meta[sdb_key] = [-9999] * len(meta)
        # NOTE(review): this check is redundant — the key was just added above
        if sdb_key not in meta.keys():
            meta[sdb_key] = [-9999] * len(meta)
        c_igmsp = SkyCoord(ra=specdb.qcat.cat['RA'],
                           dec=specdb.qcat.cat['DEC'], unit='deg')
        c_new = SkyCoord(ra=meta['RA_GROUP'], dec=meta['DEC_GROUP'],
                         unit='deg')
        # Find new sources
        idx, d2d, d3d = match_coordinates_sky(c_new, c_igmsp, nthneighbor=1)
        cdict = defs.get_cat_dict()
        mtch = d2d < cdict['match_toler']
        meta[sdb_key][mtch] = specdb.qcat.cat[sdb_key][idx[mtch]]

    # Stack (primarily as a test)
    '''
    try:
        maindb = vstack([maindb,meta], join_type='exact')
    except:
        pdb.set_trace()
    '''

    # SPEC_FILE
    meta['SPEC_FILE'] = np.array(files)[~badz]
    root_names = np.array(snames)[~badz]

    # Try Header?
    if parse_head is not None:
        # Setup to store
        plist = {}
        for key in parse_head.keys():
            plist[key] = []
        # Loop on files
        for sfile in meta['SPEC_FILE']:
            if verbose:
                print('Parsing {:s}'.format(sfile))
            try:
                head = fits.open(sfile)[0].header
            except FileNotFoundError:  # Try for compressed
                head = fits.open(sfile + '.gz')[0].header
            for key, item in parse_head.items():
                # R
                if key == 'R':
                    if parse_head[key] is True:
                        try:
                            plist[key].append(spbu.set_resolution(head))
                        except ValueError:
                            if mdict is not None:
                                try:
                                    plist[key].append(mdict['R'])
                                except KeyError:
                                    pdb.set_trace()
                            else:
                                pdb.set_trace()
                                # NOTE(review): only reached after resuming
                                # from the trace above — 0. is a placeholder R
                                plist[key].append(0.)
                    else:
                        raise ValueError("Set something else for R")
                elif key == 'DATE-OBS':
                    if 'MJD' in item:
                        tval = Time(head[item], format='mjd',
                                    out_subfmt='date')
                    else:
                        tval = Time(head[item].replace('/', '-'),
                                    format='isot', out_subfmt='date')
                    plist[key].append(tval.iso)
                else:
                    plist[key].append(head[item])
            # INSTRUMENT SPECIFIC
            try:
                instr = head['INSTRUME']
            except KeyError:
                instr = 'none'
            if 'LRIS' in instr:
                # Lazily create the LRIS columns on first LRIS file
                if 'DISPERSER' not in plist.keys():
                    plist['DISPERSER'] = []
                    plist['INSTR'] = []
                    plist['R'] = []
                try:
                    det = head['DETECTOR']
                except KeyError:
                    # Fall back on the output-file prefix to pick the side
                    if head['OUTFILE'] == 'lred':
                        det = 'LRIS-R'
                    else:
                        det = 'LRIS-B'
                if 'LRIS-R' in det:
                    plist['DISPERSER'].append(head['GRANAME'])
                    plist['INSTR'].append('LRISr')
                else:
                    plist['DISPERSER'].append(head['GRISNAME'])
                    plist['INSTR'].append('LRISb')
                # Resolution: tabulated R scaled by the slit width
                res = Rdicts[plist['INSTR'][-1]][plist['DISPERSER'][-1]]
                try:
                    sname = head['SLITNAME']
                except KeyError:
                    swidth = 1.
                else:
                    swidth = defs.slit_width(sname, LRIS=True)
                plist['R'].append(res / swidth)
        # Finish
        for key in plist.keys():
            meta[key] = plist[key]
    # mdict
    if mdict is not None:
        for key, item in mdict.items():
            meta[key] = [item] * len(meta)

    # EPOCH
    if 'EPOCH' not in meta.keys():
        warnings.warn("EPOCH not defined. Filling with 2000.")
        meta['EPOCH'] = 2000.

    # GROUP ID
    meta['GROUP_ID'] = np.arange(len(meta)).astype(int)

    # Fill in empty columns with warning
    mkeys = meta.keys()
    req_clms = defs.get_req_clms(sdb_key=sdb_key)
    for clm in req_clms:
        if clm not in mkeys:
            if clm not in ['NPIX', 'WV_MIN', 'WV_MAX']:  # File in ingest_spec
                warnings.warn(
                    "Meta Column {:s} not defined. Filling with DUMMY".format(
                        clm))
                if clm == 'DATE-OBS':
                    meta[clm] = ['9999-1-1'] * len(meta)
                else:
                    meta[clm] = ['DUMMY'] * len(meta)

    # Input meta table
    if mtbl_file is not None:
        # Read
        if '_meta.ascii' in mtbl_file:
            imtbl = Table.read(mtbl_file, format='ascii')
        elif '_meta.fits' in mtbl_file:
            imtbl = Table.read(mtbl_file)
        else:
            raise IOError(
                "Input meta table must have either an ascii or fits extension")
        # Check length
        if len(imtbl) != len(meta):
            raise IOError(
                "Input meta table must have same length as self-generated one")
        # Check for SPEC_FILE
        if 'SPEC_FILE' not in imtbl.keys():
            raise ValueError("Input meta table must include SPEC_FILE column")
        # Loop to get indices
        idx = []
        for row in imtbl:
            imt = np.where(root_names == row['SPEC_FILE'])[0]
            if len(imt) == 0:
                print("No match to spec file {:s}. Will ignore".format(
                    row['SPEC_FILE']))
            elif len(imt) == 1:
                idx.append(imt[0])
            else:
                raise ValueError(
                    "Two entries with the same SPEC_FILE. Something went wrong.."
                )
        idx = np.array(idx)
        # Loop on keys
        for key in imtbl.keys():
            # Skip?
            if key in ['SPEC_FILE']:
                continue
            if key in meta.keys():
                pdb.set_trace()
            else:  # Add Column
                meta.add_column(imtbl[key][idx])
    # Return
    if debug:
        meta[['RA_GROUP', 'DEC_GROUP', 'SPEC_FILE']].pprint(max_width=120)
        pdb.set_trace()
    return meta
def hdf5_adddata(hdf, sname, hdla100_meta, debug=False, chk_meta_only=False,
                 mk_test_file=False):
    """ Append HDLA100 data to the h5 file

    Parameters
    ----------
    hdf : hdf5 pointer
        Open HDF5 file; a group named `sname` is created in it
    sname : str
        Survey name; must be 'HDLA100'
    hdla100_meta : Table
        Meta table, one row per spectrum.  Modified in place
        (summary columns are appended) and written to the HDF5 group.
    debug : bool, optional
    chk_meta_only : bool, optional
        Only check meta file; will not write the spectra
    mk_test_file : bool, optional
        Generate the debug test file for Travis??

    Returns
    -------
    """
    from specdb import defs
    # Add Survey
    print("Adding {:s} survey to DB".format(sname))
    hdla100_grp = hdf.create_group(sname)
    # Load up
    Rdicts = defs.get_res_dicts()
    # Checks
    if sname != 'HDLA100':
        raise IOError("Not expecting this survey..")

    # Build spectra (and parse for meta)
    #if mk_test_file:
    #    hdla100_full = hdlls_full[0:3]
    max_npix = 192000  # Just needs to be large enough
    data = init_data(max_npix, include_co=False)
    # Init
    spec_set = hdf[sname].create_dataset('spec', data=data, chunks=True,
                                         maxshape=(None,), compression='gzip')
    nspec = len(hdla100_meta)
    spec_set.resize((nspec,))
    # Lists accumulated per spectrum, turned into meta columns below
    Rlist = []
    wvminlist = []
    wvmaxlist = []
    dateobslist = []
    npixlist = []
    gratinglist = []
    # Loop
    for jj, row in enumerate(hdla100_meta):
        kk = jj
        # Extract
        f = os.getenv('RAW_IGMSPEC') + '/HDLA100/' + row['SPEC_FILE']
        spec = lsio.readspec(f)
        # Parse name
        fname = f.split('/')[-1]
        # npix
        head = spec.header
        npix = spec.npix
        if npix > max_npix:
            # Fixed message: the old text said "Not enough pixels" but this
            # fires when the spectrum exceeds the pre-allocated buffer.
            raise ValueError(
                "Too many pixels for the data buffer... ({:d} > {:d})".format(npix, max_npix))
        # Some fiddling about
        for key in ['wave', 'flux', 'sig']:
            data[key] = 0.  # Important to init (for compression too)
        data['flux'][0][:npix] = spec.flux.value
        data['sig'][0][:npix] = spec.sig.value
        data['wave'][0][:npix] = spec.wavelength.value
        # Meta
        wvminlist.append(np.min(data['wave'][0][:npix]))
        wvmaxlist.append(np.max(data['wave'][0][:npix]))
        npixlist.append(npix)
        try:
            Rlist.append(set_resolution(head))
        except ValueError:
            raise ValueError("Header is required for {:s}".format(fname))
        else:
            # Parse DATE-OBS; old headers use DD/MM/YY (years offset by 1900)
            if '/' in head['DATE-OBS']:
                spl = head['DATE-OBS'].split('/')
                t = Time(datetime.datetime(
                    int(spl[2]) + 1900, int(spl[1]), int(spl[0])),
                    format='datetime')
            else:
                t = Time(head['DATE-OBS'], format='isot', out_subfmt='date')
            dateobslist.append(t.iso)
        # Grating
        try:
            gratinglist.append(head['XDISPERS'])
        except KeyError:
            # No XDISPERS card: HIRES data through 1997 used the red
            # cross-disperser; later data should always carry the card
            try:
                yr = t.value.year
            except AttributeError:
                yr = int(t.value[0:4])
            if yr <= 1997:
                gratinglist.append('RED')
            else:
                pdb.set_trace()
        # Only way to set the dataset correctly
        if chk_meta_only:
            continue
        spec_set[kk] = data

    # Add columns
    nmeta = len(hdla100_meta)
    hdla100_meta.add_column(Column([2000.] * nmeta, name='EPOCH'))
    hdla100_meta.add_column(Column(npixlist, name='NPIX'))
    hdla100_meta.add_column(
        Column([str(date) for date in dateobslist], name='DATE-OBS'))
    hdla100_meta.add_column(Column(wvminlist, name='WV_MIN'))
    hdla100_meta.add_column(Column(wvmaxlist, name='WV_MAX'))
    hdla100_meta.add_column(Column(Rlist, name='R'))
    hdla100_meta.add_column(
        Column(np.arange(nmeta, dtype=int), name='GROUP_ID'))
    hdla100_meta.add_column(Column(gratinglist, name='DISPERSER'))
    hdla100_meta['INSTR'] = ['HIRES'] * nspec
    hdla100_meta['TELESCOPE'] = ['Keck-I'] * nspec
    #hdla100_meta.rename_column('Z_QSO', 'zem')

    # Add HDLA100 meta to hdf5
    if chk_meta(hdla100_meta):
        if chk_meta_only:
            pdb.set_trace()
        hdf[sname]['meta'] = hdla100_meta
    else:
        raise ValueError("meta file failed")
    # References
    refs = [
        dict(url='http://adsabs.harvard.edu/abs/2013ApJ...769...54N',
             bib='neeleman+13'),
    ]
    jrefs = ltu.jsonify(refs)
    hdf[sname]['meta'].attrs['Refs'] = json.dumps(jrefs)
    #
    return
def hdf5_adddata(hdf, sname, meta, debug=False, chk_meta_only=False,
                 mk_test_file=False):
    """ Append HD-LLS data to the h5 file

    Parameters
    ----------
    hdf : hdf5 pointer
        Open HDF5 file; a group named `sname` is created in it
    sname : str
        Survey name; must be 'HD-LLS_DR1'
    meta : Table
        Meta table, one row per spectrum.  Re-ordered to match the files
        found on disk, extended with summary columns, and written to the
        HDF5 group.
    debug : bool, optional
    chk_meta_only : bool, optional
        Only check meta file; will not write the spectra
    mk_test_file : bool, optional
        Generate the debug test file for Travis??

    Returns
    -------
    """
    from specdb import defs
    # Add Survey
    print("Adding {:s} survey to DB".format(sname))
    hdlls_grp = hdf.create_group(sname)
    # Load up
    Rdicts = defs.get_res_dicts()
    mike_meta = grab_meta_mike()
    mike_coord = SkyCoord(ra=mike_meta['RA_GROUP'],
                          dec=mike_meta['DEC_GROUP'], unit='deg')
    # Checks
    if sname != 'HD-LLS_DR1':
        raise IOError("Not expecting this survey..")
    full_coord = SkyCoord(ra=meta['RA_GROUP'], dec=meta['DEC_GROUP'],
                          unit='deg')

    # Build spectra (and parse for meta)
    if mk_test_file:
        meta = meta[0:3]
    nspec = len(meta)
    max_npix = 210000  # Just needs to be large enough
    data = init_data(max_npix, include_co=False)
    # Init
    full_idx = np.zeros(len(meta), dtype=int)
    spec_set = hdf[sname].create_dataset('spec', data=data, chunks=True,
                                         maxshape=(None,), compression='gzip')
    spec_set.resize((nspec,))
    # Lists accumulated per spectrum, turned into meta columns below
    Rlist = []
    wvminlist = []
    wvmaxlist = []
    dateobslist = []
    npixlist = []
    instrlist = []
    gratinglist = []
    telelist = []
    # Loop over the files on disk (not the meta rows)
    members = glob.glob(os.getenv('RAW_IGMSPEC') + '/{:s}/*fits'.format(sname))
    kk = -1
    for jj, member in enumerate(members):
        if 'HD-LLS_DR1.fits' in member:  # Skip the summary file
            continue
        kk += 1
        # Extract
        f = member
        hdu = fits.open(f)
        # Parse name
        fname = f.split('/')[-1]
        mt = np.where(meta['SPEC_FILE'] == fname)[0]
        if mk_test_file and (jj >= 3):
            continue
        if len(mt) != 1:
            pdb.set_trace()
            raise ValueError("HD-LLS: No match to spectral file?!")
        else:
            print('loading {:s}'.format(fname))
            full_idx[kk] = mt[0]
        # npix
        head = hdu[0].header
        # Some fiddling about
        for key in ['wave', 'flux', 'sig']:
            data[key] = 0.  # Important to init (for compression too)
        # Double check
        if kk == 0:
            assert hdu[1].name == 'ERROR'
            assert hdu[2].name == 'WAVELENGTH'
        # Write
        spec = lsio.readspec(f)  # Handles dummy pixels in ESI
        npix = spec.npix
        if npix > max_npix:
            # Fixed message: the old text said "Not enough pixels" but this
            # fires when the spectrum exceeds the pre-allocated buffer.
            raise ValueError(
                "Too many pixels for the data buffer... ({:d} > {:d})".format(npix, max_npix))
        data['flux'][0][:npix] = spec.flux.value
        data['sig'][0][:npix] = spec.sig.value
        data['wave'][0][:npix] = spec.wavelength.value
        #data['flux'][0][:npix] = hdu[0].data
        #data['sig'][0][:npix] = hdu[1].data
        #data['wave'][0][:npix] = hdu[2].data
        # Meta
        wvminlist.append(np.min(data['wave'][0][:npix]))
        wvmaxlist.append(np.max(data['wave'][0][:npix]))
        npixlist.append(npix)
        if 'HIRES' in fname:
            instrlist.append('HIRES')
            telelist.append('Keck-I')
            gratinglist.append('BOTH')
            try:
                Rlist.append(set_resolution(head))
            except ValueError:
                # A few by hand (pulled from Table 1)
                if 'J073149' in fname:
                    Rlist.append(Rdicts['HIRES']['C5'])
                    tval = datetime.datetime.strptime('2006-01-04', '%Y-%m-%d')
                elif 'J081435' in fname:
                    Rlist.append(Rdicts['HIRES']['C1'])
                    tval = datetime.datetime.strptime('2006-12-26', '%Y-%m-%d')  # 2008 too
                elif 'J095309' in fname:
                    Rlist.append(Rdicts['HIRES']['C1'])
                    tval = datetime.datetime.strptime('2005-03-18', '%Y-%m-%d')
                elif 'J113418' in fname:
                    Rlist.append(Rdicts['HIRES']['C5'])
                    tval = datetime.datetime.strptime('2006-01-05', '%Y-%m-%d')
                elif 'J135706' in fname:
                    Rlist.append(Rdicts['HIRES']['C5'])
                    tval = datetime.datetime.strptime('2007-04-28', '%Y-%m-%d')
                elif 'J155556.9' in fname:
                    Rlist.append(Rdicts['HIRES']['C5'])
                    tval = datetime.datetime.strptime('2005-04-15', '%Y-%m-%d')
                elif 'J212329' in fname:
                    Rlist.append(Rdicts['HIRES']['E3'])
                    tval = datetime.datetime.strptime('2006-08-20', '%Y-%m-%d')
                else:
                    pdb.set_trace()
            else:
                tval = datetime.datetime.strptime(head['DATE-OBS'], '%Y-%m-%d')
            dateobslist.append(datetime.datetime.strftime(tval, '%Y-%m-%d'))
        elif 'ESI' in fname:
            instrlist.append('ESI')
            telelist.append('Keck-II')
            gratinglist.append('ECH')
            try:
                Rlist.append(set_resolution(head))
            except ValueError:
                print("Using R=6,000 for ESI")
                Rlist.append(6000.)
            try:
                tval = datetime.datetime.strptime(head['DATE'], '%Y-%m-%d')
            except KeyError:
                if ('J223438.5' in fname) or ('J231543' in fname):
                    tval = datetime.datetime.strptime('2004-09-11', '%Y-%m-%d')
                else:
                    pdb.set_trace()
            dateobslist.append(datetime.datetime.strftime(tval, '%Y-%m-%d'))
        elif 'MIKE' in fname:  # APPROXIMATE
            if 'MIKEr' in fname:
                instrlist.append('MIKEr')
                gratinglist.append('RED')
            elif 'MIKEb' in fname:
                instrlist.append('MIKEb')
                gratinglist.append('BLUE')
            else:
                instrlist.append('MIKE')
                gratinglist.append('BOTH')
            telelist.append('Magellan')
            # MIKE meta comes from a separate table; match by coordinate
            sep = full_coord[mt[0]].separation(mike_coord)
            imin = np.argmin(sep)
            if sep[imin] > 1. * u.arcsec:
                pdb.set_trace()
                raise ValueError("Bad separation in MIKE")
            # R and Date
            Rlist.append(25000. / mike_meta['Slit'][imin])
            tval = datetime.datetime.strptime(mike_meta['DATE-OBS'][imin],
                                              '%Y-%b-%d')
            dateobslist.append(datetime.datetime.strftime(tval, '%Y-%m-%d'))
        elif 'MAGE' in fname:  # APPROXIMATE
            instrlist.append('MagE')
            if 'Clay' in head['TELESCOP']:
                telelist.append('Magellan/Clay')
            else:
                telelist.append('Magellan/Baade')
            gratinglist.append('N/A')
            Rlist.append(set_resolution(head))
            dateobslist.append(head['DATE-OBS'])
        else:  # Unrecognized instrument in file name
            raise ValueError("UH OH")
        # Only way to set the dataset correctly
        if chk_meta_only:
            continue
        spec_set[kk] = data

    # Add columns (re-order meta to the on-disk file order first)
    meta = meta[full_idx]
    nmeta = len(meta)
    meta.add_column(Column([2000.] * nmeta, name='EPOCH'))
    meta.add_column(Column(npixlist, name='NPIX'))
    meta.add_column(
        Column([str(date) for date in dateobslist], name='DATE-OBS'))
    meta.add_column(Column(wvminlist, name='WV_MIN'))
    meta.add_column(Column(wvmaxlist, name='WV_MAX'))
    meta.add_column(Column(Rlist, name='R'))
    meta.add_column(Column(np.arange(nmeta, dtype=int), name='GROUP_ID'))
    meta.add_column(Column(gratinglist, name='GRATING'))
    meta.add_column(Column(instrlist, name='INSTR'))
    meta.add_column(Column(telelist, name='TELESCOPE'))
    # v02
    meta.rename_column('GRATING', 'DISPERSER')

    # Add HDLLS meta to hdf5
    if chk_meta(meta):
        if chk_meta_only:
            pdb.set_trace()
        hdf[sname]['meta'] = meta
    else:
        raise ValueError("meta file failed")
    # References
    refs = [
        dict(url='http://adsabs.harvard.edu/abs/2015ApJS..221....2P',
             bib='prochaska+15'),
    ]
    jrefs = ltu.jsonify(refs)
    hdf[sname]['meta'].attrs['Refs'] = json.dumps(jrefs)
    #
    return
def hdf5_adddata(hdf, sname, meta, debug=False, chk_meta_only=False,
                 mk_test_file=False):
    """ Append HD-LLS data to the h5 file

    NOTE(review): this appears to duplicate another `hdf5_adddata`
    (HD-LLS) definition in this source; if both live in one module the
    later definition shadows the earlier — confirm these belong to
    separate ingestion modules.

    Parameters
    ----------
    hdf : hdf5 pointer
    IDs : ndarray
      int array of IGM_ID values in mainDB
    sname : str
      Survey name
    chk_meta_only : bool, optional
      Only check meta file; will not write
    mk_test_file : bool, optional
      Generate the debug test file for Travis??

    Returns
    -------
    """
    from specdb import defs
    # Add Survey
    print("Adding {:s} survey to DB".format(sname))
    hdlls_grp = hdf.create_group(sname)
    # Load up
    Rdicts = defs.get_res_dicts()
    mike_meta = grab_meta_mike()
    mike_coord = SkyCoord(ra=mike_meta['RA_GROUP'],
                          dec=mike_meta['DEC_GROUP'], unit='deg')
    # Checks
    if sname != 'HD-LLS_DR1':
        raise IOError("Not expecting this survey..")
    full_coord = SkyCoord(ra=meta['RA_GROUP'], dec=meta['DEC_GROUP'],
                          unit='deg')

    # Build spectra (and parse for meta)
    if mk_test_file:
        meta = meta[0:3]
    nspec = len(meta)
    max_npix = 210000  # Just needs to be large enough
    data = init_data(max_npix, include_co=False)
    # Init
    full_idx = np.zeros(len(meta), dtype=int)
    spec_set = hdf[sname].create_dataset('spec', data=data, chunks=True,
                                         maxshape=(None,), compression='gzip')
    spec_set.resize((nspec,))
    # Per-spectrum accumulators; become meta columns below
    Rlist = []
    wvminlist = []
    wvmaxlist = []
    dateobslist = []
    npixlist = []
    instrlist = []
    gratinglist = []
    telelist = []
    # Loop over the files on disk (not the meta rows)
    members = glob.glob(os.getenv('RAW_IGMSPEC')+'/{:s}/*fits'.format(sname))
    kk = -1
    for jj, member in enumerate(members):
        if 'HD-LLS_DR1.fits' in member:  # Skip the summary file
            continue
        kk += 1
        # Extract
        f = member
        hdu = fits.open(f)
        # Parse name
        fname = f.split('/')[-1]
        mt = np.where(meta['SPEC_FILE'] == fname)[0]
        if mk_test_file and (jj >= 3):
            continue
        if len(mt) != 1:
            pdb.set_trace()
            raise ValueError("HD-LLS: No match to spectral file?!")
        else:
            print('loading {:s}'.format(fname))
            full_idx[kk] = mt[0]
        # npix
        head = hdu[0].header
        # Some fiddling about
        for key in ['wave', 'flux', 'sig']:
            data[key] = 0.  # Important to init (for compression too)
        # Double check
        if kk == 0:
            assert hdu[1].name == 'ERROR'
            assert hdu[2].name == 'WAVELENGTH'
        # Write
        spec = lsio.readspec(f)  # Handles dummy pixels in ESI
        npix = spec.npix
        if npix > max_npix:
            # NOTE(review): fires when npix EXCEEDS the buffer — the
            # message text reads backwards
            raise ValueError("Not enough pixels in the data... ({:d})".format(npix))
        data['flux'][0][:npix] = spec.flux.value
        data['sig'][0][:npix] = spec.sig.value
        data['wave'][0][:npix] = spec.wavelength.value
        #data['flux'][0][:npix] = hdu[0].data
        #data['sig'][0][:npix] = hdu[1].data
        #data['wave'][0][:npix] = hdu[2].data
        # Meta
        wvminlist.append(np.min(data['wave'][0][:npix]))
        wvmaxlist.append(np.max(data['wave'][0][:npix]))
        npixlist.append(npix)
        if 'HIRES' in fname:
            instrlist.append('HIRES')
            telelist.append('Keck-I')
            gratinglist.append('BOTH')
            try:
                Rlist.append(set_resolution(head))
            except ValueError:
                # A few by hand (pulled from Table 1)
                if 'J073149' in fname:
                    Rlist.append(Rdicts['HIRES']['C5'])
                    tval = datetime.datetime.strptime('2006-01-04', '%Y-%m-%d')
                elif 'J081435' in fname:
                    Rlist.append(Rdicts['HIRES']['C1'])
                    tval = datetime.datetime.strptime('2006-12-26', '%Y-%m-%d')  # 2008 too
                elif 'J095309' in fname:
                    Rlist.append(Rdicts['HIRES']['C1'])
                    tval = datetime.datetime.strptime('2005-03-18', '%Y-%m-%d')
                elif 'J113418' in fname:
                    Rlist.append(Rdicts['HIRES']['C5'])
                    tval = datetime.datetime.strptime('2006-01-05', '%Y-%m-%d')
                elif 'J135706' in fname:
                    Rlist.append(Rdicts['HIRES']['C5'])
                    tval = datetime.datetime.strptime('2007-04-28', '%Y-%m-%d')
                elif 'J155556.9' in fname:
                    Rlist.append(Rdicts['HIRES']['C5'])
                    tval = datetime.datetime.strptime('2005-04-15', '%Y-%m-%d')
                elif 'J212329' in fname:
                    Rlist.append(Rdicts['HIRES']['E3'])
                    tval = datetime.datetime.strptime('2006-08-20', '%Y-%m-%d')
                else:
                    pdb.set_trace()
            else:
                tval = datetime.datetime.strptime(head['DATE-OBS'], '%Y-%m-%d')
            dateobslist.append(datetime.datetime.strftime(tval, '%Y-%m-%d'))
        elif 'ESI' in fname:
            instrlist.append('ESI')
            telelist.append('Keck-II')
            gratinglist.append('ECH')
            try:
                Rlist.append(set_resolution(head))
            except ValueError:
                print("Using R=6,000 for ESI")
                Rlist.append(6000.)
            try:
                tval = datetime.datetime.strptime(head['DATE'], '%Y-%m-%d')
            except KeyError:
                if ('J223438.5' in fname) or ('J231543' in fname):
                    tval = datetime.datetime.strptime('2004-09-11', '%Y-%m-%d')
                else:
                    pdb.set_trace()
            dateobslist.append(datetime.datetime.strftime(tval, '%Y-%m-%d'))
        elif 'MIKE' in fname:  # APPROXIMATE
            if 'MIKEr' in fname:
                instrlist.append('MIKEr')
                gratinglist.append('RED')
            elif 'MIKEb' in fname:
                instrlist.append('MIKEb')
                gratinglist.append('BLUE')
            else:
                instrlist.append('MIKE')
                gratinglist.append('BOTH')
            telelist.append('Magellan')
            # MIKE meta lives in a separate table; match by coordinate
            sep = full_coord[mt[0]].separation(mike_coord)
            imin = np.argmin(sep)
            if sep[imin] > 1.*u.arcsec:
                pdb.set_trace()
                raise ValueError("Bad separation in MIKE")
            # R and Date
            Rlist.append(25000. / mike_meta['Slit'][imin])
            tval = datetime.datetime.strptime(mike_meta['DATE-OBS'][imin],
                                              '%Y-%b-%d')
            dateobslist.append(datetime.datetime.strftime(tval, '%Y-%m-%d'))
        elif 'MAGE' in fname:  # APPROXIMATE
            instrlist.append('MagE')
            if 'Clay' in head['TELESCOP']:
                telelist.append('Magellan/Clay')
            else:
                telelist.append('Magellan/Baade')
            gratinglist.append('N/A')
            Rlist.append(set_resolution(head))
            dateobslist.append(head['DATE-OBS'])
        else:  # MagE
            raise ValueError("UH OH")
        # Only way to set the dataset correctly
        if chk_meta_only:
            continue
        spec_set[kk] = data

    # Add columns (re-order meta to the on-disk file order first)
    meta = meta[full_idx]
    nmeta = len(meta)
    meta.add_column(Column([2000.]*nmeta, name='EPOCH'))
    meta.add_column(Column(npixlist, name='NPIX'))
    meta.add_column(Column([str(date) for date in dateobslist],
                           name='DATE-OBS'))
    meta.add_column(Column(wvminlist, name='WV_MIN'))
    meta.add_column(Column(wvmaxlist, name='WV_MAX'))
    meta.add_column(Column(Rlist, name='R'))
    meta.add_column(Column(np.arange(nmeta, dtype=int), name='GROUP_ID'))
    meta.add_column(Column(gratinglist, name='GRATING'))
    meta.add_column(Column(instrlist, name='INSTR'))
    meta.add_column(Column(telelist, name='TELESCOPE'))
    # v02
    meta.rename_column('GRATING', 'DISPERSER')

    # Add HDLLS meta to hdf5
    if chk_meta(meta):
        if chk_meta_only:
            pdb.set_trace()
        hdf[sname]['meta'] = meta
    else:
        raise ValueError("meta file failed")
    # References
    refs = [dict(url='http://adsabs.harvard.edu/abs/2015ApJS..221....2P',
                 bib='prochaska+15'),
            ]
    jrefs = ltu.jsonify(refs)
    hdf[sname]['meta'].attrs['Refs'] = json.dumps(jrefs)
    #
    return
def hdf5_adddata(hdf, sname, hdla100_meta, debug=False, chk_meta_only=False,
                 mk_test_file=False):
    """ Append HDLA100 data to the h5 file

    NOTE(review): this appears to duplicate another `hdf5_adddata`
    (HDLA100) definition in this source; if both live in one module the
    later definition shadows the earlier — confirm these belong to
    separate ingestion modules.

    Parameters
    ----------
    hdf : hdf5 pointer
    IDs : ndarray
      int array of IGM_ID values in mainDB
    sname : str
      Survey name
    chk_meta_only : bool, optional
      Only check meta file; will not write
    mk_test_file : bool, optional
      Generate the debug test file for Travis??

    Returns
    -------
    """
    from specdb import defs
    # Add Survey
    print("Adding {:s} survey to DB".format(sname))
    hdlls_grp = hdf.create_group(sname)  # NOTE(review): name says HDLLS but this is the HDLA100 group
    # Load up
    Rdicts = defs.get_res_dicts()
    # Checks
    if sname != 'HDLA100':
        raise IOError("Not expecting this survey..")

    # Build spectra (and parse for meta)
    #if mk_test_file:
    #    hdla100_full = hdlls_full[0:3]
    max_npix = 192000  # Just needs to be large enough
    data = init_data(max_npix, include_co=False)
    # Init
    spec_set = hdf[sname].create_dataset('spec', data=data, chunks=True,
                                         maxshape=(None,), compression='gzip')
    nspec = len(hdla100_meta)
    spec_set.resize((nspec,))
    # Per-spectrum accumulators; become meta columns below
    Rlist = []
    wvminlist = []
    wvmaxlist = []
    dateobslist = []
    npixlist = []
    gratinglist = []
    # Loop
    for jj, row in enumerate(hdla100_meta):
        kk = jj
        # Extract
        f = os.getenv('RAW_IGMSPEC')+'/HDLA100/'+row['SPEC_FILE']
        spec = lsio.readspec(f)
        # Parse name
        fname = f.split('/')[-1]
        # npix
        head = spec.header
        npix = spec.npix
        if npix > max_npix:
            # NOTE(review): fires when npix EXCEEDS the buffer — the
            # message text reads backwards
            raise ValueError("Not enough pixels in the data... ({:d})".format(npix))
        # Some fiddling about
        for key in ['wave', 'flux', 'sig']:
            data[key] = 0.  # Important to init (for compression too)
        data['flux'][0][:npix] = spec.flux.value
        data['sig'][0][:npix] = spec.sig.value
        data['wave'][0][:npix] = spec.wavelength.value
        # Meta
        wvminlist.append(np.min(data['wave'][0][:npix]))
        wvmaxlist.append(np.max(data['wave'][0][:npix]))
        npixlist.append(npix)
        try:
            Rlist.append(set_resolution(head))
        except ValueError:
            raise ValueError("Header is required for {:s}".format(fname))
        else:
            # Parse DATE-OBS; old headers use DD/MM/YY (years offset by 1900)
            if '/' in head['DATE-OBS']:
                spl = head['DATE-OBS'].split('/')
                t = Time(datetime.datetime(int(spl[2])+1900, int(spl[1]),
                                           int(spl[0])), format='datetime')
            else:
                t = Time(head['DATE-OBS'], format='isot', out_subfmt='date')
            dateobslist.append(t.iso)
        # Grating
        try:
            gratinglist.append(head['XDISPERS'])
        except KeyError:
            # No XDISPERS card: data through 1997 assumed RED cross-disperser
            try:
                yr = t.value.year
            except AttributeError:
                yr = int(t.value[0:4])
            if yr <= 1997:
                gratinglist.append('RED')
            else:
                pdb.set_trace()
        # Only way to set the dataset correctly
        if chk_meta_only:
            continue
        spec_set[kk] = data

    # Add columns
    nmeta = len(hdla100_meta)
    hdla100_meta.add_column(Column([2000.]*nmeta, name='EPOCH'))
    hdla100_meta.add_column(Column(npixlist, name='NPIX'))
    hdla100_meta.add_column(Column([str(date) for date in dateobslist],
                                   name='DATE-OBS'))
    hdla100_meta.add_column(Column(wvminlist, name='WV_MIN'))
    hdla100_meta.add_column(Column(wvmaxlist, name='WV_MAX'))
    hdla100_meta.add_column(Column(Rlist, name='R'))
    hdla100_meta.add_column(Column(np.arange(nmeta, dtype=int),
                                   name='GROUP_ID'))
    hdla100_meta.add_column(Column(gratinglist, name='DISPERSER'))
    hdla100_meta['INSTR'] = ['HIRES']*nspec
    hdla100_meta['TELESCOPE'] = ['Keck-I']*nspec
    #hdla100_meta.rename_column('Z_QSO', 'zem')

    # Add HDLLS meta to hdf5
    if chk_meta(hdla100_meta):
        if chk_meta_only:
            pdb.set_trace()
        hdf[sname]['meta'] = hdla100_meta
    else:
        raise ValueError("meta file failed")
    # References
    refs = [dict(url='http://adsabs.harvard.edu/abs/2013ApJ...769...54N',
                 bib='neeleman+13'),
            ]
    jrefs = ltu.jsonify(refs)
    hdf[sname]['meta'].attrs['Refs'] = json.dumps(jrefs)
    #
    return