def test_ingest():
    """Ingest the test spectra into a scratch HDF5 file and check its layout.

    Builds a meta table from the files under ``files/`` next to this module,
    assigns IDs, writes the 'test' group to ``tmp.hdf5`` in the current
    working directory, then re-opens the file and checks that the group
    contains 'meta' and that 'spec' reads back as a NumPy array.
    """
    # Begin
    id_key = 'TEST_ID'
    maindb, tkeys = spbu.start_maindb(id_key)
    # Redshift table and input files
    data_dir = os.path.join(os.path.dirname(__file__), 'files')
    ztbl = Table.read(os.path.join(data_dir, 'ztbl_E.fits'))
    ffiles, _ = pbuild.grab_files(data_dir)
    meta = pbuild.mk_meta(ffiles, ztbl, fname=True,
                          skip_badz=True, mdict=dict(INSTR='HIRES'))
    # Group and IDs
    gdict = {}
    flag_g = spbu.add_to_group_dict('COS', gdict)
    maindb = pbuild.add_ids(maindb, meta, flag_g, tkeys, id_key,
                            first=(flag_g == 1))
    # Write; the context manager closes the file even if the ingest fails
    with h5py.File('tmp.hdf5', 'w') as hdf:
        pbuild.ingest_spectra(hdf, 'test', meta)
    # Read back and test
    with h5py.File('tmp.hdf5', 'r') as tmp:
        assert 'meta' in tmp['test'].keys()
        # Dataset.value was removed in h5py 3.0; [()] reads the whole dataset
        assert isinstance(tmp['test/spec'][()], np.ndarray)
def ver01(test=False, clobber=False, publisher='J.X. Prochaska', **kwargs):
    """ Build version 1.0

    This version is frozen: the function raises before doing any work.

    Parameters
    ----------
    test : bool, optional
      Run test only
    clobber : bool, optional
      Overwrite an existing DB file (only read by the frozen recipe below)
    publisher : str, optional
      Publisher name handed to the write step of the frozen recipe below
    **kwargs
      Accepted but not used

    Returns
    -------

    Raises
    ------
    IOError
      Always
    """
    # THIS VERSION IS NOW FROZEN
    # Raise straight away (no debugger stop first) so that a non-interactive
    # run fails immediately instead of waiting on a pdb prompt.
    raise IOError("THIS VERSION IS NOW FROZEN")

    # NOTE: everything below is unreachable; the v01 recipe is left as it was.
    version = 'v01'
    # HDF5 file
    outfil = igmspec.__path__[0] + '/../DB/IGMspec_DB_{:s}.hdf5'.format(
        version)
    # Chk clobber
    if os.path.isfile(outfil):
        if clobber:
            warnings.warn("Overwriting previous DB file {:s}".format(outfil))
        else:
            warnings.warn(
                "Not overwiting previous DB file. Use clobber=True to do so")
            return
    # Begin
    hdf = h5py.File(outfil, 'w')

    # Myers QSOs
    myers.orig_add_to_hdf(hdf)

    # Main DB Table
    idkey = 'IGM_ID'
    maindb, tkeys = sdbbu.start_maindb(idkey)

    # Group dict
    group_dict = {}

    # Organize for main loop
    groups = get_build_groups(version)

    pair_groups = ['SDSS_DR7']

    meta_only = False
    # Loop over the groups
    for gname in groups:
        # Meta
        if gname == 'SDSS_DR7':
            meta = groups[gname].grab_meta(hdf)
        else:
            meta = groups[gname].grab_meta()
        # Survey flag
        flag_g = sdbbu.add_to_group_dict(gname, group_dict)
        # IDs
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g == 1),
                               close_pairs=(gname in pair_groups))
        # Spectra
        if not meta_only:
            groups[gname].hdf5_adddata(hdf, gname, meta, idkey)

    # Check for duplicates -- There is 1 pair in SDSS (i.e. 2 duplicates)
    if not sdbbu.chk_for_duplicates(maindb, dup_lim=2):
        raise ValueError("Failed duplicates")

    # Check for junk
    zpri = defs.z_priority()

    # Finish
    sdbbu.write_hdf(hdf, str('igmspec'), maindb, zpri,
                    group_dict, version, Publisher=str(publisher))

    print("Wrote {:s} DB file".format(outfil))
    print("Update DB info in specdb.defs.dbase_info !!")
def ver03(test=False, skip_copy=False, publisher='J.X. Prochaska', clobber=False,
          version='v03.1', out_path=None, redo_dr14=False):
    """ Build version 3.X

    Remakes the maindb using BOSS DR14 as the main driver, adds the other
    v03 groups, then copies meta and spectra of the older groups from the
    v02.1 DB file.
    v3.0 will be BOSS DR14 only to speed up generation of the rest

    Parameters
    ----------
    test : bool, optional
      Run test only; the older groups are not copied
    skip_copy : bool, optional
      Skip copying the older groups from the v02.1 file
    publisher : str, optional
      Publisher name handed to the write step
    clobber : bool, optional
      Passed to chk_clobber() for the output file
    version : str, optional
      Version string used in the output file name and handed to the write step
    out_path : str, optional
      Output folder; defaults to '/scratch/IGMSpec/'
    redo_dr14 : bool, optional
      If True, regenerate BOSS_DR14 meta and spectra;
      otherwise copy them from the v03.0 DB file

    Returns
    -------

    Raises
    ------
    IOError
      If the SPECDB environment variable is not set
    ValueError
      If the duplicates check on the main catalog fails
    """
    import os
    # Fail with a clear message instead of a TypeError from None + str
    specdb_dir = os.getenv('SPECDB')
    if specdb_dir is None:
        raise IOError("The SPECDB environment variable must be set")
    from specdb.specdb import IgmSpec
    # Read v02
    v02file = os.path.join(specdb_dir, 'IGMspec_DB_v02.1.hdf5')
    print("Loading v02")
    igmsp_v02 = IgmSpec(db_file=v02file)
    v02hdf = igmsp_v02.hdf

    # Start new file
    if out_path is None:
        out_path = '/scratch/IGMSpec/'
    # join() works whether or not out_path ends with a separator
    outfil = os.path.join(out_path, 'IGMspec_DB_{:s}.hdf5'.format(version))
    # Clobber?
    if not chk_clobber(outfil, clobber=clobber):
        return

    # Other bits
    pair_groups = ['SDSS_DR7']

    # Begin
    hdf = h5py.File(outfil, 'w')

    # Set/Check keys (and set idkey internally for other checks)
    idkey = 'IGM_ID'
    maindb, tkeys = sdbbu.start_maindb(idkey)
    group_dict = {}

    # BOSS DR14
    new_groups = get_build_groups('v03')
    gname = 'BOSS_DR14'
    # Survey flag
    flag_g = sdbbu.add_to_group_dict(gname, group_dict, skip_for_debug=True)

    if not redo_dr14:
        v030file = os.path.join(specdb_dir, 'IGMspec_DB_v03.0.hdf5')
        igmsp_v030 = IgmSpec(db_file=v030file)
        hdf.create_group(gname)
        # Copy spectra
        igmsp_v030.hdf.copy(gname + '/spec', hdf[gname])
        # Copy meta
        igmsp_v030.hdf.copy(gname + '/meta', hdf[gname])
        # Meta for maindb (a little risky as Meta needs to be aligned
        # to the spectra but they should be)
        meta = igmsp_v030['BOSS_DR14'].meta
        meta.remove_column('IGM_ID')
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g == 1),
                               close_pairs=(gname in pair_groups), debug=False)
    else:
        # BOSS DR14
        print("Working on group: {:s}".format(gname))
        # Meta
        meta = new_groups[gname].grab_meta()
        # IDs
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g == 1),
                               close_pairs=(gname in pair_groups), debug=False)
        # Spectra
        new_groups[gname].hdf5_adddata(hdf, gname, meta)
        new_groups[gname].add_ssa(hdf, gname)

    # Pop me
    new_groups.pop('BOSS_DR14')

    # Loop on new v3 groups before copying in the others
    for gname in new_groups.keys():
        print("Working on group: {:s}".format(gname))
        # Meta
        meta = new_groups[gname].grab_meta()
        # Survey flag
        flag_g = sdbbu.add_to_group_dict(gname, group_dict, skip_for_debug=True)
        # IDs
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g == 1),
                               close_pairs=(gname in pair_groups), debug=False)
        # Spectra
        new_groups[gname].hdf5_adddata(hdf, gname, meta)
        new_groups[gname].add_ssa(hdf, gname)

    # Copy over all the old stuff
    redo_groups = []
    skip_groups = ['BOSS_DR12']
    # skip_copy is honoured as passed in (it used to be reset to False here)
    if (not test) and (not skip_copy):
        old1 = get_build_groups('v01')
        old2 = get_build_groups('v02')
        # Add v02 to v01 list
        for key, item in old2.items():
            old1[key] = item
        # Loop on the combined
        for key in old1.keys():
            if key in ['catalog'] + redo_groups + skip_groups:
                continue
            print("Working on: {:s}".format(key))
            hdf.create_group(key)

            # Meta
            # Dataset.value was removed in h5py 3.0; [()] reads the dataset
            meta = Table(v02hdf[key + '/meta'][()])
            meta.remove_column('IGM_ID')

            # Survey flag
            flag_g = sdbbu.add_to_group_dict(key, group_dict,
                                             skip_for_debug=True)
            # IDs
            maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                                   first=(flag_g == 1),
                                   close_pairs=(key in pair_groups),
                                   debug=False)

            # Add meta to HDF5, carrying over its attributes
            hdf[key + '/meta'] = meta
            for akey in v02hdf[key + '/meta'].attrs.keys():
                hdf[key + '/meta'].attrs[akey] = v02hdf[key + '/meta'].attrs[akey]
            # SSA info
            old1[key].add_ssa(hdf, key)
            # Copy spectra
            v02hdf.copy(key + '/spec', hdf[key])

    skip_myers = False
    if skip_myers:
        warnings.warn("NEED TO INCLUDE MYERS!")
    else:
        # Copy from v02
        # NOTE(review): copying into a Group places the object inside it
        # under its own name (i.e. 'quasars/quasars') -- confirm readers
        # of the file expect that layout.
        _ = hdf.create_group('quasars')
        v02hdf.copy('quasars', hdf['quasars'])

    # Check for duplicates -- There is 1 pair in SDSS (i.e. 2 duplicates)
    if not sdbbu.chk_for_duplicates(maindb, dup_lim=2):
        raise ValueError("Failed duplicates")

    # Check stacking
    if not sdbbu.chk_vstack(hdf):
        print("Meta data will not stack using specdb.utils.clean_vstack")
        print("Proceed to write at your own risk..")
        pdb.set_trace()

    # Finish
    zpri = v02hdf['catalog'].attrs['Z_PRIORITY']
    sdbbu.write_hdf(hdf, str('igmspec'), maindb, zpri,
                    group_dict, version, Publisher=str(publisher))

    print("Wrote {:s} DB file".format(outfil))
    print("Update DB info in specdb.defs.dbase_info !!")
def ver02(test=False, skip_copy=False, publisher='J.X. Prochaska', clobber=False,
          version='v02', out_path=None):
    """ Build version 2.X

    Reads the v01 DB file, copies its groups (renaming the GRATING meta
    column to DISPERSER), rebuilds the groups listed for redo and adds the
    groups that are new for this version.

    Parameters
    ----------
    test : bool, optional
      Run test only; the v01 groups are not copied
    skip_copy : bool, optional
      Skip copying the data from v01
    publisher : str, optional
      Publisher name handed to the write step
    clobber : bool, optional
      Passed to chk_clobber() for the output file
    version : str, optional
      Version string; selects the new groups and names the output file
    out_path : str, optional
      Output folder; defaults to the DB/ folder beside the igmspec package

    Returns
    -------

    Raises
    ------
    IOError
      If the SPECDB environment variable is not set
    ValueError
      If the duplicates check on the main catalog fails
    """
    import os
    # Fail with a clear message instead of a TypeError from None + str
    specdb_dir = os.getenv('SPECDB')
    if specdb_dir is None:
        raise IOError("The SPECDB environment variable must be set")
    from specdb.specdb import IgmSpec
    # Read v01
    v01file = os.path.join(specdb_dir, 'IGMspec_DB_v01.hdf5')
    print("Loading v01")
    igmsp_v01 = IgmSpec(db_file=v01file)
    v01hdf = igmsp_v01.hdf
    maindb = igmsp_v01.cat.copy()

    # Start new file
    if out_path is None:
        out_path = igmspec.__path__[0] + '/../DB/'
    # join() works whether or not out_path ends with a separator
    outfil = os.path.join(out_path, 'IGMspec_DB_{:s}.hdf5'.format(version))
    # Clobber?
    if not chk_clobber(outfil, clobber=clobber):
        return
    # Begin
    hdf = h5py.File(outfil, 'w')

    # Copy over the old stuff
    redo_groups = ['HD-LLS_DR1']
    skip_groups = []
    # skip_copy is honoured as passed in (it used to be reset to False here)
    if (not test) and (not skip_copy):
        old_groups = get_build_groups('v01')
        for key in v01hdf.keys():
            if key in ['catalog', 'quasars'] + redo_groups + skip_groups:
                continue
            hdf.create_group(key)
            # Copy spectra
            v01hdf.copy(key + '/spec', hdf[key])
            # Modify v01 meta and add
            if key == 'BOSS_DR12':
                meta = boss.add_coflag(v01hdf)
            else:
                # Dataset.value was removed in h5py 3.0; [()] reads the dataset
                meta = Table(v01hdf[key + '/meta'][()])
            meta.rename_column('GRATING', 'DISPERSER')
            hdf[key + '/meta'] = meta
            for akey in v01hdf[key + '/meta'].attrs.keys():
                hdf[key + '/meta'].attrs[akey] = v01hdf[key + '/meta'].attrs[akey]
            # SSA info
            old_groups[key].add_ssa(hdf, key)

    skip_myers = False
    if skip_myers:
        warnings.warn("NEED TO INCLUDE MYERS!")
    else:
        myers.add_to_hdf(hdf)

    # Setup groups
    old_groups = get_build_groups('v01')
    pair_groups = []
    group_dict = igmsp_v01.qcat.group_dict

    # Set/Check keys (and set idkey internally for other checks)
    idkey = 'IGM_ID'
    _, tkeys = sdbbu.start_maindb(idkey)
    mkeys = list(maindb.keys())
    for key in tkeys:
        assert key in mkeys

    # Loop over the old groups to update (as needed)
    new_IDs = False
    for gname in redo_groups:
        print("Working to replace meta/spec for group: {:s}".format(gname))
        # Meta
        meta = old_groups[gname].grab_meta()
        # Group flag
        flag_g = group_dict[gname]
        # IDs
        if new_IDs:
            pdb.set_trace()  # NOT READY FOR THIS
        else:
            _, _, ids = sdbbu.set_new_ids(maindb, meta, idkey)
        # Spectra
        old_groups[gname].hdf5_adddata(hdf, gname, meta)
        old_groups[gname].add_ssa(hdf, gname)

    meta_only = False
    new_groups = get_build_groups(version)
    # Loop over the new groups
    for gname in new_groups:
        print("Working on group: {:s}".format(gname))
        # Meta
        meta = new_groups[gname].grab_meta()
        # Survey flag
        flag_g = sdbbu.add_to_group_dict(gname, group_dict, skip_for_debug=True)
        # IDs
        debug = False
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g == 1),
                               close_pairs=(gname in pair_groups), debug=debug)
        # Spectra
        if not meta_only:
            new_groups[gname].hdf5_adddata(hdf, gname, meta)
            new_groups[gname].add_ssa(hdf, gname)

    # Check for duplicates -- There is 1 pair in SDSS (i.e. 2 duplicates)
    if not sdbbu.chk_for_duplicates(maindb, dup_lim=2):
        raise ValueError("Failed duplicates")

    # Check stacking
    if not sdbbu.chk_vstack(hdf):
        print("Meta data will not stack using specdb.utils.clean_vstack")
        print("Proceed to write at your own risk..")
        pdb.set_trace()

    # Finish
    zpri = v01hdf['catalog'].attrs['Z_PRIORITY']
    # Was 'sdbbuwrite_hdf' (missing dot), a NameError at the final write
    sdbbu.write_hdf(hdf, str('igmspec'), maindb, zpri,
                    group_dict, version, Publisher=str(publisher))

    print("Wrote {:s} DB file".format(outfil))
    print("Update DB info in specdb.defs.dbase_info !!")
def hdf5_writter(path, reso, lowl, highl, outputname):
    '''
    Write spectra and meta data to an HDF5 file, one group per redshift
    bin of width 0.05 between lowl and highl

    :param path: path to the spectra file
    :param reso: delta v in km/s to rebin the data
    :param lowl float: lower limit for request redshift
    :param highl float: higher limit for request redshift
    :param outputname: the name for the output file
    :return: None; the output file is written and closed
    '''
    outfil = str(outputname)
    hdf = h5py.File(outfil, 'w')
    try:
        gdict = {}
        # creating group
        for z in np.arange(lowl, highl, 0.05):
            print(z)
            spectest1, spec2 = reading_data(path, reso, z, z + 0.05)
            print(np.round(z, 2), len(spectest1.z))
            # One group per bin, named from the rounded bin edges
            group = 'z' + str(np.round(z, 2)) + '-' + str(np.round(z + 0.05, 2))
            grp = hdf.create_group(group)
            npix = len(spec2.wavelength)
            data = sdb_u.init_data(npix)
            nspec = len(spectest1.z)
            print(npix)
            # create dataset, then grow it to hold every spectrum of the bin
            spec_set = grp.create_dataset('spec', data=data, chunks=True,
                                          maxshape=(None, ), compression='gzip')
            spec_set.resize((nspec, ))
            for ii in range(nspec):
                data['flux'][0][:npix] = spec2[ii].flux  # Should be flux values
                data['sig'][0][:npix] = spec2[ii].sig  # Should be sigma values
                data['wave'][0][:npix] = spec2[ii].wavelength  # Should be wavelength values
                # Fill
                spec_set[ii] = data

            # making meta data
            id_key = 'DESI_ID'
            maindb, tkeys = spbu.start_maindb(id_key)
            meta = Table()
            meta['zem_GROUP'] = spectest1.z
            meta['RA_GROUP'] = spectest1.ra
            meta['DEC_GROUP'] = spectest1.dec
            meta['EPOCH'] = 2000.
            meta['sig_zem'] = 0.
            # np.string_ was removed in NumPy 2.0; np.bytes_ is the same type
            meta['flag_zem'] = np.bytes_('DESI')
            meta['STYPE'] = np.bytes_('QSO')
            # Observation
            # NOTE(review): file names are converted to float -- confirm
            # they are always numeric
            meta['SPEC_FILE'] = np.array(spectest1.filename, dtype=float)
            meta['DATE-OBS'] = spectest1.date
            # NOTE(review): the line below may have been live code before
            # this file lost its line breaks -- confirm
            # meta['GROUP_ID'] = np.arange(len(meta)).astype(int)
            # Spectrograph
            meta['R'] = 3000.
            meta['TELESCOPE'] = np.bytes_('KPNO-4m')
            meta['DISPERSER'] = np.bytes_('ALL')
            meta['INSTR'] = np.bytes_('DESI')
            meta['WV_MIN'] = 3800.  # Should be the right value
            meta['WV_MAX'] = 9900.  # Should be the right value
            meta['NPIX'] = 8000  # Should be the right value
            meta['PLATE'] = np.array(np.tile(1, len(spectest1.id)), dtype=int)
            meta['FIBERID'] = spectest1.id
            meta['MOCK_ID'] = spectest1.id

            flag_g = spbu.add_to_group_dict(group, gdict)
            # maindb is restarted for every bin, so each call is a "first"
            # call (this was spelled flag_g == flag_g, i.e. always True)
            maindb = spbu.add_ids(maindb, meta, flag_g, tkeys, id_key,
                                  first=True)
            hdf[group]['meta'] = meta

        zpri = spb_defs.z_priority()
        print(flag_g)
        spbu.write_hdf(hdf, str('DESI_v05'), maindb, zpri,
                       gdict, str('v0.1'), Publisher='jding')
    finally:
        # Release the file handle even when a step above fails
        hdf.close()
def generate_by_refs(input_refs, outfile, version):
    """
    Build a specDB file according to the input references

    For every reference, the redshift table and the 'J*_spec.fits' files of
    its folder are collected.  The spectra are then ingested one instrument
    at a time, each instrument forming its own group in the output file.

    Args:
        input_refs (list): List of references from which to build the specDB
        outfile (str): Output filename
        version (str): Version number

    Raises:
        ValueError: If a reference has no folder under db_path
            (raised by list.index)
    """
    # Not elegant but it works
    all_folders = glob.glob(db_path + '/*/*')
    all_refs = [os.path.basename(ifolder) for ifolder in all_folders]

    # z_tbl
    allz_tbl = Table()

    # Loop in input refs
    all_spec_files = []
    refs_list = []
    for ref in input_refs:
        idx = all_refs.index(ref)
        # Redshift tables
        z_tbl = load_z_tables(all_folders[idx])
        allz_tbl = vstack([allz_tbl, z_tbl])
        # Grab the list of spectra
        specs = glob.glob(os.path.join(all_folders[idx], 'J*_spec.fits'))
        if len(specs) == 0:
            continue
        # Save
        all_spec_files += specs
        refs_list += [ref] * len(specs)

    # Get it started
    # HDF5 file
    hdf = h5py.File(outfile, 'w')

    # Defs
    zpri = defs.z_priority()

    # Main DB Table
    id_key = 'FRB_ID'
    maindb, tkeys = spbu.start_maindb(id_key)
    tkeys += ['ZQ']
    gdict = {}

    # Loop on Instruments
    pair_groups = []
    badf = None
    for instr in all_instruments:
        print("Working on {}".format(instr))
        fits_files, irefs = grab_files(all_spec_files, refs_list, instr)
        if len(fits_files) == 0:
            continue
        # Option dicts
        mwargs = {'toler': 1.0 * units.arcsec}
        skipz = False
        swargs = {}
        # Meta
        parse_head, mdict, fname = None, None, True
        if instr == 'SDSS':
            mdict = dict(DISPERSER='BOTH', R=2000., TELESCOPE='SDSS 2.5-M',
                         INSTR='SDSS')
            parse_head = {'DATE-OBS': 'MJD'}
            maxpix = 4000
            scale = 1e-17
        elif instr == 'FORS2':
            mdict = dict(TELESCOPE='VLT', INSTR='FORS2')
            parse_head = {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME',
                          'R': True}
            maxpix = 2050
            scale = 1e-17
        elif instr == 'MUSE':
            mdict = dict(TELESCOPE='VLT', R=2000.)
            parse_head = {'DATE-OBS': 'MJD-OBS', 'DISPERSER': 'DISPNAME',
                          'INSTR': 'INSTRUME'}
            maxpix = 4000
            scale = 1e-20
        elif instr == 'KCWI':
            mdict = dict(TELESCOPE='Keck-2')
            parse_head = {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME',
                          'INSTR': 'INSTRUME', 'R': True}
            maxpix = 4000
            scale = 1e-17
        elif instr == 'MagE':
            parse_head = {'R': True, 'DATE-OBS': 'MJD-OBS',
                          'TELESCOPE': 'TELESCOP', 'INSTR': 'INSTRUME',
                          'DISPERSER': 'DISPNAME'}
            maxpix = 18000
            scale = 1e-17
        elif instr == 'GMOS-S':
            mdict = dict(TELESCOPE='Gemini-S', INSTR='GMOS-S')
            parse_head = {'R': True, 'DATE-OBS': 'MJD-OBS',
                          'DISPERSER': 'DISPNAME'}
            maxpix = 3500
            scale = 1e-17
        elif instr == 'LRISb':
            mdict = dict(TELESCOPE='Keck-1')
            parse_head = {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME',
                          'INSTR': 'INSTRUME'}
            maxpix = 2050  # 2x binning
            # This branch used to leave `scale` unset, so it silently reused
            # the previous instrument's value (or hit a NameError when LRISb
            # came first).  1e-17 matches every other branch except MUSE.
            scale = 1e-17
        elif instr == 'GMOS-N':
            mdict = dict(TELESCOPE='Gemini-N', INSTR='GMOS-N')
            parse_head = {'R': True, 'DATE-OBS': 'MJD-OBS',
                          'DISPERSER': 'DISPNAME'}
            maxpix = 3500
            scale = 1e-17
        elif instr == 'LRISr':
            mdict = dict(TELESCOPE='Keck-1')
            parse_head = {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME',
                          'INSTR': 'INSTRUME'}
            maxpix = 2050
            scale = 1e-17
        elif instr == 'DEIMOS':
            mdict = dict(TELESCOPE='Keck-2')
            parse_head = {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME',
                          'INSTR': 'INSTRUME'}
            maxpix = 9000
            scale = 1e-17
        elif instr == 'Goodman':
            mdict = dict(TELESCOPE='SOAR', INSTR='Goodman')
            parse_head = {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME',
                          'R': True}
            maxpix = 2048
            scale = 1e-17
        elif instr == 'XSHOOTER':
            mdict = dict(TELESCOPE='VLT')
            parse_head = {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME',
                          'INSTR': 'INSTRUME'}
            maxpix = 33000
            scale = 1e-17
        else:
            # Instrument without a setup above: stop in an interactive shell
            embed(header='172')

        # Meta
        full_meta = pbuild.mk_meta(fits_files, allz_tbl, mdict=mdict,
                                   fname=fname, verbose=True,
                                   parse_head=parse_head, skip_badz=skipz,
                                   stype='GAL', chkz=True, **mwargs)
        full_meta['Ref'] = irefs
        # Survey flag
        flag_g = spbu.add_to_group_dict(instr, gdict, skip_for_debug=True)
        # IDs
        maindb = spbu.add_ids(maindb, full_meta, flag_g, tkeys, id_key,
                              first=(flag_g == 1),
                              mtch_toler=1. * units.arcsec,
                              close_pairs=(instr in pair_groups))

        # Ingest --
        pbuild.ingest_spectra(hdf, instr, full_meta, max_npix=maxpix,
                              verbose=False, badf=badf, grab_conti=False,
                              scale=scale, **swargs)

    # Write
    spbu.write_hdf(hdf, str('FRB'), maindb, zpri, gdict, version,
                   Publisher=str('JXP'))
    print("Wrote {:s} DB file".format(outfile))
    print("You probably need to move it into SPECDB")
def mk_db(dbname, tree, outfil, iztbl, version='v00', id_key='PRIV_ID',
          publisher='Unknown', **kwargs):
    """ Generate the DB

    Parameters
    ----------
    dbname : str
      Name for the database
    tree : str
      Path to top level of the tree of FITS files
      Typically, each branch in the tree corresponds to a single instrument
    outfil : str
      Output file name for the hdf5 file
    iztbl : Table or str
      If Table, see meta() docs for details on its format
      If str, it must be 'igmspec' and the user must have that DB downloaded
    version : str, optional
      Version code
    id_key : str, optional
      Name of the ID key used for the main catalog
    publisher : str, optional
      Publisher name handed to the write step
    **kwargs
      Passed to both mk_meta() and ingest_spectra()

    Returns
    -------

    Raises
    ------
    IOError
      If iztbl is neither a Table nor the string 'igmspec'
    """
    # ztbl
    if isinstance(iztbl, str):
        if iztbl != 'igmspec':
            # Any other string used to leave ztbl undefined (NameError later)
            raise IOError("Bad value for ztbl; the only allowed string is 'igmspec'")
        from specdb.specdb import IgmSpec
        igmsp = IgmSpec()
        ztbl = Table(igmsp.idb.hdf['quasars'][...])
    elif isinstance(iztbl, Table):
        ztbl = iztbl
    else:
        raise IOError("Bad type for ztbl")

    from specdb import defs

    # Find the branches
    branches = sorted(glob.glob(tree + '/*'))
    # HDF5 file
    hdf = h5py.File(outfil, 'w')

    # Defs
    zpri = defs.z_priority()
    gdict = {}

    # Main DB Table
    maindb, tkeys = spbu.start_maindb(id_key)

    # MAIN LOOP
    for branch in branches:
        # Skip files
        if not os.path.isdir(branch):
            continue
        print('Working on branch: {:s}'.format(branch))
        # Files
        fits_files, out_tup = grab_files(branch)
        meta_file, mtbl_file, ssa_file = out_tup

        # Meta (defaults, optionally overridden by the branch's meta file)
        maxpix, phead, mdict, stype = 10000, None, None, 'QSO'
        if meta_file is not None:
            # Load
            meta_dict = ltu.loadjson(meta_file)
            # Maxpix
            maxpix = meta_dict.get('maxpix', maxpix)
            # STYPE
            # NOTE(review): stype is read here but never handed to
            # mk_meta() below -- confirm whether it should be
            stype = meta_dict.get('stype', stype)
            # Parse header
            phead = meta_dict.get('parse_head', phead)
            mdict = meta_dict.get('meta_dict', mdict)
        full_meta = mk_meta(fits_files, ztbl, mtbl_file=mtbl_file,
                            parse_head=phead, mdict=mdict, **kwargs)
        # Update group dict
        group_name = os.path.basename(branch)
        flag_g = spbu.add_to_group_dict(group_name, gdict)
        # IDs -- use the caller's id_key (this was hard-coded to 'PRIV_ID',
        # which disagreed with start_maindb for any other key)
        maindb = add_ids(maindb, full_meta, flag_g, tkeys, id_key,
                         first=(flag_g == 1))
        # Ingest
        ingest_spectra(hdf, group_name, full_meta, max_npix=maxpix, **kwargs)
        # SSA
        if ssa_file is not None:
            user_ssa = ltu.loadjson(ssa_file)
            ssa_dict = default_fields(user_ssa['Title'], flux=user_ssa['flux'],
                                      fxcalib=user_ssa['fxcalib'])
            hdf[group_name]['meta'].attrs['SSA'] = json.dumps(ltu.jsonify(ssa_dict))

    # Check stacking
    if not spbu.chk_vstack(hdf):
        print("Meta data will not stack using specdb.utils.clean_vstack")
        print("Proceed to write at your own risk..")
        pdb.set_trace()

    # Write
    write_hdf(hdf, str(dbname), maindb, zpri, gdict, version,
              Publisher=publisher)
    print("Wrote {:s} DB file".format(outfil))
def ver01(test=False, clobber=False, publisher='J.X. Prochaska', **kwargs):
    """ Build version 1.0

    This version is frozen: the function raises before doing any work.

    Parameters
    ----------
    test : bool, optional
      Run test only
    clobber : bool, optional
      Overwrite an existing DB file (only read by the frozen recipe below)
    publisher : str, optional
      Publisher name handed to the write step of the frozen recipe below
    **kwargs
      Accepted but not used

    Returns
    -------

    Raises
    ------
    IOError
      Always
    """
    # THIS VERSION IS NOW FROZEN
    # Raise straight away (no debugger stop first) so that a non-interactive
    # run fails immediately instead of waiting on a pdb prompt.
    raise IOError("THIS VERSION IS NOW FROZEN")

    # NOTE: everything below is unreachable; the v01 recipe is left as it was.
    version = 'v01'
    # HDF5 file
    outfil = igmspec.__path__[0]+'/../DB/IGMspec_DB_{:s}.hdf5'.format(version)
    # Chk clobber
    if os.path.isfile(outfil):
        if clobber:
            warnings.warn("Overwriting previous DB file {:s}".format(outfil))
        else:
            warnings.warn("Not overwiting previous DB file. Use clobber=True to do so")
            return
    # Begin
    hdf = h5py.File(outfil, 'w')

    # Myers QSOs
    myers.orig_add_to_hdf(hdf)

    # Main DB Table
    idkey = 'IGM_ID'
    maindb, tkeys = sdbbu.start_maindb(idkey)

    # Group dict
    group_dict = {}

    # Organize for main loop
    groups = get_build_groups(version)

    pair_groups = ['SDSS_DR7']

    meta_only = False
    # Loop over the groups
    for gname in groups:
        # Meta
        if gname == 'SDSS_DR7':
            meta = groups[gname].grab_meta(hdf)
        else:
            meta = groups[gname].grab_meta()
        # Survey flag
        flag_g = sdbbu.add_to_group_dict(gname, group_dict)
        # IDs
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g == 1),
                               close_pairs=(gname in pair_groups))
        # Spectra
        if not meta_only:
            groups[gname].hdf5_adddata(hdf, gname, meta, idkey)

    # Check for duplicates -- There is 1 pair in SDSS (i.e. 2 duplicates)
    if not sdbbu.chk_for_duplicates(maindb, dup_lim=2):
        raise ValueError("Failed duplicates")

    # Check for junk
    zpri = defs.z_priority()

    # Finish
    sdbbu.write_hdf(hdf, str('igmspec'), maindb, zpri,
                    group_dict, version, Publisher=str(publisher))

    print("Wrote {:s} DB file".format(outfil))
    print("Update DB info in specdb.defs.dbase_info !!")
def ver03(test=False, skip_copy=False, publisher='J.X. Prochaska', clobber=False,
          version='v03.1', out_path=None, redo_dr14=False):
    """ Build version 3.X

    Remakes the maindb using BOSS DR14 as the main driver, adds the other
    v03 groups, then copies meta and spectra of the older groups from the
    v02.1 DB file.
    v3.0 will be BOSS DR14 only to speed up generation of the rest

    Parameters
    ----------
    test : bool, optional
      Run test only; the older groups are not copied
    skip_copy : bool, optional
      Skip copying the older groups from the v02.1 file
    publisher : str, optional
      Publisher name handed to the write step
    clobber : bool, optional
      Passed to chk_clobber() for the output file
    version : str, optional
      Version string used in the output file name and handed to the write step
    out_path : str, optional
      Output folder; defaults to '/scratch/IGMSpec/'
    redo_dr14 : bool, optional
      If True, regenerate BOSS_DR14 meta and spectra;
      otherwise copy them from the v03.0 DB file

    Returns
    -------

    Raises
    ------
    IOError
      If the SPECDB environment variable is not set
    ValueError
      If the duplicates check on the main catalog fails
    """
    import os
    # Fail with a clear message instead of a TypeError from None + str
    specdb_dir = os.getenv('SPECDB')
    if specdb_dir is None:
        raise IOError("The SPECDB environment variable must be set")
    from specdb.specdb import IgmSpec
    # Read v02
    v02file = os.path.join(specdb_dir, 'IGMspec_DB_v02.1.hdf5')
    print("Loading v02")
    igmsp_v02 = IgmSpec(db_file=v02file)
    v02hdf = igmsp_v02.hdf

    # Start new file
    if out_path is None:
        out_path = '/scratch/IGMSpec/'
    # join() works whether or not out_path ends with a separator
    outfil = os.path.join(out_path, 'IGMspec_DB_{:s}.hdf5'.format(version))
    # Clobber?
    if not chk_clobber(outfil, clobber=clobber):
        return

    # Other bits
    pair_groups = ['SDSS_DR7']

    # Begin
    hdf = h5py.File(outfil, 'w')

    # Set/Check keys (and set idkey internally for other checks)
    idkey = 'IGM_ID'
    maindb, tkeys = sdbbu.start_maindb(idkey)
    group_dict = {}

    # BOSS DR14
    new_groups = get_build_groups('v03')
    gname = 'BOSS_DR14'
    # Survey flag
    flag_g = sdbbu.add_to_group_dict(gname, group_dict, skip_for_debug=True)

    if not redo_dr14:
        v030file = os.path.join(specdb_dir, 'IGMspec_DB_v03.0.hdf5')
        igmsp_v030 = IgmSpec(db_file=v030file)
        hdf.create_group(gname)
        # Copy spectra
        igmsp_v030.hdf.copy(gname+'/spec', hdf[gname])
        # Copy meta
        igmsp_v030.hdf.copy(gname+'/meta', hdf[gname])
        # Meta for maindb (a little risky as Meta needs to be aligned
        # to the spectra but they should be)
        meta = igmsp_v030['BOSS_DR14'].meta
        meta.remove_column('IGM_ID')
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g == 1),
                               close_pairs=(gname in pair_groups), debug=False)
    else:
        # BOSS DR14
        print("Working on group: {:s}".format(gname))
        # Meta
        meta = new_groups[gname].grab_meta()
        # IDs
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g == 1),
                               close_pairs=(gname in pair_groups), debug=False)
        # Spectra
        new_groups[gname].hdf5_adddata(hdf, gname, meta)
        new_groups[gname].add_ssa(hdf, gname)

    # Pop me
    new_groups.pop('BOSS_DR14')

    # Loop on new v3 groups before copying in the others
    for gname in new_groups.keys():
        print("Working on group: {:s}".format(gname))
        # Meta
        meta = new_groups[gname].grab_meta()
        # Survey flag
        flag_g = sdbbu.add_to_group_dict(gname, group_dict, skip_for_debug=True)
        # IDs
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g == 1),
                               close_pairs=(gname in pair_groups), debug=False)
        # Spectra
        new_groups[gname].hdf5_adddata(hdf, gname, meta)
        new_groups[gname].add_ssa(hdf, gname)

    # Copy over all the old stuff
    redo_groups = []
    skip_groups = ['BOSS_DR12']
    # skip_copy is honoured as passed in (it used to be reset to False here)
    if (not test) and (not skip_copy):
        old1 = get_build_groups('v01')
        old2 = get_build_groups('v02')
        # Add v02 to v01 list
        for key, item in old2.items():
            old1[key] = item
        # Loop on the combined
        for key in old1.keys():
            if key in ['catalog']+redo_groups+skip_groups:
                continue
            print("Working on: {:s}".format(key))
            hdf.create_group(key)

            # Meta
            # Dataset.value was removed in h5py 3.0; [()] reads the dataset
            meta = Table(v02hdf[key+'/meta'][()])
            meta.remove_column('IGM_ID')

            # Survey flag
            flag_g = sdbbu.add_to_group_dict(key, group_dict,
                                             skip_for_debug=True)
            # IDs
            maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                                   first=(flag_g == 1),
                                   close_pairs=(key in pair_groups),
                                   debug=False)

            # Add meta to HDF5, carrying over its attributes
            hdf[key+'/meta'] = meta
            for akey in v02hdf[key+'/meta'].attrs.keys():
                hdf[key+'/meta'].attrs[akey] = v02hdf[key+'/meta'].attrs[akey]
            # SSA info
            old1[key].add_ssa(hdf, key)
            # Copy spectra
            v02hdf.copy(key+'/spec', hdf[key])

    skip_myers = False
    if skip_myers:
        warnings.warn("NEED TO INCLUDE MYERS!")
    else:
        # Copy from v02
        # NOTE(review): copying into a Group places the object inside it
        # under its own name (i.e. 'quasars/quasars') -- confirm readers
        # of the file expect that layout.
        _ = hdf.create_group('quasars')
        v02hdf.copy('quasars', hdf['quasars'])

    # Check for duplicates -- There is 1 pair in SDSS (i.e. 2 duplicates)
    if not sdbbu.chk_for_duplicates(maindb, dup_lim=2):
        raise ValueError("Failed duplicates")

    # Check stacking
    if not sdbbu.chk_vstack(hdf):
        print("Meta data will not stack using specdb.utils.clean_vstack")
        print("Proceed to write at your own risk..")
        pdb.set_trace()

    # Finish
    zpri = v02hdf['catalog'].attrs['Z_PRIORITY']
    sdbbu.write_hdf(hdf, str('igmspec'), maindb, zpri,
                    group_dict, version, Publisher=str(publisher))

    print("Wrote {:s} DB file".format(outfil))
    print("Update DB info in specdb.defs.dbase_info !!")
def ver02(test=False, skip_copy=False, publisher='J.X. Prochaska', clobber=False,
          version='v02', out_path=None):
    """ Build version 2.X

    Reads the v01 DB file, copies its groups (renaming the GRATING meta
    column to DISPERSER), rebuilds the groups listed for redo and adds the
    groups that are new for this version.

    Parameters
    ----------
    test : bool, optional
      Run test only; the v01 groups are not copied
    skip_copy : bool, optional
      Skip copying the data from v01
    publisher : str, optional
      Publisher name handed to the write step
    clobber : bool, optional
      Passed to chk_clobber() for the output file
    version : str, optional
      Version string; selects the new groups and names the output file
    out_path : str, optional
      Output folder; defaults to the DB/ folder beside the igmspec package

    Returns
    -------

    Raises
    ------
    IOError
      If the SPECDB environment variable is not set
    ValueError
      If the duplicates check on the main catalog fails
    """
    import os
    # Fail with a clear message instead of a TypeError from None + str
    specdb_dir = os.getenv('SPECDB')
    if specdb_dir is None:
        raise IOError("The SPECDB environment variable must be set")
    from specdb.specdb import IgmSpec
    # Read v01
    v01file = os.path.join(specdb_dir, 'IGMspec_DB_v01.hdf5')
    print("Loading v01")
    igmsp_v01 = IgmSpec(db_file=v01file)
    v01hdf = igmsp_v01.hdf
    maindb = igmsp_v01.cat.copy()

    # Start new file
    if out_path is None:
        out_path = igmspec.__path__[0]+'/../DB/'
    # join() works whether or not out_path ends with a separator
    outfil = os.path.join(out_path, 'IGMspec_DB_{:s}.hdf5'.format(version))
    # Clobber?
    if not chk_clobber(outfil, clobber=clobber):
        return
    # Begin
    hdf = h5py.File(outfil, 'w')

    # Copy over the old stuff
    redo_groups = ['HD-LLS_DR1']
    skip_groups = []
    # skip_copy is honoured as passed in (it used to be reset to False here)
    if (not test) and (not skip_copy):
        old_groups = get_build_groups('v01')
        for key in v01hdf.keys():
            if key in ['catalog', 'quasars']+redo_groups+skip_groups:
                continue
            hdf.create_group(key)
            # Copy spectra
            v01hdf.copy(key+'/spec', hdf[key])
            # Modify v01 meta and add
            if key == 'BOSS_DR12':
                meta = boss.add_coflag(v01hdf)
            else:
                # Dataset.value was removed in h5py 3.0; [()] reads the dataset
                meta = Table(v01hdf[key+'/meta'][()])
            meta.rename_column('GRATING', 'DISPERSER')
            hdf[key+'/meta'] = meta
            for akey in v01hdf[key+'/meta'].attrs.keys():
                hdf[key+'/meta'].attrs[akey] = v01hdf[key+'/meta'].attrs[akey]
            # SSA info
            old_groups[key].add_ssa(hdf, key)

    skip_myers = False
    if skip_myers:
        warnings.warn("NEED TO INCLUDE MYERS!")
    else:
        myers.add_to_hdf(hdf)

    # Setup groups
    old_groups = get_build_groups('v01')
    pair_groups = []
    group_dict = igmsp_v01.qcat.group_dict

    # Set/Check keys (and set idkey internally for other checks)
    idkey = 'IGM_ID'
    _, tkeys = sdbbu.start_maindb(idkey)
    mkeys = list(maindb.keys())
    for key in tkeys:
        assert key in mkeys

    # Loop over the old groups to update (as needed)
    new_IDs = False
    for gname in redo_groups:
        print("Working to replace meta/spec for group: {:s}".format(gname))
        # Meta
        meta = old_groups[gname].grab_meta()
        # Group flag
        flag_g = group_dict[gname]
        # IDs
        if new_IDs:
            pdb.set_trace()  # NOT READY FOR THIS
        else:
            _, _, ids = sdbbu.set_new_ids(maindb, meta, idkey)
        # Spectra
        old_groups[gname].hdf5_adddata(hdf, gname, meta)
        old_groups[gname].add_ssa(hdf, gname)

    meta_only = False
    new_groups = get_build_groups(version)
    # Loop over the new groups
    for gname in new_groups:
        print("Working on group: {:s}".format(gname))
        # Meta
        meta = new_groups[gname].grab_meta()
        # Survey flag
        flag_g = sdbbu.add_to_group_dict(gname, group_dict, skip_for_debug=True)
        # IDs
        debug = False
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g == 1),
                               close_pairs=(gname in pair_groups), debug=debug)
        # Spectra
        if not meta_only:
            new_groups[gname].hdf5_adddata(hdf, gname, meta)
            new_groups[gname].add_ssa(hdf, gname)

    # Check for duplicates -- There is 1 pair in SDSS (i.e. 2 duplicates)
    if not sdbbu.chk_for_duplicates(maindb, dup_lim=2):
        raise ValueError("Failed duplicates")

    # Check stacking
    if not sdbbu.chk_vstack(hdf):
        print("Meta data will not stack using specdb.utils.clean_vstack")
        print("Proceed to write at your own risk..")
        pdb.set_trace()

    # Finish
    zpri = v01hdf['catalog'].attrs['Z_PRIORITY']
    # Was 'sdbbuwrite_hdf' (missing dot), a NameError at the final write
    sdbbu.write_hdf(hdf, str('igmspec'), maindb, zpri,
                    group_dict, version, Publisher=str(publisher))

    print("Wrote {:s} DB file".format(outfil))
    print("Update DB info in specdb.defs.dbase_info !!")