Beispiel #1
0
def test_ingest():
    """Ingest the test FITS spectra into a temporary HDF5 file and check it.

    Builds a meta table from the files in the ``files`` directory next to
    this module, assigns IDs in a fresh main catalog, writes the spectra to
    ``tmp.hdf5`` under the group ``test`` and checks that the group holds a
    ``meta`` entry and that ``spec`` reads back as a numpy array.
    """
    # Begin
    id_key = 'TEST_ID'
    maindb, tkeys = spbu.start_maindb(id_key)
    # Inputs
    data_dir = os.path.join(os.path.dirname(__file__), 'files')
    ztbl = Table.read(os.path.join(data_dir, 'ztbl_E.fits'))
    ffiles, _ = pbuild.grab_files(data_dir)
    meta = pbuild.mk_meta(ffiles, ztbl, fname=True, skip_badz=True,
                          mdict=dict(INSTR='HIRES'))
    # Group and IDs
    gdict = {}
    flag_g = spbu.add_to_group_dict('COS', gdict)
    maindb = pbuild.add_ids(maindb, meta, flag_g, tkeys, id_key,
                            first=(flag_g == 1))
    # Write; the context manager closes the file even if the ingest raises
    with h5py.File('tmp.hdf5', 'w') as hdf:
        pbuild.ingest_spectra(hdf, 'test', meta)
    # Read back and test; the handle is released when the block exits
    with h5py.File('tmp.hdf5', 'r') as tmp:
        assert 'meta' in tmp['test'].keys()
        # Dataset.value was removed in h5py 3.0; [()] reads the whole dataset
        assert isinstance(tmp['test/spec'][()], np.ndarray)
Beispiel #2
0
def ver01(test=False, clobber=False, publisher='J.X. Prochaska', **kwargs):
    """ Build version 1.0 (FROZEN)

    This version is frozen: the function always raises before doing any work.
    The build recipe that follows the ``raise`` is unreachable and is kept
    only as a record of how the v01 file was assembled.

    Parameters
    ----------
    test : bool, optional
      Run test only (not used by the code below)
    clobber : bool, optional
      Overwrite an existing DB file (only read by the unreachable recipe)
    publisher : str, optional
      Publisher passed to write_hdf (only read by the unreachable recipe)

    Returns
    -------

    Raises
    ------
    IOError
      Always

    """
    # No debugger breakpoint in front of the raise: it would hang (or abort)
    # non-interactive callers before they ever see the exception.
    raise IOError("THIS VERSION IS NOW FROZEN")

    # ---- Unreachable: historical v01 recipe ----
    version = 'v01'
    # HDF5 file
    outfil = igmspec.__path__[0] + '/../DB/IGMspec_DB_{:s}.hdf5'.format(
        version)
    # Chk clobber
    if os.path.isfile(outfil):
        if clobber:
            warnings.warn("Overwriting previous DB file {:s}".format(outfil))
        else:
            warnings.warn(
                "Not overwiting previous DB file.  Use clobber=True to do so")
            return
    # Begin
    hdf = h5py.File(outfil, 'w')
    # Myers QSOs
    myers.orig_add_to_hdf(hdf)

    # Main DB Table
    idkey = 'IGM_ID'
    maindb, tkeys = sdbbu.start_maindb(idkey)

    # Group dict
    group_dict = {}

    # Organize for main loop
    groups = get_build_groups(version)

    pair_groups = ['SDSS_DR7']

    meta_only = False
    # Loop over the groups
    for gname in groups:
        # Meta (SDSS_DR7 is the only group whose grab_meta is handed the file)
        if gname == 'SDSS_DR7':
            meta = groups[gname].grab_meta(hdf)
        else:
            meta = groups[gname].grab_meta()
        # Survey flag
        flag_g = sdbbu.add_to_group_dict(gname, group_dict)
        # IDs
        maindb = sdbbu.add_ids(maindb,
                               meta,
                               flag_g,
                               tkeys,
                               idkey,
                               first=(flag_g == 1),
                               close_pairs=(gname in pair_groups))
        # Spectra
        if not meta_only:
            groups[gname].hdf5_adddata(hdf, gname, meta, idkey)

    # Check for duplicates -- There is 1 pair in SDSS (i.e. 2 duplicates)
    if not sdbbu.chk_for_duplicates(maindb, dup_lim=2):
        raise ValueError("Failed duplicates")

    # Redshift priority
    zpri = defs.z_priority()

    # Finish
    sdbbu.write_hdf(hdf,
                    str('igmspec'),
                    maindb,
                    zpri,
                    group_dict,
                    version,
                    Publisher=str(publisher))
    print("Wrote {:s} DB file".format(outfil))
    print("Update DB info in specdb.defs.dbase_info !!")
Beispiel #3
0
def ver03(test=False,
          skip_copy=False,
          publisher='J.X. Prochaska',
          clobber=False,
          version='v03.1',
          out_path=None,
          redo_dr14=False):
    """ Build version 3.X
    Reads the previous datasets from the v02.1 DB file
    Remakes the maindb using BOSS DR14 as the main driver

    v3.0 will be BOSS DR14 only to speed up generation of the rest

    Parameters
    ----------
    test : bool, optional
      Run test only; the groups of the earlier versions are not copied
    skip_copy : bool, optional
      Skip copying the groups of the earlier versions from the v02.1 file
    publisher : str, optional
      Publisher passed to write_hdf
    clobber : bool, optional
      Passed to chk_clobber; the build stops if that check fails
    version : str, optional
      Version string used in the output file name and passed to write_hdf
    out_path : str, optional
      Output folder, concatenated as-is with the file name (so it needs its
      trailing separator).  Defaults to '/scratch/IGMSpec/'
    redo_dr14 : bool, optional
      If True, regenerate the BOSS_DR14 meta and spectra.  Otherwise copy
      them from the v03.0 DB file found under $SPECDB

    Returns
    -------
    """
    import os
    from specdb.specdb import IgmSpec
    # Read v02
    v02file = os.getenv('SPECDB') + '/IGMspec_DB_v02.1.hdf5'
    print("Loading v02")
    igmsp_v02 = IgmSpec(db_file=v02file)
    v02hdf = igmsp_v02.hdf

    # Start new file
    if out_path is None:
        out_path = '/scratch/IGMSpec/'
    outfil = out_path + 'IGMspec_DB_{:s}.hdf5'.format(version)
    # Clobber?
    if not chk_clobber(outfil, clobber=clobber):
        return

    # Other bits
    pair_groups = ['SDSS_DR7']

    # Begin
    hdf = h5py.File(outfil, 'w')

    # Set/Check keys (and set idkey internally for other checks)
    idkey = 'IGM_ID'
    maindb, tkeys = sdbbu.start_maindb(idkey)
    group_dict = {}

    def _add_group_ids(cat, meta, flag_g, name):
        # Single place for the add_ids call shared by every group below
        return sdbbu.add_ids(cat,
                             meta,
                             flag_g,
                             tkeys,
                             idkey,
                             first=(flag_g == 1),
                             close_pairs=(name in pair_groups),
                             debug=False)

    # BOSS DR14
    new_groups = get_build_groups('v03')
    gname = 'BOSS_DR14'
    # Survey flag
    flag_g = sdbbu.add_to_group_dict(gname, group_dict, skip_for_debug=True)

    if not redo_dr14:
        v030file = os.getenv('SPECDB') + '/IGMspec_DB_v03.0.hdf5'
        igmsp_v030 = IgmSpec(db_file=v030file)

        hdf.create_group(gname)
        # Copy spectra
        igmsp_v030.hdf.copy(gname + '/spec', hdf[gname])
        # Copy meta
        igmsp_v030.hdf.copy(gname + '/meta', hdf[gname])
        # Meta for maindb (a little risky as Meta needs to be aligned to the
        # spectra but they should be)
        meta = igmsp_v030['BOSS_DR14'].meta
        meta.remove_column('IGM_ID')
        maindb = _add_group_ids(maindb, meta, flag_g, gname)
    else:
        print("Working on group: {:s}".format(gname))
        # Meta
        meta = new_groups[gname].grab_meta()
        # IDs
        maindb = _add_group_ids(maindb, meta, flag_g, gname)
        # Spectra
        new_groups[gname].hdf5_adddata(hdf, gname, meta)
        new_groups[gname].add_ssa(hdf, gname)

    # BOSS_DR14 is done; keep it out of the loop on the other new groups
    new_groups.pop('BOSS_DR14')

    # Loop on new v3 groups before copying in the others
    for gname in new_groups.keys():
        print("Working on group: {:s}".format(gname))
        # Meta
        meta = new_groups[gname].grab_meta()
        # Survey flag
        flag_g = sdbbu.add_to_group_dict(gname,
                                         group_dict,
                                         skip_for_debug=True)
        # IDs
        maindb = _add_group_ids(maindb, meta, flag_g, gname)
        # Spectra
        new_groups[gname].hdf5_adddata(hdf, gname, meta)
        new_groups[gname].add_ssa(hdf, gname)

    # Copy over all the old stuff
    redo_groups = []  # e.g. 'HD-LLS_DR1'
    skip_groups = ['BOSS_DR12']
    # The skip_copy argument is honoured here (it used to be reset to False)
    if (not test) and (not skip_copy):
        old1 = get_build_groups('v01')
        # Add v02 to v01 list
        old1.update(get_build_groups('v02'))
        not_copied = ['catalog'] + redo_groups + skip_groups
        # Loop on the combined
        for key in old1.keys():
            if key in not_copied:
                continue
            print("Working on: {:s}".format(key))
            hdf.create_group(key)
            # Meta -- Dataset.value was removed in h5py 3.0; [()] reads it all
            meta = Table(v02hdf[key + '/meta'][()])
            meta.remove_column('IGM_ID')
            # Survey flag
            flag_g = sdbbu.add_to_group_dict(key,
                                             group_dict,
                                             skip_for_debug=True)
            # IDs
            maindb = _add_group_ids(maindb, meta, flag_g, key)
            # Add meta to HDF5, carrying over its attributes
            hdf[key + '/meta'] = meta
            for akey, aval in v02hdf[key + '/meta'].attrs.items():
                hdf[key + '/meta'].attrs[akey] = aval
            # SSA info
            old1[key].add_ssa(hdf, key)
            # Copy spectra
            v02hdf.copy(key + '/spec', hdf[key])

    skip_myers = False
    if skip_myers:
        warnings.warn("NEED TO INCLUDE MYERS!")
    else:
        # Copy from v02
        # NOTE(review): the destination is the freshly created 'quasars'
        # group, so the copy presumably lands at 'quasars/quasars' -- confirm
        # this is the layout the readers expect.
        _ = hdf.create_group('quasars')
        v02hdf.copy('quasars', hdf['quasars'])

    # Check for duplicates -- There is 1 pair in SDSS (i.e. 2 duplicates)
    if not sdbbu.chk_for_duplicates(maindb, dup_lim=2):
        raise ValueError("Failed duplicates")

    # Check stacking
    if not sdbbu.chk_vstack(hdf):
        print("Meta data will not stack using specdb.utils.clean_vstack")
        print("Proceed to write at your own risk..")
        # Deliberate interactive stop: the user decides whether to continue
        pdb.set_trace()

    # Finish
    zpri = v02hdf['catalog'].attrs['Z_PRIORITY']
    sdbbu.write_hdf(hdf,
                    str('igmspec'),
                    maindb,
                    zpri,
                    group_dict,
                    version,
                    Publisher=str(publisher))

    print("Wrote {:s} DB file".format(outfil))
    print("Update DB info in specdb.defs.dbase_info !!")
Beispiel #4
0
def ver02(test=False,
          skip_copy=False,
          publisher='J.X. Prochaska',
          clobber=False,
          version='v02',
          out_path=None):
    """ Build version 2.X

    Reads previous datasets from the v01 DB file, replaces the groups listed
    for a redo and adds the groups that are new in this version

    Parameters
    ----------
    test : bool, optional
      Run test only; the v01 groups are not copied
    skip_copy : bool, optional
      Skip copying the data from v01
    publisher : str, optional
      Publisher passed to write_hdf
    clobber : bool, optional
      Passed to chk_clobber; the build stops if that check fails
    version : str, optional
      Version string used in the output file name, to select the new groups
      and passed to write_hdf
    out_path : str, optional
      Output folder, concatenated as-is with the file name (so it needs its
      trailing separator).  Defaults to the DB/ folder next to the igmspec
      package

    Returns
    -------
    """
    import os
    from specdb.specdb import IgmSpec
    # Read v01
    v01file = os.getenv('SPECDB') + '/IGMspec_DB_v01.hdf5'
    print("Loading v01")
    igmsp_v01 = IgmSpec(db_file=v01file)
    v01hdf = igmsp_v01.hdf
    maindb = igmsp_v01.cat.copy()

    # Start new file
    if out_path is None:
        out_path = igmspec.__path__[0] + '/../DB/'
    outfil = out_path + 'IGMspec_DB_{:s}.hdf5'.format(version)
    # Clobber?
    if not chk_clobber(outfil, clobber=clobber):
        return
    # Begin
    hdf = h5py.File(outfil, 'w')

    # Setup groups
    old_groups = get_build_groups('v01')
    pair_groups = []
    group_dict = igmsp_v01.qcat.group_dict

    # Copy over the old stuff
    redo_groups = ['HD-LLS_DR1']
    skip_groups = []  # e.g. 'BOSS_DR12', 'SDSS_DR7'
    # The skip_copy argument is honoured here (it used to be reset to False)
    if (not test) and (not skip_copy):
        not_copied = ['catalog', 'quasars'] + redo_groups + skip_groups
        for key in v01hdf.keys():
            if key in not_copied:
                continue
            hdf.create_group(key)
            # Copy spectra
            v01hdf.copy(key + '/spec', hdf[key])
            # Modify v01 meta and add
            if key == 'BOSS_DR12':
                meta = boss.add_coflag(v01hdf)
            else:
                # Dataset.value was removed in h5py 3.0; [()] reads it all
                meta = Table(v01hdf[key + '/meta'][()])
            meta.rename_column('GRATING', 'DISPERSER')
            hdf[key + '/meta'] = meta
            for akey, aval in v01hdf[key + '/meta'].attrs.items():
                hdf[key + '/meta'].attrs[akey] = aval
            # SSA info
            old_groups[key].add_ssa(hdf, key)

    skip_myers = False
    if skip_myers:
        warnings.warn("NEED TO INCLUDE MYERS!")
    else:
        myers.add_to_hdf(hdf)

    # Set/Check keys (and set idkey internally for other checks)
    idkey = 'IGM_ID'
    _, tkeys = sdbbu.start_maindb(idkey)
    mkeys = list(maindb.keys())
    for key in tkeys:
        assert key in mkeys

    # Loop over the old groups to update (as needed)
    for gname in redo_groups:
        print("Working to replace meta/spec for group: {:s}".format(gname))
        # Meta
        meta = old_groups[gname].grab_meta()
        # IDs -- the sources keep their place in the v01 catalog; assigning
        # brand new IDs for a redone group is not supported
        sdbbu.set_new_ids(maindb, meta, idkey)
        # Spectra
        old_groups[gname].hdf5_adddata(hdf, gname, meta)
        old_groups[gname].add_ssa(hdf, gname)

    meta_only = False
    new_groups = get_build_groups(version)
    # Loop over the new groups
    for gname in new_groups:
        print("Working on group: {:s}".format(gname))
        # Meta
        meta = new_groups[gname].grab_meta()
        # Survey flag
        flag_g = sdbbu.add_to_group_dict(gname,
                                         group_dict,
                                         skip_for_debug=True)
        # IDs
        maindb = sdbbu.add_ids(maindb,
                               meta,
                               flag_g,
                               tkeys,
                               idkey,
                               first=(flag_g == 1),
                               close_pairs=(gname in pair_groups),
                               debug=False)
        # Spectra
        if not meta_only:
            new_groups[gname].hdf5_adddata(hdf, gname, meta)
            new_groups[gname].add_ssa(hdf, gname)

    # Check for duplicates -- There is 1 pair in SDSS (i.e. 2 duplicates)
    if not sdbbu.chk_for_duplicates(maindb, dup_lim=2):
        raise ValueError("Failed duplicates")

    # Check stacking
    if not sdbbu.chk_vstack(hdf):
        print("Meta data will not stack using specdb.utils.clean_vstack")
        print("Proceed to write at your own risk..")
        # Deliberate interactive stop: the user decides whether to continue
        pdb.set_trace()

    # Finish
    zpri = v01hdf['catalog'].attrs['Z_PRIORITY']
    # Was the undefined name `sdbbuwrite_hdf`, which raised NameError here
    sdbbu.write_hdf(hdf,
                    str('igmspec'),
                    maindb,
                    zpri,
                    group_dict,
                    version,
                    Publisher=str(publisher))

    print("Wrote {:s} DB file".format(outfil))
    print("Update DB info in specdb.defs.dbase_info !!")
def hdf5_writter(path, reso, lowl, highl, outputname):
    '''
    Write the spectra, split in redshift bins of width 0.05, to an HDF5 DB file

    For every bin starting at z in arange(lowl, highl, 0.05) the spectra are
    loaded with reading_data and stored in a group named 'z<lo>-<hi>' that
    holds a 'spec' dataset and a 'meta' table.  All the bins share a single
    catalog, which is written at the end with write_hdf.

    :param path: path to the spectra file
    :param reso: delta v in km/s to rebin the data
    :param lowl float: lower limit for request redshift
    :param highl float: higher limit for request redshift
    :param outputname: the name for the output file
    :return: None; the result is the HDF5 file written to outputname
    '''
    z_step = 0.05
    id_key = 'DESI_ID'
    outfil = str(outputname)
    hdf = h5py.File(outfil, 'w')
    try:
        gdict = {}
        # One catalog for the whole file.  It used to be restarted inside the
        # loop, so only the last redshift bin reached write_hdf (and an empty
        # redshift range left maindb/zpri undefined).
        maindb, tkeys = spbu.start_maindb(id_key)
        zpri = spb_defs.z_priority()

        for z in np.arange(lowl, highl, z_step):
            print(z)
            spectest1, spec2 = reading_data(path, reso, z, z + z_step)
            print(np.round(z, 2), len(spectest1.z))
            # creating group
            group = 'z' + str(np.round(z, 2)) + '-' + str(
                np.round(z + z_step, 2))
            grp = hdf.create_group(group)
            npix = len(spec2.wavelength)
            data = sdb_u.init_data(npix)
            nspec = len(spectest1.z)
            print(npix)
            # create dataset, then grow it to hold every spectrum of the bin
            spec_set = grp.create_dataset('spec',
                                          data=data,
                                          chunks=True,
                                          maxshape=(None, ),
                                          compression='gzip')
            spec_set.resize((nspec, ))

            for ii in range(nspec):
                data['flux'][0][:npix] = spec2[ii].flux  # Should be flux values
                data['sig'][0][:npix] = spec2[ii].sig  # Should be sigma values
                data['wave'][0][:npix] = spec2[
                    ii].wavelength  # Should be wavelength values
                # Fill
                spec_set[ii] = data

            # making meta data
            meta = Table()
            meta['zem_GROUP'] = spectest1.z
            meta['RA_GROUP'] = spectest1.ra
            meta['DEC_GROUP'] = spectest1.dec

            meta['EPOCH'] = 2000.
            meta['sig_zem'] = 0.
            # np.bytes_ is the spelling that survives NumPy 2.0 (np.string_
            # was removed there)
            meta['flag_zem'] = np.bytes_('DESI')
            meta['STYPE'] = np.bytes_('QSO')
            # Observation
            meta['SPEC_FILE'] = np.array(spectest1.filename, dtype=float)
            meta['DATE-OBS'] = spectest1.date
            #
            meta['GROUP_ID'] = np.arange(len(meta)).astype(int)
            # Spectrograph
            meta['R'] = 3000.
            meta['TELESCOPE'] = np.bytes_('KPNO-4m')
            meta['DISPERSER'] = np.bytes_('ALL')
            meta['INSTR'] = np.bytes_('DESI')
            meta['WV_MIN'] = 3800.  # Should be the right value
            meta['WV_MAX'] = 9900.  # Should be the right value
            meta['NPIX'] = 8000  # Should be the right value
            meta['PLATE'] = np.array(np.tile(1, len(spectest1.id)), dtype=int)
            meta['FIBERID'] = spectest1.id
            meta['MOCK_ID'] = spectest1.id

            flag_g = spbu.add_to_group_dict(group, gdict)
            # Only the first group starts the catalog, as in the other
            # builders (`flag_g == flag_g` was always True)
            maindb = spbu.add_ids(maindb,
                                  meta,
                                  flag_g,
                                  tkeys,
                                  id_key,
                                  first=(flag_g == 1))
            hdf[group]['meta'] = meta
            print(flag_g)

        spbu.write_hdf(hdf,
                       str('DESI_v05'),
                       maindb,
                       zpri,
                       gdict,
                       str('v0.1'),
                       Publisher='jding')
    finally:
        # Release the file handle on the error paths too
        hdf.close()
Beispiel #6
0
def _instrument_setup(instr):
    """Return (mdict, parse_head, maxpix, scale) used to ingest one instrument.

    Raises ValueError for an instrument without an entry.
    """
    setups = {
        'SDSS': (dict(DISPERSER='BOTH', R=2000., TELESCOPE='SDSS 2.5-M', INSTR='SDSS'),
                 {'DATE-OBS': 'MJD'},
                 4000, 1e-17),
        'FORS2': (dict(TELESCOPE='VLT', INSTR='FORS2'),
                  {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME', 'R': True},
                  2050, 1e-17),
        'MUSE': (dict(TELESCOPE='VLT', R=2000.),
                 {'DATE-OBS': 'MJD-OBS', 'DISPERSER': 'DISPNAME', 'INSTR': 'INSTRUME'},
                 4000, 1e-20),
        'KCWI': (dict(TELESCOPE='Keck-2'),
                 {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME', 'INSTR': 'INSTRUME', 'R': True},
                 4000, 1e-17),
        'MagE': (None,
                 {'R': True, 'DATE-OBS': 'MJD-OBS', 'TELESCOPE': 'TELESCOP',
                  'INSTR': 'INSTRUME', 'DISPERSER': 'DISPNAME'},
                 18000, 1e-17),
        'GMOS-S': (dict(TELESCOPE='Gemini-S', INSTR='GMOS-S'),
                   {'R': True, 'DATE-OBS': 'MJD-OBS', 'DISPERSER': 'DISPNAME'},
                   3500, 1e-17),
        # maxpix: 2x binning.  The scale was never set for LRISb, so it
        # silently inherited the previous instrument's value.
        # NOTE(review): 1e-17 is taken from LRISr -- confirm.
        'LRISb': (dict(TELESCOPE='Keck-1'),
                  {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME', 'INSTR': 'INSTRUME'},
                  2050, 1e-17),
        'GMOS-N': (dict(TELESCOPE='Gemini-N', INSTR='GMOS-N'),
                   {'R': True, 'DATE-OBS': 'MJD-OBS', 'DISPERSER': 'DISPNAME'},
                   3500, 1e-17),
        'LRISr': (dict(TELESCOPE='Keck-1'),
                  {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME', 'INSTR': 'INSTRUME'},
                  2050, 1e-17),
        'DEIMOS': (dict(TELESCOPE='Keck-2'),
                   {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME', 'INSTR': 'INSTRUME'},
                   9000, 1e-17),
        'Goodman': (dict(TELESCOPE='SOAR', INSTR='Goodman'),
                    {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME', 'R': True},
                    2048, 1e-17),
        'XSHOOTER': (dict(TELESCOPE='VLT'),
                     {'DATE-OBS': 'MJD', 'DISPERSER': 'DISPNAME', 'INSTR': 'INSTRUME'},
                     33000, 1e-17),
    }
    try:
        return setups[instr]
    except KeyError:
        raise ValueError("No ingest setup for instrument: {}".format(instr))


def generate_by_refs(input_refs, outfile, version):
    """
    Build a specDB file according to the input references

    Args:
        input_refs (list):
            List of references from which to build the specDB
        outfile (str):
            Output filename
        version (str):
            Version number

    Raises:
        ValueError: if a reference has no folder under db_path, or if
            spectra are found for an instrument without an ingest setup
    """
    # Not elegant but it works
    all_folders = glob.glob(db_path+'/*/*')
    all_refs = [os.path.basename(ifolder) for ifolder in all_folders]

    # z_tbl
    allz_tbl = Table()

    # Loop in input refs
    all_spec_files = []
    refs_list = []
    for ref in input_refs:
        idx = all_refs.index(ref)
        # Redshift tables
        z_tbl = load_z_tables(all_folders[idx])
        allz_tbl = vstack([allz_tbl, z_tbl])
        # Grab the list of spectra
        specs = glob.glob(os.path.join(all_folders[idx], 'J*_spec.fits'))
        if len(specs) == 0:
            continue
        # Save, keeping one reference entry per spectrum file
        all_spec_files += specs
        refs_list += [ref]*len(specs)

    # Get it started
    # HDF5 file
    hdf = h5py.File(outfile, 'w')

    # Defs
    zpri = defs.z_priority()

    # Main DB Table
    id_key = 'FRB_ID'
    maindb, tkeys = spbu.start_maindb(id_key)
    tkeys += ['ZQ']
    gdict = {}

    # Loop on Instruments
    pair_groups = []
    badf = None
    for instr in all_instruments:
        print("Working on {}".format(instr))
        fits_files, irefs = grab_files(all_spec_files, refs_list, instr)
        if len(fits_files) == 0:
            continue
        # Option dicts
        mwargs = {}
        mwargs['toler'] = 1.0 * units.arcsec
        skipz = False
        swargs = {}
        fname = True
        # Instrument specific settings; an unknown instrument raises here
        # instead of carrying on with the previous instrument's values
        mdict, parse_head, maxpix, scale = _instrument_setup(instr)

        # Meta
        full_meta = pbuild.mk_meta(fits_files, allz_tbl, mdict=mdict, fname=fname,
                                   verbose=True, parse_head=parse_head, skip_badz=skipz,
                                   stype='GAL',
                                   chkz=True, **mwargs)
        full_meta['Ref'] = irefs
        # Survey flag
        flag_g = spbu.add_to_group_dict(instr, gdict, skip_for_debug=True)
        # IDs
        maindb = spbu.add_ids(maindb, full_meta, flag_g, tkeys, id_key,
                              first=(flag_g==1),
                              mtch_toler=1.*units.arcsec,
                              close_pairs=(instr in pair_groups))

        # Ingest --
        pbuild.ingest_spectra(hdf, instr, full_meta, max_npix=maxpix, verbose=False,
                              badf=badf, grab_conti=False, scale=scale, **swargs)

    # Write
    spbu.write_hdf(hdf, str('FRB'), maindb, zpri, gdict, version, Publisher=str('JXP'))
    print("Wrote {:s} DB file".format(outfile))
    print("You probably need to move it into SPECDB")
Beispiel #7
0
def mk_db(dbname, tree, outfil, iztbl, version='v00', id_key='PRIV_ID',
          publisher='Unknown', **kwargs):
    """ Generate the DB

    Parameters
    ----------
    dbname : str
      Name for the database
    tree : str
      Path to top level of the tree of FITS files
      Typically, each branch in the tree corresponds to a single instrument
    outfil : str
      Output file name for the hdf5 file
    iztbl : Table or str
      If Table, see meta() docs for details on its format
      If str, it must be 'igmspec' and the user must have that DB downloaded
    version : str, optional
      Version code
    id_key : str, optional
      Name of the ID key of the main catalog
    publisher : str, optional
      Publisher passed to write_hdf
    **kwargs
      Passed to both mk_meta and ingest_spectra

    Returns
    -------

    Raises
    ------
    IOError
      If iztbl is neither a Table nor the string 'igmspec'

    """
    from specdb import defs

    # ztbl -- validated before the output file is opened (and truncated)
    if isinstance(iztbl, str):
        if iztbl != 'igmspec':
            raise IOError("Bad value for ztbl; the only allowed str is 'igmspec'")
        from specdb.specdb import IgmSpec
        igmsp = IgmSpec()
        ztbl = Table(igmsp.idb.hdf['quasars'][...])
    elif isinstance(iztbl, Table):
        ztbl = iztbl
    else:
        raise IOError("Bad type for ztbl")

    # Find the branches
    branches = sorted(glob.glob(tree+'/*'))
    # HDF5 file
    hdf = h5py.File(outfil,'w')

    # Defs
    zpri = defs.z_priority()
    gdict = {}

    # Main DB Table
    maindb, tkeys = spbu.start_maindb(id_key)

    # MAIN LOOP
    for branch in branches:
        # Skip files
        if not os.path.isdir(branch):
            continue
        print('Working on branch: {:s}'.format(branch))
        # Files
        fits_files, out_tup = grab_files(branch)
        meta_file, mtbl_file, ssa_file = out_tup

        # Meta -- defaults, overridden by the optional meta file of the branch
        # NOTE(review): a 'stype' entry of the meta file used to be read here
        # but was never passed on to mk_meta; confirm whether it should be.
        maxpix, phead, mdict = 10000, None, None
        if meta_file is not None:
            # Load
            meta_dict = ltu.loadjson(meta_file)
            maxpix = meta_dict.get('maxpix', maxpix)
            # Parse header
            phead = meta_dict.get('parse_head', phead)
            mdict = meta_dict.get('meta_dict', mdict)
        full_meta = mk_meta(fits_files, ztbl, mtbl_file=mtbl_file,
                            parse_head=phead, mdict=mdict, **kwargs)
        # Update group dict; the group is named after the branch folder
        group_name = os.path.basename(branch)
        flag_g = spbu.add_to_group_dict(group_name, gdict)
        # IDs -- use the caller's id_key (it used to be hard-coded to 'PRIV_ID')
        maindb = add_ids(maindb, full_meta, flag_g, tkeys, id_key, first=(flag_g==1))
        # Ingest
        ingest_spectra(hdf, group_name, full_meta, max_npix=maxpix, **kwargs)
        # SSA
        if ssa_file is not None:
            user_ssa = ltu.loadjson(ssa_file)
            ssa_dict = default_fields(user_ssa['Title'], flux=user_ssa['flux'], fxcalib=user_ssa['fxcalib'])
            hdf[group_name]['meta'].attrs['SSA'] = json.dumps(ltu.jsonify(ssa_dict))

    # Check stacking
    if not spbu.chk_vstack(hdf):
        print("Meta data will not stack using specdb.utils.clean_vstack")
        print("Proceed to write at your own risk..")
        # Deliberate interactive stop: the user decides whether to continue
        pdb.set_trace()

    # Write
    write_hdf(hdf, str(dbname), maindb, zpri, gdict, version,
              Publisher=publisher)
    print("Wrote {:s} DB file".format(outfil))
Beispiel #8
0
def ver01(test=False, clobber=False, publisher='J.X. Prochaska', **kwargs):
    """ Build version 1.0 (FROZEN)

    This version is frozen: the function always raises before doing any work.
    The build recipe that follows the ``raise`` is unreachable and is kept
    only as a record of how the v01 file was assembled.

    Parameters
    ----------
    test : bool, optional
      Run test only (not used by the code below)
    clobber : bool, optional
      Overwrite an existing DB file (only read by the unreachable recipe)
    publisher : str, optional
      Publisher passed to write_hdf (only read by the unreachable recipe)

    Returns
    -------

    Raises
    ------
    IOError
      Always

    """
    # No debugger breakpoint in front of the raise: it would hang (or abort)
    # non-interactive callers before they ever see the exception.
    raise IOError("THIS VERSION IS NOW FROZEN")

    # ---- Unreachable: historical v01 recipe ----
    version = 'v01'
    # HDF5 file
    outfil = igmspec.__path__[0]+'/../DB/IGMspec_DB_{:s}.hdf5'.format(version)
    # Chk clobber
    if os.path.isfile(outfil):
        if clobber:
            warnings.warn("Overwriting previous DB file {:s}".format(outfil))
        else:
            warnings.warn("Not overwiting previous DB file.  Use clobber=True to do so")
            return
    # Begin
    hdf = h5py.File(outfil,'w')

    # Myers QSOs
    myers.orig_add_to_hdf(hdf)

    # Main DB Table
    idkey = 'IGM_ID'
    maindb, tkeys = sdbbu.start_maindb(idkey)

    # Group dict
    group_dict = {}

    # Organize for main loop
    groups = get_build_groups(version)

    pair_groups = ['SDSS_DR7']

    meta_only = False
    # Loop over the groups
    for gname in groups:
        # Meta (SDSS_DR7 is the only group whose grab_meta is handed the file)
        if gname == 'SDSS_DR7':
            meta = groups[gname].grab_meta(hdf)
        else:
            meta = groups[gname].grab_meta()
        # Survey flag
        flag_g = sdbbu.add_to_group_dict(gname, group_dict)
        # IDs
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g==1), close_pairs=(gname in pair_groups))
        # Spectra
        if not meta_only:
            groups[gname].hdf5_adddata(hdf, gname, meta, idkey)

    # Check for duplicates -- There is 1 pair in SDSS (i.e. 2 duplicates)
    if not sdbbu.chk_for_duplicates(maindb, dup_lim=2):
        raise ValueError("Failed duplicates")

    # Redshift priority
    zpri = defs.z_priority()

    # Finish
    sdbbu.write_hdf(hdf, str('igmspec'), maindb, zpri,
                    group_dict, version, Publisher=str(publisher))
    print("Wrote {:s} DB file".format(outfil))
    print("Update DB info in specdb.defs.dbase_info !!")
Beispiel #9
0
def ver03(test=False, skip_copy=False, publisher='J.X. Prochaska', clobber=False,
          version='v03.1', out_path=None, redo_dr14=False):
    """ Build version 3.X
    Reads several previous datasets from the v02.1 DB file
    Remakes the maindb using BOSS DR14 as the main driver

    v3.0 will be BOSS DR14 only to speed up generation of the rest

    Parameters
    ----------
    test : bool, optional
      Run test only;  the groups of the earlier versions are not copied in
    skip_copy : bool, optional
      Skip copying the groups of the earlier versions from the v02.1 file
    publisher : str, optional
      Passed to sdbbu.write_hdf as Publisher
    clobber : bool, optional
      Passed to chk_clobber;  the build is abandoned if that returns False
    version : str, optional
      Version tag used in the output filename and passed to sdbbu.write_hdf
    out_path : str, optional
      Prefix prepended to the output filename
      Defaults to '/scratch/IGMSpec/'
    redo_dr14 : bool, optional
      If True, regenerate the BOSS_DR14 meta and spectra
      Otherwise copy them from the v03.0 DB file found under $SPECDB

    Returns
    -------
    None

    Raises
    ------
    ValueError
      If sdbbu.chk_for_duplicates fails on the new maindb
    """
    import os
    from specdb.specdb import IgmSpec

    # Read v02
    v02file = os.getenv('SPECDB')+'/IGMspec_DB_v02.1.hdf5'
    print("Loading v02")
    igmsp_v02 = IgmSpec(db_file=v02file)
    v02hdf = igmsp_v02.hdf

    # Start new file
    if out_path is None:
        out_path = '/scratch/IGMSpec/'
    outfil = out_path + 'IGMspec_DB_{:s}.hdf5'.format(version)
    # Clobber?
    if not chk_clobber(outfil, clobber=clobber):
        return

    # Groups that are ingested with close_pairs=True
    pair_groups = ['SDSS_DR7']

    # Begin
    hdf = h5py.File(outfil, 'w')

    # Set/Check keys (and set idkey internally for other checks)
    idkey = 'IGM_ID'
    maindb, tkeys = sdbbu.start_maindb(idkey)
    group_dict = {}

    # BOSS DR14 is added first, ahead of every other group
    new_groups = get_build_groups('v03')
    gname = 'BOSS_DR14'
    # Survey flag
    flag_g = sdbbu.add_to_group_dict(gname, group_dict, skip_for_debug=True)

    if not redo_dr14:
        # Re-use the BOSS DR14 group of the v03.0 file
        v030file = os.getenv('SPECDB')+'/IGMspec_DB_v03.0.hdf5'
        igmsp_v030 = IgmSpec(db_file=v030file)

        hdf.create_group(gname)
        # Copy spectra
        igmsp_v030.hdf.copy(gname+'/spec', hdf[gname])
        # Copy meta
        igmsp_v030.hdf.copy(gname+'/meta', hdf[gname])
        # Meta for maindb (a little risky as Meta needs to be aligned to the spectra but they should be)
        meta = igmsp_v030['BOSS_DR14'].meta
        # The old IDs are dropped before add_ids is run against the new maindb
        meta.remove_column('IGM_ID')
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g==1), close_pairs=(gname in pair_groups),
                               debug=False)
    else:
        # Generate BOSS DR14 from scratch
        print("Working on group: {:s}".format(gname))
        # Meta
        meta = new_groups[gname].grab_meta()
        # IDs
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g==1), close_pairs=(gname in pair_groups),
                               debug=False)
        # Spectra
        new_groups[gname].hdf5_adddata(hdf, gname, meta)
        new_groups[gname].add_ssa(hdf, gname)

    # BOSS DR14 is done;  remove it so the loop below does not repeat it
    new_groups.pop('BOSS_DR14')

    # Loop on new v3 groups before copying in the others
    for gname in new_groups:
        print("Working on group: {:s}".format(gname))
        # Meta
        meta = new_groups[gname].grab_meta()
        # Survey flag
        flag_g = sdbbu.add_to_group_dict(gname, group_dict, skip_for_debug=True)
        # IDs
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g==1), close_pairs=(gname in pair_groups),
                               debug=False)
        # Spectra
        new_groups[gname].hdf5_adddata(hdf, gname, meta)
        new_groups[gname].add_ssa(hdf, gname)

    # Copy over all the old stuff
    redo_groups = []
    skip_groups = ['BOSS_DR12']
    if (not test) and (not skip_copy):
        old1 = get_build_groups('v01')
        old2 = get_build_groups('v02')
        # Add v02 to v01 list
        for key, item in old2.items():
            old1[key] = item
        # Names that are never copied from the v02 file
        no_copy = ['catalog'] + redo_groups + skip_groups
        # Loop on the combined
        for key in old1:
            if key in no_copy:
                continue
            print("Working on: {:s}".format(key))
            hdf.create_group(key)
            # Meta -- dataset[()] reads the full dataset (Dataset.value is gone in h5py 3)
            meta = Table(v02hdf[key+'/meta'][()])
            meta.remove_column('IGM_ID')
            # Survey flag
            flag_g = sdbbu.add_to_group_dict(key, group_dict, skip_for_debug=True)
            # IDs
            maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                                   first=(flag_g==1), close_pairs=(key in pair_groups),
                                   debug=False)
            # Add meta to HDF5, carrying over the attributes of the v02 meta
            hdf[key+'/meta'] = meta
            for akey in v02hdf[key+'/meta'].attrs.keys():
                hdf[key+'/meta'].attrs[akey] = v02hdf[key+'/meta'].attrs[akey]
            # SSA info
            old1[key].add_ssa(hdf, key)
            # Copy spectra
            v02hdf.copy(key+'/spec', hdf[key])

    # Manual switch kept for development runs
    skip_myers = False
    if skip_myers:
        warnings.warn("NEED TO INCLUDE MYERS!")
    else:
        # Copy from v02
        # NOTE(review): with a Group as destination, h5py names the copy after
        #   the source basename, so this presumably lands at 'quasars/quasars'
        #   -- confirm that this layout is what the readers expect
        hdf.create_group('quasars')
        v02hdf.copy('quasars', hdf['quasars'])

    # Check for duplicates -- There is 1 pair in SDSS (i.e. 2 duplicates)
    if not sdbbu.chk_for_duplicates(maindb, dup_lim=2):
        raise ValueError("Failed duplicates")

    # Check stacking
    if not sdbbu.chk_vstack(hdf):
        print("Meta data will not stack using specdb.utils.clean_vstack")
        print("Proceed to write at your own risk..")
        # Drop into the debugger so the user decides whether to continue
        pdb.set_trace()

    # Finish -- the redshift priority is inherited from the v02 catalog
    zpri = v02hdf['catalog'].attrs['Z_PRIORITY']
    sdbbu.write_hdf(hdf, str('igmspec'), maindb, zpri,
                    group_dict, version, Publisher=str(publisher))

    print("Wrote {:s} DB file".format(outfil))
    print("Update DB info in specdb.defs.dbase_info !!")
Beispiel #10
0
def ver02(test=False, skip_copy=False, publisher='J.X. Prochaska', clobber=False,
          version='v02', out_path=None):
    """ Build version 2.X

    Reads previous datasets from the v01 DB file, starting the new
    maindb as a copy of the v01 catalog

    Parameters
    ----------
    test : bool, optional
      Run test only;  the v01 groups are not copied in
    skip_copy : bool, optional
      Skip copying the data from v01
    publisher : str, optional
      Passed to sdbbu.write_hdf as Publisher
    clobber : bool, optional
      Passed to chk_clobber;  the build is abandoned if that returns False
    version : str, optional
      Version tag used in the output filename, to select the new
      groups (get_build_groups) and passed to sdbbu.write_hdf
    out_path : str, optional
      Prefix prepended to the output filename
      Defaults to the DB/ folder next to the igmspec package

    Returns
    -------
    None

    Raises
    ------
    ValueError
      If sdbbu.chk_for_duplicates fails on the new maindb
    """
    import os
    from specdb.specdb import IgmSpec

    # Read v01
    v01file = os.getenv('SPECDB')+'/IGMspec_DB_v01.hdf5'
    print("Loading v01")
    igmsp_v01 = IgmSpec(db_file=v01file)
    v01hdf = igmsp_v01.hdf
    maindb = igmsp_v01.cat.copy()

    # Start new file
    if out_path is None:
        out_path = igmspec.__path__[0]+'/../DB/'
    outfil = out_path + 'IGMspec_DB_{:s}.hdf5'.format(version)
    # Clobber?
    if not chk_clobber(outfil, clobber=clobber):
        return
    # Begin
    hdf = h5py.File(outfil, 'w')

    # Builders of the v01 groups (used for the copy and for the redo loop)
    old_groups = get_build_groups('v01')

    # Copy over the old stuff
    redo_groups = ['HD-LLS_DR1']
    skip_groups = []
    if (not test) and (not skip_copy):
        # Names that are never copied from the v01 file
        no_copy = ['catalog', 'quasars'] + redo_groups + skip_groups
        for key in v01hdf.keys():
            if key in no_copy:
                continue
            hdf.create_group(key)
            # Copy spectra
            v01hdf.copy(key+'/spec', hdf[key])
            # Modify v01 meta and add
            if key == 'BOSS_DR12':
                meta = boss.add_coflag(v01hdf)
            else:
                # dataset[()] reads the full dataset (Dataset.value is gone in h5py 3)
                meta = Table(v01hdf[key+'/meta'][()])
            meta.rename_column('GRATING', 'DISPERSER')
            hdf[key+'/meta'] = meta
            # Carry over the attributes of the v01 meta
            for akey in v01hdf[key+'/meta'].attrs.keys():
                hdf[key+'/meta'].attrs[akey] = v01hdf[key+'/meta'].attrs[akey]
            # SSA info
            old_groups[key].add_ssa(hdf, key)

    # Manual switch kept for development runs
    skip_myers = False
    if skip_myers:
        warnings.warn("NEED TO INCLUDE MYERS!")
    else:
        myers.add_to_hdf(hdf)

    # Setup groups
    pair_groups = []
    group_dict = igmsp_v01.qcat.group_dict
    # Set/Check keys (and set idkey internally for other checks)
    idkey = 'IGM_ID'
    _, tkeys = sdbbu.start_maindb(idkey)
    mkeys = list(maindb.keys())
    for key in tkeys:
        assert key in mkeys

    # Loop over the old groups to update (as needed)
    new_IDs = False
    for gname in redo_groups:
        print("Working to replace meta/spec for group: {:s}".format(gname))
        # Meta
        meta = old_groups[gname].grab_meta()
        # Group flag
        flag_g = group_dict[gname]
        # IDs
        if new_IDs:
            pdb.set_trace()  # NOT READY FOR THIS
        else:
            # Return values are not needed here;  presumably the call sets
            #   the IDs on meta in place -- TODO confirm
            sdbbu.set_new_ids(maindb, meta, idkey)
        # Spectra
        old_groups[gname].hdf5_adddata(hdf, gname, meta)
        old_groups[gname].add_ssa(hdf, gname)

    meta_only = False
    new_groups = get_build_groups(version)
    # Loop over the new groups
    for gname in new_groups:
        print("Working on group: {:s}".format(gname))
        # Meta
        meta = new_groups[gname].grab_meta()
        # Survey flag
        flag_g = sdbbu.add_to_group_dict(gname, group_dict, skip_for_debug=True)
        # IDs
        maindb = sdbbu.add_ids(maindb, meta, flag_g, tkeys, idkey,
                               first=(flag_g==1), close_pairs=(gname in pair_groups),
                               debug=False)
        # Spectra
        if not meta_only:
            new_groups[gname].hdf5_adddata(hdf, gname, meta)
            new_groups[gname].add_ssa(hdf, gname)

    # Check for duplicates -- There is 1 pair in SDSS (i.e. 2 duplicates)
    if not sdbbu.chk_for_duplicates(maindb, dup_lim=2):
        raise ValueError("Failed duplicates")

    # Check stacking
    if not sdbbu.chk_vstack(hdf):
        print("Meta data will not stack using specdb.utils.clean_vstack")
        print("Proceed to write at your own risk..")
        # Drop into the debugger so the user decides whether to continue
        pdb.set_trace()

    # Finish -- the redshift priority is inherited from the v01 catalog
    zpri = v01hdf['catalog'].attrs['Z_PRIORITY']
    sdbbu.write_hdf(hdf, str('igmspec'), maindb, zpri,
                    group_dict, version, Publisher=str(publisher))

    print("Wrote {:s} DB file".format(outfil))
    print("Update DB info in specdb.defs.dbase_info !!")