Example #1
def Build_TinkerCatalog(Mrcut=18):
    ''' Preprocess the group catalog data into a more Python-friendly format
    with the appropriate *little h* corrections.
    '''
    h = 0.7
    M_cut = Tinker_Masscut(Mrcut)
    # Read Group Catalog GalData
    galdata_file = ''.join([
        UT.dir_dat(), 'tinker2011catalogs/', 'clf_groups_M',
        str(Mrcut), '_',
        str(M_cut), '_D360.', 'galdata_corr.fits'
    ])
    gal_data = mrdfits(galdata_file)

    catalog = {}
    for column in gal_data.__dict__.keys():
        column_data = getattr(gal_data, column)
        if column == 'stellmass':
            # stellmass is in units of Msol/h^2, so remove the little h
            column_data = column_data / h**2
            catalog['mass'] = np.log10(column_data)  # convert to log M*
        elif column == 'ssfr':
            column_data += np.log10(h**2)  # corresponding little h correction
            catalog['ssfr'] = column_data
        elif column == 'cz':  # convert cz to redshift
            catalog['z'] = column_data / 299792.458  # speed of light (km/s)
        elif column in ['ra', 'dec']:
            catalog[column] = column_data * 57.2957795  # radians to degrees
        else:
            catalog[column] = column_data
    catalog['sfr'] = catalog['mass'] + catalog[
        'ssfr']  # calculate log SFR from log M* and log SSFR

    # Read Group Catalog probability
    prob_file = ''.join([
        UT.dir_dat(), 'tinker2011catalogs/', 'clf_groups_M',
        str(Mrcut), '_',
        str(M_cut), '_D360.', 'prob.fits'
    ])
    prob_data = mrdfits(prob_file)  # import probability file
    for column in prob_data.__dict__.keys():
        catalog[column] = getattr(prob_data, column)

    tinker_file = ''.join([
        UT.dir_dat(), 'tinker2011catalogs/', 'GroupCat.Mr',
        str(Mrcut), '.Mass',
        str(M_cut), '.D360.hdf5'
    ])

    f = h5py.File(tinker_file, 'w')
    grp = f.create_group('data')
    for key in catalog.keys():
        grp.create_dataset(key, data=catalog[key])

    f.close()
    return None
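# A minimal read-back sketch for the hdf5 file written above, reusing the same
# UT.dir_dat() root and Tinker_Masscut() helper; the function name
# Read_TinkerCatalog is hypothetical, not part of the original module.
def Read_TinkerCatalog(Mrcut=18):
    ''' Read the hdf5 group catalog written by Build_TinkerCatalog into a dict
    '''
    M_cut = Tinker_Masscut(Mrcut)
    tinker_file = ''.join([
        UT.dir_dat(), 'tinker2011catalogs/', 'GroupCat.Mr',
        str(Mrcut), '.Mass',
        str(M_cut), '.D360.hdf5'
    ])
    catalog = {}
    with h5py.File(tinker_file, 'r') as f:
        for key in f['data'].keys():
            catalog[key] = f['data'][key][...]  # read each dataset into memory
    return catalog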
Example #2
    def Read(self): 
        ''' Read in the envcount catalogs (SDSS for z < 0.2, PRIMUS otherwise)
        and apply the environment, redshift, mass completeness, and edge cuts.
        '''
        sfr_class = [] 
        for sfq in ['star-forming', 'quiescent']:
            file_dir = code_dir()+'dat/observations/envcount/'
            if sfq == 'star-forming': 
                sfq_str = 'active'
            else: 
                sfq_str = sfq 

            sdss_file = ''.join([file_dir, 
                    'envcount_cylr2.5h35_thresh75_sdss_', sfq_str, '_z0.05_0.12_primuszerr.fits']) 
            primus_file = ''.join([file_dir, 
                    'envcount_cylr2.5h35_thresh75_', sfq_str, '_z0.2_1.0_lit.fits']) 
            # redshift determines SDSS or PRIMUS sample
            if self.redshift_bin < 0.2: 
                galdata = mrdfits(sdss_file) 
            else: 
                galdata = mrdfits(primus_file)
            # environment cuts 
            if self.environment == 'no': 
                envcuts = (galdata.envcount == 0.)
            else: 
                raise NotImplementedError
            # redshift cuts  
            zlow = [0., 0.2, 0.4, 0.6, 0.8]
            zhigh = [0.2, 0.4, 0.6, 0.8, 1.0]
            i_z = int(np.floor(self.redshift_bin/0.2))
            zcuts = (galdata.redshift >= zlow[i_z]) & (galdata.redshift < zhigh[i_z])

            all_cuts = np.where(
                    envcuts & zcuts & 
                    (galdata.mass > galdata.masslimit) & 
                    (galdata.edgecut == 1) 
                    ) 
            for key in galdata.__dict__.keys(): 
                try: 
                    setattr(self, key, 
                            np.concatenate([getattr(self,key), getattr(galdata, key)[all_cuts]]))
                except AttributeError: 
                    setattr(self, key, getattr(galdata, key)[all_cuts])
            
            sfr_class_tmp = np.chararray(len(all_cuts[0]), itemsize=16)
            sfr_class_tmp[:] = sfq

            sfr_class.append(sfr_class_tmp)

        setattr(self, 'sfr_class', np.concatenate(sfr_class))

        return None
Example #3
    def _iSEDfitMatch(self): 
        ''' Match the GroupCat galaxies with iSEDfit galaxy properties from 
        John Moustakas's MFData objects. The matching is done using PyDL's 
        spherematch
        '''
        # import SDSS MFdata catalog
        mfdata = mrdfits(code_dir() + 'dat/observations/mfdata_all_supergrid01_sdss.fits.gz') 
        spherematch_time = time.time()
        match1, match2, d_match = spherematch(
                self.ra, self.dec, mfdata.ra, mfdata.dec, 0.001)
        print('spherematch took %f seconds' % (time.time() - spherematch_time))

        iSEDfit_mass = np.repeat(-999., len(self.ra))
        iSEDfit_SFR = np.repeat(-999., len(self.ra))
        iSEDfit_SSFR = np.repeat(-999., len(self.ra))
        
        if np.max(self.z[match1] - mfdata.z[match2]) > 0.1: 
            raise ValueError('matched galaxies have discrepant redshifts')
        iSEDfit_mass[match1] = mfdata.mass[match2]
        iSEDfit_SFR[match1] = mfdata.sfr[match2]
        iSEDfit_SSFR[match1] = iSEDfit_SFR[match1]-iSEDfit_mass[match1]
        
        setattr(self, 'iSEDfit_mass', iSEDfit_mass) 
        setattr(self, 'iSEDfit_sfr', iSEDfit_SFR) 
        setattr(self, 'iSEDfit_ssfr', iSEDfit_SSFR) 
        return None
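# A standalone sketch of the spherematch call used in _iSEDfitMatch, assuming
# pydl's spherematch (from pydl.pydlutils.spheregroup): it returns index
# arrays into the first and second coordinate lists plus their separations,
# all in degrees; the 0.001 deg matchlength above is ~3.6 arcsec. Toy values.
import numpy as np
from pydl.pydlutils.spheregroup import spherematch

ra1 = np.array([150.0, 210.5])        # toy coordinates (deg)
dec1 = np.array([2.20, -0.75])
ra2 = np.array([210.5001, 150.0001])  # the same objects, slightly offset
dec2 = np.array([-0.7501, 2.2001])
m1, m2, d12 = spherematch(ra1, dec1, ra2, dec2, 0.001)
# ra1[m1] and ra2[m2] are aligned matched pairs; d12 is their separation (deg)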
Example #4
    def _ReadGroupCat_GalData(self): 
        ''' Read the group catalog galaxy data (galdata_corr.fits) and apply 
        the little h corrections.
        '''
        h = 0.7
        
        galdata_file = ''.join([
            code_dir(), 'dat/observations/', 
            'clf_groups_M', str(self.Mrcut), '_', str(self.masscut), '_D360.', 
            'galdata_corr.fits']) 
        gal_data = mrdfits(galdata_file) 
        for column in gal_data.__dict__.keys(): 
            column_data = getattr(gal_data, column)
            if column == 'stellmass': 
                # stellmass is in units of Msol/h^2
                column_data = column_data / h**2
                # convert to log Mass
                setattr(gal_data, 'mass', np.log10(column_data))
            elif column == 'ssfr': 
                column_data = column_data + np.log10(h**2)
                # corresponding little h correction
                setattr(gal_data, 'ssfr', column_data)    

            elif column == 'cz': 
                # convert cz to redshift
                setattr(gal_data, 'z', column_data/299792.458)
            else: 
                pass

        setattr(gal_data, 'sfr', gal_data.mass + gal_data.ssfr)  # log SFR = log M* + log SSFR
        
        return gal_data
Example #5
    def _ReadGroupCat_Prob(self): 
        ''' wrapper for reading in probability galaxy data 
        '''
        prob_file = ''.join([
            code_dir(), 'dat/observations/', 
            'clf_groups_M', str(self.Mrcut), '_', str(self.masscut), '_D360.', 
            'prob.fits']) 

        prob_data = mrdfits(prob_file)       # import probability file 
        return prob_data
Example #6
    def Write(self):
        ''' Write into hdf5 file 
        '''
        # Read in .fits file
        drpall = Fits.mrdfits(self.fits_file)
        # write to hdf5 file
        f = h5py.File(self.hdf5_file, 'w')
        nsa_grp = f.create_group('nsa_data')

        for key in drpall.__dict__.keys():
            nsa_grp.create_dataset(key, data=getattr(drpall, key))
        f.close()
        return None
Example #7
def PreProcess(name, version): 
    ''' PreProcess disparate data files in order to generate a consistent 
    data file that is convenient for the fortran code to read in.

    notes
    -----
    * this code is messy because of all the different ways people distribute 
        catalogs. I have not figured out a dignified way to do this...
    '''
    CI = CatalogInventory()
    catalog_info = CI.LoadCatalog(name, version)

    for ftype in ['data', 'ran']:  # avoid shadowing the builtin 'type'
        if name == 'eBOSS_QSO': # eBOSS QSO catalogs
            if version == 'v1.6_N': # version 1.6 North 
                # this particular catalog is for the P(k) comparison tests of 
                # https://trac.sdss.org/wiki/eBOSS/QGC/pk_comparison
                file_in = ''.join([catalog_info['dir_data'], catalog_info['unprocessed_'+ftype+'_file']]) 
                
                data_fits = mrdfits(file_in)  # data from .fits file ... ugh 
                
                cuts = np.where((data_fits.z > float(catalog_info['z_min'])) & 
                        (data_fits.z < float(catalog_info['z_max'])))
                print('impose redshift limit %s < z < %s' %
                      (catalog_info['z_min'], catalog_info['z_max']))
                print('%f of galaxies removed in preprocessing' %
                      (1. - float(len(cuts[0]))/float(len(data_fits.z))))

                # of the many data columns extract: ra, dec, z, n(z), wfkp, wsys, wnoz, wcp
                columns = catalog_info['unprocessed_'+ftype+'_column']
                
                data_list, data_fmt = [], []  # list of all the data, and list of data formats
                for col in columns:
                    data_list.append(getattr(data_fits, col)[cuts]) 
                    if col == 'nz': 
                        fmt = '%.5e'
                    else: 
                        fmt = '%10.5f'
                    data_fmt.append(fmt) 
                data_hdr = 'columns : '+', '.join(columns)

                # output file name 
                if ftype == 'data':  # galaxy catalog 
                    file_out = ''.join([catalog_info['dir_data'], catalog_info['file'], '.dat']) 
                elif ftype == 'ran':  # random catalog
                    file_out = ''.join([catalog_info['dir_data'], catalog_info['file'], '.ran']) 
        else: 
            raise NotImplementedError
    
        np.savetxt(file_out, 
                (np.vstack(np.array(data_list))).T, 
                fmt=data_fmt, delimiter='\t', header=data_hdr) 
    return None      
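# A minimal sketch of reading the preprocessed ASCII back in; np.loadtxt
# mirrors the np.savetxt call above, and the column order follows
# catalog_info['unprocessed_*_column'] (for eBOSS_QSO v1.6_N: ra, dec, z,
# nz, wfkp, wsys, wnoz, wcp, per the comment in PreProcess). The helper name
# read_preprocessed is hypothetical.
def read_preprocessed(file_out, columns):
    ''' Read a tab-delimited catalog written by PreProcess into a column dict '''
    data = np.loadtxt(file_out, unpack=True)  # header lines start with '#'
    return dict(zip(columns, data))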
Example #8
def Build_MPAJHU_TinkerCatalog_ASCII(Mrcut=18):
    ''' Append MPA-JHU SSFR values to the Tinker et al. (2011) catalog.
    The main purpose is to try to reproduce the Kauffmann et al. (2013) results. 
    Galaxies are matched to each other through spherematch. 
    '''
    # import Tinker et al. (2011) catalog with specified Mr cut
    catalog = TinkerCatalog(Mrcut=Mrcut)

    # import MPA-JHU catalog
    mpajhu_gals = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_info_dr7_v5_2.fit']))
    # SFR total
    mpajhu_sfrtot = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_totsfr_dr7_v5_2.fits']))
    # SFR fiber
    mpajhu_sfrfib = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_fibsfr_dr7_v5_2.fits']))
    # SSFR total
    mpajhu_ssfrtot = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_totspecsfr_dr7_v5_2.fits']))
    # SSFR fiber
    mpajhu_ssfrfib = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_fibspecsfr_dr7_v5_2.fits']))
    # stellar mass total
    mpajhu_masstot = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'totlgm_dr7_v5_2.fit']))
    # stellar mass fiber
    mpajhu_massfib = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'fiblgm_dr7_v5_2.fit']))

    t_spherematch = time.time()
    match = spherematch(catalog['ra'], catalog['dec'], mpajhu_gals.ra,
                        mpajhu_gals.dec, 0.000833333)
    print('Spherematch with matchlength = %f' % 0.000833333)
    print('takes %f seconds' % (time.time() - t_spherematch))
    print('%f of the VAGC galaxies do not have matches, likely due to fiber collisions' %
          (1. - float(len(match[0])) / float(len(catalog['ra']))))
    if len(match[0]) != len(np.unique(match[0])):
        raise ValueError('multiple matches to a single Tinker galaxy')

    # save the MPAJHU indices, just in case
    catalog['mpajhu_index'] = np.repeat(-999, len(catalog['ra']))
    catalog['mpajhu_index'][match[0]] = match[1]

    # append SFR, SSFR, and mass values to catalog
    for col in [
            'sfr_tot_mpajhu', 'sfr_fib_mpajhu', 'ssfr_tot_mpajhu',
            'ssfr_fib_mpajhu', 'mass_tot_mpajhu', 'mass_fib_mpajhu'
    ]:  # initiate arrays
        catalog[col] = np.repeat(-999., len(catalog['ra']))

    catalog['sfr_tot_mpajhu'][match[0]] = mpajhu_sfrtot.median[match[1]]
    catalog['sfr_fib_mpajhu'][match[0]] = mpajhu_sfrfib.median[match[1]]
    catalog['ssfr_tot_mpajhu'][match[0]] = mpajhu_ssfrtot.median[match[1]]
    catalog['ssfr_fib_mpajhu'][match[0]] = mpajhu_ssfrfib.median[match[1]]
    catalog['mass_tot_mpajhu'][match[0]] = mpajhu_masstot.median[match[1]]
    catalog['mass_fib_mpajhu'][match[0]] = mpajhu_massfib.median[match[1]]

    first_cols = [
        'id_gal', 'ra', 'dec', 'z', 'mass', 'sfr', 'ssfr', 'mass_tot_mpajhu',
        'mass_fib_mpajhu', 'sfr_tot_mpajhu', 'sfr_fib_mpajhu',
        'ssfr_tot_mpajhu', 'ssfr_fib_mpajhu'
    ]

    data_fmt = []
    data_list = []
    for key in first_cols:
        data_list.append(catalog[key])
        if key == 'id_gal':
            data_fmt.append('%i')
        else:
            data_fmt.append('%10.5f')

    later_cols = []
    for key in catalog.keys():
        if key not in first_cols:
            later_cols.append(key)

    for key in later_cols:
        data_list.append(catalog[key])
        if 'id' in key:
            data_fmt.append('%i')
        elif 'index' in key:
            data_fmt.append('%i')
        elif key == 'n_sersic':
            data_fmt.append('%i')
        elif key == 'stellmass':
            data_fmt.append('%1.5e')
        else:
            data_fmt.append('%10.5f')

    str_header = ', '.join(first_cols + later_cols)

    M_cut = Tinker_Masscut(Mrcut)
    mpajhu_tinker_file = ''.join([
        UT.dir_dat(), 'tinker2011catalogs/', 'GroupCat.Mr',
        str(Mrcut), '.Mass',
        str(M_cut), '.D360.MPAJHU.dat'
    ])
    np.savetxt(mpajhu_tinker_file, (np.vstack(np.array(data_list))).T,
               fmt=data_fmt,
               delimiter='\t',
               header=str_header)
    return None
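# A hedged sketch for reading the ASCII table back in: the first line written
# by np.savetxt is '# ' plus the comma-separated column names, so the names
# can be recovered from the header; Read_MPAJHU_TinkerCatalog_ASCII is a
# hypothetical reader, not part of the original module.
def Read_MPAJHU_TinkerCatalog_ASCII(Mrcut=18):
    M_cut = Tinker_Masscut(Mrcut)
    f_ascii = ''.join([
        UT.dir_dat(), 'tinker2011catalogs/', 'GroupCat.Mr',
        str(Mrcut), '.Mass',
        str(M_cut), '.D360.MPAJHU.dat'
    ])
    with open(f_ascii) as f:
        names = f.readline().strip('# \n').split(', ')  # parse header line
    data = np.loadtxt(f_ascii, unpack=True)  # '#' header is skipped
    return dict(zip(names, data))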
Example #9
def Build_VAGCdr72_MPAJHU(Ascii=False):
    ''' Build VAGC dr72 with cross referenced MPAJHU stellar masses 
    and SSFRs.
    '''
    # import VAGC dr72bright34
    vagc_dr72 = VAGCdr72bright34_Catalog()
    print('%i VAGC dr72bright34 galaxies' % len(vagc_dr72['ra']))

    # import MPA-JHU catalog
    mpajhu_gals = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_info_dr7_v5_2.fit']))
    # SFR total
    mpajhu_sfrtot = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_totsfr_dr7_v5_2.fits']))
    # SFR fiber
    mpajhu_sfrfib = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_fibsfr_dr7_v5_2.fits']))
    # SSFR total
    mpajhu_ssfrtot = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_totspecsfr_dr7_v5_2.fits']))
    # SSFR fiber
    mpajhu_ssfrfib = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_fibspecsfr_dr7_v5_2.fits']))
    # stellar mass total
    mpajhu_masstot = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'totlgm_dr7_v5_2.fit']))
    # stellar mass fiber
    mpajhu_massfib = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'fiblgm_dr7_v5_2.fit']))

    catalog = {}
    catalog['ra'] = vagc_dr72['ra']
    catalog['dec'] = vagc_dr72['dec']
    catalog['z'] = vagc_dr72['z']
    for band in ['u', 'g', 'r', 'i', 'z']:
        catalog['M_' + band] = vagc_dr72['M_' + band]

    t_spherematch = time.time()
    match = spherematch(catalog['ra'], catalog['dec'], mpajhu_gals.ra,
                        mpajhu_gals.dec, 0.000833333)
    print('Spherematch with matchlength = %f' % 0.000833333)
    print('takes %f seconds' % (time.time() - t_spherematch))
    print('%f of the VAGC galaxies do not have matches' %
          (1. - float(len(match[0])) / float(len(catalog['ra']))))
    if len(match[0]) != len(np.unique(match[0])):
        raise ValueError('multiple matches to a single VAGC galaxy')

    # save the MPAJHU indices, just in case
    catalog['mpajhu_index'] = np.repeat(-999, len(catalog['ra']))
    catalog['mpajhu_index'][match[0]] = match[1]

    # append SFR, SSFR, and mass values to catalog
    for col in [
            'sfr_tot', 'sfr_fib', 'ssfr_tot', 'ssfr_fib', 'mass_tot',
            'mass_fib'
    ]:  # initiate arrays
        catalog[col] = np.repeat(-999., len(catalog['ra']))

    catalog['sfr_tot'][match[0]] = mpajhu_sfrtot.median[match[1]]
    catalog['sfr_fib'][match[0]] = mpajhu_sfrfib.median[match[1]]
    catalog['ssfr_tot'][match[0]] = mpajhu_ssfrtot.median[match[1]]
    catalog['ssfr_fib'][match[0]] = mpajhu_ssfrfib.median[match[1]]
    catalog['mass_tot'][match[0]] = mpajhu_masstot.median[match[1]]
    catalog['mass_fib'][match[0]] = mpajhu_massfib.median[match[1]]

    mpajhu_file = ''.join(
        [UT.dir_dat(), 'vagc/', 'VAGCdr72.MPAJHU.nocut.hdf5'])

    f = h5py.File(mpajhu_file, 'w')
    grp = f.create_group('data')
    for key in catalog.keys():
        grp.create_dataset(key, data=catalog[key])
    f.close()

    if Ascii:  # write to ASCII (for Jeremy)
        mpajhu_file = ''.join(
            [UT.dir_dat(), 'vagc/', 'VAGCdr72.MPAJHU.nocut.dat'])
        column_order = [
            'ra', 'dec', 'z', 'mass_tot', 'sfr_tot', 'ssfr_tot', 'mass_fib',
            'sfr_fib', 'ssfr_fib'
        ]
        data_list = []
        data_fmt = ['%10.5f' for i in range(len(column_order))]
        str_header = ''
        for col in column_order:
            data_list.append(catalog[col])
            if 'mass' in col:
                str_header += ' ' + col + ' (Msun),'
            elif 'sfr' in col:
                if 'ssfr' not in col:
                    str_header += ' ' + col + ' (Msun/yr),'
                else:
                    str_header += ' ' + col + ','
            else:
                str_header += ' ' + col + ','
        np.savetxt(mpajhu_file, (np.vstack(np.array(data_list))).T,
                   fmt=data_fmt,
                   delimiter='\t',
                   header=str_header)
    return None
Example #10
def Build_KauffmannParent():
    ''' Try to create the parent sample of Kauffmann et al. (2013).
    '''
    # import VAGC dr72bright34
    vagc_dr72 = VAGCdr72bright34_Catalog()

    # import MPA-JHU catalog
    mpajhu_gals = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_info_dr7_v5_2.fit']))
    # SFR total
    mpajhu_sfrtot = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_totsfr_dr7_v5_2.fits']))
    # SFR fiber
    mpajhu_sfrfib = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_fibsfr_dr7_v5_2.fits']))
    # SSFR total
    mpajhu_ssfrtot = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_totspecsfr_dr7_v5_2.fits']))
    # SSFR fiber
    mpajhu_ssfrfib = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_fibspecsfr_dr7_v5_2.fits']))
    # stellar mass total
    mpajhu_masstot = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'totlgm_dr7_v5_2.fit']))
    # stellar mass fiber
    mpajhu_massfib = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'fiblgm_dr7_v5_2.fit']))

    catalog = {}
    catalog['ra'] = vagc_dr72['ra']
    catalog['dec'] = vagc_dr72['dec']
    catalog['z'] = vagc_dr72['z']
    for band in ['u', 'g', 'r', 'i', 'z']:
        catalog['M_' + band] = vagc_dr72['M_' + band]

    # pre-cut on redshift; the stellar mass and absolute magnitude cuts are
    # applied below, after matching to MPA-JHU
    cut_z = (catalog['z'] > 0.017) & (catalog['z'] < 0.03)
    pre_cuts = np.where(cut_z)
    for key in catalog.keys():
        catalog[key] = catalog[key][pre_cuts]

    t_spherematch = time.time()
    match = spherematch(catalog['ra'], catalog['dec'], mpajhu_gals.ra,
                        mpajhu_gals.dec, 0.000833333)
    print('Spherematch with matchlength = %f' % 0.000833333)
    print('takes %f seconds' % (time.time() - t_spherematch))
    print('%f of the VAGC galaxies do not have matches' %
          (1. - float(len(match[0])) / float(len(catalog['ra']))))
    if len(match[0]) != len(np.unique(match[0])):
        raise ValueError('multiple matches to a single VAGC galaxy')

    # save the MPAJHU indices, just in case
    catalog['mpajhu_index'] = np.repeat(-999, len(catalog['ra']))
    catalog['mpajhu_index'][match[0]] = match[1]

    # append SFR, SSFR, and mass values to catalog
    for col in [
            'sfr_tot_mpajhu', 'sfr_fib_mpajhu', 'ssfr_tot_mpajhu',
            'ssfr_fib_mpajhu', 'mass_tot_mpajhu', 'mass_fib_mpajhu'
    ]:  # initiate arrays
        catalog[col] = np.repeat(-999., len(catalog['ra']))

    catalog['sfr_tot_mpajhu'][match[0]] = mpajhu_sfrtot.median[match[1]]
    catalog['sfr_fib_mpajhu'][match[0]] = mpajhu_sfrfib.median[match[1]]
    catalog['ssfr_tot_mpajhu'][match[0]] = mpajhu_ssfrtot.median[match[1]]
    catalog['ssfr_fib_mpajhu'][match[0]] = mpajhu_ssfrfib.median[match[1]]
    catalog['mass_tot_mpajhu'][match[0]] = mpajhu_masstot.median[match[1]]
    catalog['mass_fib_mpajhu'][match[0]] = mpajhu_massfib.median[match[1]]

    # Kauffmann et al. (2013) cuts
    cut_stellarmass = (catalog['mass_tot_mpajhu'] > 9.25)
    cut_absmag = (catalog['M_r'] < -16.) & (catalog['M_r'] > -24.)
    cut_match = (catalog['mpajhu_index'] != -999)

    final_cuts = np.where(cut_stellarmass & cut_absmag & cut_match)
    for key in catalog.keys():
        catalog[key] = catalog[key][final_cuts]

    mpajhu_file = ''.join(
        [UT.dir_dat(), 'vagc/', 'VAGCdr72.Kauff2013cut.hdf5'])

    f = h5py.File(mpajhu_file, 'w')
    grp = f.create_group('data')
    for key in catalog.keys():
        grp.create_dataset(key, data=catalog[key])
    f.close()
    return None
Example #11
def Build_MPAJHU_TinkerCatalog(Mrcut=18):
    ''' Append MPA-JHU SSFR values to the Tinker et al. (2011) catalog.
    The main purpose is to try to reproduce the Kauffmann et al. (2013) results. 
    Galaxies are matched to each other through spherematch. 
    '''
    # import Tinker et al. (2011) catalog with specified Mr cut
    catalog = TinkerCatalog(Mrcut=Mrcut)

    # import MPA-JHU catalog
    mpajhu_gals = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_info_dr7_v5_2.fit']))
    # SFR total
    mpajhu_sfrtot = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_totsfr_dr7_v5_2.fits']))
    # SFR fiber
    mpajhu_sfrfib = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_fibsfr_dr7_v5_2.fits']))
    # SSFR total
    mpajhu_ssfrtot = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_totspecsfr_dr7_v5_2.fits']))
    # SSFR fiber
    mpajhu_ssfrfib = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'gal_fibspecsfr_dr7_v5_2.fits']))
    # stellar mass total
    mpajhu_masstot = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'totlgm_dr7_v5_2.fit']))
    # stellar mass fiber
    mpajhu_massfib = mrdfits(''.join(
        [UT.dir_dat(), 'mpa_jhu/', 'fiblgm_dr7_v5_2.fit']))

    t_spherematch = time.time()
    match = spherematch(catalog['ra'], catalog['dec'], mpajhu_gals.ra,
                        mpajhu_gals.dec, 0.000833333)
    print('Spherematch with matchlength = %f' % 0.000833333)
    print('takes %f seconds' % (time.time() - t_spherematch))
    print('%f of the VAGC galaxies do not have matches, likely due to fiber collisions' %
          (1. - float(len(match[0])) / float(len(catalog['ra']))))
    if len(match[0]) != len(np.unique(match[0])):
        raise ValueError('multiple matches to a single Tinker galaxy')

    # save the MPAJHU indices, just in case
    catalog['mpajhu_index'] = np.repeat(-999, len(catalog['ra']))
    catalog['mpajhu_index'][match[0]] = match[1]

    # append SFR, SSFR, and mass values to catalog
    for col in [
            'sfr_tot_mpajhu', 'sfr_fib_mpajhu', 'ssfr_tot_mpajhu',
            'ssfr_fib_mpajhu', 'mass_tot_mpajhu', 'mass_fib_mpajhu'
    ]:  # initiate arrays
        catalog[col] = np.repeat(-999., len(catalog['ra']))

    catalog['sfr_tot_mpajhu'][match[0]] = mpajhu_sfrtot.median[match[1]]
    catalog['sfr_fib_mpajhu'][match[0]] = mpajhu_sfrfib.median[match[1]]
    catalog['ssfr_tot_mpajhu'][match[0]] = mpajhu_ssfrtot.median[match[1]]
    catalog['ssfr_fib_mpajhu'][match[0]] = mpajhu_ssfrfib.median[match[1]]
    catalog['mass_tot_mpajhu'][match[0]] = mpajhu_masstot.median[match[1]]
    catalog['mass_fib_mpajhu'][match[0]] = mpajhu_massfib.median[match[1]]

    # trim galaxies without matches
    hasmatch = np.where(catalog['mpajhu_index'] != -999)
    for key in catalog.keys():
        key_val = catalog[key]
        catalog[key] = key_val[hasmatch]
    catalog['mpajhu_tinker_index'] = hasmatch[0]

    M_cut = Tinker_Masscut(Mrcut)
    mpajhu_tinker_file = ''.join([
        UT.dir_dat(), 'tinker2011catalogs/', 'GroupCat.Mr',
        str(Mrcut), '.Mass',
        str(M_cut), '.D360.MPAJHU.hdf5'
    ])

    f = h5py.File(mpajhu_tinker_file, 'w')
    grp = f.create_group('data')
    for key in catalog.keys():
        grp.create_dataset(key, data=catalog[key])

    f.close()
    return None
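# A brief consistency-check sketch for 'mpajhu_tinker_index': the trimmed
# catalog keeps only Tinker galaxies with MPA-JHU matches, and this column
# records each surviving row's index into the untrimmed TinkerCatalog.
# Read_MPAJHU_TinkerCatalog is an assumed hdf5 reader analogous to the
# Read_TinkerCatalog sketch after Example #1.
def _check_mpajhu_tinker_index(Mrcut=18):
    full = TinkerCatalog(Mrcut=Mrcut)              # untrimmed catalog
    trim = Read_MPAJHU_TinkerCatalog(Mrcut=Mrcut)  # assumed reader
    idx = trim['mpajhu_tinker_index'].astype(int)
    assert np.allclose(full['ra'][idx], trim['ra'])  # rows line up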
Example #12
    def build(self): 
        ''' Build fiber-collided mock catalogs using specific IDL routines or 
        the given fiber collision weights.

        Parameters
        ----------
        cat_corr : catalog correction dictionary 

        Notes
        -----
        '''

        catdict = self.cat_corr['catalog']
        catalog_name = catdict['name'].lower()

        data_cols = self.datacolumns()
        data_fmts = self.datacols_fmt()
        data_hdrs = self.datacols_header()

        data_dir = direc('data', self.cat_corr) 
        if catalog_name == 'nseries':          
            # N-series mocks (high quality mocks with actual CMASS tiling)
            # original file 
            orig_file = ''.join([
                data_dir, 
                'CutskyN', str(catdict['n_mock']), '.rdzwc'
                ]) 
            orig_ra, orig_dec, orig_z, orig_wfc, orig_zupw, orig_upw_index = np.loadtxt(
                    orig_file, 
                    unpack = True, 
                    usecols = [0,1,2,4,5,6]
                    )
            # file with mask completeness
            mask_file = ''.join([data_dir, 'CutskyN', str(catdict['n_mock']), '.mask_info']) 
            orig_wcomp = np.loadtxt(mask_file, unpack=True, usecols=[0]) 

            coll = np.where(orig_wfc == 0.0) 
            # data column list 
            data_list = [
                    orig_ra, 
                    orig_dec, 
                    orig_z, 
                    orig_wfc, 
                    orig_wcomp, 
                    orig_zupw, 
                    orig_upw_index
                    ]   
    
            # handle upweighted redshift/index discrepancies by simply ignoring them. 
            if not np.array(orig_z[orig_upw_index.astype(int)[coll]] == orig_zupw[coll]).all(): 
                wrong_index = (coll[0])[np.where(orig_z[orig_upw_index.astype(int)[coll]] != orig_zupw[coll])[0]]
                warn_message = ''.join([
                    'upweighted galaxy redshift and index data discrepancies in ', 
                    self.file(), 
                    ' ', 
                    str(len(wrong_index)), 
                    ' galaxies affected'
                    ])
                warnings.warn(warn_message, Warning)
                if len(wrong_index) > 0: 
                    for i_data, datum in enumerate(data_list): 
                        data_list[i_data] = np.delete(datum, wrong_index)
    
        elif catalog_name == 'qpm': 
            # Quick Particle Mesh mocks from Jeremy (quantity over quality mocks)
            orig_file = ''.join([
                '/mount/riachuelo2/rs123/BOSS/QPM/cmass/mocks/dr12d/ngc/data/', 
                'a0.6452_', str("%04d" % catdict['n_mock']), '.dr12d_cmass_ngc.rdz']) 
            ra, dec, z, wfc  = np.loadtxt(orig_file, unpack=True, usecols=[0,1,2,4]) 
        
            orig_info_file = ''.join([
                '/mount/riachuelo2/rs123/BOSS/QPM/cmass/mocks/dr12d/ngc/data/', 
                'a0.6452_', str("%04d" % catdict['n_mock']), '.dr12d_cmass_ngc.rdz.info']) 
            # gal_id, comp, z_real, z_red, mass_halo, flag_sta, id_halo
            comp = np.loadtxt(orig_info_file, unpack=True, skiprows=3, usecols=[1])    

            if catdict['n_mock'] in (44, 46, 52, 53, 54, 56, 61, 707, 756, 794, 819, 831, 835, 838):
                orig_veto_file = ''.join([
                    '/mount/riachuelo1/hahn/data/QPM/dr12d/', 
                    'a0.6452_', str("%04d" % catdict['n_mock']), '.dr12d_cmass_ngc.veto']) 
            else:
                orig_veto_file = ''.join([
                    '/mount/riachuelo2/rs123/BOSS/QPM/cmass/mocks/dr12d/ngc/data/', 
                    'a0.6452_', str("%04d" % catdict['n_mock']), '.dr12d_cmass_ngc.veto']) 

            veto = np.loadtxt(orig_veto_file) 
            n_gal = len(veto)

            if len(ra) != n_gal: 
                print(orig_file)
                print(orig_veto_file)
                raise ValueError("veto mask doesn't match galaxy catalog") 

            vetomask = np.where(veto == 0)
            # data column list 
            data_list = [
                    ra[vetomask], 
                    dec[vetomask], 
                    z[vetomask], 
                    wfc[vetomask], 
                    comp[vetomask]
                    ]
        elif catalog_name == 'bigmd':
            # Big MultiDark
            P0 = 20000.0
            # read original random catalog 
            data_dir = '/mount/riachuelo1/hahn/data/BigMD/'
            if 'version' in catdict.keys():
                if catdict['version'] == 'nowiggles':
                    # simulation with no wiggles 
                    orig_file = ''.join([data_dir, 'nowiggles/BigMD-cmass-dr12v4-nowiggle-veto.dat']) 
                else: 
                    raise NotImplementedError
                    #orig_file = ''.join([data_dir, 'bigMD-cmass-dr12v4-wcp-veto.dat'])  # hardcoded
                    #orig_file = ''.join([data_dir, 'bigMD-cmass-dr12v4-RST-standardHAM-veto.dat'])
                    #orig_file = ''.join([data_dir, 'bigMD-cmass-dr12v4-RST-quadru-veto.dat'])
            else:       # default 
                orig_file = ''.join([data_dir, 'BigMD-cmass-dr12v4-RST-standHAM-Vpeak-veto.dat'])

            # RA, Dec, redshift, wfkp, veto flag, wfc
            ra, dec, z, wfkp, veto, wfc = np.loadtxt(
                    orig_file, 
                    unpack=True, 
                    usecols=[0,1,2,3,4,5]
                    ) 
            n_gal = len(ra) 
            #nbar = (1.0 / P0) * (1.0/wfkp - 1.0) 

            vetomask = np.where(veto == 1)  # impose vetomask 
            data_list = [
                        ra[vetomask], 
                        dec[vetomask], 
                        z[vetomask], 
                        wfc[vetomask]
                        ]

        elif catalog_name == 'tilingmock':  # tiling mock 
            input_file = ''.join([
                '/mount/riachuelo1/hahn/data/tiling_mocks/', 
                'cmass-boss5003sector-icoll012.fidcosmo.dat'])
            output_file = self.file()

            idl_cmd = ' '.join([
                'idl', '-e', '"', 
                "build_wcp_assign, 'tilingmock', input_file='"+input_file+"', output_file='"+output_file+"'", '"'])
            os.system(idl_cmd) 

            return None

        elif 'cmass' in catalog_name: 

            if catalog_name == 'cmass': 
                # CMASS DR12v4 galaxy data
                data_file = ''.join([
                    data_dir, 
                    'cmass-dr12v4-N-Reid.dat.fits'
                    ]) 

                data = mrdfits(data_file) # fits data object

                zlimit = np.where((data.z >= 0.43) & (data.z <= 0.7))
            
            elif 'cmasslowz' in catalog_name: 
                # CMASS LOWZ DR12v5 combined sample; Ariel's sample has three 
                # separate sets of sectors: '', 'e2', and 'e3'

                cmasslowz_str = ''
                if 'e2' in catalog_name: 
                    cmasslowz_str = 'E2'
                elif 'e3' in catalog_name: 
                    cmasslowz_str = 'E3'

                # divide the combined sample into two redshift bins 
                if '_low' in catalog_name:  
                    zmin, zmax = 0.2, 0.5
                elif '_high' in catalog_name: 
                    zmin, zmax = 0.5, 0.75
                else: 
                    raise NameError("redshift bin must be specified") 
        
                # .fits data files from mk_catalog pipeline  
                data_file = ''.join([
                    data_dir, 
                    'galaxy_DR12v5_CMASSLOWZ', cmasslowz_str, '_North.fits.gz'
                    ])
                data = mrdfits(data_file) 

                zlimit = np.where((data.z >= zmin) & (data.z < zmax))  # redshift limit

            else: 
                raise NameError('catalog name not recognized') 

            data_list = [
                (data.ra)[zlimit], 
                (data.dec)[zlimit], 
                (data.z)[zlimit], 
                (data.nz)[zlimit],
                (data.weight_systot)[zlimit], 
                (data.weight_noz)[zlimit], 
                (data.weight_cp)[zlimit], 
                (data.comp)[zlimit]
                ] 
        
        # write to corrected file 
        output_file = self.file()
        np.savetxt(
                output_file, 
                (np.vstack(np.array(data_list))).T, 
                fmt=data_fmts, 
                delimiter='\t', 
                header=data_hdrs
                ) 

        return None
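# A minimal sketch of reading a built catalog back in; the column order
# follows data_list for each branch (e.g. for 'nseries': ra, dec, z, wfc,
# wcomp, zupw, upw_index) and the file name comes from self.file(). Both the
# helper name and the example file name below are hypothetical.
import numpy as np

def read_built_mock(file_name, columns):
    ''' Read a tab-delimited catalog written by build() into a column dict '''
    data = np.loadtxt(file_name, unpack=True)  # '#' header is skipped
    return dict(zip(columns, data))

# e.g. mock = read_built_mock('CutskyN1.fibcoll.dat',
#         ['ra', 'dec', 'z', 'wfc', 'wcomp', 'zupw', 'upw_index'])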