Esempio n. 1
0
    def Load_ExoArchive_Universe(self, composite_table: bool = True, force_new_pull: bool = False, fill_empties: bool = True) -> None:
        '''
        Read NASA Exoplanet Archive data and store the resulting planet table in self.planets
        
        Data source (decided from self.filename and force_new_pull):
        - If self.filename exists and force_new_pull=False, the file is read as an 
          astropy QTable (format 'ascii.ecsv')
        - Otherwise new data is pulled from the archive TAP service (via pyvo), tagged with
          the 'isPSCOMPPARS' metadata flag, written to self.filename as 'ascii.ecsv' and 
          then read back from that file
        
        Kwargs:
        composite_table  - Bool. True [default]: pull "Planetary Systems Composite
                           Parameters Table". False: pull simple "Planetary Systems" Table
                           (only rows with default_flag=1 are requested in that case)
                           NOTE: see Archive website for difference between these tables
                           
        force_new_pull   - Bool. False [default]: loads table from filename if filename
                           file exists. True: pull new archive data and overwrite filename
                           
        fill_empties     - Bool. True [default]: approximate empty table values using
                           other values present in data. Ex: radius, mass, logg, angsep, etc.
                           False: 'AngSep' and 'PlanetLogg' are added as fully masked columns
                           NOTE: When composite_table=True we do not approximate the planet 
                             radius or mass; we keep the archive-computed approx.
                           
        Returns:
        None. The finished table is assigned to self.planets.
        
        Raises:
        ValueError - when an existing file's 'isPSCOMPPARS' metadata flag does not match
                     the requested composite_table value.
        
        Approximation methods (fill_empties=True):
        - AngSep     - theta[mas] = SMA[au]/distance[pc] * 1e3
        - logg       - logg [log(cgs)] = log10(G*mass/radius**2)
        - StarLum    - absVmag = Vmag - 5*log10(distance[pc]/10)
                       starlum[L/Lsun] = 10**(-(absVmag-4.83)/2.5)
        - StarRad    - rad[Rsun] = (5800/Teff[K])**2 *sqrt(starlum)
        - PlanetRad  - ** when composite_table=True, keep archive-computed approx
                       Based on Thorngren 2019 and Chen&Kipping 2016
        - PlanetMass - ^^ Inverse of PlanetRad
        
        
        *** Note: the resulting planet table will have nan's where data is missing/unknown. 
            Ex. if a planet lacks a radius value, its 'PlanetRadius' entry will be np.nan
        *** Note: 'Flux Ratio', 'ProjAU' and 'Phase' are added as fully masked columns, so
            they are filled with their np.nan fill value in the final table.
        '''

        #-- Define columns to read. NOTE: add columns here if needed. 
          # col2pull entries should be matched with colNewNames entries
          # (the two are paired by position when the columns are renamed below)
        col2pull =  "pl_name,hostname,pl_orbsmax,pl_orbeccen,pl_orbincl,pl_bmasse,pl_rade," + \
                    "pl_eqt,ra,dec,sy_dist,st_spectype,st_mass,st_teff," + \
                    "st_rad,st_logg,st_lum,st_age,st_vsin,st_radv," + \
                    "st_met,sy_plx,sy_bmag,sy_vmag,sy_rmag,sy_icmag," + \
                    "sy_jmag,sy_hmag,sy_kmag,discoverymethod"
        colNewNames = ["PlanetName","StarName","SMA","Ecc","Inc","PlanetMass","PlanetRadius",
                       "PlanetTeq","RA","Dec","Distance","StarSpT","StarMass","StarTeff",
                       "StarRad","StarLogg","StarLum","StarAge","StarVsini","StarRadialVelocity",
                       "StarZ","StarParallax","StarBMag","StarVmag","StarRmag","StarImag",
                       "StarJmag","StarHmag","StarKmag","DiscoveryMethod"]

        #-- Load/Pull data depending on provided filename
        import os
        if os.path.isfile(self.filename) and not force_new_pull:
            
            # Existing filename was provided so let's try use that
            
            print("%s already exists:\n    we'll attempt to read this file as an astropy QTable"%self.filename)

            NArx_table = QTable.read(self.filename, format='ascii.ecsv')
            
            # Check that the provided table file matches the requested table type
            # NOTE(review): a file without the 'isPSCOMPPARS' metadata entry (e.g. one not
              # written by this method) presumably fails on this lookup before the
              # ValueError below can be raised - confirm
            if NArx_table.meta['isPSCOMPPARS'] != composite_table:
                # Build the message from: filename, expected table type, found table type
                err0 = '%s contained the wrong table-type:'%self.filename
                err1 = 'pscomppars' if composite_table else 'ps'
                err2 = 'pscomppars' if NArx_table.meta['isPSCOMPPARS'] else 'ps'
                err3 = " Expected '{}' table but found '{}' table.".format(err1,err2)
                err4 = ' Consider setting force_new_pull=True.'
                raise ValueError(err0+err3+err4)

        else:
            # New filename was provided or a new pull was explicitly requested. Pull new data
            
            if not force_new_pull:
                print("%s does not exist:\n    we'll pull new data from the archive and save it to this filename"%self.filename)
            else:
                print("%s may or may not exist:\n    force_new_pull=True so we'll pull new data regardless and overwrite as needed"%self.filename) 

            # Import pyVO package used to query the Exoplanet Archive
              # (imported here so it is only needed when a pull actually happens)
            import pyvo as vo

            # Create a "service" which can be used to access the archive TAP server
            NArx_service = vo.dal.TAPService("https://exoplanetarchive.ipac.caltech.edu/TAP")

            # Create a "query" string formatted per the TAP specifications
              # 'select': specify which columns to pull
              # 'from': specify which table to pull 
              # 'where': (optional) specify parameters to be met when choosing what to pull
                # Add where flag for ps to only pull the best row for each planet
            tab2pull = "pscomppars" if composite_table else "ps where default_flag=1"
            query = "select "+col2pull+" from "+tab2pull

            # Pull the data and convert to astropy masked QTable
            NArx_res = NArx_service.search(query) 
            
            NArx_table = QTable(NArx_res.to_table())

            # Add a flag to the table metadata to denote what kind of table it was
              # This'll prevent trying to read the table as the wrong type later
            NArx_table.meta['isPSCOMPPARS'] = composite_table
            # Save raw table for future use 
              # overwrite is only enabled for a forced pull; otherwise this branch was
              # reached because no file was found at self.filename by the check above
            NArx_table.write(self.filename,format='ascii.ecsv',overwrite=force_new_pull)
            # Read table back in to ensure that formatting from a fresh pull matches
              # the formatting from an old pull (as done when filename exists)
            NArx_table = QTable.read(self.filename, format='ascii.ecsv')
            
        #-- Rename columns to psisim-expected names
        NArx_table.rename_columns(col2pull.split(','),colNewNames)
        
        #-- Change fill value from default 1e20 to np.nan
        for col in NArx_table.colnames:
            if isinstance(NArx_table[col],MaskedColumn) and isinstance(NArx_table[col].fill_value,(int,float)):
                # Only change numeric fill values to nan
                NArx_table[col].fill_value = np.nan
        
        #-- Add new columns for values not easily available or computable from table
          # TODO: for now, these are masked but we should find a good way to populate them
          # NOTE(review): the list below holds one MaskedColumn object repeated 3 times;
          # presumably add_columns copies each entry so the columns stay independent - confirm
        NArx_table.add_columns([MaskedColumn(length=len(NArx_table),mask=True,fill_value=np.nan)]*3,
                               names=['Flux Ratio','ProjAU','Phase'])
        
        if fill_empties:
            #-- Compute missing planet columns
            # Compute missing masses and radii using mass-radius relations
            if not composite_table:
                # NOTE: composite table already has radius-mass approximation so we'll
                  # only repeat them if we don't pull that table
                    
                # Convert masked columns to ndarrays with 0's instead of mask
                  # as needed by the approximate_... functions
                masses   = np.array(NArx_table['PlanetMass'].filled(fill_value=0.0))
                radii    = np.array(NArx_table['PlanetRadius'].filled(fill_value=0.0))
                eqtemps  = np.array(NArx_table['PlanetTeq'].filled(fill_value=0.0))
                # Perform approximations
                  # (radii are approximated first, so the mass approximation receives
                  # the updated radii)
                radii = self.approximate_radii(masses,radii,eqtemps)
                masses = self.approximate_masses(masses,radii,eqtemps)
                # Create masks for non-zero values (0's are values where data was missing)
                rad_mask = (radii != 0.)
                mss_mask = (masses != 0.)
                # Restrict to entries that are missing (masked) in NArx_table AND for which
                  # the approximation produced a non-zero value
                rad_mask = NArx_table['PlanetRadius'].mask & rad_mask
                mss_mask = NArx_table['PlanetMass'].mask & mss_mask
                # Place results back in the table
                NArx_table['PlanetRadius'][rad_mask] = radii[rad_mask]
                NArx_table['PlanetMass'][mss_mask] = masses[mss_mask]
        
            # Angular separation
            NArx_table['AngSep'] = NArx_table['SMA']/NArx_table['Distance'] * 1e3
            # Planet logg
              # filled() substitutes each column's fill value for masked entries
              # (presumably the np.nan set above); masked_invalid then re-masks the
              # non-finite results before the log10 is taken
            grav = constants.G * (NArx_table['PlanetMass'].filled()*u.earthMass) / (NArx_table['PlanetRadius'].filled()*u.earthRad)**2
            NArx_table['PlanetLogg'] = np.ma.log10(MaskedColumn(np.ma.masked_invalid(grav.cgs.value),fill_value=np.nan))  # logg cgs

            #-- Guess star luminosity, radius, and gravity for missing (masked) values only
              # The guesses will be questionably reliable
            # Star Luminosity
              # NOTE: StarLum is kept as log10(L/Lsun) here; it is converted to L/Lsun
              # further below, outside this if/else
            host_MVs = NArx_table['StarVmag'] - 5*np.ma.log10(NArx_table['Distance']/10)  # absolute v mag
            host_lum = -(host_MVs-4.83)/2.5    #log10(L/Lsun)
            NArx_table['StarLum'][NArx_table['StarLum'].mask] = host_lum[NArx_table['StarLum'].mask]

            # Star radius
              # Uses StarLum after the fill above, so guessed luminosities feed into the radius
            host_rad = (5800/NArx_table['StarTeff'])**2 *np.ma.sqrt(10**NArx_table['StarLum'])   # Rsun
            NArx_table['StarRad'][NArx_table['StarRad'].mask] = host_rad[NArx_table['StarRad'].mask]

            # Star logg
              # Uses StarRad after the fill above; same filled()/masked_invalid pattern as
              # the planet logg
            host_grav = constants.G * (NArx_table['StarMass'].filled()*u.solMass) / (NArx_table['StarRad'].filled()*u.solRad)**2
            host_logg = np.ma.log10(np.ma.masked_invalid(host_grav.cgs.value))  # logg cgs
            NArx_table['StarLogg'][NArx_table['StarLogg'].mask] = host_logg[NArx_table['StarLogg'].mask]
        else:
            # Create fully masked columns for AngSep and PlanetLogg
              # so that the unit assignments below find these columns in either case
            NArx_table.add_columns([MaskedColumn(length=len(NArx_table),mask=True,fill_value=np.nan)]*2,
                       names=['AngSep','PlanetLogg'])

            
        #-- Deal with units (conversions and Quantity multiplications)
        # Set host luminosity to L/Lsun from log10(L/Lsun)
          # (done regardless of fill_empties)
        NArx_table['StarLum'] = 10**NArx_table['StarLum']    # L/Lsun
        
        # Make sure all number fill_values are np.nan after the column manipulations
        for col in NArx_table.colnames:
            if isinstance(NArx_table[col],MaskedColumn) and isinstance(NArx_table[col].fill_value,(int,float)):
                # Only change numeric fill values to nan
                NArx_table[col].fill_value = np.nan
                
        # Fill in masked values 
          # (masked entries are replaced by each column's fill value)
        NArx_table = NArx_table.filled()
        # Apply units
        NArx_table['SMA'] *= u.AU
        NArx_table['Inc'] *= u.deg
        NArx_table['PlanetMass'] *= u.earthMass
        NArx_table['PlanetRadius'] *= u.earthRad
        NArx_table['PlanetTeq'] *= u.K
        NArx_table['RA'] *= u.deg
        NArx_table['Dec'] *= u.deg
        NArx_table['Distance'] *= u.pc
        NArx_table['StarMass'] *= u.solMass
        NArx_table['StarTeff'] *= u.K
        NArx_table['StarRad'] *= u.solRad
        NArx_table['StarLogg'] *= u.dex(u.cm/(u.s**2))
        NArx_table['StarLum'] *= u.solLum
        NArx_table['StarAge'] *= u.Gyr
        NArx_table['StarVsini'] *= u.km/u.s
        NArx_table['StarRadialVelocity'] *= u.km/u.s
        #NArx_table['StarZ']  *= u.dex
        NArx_table['StarParallax'] *= u.mas
        NArx_table['ProjAU'] *= u.AU
        NArx_table['Phase'] *= u.rad
        NArx_table['AngSep'] *= u.mas
        NArx_table['PlanetLogg'] *= u.dex(u.cm/(u.s**2))
        
        # Store the finished table on the instance
        self.planets = NArx_table