def query_gaia_match(user_name,table_2_match,radius_arc):
    '''
    Cross-match a user table stored in the Gaia Archive against Gaia DR2.

    The user table needs to be uploaded to the Gaia Archive in advance. The
    steps to upload the table can be found in
    http://gea.esac.esa.int/archive-help/index.html.

    Two asynchronous jobs are launched: the first runs the archive's
    positional cross-match and stores it as the user table ``xmatch``; the
    second joins the user table, ``gaiadr2.gaia_source`` and ``xmatch`` and
    asks astroquery to dump the result to a file.

    Input:
        user_name(str): User name in the Gaia Archive.
        table_2_match(str): Name of the table uploaded to the Gaia Archive.
        radius_arc(float): Radius of search in arcsec.

    Output:
        None. The second job is launched with ``dump_to_file=True``, so the
        result (all the columns in table_2_match plus all Gaia DR2 columns)
        is written by astroquery to its default output file.
    '''
    # Ask for userName and userPassword to authenticated access mode.
    # This could be done with: Gaia.login(user='******', password='******')
    Gaia.login_gui()

    # Always close the archive session, even when one of the jobs fails.
    try:
        # Cross-match user table and gaia source.
        Gaia.launch_job_async("""\
            SELECT crossmatch_positional(\
            'user_{}','{}',\
            'gaiadr2','gaia_source',\
            {},\
            'xmatch')\
            FROM dual;\
            """.format(user_name,table_2_match,radius_arc))

        # For the matches saved in xmatch, get the information from Gaia.
        # dump_to_file=True saves the information into an output table.
        Gaia.launch_job_async("""\
            SELECT c."dist", a.*, b.* \
            FROM user_{}.{} AS a, \
            gaiadr2.gaia_source AS b, \
            user_{}.xmatch AS c \
            WHERE (c.{}_{}_oid = a.{}_oid AND \
            c.gaia_source_source_id = b.source_id)\
            """.format(user_name,table_2_match,user_name,
                       table_2_match,table_2_match,table_2_match),
            dump_to_file=True)
    finally:
        Gaia.logout()
def get_data_subset(ra_deg, dec_deg, rad_deg, dist, dist_span=None, rv_only=False,
                    login=False, login_path='/shared/ebla/cotar/'):
    """
    Query Gaia DR2 for sources inside a cone, optionally limited in parallax.

    :param ra_deg: right ascension of the cone centre, formatted into the
        ADQL CIRCLE (degrees per the parameter name)
    :param dec_deg: declination of the cone centre
    :param rad_deg: cone radius
    :param dist: reference distance; the parallax window is computed as
        1e3 / distance, i.e. NOTE(review): presumably parsec -> mas, confirm
    :param dist_span: half-width of the distance window around ``dist``.
        When None a fixed parallax window of [-1, 100] is used instead.
    :param rv_only: if True keep only rows with a non-null radial_velocity
    :param login: if True log in with the credentials file before querying
        and log out afterwards
    :param login_path: directory containing ``gaia_archive_login.txt``
    :return: the results table with units blanked and the radial velocity
        columns renamed to ``rv`` / ``rv_error``; an empty list if anything
        goes wrong while querying
    """
    if dist_span is not None:
        # the near edge is clipped at 1 so the division stays positive
        max_parallax = 1e3 / (max(dist - dist_span, 1.))
        min_parallax = 1e3 / (dist + dist_span)
    else:
        min_parallax = -1.
        max_parallax = 100.

    # construct complete Gaia data query string
    gaia_query = "SELECT source_id,ra,dec,parallax,parallax_error,pmra,pmra_error,pmdec,pmdec_error,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,radial_velocity,radial_velocity_error " +\
                 "FROM gaiadr2.gaia_source " +\
                 "WHERE parallax >= {:.4f} AND parallax <= {:.4f} ".format(min_parallax, max_parallax) +\
                 "AND CONTAINS(POINT('ICRS',gaiadr2.gaia_source.ra,gaiadr2.gaia_source.dec),CIRCLE('ICRS',{:.5f},{:.5f},{:.5f}))=1 ".format(ra_deg, dec_deg, rad_deg)
    if rv_only:
        gaia_query += 'AND (radial_velocity IS NOT NULL) '

    try:
        if login:
            # login enables unlimited asynchronous download of data
            # NOTE: only up to 20 GB in total - needs manual deletion of data in the Gaia portal
            print(' Gaia login initiated')
            Gaia.login(credentials_file=login_path + 'gaia_archive_login.txt')
        try:
            # disable dump as results will be saved to a custom location later on in the analysis code
            gaia_job = Gaia.launch_job_async(gaia_query, dump_to_file=False)
            gaia_data = gaia_job.get_results()
        finally:
            # close the session even when the query itself failed
            if login:
                Gaia.logout()
    except Exception as ee:
        # best-effort contract: report the problem and hand back an empty list
        print(ee)
        print(' Problem querying data.')
        return list([])

    for g_c in gaia_data.colnames:
        gaia_data[g_c].unit = ''
    gaia_data['radial_velocity'].name = 'rv'
    gaia_data['radial_velocity_error'].name = 'rv_error'
    print(' Retireved lines:', len(gaia_data))
    return gaia_data
def get_gaia(df):
    """
    Collect Gaia DR2 + 2MASS rows around every position listed in ``df``.

    For each row of ``df`` a 5 arcsec cone search is run on the 2MASS table
    (``gaiadr1.tmass_original_valid``), left-joined to Gaia DR2 through
    ``gaiadr2.tmass_neighbourhood``. Each returned row is tagged with the
    originating ``df`` index in a column called ``INDEX``.

    Args:
        df (pd.DataFrame): must provide the columns 'RA (deg)' and
            'Dec (deg)'.

    Returns:
        pd.DataFrame: all matches stacked together. The column set is the
        one of a small template query (including a derived ``abs_g``
        column, which stays empty for the per-row results) followed by any
        extra columns such as ``INDEX``.
    """
    import numpy as np
    import pandas as pd
    from astroquery.gaia import Gaia

    # NOTE(review): credentials are hard-coded (masked here); they should be
    # read from a credentials file or prompted for instead.
    Gaia.login(user='******',password='******')

    try:
        # Small template query, only used to learn the output column names.
        qry = """
        SELECT TOP 10 g.*, t.*
        FROM gaiadr1.tmass_original_valid AS t
        JOIN gaiadr2.tmass_neighbourhood AS xt ON xt.tmass_oid=t.tmass_oid
        JOIN gaiadr2.gaia_source AS g ON g.source_id=xt.source_id
        WHERE g.phot_g_mean_mag IS NOT NULL
        """
        bkg=Gaia.launch_job_async(qry).get_results().to_pandas()
        bkg['abs_g']=bkg.phot_g_mean_mag-5*np.log10(1000./bkg.parallax)+5.
        columns=bkg.columns.tolist()

        # Gather the per-row results and concatenate once at the end:
        # DataFrame.append was removed in pandas 2.0 and was quadratic here.
        frames=[]
        for k,row in df.iterrows():
            qry="""
            SELECT g.*, t.*
            FROM gaiadr1.tmass_original_valid AS t
            LEFT OUTER JOIN gaiadr2.tmass_neighbourhood AS xt ON xt.tmass_oid = t.tmass_oid
            LEFT OUTER JOIN gaiadr2.gaia_source AS g ON xt.source_id = g.source_id
            where 1=CONTAINS(POINT('ICRS', t.ra, t.dec),CIRCLE('ICRS', {}, {}, 5./3600))
            """.format(row['RA (deg)'],row['Dec (deg)'])
            data=Gaia.launch_job_async(qry).get_results().to_pandas()
            data['INDEX']=k
            print(k,data.shape)
            frames.append(data)
    finally:
        # never leave the archive session open
        Gaia.logout()

    if not frames:
        # nothing was queried: hand back the empty template
        return pd.DataFrame(index=df.index,columns=columns).dropna(how='all')

    GAIAdf=pd.concat(frames,sort=False)
    # template columns first, then whatever the per-row queries added
    extra=[c for c in GAIAdf.columns if c not in columns]
    GAIAdf=GAIAdf.reindex(columns=columns+extra).dropna(how='all')
    return GAIAdf
def download_gaia():
    """Download data from the Gaia archive.

    Prompts for Gaia Archive credentials, uploads the HIP-to-Gaia cone-search
    map as the temporary user table ``hip_cone`` and downloads the HIP and
    TYC2 cross-indexed data into the ``gaia`` directory. The temporary table
    is deleted and the session closed even if a download fails.
    """
    with contextlib.suppress(FileExistsError):
        os.mkdir('gaia')

    print('Login to Gaia Archive')
    # NOTE(review): the prompt handling was masked in the reviewed source;
    # this is the evident intent (abort on empty input) - confirm.
    username = input('Username: ')
    if not username:
        print('Login aborted')
        return
    password = getpass.getpass('Password: ')
    if not password:
        print('Login aborted')
        return

    Gaia.login(user=username, password=password)
    try:
        # the gaiadr2.hipparcos2_best_neighbour table misses a large number of HIP stars that are
        # actually present, so use the cone search file
        conesearch_file = os.path.join('gaia', 'hip2conesearch.zip')
        download_file(
            conesearch_file,
            'https://www.cosmos.esa.int/documents/29201/1769576/Hipparcos2GaiaDR2coneSearch.zip')

        with ZipFile(conesearch_file, 'r') as csz:
            with csz.open('Hipparcos2GaiaDR2coneSearch.csv', 'r') as f:
                hip_map = io.ascii.read(f, names=['original_ext_source_id', 'source_id', 'dist'])

        # (id column, cross-index table, output file name)
        gaia_downloads = [
            ('hip_id', 'user_'+username+'.hip_cone', 'gaiadr2_hip-result.csv'),
            ('tyc2_id', 'gaiadr2.tycho2_best_neighbour', 'gaiadr2_tyc-result.csv')
        ]

        Gaia.upload_table(upload_resource=hip_map, table_name='hip_cone')
        try:
            for colname, xindex_table, filename in gaia_downloads:
                download_gaia_data(colname, xindex_table, os.path.join('gaia', filename))
        finally:
            # the uploaded table is only needed for the duration of the downloads
            Gaia.delete_user_table('hip_cone')
    finally:
        Gaia.logout()
def vector_match_tmassbestneighbor_to_gaiaids(stats, homedir='/home/luke/',
                                              outdir='../data/rms_vs_mag/',
                                              projid=1301):
    """
    Look up the 2MASS best neighbours of a set of Gaia DR2 source ids.

    if you do smart ADQL, it is like factor of >100x faster than doing
    item-by-item crossmatching.

    astroquery.Gaia is sick for this, because it lets you remotely upload
    tables on the fly. (at least, somewhat small ones, of <~10^5 members)

    Args:
        stats: mapping / table whose 'lcobj' entry holds the Gaia source ids
            handed to `make_votable_given_ids`.
        homedir (str): directory holding the `.gaia_credentials` file.
        outdir (str): directory of the cached crossmatch result.
        projid (int): project id, only used in the cache file name.

    Returns:
        astropy table parsed from the (possibly cached) VOTable
        `proj{projid}_xmatch.xml.gz`, with the columns selected below.
    """
    outfile = os.path.join(outdir, 'proj{}_xmatch.xml.gz'.format(projid))

    # the archive is only contacted when there is no cached result
    if not os.path.exists(outfile):

        gaiaids = stats['lcobj']
        xmlpath = make_votable_given_ids(gaiaids)

        jobstr = ('SELECT top 100000 upl.source_id, tm.source_id, '
                  'tm.original_ext_source_id FROM '
                  'gaiadr2.tmass_best_neighbour AS tm '
                  'JOIN tap_upload.foobar as upl '
                  'using (source_id)')

        Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))
        try:
            # might do async if this times out. but it doesn't.
            Gaia.launch_job(query=jobstr, upload_resource=xmlpath,
                            upload_table_name="foobar", verbose=True,
                            dump_to_file=True, output_file=outfile)
        finally:
            # close the session even if the job failed
            Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()

    return tab
def download_gaia() -> None:
    """Download data from the Gaia archive.

    Prompts for Gaia Archive credentials, then runs the HIP and TYC
    downloads inside a logged-in session that is always closed afterwards.
    """
    with contextlib.suppress(FileExistsError):
        os.mkdir('gaia')

    print('Login to Gaia Archive')
    # NOTE(review): the prompt handling was masked in the reviewed source;
    # this is the evident intent (abort on empty input) - confirm.
    username = input('Username: ')
    if not username:
        print('Login aborted')
        return
    password = getpass.getpass('Password: ')
    if not password:
        print('Login aborted')
        return

    Gaia.login(user=username, password=password)
    try:
        download_gaia_hip(username)
        download_gaia_tyc(username)
    finally:
        Gaia.logout()
def main(kc19_groupid=113, Tmag_cutoff=14, clean_gaia_cache=False):
    """
    Build a field-star comparison sample for one KC19 group and measure
    Lomb-Scargle periods for it.

    Steps, as implemented below:
      1. read the group members brighter than `Tmag_cutoff` and the group's
         position / parallax from the two CSV tables;
      2. query Gaia DR2 (cached on disk) for neighbourhood stars passing
         the quality cuts;
      3. drop stars fainter than the cutoff or already in the group and
         randomly draw as many field stars as there are group members;
      4. per drawn star: fetch FFI cutouts, build a light curve, run
         Lomb-Scargle, look up the TIC Teff, save the status file and the
         verification page.

    Args:
        kc19_groupid (int): group_id selected from both input tables.
        Tmag_cutoff (float): upper limit applied to 'Tmag_pred'.
        clean_gaia_cache (bool): if True, delete the cached Gaia query
            result first so that the query is rerun.

    Raises:
        AssertionError: fewer than twice as many field stars as group
            members survive the cuts.
        NotImplementedError: a star has no unique TIC match, or its TIC
            Teff is not finite.
    """

    #
    # get info needed to query gaia for comparison stars
    #
    source_df = pd.read_csv('../data/kounkel_table1_sourceinfo.csv')
    sdf = source_df[(source_df['Tmag_pred'] < Tmag_cutoff) &
                    (source_df['group_id'] == kc19_groupid)]
    n_sel_sources_in_group = len(sdf)

    df2 = pd.read_csv('../data/string_table2.csv')
    gdf = df2[df2['group_id'] == kc19_groupid]

    # take the scalar explicitly: float(Series) is deprecated in pandas
    group_coord = SkyCoord(float(gdf['l'].iloc[0]) * u.deg,
                           float(gdf['b'].iloc[0]) * u.deg,
                           frame='galactic')
    ra = group_coord.icrs.ra
    dec = group_coord.icrs.dec
    plx_mas = float(gdf['parallax'].iloc[0])

    group_id = kc19_groupid
    name = str(gdf['name'].iloc[0])

    #
    # define relevant directories / paths
    #
    gaiadir = os.path.join(basedir, 'gaia_queries')
    os.makedirs(gaiadir, exist_ok=True)

    outfile = os.path.join(
        gaiadir, 'group{}_comparison_sample.xml.gz'.format(kc19_groupid))

    #
    # run the gaia query. require the same cuts imposed by Kounkel & Covey 2019
    # on stellar quality. also require close on-sky (within 5 degrees of KC19
    # group position), and close in parallax space (within +/-30% of KC19
    # parallax, see plx_upper / plx_lower below).
    #
    if clean_gaia_cache and os.path.exists(outfile):
        os.remove(outfile)

    if not os.path.exists(outfile):

        jobstr = ('''
        SELECT *
        FROM gaiadr2.gaia_source
        WHERE 1=CONTAINS(
          POINT('ICRS', ra, dec),
          CIRCLE('ICRS', {ra:.8f}, {dec:.8f}, {sep_deg:.1f}))
        AND parallax < {plx_upper:.2f}
        AND parallax > {plx_lower:.2f}
        AND parallax > 1
        AND parallax_error < 0.1
        AND 1.0857/phot_g_mean_flux_over_error < 0.03
        AND astrometric_sigma5d_max < 0.3
        AND visibility_periods_used > 8
        AND (
                (astrometric_excess_noise < 1)
                OR
                (astrometric_excess_noise > 1 AND astrometric_excess_noise_sig < 2)
        )
        ''')
        query = jobstr.format(sep_deg=5.0, ra=ra.value, dec=dec.value,
                              plx_upper=1.3 * plx_mas,
                              plx_lower=0.7 * plx_mas)

        Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))
        try:
            print(42 * '-')
            print('launching\n{}'.format(query))
            print(42 * '-')
            Gaia.launch_job(query=query, verbose=True, dump_to_file=True,
                            output_file=outfile)
        finally:
            # close the archive session even if the job failed
            Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()
    field_df = tab.to_pandas()

    #
    # require the same Tmag cutoff for the nbhd stars. ensure no overlap w/
    # sample of stars from the group itself. then randomly sample the
    # collection of stars.
    #
    bp_rp = field_df['phot_bp_mean_mag'] - field_df['phot_rp_mean_mag']
    Tmag_pred = (
        field_df['phot_g_mean_mag']
        - 0.00522555 * bp_rp**3
        + 0.0891337 * bp_rp**2
        - 0.633923 * bp_rp
        + 0.0324473)

    field_df['Tmag_pred'] = Tmag_pred

    sfield_df = field_df[field_df['Tmag_pred'] < Tmag_cutoff]
    common = sfield_df.merge(sdf, on='source_id', how='inner')
    sfield_df = sfield_df[~sfield_df.source_id.isin(common.source_id)]

    n_field = len(sfield_df)

    if 2 * n_sel_sources_in_group > n_field:
        errmsg = (
            'ngroup: {}. nfield: {}. plz tune gaia query to get >2x the stars'.
            format(n_sel_sources_in_group, n_field))
        raise AssertionError(errmsg)

    srfield_df = sfield_df.sample(n=n_sel_sources_in_group)

    #
    # now given the gaia ids, get the rotation periods
    #
    for ix, r in srfield_df.iterrows():

        source_id = np.int64(r['source_id'])
        star_ra, star_dec = float(r['ra']), float(r['dec'])

        c_obj = SkyCoord(star_ra, star_dec, unit=(u.deg, u.deg), frame='icrs')

        #
        # require that we are on-silicon. for year 1, this roughly means --
        # we are in the southern ecliptic hemisphere
        #
        if c_obj.barycentrictrueecliptic.lat > 0 * u.deg:
            print('group{}, {}: found in northern hemisphere. skip!'.format(
                group_id, name))
            continue

        workingdir = os.path.join(
            basedir, 'fits_pkls_results_pngs',
            'field_star_comparison_group{}_name{}'.format(group_id, name),
            str(source_id))
        # makedirs also creates the intermediate directories when missing
        os.makedirs(workingdir, exist_ok=True)

        outvppath = os.path.join(workingdir,
                                 'verification_page_{}.png'.format(source_id))
        if os.path.exists(outvppath):
            print('found {}, continue'.format(outvppath))
            continue

        #
        # if you already downloaded ffi cutouts for this object, dont get any
        # more. otherwise, get them
        #
        cutouts = glob(os.path.join(workingdir, '*.fits'))
        if len(cutouts) >= 1:
            print('found {} cutouts in {}, skip'.format(
                len(cutouts), workingdir))
        else:
            gfc.get_fficutout(c_obj, cutoutdir=workingdir)

        #
        # given the FFI cutouts, make simple light curves.
        #
        cutouts = glob(os.path.join(workingdir, '*.fits'))
        if len(cutouts) >= 1:
            d = glgf.get_lc_given_fficutout(workingdir, cutouts, c_obj,
                                            return_pkl=False)
        else:
            d = np.nan
            print('WRN! did not find fficutout for {}'.format(workingdir))

        if not isinstance(d, dict):
            print('WRN! got bad light curve for {}. skipping.'.format(
                workingdir))
            continue

        outpath = os.path.join(workingdir, 'GLS_rotation_period.results')

        #
        # do Lomb scargle w/ uniformly weighted points.
        #
        ls = LombScargle(d['time'], d['rel_flux'])
        period_min = 0.1
        # longest period searched: 90% of the time baseline, capped at 16
        period_max = np.min(
            [0.9 * (np.max(d['time']) - np.min(d['time'])), 16])
        freq, power = ls.autopower(minimum_frequency=1 / period_max,
                                   maximum_frequency=1 / period_min)
        try:
            # max() of an empty periodogram raises ValueError
            _ = power.max()
        except ValueError:
            print('WRN! got bad Lomb-Scargle for {}. skipping.'.format(
                workingdir))
            continue

        ls_fap = ls.false_alarm_probability(power.max(), method='baluev')
        ls_period = 1 / freq[np.argmax(power)]

        d['ls_fap'] = ls_fap
        d['ls_period'] = ls_period

        #
        # try to get TIC Teff. search TIC within 5 arcseconds, then take the
        # Gaia-ID match. (removing sources with no gaia ID, which do exist in
        # TICv8.
        #
        radius = 5.0 * u.arcsecond

        stars = Catalogs.query_region("{} {}".format(float(c_obj.ra.value),
                                                     float(c_obj.dec.value)),
                                      catalog="TIC",
                                      radius=radius)

        nbhr_source_ids = np.array(stars['GAIA'])

        stars = stars[nbhr_source_ids != '']
        nbhr_source_ids = nbhr_source_ids[nbhr_source_ids != '']

        # explicit int64: Gaia ids do not fit a 32-bit platform int
        sel = nbhr_source_ids.astype(np.int64) == source_id

        if np.count_nonzero(sel) == 1:
            star = stars[sel]
        else:
            raise NotImplementedError('did not get any TIC match. why?')

        teff = float(star['Teff'])
        # the previous guard (`not isinstance(...) and isfinite`) could
        # never fire; reject non-finite Teff as the message intends
        if not np.isfinite(teff):
            raise NotImplementedError('got nan TIC teff. what do?')

        #
        # make "check plot" analog for visual inspection
        #
        outd = {
            'ls_fap': d['ls_fap'],
            'ls_period': d['ls_period'],
            'source_id': source_id,
            'ra': star_ra,
            'dec': star_dec,
            'name': name,
            'group_id': group_id,
            'teff': teff
        }
        pu.save_status(outpath, 'lomb-scargle', outd)

        vp.generate_verification_page(d, ls, freq, power, cutouts, c_obj,
                                      outvppath, outd)
def given_source_ids_get_gaia_data(source_ids, groupname, n_max=10000,
                                   overwrite=True,
                                   enforce_all_sourceids_viable=True,
                                   savstr='',
                                   whichcolumns='*',
                                   gaia_datarelease='gaiadr2',
                                   getdr2ruwe=False):
    """
    Upload a list of Gaia source_ids and download the matching archive rows.

    Args:

        source_ids (np.ndarray) of np.int64 Gaia DR2/EDR3 source_ids. (If EDR3,
        be sure to use the correct `gaia_datarelease` kwarg)

        groupname (str): used in the names of the cached upload / download
        files.

        n_max (int): maximum number of rows requested (at most 50000).

        overwrite: if True, and finds that this crossmatch has already run,
        deletes previous cached output and reruns anyway.

        enforce_all_sourceids_viable: if True, will raise an assertion error if
        every source id does not return a result. (Unless the query returns
        n_max entries, in which case only a warning will be raised).

        savstr (str); optional string that will be included in the path to
        the downloaded vizier table.

        whichcolumns (str): ADQL column getter string. For instance "*", or
        "g.source_id, g.ra, g.dec" (the Gaia table is aliased as `g`, the
        uploaded table as `u`).

        gaia_datarelease (str): 'gaiadr2' or 'gaiaedr3'. Default is Gaia DR2.

        getdr2ruwe (bool): if True, queries gaiadr2.ruwe instead of
        gaiadr2.gaia_source

    Returns:

        dataframe with Gaia DR2 / EDR3 crossmatch info.

    Raises:

        NotImplementedError: n_max larger than 50000.
        TypeError: source_ids is not an ndarray of np.int64.
        AssertionError: getdr2ruwe requested for a release other than
        'gaiadr2', or (with enforce_all_sourceids_viable) fewer/more rows
        than source_ids came back without hitting n_max.
    """

    if n_max > int(5e4):
        raise NotImplementedError(
            'the gaia archive / astroquery seems to give invalid results past '
            '50000 source_ids in this implementation...')

    typemsg = 'source_ids must be np.ndarray of np.int64 Gaia DR2 source_ids'
    if not isinstance(source_ids, np.ndarray):
        raise TypeError(typemsg)
    # check the dtype first so that an empty int64 array no longer raises
    # IndexError; object arrays holding np.int64 values stay accepted.
    is_int64 = (source_ids.dtype == np.int64 or
                (source_ids.size > 0 and type(source_ids[0]) == np.int64))
    if not is_int64:
        raise TypeError(typemsg)

    if getdr2ruwe and gaia_datarelease != 'gaiadr2':
        # explicit raise: a bare assert would vanish under `python -O`
        raise AssertionError('getdr2ruwe requires gaia_datarelease="gaiadr2"')

    xmltouploadpath = os.path.join(
        gaiadir, f'toupload_{groupname}{savstr}_{gaia_datarelease}.xml')
    dlpath = os.path.join(
        gaiadir, f'group{groupname}_matches{savstr}_{gaia_datarelease}.xml.gz')

    if overwrite:
        if os.path.exists(xmltouploadpath):
            os.remove(xmltouploadpath)

    if not os.path.exists(xmltouploadpath):
        make_votable_given_source_ids(source_ids, outpath=xmltouploadpath)

    if os.path.exists(dlpath) and overwrite:
        os.remove(dlpath)

    if not getdr2ruwe:
        query = ('''
        SELECT top {n_max:d} {whichcolumns}
        FROM tap_upload.foobar as u, {gaia_datarelease:s}.gaia_source AS g
        WHERE u.source_id=g.source_id
        ''').format(whichcolumns=whichcolumns,
                    n_max=n_max,
                    gaia_datarelease=gaia_datarelease)
    else:
        query = ('''
        SELECT top {n_max:d} *
        FROM tap_upload.foobar as u, gaiadr2.ruwe AS g
        WHERE u.source_id=g.source_id
        ''').format(n_max=n_max)

    if not os.path.exists(dlpath):

        Gaia.login(credentials_file=credentials_file)
        try:
            # might do async if this times out. but it doesn't.
            Gaia.launch_job(query=query, upload_resource=xmltouploadpath,
                            upload_table_name="foobar", verbose=True,
                            dump_to_file=True, output_file=dlpath)
        finally:
            # close the archive session even if the job failed
            Gaia.logout()

    df = given_votable_get_df(dlpath, assert_equal='source_id')

    if len(df) != len(source_ids):
        # hitting n_max means the result was truncated, which only warns
        if enforce_all_sourceids_viable and len(df) != n_max:
            errmsg = ('ERROR! got {} matches vs {} source id queries'.format(
                len(df), len(source_ids)))
            print(errmsg)
            raise AssertionError(errmsg)

        wrnmsg = ('WRN! got {} matches vs {} source id queries'.format(
            len(df), len(source_ids)))
        print(wrnmsg)

    return df
def given_dr2_sourceids_get_edr3_xmatch(dr2_source_ids, runid, overwrite=True,
                                        enforce_all_sourceids_viable=True):
    """
    Use the dr2_neighborhood table to look up the EDR3 source_ids given DR2
    source_ids.

    "The only safe way to compare source records between different Data
    Releases in general is to check the records of proximal source(s) in the
    same small part of the sky. This table provides the means to do this via a
    precomputed crossmatch of such sources, taking into account the proper
    motions available at E/DR3."

    "Within the neighbourhood of a given E/DR3 source there may be none, one or
    (rarely) several possible counterparts in DR2 indicated by rows in this
    table. This occasional source confusion is an inevitable consequence of the
    merging, splitting and deletion of identifiers introduced in previous
    releases during the DR3 processing and results in no guaranteed one-to-one
    correspondence in source identifiers between the releases."

    See:
    https://gea.esac.esa.int/archive/documentation/GEDR3/Gaia_archive/chap_datamodel/sec_dm_auxiliary_tables/ssec_dm_dr2_neighbourhood.html

    Args:

        dr2_source_ids (np.ndarray) of np.int64 Gaia DR2 source_ids

        runid (str): identifier used to identify and cache jobs.

        overwrite: if True, and finds that this crossmatch has already run,
        deletes previous cached output and reruns anyway.

        enforce_all_sourceids_viable: if True, will raise an assertion error if
        fewer rows than requested source ids are returned.

    Returns:

        dr2_x_edr3_df (pd.DataFrame), containing:
            ['source_id', 'dr2_source_id', 'dr3_source_id', 'angular_distance',
            'magnitude_difference', 'proper_motion_propagation']

        where "source_id" is the requested source_id, and the remaining columns
        are matches from the dr2_neighborhood table.

        This DataFrame should then be used to ensure e.g., that every REQUESTED
        source_id provides only one MATCHED star.

    Raises:

        TypeError: dr2_source_ids is not an ndarray of np.int64, or runid is
        not a string.
        AssertionError: fewer matches than source ids while
        enforce_all_sourceids_viable is True.
    """

    typemsg = 'source_ids must be np.ndarray of np.int64 Gaia DR2 source_ids'
    if not isinstance(dr2_source_ids, np.ndarray):
        raise TypeError(typemsg)
    # check the dtype first so that an empty int64 array no longer raises
    # IndexError; object arrays holding np.int64 values stay accepted.
    is_int64 = (dr2_source_ids.dtype == np.int64 or
                (dr2_source_ids.size > 0 and
                 type(dr2_source_ids[0]) == np.int64))
    if not is_int64:
        raise TypeError(typemsg)
    if not isinstance(runid, str):
        raise TypeError(
            'Expect runid to be a (preferentially unique among jobs) string.')

    xmltouploadpath = os.path.join(gaiadir, f'toupload_{runid}.xml')
    dlpath = os.path.join(gaiadir, f'{runid}_matches.xml.gz')

    if overwrite:
        if os.path.exists(xmltouploadpath):
            os.remove(xmltouploadpath)

    if not os.path.exists(xmltouploadpath):
        make_votable_given_source_ids(dr2_source_ids, outpath=xmltouploadpath)

    if os.path.exists(dlpath) and overwrite:
        os.remove(dlpath)

    if not os.path.exists(dlpath):

        # each DR2 id may have several EDR3 neighbours, hence the factor 2
        n_max = 2 * len(dr2_source_ids)
        print(f"Setting n_max = 2 * (number of dr2_source_ids) = {n_max}")

        query = ('''
        SELECT top {n_max:d} *
        FROM tap_upload.foobar as u, gaiaedr3.dr2_neighbourhood AS g
        WHERE u.source_id=g.dr2_source_id
        ''').format(n_max=n_max)

        Gaia.login(credentials_file=credentials_file)
        try:
            # might do async if this times out. but it doesn't.
            Gaia.launch_job(query=query, upload_resource=xmltouploadpath,
                            upload_table_name="foobar", verbose=True,
                            dump_to_file=True, output_file=dlpath)
        finally:
            # close the archive session even if the job failed
            Gaia.logout()

    df = given_votable_get_df(dlpath, assert_equal=None)

    if len(df) > len(dr2_source_ids):
        wrnmsg = (
            'WRN! got {} matches vs {} source id queries. Fix via angular_distance or magnitude_difference'
            .format(len(df), len(dr2_source_ids)))
        print(wrnmsg)

    if len(df) < len(dr2_source_ids) and enforce_all_sourceids_viable:
        errmsg = ('ERROR! got {} matches vs {} dr2 source id queries'.format(
            len(df), len(dr2_source_ids)))
        print(errmsg)
        raise AssertionError(errmsg)

    return df
def query_neighborhood(bounds, groupname, n_max=2000, overwrite=True,
                       manual_gmag_limit=16, mstr='',
                       use_bonus_quality_cuts=True):
    """
    Given the bounds in position and parallax corresponding to a group in the
    CDIPS target catalogs, get the DR2 stars from the group's neighborhood.

    The bounds are lower and upper in ra, dec, parallax, and there is a
    limiting G magnitude. A maximum number of stars, `n_max`, are selected from
    within these bounds.

    Args:
        bounds (dict): dict with keys parallax_lower, parallax_upper, ra_lower,
        ra_upper, dec_lower, dec_upper. (Each of which has a float value).

        groupname (str): string used when cacheing for files. if you are
        querying a field star, best to include sourceid.

        n_max (int): maximum number of stars in the neighborhood to acquire.

        overwrite (bool): if True, a cached result is deleted and the query
        is rerun.

        manual_gmag_limit (float or None): limiting G magnitude; None falls
        back to 16 (and logs that it did).

        mstr (str): string used in the cached neighborhood pickle file.

        use_bonus_quality_cuts (bool): default True. Imposes some things like
        "there need to be at least 8 Gaia transits", and similar requirements.

    Returns:
        dataframe of DR2 stars within the bounds given. This is useful for
        querying stars that are in the neighborhood of some group.
    """

    if manual_gmag_limit is not None:
        g_mag_limit = manual_gmag_limit
    else:
        g_mag_limit = 16
        LOGINFO(f'Using default g_mag_limit of {g_mag_limit}')

    dlpath = os.path.join(
        gaiadir, 'nbhd_group{}_matches{}.xml.gz'.format(groupname, mstr))

    if os.path.exists(dlpath) and overwrite:
        os.remove(dlpath)

    if not os.path.exists(dlpath):

        quality_cuts = ''
        if use_bonus_quality_cuts:
            # Impose some extra quality cuts, originally from Kounkel & Covey
            # 2019, but generally applicable.
            quality_cuts = """
        and parallax > 1
        and parallax_error < 0.1
        and 1.0857/phot_g_mean_flux_over_error < 0.03
        and astrometric_sigma5d_max < 0.3
        and visibility_periods_used > 8
        and (
                (astrometric_excess_noise < 1)
                or
                (astrometric_excess_noise > 1 and astrometric_excess_noise_sig < 2)
        )"""

        # single template; the optional cuts are spliced in before ORDER BY
        jobstr = ("""
        select top {n_max:d}
            g.source_id, g.phot_bp_mean_mag, g.phot_rp_mean_mag,
            g.phot_g_mean_mag, g.parallax, g.ra, g.dec, g.pmra, g.pmdec,
            g.radial_velocity, g.radial_velocity_error
        from gaiadr2.gaia_source as g
        where
            g.parallax > {parallax_lower:.2f}
        and g.parallax < {parallax_upper:.2f}
        and g.dec < {dec_upper:.2f}
        and g.dec > {dec_lower:.2f}
        and g.ra > {ra_lower:.2f}
        and g.ra < {ra_upper:.2f}
        and g.phot_g_mean_mag < {g_mag_limit:.1f}{quality_cuts}
        order by random_index
        """)
        query = jobstr.format(n_max=n_max,
                              parallax_lower=bounds['parallax_lower'],
                              parallax_upper=bounds['parallax_upper'],
                              ra_lower=bounds['ra_lower'],
                              ra_upper=bounds['ra_upper'],
                              dec_lower=bounds['dec_lower'],
                              dec_upper=bounds['dec_upper'],
                              g_mag_limit=g_mag_limit,
                              quality_cuts=quality_cuts)

        Gaia.login(credentials_file=credentials_file)
        try:
            # async jobs can avoid timeout
            Gaia.launch_job_async(query=query, verbose=True,
                                  dump_to_file=True, output_file=dlpath)
        finally:
            # close the archive session even if the job failed
            Gaia.logout()

    df = given_votable_get_df(dlpath, assert_equal='source_id')

    return df
def make_Bell17_GaiaDR2_crossmatch(maxsep=10, outdir=datadir,
                                   homedir='/home/luke/'):
    '''
    Spatially crossmatch the 32 Ori members of Bell et al. (2017, table 3)
    against Gaia DR2.

    The table is read from `Bell_2017_32Ori_table_3.txt` in `datadir`.
    Coordinates are decoded from the 2MASS designation on each line, and the
    first two "<float>\\pm" entries are taken as pm_RA and pm_DEC. The
    members are uploaded to the Gaia archive, matched within `maxsep`, cut
    at G<18 and on proper-motion sign, and the nearest neighbour per name is
    written to `Bell17_table_32Ori_MATCHES_GaiaDR2.csv` in `outdir`.

    Args:
        maxsep (float): crossmatch radius in arcsec.
        outdir (str): directory for the uploaded / downloaded / result files.
        homedir (str): directory holding the `.gaia_credentials` file.

    Raises:
        AssertionError: the query returned (nearly) as many rows as the TOP
        limit allows, i.e. the result is probably truncated.
    '''

    with open(os.path.join(datadir,'Bell_2017_32Ori_table_3.txt')) as f:
        lines = f.readlines()

    lines = [l.replace('\n','') for l in lines if not l.startswith('#') and
             len(l) > 200]

    # regex finds the 2mass id
    id_pattern = re.compile('[0-9]{8}.[0-9]{7}')
    # regex finds floats in order, with the \pm appended. first is
    # always pm_RA, second is always pm_DEC.
    pm_pattern = re.compile(r'([+-]?(?:[0-9]*[.])?[0-9]+)\\pm')

    twomass_id_strs, pm_ra_strs, pm_dec_strs = [], [], []
    for l in lines:
        id_match = id_pattern.search(l)
        pm_values = [m.group(1) for m in pm_pattern.finditer(l)][:2]
        # parse the whole line before appending anything, so that the three
        # lists can never get out of step with each other
        if id_match is None or len(pm_values) < 2:
            print('skipping')
            print(l)
            continue
        twomass_id_strs.append(id_match.group(0))
        pm_ra_strs.append(float(pm_values[0]))
        pm_dec_strs.append(float(pm_values[1]))

    # 2MASS designation "hhmmssss[+-]ddmmsss" -> sexagesimal strings
    RA = [t[0:2]+'h'+t[2:4]+'m'+t[4:6]+'.'+t[6:8] for t in twomass_id_strs]
    DE = [t[8]+t[9:11]+'d'+t[11:13]+'m'+t[13:15]+'.'+t[15]
          for t in twomass_id_strs]

    c = SkyCoord(RA, DE, frame='icrs')

    RA = arr(c.ra.value)
    dec = arr(c.dec.value)
    pm_RA = arr(pm_ra_strs)
    pm_dec = arr(pm_dec_strs)
    name = arr(twomass_id_strs)

    assoc_name = '32Ori'
    assoc = np.repeat(assoc_name, len(RA))

    print(42*'-')
    # outputs honour `outdir` (its default is `datadir`, as before)
    outname = os.path.join(outdir, 'Bell17_table_32Ori.csv')
    print('{}'.format(outname))
    print('initial number of members: {}'.format(len(RA)))

    xmatchoutpath = outname.replace('.csv','_MATCHES_GaiaDR2.csv')
    outfile = outname.replace('.csv','_GOTMATCHES_GaiaDR2.xml')
    xmltouploadpath = outname.replace('.csv','_TOUPLOAD_GaiaDR2.xml')

    # do the spatial crossmatch... any previous result is discarded, so the
    # query is always rerun
    if os.path.exists(outfile):
        os.remove(outfile)

    _ = make_votable_given_cols(name, assoc, RA, dec, pm_RA, pm_dec,
                                outpath=xmltouploadpath)

    # separated less than maxsep arcsec.
    jobstr = (
    '''
    SELECT TOP {ncut:d} u.name, u.assoc, u.ra, u.dec, u.pm_ra, u.pm_dec,
    g.source_id, DISTANCE(
      POINT('ICRS', u.ra, u.dec),
      POINT('ICRS', g.ra,g.dec)) AS dist,
      g.phot_g_mean_mag as gaia_gmag,
      g.pmra AS gaia_pmra,
      g.pmdec AS gaia_pmdec
    FROM tap_upload.foobar as u, gaiadr2.gaia_source AS g
    WHERE 1=CONTAINS(
      POINT('ICRS', u.ra, u.dec),
      CIRCLE('ICRS', g.ra, g.dec, {sep:.8f})
    )
    '''
    )
    maxncut = int(5*len(name)) # to avoid query timeout
    maxsep = (maxsep*u.arcsec).to(u.deg).value
    query = jobstr.format(sep=maxsep, ncut=maxncut)

    Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))
    try:
        # might do async if this times out. but it doesn't.
        Gaia.launch_job(query=query, upload_resource=xmltouploadpath,
                        upload_table_name="foobar", verbose=True,
                        dump_to_file=True, output_file=outfile)
    finally:
        # close the archive session even if the job failed
        Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()

    if maxncut - len(tab) < 10:
        errmsg = 'ERROR! too many matches'
        raise AssertionError(errmsg)

    print('number of members after gaia 10 arcsec search: {}'.format(len(tab)))

    # if nonzero and finite proper motion, require Gaia pm match to sign
    # of stated PMs.
    df = tab.to_pandas()

    print('\n'+42*'-')

    sel = (df['gaia_gmag'] < 18)
    print('{} stars in sep < 10 arcsec, G<18, xmatch'.format(len(df[sel])))

    sel &= (
        (   (df['pm_ra'] != 0 ) & (df['pm_dec'] != 0 ) &
            ( np.sign(df['pm_ra']) == np.sign(df['gaia_pmra']) ) &
            ( np.sign(df['pm_dec']) == np.sign(df['gaia_pmdec']) )
        )
        |
        (
            (df['pm_ra'] == 0 ) & (df['pm_dec'] == 0 )
        )
    )
    df = df[sel]
    print('{} stars in sep < 10 as xmatch, G<18, after pm cut (xor zero pm)'.
          format(len(df)))

    # make multiplicity column. then sort by name, then by distance. then drop
    # name duplicates, keeping the first (you have nearest neighbor saved!)
    _, inv, cnts = np.unique(df['name'], return_inverse=True,
                             return_counts=True)

    df['n_in_nbhd'] = cnts[inv]

    # NOTE(review): the VOTable strings may arrive as bytes or already as
    # str depending on the astropy version; decode only real bytes.
    def _as_text(value):
        return value.decode('utf-8') if isinstance(value, bytes) else value

    df['name'] = df['name'].map(_as_text)
    df['assoc'] = df['assoc'].map(_as_text)

    df = df.sort_values(['name','dist'])

    df = df.drop_duplicates(subset='name', keep='first')

    df['source_id'] = df['source_id'].astype('int64')

    print('{} stars after above cuts + chosing nearest nbhr by spatial sep'.
          format(len(df)))

    df.to_csv(xmatchoutpath, index=False)
    print('made {}'.format(xmatchoutpath))
    print(79*'=')
def make_vizier_GaiaDR2_crossmatch(vizier_search_str, ra_str, dec_str,
                                   pmra_str, pmdec_str, name_str, assoc_name,
                                   table_num=0, outname='', maxsep=10,
                                   outdir=datadir, homedir='/home/luke/'):
    '''
    Spatially crossmatch catalog of <~100,000 members w/ coords and PMs against
    Gaia DR2. This assumes that the catalog is on vizier.

    make_Kraus14_GaiaDR2_crossmatch is an example of a call.

    Args:
        vizier_search_str (str): passed to `Vizier.find_catalogs`.
        ra_str, dec_str, pmra_str, pmdec_str, name_str (str): column names in
            the Vizier table. RA must be in deg and pm_RA in mas/yr.
        assoc_name (str): association label attached to every member.
        table_num (int): index of the table among the retrieved catalogs.
        outname (str): path containing ".csv"; the upload, download and
            result file names are derived from it by replacing ".csv".
        maxsep (float): crossmatch radius in arcsec.
        outdir: unused by this function; all paths derive from `outname`.
        homedir (str): directory holding the `.gaia_credentials` file.

    Raises:
        ValueError: `outname` does not contain ".csv" (the three derived
            paths would otherwise all collapse onto `outname` itself).
        AssertionError: unexpected column units, or the query returned
            (nearly) as many rows as the TOP limit allows.
    '''

    if '.csv' not in outname:
        raise ValueError(
            'outname must contain ".csv", got {!r}'.format(outname))

    # NOTE: this lifts the row limit on the shared Vizier object
    Vizier.ROW_LIMIT = -1
    catalog_list = Vizier.find_catalogs(vizier_search_str)
    catalogs = Vizier.get_catalogs(catalog_list.keys())

    tab = catalogs[table_num]
    print(42*'-')
    print('{}'.format(outname))
    print('initial number of members: {}'.format(len(tab)))

    # gaia xmatch need these two column names
    RA = tab[ra_str]
    dec = tab[dec_str]
    pm_RA = tab[pmra_str]
    pm_dec = tab[pmdec_str]
    name = tab[name_str]

    assoc = np.repeat(assoc_name, len(RA))

    # explicit raises: bare asserts would vanish under `python -O`
    if tab[ra_str].unit != u.deg:
        raise AssertionError('expected {} in deg'.format(ra_str))
    if tab[pmra_str].unit != u.mas/u.year:
        raise AssertionError('expected {} in mas/yr'.format(pmra_str))

    xmatchoutpath = outname.replace('.csv','_MATCHES_GaiaDR2.csv')
    outfile = outname.replace('.csv','_GOTMATCHES_GaiaDR2.xml')
    xmltouploadpath = outname.replace('.csv','_TOUPLOAD_GaiaDR2.xml')

    # do the spatial crossmatch... any previous result is discarded, so the
    # query is always rerun
    if os.path.exists(outfile):
        os.remove(outfile)

    _ = make_votable_given_cols(name, assoc, RA, dec, pm_RA, pm_dec,
                                outpath=xmltouploadpath)

    # separated less than maxsep arcsec.
    jobstr = (
    '''
    SELECT TOP {ncut:d} u.name, u.assoc, u.ra, u.dec, u.pm_ra, u.pm_dec,
    g.source_id, DISTANCE(
      POINT('ICRS', u.ra, u.dec),
      POINT('ICRS', g.ra,g.dec)) AS dist,
      g.phot_g_mean_mag as gaia_gmag,
      g.pmra AS gaia_pmra,
      g.pmdec AS gaia_pmdec
    FROM tap_upload.foobar as u, gaiadr2.gaia_source AS g
    WHERE 1=CONTAINS(
      POINT('ICRS', u.ra, u.dec),
      CIRCLE('ICRS', g.ra, g.dec, {sep:.8f})
    )
    '''
    )
    maxncut = int(5*len(name)) # to avoid query timeout
    maxsep = (maxsep*u.arcsec).to(u.deg).value
    query = jobstr.format(sep=maxsep, ncut=maxncut)

    Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))
    try:
        # might do async if this times out. but it doesn't.
        Gaia.launch_job(query=query, upload_resource=xmltouploadpath,
                        upload_table_name="foobar", verbose=True,
                        dump_to_file=True, output_file=outfile)
    finally:
        # close the archive session even if the job failed
        Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()

    if maxncut - len(tab) < 10:
        errmsg = 'ERROR! too many matches'
        raise AssertionError(errmsg)

    print('number of members after gaia 10 arcsec search: {}'.format(len(tab)))

    # if nonzero and finite proper motion, require Gaia pm match to sign
    # of stated PMs.
    df = tab.to_pandas()

    print('\n'+42*'-')

    sel = (df['gaia_gmag'] < 18)
    print('{} stars in sep < 10 arcsec, G<18, xmatch'.format(len(df[sel])))

    sel &= (
        (   (df['pm_ra'] != 0 ) & (df['pm_dec'] != 0 ) &
            ( np.sign(df['pm_ra']) == np.sign(df['gaia_pmra']) ) &
            ( np.sign(df['pm_dec']) == np.sign(df['gaia_pmdec']) )
        )
        |
        (
            (df['pm_ra'] == 0 ) & (df['pm_dec'] == 0 )
        )
    )
    df = df[sel]
    print('{} stars in sep < 10 as xmatch, G<18, after pm cut (xor zero pm)'.
          format(len(df)))

    # make multiplicity column. then sort by name, then by distance. then drop
    # name duplicates, keeping the first (you have nearest neighbor saved!)
    _, inv, cnts = np.unique(df['name'], return_inverse=True,
                             return_counts=True)

    df['n_in_nbhd'] = cnts[inv]

    # NOTE(review): the VOTable strings may arrive as bytes or already as
    # str depending on the astropy version; decode only real bytes.
    def _as_text(value):
        return value.decode('utf-8') if isinstance(value, bytes) else value

    df['name'] = df['name'].map(_as_text)
    df['assoc'] = df['assoc'].map(_as_text)

    df = df.sort_values(['name','dist'])

    df = df.drop_duplicates(subset='name', keep='first')

    df['source_id'] = df['source_id'].astype('int64')

    print('{} stars after above cuts + chosing nearest nbhr by spatial sep'.
          format(len(df)))

    df.to_csv(xmatchoutpath, index=False)
    print('made {}'.format(xmatchoutpath))
    print(79*'=')
def make_Oh17_GaiaDR2_crossmatch(
    namestr='Oh_2017_clustering_GaiaDR2_crossmatched',
    outdir=datadir, homedir='/home/luke/'):
    '''
    Semyeong Oh et al (2017) discovered 10.6k stars with separations <10pc that
    are in likely comoving pairs / groups.

    see
    http://vizier.cfa.harvard.edu/viz-bin/VizieR?-source=J/AJ/153/257

    The stars in groups of size >= 3 are uploaded to the Gaia archive and
    joined, on their Gaia DR1 source id, against gaiadr2.dr1_neighbourhood.
    For each star the neighbour with the smallest angular distance is kept.

    Args:
        namestr (str): stem used to name the upload VOTable, the raw match
            VOTable and the final CSV inside `outdir`.
        outdir (str): directory the three files are written to.
        homedir (str): directory holding the `.gaia_credentials` file.

    Returns:
        None. Writes `MATCHED_<namestr>.csv` in `outdir`.

    Raises:
        AssertionError: if the number of matches comes within 10 rows of the
            `TOP` cap.
    '''

    # Download Oh's tables; only the first one (stars) is used here.
    Vizier.ROW_LIMIT = -1
    catalog_list = Vizier.find_catalogs('J/AJ/153/257')
    catalogs = Vizier.get_catalogs(catalog_list.keys())

    stars = catalogs[0]

    # IMPOSE GROUP SIZE >= 3.
    stars = stars[stars['Size'] >= 3]

    # use the gaia dr1 ids, to match the dr1_neighbourhood table.
    outfile = os.path.join(outdir, 'gotmatches_{}.xml.gz'.format(namestr))
    xmltouploadpath = os.path.join(outdir, 'toupload_{}.xml'.format(namestr))

    # Any previous result is deleted, so the query is always re-run.
    if os.path.exists(outfile):
        os.remove(outfile)

    t = Table()
    t['name'] = arr(stars['Star']).astype(str)
    # note "Group" is a bad thing to name a column b/c it is a SQL
    # keyword...
    t['groupname'] = arr(stars['Group']).astype(str)
    t['ra'] = arr(stars['RAJ2000'])*u.deg
    t['dec'] = arr(stars['DEJ2000'])*u.deg
    t['gaia'] = arr(stars['Gaia']).astype(int)
    t['gmag'] = arr(stars['Gmag'])*u.mag
    t['size'] = arr(stars['Size']).astype(int)

    votable = from_table(t)
    writeto(votable, xmltouploadpath)
    print('made {}'.format(xmltouploadpath))

    # https://gea.esac.esa.int/archive/documentation/GDR2/...
    # Gaia_archive/chap_datamodel/sec_dm_auxiliary_tables/...
    # ssec_dm_dr1_neighbourhood.html
    jobstr = (
    '''
    SELECT TOP {ncut:d} u.name, u.gaia, u.ra, u.dec, u.groupname as assoc,
    u.size, u.gmag,
    n.dr2_source_id as source_id, n.angular_distance as dist, n.rank,
    n.magnitude_difference
    FROM tap_upload.foobar as u, gaiadr2.dr1_neighbourhood AS n
    WHERE u.gaia = n.dr1_source_id
    '''
    )
    maxncut = int(5*len(stars))  # to avoid query timeout
    query = jobstr.format(ncut=maxncut)

    Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))
    try:
        # might do async if this times out. but it doesn't.
        Gaia.launch_job(query=query,
                        upload_resource=xmltouploadpath,
                        upload_table_name="foobar", verbose=True,
                        dump_to_file=True, output_file=outfile)
    finally:
        # always close the archive session, even if the query failed.
        Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()

    # A row count this close to the TOP cap suggests the result was
    # truncated.
    if maxncut - len(tab) < 10:
        errmsg = 'ERROR! too many matches'
        raise AssertionError(errmsg)

    df = tab.to_pandas()

    print('\n'+42*'-')
    print('{} stars in original Oh table (size>=3)'.format(len(stars)))
    print('{} stars in gaia nbhd match'.format(len(df)))

    # we want DR2 mags. "gmag" is the uploaded DR1 mag.
    df['gaia_gmag'] = df['gmag'] + df['magnitude_difference']
    df = df.drop(columns=['gmag'])

    # multiplicity column: number of neighbourhood rows sharing each name.
    _, inv, cnts = np.unique(df['name'], return_inverse=True,
                             return_counts=True)

    df['n_in_nbhd'] = cnts[inv]

    # Decode only genuine bytes values: `.str.decode` can turn entries that
    # are already str into NaN (whether the columns arrive as bytes or str
    # presumably depends on the astropy version -- TODO confirm).
    for colname in ('name', 'assoc'):
        df[colname] = df[colname].map(
            lambda val: val.decode('utf-8') if isinstance(val, bytes) else val
        )

    # sort by name then distance, so that dropping name duplicates keeps the
    # nearest neighbour.
    df = df.sort_values(['name', 'dist'])

    df = df.drop_duplicates(subset='name', keep='first')

    df['gaia'] = df['gaia'].astype('int64')
    df = df.rename(index=str, columns={"gaia": "gaia_dr1_source_id"})

    df['size'] = df['size'].astype('int')

    df['source_id'] = df['source_id'].astype('int64')

    print('{} stars after above cuts + chosing nearest nbhr by spatial sep'.
          format(len(df)))

    outpath = os.path.join(outdir, 'MATCHED_{}.csv'.format(namestr))
    df.to_csv(outpath, index=False)
    print('made {}'.format(outpath))
    print(79*'=')
def make_Rizzuto11_GaiaDR2_crossmatch(
    outdir=datadir, homedir='/home/luke/'):
    '''
    Aaron Rizzuto et al (2011) gave a list of 436 Sco OB2 members.
    http://vizier.cfa.harvard.edu/viz-bin/VizieR?-source=J/MNRAS/416/3108

    The Vizier table is uploaded as-is to the Gaia archive and joined, on its
    `hip` column, against gaiadr2.hipparcos2_best_neighbour.

    Args:
        outdir (str): directory the upload VOTable, the raw match VOTable and
            the final CSV are written to.
        homedir (str): directory holding the `.gaia_credentials` file.

    Returns:
        None. Writes `MATCHED_Rizzuto_11_table_1_ScoOB2_members.csv` in
        `outdir`.

    Raises:
        AssertionError: if the number of matches comes within 10 rows of the
            `TOP` cap.
    '''

    vizier_search_str = 'J/MNRAS/416/3108'
    table_num = 0
    namestr = 'Rizzuto_11_table_1_ScoOB2_members'
    # only used as a printed label; built from `outdir` so that it agrees
    # with where the files below are actually written.
    outname = os.path.join(outdir, '{}.csv'.format(namestr))

    Vizier.ROW_LIMIT = -1
    catalog_list = Vizier.find_catalogs(vizier_search_str)
    catalogs = Vizier.get_catalogs(catalog_list.keys())

    tab = catalogs[table_num]
    print(42*'-')
    print('{}'.format(outname))
    print('initial number of members: {}'.format(len(tab)))

    # match "HIP" number to hipparcos table...
    outfile = os.path.join(outdir, 'gotmatches_{}.xml.gz'.format(namestr))
    xmltouploadpath = os.path.join(outdir, 'toupload_{}.xml'.format(namestr))

    # Any previous result is deleted, so the query is always re-run.
    if os.path.exists(outfile):
        os.remove(outfile)

    votable = from_table(tab)
    writeto(votable, xmltouploadpath)
    print('made {}'.format(xmltouploadpath))

    # join the uploaded HIP numbers against the archive's precomputed
    # Hipparcos-2 best-neighbour table.
    jobstr = (
    '''
    SELECT TOP {ncut:d} u.hip, u.memb, u.col_ra, u.col_de,
    n.source_id, n.angular_distance as dist, n.number_of_neighbours
    FROM tap_upload.foobar as u, gaiadr2.hipparcos2_best_neighbour AS n
    WHERE u.hip = n.original_ext_source_id
    '''
    )
    maxncut = int(5*len(tab))  # to avoid query timeout
    query = jobstr.format(ncut=maxncut)

    Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))
    try:
        # might do async if this times out. but it doesn't.
        Gaia.launch_job(query=query,
                        upload_resource=xmltouploadpath,
                        upload_table_name="foobar", verbose=True,
                        dump_to_file=True, output_file=outfile)
    finally:
        # always close the archive session, even if the query failed.
        Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()
    print('number after hipparcos xmatch: {}'.format(len(tab)))

    # A row count this close to the TOP cap suggests the result was
    # truncated.
    if maxncut - len(tab) < 10:
        errmsg = 'ERROR! too many matches'
        raise AssertionError(errmsg)

    df = tab.to_pandas()

    df['source_id'] = df['source_id'].astype('int64')
    # the HIP number doubles as the star name in the output table.
    df['name'] = df['hip'].astype('int64')

    outpath = os.path.join(outdir, 'MATCHED_{}.csv'.format(namestr))
    df.to_csv(outpath, index=False)
    print('made {}'.format(outpath))
    print(79*'=')
def make_Gagne18_BANYAN_any_DR2_crossmatch(
        tablepath,
        namestr=None,
        maxsep=10,
        outdir=datadir,
        homedir='/home/luke/'):
    """
    J. Gagne's tables have a particular format that requires some wrangling.
    Also, since so many of the stars are high PM, the spatial cross-matches
    will be crap unless we also include PM information in the matching.

    Do the matching via astroquery's Gaia API.

    Args:
        tablepath (str): path to the table, read with astropy's 'ascii.cds'
            reader. It must provide the sexagesimal coordinate columns
            (RAh, RAm, RAs, DE-, DEd, DEm, DEs), pmRA, pmDE, e_pmRA, e_pmDE,
            Assoc, and a name column ('Main' if namestr contains 'XI_',
            otherwise 'Name').
        namestr (str): required stem used to name the upload VOTable, the
            raw match VOTable and the final CSV inside `outdir`.
        maxsep (float): positional match radius, in arcsec.
        outdir (str): directory the three files are written to.
        homedir (str): directory holding the `.gaia_credentials` file.

    Returns:
        None. Writes `MATCHED_<namestr>.csv` in `outdir`.

    Raises:
        AssertionError: if namestr is not a str, or if the number of matches
            comes within 10 rows of the `TOP` cap.
    """

    # Explicit raise (rather than `assert`) so the check is not stripped when
    # python runs with -O.
    if not isinstance(namestr, str):
        raise AssertionError('namestr must be a str')

    t = Table.read(tablepath, format='ascii.cds')

    # assemble sexagesimal strings that SkyCoord can parse.
    RAh, RAm, RAs = arr(t['RAh']), arr(t['RAm']), arr(t['RAs'])

    RA_hms = [
        '{}h{}m{}s'.format(str(rah).zfill(2),
                           str(ram).zfill(2),
                           str(ras).zfill(2))
        for rah, ram, ras in zip(RAh, RAm, RAs)
    ]

    DEd, DEm, DEs = arr(t['DEd']), arr(t['DEm']), arr(t['DEs'])
    DEsign = arr(t['DE-'])
    # anything that is not an explicit minus sign is taken as positive.
    DEsign[DEsign != '-'] = '+'

    DE_dms = [
        '{}{}d{}m{}s'.format(str(desgn),
                             str(ded).zfill(2),
                             str(dem).zfill(2),
                             str(des).zfill(2))
        for desgn, ded, dem, des in zip(DEsign, DEd, DEm, DEs)
    ]

    coords = SkyCoord(ra=RA_hms, dec=DE_dms, frame='icrs')

    RA = coords.ra.value
    dec = coords.dec.value
    pm_RA, pm_dec = arr(t['pmRA']), arr(t['pmDE'])
    u_pm_RA, u_pm_dec = arr(t['e_pmRA']), arr(t['e_pmDE'])

    maxsep_deg = (maxsep*u.arcsec).to(u.deg).value

    name = t['Main'] if 'XI_' in namestr else t['Name']
    assoc = t['Assoc']

    outfile = os.path.join(outdir, 'gotmatches_{}.xml.gz'.format(namestr))
    xmltouploadpath = os.path.join(outdir, 'toupload_{}.xml'.format(namestr))

    # NOTE if it's fast, can just do this to overwrite. Any previous result
    # is deleted, so the query is always re-run.
    if os.path.exists(outfile):
        os.remove(outfile)

    make_votable_given_full_cols(name, assoc, RA, dec, pm_RA, pm_dec,
                                 u_pm_RA, u_pm_dec,
                                 outpath=xmltouploadpath)

    # all gaia sources separated by less than maxsep from an uploaded star.
    jobstr = (
    '''
    SELECT TOP {ncut:d} u.name, u.assoc, u.ra, u.dec, u.pm_ra, u.pm_dec,
    u.err_pm_ra, u.err_pm_dec,
    g.source_id, DISTANCE(
      POINT('ICRS', u.ra, u.dec),
      POINT('ICRS', g.ra,g.dec)) AS dist,
      g.phot_g_mean_mag as gaia_gmag,
      g.pmra AS gaia_pmra,
      g.pmdec AS gaia_pmdec
    FROM tap_upload.foobar as u, gaiadr2.gaia_source AS g
    WHERE 1=CONTAINS(
      POINT('ICRS', u.ra, u.dec),
      CIRCLE('ICRS', g.ra, g.dec, {sep:.8f})
    )
    '''
    )
    maxncut = int(5*len(name))  # to avoid query timeout
    query = jobstr.format(sep=maxsep_deg, ncut=maxncut)

    Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))
    try:
        # might do async if this times out. but it doesn't.
        Gaia.launch_job(query=query,
                        upload_resource=xmltouploadpath,
                        upload_table_name="foobar", verbose=True,
                        dump_to_file=True, output_file=outfile)
    finally:
        # always close the archive session, even if the query failed.
        Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()

    # A row count this close to the TOP cap suggests the result was
    # truncated.
    if maxncut - len(tab) < 10:
        errmsg = 'ERROR! too many matches'
        raise AssertionError(errmsg)

    # if nonzero and finite proper motion, require Gaia pm match to sign
    # of stated Gagne PMs.
    df = tab.to_pandas()

    print('\n'+42*'-')
    print('{} stars in original Gagne table'.format(len(t)))
    print('{} stars in sep < 10 arcsec xmatch'.format(len(df)))

    sel = (df['gaia_gmag'] < 18)
    print('{} stars in sep < 10 arcsec, G<18, xmatch'.format(len(df[sel])))

    sel &= (
        (
            (df['pm_ra'] != 0)
            & (df['pm_dec'] != 0)
            & (np.sign(df['pm_ra']) == np.sign(df['gaia_pmra']))
            & (np.sign(df['pm_dec']) == np.sign(df['gaia_pmdec']))
        )
        |
        (
            (df['pm_ra'] == 0)
            & (df['pm_dec'] == 0)
        )
    )
    # copy so the column assignments below act on an independent frame
    # rather than on a slice of the unfiltered one.
    df = df[sel].copy()
    print('{} stars in sep < 10 as xmatch, G<18, after pm cut (xor zero pm)'.
          format(len(df)))

    # make multiplicity column. then sort by name, then by distance. then drop
    # name duplicates, keeping the first (you have nearest neighbor saved!)
    _, inv, cnts = np.unique(df['name'], return_inverse=True,
                             return_counts=True)

    df['n_in_nbhd'] = cnts[inv]

    # Decode only genuine bytes values: `.str.decode` can turn entries that
    # are already str into NaN (whether the columns arrive as bytes or str
    # presumably depends on the astropy version -- TODO confirm).
    for colname in ('name', 'assoc'):
        df[colname] = df[colname].map(
            lambda val: val.decode('utf-8') if isinstance(val, bytes) else val
        )

    df = df.sort_values(['name', 'dist'])

    df = df.drop_duplicates(subset='name', keep='first')

    df['source_id'] = df['source_id'].astype('int64')

    print('{} stars after above cuts + chosing nearest nbhr by spatial sep'.
          format(len(df)))

    outpath = os.path.join(outdir, 'MATCHED_{}.csv'.format(namestr))
    df.to_csv(outpath, index=False)
    print('made {}'.format(outpath))
    print(79*'=')