Example #1
def query_gaia_match(user_name,table_2_match,radius_arc):
	'''
	Queries Gaia DR2 for a positional cross-match against the user table
	table_2_match. The table must be uploaded to the Gaia Archive in
	advance; the upload steps are described at
	http://gea.esac.esa.int/archive-help/index.html.

	Input:
	user_name (str): User name in the Gaia Archive.
	table_2_match (str): Name of the table uploaded to the Gaia Archive.
	radius_arc (float): Search radius in arcsec.

	Output:
	file.vot: Output file from the Archive with all the columns
	in table_2_match plus all Gaia DR2 columns. 
	'''

	Gaia.login_gui()
	# Prompts for user name and password to enter authenticated access mode.
	# This could also be done non-interactively with:
	# Gaia.login(user='******', password='******')

	#Cross-match user table and gaia source
	job = Gaia.launch_job_async("""\
		SELECT crossmatch_positional(\
		'user_{}','{}',\
		'gaiadr2','gaia_source',\
		{},\
		'xmatch')\
		FROM dual;\
		""".format(user_name,table_2_match,radius_arc))

	# For the matches saved in xmatch, retrieve the Gaia information.
	# dump_to_file=True saves the result to a VOTable.
	job2 = Gaia.launch_job_async("""\
		SELECT c."dist", a.*, b.* \
		FROM user_{}.{} AS a, \
		gaiadr2.gaia_source AS b, \
		user_{}.xmatch AS c \
		WHERE (c.{}_{}_oid = a.{}_oid AND \
		c.gaia_source_source_id = b.source_id)\
		""".format(user_name,table_2_match,user_name,
			table_2_match,table_2_match,table_2_match), dump_to_file=True)

	Gaia.logout()
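
# Usage sketch (not from the original): the target table must exist in the
# Archive before query_gaia_match is called. 'jdoe' and 'my_targets.vot' are
# placeholders.
from astroquery.gaia import Gaia

Gaia.login(user='jdoe', password='...')  # hypothetical credentials
Gaia.upload_table(upload_resource='my_targets.vot', table_name='my_targets')
Gaia.logout()  # the table is now visible as user_jdoe.my_targets

# cross-match the uploaded table against Gaia DR2 within 1 arcsec
query_gaia_match(user_name='jdoe', table_2_match='my_targets', radius_arc=1.0)
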
Example #2

def get_data_subset(ra_deg,
                    dec_deg,
                    rad_deg,
                    dist,
                    dist_span=None,
                    rv_only=False,
                    login=False,
                    login_path='/shared/ebla/cotar/'):
    if dist_span is not None:
        max_parallax = 1e3 / (max(dist - dist_span, 1.))
        min_parallax = 1e3 / (dist + dist_span)
    else:
        min_parallax = -1.
        max_parallax = 100.
    # construct complete Gaia data query string
    gaia_query = "SELECT source_id,ra,dec,parallax,parallax_error,pmra,pmra_error,pmdec,pmdec_error,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,radial_velocity,radial_velocity_error " +\
                 "FROM gaiadr2.gaia_source " +\
                 "WHERE parallax >= {:.4f} AND parallax <= {:.4f} ".format(min_parallax, max_parallax) +\
                 "AND CONTAINS(POINT('ICRS',gaiadr2.gaia_source.ra,gaiadr2.gaia_source.dec),CIRCLE('ICRS',{:.5f},{:.5f},{:.5f}))=1 ".format(ra_deg, dec_deg, rad_deg)
    if rv_only:
        gaia_query += 'AND (radial_velocity IS NOT NULL) '
    # print(' QUERY:', gaia_query)
    try:
        if login:
            # login enables unlimited asynchronous download of data
            # NOTE: only up to 20 GB in total - needs manual deletion of data in the Gaia portal
            print(' Gaia login initiated')
            Gaia.login(credentials_file=login_path + 'gaia_archive_login.txt')
        # disable dump as results will be saved to a custom location later on in the analysis code
        gaia_job = Gaia.launch_job_async(gaia_query, dump_to_file=False)
        gaia_data = gaia_job.get_results()
        if login:
            Gaia.logout()
    except Exception as ee:
        print(ee)
        print(' Problem querying data.')
        return list([])
    # strip the column units returned by the archive
    for g_c in gaia_data.colnames:
        gaia_data[g_c].unit = ''
    gaia_data['radial_velocity'].name = 'rv'
    gaia_data['radial_velocity_error'].name = 'rv_error'
    # print(gaia_data)
    # print(' QUERY complete')
    print(' Retrieved lines:', len(gaia_data))
    return gaia_data
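
# Hypothetical call (values are illustrative): stars within 1 degree of
# (ra, dec) = (56.75, 24.12) deg, at 135 +/- 25 pc, with a measured RV.
gaia_data = get_data_subset(56.75, 24.12, 1.0, 135., dist_span=25.,
                            rv_only=True)
if len(gaia_data) > 0:
    print(gaia_data['source_id', 'rv', 'rv_error'])
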
Example #3
def get_gaia(df):

    import time
    import numpy as np
    import pandas as pd
    from astroquery.vizier import Vizier
    from astropy.coordinates import SkyCoord
    from astropy.coordinates import Angle
    from astropy import units as u
    from astropy.table import Column, Table, join
    from astroquery.gaia import Gaia
    Gaia.login(user='******',password='******')
    
    qry = """
        SELECT TOP 10 g.*, t.*
        FROM gaiadr1.tmass_original_valid AS t
        JOIN gaiadr2.tmass_neighbourhood AS xt ON xt.tmass_oid=t.tmass_oid
        JOIN gaiadr2.gaia_source AS g ON g.source_id=xt.source_id
        WHERE g.phot_g_mean_mag IS NOT NULL
        """

    bkg=Gaia.launch_job_async(qry).get_results().to_pandas()
    bkg['abs_g']=bkg.phot_g_mean_mag-5*np.log10(1000./bkg.parallax)+5.
    columns=bkg.columns.tolist()
    
    GAIAdf=pd.DataFrame(index=df.index,columns=columns)
    
    for k,row in df.iterrows():
        qry="""
            SELECT g.*, t.*
            FROM gaiadr1.tmass_original_valid AS t
            LEFT OUTER JOIN gaiadr2.tmass_neighbourhood AS xt ON xt.tmass_oid = t.tmass_oid
            LEFT OUTER JOIN gaiadr2.gaia_source AS g ON xt.source_id = g.source_id
            where 1=CONTAINS(POINT('ICRS', t.ra, t.dec),CIRCLE('ICRS', {}, {}, 5./3600))
            """.format(row['RA (deg)'],row['Dec (deg)'])
        
        data = Gaia.launch_job_async(qry).get_results().to_pandas()

        data['INDEX'] = k
        print(k, data.shape)
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead
        GAIAdf = pd.concat([GAIAdf, data]).dropna(how='all')

    # log out once, after all per-target queries have finished
    Gaia.logout()
    
    return GAIAdf
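
# Hypothetical input (not from the original): get_gaia expects one target per
# row, with columns named exactly 'RA (deg)' and 'Dec (deg)'.
import pandas as pd

targets = pd.DataFrame({'RA (deg)': [56.7501, 83.8221],
                        'Dec (deg)': [24.1167, -5.3911]})
gaia_df = get_gaia(targets)  # the 'INDEX' column links rows back to targets
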
Example #4
def download_gaia():
    """Download data from the Gaia archive."""
    with contextlib.suppress(FileExistsError):
        os.mkdir('gaia')

    print('Login to Gaia Archive')
    username = input('Username: ')
    if not username:
        print('Login aborted')
        return
    password = getpass.getpass('Password: ')
    if not password:
        print('Login aborted')
        return

    Gaia.login(user=username, password=password)
    try:
        # the gaiadr2.hipparcos2_best_neighbour table misses a large number of HIP stars that are
        # actually present, so use the cone search file
        conesearch_file = os.path.join('gaia', 'hip2conesearch.zip')
        download_file(
            conesearch_file,
            'https://www.cosmos.esa.int/documents/29201/1769576/Hipparcos2GaiaDR2coneSearch.zip')

        with ZipFile(conesearch_file, 'r') as csz:
            with csz.open('Hipparcos2GaiaDR2coneSearch.csv', 'r') as f:
                hip_map = io.ascii.read(f, names=['original_ext_source_id', 'source_id', 'dist'])

        gaia_downloads = [
            ('hip_id', 'user_'+username+'.hip_cone', 'gaiadr2_hip-result.csv'),
            ('tyc2_id', 'gaiadr2.tycho2_best_neighbour', 'gaiadr2_tyc-result.csv')
        ]

        Gaia.upload_table(upload_resource=hip_map, table_name='hip_cone')
        try:
            for colname, xindex_table, filename in gaia_downloads:
                download_gaia_data(colname, xindex_table, os.path.join('gaia', filename))
        finally:
            Gaia.delete_user_table('hip_cone')

    finally:
        Gaia.logout()
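
# The helper download_gaia_data is not shown in this snippet. A plausible
# sketch, under the assumption that it joins the cross-index table against
# gaiadr2.gaia_source and dumps the result as CSV (the column choices are
# illustrative, not the author's):
def download_gaia_data(colname, xindex_table, filename):
    query = """
        SELECT x.original_ext_source_id AS {}, g.*
        FROM {} AS x
        JOIN gaiadr2.gaia_source AS g ON g.source_id = x.source_id
        """.format(colname, xindex_table)
    Gaia.launch_job_async(query, dump_to_file=True, output_file=filename,
                          output_format='csv')
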
Example #5
def vector_match_tmassbestneighbor_to_gaiaids(stats,
                                              homedir='/home/luke/',
                                              outdir='../data/rms_vs_mag/',
                                              projid=1301):
    """
    Smart ADQL is a factor of >100x faster than item-by-item crossmatching.

    astroquery's Gaia module is well suited for this, because it lets you
    remotely upload tables on the fly (at least somewhat small ones, of
    <~10^5 members).
    """

    outfile = os.path.join(outdir, 'proj{}_xmatch.xml.gz'.format(projid))

    xmlpath = '../data/rms_vs_mag/proj1301_gaiaids.xml'
    if not os.path.exists(outfile):
        gaiaids = stats['lcobj']
        xmlpath = make_votable_given_ids(gaiaids)

        Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))

        jobstr = ('SELECT top 100000 upl.source_id, tm.source_id, '
                  'tm.original_ext_source_id FROM '
                  'gaiadr2.tmass_best_neighbour AS tm '
                  'JOIN tap_upload.foobar as upl '
                  'using (source_id)')
        if not os.path.exists(outfile):
            # might do async if this times out. but it doesn't.
            j = Gaia.launch_job(query=jobstr,
                                upload_resource=xmlpath,
                                upload_table_name="foobar",
                                verbose=True,
                                dump_to_file=True,
                                output_file=outfile)

        Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()

    return tab
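
# make_votable_given_ids is not defined in this snippet. A minimal sketch,
# assuming it just wraps the ids in a one-column VOTable for tap_upload:
import numpy as np
from astropy.table import Table
from astropy.io.votable import from_table, writeto

def make_votable_given_ids(gaiaids,
                           outpath='../data/rms_vs_mag/proj1301_gaiaids.xml'):
    t = Table()
    t['source_id'] = np.array(gaiaids).astype(np.int64)
    writeto(from_table(t), outpath)
    return outpath
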
Example #6
def download_gaia() -> None:
    """Download data from the Gaia archive."""
    with contextlib.suppress(FileExistsError):
        os.mkdir('gaia')

    print('Login to Gaia Archive')
    username = input('Username: ')
    if not username:
        print('Login aborted')
        return
    password = getpass.getpass('Password: ')
    if not password:
        print('Login aborted')
        return

    Gaia.login(user=username, password=password)
    try:
        download_gaia_hip(username)
        download_gaia_tyc(username)

    finally:
        Gaia.logout()
Example #7
def main(kc19_groupid=113, Tmag_cutoff=14, clean_gaia_cache=False):

    #
    # get info needed to query gaia for comparison stars
    #
    source_df = pd.read_csv('../data/kounkel_table1_sourceinfo.csv')
    sdf = source_df[(source_df['Tmag_pred'] < Tmag_cutoff)
                    & (source_df['group_id'] == kc19_groupid)]
    n_sel_sources_in_group = len(sdf)

    df2 = pd.read_csv('../data/string_table2.csv')

    gdf = df2[df2['group_id'] == kc19_groupid]

    group_coord = SkyCoord(float(gdf['l']) * u.deg,
                           float(gdf['b']) * u.deg,
                           frame='galactic')
    ra = group_coord.icrs.ra
    dec = group_coord.icrs.dec
    plx_mas = float(gdf['parallax'])

    #
    # define relevant directories / paths
    #
    gaiadir = os.path.join(basedir, 'gaia_queries')
    if not os.path.exists(gaiadir):
        os.mkdir(gaiadir)

    outfile = os.path.join(
        gaiadir, 'group{}_comparison_sample.xml.gz'.format(kc19_groupid))

    #
    # run the gaia query. require the same cuts imposed by Kounkel & Covey 2019
    # on stellar quality. also require close on-sky (within 5 degrees of KC19
    # group position), and close in parallax space (within +/-20% of KC19
    # parallax).
    #
    if clean_gaia_cache and os.path.exists(outfile):
        os.remove(outfile)

    if not os.path.exists(outfile):

        Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))

        jobstr = ('''
        SELECT *
        FROM gaiadr2.gaia_source
        WHERE 1=CONTAINS(
          POINT('ICRS', ra, dec),
            CIRCLE('ICRS', {ra:.8f}, {dec:.8f}, {sep_deg:.1f}))
        AND parallax < {plx_upper:.2f} AND parallax > {plx_lower:.2f}
        AND parallax > 1
        AND parallax_error < 0.1
        AND 1.0857/phot_g_mean_flux_over_error < 0.03
        AND astrometric_sigma5d_max < 0.3
        AND visibility_periods_used > 8
        AND (
                (astrometric_excess_noise < 1)
                OR
                (astrometric_excess_noise > 1 AND astrometric_excess_noise_sig < 2)
        )
        ''')

        query = jobstr.format(sep_deg=5.0,
                              ra=ra.value,
                              dec=dec.value,
                              plx_upper=1.3 * plx_mas,
                              plx_lower=0.7 * plx_mas)

        if not os.path.exists(outfile):
            print(42 * '-')
            print('launching\n{}'.format(query))
            print(42 * '-')
            j = Gaia.launch_job(query=query,
                                verbose=True,
                                dump_to_file=True,
                                output_file=outfile)

        Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()
    field_df = tab.to_pandas()

    #
    # require the same Tmag cutoff for the nbhd stars. ensure no overlap w/
    # sample of stars from the group itself. then randomly sample the
    # collection of stars.
    #

    Tmag_pred = (
        field_df['phot_g_mean_mag'] - 0.00522555 *
        (field_df['phot_bp_mean_mag'] - field_df['phot_rp_mean_mag'])**3 +
        0.0891337 *
        (field_df['phot_bp_mean_mag'] - field_df['phot_rp_mean_mag'])**2 -
        0.633923 *
        (field_df['phot_bp_mean_mag'] - field_df['phot_rp_mean_mag']) +
        0.0324473)

    field_df['Tmag_pred'] = Tmag_pred

    sfield_df = field_df[field_df['Tmag_pred'] < Tmag_cutoff]
    common = sfield_df.merge(sdf, on='source_id', how='inner')
    sfield_df = sfield_df[~sfield_df.source_id.isin(common.source_id)]

    n_field = len(sfield_df)

    if 2 * n_sel_sources_in_group > n_field:
        errmsg = (
            'ngroup: {}. nfield: {}. please tune the gaia query to get >2x the stars'.
            format(n_sel_sources_in_group, n_field))
        raise AssertionError(errmsg)

    srfield_df = sfield_df.sample(n=n_sel_sources_in_group)

    #
    # now given the gaia ids, get the rotation periods
    #
    for ix, r in srfield_df.iterrows():

        source_id = np.int64(r['source_id'])
        ra, dec = float(r['ra']), float(r['dec'])
        group_id = kc19_groupid
        name = str(gdf['name'].iloc[0])

        c_obj = SkyCoord(ra, dec, unit=(u.deg, u.deg), frame='icrs')

        #
        # require that we are on-silicon. for year 1, this roughly means we
        # are in the southern ecliptic hemisphere
        #
        if c_obj.barycentrictrueecliptic.lat > 0 * u.deg:
            print('group{}, {}: found in northern hemisphere. skip!'.format(
                group_id, name))
            continue

        workingdir = os.path.join(
            basedir, 'fits_pkls_results_pngs',
            'field_star_comparison_group{}_name{}'.format(group_id, name))
        if not os.path.exists(workingdir):
            os.mkdir(workingdir)
        workingdir = os.path.join(workingdir, str(source_id))
        if not os.path.exists(workingdir):
            os.mkdir(workingdir)

        outvppath = os.path.join(workingdir,
                                 'verification_page_{}.png'.format(source_id))
        if os.path.exists(outvppath):
            print('found {}, continue'.format(outvppath))
            continue

        #
        # if you already downloaded ffi cutouts for this object, don't get any
        # more. otherwise, get them
        #
        cutouts = glob(os.path.join(workingdir, '*.fits'))
        if len(cutouts) >= 1:
            print('found {} cutouts in {}, skip'.format(
                len(cutouts), workingdir))
        else:
            gfc.get_fficutout(c_obj, cutoutdir=workingdir)

        #
        # given the FFI cutouts, make simple light curves.
        #
        cutouts = glob(os.path.join(workingdir, '*.fits'))
        if len(cutouts) >= 1:
            d = glgf.get_lc_given_fficutout(workingdir,
                                            cutouts,
                                            c_obj,
                                            return_pkl=False)
        else:
            d = np.nan
            print('WRN! did not find fficutout for {}'.format(workingdir))

        if not isinstance(d, dict):
            print('WRN! got bad light curve for {}. skipping.'.format(
                workingdir))
            continue

        outpath = os.path.join(workingdir, 'GLS_rotation_period.results')

        #
        # do Lomb scargle w/ uniformly weighted points.
        #
        ls = LombScargle(d['time'], d['rel_flux'])
        period_min = 0.1
        period_max = np.min(
            [0.9 * (np.max(d['time']) - np.min(d['time'])), 16])
        freq, power = ls.autopower(minimum_frequency=1 / period_max,
                                   maximum_frequency=1 / period_min)
        try:
            _ = power.max()
        except ValueError:
            print('WRN! got bad Lomb-Scargle for {}. skipping.'.format(
                workingdir))
            continue

        ls_fap = ls.false_alarm_probability(power.max(), method='baluev')
        ls_period = 1 / freq[np.argmax(power)]

        d['ls_fap'] = ls_fap
        d['ls_period'] = ls_period

        #
        # try to get TIC Teff. search TIC within 5 arcseconds, then take the
        # Gaia-ID match (removing sources with no Gaia ID, which do exist in
        # TICv8).
        #
        radius = 5.0 * u.arcsecond

        stars = Catalogs.query_region("{} {}".format(float(c_obj.ra.value),
                                                     float(c_obj.dec.value)),
                                      catalog="TIC",
                                      radius=radius)

        nbhr_source_ids = np.array(stars['GAIA'])

        stars = stars[nbhr_source_ids != '']
        nbhr_source_ids = nbhr_source_ids[nbhr_source_ids != '']

        sel = nbhr_source_ids.astype(int) == source_id

        if len(sel[sel]) == 1:
            star = stars[sel]
        else:
            raise NotImplementedError('did not get any TIC match. why?')

        teff = float(star['Teff'])
        # teff is already a float here; guard against a NaN value from the TIC
        if not np.isfinite(teff):
            raise NotImplementedError('got nan TIC teff. what do?')

        #
        # make "check plot" analog for visual inspection
        #
        outd = {
            'ls_fap': d['ls_fap'],
            'ls_period': d['ls_period'],
            'source_id': source_id,
            'ra': ra,
            'dec': dec,
            'name': name,
            'group_id': group_id,
            'teff': teff
        }
        pu.save_status(outpath, 'lomb-scargle', outd)

        vp.generate_verification_page(d, ls, freq, power, cutouts, c_obj,
                                      outvppath, outd)
Example #8
def given_source_ids_get_gaia_data(source_ids,
                                   groupname,
                                   n_max=10000,
                                   overwrite=True,
                                   enforce_all_sourceids_viable=True,
                                   savstr='',
                                   whichcolumns='*',
                                   gaia_datarelease='gaiadr2',
                                   getdr2ruwe=False):
    """
    Args:

        source_ids (np.ndarray) of np.int64 Gaia DR2/EDR3 source_ids. (If EDR3,
        be sure to use the correct `gaia_datarelease` kwarg)

        groupname (str)

        overwrite: if True, and finds that this crossmatch has already run,
        deletes previous cached output and reruns anyway.

        enforce_all_sourceids_viable: if True, will raise an assertion error if
        every source id does not return a result. (Unless the query returns
        n_max entries, in which case only a warning will be raised).

        savstr (str); optional string that will be included in the path to
        the downloaded vizier table.

        whichcolumns (str): ADQL column selection string. For instance "*",
        or a comma-separated list such as "g.source_id, g.ra, g.dec".

        gaia_datarelease (str): 'gaiadr2' or 'gaiaedr3'. Default is Gaia DR2.

        getdr2ruwe (bool): if True, queries gaiadr2.ruwe instead of
        gaiadr2.gaia_source

    Returns:

        dataframe with Gaia DR2 / EDR3 crossmatch info.
    """

    if n_max > int(5e4):
        raise NotImplementedError(
            'the gaia archive / astroquery seems to give invalid results past '
            '50000 source_ids in this implementation...')

    if type(source_ids) != np.ndarray:
        raise TypeError(
            'source_ids must be np.ndarray of np.int64 Gaia DR2 source_ids')
    if type(source_ids[0]) != np.int64:
        raise TypeError(
            'source_ids must be np.ndarray of np.int64 Gaia DR2 source_ids')

    xmltouploadpath = os.path.join(
        gaiadir, f'toupload_{groupname}{savstr}_{gaia_datarelease}.xml')
    dlpath = os.path.join(
        gaiadir, f'group{groupname}_matches{savstr}_{gaia_datarelease}.xml.gz')

    if overwrite:
        if os.path.exists(xmltouploadpath):
            os.remove(xmltouploadpath)

    if not os.path.exists(xmltouploadpath):
        make_votable_given_source_ids(source_ids, outpath=xmltouploadpath)

    if os.path.exists(dlpath) and overwrite:
        os.remove(dlpath)

    if not getdr2ruwe:
        jobstr = ('''
        SELECT top {n_max:d} {whichcolumns}
        FROM tap_upload.foobar as u, {gaia_datarelease:s}.gaia_source AS g
        WHERE u.source_id=g.source_id
        ''').format(whichcolumns=whichcolumns,
                    n_max=n_max,
                    gaia_datarelease=gaia_datarelease)
    else:
        assert gaia_datarelease == 'gaiadr2'
        jobstr = ('''
        SELECT top {n_max:d} *
        FROM tap_upload.foobar as u, gaiadr2.ruwe AS g
        WHERE u.source_id=g.source_id
        ''').format(n_max=n_max)

    query = jobstr

    if not os.path.exists(dlpath):

        Gaia.login(credentials_file=credentials_file)

        # might do async if this times out. but it doesn't.
        j = Gaia.launch_job(query=query,
                            upload_resource=xmltouploadpath,
                            upload_table_name="foobar",
                            verbose=True,
                            dump_to_file=True,
                            output_file=dlpath)

        Gaia.logout()

    df = given_votable_get_df(dlpath, assert_equal='source_id')

    if len(df) != len(source_ids) and enforce_all_sourceids_viable:
        if len(df) == n_max:
            wrnmsg = ('WRN! got {} matches vs {} source id queries'.format(
                len(df), len(source_ids)))
            print(wrnmsg)
        else:
            errmsg = ('ERROR! got {} matches vs {} source id queries'.format(
                len(df), len(source_ids)))
            print(errmsg)
            raise AssertionError(errmsg)

    if len(df) != len(source_ids) and not enforce_all_sourceids_viable:
        wrnmsg = ('WRN! got {} matches vs {} source id queries'.format(
            len(df), len(source_ids)))
        print(wrnmsg)

    return df
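
# Hypothetical call (placeholder ids; assumes the module-level gaiadir and
# credentials_file used above are configured):
import numpy as np

source_ids = np.array([2000000000000000000, 2000000000000000001],
                      dtype=np.int64)
df = given_source_ids_get_gaia_data(
    source_ids, 'demo_group',
    whichcolumns='g.source_id, g.ra, g.dec, g.parallax',
    gaia_datarelease='gaiaedr3'
)
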
Example #9
def given_dr2_sourceids_get_edr3_xmatch(dr2_source_ids,
                                        runid,
                                        overwrite=True,
                                        enforce_all_sourceids_viable=True):
    """
    Use the dr2_neighborhood table to look up the EDR3 source_ids given DR2
    source_ids.

    "The only safe way to compare source records between different Data
    Releases in general is to check the records of proximal source(s) in the
    same small part of the sky. This table provides the means to do this via a
    precomputed crossmatch of such sources, taking into account the proper
    motions available at E/DR3."

    "Within the neighbourhood of a given E/DR3 source there may be none, one or
    (rarely) several possible counterparts in DR2 indicated by rows in this
    table. This occasional source confusion is an inevitable consequence of the
    merging, splitting and deletion of identifiers introduced in previous
    releases during the DR3 processing and results in no guaranteed one–to–one
    correspondence in source identifiers between the releases."

    See:
    https://gea.esac.esa.int/archive/documentation/GEDR3/Gaia_archive/chap_datamodel/sec_dm_auxiliary_tables/ssec_dm_dr2_neighbourhood.html

    Args:

        dr2_source_ids (np.ndarray) of np.int64 Gaia DR2 source_ids

        runid (str): identifier used to identify and cache jobs.

        overwrite: if True, and finds that this crossmatch has already run,
        deletes previous cached output and reruns anyway.

        enforce_all_sourceids_viable: if True, will raise an assertion error if
        every source id does not return a result. (Unless the query returns
        n_max entries, in which case only a warning will be raised).

    Returns:

        dr2_x_edr3_df (pd.DataFrame), containing:
            ['source_id', 'dr2_source_id', 'dr3_source_id', 'angular_distance',
            'magnitude_difference', 'proper_motion_propagation']

        where "source_id" is the requested source_id, and the remaining columns
        are matches from the dr2_neighborhood table.

        This DataFrame should then be used to ensure e.g., that every REQUESTED
        source_id provides only one MATCHED star.
    """

    if type(dr2_source_ids) != np.ndarray:
        raise TypeError(
            'source_ids must be np.ndarray of np.int64 Gaia DR2 source_ids')
    if type(dr2_source_ids[0]) != np.int64:
        raise TypeError(
            'source_ids must be np.ndarray of np.int64 Gaia DR2 source_ids')
    if not isinstance(runid, str):
        raise TypeError(
            'Expect runid to be a (preferentially unique among jobs) string.')

    xmltouploadpath = os.path.join(gaiadir, f'toupload_{runid}.xml')
    dlpath = os.path.join(gaiadir, f'{runid}_matches.xml.gz')

    if overwrite:
        if os.path.exists(xmltouploadpath):
            os.remove(xmltouploadpath)

    if not os.path.exists(xmltouploadpath):
        make_votable_given_source_ids(dr2_source_ids, outpath=xmltouploadpath)

    if os.path.exists(dlpath) and overwrite:
        os.remove(dlpath)

    if not os.path.exists(dlpath):

        n_max = 2 * len(dr2_source_ids)
        print(f"Setting n_max = 2 * (number of dr2_source_ids) = {n_max}")

        Gaia.login(credentials_file=credentials_file)

        jobstr = ('''
        SELECT top {n_max:d} *
        FROM tap_upload.foobar as u, gaiaedr3.dr2_neighbourhood AS g
        WHERE u.source_id=g.dr2_source_id
        ''').format(n_max=n_max)
        query = jobstr

        # might do async if this times out. but it doesn't.
        j = Gaia.launch_job(query=query,
                            upload_resource=xmltouploadpath,
                            upload_table_name="foobar",
                            verbose=True,
                            dump_to_file=True,
                            output_file=dlpath)

        Gaia.logout()

    df = given_votable_get_df(dlpath, assert_equal=None)

    if len(df) > len(dr2_source_ids):
        wrnmsg = (
            'WRN! got {} matches vs {} source id queries. Fix via angular_distance or magnitude_difference'
            .format(len(df), len(dr2_source_ids)))
        print(wrnmsg)

    if len(df) < len(dr2_source_ids) and enforce_all_sourceids_viable:
        errmsg = ('ERROR! got {} matches vs {} dr2 source id queries'.format(
            len(df), len(dr2_source_ids)))
        print(errmsg)
        raise AssertionError(errmsg)

    return df
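
# When the warning above fires (more matches than requested ids), one way to
# resolve duplicates, as the message suggests, is to keep the closest
# counterpart per DR2 id. dr2_source_ids is assumed to be defined:
dr2_x_edr3_df = given_dr2_sourceids_get_edr3_xmatch(dr2_source_ids, 'demo_run')
best = (dr2_x_edr3_df.sort_values('angular_distance')
        .drop_duplicates(subset='dr2_source_id', keep='first'))
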
Example #10
def query_neighborhood(bounds,
                       groupname,
                       n_max=2000,
                       overwrite=True,
                       manual_gmag_limit=16,
                       mstr='',
                       use_bonus_quality_cuts=True):
    """
    Given the bounds in position and parallax corresponding to a group in the
    CDIPS target catalogs, get the DR2 stars from the group's neighborhood.

    The bounds are lower and upper in ra, dec, parallax, and there is a
    limiting G magnitude. A maximum number of stars, `n_max`, are selected from
    within these bounds.

    Args:
        bounds (dict): dict with keys parallax_lower, parallax_upper, ra_lower,
        ra_upper, dec_lower, dec_upper. (Each of which has a float value).

        groupname (str): string used when caching files. If you are querying
        a field star, it is best to include the source id.

        n_max (int): maximum number of stars in the neighborhood to acquire.

        mstr (str): string used in the cached neighborhood pickle file.

        use_bonus_quality_cuts (bool): default True. Imposes some things like
        "there need to be at least 8 Gaia transits", and similar requirements.

    Returns:
        dataframe of DR2 stars within the bounds given. This is useful for
        querying stars that are in the neighborhood of some group.
    """

    if manual_gmag_limit is not None:
        g_mag_limit = manual_gmag_limit
    else:
        g_mag_limit = 16
        LOGINFO(f'Using default g_mag_limit of {g_mag_limit}')

    dlpath = os.path.join(
        gaiadir, 'nbhd_group{}_matches{}.xml.gz'.format(groupname, mstr))

    if os.path.exists(dlpath) and overwrite:
        os.remove(dlpath)

    if not os.path.exists(dlpath):

        Gaia.login(credentials_file=credentials_file)

        jobstr = ("""
        select top {n_max:d}
            g.source_id, g.phot_bp_mean_mag, g.phot_rp_mean_mag,
            g.phot_g_mean_mag, g.parallax, g.ra, g.dec, g.pmra, g.pmdec,
            g.radial_velocity, g.radial_velocity_error
        from gaiadr2.gaia_source as g
        where
            g.parallax > {parallax_lower:.2f}
        and
            g.parallax < {parallax_upper:.2f}
        and
            g.dec < {dec_upper:.2f}
        and
            g.dec > {dec_lower:.2f}
        and
            g.ra > {ra_lower:.2f}
        and
            g.ra < {ra_upper:.2f}
        and
            g.phot_g_mean_mag < {g_mag_limit:.1f}
        order by
            random_index
        """)
        query = jobstr.format(n_max=n_max,
                              parallax_lower=bounds['parallax_lower'],
                              parallax_upper=bounds['parallax_upper'],
                              ra_lower=bounds['ra_lower'],
                              ra_upper=bounds['ra_upper'],
                              dec_lower=bounds['dec_lower'],
                              dec_upper=bounds['dec_upper'],
                              g_mag_limit=g_mag_limit)

        if use_bonus_quality_cuts:
            # Impose some extra quality cuts, originally from Kounkel & Covey
            # 2019, but generally applicable.
            jobstr = ("""
            select top {n_max:d}
                g.source_id, g.phot_bp_mean_mag, g.phot_rp_mean_mag,
                g.phot_g_mean_mag, g.parallax, g.ra, g.dec, g.pmra, g.pmdec,
                g.radial_velocity, g.radial_velocity_error
            from gaiadr2.gaia_source as g
            where
                g.parallax > {parallax_lower:.2f}
            and
                g.parallax < {parallax_upper:.2f}
            and
                g.dec < {dec_upper:.2f}
            and
                g.dec > {dec_lower:.2f}
            and
                g.ra > {ra_lower:.2f}
            and
                g.ra < {ra_upper:.2f}
            and
                g.phot_g_mean_mag < {g_mag_limit:.1f}
            and
                parallax > 1
            and
                parallax_error < 0.1
            and
                1.0857/phot_g_mean_flux_over_error < 0.03
            and
                astrometric_sigma5d_max < 0.3
            and
                visibility_periods_used > 8
            and (
                    (astrometric_excess_noise < 1)
                    or
                    (astrometric_excess_noise > 1 and astrometric_excess_noise_sig < 2)
            )
            order by
                random_index
            """)
            query = jobstr.format(n_max=n_max,
                                  parallax_lower=bounds['parallax_lower'],
                                  parallax_upper=bounds['parallax_upper'],
                                  ra_lower=bounds['ra_lower'],
                                  ra_upper=bounds['ra_upper'],
                                  dec_lower=bounds['dec_lower'],
                                  dec_upper=bounds['dec_upper'],
                                  g_mag_limit=g_mag_limit)

        # async jobs can avoid timeout
        j = Gaia.launch_job_async(query=query,
                                  verbose=True,
                                  dump_to_file=True,
                                  output_file=dlpath)
        #j = Gaia.launch_job(query=query, verbose=True, dump_to_file=True,
        #                    output_file=dlpath)

        Gaia.logout()

    df = given_votable_get_df(dlpath, assert_equal='source_id')

    return df
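
# Hypothetical bounds dict (values illustrative): a box in ra/dec/parallax
# around a group at roughly 140 pc.
bounds = {
    'parallax_lower': 6.0, 'parallax_upper': 8.5,  # mas
    'ra_lower': 55.0, 'ra_upper': 60.0,            # deg
    'dec_lower': 22.0, 'dec_upper': 27.0,          # deg
}
nbhd_df = query_neighborhood(bounds, 'demo_group', n_max=2000,
                             manual_gmag_limit=16)
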
Example #11
def make_Bell17_GaiaDR2_crossmatch(maxsep=10,
                                   outdir=datadir,
                                   homedir='/home/luke/' ):

    with open(os.path.join(datadir,'Bell_2017_32Ori_table_3.txt')) as f:
        lines = f.readlines()

    lines = [l.replace('\n','') for l in lines if not l.startswith('#') and
             len(l) > 200]

    twomass_id_strs, pm_ra_strs, pm_dec_strs = [], [], []
    for l in lines:
        try:
            # regex finds the 2mass id
            twomass_id_strs.append(
                re.search('[0-9]{8}.[0-9]{7}', l).group(0)
            )
            ix = 0
            # regex finds floats in order, with the \pm appended. first is
            # always pm_RA, second is always pm_DEC.
            for m in re.finditer('[+-]?([0-9]*[.])?[0-9]+\\\\pm', l):
                if ix >= 2:
                    continue
                if ix == 0:
                    pm_ra_strs.append(float(m.group(0).rstrip('\\pm')))
                elif ix == 1:
                    pm_dec_strs.append(float(m.group(0).rstrip('\\pm')))
                ix += 1

        except Exception:
            print('skipping')
            print(l)
            continue

    RA = [t[0:2]+'h'+t[2:4]+'m'+t[4:6]+'.'+t[6:8]
              for t in twomass_id_strs
         ]

    DE = [t[8]+t[9:11]+'d'+t[11:13]+'m'+t[13:15]+'.'+t[15]
              for t in twomass_id_strs
         ]

    c = SkyCoord(RA, DE, frame='icrs')

    RA = arr(c.ra.value)
    dec = arr(c.dec.value)
    pm_RA = arr(pm_ra_strs)
    pm_dec = arr(pm_dec_strs)
    name = arr(twomass_id_strs)
    assoc_name = '32Ori'
    assoc = np.repeat(assoc_name, len(RA))

    print(42*'-')
    outname = os.path.join(datadir, 'Bell17_table_32Ori.csv')
    print('{}'.format(outname))
    print('initial number of members: {}'.format(len(RA)))

    xmatchoutpath = outname.replace('.csv','_MATCHES_GaiaDR2.csv')
    outfile = outname.replace('.csv','_GOTMATCHES_GaiaDR2.xml')
    xmltouploadpath = outname.replace('.csv','_TOUPLOAD_GaiaDR2.xml')

    # do the spatial crossmatch...
    if os.path.exists(outfile):
        os.remove(outfile)
    if not os.path.exists(outfile):
        _ = make_votable_given_cols(name, assoc, RA, dec, pm_RA, pm_dec,
                                    outpath=xmltouploadpath)

        Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))

        # separated less than 10 arcsec.
        jobstr = (
        '''
        SELECT TOP {ncut:d} u.name, u.assoc, u.ra, u.dec, u.pm_ra, u.pm_dec,
        g.source_id, DISTANCE(
          POINT('ICRS', u.ra, u.dec),
          POINT('ICRS', g.ra,g.dec)) AS dist,
          g.phot_g_mean_mag as gaia_gmag,
          g.pmra AS gaia_pmra,
          g.pmdec AS gaia_pmdec
        FROM tap_upload.foobar as u, gaiadr2.gaia_source AS g
        WHERE 1=CONTAINS(
          POINT('ICRS', u.ra, u.dec),
          CIRCLE('ICRS', g.ra, g.dec, {sep:.8f})
        )
        '''
        )
        maxncut = int(5*len(name)) # to avoid query timeout
        maxsep = (maxsep*u.arcsec).to(u.deg).value
        query = jobstr.format(sep=maxsep, ncut=maxncut)

        if not os.path.exists(outfile):
            # might do async if this times out. but it doesn't.
            j = Gaia.launch_job(query=query,
                                upload_resource=xmltouploadpath,
                                upload_table_name="foobar", verbose=True,
                                dump_to_file=True, output_file=outfile)

        Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()

    if maxncut - len(tab) < 10:
        errmsg = 'ERROR! too many matches'
        raise AssertionError(errmsg)

    print('number of members after gaia 10 arcsec search: {}'.format(len(tab)))

    # if nonzero and finite proper motion, require Gaia pm match to sign
    # of stated PMs.
    df = tab.to_pandas()

    print('\n'+42*'-')

    sel = (df['gaia_gmag'] < 18)
    print('{} stars in sep < 10 arcsec, G<18, xmatch'.format(len(df[sel])))

    sel &= (
        (   (df['pm_ra'] != 0 ) & (df['pm_dec'] != 0 ) &
            ( np.sign(df['pm_ra']) == np.sign(df['gaia_pmra']) ) &
            ( np.sign(df['pm_dec']) == np.sign(df['gaia_pmdec']) )
        )
        |
        (
            (df['pm_ra'] == 0 ) & (df['pm_dec'] == 0 )
        )
    )
    df = df[sel]
    print('{} stars in sep < 10 as xmatch, G<18, after pm cut (xor zero pm)'.
          format(len(df)))

    # make multiplicity column. then sort by name, then by distance. then drop
    # name duplicates, keeping the first (you have nearest neighbor saved!)
    _, inv, cnts = np.unique(df['name'], return_inverse=True,
                             return_counts=True)

    df['n_in_nbhd'] = cnts[inv]

    df['name'] = df['name'].str.decode('utf-8')
    df['assoc'] = df['assoc'].str.decode('utf-8')

    df = df.sort_values(['name','dist'])

    df = df.drop_duplicates(subset='name', keep='first')

    df['source_id'] = df['source_id'].astype('int64')

    print('{} stars after above cuts + choosing nearest nbhr by spatial sep'.
          format(len(df)))

    df.to_csv(xmatchoutpath, index=False)
    print('made {}'.format(xmatchoutpath))
    print(79*'=')
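
# make_votable_given_cols is not defined in these snippets. A minimal sketch,
# assuming it writes the six columns referenced as u.* in the ADQL above
# (the make_votable_given_full_cols used in Example #15 presumably adds the
# two PM error columns in the same way):
from astropy.table import Table
from astropy.io.votable import from_table, writeto

def make_votable_given_cols(name, assoc, RA, dec, pm_RA, pm_dec, outpath=None):
    t = Table()
    t['name'], t['assoc'] = name, assoc
    t['ra'], t['dec'] = RA, dec
    t['pm_ra'], t['pm_dec'] = pm_RA, pm_dec
    writeto(from_table(t), outpath)
    return outpath
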
Example #12
def make_vizier_GaiaDR2_crossmatch(vizier_search_str, ra_str, dec_str,
                                   pmra_str, pmdec_str, name_str, assoc_name,
                                   table_num=0, outname='', maxsep=10,
                                   outdir=datadir,
                                   homedir='/home/luke/' ):
    '''
    Spatially crossmatch catalog of <~100,000 members w/ coords and PMs against
    Gaia DR2.  This assumes that the catalog is on vizier.

    make_Kraus14_GaiaDR2_crossmatch is an example of a call.
    '''

    Vizier.ROW_LIMIT = -1
    catalog_list = Vizier.find_catalogs(vizier_search_str)
    catalogs = Vizier.get_catalogs(catalog_list.keys())

    tab = catalogs[table_num]
    print(42*'-')
    print('{}'.format(outname))
    print('initial number of members: {}'.format(len(tab)))

    # the gaia xmatch upload needs these column names
    RA = tab[ra_str]
    dec = tab[dec_str]
    pm_RA = tab[pmra_str]
    pm_dec = tab[pmdec_str]
    name = tab[name_str]

    assoc = np.repeat(assoc_name, len(RA))

    assert tab[ra_str].unit == u.deg
    assert tab[pmra_str].unit == u.mas/u.year

    xmatchoutpath = outname.replace('.csv','_MATCHES_GaiaDR2.csv')
    outfile = outname.replace('.csv','_GOTMATCHES_GaiaDR2.xml')
    xmltouploadpath = outname.replace('.csv','_TOUPLOAD_GaiaDR2.xml')

    # do the spatial crossmatch...
    if os.path.exists(outfile):
        os.remove(outfile)
    if not os.path.exists(outfile):
        _ = make_votable_given_cols(name, assoc, RA, dec, pm_RA, pm_dec,
                                    outpath=xmltouploadpath)

        Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))

        # separated less than 10 arcsec.
        jobstr = (
        '''
        SELECT TOP {ncut:d} u.name, u.assoc, u.ra, u.dec, u.pm_ra, u.pm_dec,
        g.source_id, DISTANCE(
          POINT('ICRS', u.ra, u.dec),
          POINT('ICRS', g.ra,g.dec)) AS dist,
          g.phot_g_mean_mag as gaia_gmag,
          g.pmra AS gaia_pmra,
          g.pmdec AS gaia_pmdec
        FROM tap_upload.foobar as u, gaiadr2.gaia_source AS g
        WHERE 1=CONTAINS(
          POINT('ICRS', u.ra, u.dec),
          CIRCLE('ICRS', g.ra, g.dec, {sep:.8f})
        )
        '''
        )
        maxncut = int(5*len(name)) # to avoid query timeout
        maxsep = (maxsep*u.arcsec).to(u.deg).value
        query = jobstr.format(sep=maxsep, ncut=maxncut)

        if not os.path.exists(outfile):
            # might do async if this times out. but it doesn't.
            j = Gaia.launch_job(query=query,
                                upload_resource=xmltouploadpath,
                                upload_table_name="foobar", verbose=True,
                                dump_to_file=True, output_file=outfile)

        Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()

    if maxncut - len(tab) < 10:
        errmsg = 'ERROR! too many matches'
        raise AssertionError(errmsg)

    print('number of members after gaia 10 arcsec search: {}'.format(len(tab)))

    # if nonzero and finite proper motion, require Gaia pm match to sign
    # of stated PMs.
    df = tab.to_pandas()

    print('\n'+42*'-')

    sel = (df['gaia_gmag'] < 18)
    print('{} stars in sep < 10 arcsec, G<18, xmatch'.format(len(df[sel])))

    sel &= (
        (   (df['pm_ra'] != 0 ) & (df['pm_dec'] != 0 ) &
            ( np.sign(df['pm_ra']) == np.sign(df['gaia_pmra']) ) &
            ( np.sign(df['pm_dec']) == np.sign(df['gaia_pmdec']) )
        )
        |
        (
            (df['pm_ra'] == 0 ) & (df['pm_dec'] == 0 )
        )
    )
    df = df[sel]
    print('{} stars in sep < 10 as xmatch, G<18, after pm cut (xor zero pm)'.
          format(len(df)))

    # make multiplicity column. then sort by name, then by distance. then drop
    # name duplicates, keeping the first (you have nearest neighbor saved!)
    _, inv, cnts = np.unique(df['name'], return_inverse=True,
                             return_counts=True)

    df['n_in_nbhd'] = cnts[inv]

    df['name'] = df['name'].str.decode('utf-8')
    df['assoc'] = df['assoc'].str.decode('utf-8')

    df = df.sort_values(['name','dist'])

    df = df.drop_duplicates(subset='name', keep='first')

    df['source_id'] = df['source_id'].astype('int64')

    print('{} stars after above cuts + choosing nearest nbhr by spatial sep'.
          format(len(df)))

    df.to_csv(xmatchoutpath, index=False)
    print('made {}'.format(xmatchoutpath))
    print(79*'=')
Example #13
def make_Oh17_GaiaDR2_crossmatch(
    namestr='Oh_2017_clustering_GaiaDR2_crossmatched',
    outdir=datadir, homedir='/home/luke/'):
    '''
    Semyeong Oh et al (2017) discovered 10.6k stars with separations <10pc that
    are in likely comoving pairs / groups.

    see
    http://vizier.cfa.harvard.edu/viz-bin/VizieR?-source=J/AJ/153/257
    '''

    # Download Oh's tables of stars, pairs, and groups.
    Vizier.ROW_LIMIT = -1
    catalog_list = Vizier.find_catalogs('J/AJ/153/257')
    catalogs = Vizier.get_catalogs(catalog_list.keys())

    stars = catalogs[0]
    pairs = catalogs[1]
    groups = catalogs[2]

    # IMPOSE GROUP SIZE >= 3.
    stars = stars[stars['Size'] >= 3]

    # use the gaia dr1 ids, to match the dr1_neighbourhood table.

    outfile = os.path.join(outdir,'gotmatches_{}.xml.gz'.format(namestr))
    xmltouploadpath = os.path.join(outdir,'toupload_{}.xml'.format(namestr))

    if os.path.exists(outfile):
        os.remove(outfile)
    if not os.path.exists(outfile):

        t = Table()
        t['name'] = arr(stars['Star']).astype(str)
        # note "Group" is a bad thing to name a column b/c it is a SQL
        # keyword...
        t['groupname'] = arr(stars['Group']).astype(str)
        t['ra'] = arr(stars['RAJ2000'])*u.deg
        t['dec'] = arr(stars['DEJ2000'])*u.deg
        t['gaia'] = arr(stars['Gaia']).astype(int)
        t['gmag'] = arr(stars['Gmag'])*u.mag
        t['size'] = arr(stars['Size']).astype(int)

        votable = from_table(t)
        writeto(votable, xmltouploadpath)
        print('made {}'.format(xmltouploadpath))

        Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))

        # https://gea.esac.esa.int/archive/documentation/GDR2/...
        # Gaia_archive/chap_datamodel/sec_dm_auxiliary_tables/...
        # ssec_dm_dr1_neighbourhood.html
        jobstr = (
        '''
        SELECT TOP {ncut:d} u.name, u.gaia, u.ra, u.dec,
        u.groupname as assoc, u.size, u.gmag,
        n.dr2_source_id as source_id, n.angular_distance as dist, n.rank,
        n.magnitude_difference
        FROM tap_upload.foobar as u, gaiadr2.dr1_neighbourhood AS n
        WHERE u.gaia = n.dr1_source_id
        '''
        )
        maxncut = int(5*len(stars)) # to avoid query timeout
        query = jobstr.format(ncut=maxncut)

        if not os.path.exists(outfile):
            # might do async if this times out. but it doesn't.
            j = Gaia.launch_job(query=query,
                                upload_resource=xmltouploadpath,
                                upload_table_name="foobar", verbose=True,
                                dump_to_file=True, output_file=outfile)

        Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()

    if maxncut - len(tab) < 10:
        errmsg = 'ERROR! too many matches'
        raise AssertionError(errmsg)

    # convert the match table to a dataframe
    df = tab.to_pandas()

    print('\n'+42*'-')
    print('{} stars in original Oh table (size>=3)'.format(len(stars)))
    print('{} stars in gaia nbhd match'.format(len(df)))

    # we want DR2 mags. "gmag" is the uploaded DR1 mag.
    df['gaia_gmag'] = df['gmag'] + df['magnitude_difference']
    df.drop(['gmag'], axis=1, inplace=True)

    _, inv, cnts = np.unique(df['name'], return_inverse=True,
                             return_counts=True)
    df['n_in_nbhd'] = cnts[inv]

    df['name'] = df['name'].str.decode('utf-8')
    df['assoc'] = df['assoc'].str.decode('utf-8')

    df = df.sort_values(['name','dist'])

    df = df.drop_duplicates(subset='name', keep='first')

    df['gaia'] = df['gaia'].astype('int64')
    df = df.rename(index=str, columns={"gaia":"gaia_dr1_source_id"})

    df['size'] = df['size'].astype('int')

    df['source_id'] = df['source_id'].astype('int64')

    print('{} stars after above cuts + choosing nearest nbhr by spatial sep'.
          format(len(df)))

    outpath = os.path.join(outdir,'MATCHED_{}.csv'.format(namestr))
    df.to_csv(outpath, index=False)
    print('made {}'.format(outpath))
    print(79*'=')
Example #14
def make_Rizzuto11_GaiaDR2_crossmatch(
    outdir=datadir, homedir='/home/luke/'):
    '''
    Aaron Rizzuto et al (2011) gave a list of 436 Sco OB2 members.
    http://vizier.cfa.harvard.edu/viz-bin/VizieR?-source=J/MNRAS/416/3108
    '''
    vizier_search_str = 'J/MNRAS/416/3108'
    table_num = 0
    ra_str = '_RA'
    dec_str = '_DE'
    outname = os.path.join(datadir,'Rizzuto_11_table_1_ScoOB2_members.csv')
    assoc_name = 'ScoOB2'
    namestr = 'Rizzuto_11_table_1_ScoOB2_members'

    Vizier.ROW_LIMIT = -1
    catalog_list = Vizier.find_catalogs(vizier_search_str)
    catalogs = Vizier.get_catalogs(catalog_list.keys())

    tab = catalogs[table_num]
    print(42*'-')
    print('{}'.format(outname))
    print('initial number of members: {}'.format(len(tab)))

    # the gaia xmatch needs these two column names
    RA = tab[ra_str]
    dec = tab[dec_str]
    assoc = np.repeat(assoc_name, len(RA))

    # match "HIP" number to hipparcos table...
    outfile = os.path.join(outdir,'gotmatches_{}.xml.gz'.format(namestr))
    xmltouploadpath = os.path.join(outdir,'toupload_{}.xml'.format(namestr))

    if os.path.exists(outfile):
        os.remove(outfile)
    if not os.path.exists(outfile):

        votable = from_table(tab)
        writeto(votable, xmltouploadpath)
        print('made {}'.format(xmltouploadpath))

        Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))

        # https://gea.esac.esa.int/archive/documentation/GDR2/...
        # Gaia_archive/chap_datamodel/sec_dm_auxiliary_tables/...
        # ssec_dm_dr1_neighbourhood.html
        jobstr = (
        '''
        SELECT TOP {ncut:d} u.hip, u.memb, u.col_ra, u.col_de,
        n.source_id, n.angular_distance as dist, n.number_of_neighbours
        FROM tap_upload.foobar as u, gaiadr2.hipparcos2_best_neighbour AS n
        WHERE u.hip = n.original_ext_source_id
        '''
        )
        maxncut = int(5*len(tab)) # to avoid query timeout
        query = jobstr.format(ncut=maxncut)

        if not os.path.exists(outfile):
            # might do async if this times out. but it doesn't.
            j = Gaia.launch_job(query=query,
                                upload_resource=xmltouploadpath,
                                upload_table_name="foobar", verbose=True,
                                dump_to_file=True, output_file=outfile)

        Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()
    print('number after hipparcos xmatch: {}'.format(len(tab)))

    if maxncut - len(tab) < 10:
        errmsg = 'ERROR! too many matches'
        raise AssertionError(errmsg)

    df = tab.to_pandas()
    df['source_id'] = df['source_id'].astype('int64')

    df['name'] = df['hip'].astype('int64')

    outpath = os.path.join(outdir,'MATCHED_{}.csv'.format(namestr))
    df.to_csv(outpath, index=False)
    print('made {}'.format(outpath))
    print(79*'=')
Example #15
def make_Gagne18_BANYAN_any_DR2_crossmatch(
        tablepath,
        namestr=None,
        maxsep=10,
        outdir=datadir,
        homedir='/home/luke/'):
    """
    J. Gagne's tables have a particular format that requires some wrangling.
    Also, since so many of the stars have high PM, the spatial cross-matches
    will be unreliable unless we also include PM information in the matching.

    Do the matching via astroquery's Gaia API.
    """
    assert type(namestr) == str
    t = Table.read(tablepath, format='ascii.cds')

    RAh, RAm, RAs = arr(t['RAh']), arr(t['RAm']), arr(t['RAs'])

    RA_hms =  [str(rah).zfill(2)+'h'+
               str(ram).zfill(2)+'m'+
               str(ras).zfill(2)+'s'
               for rah,ram,ras in zip(RAh, RAm, RAs)]

    DEd, DEm, DEs = arr(t['DEd']),arr(t['DEm']),arr(t['DEs'])
    DEsign = arr(t['DE-'])
    DEsign[DEsign != '-'] = '+'

    DE_dms = [str(desgn)+
              str(ded).zfill(2)+'d'+
              str(dem).zfill(2)+'m'+
              str(des).zfill(2)+'s'
              for desgn,ded,dem,des in zip(DEsign, DEd, DEm, DEs)]

    coords = SkyCoord(ra=RA_hms, dec=DE_dms, frame='icrs')

    RA = coords.ra.value
    dec = coords.dec.value
    pm_RA, pm_dec = arr(t['pmRA']), arr(t['pmDE'])
    u_pm_RA, u_pm_dec = arr(t['e_pmRA']), arr(t['e_pmDE'])

    maxsep = (maxsep*u.arcsec).to(u.deg).value

    name = t['Main'] if 'XI_' in namestr else t['Name']
    assoc = t['Assoc']

    outfile = os.path.join(outdir,'gotmatches_{}.xml.gz'.format(namestr))
    xmltouploadpath = os.path.join(outdir,'toupload_{}.xml'.format(namestr))

    if os.path.exists(outfile):
        os.remove(outfile) # NOTE if it's fast, can just do this to overwrite
    if not os.path.exists(outfile):
        _ = make_votable_given_full_cols(name, assoc, RA, dec, pm_RA, pm_dec,
                                    u_pm_RA, u_pm_dec,
                                    outpath=xmltouploadpath)

        Gaia.login(credentials_file=os.path.join(homedir, '.gaia_credentials'))

        # separated less than 10 arcsec.
        jobstr = (
        '''
        SELECT TOP {ncut:d} u.name, u.assoc, u.ra, u.dec, u.pm_ra, u.pm_dec,
        u.err_pm_ra, u.err_pm_dec,
        g.source_id, DISTANCE(
          POINT('ICRS', u.ra, u.dec),
          POINT('ICRS', g.ra,g.dec)) AS dist,
          g.phot_g_mean_mag as gaia_gmag,
          g.pmra AS gaia_pmra,
          g.pmdec AS gaia_pmdec
        FROM tap_upload.foobar as u, gaiadr2.gaia_source AS g
        WHERE 1=CONTAINS(
          POINT('ICRS', u.ra, u.dec),
          CIRCLE('ICRS', g.ra, g.dec, {sep:.8f})
        )
        '''
        )
        maxncut = int(5*len(name)) # to avoid query timeout
        query = jobstr.format(sep=maxsep, ncut=maxncut)

        if not os.path.exists(outfile):
            # might do async if this times out. but it doesn't.
            j = Gaia.launch_job(query=query,
                                upload_resource=xmltouploadpath,
                                upload_table_name="foobar", verbose=True,
                                dump_to_file=True, output_file=outfile)

        Gaia.logout()

    vot = parse(outfile)
    tab = vot.get_first_table().to_table()

    if maxncut - len(tab) < 10:
        errmsg = 'ERROR! too many matches'
        raise AssertionError(errmsg)

    # if nonzero and finite proper motion, require Gaia pm match to sign
    # of stated Gagne PMs.
    df = tab.to_pandas()

    print('\n'+42*'-')
    print('{} stars in original Gagne table'.format(len(t)))
    print('{} stars in sep < 10 arcsec xmatch'.format(len(df)))

    sel = (df['gaia_gmag'] < 18)
    print('{} stars in sep < 10 arcsec, G<18, xmatch'.format(len(df[sel])))

    sel &= (
        (   (df['pm_ra'] != 0 ) & (df['pm_dec'] != 0 ) &
            ( np.sign(df['pm_ra']) == np.sign(df['gaia_pmra']) ) &
            ( np.sign(df['pm_dec']) == np.sign(df['gaia_pmdec']) )
        )
        |
        (
            (df['pm_ra'] == 0 ) & (df['pm_dec'] == 0 )
        )
    )
    df = df[sel]
    print('{} stars in sep < 10 as xmatch, G<18, after pm cut (xor zero pm)'.
          format(len(df)))

    # make multiplicity column. then sort by name, then by distance. then drop
    # name duplicates, keeping the first (you have nearest neighbor saved!)
    _, inv, cnts = np.unique(df['name'], return_inverse=True,
                             return_counts=True)

    df['n_in_nbhd'] = cnts[inv]

    df['name'] = df['name'].str.decode('utf-8')
    df['assoc'] = df['assoc'].str.decode('utf-8')

    df = df.sort_values(['name','dist'])

    df = df.drop_duplicates(subset='name', keep='first')

    df['source_id'] = df['source_id'].astype('int64')

    print('{} stars after above cuts + choosing nearest nbhr by spatial sep'.
          format(len(df)))

    outpath = os.path.join(outdir,'MATCHED_{}.csv'.format(namestr))
    df.to_csv(outpath, index=False)
    print('made {}'.format(outpath))
    print(79*'=')