def main(extra_overplot=0):

    overwrite = 1
    groupname = 'NGC2354'
    targetname = '554-051'
    target_id = 5617126180115568256
    cutoff_probability = 0.1

    ##########################################
    # some data for a group that has a source_id list
    cg18_path = os.path.join(
        datadir, 'ngc_2354_CG18_subset.vot.gz')
    group_df = given_votable_get_df(cg18_path, assert_equal='Source')
    group_df = group_df[group_df['PMemb'] > cutoff_probability]
    group_source_ids = np.array(group_df['Source']).astype(np.int64)
    # TODO: generalize the above -- generically, it just needs some source_id list.
    ##########################################

    group_df_dr2 = given_source_ids_get_gaia_data(group_source_ids, groupname,
                                                  overwrite=overwrite)

    target_d = objectid_search(target_id)
    target_df = pd.read_csv(target_d['result'])
    assert len(target_df) == 1

    # now acquire the mean properties of the group, and query the neighborhood
    # based on those properties. the number of neighbor stars to randomly
    # select is min(50x the number of group members, 10000).
    bounds = {}
    params = ['parallax', 'ra', 'dec']
    for param in params:

        bounds[param+'_upper'] = (
            group_df_dr2[param].mean() + 5*group_df_dr2[param].std()
        )

        bounds[param+'_lower'] = (
            group_df_dr2[param].mean() - 5*group_df_dr2[param].std()
        )

    assert bounds['ra_upper'] < 360
    assert bounds['ra_lower'] > 0

    n_max = min((50*len(group_df_dr2), 10000))
    nbhd_df = query_neighborhood(bounds, groupname, n_max=n_max,
                                 overwrite=overwrite, is_cg18_group=True,
                                 is_kc19_group=False, is_k13_group=False)

    # ensure no overlap between the group members and the neighborhood sample.
    common = group_df_dr2.merge(nbhd_df, on='source_id', how='inner')
    snbhd_df = nbhd_df[~nbhd_df.source_id.isin(common.source_id)]

    plot_group_neighborhood(targetname, groupname, group_df_dr2, target_df,
                            snbhd_df, cutoff_probability,
                            extra_overplot=extra_overplot)
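
# A minimal usage sketch, assuming this module is run as a script:
if __name__ == "__main__":
    main(extra_overplot=0)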
def plot_TIC268_nbhd_small(outdir=RESULTSDIR):

    set_style()

    df = get_cdips_catalog(ver=0.4)

    kc19_sel = ((df.cluster.str.contains('NGC_2516')) &
                (df.reference.str.contains('Kounkel_2019')))
    cg18_sel = ((df.cluster.str.contains('NGC_2516')) &
                (df.reference.str.contains('CantatGaudin_2018')))

    target_df = df[df.source_id == 5489726768531119616]  # TIC 2683...
    kc19_df = df[kc19_sel]
    cg18_df = df[cg18_sel]

    kc19_df = kc19_df[~(kc19_df.source_id.isin(cg18_df.source_id))]

    ##########

    # NGC 2516 rough
    bounds = {
        'parallax_lower': 1.5,
        'parallax_upper': 4.0,
        'ra_lower': 108,
        'ra_upper': 132,
        'dec_lower': -76,
        'dec_upper': -45
    }
    groupname = 'customngc2516'

    nbhd_df = query_neighborhood(bounds,
                                 groupname,
                                 n_max=6000,
                                 overwrite=False,
                                 manual_gmag_limit=17)

    sel_nbhd = ((~nbhd_df.source_id.isin(kc19_df.source_id))
                & (~nbhd_df.source_id.isin(cg18_df.source_id)))
    from copy import deepcopy
    orig_nbhd_df = deepcopy(nbhd_df)
    nbhd_df = nbhd_df[sel_nbhd]

    print(f'Got {len(nbhd_df)} neighbors')
    print(f'Got {len(cg18_df)} in core')
    print(f'Got {len(kc19_df)} in corona')

    ##########

    plt.close('all')

    f, axs = plt.subplots(figsize=(4, 3), ncols=2)

    xv, yv = 'ra', 'dec'
    axs[0].scatter(nbhd_df[xv],
                   nbhd_df[yv],
                   c='gray',
                   alpha=0.5,
                   zorder=2,
                   s=7,
                   rasterized=True,
                   linewidths=0,
                   label='Field',
                   marker='.')
    axs[0].scatter(kc19_df[xv],
                   kc19_df[yv],
                   c='lightskyblue',
                   alpha=0.9,
                   zorder=3,
                   s=7,
                   rasterized=True,
                   linewidths=0.15,
                   label='Corona',
                   marker='.',
                   edgecolors='k')
    axs[0].scatter(cg18_df[xv],
                   cg18_df[yv],
                   c='k',
                   alpha=0.9,
                   zorder=4,
                   s=7,
                   rasterized=True,
                   label='Core',
                   marker='.')
    axs[0].plot(target_df[xv],
                target_df[yv],
                alpha=1,
                mew=0.5,
                zorder=8,
                label='TOI 1937',
                markerfacecolor='lightskyblue',
                markersize=14,
                marker='*',
                color='black',
                lw=0)

    axs[0].set_xlabel(r'$\alpha$ [deg]')
    axs[0].set_ylabel(r'$\delta$ [deg]')
    axs[0].set_xlim([108, 132])
    axs[0].set_ylim([-76, -45])

    ##########

    # CMD axes: absolute G from the distance modulus (parallax in mas),
    # against Bp-Rp color.
    get_yval = lambda _df: np.array(
        _df['phot_g_mean_mag'] + 5*np.log10(_df['parallax']/1e3) + 5
    )
    get_xval = lambda _df: np.array(
        _df['phot_bp_mean_mag'] - _df['phot_rp_mean_mag']
    )

    axs[1].scatter(get_xval(nbhd_df),
                   get_yval(nbhd_df),
                   c='gray',
                   alpha=0.8,
                   zorder=2,
                   s=7,
                   rasterized=True,
                   linewidths=0,
                   label='Field',
                   marker='.')
    axs[1].scatter(get_xval(kc19_df),
                   get_yval(kc19_df),
                   c='lightskyblue',
                   alpha=1,
                   zorder=3,
                   s=7,
                   rasterized=True,
                   linewidths=0.15,
                   label='Corona',
                   marker='.',
                   edgecolors='k')
    axs[1].scatter(get_xval(cg18_df),
                   get_yval(cg18_df),
                   c='k',
                   alpha=0.9,
                   zorder=4,
                   s=7,
                   rasterized=True,
                   linewidths=0,
                   label='Core',
                   marker='.')
    axs[1].plot(get_xval(target_df),
                get_yval(target_df),
                alpha=1,
                mew=0.5,
                zorder=8,
                label='TOI 1937',
                markerfacecolor='lightskyblue',
                markersize=14,
                marker='*',
                color='black',
                lw=0)

    axs[1].set_ylim(axs[1].get_ylim()[::-1])

    axs[1].set_xlabel('Bp - Rp [mag]')
    axs[1].set_ylabel('Absolute G [mag]', labelpad=-6)

    ##########

    words = ['Field', 'Corona', 'Core', 'TOI1937'][::-1]
    colors = ['gray', 'lightskyblue', 'k', 'lightskyblue'][::-1]
    rainbow_text(0.98, 0.02, words, colors, size='medium', ax=axs[0])

    f.tight_layout(w_pad=2)

    outpath = os.path.join(outdir, 'small_ngc2516.png')
    savefig(f, outpath)
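
# A minimal usage sketch (assumes the CDIPS catalog and RESULTSDIR are
# available):
# plot_TIC268_nbhd_small(outdir=RESULTSDIR)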
def get_neighborhood_information(
    source_id,
    overwrite=0,
    min_n_nbhrs=1000,
    manual_gmag_limit=None
    ):
    """
    Given a source_id for a star (potentially a field star), acquire
    information necessary for neighborhood diagnostic plots.

    Parameters:

        source_id: Gaia DR2 source_id

        overwrite: Whether the Gaia cache gets overwritten.

        min_n_nbhrs: Smallest acceptable neighborhood size; the positional
        bounds are widened until at least this many neighbors are returned.

        manual_gmag_limit: G < manual_gmag_limit for the neighborhood.
    """

    #
    # Get the targetname
    #
    ticid = gaiadr2_to_tic(str(source_id))
    toiid = ticid_to_toiid(ticid)

    if isinstance(toiid, str):
        targetname = toiid
    else:
        targetname = 'TIC{}.01'.format(ticid)

    #
    # Get Gaia information for target.
    #

    target_d = objectid_search(
        source_id,
        columns=('source_id', 'ra', 'dec', 'ra_error', 'dec_error',
                 'phot_g_mean_mag', 'phot_bp_mean_mag', 'phot_rp_mean_mag',
                 'l', 'b', 'parallax', 'parallax_error', 'pmra', 'pmra_error',
                 'pmdec', 'pmdec_error', 'radial_velocity'),
        forcefetch=True,
        gaia_mirror='vizier'
    )
    target_df = pd.read_csv(target_d['result'])
    assert len(target_df) == 1

    # now query the neighborhood based on the target star's astrometry. the
    # positional bounds are set from the target's astrometric uncertainties,
    # and widened until at least min_n_nbhrs stars are returned; at most n_max
    # stars are randomly selected.
    bounds = {}
    params = ['parallax', 'ra', 'dec']

    plx_mean = float(target_df.parallax)

    n_nbhrs = 0
    n_std = 5
    n_std_incr = 10
    n_std_max = 200

    if plx_mean > 10:
        n_std = 5
        n_std_incr = 20
        n_std_max = 1000

    while n_nbhrs < min_n_nbhrs:

        if n_std > n_std_max:
            return None

        LOGINFO('trying with bounds of {} standard deviations'.format(n_std))

        for param in params:
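            # widen the ra/dec bounds twice as much as the parallax bounds.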
            mult = 1 if 'parallax' in param else 2
            bounds[param+'_upper'] = (
                float(target_df[param]) + mult*n_std*float(target_df[param + '_error'])
            )
            bounds[param+'_lower'] = (
                float(target_df[param]) - mult*n_std*float(target_df[param + '_error'])
            )

        if bounds['ra_upper'] > 360:
            bounds['ra_upper'] = 359.99
        if bounds['ra_lower'] < 0:
            bounds['ra_lower'] = 0
        if bounds['parallax_lower'] < 0:
            bounds['parallax_lower'] = 0

        n_max = int(1e4)

        if manual_gmag_limit is None:
            manual_gmag_limit = 17

        groupname = '{}'.format(source_id)
        # only force overwrite if iterating
        if n_nbhrs == 0:
            nbhd_df = query_neighborhood(bounds, groupname, n_max=n_max,
                                         overwrite=overwrite,
                                         manual_gmag_limit=manual_gmag_limit)
        else:
            nbhd_df = query_neighborhood(bounds, groupname, n_max=n_max,
                                         overwrite=True,
                                         manual_gmag_limit=manual_gmag_limit)

        n_nbhrs = len(nbhd_df)
        LOGINFO(42*'=')
        LOGINFO('Got {} neighbors, when the minimum was {}'.
                format(n_nbhrs, min_n_nbhrs))
        LOGINFO(42*'=')

        n_std += n_std_incr

    # bound the proper-motion ranges by +/- 3 stdevs of the neighborhood
    # (widened below to include the target if needed).
    n_std = 3
    pmdec_min = np.nanmean(nbhd_df['pmdec']) - n_std*np.nanstd(nbhd_df['pmdec'])
    pmdec_max = np.nanmean(nbhd_df['pmdec']) + n_std*np.nanstd(nbhd_df['pmdec'])
    pmra_min = np.nanmean(nbhd_df['pmra']) - n_std*np.nanstd(nbhd_df['pmra'])
    pmra_max = np.nanmean(nbhd_df['pmra']) + n_std*np.nanstd(nbhd_df['pmra'])

    pmdec_min = min((pmdec_min, float(target_df['pmdec'])))
    pmdec_max = max((pmdec_max, float(target_df['pmdec'])))
    pmra_min = min((pmra_min, float(target_df['pmra'])))
    pmra_max = max((pmra_max, float(target_df['pmra'])))

    return (targetname, groupname, target_df, nbhd_df,
            pmdec_min, pmdec_max, pmra_min, pmra_max)
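
# A minimal usage sketch (this source_id is the TOI 1937 target used elsewhere
# in this module; assumes network access to a Gaia mirror):
# info = get_neighborhood_information(5489726768531119616)
# if info is not None:
#     (targetname, groupname, target_df, nbhd_df,
#      pmdec_min, pmdec_max, pmra_min, pmra_max) = info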
def get_group_and_neighborhood_information(
    source_id,
    overwrite=0,
    force_groupname=None,
    force_references=None,
    force_cdips_match=True,
    manual_gmag_limit=None,
    CATALOG_VERSION=0.6):
    """
    Given a source_id for a cluster member, acquire information necessary for
    neighborhood diagnostic plots. (Namely, find all the group members, then do
    a Gaia query of everything).

    Parameters:

        source_id: Gaia DR2 source_id

        overwrite: Whether the Gaia cache gets overwritten.

        Optional kwargs: force_groupname and force_references. If passed, for
        example as the string "kc19group_1222" and the array ["Kounkel_2019"],
        plot generation will be forced without verifying that the target
        "source_id" is a member of the group.
    """

    if not isinstance(source_id, np.int64):
        source_id = np.int64(source_id)

    cdips_df = get_cdips_pub_catalog(ver=CATALOG_VERSION)
    row = cdips_df[cdips_df.source_id == source_id]

    if len(row) == 0 and force_cdips_match:
        LOGINFO('Failed to get CDIPS target list match for {}'.format(source_id))
        return None

    if len(row) == 0 or pd.isnull(row['cluster'].iloc[0]):
        LOGWARNING(f'Did not find any group matches for GAIA DR2 {source_id}.')
        return None

    #
    # Get numpy arrays of references and cluster names for this star.
    # This is needed b/c multiple memberships are comma-separated.
    #
    if 'reference' in row:
        references = np.array(row['reference'].iloc[0].split(','))
    elif 'reference_id' in row:
        references = np.array(row['reference_id'].iloc[0].split(','))
    clusters = np.array(row['cluster'].iloc[0].split(','))
    if force_references and force_groupname:
        references = force_references
        clusters = np.array([force_groupname])

    assert len(references) == len(clusters)

    #
    # Given the numpy array of clusters, find the best cluster membership list
    # to make the report with.
    #
    from cdips.catalogbuild.membership_lists import RANKED_MEMBERSHIP_DICT

    references_in_ensembles = RANKED_MEMBERSHIP_DICT[CATALOG_VERSION]['isgroup']
    references_in_field = RANKED_MEMBERSHIP_DICT[CATALOG_VERSION]['isfield']

    is_in_group = np.any(np.in1d(references, references_in_ensembles))
    is_in_field = np.any(np.in1d(references, references_in_field))

    if is_in_group:
        # At least one of the references given corresponds to an actual coeval
        # group, and not a list of young field stars.  In this case, take the
        # highest precedence group (lowest index) as the one to make the plot
        # for.
        referencename = references_in_ensembles[
            int(min(np.argwhere(np.in1d(references_in_ensembles, references))))
        ]

        groupname = clusters[references == referencename][0]

    else:
        LOGWARNING(f'Did not find any group matches for GAIA DR2 {source_id}.')
        return None

    #
    # Get the targetname
    #
    ticid = gaiadr2_to_tic(str(source_id))
    toiid = ticid_to_toiid(ticid)

    if isinstance(toiid, str):
        targetname = toiid
    else:
        targetname = 'TIC{}.01'.format(ticid)

    #
    # Get the group members!
    #

    # (We avoided the field star case earlier.)
    cdips_df = cdips_df[~pd.isnull(cdips_df.cluster)]

    group_df = cdips_df[
        # avoids e.g., "kc19group_981" matching on "kc19group_98"; handles the
        # group name appearing at the beginning, middle, or end of the
        # comma-separated cluster string. nb. str.contains treats these
        # patterns as regexes, so groupname is assumed to be free of regex
        # metacharacters.
        (
            (cdips_df.cluster.str.contains(groupname+','))
            |
            (cdips_df.cluster.str.contains(','+groupname+','))
            |
            (cdips_df.cluster.str.contains(','+groupname+'$'))
            |
            (cdips_df.cluster == groupname)
        )
        &
        (cdips_df.reference_id.str.contains(referencename))
    ]

    group_source_ids = np.array(group_df['source_id']).astype(np.int64)
    np.testing.assert_array_equal(group_df['source_id'], group_source_ids)

    #
    # Given the source ids, get all the relevant Gaia information.
    #
    enforce_all_sourceids_viable = True
    savstr = f'_{groupname.replace(" ","_")}_{referencename}'

    group_df_dr2 = given_source_ids_get_gaia_data(
        group_source_ids, groupname.replace(" ","_"), overwrite=overwrite,
        enforce_all_sourceids_viable=enforce_all_sourceids_viable,
        n_max=min((len(group_source_ids), 10000)),
        savstr=savstr
    )

    target_d = objectid_search(
        source_id,
        columns=('source_id', 'ra','dec', 'phot_g_mean_mag',
                 'phot_bp_mean_mag', 'phot_rp_mean_mag', 'l','b',
                 'parallax, parallax_error', 'pmra','pmra_error',
                 'pmdec','pmdec_error', 'radial_velocity'),
        forcefetch=True,
        gaia_mirror='vizier'
    )
    target_df = pd.read_csv(target_d['result'])
    assert len(target_df) == 1

    # now acquire the mean properties of the group, and query the neighborhood
    # based on those properties. the number of neighbor stars to randomly
    # select is min(50x the number of group members, 10000). (cut off the
    # group bounds based on parallax, because more distant groups are more
    # uncertain).
    bounds = {}
    params = ['parallax', 'ra', 'dec']

    plx_mean = group_df_dr2['parallax'].mean()
    if plx_mean > 5:
        n_std = 5
    elif plx_mean > 3:
        n_std = 4
    else:
        n_std = 3

    LOGINFO(f'bounding by {n_std} standard deviations')

    for param in params:
        bounds[param+'_upper'] = (
            group_df_dr2[param].mean() + n_std*group_df_dr2[param].std()
        )
        bounds[param+'_lower'] = (
            group_df_dr2[param].mean() - n_std*group_df_dr2[param].std()
        )

    # clip RA to [0, 360) and parallax to non-negative values; declination can
    # legitimately be negative, so it is left unclipped.
    bounds['ra_upper'] = min(bounds['ra_upper'], 360 - 1e-5)
    bounds['ra_lower'] = max(bounds['ra_lower'], 1e-5)
    if bounds['parallax_lower'] < 0:
        bounds['parallax_lower'] = 0

    assert bounds['ra_upper'] < 360
    assert bounds['ra_lower'] > 0
    assert bounds['parallax_lower'] >= 0

    n_max = min((50*len(group_df_dr2), 10000))

    if manual_gmag_limit is None:
        manual_gmag_limit = np.nanpercentile(group_df_dr2.phot_g_mean_mag, 95)

    mstr = savstr
    nbhd_df = query_neighborhood(bounds, groupname.replace(" ","_"), n_max=n_max,
                                 overwrite=overwrite,
                                 manual_gmag_limit=manual_gmag_limit,
                                 mstr=mstr)

    # ensure no overlap between the group members and the neighborhood sample.
    common = group_df_dr2.merge(nbhd_df, on='source_id', how='inner')
    snbhd_df = nbhd_df[~nbhd_df.source_id.isin(common.source_id)]

    n_std = 5
    pmdec_min = group_df_dr2['pmdec'].mean() - n_std*group_df_dr2['pmdec'].std()
    pmdec_max = group_df_dr2['pmdec'].mean() + n_std*group_df_dr2['pmdec'].std()
    pmra_min = group_df_dr2['pmra'].mean() - n_std*group_df_dr2['pmra'].std()
    pmra_max = group_df_dr2['pmra'].mean() + n_std*group_df_dr2['pmra'].std()

    pmdec_min = min((pmdec_min, float(target_df['pmdec'])))
    pmdec_max = max((pmdec_max, float(target_df['pmdec'])))
    pmra_min = min((pmra_min, float(target_df['pmra'])))
    pmra_max = max((pmra_max, float(target_df['pmra'])))

    return (targetname, groupname, referencename, group_df_dr2, target_df,
            snbhd_df, pmdec_min, pmdec_max, pmra_min, pmra_max)
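
# A minimal usage sketch of the "forced" mode described in the docstring (the
# group name and reference here are the docstring's own examples):
# info = get_group_and_neighborhood_information(
#     5489726768531119616,
#     force_groupname='kc19group_1222',
#     force_references=np.array(['Kounkel_2019'])
# )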
def _get_fullfaint_edr3_dataframes():
    """
    Return: nbhd_df, core_df, halo_df, full_df, target_df
    (for NGC 2516, "full faint" sample -- i.e., as faint as possible, but
    ***after crossmatching the GAIA DR2 targets with GAIA EDR3***. This
    crossmatch is run using the dr2_neighbourhood table from the Gaia archive,
    and then taking the closest angular separation match for cases with
    multiple matches.)

    Further notes are in the "_get_fullfaint_dataframes" docstring.

    This procedure yields:

        FOR DR2:
            Got 1106 in fullfaint CG18
            Got 3003 in fullfaint KC19
            Got 1860 in fullfaint M21
            Got 1912 in fullfaint KC19 after removing core matches
            Got 1096 in fullfaint M21 after removing core matches
            Got 280 in fullfaint M21 after removing KC19 matches
            Got 13834 neighbors
            Got 1106 in core
            Got 2192 in corona
            Got 1091 KC19 / CG18 overlaps
            Got 764 M21 / CG18 overlaps

        FOR EDR3:
            Got 1106 EDR3 matches in core.
            99th pct [arcsec] 1577.8 -> 0.3
            Got 1912 EDR3 matches in KC19.
            99th pct [arcsec] 1702.8 -> 0.5
            Got 280 EDR3 matches in M21.
            99th pct [arcsec] 1426.6 -> 0.3
            Got 13843 EDR3 matches in nbhd.
            99th pct [arcsec] 1833.9 -> 3.7

            (Raw match counts, before keeping only the closest match:
                CG18/core: got 1143 matches vs 1106 source id queries.
                KC19/halo: got 2005 matches vs 1912 source id queries.
                Nbhd:      got 15123 matches vs 13843 source id queries.)
    """

    # get the full CG18 NGC 2516 memberships, downloaded from Vizier
    cg18path = os.path.join(DATADIR, 'gaia',
                            'CantatGaudin2018_vizier_only_NGC2516.fits')
    hdul = fits.open(cg18path)
    cg18_tab = Table(hdul[1].data)
    cg18_df = cg18_tab.to_pandas()
    cg18_df['source_id'] = cg18_df['Source']

    # get the full KC19 NGC 2516 memberships, from Marina's file
    # NGC 2516 == "Theia 613" in Kounkel's approach.
    kc19path = os.path.join(DATADIR, 'gaia', 'string_table1.csv')
    kc19_df = pd.read_csv(kc19path)
    kc19_df = kc19_df[kc19_df.group_id == 613]

    # get the full M21 NGC 2516 memberships
    m21path = os.path.join(DATADIR, 'gaia',
                           'Meingast_2021_NGC2516_all1860members.fits')
    m21_df = Table(fits.open(m21path)[1].data).to_pandas()
    m21_df = m21_df.rename(mapper={'GaiaDR2': 'source_id'}, axis=1)

    print(42 * '=' + '\nFOR DR2:')
    print(f'Got {len(cg18_df)} in fullfaint CG18')
    print(f'Got {len(kc19_df)} in fullfaint KC19')
    print(f'Got {len(m21_df)} in fullfaint M21')

    kc19_cg18_overlap_df = kc19_df[(kc19_df.source_id.isin(cg18_df.source_id))]
    kc19_df = kc19_df[~(kc19_df.source_id.isin(cg18_df.source_id))]
    print(f'Got {len(kc19_df)} in fullfaint KC19 after removing core matches')

    m21_cg18_overlap_df = m21_df[(m21_df.source_id.isin(cg18_df.source_id))]
    m21_df = m21_df[~(m21_df.source_id.isin(cg18_df.source_id))]
    print(f'Got {len(m21_df)} in fullfaint M21 after removing core matches')
    m21_df = m21_df[~(m21_df.source_id.isin(kc19_df.source_id))]
    print(f'Got {len(m21_df)} in fullfaint M21 after removing KC19 matches')

    ##########

    # NGC 2516 rough
    bounds = {
        'parallax_lower': 1.5,
        'parallax_upper': 4.0,
        'ra_lower': 108,
        'ra_upper': 132,
        'dec_lower': -76,
        'dec_upper': -45
    }
    groupname = 'customngc2516_fullfaint'

    nbhd_df = query_neighborhood(bounds,
                                 groupname,
                                 n_max=14000,
                                 overwrite=False,
                                 manual_gmag_limit=19)

    sel_nbhd = ((~nbhd_df.source_id.isin(kc19_df.source_id))
                & (~nbhd_df.source_id.isin(cg18_df.source_id))
                & (~nbhd_df.source_id.isin(m21_df.source_id)))
    orig_nbhd_df = deepcopy(nbhd_df)
    nbhd_df = nbhd_df[sel_nbhd]

    print(f'Got {len(nbhd_df)} neighbors')
    print(f'Got {len(cg18_df)} in core')
    print(f'Got {len(kc19_df)+len(m21_df)} in corona')
    print(f'Got {len(kc19_cg18_overlap_df)} KC19 / CG18 overlaps')
    print(f'Got {len(m21_cg18_overlap_df)} M21 / CG18 overlaps')
    assert (len(cg18_df) + len(kc19_df) + len(m21_df) == len(
        np.unique(np.array(pd.concat(
            (cg18_df, kc19_df, m21_df))['source_id']))))

    cg18_df_edr3 = (given_dr2_sourceids_get_edr3_xmatch(
        nparr(cg18_df.Source).astype(np.int64),
        'fullfaint_ngc2516_cg18_df',
        overwrite=False))
    kc19_df_edr3 = (given_dr2_sourceids_get_edr3_xmatch(
        nparr(kc19_df.source_id).astype(np.int64),
        'fullfaint_ngc2516_kc19_df',
        overwrite=False))
    m21_df_edr3 = (given_dr2_sourceids_get_edr3_xmatch(
        nparr(m21_df.source_id).astype(np.int64),
        'fullfaint_ngc2516_m21_df',
        overwrite=False))
    nbhd_df_edr3 = (given_dr2_sourceids_get_edr3_xmatch(
        nparr(nbhd_df.source_id).astype(np.int64),
        'fullfaint_ngc2516_nbhd_df',
        overwrite=False))

    print(42 * '=' + '\nFOR EDR3:')

    # Take the closest (proper motion and epoch-corrected) angular distance as
    # THE single match.
    get_edr3_xm = lambda _df: (_df.sort_values(by='angular_distance').
                               drop_duplicates(subset='dr2_source_id',
                                               keep='first'))

    s_cg18_df_edr3 = get_edr3_xm(cg18_df_edr3)
    s_kc19_df_edr3 = get_edr3_xm(kc19_df_edr3)
    s_m21_df_edr3 = get_edr3_xm(m21_df_edr3)
    s_nbhd_df_edr3 = get_edr3_xm(nbhd_df_edr3)

    print(
        f'Got {len(s_cg18_df_edr3)} EDR3 matches in core.\n' +
        f'99th pct [arcsec] {np.nanpercentile(cg18_df_edr3.angular_distance, 99):.1f} -> {np.nanpercentile(s_cg18_df_edr3.angular_distance, 99):.1f}'
    )

    print(
        f'Got {len(s_kc19_df_edr3)} EDR3 matches in KC19.\n' +
        f'99th pct [arcsec] {np.nanpercentile(kc19_df_edr3.angular_distance, 99):.1f} -> {np.nanpercentile(s_kc19_df_edr3.angular_distance, 99):.1f}'
    )

    print(
        f'Got {len(s_m21_df_edr3)} EDR3 matches in M21.\n' +
        f'99th pct [arcsec] {np.nanpercentile(m21_df_edr3.angular_distance, 99):.1f} -> {np.nanpercentile(s_m21_df_edr3.angular_distance, 99):.1f}'
    )

    print(
        f'Got {len(s_nbhd_df_edr3)} EDR3 matches in nbhd.\n' +
        f'99th pct [arcsec] {np.nanpercentile(nbhd_df_edr3.angular_distance, 99):.1f} -> {np.nanpercentile(s_nbhd_df_edr3.angular_distance, 99):.1f}'
    )

    # Finally, query Gaia EDR3 to get the latest and greatest fullfaint
    # photometry
    kc19_df_0 = given_source_ids_get_gaia_data(
        np.array(s_kc19_df_edr3.dr3_source_id),
        'fullfaint_ngc2516_kc19_df_edr3',
        n_max=10000,
        overwrite=False,
        enforce_all_sourceids_viable=True,
        gaia_datarelease='gaiaedr3')
    cg18_df_0 = given_source_ids_get_gaia_data(
        np.array(s_cg18_df_edr3.dr3_source_id),
        'fullfaint_ngc2516_cg18_df_edr3',
        n_max=10000,
        overwrite=False,
        enforce_all_sourceids_viable=True,
        gaia_datarelease='gaiaedr3')
    m21_df_0 = given_source_ids_get_gaia_data(
        np.array(s_m21_df_edr3.dr3_source_id),
        'fullfaint_ngc2516_m21_df_edr3',
        n_max=10000,
        overwrite=False,
        enforce_all_sourceids_viable=True,
        gaia_datarelease='gaiaedr3')
    nbhd_df_0 = given_source_ids_get_gaia_data(
        np.array(s_nbhd_df_edr3.dr3_source_id),
        'fullfaint_ngc2516_nbhd_df_edr3',
        n_max=15000,
        overwrite=False,
        enforce_all_sourceids_viable=True,
        gaia_datarelease='gaiaedr3')

    assert len(cg18_df) == len(cg18_df_0)
    assert len(kc19_df) == len(kc19_df_0)
    assert len(m21_df) == len(m21_df_0)
    assert len(nbhd_df) == len(nbhd_df_0)

    # nb. these "source_ids" are now EDR3 source_ids.
    np.testing.assert_array_equal(np.array(kc19_df_0.source_id),
                                  np.array(kc19_df_0.source_id_2))
    np.testing.assert_array_equal(np.array(cg18_df_0.source_id),
                                  np.array(cg18_df_0.source_id_2))
    np.testing.assert_array_equal(np.array(m21_df_0.source_id),
                                  np.array(m21_df_0.source_id_2))
    np.testing.assert_array_equal(np.array(nbhd_df_0.source_id),
                                  np.array(nbhd_df_0.source_id_2))

    kc19_df_0['dr2_source_id'] = nparr(s_kc19_df_edr3['dr2_source_id']).astype(
        np.int64)
    cg18_df_0['dr2_source_id'] = nparr(s_cg18_df_edr3['dr2_source_id']).astype(
        np.int64)
    m21_df_0['dr2_source_id'] = nparr(s_m21_df_edr3['dr2_source_id']).astype(
        np.int64)
    nbhd_df_0['dr2_source_id'] = nparr(s_nbhd_df_edr3['dr2_source_id']).astype(
        np.int64)

    target_df = kc19_df_0[kc19_df_0.source_id ==
                          5489726768531119616]  # TIC 2683...

    #
    # wrap up into the full source list
    #
    cg18_df_0['subcluster'] = 'core'
    kc19_df_0['subcluster'] = 'halo'
    m21_df_0['subcluster'] = 'halo'

    core_df = cg18_df_0
    halo_df = pd.concat((kc19_df_0, m21_df_0)).reset_index()

    full_df = pd.concat((core_df, halo_df)).reset_index()
    assert len(np.unique(full_df.source_id)) == len(full_df)
    print(f'Got {len(full_df)} unique sources in the cluster.')

    full_df['in_CG18'] = full_df.source_id.isin(cg18_df.source_id)
    full_df['in_KC19'] = full_df.source_id.isin(kc19_df.source_id)
    full_df['in_M21'] = full_df.source_id.isin(m21_df.source_id)

    # alias the DR2 radial velocity column to its EDR3 name, so the
    # neighborhood frame matches the EDR3-queried member frames downstream.
    nbhd_df['dr2_radial_velocity'] = nbhd_df['radial_velocity']

    return nbhd_df, core_df, halo_df, full_df, target_df
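
# A minimal usage sketch (assumes the membership files exist under DATADIR):
# nbhd_df, core_df, halo_df, full_df, target_df = _get_fullfaint_edr3_dataframes()
# nb. core_df/halo_df/full_df carry EDR3 source_ids, plus a "dr2_source_id"
# column for cross-referencing against the DR2-based membership lists.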
def _get_fullfaint_dataframes():
    """
    Return: nbhd_df, core_df, halo_df, full_df, target_df
    (for NGC 2516, "full faint" sample -- i.e., as faint as possible.)

    The "core" is all available Cantat-Gaudin 2018 members, with no magnitude
    cutoff.

    The "halo" is the full Kounkel & Covey 2019 + Meingast 2021 member set,
    provided that the source is not in the core. (i.e., KC19 and M21 get no
    points for getting the "core" targets correct).

    The "neighborhood" was selected via

        bounds = { 'parallax_lower': 1.5, 'parallax_upper': 4.0, 'ra_lower': 108,
        'ra_upper': 132, 'dec_lower': -76, 'dec_upper': -45 }

        nbhd_df = query_neighborhood(bounds, groupname, n_max=14000,
                                     overwrite=False, manual_gmag_limit=19)

    This procedure yields:

        Got 1106 in fullfaint CG18
        Got 3003 in fullfaint KC19
        Got 1860 in fullfaint M21
        Got 1912 in fullfaint KC19 after removing core matches
        Got 1096 in fullfaint M21 after removing core matches
        Got 280 in fullfaint M21 after removing KC19 matches

        Got 13834 neighbors
        Got 1106 in core
        Got 2192 in corona
        Got 1091 KC19 / CG18 overlaps
        Got 764 M21 / CG18 overlaps
        Got 3298 unique sources in the cluster.
    """

    # get the full CG18 NGC 2516 memberships, downloaded from Vizier
    cg18path = os.path.join(DATADIR, 'gaia',
                            'CantatGaudin2018_vizier_only_NGC2516.fits')
    hdul = fits.open(cg18path)
    cg18_tab = Table(hdul[1].data)
    cg18_df = cg18_tab.to_pandas()
    cg18_df['source_id'] = cg18_df['Source']

    # get the full KC19 NGC 2516 memberships, from Marina's file
    # NGC 2516 == "Theia 613" in Kounkel's approach.
    kc19path = os.path.join(DATADIR, 'gaia', 'string_table1.csv')
    kc19_df = pd.read_csv(kc19path)
    kc19_df = kc19_df[kc19_df.group_id == 613]

    # get the full M21 NGC 2516 memberships
    m21path = os.path.join(DATADIR, 'gaia',
                           'Meingast_2021_NGC2516_all1860members.fits')
    m21_df = Table(fits.open(m21path)[1].data).to_pandas()
    m21_df = m21_df.rename(mapper={'GaiaDR2': 'source_id'}, axis=1)

    print(f'Got {len(cg18_df)} in fullfaint CG18')
    print(f'Got {len(kc19_df)} in fullfaint KC19')
    print(f'Got {len(m21_df)} in fullfaint M21')

    kc19_cg18_overlap_df = kc19_df[(kc19_df.source_id.isin(cg18_df.source_id))]
    kc19_df = kc19_df[~(kc19_df.source_id.isin(cg18_df.source_id))]
    print(f'Got {len(kc19_df)} in fullfaint KC19 after removing core matches')

    m21_cg18_overlap_df = m21_df[(m21_df.source_id.isin(cg18_df.source_id))]
    m21_df = m21_df[~(m21_df.source_id.isin(cg18_df.source_id))]
    print(f'Got {len(m21_df)} in fullfaint M21 after removing core matches')
    m21_df = m21_df[~(m21_df.source_id.isin(kc19_df.source_id))]
    print(f'Got {len(m21_df)} in fullfaint M21 after removing KC19 matches')

    ##########

    # NGC 2516 rough
    bounds = {
        'parallax_lower': 1.5,
        'parallax_upper': 4.0,
        'ra_lower': 108,
        'ra_upper': 132,
        'dec_lower': -76,
        'dec_upper': -45
    }
    groupname = 'customngc2516_fullfaint'

    nbhd_df = query_neighborhood(bounds,
                                 groupname,
                                 n_max=14000,
                                 overwrite=False,
                                 manual_gmag_limit=19)

    # query gaia DR2 to get the fullfaint photometry
    kc19_df_0 = given_source_ids_get_gaia_data(
        np.array(kc19_df.source_id),
        'ngc2516_kc19_earhart_fullfaint',
        n_max=10000,
        overwrite=False,
        enforce_all_sourceids_viable=True)
    cg18_df_0 = given_source_ids_get_gaia_data(
        np.array(cg18_df.Source),
        'ngc2516_cg18_earhart_fullfaint',
        n_max=10000,
        overwrite=False,
        enforce_all_sourceids_viable=True)
    m21_df_0 = given_source_ids_get_gaia_data(
        np.array(m21_df.source_id),
        'ngc2516_m21_earhart_fullfaint',
        n_max=10000,
        overwrite=False,
        enforce_all_sourceids_viable=True)

    assert len(cg18_df) == len(cg18_df_0)
    assert len(kc19_df) == len(kc19_df_0)
    assert len(m21_df) == len(m21_df_0)

    target_df = kc19_df_0[kc19_df_0.source_id ==
                          5489726768531119616]  # TIC 2683...

    sel_nbhd = ((~nbhd_df.source_id.isin(kc19_df.source_id))
                & (~nbhd_df.source_id.isin(cg18_df.source_id))
                & (~nbhd_df.source_id.isin(m21_df.source_id)))
    orig_nbhd_df = deepcopy(nbhd_df)
    nbhd_df = nbhd_df[sel_nbhd]

    print(f'Got {len(nbhd_df)} neighbors')
    print(f'Got {len(cg18_df)} in core')
    print(f'Got {len(kc19_df)+len(m21_df)} in corona')
    print(f'Got {len(kc19_cg18_overlap_df)} KC19 / CG18 overlaps')
    print(f'Got {len(m21_cg18_overlap_df)} M21 / CG18 overlaps')

    #
    # wrap up into the full source list
    #
    cg18_df_0['subcluster'] = 'core'
    kc19_df_0['subcluster'] = 'halo'
    m21_df_0['subcluster'] = 'halo'

    core_df = cg18_df_0
    halo_df = pd.concat((kc19_df_0, m21_df_0)).reset_index()

    full_df = pd.concat((core_df, halo_df)).reset_index()
    assert len(np.unique(full_df.source_id)) == len(full_df)
    print(f'Got {len(full_df)} unique sources in the cluster.')

    full_df['in_CG18'] = full_df.source_id.isin(cg18_df.source_id)
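    # re-read the unfiltered KC19 and M21 membership lists (kc19_df and m21_df
    # were trimmed of overlaps above), so the in_* flags reflect the original
    # catalogs.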
    kc19_df = pd.read_csv(kc19path)
    kc19_df = kc19_df[kc19_df.group_id == 613]
    full_df['in_KC19'] = full_df.source_id.isin(kc19_df.source_id)
    m21_df = Table(fits.open(m21path)[1].data).to_pandas()
    m21_df = m21_df.rename(mapper={'GaiaDR2': 'source_id'}, axis=1)
    full_df['in_M21'] = full_df.source_id.isin(m21_df.source_id)

    return nbhd_df, core_df, halo_df, full_df, target_df
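
# A minimal usage sketch (same file assumptions as the EDR3 variant above):
# nbhd_df, core_df, halo_df, full_df, target_df = _get_fullfaint_dataframes()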