Code example #1
def do_allvariable_report_making(source_id,
                                 outdir=None,
                                 overwrite=False,
                                 apply_extinction=None,
                                 use_calib=False):
    """
    Args:

        source_id (np.int64): Gaia DR2 source identifier

        apply_extinction (float): E(Bp-Rp) applied to plotted colors.

        use_calib (bool): if True, searches for _calibration_ light curves, not
        just _cluster_ light curves.
    """

    print(42 * '=')
    thetime = datetime.utcnow().isoformat()
    print(f'{thetime}: Beginning {source_id} do_allvariable_report_making.')

    picklepath = os.path.join(outdir, 'data', f'{source_id}_allvar.pkl')
    statuspath = os.path.join(outdir, 'logs', f'{source_id}_status.log')

    if not os.path.exists(statuspath):
        # initialize status file
        lc_info = {
            'n_sectors': None,
            'lcpaths': None,
            'detrending_completed': None
        }
        ppu.save_status(statuspath, 'lc_info', lc_info)
        report_info = {
            'report_completed': None,
            'ls_period': None,
            'bestlspval': None,
            'nbestperiods': None,
            'nbestlspvals': None,
            'n_dict': None
        }
        ppu.save_status(statuspath, 'report_info', report_info)

    s = ppu.load_status(statuspath)
    if not overwrite:
        if str2bool(s['report_info']['report_completed']):
            print(f'Found {source_id} report_completed')
            return 0
        if s['lc_info']['n_sectors'] == '0':
            print(f'Found {source_id} n_sectors = 0')
            return 0
        if s['lc_info']['detrending_completed'] == 'False':
            print(f'Found {source_id} not detrending_completed')
            return 0

    # get the data needed to make the report if it hasn't already been made.
    if not os.path.exists(picklepath):

        thetime = datetime.utcnow().isoformat()
        print(f'{thetime}: Beginning {source_id} detrending.')

        #
        # get the light curves
        #

        lcpaths = lcu.find_cdips_lc_paths(source_id,
                                          raise_error=False,
                                          use_calib=use_calib)

        if lcpaths is None:
            lc_info = {
                'n_sectors': 0,
                'lcpaths': None,
                'detrending_completed': False
            }
            ppu.save_status(statuspath, 'lc_info', lc_info)
            return 0

        #
        # detrend systematics. each light curve yields tuples of:
        #   primaryhdr, data, ap, dtrvecs, eigenvecs, smooth_eigenvecs
        #
        dtr_infos = []
        try:
            for lcpath in lcpaths:
                dtr_info = dtr.detrend_systematics(lcpath)
                dtr_infos.append(dtr_info)
        except Exception as e:
            print(f'ERR! {e}')
            lc_info = {
                'n_sectors': len(lcpaths),
                'lcpaths': lcpaths,
                'detrending_completed': False
            }
            ppu.save_status(statuspath, 'lc_info', lc_info)
            return 0

        #
        # stitch all available light curves
        #
        ap = dtr_infos[0][2]
        timelist = [d[1]['TMID_BJD'] for d in dtr_infos]
        maglist = [d[1][f'PCA{ap}'] for d in dtr_infos]
        magerrlist = [d[1][f'IRE{ap}'] for d in dtr_infos]

        extravecdict = {}
        extravecdict[f'IRM{ap}'] = [d[1][f'IRM{ap}'] for d in dtr_infos]
        for i in range(0, 7):
            extravecdict[f'CBV{i}'] = [d[3][i, :] for d in dtr_infos]

        try:
            time, flux, fluxerr, vec_dict = lcu.stitch_light_curves(
                timelist, maglist, magerrlist, extravecdict=extravecdict)
        except ValueError:
            lc_info = {
                'n_sectors': len(lcpaths),
                'lcpaths': lcpaths,
                'detrending_completed': False
            }
            ppu.save_status(statuspath, 'lc_info', lc_info)
            return 0

        #
        # mask orbit edges
        #
        s_time, s_flux, inds = moe.mask_orbit_start_and_end(
            time,
            flux,
            raise_expectation_error=False,
            orbitgap=0.7,
            return_inds=True)
        s_fluxerr = fluxerr[inds]

        #
        # remove outliers with a windowed (MAD-based) sliding clip
        #
        window_length = 1.5  # days
        s_flux = slide_clip(s_time,
                            s_flux,
                            window_length,
                            low=3,
                            high=2,
                            method='mad',
                            center='median')

        #
        # fix any "zero" values in s_flux to be NaN
        #
        s_flux[s_flux == 0] = np.nan

        ap = dtr_infos[0][2]
        allvardict = {
            'source_id': source_id,
            'E_BpmRp': apply_extinction,
            'ap': ap,
            'TMID_BJD': time,
            f'PCA{ap}': flux,
            f'IRE{ap}': fluxerr,
            'STIME': s_time,
            f'SPCA{ap}': s_flux,
            f'SPCAE{ap}': s_fluxerr,
            'dtr_infos': dtr_infos,
            'vec_dict': vec_dict
        }

        with open(picklepath, 'wb') as f:
            pickle.dump(allvardict, f)

        #
        # sanity check that PCA / detrending worked
        #
        limit_fraction = 0.75
        if len(flux[pd.isnull(flux)]) / len(flux) > limit_fraction:
            lc_info = {
                'n_sectors': len(lcpaths),
                'lcpaths': lcpaths,
                'detrending_completed': False
            }
            ppu.save_status(statuspath, 'lc_info', lc_info)
            return 0

        #
        # update status that detrending worked.
        #
        lc_info = {
            'n_sectors': len(lcpaths),
            'lcpaths': lcpaths,
            'detrending_completed': True
        }
        ppu.save_status(statuspath, 'lc_info', lc_info)

    else:
        thetime = datetime.utcnow().isoformat()
        print(f'{thetime}: Found {picklepath}, skipping detrending.')

    s = ppu.load_status(statuspath)
    if str2bool(s['lc_info']['detrending_completed']):
        with open(picklepath, 'rb') as f:
            allvardict = pickle.load(f)
    else:
        return 0

    #
    # make summary plots.
    #

    # (re)make the report if it has not been completed, or if overwrite is
    # requested; otherwise `outd` would be undefined below.
    if overwrite or not str2bool(s['report_info']['report_completed']):

        thetime = datetime.utcnow().isoformat()
        print(f'{thetime}: Beginning {source_id} allvar report.')

        plotdir = os.path.join(outdir, 'reports')
        outd = make_allvar_report(allvardict, plotdir)

    #
    # save their output (most crucially, including the bestperiods)
    #
    outpicklepath = os.path.join(outdir, 'data', f'{source_id}_reportinfo.pkl')
    with open(outpicklepath, 'wb') as f:
        pickle.dump(outd, f)

    report_info = {
        'report_completed': True,
        'ls_period': outd['lsp']['bestperiod'],
        'bestlspval': outd['lsp']['bestlspval'],
        'nbestperiods': outd['lsp']['nbestperiods'],
        'nbestlspvals': outd['lsp']['nbestlspvals'],
        'n_dict': outd['n_dict']
    }
    ppu.save_status(statuspath, 'report_info', report_info)

    #
    # save the SPCA light curve
    #
    ap = allvardict['ap']
    outdf = pd.DataFrame({
        'selected_time_bjdtdb_STIME':
        allvardict['STIME'],
        f'selected_flux_special_PCA_detrending_SPCA{ap}':
        allvardict[f'SPCA{ap}'],
        f'selected_flux_error_SPCAE{ap}':
        allvardict[f'SPCAE{ap}'],
    })
    outlcpath = os.path.join(outdir, 'data',
                             f'{source_id}_SPCA_lightcurve.csv')
    outdf.to_csv(outlcpath, index=False)
    print(f'Wrote {outlcpath}')

    return 1
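
A usage sketch for the function above (not part of the source): a driver loop over Gaia DR2 source identifiers. The source_ids and outdir values are illustrative assumptions; outdir is assumed to already contain data/, logs/, and reports/ sub-directories, since the function writes into them without creating them.

# Illustrative driver loop only; the source_ids and outdir are hypothetical.
import numpy as np

source_ids = np.array([5290723496401260928, 5290719545419724160], dtype=np.int64)
outdir = '/path/to/allvariability_reports/NGC_2516'  # assumed to hold data/, logs/, reports/

n_made = 0
for source_id in source_ids:
    # returns 1 when a report (and SPCA light curve csv) was written, 0 otherwise
    n_made += do_allvariable_report_making(source_id, outdir=outdir,
                                           overwrite=False, use_calib=False)
print(f'Wrote {n_made} new reports.')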
Code example #2
File: fit_models_to_gold.py  Project: lgbouma/cdips
def main(overwrite=0,
         sector=None,
         nworkers=40,
         cdipsvnum=1,
         cdips_cat_vnum=None,
         is_not_cdips_still_good=False):
    """
    ------------------------------------------
    Description:

    Fit Mandel-Agol transits to planet candidates. The fit parameters are then
    used for CTOIs.

    The goal of these models is to provide a good ephemeris (so: reliable
    epoch, period, and duration + uncertainties).

    For publication-quality parameters, joint modelling of stellar rotation
    signals along with planet signals may be preferred. The approach here is to
    just whiten the light curve in order to get a good ephemeris (which may
    distort the depth, for example).

    ------------------------------------------
    Explanation of design:

        Directory structure is made like:

            /results/fit_gold/sector-?/fitresults/hlsp_*/
            /results/fit_gold/sector-?/samples/hlsp_*/

        where each `fitresults` sub-directory contains images to diagnose fit
        quality, pickle files with saved parameters from the fits, etc.
        The `samples` sub-directories have the .h5 files used when sampling.

        First, all these directories are made.

        Then, pdf files under either
        /results/vetting_classifications/sector-?_CLEAR_THRESHOLD or
        /results/vetting_classifications/sector-?_NOT_CDIPS_STILL_GOOD
        are parsed to collect the light curves and any necessary metadata.

        Then a for loop runs over each planet candidate, and the fit is
        performed for each.

    ------------------------------------------
    Args:

        overwrite: if False, and the pickle file with saved parameters exists
        (i.e. you already fit the PC), no sampling is done.

        sector: the sector number.

        nworkers: number of parallel workers used during the fits.

        cdipsvnum: version number of CDIPS LCs in their name

        cdips_cat_vnum: target star catalog version identifier.

        is_not_cdips_still_good: if True, parses planet candidates from
        `/results/vetting_classifications/sector-?_NOT_CDIPS_STILL_GOOD`;
        otherwise uses the CLEAR_THRESHOLD directory.

    """

    lcbasedir, tfasrdir, resultsdir = _define_and_make_directories(
        sector, is_not_cdips_still_good=is_not_cdips_still_good)

    df, cdips_df, pfdf, supplementstatsdf, toidf, ctoidf = _get_data(
        sector, cdips_cat_vnum=cdips_cat_vnum)

    tfa_sr_paths = _get_lcpaths(df, tfasrdir)

    for tfa_sr_path in tfa_sr_paths:

        #
        # given the TFASR LC path, get the complete LC path
        #
        source_id = np.int64(
            tfa_sr_path.split('gaiatwo')[1].split('-')[0].lstrip('0'))
        mdf = cdips_df[cdips_df['source_id'] == source_id]
        if len(mdf) != 1:
            errmsg = 'expected exactly 1 source match in CDIPS cat'
            raise AssertionError(errmsg)

        _hdr = iu.get_header_keyword_list(tfa_sr_path, ['CAMERA', 'CCD'],
                                          ext=0)
        cam, ccd = _hdr['CAMERA'], _hdr['CCD']

        lcname = ('hlsp_cdips_tess_ffi_'
                  'gaiatwo{zsource_id}-{zsector}-cam{cam}-ccd{ccd}_'
                  'tess_v{zcdipsvnum}_llc.fits').format(
                      cam=cam,
                      ccd=ccd,
                      zsource_id=str(source_id).zfill(22),
                      zsector=str(sector).zfill(4),
                      zcdipsvnum=str(cdipsvnum).zfill(2))
        lcpath = os.path.join(lcbasedir, 'cam{}_ccd{}'.format(cam, ccd),
                              lcname)

        #
        # make fitresults and samples directories
        #
        outdirs = [
            os.path.join(resultsdir, 'fitresults', lcname.replace('.fits',
                                                                  '')),
            os.path.join(resultsdir, 'samples', lcname.replace('.fits', ''))
        ]
        for outdir in outdirs:
            if not os.path.exists(outdir):
                os.mkdir(outdir)

        #
        # collect metadata for this target star
        #
        supprow = mavr._get_supprow(source_id, supplementstatsdf)
        suppfulldf = supplementstatsdf

        pfrow = pfdf.loc[pfdf['source_id'] == source_id]
        if len(pfrow) != 1:
            errmsg = 'expected exactly 1 source match in period find df'
            raise AssertionError(errmsg)

        outpath = os.path.join(resultsdir, 'fitresults',
                               lcname.replace('.fits', ''),
                               lcname.replace('.fits', '_fitparameters.csv'))

        #
        # if you haven't already made the output parameter file (which requires
        # convergence) then start fitting.
        #
        if not os.path.exists(outpath):

            _fit_transit_model_single_sector(tfa_sr_path,
                                             lcpath,
                                             outpath,
                                             mdf,
                                             source_id,
                                             supprow,
                                             suppfulldf,
                                             pfdf,
                                             pfrow,
                                             toidf,
                                             ctoidf,
                                             sector,
                                             nworkers,
                                             cdipsvnum=cdipsvnum,
                                             overwrite=overwrite)

        else:

            status_file = os.path.join(os.path.dirname(outpath),
                                       'run_status.stat')
            status = load_status(status_file)

            fittype = 'fivetransitparam_fit'
            if str2bool(status[fittype]['is_converged']):
                print('{} converged and already wrote ctoi csv.'.format(
                    source_id))

            elif (not str2bool(status[fittype]['is_converged'])
                  and int(source_id) in SKIP_CONVERGENCE_IDENTIFIERS):
                print('WRN! {} not converged, but wrote ctoi csv b/c in '
                      'SKIP_CONVERGENCE_IDENTIFIERS.'.format(source_id))

            else:
                raise ValueError(
                    'got parameter file existing, but not converged. '
                    'should never happen. for DR2 {}'.format(source_id))
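
A usage sketch, not from the source: this main would presumably be invoked once per sector, with the sector list and catalog version below being assumed placeholder values.

# Illustrative invocation only; the sector list and catalog version are assumptions.
if __name__ == "__main__":
    for sector in [12, 13]:
        main(overwrite=0,
             sector=sector,
             nworkers=40,
             cdipsvnum=1,
             cdips_cat_vnum=0.4,
             is_not_cdips_still_good=False)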
Code example #3
def get_auto_rotation_periods(runid='NGC_2516', get_spdm=True):
    """
    Given a `runid` (an identifier string for a particular CDIPS
    "allvariability" sub-pipeline processing run), retrieve the following
    output and concatenate into a table, which is then saved to
    '../../data/rotation/{runid}_rotation_periods.csv':

        {
        'source_id': source_ids,
        'n_cdips_sector': nsectors,
        'period': periods,
        'lspval': lspvals,
        'nequal': nequal,
        'nclose': nclose,
        'nfaint': nfaint
        }

    Valid runids include:
        IC_2602, CrA, kc19group_113, Orion, NGC_2516, ScoOB2, compstar_NGC_2516
    """

    # the allvariability logs, including the top 5 Lomb-Scargle periods and
    # their peak values, live in this directory.
    logdir = f'/Users/luke/Dropbox/proj/cdips/results/allvariability_reports/{runid}/logs'
    logfiles = glob(os.path.join(logdir, '*status.log'))
    print(f'Got {len(logfiles)} log files.')

    if get_spdm:
        pkldir = f'/Users/luke/Dropbox/proj/cdips/results/allvariability_reports/{runid}/data'
        pklfiles = glob(os.path.join(pkldir, '*reportinfo.pkl'))
        N_pklfiles = len(pklfiles)
        print(f'Got {N_pklfiles} pickle files.')
        if N_pklfiles < 10:
            raise ValueError('Too few pickle files... Port from phtess2?')

    source_ids = np.array(
        [np.int64(os.path.basename(f).split('_')[0]) for f in logfiles])

    # retrieve the LS periods. Only the top period is kept, since we are not
    # bothering with the "second period" classification option.
    periods, lspvals, nequal, nclose, nfaint, nsectors = [], [], [], [], [], []
    if get_spdm:
        spdmperiods, spdmvals = [], []

    ix = 0
    for source_id, logpath in zip(source_ids, logfiles):

        s = load_status(logpath)

        if get_spdm:
            pklpath = os.path.join(pkldir, f'{source_id}_reportinfo.pkl')
            if not os.path.exists(pklpath):
                spdmperiods.append(np.nan)
                spdmvals.append(np.nan)
            else:
                with open(pklpath, 'rb') as f:
                    d = pickle.load(f)
                spdmperiods.append(float(d['spdm']['bestperiod']))
                spdmvals.append(float(d['spdm']['bestlspval']))

        n_sectors = int(s['lc_info']['n_sectors'])
        nsectors.append(n_sectors)
        try:
            periods.append(float(s['report_info']['ls_period']))
            lspvals.append(float(s['report_info']['bestlspval']))
            nequal.append(int(eval(s['report_info']['n_dict'])['equal']))
            nclose.append(int(eval(s['report_info']['n_dict'])['close']))
            nfaint.append(int(eval(s['report_info']['n_dict'])['faint']))
        except (TypeError, ValueError) as e:
            periods.append(np.nan)
            lspvals.append(np.nan)
            nequal.append(np.nan)
            nclose.append(np.nan)
            nfaint.append(np.nan)

    period_df = pd.DataFrame({
        'source_id': source_ids,
        'n_cdips_sector': nsectors,
        'period': periods,
        'lspval': lspvals,
        'nequal': nequal,
        'nclose': nclose,
        'nfaint': nfaint
    })
    if get_spdm:
        period_df['spdmperiod'] = spdmperiods
        period_df['spdmval'] = spdmvals

    print(
        f'Got {len(period_df[period_df.n_cdips_sector > 0])} sources with at least 1 cdips sector'
    )
    print(f'Got {len(period_df[~pd.isnull(period_df.period)])} periods')

    # get the runid's source list
    if 'compstar' not in runid:
        if runid == 'NGC_2516':
            sourcelistpath = (
                '/Users/luke/Dropbox/proj/cdips/data/cluster_data/NGC_2516_full_fullfaint_20210305.csv'
            )
        else:
            sourcelistpath = os.path.join(
                '/Users/luke/Dropbox/proj/cdips/data/cluster_data/cdips_catalog_split',
                f'OC_MG_FINAL_v0.4_publishable_CUT_{runid}.csv')
    else:
        sourcelistpath = (
            f'/Users/luke/Dropbox/proj/earhart/results/tables/{runid}_sourcelist.csv'
        )

    df = pd.read_csv(sourcelistpath)
    if 'compstar' in runid:

        print(42 * '-')
        print(
            f'{len(df)} light curves made for stars in neighborhood (calib+cdips)'
        )
        print(f'... for {len(np.unique(df.source_id))} unique stars')

        df = df[df.phot_rp_mean_mag < 13]
        print(
            f'{len(df)} light curves made for stars in neighborhood (calib+cdips) w/ Rp<13'
        )
        print(f'... for {len(np.unique(df.source_id))} unique stars')
        print(42 * '-')

        df = df.drop_duplicates(subset='source_id', keep='first')

    mdf = period_df.merge(df, how='inner', on='source_id')

    outpath = f'../../data/rotation/{runid}_rotation_periods.csv'
    mdf.to_csv(outpath, index=False)
    print(f'Made {outpath}')
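
A follow-up usage sketch (assumptions flagged in the comments): run the collection for one runid, then load the merged CSV and count usable rotation periods.

# Illustrative follow-up only; the runid and selection cuts are assumptions.
import pandas as pd

runid = 'NGC_2516'
get_auto_rotation_periods(runid=runid, get_spdm=True)

rot_df = pd.read_csv(f'../../data/rotation/{runid}_rotation_periods.csv')
sel = (rot_df.n_cdips_sector > 0) & (~pd.isnull(rot_df.period))
print(f'{sel.sum()} of {len(rot_df)} stars have >=1 CDIPS sector and a measured period.')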
Code example #4
File: fit_models_to_gold.py  Project: lgbouma/cdips
def _fit_transit_model_single_sector(tfa_sr_path,
                                     lcpath,
                                     outpath,
                                     mdf,
                                     source_id,
                                     supprow,
                                     suppfulldf,
                                     pfdf,
                                     pfrow,
                                     toidf,
                                     ctoidf,
                                     sector,
                                     nworkers,
                                     cdipsvnum=1,
                                     overwrite=1):
    try_mcmc = True
    identifier = source_id
    #
    # read and re-detrend lc if needed. (recall: these planet candidates were
    # found using a penalized spline detrending in most cases).
    #
    hdul_sr = fits.open(tfa_sr_path)
    hdul = fits.open(lcpath)

    lc_sr = hdul_sr[1].data
    lc, hdr = hdul[1].data, hdul[0].header

    # FIXME: logic needs updating in >=S14 processing
    raise NotImplementedError
    is_pspline_dtr = bool(pfrow['pspline_detrended'].iloc[0])

    fluxap = 'IRM2' if is_pspline_dtr else 'TFASR2'

    time, mag = lc_sr['TMID_BJD'], lc_sr[fluxap]
    try:
        time, mag = moe.mask_orbit_start_and_end(time,
                                                 mag,
                                                 raise_expectation_error=False)
    except AssertionError:
        raise AssertionError(
            'moe.mask_orbit_start_and_end failed for {}'.format(tfa_sr_path))

    flux = vp._given_mag_get_flux(mag)
    err = np.ones_like(flux) * 1e-4

    time, flux, err = sigclip_magseries(time,
                                        flux,
                                        err,
                                        magsarefluxes=True,
                                        sigclip=[50, 5])

    if is_pspline_dtr or identifier in KNOWN_EXTRA_DETREND:
        flux, _ = dtr.detrend_flux(time, flux)

    if identifier in KNOWN_EXTRA_DETREND:
        fit_savdir = os.path.dirname(outpath)
        dtrpath = os.path.join(fit_savdir, 'extra_detrend_lc.png')

        if not os.path.exists(dtrpath):
            plt.close('all')
            f, ax = plt.subplots(figsize=(6, 3))
            ax.scatter(time,
                       flux,
                       c='black',
                       alpha=0.9,
                       zorder=2,
                       s=8,
                       rasterized=True,
                       linewidths=0)
            ax.set_xlabel('bjdtdb')
            ax.set_ylabel('detrended flux')
            f.savefig(dtrpath, bbox_inches='tight')
            raise AssertionError(
                'U NEED TO MANUALLY LOOK AT {} AND VERIFY ITS OK'.format(
                    dtrpath))
        else:
            print('WRN! found {}. continuing to fit.'.format(dtrpath))

    #
    # define the paths. get the stellar parameters, and do the fit!
    #
    fit_savdir = os.path.dirname(outpath)
    chain_savdir = os.path.dirname(outpath).replace('fitresults', 'samples')

    try:
        teff, teff_err, rstar, rstar_err, logg, logg_err = (
            get_teff_rstar_logg(hdr))
    except (NotImplementedError, ValueError) as e:
        print(e)
        print('did not get rstar for {}. MUST MANUALLY FIX.'.format(source_id))
        try_mcmc = False

    #
    # initialize status file
    #
    status_file = os.path.join(fit_savdir, 'run_status.stat')
    fittype = 'fivetransitparam_fit'
    if not os.path.exists(status_file):
        save_status(status_file, fittype, {
            'is_converged': False,
            'n_steps_run': 0
        })
    status = load_status(status_file)[fittype]

    #
    # if not converged and no steps previously run:
    #   run 4k steps. write status file.
    #
    # reload status file.
    # if not converged and 4k steps previously run and in long ID list:
    #   run 25k steps, write status file.
    #
    # reload status file.
    # if not converged:
    #   print a warning.
    #
    if identifier in KNOWN_MCMC_FAILS:
        print('WRN! identifier {} requires manual fixing.'.format(identifier))
        try_mcmc = False

    if (not str2bool(status['is_converged'])
            and int(status['n_steps_run']) == 0 and try_mcmc):

        n_mcmc_steps = 4000

        mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
            time,
            flux,
            err,
            teff,
            rstar,
            logg,
            identifier,
            fit_savdir,
            chain_savdir,
            n_mcmc_steps=n_mcmc_steps,
            overwriteexistingsamples=False,
            n_transit_durations=5,
            make_tlsfit_plot=True,
            exp_time_minutes=30,
            bandpass='******',
            magsarefluxes=True,
            nworkers=nworkers)

        status = {'is_converged': is_converged, 'n_steps_run': n_mcmc_steps}
        save_status(status_file, fittype, status)

    status = load_status(status_file)[fittype]
    if (not str2bool(status['is_converged'])
            and int(status['n_steps_run']) != 25000
            and int(identifier) in LONG_RUN_IDENTIFIERS and try_mcmc):

        n_mcmc_steps = 25000

        # NOTE: hard-code nworkers, since we don't get any multithreading
        # improvement anyway (this is some kind of bug)
        mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
            time,
            flux,
            err,
            teff,
            rstar,
            logg,
            identifier,
            fit_savdir,
            chain_savdir,
            n_mcmc_steps=n_mcmc_steps,
            overwriteexistingsamples=True,
            n_transit_durations=5,
            make_tlsfit_plot=True,
            exp_time_minutes=30,
            bandpass='******',
            magsarefluxes=True,
            nworkers=4)

        status = {'is_converged': is_converged, 'n_steps_run': n_mcmc_steps}
        save_status(status_file, fittype, status)

    #
    # if converged, or in the list of IDs for which it's fine to skip
    # convergence (because by-eye the fits are converged), convert the fit
    # results to ctoi csv format
    #
    status = load_status(status_file)[fittype]

    if (str2bool(status['is_converged'])
            or int(identifier) in SKIP_CONVERGENCE_IDENTIFIERS):

        try:
            _ = isinstance(mafr, dict)
        except UnboundLocalError:
            #
            # get the MCMC results from the pickle file; regenerate the TLS
            # result.
            #

            fitparamdir = os.path.dirname(status_file)
            fitpklsavpath = os.path.join(
                fitparamdir,
                '{}_phased_fivetransitparam_fit_empiricalerrs.pickle'.format(
                    identifier))
            with open(fitpklsavpath, 'rb') as f:
                mafr = pickle.load(f)

            tlsp = htls.tls_parallel_pfind(time,
                                           flux,
                                           err,
                                           magsarefluxes=True,
                                           tls_rstar_min=0.1,
                                           tls_rstar_max=10,
                                           tls_mstar_min=0.1,
                                           tls_mstar_max=5.0,
                                           tls_oversample=8,
                                           tls_mintransits=1,
                                           tls_transit_template='default',
                                           nbestpeaks=5,
                                           sigclip=None,
                                           nworkers=nworkers)
            tlsr = tlsp['tlsresult']

        ticid = int(hdr['TICID'])
        ra, dec = hdr['RA_OBJ'], hdr['DEC_OBJ']
        print('{} converged. writing ctoi csv.'.format(identifier))
        fit_results_to_ctoi_csv(ticid,
                                ra,
                                dec,
                                mafr,
                                tlsr,
                                outpath,
                                toidf,
                                ctoidf,
                                teff,
                                teff_err,
                                rstar,
                                rstar_err,
                                logg,
                                logg_err,
                                cdipsvnum=cdipsvnum)
    else:
        print('WRN! {} did not converge, after {} steps. MUST MANUALLY FIX.'.
              format(identifier, status['n_steps_run']))
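
The status-file helpers (save_status, load_status, str2bool) recur throughout these examples, but their implementations are not shown. Below is a minimal sketch consistent with how they are called, assuming they wrap the standard-library configparser; the real cdips implementations may differ.

# Minimal sketch of the status-file helpers, ASSUMING a configparser backend;
# this is not the repository's actual code.
import configparser

def save_status(status_file, section, status_dict):
    # write (or update) one INI-style section, stringifying all values
    config = configparser.ConfigParser()
    config.read(status_file)  # no-op if the file does not exist yet
    config[section] = {k: str(v) for k, v in status_dict.items()}
    with open(status_file, 'w') as f:
        config.write(f)

def load_status(status_file):
    # return a ConfigParser whose sections ('lc_info', 'fivetransitparam_fit', ...)
    # behave like dicts of strings
    config = configparser.ConfigParser()
    config.read(status_file)
    return config

def str2bool(v):
    # map the strings written by save_status ('True', 'False', 'None', ...) to booleans
    return str(v).lower() in ('true', 't', 'yes', '1')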
Code example #5
File: merge_for_exofoptess.py  Project: lgbouma/cdips
def main(is_dayspecific_exofop_upload=1,
         cdipssource_vnum=0.4,
         uploadnamestr='sectors_12_thru_13_clear_threshold'):
    """
    Put together a few useful CSV candidate summaries:

    * bulk uploads to exofop/tess

    * observer info sparse (focus on TICIDs, gaia mags, positions on sky, etc)

    * observer info full (stellar rvs for membership assessment; ephemeris
    information)

    * merge of everything (exoFOP upload, + the subset of gaia information
    useful to observers)

    ----------
    Args:

        is_dayspecific_exofop_upload: if True, reads in the manually-written (from
        google spreadsheet) comments and source_ids, and writes those to a
        special "TO_EXOFOP" csv file.

        uploadnamestr: used as unique identifying string in file names
    """

    #
    # Read in the results from the fits
    #
    paramglob = os.path.join(
        fitdir,
        "sector-*_CLEAR_THRESHOLD/fitresults/hlsp_*gaiatwo*_llc/*fitparameters.csv"
    )
    parampaths = glob(paramglob)
    statusglob = os.path.join(
        fitdir,
        "sector-*_CLEAR_THRESHOLD/fitresults/hlsp_*gaiatwo*_llc/*.stat")
    statuspaths = glob(statusglob)

    statuses = [
        dict(load_status(f)['fivetransitparam_fit']) for f in statuspaths
    ]

    param_df = pd.concat((pd.read_csv(f, sep='|') for f in parampaths))

    outpath = os.path.join(
        fitdir, "{}_{}_mergedfitparams.csv".format(today_YYYYMMDD(),
                                                   uploadnamestr))
    param_df['param_path'] = parampaths
    param_df.to_csv(outpath, index=False, sep='|')
    print('made {}'.format(outpath))

    status_df = pd.DataFrame(statuses)

    status_df['statuspath'] = statuspaths

    status_gaiaids = list(
        map(
            lambda x: int(
                os.path.dirname(x).split('gaiatwo')[1].split('-')[0].lstrip(
                    '0')), statuspaths))

    status_df['source_id'] = status_gaiaids

    if is_dayspecific_exofop_upload:

        #
        # Manually commented candidates are the only ones we're uploading.
        #
        manual_comment_df = pd.read_csv(
            '/nfs/phtess2/ar0/TESS/PROJ/lbouma/cdips/data/exoFOP_uploads/{}_cdips_candidate_upload.csv'
            .format(today_YYYYMMDD()),
            sep=",")
        common = status_df.merge(manual_comment_df,
                                 on='source_id',
                                 how='inner')
        sel_status_df = status_df[status_df.source_id.isin(common.source_id)]

        #
        # WARN: the MCMC fits should have converged before uploading.
        # (20190918 had two exceptions, where the fit looked fine.)
        #
        if len(sel_status_df[sel_status_df['is_converged'] == 'False']) > 0:

            print('\nWRN! THE FOLLOWING CANDIDATES ARE NOT CONVERGED')
            print(sel_status_df[sel_status_df['is_converged'] == 'False'])

        param_gaiaids = list(
            map(
                lambda x: int(
                    os.path.basename(x).split('gaiatwo')[1].split('-')[0].
                    lstrip('0')), parampaths))
        param_df['source_id'] = param_gaiaids

        #
        # Require that you actually have a parameter file (...).
        #
        _df = sel_status_df.merge(param_df, on='source_id', how='inner')

        to_exofop_df = param_df[param_df.source_id.isin(_df.source_id)]

        if len(to_exofop_df) != len(manual_comment_df):

            print('\nWRN! {} CANDIDATES DID NOT HAVE PARAMETERS'.format(
                len(manual_comment_df) - len(to_exofop_df)))
            print('They are...')
            print(manual_comment_df[~manual_comment_df.source_id.
                                    isin(to_exofop_df.source_id)])
            print('\n')

        #
        # Duplicate entries in "to_exofop_df" are multi-sector. Average their
        # parameters (really will end up just being durations) across sectors,
        # and then remove the duplicate multi-sector rows using the "groupby"
        # aggregator. This removes the string-based columns, which we can
        # reclaim by a "drop_duplicates" call, since they don't have
        # sector-specific information.  Then, assign comments and format as
        # appropriate for ExoFop-TESS. Unique tag for the entire upload.
        #

        to_exofop_df['source_id'] = to_exofop_df['source_id'].astype(str)

        mean_val_to_exofop_df = to_exofop_df.groupby(
            'target').mean().reset_index()

        string_cols = [
            'target', 'flag', 'disp', 'tag', 'group', 'notes', 'source_id'
        ]
        dup_dropped_str_df = (to_exofop_df.drop_duplicates(
            subset=['target'], keep='first', inplace=False)[string_cols])

        out_df = mean_val_to_exofop_df.merge(dup_dropped_str_df,
                                             how='left',
                                             on='target')

        #
        # The above procedure got the epochs on multisector planets wrong.
        # Determine (t0,P) by fitting a line to entries with >=3 sectors
        # instead. For the two-sector case, due to bad covariance matrices,
        # just use the newest ephemeris.
        #
        multisector_df = (to_exofop_df[to_exofop_df.target.groupby(
            to_exofop_df.target).transform('value_counts') > 1])
        u_multisector_df = out_df[out_df.target.isin(multisector_df.target)]

        # temporarily drop the multisector rows from out_df (they will be
        # re-merged)
        out_df = out_df.drop(np.argwhere(
            out_df.target.isin(multisector_df.target)).flatten(),
                             axis=0)

        ephem_d = {}
        for ix, t in enumerate(np.unique(multisector_df.target)):
            sel = (multisector_df.target == t)
            tmid = nparr(multisector_df[sel].epoch)
            tmid_err = nparr(multisector_df[sel].epoch_unc)
            init_period = nparr(multisector_df[sel].period.mean())

            E, init_t0 = get_epochs_given_midtimes_and_period(tmid,
                                                              init_period,
                                                              verbose=False)

            popt, pcov = curve_fit(linear_model,
                                   E,
                                   tmid,
                                   p0=(init_period, init_t0),
                                   sigma=tmid_err)

            if np.all(np.isinf(pcov)):
                # if least-squares doesn't give good error (i.e., just two
                # epochs), take the most recent epoch.
                s = np.argmax(tmid)
                use_t0 = tmid[s]
                use_t0_err = tmid_err[s]
                use_period = nparr(multisector_df[sel].period)[s]
                use_period_err = nparr(multisector_df[sel].period_unc)[s]

            else:
                use_t0 = popt[1]
                use_t0_err = pcov[1, 1]**0.5
                use_period = popt[0]
                use_period_err = pcov[0, 0]**0.5

            if DEBUG:
                print(
                    'init tmid {}, tmiderr {}\nperiod {}, perioderr {}'.format(
                        tmid, tmid_err, nparr(multisector_df[sel].period),
                        nparr(multisector_df[sel].period_unc)))
                print(
                    'use tmid {}, tmiderr {}\nperiod {}, perioderr {}'.format(
                        use_t0, use_t0_err, use_period, use_period_err))
                print(10 * '-')

            ephem_d[ix] = {
                'target': t,
                'epoch': use_t0,
                'epoch_unc': use_t0_err,
                'period': use_period,
                'period_unc': use_period_err
            }

        ephem_df = pd.DataFrame(ephem_d).T

        mdf = ephem_df.merge(u_multisector_df,
                             how='left',
                             on='target',
                             suffixes=('', '_DEPRECATED'))
        mdf = mdf.drop([c for c in mdf.columns if 'DEPRECATED' in c],
                       axis=1,
                       inplace=False)

        temp_df = out_df.append(mdf, ignore_index=True, sort=False)
        out_df = temp_df

        to_exofop_df = out_df[COLUMN_ORDER]

        # to_exofop_df = mdf[COLUMN_ORDER] # special behavior for 2020/02/07 fix
        # to_exofop_df['flag'] = 'newparams'

        _df = manual_comment_df[manual_comment_df.source_id.isin(
            to_exofop_df.source_id)]

        comments = list(_df['comment'])
        # comments = 'Fixed ephemeris bug. (Old epoch was erroneous).' # #2020/02/07

        for c in comments:
            assert len(c) <= 119

        to_exofop_df = to_exofop_df.sort_values(by="source_id")
        _df = _df.sort_values(by="source_id")

        to_exofop_df['notes'] = comments
        to_exofop_df['tag'] = ('{}_bouma_cdips-v01_00001'.format(
            today_YYYYMMDD()))

        istoi = ~to_exofop_df['target'].astype(str).str.startswith('TIC')
        if np.any(istoi):
            newtargetname = 'TOI' + to_exofop_df[istoi].target.astype(str)
            to_exofop_df.loc[istoi, 'target'] = newtargetname

        outpath = os.path.join(
            exofopdir, "{}_{}_w_sourceid.csv".format(today_YYYYMMDD(),
                                                     uploadnamestr))
        to_exofop_df.to_csv(outpath, index=False, sep='|')
        print('made {}'.format(outpath))

        to_exofop_df = to_exofop_df.drop(['source_id'], axis=1)

        outpath = os.path.join(
            exofopdir, "params_planet_{}_001.txt".format(today_YYYYMMDD()))
        for c in ['epoch', 'epoch_unc', 'period', 'period_unc']:
            to_exofop_df[c] = to_exofop_df[c].astype(float)
        to_exofop_df = to_exofop_df.round(FORMATDICT)
        to_exofop_df['depth'] = to_exofop_df['depth'].astype(int)
        to_exofop_df['depth_unc'] = to_exofop_df['depth_unc'].astype(int)
        to_exofop_df.to_csv(outpath, index=False, sep='|', header=False)
        print('made {}'.format(outpath))

        # manually check these...
        print('\n' + 42 * '=' + '\n')
        print('\nPeriod uncertainties [minutes]')
        print(to_exofop_df['period_unc'] * 24 * 60)
        print('\nEpoch uncertainties [minutes]')
        print(to_exofop_df['epoch_unc'] * 24 * 60)
        print('\nPlanet radii [Rearth]')
        print(to_exofop_df[['radius', 'radius_unc', 'notes']])
        print('\n' + 42 * '=' + '\n')

    #
    # above is the format exofop-TESS wants. however it's not particularly
    # useful for followup. for that, we want: gaia IDs, magnitudes, ra, dec.
    #
    gaiaids = list(
        map(
            lambda x: int(
                os.path.basename(x).split('gaiatwo')[1].split('-')[0].lstrip(
                    '0')), parampaths))

    lcnames = list(
        map(
            lambda x: os.path.basename(x).replace('_fitparameters.csv', '.fits'
                                                  ), parampaths))

    lcdir = '/nfs/phtess2/ar0/TESS/PROJ/lbouma/CDIPS_LCS/sector-*/cam?_ccd?/'
    lcpaths = [glob(os.path.join(lcdir, lcn))[0] for lcn in lcnames]

    # now get the header values
    kwlist = [
        'RA_OBJ', 'DEC_OBJ', 'CDIPSREF', 'CDCLSTER', 'phot_g_mean_mag',
        'phot_bp_mean_mag', 'phot_rp_mean_mag', 'TESSMAG', 'Gaia-ID', 'TICID',
        'TICTEFF', 'TICRAD', 'TICMASS'
    ]

    for k in kwlist:
        thislist = []
        for l in lcpaths:
            thislist.append(iu.get_header_keyword(l, k, ext=0))
        param_df[k] = np.array(thislist)

    # now search for stellar RV xmatch
    res = [
        fr.get_rv_xmatch(ra, dec, G_mag=gmag, dr2_sourceid=s)
        for ra, dec, gmag, s in zip(
            list(param_df['RA_OBJ']), list(param_df['DEC_OBJ']),
            list(param_df['phot_g_mean_mag']), list(param_df['Gaia-ID']))
    ]

    res = np.array(res)
    param_df['stellar_rv'] = res[:, 0]
    param_df['stellar_rv_unc'] = res[:, 1]
    param_df['stellar_rv_provenance'] = res[:, 2]

    # make column showing whether there are ESO spectra available
    res = [
        fr.wrangle_eso_for_rv_availability(ra, dec)
        for ra, dec in zip(list(param_df['RA_OBJ']), list(param_df['DEC_OBJ']))
    ]
    param_df['eso_rv_availability'] = nparr(res)[:, 2]

    #
    # try to get the cluster RV: first from Soubiran, then from Kharchenko.
    # to do this, load the CDIPS target catalog. Merging the CDCLSTER name
    # (a comma-delimited string) against the target catalog on source
    # identifiers gives a unique cluster name, since that matching was already
    # done earlier.
    #
    cdips_df = ccl.get_cdips_pub_catalog(ver=cdipssource_vnum)
    dcols = 'cluster;reference;source_id;unique_cluster_name'
    ccdf = cdips_df[dcols.split(';')]
    ccdf['source_id'] = ccdf['source_id'].astype(np.int64)
    mdf = param_df.merge(ccdf,
                         how='left',
                         left_on='source_id',
                         right_on='source_id')
    param_df['unique_cluster_name'] = nparr(mdf['unique_cluster_name'])

    s19 = gvc.get_soubiran_19_rv_table()
    k13_param = gvc.get_k13_param_table()

    c_rvs, c_err_rvs, c_rv_nstar, c_rv_prov = [], [], [], []
    for ix, row in param_df.iterrows():

        if row['unique_cluster_name'] in nparr(s19['ID']):
            sel = (s19['ID'] == row['unique_cluster_name'])
            c_rvs.append(float(s19[sel]['RV'].iloc[0]))
            c_err_rvs.append(float(s19[sel]['e_RV'].iloc[0]))
            c_rv_nstar.append(int(s19[sel]['Nsele'].iloc[0]))
            c_rv_prov.append('Soubiran+19')
            continue

        elif row['unique_cluster_name'] in nparr(k13_param['Name']):
            sel = (k13_param['Name'] == row['unique_cluster_name'])
            c_rvs.append(float(k13_param[sel]['RV'].iloc[0]))
            c_err_rvs.append(float(k13_param[sel]['e_RV'].iloc[0]))
            c_rv_nstar.append(int(k13_param[sel]['o_RV'].iloc[0]))
            c_rv_prov.append('Kharchenko+13')
            continue

        else:
            c_rvs.append(np.nan)
            c_err_rvs.append(np.nan)
            c_rv_nstar.append(np.nan)
            c_rv_prov.append('')

    param_df['cluster_rv'] = c_rvs
    param_df['cluster_err_rv'] = c_err_rvs
    param_df['cluster_rv_nstar'] = c_rv_nstar
    param_df['cluster_rv_provenance'] = c_rv_prov

    #
    # finally, begin writing the output
    #

    outpath = ("/home/lbouma/proj/cdips/results/fit_gold/"
               "{}_{}_fitparams_plus_observer_info.csv".format(
                   today_YYYYMMDD(), uploadnamestr))
    param_df.to_csv(outpath, index=False, sep='|')
    print('made {}'.format(outpath))

    #
    # sparse observer info cut
    #
    scols = [
        'target', 'flag', 'disp', 'tag', 'group', 'RA_OBJ', 'DEC_OBJ',
        'CDIPSREF', 'CDCLSTER', 'phot_g_mean_mag', 'phot_bp_mean_mag',
        'phot_rp_mean_mag', 'TICID', 'TESSMAG', 'TICTEFF', 'TICRAD', 'TICMASS',
        'Gaia-ID'
    ]
    sparam_df = param_df[scols]

    outpath = ("/home/lbouma/proj/cdips/results/fit_gold/"
               "{}_{}_observer_info_sparse.csv".format(today_YYYYMMDD(),
                                                       uploadnamestr))
    sparam_df.to_csv(outpath, index=False, sep='|')
    print('made {}'.format(outpath))

    #
    # full observer info cut
    #
    scols = [
        'target', 'flag', 'disp', 'tag', 'group', 'RA_OBJ', 'DEC_OBJ',
        'CDIPSREF', 'CDCLSTER', 'phot_g_mean_mag', 'phot_bp_mean_mag',
        'phot_rp_mean_mag', 'TICID', 'TESSMAG', 'TICTEFF', 'TICRAD', 'TICMASS',
        'Gaia-ID', 'period', 'period_unc', 'epoch', 'epoch_unc', 'depth',
        'depth_unc', 'duration', 'duration_unc', 'radius', 'radius_unc',
        'stellar_rv', 'stellar_rv_unc', 'stellar_rv_provenance',
        'eso_rv_availability', 'cluster_rv', 'cluster_err_rv',
        'cluster_rv_nstar', 'cluster_rv_provenance'
    ]
    sparam_df = param_df[scols]

    outpath = ("/home/lbouma/proj/cdips/results/fit_gold/"
               "{}_{}_observer_info_full.csv".format(today_YYYYMMDD(),
                                                     uploadnamestr))
    sparam_df.to_csv(outpath, index=False, sep='|')
    print('made {}'.format(outpath))
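
The multi-sector ephemeris refit in the block above fits a straight line to transit mid-times as a function of integer epoch number. A self-contained sketch of that calculation follows; the mid-times are fabricated, linear_model is written out explicitly here as t_mid = t0 + E * P, and the epoch assignment is a simplified stand-in for the get_epochs_given_midtimes_and_period helper used in the source.

# Self-contained sketch of the linear ephemeris refit; the mid-transit times
# below are made up for illustration.
import numpy as np
from scipy.optimize import curve_fit

def linear_model(epoch, period, t0):
    # linear ephemeris: t_mid = t0 + epoch * period
    return t0 + epoch * period

tmid = np.array([1625.301, 1633.698, 1684.102])   # hypothetical mid-times [BTJD]
tmid_err = np.array([0.010, 0.020, 0.010])

init_period = np.min(np.diff(tmid))               # crude starting period guess
epochs = np.round((tmid - tmid[0]) / init_period) # integer epoch numbers

popt, pcov = curve_fit(linear_model, epochs, tmid,
                       p0=(init_period, tmid[0]), sigma=tmid_err)
period, t0 = popt
period_err, t0_err = np.sqrt(np.diag(pcov))
# with only two epochs the covariance can be infinite, in which case the code
# above falls back to the most recent single-sector ephemeris.
print(f'P = {period:.5f} +/- {period_err:.5f} d; t0 = {t0:.4f} +/- {t0_err:.4f}')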