def do_allvariable_report_making(source_id, outdir=None, overwrite=False,
                                 apply_extinction=None, use_calib=False):
    """
    Args:
        source_id (np.int64): Gaia DR2 source identifier.
        outdir (str): working directory; must contain 'data', 'logs', and
            'reports' sub-directories.
        overwrite (bool): if True, redo the detrending and report even if the
            status log says they are complete.
        apply_extinction (float): E(Bp-Rp) applied to plotted colors.
        use_calib (bool): if True, searches for _calibration_ light curves,
            not just _cluster_ light curves.
    """

    print(42 * '=')
    thetime = datetime.utcnow().isoformat()
    print(f'{thetime}: Beginning {source_id} do_allvariable_report_making.')

    picklepath = os.path.join(outdir, 'data', f'{source_id}_allvar.pkl')
    statuspath = os.path.join(outdir, 'logs', f'{source_id}_status.log')

    if not os.path.exists(statuspath):
        # initialize status file
        lc_info = {
            'n_sectors': None, 'lcpaths': None, 'detrending_completed': None
        }
        ppu.save_status(statuspath, 'lc_info', lc_info)
        report_info = {
            'report_completed': None, 'ls_period': None, 'bestlspval': None,
            'nbestperiods': None, 'nbestlspvals': None, 'n_dict': None
        }
        ppu.save_status(statuspath, 'report_info', report_info)

    s = ppu.load_status(statuspath)
    if not overwrite:
        if str2bool(s['report_info']['report_completed']):
            print(f'Found {source_id} report_completed')
            return 0
        if s['lc_info']['n_sectors'] == '0':
            print(f'Found {source_id} n_sectors = 0')
            return 0
        if s['lc_info']['detrending_completed'] == 'False':
            print(f'Found {source_id} not detrending_completed')
            return 0

    # get the data needed to make the report if it hasn't already been made.
    if not os.path.exists(picklepath):

        thetime = datetime.utcnow().isoformat()
        print(f'{thetime}: Beginning {source_id} detrending.')

        #
        # get the light curves
        #
        lcpaths = lcu.find_cdips_lc_paths(source_id, raise_error=False,
                                          use_calib=use_calib)

        if lcpaths is None:
            lc_info = {
                'n_sectors': 0, 'lcpaths': None,
                'detrending_completed': False
            }
            ppu.save_status(statuspath, 'lc_info', lc_info)
            return 0

        #
        # detrend systematics. each light curve yields tuples of:
        #   primaryhdr, data, ap, dtrvecs, eigenvecs, smooth_eigenvecs
        #
        dtr_infos = []
        try:
            for lcpath in lcpaths:
                dtr_info = dtr.detrend_systematics(lcpath)
                dtr_infos.append(dtr_info)
        except Exception as e:
            print(f'ERR! {e}')
            lc_info = {
                'n_sectors': len(lcpaths), 'lcpaths': lcpaths,
                'detrending_completed': False
            }
            ppu.save_status(statuspath, 'lc_info', lc_info)
            return 0

        #
        # stitch all available light curves
        #
        ap = dtr_infos[0][2]
        timelist = [d[1]['TMID_BJD'] for d in dtr_infos]
        maglist = [d[1][f'PCA{ap}'] for d in dtr_infos]
        magerrlist = [d[1][f'IRE{ap}'] for d in dtr_infos]

        extravecdict = {}
        extravecdict[f'IRM{ap}'] = [d[1][f'IRM{ap}'] for d in dtr_infos]
        for i in range(0, 7):
            extravecdict[f'CBV{i}'] = [d[3][i, :] for d in dtr_infos]

        try:
            time, flux, fluxerr, vec_dict = lcu.stitch_light_curves(
                timelist, maglist, magerrlist, extravecdict=extravecdict
            )
        except ValueError:
            lc_info = {
                'n_sectors': len(lcpaths), 'lcpaths': lcpaths,
                'detrending_completed': False
            }
            ppu.save_status(statuspath, 'lc_info', lc_info)
            return 0

        #
        # mask orbit edges
        #
        s_time, s_flux, inds = moe.mask_orbit_start_and_end(
            time, flux, raise_expectation_error=False, orbitgap=0.7,
            return_inds=True
        )
        s_fluxerr = fluxerr[inds]

        #
        # remove outliers with windowed sigma-clipping
        #
        window_length = 1.5  # days
        s_flux = slide_clip(s_time, s_flux, window_length, low=3, high=2,
                            method='mad', center='median')

        #
        # fix any "zero" values in s_flux to be NaN
        #
        s_flux[s_flux == 0] = np.nan

        ap = dtr_infos[0][2]
        allvardict = {
            'source_id': source_id,
            'E_BpmRp': apply_extinction,
            'ap': ap,
            'TMID_BJD': time,
            f'PCA{ap}': flux,
            f'IRE{ap}': fluxerr,
            'STIME': s_time,
            f'SPCA{ap}': s_flux,
            f'SPCAE{ap}': s_fluxerr,
            'dtr_infos': dtr_infos,
            'vec_dict': vec_dict
        }

        with open(picklepath, 'wb') as f:
            pickle.dump(allvardict, f)

        #
        # sanity check that PCA / detrending worked
        #
        limit_fraction = 0.75
        if len(flux[pd.isnull(flux)]) / len(flux) > limit_fraction:
            lc_info = {
                'n_sectors': len(lcpaths), 'lcpaths': lcpaths,
                'detrending_completed': False
            }
            ppu.save_status(statuspath, 'lc_info', lc_info)
            return 0

        #
        # update status that detrending worked.
        #
        lc_info = {
            'n_sectors': len(lcpaths), 'lcpaths': lcpaths,
            'detrending_completed': True
        }
        ppu.save_status(statuspath, 'lc_info', lc_info)

    else:

        thetime = datetime.utcnow().isoformat()
        print(f'{thetime}: Found {picklepath}, skipping detrending.')

        s = ppu.load_status(statuspath)
        if str2bool(s['lc_info']['detrending_completed']):
            with open(picklepath, 'rb') as f:
                allvardict = pickle.load(f)
        else:
            return 0

    #
    # make summary plots.
    #
    if not str2bool(s['report_info']['report_completed']):

        thetime = datetime.utcnow().isoformat()
        print(f'{thetime}: Beginning {source_id} allvar report.')

        plotdir = os.path.join(outdir, 'reports')
        outd = make_allvar_report(allvardict, plotdir)

        #
        # save their output (most crucially, including the bestperiods)
        #
        outpicklepath = os.path.join(outdir, 'data',
                                     f'{source_id}_reportinfo.pkl')
        with open(outpicklepath, 'wb') as f:
            pickle.dump(outd, f)

        report_info = {
            'report_completed': True,
            'ls_period': outd['lsp']['bestperiod'],
            'bestlspval': outd['lsp']['bestlspval'],
            'nbestperiods': outd['lsp']['nbestperiods'],
            'nbestlspvals': outd['lsp']['nbestlspvals'],
            'n_dict': outd['n_dict']
        }
        ppu.save_status(statuspath, 'report_info', report_info)

        #
        # save the SPCA light curve
        #
        ap = allvardict['ap']
        outdf = pd.DataFrame({
            'selected_time_bjdtdb_STIME': allvardict['STIME'],
            f'selected_flux_special_PCA_detrending_SPCA{ap}':
                allvardict[f'SPCA{ap}'],
            f'selected_flux_error_SPCAE{ap}': allvardict[f'SPCAE{ap}'],
        })
        outlcpath = os.path.join(outdir, 'data',
                                 f'{source_id}_SPCA_lightcurve.csv')
        outdf.to_csv(outlcpath, index=False)
        print(f'Wrote {outlcpath}')

    return 1
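
# Hedged usage sketch (illustration only, not part of the pipeline): the
# function above is designed to be called once per star, with `outdir`
# containing 'data', 'logs', and 'reports' sub-directories. The directory and
# Gaia DR2 identifier below are placeholders, not real pipeline values.
def _example_allvar_driver():
    import numpy as np
    outdir = '/path/to/allvariability_reports/MY_RUNID'   # hypothetical
    source_ids = [np.int64(5290720695823013376)]          # hypothetical DR2 ID
    for source_id in source_ids:
        # returns 1 if a new report was written, 0 if skipped or failed
        do_allvariable_report_making(source_id, outdir=outdir, overwrite=False)
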
def main(overwrite=0, sector=None, nworkers=40, cdipsvnum=1,
         cdips_cat_vnum=None, is_not_cdips_still_good=False):
    """
    ------------------------------------------
    Description:

    Fit Mandel-Agol transits to planet candidates. The fit parameters are then
    used for CTOIs.

    The goal of these models is to provide a good ephemeris (so: a reliable
    epoch, period, and duration, plus uncertainties).

    For publication-quality parameters, joint modelling of stellar rotation
    signals along with planet signals may be preferred. The approach here is to
    just whiten the light curve in order to get a good ephemeris (which may
    distort the depth, for example).

    ------------------------------------------
    Explanation of design:

    The directory structure is made like:

        /results/fit_gold/sector-?/fitresults/hlsp_*/
        /results/fit_gold/sector-?/samples/hlsp_*/

    where each `fitresults` sub-directory contains images to diagnose fit
    quality, pickle files with saved parameters from the fits, etc. The
    `samples` sub-directories have the .h5 files used when sampling.

    First, all these directories are made. Then, pdf files under either

        /results/vetting_classifications/sector-?_CLEAR_THRESHOLD

    or

        /results/vetting_classifications/sector-?_NOT_CDIPS_STILL_GOOD

    are parsed to collect the light curves and any necessary metadata. Then
    there's a for loop over each planet candidate, in which the fit is
    performed.

    ------------------------------------------
    Args:

        overwrite: if False, and the pickle file with saved parameters exists
        (i.e. you already fit the PC), no sampling is done.

        sector: the sector number.

        nworkers: for threading.

        cdipsvnum: version number of CDIPS LCs in their name.

        cdips_cat_vnum: target star catalog version identifier.

        is_not_cdips_still_good: if True, parses planet candidates from
        `/results/vetting_classifications/sector-?_NOT_CDIPS_STILL_GOOD`;
        otherwise uses the CLEAR_THRESHOLD directory.
    """

    lcbasedir, tfasrdir, resultsdir = _define_and_make_directories(
        sector, is_not_cdips_still_good=is_not_cdips_still_good
    )

    df, cdips_df, pfdf, supplementstatsdf, toidf, ctoidf = _get_data(
        sector, cdips_cat_vnum=cdips_cat_vnum
    )

    tfa_sr_paths = _get_lcpaths(df, tfasrdir)

    for tfa_sr_path in tfa_sr_paths:

        #
        # given the TFASR LC path, get the complete LC path
        #
        source_id = np.int64(
            tfa_sr_path.split('gaiatwo')[1].split('-')[0].lstrip('0')
        )
        mdf = cdips_df[cdips_df['source_id'] == source_id]
        if len(mdf) != 1:
            errmsg = 'expected exactly 1 source match in CDIPS cat'
            raise AssertionError(errmsg)

        _hdr = iu.get_header_keyword_list(tfa_sr_path, ['CAMERA', 'CCD'],
                                          ext=0)
        cam, ccd = _hdr['CAMERA'], _hdr['CCD']

        lcname = (
            'hlsp_cdips_tess_ffi_'
            'gaiatwo{zsource_id}-{zsector}-cam{cam}-ccd{ccd}_'
            'tess_v{zcdipsvnum}_llc.fits'
        ).format(
            cam=cam,
            ccd=ccd,
            zsource_id=str(source_id).zfill(22),
            zsector=str(sector).zfill(4),
            zcdipsvnum=str(cdipsvnum).zfill(2)
        )

        lcpath = os.path.join(
            lcbasedir, 'cam{}_ccd{}'.format(cam, ccd), lcname
        )

        #
        # make fitresults and samples directories
        #
        outdirs = [
            os.path.join(resultsdir, 'fitresults',
                         lcname.replace('.fits', '')),
            os.path.join(resultsdir, 'samples',
                         lcname.replace('.fits', ''))
        ]
        for outdir in outdirs:
            if not os.path.exists(outdir):
                os.mkdir(outdir)

        #
        # collect metadata for this target star
        #
        supprow = mavr._get_supprow(source_id, supplementstatsdf)
        suppfulldf = supplementstatsdf

        pfrow = pfdf.loc[pfdf['source_id'] == source_id]
        if len(pfrow) != 1:
            errmsg = 'expected exactly 1 source match in period find df'
            raise AssertionError(errmsg)

        outpath = os.path.join(
            resultsdir, 'fitresults', lcname.replace('.fits', ''),
            lcname.replace('.fits', '_fitparameters.csv')
        )

        #
        # if you haven't already made the output parameter file (which requires
        # convergence), then start fitting.
        #
        if not os.path.exists(outpath):

            _fit_transit_model_single_sector(
                tfa_sr_path, lcpath, outpath, mdf, source_id, supprow,
                suppfulldf, pfdf, pfrow, toidf, ctoidf, sector, nworkers,
                cdipsvnum=cdipsvnum, overwrite=overwrite
            )

        else:

            status_file = os.path.join(os.path.dirname(outpath),
                                       'run_status.stat')
            status = load_status(status_file)
            fittype = 'fivetransitparam_fit'

            if str2bool(status[fittype]['is_converged']):
                print('{} converged and already wrote ctoi csv.'.format(
                    source_id))
            elif (not str2bool(status[fittype]['is_converged'])
                  and int(source_id) in SKIP_CONVERGENCE_IDENTIFIERS):
                print('WRN! {} not converged, but wrote ctoi csv b/c in '
                      'SKIP_CONVERGENCE_IDENTIFIERS.'.format(source_id))
            else:
                raise ValueError(
                    'got parameter file existing, but not converged. '
                    'should never happen. for DR2 {}'.format(source_id)
                )
def get_auto_rotation_periods(runid='NGC_2516', get_spdm=True):
    """
    Given a `runid` (an identifier string for a particular CDIPS
    "allvariability" sub-pipeline processing run), retrieve the pipeline output
    and concatenate it into a table with the columns

        source_id, n_cdips_sector, period, lspval, nequal, nclose, nfaint,

    which is then saved to '../../data/rotation/{runid}_rotation_periods.csv'.

    Valid runids include: IC_2602, CrA, kc19group_113, Orion, NGC_2516,
    ScoOB2, compstar_NGC_2516.
    """

    # the allvariability logs, including the top 5 lomb-scargle periods and
    # peak values, are here.
    logdir = f'/Users/luke/Dropbox/proj/cdips/results/allvariability_reports/{runid}/logs'
    logfiles = glob(os.path.join(logdir, '*status.log'))
    print(f'Got {len(logfiles)} log files.')

    if get_spdm:
        pkldir = f'/Users/luke/Dropbox/proj/cdips/results/allvariability_reports/{runid}/data'
        pklfiles = glob(os.path.join(pkldir, '*reportinfo.pkl'))
        N_pklfiles = len(pklfiles)
        print(f'Got {N_pklfiles} pickle files.')
        if N_pklfiles < 10:
            raise ValueError('Too few pickle files... Port from phtess2?')

    source_ids = np.array(
        [np.int64(os.path.basename(f).split('_')[0]) for f in logfiles]
    )

    # retrieve the LS periods. only the top period, since we're not bothering
    # with the "second period" classification option.
    periods, lspvals, nequal, nclose, nfaint, nsectors = [], [], [], [], [], []
    if get_spdm:
        spdmperiods, spdmvals = [], []

    for source_id, logpath in zip(source_ids, logfiles):

        s = load_status(logpath)

        if get_spdm:
            pklpath = os.path.join(pkldir, f'{source_id}_reportinfo.pkl')
            if not os.path.exists(pklpath):
                spdmperiods.append(np.nan)
                spdmvals.append(np.nan)
            else:
                with open(pklpath, 'rb') as f:
                    d = pickle.load(f)
                spdmperiods.append(float(d['spdm']['bestperiod']))
                spdmvals.append(float(d['spdm']['bestlspval']))

        n_sectors = int(s['lc_info']['n_sectors'])
        nsectors.append(n_sectors)

        try:
            periods.append(float(s['report_info']['ls_period']))
            lspvals.append(float(s['report_info']['bestlspval']))
            nequal.append(int(eval(s['report_info']['n_dict'])['equal']))
            nclose.append(int(eval(s['report_info']['n_dict'])['close']))
            nfaint.append(int(eval(s['report_info']['n_dict'])['faint']))
        except (TypeError, ValueError):
            periods.append(np.nan)
            lspvals.append(np.nan)
            nequal.append(np.nan)
            nclose.append(np.nan)
            nfaint.append(np.nan)

    period_df = pd.DataFrame({
        'source_id': source_ids,
        'n_cdips_sector': nsectors,
        'period': periods,
        'lspval': lspvals,
        'nequal': nequal,
        'nclose': nclose,
        'nfaint': nfaint
    })
    if get_spdm:
        period_df['spdmperiod'] = spdmperiods
        period_df['spdmval'] = spdmvals

    print(
        f'Got {len(period_df[period_df.n_cdips_sector > 0])} sources with at '
        'least 1 cdips sector'
    )
    print(f'Got {len(period_df[~pd.isnull(period_df.period)])} periods')

    # get the runid's source list
    if 'compstar' not in runid:
        if runid == 'NGC_2516':
            sourcelistpath = (
                '/Users/luke/Dropbox/proj/cdips/data/cluster_data/'
                'NGC_2516_full_fullfaint_20210305.csv'
            )
        else:
            sourcelistpath = os.path.join(
                '/Users/luke/Dropbox/proj/cdips/data/cluster_data/'
                'cdips_catalog_split',
                f'OC_MG_FINAL_v0.4_publishable_CUT_{runid}.csv'
            )
    else:
        sourcelistpath = (
            '/Users/luke/Dropbox/proj/earhart/results/tables/'
            f'{runid}_sourcelist.csv'
        )

    df = pd.read_csv(sourcelistpath)

    if 'compstar' in runid:
        print(42 * '-')
        print(
            f'{len(df)} light curves made for stars in neighborhood '
            '(calib+cdips)'
        )
        print(f'... for {len(np.unique(df.source_id))} unique stars')
        df = df[df.phot_rp_mean_mag < 13]
        print(
            f'{len(df)} light curves made for stars in neighborhood '
            '(calib+cdips) w/ Rp<13'
        )
        print(f'... for {len(np.unique(df.source_id))} unique stars')
        print(42 * '-')

    df = df.drop_duplicates(subset='source_id', keep='first')

    mdf = period_df.merge(df, how='inner', on='source_id')

    outpath = f'../../data/rotation/{runid}_rotation_periods.csv'
    mdf.to_csv(outpath, index=False)
    print(f'Made {outpath}')
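
# Hedged note on the `eval(...)` calls in `get_auto_rotation_periods`: the
# status log stores `n_dict` as a stringified python dict, so if that
# assumption holds, `ast.literal_eval` is a safer drop-in for recovering it.
# A minimal sketch with a fabricated example string:
def _example_parse_n_dict(n_dict_str="{'equal': 1, 'close': 2, 'faint': 0}"):
    from ast import literal_eval
    d = literal_eval(n_dict_str)
    return int(d['equal']), int(d['close']), int(d['faint'])
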
def _fit_transit_model_single_sector(tfa_sr_path, lcpath, outpath, mdf,
                                     source_id, supprow, suppfulldf, pfdf,
                                     pfrow, toidf, ctoidf, sector, nworkers,
                                     cdipsvnum=1, overwrite=1):

    try_mcmc = True
    identifier = source_id

    #
    # read and re-detrend lc if needed. (recall: these planet candidates were
    # found using a penalized spline detrending in most cases).
    #
    hdul_sr = fits.open(tfa_sr_path)
    hdul = fits.open(lcpath)

    lc_sr = hdul_sr[1].data
    lc, hdr = hdul[1].data, hdul[0].header

    # FIXME: logic needs updating in >=S14 processing
    raise NotImplementedError

    is_pspline_dtr = bool(pfrow['pspline_detrended'].iloc[0])

    fluxap = 'IRM2' if is_pspline_dtr else 'TFASR2'

    time, mag = lc_sr['TMID_BJD'], lc_sr[fluxap]
    try:
        time, mag = moe.mask_orbit_start_and_end(
            time, mag, raise_expectation_error=False
        )
    except AssertionError:
        raise AssertionError(
            'moe.mask_orbit_start_and_end failed for {}'.format(tfa_sr_path)
        )

    flux = vp._given_mag_get_flux(mag)
    err = np.ones_like(flux) * 1e-4

    time, flux, err = sigclip_magseries(time, flux, err, magsarefluxes=True,
                                        sigclip=[50, 5])

    if is_pspline_dtr or identifier in KNOWN_EXTRA_DETREND:
        flux, _ = dtr.detrend_flux(time, flux)

    if identifier in KNOWN_EXTRA_DETREND:
        fit_savdir = os.path.dirname(outpath)
        dtrpath = os.path.join(fit_savdir, 'extra_detrend_lc.png')

        if not os.path.exists(dtrpath):
            plt.close('all')
            f, ax = plt.subplots(figsize=(6, 3))
            ax.scatter(time, flux, c='black', alpha=0.9, zorder=2, s=8,
                       rasterized=True, linewidths=0)
            ax.set_xlabel('bjdtdb')
            ax.set_ylabel('detrended flux')
            f.savefig(dtrpath, bbox_inches='tight')
            raise AssertionError(
                'U NEED TO MANUALLY LOOK AT {} AND VERIFY ITS OK'.format(
                    dtrpath)
            )
        else:
            print('WRN! found {}. continuing to fit.'.format(dtrpath))

    #
    # define the paths. get the stellar parameters, and do the fit!
    #
    fit_savdir = os.path.dirname(outpath)
    chain_savdir = os.path.dirname(outpath).replace('fitresults', 'samples')

    try:
        teff, teff_err, rstar, rstar_err, logg, logg_err = (
            get_teff_rstar_logg(hdr)
        )
    except (NotImplementedError, ValueError) as e:
        print(e)
        print('did not get rstar for {}. MUST MANUALLY FIX.'.format(source_id))
        try_mcmc = False

    #
    # initialize status file
    #
    status_file = os.path.join(fit_savdir, 'run_status.stat')
    fittype = 'fivetransitparam_fit'
    if not os.path.exists(status_file):
        save_status(status_file, fittype,
                    {'is_converged': False, 'n_steps_run': 0})
    status = load_status(status_file)[fittype]

    #
    # if not converged and no steps previously run:
    #   run 4k steps. write status file.
    #
    # reload status file.
    # if not converged and 4k steps previously run and in long ID list:
    #   run 25k steps, write status file.
    #
    # reload status file.
    # if not converged:
    #   print a warning.
    #
    if identifier in KNOWN_MCMC_FAILS:
        print('WRN! identifier {} requires manual fixing.'.format(identifier))
        try_mcmc = False

    if (not str2bool(status['is_converged'])
            and int(status['n_steps_run']) == 0 and try_mcmc):

        n_mcmc_steps = 4000

        mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
            time, flux, err, teff, rstar, logg, identifier, fit_savdir,
            chain_savdir, n_mcmc_steps=n_mcmc_steps,
            overwriteexistingsamples=False, n_transit_durations=5,
            make_tlsfit_plot=True, exp_time_minutes=30, bandpass='******',
            magsarefluxes=True, nworkers=nworkers
        )

        status = {'is_converged': is_converged, 'n_steps_run': n_mcmc_steps}
        save_status(status_file, fittype, status)

    status = load_status(status_file)[fittype]
    if (not str2bool(status['is_converged'])
            and int(status['n_steps_run']) != 25000
            and int(identifier) in LONG_RUN_IDENTIFIERS and try_mcmc):

        n_mcmc_steps = 25000

        # NOTE: hard-code nworkers, since we dont get multithreading
        # improvement anyway (this is some kind of bug)
        mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
            time, flux, err, teff, rstar, logg, identifier, fit_savdir,
            chain_savdir, n_mcmc_steps=n_mcmc_steps,
            overwriteexistingsamples=True, n_transit_durations=5,
            make_tlsfit_plot=True, exp_time_minutes=30, bandpass='******',
            magsarefluxes=True, nworkers=4
        )

        status = {'is_converged': is_converged, 'n_steps_run': n_mcmc_steps}
        save_status(status_file, fittype, status)

    #
    # if converged, or in the list of IDs for which it's fine to skip
    # convergence (because by eye, the fits are converged), convert fit results
    # to ctoi csv format.
    #
    status = load_status(status_file)[fittype]

    if (str2bool(status['is_converged'])
            or int(identifier) in SKIP_CONVERGENCE_IDENTIFIERS):

        try:
            _ = isinstance(mafr, dict)
        except UnboundLocalError:
            #
            # get the MCMC results from the pickle file; regenerate the TLS
            # result.
            #
            fitparamdir = os.path.dirname(status_file)
            fitpklsavpath = os.path.join(
                fitparamdir,
                '{}_phased_fivetransitparam_fit_empiricalerrs.pickle'.format(
                    identifier)
            )
            with open(fitpklsavpath, 'rb') as f:
                mafr = pickle.load(f)

            tlsp = htls.tls_parallel_pfind(
                time, flux, err, magsarefluxes=True, tls_rstar_min=0.1,
                tls_rstar_max=10, tls_mstar_min=0.1, tls_mstar_max=5.0,
                tls_oversample=8, tls_mintransits=1,
                tls_transit_template='default', nbestpeaks=5, sigclip=None,
                nworkers=nworkers
            )
            tlsr = tlsp['tlsresult']

        ticid = int(hdr['TICID'])
        ra, dec = hdr['RA_OBJ'], hdr['DEC_OBJ']

        print('{} converged. writing ctoi csv.'.format(identifier))

        fit_results_to_ctoi_csv(
            ticid, ra, dec, mafr, tlsr, outpath, toidf, ctoidf, teff, teff_err,
            rstar, rstar_err, logg, logg_err, cdipsvnum=cdipsvnum
        )

    else:
        print('WRN! {} did not converge after {} steps. MUST MANUALLY '
              'FIX.'.format(identifier, status['n_steps_run']))
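
# Hedged sketch of the status-file round trip assumed by the convergence
# bookkeeping above: values come back from `load_status` as strings (hence the
# `str2bool(...)` and `int(...)` casts on read), which is consistent with an
# INI-style file handled through configparser. This is an illustrative
# stand-in, not the pipeline's actual `save_status`/`load_status`
# implementation.
def _example_status_roundtrip(status_file='/tmp/run_status_example.stat'):
    from configparser import ConfigParser
    cp = ConfigParser()
    cp['fivetransitparam_fit'] = {'is_converged': False, 'n_steps_run': 4000}
    with open(status_file, 'w') as f:
        cp.write(f)
    cp2 = ConfigParser()
    cp2.read(status_file)
    # values are read back as strings, e.g.
    # {'is_converged': 'False', 'n_steps_run': '4000'}
    return dict(cp2['fivetransitparam_fit'])
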
def main(is_dayspecific_exofop_upload=1, cdipssource_vnum=0.4,
         uploadnamestr='sectors_12_thru_13_clear_threshold'):
    """
    Put together a few useful CSV candidate summaries:

        * bulk uploads to exofop/tess

        * observer info sparse (focus on TICIDs, gaia mags, positions on sky,
          etc.)

        * observer info full (stellar rvs for membership assessment; ephemeris
          information)

        * merge of everything (exoFOP upload, plus the subset of gaia
          information useful to observers)

    ----------
    Args:

        is_dayspecific_exofop_upload: if True, reads in the manually-written
        (from google spreadsheet) comments and source_ids, and writes those to
        a special "TO_EXOFOP" csv file.

        uploadnamestr: used as a unique identifying string in file names.
    """

    #
    # Read in the results from the fits
    #
    paramglob = os.path.join(
        fitdir,
        "sector-*_CLEAR_THRESHOLD/fitresults/hlsp_*gaiatwo*_llc/"
        "*fitparameters.csv"
    )
    parampaths = glob(paramglob)
    statusglob = os.path.join(
        fitdir,
        "sector-*_CLEAR_THRESHOLD/fitresults/hlsp_*gaiatwo*_llc/*.stat"
    )
    statuspaths = glob(statusglob)

    statuses = [
        dict(load_status(f)['fivetransitparam_fit']) for f in statuspaths
    ]

    param_df = pd.concat((pd.read_csv(f, sep='|') for f in parampaths))

    outpath = os.path.join(
        fitdir,
        "{}_{}_mergedfitparams.csv".format(today_YYYYMMDD(), uploadnamestr)
    )
    param_df['param_path'] = parampaths
    param_df.to_csv(outpath, index=False, sep='|')
    print('made {}'.format(outpath))

    status_df = pd.DataFrame(statuses)
    status_df['statuspath'] = statuspaths

    status_gaiaids = list(map(
        lambda x: int(
            os.path.dirname(x).split('gaiatwo')[1].split('-')[0].lstrip('0')
        ),
        statuspaths
    ))
    status_df['source_id'] = status_gaiaids

    if is_dayspecific_exofop_upload:

        #
        # Manually commented candidates are the only ones we're uploading.
        #
        manual_comment_df = pd.read_csv(
            '/nfs/phtess2/ar0/TESS/PROJ/lbouma/cdips/data/exoFOP_uploads/'
            '{}_cdips_candidate_upload.csv'.format(today_YYYYMMDD()),
            sep=","
        )
        common = status_df.merge(manual_comment_df, on='source_id',
                                 how='inner')
        sel_status_df = status_df[status_df.source_id.isin(common.source_id)]

        #
        # WARN: the MCMC fits should have converged before uploading.
        # (20190918 had two exceptions, where the fit looked fine.)
        #
        if len(sel_status_df[sel_status_df['is_converged'] == 'False']) > 0:
            print('\nWRN! THE FOLLOWING CANDIDATES ARE NOT CONVERGED')
            print(sel_status_df[sel_status_df['is_converged'] == 'False'])

        param_gaiaids = list(map(
            lambda x: int(
                os.path.basename(x).split('gaiatwo')[1].split('-')[0].
                lstrip('0')
            ),
            parampaths
        ))
        param_df['source_id'] = param_gaiaids

        #
        # Require that you actually have a parameter file (...).
        #
        _df = sel_status_df.merge(param_df, on='source_id', how='inner')

        to_exofop_df = param_df[param_df.source_id.isin(_df.source_id)]

        if len(to_exofop_df) != len(manual_comment_df):
            print('\nWRN! {} CANDIDATES DID NOT HAVE PARAMETERS'.format(
                len(manual_comment_df) - len(to_exofop_df)
            ))
            print('They are...')
            print(manual_comment_df[
                ~manual_comment_df.source_id.isin(to_exofop_df.source_id)
            ])
            print('\n')

        #
        # Duplicate entries in "to_exofop_df" are multi-sector. Average their
        # parameters (really will end up just being durations) across sectors,
        # and then remove the duplicate multi-sector rows using the "groupby"
        # aggregator. This removes the string-based columns, which we can
        # reclaim by a "drop_duplicates" call, since they don't have
        # sector-specific information. Then, assign comments and format as
        # appropriate for ExoFop-TESS. Unique tag for the entire upload.
        #
        to_exofop_df['source_id'] = to_exofop_df['source_id'].astype(str)

        mean_val_to_exofop_df = to_exofop_df.groupby(
            'target').mean().reset_index()

        string_cols = [
            'target', 'flag', 'disp', 'tag', 'group', 'notes', 'source_id'
        ]
        dup_dropped_str_df = to_exofop_df.drop_duplicates(
            subset=['target'], keep='first', inplace=False
        )[string_cols]

        out_df = mean_val_to_exofop_df.merge(dup_dropped_str_df, how='left',
                                             on='target')

        #
        # The above procedure got the epochs on multisector planets wrong.
        # Determine (t0, P) by fitting a line to entries with >=3 sectors
        # instead. For the two-sector case, due to bad covariance matrices,
        # just use the newest ephemeris. (An illustrative sketch of this refit
        # follows this function.)
        #
        multisector_df = to_exofop_df[
            to_exofop_df.target.groupby(
                to_exofop_df.target).transform('value_counts') > 1
        ]
        u_multisector_df = out_df[out_df.target.isin(multisector_df.target)]

        # temporarily drop the multisector rows from out_df (they will be
        # re-merged)
        out_df = out_df.drop(
            np.argwhere(out_df.target.isin(multisector_df.target)).flatten(),
            axis=0
        )

        ephem_d = {}
        for ix, t in enumerate(np.unique(multisector_df.target)):

            sel = (multisector_df.target == t)
            tmid = nparr(multisector_df[sel].epoch)
            tmid_err = nparr(multisector_df[sel].epoch_unc)
            init_period = nparr(multisector_df[sel].period.mean())

            E, init_t0 = get_epochs_given_midtimes_and_period(
                tmid, init_period, verbose=False
            )

            popt, pcov = curve_fit(
                linear_model, E, tmid, p0=(init_period, init_t0),
                sigma=tmid_err
            )

            if np.all(np.isinf(pcov)):
                # if least-squares doesn't give good error (i.e., just two
                # epochs), take the most recent epoch.
                s = np.argmax(tmid)
                use_t0 = tmid[s]
                use_t0_err = tmid_err[s]
                use_period = nparr(multisector_df[sel].period)[s]
                use_period_err = nparr(multisector_df[sel].period_unc)[s]
            else:
                use_t0 = popt[1]
                use_t0_err = pcov[1, 1]**0.5
                use_period = popt[0]
                use_period_err = pcov[0, 0]**0.5

            if DEBUG:
                print('init tmid {}, tmiderr {}\nperiod {}, perioderr {}'.format(
                    tmid, tmid_err, nparr(multisector_df[sel].period),
                    nparr(multisector_df[sel].period_unc)))
                print('use tmid {}, tmiderr {}\nperiod {}, perioderr {}'.format(
                    use_t0, use_t0_err, use_period, use_period_err))
                print(10 * '-')

            ephem_d[ix] = {
                'target': t, 'epoch': use_t0, 'epoch_unc': use_t0_err,
                'period': use_period, 'period_unc': use_period_err
            }

        ephem_df = pd.DataFrame(ephem_d).T

        mdf = ephem_df.merge(u_multisector_df, how='left', on='target',
                             suffixes=('', '_DEPRECATED'))
        mdf = mdf.drop([c for c in mdf.columns if 'DEPRECATED' in c],
                       axis=1, inplace=False)

        temp_df = out_df.append(mdf, ignore_index=True, sort=False)
        out_df = temp_df

        to_exofop_df = out_df[COLUMN_ORDER]

        # to_exofop_df = mdf[COLUMN_ORDER]  # special behavior for 2020/02/07 fix
        # to_exofop_df['flag'] = 'newparams'

        _df = manual_comment_df[
            manual_comment_df.source_id.isin(to_exofop_df.source_id)
        ]

        # sort both frames by source_id first, so that the manually written
        # comments line up row-by-row with the upload table.
        to_exofop_df = to_exofop_df.sort_values(by="source_id")
        _df = _df.sort_values(by="source_id")

        comments = list(_df['comment'])
        # comments = 'Fixed ephemeris bug. (Old epoch was erroneous).'  # 2020/02/07

        for c in comments:
            assert len(c) <= 119

        to_exofop_df['notes'] = comments
        to_exofop_df['tag'] = (
            '{}_bouma_cdips-v01_00001'.format(today_YYYYMMDD())
        )

        istoi = ~to_exofop_df['target'].astype(str).str.startswith('TIC')
        if np.any(istoi):
            newtargetname = 'TOI' + to_exofop_df[istoi].target.astype(str)
            to_exofop_df.loc[istoi, 'target'] = newtargetname

        outpath = os.path.join(
            exofopdir,
            "{}_{}_w_sourceid.csv".format(today_YYYYMMDD(), uploadnamestr)
        )
        to_exofop_df.to_csv(outpath, index=False, sep='|')
        print('made {}'.format(outpath))

        to_exofop_df = to_exofop_df.drop(['source_id'], axis=1)

        outpath = os.path.join(
            exofopdir, "params_planet_{}_001.txt".format(today_YYYYMMDD())
        )
        for c in ['epoch', 'epoch_unc', 'period', 'period_unc']:
            to_exofop_df[c] = to_exofop_df[c].astype(float)
        to_exofop_df = to_exofop_df.round(FORMATDICT)
        to_exofop_df['depth'] = to_exofop_df['depth'].astype(int)
        to_exofop_df['depth_unc'] = to_exofop_df['depth_unc'].astype(int)
        to_exofop_df.to_csv(outpath, index=False, sep='|', header=False)
        print('made {}'.format(outpath))

        # manually check these...
        print('\n' + 42 * '=' + '\n')
        print('\nPeriod uncertainties [minutes]')
        print(to_exofop_df['period_unc'] * 24 * 60)
        print('\nEpoch uncertainties [minutes]')
        print(to_exofop_df['epoch_unc'] * 24 * 60)
        print('\nPlanet radii [Rearth]')
        print(to_exofop_df[['radius', 'radius_unc', 'notes']])
        print('\n' + 42 * '=' + '\n')

    #
    # above is the format exofop-TESS wants. however it's not particularly
    # useful for followup. for that, we want: gaia IDs, magnitudes, ra, dec.
    #
    gaiaids = list(map(
        lambda x: int(
            os.path.basename(x).split('gaiatwo')[1].split('-')[0].lstrip('0')
        ),
        parampaths
    ))

    lcnames = list(map(
        lambda x: os.path.basename(x).replace('_fitparameters.csv', '.fits'),
        parampaths
    ))

    lcdir = '/nfs/phtess2/ar0/TESS/PROJ/lbouma/CDIPS_LCS/sector-*/cam?_ccd?/'
    lcpaths = [glob(os.path.join(lcdir, lcn))[0] for lcn in lcnames]

    # now get the header values
    kwlist = [
        'RA_OBJ', 'DEC_OBJ', 'CDIPSREF', 'CDCLSTER', 'phot_g_mean_mag',
        'phot_bp_mean_mag', 'phot_rp_mean_mag', 'TESSMAG', 'Gaia-ID', 'TICID',
        'TICTEFF', 'TICRAD', 'TICMASS'
    ]

    for k in kwlist:
        thislist = []
        for l in lcpaths:
            thislist.append(iu.get_header_keyword(l, k, ext=0))
        param_df[k] = np.array(thislist)

    # now search for stellar RV xmatch
    res = [
        fr.get_rv_xmatch(ra, dec, G_mag=gmag, dr2_sourceid=s)
        for ra, dec, gmag, s in zip(
            list(param_df['RA_OBJ']), list(param_df['DEC_OBJ']),
            list(param_df['phot_g_mean_mag']), list(param_df['Gaia-ID'])
        )
    ]
    res = np.array(res)
    param_df['stellar_rv'] = res[:, 0]
    param_df['stellar_rv_unc'] = res[:, 1]
    param_df['stellar_rv_provenance'] = res[:, 2]

    # make column showing whether there are ESO spectra available
    res = [
        fr.wrangle_eso_for_rv_availability(ra, dec)
        for ra, dec in zip(
            list(param_df['RA_OBJ']), list(param_df['DEC_OBJ'])
        )
    ]
    param_df['eso_rv_availability'] = nparr(res)[:, 2]

    #
    # try to get cluster RV. first from Soubiran, then from Kharchenko.
    # to do this, load in the CDIPS target catalog. merging the CDCLSTER name
    # (comma-delimited string) against the target catalog on source
    # identifiers allows unique cluster name identification, since I already
    # did that, earlier.
    #
    cdips_df = ccl.get_cdips_pub_catalog(ver=cdipssource_vnum)
    dcols = 'cluster;reference;source_id;unique_cluster_name'
    ccdf = cdips_df[dcols.split(';')]
    ccdf['source_id'] = ccdf['source_id'].astype(np.int64)
    mdf = param_df.merge(ccdf, how='left', left_on='source_id',
                         right_on='source_id')
    param_df['unique_cluster_name'] = nparr(mdf['unique_cluster_name'])

    s19 = gvc.get_soubiran_19_rv_table()
    k13_param = gvc.get_k13_param_table()

    c_rvs, c_err_rvs, c_rv_nstar, c_rv_prov = [], [], [], []
    for ix, row in param_df.iterrows():

        if row['unique_cluster_name'] in nparr(s19['ID']):
            sel = (s19['ID'] == row['unique_cluster_name'])
            c_rvs.append(float(s19[sel]['RV'].iloc[0]))
            c_err_rvs.append(float(s19[sel]['e_RV'].iloc[0]))
            c_rv_nstar.append(int(s19[sel]['Nsele'].iloc[0]))
            c_rv_prov.append('Soubiran+19')
            continue

        elif row['unique_cluster_name'] in nparr(k13_param['Name']):
            sel = (k13_param['Name'] == row['unique_cluster_name'])
            c_rvs.append(float(k13_param[sel]['RV'].iloc[0]))
            c_err_rvs.append(float(k13_param[sel]['e_RV'].iloc[0]))
            c_rv_nstar.append(int(k13_param[sel]['o_RV'].iloc[0]))
            c_rv_prov.append('Kharchenko+13')
            continue

        else:
            c_rvs.append(np.nan)
            c_err_rvs.append(np.nan)
            c_rv_nstar.append(np.nan)
            c_rv_prov.append('')

    param_df['cluster_rv'] = c_rvs
    param_df['cluster_err_rv'] = c_err_rvs
    param_df['cluster_rv_nstar'] = c_rv_nstar
    param_df['cluster_rv_provenance'] = c_rv_prov

    #
    # finally, begin writing the output
    #
    outpath = (
        "/home/lbouma/proj/cdips/results/fit_gold/"
        "{}_{}_fitparams_plus_observer_info.csv".format(
            today_YYYYMMDD(), uploadnamestr)
    )
    param_df.to_csv(outpath, index=False, sep='|')
    print('made {}'.format(outpath))

    #
    # sparse observer info cut
    #
    scols = [
        'target', 'flag', 'disp', 'tag', 'group', 'RA_OBJ', 'DEC_OBJ',
        'CDIPSREF', 'CDCLSTER', 'phot_g_mean_mag', 'phot_bp_mean_mag',
        'phot_rp_mean_mag', 'TICID', 'TESSMAG', 'TICTEFF', 'TICRAD',
        'TICMASS', 'Gaia-ID'
    ]
    sparam_df = param_df[scols]

    outpath = (
        "/home/lbouma/proj/cdips/results/fit_gold/"
        "{}_{}_observer_info_sparse.csv".format(
            today_YYYYMMDD(), uploadnamestr)
    )
    sparam_df.to_csv(outpath, index=False, sep='|')
    print('made {}'.format(outpath))

    #
    # full observer info cut
    #
    scols = [
        'target', 'flag', 'disp', 'tag', 'group', 'RA_OBJ', 'DEC_OBJ',
        'CDIPSREF', 'CDCLSTER', 'phot_g_mean_mag', 'phot_bp_mean_mag',
        'phot_rp_mean_mag', 'TICID', 'TESSMAG', 'TICTEFF', 'TICRAD',
        'TICMASS', 'Gaia-ID', 'period', 'period_unc', 'epoch', 'epoch_unc',
        'depth', 'depth_unc', 'duration', 'duration_unc', 'radius',
        'radius_unc', 'stellar_rv', 'stellar_rv_unc', 'stellar_rv_provenance',
        'eso_rv_availability', 'cluster_rv', 'cluster_err_rv',
        'cluster_rv_nstar', 'cluster_rv_provenance'
    ]
    sparam_df = param_df[scols]

    outpath = (
        "/home/lbouma/proj/cdips/results/fit_gold/"
        "{}_{}_observer_info_full.csv".format(
            today_YYYYMMDD(), uploadnamestr)
    )
    sparam_df.to_csv(outpath, index=False, sep='|')
    print('made {}'.format(outpath))
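
# Hedged worked example of the multisector ephemeris refit in `main` above:
# given transit mid-times and an initial period, the epoch numbers E are
# integers and a weighted least-squares line tmid = P*E + t0 yields the
# refined period and epoch. The `linear_model` and
# `get_epochs_given_midtimes_and_period` helpers used in `main` are assumed to
# behave like the inline versions here; the mid-times below are fabricated for
# illustration.
def _example_refit_multisector_ephemeris():
    import numpy as np
    from scipy.optimize import curve_fit

    def _linear_model(E, period, t0):
        return period * E + t0

    tmid = np.array([2458500.005, 2458505.001, 2458529.998])  # BJD_TDB, made up
    tmid_err = np.array([1e-3, 1e-3, 1e-3])
    init_period = 5.0

    E = np.round((tmid - tmid[0]) / init_period)   # integer epoch numbers
    popt, pcov = curve_fit(_linear_model, E, tmid,
                           p0=(init_period, tmid[0]), sigma=tmid_err)

    period, t0 = popt
    period_err, t0_err = np.sqrt(np.diag(pcov))
    return period, period_err, t0, t0_err
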