def test_fivetransitparam_fit_magseries_easy():
    """
    Easy case: one TESS sector of data with a HJ (candidate) in it.

    Fits a transit model (t0, period, incl, sma, rp/star) and checks that the
    sampler converged, that both the TLS and the MCMC periods match the
    expected value, and that the quoted error bars are believable.
    """
    # light curve path and identifier for GaiaDR2 3007171311355035136
    lcpath = LCPATHS[0]
    after_prefix = lcpath.split('gaiatwo')[1]
    zero_padded_id = after_prefix.split('-')[0]
    identifier = str(zero_padded_id.lstrip('0'))

    lc = iu.get_data_keyword_list(lcpath, ['TMID_BJD', 'TFA2'])
    hdr = iu.get_header_keyword_list(lcpath, ['TICTEFF', 'TICRAD', 'TICLOGG'])

    time = lc['TMID_BJD']
    mag = lc['TFA2']

    # magnitudes -> median-normalized relative flux
    mag_0, f_0 = 12, 1e4
    flux = f_0 * 10**(-0.4 * (mag - mag_0))
    flux = flux / np.nanmedian(flux)
    err = 1e-4 * np.ones_like(flux)

    teff, rstar, logg = hdr['TICTEFF'], hdr['TICRAD'], hdr['TICLOGG']

    cwd = os.getcwd()
    fit_savdir = os.path.join(cwd, 'fivetransitparam_results')
    chain_savdir = os.path.join(cwd, 'fivetransitparam_chains')

    fit_options = {
        'n_mcmc_steps': 4000,
        'overwriteexistingsamples': False,
        'n_transit_durations': 5,
        'make_tlsfit_plot': True,
        'exp_time_minutes': 30,
        'bandpass': '******',
        'magsarefluxes': True,
        'nworkers': multiprocessing.cpu_count(),
    }
    mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
        time, flux, err, teff, rstar, logg, identifier, fit_savdir,
        chain_savdir, **fit_options)

    assert is_converged

    expected_period = 3.495
    assert_allclose(tlsr['period'], expected_period, atol=1e-2)
    assert_allclose(mafr['fitinfo']['finalparams']['period'],
                    expected_period, atol=1e-2)

    # theoretical t0 for this data (like 5 or 6 transits, over 1 TESS sector)
    # is 3.99e+00 min = 6.64e-02 h = 2.77e-03 days.
    # guess-timate period should be better than 3 minutes too.
    upper_limits = (('t0', 5e-3), ('period', 3 / (24 * 60)))
    for param, limit in upper_limits:
        for side in ('std_perrs', 'std_merrs'):
            assert mafr['fitinfo']['finalparamerrs'][side][param] < limit
def test_fivetransitparam_fit_magseries_hard():
    """
    Hard case: one TESS sector of data with a hard WJ (candidate) in it.

    Fits a transit model (t0, period, incl, sma, rp/star) and checks that the
    sampler converged, that both the TLS and the MCMC periods match the
    expected value, and that the quoted error bars are believable.
    """
    # light curve path and identifier for GaiaDR2 4827527233363019776
    lcpath = LCPATHS[1]
    after_prefix = lcpath.split('gaiatwo')[1]
    zero_padded_id = after_prefix.split('-')[0]
    identifier = str(zero_padded_id.lstrip('0'))

    lc = iu.get_data_keyword_list(lcpath, ['TMID_BJD', 'TFA2'])
    hdr = iu.get_header_keyword_list(lcpath, ['TICTEFF', 'TICRAD', 'TICLOGG'])

    time = lc['TMID_BJD']
    mag = lc['TFA2']

    # magnitudes -> median-normalized relative flux
    mag_0, f_0 = 12, 1e4
    flux = f_0 * 10**(-0.4 * (mag - mag_0))
    flux = flux / np.nanmedian(flux)
    err = 1e-4 * np.ones_like(flux)

    teff, rstar, logg = hdr['TICTEFF'], hdr['TICRAD'], hdr['TICLOGG']

    cwd = os.getcwd()
    fit_savdir = os.path.join(cwd, 'fivetransitparam_results')
    chain_savdir = os.path.join(cwd, 'fivetransitparam_chains')

    # Autocorrelation time is like 400 steps for this case. But it does
    # converge.
    fit_options = {
        'n_mcmc_steps': 25000,
        'overwriteexistingsamples': False,
        'n_transit_durations': 5,
        'make_tlsfit_plot': True,
        'exp_time_minutes': 30,
        'bandpass': '******',
        'magsarefluxes': True,
        'nworkers': 32,
    }
    mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
        time, flux, err, teff, rstar, logg, identifier, fit_savdir,
        chain_savdir, **fit_options)

    print(is_converged)
    print(tlsr['period'])
    print(mafr['fitinfo']['finalparams']['period'])

    assert is_converged

    expected_period = 10.714
    assert_allclose(tlsr['period'], expected_period, atol=1e-2)
    assert_allclose(mafr['fitinfo']['finalparams']['period'],
                    expected_period, atol=1e-2)

    # exofopTESS quotes t0 for TOI-450 to <1 minute (multisector). < 10 minutes
    # required.
    # guess-timate period should be better than say 10 minutes.
    ten_minutes_in_days = 10 / (24 * 60)
    for param in ('t0', 'period'):
        for side in ('std_perrs', 'std_merrs'):
            assert (mafr['fitinfo']['finalparamerrs'][side][param]
                    < ten_minutes_in_days)
def test_multithread_speed(nworkers=20):
    """
    Ensure that increasing nworkers speeds up the MCMC sampling.

    2019/08/15: this test fails.

    Assumption is ideally that run time goes as 1/nworkers. We are a bit nicer
    here, and take out a factor of two for overhead. Even this fails, because
    the emcee multithread scaling in
    lcfit/transits.mandelagol_fit_magseries is non-existent.

    Parameters
    ----------
    nworkers : int
        Number of workers used for the second (multi-worker) fit; the first
        fit always runs with ``nworkers=1``.
    """
    # NOTE: this test fails, because something is wrong with the emcee
    # multithreading in lcfit/transits.py. (This is an issue that would be nice
    # to resolve -- though for the time being not "mission-critical")

    # path and identifier for GaiaDR2 3007171311355035136, the nice HJ.
    lcpath = LCPATHS[0]
    identifier = str(lcpath.split('gaiatwo')[1].split('-')[0].lstrip('0'))

    # context manager guarantees the FITS handle is released even if reading
    # the header or the table raises.
    with fits.open(lcpath) as hdul:
        hdr, lc = hdul[0].header, hdul[1].data

    time = lc['TMID_BJD']
    mag = lc['TFA2']

    # magnitudes -> median-normalized relative flux
    mag_0, f_0 = 12, 1e4
    flux = f_0 * 10**(-0.4 * (mag - mag_0))
    flux /= np.nanmedian(flux)
    err = np.ones_like(flux) * 1e-4

    teff = hdr['TICTEFF']
    rstar = hdr['TICRAD']
    logg = hdr['TICLOGG']

    # options shared by both runs; only the output directories and the
    # worker count differ.
    common_fit_kwargs = dict(
        n_mcmc_steps=1000, overwriteexistingsamples=True,
        n_transit_durations=5, make_tlsfit_plot=True, exp_time_minutes=30,
        bandpass='******', magsarefluxes=True)

    fit_savdir = os.path.join(os.getcwd(),
                              'fivetransitparam_results_single_thread')
    chain_savdir = os.path.join(os.getcwd(),
                                'fivetransitparam_chains_single_thread')

    start = _time.time()

    mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
        time, flux, err, teff, rstar, logg, identifier, fit_savdir,
        chain_savdir, nworkers=1, **common_fit_kwargs)

    end_singlethread = _time.time()

    fit_savdir = os.path.join(os.getcwd(),
                              'fivetransitparam_results_manythread')
    chain_savdir = os.path.join(os.getcwd(),
                                'fivetransitparam_chains_manythread')

    mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
        time, flux, err, teff, rstar, logg, identifier, fit_savdir,
        chain_savdir, nworkers=nworkers, **common_fit_kwargs)

    end_multithread = _time.time()

    multithread_time = end_multithread - end_singlethread
    singlethread_time = end_singlethread - start

    print("Singlethread took {0:.1f} seconds".format(singlethread_time))
    # both fields are numbered explicitly: str.format raises ValueError when
    # manual ({0}) and automatic ({}) field numbering are mixed.
    print("Multithreaded took {0:.1f} seconds with {1} workers".format(
        multithread_time, nworkers))
    print("{0:.1f} times faster than serial".format(
        singlethread_time / multithread_time))

    # passes, but mainly b/c of the overhead from TLS
    assert multithread_time < singlethread_time

    # fails
    assert multithread_time < singlethread_time / (0.5 * nworkers)
def _fit_transit_model_single_sector(tfa_sr_path, lcpath, outpath, mdf,
                                     source_id, supprow, suppfulldf, pfdf,
                                     pfrow, toidf, ctoidf, sector, nworkers,
                                     cdipsvnum=1, overwrite=1):
    """
    Fit a five-parameter transit model to a single-sector light curve and, if
    the fit is (or is deemed) converged, write the result as a ctoi csv.

    NOTE: as written, this function raises NotImplementedError right after
    reading the two FITS files (see the FIXME below); everything after that
    statement is currently unreachable.

    Parameters
    ----------
    tfa_sr_path : str
        FITS file whose extension 1 supplies the 'TMID_BJD' time column and
        the 'IRM2' / 'TFASR2' magnitude columns.
    lcpath : str
        FITS file whose primary header supplies the stellar parameters (via
        get_teff_rstar_logg) and the 'TICID', 'RA_OBJ', 'DEC_OBJ' keywords.
    outpath : str
        Output csv path. Its directory is used as the fit-result directory;
        the chain directory is that same path with 'fitresults' replaced by
        'samples'.
    source_id
        Identifier of the star; used for file names and for membership tests
        against KNOWN_EXTRA_DETREND, KNOWN_MCMC_FAILS, LONG_RUN_IDENTIFIERS
        and SKIP_CONVERGENCE_IDENTIFIERS.
    pfrow
        Row whose 'pspline_detrended' column selects the magnitude column
        and whether the flux is re-detrended.
    toidf, ctoidf
        Passed through unchanged to fit_results_to_ctoi_csv.
    nworkers : int
        Worker count for the 4000-step fit and for regenerating the TLS
        result. (The 25000-step fit hard-codes 4 workers.)
    cdipsvnum : int
        Passed through unchanged to fit_results_to_ctoi_csv.
    mdf, supprow, suppfulldf, pfdf, sector, overwrite
        Not used inside this function.

    Raises
    ------
    NotImplementedError
        Always, at present (see the FIXME below).
    AssertionError
        If orbit-edge masking fails, or if an extra-detrended light curve
        plot has just been written and still needs manual vetting.
    """
    try_mcmc = True
    identifier = source_id
    fit_savdir = os.path.dirname(outpath)

    # Set to True once an MCMC fit has been run during this call, i.e. once
    # `mafr` and `tlsr` are available in memory rather than only on disk.
    have_fit_in_memory = False

    #
    # read and re-detrend lc if needed. (recall: these planet candidates were
    # found using a penalized spline detrending in most cases).
    #
    # The context manager releases both FITS handles on every exit path,
    # including the NotImplementedError raised just below.
    with fits.open(tfa_sr_path) as hdul_sr, fits.open(lcpath) as hdul:
        lc_sr = hdul_sr[1].data
        hdr = hdul[0].header

    # FIXME: logic needs updating in >=S14 processing
    raise NotImplementedError

    is_pspline_dtr = bool(pfrow['pspline_detrended'].iloc[0])

    fluxap = 'IRM2' if is_pspline_dtr else 'TFASR2'

    time, mag = lc_sr['TMID_BJD'], lc_sr[fluxap]
    try:
        time, mag = moe.mask_orbit_start_and_end(
            time, mag, raise_expectation_error=False)
    except AssertionError as exc:
        # re-raise with the offending path, keeping the original cause.
        raise AssertionError(
            'moe.mask_orbit_start_and_end failed for {}'.format(tfa_sr_path)
        ) from exc

    flux = vp._given_mag_get_flux(mag)
    err = np.ones_like(flux) * 1e-4

    time, flux, err = sigclip_magseries(time, flux, err, magsarefluxes=True,
                                        sigclip=[50, 5])

    if is_pspline_dtr or identifier in KNOWN_EXTRA_DETREND:
        flux, _ = dtr.detrend_flux(time, flux)

        if identifier in KNOWN_EXTRA_DETREND:
            dtrpath = os.path.join(fit_savdir, 'extra_detrend_lc.png')

            if not os.path.exists(dtrpath):
                # First pass for this star: write the plot and stop, so that
                # it gets looked at before any fit is attempted.
                plt.close('all')
                f, ax = plt.subplots(figsize=(6, 3))
                ax.scatter(time, flux, c='black', alpha=0.9, zorder=2, s=8,
                           rasterized=True, linewidths=0)
                ax.set_xlabel('bjdtdb')
                ax.set_ylabel('detrended flux')
                f.savefig(dtrpath, bbox_inches='tight')
                raise AssertionError(
                    'U NEED TO MANUALLY LOOK AT {} AND VERIFY ITS OK'.format(
                        dtrpath))
            else:
                print('WRN! found {}. continuing to fit.'.format(dtrpath))

    #
    # define the paths. get the stellar parameters, and do the fit!
    #
    chain_savdir = os.path.dirname(outpath).replace('fitresults', 'samples')

    try:
        teff, teff_err, rstar, rstar_err, logg, logg_err = (
            get_teff_rstar_logg(hdr))
    except (NotImplementedError, ValueError) as e:
        print(e)
        print('did not get rstar for {}. MUST MANUALLY FIX.'.format(source_id))
        try_mcmc = False

    #
    # initialize status file
    #
    status_file = os.path.join(fit_savdir, 'run_status.stat')
    fittype = 'fivetransitparam_fit'
    if not os.path.exists(status_file):
        save_status(status_file, fittype, {
            'is_converged': False,
            'n_steps_run': 0
        })
    status = load_status(status_file)[fittype]

    #
    # if not converged and no steps previously run:
    #   run 4k steps. write status file.
    #
    # reload status file.
    # if not converged and 4k steps previously run and in long ID list:
    #   run 25k steps, write status file.
    #
    # reload status file.
    # if not converged:
    #   print a warning.
    #
    if identifier in KNOWN_MCMC_FAILS:
        print('WRN! identifier {} requires manual fixing.'.format(identifier))
        try_mcmc = False

    # keyword arguments shared by both MCMC runs below.
    common_fit_kwargs = dict(
        n_transit_durations=5, make_tlsfit_plot=True, exp_time_minutes=30,
        bandpass='******', magsarefluxes=True)

    if (not str2bool(status['is_converged'])
            and int(status['n_steps_run']) == 0
            and try_mcmc):

        n_mcmc_steps = 4000

        mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
            time, flux, err, teff, rstar, logg, identifier, fit_savdir,
            chain_savdir, n_mcmc_steps=n_mcmc_steps,
            overwriteexistingsamples=False, nworkers=nworkers,
            **common_fit_kwargs)
        have_fit_in_memory = True

        status = {'is_converged': is_converged, 'n_steps_run': n_mcmc_steps}
        save_status(status_file, fittype, status)

    status = load_status(status_file)[fittype]
    if (not str2bool(status['is_converged'])
            and int(status['n_steps_run']) != 25000
            and int(identifier) in LONG_RUN_IDENTIFIERS
            and try_mcmc):

        n_mcmc_steps = 25000

        # NOTE: hard-code nworkers, since we dont get multithreading
        # improvement anyway (this is some kind of bug)
        mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
            time, flux, err, teff, rstar, logg, identifier, fit_savdir,
            chain_savdir, n_mcmc_steps=n_mcmc_steps,
            overwriteexistingsamples=True, nworkers=4,
            **common_fit_kwargs)
        have_fit_in_memory = True

        status = {'is_converged': is_converged, 'n_steps_run': n_mcmc_steps}
        save_status(status_file, fittype, status)

    #
    # if converged or in the list of IDs for which its fine to skip convergence
    # (because by-eye, the fits are converged), convert fit results to ctoi csv
    # format
    #
    status = load_status(status_file)[fittype]

    if (str2bool(status['is_converged'])
            or int(identifier) in SKIP_CONVERGENCE_IDENTIFIERS):

        if not have_fit_in_memory:
            #
            # no fit was run in this call: get the MCMC results from the
            # pickle file; regenerate the TLS result.
            #
            fitparamdir = os.path.dirname(status_file)
            fitpklsavpath = os.path.join(
                fitparamdir,
                '{}_phased_fivetransitparam_fit_empiricalerrs.pickle'.format(
                    identifier))
            with open(fitpklsavpath, 'rb') as f:
                mafr = pickle.load(f)

            tlsp = htls.tls_parallel_pfind(
                time, flux, err, magsarefluxes=True, tls_rstar_min=0.1,
                tls_rstar_max=10, tls_mstar_min=0.1, tls_mstar_max=5.0,
                tls_oversample=8, tls_mintransits=1,
                tls_transit_template='default', nbestpeaks=5, sigclip=None,
                nworkers=nworkers)
            tlsr = tlsp['tlsresult']

        ticid = int(hdr['TICID'])
        ra, dec = hdr['RA_OBJ'], hdr['DEC_OBJ']

        print('{} converged. writing ctoi csv.'.format(identifier))
        fit_results_to_ctoi_csv(ticid, ra, dec, mafr, tlsr, outpath, toidf,
                                ctoidf, teff, teff_err, rstar, rstar_err,
                                logg, logg_err, cdipsvnum=cdipsvnum)
    else:
        print('WRN! {} did not converge, after {} steps. MUST MANUALLY FIX.'.
              format(identifier, status['n_steps_run']))