def get_frequency_grid(times,
                       samplesperpeak=5,
                       nyquistfactor=5,
                       minfreq=None,
                       maxfreq=None,
                       returnf0dfnf=False):
    '''Builds an evenly spaced frequency grid for the period finders.

    Modelled on the autofrequency function in astropy.stats.lombscargle:

    http://docs.astropy.org/en/stable/_modules/astropy/stats/lombscargle/core.html#LombScargle.autofrequency

    Parameters
    ----------
    times : np.array
        The observation times. Only the min, max and number of elements are
        used.

    samplesperpeak : int
        The frequency step is `1/(timebase * samplesperpeak)`.

    nyquistfactor : int
        Used with `samplesperpeak` and the number of observations to set the
        number of grid points when `maxfreq` is None.

    minfreq,maxfreq : float or None
        The first grid frequency and the frequency the grid should reach. If
        `minfreq` is None, the grid starts at half a frequency step. If
        `maxfreq` is None, the grid has
        `int(0.5 * samplesperpeak * nyquistfactor * times.size)` points.

    returnf0dfnf : bool
        If True, also return the start frequency, step and number of points.

    Returns
    -------
    np.array or tuple
        The frequency grid, or `(f0, df, Nf, grid)` if `returnf0dfnf` is True.

    '''

    timebase = times.max() - times.min()
    npoints = times.size

    # frequency resolution: `samplesperpeak` grid points per 1/timebase
    df = 1. / timebase / samplesperpeak

    f0 = 0.5 * df if minfreq is None else minfreq

    if maxfreq is None:
        Nf = int(0.5 * samplesperpeak * nyquistfactor * npoints)
    else:
        Nf = int(npceil((maxfreq - f0) / df))

    freqgrid = f0 + df * nparange(Nf)

    if returnf0dfnf:
        return f0, df, Nf, freqgrid

    return freqgrid
def _autocorr_func2(mags, lag, maglen, magmed, magstd):
    '''
    Alternative estimator of the autocorrelation at a single lag.

    mags MUST be an array with no nans.

    lag is the (integer) lag to evaluate; it is used as an index offset and
    MUST be less than the total number of observations in mags (maglen).

    maglen and magmed are the length and median of mags. magstd is accepted
    so the signature matches the other autocorr functions but is not used
    here.

    Follows the first definition given at:

    https://en.wikipedia.org/wiki/Correlogram#Estimation_of_autocorrelations

    Note that the variance term sums the squared deviations of only the first
    (maglen - lag) elements but divides by mags.size.

    '''

    idx = nparange(0, maglen - lag)

    # deviations from the median for the leading and the lagged samples
    head = mags[idx] - magmed
    tail = mags[idx + lag] - magmed

    autocovarfunc = npsum(head * tail) / idx.size
    varfunc = npsum(head * head) / mags.size

    return autocovarfunc / varfunc
def wma(close, length=None, asc=None, offset=None, **kwargs):
    """Indicator: Weighted Moving Average (WMA)

    Computes a rolling, linearly weighted mean of `close`. The weights are
    the integers 1..length, normalized by their sum.

    Args:
        close: The input series; it is passed through `verify_series`.
        length: Window size. Falls back to 10 when missing, zero or negative.
        asc: If True (the default when None), the most recent value in each
            window gets the largest weight. If False, the weights are
            reversed so the oldest value gets the largest weight.
        offset: Number of periods to shift the result by, as interpreted by
            `get_offset`.
        **kwargs: Accepted for API compatibility. A `min_periods` entry is
            not applied: every output value needs a full window of `length`
            values because each window is dotted with a weight vector of
            exactly that length.

    Returns:
        The weighted moving average, named ``WMA_<length>`` and tagged with
        ``category = "overlap"``.
    """
    # Validate Arguments
    close = verify_series(close)
    length = int(length) if length and length > 0 else 10
    # `asc if asc else True` always evaluated to True, which silently ignored
    # an explicit asc=False; only a missing value defaults to ascending.
    asc = True if asc is None else bool(asc)
    offset = get_offset(offset)

    # Calculate Result
    total_weight = 0.5 * length * (length + 1)
    weights = nparange(1, length + 1)
    if not asc:
        weights = weights[::-1]

    def _weighted_mean(window):
        # `window` is a raw ndarray holding exactly `length` values
        return npdot(window, weights) / total_weight

    result = close.rolling(length, min_periods=length).apply(
        _weighted_mean, raw=True
    )

    # Offset
    if offset != 0:
        result = result.shift(offset)

    # Name & Category
    result.name = f"WMA_{length}"
    result.category = "overlap"

    return result
def make_data(n=None, i=None, p=None):
    """Generate a noisy exponential curve for testing.

    The clean curve is ``i * exp(p * x)`` for ``x = 1..n``. Gaussian noise is
    added with a per-point scale of ``round(log(clean)) + clean // 10``.

    Any argument that is missing (or otherwise falsy) is drawn at random:
    ``n`` from 10..100, ``i`` from 1..100 and ``p`` from 0.01..0.50.

    Returns the tuple ``(xx, yy, yy_clean, n, i, p)``: the x values, the noisy
    curve, the clean curve and the parameters that were actually used.
    """
    from random import randint
    from numpy import log, random as nprandom, arange as nparange, exp as npexp

    # fill in missing parameters, in the order n, i, p
    if not n:
        n = randint(10, 100)
    if not i:
        i = randint(1, 100)
    if not p:
        p = randint(1, 50) / 100

    xx = nparange(1, n + 1)
    yy_clean = npexp(xx * p) * i

    noise_scale = log(yy_clean).round() + (yy_clean // 10)
    yy = yy_clean + nprandom.normal(0, noise_scale, size=n)

    return (xx, yy, yy_clean, n, i, p)
def primes_below(n: int) -> ndarray:
    """
    Computes all the primes strictly below the given integer.

    Uses a sieve of Eratosthenes: only primes up to sqrt(n) are used to strike
    out composites, and striking starts at the square of each prime.

    :param n: the upper bound of the primes to be computed.
    :return: an array containing all the primes strictly below n, in
        increasing order (empty when n <= 2).
    :raises AssertionError: if n is not an integer.
    """
    assert isinstance(
        n, int), f"n is supposed to be an integer but {n} was given."

    # is_prime[k] is True while k is still a prime candidate; 0 and 1 are not
    is_prime = nparange(n) >= 2

    i = 2
    while i * i < n:
        if is_prime[i]:
            # smaller multiples of i were already struck by smaller primes
            is_prime[i * i::i] = False
        i += 1

    # index an integer range so the result keeps the default integer dtype
    return nparange(2, n)[is_prime[2:]]
def _autocorr_func2(mags, lag, maglen, magmed, magstd):
    '''
    Alternative estimator of the autocorrelation at a single lag.

    This follows the first definition given at:

    https://en.wikipedia.org/wiki/Correlogram#Estimation_of_autocorrelations

    Parameters
    ----------

    mags : np.array
        The magnitudes array. MUST NOT have any nans.

    lag : int
        The lag to calculate the auto-correlation for. It is used as an index
        offset into `mags` and MUST be less than the total number of
        observations in `mags`.

    maglen : int
        The number of elements in the `mags` array.

    magmed : float
        The median of the `mags` array.

    magstd : float
        The standard deviation of the `mags` array. Not used by this
        estimator; kept so all autocorr functions share one signature.

    Returns
    -------

    float
        The auto-correlation at this specific `lag` value.

    Notes
    -----

    The variance term sums the squared deviations of only the first
    `maglen - lag` elements but divides by `mags.size`.

    '''

    pairindex = nparange(0, maglen - lag)

    # deviations from the median at each index and at index + lag
    leading = mags[pairindex] - magmed
    lagged = mags[pairindex + lag] - magmed

    autocovarfunc = npsum(leading * lagged) / pairindex.size
    varfunc = npsum(leading * leading) / mags.size

    acorr = autocovarfunc / varfunc
    return acorr
def _autocorr_func1(mags, lag, maglen, magmed, magstd):
    '''Computes the autocorrelation of a mag series at a single lag.

    mags MUST be an array with no nans.

    lag is the (integer) lag to evaluate; it is used as an index offset and
    MUST be less than the total number of observations in mags (maglen).

    maglen, magmed, magstd are the length, median and standard deviation of
    mags.

    This version of the function is attributed to:

    doi:10.1088/0004-637X/735/2/68 (Kim et al. 2011)

    NOTE(review): the sum runs over indices 1 .. (maglen - lag - 1), so
    element 0 never contributes, and the normalization divides by magstd
    rather than its square -- confirm both against the paper.

    '''

    idx = nparange(1, maglen - lag)

    leading = mags[idx] - magmed
    lagged = mags[idx + lag] - magmed

    normalization = 1.0 / ((maglen - lag) * magstd)

    return normalization * npsum(leading * lagged)
def _autocorr_func1(mags, lag, maglen, magmed, magstd):
    '''Computes the autocorrelation of a mag series at a single lag.

    This version of the function is attributed to: Kim et al. (`2011
    <https://dx.doi.org/10.1088/0004-637X/735/2/68>`_)

    Parameters
    ----------

    mags : np.array
        The magnitudes array. MUST NOT have any nans.

    lag : int
        The lag to calculate the auto-correlation for. It is used as an index
        offset into `mags` and MUST be less than the total number of
        observations in `mags`.

    maglen : int
        The number of elements in the `mags` array.

    magmed : float
        The median of the `mags` array.

    magstd : float
        The standard deviation of the `mags` array.

    Returns
    -------

    float
        The auto-correlation at this specific `lag` value.

    Notes
    -----

    NOTE(review): the sum runs over indices 1 .. (maglen - lag - 1), so
    element 0 never contributes, and the normalization divides by `magstd`
    rather than its square -- confirm both against the paper.

    '''

    pairindex = nparange(1, maglen - lag)

    # deviations from the median at each index and at index + lag
    dev_here = mags[pairindex] - magmed
    dev_lagged = mags[pairindex + lag] - magmed

    prefactor = 1.0 / ((maglen - lag) * magstd)
    acorr = prefactor * npsum(dev_here * dev_lagged)

    return acorr
def macf_period_find(
        times,
        mags,
        errs,
        fillgaps=0.0,
        filterwindow=11,
        forcetimebin=None,
        maxlags=None,
        maxacfpeaks=10,
        smoothacf=21,  # set for Kepler-type LCs, see details below
        smoothfunc=_smooth_acf_savgol,
        smoothfunckwargs=None,
        magsarefluxes=False,
        sigclip=3.0,
        verbose=True,
        periodepsilon=0.1,  # doesn't do anything, for consistent external API
        nworkers=None,  # doesn't do anything, for consistent external API
        startp=None,  # doesn't do anything, for consistent external API
        endp=None,  # doesn't do anything, for consistent external API
        autofreq=None,  # doesn't do anything, for consistent external API
        stepsize=None,  # doesn't do anything, for consistent external API
):
    '''This finds periods using the McQuillan+ (2013a, 2014) ACF method.

    The kwargs from `periodepsilon` to `stepsize` don't do anything but are used
    to present a consistent API for all periodbase period-finders to an outside
    driver (e.g. the one in the checkplotserver).

    Parameters
    ----------

    times,mags,errs : np.array
        The input magnitude/flux time-series to run the period-finding for.

    fillgaps : 'noiselevel' or float
        This sets what to use to fill in gaps in the time series. If this is
        'noiselevel', will smooth the light curve using a point window size of
        `filterwindow` (this should be an odd integer), subtract the smoothed LC
        from the actual LC and estimate the RMS. This RMS will be used to fill
        in the gaps. Other useful values here are 0.0, and npnan.

    filterwindow : int
        The light curve's smoothing filter window size to use if
        `fillgaps='noiselevel'`.

    forcetimebin : None or float
        This is used to force a particular cadence in the light curve other
        than the automatically determined cadence. This effectively rebins the
        light curve to this cadence. This should be in the same time units as
        `times`.

    maxlags : None or int
        This is the maximum number of lags to calculate. If None, will
        calculate all lags.

    maxacfpeaks : int
        This is the maximum number of ACF peaks to use when finding the highest
        peak and obtaining a fit period.

    smoothacf : int or None
        This is the number of points to use as the window size when smoothing
        the ACF with the `smoothfunc`. This should be an odd integer value. If
        this is None (or not a positive int), will not smooth the ACF, but this
        will probably lead to finding spurious peaks in a generally noisy ACF.

        For Kepler, a value between 21 and 51 seems to work fine. For ground
        based data, much larger values may be necessary: between 1001 and 2001
        seem to work best for the HAT surveys. This is dependent on cadence, RMS
        of the light curve, the periods of the objects you're looking for, and
        finally, any correlated noise in the light curve. Make a plot of the
        smoothed/unsmoothed ACF vs. lag using the result dict of this function
        and the `plot_acf_results` function to see the identified ACF peaks and
        what kind of smoothing might be needed.

        The value of `smoothacf` will also be used to figure out the interval to
        use when searching for local peaks in the ACF: this interval is 1/2 of
        the `smoothacf` value. If `smoothacf` is None, an interval of 1 is used.

    smoothfunc : Python function
        This is the function that will be used to smooth the ACF. This should
        take at least one kwarg: 'windowsize'. Other kwargs can be passed in
        using a dict provided in `smoothfunckwargs`. By default, this uses a
        Savitsky-Golay filter, a Gaussian filter is also provided but not
        used. Another good option would be an actual low-pass filter (generated
        using scipy.signal?) to remove all high frequency noise from the ACF.

    smoothfunckwargs : dict or None
        The dict of optional kwargs to pass in to the `smoothfunc`. The dict is
        copied before 'windowsize' is added, so the caller's dict is not
        modified.

    magsarefluxes : bool
        If your input measurements in `mags` are actually fluxes instead of
        mags, set this to True.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    verbose : bool
        If True, will indicate progress and report errors.

    Returns
    -------

    dict
        Returns a dict with results. dict['bestperiod'] is the estimated best
        period and dict['fitperiodrms'] is its estimated error. Other
        interesting things in the output include:

        - dict['acfresults']: all results from calculating the ACF. in
          particular, the unsmoothed ACF might be of interest:
          dict['acfresults']['acf'] and dict['acfresults']['lags'].

        - dict['lags'] and dict['acf'] contain the ACF after smoothing was
          applied.

        - dict['periods'] and dict['lspvals'] can be used to construct a
          pseudo-periodogram.

        - dict['naivebestperiod'] is obtained by multiplying the lag at the
          highest ACF peak with the cadence. This is usually close to the fit
          period (dict['fitbestperiod']), which is calculated by doing a fit to
          the lags vs. peak index relation as in McQuillan+ 2014. If that fit
          fails, dict['fitbestperiod'] is nan and dict['bestperiod'] falls back
          to the naive best period.

    '''

    # get the ACF
    acfres = autocorr_magseries(times, mags, errs,
                                maxlags=maxlags,
                                fillgaps=fillgaps,
                                forcetimebin=forcetimebin,
                                sigclip=sigclip,
                                magsarefluxes=magsarefluxes,
                                filterwindow=filterwindow,
                                verbose=verbose)

    xlags = acfres['lags']

    # smooth the ACF if requested
    if smoothacf and isinstance(smoothacf, int) and smoothacf > 0:

        if smoothfunckwargs is None:
            sfkwargs = {'windowsize': smoothacf}
        else:
            sfkwargs = smoothfunckwargs.copy()
            sfkwargs.update({'windowsize': smoothacf})

        xacf = smoothfunc(acfres['acf'], **sfkwargs)

    else:

        # no smoothing: record the smoothing kwargs exactly as provided so the
        # 'kwargs' entry of the result dict can always be built
        sfkwargs = smoothfunckwargs
        xacf = acfres['acf']

    # the peak search interval is half the smoothing window; smoothacf=None is
    # documented as valid, so fall back to the smallest interval in that case
    try:
        searchinterval = int(smoothacf / 2)
    except TypeError:
        searchinterval = 1

    # get the relative peak heights and fit best lag
    peakres = _get_acf_peakheights(xlags, xacf,
                                   npeaks=maxacfpeaks,
                                   searchinterval=searchinterval)

    # this is the best period's best ACF peak height
    bestlspval = peakres['bestpeakheight']

    try:

        # get the fit best lag from a linear fit to the peak index vs time(peak
        # lag) function as in McQillian+ (2014)
        fity = npconcatenate((
            [0.0, peakres['bestlag']],
            peakres['relpeaklags'][peakres['relpeaklags'] > peakres['bestlag']]
        ))
        fity = fity * acfres['cadence']
        fitx = nparange(fity.size)

        fitcoeffs, fitcovar = nppolyfit(fitx, fity, 1, cov=True)

        # fit best period is the gradient of fit
        fitbestperiod = fitcoeffs[0]
        bestperiodrms = npsqrt(fitcovar[0, 0])  # from the covariance matrix

    except Exception:

        # the fit is best-effort: mark its outputs as unavailable and carry on
        # with the naive period below instead of aborting the whole run
        LOGWARNING('linear fit to time at each peak lag '
                   'value vs. peak number failed, '
                   'naively calculated ACF period may not be accurate')
        fitcoeffs = nparray([npnan, npnan])
        fitcovar = nparray([[npnan, npnan], [npnan, npnan]])
        fitbestperiod = npnan
        bestperiodrms = npnan

    # calculate the naive best period using delta_tau = lag * cadence
    naivebestperiod = peakres['bestlag'] * acfres['cadence']

    if fitbestperiod < naivebestperiod:
        LOGWARNING('fit bestperiod = %.5f may be an alias, '
                   'naively calculated bestperiod is = %.5f' %
                   (fitbestperiod, naivebestperiod))

    if npisfinite(fitbestperiod):
        bestperiod = fitbestperiod
    else:
        bestperiod = naivebestperiod

    return {
        'bestperiod': bestperiod,
        'bestlspval': bestlspval,
        'nbestpeaks': maxacfpeaks,
        # for compliance with the common pfmethod API
        'nbestperiods': npconcatenate([
            [fitbestperiod],
            peakres['relpeaklags'][1:maxacfpeaks] * acfres['cadence']
        ]),
        'nbestlspvals': peakres['maxacfs'][:maxacfpeaks],
        'lspvals': xacf,
        'periods': xlags * acfres['cadence'],
        'acf': xacf,
        'lags': xlags,
        'method': 'acf',
        'naivebestperiod': naivebestperiod,
        'fitbestperiod': fitbestperiod,
        'fitperiodrms': bestperiodrms,
        'periodfitcoeffs': fitcoeffs,
        'periodfitcovar': fitcovar,
        'kwargs': {
            'maxlags': maxlags,
            'maxacfpeaks': maxacfpeaks,
            'fillgaps': fillgaps,
            'filterwindow': filterwindow,
            'smoothacf': smoothacf,
            'smoothfunckwargs': sfkwargs,
            'magsarefluxes': magsarefluxes,
            'sigclip': sigclip
        },
        'acfresults': acfres,
        'acfpeaks': peakres
    }
def aov_theta(times, mags, errs, frequency, binsize=0.05, minbin=9):
    '''Calculates the Schwarzenberg-Czerny AoV statistic at a test frequency.

    The time-series is phased at `1/frequency` (using the first time value as
    the epoch), the phases are split into bins of width `binsize`, and the
    scatter of the per-bin medians around the overall median is compared to
    the scatter of the individual points around the overall median.

    Parameters
    ----------

    times,mags,errs : np.array
        The input time-series and associated errors. `errs` is not used in
        the calculation.

    frequency : float
        The test frequency to calculate the theta statistic at.

    binsize : float
        The phase bin size to use.

    minbin : int
        Only phase bins holding more than this many items are used in the
        calculation of the statistic.

    Returns
    -------

    theta_aov : float
        The value of the AoV statistic at the specified `frequency`.

    '''

    # phase the series at the test period, epoch = first time value
    phased = phase_magseries(times, mags, 1.0/frequency, times[0],
                             wrap=False, sort=True)

    phases, pmags = phased['phase'], phased['mags']
    ndets = phases.size

    # assign every phased point to a phase bin
    binedges = nparange(0.0, 1.0, binsize)
    binlabels = npdigitize(phases, binedges)

    grand_median = npmedian(pmags)

    between_terms = []  # per-bin contributions to s1
    within_terms = []   # per-bin contributions to s2

    for label in npunique(binlabels):

        inbin = pmags[binlabels == label]
        nbin = inbin.size

        # skip sparsely populated bins
        if not nbin > minbin:
            continue

        bin_offset = npmedian(inbin) - grand_median
        residuals = inbin - grand_median

        between_terms.append(nbin * bin_offset * bin_offset)
        within_terms.append(npsum(residuals * residuals))

    goodbins = len(between_terms)

    s1 = npsum(nparray(between_terms))/(goodbins - 1.0)
    s2 = npsum(nparray(within_terms))/(ndets - goodbins)

    return s1/s2
def analytic_false_alarm_probability(lspinfo,
                                     times,
                                     conservative_nfreq_eff=True,
                                     peakvals=None,
                                     inplace=True):
    '''This returns the analytic false alarm probabilities for periodogram
    peak values.

    NOTE: this function was previously marked as not working. It used the F
    distribution CDF, i.e. Prob(z <= z0), where the formula below needs
    Prob(z > z0); that is fixed here by using the survival function. The
    results should still be validated before being relied upon.

    The calculation follows that on page 3 of Zechmeister & Kurster (2009)::

        FAP = 1 − [1 − Prob(z > z0)]**M

    where::

        M is the number of independent frequencies
        Prob(z > z0) is the probability of peak with value > z0
        z0 is the peak value we're evaluating

    For AoV and AoV-harmonic, the Prob(z > z0) is described by the F
    distribution, according to:

    - Schwarzenberg-Czerny (1997;
      https://ui.adsabs.harvard.edu/#abs/1997ApJ...489..941S)

    This is given by::

        F( (B-1), (N-B); theta_aov )

    Where::

        N = number of observations
        B = number of phase bins

    This translates to a scipy.stats call to the F distribution survival
    function (1 - CDF)::

        x = theta_aov_best
        prob_exceeds_val = scipy.stats.f.sf(x, (B-1.0), (N-B))

    Which we can then plug into the false alarm prob eqn above with the
    calculation of M.

    Parameters
    ----------

    lspinfo : dict
        The dict returned by the
        :py:func:`~astrobase.periodbase.spdm.aov_periodfind` function. The
        'periods', 'nbestlspvals' and 'kwargs' -> 'phasebinsize' items are
        used.

    times : np.array
        The times for which the periodogram result in ``lspinfo`` was
        calculated.

    conservative_nfreq_eff : bool
        If True, will follow the prescription given in Schwarzenberg-Czerny
        (2003):

        http://adsabs.harvard.edu/abs/2003ASPC..292..383S

        and estimate the effective number of independent frequences M_eff as::

            min(N_obs, N_freq, DELTA_f/delta_f)

    peakvals : sequence or None
        The peak values for which to evaluate the false-alarm probability. If
        None, will calculate this for each of the peak values in the
        ``nbestlspvals`` key of the ``lspinfo`` dict.

    inplace : bool
        If True, puts the results of the FAP calculation into the ``lspinfo``
        dict as a list available as ``lspinfo['falsealarmprob']``.

    Returns
    -------

    list
        The calculated false alarm probabilities for each of the peak values in
        ``peakvals``.

    '''

    from scipy.stats import f

    frequencies = 1.0/lspinfo['periods']

    M = independent_freq_count(frequencies,
                               times,
                               conservative=conservative_nfreq_eff)

    if peakvals is None:
        peakvals = lspinfo['nbestlspvals']

    # number of phase bins used for the AoV statistic and number of points
    nphasebins = nparange(0.0, 1.0, lspinfo['kwargs']['phasebinsize']).size
    ndet = times.size

    false_alarm_probs = []

    for peakval in peakvals:

        # Prob(z > z0) is the upper tail of the F distribution
        prob_exceeds_val = f.sf(peakval, nphasebins - 1.0, ndet - nphasebins)
        false_alarm_probs.append(1.0 - (1.0 - prob_exceeds_val)**M)

    if inplace:
        lspinfo['falsealarmprob'] = false_alarm_probs

    return false_alarm_probs
def aov_periodfind(times,
                   mags,
                   errs,
                   magsarefluxes=False,
                   startp=None,
                   endp=None,
                   stepsize=1.0e-4,
                   autofreq=True,
                   normalize=True,
                   phasebinsize=0.05,
                   mindetperbin=9,
                   nbestpeaks=5,
                   periodepsilon=0.1,
                   sigclip=10.0,
                   nworkers=None,
                   verbose=True):
    '''This runs a parallelized Analysis-of-Variance (AoV) period search.

    NOTE: `normalize = True` here as recommended by Schwarzenberg-Czerny 1996,
    i.e. mags will be normalized to zero and rescaled so their variance = 1.0.

    Parameters
    ----------

    times,mags,errs : np.array
        The mag/flux time-series with associated measurement errors to run the
        period-finding on.

    magsarefluxes : bool
        If the input measurement values in `mags` and `errs` are in fluxes, set
        this to True.

    startp,endp : float or None
        The minimum and maximum periods to consider for the period search. If
        `startp` is None, 0.1 is used. If `endp` is None, the length of the
        sigma-clipped time-series is used.

    stepsize : float
        The step-size in frequency to use when constructing a frequency grid for
        the period search.

    autofreq : bool
        If this is True, the value of `stepsize` will be ignored and the
        :py:func:`astrobase.periodbase.get_frequency_grid` function will be used
        to generate a frequency grid based on `startp`, and `endp`.

    normalize : bool
        This sets if the input time-series is normalized to 0.0 and rescaled
        such that its variance = 1.0. This is the recommended procedure by
        Schwarzenberg-Czerny 1996.

    phasebinsize : float
        The bin size in phase to use when calculating the AoV theta statistic at
        a test frequency.

    mindetperbin : int
        The minimum number of elements in a phase bin to consider it valid when
        calculating the AoV theta statistic at a test frequency.

    nbestpeaks : int
        The number of 'best' peaks to return from the periodogram results,
        starting from the global maximum of the periodogram peak values.

    periodepsilon : float
        The fractional difference between successive values of 'best' periods
        when sorting by periodogram power to consider them as separate periods
        (as opposed to part of the same periodogram peak). This is used to avoid
        broad peaks in the periodogram and make sure the 'best' periods returned
        are all actually independent.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    nworkers : int or None
        The number of parallel workers to use when calculating the periodogram.
        If this is None (or larger than NCPUS), NCPUS workers are used.

    verbose : bool
        If this is True, will indicate progress and details about the frequency
        grid used for the period search.

    Returns
    -------

    dict
        This function returns a dict, referred to as an `lspinfo` dict in other
        astrobase functions that operate on periodogram results. This is a
        standardized format across all astrobase period-finders, and is of the
        form below::

            {'bestperiod': the best period value in the periodogram,
             'bestlspval': the periodogram peak associated with the best period,
             'nbestpeaks': the input value of nbestpeaks,
             'nbestlspvals': nbestpeaks-size list of best period peak values,
             'nbestperiods': nbestpeaks-size list of best periods,
             'lspvals': the full array of periodogram powers,
             'periods': the full array of periods considered,
             'method':'aov' -> the name of the period-finder method,
             'kwargs':{ dict of all of the input kwargs for record-keeping}}

        If there are too few points after sigma-clipping, or no finite
        periodogram values, 'bestperiod' and 'bestlspval' are nan and the
        array-valued items are None.

    '''

    # the input kwargs recorded in every result dict
    inputkwargs = {'startp':startp,
                   'endp':endp,
                   'stepsize':stepsize,
                   'normalize':normalize,
                   'phasebinsize':phasebinsize,
                   'mindetperbin':mindetperbin,
                   'autofreq':autofreq,
                   'periodepsilon':periodepsilon,
                   'nbestpeaks':nbestpeaks,
                   'sigclip':sigclip}

    def _noresult():
        # the result dict returned whenever no periodogram can be produced
        return {'bestperiod':npnan,
                'bestlspval':npnan,
                'nbestpeaks':nbestpeaks,
                'nbestlspvals':None,
                'nbestperiods':None,
                'lspvals':None,
                'periods':None,
                'method':'aov',
                'kwargs':inputkwargs}

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if not (len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9):

        LOGERROR('no good detections for these times and mags, skipping...')
        return _noresult()

    # get the frequencies to use
    if startp:
        endf = 1.0/startp
    else:
        # default start period is 0.1 day
        endf = 1.0/0.1

    if endp:
        startf = 1.0/endp
    else:
        # default end period is length of time series
        startf = 1.0/(stimes.max() - stimes.min())

    # if we're not using autofreq, then use the provided frequencies
    if not autofreq:
        frequencies = nparange(startf, endf, stepsize)
        if verbose:
            LOGINFO(
                'using %s frequency points, start P = %.3f, end P = %.3f' %
                (frequencies.size, 1.0/endf, 1.0/startf)
            )
    else:
        # this gets an automatic grid of frequencies to use
        frequencies = get_frequency_grid(stimes,
                                         minfreq=startf,
                                         maxfreq=endf)
        if verbose:
            LOGINFO(
                'using autofreq with %s frequency points, '
                'start P = %.3f, end P = %.3f' %
                (frequencies.size,
                 1.0/frequencies.max(),
                 1.0/frequencies.min())
            )

    # map to parallel workers
    if (not nworkers) or (nworkers > NCPUS):
        nworkers = NCPUS
        if verbose:
            LOGINFO('using %s workers...' % nworkers)

    # renormalize the working mags to zero and scale them so that the
    # variance = 1 for use with our LSP functions
    if normalize:
        nmags = (smags - npmedian(smags))/npstd(smags)
    else:
        nmags = smags

    tasks = [(stimes, nmags, serrs, x, phasebinsize, mindetperbin)
             for x in frequencies]

    # always shut the pool down, even if a worker raises, so that worker
    # processes are not leaked
    pool = Pool(nworkers)
    try:
        lsp = pool.map(_aov_worker, tasks)
    finally:
        pool.close()
        pool.join()

    lsp = nparray(lsp)
    periods = 1.0/frequencies

    # find the nbestpeaks for the periodogram: 1. sort the lsp array by
    # highest value first 2. go down the values until we find nbestpeaks
    # values that are separated by at least periodepsilon in period

    # make sure to filter out non-finite values
    finitepeakind = npisfinite(lsp)
    finlsp = lsp[finitepeakind]
    finperiods = periods[finitepeakind]

    # make sure that finlsp has finite values before we work on it
    try:

        bestperiodind = npargmax(finlsp)

    except ValueError:

        LOGERROR('no finite periodogram values '
                 'for this mag series, skipping...')
        return _noresult()

    sortedlspind = npargsort(finlsp)[::-1]
    sortedlspperiods = finperiods[sortedlspind]
    sortedlspvals = finlsp[sortedlspind]

    # now get the nbestpeaks
    nbestperiods, nbestlspvals, peakcount = (
        [finperiods[bestperiodind]],
        [finlsp[bestperiodind]],
        1
    )
    prevperiod = sortedlspperiods[0]

    # find the best nbestpeaks in the lsp and their periods
    for period, lspval in zip(sortedlspperiods, sortedlspvals):

        if peakcount == nbestpeaks:
            break

        perioddiff = abs(period - prevperiod)
        bestperiodsdiff = [abs(period - x) for x in nbestperiods]

        # this ensures that this period is different from the last
        # period and from all the other existing best periods by
        # periodepsilon to make sure we jump to an entire different peak
        # in the periodogram
        if (perioddiff > (periodepsilon*prevperiod) and
                all(x > (periodepsilon*period) for x in bestperiodsdiff)):
            nbestperiods.append(period)
            nbestlspvals.append(lspval)
            peakcount = peakcount + 1

        prevperiod = period

    return {'bestperiod':finperiods[bestperiodind],
            'bestlspval':finlsp[bestperiodind],
            'nbestpeaks':nbestpeaks,
            'nbestlspvals':nbestlspvals,
            'nbestperiods':nbestperiods,
            'lspvals':lsp,
            'periods':periods,
            'method':'aov',
            'kwargs':inputkwargs}
def bls_parallel_pfind(
        times,
        mags,
        errs,
        magsarefluxes=False,
        startp=0.1,  # by default, search from 0.1 d to...
        endp=100.0,  # ... 100.0 d -- don't search full timebase
        stepsize=1.0e-4,
        mintransitduration=0.01,  # minimum transit length in phase
        maxtransitduration=0.8,  # maximum transit length in phase
        nphasebins=200,
        autofreq=True,  # figure out f0, nf, and df automatically
        nbestpeaks=5,
        periodepsilon=0.1,  # 0.1
        nworkers=None,
        sigclip=10.0,
        verbose=True):
    '''Runs the Box Least Squares Fitting Search for transit-shaped signals.

    Based on eebls.f from Kovacs et al. 2002 and python-bls from Foreman-Mackey
    et al. 2015. Breaks up the full frequency space into chunks and passes them
    to parallel BLS workers.

    NOTE: the combined BLS spectrum produced by this function is not identical
    to that produced by running BLS in one shot for the entire frequency
    space. There are differences on the order of 1.0e-3 or so in the respective
    peak values, but peaks appear at the same frequencies for both methods. This
    is likely due to different aliasing caused by smaller chunks of the
    frequency space used by the parallel workers in this function. When in
    doubt, confirm results for this parallel implementation by comparing to
    those from the serial implementation.

    Parameters
    ----------

    times,mags,errs : np.array
        The input time-series; it is sigma-clipped before the search.

    magsarefluxes : bool
        Passed to the sigma-clipping step; set to True if `mags` are fluxes.

    startp,endp : float
        The minimum and maximum periods to search. The frequency grid starts
        at `1/endp`. If `1/endp` is below `1/timebase`, the grid start is
        moved to `2/timebase`.

    stepsize : float
        The frequency step of the grid. Ignored (recomputed from
        `mintransitduration` and the timebase) if `autofreq` is True.

    mintransitduration,maxtransitduration : float
        The minimum and maximum transit durations in phase handed to the BLS
        workers.

    nphasebins : int
        The number of phase bins handed to the BLS workers. Ignored (set to
        `ceil(2/mintransitduration)`) if `autofreq` is True.

    autofreq : bool
        If True, `stepsize` and `nphasebins` are determined automatically.

    nbestpeaks : int
        The number of 'best' periodogram peaks to return.

    periodepsilon : float
        The fractional period difference required for a peak to be counted as
        separate from the previously considered and already accepted peaks.

    nworkers : int or None
        The number of parallel workers. If None (or larger than NCPUS), NCPUS
        workers are used.

    sigclip : float or int or sequence of two floats/ints or None
        The sigma-clip specification passed to the sigma-clipping step.

    verbose : bool
        If True, logs the frequency grid setup and per-worker chunks.

    Returns
    -------

    dict
        A dict with the keys 'bestperiod', 'bestlspval', 'nbestpeaks',
        'nbestlspvals', 'nbestperiods', 'lspvals', 'frequencies', 'periods',
        'blsresult' (the list of per-worker results), 'stepsize', 'nfreq',
        'nphasebins', 'mintransitduration', 'maxtransitduration', 'method'
        ('bls') and 'kwargs'. If there are too few points after
        sigma-clipping, or no finite periodogram values, 'bestperiod' and
        'bestlspval' are nan and the array-valued items are None.

    '''

    def _inputkwargs():
        # built at return time so that the autofreq-adjusted stepsize and
        # nphasebins are the ones recorded
        return {
            'startp': startp,
            'endp': endp,
            'stepsize': stepsize,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'nphasebins': nphasebins,
            'autofreq': autofreq,
            'periodepsilon': periodepsilon,
            'nbestpeaks': nbestpeaks,
            'sigclip': sigclip
        }

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if not (len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9):

        LOGERROR('no good detections for these times and mags, skipping...')
        return {
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'periods': None,
            'blsresult': None,
            'stepsize': stepsize,
            'nfreq': None,
            'nphasebins': None,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'method': 'bls',
            'kwargs': _inputkwargs()
        }

    # if we're setting up everything automatically
    if autofreq:

        # figure out the best number of phasebins to use
        nphasebins = int(np.ceil(2.0 / mintransitduration))

        # use heuristic to figure out best timestep
        stepsize = 0.25 * mintransitduration / (stimes.max() - stimes.min())

        setupmsg = 'autofreq = True: using AUTOMATIC values for '

    else:

        setupmsg = 'autofreq = False: using PROVIDED values for '

    # now figure out the frequencies to use
    minfreq = 1.0 / endp
    maxfreq = 1.0 / startp
    nfreq = int(np.ceil((maxfreq - minfreq) / stepsize))

    # say what we're using
    if verbose:
        LOGINFO('min P: %s, max P: %s, nfreq: %s, '
                'minfreq: %s, maxfreq: %s' % (startp, endp, nfreq,
                                              minfreq, maxfreq))
        LOGINFO((setupmsg +
                 'freq stepsize: %s, nphasebins: %s, '
                 'min transit duration: %s, max transit duration: %s') %
                (stepsize, nphasebins,
                 mintransitduration, maxtransitduration))

    # check the minimum frequency
    # NOTE(review): nfreq is not recomputed after minfreq is raised here, so
    # the grid then extends past maxfreq -- confirm whether this is intended
    if minfreq < (1.0 / (stimes.max() - stimes.min())):

        minfreq = 2.0 / (stimes.max() - stimes.min())
        if verbose:
            LOGWARNING('the requested max P = %.3f is larger than '
                       'the time base of the observations = %.3f, '
                       ' will make minfreq = 2 x 1/timebase'
                       % (endp, stimes.max() - stimes.min()))
            LOGINFO('new minfreq: %s, maxfreq: %s' %
                    (minfreq, maxfreq))

    #############################
    ## NOW RUN BLS IN PARALLEL ##
    #############################

    # fix number of CPUs if needed
    if not nworkers or nworkers > NCPUS:
        nworkers = NCPUS
        if verbose:
            LOGINFO('using %s workers...' % nworkers)

    # break up the tasks into chunks: every worker gets csint frequencies and
    # the last worker also takes the csrem left-over ones
    frequencies = minfreq + nparange(nfreq) * stepsize

    csrem = int(fmod(nfreq, nworkers))
    csint = int(float(nfreq / nworkers))
    chunk_minfreqs, chunk_nfreqs = [], []

    for x in range(nworkers):

        this_minfreqs = frequencies[x * csint]

        # handle usual nfreqs
        if x < (nworkers - 1):
            this_nfreqs = frequencies[x * csint:x * csint + csint].size
        else:
            this_nfreqs = frequencies[x * csint:x * csint + csint + csrem].size

        chunk_minfreqs.append(this_minfreqs)
        chunk_nfreqs.append(this_nfreqs)

    # populate the tasks list; each task carries the chunk's number of
    # frequencies (index 2) followed by its minimum frequency (index 3)
    tasks = [(stimes, smags,
              chunk_nfreq, chunk_minfreq,
              stepsize, nphasebins,
              mintransitduration, maxtransitduration)
             for (chunk_minfreq, chunk_nfreq)
             in zip(chunk_minfreqs, chunk_nfreqs)]

    if verbose:
        for ind, task in enumerate(tasks):
            LOGINFO('worker %s: minfreq = %.6f, nfreqs = %s' %
                    (ind + 1, task[3], task[2]))
        LOGINFO('running...')

    # run the pool and always shut it down, even if a worker raises, so that
    # worker processes are not leaked
    pool = Pool(nworkers)
    try:
        results = pool.map(parallel_bls_worker, tasks)
    finally:
        pool.close()
        pool.join()

    # now concatenate the output lsp arrays
    lsp = np.concatenate([x['power'] for x in results])
    periods = 1.0 / frequencies

    # find the nbestpeaks for the periodogram: 1. sort the lsp array
    # by highest value first 2. go down the values until we find
    # nbestpeaks values that are separated by at least periodepsilon in
    # period

    # make sure to get only the finite peaks in the periodogram
    # this is needed because BLS may produce infs for some peaks
    finitepeakind = npisfinite(lsp)
    finlsp = lsp[finitepeakind]
    finperiods = periods[finitepeakind]

    # make sure that finlsp has finite values before we work on it
    try:

        bestperiodind = npargmax(finlsp)

    except ValueError:

        LOGERROR('no finite periodogram values '
                 'for this mag series, skipping...')
        return {
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'periods': None,
            'method': 'bls',
            'kwargs': _inputkwargs()
        }

    sortedlspind = np.argsort(finlsp)[::-1]
    sortedlspperiods = finperiods[sortedlspind]
    sortedlspvals = finlsp[sortedlspind]

    # now get the nbestpeaks
    nbestperiods, nbestlspvals, peakcount = ([finperiods[bestperiodind]],
                                             [finlsp[bestperiodind]],
                                             1)
    prevperiod = sortedlspperiods[0]

    # find the best nbestpeaks in the lsp and their periods
    for period, lspval in zip(sortedlspperiods, sortedlspvals):

        if peakcount == nbestpeaks:
            break

        perioddiff = abs(period - prevperiod)
        bestperiodsdiff = [abs(period - x) for x in nbestperiods]

        # this ensures that this period is different from the last
        # period and from all the other existing best periods by
        # periodepsilon to make sure we jump to an entire different
        # peak in the periodogram
        if (perioddiff > (periodepsilon * prevperiod) and
                all(x > (periodepsilon * prevperiod)
                    for x in bestperiodsdiff)):
            nbestperiods.append(period)
            nbestlspvals.append(lspval)
            peakcount = peakcount + 1

        prevperiod = period

    # generate the return dict
    resultdict = {
        'bestperiod': finperiods[bestperiodind],
        'bestlspval': finlsp[bestperiodind],
        'nbestpeaks': nbestpeaks,
        'nbestlspvals': nbestlspvals,
        'nbestperiods': nbestperiods,
        'lspvals': lsp,
        'frequencies': frequencies,
        'periods': periods,
        'blsresult': results,
        'stepsize': stepsize,
        'nfreq': nfreq,
        'nphasebins': nphasebins,
        'mintransitduration': mintransitduration,
        'maxtransitduration': maxtransitduration,
        'method': 'bls',
        'kwargs': _inputkwargs()
    }

    return resultdict
def bls_serial_pfind(
        times,
        mags,
        errs,
        magsarefluxes=False,
        startp=0.1,  # search from 0.1 d to...
        endp=100.0,  # ... 100.0 d -- don't search full timebase
        stepsize=5.0e-4,
        mintransitduration=0.01,  # minimum transit length in phase
        maxtransitduration=0.8,  # maximum transit length in phase
        nphasebins=200,
        autofreq=True,  # figure out f0, nf, and df automatically
        periodepsilon=0.1,
        nbestpeaks=5,
        sigclip=10.0,
        verbose=True
):
    '''Runs the Box Least Squares Fitting Search for transit-shaped signals.

    Based on eebls.f from Kovacs et al. 2002 and python-bls from
    Foreman-Mackey et al. 2015. This is the serial version (which is good
    enough in most cases because BLS in Fortran is fairly fast). If nfreq >
    5e5, this will take a while.

    Parameters
    ----------

    times,mags,errs : np.array
        The time-series to search. These are passed through
        `sigclip_magseries` before anything else is done with them.

    magsarefluxes : bool
        Forwarded to `sigclip_magseries`.

    startp,endp : float
        The minimum and maximum periods to search. The frequency grid runs
        from `1.0/endp` upwards in steps of `stepsize`.

    stepsize : float
        The frequency step of the grid. Ignored (recomputed) if `autofreq`
        is truthy.

    mintransitduration,maxtransitduration : float
        The minimum and maximum transit durations in units of phase.

    nphasebins : int
        The number of phase bins handed to the BLS runner. Ignored
        (recomputed) if `autofreq` is truthy.

    autofreq : bool
        If truthy, the following values replace the provided ones::

            nphasebins = int(ceil(2.0/mintransitduration))
            stepsize = 0.25*mintransitduration/(times.max()-times.min())

        where `times` here are the sigma-clipped times.

    periodepsilon : float
        The fractional period difference a candidate peak must exceed,
        relative to the previously examined candidate and to every peak
        already chosen, to be counted as a separate peak.

    nbestpeaks : int
        The number of best peaks to collect, starting from the global
        maximum of the periodogram.

    sigclip : float or int or sequence of two floats/ints or None
        Forwarded to `sigclip_magseries`.

    verbose : bool
        If True, logs the frequency-grid settings and related warnings.

    Returns
    -------

    dict
        A dict with the keys: 'bestperiod', 'bestlspval', 'nbestpeaks',
        'nbestlspvals', 'nbestperiods', 'lspvals', 'frequencies',
        'periods', 'blsresult', 'stepsize', 'nfreq', 'nphasebins',
        'mintransitduration', 'maxtransitduration', 'method', 'kwargs'.

        If there are too few points after sigma-clipping, if the BLS run
        raises, or if the periodogram has no finite values, the same keys
        are returned with 'bestperiod' and 'bestlspval' set to nan and the
        array/result entries set to None.

    '''

    def _kwargs_record():
        # Built lazily: `stepsize` and `nphasebins` are overwritten below
        # when autofreq is set, and the record must show the values in
        # effect at return time.
        return {
            'startp': startp,
            'endp': endp,
            'stepsize': stepsize,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'nphasebins': nphasebins,
            'autofreq': autofreq,
            'periodepsilon': periodepsilon,
            'nbestpeaks': nbestpeaks,
            'sigclip': sigclip,
        }

    def _failure(nfreq_used, nphasebins_used):
        # Every failure path shares the key set of the success dict so that
        # callers can index a result without knowing which path produced it.
        return {
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'frequencies': None,
            'periods': None,
            'blsresult': None,
            'stepsize': stepsize,
            'nfreq': nfreq_used,
            'nphasebins': nphasebins_used,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'method': 'bls',
            'kwargs': _kwargs_record(),
        }

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if not (len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9):
        LOGERROR('no good detections for these times and mags, skipping...')
        return _failure(None, None)

    timebase = stimes.max() - stimes.min()

    # if we're setting up everything automatically, override the provided
    # number of phase bins and the frequency step with the heuristics
    if autofreq:
        nphasebins = int(np.ceil(2.0 / mintransitduration))
        stepsize = 0.25 * mintransitduration / timebase

    # now figure out the frequencies to use
    minfreq = 1.0 / endp
    maxfreq = 1.0 / startp
    nfreq = int(np.ceil((maxfreq - minfreq) / stepsize))

    # say what we're using
    if verbose:
        LOGINFO('min P: %s, max P: %s, nfreq: %s, '
                'minfreq: %s, maxfreq: %s' %
                (startp, endp, nfreq, minfreq, maxfreq))
        if autofreq:
            LOGINFO('autofreq = True: using AUTOMATIC values for '
                    'freq stepsize: %s, nphasebins: %s, '
                    'min transit duration: %s, max transit duration: %s' %
                    (stepsize, nphasebins,
                     mintransitduration, maxtransitduration))
        else:
            LOGINFO('autofreq = False: using PROVIDED values for '
                    'freq stepsize: %s, nphasebins: %s, '
                    'min transit duration: %s, max transit duration: %s' %
                    (stepsize, nphasebins,
                     mintransitduration, maxtransitduration))

    if nfreq > 5.0e5 and verbose:
        LOGWARNING('more than 5.0e5 frequencies to go through; '
                   'this will take a while. '
                   'you might want to use the '
                   'periodbase.bls_parallel_pfind function instead')

    # NOTE: nfreq is not recomputed after minfreq is raised here, so the grid
    # keeps its original length and its upper end moves past maxfreq.
    if minfreq < (1.0 / timebase):
        if verbose:
            LOGWARNING('the requested max P = %.3f is larger than '
                       'the time base of the observations = %.3f, '
                       ' will make minfreq = 2 x 1/timebase' %
                       (endp, timebase))
        minfreq = 2.0 / timebase
        if verbose:
            LOGINFO('new minfreq: %s, maxfreq: %s' % (minfreq, maxfreq))

    # run BLS
    try:

        blsresult = _bls_runner(stimes,
                                smags,
                                nfreq,
                                minfreq,
                                stepsize,
                                nphasebins,
                                mintransitduration,
                                maxtransitduration)

        frequencies = minfreq + nparange(nfreq) * stepsize
        periods = 1.0 / frequencies
        lsp = blsresult['power']

        # make sure to get only the finite peaks in the periodogram
        # this is needed because BLS may produce infs for some peaks
        finitepeakind = npisfinite(lsp)
        finlsp = lsp[finitepeakind]
        finperiods = periods[finitepeakind]

        # argmax raises ValueError on an empty array, i.e. when nothing in
        # the periodogram was finite
        try:
            bestperiodind = npargmax(finlsp)
        except ValueError:
            LOGERROR('no finite periodogram values '
                     'for this mag series, skipping...')
            return _failure(nfreq, nphasebins)

        # walk the periodogram from the highest power downwards
        sortedlspind = np.argsort(finlsp)[::-1]
        sortedlspperiods = finperiods[sortedlspind]
        sortedlspvals = finlsp[sortedlspind]

        # the global maximum is always the first of the best peaks
        nbestperiods = [finperiods[bestperiodind]]
        nbestlspvals = [finlsp[bestperiodind]]
        peakcount = 1
        prevperiod = sortedlspperiods[0]

        # find the best nbestpeaks in the lsp and their periods
        for period, lspval in zip(sortedlspperiods, sortedlspvals):

            if peakcount == nbestpeaks:
                break

            perioddiff = abs(period - prevperiod)
            bestperiodsdiff = [abs(period - x) for x in nbestperiods]

            # this ensures that this period is different from the last
            # period and from all the other existing best periods by
            # periodepsilon to make sure we jump to an entire different
            # peak in the periodogram. the distance to the already chosen
            # peaks is scaled by the candidate period itself; prevperiod is
            # only the right scale for the comparison against prevperiod.
            if (perioddiff > (periodepsilon * prevperiod) and
                    all(x > (periodepsilon * period)
                        for x in bestperiodsdiff)):
                nbestperiods.append(period)
                nbestlspvals.append(lspval)
                peakcount = peakcount + 1

            prevperiod = period

        # generate the return dict
        return {
            'bestperiod': finperiods[bestperiodind],
            'bestlspval': finlsp[bestperiodind],
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': nbestlspvals,
            'nbestperiods': nbestperiods,
            'lspvals': lsp,
            'frequencies': frequencies,
            'periods': periods,
            'blsresult': blsresult,
            'stepsize': stepsize,
            'nfreq': nfreq,
            'nphasebins': nphasebins,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'method': 'bls',
            'kwargs': _kwargs_record(),
        }

    except Exception:
        # deliberate best-effort boundary: log the traceback and hand back a
        # failure dict instead of propagating
        LOGEXCEPTION('BLS failed!')
        return _failure(nfreq, nphasebins)
def bls_serial_pfind(
        times,
        mags,
        errs,
        magsarefluxes=False,
        startp=0.1,  # search from 0.1 d to...
        endp=100.0,  # ... 100.0 d -- don't search full timebase
        stepsize=5.0e-4,
        mintransitduration=0.01,  # minimum transit length in phase
        maxtransitduration=0.4,  # maximum transit length in phase
        ndurations=100,
        autofreq=True,  # figure out f0, nf, and df automatically
        blsobjective='likelihood',
        blsmethod='fast',
        blsoversample=10,
        blsmintransits=3,
        blsfreqfactor=10.0,
        periodepsilon=0.1,
        nbestpeaks=5,
        sigclip=10.0,
        endp_timebase_check=True,
        verbose=True,
        raiseonfail=False
):
    '''Runs the Box Least Squares Fitting Search for transit-shaped signals.

    Based on the version of BLS in Astropy 3.1:
    `astropy.stats.BoxLeastSquares`. If you don't have Astropy 3.1, this module
    will fail to import. Note that by default, this implementation of
    `bls_serial_pfind` doesn't use the `.autoperiod()` function from
    `BoxLeastSquares` but uses the same auto frequency-grid generation as the
    functions in `periodbase.kbls`. If you want to use Astropy's
    implementation, set the value of `autofreq` kwarg to 'astropy'.

    The dict returned from this function contains a `blsmodel` key, which is
    the generated model from Astropy's BLS. Use the `.compute_stats()` method
    to calculate the required stats like SNR, depth, duration, etc.

    Parameters
    ----------

    times,mags,errs : np.array
        The magnitude/flux time-series to search for transits.

    magsarefluxes : bool
        If the input measurement values in `mags` and `errs` are in fluxes, set
        this to True.

    startp,endp : float
        The minimum and maximum periods to consider for the transit search.

    stepsize : float
        The step-size in frequency to use when constructing a frequency grid
        for the period search.

    mintransitduration,maxtransitduration : float
        The minimum and maximum transitdurations (in units of phase) to
        consider for the transit search. These are multiplied by `startp` to
        obtain the durations in units of time handed to Astropy.

    ndurations : int
        The number of transit durations to use in the period-search.

    autofreq : bool or str
        If this is True, the value of `stepsize` will be ignored, and it,
        along with a frequency-grid, will be determined based on the
        following relations::

            stepsize = 0.25*mintransitduration/(times.max()-times.min())
            minfreq = 1.0/endp
            maxfreq = 1.0/startp
            nfreq = int(ceil((maxfreq - minfreq)/stepsize))

        If this is False, you must set `startp`, `endp`, and `stepsize` as
        appropriate.

        If this is str == 'astropy', will use the
        `astropy.stats.BoxLeastSquares.autoperiod()` function to calculate the
        frequency grid instead of the kbls method.

        Any other value logs an error and makes this function return None.

    blsobjective : {'likelihood','snr'}
        Sets the type of objective to optimize in the `BoxLeastSquares.power()`
        function.

    blsmethod : {'fast','slow'}
        Sets the type of method to use in the `BoxLeastSquares.power()`
        function.

    blsoversample : int
        Sets the `oversample` kwarg for the `BoxLeastSquares.power()` function.

    blsmintransits : int
        Sets the `minimum_n_transit` kwarg for the
        `BoxLeastSquares.autoperiod()` function.

    blsfreqfactor : float
        Sets the `frequency_factor` kwarg for the
        `BoxLeastSquares.autoperiod()` function.

    periodepsilon : float
        The fractional difference between successive values of 'best' periods
        when sorting by periodogram power to consider them as separate periods
        (as opposed to part of the same periodogram peak). This is used to
        avoid broad peaks in the periodogram and make sure the 'best' periods
        returned are all actually independent.

    nbestpeaks : int
        The number of 'best' peaks to return from the periodogram results,
        starting from the global maximum of the periodogram peak values.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed
        using the number provided as the sigma-multiplier to cut out from the
        input time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    endp_timebase_check : bool
        If True, will check if the ``endp`` value is larger than the time-base
        of the observations. If it is, the minimum frequency is raised to
        2/time-base, i.e. a maximum period of half the time-base. If False,
        will allow an ``endp`` larger than the time-base of the observations.

    verbose : bool
        If this is True, will indicate progress and details about the frequency
        grid used for the period search.

    raiseonfail : bool
        If True, re-raises any exception hit while running BLS. Otherwise,
        the exception is logged and a dict with nan/None results is returned.

    Returns
    -------

    dict
        This function returns a dict, referred to as an `lspinfo` dict in other
        astrobase functions that operate on periodogram results. This is a
        standardized format across all astrobase period-finders, and is of the
        form below::

            {'bestperiod': the best period value in the periodogram,
             'bestlspval': the periodogram peak associated with the best period,
             'nbestpeaks': the input value of nbestpeaks,
             'nbestinds': indices of the best peaks in the finite-filtered
                          periodogram,
             'nbestlspvals': nbestpeaks-size list of best period peak values,
             'nbestperiods': nbestpeaks-size list of best periods,
             'lspvals': the full array of periodogram powers,
             'frequencies': the full array of frequencies considered,
             'periods': the full array of periods considered,
             'durations': the array of durations used to run BLS,
             'blsresult': Astropy BLS result object (BoxLeastSquaresResult),
             'blsmodel': Astropy BLS BoxLeastSquares object used for work,
             'stepsize': the actual stepsize used,
             'nfreq': the actual nfreq used,
             'mintransitduration': the input mintransitduration,
             'maxtransitduration': the input maxtransitdurations,
             'method':'bls' -> the name of the period-finder method,
             'kwargs':{ dict of all of the input kwargs for record-keeping}}

        On failure, the same keys are present with 'bestperiod' and
        'bestlspval' set to nan and the array/object entries set to None.

    '''

    def _kwargs_record():
        # Built lazily: `stepsize` is overwritten below when autofreq is
        # True, and the record must show the value in effect at return time.
        return {
            'startp': startp,
            'endp': endp,
            'stepsize': stepsize,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'ndurations': ndurations,
            'blsobjective': blsobjective,
            'blsmethod': blsmethod,
            'blsoversample': blsoversample,
            'blsntransits': blsmintransits,
            'blsfreqfactor': blsfreqfactor,
            'autofreq': autofreq,
            'periodepsilon': periodepsilon,
            'nbestpeaks': nbestpeaks,
            'sigclip': sigclip,
            'magsarefluxes': magsarefluxes,
        }

    def _failure(nfreq_used):
        # Every failure path shares the key set of the success dict so that
        # callers can index a result without knowing which path produced it.
        return {
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestpeaks': nbestpeaks,
            'nbestinds': None,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'frequencies': None,
            'periods': None,
            'durations': None,
            'blsresult': None,
            'blsmodel': None,
            'stepsize': stepsize,
            'nfreq': nfreq_used,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'method': 'bls',
            'kwargs': _kwargs_record(),
        }

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if not (len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9):
        LOGERROR('no good detections for these times and mags, skipping...')
        failed = _failure(None)
        # this path has always carried an 'nphasebins' entry; keep it so
        # existing consumers of the dict don't break
        failed['nphasebins'] = None
        return failed

    timebase = stimes.max() - stimes.min()

    if isinstance(autofreq, bool):

        use_autoperiod = False

        # if we're setting up everything automatically, use the heuristic
        # to figure out the best frequency step
        if autofreq:
            stepsize = 0.25 * mintransitduration / timebase

        # now figure out the frequencies to use
        minfreq = 1.0 / endp
        maxfreq = 1.0 / startp
        nfreq = int(npceil((maxfreq - minfreq) / stepsize))

        # say what we're using
        if verbose:
            LOGINFO('min P: %s, max P: %s, nfreq: %s, '
                    'minfreq: %s, maxfreq: %s' %
                    (startp, endp, nfreq, minfreq, maxfreq))
            if autofreq:
                LOGINFO('autofreq = True: using AUTOMATIC values for '
                        'freq stepsize: %s, ndurations: %s, '
                        'min transit duration: %s, max transit duration: %s'
                        % (stepsize, ndurations,
                           mintransitduration, maxtransitduration))
            else:
                LOGINFO('autofreq = False: using PROVIDED values for '
                        'freq stepsize: %s, ndurations: %s, '
                        'min transit duration: %s, max transit duration: %s'
                        % (stepsize, ndurations,
                           mintransitduration, maxtransitduration))

    elif isinstance(autofreq, str) and autofreq == 'astropy':

        use_autoperiod = True
        minfreq = 1.0 / endp
        maxfreq = 1.0 / startp
        # the grid size is only known once .autoperiod() has run; bind the
        # name now so the failure handler below can always report it
        nfreq = None

    else:

        LOGERROR("unknown autofreq kwarg encountered. can't continue...")
        return None

    # check the minimum frequency
    # NOTE: nfreq is not recomputed after minfreq is raised here, and the
    # .autoperiod() call below is still given maximum_period=endp.
    if (minfreq < (1.0 / timebase)) and endp_timebase_check:
        LOGWARNING('the requested max P = %.3f is larger than '
                   'the time base of the observations = %.3f, '
                   ' will make minfreq = 2 x 1/timebase' %
                   (endp, timebase))
        minfreq = 2.0 / timebase
        LOGWARNING('new minfreq: %s, maxfreq: %s' % (minfreq, maxfreq))

    # run BLS
    try:

        # astropy's BLS requires durations in units of time
        durations = nplinspace(mintransitduration * startp,
                               maxtransitduration * startp,
                               ndurations)

        # set up the correct units for the BLS model
        if magsarefluxes:
            blsmodel = BoxLeastSquares(
                stimes * u.day,
                smags * u.dimensionless_unscaled,
                dy=serrs * u.dimensionless_unscaled
            )
        else:
            blsmodel = BoxLeastSquares(
                stimes * u.day,
                smags * u.mag,
                dy=serrs * u.mag
            )

        # use autoperiod if requested
        if use_autoperiod:

            periods = nparray(
                blsmodel.autoperiod(
                    durations,
                    minimum_period=startp,
                    maximum_period=endp,
                    minimum_n_transit=blsmintransits,
                    frequency_factor=blsfreqfactor
                )
            )
            # the result dict reports the frequency grid too; without this
            # assignment building it fails with a NameError in this mode
            frequencies = 1.0 / periods
            nfreq = periods.size

            if verbose:
                LOGINFO("autofreq = 'astropy', used .autoperiod() with "
                        "minimum_n_transit = %s, freq_factor = %s "
                        "to generate the frequency grid" %
                        (blsmintransits, blsfreqfactor))
                LOGINFO('stepsize = %.5f, nfreq = %s, minfreq = %.5f, '
                        'maxfreq = %.5f, ndurations = %s' %
                        (abs(1.0 / periods[1] - 1.0 / periods[0]),
                         nfreq,
                         1.0 / periods.max(),
                         1.0 / periods.min(),
                         durations.size))

        # otherwise, use kbls method
        else:

            frequencies = minfreq + nparange(nfreq) * stepsize
            periods = 1.0 / frequencies

        if nfreq > 5.0e5 and verbose:
            LOGWARNING('more than 5.0e5 frequencies to go through; '
                       'this will take a while. '
                       'you might want to use the '
                       'abls.bls_parallel_pfind function instead')

        # run the periodogram
        blsresult = blsmodel.power(
            periods * u.day,
            durations * u.day,
            objective=blsobjective,
            method=blsmethod,
            oversample=blsoversample
        )

        # get the peak values
        lsp = nparray(blsresult.power)

        # make sure to get only the finite peaks in the periodogram
        # this is needed because BLS may produce infs for some peaks
        finitepeakind = npisfinite(lsp)
        finlsp = lsp[finitepeakind]
        finperiods = periods[finitepeakind]

        # argmax raises ValueError on an empty array, i.e. when nothing in
        # the periodogram was finite
        try:
            bestperiodind = npargmax(finlsp)
        except ValueError:
            LOGERROR('no finite periodogram values '
                     'for this mag series, skipping...')
            return _failure(nfreq)

        # walk the periodogram from the highest power downwards. note that
        # these indices refer to the finite-filtered arrays
        sortedlspind = npargsort(finlsp)[::-1]
        sortedlspperiods = finperiods[sortedlspind]
        sortedlspvals = finlsp[sortedlspind]

        # the global maximum is always the first of the best peaks
        nbestperiods = [finperiods[bestperiodind]]
        nbestlspvals = [finlsp[bestperiodind]]
        nbestinds = [bestperiodind]
        peakcount = 1
        prevperiod = sortedlspperiods[0]

        # find the best nbestpeaks in the lsp and their periods
        for period, lspval, ind in zip(sortedlspperiods,
                                       sortedlspvals,
                                       sortedlspind):

            if peakcount == nbestpeaks:
                break

            perioddiff = abs(period - prevperiod)
            bestperiodsdiff = [abs(period - x) for x in nbestperiods]

            # this ensures that this period is different from the last
            # period and from all the other existing best periods by
            # periodepsilon to make sure we jump to an entire different
            # peak in the periodogram
            if (perioddiff > (periodepsilon * prevperiod) and
                    all(x > (periodepsilon * period)
                        for x in bestperiodsdiff)):
                nbestperiods.append(period)
                nbestlspvals.append(lspval)
                nbestinds.append(ind)
                peakcount = peakcount + 1

            prevperiod = period

        # generate the return dict
        return {
            'bestperiod': finperiods[bestperiodind],
            'bestlspval': finlsp[bestperiodind],
            'nbestpeaks': nbestpeaks,
            'nbestinds': nbestinds,
            'nbestlspvals': nbestlspvals,
            'nbestperiods': nbestperiods,
            'lspvals': lsp,
            'frequencies': frequencies,
            'periods': periods,
            'durations': durations,
            'blsresult': blsresult,
            'blsmodel': blsmodel,
            'stepsize': stepsize,
            'nfreq': nfreq,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'method': 'bls',
            'kwargs': _kwargs_record(),
        }

    except Exception:
        # deliberate best-effort boundary: log the traceback, then either
        # re-raise for the caller or hand back a failure dict
        LOGEXCEPTION('BLS failed!')

        if raiseonfail:
            raise

        return _failure(nfreq)
def bls_parallel_pfind( times, mags, errs, magsarefluxes=False, startp=0.1, # by default, search from 0.1 d to... endp=100.0, # ... 100.0 d -- don't search full timebase stepsize=1.0e-4, mintransitduration=0.01, # minimum transit length in phase maxtransitduration=0.4, # maximum transit length in phase ndurations=100, autofreq=True, # figure out f0, nf, and df automatically blsobjective='likelihood', blsmethod='fast', blsoversample=5, blsmintransits=3, blsfreqfactor=10.0, nbestpeaks=5, periodepsilon=0.1, # 0.1 sigclip=10.0, endp_timebase_check=True, verbose=True, nworkers=None, ): '''Runs the Box Least Squares Fitting Search for transit-shaped signals. Breaks up the full frequency space into chunks and passes them to parallel BLS workers. Based on the version of BLS in Astropy 3.1: `astropy.stats.BoxLeastSquares`. If you don't have Astropy 3.1, this module will fail to import. Note that by default, this implementation of `bls_parallel_pfind` doesn't use the `.autoperiod()` function from `BoxLeastSquares` but uses the same auto frequency-grid generation as the functions in `periodbase.kbls`. If you want to use Astropy's implementation, set the value of `autofreq` kwarg to 'astropy'. The generated period array will then be broken up into chunks and sent to the individual workers. NOTE: the combined BLS spectrum produced by this function is not identical to that produced by running BLS in one shot for the entire frequency space. There are differences on the order of 1.0e-3 or so in the respective peak values, but peaks appear at the same frequencies for both methods. This is likely due to different aliasing caused by smaller chunks of the frequency space used by the parallel workers in this function. When in doubt, confirm results for this parallel implementation by comparing to those from the serial implementation above. In particular, when you want to get reliable estimates of the SNR, transit depth, duration, etc. 
that Astropy's BLS gives you, rerun `bls_serial_pfind` with `startp`, and `endp` close to the best period you want to characterize the transit at. The dict returned from that function contains a `blsmodel` key, which is the generated model from Astropy's BLS. Use the `.compute_stats()` method to calculate the required stats. Parameters ---------- times,mags,errs : np.array The magnitude/flux time-series to search for transits. magsarefluxes : bool If the input measurement values in `mags` and `errs` are in fluxes, set this to True. startp,endp : float The minimum and maximum periods to consider for the transit search. stepsize : float The step-size in frequency to use when constructing a frequency grid for the period search. mintransitduration,maxtransitduration : float The minimum and maximum transitdurations (in units of phase) to consider for the transit search. ndurations : int The number of transit durations to use in the period-search. autofreq : bool or str If this is True, the values of `stepsize` and `nphasebins` will be ignored, and these, along with a frequency-grid, will be determined based on the following relations:: nphasebins = int(ceil(2.0/mintransitduration)) if nphasebins > 3000: nphasebins = 3000 stepsize = 0.25*mintransitduration/(times.max()-times.min()) minfreq = 1.0/endp maxfreq = 1.0/startp nfreq = int(ceil((maxfreq - minfreq)/stepsize)) If this is False, you must set `startp`, `endp`, and `stepsize` as appropriate. If this is str == 'astropy', will use the `astropy.stats.BoxLeastSquares.autoperiod()` function to calculate the frequency grid instead of the kbls method. blsobjective : {'likelihood','snr'} Sets the type of objective to optimize in the `BoxLeastSquares.power()` function. blsmethod : {'fast','slow'} Sets the type of method to use in the `BoxLeastSquares.power()` function. blsoversample : {'likelihood','snr'} Sets the `oversample` kwarg for the `BoxLeastSquares.power()` function. 
blsmintransits : int Sets the `min_n_transits` kwarg for the `BoxLeastSquares.autoperiod()` function. blsfreqfactor : float Sets the `frequency_factor` kwarg for the `BoxLeastSquares.autoperiod()` function. periodepsilon : float The fractional difference between successive values of 'best' periods when sorting by periodogram power to consider them as separate periods (as opposed to part of the same periodogram peak). This is used to avoid broad peaks in the periodogram and make sure the 'best' periods returned are all actually independent. nbestpeaks : int The number of 'best' peaks to return from the periodogram results, starting from the global maximum of the periodogram peak values. sigclip : float or int or sequence of two floats/ints or None If a single float or int, a symmetric sigma-clip will be performed using the number provided as the sigma-multiplier to cut out from the input time-series. If a list of two ints/floats is provided, the function will perform an 'asymmetric' sigma-clip. The first element in this list is the sigma value to use for fainter flux/mag values; the second element in this list is the sigma value to use for brighter flux/mag values. For example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. If `sigclip` is None, no sigma-clipping will be performed, and the time-series (with non-finite elems removed) will be passed through to the output. endp_timebase_check : bool If True, will check if the ``endp`` value is larger than the time-base of the observations. If it is, will change the ``endp`` value such that it is half of the time-base. If False, will allow an ``endp`` larger than the time-base of the observations. 
verbose : bool If this is True, will indicate progress and details about the frequency grid used for the period search. nworkers : int or None The number of parallel workers to launch for period-search. If None, nworkers = NCPUS. Returns ------- dict This function returns a dict, referred to as an `lspinfo` dict in other astrobase functions that operate on periodogram results. This is a standardized format across all astrobase period-finders, and is of the form below:: {'bestperiod': the best period value in the periodogram, 'bestlspval': the periodogram peak associated with the best period, 'nbestpeaks': the input value of nbestpeaks, 'nbestlspvals': nbestpeaks-size list of best period peak values, 'nbestperiods': nbestpeaks-size list of best periods, 'lspvals': the full array of periodogram powers, 'frequencies': the full array of frequencies considered, 'periods': the full array of periods considered, 'durations': the array of durations used to run BLS, 'blsresult': Astropy BLS result object (BoxLeastSquaresResult), 'blsmodel': Astropy BLS BoxLeastSquares object used for work, 'stepsize': the actual stepsize used, 'nfreq': the actual nfreq used, 'durations': the durations array used, 'mintransitduration': the input mintransitduration, 'maxtransitduration': the input maxtransitdurations, 'method':'bls' -> the name of the period-finder method, 'kwargs':{ dict of all of the input kwargs for record-keeping}} ''' # get rid of nans first and sigclip stimes, smags, serrs = sigclip_magseries(times, mags, errs, magsarefluxes=magsarefluxes, sigclip=sigclip) # make sure there are enough points to calculate a spectrum if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9: # if we're setting up everything automatically if isinstance(autofreq, bool) and autofreq: # use heuristic to figure out best timestep stepsize = 0.25 * mintransitduration / (stimes.max() - stimes.min()) # now figure out the frequencies to use minfreq = 1.0 / endp maxfreq = 1.0 / startp nfreq = 
int(npceil((maxfreq - minfreq) / stepsize)) # say what we're using if verbose: LOGINFO('min P: %s, max P: %s, nfreq: %s, ' 'minfreq: %s, maxfreq: %s' % (startp, endp, nfreq, minfreq, maxfreq)) LOGINFO('autofreq = True: using AUTOMATIC values for ' 'freq stepsize: %s, ndurations: %s, ' 'min transit duration: %s, max transit duration: %s' % (stepsize, ndurations, mintransitduration, maxtransitduration)) use_autoperiod = False elif isinstance(autofreq, bool) and not autofreq: minfreq = 1.0 / endp maxfreq = 1.0 / startp nfreq = int(npceil((maxfreq - minfreq) / stepsize)) # say what we're using if verbose: LOGINFO('min P: %s, max P: %s, nfreq: %s, ' 'minfreq: %s, maxfreq: %s' % (startp, endp, nfreq, minfreq, maxfreq)) LOGINFO('autofreq = False: using PROVIDED values for ' 'freq stepsize: %s, ndurations: %s, ' 'min transit duration: %s, max transit duration: %s' % (stepsize, ndurations, mintransitduration, maxtransitduration)) use_autoperiod = False elif isinstance(autofreq, str) and autofreq == 'astropy': use_autoperiod = True minfreq = 1.0 / endp maxfreq = 1.0 / startp else: LOGERROR("unknown autofreq kwarg encountered. can't continue...") return None # check the minimum frequency if ((minfreq < (1.0 / (stimes.max() - stimes.min()))) and endp_timebase_check): LOGWARNING('the requested max P = %.3f is larger than ' 'the time base of the observations = %.3f, ' ' will make minfreq = 2 x 1/timebase' % (endp, stimes.max() - stimes.min())) minfreq = 2.0 / (stimes.max() - stimes.min()) LOGWARNING('new minfreq: %s, maxfreq: %s' % (minfreq, maxfreq)) ############################# ## NOW RUN BLS IN PARALLEL ## ############################# # fix number of CPUs if needed if not nworkers or nworkers > NCPUS: nworkers = NCPUS if verbose: LOGINFO('using %s workers...' 
% nworkers) # check if autoperiod is True and get the correct period-grid if use_autoperiod: # astropy's BLS requires durations in units of time durations = nplinspace(mintransitduration * startp, maxtransitduration * startp, ndurations) # set up the correct units for the BLS model if magsarefluxes: blsmodel = BoxLeastSquares(stimes * u.day, smags * u.dimensionless_unscaled, dy=serrs * u.dimensionless_unscaled) else: blsmodel = BoxLeastSquares(stimes * u.day, smags * u.mag, dy=serrs * u.mag) periods = nparray( blsmodel.autoperiod(durations * u.day, minimum_period=startp, maximum_period=endp, minimum_n_transit=blsmintransits, frequency_factor=blsfreqfactor)) frequencies = 1.0 / periods nfreq = frequencies.size if verbose: LOGINFO("autofreq = 'astropy', used .autoperiod() with " "minimum_n_transit = %s, freq_factor = %s " "to generate the frequency grid" % (blsmintransits, blsfreqfactor)) LOGINFO('stepsize = %s, nfreq = %s, minfreq = %.5f, ' 'maxfreq = %.5f, ndurations = %s' % (abs(frequencies[1] - frequencies[0]), nfreq, 1.0 / periods.max(), 1.0 / periods.min(), durations.size)) del blsmodel del durations # otherwise, use kbls method else: frequencies = minfreq + nparange(nfreq) * stepsize # break up the tasks into chunks csrem = int(fmod(nfreq, nworkers)) csint = int(float(nfreq / nworkers)) chunk_minfreqs, chunk_nfreqs = [], [] for x in range(nworkers): this_minfreqs = frequencies[x * csint] # handle usual nfreqs if x < (nworkers - 1): this_nfreqs = frequencies[x * csint:x * csint + csint].size else: this_nfreqs = frequencies[x * csint:x * csint + csint + csrem].size chunk_minfreqs.append(this_minfreqs) chunk_nfreqs.append(this_nfreqs) # populate the tasks list # # task[0] = times # task[1] = mags # task[2] = errs # task[3] = magsarefluxes # task[4] = minfreq # task[5] = nfreq # task[6] = stepsize # task[7] = nphasebins # task[8] = mintransitduration # task[9] = maxtransitduration # task[10] = blsobjective # task[11] = blsmethod # task[12] = blsoversample # 
populate the tasks list tasks = [(stimes, smags, serrs, magsarefluxes, chunk_minf, chunk_nf, stepsize, ndurations, mintransitduration, maxtransitduration, blsobjective, blsmethod, blsoversample) for (chunk_minf, chunk_nf) in zip(chunk_minfreqs, chunk_nfreqs)] if verbose: for ind, task in enumerate(tasks): LOGINFO('worker %s: minfreq = %.6f, nfreqs = %s' % (ind + 1, task[4], task[5])) LOGINFO('running...') # return tasks # start the pool pool = Pool(nworkers) results = pool.map(_parallel_bls_worker, tasks) pool.close() pool.join() del pool # now concatenate the output lsp arrays lsp = npconcatenate([x['power'] for x in results]) periods = 1.0 / frequencies # find the nbestpeaks for the periodogram: 1. sort the lsp array # by highest value first 2. go down the values until we find # five values that are separated by at least periodepsilon in # period # make sure to get only the finite peaks in the periodogram # this is needed because BLS may produce infs for some peaks finitepeakind = npisfinite(lsp) finlsp = lsp[finitepeakind] finperiods = periods[finitepeakind] # make sure that finlsp has finite values before we work on it try: bestperiodind = npargmax(finlsp) except ValueError: LOGERROR('no finite periodogram values ' 'for this mag series, skipping...') return { 'bestperiod': npnan, 'bestlspval': npnan, 'nbestpeaks': nbestpeaks, 'nbestinds': None, 'nbestlspvals': None, 'nbestperiods': None, 'lspvals': None, 'periods': None, 'durations': None, 'method': 'bls', 'blsresult': None, 'blsmodel': None, 'kwargs': { 'startp': startp, 'endp': endp, 'stepsize': stepsize, 'mintransitduration': mintransitduration, 'maxtransitduration': maxtransitduration, 'ndurations': ndurations, 'blsobjective': blsobjective, 'blsmethod': blsmethod, 'blsoversample': blsoversample, 'autofreq': autofreq, 'periodepsilon': periodepsilon, 'nbestpeaks': nbestpeaks, 'sigclip': sigclip, 'magsarefluxes': magsarefluxes } } sortedlspind = npargsort(finlsp)[::-1] sortedlspperiods = 
finperiods[sortedlspind] sortedlspvals = finlsp[sortedlspind] # now get the nbestpeaks nbestperiods, nbestlspvals, nbestinds, peakcount = ([ finperiods[bestperiodind] ], [finlsp[bestperiodind]], [bestperiodind], 1) prevperiod = sortedlspperiods[0] # find the best nbestpeaks in the lsp and their periods for period, lspval, ind in zip(sortedlspperiods, sortedlspvals, sortedlspind): if peakcount == nbestpeaks: break perioddiff = abs(period - prevperiod) bestperiodsdiff = [abs(period - x) for x in nbestperiods] # this ensures that this period is different from the last # period and from all the other existing best periods by # periodepsilon to make sure we jump to an entire different # peak in the periodogram if (perioddiff > (periodepsilon * prevperiod) and all(x > (periodepsilon * period) for x in bestperiodsdiff)): nbestperiods.append(period) nbestlspvals.append(lspval) nbestinds.append(ind) peakcount = peakcount + 1 prevperiod = period # generate the return dict resultdict = { 'bestperiod': finperiods[bestperiodind], 'bestlspval': finlsp[bestperiodind], 'nbestpeaks': nbestpeaks, 'nbestinds': nbestinds, 'nbestlspvals': nbestlspvals, 'nbestperiods': nbestperiods, 'lspvals': lsp, 'frequencies': frequencies, 'periods': periods, 'durations': [x['durations'] for x in results], 'blsresult': [x['blsresult'] for x in results], 'blsmodel': [x['blsmodel'] for x in results], 'stepsize': stepsize, 'nfreq': nfreq, 'mintransitduration': mintransitduration, 'maxtransitduration': maxtransitduration, 'method': 'bls', 'kwargs': { 'startp': startp, 'endp': endp, 'stepsize': stepsize, 'mintransitduration': mintransitduration, 'maxtransitduration': maxtransitduration, 'ndurations': ndurations, 'blsobjective': blsobjective, 'blsmethod': blsmethod, 'blsoversample': blsoversample, 'autofreq': autofreq, 'periodepsilon': periodepsilon, 'nbestpeaks': nbestpeaks, 'sigclip': sigclip, 'magsarefluxes': magsarefluxes } } return resultdict else: LOGERROR('no good detections for these times and 
mags, skipping...') return { 'bestperiod': npnan, 'bestlspval': npnan, 'nbestinds': None, 'nbestpeaks': nbestpeaks, 'nbestlspvals': None, 'nbestperiods': None, 'lspvals': None, 'periods': None, 'durations': None, 'blsresult': None, 'blsmodel': None, 'stepsize': stepsize, 'nfreq': None, 'nphasebins': None, 'mintransitduration': mintransitduration, 'maxtransitduration': maxtransitduration, 'method': 'bls', 'kwargs': { 'startp': startp, 'endp': endp, 'stepsize': stepsize, 'mintransitduration': mintransitduration, 'maxtransitduration': maxtransitduration, 'ndurations': ndurations, 'blsobjective': blsobjective, 'blsmethod': blsmethod, 'blsoversample': blsoversample, 'autofreq': autofreq, 'periodepsilon': periodepsilon, 'nbestpeaks': nbestpeaks, 'sigclip': sigclip, 'magsarefluxes': magsarefluxes } }
def _parallel_bls_worker(task):
    '''
    This wraps Astropy's BoxLeastSquares so that one chunk of a frequency grid
    can be processed by a single worker (e.g. via `Pool.map`).

    Parameters
    ----------
    task : tuple
        The work item, laid out as::

            task[0] = times
            task[1] = mags
            task[2] = errs
            task[3] = magsarefluxes
            task[4] = minfreq
            task[5] = nfreq
            task[6] = stepsize
            task[7] = ndurations
            task[8] = mintransitduration
            task[9] = maxtransitduration
            task[10] = blsobjective
            task[11] = blsmethod
            task[12] = blsoversample

    Returns
    -------
    dict
        A dict with the keys `blsresult`, `blsmodel`, `durations` and `power`.
        If anything goes wrong, the exception is logged and a placeholder dict
        is returned instead: `blsresult` and `blsmodel` are None, `durations`
        is whatever had been computed so far (None if nothing), and `power`
        is an array of NaNs with one element per frequency in the chunk.

    '''

    # Pre-bind everything the failure path reads. Without this, an exception
    # raised early in the `try` body (malformed task tuple, empty frequency
    # chunk making `periods.min()` fail, ...) would trigger a second error
    # (UnboundLocalError/IndexError) inside the handler itself.
    frequencies = None
    durations = None

    try:

        times, mags, errs = task[:3]
        magsarefluxes = task[3]
        minfreq, nfreq, stepsize = task[4:7]
        ndurations, mintransitduration, maxtransitduration = task[7:10]
        blsobjective, blsmethod, blsoversample = task[10:]

        frequencies = minfreq + nparange(nfreq) * stepsize
        periods = 1.0 / frequencies

        # astropy's BLS requires durations in units of time; the fractional
        # transit durations are scaled by the shortest period in this chunk
        durations = nplinspace(mintransitduration * periods.min(),
                               maxtransitduration * periods.min(),
                               ndurations)

        # set up the correct units for the BLS model
        if magsarefluxes:

            blsmodel = BoxLeastSquares(
                times * u.day,
                mags * u.dimensionless_unscaled,
                dy=errs * u.dimensionless_unscaled
            )

        else:

            blsmodel = BoxLeastSquares(
                times * u.day,
                mags * u.mag,
                dy=errs * u.mag
            )

        blsresult = blsmodel.power(
            periods * u.day,
            durations * u.day,
            objective=blsobjective,
            method=blsmethod,
            oversample=blsoversample
        )

        return {
            'blsresult': blsresult,
            'blsmodel': blsmodel,
            'durations': durations,
            'power': nparray(blsresult.power)
        }

    except Exception:

        # only quote the chunk limits if the frequency grid actually exists
        # and is non-empty
        if frequencies is not None and frequencies.size > 0:
            LOGEXCEPTION('BLS for frequency chunk: (%.6f, %.6f) failed.' %
                         (frequencies[0], frequencies[-1]))
            nfailed = frequencies.size
        else:
            LOGEXCEPTION('BLS for frequency chunk failed before a usable '
                         'frequency grid could be set up.')
            nfailed = 0

        return {
            'blsresult': None,
            'blsmodel': None,
            'durations': durations,
            'power': nparray([npnan] * nfailed),
        }
def autocorr_magseries(times, mags, errs,
                       maxlags=1000,
                       func=_autocorr_func3,
                       fillgaps=0.0,
                       forcetimebin=None,
                       sigclip=3.0,
                       magsarefluxes=False,
                       filterwindow=11,
                       verbose=True):
    '''This calculates the ACF of a light curve.

    This will pre-process the light curve to fill in all the gaps and normalize
    everything to zero. If fillgaps == 'noiselevel', fills the gaps with the
    noise level obtained via the procedure above. If fillgaps == 'nan', fills
    the gaps with np.nan.

    Parameters
    ----------
    times,mags,errs : np.array
        The measurement time-series and associated errors.

    maxlags : int or None
        The maximum number of lags to calculate. If this is None or 0, one lag
        per point of the gap-filled series is used.

    func : Python function
        The function used to calculate the autocorrelation. Unless it is
        `_autocorr_func3`, it is called once per lag as
        `func(imags, lag, imags.size, 0.0, series_stdev)`.

    fillgaps, forcetimebin, sigclip, magsarefluxes, filterwindow, verbose
        These are passed through unchanged to `fill_magseries_gaps`.

    Returns
    -------
    dict or None
        The dict returned by `fill_magseries_gaps`, updated with the keys
        `minitime` (minimum of the gap-filled times), `lags` and `acf`.
        Returns None if the gap-filling step returns nothing.

    '''

    # get the gap-filled timeseries
    interpolated = fill_magseries_gaps(times, mags, errs,
                                       fillgaps=fillgaps,
                                       forcetimebin=forcetimebin,
                                       sigclip=sigclip,
                                       magsarefluxes=magsarefluxes,
                                       filterwindow=filterwindow,
                                       verbose=verbose)

    if not interpolated:
        LOGERROR('failed to interpolate light curve to minimum cadence!')
        return None

    itimes, imags = interpolated['itimes'], interpolated['imags']

    # calculate the lags up to maxlags
    if maxlags:
        lags = nparange(0, maxlags)
    else:
        lags = nparange(itimes.size)

    # scatter estimate: 1.483 x the median absolute value of the series (the
    # series median is taken to be 0.0, matching what is passed to func below)
    series_stdev = 1.483 * npmedian(npabs(imags))

    if func != _autocorr_func3:

        # get the autocorrelation as a function of the lag of the mag series
        autocorr = nparray([func(imags, x, imags.size, 0.0, series_stdev)
                            for x in lags])

    # this doesn't need a lags array
    else:

        autocorr = _autocorr_func3(imags, lags[0], imags.size,
                                   0.0, series_stdev)

        # this function is called only once for the whole series, so cut its
        # output down to the lags requested; otherwise 'lags' and 'acf' end
        # up with different lengths whenever maxlags < imags.size
        autocorr = autocorr[:lags.size]

    interpolated.update({'minitime': itimes.min(),
                         'lags': lags,
                         'acf': autocorr})

    return interpolated
def autocorr_magseries(times, mags, errs,
                       maxlags=1000,
                       func=_autocorr_func3,
                       fillgaps=0.0,
                       filterwindow=11,
                       forcetimebin=None,
                       sigclip=3.0,
                       magsarefluxes=False,
                       verbose=True):
    '''This calculates the ACF of a light curve.

    This will pre-process the light curve to fill in all the gaps and normalize
    everything to zero. If `fillgaps = 'noiselevel'`, fills the gaps with the
    noise level obtained via the procedure above. If `fillgaps = 'nan'`, fills
    the gaps with `np.nan`.

    Parameters
    ----------
    times,mags,errs : np.array
        The measurement time-series and associated errors.

    maxlags : int or None
        The maximum number of lags to calculate. If this is None or 0, one lag
        per point of the gap-filled series is used.

    func : Python function
        This is a function to calculate the lags. Unless it is
        `_autocorr_func3`, it is called once per lag as
        `func(imags, lag, imags.size, 0.0, series_stdev)`.

    fillgaps : 'noiselevel' or float
        This sets what to use to fill in gaps in the time series. If this is
        'noiselevel', will smooth the light curve using a point window size of
        `filterwindow` (this should be an odd integer), subtract the smoothed LC
        from the actual LC and estimate the RMS. This RMS will be used to fill
        in the gaps. Other useful values here are 0.0, and npnan.

    filterwindow : int
        The light curve's smoothing filter window size to use if
        `fillgaps='noiselevel'`.

    forcetimebin : None or float
        This is used to force a particular cadence in the light curve other than
        the automatically determined cadence. This effectively rebins the light
        curve to this cadence. This should be in the same time units as `times`.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    magsarefluxes : bool
        If your input measurements in `mags` are actually fluxes instead of
        mags, set this to True.

    verbose : bool
        If True, will indicate progress and report errors.

    Returns
    -------
    dict
        A dict of the following form is returned::

            {'itimes': the interpolated time values after gap-filling,
             'imags': the interpolated mag/flux values after gap-filling,
             'ierrs': the interpolated mag/flux errors after gap-filling,
             'cadence': the cadence of the output mag/flux time-series,
             'minitime': the minimum value of the interpolated times array,
             'lags': the lags used to calculate the auto-correlation function,
             'acf': the value of the ACF at each lag used}

        None is returned if the gap-filling step fails.

    '''

    # get the gap-filled timeseries
    interpolated = fill_magseries_gaps(times, mags, errs,
                                       fillgaps=fillgaps,
                                       forcetimebin=forcetimebin,
                                       sigclip=sigclip,
                                       magsarefluxes=magsarefluxes,
                                       filterwindow=filterwindow,
                                       verbose=verbose)

    if not interpolated:
        print('failed to interpolate light curve to minimum cadence!')
        return None

    itimes, imags = interpolated['itimes'], interpolated['imags']

    # calculate the lags up to maxlags
    if maxlags:
        lags = nparange(0, maxlags)
    else:
        lags = nparange(itimes.size)

    # scatter estimate: 1.483 x the median absolute value of the series (the
    # series median is taken to be 0.0, matching what is passed to func below)
    series_stdev = 1.483 * npmedian(npabs(imags))

    if func != _autocorr_func3:

        # get the autocorrelation as a function of the lag of the mag series
        autocorr = nparray([func(imags, x, imags.size, 0.0, series_stdev)
                            for x in lags])

    # this doesn't need a lags array
    else:

        autocorr = _autocorr_func3(imags, lags[0], imags.size,
                                   0.0, series_stdev)

        # return only the lags that were requested. Truncating to lags.size
        # (rather than to maxlags directly) keeps 'lags' and 'acf' the same
        # length even for maxlags=0, which selects all lags above but would
        # otherwise slice the ACF down to nothing.
        autocorr = autocorr[:lags.size]

    interpolated.update({'minitime': itimes.min(),
                         'lags': lags,
                         'acf': autocorr})

    return interpolated
def stellingwerf_pdm_theta(times, mags, errs, frequency,
                           binsize=0.05, minbin=9):
    '''
    This calculates the Stellingwerf PDM theta value at a test frequency.

    The time-series is folded at `1/frequency` (using the first time value as
    the fold epoch), the phased mags are split into phase bins, and the
    count-weighted pooled variance of the well-populated bins is divided by
    the variance of the whole phased series.

    Parameters
    ----------
    times,mags,errs : np.array
        The input time-series and associated errors. `errs` is accepted for
        interface uniformity but is not used in the calculation.

    frequency : float
        The test frequency to calculate the theta statistic at.

    binsize : float
        The phase bin size to use.

    minbin : int
        A phase bin only contributes to the statistic if it holds strictly
        more than this many points.

    Returns
    -------
    theta_pdm : float
        The value of the theta statistic at the specified `frequency`.

    '''

    # fold at the test period, anchored on the first observation
    folded = phase_magseries(times, mags,
                             1.0 / frequency,
                             times[0],
                             wrap=False,
                             sort=True)
    phase_vals = folded['phase']
    folded_mags = folded['mags']

    # assign every phased point to a phase bin
    bin_edges = nparange(0.0, 1.0, binsize)
    bin_membership = npdigitize(phase_vals, bin_edges)

    # gather (sample variance, number of points) for each usable bin
    kept = []
    for bin_id in npunique(bin_membership):
        members = folded_mags[bin_membership == bin_id]
        if members.size > minbin:
            kept.append((npvar(members, ddof=1), members.size))

    n_good = len(kept)
    variances = nparray([var for var, _ in kept])
    counts = nparray([cnt for _, cnt in kept])

    # pooled in-bin variance over the total variance of the phased series
    pooled_variance = (
        npsum(variances * (counts - 1)) / (npsum(counts) - n_good)
    )
    total_variance = npvar(folded_mags, ddof=1)

    return pooled_variance / total_variance